# Re: 오늘의 마지막 문제. 오늘 배운 히스토그램, 박스 그래프, 워드 클라우드 등을 이용해서..

# Word-cloud script: reads a UTF-8 text file, keeps Hangul-only words of two or
# more characters, counts them, drops a few stop words, and renders the
# frequencies with wordcloud2.

# Packages used below. They are loaded explicitly so the script also runs in a
# fresh session (arrange()/desc(), wordcloud2() and brewer.pal() are not base R).
library(dplyr)         # arrange(), desc()
library(wordcloud2)    # wordcloud2()
library(RColorBrewer)  # brewer.pal()

# Load the text data ----
txt <- readLines("여러텍스트.txt", encoding = "UTF-8")

# Clean the text ----
# Drop byte sequences that cannot be converted as UTF-8.
cleaned_txt <- iconv(txt, "UTF-8", "UTF-8", sub = "")
# Replace every character that is not alphanumeric, whitespace, or Hangul
# (jamo or syllable) with a space.
cleaned_txt <- gsub("[^[:alnum:][:space:]ㄱ-ㅎㅏ-ㅣ가-힣]", " ", cleaned_txt)
# Collapse runs of whitespace into a single space.
cleaned_txt <- gsub("\\s+", " ", cleaned_txt)

#' Extract candidate nouns with a simple whitespace heuristic
#'
#' Splits the input on whitespace and keeps the tokens that consist only of
#' Hangul syllables and are at least two characters long. No morphological
#' analysis is performed, so the result is "Hangul words" rather than strictly
#' nouns.
#'
#' @param doc A vector coercible to character (e.g. lines of text).
#' @return A character vector of the matching tokens in input order;
#'   `character(0)` when nothing matches.
extract_nouns_simple <- function(doc) {
  # as.character() on the outside turns the NULL that unlist() yields for
  # empty input into character(0).
  words <- as.character(unlist(strsplit(as.character(doc), "\\s+")))
  # Vectorized mask: Hangul-only tokens with at least two characters.
  is_candidate <- grepl("^[가-힣]+$", words) & nchar(words) >= 2
  words[is_candidate]
}

# Extract the words ----
nouns <- extract_nouns_simple(cleaned_txt)
if (length(nouns) == 0L) {
  stop("No Hangul words of two or more characters were found in the input.",
       call. = FALSE)
}
# Show the first 10 extracted words (input order, not frequency order).
print(head(nouns, 10))

# Count word frequencies ----
# table(nouns) yields the columns `nouns` and `Freq`.
word_freq <- as.data.frame(table(nouns), stringsAsFactors = FALSE)
# Most frequent words first.
word_freq <- arrange(word_freq, desc(Freq))
print(head(word_freq, 10))

# Drop empty entries and stop words ----
stop_words <- c("너무", "있다")
keep_row <- nzchar(word_freq$nouns) & !(word_freq$nouns %in% stop_words)
word_freq <- word_freq[keep_row, ]
print(head(word_freq, 10))

# Draw the word cloud ----
# The shape is "square"; wordcloud2 documents "cardioid" as the heart-like
# shape if that is what is wanted instead.
# The 8-colour palette is recycled to one colour per row, matching the
# per-word colour vector form shown in the wordcloud2 documentation.
# NOTE(review): confirm how your wordcloud2 version treated the shorter vector.
cloud_colors <- rep_len(brewer.pal(8, "Dark2"), nrow(word_freq))
wordcloud2(
  data = word_freq,
  shape = "square",
  color = cloud_colors,
  size = 0.2
)

# Forum attachment (image of the rendered result), preserved from the post:
# <div class="figure-img" data-ke-type="image" data-ke-style="alignCenter" data-ke-mobilestyle="widthOrigin"><img src="https://t1.daumcdn.net/cafeattach/zchT/3cbdfaf8b67b7c82cdec0f8703d96729d90e17c5" class="txc-image" data-img-src="https://t1.daumcdn.net/cafeattach/zchT/3cbdfaf8b67b7c82cdec0f8703d96729d90e17c5" data-origin-width="620" data-origin-height="447"></div>