# 1. SQL 포트폴리오 ----
# Histogram section: load the input tables and pick the column to analyse.

# Install modeest only when it is missing, so re-running the script does not
# reinstall the package (and require network access) every time.
if (!requireNamespace("modeest", quietly = TRUE)) {
  install.packages("modeest")
}
# Close every graphics device that is currently open.
graphics.off()
library(modeest)

# Both CSV files are read with a header row and decoded from EUC-KR.
jn_rn <- read.csv("전남_지역별.csv", header = TRUE, fileEncoding = "euc-kr")
jn_hw <- read.csv("전남_폭염.csv", header = TRUE, fileEncoding = "euc-kr")
head(jn_rn)
head(jn_hw)

# Replace missing values with 0 in both tables.
# NOTE(review): the zero-fill also applies to the column summarised below,
# which pulls its mean/median/density toward 0 -- confirm this is intended.
jn_rn[is.na(jn_rn)] <- 0
jn_hw[is.na(jn_hw)] <- 0

# Column analysed in the rest of this section.
avgdo <- jn_hw$평균상대습도
# Compute the mean, median and an estimated mode of a numeric vector.
#
# The mode is estimated as the x position at which the kernel density
# estimate returned by density() (default settings) is highest.
#
# Args:
#   data:  numeric vector of observations.
#   na.rm: if TRUE, NA values are dropped before anything is computed.
#          With the default FALSE, NA values are passed through unchanged
#          (mean/median become NA and density() raises an error).
#
# Returns:
#   A list with elements `mean`, `median` and `mode`.
calculate_stats <- function(data, na.rm = FALSE) {
  if (na.rm) {
    data <- data[!is.na(data)]
  }
  mean_value <- mean(data)
  median_value <- median(data)
  # Estimate the density once and reuse it for both the peak lookup and the
  # matching x coordinate.
  dens <- density(data)
  list(
    mean = mean_value,
    median = median_value,
    mode = dens$x[which.max(dens$y)]
  )
}
# Summary statistics (mean, median, estimated mode) of the selected column.
avgdo_stats <- calculate_stats(avgdo)

# 1. Density-scaled histogram with the kernel density curve drawn on top.
hist(
  avgdo,
  breaks = 30,
  probability = TRUE,
  col = "gold",
  border = "lightpink",
  main = "대칭분포",
  xlab = "값",
  ylab = "밀도"
)
lines(density(avgdo), col = "tomato")

# Dashed vertical markers, in order: mean, median, mode -- one colour each.
abline(
  v = c(avgdo_stats$mean, avgdo_stats$median, avgdo_stats$mode),
  col = c("paleturquoise", "yellowgreen", "mediumorchid"),
  lwd = 2,
  lty = 2
)
# 2. 원하는 데이터 ----
library(wordcloud2)
library(RColorBrewer)
library(plyr)
library(data.table)
# Switch to the directory that holds the input text file.
setwd("c:\\data")

# Read the text line by line, declaring the input as UTF-8.
txt <- readLines("햄릿.txt", encoding = "UTF-8")

# Cleaning steps, innermost call first:
#   1. iconv() drops byte sequences that cannot be converted (sub = "");
#   2. every character that is not alphanumeric, whitespace or Hangul
#      (jamo or syllable) is replaced with a space;
#   3. each run of whitespace is collapsed into a single space.
cleaned_txt <- gsub(
  "\\s+", " ",
  gsub(
    "[^[:alnum:][:space:]ㄱ-ㅎㅏ-ㅣ가-힣]", " ",
    iconv(txt, "UTF-8", "UTF-8", sub = "")
  )
)
# Extract Hangul-only words of at least two characters from text.
#
# Despite the name, no part-of-speech analysis is performed: the text is
# split on whitespace and a token is kept when it consists solely of Hangul
# syllables and is two or more characters long.
#
# Args:
#   doc: text to scan; coerced with as.character(), may hold several strings.
#
# Returns:
#   Character vector of the kept tokens, in order of appearance.
extract_nouns_simple <- function(doc) {
  words <- unlist(strsplit(as.character(doc), "\\s+"))
  # Filter with vectorised masks rather than one function call per word.
  # The length check runs only on tokens that passed the Hangul test.
  hangul_words <- words[grepl("^[가-힣]+$", words)]
  hangul_words[nchar(hangul_words) >= 2]
}
# Count how often each extracted word occurs.
nouns <- extract_nouns_simple(cleaned_txt)
word_freq <- as.data.frame(table(nouns), stringsAsFactors = FALSE)
# Most frequent words first.
word_freq <- arrange(word_freq, desc(Freq))

# Drop empty tokens and the two character names in a single pass.
word_freq <- word_freq[!(word_freq$nouns %in% c("", "햄릿", "호레이쇼")), ]
print(head(word_freq, 10))

# Draw the word cloud in a heart shape. wordcloud2 has no "heart" preset
# (an unrecognised shape appears to fall back to the default circle -- verify
# against the package docs); its heart-like preset is named "cardioid".
wordcloud2(data = word_freq, shape = "cardioid", color = brewer.pal(8, "Dark2"))