# Load the iris data from CSV; character columns are converted to factors.
# NOTE(review): assigning to `iris` shadows the built-in `datasets::iris`
# object for the rest of the session.
iris <- read.csv("c:\\data\\iris2.csv", stringsAsFactors = TRUE)

# Quick inspection: first rows, column types, NA count per column, and the
# number of rows per Species level.
head(iris)
str(iris)
colSums(is.na(iris))
table(iris$Species)

# Keep only the predictor columns by dropping column 5.
# NOTE(review): presumably column 5 is the Species label -- confirm against
# the layout of the CSV file.
iris2 <- iris[, -5]
# Min-max rescale a numeric vector onto the interval [0, 1].
#
# Arguments:
#   x      Numeric vector to rescale.
#   na.rm  If TRUE, NA values are ignored when computing the range and stay
#          NA in the result. Defaults to FALSE, in which case any NA in `x`
#          makes every element of the result NA.
#
# Returns a vector the same length as `x`. A vector whose values are all
# equal maps to zeros rather than NaN (0 / 0), and an empty vector yields
# numeric(0) without the min()/max() "no non-missing arguments" warnings.
normalize <- function(x, na.rm = FALSE) {
  if (length(x) == 0) {
    return(numeric(0))
  }

  bounds <- range(x, na.rm = na.rm)
  span <- bounds[2] - bounds[1]

  if (!is.na(span) && span == 0) {
    # Constant input: dividing by a zero span would give NaN for every
    # element, so map each value to 0 instead (NA entries remain NA).
    return(x - bounds[1])
  }

  (x - bounds[1]) / span
}
# Rescale every predictor column with normalize() and check the resulting
# per-column summaries.
iris_n <- as.data.frame(lapply(iris2, normalize))
summary(iris_n)

# Fixed seed so the random partition below is reproducible.
set.seed(49)

# Draw the training row indices: 90% of the rows, sampled with respect to
# Species. list = FALSE makes caret return the indices as a matrix instead
# of a list. The call is namespace-qualified so it does not depend on caret
# being attached.
train_indx <- caret::createDataPartition(iris$Species, p = 0.9, list = FALSE)

# Predictors: selected rows for training, the remaining rows for testing.
iris_train <- iris_n[train_indx, ]
iris_test <- iris_n[-train_indx, ]

# Labels, taken from the original data with the same row indices so they
# stay aligned with the predictor rows.
iris_train_label <- iris$Species[train_indx]
iris_test_label <- iris$Species[-train_indx]
# Evaluate kNN test accuracy for every odd k from 1 to 57.
k_values <- seq(1, 57, by = 2)

# Preallocate the result vector instead of growing a data frame with rbind()
# on every iteration.
accuracy_by_k <- numeric(length(k_values))

# Seed once before the sweep so repeated runs of the sweep give the same
# results; the k values are still evaluated in increasing order.
set.seed(49)
for (idx in seq_along(k_values)) {
  k <- k_values[idx]

  # Namespace-qualified so the call does not depend on the class package
  # being attached.
  predicted <- class::knn(
    train = iris_train,
    test = iris_test,
    cl = iris_train_label,
    k = k
  )

  # Percentage of test rows whose predicted label matches the true label.
  accuracy_by_k[idx] <-
    sum(predicted == iris_test_label) / length(iris_test_label) * 100
  print(paste(k, '개일 때 정확도 ', accuracy_by_k[idx]))
}

# One row per k, in the order evaluated.
accuracies <- data.frame(k = k_values, accuracy = accuracy_by_k)
accuracies
# Line-and-marker chart of accuracy against k, drawn in red.
# plotly functions are namespace-qualified: an unqualified `layout` resolves
# to graphics::layout() when plotly is not attached, and `%>%` would not be
# defined at all.
fig <- plotly::plot_ly(
  data = accuracies,
  x = ~k,
  y = ~accuracy,
  type = 'scatter',
  mode = 'lines+markers',
  line = list(color = 'red')
)

# Add the chart title and axis titles.
fig <- plotly::layout(
  fig,
  title = "K 값에 따른 정확도",
  xaxis = list(title = "K 값"),
  yaxis = list(title = "정확도")
)

# Print the figure so it renders.
fig