# Load the wine data set; character columns are read in as factors.
wine <- read.csv(file = "wine2.csv", stringsAsFactors = TRUE)

# Quick inspection: first rows, column types, and missing values per column.
head(wine, n = 6L)
str(object = wine)
colSums(x = is.na(wine))
library(C50)
library(caret)
# Split the data 80/20 into training and test sets, sampling within the
# levels of `Type` via caret::createDataPartition(). The seed is fixed so the
# split is reproducible.
set.seed(1)
train_num <- createDataPartition(wine$Type, p = 0.8, list = FALSE)

# Each assignment needs its own line (or a `;`): the original placed both on
# one line, which is a parse error in R.
train_data <- wine[train_num, ]
test_data <- wine[-train_num, ]

# Row counts of each partition (original author's run reported 143 / 34).
nrow(train_data)
nrow(test_data)
# Fit a single C5.0 decision tree. Column 1 is used as the class label and
# every other column as a predictor.
# NOTE(review): presumably column 1 is `Type` (the column used for the
# partition) -- confirm against the CSV layout.
wine_model <- C5.0(x = train_data[, -1], y = train_data[, 1])
summary(wine_model)

# Predicted classes for the training and the held-out test rows.
train_result <- predict(wine_model, newdata = train_data[, -1])
test_result <- predict(wine_model, newdata = test_data[, -1])

# Accuracy in percent: share of rows whose prediction equals the true label.
train_hits <- train_result == train_data[, 1]
test_hits <- test_result == test_data[, 1]
sum(train_hits) / length(train_hits) * 100
sum(test_hits) / length(test_hits) * 100
# Boosting experiment: refit C5.0 with 1..max_trials boosting iterations and
# print the training and test accuracy (percent) for each setting.
max_trials <- 10L

# NOTE(review): this changes a global option and is not restored (same as the
# original); presumably it is meant to keep the printed accuracies out of
# scientific notation -- confirm it is still wanted.
options(scipen = 999)

# The loop variable itself is the number of boosting trials. The original
# kept a separate counter (`y`, stepped by `jumpby`) for the model while
# printing `i`, so label and model only matched by coincidence.
for (n_trials in seq_len(max_trials)) {
  wine_model2 <- C5.0(train_data[, -1], train_data[, 1], trials = n_trials)

  test_result2 <- predict(wine_model2, test_data[, -1])
  train_result2 <- predict(wine_model2, train_data[, -1])

  # Accuracy in percent on each partition.
  test_acc <- sum(test_result2 == test_data[, 1]) / nrow(test_data) * 100
  train_acc <- sum(train_result2 == train_data[, 1]) / nrow(train_data) * 100

  # Output text is kept exactly as in the original script.
  print(paste(n_trials, "일때 훈련데이터", train_acc, "테스트 데이터", test_acc))
}