# Packages used further down in this script:
#   caret -> createDataPartition() for the train/test split
#   C50   -> C5.0() decision tree / boosted tree classifier
library(caret)
library(C50)

# Load the wine data set. String columns are read as factors
# (stringsAsFactors = TRUE).
wine <- read.csv("wine2.csv", header = TRUE, stringsAsFactors = TRUE)

# Quick sanity checks: first rows, number of NA values per column,
# and the total number of observations.
head(wine)
colSums(is.na(wine))
nrow(wine)
# Split the data 80/20 into training and test sets, sampling on wine$Type.
# The seed is fixed so the random partition (and therefore every accuracy
# figure computed from it) is the same on every run.
set.seed(1)
train_num <- createDataPartition(wine$Type, p = 0.8, list = FALSE)
train_data <- wine[train_num, ]
test_data <- wine[-train_num, ]

nrow(train_data) # 143 in the recorded run
nrow(test_data) # 34 in the recorded run

# Peek at the test rows with the first column dropped.
head(test_data[, -1])
# Baseline C5.0 model (default single trial).
# Column 1 is used as the class label and all remaining columns as
# predictors -- presumably column 1 is `Type`; confirm against the CSV.
model <- C5.0(train_data[, -1], train_data[, 1])

# Training accuracy in percent. The denominator is taken from the data
# rather than hard-coded, so it stays correct if the split size changes.
train_result <- predict(model, train_data[, -1])
sum(train_result == train_data[, 1]) / nrow(train_data) * 100
# 99.3007 (recorded run)

# Test accuracy in percent.
test_result <- predict(model, test_data[, -1])
sum(test_result == test_data[, 1]) / nrow(test_data) * 100
# 94.11765 (recorded run)
# Sweep the number of boosting trials and print the test accuracy for each.
#   y      : current value passed to `trials` (starts at 1)
#   jumpby : step added to y after every iteration
# NOTE(review): accuracy here is measured on test_data, so picking `trials`
# from these numbers makes any later test-set accuracy optimistic; consider
# a separate validation split or cross-validation.
y <- 1
jumpby <- 1

# Print numbers in fixed notation instead of scientific notation.
options(scipen = 999)

for (i in seq_len(30)) {
  model <- C5.0(train_data[, -1], train_data[, 1], trials = y)
  test_result2 <- predict(model, test_data[, -1])

  # Percent of test rows predicted correctly; denominator comes from the
  # data instead of a hard-coded row count.
  a <- sum(test_result2 == test_data[, 1]) / nrow(test_data) * 100

  # Label the result with the actual `trials` value so the output stays
  # correct even when jumpby is not 1.
  print(paste(y, '일때', a))

  y <- y + jumpby
}
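# Console output recorded from one run of the loop above (first five
# iterations; format is "<trials> 일때 <test accuracy %>"):
# [1] "1 일때 94.1176470588235"
# [1] "2 일때 94.1176470588235"
# [1] "3 일때 97.0588235294118"
# [1] "4 일때 100"
# [1] "5 일때 94.1176470588235"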
# Refit the C5.0 model with 4 boosting trials.
# The argument is spelled out as `trials` rather than relying on partial
# matching of `trial`.
model <- C5.0(train_data[, -1], train_data[, 1], trials = 4)

# Training accuracy in percent (denominator derived from the data).
train_result <- predict(model, train_data[, -1])
sum(train_result == train_data[, 1]) / nrow(train_data) * 100
# 100 (recorded run)

# Test accuracy in percent.
test_result <- predict(model, test_data[, -1])
sum(test_result == test_data[, 1]) / nrow(test_data) * 100
# 100 (recorded run)