createDataPartition에서 p=0.9로 설정해 train:test를 9:1로 나눈 경우
> plot(wine_model) 
> train_result<-predict(wine_model, train_data[ ,-1]) 
> test_result<-predict(wine_model, test_data[,-1]) 
> sum(train_result==train_data[ ,1])/161*100 
[1] 100 
> sum(test_result==test_data[ ,1])/16*100 
[1] 87.5
-> 과적합이 일어났다고 생각되어 train:test 를 8:2 로 수정해 다시 진행하였다. 
 
 
# Packages used below: caret provides createDataPartition(), C50 provides C5.0().
library(caret)
library(C50)

# Load the wine data; string columns are read as factors.
wine <- read.csv("wine2.csv", stringsAsFactors = TRUE)
str(wine)
summary(wine$Type)
prop.table(table(wine$Type))

# Missing-value count per column of the wine data
# (the original referenced an undefined object `credit` here).
colSums(is.na(wine))

# Seed the RNG before the random split so the partition is reproducible.
set.seed(1)

# Stratified 80/20 split on Type; list = FALSE returns row indices as a matrix.
train_num <- createDataPartition(wine$Type, p = 0.8, list = FALSE)
train_num
train_data <- wine[train_num, ]
test_data <- wine[-train_num, ]
nrow(train_data) # 143
nrow(test_data) # 34
ncol(train_data) # 14

# Column 1 is used as the class label and every other column as a predictor.
# NOTE(review): presumably column 1 is Type -- confirm against wine2.csv.
wine_model <- C5.0(train_data[, -1], train_data[, 1])
summary(wine_model)

plot(wine_model)
> train_result<-predict(wine_model, train_data[ ,-1]) 
> test_result<-predict(wine_model, test_data[,-1]) 
> sum(train_result==train_data[ ,1])/143*100 
[1] 97.9021 
> sum(test_result==test_data[ ,1])/34*100 
[1] 91.17647
 
# Sweep the number of boosting trials from 1 to 20 and print the test-set
# accuracy (in percent) for each setting. After the loop, wine_model2 and
# test_result2 hold the model and predictions of the last setting (trials = 20).
set.seed(1)
for (i in seq_len(20)) {
  # The loop index is used directly as the trial count, so the printed label
  # and the fitted model can never drift apart.
  wine_model2 <- C5.0(train_data[, -1], train_data[, 1], trials = i)
  test_result2 <- predict(wine_model2, test_data[, -1])
  # Divide by the actual number of test rows instead of a hard-coded 34, so
  # the accuracy stays correct if the train/test split changes.
  w <- sum(test_result2 == test_data[, 1]) / nrow(test_data) * 100
  print(paste(i, '일때', w))
}
[1] "1 일때 91.1764705882353" 
[1] "2 일때 91.1764705882353" 
[1] "3 일때 97.0588235294118" 
[1] "4 일때 100" 
[1] "5 일때 100" 
[1] "6 일때 100" 
[1] "7 일때 97.0588235294118" 
[1] "8 일때 100" 
[1] "9 일때 100" 
[1] "10 일때 100" 
[1] "11 일때 100" 
[1] "12 일때 97.0588235294118" 
[1] "13 일때 97.0588235294118" 
[1] "14 일때 97.0588235294118" 
[1] "15 일때 97.0588235294118" 
[1] "16 일때 97.0588235294118" 
[1] "17 일때 97.0588235294118" 
[1] "18 일때 97.0588235294118" 
[1] "19 일때 97.0588235294118" 
[1] "20 일때 97.0588235294118"
 
 
> x<-CrossTable(test_data[ ,1], test_result3) 
  
   Cell Contents 
|-------------------------| 
|                       N | 
| Chi-square contribution | 
|           N / Row Total | 
|           N / Col Total | 
|         N / Table Total | 
|-------------------------| 
  
Total Observations in Table:  34  
  
               | test_result3  
test_data[, 1] |        t1 |        t2 |        t3 | Row Total |  
---------------|-----------|-----------|-----------|-----------| 
            t1 |        11 |         0 |         0 |        11 |  
               |    15.559 |     4.529 |     2.912 |           |  
               |     1.000 |     0.000 |     0.000 |     0.324 |  
               |     1.000 |     0.000 |     0.000 |           |  
               |     0.324 |     0.000 |     0.000 |           |  
---------------|-----------|-----------|-----------|-----------| 
            t2 |         0 |        14 |         0 |        14 |  
               |     4.529 |    11.765 |     3.706 |           |  
               |     0.000 |     1.000 |     0.000 |     0.412 |  
               |     0.000 |     1.000 |     0.000 |           |  
               |     0.000 |     0.412 |     0.000 |           |  
---------------|-----------|-----------|-----------|-----------| 
            t3 |         0 |         0 |         9 |         9 |  
               |     2.912 |     3.706 |    18.382 |           |  
               |     0.000 |     0.000 |     1.000 |     0.265 |  
               |     0.000 |     0.000 |     1.000 |           |  
               |     0.000 |     0.000 |     0.265 |           |  
---------------|-----------|-----------|-----------|-----------| 
  Column Total |        11 |        14 |         9 |        34 |  
               |     0.324 |     0.412 |     0.265 |           |  
---------------|-----------|-----------|-----------|-----------|
 
trials=4
wine_model2<-C5.0(train_data[ ,-1],train_data[ ,1], trials=4) 
일 때 train 결과가 100, test 결과가 100으로 가장 좋은 결과가 나왔다.