# 1. Load the data.
mydata <- read.csv("c:\\data\\binary.csv", header = TRUE)
# Store the outcome as a factor (it is used as the class label further down).
mydata$admit <- as.factor(mydata$admit)
head(mydata)
# 2. Split the data into training and test sets at an 8:2 ratio.
library(caret)
# Fixed seed so the random partition is reproducible between runs.
set.seed(123)
# k holds the row indices selected for training (p = 0.8 of the rows);
# list = FALSE makes caret return them as a matrix instead of a list.
k <- createDataPartition(mydata$admit, p = 0.8, list = FALSE)
train_data <- mydata[k, ]
test_data <- mydata[-k, ]
nrow(train_data) # 321
nrow(test_data) # 79
# Min-max normalization.
#
# Rescales a numeric vector linearly so that its minimum maps to 0 and its
# maximum maps to 1.
#
# Args:
#   x:     Numeric vector to rescale.
#   na.rm: If TRUE, NAs are ignored when computing the minimum and maximum
#          (they stay NA in the result). The default FALSE keeps the
#          historical behaviour: any NA in x makes the whole result NA.
#
# Returns:
#   A double vector of the same length as x. A constant vector maps to all
#   zeros instead of NaN (0 / 0); an empty or all-NA vector is returned as-is
#   (as double) without min()/max() warnings.
normalize <- function(x, na.rm = FALSE) {
  # min()/max() warn and return +/-Inf when there is nothing to summarise.
  if (length(x) == 0L || all(is.na(x))) {
    return(as.numeric(x))
  }
  lo <- min(x, na.rm = na.rm)
  span <- max(x, na.rm = na.rm) - lo
  # Zero range means every value equals lo; divide by 1 so the result is 0
  # rather than NaN. An NA span is left alone and propagates NA as before.
  if (!is.na(span) && span == 0) {
    span <- 1
  }
  (x - lo) / span
}
# Predictor columns only: column 1 is dropped on both sets.
# NOTE(review): this assumes column 1 is the outcome (admit) - confirm against
# the CSV layout. drop = FALSE keeps a data frame even with a single predictor.
train_x <- train_data[, -1, drop = FALSE]
test_x <- test_data[, -1, drop = FALSE]
train_data_n <- as.data.frame(lapply(train_x, normalize))
# Scale the test predictors with the TRAINING minimum and range, so that both
# sets share one scale and no test-set statistics enter the preprocessing.
# Test values outside the training range therefore fall outside [0, 1].
test_data_n <- as.data.frame(Map(
  function(test_col, train_col) {
    lo <- min(train_col)
    (test_col - lo) / (max(train_col) - lo)
  },
  test_x, train_x
))
# Re-attach the dependent variable.
train_data_n$admit <- train_data$admit
test_data_n$admit <- test_data$admit
# Grid search over the RBF-kernel SVM hyperparameters (cost and gamma),
# recording the test-set accuracy of every combination.
library(e1071)
C_values <- 1:100
gamma_values <- 2^(-20:-1)
# One row per (C, gamma) pair. expand.grid varies its first factor fastest, so
# listing gamma first reproduces the "C outer, gamma inner" evaluation order.
param_grid <- expand.grid(gamma = gamma_values, C = C_values)
# Drop the outcome by name rather than by position so the predictors stay
# correct even if the column order changes.
test_features <- test_data_n[, names(test_data_n) != "admit", drop = FALSE]
# Preallocate instead of growing a data frame with rbind() on every iteration.
accuracies <- numeric(nrow(param_grid))
# NOTE(review): the hyperparameters are selected on the same test set that is
# used to report accuracy, so the best score is optimistic. Consider tuning
# with cross-validation on the training data (e.g. e1071::tune) instead.
for (i in seq_len(nrow(param_grid))) {
  set.seed(123)
  grade_svm_model <- svm(admit ~ ., data = train_data_n, kernel = "radial",
                         cost = param_grid$C[i], gamma = param_grid$gamma[i])
  result <- predict(grade_svm_model, test_features)
  # Fraction of test rows whose predicted class matches the true class.
  accuracies[i] <- sum(result == test_data_n$admit) / length(test_data_n$admit)
}
results <- data.frame(C = param_grid$C, gamma = param_grid$gamma,
                      accuracy = accuracies)
# Show every combination, best accuracy first.
library(doBy)
orderBy(~ -accuracy, results)