# 1. Load the data.
data_path <- "c:\\data\\binary.csv"
mydata <- read.csv(data_path, header = TRUE, stringsAsFactors = TRUE)
head(mydata)
str(mydata)

# admit is the dependent variable: 0 means rejected, 1 means admitted.
# Fail early with a clear message if the expected label column is missing.
stopifnot("admit" %in% names(mydata))
mydata$admit <- as.factor(mydata$admit)

# 2. Split into training and test sets at an 80/20 ratio.
library(caret)
set.seed(1) # fixed seed so the partition is reproducible
k <- createDataPartition(mydata$admit, p = 0.8, list = FALSE)
train_data <- mydata[k, ]
test_data <- mydata[-k, ]
nrow(train_data) # original note: 320 (not verified here)
nrow(test_data) # original note: 80 (not verified here)
# Min-max rescale a numeric vector onto the [0, 1] interval.
#
# x: numeric vector.
# Returns a numeric vector of the same length as x, where the smallest value
# maps to 0 and the largest to 1. If any element is NA the result is all NA,
# because min() and max() are called without na.rm.
normalize <- function(x) {
  lo <- min(x)
  span <- max(x) - lo
  # A constant vector has zero range; (x - lo) / span would be 0 / 0 = NaN.
  # Return zeros instead so a constant column stays usable downstream.
  if (!is.na(span) && span == 0) {
    return(rep(0, length(x)))
  }
  (x - lo) / span
}
# Min-max scale the predictor columns, then re-attach the class label.
#
# Both sets are scaled with the minimum and maximum of the TRAINING data, so a
# given raw value maps to the same scaled value in either set. Scaling the test
# set with its own range would put the two sets on different scales. As a
# consequence, test values outside the training range fall outside [0, 1].
# Predictors are selected by name rather than by dropping column 1, so the
# result does not depend on where admit sits in the file.
predictor_cols <- setdiff(names(train_data), "admit")
train_min <- vapply(train_data[predictor_cols], min, numeric(1))
train_max <- vapply(train_data[predictor_cols], max, numeric(1))

# Rescale the predictor columns of df using the training-set range.
scale_by_train_range <- function(df) {
  scaled <- Map(
    function(column, lo, hi) (column - lo) / (hi - lo),
    df[predictor_cols], train_min, train_max
  )
  as.data.frame(scaled)
}

train_data_n <- scale_by_train_range(train_data)
test_data_n <- scale_by_train_range(test_data)
summary(train_data_n)
summary(test_data_n)

# Append the label as the last column of each scaled set.
train_data_n <- cbind(train_data_n, admit = train_data$admit)
test_data_n <- cbind(test_data_n, admit = test_data$admit)
head(test_data_n)
nrow(train_data_n)
nrow(test_data_n)
library(e1071)

# Grid search over the cost and gamma parameters of a radial-kernel SVM.
C_values <- 1:100
gamma_values <- 2^(-20:-1)

# One row per (C, gamma) pair. expand.grid varies its first argument fastest,
# so gamma cycles within each C value: the same order as a nested loop with C
# outside and gamma inside.
param_grid <- expand.grid(gamma = gamma_values, C = C_values)

# Drop the label by name instead of by a hard-coded column position.
test_predictors <- test_data_n[, names(test_data_n) != "admit", drop = FALSE]

# Fit one SVM on the training set and return its accuracy on the test set.
#
# cost_value:  value passed to svm() as cost.
# gamma_value: value passed to svm() as gamma.
# Returns the fraction of test rows whose predicted label equals admit.
evaluate_svm <- function(cost_value, gamma_value) {
  set.seed(1)
  model <- svm(admit ~ .,
    data = train_data_n, kernel = "radial",
    cost = cost_value, gamma = gamma_value
  )
  predicted <- predict(model, test_predictors)
  sum(predicted == test_data_n$admit) / length(test_data_n$admit)
}

# NOTE(review): accuracy is measured on test_data_n, the same data used to
# rank the parameter pairs, so the best accuracy reported below is an
# optimistic estimate. Consider choosing parameters by cross-validation on the
# training data instead. Left unchanged here to preserve the script's output.
grid_accuracy <- vapply(
  seq_len(nrow(param_grid)),
  function(i) evaluate_svm(param_grid$C[i], param_grid$gamma[i]),
  numeric(1)
)

# Assemble all results at once instead of growing a data frame with rbind().
results <- data.frame(
  C = param_grid$C,
  gamma = param_grid$gamma,
  accuracy = grid_accuracy
)

# Show the parameter pairs sorted by accuracy, best first.
library(doBy)
b <- orderBy(~ -accuracy, results)
b