# Step 1. Load the admissions data and convert the binary target to a factor.
# (header = TRUE, not T: T/F are ordinary variables in R and can be reassigned.)
bi <- read.csv("c:\\data\\binary.csv", header = TRUE)
head(bi)
bi$admit <- as.factor(bi$admit)  # admit must be a factor for SVM classification
str(bi)
#2. 훈련과 테스틀 8대 2로 나눕니다. library(caret) set.seed(123) k <- createDataPartition(bi$admit, p=0.8, list=FALSE) train_data <- bi[ k , ] test_data <- bi[ -k , ] nrow(train_data) # 320 nrow(test_data) # 80
normalize <- function(x) { return ( ( x-min(x) ) / ( max(x) - min(x) ) ) }
# Step 3. Min-max normalize every feature column (everything except the
# first column, admit), then re-attach the untouched factor target.
train_data_n <- as.data.frame(lapply(train_data[, -1], normalize))
test_data_n <- as.data.frame(lapply(test_data[, -1], normalize))
train_data_n$admit <- train_data$admit
test_data_n$admit <- test_data$admit
# Hyper-parameter grid: cost 1..100 and gamma 2^-20 .. 2^-1.
C_values <- seq_len(100)
gamma_values <- 2^seq(-20, -1)
# Accumulator: one (C, gamma, accuracy) row per grid combination.
results <- data.frame(
  C = numeric(),
  gamma = numeric(),
  accuracy = numeric()
)
bi_svm_model <- svm(admit ~ ., data = train_data_n, kernel = "radial", cost = C, gamma = gamma) result <- predict(bi_svm_model, test_data_n[, c(1:3)]) result sum(result == test_data_n$admit) / length(test_data_n$admit)
# Step 4. Grid search: fit one SVM per (C, gamma) pair and record its
# accuracy on the held-out test set.
library(e1071)
set.seed(123)

# NOTE(review): the original used kernel = "linear", but gamma is ignored by
# the linear kernel (see ?e1071::svm), which made the entire gamma grid
# redundant. The radial kernel actually uses gamma and matches the radial
# single-model call elsewhere in this script.
grid_rows <- vector("list", length(C_values) * length(gamma_values))
row_i <- 0L
for (C in C_values) {
  for (gamma in gamma_values) {
    bi_svm_model <- svm(admit ~ ., data = train_data_n,
                        kernel = "radial", cost = C, gamma = gamma)
    # predict() via the formula interface ignores the admit column itself.
    result <- predict(bi_svm_model, test_data_n)
    accuracy <- sum(result == test_data_n$admit) / length(test_data_n$admit)
    row_i <- row_i + 1L
    grid_rows[[row_i]] <- data.frame(C = C, gamma = gamma, accuracy = accuracy)
  }
}
# Preallocated list + single rbind avoids the O(n^2) rbind-in-loop growth.
results <- do.call(rbind, grid_rows)
library(doBy) orderBy( ~ -accuracy, results) |