# Load the data
binary <- read.csv('binary.csv', stringsAsFactors = T)
head(binary)
str(binary)
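# Optional sanity check before any type conversion (not part of the original
# pipeline, just a quick inspection sketch): confirm there are no missing values.
colSums(is.na(binary))   # count of NA values per column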
# Convert the target admit to a factor; keep rank and gre as numeric predictors
binary$admit <- as.factor(binary$admit)
binary$rank <- as.numeric(binary$rank)
binary$gre <- as.numeric(binary$gre)
str(binary)
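# It can also help to look at how imbalanced the target is before modelling;
# this is only an inspection step added here, not part of the original run.
table(binary$admit)              # raw counts per admit class
prop.table(table(binary$admit))  # class proportions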
# Split into training and test data
library(caret)
set.seed(1)
train_num <- createDataPartition(binary$admit, p = 0.8, list = FALSE)
train_data <- binary[train_num,]
test_data <- binary[-train_num,]
nrow(train_data) # 321
nrow(test_data) # 79
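# createDataPartition() stratifies on admit, so both splits should keep roughly
# the same class proportions; this check is only a verification sketch.
prop.table(table(train_data$admit))
prop.table(table(test_data$admit))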
# Min-max normalization helper
normalize <- function(x) {
  return((x - min(x)) / (max(x) - min(x)))
}
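# Quick check of the helper on a toy vector (illustrative values only):
# min is 2 and max is 10, so the result should be 0.00, 0.25, 0.50, 1.00.
normalize(c(2, 4, 6, 10))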
# Normalize the predictor columns (everything except admit), then reattach admit
train_data_n <- as.data.frame(lapply(train_data[ , -1], normalize))
train_data2 <- cbind(train_data[ , c('admit')], train_data_n)
names(train_data2)[1] <- 'admit'
train_data2
# Apply the same normalization to the test predictors and reattach admit
test_data_n <- as.data.frame(lapply(test_data[ , -1], normalize))
test_data2 <- cbind(test_data[ , c('admit')], test_data_n)
names(test_data2)[1] <- 'admit'
test_data2
summary(test_data2)
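# Note: the code above rescales the test set with its own min/max. A common
# alternative, sketched here as an assumption rather than part of the original
# run, is to reuse the training min/max so both splits share one scale.
rescale_with <- function(x, ref) (x - min(ref)) / (max(ref) - min(ref))  # hypothetical helper
test_data_n2 <- as.data.frame(Map(rescale_with, test_data[ , -1], train_data[ , -1]))
test_data2_alt <- cbind(admit = test_data$admit, test_data_n2)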
# Build the model and tune hyperparameters
library(e1071)
# Train a model for every combination of C and gamma on a log2 grid
set.seed(1)
C_values <- 2^(-10:10)
gamma_values <- 2^(-10:10)
# Collect one row per (C, gamma) combination
results <- data.frame(C = numeric(), gamma = numeric(), accuracy = numeric())
for (C in C_values) {
  for (gamma in gamma_values) {
    set.seed(1)
    # Radial-kernel SVM for this (C, gamma) pair
    binary_svm_model <- svm(admit ~ ., data = train_data2,
                            kernel = "radial", cost = C, gamma = gamma)
    # Predict on the test predictors and record the test-set accuracy
    result <- predict(binary_svm_model, test_data2[ , -1])
    accuracy <- sum(result == test_data2$admit) / length(test_data2$admit)
    results <- rbind(results, data.frame(C = C, gamma = gamma, accuracy = accuracy))
  }
}
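# e1071 also ships a built-in grid search driven by cross-validation (10-fold
# by default). This is an optional alternative sketch on a smaller grid, not
# the approach used above, which scores each model on the held-out test set.
set.seed(1)
binary_svm_tune <- tune(svm, admit ~ ., data = train_data2,
                        kernel = "radial",
                        ranges = list(cost = 2^(-2:4), gamma = 2^(-4:2)))
summary(binary_svm_tune)          # best cost/gamma by CV error
binary_svm_tune$best.parameters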
results
library(doBy)
# Sort the grid-search results by accuracy, highest first
orderBy(~ -accuracy, results)
#          C    gamma  accuracy
# 197    0.5  0.12500 0.7215190
# 328   32.0  4.00000 0.7215190
# 198    0.5  0.25000 0.7088608
# 199    0.5  0.50000 0.7088608
# 217    1.0  0.06250 0.7088608
# 219    1.0  0.25000 0.7088608
# 225    1.0 16.00000 0.7088608
# 226    1.0 32.00000 0.7088608
# 240    2.0  0.25000 0.7088608
# 258    4.0  0.03125 0.7088608
# 260    4.0  0.12500 0.7088608