import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Load the raw dataset from the CSV file in the working directory.
df = pd.read_csv('cust_data.csv')

# Quick inspection: column summary first, then the first five rows.
df.info()
df.head(n=5)
# 0. 데이터 전처리 (data preprocessing)
# 1. 문제지, 정답지 분리 (split features X and target y)
# Name of the target (label) column. The value looks like a template
# placeholder -- replace it with the real column name before running.
TARGET_COLUMN = '정답지 칼럼명'

# Feature matrix: every column except the target, as a NumPy array.
X = df.drop(columns=TARGET_COLUMN).to_numpy()
# Target vector: the target column alone, as a NumPy array.
y = df[TARGET_COLUMN].to_numpy()
# 2. 학습데이터, 검증데이터 분리 (split into training and validation data)
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Shapes of the training features and training targets.
(X_train.shape, y_train.shape)
# 3. 0 ~ 1 값으로 정규화 (normalize values to the 0-1 range)
from sklearn.preprocessing import MinMaxScaler

# Fit the scaler on the training split only, then apply that same fitted
# scaling to both splits so the test data does not influence the fit.
scaler = MinMaxScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
# 4-1. 모델링(회귀) (modeling: regression)
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error

# Fit a linear-regression model on the training split and predict the
# held-out rows.
model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Keep and print the metric: a bare expression in the middle of a block is
# evaluated and then thrown away, so the score would never be shown.
mae = mean_absolute_error(y_test, y_pred)
print("MAE:", mae)

# Performance plot: actual targets (red) against predictions (blue), both
# drawn in test-set row order.
plt.figure(figsize=(20, 8))
plt.plot(y_test, color='r', label='actual')
plt.plot(y_pred, color='b', label='predicted')
plt.legend()
plt.show()
# 4-2. 모델링(분류) (modeling: classification)
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Fit a k-nearest-neighbours classifier with default settings on the
# training split and predict the held-out rows.
# NOTE: this rebinds `model` and `y_pred`, which the regression section
# also assigns; run only the section that matches the task.
model = KNeighborsClassifier()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Keep and print the score so it is visible when run as a plain script,
# where a bare expression's value is discarded.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)