import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from statsmodels.tsa.statespace.sarimax import SARIMAX
import itertools
# Monthly sales: all of 2021 plus the actual values for January-June 2022.
data_with_actual = {
    'date': [
        '21/01', '21/02', '21/03', '21/04', '21/05', '21/06',
        '21/07', '21/08', '21/09', '21/10', '21/11', '21/12',
        '22/01', '22/02', '22/03', '22/04', '22/05', '22/06',
    ],
    'sales': [
        8830, 5748, 9229, 6941, 6972, 6292, 9255, 6560, 6221, 7397, 9334, 8816,
        6450, 6343, 7292, 6275, 7427, 7065,  # actual values for Jan-Jun 2022
    ],
}

# Build the DataFrame and parse the 'yy/mm' labels; each label becomes the
# first day of that month.
df_with_actual = pd.DataFrame(data_with_actual)
df_with_actual['date'] = pd.to_datetime(df_with_actual['date'], format='%y/%m')
df_with_actual.set_index('date', inplace=True)

# Attach an explicit month-start frequency to the index. Without it the index
# carries no frequency and the time-series model has to infer one on every
# fit. Passing freq= here also validates the data: a missing or duplicated
# month raises instead of going unnoticed.
df_with_actual.index = pd.DatetimeIndex(df_with_actual.index, freq='MS')

# Holiday dummy variable: months 1, 3, 7, 8 and 9 are assumed to be the months
# in which sales increase.
df_with_actual['holiday'] = df_with_actual.index.month.isin([1, 3, 7, 8, 9]).astype(int)
# Scoring function for the SARIMA grid search (lower AIC is better).
def evaluate_arima_model(order, seasonal_order, exog):
    """Fit a SARIMAX model to the sales series and return its AIC.

    The endogenous series is read from the module-level
    ``df_with_actual['sales']``.

    Args:
        order: Non-seasonal ``(p, d, q)`` order passed to ``SARIMAX``.
        seasonal_order: Seasonal ``(P, D, Q, s)`` order passed to ``SARIMAX``.
        exog: Exogenous regressors passed to ``SARIMAX``.

    Returns:
        The AIC of the fitted model, or ``float("inf")`` when building or
        fitting the model raises, so that a failing configuration never wins
        a minimum-AIC comparison.
    """
    try:
        model = SARIMAX(df_with_actual['sales'], exog=exog, order=order, seasonal_order=seasonal_order)
        model_fit = model.fit(disp=False)
        return model_fit.aic
    except Exception:
        # Deliberately best-effort: any failing configuration is scored as the
        # worst possible. `Exception` (rather than a bare `except:`) lets
        # KeyboardInterrupt / SystemExit propagate, so a long search can still
        # be interrupted.
        return float("inf")
# Candidate ranges for the non-seasonal (p, d, q) and seasonal (P, D, Q, s)
# orders, and every combination of them.
p_values = range(0, 3)
d_values = range(0, 2)
q_values = range(0, 3)
seasonal_p_values = range(0, 2)
seasonal_d_values = range(0, 2)
seasonal_q_values = range(0, 2)
seasonal_s_values = [12]  # seasonality is assumed to be 12 months
order_combinations = list(itertools.product(p_values, d_values, q_values))
seasonal_combinations = list(
    itertools.product(seasonal_p_values, seasonal_d_values, seasonal_q_values, seasonal_s_values)
)

# Grid search: keep the configuration with the lowest AIC. The exogenous frame
# is the same for every candidate, so it is built once outside the loop.
holiday_exog = df_with_actual[['holiday']]
best_score, best_cfg, best_seasonal_cfg = float("inf"), None, None
# product() iterates `order` in the outer position and `seasonal_order` in the
# inner one, i.e. the same visiting order as two nested loops, so ties are
# resolved identically (the first configuration seen wins).
for order, seasonal_order in itertools.product(order_combinations, seasonal_combinations):
    aic = evaluate_arima_model(order, seasonal_order, exog=holiday_exog)
    if aic < best_score:
        best_score, best_cfg, best_seasonal_cfg = aic, order, seasonal_order

# evaluate_arima_model() scores every failure as +inf, so if no candidate could
# be fitted nothing was selected. Stop with a clear message instead of passing
# order=None into SARIMAX.
if best_cfg is None or best_seasonal_cfg is None:
    raise RuntimeError(
        "SARIMA grid search failed: no (order, seasonal_order) combination "
        "produced a finite AIC."
    )

# Refit the model with the best configuration found.
model_sarima_optimized = SARIMAX(
    df_with_actual['sales'],
    exog=holiday_exog,
    order=best_cfg,
    seasonal_order=best_seasonal_cfg,
)
model_fit_sarima_optimized = model_sarima_optimized.fit(disp=False)
# Forecast horizon: July-December 2022.
# Use month-START stamps ('MS'): the training dates are parsed from 'yy/mm'
# labels and therefore fall on the first day of each month. The previous
# month-END alias ('M') produced 2022-07-31 ... 2022-12-31, which shifted the
# forecast almost a month to the right of where it belongs on the plot and in
# the result table ('M' is also a deprecated alias in recent pandas releases).
forecast_index_sarima_optimized = pd.date_range(start='2022-07-01', periods=6, freq='MS')

# Holiday dummy for the forecast months (Jul, Aug, Sep flagged; Oct-Dec not),
# indexed by the same dates that are used for plotting.
future_holiday_summer = pd.DataFrame(
    {'holiday': [1, 1, 1, 0, 0, 0]},
    index=forecast_index_sarima_optimized,
)

# Forecast six steps ahead with the optimized SARIMA model.
forecast_sarima_optimized = model_fit_sarima_optimized.forecast(steps=6, exog=future_holiday_summer)
# Plot the observed sales and the forecast on a single set of axes.
fig, ax = plt.subplots(figsize=(14, 8))

# Observed series: solid blue line.
ax.plot(
    df_with_actual.index,
    df_with_actual['sales'],
    marker='o',
    linestyle='-',
    color='blue',
    label='Sales (2021 + 2022 Jan-Jun)',
)
# Forecast series: dashed red line.
ax.plot(
    forecast_index_sarima_optimized,
    forecast_sarima_optimized,
    marker='o',
    linestyle='--',
    color='red',
    label='Optimized SARIMA Forecast Sales for 2022 (Jul-Dec)',
)

ax.set_title('Optimized SARIMA Model - Sales Forecast for 2022 (Jul-Dec)', fontsize=18, weight='bold')
ax.set_xlabel('Date', fontsize=14)
ax.set_ylabel('Sales', fontsize=14)
ax.grid(True)

# One tick per month, labelled as YYYY-MM.
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
ax.xaxis.set_major_locator(mdates.MonthLocator())
plt.xticks(rotation=45, fontsize=12)

ax.legend(loc='upper left')
fig.tight_layout()
plt.show()
# Collect the forecast into a table and display it.
forecast_df_sarima_optimized = pd.DataFrame({'Date': forecast_index_sarima_optimized, 'Forecasted Sales': forecast_sarima_optimized})

# `ace_tools` is only importable inside the hosted notebook sandbox that
# provides it. Everywhere else, fall back to printing the table so the script
# does not crash with ImportError at its very last step.
try:
    import ace_tools as tools
except ImportError:
    tools = None

if tools is not None:
    tools.display_dataframe_to_user(name="Optimized SARIMA Forecast for 2022 (Jul-Dec)", dataframe=forecast_df_sarima_optimized)
else:
    print("Optimized SARIMA Forecast for 2022 (Jul-Dec)")
    print(forecast_df_sarima_optimized)