from scipy import stats as st
import numpy as np
import pandas as pd
# 데이터 dental.csv의 resp만 읽어 log-변환
logy=np.log(pd.read_csv('~/data/dental.csv').resp)
# 등분산 검정
>>> st.bartlett(logy[5:10], logy[0:5])
BartlettResult(statistic=0.9715330643707207, pvalue=0.32429813448716616)
# 두 그룹의 분산이 같은 경우
>>> st.ttest_ind(logy[5:10], logy[0:5])
Ttest_indResult(statistic=-2.521742772630017, pvalue=0.035711712354790294)
# 분산이 다른 경우
>>> st.ttest_ind(logy[5:10], logy[0:5], equal_var=False)
Ttest_indResult(statistic=-2.521742772630017, pvalue=0.042454979494495546)
### Wilcoxon's Rank-Sum Test - 정규분포 근사만 가능
>>> st.ranksums(y[5:10],y[0:5])
RanksumsResult(statistic=-1.9844852778949553, pvalue=0.04720176769014221)
SAS의 npar1way에서 Kruskal-Wallis Test와 동일한 결과입니다.
R로 같은 결과를 내려면
> wilcox.test(resp~treatment, exact=FALSE, correct=FALSE, data=dental)
Wilcoxon rank sum test
data: resp by treatment
W = 3, p-value = 0.0472
alternative hypothesis: true location shift is not equal to 0