|
### 예1
# respire.csv 이용
respire=read.csv("respire.csv")
glm(outcome ~ treat, weights=count, family=binomial, data=respire)
# respire2.csv 이용
respire2=read.csv("respire2.csv")
glm(outcome ~ treat, family=binomial, data=respire2)
# Odds Ratio
> coef(out)["treattest"]
treattest
1.791759
> coef(out)[2]
treattest
1.791759
> exp(coef(out)["treattest"])
treattest
6
# Odds Ratio의 95% 신뢰구간
> exp(confint(out, parm="treattest"))
Waiting for profiling to be done...
2.5 % 97.5 %
2.803526 13.411701
# tapply()이용하기
# respire2를 이용하려면
### 예 2: toxic.csv
# odds ratio와 95% 신뢰구간
### 예 3: death_penaty.csv
# Tables
> death.penalty=read.csv("death_penalty.csv")
> defendant = xtabs(count~defendant+death,data=death.penalty)
> chisq.test(defendant)
Pearson's Chi-squared test with Yates' continuity correction
data: defendant
X-squared = 0.0863, df = 1, p-value = 0.7689
> victim = xtabs(count~victim+death,data=death.penalty)
> chisq.test(victim)
Pearson's Chi-squared test with Yates' continuity correction
data: victim
X-squared = 4.7678, df = 1, p-value = 0.029
# Logistic Regression
> out2=glm(death~victim,weights=count,family=binomial,data=death.penalty)
> anova(out2,out1,test="Chisq")
Analysis of Deviance Table
Model 1: death ~ victim
Model 2: death ~ victim * defendant
Resid. Df Resid. Dev Df Deviance P(>|Chi|)
1 5 220.26
2 3 218.38 2 1.8819 0.3903
95% CI for odds ratio
> exp(confint(out2,parm="victimWhite"))
Waiting for profiling to be done...
2.5 % 97.5 %
1.239661 7.871553
### 예 4
> library(MASS)
> out=glm(low~lwt+factor(race)+smoke+ht+ui,data=birthwt,family=binomial)
> summary(out)
Call:
glm(formula = low ~ lwt + factor(race) + smoke + ht + ui, family = binomial,
data = birthwt)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.7396 -0.8322 -0.5359 0.9873 2.1692
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 0.056276 0.937853 0.060 0.95215
lwt -0.016732 0.006803 -2.459 0.01392 *
factor(race)2 1.324562 0.521464 2.540 0.01108 *
factor(race)3 0.926197 0.430386 2.152 0.03140 *
smoke 1.035831 0.392558 2.639 0.00832 **
ht 1.871416 0.690902 2.709 0.00676 **
ui 0.904974 0.447553 2.022 0.04317 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 234.67 on 188 degrees of freedom
Residual deviance: 204.22 on 182 degrees of freedom
AIC: 218.22
Number of Fisher Scoring iterations: 4
# odds ratio
> exp(coef(out))
(Intercept) lwt factor(race)2 factor(race)3 smoke
1.0578897 0.9834068 3.7605373 2.5248886 2.8174471
ht ui
6.4974921 2.4718677
### R 결과물에서 통계치 추출
> respire=read.csv("respire.csv")
> respire2=read.csv("respire2.csv")
> out1=glm(outcome~treat,family=binomial,weights=count, data=respire)
> out2=glm(outcome~treat,family=binomial,data=respire2)
> B=coef(out1)
> B
(Intercept) treattest
-1.098612 1.791759
> B[1]+B[2]*(respire$treat=="test")
[1] -1.0986123 -1.0986123 0.6931472 0.6931472
> exp(B[1]+B[2]*(respire$treat=="test"))
[1] 0.3333333 0.3333333 2.0000000 2.0000000
> exp(B[1]+B[2]*(respire$treat=="test"))/(1+exp(B[1]+B[2]*(respire$treat=="test")))
[1] 0.2500000 0.2500000 0.6666667 0.6666667
> fitted(out1)
1 2 3 4
0.2500000 0.2500000 0.6666667 0.6666667
> p=unique(fitted(out1))
> p
[1] 0.2500000 0.6666667
> p/(1-p)
[1] 0.3333333 2.0000000
> log(p/(1-p))
[1] -1.0986123 0.6931472
### Simulation
## respire, respire2
# 요약된 자료
> total = with(respire, tapply(count,treat,sum))
> total
placebo test
64 60
> set.seed(1234)
> rbinom(n=length(total),size=total,p=unique(fitted(out1)))
[1] 12 39
# 요약되지 않은 자료
> set.seed(1234)
> rbinom(n=nrow(respire2),size=1,p=fitted(out2))
[1] 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 1 0 1 1 0 0 0 0 0 0 1 0
[38] 0 1 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 1 1 0 0 0 1 0 1 1 1 1 1 0 1 0
[75] 1 1 1 1 1 0 0 1 1 1 1 0 1 1 1 0 1 0 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 0
[112] 1 0 1 1 0 0 1 1 0 0 0 0 0
## toxic
> toxic=read.csv("toxic.csv")
> out=glm(response~dose,weights=count,family=binomial,data=toxic)
> total=with(toxic,tapply(count,dose,sum))
> set.seed(1234)
> rbinom(3,total,unique(fitted(out)))
[1] 1 5 8
|
첫댓글 out1=glm(death~victim*defendant,weights=count,family=binomial,data=death.penalty)
여기 에서 종속병수에 *defendant 는 종속변수에 대해 무엇을 의미하나요?
또 weights=count 는 왜 지정해 주는건가요?
defendant는 종속변수가 아니라, 법정에서 고소를 당한사람 즉 "피고"를 의미합니다. 이 문제에서는 "피고"가 백인인지 흑인인지 구분하는 변수입니다. weights는 종속변수가 1, 0일때는 weight이 필요없지만, 1인 사람이 몇명, 0인 사람이 몇명인지 count라는 변수로 요약되면 weights=count를 줍니다.