|
##### 10장: 단순회귀분석(Simple Linear Regression)
### cars: dist ~ speed
> out = lm(dist ~ speed, data=cars)
> summary(out)
Call:
lm(formula = dist ~ speed, data = cars)
Residuals:
Min 1Q Median 3Q Max
-29.069 -9.525 -2.272 9.215 43.201
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -17.5791 6.7584 -2.601 0.0123 *
speed 3.9324 0.4155 9.464 1.49e-12 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 15.38 on 48 degrees of freedom
Multiple R-squared: 0.6511, Adjusted R-squared: 0.6438
F-statistic: 89.57 on 1 and 48 DF, p-value: 1.490e-12
> with(cars, cor(speed,dist))^2
> with(cars, cor.test(speed,dist))
# Scatter plot with the fitted regression line
> plot(dist ~ speed, data=cars, col="blue")
> abline(out, col="red")
### No intercept model
> lm(dist~speed-1, data=cars)
Call:
lm(formula = dist ~ speed - 1, data = cars)
Coefficients:
speed
2.909
# No-intercept model with square-root transformation: sqrt(dist) = 0.39675*speed
> out2 = lm(sqrt(dist) ~ speed - 1, data=cars)
> summary(out2)
Call:
lm(formula = sqrt(dist) ~ speed - 1, data = cars)
Residuals:
Min 1Q Median 3Q Max
-2.27813 -0.69718 0.02079 0.79650 3.38978
Coefficients:
Estimate Std. Error t value Pr(>|t|)
speed 0.39675 0.01015 39.09 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.167 on 49 degrees of freedom
Multiple R-squared: 0.9689, Adjusted R-squared: 0.9683
F-statistic: 1528 on 1 and 49 DF, p-value: < 2.2e-16
> plot(sqrt(dist)~speed, data=cars)
> abline(out2)
# 회귀진단
> par(mfrow=c(2,2))
> plot(out2)
> qqnorm(resid(out2))
> qqline(resid(out2))
> shapiro.test(resid(out2))
### one_comp.csv
> one.comp=read.csv("one_comp.csv")
> plot(conc~time, data=one.comp)
> plot(conc~time, data=one.comp, log="y")
> summary(lm(log(conc)~time, data=one.comp))
Call:
lm(formula = log(conc) ~ time, data = one.comp)
Residuals:
Min 1Q Median 3Q Max
-0.10308 -0.05859 0.00874 0.04190 0.09032
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.72031 0.03809 97.68 3.10e-12 ***
time -0.63771 0.01485 -42.95 9.68e-10 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.06901 on 7 degrees of freedom
Multiple R-squared: 0.9962, Adjusted R-squared: 0.9957
F-statistic: 1845 on 1 and 7 DF, p-value: 9.676e-10
# 잘못된 모형
> out = lm(conc ~ time, data=one.comp)
> par(mfrow=c(2,2))
> plot(out)
# 결과물 추출
> out = lm(dist ~ speed, data=cars)
> names(out)
[1] "coefficients" "residuals" "effects" "rank" "fitted.values"
[6] "assign" "qr" "df.residual" "xlevels" "call"
[11] "terms" "model"
> out$rank
[1] 2
> out$df.residual
[1] 48
> B = coef(out)
> B
(Intercept) speed
-17.579095 3.932409
> fitted(out)
1 2 3 4 5 6 7 8
-1.849460 -1.849460 9.947766 9.947766 13.880175 17.812584 21.744993 21.744993
9 10 11 12 13 14 15 16
21.744993 25.677401 25.677401 29.609810 29.609810 29.609810 29.609810 33.542219
17 18 19 20 21 22 23 24
33.542219 33.542219 33.542219 37.474628 37.474628 37.474628 37.474628 41.407036
25 26 27 28 29 30 31 32
41.407036 41.407036 45.339445 45.339445 49.271854 49.271854 49.271854 53.204263
33 34 35 36 37 38 39 40
53.204263 53.204263 53.204263 57.136672 57.136672 57.136672 61.069080 61.069080
41 42 43 44 45 46 47 48
61.069080 61.069080 61.069080 68.933898 72.866307 76.798715 76.798715 76.798715
49 50
76.798715 80.731124
> yhat=B[1]+B[2]*cars$speed
> yhat
[1] -1.849460 -1.849460 9.947766 9.947766 13.880175 17.812584 21.744993 21.744993
[9] 21.744993 25.677401 25.677401 29.609810 29.609810 29.609810 29.609810 33.542219
[17] 33.542219 33.542219 33.542219 37.474628 37.474628 37.474628 37.474628 41.407036
[25] 41.407036 41.407036 45.339445 45.339445 49.271854 49.271854 49.271854 53.204263
[33] 53.204263 53.204263 53.204263 57.136672 57.136672 57.136672 61.069080 61.069080
[41] 61.069080 61.069080 61.069080 68.933898 72.866307 76.798715 76.798715 76.798715
[49] 76.798715 80.731124
> resid(out)
1 2 3 4 5 6 7
3.849460 11.849460 -5.947766 12.052234 2.119825 -7.812584 -3.744993
8 9 10 11 12 13 14
4.255007 12.255007 -8.677401 2.322599 -15.609810 -9.609810 -5.609810
15 16 17 18 19 20 21
-1.609810 -7.542219 0.457781 0.457781 12.457781 -11.474628 -1.474628
22 23 24 25 26 27 28
22.525372 42.525372 -21.407036 -15.407036 12.592964 -13.339445 -5.339445
29 30 31 32 33 34 35
-17.271854 -9.271854 0.728146 -11.204263 2.795737 22.795737 30.795737
36 37 38 39 40 41 42
-21.136672 -11.136672 10.863328 -29.069080 -13.069080 -9.069080 -5.069080
43 44 45 46 47 48 49
2.930920 -2.933898 -18.866307 -6.798715 15.201285 16.201285 43.201285
50
4.268876
> cars$dist - yhat
[1] 3.849460 11.849460 -5.947766 12.052234 2.119825 -7.812584 -3.744993
[8] 4.255007 12.255007 -8.677401 2.322599 -15.609810 -9.609810 -5.609810
[15] -1.609810 -7.542219 0.457781 0.457781 12.457781 -11.474628 -1.474628
[22] 22.525372 42.525372 -21.407036 -15.407036 12.592964 -13.339445 -5.339445
[29] -17.271854 -9.271854 0.728146 -11.204263 2.795737 22.795737 30.795737
[36] -21.136672 -11.136672 10.863328 -29.069080 -13.069080 -9.069080 -5.069080
[43] 2.930920 -2.933898 -18.866307 -6.798715 15.201285 16.201285 43.201285
[50] 4.268876
# Residual standard error
> summary(out)$sigma
[1] 15.37959
> sqrt(sum(resid(out)^2)/(length(cars$dist)-2))
[1] 15.37959
### Simulation
# cars
> out = lm(sqrt(dist) ~ speed-1, data=cars)
> dist = rnorm(n=nrow(cars),mean=fitted(out),sd=summary(out)$sigma)^2
# one.comp
> out = lm(log(conc)~time, data=one.comp)
> conc = exp(rnorm(n=nrow(one.comp),mean=fitted(out),sd=summary(out)$sigma))
### 연습
> out=lm(log(conc)~time, data=one.comp)
> par(mfrow=c(2,2))
> plot(out)
|