## coag diet
## 1 62 A
## 2 60 A
## 3 63 A
## 4 59 A
## 5 63 B
## 6 67 B
## 7 71 B
## 8 64 B
## 9 65 B
## 10 66 B
## 11 68 C
## 12 66 C
## 13 71 C
## 14 67 C
## 15 68 C
## 16 68 C
## 17 56 D
## 18 62 D
## 19 60 D
## 20 61 D
## 21 63 D
## 22 64 D
## 23 63 D
## 24 59 D
##
## Call:
## lm(formula = coag ~ 0 + diet)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.00 -1.25 0.00 1.25 5.00
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## dietA 61.000 1.183 51.5 <2e-16 ***
## dietB 66.000 0.966 68.3 <2e-16 ***
## dietC 68.000 0.966 70.4 <2e-16 ***
## dietD 61.000 0.837 72.9 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.37 on 20 degrees of freedom
## Multiple R-squared: 0.999, Adjusted R-squared: 0.999
## F-statistic: 4.4e+03 on 4 and 20 DF, p-value: <2e-16
## dietA dietB dietC dietD
## 1 1 0 0 0
## 2 1 0 0 0
## 3 1 0 0 0
## 4 1 0 0 0
## 5 0 1 0 0
## 6 0 1 0 0
## 7 0 1 0 0
## 8 0 1 0 0
## 9 0 1 0 0
## 10 0 1 0 0
## 11 0 0 1 0
## 12 0 0 1 0
## 13 0 0 1 0
## 14 0 0 1 0
## 15 0 0 1 0
## 16 0 0 1 0
## 17 0 0 0 1
## 18 0 0 0 1
## 19 0 0 0 1
## 20 0 0 0 1
## 21 0 0 0 1
## 22 0 0 0 1
## 23 0 0 0 1
## 24 0 0 0 1
This model is written as: \[ y_i = \mu_A I(x_i=A)+\mu_B I(x_i=B)+\mu_C I(x_i=C)+\mu_D I(x_i=D)+\epsilon_i \]
##
## Call:
## lm(formula = coag ~ diet)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.00 -1.25 0.00 1.25 5.00
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.10e+01 1.18e+00 51.55 < 2e-16 ***
## dietB 5.00e+00 1.53e+00 3.27 0.00380 **
## dietC 7.00e+00 1.53e+00 4.58 0.00018 ***
## dietD 2.99e-15 1.45e+00 0.00 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.37 on 20 degrees of freedom
## Multiple R-squared: 0.671, Adjusted R-squared: 0.621
## F-statistic: 13.6 on 3 and 20 DF, p-value: 4.66e-05
## X.Intercept. dietB dietC dietD
## 1 1 0 0 0
## 2 1 0 0 0
## 3 1 0 0 0
## 4 1 0 0 0
## 5 1 1 0 0
## 6 1 1 0 0
## 7 1 1 0 0
## 8 1 1 0 0
## 9 1 1 0 0
## 10 1 1 0 0
## 11 1 0 1 0
## 12 1 0 1 0
## 13 1 0 1 0
## 14 1 0 1 0
## 15 1 0 1 0
## 16 1 0 1 0
## 17 1 0 0 1
## 18 1 0 0 1
## 19 1 0 0 1
## 20 1 0 0 1
## 21 1 0 0 1
## 22 1 0 0 1
## 23 1 0 0 1
## 24 1 0 0 1
This model is written as: \[ y_i = \beta_0+\beta_B I(x_i=B)+\beta_CI(x_i=C)+\beta_D I(x_i=D)+\epsilon_i \] where, \(\beta_0=\mu_A, \beta_B=\mu_B-\mu_A, \beta_C=\mu_C-\mu_A, \beta_D=\mu_D-\mu_A\).
This section is for illustrating prediction and residuals
## [1] 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64
## [24] 64
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
## 61 61 61 61 66 66 66 66 66 66 68 68 68 68 68 68 61 61 61 61 61 61 61 61
resid_h0 <- y - y_hat_h0 # residual (error) from h0 model
resid_h1 <- y - y_hat_h1 # residual (error) from h1 model
data.frame (diet, y=y, y_hat_h0, resid_h0, y_hat_h1, resid_h1, y_hat_h1-y_hat_h0 )
## diet y y_hat_h0 resid_h0 y_hat_h1 resid_h1 y_hat_h1...y_hat_h0
## 1 A 62 64 -2 61 1 -3
## 2 A 60 64 -4 61 -1 -3
## 3 A 63 64 -1 61 2 -3
## 4 A 59 64 -5 61 -2 -3
## 5 B 63 64 -1 66 -3 2
## 6 B 67 64 3 66 1 2
## 7 B 71 64 7 66 5 2
## 8 B 64 64 0 66 -2 2
## 9 B 65 64 1 66 -1 2
## 10 B 66 64 2 66 0 2
## 11 C 68 64 4 68 0 4
## 12 C 66 64 2 68 -2 4
## 13 C 71 64 7 68 3 4
## 14 C 67 64 3 68 -1 4
## 15 C 68 64 4 68 0 4
## 16 C 68 64 4 68 0 4
## 17 D 56 64 -8 61 -5 -3
## 18 D 62 64 -2 61 1 -3
## 19 D 60 64 -4 61 -1 -3
## 20 D 61 64 -3 61 0 -3
## 21 D 63 64 -1 61 2 -3
## 22 D 64 64 0 61 3 -3
## 23 D 63 64 -1 61 2 -3
## 24 D 59 64 -5 61 -2 -3
# Visulize the predictive values and residuals
plot (y, col = diet,pch = as.numeric(diet),
main="Coaglution time versus Run Order (Sample ID)")
lines (1:4,g$fitted.values[1:4], col = 1)
lines (5:10,g$fitted.values[5:10], col = 2)
lines (11:16,g$fitted.values[11:16], col = 3)
lines (17:24,g$fitted.values[17:24], col = 4)
abline(h=mean(y))
## [1] 340
## [1] 112
## [1] 228
## [1] 228
## A B C D
## 61 66 68 61
## [1] 64
## diet
## A B C D
## 4 6 6 8
n <- length (y)
SSy <- sum (y^2)
# the above values will be provided in test question
# compute SSt,SSe, SStr
SSt <- SSy-n*ybar_..^2; SSt
## [1] 340
## [1] 228
## [1] 112
## [1] 4
## [1] 24
df1 <- k - 1
df2 <- n - k
MStr <- SStr/df1
MSe <- SSe/df2
f <- MStr /MSe
pvalue <- 1-pf (f, df1=df1, df2=df2); pvalue
## [1] 4.658e-05
anova_table <- data.frame (Df = c(df1,df2),
SS = c(SStr,SSe),
MS = c(MStr, MSe),
F = c(f,NA),
pvalue = c(pvalue, NA));
row.names(anova_table) <- c("diet", "Residuals")
anova_table
## Df SS MS F pvalue
## diet 3 228 76.0 13.57 4.658e-05
## Residuals 20 112 5.6 NA NA
Compute ANOVA with the fitting result with intercept
## Analysis of Variance Table
##
## Response: coag
## Df Sum Sq Mean Sq F value Pr(>F)
## diet 3 228 76.0 13.6 4.7e-05 ***
## Residuals 20 112 5.6
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Do not compute ANOVA with the fitting result w/o intercept
## Analysis of Variance Table
##
## Response: coag
## Df Sum Sq Mean Sq F value Pr(>F)
## diet 4 98532 24633 4399 <2e-16 ***
## Residuals 20 112 6
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1