require("knitr")
## Loading required package: knitr
knitr::opts_chunk$set(
    comment = "#",
    fig.width=8, 
    fig.height=6, 
    cache = TRUE
)

1 Input data

issu <- data.frame (
    driving = c(5, 2, 12, 9, 15, 6, 25, 16),
    premium = c(64, 87, 50, 71, 44, 56, 42, 60)
)

y <- issu$premium
x <- issu$driving
xbar <- mean(x)
ybar <- mean(y)
n <- length(y)
plot(x,y, xlab = "Driving", ylab = "Premium")

2 Estimating regression coefficients

fit.issu <- lm(y~x)
plot(x,y)
abline (fit.issu)

3 Sum Squares of Residuals

beta0 <- fit.issu$coefficients[1]
beta1 <- fit.issu$coefficients[2]
fitted1 <- beta0+beta1*x
fitted0 <- rep(ybar, n)
residual1 <- y - fitted1
residual0 <- y - fitted0

data.frame (y, fitted0, residual0, fitted1, residual1, diff.residual=fitted1-fitted0)
#    y fitted0 residual0  fitted1  residual1 diff.residual
# 1 64   59.25      4.75 68.92243  -4.922425      9.672425
# 2 87   59.25     27.75 73.56519  13.434811     14.315189
# 3 50   59.25     -9.25 58.08931  -8.089309     -1.160691
# 4 71   59.25     11.75 62.73207   8.267927      3.482073
# 5 44   59.25    -15.25 53.44654  -9.446545     -5.803455
# 6 56   59.25     -3.25 67.37484 -11.374837      8.124837
# 7 42   59.25    -17.25 37.97066   4.029335    -21.279335
# 8 60   59.25      0.75 51.89896   8.101043     -7.351043

Visualize the fitted line, fitted values, and residuals

Definitions of SSR, SSE, SST:

SST <- sum((y-fitted0)^2); SST
# [1] 1557.5
SSE <- sum ((y-fitted1)^2);SSE
# [1] 639.0065
SSR <- SST-SSE;SSR
# [1] 918.4935

SSR can be computed directly with

sum((fitted1-fitted0)^2)
# [1] 918.4935

Plot of Residual Sum Squares

# --- Assumed Setup Code (from your previous file) ---
# This data and model fit are needed for the calculations
# --- Assumed Setup Code (from your previous file) ---
# This data and model fit are needed for the calculations
issu <- data.frame (
    driving = c(5, 2, 12, 9, 15, 6, 25, 16),
    premium = c(64, 87, 50, 71, 44, 56, 42, 60)
)
y <- issu$premium
x <- issu$driving
n <- length(y)
fit.issu <- lm(y~x)
# --- End of Setup ---


# --- Calculations for ANOVA components ---
SST <- sum((y - mean(y))^2)
SSE <- sum(resid(fit.issu)^2)
SSR <- SST - SSE
df_SSR <- 1 # For simple linear regression (change from 1 to 2 parameters)
df_SSE <- n - 2


# --- Create the Plot ---
# Set plot margins to make extra space at the bottom for the df segments
par(mar = c(6, 4, 4, 2) + 0.1)

# Create the main plot, expanding x-axis to 14
plot(c(1, 2, n), c(SST, SSE, 0), type="b", pch = 19,
     xlab = "Number of Parameters in Model",
     ylab = "Residual Sum of Squares (RSS)",
     main = "Visualizing ANOVA on the RSS Plot",
     xlim = c(0, 14), # Expanded x-axis limit
     ylim = c(-400, SST * 1.1), # Extend y-axis below 0
     xaxt = "n" # Turn off default x-axis to draw a custom one
     )
axis(1, at = c(1, 2, n), labels = c("1 (Intercept)", "2 (+ Slope)", paste(n, "(Saturated)")))
abline(h=seq(0, 2000, by=100), lty=3, col="grey")


# --- Allow drawing outside the main plot area ---
par(xpd=TRUE)

# --- Draw Repositioned Segments ---
# 1. Show SSE (blue) - at x=9
arrows(x0 = 9, y0 = 0, x1 = 9, y1 = SSE, col = "blue", code = 3, angle = 90, length = 0.1, lwd = 2)
text(x = 9, y = SSE / 2, "SSE", col = "blue", pos = 4, font = 2, cex = 1.2)

# 2. Show SSR (red) - at x=9
arrows(x0 = 9, y0 = SSE, x1 = 9, y1 = SST, col = "red", code = 3, angle = 90, length = 0.1, lwd = 2)
text(x = 9, y = (SST + SSE) / 2, "SSR", col = "red", pos = 4, font = 2, cex = 1.2)

# 3. Show df_SSE (blue) - below y=0
arrows(x0 = 2, y0 = -200, x1 = n, y1 = -200, col = "blue", code = 3, angle = 90, length = 0.1, lwd = 2)
text(x = (2 + n) / 2, y = -250, paste("df_SSE =", df_SSE), col = "blue", font = 2)

# 4. Show df_SSR (red) - below y=0
arrows(x0 = 1, y0 = -200, x1 = 2, y1 = -200, col = "red", code = 3, angle = 90, length = 0.1, lwd = 2)
text(x = 1.5, y = -250, paste("df_SSR =", df_SSR), col = "red", font = 2)


# --- NEW CODE: Add ANOVA F and p-value to top-right corner ---
# 1. Calculate the F-statistic and p-value
f_value <- (SSR/df_SSR) / (SSE/df_SSE)
p_value <- pf(f_value, df1=df_SSR, df2=df_SSE, lower.tail = FALSE)

# 2. Format the text strings for display
f_stat_text <- sprintf("F-statistic: %.2f", f_value)
p_val_text  <- sprintf("p-value: %.3f", p_value)

# 3. Use legend() to create an info box on the plot
legend(
  "topright", # Position of the box
  legend = c(f_stat_text, p_val_text),
  title = "ANOVA Results",
  bty = "o",  # Draw a box around the legend
  cex = 0.9   # Adjust character size
)

# Reset clipping to default for any subsequent plots
par(xpd=FALSE)

Coefficient of Determination: \(R^2\)

R2 <- SSR/SST; R2
# [1] 0.5897229

Mean Sum Squares and F statististic

f <- (SSR/1)/(SSE/(n-2)); f
# [1] 8.624264

p-value to assess whether the relationship exists (statistical significance measure)

pvf <- pf(f, df1=1, df2=n-2, lower.tail = FALSE); pvf
# [1] 0.0260588

ANOVA table

Ftable <- data.frame(df = c(1,n-2), SS=c(SSR,SSE), 
                     MSS = c(SSR/1, SSE/(n-2)), Fstat=c(f,NA),
                     p.value = c(pvf, NA),
                     R2 = c(SSR, SSE)/SST)
Ftable
#   df       SS      MSS    Fstat   p.value        R2
# 1  1 918.4935 918.4935 8.624264 0.0260588 0.5897229
# 2  6 639.0065 106.5011       NA        NA 0.4102771

4 A single function to produce ANOVA table

fit.issu <- lm(y~x)
anova(fit.issu)
# Analysis of Variance Table
# 
# Response: y
#           Df Sum Sq Mean Sq F value  Pr(>F)  
# x          1 918.49  918.49  8.6243 0.02606 *
# Residuals  6 639.01  106.50                  
# ---
# Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

5 Animation for Understanding the Sampling Distributions of SS and F

5.1 When the slope is 0 (\(H_0\) is true)

# Get parameters from the original fit to use in the simulation
# Get parameters from the original fit to use in the simulation
original_fit <- lm(y~x)
intercept_H0 <- coef(original_fit)[1] # beta_0
model_sigma <- sigma(original_fit)   # sigma

# Loop to generate frames for an animation
for (n.sim in 1:50) {
  # Simulate a dataset where H0 is true (slope = 0)
  sim.y <- intercept_H0 + rnorm(n, 0, model_sigma)
  
  # --- Plotting Section ---
  # Set up a 1x2 plot layout
  par(mfrow=c(1,2))
  
  # --- Left Plot: Scatterplot ---
  fit.sim.y <- lm(sim.y~x)
  plot(sim.y~x, ylim=c(0,100), main="Data Simulated Under H0 (Slope=0)") 
  abline(fit.sim.y,col="red", lwd=2)   # Fitted line for simulated data
  abline(h=mean(sim.y),col="blue") # Null model line
  
  # --- Right Plot: RSS Plot with ANOVA Info ---
  anova.fit.sim.y <- anova(fit.sim.y)
  ss.sim.y <- anova.fit.sim.y$`Sum Sq`
  rss.sim.y <- rev(cumsum(c(0, rev(ss.sim.y))))
  num.par <- cumsum(c(1,anova.fit.sim.y$Df))
  plot(rss.sim.y~num.par,xlab="Number of Parameters", ylab = "Residual Sum Squares", type ="b")
  abline(v=1:25, h=(0:50)*100,lty=3,col="grey")
  
  # --- NEW CODE: Add ANOVA table info to the right plot ---
  # 1. Extract the F-statistic and p-value
  f_value <- anova.fit.sim.y$"F value"[1]
  p_value <- anova.fit.sim.y$"Pr(>F)"[1]
  
  # 2. Format the text strings for display
  f_stat_text <- sprintf("F-statistic: %.2f", f_value)
  p_val_text  <- sprintf("p-value: %.3f", p_value)
  
  # 3. Use legend() to create an info box on the plot
  legend(
    "topright", # Position of the box
    legend = c(f_stat_text, p_val_text),
    title = "ANOVA Results",
    bty = "o", # Type of box ("o" for a full box, "n" for no box)
    cex = 0.9  # Character size
  )
}

5.2 When the slope is non-zero (\(H_1\) is true)

# --- Assumed Setup Code (from your previous file) ---

# Get parameters from the original fit to use in the simulation
original_fit <- lm(y~x)
intercept_HA <- coef(original_fit)[1] # beta_0
model_sigma <- sigma(original_fit)   # sigma
slope_HA <- -2                       # A specified non-zero slope for the alternative hypothesis

# Loop to generate frames for an animation
for (n.sim in 1:50) {
  # Simulate a dataset where H0 is false (true slope = -2)
  mean_y <- intercept_HA + slope_HA * x
  sim.y <- mean_y + rnorm(n, 0, model_sigma)

  # --- Plotting Section ---
  # Set up a 1x2 plot layout
  par(mfrow=c(1,2))

  # --- Left Plot: Scatterplot ---
  fit.sim.y <- lm(sim.y~x)
  plot(sim.y~x, ylim=c(0,80), main="Data Simulated Under HA (Slope=-2)") 
  abline(fit.sim.y,col="red", lwd=2)   # Fitted line for simulated data
  abline(h=mean(sim.y),col="blue") # Null model line

  # --- Right Plot: RSS Plot with ANOVA Info ---
  anova.fit.sim.y <- anova(fit.sim.y)
  ss.sim.y <- anova.fit.sim.y$`Sum Sq`
  rss.sim.y <- rev(cumsum(c(0, rev(ss.sim.y))))
  num.par <- cumsum(c(1,anova.fit.sim.y$Df))
  plot(rss.sim.y~num.par,xlab="Number of Parameters", ylab = "Residual Sum Squares", type ="b")
  abline(v=1:25, h=(0:50)*100,lty=3,col="grey")
  
  # --- NEW CODE: Add ANOVA table info to the right plot ---
  # 1. Extract the F-statistic and p-value
  f_value <- anova.fit.sim.y$"F value"[1]
  p_value <- anova.fit.sim.y$"Pr(>F)"[1]
  
  # 2. Format the text strings for display
  f_stat_text <- sprintf("F-statistic: %.2f", f_value)
  # Use scientific notation for potentially very small p-values
  p_val_text  <- sprintf("p-value: %.3e", p_value) 
  
  # 3. Use legend() to create an info box on the plot
  legend(
    "topright", # Position of the box
    legend = c(f_stat_text, p_val_text),
    title = "ANOVA Results",
    bty = "o", # Type of box
    cex = 0.9  # Character size
  )
}