## Loading required package: knitr
issu <- data.frame (
driving = c(5, 2, 12, 9, 15, 6, 25, 16),
premium = c(64, 87, 50, 71, 44, 56, 42, 60)
)
y <- issu$premium
x <- issu$driving
xbar <- mean(x)
ybar <- mean(y)
n <- length(y)
plot(x,y, xlab = "Driving", ylab = "Premium")
beta0 <- fit.issu$coefficients[1]
beta1 <- fit.issu$coefficients[2]
fitted1 <- beta0+beta1*x
fitted0 <- rep(ybar, n)
residual1 <- y - fitted1
residual0 <- y - fitted0
data.frame (y, fitted0, residual0, fitted1, residual1, diff.residual=fitted1-fitted0)
# y fitted0 residual0 fitted1 residual1 diff.residual
# 1 64 59.25 4.75 68.92243 -4.922425 9.672425
# 2 87 59.25 27.75 73.56519 13.434811 14.315189
# 3 50 59.25 -9.25 58.08931 -8.089309 -1.160691
# 4 71 59.25 11.75 62.73207 8.267927 3.482073
# 5 44 59.25 -15.25 53.44654 -9.446545 -5.803455
# 6 56 59.25 -3.25 67.37484 -11.374837 8.124837
# 7 42 59.25 -17.25 37.97066 4.029335 -21.279335
# 8 60 59.25 0.75 51.89896 8.101043 -7.351043
Visualize the fitted line, fitted values, and residuals
Definitions of SSR, SSE, SST:
# [1] 1557.5
# [1] 639.0065
# [1] 918.4935
SSR can be computed directly with
# [1] 918.4935
Plot of Residual Sum Squares
# --- Assumed Setup Code (from your previous file) ---
# This data and model fit are needed for the calculations
# --- Assumed Setup Code (from your previous file) ---
# This data and model fit are needed for the calculations
issu <- data.frame (
driving = c(5, 2, 12, 9, 15, 6, 25, 16),
premium = c(64, 87, 50, 71, 44, 56, 42, 60)
)
y <- issu$premium
x <- issu$driving
n <- length(y)
fit.issu <- lm(y~x)
# --- End of Setup ---
# --- Calculations for ANOVA components ---
SST <- sum((y - mean(y))^2)
SSE <- sum(resid(fit.issu)^2)
SSR <- SST - SSE
df_SSR <- 1 # For simple linear regression (change from 1 to 2 parameters)
df_SSE <- n - 2
# --- Create the Plot ---
# Set plot margins to make extra space at the bottom for the df segments
par(mar = c(6, 4, 4, 2) + 0.1)
# Create the main plot, expanding x-axis to 14
plot(c(1, 2, n), c(SST, SSE, 0), type="b", pch = 19,
xlab = "Number of Parameters in Model",
ylab = "Residual Sum of Squares (RSS)",
main = "Visualizing ANOVA on the RSS Plot",
xlim = c(0, 14), # Expanded x-axis limit
ylim = c(-400, SST * 1.1), # Extend y-axis below 0
xaxt = "n" # Turn off default x-axis to draw a custom one
)
axis(1, at = c(1, 2, n), labels = c("1 (Intercept)", "2 (+ Slope)", paste(n, "(Saturated)")))
abline(h=seq(0, 2000, by=100), lty=3, col="grey")
# --- Allow drawing outside the main plot area ---
par(xpd=TRUE)
# --- Draw Repositioned Segments ---
# 1. Show SSE (blue) - at x=9
arrows(x0 = 9, y0 = 0, x1 = 9, y1 = SSE, col = "blue", code = 3, angle = 90, length = 0.1, lwd = 2)
text(x = 9, y = SSE / 2, "SSE", col = "blue", pos = 4, font = 2, cex = 1.2)
# 2. Show SSR (red) - at x=9
arrows(x0 = 9, y0 = SSE, x1 = 9, y1 = SST, col = "red", code = 3, angle = 90, length = 0.1, lwd = 2)
text(x = 9, y = (SST + SSE) / 2, "SSR", col = "red", pos = 4, font = 2, cex = 1.2)
# 3. Show df_SSE (blue) - below y=0
arrows(x0 = 2, y0 = -200, x1 = n, y1 = -200, col = "blue", code = 3, angle = 90, length = 0.1, lwd = 2)
text(x = (2 + n) / 2, y = -250, paste("df_SSE =", df_SSE), col = "blue", font = 2)
# 4. Show df_SSR (red) - below y=0
arrows(x0 = 1, y0 = -200, x1 = 2, y1 = -200, col = "red", code = 3, angle = 90, length = 0.1, lwd = 2)
text(x = 1.5, y = -250, paste("df_SSR =", df_SSR), col = "red", font = 2)
# --- NEW CODE: Add ANOVA F and p-value to top-right corner ---
# 1. Calculate the F-statistic and p-value
f_value <- (SSR/df_SSR) / (SSE/df_SSE)
p_value <- pf(f_value, df1=df_SSR, df2=df_SSE, lower.tail = FALSE)
# 2. Format the text strings for display
f_stat_text <- sprintf("F-statistic: %.2f", f_value)
p_val_text <- sprintf("p-value: %.3f", p_value)
# 3. Use legend() to create an info box on the plot
legend(
"topright", # Position of the box
legend = c(f_stat_text, p_val_text),
title = "ANOVA Results",
bty = "o", # Draw a box around the legend
cex = 0.9 # Adjust character size
)
Coefficient of Determination: \(R^2\)
# [1] 0.5897229
Mean Sum Squares and F statististic
# [1] 8.624264
p-value to assess whether the relationship exists (statistical significance measure)
# [1] 0.0260588
ANOVA table
Ftable <- data.frame(df = c(1,n-2), SS=c(SSR,SSE),
MSS = c(SSR/1, SSE/(n-2)), Fstat=c(f,NA),
p.value = c(pvf, NA),
R2 = c(SSR, SSE)/SST)
Ftable
# df SS MSS Fstat p.value R2
# 1 1 918.4935 918.4935 8.624264 0.0260588 0.5897229
# 2 6 639.0065 106.5011 NA NA 0.4102771
# Analysis of Variance Table
#
# Response: y
# Df Sum Sq Mean Sq F value Pr(>F)
# x 1 918.49 918.49 8.6243 0.02606 *
# Residuals 6 639.01 106.50
# ---
# Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Get parameters from the original fit to use in the simulation
# Get parameters from the original fit to use in the simulation
original_fit <- lm(y~x)
intercept_H0 <- coef(original_fit)[1] # beta_0
model_sigma <- sigma(original_fit) # sigma
# Loop to generate frames for an animation
for (n.sim in 1:50) {
# Simulate a dataset where H0 is true (slope = 0)
sim.y <- intercept_H0 + rnorm(n, 0, model_sigma)
# --- Plotting Section ---
# Set up a 1x2 plot layout
par(mfrow=c(1,2))
# --- Left Plot: Scatterplot ---
fit.sim.y <- lm(sim.y~x)
plot(sim.y~x, ylim=c(0,100), main="Data Simulated Under H0 (Slope=0)")
abline(fit.sim.y,col="red", lwd=2) # Fitted line for simulated data
abline(h=mean(sim.y),col="blue") # Null model line
# --- Right Plot: RSS Plot with ANOVA Info ---
anova.fit.sim.y <- anova(fit.sim.y)
ss.sim.y <- anova.fit.sim.y$`Sum Sq`
rss.sim.y <- rev(cumsum(c(0, rev(ss.sim.y))))
num.par <- cumsum(c(1,anova.fit.sim.y$Df))
plot(rss.sim.y~num.par,xlab="Number of Parameters", ylab = "Residual Sum Squares", type ="b")
abline(v=1:25, h=(0:50)*100,lty=3,col="grey")
# --- NEW CODE: Add ANOVA table info to the right plot ---
# 1. Extract the F-statistic and p-value
f_value <- anova.fit.sim.y$"F value"[1]
p_value <- anova.fit.sim.y$"Pr(>F)"[1]
# 2. Format the text strings for display
f_stat_text <- sprintf("F-statistic: %.2f", f_value)
p_val_text <- sprintf("p-value: %.3f", p_value)
# 3. Use legend() to create an info box on the plot
legend(
"topright", # Position of the box
legend = c(f_stat_text, p_val_text),
title = "ANOVA Results",
bty = "o", # Type of box ("o" for a full box, "n" for no box)
cex = 0.9 # Character size
)
}
# --- Assumed Setup Code (from your previous file) ---
# Get parameters from the original fit to use in the simulation
original_fit <- lm(y~x)
intercept_HA <- coef(original_fit)[1] # beta_0
model_sigma <- sigma(original_fit) # sigma
slope_HA <- -2 # A specified non-zero slope for the alternative hypothesis
# Loop to generate frames for an animation
for (n.sim in 1:50) {
# Simulate a dataset where H0 is false (true slope = -2)
mean_y <- intercept_HA + slope_HA * x
sim.y <- mean_y + rnorm(n, 0, model_sigma)
# --- Plotting Section ---
# Set up a 1x2 plot layout
par(mfrow=c(1,2))
# --- Left Plot: Scatterplot ---
fit.sim.y <- lm(sim.y~x)
plot(sim.y~x, ylim=c(0,80), main="Data Simulated Under HA (Slope=-2)")
abline(fit.sim.y,col="red", lwd=2) # Fitted line for simulated data
abline(h=mean(sim.y),col="blue") # Null model line
# --- Right Plot: RSS Plot with ANOVA Info ---
anova.fit.sim.y <- anova(fit.sim.y)
ss.sim.y <- anova.fit.sim.y$`Sum Sq`
rss.sim.y <- rev(cumsum(c(0, rev(ss.sim.y))))
num.par <- cumsum(c(1,anova.fit.sim.y$Df))
plot(rss.sim.y~num.par,xlab="Number of Parameters", ylab = "Residual Sum Squares", type ="b")
abline(v=1:25, h=(0:50)*100,lty=3,col="grey")
# --- NEW CODE: Add ANOVA table info to the right plot ---
# 1. Extract the F-statistic and p-value
f_value <- anova.fit.sim.y$"F value"[1]
p_value <- anova.fit.sim.y$"Pr(>F)"[1]
# 2. Format the text strings for display
f_stat_text <- sprintf("F-statistic: %.2f", f_value)
# Use scientific notation for potentially very small p-values
p_val_text <- sprintf("p-value: %.3e", p_value)
# 3. Use legend() to create an info box on the plot
legend(
"topright", # Position of the box
legend = c(f_stat_text, p_val_text),
title = "ANOVA Results",
bty = "o", # Type of box
cex = 0.9 # Character size
)
}