X <- rgamma (1000000, 10, .08)
# use another sample
# X <- rbinom (100000, 1, 0.3)
mu <- mean (X) # population mean
mu
## [1] 125.0336
## [1] 39.5123
n <- 30
n_rep <- 2000
CI_rep <- matrix (0, n_rep, 3)
xbar <- replicate (n_rep, mean(sample (X, size = n, replace = T) ) )
data.frame(xbar = xbar[1:10])
## xbar
## 1 142.0334
## 2 129.7287
## 3 121.2783
## 4 132.5191
## 5 119.7230
## 6 140.3745
## 7 133.9152
## 8 126.1130
## 9 113.6670
## 10 111.3840
Look at the distribution of sample mean in comparison with the population
# find margin error (me)
alpha <- 0.05
z <- - qnorm (alpha/2) # or using z <- qnorm (alpha/2, lower = F)
me <- z * sigma/sqrt (n)
# find a CI with the first sample
ci <- c(xbar[1] - me, xbar[1] + me)
mu.is.covered <- ci[1] < mu & mu < ci[2]
# repeat n_rep times
for (i in 1:n_rep)
{
CI_rep [i,1:2] <- c(xbar[i] - me, xbar[i] + me)
CI_rep [i, 3] <- CI_rep [i,1] < mu & mu < CI_rep [i,2]
}
library("rmarkdown")
paged_table(
round(
data.frame("true mean"= mu,
"sample mean"=xbar[1:50],
"lower bound" = CI_rep[1:50,1],
"upper bound" = CI_rep[1:50,2],
"true mean covered"=CI_rep[1:50,3])
,2)
)
n_plot <- 50
plot (0,0,type = "n", ylab = "Replication ID", xlab = "x",
xlim = c(90, 160), #xlim = range (CI_rep[,1:2]),
ylim = c(0,n_plot+1))
title (main = "Illustration of Confidence Intervals")
abline (v = mu, col = "green", lwd = 3)
abline (v = mu + c(-me, +me), col = "green")
for (i in 1:n_plot)
{
lines (CI_rep[i,1:2], c(i,i), col = 2 - CI_rep[i, 3] )
points (xbar[i], i, pch = 4, col = 2 - CI_rep[i,3])
}
## [1] 0.953
## [1] "Sex" "Wr.Hnd" "NW.Hnd" "W.Hnd" "Fold" "Pulse" "Clap" "Exer"
## [9] "Smoke" "Height" "M.I" "Age"
##
## One Sample t-test
##
## data: survey$Height
## t = 253.07, df = 208, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 171.0380 173.7237
## sample estimates:
## mean of x
## 172.3809
## 2.5 % 97.5 %
## (Intercept) 171.038 173.7237
##
## Female Male
## 118 118
# Confidence interval for probability of female
prop.test (freq.sex[1], sum(freq.sex), conf.level = 0.95, correct = FALSE)
##
## 1-sample proportions test without continuity correction
##
## data: freq.sex[1] out of sum(freq.sex), null probability 0.5
## X-squared = 0, df = 1, p-value = 1
## alternative hypothesis: true p is not equal to 0.5
## 95 percent confidence interval:
## 0.4367215 0.5632785
## sample estimates:
## p
## 0.5
# conputing it directly
p.female <- freq.sex[1]/sum(freq.sex)
ME <- sqrt(p.female*(1 - p.female)/sum(freq.sex))*qnorm(0.975)
# CI
p.female + c(-ME, ME)
## Female Female
## 0.4362086 0.5637914
Note that for 95% CI, should use 0.975 lower tail quantile or 0.025 upper tail quantile of standard normal
## p me
## 1 0.00 0.00000000
## 2 0.05 0.01233140
## 3 0.10 0.01697410
## 4 0.15 0.02020322
## 5 0.20 0.02263213
## 6 0.25 0.02450000
## 7 0.30 0.02592836
## 8 0.35 0.02698710
## 9 0.40 0.02771859
## 10 0.45 0.02814836
## 11 0.50 0.02829016
## 12 0.55 0.02814836
## 13 0.60 0.02771859
## 14 0.65 0.02698710
## 15 0.70 0.02592836
## 16 0.75 0.02450000
## 17 0.80 0.02263213
## 18 0.85 0.02020322
## 19 0.90 0.01697410
## 20 0.95 0.01233140
## 21 1.00 0.00000000
## p me
## 1 0.00 0.000000000
## 2 0.05 0.009388965
## 3 0.10 0.012923857
## 4 0.15 0.015382467
## 5 0.20 0.017231810
## 6 0.25 0.018653981
## 7 0.30 0.019741518
## 8 0.35 0.020547623
## 9 0.40 0.021104571
## 10 0.45 0.021431793
## 11 0.50 0.021539762
## 12 0.55 0.021431793
## 13 0.60 0.021104571
## 14 0.65 0.020547623
## 15 0.70 0.019741518
## 16 0.75 0.018653981
## 17 0.80 0.017231810
## 18 0.85 0.015382467
## 19 0.90 0.012923857
## 20 0.95 0.009388965
## 21 1.00 0.000000000