1 Simulating Confidence Intervals

1.1 Generate a population data

X <- rgamma (1000000, 10, .08)
# use another sample
# X <- rbinom (100000, 1, 0.3)

mu <- mean (X) # population mean
mu
## [1] 125.0336
sigma <- sd (X) # population sd
sigma
## [1] 39.5123
hist (X, main = "Population Distribution")
abline(v=mu,col="green")

1.2 Draw sample of sample mean

n <- 30
n_rep <- 2000
CI_rep <- matrix (0, n_rep, 3) 

xbar <- replicate (n_rep, mean(sample (X, size = n, replace = T) ) )
data.frame(xbar = xbar[1:10])
##        xbar
## 1  142.0334
## 2  129.7287
## 3  121.2783
## 4  132.5191
## 5  119.7230
## 6  140.3745
## 7  133.9152
## 8  126.1130
## 9  113.6670
## 10 111.3840

Look at the distribution of sample mean in comparison with the population

xbar.hist <- hist (xbar, nclass=20, xlim = range (X))
abline(v=mu,col="green")

1.3 Find 95% confidence intervals for each sample mean

# find margin error (me)
alpha <- 0.05
z <- - qnorm (alpha/2) # or using z <- qnorm (alpha/2, lower = F)
me <- z * sigma/sqrt (n)

# find a CI with the first sample
ci <- c(xbar[1] - me, xbar[1] + me)
mu.is.covered <- ci[1] < mu & mu < ci[2] 

# repeat n_rep times
for (i in 1:n_rep)
{
    CI_rep [i,1:2] <- c(xbar[i] - me, xbar[i] + me)
    CI_rep [i, 3] <- CI_rep [i,1] < mu & mu < CI_rep [i,2] 
}
library("rmarkdown")
paged_table(
round(
  data.frame("true mean"= mu,
             "sample mean"=xbar[1:50], 
             "lower bound" = CI_rep[1:50,1],
             "upper bound" = CI_rep[1:50,2],
             "true mean covered"=CI_rep[1:50,3])
,2)
)

1.4 Plot CI

n_plot <- 50

plot (0,0,type = "n", ylab = "Replication ID", xlab = "x",  
      xlim = c(90, 160), #xlim = range (CI_rep[,1:2]),
      ylim = c(0,n_plot+1))
title (main = "Illustration of Confidence Intervals")
abline (v = mu, col = "green", lwd = 3)
abline (v = mu + c(-me, +me), col = "green")
for (i in 1:n_plot)
{
    lines (CI_rep[i,1:2], c(i,i), col =  2 - CI_rep[i, 3] )
    points (xbar[i], i, pch = 4, col = 2 - CI_rep[i,3])
}

## actual cover rate
mean (CI_rep [,3])
## [1] 0.953

2 Finding Confidence Interval for a Population Mean with t Quantile

survey <- read.csv("survey.csv")
colnames(survey)
##  [1] "Sex"    "Wr.Hnd" "NW.Hnd" "W.Hnd"  "Fold"   "Pulse"  "Clap"   "Exer"  
##  [9] "Smoke"  "Height" "M.I"    "Age"
t.test(survey$Height) # for two-sided symmetric CI
## 
##  One Sample t-test
## 
## data:  survey$Height
## t = 253.07, df = 208, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  171.0380 173.7237
## sample estimates:
## mean of x 
##  172.3809
# alternatively
confint (lm(survey$Height~1))
##               2.5 %   97.5 %
## (Intercept) 171.038 173.7237

3 Finding Confidence Interval for a Population Proportion with Z Quantile

survey <- read.csv("survey.csv")
freq.sex <- table(survey$Sex); freq.sex
## 
## Female   Male 
##    118    118
# Confidence interval for probability of female
prop.test (freq.sex[1], sum(freq.sex), conf.level = 0.95, correct = FALSE)
## 
##  1-sample proportions test without continuity correction
## 
## data:  freq.sex[1] out of sum(freq.sex), null probability 0.5
## X-squared = 0, df = 1, p-value = 1
## alternative hypothesis: true p is not equal to 0.5
## 95 percent confidence interval:
##  0.4367215 0.5632785
## sample estimates:
##   p 
## 0.5
# conputing it directly
p.female <- freq.sex[1]/sum(freq.sex)
ME <- sqrt(p.female*(1 - p.female)/sum(freq.sex))*qnorm(0.975)
# CI 
p.female + c(-ME, ME)
##    Female    Female 
## 0.4362086 0.5637914

Note that for 95% CI, should use 0.975 lower tail quantile or 0.025 upper tail quantile of standard normal

4 Margin Errors of Proportions

p<- seq (0,1, by=0.05)

n<- 1200

data.frame(p=p, me=sqrt(p*(1-p)/n)*1.96)
##       p         me
## 1  0.00 0.00000000
## 2  0.05 0.01233140
## 3  0.10 0.01697410
## 4  0.15 0.02020322
## 5  0.20 0.02263213
## 6  0.25 0.02450000
## 7  0.30 0.02592836
## 8  0.35 0.02698710
## 9  0.40 0.02771859
## 10 0.45 0.02814836
## 11 0.50 0.02829016
## 12 0.55 0.02814836
## 13 0.60 0.02771859
## 14 0.65 0.02698710
## 15 0.70 0.02592836
## 16 0.75 0.02450000
## 17 0.80 0.02263213
## 18 0.85 0.02020322
## 19 0.90 0.01697410
## 20 0.95 0.01233140
## 21 1.00 0.00000000
n<- 2070

data.frame(p=p, me=sqrt(p*(1-p)/n)*1.96)
##       p          me
## 1  0.00 0.000000000
## 2  0.05 0.009388965
## 3  0.10 0.012923857
## 4  0.15 0.015382467
## 5  0.20 0.017231810
## 6  0.25 0.018653981
## 7  0.30 0.019741518
## 8  0.35 0.020547623
## 9  0.40 0.021104571
## 10 0.45 0.021431793
## 11 0.50 0.021539762
## 12 0.55 0.021431793
## 13 0.60 0.021104571
## 14 0.65 0.020547623
## 15 0.70 0.019741518
## 16 0.75 0.018653981
## 17 0.80 0.017231810
## 18 0.85 0.015382467
## 19 0.90 0.012923857
## 20 0.95 0.009388965
## 21 1.00 0.000000000