This file is a demonstration of the Central Limit Theorem.

1 Create a population

## Simulate the population: 100,000 draws from a Gamma distribution with
## shape = 10 and rate = 0.2 (theoretical mean = shape / rate = 50).
X <- rgamma(n = 100000, shape = 10, rate = 0.2)
## Population mean and standard deviation, used as the reference values
## throughout the rest of the demonstration.
mean(X)
## [1] 49.98061
sd(X)
## [1] 15.7762
## Histogram of the whole population.
hist(X, main = "Population Distribution")

2 Sampling distributions

## Draw a few samples of size 10 (with replacement) from the population X
## to see how the samples, and the sample means, vary from draw to draw.
## NOTE: every call to sample() draws a fresh sample, so each mean printed
## below is the mean of a new sample, not of the sample printed just above.
## `TRUE` is spelled out because `T` is an ordinary variable that can be
## reassigned, whereas `TRUE` is a reserved word.
sample(X, size = 10, replace = TRUE)
##  [1] 38.14177 47.19855 45.38707 25.40296 52.05835 50.77913 83.93995
##  [8] 34.38013 48.90529 66.90812
mean(sample(X, size = 10, replace = TRUE))
## [1] 53.3466
sample(X, size = 10, replace = TRUE)
##  [1] 60.70218 46.57367 42.17821 32.29061 63.89368 15.10524 48.40980
##  [8] 29.52234 58.08300 48.59329
mean(sample(X, size = 10, replace = TRUE))
## [1] 48.35281
sample(X, size = 10, replace = TRUE)
##  [1] 69.55654 41.48991 55.78634 62.63542 52.70199 62.20427 48.79331
##  [8] 24.37126 52.05422 57.97560
mean(sample(X, size = 10, replace = TRUE))
## [1] 45.65534
sample(X, size = 10, replace = TRUE)
##  [1] 53.50624 66.70686 72.39108 46.21347 25.13751 64.91788 93.81472
##  [8] 40.29587 45.38460 33.53177
mean(sample(X, size = 10, replace = TRUE))
## [1] 44.59794

sample size = 10

## Sampling distribution of the mean for n = 10: draw 10,000 samples of
## size 10 (with replacement) from X and keep the mean of each sample.
sample1_xbar <- replicate(10000, mean(sample(X, size = 10, replace = TRUE)))
## Compare the mean of the sample means with the population mean.
mean(sample1_xbar)
## [1] 50.04278
mean(X)
## [1] 49.98061
## Compare the sd of the sample means with sd(X) and with sd(X) / sqrt(n).
sd(sample1_xbar)
## [1] 5.01743
sd(X)
## [1] 15.7762
sd(X) / sqrt(10)
## [1] 4.988873
## Range of the n = 10 sample means; the later histograms reuse `xlim` so
## that all of them are drawn on the same horizontal scale.
xlim <- range(sample1_xbar)
## Density-scaled histogram (`breaks` replaces the S-compatibility
## argument `nclass`, which is equivalent for a single number).
hist(sample1_xbar, main = "n = 10", xlim = xlim,
     probability = TRUE, breaks = 20)
## Overlay the normal density with mean mean(X) and sd sd(X) / sqrt(10).
## `length.out` is written in full rather than relying on partial matching.
x <- seq(min(sample1_xbar), max(sample1_xbar), length.out = 100)
lines(x, dnorm(x, mean = mean(X), sd = sd(X) / sqrt(10)))
## Mark the population mean.
abline(v = mean(X), col = "red")

sample size = 30

## Sampling distribution of the mean for n = 30: 10,000 sample means, each
## from a sample of size 30 drawn with replacement from X.
sample2_xbar <- replicate(10000, mean(sample(X, size = 30, replace = TRUE)))
## Density-scaled histogram on the horizontal scale stored in `xlim`
## (the range of the n = 10 sample means), so the plots are comparable.
hist(sample2_xbar, main = "n = 30", xlim = xlim,
     probability = TRUE, breaks = 20)
## Overlay the normal density with mean mean(X) and sd sd(X) / sqrt(30).
## `length.out` is written in full rather than relying on partial matching.
x <- seq(min(sample2_xbar), max(sample2_xbar), length.out = 100)
lines(x, dnorm(x, mean = mean(X), sd = sd(X) / sqrt(30)))
## Mark the population mean.
abline(v = mean(X), col = "red")

sample size = 100

## Sampling distribution of the mean for n = 100: 10,000 sample means, each
## from a sample of size 100 drawn with replacement from X.
sample3_xbar <- replicate(10000, mean(sample(X, size = 100, replace = TRUE)))
## Density-scaled histogram on the horizontal scale stored in `xlim`
## (the range of the n = 10 sample means), so the plots are comparable.
hist(sample3_xbar, main = "n = 100", xlim = xlim,
     probability = TRUE, breaks = 20)
## Overlay the normal density with mean mean(X) and sd sd(X) / sqrt(100).
## `length.out` is written in full rather than relying on partial matching.
x <- seq(min(sample3_xbar), max(sample3_xbar), length.out = 100)
lines(x, dnorm(x, mean = mean(X), sd = sd(X) / sqrt(100)))
## Mark the population mean.
abline(v = mean(X), col = "red")

Plot a comparison boxplot of the sample means

## Collect the three sets of sample means in one data frame, labelling each
## column with its sample size (check.names = FALSE keeps the labels as is).
allxbar <- data.frame(
  "n = 10" = sample1_xbar,
  "n = 30" = sample2_xbar,
  "n = 100" = sample3_xbar,
  check.names = FALSE
)
## One box per sample size, with the population mean as a red reference line.
boxplot(allxbar)
abline(h = mean(X), col = "red")