1 Functions and packages for Analyzing Data

## data --- original data frame for holding data
## cname --- variable recording cluster (psu) identity
## csize --- variable recording cluster (psu) population size (not sample size)
## yvar --- variable of interest
## N --- total number of clusters (psus)
cluster_ratio_est <- function (data, cname, csize, yvar, N = Inf, show.cluster.summary=FALSE)
{
  clust <- data[,cname]
  ydata <- data [, yvar]  
  
  ## cluster-wise summary
  ybari <- tapply (ydata, clust, mean)
  Mi <- tapply (data [, csize], clust, function (x) x[1])
  ## the same as total in cluster if Mi = mi
  t_hat_cls <- ybari * Mi 
  
  if (show.cluster.summary == TRUE){
      cat ("Summary of Clusters:\n")
      print(data.frame (Mi=Mi, yhari=ybari, ti = t_hat_cls))
  }
  ## apply ratio estimate to t_hat_cls and Mi
  srs_ratio_est (t_hat_cls, Mi, N = N)
  
}

## ydata --- observations of the variable of interest
## xdata --- observations of the auxilliary variable
## N --- population size

## the output is the ratio of ybarU/xbarU
srs_ratio_est <- function (ydata, xdata, N = Inf)
{   
  n <- length (xdata)
  xbar <- mean (xdata)
  ybar <- mean (ydata)
  B_hat <- ybar / xbar
  d <- ydata - B_hat * xdata
  var_d <- sum (d^2) / (n - 1)
 
  sd_B_hat <- sqrt ((1 - n/N) * var_d / n) / xbar
  mem <- qt (0.975, df = n - 1) * sd_B_hat
  output <- c (B_hat, sd_B_hat, B_hat - mem, B_hat + mem )
  
  
  names (output) <- c("Est.", "S.E.", "ci.low", "ci.upp" )
  output
}


srs_mean_est <- function (sdata, N = Inf)
{
    n <- length (sdata)
    ybar <- mean (sdata)
    se.ybar <- sqrt((1 - n / N)) * sd (sdata) / sqrt(n)  
    mem <- qt (0.975, df = n - 1) * se.ybar
    c (Est. = ybar, S.E. = se.ybar, ci.low = ybar - mem, ci.upp = ybar + mem)
}

2 Estimating for One-stage cluster sampling: analysis of algebra.csv

Consider a population of 187 high school algebra classes in a city. An investigator takes an SRS of 12 of those classes and gives each student in the sampled classes a test about function knowledge.

algebra <- read.csv ("data/algebra.csv")

algebra
cluster_ratio_est (data = algebra, 
                   cname = "class", csize = "Mi", 
                   yvar = "score", N = 187, show.cluster.summary = TRUE)
## Summary of Clusters:
##     Mi    yhari   ti
## 23  20 61.50000 1230
## 37  26 64.23077 1670
## 38  24 58.41667 1402
## 39  34 58.00000 1972
## 41  26 58.00000 1508
## 44  28 64.85714 1816
## 46  19 55.15789 1048
## 51  32 72.12500 2308
## 58  17 58.17647  989
## 62  21 66.57143 1398
## 106 26 62.34615 1621
## 108 26 67.15385 1746
##      Est.      S.E.    ci.low    ci.upp 
## 62.568562  1.491578 59.285621 65.851503

3 Estimating for two-stage cluster sampling: analysis of coots.csv

coots <- read.csv ("data/coots.csv")
coots
cluster_ratio_est (data = coots, 
                   cname = "clutch", csize = "csize", 
                   yvar = "volume", show.cluster.summary = TRUE)
## Summary of Clusters:
##     Mi     yhari        ti
## 1   13 3.8643033 50.235943
## 2   13 4.1941828 54.524377
## 3    6 0.9162504  5.497502
## 4   11 2.9983346 32.981681
## 5   10 2.4957075 24.957075
## 6   13 3.9842595 51.795373
## 7    9 1.9270690 17.343621
## 8   11 2.9615264 32.576791
## 9   12 3.4605792 41.526950
## 10  11 2.9615264 32.576791
## 11  12 3.4989084 41.986901
## 12  11 2.9998683 32.998551
## 13  12 3.5664408 42.797290
## 14  11 2.9860652 32.846717
## 15  11 2.9829979 32.812977
## 16  10 2.4069825 24.069825
## 17   9 2.0102296 18.092067
## 18  10 2.4374025 24.374025
## 19  11 2.9262519 32.188771
## 20  11 2.9477233 32.424957
## 21  10 2.5375350 25.375350
## 22  13 4.2691555 55.499021
## 23  12 3.7653876 45.184651
## 24  10 2.5641525 25.641525
## 25   9 1.9619759 17.657783
## 26  12 3.4824816 41.789779
## 27  10 2.5742925 25.742925
## 28   9 1.9558159 17.602343
## 29  12 3.4076484 40.891781
## 30  11 2.9553917 32.509309
## 31  10 2.5045800 25.045800
## 32   8 1.5599376 12.479501
## 33  10 2.4919050 24.919050
## 34  10 2.4868350 24.868350
## 35   9 1.9866161 17.879545
## 36   9 1.9250156 17.325141
## 37   8 1.4974752 11.979802
## 38   9 1.9342557 17.408301
## 39   8 1.5648048 12.518438
## 40  10 2.4133200 24.133200
## 41  11 2.8188946 31.007841
## 42   7 1.2340760  8.638532
## 43   7 1.2607823  8.825476
## 44  11 3.0305418 33.335960
## 45  10 2.4716250 24.716250
## 46   9 2.0759368 18.683432
## 47  11 3.1593705 34.753076
## 48   9 1.9342557 17.408301
## 49   9 1.9147489 17.232740
## 50   8 1.5923856 12.739085
## 51  10 2.4703575 24.703575
## 52  11 3.0428112 33.470923
## 53   9 2.0615634 18.554071
## 54  10 2.4259950 24.259950
## 55   8 1.5769728 12.615782
## 56   9 1.9014021 17.112619
## 57  10 2.6364000 26.364000
## 58   6 0.8779212  5.267527
## 59   6 0.8783775  5.270265
## 60   6 0.9025614  5.415368
## 61   8 1.4942304 11.953843
## 62   8 1.5972528 12.778022
## 63   8 1.4974752 11.979802
## 64   9 2.0420566 18.378509
## 65   6 0.8135829  4.881497
## 66   7 1.2042644  8.429851
## 67   5 0.6600506  3.300253
## 68   9 2.0646434 18.581791
## 69   8 1.5477696 12.382157
## 70  12 3.5135100 42.162120
## 71  10 2.5235925 25.235925
## 72   9 1.9784027 17.805625
## 73   9 2.0974970 18.877473
## 74   8 1.5096432 12.077146
## 75  13 3.9169983 50.920978
## 76   9 2.0892836 18.803553
## 77  10 2.4792300 24.792300
## 78   8 1.5858960 12.687168
## 79  11 3.0357563 33.393319
## 80   8 1.5785952 12.628762
## 81   7 1.2173070  8.521149
## 82   7 1.1887375  8.321163
## 83   7 1.1539574  8.077701
## 84   5 0.5774730  2.887365
## 85   5 0.6492769  3.246384
## 86   9 2.1167985 19.051187
## 87  10 2.4779625 24.779625
## 88   9 2.3482947 21.134652
## 89  11 3.0152051 33.167256
## 90   9 1.9843574 17.859217
## 91   9 2.0340485 18.306437
## 92  10 2.4140805 24.140805
## 93   7 1.1835205  8.284644
## 94  10 2.5456470 25.456470
## 95   7 1.2266231  8.586362
## 96   6 0.8803852  5.282311
## 97   5 0.5941406  2.970703
## 98   9 1.9786081 17.807473
## 99  11 2.9879056 32.866962
## 100 10 2.3669295 23.669295
## 101 12 3.4302809 41.163371
## 102  5 0.6167655  3.083828
## 103  7 1.1348282  7.943798
## 104  7 1.1200467  7.840327
## 105  9 1.9874375 17.886937
## 106 11 3.0118310 33.130141
## 107  9 2.0927743 18.834969
## 108  9 1.9597172 17.637455
## 109 10 2.4110385 24.110385
## 110  9 1.9319970 17.387973
## 111 10 2.6004030 26.004030
## 112 12 3.6284976 43.541971
## 113  8 1.6486829 13.189463
## 114  8 1.6477094 13.181676
## 115 11 2.8701194 31.571313
## 116  9 2.1182359 19.064123
## 117 10 2.5818975 25.818975
## 118  8 1.6056893 12.845514
## 119 13 3.9602683 51.483487
## 120 11 3.1550762 34.705838
## 121  9 2.0642328 18.578095
## 122 12 3.6525902 43.831083
## 123 10 2.5998960 25.998960
## 124 11 2.9299327 32.229260
## 125 11 3.0572277 33.629505
## 126 10 2.5213110 25.213110
## 127 12 3.4970832 41.964998
## 128 11 2.9851450 32.836595
## 129  9 1.9324077 17.391669
## 130  9 2.1535535 19.381981
## 131 12 3.5416181 42.499417
## 132 10 2.6528775 26.528775
## 133 11 2.9106084 32.016693
## 134  8 1.5865450 12.692360
## 135 11 2.8753339 31.628673
## 136  9 2.0977024 18.879321
## 137 10 2.3616060 23.616060
## 138 10 2.6237250 26.237250
## 139  8 1.5711322 12.569057
## 140  9 2.0467793 18.421014
## 141 11 2.8725733 31.598306
## 142 11 2.8845359 31.729895
## 143 11 2.9385213 32.323734
## 144 12 3.7318039 44.781647
## 145 10 2.5793625 25.793625
## 146 10 2.6102895 26.102895
## 147  9 2.0381552 18.343397
## 148 10 2.6080080 26.080080
## 149 11 3.1047717 34.152488
## 150 11 2.9259452 32.185397
## 151  7 1.1738318  8.216822
## 152  8 1.5717811 12.574249
## 153  9 1.9714213 17.742792
## 154 10 2.4262485 24.262485
## 155  9 1.8248121 16.423309
## 156 10 2.5633920 25.633920
## 157 10 2.3400585 23.400585
## 158  9 1.9991416 17.992274
## 159  9 1.7527396 15.774656
## 160 11 2.9903595 32.893955
## 161  9 1.9424691 17.482222
## 162  8 1.5578285 12.462628
## 163  8 1.5970906 12.776724
## 164  7 1.1814089  8.269862
## 165 10 2.4366420 24.366420
## 166  8 1.6519277 13.215421
## 167  8 1.6050403 12.840323
## 168  6 0.8824842  5.294905
## 169 10 2.3854350 23.854350
## 170 10 2.4830325 24.830325
## 171 10 2.5147200 25.147200
## 172 11 2.9814642 32.796106
## 173 10 2.3461425 23.461425
## 174 12 3.6193716 43.432459
## 175 10 2.4336000 24.336000
## 176 12 3.7526112 45.031334
## 177 10 2.5273950 25.273950
## 178 11 3.1010909 34.111999
## 179  9 1.9404158 17.463742
## 180  9 1.9465758 17.519182
## 181 12 3.4532784 41.439341
## 182 13 4.2198877 54.858541
## 183 13 4.4148166 57.392615
## 184 12 3.4843068 41.811682
##      Est.      S.E.    ci.low    ci.upp 
## 2.4905786 0.0610403 2.3701453 2.6110118