## authors: Longhai Li, Shi Qiu, Cindy Feng, 2016
## accompanying publication: Longhai Li, Cindy X. Feng, and Shi Qiu*, (2017+), Estimating Cross-validatory Predictive P-values with Integrated Importance Sampling for Disease Mapping Models. arXiv:1603.07668, Statistics in Medicine, available from the following URL:
## http://onlinelibrary.wiley.com/doi/10.1002/sim.7278/full


## load R package "R2OpenBUGS"
library("R2OpenBUGS")

##################################################################################
################### OpenBUGS fitting for Germany larynx data #####################
##################################################################################

## Load data for MCMC simulation
##################################################################################
## Germany dataset is available from 
## http://math.usask.ca/longhai/software/dmpvalues/larynxdata.zip
## download the above zip file and unzip them into directory called larynxdata. 
##################################################################################

O = dget ("larynxdata/Y.txt")
E = dget ("larynxdata/E.txt")
X= dget ("larynxdata/X.txt")
adj = dget ("larynxdata/adj.txt")
num = dget("larynxdata/num.txt")
N <- length (O)
sumNumNeigh <- sum (num)

data <- list(N=N,sumNumNeigh=sumNumNeigh,O=O,E=E,X=X,adj=adj,num=num)


## define BUGS model
## alpha --- intercept.
## prec ---  a scalar, inverse variance,ie, 1/tau^2 in the paper
## beta --- a latent variable for linear effect with covariate X.
## theta --- the linear predictor
## gamma --- spatial strength, ie, phi in the paper
model <- '
model {
    for(i in 1 : N)  m[i] <- 1/E[i] 
    cumsum[1] <- 0
    for(i in 2:(N+1)) cumsum[i] <- sum(num[1:(i-1)])
    
    for(k in 1 : sumNumNeigh) {
        for(i in 1:N) {
            pick[k,i] <- step(k - cumsum[i] - epsilon) * step(cumsum[i+1] - k)
        }
        C[k] <- sqrt(E[adj[k]] / inprod(E[], pick[k,]))
    }
    epsilon <- 0.0001
    for (i in 1 : N) {
        O[i] ~ dpois(mu[i])
        log(mu[i]) <- log(E[i]) + S[i]
        RR[i] <- exp(S[i])
        theta[i] <- alpha + beta*X[i]/100
    }
    # Proper CAR prior distribution for spatial random effects:
    S[1:N] ~ car.proper(theta[], C[], adj[], num[], m[], prec, gamma)
    # Other priors:
    alpha ~ dnorm(0, 0.0001)
    beta ~ dnorm(0, 0.001)
    prec ~ dgamma(0.5, 0.0005)
    v <- 1/prec
    sigma <- sqrt(1 / prec)
    gamma.min <- min.bound(C[], adj[], num[], m[])
    gamma.max <- max.bound(C[], adj[], num[], m[])
    gamma ~ dunif(gamma.min, gamma.max)
}'
write(model, "model_propcar_full.txt")

inits <- function() {
    list(alpha = rnorm(1,0,2), prec = runif(1,0.1,2), gamma =0,S = rep(0,N), 
         beta=rnorm(1,5,5))
}

parameters<-c("alpha","S","sigma","prec","gamma" ,"beta","theta")

## nIter --- number of iterations for each chain of MCMC simulation
## nBur --- number of iterations for burning
nIter <- 3000
nBur <- 500

## fit data with openbugs
fit<-bugs(data,inits,parameters, n.iter = nIter, 
          model.file='model_propcar_full.txt',
          n.chains=2, n.thin=10, n.burnin = nBur, DIC=TRUE, 
          bugs.seed = sample(1:14,1))
## save MCMC samples for future use
save.image (file = "propcar_larynx.RData") 

##################################################################################
###################### Compute predictive p-values ###############################
##################################################################################

library("R2OpenBUGS")

## load MCMC samples and original data
load ("propcar_larynx.RData") 

## attach names in 'data' for directly access
attach(data)

## extract MCMC samples of different variables
mcmc<-fit$sims.matrix 
Sims <- nrow (mcmc) ## number of MCMC samples
alpha_hat<-mcmc[,"alpha"]
beta_hat<-mcmc[,"beta"]
gamma_hat<-mcmc[,"gamma"]
prec_hat<-mcmc[,"prec"]
sigma_hat<-mcmc[,"sigma"]
S_hat<-mcmc[,sprintf("S[%d]",c(1:N))]
theta_hat<-mcmc[,sprintf("theta[%d]",c(1:N))]

## define vectors to store predictive pvalues
p_ghost <- p_iis <-p_nis <- p_post <- rep(0,N)


# compute iIS predictive p_value ------------------------------------------


## define C matrix representing spatial dependency
C <- table(adj,rep(1:N,num))
for(i in 1:N){
    for(j in 1:N){
        C[i,j]<-sqrt(C[i,j]*E[j]/E[i])
    }
}


## number of re-sampling (replicated) latent variables in iIS
R <- 50 
for( i in 1:N )
{
    Iweight_rep <- rep(0,Sims) ## a vector holding integrated importance weights 
    Ip_value_rep <- rep(0,Sims) ## a vector holding integrated p-values 
    
    for(k in 1:Sims) 
    {
        ## the mean of the conditional distribution of S_i
        mu_S <- 
            theta_hat[k,i] + gamma_hat[k]*sum(C[i,]*(S_hat[k,]-theta_hat[k,]))
        ## the standard error of the conditional distribution of S_i
        sigma_S <- 1/sqrt(prec_hat[k]*E[i])
        
        ## compute integrated weight 
        ## generate new S_i for computing integrated weight
        S_rep_w <-rnorm(R, mu_S,sigma_S)
        mu_rep_w <- exp(S_rep_w) * E[i] 
        Iweight_rep[k] <- 1/mean(dpois(O[i], mu_rep_w))
        
        ## compute integrated p-value            
        S_rep_A <- rnorm(R, mu_S,sigma_S)
        mu_rep_A <- exp(S_rep_A) * E[i]
        Ip_value_rep[k] <- mean(ppois(O[i], mu_rep_A,lower.tail=FALSE)) + 
            mean(dpois(O[i],mu_rep_A)) * 0.5
    }
    ## compute iIS p-values
    p_iis [i] <- sum(Ip_value_rep * Iweight_rep) / sum (Iweight_rep)
}


# compute ghosting p-values ---------------------------------------------

for (i in 1:N)
{
    Np_value_rep <- rep(0, Sims) ## a vector holding new p-values
    for (k in 1:Sims)
    {
        ## the mean of the conditional distribution of S_i
        mu_S <-
            theta_hat[k,i]+gamma_hat[k]*sum(C[i, ]*(S_hat[k, ]-theta_hat[k, ]))
        ## the standard error of the conditional distribution of S_i
        sigma_S <- 1 / sqrt(prec_hat[k] * E[i])
        
        ## generate a new S
        S_rep_A <- rnorm(1, mu_S, sigma_S)
        mu_rep_A <- exp(S_rep_A) * E[i]
        Np_value_rep[k] <-
            mean(ppois(O[i], mu_rep_A, lower.tail = FALSE)) +
            mean(dpois(O[i], mu_rep_A)) * 0.5
    }
    p_ghost [i] <- mean (Np_value_rep)
}


# compute p-values with non-integrated importance sampling  ---------------


for (i in 1:N){
    mu_hat <- exp(S_hat[,i]) * E[i]
    prob_hat <- dpois(O[i], mu_hat)
    weight_hat<- 1/prob_hat
    p_value_hat<- ppois(O[i], mu_hat,lower.tail=FALSE) + 0.5*dpois(O[i],mu_hat)
    p_nis[i] <-  sum(p_value_hat * weight_hat) / sum (weight_hat)
}

# compute p-values with posterior predictive checking ---------------------
for (i in 1:N){
    mu_hat <- exp(S_hat[,i]) * E[i]
    p_value_hat<- ppois(O[i], mu_hat,lower.tail=FALSE) + 0.5*dpois(O[i],mu_hat)
    p_post[i] <-  mean(p_value_hat)
}