# Log likelihood for the location parameter theta of a Cauchy model, given
# data vector x.
log_like_cauchy <- function (theta, x) sum(dcauchy(x, theta, log=TRUE))
thetas <- seq (15,25, length=100)
d1 <- rcauchy(50, location = 20)
log.like.d1 <- sapply(thetas, log_like_cauchy, x = d1)
d2 <- rcauchy(100, location = 20)
log.like.d2 <- sapply(thetas, log_like_cauchy, x = d2)
d3 <- rcauchy(200, location = 20)
log.like.d3 <- sapply(thetas, log_like_cauchy, x = d3)
matplot(thetas,
data.frame(log.like.d1, log.like.d2, log.like.d3), type = "l",
ylab = "log likelihood",
main = "Log Likelihood of Cauchy Models")
abline (v = 20)
## Let's look at the variability of the log likelihood and of the MLE;
# we also show how to automate the code above for many replicated datasets
nrep <- 50
for (i in 1:nrep){
name.data <- paste0("smalldata",i)
assign(name.data, rcauchy(20, location = 20) )
assign(paste0("log.like.smalldata",i),
sapply(thetas, log_like_cauchy, x = get(name.data)))
}
for (i in 1:nrep){
name.data <- paste0("largedata",i)
assign(name.data, rcauchy(200, location = 20) )
assign(paste0("log.like.largedata",i),
sapply(thetas, log_like_cauchy, x = get(name.data)))
}
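# A more idiomatic alternative (an addition to these notes, not part of the
# original run): keep the replicated datasets in a list instead of creating
# many named variables with assign()/get().
small.list <- replicate(nrep, rcauchy(20, location = 20), simplify = FALSE)
log.like.small <- sapply(small.list, function(d)
  sapply(thetas, log_like_cauchy, x = d))
# log.like.small is a length(thetas) x nrep matrix, usable directly in matplot()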
par (mfrow = c(1,2))
matplot(thetas
, data.frame(mget(paste0("log.like.smalldata",1:nrep)))
, type = "l"
, ylab = "log likelihood"
, main = "Sample Size = 20"
)
matplot(thetas
, data.frame(mget(paste0("log.like.largedata",1:nrep)))
, type = "l"
, ylab = "log likelihood"
, main = "Sample Size = 200"
)
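# Hedged follow-up (an addition): the comment above also mentions the
# variability of the MLE itself. One way to see it is to maximize each
# log-likelihood curve with optimize() over the same range as thetas, and
# compare the spread of the MLEs for the two sample sizes.
mle.small <- sapply(1:nrep, function(i)
  optimize(log_like_cauchy, c(15, 25), x = get(paste0("smalldata", i)),
           maximum = TRUE)$maximum)
mle.large <- sapply(1:nrep, function(i)
  optimize(log_like_cauchy, c(15, 25), x = get(paste0("largedata", i)),
           maximum = TRUE)$maximum)
c(sd.small = sd(mle.small), sd.large = sd(mle.large))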
# Per-observation negative log likelihood for the Cauchy location model (up
# to an additive constant), as a function of the residual x - theta, and its
# derivative, so that s(x - theta) is the per-observation score for theta.
lik <- function (x) log(1+x^2)
s <- function (x) 2*x/(1+x^2)
par (mfrow=c(2,1))
plot(lik, xlim = c(-5,5))
plot(s, xlim = c(-5,5))
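# Hedged illustration (an addition): the MLE equation for the Cauchy location
# sets the summed score, sum of s(x - theta), to zero. uniroot() can solve it
# for one dataset; the bracket c(15, 25) is an assumption based on the plots
# above and would fail if the score had the same sign at both ends.
total_score <- function (theta, x) sum(s(x - theta))
uniroot(total_score, c(15, 25), x = d1)$root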
# The next functions find the MLE for the mean, lambda, of a Poisson model
# truncated to non-zero counts ("nzp"), using three iterative methods. For
# all three MLE functions, the arguments are the vector of observations, the
# number of iterations to do, and the initial guess (defaulting to the sample
# mean). The result is a data frame with one row for each iteration
# (including the initial guess), with the columns being the estimate for
# lambda at that iteration and the log likelihood for that value of lambda
# (minus the factorial terms that don't involve lambda).
# COMPUTE LOG LIKELIHOOD. The arguments are the data vector and a value for
# lambda (or vector of values). The result is the log probability of the data
# given that value for lambda, omitting the factorial terms (or a vector of
# log probabilities if lambda is a vector).
nzp.log.likelihood <- function (n, lambda)
{
sum(n) * log(lambda) - length(n) * (lambda + log(1-exp(-lambda)))
}
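# Quick check (an illustrative addition): up to the constant factorial terms,
# nzp.log.likelihood should match the exact log likelihood of a Poisson
# distribution truncated to non-zero values, computed from dpois().
n0 <- c(1, 2, 2, 5)   # a tiny made-up dataset
exact <- sum(dpois(n0, 1.5, log = TRUE)) - length(n0) * log(1 - dpois(0, 1.5))
nzp.log.likelihood(n0, 1.5) - exact   # should equal sum(lfactorial(n0))
sum(lfactorial(n0))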
# COMPUTE THE SCORE FUNCTION. The arguments are as for nzp.log.likelihood;
# the result is the derivative of the log likelihood with respect to lambda.
nzp.scorefunction <- function (n, lambda)
{
  sum(n)/lambda - length(n)/(1-exp(-lambda))
}
# FIND MLE BY SIMPLE ITERATION.
nzp.simple.iteration <- function (n, r, lambda0=mean(n))
{
mean.n <- mean(n)
lambda <- rep(0,r+1)
lambda[1] <- lambda0
for (i in 1:r)
{ lambda[i+1] <- mean.n * (1 - exp(-lambda[i]))
}
data.frame (lambda=lambda, log.lik=nzp.log.likelihood(n,lambda))
}
# FIND MLE BY NEWTON-RAPHSON ITERATION.
nzp.newton.raphson <- function (n, r, lambda0=mean(n))
{
mean.n <- mean(n)
lambda <- rep(0,r+1)
lambda[1] <- lambda0
for (i in 1:r)
{ e <- exp(-lambda[i])
lambda[i+1] <- lambda[i] -
(mean.n/lambda[i] - 1/(1-e)) / (e/(1-e)^2 - mean.n/lambda[i]^2)
}
data.frame (lambda=lambda, log.lik=nzp.log.likelihood(n,lambda))
}
# FIND MLE BY THE METHOD OF SCORING.
nzp.method.of.scoring <- function (n, r, lambda0=mean(n))
{
mean.n <- mean(n)
lambda <- rep(0,r+1)
lambda[1] <- lambda0
for (i in 1:r)
{ e <- exp(-lambda[i])
lambda[i+1] <- lambda[i] -
(mean.n/lambda[i] - 1/(1-e)) / ((e/(1-e) - 1/lambda[i]) / (1-e))
}
data.frame (lambda=lambda, log.lik=nzp.log.likelihood(n,lambda))
}
## test with a generated dataset
x <- rpois (100, lambda = 1.5)
xp <- x[x>0]
# the mean of xp is an upward-biased estimate of lambda = 1.5
mean(xp)
## [1] 2.012987
## look at the log likelihood and the score function
lambdas <- seq (0.1, 20, length = 100)
log.like.values <- sapply (lambdas, nzp.log.likelihood, n = xp)
score.values <- sapply (lambdas, nzp.scorefunction, n = xp)
matplot(lambdas, data.frame(log.like.values, score.values), type = "l")
legend("bottomleft", legend = c("log likelihood", "score function"), lty = c(1,2), col = 1:2)
abline(h=0, lty=3)
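# Hedged aside (an addition to the original run): the plot shows the score
# crossing zero, so the MLE can also be found directly with uniroot(). The
# bracket c(0.1, 20) is an assumption taken from the grid above.
uniroot(nzp.scorefunction, c(0.1, 20), n = xp)$root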
nzp.simple.iteration(xp, 15)
## lambda log.lik
## 1 2.012987 -35.51746
## 2 1.744074 -33.28441
## 3 1.661103 -33.02644
## 4 1.630662 -32.98928
## 5 1.618844 -32.98353
## 6 1.614159 -32.98262
## 7 1.612286 -32.98247
## 8 1.611535 -32.98245
## 9 1.611233 -32.98245
## 10 1.611112 -32.98245
## 11 1.611063 -32.98245
## 12 1.611043 -32.98245
## 13 1.611035 -32.98245
## 14 1.611032 -32.98245
## 15 1.611031 -32.98245
## 16 1.611030 -32.98245
nzp.simple.iteration(xp, 15, 0.1)
## lambda log.lik
## 1 0.1000000 -183.48372
## 2 0.1915610 -136.39160
## 3 0.3509256 -95.58546
## 4 0.5957714 -64.46869
## 5 0.9035549 -45.29923
## 6 1.1974718 -36.59289
## 7 1.4051522 -33.80062
## 8 1.5191415 -33.13838
## 9 1.5723447 -33.00955
## 10 1.5951755 -32.98696
## 11 1.6046065 -32.98318
## 12 1.6084398 -32.98257
## 13 1.6099876 -32.98247
## 14 1.6106108 -32.98245
## 15 1.6108616 -32.98245
## 16 1.6109624 -32.98245
nzp.simple.iteration(xp, 15, 100)
## lambda log.lik
## 1 100.000000 -6986.19862
## 2 2.012987 -35.51746
## 3 1.744074 -33.28441
## 4 1.661103 -33.02644
## 5 1.630662 -32.98928
## 6 1.618844 -32.98353
## 7 1.614159 -32.98262
## 8 1.612286 -32.98247
## 9 1.611535 -32.98245
## 10 1.611233 -32.98245
## 11 1.611112 -32.98245
## 12 1.611063 -32.98245
## 13 1.611043 -32.98245
## 14 1.611035 -32.98245
## 15 1.611032 -32.98245
## 16 1.611031 -32.98245
nzp.newton.raphson(xp, 15)
## lambda log.lik
## 1 2.012987 -35.51746
## 2 1.529361 -33.10515
## 3 1.607426 -32.98268
## 4 1.611023 -32.98245
## 5 1.611030 -32.98245
## 6 1.611030 -32.98245
## 7 1.611030 -32.98245
## 8 1.611030 -32.98245
## 9 1.611030 -32.98245
## 10 1.611030 -32.98245
## 11 1.611030 -32.98245
## 12 1.611030 -32.98245
## 13 1.611030 -32.98245
## 14 1.611030 -32.98245
## 15 1.611030 -32.98245
## 16 1.611030 -32.98245
nzp.newton.raphson(xp, 15, 0.1)
## lambda log.lik
## 1 0.1000000 -183.48372
## 2 0.1949038 -135.17506
## 3 0.3699029 -92.25191
## 4 0.6648175 -58.85124
## 5 1.0729630 -39.47480
## 6 1.4446931 -33.50821
## 7 1.5959036 -32.98655
## 8 1.6109076 -32.98245
## 9 1.6110301 -32.98245
## 10 1.6110301 -32.98245
## 11 1.6110301 -32.98245
## 12 1.6110301 -32.98245
## 13 1.6110301 -32.98245
## 14 1.6110301 -32.98245
## 15 1.6110301 -32.98245
## 16 1.6110301 -32.98245
nzp.newton.raphson(xp, 15, 100)
## Warning in log(lambda): NaNs produced
## Warning in log(1 - exp(-lambda)): NaNs produced
## lambda log.lik
## 1 100.000 -6986.199
## 2 -4767.742 NaN
## 3 NaN NaN
## 4 NaN NaN
## 5 NaN NaN
## 6 NaN NaN
## 7 NaN NaN
## 8 NaN NaN
## 9 NaN NaN
## 10 NaN NaN
## 11 NaN NaN
## 12 NaN NaN
## 13 NaN NaN
## 14 NaN NaN
## 15 NaN NaN
## 16 NaN NaN
nzp.method.of.scoring(xp, 15)
## lambda log.lik
## 1 2.012987 -35.51746
## 2 1.623046 -32.98501
## 3 1.611043 -32.98245
## 4 1.611030 -32.98245
## 5 1.611030 -32.98245
## 6 1.611030 -32.98245
## 7 1.611030 -32.98245
## 8 1.611030 -32.98245
## 9 1.611030 -32.98245
## 10 1.611030 -32.98245
## 11 1.611030 -32.98245
## 12 1.611030 -32.98245
## 13 1.611030 -32.98245
## 14 1.611030 -32.98245
## 15 1.611030 -32.98245
## 16 1.611030 -32.98245
nzp.method.of.scoring(xp, 15, 0.1)
## lambda log.lik
## 1 0.100000 -183.48372
## 2 1.962253 -34.94717
## 3 1.620381 -32.98400
## 4 1.611038 -32.98245
## 5 1.611030 -32.98245
## 6 1.611030 -32.98245
## 7 1.611030 -32.98245
## 8 1.611030 -32.98245
## 9 1.611030 -32.98245
## 10 1.611030 -32.98245
## 11 1.611030 -32.98245
## 12 1.611030 -32.98245
## 13 1.611030 -32.98245
## 14 1.611030 -32.98245
## 15 1.611030 -32.98245
## 16 1.611030 -32.98245
nzp.method.of.scoring(xp, 15, 1000)
## lambda log.lik
## 1 1000.000000 -75929.29793
## 2 2.012987 -35.51746
## 3 1.623046 -32.98501
## 4 1.611043 -32.98245
## 5 1.611030 -32.98245
## 6 1.611030 -32.98245
## 7 1.611030 -32.98245
## 8 1.611030 -32.98245
## 9 1.611030 -32.98245
## 10 1.611030 -32.98245
## 11 1.611030 -32.98245
## 12 1.611030 -32.98245
## 13 1.611030 -32.98245
## 14 1.611030 -32.98245
## 15 1.611030 -32.98245
## 16 1.611030 -32.98245
# GOLDEN SECTION SEARCH. Minimizes the function f over the bracketing
# interval brack.int, shrinking the interval by the golden ratio until its
# width falls below the relative tolerance eps. Any arguments in ... are
# passed on to f. Returns the location of the minimum.
golden <- function (f, brack.int, eps=1e-4, ...) {
  g <- (3-sqrt(5))/2              # golden-section ratio, about 0.382
  xl <- min(brack.int)            # lower end of the bracket
  xu <- max(brack.int)            # upper end of the bracket
  tmp <- g*(xu-xl)
  xmu <- xu-tmp                   # upper interior point
  xml <- xl+tmp                   # lower interior point
  fl <- f(xml,...)
  fu <- f(xmu,...)
  while (abs(xu-xl) > (1.e-5+abs(xl))*eps) {
    if (fl<fu) {                  # minimum lies in [xl, xmu]: drop the right part
      xu <- xmu
      xmu <- xml
      fu <- fl
      xml <- xl+g*(xu-xl)
      fl <- f(xml,...)
    } else {                      # minimum lies in [xml, xu]: drop the left part
      xl <- xml
      xml <- xmu
      fl <- fu
      xmu <- xu-g*(xu-xl)
      fu <- f(xmu,...)
    }
  }
  if (fl<fu) xml else xmu         # return the better of the two interior points
}
## a test
f1 <- function(theta) -1997*log(2+theta)-1810*log(1-theta)-32*log(theta)
plot (f1, xlim = c(0.001,0.999))
golden(f1, c(0.001, 0.999))
## [1] 0.03571247
## [1] 10.0001
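# Hedged cross-check (an addition): R's built-in optimize(), which combines
# golden-section search with successive parabolic interpolation, should agree
# with golden() to within the tolerance.
optimize(f1, c(0.001, 0.999))$minimum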
# Find MLE and SD for Poisson Interval Data with "nlm"
## a utility function that computes log(sum(exp(lx))) while avoiding
## numerical over/underflow
log_sum_exp <- function (lx)
{
mlx <- max (lx)
mlx + log(sum(exp(lx - mlx)))
}
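# Quick check (an illustrative addition): a naive log(sum(exp(lx))) underflows
# when all entries are very negative, while log_sum_exp() stays finite.
lx <- c(-1000, -1001)
log(sum(exp(lx)))   # -Inf, because exp() underflows to 0
log_sum_exp(lx)     # about -999.69, computed stably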
# Compute the log likelihood for a Poisson mean parameter given interval
# data.
#
# Arguments:
# lambda The Poisson mean parameter
# low Vector of low ends of intervals (non-negative integers)
# high Vector of high ends of intervals (non-negative integers)
#
# Value: The log probability of Poisson values lying in the given
# intervals, based on the mean parameter given.
poisson_log_likelihood <- function (lambda, low, high)
{
  ll <- 0
  for (i in seq_along(low)) {
    lp <- dpois (low[i]:high[i], lambda, log = TRUE)
    ll <- ll + log_sum_exp (lp)
  }
  ll
}
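# Sanity check (an illustrative addition): with degenerate intervals
# (low == high), the interval log likelihood reduces to the ordinary Poisson
# log likelihood.
poisson_log_likelihood(3, c(2, 5), c(2, 5))
dpois(2, 3, log = TRUE) + dpois(5, 3, log = TRUE)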
# Find the MLE for the Poisson mean parameter given data on iid values
# specifying an interval in which each value lies (not necessarily a
# single value).
#
# Arguments:
# low Vector of low ends of intervals (non-negative integers)
# high Vector of high ends of intervals (non-negative integers)
#
# Values:
# MLE for the mean parameter and
# standard deviation.
poisson_mle <- function (low, high)
{
if (length(low) != length(high))
stop("low and high have different lengths")
if (any(floor(low)!=low) || any(floor(high)!=high))
stop("interval ends are not all integers")
neg_log_like <- function (lambda) -poisson_log_likelihood (lambda, low, high)
nlmfit <- nlm (neg_log_like,
(mean(low) + mean(high)) / 2, hessian = TRUE)
est <- nlmfit$estimate
sd <- sqrt (1/nlmfit$hessian[1,1])
CI <- est + c(-sd, sd) * 1.96
list (est=est, sd = sd, CI = CI)
}
## a test with a simulated data set
pn <- rpois (1000, lambda = 20)
low <- pmax(pn - 1, 0)
high <- pn + 4
poisson_mle (low, high)
## $est
## [1] 21.57305
##
## $sd
## [1] 0.1566171
##
## $CI
## [1] 21.26608 21.88002
# From the result, we have also found that the SD estimate is not accurate:
# the 95% CI misses the true value lambda = 20. This is reasonable, because
# the SD estimate is only justified "asymptotically".
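# Hedged alternative (an addition to these notes): a confidence interval based
# on the likelihood ratio avoids the quadratic approximation behind the SD.
# The bracket widths of 5 below are assumptions that work for this dataset.
fit <- poisson_mle (low, high)
ll.max <- poisson_log_likelihood (fit$est, low, high)
drop <- qchisq(0.95, 1) / 2
g <- function (l) poisson_log_likelihood (l, low, high) - (ll.max - drop)
c(uniroot(g, c(fit$est - 5, fit$est))$root,
  uniroot(g, c(fit$est, fit$est + 5))$root)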
## test with another data set
pn2 <- rpois (1000, lambda = 20)
low2 <- pmax(pn2 - 50, 0)
high2 <- pn2 + 5
poisson_mle (low2, high2)
## $est
## [1] 1.228943
##
## $sd
## [1] 472.2389
##
## $CI
## [1] -924.3593 926.8172
# plot the log likelihood over a grid of lambda values
lambdas <- seq (0, 50, by = 0.5)
nl <- length (lambdas)
logp_lmd <- rep (0, nl)
for (i in 1:nl)
{
logp_lmd [i] <- poisson_log_likelihood (lambdas[i], low2, high2)
}
## The reason the SD estimate is so bad is that the log likelihood
## is very flat near the MLE, as the plot shows.
plot (lambdas, logp_lmd, type = "l")