# function to compute logistic regression -log likelihood, -score and
# information. b=parameters, r=binary response, z=covariate
lik_score_info <- function(b,r,z) {
u <- b[1]+b[2]*z
u2 <- exp(u)
l <- -sum(u*r-log(1+u2))
p <- u2/(1+u2)
s <- -c(sum(r-p),sum(z*(r-p)))
v <- matrix(c(sum(p*(1-p)),sum(z*p*(1-p)),0,sum(z*z*p*(1-p))),2,2)
v[1,2] <- v[2,1]
list(neg.loglik=l,neg.score=s,inf=v)
}
neg_logp_logistic <- function (b, z, r)
{
u <- b[1]+b[2]*z
u2 <- exp(u)
-sum(u*r-log(1+u2))
}
####### a function for finding MLE with newton method
mle_logistic_nr <- function(b0, no_iter, r, z, debug=FALSE)
{
result_nr <- matrix(0,no_iter+1, 3)
colnames(result_nr) <- c('beta0','beta1','neg_loglike')
result_nr[1,] <- c(b0, 0)
for( i in 1:no_iter + 1)
{
q <- lik_score_info(result_nr[i-1,1:2],r,z)
result_nr[i,1:2] <- result_nr[i-1,1:2] - solve(q$inf,q$neg.score)
result_nr[i-1,3] <- q$neg.loglik
if(debug) print(result_nr[i-1,])
}
result_nr[-(no_iter+1),]
}
## generate a data set
gen_logistic_data <- function(b,n)
{
z <- sort(runif(n, -2,2))
emu <- exp(b[1]+z*b[2])
p <- emu/(1+emu)
r <- (runif(n)<p)*1
plot (z, p, type = "l",ylim=c(0,1))
points (z, r, col = r+1)
list(z=z,r=r)
}
data <- gen_logistic_data(c(0,1.5),200)
## beta0 beta1 neg_loglike
## [1,] 0.0000000 3.000000 119.05563
## [2,] 0.3124563 -0.792032 208.32250
## [3,] -0.1505522 1.711167 101.59214
## [4,] 0.2325889 1.333272 98.94906
## [5,] 0.1870832 1.415984 98.80625
## [6,] 0.1856721 1.420173 98.80598
## [7,] 0.1856700 1.420182 98.80598
## [8,] 0.1856700 1.420182 98.80598
## [9,] 0.1856700 1.420182 98.80598
## [10,] 0.1856700 1.420182 98.80598
## [11,] 0.1856700 1.420182 98.80598
## [12,] 0.1856700 1.420182 98.80598
## [13,] 0.1856700 1.420182 98.80598
## [14,] 0.1856700 1.420182 98.80598
## [15,] 0.1856700 1.420182 98.80598
## beta0 beta1 neg_loglike
## [1,] 0.0000000 5.0000 165.0582
## [2,] 0.4211062 -16.1995 2577.7598
## [3,] -58.2626433 2892.3818 Inf
## [4,] NaN NaN NaN
## [5,] NaN NaN NaN
## [6,] NaN NaN NaN
## [7,] NaN NaN NaN
## [8,] NaN NaN NaN
## [9,] NaN NaN NaN
## [10,] NaN NaN NaN
## [11,] NaN NaN NaN
## [12,] NaN NaN NaN
## [13,] NaN NaN NaN
## [14,] NaN NaN NaN
## [15,] NaN NaN NaN
## look at contour of bivariate log likelihood
B0 <- seq (-2, 2, by = 0.1)
B1 <- seq (-10, 10, by = 0.1)
loglike_values <- matrix (0, length (B0), length (B1))
for (i in 1:length (B0)) {
for (j in 1:length (B1))
{
loglike_values[i,j] <-
neg_logp_logistic (c(B0[i], B1[j]),z = data$z, r = data$r )
}
}
contour (B0, B1, loglike_values, nlevels = 100)
points(mle_logistic_nr(c(0,3),15,data$r,data$z), col = 1, type = "b")
points(mle_logistic_nr(c(0,5),15,data$r,data$z), col = 2, type = "b")
###### Find MLE using nlm function
nlm (neg_logp_logistic, c(0,0), z = data$z, r = data$r, hessian = T) -> logit_nlm
nlm (neg_logp_logistic, c(0,5), z = data$z, r = data$r, hessian = T)
## $minimum
## [1] 98.80598
##
## $estimate
## [1] 0.1856695 1.4201817
##
## $gradient
## [1] -3.836931e-07 4.702991e-06
##
## $hessian
## [,1] [,2]
## [1,] 32.414326 -2.154326
## [2,] -2.154326 24.980483
##
## $code
## [1] 1
##
## $iterations
## [1] 8
## Warning in nlm(neg_logp_logistic, c(-10, -10), z = data$z, r = data$r,
## hessian = T): NA/Inf replaced by maximum positive value
## $minimum
## [1] 98.80598
##
## $estimate
## [1] 0.1856695 1.4201813
##
## $gradient
## [1] 3.524292e-06 -5.453470e-06
##
## $hessian
## [,1] [,2]
## [1,] 32.414330 -2.154329
## [2,] -2.154329 24.980492
##
## $code
## [1] 1
##
## $iterations
## [1] 16
# find MLE standard errors using hessian of negative log likelihood
sds <- sqrt (diag (solve(logit_nlm$hessian))); sds
## [1] 0.1761488 0.2006540
###### find MLE using glm function
logitfit_glm <- glm (r ~ z, family = binomial(), data = data)
summary (logitfit_glm)
##
## Call:
## glm(formula = r ~ z, family = binomial(), data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.2888 -0.7409 0.3421 0.7389 2.2902
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.1857 0.1761 1.054 0.292
## z 1.4202 0.2006 7.078 1.46e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 276.28 on 199 degrees of freedom
## Residual deviance: 197.61 on 198 degrees of freedom
## AIC: 201.61
##
## Number of Fisher Scoring iterations: 4
logitfit_glm <- glm (r ~ z, family = binomial(), data = data, start = c(0,0))
summary (logitfit_glm)
##
## Call:
## glm(formula = r ~ z, family = binomial(), data = data, start = c(0,
## 0))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.2888 -0.7409 0.3421 0.7389 2.2902
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.1857 0.1761 1.054 0.292
## z 1.4202 0.2006 7.078 1.46e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 276.28 on 199 degrees of freedom
## Residual deviance: 197.61 on 198 degrees of freedom
## AIC: 201.61
##
## Number of Fisher Scoring iterations: 5
logitfit_glm <- glm (r ~ z, family = binomial(), data = data, start = c(0,2))
summary (logitfit_glm)
##
## Call:
## glm(formula = r ~ z, family = binomial(), data = data, start = c(0,
## 2))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.2888 -0.7409 0.3421 0.7389 2.2902
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.1857 0.1761 1.054 0.292
## z 1.4202 0.2006 7.078 1.46e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 276.28 on 199 degrees of freedom
## Residual deviance: 197.61 on 198 degrees of freedom
## AIC: 201.61
##
## Number of Fisher Scoring iterations: 5
logitfit_glm <- glm (r ~ z, family = binomial(), data = data, start = c(0,3))
summary (logitfit_glm)
##
## Call:
## glm(formula = r ~ z, family = binomial(), data = data, start = c(0,
## 3))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.2888 -0.7409 0.3421 0.7389 2.2902
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.1857 0.1761 1.054 0.292
## z 1.4202 0.2006 7.078 1.46e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 276.28 on 199 degrees of freedom
## Residual deviance: 197.61 on 198 degrees of freedom
## AIC: 201.61
##
## Number of Fisher Scoring iterations: 6
#
# The model is for the data on sunspot counts supplied with R. We
# hypothesize that the counts (actually integers, but modelled as
# non-negative continuous values) come from taking the absolute value
# of a normal variate whose mean varies with time according to a sine
# wave. The model parameters are a, b, f, and M, which define the
# sine wave, as M*(a+sin(b+f*t), and the log of the standard deviation
# of the observation, lsigma.
#
# We use "nlm" to find the maximum likelihood estimates
logl <- function (p, x, t)
{
a <- p[1]
b <- p[2]
f <- p[3]
M <- p[4]
lsigma <- p[5]
- sum(log(dnorm ( x, M*(a+sin(b+f*t)), exp (lsigma)) +
dnorm (-x, M*(a+sin(b+f*t)), exp (lsigma))
))
}
## get the data.
x <- sunspots [1:500]
t <- 1:length(x)
# Estimation, from a carefully chosen starting point.
est <- nlm (logl,
p = c(a= 0, b = -1, f = 2*pi/200, M=80,lsigma = 4),
x = x, t = t, hessian = T)
print(est)
## $minimum
## [1] 2334.028
##
## $estimate
## [1] 0.01113550 0.87341653 0.02759776 90.46730054 3.43956442
##
## $gradient
## [1] 1.991998e-04 8.534471e-05 6.612604e-03 1.952099e-07 -2.453170e-04
##
## $hessian
## [,1] [,2] [,3] [,4] [,5]
## [1,] 3.817365e+03 -9.407540e+00 9.867027e+04 4.4115695 3.091087e-02
## [2,] -9.407540e+00 1.614385e+03 4.831372e+05 0.5154039 -5.408146e+01
## [3,] 9.867027e+04 4.831372e+05 1.714552e+08 335.5790898 -3.208757e+04
## [4,] 4.411569e+00 5.154039e-01 3.355791e+02 0.2701867 9.972335e-01
## [5,] 3.091087e-02 -5.408146e+01 -3.208757e+04 0.9972335 8.184459e+02
##
## $code
## [1] 3
##
## $iterations
## [1] 85
x <- sunspots [1:500]
t <- 1:length(x)
ss_data <- data.frame (sunspots = x, t = t)
nlsfit_ss <- summary (
nls (sunspots ~ abs(M*(a+sin(b+f*t))),
data = ss_data,
start = c(a= 0, b = -1, f = 2*pi/200, M = 80))
)
## note that this model is different from the model given in sunspots_nlm.r
names (nlsfit_ss)
## [1] "formula" "residuals" "sigma" "df"
## [5] "cov.unscaled" "call" "convInfo" "control"
## [9] "na.action" "coefficients" "parameters"
https://scipy-lectures.org/advanced/mathematical_optimization/index.html#knowing-your-problem
https://codesachin.wordpress.com/2016/01/16/nelder-mead-optimization/