#*******************************************************************************
# Chapter 5: likelihood
#*******************************************************************************
#
# Fränzi Korner-Nievergelt, Tobias Roth, Stefanie von Felten, Jérôme Guélat,
# Bettina Almasi, Pius Korner-Nievergelt, 2015. Bayesian data analysis in ecology
# using linear models with R, BUGS and Stan. Elsevier.
#
# Last modification: 4. 10. 2014
# R version 3.1.1 (2014-07-10)
#-------------------------------------------------------------------------------

# Settings
#-------------------------------------------------------------------------------
setwd("....")
#-------------------------------------------------------------------------------

#-------------------------------------------------------------------------------
# 5.1 Theory
#-------------------------------------------------------------------------------
# the likelihood of the data given prairie dog model 1
dnorm(x=0.8, mean=1, sd=0.2)*dnorm(x=1.2, mean=1, sd=0.2)*
  dnorm(x=1.1, mean=1, sd=0.2)

# the likelihood of the data given prairie dog model 2
dnorm(x=0.8, mean=1.2, sd=0.4)*dnorm(x=1.2, mean=1.2, sd=0.4)*
  dnorm(x=1.1, mean=1.2, sd=0.4)

#-------------------------------------------------------------------------------
# Figure 5.1
#-------------------------------------------------------------------------------
jpeg(filename = "../figures/Figure5_1_gauss.jpg",
     width = 6000, height = 6000, units = "px", res=1200)
y <- c(0.8, 1.2, 1.1)            # the three observed weights (kg)
x <- seq(0, 2, length=100)
dx <- dnorm(x, mean=1, sd=0.2)   # density under model 1
plot(x, dx, type="l", xlab="Weight (kg)", ylab="Density", lwd=2, las=1,
     cex.lab=1.2)
# points(y, dnorm(y, mean=1, sd=0.2), pch=16)
segments(y, rep(0, length(y)), y, dnorm(y, mean=1, sd=0.2))
dev.off()
#-------------------------------------------------------------------------------

rnorm(5, 1, 0.2)        # draws 5 random numbers from Norm(1, 0.2)
pnorm(q = 0.8, 1, 0.2)  # proportion of the distribution to the left of q = 0.8
qnorm(0.1, 1, 0.2)      # 10% quantile of the normal distribution Norm(1, 0.2)

lf <- function(mu, sigma) prod(dnorm(y, mu, sigma))  # likelihood function
lf(1, 0.2)

#-------------------------------------------------------------------------------
# 5.2 The maximum likelihood method
#-------------------------------------------------------------------------------
mu <- seq(0.6, 1.4, length=100)
sigma <- seq(0.05, 0.6, length=100)
lik <- matrix(nrow=length(mu), ncol=length(sigma))
for(i in 1:length(mu)){
  for(j in 1:length(sigma)){
    lik[i,j] <- lf(mu[i], sigma[j])
  }
}

# finding the ML estimates using numerical optimisation
neglf <- function(x) -prod(dnorm(y, x[1], x[2]))  # negative likelihood
MLest <- optim(c(1, 0.2), neglf)
MLest$par   # ML estimates of mu and sigma

#-------------------------------------------------------------------------------
# Figure 5.2
#-------------------------------------------------------------------------------
jpeg(filename = "../figures/Figure5_2_ML.jpg",
     width = 6000, height = 6000, units = "px", res=1200)
contour(mu, sigma, lik, nlevels=20, xlab=expression(mu),
        ylab=expression(sigma), las=1, cex.lab=1.4)
points(MLest$par[1], MLest$par[2], pch=4)
dev.off()
#-------------------------------------------------------------------------------

# comparison with the LS estimates
mod <- lm(y~1)
mod                  # the LS and ML estimates of the mean do not differ
MLest$par[1]

summary(mod)$sigma   # LS estimate of the standard deviation (sqrt(SS/(n-1)))
MLest$par[2]         # finite-sample (ML) estimate (sqrt(SS/n))
n <- 3
summary(mod)$sigma^2*(n-1)  # both recover the residual sum of squares SS
MLest$par[2]^2*n
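#-------------------------------------------------------------------------------
# additional sketch; not in the text book: for the normal model the ML
# estimates are also available in closed form, mean(y) for mu and
# sqrt(SS/n) for sigma; comparing them with the optim() result above
# provides a check of the numerical optimisation
#-------------------------------------------------------------------------------
mu.ml <- mean(y)                                # closed-form ML estimate of mu
sigma.ml <- sqrt(sum((y - mu.ml)^2)/length(y))  # closed-form ML estimate of sigma
c(mu.ml, sigma.ml)
MLest$par   # the numerical estimates agree up to the tolerance of optim()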
#-------------------------------------------------------------------------------
# 5.3 The log pointwise predictive density
#-------------------------------------------------------------------------------
library(arm)
set.seed(145)
nsim <- 2000
bsim <- sim(mod, n.sim=nsim)  # simulate from the posterior dist. of the parameters
pyi <- matrix(nrow=length(y), ncol=nsim)
for(i in 1:nsim) pyi[,i] <- dnorm(y, mean=bsim@coef[i,1], sd=bsim@sigma[i])
mpyi <- apply(pyi, 1, mean)   # mean predictive density for each observation
sum(log(mpyi))                # log pointwise predictive density (lppd)

#-------------------------------------------------------------------------------
# additional code; not in the text book but useful for teaching!
#-------------------------------------------------------------------------------
# data simulation---------------------------------------------------------------
set.seed(34676257)       # set the seed for the random number generator
n <- 50                  # sample size
sigma <- 5               # standard deviation of the residuals
b0 <- 2                  # intercept
b1 <- 0.7                # slope
x <- runif(n, 10, 30)    # sample values of the covariate
simresid <- rnorm(n, 0, sd=sigma)  # simulate the residuals
y <- b0 + b1*x + simresid
# end of data simulation--------------------------------------------------------

# show how the likelihood is calculated
par(mar=c(5,4,1,3))
plot(x, y, pch=16, las=1, cex.lab=1.2)
points(x[4], y[4], pch=16, col="orange")  # highlight observation number 4
abline(lm(y~x), lwd=2, col="blue")
abline(v=x[4])
xtemp <- seq(-10, 10, by=0.05)
dnormx <- dnorm(xtemp, mean=0, sd=summary(lm(y~x))$sigma)
xline <- x[4] + dnormx*50   # scale the density for plotting
yline <- xtemp + fitted(lm(y~x))[4]
lines(xline, yline, lwd=2, col="orange")  # residual density around the fitted value

# add another model
abline(10, -0.2, col="blue", lwd=2)
xtemp <- seq(-10, 10, by=0.05)
dnormx <- dnorm(xtemp, mean=0, sd=2)
xline <- x[4] + dnormx*50
yline <- xtemp + 10 - 0.2*x[4]
lines(xline, yline, lwd=2, col="orange", xpd=NA)

# probability density of the 4th observation given the model (marked orange in the plot)
mod <- lm(y~x)
dnorm(y[4], mean=fitted(mod)[4], sd=summary(mod)$sigma)

# draw the log-likelihood surface for b0 and b1 with sigma fixed
b0 <- seq(-6, 6, by=0.2)
b1 <- seq(0, 1.2, by=0.01)
sigma.ls <- summary(mod)$sigma
loglikmat <- matrix(nrow=length(b0), ncol=length(b1))
for(i in 1:length(b0)){
  for(j in 1:length(b1)){
    mu <- b0[i] + b1[j]*x   # fitted values for this combination of b0 and b1
    loglikmat[i,j] <- -n*log(sqrt(2*pi)) - n*log(sigma.ls) -
      0.5*sum(((y-mu)/sigma.ls)^2)   # log-likelihood function
  }
}
contour(b0, b1, loglikmat)
points(coef(mod)[1], coef(mod)[2], pch=3)

# approximate ML estimate of b0 and b1 from the grid
coefdat <- expand.grid(b0=b0, b1=b1)    # all grid combinations; b0 varies fastest,
coefdat$loglik <- as.vector(loglikmat)  # matching the column-major order of loglikmat
coefdat[coefdat$loglik==max(coefdat$loglik),]
mod  # compare with the LS estimates -> differences are due to the step size of
     # the b0 and b1 grids in coefdat

# in the case of the normal distribution, the ML estimates equal the LS estimates;
# this is not always the case!
#-------------------------------------------------------------------------------
#-------------------------------------------------------------------------------
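#-------------------------------------------------------------------------------
# additional sketch; not in the text book: the ML estimates of b0, b1 and sigma
# can also be obtained numerically, analogous to the optim() call in Section
# 5.2, by minimising the negative log-likelihood over all three parameters;
# the starting values c(0, 0.5, 2) are arbitrary choices
#-------------------------------------------------------------------------------
negll <- function(p){
  if(p[3] <= 0) return(Inf)  # keep sigma positive during the optimisation
  -sum(dnorm(y, mean=p[1] + p[2]*x, sd=p[3], log=TRUE))
}
MLreg <- optim(c(0, 0.5, 2), negll)
MLreg$par           # ML estimates of b0, b1 and sigma (sigma with divisor n)
coef(mod)           # LS estimates of b0 and b1: equal up to optimiser tolerance
summary(mod)$sigma  # LS estimate of sigma uses divisor n - 2, hence slightly larger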