#******************************************************************************* # Chapter 7: Linear mixed effects model LMM #******************************************************************************* # # Fränzi Korner-Nievergelt, Tobias Roth, Stefanie von Felten, Jérôme Guélat, # Bettina Almasi, Pius Korner-Nievergelt, 2015. Bayesian data analysis in ecology # using linear models with R, BUGS and Stan. Elsevier. # # Last modification: 2. 10. 2014 # R version 3.1.1 (2014-07-10) #------------------------------------------------------------------------------- # Settings #------------------------------------------------------------------------------- setwd("....") #----------------------------------------------------------------------------- library(arm) #also loads library lme4 library(blmeco) #------------------------------------------------------------------------------- #7.1.2. Random factors and partial pooling #------------------------------------------------------------------------------- #------------------------------------------------------------------------------- # Figure 7.1 #------------------------------------------------------------------------------- # simulate unbalance data from ngroup groups set.seed(0470) ngroup <- 10 # number of groups sigma <- 2.5 # residual variance (standard deviation) sigma_b <- 3 # between-group variance (standard deviation) group.means <- rnorm(ngroup, 15, sigma_b) # simulate group means npg <- rbinom(ngroup, prob=runif(ngroup), size=100) # draw sample sizes per group at random between 0 and 100 y <- NULL for(i in 1:ngroup) y <- c(y, rnorm(npg[i], group.means[i], sigma)) # simulate data group <- factor(rep(c(1:ngroup), npg)) # create the group-factor # end of data simulations ylimes <- c(floor(min(y)), ceiling(max(y))) x <- jitter(as.numeric(group)) ps <- 1.2 # point size jpeg(filename = "../figures/Figure7-1_partialpooling.jpg", width = 9000, height = 4000, units = "px", res=1200) # complete pooling par(mfrow=c(1,3), mar=c(5, 1,2,1), oma=c(0,3,0,0)) plot(y~x,ylim=ylimes, xlab="group", ylab="y", las=1, cex.lab=1.4, cex.axis=1.2, xaxt="n", main="complete pooling", cex.main=1.7) axis(1, at=1:ngroup, labels=levels(group), cex.axis=1.2) mtext("y", side=2, line=3) m.y <- mean(y) sd(y) se.y <- sd(y)/sqrt(length(y)) rect(0, m.y-1.96*se.y, 21, m.y+1.96*se.y, col=grey(0.6), border=NA) abline(h=m.y) points(mean(1:ngroup), m.y, pch=16, cex=ps, col="orange") points(y~x) # partial pooling mod <- lmer(y~1+(1|group)) summary(mod) m.ppypg <- fixef(mod) + ranef(mod)$group popmean <- fixef(mod) box() # simulates confidence intervals for means bsim <- sim(mod, n.sim=2000) ranefsim <- bsim@ranef$group sem.ppypg <- apply(ranefsim[,,1], 2, sd) sem.popmean <- sd(bsim@fixef[,1]) plot(y~x, type="n", ylim=ylimes, xlab="group", ylab=NA, yaxt="n", las=1, cex.lab=1.4, cex.axis=1.2, xaxt="n", main="partial pooling", cex.main=1.7) axis(1, at=1:ngroup, labels=levels(group), cex.axis=1.2) # insert population mean rect(0, popmean-1.96*sem.popmean, 21, popmean+1.96*sem.popmean, col=grey(0.6), border=NA) abline(h=popmean) points(y~x) segments(1:ngroup, unlist(m.ppypg)-1.96*sem.ppypg, 1:ngroup, unlist(m.ppypg)+1.96*sem.ppypg, lwd=2, lend="butt", col="orange") points(1:ngroup, unlist(m.ppypg), pch=16, cex=ps, col="orange") box() # no pooling plot(y~x,ylim=ylimes, xlab="group", ylab=NA, yaxt="n", las=1, cex.lab=1.4, cex.axis=1.2, xaxt="n", main="no pooling", cex.main=1.7) axis(1, at=1:ngroup, labels=levels(group), cex.axis=1.2) m.ypg <- tapply(y, group, mean) se.ypg <- tapply(y, group, function(x) sd(x)/sqrt(length(x))) segments(1:ngroup, m.ypg-1.96*se.ypg, 1:ngroup, m.ypg+1.96*se.ypg, lwd=2, lend="butt", col="orange") points(1:ngroup, m.ypg, pch=16, cex=ps, col="orange") dev.off() # A consequence of the shrinkage estimator: scatter.smooth(fitted(mod), resid(mod), xlab="fitted values", ylab="residuals", cex.axis=1.2, cex.lab=1.5) abline(h=0, lty=3) #------------------------------------------------------------------------------- # 7.2. Fitting a linear mixed model in R #------------------------------------------------------------------------------- setwd("....") data(cortbowl) dat <- cortbowl dat$days <- factor(dat$days, levels=c("before", "2", "20"))#define the reference levels (before = day of implantation) str(dat) # the data was sampled in 2004,2005, and 2005 by the Swiss Ornithologicla Institute # Brood = Brood number # Ring = individual ring number # Implant = treatment: C= corticosterone implant, P = shame implant # Age = age of the nestlings (days) # days = days when the blood samples were collected (before = day of implantation, day 2 and day 20 after implantation) # totCort = total corticosterone (blood samples taken within 3 minutes of opening the nest box) #--------------------------------------------------- # plot the data # Figure 7.2 #--------------------------------------------------- ifelse(as.numeric(dat$days)==3,9,as.numeric(dat$days)) jpeg(filename = "../figures/Figure7-2_xyplot.jpg", width = 6000, height = 6000, units = "px", res=1200) par(mfrow=c(1,2), mar=c(4,0,2,0.2), oma=c(0,5,0,0)) for(treat in levels(dat$Implant)){ plot(as.numeric(dat$days), dat$totCort, type="n",xlim=c(0.5, 3.5), las=1, yaxt="n", xaxt="n", xlab="Days after implantation") axis(1, at=1:3, labels=c("before", "2", "20")) if(treat=="C") { axis(2, las=1) mtext("Corticosterone",3,line=1) } if(treat=="P") mtext("Placebo",3,line=1) inds <- dat$Ring[dat$Implant==treat] for(i in inds){ lines(dat$days[dat$Ring==i], dat$totCort[dat$Ring==i]) } } mtext("Total corticosterone [ng/ml]", side=2, outer=TRUE, line=3, cex=1.2) dev.off() # library(lattice) # bwplot(totCort ~ days|Implant, data=dat, xlab="days", cex.lab=1.2) # handy but not used as it is intended to be used: # implant is not a grouping factor, but a treatment! #--------------------------------------------------- # fitting a mixed model with lmer # Restricted maximum likelihood estimation (REML) #--------------------------------------------------- mod <- lmer(log(totCort) ~ Implant + days + Implant:days + (1|Ring), data=dat, REML=TRUE) # using REML mod round(fixef(mod),3) # extracts the fixed effects ranef(mod) # extracts the random effects #------------------------------------------------------------------------------- # 7.3 Restricted maximum likelihood estimation (REML) #------------------------------------------------------------------------------- #code for model using ML mod <- lmer(log(totCort) ~ Implant + days + Implant:days + (1|Ring), data=dat, REML=FALSE) # using ML #------------------------------------------------------------------------------- # 7.4 Assessing model assumptions #------------------------------------------------------------------------------- jpeg(filename="../figures/Figure7-3_ModelAssumptions.jpg", width=6000, height=6000, units="px", res=1200) par(mfrow=c(2,2), mar=c(4,4,2,1), mgp=c(2.2,0.8,0)) scatter.smooth(fitted(mod), resid(mod)); abline(h=0, lty=2) mtext("Tukey-Anscombe Plot", 3, line=0.8, cex=0.8) # residuals vs. fitted qqnorm(resid(mod), main="normal qq-plot, residuals", cex.main=0.8) # qq of residuals qqline(resid(mod)) scatter.smooth(fitted(mod), sqrt(abs(resid(mod)))) # res. var vs. fitted qqnorm(ranef(mod)$Ring[,1], main="normal qq-plot, random effects", cex.main=0.8) qqline(ranef(mod)$Ring[,1]) # qq of random effects dev.off() #------------------------------------------------------------------------------- # 7.5 Drawing conclusions #------------------------------------------------------------------------------- #draw 2000 random values from the joint posterior distribution to obtain CrI set.seed(0470) # specify the seed (starting value for your random generator) nsim <- 2000 bsim <- sim(mod, n.sim=nsim) colnames(bsim@fixef) <- names(fixef(mod)) # sim does not assign the names of the parameter in the current version. str(bsim) #From the simulated values, the 2.5% and 97.5% quantiles can be used for the 95% credible interval round(apply(bsim@fixef, 2, quantile, prob=c(0.025,0.5,0.975)),3) #---------------------------------------------- #plot the fitted values with CrI #---------------------------------------------- newdat<-expand.grid(Implant = factor(c('C','P'),levels=levels(dat$Implant)), days = factor(c('before','2','20'),levels=levels(dat$days))) # create a new data frame Xmat <- model.matrix(~ Implant + days + Implant:days, data=newdat) # use exactly the same formula as for the fixed-effect part in the model specification fitmat <- matrix(ncol=nsim, nrow=nrow(newdat)) for(i in 1:nsim) fitmat[,i] <- Xmat %*% bsim@fixef[i,] # fitted values newdat$lower <- apply(fitmat, 1, quantile, prob=0.025) newdat$upper <- apply(fitmat, 1, quantile, prob=0.975) newdat$fit <- Xmat %*% fixef(mod) # Draw a graph with 95% CrI for each group at each day jpeg(filename = "../figures/Figure7-4_cort_fitted.jpg", width = 6000, height = 6000, units = "px", res=1200) par(mfrow=c(1,1), mar=c(2,2,1,1), omi= c(0.5,0.5,0,0)) # "mar" sets the margins (lower, left, upper, right) around the plot indexP <- newdat$Implant=='P' indexC <- newdat$Implant=='C' a <- 1 dat$daysNum <- ifelse(dat$days=='before', 0, as.character(dat$days)) # nummeric day variable dat$daysNum <- as.numeric(dat$daysNum) # nummeric day variable plot(seq(-2,22,1), seq(-0.1,4.8,length=25), yaxt="n", xaxt="n", type="n", xlab="", ylab="", las=2, cex.lab=a, main="", cex.main=a, font.main=1) points(jitter(dat$daysNum[dat$Implant=='P']-0.4), log(dat$totCort[dat$Implant=='P']), lwd=2 ,pch=16, col="blue", cex=0.5) # Placebo raw data points(jitter(dat$daysNum[dat$Implant=='C']+0.2), log(dat$totCort[dat$Implant=='C']), lwd=2 ,pch=16, col="orange", cex=0.5, lty=2) # Cort raw data segments(c(0,2,20)-c(0.4,0.4,0.4), newdat$lower[indexP], c(0,2,20)-c(0.4,0.4,0.4), newdat$upper[indexP], lty=1, lwd=2, col="black" ) # CrI points(c(0,2,20)-c(0.4,0.4,0.4), newdat$fit[indexP], lwd=2 ,pch=16, col="black", cex=1.2) # Placebo segments(c(0,2,20)+c(0.2,0.2,0.2), newdat$upper[indexC], c(0,2,20)+c(0.2,0.2,0.2), newdat$lower[indexC],lty=1, lwd=2, col="black" ) # CrI points(c(0,2,20)+c(0.2,0.2,0.2), newdat$fit[indexC], lwd=1 ,lty=1, pch=21, bg="white", col="black", cex=1.2) # CORT axis(side=2, labels=F,at=seq(0,4.8,0.4),line=0,tcl=-0.3,las=1) axis(side=2, labels=seq(0,4.8,0.4),at=seq(0,4.8,0.4),line=0,tcl=-0.3,las=1, mgp=c(3,0.5,0),cex.axis=a) axis(side=1, labels=c(NA, 2,20),at=c(0, 2,20),line=0,tcl=-0.3,las=1) axis(side=1, labels=c("before"),at=c(-0.5),line=0,tcl=0,las=1) mtext(side=2,line=3,adj=0.5,cex=a,font=2,"Total corticosterone [log, ng/ml]",las=0,outer=FALSE) mtext(side=1,line=3,adj=0.5,cex=a,font=2,"Days after implantation",outer=FALSE) points(c(10,10),c(4.5,4.8),pch=21,bg=c("black","white")) text(c(11,11),c(4.5,4.8),c("placebo", "corticosterone"),adj=c(0,0.5)) dev.off() #-------------------------------------------------------------------------------------------- #7.7 Random intercept and random slope #-------------------------------------------------------------------------------------------- #random slope model data(wingbowl) dat <- wingbowl str(dat) # the data was sampled in 2004 by the Swiss Ornithologicla Institute # Brood = Brood number # Ring = individual ring number # Implant = treatment, C= corticosterone implant, P = shame implant # Age1 = age at the day of implantation # Age = age of the nestlings (days) # days = days when the wing length measurements were taken (day 1 = day of implantation, day 3 and day 21) # Wing = maximum left wing length of barn owl nestlings range(dat$Age1) # include a random slope dat$Age.z <- scale(dat$Age) #center and scale the variable 'Age' mod <- lmer(Wing ~ Age.z + Implant + Age.z:Implant + (Age.z|Ring), data=dat, REML=FALSE) # using ML mod mean(ranef(mod)$Ring[,1]) # should be near 0 # check model assumptions par(mfrow=c(2,3)) scatter.smooth(fitted(mod),resid(mod)); abline(h=0, lty=2) title("Tukey-Anscombe Plot") # residuals vs. fitted qqnorm(resid(mod), main="normal QQ-plot, residuals") # qq of residuals qqline(resid(mod)) scatter.smooth(fitted(mod), sqrt(abs(resid(mod)))) # res. var vs. fitted qqnorm(ranef(mod)$Ring[,1], main="normal QQ-plot, random effects", cex.main=0.8) qqline(ranef(mod)$Ring[,1]) # qq of random effects, Intercept qqnorm(ranef(mod)$Ring[,2], main="normal QQ-plot, random effects", cex.main=0.8) qqline(ranef(mod)$Ring[,2]) # qq of random effects, Age.z # drawing conclusions # 95% CrI of the parameter estimates using sim. set.seed(0470) # specify the seed (starting value for your random generator) nsim <- 2000 bsim <- sim(mod, n.sim=nsim) colnames(bsim@fixef) <- names(fixef(mod)) apply(bsim@fixef, 2, quantile, prob=c(0.025,0.975)) #The CrI for the interaction effect of 0.4 mm, on the original Age-scale round(quantile(bsim@fixef[,"Age.z:ImplantP"]/sd(dat$Age), prob=c(0.025,0.975)),3) #----------------------------------- #Figure 7.5 #------------------------------------- newdat <- expand.grid(Age=seq(20, 48, length=100), Implant=levels(dat$Implant)) newdat$Age.z <- (newdat$Age - mean(dat$Age))/sd(dat$Age) Xmat <- model.matrix(~ Age.z + Implant + Age.z:Implant, data=newdat) fitmat <- matrix(ncol=nsim, nrow=nrow(newdat)) for(i in 1:nsim) fitmat[,i] <- Xmat %*% bsim@fixef[i,] newdat$lower <- apply(fitmat, 1, quantile, prob=0.025) newdat$upper <- apply(fitmat, 1, quantile, prob=0.975) jpeg(filename = "../figures/Figure7-5_wing_fitted.jpg", width = 9000, height = 4500, units = "px", res=1200) par(mfrow=c(1,2), mar=c(5,1,1,0.5), oma=c(0,4,0,0)) plot(dat$Age.z, dat$Wing, col=c("orange", "blue")[as.numeric(dat$Implant)], pch=1, cex=0.8, las=1, xlab="Age (days)", ylab=NA, xaxt="n", xlim=c(-2.23, 1.91)) # plot using Age.z, but label x-Axis manually to represent the original scale at.x_orig <- seq(25,45,by=5) # values on the x-axis that we want to be labeled, on the original scale at.x <- (at.x_orig - mean(dat$Age))/sd(dat$Age) # corresponding transformed values (only works for a linear transformation such as "scale") axis(1, at=at.x, labels=at.x_orig) mtext("Wing length (mm)", side=2, outer=TRUE, line=2, cex=1.2, adj=0.6) abline(fixef(mod)[1], fixef(mod)[2], col="orange", lwd=2) abline(fixef(mod)[1]+fixef(mod)[3], fixef(mod)[2]+fixef(mod)[4], col="blue", lwd=2) for(i in 1:2){ index <- newdat$Implant==levels(newdat$Implant)[i] polygon(c(newdat$Age.z[index], rev(newdat$Age.z[index])), c(newdat$lower[index], rev(newdat$upper[index])), border=NA, col=c(rgb(1,0.65,0,0.5), rgb(0,0,1,0.5))[i]) } legend(x=-1.5,y=100, legend=c("corticosterone", "placebo"), pch=1, col=c("orange","blue"), bty = "n",lwd=1) # plot with individual specific regression lines plot(dat$Age.z, dat$Wing, col=c("orange", "blue")[as.numeric(dat$Implant)], pch=1, cex=0.8, las=1, xlab="Age (days)", ylab=NA, yaxt="n", xaxt="n") at.x_orig <- seq(25,45,by=5) at.x <- (at.x_orig - mean(dat$Age))/sd(dat$Age) axis(1, at=at.x, labels=at.x_orig) indtreat <- tapply(dat$Implant, dat$Ring, function(x) as.character(x[1])) for(i in 1:86){ if(indtreat[i]=="C") abline(fixef(mod)[1] + ranef(mod)$Ring[i,1], fixef(mod)[2] + ranef(mod)$Ring[i,2], col="orange") else abline(fixef(mod)[1] + fixef(mod)[3] + ranef(mod)$Ring[i,1], fixef(mod)[2] + fixef(mod)[4] + ranef(mod)$Ring[i,2], col="blue") } dev.off() # difference in wing length at day 45 (shortly before fleding) transf.45 <- (45-mean(dat$Age))/sd(dat$Age) # get the transformed value of Age==45 fixef(mod)[1] + fixef(mod)[2]*transf.45 - (fixef(mod)[1]+fixef(mod)[3] + (fixef(mod)[2]+fixef(mod)[4])*transf.45) # 8.7 mm shorter wings for Cort. #-------------------------------------------------------------------------------------------- #7.8 Nested and crossed random effects #-------------------------------------------------------------------------------------------- #----------------------------------------------------- #nested random effects #----------------------------------------------------- #we include the random intercept brood to our model data(cortbowl) dat <- cortbowl dat$days <- factor(dat$days, levels=c("before", "2", "20"))#define the reference levels (before = day of implantation) mod <- lmer(log(totCort) ~ Implant + days + Implant:days + (1|Brood) + (1|Ring), data=dat, REML=FALSE) # using ML mod # note that you can also specify directly your nested random effects mod <- lmer(log(totCort) ~ Implant + days + Implant:days + (1|Brood/Ring), data=dat, REML=FALSE) # using ML mod #model fit par(mfrow=c(2,2)) scatter.smooth(fitted(mod),resid(mod)); abline(h=0, lty=2) title("Tukey-Anscombe Plot") # residuals vs. fitted qqnorm(resid(mod), main="normal QQ-plot, residuals") # qq of residuals qqline(resid(mod)) scatter.smooth(fitted(mod), sqrt(abs(resid(mod)))) # res. var vs. fitted qqnorm(ranef(mod)$Brood[,1], main="normal QQ-plot, random effects", cex.main=0.8) qqline(ranef(mod)$Brood[,1]) # qq of random effects #----------------------------------------------------- # estimated coefficients with 95% CrI and P(b>0) for chapter 17 (how to present the results) #----------------------------------------------------- set.seed(0470) nsim <- 2000 bsim <- sim(mod, n.sim=nsim) round(fixef(mod), 2) round(apply(bsim@fixef, 2, quantile, prob=c(0.025, 0.975)), 2) round(apply(bsim@fixef, 2, function(x) sum(x>0)/length(x)), 2) # 95% CrI of variance parameters summary(mod) quantile(apply(bsim@ranef$Brood[,,1], 1, sd), prob=c(0.025, 0.975)) quantile(apply(bsim@ranef[[1]][,,1], 1, sd), prob=c(0.025, 0.975)) quantile(bsim@sigma, prob=c(0.025, 0.975)) # baseline corrected effect of corticosterone round(quantile(bsim@fixef[,5]-bsim@fixef[,2], prob=c(0.025, 0.975)), 2) mean(bsim@fixef[,5]-bsim@fixef[,2]) round(quantile(bsim@fixef[,6]-bsim@fixef[,2], prob=c(0.025, 0.975)), 2) mean(bsim@fixef[,6]-bsim@fixef[,2]) #----------------------------------------------------- # crossed random effects #----------------------------------------------------- data(ellenberg) ellenberg$gradient <- paste(ellenberg$Year, ellenberg$Soil) table(ellenberg$Species, ellenberg$gradient) ellenberg$water.z <- as.numeric(scale(ellenberg$Water)) mod <- lmer(log(Yi.g) ~ water.z + I(water.z^2) + (water.z + I(water.z^2)|Species) + (1|gradient), data=ellenberg) #model fit par(mfrow=c(2,2)) scatter.smooth(fitted(mod),resid(mod)); abline(h=0, lty=2) title("Tukey-Anscombe Plot") # residuals vs. fitted qqnorm(resid(mod), main="normal QQ-plot, residuals") # qq of residuals qqline(resid(mod)) scatter.smooth(fitted(mod), sqrt(abs(resid(mod)))) # res. var vs. fitted qqnorm(ranef(mod)$Species[,1], main="normal QQ-plot, random effects", cex.main=0.8) qqline(ranef(mod)$Species[,1]) # qq of random effects dev.off() library(lattice) xyplot(resid(mod)~Water|Species, data=ellenberg) #------------------------------------------ #Figure 7.7 #------------------------------------------ set.seed(0470) nsim <- 5000 bsim <- sim(mod, n.sim=nsim) newdat <- expand.grid(Water=seq(-5,140, length=100), Species= levels(ellenberg$Species)) newdat$water.z <- (newdat$Water - mean(ellenberg$Water))/sd(ellenberg$Water) Xmat <- model.matrix(~water.z + I(water.z^2), data=newdat) fitmat <- matrix(nrow=nrow(newdat), ncol=nsim) fitmatpop <- matrix(nrow=nrow(newdat), ncol=nsim) for(a in 1:6){ index <- newdat$Species==levels(ellenberg$Species)[a] for(i in 1:nsim){ b <- bsim@fixef[i,] + bsim@ranef$Species[i,a,] fitmat[index,i] <- Xmat[index,]%*%b fitmatpop[index,i] <- Xmat[index,]%*%bsim@fixef[i,] } } newdat$fit <- apply(fitmat, 1, mean) newdat$lower <- apply(fitmat, 1, quantile, prob=0.025) newdat$upper <- apply(fitmat, 1, quantile, prob=0.975) newdat$fitpop <- apply(fitmatpop, 1, mean) newdat$lowerpop <- apply(fitmatpop, 1, quantile, prob=0.025) newdat$upperpop <- apply(fitmatpop, 1, quantile, prob=0.975) colkey <- colorRampPalette(c("orange", "blue"))(6) jpeg(filename = "../figures/Figure7-7_ellenberg.jpg", width = 6000, height = 6000, units = "px", res=1200) par(mgp=c(2.5, 1, 0)) plot(ellenberg$Water, log(ellenberg$Yi.g), las=1, ylab=NA, type="n", xlab="Average distance to ground water (cm)", cex.lab=1.2) mtext(expression(paste("Above-ground biomass ", (g/m^2), " [log]")), side=2, line=2.5, cex=1.2) newdatpop <- newdat[1:100,] polygon(c(newdatpop$Water, rev(newdatpop$Water)), c(newdatpop$lowerpop, rev(newdatpop$upperpop)), border=NA, col=grey(0.6)) for(a in 1:6){ index <- ellenberg$Species==levels(ellenberg$Species)[a] points(ellenberg$Water[index], log(ellenberg$Yi.g[index]), col=colkey[a], lwd=2, pch=16, cex=0.6) } for(a in 1:6){ index <- newdat$Species==levels(ellenberg$Species)[a] lines(newdat$Water[index], newdat$fit[index], col=colkey[a], lwd=2) } lines(newdatpop$Water, newdatpop$fitpop, lwd=3) legend(-30, 9.4, bty="n", lwd=2, col=colkey, legend=levels(ellenberg$Species), horiz=TRUE, xpd=NA, cex=0.9) dev.off()