# Chapter 11: Model selection and multimodel inference #------------------------------------------------------------------------------- # Fränzi Korner-Nievergelt, Tobias Roth, Stefanie von Felten, Jérôme Guélat, # Bettina Almasi, Pius Korner-Nievergelt, 2015. Bayesian data analysis in ecology # using linear models with R, BUGS and Stan. Elsevier. # # Last modification: 23 April 2014 #------------------------------------------------------------------------------- # load data #------------------------------------------------------------------------------- library(blmeco) data(pondfrog1) #------------------------------------------------------------------------------- # fit models #------------------------------------------------------------------------------- # models among which we would like to find the one best suited to # predict the number of frogs in new ponds. mod1 <- glm(frog ~ ph + waterdepth + temp, data=pondfrog1, family=poisson) mod2 <- glm(frog ~ + waterdepth + temp, data=pondfrog1, family=poisson) mod3 <- glm(frog ~ ph + + temp, data=pondfrog1, family=poisson) mod4 <- glm(frog ~ ph + waterdepth , data=pondfrog1, family=poisson) mod5 <- glm(frog ~ ph , data=pondfrog1, family=poisson) mod6 <- glm(frog ~ waterdepth , data=pondfrog1, family=poisson) mod7 <- glm(frog ~ temp, data=pondfrog1, family=poisson) mod8 <- glm(frog ~ 1 , data=pondfrog1, family=poisson) # residual analysis par(mfrow=c(2,2)) plot(mod1) acf(resid(mod1)) #------------------------------------------------------------------------------- # 11.2.1 Bayesian leave-one-out cross validation #------------------------------------------------------------------------------- set.seed(246464) loo.cv(mod1, bias.corr=TRUE) # increase nsim (default is 100; but needs strong computing power!) loo.cv(mod2, bias.corr=TRUE) loo.cv(mod3, bias.corr=TRUE) loo.cv(mod4, bias.corr=TRUE) loo.cv(mod5, bias.corr=TRUE) loo.cv(mod6, bias.corr=TRUE) loo.cv(mod7, bias.corr=TRUE) loo.cv(mod8, bias.corr=FALSE, nsim=1000) #-------------------------------------------------------------------------- # 11.2.2 Information criteria #------------------------------------------------------------------------------- # AICc AICc(mod1, mod2, mod3, mod4, mod5, mod6, mod7, mod8) AICweights(c("mod1", "mod2", "mod3", "mod4", "mod5", "mod6", "mod7", "mod8")) # WAIC #------------------------------------------------------------------------------- WAIC(mod1, nsim=1000) WAIC(mod2, nsim=1000)$WAIC2 WAIC(mod3, nsim=1000)$WAIC2 WAIC(mod4, nsim=1000)$WAIC2 WAIC(mod5, nsim=1000)$WAIC2 WAIC(mod6, nsim=1000)$WAIC2 WAIC(mod7, nsim=1000)$WAIC2 WAIC(mod8, nsim=1000)$WAIC2 #------------------------------------------------------------------------- # 11.2.6 LASSO #------------------------------------------------------------------------- library(lasso2) # z-transformation pondfrog1$ph.z <- as.numeric(scale(pondfrog1$ph)) pondfrog1$waterdepth.z <- as.numeric(scale(pondfrog1$waterdepth)) pondfrog1$temp.z <- as.numeric(scale(pondfrog1$temp)) pondfrog1$frog.z <- as.numeric(scale(log(pondfrog1$frog+1))) par(mfrow=c(2,2)) plot(lm(frog.z ~ ph.z + waterdepth.z + temp.z, data=pondfrog1)) mod <- l1ce(frog.z ~ ph.z + waterdepth.z + temp.z, data=pondfrog1, bound=1:50/50) plotlasso <- plot(mod) mod <- l1ce(frog.z ~ ph.z + waterdepth.z + temp.z, data=pondfrog1, bound=1) jpeg(filename = "../figures/Figure11-1_lasso.jpg", width = 6000, height = 4000, units = "px", res=1200) par(mar=c(5,5,1,1)) matplot(plotlasso$bound[,"rel.bound"], plotlasso$mat[,-1], type="l", lwd=2, xlim=c(0, 1.4), ylim=c(-0.55,0.95), xlab="Constraint", ylab="Coefficient", col=c(1, "orange", "blue"), las=1, xaxt="n") axis(1,seq(0,1,by=0.2)) text(cbind(1.03, coef(mod[50])[-1]), labels(mod), adj=0) dev.off() vignette("lasso2") #------------------------------------------------------------------------------- # 11.3 Multimodel inference #------------------------------------------------------------------------------- library(BMS) # create the data-matrix with the dependent variable in the first column and all other columns containing numeric values pondfrog1$y <- log(pondfrog1$frog+1) X <- pondfrog1[, c("y", "ph", "waterdepth", "temp")] bmsmod <- bms(X, mprior="uniform", g="UIP") # posterior model probability of the top 5 (of 8) models topmodels.bma(bmsmod)[,1:5] # how do the posterior model probabilities change with different priors bmsmod1 <- bms(X, mprior="fixed", g="UIP", mprior.size = 1) topmodels.bma(bmsmod1) bmsmod2 <- bms(X, mprior="fixed", g="UIP", mprior.size = 2) bmsmod3 <- bms(X, mprior="pip", g="UIP", mprior.size = c(0.9, 0.5, 0.5)) #------------------------------------------------------------------------------- # averaging model predictions using BMS and sim library(arm) mod <- lm(y~ph + waterdepth + temp, pondfrog1) allmods <- topmodels.bma(bmsmod) modweights <- allmods["PMP (Exact)",] # extract model weights newdat <- data.frame(ph=7.2, waterdepth=4.2, temp=7.5) nsim <- 2000 nsimpm <- round(nsim*modweights) predmat <- matrix(nrow=nrow(newdat), ncol=sum(nsimpm)) predictivemat <- matrix(nrow=nrow(newdat), ncol=sum(nsimpm)) for(i in 1:length(nsimpm)){ nsimm <- nsimpm[i] if(nsimm==0) next termsinmodi <- rownames(allmods)[c(allmods[1:(nrow(allmods)-2),i], 0,0)==1] if(length(termsinmodi)<1) termsinmodi <- "1" fmla <- as.formula(paste("y ~ ", paste(termsinmodi, collapse= "+"))) modi <- lm(fmla, data=pondfrog1) bsim <- sim(modi, n.sim=nsimpm[i]) Xmat <- model.matrix(fmla[c(1,3)], data=newdat) for(r in 1:nsimpm[i]){ predmat[,c(0, cumsum(nsimpm))[i]+r] <- Xmat%*%bsim@coef[r,] predictivemat[,c(0, cumsum(nsimpm))[i]+r] <- rnorm(1, predmat[,c(0, cumsum(nsimpm))[i]+r], bsim@sigma[r]) } } newdat$pred <- exp(apply(predmat, 1, mean))-1 newdat$lower <- exp(apply(predmat, 1, quantile, prob=0.025))-1 newdat$upper <- exp(apply(predmat, 1, quantile, prob=0.975))-1 newdat$predictive.lower <- exp(apply(predictivemat, 1, quantile, prob=0.025))-1 newdat$predictive.upper <- exp(apply(predictivemat, 1, quantile, prob=0.975))-1 newdat