#------------------------------------------------------------------- # Chapter 10.2 Measures of explained variance #------------------------------------------------------------------- # Fränzi Korner-Nievergelt, Tobias Roth, Stefanie von Felten, Jérôme Guélat, # Bettina Almasi, Pius Korner-Nievergelt, 2015. Bayesian data analysis in ecology # using linear models with R, BUGS and Stan. Elsevier. # # Last modification: 3 October 2014 #------------------------------------------------------------------- library(arm) library(blmeco) #------------------------------------------------------------------- # Load data files #------------------------------------------------------------------- data(spermdepletion) # data from Anthes et al. (2014) dat <- spermdepletion str(dat) # 'data.frame': 264 obs. of 11 variables: # $ donor : slug individual # $ matingN : mating number (which mating it is, per slug) # $ totalsperm : total N sperm transfered # $ MeanPairSize : mean of the weight of the two slugs of the pair # $ RelativeDonorSize : relative size of donor # $ Dec_duration : duration of mating # sort table by donor and matingN dat <- dat[order(dat$donor, dat$matingN),] dat$donor <- as.factor(dat$donor) #------------------------------------------------------------------- # transformation of explanatory variables #------------------------------------------------------------------- hist(dat$totalsperm) # create, center and rename variables dat$sqrtmeanpairweight<-sqrt(dat$MeanPairSize) #SQRT of mean pair weight: better distribution than log or untransfomred (kurtosis & skew) # center variables: dat$weight.c <- dat$sqrtmeanpairweight - mean(dat$sqrtmeanpairweight) dat$relSize.c <- dat$RelativeDonorSize - mean(dat$RelativeDonorSize) dat$dur.c <- dat$Dec_duration - mean(dat$Dec_duration) dat$matN.c <- dat$matingN - mean(dat$matingN) #------------------------------------------------------------------- # fit the model #------------------------------------------------------------------- mod <- lmer(sqrt(totalsperm) ~ weight.c * matN.c + relSize.c + dur.c + (matN.c|donor), data=dat) # with random slopes + random intercept #------------------------------------------------------------------- # residual analysis (not exhaustive here) #------------------------------------------------------------------- qqnorm(resid(mod)) qqline(resid(mod)) mod #------------------------------------------------------------------- # measures of explained variances #------------------------------------------------------------------- # measurements on donor level: # weight.c # relSize.c # # measurements on measurement level (data level): # dur.c # matN.c nsim <- 2000 bsim <- sim(mod, n.sim=nsim) colnames(bsim@fixef) <- names(fixef(mod)) apply(fixef(bsim), 2, quantile, prob=c(0.025, 0.975)) # variance explained on data level Xmat <- model.matrix(mod) y.hat <- matrix(nrow=nrow(dat), ncol=nsim) resid.y <- matrix(nrow=nrow(dat), ncol=nsim) for(i in 1:nsim){ bsimi <- matrix(fixef(bsim)[i,], nrow=nrow(dat), ncol=length(fixef(mod)), byrow=TRUE) bsimi[,1] <- bsimi[,1] + ranef(bsim)$donor[i,match(dat$donor, levels(dat$donor)),1] bsimi[,3] <- bsimi[,3] + ranef(bsim)$donor[i,match(dat$donor, levels(dat$donor)),2] for(j in 1:nrow(dat)){ y.hat[j,i] <- Xmat[j,]%*%bsimi[j,] resid.y[j,i] <- sqrt(dat$totalsperm[j])-y.hat[j,i] } } 1-mean(apply(resid.y, 2, var))/var(sqrt(dat$totalsperm)) # according to Gelman & Pardoe (2006) # variance explained in intercept on donor-level datdonor <- aggregate(dat$weight.c, list(donor=dat$donor), mean) names(datdonor)[names(datdonor)=="x"] <- "weight.c" datdonor$matN.c <- aggregate(dat$matN.c, list(donor=dat$donor), mean)$x datdonor$relSize.c <- aggregate(dat$relSize.c, list(donor=dat$donor), mean)$x datdonor$dur.c <- aggregate(dat$dur.c, list(donor=dat$donor), mean)$x a.hat <- matrix(nrow=nrow(datdonor), ncol=nsim) Xmat <- model.matrix(~ weight.c * matN.c + relSize.c + dur.c, data=datdonor) for(i in 1:nsim){ a.hat[,i] <- Xmat%*%fixef(bsim)[i,] } 1-mean(apply(ranef(bsim)$donor[,,1], 1, var))/ mean(apply(t(a.hat)+ranef(bsim)$donor[,,1], 1, var)) # variance explained in slope on donor-level b.hat <- matrix(nrow=nrow(datdonor), ncol=nsim) for(i in 1:nsim){ b.hat[,i] <- fixef(bsim)[i,"matN.c"] + datdonor$weight.c*fixef(bsim)[i,"weight.c:matN.c"] } 1-mean(apply(ranef(bsim)$donor[,,2], 1, var))/ mean(apply(t(b.hat)+ranef(bsim)$donor[,,2], 1, var))