# solutions to exercises chapter 4 #------------------------------------------------------------------------------ # exercise 1 # model: # y_i = b_0 + b_1 * x_i + e_i # e_i ~ Norm(0, sigma2) # data simulation-------------------------------------------------------------- set.seed(34) # set a seed for the random number generator n <- 50 # sample size sigma <- 5 # standard deviation of the residuals b0 <- 2 # intercept b1 <- 0.7 # slope x <- runif(n, 10, 30) # sample values of the covariate simresid <- rnorm(n, 0, sd=sigma) y <- b0 + b1*x + simresid # end of data simulation------------------------------------------------------- # fit the regression model mod <- lm(y~x) mod summary(mod)$sigma # Draw inference from the regression using Bayesian methods library(arm) nsim <- 1000 bsim <- sim(mod, n.sim=nsim) str(bsim) head(bsim@coef) hist(coef(bsim)[,2], freq=FALSE) # probability that the slope of the regression line # is between 0.5 and 1 sum(coef(bsim)[,2]<1 & coef(bsim)[,2]>0.5)/nsim # Exercise 2) library(blmeco) data(periparusater) dat <- periparusater str(dat) plot(wing~P8, data=dat) mod <- lm(wing~P8, data=dat) par(mfrow=c(2,2)) plot(mod) plot(resid(mod)~dat$country) plot(resid(mod)~dat$age) plot(resid(mod)~dat$sex) plot(resid(mod)~dat$weight) acf(resid(mod)) # observation 22 seems to have a large residual # I do not see a reason for this, the leverage is small # -> just let it in nsim <- 2000 bsim <- sim(mod, n.sim=nsim) plot(wing~P8, data=dat) abline(mod) # draw credible interval newx <- seq(45, 53, length=100) fitmat <- matrix(ncol=nsim, nrow=length(newx)) modelmatrix <- model.matrix(~newx) for(i in 1:nsim) fitmat[,i] <- modelmatrix%*%bsim@coef[i,] lines(newx, apply(fitmat, 1, quantile, prob=0.025), lty=3) lines(newx, apply(fitmat, 1, quantile, prob=0.975), lty=3) # draw prediction interval newy <- matrix(ncol=nsim, nrow=length(newx)) for(i in 1:nsim) newy[,i] <- rnorm(length(newx), mean=fitmat[,i], sd=bsim@sigma[i]) lines(newx, apply(newy, 1, quantile, prob=0.025), lty=3, col="blue") lines(newx, apply(newy, 1, quantile, prob=0.975), lty=3, col="blue") # Exercise 4 data(ellenberg) dat <- ellenberg # select species Ap and Dg str(dat) dat <- dat[is.element(dat$Species, c("Ap", "Dg")),] dat$Species <- factor(dat$Species) mod <- lm(log(Yi.g)~Species + Water + I(Water^2) + Species:Water + Species:I(Water^2), data=dat) plot(mod) mod summary(mod)$sigma bsim <- sim(mod, n.sim=nsim) colnames(bsim@coef) <- names(coef(mod)) # name the columns range(dat$Water) newdat <- expand.grid(Water=seq(-5, 140, length=100), Species=levels(dat$Species)) X <- model.matrix(~Species + Water + I(Water^2) + Species:Water + Species:I(Water^2), data=newdat) newdat$fit <- X%*%coef(mod) fitmat <- matrix(ncol=nsim, nrow=nrow(newdat)) for(i in 1:nsim) fitmat[,i] <- X%*%bsim@coef[i,] newdat$lower <- apply(fitmat, 1, quantile, prob=0.025) newdat$upper <- apply(fitmat, 1, quantile, prob=0.975) plot(dat$Water, log(dat$Yi.g), col=c("orange", "blue")[as.numeric(dat$Species)]) index <- newdat$Species == "Ap" lines(newdat$Water[index],newdat$fit[index], lwd=2, col="orange") lines(newdat$Water[index],newdat$lower[index], lwd=1, col="orange") lines(newdat$Water[index],newdat$upper[index], lwd=1, col="orange") index <- newdat$Species == "Dg" lines(newdat$Water[index],newdat$fit[index], lwd=2, col="blue") lines(newdat$Water[index],newdat$lower[index], lwd=1, col="blue") lines(newdat$Water[index],newdat$upper[index], lwd=1, col="blue")