### Soc 6708 ### Advanced Statistics ### Robert Andersen, University of Toronto ######################################## ## Mixed Models ######################################## ########################################### #1. Multilevel Modeling ########################################### #Example from Snijders and Bosker (1999:28) plot(X, Y, xlim=c(0,8), ylim=c(0,8), xlab="X", ylab="Y", type="n") abline(5.33, -.033, lty=1, lwd=3, col=1) abline(8,-1, lty=2, lwd=3, col=2) lm(y1~x1) abline(4,1, lty=3, col=3) lm(y2~x2) abline(2,1, lty=3, col=3) lm(y3~x3) abline(0,1, lty=3, col=3) lm(y4~x4) abline(-2,1, lty=3, col=3) lm(y5~x5) abline(-4,1, lty=3, col=3) points(x1,y1, cex=2, pch=1) points(x2,y2, cex=2, pch=2) points(x3,y3, cex=2, pch=3) points(x4,y4, cex=2, pch=4) points(x5,y5, cex=2, pch=5) legend(locator(1), lty=1:3, lwd=c(3,3,1), col=1:3, legend=c('Total regression', 'Regression between groups', 'Regressions within groups')) title('Adapted from Snijders and Bosker (1999)') #################################### # British context example #################################### library(nlme)#Neceassary for Mixed Models library(foreign) Context<-read.spss("c:/teaching/ICPSR/data/context.sav", use.value.labels=TRUE, to.data.frame=TRUE) attach(Context) summary(Context) #Centering the explanatory variables to make estimates #more stable interpretation of intercept more sensible-- #they become the group means Context\$INCOME2<-Context\$INCOME-mean(Context\$INCOME) Context\$PROFMAN2<-Context\$PROFMAN-mean(Context\$PROFMAN) detach(Context) attach(Context) #Sampling the Level 2 units to explore the data more #easily (i.e., too many units) #PANO is level two grouping variable sample30<-sample(unique(PANO), 30) sample.new<-groupedData(LRSCALE~INCOME2|PANO, data=Context[is.element(PANO, sample30),]) sample.new<-groupedData(LRSCALE~INCOME2|PANO,data=sample.new) Context2<-groupedData(LRSCALE~INCOME2|PANO, data=sample.new) library(car) coplot(LRSCALE~INCOME2|PANO, panel=panel.car,span=1, data=Context2) #Individual models for each unit of level 2 (PANO) #All units of level 2(PANO) Mod1<-lmList(LRSCALE~INCOME2|PANO, data=Context) summary(Mod1) library(lattice) plot(intervals(Mod1)) #Sample of 30 units Mod.sample<-lmList(LRSCALE~INCOME2|PANO, data=sample.new) summary(Mod.sample)#returns estimates for every level 2 unit plot(intervals(Mod.sample)) #The Intercepts seem to differ, but little evidence of #differences in slope Mod2<-lme(LRSCALE~INCOME2, random=~INCOME2|PANO, data=Context) summary(Mod2) #Random intercept and random slope Mod.slope<-lme(LRSCALE~PROFMAN2+AGE+SEX+DEGREE+INCOME2, random=~INCOME2|PANO, data=Context) #Random intercept model Mod.intercept<-lme(LRSCALE~PROFMAN2+AGE+SEX+DEGREE+INCOME2, random=~1|PANO, data=Context) #OLS model Mod.ols<-lm(LRSCALE~PROFMAN2+AGE+SEX+DEGREE+INCOME2, data=Context) anova(Mod.slope, Mod.ols) #Significant level 2 variation anova(Mod.slope, Mod.intercept) #Random slope model not statistically significant anova(Mod.intercept, Mod.ols) #Testing to see how much addition of #PROFMAN (level 2 variable) matters Mod.intercept2<-lme(LRSCALE~AGE+SEX+DEGREE+INCOME2, random=~1|PANO, data=Context) Mod.intercept2 Mod.intercept #Some of the variation in intercept is accounted for #DIAGNOSTICS for Final Plot(Mod.intercept) # Residual plots #Fitted values and standardized residuals plot(Mod.intercept) #Boxplots of residuals by level 2 (PANO) plot(Mod.intercept, PANO~resid(.), abline=0) #Normality for within group errors assessed #using normal probability plot of residuals qqnorm(Mod.intercept) #Plot of fitted values and residuals plot(Mod.intercept, LRSCALE~fitted(.)) #Plot of fitted values and residuals within level 2 plot(Mod.intercept, resid(., type="p")~fitted(.)|PANO) #Reporting the final model summary(Mod.intercept) #Confidence interval intervals(Mod.intercept, which="var-cov") ####################################### #2. Longitudinal data and mixed models ####################################### detach(Context) library(nlme) library(car) data(Blackmoor) #Taking the log of exercise linearizes the relationships Blackmoor\$log.exercise<-log(Blackmoor\$exercise+1) attach(Blackmoor) age2<-age-mean(age) model.panel<-lme(log.exercise~age2*group, random=~age2|subject, data=Blackmoor, method="ML") summary(model.panel) intervals(model.panel, which="var-cov") #Checking for autocorrelated errors model.cor<-update(model.panel, correlation=corCAR1 ( form = ~age2|subject)) #Comparing the non correlated and autocorrelated models anova(model.panel, model.cor) summary(model.panel) intervals(model.panel, which="var-cov") ###Random intercept model for GLMS detach(Blackmoor) library(MASS) library(foreign) Context<-read.spss("c:/teaching/ICPSR/data/context.sav", use.value.labels=TRUE, to.data.frame=TRUE) attach(Context) summary(Context) library(lme4) mod.degree<-lmer(DEGREE ~ SEX + REGION+ AGE+(1 | PANO), family = binomial, data = Context) summary(mod.degree) intervals(mod.degree, which="var-cov")