################################################################################ # Ex4.4 Poisson two-ways model # Number of worms in three types of soil determined with two methods # # Last revised: Spring 2020 # # Copyright © 2020 by Rodrigo Labouriau ################################################################################ load("DataFramesStatisticalModelling.RData") D <- Ex4.4 #------------------------------------------------------------------------------# # Exploratory analysis # #------------------------------------------------------------------------------# str(Ex4.4) table(D$Soil,D$Method) # Studing the relation between the mean and the two factors mmean <- tapply (D$Worms, list(D$Soil, D$Method), mean) ; mmean plot(1:3, mmean[,1], type='b', pch='1', ylim=range(mmean), ylab='Mean number of worms', xlab='Soil' , col='red' ) points(1:3, mmean[,2], type='b', pch='2', col='blue') legend(x= "topleft", legend=c('Method 1','Method 2'), col=c('red','blue'), pch=c('1','2'), bty='n') plot(1:3, log(mmean[,1]), type='b', pch='1', ylim=range(log(mmean)), ylab='log(Mean number of worms)', xlab='Soil' , col='red' ) points(1:3, log(mmean[,2]), type='b', pch='2', col='blue') legend(x = "topleft", legend=c('Method 1','Method 2'), col=c('red','blue'), pch=c('1','2'), bty='n') # Studying the relationship between the mean and the variance vvar <- tapply (D$Worms, list(D$Soil, D$Method), var) ; vvar plot(as.numeric(mmean), as.numeric(vvar)) abline(lm(as.numeric(vvar)~as.numeric(mmean))) summary(lm(as.numeric(vvar)~as.numeric(mmean))) # -----------------------------------------------------------------------------# # Fiting several Poisson models # # -----------------------------------------------------------------------------# # Model with interaction D$Soil <- factor(D$Soil); D$Method <- factor(D$Method) interaction.model <- glm(Worms ~ Soil:Method + 0, family=poisson(link='log'), data = D) summary(interaction.model) # Displaying the estimates in the log scale (due to the link function used) cbind("Estimated log Counts" = coef(interaction.model), confint(interaction.model)) # Displaying the estimates in the natural scale. # Here I back transformed both the estimates and the confidence intervals cbind("Estimated Counts" = exp(coef(interaction.model)), exp(confint(interaction.model))) # Testing homogeneity deviance(interaction.model) deviance(interaction.model)/(length(D$Worms)-6)#This ratio should be close to 1 pchisq(deviance(interaction.model), df=length(D$Worms)-6, lower.tail = FALSE) set.seed(10) Noise <- runif(length(D$Worms), min = -1/2, max = 1/2) plot(fitted.values(interaction.model) + Noise, residuals(interaction.model,'response'), ylab='Raw residuals', xlab='Fitted values', main='Interaction model' ) plot(fitted.values(interaction.model) + Noise, residuals(interaction.model,'pearson'), ylab='Pearson residuals', xlab='Fitted values', main='Interaction model' ) # Additive model additive.model <- glm(Worms~ Soil + Method + 0, family=poisson, data = D) summary(additive.model) anova(additive.model, interaction.model, test='Chisq') ## Additional (not necessary) test. deviance(additive.model) pchisq(deviance(additive.model), df=length(D$Worms)-4, lower.tail = FALSE) plot(fitted.values(additive.model) + Noise, residuals(additive.model,'response'), ylab='Raw residuals', xlab='Fitted values', main='Additive model' ) plot(fitted.values(additive.model) + Noise, residuals(additive.model,'pearson'), ylab='Pearson residuals', xlab='Fitted values', main='Additive model' ) # Models without soil effect, method effect and null model NO.soil <- glm(Worms ~ Method, family=poisson, data = D) anova(NO.soil, additive.model, test='Chisq') NO.method <- glm(Worms ~ Soil, family=poisson, data = D) anova(NO.method, additive.model, test='Chisq') null.model <- glm(Worms ~ 1, family=poisson, data = D) anova(null.model, additive.model, test='Chisq') summary(additive.model) # -----------------------------------------------------------------------------# # Bonus question (THIS IS OPTIONAL): Look at the analysis performed below, # # where I use an 'identity' link function. # # Differently from the previous analysis,I get then a significant interaction! # # Using an identity link yields a more complex 'final model', a model with # # an interaction. Discuss that in terms of the initial plots relating the mean # # (and the logarithm of the mean) to the type of soil and the method used. # # -----------------------------------------------------------------------------# interaction.model.id <- glm(Worms ~ Soil:Method + 0, family=poisson(link='identity'), data = D) additive.model.id <- glm(Worms~ Soil + Method + 0, family=poisson(link='identity'), data = D) anova(additive.model.id, interaction.model.id, test='Chisq') # -----------------------------------------------------------------------------# # # THE END # # -----------------------------------------------------------------------------#