############################################################################### # # # Chapter 3 - BINOMIAL MODELS # # Two-ways binomial models # # # # R. Labouriau # # Last revision/edition: Spring 2023. # ############################################################################### ############################################################################### # Preparation # ############################################################################### # Loading the datasets required load("~/BSA/DataFramesStatisticalModelling.RData") # Setting the place where the output (graphs) wil be kept setwd("~/BSA") ################################################################################ # # # Analysis of the graminea polen proportion data # # # ################################################################################ # Copying the relevant data-frame in an easily referred data-frame D <- PollenTraps str(D) # Converting the factors (discrete explanatory variables classifying the # observations) into vectors of class "factor". D$Trap <- factor(D$Trap) D$Field <- factor(D$Field) D$Obs.num <- factor(D$Obs.num) str(D) # Making some basic descriptive analyses table(D$Trap, D$Field) prop <- tapply(D$n.gram, list(D$Trap,D$Field), sum)/ tapply(D$n.grains, list(D$Trap, D$Field), sum) prop # pdf("Figure-Ch3-004.pdf") plot(1:4, prop[1, ], col=1, pch=19, cex=2, type="b", ylim = c(0,1), xlab = "Field", ylab = "Proportion of Graminea") points(1:4, prop[2, ], col=2, pch=19, cex=2, type="b") legend(x = "bottomright", legend = c("Trap 1", "Trap 2"), col = c("black", "red"), pch = 19, bty = "n") # dev.off() Logit <- function(p) log(p/(1-p)) Lodds <- Logit(prop) Lodds # pdf("Figure-Ch3-005.pdf") plot(1:4, Lodds[1, ], col=1, pch=19, cex=2, type="b", ylim = range(Lodds), xlab = "Field", ylab = "Lodds of Graminea") points(1:4, Lodds[2, ], col=2, pch=19, cex=2, type="b") legend(x = "bottomright", legend = c("Trap 1", "Trap 2"), col = c("black", "red"), pch = 19, bty = "n") # dev.off() # Note the paralelism of the two "profiles" indicating that the differences of # the lodds of (any) pair of fields is the same for the two traps. # Preparing to adjust several binomial models resp <- cbind(D$n.gram, D$n.grains-D$n.gram) resp[1:10, ] # Fitting a saturated model which attributes one lodds for each # observation, which is equivalent to attribute one probability for # each observation. # Recall that lodd = log (p/(1-p)), where p is a probability. Saturated <- glm(resp~factor(Obs.num), family=binomial(link = "logit"), data = D) # Here the deviance of the Saturated model is, by construction 0, but due to # numerical approximations we get a very small positive number (not 0). deviance(Saturated) # Fitting an "effect modification model" that attributes one lodd for each # of the eight combinations of field (factor with 4 levels) and trap (factor # with 2 levels). EffectModification <- glm(resp~Trap:Field + 0, family=binomial(link = "logit"), data = D) # Examining the model summary(EffectModification) # Testing the reduction of the saturated model to the effect-modification # model, which is equivalent to test homogeneity (i.e. all the observations # arising from the same combination of field and trap have the same lodds (and # therefore the same probability). # Clearly, there is no evidence of heterogeneity (lack of homogeneity), since # the p-value is high. anova(EffectModification, Saturated, test = "Chisq") # Fitting an additive model that assumes that the effects of field and trap # (if any) act additively in the log-odds scale. That is, according to this # model the differences of the lodds of (any) pair of fields is the same for # the two traps. Additive <- glm(resp ~ Field + Trap + 0, family=binomial(link = "logit"), data = D) # Displaying the basic description of the additive model. Note the # parametrisation used. The first four parameters are the lodds of the # four fields (respectively) for trap 1, the fith parameter is the value that # should be added to the previous parameters to obtain the lodds of the four # fields when the determinations are made with the trap 2. summary(Additive) cbind(coef(Additive), confint(Additive)) # (Graphically) comparing the lodds attributed with the additive and the # effect modification model. Almost no differences are observed. plot(fitted(EffectModification), fitted(Additive), pch = 19) abline(0,1) # Testing additivity by testing the reduction of the effect modification model # to the additive model (via the likelihood ratio test). No evidence of lack # of additivity are found (large p-value). anova(Additive, EffectModification, test = "Chisq") # Testing whether there are differences of the lodds between the traps # (under an additive model, i.e., when controlling for possible differences # between the lodds of different fields). To do so, we define a model # containing an effect of the field (but not of the trap) and compare such # model with the additive model. We find a strong evidence that the lodds # obtained with the diffent traps differ (when controlling for possible # effects of field) since we obtain a very small p-value. NoTrap <- glm(resp ~ Field + 0, family=binomial(link = "logit"), data = D) summary(NoTrap) anova(NoTrap, Additive, test = "Chisq") # Testing whether there are differences of the lodds between the fields # (under an additive model, i.e., when controlling for possible differences # between the lodds of different traps). The analogous technique of model # reduction is used. We find a strong evidence that the lodds # obtained with the diffent fields differ (when controlling for possible # effects of trap). NoField <- glm(resp ~ Trap + 0, family=binomial(link = "logit"), data = D) summary(NoField) anova(NoField, Additive, test = "Chisq") # Displaying the results graphically. plot(1:4, Lodds[1, ], col = 1, pch = 8, cex = 2, type = "b", lty = 2, ylim = range(Lodds), xlab = "Field", ylab = "Lodds of Graminea") points(1:4, Lodds[2, ], col = 2, pch = 8, cex=2, type="b", lty = 2) points(1:4, coef(Additive)[1:4], col = 1, pch = 19, cex=1, type="b", lty = 1) points(1:4, coef(Additive)[1:4] + coef(Additive)[5], col = 2, pch = 19, cex=1, type="b", lty = 1) legend(x = "bottomright", legend = c("Trap 1, sample lodds", "Trap 2, sample lodds", "Trap 1, additive model", "Trap 2, additive model"), col = c("black", "red", "black", "red"), pch = c(19, 19, 8, 8), lty = c(2, 2, 1, 1), bty = "n") # Making some residual analysis Raw_Residuals <- residuals(Additive, "response") Pearson_Residuals <- residuals(Additive, "pearson") Fitted <- fitted(Additive) Opar <- par(mfrow = c(2,1)) plot(Fitted, Raw_Residuals) plot(Fitted, Pearson_Residuals) par(Opar) # Adding a small noise to the fitted values to disantangle superposed points # in the graphs set.seed(243) Noise <- runif(n = length(Raw_Residuals), min = -0.01, max = 0.01) Opar <- par(mfrow = c(2,1)) plot(Fitted + Noise, Raw_Residuals, ylab = "Raw-residuals", xlab = "Fitted Values + Small noise") abline(h = 0) plot(Fitted+ Noise, Pearson_Residuals, ylab = "Pearson-residuals", xlab = "Fitted Values + Small noise") abline(h = 0) par(Opar) Vars <- tapply(Raw_Residuals, Fitted, var) Fitt <- as.numeric(levels(factor(Fitted))) plot(Fitt, Vars, type = "b", ylab = "Variance of the Raw residuals", xlab = "Fitted Value") ################################################################################ # THE END ! # ################################################################################