###############################################################################
#                                                                             #
# Chapter 3 - BINOMIAL MODELS                                                 #
# Two-ways binomial models                                                    #
#                                                                             #
# R. Labouriau                                                                #
# Last revision/edition: Spring 2023.                                          #
###############################################################################

###############################################################################
# Preparation                                                                 #
###############################################################################

# Loading the datasets required
load("~/BSA/DataFramesStatisticalModelling.RData")

# Setting the place where the output (graphs) wil be kept
setwd("~/BSA")

################################################################################
#                                                                              #
# Analysis of the graminea polen proportion data                               #
#                                                                              #
################################################################################

# Copying the relevant data-frame in an easily referred data-frame

D <- PollenTraps

str(D)

# Converting the factors (discrete explanatory variables classifying the 
# observations) into vectors of class "factor".

D$Trap <- factor(D$Trap)
D$Field <- factor(D$Field)
D$Obs.num <- factor(D$Obs.num)

str(D)

# Making some basic descriptive analyses

table(D$Trap, D$Field)

prop <- tapply(D$n.gram, list(D$Trap,D$Field), sum)/
  tapply(D$n.grains, list(D$Trap, D$Field), sum)

prop

# pdf("Figure-Ch3-004.pdf")
plot(1:4, prop[1, ], col=1, pch=19, cex=2, type="b", ylim = c(0,1),
     xlab = "Field", ylab = "Proportion of Graminea")
points(1:4, prop[2, ], col=2, pch=19, cex=2, type="b")

legend(x = "bottomright", 
       legend = c("Trap 1", "Trap 2"),
       col = c("black", "red"),
       pch = 19,
       bty = "n")
# dev.off()


Logit <- function(p) log(p/(1-p))

Lodds <- Logit(prop)
Lodds

# pdf("Figure-Ch3-005.pdf")
plot(1:4, Lodds[1, ], col=1, pch=19, cex=2, type="b", ylim = range(Lodds),
     xlab = "Field", ylab = "Lodds of Graminea")
points(1:4, Lodds[2, ], col=2, pch=19, cex=2, type="b")

legend(x = "bottomright", 
       legend = c("Trap 1", "Trap 2"),
       col = c("black", "red"),
       pch = 19,
       bty = "n")
# dev.off()

# Note the paralelism of the two "profiles" indicating that the differences of
# the lodds of (any) pair of fields is the same for the two traps.

# Preparing to adjust several binomial models 

resp <- cbind(D$n.gram, D$n.grains-D$n.gram)

resp[1:10, ]

# Fitting a saturated model which attributes one lodds for each 
# observation, which is equivalent to attribute one probability for
# each observation.

# Recall that lodd = log (p/(1-p)), where p is a probability.

Saturated <- glm(resp~factor(Obs.num), 
                 family=binomial(link = "logit"), data = D)

# Here the deviance of the Saturated model is, by construction 0, but due to
# numerical approximations we get a very small positive number (not 0).

deviance(Saturated)

# Fitting an "effect modification model" that attributes one lodd for each 
# of the eight combinations of field (factor with 4 levels) and trap (factor
# with 2 levels).

EffectModification <- glm(resp~Trap:Field + 0, 
                          family=binomial(link = "logit"), data = D)

# Examining the model

summary(EffectModification)

# Testing the reduction of the saturated model to the effect-modification
# model, which is equivalent to test homogeneity (i.e. all the observations 
# arising from the same combination of field and trap have the same lodds (and
# therefore the same probability). 
# Clearly, there is no evidence of heterogeneity (lack of homogeneity), since 
# the p-value is high.

anova(EffectModification, Saturated, test = "Chisq")

# Fitting an additive model that assumes that the effects of field and trap
# (if any) act additively in the log-odds scale. That is, according to this
# model the differences of the lodds of (any) pair of fields is the same for 
# the two traps.

Additive <- glm(resp ~ Field + Trap + 0, family=binomial(link = "logit"), 
                data = D) 

# Displaying the basic description of the additive model. Note the 
# parametrisation used. The first four parameters are the lodds of the 
# four fields (respectively) for trap 1, the fith parameter is the value that
# should be added to the previous parameters to obtain the lodds of the four
# fields when the determinations are made with the trap 2.

summary(Additive)

cbind(coef(Additive), confint(Additive))

# (Graphically) comparing the lodds attributed with the additive and the
# effect modification model. Almost no differences are observed.
plot(fitted(EffectModification), fitted(Additive), pch = 19)
abline(0,1)

# Testing additivity by testing the reduction of the effect modification model
# to the additive model (via the likelihood ratio test). No evidence of lack
# of additivity are found (large p-value).

anova(Additive, EffectModification, test = "Chisq")

# Testing whether there are differences of the lodds between the traps 
# (under an additive model, i.e., when controlling for possible differences 
# between the lodds of different fields). To do so, we define a model
# containing an effect of the field (but not of the trap) and compare such
# model with the additive model. We find a strong evidence that the lodds
# obtained with the diffent traps differ (when controlling for possible
# effects of field) since we obtain a very small p-value.

NoTrap <- glm(resp ~ Field  + 0, family=binomial(link = "logit"), 
              data = D) 

summary(NoTrap)

anova(NoTrap, Additive, test = "Chisq")

# Testing whether there are differences of the lodds between the fields 
# (under an additive model, i.e., when controlling for possible differences 
# between the lodds of different traps). The analogous technique of model
# reduction is used. We find a strong evidence that the lodds
# obtained with the diffent fields differ (when controlling for possible
# effects of trap).

NoField <- glm(resp ~ Trap  + 0, family=binomial(link = "logit"), 
               data = D) 

summary(NoField)

anova(NoField, Additive, test = "Chisq")

# Displaying the results graphically.

plot(1:4, Lodds[1, ], col = 1, pch = 8, cex = 2, type = "b",  lty = 2,
     ylim = range(Lodds), xlab = "Field", ylab = "Lodds of Graminea")
points(1:4, Lodds[2, ], col = 2, pch = 8, cex=2, type="b", lty = 2)

points(1:4, coef(Additive)[1:4], col = 1, pch = 19, cex=1, type="b", lty = 1)
points(1:4, coef(Additive)[1:4] + coef(Additive)[5], 
       col = 2, pch = 19, cex=1, type="b", lty = 1)

legend(x = "bottomright", 
       legend = c("Trap 1, sample lodds", "Trap 2, sample lodds",
                  "Trap 1, additive model", "Trap 2, additive model"),
       col = c("black", "red", "black", "red"),
       pch = c(19, 19, 8, 8),
       lty = c(2, 2, 1, 1),
       bty = "n")

# Making some residual analysis

Raw_Residuals <- residuals(Additive, "response")
Pearson_Residuals <- residuals(Additive, "pearson")
Fitted <- fitted(Additive)

Opar <- par(mfrow = c(2,1))
plot(Fitted, Raw_Residuals)
plot(Fitted, Pearson_Residuals)
par(Opar)

# Adding a small noise to the fitted values to disantangle superposed points
# in the graphs

set.seed(243)
Noise <- runif(n = length(Raw_Residuals), min = -0.01, max = 0.01)

Opar <- par(mfrow = c(2,1))
plot(Fitted + Noise, Raw_Residuals,
     ylab = "Raw-residuals", xlab = "Fitted Values + Small noise")
abline(h = 0)
plot(Fitted+ Noise, Pearson_Residuals,
     ylab = "Pearson-residuals", xlab = "Fitted Values + Small noise")
abline(h = 0)
par(Opar)

Vars <- tapply(Raw_Residuals, Fitted, var)
Fitt <- as.numeric(levels(factor(Fitted)))
plot(Fitt, Vars, type = "b", 
     ylab = "Variance of the Raw residuals",
     xlab = "Fitted Value")


################################################################################

#                       THE END !                                              #

################################################################################