################################################################################
#  Solution of the exercise 3-1 on binomial models                             
#
# Last revised: Spring 2019
#
# Copyright © 2018 by Rodrigo Labouriau
################################################################################

#------------------------------------------------------------------------------#
# Ex3.1 One-way classification models                                          #
#------------------------------------------------------------------------------#

# Basic preparation 

# Loading and attaching the required data file (please insert the address
# of the directory where the file referred above is placed in your computer)
load("DataFramesStatisticalModelling.RData")

ls()

str(Ex3.1)
D <- Ex3.1

# Tabulating the data, we get 20 observations (petri dishes) per treatment
table(D$Treat)

#------------------------------------------------------------------------------#
# Item 1) a)
#------------------------------------------------------------------------------#

# Ploting the proportions for the levels of Treat

Ngerm <- tapply(D$Germ, D$Treat, sum)
Ngerm
(Ngerm <- tapply(D$Germ, D$Treat, sum))

(Nseeds <- tapply(D$total, D$Treat, sum) )

(prop <- Ngerm/Nseeds)

plot(D$Treat, D$Germ/D$total)
# Superposing the proportions in each treatment to the plot
points(1:6, prop, pch=19, col="yellow", cex=1.2)

#------------------------------------------------------------------------------#
# Item 2) b)
#------------------------------------------------------------------------------#

# Constructing a response matrix
resp <- cbind(D$Germ, D$total-D$Germ)
# print(resp)

# Fitting a model with one probability per level using the identity link
D$Treat <- factor(D$Treat)
fitB <- glm(cbind(D$Germ, D$total-D$Germ) ~ Treat + 0, 
            family=binomial(link="identity"), data = D)
coef(fitB)
prop

#------------------------------------------------------------------------------#
# Item 3) c)
#------------------------------------------------------------------------------#

# Fitting a logistic model with one probability per level of Treat
fitC <- glm(cbind(D$Germ, D$total-D$Germ) ~ Treat -1, 
            family=binomial(link="logit"), data = D)
coef(fitC)

# Converting the lodds to probabilities

lodds <- coef(fitC)
exp(lodds)/ (1+exp(lodds))
# Displaying the proportions again
prop

deviance(fitC)

deviance(fitB)

#------------------------------------------------------------------------------#
# Item d)
#------------------------------------------------------------------------------#

# Fitting a logistic model that attributes the same lodds for all the
# observations

fitD <- glm(cbind(D$Germ, D$total-D$Germ) ~ 1, 
            family=binomial(link="logit"), data = D)

coef(fitD)

# Converting the lodds above into a probability
exp(coef(fitD))/(1+exp(coef(fitD)))

deviance(fitD)

#------------------------------------------------------------------------------#
# Item 5) e)
#------------------------------------------------------------------------------#

deviance(fitC)
deviance(fitD)
delta.dev <- deviance(fitD)-deviance(fitC)  ; delta.dev
pchisq(delta.dev, df = 5, lower.tail = F)

# Or equivalently

anova(fitC, fitD, test="Chisq")

#------------------------------------------------------------------------------#
# Item 6) f)
#------------------------------------------------------------------------------#

# Testing homogeneity
 deviance(fitC) / (120-6)

 pchisq(deviance(fitC), df = 120-6, lower.tail = FALSE)

# Alternatively we might fit a saturated model 

D$Id <- factor(1:120)

sat <- glm(cbind(D$Germ, D$total-D$Germ) ~ Id, 
           family=binomial(link="logit"), data = D)
deviance(sat)
coef(sat)

summary(fitC)
anova(fitC, sat, test="Chisq")
 
#------------------------------------------------------------------------------#
# Aditional analysis (optional)
#------------------------------------------------------------------------------#

# Wald test for equality to the first level of Treat
fit4 <- glm(cbind(D$Germ, D$total-D$Germ) ~ Treat , 
            family=binomial(link="logit"), data = D)
summary(fit4)

# Calculating onfidence intervals
fit4 <- glm(cbind(D$Germ, D$total-D$Germ) ~ Treat + 0 , 
            family=binomial(link="logit"), data = D)
summary(fit4)

confint(fit4)

# Making posthoc pairwise comparisons (requires the package pairwiseComparisons)

# # Installing some useful packages:
# install.packages(pkgs = "http://home.math.au.dk/astatlab/software/pairwisecomparisons/sources/pairwiseComparisons_1.1.4.1.tar.gz",
#                  type = "source", repos = NULL)
# library(pairwiseComparisons)
# # Check if you have the required packages and install them if necessary
# InstallRequiredPackages()


library(pairwiseComparisons)
GG <- GroupClusterEffects(Model = fit4, EffectLabels = levels(D$Treat))

print(GG)
plot(GG)
barplot(GG)
lines(GG)
summary(GG)

# The next call (commented out) cleans everything from the environment, 
# i.e. removes all the objects listed by the call "ls()"

# rm(list = ls())

#                          THE END!