###############################################################################
#                                                                             #
# Chapter 3 - BINOMIAL MODELS                                                 #
# One-way binomial models                                                     #
#                                                                             #
# R. Labouriau                                                                #
# Last revision/edition: Spring 2023.                                          #
###############################################################################

###############################################################################
# Preparation                                                                 #
###############################################################################
# Setting the place where the output (graphs) wil be kept
setwd("~/BSA")

load("DataFramesStatisticalModelling.RData")

################################################################################
# Example Radamachera                                                          #
# Logistic regression                                                          #
################################################################################

str(Ch3.Radamachera.simple)
attach(Ch3.Radamachera.simple)

# Ploting the proportions
# pdf(file="Figure-Ch3-61.pdf")
plot(dose, abscised/nplants, cex=1.5 , xlab="Dose", ylab="Proportion abscised",
     pch=19, col='blue')
prop <- tapply(abscised, factor(dose),sum)/ tapply(nplants, factor(dose),sum)
prop
points(levels(factor(dose)), prop, type='b', lty=2, lwd=1.5, pch=19, cex=1.5,
       col='black')
# dev.off()

# Inserting a random perturbation in the graph to see superposed points
# pdf(file="Figure-Ch3-62.pdf")
n <- length(dose)
plot(dose+0.03*(runif(n)-0.5), abscised/nplants, cex=1.5 , xlab="Dose (+noise)",
     ylab="Proportion abscised",
     pch=19, col='blue')
prop <- tapply(abscised, factor(dose),sum)/ tapply(nplants, factor(dose),sum)
prop
points(levels(factor(dose)), prop, type='b', lty=2, lwd=1.5, pch=19,
       col='black')
# dev.off()

# Ploting the logit functions and the logit transformed proportions
Logit <- function(p){
  return(log(p/(1-p)))
}

ILogit <- function(lodds){
  return(exp(lodds)/(1+exp(lodds)))
}

# Ploting the logistic function  and the inverse logistic function

# pdf(file="Figure-Ch3-63.pdf")
grid <- seq(from=0.005, to=0.995, by=0.0001)
plot(grid, Logit(grid), type='l', lwd=3,
     xlab='Probability (p)',
     ylab='Lodds (logit(p))')
# dev.off()

# pdf(file="Figure-Ch3-64.pdf")
grid <- seq(from=-4.59, to=4.459, by=0.0001)
plot(grid, ILogit(grid), type='l', lwd=3,
     xlab='lodds ',
     ylab='Probability')
abline(v=0)
abline(h=1/2)
# dev.off()

# pdf(file="Figure-Ch3-65.pdf")                  '
plot(dose, Logit(abscised/nplants), cex=1.5 , xlab="Dose",
ylab="Logit transformed proportion abscised",
pch=19, col='blue')
points(levels(factor(dose)), Logit(prop), type='b', lty=2, lwd=1.5, pch=19,
col='black')
cc <- coef(lm(Logit(prop) ~  as.numeric(levels(factor(dose)))))
abline(a=cc[1], b=cc[2])
# dev.off()

# Inserting a random perturbation in the graph to see superposed points
# pdf(file="Figure-Ch3-66.pdf")
n<-length(dose)
plot(dose+0.03*(runif(n)-0.5), Logit(abscised/nplants), cex=1.5 ,
xlab="Dose (+noise)",
ylab="Logit transformed proportion abscised",
pch=19, col='blue')
points(levels(factor(dose)), Logit(prop), type='b', lty=2, lwd=1.5, pch=19,
col='black')
cc <- coef(lm(Logit(prop) ~  as.numeric(levels(factor(dose)))))
abline(a=cc[1], b=cc[2])
# dev.off()

# Fitting a logit model

resp<-cbind(abscised, nplants-abscised)
fit1 <- glm( resp ~ dose, family=binomial(link="logit"))

fitted.coef <- coef(fit1); fitted.coef

summary(fit1)

# Drawing the logistic adjusted logistic curve

grid.points  <- seq(from=min(dose), to=max(dose), by=0.001)
eta <- fitted.coef[1] + ( fitted.coef[2] * grid.points)

pred.prob <- ILogit(eta); pred.prob

# pdf(file="Figure-Ch3-67.pdf")
n<-length(dose)
plot(dose+0.03*(runif(n)-0.5), abscised/nplants, cex=1.5 ,
xlab="Dose (+noise)",
ylab="Proportion abscised",
pch=19, col='blue')
points(grid.points, pred.prob, type='l', lwd=2, col='red')
# dev.off()

# Fitting a free-curve model. The variable dose becomes a factor.
resp<-cbind(abscised, nplants-abscised)
fit1 <- glm( resp ~ dose, family=binomial(link="logit"))
fit2 <- glm( resp ~ 0+factor(dose), family=binomial(link="logit"))

coef(fit1)
deviance(fit1)

summary(fit2)

fit2n <- glm( resp ~ factor(dose), family=binomial(link="logit"))
summary(fit2n)


coef(fit2)
deviance(fit2)

# Testing homogeneity

deviance(fit2)
length(dose)          # 70 observations
length(coef(fit2))    #  7 parameters in the free-curve model

pchisq(deviance(fit2), df=70-7, lower.tail=F)

deviance(fit2)/63

# Uffa! passed the homogeneity test.

# Testing the assumed (logistic) regression curve

anova(fit1, fit2, test="Chisq")

# Again passed the test!

# Ploting the observed proportions and the predicted for the two models
# to get an intuition of what is happening.

grid.points  <- seq(from=min(dose), to=max(dose), by=0.0001)
eta <- fitted.coef[1] + ( fitted.coef[2] * grid.points)
pred.prob <- ILogit(eta)

n<-length(dose)
# pdf(file="Figure-Ch3-68.pdf")
plot(dose+0.03*(runif(n)-0.5), abscised/nplants, cex=1.5 ,
xlab="Dose",
ylab="Proportion abscised",
pch=19, col='blue')
points(grid.points, pred.prob, type='l', lwd=3, col='red')

free.lodds <- ILogit(coef(fit2))
points(levels(factor(dose)), free.lodds , type='p', cex=2, col='yellow', pch=19)
# dev.off()

# Testing the homogeneity assumption and the appropriatness of the regression
# curve SIMULTANEOUSLY!

deviance(fit1)
length(dose)          # 70 observations
length(coef(fit1))    #  2 parameters in the free-curve model

pchisq(deviance(fit1), df=70-2, lower.tail=F)

deviance(fit2)/68

# Again passed the two tests simultaneously!

fit3 <- glm( resp ~ 1, family=binomial(link="logit"))
deviance(fit1)
deviance(fit2)
deviance(fit3)

anova(fit3, fit1, test="Chisq")

detach(Ch3.Radamachera.simple)

################################################################################
# Example Radamachera                                                          #
# Multiple Logistic regression                                                 #
################################################################################

# Reading the data

str(Ch3.Radamachera.multiple)

attach(Ch3.Radamachera.multiple)

# Ploting the proportions

# pdf("Figure-Ch3-69.pdf")
dose1 <- dose[variety==1]
abscised1 <- abscised[variety==1]
nplants1 <- nplants[variety==1]

dose2 <- dose[variety==2]
abscised2 <- abscised[variety==2]
nplants2 <- nplants[variety==2]

n <- length(dose1)
plot(dose1+0.03*(runif(n)-0.5), abscised1/nplants1, cex=1.5 , xlab="Dose",
ylab="Proportion abscised",
pch=19, col='blue', ylim=range(abscised/nplants))
prop <-tapply(abscised1, factor(dose1),sum)/ tapply(nplants1, factor(dose1),sum)
prop
points(levels(factor(dose1)), prop, type='b', lty=2, lwd=2.5, pch=19,
col='blue')

n <- length(dose2)
points(dose2+0.03*(runif(n)-0.5), abscised2/nplants2, cex=1.5 , xlab="Dose",
ylab="Proportion abscised",
pch=19, col='red')
prop <- tapply(abscised2, factor(dose2),sum)/ tapply(nplants2, factor(dose2),sum)
prop
points(levels(factor(dose2)), prop, type='b', lty=2, lwd=2.5, pch=19,
col='red')
# dev.off()

# Plotting the logit transformed proportions
# pdf("Figure-Ch3-70.pdf")
Logit <- function(p){
return(log(p/(1-p)))
}

ILogit <- function(lodds){
return(exp(lodds)/(1+exp(lodds)))
}

plot(dose1+0.03*(runif(n)-0.5), Logit(abscised1/nplants1), cex=1.5 ,
xlab="Dose",
ylab="Logistic transformed proportion abscised",
pch=19, col='blue', ylim=range(Logit(abscised/nplants)))
prop <-tapply(abscised1, factor(dose1),sum)/ tapply(nplants1, factor(dose1),sum)
prop
points(levels(factor(dose1)), Logit(prop), type='b', lty=2, lwd=2.5, pch=19,
col='blue')

n <- length(dose2)
points(dose2+0.03*(runif(n)-0.5), Logit(abscised2/nplants2), cex=1.5 , xlab="Dose", ylab="Proportion abscised",
pch=19, col='red')
prop <- tapply(abscised2, factor(dose2),sum)/ tapply(nplants2, factor(dose2),sum)
prop
points(levels(factor(dose2)), Logit(prop), type='b', lty=2, lwd=2.5, pch=19,
col='red')
# dev.off()

# Fitting (simultaneously) a separate logistic regression for each variety

response <- cbind(abscised  , nplants-abscised)
diff.slopes <- glm(response ~ factor(variety) * dose,
family=binomial(link='logit')
)

summary(diff.slopes)

# Different parametrization

diff.slopes <- glm(response ~ 0 + factor(variety) * dose ,
family=binomial(link='logit')
)

summary(diff.slopes)

# Yet another parametrization

diff.slopes <- glm(response ~ 0 + factor(variety) * dose - dose,
family=binomial(link='logit')
)

summary(diff.slopes)

# Fitting a parallel model. That is different intercepts and common slope
# in the lodds scale

parallel <- glm(response ~ factor(variety) + dose,
family=binomial(link='logit')
)

summary(parallel)

# Using another parametrisation

parallel <- glm(response ~ 0 + factor(variety) + dose,
family=binomial(link='logit')
)

summary(parallel)

# Fitting a free curve model, attributing one lodds (or equiv. probability)
# for each combination of variety and dose

free.curve <- glm(response ~ 0 + factor(variety) * factor(dose),
family=binomial(link='logit')
)

summary(free.curve)

# Testing the homogeneity assumption

deviance(free.curve)

length(dose)              # 140 observations
length(coef(free.curve))  # 14 parameters

pchisq(deviance(free.curve), df=140-4, lower.tail=F)

# Uffa! assed this test!

# Testing the apropriattness of the logistic curve used

anova(diff.slopes, free.curve, test="Chisq")

# Passes this test also!

# Joint test of homogeneity and appropriatness of the logistic curve
# I fit the saturated model, just for fun.

Obs

saturated <-  glm(response ~ 0 + factor(Obs),
family=binomial(link='logit')
)

deviance(saturated)

anova(diff.slopes, saturated, test="Chisq")

# Testing additivity, i.e. parallelism of the two curves in the lodds scale

anova(parallel, diff.slopes, test="Chisq")


# Ploting the observed proportions and the predicted for the two models
# to get an intuition of what is happening.

# pdf("Figure-Ch3-71.pdf")
dose1 <- dose[variety==1]
abscised1 <- abscised[variety==1]
nplants1 <- nplants[variety==1]
dose2 <- dose[variety==2]
abscised2 <- abscised[variety==2]
nplants2 <- nplants[variety==2]

n <- length(dose1)
plot(dose1+0.03*(runif(n)-0.5), abscised1/nplants1, cex=1.5,xlab="Dose(+noise)",
ylab="Proportion abscised",
pch=19, col='blue', ylim=range(abscised/nplants))
n <- length(dose2)
points(dose2+0.03*(runif(n)-0.5), abscised2/nplants2, cex=1.5 , xlab="Dose",
ylab="Proportion abscised",
pch=19, col='red')


fitted.coef <- coef(parallel) ; fitted.coef

grid.points  <- seq(from=min(dose), to=max(dose), by=0.0001);
eta1 <- fitted.coef[1] + ( fitted.coef[3] * grid.points)
eta2 <- fitted.coef[2] + ( fitted.coef[3] * grid.points)
pred.prob1 <- ILogit(eta1); pred.prob1[1:10]
pred.prob2 <- ILogit(eta2); pred.prob2 [1:10]
points(grid.points, pred.prob1, type='l', lwd=3, col='blue')
points(grid.points, pred.prob2, type='l', lwd=3, col='red')

free.curve <- glm(response ~ 0 +  factor(dose) : factor(variety),
family=binomial(link='logit')
)
coef(free.curve)
free.lodds <- ILogit(coef(free.curve)) ; free.lodds
points(levels(factor(dose)), free.lodds[1:7] , type='p', cex=2.6, col='blue')
points(levels(factor(dose)), free.lodds[1:7] , type='p', cex=3  , col='blue')
points(levels(factor(dose)), free.lodds[1:7] , type='p', cex=2.0, col='yellow',
pch=19)

points(levels(factor(dose)), free.lodds[8:14] , type='p', cex=2.6, col='red')
points(levels(factor(dose)), free.lodds[8:14] , type='p', cex=3  , col='red')
points(levels(factor(dose)), free.lodds[8:14] , type='p', cex=2.0, col='yellow',
pch=19)
# dev.off()

################################################################################
#
# Analysis of the Bulls' fertility example
#
################################################################################

FiguresOut <- "~/Dropbox/Courses/Basic Statistical Analysis in Life and Environmental Sciences/LectureNotes/"

str(BullSeminalViabilityH)
attach(BullSeminalViabilityH)

table(Dose)

summary(N)

( Proportions <- tapply(Alive, Dose, sum)/ tapply(N, Dose, sum) )

# pdf(paste(FiguresOut,"Figure-Ch3-Continuous-01.pdf", sep=""))
plot(1:6, Proportions[1:6], type="b", pch=19, col="blue", ylim=c(0,1), lwd=3,
     xlab="Dose", ylab="Proportion of motile cells"
)
# dev.off()

Logit <- function(p) return(log(p/(1-p)))

# pdf(paste(FiguresOut,"Figure-Ch3-Continuous-02.pdf", sep=""))
plot(1:6, Logit(Proportions[1:6]), type="b", pch=19, col="blue", ylim=range(Logit(Proportions)),
     xlab="Dose", ylab="Log-odds of motile cells"
)
# dev.off()

resp <- cbind(Alive, N-Alive)
resp[1:5, ]

fit1 <- glm(resp ~ factor(Dose)+0, family=binomial(link="logit"))

summary(fit1)

# pdf(paste(FiguresOut,"Figure-Ch3-Continuous-03.pdf", sep=""))
plot(fitted(fit1) , residuals(fit1,type="response"),		 
     ylab="Raw-residuals", xlab="Fitted values")
abline(h=0); abline(v=0.5)
# dev.off()

# pdf(paste(FiguresOut,"Figure-Ch3-Continuous-04.pdf", sep=""))
plot(fitted(fit1) , residuals(fit1,type="pearson"),		 
     ylab="Pearson-residuals", xlab="Fitted values")
abline(h=0); abline(v=0.5)
# dev.off()

deviance(fit1)
n.observations <- length(N) 
n.observations

n.parameters.fit1 <- length(coef(fit1))
n.parameters.fit1

n.df.fit1 <- n.observations - n.parameters.fit1
n.df.fit1

# Testing the homogeneity of the means
pchisq(deviance(fit1), df=n.df.fit1, lower.tail=F)

fit2 <- glm(resp ~ Dose, family=binomial)
# pdf(paste(FiguresOut,"Figure-Ch3-Continuous-05.pdf", sep=""))
plot(factor(Dose) , residuals(fit2,type="response"),		 
     ylab="Raw-residuals", xlab="Dose", col="lightblue")
abline(h=0)
lines(lowess(x=Dose, y=residuals(fit2,type="response"),f = 1/2), lwd=6, col="red")
points(Dose, residuals(fit2,type="response"))
# dev.off()

summary(fit2)

# Performing a test
anova(fit2, fit1, test="Chisq")

# Fitting a third model

fit3 <- glm(resp ~ 1, family=binomial)

anova(fit3, fit2, test="Chisq")

coef(fit2)

confint(fit2)

detach(BullSeminalViabilityH)
rm(fit1, fit2, fit3, n.observations, n.parameters.fit1, n.df.fit1)

################################################################################
#
# Two simultaneous regressions
#
################################################################################
str(BullSeminalViabilityALL)
attach(BullSeminalViabilityALL)

# Exploratory analysis

mean(N)

table(Breed, Dose)

Proportions <- tapply(Alive, list(Dose, Breed), sum)/
  tapply(N, list(Dose, Breed), sum)

Proportions

# pdf(paste(FiguresOut,"Figure-Ch3-Continuous-06.pdf", sep=""))
plot(1:6, Proportions[1:6], type="b", pch=19, col="blue", ylim=c(0,1),
     xlab="Dose", ylab="Proportion of motile cells", lwd=3
)
points(1:6, Proportions[7:12], type="b", pch=19, col="red", lwd=3)
legend(x="topright", legend=c("H","J"), col=c("blue","red"), lwd=3, bty="n")
# dev.off()

Logit <- function(p) return(log(p/(1-p)))

# pdf(paste(FiguresOut,"Figure-Ch3-Continuous-07.pdf", sep=""))
plot(1:6, Logit(Proportions[1:6]), type="b", pch=19, col="blue", ylim=range(Logit(Proportions)),
     xlab="Dose", ylab="Log-odds of motile cells"
)
points(1:6, Logit(Proportions[7:12]), type="b", pch=19, col="red")
legend(x="topright", legend=c("H","J"), col=c("blue","red"), lwd=3, bty="n")
# dev.off()

# Fitting the first model
resp <- cbind(Alive, N-Alive)
resp[1:5, ]

fit1 <- glm(resp ~ Breed*factor(Dose), 
            family=binomial(link="logit"))

summary(fit1)

# pdf(paste(FiguresOut,"Figure-Ch3-Continuous-08.pdf", sep=""))
plot(fitted(fit1) , residuals(fit1,type="response"),		 
     ylab="Raw-residuals", xlab="Fitted values")
abline(h=0); abline(v=0.5)
# dev.off()


# pdf(paste(FiguresOut,"Figure-Ch3-Continuous-09.pdf", sep=""))
plot(fitted(fit1) , residuals(fit1,type="pearson"),		 
     ylab="Pearson-residuals", xlab="Fitted values")
abline(h=0); abline(v=0.5)
# dev.off()

deviance(fit1)
n.observations <- length(N) 
n.observations

n.parameters.fit1 <- length(coef(fit1))
n.parameters.fit1

n.df.fit1 <- n.observations - n.parameters.fit1
n.df.fit1

# Performing a test
pchisq(deviance(fit1), df=n.df.fit1, lower.tail=F)

# Fitting the second model with two logistic curves 
# using the direct parametrisation

fit2 <- glm(resp ~ 0 + Breed - Dose + Breed:Dose, family=binomial)

summary(fit2)
# Fitting the second model with two logistic curves 
# using the standard (corner point) parametrisation

fit2 <- glm(resp ~ Breed + Dose + Breed:Dose, family=binomial)

summary(fit2)

# Performing a test
anova(fit2, fit1, test="Chisq")

# Fitting an additive model (direct parametrisation)

fit3 <- glm(resp ~ 0 + Breed + Dose, family=binomial)

summary(fit3)

# Fitting an additive model (corner point parametrisation)
fit3 <- glm(resp ~ Breed + Dose, family=binomial)

summary(fit3)

# Performing a test
anova(fit3, fit2, test="Chisq")

# Fitting a fourth model

fit4 <- glm(resp ~ Breed, family=binomial)

summary(fit4)

# Performing a test
anova(fit4, fit3, test="Chisq")

# Fitting a fifth model

fit5 <- glm(resp ~ Dose, family=binomial)

summary(fit5)

# Performing a test
anova(fit5, fit3, test="Chisq")

# Fitting a sixth model

fit6 <- glm(resp ~ 1, family=binomial)

summary(fit6)

# Performing a test
anova(fit6, fit3, test="Chisq")

# Performing another test
anova(fit6, fit1, test="Chisq")

# Summing up 

deviance(fit3)
n.observations <- length(N) 
n.observations

n.parameters.fit3 <- length(coef(fit3))
n.parameters.fit3

n.df.fit3 <- n.observations - n.parameters.fit3
n.df.fit3

# This gives the p-value of a joint test!
pchisq(deviance(fit3), df=n.df.fit3, lower.tail=F)

summary(fit3)

# pdf(paste(FiguresOut,"Figure-Ch3-Continuous-10.pdf", sep=""))
plot(1:6, Proportions[1:6], pch=19, col="blue", ylim=c(0,1), cex=1.5,
     xlab="Dose", ylab=""	 
)
points(1:6, Proportions[7:12], pch=19, col="red", cex=1.5)

coef(fit3)

grid <- seq(from=1, to=6, by=0.1)
responseH <- coef(fit3)[1] + coef(fit3)[3]*grid
responseJ <- (coef(fit3)[1] + coef(fit3)[2]) + coef(fit3)[3]*grid

iLogit <- function(L) return(exp(L)/(1+exp(L)))

points(grid, iLogit(responseH), type="l", col="blue", lwd=3)
points(grid, iLogit(responseJ), type="l", col="red", lwd=3)
legend(x="topright", legend=c("H","J"), col=c("blue","red"), lwd=3, bty="n")
# dev.off()

detach(BullSeminalViabilityALL)
rm(grid, responseH, responseJ, n.observations, n.parameters.fit1, Proportions, 
   n.parameters.fit3, n.df.fit1, n.df.fit3, fit1, fit2, fit3, fit4, fit5)

################################################################################
#
# THE END!
# 
################################################################################


# That is all. Bye, bye ....