################################################################################
#    
#  TO BE REFORMED
#
# Tutorial-12-SimpleBinomialRegression                                         
#                                                                              
# Analysis of the Mice convultion data                                         
#                                                                              
# Binomial models with continuous explanatory variables                        
#                                                                              
# R. Labouriau                                                                 
# Last revision: Spring 2019 (to be reformed)
#
# Copyright © 2018 by Rodrigo Labouriau
################################################################################

###############################################################################
# Preparation                                                                 #
###############################################################################

# Loading the datasets required
load("~/BSA/DataFramesStatisticalModelling.RData")

# Setting the place where the output (graphs) wil be kept
setwd("~/BSA")

str(Ch3.dataMice1)
attach(Ch3.dataMice1)

# We read the variables Dose, conv and total (all three numerical)

# ---------------------------------------------------------------------------- #
# Plotting the data                                                            #
# ---------------------------------------------------------------------------- #

proportion <- conv/total

proportion

plot(Dose, proportion,
		 type="b",
		 xlab="Insuline Dose",
		 ylab="Proprtion of convultion",
		 pch=19, cex=1.2
)


# It is convenient to use the logarithm of the dose instead of the dose.

L <- log(Dose)

plot(L, proportion,
		 type="b",
		 xlab="Logarithm of the Insuline Dose",
		 ylab="Proprtion of convultion",
		 pch=19, cex=1.2
)


# We will fit a range of binomial models to this simple data. Therefore we have
# to build a response matrix, with two columns, the first with the number of
# successes and the second with the number of failures. This is done below.

response <- cbind(conv, total-conv)

# ---------------------------------------------------------------------------- #
# Fitting a linear model using the log(Dose)=L as explanatory variable         #
# ---------------------------------------------------------------------------- #

Linear <- glm(response ~ L , family=binomial(link="identity"))

summary(Linear)

coef(Linear)

# Here I will plot the observed proportions superposed with the
# predicted by the model (solid line)
plot(L, proportion,
		 type="b",
		 lty=2,
		 xlab="Logarithm of the Insuline Dose",
		 ylab="Proprtion of convultion",
		 pch=19, cex=1.2
)
# This draws a straight line with intercept an slope given by the model.
abline(a=coef(Linear)[1], b= coef(Linear)[2], lwd=2)
# This draws the values predicted by the model for each Dose (or log-Dose)
points(L, Linear$fitted.values, cex=2)


# ---------------------------------------------------------------------------- #
# Fitting a logistic model using the log(Dose)=L as explanatory variable       #
# ---------------------------------------------------------------------------- #

Logit <- glm(response ~ L , family=binomial(link="logit"))

summary(Logit)


# Here I will plot the observed proportions superposed with the
# predicted by the model (solid line)
plot(L, proportion,
		 type="b",
		 lty=2,
		 xlab="Logarithm of the Insuline Dose",
		 ylab="Proportion of convultion",
		 pch=19, cex=1.2, ylim=c(0.30, 0.95)
)

# This draws the values predicted by the model for each Dose (or log-Dose)
points(L, Logit$fitted.values, cex=2, type="b")

# Another plot
plot(L, logit(proportion),
		 type="p",
		 lty=2,
		 xlab="Logarithm of the Insuline Dose",
		 ylab="Logit transformed proportion",
		 pch=19, cex=1.2
)
# Drawing a least-squares line
aux <- coef(lm(logit(proportion) ~L))
abline(a=aux[1], b=aux[2], lwd=2)


# ---------------------------------------------------------------------------- #
# Fitting a probit model using the log(Dose)=L as explanatory variable         #
# ---------------------------------------------------------------------------- #

Probit <- glm(response ~ L , family=binomial(link="probit"))

summary(Probit)


# Here I will plot the observed proportions superposed with the
# predicted by the model (solid line)
plot(L, proportion,
		 type="b",
		 lty=2,
		 xlab="Logarithm of the Insuline Dose",
		 ylab="Proportion of convultion",
		 pch=19, cex=1.2, ylim=c(0.30, 0.95)
)

# This draws the values predicted by the model for each Dose (or log-Dose)
points(L, Probit$fitted.values, cex=2, type="b")

# ---------------------------------------------------------------------------- #
# Fitting a compl. log-log model using the log(Dose)=L as explanatory variable #
# ---------------------------------------------------------------------------- #

ComlLogLog <- glm(response ~ L , family=binomial(link="cloglog"))

summary(ComlLogLog)


# Here I will plot the observed proportions superposed with the
# predicted by the model (solid line)
plot(L, proportion,
		 type="b",
		 lty=2,
		 xlab="Logarithm of the Insuline Dose",
		 ylab="Proportion of convultion",
		 pch=19, cex=1.2, ylim=c(0.30, 0.95)
)

# This draws the values predicted by the model for each Dose (or log-Dose)
points(L, ComlLogLog$fitted.values, cex=2, type="b")

# I define next a function that calculates the inverse of the comlpementary
# log-log link (i.e. the associated response function)

compLL <- function(p){
	return(1 - exp(-exp(p)))
}

# Drawing another plot


plot(L, compLL(proportion),
		 type="p",
		 lty=2,
		 xlab="Logarithm of the Insuline Dose",
		 ylab="Compl. log-log transformed proportion",
		 pch=19, cex=1.2
)
# Drawing a least-squares line
aux <- coef(lm(compLL(proportion) ~L))
abline(a=aux[1], b=aux[2], lwd=2)


# ---------------------------------------------------------------------------- #
# Testing the adjustment of the models                                         #
# (see the explanation of the tests in the text of chapter 3                   #
# ---------------------------------------------------------------------------- #


pchisq(deviance(Linear), df=4, lower.tail=F )

pchisq(deviance(Logit), df=4, lower.tail=F )

pchisq(deviance(Probit), df=4, lower.tail=F )

pchisq(deviance(ComlLogLog), df=4, lower.tail=F )

# None of the models are rejected, but we are using an asymptotic test
# with only 6 observations! The validity of this test is questionable!

# ---------------------------------------------------------------------------- #
# Residual analyses                                                            #
# ---------------------------------------------------------------------------- #

# We calculate here the Pearson residuals, which are the differences between
# the obesrved and the predicted by the model divided by the sqrt of the
# expected variance of this difference (see the text!!)

Pearson.res <- function(model){
	p <- model$fitted.values
	y <- model$y
	return((y - p)/sqrt(total*p*(1-p)) )
}

# Linear model
plot(Linear$fitted.values,Pearson.res(Linear),
		 cex=1.5, pch=19, col='blue',
		 xlab='Fitted values',
		 ylab='Pearson residuals'
)
abline(h=0)

# Alternatively you may calculate the Pearson residuals directly by using
# the function "residuals" with the parameter methods set to "Pearson"

Pearson.residuals <- residuals(Linear, method="Pearson")

plot(Linear$fitted.values, Pearson.residuals,
     cex=1.5, pch=19, col='blue',
     xlab='Fitted values',
     ylab='Pearson residuals'
)
abline(h=0)


# Logit model
plot(Logit$fitted.values,Pearson.res(Logit),
		 cex=1.5, pch=19, col='blue',
		 xlab='Fitted values',
		 ylab='Pearson residuals'
)
abline(h=0)

# Complement. log-log model
plot(ComlLogLog$fitted.values,Pearson.res(ComlLogLog),
		 cex=1.5, pch=19, col='blue',
		 xlab='Fitted values',
		 ylab='Pearson residuals'
)
abline(h=0)

detach(dataMice1)

################################################################################
#                                                                              #
# Analysis of the COMPLETE Mice convultion data                                #
#                                                                              #
# Binomial models with continuous explanatory variables                        #
#                                                                              #
################################################################################

###############################################################################
# Preparation                                                                 #
###############################################################################

str(Ch3.dataMiceALL)
attach(Ch3.dataMiceALL)

# We read the variables Insuline Dose, conv and total (the first is a factor,
# the insuline type and the other three are numerical)

# It is convenient to use the logarithm of the dose instead of the dose.

L <- log(Dose)

# ---------------------------------------------------------------------------- #
# Plotting the data                                                            #
# ---------------------------------------------------------------------------- #

par(mfrow=c(1,2), pty='s')

proportion <- conv/total

plot(L[Insuline=='A'], proportion[Insuline=='A'],
		 type="b",
		 xlab="Logarithm of the insuline dose (L)",
		 ylab="Proportion of convultion",
		 pch=19, cex=1.2 , col='blue', lwd=1.5,
		 ylim=range(proportion)
)
points(L[Insuline=='B'], proportion[Insuline=='B'],
			 type="b",
			 pch=19, cex=1.2 , col='red',lwd=1.5
)

legend(2.2,0.9, legend=c("Insuline A", "Insuline B"),
			 col=c("blue","red"),bty="n", lwd=1.5)

# Plot with the compl. log-log transformed proportions

# I define next a function that calculates the inverse of the comlpementary
# log-log link (i.e. the associated response function)

compLL <- function(p){
	return(1 - exp(-exp(p)))
}

plot(L[Insuline=='A'], compLL(proportion[Insuline=='A']),
		 type="b",
		 xlab="Logarithm of the insuline dose (L)",
		 ylab="Compl. log-log transformed proportion",
		 pch=19, cex=1.2 , col='blue', lwd=1.5,
		 ylim=range(compLL(proportion))
)
points(L[Insuline=='B'], compLL(proportion[Insuline=='B']),
			 type="b",
			 pch=19, cex=1.2 , col='red',lwd=1.5
)

legend(2.2,0.9, legend=c("Insuline A", "Insuline B"),
			 col=c("blue","red"),bty="n", lwd=1.5)


# ---------------------------------------------------------------------------- #
# Fitting a full analysis of covariance model                                  #
# i.e. one straight line for each insuline type using the compl.log-log scale. #
# ---------------------------------------------------------------------------- #

# Making the response matrix
Y <- cbind(conv, total-conv)

# Fitting the model
FullModel <- glm(Y ~ L + Insuline + Insuline*L, family=binomial(link="cloglog"))

# Making a simple residual analysis
Pearson.res <- function(model){
	p <- model$fitted.values
	y <- model$y
	return((y - p)/sqrt(total*p*(1-p)) )
}

par(mfrow=c(1,1), pty='s')
plot(FullModel$fitted.values , Pearson.res(FullModel),
		 xlab='Fitted values', ylab='Pearson residuals' )
abline(h=0)

# Making a sequence of tests

anova(FullModel, test="Chisq")

# Conclusion: Parallel lines (additivity), with significant effect of the
#             insuline type and dose.

# Fitting an additive model
AddModel <- glm(Y ~ L + Insuline , family=binomial(link="cloglog"))

summary(AddModel)

# Parametrizing in a different way

AddModel <- glm(Y ~ Insuline + L + 0 , family=binomial(link="cloglog"))

summary(AddModel)

detach(Ch3.dataMiceALL)