################################################################################ # # TO BE REFORMED # # Tutorial-12-SimpleBinomialRegression # # Analysis of the Mice convultion data # # Binomial models with continuous explanatory variables # # R. Labouriau # Last revision: Spring 2019 (to be reformed) # # Copyright © 2018 by Rodrigo Labouriau ################################################################################ ############################################################################### # Preparation # ############################################################################### # Loading the datasets required load("~/BSA/DataFramesStatisticalModelling.RData") # Setting the place where the output (graphs) wil be kept setwd("~/BSA") str(Ch3.dataMice1) attach(Ch3.dataMice1) # We read the variables Dose, conv and total (all three numerical) # ---------------------------------------------------------------------------- # # Plotting the data # # ---------------------------------------------------------------------------- # proportion <- conv/total proportion plot(Dose, proportion, type="b", xlab="Insuline Dose", ylab="Proprtion of convultion", pch=19, cex=1.2 ) # It is convenient to use the logarithm of the dose instead of the dose. L <- log(Dose) plot(L, proportion, type="b", xlab="Logarithm of the Insuline Dose", ylab="Proprtion of convultion", pch=19, cex=1.2 ) # We will fit a range of binomial models to this simple data. Therefore we have # to build a response matrix, with two columns, the first with the number of # successes and the second with the number of failures. This is done below. response <- cbind(conv, total-conv) # ---------------------------------------------------------------------------- # # Fitting a linear model using the log(Dose)=L as explanatory variable # # ---------------------------------------------------------------------------- # Linear <- glm(response ~ L , family=binomial(link="identity")) summary(Linear) coef(Linear) # Here I will plot the observed proportions superposed with the # predicted by the model (solid line) plot(L, proportion, type="b", lty=2, xlab="Logarithm of the Insuline Dose", ylab="Proprtion of convultion", pch=19, cex=1.2 ) # This draws a straight line with intercept an slope given by the model. abline(a=coef(Linear)[1], b= coef(Linear)[2], lwd=2) # This draws the values predicted by the model for each Dose (or log-Dose) points(L, Linear$fitted.values, cex=2) # ---------------------------------------------------------------------------- # # Fitting a logistic model using the log(Dose)=L as explanatory variable # # ---------------------------------------------------------------------------- # Logit <- glm(response ~ L , family=binomial(link="logit")) summary(Logit) # Here I will plot the observed proportions superposed with the # predicted by the model (solid line) plot(L, proportion, type="b", lty=2, xlab="Logarithm of the Insuline Dose", ylab="Proportion of convultion", pch=19, cex=1.2, ylim=c(0.30, 0.95) ) # This draws the values predicted by the model for each Dose (or log-Dose) points(L, Logit$fitted.values, cex=2, type="b") # Another plot plot(L, logit(proportion), type="p", lty=2, xlab="Logarithm of the Insuline Dose", ylab="Logit transformed proportion", pch=19, cex=1.2 ) # Drawing a least-squares line aux <- coef(lm(logit(proportion) ~L)) abline(a=aux[1], b=aux[2], lwd=2) # ---------------------------------------------------------------------------- # # Fitting a probit model using the log(Dose)=L as explanatory variable # # ---------------------------------------------------------------------------- # Probit <- glm(response ~ L , family=binomial(link="probit")) summary(Probit) # Here I will plot the observed proportions superposed with the # predicted by the model (solid line) plot(L, proportion, type="b", lty=2, xlab="Logarithm of the Insuline Dose", ylab="Proportion of convultion", pch=19, cex=1.2, ylim=c(0.30, 0.95) ) # This draws the values predicted by the model for each Dose (or log-Dose) points(L, Probit$fitted.values, cex=2, type="b") # ---------------------------------------------------------------------------- # # Fitting a compl. log-log model using the log(Dose)=L as explanatory variable # # ---------------------------------------------------------------------------- # ComlLogLog <- glm(response ~ L , family=binomial(link="cloglog")) summary(ComlLogLog) # Here I will plot the observed proportions superposed with the # predicted by the model (solid line) plot(L, proportion, type="b", lty=2, xlab="Logarithm of the Insuline Dose", ylab="Proportion of convultion", pch=19, cex=1.2, ylim=c(0.30, 0.95) ) # This draws the values predicted by the model for each Dose (or log-Dose) points(L, ComlLogLog$fitted.values, cex=2, type="b") # I define next a function that calculates the inverse of the comlpementary # log-log link (i.e. the associated response function) compLL <- function(p){ return(1 - exp(-exp(p))) } # Drawing another plot plot(L, compLL(proportion), type="p", lty=2, xlab="Logarithm of the Insuline Dose", ylab="Compl. log-log transformed proportion", pch=19, cex=1.2 ) # Drawing a least-squares line aux <- coef(lm(compLL(proportion) ~L)) abline(a=aux[1], b=aux[2], lwd=2) # ---------------------------------------------------------------------------- # # Testing the adjustment of the models # # (see the explanation of the tests in the text of chapter 3 # # ---------------------------------------------------------------------------- # pchisq(deviance(Linear), df=4, lower.tail=F ) pchisq(deviance(Logit), df=4, lower.tail=F ) pchisq(deviance(Probit), df=4, lower.tail=F ) pchisq(deviance(ComlLogLog), df=4, lower.tail=F ) # None of the models are rejected, but we are using an asymptotic test # with only 6 observations! The validity of this test is questionable! # ---------------------------------------------------------------------------- # # Residual analyses # # ---------------------------------------------------------------------------- # # We calculate here the Pearson residuals, which are the differences between # the obesrved and the predicted by the model divided by the sqrt of the # expected variance of this difference (see the text!!) Pearson.res <- function(model){ p <- model$fitted.values y <- model$y return((y - p)/sqrt(total*p*(1-p)) ) } # Linear model plot(Linear$fitted.values,Pearson.res(Linear), cex=1.5, pch=19, col='blue', xlab='Fitted values', ylab='Pearson residuals' ) abline(h=0) # Alternatively you may calculate the Pearson residuals directly by using # the function "residuals" with the parameter methods set to "Pearson" Pearson.residuals <- residuals(Linear, method="Pearson") plot(Linear$fitted.values, Pearson.residuals, cex=1.5, pch=19, col='blue', xlab='Fitted values', ylab='Pearson residuals' ) abline(h=0) # Logit model plot(Logit$fitted.values,Pearson.res(Logit), cex=1.5, pch=19, col='blue', xlab='Fitted values', ylab='Pearson residuals' ) abline(h=0) # Complement. log-log model plot(ComlLogLog$fitted.values,Pearson.res(ComlLogLog), cex=1.5, pch=19, col='blue', xlab='Fitted values', ylab='Pearson residuals' ) abline(h=0) detach(dataMice1) ################################################################################ # # # Analysis of the COMPLETE Mice convultion data # # # # Binomial models with continuous explanatory variables # # # ################################################################################ ############################################################################### # Preparation # ############################################################################### str(Ch3.dataMiceALL) attach(Ch3.dataMiceALL) # We read the variables Insuline Dose, conv and total (the first is a factor, # the insuline type and the other three are numerical) # It is convenient to use the logarithm of the dose instead of the dose. L <- log(Dose) # ---------------------------------------------------------------------------- # # Plotting the data # # ---------------------------------------------------------------------------- # par(mfrow=c(1,2), pty='s') proportion <- conv/total plot(L[Insuline=='A'], proportion[Insuline=='A'], type="b", xlab="Logarithm of the insuline dose (L)", ylab="Proportion of convultion", pch=19, cex=1.2 , col='blue', lwd=1.5, ylim=range(proportion) ) points(L[Insuline=='B'], proportion[Insuline=='B'], type="b", pch=19, cex=1.2 , col='red',lwd=1.5 ) legend(2.2,0.9, legend=c("Insuline A", "Insuline B"), col=c("blue","red"),bty="n", lwd=1.5) # Plot with the compl. log-log transformed proportions # I define next a function that calculates the inverse of the comlpementary # log-log link (i.e. the associated response function) compLL <- function(p){ return(1 - exp(-exp(p))) } plot(L[Insuline=='A'], compLL(proportion[Insuline=='A']), type="b", xlab="Logarithm of the insuline dose (L)", ylab="Compl. log-log transformed proportion", pch=19, cex=1.2 , col='blue', lwd=1.5, ylim=range(compLL(proportion)) ) points(L[Insuline=='B'], compLL(proportion[Insuline=='B']), type="b", pch=19, cex=1.2 , col='red',lwd=1.5 ) legend(2.2,0.9, legend=c("Insuline A", "Insuline B"), col=c("blue","red"),bty="n", lwd=1.5) # ---------------------------------------------------------------------------- # # Fitting a full analysis of covariance model # # i.e. one straight line for each insuline type using the compl.log-log scale. # # ---------------------------------------------------------------------------- # # Making the response matrix Y <- cbind(conv, total-conv) # Fitting the model FullModel <- glm(Y ~ L + Insuline + Insuline*L, family=binomial(link="cloglog")) # Making a simple residual analysis Pearson.res <- function(model){ p <- model$fitted.values y <- model$y return((y - p)/sqrt(total*p*(1-p)) ) } par(mfrow=c(1,1), pty='s') plot(FullModel$fitted.values , Pearson.res(FullModel), xlab='Fitted values', ylab='Pearson residuals' ) abline(h=0) # Making a sequence of tests anova(FullModel, test="Chisq") # Conclusion: Parallel lines (additivity), with significant effect of the # insuline type and dose. # Fitting an additive model AddModel <- glm(Y ~ L + Insuline , family=binomial(link="cloglog")) summary(AddModel) # Parametrizing in a different way AddModel <- glm(Y ~ Insuline + L + 0 , family=binomial(link="cloglog")) summary(AddModel) detach(Ch3.dataMiceALL)