################################################################################ # # # Solution to the exercises 3.4 # # # # Last revised: Spring 2019 # # Copyright © 2018 by Rodrigo Labouriau # ################################################################################ load("DataFramesStatisticalModelling.RData") attach(Ex3.4) str(Ex3.4) ################################################################################ # Exploratory analysis table(spec, P) Proportions <- tapply(mycor,list(P, spec), sum)/tapply(nsamples,list(P, spec), sum) Proportions # Ploting the proportions for each species plot(c(0,100,200,300,400,500,600), Proportions[1:7], type="b", pch="1", ylim=c(0,1)) points(c(0,100,200,300,400,500,600), Proportions[8:14], type="b", pch="2") points(c(0,100,200,300,400,500,600), Proportions[15:21], type="b", pch="3") # Ploting the lodds for each species plot(c(0,100,200,300,400,500,600), logit(Proportions[1:7]), type="b", pch="1", ylim=c(min(logit(Proportions)),max(logit(Proportions)))) points(c(0,100,200,300,400,500,600), logit(Proportions[8:14]), type="b", pch="2") points(c(0,100,200,300,400,500,600), logit(Proportions[15:21]), type="b", pch="3") ################################################################################ # Analysis with only the species 1 and 2 ################################################################################ detach(Ex3.4) # Making a data-frame containing only the species 1 and 2 Ex3.4Only1and2 <- subset(Ex3.4, spec != 3) table(Ex3.4$spec) str(Ex3.4) str(Ex3.4Only1and2) table(Ex3.4Only1and2$spec) attach(Ex3.4Only1and2) Resp <- cbind(mycor, nsamples-mycor) # FreeCurve <- glm(Resp~factor(P)*factor(spec), family=binomial(link="identity")) # deviance(FreeCurve) FreeCurve <- glm(Resp~ factor(spec)*factor(P), family=binomial(link="logit")) deviance(FreeCurve) coef(FreeCurve) # Testing homogeneity length(coef(FreeCurve)) length(P) pchisq(deviance(FreeCurve), df=280-14, lower.tail = F) TwoLogits <- glm(Resp~ factor(spec)*P, family=binomial(link="logit")) coef(TwoLogits) # Testing adecquacy of the logistic curve anova(TwoLogits, FreeCurve, test="Chisq") Parallel <- glm(Resp~ factor(spec) + P, family=binomial(link="logit")) coef(Parallel) # Testing additivity anova(Parallel, TwoLogits, test="Chisq") # anova(TwoLogits, Parallel, test="Chisq") OneLogit <- glm(Resp~ P, family=binomial(link="logit")) anova(OneLogit, Parallel, test="Chisq") ######################################################################## # Optional # Performing some model control # ######################################################################## RawResiduals <- residuals(FreeCurve, "response") PearsonResiduals <- residuals(FreeCurve, "pearson") Fitted <- fitted(FreeCurve) plot(Fitted, RawResiduals) abline(h=0) plot(Fitted, PearsonResiduals) abline(h=0) # Checking the logistic regression curve RawResiduals <- residuals(Parallel, "response") plot(P+5*spec, RawResiduals, col=spec+2, pch=19) abline(h=0) Spec1 <- data.frame(RawResiduals=RawResiduals[spec==1], P=P[spec==1]) LL <- loess (RawResiduals~P, data=Spec1) Grid <- 1:600 Pred <- predict(LL, data.frame(P=Grid)) points(Grid, Pred, type="l", lwd=4, col="blue") Spec2 <- data.frame(RawResiduals=RawResiduals[spec==2], P=P[spec==2]) LL <- loess (RawResiduals~P, data=Spec2) Grid <- 1:600 Pred <- predict(LL, data.frame(P=Grid)) points(Grid, Pred, type="l", lwd=4, col="green") detach(Ex3.4Only1and2) ################################################################################ # Analysis with the 3 species ################################################################################ attach(Ex3.4) Resp <- cbind(mycor, nsamples-mycor) # FreeCurve <- glm(Resp~factor(P)*factor(spec), family=binomial(link="identity")) # deviance(FreeCurve) FreeCurve <- glm(Resp~ factor(P)*factor(spec)+0, family=binomial(link="logit")) # FreeCurve <- glm(Resp~ 0-factor(P)- factor(spec)+ factor(P):factor(spec), family=binomial(link="logit")) # Testing homogeneity deviance(FreeCurve) summary(FreeCurve) pchisq(deviance(FreeCurve), df=420-21, lower.tail = F) ThreeLogits <- glm(Resp~ factor(spec)*P, family=binomial(link="logit")) coef(ThreeLogits) # Testing adequacy of the logistic curves anova(ThreeLogits, FreeCurve, test="Chisq") Parallel <- glm(Resp~ factor(spec) + P, family=binomial(link="logit")) coef(Parallel) # Showing that the three lines (in the logit scale) are not parallel anova(Parallel, ThreeLogits, test="Chisq") # Displaying the models # Ploting the lodds for each species plot(c(0,100,200,300,400,500,600), logit(Proportions[1:7]), type="b", pch="1", ylim=c(min(logit(Proportions)),max(logit(Proportions)))) points(c(0,100,200,300,400,500,600), logit(Proportions[8:14]), type="b", pch="2") points(c(0,100,200,300,400,500,600), logit(Proportions[15:21]), type="b", pch="3") abline(coef(ThreeLogits)[1], coef(ThreeLogits)[4], lty=2) abline(coef(ThreeLogits)[1]+coef(ThreeLogits)[2], coef(ThreeLogits)[4]+coef(ThreeLogits)[5], lty=2 ) abline(coef(ThreeLogits)[1]+coef(ThreeLogits)[3], coef(ThreeLogits)[4]+coef(ThreeLogits)[6], lty=2 ) # Ploting the proportions for each species plot(c(0,100,200,300,400,500,600), Proportions[1:7], type="b", pch="1", ylim=c(min(Proportions),max(Proportions)), col="blue") alpha <- coef(ThreeLogits)[1]; beta <- coef(ThreeLogits)[4] f <- function(x) exp(alpha+ beta*x)/ (1+exp(alpha+ beta*x)) plot(f, xlim=c(0,600), add=TRUE, lwd=2, col="blue") points(c(0,100,200,300,400,500,600), Proportions[8:14], type="b", pch="2", col="green") alpha <- coef(ThreeLogits)[1]+coef(ThreeLogits)[2] beta <- coef(ThreeLogits)[4]+coef(ThreeLogits)[5] plot(f, xlim=c(0,600), add=TRUE, lwd=2, col="green") points(c(0,100,200,300,400,500,600), Proportions[15:21], type="b", pch="3", col="red") alpha <- coef(ThreeLogits)[1]+coef(ThreeLogits)[3] beta <- coef(ThreeLogits)[4]+coef(ThreeLogits)[6] plot(f, xlim=c(0,600), add=TRUE, lwd=2, col="red") ################################################################################ # End!