################################################################################ # # Solution to exercise 4.2 # # Last revised: Spring 2019 # # Copyright © 2018 by Rodrigo Labouriau ################################################################################ load("DataFramesStatisticalModelling.RData") str(Ex4.2) D <- Ex4.2 str(D) Nobs <- length(D$n.weeds) ################################################################################ # a - Plot the numbers of weed plants for each observation (vertical axis) # against the doses (horizontal axis). ################################################################################ plot(D$Nitr+runif(n=100,min=0, max=5), D$n.weeds, xlab="Nitrogen", ylab="N.weeds") # This calculates the sample mean of n.weeds for each value of Nitr. ( Means <- tapply(D$n.weeds, D$Nitr ,mean) ) points(c(0,50,100,150,200), Means, pch=19, col="red", type="b") ################################################################################ # b - Estimate the mean and the variance for each combination of the doses # used and plot the variances against the means to verify whether the # variance is approximately equal to the mean (as expected from a # Poisson model) ################################################################################ # This calculates the sample mean of n.weeds for each value of Nitr. ( Means <- tapply(D$n.weeds, D$Nitr ,mean) ) # This calculates the sample variances of n.weeds for each value of Nitr. ( Variances <- tapply(D$n.weeds, D$Nitr ,var) ) rbind(Means, Variances) plot( Means, Variances) abline(0,1) ################################################################################ # c - Fit a one-way Poisson model (with Nitr entering as a factor) and # compare that model with a model containing a fixed intensity # parameter (i.e. containing only the intercept) ################################################################################ # Here a fit a Poisson one-way classification model oneWay <- glm(n.weeds~factor(Nitr)+0,family=poisson(link="identity"), data=D) # Note that the model assumes one intensity parameter (expectation) for each # level of nitrogen used summary(oneWay) # Here a fit a Poisson model assuming the same expectation for each observation constant <- glm(n.weeds~1 ,family=poisson(link="identity"), data=D) summary(constant) # Here I use a likelihood ratio test to compare the models anova(constant, oneWay, test="Chisq") # What we conclude? The expected numbers of weeds per nitrogen level are not # all the same! ################################################################################ # d - Fit a Poisson regression model (with Nitr entering as a continuous # explanatory variable) and compare that model with the previous # model. Use this comparison to discuss the linearity of the response # to the dose (at least in this range) ################################################################################ # Here I fit a Poisson linear regression model (assumes that the expected # number of weeds increase linearly with the amount of nitrogen supplied) linear<-glm(n.weeds~Nitr,family=poisson(link="identity"), data=D) summary(linear) anova(linear,oneWay,test="Chisq") ################################################################################ # Additional topic (optional) # Below I will make some basic model control and some simulation ... ################################################################################ # Testing homogeneity deviance(oneWay) Nobs <- length(D$n.weeds) # The number of observations Npar <- length(coef(oneWay)) # The number of parameters in the oneWay model pchisq(deviance(oneWay),df=Nobs-Npar,lower.tail = FALSE) # Conclusion: no reasons to suspect of lack of homogeneity! # Examining the residuals # Here I calculate a vector with what is predicted (the fitted values) by # the model for each observation Fitted <- fitted(oneWay) # Here I calculate the raw residuals, i.e. the difference between what was # observed and what is predicted by the model. Raw.Residuals <- residuals(oneWay, "response") # We should see a pattern of an increasing dispersion of the residuals as the # fitted values increase (a thrombone form) in the graph below. # This is due to the fact that the variance inreases with the means, # if the data is Poisson distributed .. plot(Fitted, Raw.Residuals) # The Pearson residuals calculated below (i.e. the raw residuals divided by # square root of the variance that we would have if he data were Poisson # distributed) Pearson.Residuals <- residuals(oneWay, "pearson") # If the data is Poisson distributed, then the pattern of increasing dispersion # observed in the previous plot should disapear ... and it does disapear ... plot(Fitted, Pearson.Residuals) ################################################################################ # The points in the plot displayed in item b above (estimates of the variance # against estimates of the means) should be disposed in, approximately, a # straight line, if the data is Poisson distributed (under the Poisson # distribution the variance is equal to the mean). # Now, the question is whether the plot displayed there is convincing. # The points are not exactly disposed along a straight line (crossing the # origin) ... but, of course, a certain deviation from the ideal straigt line # should be expected due to the effect of random variation. # I will simulate the data below and check how much we could expect these # deviations to be in a perfect scenario, i.e. in a situation where we know # that the data is Poisson distributed (because we simulate so) # Here I simulate the counts of number of weeds following a Poisson # distribution with expectations given by the estimated expectations # for each level of nitrogen (estimated using the model oneWay). # The same numbers of replicates are used, i.e. we simulate a result # that follow the same probability law as the probability law assumed # in the model oneWay defined above. I keep these simulated counts # in the vector Simulated.n.weeds in the data-frame D. D$Simulated.n.weeds <- simulate(oneWay)[[1]] D$Simulated.n.weeds # Next, I estimate the means and the variances for the SIMULATED counts # and display a plot of those variances against those means ... ( Simulated.Means <- tapply(D$Simulated.n.weeds, D$Nitr ,mean) ) ( Simulated.Variances <- tapply(D$Simulated.n.weeds, D$Nitr ,var) ) plot( Simulated.Means, Simulated.Variances) abline(0,1) # Here I will produce several simulations and display the plots of the # variances against the means for different simulations toghether, so # you can compare ... par(mfrow=c(3,3)) plot(Means, Variances, main="Original Data", pch=19, col="red") abline(0,1) # I plot below hte results of 8 simulations for(i in 1:8){ D$Simulated.n.weeds <- simulate(oneWay)[[1]] Simulated.Means <- tapply(D$Simulated.n.weeds, D$Nitr ,mean) Simulated.Variances <- tapply(D$Simulated.n.weeds, D$Nitr ,var) plot(Simulated.Means, Simulated.Variances, main="Simulated Data") abline(0,1) } par(mfrow=c(1,1)) ################################ THE END #######################################