################################################################################ # # # Module 3 - Poisson models Day 6 # # # # R. Labouriau # # # # Revisions/editions: 10/11/2019. # ################################################################################ load("~/BSA/DataFramesStatisticalModelling.RData") # Setting the place where the output (graphs) will be kept # setwd("~/BaSALES/Temp/") ################################################################################ # Defining a function to draw Poisson QQ-plots # ################################################################################ qqpois <- function(x, lambda=mean(x), main=" ", qqline=T){ ox <- x[order(x)] emp <- ecdf(ox)(ox) teor <- ppois(ox, lambda=lambda) plot(teor,emp, xlab="Poisson Theoretical Quantiles", ylab="Sample Quantiles", main=main,ylim=c(0,1), cex=1.5, pch=19, col='blue') if(qqline){ lines(c(min(emp),max(emp)), c(min(emp),max(emp)), col="black", lwd=2) } } ################################################################################ # Example: total numbers of deaths by horse kicks # ################################################################################ Freq <- c (109, 65, 22, 3, 1) Counts <- 0:4 sum(Freq); # 200 ind.counts <- numeric(200) observ <- 1 for(i in 1:5){ counts <- Counts[i] for(j in 1:Freq[i]){ ind.counts[observ] <- counts observ <- observ + 1 } } mean(ind.counts) var(ind.counts) # Drawing a Poisson QQ-plot qqpois(ind.counts, lambda=mean(ind.counts), qqline=T) ################################################################################ # One-way Poisson model # # Horse kicks deaths # ################################################################################ # Reading, summarizing and displaying the data attach(kicks.data) str(kicks.data) str(kicks.data) print(kicks.data) # setwd('D:/Rodrigo/Courses/2010-Intro2StatModels/Lectures') # pdf('Figure-Ch4-06.pdf') par(mfrow=c(1,2), pty="s") hist(deaths, xlab='Deaths', col='lightblue', breaks=1:12, main='') plot(year, deaths, xlab='Year', ylab='Deaths', pch=19, col='blue', cex=1.5) # dev.off() # Fitting a model with a common intensity parameter common <- glm(deaths ~ 1 , family=poisson(link='log') ) deviance(common) coef(common) summary(common) # Fitting a saturated model (one intensity parameter per observation) saturated <- glm(deaths ~ factor(year) , family=poisson(link='log') ) deviance(saturated) coef(saturated) summary(saturated) # Using another parametrization saturated <- glm(deaths ~ 0 + factor(year) , family=poisson(link='log') ) summary(saturated) # pdf('Figure-Ch4-07.pdf') par(mfrow=c(1,2), pty='s') plot(deaths, coef(saturated), pch=19, cex=1.5, col='blue' ) plot(log(deaths), coef(saturated), pch=19, cex=1.5, col='blue' ) # dev.off() # Testing for differences between years # common <- glm(deaths ~ 1 , family=poisson(link='log') ) # saturated <- glm(deaths ~ 0 + factor(year) , family=poisson(link='log') ) anova(common, saturated, test="Chisq") detach(kicks.data) ################################################################################ # Two-ways Poisson model # # Horse kicks deaths # ################################################################################ attach(kicks.dataG) str(kicks.dataG) mean.year <- tapply(deaths, factor(year), mean) ; mean.year mean.group <- tapply(deaths, factor(group), mean) ; mean.group # setwd('D:/Rodrigo/Courses/2010-Intro2StatModels/Lectures') # pdf('Figure-Ch4-08.pdf') par(mfrow=c(1,2), pty='s') plot(1:10, mean.group, pch=19, cex=1.5, col='blue', xlab='Group' , ylab='Mean number of deaths') abline(h=mean(deaths)) plot(1875:1894, mean.year, type='l', lwd=2, col='blue', ylim=c(-1,5), xlab='Year', ylab='Mean number of deaths') for(i in 1:10){ points(1875:1894 + (runif(1)-1)/3, deaths[group==i]+ (runif(1)-1)/5, col='blue', pch=19) } # dev.off() # Fitting various models # Converting the variables year and group to factors (discrete variables) year <- factor(year); group <- factor(group) saturated <- glm(deaths ~ year + group + year:group, family=poisson(link='log')) additive <- glm(deaths ~ year + group , family=poisson(link='log')) only.year <- glm(deaths ~ year , family=poisson(link='log')) only.group <- glm(deaths ~ group , family=poisson(link='log')) null.model <- glm(deaths ~ 1 , family=poisson(link='log')) # Performing several tests anova(additive, saturated, test="Chisq") anova(only.year, additive, test="Chisq") anova(only.group, additive, test="Chisq") anova(null.model, only.year, test="Chisq") anova(null.model, only.group, test="Chisq") anova(null.model, saturated, test="Chisq") # Alternative (shorter) way to get the same tests saturated <- glm(deaths ~ year + group + year:group, family=poisson(link='log')) anova(saturated, test='Chisq') saturated <- glm(deaths ~ group + year+ year:group, family=poisson(link='log')) anova(saturated, test='Chisq') # Calculating the expected number 4 deaths, assuming a common intensity intensity <- exp(coef(null.model)); intensity mean(deaths) prob.of.4 <- dpois(4, lambda=intensity); prob.of.4 n.obs <- length(deaths); n.obs expected.4s <- n.obs * prob.of.4; expected.4s # Calculating the expected number 3 deaths, assuming a common intensity prob.of.3 <- dpois(3, lambda=exp(coef(null.model))); prob.of.3 expected.3s <- n.obs * prob.of.3; expected.3s # Calculating the expected number 2 deaths, assuming a common intensity prob.of.2 <- dpois(2, lambda=exp(coef(null.model))); prob.of.2 expected.2s <- n.obs * prob.of.2; expected.2s # Calculating the expected number 1 deaths, assuming a common intensity prob.of.1 <- dpois(1, lambda=exp(coef(null.model))); prob.of.1 expected.1s <- n.obs * prob.of.1; expected.1s # Calculating the expected number 0 deaths, assuming a common intensity prob.of.0 <- dpois(0, lambda=exp(coef(null.model))); prob.of.0 expected.0s <- n.obs * prob.of.0; expected.0s # Calculating the expected number 5 deaths, assuming a common intensity prob.of.5 <- dpois(5, lambda=exp(coef(null.model))); prob.of.5 expected.5s <- n.obs * prob.of.5; expected.5s Expected <- c(expected.0s, expected.1s, expected.2s, expected.3s, expected.4s) Expected Observed <- table (deaths) ; Observed Chi.square.comp <- (Observed-Expected)^2 /Expected ; Chi.square.comp cbind(Observed, Expected, Observed-Expected, Chi.square.comp) Chisq.statistic <- sum(Chi.square.comp) ; Chisq.statistic pchisq( Chisq.statistic , df= 5, lower.tail=F) par(mfrow=c(1,2), pty='s') plot(Expected , Observed-Expected, col='blue', pch=19, cex=1.5) abline(h=0) plot(Expected , (Observed-Expected)/sqrt(Expected), col='blue', pch=19, cex=1.5) abline(h=0) detach(kicks.dataG) # ---------------------------------------------------------------------------- # # # # THE END! # # # # ---------------------------------------------------------------------------- #