################################################################################ # # # Stat-Tutorial-15-PoissonProcess # # The Poisson distribution arises from the Poisson process. # # # R. Labouriau # Last revision: Spring 2019 # # Copyright © 2018 by Rodrigo Labouriau ################################################################################ # # This tutorial will demonstrate how the Poisson distribution arises from an # artificial scenario imitating a scenario fullfilling the three assumptions of # the Poisson process (see Chapter04, section 4.1.1). # # The procedure used will be the following: # First, we consider an area where a certain binary event might occur at # different positions. The reader might imagine a microscopy field where tained # bacterias or hemaceas might be observed (or luminescent signals from marked # products of an enzyme are observed or even simply think in a field in which # the presence of a weed is determined). In the simulations we will perform, the # area will be represented by a regular grid of positions, say 200 by 200 # positions, at which the binary event might occur (i.e. find an hemacea or # a bacteria or a weed). # Next, the area is divided in squared sub-areas, say of 100 pointd (10 by 10), # which will play the rule of sampling in a Poisson experiment. The number of # occurrences of the basic event is then recorded for each of these sub-areas. # These counts are the observations of counts of our simulated experiment and # will turn to be approximately Poisson distributed. # # I will demonstrate this and then I will simulate a non-homogeneous field and # show that the distribution of the counts will depart from the Poisson # distribution! # ################################################################################ # First I establish some tools to verify whether some observed counts are # reasonably modeled by a Poisson distributions. # Here I will make a function that makes a chi-square test of goodness of fit # of a Poisson distribution testPois <- function (x, lambda = mean(x), Nsim = 9999, digits = 4) { K <- max(x, na.rm = TRUE) + 1 Expected <- Observed <- numeric(K) for (k in 0:K) { Expected[k] <- sum(dpois(x = k - 1, lambda = lambda)) Observed[k] <- sum(x == k - 1) } Pvalue <- SimPvalues(Expected, Observed, Nsim = Nsim) Pvalue <- round (Pvalue, digits = digits) return(Pvalue) } SimPvalues <- function (Expected, Observed, Nsim = 1000) { K <- length(Observed) Nobs <- sum(Observed) Prob <- Expected/sum(Expected) Chsq <- sum((Observed - Expected)^2/Expected) SimChisquares <- numeric(Nsim) for (i in 1:Nsim) { Counts <- sample(x = 1:K, prob = Prob, size = Nobs, replace = TRUE) SimObserved <- numeric(K) for (k in 1:K) { SimObserved[k] <- sum(Counts == k) } SimChisquares[i] <- sum((SimObserved - Expected)^2/Expected) } return(mean(Chsq < SimChisquares)) } # Here is a function to draw a histogram of some observations and draw # the expected frequencies of a theoretical Poisson distribution # Here is a function to draw a histogram of some observations and draw # the expected frequencies of a theoretical Poisson distribution hpois <- function (x, lambda = mean(x), main = "", xlab = "", sub = "", Legend = TRUE, testAdherence = FALSE, Nsim = 9999, digits = 3) { if (testAdherence) { Pvalue <- round(testPoissonDistribution(x, Nsim = Nsim), digits = digits) sub <- paste("P-value for adherence to a Poisson distribution =", Pvalue) } br <- seq(from = min(x) - 1, to = max(x) + 1, by = 0.5) hist(x, breaks = br, freq = T, main = main, sub = sub, ylab = "", xlab = xlab, col = "lightblue", xlim = c(min(x) - 1, max(x) + 1)) points((min(x):(max(x) + 1)) + 0.2, dpois(min(x):(max(x) + 1), lambda = lambda) * length(x), type = "h", lwd = 5, col = "red") if (Legend) { legend("topright", legend = c("Observed", "Predicted"), col = c("lightblue", "red"), lwd = c(15, 5), bty = "n") } } # Here is a function to draw a Poisson QQ-plot qqpois <- function (x, lambda = mean(x), main = " ", sub = " ", qqline = TRUE, qqlineAll = TRUE, Local = FALSE, cex = 1.5, pch = 19, testAdherence = FALSE, Nsim = 9999, digits = 3) { if (testAdherence) { Pvalue <- round(testPois(x, Nsim = Nsim), digits = digits) sub <- paste("P-value for adherence to a Poisson distribution =", Pvalue) } ox <- x[order(x)] emp <- ecdf(ox)(ox) teor <- ppois(ox, lambda = lambda) if (Local) { plot(teor, emp, xlab = "Poisson Theoretical Quantiles", ylab = "Sample Quantiles", main = main, sub = sub, ylim = c(min(emp), max(emp)), xlim = c(min(teor), max(teor)), cex = cex, pch = pch, col = "blue") } else { plot(teor, emp, xlab = "Poisson Theoretical Quantiles", ylab = "Sample Quantiles", main = main, sub = sub, ylim = c(0, 1), xlim = c(0, 1), cex = cex, pch = pch, col = "blue") } if (qqline) { lines(c(min(emp), max(emp)), c(min(emp), max(emp)), col = "black", lwd = 2) } if (qqlineAll) { abline(0, 1, col = "red") } } # Illustrating the procedures proposed above set.seed(2345) X <- rpois(100, 5) qqpois(X) testPoissonDistribution(X) hpois(X, testAdherence = TRUE) X <- rnorm(100, mean = 5) X <- floor(X) table(X) qqpois(X) testPois(X) hpois(X, testAdherence = TRUE) X <- runif(100, min = 0, max = 10) X <- floor(X) table(X) qqpois(X) testPois(X) hpois(X, testAdherence = TRUE) set.seed(334) x <- rpois(1000,lambda=5) hpois(x, Legend=T) qqpois(x) testPois(x) qqnorm(x) qqline(x) set.seed(123) x <- rpois(10,lambda=3) hpois(x, Legend=T) qqpois(x) testPois(x) ############################### # Generating a Poisson process nrows <- 200 # Setting the number of rows of the field ncols <- 200 # Setting the number of columns of the field gridSize <- 10 # Seting the size (side) of the sub-areas numberPoints <- nrows*ncols set.seed(4321) Events <- rbinom(numberPoints, size=1, prob=0.1) # Generating the events at each point Area <- matrix(Events, nrow=nrows) # Ploting the field image(1:ncols, 1:nrows, Area, xlab="", ylab="") cutH <- seq(from=0, to=nrows, by=gridSize) cutV <- seq(from=0, to=ncols, by=gridSize) abline(h=cutH, lwd=3) abline(v=cutV, lwd=3) # Counting the number of events per sub-area Counts <- numeric((length(cutH)-1)*(length(cutV)-1)) k <- 0 for(i in 1:(length(cutH)-1)){ for(j in 1:(length(cutV)-1)){ k <- k+1 Counts[k] <- sum(Area[(cutH[i]+1):cutH[i+1], (cutV[j]+1):cutV[j+1]]) } } # Verifying if the results are in agreement with a Poisson distribution mean(Counts); var(Counts) par(mfrow=c(1,2)) hpois(Counts, lambda=10, Legend=F) qqpois(Counts) par(mfrow=c(1,1)) testPois(Counts) ########### # Generating a non-nomogeneous process ########### set.seed(133) nrows <- 200 ncols <- 200 gridSize <- 10 numberPoints <- nrows*ncols Events <- rbinom(numberPoints, size=1, prob=0.15) Area1 <- matrix(Events, nrow=nrows) Events2 <- rbinom(numberPoints, size=1, prob=0.02) Area2 <- matrix(Events2, nrow=nrows) MixedArea <- Area1 for(j in 1:10){ for(i in 1:5){ x <- sort(sample(1:nrows, size=2)) y <- sort(sample(1:ncols, size=2)) MixedArea[ x[1]:x[2] , y[1]:y[2] ] <- Area2[ x[1]:x[2] , y[1]:y[2] ] } for(i in 1:5){ x <- sort(sample(1:nrows, size=2)) y <- sort(sample(1:ncols, size=2)) MixedArea[ x[1]:x[2] , y[1]:y[2] ] <- Area1[ x[1]:x[2] , y[1]:y[2] ] } } par(mfrow=c(1,3)) image(1:ncols, 1:nrows, Area1, xlab="", ylab="", main="Area 1") image(1:ncols, 1:nrows, Area2, xlab="", ylab="", main="Area 2") image(1:ncols, 1:nrows, MixedArea, xlab="", ylab="", main="Mixed Area") par(mfrow=c(1,1)) image(1:ncols, 1:nrows, MixedArea, xlab="", ylab="", main="Mixed Area") cutH <- seq(from=0, to=nrows, by=gridSize) cutV <- seq(from=0, to=ncols, by=gridSize) abline(h=cutH, lwd=2) abline(v=cutV, lwd=2) Counts1 <- numeric((length(cutH)-1)*(length(cutV)-1)) k <- 0 for(i in 1:(length(cutH)-1)){ for(j in 1:(length(cutV)-1)){ k <- k+1 Counts1[k] <- sum(Area1[(cutH[i]+1):cutH[i+1], (cutV[j]+1):cutV[j+1]]) } } mean(Counts1); var(Counts1) testPois(Counts1) par(mfrow=c(1,2)) hpois(Counts1, main="Area 1") qqpois(Counts1) par(mfrow=c(1,1)) Counts2 <- numeric((length(cutH)-1)*(length(cutV)-1)) k <- 0 for(i in 1:(length(cutH)-1)){ for(j in 1:(length(cutV)-1)){ k <- k+1 Counts2[k] <- sum(Area2[(cutH[i]+1):cutH[i+1], (cutV[j]+1):cutV[j+1]]) } } mean(Counts2); var(Counts2) testPois(Counts2) par(mfrow=c(1,2)) hpois(Counts2, main="Area 2") qqpois(Counts2) par(mfrow=c(1,1)) CountsM <- numeric((length(cutH)-1)*(length(cutV)-1)) k <- 0 for(i in 1:(length(cutH)-1)){ for(j in 1:(length(cutV)-1)){ k <- k+1 CountsM[k] <- sum(MixedArea[(cutH[i]+1):cutH[i+1], (cutV[j]+1):cutV[j+1]]) } } mean(CountsM); var(CountsM) testPois(CountsM) par(mfrow=c(1,2)) hpois(CountsM, main="Mixed Area") qqpois(CountsM) par(mfrow=c(1,1)) # Drawing a common Figure par(mfrow=c(2,3)) hpois(Counts1, main="Area 1") hpois(Counts2, main="Area 2") hpois(CountsM, main="Mixed Area") qqpois(Counts1, cex=1, pch=1) qqpois(Counts2, cex=1, pch=1) qqpois(CountsM, cex=1, pch=1, Local=T) ########################### # Other example ########################### nrows <- 200 ncols <- 200 gridSize <- 10 set.seed(11) numberPoints <- nrows*ncols Events <- rbinom(numberPoints, size=1, prob=0.030) Area1 <- matrix(Events, nrow=nrows) Events2 <- rbinom(numberPoints, size=1, prob=0.01) Area2 <- matrix(Events2, nrow=nrows) MixedArea <- Area1 for(j in 1:10){ for(i in 1:5){ x <- sort(sample(1:nrows, size=2)) y <- sort(sample(1:ncols, size=2)) MixedArea[ x[1]:x[2] , y[1]:y[2] ] <- Area2[ x[1]:x[2] , y[1]:y[2] ] } for(i in 1:5){ x <- sort(sample(1:nrows, size=2)) y <- sort(sample(1:ncols, size=2)) MixedArea[ x[1]:x[2] , y[1]:y[2] ] <- Area[ x[1]:x[2] , y[1]:y[2] ] } } par(mfrow=c(1,3)) image(1:ncols, 1:nrows, Area1, xlab="", ylab="", main="Area 1") image(1:ncols, 1:nrows, Area2, xlab="", ylab="", main="Area 2") image(1:ncols, 1:nrows, MixedArea, xlab="", ylab="", main="Mixed Area") par(mfrow=c(1,1)) image(1:ncols, 1:nrows, MixedArea, xlab="", ylab="", main="Mixed Area") cutH <- seq(from=0, to=nrows, by=gridSize) cutV <- seq(from=0, to=ncols, by=gridSize) abline(h=cutH) abline(v=cutV) Counts1 <- numeric((length(cutH)-1)*(length(cutV)-1)) k <- 0 for(i in 1:(length(cutH)-1)){ for(j in 1:(length(cutV)-1)){ k <- k+1 Counts1[k] <- sum(Area1[(cutH[i]+1):cutH[i+1], (cutV[j]+1):cutV[j+1]]) } } mean(Counts1); var(Counts1) testPois(Counts1) par(mfrow=c(1,2)) hpois(Counts1, main="Area 1") qqpois(Counts1) par(mfrow=c(1,1)) Counts2 <- numeric((length(cutH)-1)*(length(cutV)-1)) k <- 0 for(i in 1:(length(cutH)-1)){ for(j in 1:(length(cutV)-1)){ k <- k+1 Counts2[k] <- sum(Area2[(cutH[i]+1):cutH[i+1], (cutV[j]+1):cutV[j+1]]) } } mean(Counts2); var(Counts2) testPois(Counts2) par(mfrow=c(1,2)) hpois(Counts2, main="Area 2") qqpois(Counts2) par(mfrow=c(1,1)) CountsM <- numeric((length(cutH)-1)*(length(cutV)-1)) k <- 0 for(i in 1:(length(cutH)-1)){ for(j in 1:(length(cutV)-1)){ k <- k+1 CountsM[k] <- sum(MixedArea[(cutH[i]+1):cutH[i+1], (cutV[j]+1):cutV[j+1]]) } } mean(CountsM); var(CountsM) testPois(CountsM) par(mfrow=c(1,2)) hpois(CountsM, main="Mixed Area") qqpois(CountsM) par(mfrow=c(1,1)) hpois(CountsM, main="Mixed Area") qqpois(CountsM) # Drawing a common Figure par(mfrow=c(2,3)) hpois(Counts1, main="Area 1") hpois(Counts2, main="Area 2") hpois(CountsM, main="Mixed Area") qqpois(Counts1, cex=1, pch=1) qqpois(Counts2, cex=1, pch=1) qqpois(CountsM, cex=1, pch=1, Local=T) ################################################################################ # THE END! ################################################################################