################################################################################
#              
#
# Stat-Tutorial-15-PoissonProcess                                                  
#                                                                              
# The Poisson distribution arises from the Poisson process.                    
#                                                                              
#                                                                              
# R. Labouriau                                                                 
# Last revision: Spring 2019
#
# Copyright © 2018 by Rodrigo Labouriau
################################################################################
# 
# This tutorial will demonstrate how the Poisson distribution arises from an
# artificial scenario imitating a scenario fullfilling the three assumptions of 
# the Poisson process (see Chapter04, section 4.1.1). 
# 
# The procedure used will be the following:
# First, we consider an area where a certain binary event might occur at 
# different positions. The reader might imagine a microscopy field where tained
# bacterias or hemaceas might be observed (or luminescent signals from marked 
# products of an enzyme are observed or even simply think in a field in which 
# the presence of a weed is determined). In the simulations we will perform, the
# area will be represented by a regular grid of positions, say 200 by 200 
# positions, at which the binary event might occur (i.e. find an hemacea or 
# a bacteria or a weed). 
# Next, the area is divided in squared sub-areas, say of 100 pointd (10 by 10), 
# which will play the rule of sampling in a Poisson experiment. The number of 
# occurrences of the basic event is then recorded for each of these sub-areas.
# These counts are the observations of counts of our simulated experiment and
# will turn to be approximately Poisson distributed.
# 
# I will demonstrate this and then I will simulate a non-homogeneous field and
# show that the distribution of the counts will depart from the Poisson 
# distribution!
#
################################################################################

# First I establish some tools to verify whether some observed counts are 
# reasonably modeled by a Poisson distributions.

# Here I will make a function that makes a chi-square test of goodness of fit
# of a Poisson distribution

testPois <- function (x, lambda = mean(x), Nsim = 9999, digits = 4) 
{
  K <- max(x, na.rm = TRUE) + 1
  Expected <- Observed <- numeric(K)
  for (k in 0:K) {
    Expected[k] <- sum(dpois(x = k - 1, lambda = lambda))
    Observed[k] <- sum(x == k - 1)
  }
  Pvalue <- SimPvalues(Expected, Observed, Nsim = Nsim)
  Pvalue <- round (Pvalue, digits = digits)
  return(Pvalue)
}

SimPvalues <- function (Expected, Observed, Nsim = 1000) 
{
  K <- length(Observed)
  Nobs <- sum(Observed)
  Prob <- Expected/sum(Expected)
  Chsq <- sum((Observed - Expected)^2/Expected)
  SimChisquares <- numeric(Nsim)
  for (i in 1:Nsim) {
    Counts <- sample(x = 1:K, prob = Prob, size = Nobs, replace = TRUE)
    SimObserved <- numeric(K)
    for (k in 1:K) {
      SimObserved[k] <- sum(Counts == k)
    }
    SimChisquares[i] <- sum((SimObserved - Expected)^2/Expected)
  }
  return(mean(Chsq < SimChisquares))
}

# Here is a function to draw a histogram of some observations and draw
# the expected frequencies of a theoretical Poisson distribution

# Here is a function to draw a histogram of some observations and draw
# the expected frequencies of a theoretical Poisson distribution

hpois <- function (x, lambda = mean(x), main = "", xlab = "", sub = "", 
                   Legend = TRUE, testAdherence = FALSE, 
                   Nsim = 9999, digits = 3) 
{
  if (testAdherence) {
    Pvalue <- round(testPoissonDistribution(x, Nsim = Nsim), 
                    digits = digits)
    sub <- paste("P-value for adherence to a Poisson distribution =", 
                 Pvalue)
  }
  br <- seq(from = min(x) - 1, to = max(x) + 1, by = 0.5)
  hist(x, breaks = br, freq = T, main = main, sub = sub, ylab = "", 
       xlab = xlab, col = "lightblue", xlim = c(min(x) - 1, 
                                                max(x) + 1))
  points((min(x):(max(x) + 1)) + 0.2, dpois(min(x):(max(x) + 
                                                      1), lambda = lambda) * length(x), type = "h", lwd = 5, 
         col = "red")
  if (Legend) {
    legend("topright", legend = c("Observed", "Predicted"), 
           col = c("lightblue", "red"), lwd = c(15, 5), bty = "n")
  }
}

# Here is a function to draw a Poisson QQ-plot
qqpois <- function (x, lambda = mean(x), main = " ", sub = " ", qqline = TRUE, 
                    qqlineAll = TRUE, Local = FALSE, cex = 1.5, pch = 19, testAdherence = FALSE, 
                    Nsim = 9999, digits = 3) 
{
  if (testAdherence) {
    Pvalue <- round(testPois(x, Nsim = Nsim), 
                    digits = digits)
    sub <- paste("P-value for adherence to a Poisson distribution =", 
                 Pvalue)
  }
  ox <- x[order(x)]
  emp <- ecdf(ox)(ox)
  teor <- ppois(ox, lambda = lambda)
  if (Local) {
    plot(teor, emp, xlab = "Poisson Theoretical Quantiles", 
         ylab = "Sample Quantiles", main = main, sub = sub, 
         ylim = c(min(emp), max(emp)), xlim = c(min(teor), 
                                                max(teor)), cex = cex, pch = pch, col = "blue")
  }
  else {
    plot(teor, emp, xlab = "Poisson Theoretical Quantiles", 
         ylab = "Sample Quantiles", main = main, sub = sub, 
         ylim = c(0, 1), xlim = c(0, 1), cex = cex, pch = pch, 
         col = "blue")
  }
  if (qqline) {
    lines(c(min(emp), max(emp)), c(min(emp), max(emp)), col = "black", 
          lwd = 2)
  }
  if (qqlineAll) {
    abline(0, 1, col = "red")
  }
}


# Illustrating the procedures proposed above

set.seed(2345)
X <- rpois(100, 5)
qqpois(X)
testPoissonDistribution(X)
hpois(X, testAdherence = TRUE)

X <- rnorm(100, mean = 5)
X <- floor(X)
table(X)
qqpois(X)
testPois(X)
hpois(X, testAdherence = TRUE)

X <- runif(100, min = 0, max = 10)
X <- floor(X)
table(X)
qqpois(X)
testPois(X)
hpois(X, testAdherence = TRUE)

set.seed(334)
x <- rpois(1000,lambda=5)
hpois(x, Legend=T)
qqpois(x)
testPois(x)

qqnorm(x)
qqline(x)

set.seed(123)
x <- rpois(10,lambda=3)
hpois(x, Legend=T)
qqpois(x)
testPois(x)

###############################
# Generating a Poisson process

nrows <- 200        # Setting the number of rows of the field
ncols <- 200        # Setting the number of columns of the field
gridSize <- 10      # Seting the size (side) of the sub-areas

numberPoints <- nrows*ncols
set.seed(4321)
Events <- rbinom(numberPoints, size=1, prob=0.1) # Generating the events at each point

Area <- matrix(Events, nrow=nrows)

# Ploting the field
image(1:ncols, 1:nrows, Area, xlab="", ylab="")

cutH <- seq(from=0, to=nrows, by=gridSize)
cutV <- seq(from=0, to=ncols, by=gridSize)

abline(h=cutH, lwd=3)
abline(v=cutV, lwd=3)

# Counting the number of events per sub-area
Counts <- numeric((length(cutH)-1)*(length(cutV)-1))
k <- 0
for(i in 1:(length(cutH)-1)){
  for(j in 1:(length(cutV)-1)){
    k <- k+1
    Counts[k] <- sum(Area[(cutH[i]+1):cutH[i+1], (cutV[j]+1):cutV[j+1]])
  } 
}

# Verifying if the results are in agreement with a Poisson distribution
mean(Counts); var(Counts)

par(mfrow=c(1,2))
hpois(Counts, lambda=10, Legend=F)
qqpois(Counts)
par(mfrow=c(1,1))
testPois(Counts)

###########
# Generating a non-nomogeneous process
###########
set.seed(133)
nrows <- 200
ncols <- 200
gridSize <- 10

numberPoints <- nrows*ncols
Events  <- rbinom(numberPoints, size=1, prob=0.15)
Area1 <- matrix(Events, nrow=nrows)

Events2 <- rbinom(numberPoints, size=1, prob=0.02)
Area2 <- matrix(Events2, nrow=nrows)
   
MixedArea <- Area1
for(j in 1:10){
 for(i in 1:5){
  x <- sort(sample(1:nrows, size=2))
  y <- sort(sample(1:ncols, size=2))
  MixedArea[ x[1]:x[2] , y[1]:y[2] ] <- Area2[ x[1]:x[2] , y[1]:y[2] ]
 }
 for(i in 1:5){
  x <- sort(sample(1:nrows, size=2))
  y <- sort(sample(1:ncols, size=2))
  MixedArea[ x[1]:x[2] , y[1]:y[2] ] <- Area1[ x[1]:x[2] , y[1]:y[2] ]
 }
}


par(mfrow=c(1,3))
image(1:ncols, 1:nrows, Area1, xlab="", ylab="", main="Area 1")
image(1:ncols, 1:nrows, Area2, xlab="", ylab="", main="Area 2")
image(1:ncols, 1:nrows, MixedArea, xlab="", ylab="", main="Mixed Area")
par(mfrow=c(1,1))

image(1:ncols, 1:nrows, MixedArea, xlab="", ylab="", main="Mixed Area")

cutH <- seq(from=0, to=nrows, by=gridSize)
cutV <- seq(from=0, to=ncols, by=gridSize)

abline(h=cutH, lwd=2)
abline(v=cutV, lwd=2)

Counts1 <- numeric((length(cutH)-1)*(length(cutV)-1))
k <- 0
for(i in 1:(length(cutH)-1)){
  for(j in 1:(length(cutV)-1)){
    k <- k+1
    Counts1[k] <- sum(Area1[(cutH[i]+1):cutH[i+1], (cutV[j]+1):cutV[j+1]])
  } 
}
mean(Counts1); var(Counts1)
testPois(Counts1)
par(mfrow=c(1,2))
hpois(Counts1, main="Area 1")
qqpois(Counts1)
par(mfrow=c(1,1))

Counts2 <- numeric((length(cutH)-1)*(length(cutV)-1))
k <- 0
for(i in 1:(length(cutH)-1)){
  for(j in 1:(length(cutV)-1)){
    k <- k+1
    Counts2[k] <- sum(Area2[(cutH[i]+1):cutH[i+1], (cutV[j]+1):cutV[j+1]])
  } 
}
mean(Counts2); var(Counts2)
testPois(Counts2)
par(mfrow=c(1,2))
hpois(Counts2, main="Area 2")
qqpois(Counts2)
par(mfrow=c(1,1))

CountsM <- numeric((length(cutH)-1)*(length(cutV)-1))
k <- 0
for(i in 1:(length(cutH)-1)){
  for(j in 1:(length(cutV)-1)){
    k <- k+1
    CountsM[k] <- sum(MixedArea[(cutH[i]+1):cutH[i+1], (cutV[j]+1):cutV[j+1]])
  } 
}

mean(CountsM); var(CountsM)
testPois(CountsM)

par(mfrow=c(1,2))
hpois(CountsM, main="Mixed Area")
qqpois(CountsM)
par(mfrow=c(1,1))

# Drawing a common Figure
par(mfrow=c(2,3))
hpois(Counts1, main="Area 1")
hpois(Counts2, main="Area 2")
hpois(CountsM, main="Mixed Area")
qqpois(Counts1, cex=1, pch=1)
qqpois(Counts2, cex=1, pch=1)
qqpois(CountsM, cex=1, pch=1, Local=T)

###########################
# Other example
###########################
nrows <- 200
ncols <- 200
gridSize <- 10

set.seed(11)
numberPoints <- nrows*ncols
Events  <- rbinom(numberPoints, size=1, prob=0.030)
Area1 <- matrix(Events, nrow=nrows)

Events2 <- rbinom(numberPoints, size=1, prob=0.01)
Area2 <- matrix(Events2, nrow=nrows)

MixedArea <- Area1
for(j in 1:10){
  for(i in 1:5){
    x <- sort(sample(1:nrows, size=2))
    y <- sort(sample(1:ncols, size=2))
    MixedArea[ x[1]:x[2] , y[1]:y[2] ] <- Area2[ x[1]:x[2] , y[1]:y[2] ]
  }
  for(i in 1:5){
    x <- sort(sample(1:nrows, size=2))
    y <- sort(sample(1:ncols, size=2))
    MixedArea[ x[1]:x[2] , y[1]:y[2] ] <- Area[ x[1]:x[2] , y[1]:y[2] ]
  }
}


par(mfrow=c(1,3))
image(1:ncols, 1:nrows, Area1, xlab="", ylab="", main="Area 1")
image(1:ncols, 1:nrows, Area2, xlab="", ylab="", main="Area 2")
image(1:ncols, 1:nrows, MixedArea, xlab="", ylab="", main="Mixed Area")
par(mfrow=c(1,1))

image(1:ncols, 1:nrows, MixedArea, xlab="", ylab="", main="Mixed Area")

cutH <- seq(from=0, to=nrows, by=gridSize)
cutV <- seq(from=0, to=ncols, by=gridSize)

abline(h=cutH)
abline(v=cutV)

Counts1 <- numeric((length(cutH)-1)*(length(cutV)-1))
k <- 0
for(i in 1:(length(cutH)-1)){
  for(j in 1:(length(cutV)-1)){
    k <- k+1
    Counts1[k] <- sum(Area1[(cutH[i]+1):cutH[i+1], (cutV[j]+1):cutV[j+1]])
  } 
}
mean(Counts1); var(Counts1)
testPois(Counts1)
par(mfrow=c(1,2))
hpois(Counts1, main="Area 1")
qqpois(Counts1)
par(mfrow=c(1,1))

Counts2 <- numeric((length(cutH)-1)*(length(cutV)-1))
k <- 0
for(i in 1:(length(cutH)-1)){
  for(j in 1:(length(cutV)-1)){
    k <- k+1
    Counts2[k] <- sum(Area2[(cutH[i]+1):cutH[i+1], (cutV[j]+1):cutV[j+1]])
  } 
}
mean(Counts2); var(Counts2)
testPois(Counts2)
par(mfrow=c(1,2))
hpois(Counts2, main="Area 2")
qqpois(Counts2)
par(mfrow=c(1,1))

CountsM <- numeric((length(cutH)-1)*(length(cutV)-1))
k <- 0
for(i in 1:(length(cutH)-1)){
  for(j in 1:(length(cutV)-1)){
    k <- k+1
    CountsM[k] <- sum(MixedArea[(cutH[i]+1):cutH[i+1], (cutV[j]+1):cutV[j+1]])
  } 
}

mean(CountsM); var(CountsM)
testPois(CountsM)

par(mfrow=c(1,2))
hpois(CountsM, main="Mixed Area")
qqpois(CountsM)
par(mfrow=c(1,1))
hpois(CountsM, main="Mixed Area")
qqpois(CountsM)

# Drawing a common Figure
par(mfrow=c(2,3))
hpois(Counts1, main="Area 1")
hpois(Counts2, main="Area 2")
hpois(CountsM, main="Mixed Area")
qqpois(Counts1, cex=1, pch=1)
qqpois(Counts2, cex=1, pch=1)
qqpois(CountsM, cex=1, pch=1, Local=T)

################################################################################

#                        THE END!

################################################################################