################################################################################
# Stat-Tutorial-07-CentralLimitTheoremFailure.r                                #
#                                                                              #
# This is a tutorial on the failure of the central limit theorem,              #
# when wrongly applied.                                                        #
#                                                                              #
# R. Labouriau                                                                 #
#                                                                              #
# Last revised: Fall 2020                                                      #
################################################################################
# Copyright © 2019 by Rodrigo Labouriau

# OG <- "~Intro2R/Temp"

# Studying the distribution of the ratio between two independent, normally
# distributed (standard normal) random variables

# Draw the two samples: 1000 standard normal values each (Y first, then Z).
Y <- rnorm(n = 1000)
Z <- rnorm(n = 1000)

# pdf(paste(OG, "Figure-Ch5-104.pdf", sep=""))
# 2 x 2 panel, filled row by row: histogram and normal QQ-plot of Y on the
# first row, the same pair for Z on the second row.
par(mfrow = c(2, 2))

hist(Y)
qqnorm(Y)
qqline(Y)

hist(Z)
qqnorm(Z)
qqline(Z)
# dev.off()

# pdf(paste(OG, "Figure-Ch5-105.pdf", sep=""))
# Back to a single plotting panel.
par(mfrow = c(1, 1))

# Elementwise ratio of the two normal samples Y and Z.
X <- Y / Z

# hist(X)
# Normal QQ-plot of the ratio, with the reference line added on top.
qqnorm(X)
qqline(X)
# dev.off()

# The ratio, X, is not normally distributed.
# Indeed, the ratio of two independent standard normal random variables is
# known to follow the (standard) Cauchy distribution.

# Could we then use the central limit theorem to obtain a normal approximation
# to the Cauchy distribution (the ratio)?

# Standardised sample mean, as used in the central limit theorem:
#
#   sqrt(n) * (mean(x) - centre) / sd(x),   with n = length(x)
#
# Arguments:
#   x      - numeric vector of observations.
#   centre - value subtracted from the sample mean. The default is 0 (not
#            0.5): the ratio of two zero-mean normals is symmetric about 0,
#            which is the median of the standard Cauchy distribution. Its
#            mean does not exist, so there is no "true mean" to plug in.
# Value: a single number.
clt_statistic <- function(x, centre = 0) {
  sqrt(length(x)) * (mean(x) - centre) / sd(x)
}

# Simulates the standardised mean of a ratio of two normal samples.
#
# For each replicate, n.observations values y and then n.observations values
# z are drawn with rnorm(), the ratio x = y / z is formed, and
# clt_statistic(x, centre) is computed.
#
# Arguments:
#   n.rep          - number of replicates (simulated statistics).
#   n.observations - number of observations per replicate.
#   centre         - passed on to clt_statistic().
# Value: numeric vector of length n.rep (empty when n.rep is 0).
simulate_ratio_clt <- function(n.rep, n.observations, centre = 0) {
  vapply(
    seq_len(n.rep),
    function(i) {
      y <- rnorm(n.observations)
      z <- rnorm(n.observations)
      clt_statistic(y / z, centre)
    },
    numeric(1)
  )
}

# First attempt: 200 replicates of 1000 observations each.
n.rep <- 200
n.observations <- 1000
X <- simulate_ratio_clt(n.rep, n.observations)

par(mfrow = c(1, 1))
hist(X)
# pdf(paste(OG, "Figure-Ch5-106.pdf", sep=""))
qqnorm(X)
qqline(X)
# dev.off()

# The approximation does not work well.
# Let's try with more replicates.

n.rep <- 10000
n.observations <- 1000
X <- simulate_ratio_clt(n.rep, n.observations)

par(mfrow = c(2, 1))
hist(X)
qqnorm(X)
qqline(X)

# Even worse. We try with many more observations per replicate
# (this takes a while).

n.rep <- 1000
n.observations <- 100000
X <- simulate_ratio_clt(n.rep, n.observations)

par(mfrow = c(1, 1))
# hist(X)
# pdf(paste(OG, "Figure-Ch5-107.pdf", sep=""))
qqnorm(X)
qqline(X)
# dev.off()

# Well, the theorem is not working!
# The point is that the Cauchy distribution does not have a finite variance
# (nor even a well-defined mean), which violates the hypotheses of the
# central limit theorem.
# Note that ratios like this one occur in many practical situations!