################################################################################
#                                                                              
# Stat-Tutorial-10-NonCorrelatedButDependent                                                                  
#                                                                              
# I construct a range of examples of pairs of variables that are non-correlated
# (or approximately non-correlated) but are clearly DEPENDENT.
#                                                                              
# R. Labouriau                                                                 
# Last revision: Spring 2020
#
# Copyright © 2018 by Rodrigo Labouriau
################################################################################

# First I calculate a pair of deterministic variables that are essentially 
# non-correlated, but one carries information on the other.
# To do so I need a function that constructs a grid in a region of the plane
# (indeed [-1/2,1/2] x [-1/2,1/2])
# Builds a regular square grid of points.
#
# Arguments:
#   from, to  lower and upper limit of the grid along each of the two axes
#   by        mesh size, i.e. the spacing between neighbouring grid values
#
# Returns a data frame with one row per grid point and two columns, U1 and
# U2, holding the coordinates of the point. The rows run through all pairs of
# values of seq(from, to, by), with U2 varying fastest.
Grid <- function(from = -1/2, to = 1/2, by = 0.1){
  # as.numeric() keeps the columns of type double whatever the type of the
  # arguments (seq() returns integers when it is given integers).
  Seq <- as.numeric(seq(from = from, to = to, by = by))
  n <- length(Seq)
  # Vectorised cartesian product: every value of Seq is paired with the
  # whole of Seq.
  data.frame(U1 = rep(Seq, each = n), U2 = rep(Seq, times = n))
}

# Build a grid with a fine mesh over the default square.
U <- Grid(by = 0.01)
# X and Y are, respectively, the first and the second coordinates of the
# points of the grid (defined in a square in the plane).
X <- U[["U1"]]
Y <- U[["U2"]]

# Not surprisingly, X and Y are (essentially) not correlated.
cor(X, Y)
cor.test(X, Y)
plot(X, Y)

# Next, I restrict the grid to a sub-region, chosen in such a way that the
# variables X and Y, again defined as the first and the second coordinates
# of the points of the new region of the plane, are still (essentially)
# non-correlated, BUT the X and Y variables carry information on each other!

# Keep only the grid points with |U1| + |U2| < 1/2.
DD <- U[abs(U$U1) + abs(U$U2) < 1/2, ]
X <- DD$U1
Y <- DD$U2

cor(X, Y)
cor.test(X, Y)
plot(X, Y)

# Examining the plot we see that the X and Y variables carry information
# on each other! Indeed, knowing that X = 0 implies that Y takes values
# between -0.49 and 0.49; on the other hand, knowing that X takes the value
# 0.49 implies that Y takes the value 0.

# The grid values are produced by seq() with a step of 0.01, so they are only
# approximately equal to decimal numbers such as 0.49. A grid line is
# therefore selected with a small tolerance (far below the mesh size) rather
# than with an exact ==.
AtGridValue <- function(x, value, tol = 1e-8) abs(x - value) < tol

Y[AtGridValue(X, 0)]
range(Y[AtGridValue(X, 0)])

Y[AtGridValue(X, 0.49)]

# Moreover, see the following
Y[AtGridValue(X, -0.3)]

################################################################################
# 
# I make below a random version of the construction presented above.
# The idea is to generate uniformly distributed variables in a region of the 
# plane (the square [-1/2,1/2] x [-1/2,1/2]) and then define new variables
# by symmetrically restricting that region ... Now, if I did not manage to 
# explain this idea of construction, it is just my fault, but then give me 
# some credit and just run the examples and observe the behaviour of the pairs 
# of variables generated ... they will all be essentially NON-CORRELATED but
# definitely NOT INDEPENDENT.
#
# The simulation techniques used here are instances of a general simulation
# technique called acceptance-rejection simulation.
#
################################################################################

# Generates n points uniformly distributed in a square in the plane.
#
# Arguments:
#   n         number of points to generate
#   min, max  lower and upper limit of the square along each axis
#
# Returns a data frame with n rows and the columns U1 and U2 (the two
# coordinates); all the U1 values are drawn before the U2 values.
runif2 <- function(n, min = -1/2, max = 1/2){
  data.frame(
    U1 = runif(n, min = min, max = max),
    U2 = runif(n, min = min, max = max)
  )
}

# Fix the seed, so that the simulation is reproducible, and draw 10,000
# uniform points in the default square.
set.seed(1432)
U <- runif2(n = 10000)

# By construction, the two coordinates of the 10,000 points are independent.
plot(U$U1, U$U2)
cor.test(U$U1, U$U2)

# Now I restrict the square symmetrically (keeping the points with
# |U1| + |U2| < 1/2), obtaining two variables that are not independent, but
# are essentially un-correlated! :-)
DD <- U[abs(U[["U1"]]) + abs(U[["U2"]]) < 0.5, ]
X <- DD[["U1"]]
Y <- DD[["U2"]]

# 2 x 2 panel: the histograms of X and Y (with a kernel density estimate
# superimposed) on the diagonal, the two scatter plots off the diagonal.
par(mfrow = c(2, 2))
hist(X, freq = FALSE, col = "lightblue")
lines(density(X), lwd = 2, col = "red")
plot(Y, X, col = "lightblue", pch = 19)
abline(h = 0, v = 0)
plot(X, Y, col = "lightblue", pch = 19)
abline(h = 0, v = 0)
hist(Y, freq = FALSE, col = "lightblue")
lines(density(Y), lwd = 2, col = "red")
par(mfrow = c(1, 1))

mean(X)
mean(Y)
cor.test(X, Y)

# I define below a convenient function to report the results of the next
# simulations.
#
# Arguments:
#   X, Y  the two numeric variables to be compared
#
# Side effect: draws a 2 x 2 panel with the histograms of X and Y (each with
# a kernel density estimate superimposed) and the scatter plots of X against
# Y and of Y against X, with the two axes through the origin marked.
#
# Returns a named numeric vector with the elements "Estimated correlation"
# (from cor) and "p-value" (from cor.test).
ReportResults <- function(X, Y){
  # par() returns the previous settings; restoring them on exit (even when a
  # plotting call fails) leaves the layout of the caller untouched.
  OldPar <- par(mfrow = c(2, 2))
  on.exit(par(OldPar), add = TRUE)

  hist(X, col = "lightblue", freq = FALSE)
  lines(density(X), col = "red", lwd = 2)
  plot(Y, X, pch = 19, col = "lightblue")
  abline(v = 0, h = 0)
  plot(X, Y, pch = 19, col = "lightblue")
  abline(v = 0, h = 0)
  hist(Y, col = "lightblue", freq = FALSE)
  lines(density(Y), col = "red", lwd = 2)

  Out <- c(cor(X, Y), cor.test(X, Y)$p.value)
  names(Out) <- c("Estimated correlation", "p-value")
  Out
}

# Testing the function on the current X and Y, i.e. the coordinates of the
# simulated points kept by the restriction abs(U1) + abs(U2) < 1/2 above.
ReportResults(X, Y)

################################################################################

# Here is a different pattern. Again, two dependent un-correlated random
# variables ... :-)

# Euclidean distance from the point (x, y) to the origin (0, 0).
# Works element by element when x and y are vectors.
Distance2zero <- function(x, y){
  SquaredDistance <- x^2 + y^2
  sqrt(SquaredDistance)
}
# Keep the simulated points at distance less than 1/2 from the origin and
# report on their coordinates.
DD <- U[Distance2zero(U[["U1"]], U[["U2"]]) < 0.5, ]
X <- DD[["U1"]]
Y <- DD[["U2"]]

ReportResults(X, Y)

################################################################################

# The same as before, but using more points ... this is a bit slow ...

# set.seed(31416)
# U <- runif2(200000)
# DD <- U[Distance2zero(U$U1, U$U2) < 1/2 &  Distance2zero(U$U1, U$U2) > 1/4, ]
# X <- DD$U1; Y <- DD$U2
# 
# ReportResults(X, Y) 

################################################################################

# set.seed(31416)
# U <- runif2(200000)

# Yet another pattern ... (the last one is the most impressive!)

# Acceptance rule: TRUE for the points (X, Y) whose distance to the origin
# lies strictly between 1/4 and 1/2 (a ring around the origin).
Include <- function(X, Y){
  Radius <- Distance2zero(X, Y)
  Radius > 1/4 & Radius < 1/2
}

# Keep the simulated points accepted by Include() and report on their
# coordinates.
DD <- U[Include(U[["U1"]], U[["U2"]]), ]
X <- DD[["U1"]]
Y <- DD[["U2"]]

ReportResults(X, Y)


################################################################################

# set.seed(31416)
# U <- runif2(200000)

# and another pattern ....

# Acceptance rule: TRUE for the points (X, Y) that are either closer than 0.1
# to the origin or at a distance strictly between 1/4 and 1/2 from it.
Include <- function(X, Y){
  Radius <- Distance2zero(X, Y)
  InCore <- Radius < 0.1
  InRing <- Radius < 1/2 & Radius > 1/4
  InCore | InRing
}

# Keep the simulated points accepted by Include() and report on their
# coordinates.
DD <- U[Include(U[["U1"]], U[["U2"]]), ]
X <- DD[["U1"]]
Y <- DD[["U2"]]

ReportResults(X, Y)

################################################################################

# Acceptance rule: TRUE for the points (X, Y) with Y < 0 whose distance to
# the origin lies strictly between 1/4 and 1/2 (the lower half of a ring).
Include <- function(X, Y){
  Radius <- Distance2zero(X, Y)
  InRing <- Radius < 1/2 & Radius > 1/4
  Y < 0 & InRing
}

# Keep the simulated points accepted by Include() and report on their
# coordinates.
DD <- U[Include(U[["U1"]], U[["U2"]]), ]
X <- DD[["U1"]]
Y <- DD[["U2"]]

ReportResults(X, Y)

################################################################################

# Acceptance rule: TRUE for the points (X, Y) with |X| + |Y| > 1/2.
#
# Arguments:
#   X, Y  numeric vectors with the coordinates of the points
#
# The rule is computed from the arguments X and Y, not from the global data
# frame U, so the function gives the right answer for any set of points.
Include <- function(X, Y){
  abs(X) + abs(Y) > 1/2
}

# Keep the simulated points accepted by Include() and report on their
# coordinates.
DD <- U[Include(U[["U1"]], U[["U2"]]), ]
X <- DD[["U1"]]
Y <- DD[["U2"]]

ReportResults(X, Y)

################################################################################

# Acceptance rule: TRUE for the points (X, Y) that satisfy one of
#   - Y > 0.1 and |X| + |Y| > 0.5;
#   - Y < 0 and a distance to the origin strictly between 0.4 and 0.5.
#
# Arguments:
#   X, Y  numeric vectors with the coordinates of the points
#
# The rule is computed from the arguments X and Y only, not from the global
# data frame U.
Include <- function(X, Y){
  Radius <- Distance2zero(X, Y)
  OutsideDiamond <- abs(X) + abs(Y) > 0.5
  LowerArc <- Radius < 0.5 & Radius > 0.4
  (Y > 0.1 & OutsideDiamond) | (Y < 0 & LowerArc)
}

# Keep the simulated points accepted by Include() and report on their
# coordinates.
DD <- U[Include(U[["U1"]], U[["U2"]]), ]
X <- DD[["U1"]]
Y <- DD[["U2"]]

ReportResults(X, Y)

################################################################################

# This is the last example I constructed (having a lot of fun!)

# Acceptance rule: TRUE for the points (X, Y) that satisfy one of
#   - Y > 0 and |X| + |Y| > 0.5;
#   - Y < 0 and a distance to the origin strictly between 0.4 and 0.5;
#   - Y < -0.2 and a distance to the origin strictly between 0.29 and 0.30;
#   - a distance smaller than 0.01 to the point (-0.1, -0.1);
#   - a distance smaller than 0.01 to the point (0.1, -0.1).
#
# Arguments:
#   X, Y  numeric vectors with the coordinates of the points
#
# The rule is computed from the arguments X and Y only, not from the global
# data frame U.
Include <- function(X, Y){
  Radius <- Distance2zero(X, Y)
  OutsideDiamond <- abs(X) + abs(Y) > 0.5
  OuterArc <- Radius < 0.5 & Radius > 0.4
  InnerArc <- Radius < 0.30 & Radius > 0.29
  LeftDot <- Distance2zero(X + 0.1, Y + 0.1) < 0.01
  RightDot <- Distance2zero(X - 0.1, Y + 0.1) < 0.01
  (Y > 0 & OutsideDiamond) |
    (Y < 0 & OuterArc) |
    (Y < -0.2 & InnerArc) |
    LeftDot |
    RightDot
}

# Keep the simulated points accepted by Include() and report on their
# coordinates.
DD <- U[Include(U[["U1"]], U[["U2"]]), ]
X <- DD[["U1"]]
Y <- DD[["U2"]]

ReportResults(X, Y)

################################################################################
#
#                                THE END!
#
################################################################################