################################################################################
#
# Stat-Tutorial-10-NonCorrelatedButDependent
#
# I construct a range of examples of pairs of variables that are non-correlated
# (or approximately non-correlated) but are clearly DEPENDENT.
#
# R. Labouriau
# Last revision: Spring 2020
#
# Copyright © 2018 by Rodrigo Labouriau
################################################################################

# First I calculate a pair of deterministic variables that are essentially
# non-correlated, but one carries information on the other.
# To do so I need a function that constructs a grid in a region of the plane
# (indeed [-1/2,1/2] x [-1/2,1/2])

# Grid: build a regular square grid of points.
#   from, to : lower and upper limits, used for BOTH coordinates
#   by       : mesh size (step between consecutive grid values)
# Returns a data.frame with the columns U1 and U2 containing every pair of
# grid values. U1 varies slowest and U2 varies fastest, i.e. the rows are in
# the order produced by a nested loop with U1 in the outer loop.
Grid <- function(from = -1/2, to = 1/2, by = 0.1){
  # as.numeric() keeps the coordinates stored as doubles even when seq()
  # returns integers.
  Seq <- as.numeric(seq(from = from, to = to, by = by))
  N <- length(Seq)
  # Vectorised construction (no index loops): every value of Seq is repeated
  # N times in a row for U1, while the whole of Seq is cycled N times for U2.
  Out <- data.frame(U1 = rep(Seq, each = N), U2 = rep(Seq, times = N))
  return(Out)
}

# Here, I construct a grid with a fine mesh.
U <- Grid(by = 0.01)

# The variables X and Y are the first and the second coordinates of the
# points of the grid (defined in a square in the plane), respectively.
X <- U$U1; Y <- U$U2

# Not surprisingly X and Y are (essentially) not correlated.
cor(X, Y)
cor.test(X, Y)
plot(X, Y)

# Next, I will define some constraints in the grid, defined in such a way that
# the variables X and Y, again defined by the first and the second coordinates
# of the new region of the plane, are still (essentially) non-correlated,
# BUT the X and Y variables carry information on each other!
DD <- U[abs(U$U1) + abs(U$U2) < 1/2 , ]
X <- DD$U1; Y <- DD$U2
cor(X, Y)
cor.test(X, Y)
plot(X, Y)

# Examining the plot we see that the X and Y variables carry information
# on each other! Indeed, knowing that X = 0 implies that Y takes values
# between -0.49 and 0.49; on the other hand, knowing that X takes the value
# 0.49 implies that Y takes the value 0.
# The grid coordinates are produced by seq() and are therefore only
# approximately equal to decimal literals such as 0.49 or -0.3. I compare
# with a small tolerance (much smaller than the mesh size 0.01) instead of
# using ==, so that the selections below cannot silently come back empty.
Y[abs(X) < 1e-8]
range(Y[abs(X) < 1e-8])
Y[abs(X - 0.49) < 1e-8]
# Moreover, see the following
Y[abs(X + 0.3) < 1e-8]

################################################################################
#
# I make below a random version of the construction presented above.
# The idea is to generate uniformly distributed variables in a region of the
# plane (the square [-1/2,1/2] x [-1/2,1/2]) and then define new variables
# by symmetrically restricting that region ... Now, if I did not manage to
# explain this idea of construction, it is just my fault, but then give me a
# credit and just run the examples and observe the behaviour of the pairs of
# variables generated ... they will all be essentially NON-CORRELATED but
# definitely NOT INDEPENDENT.
#
# The simulation techniques used here are instances of a general simulation
# technique called acceptance-rejection simulation.
#
################################################################################

# Here is a function that generates n uniform points in a square in the plane.
#   n        : number of points to generate
#   min, max : limits of the uniform distribution, used for BOTH coordinates
# Returns a data.frame with n rows and the columns U1 and U2, each drawn
# with a separate call to runif().
runif2 <- function(n, min = -1/2, max = 1/2){
  U1 <- runif(n, min = min, max = max)
  U2 <- runif(n, min = min, max = max)
  Out <- data.frame(U1, U2)
  return(Out)
}

set.seed(1432)
U <- runif2(10000)
# The two coordinates of the 10,000 points are independent (per construction)
plot(U$U1, U$U2)
cor.test(U$U1, U$U2)

# Now, I make a symmetric restriction in the square obtaining two variables
# that are not independent, but are essentially un-correlated! :-)
DD <- U[abs(U$U1) + abs(U$U2) < 1/2 , ]
X <- DD$U1; Y <- DD$U2

# Four panels: histogram (with density estimate) of each variable on the
# diagonal and the two scatter plots (Y vs X and X vs Y) off the diagonal.
par(mfrow = c(2, 2))
hist(X, col = "lightblue", freq = FALSE)
lines(density(X), col = "red", lwd = 2)
plot(Y, X, pch = 19, col = "lightblue")
abline(v = 0, h = 0)
plot(X, Y, pch = 19, col = "lightblue")
abline(v = 0, h = 0)
hist(Y, col = "lightblue", freq = FALSE)
lines(density(Y), col = "red", lwd = 2)
par(mfrow = c(1, 1))

mean(X); mean(Y)
cor.test(X, Y)

# I define below a convenient function to report the results of the next
# simulations.
# ReportResults: graphical and numerical summary of a pair of variables.
#   X, Y : numeric vectors of the same length
# Draws a 2 x 2 panel (histogram with density estimate of X, scatter plot of
# X against Y, scatter plot of Y against X, histogram with density estimate
# of Y) and returns a named numeric vector with the estimated correlation
# (from cor) and the p-value of cor.test.
# Side effect: the plotting layout is set back to a single panel on exit.
ReportResults <- function(X, Y){
  par(mfrow = c(2, 2))
  # Registering the reset with on.exit() guarantees that the single-panel
  # layout is re-established even if one of the calls below fails.
  on.exit(par(mfrow = c(1, 1)), add = TRUE)

  hist(X, col = "lightblue", freq = FALSE)
  lines(density(X), col = "red", lwd = 2)
  plot(Y, X, pch = 19, col = "lightblue")
  abline(v = 0, h = 0)
  plot(X, Y, pch = 19, col = "lightblue")
  abline(v = 0, h = 0)
  hist(Y, col = "lightblue", freq = FALSE)
  lines(density(Y), col = "red", lwd = 2)

  p.value <- cor.test(X, Y)$p.value
  correlation <- cor(X, Y)
  Out <- c(correlation, p.value)
  names(Out) <- c("Estimated correlation", "p-value")
  return(Out)
}

# Testing the function
ReportResults(X, Y)

################################################################################
# Here is a different pattern. Again, two dependent un-correlated random
# variables ... :-)

# Euclidean distance from the point (x, y) to the origin (vectorised).
Distance2zero <- function(x, y) sqrt((x^2 + y^2))

# Keep only the points inside the disc of radius 1/2 centred at the origin.
DD <- U[Distance2zero(U$U1, U$U2) < 1/2 , ]
X <- DD$U1; Y <- DD$U2

ReportResults(X, Y)

################################################################################
# The same as before, but using more points ... this is a bit slow ...
# set.seed(31416)
# U <- runif2(200000)
# DD <- U[Distance2zero(U$U1, U$U2) < 1/2 & Distance2zero(U$U1, U$U2) > 1/4, ]
# X <- DD$U1; Y <- DD$U2
#
# ReportResults(X, Y)

################################################################################
# set.seed(31416)
# U <- runif2(200000)
# Yet another pattern ... (the last one is the most impressive!)

# Include: TRUE for the points whose distance to the origin lies strictly
# between 1/4 and 1/2 (a ring around the origin).
Include <- function(X, Y){
  D <- Distance2zero(X, Y)  # computed once and used in both conditions
  D < 1/2 & D > 1/4
}
DD <- U[Include(U$U1, U$U2), ]
X <- DD$U1; Y <- DD$U2

ReportResults(X, Y)

################################################################################
# set.seed(31416)
# U <- runif2(200000)
# and another pattern ....
# Each version of Include below takes the coordinate vectors X and Y and
# returns a logical vector marking the points to be kept. All of them use
# only their arguments (not the global data frame U), so they give the
# right answer for any pair of coordinate vectors passed in.

# A small disc of radius 0.1 around the origin together with the ring of
# points whose distance to the origin lies strictly between 1/4 and 1/2.
Include <- function(X, Y){
  D <- Distance2zero(X, Y)
  D < 0.1 | (D < 1/2 & D > 1/4)
}
DD <- U[Include(U$U1, U$U2), ]
X <- DD$U1; Y <- DD$U2

ReportResults(X, Y)

################################################################################

# The lower half (Y < 0) of the ring with radii 1/4 and 1/2.
Include <- function(X, Y){
  D <- Distance2zero(X, Y)
  Y < 0 & (D < 1/2 & D > 1/4)
}
DD <- U[Include(U$U1, U$U2), ]
X <- DD$U1; Y <- DD$U2

ReportResults(X, Y)

################################################################################

# The points with |X| + |Y| > 1/2.
Include <- function(X, Y){
  abs(X) + abs(Y) > 1/2
}
DD <- U[Include(U$U1, U$U2), ]
X <- DD$U1; Y <- DD$U2

ReportResults(X, Y)

################################################################################

# Points with |X| + |Y| > 0.5 and Y > 0.1, together with the lower half
# (Y < 0) of a thin ring with radii 0.4 and 0.5.
# (The parentheses only make explicit that & is evaluated before |.)
Include <- function(X, Y){
  D <- Distance2zero(X, Y)
  (Y > 0.1 & (abs(X) + abs(Y) > 0.5)) |
    (Y < 0 & (D < 0.5 & D > 0.4))
}
DD <- U[Include(U$U1, U$U2), ]
X <- DD$U1; Y <- DD$U2

ReportResults(X, Y)

################################################################################
# This is the last example I constructed (having a lot of fun!)

# Union of five regions:
#   - points with |X| + |Y| > 0.5 and Y > 0;
#   - the lower half (Y < 0) of the ring with radii 0.4 and 0.5;
#   - the part with Y < -0.2 of the ring with radii 0.29 and 0.30;
#   - two small discs of radius 0.01 centred at (-0.1, -0.1) and (0.1, -0.1).
Include <- function(X, Y){
  D <- Distance2zero(X, Y)
  (Y > 0 & (abs(X) + abs(Y) > 0.5)) |
    (Y < 0 & (D < 0.5 & D > 0.4)) |
    (Y < -0.2 & (D < 0.30 & D > 0.29)) |
    Distance2zero(X + 0.1, Y + 0.1) < 0.01 |
    Distance2zero(X - 0.1, Y + 0.1) < 0.01
}
DD <- U[Include(U$U1, U$U2), ]
X <- DD$U1; Y <- DD$U2

ReportResults(X, Y)

################################################################################
#
# THE END!
#
################################################################################