################################################################################
# Stat-Tutorial-03 Expectations and Variances                                  #
#                                                                              #
# This is a tutorial on the basic notions of expectation and variance         #
#                                                                              #
# R. Labouriau                                                                 #
# Last revised: Fall 2020                                                      #
################################################################################
# Copyright © 2020 by Rodrigo Labouriau

# We will work here with the notions of (mathematical) expectation and
# variance.
# Please note that the mean (also called the "sample mean") is an estimate of
# the expectation (sometimes also called the "mean").
# The sample mean is the sum of the values divided by the number of values;
# the expectation is a characteristic of the distribution that (supposedly)
# generated the values.
#
# In the same way, the sample variance is an estimate of the variance.
# The sample variance is the sum of the squared deviations from the sample
# mean divided by the number of values minus 1, while the variance is a
# characteristic of the distribution that generated the observations.
# There is some confusion of nomenclature because we often (almost always)
# refer to the sample variance as "the variance".

# We start by generating random values with a known expectation and variance
# (known from the theory). We start with a normal distribution.

# Here we generate 1000 random values from a normal distribution WITH MEAN 25
# AND STANDARD DEVIATION 1 (i.e. variance = sd^2 = 1)
Z <- rnorm(1000, mean=25, sd=1)
hist(Z, col="lightblue")

# Now we estimate the expectation by calculating the SAMPLE mean of Z
mean(Z)

# We check the calculations
sum(Z)
length(Z)
sum(Z) / length(Z)
mean(Z)

# Next we estimate the variance by calculating the SAMPLE variance of Z.
# We do that step by step.
Deviations <- Z - mean(Z)
hist(Deviations)

# We calculate the (sample) variance
n <- length(Z)
n
sum(Deviations^2)
sum(Deviations^2)/(n-1)

# We can calculate the (sample) variance directly using the function var
var(Z)

# We repeat the estimation of the expectation and the variance with a
# different simulation
Z2 <- rnorm(1000, mean=25, sd=1)
hist(Z2, col="lightblue")
plot(Z2, Z)
mean(Z2)
var(Z2)

# Compare with the estimates based on Z
mean(Z)
var(Z)

# Please repeat this process several times ...

###################################
# Here we make the estimation with only a few observations.
# We simulate with a variance equal to 9 and an expectation of 15.
# Note the instability of the estimates; they should yield
# values around 15 for the mean and 9 for the variance.

# Results for the expectation with 20 observations
for(i in 1:10){
  Z <- rnorm(20, mean=15, sd=3)
  print(mean(Z))
}

# Results for the variance with 20 observations
for(i in 1:10){
  Z <- rnorm(20, mean=15, sd=3)
  print(var(Z))
}

# Results for the expectation with 100 observations
for(i in 1:10){
  Z <- rnorm(100, mean=15, sd=3)
  print(mean(Z))
}

# Results for the variance with 100 observations
for(i in 1:10){
  Z <- rnorm(100, mean=15, sd=3)
  print(var(Z))
}

################################################################################
#
# QUESTIONS:
# 1) Repeat the procedure above using only 3 observations and
#    with 1000 observations. (A minimal sketch of one possible solution is
#    given after the closing banner below.)
#
# 2) Perform the same "experiment" using a Poisson distribution with lambda
#    taking the values 2, 5, or 10. That is, simulate Poisson samples of
#    several sizes, say n = 3, 5, 10, 100, 1000, and estimate the expectation
#    and the variance. Note that, from the theory, the expectation and the
#    variance are both equal to the parameter lambda, so you know the values
#    that the estimates should yield. (A sketch is also given below.)
################################################################################
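
# A minimal sketch of one possible solution to Question 1 (the loops may of
# course be organised differently): the same simulation as above, run with
# n = 3 and with n = 1000 observations. With only 3 observations the
# estimates fluctuate wildly; with 1000 they stay close to 15 and 9.
for(n in c(3, 1000)){
  cat("Sample size:", n, "\n")
  for(i in 1:10){
    Z <- rnorm(n, mean=15, sd=3)
    cat("  mean:", round(mean(Z), 3), " variance:", round(var(Z), 3), "\n")
  }
}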
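
# A minimal sketch of one possible solution to Question 2: Poisson samples
# for lambda equal to 2, 5 and 10, with several sample sizes. Since the
# expectation and the variance of a Poisson distribution are both equal to
# lambda, both estimates should approach lambda as n grows.
for(lambda in c(2, 5, 10)){
  for(n in c(3, 5, 10, 100, 1000)){
    Z <- rpois(n, lambda=lambda)
    cat("lambda =", lambda, " n =", n,
        " mean =", round(mean(Z), 3), " variance =", round(var(Z), 3), "\n")
  }
}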