################################################################################ # # Stat-Tutorial-09-z-test.R # The z-test (bilateral) # # By R. Labouriau # Last revised: Fall 2020 ################################################################################ # Copyright © 2019 by Rodrigo Labouriau # # This tutorial illustrates how the classic z-test for testing whether the # theoretical mean is equal to a given value, using a sample of independent and # identically distributed normally distributed with fixed and KNOWN variance. # # The following basic notions of hypotheses tests will be illustrated: # - Level of significance (and error of type I, i.e. wrongly rejecting H0) # - Power function (and error of type II, i.e. wrongly accepting H0 ) # - How the power is influenced by the level of significance choosen and the # number of observations # - The p-value. # ################################################################################ ################################################################################ # PRELIMINAIRES # ################################################################################ # Here I set some parameters to simulate (you might change it): # mu will be the true value of the theoretical mean (expectation) mu <- 10 # sigma2 will be the variance (assumed to be known) sigma2 <- 1 # sigma will be the standard deviation sigma <- sqrt(sigma2) # N will be the number of (iid, i.e. independent and identically # distributed) observations N <- 100 # The function below returns the density of the normal distribution. I will use # that to plot the density Density <- function(x) dnorm(x,mean=mu, sd=sigma) plot(Density, xlim=c(mu-3.5*sigma , mu+3.5*sigma)) # I improve the plot of the density plot(Density, xlim=c(mu-3.5*sigma , mu+3.5*sigma), lwd=4 ) # Here I simulate N independent observations of a normal distribution # with mean mu and variance sigma2 (or standard deviation sigma). X <- rnorm(N, mean=mu, sd=sigma) # I draw a histogram of the simulated data using the frequencies hist(X, col="lightblue") # I draw a histogram of the simulated data using the frequencies hist(X, col="lightblue", freq=F) # Now I draw the histogram using relative frequencies hist(X, col="lightblue", freq=F, ylim=c(0,0.5), main=paste(N, "simulated N(", mu, ",", sigma2, ")", "observations") ) # and add vertical lines at the value of mu and the sample mean (interruped line) abline(v=mu, col="red", lwd=4) abline(v=mean(X), col="red", lwd=3, lty=2) # and here I draw the density over the graph of the histogram plot(Density, xlim=c(mu-3.5*sigma , mu+3.5*sigma), lwd=4, add=T ) ################################################################################ # # Constructing a test (the z-test) # ################################################################################ # # We will consider the following system of hypotheses: # H0: mu = 10 (the null hypothesis) # HA: mu != 10 (the alternative hypothesis) # # Here is a first rejection rule (a naive and very strict): # "Reject H0 when the sample mean is smaller than 9.95 or larger than 10.05" # ################################################################################ # The sample mean is mean(X) # The logic variable below takes the vale "TRUE" if we reject H0 based on the # rule in play or "FALSE" otherwise. Reject <- 9.95 > mean(X) | mean(X) > 10.05 Reject # Here I illustrate hist(X, col="lightblue", freq=F, main=paste(N, "simulated N(", mu, ",", sigma2, ")", "observations")) abline(v=c(9.95, 10.05), col="red", lwd=4, lty=3) abline(v=mean(X), col="blue", lwd=5, lty=1) # plot(Density, xlim=c(mu-3.5*sigma , mu+3.5*sigma),lwd=2, col="grey", add=T) legend("topright", legend=c("Sample mean","Limits rejection region"), col=c("blue", "red"), lwd=c(5,4), lty=c(1,3), bty="n" ) # Here I repeat this procedure 4 times (generating a new X every time) par(mfrow=c(2,2)) for(i in 1:4){ X <- rnorm(N, mean=mu, sd=sigma) hist(X, col="lightblue", freq=F, main="") abline(v=c(9.95, 10.05), col="red", lwd=4, lty=3) abline(v=mean(X), col="blue", lwd=5, lty=1) plot(Density, xlim=c(mu-3.5*sigma , mu+3.5*sigma),lwd=2, col="grey", add=T) } par(mfrow=c(1,1)) # Next, I repeat the procedure 100,000 times and keep track of the results # in a logical vector (called "Results") Ntests <- 100000 Results <- logical(Ntests) for(i in 1:Ntests){ X <- rnorm(N, mean=mu, sd=sigma) Reject <- 9.95 > mean(X) | mean(X) > 10.05 Results[i] <- Reject } # This displays the results of this "in silico experiment table(Results) # and here I calculate the proportion of rejection of H0 # (detail: When we make calculations with the logial vector, R converts the # entries of the logical vector into 0 when FALSE and 1 when TRUE, so that # when we calculate the mean of this vector we get the proportion of TRUEs) mean(Results) # Next, we build a less strict rejection rule. # Second rejection rule (a less strict): # "Reject H0 when the sample mean is smaller than 9.9 or larger than 10.1" mean(X) Reject <- mean(X) < 9.9 | mean(X) > 10.1 Reject hist(X, col="lightblue", freq=F, main=paste(N, "simulated N(", mu, ",", sigma2, ")", "observations")) abline(v=c(9.9, 10.1), col="red", lwd=4, lty=3) abline(v=mean(X), col="blue", lwd=5, lty=1) # plot(Density, xlim=c(mu-3.5*sigma , mu+3.5*sigma),lwd=2, col="grey", add=T) legend("topleft", legend=c("Sample mean","Limits of the rejection region"), col=c("blue", "red"), lwd=c(5,4), lty=c(1,3), bty="n" ) # We perform again the test 100,000 times and keep track of the results. We # keep track also of the sample means obtained in each of the 100,000 # simulations (so we will be able to change the rejection rule and see what # happens, without having to simulate 100,000 tests again) Ntests <- 100000 Results <- logical(Ntests) Means <- numeric(Ntests) for(i in 1:Ntests){ X <- rnorm(N, mean=mu, sd=sigma) Means[i] <- mean(X) Reject <- mean(X) < 9.9 | mean(X) > 10.1 Results[i] <- Reject } # Here are the results of the 100,000 tests table(Results) mean(Results) # Since we kept the sample means in the vector "Means" we might calculate # the proportion of rejection as below mean(Means < 9.9 | Means > 10.1) # Alternatively, delta <- 0.1 mean(Means < mu-delta | Means > mu+delta) # Changing the rejection rule delta <- 0.15 mean(Means < mu-delta | Means > mu+delta) # I will estimate the probability of rejection for several rejection rules. # I express the rejection rules in terms of how many standard deviations # (i.e. sigmas) divided by the suqre roots of the number of observations # that we admit the sample mean to depart from the hypothised theoretical # mean we admit, before reject the null hypothesis. I will denote this # number of standard deviations by "k". k <- 1 delta <- k*sigma/sqrt(N); delta mean(Means < mu-delta | Means > mu+delta) k <- 1 mean(Means < mu-k*sigma/sqrt(N) | Means > mu+k*sigma/sqrt(N)) k <- 1.5 mean(Means < mu-k*sigma/sqrt(N) | Means > mu+k*sigma/sqrt(N)) k <- 1.96 mean(Means < mu-k*sigma/sqrt(N) | Means > mu+k*sigma/sqrt(N)) # Next, I will calculate the probability of rejecting H0 when varying k # in a range of values and then plot the probability of rejecting H0 # as a function of k. Ks <- seq(from=0.5, to=3, by=0.01) ( Ks <- seq(from=0.5, to=3, by=0.01) ) length(Ks) alpha <- numeric(length(Ks)) for(i in 1:length(Ks)){ k <- Ks[i] alpha[i] <- mean( Means < mu-k*sigma/sqrt(N) | Means > mu+k*sigma/sqrt(N) ) } plot(Ks, alpha, type="l", lwd=3, xlab="Distance from mu in standard deviations of the sample mean") abline(h=c(0.1, 0.05, 0.01), lty=c(2,3,4)) plot(Ks, alpha, type="l", lwd=3, xlab="Distance from mu in standard deviations of the sample mean") segments(x0=0, y0=0.05, x1=1.96, y1=0.05, lty=2) segments(x0=1.96, y0=0, x1=1.96, y1=0.05, lty=2) abline(h=0) ################################################################################ # Here are some theoretical considerations: # If X ~ N(mu, sigma2), then the sample mean ~ N(mu, sigma2/N) # Then (sample mean - mu) / sqrt(sigma2/N) ~ N (0 , 1) i.e. # sqrt(N) (sample mean - mu) / sigma ~ N (0 , 1) # P(sqrt(N) (sample mean - mu) / sigma <= 1.96) = Phi (1.96) approx 0.975 pnorm(-1.96) qnorm(0.025) # P(sqrt(N) (sample mean - mu) / sigma > 1.96) approx 0.025 # But sqrt(N) (sample mean - mu) / sigma > 1.96 # <=> sample mean > 1.96 * sigma/sqrt(N) # and # P(sample mean > 1.96 * sigma/sqrt(N) ) approx 0.025 # By symmetry, # P(sample mean < -1.96 * sigma/sqrt(N) ) approx 0.025 # # Therefore, if X ~ N(mu, sigma2) and we use k=1.96 # P(reject H0) approx 0.025 + 0.025 = 0.05 = 5% # Analogously, if we want a test of level 1% we use k such that # the probability of a standard normal distributed random variable # be larger than k is 0.5%, i.e. qnorm(0.005, lower.tail=F) # 2.575829 ################################################################################ # Studying the power of the test ################################################################################ # # In hypothesis tests there are two possible errors we might make: # # Type I error: reject the null hypothesis when the null hypothesis is cirrect # # or # # Type II error: accept the null hypothesis when the null # huypothesis is wrong # # Next we will use a fixed rejection rule, but change the value of mu # i.e. departure from the null hypothesis H0: E(X) = 10 k <- 1.96 muH0 <- 10 mu <- 10.01 sigma2 <- 1 sigma <- sqrt(sigma2) N <- 100 X <- rnorm(N, mean=mu, sd=sigma) ( Reject <- mean(X) < muH0-k*sigma/sqrt(N) | mean(X) > muH0+k*sigma/sqrt(N) ) Ntests <- 10000 Results <- logical(Ntests) for(i in 1:Ntests){ X <- rnorm(N, mean=mu, sd=sigma) Reject <- mean(X) < muH0-k*sigma/sqrt(N) | mean(X) > muH0+k*sigma/sqrt(N) Results[i] <- Reject } mean(Results) mu <- 10.05 Ntests <- 10000 Results <- logical(Ntests) for(i in 1:Ntests){ X <- rnorm(N, mean=mu, sd=sigma) Reject <- mean(X) < muH0-k*sigma/sqrt(N) | mean(X) > muH0+k*sigma/sqrt(N) Results[i] <- Reject } mean(Results) mu <- 10.1 Ntests <- 1000 Results <- logical(Ntests) for(i in 1:Ntests){ X <- rnorm(N, mean=mu, sd=sigma) Reject <- mean(X) < muH0-k*sigma/sqrt(N) | mean(X) > muH0+k*sigma/sqrt(N) Results[i] <- Reject } mean(Results) ( Mus <- seq(from=muH0-5*sigma/sqrt(N), to=muH0+5*sigma/sqrt(N), by=1/100) ) length(Mus) Ntests <- 10000 Beta <- numeric(length(Mus)) for(j in 1:length(Mus)){ for(i in 1:Ntests){ X <- rnorm(N, mean=Mus[j], sd=sigma) Reject <- mean(X) < muH0-k*sigma/sqrt(N) | mean(X) > muH0+k*sigma/sqrt(N) Results[i] <- Reject } Beta[j] <- mean(Results) } # Ploting the power function plot(Mus, Beta, type="l", lwd=4, main="Power function", ylab=expression(beta), xlab=expression(mu) ) abline(h=0.05, lty=2) abline(v=muH0) Beta0.05 <- Beta # Power function varying the significance level qnorm(c(0.05, 0.025, 0.005), lower.tail=F) k <- 1.644854 # Setting the significance level to 10% Mus <- seq(from=muH0-5*sigma/sqrt(N), to=muH0+5*sigma/sqrt(N), by=1/100) Ntests <- 10000 Beta <- numeric(length(Mus)) for(j in 1:length(Mus)){ for(i in 1:Ntests){ X <- rnorm(N, mean=Mus[j], sd=sigma) Reject <- mean(X) < muH0-k*sigma/sqrt(N) | mean(X) > muH0+k*sigma/sqrt(N) Results[i] <- Reject } Beta[j] <- mean(Results) } Beta0.10 <- Beta plot(Mus, Beta0.10, type="l", lwd=4, main="Power function", ylab=expression(beta), xlab=expression(mu) ) ############ k <- 2.575829 # Setting the significance level to 1% Mus <- seq(from=muH0-5*sigma/sqrt(N), to=muH0+5*sigma/sqrt(N), by=1/100) Ntests <- 10000 Beta <- numeric(length(Mus)) for(j in 1:length(Mus)){ for(i in 1:Ntests){ X <- rnorm(N, mean=Mus[j], sd=sigma) Reject <- mean(X) < muH0-k*sigma/sqrt(N) | mean(X) > muH0+k*sigma/sqrt(N) Results[i] <- Reject } Beta[j] <- mean(Results) } Beta0.01 <- Beta ##### # pdf("~/Desktop/Plot.pdf") plot(Mus, Beta0.05, type="l", lwd=10, main="Power function", ylab=expression(beta), xlab=expression(mu), ylim=c(0,1), col="gray" ) points(Mus, Beta0.10, type="l", lwd=10, col="yellow" ) points(Mus, Beta0.01, type="l", lwd=10, col="lightblue" ) segments(x0=0, y0=0.05, x1=muH0, y1=0.05, lty=2) segments(x0=0, y0=0.10, x1=muH0, y1=0.10, lty=2, col="orange") segments(x0=0, y0=0.01, x1=muH0, y1=0.01, lty=2, col="blue") segments(x0=0, y0=0, x1=muH0, y1=0) abline(v=muH0) legend("bottomright", legend=c("1% significance", "5% significance", "10% significance"), lwd=15, col=c("lightblue","gray","yellow"), bty="n" ) # dev.off() # ########### # Theoretical calculations of the power functions # # Reject H0 when mean # # "Reject H0 <=> SM < muH0 - k*sigma/sqrt(N) or SM > muH0 + k*sigma/sqrt(N)" # # where SM is the sample mean, E is the actual value of the E(X) # P/E (SM < muH0 - k*sigma/sqrt(N)) = P/E (SM -E < muH0 - k*sigma/sqrt(N) -E) # = P/E ((sqrt(N)/sigma)*(SM -E) < (sqrt(N)/sigma)* (muH0 - k*sigma/sqrt(N) -E) ) # = Phi(sqrt(N)/sigma)* (muH0 - k*sigma/sqrt(N) -E) # = Phi(sqrt(N)/sigma)* (muH0 - E) - k), # # where P/E is the probability assuming the E(X)=E and Phi is the accumulated # distribution function of a standard normal distribution. # # Analougous calculations yields, # # P/E (SM > muH0 + k*sigma/sqrt(N)) = 1 - Phi(sqrt(N)/sigma)* (muH0 - E) + k), # # therefore, # # P/E(rejecting H0) = 1 - Phi(sqrt(N)/sigma)* (muH0 - E) + k) + Phi(sqrt(N)/sigma)* (muH0 - E) - k) # # which can be calculated using the function bellow. # Power <- function(E) { z <- sqrt(N)/sigma *(muH0-E) 1-pnorm(z+k)+pnorm(z-k) } # Theoretical power at a significance level of 5% k <- 1.96 muH0 <- 10 sigma2 <- 1 sigma <- sqrt(sigma2) N <- 100 Power(Mus) points(Mus, Power(Mus), type="l", lwd=2) # Theoretical power at a significance level of 1% k <- 2.575829 muH0 <- 10 sigma2 <- 1 sigma <- sqrt(sigma2) N <- 100 points(Mus, Power(Mus), type="l", lwd=2, col="blue") # Theoretical power at a significance level of 10% k <- 1.644854 muH0 <- 10 sigma2 <- 1 sigma <- sqrt(sigma2) N <- 100 points(Mus, Power(Mus), type="l", lwd=2, col="red") # pdf("~/Desktop/Plot.pdf") plot(Mus, Beta0.05, type="l", lwd=10, main="Power function", ylab=expression(beta), xlab=expression(mu), ylim=c(0,1), col="gray" ) points(Mus, Beta0.10, type="l", lwd=10, col="yellow" ) points(Mus, Beta0.01, type="l", lwd=10, col="lightblue" ) segments(x0=0, y0=0.05, x1=muH0, y1=0.05, lty=2) segments(x0=0, y0=0.10, x1=muH0, y1=0.10, lty=2, col="orange") segments(x0=0, y0=0.01, x1=muH0, y1=0.01, lty=2, col="blue") segments(x0=0, y0=0, x1=muH0, y1=0) abline(v=muH0) k <- 1.96 muH0 <- 10 sigma2 <- 1 sigma <- sqrt(sigma2) N <- 100 points(Mus, Power(Mus), type="l", lwd=2) # Theoretical power at a significance level of 1% k <- 2.575829 muH0 <- 10 sigma2 <- 1 sigma <- sqrt(sigma2) N <- 100 points(Mus, Power(Mus), type="l", lwd=2, col="blue") # Theoretical power at a significance level of 10% k <- 1.644854 muH0 <- 10 sigma2 <- 1 sigma <- sqrt(sigma2) N <- 100 points(Mus, Power(Mus), type="l", lwd=2, col="red") legend("bottomright", cex=0.75, legend=c( "1% significance (theoretical)", "5% significance (theoretical)", "10% significance (theoretical)", "1% significance (Simulated)", "5% significance (Simulated)", "10% significance (Simulated)" ), lwd=c(2,2,2,15,15,15), col=c("blue", "black", "red", "lightblue","gray","yellow"), bty="n" ) # dev.off() ################################## ### Now we vary N, setting alpha =5% ################################## N <- 100 ( Mus <- seq(from=muH0-3.4*sigma/sqrt(N), to=muH0+3.4*sigma/sqrt(N), by=1/100) ) k <- 1.96 muH0 <- 10 sigma2 <- 1 sigma <- sqrt(sigma2) N <- 5 plot(Mus, Power(Mus), type="l", lwd=5, main="Power function", ylab=expression(beta), xlab=expression(mu), ylim=c(0,1), col="yellow" ) abline(v=muH0) segments(x0=0, y0=0.05, x1=muH0, y1=0.05, lty=2) segments(x0=0, y0=0, x1=muH0, y1=0) N <- 10 points(Mus, Power(Mus), type="l", lwd=5, main="Power function", col="orange" ) N <- 20 points(Mus, Power(Mus), type="l", lwd=5, main="Power function", col="red" ) N <- 50 points(Mus, Power(Mus), type="l", lwd=5, main="Power function", col="blue" ) N <- 100 points(Mus, Power(Mus), type="l", lwd=5, main="Power function", col="black" ) N <- 1000 points(Mus, Power(Mus), type="l", lwd=5, main="Power function", col="gray" ) text(10.3, 0.98, "N=1000", col="gray", cex=0.8) text(10.3, 0.73, "N=100", col="black", cex=0.8) text(10.3, 0.46, "N=50", col="blue", cex=0.8) text(10.3, 0.22, "N=20", col="red", cex=0.8) text(10.3, 0.13, "N=10", col="orange", cex=0.8) text(10.3, 0.08, "N=5", col="yellow", cex=0.8) ################################################################################ # # # The notion of p-value # # # ################################################################################ muH0 <- 10 sigma2 <- 1 sigma <- sqrt(sigma2) N <- 100 # Have some samples, say X as below X <- rnorm(N, mean=muH0, sd=sigma) # For 5% signnificance k <- 1.96 Reject <- mean(X) < muH0-k*sigma/sqrt(N) | mean(X) > muH0+k*sigma/sqrt(N) Reject # Now we vary k mean(X) Ks <- seq(from=0.01, to=2, by=0.0001) length(Ks) Results <- logical(length(Ks)) for(i in 1:length(Ks)){ k <- Ks[i] Results[i] <- mean(X) < muH0-k*sigma/sqrt(N) | mean(X) > muH0+k*sigma/sqrt(N) } Results[1] as.numeric(Results[1]) Results[length(Ks)] as.numeric(Results[length(Ks)]) plot(1:length(Ks), Results, type="l", lwd=5) which.min(Results) Results[which.min(Results)-1] Results[which.min(Results)] # This is the "last" k for which we still reject H0 K <- Ks[which.min(Results)-1]; K # that is, the less strict value that still rejects (well, we are making a # search for some of the possible values of k, the values contained in the # vector Ks). Now, what is the probability of wrongly rejecting H0 when # we define the rejection rule defined with THIS (last rejecting) value of k? # Let us call K this (special) value of k (i.e. the largest value of k that # defines a rejection rule that rejects H0). # That is, what is the significance level obtained when using K? # Well, # P/H0 (reject H0) = # = P/H0(mean(X) < muH0-K*sigma/sqrt(N) | mean(X) > muH0+K*sigma/sqrt(N)) # = P/H0(mean(X) < muH0-K*sigma/sqrt(N)) + P/H0(mean(X) > muH0+K*sigma/sqrt(N)) # = P/H0( sqrt(N)*(mean(X) - muH0)/sigma < -K) + # P/H0( sqrt(N)*(mean(X) - muH0)/sigma > +K) # = Phi (-K) + 1 - Phi(K) = 2*Phi(-K) # # Therefore, the the significance level obtained when using K is, 2*pnorm(-K) # This value is (approximately) the p-value (the exact p-value is obtained using # the exact K, not an approximation as we did above). # Consider K such mean(X) = muH0-K*sigma/sqrt(N) . Solving for K we obtain # K = sqrt(N)* (mean(X) - muH0)/sigma K <- sqrt(N)* (mean(X) - muH0)/sigma K # and then the p-value is 2*pnorm(-K) ##################### # THE END