CLT

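CLT: When the sample size \(N\) is large, the average \(\bar{Y}\) of a random sample is approximately normally distributed with mean \(\mu\) and standard deviation \(\sigma_Y/\sqrt{N}\). Equivalently, across many samples of size \(N\), the distribution of the standardized sample average shown below is approximated by a normal distribution centered at 0 and with standard deviation 1.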

\[ \frac{\bar{Y} - \mu}{\sigma_Y/\sqrt{N}} \]

# Load the father/son height data shipped with the UsingR package.
data("father.son", package = "UsingR")
dim(father.son)
# Recorded output: [1] 1078    2

# Fathers' heights; `x` is reused by the Q-Q plot code that follows.
x <- father.son[["fheight"]]

# Histogram with unit-wide bins running from floor(min) to ceiling(max).
hist(
  x,
  breaks = seq(floor(min(x)), ceiling(max(x))),
  main = "",
  xlab = "Height"
)

# Empirical CDF of the heights, evaluated on a grid with step 0.1.
xs <- seq(from = floor(min(x)), to = ceiling(max(x)), by = 0.1)
plot(xs, ecdf(x)(xs), type = "l", xlab = "x=Height", ylab = "F(x)")

QQnorm

# Lay out the three Q-Q panels below.
# NOTE(review): mypar() and popsd() are not defined in this section; they
# look like the rafalib helpers -- confirm the package is loaded earlier.
mypar(1, 3)

# Panel 1: hand-built Q-Q plot. Compare 100 sample percentiles of `x` with
# the percentiles of a normal distribution that has the same mean and SD.
ps <- (0:99 + 0.5) / 100
qs <- quantile(x, probs = ps)
normalqs <- qnorm(ps, mean = mean(x), sd = popsd(x))
plot(normalqs, qs, xlab = "Normal percentiles", ylab = "Height percentiles")
abline(a = 0, b = 1) # identity line

# Panel 2: the same comparison using the built-in helpers.
qqnorm(x)
qqline(x)

# Panel 3: repeat panel 1 on the standardized values.
# (Same values as scale(x), which returns them as a one-column matrix.)
y <- (x - mean(x)) / sd(x)
qqs <- quantile(y, probs = ps)
normalqqs <- qnorm(ps, mean = mean(y), sd = popsd(y))
plot(normalqqs, qqs, xlab = "Normal percentiles", ylab = "Height percentiles")
abline(a = 0, b = 1) # identity line

# Load the executive pay data shipped with the UsingR package and look at
# its distribution.
data("exec.pay", package = "UsingR")
hist(exec.pay)

# Box plot of the same data with the y-axis restricted to 0-400; any values
# above 400 fall outside the plotted range.
boxplot(exec.pay, ylim = c(0, 400), ylab = "10,000s of dollars")

# Q-Q plots for executive pay, mirroring the three panels drawn for the
# father heights. The y-axis labels name the pay data; they previously said
# "Height percentiles", a leftover from the height example.
# NOTE(review): mypar() and popsd() are not defined in this section; they
# look like the rafalib helpers -- confirm the package is loaded earlier.
x <- exec.pay
mypar(1, 3)

# Panel 1: sample percentiles of pay against percentiles of a normal
# distribution with the same mean and SD.
ps <- (seq(0, 99) + 0.5) / 100
qs <- quantile(x, ps)
normalqs <- qnorm(ps, mean(x), popsd(x))
plot(normalqs, qs,
  xlab = "Normal percentiles",
  ylab = "Executive pay percentiles"
)
abline(0, 1) # identity line

# Panel 2: the same comparison using the built-in helpers.
qqnorm(x)
qqline(x)

# Panel 3: repeat panel 1 on the standardized values.
# (Same values as scale(x), which returns them as a one-column matrix.)
y <- (x - mean(x)) / sd(x)
qqs <- quantile(y, ps)
normalqqs <- qnorm(ps, mean(y), popsd(y))
plot(normalqqs, qqs,
  xlab = "Normal percentiles",
  ylab = "Standardized executive pay percentiles"
)
abline(0, 1) # identity line

# Monte Carlo check of the t-distribution. For each sample size N, draw B
# standard-normal samples, compute the t-statistic sqrt(N) * mean / sd of
# each, and compare the B simulated values with the quantiles of a
# t-distribution with N - 1 degrees of freedom.
# NOTE(review): mypar() is not defined in this section; it looks like the
# rafalib helper -- confirm the package is loaded earlier.
Ns <- seq(5, 30, 5)
B <- 1000
mypar(3, 2)
LIM <- c(-4.5, 4.5)

# The plotting probabilities depend only on B, so compute them once rather
# than on every iteration. `length.out` is spelled out instead of relying
# on partial matching of `len`.
ps <- seq(1 / (B + 1), 1 - 1 / (B + 1), length.out = B)

for (N in Ns) {
  # B simulated t-statistics for samples of size N drawn from N(0, 1).
  ts <- replicate(B, {
    X <- rnorm(N)
    sqrt(N) * mean(X) / sd(X)
  })
  qqplot(qt(ps, df = N - 1), ts,
    main = N,
    xlab = "Theoretical", ylab = "Observed",
    xlim = LIM, ylim = LIM
  )
  abline(0, 1) # identity line
}

The Normal Distribution

  • dnorm(x, mean = 0, sd = 1, log = FALSE)
  • pnorm(q, mean = 0, sd = 1, lower.tail = TRUE, log.p = FALSE)
  • qnorm(p, mean = 0, sd = 1, lower.tail = TRUE, log.p = FALSE)
  • rnorm(n, mean = 0, sd = 1)
# Two-sided tail probability of a standard normal beyond +/-1.96:
# the upper tail (1 - P(Z <= 1.96)) plus the lower tail (P(Z <= -1.96)).
(1 - pnorm(q = 1.96)) + pnorm(q = -1.96)
# Recorded output: [1] 0.04999579

Random Normal

# Fix the RNG seed so the draws below can be reproduced.
set.seed(1)

# Ten draws from the standard normal (mean 0, sd 1 are the defaults).
rnorm(n = 10)
# Recorded output:
#  [1] -0.6264538  0.1836433 -0.8356286  1.5952808  0.3295078 -0.8204684
#  [7]  0.4874291  0.7383247  0.5757814 -0.3053884

# Ten more draws, this time with a different mean and standard deviation.
rnorm(n = 10, mean = 5, sd = 1)
# Recorded output:
#  [1] 6.511781 5.389843 4.378759 2.785300 6.124931 4.955066 4.983810
#  [8] 5.943836 5.821221 5.593901