Solutions¶

1

In [1]:

# A function to generate n iid X_i and return the sample mean and variance.
func<-function(n){
  x <-rpois(n,70)
    mean_x <-mean(x)
    var_x <-var(x)
    return(c(mean_x,var_x))
}
func2<-function(n){
  y=func(n)
  Z_s=(y[1]-70)/sqrt(y[2]/n)
  Z_s
}
Zs_samples=sapply(rep(500,1000),func2)
Zs_samples_loop=rep(0,1000)
for (i in 1:1000){
  Zs_samples_loop[i] = func2(500)
}
hist(Zs_samples)
hist(Zs_samples_loop)
qqnorm(Zs_samples)
qqnorm(Zs_samples_loop)
#they are normally distributed as says the CLT.
#n=500 is sufficient for the sample variance to a good
#approximation to the true variance.

# A function to generate n iid X_i and return the sample mean and variance.
func<-function(n){
  x <-rpois(n,70)
    mean_x <-mean(x)
    var_x <-var(x)
    return(c(mean_x,var_x))
}
func2<-function(n){
  y=func(n)
  Z_s=(y[1]-70)/sqrt(y[2]/n)
  Z_s
}
Zs_samples=sapply(rep(500,1000),func2)
Zs_samples_loop=rep(0,1000)
for (i in 1:1000){
  Zs_samples_loop[i] = func2(500)
}
hist(Zs_samples)
hist(Zs_samples_loop)
qqnorm(Zs_samples)
qqnorm(Zs_samples_loop)
#they are normally distributed as says the CLT.
#n=500 is sufficient for the sample variance to a good
#approximation to the true variance.

# A function to generate n iid X_i and return the sample mean and variance.
func<-function(n){
  x <-rpois(n,70)
    mean_x <-mean(x)
    var_x <-var(x)
    return(c(mean_x,var_x))
}
func2<-function(n){
  y=func(n)
  Z_s=(y[1]-70)/sqrt(y[2]/n)
  Z_s
}
Zs_samples=sapply(rep(500,1000),func2)
Zs_samples_loop=rep(0,1000)
for (i in 1:1000){
  Zs_samples_loop[i] = func2(500)
}
hist(Zs_samples)
hist(Zs_samples_loop)
qqnorm(Zs_samples)
qqnorm(Zs_samples_loop)
#they are normally distributed as says the CLT.
#n=500 is sufficient for the sample variance to a good
#approximation to the true variance.

2

In [2]:

dat <- trees #let dat be the trees dataframe
dat[1:10,] #print the first 10 rows
summary(dat) #print a summary of the dataset
pairs(dat,pch=19) #plot a matrix of scatterplots
mod=lm(Volume~Girth,data=dat)
plot(mod)
summary(mod)
dat$Girth2 <- (dat$Girth)^2 #note numerical operations are performed elementwise in R.
mod2=lm(Volume~Girth2,data=dat)
summary(mod2)
#improves- F-statistic increases, Residual se decreases etc

A data.frame: 10 × 3
	Girth	Height	Volume
	<dbl>	<dbl>	<dbl>
1	8.3	70	10.3
2	8.6	65	10.3
3	8.8	63	10.2
4	10.5	72	16.4
5	10.7	81	18.8
6	10.8	83	19.7
7	11.0	66	15.6
8	11.0	75	18.2
9	11.1	80	22.6
10	11.2	75	19.9

     Girth           Height       Volume     
 Min.   : 8.30   Min.   :63   Min.   :10.20  
 1st Qu.:11.05   1st Qu.:72   1st Qu.:19.40  
 Median :12.90   Median :76   Median :24.20  
 Mean   :13.25   Mean   :76   Mean   :30.17  
 3rd Qu.:15.25   3rd Qu.:80   3rd Qu.:37.30  
 Max.   :20.60   Max.   :87   Max.   :77.00

Call:
lm(formula = Volume ~ Girth, data = dat)
Residuals:
   Min     1Q Median     3Q    Max 
-8.065 -3.107  0.152  3.495  9.587 
Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) -36.9435     3.3651  -10.98 7.62e-12 ***
Girth         5.0659     0.2474   20.48  < 2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 4.252 on 29 degrees of freedom
Multiple R-squared:  0.9353,    Adjusted R-squared:  0.9331 
F-statistic: 419.4 on 1 and 29 DF,  p-value: < 2.2e-16

Call:
lm(formula = Volume ~ Girth2, data = dat)
Residuals:
    Min      1Q  Median      3Q     Max 
-6.2475 -2.5738  0.2556  2.1341  7.0061 
Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) -3.355142   1.416902  -2.368   0.0248 *  
Girth2       0.181173   0.006923  26.169   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 3.37 on 29 degrees of freedom
Multiple R-squared:  0.9594,    Adjusted R-squared:  0.958 
F-statistic: 684.8 on 1 and 29 DF,  p-value: < 2.2e-16

dat <- trees #let dat be the trees dataframe
dat[1:10,] #print the first 10 rows
summary(dat) #print a summary of the dataset
pairs(dat,pch=19) #plot a matrix of scatterplots
mod=lm(Volume~Girth,data=dat)
plot(mod)
summary(mod)
dat$Girth2 <- (dat$Girth)^2 #note numerical operations are performed elementwise in R.
mod2=lm(Volume~Girth2,data=dat)
summary(mod2)
#improves- F-statistic increases, Residual se decreases etc

dat <- trees #let dat be the trees dataframe
dat[1:10,] #print the first 10 rows
summary(dat) #print a summary of the dataset
pairs(dat,pch=19) #plot a matrix of scatterplots
mod=lm(Volume~Girth,data=dat)
plot(mod)
summary(mod)
dat$Girth2 <- (dat$Girth)^2 #note numerical operations are performed elementwise in R.
mod2=lm(Volume~Girth2,data=dat)
summary(mod2)
#improves- F-statistic increases, Residual se decreases etc

     Girth           Height       Volume     
 Min.   : 8.30   Min.   :63   Min.   :10.20  
 1st Qu.:11.05   1st Qu.:72   1st Qu.:19.40  
 Median :12.90   Median :76   Median :24.20  
 Mean   :13.25   Mean   :76   Mean   :30.17  
 3rd Qu.:15.25   3rd Qu.:80   3rd Qu.:37.30  
 Max.   :20.60   Max.   :87   Max.   :77.00

Call:
lm(formula = Volume ~ Girth, data = dat)
Residuals:
   Min     1Q Median     3Q    Max 
-8.065 -3.107  0.152  3.495  9.587 
Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) -36.9435     3.3651  -10.98 7.62e-12 ***
Girth         5.0659     0.2474   20.48  < 2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 4.252 on 29 degrees of freedom
Multiple R-squared:  0.9353,    Adjusted R-squared:  0.9331 
F-statistic: 419.4 on 1 and 29 DF,  p-value: < 2.2e-16

Call:
lm(formula = Volume ~ Girth2, data = dat)
Residuals:
    Min      1Q  Median      3Q     Max 
-6.2475 -2.5738  0.2556  2.1341  7.0061 
Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) -3.355142   1.416902  -2.368   0.0248 *  
Girth2       0.181173   0.006923  26.169   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 3.37 on 29 degrees of freedom
Multiple R-squared:  0.9594,    Adjusted R-squared:  0.958 
F-statistic: 684.8 on 1 and 29 DF,  p-value: < 2.2e-16

3 (Bonus)

In [3]:

cost <- function(n){
  min(200,n)*25+(n>200)*(n-200)*5
}
profit <- function(n,d){
  100*min(n,d)-cost(n)
}
average.profit <- function(n,nreps){
  demand_vec <- rnorm(nreps,178,21)# note that the 3rd argurment of rnorm in R is sd not variance!
    profit_vec <- sapply(demand_vec,profit,n=n) #alternatively you may use a for loop. But sapply is a neat way to do things in R.
  av_profit <- mean(profit_vec)
    return(av_profit)
}
#after some initial searching with small nreps, we find the maximum is in the interval (185,220)
x=seq(from=185,to=220,by=0.5)
y=sapply(x,average.profit,nreps=500000) #takes a minute
plot(x,y)
#global maximum is 214
#use set.seed to set the seed for each new n in order to couple the estimates
average.profit <- function(n,nreps){
  set.seed(100)
  demand_vec <- rnorm(nreps,178,21)# note that the 3rd argurment of rnorm in R is sd not variance!
    profit_vec <- sapply(demand_vec,profit,n=n) #alternatively you may use a for loop. But sapply is a neat way to do things in R.
  av_profit <- mean(profit_vec)
    return(av_profit)
}
x=seq(from=185,to=220,by=0.5)
y=sapply(x,average.profit,nreps=3000) #takes a second
plot(x,y)

cost <- function(n){
  min(200,n)*25+(n>200)*(n-200)*5
}
profit <- function(n,d){
  100*min(n,d)-cost(n)
}
average.profit <- function(n,nreps){
  demand_vec <- rnorm(nreps,178,21)# note that the 3rd argurment of rnorm in R is sd not variance!
    profit_vec <- sapply(demand_vec,profit,n=n) #alternatively you may use a for loop. But sapply is a neat way to do things in R.
  av_profit <- mean(profit_vec)
    return(av_profit)
}
#after some initial searching with small nreps, we find the maximum is in the interval (185,220)
x=seq(from=185,to=220,by=0.5)
y=sapply(x,average.profit,nreps=500000) #takes a minute
plot(x,y)
#global maximum is 214
#use set.seed to set the seed for each new n in order to couple the estimates
average.profit <- function(n,nreps){
  set.seed(100)
  demand_vec <- rnorm(nreps,178,21)# note that the 3rd argurment of rnorm in R is sd not variance!
    profit_vec <- sapply(demand_vec,profit,n=n) #alternatively you may use a for loop. But sapply is a neat way to do things in R.
  av_profit <- mean(profit_vec)
    return(av_profit)
}
x=seq(from=185,to=220,by=0.5)
y=sapply(x,average.profit,nreps=3000) #takes a second
plot(x,y)

cost <- function(n){
  min(200,n)*25+(n>200)*(n-200)*5
}
profit <- function(n,d){
  100*min(n,d)-cost(n)
}
average.profit <- function(n,nreps){
  demand_vec <- rnorm(nreps,178,21)# note that the 3rd argurment of rnorm in R is sd not variance!
    profit_vec <- sapply(demand_vec,profit,n=n) #alternatively you may use a for loop. But sapply is a neat way to do things in R.
  av_profit <- mean(profit_vec)
    return(av_profit)
}
#after some initial searching with small nreps, we find the maximum is in the interval (185,220)
x=seq(from=185,to=220,by=0.5)
y=sapply(x,average.profit,nreps=500000) #takes a minute
plot(x,y)
#global maximum is 214
#use set.seed to set the seed for each new n in order to couple the estimates
average.profit <- function(n,nreps){
  set.seed(100)
  demand_vec <- rnorm(nreps,178,21)# note that the 3rd argurment of rnorm in R is sd not variance!
    profit_vec <- sapply(demand_vec,profit,n=n) #alternatively you may use a for loop. But sapply is a neat way to do things in R.
  av_profit <- mean(profit_vec)
    return(av_profit)
}
x=seq(from=185,to=220,by=0.5)
y=sapply(x,average.profit,nreps=3000) #takes a second
plot(x,y)