# A function to generate n iid X_i and return the sample mean and variance.
func<-function(n){
x <-rpois(n,70)
mean_x <-mean(x)
var_x <-var(x)
return(c(mean_x,var_x))
}
func2<-function(n){
y=func(n)
Z_s=(y[1]-70)/sqrt(y[2]/n)
Z_s
}
Zs_samples=sapply(rep(500,1000),func2)
Zs_samples_loop=rep(0,1000)
for (i in 1:1000){
Zs_samples_loop[i] = func2(500)
}
hist(Zs_samples)
hist(Zs_samples_loop)
qqnorm(Zs_samples)
qqnorm(Zs_samples_loop)
#they are normally distributed as says the CLT.
#n=500 is sufficient for the sample variance to a good
#approximation to the true variance.
dat <- trees #let dat be the trees dataframe
dat[1:10,] #print the first 10 rows
summary(dat) #print a summary of the dataset
pairs(dat,pch=19) #plot a matrix of scatterplots
mod=lm(Volume~Girth,data=dat)
plot(mod)
summary(mod)
dat$Girth2 <- (dat$Girth)^2 #note numerical operations are performed elementwise in R.
mod2=lm(Volume~Girth2,data=dat)
summary(mod2)
#improves- F-statistic increases, Residual se decreases etc
Girth | Height | Volume | |
---|---|---|---|
<dbl> | <dbl> | <dbl> | |
1 | 8.3 | 70 | 10.3 |
2 | 8.6 | 65 | 10.3 |
3 | 8.8 | 63 | 10.2 |
4 | 10.5 | 72 | 16.4 |
5 | 10.7 | 81 | 18.8 |
6 | 10.8 | 83 | 19.7 |
7 | 11.0 | 66 | 15.6 |
8 | 11.0 | 75 | 18.2 |
9 | 11.1 | 80 | 22.6 |
10 | 11.2 | 75 | 19.9 |
Girth Height Volume Min. : 8.30 Min. :63 Min. :10.20 1st Qu.:11.05 1st Qu.:72 1st Qu.:19.40 Median :12.90 Median :76 Median :24.20 Mean :13.25 Mean :76 Mean :30.17 3rd Qu.:15.25 3rd Qu.:80 3rd Qu.:37.30 Max. :20.60 Max. :87 Max. :77.00
Call: lm(formula = Volume ~ Girth, data = dat) Residuals: Min 1Q Median 3Q Max -8.065 -3.107 0.152 3.495 9.587 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) -36.9435 3.3651 -10.98 7.62e-12 *** Girth 5.0659 0.2474 20.48 < 2e-16 *** --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Residual standard error: 4.252 on 29 degrees of freedom Multiple R-squared: 0.9353, Adjusted R-squared: 0.9331 F-statistic: 419.4 on 1 and 29 DF, p-value: < 2.2e-16
Call: lm(formula = Volume ~ Girth2, data = dat) Residuals: Min 1Q Median 3Q Max -6.2475 -2.5738 0.2556 2.1341 7.0061 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) -3.355142 1.416902 -2.368 0.0248 * Girth2 0.181173 0.006923 26.169 <2e-16 *** --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Residual standard error: 3.37 on 29 degrees of freedom Multiple R-squared: 0.9594, Adjusted R-squared: 0.958 F-statistic: 684.8 on 1 and 29 DF, p-value: < 2.2e-16
cost <- function(n){
min(200,n)*25+(n>200)*(n-200)*5
}
profit <- function(n,d){
100*min(n,d)-cost(n)
}
average.profit <- function(n,nreps){
demand_vec <- rnorm(nreps,178,21)# note that the 3rd argurment of rnorm in R is sd not variance!
profit_vec <- sapply(demand_vec,profit,n=n) #alternatively you may use a for loop. But sapply is a neat way to do things in R.
av_profit <- mean(profit_vec)
return(av_profit)
}
#after some initial searching with small nreps, we find the maximum is in the interval (185,220)
x=seq(from=185,to=220,by=0.5)
y=sapply(x,average.profit,nreps=500000) #takes a minute
plot(x,y)
#global maximum is 214
#use set.seed to set the seed for each new n in order to couple the estimates
average.profit <- function(n,nreps){
set.seed(100)
demand_vec <- rnorm(nreps,178,21)# note that the 3rd argurment of rnorm in R is sd not variance!
profit_vec <- sapply(demand_vec,profit,n=n) #alternatively you may use a for loop. But sapply is a neat way to do things in R.
av_profit <- mean(profit_vec)
return(av_profit)
}
x=seq(from=185,to=220,by=0.5)
y=sapply(x,average.profit,nreps=3000) #takes a second
plot(x,y)