Below are the solutions to these exercises on simulations.
#################### # # # Exercise 1 # # # #################### birth.sample<-function(n.people,n.sample) { success<-0 for(i in 1:n.sample) { samples<-sample(1:365,n.people,replace = TRUE) #Sort the date of birth from the earlyest to the latest samples<-sort(samples) #Boolean variable which indicate if we find two identical birth date find.identical<-FALSE #Pointer for the search index<-1 while(index<n.people & !find.identical) { if(samples[index]==samples[index+1]) { find.identical<-TRUE } index<-index+1 } if(find.identical) { success<-success+1 } } return(success/n.sample) } birth.sample(25,10000)
## [1] 0.5626
number.of.people<-c(0,10,20,30,40,50,60,70,80,90,100) results<-0 for(i in 2:length(number.of.people)) { temp<-birth.sample(number.of.people[i],10000) results<-c(results,temp) } plot(x=number.of.people,y=results)

#################### # # # Exercise 2 # # # #################### shoot.no.spin<-0 for(i in 1:10000) { first.bullet<-sample(2:5,1) second.bullet<-first.bullet+1 if(first.bullet==2|second.bullet==2) { shoot.no.spin<-shoot.no.spin+1 } } print(paste0("Probability of hitting a bullet without spin:",shoot.no.spin/10000 ))
## [1] "Probability of hitting a bullet without spin:0.249"
shoot.with.spin<-0 for(i in 1:10000) { first.bullet<-sample(1:6,1) if(first.bullet==6) { second.bullet<-1 } else { second.bullet<-first.bullet+1 } if(first.bullet==1|second.bullet==1) { shoot.with.spin<-shoot.with.spin+1 } } print(paste0("Probability of hitting a bullet with spin:",shoot.with.spin/10000 ))
## [1] "Probability of hitting a bullet with spin:0.3306"
#################### # # # Exercise 3 # # # #################### #Usually, that's how we intuitively compute this probability. two.boys<-0 for(i in 1:10000) { #Choose which one of both child is known to be a boy boy<-sample(1:2,1) if(boy==1) { first<-1 second<-sample(1:2,1) } else { first<-sample(1:2,1) second<-1 } if(first==1 & second==1) { two.boys<-two.boys+1 } } print(paste0("Probability of giving birth to 2 boys:",two.boys/10000 ))
## [1] "Probability of giving birth to 2 boys:0.4904"
#By doing so, we have assigned a sex to one of the unborn child, which is an information we actually don't have. Instead, we should #compute the probability as so: two.boys<-0 for(i in 1:10000) { first<-sample(1:2,1) second<-sample(1:2,1) while(first!=1 & second!=1) { first<-sample(1:2,1) second<-sample(1:2,1) } if(first==1 & second==1) { two.boys<-two.boys+1 } } print(paste0("Probability of giving birth to 2 boys:",two.boys/10000 ))
## [1] "Probability of giving birth to 2 boys:0.3261"
#################### # # # Exercise 4 # # # #################### #1 coin.flip<-function(friend.1.money,friend.2.money) { penny.1<-friend.1.money penny.2<-friend.2.money number.flip<-0 while(penny.1>0 & penny.2>0) { win<-sample(1:2,1) if(win==1) { penny.1<-penny.1+1 penny.2<-penny.2-1 } else { penny.2<-penny.2+1 penny.1<-penny.1-1 } number.flip<-number.flip+1 } result.game<-number.flip if(penny.1==0) { result.game<-c(result.game,2) } else { result.game<-c(result.game,1) } return(result.game) } #2 simulation.coin.flip<-coin.flip(40,30) for(i in 1:999) { temp<-coin.flip(40,30) simulation.coin.flip<-rbind(simulation.coin.flip,temp) } mean(simulation.coin.flip[,1])
## [1] 1178.128
#3 hist(simulation.coin.flip[,1])

#4 print(1/2)
## [1] 0.5
#5 print(paste0("Probability that the first friend win:",sum(simulation.coin.flip[,2]==1)/length(simulation.coin.flip[,2])))
## [1] "Probability that the first friend win:0.56"
print(paste0("Probability that the second friend win:",sum(simulation.coin.flip[,2]==2)/length(simulation.coin.flip[,2])))
## [1] "Probability that the second friend win:0.44"
#################### # # # Exercise 5 # # # #################### monty.hall.keep<-function(iteration) { win<-0 loss<-0 for(i in 1:iteration) { prize<-sample(1:3,1) choice<-sample(1:3,1) if(prize==choice) {win=win+1} else {loss=loss+1} } return(win/(win+loss)) } monty.hall.keep(10000)
## [1] 0.3318
monty.hall.switch<-function(iteration) { win<-0 loss<-0 for(i in 1:iteration) { door<-1:3 prize<-sample(door,1) door<-1:3 #Why is I wrote that? Write your answer in the comments. choice.1<-sample(door,1) door.open<-sample(which(door!=choice.1 & door!=prize),1) choice.2<-sample(which(door!=choice.1 & door!=door.open),1) if(prize==choice.2) {win=win+1} else {loss=loss+1} } return(win/(win+loss)) } monty.hall.switch(10000)
## [1] 0.4071
#################### # # # Exercise 6 # # # #################### data.ex6<-read.csv("https://www.r-exercises.com/wp-content/uploads/2017/09/data.ex6_.set9_.csv") head(data.ex6[,3])
## [1] 0 0 1 1 1 1
#1 nrow(data.ex6[which(data.ex6[,2]=="A" & data.ex6[,3]==1),])/nrow(data.ex6[which(data.ex6[,2]=="A"),])
## [1] 0.78
nrow(data.ex6[which(data.ex6[,2]=="B" & data.ex6[,3]==1),])/nrow(data.ex6[which(data.ex6[,2]=="B"),])
## [1] 0.8257143
#2 #Treatment B #3 contengency.table<-table(data.ex6[which(data.ex6[,3]==1),1],data.ex6[which(data.ex6[,3]==1),2]) contengency.table
## ## A B ## large 192 55 ## small 81 234
#4 contengency.table[1,1]/nrow(data.ex6[which(data.ex6[,2]=="A" & data.ex6[,1]=="large"),])
## [1] 0.730038
contengency.table[1,2]/nrow(data.ex6[which(data.ex6[,2]=="B" & data.ex6[,1]=="large"),])
## [1] 0.6875
#5 contengency.table[2,1]/nrow(data.ex6[which(data.ex6[,2]=="A" & data.ex6[,1]=="small"),])
## [1] 0.9310345
contengency.table[2,2]/nrow(data.ex6[which(data.ex6[,2]=="B" & data.ex6[,1]=="small"),])
## [1] 0.8666667
#6 #Treatment A #################### # # # Exercise 7 # # # #################### #1 data.ex7<-read.csv("https://www.r-exercises.com/wp-content/uploads/2017/09/data.ex7_.set9_.csv") fit <- lm(Y ~ X, data=data.ex7) summary(fit)
## ## Call: ## lm(formula = Y ~ X, data = data.ex7) ## ## Residuals: ## Min 1Q Median 3Q Max ## -19.4411 -6.4886 0.3442 6.4689 22.2547 ## ## Coefficients: ## Estimate Std. Error t value Pr(>|t|) ## (Intercept) 34.7888 2.0937 16.62 <2e-16 *** ## X -3.5753 0.2408 -14.85 <2e-16 *** ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## ## Residual standard error: 8.277 on 59 degrees of freedom ## Multiple R-squared: 0.7889, Adjusted R-squared: 0.7854 ## F-statistic: 220.5 on 1 and 59 DF, p-value: < 2.2e-16
#2 plot(data.ex7$X,data.ex7$Y) abline(fit)

#3 fit <- lm(Y ~ X, data=data.ex7[which(data.ex7[,3]=="A"),]) summary(fit)
## ## Call: ## lm(formula = Y ~ X, data = data.ex7[which(data.ex7[, 3] == "A"), ## ]) ## ## Residuals: ## Min 1Q Median 3Q Max ## -4.5249 -1.1975 0.0659 2.1173 3.6389 ## ## Coefficients: ## Estimate Std. Error t value Pr(>|t|) ## (Intercept) 3.6415 2.8611 1.273 0.219 ## X 0.5402 0.3687 1.465 0.160 ## ## Residual standard error: 2.377 on 18 degrees of freedom ## Multiple R-squared: 0.1065, Adjusted R-squared: 0.05691 ## F-statistic: 2.147 on 1 and 18 DF, p-value: 0.1601
plot(data.ex7[which(data.ex7[,3]=="A"),1],data.ex7[which(data.ex7[,3]=="A"),2]) abline(fit)

fit <- lm(Y ~ X, data=data.ex7[which(data.ex7[,3]=="B"),]) summary(fit)
## ## Call: ## lm(formula = Y ~ X, data = data.ex7[which(data.ex7[, 3] == "B"), ## ]) ## ## Residuals: ## Min 1Q Median 3Q Max ## -6.4668 -4.3722 0.7742 3.1045 7.4130 ## ## Coefficients: ## Estimate Std. Error t value Pr(>|t|) ## (Intercept) -32.9568 8.5681 -3.846 0.00118 ** ## X 1.5567 0.6743 2.309 0.03304 * ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## ## Residual standard error: 4.347 on 18 degrees of freedom ## Multiple R-squared: 0.2285, Adjusted R-squared: 0.1856 ## F-statistic: 5.33 on 1 and 18 DF, p-value: 0.03304
plot(data.ex7[which(data.ex7[,3]=="B"),1],data.ex7[which(data.ex7[,3]=="B"),2]) abline(fit)

fit <- lm(Y ~ X, data=data.ex7[which(data.ex7[,3]=="C"),]) summary(fit)
## ## Call: ## lm(formula = Y ~ X, data = data.ex7[which(data.ex7[, 3] == "C"), ## ]) ## ## Residuals: ## Min 1Q Median 3Q Max ## -8.849 -4.223 -1.436 5.803 11.763 ## ## Coefficients: ## Estimate Std. Error t value Pr(>|t|) ## (Intercept) 27.5062 2.6182 10.506 2.36e-09 *** ## X 0.3746 0.8959 0.418 0.681 ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## ## Residual standard error: 6.215 on 19 degrees of freedom ## Multiple R-squared: 0.009118, Adjusted R-squared: -0.04303 ## F-statistic: 0.1748 on 1 and 19 DF, p-value: 0.6805
plot(data.ex7[which(data.ex7[,3]=="C"),1],data.ex7[which(data.ex7[,3]=="C"),2]) abline(fit)

#################### # # # Exercise 8 # # # #################### data.ex8<-read.csv("https://www.r-exercises.com/wp-content/uploads/2017/09/data.ex8_.set9_.csv") head(data.ex8)
## Classification Real Incidence ## 1 0 0 low ## 2 0 0 low ## 3 0 0 low ## 4 0 0 low ## 5 0 0 low ## 6 0 0 high
#1 #true positive rate nrow(data.ex6[which(data.ex8[,3]=="high" & data.ex8[,1]==1& data.ex8[,2]==1),])/ nrow(data.ex8[which(data.ex8[,3]=="high"& data.ex8[,1]==1),])
## [1] 0.9513889
#false positive rate nrow(data.ex6[which(data.ex8[,3]=="high" & data.ex8[,1]==1& data.ex8[,2]==0),])/ nrow(data.ex8[which(data.ex8[,3]=="high"& data.ex8[,1]==1),])
## [1] 0.04861111
#true negative rate nrow(data.ex6[which(data.ex8[,3]=="high" & data.ex8[,1]==0& data.ex8[,2]==0),])/ nrow(data.ex8[which(data.ex8[,3]=="high"& data.ex8[,1]==0),])
## [1] 0.9869646
#false negative rate nrow(data.ex6[which(data.ex8[,3]=="high" & data.ex8[,1]==0& data.ex8[,2]==1),])/ nrow(data.ex8[which(data.ex8[,3]=="high"& data.ex8[,1]==0),])
## [1] 0.01303538
#2 #true positive rate nrow(data.ex6[which(data.ex8[,3]=="low" & data.ex8[,1]==1& data.ex8[,2]==1),])/ nrow(data.ex8[which(data.ex8[,3]=="low"& data.ex8[,1]==1),])
## [1] 0.03846154
#false positive rate nrow(data.ex6[which(data.ex8[,3]=="low" & data.ex8[,1]==1& data.ex8[,2]==0),])/ nrow(data.ex8[which(data.ex8[,3]=="low"& data.ex8[,1]==1),])
## [1] 0.9615385
#true negative rate nrow(data.ex6[which(data.ex8[,3]=="low" & data.ex8[,1]==0& data.ex8[,2]==0),])/ nrow(data.ex8[which(data.ex8[,3]=="low"& data.ex8[,1]==0),])
## [1] 0.9971831
#false negative rate nrow(data.ex6[which(data.ex8[,3]=="low" & data.ex8[,1]==0& data.ex8[,2]==1),])/ nrow(data.ex8[which(data.ex8[,3]=="low"& data.ex8[,1]==0),])
## [1] 0.002816901
#################### # # # Exercise 9 # # # #################### #1 data.ex9<-rnorm(10000,mean=42,sd=10) #2 estimation.10<-42 for(i in 1:200) { sample.temp<-sample(data.ex9,10) estimation.10<-c(estimation.10,mean(sample.temp)) } #3 var.10<-var(estimation.10[2:length(estimation.10)]) var.10
## [1] 8.774137
#4 estimation.50<-42 for(i in 1:200) { sample.temp<-sample(data.ex9,50) estimation.50<-c(estimation.50,mean(sample.temp)) } var.50<-var(estimation.50[2:length(estimation.50)]) var.50
## [1] 1.565586
#5 estimation.100<-42 for(i in 1:200) { sample.temp<-sample(data.ex9,100) estimation.100<-c(estimation.100,mean(sample.temp)) } var.100<-var(estimation.100[2:length(estimation.100)]) var.100
## [1] 1.022226
#6 estimation.500<-42 for(i in 1:200) { sample.temp<-sample(data.ex9,500) estimation.500<-c(estimation.500,mean(sample.temp)) } var.500<-var(estimation.500[2:length(estimation.500)]) var.500
## [1] 0.1867369
#7 plot(c(10,50,100,500),c(var.10,var.50,var.100,var.500))

#################### # # # Exercise 10 # # # #################### data.ex10<-rnorm(20000,mean=76,sd=15) small.school.estimation<-0 for(i in 1:10000) { sample.temp<-sample(data.ex10,100) small.school.estimation<-c(small.school.estimation,mean(sample.temp)) } sum(small.school.estimation>=81)
## [1] 4
big.school.estimation<-0 for(i in 1:10000) { sample.temp<-sample(data.ex10,500) big.school.estimation<-c(big.school.estimation,mean(sample.temp)) } sum(big.school.estimation>=81)
## [1] 0
Leave a Reply