Below are the solutions to these exercises on answering probability questions with simulation.

#################### # # # Exercise 1 # # # #################### # This is a famous problem covered in a recent episode of Numberphile # https://www.youtube.com/embed/pbXg5EI5t4c matched <- function(highest_card) { cards_ordered <- 1:highest_card # shuffle cards and layout # check if any matches its position any(sample(cards_ordered) == cards_ordered) } # Number of simulation n <- 1e5 # Highest card, cards go from 1 to ... hcard <- 10 # Run n simulations sim <- replicate(n, matched(hcard)) # Estimated probability mean(sim)

## [1] 0.62938

# Analytical solutions 1 - sum( (-1)^(0:hcard) / factorial(0:hcard))

## [1] 0.6321205

#################### # # # Exercise 2 # # # #################### # 35 voters and 3 candidates vote_is_draw <- function() { vote <- sort(table(sample(1:3, size = 35, replace = TRUE))) vote[2] == vote[3] } # Number of simulation n <- 1e4 # Run n simulations sim <- replicate(n, vote_is_draw()) # Estimated probability mean(sim)

## [1] 0.1309

#################### # # # Exercise 3 # # # #################### # This problem has been viral on Reddit.com # https://redd.it/6y93j8 time_until_full <- function() { draws <- 0L cards <- vector(mode = "logical", length = 52L) while (!all(cards)) { cards[sample(1:52, size = 1L)] <- TRUE draws <- draws + 1L } draws } a <- replicate(2000, time_until_full()) mean(a)

## [1] 234.52

# Analytical solution 52*sum(1 / (1:52))

## [1] 235.9783

#################### # # # Exercise 4 # # # #################### dice <- 1:6 winning_difference <- 3:5 win_dicethrow <- function(dice, winning_difference) { throw <- sample(dice, size = 2, replace = TRUE) abs(diff(throw)) %in% winning_difference } # Number of simulation n <- 1e5 # Run n simulations sim <- replicate(n, win_dicethrow(dice, winning_difference)) # Estimated probabilitiy mean(sim)

## [1] 0.33276

#################### # # # Exercise 5 # # # #################### size_group <- 400 nbirthd <- function(size_group) { length(unique(sample(365, size_group, replace = TRUE))) } # Number of simulation n <- 1e5 # Run n simulations sim <- replicate(n, nbirthd(size_group)) # Estimated probabilitiy mean(sim)

## [1] 243.1645

# Analytical solution 365 * (1 - (364/365) ^ size_group)

## [1] 243.1851

#################### # # # Exercise 6 # # # #################### deck <- rep(1:13, 4) # We are not concerned with suit here. draw5cards <- function(deck) { sum(sample(deck, size = 5) == 1) == 3 } # Number of simulation n <- 1e5 # Run n simulations sim <- replicate(n, draw5cards(deck)) # Estimated probabilitiy mean(sim)

## [1] 0.00168

# Analytical solution choose(4, 3) * choose(48, 2) / choose(52, 5)

## [1] 0.001736079

#################### # # # Exercise 7 # # # #################### set <- 1:7 drawabc <- function(set) { #c(a,b,c) draw <- sample(set, size = 3) sum(draw[1:2]) > draw[3] } # Number of simulation n <- 1e4 # Run n simulations sim <- replicate(n, drawabc(set)) # Estimated probabilitiy mean(sim)

## [1] 0.7913

#################### # # # Exercise 8 # # # #################### dice <- 1:6 throwthree <- function(dice) { sum(sample(dice, 3)) == 8 } # Number of simulation n <- 1e4 # Run n simulations sim <- replicate(n, throwthree(dice)) # Estimated probabilitiy mean(sim)

## [1] 0.0961

#################### # # # Exercise 9 # # # #################### # Problem originally posted on a blog: # https://gilkalai.wordpress.com/2017/09/07/tyi-30-expected-number-of-dice-throws/ dice <- 1:6 untilsix <- function(dice) { thrown <- 1L while (TRUE) { throw <- sample(dice, size = 1) if (throw %% 2 != 0L) { return(99) } else if (throw == 6) { return(thrown) } thrown <- thrown + 1L } } # Number of simulation n <- 1e5 # Run n simulations sim <- replicate(n, untilsix(dice)) # Estimated probabilitiy mean(sim[sim != 99])

## [1] 1.50779

# Analytical solution 3/2

## [1] 1.5

#################### # # # Exercise 10 # # # #################### # All two-digits numbers tdn <- 10:99 # Their digits in a list tdndigits <- strsplit(as.character(tdn), split = "") div <- vector(length = length(tdn), mode = "logical") for (i in 1:length(tdn)) { div[i] <- all(tdn[i] %% as.numeric(tdndigits[[i]]) == 0) } div[is.na(div)] <- FALSE # Probability sum(div == TRUE) / length(tdn)

## [1] 0.1555556

Giorgio Salvadè says

Solution Exercize 4 should be corrected with

abs (diff (throw)) ……

sindri says

Thank you Giorgio! I have updated the code accordingly.

Vesselin Nikov says

Actually, in this case, the loop is easily avoidable. Just produce large enough number of observations and it’s done

trows <- sample(x = 1:6, size = 2e6, replace = T)

result = 3]) / length(result)

[1] 0.333079