Below are the solutions to these exercises on “MCMC for Bayesian Inference – Gibbs Sampling: Exercises”.
#### Run the following lines before doing the exercises

```r
# ================================================================
## Vector of packages to use in this report
pkgs <- c("evd", "ggplot2", "gridExtra")

## Install packages from CRAN if they are not installed
for (p in pkgs) {
  if (!(p %in% rownames(installed.packages()))) {
    install.packages(p, repos = "http://cran.us.r-project.org")
  }
}

## Load the packages
for (p in pkgs)
  suppressPackageStartupMessages(library(p, quietly = TRUE, character.only = TRUE))
# ================================================================

###############
#             #
# Exercise 1  #
#             #
###############

set.seed(123)
N <- 1e3
mu_sample <- 0
sigma_sample <- 0.5
# The Gumbel distribution is the GEV distribution with shape parameter 0
sample_gumb <- rgev(n = N, loc = mu_sample, scale = sigma_sample, shape = 0)

x <- seq(-1000, 1000, by = 0.01)  # the Gumbel distribution has unbounded support!
plot(density(sample_gumb), col = "red",
     main = "Empirical and theoretical densities", lwd = 2)
lines(x, dgev(x, loc = mu_sample, scale = sigma_sample, shape = 0), lwd = 2)
legend("topright", lwd = 2, col = c("red", "black"),
       c("density of the sample", "density of Gumbel(0, 0.5)"))
```

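For reference, the Gumbel(mu, sigma) distribution sampled above is the GEV distribution with shape parameter $\xi = 0$; its density is

$$ f(x \mid \mu, \sigma) = \frac{1}{\sigma}\exp\!\left\{-\left(z + e^{-z}\right)\right\}, \qquad z = \frac{x - \mu}{\sigma}, \quad x \in \mathbb{R}. $$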
```r
###############
#             #
# Exercise 2  #
#             #
###############

logLikelihood_gumbel <- function(mu, sigma, data) {
  llhd <- evd::dgev(data, loc = mu, scale = sigma, shape = 0, log = TRUE)
  # If ever the sample contains missing values, put na.rm = TRUE.
  # But this is not the case here!
  return(sum(llhd, na.rm = TRUE))
}
logLikelihood_gumbel(mu = mu_sample, sigma = sigma_sample, data = sample_gumb)
```

```
## [1] -875.1573
```
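Written out, the quantity computed above is the Gumbel log-likelihood of the sample:

$$ \ell(\mu, \sigma) = \sum_{i=1}^{n} \log f(x_i \mid \mu, \sigma) = -n\log\sigma - \sum_{i=1}^{n}\left(z_i + e^{-z_i}\right), \qquad z_i = \frac{x_i - \mu}{\sigma}. $$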
```r
###############
#             #
# Exercise 3  #
#             #
###############

logPosterior_gumb <- function(mu, sigma, data) {
  ## Log-likelihood
  llhd <- dgev(data, loc = mu, scale = sigma, shape = 0, log = TRUE)
  llhd <- sum(llhd, na.rm = TRUE)
  ## Vague normal priors for mu and sigma
  lprior <- dnorm(mu, sd = 50, log = TRUE)
  lprior <- lprior + dnorm(sigma, sd = 50, log = TRUE)
  return(lprior + llhd)
}
logPosterior_gumb(mu = mu_sample, sigma = sigma_sample, data = sample_gumb)
```

```
## [1] -884.8192
```

This value is pretty close to the log-likelihood found in Exercise 2. This is explained by the vague priors used: they are so flat that they contribute almost nothing to the log-posterior.
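As a quick sanity check (a sketch, using only quantities already defined above), the gap between the log-posterior and the log-likelihood should equal the log-prior evaluated at (mu, sigma) = (0, 0.5):

```r
# Log-prior at (mu, sigma) = (0, 0.5): about -9.66
dnorm(0, sd = 50, log = TRUE) + dnorm(0.5, sd = 50, log = TRUE)
# Matches the gap between the two results: -884.8192 - (-875.1573) = -9.6619
```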
```r
###############
#             #
# Exercise 4  #
#             #
###############

iter <- 1e3
# Starting values
start_mu <- mean(sample_gumb)
start_sigma <- sd(sample_gumb)
# The standard deviations of the proposals
proposal.sd <- c(.05, .01)

# Initialization
out <- data.frame(mu = rep(NA, iter + 1), sigma = rep(NA, iter + 1))
# Starting values in the first row
out[1, ] <- c(start_mu, start_sigma)
lpost_old <- logPosterior_gumb(mu = out[1, 1], sigma = out[1, 2],
                               data = sample_gumb)

# Reproducible example
set.seed(1234)
for (t in 1:iter) {
  ## 1) For the parameter mu
  # Symmetric proposal, so it cancels in the acceptance ratio
  prop.mu <- rnorm(1, mean = out[t, 1], proposal.sd[1])
  lpost_prop <- logPosterior_gumb(prop.mu, out[t, 2], data = sample_gumb)
  r <- exp(lpost_prop - lpost_old)  # since the proposal is symmetric
  if (r > runif(1)) {  # accept
    out[t + 1, 1] <- prop.mu
    lpost_old <- lpost_prop
  } else {             # reject
    out[t + 1, 1] <- out[t, 1]
  }

  ## 2) For the parameter sigma
  # Symmetric proposal, so it cancels in the acceptance ratio
  prop.sigma <- rnorm(1, mean = out[t, 2], proposal.sd[2])
  # Condition on the freshly updated value of mu
  lpost_prop <- logPosterior_gumb(out[t + 1, 1], prop.sigma, data = sample_gumb)
  r <- exp(lpost_prop - lpost_old)  # since the proposal is symmetric
  if (r > runif(1)) {  # accept
    out[t + 1, 2] <- prop.sigma
    lpost_old <- lpost_prop
  } else {             # reject
    out[t + 1, 2] <- out[t, 2]
  }
}

###############
#             #
# Exercise 5  #
#             #
###############

par(mfrow = c(1, 2))
plot(out[, 1], type = "l", ylab = "mu", main = "Traceplot for mu")
plot(out[, 2], type = "l", ylab = "sigma", main = "Traceplot for sigma")
```
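In both updates above, the candidate is accepted with the standard Metropolis probability; the symmetric random-walk proposal cancels in the ratio, leaving

$$ \alpha(\theta, \theta') = \min\left\{1,\ \exp\!\left[\log \pi(\theta' \mid x) - \log \pi(\theta \mid x)\right]\right\}, $$

which is exactly the quantity `r` compared against a uniform draw in the code.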

```r
# a. Indeed, the chains look stationary. Another check would be to run the
#    chains from different starting values and verify that convergence to the
#    same region still occurs (see the sketch after Exercise 8).
# b. We can see in the above plots that the two chains have different updates.
# c. The starting values are located well away from the region where the
#    probability mass of the chains is concentrated; the fact that the chains
#    nevertheless reach that region quickly is good news for convergence.

###############
#             #
# Exercise 6  #
#             #
###############

# Initialization: starting values in the first row
out[1, ] <- c(start_mu, start_sigma)
lpost_old <- logPosterior_gumb(mu = out[1, 1], sigma = out[1, 2],
                               data = sample_gumb)
# Add this: one acceptance probability per iteration and per parameter
acc_rates <- matrix(nrow = iter, ncol = 2)

# Reproducible example
set.seed(1234)
for (t in 1:iter) {
  ## 1) For the parameter mu
  prop.mu <- rnorm(1, mean = out[t, 1], proposal.sd[1])
  lpost_prop <- logPosterior_gumb(prop.mu, out[t, 2], data = sample_gumb)
  r <- exp(lpost_prop - lpost_old)  # since the proposal is symmetric
  if (r > runif(1)) {  # accept
    out[t + 1, 1] <- prop.mu
    lpost_old <- lpost_prop
  } else {             # reject
    out[t + 1, 1] <- out[t, 1]
  }
  acc_rates[t, 1] <- min(r, 1)

  ## 2) For the parameter sigma
  prop.sigma <- rnorm(1, mean = out[t, 2], proposal.sd[2])
  # Condition on the freshly updated value of mu
  lpost_prop <- logPosterior_gumb(out[t + 1, 1], prop.sigma, data = sample_gumb)
  r <- exp(lpost_prop - lpost_old)  # since the proposal is symmetric
  if (r > runif(1)) {  # accept
    out[t + 1, 2] <- prop.sigma
    lpost_old <- lpost_prop
  } else {             # reject
    out[t + 1, 2] <- out[t, 2]
  }
  acc_rates[t, 2] <- min(r, 1)
}
# Check the mean acceptance rate for the chains of BOTH parameters
apply(acc_rates, 2, mean)
```

```
## [1] 0.3393706 0.6514134
```
```r
###############
#             #
# Exercise 7  #
#             #
###############

# The function's arguments:
# - start: vector of starting values for mu and sigma
# - proposal.sd: standard deviations of the proposals for mu and sigma
# - data: the dataset (sample) to use
# - iter: number of iterations for the MCMC sampler
# - (bonus!) burnin: number of first iterations to remove (for convergence issues)
gibbsSampler.fun <- function(start,
                             proposal.sd,
                             data = sample_gumb,
                             iter = 1e3,  # default values after the "=" sign
                             burnin = 0) {
  # Initialization
  out <- data.frame(mu = rep(NA, iter + 1), sigma = rep(NA, iter + 1))
  # Starting values in the first row
  out[1, ] <- start
  lpost_old <- logPosterior_gumb(mu = out[1, 1], sigma = out[1, 2], data = data)
  acc_rates <- matrix(nrow = iter, ncol = 2)
  # Reproducible example
  set.seed(1234)
  for (t in 1:iter) {
    ## 1) For the parameter mu
    prop.mu <- rnorm(1, mean = out[t, 1], proposal.sd[1])
    lpost_prop <- logPosterior_gumb(prop.mu, out[t, 2], data = data)
    r <- exp(lpost_prop - lpost_old)  # since the proposal is symmetric
    if (r > runif(1)) {  # accept
      out[t + 1, 1] <- prop.mu
      lpost_old <- lpost_prop
    } else {             # reject
      out[t + 1, 1] <- out[t, 1]
    }
    acc_rates[t, 1] <- min(r, 1)

    ## 2) For the parameter sigma
    prop.sigma <- rnorm(1, mean = out[t, 2], proposal.sd[2])
    # Condition on the freshly updated value of mu
    lpost_prop <- logPosterior_gumb(out[t + 1, 1], prop.sigma, data = data)
    r <- exp(lpost_prop - lpost_old)  # since the proposal is symmetric
    if (r > runif(1)) {  # accept
      out[t + 1, 2] <- prop.sigma
      lpost_old <- lpost_prop
    } else {             # reject
      out[t + 1, 2] <- out[t, 2]
    }
    acc_rates[t, 2] <- min(r, 1)
  }
  # Discard the burn-in period, if any
  if (burnin > 0) out <- out[-(1:burnin), ]
  return(list(
    mean.acc_rates = apply(acc_rates, 2, mean),
    out.chain = data.frame(out, iter = 1:nrow(out))
  ))
}

###############
#             #
# Exercise 8  #
#             #
###############

# Run the function
test_GibbsSampler <- gibbsSampler.fun(start = c(start_mu, start_sigma),
                                      proposal.sd = c(.05, .01))
# Check the acceptance rates
test_GibbsSampler$mean.acc_rates
```

```
## [1] 0.3393706 0.6514134
```
```r
# Too high for sigma and slightly too low for mu
test_GibbsSampler1 <- gibbsSampler.fun(start = c(start_mu, start_sigma),
                                       proposal.sd = c(.04, .02))
test_GibbsSampler1$mean.acc_rates
```

```
## [1] 0.4203871 0.4934121
```

```r
# Still too high for sigma --> increase the sd of its proposal
test_GibbsSampler2 <- gibbsSampler.fun(start = c(start_mu, start_sigma),
                                       proposal.sd = c(.04, .03))
test_GibbsSampler2$mean.acc_rates
```

```
## [1] 0.4303965 0.3950778
```
```r
# Both around 40%, a common rule-of-thumb target for componentwise
# random-walk Metropolis updates: OK!
# We can visualize the new traceplots
par(mfrow = c(1, 2))
plot(test_GibbsSampler2$out.chain[, 1], type = "l",
     ylab = "mu", main = "Traceplot for mu")
plot(test_GibbsSampler2$out.chain[, 2], type = "l",
     ylab = "sigma", main = "Traceplot for sigma")
```

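As mentioned in Exercise 5, a further convergence check is to restart the sampler from a deliberately overdispersed starting point and verify that the chains reach the same region. A minimal sketch (the starting point `c(2, 2)` is arbitrary):

```r
# Re-run the sampler from a remote starting point; only the start differs,
# since gibbsSampler.fun() sets the seed internally
check_run <- gibbsSampler.fun(start = c(2, 2), proposal.sd = c(.04, .03))
par(mfrow = c(1, 2))
plot(check_run$out.chain$mu, type = "l", ylab = "mu",
     main = "mu, started at 2")
plot(check_run$out.chain$sigma, type = "l", ylab = "sigma",
     main = "sigma, started at 2")
```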
```r
###############
#             #
# Exercise 9  #
#             #
###############

## Personalized theme for ggplot (not mandatory)
theme_piss <- function(size_p = 18, size_c = 14, size_l = 12,
                       theme = theme_bw(), ...) {
  text <- function(size, ...)
    element_text(size = size, colour = "#33666C", face = "bold", ...)
  theme +
    theme(plot.title = text(size_p, hjust = 0.5),
          axis.title = text(size_c),
          legend.title = text(size_l)) +
    theme(legend.background = element_rect(colour = "black"),
          legend.key = element_rect(fill = "white"),
          ...)
}

gg_mu <- ggplot(test_GibbsSampler2$out.chain) +
  geom_hline(aes(yintercept = median(mu), col = "Posterior Estimate"),
             linetype = "dashed") +
  geom_hline(aes(yintercept = mu_sample, col = "True value"),
             linetype = "dashed") +
  geom_line(aes(x = iter, y = mu)) +
  coord_cartesian(ylim = c(-0.05, 0.1)) +  # recenter the plot
  labs(title = "Traceplot for mu") +
  scale_colour_manual(name = "Value",
                      values = c("True value" = "blue",
                                 "Posterior Estimate" = "red")) +
  theme_piss(legend.position = c(0.85, 0.85))

gg_sigma <- ggplot(test_GibbsSampler2$out.chain) +
  geom_hline(aes(yintercept = median(sigma), col = "Posterior Estimate"),
             linetype = "dashed") +
  geom_hline(aes(yintercept = sigma_sample, col = "True value"),
             linetype = "dashed") +
  geom_line(aes(x = iter, y = sigma)) +
  coord_cartesian(ylim = c(0.46, 0.55)) +  # recenter the plot
  labs(title = "Traceplot for sigma") +
  scale_colour_manual(name = "Value",
                      values = c("True value" = "blue",
                                 "Posterior Estimate" = "red")) +
  theme_piss(legend.position = c(0.85, 0.85))

# The blue line represents the true value and the red line is
# the (median) posterior estimate
grid.arrange(gg_mu, gg_sigma, nrow = 1)
```

```r
## Increase the number of iterations (10k) and add a burn-in period (1k):
test_GibbsSampler3 <- gibbsSampler.fun(
  start = c(start_mu, start_sigma),
  proposal.sd = c(.04, .03),
  iter = 1e4,
  burnin = 1e3
)
test_GibbsSampler3$mean.acc_rates
```

```
## [1] 0.4090641 0.3977764
```
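With the longer chain it is also worth glancing at the autocorrelation of each chain: slowly decaying autocorrelation would suggest poor mixing. A quick look with base R:

```r
# Autocorrelation functions of the two chains after burn-in
par(mfrow = c(1, 2))
acf(test_GibbsSampler3$out.chain$mu, main = "ACF for mu")
acf(test_GibbsSampler3$out.chain$sigma, main = "ACF for sigma")
```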
```r
gg_mu <- ggplot(test_GibbsSampler3$out.chain) +
  geom_hline(aes(yintercept = median(mu), col = "Posterior Estimate"),
             linetype = "dashed") +
  geom_hline(aes(yintercept = mu_sample, col = "True value"),
             linetype = "dashed") +
  geom_line(aes(x = iter, y = mu)) +
  coord_cartesian(ylim = c(-0.05, 0.1)) +  # recenter the plot
  labs(title = "Traceplot for mu") +
  scale_colour_manual(name = "Value",
                      values = c("True value" = "blue",
                                 "Posterior Estimate" = "red")) +
  theme_piss(legend.position = c(0.85, 0.85))

gg_sigma <- ggplot(test_GibbsSampler3$out.chain) +
  geom_hline(aes(yintercept = median(sigma), col = "Posterior Estimate"),
             linetype = "dashed") +
  geom_hline(aes(yintercept = sigma_sample, col = "True value"),
             linetype = "dashed") +
  geom_line(aes(x = iter, y = sigma)) +
  coord_cartesian(ylim = c(0.46, 0.55)) +  # recenter the plot
  labs(title = "Traceplot for sigma") +
  scale_colour_manual(name = "Value",
                      values = c("True value" = "blue",
                                 "Posterior Estimate" = "red")) +
  theme_piss(legend.position = c(0.85, 0.85))

# The blue line represents the true value and the red line is
# the (median) posterior estimate
grid.arrange(gg_mu, gg_sigma, nrow = 1)
```

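Finally, beyond traceplots, the retained draws can be summarized directly. A minimal sketch computing posterior medians and 95% credible intervals from the burned-in chain:

```r
# Posterior medians and 95% credible intervals for mu and sigma
apply(test_GibbsSampler3$out.chain[, c("mu", "sigma")], 2,
      quantile, probs = c(0.025, 0.5, 0.975))
```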