Below are the solutions to these exercises on “Bayesian Inference – MCMC Diagnostics using coda : Exercises”.
#### Run the following lines before doing the exercises
# ================================================================
## Vector of packages to use in this report
pkgs <- c("evd", "coda", "reshape2", "tidyverse")

## Install packages from CRAN if they are not installed
for (p in pkgs) {
  if (!p %in% rownames(installed.packages())) {
    install.packages(p, repos = "http://cran.us.r-project.org")
  }
}

## Load the packages
for (p in pkgs)
  suppressPackageStartupMessages(library(p, quietly = TRUE, character.only = TRUE))
# ================================================================
###############
#             #
# Exercise 1  #
#             #
###############
# Reproducible example
set.seed(123)
N <- 1e3
mu_sample <- 10
sigma_sample <- 3
sample_gumb <- rgev(
  n = N,
  loc = mu_sample,
  scale = sigma_sample,
  shape = 0
)

x <- seq(-1000, 1000, by = 0.01)   # Gumbel has an unbounded domain!
plot(density(sample_gumb),
     col = "red",
     main = "Empirical and theoretical densities",
     lwd = 2)
lines(x,
      dgev(x, loc = mu_sample, scale = sigma_sample, shape = 0),
      lwd = 2)
legend(
  "topright",
  lwd = 2,
  col = c("red", "black"),
  c(
    "density of the sample",
    paste("density of Gumbel(", mu_sample, ",", sigma_sample, ")")
  )
)
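# (Side check, not part of the exercise) With shape = 0 the GEV reduces to the
# Gumbel distribution, so dgev(..., shape = 0) should coincide with evd's
# dgumbel() -- a quick sanity check on the grid x defined above:
all.equal(
  dgev(x, loc = mu_sample, scale = sigma_sample, shape = 0),
  dgumbel(x, loc = mu_sample, scale = sigma_sample)
)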

###############
#             #
# Exercise 2  #
#             #
###############
'logPosterior_gumb' <- function(mu, sigma, data) {
  llhd <- dgev(
    data,
    loc = mu,
    scale = sigma,
    shape = 0,
    log = TRUE
  )
  llhd <- sum(llhd, na.rm = TRUE)
  lprior <- dnorm(mu, sd = 50, log = TRUE)
  lprior <- lprior + dnorm(sigma, sd = 50, log = TRUE)
  return(lprior + llhd)
}

# The function from the last exercise set:
# - start specifies the vector of starting values for mu and sigma
# - proposal.sd specifies the standard deviations of the proposals for mu and sigma
# - data specifies the dataset (sample) to use
# - iter specifies the number of iterations for the MCMC sampler
# - (bonus!) burnin specifies the number of first iterations to remove (for convergence issues)
'gibbsSampler.fun' <- function(start,
                               proposal.sd,
                               data,
                               iter,
                               # Default values after the "=" sign
                               burnin = 0) {
  # Initialization
  out <- data.frame(mu = rep(NA, iter + 1),
                    sigma = rep(NA, iter + 1))
  # Starting values in the first row
  out[1, ] <- start
  lpost_old <- logPosterior_gumb(mu = out[1, 1],
                                 sigma = out[1, 2],
                                 data = data)
  # Keep track of the acceptance probabilities
  acc_rates <- matrix(nrow = iter, ncol = 2)

  # Reproducible example
  set.seed(1234)
  for (t in 1:iter) {
    ## 1) For the parameter mu
    prop.mu <- rnorm(1, mean = out[t, 1], proposal.sd[1])   # symmetric, so it cancels in the ratio
    lpost_prop <- logPosterior_gumb(prop.mu, out[t, 2], data = data)
    r <- exp(lpost_prop - lpost_old)   # since the proposal is symmetric
    # Accept
    if (r > runif(1)) {
      out[t + 1, 1] <- prop.mu
      lpost_old <- lpost_prop
    }
    # Reject
    else
      out[t + 1, 1] <- out[t, 1]
    acc_rates[t, 1] <- min(r, 1)

    ## 2) For the parameter sigma
    prop.sigma <- rnorm(1, mean = out[t, 2], proposal.sd[2])   # symmetric, so it cancels in the ratio
    lpost_prop <- logPosterior_gumb(out[t, 1], prop.sigma, data = data)
    r <- exp(lpost_prop - lpost_old)   # since the proposal is symmetric
    # Accept
    if (r > runif(1)) {
      out[t + 1, 2] <- prop.sigma
      lpost_old <- lpost_prop
    }
    # Reject
    else
      out[t + 1, 2] <- out[t, 2]
    acc_rates[t, 2] <- min(r, 1)
  }
  # Drop the burn-in rows; note that with burnin = 0 this still removes the
  # first row, which only holds the starting values
  out <- out[-(1:max(burnin, 1)), ]
  return(list(
    mean.acc_rates = apply(acc_rates, 2, mean),
    out.chain = data.frame(out, iter = 1:nrow(out))
  ))
}

## Starting values
start_mu <- mean(sample_gumb)
start_sigma <- sd(sample_gumb)

## Execute the Gibbs sampler
test_GibbsSampler <- gibbsSampler.fun(
  start = c(start_mu, start_sigma),
  proposal.sd = c(.05, .01),
  data = sample_gumb,
  iter = 1e4
)
# Check the acceptance rate
test_GibbsSampler$mean.acc_rates
## [1] 0.7962319 0.8520756
## Increase the proposal standard deviations
GibbsSampler_fin <- gibbsSampler.fun(
  start = c(start_mu, start_sigma),
  proposal.sd = c(0.25, 0.18),
  data = sample_gumb,
  iter = 1e4
)
# Check the acceptance rate
GibbsSampler_fin$mean.acc_rates
## [1] 0.4006203 0.3931149
# --> A good setting for the proposal sd's is around (0.25, 0.18) for (mu, sigma)

## Plot the chains
par(mfrow = c(1, 2))
plot(
  GibbsSampler_fin$out.chain[, 1],
  type = "l",
  ylab = "mu",
  main = "Traceplot for mu"
)
plot(
  GibbsSampler_fin$out.chain[, 2],
  type = "l",
  ylab = "sigma",
  main = "Traceplot for sigma"
)
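# (Optional check) With acceptance rates around 0.4 and well-mixed traceplots,
# the draws can already be summarized; coda's summary() on an mcmc object gives
# posterior means, quantiles and (naive and time-series) standard errors:
summary(mcmc(GibbsSampler_fin$out.chain[, c("mu", "sigma")]))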

###############
#             #
# Exercise 3  #
#             #
###############
starts_mu <- c(5, 10, 15, 20)
starts_sigma <- c(1, 4, 7, 11)

## Run the sampler from each set of starting values, keeping the tuned proposal sd's
GibbsSampler_list <- lapply(1:length(starts_mu), function(x) {
  gibbsSampler.fun(
    start = c(starts_mu[x], starts_sigma[x]),
    proposal.sd = c(0.25, 0.18),
    data = sample_gumb,
    iter = 1e4
  )
})

## Check that all chains have good acceptance rates
lapply(GibbsSampler_list, function(x) x$mean.acc_rates)
## [[1]]
## [1] 0.3937012 0.3958051
## 
## [[2]]
## [1] 0.3929683 0.3934951
## 
## [[3]]
## [1] 0.3947817 0.3983895
## 
## [[4]]
## [1] 0.3970696 0.3984922
## Plot
par(mfrow = c(1, 2))
plot(
  GibbsSampler_list[[1]]$out.chain[, "mu"],   # First chain
  type = "l",
  ylab = "mu",
  main = "Traceplot for mu"
)
for (i in 2:length(starts_mu)) {
  lines(GibbsSampler_list[[i]]$out.chain[, "mu"],   # Other chains
        type = "l",
        col = i)
}
legend(
  "bottomright",
  lwd = rep(2, 4),
  col = 1:4,
  paste("chain nr ", 1:4)
)
plot(
  GibbsSampler_list[[1]]$out.chain[, "sigma"],   # First chain
  type = "l",
  ylab = "sigma",
  main = "Traceplot for sigma"
)
for (i in 2:length(starts_mu)) {
  lines(GibbsSampler_list[[i]]$out.chain[, "sigma"],   # Other chains
        type = "l",
        col = i)
}
legend(
  "bottomright",
  lwd = rep(2, 4),
  col = 1:4,
  paste("chain nr ", 1:4)
)
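# (Alternative, optional) coda can also draw overlaid traceplots and density
# plots directly from an mcmc.list object (used more formally in Exercise 8) --
# a minimal sketch building that object from GibbsSampler_list:
chains_coda <- do.call(mcmc.list,
                       lapply(GibbsSampler_list,
                              function(x) mcmc(x$out.chain[, c("mu", "sigma")])))
plot(chains_coda)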

###############
#             #
# Exercise 4  #
#             #
###############
# To use the chains inside the coda framework, you first have to transform them into an mcmc object:
autocorr.diag(mcmc(GibbsSampler_fin$out.chain[, c("mu", "sigma")]))
##                mu      sigma
## Lag 0  1.00000000 1.00000000
## Lag 1  0.67778196 0.70501101
## Lag 5  0.18623596 0.22687895
## Lag 10 0.05447597 0.04685215
## Lag 50 0.02435257 0.02100073
autocorr.plot(mcmc(GibbsSampler_fin$out.chain[, c("mu", "sigma")]))

# Cross-correlation
crosscorr.plot(mcmc(GibbsSampler_fin$out.chain[, c("mu", "sigma")]))
title("Cross-correlation")
# --> The serial correlation disappears relatively quickly as the lag increases,
#     and the cross-correlation seems pretty low.
# --> This is good news for the convergence of our algorithm.

###############
#             #
# Exercise 5  #
#             #
###############
smpl_to_keep <- seq(1, nrow(GibbsSampler_fin$out.chain), by = 3)
autocorr.diag(mcmc(GibbsSampler_fin$out.chain[smpl_to_keep, c("mu", "sigma")], thin = 3))
##                   mu       sigma
## Lag 0    1.000000000 1.000000000
## Lag 3    0.345464266 0.390715762
## Lag 15   0.001452513 0.009079115
## Lag 30   0.023683752 0.034537660
## Lag 150 -0.008265679 0.038290323
autocorr.plot(mcmc(GibbsSampler_fin$out.chain[smpl_to_keep, c("mu", "sigma")], thin = 3))
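# (Optional) One way to quantify what thinning does is the effective sample
# size; coda's effectiveSize() estimates it from the autocorrelation. Thinning
# mainly saves memory -- the total effective sample size usually does not increase:
effectiveSize(mcmc(GibbsSampler_fin$out.chain[, c("mu", "sigma")]))               # full chain
effectiveSize(mcmc(GibbsSampler_fin$out.chain[smpl_to_keep, c("mu", "sigma")],
                   thin = 3))                                                     # thinned chain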


###############
#             #
# Exercise 6  #
#             #
###############
# For each chain individually
raf1 <- raftery.diag(
  mcmc(GibbsSampler_list[[1]]$out.chain),
  q = 0.05, r = 0.02, s = 0.95
)
raf2 <- raftery.diag(
  mcmc(GibbsSampler_list[[2]]$out.chain),
  q = 0.05, r = 0.02, s = 0.95
)
raf3 <- raftery.diag(
  mcmc(GibbsSampler_list[[3]]$out.chain),
  q = 0.05, r = 0.02, s = 0.95
)
raf4 <- raftery.diag(
  mcmc(GibbsSampler_list[[4]]$out.chain),
  q = 0.05, r = 0.02, s = 0.95
)
raf_indiv <- list(raf1$resmatrix, raf2$resmatrix, raf3$resmatrix, raf4$resmatrix)
names(raf_indiv) <- paste("chain nr", 1:4)
raf_indiv
## $`chain nr 1`
##          M    N Nmin    I
## mu      20 2590  457 5.67
## sigma   16 2145  457 4.69
## iter  3451 3451  457 7.55
## 
## $`chain nr 2`
##          M    N Nmin    I
## mu      18 2465  457 5.39
## sigma   17 2224  457 4.87
## iter  3451 3451  457 7.55
## 
## $`chain nr 3`
##          M    N Nmin    I
## mu      17 2206  457 4.83
## sigma   14 1864  457 4.08
## iter  3451 3451  457 7.55
## 
## $`chain nr 4`
##          M    N Nmin    I
## mu      16 2051  457 4.49
## sigma   16 2066  457 4.52
## iter  3451 3451  457 7.55
## For the complete chains
# Stack the individual results together
all_chains <- do.call(rbind, lapply(GibbsSampler_list, function(x) x$out.chain))
raftery.diag(mcmc(all_chains[, c("mu", "sigma")]), q = 0.05, r = 0.02, s = 0.95)
## 
## Quantile (q) = 0.05
## Accuracy (r) = +/- 0.02
## Probability (s) = 0.95
## 
##        Burn-in  Total Lower bound  Dependence
##        (M)      (N)   (Nmin)       factor (I)
##  mu    18       2326  457          5.09
##  sigma 15       2048  457          4.48
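# (Aside) The lower bound Nmin = 457 reported above is the number of
# *independent* draws needed to estimate the q = 0.05 quantile within
# +/- r = 0.02 with probability s = 0.95 -- a quick check of that formula:
ceiling(qnorm((1 + 0.95) / 2)^2 * 0.05 * (1 - 0.05) / 0.02^2)   # = 457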
###############
#             #
# Exercise 7  #
#             #
###############
geweke <- geweke.diag(mcmc(GibbsSampler_fin$out.chain[, c("mu", "sigma")]))
geweke$z
##       mu    sigma 
## 1.142782 1.742350
2*dnorm(geweke$z)
##        mu     sigma 
## 0.4152949 0.1748751
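# Note that 2 * dnorm(z) is twice the normal *density*, not a p-value. A
# two-sided p-value for the Geweke z-scores would rather be computed as below;
# both values stay above 0.05, so the conclusion is unchanged:
2 * pnorm(-abs(geweke$z))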
geweke.plot(mcmc(GibbsSampler_fin$out.chain[, c("mu", "sigma")]), nbins = 20)

# Since they (nearly) all lie in the (-1.96, 1.96) band, there is no reason to truncate
# the chain for further inference purposes.

###############
#             #
# Exercise 8  #
#             #
###############
# The function needs an mcmc.list() object containing all the individual chains
mcmc_list_ind <- mcmc.list(
  mcmc(GibbsSampler_list[[1]]$out.chain[, c("mu", "sigma")]),
  mcmc(GibbsSampler_list[[2]]$out.chain[, c("mu", "sigma")]),
  mcmc(GibbsSampler_list[[3]]$out.chain[, c("mu", "sigma")]),
  mcmc(GibbsSampler_list[[4]]$out.chain[, c("mu", "sigma")])
)
# Gelman-Rubin diagnostic from coda
gelman.diag(mcmc_list_ind, autoburnin = F)
## Potential scale reduction factors:
## 
##       Point est. Upper C.I.
## mu          1.13       1.16
## sigma       1.22       1.30
## 
## Multivariate psrf
## 
## 1.01
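# (Optional check) The univariate PSRF can be reproduced "by hand" by comparing
# the between-chain and within-chain variances, here for mu. This simplified
# version ignores coda's sampling-variability correction, so it only
# approximates the point estimate above:
chains_mu <- sapply(GibbsSampler_list, function(x) x$out.chain[, "mu"])
n <- nrow(chains_mu)
B <- n * var(colMeans(chains_mu))      # between-chain variance
W <- mean(apply(chains_mu, 2, var))    # within-chain variance
var_hat <- (n - 1) / n * W + B / n     # pooled variance estimate
sqrt(var_hat / W)                      # simplified R-hat for mu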
gelman.plot(mcmc_list_ind, autoburnin = F, auto.layout = T)
# --> The R-hat statistic converges to 1 very fast, so 10k iterations seem enough for convergence.

###############
#             #
# Exercise 9  #
#             #
###############
# In ggplot: put all panels on the same y-scale
gp.dat <- gelman.plot(mcmc_list_ind, autoburnin = F)
# Process the output object to obtain the relevant values (shrink factors, etc.) in a data frame
df <- data.frame(
  bind_rows(as.data.frame(gp.dat[["shrink"]][, , 1]),
            as.data.frame(gp.dat[["shrink"]][, , 2])),
  q = rep(dimnames(gp.dat[["shrink"]])[[3]],
          each = nrow(gp.dat[["shrink"]][, , 1])),
  last.iter = rep(gp.dat[["last.iter"]], length(gp.dat))
)
# Melt the values to put all shrink factors in one data frame
df_ggplot <- melt(df, c("q", "last.iter"), value.name = "shrink_factor")

## Personalized theme for ggplot (not mandatory)
"theme_piss" <- function(size_p = 18,
                         size_c = 14,
                         size_l = 12,
                         theme = theme_bw(),
                         ...) {
  text <- function(size, ...)
    element_text(size = size, colour = "#33666C", face = "bold", ...)
  theme +
    theme(
      plot.title = text(size_p, hjust = 0.5),
      axis.title = text(size_c),
      legend.title = text(size_l)
    ) +
    theme(
      legend.background = element_rect(colour = "black"),
      legend.key = element_rect(fill = "white"),
      ...
    )
}

# Build the plot
ggplot(df_ggplot, aes(last.iter, shrink_factor, colour = q, linetype = q)) +
  geom_hline(yintercept = 1, colour = "grey30", lwd = 0.2) +
  geom_line() +
  geom_hline(
    yintercept = 1.1,
    colour = "green4",
    linetype = "dashed",
    size = 0.3
  ) +
  scale_y_continuous(breaks = c(1, 1.1, 1.5, 2, 3, 4),
                     labels = c(1, 1.1, 1.5, 2, 3, 4)) +
  # ggtitle("Gelman-Rubin diagnostic : R-hat statistic") +
  facet_wrap( ~ variable,
              labeller = labeller(.cols = function(x) gsub("V", "Chain ", x))) +
  labs(
    x = "Last Iteration in Chain",
    y = "Shrink Factor",
    colour = "Quantile",
    linetype = "Quantile",
    subtitle = "Gelman-Rubin diagnostic : R-hat statistic"
  ) +
  scale_linetype_manual(values = c(2, 1)) +
  theme_piss() +
  theme(
    strip.text = element_text(size = 15),
    plot.subtitle = element_text(
      size = 21,
      hjust = 0.5,
      colour = "#33666C",
      face = "bold"
    )
  )
