Below are the solutions to these exercises on “MCMC Using STAN – Diagnostics With The Bayesplot Package: Exercises”.

############### # # # Exercise 1 # # # ############### rstan_options(auto_write = TRUE) options(mc.cores = parallel::detectCores()) # Set the color color_scheme_set("brightblue") # see help("color_scheme_set") # Prepare data url <- "http://stat.columbia.edu/~gelman/arm/examples/arsenic/wells.dat" wells <- read.table(url) # Create the dist100 variable wells$dist100 <- wells$dist / 100 ############### # # # Exercise 2 # # # ############### # We put 'switch' in the x-scale so that we can put a jitter and we better visualize points # 1. With respect to dist100, by arsenic ggplot(wells, aes(y = dist100, x = switch, col = arsenic)) + geom_jitter(width = 0.2) + scale_colour_gradientn(colours = terrain.colors(5))

# 2. With respect to arsenic, by dist100 ggplot(wells, aes(y = arsenic, x = switch, col = dist100)) + geom_jitter(width = 0.2) + scale_colour_gradientn(colours = terrain.colors(5))

# No clear conclusions can be drawn from these two graphs. # Fit the frequentist glm model : glm_fit <- glm(formula = switch ~ dist100 + arsenic, data = wells, family = "binomial") summary(glm_fit)

## ## Call: ## glm(formula = switch ~ dist100 + arsenic, family = "binomial", ## data = wells) ## ## Deviance Residuals: ## Min 1Q Median 3Q Max ## -2.6351 -1.2139 0.7786 1.0702 1.7085 ## ## Coefficients: ## Estimate Std. Error z value Pr(>|z|) ## (Intercept) 0.002749 0.079448 0.035 0.972 ## dist100 -0.896644 0.104347 -8.593 <2e-16 *** ## arsenic 0.460775 0.041385 11.134 <2e-16 *** ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## ## (Dispersion parameter for binomial family taken to be 1) ## ## Null deviance: 4118.1 on 3019 degrees of freedom ## Residual deviance: 3930.7 on 3017 degrees of freedom ## AIC: 3936.7 ## ## Number of Fisher Scoring iterations: 4

############### # # # Exercise 3 # # # ############### stan_model <- " data { int<lower=0> N; // number of data points int<lower=0> P; // number of predictors (including intercept) matrix[N,P] X; // predictors (including 1s for intercept) int<lower=0,upper=1> y[N]; // binary outcome } parameters { vector[P] beta; } model { beta ~ normal(0, 10); y ~ bernoulli_logit(X * beta); } " ############### # # # Exercise 4 # # # ############### # Create the matrix of covariates X <- model.matrix(~ dist100 + arsenic, data = wells) data_list <- list( y = wells$switch, X = X, N = nrow(X), P = ncol(X) ) ############### # # # Exercise 5 # # # ############### # Reproducible example for the whole script set.seed(1234) # Compiling and producing posterior samples from the model. stan_fit <- stan( model_code = stan_model, data = data_list, chains = 4, iter = 1000, warmup = 200, cores = parallel::detectCores(), # Use all your available cores seed = 1234 )

print(stan_fit, pars = "beta")

## Inference for Stan model: b93e4aeb6f81213051f116caef085c7b. ## 4 chains, each with iter=1000; warmup=200; thin=1; ## post-warmup draws per chain=800, total post-warmup draws=3200. ## ## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat ## beta[1] 0.00 0 0.08 -0.16 -0.05 0.00 0.06 0.16 1698 1 ## beta[2] -0.90 0 0.11 -1.11 -0.97 -0.90 -0.83 -0.69 1888 1 ## beta[3] 0.46 0 0.04 0.38 0.43 0.46 0.49 0.55 1611 1 ## ## Samples were drawn using NUTS(diag_e) at Sun Jan 28 22:43:55 2018. ## For each parameter, n_eff is a crude measure of effective sample size, ## and Rhat is the potential scale reduction factor on split chains (at ## convergence, Rhat=1).

############### # # # Exercise 6 # # # ############### posterior <- as.array(stan_fit) mcmc_intervals(posterior, pars = c("beta[1]", "beta[2]", "beta[3]"))

# The thick segments are the 50% credible intervals while the thinner outer lines # are the 90% credible intervals. ############### # # # Exercise 7 # # # ############### mcmc_areas( posterior, pars = c("beta[1]", "beta[2]", "beta[3]"), prob = 0.8, # 80% intervals prob_outer = 0.99, # 99% point_est = "mean" )

############### # # # Exercise 8 # # # ############### mcmc_hist( posterior, pars = c("beta[1]", "beta[2]", "beta[3]"), binwidth = 1 / 50, )

# By chain mcmc_hist_by_chain( posterior, pars = c("beta[1]", "beta[2]", "beta[3]"), binwidth = 1 / 100 )

## Density plots # ================ mcmc_dens( posterior, pars = c("beta[1]", "beta[2]", "beta[3]") )

# By chain mcmc_dens_overlay( posterior, pars = c("beta[1]", "beta[2]", "beta[3]"), )

## Violin plots # ============ mcmc_violin( posterior, pars = c("beta[2]", "beta[3]"), probs = c(0.05, 0.2, 0.5, 0.7, 0.95) )

############### # # # Exercise 9 # # # ############### color_scheme_set("viridis") mcmc_trace(posterior, c("beta[1]", "beta[2]", "beta[3]"), facet_args = list(ncol = 1, strip.position = "left"))

############### # # # Exercise 10 # # # ############### mcmc_scatter(posterior, pars = c("beta[2]", "beta[3]"), size = 1.5, alpha = 0.5)

# hexagonal binning mcmc_hex(posterior, pars = c("beta[2]", "beta[3]"))

# scatterplot table mcmc_pairs(posterior, pars = c("beta[1]", "beta[2]", "beta[3]"), off_diag_args = list(size = 1.5))

## Leave a Reply