Below are the solutions to these exercises on “GLM – Part 4.”

# Install each required package only when it is not already available.
# Package names must be quoted: install.packages(car) would look up an
# R object called `car` and fail with "object 'car' not found".
if (!requireNamespace("car", quietly = TRUE)) {
  install.packages("car", dependencies = TRUE)
}
library(car)
if (!requireNamespace("MuMIn", quietly = TRUE)) {
  install.packages("MuMIn", dependencies = TRUE)
}

## Warning: package 'MuMIn' was built under R version 3.4.4

library(MuMIn)

# Pick the spider data set (a CSV file) through an interactive file dialog.
spider <- read.csv(file.choose())

###############
#             #
# Exercise 1  #
#             #
###############
# Visualise the data: count the islands in each presence/absence class
table(spider[["PA"]])

## ## 0 1 ## 9 10

# Stack two panels vertically; `op` keeps the previous graphical settings
# so they can be restored afterwards.
op <- par(mfrow = c(2, 1))
boxplot(
  RATIO ~ PA,
  data = spider,
  col = "red",
  xlab = "Presence / Absence of Uta lizards",
  ylab = "Island Ratio"
)
plot(PA ~ RATIO, data = spider, pch = 19)

# Restore the graphical settings saved in `op`.
par(op)

###############
#             #
# Exercise 2  #
#             #
###############
spider.glm <- glm(PA ~ RATIO, data = spider, family = binomial)
# Strictly speaking, overdispersion is not a concern in logistic regression
# on presence/absence data (Zuur et al. 2013), but we check it anyway.

###############
#             #
# Exercise 3  #
#             #
###############
# Dispersion estimate: residual deviance / residual degrees of freedom
deviance(spider.glm) / df.residual(spider.glm)

## [1] 0.8365126

# Slightly underdispersed

###############
#             #
# Exercise 4  #
#             #
###############
# Use a component + residual plot to check that the predictor's effect is
# adequately described by a straight line.
car::crPlots(spider.glm, ask = FALSE)

# Looks good

###############
#             #
# Exercise 5  #
#             #
###############
# Influence diagnostics (DFBETAS, DFFITS, covariance ratio, Cook's distance,
# hat values) for every observation
influence.measures(model = spider.glm)

## Influence measures of ## glm(formula = PA ~ RATIO, family = binomial, data = spider) : ## ## dfb.1_ dfb.RATI dffit cov.r cook.d hat inf ## 1 0.182077 -0.007083 0.447814 1.043 5.50e-02 0.109124 ## 2 0.167005 -0.141263 0.169959 1.235 6.62e-03 0.111730 ## 3 -0.723849 1.079157 1.278634 0.537 8.43e-01 0.151047 * ## 4 -0.239967 0.028419 -0.546081 0.953 9.01e-02 0.108681 ## 5 0.248270 -0.126175 0.359999 1.117 3.30e-02 0.110025 ## 6 0.028088 -0.196986 -0.437403 1.110 5.00e-02 0.129177 ## 7 0.077131 -0.102575 -0.111591 1.250 2.81e-03 0.108288 ## 8 0.140334 -0.247315 -0.332565 1.242 2.65e-02 0.155414 ## 9 -0.562402 0.338850 -0.723598 0.805 1.89e-01 0.112842 ## 10 0.257651 -0.162838 0.319655 1.157 2.52e-02 0.114067 ## 11 0.176591 -0.147771 0.180516 1.234 7.49e-03 0.113765 ## 12 0.104228 -0.093408 0.104419 1.225 2.46e-03 0.090774 ## 13 0.135395 -0.118138 0.136380 1.233 4.23e-03 0.102909 ## 14 0.000410 -0.000476 -0.000481 1.131 5.14e-08 0.001445 ## 15 0.000218 -0.000251 -0.000254 1.130 1.43e-08 0.000817 ## 16 0.139447 -0.248090 -0.335881 1.239 2.70e-02 0.155114 ## 17 0.143708 -0.240774 -0.311977 1.255 2.31e-02 0.156543 ## 18 0.074831 -0.068694 0.074832 1.211 1.26e-03 0.075520 ## 19 0.108633 -0.097001 0.108890 1.226 2.68e-03 0.092718

# Nothing to worry about

###############
#             #
# Exercise 6  #
#             #
###############
# Cook's distance (diagnostic plot number 4 of plot.lm)
plot(x = spider.glm, which = 4)

# One observation sits on the edge, with a Cook's distance of about 0.8.
# I would prefer to keep it, but you can also remove it if you think the
# analysis is sensitive to outliers.

# Look at the model summary
summary(object = spider.glm)

## ## Call: ## glm(formula = PA ~ RATIO, family = binomial, data = spider) ## ## Deviance Residuals: ## Min 1Q Median 3Q Max ## -1.6067 -0.6382 0.2368 0.4332 2.0986 ## ## Coefficients: ## Estimate Std. Error z value Pr(>|z|) ## (Intercept) 3.6061 1.6953 2.127 0.0334 * ## RATIO -0.2196 0.1005 -2.184 0.0289 * ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## ## (Dispersion parameter for binomial family taken to be 1) ## ## Null deviance: 26.287 on 18 degrees of freedom ## Residual deviance: 14.221 on 17 degrees of freedom ## AIC: 18.221 ## ## Number of Fisher Scoring iterations: 6

###############
#             #
# Exercise 7  #
#             #
###############
# Check residuals: draw the standard diagnostic plots in a 2 x 2 grid
op <- par(mfrow = c(2, 2))
plot(x = spider.glm)

# Close the current graphics device
dev.off()

## null device ## 1

###############
#             #
# Exercise 8  #
#             #
###############
# Predict over a fine grid of perimeter/area ratios.
xs <- seq(0, 70, length.out = 1000)
spider.newdata <- data.frame(RATIO = xs)

# Fitted probabilities (response scale) and their standard errors.
spider.predict <- predict(spider.glm, newdata = spider.newdata,
                          type = "response", se.fit = TRUE)

# Predictions on the link scale, used for the confidence band. Adding
# +/- 1 SE on the response scale is not a 95% interval and can leave [0, 1];
# a Wald interval built on the link scale and back-transformed with the
# model's inverse link avoids both problems.
spider.link <- predict(spider.glm, newdata = spider.newdata,
                       type = "link", se.fit = TRUE)
z.crit <- qnorm(0.975)
inv.link <- family(spider.glm)$linkinv
ci.lower <- inv.link(spider.link$fit - z.crit * spider.link$se.fit)
ci.upper <- inv.link(spider.link$fit + z.crit * spider.link$se.fit)

###############
#             #
# Exercise 9  #
#             #
###############
# Produce base plot
plot(PA ~ RATIO, data = spider, xlab = "", ylab = "", axes = FALSE, pch = 16)

# Plot fitted model and 95% CI bands
lines(xs, spider.predict$fit, col = "gray")
lines(xs, ci.upper, col = "gray", lty = 2)
lines(xs, ci.lower, col = "gray", lty = 2)

# Axes titles
mtext(expression(paste(italic(Uta), " presence/absence")), side = 2, line = 3)
axis(2, las = 1)
mtext("Perimeter to area ratio", side = 1, line = 3)
axis(1)
box(bty = "l")

###############

# #

# Exercise 10 #

# #

###############

## Odds ratios only: exponentiate the model coefficients

exp(coefficients(spider.glm))

## (Intercept) RATIO ## 36.8210344 0.8028734

## Odds ratios and 95% CI
spider.or <- cbind(OR = coef(spider.glm), confint(spider.glm))
exp(spider.or)

## OR 2.5 % 97.5 % ## (Intercept) 36.8210344 2.7344957 3109.2748308 ## RATIO 0.8028734 0.6157322 0.9356727

# The odds of lizard presence decline by approximately 20% (1 - 0.803)
# for every unit increase in perimeter/area ratio on the islands.

###############
#             #
# Exercise 11 #
#             #
###############
# Proportion of the null deviance explained by the model
1 - (spider.glm$deviance / spider.glm$null.deviance)

## [1] 0.4590197

```
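# About 45.9% of the null deviance is explained by the model, i.e. 45.9% of the variation in the data is captured by the regression equation (a deviance-based pseudo-R-squared)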
```

**What's next:**

- Become a Top R Programmer Fast with our Individual Coaching Program
- Explore all our (>4000) R exercises
- Find an R course using our R Course Finder directory
- Subscribe to receive weekly updates and bonus sets by email
- Share with your friends and colleagues using the buttons below

## Leave a Reply