Basic Generalized Linear Modeling – Part 4: Solutions

Below are the solutions to these exercises on “GLM – Part 4.”

if (!require(car)){install.packages(car, dep=T)}
library(car)
if (!require(MuMIn)){install.packages(MuMIn, dep=T)}
## Warning: package 'MuMIn' was built under R version 3.4.4
library(MuMIn)

spider<-read.csv(file.choose())

###############
#             #
# Exercise 1  #
#             #
###############

# Visualise the data 
table(spider$PA)
## 
##  0  1 
##  9 10
op <- par(mfrow = c(2, 1))
boxplot(RATIO ~ PA, data = spider,
        col = "red", xlab = "Presence / Absence of Uta lizards",
        ylab = "Island Ratio")
plot(PA ~ RATIO, pch = 19, data = spider)

par(op)

###############
#             #
# Exercise 2  #
#             #
###############
spider.glm <- glm(PA~RATIO, family=binomial, data=spider)

#Actually, we don't need to bother with over dispersion in logistic regression on presence/absence data ( Zuur et al. 2013). BUt we'll check on it.

###############
#             #
# Exercise 3  #
#             #
###############
#represent the model deviance / degrees of freedom of the residuals
spider.glm$deviance/spider.glm$df.resid
## [1] 0.8365126
# Slightly  under dispersed

###############
#             #
# Exercise 4  #
#             #
###############
# we'll use a component+residual plot to look at the linear fit of odds ratio

crPlots(spider.glm, ask = FALSE)

# looks  good

###############
#             #
# Exercise 5  #
#             #
###############

# influential values
influence.measures(spider.glm)
## Influence measures of
## 	 glm(formula = PA ~ RATIO, family = binomial, data = spider) :
## 
##       dfb.1_  dfb.RATI     dffit cov.r   cook.d      hat inf
## 1   0.182077 -0.007083  0.447814 1.043 5.50e-02 0.109124    
## 2   0.167005 -0.141263  0.169959 1.235 6.62e-03 0.111730    
## 3  -0.723849  1.079157  1.278634 0.537 8.43e-01 0.151047   *
## 4  -0.239967  0.028419 -0.546081 0.953 9.01e-02 0.108681    
## 5   0.248270 -0.126175  0.359999 1.117 3.30e-02 0.110025    
## 6   0.028088 -0.196986 -0.437403 1.110 5.00e-02 0.129177    
## 7   0.077131 -0.102575 -0.111591 1.250 2.81e-03 0.108288    
## 8   0.140334 -0.247315 -0.332565 1.242 2.65e-02 0.155414    
## 9  -0.562402  0.338850 -0.723598 0.805 1.89e-01 0.112842    
## 10  0.257651 -0.162838  0.319655 1.157 2.52e-02 0.114067    
## 11  0.176591 -0.147771  0.180516 1.234 7.49e-03 0.113765    
## 12  0.104228 -0.093408  0.104419 1.225 2.46e-03 0.090774    
## 13  0.135395 -0.118138  0.136380 1.233 4.23e-03 0.102909    
## 14  0.000410 -0.000476 -0.000481 1.131 5.14e-08 0.001445    
## 15  0.000218 -0.000251 -0.000254 1.130 1.43e-08 0.000817    
## 16  0.139447 -0.248090 -0.335881 1.239 2.70e-02 0.155114    
## 17  0.143708 -0.240774 -0.311977 1.255 2.31e-02 0.156543    
## 18  0.074831 -0.068694  0.074832 1.211 1.26e-03 0.075520    
## 19  0.108633 -0.097001  0.108890 1.226 2.68e-03 0.092718
# nothing to worry

###############
#             #
# Exercise 6  #
#             #
###############

# Cooks distance
plot(spider.glm, which = 4)

# Wedge value with margin at (0.8). I'd prefer to okay with it. But you can also remove it if you think that the data is pretty sensitive to any outliers.

#look at the summary
summary(spider.glm)
## 
## Call:
## glm(formula = PA ~ RATIO, family = binomial, data = spider)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.6067  -0.6382   0.2368   0.4332   2.0986  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept)   3.6061     1.6953   2.127   0.0334 *
## RATIO        -0.2196     0.1005  -2.184   0.0289 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 26.287  on 18  degrees of freedom
## Residual deviance: 14.221  on 17  degrees of freedom
## AIC: 18.221
## 
## Number of Fisher Scoring iterations: 6
###############
#             #
# Exercise 7  #
#             #
###############
# check residuals
op <- par(mfrow = c(2, 2))
plot(spider.glm)

dev.off()
## null device 
##           1
###############
#             #
# Exercise 8 #
#             #
###############

xs<-seq(0,70,l=1000)
spider.predict <- predict(spider.glm, type="response", se=T, newdata=data.frame(RATIO=xs))

###############
#             #
# Exercise 9  #
#             #
###############
# Produce base plot
plot(PA~RATIO, data=spider, xlab="", ylab="", axes=FALSE, pch=16)

# Plot fitted model and 95% CI bands
points(spider.predict$fit~xs, type="l", col="gray")
lines(spider.predict$fit+spider.predict$se.fit ~ xs, col="gray", type="l", lty=2)
lines(spider.predict$fit-spider.predict$se.fit ~ xs, col="gray", type="l", lty=2)

#Axes titles
mtext(expression(paste(italic(Uta), " presence/absence")), 2, line=3)
axis(2,las=1)
mtext("Perimeter to area ratio",1, line=3)
axis(1)
box(bty="l")

###############
# #
# Exercise 10 #
# #
###############

## odds ratios only
exp(coef(spider.glm))

## (Intercept)       RATIO 
##  36.8210344   0.8028734
## odds ratios and 95% CI
exp(cbind(OR = coef(spider.glm), confint(spider.glm)))
##                     OR     2.5 %       97.5 %
## (Intercept) 36.8210344 2.7344957 3109.2748308
## RATIO        0.8028734 0.6157322    0.9356727
# likelihood of lizard presence declines by approximately 20% (1-0.803)
# for every unit increase in perimeter/area ratio on the islands

###############
#             #
# Exercise 11 #
#             #
###############
1 - (spider.glm$dev / spider.glm$null)
## [1] 0.4590197
# 45.9% of the variation in the data is captured by the regression equation



Basic Generalized Linear Modeling – Part 3: Solutions

Below are the solutions to these exercises on “GLM – Part 3.”

Please note, you need to execute the answer in this exercise, along with the solutions of Basic GLM Exercises – Part 2 here.

###############
# #
# Exercise 9 #
# #
###############
Road.nb1 <- glm.nb(TOT.N ~ OPEN.L + MONT.S + POLIC +
SHRUB + WAT.RES + L.WAT.C + L.P.ROAD +
D.WAT.COUR + D.PARK, link = “log”, data=Road)

###############
# #
# Exercise 10 #
# #
###############

summary(Road.nb1)

## 
## Call:
## glm.nb(formula = TOT.N ~ OPEN.L + MONT.S + POLIC + SHRUB + WAT.RES + 
##     L.WAT.C + L.P.ROAD + D.WAT.COUR + D.PARK, data = Road, link = "log", 
##     init.theta = 5.67594597)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -3.02657  -0.70592   0.04461   0.63626   1.56209  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  4.249e+00  2.761e-01  15.390  < 2e-16 ***
## OPEN.L      -9.935e-03  3.146e-03  -3.157  0.00159 ** 
## MONT.S       6.130e-02  3.428e-02   1.788  0.07372 .  
## POLIC        7.868e-04  4.209e-02   0.019  0.98509    
## SHRUB       -3.764e-01  2.451e-01  -1.536  0.12465    
## WAT.RES      9.684e-02  7.689e-02   1.260  0.20784    
## L.WAT.C      1.919e-01  1.005e-01   1.911  0.05603 .  
## L.P.ROAD     2.674e-01  1.363e-01   1.962  0.04976 *  
## D.WAT.COUR  -6.579e-05  3.250e-04  -0.202  0.83959    
## D.PARK      -1.226e-04  1.278e-05  -9.596  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(5.6759) family taken to be 1)
## 
##     Null deviance: 218.273  on 51  degrees of freedom
## Residual deviance:  51.313  on 42  degrees of freedom
## AIC: 388.56
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  5.68 
##           Std. Err.:  1.45 
## 
##  2 x log-likelihood:  -366.559
###############
#             #
# Exercise 11 #
#             #
###############

options(na.action=na.fail) # set options in Base R concerning missing values
summary(model.avg(dredge(Road.nb1), fit = TRUE, subset = TRUE))
## 
## Call:
## model.avg(object = get.models(object = dredge(Road.nb1), subset = TRUE))
## 
## Component model call: 
## glm.nb(formula = TOT.N ~ <512 unique rhs>, data = Road, init.theta 
##      = <512 unique values>, link = log)
## 
## Component models: 
##           df  logLik   AICc delta weight
## 1346       6 -185.45 384.77  0.00   0.08
## 146        5 -187.13 385.56  0.79   0.06
## 13456      7 -184.75 386.05  1.28   0.04
## 136        5 -187.43 386.16  1.39   0.04
## 1456       6 -186.33 386.53  1.76   0.03
## 13468      7 -185.17 386.88  2.11   0.03
## 1236       6 -186.61 387.08  2.31   0.03
## 13469      7 -185.28 387.11  2.34   0.03
## 1356       6 -186.65 387.17  2.40   0.02
## 12346      7 -185.38 387.30  2.53   0.02
## 13467      7 -185.44 387.43  2.66   0.02
## 134568     8 -184.10 387.54  2.77   0.02
## 12356      7 -185.66 387.86  3.09   0.02
## 1468       6 -187.02 387.91  3.14   0.02
## 1469       6 -187.06 387.98  3.21   0.02
## 1246       6 -187.06 387.98  3.21   0.02
## 1467       6 -187.11 388.09  3.32   0.02
## 16         4 -189.72 388.28  3.51   0.01
## 134569     8 -184.50 388.35  3.58   0.01
## 14568      7 -185.92 388.39  3.62   0.01
## 1367       6 -187.27 388.41  3.64   0.01
## 123456     8 -184.60 388.55  3.78   0.01
## 1368       6 -187.38 388.62  3.85   0.01
## 1369       6 -187.42 388.71  3.94   0.01
## 156        5 -188.73 388.77  4.00   0.01
## 126        5 -188.73 388.77  4.00   0.01
## 134567     8 -184.74 388.83  4.06   0.01
## 1345689    9 -183.30 388.89  4.12   0.01
## 134689     8 -184.77 388.89  4.12   0.01
## 12456      7 -186.17 388.89  4.12   0.01
## 14569      7 -186.20 388.95  4.18   0.01
## 1256       6 -187.54 388.95  4.18   0.01
## 14567      7 -186.31 389.17  4.40   0.01
## 13568      7 -186.39 389.32  4.55   0.01
## 12367      7 -186.43 389.40  4.63   0.01
## 13567      7 -186.49 389.53  4.76   0.01
## 123468     8 -185.12 389.58  4.81   0.01
## 12368      7 -186.55 389.64  4.87   0.01
## 134678     8 -185.17 389.69  4.92   0.01
## 12369      7 -186.61 389.76  4.99   0.01
## 13569      7 -186.62 389.79  5.02   0.01
## 123469     8 -185.25 389.84  5.07   0.01
## 134679     8 -185.27 389.88  5.11   0.01
## 123568     8 -185.30 389.95  5.18   0.01
## 123467     8 -185.36 390.06  5.30   0.01
## 1234568    9 -183.95 390.20  5.43   0.01
## 123567     8 -185.46 390.26  5.49   0.01
## 14689      7 -186.87 390.29  5.52   0.01
## 167        5 -189.49 390.29  5.52   0.01
## 145689     8 -185.50 390.35  5.58   0.01
## 1345678    9 -184.10 390.48  5.71   0.00
## 12468      7 -186.97 390.48  5.71   0.00
## 12469      7 -187.01 390.57  5.80   0.00
## 14678      7 -187.01 390.58  5.81   0.00
## 12467      7 -187.03 390.60  5.83   0.00
## 14679      7 -187.04 390.62  5.85   0.00
## 123569     8 -185.64 390.64  5.87   0.00
## 169        5 -189.70 390.70  5.93   0.00
## 168        5 -189.71 390.72  5.95   0.00
## 1267       6 -188.48 390.83  6.06   0.00
## 124568     8 -185.77 390.90  6.13   0.00
## 1567       6 -188.52 390.90  6.13   0.00
## 13678      7 -187.24 391.02  6.25   0.00
## 13679      7 -187.26 391.06  6.29   0.00
## 12567      7 -187.28 391.12  6.35   0.00
## 1234569    9 -184.42 391.12  6.35   0.00
## 1568       6 -188.66 391.18  6.41   0.00
## 145678     8 -185.92 391.19  6.42   0.00
## 1345679    9 -184.48 391.24  6.48   0.00
## 1269       6 -188.69 391.25  6.48   0.00
## 13689      7 -187.36 391.26  6.49   0.00
## 1268       6 -188.73 391.33  6.56   0.00
## 1569       6 -188.73 391.33  6.56   0.00
## 12568      7 -187.41 391.36  6.59   0.00
## 1234567    9 -184.57 391.42  6.65   0.00
## 124569     8 -186.10 391.54  6.77   0.00
## 124567     8 -186.13 391.61  6.84   0.00
## 12569      7 -187.53 391.61  6.84   0.00
## 145679     8 -186.18 391.70  6.93   0.00
## 1234689    9 -184.77 391.82  7.05   0.00
## 1346789    9 -184.77 391.83  7.06   0.00
## 135689     8 -186.26 391.87  7.10   0.00
## 135678     8 -186.26 391.88  7.11   0.00
## 12345689  10 -183.28 391.93  7.16   0.00
## 13456789  10 -183.30 391.97  7.20   0.00
## 123678     8 -186.38 392.11  7.35   0.00
## 123679     8 -186.42 392.19  7.42   0.00
## 135        5 -190.46 392.22  7.45   0.00
## 135679     8 -186.44 392.24  7.47   0.00
## 123689     8 -186.54 392.43  7.66   0.00
## 1234678    9 -185.11 392.51  7.74   0.00
## 1235678    9 -185.15 392.59  7.82   0.00
## 13         4 -191.93 392.71  7.94   0.00
## 15         4 -191.93 392.71  7.94   0.00
## 1235689    9 -185.22 392.72  7.95   0.00
## 1234679    9 -185.23 392.74  7.97   0.00
## 1678       6 -189.48 392.83  8.06   0.00
## 1679       6 -189.49 392.84  8.07   0.00
## 145        5 -190.79 392.88  8.11   0.00
## 124689     8 -186.86 393.06  8.29   0.00
## 146789     8 -186.86 393.08  8.31   0.00
## 1235679    9 -185.43 393.15  8.38   0.00
## 1245689    9 -185.45 393.19  8.42   0.00
## 1345       6 -189.67 393.20  8.43   0.00
## 1689       6 -189.68 393.23  8.46   0.00
## 124678     8 -186.95 393.25  8.48   0.00
## 12345678  10 -183.95 393.27  8.50   0.00
## 1456789    9 -185.50 393.28  8.51   0.00
## 124679     8 -186.98 393.31  8.54   0.00
## 12679      7 -188.46 393.46  8.69   0.00
## 15678      7 -188.46 393.46  8.69   0.00
## 12678      7 -188.47 393.49  8.72   0.00
## 14         4 -192.35 393.55  8.78   0.00
## 15679      7 -188.52 393.58  8.81   0.00
## 1          3 -193.55 393.59  8.82   0.00
## 134        5 -191.16 393.62  8.85   0.00
## 125678     8 -187.18 393.70  8.93   0.00
## 1358       6 -189.92 393.72  8.95   0.00
## 136789     8 -187.21 393.77  9.00   0.00
## 1245678    9 -185.76 393.81  9.04   0.00
## 15689      7 -188.65 393.85  9.08   0.00
## 12689      7 -188.68 393.91  9.14   0.00
## 125679     8 -187.28 393.92  9.15   0.00
## 13458      7 -188.78 394.10  9.33   0.00
## 12345679  10 -184.38 394.13  9.36   0.00
## 1235       6 -190.14 394.14  9.37   0.00
## 125689     8 -187.41 394.16  9.39   0.00
## 1458       6 -190.18 394.23  9.46   0.00
## 125        5 -191.46 394.23  9.46   0.00
## 1245679    9 -186.05 394.39  9.62   0.00
## 1356789    9 -186.12 394.52  9.75   0.00
## 158        5 -191.68 394.67  9.90   0.00
## 1359       6 -190.44 394.74  9.97   0.00
## 1357       6 -190.45 394.76  9.99   0.00
## 123        5 -191.77 394.83 10.07   0.00
## 12346789  10 -184.77 394.90 10.13   0.00
## 138        5 -191.82 394.94 10.17   0.00
## 1236789    9 -186.37 395.03 10.26   0.00
## 137        5 -191.91 395.12 10.35   0.00
## 123456789 11 -183.28 395.16 10.39   0.00
## 139        5 -191.93 395.16 10.39   0.00
## 159        5 -191.93 395.16 10.39   0.00
## 157        5 -191.93 395.16 10.39   0.00
## 1457       6 -190.72 395.31 10.54   0.00
## 1459       6 -190.74 395.35 10.58   0.00
## 1245       6 -190.76 395.40 10.63   0.00
## 12         4 -193.28 395.40 10.63   0.00
## 12356789  10 -185.05 395.46 10.69   0.00
## 16789      7 -189.47 395.48 10.71   0.00
## 12358      7 -189.53 395.60 10.83   0.00
## 1348       6 -190.87 395.61 10.84   0.00
## 13459      7 -189.56 395.67 10.90   0.00
## 13457      7 -189.56 395.67 10.90   0.00
## 134589     8 -188.17 395.70 10.93   0.00
## 148        5 -192.24 395.78 11.01   0.00
## 147        5 -192.26 395.82 11.05   0.00
## 12345      7 -189.65 395.85 11.08   0.00
## 19         4 -193.52 395.89 11.12   0.00
## 1347       6 -191.02 395.90 11.13   0.00
## 18         4 -193.55 395.94 11.17   0.00
## 17         4 -193.55 395.94 11.17   0.00
## 1246789    9 -186.84 395.97 11.20   0.00
## 149        5 -192.34 395.99 11.22   0.00
## 124        5 -192.35 396.00 11.23   0.00
## 13589      7 -189.76 396.07 11.30   0.00
## 1349       6 -191.12 396.11 11.34   0.00
## 1258       6 -191.14 396.16 11.39   0.00
## 1234       6 -191.16 396.18 11.41   0.00
## 126789     8 -188.44 396.23 11.46   0.00
## 156789     8 -188.45 396.25 11.48   0.00
## 12456789  10 -185.44 396.25 11.48   0.00
## 14589      7 -189.85 396.26 11.49   0.00
## 13578      7 -189.90 396.35 11.58   0.00
## 134578     8 -188.58 396.51 11.74   0.00
## 1256789    9 -187.17 396.64 11.87   0.00
## 14578      7 -190.06 396.66 11.89   0.00
## 1259       6 -191.46 396.78 12.01   0.00
## 1257       6 -191.46 396.80 12.03   0.00
## 12357      7 -190.13 396.80 12.03   0.00
## 12359      7 -190.13 396.81 12.04   0.00
## 12458      7 -190.16 396.87 12.10   0.00
## 123458     8 -188.77 396.88 12.11   0.00
## 1238       6 -191.65 397.17 12.40   0.00
## 1589       6 -191.67 397.20 12.43   0.00
## 1578       6 -191.68 397.24 12.47   0.00
## 1237       6 -191.74 397.35 12.58   0.00
## 1239       6 -191.76 397.40 12.63   0.00
## 13579      7 -190.43 397.41 12.64   0.00
## 1378       6 -191.79 397.44 12.67   0.00
## 1389       6 -191.81 397.48 12.71   0.00
## 1379       6 -191.91 397.68 12.91   0.00
## 1579       6 -191.93 397.73 12.96   0.00
## 129        5 -193.23 397.77 13.00   0.00
## 128        5 -193.27 397.85 13.08   0.00
## 127        5 -193.27 397.85 13.08   0.00
## 13478      7 -190.67 397.88 13.11   0.00
## 14579      7 -190.68 397.90 13.13   0.00
## 12457      7 -190.71 397.96 13.19   0.00
## 13489      7 -190.71 397.96 13.19   0.00
## 12459      7 -190.73 398.00 13.24   0.00
## 1478       6 -192.12 398.11 13.34   0.00
## 123589     8 -189.40 398.15 13.38   0.00
## 1345789    9 -187.97 398.23 13.46   0.00
## 1489       6 -192.20 398.26 13.49   0.00
## 12348      7 -190.86 398.26 13.49   0.00
## 134579     8 -189.47 398.28 13.51   0.00
## 1248       6 -192.23 398.33 13.56   0.00
## 179        5 -193.52 398.34 13.57   0.00
## 189        5 -193.52 398.35 13.58   0.00
## 123578     8 -189.50 398.35 13.58   0.00
## 1247       6 -192.25 398.37 13.60   0.00
## 1479       6 -192.26 398.38 13.61   0.00
## 178        5 -193.55 398.39 13.63   0.00
## 123457     8 -189.56 398.46 13.69   0.00
## 123459     8 -189.56 398.47 13.70   0.00
## 13479      7 -190.99 398.52 13.75   0.00
## 1249       6 -192.34 398.54 13.77   0.00
## 12347      7 -191.01 398.56 13.79   0.00
## 1234589    9 -188.16 398.60 13.83   0.00
## 12349      7 -191.11 398.77 14.00   0.00
## 12589      7 -191.14 398.82 14.05   0.00
## 145789     8 -189.74 398.83 14.06   0.00
## 12578      7 -191.14 398.83 14.06   0.00
## 135789     8 -189.75 398.85 14.08   0.00
## 124589     8 -189.85 399.06 14.29   0.00
## 1234578    9 -188.58 399.44 14.67   0.00
## 124578     8 -190.05 399.45 14.68   0.00
## 12579      7 -191.46 399.46 14.69   0.00
## 123579     8 -190.12 399.59 14.82   0.00
## 12378      7 -191.62 399.78 15.01   0.00
## 12389      7 -191.65 399.84 15.07   0.00
## 15789      7 -191.67 399.88 15.11   0.00
## 12379      7 -191.74 400.02 15.25   0.00
## 13789      7 -191.78 400.10 15.33   0.00
## 1279       6 -193.23 400.32 15.55   0.00
## 1289       6 -193.23 400.33 15.56   0.00
## 134789     8 -190.50 400.36 15.59   0.00
## 1278       6 -193.27 400.41 15.64   0.00
## 123478     8 -190.64 400.62 15.85   0.00
## 123489     8 -190.65 400.64 15.87   0.00
## 124579     8 -190.67 400.70 15.93   0.00
## 14789      7 -192.08 400.71 15.95   0.00
## 12478      7 -192.11 400.76 15.99   0.00
## 12489      7 -192.17 400.89 16.12   0.00
## 1789       6 -193.52 400.91 16.14   0.00
## 12479      7 -192.25 401.04 16.27   0.00
## 1235789    9 -189.38 401.05 16.29   0.00
## 1234579    9 -189.47 401.22 16.45   0.00
## 12345789  10 -187.93 401.23 16.46   0.00
## 123479     8 -190.96 401.28 16.51   0.00
## 125789     8 -191.14 401.62 16.85   0.00
## 1245789    9 -189.73 401.75 16.98   0.00
## 123789     8 -191.61 402.58 17.81   0.00
## 12789      7 -193.23 403.00 18.23   0.00
## 1234789    9 -190.40 403.08 18.31   0.00
## 124789     8 -192.04 403.44 18.67   0.00
## 678        5 -214.20 439.71 54.94   0.00
## 2678       6 -213.28 440.44 55.67   0.00
## 3678       6 -213.84 441.54 56.77   0.00
## 67         4 -216.37 441.58 56.81   0.00
## 267        5 -215.30 441.90 57.13   0.00
## 5678       6 -214.15 442.17 57.40   0.00
## 4678       6 -214.18 442.22 57.45   0.00
## 6789       6 -214.20 442.27 57.50   0.00
## 23678      7 -212.92 442.38 57.61   0.00
## 68         4 -216.94 442.73 57.96   0.00
## 367        5 -215.73 442.76 57.99   0.00
## 24678      7 -213.22 442.98 58.22   0.00
## 25678      7 -213.26 443.06 58.29   0.00
## 2367       6 -214.62 443.10 58.33   0.00
## 26789      7 -213.28 443.11 58.34   0.00
## 467        5 -216.16 443.63 58.86   0.00
## 679        5 -216.18 443.67 58.90   0.00
## 268        5 -216.25 443.81 59.04   0.00
## 567        5 -216.34 443.99 59.22   0.00
## 35678      7 -213.78 444.11 59.34   0.00
## 2679       6 -215.15 444.16 59.39   0.00
## 36789      7 -213.81 444.17 59.40   0.00
## 34678      7 -213.84 444.22 59.45   0.00
## 2567       6 -215.23 444.34 59.57   0.00
## 6          3 -218.96 444.41 59.64   0.00
## 2467       6 -215.29 444.45 59.68   0.00
## 368        5 -216.64 444.59 59.82   0.00
## 468        5 -216.67 444.64 59.87   0.00
## 3679       6 -215.40 444.66 59.89   0.00
## 45678      7 -214.12 444.78 60.01   0.00
## 234678     8 -212.75 444.84 60.07   0.00
## 56789      7 -214.15 444.85 60.08   0.00
## 46789      7 -214.17 444.88 60.11   0.00
## 568        5 -216.86 445.02 60.25   0.00
## 236789     8 -212.89 445.12 60.35   0.00
## 235678     8 -212.89 445.13 60.36   0.00
## 689        5 -216.93 445.17 60.40   0.00
## 26         4 -218.16 445.18 60.41   0.00
## 3467       6 -215.67 445.20 60.43   0.00
## 23679      7 -214.33 445.21 60.44   0.00
## 3567       6 -215.72 445.31 60.54   0.00
## 4679       6 -215.78 445.42 60.65   0.00
## 36         4 -218.33 445.51 60.74   0.00
## 46         4 -218.35 445.55 60.78   0.00
## 23467      7 -214.57 445.69 60.93   0.00
## 23567      7 -214.60 445.74 60.97   0.00
## 2368       6 -215.94 445.75 60.98   0.00
## 245678     8 -213.20 445.75 60.98   0.00
## 246789     8 -213.22 445.78 61.01   0.00
## 256789     8 -213.26 445.86 61.09   0.00
## 5679       6 -216.13 446.13 61.36   0.00
## 4567       6 -216.16 446.18 61.41   0.00
## 236        5 -217.47 446.25 61.48   0.00
## 2568       6 -216.20 446.26 61.49   0.00
## 2468       6 -216.21 446.28 61.51   0.00
## 2689       6 -216.24 446.35 61.58   0.00
## 69         4 -218.87 446.60 61.83   0.00
## 25679      7 -215.05 446.65 61.88   0.00
## 24679      7 -215.09 446.73 61.96   0.00
## 56         4 -218.94 446.73 61.96   0.00
## 3468       6 -216.46 446.78 62.01   0.00
## 356789     8 -213.77 446.89 62.12   0.00
## 345678     8 -213.78 446.91 62.14   0.00
## 34679      7 -215.20 446.94 62.17   0.00
## 346789     8 -213.80 446.95 62.18   0.00
## 3568       6 -216.55 446.97 62.20   0.00
## 4568       6 -216.57 447.00 62.23   0.00
## 24567      7 -215.23 447.01 62.24   0.00
## 246        5 -217.91 447.12 62.36   0.00
## 3689       6 -216.64 447.15 62.38   0.00
## 4689       6 -216.65 447.17 62.41   0.00
## 346        5 -217.97 447.24 62.47   0.00
## 8          3 -220.37 447.25 62.48   0.00
## 469        5 -218.00 447.30 62.53   0.00
## 35679      7 -215.38 447.30 62.53   0.00
## 78         4 -219.34 447.52 62.75   0.00
## 269        5 -218.11 447.53 62.76   0.00
## 5689       6 -216.84 447.54 62.77   0.00
## 256        5 -218.13 447.56 62.79   0.00
## 456789     8 -214.12 447.58 62.81   0.00
## 369        5 -218.15 447.60 62.83   0.00
## 2345678    9 -212.73 447.74 62.97   0.00
## 2346789    9 -212.74 447.77 63.01   0.00
## 34567      7 -215.67 447.88 63.11   0.00
## (Null)     2 -221.83 447.91 63.14   0.00
## 235679     8 -214.28 447.91 63.14   0.00
## 356        5 -218.33 447.96 63.19   0.00
## 7          3 -220.75 447.99 63.23   0.00
## 456        5 -218.35 448.00 63.23   0.00
## 234679     8 -214.33 448.01 63.24   0.00
## 2356789    9 -212.87 448.02 63.25   0.00
## 45679      7 -215.75 448.05 63.28   0.00
## 23568      7 -215.87 448.29 63.52   0.00
## 23468      7 -215.93 448.40 63.63   0.00
## 23689      7 -215.94 448.42 63.65   0.00
## 234567     8 -214.54 448.43 63.66   0.00
## 2369       6 -217.34 448.54 63.77   0.00
## 2456789    9 -213.19 448.67 63.90   0.00
## 2346       6 -217.40 448.67 63.90   0.00
## 2356       6 -217.47 448.81 64.04   0.00
## 24568      7 -216.14 448.83 64.06   0.00
## 25689      7 -216.17 448.89 64.12   0.00
## 3469       6 -217.54 448.95 64.18   0.00
## 24689      7 -216.21 448.96 64.19   0.00
## 569        5 -218.84 448.99 64.22   0.00
## 34568      7 -216.35 449.25 64.48   0.00
## 2469       6 -217.70 449.27 64.50   0.00
## 28         4 -220.25 449.35 64.58   0.00
## 38         4 -220.25 449.35 64.58   0.00
## 34689      7 -216.42 449.38 64.61   0.00
## 245679     8 -215.02 449.38 64.62   0.00
## 3          3 -221.45 449.40 64.63   0.00
## 48         4 -220.36 449.56 64.79   0.00
## 89         4 -220.37 449.60 64.83   0.00
## 58         4 -220.37 449.60 64.83   0.00
## 35689      7 -216.55 449.65 64.88   0.00
## 478        5 -219.18 449.67 64.90   0.00
## 45689      7 -216.56 449.67 64.90   0.00
## 2456       6 -217.90 449.67 64.91   0.00
## 278        5 -219.20 449.70 64.93   0.00
## 345679     8 -215.18 449.72 64.95   0.00
## 37         4 -220.44 449.73 64.96   0.00
## 378        5 -219.24 449.78 65.01   0.00
## 3456       6 -217.96 449.79 65.02   0.00
## 3456789    9 -213.76 449.81 65.04   0.00
## 2          3 -221.67 449.85 65.08   0.00
## 4569       6 -217.99 449.85 65.08   0.00
## 9          3 -221.71 449.93 65.16   0.00
## 789        5 -219.33 449.97 65.20   0.00
## 578        5 -219.33 449.97 65.20   0.00
## 79         4 -220.56 449.97 65.21   0.00
## 27         4 -220.57 449.99 65.22   0.00
## 2569       6 -218.06 450.00 65.23   0.00
## 5          3 -221.76 450.02 65.25   0.00
## 4          3 -221.82 450.13 65.36   0.00
## 57         4 -220.64 450.14 65.37   0.00
## 3569       6 -218.14 450.15 65.38   0.00
## 47         4 -220.73 450.31 65.54   0.00
## 23456789  10 -212.73 450.82 66.05   0.00
## 2345679    9 -214.28 450.84 66.08   0.00
## 23469      7 -217.15 450.85 66.08   0.00
## 234568     8 -215.86 451.07 66.30   0.00
## 235689     8 -215.87 451.10 66.33   0.00
## 234689     8 -215.92 451.20 66.43   0.00
## 23569      7 -217.33 451.20 66.43   0.00
## 23456      7 -217.40 451.35 66.58   0.00
## 39         4 -221.25 451.36 66.59   0.00
## 23         4 -221.28 451.41 66.64   0.00
## 2478       6 -218.81 451.48 66.71   0.00
## 238        5 -220.12 451.55 66.78   0.00
## 248        5 -220.15 451.60 66.83   0.00
## 245689     8 -216.14 451.62 66.85   0.00
## 34569      7 -217.54 451.63 66.86   0.00
## 379        5 -220.17 451.65 66.88   0.00
## 35         4 -221.41 451.67 66.91   0.00
## 348        5 -220.21 451.72 66.95   0.00
## 34         4 -221.45 451.75 66.98   0.00
## 289        5 -220.24 451.79 67.02   0.00
## 389        5 -220.25 451.80 67.03   0.00
## 258        5 -220.25 451.80 67.03   0.00
## 358        5 -220.25 451.80 67.03   0.00
## 237        5 -220.26 451.82 67.05   0.00
## 3478       6 -219.01 451.88 67.11   0.00
## 24569      7 -217.69 451.92 67.16   0.00
## 25         4 -221.57 451.99 67.22   0.00
## 347        5 -220.35 452.00 67.23   0.00
## 489        5 -220.35 452.00 67.24   0.00
## 458        5 -220.36 452.02 67.25   0.00
## 345689     8 -216.33 452.02 67.25   0.00
## 29         4 -221.59 452.03 67.26   0.00
## 357        5 -220.37 452.04 67.28   0.00
## 589        5 -220.37 452.05 67.28   0.00
## 2378       6 -219.10 452.06 67.30   0.00
## 59         4 -221.61 452.08 67.31   0.00
## 579        5 -220.42 452.14 67.37   0.00
## 257        5 -220.42 452.15 67.38   0.00
## 279        5 -220.43 452.16 67.39   0.00
## 49         4 -221.66 452.16 67.39   0.00
## 24         4 -221.67 452.19 67.43   0.00
## 4789       6 -219.17 452.21 67.44   0.00
## 247        5 -220.46 452.22 67.45   0.00
## 4578       6 -219.18 452.23 67.46   0.00
## 2578       6 -219.19 452.24 67.47   0.00
## 2789       6 -219.20 452.26 67.49   0.00
## 3789       6 -219.22 452.31 67.54   0.00
## 3578       6 -219.23 452.33 67.56   0.00
## 45         4 -221.75 452.35 67.58   0.00
## 479        5 -220.56 452.43 67.66   0.00
## 5789       6 -219.33 452.53 67.76   0.00
## 457        5 -220.62 452.54 67.77   0.00
## 239        5 -221.12 453.55 68.78   0.00
## 234569     8 -217.15 453.64 68.87   0.00
## 23478      7 -218.57 453.68 68.91   0.00
## 359        5 -221.19 453.68 68.92   0.00
## 234        5 -221.21 453.73 68.96   0.00
## 235        5 -221.22 453.74 68.97   0.00
## 2347       6 -219.95 453.77 69.00   0.00
## 2348       6 -219.95 453.77 69.00   0.00
## 349        5 -221.25 453.80 69.03   0.00
## 2379       6 -220.04 453.95 69.18   0.00
## 3579       6 -220.06 453.99 69.22   0.00
## 24789      7 -218.72 453.99 69.22   0.00
## 2345689    9 -215.86 454.00 69.23   0.00
## 2489       6 -220.10 454.06 69.29   0.00
## 2389       6 -220.12 454.11 69.34   0.00
## 2358       6 -220.12 454.11 69.34   0.00
## 345        5 -221.41 454.12 69.35   0.00
## 24578      7 -218.79 454.13 69.36   0.00
## 3479       6 -220.14 454.15 69.38   0.00
## 2458       6 -220.15 454.16 69.39   0.00
## 2357       6 -220.15 454.17 69.40   0.00
## 259        5 -221.46 454.23 69.46   0.00
## 3458       6 -220.21 454.28 69.51   0.00
## 3489       6 -220.21 454.28 69.51   0.00
## 2589       6 -220.24 454.35 69.58   0.00
## 2579       6 -220.24 454.36 69.59   0.00
## 3589       6 -220.25 454.36 69.59   0.00
## 2457       6 -220.26 454.39 69.62   0.00
## 3457       6 -220.26 454.40 69.63   0.00
## 459        5 -221.56 454.43 69.66   0.00
## 245        5 -221.56 454.43 69.66   0.00
## 249        5 -221.58 454.47 69.70   0.00
## 34578      7 -219.00 454.55 69.78   0.00
## 34789      7 -219.01 454.56 69.79   0.00
## 4589       6 -220.35 454.56 69.79   0.00
## 2479       6 -220.38 454.64 69.87   0.00
## 4579       6 -220.41 454.70 69.93   0.00
## 23578      7 -219.09 454.73 69.96   0.00
## 23789      7 -219.09 454.73 69.96   0.00
## 45789      7 -219.17 454.89 70.12   0.00
## 25789      7 -219.19 454.92 70.15   0.00
## 35789      7 -219.21 454.97 70.20   0.00
## 2359       6 -221.04 455.95 71.18   0.00
## 2349       6 -221.11 456.09 71.32   0.00
## 23457      7 -219.80 456.14 71.37   0.00
## 2345       6 -221.14 456.14 71.37   0.00
## 3459       6 -221.18 456.24 71.47   0.00
## 23479      7 -219.87 456.29 71.52   0.00
## 23579      7 -219.90 456.35 71.58   0.00
## 234789     8 -218.52 456.38 71.61   0.00
## 23489      7 -219.93 456.40 71.63   0.00
## 23458      7 -219.95 456.45 71.68   0.00
## 234578     8 -218.55 456.45 71.68   0.00
## 34579      7 -220.03 456.60 71.83   0.00
## 24589      7 -220.10 456.74 71.97   0.00
## 245789     8 -218.72 456.79 72.02   0.00
## 2459       6 -221.46 456.79 72.02   0.00
## 23589      7 -220.12 456.79 72.02   0.00
## 24579      7 -220.18 456.90 72.13   0.00
## 34589      7 -220.20 456.96 72.19   0.00
## 345789     8 -219.00 457.36 72.59   0.00
## 235789     8 -219.08 457.51 72.74   0.00
## 23459      7 -221.02 458.59 73.82   0.00
## 234579     8 -219.70 458.75 73.99   0.00
## 234589     8 -219.93 459.20 74.43   0.00
## 2345789    9 -218.51 459.31 74.54   0.00
## 
## Term codes: 
##     D.PARK D.WAT.COUR   L.P.ROAD    L.WAT.C     MONT.S     OPEN.L 
##          1          2          3          4          5          6 
##      POLIC      SHRUB    WAT.RES 
##          7          8          9 
## 
## Model-averaged coefficients:  
## (full average) 
##               Estimate Std. Error Adjusted SE z value Pr(>|z|)    
## (Intercept)  4.406e+00  2.596e-01   2.643e-01  16.671  < 2e-16 ***
## D.PARK      -1.156e-04  1.161e-05   1.190e-05   9.713  < 2e-16 ***
## L.P.ROAD     1.590e-01  1.651e-01   1.671e-01   0.952  0.34120    
## L.WAT.C      1.089e-01  1.059e-01   1.071e-01   1.017  0.30920    
## OPEN.L      -9.880e-03  3.674e-03   3.744e-03   2.639  0.00831 ** 
## MONT.S       1.963e-02  3.292e-02   3.332e-02   0.589  0.55576    
## SHRUB       -4.732e-02  1.511e-01   1.537e-01   0.308  0.75818    
## D.WAT.COUR  -7.578e-05  2.131e-04   2.163e-04   0.350  0.72610    
## WAT.RES      8.530e-03  4.124e-02   4.207e-02   0.203  0.83934    
## POLIC        2.576e-03  2.134e-02   2.187e-02   0.118  0.90624    
##  
## (conditional average) 
##               Estimate Std. Error Adjusted SE z value Pr(>|z|)    
## (Intercept)  4.406e+00  2.596e-01   2.643e-01  16.671  < 2e-16 ***
## D.PARK      -1.156e-04  1.161e-05   1.190e-05   9.713  < 2e-16 ***
## L.P.ROAD     2.543e-01  1.391e-01   1.428e-01   1.781  0.07493 .  
## L.WAT.C      1.698e-01  8.460e-02   8.677e-02   1.957  0.05036 .  
## OPEN.L      -1.023e-02  3.224e-03   3.306e-03   3.094  0.00197 ** 
## MONT.S       4.913e-02  3.554e-02   3.646e-02   1.348  0.17779    
## SHRUB       -1.817e-01  2.516e-01   2.575e-01   0.706  0.48034    
## D.WAT.COUR  -2.685e-04  3.304e-04   3.377e-04   0.795  0.42664    
## WAT.RES      3.706e-02  7.956e-02   8.144e-02   0.455  0.64911    
## POLIC        1.192e-02  4.470e-02   4.585e-02   0.260  0.79481    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Relative variable importance: 
##                      D.PARK OPEN.L L.WAT.C L.P.ROAD MONT.S D.WAT.COUR
## Importance:          1.00   0.97   0.64    0.63     0.40   0.28      
## N containing models:  256    256    256     256      256    256      
##                      SHRUB WAT.RES POLIC
## Importance:          0.26  0.23    0.22 
## N containing models:  256   256     256
options(na.action = "na.omit") # reset base R options
# our best model includes varaibles 1,3,4 and 6 or 1,4 and 6. We go for the latter


###############
#             #
# Exercise 12 #
#             #
###############
Road.nb2 <- glm.nb(TOT.N ~ OPEN.L + L.WAT.C + D.PARK, link = "log", data=Road)
summary(Road.nb2)
## 
## Call:
## glm.nb(formula = TOT.N ~ OPEN.L + L.WAT.C + D.PARK, data = Road, 
##     link = "log", init.theta = 4.725770366)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.7799  -0.7976  -0.1921   0.4513   2.0395  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  4.467e+00  1.716e-01  26.022  < 2e-16 ***
## OPEN.L      -1.071e-02  3.146e-03  -3.405 0.000661 ***
## L.WAT.C      1.867e-01  7.911e-02   2.361 0.018246 *  
## D.PARK      -1.158e-04  1.082e-05 -10.708  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(4.7258) family taken to be 1)
## 
##     Null deviance: 189.709  on 51  degrees of freedom
## Residual deviance:  52.003  on 48  degrees of freedom
## AIC: 384.25
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  4.73 
##           Std. Err.:  1.16 
## 
##  2 x log-likelihood:  -374.254
###############
#             #
# Exercise 13 #
#             #
###############
op <- par(mfrow = c(2, 2))
plot(Road.nb2) # no patterns, OK!

par(op)



Basic Generalized Linear Modeling – Part 2: Exercises

In this exercise, we will try to handle the model that has been over-dispersed using the quasi-Poisson model. Over-dispersion simply means that the variance is greater than the mean. It’s important because it leads to inflation in the models and increases the possibility of Type I errors. We will use a data-set on amphibian road kill (Zuur et al., 2009). It has 17 explanatory variables. We’re going to focus on nine of them using the total number of kills (TOT.N) as the response variable.

Please download the data-set here and name it “Road.” Answers to these exercises are available here. If you obtained a different (correct) answer than those listed on the solutions page, please feel free to post your answer as a comment on that page. Load the data-set and required package before running the exercise.

Exercise 1
Doing some plotting, we can see decreasing variability of kills with distance.

Exercise 2
Run the GLM model with distance as the explanatory variables.

Exercise 3
Add more co-variables to the model and see what’s happening by checking the model summary.

Exercise 4
Check the co-linearity using VIF’s. Set options in Base R concerning missing values.

Exercise 5
Check the summary again and set base R options. See why we do this on the previous related post exercise.

Exercise 6
Check for over-dispersion (rule of thumb, value needs to be around 1.) If it is still greater or less than 1, then we need to check diagnostic plots and re-run the GLM with another structure model.

Exercise 7
Restructure the model by throwing out the least significant terms and repeat the model until generating fewer significant terms.

Exercise 8
Check the diagnostic plots. If there are still some problems, then we might need to use other types of regression, like Negative Binomial regression. We’ll discuss it in the next exercise post.




Basic Generalized Linear Modeling – Part 2: Solutions

Below are the solutions to these exercises on “GLM – Part 2.”

if (!require(car)){install.packages(car, dep=T)}
library(car)

###############
#             #
# Exercise 1  #
#             #
###############
rm(list=ls())
Road<-read.csv(file.choose())
plot(Road$D.PARK,Road$TOT.N,xlab="Distance to park",
     ylab="Road kills")

###############
#             #
# Exercise 2  #
#             #
###############
Road.glm1<-glm(TOT.N~D.PARK,family=poisson,data=Road)
summary(Road.glm1)
## 
## Call:
## glm(formula = TOT.N ~ D.PARK, family = poisson, data = Road)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -8.1100  -1.6950  -0.4708   1.4206   7.3337  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  4.316e+00  4.322e-02   99.87   <2e-16 ***
## D.PARK      -1.059e-04  4.387e-06  -24.13   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 1071.4  on 51  degrees of freedom
## Residual deviance:  390.9  on 50  degrees of freedom
## AIC: 634.29
## 
## Number of Fisher Scoring iterations: 4
###############
#             #
# Exercise 3  #
#             #
###############
Road.glm2 <- glm(TOT.N ~ OPEN.L + MONT.S + POLIC +
                   SHRUB + WAT.RES + L.WAT.C + L.P.ROAD +
                   D.WAT.COUR + D.PARK,family=poisson,data=Road)
summary(Road.glm2)
## 
## Call:
## glm(formula = TOT.N ~ OPEN.L + MONT.S + POLIC + SHRUB + WAT.RES + 
##     L.WAT.C + L.P.ROAD + D.WAT.COUR + D.PARK, family = poisson, 
##     data = Road)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -7.0437  -1.2182  -0.3528   1.3615   5.0817  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  3.995e+00  1.083e-01  36.904  < 2e-16 ***
## OPEN.L      -4.579e-03  1.517e-03  -3.018 0.002542 ** 
## MONT.S       8.672e-02  1.328e-02   6.528 6.66e-11 ***
## POLIC       -3.056e-02  1.443e-02  -2.118 0.034195 *  
## SHRUB       -5.758e-01  1.009e-01  -5.705 1.16e-08 ***
## WAT.RES      1.082e-01  2.911e-02   3.716 0.000203 ***
## L.WAT.C      3.125e-01  4.294e-02   7.277 3.40e-13 ***
## L.P.ROAD     1.706e-01  5.644e-02   3.023 0.002501 ** 
## D.WAT.COUR   9.694e-05  1.469e-04   0.660 0.509203    
## D.PARK      -1.232e-04  5.512e-06 -22.341  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 1071.44  on 51  degrees of freedom
## Residual deviance:  272.69  on 42  degrees of freedom
## AIC: 532.08
## 
## Number of Fisher Scoring iterations: 5
###############
#             #
# Exercise 4  #
#             #
###############
vif(Road.glm2)
##     OPEN.L     MONT.S      POLIC      SHRUB    WAT.RES    L.WAT.C 
##   1.332456   1.172192   1.364623   1.622003   1.570514   1.933864 
##   L.P.ROAD D.WAT.COUR     D.PARK 
##   1.215771   1.691718   1.578268
options(na.action=na.fail)


###############
#             #
# Exercise 5  #
#             #
###############

Rsummary(model.avg(dredge(Road.glm2), fit = TRUE, subset = TRUE))
## Error in Rsummary(model.avg(dredge(Road.glm2), fit = TRUE, subset = TRUE)): could not find function "Rsummary"
options(na.action = "na.omit")

###############
#             #
# Exercise 6  #
#             #
###############
Road.glm2$deviance/Road.glm2$df.resid
## [1] 6.492606
op <- par(mfrow = c(2, 2))
plot(Road.glm2)

par(op)


###############
#             #
# Exercise 7  #
#             #
###############
Road.glm3 <- glm(TOT.N ~ OPEN.L + MONT.S + POLIC +
                   SHRUB + WAT.RES + L.WAT.C + L.P.ROAD +
                   D.WAT.COUR + D.PARK,family=quasipoisson,data=Road)
summary(Road.glm3)
## 
## Call:
## glm(formula = TOT.N ~ OPEN.L + MONT.S + POLIC + SHRUB + WAT.RES + 
##     L.WAT.C + L.P.ROAD + D.WAT.COUR + D.PARK, family = quasipoisson, 
##     data = Road)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -7.0437  -1.2182  -0.3528   1.3615   5.0817  
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.995e+00  2.639e-01  15.141  < 2e-16 ***
## OPEN.L      -4.579e-03  3.697e-03  -1.238   0.2225    
## MONT.S       8.672e-02  3.238e-02   2.678   0.0105 *  
## POLIC       -3.056e-02  3.517e-02  -0.869   0.3899    
## SHRUB       -5.758e-01  2.460e-01  -2.341   0.0241 *  
## WAT.RES      1.082e-01  7.096e-02   1.524   0.1349    
## L.WAT.C      3.125e-01  1.047e-01   2.986   0.0047 ** 
## L.P.ROAD     1.706e-01  1.376e-01   1.240   0.2217    
## D.WAT.COUR   9.694e-05  3.580e-04   0.271   0.7879    
## D.PARK      -1.232e-04  1.344e-05  -9.166 1.41e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasipoisson family taken to be 5.940819)
## 
##     Null deviance: 1071.44  on 51  degrees of freedom
## Residual deviance:  272.69  on 42  degrees of freedom
## AIC: NA
## 
## Number of Fisher Scoring iterations: 5
Road.glm4 <- update(Road.glm3, .~. -D.WAT.COUR)
summary(Road.glm4) # still have terms that are not significant - drop POLIC
## 
## Call:
## glm(formula = TOT.N ~ OPEN.L + MONT.S + POLIC + SHRUB + WAT.RES + 
##     L.WAT.C + L.P.ROAD + D.PARK, family = quasipoisson, data = Road)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -7.1912  -1.2373  -0.4168   1.2748   5.0097  
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.034e+00  2.183e-01  18.480  < 2e-16 ***
## OPEN.L      -4.755e-03  3.591e-03  -1.324  0.19246    
## MONT.S       8.716e-02  3.187e-02   2.735  0.00903 ** 
## POLIC       -2.789e-02  3.333e-02  -0.837  0.40733    
## SHRUB       -5.650e-01  2.398e-01  -2.356  0.02309 *  
## WAT.RES      1.013e-01  6.540e-02   1.549  0.12868    
## L.WAT.C      2.975e-01  8.763e-02   3.395  0.00149 ** 
## L.P.ROAD     1.762e-01  1.341e-01   1.314  0.19582    
## D.PARK      -1.222e-04  1.281e-05  -9.538 3.56e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasipoisson family taken to be 5.800369)
## 
##     Null deviance: 1071.44  on 51  degrees of freedom
## Residual deviance:  273.12  on 43  degrees of freedom
## AIC: NA
## 
## Number of Fisher Scoring iterations: 5
Road.glm5 <- update(Road.glm4, .~. -POLIC)
summary(Road.glm5) # still have terms that are not significant - drop L.P.ROAD 
## 
## Call:
## glm(formula = TOT.N ~ OPEN.L + MONT.S + SHRUB + WAT.RES + L.WAT.C + 
##     L.P.ROAD + D.PARK, family = quasipoisson, data = Road)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -7.2509  -1.3212  -0.2044   1.1002   5.1422  
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.014e+00  2.173e-01  18.476  < 2e-16 ***
## OPEN.L      -5.552e-03  3.483e-03  -1.594  0.11806    
## MONT.S       8.747e-02  3.170e-02   2.759  0.00842 ** 
## SHRUB       -5.397e-01  2.367e-01  -2.280  0.02748 *  
## WAT.RES      1.014e-01  6.518e-02   1.555  0.12710    
## L.WAT.C      2.913e-01  8.689e-02   3.353  0.00165 ** 
## L.P.ROAD     1.612e-01  1.322e-01   1.219  0.22921    
## D.PARK      -1.182e-04  1.189e-05  -9.943 7.96e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasipoisson family taken to be 5.771314)
## 
##     Null deviance: 1071.44  on 51  degrees of freedom
## Residual deviance:  277.42  on 44  degrees of freedom
## AIC: NA
## 
## Number of Fisher Scoring iterations: 5
Road.glm6 <- update(Road.glm5, .~. -L.P.ROAD)
summary(Road.glm6) # still have terms that are not significant - drop WAT.RES 
## 
## Call:
## glm(formula = TOT.N ~ OPEN.L + MONT.S + SHRUB + WAT.RES + L.WAT.C + 
##     D.PARK, family = quasipoisson, data = Road)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -6.7601  -1.3247  -0.4837   0.8902   5.7634  
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.164e+00  1.848e-01  22.528  < 2e-16 ***
## OPEN.L      -5.455e-03  3.552e-03  -1.536  0.13164    
## MONT.S       8.565e-02  3.271e-02   2.619  0.01198 *  
## SHRUB       -4.571e-01  2.239e-01  -2.042  0.04709 *  
## WAT.RES      8.464e-02  6.576e-02   1.287  0.20460    
## L.WAT.C      2.893e-01  8.954e-02   3.231  0.00231 ** 
## D.PARK      -1.187e-04  1.225e-05  -9.690 1.38e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasipoisson family taken to be 6.167261)
## 
##     Null deviance: 1071.44  on 51  degrees of freedom
## Residual deviance:  285.78  on 45  degrees of freedom
## AIC: NA
## 
## Number of Fisher Scoring iterations: 5
Road.glm7 <- update(Road.glm6, .~. -WAT.RES)
summary(Road.glm7) # still have terms that are not significant - drop OPEN.L 
## 
## Call:
## glm(formula = TOT.N ~ OPEN.L + MONT.S + SHRUB + L.WAT.C + D.PARK, 
##     family = quasipoisson, data = Road)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -7.1160  -1.3496  -0.5273   0.9917   5.6795  
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.221e+00  1.778e-01  23.743  < 2e-16 ***
## OPEN.L      -5.605e-03  3.571e-03  -1.570  0.12333    
## MONT.S       7.737e-02  3.219e-02   2.403  0.02034 *  
## SHRUB       -3.672e-01  2.080e-01  -1.765  0.08417 .  
## L.WAT.C      2.550e-01  8.530e-02   2.989  0.00448 ** 
## D.PARK      -1.168e-04  1.216e-05  -9.605 1.44e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasipoisson family taken to be 6.209759)
## 
##     Null deviance: 1071.44  on 51  degrees of freedom
## Residual deviance:  295.09  on 46  degrees of freedom
## AIC: NA
## 
## Number of Fisher Scoring iterations: 5
Road.glm8 <- update(Road.glm7, .~. -OPEN.L)
summary(Road.glm8) # still have terms that are not significant - drop OPEN.L 
## 
## Call:
## glm(formula = TOT.N ~ MONT.S + SHRUB + L.WAT.C + D.PARK, family = quasipoisson, 
##     data = Road)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -6.9353  -1.7733  -0.1813   1.1852   5.2474  
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.151e+00  1.744e-01  23.805  < 2e-16 ***
## MONT.S       8.347e-02  3.216e-02   2.595   0.0126 *  
## SHRUB       -4.011e-01  2.068e-01  -1.940   0.0584 .  
## L.WAT.C      2.057e-01  7.997e-02   2.572   0.0133 *  
## D.PARK      -1.200e-04  1.194e-05 -10.043 2.79e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasipoisson family taken to be 6.202568)
## 
##     Null deviance: 1071.44  on 51  degrees of freedom
## Residual deviance:  310.69  on 47  degrees of freedom
## AIC: NA
## 
## Number of Fisher Scoring iterations: 5
# We finally end up with a model with way fewer significant terms

###############
#             #
# Exercise 8  #
#             #
###############
op <- par(mfrow = c(2, 2))
plot(Road.glm8) # wedge shape in the fitted residuals 

par(op)                 # we still have some problems and we'll discuss it on the next exercise post



Modeling With ANCOVA – Part 2: Solutions

Below are the solutions to these exercises on “ANCOVA – Part 2.”

if (!require(car)){install.packages(car, dep=T)}
library(car)
if (!require(ggplot2)){install.packages(ggplot2, dep=T)}
library(ggplot2)

###############
#             #
# Exercise 1  #
#             #
###############
# Load data
limpet<-read.csv(file.choose())
limpet.lm <- lm(EGGS ~ DENSITY * SEASON, data = limpet)

###############
#             #
# Exercise 2  #
#             #
###############
predict(limpet.lm)
##         1         2         3         4         5         6         7 
## 2.3949692 2.3949692 2.3949692 1.6075953 1.6075953 1.6075953 2.1594217 
##         8         9        10        11        12        13        14 
## 2.1594217 2.1594217 1.3938428 1.3938428 1.3938428 1.6546769 1.6546769 
##        15        16        17        18        19        20        21 
## 1.6546769 0.9358016 0.9358016 0.9358016 1.1499321 1.1499321 1.1499321 
##        22        23        24 
## 0.4777604 0.4777604 0.4777604
###############
#             #
# Exercise 3  #
#             #
###############
vec.x<-expand.grid(DENSITY=unique(limpet$DENSITY),SEASON=levels(limpet$SEASON))
#Density and Season as repeated factors 

###############
#             #
# Exercise 4  #
#             #
###############
predict(limpet.lm, newdata = vec.x)
##         1         2         3         4         5         6         7 
## 2.3949692 2.1594217 1.6546769 1.1499321 1.6075953 1.3938428 0.9358016 
##         8 
## 0.4777604
limpet.predict <- predict(limpet.lm, newdata = vec.x)
predict.plot <- data.frame(vec.x, limpet.predict)
#look at the dataframe 
predict.plot
##   DENSITY SEASON limpet.predict
## 1       8 spring      2.3949692
## 2      15 spring      2.1594217
## 3      30 spring      1.6546769
## 4      45 spring      1.1499321
## 5       8 summer      1.6075953
## 6      15 summer      1.3938428
## 7      30 summer      0.9358016
## 8      45 summer      0.4777604
###############
#             #
# Exercise 5  #
#             #
###############
predict.mean <- predict(limpet.lm, newdata = vec.x)
predict.plot1<- data.frame(vec.x, predict.mean)

###############
#             #
# Exercise 6  #
#             #
###############
vec.x2 <- expand.grid(DENSITY = c(5, 10, 25, 35),
                      SEASON = levels(limpet$SEASON))
# look at it
vec.x2
##   DENSITY SEASON
## 1       5 spring
## 2      10 spring
## 3      25 spring
## 4      35 spring
## 5       5 summer
## 6      10 summer
## 7      25 summer
## 8      35 summer
# now predict the mean values per season
predict.levels <- predict(limpet.lm, newdata = vec.x2)
predict.plot2 <- data.frame(vec.x2, predict.levels)

###############
#             #
# Exercise 7  #
#             #
###############

plot(EGGS ~ DENSITY, data = limpet, pch = 19,cex = 1.5,
     col = c("Black", "Red")[limpet$SEASON],
     xlab = list("Density", cex = 1.2),
     ylab = list("Eggs Produced", cex = 1.2))
xlim = c(0, 50) # limit the X axis
# Now add a legend
legend(35, 3, legend = c("Spring", "Summer"),
       col = c("black", "red"), pch = 19)

# add lines 
lines(limpet.predict ~ DENSITY,subset(predict.plot,SEASON == "spring"))
lines(limpet.predict ~ DENSITY,subset(predict.plot,SEASON == "summer"), col = "red")




Modeling With ANCOVA – Part 1: Exercises

https://media.springernature.com/full/nature-static/assets/v1/image-assets/485176a-f1.2.jpg

In the previous exercise on the #REcology series, we learned how to define the impact of one explanatory variable to another response variable. In a real practice, particularly in experimental or observational design, explanatory variables are often found to be more than one. Thus, it needs a new determination to analyze the data-set and generate the correct conclusion. In this exercise, we will try to do an analysis of the co-variance (ANCOVA) method. Covariates here refers to the continuous explanatory variables. It involves a combination of regression and analysis of variance. ANCOVA requires a continuous response variable, at least one continuous explanatory, and at least one explanatory factor variable. Answers to these exercises are available here. If you obtained a different (correct) answer than those listed on the solutions page, please feel free to post your answer as a comment on that page. Data-set on this exercise can be downloaded here.

Exercise 1
Load the data-set and required package, car.

Exercise 2
Do some plotting; what can be inferred? Create a basic verbal hypothesis.

Exercise 3
Create an interaction model based on the basic verbal hypothesis generated on Exercise 2.

Exercise 4
Check the interaction between the explanatory variables of the model created using ANOVA. Make sure that the interaction of those two variables is insignificant.

Learn more about ANOVA into r in the online course
Statistics with R – Intermediate Level
. In this course, you will learn how to:

  • Run parametric correlation and t-tests
  • Learn about two-way and three-way analysis of variances
  • And much more

Exercise 5
Check the statistic summary of the model. Pay attention to the intercept, slope, and the R square of the model.

Exercise 6
Create a linear regression plot and determine the equation based on the statistic summary.




Groups Comparison With ANOVA: Exercises (Part 2)

On this 2nd part of groups comparison exercise, we will focus on nested ANOVA application in R, particularly the application on ecology. This is the last part of groups comparison exercise.Previous exercise can be found here
Answers to the exercises are available here. If you obtained a different (correct) answer than those listed on the solutions page, please feel free to post your answer as a comment on that page.
To recall our previous exercise, below is the flowchart of group comparison process.

Download dataset required : here

Exercise 1
Load required package; car,ggplot2,dplyr,lattice, alr4 and check if the dataset is in balance using table and or replication function

Exercise 2
Determine the Null hypothesis and create some data visualizations including histogram, boxplot and coplot

Exercise 3
Chcek for normality and homogeneity of variance

Exercise 4
Check for interaction between explanatory variables using interaction plot and or xyplot

Learn more about ANOVA into r in the online course
Statistics with R – Intermediate Level
. this course you will learn how to:

  • Run parametric correlation and t-tests,
  • learn about twoway and threeway analysis of variance,
  • and much more


Exercise 5
Select the appropriate model ANOVA based on the interaction (nested model)

Exercise 6
Select nested model ANOVA based on Food effect

Exercise 7
Select nested model ANOVA based on Pen Effect

Exercise 8
Comparing those two nested models and generate some conclusions




Groups Comparison With ANOVA: Solutions (Part 2)

Below are the solutions to these exercises on Two way ANOVA.

if (!require(car)){install.packages(car, dep=T)}
## Warning: package 'car' was built under R version 3.3.2
library(car)
if (!require(ggplot2)){install.packages(ggplot2, dep=T)}
## Warning: package 'ggplot2' was built under R version 3.3.3
library(ggplot2)
if (!require(dplyr)){install.packages(dplyr, dep=T)}
## Warning: package 'dplyr' was built under R version 3.3.3
library(dplyr)
if (!require(lattice)){install.packages(lattice, dep=T)}
library(lattice)

###############
#             #
# Exercise 1  #
#             #
###############
# Load data
Chick <- read.csv(file.choose())
str(Chick)
## 'data.frame':	24 obs. of  3 variables:
##  $ Food    : Factor w/ 2 levels "Even","Patchy": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Pen     : Factor w/ 4 levels "P1","P2","P3",..: 1 1 1 2 2 2 3 3 3 4 ...
##  $ Cortisol: num  15.5 16.2 15 18.7 15 20.8 12 15 13.2 12.4 ...
Chick
##      Food Pen Cortisol
## 1    Even  P1     15.5
## 2    Even  P1     16.2
## 3    Even  P1     15.0
## 4    Even  P2     18.7
## 5    Even  P2     15.0
## 6    Even  P2     20.8
## 7    Even  P3     12.0
## 8    Even  P3     15.0
## 9    Even  P3     13.2
## 10   Even  P4     12.4
## 11   Even  P4     10.4
## 12   Even  P4      9.5
## 13 Patchy  P1     16.1
## 14 Patchy  P1      7.3
## 15 Patchy  P1     10.5
## 16 Patchy  P2      7.5
## 17 Patchy  P2      7.2
## 18 Patchy  P2     10.8
## 19 Patchy  P3      5.2
## 20 Patchy  P3      9.4
## 21 Patchy  P3      5.5
## 22 Patchy  P4      8.8
## 23 Patchy  P4     10.3
## 24 Patchy  P4      9.9
# see balance......
table(Chick$Food, Chick$Pen)
##         
##          P1 P2 P3 P4
##   Even    3  3  3  3
##   Patchy  3  3  3  3
###############
#             #
# Exercise 2  #
#             #
###############
#boxplot 
par(mfrow = c(2, 1))
boxplot(Cortisol ~ Food, data = Chick, xlab = "Food", ylab = "Cortisol")
boxplot(Cortisol ~ Pen, data = Chick, xlab = "Pen", ylab = "Cortisol")

#outlier presence 

#histogram
P1<- Chick[Chick$Pen == "P1", ]
P2<- Chick[Chick$Pen == "P2", ]
P3<- Chick[Chick$Pen == "P3", ]
P4<- Chick[Chick$Pen == "P4", ]

par(mfrow = c(2, 2))
hist(P1$Cortisol, xlim = c(0, 20), main = "", col = "Darkblue", xlab = "P1")
hist(P2$Cortisol, xlim = c(0, 20), main = "", col = "Darkblue", xlab = "P2")
hist(P3$Cortisol, xlim = c(0, 20), main = "", col = "Darkblue", xlab = "P3")
hist(P4$Cortisol, xlim = c(0, 20), main = "", col = "Darkblue", xlab = "P4")


#coplot
par(mfrow = c(1, 1))
coplot(Cortisol ~ Pen | Food, panel = panel.smooth, xlab = "Pen", ylab = "Cortisol", data = Chick)

###############
#             #
# Exercise 3  #
#             #
###############
#normality
qqnorm(Chick$Cortisol)
qqline(Chick$Cortisol)

#shapiro test
shapiro.test(Chick$Cortisol)
## 
## 	Shapiro-Wilk normality test
## 
## data:  Chick$Cortisol
## W = 0.9669, p-value = 0.5915
#OK

#levene test
leveneTest(Cortisol ~ Pen, data = Chick) 
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value  Pr(>F)  
## group  3  3.4458 0.03624 *
##       20                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
leveneTest(Cortisol ~ Food, data = Chick) #not OK?
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  1   0.104 0.7502
##       22
###############
#             #
# Exercise 4  #
#             #
###############
#Interaction between explanatory variables
interaction.plot(Chick$Food, Chick$Pen, Chick$Cortisol, col=c(2,3), xlab = "Pen", ylab = "Cortisol", trace.label = "Food")

#all factors must be significant 

#check explanatory variables interaction effect using two-way ANOVA
One <- aov(Cortisol ~ Food * Pen, data = Chick)

# Look at the results
summary(One)
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## Food         1 177.13  177.13  32.655 3.19e-05 ***
## Pen          3  63.49   21.16   3.902   0.0288 *  
## Food:Pen     3  59.50   19.83   3.656   0.0352 *  
## Residuals   16  86.79    5.42                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# The results suggest an highly signficant food effect, a significant pen effect and a significant interaction effect.




###############
#             #
# Exercise 5  #
#             #
###############

#Using a nested analysis
Two <- aov(Cortisol ~ Food + Error(Food / Pen), data = Chick)

# Look at the results
summary(Two)
## 
## Error: Food
##      Df Sum Sq Mean Sq
## Food  1  177.1   177.1
## 
## Error: Food:Pen
##           Df Sum Sq Mean Sq F value Pr(>F)
## Residuals  6    123    20.5               
## 
## Error: Within
##           Df Sum Sq Mean Sq F value Pr(>F)
## Residuals 16  86.79   5.424
# There are no p values and F ratios
# To find those you need to compare the nested models (between pen and food) and we'll do it manually


###############
#             #
# Exercise 6  #
#             #
###############
# STEP ONE - the food effect comparison
# 1.  You need the F statistic for the food effect so divide the mean sq on the top level 
#     (Food) by the mean sq for the level below (Food : Pen)

177.1/20.5 # = 8.639024
## [1] 8.639024
# 2.  Now compute the p value using the properties of the F distribution
#     Use the formula 1 - pf(F value, Df top level test, DF second level test)

1 - pf(8.63902, 1, 6) # gives  a food effect of (p=0.02597392 or 0.026 if we round it)
## [1] 0.02597392
###############
#             #
# Exercise 7  #
#             #
###############

# STEP TWO - the pen effect. Divide the food:pen residuals by the error within residuals
20.5/5.424 # this is 3.779499
## [1] 3.779499
# Compute the p value
1 - pf(3.779499, 6, 16) # p=0.01549547 or p=0.015 if we round it)
## [1] 0.01549547
# Do the same thing using code forcing R to generate the residual comparisons

# The food effect

FoodEffect <- aov(Cortisol ~ Food + Error(Food : Pen), data = Chick)
## Warning in aov(Cortisol ~ Food + Error(Food:Pen), data = Chick): Error()
## model is singular
# look at results and ignore the warning

summary(FoodEffect)
## 
## Error: Food:Pen
##           Df Sum Sq Mean Sq F value Pr(>F)  
## Food       1  177.1   177.1   8.641  0.026 *
## Residuals  6  123.0    20.5                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Error: Within
##           Df Sum Sq Mean Sq F value Pr(>F)
## Residuals 16  86.79   5.424
# Food effect is significant at p=0.026


###############
#             #
# Exercise 8  #
#             #
###############
# now consider pen effect by comparing the un-nested and nested models using an ANOVA command

PenEffect <- anova(aov(Cortisol ~ Food, data = Chick), aov(Cortisol ~ Food : Pen, data = Chick), test = "F")

# Look at the results
PenEffect                               # Notice we don't need the summary command because we used an ANOVA call to compare the models
## Analysis of Variance Table
## 
## Model 1: Cortisol ~ Food
## Model 2: Cortisol ~ Food:Pen
##   Res.Df     RSS Df Sum of Sq      F Pr(>F)  
## 1     22 209.772                             
## 2     16  86.787  6    122.98 3.7789 0.0155 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# So we can see that mean cortisol differed among by pens by treatment. Over and above this effect at this spatial scale
# mean cortisol levels were effected by levels of food predictability. 
# Cortisol levels were lower on average in pens with patchy food than even distribution 




Groups Comparison with ANOVA: Exercises (Part 1)

As we’re aware, the growth of data science has been increased recently, and successfully applied on research for decision making or creating baseline conditions. Statistical analysis, including data visualization, exploration, and modeling are three main important elements in data science.
In this exercise, we’ll learn how to analyze response and explanatory variables of data that consist of two or more groups. In this exercise, we will explore the application of various models/types of ANOVA. We will focus on two ways: (part 1) and nested ANOVA models (part 2). Repeated measures ANOVA exercises can be found here.
The data-sets will be based on ecology; however, the application may vary. Base knowledge is important to interpret the result and make the right decision under certain circumstances.
Answers to the exercises are available here. If you obtained a different (correct) answer than those listed on the solutions page, please feel free to post your answer as a comment on that page.
To make our exercise easy to follow, below is the flowchart of group comparison processes.

Exercise 1
Load the required package car,ggplot2,dplyr,lattice, alr4 and two different data-sets in the link below.
Dataset 1

Exercise 2
Determine the null hypothesis and check if the data-set is in balance condition using the table and or replication function.

Exercise 3
Produce descriptive statistic summaries and data visualization; histogram, boxplot and coplot. What can be inferred from the visualization?

Exercise 4
Check the normality and heterogeneity of variances using qqnorm, qqline,shapiro test and levene test. The rule of thumb for normality is that the qqnorm is following the qqline, accompanied by p>0.05 for shapiro test and levene test. We’ll discuss it further on the answer page.

Exercise 5
Check for interaction between explanatory variables using the interaction plot and or xyplot and select the appropriate model ANOVA based on the interaction.

Learn more about ANOVA into r in the online course
Statistics with R – Intermediate Level
. this course you will learn how to:

  • Run parametric correlation and t-tests,
  • learn about twoway and threeway analysis of variance,
  • and much more

Exercise 6
Plot the residual vs. fitted for model validation.

Exercise 7
Accept or reject the null hypothesis? What is the conclusion?




Groups Comparison with ANOVA: Solutions (Part 1)

Below are the solutions to these exercises on “Two Way ANOVA’s.”

if (!require(car)){install.packages(car, dep=T)}
## Warning: package 'car' was built under R version 3.3.2
library(car)
if (!require(ggplot2)){install.packages(ggplot2, dep=T)}
## Warning: package 'ggplot2' was built under R version 3.3.3
library(ggplot2)
if (!require(dplyr)){install.packages(dplyr, dep=T)}
## Warning: package 'dplyr' was built under R version 3.3.3
library(dplyr)
if (!require(lattice)){install.packages(lattice, dep=T)}
library(lattice)

###############
#             #
# Exercise 1  #
#             #
###############
#dataset named quinn
quinn<-read.csv(file.choose(),header=TRUE)

###############
#             #
# Exercise 2  #
#             #
###############
#Null Hypothesis
#there is no significance influence of density treatment on egg production 
#there is no significance influence of season on egg production

table(quinn$DENSITY, quinn$SEASON)
##     
##      spring summer
##   L1      3      3
##   L2      3      3
##   L3      3      3
##   L4      3      3
!is.list(replications(EGGS ~ Error(DENSITY) + SEASON, data = quinn))
## [1] TRUE
###############
#             #
# Exercise 3  #
#             #
###############
summary(quinn)
##  DENSITY    SEASON        EGGS       
##  L1:6    spring:12   Min.   :0.3560  
##  L2:6    summer:12   1st Qu.:0.9165  
##  L3:6                Median :1.4330  
##  L4:6                Mean   :1.4717  
##                      3rd Qu.:1.9227  
##                      Max.   :2.8750
#Initial visualisation
#HISTOGRAM
L1<- quinn[quinn$DENSITY == "L1", ]
L2<- quinn[quinn$DENSITY == "L2", ]
L3<- quinn[quinn$DENSITY == "L3", ]
L4<- quinn[quinn$DENSITY == "L4", ]

par(mfrow = c(2, 2))
hist(L1$EGGS, xlim = c(0, 3), main = "", col = "Darkblue", xlab = "L1")
hist(L2$EGGS, xlim = c(0, 3), main = "", col = "Darkblue", xlab = "L2")
hist(L3$EGGS, xlim = c(0, 3), main = "", col = "Darkblue", xlab = "L3")
hist(L4$EGGS, xlim = c(0, 3), main = "", col = "Darkblue", xlab = "L4")

#BOXPLOT
par(mfrow = c(2, 1))
boxplot(EGGS ~ SEASON, data = quinn, xlab = "Sample", ylab = "Number")
boxplot(EGGS~DENSITY,data=quinn,xlab="Sample",ylab="Number")

#COPLOT
par(mfrow = c(1, 1))
coplot(EGGS ~ DENSITY | SEASON, panel = panel.smooth, xlab = "Density", ylab = "Eggs", data = quinn)

#there is no outlier
#density and season seems to have significance influence to egg production

###############
#             #
# Exercise 4  #
#             #
###############
#normality
qqnorm(quinn$EGGS)
qqline(quinn$EGGS)

#shapiro test
shapiro.test(quinn$EGGS)
## 
## 	Shapiro-Wilk normality test
## 
## data:  quinn$EGGS
## W = 0.96449, p-value = 0.5351
#levene test
leveneTest(EGGS ~ DENSITY, data = quinn)
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  3  0.4125 0.7458
##       20
###############
#             #
# Exercise 5  #
#             #
###############
#Interaction between explanatory variables
interaction.plot(quinn$DENSITY, quinn$SEASON, quinn$EGGS, col=c(2,3), xlab = "DENSITY", ylab = "EGGS", trace.label = "SEASON")

#check explanatory variables interaction effect
quinnaov <- aov(EGGS ~ SEASON * DENSITY, data = quinn)
summary(quinnaov)

##                Df Sum Sq Mean Sq F value   Pr(>F)    
## SEASON          1  3.250   3.250  17.842 0.000645 ***
## DENSITY         3  5.284   1.761   9.669 0.000704 ***
## SEASON:DENSITY  3  0.165   0.055   0.301 0.823955    
## Residuals      16  2.915   0.182                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#no significant interaction effect

#Run the two-way ANOVA
quinnaov2<- aov(EGGS ~ SEASON+ DENSITY, data = quinn)
summary(quinnaov2)
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## SEASON       1  3.250   3.250   20.05 0.000258 ***
## DENSITY      3  5.284   1.761   10.87 0.000223 ***
## Residuals   19  3.079   0.162                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#both factors are found to be significant

###############
#             #
# Exercise 6  #
#             #
###############
par(mfrow = c(2, 2))
plot(quinnaov)

#interpret result briefly 

###############
#             #
# Exercise 7  #
#             #
###############

#Reject Null Hypothesis
#Thus season and density treatment has significant influence on egg production however there is no influence from season and density interaction