```
#Solutions to exercises found here
####################
# #
# Exercise 1 #
# #
####################
#Load the moth trap experiment data from the datasets folder
setwd("H:/datasets")
moth.experiment <- read.csv(file = "moth trap experiment.csv", header = TRUE)
#Preview the first rows to inspect the structure of the data
head(moth.experiment)
```

```
## number.of.moths location type.of.lure
## 1 32 Top Chemical
## 2 29 Top Chemical
## 3 16 Top Chemical
## 4 18 Top Chemical
## 5 20 Top Chemical
## 6 37 Middle Chemical
```

```
#check if our design is balanced
table(moth.experiment$location,moth.experiment$type.of.lure)
```

```
##
## Chemical Scent Sugar
## Ground 5 5 5
## Lower 5 5 5
## Middle 5 5 5
## Top 5 5 5
```

```
#our design is balanced because we have equal observations in each cell
####################
# #
# Exercise 2 #
# #
####################
#get summary statistics for location group
library(psych)
```

`## Warning: package 'psych' was built under R version 3.3.1`

`describeBy(moth.experiment$number.of.moths,moth.experiment$location)`

```
## group: Ground
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 15 19.07 5.09 18 18.85 5.93 12 29 17 0.52 -1.06 1.31
## --------------------------------------------------------
## group: Lower
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 15 33.33 7.5 34 33.77 7.41 17 44 27 -0.6 -0.5 1.94
## --------------------------------------------------------
## group: Middle
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 15 31 9.79 36 31.46 11.86 12 44 32 -0.39 -1.29
## se
## X1 2.53
## --------------------------------------------------------
## group: Top
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 15 23.33 7.41 21 23.23 8.9 13 35 22 0.24 -1.63 1.91
```

```
####################
# #
# Exercise 3 #
# #
####################
#get summary statistics for type of lure group
describeBy(moth.experiment$number.of.moths,moth.experiment$type.of.lure)
```

```
## group: Chemical
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 20 27.5 9.06 28.5 27.44 12.6 14 41 27 -0.01 -1.61 2.03
## --------------------------------------------------------
## group: Scent
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 20 24.75 10.29 22 24.06 11.12 12 44 32 0.43 -1.2
## se
## X1 2.3
## --------------------------------------------------------
## group: Sugar
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 20 27.8 9.06 28 27.44 11.12 15 44 29 0.14 -1.35 2.03
```

```
####################
# #
# Exercise 4 #
# #
####################
#Create boxplots using the two factor variables
library(ggplot2)
```

`## Warning: package 'ggplot2' was built under R version 3.3.1`

```
##
## Attaching package: 'ggplot2'
```

```
## The following objects are masked from 'package:psych':
##
## %+%, alpha
```

`ggplot(moth.experiment, aes(x=location,y=number.of.moths, fill = type.of.lure)) + geom_boxplot()`

```
####################
# #
# Exercise 5 #
# #
####################
#Check for normality of observations
#NOTE(review): this tests the raw moth counts; the normality assumption of ANOVA
#concerns the model errors, so consider also testing the residuals once the model is fitted
shapiro.test(moth.experiment$number.of.moths)
```

```
##
## Shapiro-Wilk normality test
##
## data: moth.experiment$number.of.moths
## W = 0.94533, p-value = 0.009448
```

```
#shapiro test shows our data is not normally distributed
####################
# #
# Exercise 6 #
# #
####################
#Check for equality of variance across the location by type of lure groups
#(because the data is not normally distributed we will log transform it in Exercise 7)
library(car)
```

`## Warning: package 'car' was built under R version 3.3.1`

```
##
## Attaching package: 'car'
```

```
## The following object is masked from 'package:psych':
##
## logit
```

`leveneTest(moth.experiment$number.of.moths~moth.experiment$location*moth.experiment$type.of.lure)`

```
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 11 0.6377 0.7875
## 48
```

```
#the levene test shows the variances are homogeneous across the groups (p = 0.7875); it does not test normality
####################
# #
# Exercise 7 #
# #
####################
#log transform the moth counts, store them as a new column and as a standalone vector,
#then re-check normality on the transformed values (equal variance is checked further down)
moth.experiment[["no.of.moth.log"]] <- log(moth.experiment[["number.of.moths"]])
no.of.moth.log <- moth.experiment[["no.of.moth.log"]]
shapiro.test(moth.experiment$no.of.moth.log)
```

```
##
## Shapiro-Wilk normality test
##
## data: moth.experiment$no.of.moth.log
## W = 0.94746, p-value = 0.01185
```

```
#the log transformation is not very effective in normalizing the data
#the appropriate transformation is left as an exercise to the reader
#this will help the reader appreciate challenges of analyzing data
leveneTest(moth.experiment$no.of.moth.log~moth.experiment$location*moth.experiment$type.of.lure)
```

```
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 11 0.5978 0.8211
## 48
```

```
####################
# #
# Exercise 8 #
# #
####################
#perform a power analysis
#our design has 2 factors with 3 and 4 levels, we have 5 observations in each group
#our df for the mean squared error term is 4*3*(5-1) = 48
#We choose a medium effect size of f = 0.25, which is squared below because pwr.f2.test expects f2
library(pwr)
```

`## Warning: package 'pwr' was built under R version 3.3.1`

`pwr.f2.test(u=2,v=48,f2=(0.25*0.25))`

```
##
## Multiple regression power calculation
##
## u = 2
## v = 48
## f2 = 0.0625
## sig.level = 0.05
## power = 0.3210203
```

```
####################
# #
# Exercise 9 #
# #
####################
#perform anova
#fit a two-way ANOVA of the log moth counts on location, type of lure and their interaction
moth.anova = aov(moth.experiment$no.of.moth.log~moth.experiment$location*moth.experiment$type.of.lure)
#location has an effect on number of moths
#type of lure does not have an effect on number of moths
#the combined effect of location and type of lure does not have an effect on number of moths
#when you have an unbalanced design R does not issue any warnings
#to correctly analyze an unbalanced design we can use the Anova function in car library
#we pass results of aov function and specify we would like to use Type III sums of squares
#(the design analysed here is balanced, see the table in Exercise 1; the Type III call
#is shown to illustrate the approach for unbalanced data)
#NOTE(review): no contrasts are set before aov(), so R's default treatment contrasts
#presumably apply; the car documentation warns that Type III tests are normally only
#sensible with contrasts such as contr.sum or contr.helmert - confirm before relying
#on the main effect rows of the resulting table
library(car)
Anova(moth.anova,type = "III")
```

```
## Anova Table (Type III tests)
##
## Response: moth.experiment$no.of.moth.log
## Sum Sq Df F value
## (Intercept) 43.018 1 427.6842
## moth.experiment$location 1.302 3 4.3144
## moth.experiment$type.of.lure 0.102 2 0.5054
## moth.experiment$location:moth.experiment$type.of.lure 0.196 6 0.3245
## Residuals 4.828 48
## Pr(>F)
## (Intercept) < 2.2e-16 ***
## moth.experiment$location 0.008988 **
## moth.experiment$type.of.lure 0.606429
## moth.experiment$location:moth.experiment$type.of.lure 0.920916
## Residuals
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
```

```
####################
# #
# Exercise 10 #
# #
####################
#check for homogeneity of residuals
#plot 1 of the fitted model is the residuals versus fitted values plot; a roughly even
#vertical spread across the fitted values suggests constant residual variance
plot(moth.anova,1)
```

```
#homogeneity assumption is not violated but points 47 and 32 are marked as outliers.
#Remember our data still had some non normality
```

**What's next:**

- Explore all our (>1000) R exercises
- Find an R course using our R Course Finder directory
- Subscribe to receive weekly updates and bonus sets by email
- Share with your friends and colleagues using the buttons below

yankee says

What sense does a test of normality on the raw data make? The assumptions of analysis of variance say that the errors should be normally distributed, not the data!