Below are the solutions to these exercises on generalized linear models.

# Install the 'titanic' data package on first use, then attach it.
# requireNamespace() asks about this one package directly instead of searching
# every cell of the installed.packages() matrix.
if (!requireNamespace("titanic", quietly = TRUE)) {
  install.packages("titanic")
}
library(titanic)

## Warning: package 'titanic' was built under R version 3.3.3

# Working data set: titanic_train without columns 1, 4, 9 and 11.
# NOTE(review): these look like identifier / free-text columns -- confirm with
# names(titanic_train).
DATA <- titanic_train[, -c(1, 4, 9, 11)]

####################
#                  #
#    Exercise 1    #
#                  #
####################

# Ordinary least-squares fit of Survived on Age and Fare. The outer
# parentheses print the fitted model in addition to storing it in lm_reg.
(lm_reg <- lm(formula = Survived ~ Age + Fare, data = DATA))

##
## Call:
## lm(formula = Survived ~ Age + Fare, data = DATA)
##
## Coefficients:
## (Intercept)          Age         Fare
##    0.420973    -0.003517     0.002583

# Same linear specification as lm_reg, fitted through glm() with a gaussian
# family; the bare name on the last line echoes the fitted object.
glm_model <- glm(
  formula = Survived ~ Age + Fare,
  family = gaussian,
  data = DATA
)
glm_model

##
## Call:  glm(formula = Survived ~ Age + Fare, family = gaussian, data = DATA)
##
## Coefficients:
## (Intercept)          Age         Fare
##    0.420973    -0.003517     0.002583
##
## Degrees of Freedom: 713 Total (i.e. Null);  711 Residual
##   (177 observations deleted due to missingness)
## Null Deviance:       172.2
## Residual Deviance: 158    AIC: 957.2

####################
#                  #
#    Exercise 2    #
#                  #
####################

# Detailed report for the lm() fit: residual quantiles, coefficient table,
# residual standard error, R-squared and the overall F-test.
summary(lm_reg)

## ## Call: ## lm(formula = Survived ~ Age + Fare, data = DATA) ## ## Residuals: ## Min 1Q Median 3Q Max ## -1.0336 -0.3675 -0.3110 0.5563 0.7829 ## ## Coefficients: ## Estimate Std. Error t value Pr(>|t|) ## (Intercept) 0.4209734 0.0409896 10.270 < 2e-16 *** ## Age -0.0035166 0.0012209 -2.880 0.00409 ** ## Fare 0.0025834 0.0003351 7.708 4.3e-14 *** ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## ## Residual standard error: 0.4714 on 711 degrees of freedom ## (177 observations deleted due to missingness) ## Multiple R-squared: 0.08263, Adjusted R-squared: 0.08005 ## F-statistic: 32.02 on 2 and 711 DF, p-value: 4.837e-14

# Matching report for the gaussian glm() fit, for side-by-side comparison
# with the lm() summary.
summary(object = glm_model)

## ## Call: ## glm(formula = Survived ~ Age + Fare, family = gaussian, data = DATA) ## ## Deviance Residuals: ## Min 1Q Median 3Q Max ## -1.0336 -0.3675 -0.3110 0.5563 0.7829 ## ## Coefficients: ## Estimate Std. Error t value Pr(>|t|) ## (Intercept) 0.4209734 0.0409896 10.270 < 2e-16 *** ## Age -0.0035166 0.0012209 -2.880 0.00409 ** ## Fare 0.0025834 0.0003351 7.708 4.3e-14 *** ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## ## (Dispersion parameter for gaussian family taken to be 0.2221983) ## ## Null deviance: 172.21 on 713 degrees of freedom ## Residual deviance: 157.98 on 711 degrees of freedom ## (177 observations deleted due to missingness) ## AIC: 957.25 ## ## Number of Fisher Scoring iterations: 2

####################
#                  #
#    Exercise 3    #
#                  #
####################

# Binomial GLM of Survived on Age and Fare. No link is given, so the family's
# default link is used. The parentheses print the fit as it is stored.
(bin_model <- glm(
  formula = Survived ~ Age + Fare,
  data = DATA,
  family = binomial
))

## ## Call: glm(formula = Survived ~ Age + Fare, family = binomial, data = DATA) ## ## Coefficients: ## (Intercept) Age Fare ## -0.41706 -0.01758 0.01726 ## ## Degrees of Freedom: 713 Total (i.e. Null); 711 Residual ## (177 observations deleted due to missingness) ## Null Deviance: 964.5 ## Residual Deviance: 891.3 AIC: 897.3

####################
#                  #
#    Exercise 4    #
#                  #
####################

# Print the binomial family object to see which link function it uses when
# none is specified.
binomial()

##
## Family: binomial
## Link function: logit

# Refit the Age + Fare model with a probit link in place of the binomial
# default, then echo the fitted object.
bin_probit_model <- glm(
  formula = Survived ~ Age + Fare,
  family = binomial(link = probit),
  data = DATA
)
bin_probit_model

## ## Call: glm(formula = Survived ~ Age + Fare, family = binomial(link = probit), ## data = DATA) ## ## Coefficients: ## (Intercept) Age Fare ## -0.24598 -0.01028 0.00933 ## ## Degrees of Freedom: 713 Total (i.e. Null); 711 Residual ## (177 observations deleted due to missingness) ## Null Deviance: 964.5 ## Residual Deviance: 894.4 AIC: 900.4

####################
#                  #
#    Exercise 5    #
#                  #
####################

# Logit model without an intercept: the leading `0 +` in the formula removes
# the constant term. The parentheses print the fit as it is stored.
(bin_model_no_int <- glm(
  formula = Survived ~ 0 + Age + Fare,
  data = DATA,
  family = binomial(link = logit)
))

## ## Call: glm(formula = Survived ~ 0 + Age + Fare, family = binomial(link = logit), ## data = DATA) ## ## Coefficients: ## Age Fare ## -0.02805 0.01594 ## ## Degrees of Freedom: 714 Total (i.e. Null); 712 Residual ## (177 observations deleted due to missingness) ## Null Deviance: 989.8 ## Residual Deviance: 896.4 AIC: 900.4

####################
#                  #
#    Exercise 6    #
#                  #
####################

# Logit model with the missing-value policy spelled out: na.omit drops every
# row that has an NA in a model variable before fitting.
(bin_model <- glm(
  formula = Survived ~ Age + Fare,
  data = DATA,
  family = binomial(link = logit),
  na.action = "na.omit"
))

## ## Call: glm(formula = Survived ~ Age + Fare, family = binomial(link = logit), ## data = DATA, na.action = "na.omit") ## ## Coefficients: ## (Intercept) Age Fare ## -0.41706 -0.01758 0.01726 ## ## Degrees of Freedom: 713 Total (i.e. Null); 711 Residual ## (177 observations deleted due to missingness) ## Null Deviance: 964.5 ## Residual Deviance: 891.3 AIC: 897.3

# Median imputation: compute the median of the observed ages and write it into
# every missing Age entry. This modifies DATA in place, so every model fitted
# after this point sees the imputed column.
Impute <- median(DATA$Age, na.rm = TRUE)
DATA$Age[is.na(DATA$Age)] <- Impute

# Refit with na.fail so the call errors out if any NA is still present in the
# model variables instead of silently dropping rows.
(bin_model_Impute <- glm(
  formula = Survived ~ Age + Fare,
  data = DATA,
  family = binomial(link = logit),
  na.action = "na.fail"
))

## ## Call: glm(formula = Survived ~ Age + Fare, family = binomial(link = logit), ## data = DATA, na.action = "na.fail") ## ## Coefficients: ## (Intercept) Age Fare ## -0.47997 -0.01682 0.01620 ## ## Degrees of Freedom: 890 Total (i.e. Null); 888 Residual ## Null Deviance: 1187 ## Residual Deviance: 1109 AIC: 1115

####################
#                  #
#    Exercise 7    #
#                  #
####################

# Logit model with a second-degree polynomial in Fare via poly(Fare, 2).
# This overwrites the earlier bin_model.
(bin_model <- glm(
  formula = Survived ~ Age + poly(Fare, 2),
  data = DATA,
  family = binomial(link = logit)
))

## ## Call: glm(formula = Survived ~ Age + poly(Fare, 2), family = binomial(link = logit), ## data = DATA) ## ## Coefficients: ## (Intercept) Age poly(Fare, 2)1 poly(Fare, 2)2 ## 0.05118 -0.01812 18.41909 -10.24135 ## ## Degrees of Freedom: 890 Total (i.e. Null); 887 Residual ## Null Deviance: 1187 ## Residual Deviance: 1097 AIC: 1105

# Coefficient table and deviance figures for the polynomial-Fare logit model.
summary(object = bin_model)

## ## Call: ## glm(formula = Survived ~ Age + poly(Fare, 2), family = binomial(link = logit), ## data = DATA) ## ## Deviance Residuals: ## Min 1Q Median 3Q Max ## -2.3643 -0.8806 -0.8030 1.2209 1.8474 ## ## Coefficients: ## Estimate Std. Error z value Pr(>|z|) ## (Intercept) 0.051181 0.180492 0.284 0.77674 ## Age -0.018117 0.005714 -3.171 0.00152 ** ## poly(Fare, 2)1 18.419094 2.554456 7.211 5.57e-13 *** ## poly(Fare, 2)2 -10.241350 2.229437 -4.594 4.35e-06 *** ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## ## (Dispersion parameter for binomial family taken to be 1) ## ## Null deviance: 1186.7 on 890 degrees of freedom ## Residual deviance: 1096.6 on 887 degrees of freedom ## AIC: 1104.6 ## ## Number of Fisher Scoring iterations: 4

####################
#                  #
#    Exercise 8    #
#                  #
####################

# Add Sex to the polynomial-Fare logit model. as.factor() makes glm() treat it
# as a categorical predictor. This overwrites the earlier bin_model.
(bin_model <- glm(
  formula = Survived ~ Age + poly(Fare, 2) + as.factor(Sex),
  data = DATA,
  family = binomial(link = logit)
))

## ## Call: glm(formula = Survived ~ Age + poly(Fare, 2) + as.factor(Sex), ## family = binomial(link = logit), data = DATA) ## ## Coefficients: ## (Intercept) Age poly(Fare, 2)1 ## 1.28411 -0.01077 15.37627 ## poly(Fare, 2)2 as.factor(Sex)male ## -6.59275 -2.37887 ## ## Degrees of Freedom: 890 Total (i.e. Null); 886 Residual ## Null Deviance: 1187 ## Residual Deviance: 877 AIC: 887

# Coefficient table and deviance figures for the model that includes Sex.
summary(object = bin_model)

## ## Call: ## glm(formula = Survived ~ Age + poly(Fare, 2) + as.factor(Sex), ## family = binomial(link = logit), data = DATA) ## ## Deviance Residuals: ## Min 1Q Median 3Q Max ## -2.4390 -0.6053 -0.5619 0.7994 2.0913 ## ## Coefficients: ## Estimate Std. Error z value Pr(>|z|) ## (Intercept) 1.284114 0.225901 5.684 1.31e-08 *** ## Age -0.010767 0.006558 -1.642 0.10066 ## poly(Fare, 2)1 15.376271 2.770311 5.550 2.85e-08 *** ## poly(Fare, 2)2 -6.592748 2.521885 -2.614 0.00894 ** ## as.factor(Sex)male -2.378874 0.171822 -13.845 < 2e-16 *** ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## ## (Dispersion parameter for binomial family taken to be 1) ## ## Null deviance: 1186.66 on 890 degrees of freedom ## Residual deviance: 877.04 on 886 degrees of freedom ## AIC: 887.04 ## ## Number of Fisher Scoring iterations: 4

####################
#                  #
#    Exercise 9    #
#                  #
####################

# predict() with no `type` argument returns values on the scale of the linear
# predictor (log-odds for this logit model), not probabilities.
DATA$Pred.default <- predict(bin_model)

####################
#                  #
#    Exercise 10   #
#                  #
####################

# type = "response" gives fitted survival probabilities instead.
DATA$Prob <- predict(bin_model, type = "response")

# Classify at the 0.5 cut-off: below 0.5 -> 0, otherwise -> 1.
DATA$Pred <- ifelse(DATA$Prob < 0.5, 0, 1)

# Share of rows where the predicted class equals the observed outcome. The
# mean of a logical vector is the proportion of TRUEs. This is measured on the
# same data the model was fitted to.
mean(DATA$Pred == DATA$Survived)

## [1] 0.7822671

me says

Your last few answers are very buggy, especially those for Exercise 9 and Exercise 10.