Below are the solutions to these exercises on error metrics.
####################
#                  #
#    Exercise 1    #
#                  #
####################

library(datasets)
data <- attitude
head(data)
##   rating complaints privileges learning raises critical advance
## 1     43         51         30       39     61       92      45
## 2     63         64         51       54     63       73      47
## 3     71         70         68       69     76       86      48
## 4     61         63         45       47     54       84      35
## 5     81         78         56       66     71       83      47
## 6     43         55         49       44     54       49      34
model.1 <- lm(rating ~ ., data = data)
summary(model.1)
## 
## Call:
## lm(formula = rating ~ ., data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.9418  -4.3555   0.3158   5.5425  11.5990 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 10.78708   11.58926   0.931 0.361634    
## complaints   0.61319    0.16098   3.809 0.000903 ***
## privileges  -0.07305    0.13572  -0.538 0.595594    
## learning     0.32033    0.16852   1.901 0.069925 .  
## raises       0.08173    0.22148   0.369 0.715480    
## critical     0.03838    0.14700   0.261 0.796334    
## advance     -0.21706    0.17821  -1.218 0.235577    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.068 on 23 degrees of freedom
## Multiple R-squared:  0.7326, Adjusted R-squared:  0.6628 
## F-statistic:  10.5 on 6 and 23 DF,  p-value: 1.24e-05
model.2 <- lm(rating ~ complaints + privileges + learning + advance, data = data)
summary(model.2)
## 
## Call:
## lm(formula = rating ~ complaints + privileges + learning + advance, 
##     data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -11.8976  -5.5171   0.7654   5.8086  11.5022 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 14.30347    7.73957   1.848   0.0765 .  
## complaints   0.65338    0.13051   5.006 3.67e-05 ***
## privileges  -0.07682    0.13059  -0.588   0.5616    
## learning     0.32395    0.15741   2.058   0.0502 .  
## advance     -0.17151    0.14904  -1.151   0.2607    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.821 on 25 degrees of freedom
## Multiple R-squared:  0.7293, Adjusted R-squared:  0.686 
## F-statistic: 16.84 on 4 and 25 DF,  p-value: 8.134e-07
model.3 <- lm(rating ~ complaints + learning + advance, data = data)
summary(model.3)
## 
## Call:
## lm(formula = rating ~ complaints + learning + advance, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -12.217  -5.377   0.967   6.078  11.540 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  13.5777     7.5439   1.800   0.0835 .  
## complaints    0.6227     0.1181   5.271 1.65e-05 ***
## learning      0.3124     0.1542   2.026   0.0532 .  
## advance      -0.1870     0.1449  -1.291   0.2082    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.734 on 26 degrees of freedom
## Multiple R-squared:  0.7256, Adjusted R-squared:  0.6939 
## F-statistic: 22.92 on 3 and 26 DF,  p-value: 1.807e-07
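As a quick comparison (an optional addition, not part of the exercise statement), the adjusted R-squared values reported above can be pulled out of the three summaries directly:

## Compare adjusted R-squared across the models; per the summaries above
## this gives 0.6628, 0.686 and 0.6939, so model.3 fits best by this
## criterion despite using the fewest predictors.
sapply(list(model.1, model.2, model.3),
       function(m) summary(m)$adj.r.squared)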
####################
#                  #
#    Exercise 2    #
#                  #
####################

rmse <- function(y, y_pred) {
  RMSE <- sqrt(mean((y - y_pred)^2))
  return(RMSE)
}

rmse(data$rating, model.1$fitted.values)
## [1] 6.1887
rmse(data$rating, model.2$fitted.values)
## [1] 6.226319
rmse(data$rating, model.3$fitted.values)
## [1] 6.269261
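Since the predictions come straight from `lm`, the same RMSE can be cross-checked from the model residuals, because `residuals(model)` equals `y - fitted(model)`:

## Cross-check: the residuals are exactly y - fitted, so this should
## reproduce the value 6.1887 printed above for model.1.
sqrt(mean(residuals(model.1)^2))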
####################
#                  #
#    Exercise 3    #
#                  #
####################

mae <- function(y, y_pred) {
  MAE <- sum(abs(y - y_pred)) / length(y)
  return(MAE)
}

mae(data$rating, model.1$fitted.values)
## [1] 5.178977
mae(data$rating, model.2$fitted.values)
## [1] 5.273396
mae(data$rating, model.3$fitted.values)
## [1] 5.316718
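Because `sum(x) / length(x)` is just `mean(x)`, the MAE helper can be written more compactly; a minimal equivalent form (`mae2` is an addition for illustration):

## Equivalent one-line MAE; should match mae() exactly.
mae2 <- function(y, y_pred) mean(abs(y - y_pred))
mae2(data$rating, model.1$fitted.values)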
####################
#                  #
#    Exercise 4    #
#                  #
####################

rmlse <- function(y, y_pred) {
  RMLSE <- sqrt(1 / length(y) * sum((log(y_pred + 1) - log(y + 1))^2))
  return(RMLSE)
}

rmlse(data$rating, model.1$fitted.values)
## [1] 0.1049955
rmlse(data$rating, model.2$fitted.values)
## [1] 0.1056288
rmlse(data$rating, model.3$fitted.values)
## [1] 0.1059711
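RMSLE is simply the RMSE computed on `log(x + 1)`-transformed values (`log1p` in base R), so it can also be expressed through the `rmse()` helper from Exercise 2:

## Should match rmlse(data$rating, model.1$fitted.values) above.
rmse(log1p(data$rating), log1p(model.1$fitted.values))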
####################
#                  #
#    Exercise 5    #
#                  #
####################

cluster.data <- iris
set.seed(42)
k.means.results.1 <- kmeans(cluster.data[, 1:4], centers = 3,
                            iter.max = 50, algorithm = "Lloyd")
k.means.results.1$cluster <- factor(k.means.results.1$cluster)
levels(k.means.results.1$cluster) <- list("setosa" = 3,
                                          "versicolor" = 1,
                                          "virginica" = 2)

####################
#                  #
#    Exercise 6    #
#                  #
####################

table(cluster.data[, 5], k.means.results.1$cluster)
##             
##              setosa versicolor virginica
##   setosa         50          0         0
##   versicolor      0         48         2
##   virginica       0         14        36
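The cluster-to-species mapping in Exercise 5 (3 = setosa, 1 = versicolor, 2 = virginica) was read off the raw contingency table by hand and depends on the seed. As an optional sketch (the `raw` object below is an addition, not part of the exercise), the dominant cluster per species can be found programmatically:

## Re-run the clustering with the same seed and find, for each species,
## the raw cluster ID that contains most of its observations.
set.seed(42)
raw <- kmeans(cluster.data[, 1:4], centers = 3,
              iter.max = 50, algorithm = "Lloyd")
apply(table(cluster.data$Species, raw$cluster), 1, which.max)
## should recover the mapping used above: setosa = 3, versicolor = 1, virginica = 2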
####################
#                  #
#    Exercise 7    #
#                  #
####################

accuracy <- function(real_labels, predict_labels) {
  result.table <- table(real_labels, predict_labels)
  return(sum(diag(result.table)) / sum(result.table))
}

recall <- function(real_labels, predict_labels) {
  result.table <- table(real_labels, predict_labels)
  rowsums <- apply(result.table, 1, sum)
  return(diag(result.table) / rowsums)
}

precision <- function(real_labels, predict_labels) {
  result.table <- table(real_labels, predict_labels)
  colsums <- apply(result.table, 2, sum)
  return(diag(result.table) / colsums)
}

accuracy(cluster.data[, 5], k.means.results.1$cluster)
## [1] 0.8933333
recall(cluster.data[,5],k.means.results.1$cluster)
##     setosa versicolor  virginica 
##       1.00       0.96       0.72
precision(cluster.data[,5],k.means.results.1$cluster)
##     setosa versicolor  virginica 
##  1.0000000  0.7741935  0.9473684
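The per-class recall and precision vectors can be condensed into single macro-averaged scores (an addition beyond the exercise, weighting all classes equally):

## Macro-averaged recall and precision: unweighted means over the classes.
mean(recall(cluster.data[, 5], k.means.results.1$cluster))
mean(precision(cluster.data[, 5], k.means.results.1$cluster))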
####################
#                  #
#    Exercise 8    #
#                  #
####################

f.measure <- function(real_labels, predict_labels, b) {
  p <- precision(real_labels, predict_labels)
  r <- recall(real_labels, predict_labels)
  f <- ((b^2 + 1) * p * r) / (b^2 * p + r)
  return(f)
}

f.measure(cluster.data[, 5], k.means.results.1$cluster, 2)
##     setosa versicolor  virginica 
##  1.0000000  0.9160305  0.7563025
f.measure(cluster.data[, 5], k.means.results.1$cluster, 0.5)
##     setosa versicolor  virginica 
##  1.0000000  0.8053691  0.8910891
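Setting b = 1 reduces the weighted F-measure to the familiar F1 score, the harmonic mean of precision and recall:

## b = 1: the standard F1 score.
f.measure(cluster.data[, 5], k.means.results.1$cluster, 1)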
####################
#                  #
#    Exercise 9    #
#                  #
####################

purity <- function(real_labels, predict_labels) {
  result.table <- table(real_labels, predict_labels)
  return(sum(max(result.table[1, ]),
             max(result.table[2, ]),
             max(result.table[3, ])) / length(real_labels))
}

purity(cluster.data[, 5], k.means.results.1$cluster)
## [1] 0.8933333
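The purity function above hard-codes the three rows of the table. A sketch of a class-count-agnostic variant (`purity2` is an addition), following the usual definition of purity by taking one maximum per cluster column:

## Generalized purity: sum the largest class count within each cluster.
purity2 <- function(real_labels, predict_labels) {
  result.table <- table(real_labels, predict_labels)
  sum(apply(result.table, 2, max)) / length(real_labels)
}
purity2(cluster.data[, 5], k.means.results.1$cluster)  # should match purity() here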
####################
#                  #
#   Exercise 10    #
#                  #
####################

library(clValid)
set.seed(42)
k.means.results.1 <- kmeans(cluster.data[, 1:4], centers = 3,
                            iter.max = 50, algorithm = "Lloyd")
dunn(distance = dist(iris, method = "euclidean"),
     clusters = k.means.results.1$cluster)
## [1] 0.09880739
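Note that `dist()` is applied to the full iris data frame above, so the Species factor is coerced (with a warning) and effectively dropped from the distance computation. A variant restricted to the four numeric columns (an alternative sketch, not what produced the output above) avoids the warning:

## Same Dunn index computed on the numeric columns only; avoids the
## coercion warning and should give essentially the same value, since
## dist()'s uniform NA rescaling cancels in the Dunn ratio.
dunn(distance = dist(cluster.data[, 1:4], method = "euclidean"),
     clusters = k.means.results.1$cluster)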