Below are the solutions to these exercises on descriptive statistics.
Learn more about descriptive statistics in the online courses Learn by Example: Statistics and Data Science in R (including 8 lectures specifically on descriptive statistics), and Introduction to R.
####################
#                  #
#    Exercise 1    #
#                  #
####################

# Arithmetic mean of the mass column, using the built-in.
mass_values <- data[["mass"]]
mean(mass_values)
## [1] 31.99258

# OR: by definition, the sum of the values divided by how many there are.
sum(mass_values) / length(mass_values)
## [1] 31.99258
####################
#                  #
#    Exercise 2    #
#                  #
####################

# Median of the mass column, using the built-in.
median(data[["mass"]])
## [1] 32

# OR: sort once, then average the middle order statistic(s).
# For an even length the two indices are the central pair; for an odd length
# both indices land on the single middle element, so the same expression is
# correct in both cases.
sorted_mass <- sort(data[["mass"]])
n_mass <- length(sorted_mass)
middle_idx <- c(floor((n_mass + 1) / 2), ceiling((n_mass + 1) / 2))
mean(sorted_mass[middle_idx])
## [1] 32
# The manual median computation above is fairly long; give yourself some time
# to make sure you understood everything.

####################
#                  #
#    Exercise 3    #
#                  #
####################

# Return the most frequent value of `v`. When several values share the highest
# count, the one that appears first in `v` is returned.
getmode <- function(v) {
  distinct_values <- unique(v)
  # Map every element to the position of its value in `distinct_values`, then
  # count how often each position occurs.
  occurrences <- tabulate(match(v, distinct_values))
  distinct_values[which.max(occurrences)]
}

getmode(data[["mass"]])
## [1] 32
####################
#                  #
#    Exercise 4    #
#                  #
####################

# Sample standard deviation of the age column, using the built-in.
sd(data[["age"]])
## [1] 11.76023

# OR: square root of the sum of squared deviations from the mean divided by
# n - 1. sd() uses the n - 1 denominator, so the manual version must do the
# same to reproduce its result.
age_dev <- data$age - mean(data$age)
sqrt(sum(age_dev^2) / (length(data$age) - 1))
## [1] 11.76023
####################
#                  #
#    Exercise 5    #
#                  #
####################

# Sample variance of the mass column, using the built-in.
var(data$mass)
## [1] 62.15998

# OR: sum of squared deviations from the mean divided by n - 1.
# var() uses the n - 1 denominator, so the manual version must do the same to
# reproduce its result.
mass_dev <- data$mass - mean(data$mass)
sum(mass_dev^2) / (length(data$mass) - 1)
## [1] 62.15998
####################
#                  #
#    Exercise 6    #
#                  #
####################

IQR(data[["age"]]) # interquartile range
## [1] 17

# OR: difference between the sorted values found at 75% and at 25% of the
# vector's length. This simple order-statistic shortcut gives the same result
# as IQR() on this column.
sorted_age <- sort(data[["age"]])
n_age <- length(sorted_age)
sorted_age[n_age * .75] - sorted_age[n_age * .25]
## [1] 17
####################
#                  #
#    Exercise 7    #
#                  #
####################

# Median absolute deviation of the age column, using the built-in.
mad(data[["age"]])
## [1] 10.3782

# OR: median of the absolute deviations from the median, rescaled.
# The deviations are computed for the whole vector at once, so the median is
# evaluated a single time instead of once per element.
abs_dev <- abs(data$age - median(data$age))
# 1.4826 is the scale constant that makes the MAD comparable to the standard
# deviation when the data follow a normal distribution.
1.4826 * median(abs_dev)
## [1] 10.3782
####################
#                  #
#    Exercise 8    #
#                  #
####################

# Sample covariance between age and mass, using the built-in.
cov(data$age, data$mass)
## [1] 3.36033

# OR: sum of the products of paired deviations from the means divided by
# n - 1. cov() uses the n - 1 denominator, so the manual version must do the
# same to reproduce its result.
age_dev <- data$age - mean(data$age)
mass_dev <- data$mass - mean(data$mass)
sum(age_dev * mass_dev) / (length(data$age) - 1)
## [1] 3.36033
####################
#                  #
#    Exercise 9    #
#                  #
####################

# Note: Pearson measures the strength of a linear relationship between the
# variables, while Spearman works on ranks and makes no such assumption.
cor(data$age, data$mass, method = "spearman")
## [1] 0.1311859

# Used to measure the degree of the relationship between linearly related
# variables.
cor(data$age, data$mass, method = "pearson")
## [1] 0.03624187

# OR
# Spearman: Pearson's formula applied to the ranks of the observations.
# The textbook shortcut 1 - 6 * sum(d^2) / (n * (n^2 - 1)) is exact only when
# there are no tied ranks; both columns contain repeated values, so the
# rank-based form below is used to reproduce cor(..., method = "spearman").
rank_age <- rank(data$age)
rank_mass <- rank(data$mass)
rank_age_dev <- rank_age - mean(rank_age)
rank_mass_dev <- rank_mass - mean(rank_mass)
sum(rank_age_dev * rank_mass_dev) /
  sqrt(sum(rank_age_dev^2) * sum(rank_mass_dev^2))
## [1] 0.1311859

# Pearson #1: sum of cross-products of the deviations divided by the square
# root of the product of the sums of squared deviations.
age_dev <- data$age - mean(data$age)
mass_dev <- data$mass - mean(data$mass)
sum(age_dev * mass_dev) / sqrt(sum(age_dev^2) * sum(mass_dev^2))
## [1] 0.03624187

# Pearson #2: covariance scaled by both standard deviations.
cov(data$age, data$mass) / (sd(data$age) * sd(data$mass))
## [1] 0.03624187
####################
#                  #
#    Exercise 10   #
#                  #
####################

# Column-by-column summary of the whole data frame.
summary(data)
## preg plas pres skin ## Min. : 0.000 Min. : 0.0 Min. : 0.00 Min. : 0.00 ## 1st Qu.: 1.000 1st Qu.: 99.0 1st Qu.: 62.00 1st Qu.: 0.00 ## Median : 3.000 Median :117.0 Median : 72.00 Median :23.00 ## Mean : 3.845 Mean :120.9 Mean : 69.11 Mean :20.54 ## 3rd Qu.: 6.000 3rd Qu.:140.2 3rd Qu.: 80.00 3rd Qu.:32.00 ## Max. :17.000 Max. :199.0 Max. :122.00 Max. :99.00 ## test mass pedi age ## Min. : 0.0 Min. : 0.00 Min. :0.0780 Min. :21.00 ## 1st Qu.: 0.0 1st Qu.:27.30 1st Qu.:0.2437 1st Qu.:24.00 ## Median : 30.5 Median :32.00 Median :0.3725 Median :29.00 ## Mean : 79.8 Mean :31.99 Mean :0.4719 Mean :33.24 ## 3rd Qu.:127.2 3rd Qu.:36.60 3rd Qu.:0.6262 3rd Qu.:41.00 ## Max. :846.0 Max. :67.10 Max. :2.4200 Max. :81.00 ## class class.fac ## Min. :0.000 Negative:500 ## 1st Qu.:0.000 Positive:268 ## Median :0.000 ## Mean :0.349 ## 3rd Qu.:1.000 ## Max. :1.000
# Compact overview of the data frame: number of observations and variables,
# plus each column's type and first few values.
str(data)
## 'data.frame': 768 obs. of 10 variables: ## $ preg : int 6 1 8 1 0 5 3 10 2 8 ... ## $ plas : int 148 85 183 89 137 116 78 115 197 125 ... ## $ pres : int 72 66 64 66 40 74 50 0 70 96 ... ## $ skin : int 35 29 0 23 35 0 32 0 45 0 ... ## $ test : int 0 0 0 94 168 0 88 0 543 0 ... ## $ mass : num 33.6 26.6 23.3 28.1 43.1 25.6 31 35.3 30.5 0 ... ## $ pedi : num 0.627 0.351 0.672 0.167 2.288 ... ## $ age : int 50 31 32 21 33 30 26 29 53 54 ... ## $ class : int 1 0 1 0 1 0 1 0 1 1 ... ## $ class.fac: Factor w/ 2 levels "Negative","Positive": 2 1 2 1 2 1 2 1 2 2 ...
# Spearman correlation matrix for every column except the last one, which is
# dropped before the conversion to a matrix.
# NOTE(review): rcorr() is not defined in this section; presumably it is
# Hmisc::rcorr. Confirm that the package is attached before this call.
all_but_last <- data[-length(data)]
rcorr(as.matrix(all_but_last), type = "spearman")
## preg plas pres skin test mass pedi age class ## preg 1.00 0.13 0.19 -0.09 -0.13 0.00 -0.04 0.61 0.20 ## plas 0.13 1.00 0.24 0.06 0.21 0.23 0.09 0.29 0.48 ## pres 0.19 0.24 1.00 0.13 -0.01 0.29 0.03 0.35 0.14 ## skin -0.09 0.06 0.13 1.00 0.54 0.44 0.18 -0.07 0.09 ## test -0.13 0.21 -0.01 0.54 1.00 0.19 0.22 -0.11 0.07 ## mass 0.00 0.23 0.29 0.44 0.19 1.00 0.14 0.13 0.31 ## pedi -0.04 0.09 0.03 0.18 0.22 0.14 1.00 0.04 0.18 ## age 0.61 0.29 0.35 -0.07 -0.11 0.13 0.04 1.00 0.31 ## class 0.20 0.48 0.14 0.09 0.07 0.31 0.18 0.31 1.00 ## ## n= 768 ## ## ## P ## preg plas pres skin test mass pedi age class ## preg 0.0003 0.0000 0.0182 0.0004 0.9971 0.2313 0.0000 0.0000 ## plas 0.0003 0.0000 0.0965 0.0000 0.0000 0.0114 0.0000 0.0000 ## pres 0.0000 0.0000 0.0004 0.8514 0.0000 0.4057 0.0000 0.0000 ## skin 0.0182 0.0965 0.0004 0.0000 0.0000 0.0000 0.0643 0.0129 ## test 0.0004 0.0000 0.8514 0.0000 0.0000 0.0000 0.0015 0.0656 ## mass 0.9971 0.0000 0.0000 0.0000 0.0000 0.0000 0.0003 0.0000 ## pedi 0.2313 0.0114 0.4057 0.0000 0.0000 0.0000 0.2349 0.0000 ## age 0.0000 0.0000 0.0000 0.0643 0.0015 0.0003 0.2349 0.0000 ## class 0.0000 0.0000 0.0000 0.0129 0.0656 0.0000 0.0000 0.0000
# Pearson correlation matrix for every column except the last one, which is
# dropped before the conversion to a matrix.
# NOTE(review): rcorr() is not defined in this section; presumably it is
# Hmisc::rcorr. Confirm that the package is attached before this call.
all_but_last <- data[-length(data)]
rcorr(as.matrix(all_but_last), type = "pearson")
## preg plas pres skin test mass pedi age class ## preg 1.00 0.13 0.14 -0.08 -0.07 0.02 -0.03 0.54 0.22 ## plas 0.13 1.00 0.15 0.06 0.33 0.22 0.14 0.26 0.47 ## pres 0.14 0.15 1.00 0.21 0.09 0.28 0.04 0.24 0.07 ## skin -0.08 0.06 0.21 1.00 0.44 0.39 0.18 -0.11 0.07 ## test -0.07 0.33 0.09 0.44 1.00 0.20 0.19 -0.04 0.13 ## mass 0.02 0.22 0.28 0.39 0.20 1.00 0.14 0.04 0.29 ## pedi -0.03 0.14 0.04 0.18 0.19 0.14 1.00 0.03 0.17 ## age 0.54 0.26 0.24 -0.11 -0.04 0.04 0.03 1.00 0.24 ## class 0.22 0.47 0.07 0.07 0.13 0.29 0.17 0.24 1.00 ## ## n= 768 ## ## ## P ## preg plas pres skin test mass pedi age class ## preg 0.0003 0.0000 0.0236 0.0416 0.6246 0.3535 0.0000 0.0000 ## plas 0.0003 0.0000 0.1124 0.0000 0.0000 0.0001 0.0000 0.0000 ## pres 0.0000 0.0000 0.0000 0.0137 0.0000 0.2534 0.0000 0.0715 ## skin 0.0236 0.1124 0.0000 0.0000 0.0000 0.0000 0.0016 0.0383 ## test 0.0416 0.0000 0.0137 0.0000 0.0000 0.0000 0.2432 0.0003 ## mass 0.6246 0.0000 0.0000 0.0000 0.0000 0.0000 0.3158 0.0000 ## pedi 0.3535 0.0001 0.2534 0.0000 0.0000 0.0000 0.3530 0.0000 ## age 0.0000 0.0000 0.0000 0.0016 0.2432 0.3158 0.3530 0.0000 ## class 0.0000 0.0000 0.0715 0.0383 0.0003 0.0000 0.0000 0.0000
Leave a Reply