Below are the solutions to these exercises on “Melt and Cast The Shape of Your Data-Frame.”
####################
#                  #
#    Exercise 1    #
#                  #
####################

# Attach data.table while silencing any messages emitted on load.
suppressMessages(library(data.table))

# Wide layout: one row per id, one column per question.
df <- data.frame(
  id = seq_len(2),
  q1 = c("A", "B"),
  q2 = c("C", "A"),
  stringsAsFactors = FALSE
)
df
## id q1 q2 ## 1 1 A C ## 2 2 B A
# Wide -> long: keep `id` as the identifier and stack the remaining columns
# into a `question` / `value` pair.
dfl <- melt(
  data = df,
  id.vars = "id",
  variable.name = "question"
)
dfl
## id question value ## 1 1 q1 A ## 2 2 q1 B ## 3 1 q2 C ## 4 2 q2 A
####################
#                  #
#    Exercise 2    #
#                  #
####################

# Long -> wide again: one row per id, one column per question.
dcast(
  data = dfl,
  formula = id ~ question,
  value.var = "value"
)
## id q1 q2 ## 1 1 A C ## 2 2 B A
####################
#                  #
#    Exercise 3    #
#                  #
####################

# Transposed layout: one row per question, one column per respondent.
# The column labels are built inside the formula by prefixing each id.
# `value.var` is named explicitly (as in Exercise 2) so the call does not
# depend on dcast guessing which column holds the values.
dcast(dfl, question ~ paste0("id_", id), value.var = "value")
## question id_1 id_2 ## 1 q1 A B ## 2 q2 C A
####################
#                  #
#    Exercise 4    #
#                  #
####################

# Each row holds one "A…", one "B…" and one "C…" code, but scattered over the
# wrong columns.
df2 <- data.frame(
  A = c("A1", "A12", "A31", "A4"),
  B = c("B4", "C7", "C3", "B9"),
  C = c("C3", "B16", "B3", "C4")
)
setDT(df2)

# Add a row id by reference so every value can be traced back to its row.
df2[, id := .I]
df2l <- melt(df2, id.vars = "id")

# Cast on the first letter of each value, which puts every code in the column
# it belongs to; then drop the helper id. `value.var` is given explicitly so
# the call does not rely on dcast guessing the value column.
dcast(df2l, id ~ substr(value, 1, 1), value.var = "value")[, -c("id")]
## A B C ## 1: A1 B4 C3 ## 2: A12 B16 C7 ## 3: A31 B3 C3 ## 4: A4 B9 C4
# Inspired by this question on SO:
# https://stackoverflow.com/a/50841771/4552295

####################
#                  #
#    Exercise 5    #
#                  #
####################

# Three join ids, each observed once for type "a" and once for type "b".
df3 <- data.frame(
  Join_ID = rep(1:3, each = 2),
  Type = rep(c("a", "b"), times = 3),
  v2 = c(80, 90, 70, 60, 50, 40)
)

# One row per Join_ID; the new column names are "<Type>_v2".
dcast(
  data = df3,
  formula = Join_ID ~ paste0(Type, "_v2"),
  value.var = "v2"
)
## Join_ID a_v2 b_v2 ## 1 1 80 90 ## 2 2 70 60 ## 3 3 50 40
# https://stackoverflow.com/q/50839606/4552295

####################
#                  #
#    Exercise 6    #
#                  #
####################

library(AER)
data("Fertility")

# Give every row (mother) an id so the two gender records can be linked after
# melting. seq_len() is used instead of 1:nrow() because it stays correct
# (empty) for a zero-row input.
Fertility$mother_id <- seq_len(nrow(Fertility))

# Stack gender1 / gender2 into a single `gender` column; `order` records which
# of the two source columns each value came from.
ferl <- melt(
  Fertility,
  measure.vars = paste0("gender", 1:2),
  value.name = "gender",
  variable.name = "order"
)

# Strip the lowercase letters so "gender1" / "gender2" become "1" / "2".
ferl$order <- gsub("[a-z]", "", ferl$order)
head(ferl)
## morekids age afam hispanic other work mother_id order gender ## 1 no 27 no no no 0 1 1 male ## 2 no 30 no no no 30 2 1 female ## 3 no 27 no no no 0 3 1 male ## 4 no 35 yes no no 0 4 1 male ## 5 no 30 no no no 22 5 1 female ## 6 no 26 no no no 40 6 1 male
####################
#                  #
#    Exercise 7    #
#                  #
####################

# One row per (ID, medication) record; an ID can appear several times.
d1 <- data.frame(
  ID = c(1, 1, 1, 2, 2, 4, 1, 2),
  medication = c(1, 2, 3, 1, 2, 7, 2, 8)
)
setDT(d1)

# Collapse each ID's medications into one comma-separated string, keeping the
# original row order (duplicates are retained).
d1[, .(medications = paste0(medication, collapse = ", ")), by = .(ID)]
## ID medications ## 1: 1 1, 2, 3, 2 ## 2: 2 1, 2, 8 ## 3: 4 7
####################
#                  #
#    Exercise 8    #
#                  #
####################

# Long layout: one row per (Name, medication).
dfs <- data.frame(
  Name = c(rep("name1", 3), rep("name2", 2)),
  MedName = c(
    "atenolol 25mg", "aspirin 81mg", "sildenafil 100mg",
    "atenolol 50mg", "enalapril 20mg"
  )
)
setDT(dfs)

# Number the medications within each Name; these labels become the column
# names when casting to wide format.
dfs[, medn := paste0("medication_", seq_len(.N)), by = Name]
dfs
## Name MedName medn ## 1: name1 atenolol 25mg medication_1 ## 2: name1 aspirin 81mg medication_2 ## 3: name1 sildenafil 100mg medication_3 ## 4: name2 atenolol 50mg medication_1 ## 5: name2 enalapril 20mg medication_2
# Wide layout: one row per Name, one column per numbered medication.
dcast(
  data = dfs,
  formula = Name ~ medn,
  value.var = "MedName"
)
## Name medication_1 medication_2 medication_3 ## 1: name1 atenolol 25mg aspirin 81mg sildenafil 100mg ## 2: name2 atenolol 50mg enalapril 20mg <NA>
# or even cleaner: let rowid() number the rows within each Name directly in
# the formula, so no helper column is needed.
dcast(
  data = dfs,
  formula = Name ~ rowid(Name, prefix = "medication"),
  value.var = "MedName"
)
## Name medication1 medication2 medication3 ## 1: name1 atenolol 25mg aspirin 81mg sildenafil 100mg ## 2: name2 atenolol 50mg enalapril 20mg <NA>
# Inspired by
# https://stackoverflow.com/q/11322801/4552295

####################
#                  #
#    Exercise 9    #
#                  #
####################

# v1 and v2 pack several comma-separated items into a single cell; v3 is the
# per-row key.
df7 <- data.frame(
  v1 = c("name1, name2", "name3", "name4, name5"),
  v2 = c("1, 2", "3", "4, 5"),
  v3 = as.numeric(1:3)
)
df7
## v1 v2 v3 ## 1 name1, name2 1, 2 1 ## 2 name3 3 2 ## 3 name4, name5 4, 5 3
setDT(df7)

# Within each v3 group, split every remaining column on ", " so each item gets
# its own row; then restore the original column order.
df7[
  ,
  lapply(.SD, tstrsplit, split = ", "),
  by = v3
][
  ,
  .(v1, v2, v3)
]
## v1 v2 v3 ## 1: name1 1 1 ## 2: name2 2 1 ## 3: name3 3 2 ## 4: name4 4 3 ## 5: name5 5 3
# This was a real problem on SO:
# https://stackoverflow.com/q/29758504/4552295

####################
#                  #
#    Exercise 10   #
#                  #
####################

# Note: `Method` has 4 entries while the other columns have 8, so
# data.frame() recycles it to fill all rows.
df <- data.frame(
  Method = c("10.fold.CV Lasso", "10.fold.CV.1SE", "BIC", "Modified.BIC"),
  n = c(30, 30, 50, 50, 50, 50, 100, 100),
  lambda = c(1, 3, 1, 2, 2, 0, 1, 2),
  df = c(21, 17, 29, 26, 25, 32, 34, 32)
)

# One row per Method, one column per sample size n, showing the `df` column.
# `value.var` is named explicitly so the call does not rely on dcast guessing
# which column holds the values; missing combinations are shown as "".
dcast(df, Method ~ n, value.var = "df", fill = "")
## Method 30 50 100 ## 1 10.fold.CV Lasso 21 25 ## 2 10.fold.CV.1SE 17 32 ## 3 BIC 29 34 ## 4 Modified.BIC 26 32
# Spread both measures (lambda and df) over n at once: melt to long format
# first, then cast with `variable + n` on the column side.
# Written as a nested call rather than with `%>%`, because the pipe operator
# comes from magrittr, which this script never attaches.
dcast(
  melt(df, id.vars = c("Method", "n")),
  Method ~ variable + n,
  value.var = "value",
  fill = ""
)
# Expected result (values follow from the data above; exact console
# formatting may differ):
##             Method lambda_30 lambda_50 lambda_100 df_30 df_50 df_100
## 1 10.fold.CV Lasso         1         2               21    25
## 2   10.fold.CV.1SE         3         0               17    32
## 3              BIC                   1          1          29     34
## 4     Modified.BIC                   2          2          26     32
# Inspired by: # https://stackoverflow.com/q/50904997/4552295
Leave a Reply