Big Data Analytics in H2O Exercise Part – 2 Solutions

Below are the solutions to these exercises on H2O.

library(h2o)
# Initialise H2O and keep the value returned by h2o.init() in `h2o.cluster`.
h2o.cluster <- h2o.init()

###############
#             #
# Exercise 1  #
#             #
###############
# Convert the built-in airquality data set to an H2O frame, then show it
# arranged by the Temp column.
aq <- as.h2o(airquality)
h2o.arrange(aq, "Temp")


###############
#             #
# Exercise 2  #
#             #
###############
# Convert iris to an H2O frame and arrange it by Sepal.Length.
ir <- as.h2o(iris)
h2o.arrange(ir, "Sepal.Length")

# Repeat the same arrangement after turning Species into a character column.
ir$Species <- h2o.ascharacter(ir$Species)
h2o.arrange(ir, "Sepal.Length")

# Turn Species back into a factor afterwards.
ir$Species <- as.factor(ir$Species)
###############
#             #
# Exercise 3  #
#             #
###############
# Import loan.csv (path normalised from the current working directory) into an
# H2O frame.
loan.hframe <- h2o.importFile(path = normalizePath("loan.csv"))

# For each requested column type, ask H2O which columns of loan.hframe match.
# lapply() returns one list element per entry of col.types.
# The closing parenthesis of lapply() was missing in the original, which made
# this statement (and everything after it) fail to parse.
col.types <- c("numeric", "categorical")
lapply(col.types, function(x) h2o.columns_by_type(loan.hframe, x))

###############
#             #
# Exercise 4  #
#             #
###############

 # For every Species group: sum the non-missing values of the first column and
 # divide by the number of rows in that group.
 h2o.ddply(
   ir,
   "Species",
   function(df) {
     sum(df[, 1], na.rm = TRUE) / nrow(df)
   }
 )


###############
#             #
# Exercise 5  #
#             #
###############
# Top 4 percent of loan.hframe according to the loan_amnt column.
h2o.topN(loan.hframe, column = "loan_amnt", nPercent = 4)


###############
#             #
# Exercise 6  #
#             #
###############

# Bottom 4 percent of loan.hframe according to the loan_amnt column.
h2o.bottomN(loan.hframe, column = "loan_amnt", nPercent = 4)


###############
#             #
# Exercise 7  #
#             #
###############


# Report the NA counts for the air-quality frame and for the loan frame.
h2o.nacnt(aq)
h2o.nacnt(loan.hframe)


###############
#             #
# Exercise 8  #
#             #
###############
# Forward-fill missing values in aq, allowing runs of up to 10000 consecutive
# NAs to be filled. The result is displayed only; aq itself is not reassigned.
# NOTE(review): axis = 2 is documented as row-wise filling — confirm that
# column-wise filling (axis = 1) was not the intent.
h2o.fillna(aq, method = "forward", axis = 2, maxlen = 10000L)

###############
#             #
# Exercise 9  #
#             #
###############

# Rebuild aq from the raw airquality data, then impute with the mean.
# column = 0 is passed through unchanged; presumably it selects the whole
# frame — verify against the h2o.impute documentation.
aq <- as.h2o(airquality)
h2o.impute(aq, column = 0, method = "mean")
###############
#             #
# Exercise 10  #
#             #
###############
# Store the result of h2o.difflag1() applied to the Temp column in a new
# lagTemp column of aq.
aq$lagTemp <- h2o.difflag1(aq$Temp)