Below are the solutions to these exercises on “Harvesting Data From the Web With Rvest.”
# Load rvest, which provides the scraping helpers used in every exercise below
# (read_html(), html_nodes(), html_text(), ...) and re-exports the %>% pipe.
library(rvest)

####################
#                  #
#    Exercise 1    #
#                  #
####################

# Download and parse the page once; the parsed document `webpage` is reused
# by all of the later exercises.
webpage <- read_html(x = "https://money.cnn.com/data/us_markets/")

####################
#                  #
#    Exercise 2    #
#                  #
####################

# Open a browsing session on the same URL and print its summary.
# NOTE: html_session() is deprecated since rvest 1.0.0 in favour of session();
# it is kept here so the solution still runs on older rvest versions.
html_session("https://money.cnn.com/data/us_markets/")
## <session> https://money.cnn.com/data/us_markets/
## Status: 200
## Type: text/html; charset=utf-8
## Size: 82914
####################
#                  #
#    Exercise 3    #
#                  #
####################

# Select every <a> element whose href attribute contains "sectors"
# (CSS substring match, *=) and return the link texts as a character vector.
webpage %>%
  html_nodes("a[href*='sectors']") %>%
  html_text()
##  [1] "Communications" "Consumer Durables"
##  [3] "Consumer Non-Durables" "Commercial Services"
##  [5] "Electronic Technology" "Energy Minerals"
##  [7] "Finance" "Health Services"
##  [9] "Retail Trade" "Technology Services"
## [11] "Transportation" "Utilities"
####################
#                  #
#    Exercise 4    #
#                  #
####################

# Inside any <div> whose id contains "sector", select the descendants whose
# class attribute ends with "ChangePct" (CSS suffix match, $=) and return
# their text.
webpage %>%
  html_nodes("div[id*='sector'] [class$='ChangePct']") %>%
  html_text()
## [1] "-1.42%" "+3.26%" "-2.33%" "+1.58%" "+5.19%" "-3.52%" "+4.13%"
## [8] "+6.23%" "+5.03%" "+6.17%" "-0.37%" "+2.12%"
####################
#                  #
#    Exercise 5    #
#                  #
####################

# Take the first <table> found inside a <div> and parse it into a data frame.
# `.[[1]]` extracts that single node from the node set before html_table()
# is applied, exactly as the nested form html_table(nodes[[1]]) does.
webpage %>%
  html_nodes("div table") %>%
  .[[1]] %>%
  html_table()
## Gainers & Losers Price Change % Change
## 1 HASHasbro Inc 106.15 12.22 +13.01%
## 2 MATMattel Inc 16.57 0.60 +3.76%
## 3 UHSUniversal Health S... 115.68 3.01 +2.67%
## 4 BHFBrighthouse Financ... 42.05 1.00 +2.44%
## 5 NTRSNorthern Trust Cor... 110.34 2.61 +2.42%
## 6 HALHalliburton Co 41.57 -3.63 -8.03%
## 7 ITWIllinois Tool Work... 136.40 -10.46 -7.12%
## 8 SWKStanley Black & De... 138.81 -5.42 -3.76%
## 9 XYLXylem Inc 67.24 -2.33 -3.35%
## 10 MUMicron Technology ... 53.34 -1.68 -3.05%
####################
#                  #
#    Exercise 6    #
#                  #
####################

# Collect the href attribute of every element with class "wsod_symbol" that
# sits inside a table cell, then prefix each one with the site root to build
# an absolute URL. The `.` placeholder puts the piped hrefs in the second
# position of paste0(), so the site root comes first.
webpage %>%
  html_nodes("td .wsod_symbol") %>%
  html_attr("href") %>%
  paste0("https://money.cnn.com", .)
##  [1] "https://money.cnn.com/quote/quote.html?symb=HAS"
##  [2] "https://money.cnn.com/quote/quote.html?symb=MAT"
##  [3] "https://money.cnn.com/quote/quote.html?symb=UHS"
##  [4] "https://money.cnn.com/quote/quote.html?symb=BHF"
##  [5] "https://money.cnn.com/quote/quote.html?symb=NTRS"
##  [6] "https://money.cnn.com/quote/quote.html?symb=HAL"
##  [7] "https://money.cnn.com/quote/quote.html?symb=ITW"
##  [8] "https://money.cnn.com/quote/quote.html?symb=SWK"
##  [9] "https://money.cnn.com/quote/quote.html?symb=XYL"
## [10] "https://money.cnn.com/quote/quote.html?symb=MU"
####################
#                  #
#    Exercise 7    #
#                  #
####################

# Return the text of every link nested inside an element with class
# "HeadlineList".
webpage %>%
  html_nodes(".HeadlineList a") %>%
  html_text()
##  [1] "Jamie Dimon on the trade war, infrastructure 'emergency' and Trump"
##  [2] "Papa John's is worried that Papa John will try to take over the company"
##  [3] "Watch: What keeps Jamie Dimon up at night"
##  [4] "This is how Jamie Dimon would fix income inequality"
##  [5] "Sergio Marchionne, auto legend, steps down as CEO of Fiat Chrysler"
##  [6] "Overstock is making a push into real estate"
##  [7] "The Gates Foundation is now one of Berkshire's largest shareholders"
##  [8] "AI is hurting people of color and the poor. Experts want to fix that"
##  [9] "Tariffs and Brexit could kill this steel company"
## [10] "Global immigration backlash could hurt India's top tech companies"
## [11] "Ryanair's profit has dropped 20% and more trouble is coming"
## [12] "New York Daily News to slash 50% of its newsroom"
####################
#                  #
#    Exercise 8    #
#                  #
####################

# html_attrs() returns one named character vector of attributes per matched
# node; keep only the vector for the first <span> that is a direct child (>)
# of an element with class "wsod_disclaimer".
webpage %>%
  html_nodes(".wsod_disclaimer > span") %>%
  html_attrs() %>%
  .[[1]]
## stream
## "time_144221|196723|276301|417335648|218057|143954|170715|263373|38911044|207106"
## streamjstime
## "1532359505000"
## streamdateformat
## "g%3Ai%3Asa%20x"
####################
#                  #
#    Exercise 9    #
#                  #
####################

# Return the class attribute of every <div> nested inside an element with
# class "scale".
webpage %>%
  html_nodes(".scale div") %>%
  html_attr("class")
## [1] "bars pct100" "bars pct60" "bars pct60" "bars pct60" "bars pct50"
## [6] "bars pct50" "bars pct50" "bars pct40" "bars pct40" "bars pct40"
####################
#                  #
#    Exercise 10   #
#                  #
####################

# Return the src attribute of every <img> whose src ends with "svg"
# (CSS suffix match, $=).
webpage %>%
  html_nodes("img[src$='svg']") %>%
  html_attr("src")
## [1] "//i.cdn.turner.com/money/.element/cnnm-3.0/img/logo/cnnmoney_blue.svg"
## [2] "//i.cdn.turner.com/money/.element/cnnm-3.0/img/logo/cnnmoney_blue.svg"
Leave a Reply