library(rvest)

#########################################################.
#########################################################.
# For information about CSS selectors, see the explanations on the
# various levels of the CSS selector Game found here:
#
#   https://flukeout.github.io/
#
# Here are some other websites for learning and practicing
# CSS Selectors
#
#   https://css-speedrun.netlify.app/
#     # answers are here: https://github.com/Vincenius/css-speedrun
#
#   https://css-selector.netlify.app/
#     # on this page write the selector that correctly selects the one
#     # line that contains the words "Select me"
#########################################################.

#########################################################################.
# The following code is how we typically worked with local HTML files
# in class:
#
# # This is the folder that contains the HTML file
# folder = "/Users/yrose/Dropbox (Personal)/website/yu/ids2460-dataMgmtForAnal/83spr23-ids2460-dataManagement/000600-WebScraping"
# setwd(folder)
#
# # This is the html file name
# fullPage = read_html("000420-practiceWithWebScraping-v001.html")
#
# # This command reads the HTML file into R
# fullPage = read_html("000420-practiceWithWebScraping-v001.html")
#########################################################################.
#
# Instead of reading from an HTML file, I put the text of the HTML directly
# into the R file by using an R r"(raw string)". You can enclose anything
# between r"( and )" without using backslashes on the contents.
# This allows us to very simply create a single R variable that contains
# the entire text of the HTML file directly in this R file.
# That makes understanding the answers to the questions below easier since
# you can see the HTML directly in this file.
#
# NOTE: the HTML is written as several r"(...)" raw-string pieces that are
# glued together with paste0(). This is only done to keep the lines short;
# the resulting value is one single-line string of HTML.
#########################################################################.

textOfHtmlFile <- paste0(
  r"(<html> )",
  r"(<head> <title>this is the title</title> </head> )",
  r"(<body> )",
  r"(<h1>one</h1> )",
  r"(<div class="a"> )",
  r"(<h2>three</h2> <p class="c">four</p> <p class="d">five</p> )",
  r"(</div> )",
  r"(<div class="b"> )",
  r"(<h2>six</h2> <p class="d">seven</p> <p class="c">eight</p> )",
  r"(</div> )",
  r"(<h3>nine</h3> )",
  r"(<div class="c"> )",
  r"(<p>ten</p> <p>eleven</p> <p>twelve</p> <p>thirteen</p> )",
  r"(<p>fourteen</p> <p>fifteen</p> <p>sixteen</p> <p>seventeen</p> )",
  r"(</div> )",
  r"(<p id="x">eighteen</p> )",
  r"(</body></html>)"
)

# Parse the HTML text; all of the questions below query this object.
fullPage <- read_html(textOfHtmlFile)

#########################################################################.
#
# For your convenience you can use this function to view the HTML
# directly in the RStudio "Viewer" tab
# (by default Viewer is in the lower right corner of the RStudio window)
#
#########################################################################.

# Display HTML text in the RStudio "Viewer" tab.
#
# ARGUMENTS
#   htmlText - character vector containing the HTML to display
#              (it is written to a file with writeLines)
#
# DETAILS
#   The HTML is written to a temporary ".html" file, the file is shown with
#   rstudioapi::viewer() and the file is removed about 2 seconds later.
#
# RETURNS
#   The value of unlink(), i.e. 0 for success and 1 for failure, invisibly.
viewHtmlInRStudio <- function(htmlText) {
  # create a temporary filename that has an html extension
  filename <- tempfile(fileext = ".html")

  # Safety net: if writing the file or showing it fails with an error, the
  # temporary file is still removed. (Removing a file that is already gone
  # is harmless, so this does not conflict with the unlink() below.)
  on.exit(unlink(filename), add = TRUE)

  # show where the file is (you can ignore this)
  cat("Html is in ", filename, "\n", sep = "")

  writeLines(htmlText, filename) # write the html text to the file
  rstudioapi::viewer(filename)   # display the html page in the RStudio viewer

  # Wait 2 seconds before removing the file to make sure it has already
  # been displayed in the RStudio "viewer" tab
  Sys.sleep(2)

  unlink(filename) # remove the temporary file
}

#viewHtmlInRStudio (textOfHtmlFile)

########################################################################.
# Question 1a.
#
# What CSS selector targets just the following words:
#   eighteen
########################################################################.

# This is ONE POSSIBLE answer

# Use the #id selector
# See "level 3" and the explanation in the CSS selector game here:
# https://flukeout.github.io/

css <- "#x"

# test the answer to make sure it works
fullPage %>%
  html_elements(css) %>%
  html_text2()
# Expected output:
# [1] "eighteen"
########################################################################.
# Question 1b.
#
# What CSS selector targets just the following words:
#   three
#   six
#   ten
########################################################################.

# This is ONE POSSIBLE answer

# Use the following selector rules:
#
#   Descendant Selector (i.e. Selector1 Selector2)   # see level 4 of CSS Game
#   :first-child                                     # see level 15 of CSS Game
#
# Note the space in the selector: it matches anything INSIDE of a div that
# is the first child of its parent (not a div that is itself a first child).

css <- "div :first-child"

# test the answer to make sure it works
fullPage %>%
  html_elements(css) %>%
  html_text2()
# Expected output:
# [1] "three" "six" "ten"
########################################################################.
# Question 1c.
#
# What CSS selector targets just the following words:
#   four
#   seven
########################################################################.

# This is ONE POSSIBLE answer

# Use the following selector rules:
#
#   Adjacent Sibling Selector     # see level 12 of CSS Game
#
# "h2 + p" matches a <p> that comes immediately after an <h2>
# (both having the same parent).

css <- "h2 + p"

# test the answer to make sure it works
fullPage %>%
  html_elements(css) %>%
  html_text2()
# Expected output:
# [1] "four" "seven"
########################################################################.
# Question 1d.
#
# What CSS selector targets just the following words:
#   thirteen
#   fifteen
#   seventeen
########################################################################.

# This is ONE POSSIBLE answer

# Use the following selector rules:
#
#   nth-of-type      # see level 22 of CSS Game
#
# 2n+4 (for n = 0, 1, 2, ...) stands for the positions 4, 6, 8, ...
# Only <div class="c"> contains at least four <p> tags, and its 4th, 6th
# and 8th <p> tags are "thirteen", "fifteen" and "seventeen".

css <- "p:nth-of-type(2n+4)"

# test the answer to make sure it works
fullPage %>%
  html_elements(css) %>%
  html_text2()
# Expected output:
# [1] "thirteen" "fifteen" "seventeen"
########################################################################.
# Question 1e.
#
# What CSS selector targets just the following words:
#   six
#   seven
#   eight
########################################################################.

# This is ONE POSSIBLE answer

# Use the following selector rules:
#
#   Class Selector (i.e. .classname)                 # see level 6 of CSS Game
#   Descendant Selector (i.e. Selector1 Selector2)   # see level 4 of CSS Game
#   The Universal Selector (i.e. *)                  # see level 10 of CSS Game
#
# ".b *" matches every tag that is inside of a tag that has class="b".

css <- ".b *"

# test the answer to make sure it works
fullPage %>%
  html_elements(css) %>%
  html_text2()
# Expected output:
# [1] "six" "seven" "eight"
########################################################################.
# Question 2.
#
# Save the HTML code in a file and display it in your browser. Modify the code
# so that the odd numbered words appear in green text and the even numbered
# words appear in red text. You may add any HTML or css code that you think is
# appropriate. However, do not modify the overall layout of the page (e.g. all
# headings should remain headings, all paragraphs should remain paragraphs,
# etc.)
########################################################################.

# NOTE - instead of modifying the HTML in a file I will show how to
# modify the HTML code directly in this R file.
#
# The following is the original HTML code from above with some modifications.
#
# Step 1: add class="evenNumber" to the tags that contain even numbers
#         add class="oddNumber" to the tags that contain odd numbers
#
#   The words "evenNumber" and "oddNumber" are NOT part of CSS or HTML.
#   They are just class names that I "made up".
#
#   If a tag already has a class="something" just add the new class name
#   by putting a space between the new name and the original name.
#   For example <p class="oddNumber d">seven</p>
#   means that the p tag has both a class named "oddNumber" and a class
#   named "d".
#
# Step 2: Add the following <style> element with these CSS rules into
#         the <head>...</head> element of the page.
#
#   <style>
#     .oddNumber { color: green; }
#     .evenNumber { color: red; }
#   </style>
#
# NOTE: the HTML is written as several r"(...)" raw-string pieces that are
# glued together with paste0(). This is only done to keep the lines short;
# the resulting value is one single-line string of HTML.

modifiedHtml <- paste0(
  r"(<html> )",
  r"(<head> )",
  r"(<title>this is the title</title> )",
  r"(<!-- add the following css rules --> )",
  r"(<style> )",
  r"(.oddNumber { color: green; } )",
  r"(.evenNumber { color: red; } )",
  r"(</style> )",
  r"(</head> )",
  r"(<body> )",
  r"(<h1 class="oddNumber">one</h1> )",
  r"(<div class="a"> )",
  r"(<h2 class="oddNumber">three</h2> )",
  r"(<p class="evenNumber c">four</p> )",
  r"(<p class="oddNumber d">five</p> )",
  r"(</div> )",
  r"(<div class="b"> )",
  r"(<h2 class="evenNumber">six</h2> )",
  r"(<p class="oddNumber d">seven</p> )",
  r"(<p class="evenNumber c">eight</p> )",
  r"(</div> )",
  r"(<h3 class="oddNumber">nine</h3> )",
  r"(<div class="c"> )",
  r"(<p class="evenNumber">ten</p> )",
  r"(<p class="oddNumber">eleven</p> )",
  r"(<p class="evenNumber">twelve</p> )",
  r"(<p class="oddNumber">thirteen</p> )",
  r"(<p class="evenNumber">fourteen</p> )",
  r"(<p class="oddNumber">fifteen</p> )",
  r"(<p class="evenNumber">sixteen</p> )",
  r"(<p class="oddNumber">seventeen</p> )",
  r"(</div> )",
  r"(<p class="evenNumber" id="x">eighteen</p> )",
  r"(</body></html>)"
)

# Show the results
#
#viewHtmlInRStudio(modifiedHtml)