This repository has been archived by the owner on Feb 24, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path: get_lyrics.R
53 lines (44 loc) · 1.62 KB
/
get_lyrics.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# https://github.com/ewenme/geniusr/issues/17#issuecomment-989302741
# Scrape the lyrics of one song from a parsed Genius song page.
#
# Drop-in replacement for geniusr's internal `get_lyrics()` that selects
# nodes by the `Lyrics__Container` / `SongHeaderVariantdesktop__` class
# names instead of the selectors used by the packaged version.
#
# Every external function is namespace-qualified and no pipe is used, so
# the function works regardless of which packages the caller has attached.
#
# Args:
#   session: a parsed page, passed as-is to `rvest::html_nodes()`.
#
# Returns:
#   A tibble with the columns `line`, `section_name`, `section_artist`,
#   `song_name` and `artist_name`. There is one row per lyric line
#   (section-tag lines such as "[Chorus]" are dropped). When no line
#   containing an alphanumeric character is found, the lyric columns of the
#   returned tibble are NA.
get_lyrics <- function(session) {
  # A selector that matches nothing yields character(0); substituting NA
  # keeps the NA back-fill and the tibble construction below from failing
  # on a zero-length value. Non-empty results are passed through untouched.
  or_na <- function(x) {
    if (length(x) == 0L) NA_character_ else x
  }

  lyrics <- rvest::html_nodes(
    session,
    xpath = '//div[contains(@class, "Lyrics__Container")]'
  )

  song_nodes <- rvest::html_nodes(
    session,
    xpath = '//span[contains(@class, "SongHeaderVariantdesktop__")]'
  )
  song <- or_na(rvest::html_text(song_nodes, trim = TRUE))

  artist_nodes <- rvest::html_nodes(
    session,
    xpath = '//a[contains(@class, "SongHeaderVariantdesktop__Artist")]'
  )
  artist <- or_na(rvest::html_text(artist_nodes, trim = TRUE))

  # Preserve line breaks: add a <p> holding "\n" next to every <br>, then
  # drop the <br> nodes. Both calls modify the parsed document in place.
  xml2::xml_add_sibling(xml2::xml_find_all(lyrics, ".//br"), "p", "\n")
  xml2::xml_remove(xml2::xml_find_all(lyrics, ".//br"))

  # Plain text, one element per line, keeping only lines with content.
  lyrics <- rvest::html_text(lyrics, trim = TRUE)
  lyrics <- unlist(strsplit(lyrics, split = "\n"))
  lyrics <- grep(pattern = "[[:alnum:]]", lyrics, value = TRUE)

  # No lyric text at all: return NA lyric fields next to the song metadata.
  if (length(lyrics) == 0L) {
    return(
      tibble::tibble(
        line = NA_character_,
        section_name = NA_character_,
        section_artist = NA_character_,
        song_name = song,
        artist_name = artist
      )
    )
  }

  # A section tag is a line that is empty once "[...]" has been removed.
  section_tags <- nchar(gsub(pattern = "\\[.*\\]", "", lyrics)) == 0

  # Carry each tag forward over the lines that follow it, then strip the
  # square brackets.
  sections <- geniusr:::repeat_before(lyrics, section_tags)
  sections <- gsub("\\[|\\]", "", sections)

  # Tags look like "Name" or "Name: Artist"; a missing piece becomes NA.
  sections <- strsplit(sections, split = ": ", fixed = TRUE)
  section_name <- vapply(sections, function(parts) parts[1], character(1))
  section_artist <- vapply(sections, function(parts) parts[2], character(1))

  # Sections without an explicit artist fall back to the song's artist.
  section_artist[is.na(section_artist)] <- artist

  tibble::tibble(
    line = lyrics[!section_tags],
    section_name = section_name[!section_tags],
    section_artist = section_artist[!section_tags],
    song_name = song,
    artist_name = artist
  )
}
# Overwrite geniusr's internal `get_lyrics` binding with the version defined
# in this file, so code inside geniusr that calls `get_lyrics()` picks it up.
utils::assignInNamespace(
  x = "get_lyrics",
  value = get_lyrics,
  ns = "geniusr"
)