TIDYVERSE - dplyr::left_join update 0012_

jimrothstein · Dec 4, 2024 · 3f8866f · 3f8866f
1 parent f14cc2a
commit 3f8866f
Show file tree

Hide file tree

Showing 2 changed files with 53 additions and 1 deletion.
diff --git a/NSE_project/300_chapter01_metaprogramming_Mailund.qmd b/NSE_project/300_chapter01_metaprogramming_Mailund.qmd
@@ -6,13 +6,13 @@ editor_options:
 ---
 
 ###
+SEE:  310_R_notes (are up-to-date)
 Notation. x,y,z are `formal parameters`, 1 is default value for z
 str() uses correct terms (I think) `symbol`
 ```{r}
 library(rlang)
 f = function(x) x
   
-  
 formals(f)
 body(f)
 environment(f)

diff --git a/TIDYVERSE/0012_dplyr_two_table_hadley.R b/TIDYVERSE/0012_dplyr_two_table_hadley.R
@@ -66,3 +66,55 @@ flights %>%
   arrange(desc(count))
 
 # ---- study, WHEN needed ----------
+# MORE examples
+
+band_members
+band_instruments
+
+band_members %>% inner_join(band_instruments)
+band_members %>% left_join(band_instruments)
+band_members %>% right_join(band_instruments)
+band_members %>% full_join(band_instruments)
+
+# To suppress the message about joining variables, supply `by`
+band_members %>% inner_join(band_instruments, by = join_by(name))
+# This is good practice in production code
+
+# Use an equality expression if the join variables have different names
+band_members %>% full_join(band_instruments2, by = join_by(name == artist))
+# By default, the join keys from `x` and `y` are coalesced in the output; use
+# `keep = TRUE` to keep the join keys from both `x` and `y`
+band_members %>%
+  full_join(band_instruments2, by = join_by(name == artist), keep = TRUE)
+
+# If a row in `x` matches multiple rows in `y`, all the rows in `y` will be
+# returned once for each matching row in `x`.
+df1 <- tibble(x = 1:3)
+df2 <- tibble(x = c(1, 1, 2), y = c("first", "second", "third"))
+df1 %>% left_join(df2)
+
+# If a row in `y` also matches multiple rows in `x`, this is known as a
+# many-to-many relationship, which is typically a result of an improperly
+# specified join or some kind of messy data. In this case, a warning is
+# thrown by default:
+df3 <- tibble(x = c(1, 1, 1, 3))
+df3 %>% left_join(df2)
+
+# In the rare case where a many-to-many relationship is expected, set
+# `relationship = "many-to-many"` to silence this warning
+df3 %>% left_join(df2, relationship = "many-to-many")
+
+# Use `join_by()` with a condition other than `==` to perform an inequality
+# join. Here we match on every instance where `df1$x > df2$x`.
+df1 %>% left_join(df2, join_by(x > x))
+
+## NA matches NA !
+# By default, NAs match other NAs so that there are two
+# rows in the output of this join:
+df1 <- data.frame(x = c(1, NA), y = 2)
+df2 <- data.frame(x = c(1, NA), z = 3)
+left_join(df1, df2)
+
+# You can optionally request that NAs don't match, giving a
+# result that more closely resembles SQL joins
+left_join(df1, df2, na_matches = "never")