diff --git a/DESCRIPTION b/DESCRIPTION index 9fb79f9..db54a37 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: tidyfast Title: Fast Tidying of Data -Version: 0.2.1 +Version: 0.2.2 Authors@R: c( person(given = "Tyson", family = "Barrett", @@ -9,6 +9,9 @@ Authors@R: c( comment = c(ORCID = "0000-0002-2137-1391")), person(given = "Mark", family = "Fairbanks", + role = "ctb"), + person(given = "Ivan", + family = "Leung", role = "ctb")) Description: Tidying functions built on 'data.table' to provide quick and efficient data manipulation with diff --git a/R/unnest.R b/R/unnest.R index 890ad3c..d1fd38c 100644 --- a/R/unnest.R +++ b/R/unnest.R @@ -4,6 +4,7 @@ #' #' @param dt_ the data table to unnest #' @param col the column to unnest +#' @param fill when set to \code{TRUE}, it allows the user to unnest data.tables that have different variables in them (e.g. dt1 has x and y while dt2 has x and z). This uses the \code{fill} argument in \code{data.table::rbindlist()}. #' @param ... any of the other variables in the nested table that you want to keep in the unnested table. Bare variable names. If none are provided, all variables are kept. 
#' #' @examples @@ -21,12 +22,12 @@ #' @import data.table #' #' @export -dt_unnest <- function(dt_, col, ...){ +dt_unnest <- function(dt_, col, ..., fill = FALSE){ UseMethod("dt_unnest", dt_) } #' @export -dt_unnest.default <- function(dt_, col, ...){ +dt_unnest.default <- function(dt_, col, ..., fill = FALSE){ if (isFALSE(is.data.table(dt_))) dt_ <- as.data.table(dt_) @@ -45,7 +46,7 @@ dt_unnest.default <- function(dt_, col, ...){ others_dt <- others_dt[, ..keep] others_dt <- lapply(others_dt, rep, times = rows) - dt_[, list(as.data.table(others_dt), rbindlist(eval(col)))] + dt_[, list(as.data.table(others_dt), rbindlist(eval(col), fill = fill))] } diff --git a/README.Rmd b/README.Rmd index 7e9af44..b4e4a04 100644 --- a/README.Rmd +++ b/README.Rmd @@ -16,7 +16,7 @@ knitr::opts_chunk$set( set.seed(843) ``` -# `tidyfast v0.2.1` +# `tidyfast v0.2.2` diff --git a/README.md b/README.md index 97559c1..f404d34 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ -# `tidyfast v0.2.1` +# `tidyfast v0.2.2` @@ -216,13 +216,13 @@ highlighted below. Notably, the timings are without the `nested1` and #> # A tibble: 2 x 3 #> expression median mem_alloc #> - #> 1 dt_nest 3.31ms 2.88MB - #> 2 group_nest 5.73ms 2.56MB + #> 1 dt_nest 3.15ms 2.88MB + #> 2 group_nest 9.34ms 5.4MB #> # A tibble: 2 x 3 #> expression median mem_alloc #> - #> 1 dt_unnest 4.94ms 5.48MB - #> 2 unnest 9.76ms 6.15MB + #> 1 dt_unnest 4.85ms 5.48MB + #> 2 unnest 17.95ms 15.31MB ## Pivoting @@ -295,10 +295,10 @@ But let’s compare some basic speed and efficiency. Because of the #> # A tibble: 4 x 3 #> expression median mem_alloc #> - #> 1 dt_pivot_longer 958.91µs 996.21KB - #> 2 pivot_longer 7.54ms 3.13MB - #> 3 dt_pivot_wider 11.11ms 1.86MB - #> 4 pivot_wider 10.41ms 2.05MB + #> 1 dt_pivot_longer 1.1ms 997.09KB + #> 2 pivot_longer 11.4ms 2.55MB + #> 3 dt_pivot_wider 10.9ms 1.86MB + #> 4 pivot_wider 13.3ms 1.95MB ### If Else @@ -347,9 +347,9 @@ built on `data.table::fifelse()`. 
#> # A tibble: 3 x 3 #> expression median mem_alloc #> - #> 1 case_when 137.5ms 148.8MB - #> 2 dt_case_when 36.3ms 34.3MB - #> 3 fifelse 36.7ms 34.3MB + #> 1 case_when 133.7ms 148.8MB + #> 2 dt_case_when 35.8ms 34.3MB + #> 3 fifelse 34.1ms 34.3MB ## Fill @@ -457,7 +457,7 @@ marks3 <- #> # A tibble: 2 x 3 #> expression median mem_alloc #> - #> 1 tidyr::fill(dplyr::group_by(df3, id), x, y) 65.7ms 30.7MB + #> 1 tidyr::fill(dplyr::group_by(df3, id), x, y) 44.1ms 42.6MB #> 2 tidyfast::dt_fill(dt3, x, y, id = list(id)) 23.8ms 29.1MB ## Separate @@ -500,9 +500,9 @@ than `tidyr::separate()`. #> # A tibble: 3 x 3 #> expression median mem_alloc #> - #> 1 separate 363ms 11.8MB - #> 2 dt_separate 122ms 30.6MB - #> 3 dt_separate-mutable 118ms 26.7MB + #> 1 separate 363ms 11.6MB + #> 2 dt_separate 132ms 30.6MB + #> 3 dt_separate-mutable 114ms 26.7MB ## Count and Uncount diff --git a/man/dt_unnest.Rd b/man/dt_unnest.Rd index 0b4a6fd..94b0d01 100644 --- a/man/dt_unnest.Rd +++ b/man/dt_unnest.Rd @@ -4,13 +4,15 @@ \alias{dt_unnest} \title{Unnest: Fast Unnesting of Data Tables} \usage{ -dt_unnest(dt_, col, ...) +dt_unnest(dt_, col, ..., fill = FALSE) } \arguments{ \item{dt_}{the data table to unnest} \item{col}{the column to unnest} +\item{fill}{when set to \code{TRUE}, it allows the user to unnest data.tables that have different variables in them (e.g. dt1 has x and y while dt2 has x and z). This uses the \code{fill} argument in \code{data.table::rbindlist()}.} + \item{...}{any of the other variables in the nested table that you want to keep in the unnested table. Bare variable names. 
If none are provided, all variables are kept.} } \description{ diff --git a/man/figures/README-third_pivot-1.png b/man/figures/README-third_pivot-1.png index 79cc531..e220343 100644 Binary files a/man/figures/README-third_pivot-1.png and b/man/figures/README-third_pivot-1.png differ diff --git a/man/figures/README-third_pivot-2.png b/man/figures/README-third_pivot-2.png index be62569..1e8e691 100644 Binary files a/man/figures/README-third_pivot-2.png and b/man/figures/README-third_pivot-2.png differ diff --git a/man/figures/README-unnamed-chunk-11-1.png b/man/figures/README-unnamed-chunk-11-1.png index a35e4b2..714eb32 100644 Binary files a/man/figures/README-unnamed-chunk-11-1.png and b/man/figures/README-unnamed-chunk-11-1.png differ diff --git a/man/figures/README-unnamed-chunk-14-1.png b/man/figures/README-unnamed-chunk-14-1.png index b0d07ee..0ef99ec 100644 Binary files a/man/figures/README-unnamed-chunk-14-1.png and b/man/figures/README-unnamed-chunk-14-1.png differ diff --git a/man/figures/README-unnamed-chunk-19-1.png b/man/figures/README-unnamed-chunk-19-1.png index b7c0cb0..c0bded8 100644 Binary files a/man/figures/README-unnamed-chunk-19-1.png and b/man/figures/README-unnamed-chunk-19-1.png differ diff --git a/man/figures/README-unnamed-chunk-23-1.png b/man/figures/README-unnamed-chunk-23-1.png index 0aad1c2..429075d 100644 Binary files a/man/figures/README-unnamed-chunk-23-1.png and b/man/figures/README-unnamed-chunk-23-1.png differ diff --git a/man/figures/README-unnamed-chunk-7-1.png b/man/figures/README-unnamed-chunk-7-1.png index 645ae75..462c635 100644 Binary files a/man/figures/README-unnamed-chunk-7-1.png and b/man/figures/README-unnamed-chunk-7-1.png differ