nest_by() improvements (tidyverse#5171)

Fixes tidyverse#5140. Fixes tidyverse#5139. Co-authored-by: Jenny Bryan <[email protected]>
TSchiefer · May 12, 2020 · d2627f7 · d2627f7
1 parent ce1dc64
commit d2627f7
Show file tree

Hide file tree

Showing 10 changed files with 123 additions and 51 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -103,6 +103,7 @@ S3method(mutate_,data.frame)
 S3method(mutate_,tbl_df)
 S3method(n_groups,data.frame)
 S3method(nest_by,data.frame)
+S3method(nest_by,grouped_df)
 S3method(nest_join,data.frame)
 S3method(print,all_vars)
 S3method(print,any_vars)

diff --git a/R/nest_by.R b/R/nest_by.R
@@ -4,12 +4,34 @@
 #' \Sexpr[results=rd, stage=render]{lifecycle::badge("experimental")}
 #'
 #' `nest_by()` is closely related to [group_by()]. However, instead of storing
-#' the group structure in the metadata, it makes it explicit in the data,
-#' giving each group key a single row with a list-column of data frames that
-#' contain all the other data.
+#' the group structure in the metadata, it is made explicit in the data,
+#' giving each group key a single row along with a list-column of data frames
+#' that contain all the other data.
+#'
+#' `nest_by()` returns a [rowwise] data frame, which makes operations on the
+#' grouped data particularly elegant. See `vignette("rowwise")` for more
+#' details.
+#'
+#' @details
+#' Note that `df %>% nest_by(x, y)` is roughly equivalent to
+#'
+#' ```
+#' df %>%
+#'   group_by(x, y) %>%
+#'   summarise(data = list(cur_data())) %>%
+#'   rowwise()
+#' ```
+#'
+#' If you want to unnest a nested data frame, you can either use
+#' `tidyr::unnest()` or take advantage of `summarise()`s multi-row behaviour:
+#'
+#' ```
+#' nested %>%
+#'   summarise(data)
+#' ```
 #'
 #' @return
-#' A [rowwise()] data frame. The output has the following properties:
+#' A [rowwise] data frame. The output has the following properties:
 #'
 #' * The rows come from the underlying [group_keys()].
 #' * The columns are the grouping keys plus one list-column of data frames.
@@ -43,15 +65,35 @@
 #' models
 #'
 #' models %>% summarise(rsq = summary(model)$r.squared)
-nest_by <- function(.data, ..., .key = "data", .keep = FALSE, .add = FALSE) {
+#' # This is particularly elegant with the broom functions
+#' if (requireNamespace("broom", quietly = TRUE)) {
+#'   models %>% summarise(broom::glance(model))
+#'   models %>% summarise(broom::tidy(model))
+#' }
+#'
+#' # Note that you can also summarise to unnest the data
+#' models %>% summarise(data)
+nest_by <- function(.data, ..., .key = "data", .keep = FALSE) {
   UseMethod("nest_by")
 }
 
 #' @export
-nest_by.data.frame <- function(.data, ..., .key = "data", .keep = FALSE, .add = FALSE) {
-  data <- group_by(.data, ..., .add = .add)
+nest_by.data.frame <- function(.data, ..., .key = "data", .keep = FALSE) {
+  .data <- group_by(.data, ...)
+  nest_by.grouped_df(.data, .key = .key, .keep = .keep)
+}
+
+#' @export
+nest_by.grouped_df <- function(.data, ..., .key = "data", .keep = FALSE) {
+  if (!missing(...)) {
+    abort(c(
+      "Can't re-group while nesting",
+      i = "Either `ungroup()` first or don't supply arguments to `nest_by()"
+    ))
+  }
 
-  keys <- group_keys(data)
-  keys <- mutate(keys, !!.key := group_split(data, .keep = .keep))
-  rowwise(keys, tidyselect::all_of(group_vars(data)))
+  vars <- group_vars(.data)
+  keys <- group_keys(.data)
+  keys <- mutate(keys, !!.key := group_split(.env$.data, .keep = .keep))
+  rowwise(keys, tidyselect::all_of(vars))
 }
diff --git a/R/rowwise.r b/R/rowwise.r
@@ -4,8 +4,9 @@
 #' `rowwise()` allows you to compute on a data frame a row-at-a-time.
 #' This is most useful when a vectorised function doesn't exist.
 #'
-#' A row-wise tibble maintains its row-wise status until explicitly removed
-#' by [group_by()], [ungroup()], or [as_tibble()].
+#' Most dplyr verbs preserve row-wise grouping. The exception is [summarise()],
+#' which return a [grouped_df]. You can explicitly ungroup with [ungroup()]
+#' or [as_tibble()], or convert to a [grouped_df] with [group_by()].
 #'
 #' @section List-columns:
 #' Because a rowwise has exactly one row per group it offers a small

diff --git a/R/summarise.R b/R/summarise.R
@@ -125,10 +125,6 @@ summarise.data.frame <- function(.data, ..., .groups = NULL) {
   summarise_build(.data, cols)
 }
 
-summarise_verbose <- function(.groups, .env) {
-  is.null(.groups) && is_reference(topenv(.env), global_env()) && !identical(getOption("dplyr.summarise.inform"), FALSE)
-}
-
 #' @export
 summarise.grouped_df <- function(.data, ..., .groups = NULL) {
   cols <- summarise_cols(.data, ...)
@@ -148,21 +144,19 @@ summarise.grouped_df <- function(.data, ..., .groups = NULL) {
     n <- length(group_vars)
     if (n > 1) {
       if (verbose) {
-        inform(glue('`summarise()` regrouping by {new_groups} (override with `.groups` argument)',
-          new_groups = glue_collapse(paste0("'", group_vars[-n], "'"), sep = ", ")
-        ))
+        new_groups <- glue_collapse(paste0("'", group_vars[-n], "'"), sep = ", ")
+        summarise_inform("regrouping by {new_groups}")
       }
       out <- grouped_df(out, group_vars[-n], group_by_drop_default(.data))
     } else {
       if (verbose) {
-        inform('`summarise()` ungrouping (override with `.groups` argument)')
+        summarise_inform("ungroup")
       }
     }
   } else if (identical(.groups, "keep")) {
     if (verbose) {
-      inform(glue('`summarise()` regrouping by {new_groups} (override with `.groups` argument)',
-        new_groups = glue_collapse(paste0("'", group_vars, "'"), sep = ", ")
-      ))
+      new_groups <- glue_collapse(paste0("'", group_vars, "'"), sep = ", ")
+      summarise_inform("regrouping by {new_groups}")
     }
     out <- grouped_df(out, group_vars, group_by_drop_default(.data))
   } else if (identical(.groups, "rowwise")) {
@@ -184,10 +178,13 @@ summarise.rowwise_df <- function(.data, ..., .groups = NULL) {
   verbose <- summarise_verbose(.groups, caller_env())
 
   group_vars <- group_vars(.data)
-  if (is.null(.groups) || identical(.groups, "rowwise") || identical(.groups, "keep")) {
+  if (is.null(.groups) || identical(.groups, "keep")) {
     if (verbose) {
-      inform("summarise() regrouping by rows (override with `.groups` argument)")
+      new_groups <- glue_collapse(paste0("'", group_vars, "'"), sep = ", ")
+      summarise_inform("grouping by {new_groups}")
     }
+    out <- grouped_df(out, group_vars)
+  } else if (identical(.groups, "rowwise")) {
     out <- rowwise_df(out, group_vars)
   } else if (!identical(.groups, "drop")) {
     abort(c(
@@ -285,3 +282,17 @@ summarise_build <- function(.data, cols) {
   dplyr_col_modify(out, cols$new)
 }
 
+
+# messaging ---------------------------------------------------------------
+
+summarise_verbose <- function(.groups, .env) {
+  is.null(.groups) &&
+    is_reference(topenv(.env), global_env()) &&
+    !identical(getOption("dplyr.summarise.inform"), FALSE)
+}
+
+summarise_inform <- function(..., .env = parent.frame()) {
+  inform(paste0(
+    "`summarise()` ", glue(..., .envir = .env), " (override with `.groups` argument)"
+  ))
+}
diff --git a/man/nest_by.Rd b/man/nest_by.Rd
diff --git a/man/rowwise.Rd b/man/rowwise.Rd
diff --git a/tests/testthat/test-nest_by.R b/tests/testthat/test-nest_by.R
@@ -13,17 +13,16 @@ test_that("can control key col", {
   expect_named(out, c("g", "key"))
 })
 
-test_that("overrides grouping by default", {
+test_that("nest_by() inherits grouping", {
   df <- data.frame(g1 = 1:2, g2 = 1:2, x = 1:2, y = 1:2)
 
   expect_equal(
-    df %>% group_by(g1) %>% nest_by(g2) %>% group_vars(),
-    "g2"
-  )
-  expect_equal(
-    df %>% group_by(g1) %>% nest_by(g2, .add = TRUE) %>% group_vars(),
-    c("g1", "g2")
+    df %>% group_by(g1) %>% nest_by() %>% group_vars(),
+    "g1"
   )
+
+  # And you can't have it both ways
+  expect_error(df %>% group_by(g1) %>% nest_by("g2"), "re-group")
 })
 
 test_that("can control whether grouping data in list-col", {

diff --git a/tests/testthat/test-rowwise.r b/tests/testthat/test-rowwise.r
@@ -25,8 +25,9 @@ test_that("rowwise status preserved by major verbs", {
   expect_s3_class(out, "rowwise_df")
   expect_equal(group_vars(out), "x")
 
+  # Except for summarise
   out <- summarise(rf, z = mean(x, y))
-  expect_s3_class(out, "rowwise_df")
+  expect_s3_class(out, "grouped_df")
   expect_equal(group_vars(out), "x")
 })
 

diff --git a/tests/testthat/test-summarise-errors.txt b/tests/testthat/test-summarise-errors.txt
@@ -14,10 +14,10 @@ Message: `summarise()` regrouping by 'x' (override with `.groups` argument)
 
 > eval_bare(expr(tibble(x = 1, y = 2) %>% rowwise(x, y) %>% summarise()), env(
 +   global_env()))
-Message: summarise() regrouping by rows (override with `.groups` argument)
+Message: `summarise()` grouping by 'x', 'y' (override with `.groups` argument)
 
 # A tibble: 1 x 2
-# Rowwise:  x, y
+# Groups:   x, y [1]
       x     y
   <dbl> <dbl>
 1     1     2

diff --git a/tests/testthat/test-summarise.r b/tests/testthat/test-summarise.r
@@ -43,7 +43,7 @@ test_that("works with grouped empty data frames", {
   )
   expect_equal(
     df %>% rowwise(x) %>% summarise(y = 1L),
-    rowwise(tibble(x = integer(), y = integer()), x)
+    group_by(tibble(x = integer(), y = integer()), x)
   )
 })