Skip to content

Commit

Permalink
nest_by() improvements (tidyverse#5171)
Browse files Browse the repository at this point in the history
Fixes tidyverse#5140. Fixes tidyverse#5139.

Co-authored-by: Jenny Bryan <[email protected]>
  • Loading branch information
hadley and jennybc committed May 12, 2020
1 parent ce1dc64 commit d2627f7
Show file tree
Hide file tree
Showing 10 changed files with 123 additions and 51 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ S3method(mutate_,data.frame)
S3method(mutate_,tbl_df)
S3method(n_groups,data.frame)
S3method(nest_by,data.frame)
S3method(nest_by,grouped_df)
S3method(nest_join,data.frame)
S3method(print,all_vars)
S3method(print,any_vars)
Expand Down
62 changes: 52 additions & 10 deletions R/nest_by.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,34 @@
#' \Sexpr[results=rd, stage=render]{lifecycle::badge("experimental")}
#'
#' `nest_by()` is closely related to [group_by()]. However, instead of storing
#' the group structure in the metadata, it makes it explicit in the data,
#' giving each group key a single row with a list-column of data frames that
#' contain all the other data.
#' the group structure in the metadata, it is made explicit in the data,
#' giving each group key a single row along with a list-column of data frames
#' that contain all the other data.
#'
#' `nest_by()` returns a [rowwise] data frame, which makes operations on the
#' grouped data particularly elegant. See `vignette("rowwise")` for more
#' details.
#'
#' @details
#' Note that `df %>% nest_by(x, y)` is roughly equivalent to
#'
#' ```
#' df %>%
#' group_by(x, y) %>%
#' summarise(data = list(cur_data())) %>%
#' rowwise()
#' ```
#'
#' If you want to unnest a nested data frame, you can either use
#' `tidyr::unnest()` or take advantage of `summarise()`s multi-row behaviour:
#'
#' ```
#' nested %>%
#' summarise(data)
#' ```
#'
#' @return
#' A [rowwise()] data frame. The output has the following properties:
#' A [rowwise] data frame. The output has the following properties:
#'
#' * The rows come from the underlying [group_keys()].
#' * The columns are the grouping keys plus one list-column of data frames.
Expand Down Expand Up @@ -43,15 +65,35 @@
#' models
#'
#' models %>% summarise(rsq = summary(model)$r.squared)
nest_by <- function(.data, ..., .key = "data", .keep = FALSE, .add = FALSE) {
#' # This is particularly elegant with the broom functions
#' if (requireNamespace("broom", quietly = TRUE)) {
#' models %>% summarise(broom::glance(model))
#' models %>% summarise(broom::tidy(model))
#' }
#'
#' # Note that you can also summarise to unnest the data
#' models %>% summarise(data)
nest_by <- function(.data, ..., .key = "data", .keep = FALSE) {
UseMethod("nest_by")
}

#' @export
nest_by.data.frame <- function(.data, ..., .key = "data", .keep = FALSE, .add = FALSE) {
data <- group_by(.data, ..., .add = .add)
nest_by.data.frame <- function(.data, ..., .key = "data", .keep = FALSE) {
.data <- group_by(.data, ...)
nest_by.grouped_df(.data, .key = .key, .keep = .keep)
}

#' @export
nest_by.grouped_df <- function(.data, ..., .key = "data", .keep = FALSE) {
if (!missing(...)) {
abort(c(
"Can't re-group while nesting",
i = "Either `ungroup()` first or don't supply arguments to `nest_by()"
))
}

keys <- group_keys(data)
keys <- mutate(keys, !!.key := group_split(data, .keep = .keep))
rowwise(keys, tidyselect::all_of(group_vars(data)))
vars <- group_vars(.data)
keys <- group_keys(.data)
keys <- mutate(keys, !!.key := group_split(.env$.data, .keep = .keep))
rowwise(keys, tidyselect::all_of(vars))
}
5 changes: 3 additions & 2 deletions R/rowwise.r
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@
#' `rowwise()` allows you to compute on a data frame a row-at-a-time.
#' This is most useful when a vectorised function doesn't exist.
#'
#' A row-wise tibble maintains its row-wise status until explicitly removed
#' by [group_by()], [ungroup()], or [as_tibble()].
#' Most dplyr verbs preserve row-wise grouping. The exception is [summarise()],
#' which return a [grouped_df]. You can explicitly ungroup with [ungroup()]
#' or [as_tibble()], or convert to a [grouped_df] with [group_by()].
#'
#' @section List-columns:
#' Because a rowwise has exactly one row per group it offers a small
Expand Down
37 changes: 24 additions & 13 deletions R/summarise.R
Original file line number Diff line number Diff line change
Expand Up @@ -125,10 +125,6 @@ summarise.data.frame <- function(.data, ..., .groups = NULL) {
summarise_build(.data, cols)
}

summarise_verbose <- function(.groups, .env) {
is.null(.groups) && is_reference(topenv(.env), global_env()) && !identical(getOption("dplyr.summarise.inform"), FALSE)
}

#' @export
summarise.grouped_df <- function(.data, ..., .groups = NULL) {
cols <- summarise_cols(.data, ...)
Expand All @@ -148,21 +144,19 @@ summarise.grouped_df <- function(.data, ..., .groups = NULL) {
n <- length(group_vars)
if (n > 1) {
if (verbose) {
inform(glue('`summarise()` regrouping by {new_groups} (override with `.groups` argument)',
new_groups = glue_collapse(paste0("'", group_vars[-n], "'"), sep = ", ")
))
new_groups <- glue_collapse(paste0("'", group_vars[-n], "'"), sep = ", ")
summarise_inform("regrouping by {new_groups}")
}
out <- grouped_df(out, group_vars[-n], group_by_drop_default(.data))
} else {
if (verbose) {
inform('`summarise()` ungrouping (override with `.groups` argument)')
summarise_inform("ungroup")
}
}
} else if (identical(.groups, "keep")) {
if (verbose) {
inform(glue('`summarise()` regrouping by {new_groups} (override with `.groups` argument)',
new_groups = glue_collapse(paste0("'", group_vars, "'"), sep = ", ")
))
new_groups <- glue_collapse(paste0("'", group_vars, "'"), sep = ", ")
summarise_inform("regrouping by {new_groups}")
}
out <- grouped_df(out, group_vars, group_by_drop_default(.data))
} else if (identical(.groups, "rowwise")) {
Expand All @@ -184,10 +178,13 @@ summarise.rowwise_df <- function(.data, ..., .groups = NULL) {
verbose <- summarise_verbose(.groups, caller_env())

group_vars <- group_vars(.data)
if (is.null(.groups) || identical(.groups, "rowwise") || identical(.groups, "keep")) {
if (is.null(.groups) || identical(.groups, "keep")) {
if (verbose) {
inform("summarise() regrouping by rows (override with `.groups` argument)")
new_groups <- glue_collapse(paste0("'", group_vars, "'"), sep = ", ")
summarise_inform("grouping by {new_groups}")
}
out <- grouped_df(out, group_vars)
} else if (identical(.groups, "rowwise")) {
out <- rowwise_df(out, group_vars)
} else if (!identical(.groups, "drop")) {
abort(c(
Expand Down Expand Up @@ -285,3 +282,17 @@ summarise_build <- function(.data, cols) {
dplyr_col_modify(out, cols$new)
}


# messaging ---------------------------------------------------------------

summarise_verbose <- function(.groups, .env) {
is.null(.groups) &&
is_reference(topenv(.env), global_env()) &&
!identical(getOption("dplyr.summarise.inform"), FALSE)
}

summarise_inform <- function(..., .env = parent.frame()) {
inform(paste0(
"`summarise()` ", glue(..., .envir = .env), " (override with `.groups` argument)"
))
}
42 changes: 29 additions & 13 deletions man/nest_by.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions man/rowwise.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 6 additions & 7 deletions tests/testthat/test-nest_by.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,16 @@ test_that("can control key col", {
expect_named(out, c("g", "key"))
})

test_that("overrides grouping by default", {
test_that("nest_by() inherits grouping", {
df <- data.frame(g1 = 1:2, g2 = 1:2, x = 1:2, y = 1:2)

expect_equal(
df %>% group_by(g1) %>% nest_by(g2) %>% group_vars(),
"g2"
)
expect_equal(
df %>% group_by(g1) %>% nest_by(g2, .add = TRUE) %>% group_vars(),
c("g1", "g2")
df %>% group_by(g1) %>% nest_by() %>% group_vars(),
"g1"
)

# And you can't have it both ways
expect_error(df %>% group_by(g1) %>% nest_by("g2"), "re-group")
})

test_that("can control whether grouping data in list-col", {
Expand Down
3 changes: 2 additions & 1 deletion tests/testthat/test-rowwise.r
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,9 @@ test_that("rowwise status preserved by major verbs", {
expect_s3_class(out, "rowwise_df")
expect_equal(group_vars(out), "x")

# Except for summarise
out <- summarise(rf, z = mean(x, y))
expect_s3_class(out, "rowwise_df")
expect_s3_class(out, "grouped_df")
expect_equal(group_vars(out), "x")
})

Expand Down
4 changes: 2 additions & 2 deletions tests/testthat/test-summarise-errors.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@ Message: `summarise()` regrouping by 'x' (override with `.groups` argument)

> eval_bare(expr(tibble(x = 1, y = 2) %>% rowwise(x, y) %>% summarise()), env(
+ global_env()))
Message: summarise() regrouping by rows (override with `.groups` argument)
Message: `summarise()` grouping by 'x', 'y' (override with `.groups` argument)

# A tibble: 1 x 2
# Rowwise: x, y
# Groups: x, y [1]
x y
<dbl> <dbl>
1 1 2
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test-summarise.r
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ test_that("works with grouped empty data frames", {
)
expect_equal(
df %>% rowwise(x) %>% summarise(y = 1L),
rowwise(tibble(x = integer(), y = integer()), x)
group_by(tibble(x = integer(), y = integer()), x)
)
})

Expand Down

0 comments on commit d2627f7

Please sign in to comment.