Skip to content

Commit

Permalink
Always duplicate variables we plan to modify by reference (tidyverse#…
Browse files Browse the repository at this point in the history
  • Loading branch information
DavisVaughan committed Feb 27, 2023
1 parent f7cf351 commit 2f014b3
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 7 deletions.
19 changes: 12 additions & 7 deletions R/data-mask.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,13 @@ DataMask <- R6Class("DataMask",
private$grouped <- by$type == "grouped"
private$rowwise <- by$type == "rowwise"

# `duplicate(0L)` is necessary to ensure that the value we modify by
# reference is "fresh" and completely owned by this instance of the
# `DataMask`. Otherwise nested `mutate()` calls can end up modifying
# the same value (#6762).
private$env_current_group_info <- new_environment(data = list(
`dplyr:::current_group_id` = 0L,
`dplyr:::current_group_size` = 0L
`dplyr:::current_group_id` = duplicate(0L),
`dplyr:::current_group_size` = duplicate(0L)
))

private$chops <- .Call(
Expand Down Expand Up @@ -171,14 +175,15 @@ DataMask <- R6Class("DataMask",

set_current_group = function(group) {
# Only to be used right before throwing an error.
# We `duplicate()` group to be extremely conservative, because there is an
# extremely small chance we could modify this by reference and cause
# We `duplicate()` both values to be extremely conservative, because there
# is an extremely small chance we could modify this by reference and cause
# issues with the `group` variable in the caller, but this has never been
# seen. `length()` always returns a fresh variable so we don't duplicate
# in that case.
# seen. We generally assume `length()` always returns a fresh variable, so
# we probably don't need to duplicate there, but it seems better to be
# extremely safe here.
env_current_group_info <- private[["env_current_group_info"]]
env_current_group_info[["dplyr:::current_group_id"]] <- duplicate(group)
env_current_group_info[["dplyr:::current_group_size"]] <- length(private$rows[[group]])
env_current_group_info[["dplyr:::current_group_size"]] <- duplicate(length(private$rows[[group]]))
},

get_used = function() {
Expand Down
16 changes: 16 additions & 0 deletions tests/testthat/test-mutate.R
Original file line number Diff line number Diff line change
Expand Up @@ -478,6 +478,22 @@ test_that("DataMask$add() forces chunks (#4677)", {
expect_equal(df$log_e_bf01, log(1 / 0.244))
})

test_that("DataMask uses fresh copies of group id / size variables (#6762)", {
df <- tibble(x = 1:2)

fn <- function() {
df <- tibble(a = 1)
# Otherwise, this nested `mutate()` can modify the same
# id/size variable as the outer one, which causes havoc
mutate(df, b = a + 1)
}

out <- mutate(df, y = {fn(); x})

expect_identical(out$x, 1:2)
expect_identical(out$y, 1:2)
})

test_that("mutate() correctly auto-names expressions (#6741)", {
df <- tibble(a = 1L)

Expand Down

0 comments on commit 2f014b3

Please sign in to comment.