Skip to content

Commit

Permalink
Rethrow "overflow" join error with a better message (#6956)
Browse files Browse the repository at this point in the history
* Rethrow join explosion error

* NEWS bullet

* R -> dplyr

* Simplify message
  • Loading branch information
DavisVaughan committed Nov 6, 2023
1 parent bcfed14 commit 6b5aa31
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 0 deletions.
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# dplyr (development version)

* `left_join()` and friends now throw a specialized error if they detect that
your join would return more rows than dplyr can handle (#6912).

* `slice_*()` now throw the correct error if you forget to name `n` while also
prefixing the call with `dplyr::` (#6946).

Expand Down
23 changes: 23 additions & 0 deletions R/join-rows.R
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,9 @@ dplyr_locate_matches <- function(needles,
vctrs_error_incompatible_type = function(cnd) {
abort("`join_cast_common()` should have handled this.", .internal = TRUE)
},
vctrs_error_matches_overflow = function(cnd) {
rethrow_error_join_matches_overflow(cnd, error_call)
},
vctrs_error_matches_nothing = function(cnd) {
rethrow_error_join_matches_nothing(cnd, error_call)
},
Expand Down Expand Up @@ -137,6 +140,26 @@ dplyr_locate_matches <- function(needles,
)
}

# Rethrow a vctrs "matches overflow" condition as a dplyr join error.
#
# `cnd` is the condition caught by the `vctrs_error_matches_overflow` handler;
# its `size` field is reported in the message as the number of rows the join
# would have returned. `call` is forwarded to `stop_join()` as the call to
# attribute the error to.
#
# The error is signalled through `stop_join()` with class
# "dplyr_error_join_matches_overflow".
rethrow_error_join_matches_overflow <- function(cnd, call) {
  # `size` is larger than the integer maximum, so it presumably arrives as a
  # double. Force fixed notation: default character conversion renders round
  # values in scientific notation (e.g. 1e10 would be shown as "1e+10").
  size <- format(cnd$size, scientific = FALSE, trim = TRUE)

  stop_join(
    message = c(
      "This join would result in more rows than dplyr can handle.",
      # 2147483647 is `.Machine$integer.max`
      i = glue(
        "{size} rows would be returned. ",
        "2147483647 rows is the maximum number allowed."
      ),
      i = paste0(
        "Double check your join keys. This error commonly occurs due to a ",
        "missing join key, or an improperly specified join condition."
      )
    ),
    class = "dplyr_error_join_matches_overflow",
    call = call
  )
}

rethrow_error_join_matches_nothing <- function(cnd, call) {
i <- cnd$i

Expand Down
10 changes: 10 additions & 0 deletions tests/testthat/_snaps/join-rows.md
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,16 @@
! `relationship` must be one of "one-to-one", "one-to-many", "many-to-one", or "many-to-many", not "warn-many-to-many".
i Did you mean "many-to-many"?

# join_rows() rethrows overflow error nicely (#6912)

Code
join_rows(df, df, condition = ">=")
Condition
Error:
! This join would result in more rows than dplyr can handle.
i 50000005000000 rows would be returned. 2147483647 rows is the maximum number allowed.
i Double check your join keys. This error commonly occurs due to a missing join key, or an improperly specified join condition.

# `multiple = NULL` is deprecated and results in `'all'` (#6731)

Code
Expand Down
13 changes: 13 additions & 0 deletions tests/testthat/test-join-rows.R
Original file line number Diff line number Diff line change
Expand Up @@ -410,6 +410,19 @@ test_that("join_rows() validates `relationship`", {
})
})

test_that("join_rows() rethrows overflow error nicely (#6912)", {
  skip_on_cran()
  # A 64-bit build is required: on 32-bit platforms (e.g. Windows 32-bit) long
  # vectors of this size are unsupported and the intermediate `r_ssize` would
  # be too large.
  skip_if_not(.Machine$sizeof.pointer >= 8L, message = "No long vector support")

  # Self-joining 1e7 rows on `>=` would produce far more rows than the maximum.
  df <- tibble(x = seq_len(1e7))

  expect_snapshot(
    join_rows(df, df, condition = ">="),
    error = TRUE
  )
})

# Deprecated behavior ----------------------------------------------------------

test_that("`multiple = NULL` is deprecated and results in `'all'` (#6731)", {
Expand Down

0 comments on commit 6b5aa31

Please sign in to comment.