Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

vctrs:: based summarise() filter() slice() mutate() #4523

Merged
merged 32 commits into from
Aug 30, 2019
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
54321a5
Simplify internals of summarise()
romainfrancois Aug 7, 2019
98a4968
Forget about hybrid n_distinct() for now so that we can eliminate Mul…
romainfrancois Aug 7, 2019
afeb41a
Forget MultipleVectorVisitors c++ class
romainfrancois Aug 7, 2019
7ff4738
Forget VectorVisitor and derived classes
romainfrancois Aug 7, 2019
28035d2
Forget CharacterVectorOrderer c++ class
romainfrancois Aug 7, 2019
020dc12
Experimental summarise2()
romainfrancois Aug 7, 2019
3aaddd3
some TODOs [ci skip]
romainfrancois Aug 7, 2019
12b2d94
astyle
romainfrancois Aug 12, 2019
9d9eaec
assert results of summarise_one() validates vec_size() == 1
romainfrancois Aug 12, 2019
cb79658
pure R implementation of summarise2()
romainfrancois Aug 12, 2019
c189349
Experiment with summarise2(.size =)
romainfrancois Aug 12, 2019
a27abeb
support for n() in summarise()
romainfrancois Aug 13, 2019
970a862
install .data pronoun on summarise_data_mask()
romainfrancois Aug 13, 2019
2aef73d
adapt to changes in summarise()
romainfrancois Aug 13, 2019
36962cf
Better support consecutive reuse of summaries
romainfrancois Aug 13, 2019
43a7444
workaround for case whene there are 0 groups
romainfrancois Aug 15, 2019
f13c686
Fix tests
romainfrancois Aug 15, 2019
ebbd15c
temporary workaround for summarise_data_mask() vs rowwise() until vec…
romainfrancois Aug 15, 2019
832630d
Abandon old C++ implementation of summarise()
romainfrancois Aug 15, 2019
3754061
Update compatibilitry vignette
romainfrancois Aug 15, 2019
7e40a77
test update, min gives a warning in summarise()
romainfrancois Aug 19, 2019
da2d938
onlu auto splice when it's a data frame
romainfrancois Aug 19, 2019
dad4cb4
vctrs based filter()
romainfrancois Aug 19, 2019
45cb714
vctrs based slice()
romainfrancois Aug 20, 2019
278b52e
Abaandon old internal slice() support code
romainfrancois Aug 20, 2019
9688558
vctrs:: implementation of mutate()
romainfrancois Aug 23, 2019
1d455ae
Abandon pre vctrs:: mutate() internal impl
romainfrancois Aug 23, 2019
6bf809d
Remove hybrid evaluation implementation, so that we can simplify the …
romainfrancois Aug 23, 2019
ed32f0e
Remove previous C++ version of data mask
romainfrancois Aug 23, 2019
bc11d34
Promoting data mask to an R6 class
romainfrancois Aug 30, 2019
1b19afb
Remove test that is now irrelevant
romainfrancois Aug 30, 2019
2f1d017
Update tibble requirement, until https://github.com/tidyverse/tibble/…
romainfrancois Aug 30, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
vctrs based slice()
  • Loading branch information
romainfrancois committed Aug 20, 2019
commit 45cb71442726bbc69d2dd723e0d51a200651b417
74 changes: 71 additions & 3 deletions R/tbl-df.r
Original file line number Diff line number Diff line change
Expand Up @@ -221,11 +221,79 @@ slice.tbl_df <- function(.data, ..., .preserve = FALSE) {
return(.data)
}

rows <- group_rows(.data)
mask <- groupwise_data_mask(.data, rows)
caller <- caller_env()

old_group_size <- context_env[["..group_size"]]
old_group_number <- context_env[["..group_number"]]
on.exit({
context_env[["..group_size"]] <- old_group_size
context_env[["..group_number"]] <- old_group_number
})

quo <- quo(c(!!!dots))
out <- slice_impl(.data, quo)
if (!.preserve && is_grouped_df(.data)) {
out <- regroup(out)

slice_indices <- new_list(length(rows))
new_rows <- new_list(length(rows))
k <- 1L

for (group in seq_along(rows)) {
current_rows <- rows[[group]]
mask$.set_current_group(group)
n <- length(current_rows)
if (n == 0L) {
new_rows[[group]] <- integer()
next
}
context_env[["..group_size"]] <- n
context_env[["..group_number"]] <- group

res <- eval_tidy(quo, mask, env = caller)

if (is.logical(res) && all(is.na(res))) {
res <- integer()
} else if (is.numeric(res)) {
res <- vec_cast(res, integer())
} else if (!is.integer(res)) {
abort(
"slice() expressions should return indices (positive or negative integers)",
"dplyr_slice_incompatible"
)
}

if (length(res) == 0L) {
# nothing to do
} else if(all(res >= 0, na.rm = TRUE)) {
res <- res[!is.na(res) & res <= length(current_rows) & res > 0]
} else if (all(res <= 0, na.rm = TRUE)) {
res <- setdiff(seq_along(current_rows), -res)
} else {
abort(
"slice() expressions should return either all positive or all negative",
"dplyr_slice_ambiguous"
)
}

slice_indices[[group]] <- current_rows[res]
new_k <- k + length(res)
new_rows[[group]] <- seq2(k, new_k - 1L)
k <- new_k
}
all_slice_indices <- vec_c(!!!slice_indices, .ptype = integer())

out <- vec_slice(.data, all_slice_indices)

if (is_grouped_df(.data)) {
new_groups <- group_data(.data)
new_groups$.rows <- new_list_of(new_rows, ptype = integer())
attr(out, "groups") <- new_groups

if (!.preserve) {
out <- regroup(out)
}
}

out
}
#' @export
Expand Down
38 changes: 11 additions & 27 deletions tests/testthat/test-slice.r
Original file line number Diff line number Diff line change
Expand Up @@ -22,22 +22,17 @@ test_that("slice silently ignores out of range values (#226)", {
test_that("slice works with negative indices", {
res <- slice(mtcars, -(1:2))
exp <- tail(mtcars, -2)
expect_equal(names(res), names(exp))
for (col in names(res)) {
expect_equal(res[[col]], exp[[col]])
}
expect_equivalent(res, exp)
})

test_that("slice forbids positive and negative together", {
expect_error(
mtcars %>% slice(c(-1, 2)),
"Found 1 positive indices and 1 negative indices",
fixed = TRUE
class = "dplyr_slice_ambiguous"
)
expect_error(
mtcars %>% slice(c(2:3, -1)),
"Found 2 positive indices and 1 negative indices",
fixed = TRUE
class = "dplyr_slice_ambiguous"
)
})

Expand Down Expand Up @@ -86,8 +81,14 @@ test_that("slice handles NA (#1235)", {
test_that("slice handles logical NA (#3970)", {
df <- tibble(x = 1:3)
expect_equal(nrow(slice(df, NA)), 0L)
expect_error(slice(df, TRUE))
expect_error(slice(df, FALSE))
expect_error(
slice(df, TRUE),
class = "dplyr_slice_incompatible"
)
expect_error(
slice(df, FALSE),
class = "dplyr_slice_incompatible"
)
})

test_that("slice handles empty data frames (#1219)", {
Expand Down Expand Up @@ -116,12 +117,6 @@ test_that("slice works with zero-column data frames (#2490)", {
)
})

test_that("slice works under gctorture2", {
x <- tibble(y = 1:10)
with_gctorture2(999, x2 <- slice(x, 1:10))
expect_identical(x, x2)
})

test_that("slice correctly computes positive indices from negative indices (#3073)", {
x <- tibble(y = 1:10)
expect_identical(slice(x, -10:-30), tibble(y = 1:9))
Expand Down Expand Up @@ -159,12 +154,6 @@ test_that("slice skips 0 (#3313)", {
expect_identical(slice(d, c(0, -1)), slice(d, -1))
})

test_that("slice is not confused about dense groups (#3753)",{
df <- tibble(row = 1:3)
expect_equal(slice(df, c(2,1,3))$row, c(2L,1L,3L))
expect_equal(slice(df, c(1,1,1))$row, rep(1L, 3))
})

test_that("slice accepts ... (#3804)", {
expect_equal(slice(mtcars, 1, 2), slice(mtcars, 1:2))
expect_equal(slice(mtcars, 1, n()), slice(mtcars, c(1, nrow(mtcars))))
Expand All @@ -187,8 +176,3 @@ test_that("slice does not evaluate the expression in empty groups (#1438)", {
)
expect_equal(nrow(res), 3L)
})

test_that("column_subset() falls back to R indexing on esoteric data types (#4128)", {
res <- slice(tibble::enframe(formals(rnorm)), 2:3)
expect_identical(res, tibble(name = c("mean", "sd"), value = list(0, 1)))
})