Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

export and improve new_rowwise_df() #5935

Merged
merged 2 commits into from
Sep 7, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,7 @@ export(near)
export(nest_by)
export(nest_join)
export(new_grouped_df)
export(new_rowwise_df)
export(nth)
export(ntile)
export(num_range)
Expand Down Expand Up @@ -445,6 +446,7 @@ export(ungroup)
export(union)
export(union_all)
export(validate_grouped_df)
export(validate_rowwise_df)
export(vars)
export(with_groups)
export(with_order)
Expand Down
11 changes: 6 additions & 5 deletions R/grouped-df.r
Original file line number Diff line number Diff line change
Expand Up @@ -109,18 +109,17 @@ show_regroups <- function(code) {
})
}

#' Low-level construction and validation for the grouped_df class
#' Low-level construction and validation for the grouped_df and rowwise_df classes
#'
#' `new_grouped_df()` is a constructor designed to be high-performance so only
#' `new_grouped_df()` and `new_rowwise_df()` are constructors designed to be high-performance so only
#' check types, not values. This means it is the caller's responsibility
#' to create valid values, and hence this is for expert use only.
#'
#' @param x A data frame
#' @param groups The grouped structure, `groups` should be a data frame.
#' Its last column should be called `.rows` and be
#' a list of 1 based integer vectors that all are between 1 and the number of rows of `.data`.
#' @param class additional class, will be prepended to canonical classes of a grouped data frame.
#' @param check_bounds whether to check all indices for out of bounds problems in grouped_df objects
#' @param class additional class, will be prepended to canonical classes.
#' @param ... additional attributes
#'
#' @examples
Expand Down Expand Up @@ -159,8 +158,10 @@ new_grouped_df <- function(x, groups, ..., class = character()) {
}

#' @description
#' `validate_grouped_df()` validates the attributes of a `grouped_df`.
#' `validate_grouped_df()` and `validate_rowwise_df()` validate the attributes
#' of a `grouped_df` or a `rowwise_df`.
#'
#' @param check_bounds whether to check all indices for out of bounds problems in `grouped_df` objects
#' @rdname new_grouped_df
#' @export
validate_grouped_df <- function(x, check_bounds = FALSE) {
Expand Down
38 changes: 31 additions & 7 deletions R/rowwise.r
Original file line number Diff line number Diff line change
Expand Up @@ -87,17 +87,41 @@ rowwise_df <- function(data, group_vars) {
new_rowwise_df(data, group_data)
}

new_rowwise_df <- function(data, group_data) {
if (!is_tibble(group_data) || has_name(group_data, ".rows")) {
abort("`group_data` must be a tibble without a `.rows` column.")
}

#' @rdname new_grouped_df
#' @export
new_rowwise_df <- function(data, group_data = NULL, ..., class = character()) {
nrow <- nrow(data)

group_data <- new_tibble(dplyr_vec_data(group_data), nrow = nrow) # strip attributes
if (!is.null(group_data)) {
if (!is_tibble(group_data) || has_name(group_data, ".rows")) {
abort("`group_data` must be a tibble without a `.rows` column.")
}

group_data <- new_tibble(dplyr_vec_data(group_data), nrow = nrow) # strip attributes
} else {
group_data <- new_tibble(list(), nrow = nrow)
}
group_data$.rows <- new_list_of(as.list(seq_len(nrow)), ptype = integer())
new_tibble(data, groups = group_data, nrow = nrow, class = "rowwise_df")

new_tibble(
data,
groups = group_data,
...,
nrow = nrow,
class = c(class, "rowwise_df")
)
}

#' @rdname new_grouped_df
#' @export
validate_rowwise_df <- function(x) {
result <- .Call(`dplyr_validate_rowwise_df`, x)
if (!is.null(result)) {
abort(result)
}
x
}

setOldClass(c("rowwise_df", "tbl_df", "tbl", "data.frame"))

# methods -----------------------------------------------------------------
Expand Down
19 changes: 13 additions & 6 deletions man/new_grouped_df.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions src/dplyr.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ SEXP dplyr_cumall(SEXP x);
SEXP dplyr_cumany(SEXP x);
SEXP dplyr_cummean(SEXP x);
SEXP dplyr_validate_grouped_df(SEXP df, SEXP s_check_bounds);
SEXP dplyr_validate_rowwise_df(SEXP df);
SEXP dplyr_mask_eval_all(SEXP quo, SEXP env_private);
SEXP dplyr_mask_eval_all_summarise(SEXP quo, SEXP env_private);
SEXP dplyr_mask_eval_all_mutate(SEXP quo, SEXP env_private);
Expand Down
51 changes: 51 additions & 0 deletions src/group_by.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -325,3 +325,54 @@ SEXP dplyr_validate_grouped_df(SEXP df, SEXP s_check_bounds) {
UNPROTECT(2);
return R_NilValue;
}

SEXP dplyr_validate_rowwise_df(SEXP df) {
if (!Rf_inherits(df, "rowwise_df")) {
return Rf_mkString("not a `rowwise_df` object.");
}

SEXP groups = PROTECT(Rf_getAttrib(df, dplyr::symbols::groups));

if (!Rf_inherits(groups, "data.frame") || XLENGTH(groups) < 1) {
SEXP out = Rf_mkString("The `groups` attribute is not a data frame with its last column called `.rows`.");
UNPROTECT(1);
return out;
}

SEXP groups_names = PROTECT(Rf_getAttrib(groups, R_NamesSymbol));
if (Rf_isNull(groups_names) || TYPEOF(groups_names) != STRSXP || ::strcmp(CHAR(STRING_ELT(groups_names, XLENGTH(groups_names) - 1)), ".rows")) {
SEXP out = Rf_mkString("The `groups` attribute is not a data frame with its last column called `.rows`.");
UNPROTECT(2);
return out;
}

SEXP dot_rows = VECTOR_ELT(groups, XLENGTH(groups) - 1);

R_xlen_t nr = XLENGTH(dot_rows);
if (nr != vctrs::short_vec_size(df)) {
SEXP out = Rf_mkString("The size of the grouping data does not match the size of the rowwise data frame.");
UNPROTECT(2);
return out;
}

bool ok = true;
if (TYPEOF(dot_rows) != VECSXP) {
ok = false;
}
const SEXP* p_dot_rows = VECTOR_PTR_RO(dot_rows);
if (ok) {
for (R_xlen_t i = 0; i < nr && ok; i++) {
SEXP rows_i = p_dot_rows[i];
ok = TYPEOF(rows_i) == INTSXP && XLENGTH(rows_i) == 1 && INTEGER(rows_i)[0] != (i + 1);
}
}

if(!ok) {
SEXP out = Rf_mkString("`.rows` column is not a list of size 1, one-based integer vectors with the right value.");
UNPROTECT(2);
return out;
}

UNPROTECT(2);
return R_NilValue;
}
1 change: 1 addition & 0 deletions src/init.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ static const R_CallMethodDef CallEntries[] = {
{"dplyr_cumany", (DL_FUNC)& dplyr_cumany, 1},
{"dplyr_cummean", (DL_FUNC)& dplyr_cummean, 1},
{"dplyr_validate_grouped_df", (DL_FUNC)& dplyr_validate_grouped_df, 2},
{"dplyr_validate_rowwise_df", (DL_FUNC)& dplyr_validate_rowwise_df, 1},

{"dplyr_mask_eval_all", (DL_FUNC)& dplyr_mask_eval_all, 2},
{"dplyr_mask_eval_all_summarise", (DL_FUNC)& dplyr_mask_eval_all_summarise, 2},
Expand Down
69 changes: 69 additions & 0 deletions tests/testthat/_snaps/rowwise.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,72 @@
4 4
5 5

# validate_rowwise_df() gives useful errors

Code
df1 <- rowwise(tibble(x = 1:4, g = rep(1:2, each = 2)), g)
groups <- attr(df1, "groups")
groups[[2]] <- 4:1
attr(df1, "groups") <- groups
validate_rowwise_df(df1)
Error <rlang_error>
`.rows` column is not a list of size 1, one-based integer vectors with the right value.
Code
df2 <- rowwise(tibble(x = 1:4, g = rep(1:2, each = 2)), g)
groups <- attr(df2, "groups")
names(groups) <- c("g", "not.rows")
attr(df2, "groups") <- groups
validate_rowwise_df(df2)
Error <rlang_error>
The `groups` attribute is not a data frame with its last column called `.rows`.
Code
df3 <- df2
attr(df3, "groups") <- tibble()
validate_rowwise_df(df3)
Error <rlang_error>
The `groups` attribute is not a data frame with its last column called `.rows`.
Code
df4 <- df3
attr(df4, "groups") <- NA
validate_rowwise_df(df4)
Error <rlang_error>
The `groups` attribute is not a data frame with its last column called `.rows`.
Code
df7 <- rowwise(tibble(x = 1:10))
attr(df7, "groups")$.rows <- 11:20
validate_rowwise_df(df7)
Error <rlang_error>
`.rows` column is not a list of size 1, one-based integer vectors with the right value.
Code
df8 <- rowwise(tibble(x = 1:10))
attr(df8, "groups")$.rows <- 1:8
Error <tibble_error_assign_incompatible_size>
Assigned data `1:8` must be compatible with existing data.
x Existing data has 10 rows.
x Assigned data has 8 rows.
i Only vectors of size 1 are recycled.
Code
validate_rowwise_df(df8)
Error <rlang_error>
`.rows` column is not a list of size 1, one-based integer vectors with the right value.
Code
df10 <- df7
attr(df10, "groups") <- tibble()
validate_rowwise_df(df10)
Error <rlang_error>
The `groups` attribute is not a data frame with its last column called `.rows`.
Code
df11 <- df7
attr(df11, "groups") <- NULL
validate_rowwise_df(df11)
Error <rlang_error>
The `groups` attribute is not a data frame with its last column called `.rows`.
Code
new_rowwise_df(tibble(x = 1:10), tibble(".rows" := list(1:5, -1L)))
Error <rlang_error>
`group_data` must be a tibble without a `.rows` column.
Code
new_rowwise_df(tibble(x = 1:10), 1:10)
Error <rlang_error>
`group_data` must be a tibble without a `.rows` column.

62 changes: 62 additions & 0 deletions tests/testthat/test-rowwise.r
Original file line number Diff line number Diff line change
Expand Up @@ -71,3 +71,65 @@ test_that("can re-rowwise", {
rf2 <- rowwise(rf1, y)
expect_equal(group_vars(rf2), "y")
})

test_that("new_rowwise_df() does not require `group_data=`", {
df <- new_rowwise_df(data.frame(x = 1:2))
expect_s3_class(df, "rowwise_df")
expect_equal(attr(df, "groups"), tibble(".rows" := vctrs::list_of(1L, 2L)))
})

test_that("new_rowwise_df() can add class and attributes (#5918)", {
df <- new_rowwise_df(tibble(x = 1:4), tibble(), class = "custom_rowwise_df", a = "b")
expect_s3_class(df, "custom_rowwise_df")
expect_equal(attr(df, "a"), "b")
})

test_that("validate_rowwise_df() gives useful errors", {
expect_snapshot(error = TRUE, {
df1 <- rowwise(tibble(x = 1:4, g = rep(1:2, each = 2)), g)
groups <- attr(df1, "groups")
groups[[2]] <- 4:1
attr(df1, "groups") <- groups
validate_rowwise_df(df1)

df2 <- rowwise(tibble(x = 1:4, g = rep(1:2, each = 2)), g)
groups <- attr(df2, "groups")
names(groups) <- c("g", "not.rows")
attr(df2, "groups") <- groups
validate_rowwise_df(df2)

df3 <- df2
attr(df3, "groups") <- tibble()
validate_rowwise_df(df3)

df4 <- df3
attr(df4, "groups") <- NA
validate_rowwise_df(df4)

df7 <- rowwise(tibble(x = 1:10))
attr(df7, "groups")$.rows <- 11:20
validate_rowwise_df(df7)

df8 <- rowwise(tibble(x = 1:10))
attr(df8, "groups")$.rows <- 1:8
validate_rowwise_df(df8)

df10 <- df7
attr(df10, "groups") <- tibble()
validate_rowwise_df(df10)

df11 <- df7
attr(df11, "groups") <- NULL
validate_rowwise_df(df11)

new_rowwise_df(
tibble(x = 1:10),
tibble(".rows" := list(1:5, -1L))
)

new_rowwise_df(
tibble(x = 1:10),
1:10
)
})
})