forked from tidyverse/dplyr
-
Notifications
You must be signed in to change notification settings - Fork 0
/
rowwise.r
144 lines (127 loc) · 4.49 KB
/
rowwise.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
#' Group input by rows
#'
#' @description
#' `rowwise()` allows you to compute on a data frame a row-at-a-time.
#' This is most useful when a vectorised function doesn't exist.
#'
#' Most dplyr verbs preserve row-wise grouping. The exception is [summarise()],
#' which return a [grouped_df]. You can explicitly ungroup with [ungroup()]
#' or [as_tibble()], or convert to a [grouped_df] with [group_by()].
#'
#' @section List-columns:
#' Because a rowwise has exactly one row per group it offers a small
#' convenience for working with list-columns. Normally, `summarise()` and
#' `mutate()` extract a groups worth of data with `[`. But when you index
#' a list in this way, you get back another list. When you're working with
#' a `rowwise` tibble, then dplyr will use `[[` instead of `[` to make your
#' life a little easier.
#'
#' @param data Input data frame.
#' @param ... <[`tidy-select`][dplyr_tidy_select]> Variables to be preserved
#' when calling [summarise()]. This is typically a set of variables whose
#' combination uniquely identify each row.
#'
#' **NB**: unlike `group_by()` you can not create new variables here but
#' instead you can select multiple variables with (e.g.) `everything()`.
#' @seealso [nest_by()] for a convenient way of creating rowwise data frames
#' with nested data.
#' @return A row-wise data frame with class `rowwise_df`. Note that a
#' `rowwise_df` is implicitly grouped by row, but is not a `grouped_df`.
#' @export
#' @examples
#' df <- tibble(x = runif(6), y = runif(6), z = runif(6))
#' # Compute the mean of x, y, z in each row
#' df %>% rowwise() %>% mutate(m = mean(c(x, y, z)))
#' # use c_across() to more easily select many variables
#' df %>% rowwise() %>% mutate(m = mean(c_across(x:z)))
#'
#' # Compute the minimum of x and y in each row
#' df %>% rowwise() %>% mutate(m = min(c(x, y, z)))
#' # In this case you can use an existing vectorised function:
#' df %>% mutate(m = pmin(x, y, z))
#' # Where these functions exist they'll be much faster than rowwise
#' # so be on the lookout for them.
#'
#' # rowwise() is also useful when doing simulations
#' params <- tribble(
#' ~sim, ~n, ~mean, ~sd,
#' 1, 1, 1, 1,
#' 2, 2, 2, 4,
#' 3, 3, -1, 2
#' )
#' # Here I supply variables to preserve after the summary
#' params %>%
#' rowwise(sim) %>%
#' summarise(z = rnorm(n, mean, sd))
#'
#' # If you want one row per simulation, put the results in a list()
#' params %>%
#' rowwise(sim) %>%
#' summarise(z = list(rnorm(n, mean, sd)))
rowwise <- function(data, ...) {
UseMethod("rowwise")
}
#' @export
rowwise.data.frame <- function(data, ...) {
vars <- tidyselect::eval_select(expr(c(...)), data)
rowwise_df(data, vars)
}
#' @export
rowwise.grouped_df <- function(data, ...) {
if (!missing(...)) {
abort(c(
"Can't re-group when creating rowwise data.",
i = "Either first `ungroup()` or call `rowwise()` without arguments."
))
}
rowwise_df(data, group_vars(data))
}
# Constructor + helper ----------------------------------------------------
rowwise_df <- function(data, group_vars) {
group_data <- as_tibble(data)[group_vars]
new_rowwise_df(data, group_data)
}
new_rowwise_df <- function(data, group_data) {
if (!is_tibble(group_data) || has_name(group_data, ".rows")) {
abort("`group_data` must be a tibble without a `.rows` column.")
}
nrow <- nrow(data)
group_data <- new_tibble(dplyr_vec_data(group_data), nrow = nrow) # strip attributes
group_data$.rows <- new_list_of(as.list(seq_len(nrow)), ptype = integer())
new_tibble(data, groups = group_data, nrow = nrow, class = "rowwise_df")
}
setOldClass(c("rowwise_df", "tbl_df", "tbl", "data.frame"))
# methods -----------------------------------------------------------------
#' @export
tbl_sum.rowwise_df <- function(x) {
c(
NextMethod(),
"Rowwise" = commas(group_vars(x))
)
}
#' @export
as_tibble.rowwise_df <- function(x, ...) {
new_tibble(dplyr_vec_data(x), nrow = nrow(x))
}
#' @importFrom tibble is_tibble
#' @export
`[.rowwise_df` <- function(x, i, j, drop = FALSE) {
out <- NextMethod()
if (!is.data.frame(out)) {
return(out)
}
group_vars <- intersect(names(out), group_vars(x))
rowwise_df(out, group_vars)
}
#' @export
`[<-.rowwise_df` <- function(x, i, j, ..., value) {
out <- NextMethod()
group_vars <- intersect(names(out), group_vars(x))
rowwise_df(out, group_vars)
}
#' @export
`names<-.rowwise_df` <- function(x, value) {
data <- NextMethod()
group_vars <- value[match(group_vars(x), names(x))]
rowwise_df(data, group_vars)
}