forked from tidyverse/dplyr
-
Notifications
You must be signed in to change notification settings - Fork 0
/
group_by.Rd
114 lines (95 loc) · 3.36 KB
/
group_by.Rd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/group-by.r
\name{group_by}
\alias{group_by}
\alias{ungroup}
\title{Group by one or more variables}
\usage{
group_by(.data, ..., .add = FALSE, .drop = group_by_drop_default(.data))
ungroup(x, ...)
}
\arguments{
\item{.data}{A data frame, data frame extension (e.g. a tibble), or a
lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for
more details.}
\item{...}{In \code{group_by()}, variables or computations to group by.
In \code{ungroup()}, variables to remove from the grouping.}
\item{.add}{When \code{FALSE}, the default, \code{group_by()} will
override existing groups. To add to the existing groups, use
\code{.add = TRUE}.
This argument was previously called \code{add}, but that prevented
creating a new grouping variable called \code{add}, and conflicts with
our naming conventions.}
\item{.drop}{Drop groups formed by factor levels that don't appear in the
data? The default is \code{TRUE} except when \code{.data} has been previously
grouped with \code{.drop = FALSE}. See \code{\link[=group_by_drop_default]{group_by_drop_default()}} for details.}
\item{x}{A \code{\link[=tbl]{tbl()}}}
}
\value{
A grouped data frame with class \code{\link{grouped_df}},
unless the combination of \code{...} and \code{add} yields a empty set of
grouping columns, in which case a tibble will be returned.
}
\description{
Most data operations are done on groups defined by variables.
\code{group_by()} takes an existing tbl and converts it into a grouped tbl
where operations are performed "by group". \code{ungroup()} removes grouping.
}
\section{Methods}{
These function are \strong{generic}s, which means that packages can provide
implementations (methods) for other classes. See the documentation of
individual methods for extra arguments and differences in behaviour.
Methods available in currently loaded packages:
\itemize{
\item \code{group_by()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("group_by")}.
\item \code{ungroup()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("ungroup")}.
}
}
\examples{
by_cyl <- mtcars \%>\% group_by(cyl)
# grouping doesn't change how the data looks (apart from listing
# how it's grouped):
by_cyl
# It changes how it acts with the other dplyr verbs:
by_cyl \%>\% summarise(
disp = mean(disp),
hp = mean(hp)
)
by_cyl \%>\% filter(disp == max(disp))
# Each call to summarise() removes a layer of grouping
by_vs_am <- mtcars \%>\% group_by(vs, am)
by_vs <- by_vs_am \%>\% summarise(n = n())
by_vs
by_vs \%>\% summarise(n = sum(n))
# To removing grouping, use ungroup
by_vs \%>\%
ungroup() \%>\%
summarise(n = sum(n))
# You can group by expressions: this is just short-hand for
# a mutate() followed by a group_by()
mtcars \%>\% group_by(vsam = vs + am)
# By default, group_by() overrides existing grouping
by_cyl \%>\%
group_by(vs, am) \%>\%
group_vars()
# Use add = TRUE to instead append
by_cyl \%>\%
group_by(vs, am, .add = TRUE) \%>\%
group_vars()
# when factors are involved and .drop = FALSE, groups can be empty
tbl <- tibble(
x = 1:10,
y = factor(rep(c("a", "c"), each = 5), levels = c("a", "b", "c"))
)
tbl \%>\%
group_by(y, .drop = FALSE) \%>\%
group_rows()
}
\seealso{
Other grouping functions:
\code{\link{group_map}()},
\code{\link{group_nest}()},
\code{\link{group_split}()},
\code{\link{group_trim}()}
}
\concept{grouping functions}