forked from tidyverse/dplyr
-
Notifications
You must be signed in to change notification settings - Fork 0
/
count.Rd
89 lines (76 loc) · 3.19 KB
/
count.Rd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/count-tally.R
\name{count}
\alias{count}
\alias{tally}
\alias{add_count}
\alias{add_tally}
\title{Count the observations in each group}
\usage{
count(x, ..., wt = NULL, sort = FALSE, name = NULL)
tally(x, wt = NULL, sort = FALSE, name = NULL)
add_count(x, ..., wt = NULL, sort = FALSE, name = NULL, .drop = deprecated())
add_tally(x, wt = NULL, sort = FALSE, name = NULL)
}
\arguments{
\item{x}{A data frame, data frame extension (e.g. a tibble), or a
lazy data frame (e.g. from dbplyr or dtplyr).}
\item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Variables to group
by.}
\item{wt}{<\code{\link[rlang:args_data_masking]{data-masking}}> Frequency weights.
Can be \code{NULL} or a variable:
\itemize{
\item If \code{NULL} (the default), counts the number of rows in each group.
\item If a variable, computes \code{sum(wt)} for each group.
}}
\item{sort}{If \code{TRUE}, will show the largest groups at the top.}
\item{name}{The name of the new column in the output.
If omitted, it will default to \code{n}. If there's already a column called \code{n},
it will use \code{nn}. If there are columns called \code{n} and \code{nn}, it'll use
\code{nnn}, and so on, adding \code{n}s until it gets a new name.}
\item{.drop}{For \code{count()}: if \code{FALSE} will include counts for empty groups
(i.e. for levels of factors that don't exist in the data). Deprecated in
\code{add_count()} since it didn't actually affect the output.}
}
\value{
An object of the same type as \code{x}. \code{count()} and \code{add_count()}
group transiently, so the output has the same groups as the input.
}
\description{
\code{count()} lets you quickly count the unique values of one or more variables:
\code{df \%>\% count(a, b)} is roughly equivalent to
\code{df \%>\% group_by(a, b) \%>\% summarise(n = n())}.
\code{count()} is paired with \code{tally()}, a lower-level helper that is equivalent
to \code{df \%>\% summarise(n = n())}. Supply \code{wt} to perform weighted counts,
switching the summary from \code{n = n()} to \code{n = sum(wt)}.
\code{add_count()} and \code{add_tally()} are equivalent to \code{count()} and \code{tally()}
but use \code{mutate()} instead of \code{summarise()} so that they add a new column
with group-wise counts.
}
\examples{
# count() is a convenient way to get a sense of the distribution of
# values in a dataset
starwars \%>\% count(species)
starwars \%>\% count(species, sort = TRUE)
starwars \%>\% count(sex, gender, sort = TRUE)
starwars \%>\% count(birth_decade = round(birth_year, -1))
# use the `wt` argument to perform a weighted count. This is useful
# when the data has already been aggregated once
df <- tribble(
~name, ~gender, ~runs,
"Max", "male", 10,
"Sandra", "female", 1,
"Susan", "female", 4
)
# counts rows:
df \%>\% count(gender)
# counts runs:
df \%>\% count(gender, wt = runs)
# tally() is a lower-level function that assumes you've done the grouping
starwars \%>\% tally()
starwars \%>\% group_by(species) \%>\% tally()
# both count() and tally() have add_ variants that work like
# mutate() instead of summarise()
df \%>\% add_count(gender, wt = runs)
df \%>\% add_tally(wt = runs)
}