forked from insightsengineering/tern
-
Notifications
You must be signed in to change notification settings - Fork 0
/
prune_occurrences.R
240 lines (232 loc) · 8.4 KB
/
prune_occurrences.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
#' Occurrence table pruning
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Family of constructor and condition functions to flexibly prune occurrence tables.
#' The condition functions always return whether the row result is higher than the threshold.
#' Since they are of class [CombinationFunction()] they can be logically combined with other condition
#' functions.
#'
#' @note Since most table specifications are worded positively, we name our constructor and condition
#' functions positively, too. However, note that the result of [keep_rows()] says what
#' should be pruned, to conform with the [rtables::prune_table()] interface.
#'
#' @examples
#' \donttest{
#' tab <- basic_table() %>%
#' split_cols_by("ARM") %>%
#' split_rows_by("RACE") %>%
#' split_rows_by("STRATA1") %>%
#' summarize_row_groups() %>%
#' analyze_vars("COUNTRY", .stats = "count_fraction") %>%
#' build_table(DM)
#' }
#'
#' @name prune_occurrences
NULL
#' @describeIn prune_occurrences Constructor for creating pruning functions based on
#' a row condition function. This removes all analysis rows (`TableRow`) that should be
#' pruned, i.e., don't fulfill the row condition. It removes the sub-tree if there are no
#' children left.
#'
#' @param row_condition (`CombinationFunction`)\cr condition function which works on individual
#' analysis rows and flags whether these should be kept in the pruned table.
#'
#' @return
#' * `keep_rows()` returns a pruning function that can be used with [rtables::prune_table()]
#' to prune an `rtables` table.
#'
#' @examples
#' \donttest{
#' # `keep_rows`
#' is_non_empty <- !CombinationFunction(all_zero_or_na)
#' prune_table(tab, keep_rows(is_non_empty))
#' }
#'
#' @export
keep_rows <- function(row_condition) {
checkmate::assert_function(row_condition)
function(table_tree) {
if (inherits(table_tree, "TableRow")) {
return(!row_condition(table_tree))
}
children <- tree_children(table_tree)
identical(length(children), 0L)
}
}
#' @describeIn prune_occurrences Constructor for creating pruning functions based on
#' a condition for the (first) content row in leaf tables. This removes all leaf tables where
#' the first content row does not fulfill the condition. It does not check individual rows.
#' It then proceeds recursively by removing the sub tree if there are no children left.
#'
#' @param content_row_condition (`CombinationFunction`)\cr condition function which works on individual
#' first content rows of leaf tables and flags whether these leaf tables should be kept in the pruned table.
#'
#' @return
#' * `keep_content_rows()` returns a pruning function that checks the condition on the first content
#' row of leaf tables in the table.
#'
#' @examples
#' # `keep_content_rows`
#' \donttest{
#' more_than_twenty <- has_count_in_cols(atleast = 20L, col_names = names(tab))
#' prune_table(tab, keep_content_rows(more_than_twenty))
#' }
#'
#' @export
keep_content_rows <- function(content_row_condition) {
checkmate::assert_function(content_row_condition)
function(table_tree) {
if (is_leaf_table(table_tree)) {
content_row <- h_content_first_row(table_tree)
return(!content_row_condition(content_row))
}
if (inherits(table_tree, "DataRow")) {
return(FALSE)
}
children <- tree_children(table_tree)
identical(length(children), 0L)
}
}
#' @describeIn prune_occurrences Constructor for creating condition functions on total counts in the specified columns.
#'
#' @param atleast (`numeric(1)`)\cr threshold which should be met in order to keep the row.
#' @param ... arguments for row or column access, see [`rtables_access`]: either `col_names` (`character`) including
#' the names of the columns which should be used, or alternatively `col_indices` (`integer`) giving the indices
#' directly instead.
#'
#' @return
#' * `has_count_in_cols()` returns a condition function that sums the counts in the specified column.
#'
#' @examples
#' \donttest{
#' more_than_one <- has_count_in_cols(atleast = 1L, col_names = names(tab))
#' prune_table(tab, keep_rows(more_than_one))
#' }
#'
#' @export
has_count_in_cols <- function(atleast, ...) {
checkmate::assert_count(atleast)
CombinationFunction(function(table_row) {
row_counts <- h_row_counts(table_row, ...)
total_count <- sum(row_counts)
total_count >= atleast
})
}
#' @describeIn prune_occurrences Constructor for creating condition functions on any of the counts in
#' the specified columns satisfying a threshold.
#'
#' @param atleast (`numeric(1)`)\cr threshold which should be met in order to keep the row.
#'
#' @return
#' * `has_count_in_any_col()` returns a condition function that compares the counts in the
#' specified columns with the threshold.
#'
#' @examples
#' \donttest{
#' # `has_count_in_any_col`
#' any_more_than_one <- has_count_in_any_col(atleast = 1L, col_names = names(tab))
#' prune_table(tab, keep_rows(any_more_than_one))
#' }
#'
#' @export
has_count_in_any_col <- function(atleast, ...) {
checkmate::assert_count(atleast)
CombinationFunction(function(table_row) {
row_counts <- h_row_counts(table_row, ...)
any(row_counts >= atleast)
})
}
#' @describeIn prune_occurrences Constructor for creating condition functions on total fraction in
#' the specified columns.
#'
#' @return
#' * `has_fraction_in_cols()` returns a condition function that sums the counts in the
#' specified column, and computes the fraction by dividing by the total column counts.
#'
#' @examples
#' \donttest{
#' # `has_fraction_in_cols`
#' more_than_five_percent <- has_fraction_in_cols(atleast = 0.05, col_names = names(tab))
#' prune_table(tab, keep_rows(more_than_five_percent))
#' }
#'
#' @export
has_fraction_in_cols <- function(atleast, ...) {
assert_proportion_value(atleast, include_boundaries = TRUE)
CombinationFunction(function(table_row) {
row_counts <- h_row_counts(table_row, ...)
total_count <- sum(row_counts)
col_counts <- h_col_counts(table_row, ...)
total_n <- sum(col_counts)
total_percent <- total_count / total_n
total_percent >= atleast
})
}
#' @describeIn prune_occurrences Constructor for creating condition functions on any fraction in
#' the specified columns.
#'
#' @return
#' * `has_fraction_in_any_col()` returns a condition function that looks at the fractions
#' in the specified columns and checks whether any of them fulfill the threshold.
#'
#' @examples
#' \donttest{
#' # `has_fraction_in_any_col`
#' any_atleast_five_percent <- has_fraction_in_any_col(atleast = 0.05, col_names = names(tab))
#' prune_table(tab, keep_rows(more_than_five_percent))
#' }
#'
#' @export
has_fraction_in_any_col <- function(atleast, ...) {
assert_proportion_value(atleast, include_boundaries = TRUE)
CombinationFunction(function(table_row) {
row_fractions <- h_row_fractions(table_row, ...)
any(row_fractions >= atleast)
})
}
#' @describeIn prune_occurrences Constructor for creating condition function that checks the difference
#' between the fractions reported in each specified column.
#'
#' @return
#' * `has_fractions_difference()` returns a condition function that extracts the fractions of each
#' specified column, and computes the difference of the minimum and maximum.
#'
#' @examples
#' \donttest{
#' # `has_fractions_difference`
#' more_than_five_percent_diff <- has_fractions_difference(atleast = 0.05, col_names = names(tab))
#' prune_table(tab, keep_rows(more_than_five_percent_diff))
#' }
#'
#' @export
has_fractions_difference <- function(atleast, ...) {
assert_proportion_value(atleast, include_boundaries = TRUE)
CombinationFunction(function(table_row) {
fractions <- h_row_fractions(table_row, ...)
difference <- diff(range(fractions))
difference >= atleast
})
}
#' @describeIn prune_occurrences Constructor for creating condition function that checks the difference
#' between the counts reported in each specified column.
#'
#' @return
#' * `has_counts_difference()` returns a condition function that extracts the counts of each
#' specified column, and computes the difference of the minimum and maximum.
#'
#' @examples
#' \donttest{
#' more_than_one_diff <- has_counts_difference(atleast = 1L, col_names = names(tab))
#' prune_table(tab, keep_rows(more_than_one_diff))
#' }
#'
#' @export
has_counts_difference <- function(atleast, ...) {
checkmate::assert_count(atleast)
CombinationFunction(function(table_row) {
counts <- h_row_counts(table_row, ...)
difference <- diff(range(counts))
difference >= atleast
})
}