Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

vctrs powered distinct() #4512

Merged
merged 32 commits into from
Jul 31, 2019
Merged
Changes from 1 commit
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
b4be19c
experiment with vctrs
romainfrancois Jul 22, 2019
d61303c
initial impl of expand_groups()
romainfrancois Jul 22, 2019
bd681f8
move expand_groups C++ function to its own file
romainfrancois Jul 23, 2019
731527b
no longer need ListExpander
romainfrancois Jul 23, 2019
bc3aaa8
implementation of VectorExpander
romainfrancois Jul 23, 2019
377eeec
collecting new rows recursively in *Expander
romainfrancois Jul 23, 2019
24b231a
simplify impl of expand_groups, i.e. no use of boost::shared_ptr
romainfrancois Jul 23, 2019
dce8df6
support for add= in bunch_by()
romainfrancois Jul 25, 2019
0aba2c0
dealing with implicit NA in factors in bunch_by()
romainfrancois Jul 26, 2019
5988379
bunch_by() deals with empty factors
romainfrancois Jul 26, 2019
b2c43ff
bunch_by() warning about implicit NA in factors
romainfrancois Jul 26, 2019
2bb1611
bunch_by() returns ungrouped data when no grouping variable selected
romainfrancois Jul 26, 2019
3aac179
moving warning about implicit NA to the R side
romainfrancois Jul 26, 2019
78f5fad
bunch_by() handles list as grouping variables
romainfrancois Jul 27, 2019
e2d9c30
Reinject bunch_by() in existing function grouped_df()
romainfrancois Jul 29, 2019
66a17f9
use grouped_df() instead of grouped_df_impl()
romainfrancois Jul 29, 2019
a032b15
skipping some tests until some tibble fixes
romainfrancois Jul 29, 2019
568a3aa
- grouped_df_impl() c++ function
romainfrancois Jul 29, 2019
21124d2
using version 0.8.99.9000 in case we need to release a 0.8.4 before t…
romainfrancois Jul 30, 2019
1281049
R implementation of regroup()
romainfrancois Jul 30, 2019
edd459a
Trim old Slicer code that is no longer used because group_by() hashes…
romainfrancois Jul 30, 2019
8321217
Declare global variables (bc of %<-%).
romainfrancois Jul 30, 2019
39e63dd
using dev tibble
romainfrancois Jul 30, 2019
4c3bfcc
adapt to https://github.com/r-lib/vctrs/pull/515
romainfrancois Jul 31, 2019
5fbde9f
reverse order of remotes
romainfrancois Jul 31, 2019
3c56f69
no longer need ::: for vec_split_id()
romainfrancois Jul 31, 2019
e56febe
skip a test for now :shrug:
romainfrancois Jul 31, 2019
92682da
NEWS [ci skip]
romainfrancois Jul 31, 2019
f745d9c
vctrs:: based implementation of distinct()
romainfrancois Jul 31, 2019
33fac18
rm obsolete internal distinct_impl()
romainfrancois Jul 31, 2019
8a86445
vctrs based implementation of n_distinct()
romainfrancois Jul 31, 2019
7fbd389
Merge remote-tracking branch 'origin/dev_0_9_0' into vctrs_distinct
romainfrancois Jul 31, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
bunch_by() warning about implicit NA in factors
  • Loading branch information
romainfrancois committed Jul 26, 2019
commit b2c43ffaa391d4f40b4ac566f85858f41c2ccffe
29 changes: 18 additions & 11 deletions src/expand_groups.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ class ExpanderCollecter {
leaf_index(0),
vec_new_indices(nvars)
{
for(int i=0; i<nvars; i++) {
for (int i = 0; i < nvars; i++) {
new_indices[i] = Rf_allocVector(INTSXP, new_size);
vec_new_indices[i] = INTEGER(new_indices[i]);
}
Expand All @@ -51,12 +51,12 @@ class ExpanderCollecter {
new_rows[leaf_index++] = old_rows[start];
}

return ExpanderResult(leaf_index-1, leaf_index, index);
return ExpanderResult(leaf_index - 1, leaf_index, index);
}

ExpanderResult collect_node(int depth, int index, const std::vector<Expander*>& expanders) {
int n = expanders.size();
if(n == 0) {
if (n == 0) {
return ExpanderResult(NA_INTEGER, NA_INTEGER, index);
}

Expand Down Expand Up @@ -111,7 +111,7 @@ Expander* expander(const std::vector<SEXP>& data, const std::vector<int*>& posit

inline int expanders_size(const std::vector<Expander*> expanders) {
int n = 0;
for (int i=0; i<expanders.size(); i++) {
for (int i = 0; i < expanders.size(); i++) {
n += expanders[i]->size();
}
return n;
Expand Down Expand Up @@ -147,8 +147,8 @@ class FactorExpander : public Expander {
expanders.push_back(expander(data, positions, depth_ + 1, NA_INTEGER, j, end));
}
}
~FactorExpander(){
for(int i=expanders.size()-1; i>=0; i--) delete expanders[i];
~FactorExpander() {
for (int i = expanders.size() - 1; i >= 0; i--) delete expanders[i];
}

virtual int size() const {
Expand Down Expand Up @@ -180,17 +180,17 @@ class VectorExpander : public Expander {
} else {
int* vec_pos = positions_[depth_];

for(int j = start; j < end;) {
for (int j = start; j < end;) {
int current = vec_pos[j];
int start_idx = j;
while(j < end && vec_pos[++j] == current);
while (j < end && vec_pos[++j] == current);
expanders.push_back(expander(data_, positions_, depth_ + 1, current, start_idx, j));
}
}

}
~VectorExpander(){
for(int i=expanders.size()-1; i>=0; i--) delete expanders[i];
~VectorExpander() {
for (int i = expanders.size() - 1; i >= 0; i--) delete expanders[i];
}

virtual int size() const {
Expand All @@ -214,7 +214,7 @@ class LeafExpander : public Expander {
end(end_)
{}

~LeafExpander(){}
~LeafExpander() {}

virtual int size() const {
return 1;
Expand Down Expand Up @@ -251,6 +251,13 @@ Rcpp::List expand_groups(Rcpp::DataFrame old_groups, Rcpp::List positions, int n
for (int i = 0; i < nvars; i++) {
vec_data[i] = old_groups[i];
vec_positions[i] = INTEGER(VECTOR_ELT(positions, i));

if (Rf_isFactor(vec_data[i])) {
Rcpp::IntegerVector xi(vec_data[i]);
if (std::find(xi.begin(), xi.end(), NA_INTEGER) < xi.end()) {
Rcpp::warningcall(R_NilValue, tfm::format("Factor `%s` contains implicit NA, consider using `forcats::fct_explicit_na`", CHAR(STRING_ELT(names, i))));
}
}
}

Expander* exp = expander(vec_data, vec_positions, 0, NA_INTEGER, 0, nr);
Expand Down