Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Fix](inverted index) fix wrong opt for count_on_index #41127 #41153

Merged
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
[Fix](inverted index) fix wrong opt for count_on_index (#41127)
## Proposed changes

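The pushdownCountOnIndex optimization rule in AggregateStrategies was
incorrectly applied to COUNT functions with complex child expressions,
such as COUNT(CASE WHEN ...). This led to incorrect query results.
The rule now matches only when every aggregate function is a
non-distinct COUNT that is either COUNT(*) or whose first argument is a
plain slot (column reference).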
  • Loading branch information
airborne12 committed Sep 23, 2024
commit 4dca57ec11260a395826391af09d892dbad5c3a4
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,8 @@ public List<Rule> buildRules() {
.when(agg -> {
Set<AggregateFunction> funcs = agg.getAggregateFunctions();
return !funcs.isEmpty() && funcs.stream()
.allMatch(f -> f instanceof Count && !f.isDistinct());
.allMatch(f -> f instanceof Count && !f.isDistinct() && (((Count) f).isCountStar()
|| f.child(0) instanceof Slot));
})
.thenApply(ctx -> {
LogicalAggregate<LogicalFilter<LogicalOlapScan>> agg = ctx.root;
Expand All @@ -133,7 +134,8 @@ public List<Rule> buildRules() {
.when(agg -> agg.getGroupByExpressions().isEmpty())
.when(agg -> {
Set<AggregateFunction> funcs = agg.getAggregateFunctions();
return !funcs.isEmpty() && funcs.stream().allMatch(f -> f instanceof Count && !f.isDistinct());
return !funcs.isEmpty() && funcs.stream().allMatch(f -> f instanceof Count && !f.isDistinct()
&& (((Count) f).isCountStar() || f.child(0) instanceof Slot));
})
.thenApply(ctx -> {
LogicalAggregate<LogicalProject<LogicalFilter<LogicalOlapScan>>> agg = ctx.root;
Expand Down
100 changes: 100 additions & 0 deletions regression-test/data/inverted_index_p0/count-on-index.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
{"a": "2024-03-25", "b": "ISZHfv2OQ4", "c": "c1", "d": "d1", "e": "e1"}
{"a": "2023-02-20", "b": "0MgsB3vcIf", "c": "c3", "d": "d2", "e": "e2"}
{"a": "2023-11-13", "b": "tczLqYSRhY", "c": "c1", "d": "d2", "e": "e1"}
{"a": "2023-08-13", "b": "f86oV0P4s8", "c": "c2", "d": "d2", "e": "e2"}
{"a": "2023-07-13", "b": "YHD5LihHpK", "c": "c3", "d": "d2", "e": "e2"}
{"a": "2024-07-26", "b": "iSJxD3yKvH", "c": "c1", "d": "d2", "e": "e1"}
{"a": "2023-12-25", "b": "aNiLbwF1vg", "c": "c1", "d": "d2", "e": "e2"}
{"a": "2024-04-04", "b": "DGih88jW0H", "c": "c3", "d": "d3", "e": "e2"}
{"a": "2024-03-30", "b": "kxqqsrD1RH", "c": "c2", "d": "d3", "e": "e1"}
{"a": "2023-10-01", "b": "eySjxEnJvW", "c": "c2", "d": "d3", "e": "e1"}
{"a": "2023-09-19", "b": "lHAJZpcky5", "c": "c2", "d": "d2", "e": "e1"}
{"a": "2023-11-25", "b": "PB4l4NsNAn", "c": "c1", "d": "d3", "e": "e2"}
{"a": "2023-05-10", "b": "TP5M3xQDCj", "c": "c1", "d": "d2", "e": "e1"}
{"a": "2024-08-28", "b": "b1D8xLX4DN", "c": "c3", "d": "d2", "e": "e1"}
{"a": "2023-05-31", "b": "9gTsOoFITb", "c": "c2", "d": "d3", "e": "e2"}
{"a": "2023-01-14", "b": "rsv96RalgR", "c": "c3", "d": "d2", "e": "e2"}
{"a": "2024-08-31", "b": "Uz67DnurlH", "c": "c3", "d": "d2", "e": "e2"}
{"a": "2023-04-05", "b": "kPlM5F56kj", "c": "c2", "d": "d3", "e": "e2"}
{"a": "2023-09-18", "b": "wSGVCB6s3I", "c": "c3", "d": "d2", "e": "e2"}
{"a": "2023-01-28", "b": "2ZpPp1y5G5", "c": "c3", "d": "d2", "e": "e2"}
{"a": "2023-03-27", "b": "VwdMxBUnrc", "c": "c3", "d": "d1", "e": "e1"}
{"a": "2024-03-17", "b": "QPV71OyuZ2", "c": "c1", "d": "d2", "e": "e1"}
{"a": "2023-08-10", "b": "pyhphs1Mj4", "c": "c1", "d": "d1", "e": "e1"}
{"a": "2024-01-26", "b": "wxRO18q0EY", "c": "c2", "d": "d3", "e": "e1"}
{"a": "2023-11-04", "b": "vfVK2TsjTl", "c": "c2", "d": "d1", "e": "e2"}
{"a": "2023-04-27", "b": "eS6vCuQAXP", "c": "c3", "d": "d2", "e": "e1"}
{"a": "2024-02-25", "b": "6dx8DMPK9f", "c": "c2", "d": "d3", "e": "e2"}
{"a": "2023-04-16", "b": "3aJhtwXa7E", "c": "c2", "d": "d3", "e": "e1"}
{"a": "2024-07-08", "b": "Ue9xroXnHI", "c": "c3", "d": "d3", "e": "e2"}
{"a": "2023-08-19", "b": "gVPYCdN2eY", "c": "c2", "d": "d3", "e": "e2"}
{"a": "2024-02-24", "b": "qAaaKQpvd3", "c": "c2", "d": "d2", "e": "e1"}
{"a": "2024-01-10", "b": "XsNcGPnvvC", "c": "c2", "d": "d1", "e": "e2"}
{"a": "2023-03-04", "b": "BD7CEdzIhP", "c": "c2", "d": "d1", "e": "e1"}
{"a": "2023-03-18", "b": "sXESaouuHE", "c": "c2", "d": "d3", "e": "e1"}
{"a": "2023-06-28", "b": "DD3RE2pufi", "c": "c2", "d": "d3", "e": "e2"}
{"a": "2024-04-07", "b": "RdEFKIz8QW", "c": "c2", "d": "d1", "e": "e2"}
{"a": "2024-05-10", "b": "u3OhzAL2LH", "c": "c3", "d": "d1", "e": "e1"}
{"a": "2024-07-20", "b": "U0n5EVKjPm", "c": "c1", "d": "d1", "e": "e1"}
{"a": "2024-08-07", "b": "TXypE2ItVh", "c": "c3", "d": "d2", "e": "e1"}
{"a": "2024-08-08", "b": "8g3hPyCB2B", "c": "c3", "d": "d2", "e": "e1"}
{"a": "2023-01-05", "b": "aT6WlbObnZ", "c": "c3", "d": "d2", "e": "e2"}
{"a": "2023-06-05", "b": "mVqMi8Rzfi", "c": "c1", "d": "d1", "e": "e2"}
{"a": "2024-02-12", "b": "HxpmQ0draG", "c": "c1", "d": "d1", "e": "e1"}
{"a": "2024-05-20", "b": "R5a7gA61KY", "c": "c1", "d": "d2", "e": "e1"}
{"a": "2023-04-19", "b": "QacPa5V0Fj", "c": "c3", "d": "d3", "e": "e1"}
{"a": "2023-07-29", "b": "dwT8GxkWDA", "c": "c1", "d": "d1", "e": "e1"}
{"a": "2024-06-29", "b": "UCRkZWVEhK", "c": "c3", "d": "d1", "e": "e2"}
{"a": "2023-02-22", "b": "yMSAdFkaq9", "c": "c3", "d": "d1", "e": "e1"}
{"a": "2024-04-18", "b": "6Aa4VUyj7b", "c": "c2", "d": "d1", "e": "e2"}
{"a": "2023-09-20", "b": "xet5tOBGLy", "c": "c1", "d": "d1", "e": "e2"}
{"a": "2024-07-09", "b": "kyE5wM71uC", "c": "c1", "d": "d1", "e": "e2"}
{"a": "2024-03-05", "b": "J9UtyRClVj", "c": "c2", "d": "d2", "e": "e2"}
{"a": "2024-08-02", "b": "BnNajVStTq", "c": "c3", "d": "d2", "e": "e2"}
{"a": "2024-03-20", "b": "YFyjBh6JeE", "c": "c1", "d": "d3", "e": "e1"}
{"a": "2024-01-25", "b": "kF462Dpave", "c": "c1", "d": "d1", "e": "e2"}
{"a": "2023-11-20", "b": "uUsipxur13", "c": "c1", "d": "d3", "e": "e2"}
{"a": "2024-08-06", "b": "g4i1sEGImS", "c": "c2", "d": "d3", "e": "e1"}
{"a": "2024-02-09", "b": "NUz4tjvKt5", "c": "c3", "d": "d1", "e": "e2"}
{"a": "2024-04-20", "b": "p72Gn18tWd", "c": "c3", "d": "d3", "e": "e2"}
{"a": "2024-01-22", "b": "3DriXIqvSg", "c": "c2", "d": "d3", "e": "e2"}
{"a": "2024-07-26", "b": "rorCsbghiO", "c": "c1", "d": "d1", "e": "e2"}
{"a": "2023-10-26", "b": "XAWPiEQVmE", "c": "c3", "d": "d1", "e": "e1"}
{"a": "2023-10-30", "b": "L3FWcbrzen", "c": "c1", "d": "d2", "e": "e2"}
{"a": "2024-06-25", "b": "Lj6SZ26GJN", "c": "c3", "d": "d3", "e": "e1"}
{"a": "2023-07-20", "b": "U6nYzFhfwM", "c": "c2", "d": "d2", "e": "e2"}
{"a": "2023-06-26", "b": "J7jWtTmtZT", "c": "c3", "d": "d2", "e": "e2"}
{"a": "2024-05-27", "b": "hDWYIRDHV4", "c": "c2", "d": "d2", "e": "e2"}
{"a": "2023-05-23", "b": "kvjedf4zF8", "c": "c3", "d": "d3", "e": "e1"}
{"a": "2023-02-06", "b": "RsPN2cGb2L", "c": "c2", "d": "d3", "e": "e1"}
{"a": "2024-06-04", "b": "belvas0y6p", "c": "c3", "d": "d1", "e": "e2"}
{"a": "2024-06-17", "b": "J6vYAcFuGZ", "c": "c3", "d": "d2", "e": "e2"}
{"a": "2024-04-18", "b": "qHuHh0Y29i", "c": "c1", "d": "d2", "e": "e1"}
{"a": "2023-08-20", "b": "1GS5UtXMdz", "c": "c2", "d": "d3", "e": "e1"}
{"a": "2024-01-23", "b": "gnjJ4TZ6A6", "c": "c1", "d": "d1", "e": "e1"}
{"a": "2023-02-10", "b": "LX6ddQvIX2", "c": "c2", "d": "d3", "e": "e1"}
{"a": "2024-03-02", "b": "MkImkgiAfm", "c": "c3", "d": "d1", "e": "e2"}
{"a": "2023-11-30", "b": "tGmu0DD8W0", "c": "c3", "d": "d3", "e": "e1"}
{"a": "2023-02-01", "b": "NgzjCOPAku", "c": "c1", "d": "d3", "e": "e2"}
{"a": "2023-01-03", "b": "5mWMiuYwbi", "c": "c3", "d": "d1", "e": "e2"}
{"a": "2023-02-12", "b": "9324TZqLjh", "c": "c3", "d": "d2", "e": "e2"}
{"a": "2023-06-24", "b": "7fAYU4XSma", "c": "c1", "d": "d3", "e": "e1"}
{"a": "2024-08-28", "b": "iNvBMy8AB8", "c": "c1", "d": "d1", "e": "e1"}
{"a": "2024-06-08", "b": "nwJjFx21my", "c": "c3", "d": "d1", "e": "e1"}
{"a": "2023-03-15", "b": "Oonv8eGNIF", "c": "c3", "d": "d2", "e": "e1"}
{"a": "2024-04-22", "b": "6UYDEqQxxf", "c": "c2", "d": "d2", "e": "e2"}
{"a": "2024-05-08", "b": "aVPqpTufJf", "c": "c1", "d": "d2", "e": "e1"}
{"a": "2024-08-02", "b": "z1baLbjnTj", "c": "c3", "d": "d3", "e": "e2"}
{"a": "2023-07-05", "b": "vMLMALySMJ", "c": "c3", "d": "d3", "e": "e1"}
{"a": "2023-08-30", "b": "s7VZKlOG27", "c": "c3", "d": "d3", "e": "e2"}
{"a": "2023-03-06", "b": "gWAuqLvHpJ", "c": "c2", "d": "d1", "e": "e2"}
{"a": "2024-02-24", "b": "2IQL6pazn3", "c": "c1", "d": "d2", "e": "e1"}
{"a": "2024-08-15", "b": "4YbxyhwaRF", "c": "c3", "d": "d2", "e": "e1"}
{"a": "2023-02-28", "b": "wAOOLl8Kqj", "c": "c1", "d": "d2", "e": "e2"}
{"a": "2024-05-31", "b": "FTQKB8rURb", "c": "c3", "d": "d2", "e": "e2"}
{"a": "2023-02-20", "b": "knNNhnMXLN", "c": "c1", "d": "d3", "e": "e2"}
{"a": "2024-07-01", "b": "cxA5xpl6NM", "c": "c2", "d": "d1", "e": "e2"}
{"a": "2023-05-10", "b": "5FnmdQtOA0", "c": "c1", "d": "d2", "e": "e1"}
{"a": "2024-08-21", "b": "ldggIaWfYF", "c": "c3", "d": "d2", "e": "e2"}
{"a": "2024-01-05", "b": "7lwOfCQs5o", "c": "c2", "d": "d3", "e": "e1"}
{"a": "2024-08-21", "b": "2zvnCMAkZG", "c": "c1", "d": "d1", "e": "e2"}
Original file line number Diff line number Diff line change
Expand Up @@ -74,3 +74,6 @@
-- !sql --
0

-- !sql_bad --
0 1

Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ suite("test_count_on_index_httplogs", "p0") {
"""
}

def load_httplogs_data = {table_name, label, read_flag, format_flag, file_name, ignore_failure=false,
def stream_load_data = {table_name, label, read_flag, format_flag, file_name, ignore_failure=false,
expected_succ_rows = -1, load_to_single_tablet = 'true' ->

// load the json data
Expand Down Expand Up @@ -137,8 +137,8 @@ suite("test_count_on_index_httplogs", "p0") {
create_httplogs_dup_table.call(testTable_dup)
create_httplogs_unique_table.call(testTable_unique)

load_httplogs_data.call(testTable_dup, 'test_httplogs_load_count_on_index', 'true', 'json', 'documents-1000.json')
load_httplogs_data.call(testTable_unique, 'test_httplogs_load_count_on_index', 'true', 'json', 'documents-1000.json')
stream_load_data.call(testTable_dup, 'test_httplogs_load_count_on_index', 'true', 'json', 'documents-1000.json')
stream_load_data.call(testTable_unique, 'test_httplogs_load_count_on_index', 'true', 'json', 'documents-1000.json')

sql "sync"
sql """set experimental_enable_nereids_planner=true;"""
Expand Down Expand Up @@ -272,6 +272,41 @@ suite("test_count_on_index_httplogs", "p0") {
// case4: test compound query when inverted_index_query disable
qt_sql "SELECT COUNT() from ${testTable_dup} where request = 'images' or (size = 0 and status > 400)"
qt_sql "SELECT /*+SET_VAR(enable_inverted_index_query=false) */ COUNT() from ${testTable_dup} where request = 'images' or (size = 0 and status > 400)"

// case5: COUNT over a complex child expression (CASE WHEN ...) must not be pushed down to the index (expects pushAggOp=NONE)
def tableName5 = 'test_count_on_index_bad_case'
sql "DROP TABLE IF EXISTS ${tableName5}"
sql """
CREATE TABLE `${tableName5}` (
`a` DATE NOT NULL COMMENT '',
`b` VARCHAR(4096) NULL COMMENT '',
`c` VARCHAR(4096) NULL COMMENT '',
`d` VARCHAR(4096) NULL COMMENT '',
`e` VARCHAR(4096) NULL COMMENT '',
INDEX idx_a(`a`) USING INVERTED COMMENT '',
INDEX idx_e(`e`) USING INVERTED COMMENT ''
) ENGINE=OLAP
UNIQUE KEY(`a`, `b`)
COMMENT ''
DISTRIBUTED BY HASH(`a`) BUCKETS 3
PROPERTIES (
"replication_allocation" = "tag.location.default: 1"
);
"""
stream_load_data.call(tableName5, 'test_count_on_index_bad_case', 'true', 'json', 'count-on-index.json')
def bad_sql = """
SELECT
COUNT(CASE WHEN c IN ('c1', 'c2', 'c3') AND d = 'd1' THEN b END) AS num1,
COUNT(CASE WHEN e = 'e1' AND c IN ('c1', 'c2', 'c3') THEN b END) AS num2
FROM ${tableName5}
WHERE a = '2024-07-26'
AND e = 'e1';
"""
explain {
sql("${bad_sql}")
contains "pushAggOp=NONE"
}
qt_sql_bad "${bad_sql}"
} finally {
//try_sql("DROP TABLE IF EXISTS ${testTable}")
}
Expand Down
Loading