should be it

This commit is contained in:
2025-10-24 19:21:19 -05:00
parent a4b23fc57c
commit f09560c7b1
14047 changed files with 3161551 additions and 1 deletions

View File

@@ -0,0 +1,30 @@
# name: test/sql/optimizer/expression/test_casting_negative_integer_to_bit.test
# description: https://github.com/duckdb/duckdb/issues/13506
# group: [expression]
# Regression test for the linked issue: a negative INTEGER cast to BIT and back must still be
# the same negative value, both when projected and when used as a BETWEEN bound.
# single-row table whose only value is -1
statement ok
CREATE TABLE t1 as select -1 c1 from range(1);
# sanity check: the stored value is -1
query I
SELECT t1.c1 FROM t1;
----
-1
# the round-trip cast yields -1, so "1 BETWEEN -1 AND -1" evaluates to false
query II
SELECT CAST(CAST(t1.c1 AS BIT) AS INTEGER), (1 BETWEEN -1 AND CAST(CAST(t1.c1 AS BIT) AS INTEGER)) FROM t1;
----
-1 false
# same expressions in lowercase with column aliases; per the original note this form also failed
query II
select cast(cast(c1 as BIT) as INTEGER) as cast_res, 1 between -1 and cast(cast(c1 as BIT) as INTEGER) as watever from t1;
----
-1 false
# used as a filter, the predicate is false for the only row, so the result is empty
query I
SELECT t1.c1 FROM t1 WHERE (1 BETWEEN -1 AND CAST(CAST(t1.c1 AS BIT) AS INTEGER));
----

View File

@@ -0,0 +1,72 @@
# name: test/sql/optimizer/expression/test_common_aggregate.test
# description: Test common aggregate
# group: [expression]
# Checks that identical aggregates are folded into one, by matching the optimized plan
# returned by EXPLAIN and then confirming the actual query results.
statement ok
PRAGMA enable_verification
statement ok
CREATE TABLE integers(i INTEGER);
statement ok
INSERT INTO integers VALUES (1), (2), (3)
# EXPLAIN rows below are labelled logical_opt (the optimized logical plan)
statement ok
PRAGMA explain_output = 'OPTIMIZED_ONLY';
# all of these are identical, i.e. this gets folded into a single count aggregate
# (the <!REGEX> form asserts the plan does NOT contain an aggregate with two counts)
query II
EXPLAIN SELECT COUNT(*), COUNT(), COUNT(i) FROM integers;
----
logical_opt <!REGEX>:.*AGGREGATE.*count.*count.*
# three rows, no NULLs: every count is 3
query III
SELECT COUNT(*), COUNT(), COUNT(i) FROM integers;
----
3 3 3
# again, here there is only a single count and a single sum
query II
EXPLAIN SELECT COUNT(*), COUNT(), SUM(i), COUNT(i), SUM(i) / COUNT(i) FROM integers;
----
logical_opt <!REGEX>:.*AGGREGATE.*count.*count.*
# sum is 1+2+3 = 6, so SUM(i) / COUNT(i) is 6 / 3 = 2
query IIIII
SELECT COUNT(*), COUNT(), SUM(i), COUNT(i), SUM(i) / COUNT(i) FROM integers;
----
3 3 6 3 2
# however, once we add a null value COUNT(i) is no longer equal to COUNT(*)
statement ok
INSERT INTO integers VALUES (NULL)
# now there are two counts!
query II
EXPLAIN SELECT COUNT(*), COUNT(), SUM(i), COUNT(i), SUM(i) / COUNT(i) FROM integers;
----
logical_opt <REGEX>:.*AGGREGATE.*count.*count.*
# COUNT(*) and COUNT() see 4 rows, COUNT(i) skips the NULL and stays at 3
query IIIII
SELECT COUNT(*), COUNT(), SUM(i), COUNT(i), SUM(i) / COUNT(i) FROM integers;
----
4 4 6 3 2
# grouped variant: three sum/count pairs over different columns, two groups
statement ok
CREATE TABLE groups(grp INTEGER, aggr1 INTEGER, aggr2 INTEGER, aggr3 INTEGER)
statement ok
INSERT INTO groups VALUES (1, 1, 2, 3), (1, 2, 4, 6), (2, 1, 2, 3), (2, 3, 6, 9);
# each output column is sum/count (the average) of one aggrN column per grp
query III
SELECT
sum(aggr1)::DOUBLE / count(aggr1)::DOUBLE AS avg_qty,
sum(aggr2)::DOUBLE / count(aggr2)::DOUBLE AS avg_price,
sum(aggr3)::DOUBLE / count(aggr3)::DOUBLE AS avg_disc
FROM
groups
GROUP BY
grp
ORDER BY grp;
----
1.5 3 4.5
2 4 6

View File

@@ -0,0 +1,39 @@
# name: test/sql/optimizer/expression/test_comparison_simplification.test
# description: Test ComparisonSimplificationRule
# group: [expression]
# Two cases where a cast to TIME sits on the column side of a comparison and the query must
# still return the rows whose time-of-day matches.
statement ok
PRAGMA enable_verification
# VARCHAR => TIME is not invertible.
# The string's time part (23:00:08.539) lies inside the BETWEEN range, so the row is kept.
query I
WITH results AS (
SELECT '2023-08-17T23:00:08.539Z' as timestamp
)
SELECT *
FROM results
WHERE timestamp::TIME BETWEEN '22:00:00'::TIME AND '23:59:59'::TIME ;
----
2023-08-17T23:00:08.539Z
# CAST(TIMESTAMP AS TIME) can't use statistics
statement ok
CREATE TABLE issue8316 (dt TIMESTAMP);
# four timestamps; two of them share the time-of-day 10:04:25 on different dates
statement ok
INSERT INTO issue8316 VALUES
('2016-02-14 18:00:05'),
('2016-02-15 10:04:25'),
('2016-02-16 10:04:25'),
('2016-02-16 23:59:55'),
;
# filtering on the time-of-day alone must return both 10:04:25 rows
query I
SELECT dt
FROM issue8316
WHERE CAST(dt as TIME) = CAST('10:04:25' as TIME)
ORDER BY 1
;
----
2016-02-15 10:04:25
2016-02-16 10:04:25

View File

@@ -0,0 +1,118 @@
# name: test/sql/optimizer/expression/test_conjunction_optimization.test
# description: Test conjunction statements that can be simplified
# group: [expression]
# Runs AND/OR expressions with repeated or constant operands, first as filters and then as
# projected values, and checks the results including the NULL row.
# NULLs sort first; the expected ORDER BY output below relies on this
statement ok
SET default_null_order='nulls_first';
statement ok
PRAGMA enable_verification
statement ok
CREATE TABLE integers(i INTEGER);
statement ok
INSERT INTO integers VALUES (1), (2), (3), (NULL)
# test conjunctions in FILTER clause
# both OR branches share the term i=1
query I
SELECT i FROM integers WHERE (i=1 AND i>0) OR (i=1 AND i<3) ORDER BY i
----
1
# the same predicate on both sides of the OR
query I
SELECT i FROM integers WHERE (i=1) OR (i=1) ORDER BY i
----
1
# the same predicate repeated five times
query I
SELECT i FROM integers WHERE (i=1) OR (i=1) OR (i=1) OR (i=1) OR (i=1) ORDER BY i
----
1
# for the NULL row the comparisons are NULL, for other rows IS NULL is false: nothing passes
query I
SELECT i FROM integers WHERE (i IS NULL AND i=1) OR (i IS NULL AND i<10) ORDER BY i
----
# every non-NULL row satisfies i<10
query I
SELECT i FROM integers WHERE (i IS NOT NULL AND i>1) OR (i IS NOT NULL AND i<10) ORDER BY i
----
1
2
3
# only the NULL row satisfies both IS NULL terms
query I
SELECT i FROM integers WHERE (i IS NULL AND (i+1) IS NULL) OR (i IS NULL AND (i+2) IS NULL) ORDER BY i
----
NULL
# OR with a constant true term keeps every row, including the NULL row
query I
SELECT i FROM integers WHERE i=1 OR 1=1 ORDER BY i
----
NULL
1
2
3
# a constant false term in the same OR does not change that
query I
SELECT i FROM integers WHERE i=1 OR 1=0 OR 1=1 ORDER BY i
----
NULL
1
2
3
# the second conjunct is constant true, so only i=1 decides
query I
SELECT i FROM integers WHERE (i=1 OR 1=0 OR i=1) AND (0=1 OR 1=0 OR 1=1) ORDER BY i
----
1
# test conjunctions in SELECT clause
# same expressions as above, now projected: one value per row, NULL row first
query T
SELECT (i=1 AND i>0) OR (i=1 AND i<3) FROM integers ORDER BY i
----
NULL
1
0
0
query T
SELECT (i=1) OR (i=1) FROM integers ORDER BY i
----
NULL
1
0
0
query T
SELECT (i=1) OR (i=1) OR (i=1) OR (i=1) OR (i=1) FROM integers ORDER BY i
----
NULL
1
0
0
# NULL row: (true AND NULL) OR (true AND NULL) is NULL; other rows are false
query T
SELECT (i IS NULL AND i=1) OR (i IS NULL AND i<10) FROM integers ORDER BY i
----
NULL
0
0
0
# NULL row: IS NOT NULL is false so the whole expression is false, not NULL
query T
SELECT (i IS NOT NULL AND i>1) OR (i IS NOT NULL AND i<10) FROM integers ORDER BY i
----
0
1
1
1
# true only for the NULL row
query T
SELECT (i IS NULL AND (i+1) IS NULL) OR (i IS NULL AND (i+2) IS NULL) FROM integers ORDER BY i
----
1
0
0
0

View File

@@ -0,0 +1,81 @@
# name: test/sql/optimizer/expression/test_cse.test
# description: Test queries involving Common SubExpressions
# group: [expression]
# Each query repeats the same subexpression several times; the results must be what a
# straightforward evaluation of the full expression gives.
# NULLs sort first; the string tests at the end rely on this
statement ok
SET default_null_order='nulls_first';
statement ok
PRAGMA enable_verification
# single row with a = 42, so a*2 = 84
statement ok
create table test(a integer);
statement ok
insert into test values (42);
# single CSE
query I
SELECT (a*2)+(a*2) FROM test
----
168
# multiple CSEs
query I
SELECT (a*2)+(a*2)+(a*2)+(a*2)+(a*2) FROM test
----
420
# use the actual columns still
query II
SELECT (a*2)+(a*2)+(a*2)+(a*2)+(a*2), a FROM test
----
420 42
# CSE in aggregates
query I
SELECT SUM((a*2)+(a*2)+(a*2)+(a*2)+(a*2)) FROM test
----
420
# also with group by clause
query II
SELECT a, SUM((a*2)+(a*2)+(a*2)+(a*2)+(a*2)) FROM test GROUP BY a
----
42 420
# CSE in WHERE clause
query I
SELECT * FROM test WHERE ((a*2)+(a*2))>100
----
42
# multiple CSE in WHERE clause
query I
SELECT * FROM test WHERE ((a*2)+(a*2)+(a*2)+(a*2)+(a*2))>400
----
42
# Strings and NULL values
statement ok
create table test2(a VARCHAR);
statement ok
insert into test2 values ('hello'), ('world'), (NULL);
# single CSE in projection
# comparing a substring with itself: true for the two strings, NULL for the NULL row
query T
SELECT substring(a, 1, 3)=substring(a, 1, 3) FROM test2 ORDER BY 1
----
NULL
1
1
# now with GROUP BY clause
# grouping collapses the two true rows into one
query T
SELECT substring(a, 1, 3)=substring(a, 1, 3) AS b FROM test2 GROUP BY b ORDER BY b
----
NULL
1

View File

@@ -0,0 +1,20 @@
# name: test/sql/optimizer/expression/test_date_subtract_filter.test
# description: Issue #9863 - query involving a subtraction between dates and a comparison
# group: [expression]
# The same date subtraction appears in the SELECT list and in HAVING, where it is compared
# against an integer constant.
statement ok
PRAGMA enable_verification
# single row, so min(lo_commitdate) is 1992-02-10
statement ok
CREATE TABLE dates(lo_commitdate DATE);
statement ok
INSERT INTO dates VALUES (DATE '1992-02-10');
# 2020-02-20 minus 1992-02-10 is 10237 days, which passes the "> 4" HAVING filter
query I
SELECT CAST('2020-02-20' AS date) - CAST(min("ta_1"."lo_commitdate") AS date) AS "ca_1"
FROM dates AS "ta_1"
HAVING CAST('2020-02-20' AS date) - CAST(min("ta_1"."lo_commitdate") AS date) > 4
ORDER BY "ca_1" ASC;
----
10237

View File

@@ -0,0 +1,118 @@
# name: test/sql/optimizer/expression/test_equal_or_null_optimization.test
# description: Test a=b OR (a IS NULL AND b IS NULL) to a IS NOT DISTINCT FROM b
# group: [expression]
# For each predicate shape the test checks the query result and then the physical plan:
# a plan matching DISTINCT means the rewrite happened, a plan matching OR means it did not.
statement ok
PRAGMA enable_verification
# EXPLAIN rows below are labelled physical_plan
statement ok
PRAGMA explain_output = 'PHYSICAL_ONLY'
# i equals j in every row (including the all-NULL row); k never equals i
statement ok
CREATE TABLE test(i INTEGER, j INTEGER, k integer);
statement ok
INSERT INTO test VALUES (1,1,3), (2,2,4), (NULL,NULL,NULL);
# the rewritable pattern: equality first, then the both-NULL branch
query I rowsort
SELECT i FROM test WHERE (i=j) OR (i IS NULL AND j IS NULL);
----
1
2
NULL
query II nosort distinctrewrite1
EXPLAIN SELECT (i=j) OR (i IS NULL AND j IS NULL) FROM test;
----
physical_plan <REGEX>:.*DISTINCT.*
# the hand-written IS NOT DISTINCT FROM form gives the same rows and plan match
query I rowsort
SELECT i FROM test WHERE i IS NOT DISTINCT FROM j;
----
1
2
NULL
query II nosort distinctrewrite1
EXPLAIN SELECT i IS NOT DISTINCT FROM j FROM test;
----
physical_plan <REGEX>:.*DISTINCT.*
# same pattern with the two OR branches swapped
query I rowsort
SELECT i FROM test WHERE (i IS NULL AND j IS NULL) OR (i=j);
----
1
2
NULL
query II nosort distinctrewrite1
EXPLAIN SELECT (i IS NULL AND j IS NULL) OR (i=j) FROM test;
----
physical_plan <REGEX>:.*DISTINCT.*
# do a hash join
query I rowsort
SELECT test1.i FROM test AS test1, test AS test2 WHERE (test1.i=test2.j) OR (test1.i IS NULL AND test2.j IS NULL) ORDER BY 1;
----
1
2
NULL
query II nosort distinctrewrite2
EXPLAIN SELECT test1.i FROM test AS test1, test AS test2 WHERE (test1.i=test2.j) OR (test1.i IS NULL AND test2.j IS NULL);
----
physical_plan <REGEX>:.*DISTINCT.*
# join condition with the OR branches swapped
query II nosort distinctrewrite2
EXPLAIN SELECT test1.i FROM test AS test1, test AS test2 WHERE (test1.i IS NULL AND test2.j IS NULL) OR (test1.i=test2.j);
----
physical_plan <REGEX>:.*DISTINCT.*
# this one cannot be rewritten
# (the equality compares i with k, but the NULL checks are on i and j)
query I nosort
SELECT i FROM test WHERE (i=k) OR (i IS NULL AND j IS NULL);
----
NULL
query II nosort nodistinctrewrite1
EXPLAIN SELECT i FROM test WHERE (i=k) OR (i IS NULL AND j IS NULL);
----
physical_plan <REGEX>:.*OR.*
# neither this one
# (the second branch tests j = 1 instead of j IS NULL)
query I nosort
SELECT i FROM test WHERE (i=j) OR (i IS NULL AND j = 1);
----
1
2
query II nosort nodistinctrewrite2
EXPLAIN SELECT i FROM test WHERE (i=j) OR (i IS NULL AND j = 1);
----
physical_plan <REGEX>:.*OR.*
# do a nested loop join because the OR cannot be rewritten here
# (the NULL checks use test2.i / test1.j, not the columns compared in the equality)
query I rowsort
SELECT test1.i FROM test AS test1, test AS test2 WHERE (test1.i=test2.j) OR (test2.i IS NULL AND test1.j IS NULL);
----
1
2
NULL
# NOTE(review): this label and distinctrewrite4 below say "distinctrewrite" although the expected
# plan keeps the OR; naming looks inconsistent with nodistinctrewrite1/2 - confirm before renaming
query II nosort distinctrewrite3
EXPLAIN SELECT test1.i FROM test AS test1, test AS test2 WHERE (test1.i=test2.j) OR (test2.i IS NULL AND test1.j IS NULL);
----
physical_plan <REGEX>:.*OR.*
# same issue as the previous one
query I rowsort
SELECT test1.i FROM test AS test1, test AS test2 WHERE (test1.i=test2.k) OR (test2.i IS NULL AND test2.j IS NULL);
----
1
2
NULL
query II nosort distinctrewrite4
EXPLAIN SELECT test1.i FROM test AS test1, test AS test2 WHERE (test1.i=test2.k) OR (test2.i IS NULL AND test2.j IS NULL);
----
physical_plan <REGEX>:.*OR.*

View File

@@ -0,0 +1,29 @@
# name: test/sql/optimizer/expression/test_in_clause_simplification.test
# description: Test InClauseSimplificationRule
# group: [expression]
# A TIMESTAMP column is cast to DATE and tested against an IN list holding a date string.
statement ok
PRAGMA enable_verification
# Removing the TIMESTAMP => DATE cast only works if the
# IN clause has TIMESTAMPs.
statement ok
CREATE TABLE issue13380(c0 TIMESTAMP);
# the timestamp has a non-midnight time part, so it only matches the list once cast to DATE
statement ok
INSERT INTO issue13380(c0) VALUES ('2024-08-09 14:48:00');
# the date part matches the list entry
query I
SELECT c0::DATE IN ('2024-08-09') d FROM issue13380;
----
true
# negation wrapped around the IN expression
query I
SELECT NOT (c0::DATE IN ('2024-08-09')) FROM issue13380;
----
false
# NOT IN operator form
query I
SELECT c0::DATE NOT IN ('2024-08-09') FROM issue13380;
----
false

View File

@@ -0,0 +1,25 @@
# name: test/sql/optimizer/expression/test_indistinct_aggregates.test
# description: Test DISTINCT-insensitive aggregate rules
# group: [expression]
# max() returns the same value with or without DISTINCT; this checks max(distinct ...) both
# as a plain aggregate and as a window aggregate.
statement ok
PRAGMA enable_verification
# plain aggregate over the values 0..9
query I
SELECT max(distinct x) from range(10) tbl(x);
----
9
# window form: with the window ordered by x descending, the running max is 9 on every row.
# The outer ORDER BY pins the row order of the result; the ORDER BY inside OVER only defines
# the window and does not by itself guarantee the order in which rows are returned.
query II
SELECT x, max(distinct x) over (order by x desc) from range(10) tbl(x) ORDER BY x DESC;
----
9 9
8 9
7 9
6 9
5 9
4 9
3 9
2 9
1 9
0 9

View File

@@ -0,0 +1,175 @@
# name: test/sql/optimizer/expression/test_move_constants.test
# description: Test queries involving constants that can be moved
# group: [expression]
# Comparisons of the form "v <op> constant = constant" are run against a table holding 2 and
# NULL, with constants chosen near the type limits or otherwise awkward for rearranging.
statement ok
PRAGMA enable_verification
# move constants with signed value extremes
# loop over every signed integer type (tinyint through hugeint) with its min and max value
foreach type,min,max tinyint,-128,127 smallint,-32768,32767 integer,-2147483648,2147483647 bigint,-9223372036854775808,9223372036854775807 hugeint,-170141183460469231731687303715884105728,170141183460469231731687303715884105727
statement ok
CREATE OR REPLACE TABLE vals(v ${type});
statement ok
INSERT INTO vals VALUES (2), (NULL);
# +
# constant is max-10; no row equals -100 after the addition
query I
SELECT * FROM vals WHERE v+(${max}-10)::${type}=-100::${type};
----
# projected: false for v=2, NULL for the NULL row
query I
SELECT v+(${max}-10)::${type}=-100::${type} FROM vals;
----
false
NULL
# -
# column minus constant
query I
SELECT * FROM vals WHERE v-${max}::${type}=${max}::${type};
----
# constant minus column
query I
SELECT * FROM vals WHERE (${min}+100)::${type}-v=${max}::${type};
----
# *
# multiply by 0
query I
SELECT * FROM vals WHERE v*0::${type}=1::${type};
----
# 2*0 = 0 matches; the NULL row does not
query I
SELECT * FROM vals WHERE v*0::${type}=0::${type};
----
2
query I
SELECT v*0::${type}=0::${type} FROM vals;
----
true
NULL
query I
SELECT v*0::${type}=1::${type} FROM vals;
----
false
NULL
# negative numbers
# multiplying by a negative constant and comparing against the type minimum
query I
SELECT * FROM vals WHERE v*(-1)::${type}=(${min})::${type};
----
query I
SELECT * FROM vals WHERE v*(-2)::${type}=(${min})::${type};
----
endloop
# move constants with unsigned values
foreach utype utinyint usmallint uinteger ubigint
statement ok
CREATE OR REPLACE TABLE vals(v ${utype});
statement ok
INSERT INTO vals VALUES (2), (NULL);
# +
# v+5=3 has no unsigned solution
query I
SELECT * FROM vals WHERE v+5::${utype}=3::${utype};
----
query I
SELECT v+5::${utype}=3::${utype} FROM vals;
----
false
NULL
# same with the constant on the left
query I
SELECT * FROM vals WHERE 5::${utype}+v=3::${utype};
----
# 2+1 = 3 matches
query I
SELECT * FROM vals WHERE v+1::${utype}=3::${utype};
----
2
query I
SELECT * FROM vals WHERE 1::${utype}+v=3::${utype};
----
2
# -
# 2-2 is 0, not 255
query I
SELECT * FROM vals WHERE v-2::${utype}=255::${utype};
----
# 2-v=3 has no unsigned solution
query I
SELECT * FROM vals WHERE 2::${utype}-v=3::${utype};
----
# 2-1 = 1 matches
query I
SELECT * FROM vals WHERE v-1::${utype}=1::${utype};
----
2
# 4-2 = 2 matches
query I
SELECT * FROM vals WHERE 4::${utype}-v=2::${utype};
----
2
# *
# multiply by 0
query I
SELECT * FROM vals WHERE v*0::${utype}=1::${utype};
----
query I
SELECT * FROM vals WHERE v*0::${utype}=0::${utype};
----
2
query I
SELECT v*0::${utype}=0::${utype} FROM vals;
----
true
NULL
query I
SELECT v*0::${utype}=1::${utype} FROM vals;
----
false
NULL
# rhs value is not cleanly divisible
# this is always false, since there is no integer value for which x*3=7
query I
SELECT * FROM vals WHERE v*3::${utype}=7::${utype};
----
query I
SELECT v*3::${utype}=7::${utype} FROM vals;
----
false
NULL
# cleanly divisible: 2*3 = 6 matches
query I
SELECT * FROM vals WHERE v*3::${utype}=6::${utype};
----
2
query I
SELECT v*3::${utype}=6::${utype} FROM vals;
----
true
NULL
statement ok
DROP TABLE vals
endloop

View File

@@ -0,0 +1,33 @@
# name: test/sql/optimizer/expression/test_negation_limits.test
# description: Test negation limits
# group: [expression]
# Unary minus on signed integer columns: negating the type's minimum must raise an error,
# every other value must negate normally.
statement ok
PRAGMA enable_verification
# loop over the signed integer types tinyint through bigint with their min and max values
foreach type,min,max tinyint,-128,127 smallint,-32768,32767 integer,-2147483648,2147483647 bigint,-9223372036854775808,9223372036854775807
statement ok
CREATE OR REPLACE TABLE vals(id INTEGER, v ${type});
# id 0 holds the minimum, id 1 the maximum, ids 2-4 hold small values
statement ok
INSERT INTO vals VALUES (0, ${min}), (1, ${max}), (2, 0), (3, -2), (4, 2);
# cannot negate the min value
statement error
SELECT -v FROM vals;
----
# we can negate the maximum value
statement ok
SELECT -v FROM vals WHERE id>0
# the small values negate as expected: 0, -2, 2 become 0, 2, -2
query I
SELECT -v FROM vals WHERE id>1 ORDER BY id
----
0
2
-2
endloop

View File

@@ -0,0 +1,128 @@
# name: test/sql/optimizer/expression/test_nop_arithmetic.test
# description: Test NOP arithmetic expressions
# group: [expression]
# Arithmetic with the constants 0 and 1 on either side of a column, first on non-NULL
# values and then after the column has been set to NULL.
statement ok
PRAGMA enable_verification
statement ok
CREATE TABLE test (a INTEGER, b INTEGER)
statement ok
INSERT INTO test VALUES (42, 10), (43, 100);
# a + 0
query I
SELECT a + 0 FROM test
----
42
43
# 0 + a
query I
SELECT 0 + a FROM test
----
42
43
# a - 0
query I
SELECT a - 0 FROM test
----
42
43
# 0 - a
query I
SELECT 0 - a FROM test
----
-42
-43
# a * 1
query I
SELECT a * 1 FROM test
----
42
43
# 1 * a
query I
SELECT 1 * a FROM test
----
42
43
# a * 0 => 0
query I
SELECT a * 0 FROM test
----
0
0
# 0 * a => 0
query I
SELECT 0 * a FROM test
----
0
0
# a / 1
query I
SELECT a / 1 FROM test
----
42
43
# 1 // a
query I
SELECT 1 // a FROM test
----
0
0
# a // 0 => NULL
query I
SELECT a // 0 FROM test
----
NULL
NULL
# 0 // a => 0
query I
SELECT 0 // a FROM test
----
0
0
# test expressions involving NULL as well
statement ok
UPDATE test SET a=NULL
# NULL * 0 = NULL
query I
SELECT a * 0 FROM test
----
NULL
NULL
# rowid is never NULL, so rowid * 0 is 0 for both rows
query I
SELECT rowid * 0 FROM test
----
0
0
# 0 / NULL = NULL
query I
SELECT 0 / a FROM test
----
NULL
NULL
# with ieee_floating_point_ops off, the first row (expected NULL) shows 0 / 0 giving NULL;
# presumably that row has rowid 0 - TODO confirm
statement ok
SET ieee_floating_point_ops=false;
query I
SELECT 0 / rowid FROM test
----
NULL
0

View File

@@ -0,0 +1,50 @@
# name: test/sql/optimizer/expression/test_timestamp_offset.test
# description: Test pushdown of varchars converted to timestamps
# group: [expression]
# A VARCHAR column is cast to TIMESTAMP inside a filter; the same filter is run on strings
# without and with a "+00:00" offset suffix and must select the same later row each time.
statement ok
PRAGMA enable_verification
statement ok
create or replace table table1 (
timestamp_str varchar
);
# plain "date time" strings, two seconds apart
statement ok
insert into table1 values ('2024-05-03 01:00:00'), ('2024-05-03 01:00:02');
# strictly-greater filter keeps only the 01:00:02 row
query II
select timestamp_str, cast(timestamp_str as timestamp)
from table1
where cast(timestamp_str as timestamp) > cast('2024-05-03 01:00:00' as timestamp);
----
2024-05-03 01:00:02 2024-05-03 01:00:02
# replace the data with ISO-style strings carrying a +00:00 offset
statement ok
truncate table table1;
statement ok
insert into table1 values ('2024-05-03T01:00:00+00:00'), ('2024-05-03T01:00:02+00:00');
# offset strings in the column, plain constant on the right-hand side
query II
select timestamp_str, cast(timestamp_str as timestamp)
from table1
where cast(timestamp_str as timestamp) > cast('2024-05-03 01:00:00' as timestamp);
----
2024-05-03T01:00:02+00:00 2024-05-03 01:00:02
# offset string on the right-hand side as well
query II
select timestamp_str, cast(timestamp_str as timestamp)
from table1
where cast(timestamp_str as timestamp) > cast('2024-05-03T01:00:00+00:00' as timestamp);
----
2024-05-03T01:00:02+00:00 2024-05-03 01:00:02
# cast performed in a subquery, filter applied on the resulting column outside it
query II
select * from (
select timestamp_str, cast(timestamp_str as timestamp) as timestamp_column
from table1
)
where timestamp_column > cast('2024-05-03 01:00:00' as timestamp);
----
2024-05-03T01:00:02+00:00 2024-05-03 01:00:02

View File

@@ -0,0 +1,110 @@
# name: test/sql/optimizer/plan/plan_struct_projection_pushdown.test
# description: Test struct projection pushdown
# group: [plan]
# Selecting individual struct fields should show only those field paths in the physical plan;
# selecting the struct itself should show the whole column. Checked for 1, 2 and 3 levels of
# nesting, each against a table and a Parquet copy of it.
require parquet
statement ok
CREATE TABLE struct_pushdown_test(id INT, struct_col STRUCT(sub_col1 integer, sub_col2 bool));
# rows cover a full struct, a NULL struct, and structs with one NULL field
statement ok
INSERT INTO struct_pushdown_test VALUES (1, {'sub_col1': 42, 'sub_col2': true}), (2, NULL), (3, {'sub_col1': 84, 'sub_col2': NULL}), (4, {'sub_col1': NULL, 'sub_col2': false});
# write the same data to Parquet so both sources can be compared
statement ok
COPY struct_pushdown_test TO '__TEST_DIR__/struct_pushdown_test.parquet'
# EXPLAIN rows below are labelled physical_plan
statement ok
PRAGMA explain_output = 'PHYSICAL_ONLY';
# run every check once against the table and once against the Parquet file
foreach source struct_pushdown_test read_parquet('__TEST_DIR__/struct_pushdown_test.parquet')
# verify we are only selecting sub_col2
query II
EXPLAIN SELECT struct_col.sub_col2 FROM ${source};
----
physical_plan <REGEX>:.*struct_col.sub_col2.*
# verify we are only selecting sub_col1
query II
EXPLAIN SELECT struct_col.sub_col1 FROM ${source};
----
physical_plan <REGEX>:.*struct_col.sub_col1.*
# both fields selected: both paths appear, in this order
query II
EXPLAIN SELECT struct_col.sub_col1, struct_col.sub_col2 FROM ${source};
----
physical_plan <REGEX>:.*struct_col.sub_col1.*struct_col.sub_col2.*
# here we need to select the entire column
# (the pattern requires "struct_col" followed by a space, i.e. no ".field" suffix)
query II
EXPLAIN SELECT struct_col.sub_col1, struct_col FROM ${source};
----
physical_plan <REGEX>:.*struct_col .*
endloop
# do the same with 2-nested structs
statement ok
CREATE TABLE nested_struct_pushdown_test(id INT, struct_col STRUCT(name STRUCT(v VARCHAR, id INT), nested_struct STRUCT(a integer, b bool)));
statement ok
INSERT INTO nested_struct_pushdown_test
VALUES (1, {'name': {'v': 'Row 1', 'id': 1}, 'nested_struct': {'a': 42, 'b': true}}),
(2, NULL),
(3, {'name': {'v': 'Row 3', 'id': 3}, 'nested_struct': {'a': 84, 'b': NULL}}),
(4, {'name': NULL, 'nested_struct': {'a': NULL, 'b': false}});
statement ok
COPY nested_struct_pushdown_test TO '__TEST_DIR__/nested_struct_pushdown_test.parquet'
foreach source nested_struct_pushdown_test read_parquet('__TEST_DIR__/nested_struct_pushdown_test.parquet')
# innermost field only
query II
EXPLAIN SELECT struct_col.name.id FROM ${source};
----
physical_plan <REGEX>:.*struct_col.name.id.*
# field plus its parent struct: the parent path struct_col.name is what is read
query II
EXPLAIN SELECT struct_col.name.id, struct_col.name FROM ${source};
----
physical_plan <REGEX>:.*struct_col.name .*
# field plus the top-level struct: the whole column is read
query II
EXPLAIN SELECT struct_col.name.id, struct_col FROM ${source};
----
physical_plan <REGEX>:.*struct_col .*
endloop
# 3 layers of nesting
# the table is recreated with an extra wrapping struct "s" and the Parquet file is overwritten
statement ok
CREATE OR REPLACE TABLE nested_struct_pushdown_test(id INT, struct_col STRUCT(s STRUCT(name STRUCT(v VARCHAR, id INT), nested_struct STRUCT(a integer, b bool))));
statement ok
INSERT INTO nested_struct_pushdown_test
VALUES (1, {'s': {'name': {'v': 'Row 1', 'id': 1}, 'nested_struct': {'a': 42, 'b': true}}}),
(2, NULL),
(3, {'s': {'name': {'v': 'Row 3', 'id': 3}, 'nested_struct': {'a': 84, 'b': NULL}}}),
(4, {'s': {'name': NULL, 'nested_struct': {'a': NULL, 'b': false}}});
statement ok
COPY nested_struct_pushdown_test TO '__TEST_DIR__/nested_struct_pushdown_test.parquet'
foreach source nested_struct_pushdown_test read_parquet('__TEST_DIR__/nested_struct_pushdown_test.parquet')
# innermost field only
query II
EXPLAIN SELECT struct_col.s.name.id FROM ${source};
----
physical_plan <REGEX>:.*struct_col.s.name.id.*
# field plus its parent struct
query II
EXPLAIN SELECT struct_col.s.name.id, struct_col.s.name FROM ${source};
----
physical_plan <REGEX>:.*struct_col.s.name .*
# field plus the top-level struct
query II
EXPLAIN SELECT struct_col.s.name.id, struct_col FROM ${source};
----
physical_plan <REGEX>:.*struct_col .*
endloop

View File

@@ -0,0 +1,52 @@
# name: test/sql/optimizer/plan/table_filter_pushdown_large_strings.test_slow
# description: Test Table Filter Push Down Scan String
# group: [plan]
# String equality and range filters over a 300000-row table, before and after inserts and
# an update change the set of values.
statement ok
PRAGMA enable_verification
# cross product of three names with range(0, 100000): each name appears 100000 times
statement ok
CREATE TABLE strings AS select i FROM (VALUES('pedro'), ('peter'), ('mark')) t1(i), range(0, 100000) t2(j) order by j
query I
SELECT count(i) FROM strings where i = 'pedro'
----
100000
query I
SELECT count(i) FROM strings where i = 'peter'
----
100000
query I
SELECT count(i) FROM strings where i = 'mark'
----
100000
# a value that is not in the table
query I
SELECT count(i) FROM strings where i = 'diego'
----
0
# add a few extra strings of different lengths; 'tim k' is inserted twice
statement ok
INSERT INTO strings VALUES('po')
statement ok
INSERT INTO strings VALUES('stefan manegold')
statement ok
INSERT INTO strings VALUES('tim k')
statement ok
INSERT INTO strings VALUES('tim k')
# rename all 'pedro' rows to 'zorro', which sorts after 'tim k'
statement ok
update strings set i = 'zorro' where i = 'pedro'
# 100000 'zorro' rows plus the two 'tim k' rows
query I
SELECT count(i) FROM strings where i >= 'tim k'
----
100002

View File

@@ -0,0 +1,14 @@
# name: test/sql/optimizer/plan/test_disable_build_side_probe_side.test
# description: Test that disabling BuildProbeSideOptimizer does not swap RIGHT joins to LEFT
# group: [plan]
# EXPLAIN rows below are labelled logical_opt (the optimized logical plan)
statement ok
pragma explain_output='optimized_only'
# switch off the optimizer under test
statement ok
set disabled_optimizers to 'build_side_probe_side';
# the <!REGEX> form asserts the plan for this RIGHT join does NOT mention LEFT
query II
explain from range(10) r1 right join range(10) r2 using (range)
----
logical_opt <!REGEX>:.*LEFT.*

View File

@@ -0,0 +1,233 @@
# name: test/sql/optimizer/plan/test_filter_pushdown.test
# description: Test filter pushdown
# group: [plan]
# Result-level checks for filters placed above cross products, joins, subqueries, set
# operations and window functions; only query output is asserted here, not the plan.
# NULLs sort first; the expected ORDER BY output below relies on this
statement ok
SET default_null_order='nulls_first';
statement ok
PRAGMA enable_verification
statement ok
CREATE TABLE integers(i INTEGER)
statement ok
INSERT INTO integers VALUES (1), (2), (3), (NULL)
# test filter pushdown into cross product
# single filter that matches both sides
query II
SELECT * FROM integers i1, integers i2 WHERE i1.i=i2.i ORDER BY 1
----
1 1
2 2
3 3
# add filter that matches left side
query II
SELECT * FROM integers i1, integers i2 WHERE i1.i=i2.i AND i1.i>1 ORDER BY 1
----
2 2
3 3
# three cross products
query III
SELECT * FROM integers i1, integers i2, integers i3 WHERE i1.i=i2.i AND i1.i=i3.i AND i1.i>1 ORDER BY 1
----
2 2 2
3 3 3
# inner join
query II
SELECT * FROM integers i1 JOIN integers i2 ON i1.i=i2.i WHERE i1.i>1 ORDER BY 1
----
2 2
3 3
# left outer join
# condition on LHS
query II
SELECT * FROM integers i1 LEFT OUTER JOIN integers i2 ON 1=1 WHERE i1.i>2 ORDER BY 2
----
3 NULL
3 1
3 2
3 3
# condition on RHS that eliminates NULL values
# (ON 1=0 never matches, so every RHS value is NULL and the filter removes all rows)
query II
SELECT * FROM integers i1 LEFT OUTER JOIN integers i2 ON 1=0 WHERE i2.i IS NOT NULL ORDER BY 2
----
# more complicated conditions on RHS that eliminates NULL values
query II
SELECT * FROM integers i1 LEFT OUTER JOIN integers i2 ON 1=0 WHERE i2.i>1 ORDER BY 2
----
query II
SELECT * FROM integers i1 LEFT OUTER JOIN integers i2 ON 1=0 WHERE CASE WHEN i2.i IS NULL THEN False ELSE True END ORDER BY 2
----
# conditions on RHS that does not eliminate NULL values
query II
SELECT DISTINCT * FROM integers i1 LEFT OUTER JOIN integers i2 ON 1=0 WHERE i2.i IS NULL ORDER BY 1
----
NULL NULL
1 NULL
2 NULL
3 NULL
# conditions on both sides that guarantees to eliminate null values from RHS
query II
SELECT * FROM integers i1 LEFT OUTER JOIN integers i2 ON 1=1 WHERE i1.i=i2.i ORDER BY 1
----
1 1
2 2
3 3
# MARK join
# transform into semi join
query I
SELECT * FROM integers WHERE i IN ((SELECT * FROM integers)) ORDER BY i
----
1
2
3
# transform into ANTI join
query I
SELECT * FROM integers WHERE i NOT IN ((SELECT * FROM integers WHERE i=1)) ORDER BY i
----
2
3
# condition pushdown
query I
SELECT * FROM integers WHERE i IN ((SELECT * FROM integers)) AND i<3 ORDER BY i
----
1
2
# IN subquery combined with a join condition between the two outer tables
query II
SELECT * FROM integers i1, integers i2 WHERE i1.i IN ((SELECT * FROM integers)) AND i1.i=i2.i ORDER BY 1
----
1 1
2 2
3 3
# DELIM join
# correlated exists: turn into semi join
query I
SELECT * FROM integers i1 WHERE EXISTS(SELECT i FROM integers WHERE i=i1.i) ORDER BY i1.i
----
1
2
3
# correlated not exists: turn into anti join
# (only the NULL row has no equal partner)
query I
SELECT * FROM integers i1 WHERE NOT EXISTS(SELECT i FROM integers WHERE i=i1.i) ORDER BY i1.i
----
NULL
# push condition down delim join
query II
SELECT * FROM integers i1, integers i2 WHERE i1.i=(SELECT i FROM integers WHERE i1.i=i) AND i1.i=i2.i ORDER BY i1.i
----
1 1
2 2
3 3
# test filter pushdown into subquery
query II
SELECT * FROM (SELECT i1.i AS a, i2.i AS b FROM integers i1, integers i2) a1 WHERE a=b ORDER BY 1
----
1 1
2 2
3 3
# filter pushdown on subquery with more complicated expression
query T
SELECT * FROM (SELECT i1.i=i2.i AS cond FROM integers i1, integers i2) a1 WHERE cond ORDER BY 1
----
1
1
1
# filter pushdown into distinct in subquery
query II
SELECT * FROM (SELECT DISTINCT i1.i AS a, i2.i AS b FROM integers i1, integers i2) res WHERE a=1 AND b=3;
----
1 3
# filter pushdown into union in subquery
query I
SELECT * FROM (SELECT * FROM integers i1 UNION SELECT * FROM integers i2) a WHERE i=3;
----
3
# filter pushdown on subquery with window function (cannot be done because it will mess up the ordering)
# (row numbers 1, 5, 9 are the positions of the a=b pairs within the full 3x3 ordering)
query III
SELECT * FROM (SELECT i1.i AS a, i2.i AS b, row_number() OVER (ORDER BY i1.i, i2.i) FROM integers i1, integers i2 WHERE i1.i IS NOT NULL AND i2.i IS NOT NULL) a1 WHERE a=b ORDER BY 1
----
1 1 1
2 2 5
3 3 9
# condition on scalar projection
query T
SELECT * FROM (SELECT 0=1 AS cond FROM integers i1, integers i2) a1 WHERE cond ORDER BY 1
----
# condition on scalar grouping
query T
SELECT * FROM (SELECT 0=1 AS cond FROM integers i1, integers i2 GROUP BY 1) a1 WHERE cond ORDER BY 1
----
# Disable IN generation with inequalities
# two cohort rows for person 1, one of them with a NULL end date
statement ok
CREATE TABLE cohort (
person_id INTEGER,
cohort_start_date DATE,
cohort_end_date DATE
);
statement ok
INSERT INTO cohort (person_id, cohort_start_date, cohort_end_date) VALUES
(1, DATE '2021-01-01', DATE '2021-02-15'),
(1, DATE '2021-01-01', NULL);
statement ok
CREATE TABLE obs (
person_id INTEGER,
observation_period_start_date DATE
);
statement ok
INSERT INTO obs (person_id, observation_period_start_date) VALUES
(1, DATE '2010-01-01'),
(2, DATE '2010-01-01');
# COALESCE replaces the NULL end date with the start date, so both cohort rows pass both
# inequality filters and join to the person-1 observation row
query IIII
SELECT q01.*
FROM (
SELECT LHS.*, observation_period_start_date
FROM (
SELECT q01.*
FROM (
SELECT
person_id,
cohort_start_date,
COALESCE(cohort_end_date, cohort_start_date) AS cohort_end_date
FROM cohort
) q01
WHERE (cohort_start_date <= cohort_end_date)
) LHS
INNER JOIN obs
ON (LHS.person_id = obs.person_id)
) q01
WHERE (cohort_end_date >= observation_period_start_date)
ORDER BY ALL;
----
1 2021-01-01 2021-01-01 2010-01-01
1 2021-01-01 2021-02-15 2010-01-01

View File

@@ -0,0 +1,52 @@
# name: test/sql/optimizer/plan/test_filter_pushdown_advanced.test_slow
# description: Test filter pushdown with more advanced expressions
# group: [plan]
# In this test we run queries that would take a long time without filter pushdown, but are
# almost instant with proper filter pushdown. We create two tables with 10K elements each; in
# most tests we cross product them together in some way to create a "big table" (100M entries),
# but the filter can be pushed past the cross product in all cases.
# Every active query pairs an arithmetic filter that pins i to 5000 with i<>5000, so the
# expected count is always 0.
statement ok
CREATE TABLE vals1 AS SELECT i AS i, i AS j FROM range(0, 10000, 1) t1(i)
statement ok
CREATE TABLE vals2(k INTEGER, l INTEGER)
statement ok
INSERT INTO vals2 SELECT * FROM vals1
# x + 1 = 5001
query I
SELECT COUNT(*) FROM (SELECT * FROM vals1, vals2) tbl1, (SELECT * FROM vals1, vals2) tbl2 WHERE tbl1.i+1=5001 AND tbl1.i<>5000;
----
0
# x - 1 = 4999
query I
SELECT COUNT(*) FROM (SELECT * FROM vals1, vals2) tbl1, (SELECT * FROM vals1, vals2) tbl2 WHERE tbl1.i-1=4999 AND tbl1.i<>5000;
----
0
# x * 2 = 10000
query I
SELECT COUNT(*) FROM (SELECT * FROM vals1, vals2) tbl1, (SELECT * FROM vals1, vals2) tbl2 WHERE tbl1.i*2=10000 AND tbl1.i<>5000;
----
0
# x * 2 = 9999 should always return false (as 9999 % 2 != 0, it's not cleanly divisible)
# SELECT COUNT(*) FROM (SELECT * FROM vals1, vals2) tbl1, (SELECT * FROM vals1, vals2) tbl2 WHERE tbl1.i*2=9999;
# 0
# x / 2 = 2500
# SELECT COUNT(*) FROM (SELECT * FROM vals1, vals2) tbl1, (SELECT * FROM vals1, vals2) tbl2 WHERE tbl1.i/2=2500 AND tbl1.i<>5000;
# 0
# -x=-5000
query I
SELECT COUNT(*) FROM (SELECT * FROM vals1, vals2) tbl1, (SELECT * FROM vals1, vals2) tbl2 WHERE -tbl1.i=-5000 AND tbl1.i<>5000;
----
0
# x + (1 + 1) = 5002
# SELECT COUNT(*) FROM (SELECT * FROM vals1, vals2) tbl1, (SELECT * FROM vals1, vals2) tbl2 WHERE tbl1.i+(1+1)=5002 AND tbl1.i<>5000;
# 0

View File

@@ -0,0 +1,78 @@
# name: test/sql/optimizer/plan/test_filter_pushdown_duplicate.test
# description: Test moving/duplicating conditions
# group: [plan]
# In this test we run queries that would take a long time without filter pushdown, but are
# almost instant with proper filter pushdown. We create two tables with 10K elements each; in
# most tests we cross product them together in some way to create a "big table" (100M entries),
# but the filter can be pushed past the cross product in all cases.
# NOTE: every query below is commented out (each followed by its intended result); only the
# three setup statements actually run.
statement ok
CREATE TABLE vals1 AS SELECT i AS i, i AS j FROM range(0, 10000, 1) t1(i)
statement ok
CREATE TABLE vals2(k INTEGER, l INTEGER)
statement ok
INSERT INTO vals2 SELECT * FROM vals1
# move conditions between joins
# SELECT * FROM (SELECT * FROM vals1, vals2 WHERE i=3 AND k=5) tbl1 INNER JOIN (SELECT * FROM vals1, vals2) tbl2 ON tbl1.i=tbl2.i AND tbl1.k=tbl2.k;
# 3 3 5 5 3 3 5 5
# SELECT * FROM (SELECT * FROM vals1, vals2 WHERE i>5000) tbl1 INNER JOIN (SELECT * FROM vals1, vals2 WHERE i<5000) tbl2 ON tbl1.i=tbl2.i AND tbl1.k=tbl2.k;
# (empty result)
# SELECT * FROM (SELECT * FROM vals1, vals2 WHERE i>5000) tbl1 INNER JOIN (SELECT * FROM vals1, vals2 WHERE i<5002 AND k=1) tbl2 ON tbl1.i=tbl2.i AND tbl1.k=tbl2.k;
# 5001 5001 1 1 5001 5001 1 1
# left outer join conditions
# SELECT * FROM (SELECT * FROM vals1, vals2 WHERE i>5000) tbl1 LEFT OUTER JOIN (SELECT * FROM vals1, vals2) tbl2 ON tbl1.i=tbl2.i AND tbl1.k=tbl2.k WHERE tbl1.i<5002 AND tbl1.k=1;
# 5001 5001 1 1 5001 5001 1 1
# only RHS has conditions
# SELECT * FROM (SELECT * FROM vals1, vals2) tbl1 LEFT OUTER JOIN (SELECT * FROM vals1, vals2 WHERE i=3 AND k=5) tbl2 ON tbl1.i=tbl2.i AND tbl1.k=tbl2.k WHERE tbl2.i<5000;
# 3 3 5 5 3 3 5 5
# only RHS has conditions
# SELECT COUNT(*) FROM (SELECT * FROM (SELECT * FROM vals1, vals2) tbl1 LEFT OUTER JOIN (SELECT * FROM vals1, vals2 WHERE i=3 AND k=5) tbl2 ON tbl1.i=tbl2.i WHERE tbl1.k<10 AND tbl2.k IS NOT NULL) tbl3;
# 10
# only LHS has conditions
# SELECT COUNT(*) FROM (SELECT * FROM vals1, vals2 WHERE i=3 AND k=5) tbl1 LEFT OUTER JOIN (SELECT * FROM vals1, vals2) tbl2 ON tbl1.i=tbl2.i AND tbl1.k=tbl2.k;
# 1
# side channel EXCEPT/INTERSECT
# SELECT * FROM vals1, vals2 WHERE i>5000 INTERSECT SELECT * FROM vals1, vals2 WHERE i<5002 AND k=1;
# 5001 5001 1 1
# SELECT * FROM vals1, vals2 WHERE i>5000 AND i<5002 AND k=1 EXCEPT SELECT * FROM vals1, vals2;
# (empty result)
# side channel GROUP conditions
# SELECT * FROM (SELECT i, k, MIN(j) FROM vals1, vals2 WHERE i=1 AND k=3 GROUP BY i, k) tbl1 INNER JOIN (SELECT * FROM vals1, vals2) tbl2 ON tbl1.i=tbl2.i AND tbl1.k=tbl2.k;
# 1 3 1 1 1 1 3 3
# conditions in subqueries
# uncorrelated subqueries
# SELECT * FROM vals1 WHERE i IN (SELECT i FROM vals1, vals2) AND i=3;
# 3 3
# SELECT * FROM vals1 WHERE EXISTS(SELECT i FROM vals1, vals2) AND i=3;
# 3 3
# correlated subqueries
# SELECT * FROM vals1 v1 WHERE i IN (SELECT i FROM vals1, vals2 WHERE i=v1.i AND k=v1.i) AND i=3;
# 3 3
# SELECT * FROM vals1 v1 WHERE i IN (SELECT i FROM vals1, vals2 WHERE i=v1.i AND k=v1.i AND k=4) AND i=3;
# (empty result)
# SELECT * FROM vals1 v1 WHERE i IN (SELECT i FROM vals1, vals2 WHERE i=v1.i AND k=v1.i AND k>5000) AND i<5002;
# 5001 5001
# SELECT * FROM vals1 v1 WHERE i=(SELECT i FROM vals1, vals2 WHERE i=v1.i AND k=v1.i) AND i=3;
# 3 3
# SELECT * FROM vals1 v1 WHERE i=(SELECT MIN(i) FROM vals1, vals2 WHERE i=v1.i AND k=v1.i) AND i=3;
# 3 3

View File

@@ -0,0 +1,241 @@
# name: test/sql/optimizer/plan/test_filter_pushdown_large.test
# description: Test filter pushdown with more data
# group: [plan]
# In this test we run queries that take a long time without filter pushdown, but are almost instant with
# proper filter pushdown. We create two tables with 10K elements each; in most tests we cross product them
# together in some way to create a "big table" (100M entries), but the filter can be pushed past the cross
# product in all cases.
# vals1(i, j): 10000 rows with i = j = 0..9999, generated from range()
statement ok
CREATE TABLE vals1 AS SELECT i AS i, i AS j FROM range(0, 10000, 1) t1(i)
# vals2(k, l): declared with explicit INTEGER columns, then filled with a copy of vals1
statement ok
CREATE TABLE vals2(k INTEGER, l INTEGER)
statement ok
INSERT INTO vals2 SELECT * FROM vals1
# pushdown filters into subqueries: the outer WHERE (i=k AND i<5) sits above a subquery
# that cross products vals1 and vals2
query II
SELECT i, k FROM (SELECT i, k FROM vals1, vals2) tbl1 WHERE i=k AND i<5 ORDER BY i
----
0 0
1 1
2 2
3 3
4 4
# pushdown past DISTINCT: same filter, but the subquery applies DISTINCT to the cross product
query II
SELECT i, k FROM (SELECT DISTINCT i, k FROM vals1, vals2) tbl1 WHERE i=k AND i<5 ORDER BY i
----
0 0
1 1
2 2
3 3
4 4
# pushdown conditions on group variables: the HAVING clause only references the grouping columns i and k
query IIR
SELECT i, k, SUM(j) FROM vals1, vals2 GROUP BY i, k HAVING i=k AND i<5 ORDER BY i
----
0 0 0.000000
1 1 1.000000
2 2 2.000000
3 3 3.000000
4 4 4.000000
# also inside subqueries: same HAVING filter, with the cross product wrapped in a subquery
query IIR
SELECT i, k, SUM(j) FROM (SELECT * FROM vals1, vals2) tbl1 GROUP BY i, k HAVING i=k AND i<5 ORDER BY i
----
0 0 0.000000
1 1 1.000000
2 2 2.000000
3 3 3.000000
4 4 4.000000
# and also like this: the aggregate is computed in a subquery and filtered by an outer WHERE on the group columns
query IIR
SELECT i, k, sum FROM (SELECT i, k, SUM(j) AS sum FROM vals1, vals2 GROUP BY i, k) tbl1 WHERE i=k AND i<5 ORDER BY i;
----
0 0 0.000000
1 1 1.000000
2 2 2.000000
3 3 3.000000
4 4 4.000000
# LEFT OUTER JOIN on constant "true" (ON 1=1) can be turned into a cross product, after which the filters can be pushed down
query IIII
SELECT * FROM vals1 LEFT OUTER JOIN vals2 ON 1=1 WHERE i=k AND k=5
----
5 5 5 5
# left outer join with an equality filter on an RHS column (i=k) can be turned into an INNER JOIN
query IIII
SELECT * FROM vals1 LEFT OUTER JOIN vals2 ON 1=1 WHERE i=k ORDER BY i LIMIT 5
----
0 0 0 0
1 1 1 1
2 2 2 2
3 3 3 3
4 4 4 4
# left outer join can be turned into an inner join (the WHERE clause filters on tbl2 columns),
# after which the filters can be pushed down into the RHS
query IIIIIIII
SELECT * FROM (SELECT * FROM vals1, vals2 WHERE j=5 AND l=5) tbl1 LEFT OUTER JOIN (SELECT * FROM vals1, vals2) tbl2 ON tbl1.i=tbl2.i AND tbl1.k=tbl2.k WHERE tbl2.j=5 AND tbl2.l=5;
----
5 5 5 5 5 5 5 5
# filters on tbl1 columns can be pushed into the LHS of the LEFT OUTER JOIN
query IIIIIIII
SELECT * FROM (SELECT * FROM vals1, vals2) tbl1 LEFT OUTER JOIN (SELECT * FROM vals1, vals2 WHERE i=5 AND k=10) tbl2 ON tbl1.i=tbl2.i AND tbl1.k=tbl2.k WHERE tbl1.i=5 AND tbl1.k=10
----
5 5 10 10 5 5 10 10
# conditions in the ON clause can be pushed down into the RHS (here the ON clause only references tbl2)
query IIIIIIII
SELECT * FROM (SELECT * FROM vals1, vals2 WHERE i=5 AND k=5) tbl1 LEFT OUTER JOIN (SELECT * FROM vals1, vals2) tbl2 ON tbl2.i=5 AND tbl2.k=5
----
5 5 5 5 5 5 5 5
# also works if the ON condition filters everything: the LHS row is kept and the RHS columns are NULL
query IIIIIIII
SELECT * FROM (SELECT * FROM vals1, vals2 WHERE i=5 AND k=5) tbl1 LEFT OUTER JOIN (SELECT * FROM vals1, vals2) tbl2 ON tbl2.i>10000 AND tbl2.k=5
----
5 5 5 5 NULL NULL NULL NULL
# conditions on LHS columns that appear in the left join predicates can be replicated on the RHS
query IIIIIIII
SELECT * FROM (SELECT * FROM vals1, vals2) tbl1 LEFT OUTER JOIN (SELECT * FROM vals1, vals2) tbl2 ON tbl1.i=tbl2.i AND tbl1.k=tbl2.k WHERE tbl1.i=5 AND tbl1.k=10
----
5 5 10 10 5 5 10 10
# also with multiple conditions on the same column (tbl1.i>4 AND tbl1.i<6)
query IIIIIIII
SELECT * FROM (SELECT * FROM vals1, vals2) tbl1 LEFT OUTER JOIN (SELECT * FROM vals1, vals2) tbl2 ON tbl1.i=tbl2.i AND tbl1.k=tbl2.k WHERE tbl1.i>4 AND tbl1.i<6 AND tbl1.k=10
----
5 5 10 10 5 5 10 10
# pushdown union: the outer filter applies to a UNION of two identical cross products
query IIII
SELECT * FROM (SELECT * FROM vals1, vals2 UNION SELECT * FROM vals1, vals2) tbl1 WHERE i=3 AND k=5
----
3 3 5 5
# pushdown into except: both sides are identical, so the result is empty
query IIII
SELECT * FROM (SELECT * FROM vals1, vals2 EXCEPT SELECT * FROM vals1, vals2) tbl1 WHERE i=3 AND k=5
----
# except where the right side only contains rows with i<>1: only rows with i=1 remain,
# and the outer filter keeps those with k<5
query IIII
SELECT * FROM (SELECT * FROM vals1, vals2 EXCEPT SELECT * FROM vals1, vals2 WHERE i<>1) tbl1 WHERE i<5 AND k<5 ORDER BY 1, 2, 3, 4;
----
1 1 0 0
1 1 1 1
1 1 2 2
1 1 3 3
1 1 4 4
# pushdown intersect: the outer filter applies to an INTERSECT of two identical cross products
query IIII
SELECT * FROM (SELECT * FROM vals1, vals2 INTERSECT SELECT * FROM vals1, vals2) tbl1 WHERE i=3 AND k=5
----
3 3 5 5
# constant condition on scalar projection: cond is the constant expression 0=1, so no rows are returned
query T
SELECT * FROM (SELECT 0=1 AS cond FROM vals1, vals2) a1 WHERE cond ORDER BY 1
----
# constant condition that is more hidden: a is the constant 1 and the filter is a=0
query I
SELECT * FROM (SELECT 1 AS a FROM vals1, vals2) a1 WHERE a=0 ORDER BY 1
----
# condition on scalar grouping: the constant expression 0=1 is also the grouping key
query T
SELECT * FROM (SELECT 0=1 AS cond FROM vals1, vals2 GROUP BY 1) a1 WHERE cond ORDER BY 1
----
# same as above, but grouping by the constant alias a and filtering on a=0
query I
SELECT * FROM (SELECT 1 AS a FROM vals1, vals2 GROUP BY a) a1 WHERE a=0 ORDER BY 1
----
# duplicate filters across equivalency sets and pushdown cross product:
# all four columns are equated with tbl1.i, which is fixed to 5000
query I
SELECT COUNT(*) FROM (SELECT * FROM vals1, vals2) tbl1, (SELECT * FROM vals1, vals2) tbl2 WHERE tbl1.i=tbl1.k AND tbl1.i=tbl2.k AND tbl1.i=tbl2.i AND tbl1.i=5000;
----
1
# also push other comparisons: the constant equality is replaced by the range 4999 < tbl1.i < 5001
query I
SELECT COUNT(*) FROM (SELECT * FROM vals1, vals2) tbl1, (SELECT * FROM vals1, vals2) tbl2 WHERE tbl1.i=tbl1.k AND tbl1.i=tbl2.k AND tbl1.i=tbl2.i AND tbl1.i>4999 AND tbl1.i<5001;
----
1
# empty result: tbl1.i=5000 and tbl1.i<>5000 contradict each other
query I
SELECT COUNT(*) FROM (SELECT * FROM vals1, vals2) tbl1, (SELECT * FROM vals1, vals2) tbl2 WHERE tbl1.i=5000 AND tbl1.i<>5000;
----
0
# also if we have a transitive condition: tbl1.i=5000 and tbl1.i=tbl2.i contradict tbl2.i<>5000
query I
SELECT COUNT(*) FROM (SELECT * FROM vals1, vals2) tbl1, (SELECT * FROM vals1, vals2) tbl2 WHERE tbl1.i=5000 AND tbl1.i=tbl2.i AND tbl2.i<>5000;
----
0
# useless inequality checks should be pruned: tbl2.i<>5001 is implied by tbl2.i=tbl1.i=5000
query I
SELECT COUNT(*) FROM (SELECT * FROM vals1, vals2) tbl1, (SELECT * FROM vals1, vals2) tbl2 WHERE tbl1.i=5000 AND tbl1.i=tbl2.i AND tbl1.i=tbl2.k AND tbl1.i=tbl1.k AND tbl2.i<>5001;
----
1
# add many useless predicates: every range/inequality predicate is already satisfied by the value 5000
query I
SELECT COUNT(*) FROM (SELECT * FROM vals1, vals2) tbl1, (SELECT * FROM vals1, vals2) tbl2 WHERE tbl2.i>10 AND tbl1.k>=500 AND tbl2.k<7000 AND tbl2.k<=6000 AND tbl2.k<>8000 AND tbl1.i<>4000 AND tbl1.i=tbl2.i AND tbl1.i=tbl2.k AND tbl1.i=tbl1.k AND tbl1.i=5000;
----
1
# FIXME filter equivalence with expressions
# SELECT COUNT(*) FROM vals1, vals2 WHERE i+1=5001 AND j=l AND k=i AND l+1=5001
# 0
# SELECT COUNT(*) FROM (SELECT * FROM vals1, vals2 WHERE i+1=5000 AND k+1=5000) tbl1, (SELECT *
# FROM vals1, vals2) tbl2 WHERE tbl1.i=tbl2.i AND tbl1.k=tbl2.k;
# 0
# greater than/less than should also be transitive
# SELECT COUNT(*) FROM (SELECT * FROM vals1, vals2) tbl1, (SELECT * FROM vals1, vals2) tbl2 WHERE tbl1.i>9997 AND tbl1.k>tbl1.i AND tbl2.i>tbl1.i AND tbl2.k>tbl1.i;
# 1
# equality with constant and then GT
# SELECT COUNT(*) FROM (SELECT * FROM vals1, vals2) tbl1, (SELECT * FROM vals1, vals2) tbl2 WHERE tbl1.i=9998 AND tbl1.k=9998 AND tbl2.i>tbl1.i AND tbl2.k>tbl1.k;
# 1
# equality with constant and then LT
# SELECT COUNT(*) FROM (SELECT * FROM vals1, vals2) tbl1, (SELECT * FROM vals1, vals2) tbl2 WHERE tbl1.i=1 AND tbl1.k=1 AND tbl2.i<tbl1.i AND tbl2.k<tbl1.k;
# 1
# transitive GT/LT
# SELECT COUNT(*) FROM vals1, vals2 WHERE i>4999 AND j<=l AND k>=i AND l<5001
# 1
# these more advanced cases we don't support yet
# filter equivalence with expressions
# SELECT COUNT(*) FROM vals1 v1, vals1 v2 WHERE v1.i+v2.i=10;
# IN list
# SELECT COUNT(*) FROM (SELECT * FROM vals1, vals2) tbl1,
# (SELECT * FROM vals1, vals2) tbl2 WHERE tbl2.k IN (5000, 5001, 5002) AND tbl2.k<5000;
# 0
# CASE expression
# SELECT COUNT(*) FROM (SELECT * FROM vals1, vals2) tbl1, (SELECT * FROM vals1, vals2) tbl2
# WHERE tbl2.k<5000 AND CASE WHEN (tbl2.k>5000) THEN (tbl2.k=5001) ELSE (tbl2.k=5000) END;
# 0
# OR expression
# SELECT COUNT(*) FROM (SELECT * FROM vals1, vals2) tbl1, (SELECT * FROM vals1, vals2) tbl2
# WHERE tbl2.k<5000 AND (tbl2.k=5000 OR tbl2.k>5000);
# 0

View File

@@ -0,0 +1,34 @@
# name: test/sql/optimizer/plan/test_filter_pushdown_materialized_cte.test
# description: Test filter pushdown in materialized CTEs (internal issue #3041)
# group: [plan]
# this test needs the tpcds extension; dsdgen is called with sf=0.01 to generate the tables queried below
require tpcds
statement ok
call dsdgen(sf=0.01)
# restrict EXPLAIN output to the optimized logical plan (matched as logical_opt below)
statement ok
pragma explain_output='OPTIMIZED_ONLY'
# the optimized plan for the materialized CTE must not match CROSS_PRODUCT (<!REGEX> is a negative match)
query II
EXPLAIN WITH ss AS MATERIALIZED
( SELECT i_manufact_id,
sum(ss_ext_sales_price) total_sales
FROM store_sales,
date_dim,
customer_address,
item
WHERE i_manufact_id IN
(SELECT i_manufact_id
FROM item
WHERE i_category IN ('Electronics'))
AND ss_item_sk = i_item_sk
AND ss_sold_date_sk = d_date_sk
AND d_year = 1998
AND d_moy = 5
AND ss_addr_sk = ca_address_sk
AND ca_gmt_offset = -5
GROUP BY i_manufact_id)
FROM ss
----
logical_opt <!REGEX>:.*CROSS_PRODUCT.*

View File

@@ -0,0 +1,33 @@
# name: test/sql/optimizer/plan/test_table_filter_pushdown.test
# description: Test Table Filter Push Down Scan
# group: [plan]
statement ok
PRAGMA enable_verification
# integers(i, j): 100 rows with i = j = 0..99
statement ok
CREATE TABLE integers AS SELECT i AS i, i AS j FROM range(0, 100) tbl(i)
# single equality filter
query I
SELECT j FROM integers where j = 99
----
99
# equality filters on both columns that match the same row
query I
SELECT j FROM integers where j = 99 AND i=99
----
99
# contradictory equality filters: i and j are always equal, so no row has j = 99 and i = 90
query I
SELECT j FROM integers where j = 99 AND i=90
----
# range filters on two different columns: matches the rows 91..94
query I
SELECT count(i) FROM integers where j > 90 and i < 95
----
4
# range filters on the same column: matches the rows 91..94
query I
SELECT count(i) FROM integers where j > 90 and j < 95
----
4

View File

@@ -0,0 +1,59 @@
# name: test/sql/optimizer/plan/test_unused_column_after_join.test
# description: Test joins with various columns that are only used in the join
# group: [plan]
statement ok
PRAGMA enable_verification
# test columns that are only used in the join (i.e. can be projected out after the join)
# create tables: test(a, b) has unique b values, test2(b, c) has two rows with b=1 and one with b=2
statement ok
CREATE TABLE test (a INTEGER, b INTEGER);
statement ok
INSERT INTO test VALUES (11, 1), (12, 2), (13, 3)
statement ok
CREATE TABLE test2 (b INTEGER, c INTEGER);
statement ok
INSERT INTO test2 VALUES (1, 10), (1, 20), (2, 30)
# count of single join: b is only used in the join condition
query I
SELECT COUNT(*) FROM test, test2 WHERE test.b = test2.b
----
3
# now aggregates over test.a: a=11 matches twice and a=12 once, a=13 has no match
query RII
SELECT SUM(test.a), MIN(test.a), MAX(test.a) FROM test, test2 WHERE test.b = test2.b
----
34.000000 11 12
# count of multi-way join (self join of test on b)
query I
SELECT COUNT(*) FROM test a1, test a2, test a3 WHERE a1.b=a2.b AND a2.b=a3.b
----
3
# now a sum over the multi-way join
query R
SELECT SUM(a1.a) FROM test a1, test a2, test a3 WHERE a1.b=a2.b AND a2.b=a3.b
----
36.000000
# count of multi-way join with filters on a column of every join input
query I
SELECT COUNT(*) FROM test a1, test a2, test a3 WHERE a1.b=a2.b AND a2.b=a3.b AND a1.a=11 AND a2.a=11 AND a3.a=11
----
1
# columns that become unused because of the optimizer: (TRUE OR ...) is always true,
# so the comparison a1.a=a2.b does not influence the result
query T
SELECT (TRUE OR a1.a=a2.b) FROM test a1, test a2 WHERE a1.a=11 AND a2.a>=10
----
1
1
1

View File

@@ -0,0 +1,121 @@
# name: test/sql/optimizer/test_duplicate_groups_optimizer.test
# description: Test Duplicate Groups optimizer
# group: [optimizer]
# t1(col1, col2) and t2(col3) each hold a single row; all values are 1
statement ok
create table t1(col1 int, col2 int);
statement ok
create table t2(col3 int);
statement ok
insert into t1 values (1, 1);
statement ok
insert into t2 values (1);
# verification is enabled only after the setup statements
statement ok
pragma enable_verification;
# col1 and col3 are equated by the join condition, so they are duplicate groups;
# ROLLUP must still produce one row per rollup level
query III
select
col1,
col2,
col3
from t1
join t2
on t1.col1 = t2.col3
group by rollup(col1, col2, col3) order by 1, 2 ,3;
----
1 1 1
1 1 NULL
1 NULL NULL
NULL NULL NULL
# same join with CUBE: one row for each of the 8 column subsets
query III
select
col1,
col2,
col3
from t1
join t2
on t1.col1 = t2.col3
group by cube(col1, col2, col3) order by 1, 2 ,3;
----
1 1 1
1 1 NULL
1 NULL 1
1 NULL NULL
NULL 1 1
NULL 1 NULL
NULL NULL 1
NULL NULL NULL
# same join with a GROUPING SETS list followed by two further parenthesized groups
query III
select
col1,
col2,
col3
from t1
join t2
on t1.col1 = t2.col3
group by grouping sets (col1, col2, col3), (col1, col2), (col1) order by 1, 2 ,3;
----
1 1 1
1 1 NULL
1 1 NULL
# restrict EXPLAIN output to the optimized logical plan (matched as logical_opt below)
statement ok
pragma explain_output='optimized_only';
statement ok
pragma disable_verification;
# make sure there is only one group and that the unused columns/duplicate groups
# combination still works.
# if the unused columns/duplicate groups combo breaks, each group will be on a separate line
query II
explain select
col1,
col3
from t1
join t2
on t1.col1 = t2.col3
group by col1, col3;
----
logical_opt <REGEX>:.*Groups: col1.*
# t3(a, b, c): four rows, including the duplicate row (1, 1, 1)
statement ok
create table t3 (a int, b int, c int);
statement ok
insert into t3 values
(1, 1, 1),
(1, 2, 2),
(1, 1, 1),
(1, 2, 1);
# CUBE over all three columns of a single table (no join involved)
query III
select * from t3 group by cube(a, b, c) order by all;
----
1 1 1
1 1 NULL
1 2 1
1 2 2
1 2 NULL
1 NULL 1
1 NULL 2
1 NULL NULL
NULL 1 1
NULL 1 NULL
NULL 2 1
NULL 2 2
NULL 2 NULL
NULL NULL 1
NULL NULL 2
NULL NULL NULL

View File

@@ -0,0 +1,31 @@
# name: test/sql/optimizer/test_in_rewrite_rule.test
# description: Test In Rewrite Rule
# group: [optimizer]
# t(i): a single INTEGER column holding the values 1 and 2
statement ok
create table t (i integer);
statement ok
insert into t values (1)
statement ok
insert into t values (2)
# IN list of string literals compared against the INTEGER column; 'y' is not numeric
query T
select * from t where i in ('1','2','y');
----
1
2
# IN list mixing a constant and a column, both VARCHAR
query T
SELECT x::VARCHAR IN ('1', y::VARCHAR) FROM (VALUES (1, 2), (2, 3)) tbl(x, y);
----
1
0
# IN list mixing a BIGINT constant and a BIGINT column, with x cast from INTEGER
query T
SELECT x::BIGINT IN (1::BIGINT, y) FROM (VALUES (1::INTEGER, 2::BIGINT), (2::INTEGER, 3::BIGINT)) tbl(x, y);
----
1
0

View File

@@ -0,0 +1,106 @@
# name: test/sql/optimizer/test_no_pushdown_cast_into_cte.test
# description: No Pushdown cast into cte
# group: [optimizer]
# restrict EXPLAIN output to the optimized logical plan (matched as logical_opt below)
statement ok
pragma explain_output='optimized_only';
# two rows have b = '*', which is not a valid int; the CTE's own WHERE (position('*' in b) = 0)
# removes them, and the outer query filters on the cast values (a < b)
query II
WITH t(a, b) AS (
SELECT a :: int, b :: int
FROM (VALUES
('1', '4'),
('5', '3'),
('2', '*'),
('3', '8'),
('7', '*')) AS _(a, b)
WHERE position('*' in b) = 0
)
SELECT a, b
FROM t
WHERE a < b;
----
1 4
3 8
# check that the filter on the cast values is above the projection that casts the varchar to int,
# and that the position() filter stays below that projection
query II
EXPLAIN WITH t(a, b) AS (
SELECT a :: int, b :: int
FROM (VALUES
('1', '4'),
('5', '3'),
('2', '*'),
('3', '8'),
('7', '*')) AS _(a, b)
WHERE position('*' in b) = 0
)
SELECT a, b
FROM t
WHERE a < b;
----
logical_opt <REGEX>:.*FILTER.*CAST\(a AS INTEGER.*<.*b AS INTEGER\).*PROJECTION.*FILTER.*position.*
# INT can always be cast to varchar, so the outer filter a[1] = '1'
# can be pushed down below the cast
query II
with t(a, b) as (
select a :: varchar, b :: varchar
FROM VALUES
(1, 2),
(3, 3),
(5, 6),
(7, 6) as
_(a, b) where a <= b
) select a, b from t where a[1] = '1';
----
1 2
# we should not see two filters (the plan must not match FILTER ... PROJECTION ... FILTER),
# since the filter can be pushed to just above the column data scan
query II
explain with t(a, b) as (
select a :: varchar, b :: varchar
FROM VALUES
(1, 2),
(3, 3),
(5, 6),
(7, 6) as
_(a, b) where a <= b
) select a, b from t where a[1] = '1';
----
logical_opt <!REGEX>:.*FILTER.*PROJECTION.*FILTER.*
# mytablename2: 12 distinct strings cross joined with range(4300);
# only the 'a0'/'a1'/'a2' values have a numeric suffix starting at position 2
statement ok
create or replace table mytablename2 as
from (values
('a0'),
('a1'),
('a2'),
('xxx-0'),
('xxx-1'),
('xxx-2'),
('xxx-3'),
('xxxx'),
('xxx0'),
('xxx1'),
('xxx2'),
('xxx3')
) t(mycolname),
range(4300) b(someothercolname)
# the WHERE clause filters on the alias of the cast (mycolname2 = 0) together with a condition
# that excludes the 'xxx...' rows, whose mycolname[2:] is not a valid int
query I
select
mycolname[2:]::int as mycolname2
from mytablename2
where mycolname[1:3] != 'xxx' AND mycolname2 = 0 limit 5;
----
0
0
0
0
0

View File

@@ -0,0 +1,66 @@
# name: test/sql/optimizer/test_rowid_pushdown.test
# description: Test filter pushdown on rowid
# group: [optimizer]
statement ok
PRAGMA enable_verification
# t1 has 250000 rows with x = i + 100; the expected results below rely on
# the row with rowid r holding x = r + 100
statement ok
CREATE TABLE t1 AS SELECT i + 100 as x FROM range(250000) AS t(i);
# equality filter on a small rowid
query I
SELECT * FROM t1 where rowid = 6;
----
106
# the equality filter must show up as a filter in the physical plan
query II
EXPLAIN SELECT * FROM t1 where rowid = 6;
----
physical_plan <REGEX>:.*Filters: rowid=6.*
# equality filter on a rowid far into the table
query I
SELECT * FROM t1 where rowid = 200000;
----
200100
query II
EXPLAIN SELECT * FROM t1 where rowid = 200000;
----
physical_plan <REGEX>:.*Filters: rowid=200000.*
# rowid IN (subquery): the subquery selects the 10 largest rowids of t1
query I
SELECT * FROM t1 where rowid IN (SELECT rowid FROM t1 ORDER BY rowid DESC LIMIT 10) ORDER BY rowid;
----
250090
250091
250092
250093
250094
250095
250096
250097
250098
250099
# IN filter with a constant list of rowids
query I
SELECT * FROM t1 where rowid IN (6, 9) ORDER BY ALL;
----
106
109
# NOTE(review): the parentheses in the expected regex below are unescaped, so they form a regex
# group instead of matching literal '(' and ')' -- confirm this leniency is intended
query II
EXPLAIN SELECT * FROM t1 where rowid IN (6, 9);
----
physical_plan <REGEX>:.*Filters:.*rowid.*IN.*(6.*9).*
# OR clause over two rowid equalities
query I
SELECT * FROM t1 where rowid = 6 OR rowid = 9 ORDER BY ALL;
----
106
109
# both rowid conditions must appear in the filters of the physical plan
query II
EXPLAIN SELECT * FROM t1 where rowid = 6 OR rowid = 9 ORDER BY ALL;
----
physical_plan <REGEX>:.*Filters:.*rowid.*6.*rowid.*9.*

View File

@@ -0,0 +1,30 @@
# name: test/sql/optimizer/test_rowid_pushdown_deletes.test
# description: Test rowid filters while rows are inserted and then deleted
# group: [optimizer]
statement ok
PRAGMA enable_verification
statement ok
CREATE TABLE tbl_grow_shrink (id_var VARCHAR, id_int INTEGER, id_point BIGINT);
# growing: insert one row per loop iteration, with id_point set to the loop index
loop i 0 29
statement ok
INSERT INTO tbl_grow_shrink VALUES ('hello there stranger! :sunshine:', 2, ${i});
endloop
# shrinking: each iteration checks that the row with the smallest rowid has
# id_point equal to the loop index, then deletes that row
loop i 0 29
query II
SELECT id_int, id_point = ${i} FROM tbl_grow_shrink WHERE rowid = (SELECT min(rowid) FROM tbl_grow_shrink);
----
2 true
statement ok
DELETE FROM tbl_grow_shrink WHERE rowid = (SELECT min(rowid) FROM tbl_grow_shrink);
endloop

View File

@@ -0,0 +1,32 @@
# name: test/sql/optimizer/test_rowid_pushdown_plan.test
# description: Compare queries against equivalent rewrites that filter on rowid
# group: [optimizer]
require tpch
statement ok
CALL dbgen(sf=0.01);
# NOTE(review): the queries below list no expected rows and are paired by a shared label
# (top5_result, filter_result) -- presumably the runner checks that queries with the same
# label produce identical results; confirm against the sqllogictest documentation
query IIIIIIIIIIIIIIII rowsort top5_result
SELECT * FROM lineitem ORDER BY l_orderkey DESC LIMIT 5;
----
# Rewritten using rowid pushdown
query IIIIIIIIIIIIIIII rowsort top5_result
SELECT * FROM lineitem WHERE rowid IN (SELECT rowid FROM lineitem ORDER BY l_orderkey DESC LIMIT 5);
----
query IIIIIIIIIIIIIIII rowsort filter_result
SELECT * FROM lineitem WHERE l_orderkey % 20000 == 0;
----
# Rewritten using rowid pushdown
query IIIIIIIIIIIIIIII rowsort filter_result
SELECT * FROM lineitem WHERE rowid IN (SELECT rowid FROM lineitem WHERE l_orderkey % 20000 == 0);
----
# The queries above use dynamic join filters, so the pushdown doesn't show up in the plan, but static filters do
query II
EXPLAIN SELECT * FROM lineitem WHERE rowid = 20058;
----
physical_plan <REGEX>:.* SEQ_SCAN.*Filters:.*rowid=20058.*

View File

@@ -0,0 +1,18 @@
# name: test/sql/optimizer/tests_no_pushdown_under_samples.test
# description: Test that filters are not pushed down below a sample
# group: [optimizer]
# df: built from range(10); wtf: a single row with i = 1
statement ok
CREATE OR REPLACE TABLE df AS (SELECT * AS i FROM range(10));
statement ok
CREATE OR REPLACE TABLE wtf AS (SELECT 1 AS i);
# the physical plan must list FILTER before RESERVOIR_SAMPLE
# NOTE(review): presumably this ordering means the filter stays above the sample
# operator in the plan -- confirm against the rendered EXPLAIN output
query II
explain FROM
df, wtf
SELECT
df.i
WHERE df.i > 8
USING SAMPLE 1;
----
physical_plan <REGEX>:.*FILTER.*RESERVOIR_SAMPLE.*