should be it

This commit is contained in:
2025-10-24 19:21:19 -05:00
parent a4b23fc57c
commit f09560c7b1
14047 changed files with 3161551 additions and 1 deletions

View File

@@ -0,0 +1,5 @@
# Register union_alls.cpp as the test_optimizer OBJECT library.
# NOTE(review): add_library_unity is a project-defined helper that is not visible here;
# presumably it wraps add_library with unity-build support - confirm against its definition.
add_library_unity(test_optimizer OBJECT union_alls.cpp)
# Append this target's object files to ALL_OBJECT_FILES and publish the updated
# list to the parent directory scope.
set(ALL_OBJECT_FILES
${ALL_OBJECT_FILES} $<TARGET_OBJECTS:test_optimizer>
PARENT_SCOPE)

View File

@@ -0,0 +1,48 @@
# name: test/optimizer/arithmetic_simplification.test
# description: Arithmetic simplification test
# group: [optimizer]
# fixture: a single INTEGER column that the arithmetic expressions operate on
statement ok
CREATE TABLE test(X INTEGER);
# restrict EXPLAIN to the optimized plan so plans can be compared between queries
statement ok
PRAGMA explain_output = OPTIMIZED_ONLY;
# verify that nop arithmetic is flattened
# every query labelled xnorm must yield the same result as the first one,
# i.e. the same plan as the bare column reference
query I nosort xnorm
EXPLAIN SELECT X FROM test
----
# adding zero, on either side
query I nosort xnorm
EXPLAIN SELECT X+0 FROM test
----
query I nosort xnorm
EXPLAIN SELECT 0+X FROM test
----
# subtracting zero
query I nosort xnorm
EXPLAIN SELECT X-0 FROM test
----
# multiplying by one, on either side
query I nosort xnorm
EXPLAIN SELECT X*1 FROM test
----
query I nosort xnorm
EXPLAIN SELECT 1*X FROM test
----
# integer division by one
query I nosort xnorm
EXPLAIN SELECT X//1 FROM test
----
# division by zero results in a NULL
# the xnull pair checks that X//0 produces the same plan as selecting a NULL constant
query I nosort xnull
EXPLAIN SELECT NULL FROM test
----
query I nosort xnull
EXPLAIN SELECT X//0 FROM test
----

View File

@@ -0,0 +1,34 @@
# name: test/optimizer/case_simplification.test
# description: Test case simplification
# group: [optimizer]
# fixture: a single INTEGER column used inside the CASE branches
statement ok
CREATE TABLE test(X INTEGER);
# restrict EXPLAIN to the optimized plan so plans can be compared between queries
statement ok
PRAGMA explain_output = OPTIMIZED_ONLY;
# each label pairs a CASE expression with the expression it should simplify to;
# queries sharing a label must produce the same plan
# condition is always true (1=1): the CASE collapses to its THEN branch
query I nosort casenorm1
EXPLAIN SELECT CASE WHEN 1=1 THEN X+1 ELSE X+2 END FROM test
----
query I nosort casenorm1
EXPLAIN SELECT X+1 FROM test
----
# condition is always false (1=0): the CASE collapses to its ELSE branch
query I nosort casenorm2
EXPLAIN SELECT CASE WHEN 1=0 THEN X+1 ELSE X+2 END FROM test
----
query I nosort casenorm2
EXPLAIN SELECT X+2 FROM test
----
# condition is a comparison with NULL (NULL>3): the ELSE branch is selected as well
query I nosort casenorm3
EXPLAIN SELECT CASE WHEN NULL>3 THEN X+1 ELSE X+2 END FROM test
----
query I nosort casenorm3
EXPLAIN SELECT X+2 FROM test
----

View File

@@ -0,0 +1,28 @@
# name: test/optimizer/column_binding_error.test
# description: column binding error test inspired by #16426
# group: [optimizer]
# NOTE(review): none of the statements below reference TPC-H tables or functions;
# confirm whether this requirement is still needed
require tpch
# fixtures: a one-row stats table, a 30-row postings table and a 2-row docs table
statement ok
CREATE TABLE stats(num_docs) AS SELECT 1;
statement ok
CREATE TABLE postings(docid, termid, tf) AS SELECT range, range, 1 FROM range(30);
statement ok
CREATE TABLE docs(docid) AS FROM range(2);
# regression query: six identical scalar subqueries in the select list on top of
# a join chain with a CTE, plus an IN subquery over the same CTE.
# It only has to execute without raising an error (statement ok); no result is checked.
statement ok
WITH termids(termid) AS (SELECT 1)
SELECT
(SELECT num_docs FROM stats),
(SELECT num_docs FROM stats),
(SELECT num_docs FROM stats),
(SELECT num_docs FROM stats),
(SELECT num_docs FROM stats),
(SELECT num_docs FROM stats)
FROM postings
JOIN docs USING (docid)
JOIN termids USING (termid)
WHERE termid IN (SELECT termid FROM termids);

View File

@@ -0,0 +1,19 @@
# name: test/optimizer/column_lifetime_analyzer/must_visit_operator_expressions_first.test
# description: Test column lifetime analyzer
# group: [column_lifetime_analyzer]
# t5: 50 rows; a5 = range + 1000, b5 = range (0..49)
statement ok
create table t5 as select (range + 1000) a5, range b5, (range + 50)::INT::VARCHAR || '__suffix__' c5 from range(50);
# t1: 900 rows; b1 = range + 45 (45..944), c1 = '<range>__suffix__'
statement ok
create table t1 as select range::INT a1, (range + 45)::INT b1, (range)::INT::VARCHAR || '__suffix__' c1 from range(900);
# the join keys only overlap for the values 45..49, hence exactly five result rows:
# b5 = 45..49 gives a5 = 1045..1049, and b1 = 45..49 comes from t1 rows 0..4
# NOTE(review): the query has no ORDER BY and no sort mode, so the expected row order
# relies on the engine's output order - confirm this is intended
query II
select a5, c1 from t1, t5 where b5=b1;
----
1045 0__suffix__
1046 1__suffix__
1047 2__suffix__
1048 3__suffix__
1049 4__suffix__

View File

@@ -0,0 +1,24 @@
# name: test/optimizer/column_lifetime_analyzer/no_unnecessary_projections.test
# description: Test column lifetime
# group: [column_lifetime_analyzer]
# fixtures: lhs has 100 rows, rhs has 10 rows; in both tables i = range % 5 and j = range
statement ok
CREATE TABLE lhs AS SELECT range % 5 i, range j FROM range(100);
statement ok
CREATE TABLE rhs AS SELECT range % 5 i, range j FROM range(10);
# the analyzed plan of the semi join must mention RIGHT_SEMI
query II
explain analyze SELECT rhs.j FROM rhs SEMI JOIN lhs USING (i);
----
analyzed_plan <REGEX>:.*RIGHT_SEMI.*
# same query again: the plan text must not contain HASH JOIN followed by PROJECTION
query II
explain analyze SELECT rhs.j FROM rhs SEMI JOIN lhs USING (i);
----
analyzed_plan <!REGEX>:.*HASH JOIN.*PROJECTION.*
# the same negative check for the anti join
query II
explain analyze SELECT rhs.j FROM rhs ANTI JOIN lhs USING (i);
----
analyzed_plan <!REGEX>:.*HASH JOIN.*PROJECTION.*

View File

@@ -0,0 +1,9 @@
# name: test/optimizer/column_lifetime_analyzer/summary_column_lifetime.test
# description: Test column lifetime analyzer with SUMMARY (internal issue #4138)
# group: [column_lifetime_analyzer]
# fixture: a single-column table holding the values 0..3999
statement ok
create table data as select * from range(0,4000) tbl(col);
# summary() over an ordered subquery only has to execute without raising an error;
# no result is checked
statement ok
SELECT * FROM summary((SELECT col FROM data ORDER BY col));

View File

@@ -0,0 +1,152 @@
# name: test/optimizer/common_subplan.test
# description: Test the Common Subplan optimizer
# group: [optimizer]
statement ok
pragma explain_output='optimized_only'
# this should be automatically detected and materialized
query I
select t1.s + t2.s
from (select sum(range) s from range(10)) t1,
(select sum(range) s from range(10)) t2
----
90
query II
explain select t1.s + t2.s
from (select sum(range) s from range(10)) t1,
(select sum(range) s from range(10)) t2
----
logical_opt <REGEX>:.*CTE.*
# this shouldn't be materialized because random() is volatile
query II
explain select t1.s + t2.s
from (select sum(random()) s from range(10)) t1,
(select sum(random()) s from range(10)) t2
----
logical_opt <!REGEX>:.*CTE.*
# common subplan inside of a materialized cte and outside of it
# should yield two ctes
query I
with cte as materialized (
select sum(range) s from range(10)
)
select t1.s + t2.s
from cte t1,
(select sum(range) s from range(10)) t2
----
90
query II
explain with cte as materialized (
select sum(range) s from range(10)
)
select t1.s + t2.s
from cte t1,
(select sum(range) s from range(10)) t2
----
logical_opt <REGEX>:.*CTE.*CTE.*
require tpcds
statement ok
call dsdgen(sf=0)
# q44
query II
explain
SELECT asceding.rnk,
i1.i_product_name best_performing,
i2.i_product_name worst_performing
FROM
(SELECT *
FROM
(SELECT item_sk,
rank() OVER (
ORDER BY rank_col ASC) rnk
FROM
(SELECT ss_item_sk item_sk,
avg(ss_net_profit) rank_col
FROM store_sales ss1
WHERE ss_store_sk = 4
GROUP BY ss_item_sk
HAVING avg(ss_net_profit) > 0.9*
(SELECT avg(ss_net_profit) rank_col
FROM store_sales
WHERE ss_store_sk = 4
AND ss_addr_sk IS NULL
GROUP BY ss_store_sk))V1)V11
WHERE rnk < 11) asceding,
(SELECT *
FROM
(SELECT item_sk,
rank() OVER (
ORDER BY rank_col DESC) rnk
FROM
(SELECT ss_item_sk item_sk,
avg(ss_net_profit) rank_col
FROM store_sales ss1
WHERE ss_store_sk = 4
GROUP BY ss_item_sk
HAVING avg(ss_net_profit) > 0.9*
(SELECT avg(ss_net_profit) rank_col
FROM store_sales
WHERE ss_store_sk = 4
AND ss_addr_sk IS NULL
GROUP BY ss_store_sk))V2)V21
WHERE rnk < 11) descending,
item i1,
item i2
WHERE asceding.rnk = descending.rnk
AND i1.i_item_sk=asceding.item_sk
AND i2.i_item_sk=descending.item_sk
ORDER BY asceding.rnk
LIMIT 100;
----
logical_opt <REGEX>:.*CTE.*
# q65
query II
explain
SELECT s_store_name,
i_item_desc,
sc.revenue,
i_current_price,
i_wholesale_cost,
i_brand
FROM store,
item,
(SELECT ss_store_sk,
avg(revenue) AS ave
FROM
(SELECT ss_store_sk,
ss_item_sk,
sum(ss_sales_price) AS revenue
FROM store_sales,
date_dim
WHERE ss_sold_date_sk = d_date_sk
AND d_month_seq BETWEEN 1176 AND 1176+11
GROUP BY ss_store_sk,
ss_item_sk) sa
GROUP BY ss_store_sk) sb,
(SELECT ss_store_sk,
ss_item_sk,
sum(ss_sales_price) AS revenue
FROM store_sales,
date_dim
WHERE ss_sold_date_sk = d_date_sk
AND d_month_seq BETWEEN 1176 AND 1176+11
GROUP BY ss_store_sk,
ss_item_sk) sc
WHERE sb.ss_store_sk = sc.ss_store_sk
AND sc.revenue <= 0.1 * sb.ave
AND s_store_sk = sc.ss_store_sk
AND i_item_sk = sc.ss_item_sk
ORDER BY s_store_name NULLS FIRST,
i_item_desc NULLS FIRST
LIMIT 100;
----
logical_opt <REGEX>:.*CTE.*

View File

@@ -0,0 +1,155 @@
# name: test/optimizer/compare_blob.test
# description: Test comparison of BLOB values against BIT values cast to BLOB
# group: [optimizer]
statement ok
CREATE TABLE t1(c0 BLOB);
statement ok
CREATE TABLE t0(c0 BIT);
statement ok
INSERT INTO t0(c0) VALUES (0);
statement ok
INSERT INTO t0(c0) VALUES (1);
statement ok
INSERT INTO t0(c0) VALUES (2);
statement ok
INSERT INTO t0(c0) VALUES (3);
statement ok
INSERT INTO t0(c0) VALUES (4);
statement ok
INSERT INTO t0(c0) VALUES (5);
statement ok
INSERT INTO t1(c0) VALUES (X'41');
statement ok
INSERT INTO t1(c0) VALUES ( X'123456');
statement ok
INSERT INTO t1(c0) VALUES ('2119350449');
statement ok
INSERT INTO t1(c0) VALUES ( X'48656C6C6F');
statement ok
INSERT INTO t1(c0) VALUES (E'\\xabcd');
statement ok
INSERT INTO t1(c0) VALUES (CAST('' AS BLOB));
query III
SELECT t0.c0, t1.c0, (t1.c0)>=(CAST(t0.c0 AS BLOB)) FROM t0, t1 WHERE ((t1.c0)>=(CAST(t0.c0 AS BLOB))) ORDER BY ALL;
----
00000000000000000000000000000000 2119350449 1
00000000000000000000000000000000 x123456 1
00000000000000000000000000000000 x41 1
00000000000000000000000000000000 x48656C6C6F 1
00000000000000000000000000000000 \xABcd 1
00000000000000000000000000000001 2119350449 1
00000000000000000000000000000001 x123456 1
00000000000000000000000000000001 x41 1
00000000000000000000000000000001 x48656C6C6F 1
00000000000000000000000000000001 \xABcd 1
00000000000000000000000000000010 2119350449 1
00000000000000000000000000000010 x123456 1
00000000000000000000000000000010 x41 1
00000000000000000000000000000010 x48656C6C6F 1
00000000000000000000000000000010 \xABcd 1
00000000000000000000000000000011 2119350449 1
00000000000000000000000000000011 x123456 1
00000000000000000000000000000011 x41 1
00000000000000000000000000000011 x48656C6C6F 1
00000000000000000000000000000011 \xABcd 1
00000000000000000000000000000100 2119350449 1
00000000000000000000000000000100 x123456 1
00000000000000000000000000000100 x41 1
00000000000000000000000000000100 x48656C6C6F 1
00000000000000000000000000000100 \xABcd 1
00000000000000000000000000000101 2119350449 1
00000000000000000000000000000101 x123456 1
00000000000000000000000000000101 x41 1
00000000000000000000000000000101 x48656C6C6F 1
00000000000000000000000000000101 \xABcd 1
## original issue
statement ok
CREATE OR REPLACE TABLE t1(c0 BLOB);
statement ok
CREATE OR REPLACE TABLE t0(c0 BIT);
statement ok
CREATE OR REPLACE VIEW v0(c0) AS SELECT 1 FROM t1, t0 GROUP BY t0.c0;
statement ok
INSERT INTO t0(c0) VALUES ( NULL);
statement ok
INSERT INTO t0(c0) VALUES (0);
statement ok
INSERT INTO t0(c0) VALUES ( 1);
statement ok
INSERT INTO t1(c0) VALUES (X'41');
statement ok
INSERT INTO t1(c0) VALUES ( X'123456');
statement ok
INSERT INTO t1(c0) VALUES ('2119350449');
statement ok
INSERT INTO t1(c0) VALUES ( X'48656C6C6F');
statement ok
INSERT INTO t1(c0) VALUES (E'\\xabcd');
statement ok
INSERT INTO t1(c0) VALUES (CAST('' AS BLOB));
query III
SELECT t0.c0, t1.c0, (t1.c0)>=(CAST(t0.c0 AS BLOB)) FROM t0, v0, t1 WHERE ((t1.c0)>=(CAST(t0.c0 AS BLOB))) ORDER BY ALL
----
00000000000000000000000000000000 2119350449 true
00000000000000000000000000000000 2119350449 true
00000000000000000000000000000000 2119350449 true
00000000000000000000000000000000 x123456 true
00000000000000000000000000000000 x123456 true
00000000000000000000000000000000 x123456 true
00000000000000000000000000000000 x41 true
00000000000000000000000000000000 x41 true
00000000000000000000000000000000 x41 true
00000000000000000000000000000000 x48656C6C6F true
00000000000000000000000000000000 x48656C6C6F true
00000000000000000000000000000000 x48656C6C6F true
00000000000000000000000000000000 \xABcd true
00000000000000000000000000000000 \xABcd true
00000000000000000000000000000000 \xABcd true
00000000000000000000000000000001 2119350449 true
00000000000000000000000000000001 2119350449 true
00000000000000000000000000000001 2119350449 true
00000000000000000000000000000001 x123456 true
00000000000000000000000000000001 x123456 true
00000000000000000000000000000001 x123456 true
00000000000000000000000000000001 x41 true
00000000000000000000000000000001 x41 true
00000000000000000000000000000001 x41 true
00000000000000000000000000000001 x48656C6C6F true
00000000000000000000000000000001 x48656C6C6F true
00000000000000000000000000000001 x48656C6C6F true
00000000000000000000000000000001 \xABcd true
00000000000000000000000000000001 \xABcd true
00000000000000000000000000000001 \xABcd true

View File

@@ -0,0 +1,52 @@
# name: test/optimizer/comparison_simplification.test
# description: Comparison simplification test
# group: [optimizer]
# fixture: a single INTEGER column used in the comparisons
statement ok
CREATE TABLE test(X INTEGER);
# restrict EXPLAIN to the optimized plan so plans can be compared between queries
statement ok
PRAGMA explain_output = OPTIMIZED_ONLY;
# comparisons with NULL result in NULL
# every compnorm1 query must produce the same plan as selecting a NULL constant
query I nosort compnorm1
EXPLAIN SELECT NULL FROM test
----
query I nosort compnorm1
EXPLAIN SELECT X=NULL FROM test
----
query I nosort compnorm1
EXPLAIN SELECT X>NULL FROM test
----
query I nosort compnorm1
EXPLAIN SELECT NULL>X FROM test
----
# in the WHERE clause this gets pruned entirely
# a filter on X=NULL must produce the same plan as the always-false filter 1=0
query I nosort compnorm2
EXPLAIN SELECT * FROM test WHERE 1=0
----
query I nosort compnorm2
EXPLAIN SELECT * FROM test WHERE X=NULL
----
# we shift casts from columns to constants when possible
# all four spellings below must produce the same plan
query I nosort constantshift
EXPLAIN SELECT X=1::BIGINT FROM test
----
query I nosort constantshift
EXPLAIN SELECT X::BIGINT=1::INTEGER FROM test
----
query I nosort constantshift
EXPLAIN SELECT X='1' FROM test
----
query I nosort constantshift
EXPLAIN SELECT X::BIGINT='1' FROM test
----

View File

@@ -0,0 +1,238 @@
# name: test/optimizer/compressed_materialization.test_slow
# description: Compressed materialization test
# group: [optimizer]
statement ok
pragma enable_verification
statement ok
PRAGMA explain_output = OPTIMIZED_ONLY
# these functions live in the catalog, but cannot be called directly
statement error
select __internal_compress_string_utinyint('L')
----
Binder Error: Compressed materialization functions are for internal use only!
# internal issue 1576
statement ok
create table t0 as select range%400000 a, range%400000 b from range(500000);
query III rowsort
select * from (
select *, row_number() OVER () as row_number from (
SELECT * FROM t0 ORDER BY 1) ta
) tb where b > 2
order by a limit 2;
----
3 3 7
3 3 8
# tricky tests taken from test/sql/subquery/scalar/test_issue_6136.test
# we run these with one thread since they are order dependent
statement ok
create table r as select * from values (1, 1, 'a', 'A'), (1, null, 'b', 'B'), (1, 2, 'c', 'C'), (2, null, 'd', 'D') t(ra, rb, x, y);
statement ok
create table b as select * from values (1, 1, 1), (2, 1, 2), (3, 1, 3), (4, 1, null), (5, 2, 1), (6, 2, null), (7, 99, 99) t(id, ba, bb);
statement ok
set threads=1
query T
select (
select {'x': first(x order by x), 'y': first(y order by y), '__matches': count(*)}
from (
select *
from r
where ba = ra
and (bb = rb or rb is null)
order by all
)
group by ra, rb
order by all
limit 1)
from b
order by all
----
{'x': a, 'y': A, '__matches': 1}
{'x': b, 'y': B, '__matches': 1}
{'x': b, 'y': B, '__matches': 1}
{'x': b, 'y': B, '__matches': 1}
{'x': d, 'y': D, '__matches': 1}
{'x': d, 'y': D, '__matches': 1}
NULL
query T
select
coalesce((select {'x': first(x), 'y': first(y), '__matches': count(*)} from r where ba = ra and (bb = rb or rb is null) group by ra, rb order by bb = rb limit 1), {'x': null, 'y': null, '__matches': 0}) as ref2
from b
----
{'x': a, 'y': A, '__matches': 1}
{'x': c, 'y': C, '__matches': 1}
{'x': b, 'y': B, '__matches': 1}
{'x': b, 'y': B, '__matches': 1}
{'x': d, 'y': D, '__matches': 1}
{'x': d, 'y': D, '__matches': 1}
{'x': NULL, 'y': NULL, '__matches': 0}
statement ok
set threads=4
# we should see compress twice (in the ORDER BY expression and payload) and decompress once (just the payload)
statement ok
create table t1 as select range i from range(10)
query II
explain select i from t1 order by 10-i
----
logical_opt <REGEX>:(.*__internal_decompress.*){1}(.*__internal_compress.*){2}
statement ok
create table test as
select (range + 7) % 4 i,
(range + 7) % 11 j
from range(10)
# should see compress exactly twice (for columns i and j)
# if we see less than twice we're not compressing,
# and if we see it more than twice we're likely compressing and decompressing twice (once for each ORDER BY)
# but we can compress once, then do both ORDER BYs, then decompress
query II
explain select count(i), count(j) from (select i, j from (select i, j from test order by j offset 1) order by j offset 1)
----
logical_opt <REGEX>:(.*__internal_compress.*){2}
# should see it exactly once here, as we can only compress the group (i), not the value being summed (j)
# after the GROUP BY we do the ORDER BY, and finally decompress
query II
explain select i, sum(j) from test group by i order by i
----
logical_opt <REGEX>:(.*__internal_compress.*){1}
# We can't deal with duplicate projections (yet) so this should see 3 compresses instead of 1
query II
explain select count(j1), count(j2) from (select j j1, j j2 from (select j from test order by j offset 1) order by j1, j2 offset 1)
----
logical_opt <REGEX>:(.*__internal_compress.*){3}
query II
explain select distinct i, j from test order by i, j
----
logical_opt <REGEX>:(.*__internal_compress.*){2}
# taken from third_party/sqllogictest/test/index/orderby_nosort/10/slt_good_27.test
# the problem was that statistics propagation created an index join after "filter_prune" happened
statement ok
CREATE TABLE tab3(pk INTEGER PRIMARY KEY, col0 INTEGER, col1 FLOAT, col2 TEXT, col3 INTEGER, col4 FLOAT, col5 TEXT)
statement ok
INSERT INTO tab3 VALUES
(0,461,479.93,'idmdh',456,464.90,'nczyk'),
(1,473,482.60,'bguxh',460,466.25,'oseln'),
(2,474,484.45,'bnzmd',461,467.13,'kvwna'),
(3,475,485.1,'obtlj',462,468.73,'jkjbo'),
(4,477,486.62,'gjtbr',463,469.9,'bhers'),
(5,479,489.59,'bkxfm',464,470.29,'aklru'),
(6,481,495.30,'owirt',466,471.55,'lysig'),
(7,482,496.31,'yergm',467,473.31,'rkpxn'),
(8,484,497.51,'fszui',468,474.44,'ztexm'),
(9,486,498.24,'eueji',469,477.28,'amvcc')
statement ok
CREATE UNIQUE INDEX idx_tab3_4 ON tab3 (col3)
query I
SELECT pk FROM tab3 WHERE col0 IN (SELECT col3 FROM tab3 WHERE (col1 > 93.79)) ORDER BY 1 DESC
----
0
# test that we compress all-NULL (from multiple Parquet files) to utinyint too (if union_by_name is true)
require parquet
#
statement ok
pragma disable_verification
# one column without NULL, and two columns (varchar and bigint) that are all NULL
statement ok
copy (select hash(range + 1) i, null::varchar j, null::bigint k from range(100)) to '__TEST_DIR__/cm1.parquet'
statement ok
copy (select hash(range + 1) i, null::varchar j, null::bigint k from range(100,200)) to '__TEST_DIR__/cm2.parquet'
# according to their statistics, both columns contain NULLs and no non-NULL values
query II
select
stats(j) LIKE '%[Has Null: true, Has No Null: false]%',
stats(k) LIKE '%[Has Null: true, Has No Null: false]%'
from read_parquet('__TEST_DIR__/cm*.parquet', union_by_name=true) limit 1
----
true true
# this should lead to a plan where both all-NULL columns (varchar j and bigint k) are compressed
statement ok
PRAGMA explain_output = PHYSICAL_ONLY
query II
explain select * from read_parquet('__TEST_DIR__/cm*.parquet', union_by_name=true) order by i
----
physical_plan <REGEX>:.*__internal_decompress.*__internal_decompress.*__internal_compress.*__internal_compress.*
# and of course some tpch stuff
require tpch
statement ok
call dbgen(sf=0.01)
statement ok
PRAGMA explain_output = PHYSICAL_ONLY
# tpch q1 should use perfect hash aggregate
query II
EXPLAIN
SELECT
l_returnflag,
l_linestatus,
sum(l_quantity) AS sum_qty,
sum(l_extendedprice) AS sum_base_price,
sum(l_extendedprice * (1 - l_discount)) AS sum_disc_price,
sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) AS sum_charge,
avg(l_quantity) AS avg_qty,
avg(l_extendedprice) AS avg_price,
avg(l_discount) AS avg_disc,
count(*) AS count_order
FROM
lineitem
WHERE
l_shipdate <= CAST('1998-09-02' AS date)
GROUP BY
l_returnflag,
l_linestatus
ORDER BY
l_returnflag,
l_linestatus;
----
physical_plan <REGEX>:.*PERFECT_HASH_GROUP_BY.*
statement ok
PRAGMA explain_output = OPTIMIZED_ONLY
# test that we're compressing lineitem
query II
explain select * from lineitem order by l_shipdate
----
logical_opt <REGEX>:.*__internal_decompress.*__internal_compress.*
# test that we get the same result with and without compressed materialization
query IIIIIIIIIIIIIII nosort q0
select * from lineitem order by l_shipdate
----
statement ok
set disabled_optimizers to 'compressed_materialization'
query IIIIIIIIIIIIIII nosort q0
select * from lineitem order by l_shipdate
----

View File

@@ -0,0 +1,27 @@
# name: test/optimizer/conjunction_simplification.test
# description: Conjunction simplification test
# group: [optimizer]
# fixture: a single BOOLEAN column used in the conjunctions
statement ok
CREATE TABLE test(X BOOLEAN);
# restrict EXPLAIN to the optimized plan so plans can be compared between queries
statement ok
PRAGMA explain_output = OPTIMIZED_ONLY;
# all four queries below share the label conjnorm1, so both simplifications
# must end up with the same plan as the bare column reference
# X AND TRUE => X
query I nosort conjnorm1
EXPLAIN SELECT X AND TRUE FROM test
----
query I nosort conjnorm1
EXPLAIN SELECT X FROM test
----
# X OR FALSE => X
query I nosort conjnorm1
EXPLAIN SELECT X OR FALSE FROM test
----
query I nosort conjnorm1
EXPLAIN SELECT X FROM test
----

View File

@@ -0,0 +1,60 @@
# name: test/optimizer/constant_folding.test
# description: Constant folding test
# group: [optimizer]
# NOTE(review): this table is not referenced by any query below - confirm it is still needed
statement ok
CREATE TABLE test(X BOOLEAN);
# restrict EXPLAIN to the optimized plan so plans can be compared between queries
statement ok
PRAGMA explain_output = OPTIMIZED_ONLY;
# each label pairs a constant expression with the literal it should fold to;
# queries sharing a label must produce the same plan
# simple addition
query I nosort cfold1
EXPLAIN SELECT 1+1
----
query I nosort cfold1
EXPLAIN SELECT 2
----
# nested arithmetic
query I nosort cfold2
EXPLAIN SELECT (1+1+1)*2
----
query I nosort cfold2
EXPLAIN SELECT 6
----
# IN list that contains the value: the CASE folds to its THEN branch
query I nosort cfold3
EXPLAIN SELECT CASE WHEN 1 IN (1, 2, 3, 4) THEN 3 ELSE 5 END
----
query I nosort cfold3
EXPLAIN SELECT 3
----
# IN list that does not contain the value (1+1 folds to 2): the CASE folds to its ELSE branch
query I nosort cfold4
EXPLAIN SELECT CASE WHEN 1 IN (1+1, 2, 3, 4) THEN 3 ELSE 5 END
----
query I nosort cfold4
EXPLAIN SELECT 5
----
# no match, but the IN list contains a NULL: the result is NULL
query I nosort cfold5
EXPLAIN SELECT 1 IN (1+1, 2, 3, 4, NULL)
----
query I nosort cfold5
EXPLAIN SELECT NULL
----
# the IN list contains both a NULL and the value itself: the match wins and
# the THEN branch (3+4) is folded to 7
query I nosort cfold6
EXPLAIN SELECT CASE WHEN 1 IN (1+1, 2, 3, 4, NULL, 1) THEN (3+4) ELSE 2+2+2 END
----
query I nosort cfold6
EXPLAIN SELECT 7
----

View File

@@ -0,0 +1,82 @@
# name: test/optimizer/csv_pushdown.test
# description: Test CSV pushdown
# group: [optimizer]
# read a single column from a file
query I
SELECT l_returnflag FROM read_csv('data/csv/real/lineitem_sample.csv', delim='|', columns={'l_orderkey': 'INTEGER','l_partkey': 'INTEGER','l_suppkey': 'INTEGER','l_linenumber': 'INTEGER','l_quantity': 'INTEGER','l_extendedprice': 'DECIMAL(15,2)','l_discount': 'DECIMAL(15,2)','l_tax': 'DECIMAL(15,2)','l_returnflag': 'VARCHAR','l_linestatus': 'VARCHAR','l_shipdate': 'DATE','l_commitdate': 'DATE','l_receiptdate': 'DATE','l_shipinstruct': 'VARCHAR','l_shipmode': 'VARCHAR','l_comment': 'VARCHAR'});
----
N
N
N
N
N
N
N
R
R
A
# verify the projection pushdown is correctly displayed
query II
explain SELECT l_returnflag FROM read_csv('data/csv/real/lineitem_sample.csv', delim='|', columns={'l_orderkey': 'INTEGER','l_partkey': 'INTEGER','l_suppkey': 'INTEGER','l_linenumber': 'INTEGER','l_quantity': 'INTEGER','l_extendedprice': 'DECIMAL(15,2)','l_discount': 'DECIMAL(15,2)','l_tax': 'DECIMAL(15,2)','l_returnflag': 'VARCHAR','l_linestatus': 'VARCHAR','l_shipdate': 'DATE','l_commitdate': 'DATE','l_receiptdate': 'DATE','l_shipinstruct': 'VARCHAR','l_shipmode': 'VARCHAR','l_comment': 'VARCHAR'});
----
physical_plan <REGEX>:.*READ_CSV.*l_returnflag.*
# read a column as the incorrect type (l_shipinstruct is not a date)
statement error
SELECT l_shipinstruct FROM read_csv('data/csv/real/lineitem_sample.csv', delim='|', columns={'l_orderkey': 'INTEGER','l_partkey': 'INTEGER','l_suppkey': 'INTEGER','l_linenumber': 'INTEGER','l_quantity': 'INTEGER','l_extendedprice': 'DECIMAL(15,2)','l_discount': 'DECIMAL(15,2)','l_tax': 'DECIMAL(15,2)','l_returnflag': 'VARCHAR','l_linestatus': 'VARCHAR','l_shipdate': 'DATE','l_commitdate': 'DATE','l_receiptdate': 'DATE','l_shipinstruct': 'DATE','l_shipmode': 'VARCHAR','l_comment': 'VARCHAR'});
----
Column at position: 13 Set type: DATE Sniffed type: VARCHAR
# conversion is skipped if we don't read the value - so even with the incorrect type specified this still works
query I
SELECT l_returnflag FROM read_csv('data/csv/real/lineitem_sample.csv', delim='|', columns={'l_orderkey': 'INTEGER','l_partkey': 'INTEGER','l_suppkey': 'INTEGER','l_linenumber': 'INTEGER','l_quantity': 'INTEGER','l_extendedprice': 'DECIMAL(15,2)','l_discount': 'DECIMAL(15,2)','l_tax': 'DECIMAL(15,2)','l_returnflag': 'VARCHAR','l_linestatus': 'VARCHAR','l_shipdate': 'DATE','l_commitdate': 'DATE','l_receiptdate': 'DATE','l_shipinstruct': 'DATE','l_shipmode': 'VARCHAR','l_comment': 'VARCHAR'}, header = 0);
----
N
N
N
N
N
N
N
R
R
A
# ignore errors: rows that fail the DATE conversion of l_shipinstruct are skipped, so no rows are returned
query I
SELECT l_shipinstruct FROM read_csv('data/csv/real/lineitem_sample.csv', delim='|', columns={'l_orderkey': 'INTEGER','l_partkey': 'INTEGER','l_suppkey': 'INTEGER','l_linenumber': 'INTEGER','l_quantity': 'INTEGER','l_extendedprice': 'DECIMAL(15,2)','l_discount': 'DECIMAL(15,2)','l_tax': 'DECIMAL(15,2)','l_returnflag': 'VARCHAR','l_linestatus': 'VARCHAR','l_shipdate': 'DATE','l_commitdate': 'DATE','l_receiptdate': 'DATE','l_shipinstruct': 'DATE','l_shipmode': 'VARCHAR','l_comment': 'VARCHAR'}, ignore_errors=true);
----
# ignore errors partially
statement error
SELECT l_orderkey, l_partkey, l_extendedprice FROM read_csv('data/csv/real/lineitem_sample.csv', delim='|', columns={'l_orderkey': 'INTEGER','l_partkey': 'INTEGER','l_suppkey': 'INTEGER','l_linenumber': 'INTEGER','l_quantity': 'INTEGER','l_extendedprice': 'SMALLINT','l_discount': 'DECIMAL(15,2)','l_tax': 'DECIMAL(15,2)','l_returnflag': 'VARCHAR','l_linestatus': 'VARCHAR','l_shipdate': 'DATE','l_commitdate': 'DATE','l_receiptdate': 'DATE','l_shipinstruct': 'DATE','l_shipmode': 'VARCHAR','l_comment': 'VARCHAR'});
----
l_extendedprice
query III
SELECT l_orderkey, l_partkey, l_extendedprice FROM read_csv('data/csv/real/lineitem_sample.csv', delim='|', columns={'l_orderkey': 'INTEGER','l_partkey': 'INTEGER','l_suppkey': 'INTEGER','l_linenumber': 'INTEGER','l_quantity': 'INTEGER','l_extendedprice': 'SMALLINT','l_discount': 'DECIMAL(15,2)','l_tax': 'DECIMAL(15,2)','l_returnflag': 'VARCHAR','l_linestatus': 'VARCHAR','l_shipdate': 'DATE','l_commitdate': 'DATE','l_receiptdate': 'DATE','l_shipinstruct': 'DATE','l_shipmode': 'VARCHAR','l_comment': 'VARCHAR'}, ignore_errors=true, header = 0);
----
1 15519 24387
1 6370 10211
1 214 31198
1 2403 31330
statement error
SELECT l_orderkey, l_partkey, l_extendedprice FROM read_csv('data/csv/real/lineitem_sample.csv', delim='|', columns={'l_orderkey': 'INTEGER','l_partkey': 'INTEGER','l_suppkey': 'INTEGER','l_linenumber': 'INTEGER','l_quantity': 'INTEGER','l_extendedprice': 'USMALLINT','l_discount': 'DECIMAL(15,2)','l_tax': 'DECIMAL(15,2)','l_returnflag': 'VARCHAR','l_linestatus': 'VARCHAR','l_shipdate': 'DATE','l_commitdate': 'DATE','l_receiptdate': 'DATE','l_shipinstruct': 'DATE','l_shipmode': 'VARCHAR','l_comment': 'VARCHAR'});
----
l_extendedprice
query III
SELECT l_orderkey, l_partkey, l_extendedprice FROM read_csv('data/csv/real/lineitem_sample.csv', delim='|', columns={'l_orderkey': 'INTEGER','l_partkey': 'INTEGER','l_suppkey': 'INTEGER','l_linenumber': 'INTEGER','l_quantity': 'INTEGER','l_extendedprice': 'USMALLINT','l_discount': 'DECIMAL(15,2)','l_tax': 'DECIMAL(15,2)','l_returnflag': 'VARCHAR','l_linestatus': 'VARCHAR','l_shipdate': 'DATE','l_commitdate': 'DATE','l_receiptdate': 'DATE','l_shipinstruct': 'DATE','l_shipmode': 'VARCHAR','l_comment': 'VARCHAR'}, ignore_errors=true, header = 0);
----
1 15519 24387
1 6731 58958
1 6370 10211
1 214 31198
1 2403 31330
1 1564 46898
2 10617 58049
3 430 59869
3 12845 47462

View File

@@ -0,0 +1,60 @@
# name: test/optimizer/cte_inlining.test
# description: Test the CTE Inlining optimizer
# group: [optimizer]
statement ok
SET default_null_order='nulls_first';
# fixture: a one-row, one-column table that the CTEs select from
statement ok
create table a(i integer);
statement ok
insert into a values (42);
# restrict EXPLAIN to the optimized plan, which the regular expressions below are matched against
statement ok
PRAGMA explain_output = OPTIMIZED_ONLY
# in alternative verify mode, CTEs may not be inlined
require no_alternative_verify
# single reference to CTE, should be inlined
# (the optimized plan must not mention CTE)
query II
explain with cte1 as (Select i as j from a) select * from cte1;
----
logical_opt <!REGEX>:.*CTE.*
# multiple references to CTE, not inlined by default, but can be forced
# NOTE(review): the query below references cte1 only once; as written it checks that
# a CTE explicitly marked NOT MATERIALIZED is inlined - confirm the intended query
query II
explain with cte1 as NOT MATERIALIZED (Select i as j from a) select * from cte1;
----
logical_opt <!REGEX>:.*CTE.*
# Test chained CTEs
# (the optimized plan must not mention cte1)
query II
explain
with
cte1 as (select i as j from a),
cte2 as (select * from cte1)
select * from cte2;
----
logical_opt <!REGEX>:.*cte1.*
# Test complex case with both materialized and not materialized CTEs
# (a CTE must remain in the optimized plan)
query II
explain
with
cte1 as MATERIALIZED (select i as j from a),
cte2 as NOT MATERIALIZED (select * from cte1)
select * from cte2;
----
logical_opt <REGEX>:.*CTE.*
# Test non-referenced CTE
# (the optimized plan must not mention CTE)
query II
explain
with
cte1 as (select i as j from a)
select * from a;
----
logical_opt <!REGEX>:.*CTE.*

View File

@@ -0,0 +1,395 @@
# name: test/optimizer/date_trunc_simplification.test
# description: test DATE_TRUNC() constant simplifications
# group: [optimizer]
statement ok
PRAGMA enable_verification;
statement ok
PRAGMA explain_output = OPTIMIZED_ONLY;
# fixture: two timestamps, one on 2025-01-06 and one on 2025-01-10
statement ok
create table test(d TIMESTAMP);
statement ok
insert into test values ('2025-01-06 03:01:00'), ('2025-01-10 05:10:06');
#
# check correctness of simple optimizations
#
# one query per comparison operator; every bound is chosen so that exactly one
# of the two fixture rows qualifies
query I
select * from test where date_trunc('day', d) < '2025-01-08';
----
2025-01-06 03:01:00
query I
select * from test where date_trunc('day', d) <= '2025-01-06';
----
2025-01-06 03:01:00
query I
select * from test where date_trunc('day', d) > '2025-01-08';
----
2025-01-10 05:10:06
query I
select * from test where date_trunc('day', d) >= '2025-01-10';
----
2025-01-10 05:10:06
query I
select * from test where date_trunc('day', d) = '2025-01-06';
----
2025-01-06 03:01:00
query I
select * from test where date_trunc('day', d) != '2025-01-10';
----
2025-01-06 03:01:00
query I
select * from test where date_trunc('day', d) is not distinct from '2025-01-06';
----
2025-01-06 03:01:00
query I
select * from test where date_trunc('day', d) is distinct from '2025-01-10';
----
2025-01-06 03:01:00
#
# ensure date_trunc() is taken out of the result for all possible operators
#
# Each pattern pins only the comparison operator applied directly to column d and
# the leading '2025 of the rewritten bound, not the full timestamp literal.
# NOTE(review): some character classes below include the box-drawing bar and some
# do not -- confirm this difference is intentional.
query II
explain analyze select * from test where date_trunc('day', d) < '2025-01-08';
----
analyzed_plan <REGEX>:.*Filters:[ \t\n]*d<'2025.*$
# NOTE(review): `>` below is expected to turn into `>=`, whereas `<=` here is expected
# to stay `<=` rather than become `<` -- confirm against the rule implementation.
query II
explain analyze select * from test where date_trunc('day', d) <= '2025-01-08';
----
analyzed_plan <REGEX>:.*Filters:[ \t\n│]*d<='2025.*$
query II
explain analyze select * from test where date_trunc('day', d) > '2025-01-08';
----
analyzed_plan <REGEX>:.*Filters:[ \t\n]*d>='2025.*$
query II
explain analyze select * from test where date_trunc('day', d) >= '2025-01-08';
----
analyzed_plan <REGEX>:.*Filters:[ \t\n│]*d>='2025.*$
# equality is expected to become a range: a lower bound AND an upper bound on d
query II
explain analyze select * from test where date_trunc('day', d) = '2025-01-08';
----
analyzed_plan <REGEX>:.*Filters:[ \t\n]*d>='2025.*AND.*d<'2025.*$
# inequality is expected to become a disjunction of two comparisons on d;
# this pattern looks for it after "FILTER" rather than after "Filters:"
query II
explain analyze select * from test where date_trunc('day', d) != '2025-01-08';
----
analyzed_plan <REGEX>:.*FILTER[ \t\n]*\(\(d >= '2025.*OR.*\(d <[ \t\n│─]*'2025.*$
query II
explain analyze select * from test where date_trunc('day', d) is not distinct from '2025-01-08';
----
analyzed_plan <REGEX>:.*Filters:[ \t\n]*d>='2025.*AND.*d<'2025.*$
query II
explain analyze select * from test where date_trunc('day', d) is distinct from '2025-01-08';
----
analyzed_plan <REGEX>:.*Filters:[ \t\n]*\(\(d >= '2025.*OR.*\(d <[ \t\n│─]*'2025.*$
#
# check correctness of simple optimizations, column on rhs
#
# same eight operators as the column-on-lhs checks, with the constant written on
# the left and the comparison direction flipped; each query selects one fixture row
query I
select * from test where '2025-01-08' > date_trunc('day', d);
----
2025-01-06 03:01:00
query I
select * from test where '2025-01-06' >= date_trunc('day', d);
----
2025-01-06 03:01:00
query I
select * from test where '2025-01-08' < date_trunc('day', d);
----
2025-01-10 05:10:06
query I
select * from test where '2025-01-10' <= date_trunc('day', d);
----
2025-01-10 05:10:06
query I
select * from test where '2025-01-06' = date_trunc('day', d);
----
2025-01-06 03:01:00
query I
select * from test where '2025-01-10' != date_trunc('day', d);
----
2025-01-06 03:01:00
query I
select * from test where '2025-01-06' is not distinct from date_trunc('day', d);
----
2025-01-06 03:01:00
query I
select * from test where '2025-01-10' is distinct from date_trunc('day', d);
----
2025-01-06 03:01:00
#
# check correctness of optimizations with different input types
#
# test2 holds the same two calendar days as test, but typed DATE instead of TIMESTAMP
statement ok
create table test2(d DATE);
statement ok
insert into test2 values ('2025-01-06'), ('2025-01-10');
query I
select * from test2 where date_trunc('day', d) < '2025-01-08';
----
2025-01-06
query I
select * from test2 where date_trunc('day', d) <= '2025-01-06';
----
2025-01-06
query I
select * from test2 where date_trunc('day', d) > '2025-01-08';
----
2025-01-10
query I
select * from test2 where date_trunc('day', d) >= '2025-01-10';
----
2025-01-10
query I
select * from test2 where date_trunc('day', d) = '2025-01-06';
----
2025-01-06
query I
select * from test2 where date_trunc('day', d) != '2025-01-10';
----
2025-01-06
query I
select * from test2 where date_trunc('day', d) is not distinct from '2025-01-06';
----
2025-01-06
query I
select * from test2 where date_trunc('day', d) is distinct from '2025-01-10';
----
2025-01-06
#
# check edge cases
#
# Queries come in pairs: the bound sits exactly on the truncated value of a fixture
# row, then one unit (day / hour / minute) away from it, so an off-by-one in the
# rewritten bound flips the expected result.
# day granularity, lower row (2025-01-06)
query I
select * from test where date_trunc('day', d) < '2025-01-06';
----
query I
select * from test where date_trunc('day', d) < '2025-01-07';
----
2025-01-06 03:01:00
query I
select * from test where date_trunc('day', d) <= '2025-01-06';
----
2025-01-06 03:01:00
query I
select * from test where date_trunc('day', d) <= '2025-01-07';
----
2025-01-06 03:01:00
# day granularity, upper row (2025-01-10)
query I
select * from test where date_trunc('day', d) > '2025-01-10';
----
query I
select * from test where date_trunc('day', d) > '2025-01-09';
----
2025-01-10 05:10:06
query I
select * from test where date_trunc('day', d) >= '2025-01-10';
----
2025-01-10 05:10:06
query I
select * from test where date_trunc('day', d) >= '2025-01-11';
----
# hour granularity, lower row (03:01:00 truncates to 03:00:00)
query I
select * from test where date_trunc('hour', d) < '2025-01-06 03:00:00';
----
query I
select * from test where date_trunc('hour', d) < '2025-01-06 04:00:00';
----
2025-01-06 03:01:00
query I
select * from test where date_trunc('hour', d) <= '2025-01-06 03:00:00';
----
2025-01-06 03:01:00
query I
select * from test where date_trunc('hour', d) <= '2025-01-06 04:00:00';
----
2025-01-06 03:01:00
# minute granularity, upper row (05:10:06 truncates to 05:10:00)
query I
select * from test where date_trunc('minute', d) > '2025-01-10 05:10:00';
----
query I
select * from test where date_trunc('minute', d) > '2025-01-10 05:09:00';
----
2025-01-10 05:10:06
query I
select * from test where date_trunc('minute', d) >= '2025-01-10 05:10:00';
----
2025-01-10 05:10:06
query I
select * from test where date_trunc('minute', d) >= '2025-01-10 05:11:00';
----
#
# check when there's a NULL in the table or on the RHS
#
# test3 is the TIMESTAMP fixture plus one NULL row
statement ok
create table test3(d TIMESTAMP);
statement ok
insert into test3 values ('2025-01-06 03:01:00'), ('2025-01-10 05:10:06'), (NULL);
# ordinary comparisons against NULL are expected to select no rows at all
query I
select * from test3 where date_trunc('hour', d) = NULL;
----
query I
select * from test3 where date_trunc('hour', d) >= NULL;
----
query I
select * from test3 where date_trunc('hour', d) <= NULL;
----
query I
select * from test3 where date_trunc('hour', d) <> NULL;
----
# the NULL-aware comparisons split the table: the two non-NULL rows versus the NULL row
query I
select * from test3 where date_trunc('hour', d) IS DISTINCT FROM NULL;
----
2025-01-06 03:01:00
2025-01-10 05:10:06
query I
select * from test3 where date_trunc('hour', d) IS NOT DISTINCT FROM NULL;
----
NULL
#
# check that the optimization only applies if the LHS is just a column
#
# here date_trunc() wraps date_add(d, ...) instead of the bare column; the latest row
# shifted by one day truncates to 2025-01-11, so nothing reaches the bound
query I
select * from test where date_trunc('day', date_add(d, INTERVAL 1 day)) >= '2025-01-12';
----
#
# check rewrites for any type of interval
#
# one query per date part, against the two-row TIMESTAMP fixture
# (2025-01-06 03:01:00 and 2025-01-10 05:10:06)
query I
select * from test where date_trunc('year', d) >= '2024-01-01';
----
2025-01-06 03:01:00
2025-01-10 05:10:06
# both rows truncate to 2025-01-01, which is not strictly greater than the bound
query I
select * from test where date_trunc('month', d) > '2025-01-01';
----
query I
select * from test where date_trunc('day', d) >= '2025-01-06';
----
2025-01-06 03:01:00
2025-01-10 05:10:06
query I
select * from test where date_trunc('decade', d) >= '2020-01-01';
----
2025-01-06 03:01:00
2025-01-10 05:10:06
query I
select * from test where date_trunc('century', d) >= '2000-01-01';
----
2025-01-06 03:01:00
2025-01-10 05:10:06
query I
select * from test where date_trunc('millennium', d) >= '2000-01-01';
----
2025-01-06 03:01:00
2025-01-10 05:10:06
query I
select * from test where date_trunc('microsecond', d) >= '2025-01-08';
----
2025-01-10 05:10:06
query I
select * from test where date_trunc('millisecond', d) >= '2025-01-08';
----
2025-01-10 05:10:06
query I
select * from test where date_trunc('second', d) >= '2025-01-08';
----
2025-01-10 05:10:06
# the first row truncates to exactly the bound, so the strict > excludes it
query I
select * from test where date_trunc('minute', d) > '2025-01-06 03:01:00'
----
2025-01-10 05:10:06
query I
select * from test where date_trunc('hour', d) >= '2025-01-06 03:00:00';
----
2025-01-06 03:01:00
2025-01-10 05:10:06
query I
select * from test where date_trunc('week', d) >= '2025-01-01';
----
2025-01-06 03:01:00
2025-01-10 05:10:06
# both rows fall in the first quarter of 2025, so neither reaches the second-quarter bound
query I
select * from test where date_trunc('quarter', d) >= '2025-04-01';
----

View File

@@ -0,0 +1,87 @@
# name: test/optimizer/date_trunc_simplification_icu.test
# description: test DATE_TRUNC() constant simplifications that require icu
# group: [optimizer]
require icu
statement ok
PRAGMA enable_verification;
statement ok
set Calendar='gregorian';
#
# check operation with hour offsets that aren't complete hours
#
# the two rows are inserted with +02:10 and -06:45 offsets; with the session time
# zone set to utc they are rendered as 2024-12-31 22:51:00+00 and
# 2025-01-11 01:45:00+00 in the expected results below
statement ok
create table test(d TIMESTAMPTZ);
statement ok
insert into test values ('2025-01-01 01:01:00+02:10'), ('2025-01-10 19:00:00-06:45');
statement ok
set timezone='utc';
query I
select * from test where date_trunc('day', d) < '2025-01-01'::TIMESTAMPTZ;
----
2024-12-31 22:51:00+00
query I
select * from test where date_trunc('day', d) >= '2025-01-01'::TIMESTAMPTZ;
----
2025-01-11 01:45:00+00
query I
select * from test where date_trunc('day', d) <= '2025-01-10'::TIMESTAMPTZ;
----
2024-12-31 22:51:00+00
query I
select * from test where date_trunc('day', d) > '2025-01-10'::TIMESTAMPTZ;
----
2025-01-11 01:45:00+00
#
# check correctness on DST edge cases using the St. Johns timezone
#
# the inserted literals carry no offset; the expected results show them rendered with
# -03:30 or -02:30 depending on which side of the March / November switch they fall
statement ok
set TimeZone='America/St_Johns';
statement ok
create table test2(d TIMESTAMPTZ);
statement ok
insert into test2 values ('2025-03-09T01:05:00'),
('2025-03-09T03:05:00'),
('2025-11-02T00:30:00'),
('2025-11-02T01:30:00');
# March switch: bounds are given in UTC (+00) and land between the two March rows
query I
select * from test2 where date_trunc('hour', d) < '2025-03-09T05:00:00+00'::TIMESTAMPTZ;
----
2025-03-09 01:05:00-03:30
query I
select * from test2 where date_trunc('hour', d) <= '2025-03-09T05:30:00+00'::TIMESTAMPTZ;
----
2025-03-09 01:05:00-03:30
2025-03-09 03:05:00-02:30
# November switch: strict vs. non-strict comparison around the two November rows
query I
select * from test2 where date_trunc('hour', d) > '2025-11-02T03:30:00+00'::TIMESTAMPTZ;
----
2025-11-02 01:30:00-03:30
query I
select * from test2 where date_trunc('hour', d) > '2025-11-02T02:30:00+00'::TIMESTAMPTZ;
----
2025-11-02 01:30:00-03:30
query I
select * from test2 where date_trunc('hour', d) >= '2025-11-02T02:30:00+00'::TIMESTAMPTZ;
----
2025-11-02 00:30:00-02:30
2025-11-02 01:30:00-03:30

View File

@@ -0,0 +1,183 @@
# name: test/optimizer/deliminator.test
# description: Test the Deliminator optimizer
# group: [optimizer]
statement ok
SET default_null_order='nulls_first';
# the plan checks below run against a tiny generated TPC-H database
require tpch
statement ok
CALL dbgen(sf=0.0001);
statement ok
PRAGMA explain_output = OPTIMIZED_ONLY
# The four EXPLAINs use correlated-subquery TPC-H queries (numbered Q 02, Q 17, Q 20 in
# the comments); <!REGEX> asserts the named operator is absent from the optimized plan.
# Q 02: join with JoinType::SINGLE (created when pushing down dependent joins) is converted to LEFT
query II
explain SELECT s_acctbal, s_name, n_name, p_partkey, p_mfgr, s_address, s_phone, s_comment FROM part, supplier, partsupp, nation, region WHERE p_partkey = ps_partkey AND s_suppkey = ps_suppkey AND p_size = 15 AND p_type LIKE '%BRASS' AND s_nationkey = n_nationkey AND n_regionkey = r_regionkey AND r_name = 'EUROPE' AND ps_supplycost = ( SELECT min(ps_supplycost) FROM partsupp, supplier, nation, region WHERE p_partkey = ps_partkey AND s_suppkey = ps_suppkey AND s_nationkey = n_nationkey AND n_regionkey = r_regionkey AND r_name = 'EUROPE') ORDER BY s_acctbal DESC, n_name, s_name, p_partkey LIMIT 100;
----
logical_opt <!REGEX>:.*SINGLE.*
# Q 17: join with JoinType::SINGLE (created when pushing down dependent joins) is converted to LEFT
query II
explain SELECT sum(l_extendedprice) / 7.0 AS avg_yearly FROM lineitem, part WHERE p_partkey = l_partkey AND p_brand = 'Brand#23' AND p_container = 'MED BOX' AND l_quantity < ( SELECT 0.2 * avg(l_quantity) FROM lineitem WHERE l_partkey = p_partkey);
----
logical_opt <!REGEX>:.*SINGLE.*
# Q 17: if we remove the filters """ p_brand = 'Brand#23' AND p_container = 'MED BOX' """ we can remove the whole DELIM join
query II
explain SELECT sum(l_extendedprice) / 7.0 AS avg_yearly FROM lineitem, part WHERE p_partkey = l_partkey AND l_quantity < (SELECT 0.2 * avg(l_quantity) FROM lineitem WHERE l_partkey = p_partkey);
----
logical_opt <!REGEX>:.*DELIM_JOIN.*
# Q 20: join with JoinType::SINGLE (created when pushing down dependent joins) is converted to LEFT
query II
explain SELECT s_name, s_address FROM supplier, nation WHERE s_suppkey IN ( SELECT ps_suppkey FROM partsupp WHERE ps_partkey IN ( SELECT p_partkey FROM part WHERE p_name LIKE 'forest%') AND ps_availqty > ( SELECT 0.5 * sum(l_quantity) FROM lineitem WHERE l_partkey = ps_partkey AND l_suppkey = ps_suppkey AND l_shipdate >= CAST('1994-01-01' AS date) AND l_shipdate < CAST('1995-01-01' AS date))) AND s_nationkey = n_nationkey AND n_name = 'CANADA' ORDER BY s_name;
----
logical_opt <!REGEX>:.*SINGLE.*
# Result checks: correlated scalar subqueries whose aggregate input is NULL.
# The table starts with the single row (NULL, 1), so SUM(i) over the matching
# rows is NULL; each query wraps that NULL aggregate in a different expression.
statement ok
CREATE TABLE integers(i INTEGER, j integer)
statement ok
INSERT INTO integers VALUES (NULL,1)
query II
SELECT i, (select SUM(i) is not null from integers where j = i1.j) FROM integers i1 ORDER BY 1;
----
NULL False
query II
SELECT i, (select SUM(i) AND false from integers where j = i1.j) FROM integers i1 ORDER BY 1;
----
NULL False
query II
SELECT i, (select SUM(i) OR true from integers where j = i1.j) FROM integers i1 ORDER BY 1;
----
NULL True
query II
SELECT i, (select SUM(i) IS DISTINCT FROM NULL from integers where j = i1.j) FROM integers i1 ORDER BY 1;
----
NULL False
query II
SELECT i, (select SUM(i) IS NOT DISTINCT FROM NULL from integers where j = i1.j) FROM integers i1 ORDER BY 1;
----
NULL TRUE
query II
SELECT i, (select CONCAT(SUM(i), 'hello') from integers where j = i1.j) FROM integers i1 ORDER BY 1;
----
NULL hello
query II
SELECT i, (select [SUM(i)] from integers where j = i1.j) FROM integers i1 ORDER BY 1;
----
NULL [NULL]
query II
SELECT i, (select {'a': SUM(i)} from integers where j = i1.j) FROM integers i1 ORDER BY 1;
----
NULL {'a': NULL}
# add a second row, then use a subquery predicate (j is null and j = i1.j) that no row
# satisfies: the scalar subquery yields NULL for both outer rows
statement ok
INSERT INTO integers VALUES (1,2)
query II
SELECT i, (select sum(i) from integers where j is null and j = i1.j) FROM integers i1 ORDER BY 1;
----
NULL NULL
1 NULL
statement ok
DROP TABLE integers;
# Result checks: correlated aggregates over "all values greater than the outer i".
# With the values 1, 2, 3, NULL the subquery sees {2, 3} for i = 1 (sum 5), {3} for
# i = 2 (sum 3) and no rows for i = 3 and i = NULL, where SUM(i) is NULL.
statement ok
CREATE TABLE integers(i INTEGER);
statement ok
INSERT INTO integers VALUES (1), (2), (3), (NULL);
query II
SELECT i, (SELECT SUM(i) IS NOT NULL FROM integers i2 WHERE i2.i>i1.i) FROM integers i1 ORDER BY i;
----
NULL False
1 True
2 True
3 False
# same checks with the aggregate computed in a derived table
query II
SELECT i, (SELECT sum IS NULL FROM (SELECT SUM(i) sum FROM integers i2 WHERE i2.i>i1.i)) t1 FROM integers i1 ORDER BY i;
----
NULL True
1 False
2 False
3 True
query II
SELECT i, (SELECT sum IS NOT NULL FROM (SELECT SUM(i) sum FROM integers i2 WHERE i2.i>i1.i)) t1 FROM integers i1 ORDER BY i;
----
NULL False
1 True
2 True
3 False
query II
SELECT i, (SELECT [sum] FROM (SELECT SUM(i) sum FROM integers i2 WHERE i2.i>i1.i)) t1 FROM integers i1 ORDER BY i;
----
NULL [NULL]
1 [5]
2 [3]
3 [NULL]
# aggregate wrapped in a nested scalar subquery
query II
SELECT i, (SELECT (SELECT SUM(i) IS NOT NULL) FROM integers i2 WHERE i2.i>i1.i) FROM integers i1 ORDER BY i;
----
NULL False
1 True
2 True
3 False
query II
SELECT i, (SELECT (SELECT [SUM(i)]) FROM integers i2 WHERE i2.i>i1.i) FROM integers i1 ORDER BY i;
----
NULL [NULL]
1 [5]
2 [3]
3 [NULL]
# boolean expressions whose result does not depend on the aggregate value
# (expected values are written as 0 / 1 here)
query II
SELECT i, (SELECT SUM(i) AND false FROM integers i2 WHERE i2.i>i1.i) FROM integers i1 ORDER BY i;
----
NULL 0
1 0
2 0
3 0
query II
SELECT i, (SELECT SUM(i) OR true FROM integers i2 WHERE i2.i>i1.i) FROM integers i1 ORDER BY i;
----
NULL 1
1 1
2 1
3 1
# COUNT(*) over the one-row aggregate result is always 1, while COUNT(sum) is 0
# whenever the inner sum is NULL
query II
SELECT i, (SELECT COUNT(*) FROM (SELECT SUM(i) FROM integers i2 WHERE i2.i>i1.i)) t1 FROM integers i1 ORDER BY i;
----
NULL 1
1 1
2 1
3 1
query II
SELECT i, (SELECT COUNT(sum) FROM (SELECT SUM(i) sum FROM integers i2 WHERE i2.i>i1.i) t2) t1 FROM integers i1 ORDER BY i;
----
NULL 0
1 1
2 1
3 0

View File

@@ -0,0 +1,61 @@
# name: test/optimizer/distributivity_rule.test
# description: Distributivity rule test
# group: [optimizer]
# Queries are grouped by their "nosort <label>": all EXPLAINs sharing a label must
# yield the same optimized plan, i.e. the first form must be rewritten into the second.
statement ok
CREATE TABLE test(A BOOLEAN, B BOOLEAN, C BOOLEAN, D BOOLEAN, X BOOLEAN, Y BOOLEAN, Z BOOLEAN);
statement ok
PRAGMA explain_output = OPTIMIZED_ONLY;
# X occurs in every OR branch and is factored out
query I nosort distributivity1
EXPLAIN SELECT (X AND A AND B) OR (A AND X AND C) OR (X AND B AND D) FROM test
----
query I nosort distributivity1
EXPLAIN SELECT X AND ((A AND B) OR (A AND C) OR (B AND D)) FROM test
----
# (X AND B) OR (X AND C) => X AND (B OR C)
query I nosort distributivity2
EXPLAIN SELECT (X AND B) OR (X AND C) FROM test
----
query I nosort distributivity2
EXPLAIN SELECT X AND (B OR C) FROM test
----
# X OR X = X
query I nosort distributivity3
EXPLAIN SELECT X OR X FROM test
----
query I nosort distributivity3
EXPLAIN SELECT X OR X OR X OR X FROM test
----
query I nosort distributivity3
EXPLAIN SELECT X OR (X OR (X OR X)) FROM test
----
# X OR (X AND A) => X
query I nosort distributivity4
EXPLAIN SELECT X OR (X AND A) FROM test
----
query I nosort distributivity4
EXPLAIN SELECT X OR X FROM test
----
# same factoring with comparison expressions instead of plain boolean columns
statement ok
CREATE TABLE test2(X INTEGER, Y INTEGER, Z INTEGER);
query I nosort distributivity5
EXPLAIN SELECT (X=1 AND Y=1) OR (X=1 AND Z=1) FROM test2
----
query I nosort distributivity5
EXPLAIN SELECT X=1 AND (Y=1 OR Z=1) FROM test2
----

View File

@@ -0,0 +1,22 @@
# name: test/optimizer/estimated_cardinalities_are_in_logical_plan.test
# description: Make sure estimated cardinalities are respected
# group: [optimizer]
require notwindows
# three single-column tables built from overlapping integer ranges
statement ok
CREATE TABLE t1 AS SELECT range a FROM range(100000);
statement ok
CREATE TABLE t2 AS SELECT range b FROM range(500, 100000);
statement ok
CREATE TABLE t3 AS SELECT range c FROM range(10000, 1000000);
statement ok
PRAGMA explain_output=OPTIMIZED_ONLY;
# the optimized plan must contain a COMPARISON_JOIN on a = b that is followed by a
# "~<number>" annotation (presumably the rendered cardinality estimate -- confirm)
query II
EXPLAIN SELECT * FROM t1, t2, t3 WHERE a = b AND b = c;
----
logical_opt <REGEX>:.*COMPARISON_JOIN.*a = b.*~[0-9]+.*

View File

@@ -0,0 +1,78 @@
# name: test/optimizer/expression_rewriter/functions_that_error_are_not_reordered.test
# description: Functions can error
# group: [expression_rewriter]
# Every query pairs a guarding predicate with a second predicate that can raise an
# error on the rows the guard removes; the queries must succeed.
statement ok
create table t1 as from range(-5000, 5000, 1) t1(a);
# case expression must happen first, otherwise sqrt will throw an error
query I
select count(*)
from t1
where
case
when a >= 0 then true
When a > 5 then false
when a > 10 then true
when a < 0 then false
when a > 15 then true
else false END
and sqrt(a) >= 0;
----
5000
# t2 holds two random strings of '0'/'1' characters per row (lengths vary independently)
statement ok
CREATE TABLE t2 AS
SELECT
list_reduce([floor((random() * 2))::INT::VARCHAR for t, i in range(floor((random() * 20))::INT + 1)], lambda x, y: concat(x, y)) a,
list_reduce([floor((random() * 2))::INT::VARCHAR for t, i in range(floor((random() * 20))::INT + 1)], lambda x, y: concat(x, y)) b
FROM range(10000);
# len(a) = len(b) must happen first
# otherwise xor function will throw an error
statement ok
select * from t2 where len(a) = len(b) and ((xor(a::BITSTRING, b::BITSTRING))::VARCHAR)[0] = '0';
# filter out high values with case, then apply another filter with a multiplication
# that will overflow if those values are not filtered out
statement ok
create table t3 (a INT, b INT);
statement ok
insert into t3 (select -2147483645, 2147483647 from range(500000));
statement ok
insert into t3 (select range, range from range(32700));
query I
select count(*) from t3
where
case
when (a < 0 and a > -3000) then false
when (a > -2147483645 and a < 0) then true
when (a >= 0 and a < 32700) then true
when a < 3270000 then false
when a < 2147483645 then true
else false END
and (a * b)::INT < 2147483648;
----
32700
# t4: 10000 lists of very large doubles plus 1000 lists of small values;
# a[1] < 1001 keeps only the small lists before list_kurtosis is evaluated
statement ok
create table t4 as select [2e304, 2e305, 2e306, 2e307] a from range(10000);
statement ok
insert into t4 select [range, range +1, range + 2, 2e50] from range(1000);
# FIXME: this should not be required
require vector_size 2048
query I
select count(*) from t4 where a[1] < 1001 and list_kurtosis(a) is NOT NULL;
----
1000

View File

@@ -0,0 +1,9 @@
# name: test/optimizer/filter_pushdown/filter_pushdown_into_subquery.test
# description: When Aggregate and Group By operators contain a mark join index, Mark join -> Semi join cannot happen.
# group: [filter_pushdown]
statement ok
SET order_by_non_integer_literal=true;
# regression check: the statement only has to run without error; it combines
# DISTINCT ON, GROUP BY CUBE and IN (subquery) in both the select list and HAVING
statement ok
SELECT DISTINCT ON ( 'string' ) 'string', GROUP BY CUBE ( 'string', ), 'string' IN ( SELECT 'string' ), HAVING 'string' IN ( SELECT 'string');

View File

@@ -0,0 +1,17 @@
# name: test/optimizer/grouping_expression_simplification.test
# description: Make sure expressions are optimized in the groups too
# group: [optimizer]
statement ok
CREATE TABLE test(t timestamp);
statement ok
PRAGMA explain_output = OPTIMIZED_ONLY;
# both queries carry the label "year", so grouping by EXTRACT(year from t) must give
# the same optimized plan as grouping by YEAR(t)
query I nosort year
EXPLAIN SELECT COUNT(*) FROM test GROUP BY EXTRACT(year from t)
----
query I nosort year
EXPLAIN SELECT COUNT(*) FROM test GROUP BY YEAR(t)
----

View File

@@ -0,0 +1,32 @@
# name: test/optimizer/index_optimizer.test
# description: Test Optimizer uses indexes on point queries
# group: [optimizer]
statement ok
CREATE TABLE integers(i INTEGER, j INTEGER);
statement ok
INSERT INTO integers VALUES (1, 1), (2, 2), (3, 3);
# Test single indexed column.
statement ok
CREATE UNIQUE INDEX i_index ON integers(i);
# the point lookup on the indexed column must show "Type: Index Scan" in the analyzed plan
query II
EXPLAIN ANALYZE SELECT i, j FROM integers WHERE i = 1;
----
analyzed_plan <REGEX>:.*Type: Index Scan.*
# and it must still return the matching row
query II
SELECT i, j FROM integers WHERE i = 1;
----
1 1
# the index is UNIQUE, so inserting a second row with i = 1 has to be rejected
statement error
INSERT INTO integers VALUES (1, 1);
----
<REGEX>:Constraint Error.*violates unique constraint.*
statement ok
DROP INDEX i_index;

View File

@@ -0,0 +1,22 @@
# name: test/optimizer/issue_12181.test
# description: Test move constants optimization involving DISTINCT FROM comparison
# group: [optimizer]
# t0 stays empty; t1 holds the single row (0, 1)
statement ok
CREATE TABLE t0(c0 INT)
statement ok
CREATE TABLE t1(c0 INT, c1 INT)
statement ok
INSERT INTO t1(c0, c1) VALUES (0, 1)
# NULL compared with the non-NULL value 1 + t1.c1 must be reported as distinct
query I
SELECT NULL IS DISTINCT FROM (1 + t1.c1) FROM t1 NATURAL LEFT JOIN t0
----
true
# same comparison with both operands hidden behind CASE expressions; the t1 row
# must survive the filter
query II
SELECT * FROM t0 NATURAL RIGHT JOIN t1 WHERE (CASE t0.c0 WHEN t0.c0 THEN 1 ELSE NULL END) IS DISTINCT FROM (1 + (CASE t1.c1 WHEN t1.c1 THEN 2 ELSE NULL END))
----
0 1

View File

@@ -0,0 +1,254 @@
# name: test/optimizer/join_dependent_filter.test
# description: Join dependent filter rule test
# group: [optimizer]
# fixture: ten rows with i = j = 0..9; every scenario is a self cross product of test
# with an OR of conjunctions, checked once for its result and once for its plan.
# NOTE(review): the positive plan checks look for "Filters" while the negative ones
# look for "FILTER" -- presumably scan-level filter annotations versus a standalone
# filter operator; confirm against the plan renderer.
statement ok
create table test as select range i, range j from range(10)
# can derive two filters for this query
query I
select count(*)
from test t1, test t2
where (t1.i = 2 and t2.i = 4) or (t1.i = 0 and t2.i = 2)
----
2
query II
explain select count(*)
from test t1, test t2
where (t1.i = 2 and t2.i = 4) or (t1.i = 0 and t2.i = 2)
----
physical_plan <REGEX>:.*Filters.*Filters.*
# not if the constants are volatile however
query II
explain select count(*)
from test t1, test t2
where (t1.i = random() and t2.i = random()) or (t1.i = 0 and t2.i = 2)
----
physical_plan <!REGEX>:.*FILTER.*
# which wouldn't be there without the expression rewriter
# (the result must stay the same with the rewriter disabled; only the plan changes)
statement ok
set disabled_optimizers to 'expression_rewriter'
query I
select count(*)
from test t1, test t2
where (t1.i = 2 and t2.i = 4) or (t1.i = 0 and t2.i = 2)
----
2
query II
explain select count(*)
from test t1, test t2
where (t1.i = 2 and t2.i = 4) or (t1.i = 0 and t2.i = 2)
----
physical_plan <!REGEX>:.*FILTER.*
# re-enable all optimizers for the remaining checks
statement ok
set disabled_optimizers to ''
# in this case we can only derive one filter
# (2 rows with t1.i = t2.i > 7, plus 3 * 9 rows with t1.i < 3 and t1.i != t2.i)
query I
select count(*)
from test t1, test t2
where (t1.i = t2.i and t1.i > 7) or (t1.i != t2.i and t1.i < 3)
----
29
query II
explain select count(*)
from test t1, test t2
where (t1.i = t2.i and t1.i > 7) or (t1.i != t2.i and t1.i < 3)
----
physical_plan <REGEX>:.*Filters.*
# a predicate for a column must show up on both sides,
# so, adding a predicate for t2.i to only one side won't create a filter
query I
select count(*)
from test t1, test t2
where (t1.i = t2.i and t1.i > 7 and t2.i = 0) or (t1.i != t2.i and t1.i < 3)
----
27
query II
explain select count(*)
from test t1, test t2
where (t1.i = t2.i and t1.i > 7 and t2.i = 0) or (t1.i != t2.i and t1.i < 3)
----
physical_plan <!REGEX>:.*FILTER.*FILTER.*
# if we add another predicate to the other side, we get another filter
query I
select count(*)
from test t1, test t2
where (t1.i = t2.i and t1.i > 7 and t2.i = 0) or (t1.i != t2.i and t1.i < 3 and t2.i = 5)
----
3
query II
explain select count(*)
from test t1, test t2
where (t1.i = t2.i and t1.i > 7 and t2.i = 0) or (t1.i != t2.i and t1.i < 3 and t2.i = 5)
----
physical_plan <REGEX>:.*Filters.*Filters.*
# one side filters t1, and the other side filters t2, so we can't derive a filter
query I
select count(*)
from test t1, test t2
where (t1.i = t2.i and t1.i > 7) or (t1.i != t2.i and t2.i = 5)
----
11
query II
explain select count(*)
from test t1, test t2
where (t1.i = t2.i and t1.i > 7) or (t1.i != t2.i and t2.i = 5)
----
physical_plan <!REGEX>:.*FILTER.*
# we can still derive filters if there's 3 entries in the OR
query I
select count(*)
from test t1, test t2
where (t1.i = 0 and t2.i = 1) or (t1.i = 2 and t2.i = 3) or (t1.i = 3 and t2.i = 4)
----
3
query II
explain select count(*)
from test t1, test t2
where (t1.i = 0 and t2.i = 1) or (t1.i = 2 and t2.i = 3) or (t1.i = 3 and t2.i = 4)
----
physical_plan <REGEX>:.*Filters.*Filters.*
# not everything in the OR needs to be an AND
# we can still derive one filter (on t2.i)
query I
select count(*)
from test t1, test t2
where (t1.i = 0 and t2.i = 1) or (t1.i = 2 and t2.i = 3) or (t1.i = 3 and t2.i = 4) or (t2.i = 8)
----
13
query II
explain select count(*)
from test t1, test t2
where (t1.i = 0 and t2.i = 1) or (t1.i = 2 and t2.i = 3) or (t1.i = 3 and t2.i = 4) or (t2.i = 8)
----
physical_plan <REGEX>:.*Filters.*
# also works if we have a restriction on i and on j, they just need to be on the same table
query I
select count(*)
from test t1, test t2
where (t1.i = t2.i and t1.i = 7) or (t1.j = 3)
----
11
query II
explain select count(*)
from test t1, test t2
where (t1.i = t2.i and t1.i = 7) or (t1.j = 3)
----
physical_plan <REGEX>:.*FILTER.*
# we can also do more complex expressions, like modulo
# (the pair t1.i = t2.i = 0 satisfies both branches and is counted once: 20 + 1 rows)
query I
select count(*)
from test t1, test t2
where (t1.i = t2.i and t1.i % 5 = 0) or (t1.j % 6 = 0)
----
21
query II
explain select count(*)
from test t1, test t2
where (t1.i = t2.i and t1.i % 5 = 0) or (t1.j % 6 = 0)
----
physical_plan <REGEX>:.*FILTER.*
# or something like IN
query I
select count(*)
from test t1, test t2
where (t1.i = t2.i and t1.i IN (1, 2)) or (t1.j IN (3, 4))
----
22
query II
explain select count(*)
from test t1, test t2
where (t1.i = t2.i and t1.i IN (1, 2)) or (t1.j IN (3, 4))
----
physical_plan <REGEX>:.*FILTER.*
# remaining checks run on generated TPC-H data
require tpch
statement ok
CALL dbgen(sf=0.01)
# there should be 3 filter operators instead of just one, because we derived two
# (the query has the FRANCE/GERMANY OR-of-ANDs over n1 and n2 plus the l_shipdate range)
query II
EXPLAIN SELECT
supp_nation,
cust_nation,
l_year,
sum(volume) AS revenue
FROM (
SELECT
n1.n_name AS supp_nation,
n2.n_name AS cust_nation,
extract(year FROM l_shipdate) AS l_year,
l_extendedprice * (1 - l_discount) AS volume
FROM
supplier,
lineitem,
orders,
customer,
nation n1,
nation n2
WHERE
s_suppkey = l_suppkey
AND o_orderkey = l_orderkey
AND c_custkey = o_custkey
AND s_nationkey = n1.n_nationkey
AND c_nationkey = n2.n_nationkey
AND ((n1.n_name = 'FRANCE'
AND n2.n_name = 'GERMANY')
OR (n1.n_name = 'GERMANY'
AND n2.n_name = 'FRANCE'))
AND l_shipdate BETWEEN CAST('1995-01-01' AS date)
AND CAST('1996-12-31' AS date)) AS shipping
GROUP BY
supp_nation,
cust_nation,
l_year
ORDER BY
supp_nation,
cust_nation,
l_year;
----
physical_plan <REGEX>:.*Filters.*Filters.*Filters.*
# results should still be the same
# (compared against the stored sf0.01 answer file for query 7)
query IIII
PRAGMA tpch(7)
----
<FILE>:extension/tpch/dbgen/answers/sf0.01/q07.csv
# if we put the join-dependent filter explicitly as a join condition, we get a blockwise NL join,
# but we should still derive the same two filters
query II
EXPLAIN SELECT *
FROM nation n1
JOIN nation n2
ON ((n1.n_name = 'FRANCE'
AND n2.n_name = 'GERMANY')
OR (n1.n_name = 'GERMANY'
AND n2.n_name = 'FRANCE'))
----
physical_plan <REGEX>:.*Filters.*Filters.*

View File

@@ -0,0 +1,73 @@
# name: test/optimizer/join_parquet_with_base.test_slow
# description: Make sure we don't segfault when joining parquet files with base tables
# group: [optimizer]
require parquet
require tpch
statement ok
CALL DBGEN(sf=0.1);
# Export five of the generated tables (orders, customer, region, nation, supplier) to
# parquet files and drop them; the tables not listed here stay as base tables.
statement ok
COPY orders TO '__TEST_DIR__/orders.parquet';
statement ok
DROP TABLE orders;
statement ok
COPY customer TO '__TEST_DIR__/customer.parquet';
statement ok
DROP TABLE customer;
statement ok
COPY region TO '__TEST_DIR__/region.parquet';
statement ok
DROP TABLE region;
statement ok
COPY nation TO '__TEST_DIR__/nation.parquet';
statement ok
DROP TABLE nation;
statement ok
COPY supplier TO '__TEST_DIR__/supplier.parquet';
statement ok
DROP TABLE supplier
# Re-create the dropped names as views over the parquet files, so the TPC-H queries
# join parquet scans with the remaining base tables.
statement ok
CREATE view orders AS SELECT * FROM read_parquet('__TEST_DIR__/orders.parquet');
statement ok
CREATE view region AS SELECT * FROM read_parquet('__TEST_DIR__/region.parquet');
statement ok
CREATE view supplier AS SELECT * FROM read_parquet('__TEST_DIR__/supplier.parquet');
statement ok
CREATE view nation AS SELECT * FROM read_parquet('__TEST_DIR__/nation.parquet');
statement ok
CREATE view customer AS SELECT * FROM read_parquet('__TEST_DIR__/customer.parquet');
# Run all 22 TPC-H queries and compare each with its stored sf0.1 answer file.
# Loop end bounds are exclusive (the second loop stops at 23 to cover Q10-Q22), so
# the first loop must end at 10; "loop i 1 9" silently skipped Q9.
# Q1-Q9: answer file names are zero-padded (q01.csv ... q09.csv).
loop i 1 10
query I
PRAGMA tpch(${i})
----
<FILE>:extension/tpch/dbgen/answers/sf0.1/q0${i}.csv
endloop
# Q10-Q22: answer file names need no padding.
loop i 10 23
query I
PRAGMA tpch(${i})
----
<FILE>:extension/tpch/dbgen/answers/sf0.1/q${i}.csv
endloop

View File

@@ -0,0 +1,112 @@
# name: test/optimizer/join_reorder_optimizer.test
# description: Make sure we can emit a valid join order by DPhyp if hypergraph is connected
# group: [optimizer]
# fixture: four four-column tables; t2, t3 and t4 are created as copies of the
# previously created table
statement ok
CREATE TABLE t1(c1 int, c2 int, c3 int, c4 int);
statement ok
INSERT INTO t1 VALUES (1, 1, 1, 1);
statement ok
INSERT INTO t1 VALUES (1, 1, 1, 1);
statement ok
CREATE TABLE t2 AS SELECT * FROM t1;
statement ok
INSERT INTO t2 VALUES (1, 1, 1, 1);
statement ok
CREATE TABLE t3 AS SELECT * FROM t2;
statement ok
INSERT INTO t2 VALUES (1, 1, 1, 1);
statement ok
CREATE TABLE t4 AS SELECT * FROM t3;
statement ok
INSERT INTO t2 VALUES (1, 1, 1, 1);
# with cross products forbidden, planning must still succeed for each connected
# join graph below
statement ok
PRAGMA debug_force_no_cross_product=true
# chain: t1 - t2 - t3 - t4
statement ok
EXPLAIN
SELECT
COUNT(*)
FROM
t1, t2, t3, t4
WHERE
t1.c1 = t2.c1 AND
t2.c2 = t3.c2 AND
t3.c3 = t4.c3
# cycle: the chain plus an edge from t4 back to t1
statement ok
EXPLAIN
SELECT
COUNT(*)
FROM
t1, t2, t3, t4
WHERE
t1.c1 = t2.c1 AND
t2.c2 = t3.c2 AND
t3.c3 = t4.c3 AND
t4.c4 = t1.c4
# t4 is connected only through a single predicate that references all four tables
statement ok
EXPLAIN
SELECT
COUNT(*)
FROM
t1, t2, t3, t4
WHERE
t1.c1 = t2.c1 AND
t2.c2 = t3.c2 AND
t1.c1 + t2.c2 + t3.c3= 3 * t4.c4
statement ok
PRAGMA debug_force_no_cross_product=false
# correlated EXISTS over a CTE that unnests letters, with join conditions containing
# arithmetic on both sides; row_id comes from row_number() here
statement ok
with
grid as (
from (values ('ABC'), ('DEF')) as v(data)
select
unnest(split(data, '')) as letter,
row_number() over () as row_id,
generate_subscripts(split(data, ''), 1) AS col_id,
),
search(row_i, col_i, letter_to_match) as (
values (0, 0, 'A'), (0, 1, 'B'),
)
from (from grid cross join search) as grid_searches
select exists(
from grid as grid_to_search
where 1=1
and grid_searches.row_id = grid_to_search.row_id + grid_searches.row_i
and grid_searches.col_id = grid_to_search.col_id + grid_searches.col_i
and grid_searches.letter_to_match = grid_to_search.letter
)
# same query, but row_id is a literal column of the VALUES list instead of a window function
statement ok
with
grid as (
from (values ('ABC', 39), ('DEF', 50)) as v(data, row_id)
select
unnest(split(data, '')) as letter,
row_id,
generate_subscripts(split(data, ''), 1) AS col_id,
),
search(row_i, col_i, letter_to_match) as (
values (0, 0, 'A'), (0, 1, 'B'),
)
from (from grid cross join search) as grid_searches
select exists(
from grid as grid_to_search
where 1=1
and grid_searches.row_id = grid_to_search.row_id + grid_searches.row_i
and grid_searches.col_id = grid_to_search.col_id + grid_searches.col_i
and grid_searches.letter_to_match = grid_to_search.letter
)

View File

@@ -0,0 +1,120 @@
# name: test/optimizer/joins/asof_join_adds_rows.test
# description: ASOF join results must be identical for every asof_loop_join_threshold value exercised below
# group: [joins]
# presumably required for the ->> path extraction applied to the payload column in the second query
require json
# single-row table; joined to small_probe on c = sp_const inside the ASOF join's left input
statement ok
create table child_join as from values (1) t(c);
# probe side: 6 timestamps one second apart (01:02:03 .. 01:02:08), all with sp_const = 1
statement ok
create table small_probe as from values
(1, '1992-03-22 01:02:03'::TIMESTAMP),
(1, '1992-03-22 01:02:04'::TIMESTAMP),
(1, '1992-03-22 01:02:05'::TIMESTAMP),
(1, '1992-03-22 01:02:06'::TIMESTAMP),
(1, '1992-03-22 01:02:07'::TIMESTAMP),
(1, '1992-03-22 01:02:08'::TIMESTAMP) t(sp_const, a);
# build side: 17 timestamps one second apart (01:02:04 .. 01:02:20), all with lb_const = 1
statement ok
create table large_build as from values
(1, '1992-03-22 01:02:04'::TIMESTAMP),
(1, '1992-03-22 01:02:05'::TIMESTAMP),
(1, '1992-03-22 01:02:06'::TIMESTAMP),
(1, '1992-03-22 01:02:07'::TIMESTAMP),
(1, '1992-03-22 01:02:08'::TIMESTAMP),
(1, '1992-03-22 01:02:09'::TIMESTAMP),
(1, '1992-03-22 01:02:10'::TIMESTAMP),
(1, '1992-03-22 01:02:11'::TIMESTAMP),
(1, '1992-03-22 01:02:12'::TIMESTAMP),
(1, '1992-03-22 01:02:13'::TIMESTAMP),
(1, '1992-03-22 01:02:14'::TIMESTAMP),
(1, '1992-03-22 01:02:15'::TIMESTAMP),
(1, '1992-03-22 01:02:16'::TIMESTAMP),
(1, '1992-03-22 01:02:17'::TIMESTAMP),
(1, '1992-03-22 01:02:18'::TIMESTAMP),
(1, '1992-03-22 01:02:19'::TIMESTAMP),
(1, '1992-03-22 01:02:20'::TIMESTAMP) t(lb_const, b);
# Compare NLJ optimisation to operator
# Both queries run once per threshold value and must return the same rows each time.
# NOTE(review): presumably one threshold selects the loop-join rewrite and the other the ASOF operator - confirm which is which
foreach threshold 0 32
statement ok
PRAGMA asof_loop_join_threshold = ${threshold};
# every probe timestamp has a strictly later build timestamp (a < b), so all 6 probe rows come back exactly once
query I
select a from (select * from small_probe, child_join where c=sp_const) asof join large_build on (lb_const = sp_const and a < b) order by a;
----
1992-03-22 01:02:03
1992-03-22 01:02:04
1992-03-22 01:02:05
1992-03-22 01:02:06
1992-03-22 01:02:07
1992-03-22 01:02:08
# Chain of ASOF LEFT JOIN -> LEFT JOIN LATERAL UNNEST -> ASOF LEFT JOIN.
# The payload unnests to two k_fqn values; each must pair with exactly one fqn_table row
# (for fqn2 the expected match is the later of its two timepoints), giving two output rows.
query IIII
WITH
id_with_timepoint AS (
SELECT
'ID1' AS user_id,
'2024-12-23'::TIMESTAMP AS lastSeen
),
id_and_payload_with_timepoint AS (
SELECT
'ID1' AS user_id,
'2024-02-11'::TIMESTAMP AS timepoint,
'{ "amp": [ {"k": "fqn1"}, {"k": "fqn2"}]}'::VARCHAR AS payload
),
id_with_payload_intermediate AS (
SELECT
id_with_timepoint.user_id,
id_with_timepoint.lastSeen,
id_and_payload_with_timepoint.payload,
FROM
id_with_timepoint ASOF
LEFT JOIN id_and_payload_with_timepoint ON (
id_with_timepoint.user_id = id_and_payload_with_timepoint.user_id
AND id_and_payload_with_timepoint.timepoint < id_with_timepoint.lastSeen
)
),
id_with_fqn AS (
SELECT
user_id,
lastSeen,
t.k_fqn
FROM
id_with_payload_intermediate
LEFT JOIN LATERAL UNNEST(payload ->> '$.amp[*].k') AS t (k_fqn) ON TRUE
),
fqn_table AS (
SELECT
*
FROM
(
VALUES
('fqn2', '2021-03-03'::TIMESTAMP),
('fqn2', '2021-02-02'::TIMESTAMP),
('fqn1', '2021-01-01'::TIMESTAMP)
) AS data (ap_fqn, timepoint)
)
SELECT
id_with_fqn.user_id,
id_with_fqn.k_fqn,
fqn_table.ap_fqn,
fqn_table.timepoint::TIMESTAMP
FROM
id_with_fqn ASOF
LEFT JOIN fqn_table ON (
id_with_fqn.k_fqn = fqn_table.ap_fqn
AND fqn_table.timepoint < id_with_fqn.lastSeen
)
ORDER BY
k_fqn,
timepoint;
----
ID1 fqn1 fqn1 2021-01-01 00:00:00
ID1 fqn2 fqn2 2021-03-03 00:00:00
endloop

View File

@@ -0,0 +1,18 @@
# name: test/optimizer/joins/better_ce_estimates_for_bad_join_conditions.test
# description: An inequality (!=) join condition must not lead to a NESTED_LOOP_JOIN when equality conditions can connect the tables
# group: [joins]
# t1 and t2: 2000 rows each, unique id, name_ cycling through 700 distinct values
statement ok
create table t1 as select range::Varchar id, (range%700)::VARCHAR name_ from range(2_000);
statement ok
create table t2 as select range::Varchar id, (range%700)::VARCHAR name_ from range(2_000);
# t3: 8000 rows holding foreign keys into both t1.id and t2.id
statement ok
create table t3 as select (range%2_000)::Varchar t1_id_FK, (range%2_000)::Varchar t2_id_FK from range(8_000);
# t1 and t2 are only related directly through name_ != name_; the plan must not contain a NESTED_LOOP_JOIN
# (<!REGEX> asserts that the pattern does NOT match)
query II
explain select count(*) from t1, t2, t3 where t1.name_ != t2.name_ and t3.t1_id_FK = t1.id and t3.t2_id_FK = t2.id;
----
physical_plan <!REGEX>:.*NESTED_LOOP_JOIN.*

View File

@@ -0,0 +1,120 @@
# name: test/optimizer/joins/cross_join_and_unnest_dont_work.test
# description: In the join order optimizer queries need to have the correct bindings
# group: [joins]
# NOTE(review): every record in this file is identical to test/optimizer/joins/asof_join_adds_rows.test - confirm the duplication is intended
# presumably required for the ->> path extraction applied to the payload column in the second query
require json
# single-row table; joined to small_probe on c = sp_const inside the ASOF join's left input
statement ok
create table child_join as from values (1) t(c);
# probe side: 6 timestamps one second apart (01:02:03 .. 01:02:08), all with sp_const = 1
statement ok
create table small_probe as from values
(1, '1992-03-22 01:02:03'::TIMESTAMP),
(1, '1992-03-22 01:02:04'::TIMESTAMP),
(1, '1992-03-22 01:02:05'::TIMESTAMP),
(1, '1992-03-22 01:02:06'::TIMESTAMP),
(1, '1992-03-22 01:02:07'::TIMESTAMP),
(1, '1992-03-22 01:02:08'::TIMESTAMP) t(sp_const, a);
# build side: 17 timestamps one second apart (01:02:04 .. 01:02:20), all with lb_const = 1
statement ok
create table large_build as from values
(1, '1992-03-22 01:02:04'::TIMESTAMP),
(1, '1992-03-22 01:02:05'::TIMESTAMP),
(1, '1992-03-22 01:02:06'::TIMESTAMP),
(1, '1992-03-22 01:02:07'::TIMESTAMP),
(1, '1992-03-22 01:02:08'::TIMESTAMP),
(1, '1992-03-22 01:02:09'::TIMESTAMP),
(1, '1992-03-22 01:02:10'::TIMESTAMP),
(1, '1992-03-22 01:02:11'::TIMESTAMP),
(1, '1992-03-22 01:02:12'::TIMESTAMP),
(1, '1992-03-22 01:02:13'::TIMESTAMP),
(1, '1992-03-22 01:02:14'::TIMESTAMP),
(1, '1992-03-22 01:02:15'::TIMESTAMP),
(1, '1992-03-22 01:02:16'::TIMESTAMP),
(1, '1992-03-22 01:02:17'::TIMESTAMP),
(1, '1992-03-22 01:02:18'::TIMESTAMP),
(1, '1992-03-22 01:02:19'::TIMESTAMP),
(1, '1992-03-22 01:02:20'::TIMESTAMP) t(lb_const, b);
# Compare NLJ optimisation to operator
# Both queries run once per threshold value and must return the same rows each time.
foreach threshold 0 32
statement ok
PRAGMA asof_loop_join_threshold = ${threshold};
# every probe timestamp has a strictly later build timestamp (a < b), so all 6 probe rows come back exactly once
query I
select a from (select * from small_probe, child_join where c=sp_const) asof join large_build on (lb_const = sp_const and a < b) order by a;
----
1992-03-22 01:02:03
1992-03-22 01:02:04
1992-03-22 01:02:05
1992-03-22 01:02:06
1992-03-22 01:02:07
1992-03-22 01:02:08
# Chain of ASOF LEFT JOIN -> LEFT JOIN LATERAL UNNEST -> ASOF LEFT JOIN.
# The payload unnests to two k_fqn values; each must pair with exactly one fqn_table row, giving two output rows.
query IIII
WITH
id_with_timepoint AS (
SELECT
'ID1' AS user_id,
'2024-12-23'::TIMESTAMP AS lastSeen
),
id_and_payload_with_timepoint AS (
SELECT
'ID1' AS user_id,
'2024-02-11'::TIMESTAMP AS timepoint,
'{ "amp": [ {"k": "fqn1"}, {"k": "fqn2"}]}'::VARCHAR AS payload
),
id_with_payload_intermediate AS (
SELECT
id_with_timepoint.user_id,
id_with_timepoint.lastSeen,
id_and_payload_with_timepoint.payload,
FROM
id_with_timepoint ASOF
LEFT JOIN id_and_payload_with_timepoint ON (
id_with_timepoint.user_id = id_and_payload_with_timepoint.user_id
AND id_and_payload_with_timepoint.timepoint < id_with_timepoint.lastSeen
)
),
id_with_fqn AS (
SELECT
user_id,
lastSeen,
t.k_fqn
FROM
id_with_payload_intermediate
LEFT JOIN LATERAL UNNEST(payload ->> '$.amp[*].k') AS t (k_fqn) ON TRUE
),
fqn_table AS (
SELECT
*
FROM
(
VALUES
('fqn2', '2021-03-03'::TIMESTAMP),
('fqn2', '2021-02-02'::TIMESTAMP),
('fqn1', '2021-01-01'::TIMESTAMP)
) AS data (ap_fqn, timepoint)
)
SELECT
id_with_fqn.user_id,
id_with_fqn.k_fqn,
fqn_table.ap_fqn,
fqn_table.timepoint::TIMESTAMP
FROM
id_with_fqn ASOF
LEFT JOIN fqn_table ON (
id_with_fqn.k_fqn = fqn_table.ap_fqn
AND fqn_table.timepoint < id_with_fqn.lastSeen
)
ORDER BY
k_fqn,
timepoint;
----
ID1 fqn1 fqn1 2021-01-01 00:00:00
ID1 fqn2 fqn2 2021-03-03 00:00:00
endloop

View File

@@ -0,0 +1,69 @@
# name: test/optimizer/joins/delim_join_dont_explode.test_slow
# description: Join ordering for a correlated EXISTS subquery (DELIM_SCAN) that contains an inner join
# group: [joins]
# big_table: ids 0..999
statement ok
create table big_table (id integer);
statement ok
insert into big_table select range from range(1000);
# medium_1: 100 rows; fk_to_big is 0 for the first ten rows, fk_to_medium_2 is range + 99 (99..198)
statement ok
create table medium_1 (id integer, fk_to_big integer, fk_to_medium_2 integer);
statement ok
insert into medium_1 (select range::varchar,
CASE WHEN range<10 THEN 0 ELSE range END,
range + 99,
from range(100));
# medium_2: ids 0..99
statement ok
create table medium_2 (id integer);
statement ok
insert into medium_2 (select range from range(100));
# sanity check: only fk_to_medium_2 = 99 falls inside medium_2's id range, so the inner join yields one row
query I
select count(*) from medium_2, medium_1 where medium_2.id = medium_1.fk_to_medium_2;
----
1
# result check for the correlated EXISTS; the expected output is 143 ids, compared by hash
query I
SELECT *
FROM big_table as bt
WHERE
exists(
SELECT *
FROM medium_2
INNER JOIN medium_1
ON ((medium_2.id = medium_1.fk_to_medium_2))
WHERE
(medium_1.fk_to_big % 7 = bt.id % 7)
) order by bt.id
----
143 values hashing to dc5d1675d206057ccfe13739a38ee082
# The query plan here used to join the two SEQ_SCANs first, and then join the DELIM_SCAN,
# Since PR #12290, we can reorder DELIM_SCANS
# Now the DELIM_SCAN is joined with a SEQ_SCAN first, and then with the SEQ_SCAN
# Now that we reorder semi joins the ordering of the sequential scans has also changed
query II
EXPLAIN
SELECT *
FROM big_table as bt
WHERE
exists(
SELECT *
FROM medium_2
INNER JOIN medium_1
ON ((medium_2.id = medium_1.fk_to_medium_2))
WHERE
(medium_1.fk_to_big % 7 = bt.id % 7)
)
order by bt.id
----
physical_plan <REGEX>:.*HASH_JOIN.*DELIM_SCAN.*SEQ_SCAN.*

View File

@@ -0,0 +1,24 @@
# name: test/optimizer/joins/delim_join_with_in_has_correct_results.test
# description: An IN expression should return false when the IN list is empty
# group: [joins]
# t holds one non-NULL value and one NULL; u holds only NULLs
statement ok
create table t as FROM VALUES (4), (NULL) t(t0);
statement ok
create table u as FROM VALUES (NULL), (NULL) t(u0);
# The subquery is correlated on t0 through its WHERE clause:
#   t0 = 4    -> the filter passes, the IN list is u's NULLs, so 4 IN (NULL, NULL) is NULL
#   t0 = NULL -> the filter rejects every row, the IN list is empty, so the result is false (not NULL)
query II
SELECT
t0,
t0 IN (
SELECT
u0
FROM u
WHERE
t0 = 4
)
FROM t;
----
4 NULL
NULL false

View File

@@ -0,0 +1,19 @@
# name: test/optimizer/joins/filter_on_subquery_with_aggregate.test
# description: some fuzzer issues - filter comparing a CTE column against a scalar aggregate over the same CTE
# group: [joins]
# df: A and B are built from the same range(1, 10) list, unnested in lockstep
statement ok
create table df as select unnest(range(1, 10)) as A, unnest(range(1, 10)) as B;
# cte is referenced twice: once as the outer input and once inside the scalar MAX(A) subquery;
# only the row holding the maximum A may survive
query II
WITH cte AS (
SELECT A, B
FROM df
WHERE A >= 7
)
SELECT *
FROM cte
WHERE A = (SELECT MAX(A) FROM cte);
----
9 9

View File

@@ -0,0 +1,21 @@
# name: test/optimizer/joins/get_cardinality_from_limit.test_slow
# description: when a limit is encountered, use the limit to know the cardinality
# group: [joins]
# t_left: 40,000 random integers
statement ok
create table t_left as select (random() * 10000000)::INT a from range(40000);
# t_right: 10,000,000 sequential integers; the query below only reads it through LIMIT 10000
statement ok
create table t_right as select range b from range(10000000);
# EXPLAIN should emit only the optimized logical plan (the logical_opt row checked below)
statement ok
pragma explain_output='optimized_only';
# the plan text must contain, in this order, a SEQ_SCAN, then the LIMIT, then the SEQ_SCAN under the LIMIT
# NOTE(review): presumably this means the limited side (10,000 rows) is chosen as the smaller join input - confirm
query II
explain select * from t_left, (select * from t_right limit 10000) where a = b;
----
logical_opt <REGEX>:.*SEQ_SCAN.*LIMIT.*SEQ_SCAN.*

View File

@@ -0,0 +1,30 @@
# name: test/optimizer/joins/join_bug.test
# description: regression queries mixing INNER, SEMI, NATURAL and ASOF joins; each statement only has to execute without error
# group: [joins]
# empty table; the statements below exercise planning, not results
statement ok
CREATE TABLE v00 (c01 INT, c02 STRING);
# don't delete numerator relations
statement ok
SELECT 1
FROM
v00 AS t
INNER JOIN
(
v00 AS t2
SEMI JOIN v00 AS t3
USING (c02)
)
USING (c01)
SEMI JOIN v00 AS t4
USING (c02, c01)
NATURAL JOIN v00 AS t5;
# redefine v00 with a single STRING column for the ASOF case
statement ok
CREATE or replace TABLE v00 (c01 STRING);
# Asof joins should swap
statement ok
FROM v00 AS ta02 NATURAL JOIN v00 AS ta03 ASOF JOIN v00 AS ta04 USING ( c01 );

View File

@@ -0,0 +1,39 @@
# name: test/optimizer/joins/join_on_doubles.test
# description: tests for #16901 / #16965 - DOUBLE join keys must match 0.0 with -0.0 and NaN with -NaN
# group: [joins]
statement ok
create table x (a double);
statement ok
create table y (b double);
# x holds the "positive" spellings, y the "negative" ones; each pair must compare equal in a join
statement ok
insert into x values ('0.0'), ('NaN');
statement ok
insert into y values ('-0.0'), ('-NaN');
# Every query below returns two rows and has no ORDER BY, so the join may emit them
# in either order. rowsort makes the comparison independent of emission order without
# touching the SQL (and therefore the plan) under test.
# The same join is checked with both input orders and with outer-join variants.
query TT rowsort
select a::text, b::text from x inner join y on a = b;
----
0.0 -0.0
nan -nan
query TT rowsort
select a::text, b::text from y inner join x on a = b;
----
0.0 -0.0
nan -nan
query TT rowsort
select a::text, b::text from x right join y on a = b;
----
0.0 -0.0
nan -nan
query TT rowsort
select a::text, b::text from x left join y on a = b;
----
0.0 -0.0
nan -nan

View File

@@ -0,0 +1,39 @@
# name: test/optimizer/joins/join_on_floats.test
# description: tests for #16901 / #16965 - FLOAT join keys must match 0.0 with -0.0 and NaN with -NaN
# group: [joins]
statement ok
create table x (a float);
statement ok
create table y (b float);
# x holds the "positive" spellings, y the "negative" ones; each pair must compare equal in a join
statement ok
insert into x values ('0.0'), ('NaN');
statement ok
insert into y values ('-0.0'), ('-NaN');
# Every query below returns two rows and has no ORDER BY, so the join may emit them
# in either order. rowsort makes the comparison independent of emission order without
# touching the SQL (and therefore the plan) under test.
# The same join is checked with both input orders and with outer-join variants.
query TT rowsort
select a::text, b::text from x inner join y on a = b;
----
0.0 -0.0
nan -nan
query TT rowsort
select a::text, b::text from y inner join x on a = b;
----
0.0 -0.0
nan -nan
query TT rowsort
select a::text, b::text from x right join y on a = b;
----
0.0 -0.0
nan -nan
query TT rowsort
select a::text, b::text from x left join y on a = b;
----
0.0 -0.0
nan -nan

View File

@@ -0,0 +1,27 @@
# name: test/optimizer/joins/joins_with_correlated_subqueries.test
# description: issue duckdblabs/duckdb-internal #840
# group: [joins]
# df: y is missing (NULL) for x = 1 and x = 4
statement ok
CREATE TABLE df (x NUMERIC, y NUMERIC);
statement ok
INSERT INTO df VALUES (0, 2), (1, NULL), (2, 4), (3, 5), (4, NULL);
# Fills a missing y by linear interpolation between the nearest non-NULL neighbours:
# prev = closest row at or below parent.x, next = closest row at or above parent.x.
# Both derived tables are correlated on parent.x and combined with a CROSS JOIN inside the scalar subquery.
# Only successful execution is asserted; the result values are not checked.
statement ok
SELECT
x,
COALESCE(
y,
(
SELECT
prev.y + ( (next.y - prev.y) * (parent.x - prev.x) / (next.x - prev.x) )
FROM
( SELECT x, y FROM df WHERE x <= parent.x and y is not null ORDER BY x DESC LIMIT 1 ) AS prev
CROSS JOIN
( SELECT x, y FROM df WHERE x >= parent.x and y is not null ORDER BY x ASC LIMIT 1 ) AS next
)
) AS y
FROM
df parent;

View File

@@ -0,0 +1,49 @@
# name: test/optimizer/joins/lateral_cross_join.test
# description: test to string of complex lateral cross join
# group: [joins]
statement ok
PRAGMA enable_verification
# one reporting period covering all of 2024
statement ok
CREATE TABLE all_time_periods (
start_date DATE,
end_date DATE
);
# fact table: one shipment row per vendor/league
statement ok
CREATE TABLE weekly_trading_cube (
ship_date DATE,
vendor_name VARCHAR,
master_league VARCHAR,
net_demand DECIMAL
);
# lookup from the cube's league code to a display name
statement ok
CREATE TABLE league_mapping (
wtc_league VARCHAR,
finance_league VARCHAR
);
statement ok
INSERT INTO all_time_periods VALUES
('2024-01-01', '2024-12-31');
statement ok
INSERT INTO weekly_trading_cube VALUES
('2024-06-15', 'F Branded', 'MLB', 100.0),
('2024-07-15', 'M & Ness', 'NBA', 200.0);
statement ok
INSERT INTO league_mapping VALUES
('MLB', 'Major League Baseball'),
('NBA', 'National Basketball Association');
# The LEFT JOIN condition references all three relations: wtc and lm for the league match,
# and dr (the cross-joined single-row aggregate CTE) for the date-range check.
# The WHERE filter keeps only the 'F Branded' row, whose ship_date lies inside the 2024 range.
query III
WITH date_range AS (SELECT min(start_date) AS min_start_date, max(end_date) AS max_end_date FROM all_time_periods)
SELECT wtc.vendor_name, wtc.ship_date, lm.finance_league
FROM weekly_trading_cube AS wtc CROSS JOIN date_range AS dr
LEFT JOIN league_mapping AS lm ON (((upper(wtc.master_league) = upper(lm.wtc_league)) AND (wtc.ship_date BETWEEN dr.min_start_date AND dr.max_end_date)))
WHERE (wtc.vendor_name = 'F Branded')
----
F Branded 2024-06-15 Major League Baseball

View File

@@ -0,0 +1,21 @@
# name: test/optimizer/joins/many_joins_and_one_non_reorderable_join.test_slow
# description: A LEFT JOIN whose right side is a subquery over three tables with no join conditions must plan and execute without error
# group: [joins]
# test0: 1000 rows (left side of the LEFT JOIN)
statement ok
Create table test0 as select * from range(1000);
# test1..test3: 100 rows each; combined without predicates inside the subquery
statement ok
Create table test1 as select * from range(100);
statement ok
Create table test2 as select * from range(100);
statement ok
Create table test3 as select * from range(100);
# NOTE(review): test4 is created but not referenced by the query below
statement ok
Create table test4 as select * from range(400);
# only successful execution is asserted; the result is not checked
statement ok
Select * from test0 LEFT JOIN (Select test1.range as the_range from test1, test2, test3) other ON other.the_range = test0.range;

View File

@@ -0,0 +1,54 @@
# name: test/optimizer/joins/no_cross_product_reordering.test
# description: cannot create cross product between LHS and RHS of semi/anti joins
# group: [joins]
# t1: 1000 rows, t2: 1 row, t3: 100 rows (the IN subquery's input)
statement ok
create table t1 as select range a from range(1000);
statement ok
create table t2 as select range b from range(1);
statement ok
create table t3 as select range c from range(100);
# t1 and t2 have no join predicate, so a CROSS_PRODUCT is required; the IN becomes a SEMI join on a.
# The plan text must show CROSS_PRODUCT first, followed by a HASH_JOIN of type SEMI.
query II
explain select * from t1, t2 where a in (select * from t3);
----
physical_plan <REGEX>:.*CROSS_PRODUCT.*HASH_JOIN.*SEMI.*
# An incorrect join plan looks like
#
# PROJECTION
#
# Expressions:
# a
# b
#
#
# COMPARISON_JOIN
#
# Join Type:
# SEMI
#
# Conditions:
# (a = #[9.0])
#
#
# CROSS_PRODUCT SEQ_SCAN
#
# Stringified:
# t1
#
#
# SEQ_SCAN PROJECTION
#
# Stringified: Expressions:
# t2 c
#
#
# SEQ_SCAN
#
# Stringified:
# t3
#

View File

@@ -0,0 +1,28 @@
# name: test/optimizer/joins/no_duplicate_elimination_join.test
# description: In the join order optimizer queries need to have the correct bindings
# group: [joins]
# NOTE(review): the description is shared verbatim with several sibling files; what the visible records
# exercise is join conditions whose sides are expressions over two tables, with cross products disallowed
statement ok
pragma debug_force_no_cross_product=true;
statement ok
Create table test0 as select * from range(1000);
statement ok
Create table test1 as select * from range(1000);
statement ok
Create table test2 as select * from range(1000);
statement ok
Create table test3 as select * from range(100);
statement ok
Create table test4 as select * from range(400);
# test1.range + test4.range appears on the left of two different conditions (against test2 and test3);
# only successful planning/execution is asserted
statement ok
SELECT * FROM test0, test1, test2,test3, test4
WHERE test1.range + test4.range = test2.range AND test1.range + test4.range = test3.range AND test1.range = test4.range AND test1.range = test0.range;
# same query with one extra condition, test1.range + test3.range = test0.range
statement ok
SELECT * FROM test0, test1, test2, test3, test4 WHERE test1.range + test4.range = test2.range AND test1.range + test4.range = test3.range AND test1.range = test4.range AND test1.range = test0.range AND test1.range + test3.range = test0.range;

View File

@@ -0,0 +1,6 @@
# name: test/optimizer/joins/order_optimizer_bindings.test
# description: In the join order optimizer queries need to have the correct bindings
# group: [joins]
# joins two table-function results (summary over a subquery) that both alias their first column as i;
# only successful binding/execution is asserted
statement ok
SELECT * FROM summary((select 5)) tbl1(i) JOIN summary((select 5)) tbl2(i) ON tbl1.i=tbl2.i;

View File

@@ -0,0 +1,29 @@
# name: test/optimizer/joins/pushdown_semi_anti.test
# description: Verify semi anti joins are pushed down
# group: [joins]
# tbl1: 10000 rows, tbl2: 1000 rows, tbl3: 100 rows (input of the IN subquery)
statement ok
create table tbl1 as select range a from range(10000);
statement ok
create table tbl2 as select range b from range(1000);
statement ok
create table tbl3 as select range c from range(100);
# first run: only statistics propagation is disabled, the join order optimizer stays active
statement ok
set disabled_optimizers='statistics_propagation';
# with join ordering enabled, INNER is matched before SEMI in the plan text,
# i.e. the semi join sits below the inner join
query II
EXPLAIN select * from tbl1, tbl2 where b in (select * from tbl3) and tbl1.a = tbl2.b;
----
physical_plan <REGEX>:.*INNER.*SEMI.*
# second run: additionally disable the join order optimizer as a baseline
statement ok
set disabled_optimizers='statistics_propagation,join_order';
# make sure non-optimized plan has semi on top of the inner
query II
EXPLAIN select * from tbl1, tbl2 where b in (select * from tbl3) and tbl1.a = tbl2.b;
----
physical_plan <REGEX>:.*SEMI.*INNER.*

View File

@@ -0,0 +1,15 @@
# name: test/optimizer/joins/test_delim_join_with_cross_product_in_rhs.test
# description: Verify that a delim join with a correlated column in the RHS of a cross product (on the RHS of the delim GET) is properly bound
# group: [joins]
statement ok
CREATE TABLE t1(c0 DOUBLE, c1 INT8);
# t3 stays empty
statement ok
CREATE TABLE t3(c0 VARCHAR);
# single row with c1 = 1; c0 is left NULL
statement ok
INSERT INTO t1(c1) VALUES (1);
# the subquery (SELECT t3.c0) is correlated on t3, which is listed earlier in the FROM clause;
# only successful binding/execution is asserted
statement ok
SELECT * FROM t3, t1 INNER JOIN ( SELECT t3.c0 ) as subQuery1 ON ( t1.c0 > (t3.c0::DOUBLE) );

View File

@@ -0,0 +1,55 @@
# name: test/optimizer/joins/test_issue_5265.test_slow
# description: Verify expected cardinality of multiple cross products
# group: [joins]
require tpch
# generate the TPC-H tables at scale factor 0.1
statement ok
call dbgen(sf=0.1);
# this should run quickly
# (the final join against the single-row subquery uses the constant condition 1 = 1,
# so it has no predicate tying it to any other relation)
statement ok
SELECT n.n_name,
SUM(l.l_extendedprice * (1 - l.l_discount)) AS revenue
FROM region r
JOIN nation n
ON n.n_regionkey = r.r_regionkey
JOIN supplier s
ON s.s_nationkey = n.n_nationkey
JOIN lineitem l
ON l.l_suppkey = s.s_suppkey
JOIN orders o
ON o.o_orderkey = l.l_orderkey
JOIN customer c
ON c.c_custkey = o.o_custkey
AND c.c_nationkey = s.s_nationkey
JOIN (SELECT 1 AS dummy) single_row ON 1 = 1
WHERE r.r_name = 'ASIA'
AND o.o_orderdate >= DATE '1994-01-01'
AND o.o_orderdate < DATE '1995-01-01'
GROUP BY n.n_name
ORDER BY revenue DESC;
# same query under EXPLAIN: the physical plan must NOT contain two CROSS_PRODUCT operators
# (<!REGEX> asserts that the pattern does not match)
query II
explain SELECT n.n_name,
SUM(l.l_extendedprice * (1 - l.l_discount)) AS revenue
FROM region r
JOIN nation n
ON n.n_regionkey = r.r_regionkey
JOIN supplier s
ON s.s_nationkey = n.n_nationkey
JOIN lineitem l
ON l.l_suppkey = s.s_suppkey
JOIN orders o
ON o.o_orderkey = l.l_orderkey
JOIN customer c
ON c.c_custkey = o.o_custkey
AND c.c_nationkey = s.s_nationkey
JOIN (SELECT 1 AS dummy) single_row ON 1 = 1
WHERE r.r_name = 'ASIA'
AND o.o_orderdate >= DATE '1994-01-01'
AND o.o_orderdate < DATE '1995-01-01'
GROUP BY n.n_name
ORDER BY revenue DESC;
----
physical_plan <!REGEX>:.*CROSS_PRODUCT.*CROSS_PRODUCT.*

View File

@@ -0,0 +1,3 @@
# name: test/optimizer/joins/test_tpcds_pushdown.test
# group: [joins]

View File

@@ -0,0 +1,205 @@
# name: test/optimizer/joins/tpcds_nofail.test
# description: TPCDS queries that force the Join optimizer to emit 10000+ pairs. Tests the approx algorithm throws no errors
# group: [joins]
require tpcds
# generate the TPC-DS tables at scale factor 0.01; the statements in this file only assert successful execution
statement ok
CALL dsdgen(sf=0.01);
# 18 relations joined in one FROM list inside cross_sales, which is then self-joined (cs1, cs2).
# NOTE(review): store and promotion are listed in the FROM clause but no WHERE predicate references them - confirm this is intended
statement ok
WITH cs_ui AS
(SELECT cs_item_sk,
sum(cs_ext_list_price) AS sale,
sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit) AS refund
FROM catalog_sales,
catalog_returns
WHERE cs_item_sk = cr_item_sk
AND cs_order_number = cr_order_number
GROUP BY cs_item_sk
HAVING sum(cs_ext_list_price)>2*sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit)),
cross_sales AS
(SELECT i_product_name product_name,
i_item_sk item_sk,
s_store_name store_name,
s_zip store_zip,
ad1.ca_street_number b_street_number,
ad1.ca_street_name b_street_name,
ad1.ca_city b_city,
ad1.ca_zip b_zip,
ad2.ca_street_number c_street_number,
ad2.ca_street_name c_street_name,
ad2.ca_city c_city,
ad2.ca_zip c_zip,
d1.d_year AS syear,
d2.d_year AS fsyear,
d3.d_year s2year,
count(*) cnt,
sum(ss_wholesale_cost) s1,
sum(ss_list_price) s2,
sum(ss_coupon_amt) s3
FROM store_sales,
store_returns,
cs_ui,
date_dim d1,
date_dim d2,
date_dim d3,
store,
customer,
customer_demographics cd1,
customer_demographics cd2,
promotion,
household_demographics hd1,
household_demographics hd2,
customer_address ad1,
customer_address ad2,
income_band ib1,
income_band ib2,
item
WHERE ss_sold_date_sk = d1.d_date_sk
AND ss_customer_sk = c_customer_sk
AND ss_cdemo_sk= cd1.cd_demo_sk
AND ss_hdemo_sk = hd1.hd_demo_sk
AND ss_addr_sk = ad1.ca_address_sk
AND ss_item_sk = i_item_sk
AND ss_item_sk = sr_item_sk
AND ss_ticket_number = sr_ticket_number
AND ss_item_sk = cs_ui.cs_item_sk
AND c_current_cdemo_sk = cd2.cd_demo_sk
AND c_current_hdemo_sk = hd2.hd_demo_sk
AND c_current_addr_sk = ad2.ca_address_sk
AND c_first_sales_date_sk = d2.d_date_sk
AND c_first_shipto_date_sk = d3.d_date_sk
AND hd1.hd_income_band_sk = ib1.ib_income_band_sk
AND hd2.hd_income_band_sk = ib2.ib_income_band_sk
AND cd1.cd_marital_status <> cd2.cd_marital_status
AND i_color IN ('purple',
'burlywood',
'indian',
'spring',
'floral',
'medium')
AND i_current_price BETWEEN 64 AND 64 + 10
AND i_current_price BETWEEN 64 + 1 AND 64 + 15
GROUP BY i_product_name,
i_item_sk,
s_store_name,
s_zip,
ad1.ca_street_number,
ad1.ca_street_name,
ad1.ca_city,
ad1.ca_zip,
ad2.ca_street_number,
ad2.ca_street_name,
ad2.ca_city,
ad2.ca_zip,
d1.d_year,
d2.d_year,
d3.d_year)
SELECT cs1.product_name,
cs1.store_name,
cs1.store_zip,
cs1.b_street_number,
cs1.b_street_name,
cs1.b_city,
cs1.b_zip,
cs1.c_street_number,
cs1.c_street_name,
cs1.c_city,
cs1.c_zip,
cs1.syear cs1syear,
cs1.cnt cs1cnt,
cs1.s1 AS s11,
cs1.s2 AS s21,
cs1.s3 AS s31,
cs2.s1 AS s12,
cs2.s2 AS s22,
cs2.s3 AS s32,
cs2.syear,
cs2.cnt
FROM cross_sales cs1,
cross_sales cs2
WHERE cs1.item_sk=cs2.item_sk
AND cs1.syear = 1999
AND cs2.syear = 1999 + 1
AND cs2.cnt <= cs1.cnt
AND cs1.store_name = cs2.store_name
AND cs1.store_zip = cs2.store_zip
ORDER BY cs1.product_name,
cs1.store_name,
cs2.cnt,
cs1.s1,
cs2.s1;
# Five CASE expressions, each built from three uncorrelated scalar subqueries over store_sales
# (15 scalar subqueries in total), evaluated against a single row of reason.
statement ok
SELECT CASE
WHEN
(SELECT count(*)
FROM store_sales
WHERE ss_quantity BETWEEN 1 AND 20) > 74129 THEN
(SELECT avg(ss_ext_discount_amt)
FROM store_sales
WHERE ss_quantity BETWEEN 1 AND 20)
ELSE
(SELECT avg(ss_net_paid)
FROM store_sales
WHERE ss_quantity BETWEEN 1 AND 20)
END bucket1,
CASE
WHEN
(SELECT count(*)
FROM store_sales
WHERE ss_quantity BETWEEN 21 AND 40) > 122840 THEN
(SELECT avg(ss_ext_discount_amt)
FROM store_sales
WHERE ss_quantity BETWEEN 21 AND 40)
ELSE
(SELECT avg(ss_net_paid)
FROM store_sales
WHERE ss_quantity BETWEEN 21 AND 40)
END bucket2,
CASE
WHEN
(SELECT count(*)
FROM store_sales
WHERE ss_quantity BETWEEN 41 AND 60) > 56580 THEN
(SELECT avg(ss_ext_discount_amt)
FROM store_sales
WHERE ss_quantity BETWEEN 41 AND 60)
ELSE
(SELECT avg(ss_net_paid)
FROM store_sales
WHERE ss_quantity BETWEEN 41 AND 60)
END bucket3,
CASE
WHEN
(SELECT count(*)
FROM store_sales
WHERE ss_quantity BETWEEN 61 AND 80) > 10097 THEN
(SELECT avg(ss_ext_discount_amt)
FROM store_sales
WHERE ss_quantity BETWEEN 61 AND 80)
ELSE
(SELECT avg(ss_net_paid)
FROM store_sales
WHERE ss_quantity BETWEEN 61 AND 80)
END bucket4,
CASE
WHEN
(SELECT count(*)
FROM store_sales
WHERE ss_quantity BETWEEN 81 AND 100) > 165306 THEN
(SELECT avg(ss_ext_discount_amt)
FROM store_sales
WHERE ss_quantity BETWEEN 81 AND 100)
ELSE
(SELECT avg(ss_net_paid)
FROM store_sales
WHERE ss_quantity BETWEEN 81 AND 100)
END bucket5
FROM reason
WHERE r_reason_sk = 1 ;

View File

@@ -0,0 +1,42 @@
# name: test/optimizer/joins/update_nodes_in_full_path.test_slow
# description: updating nodes in full path should throw no errors
# group: [joins]
require tpch
# generate the TPC-H tables at scale factor 0.01
statement ok
call dbgen(sf=0.01);
# Deeply nested join tree: several joins have their ON clause deferred until after further joins,
# and some conditions are scalar subqueries or reference only one side.
# NOTE(review): the ref_N / subq_0 aliases suggest a fuzzer-generated reproduction - confirm.
# Only successful planning/execution is asserted; the result is not checked.
statement ok
SELECT NULL
FROM main.supplier AS ref_0
LEFT JOIN main.nation AS ref_1
LEFT JOIN main.nation AS ref_2
INNER JOIN main.customer AS ref_3
INNER JOIN main.supplier AS ref_4 ON (ref_3.c_phone = ref_4.s_name)
ON ((SELECT l_linestatus FROM main.lineitem LIMIT 1 OFFSET 2) IS NULL)
INNER JOIN main.orders AS ref_5
INNER JOIN main.orders AS ref_6 ON (ref_5.o_clerk ~~~ ref_5.o_comment)
ON (1)
ON (ref_3.c_mktsegment ~~~ ref_4.s_phone)
ON (ref_0.s_acctbal = ref_5.o_totalprice)
INNER JOIN main.lineitem AS ref_7 ON (ref_4.s_suppkey = ref_7.l_orderkey)
INNER JOIN main.supplier AS ref_8
INNER JOIN main.partsupp AS ref_9
INNER JOIN main.supplier AS ref_10
INNER JOIN main.supplier AS ref_11
INNER JOIN main.lineitem AS ref_12
INNER JOIN main.customer AS ref_13 ON (ref_12.l_linestatus = ref_13.c_name)
ON ((SELECT ps_comment FROM main.partsupp LIMIT 1 OFFSET 4) ^@ ref_11.s_address)
ON (ref_13.c_phone ~~~ ref_10.s_address)
ON (ref_9.ps_partkey = ref_11.s_suppkey)
ON ((SELECT ps_comment FROM main.partsupp LIMIT 1 OFFSET 6) ~~* ref_12.l_linestatus)
ON ((ref_6.o_orderpriority IS NULL) OR (ref_7.l_linestatus ~~* (SELECT s_name FROM main.supplier LIMIT 1 OFFSET 6)))
INNER JOIN (
SELECT ref_14.p_container AS c0, ref_14.p_mfgr AS c1, ref_14.p_container AS c2, ref_15.c_custkey AS c3
FROM main.part AS ref_14
INNER JOIN main.customer AS ref_15 ON (ref_14.p_brand ~~* ref_15.c_mktsegment)
WHERE (ref_14.p_comment ~~~ ref_14.p_container)
LIMIT 101
) AS subq_0 ON (ref_6.o_orderstatus ~~* ref_6.o_comment)
WHERE (ref_8.s_address ~~* ref_8.s_address);

View File

@@ -0,0 +1,54 @@
# name: test/optimizer/joins/updating_the_join_node_hash_map_has_no_errors.test_slow
# description: nested join tree with multi-row scalar subqueries in its conditions; errors by default, must succeed once scalar_subquery_error_on_multiple_rows is disabled
# group: [joins]
require tpch
# generate the TPC-H tables at scale factor 0.05
statement ok
call dbgen(sf=0.05);
# default settings: the unlimited scalar subqueries (SELECT ps_comment FROM main.partsupp,
# SELECT s_name FROM main.supplier) return more than one row, so the query must fail with the message below
statement error
SELECT NULL
FROM main.supplier AS ref_0
INNER JOIN main.nation
INNER JOIN main.nation AS ref_2
INNER JOIN main.customer AS ref_3
INNER JOIN main.supplier AS ref_4 ON ((ref_3.c_phone = ref_4.s_name)) ON (
(SELECT NULL))
INNER JOIN main.orders AS ref_5
INNER JOIN main.orders AS ref_6 ON (ref_5.o_clerk like '%0000%') ON (1) ON (ref_3.c_mktsegment NOT NULL) ON ((ref_0.s_acctbal = ref_5.o_totalprice))
INNER JOIN main.lineitem AS ref_7 ON ((ref_4.s_suppkey = ref_7.l_orderkey))
INNER JOIN main.supplier
INNER JOIN main.supplier AS ref_11
INNER JOIN main.lineitem AS ref_12 ON (
(SELECT NULL)) ON ((
(SELECT ps_comment FROM main.partsupp) ~~* ref_12.l_linestatus)) ON
((ref_7.l_linestatus ~~* (SELECT s_name FROM main.supplier)))
INNER JOIN
(SELECT NULL) ON (ref_6.o_orderstatus NOT NULL);
----
More than one row returned by a subquery used as an expression
# turn the multi-row scalar subquery error off so the same query reaches the join order optimizer and executes
statement ok
SET scalar_subquery_error_on_multiple_rows=false
# identical query text as above; now it only has to complete without error
statement ok
SELECT NULL
FROM main.supplier AS ref_0
INNER JOIN main.nation
INNER JOIN main.nation AS ref_2
INNER JOIN main.customer AS ref_3
INNER JOIN main.supplier AS ref_4 ON ((ref_3.c_phone = ref_4.s_name)) ON (
(SELECT NULL))
INNER JOIN main.orders AS ref_5
INNER JOIN main.orders AS ref_6 ON (ref_5.o_clerk like '%0000%') ON (1) ON (ref_3.c_mktsegment NOT NULL) ON ((ref_0.s_acctbal = ref_5.o_totalprice))
INNER JOIN main.lineitem AS ref_7 ON ((ref_4.s_suppkey = ref_7.l_orderkey))
INNER JOIN main.supplier
INNER JOIN main.supplier AS ref_11
INNER JOIN main.lineitem AS ref_12 ON (
(SELECT NULL)) ON ((
(SELECT ps_comment FROM main.partsupp) ~~* ref_12.l_linestatus)) ON
((ref_7.l_linestatus ~~* (SELECT s_name FROM main.supplier)))
INNER JOIN
(SELECT NULL) ON (ref_6.o_orderstatus NOT NULL);

View File

@@ -0,0 +1,52 @@
# name: test/optimizer/joins/wide_build_skinny_probe.test
# description: If the build side is wide, and the probe side is skinny, the children of the hash join should flip
# group: [joins]
# skinny: 10000 rows, 3 integer columns
statement ok
create table skinny as select range a, range b, range c from range(10000);
# wide: 8000 rows, 35 columns (pk plus 34 VARCHAR columns); fewer rows than skinny but much wider
statement ok
create table wide as select
range pk,
(range::VARCHAR || '1111') a,
(range::VARCHAR || '2222') b,
(range::VARCHAR || '3333') c,
(range::VARCHAR || '4444') d,
(range::VARCHAR || '5555') e,
(range::VARCHAR || '6666') f,
(range::VARCHAR || '7777') g,
(range::VARCHAR || '8888') h,
(range::VARCHAR || '9999') i,
(range::VARCHAR || '0000') j,
(range::VARCHAR || '0011') k,
(range::VARCHAR || '0022') l,
(range::VARCHAR || '0033') m,
(range::VARCHAR || '9999') o,
(range::VARCHAR || '0000') p,
(range::VARCHAR || '0011') q,
(range::VARCHAR || '0022') u,
(range::VARCHAR || '0033') r,
(range::VARCHAR || '9999') s,
(range::VARCHAR || '0000') t,
(range::VARCHAR || '0011') w,
(range::VARCHAR || '0022') y,
(range::VARCHAR || '0033') z,
(range::VARCHAR || '9999') aa,
(range::VARCHAR || '0000') bb,
(range::VARCHAR || '0011') cc,
(range::VARCHAR || '0022') dd,
(range::VARCHAR || '0033') ee,
(range::VARCHAR || '9999') ff,
(range::VARCHAR || '0000') gg,
(range::VARCHAR || '0011') hh,
(range::VARCHAR || '0022') ii,
(range::VARCHAR || '0033') jj,
(range::VARCHAR || '0044') kk from range(8000);
# wide should be the probe side, skinny should be on the build side
# (the regex requires "wide" to appear before "skinny" in the rendered plan)
query II
explain select * from wide w, skinny s where w.pk=s.a;
----
physical_plan <REGEX>:.*wide.*skinny.*

View File

@@ -0,0 +1,109 @@
# name: test/optimizer/like_optimizer.test
# description: Test Like Optimization Rules
# group: [optimizer]
statement ok
CREATE TABLE test(S VARCHAR);
statement ok
INSERT INTO test VALUES ('aaa');
statement ok
PRAGMA explain_output = OPTIMIZED_ONLY;
# Queries that share a label (the word after "nosort") must produce identical output, so each
# group below asserts that the LIKE form and the hand-written form yield the same optimized plan.
# no special symbols optimization: S LIKE 'aaa' -> S='aaa'
query I nosort nosymbols
EXPLAIN SELECT S LIKE 'aaa' FROM test
----
query I nosort nosymbols
EXPLAIN SELECT S='aaa' FROM test
----
# prefix optimization: aaa% -> prefix(aaa)
query I nosort prefix
EXPLAIN SELECT S LIKE 'aaa%' FROM test
----
query I nosort prefix
EXPLAIN SELECT prefix(S, 'aaa') FROM test
----
# repeated trailing % must collapse to the same plan
query I nosort prefix
EXPLAIN SELECT S LIKE 'aaa%%%%' FROM test
----
# suffix optimization: %aaa -> suffix(aaa)
query I nosort suffix
EXPLAIN SELECT S LIKE '%aaa' FROM test
----
query I nosort suffix
EXPLAIN SELECT suffix(S, 'aaa') FROM test
----
# repeated leading % must collapse to the same plan
query I nosort suffix
EXPLAIN SELECT S LIKE '%%%%aaa' FROM test
----
# contains optimization: %aaa% -> contains(aaa)
query I nosort contains
EXPLAIN SELECT S LIKE '%aaa%' FROM test
----
query I nosort contains
EXPLAIN SELECT contains(S, 'aaa') FROM test
----
# repeated % on both sides must collapse to the same plan
query I nosort contains
EXPLAIN SELECT S LIKE '%%%%aaa%%' FROM test
----
# NOT LIKE
# no special symbols optimization: S NOT LIKE 'aaa' -> S<>'aaa'
query I nosort notlikenosymbols
EXPLAIN SELECT S NOT LIKE 'aaa' FROM test
----
query I nosort notlikenosymbols
EXPLAIN SELECT S<>'aaa' FROM test
----
# prefix optimization: NOT LIKE aaa% -> NOT(prefix(aaa))
query I nosort notlikeprefix
EXPLAIN SELECT S NOT LIKE 'aaa%' FROM test
----
query I nosort notlikeprefix
EXPLAIN SELECT NOT(prefix(S, 'aaa')) FROM test
----
query I nosort notlikeprefix
EXPLAIN SELECT S NOT LIKE 'aaa%%%%' FROM test
----
# suffix optimization: NOT LIKE %aaa -> NOT(suffix(aaa))
query I nosort notlikesuffix
EXPLAIN SELECT S NOT LIKE '%aaa' FROM test
----
query I nosort notlikesuffix
EXPLAIN SELECT NOT(suffix(S, 'aaa')) FROM test
----
query I nosort notlikesuffix
EXPLAIN SELECT S NOT LIKE '%%%%aaa' FROM test
----
# contains optimization: NOT LIKE %aaa% -> NOT(contains(aaa))
query I nosort notlikecontains
EXPLAIN SELECT S NOT LIKE '%aaa%' FROM test
----
query I nosort notlikecontains
EXPLAIN SELECT NOT(contains(S, 'aaa')) FROM test
----
query I nosort notlikecontains
EXPLAIN SELECT S NOT LIKE '%%%%aaa%%' FROM test
----

View File

@@ -0,0 +1,65 @@
# name: test/optimizer/limit_pushdown.test
# description: Test Limit Pushdown optimization
# group: [optimizer]
statement ok
CREATE TABLE integers(i INTEGER, j INTEGER)
statement ok
PRAGMA explain_output = OPTIMIZED_ONLY;
statement ok
INSERT INTO integers VALUES (1,1), (2,2), (3, 3), (4,4)
# In the plan regexes below, the operator matched first is the one rendered first in the plan text:
# PROJECTION.*LIMIT means the limit was pushed below the projection, LIMIT.*PROJECTION means it was not.
# project + limit becomes limit + project
query II
EXPLAIN SELECT i FROM integers LIMIT 4
----
logical_opt <REGEX>:.*PROJECTION.*LIMIT.*
# verify result for project + limit to limit + project
query I
SELECT i FROM integers LIMIT 4
----
1
2
3
4
# recursive limit pushdown
query II
EXPLAIN SELECT i FROM (SELECT i, i+1 FROM integers) LIMIT 4
----
logical_opt <REGEX>:.*PROJECTION.*PROJECTION.*LIMIT.*
# verify result for recursive limit pushdown and offset
query I
SELECT i FROM (SELECT i, i+1 FROM integers) LIMIT 4 OFFSET 2
----
3
4
# only offset: no pushdown
query II
EXPLAIN SELECT i FROM integers OFFSET 4
----
logical_opt <REGEX>:.*LIMIT.*PROJECTION.*
# limit and offset
query II
EXPLAIN SELECT i FROM integers LIMIT 4 OFFSET 2
----
logical_opt <REGEX>:.*PROJECTION.*LIMIT.*
# verify result for limit and offset
query I
SELECT i FROM integers LIMIT 4 OFFSET 2
----
3
4
# limit value >= 8192: no pushdown (the boundary value 8192 itself is already not pushed down)
query II
EXPLAIN SELECT i FROM integers LIMIT 8192
----
logical_opt <REGEX>:.*LIMIT.*PROJECTION.*

View File

@@ -0,0 +1,33 @@
# name: test/optimizer/misc/test_count_and_sum.test
# description: COUNT(*) with a WHERE predicate must agree with SUM(CASE WHEN <predicate> THEN 1 ELSE 0 END)
# group: [misc]
statement ok
DROP VIEW IF EXISTS v0;
statement ok
CREATE TABLE t0 (c0 TEXT);
statement ok
CREATE TABLE t1 (c1 TEXT);
# v0 exposes t0.c0 through a LEFT JOIN from t1; t0 stays empty in this test
statement ok
CREATE VIEW v0 AS
SELECT t0.c0
FROM t1
LEFT JOIN t0 ON t1.c1 = t0.c0;
statement ok
INSERT INTO t1(c1) VALUES ('example_value');
# both queries share the label result_1, so their results must be identical:
# counting the rows that pass the filter ...
query I nosort result_1
SELECT COUNT(*)
FROM v0
WHERE (CURRENT_TIMESTAMP, c0) != (CAST(NULL AS TEXT), '0');
----
# ... versus summing 1/0 over the same predicate without a filter
query I nosort result_1
SELECT SUM(CASE WHEN (CURRENT_TIMESTAMP, c0) != (CAST(NULL AS TEXT), '0') THEN 1 ELSE 0 END)
FROM v0;
----
----

View File

@@ -0,0 +1,127 @@
# name: test/optimizer/move_constants.test
# description: Test move constants
# group: [optimizer]
# Queries that share a "nosort <label>" must produce identical EXPLAIN output,
# i.e. the optimizer must rewrite each comparison to the simplified form in its group.
statement ok
CREATE TABLE test(X INTEGER);
statement ok
PRAGMA explain_output = OPTIMIZED_ONLY;
# addition
query I nosort add_left
EXPLAIN SELECT X+1=10 FROM test
----
query I nosort add_left
EXPLAIN SELECT 1+X=10 FROM test
----
query I nosort add_left
EXPLAIN SELECT X=9 FROM test
----
query I nosort add_right
EXPLAIN SELECT 10=X+1 FROM test
----
query I nosort add_right
EXPLAIN SELECT 10=1+X FROM test
----
query I nosort add_right
EXPLAIN SELECT 9=X FROM test
----
# subtraction
query I nosort subtract_left_right
EXPLAIN SELECT X-1=10 FROM test
----
query I nosort subtract_left_right
EXPLAIN SELECT X=11 FROM test
----
# constant on the left of the subtraction: 10-X=5 is equivalent to X=5
query I nosort subtract_left_left
EXPLAIN SELECT 10-X=5 FROM test
----
query I nosort subtract_left_left
EXPLAIN SELECT X=5 FROM test
----
# non-equality comparisons get flipped
query I nosort subtract_flipped_lt
EXPLAIN SELECT 10-X<5 FROM test
----
query I nosort subtract_flipped_lt
EXPLAIN SELECT X>5 FROM test
----
query I nosort subtract_flipped_gte
EXPLAIN SELECT 10-X>=5 FROM test
----
query I nosort subtract_flipped_gte
EXPLAIN SELECT X<=5 FROM test
----
# multiplication
query I nosort mult_left
EXPLAIN SELECT X*3=6 FROM test
----
query I nosort mult_left
EXPLAIN SELECT 3*X=6 FROM test
----
query I nosort mult_left
EXPLAIN SELECT X=2 FROM test
----
# gt
query I nosort mult_left_gt
EXPLAIN SELECT X*3>3 FROM test
----
query I nosort mult_left_gt
EXPLAIN SELECT X>1 FROM test
----
# negative values: dividing by a negative constant flips non-equality comparisons
query I nosort mult_left_negative
EXPLAIN SELECT -1*X=-5 FROM test
----
query I nosort mult_left_negative
EXPLAIN SELECT X=5 FROM test
----
query I nosort mult_left_negative_flip
EXPLAIN SELECT -1*X<-5 FROM test
----
query I nosort mult_left_negative_flip
EXPLAIN SELECT X>5 FROM test
----
# FIXME: the unary negation tests below are disabled by "mode skip"
mode skip
# negation
query I nosort negation
EXPLAIN SELECT -X=-5 FROM test
----
query I nosort negation
EXPLAIN SELECT X=5 FROM test
----
query I nosort negation_flip
EXPLAIN SELECT -X<-5 FROM test
----
query I nosort negation_flip
EXPLAIN SELECT X>5 FROM test
----

View File

@@ -0,0 +1,18 @@
# name: test/optimizer/needle_optimizer.test
# description: Test optimization of PREFIX/CONTAINS with an empty needle
# group: [optimizer]
statement ok
CREATE TABLE test(S VARCHAR);
statement ok
PRAGMA explain_output = OPTIMIZED_ONLY;
# empty prefix/suffix/contains needles are transformed to the same case statement,
# so both queries below (sharing the label no_output) must produce identical EXPLAIN output
query I nosort no_output
EXPLAIN SELECT PREFIX(S, '') FROM test
----
query I nosort no_output
EXPLAIN SELECT CONTAINS(S, '') FROM test
----

View File

@@ -0,0 +1,16 @@
# name: test/optimizer/optimize_unsigned_vs_signed_ints.test
# description: Test comparisons on a signed expression that is TRY_CAST to an unsigned integer
# group: [optimizer]
# i - 4 is negative for i < 4, so TRY_CAST(... AS UINT32) yields NULL there and those rows
# must not pass "b < 2"; only i = 4 (b = 0) and i = 5 (b = 1) remain
query II
SELECT i, TRY_CAST(i - 4 AS UINT32) as b from range(10) tbl(i) where b < 2;
----
4 0
5 1
# the same filter inside a subquery; re-evaluating b<2 on the surviving rows must give true
query III
SELECT *, b<2 FROM (SELECT i, TRY_CAST(i - 4 AS UINT32) as b from range(10) tbl(i) where b < 2);
----
4 0 true
5 1 true

View File

@@ -0,0 +1,81 @@
# name: test/optimizer/ordered_aggregate.test
# description: Test ORDER BY in AGGREGATE clause optimizations
# group: [optimizer]
# Queries that share a "nosort <label>" must produce identical EXPLAIN output, so each group
# below asserts that the redundant ORDER BY terms do not change the plan.
statement ok
CREATE TABLE integers(grp INTEGER, i INTEGER);
statement ok
INSERT INTO integers VALUES (1, 10), (2, 15), (1, 30), (2, 20)
# duplicate expressions are removed
query I nosort order_by_agg
EXPLAIN SELECT FIRST(i ORDER BY i) FROM integers
----
query I nosort order_by_agg
EXPLAIN SELECT FIRST(i ORDER BY i, i, i) FROM integers
----
query I nosort order_by_agg
EXPLAIN SELECT FIRST(i ORDER BY i, i DESC, i DESC NULLS FIRST) FROM integers
----
# groups are removed
query I nosort order_by_agg_grp
EXPLAIN SELECT grp, FIRST(i ORDER BY i) FROM integers GROUP BY grp ORDER BY grp
----
query I nosort order_by_agg_grp
EXPLAIN SELECT grp, FIRST(i ORDER BY grp, i) FROM integers GROUP BY grp ORDER BY grp
----
query I nosort order_by_agg_grp
EXPLAIN SELECT grp, FIRST(i ORDER BY grp, i, grp DESC, i DESC) FROM integers GROUP BY grp ORDER BY grp
----
# now with no remaining aggregates
# (ordering only by the grouping column must plan the same as no ORDER BY at all)
query I nosort order_by_none
EXPLAIN SELECT grp, FIRST(i) FROM integers GROUP BY grp ORDER BY grp
----
query I nosort order_by_none
EXPLAIN SELECT grp, FIRST(i ORDER BY grp) FROM integers GROUP BY grp ORDER BY grp
----
query I nosort order_by_none
EXPLAIN SELECT grp, FIRST(i ORDER BY grp, grp DESC, grp DESC NULLS FIRST) FROM integers GROUP BY grp ORDER BY grp
----
# ORDER BY is removed from aggregates for which it has no effect
query I nosort order_by_min
EXPLAIN SELECT MIN(i) FROM integers
----
query I nosort order_by_min
EXPLAIN SELECT MIN(i ORDER BY i) FROM integers
----
query I nosort order_by_max
EXPLAIN SELECT MAX(i) FROM integers
----
query I nosort order_by_max
EXPLAIN SELECT MAX(i ORDER BY i) FROM integers
----
query I nosort order_by_median
EXPLAIN SELECT MEDIAN(i) FROM integers
----
query I nosort order_by_median
EXPLAIN SELECT MEDIAN(i ORDER BY i) FROM integers
----
query I nosort order_by_sum
EXPLAIN SELECT SUM(i) FROM integers
----
query I nosort order_by_sum
EXPLAIN SELECT SUM(i ORDER BY i) FROM integers
----
----

View File

@@ -0,0 +1,56 @@
# name: test/optimizer/perfect_ht.test
# description: Test aggregates that can trigger a perfect HT
# group: [optimizer]
statement ok
CREATE TABLE timeseries(year INTEGER, val INTEGER);
# the data includes a NULL group key and a NULL value
statement ok
INSERT INTO timeseries VALUES (1996, 10), (1997, 12), (1996, 20), (2001, 30), (NULL, 1), (1996, NULL);
# this query uses a perfect aggregate HT
query II
EXPLAIN SELECT year, SUM(val), COUNT(val), COUNT(*) FROM timeseries GROUP BY year ORDER BY year;
----
physical_plan <REGEX>:.*PERFECT_HASH_GROUP_BY.*
statement ok
PRAGMA perfect_ht_threshold=0;
# if we set the threshold to 0, the perfect HT is not used anymore
# (<!REGEX> asserts that the plan does NOT match the pattern)
query II
EXPLAIN SELECT year, SUM(val), COUNT(val), COUNT(*) FROM timeseries GROUP BY year ORDER BY year;
----
physical_plan <!REGEX>:.*PERFECT_HASH_GROUP_BY.*
statement ok
PRAGMA perfect_ht_threshold=1;
# if we set it too small, it is not used still
query II
EXPLAIN SELECT year, SUM(val), COUNT(val), COUNT(*) FROM timeseries GROUP BY year ORDER BY year;
----
physical_plan <!REGEX>:.*PERFECT_HASH_GROUP_BY.*
# we can also use it with many columns, as long as the threshold is high enough
# (ten columns, each holding only the values 0 and 1)
statement ok
create table manycolumns as select i a, i b, i c, i d, i e, i f, i g, i h, i, i j from range(0,2) tbl(i);
statement ok
PRAGMA perfect_ht_threshold=30;
query II
explain select a, b, c, d, e, f, g, h, i, j FROM manycolumns GROUP BY 1, 2, 3, 4, 5, 6, 7, 8, 9, 10
----
physical_plan <REGEX>:.*PERFECT_HASH_GROUP_BY.*
# the threshold has to be in range
statement error
PRAGMA perfect_ht_threshold=-1;
----
<REGEX>:.*out of range.*
statement error
PRAGMA perfect_ht_threshold=100;
----
<REGEX>:.*out of range.*

View File

@@ -0,0 +1,34 @@
# name: test/optimizer/prefer_final_projected_columns_on_probe_side.test
# description: Test that the side carrying the final projected columns is preferred as the hash join probe side
# group: [optimizer]
statement ok
create table t(ts_start timestamptz, ts_stop timestamptz, id text);
# fill t with the cross product of 1000 consecutive days and 1000 ids; ts_stop starts out NULL
statement ok
with dates as (
select '2023-01-01'::timestamp + i * interval '1 DAY' as x
from generate_series(0, 999) as t(i)
),
ids as (
select 'id_' || lpad(i::text, 4, '0') as y
from generate_series(0, 999) as t(i)
)
insert into t(ts_start, ts_stop, id)
select d.x, null, i.y from dates d, ids i;
# this is hard to test but basically you need to make sure that the left child of the hash
# join is a projection, and the right is a sequential scan.
# the child of the projection is then the window and sequential scan
query II
explain update t as this
set ts_stop = next.ts_start_next
from (
select id, ts_start, LEAD(ts_start) over (partition by id order by ts_start)
as ts_start_next
from t
) as next
where this.id=next.id and this.ts_start=next.ts_start;
----
physical_plan <REGEX>:.*HASH_JOIN.*SEQ_SCAN.*PROJECTION.*WINDOW.*SEQ_SCAN.*

View File

@@ -0,0 +1,125 @@
# name: test/optimizer/pullup_filters.test
# description: Test Filters Pull Up
# group: [optimizer]
# The regexes below count how often a filter constant (e.g. "=5") shows up in the plan:
# a filter that was pulled up and applied to the other join side appears twice.
statement ok
PRAGMA explain_output = 'PHYSICAL_ONLY'
statement ok
CREATE TABLE vals1 AS SELECT i AS i, i AS j FROM range(0, 11, 1) t1(i)
statement ok
CREATE TABLE vals2(k BIGINT, l BIGINT)
statement ok
INSERT INTO vals2 SELECT * FROM vals1
## INNER JOIN: pull up a single filter in cross product from LHS
query II
EXPLAIN SELECT * FROM (SELECT * FROM vals1, vals2 WHERE i=5) tbl1, (SELECT * FROM vals1, vals2) tbl2 WHERE tbl1.i=tbl2.i
----
physical_plan <REGEX>:.*=5.*=5.*
## INNER JOIN: pull up a single filter in cross product from RHS
query II
EXPLAIN SELECT * FROM (SELECT * FROM vals1, vals2) tbl1, (SELECT * FROM vals1, vals2 WHERE i=5) tbl2 WHERE tbl1.i=tbl2.i
----
physical_plan <REGEX>:.*=5.*=5.*
## INNER JOIN: pull up two filters in cross product from LHS
## (either ordering of the two filters in the plan text is accepted)
query II
EXPLAIN SELECT * FROM (SELECT * FROM vals1, vals2 WHERE i=5 AND k=3) tbl1, (SELECT * FROM vals1, vals2) tbl2 WHERE tbl1.i=tbl2.i AND tbl1.k=tbl2.k
----
physical_plan <REGEX>:(.*=5.*=3.*=5.*=3.*|.*=3.*=5.*=3.*=5.*)
## INNER JOIN: pull up two filters in cross product from RHS
query II
EXPLAIN SELECT * FROM (SELECT * FROM vals1, vals2) tbl1, (SELECT * FROM vals1, vals2 WHERE i=5 AND k=3) tbl2 WHERE tbl1.i=tbl2.i AND tbl1.k=tbl2.k
----
physical_plan <REGEX>:(.*=5.*=3.*=5.*=3.*|.*=3.*=5.*=3.*=5.*)
#### LEFT JOIN: pull up a single filter from LHS ####
query II
EXPLAIN SELECT * FROM (SELECT * FROM vals1 WHERE i=5) tbl1 LEFT JOIN (SELECT * FROM vals1) AS tbl2 ON tbl1.i=tbl2.i
----
physical_plan <REGEX>:.*=5.*=5.*
#### LEFT JOIN: filters should not pull up from RHS ####
#### (<!REGEX>: the filter constant must NOT appear twice) ####
query II
EXPLAIN SELECT * FROM (SELECT * FROM vals1) tbl1 LEFT JOIN (SELECT * FROM vals1 WHERE i=5) AS tbl2 ON tbl1.i=tbl2.i
----
physical_plan <!REGEX>:.*=5.*=5.*
#### LEFT JOIN: pull up two filters from cross product in the LHS ####
query II
EXPLAIN SELECT * FROM (SELECT * FROM vals1, vals2 WHERE i=5 AND k=10) tbl1 LEFT OUTER JOIN (SELECT * FROM vals1, vals2) tbl2 ON tbl1.i=tbl2.i AND tbl1.k=tbl2.k
----
physical_plan <REGEX>:((.*=5.*=10.*=5.*=10.*)|(.*=10.*=5.*=10.*=5.*))
## INNER JOIN: pull up filter from LHS ####
## (i, k of tbl1 and i, k of tbl2 are all equated, so the constant is expected four times)
query II
EXPLAIN SELECT COUNT(*) FROM (SELECT * FROM vals1, vals2 WHERE i=5) tbl1, (SELECT * FROM vals1, vals2) tbl2 WHERE tbl1.i=tbl1.k AND tbl1.i=tbl2.k AND tbl1.i=tbl2.i
----
physical_plan <REGEX>:.*=5.*=5.*=5.*=5.*
## INNER JOIN: pull up filters from RHS ####
query II
EXPLAIN SELECT COUNT(*) FROM (SELECT * FROM vals1, vals2) tbl1, (SELECT * FROM vals1, vals2 WHERE i=5) tbl2 WHERE tbl1.i=tbl1.k AND tbl1.i=tbl2.k AND tbl1.i=tbl2.i
----
physical_plan <REGEX>:.*=5.*=5.*=5.*=5.*
## INTERSECT: pull up filters from LHS
query II
EXPLAIN SELECT * FROM (SELECT * FROM vals1, vals2 WHERE i=3 AND k=5 INTERSECT SELECT * FROM vals1, vals2) tbl1;
----
physical_plan <REGEX>:((.*=3.*=5.*=3.*=5.*)|(.*=5.*=3.*=5.*=3.*))
## INTERSECT: pull up filters from RHS
query II
EXPLAIN SELECT * FROM (SELECT * FROM vals1, vals2 INTERSECT SELECT * FROM vals1, vals2 WHERE i=3 AND k=5) tbl1;
----
physical_plan <REGEX>:((.*=3.*=5.*=3.*=5.*)|(.*=5.*=3.*=5.*=3.*))
statement ok
create table orders as select range o_orderkey from range(10)
statement ok
create table lineitem as select range % 10 l_orderkey from range(100)
# down here we test that we can pull filters out of explicitly joined relations (using JOIN syntax rather than WHERE)
# even though we are explicitly joining on l_orderkey both times,
# we can derive that o1.o_orderkey = o2.o_orderkey
# once we've derived this, the join order optimizer finds it should join o1 with o2 before joining with lineitem
# rather than joining lineitem with o1 and o2 directly
# so we should see lineitem first in the regex, and then 2x orders (deeper because joined first)
statement ok
PRAGMA explain_output='OPTIMIZED_ONLY'
query II
explain
select count(*)
from lineitem l
join orders o1
on (l.l_orderkey = o1.o_orderkey)
join orders o2
on (l.l_orderkey = o2.o_orderkey)
----
logical_opt <REGEX>:.*lineitem.*orders.*orders.*
# if we disable the FilterPullup, we get the original join order again:
# orders first (joined last), then lineitem, then orders
statement ok
set disabled_optimizers to 'filter_pullup'
query II
explain
select count(*)
from lineitem l
join orders o1
on (l.l_orderkey = o1.o_orderkey)
join orders o2
on (l.l_orderkey = o2.o_orderkey)
----
logical_opt <REGEX>:.*orders.*lineitem.*orders.*

View File

@@ -0,0 +1,27 @@
# name: test/optimizer/pushdown/distinct_from_pushdown.test
# description: Test DISTINCT FROM pushed down into scans
# group: [pushdown]
# VARCHAR column: the single row 'tst' matches neither 'a' nor 'b'
statement ok
create table test as select 'tst' as tst;
# IS NOT DISTINCT FROM either constant: no rows expected
query I
select * from test where tst is not distinct from 'a' or tst is not distinct from 'b';
----
# IS DISTINCT FROM either constant: the row is returned
query I
select * from test where tst is distinct from 'a' or tst is distinct from 'b';
----
tst
# same checks on an INTEGER column holding the single value 42
statement ok
create table test2 as select 42 as tst;
query I
select * from test2 where tst is not distinct from 12 or tst is not distinct from 13;
----
query I
select * from test2 where tst is distinct from 12 or tst is distinct from 13
----
42

View File

@@ -0,0 +1,65 @@
# name: test/optimizer/pushdown/filter_cannot_pushdown.test
# description: Test Filter Can Not Push Down
# group: [pushdown]
# test that filters on expressions with side effects (here: random()) cannot be pushed down
statement ok
PRAGMA explain_output = OPTIMIZED_ONLY;
# the filter on rnd must still be visible as "rnd < ..." in the optimized plan
query II
explain select rnd from (select random()) as t(rnd) where rnd < 0.5;
----
logical_opt <REGEX>:.*rnd < 0.5.*
query II
explain select * from (select rnd from (select random()) as t(rnd) where rnd < 0.5);
----
logical_opt <REGEX>:.*rnd < 0.5.*
statement ok
CREATE TABLE t(a integer, b integer, c integer);
# a filter mixing a volatile column (rnd) and a plain column (a)
query II
explain select rnd, a from (select random() as rnd, a from t) where rnd < 0.3 and a > 1;
----
logical_opt <REGEX>:.*rnd < 0.3.*
query II
explain select * from (select rnd, a from (select random() as rnd, a from t) where rnd < 0.3 and a > 1);
----
logical_opt <REGEX>:.*rnd < 0.3.*
# the plan must not contain the text "a > 1"
# NOTE(review): presumably because the filter on the plain column a is still pushed into the scan,
# where it is rendered differently - confirm against the actual plan output
query II
explain select rnd, a from (select random() as rnd, a from t) where rnd < 0.3 and a > 1;
----
logical_opt <!REGEX>:.*a > 1.*
query II
explain select rnd, a from (select random(), 2 as 'a') as t(rnd, a) where rnd < 1 and a > 0;
----
logical_opt <REGEX>:.*rnd < 1.0.*
statement ok
create table t1 as select range as a, random() as b from range(10);
query II
explain select a, b from (select random(), a, b from t1) as t(rnd, a, b) where rnd < 1 and a > 0;
----
logical_opt <REGEX>:.*rnd < 1.0.*
# result check, repeated in a loop because random() is involved:
# any row that passes "rnd < 0.5" must evaluate "rnd > 0.5" to false, so together with the
# constant false branch the only group in the result is False
loop i 1 100
query I
WITH combined_results AS (
SELECT rnd > 0.5 as result FROM (SELECT random()) AS t(rnd) WHERE rnd < 0.5
UNION ALL
SELECT false
)
SELECT result
FROM combined_results
GROUP BY result;
----
False
endloop
endloop

View File

@@ -0,0 +1,15 @@
# name: test/optimizer/pushdown/issue_16104.test
# description: Test expressions in filter preserve the order in Push Down
# group: [pushdown]
statement ok
PRAGMA explain_output = OPTIMIZED_ONLY;
# col_double is random() * 2, so it can exceed 1; the abs(...) < 1 check is written before
# acos(...) and has to stay first when the filter is pushed down
# NOTE(review): presumably acos raises an error for inputs outside [-1, 1] - confirm
statement ok
WITH random_data AS (
SELECT random() * 2 AS col_double
FROM generate_series(1, 100)
)
SELECT *
FROM random_data
WHERE abs(col_double) < 1 AND acos(col_double) > 0;

View File

@@ -0,0 +1,18 @@
# name: test/optimizer/pushdown/issue_16671.test
# description: Test keeping alias in filter pushdown
# group: [pushdown]
require json
# W maps each column name to the list of values allowed for that column
statement ok
set variable W to '{"a":[1,2], "b":[2,4]}';
# columns(...) expands over the keys of W and alias(...) is used to look up the matching list,
# so the column alias has to survive filter pushdown;
# the expected rows are those with a in [1,2] and b in [2,4]
query II
from (values (1,2),(2,3),(3,1),(1,2),(2,3),(2,4), (3,2)) test (a,b)
select *
where (getvariable('W') -> '/'||alias(columns(getvariable('W').json_keys())))
.json_contains(columns(getvariable('W').json_keys()));
----
1 2
1 2
2 4

View File

@@ -0,0 +1,15 @@
# name: test/optimizer/pushdown/issue_16863.test
# description: Test right join filter lost in filter pushdown
# group: [pushdown]
statement ok
CREATE TABLE t1 (c1 DATE);
statement ok
INSERT INTO t1 (c1) VALUES ('2023-10-31');
# the join condition is always true, so the only joined row has a non-NULL t1.c1;
# the WHERE clause must still be applied and therefore the result is empty
query II
SELECT t1.c1, (t1.c1 IS NULL)
FROM t1 RIGHT JOIN (SELECT NULL AS col0 FROM t1) AS sub0 ON true
WHERE (t1.c1 IS NULL);
----
----

View File

@@ -0,0 +1,35 @@
# name: test/optimizer/pushdown/issue_17042.test
# description: Test left join filter lost in filter pushdown
# group: [pushdown]
statement ok
pragma explain_output = optimized_only
statement ok
CREATE TABLE t2(c1 INTEGER);
statement ok
CREATE TABLE t0(c1 DOUBLE);
statement ok
INSERT INTO t0(c1) VALUES (0.1);
statement ok
INSERT INTO t2(c1) VALUES (2);
# 0.1 < 2 is true (not NULL), so the IS NULL filter removes the only joined row
query II
SELECT * FROM t2 LEFT JOIN t0 ON true WHERE ((t0.c1<t2.c1) IS NULL);
----
statement ok
INSERT INTO t2(c1) VALUES (NULL);
# with a NULL in t2.c1 the comparison is NULL for that row, which must now be returned
query II
SELECT * FROM t2 LEFT JOIN t0 ON true WHERE ((t0.c1<t2.c1) IS NULL);
----
NULL 0.1
# the plan must contain an INNER join with both comparisons, in this order
query II
explain SELECT * FROM t2 LEFT JOIN t0 ON true WHERE (t0.c1 is distinct from t2.c1) and (t2.c1 > t0.c1);
----
logical_opt <REGEX>:.*INNER.*CAST\(c1 AS DOUBLE\) > c1.*CAST\(c1 AS DOUBLE\) IS.*DISTINCT FROM c1.*

View File

@@ -0,0 +1,27 @@
# name: test/optimizer/pushdown/issue_18202.test
# description: Test join filter pushdown with join conditions reordered
# group: [pushdown]
# setup: a table with an index and a view over it, modified by several inserts and an update
statement ok
CREATE TABLE t0(c0 FLOAT , c1 CHAR);
statement ok
INSERT INTO t0 VALUES(0.001, 'WORLD');
statement ok
CREATE VIEW v0(c0) AS SELECT 1522975040 FROM t0;
statement ok
INSERT INTO t0 VALUES(3.14, 'ABCDE');
statement ok
CREATE INDEX t0i0 ON t0(c1);
statement ok
INSERT INTO t0(c1, c0) VALUES('WORLD', 3.1415);
statement ok
UPDATE t0 SET c1 = 'HELLO';
# only checks that EXPLAIN (i.e. planning and optimization) succeeds; no plan shape is asserted
statement ok
explain SELECT v0, c1 FROM t0 JOIN v0 ON((c1) < (CAST(v0.c0 AS CHAR))) WHERE (NOT((v0) = ((CASE WHEN t0.c0 THEN c1 END))));

View File

@@ -0,0 +1,46 @@
# name: test/optimizer/pushdown/issue_18603.test
# description: Test filter pushdown with conflict comparison filters
# group: [pushdown]
statement ok
pragma enable_verification
statement ok
CREATE TABLE t0(c0 INT, c1 BOOLEAN);
statement ok
CREATE TABLE t1(c0 INT);
statement ok
INSERT INTO t0(c0, c1) VALUES (0, 0);
statement ok
INSERT INTO t1(c0) VALUES (1);
# test different order of filters
# (t0.c1 > t0.c0 and t0.c1 = t0.c0 contradict each other, so every variant returns no rows)
query III
SELECT * FROM t0 INNER JOIN t1 ON (t0.c1 > t0.c0) AND (t1.c0 > t0.c0) AND (t0.c0 < 7) AND (t0.c1 = t0.c0);
----
# the contradiction is detected at plan time: the plan is an EMPTY_RESULT ...
query II
EXPLAIN SELECT * FROM t0 INNER JOIN t1 ON (t0.c1 > t0.c0) AND (t1.c0 > t0.c0) AND (t0.c0 < 7) AND (t0.c1 = t0.c0);
----
physical_plan <REGEX>:.*EMPTY_RESULT.*
# ... and none of the remaining comparison filters are left in it
query II
EXPLAIN SELECT * FROM t0 INNER JOIN t1 ON (t0.c1 > t0.c0) AND (t1.c0 > t0.c0) AND (t0.c0 < 7) AND (t0.c1 = t0.c0);
----
physical_plan <!REGEX>:.*c0 > c0.*
query II
EXPLAIN SELECT * FROM t0 INNER JOIN t1 ON (t0.c1 > t0.c0) AND (t1.c0 > t0.c0) AND (t0.c0 < 7) AND (t0.c1 = t0.c0);
----
physical_plan <!REGEX>:.*c0 < 7.*
# same conditions with the equality listed first
query III
SELECT * FROM t0 INNER JOIN t1 ON (t0.c1 = t0.c0) AND (t0.c1 > t0.c0) AND (t1.c0 > t0.c0) AND (t0.c0 < 7);
----
query III
SELECT * FROM t0 INNER JOIN t1 ON (t0.c1 = t0.c0) AND (t0.c0 < 7) AND (t0.c1 > t0.c0) AND (t1.c0 > t0.c0);
----

View File

@@ -0,0 +1,58 @@
# name: test/optimizer/pushdown/issue_18653.test
# description: Performance issue with CROSS JOIN and LATERAL JOIN combined with unnest and json_each. Filter should be pushed down to reduce the joined rows
# group: [pushdown]
statement ok
PRAGMA enable_verification
# one million rows, each carrying the same three-element list
statement ok
create table test_table as
select s.i as id, [1, 2, 3]::bigint[] as values from generate_series(1, 1000000) as s(i);
statement ok
create index test_table_id_idx on test_table(id);
# cross join with unnest: a FILTER must appear after the LEFT_DELIM_JOIN in the plan text
query II
explain analyze
select id, value
from test_table
cross join unnest(values) as values(value) where id = 87100;
----
analyzed_plan <REGEX>:.*LEFT_DELIM_JOIN.*FILTER.*
# result check for the same query
query II
select id, value
from test_table
cross join unnest(values) as values(value) where id = 87100;
----
87100 3
87100 2
87100 1
# same plan shape for an explicit left join lateral
query II
explain analyze
select id, value
from test_table t
left join lateral unnest(t.values) as value on true
where id = 87100;
----
analyzed_plan <REGEX>:.*LEFT_DELIM_JOIN.*FILTER.*
require json
statement ok
create table test_table2 as
select s.i as id, '{"key1": 1, "key2": 2, "key3": 3}'::JSON as values
from generate_series(1, 1000000) as s(i);
statement ok
create index test_table2_id_idx on test_table2(id);
# json_each variant: here the id filter must show up in a "Filters:" entry of the plan
query II
explain analyze
select t.id, key, value
from test_table2 t
cross join json_each(t.values) as kv(key, value)
where t.id = 87100;
----
analyzed_plan <REGEX>:.*LEFT_DELIM_JOIN.*Filters:.*id=87100.*

View File

@@ -0,0 +1,21 @@
# name: test/optimizer/pushdown/join_filter_pushdown.test
# description: Test join filter pushdown for a LEFT JOIN on a VARCHAR key that contains NULLs
# group: [pushdown]
# t1.i is NULL in two of its three rows
statement ok
CREATE TABLE t1 AS FROM VALUES
('619d9199-bc25-41d7-803e-1fa801b4b952'::UUID, NULL::VARCHAR),
('1ada8361-c20b-4e9f-9c8e-15689039cc75'::UUID, '91'::VARCHAR),
('f5a8a7d8-6bc5-4337-a296-d52078156051'::UUID, NULL::VARCHAR) t(s, i);
statement ok
CREATE TABLE t2 as from values
('Int'),
('91'),
('13',),
('sst',) t(v);
# only checks that the join executes without error; the result is not asserted
statement ok
SELECT t1.s
FROM t1
LEFT JOIN t2 ON t1.i = t2.v;

View File

@@ -0,0 +1,101 @@
# name: test/optimizer/pushdown/no_mark_to_semi_if_mark_index_is_projected.test
# description: No mark to semi conversion if the mark join index is projected
# group: [pushdown]
# a single row whose parent_key ('20') does not match its own my_key ('10')
statement ok
CREATE OR REPLACE TABLE BaseData AS (
SELECT
'10' AS my_key,
'20' AS parent_key,
'30' AS payload,
'40' as foo,
'50' as foo2,
'60' as foo3
);
# Original query
# (parentExists is false for the only row, so filtering on it returns no rows)
query III
WITH
Example AS (
SELECT
c.my_key,
(c.parent_key IN (SELECT my_key FROM BaseData)) AS parentExists,
p.my_key IS NOT NULL AS parentExists2,
FROM BaseData AS c
LEFT JOIN BaseData AS p ON c.parent_key = p.my_key
)
SELECT *
FROM Example
WHERE parentExists
----
# original query no CTE
query III
SELECT
c.my_key,
(c.parent_key IN (SELECT my_key FROM BaseData)) AS parentExists,
p.my_key IS NOT NULL AS parentExists2,
FROM BaseData AS c
LEFT JOIN BaseData AS p ON c.parent_key = p.my_key
WHERE parentExists;
----
# original query but the CTE is a subquery
query III
SELECT *
FROM (SELECT
c.my_key,
(c.parent_key IN (SELECT my_key FROM BaseData)) AS parentExists,
p.my_key IS NOT NULL AS parentExists2,
FROM BaseData AS c
LEFT JOIN BaseData AS p ON c.parent_key = p.my_key
)
WHERE parentExists;
----
statement ok
PRAGMA explain_output='optimized_only'
# parentExists is part of the projected output, so the plan must keep a MARK join ...
query II
EXPLAIN
WITH Example AS (
SELECT
c.my_key,
(c.parent_key IN (SELECT my_key FROM BaseData)) AS parentExists,
p.my_key IS NOT NULL AS parentExists2,
FROM BaseData AS c
LEFT JOIN BaseData AS p ON c.parent_key = p.my_key
)
SELECT *
FROM Example
WHERE parentExists
----
logical_opt <REGEX>:.*MARK.*
# ... and must not contain a SEMI join
query II
EXPLAIN
WITH Example AS (
SELECT
c.my_key,
(c.parent_key IN (SELECT my_key FROM BaseData)) AS parentExists,
p.my_key IS NOT NULL AS parentExists2,
FROM BaseData AS c
LEFT JOIN BaseData AS p ON c.parent_key = p.my_key
)
SELECT *
FROM Example
WHERE parentExists
----
logical_opt <!REGEX>:.*SEMI.*
statement ok
create table t0 as select range a from range(300);
statement ok
create table t2 as select range b from range(50000);
# every value of t0.a (0..299) exists in t2.b, so in_alias is true for all 300 rows
query I
select sum(in_alias::INT) FROM (select a in (select b from t2) as in_alias from t0) where in_alias;
----
300

View File

@@ -0,0 +1,66 @@
# name: test/optimizer/pushdown/parquet_or_pushdown.test
# description: Test Parquet With Pushing Down of OR Filters
# group: [pushdown]
require parquet
# FIXME: re-enable when or pushdown is fixed
# (everything below "mode skip" is currently not executed)
mode skip
# Multiple column in the root OR node, don't push down
query II
EXPLAIN SELECT tbl.a, tbl.b FROM "data/parquet-testing/arrow/alltypes_plain.parquet" tbl(a, b) WHERE a=1 OR b=false
----
physical_plan <!REGEX>:.*PARQUET_SCAN.*Filters:.*
# Single column in the root OR node
query II
EXPLAIN SELECT tbl.a FROM "data/parquet-testing/arrow/alltypes_plain.parquet" tbl(a) WHERE a=1 OR a=2
----
physical_plan <REGEX>:.*PARQUET_SCAN.*Filters: a=1 OR a=2.*
# Single column + root OR node with AND
# NOTE(review): in this and the next two regexes the trailing "|.*" is an alternation with ".*",
# which matches any plan text; possibly a plan box-border character was intended.
# Confirm and tighten before re-enabling these tests.
query II
EXPLAIN SELECT tbl.a FROM "data/parquet-testing/arrow/alltypes_plain.parquet" tbl(a) WHERE a=1 OR (a>3 AND a<5)
----
physical_plan <REGEX>:.*PARQUET_SCAN.*Filters: a=1 OR a>3 AND a<5|.*
# Single column multiple ORs
query II
EXPLAIN SELECT tbl.a FROM "data/parquet-testing/arrow/alltypes_plain.parquet" tbl(a) WHERE a=1 OR a>3 OR a<5
----
physical_plan <REGEX>:.*PARQUET_SCAN.*Filters: a=1 OR a>3 OR a<5|.*
# Testing not equal
query II
EXPLAIN SELECT tbl.a FROM "data/parquet-testing/arrow/alltypes_plain.parquet" tbl(a) WHERE a!=1 OR a>3 OR a<2
----
physical_plan <REGEX>:.*PARQUET_SCAN.*Filters: a!=1 OR a>3 OR a<2|.*
# Multiple OR filters connected with ANDs
query II
EXPLAIN SELECT tbl.a, tbl.b, tbl.c FROM "data/parquet-testing/arrow/alltypes_plain.parquet" tbl(a,b,c) WHERE (a<2 OR a>3) AND (a=1 OR a=4) AND (b=false OR c=1);
----
physical_plan <REGEX>:.*PARQUET_SCAN.*Filters: a<2 OR a>3 AND a=1.*OR a=4.*
# Testing the number of rows filtered (column "a" has eight values: 0 .. 7)
statement ok
PRAGMA enable_profiling
# should return 2 rows: 0 and 7
query II
EXPLAIN ANALYZE SELECT tbl.a FROM "data/parquet-testing/arrow/alltypes_plain.parquet" tbl(a) WHERE a<1 OR a>6;
----
analyzed_plan <REGEX>:.*PARQUET_SCAN.*Filters: a<1 OR a>6.*2[ \t].*
# should return 1 row: 0
query II
EXPLAIN ANALYZE SELECT tbl.a FROM "data/parquet-testing/arrow/alltypes_plain.parquet" tbl(a) WHERE a<1 OR a>8;
----
analyzed_plan <REGEX>:.*PARQUET_SCAN.*Filters: a<1 OR a>8.*1[ \t].*

View File

@@ -0,0 +1,42 @@
# name: test/optimizer/pushdown/pushdown_after_statistics.test
# description: Test Table Filter Push Down
# group: [pushdown]
statement ok
set explain_output='optimized_only';
# three tables of different sizes: 100000, 10000 and 100 rows
statement ok
create table big_probe as select range%3000 a, range%4000 b from range(100000);
statement ok
create table into_semi as select range%300 c from range(10000);
statement ok
create table into_get as select range d from range(100);
# the IN filter becomes a mark join. We should keep it a mark join at this point
query II
explain select * from big_probe, into_semi, into_get where c in (1, 3, 5, 7, 10, 14, 16, 20, 22) and c = d and a = c;
----
logical_opt <REGEX>:.*MARK.*
statement ok
create table mark_join_build as select range e from range(200);
# Now the in filter is a semi join.
query II
explain select * from big_probe, into_semi, into_get where c in (select e from mark_join_build) and c = d and a = c;
----
logical_opt <REGEX>:.*SEMI.*
# nested IN / NOT IN subqueries over the same table: only checks that the query runs without error
statement ok
select t1.a from big_probe t1
where t1.a in
(select t2.b
from big_probe t2
where t2.b in (1206, 1202, 1322, 1204, 1370)
and t2.b not in (select t2_filter.a from big_probe t2_filter));

View File

@@ -0,0 +1,181 @@
# name: test/optimizer/pushdown/pushdown_filter_on_coalesced_equal_outer_join_keys.test
# description: Test pushdown of filters on coalesced join keys compared for equality in the join condition
# group: [pushdown]
# Queries that share a "nosort <label>" must produce identical EXPLAIN output: each pair below
# compares the optimized plan with a query in which the filter was pushed down by hand.
# enable query verification
statement ok
PRAGMA enable_verification
statement ok
CREATE TABLE t1 AS FROM VALUES
(1),
(2),
(NULL),
(4) t(id);
statement ok
CREATE TABLE t2 AS FROM VALUES
(1),
(3),
(NULL),
(4) t(id);
statement ok
CREATE TABLE t3 AS FROM VALUES
(1),
(3),
(NULL),
(4) t(id);
# result check: 2 only exists in t1, 3 only in t2, 4 in both
query I
SELECT id FROM t1 FULL OUTER JOIN t2 USING (id) WHERE id >=2 ORDER BY id
----
2
3
4
# should find all NULL rows correctly
# (the NULL keys of t1 and t2 do not join with each other, so there are two NULL rows)
query I
SELECT id FROM t1 FULL OUTER JOIN t2 USING (id) WHERE id IS NULL
----
NULL
NULL
statement ok
set explain_output='optimized_only';
# optimized plan is equivalent to plan with manually pushed down filter
query II nosort single_join
EXPLAIN SELECT id
FROM (SELECT id FROM t1) FULL OUTER JOIN (SELECT id FROM t2) USING (id)
WHERE id >= 2
----
query II nosort single_join
EXPLAIN SELECT id
FROM (SELECT id FROM t1 WHERE id >= 2) FULL OUTER JOIN (SELECT id FROM t2 WHERE id >= 2) USING (id)
----
# optimized plan is equivalent to plan with manually pushed down filter when using IS NULL as a filtering predicate
query II nosort single_join_isnull_filter
EXPLAIN SELECT id
FROM (SELECT id FROM t1) FULL OUTER JOIN (SELECT id FROM t2) USING (id)
WHERE id IS NULL
----
query II nosort single_join_isnull_filter
EXPLAIN SELECT id
FROM (SELECT id FROM t1 WHERE id IS NULL) FULL OUTER JOIN (SELECT id FROM t2 WHERE id IS NULL) USING (id)
----
# optimized plan is equivalent to plan with manually pushed down filter in the case of multiple joins
query II nosort multiple_joins
EXPLAIN SELECT id
FROM (SELECT id FROM t1) FULL OUTER JOIN (SELECT id FROM t2) USING (id) FULL OUTER JOIN (SELECT id FROM t3) USING (id) WHERE id >= 2;
----
query II nosort multiple_joins
EXPLAIN SELECT id
FROM (SELECT id FROM t1 WHERE id >= 2)
FULL OUTER JOIN (SELECT id FROM t2 WHERE id >= 2) USING (id)
FULL OUTER JOIN (SELECT id FROM t3 WHERE id >= 2) USING (id);
----
# should pushdown filter with multiple occurrences of the same coalesced join keys
query II nosort multiple_occurrences_of_the_same_coalesced_join_keys
EXPLAIN
SELECT id FROM (SELECT id FROM t1) FULL OUTER JOIN (SELECT id FROM t2) USING (id)
WHERE id >= 2 OR id IN (1, 4) OR id IS NULL;
----
query II nosort multiple_occurrences_of_the_same_coalesced_join_keys
EXPLAIN SELECT id
FROM (SELECT id FROM t1 WHERE id >= 2 OR id IN (1, 4) OR id IS NULL)
FULL OUTER JOIN (SELECT id FROM t2 WHERE id >= 2 OR id IN (1, 4) OR id IS NULL)
USING (id);
----
# should pushdown filter containing different but equivalent coalesced join keys
# (COALESCE(t1.id, t2.id) and COALESCE(t2.id, t1.id) are used within the same filter)
query II
EXPLAIN SELECT t1.id, t2.id
FROM t1 FULL OUTER JOIN t2 ON t1.id = t2.id
WHERE COALESCE(t1.id, t2.id) >= 2 OR COALESCE(t2.id, t1.id) IS NULL;
----
logical_opt <REGEX>:.*SEQ_SCAN.*Filters.*\(id >= 2\) OR \(id IS NULL\).*
# should not pushdown a volatile filter
query II
EXPLAIN SELECT id
FROM t1 FULL OUTER JOIN t2 USING (id)
WHERE trunc(random() * id) >= 2
----
logical_opt <!REGEX>:.*SEQ_SCAN.*Filters.*
# coalescing right and left is optimized like coalescing left and right
query II nosort left_right_coalesce
EXPLAIN SELECT t1.id, t2.id
FROM t1 FULL OUTER JOIN t2 ON t1.id = t2.id
WHERE COALESCE(t1.id, t2.id) >= 2;
----
query II nosort left_right_coalesce
EXPLAIN SELECT t1.id, t2.id
FROM t1 FULL OUTER JOIN t2 ON t1.id = t2.id
WHERE COALESCE(t2.id, t1.id) >= 2;
----
# two-column tables for the tests with additional (non-key) columns
statement ok
CREATE TABLE t4 AS FROM VALUES
(1, 20),
(2, NULL),
(3, 16)
t(id, a);
statement ok
CREATE TABLE t5 AS FROM VALUES
(1, NULL),
(1, 30)
t(id, a);
# should not pushdown filter on coalesced keys that also depend on other columns
query II
EXPLAIN SELECT id FROM t1 FULL OUTER JOIN t4 USING (id) WHERE id = a;
----
logical_opt <!REGEX>:.*SEQ_SCAN.*Filters.*
# should not pushdown single filter containing coalesced keys from different join conditions
query II
EXPLAIN SELECT id FROM t4 FULL OUTER JOIN t5 USING (id, a) WHERE id IN (a, 1)
----
logical_opt <!REGEX>:.*SEQ_SCAN.*Filters.*
# reference plan with the nullif filter pushed down by hand; compared against the next query
query II nosort nullif_func_join_keys
EXPLAIN SELECT *
FROM (SELECT id, a FROM t4 WHERE nullif(id, a) < 3) AS t4
FULL OUTER JOIN (SELECT id, a FROM t5 WHERE nullif(id, a) < 3) AS t5
ON nullif(t4.id, t4.a) = nullif(t5.id, t5.a)
----
# should pushdown filter containing coalesced keys which are functions of the input table
query II nosort nullif_func_join_keys
EXPLAIN SELECT *
FROM (SELECT id, a FROM t4) AS t4
FULL OUTER JOIN (SELECT id, a FROM t5) AS t5
ON nullif(t4.id, t4.a) = nullif(t5.id, t5.a)
WHERE coalesce(nullif(t4.id, t4.a), nullif(t5.id, t5.a)) < 3;
----
# same check with list-valued join keys: filter on the coalesced lists ...
query II nosort list_func_join_keys
EXPLAIN SELECT * FROM
(SELECT id, a FROM t4) AS t4
FULL OUTER JOIN (SELECT id, a FROM t5) AS t5
ON [t4.id, t4.a] = [t5.id, t5.a]
WHERE coalesce([t4.id, t4.a], [t5.id, t5.a])[0] < 4;
----
# ... versus the filter pushed down by hand into both inputs
query II nosort list_func_join_keys
EXPLAIN SELECT * FROM
(SELECT id, a FROM t4, WHERE [id, a][0] < 4) AS t4
FULL OUTER JOIN (SELECT id, a FROM t5 WHERE [id, a][0] < 4) AS t5
ON [t4.id, t4.a] = [t5.id, t5.a]
----

View File

@@ -0,0 +1,25 @@
# name: test/optimizer/pushdown/pushdown_in_to_parquet.test
# description: Parquet filter of IN with 1 argument can be converted to =
# group: [pushdown]
require parquet
statement ok
PRAGMA enable_verification
# write 1000 stringified integers to a parquet file in the test directory
statement ok
create table t1 as select range::VARCHAR a from range(1000);
statement ok
copy t1 to '__TEST_DIR__/t1.parquet' (FORMAT PARQUET);
# the single-element IN must not leave a FILTER operator on top of the parquet scan
query II
explain select * from '__TEST_DIR__/t1.parquet' where a in ('400');
----
physical_plan <!REGEX>:.*FILTER.*PARQUET_SCAN.*
# same query: the predicate must instead be listed under the scan's Filters
query II
explain select * from '__TEST_DIR__/t1.parquet' where a in ('400');
----
physical_plan <REGEX>:.*PARQUET_SCAN.*Filters:.*

View File

@@ -0,0 +1,31 @@
# name: test/optimizer/pushdown/pushdown_unnest_into_cte.test
# description: Filters on non-unnested columns are pushed below UNNEST into the table scan, for subqueries and CTEs
# group: [pushdown]
# each row holds an id and a three-element list derived from that id
statement ok
CREATE TABLE tbl2 as SELECT i as id1, [i-3, i+1, i+2] as somelist FROM generate_series(1, 10_000) s(i);
statement ok
pragma explain_output='OPTIMIZED_ONLY';
# subquery variant: the id1 filter should appear as a scan filter underneath the UNNEST
query II
EXPLAIN SELECT id1, element
FROM (
SELECT id1, UNNEST(somelist) AS element
FROM tbl2
) tmp
WHERE id1=10;
----
logical_opt <REGEX>:.*UNNEST.*SEQ_SCAN.*Filters.*
# CTE variant, with generate_subscripts next to the UNNEST: same expectation
query II
EXPLAIN WITH tmp AS (
SELECT id1, generate_subscripts(somelist, 1) AS index, UNNEST(somelist) AS element
FROM tbl2
)
SELECT id1, index, element
FROM tmp
WHERE id1=10;
----
logical_opt <REGEX>:.*UNNEST.*SEQ_SCAN.*Filters.*

View File

@@ -0,0 +1,150 @@
# name: test/optimizer/pushdown/pushdown_window_partition_filter.test
# description: Test pushdown of filters on PARTITION BY columns through window operators.
# group: [pushdown]
statement ok
create table t1 as from VALUES
('A', 1),
('B', 3),
('C', 12),
('A', 5),
('B', 8),
('C', 9),
('A', 10),
('B', 20),
('C', 3)
t(a, b);
statement ok
pragma explain_output=OPTIMIZED_ONLY
statement ok
create view window_with_filter as select a, b, LEAD(b) OVER (partition by a) as lead from t1 where a != 'C';
statement ok
create view window_no_filter as select a, b, LEAD(b) OVER (partition by a) as lead from t1;
# both views must return the same rows once 'C' is filtered out: the shared label
# result_1 makes the runner compare the two result hashes
query III nosort result_1
select * from window_with_filter where a != 'C' order by all;
----
query III nosort result_1
select * from window_no_filter where a != 'C' order by all;
----
statement ok
create table partition_and_rank_me as from values
('A', 10, 'A', 'id'),
('A', 20, 'A', 'id'),
('A', 30, 'B', 'id'),
('D', 40, 'B', 'id'),
('D', 50, 'C', 'id'),
('D', 60, 'C', 'id')
t(a, b, c, d);
query IIII
select a, c, sum(b) OVER (PARTITION BY c), rank() OVER (PARTITION BY d ORDER BY b)
from partition_and_rank_me order by all
----
A A 30 1
A A 30 2
A B 70 3
D B 70 4
D C 110 5
D C 110 6
# can't push down the filter c!='B', since the values of the rank() window function
# are affected by the existence of the rows where c='B'
query IIII
select * from (
select a, c, sum(b) OVER (PARTITION BY c), rank() OVER (PARTITION BY d ORDER BY b)
from partition_and_rank_me
) where c != 'B' order by all;
----
A A 30 1
A A 30 2
D C 110 5
D C 110 6
# One filter clause is on the partitioned column but the filter clause is an AND conjunction, so we don't push that down.
query IIII
select * from (
select a, c, sum(b) OVER (PARTITION BY c), rank() OVER (PARTITION BY c ORDER BY b)
from partition_and_rank_me
) where (c = 'B' AND a = 'D') order by all;
----
D B 70 2
# result of above query with pushdown
query IIII
select * from (
select a, c, sum(b) OVER (PARTITION BY c), rank() OVER (PARTITION BY c ORDER BY b)
from partition_and_rank_me where (c = 'B' AND a = 'D')
) order by all;
----
D B 40 1
# The filter is on the partitioned column, but is part of an OR conjunction, so we can push it down
query IIII
select * from (
select a, c, sum(b) OVER (PARTITION BY c), rank() OVER (PARTITION BY c ORDER BY b)
from partition_and_rank_me
) where (c = 'B' OR a = 'D') order by all;
----
A B 70 1
D B 70 2
D C 110 1
D C 110 2
# result of above query with pushdown
query IIII
select * from (
select a, c, sum(b) OVER (PARTITION BY c), rank() OVER (PARTITION BY c ORDER BY b)
from partition_and_rank_me
where (c = 'B' OR a = 'D')
) order by all;
----
A B 70 1
D B 70 2
D C 110 1
D C 110 2
# The filter is a function expression, so we don't push it down
query IIII
select * from (
select a, c, sum(b) OVER (PARTITION BY c), rank() OVER (PARTITION BY c ORDER BY b)
from partition_and_rank_me
) where (c || 'Z' = 'BZ') order by all;
----
A B 70 1
D B 70 2
# can't push down the filter c!='B', since the values of the rank() window function
# are affected by the existence of the rows where c='B'
query II
explain select * from (select a, sum(b) OVER (PARTITION BY c), rank() OVER (PARTITION BY d ORDER BY b), c from partition_and_rank_me) where c != 'B' order by all;
----
logical_opt <REGEX>:.*FILTER.*WINDOW.*
query II
explain select * from window_no_filter where a != 'C' order by a;
----
logical_opt <REGEX>:.*WINDOW.*Filters.*
statement ok
create table t2 as select range a, range%50 b, range%25 c from range(500);
# second window expression is not partitioned on b, so the filter expression cannot be
# pushed down
query II
explain select * from (select a, b, c, sum(a) OVER (PARTITION BY b, c), sum(b) OVER (PARTITION BY a, c) from t2) where b > 25;
----
logical_opt <REGEX>:.*FILTER.*WINDOW.*
query II
explain select * from (select a, b, c, sum(a) OVER (PARTITION BY b, c), sum(b) OVER (PARTITION BY a, c) from t2) where c = 20;
----
logical_opt <REGEX>:.*WINDOW.*c=20.*

View File

@@ -0,0 +1,217 @@
# name: test/optimizer/pushdown/table_filter_pushdown.test
# description: Test Table Filter Push Down
# group: [pushdown]
statement ok
CREATE TABLE integers(i integer, j integer, k integer)
statement ok
INSERT INTO integers VALUES (5, 5, 5), (10, 10, 10)
statement ok
PRAGMA explain_output = OPTIMIZED_ONLY;
# complex filter cannot be entirely pushed down
query II
EXPLAIN SELECT k FROM integers where i+j > 10 and j = 5 and i = k+1
----
logical_opt <REGEX>:.*FILTER.*
# simple filter is pushed down: no filter remaining
query II
EXPLAIN SELECT k FROM integers where j=5
----
logical_opt <!REGEX>:.*FILTER.*
# complex filters are also pushed down
query I
SELECT k FROM integers where j::VARCHAR SIMILAR TO '[0-9]*'
----
5
10
# multiple filters pushed down
query II
EXPLAIN SELECT k FROM integers where j = 5 and i = 10
----
logical_opt <!REGEX>:.*FILTER.*
# complex filter is pushed down: no filter remaining
query II
EXPLAIN SELECT k FROM integers where j::VARCHAR SIMILAR TO '[0-9]*'
----
logical_opt <!REGEX>:.*FILTER.*
# we cannot push down expressions that can throw errors
query I
SELECT k FROM integers where j%50=j
----
5
10
query II
EXPLAIN SELECT k FROM integers where j%50=j
----
logical_opt <REGEX>:.*FILTER.*
# test different data types
foreach type <numeric>
statement ok
CREATE TABLE tablinho_numbers(i ${type}, j ${type}, k ${type})
statement ok
INSERT INTO tablinho_numbers VALUES (0, 0, 0), (1, 1, 1), (2, 2, 2)
# simple filters are pushed down
query II
EXPLAIN SELECT k FROM tablinho_numbers where j = 1
----
logical_opt <!REGEX>:.*FILTER.*
query II
EXPLAIN SELECT k FROM tablinho_numbers where j > 1
----
logical_opt <!REGEX>:.*FILTER.*
query II
EXPLAIN SELECT k FROM tablinho_numbers where j >= 1
----
logical_opt <!REGEX>:.*FILTER.*
query II
EXPLAIN SELECT k FROM tablinho_numbers where j < 1
----
logical_opt <!REGEX>:.*FILTER.*
query II
EXPLAIN SELECT k FROM tablinho_numbers where j <= 1
----
logical_opt <!REGEX>:.*FILTER.*
statement ok
DROP TABLE tablinho_numbers
endloop
# pushdown string
statement ok
CREATE TABLE tablinho(i varchar)
statement ok
INSERT INTO tablinho VALUES ('a'), ('bla'), ('c')
# simple filters are pushed down
query II
EXPLAIN SELECT i FROM tablinho where i = 'bla'
----
logical_opt <!REGEX>:.*FILTER.*
query II
EXPLAIN SELECT i FROM tablinho where i > 'bla'
----
logical_opt <!REGEX>:.*FILTER.*
query II
EXPLAIN SELECT i FROM tablinho where i >= 'bla'
----
logical_opt <!REGEX>:.*FILTER.*
query II
EXPLAIN SELECT i FROM tablinho where i < 'bla'
----
logical_opt <!REGEX>:.*FILTER.*
query II
EXPLAIN SELECT i FROM tablinho where i <= 'bla'
----
logical_opt <!REGEX>:.*FILTER.*
# more complex filters can be pushed down as long as they only involve one column
query II
EXPLAIN SELECT i FROM tablinho where i like 'bl_a%'
----
logical_opt <REGEX>:.*FILTER.*
query II
EXPLAIN SELECT i FROM tablinho where i like '%bla'
----
logical_opt <!REGEX>:.*FILTER.*
query II
EXPLAIN SELECT i FROM tablinho where i like '_bla'
----
logical_opt <!REGEX>:.*FILTER.*
# test Q6 pushdown
statement ok
CREATE TABLE LINEITEM(L_ORDERKEY INTEGER NOT NULL, L_PARTKEY INTEGER NOT NULL,L_SUPPKEY INTEGER NOT NULL, L_LINENUMBER INTEGER NOT NULL,L_QUANTITY DECIMAL(15,2) NOT NULL,L_EXTENDEDPRICE DECIMAL(15,2) NOT NULL, L_DISCOUNT DECIMAL(15,2) NOT NULL, L_TAX DECIMAL(15,2) NOT NULL,L_RETURNFLAG CHAR(1) NOT NULL, L_LINESTATUS CHAR(1) NOT NULL, L_SHIPDATE DATE NOT NULL, L_COMMITDATE DATE NOT NULL,L_RECEIPTDATE DATE NOT NULL, L_SHIPINSTRUCT CHAR(25) NOT NULL, L_SHIPMODE CHAR(10) NOT NULL, L_COMMENT VARCHAR(44) NOT NULL)
query II
explain select sum(l_extendedprice * l_discount) as revenue from lineitem where l_shipdate >= '1994-01-01' and l_shipdate < '1995-01-01' and l_discount between 0.05 and 0.07 and l_quantity < 24
----
logical_opt <!REGEX>:.*FILTER.*
statement ok
create temporary table t as select range a, range % 10 b, mod(range,10000) c, 5 d, 10000 e from range(100);
statement ok
PRAGMA explain_output = PHYSICAL_ONLY;
query II
explain select count(*) from t where b <=3 and b>=0;
----
physical_plan <REGEX>:.*b<=3.*
# test time pushdown
statement ok
CREATE TABLE test_time (a TIME, b TIME, c TIME)
statement ok
INSERT INTO test_time VALUES ('00:01:00','00:01:00','00:01:00'),('00:10:00','00:10:00','00:10:00'),('01:00:00','00:10:00','01:00:00'),(NULL,NULL,NULL)
statement ok
PRAGMA explain_output = OPTIMIZED_ONLY;
query II
EXPLAIN SELECT count(*) from test_time where a ='00:01:00'
----
logical_opt <!REGEX>:.*FILTER.*
# test bool pushdown
statement ok
CREATE TABLE test_bool (i bool, j bool)
statement ok
INSERT INTO test_bool VALUES (TRUE,TRUE),(TRUE,FALSE),(FALSE,TRUE),(NULL,NULL)
statement ok
PRAGMA explain_output = OPTIMIZED_ONLY;
query II
EXPLAIN SELECT i FROM test_bool where j = TRUE
----
logical_opt <!REGEX>:.*FILTER.*
# test now() pushdown
statement ok
CREATE TABLE test_timestamps (ts TIMESTAMP);
statement ok
INSERT INTO test_timestamps VALUES (NOW()::TIMESTAMP), (NOW()::TIMESTAMP - INTERVAL 10 YEARS);
statement ok
PRAGMA explain_output = OPTIMIZED_ONLY;
query II
EXPLAIN SELECT * FROM test_timestamps where ts >= NOW()::TIMESTAMP - INTERVAL 1 YEAR;
----
logical_opt <!REGEX>:.*FILTER.*
query I
SELECT COUNT(*) FROM test_timestamps where ts >= NOW()::TIMESTAMP - INTERVAL 1 YEAR;
----
1

View File

@@ -0,0 +1,399 @@
# name: test/optimizer/pushdown/table_or_pushdown.test
# description: Test Table OR Filter Push Down
# group: [pushdown]
statement ok
CREATE TABLE integers AS SELECT a as a, a as b FROM generate_series(1, 5, 1) tbl(a)
#### test OR filters with multiple columns in the root OR, e.g., a=1 OR b=2
query II
EXPLAIN SELECT * FROM integers WHERE a=1 OR b=2 AND (a>3 OR b<5)
----
physical_plan <!REGEX>:.*SEQ_SCAN.*Filters:.*
query II
SELECT * FROM integers WHERE a=1 OR b=2 AND (a>3 OR b<5)
----
1 1
2 2
#### test OR filters with AND that triggers a stop early condition
query II
EXPLAIN SELECT * FROM integers WHERE a=1 OR a=2 AND (a>3 OR b<5)
----
physical_plan <REGEX>:.*SEQ_SCAN.*Filters: a=1 OR a=2[ ]*|.*
query II
SELECT * FROM integers WHERE a=1 OR a=2 AND (a>3 OR b<5)
----
1 1
2 2
#### test OR filters with AND in the same column
query II
EXPLAIN SELECT * FROM integers WHERE a=1 OR (a>3 AND a<5)
----
physical_plan <REGEX>:.*SEQ_SCAN.*Filters: a=1 OR a>3 AND a<5|.*
query II
SELECT * FROM integers WHERE a=1 OR (a>3 AND a<5)
----
1 1
4 4
#### test only OR filters
query II
EXPLAIN SELECT * FROM integers WHERE a=1 OR a>3 OR a<5
----
physical_plan <REGEX>:.*SEQ_SCAN.*Filters: a=1 OR a>3 OR a<5|.*
query II
SELECT * FROM integers WHERE a=1 OR a>3 OR a<5 ORDER by a
----
1 1
2 2
3 3
4 4
5 5
######### Testing String ######################################################
statement ok
CREATE TABLE strings(s VARCHAR)
statement ok
INSERT INTO strings VALUES ('AAAAAAAAAAAAAAA11111'), ('AAAAAAAAAAAAAAA99999')
query II
EXPLAIN SELECT * FROM strings WHERE s>'AAAAAAAAAAAAAAA1'
----
physical_plan <REGEX>:.*SEQ_SCAN.*Filters:.*s>'AAAAAAAAAAAAAAA1'.*
query I
SELECT * FROM strings WHERE s>'AAAAAAAAAAAAAAA1'
----
AAAAAAAAAAAAAAA11111
AAAAAAAAAAAAAAA99999
#### test only OR filters
query II
EXPLAIN SELECT * FROM strings WHERE s>'AAAAAAAAAAAAAAA' OR s<'AAAAAAAAAAAAAAA99999A'
----
physical_plan <REGEX>:.*SEQ_SCAN.*Filters:.*s>.*'AAAAAAAAAAAAAAA'|.*OR.*s<.*'AAAAAAAAAAAAAAA99999A'.*
query I
SELECT * FROM strings WHERE s>'AAAAAAAAAAAAAAA' OR s<'AAAAAAAAAAAAAAA99999A'
----
AAAAAAAAAAAAAAA11111
AAAAAAAAAAAAAAA99999
statement ok
INSERT INTO strings VALUES ('BBBB'), ('CCCC')
# testing string statistics
query II
EXPLAIN SELECT * FROM strings WHERE s!='111' OR s!='WWW'
----
physical_plan <!REGEX>:.*SEQ_SCAN.*Filters:.*
query I
SELECT * FROM strings WHERE s!='111' OR s!='WWW' ORDER BY s
----
AAAAAAAAAAAAAAA11111
AAAAAAAAAAAAAAA99999
BBBB
CCCC
#### Testing not equal
query II
EXPLAIN SELECT * FROM strings WHERE s!='BBBB' or s>'BBBB'
----
physical_plan <REGEX>:.*SEQ_SCAN.*Filters:.*s!=.*'BBBB'.*OR.*s>.*'BBBB'.*
query I
SELECT * FROM strings WHERE s!='BBBB' or s>'BBBB'
----
AAAAAAAAAAAAAAA11111
AAAAAAAAAAAAAAA99999
CCCC
query II
EXPLAIN SELECT * FROM integers WHERE a!=1 OR a!=2
----
physical_plan <REGEX>:.*SEQ_SCAN.*Filters:.*a!=1.*OR.*a!=2.*
query II
SELECT * FROM integers WHERE a!=1 OR a!=2 ORDER BY a
----
1 1
2 2
3 3
4 4
5 5
#### Testing AND priority
query II
EXPLAIN SELECT * FROM integers WHERE a>2 AND (a=3 OR a=5)
----
physical_plan <REGEX>:.*SEQ_SCAN.*Filters:.*a>2.*
query II
SELECT * FROM integers WHERE a>2 AND (a=3 OR a=5) ORDER BY a
----
3 3
5 5
######### Complex filters #####################################################
#### multiple OR filters connected with ANDs
query II
EXPLAIN SELECT * FROM integers WHERE (a<2 OR a>3) AND (a=1 OR a=4)
----
physical_plan <REGEX>:.*SEQ_SCAN.*Filters:.*a<2.*OR.*a>3.*AND.*a=1.*OR.*a=4.*
query II
SELECT * FROM integers WHERE (a<2 OR a>3) AND (a=1 OR a=4)
----
1 1
4 4
query II
EXPLAIN SELECT * FROM integers WHERE (a<2 OR a>3) AND (a=1 OR a=4) AND (b=1 OR b<5)
----
physical_plan <REGEX>:.*SEQ_SCAN.*Filters:.*a<2.*OR.*a>3.*AND.*a=1.*OR.*a=4.*b=1 OR b<5.*
query II
SELECT * FROM integers WHERE (a<2 OR a>3) AND (a=1 OR a=4) AND (b=1 OR b<5)
----
1 1
4 4
#### OR filters with functions: concat(...)
query II
EXPLAIN SELECT * FROM integers WHERE concat(a=1, b=1)='truetrue' OR a=2 ORDER by a
----
physical_plan <!REGEX>:.*SEQ_SCAN.*Filters:.*
query II
SELECT * FROM integers WHERE concat(a=1, b=1)='truetrue' OR a=2 ORDER by a
----
1 1
2 2
#### LIKE operator
query II
EXPLAIN SELECT * FROM strings WHERE s>'BBBB' OR s LIKE '%AAAAAAAAAAAAAAA%' ORDER BY s
----
physical_plan <REGEX>:.*SEQ_SCAN.*Filters:.*
query I
SELECT * FROM strings WHERE s>'BBBB' OR s LIKE '%AAAAAAAAAAAAAAA%' ORDER BY s
----
AAAAAAAAAAAAAAA11111
AAAAAAAAAAAAAAA99999
CCCC
#### OR filters with CASE statement
query II
EXPLAIN SELECT * FROM integers WHERE a=1 OR (CASE WHEN a=2 THEN true WHEN a=4 THEN true ELSE false END)
----
physical_plan <REGEX>:.*SEQ_SCAN.*Filters:.*
query II
SELECT * FROM integers WHERE a=1 OR (CASE WHEN a=2 THEN true WHEN a=4 THEN true ELSE false END)
----
1 1
2 2
4 4
#### multiple complex OR filters connected with ANDs
query II
EXPLAIN SELECT * FROM integers WHERE (a=1 OR a=4) AND (a=1 OR (CASE WHEN a%2=0 THEN true ELSE false END)) AND (concat(a=1, b=1)='truetrue' OR a=2) AND (a::CHAR LIKE '1' OR a=5)
----
physical_plan <REGEX>:.*SEQ_SCAN.*Filters:.*a=1.*OR.*a=4.*|$
query II
SELECT * FROM integers WHERE (a=1 OR a=4) AND (a=1 OR (CASE WHEN a%2=0 THEN true ELSE false END)) AND (concat(a=1, b=1)='truetrue' OR a=2) AND (a::CHAR LIKE '1' OR a=5)
----
1 1
#### OR filters with NULL
statement ok
INSERT INTO integers VALUES (NULL, NULL)
query II
EXPLAIN SELECT * FROM integers WHERE a=1 OR a IS NULL ORDER BY a
----
physical_plan <REGEX>:.*SEQ_SCAN.*Filters:.*
query II
SELECT * FROM integers WHERE a=1 OR a IS NULL ORDER BY a
----
1 1
NULL NULL
query II
EXPLAIN SELECT * FROM integers WHERE a=1 OR a IS NOT NULL
----
physical_plan <REGEX>:.*SEQ_SCAN.*Filters:.*
query II
SELECT * FROM integers WHERE a=1 OR a IS NOT NULL
----
1 1
2 2
3 3
4 4
5 5
# notequal and null
query II
EXPLAIN SELECT * FROM integers WHERE a!=1 OR a IS NULL ORDER BY a
----
physical_plan <REGEX>:.*SEQ_SCAN.*Filters:.*
query II
SELECT * FROM integers WHERE a!=1 OR a IS NULL ORDER BY a
----
2 2
3 3
4 4
5 5
NULL NULL
# notequal and not null
query II
EXPLAIN SELECT * FROM integers WHERE a!=1 OR a IS NOT NULL ORDER BY a
----
physical_plan <REGEX>:.*SEQ_SCAN.*Filters:.*
query II
SELECT * FROM integers WHERE a!=1 OR a IS NOT NULL ORDER BY a
----
1 1
2 2
3 3
4 4
5 5
query II
EXPLAIN SELECT * FROM integers WHERE a!=1 OR a>3 OR a<2 ORDER by a
----
physical_plan <REGEX>:.*SEQ_SCAN.*Filters:.*a!=1.*OR.*a>3.*OR.*a.*<.*2.*
query II
SELECT * FROM integers WHERE a!=1 OR a>3 OR a<2 ORDER by a
----
1 1
2 2
3 3
4 4
5 5
# test comparison with null in OR conjunctions uses ISNULL/ISNOTNULL table filter
statement ok
CREATE TABLE t0 as from values (1), (2), (2), (0), (NULL) t(c1);
# is distinct from NULL
query I
SELECT * FROM t0 WHERE ((t0.c1 IS DISTINCT FROM NULL) OR (NULL));
----
1
2
2
0
# is not distinct from NULL
query I
SELECT * FROM t0 WHERE ((t0.c1 IS NOT DISTINCT FROM NULL) OR (NULL));
----
NULL
# = NULL in conjunction or
query I
SELECT * FROM t0 WHERE ((t0.c1 = NULL) OR (NULL));
----
query I
SELECT * FROM t0 WHERE ((t0.c1 != NULL) OR (NULL));
----
statement ok
SELECT * FROM t0 WHERE ((t0.c1 = NULL) OR (NOT NULL) OR (t0.c1 = 1));
query I
select * from t0 where (NULL OR cast(t0.c1 as bool)) order by all;
----
1
2
2
mode skip
#### numeric statistics
# notequal and constant > max
query II
EXPLAIN SELECT * FROM integers WHERE a!=10 OR a>3
----
physical_plan <!REGEX>:.*SEQ_SCAN.*Filters:.*
## The constant in a!=10 is greater than numeric_statistics::max, so the predicate is always true within an OR conjunction
mode unskip
query II
SELECT * FROM integers WHERE a!=10 OR a>3
----
1 1
2 2
3 3
4 4
5 5
# Testing the number of rows filtered (column "a" has five values: 1 .. 5)
statement ok
PRAGMA enable_profiling
mode skip
# should return 2 rows: 1 and 5
query II
EXPLAIN ANALYZE SELECT a FROM integers WHERE a<2 OR a>4
----
analyzed_plan <REGEX>:.*SEQ_SCAN.*Filters:.*a<2.*OR.*a>4.*2 Rows.*
# should return 1 row: 1
query II
EXPLAIN ANALYZE SELECT a FROM integers WHERE a<2 OR a>5
----
analyzed_plan <REGEX>:.*SEQ_SCAN.*Filters: a<2 OR a>5.*1 Rows.*
mode unskip
statement ok
PRAGMA disable_profiling
# notequal and min == max && min == constant
statement ok
DROP TABLE integers
statement ok
CREATE TABLE integers AS SELECT a as A, a as B FROM generate_series(1, 1, 1) tbl(a)
query II
EXPLAIN SELECT * FROM integers WHERE a!=1 OR a<2
----
physical_plan <!REGEX>:.*SEQ_SCAN.*Filters:.*
query II
SELECT * FROM integers WHERE a!=1 OR a<3
----
1 1

View File

@@ -0,0 +1,25 @@
# name: test/optimizer/pushdown/test_constant_or_null_pushdown.test
# description: Test how constant_or_null filters show up in the physical plan
# group: [pushdown]
statement ok
create table t1 (ts_stop TIMESTAMPTZ);
statement ok
insert into t1 values (NULL), (NULL);
# arguments include a COALESCE over the column: the plan is expected to still contain constant_or_null
query II
explain select count(*) from t1 where constant_or_null(true, COALESCE(ts_stop, '9999-09-09 07:09:09+00'::TIMESTAMPTZ), '2025-06-23 10:32:02.216+00'::TIMESTAMPTZ);
----
physical_plan <REGEX>:.*constant_or_null.*
statement ok
create table t2 as select range%2 a, range b from range(100);
statement ok
insert into t2 values (NULL, NULL), (NULL, NULL), (NULL, NULL);
# argument is a bare column: the plan is expected to show an IS NOT NULL check on it
query II
explain select * from t2 where constant_or_null(true, a);
----
physical_plan <REGEX>:.*a IS NOT NULL.*

View File

@@ -0,0 +1,61 @@
# name: test/optimizer/pushdown/test_pushdown_cte_group_by_all.test
# description: If a CTE contains GROUP BY ALL then predicate pushdown on non-aggregated columns should occur
# group: [pushdown]
statement ok
create or replace table my_temp as
from generate_series(99) t(i)
select i, i % 10 as group_1;
statement ok
pragma explain_output='optimized_only';
query II
explain
with my_cte as (
from my_temp
select
group_1,
min(i) as min_i,
max(i) as max_i
group by ALL
)
from my_cte
where
group_1 = 2;
----
logical_opt <REGEX>:.*Filters:.*
query II
explain
with my_cte as (
from my_temp
select
group_1,
min(i) as min_i,
max(i) as max_i
group by group_1
)
from my_cte
where
group_1 = 2;
----
logical_opt <REGEX>:.*Filters:.*
query II
explain
with my_cte as (
from my_temp
select
group_1,
min(i) as min_i,
max(i) as max_i
group by group_1 having min_i > 1
)
from my_cte
where group_1 = 2;
----
logical_opt <REGEX>:.*FILTER.*Filters:.*

View File

@@ -0,0 +1,53 @@
# name: test/optimizer/pushdown/test_pushdown_or.test_slow
# description: Check query results for OR filters on a large table and on TPC-H lineitem
# group: [pushdown]
require tpch
statement ok
create table t1 as select range a, range b from range(0,1000000) ;
statement ok
create table t2(a int);
statement ok
insert into t2 from range(10) t(a);
#statement ok
#select * from t1 join t2 using (a);
# OR of three equality predicates on the same column
query I
select a from t1 where a = 30000 or a = 50000 or a = 500;
----
500
30000
50000
# generate the TPC-H tables at scale factor 1
statement ok
call dbgen(sf=1);
# OR of two equality predicates on the same column
query I
select l_orderkey from lineitem where l_orderkey = 6 or l_orderkey = 5999971;
----
6
5999971
5999971
5999971
5999971
5999971
5999971
# OR of equality predicates on two different columns
query II
select l_orderkey, l_partkey from lineitem where l_orderkey = 6 or l_partkey = 4991;
----
6 139636
403456 4991
535522 4991
981987 4991
2593475 4991
3237285 4991
3695110 4991
4093507 4991
4437666 4991
4734181 4991
5552582 4991

View File

@@ -0,0 +1,79 @@
# name: test/optimizer/pushdown/timestamp_to_date_pushdown.test
# description: Test pushdown of timestamp to date filters
# group: [pushdown]
require icu
foreach timezone UTC ECT
statement ok
set TimeZone='${timezone}';
statement ok
create or replace table t1 (ts timestamp, i int);
statement ok
insert into t1 select '2024-05-01 00:00:00'::timestamp, i from generate_series(1, 2000) g(i);
statement ok
insert into t1 select '2024-05-02 00:00:00'::timestamp, i from generate_series(1, 1000) g(i);
statement ok
insert into t1 select '2024-05-02 00:22:00'::timestamp, i from generate_series(1, 1000) g(i);
statement ok
insert into t1 select '2024-05-03 00:00:00'::timestamp, i from generate_series(1, 2000) g(i);
query II
explain select * from t1 where ts::date == '2024-05-02';
----
physical_plan <REGEX>:.*SEQ_SCAN.*Filters:.*
query II
explain select * from t1 where '2024-05-02' == ts::date;
----
physical_plan <REGEX>:.*SEQ_SCAN.*Filters:.*
statement ok
pragma disable_optimizer;
query II nosort no_opt_result
select * from t1 where ts::date == '2024-05-02';
----
statement ok
pragma enable_optimizer;
query II nosort no_opt_result
select * from t1 where ts::date == '2024-05-02';
----
# pattern is still recognized in conjunction
query II
explain select count(*) from t1 where ts::date == '2024-05-02' and i > 122880/2;
----
physical_plan <!REGEX>:.*FILTER.*SEQ_SCAN:.*
query I
select count(*) from t1 where ts::date == '2024-05-02' and i > 1000;
----
0
query I
select count(*) from t1 where ts::date == '2024-05-02' and i <= 500;
----
1000
endloop
query I
select count(*) from t1 where ts::date == '2024-05-01' and i <= 500;
----
500
query I
select count(*) from t1 where ts::date == '2024-05-03';
----
2000

View File

@@ -0,0 +1,161 @@
# name: test/optimizer/pushdown_semi_anti_join.test
# description: Pushdown of semi and anti joins
# group: [optimizer]
statement ok
PRAGMA explain_output = 'OPTIMIZED_ONLY'
statement ok
create table t0 as (select 42 a);
statement ok
create table t1 as (select 42 b);
query II
explain select a from t0 semi join (select * from t1 where 1 = 0) on a = b;
----
logical_opt <!REGEX>:.*SEMI.*
# SEMI JOIN is empty if either side is empty
query II
explain select * from t0 SEMI JOIN (select * from t1 where 1=0) tmp on a = b;
----
logical_opt <!REGEX>:.*SEMI.*
query II
explain select * from (select * from t0 where 1=0) tmp0 SEMI JOIN (select * from t1) tmp1 on (a = b);
----
logical_opt <!REGEX>:.*SEMI.*
# ANTI JOIN is empty if LHS is empty, so no ANTI join may remain in the optimized plan
query II
explain select * from (select 42 where 1=0) tmp0(a) ANTI JOIN (select 42) tmp1(b) on (a=b);
----
logical_opt <!REGEX>:.*ANTI.*
# if RHS is empty we can optimize away the ANTI JOIN
query II
explain select * from t0 ANTI JOIN (select * from t1 where 1=0) on (a=b);
----
logical_opt <!REGEX>:.*ANTI.*
# now pushdown subquery with semi/anti joins
query II
explain select * from ((select 42) tbl(a) SEMI JOIN t1 on (a=b)) where a=42;
----
logical_opt <REGEX>:.*SEMI.*
query II
explain select * from ((select 42) tbl(a) SEMI JOIN t1 on (a=b)) where a=43;
----
logical_opt <!REGEX>:.*SEMI.*
query II
explain select * from ((select 43) tbl(i) SEMI JOIN (select 42) tbl2(u) on (i=u)) tbl(i) where i=42;
----
logical_opt <!REGEX>:.*SEMI.*
query II
explain select * from ((select 42) tbl(i) ANTI JOIN (select 42) tbl2(u) on (i=u)) tbl(i) where i=42;
----
logical_opt <REGEX>:.*ANTI.*
query II
explain select * from ((select 42) tbl(i) ANTI JOIN (select 43) tbl2(u) on (i=u)) tbl(i) where i=42;
----
logical_opt <!REGEX>:.*ANTI.*
query II
explain select * from ((select 43) tbl(i) ANTI JOIN (select 42) tbl2(u) on (i=u)) tbl(i) where i=42;
----
logical_opt <!REGEX>:.*ANTI.*
query I
select * from (select 42) tbl(a) SEMI JOIN (select 43) tbl2(b) on (a=b) where a = 42;
----
query I
select * from (select 42) tbl(a) SEMI JOIN (select 42 where 1=0) tbl2(b) on (a=b);
----
query I
select * from (select 42 where 1=0) tbl(a) SEMI JOIN (select 42) tbl2(b) on (a=b);
----
query I
select * from (select 42 where 1=0) tbl(a) ANTI JOIN (select 42) tbl2(b) on (a=b);
----
query I
select * from (select 42) tbl(a) ANTI JOIN (select 42 where 1=0) tbl2(b) on (a=b);
----
42
query I
select * from (select * from (select 42) tbl(i) SEMI JOIN (select 42) tbl2(u) on (i=u)) tbl(i) where i=42;
----
42
query I
select * from (select * from (select 42) tbl(i) SEMI JOIN (select 43) tbl2(u) on (i=u)) tbl(i) where i=42;
----
query I
select * from (select * from (select 43) tbl(i) SEMI JOIN (select 42) tbl2(u) on (i=u)) tbl(i) where i=42;
----
query I
select * from (select * from (select 42) tbl(i) ANTI JOIN (select 42) tbl2(u) on (i=u)) tbl(i) where i=42;
----
query I
select * from (select * from (select 42) tbl(i) ANTI JOIN (select 43) tbl2(u) on (i=u)) tbl(i) where i=42;
----
42
query I
select * from (select * from (select 43) tbl(i) ANTI JOIN (select 42) tbl2(u) on (i=u)) tbl(i) where i=42;
----
statement ok
create table t2 as select range a, range+8 b from range(11);
statement ok
create table t3 as select range a, range+8 b from range(11);
statement ok
create table t4 as select range a, range+8 b from range(11);
statement ok
create table t5 as select range a, range+8 b from range(11);
# no semis or antis in this
query II
explain select a from (select * from t2 semi join (select * from t3 where 1 = 0) tt0 on (t2.a = tt0.b)) tmp1 ANTI JOIN (select * from t3 semi join (select * from t4) tt1 on (t3.a = tt1.b)) tmp2 on (tmp1.a=tmp2.a);
----
logical_opt <!REGEX>:.*(SEMI|ANTI).*
# with complex join condition, answer is still correct
query II nosort no_filter
select * from t2 semi join t3 on (t2.a + 70 + t3.b = t2.b**t3.a);
----
1 9
# filter is not pushed down to right side, otherwise answer is incorrect
query II nosort no_filter
select * from t2 semi join t3 on (t2.a + 70 + t3.b = t2.b**t3.a) where a < 10;
----
1 9
statement ok
create table left_table as (select * from VALUES (1, 9), (1, 10) t(a, b));
statement ok
create table right_table as (select * from VALUES (1, 4) t(a, b));
query II
select * from left_table anti join right_table on (left_table.b-5 = right_table.b) where b > 5;
----
1 10

View File

@@ -0,0 +1,115 @@
# name: test/optimizer/pushdown_set_op.test
# description: Pushdown set operations
# group: [optimizer]
statement ok
PRAGMA explain_output = 'OPTIMIZED_ONLY'
query II
explain select 42 intersect select 42;
----
logical_opt <REGEX>:.*INTERSECT.*
# intersect is empty if either side is empty
query II
explain select 42 intersect select 42 where 1=0;
----
logical_opt <!REGEX>:.*INTERSECT.*
query II
explain select 42 where 1=0 intersect select 42;
----
logical_opt <!REGEX>:.*INTERSECT.*
# except is empty if LHS is empty
query II
explain select 42 where 1=0 except select 42;
----
logical_opt <!REGEX>:.*EXCEPT.*
# if RHS is empty we can optimize away the except
query II
explain select 42 except all select 42 where 1=0;
----
logical_opt <!REGEX>:.*EXCEPT.*
# now pushdown subquery with set ops
query II
explain select * from (select 42 intersect select 42) tbl(i) where i=42;
----
logical_opt <REGEX>:.*INTERSECT.*
query II
explain select * from (select 42 intersect all select 43) tbl(i) where i=42;
----
logical_opt <!REGEX>:.*INTERSECT.*
query II
explain select * from (select 43 intersect all select 42) tbl(i) where i=42;
----
logical_opt <!REGEX>:.*INTERSECT.*
query II
explain select * from (select 42 except select 42) tbl(i) where i=42;
----
logical_opt <REGEX>:.*EXCEPT.*
query II
explain select * from (select 42 except all select 43) tbl(i) where i=42;
----
logical_opt <!REGEX>:.*EXCEPT.*
query II
explain select * from (select 43 except all select 42) tbl(i) where i=42;
----
logical_opt <!REGEX>:.*EXCEPT.*
query I
select 42 intersect select 42;
----
42
query I
select 42 intersect select 42 where 1=0;
----
query I
select 42 where 1=0 intersect select 42;
----
query I
select 42 where 1=0 except select 42;
----
query I
select 42 except select 42 where 1=0;
----
42
query I
select * from (select 42 intersect select 42) tbl(i) where i=42;
----
42
query I
select * from (select 42 intersect select 43) tbl(i) where i=42;
----
query I
select * from (select 43 intersect select 42) tbl(i) where i=42;
----
query I
select * from (select 42 except select 42) tbl(i) where i=42;
----
query I
select * from (select 42 except select 43) tbl(i) where i=42;
----
42
query I
select * from (select 43 except select 42) tbl(i) where i=42;
----

View File

@@ -0,0 +1,255 @@
# name: test/optimizer/regex_optimizer.test
# description: Test Regex Optimization Rules
# group: [optimizer]
statement ok
CREATE TABLE test(s VARCHAR);
statement ok
PRAGMA explain_output = OPTIMIZED_ONLY;
statement ok
INSERT INTO test VALUES ('aaa');
# contains optimization: /aa/ -> contains(aa)
query I nosort regexconstantpattern
EXPLAIN SELECT regexp_matches(s, 'aa') FROM test
----
query I nosort regexconstantpattern
EXPLAIN SELECT contains(s, 'aa') FROM test
----
# contains optimization: /a/ -> contains(a)
query I nosort regexconstantsinglechar
EXPLAIN SELECT regexp_matches(s, 'a') FROM test
----
query I nosort regexconstantsinglechar
EXPLAIN SELECT contains(s, 'a') FROM test
----
query I nosort correct_result
SELECT regexp_matches(s, '[a]') FROM test
----
query I nosort correct_result
SELECT regexp_matches(s, 'a') FROM test
----
query I nosort correct_result
SELECT contains(s, 'aaa') FROM test
----
query I nosort correct_result
SELECT regexp_matches(s, '^a') FROM TEST;
----
aaa
query I nosort correct_result
SELECT regexp_matches(s, '^aa') FROM TEST;
----
aaa
statement ok
DELETE FROM test;
statement ok
INSERT INTO test VALUES ('aaa'), ('a.a'), ('baba'), ('abba'), ('a\.a'), ('a_a');
query II
explain analyze select regexp_matches(s, 'a.a', 's'), s from test;
----
analyzed_plan <REGEX>:.*"~~"\(s, '%a_a%'\).*
query I nosort
select s from test where regexp_matches(s, 'a.a', 's');
----
aaa
a.a
baba
a_a
query II
explain analyze SELECT regexp_matches(s, 'a.*a', 's'), s FROM TEST;
----
analyzed_plan <REGEX>:.*"~~"\(s, '%a%a%'\).*
query I nosort
SELECT s FROM TEST where regexp_matches(s, 'a.*a', 's');
----
aaa
a.a
baba
abba
a\.a
a_a
query II
explain analyze SELECT regexp_matches(s, '^a.*b$', 's'), s FROM TEST;
----
analyzed_plan <REGEX>:.*"~~"\(s, 'a%b'\).*
query I
SELECT s FROM TEST where regexp_matches(s, '^a.*b$', 's');
----
query II
explain analyze select regexp_matches(s, 'a_a'), s from test;
----
analyzed_plan <REGEX>:.*contains\(s, 'a_a'\).*
query II
explain analyze select regexp_matches(s, 'a%a'), s from test;
----
analyzed_plan <REGEX>:.*contains\(s, 'a%a'\).*
query II
explain analyze select regexp_matches(s, 'a\\a'), s from test;
----
analyzed_plan <REGEX>:.*contains\(s, 'a\\a'\).*
query I
select s from test where regexp_matches(s, 'a_a');
----
a_a
query II
explain analyze select regexp_matches(s, 'a\.a'), s from test;
----
analyzed_plan <REGEX>:.*contains\(s, 'a.a'\).*
query I
select s from test where regexp_matches(s, 'a\.a');
----
a.a
query II
explain analyze SELECT regexp_matches(s, '^a'), s FROM TEST;
----
analyzed_plan <REGEX>:.*prefix\(s, 'a'\).*
query I nosort
SELECT s FROM TEST where regexp_matches(s, '^a');
----
aaa
a.a
abba
a\.a
a_a
query II
explain analyze SELECT regexp_matches(s, 'a$'), s FROM TEST;
----
analyzed_plan <REGEX>:.*suffix\(s, 'a'\).*
query I nosort
SELECT s FROM TEST where regexp_matches(s, 'a$');
----
aaa
a.a
baba
abba
a\.a
a_a
query II
explain analyze SELECT regexp_matches(s, 'aaa.'), s FROM TEST;
----
analyzed_plan <REGEX>:.*regexp_matches\(s, 'aaa.'\).*
query II
explain analyze SELECT regexp_matches(s, 'aaa.', 's'), s FROM TEST;
----
analyzed_plan <REGEX>:.*"~~"\(s, '%aaa_%'\).*
query II
explain analyze SELECT regexp_matches(s, '.aaa', 's'), s FROM TEST;
----
analyzed_plan <REGEX>:.*"~~"\(s, '%_aaa%'\).*
query II
explain analyze SELECT regexp_matches(s, '^.aaa', 's'), s FROM TEST;
----
analyzed_plan <REGEX>:.*"~~"\(s, '_aaa%'\).*
query II
explain analyze SELECT regexp_matches(s, '.aaa$', 's'), s FROM TEST;
----
analyzed_plan <REGEX>:.*"~~"\(s, '%_aaa'\).*
query II
explain analyze select regexp_matches(s, '.*green.*', 's'), s, from test;
----
analyzed_plan <REGEX>:.*contains\(s, 'green'\).*
query II
explain analyze select regexp_matches(s, '.*special.*requests.*', 's'), s from test;
----
analyzed_plan <REGEX>:.*"~~".s, '%special%requests.*%.*
statement ok
DELETE from test;
# Test matching newlines with and without 's' option
# inserts 'aaa\naaa'
statement ok
insert into test values (concat('aaa', chr(10), 'aaa'));
query I
select count(s) from test where regexp_matches(s, 'aaa');
----
1
# no matches since 's' option not passed
query I
select count(s) from test where regexp_matches(s, 'aaa.');
----
0
# 's' option passed, so we match the string aaa\naaa
query I
select count(s) from test where regexp_matches(s, 'aaa.', 's');
----
1
# when regexp_matches arguments are incorrect, an error is produced
statement error
select count(s) from test where regexp_matches(s);
----
Binder Error
# when regexp_matches arguments are incorrect, an error is produced
statement error
select count(s) from test where regexp_matches('aaa');
----
Binder Error
# Test that regexp_matches with flags (like 'm') is properly optimized to contains
statement ok
DELETE FROM test;
statement ok
INSERT INTO test VALUES ('hello world'), ('test'), ('hello again');
query II
explain analyze SELECT s FROM test WHERE regexp_matches(s, 'hello', 'm');
----
analyzed_plan <REGEX>:.*contains\(s, 'hello'\).*
query I nosort
SELECT s FROM test WHERE regexp_matches(s, 'hello', 'm');
----
hello world
hello again
# This used to trigger a debug assertion
query I
WITH fetch_schema AS (
SELECT s FROM test WHERE regexp_matches(s, 'hello', 'm')
) SELECT * FROM fetch_schema LIMIT 0;
----

View File

@@ -0,0 +1,63 @@
# name: test/optimizer/regex_optimizer_coverage.test
# description: Test Regex Optimization Rules (character classes and inline flags)
# group: [optimizer]
# fixture: 'FLASH' and 'TOAST' both contain the substring 'AS'; 'BELL' contains neither 'A' nor 'S'
statement ok
CREATE TABLE test(word VARCHAR);
statement ok
INSERT INTO test VALUES ('FLASH'), ('TOAST'), ('BELL');
statement ok
PRAGMA explain_output = OPTIMIZED_ONLY;
# NOTE(review): every query below uses the label correct_result and lists no expected rows;
# presumably the runner requires all queries sharing a label to produce the same result - confirm
# reference result: plain substring search
query I nosort correct_result
select contains(word, 'AS') from test;
----
# character class with a single member
query I nosort correct_result
select regexp_matches(word, '[A]') from test;
----
# single literal character
query I nosort correct_result
select regexp_matches(word, 'A') from test;
----
# two-character literal
query I nosort correct_result
select regexp_matches(word, 'AS') from test;
----
# character class with two members (matches either 'A' or 'S')
query I nosort correct_result
select regexp_matches(word, '[AS]') from test;
----
# single-member class on the other character
query I nosort correct_result
select regexp_matches(word, '[S]') from test;
----
# two single-member classes in sequence
query I nosort correct_result
select regexp_matches(word, '[A][S]') from test;
----
# class that repeats the same member
query I nosort correct_result
select regexp_matches(word, '[AAAA]') from test;
----
# range whose start and end are the same character
query I nosort correct_result
select regexp_matches(word, '[A-A]') from test;
----
# case-insensitive
query I nosort correct_result
select regexp_matches(word, '(?i)[A-A]') from test;
----
# single-line mode
query I nosort correct_result
select regexp_matches(word, '(?s)[A-A]') from test;
----
# multi-line mode
query I nosort correct_result
select regexp_matches(word, '(?m)[A-A]') from test;
----

View File

@@ -0,0 +1,29 @@
# name: test/optimizer/regex_to_like_optimizer.test
# description: Test regex to like Optimization Rules
# group: [optimizer]
statement ok
CREATE TABLE test(word VARCHAR);
statement ok
INSERT INTO test VALUES ('BIGSmall'), ('bIGSmall'), ('BIGsmall'), ('bigsmall');
# must match [Bb].*S (case-insensitive b, anything, then an upper-case S)
query II
select word, regexp_matches(word, '(?i)b.*(?-i)[S]') from test;
----
BIGSmall true
bIGSmall true
BIGsmall false
bigsmall false
# must match [Bb].*s (case-insensitive b, anything, then a lower-case s)
query II
select word, regexp_matches(word, '(?i)[b].*(?-i)[s]') from test;
----
BIGSmall false
bIGSmall false
BIGsmall true
bigsmall true

View File

@@ -0,0 +1,26 @@
# name: test/optimizer/remove_unused_filter_column.test
# description: Test removal of filter columns that are unused in the remainder of a query plan
# group: [optimizer]
require parquet
#statement ok
#PRAGMA explain_output = OPTIMIZED_ONLY;
statement ok
create table test as select range i, range j from range(5)
# j should not be in the SEQ_SCAN in the physical plan
query II
explain select i from test where j=0
----
physical_plan <!REGEX>: +j +
# same for parquet
statement ok
copy test TO '__TEST_DIR__/test.parquet' (FORMAT 'parquet')
query II
explain select i from '__TEST_DIR__/test.parquet' where j=0
----
physical_plan <!REGEX>: +j +

View File

@@ -0,0 +1,63 @@
# name: test/optimizer/reported_bugs/in_vs_where.test
# description: IN filter on join column
# group: [reported_bugs]
# three opportunities, owned by the ids ending in A7, A6 and A5
statement ok
CREATE OR REPLACE TABLE AllOpportunities as select * FROM Values
('0058b00000IIEQVAA7'),
('0058b00000IIEQVAA6'),
('0058b00000IIEQVAA5') t(owner_id);
# persons: sixteen duplicate rows for the id ending in A3, plus one row each for A4, A5, A6 and A7
statement ok
CREATE OR REPLACE TABLE persons as
select * from VALUES
('0058b00000IIEQVAA3'),
('0058b00000IIEQVAA3'),
('0058b00000IIEQVAA3'),
('0058b00000IIEQVAA3'),
('0058b00000IIEQVAA3'),
('0058b00000IIEQVAA3'),
('0058b00000IIEQVAA3'),
('0058b00000IIEQVAA3'),
('0058b00000IIEQVAA3'),
('0058b00000IIEQVAA3'),
('0058b00000IIEQVAA3'),
('0058b00000IIEQVAA3'),
('0058b00000IIEQVAA3'),
('0058b00000IIEQVAA3'),
('0058b00000IIEQVAA3'),
('0058b00000IIEQVAA3'),
('0058b00000IIEQVAA4'),
('0058b00000IIEQVAA5'),
('0058b00000IIEQVAA6'),
('0058b00000IIEQVAA7') t(id);
# role_hierarchy: each root person (A5, A6, A7) is listed with itself as its only descendant
statement ok
CREATE OR REPLACE TABLE role_hierarchy as select *
FROM VALUES
('0058b00000IIEQVAA5','0058b00000IIEQVAA5'),
('0058b00000IIEQVAA6','0058b00000IIEQVAA6'),
('0058b00000IIEQVAA7','0058b00000IIEQVAA7') t(root_person_id, descendent_person_id);
# the IN filter is on persons.id, which is also the join key to role_hierarchy;
# after grouping, exactly one row (the id ending in A5) must come back
query I
with opp_stats as (
select persons.id from persons
join role_hierarchy on role_hierarchy.root_person_id = persons.id
left outer join AllOpportunities ON AllOpportunities.owner_id = role_hierarchy.descendent_person_id
where persons.id in ('0058b00000IIEQVAA5')
group by persons.id
) select * from opp_stats;
----
0058b00000IIEQVAA5
# second scenario: t1 holds each of the values 0..1999 ten times (as VARCHAR)
statement ok
create or replace table t1 as select (range%2000)::VARCHAR a from range(20000);
# t2 holds each of the values 0..9 once (as VARCHAR)
statement ok
create or replace table t2 as select range::VARCHAR a from range(10);
# equality join plus an IN filter on the join column: '7' occurs ten times in t1 and once in t2, so the count is 10
query I
select count(t1.a) from t1, t2 where t1.a=t2.a and t1.a in ('7');
----
10

View File

@@ -0,0 +1,52 @@
# name: test/optimizer/sampling_pushdown.test
# description: Test Sampling Pushdown optimization
# group: [optimizer]
statement ok
CREATE TABLE integers1(i INTEGER, j INTEGER)
statement ok
CREATE TABLE integers2(i INTEGER, j INTEGER)
statement ok
INSERT INTO integers1 VALUES (1,1), (2,2), (3, 3), (4,4)
statement ok
INSERT INTO integers2 VALUES (1,1), (2,2), (3, 3), (4,4)
# tablesample system + seq scan becomes sample scan
query II
EXPLAIN SELECT i FROM integers1 tablesample system(0.1%)
----
physical_plan <REGEX>:.*SEQ_SCAN.*System: 0.1%.*
# using sample system + seq scan becomes sample scan
query II
EXPLAIN SELECT i FROM integers1 using sample system(0.1%)
----
physical_plan <REGEX>:.*SEQ_SCAN.*System: 0.1%.*
# tablesample system + seq scan with join becomes sample scan with join
query II
EXPLAIN SELECT * FROM integers1 tablesample system(0.1%), integers2 tablesample system(0.1%)
----
physical_plan <REGEX>:.*SEQ_SCAN.*System: 0.1%.*
# tablesample bernoulli: no pushdown
query II
EXPLAIN SELECT i FROM integers1 tablesample bernoulli(0.1%)
----
physical_plan <REGEX>:.*Bernoulli.*SEQ_SCAN.*
# tablesample reservoir: no pushdown
query II
EXPLAIN SELECT i FROM integers1 tablesample reservoir(0.1%)
----
physical_plan <REGEX>:.*RESERVOIR_SAMPLE.*SEQ_SCAN.*
# tablesample system after a derived table: no pushdown
query II
EXPLAIN SELECT * FROM integers1, integers2 where integers1.i = integers2.i USING SAMPLE SYSTEM(25%)
----
physical_plan <REGEX>:.*System.*SEQ_SCAN.*

View File

@@ -0,0 +1,44 @@
# name: test/optimizer/sampling_pushdown.test_slow
# description: Test the performance of Sampling Pushdown optimization
# group: [optimizer]
require tpch
statement ok
CALL DBGEN(sf=0.1);
# tablesample system + seq scan becomes sample scan
query II
EXPLAIN ANALYZE SELECT count(*) FROM lineitem tablesample system(0.1%)
----
analyzed_plan <REGEX>:.*TABLE_SCAN.*System: 0.1%.*
# using sample system + seq scan becomes sample scan
query II
EXPLAIN ANALYZE SELECT count(*) FROM lineitem using sample system(0.1%)
----
analyzed_plan <REGEX>:.*TABLE_SCAN.*System: 0.1%.*
# tablesample system + seq scan with join becomes sample scan with join
query II
EXPLAIN ANALYZE SELECT count(*) FROM lineitem tablesample system(0.1%), orders tablesample system(0.1%)
----
analyzed_plan <REGEX>:.*TABLE_SCAN.*System: 0.1%.*
# tablesample bernoulli: no pushdown
query II
EXPLAIN ANALYZE SELECT count(*) FROM lineitem tablesample bernoulli(0.1%)
----
analyzed_plan <REGEX>:.*Bernoulli.*TABLE_SCAN.*
# tablesample reservoir: no pushdown
query II
EXPLAIN ANALYZE SELECT count(*) FROM lineitem tablesample reservoir(0.1%)
----
analyzed_plan <REGEX>:.*RESERVOIR_SAMPLE.*TABLE_SCAN.*
# tablesample system after a derived table: no pushdown
query II
EXPLAIN ANALYZE SELECT count(*) FROM lineitem, orders where l_orderkey = o_orderkey USING SAMPLE SYSTEM(25%)
----
analyzed_plan <REGEX>:.*System.*TABLE_SCAN.*

View File

@@ -0,0 +1,152 @@
# name: test/optimizer/statistics/statistics_aggregate.test
# description: Statistics propagation test with aggregates expression
# group: [statistics]
statement ok
SET default_null_order='nulls_first';
statement ok
CREATE TABLE integers AS SELECT * FROM (VALUES (1), (2), (3)) tbl(i);
statement ok
PRAGMA explain_output = OPTIMIZED_ONLY;
# statistics propagation in groups
# no null values
query II
EXPLAIN SELECT i IS NULL FROM (SELECT i FROM integers GROUP BY i) integers(i);
----
logical_opt <!REGEX>:.*IS_NULL.*
# i=4 is out of range [1,3]
query II
EXPLAIN SELECT i=4 FROM (SELECT i FROM integers GROUP BY i) integers(i);
----
logical_opt <!REGEX>:.*\(i = 4\).*
# i=3 is in range [1,3]
query II
EXPLAIN SELECT i=3 FROM (SELECT i FROM integers GROUP BY i) integers(i);
----
logical_opt <REGEX>:.*\(i = 3\).*
# count without null values
query I
SELECT COUNT(i) FROM integers;
----
3
query II
SELECT i, COUNT(i) FROM integers GROUP BY i ORDER BY i;
----
1 1
2 1
3 1
statement ok
INSERT INTO integers VALUES (NULL);
# count with null values
query I
SELECT COUNT(i) FROM integers;
----
3
query II
SELECT i, COUNT(i) FROM integers GROUP BY i ORDER BY i;
----
NULL 0
1 1
2 1
3 1
# sum
# sum that fits in int64
statement ok
CREATE TABLE bigints AS SELECT i::BIGINT as i FROM (VALUES (1), (2), (3)) tbl(i);
query I
SELECT SUM(i) FROM bigints;
----
6
# avg
query I
SELECT AVG(i) FROM bigints;
----
2
# sum no longer fits in int64
statement ok
INSERT INTO bigints VALUES (9223372036854775806);
query I
SELECT SUM(i) FROM bigints;
----
9223372036854775812
query I
SELECT AVG(i) FROM bigints;
----
2305843009213693952
statement ok
DROP TABLE bigints;
statement ok
CREATE TABLE bigints AS SELECT i::BIGINT as i FROM (VALUES (-1), (-2), (-3)) tbl(i);
query I
SELECT SUM(i) FROM bigints;
----
-6
# avg
query I
SELECT AVG(i) FROM bigints;
----
-2
# sum no longer fits in int64 [negative]
statement ok
INSERT INTO bigints VALUES (-9223372036854775806);
query I
SELECT SUM(i) FROM bigints;
----
-9223372036854775812
query I
SELECT AVG(i) FROM bigints;
----
-2305843009213693952
# now with decimals
# sum that fits in int64
statement ok
CREATE TABLE decimals AS SELECT i::DECIMAL(18,1) as i FROM (VALUES (1), (2), (3)) tbl(i);
query I
SELECT SUM(i) FROM decimals;
----
6.0
# avg
query I
SELECT AVG(i) FROM decimals;
----
2.0
# sum no longer fits in int64
statement ok
INSERT INTO decimals SELECT 99999999999999999.9 FROM repeat(1, 10)
query I
SELECT SUM(i) FROM decimals;
----
1000000000000000005.0
query I
SELECT AVG(i) FROM decimals;
----
76923076923076923

View File

@@ -0,0 +1,191 @@
# name: test/optimizer/statistics/statistics_between.test
# description: Statistics propagation test with between expression
# group: [statistics]
statement ok
CREATE TABLE integers AS SELECT * FROM (VALUES (1), (2), (3)) tbl(i);
statement ok
PRAGMA explain_output = OPTIMIZED_ONLY;
# filter is out of range: no need to execute it
query II
EXPLAIN SELECT i=3 FROM integers WHERE i BETWEEN 0 AND 2
----
logical_opt <!REGEX>:.*\(i = 3\).*
# filter is in range: need to execute it
query II
EXPLAIN SELECT i=1 FROM integers WHERE i BETWEEN 0 AND 2
----
logical_opt <REGEX>:.*\(i = 1\).*
# between where lhs is bigger than rhs: we can prune this entirely
query II
EXPLAIN SELECT * FROM integers WHERE i BETWEEN 3 AND 2
----
logical_opt <REGEX>:.*EMPTY_RESULT.*
# now verify all of the results
query I
SELECT i=3 FROM integers WHERE i BETWEEN 0 AND 2;
----
0
0
query I
SELECT i=1 FROM integers WHERE i BETWEEN 0 AND 2;
----
1
0
query I
SELECT * FROM integers WHERE i BETWEEN 3 AND 2;
----
# now test the same with a subquery, where we don't have filter pushdown into the scan
query II
EXPLAIN SELECT i=3 FROM (SELECT * FROM integers LIMIT 10) integers(i) WHERE i BETWEEN 0 AND 2
----
logical_opt <!REGEX>:.*\(i = 3\).*
query II
EXPLAIN SELECT i=1 FROM (SELECT * FROM integers LIMIT 10) integers(i) WHERE i BETWEEN 0 AND 2
----
logical_opt <REGEX>:.*\(i = 1\).*
query II
EXPLAIN SELECT * FROM (SELECT * FROM integers LIMIT 10) integers(i) WHERE i BETWEEN 3 AND 2
----
logical_opt <REGEX>:.*EMPTY_RESULT.*
# lower clause is always true: between should be converted into i <= 2
query II
EXPLAIN SELECT * FROM (SELECT * FROM integers LIMIT 10) integers(i) WHERE i BETWEEN 0 AND 2;
----
logical_opt <REGEX>:.*\(i <= 2\).*
# upper clause is always true: between should be converted into i >= 2
query II
EXPLAIN SELECT * FROM (SELECT * FROM integers LIMIT 10) integers(i) WHERE i BETWEEN 2 AND 10;
----
logical_opt <REGEX>:.*\(i >= 2\).*
# between is always false
query II
EXPLAIN SELECT * FROM (SELECT * FROM integers LIMIT 10) integers(i) WHERE i BETWEEN -1 AND 0;
----
logical_opt <REGEX>:.*EMPTY_RESULT.*
query II
EXPLAIN SELECT i BETWEEN -1 AND 0 FROM (SELECT * FROM integers LIMIT 10) integers(i);
----
logical_opt <REGEX>:.*false.*
# verify the results
query I
SELECT i=3 FROM (SELECT * FROM integers LIMIT 10) integers(i) WHERE i BETWEEN 0 AND 2;
----
0
0
query I
SELECT i=1 FROM (SELECT * FROM integers LIMIT 10) integers(i) WHERE i BETWEEN 0 AND 2;
----
1
0
query I
SELECT * FROM (SELECT * FROM integers LIMIT 10) integers(i) WHERE i BETWEEN 3 AND 2;
----
query I
SELECT * FROM (SELECT * FROM integers LIMIT 10) integers(i) WHERE i BETWEEN 0 AND 10;
----
1
2
3
query I
SELECT * FROM (SELECT * FROM integers LIMIT 10) integers(i) WHERE i BETWEEN 0 AND 2;
----
1
2
query I
SELECT * FROM (SELECT * FROM integers LIMIT 10) integers(i) WHERE i BETWEEN 2 AND 10;
----
2
3
query I
SELECT * FROM (SELECT * FROM integers LIMIT 10) integers(i) WHERE i BETWEEN -1 AND 0;
----
query I
SELECT i BETWEEN -1 AND 0 FROM (SELECT * FROM integers LIMIT 10) integers(i);
----
0
0
0
statement ok
PRAGMA explain_output = PHYSICAL_ONLY;
# wide between: both are always true, entire filter can be pruned. (happens during physical planning).
# see https://github.com/duckdb/duckdb-fuzzer/issues/1357
# https://github.com/duckdb/duckdb-fuzzer/issues/1358
query II
EXPLAIN SELECT * FROM (SELECT * FROM integers LIMIT 10) integers(i) WHERE i BETWEEN 0 AND 10;
----
physical_plan <!REGEX>:.*FILTER.*
statement ok
PRAGMA explain_output = OPTIMIZED_ONLY;
# now insert a null value
statement ok
INSERT INTO integers VALUES (NULL)
# between is always false or null: we can still prune the entire filter
query II
EXPLAIN SELECT * FROM (SELECT * FROM integers LIMIT 10) integers(i) WHERE i BETWEEN -1 AND 0;
----
logical_opt <REGEX>:.*EMPTY_RESULT.*
# same query again: besides producing EMPTY_RESULT, the optimized plan must not contain a FILTER operator
query II
EXPLAIN SELECT * FROM (SELECT * FROM integers LIMIT 10) integers(i) WHERE i BETWEEN -1 AND 0;
----
logical_opt <!REGEX>:.*FILTER.*
# however, if used in a select clause, we can only replace it with a constant_or_null clause
query II
EXPLAIN SELECT i BETWEEN -1 AND 0 FROM (SELECT * FROM integers LIMIT 10) integers(i);
----
logical_opt <REGEX>:.*constant_or_null.*
# in the case of null values we cannot prune the filter here
query II
EXPLAIN SELECT * FROM (SELECT * FROM integers LIMIT 10) integers(i) WHERE i BETWEEN 0 AND 10;
----
logical_opt <REGEX>:.*FILTER.*
query I
SELECT * FROM (SELECT * FROM integers LIMIT 10) integers(i) WHERE i BETWEEN -1 AND 0;
----
query I
SELECT i BETWEEN -1 AND 0 FROM (SELECT * FROM integers LIMIT 10) integers(i);
----
0
0
0
NULL
query I
SELECT * FROM (SELECT * FROM integers LIMIT 10) integers(i) WHERE i BETWEEN 0 AND 10;
----
1
2
3

View File

@@ -0,0 +1,46 @@
# name: test/optimizer/statistics/statistics_case.test
# description: Test filter propagation in CASE expression
# group: [statistics]
statement ok
PRAGMA explain_output = OPTIMIZED_ONLY;
statement ok
CREATE TABLE integers AS SELECT * FROM (VALUES (1), (2), (3)) tbl(i);
# "i" does not contain null values, so we can statically determine this will not be null
query II
EXPLAIN SELECT * FROM integers WHERE (CASE WHEN i=2 THEN i+1 ELSE i+2 END) IS NULL;
----
logical_opt <REGEX>:.*EMPTY_RESULT.*
# we can't here, because one of the children of the case has null
query II
EXPLAIN SELECT * FROM integers WHERE (CASE WHEN i=2 THEN i+1 ELSE NULL END) IS NULL;
----
logical_opt <!REGEX>:.*EMPTY_RESULT.*
# now check overflow testing
# this gives an overflow on the RHS
statement error
SELECT 123::TINYINT + (CASE WHEN i=2 THEN (i+1)::TINYINT ELSE (i+2)::TINYINT END) FROM integers;
----
<REGEX>:Out of Range Error:.*Overflow in addition.*
# this does not
query I
SELECT 122::TINYINT + (CASE WHEN i=2 THEN (i+1)::TINYINT ELSE (i+2)::TINYINT END) FROM integers;
----
125
125
127
query I
SELECT * FROM integers WHERE (CASE WHEN i=2 THEN i+1 ELSE i+2 END) IS NULL;
----
query I
SELECT * FROM integers WHERE (CASE WHEN i=2 THEN i+1 ELSE NULL END) IS NULL;
----
1
3

View File

@@ -0,0 +1,109 @@
# name: test/optimizer/statistics/statistics_coalesce.test
# description: Test statistics propagation in COALESCE expression
# group: [statistics]
statement ok
PRAGMA explain_output = OPTIMIZED_ONLY;
statement ok
CREATE TABLE integers AS SELECT * FROM (VALUES (1), (2), (3)) tbl(i);
# "i" does not contain null values, so the coalesce expression is short-circuited
# "17" is never output
query II
EXPLAIN SELECT * FROM integers WHERE (COALESCE(i, 17)=17);
----
logical_opt <REGEX>:.*EMPTY_RESULT.*
# adding NULLs randomly into the coalesce does not change anything
query II
EXPLAIN SELECT * FROM integers WHERE (COALESCE(NULL, NULL, NULL, i, NULL, 17)=17);
----
logical_opt <REGEX>:.*EMPTY_RESULT.*
# same here, i is never output, the expression is a constant false
query II
EXPLAIN SELECT * FROM integers WHERE (COALESCE(4, i, 17)=3);
----
logical_opt <REGEX>:.*EMPTY_RESULT.*
query II
EXPLAIN SELECT * FROM integers WHERE (COALESCE(i, 4, 17)=3);
----
logical_opt <!REGEX>:.*EMPTY_RESULT.*
# execute the queries
query I
SELECT * FROM integers WHERE (COALESCE(i, 17)=17);
----
query I
SELECT * FROM integers WHERE (COALESCE(NULL, NULL, NULL, i, NULL, 17)=17);
----
query I
SELECT * FROM integers WHERE (COALESCE(4, i, 17)=3);
----
query I
SELECT * FROM integers WHERE (COALESCE(i, 4, 17)=3);
----
3
statement ok
INSERT INTO integers VALUES (NULL);
# after inserting a NULL, the coalesce result changes
query II
EXPLAIN SELECT * FROM integers WHERE (COALESCE(i, 17)=17);
----
logical_opt <!REGEX>:.*EMPTY_RESULT.*
query II
EXPLAIN SELECT * FROM integers WHERE (COALESCE(NULL, NULL, NULL, i, NULL, 17)=17);
----
logical_opt <!REGEX>:.*EMPTY_RESULT.*
query II
EXPLAIN SELECT * FROM integers WHERE (COALESCE(4, i, 17)=3);
----
logical_opt <REGEX>:.*EMPTY_RESULT.*
query II
EXPLAIN SELECT * FROM integers WHERE (COALESCE(i, 4, 17)=3);
----
logical_opt <!REGEX>:.*EMPTY_RESULT.*
# execute the queries
query I
SELECT * FROM integers WHERE (COALESCE(i, 17)=17);
----
NULL
query I
SELECT * FROM integers WHERE (COALESCE(NULL, NULL, NULL, i, NULL, 17)=17);
----
NULL
query I
SELECT * FROM integers WHERE (COALESCE(4, i, 17)=3);
----
query I
SELECT * FROM integers WHERE (COALESCE(i, 4, 17)=3);
----
3
# Verification compares results, which are different for random().
statement ok
PRAGMA disable_verification
statement ok
PRAGMA disable_verify_fetch_row;
# COALESCE without statistics.
statement ok
SELECT COALESCE (CASE WHEN RANDOM() < 100 THEN RANDOM() ELSE NULL END, NULL, 42)
FROM range(10)

View File

@@ -0,0 +1,259 @@
# name: test/optimizer/statistics/statistics_filter.test_slow
# description: Statistics propagation test with filters
# group: [statistics]
foreach type utinyint usmallint uinteger ubigint tinyint smallint integer bigint hugeint uhugeint float double
statement ok
CREATE TABLE integers AS SELECT i::${type} i FROM (VALUES (1), (2), (3)) tbl(i);
statement ok
PRAGMA explain_output = OPTIMIZED_ONLY;
statement ok
PRAGMA enable_verification
# = filter is out of range
query II
EXPLAIN SELECT * FROM integers WHERE i=0;
----
logical_opt <REGEX>:.*EMPTY_RESULT.*
query II
EXPLAIN SELECT * FROM integers WHERE i=4;
----
logical_opt <REGEX>:.*EMPTY_RESULT.*
# = filter is in range
query II
EXPLAIN SELECT * FROM integers WHERE i=1;
----
logical_opt <!REGEX>:.*EMPTY_RESULT.*
# > filter is out of range
query II
EXPLAIN SELECT * FROM integers WHERE i>3;
----
logical_opt <REGEX>:.*EMPTY_RESULT.*
# > filter is in range
query II
EXPLAIN SELECT * FROM integers WHERE i>2;
----
logical_opt <!REGEX>:.*EMPTY_RESULT.*
# >= filter is out of range
query II
EXPLAIN SELECT * FROM integers WHERE i>=4;
----
logical_opt <REGEX>:.*EMPTY_RESULT.*
# >= filter is in range
query II
EXPLAIN SELECT * FROM integers WHERE i>=3;
----
logical_opt <!REGEX>:.*EMPTY_RESULT.*
# < filter is out of range
query II
EXPLAIN SELECT * FROM integers WHERE i<1;
----
logical_opt <REGEX>:.*EMPTY_RESULT.*
# < filter is in range
query II
EXPLAIN SELECT * FROM integers WHERE i<2;
----
logical_opt <!REGEX>:.*EMPTY_RESULT.*
# <= filter is out of range
query II
EXPLAIN SELECT * FROM integers WHERE i<=0;
----
logical_opt <REGEX>:.*EMPTY_RESULT.*
# <= filter is in range
query II
EXPLAIN SELECT * FROM integers WHERE i<=1;
----
logical_opt <!REGEX>:.*EMPTY_RESULT.*
# verify that all these queries return correct results
query I
SELECT * FROM integers WHERE i=0;
----
query I
SELECT * FROM integers WHERE i=4;
----
query I
SELECT * FROM integers WHERE i=1;
----
1
query I
SELECT * FROM integers WHERE i>3;
----
query I
SELECT * FROM integers WHERE i>2;
----
3
query I
SELECT * FROM integers WHERE i>=4;
----
query I
SELECT * FROM integers WHERE i>=3;
----
3
query I
SELECT * FROM integers WHERE i<1;
----
query I
SELECT * FROM integers WHERE i<2;
----
1
query I
SELECT * FROM integers WHERE i<=0;
----
query I
SELECT * FROM integers WHERE i<=1;
----
1
# we repeat everything we did above, but with a subquery with a limit
# the limit prevents the filter from being pushed down into the scan
# which causes these tests to test different behavior
# = filter is out of range
query II
EXPLAIN SELECT * FROM (SELECT * FROM integers LIMIT 10) integers(i) WHERE i=0;
----
logical_opt <REGEX>:.*EMPTY_RESULT.*
query II
EXPLAIN SELECT * FROM (SELECT * FROM integers LIMIT 10) integers(i) WHERE i=4;
----
logical_opt <REGEX>:.*EMPTY_RESULT.*
# = filter is in range
query II
EXPLAIN SELECT * FROM (SELECT * FROM integers LIMIT 10) integers(i) WHERE i=1;
----
logical_opt <!REGEX>:.*EMPTY_RESULT.*
# > filter is out of range
query II
EXPLAIN SELECT * FROM (SELECT * FROM integers LIMIT 10) integers(i) WHERE i>3;
----
logical_opt <REGEX>:.*EMPTY_RESULT.*
# > filter is in range
query II
EXPLAIN SELECT * FROM (SELECT * FROM integers LIMIT 10) integers(i) WHERE i>2;
----
logical_opt <!REGEX>:.*EMPTY_RESULT.*
# >= filter is out of range
query II
EXPLAIN SELECT * FROM (SELECT * FROM integers LIMIT 10) integers(i) WHERE i>=4;
----
logical_opt <REGEX>:.*EMPTY_RESULT.*
# >= filter is in range
query II
EXPLAIN SELECT * FROM (SELECT * FROM integers LIMIT 10) integers(i) WHERE i>=3;
----
logical_opt <!REGEX>:.*EMPTY_RESULT.*
# < filter is out of range
query II
EXPLAIN SELECT * FROM (SELECT * FROM integers LIMIT 10) integers(i) WHERE i<1;
----
logical_opt <REGEX>:.*EMPTY_RESULT.*
# < filter is in range
query II
EXPLAIN SELECT * FROM (SELECT * FROM integers LIMIT 10) integers(i) WHERE i<2;
----
logical_opt <!REGEX>:.*EMPTY_RESULT.*
# <= filter is out of range
query II
EXPLAIN SELECT * FROM (SELECT * FROM integers LIMIT 10) integers(i) WHERE i<=0;
----
logical_opt <REGEX>:.*EMPTY_RESULT.*
# <= filter is in range
query II
EXPLAIN SELECT * FROM (SELECT * FROM integers LIMIT 10) integers(i) WHERE i<=1;
----
logical_opt <!REGEX>:.*EMPTY_RESULT.*
# verify that all these queries return correct results
query I
SELECT * FROM (SELECT * FROM integers LIMIT 10) integers(i) WHERE i=0;
----
query I
SELECT * FROM (SELECT * FROM integers LIMIT 10) integers(i) WHERE i=4;
----
query I
SELECT * FROM (SELECT * FROM integers LIMIT 10) integers(i) WHERE i=1;
----
1
query I
SELECT * FROM (SELECT * FROM integers LIMIT 10) integers(i) WHERE i>3;
----
query I
SELECT * FROM (SELECT * FROM integers LIMIT 10) integers(i) WHERE i>2;
----
3
query I
SELECT * FROM (SELECT * FROM integers LIMIT 10) integers(i) WHERE i>=4;
----
query I
SELECT * FROM (SELECT * FROM integers LIMIT 10) integers(i) WHERE i>=3;
----
3
query I
SELECT * FROM (SELECT * FROM integers LIMIT 10) integers(i) WHERE i<1;
----
query I
SELECT * FROM (SELECT * FROM integers LIMIT 10) integers(i) WHERE i<2;
----
1
query I
SELECT * FROM (SELECT * FROM integers LIMIT 10) integers(i) WHERE i<=0;
----
query I
SELECT * FROM (SELECT * FROM integers LIMIT 10) integers(i) WHERE i<=1;
----
1
statement ok
DROP TABLE integers;
endloop

View File

@@ -0,0 +1,133 @@
# name: test/optimizer/statistics/statistics_filter_multicolumn.test
# description: Statistics propagation through filters with multiple columns
# group: [statistics]
statement ok
CREATE TABLE integers AS SELECT * FROM (VALUES (1), (2), (3)) tbl(i);
statement ok
CREATE TABLE integers2 AS SELECT * FROM (VALUES (2), (3), (4)) tbl(i);
statement ok
PRAGMA explain_output = OPTIMIZED_ONLY;
# we can statically prove that i=1 is false
query II
EXPLAIN SELECT i=1 FROM integers JOIN integers2 USING (i);
----
logical_opt <!REGEX>:.*\(i = 1\).*
# same with i=4
query II
EXPLAIN SELECT i=4 FROM integers JOIN integers2 USING (i);
----
logical_opt <!REGEX>:.*\(i = 4\).*
# and with i>3
query II
EXPLAIN SELECT i>3 FROM integers JOIN integers2 USING (i);
----
logical_opt <!REGEX>:.*\(i > 3\).*
# and with i<2
query II
EXPLAIN SELECT i<2 FROM integers JOIN integers2 USING (i);
----
logical_opt <!REGEX>:.*\(i < 2\).*
# NOT the case for i>2
query II
EXPLAIN SELECT i>2 FROM integers JOIN integers2 USING (i);
----
logical_opt <REGEX>:.*\(i > 2\).*
# OR i<3
query II
EXPLAIN SELECT i<3 FROM integers JOIN integers2 USING (i);
----
logical_opt <REGEX>:.*\(i < 3\).*
# range joins
# we again join two tables with i:[1, 3] on the left, and i:[2, 4] on the right
# but now on i2.i<i1.i
# the statistics are now: i1.i: [2,3], i2.i: [2, 3]
# so we can prune i=4
query II
EXPLAIN SELECT i2.i=4 FROM integers i1 JOIN integers2 i2 ON (i2.i<i1.i);
----
logical_opt <!REGEX>:.*\(i = 4\).*
# or i=1
query II
EXPLAIN SELECT i1.i=1 FROM integers i1 JOIN integers2 i2 ON (i2.i<i1.i);
----
logical_opt <!REGEX>:.*\(i = 1\).*
# but not i=3
query II
EXPLAIN SELECT i2.i=3 FROM integers i1 JOIN integers2 i2 ON (i2.i<i1.i);
----
logical_opt <REGEX>:.*\(i = 3\).*
# or i=2
query II
EXPLAIN SELECT i1.i=2 FROM integers i1 JOIN integers2 i2 ON (i2.i<i1.i);
----
logical_opt <REGEX>:.*\(i = 2\).*
query I
SELECT i=1 FROM integers JOIN integers2 USING (i);
----
0
0
query I
SELECT i=4 FROM integers JOIN integers2 USING (i);
----
0
0
query I
SELECT i>3 FROM integers JOIN integers2 USING (i);
----
0
0
query I
SELECT i<2 FROM integers JOIN integers2 USING (i);
----
0
0
query I rowsort
SELECT i>2 FROM integers JOIN integers2 USING (i);
----
0
1
query I rowsort
SELECT i<3 FROM integers JOIN integers2 USING (i);
----
0
1
query I
SELECT i2.i=4 FROM integers i1 JOIN integers2 i2 ON (i2.i<i1.i);
----
0
query I
SELECT i1.i=1 FROM integers i1 JOIN integers2 i2 ON (i2.i<i1.i);
----
0
query I
SELECT i2.i=3 FROM integers i1 JOIN integers2 i2 ON (i2.i<i1.i);
----
0
query I
SELECT i1.i=2 FROM integers i1 JOIN integers2 i2 ON (i2.i<i1.i);
----
0

Some files were not shown because too many files have changed in this diff Show More