Files
email-tracker/external/duckdb/test/issues/general/test_4950.test
2025-10-24 19:21:19 -05:00

137 lines
3.2 KiB
SQL

# name: test/issues/general/test_4950.test
# description: Issue 4950: Subquery is not transformed into an anti-join by the optimizer
# group: [general]
# Restrict EXPLAIN output to the optimized logical plan; the plan checks in
# this file match against the resulting logical_opt row.
statement ok
pragma explain_output = optimized_only

# With verification enabled, query results are also checked to be the same
# with and without the optimizer.
statement ok
pragma enable_verification

statement ok
create table lineitem (l_orderkey int, l_suppkey int, l_partkey int);

# Order key 1 is the only key shared by two rows, and those two rows have
# different supplier keys. Supplier key 5 is shared by order keys 4, 5 and 6.
statement ok
insert into lineitem (l_orderkey, l_suppkey, l_partkey)
values (1,1,42),(1,2,43),(3,3,44),(4,5,45),(5,5,46),(6,5,47);

# Baseline: keep the rows for which another row exists with the same order key
# but a different supplier key. Only the two rows with order key 1 qualify.
query III rowsort
select *
from lineitem as l1
where exists (
    select *
    from lineitem as l2
    where l2.l_orderkey = l1.l_orderkey
      and l2.l_suppkey <> l1.l_suppkey
);
----
1 1 42
1 2 43

# The deliminator should remove the INNER join: the optimized plan of the
# EXISTS query must not contain the text INNER.
query II
explain select *
from lineitem as l1
where exists (
    select *
    from lineitem as l2
    where l2.l_orderkey = l1.l_orderkey
      and l2.l_suppkey <> l1.l_suppkey
);
----
logical_opt <!REGEX>:.*INNER.*

# SINGLE join - same story: here the correlated scalar subquery itself is the
# filter condition. The query returns two rows, so the result is compared with
# rowsort to keep the check independent of the order in which rows are emitted.
query III rowsort
select *
from lineitem as l1
where (
    select l_orderkey
    from lineitem as l2
    where l2.l_orderkey = l1.l_orderkey
      and l2.l_suppkey <> l1.l_suppkey
);
----
1 1 42
1 2 43

# Plan check for the scalar-subquery filter: the optimized plan must not
# contain the text INNER.
query II
explain select *
from lineitem as l1
where (
    select l_orderkey
    from lineitem as l2
    where l2.l_orderkey = l1.l_orderkey
      and l2.l_suppkey <> l1.l_suppkey
);
----
logical_opt <!REGEX>:.*INNER.*

# Expressions on the correlated columns: the optimization cannot be applied
# here, but with enable_verification the results are still checked to be the
# same with and without the optimizer.
# The rows with order keys 3, 4 and 5 each find a match at order key + 1.
query I
select count(*)
from lineitem as l1
where exists (
    select *
    from lineitem as l2
    where l2.l_orderkey = l1.l_orderkey + 1
      and l2.l_suppkey <> l1.l_suppkey + 1
);
----
3

# More correlated columns: the derived table's projection references l1 too.
# Every row matches itself, because l2.l_suppkey + l1.l_suppkey differs from
# l1.l_suppkey whenever l2.l_suppkey is non-zero - hence all 6 rows count.
query I
select count(*)
from lineitem as l1
where exists (
    select *
    from (
        select
            l2.l_orderkey as l_orderkey,
            l2.l_suppkey + l1.l_suppkey as l_suppkey
        from lineitem as l2
    ) as l3
    where l3.l_orderkey = l1.l_orderkey
      and l3.l_suppkey <> l1.l_suppkey
);
----
6

# More correlated columns combined with expressions in the join conditions.
# Only the rows with order keys 3, 4 and 5 have a row at order key + 1.
query I
select count(*)
from lineitem as l1
where exists (
    select *
    from (
        select
            l2.l_orderkey as l_orderkey,
            l2.l_suppkey + l1.l_suppkey as l_suppkey
        from lineitem as l2
    ) as l3
    where l3.l_orderkey = l1.l_orderkey + 1
      and l3.l_suppkey <> l1.l_suppkey + 1
);
----
3

# The subquery projects a column that is not part of the correlation
# (l_partkey), and the conditions still use expressions on the l1 columns.
query I
select count(*)
from lineitem as l1
where exists (
    select l_partkey
    from lineitem as l2
    where l2.l_orderkey = l1.l_orderkey + 1
      and l2.l_suppkey <> l1.l_suppkey + 1
);
----
3

# The subquery projects another column (l_partkey) but the conditions compare
# plain columns, so the optimization can be performed here.
# Only the two rows with order key 1 qualify.
query I
select count(*)
from lineitem as l1
where exists (
    select l_partkey
    from lineitem as l2
    where l2.l_orderkey = l1.l_orderkey
      and l2.l_suppkey <> l1.l_suppkey
);
----
2

# Plan check for the l_partkey projection with plain column comparisons: the
# optimized plan must not contain the text INNER.
query II
explain select count(*)
from lineitem as l1
where exists (
    select l_partkey
    from lineitem as l2
    where l2.l_orderkey = l1.l_orderkey
      and l2.l_suppkey <> l1.l_suppkey
);
----
logical_opt <!REGEX>:.*INNER.*

# Aggregate within the subquery: an aggregate without GROUP BY always yields
# exactly one row, so EXISTS holds for every outer row and all 6 rows count.
query I
select count(*)
from lineitem as l1
where exists (
    select sum(l2.l_orderkey)
    from lineitem as l2
    where l2.l_orderkey = l1.l_orderkey + 1
      and l2.l_suppkey <> l1.l_suppkey + 1
);
----
6