Files
email-tracker/external/duckdb/test/optimizer/statistics/statistics_setop.test
2025-10-24 19:21:19 -05:00

144 lines
3.4 KiB
SQL

# name: test/optimizer/statistics/statistics_setop.test
# description: Statistics propagation test with set operations
# group: [statistics]
# Fixtures: three single-column tables whose value ranges and NULL content
# drive the statistics checks in this file.
# Records are separated by blank lines, as the sqllogictest format requires.

# integers: values 1..3, no NULLs
statement ok
CREATE TABLE integers AS SELECT * FROM (VALUES (1), (2), (3)) tbl(i);

# integers2: values 4..6, no NULLs
statement ok
CREATE TABLE integers2 AS SELECT * FROM (VALUES (4), (5), (6)) tbl(i);

# integers3: values 4 and 5 plus one NULL
statement ok
CREATE TABLE integers3 AS SELECT * FROM (VALUES (4), (5), (NULL)) tbl(i);

# only emit the optimized plan in EXPLAIN output; the EXPLAIN checks in this
# file match against the row labelled logical_opt
statement ok
PRAGMA explain_output = OPTIMIZED_ONLY;
# union all
# total min/max after union all is [1, 6]: 7 is out of bounds, so i=7 can be optimized away
# each EXPLAIN check expects one row of two tab-separated columns: the plan
# label and a pattern over the plan text. <!REGEX> asserts that the pattern
# does NOT match (the expression was folded away), <REGEX> asserts that it
# does match (the expression is still evaluated at runtime).
query II
EXPLAIN SELECT i=7 FROM (SELECT * FROM integers UNION ALL SELECT * FROM integers2) tbl(i);
----
logical_opt	<!REGEX>:.*\(i = 7\).*

# 5 is in bounds, so this cannot be optimized away
query II
EXPLAIN SELECT i=5 FROM (SELECT * FROM integers UNION ALL SELECT * FROM integers2) tbl(i);
----
logical_opt	<REGEX>:.*\(i = 5\).*

# neither integers nor integers2 has null values, so this is false
query II
EXPLAIN SELECT i IS NULL FROM (SELECT * FROM integers UNION ALL SELECT * FROM integers2) tbl(i);
----
logical_opt	<!REGEX>:.*IS NULL.*

# integers3 has null values, so once we include integers3 we need to execute the IS_NULL predicate
query II
EXPLAIN SELECT i IS NULL FROM (SELECT * FROM integers UNION ALL SELECT * FROM integers2 UNION ALL SELECT * FROM integers3) tbl(i);
----
logical_opt	<REGEX>:.*IS NULL.*

# except
# except has the same stats as the LHS (as in the worst case, nothing is filtered out)
# in this case the LHS stats are [4, 6] without null values
# 7 is outside [4, 6], so the comparison can be optimized away
query II
EXPLAIN SELECT i=7 FROM (SELECT * FROM integers2 EXCEPT SELECT * FROM integers3) tbl(i);
----
logical_opt	<!REGEX>:.*\(i = 7\).*

# 5 is inside [4, 6], so the comparison has to stay in the plan
query II
EXPLAIN SELECT i=5 FROM (SELECT * FROM integers2 EXCEPT SELECT * FROM integers3) tbl(i);
----
logical_opt	<REGEX>:.*\(i = 5\).*

# the LHS (integers2) has no null values, so IS NULL can be optimized away
# even though the RHS (integers3) contains a NULL
query II
EXPLAIN SELECT i IS NULL FROM (SELECT * FROM integers2 EXCEPT SELECT * FROM integers3) tbl(i);
----
logical_opt	<!REGEX>:.*IS NULL.*

# intersect
# intersect has as stats the intersection of the LHS with the RHS
# in this case that would be [4, 5] with no null values
# however, we don't actually intersect the stats yet, so limit our testing to the stats of the LHS
# 7 is outside the LHS range [4, 6], so the comparison can be optimized away
query II
EXPLAIN SELECT i=7 FROM (SELECT * FROM integers2 INTERSECT SELECT * FROM integers3) tbl(i);
----
logical_opt	<!REGEX>:.*\(i = 7\).*

# 5 is inside the LHS range [4, 6], so the comparison has to stay in the plan
query II
EXPLAIN SELECT i=5 FROM (SELECT * FROM integers2 INTERSECT SELECT * FROM integers3) tbl(i);
----
logical_opt	<REGEX>:.*\(i = 5\).*

# now check the results of all these queries
# boolean results are written as 0 (false) / 1 (true)

# union all of integers and integers2 yields 1..6: none of them equals 7
query I
SELECT i=7 FROM (SELECT * FROM integers UNION ALL SELECT * FROM integers2) tbl(i);
----
0
0
0
0
0
0

# ordered by i so that the single match (i = 5) is always the fifth row
query I
SELECT i=5 FROM (SELECT * FROM integers UNION ALL SELECT * FROM integers2) tbl(i) ORDER BY i;
----
0
0
0
0
1
0

# no null values in either input
query I
SELECT i IS NULL FROM (SELECT * FROM integers UNION ALL SELECT * FROM integers2) tbl(i);
----
0
0
0
0
0
0

# nine rows in total; the explicit NULLS LAST keeps the one NULL (from integers3) in the final row
query I
SELECT i IS NULL FROM (SELECT * FROM integers UNION ALL SELECT * FROM integers2 UNION ALL SELECT * FROM integers3) tbl(i) ORDER BY i NULLS LAST;
----
0
0
0
0
0
0
0
0
1

# integers2 EXCEPT integers3 leaves only the value 6, so each check returns one row
query I
SELECT i=7 FROM (SELECT * FROM integers2 EXCEPT SELECT * FROM integers3) tbl(i);
----
0

query I
SELECT i=5 FROM (SELECT * FROM integers2 EXCEPT SELECT * FROM integers3) tbl(i);
----
0

query I
SELECT i IS NULL FROM (SELECT * FROM integers2 EXCEPT SELECT * FROM integers3) tbl(i);
----
0

# integers2 INTERSECT integers3 leaves the values 4 and 5
query I
SELECT i=7 FROM (SELECT * FROM integers2 INTERSECT SELECT * FROM integers3) tbl(i);
----
0
0

# ordered by i: 4 -> false, 5 -> true
query I
SELECT i=5 FROM (SELECT * FROM integers2 INTERSECT SELECT * FROM integers3) tbl(i) ORDER BY i;
----
0
1