should be it

This commit is contained in:
2025-10-24 19:21:19 -05:00
parent a4b23fc57c
commit f09560c7b1
14047 changed files with 3161551 additions and 1 deletions

View File

@@ -0,0 +1,120 @@
# name: test/optimizer/joins/asof_join_adds_rows.test
# description: ASOF joins must neither add nor drop probe rows; checked with asof_loop_join_threshold set to 0 and to 32
# group: [joins]

# needed for the ->> JSON path extraction in the second query
require json

# single-row table that is joined to the probe side before the ASOF join
statement ok
create table child_join as from values (1) t(c);

# probe side: six timestamps that all share the same equality key
statement ok
create table small_probe as from values
    (1, '1992-03-22 01:02:03'::TIMESTAMP),
    (1, '1992-03-22 01:02:04'::TIMESTAMP),
    (1, '1992-03-22 01:02:05'::TIMESTAMP),
    (1, '1992-03-22 01:02:06'::TIMESTAMP),
    (1, '1992-03-22 01:02:07'::TIMESTAMP),
    (1, '1992-03-22 01:02:08'::TIMESTAMP) t(sp_const, a);

# build side: seventeen timestamps with the same key; the largest one is later than every probe timestamp
statement ok
create table large_build as from values
    (1, '1992-03-22 01:02:04'::TIMESTAMP),
    (1, '1992-03-22 01:02:05'::TIMESTAMP),
    (1, '1992-03-22 01:02:06'::TIMESTAMP),
    (1, '1992-03-22 01:02:07'::TIMESTAMP),
    (1, '1992-03-22 01:02:08'::TIMESTAMP),
    (1, '1992-03-22 01:02:09'::TIMESTAMP),
    (1, '1992-03-22 01:02:10'::TIMESTAMP),
    (1, '1992-03-22 01:02:11'::TIMESTAMP),
    (1, '1992-03-22 01:02:12'::TIMESTAMP),
    (1, '1992-03-22 01:02:13'::TIMESTAMP),
    (1, '1992-03-22 01:02:14'::TIMESTAMP),
    (1, '1992-03-22 01:02:15'::TIMESTAMP),
    (1, '1992-03-22 01:02:16'::TIMESTAMP),
    (1, '1992-03-22 01:02:17'::TIMESTAMP),
    (1, '1992-03-22 01:02:18'::TIMESTAMP),
    (1, '1992-03-22 01:02:19'::TIMESTAMP),
    (1, '1992-03-22 01:02:20'::TIMESTAMP) t(lb_const, b);

# Compare NLJ optimisation to operator: every record inside the loop runs once per threshold value
foreach threshold 0 32

statement ok
PRAGMA asof_loop_join_threshold = ${threshold};

# each of the six probe rows is expected exactly once
query I
select a from (select * from small_probe, child_join where c=sp_const) asof join large_build on (lb_const = sp_const and a < b) order by a;
----
1992-03-22 01:02:03
1992-03-22 01:02:04
1992-03-22 01:02:05
1992-03-22 01:02:06
1992-03-22 01:02:07
1992-03-22 01:02:08

# ASOF LEFT JOIN feeding a lateral UNNEST of a JSON array, followed by a second ASOF LEFT JOIN;
# one output row is expected per unnested key, matched to its latest fqn_table entry
query IIII
WITH
    id_with_timepoint AS (
        SELECT
            'ID1' AS user_id,
            '2024-12-23'::TIMESTAMP AS lastSeen
    ),
    id_and_payload_with_timepoint AS (
        SELECT
            'ID1' AS user_id,
            '2024-02-11'::TIMESTAMP AS timepoint,
            '{ "amp": [ {"k": "fqn1"}, {"k": "fqn2"}]}'::VARCHAR AS payload
    ),
    id_with_payload_intermediate AS (
        SELECT
            id_with_timepoint.user_id,
            id_with_timepoint.lastSeen,
            id_and_payload_with_timepoint.payload,
        FROM
            id_with_timepoint ASOF
            LEFT JOIN id_and_payload_with_timepoint ON (
                id_with_timepoint.user_id = id_and_payload_with_timepoint.user_id
                AND id_and_payload_with_timepoint.timepoint < id_with_timepoint.lastSeen
            )
    ),
    id_with_fqn AS (
        SELECT
            user_id,
            lastSeen,
            t.k_fqn
        FROM
            id_with_payload_intermediate
            LEFT JOIN LATERAL UNNEST(payload ->> '$.amp[*].k') AS t (k_fqn) ON TRUE
    ),
    fqn_table AS (
        SELECT
            *
        FROM
            (
                VALUES
                    ('fqn2', '2021-03-03'::TIMESTAMP),
                    ('fqn2', '2021-02-02'::TIMESTAMP),
                    ('fqn1', '2021-01-01'::TIMESTAMP)
            ) AS data (ap_fqn, timepoint)
    )
SELECT
    id_with_fqn.user_id,
    id_with_fqn.k_fqn,
    fqn_table.ap_fqn,
    fqn_table.timepoint::TIMESTAMP
FROM
    id_with_fqn ASOF
    LEFT JOIN fqn_table ON (
        id_with_fqn.k_fqn = fqn_table.ap_fqn
        AND fqn_table.timepoint < id_with_fqn.lastSeen
    )
ORDER BY
    k_fqn,
    timepoint;
----
ID1	fqn1	fqn1	2021-01-01 00:00:00
ID1	fqn2	fqn2	2021-03-03 00:00:00

endloop

View File

@@ -0,0 +1,18 @@
# name: test/optimizer/joins/better_ce_estimates_for_bad_join_conditions.test
# description: An inequality predicate between t1 and t2 must not produce a NESTED_LOOP_JOIN when equality conditions through t3 connect the same tables
# group: [joins]

statement ok
create table t1 as select range::Varchar id, (range%700)::VARCHAR name_ from range(2_000);

statement ok
create table t2 as select range::Varchar id, (range%700)::VARCHAR name_ from range(2_000);

# t3 references both t1.id and t2.id
statement ok
create table t3 as select (range%2_000)::Varchar t1_id_FK, (range%2_000)::Varchar t2_id_FK from range(8_000);

# t1 and t2 are only related directly through `!=`; the plan must not contain a nested loop join
query II
explain select count(*) from t1, t2, t3 where t1.name_ != t2.name_ and t3.t1_id_FK = t1.id and t3.t2_id_FK = t2.id;
----
physical_plan	<!REGEX>:.*NESTED_LOOP_JOIN.*

View File

@@ -0,0 +1,120 @@
# name: test/optimizer/joins/cross_join_and_unnest_dont_work.test
# description: ASOF joins combined with a cross join and a lateral UNNEST must return the expected rows; checked with asof_loop_join_threshold set to 0 and to 32
# group: [joins]
# NOTE(review): the records below are identical to those of test/optimizer/joins/asof_join_adds_rows.test - confirm whether a dedicated cross join + UNNEST case was intended here

# needed for the ->> JSON path extraction in the second query
require json

# single-row table that is joined to the probe side before the ASOF join
statement ok
create table child_join as from values (1) t(c);

# probe side: six timestamps that all share the same equality key
statement ok
create table small_probe as from values
    (1, '1992-03-22 01:02:03'::TIMESTAMP),
    (1, '1992-03-22 01:02:04'::TIMESTAMP),
    (1, '1992-03-22 01:02:05'::TIMESTAMP),
    (1, '1992-03-22 01:02:06'::TIMESTAMP),
    (1, '1992-03-22 01:02:07'::TIMESTAMP),
    (1, '1992-03-22 01:02:08'::TIMESTAMP) t(sp_const, a);

# build side: seventeen timestamps with the same key; the largest one is later than every probe timestamp
statement ok
create table large_build as from values
    (1, '1992-03-22 01:02:04'::TIMESTAMP),
    (1, '1992-03-22 01:02:05'::TIMESTAMP),
    (1, '1992-03-22 01:02:06'::TIMESTAMP),
    (1, '1992-03-22 01:02:07'::TIMESTAMP),
    (1, '1992-03-22 01:02:08'::TIMESTAMP),
    (1, '1992-03-22 01:02:09'::TIMESTAMP),
    (1, '1992-03-22 01:02:10'::TIMESTAMP),
    (1, '1992-03-22 01:02:11'::TIMESTAMP),
    (1, '1992-03-22 01:02:12'::TIMESTAMP),
    (1, '1992-03-22 01:02:13'::TIMESTAMP),
    (1, '1992-03-22 01:02:14'::TIMESTAMP),
    (1, '1992-03-22 01:02:15'::TIMESTAMP),
    (1, '1992-03-22 01:02:16'::TIMESTAMP),
    (1, '1992-03-22 01:02:17'::TIMESTAMP),
    (1, '1992-03-22 01:02:18'::TIMESTAMP),
    (1, '1992-03-22 01:02:19'::TIMESTAMP),
    (1, '1992-03-22 01:02:20'::TIMESTAMP) t(lb_const, b);

# Compare NLJ optimisation to operator: every record inside the loop runs once per threshold value
foreach threshold 0 32

statement ok
PRAGMA asof_loop_join_threshold = ${threshold};

# probe side is itself a comma (cross) join filtered by c=sp_const; all six probe rows are expected
query I
select a from (select * from small_probe, child_join where c=sp_const) asof join large_build on (lb_const = sp_const and a < b) order by a;
----
1992-03-22 01:02:03
1992-03-22 01:02:04
1992-03-22 01:02:05
1992-03-22 01:02:06
1992-03-22 01:02:07
1992-03-22 01:02:08

# ASOF LEFT JOIN feeding a lateral UNNEST of a JSON array, followed by a second ASOF LEFT JOIN;
# one output row is expected per unnested key, matched to its latest fqn_table entry
query IIII
WITH
    id_with_timepoint AS (
        SELECT
            'ID1' AS user_id,
            '2024-12-23'::TIMESTAMP AS lastSeen
    ),
    id_and_payload_with_timepoint AS (
        SELECT
            'ID1' AS user_id,
            '2024-02-11'::TIMESTAMP AS timepoint,
            '{ "amp": [ {"k": "fqn1"}, {"k": "fqn2"}]}'::VARCHAR AS payload
    ),
    id_with_payload_intermediate AS (
        SELECT
            id_with_timepoint.user_id,
            id_with_timepoint.lastSeen,
            id_and_payload_with_timepoint.payload,
        FROM
            id_with_timepoint ASOF
            LEFT JOIN id_and_payload_with_timepoint ON (
                id_with_timepoint.user_id = id_and_payload_with_timepoint.user_id
                AND id_and_payload_with_timepoint.timepoint < id_with_timepoint.lastSeen
            )
    ),
    id_with_fqn AS (
        SELECT
            user_id,
            lastSeen,
            t.k_fqn
        FROM
            id_with_payload_intermediate
            LEFT JOIN LATERAL UNNEST(payload ->> '$.amp[*].k') AS t (k_fqn) ON TRUE
    ),
    fqn_table AS (
        SELECT
            *
        FROM
            (
                VALUES
                    ('fqn2', '2021-03-03'::TIMESTAMP),
                    ('fqn2', '2021-02-02'::TIMESTAMP),
                    ('fqn1', '2021-01-01'::TIMESTAMP)
            ) AS data (ap_fqn, timepoint)
    )
SELECT
    id_with_fqn.user_id,
    id_with_fqn.k_fqn,
    fqn_table.ap_fqn,
    fqn_table.timepoint::TIMESTAMP
FROM
    id_with_fqn ASOF
    LEFT JOIN fqn_table ON (
        id_with_fqn.k_fqn = fqn_table.ap_fqn
        AND fqn_table.timepoint < id_with_fqn.lastSeen
    )
ORDER BY
    k_fqn,
    timepoint;
----
ID1	fqn1	fqn1	2021-01-01 00:00:00
ID1	fqn2	fqn2	2021-03-03 00:00:00

endloop

View File

@@ -0,0 +1,69 @@
# name: test/optimizer/joins/delim_join_dont_explode.test_slow
# description: A correlated EXISTS (delim join) over an inner join must return the right rows and keep the DELIM_SCAN below the HASH_JOIN in the plan
# group: [joins]

statement ok
create table big_table (id integer);

statement ok
insert into big_table select range from range(1000);

statement ok
create table medium_1 (id integer, fk_to_big integer, fk_to_medium_2 integer);

# fk_to_medium_2 is range + 99 (99..198), so only the row with range = 0 has a partner in medium_2 (ids 0..99)
statement ok
insert into medium_1 (select range::varchar,
    CASE WHEN range<10 THEN 0 ELSE range END,
    range + 99,
    from range(100));

statement ok
create table medium_2 (id integer);

statement ok
insert into medium_2 (select range from range(100));

# sanity check: the inner join used inside the subquery yields exactly one row
query I
select count(*) from medium_2, medium_1 where medium_2.id = medium_1.fk_to_medium_2;
----
1

# result check for the correlated EXISTS; the expected output is stored as a hash over 143 values
query I
SELECT *
FROM big_table as bt
WHERE
    exists(
        SELECT *
        FROM medium_2
        INNER JOIN medium_1
        ON ((medium_2.id = medium_1.fk_to_medium_2))
        WHERE
        (medium_1.fk_to_big % 7 = bt.id % 7)
    ) order by bt.id
----
143 values hashing to dc5d1675d206057ccfe13739a38ee082

# Plan history:
# - The query plan here used to join the two SEQ_SCANs first, and then join the DELIM_SCAN.
# - Since PR #12290, DELIM_SCANs can be reordered: the DELIM_SCAN is now joined with one SEQ_SCAN first,
#   and the result is then joined with the other SEQ_SCAN.
# - Now that semi joins are reordered as well, the ordering of the sequential scans has also changed.
query II
EXPLAIN
SELECT *
FROM big_table as bt
WHERE
    exists(
        SELECT *
        FROM medium_2
        INNER JOIN medium_1
        ON ((medium_2.id = medium_1.fk_to_medium_2))
        WHERE
        (medium_1.fk_to_big % 7 = bt.id % 7)
    )
order by bt.id
----
physical_plan	<REGEX>:.*HASH_JOIN.*DELIM_SCAN.*SEQ_SCAN.*

View File

@@ -0,0 +1,24 @@
# name: test/optimizer/joins/delim_join_with_in_has_correct_results.test
# description: An IN expression should return false when the IN list is empty
# group: [joins]

statement ok
create table t as FROM VALUES (4), (NULL) t(t0);

# u contains only NULLs
statement ok
create table u as FROM VALUES (NULL), (NULL) t(u0);

# t0 = 4:    the subquery filter keeps both u rows, so the IN list is (NULL, NULL) and the result is NULL
# t0 = NULL: the subquery filter `t0 = 4` keeps no rows, so the IN list is empty and the result is false
query II
SELECT
    t0,
    t0 IN (
        SELECT
            u0
        FROM u
        WHERE
            t0 = 4
    )
FROM t;
----
4	NULL
NULL	false

View File

@@ -0,0 +1,19 @@
# name: test/optimizer/joins/filter_on_subquery_with_aggregate.test
# description: Filter comparing a CTE column against a scalar aggregate subquery over the same CTE (fuzzer issue)
# group: [joins]

# A and B both hold the values 1..9 (range's upper bound is exclusive)
statement ok
create table df as select unnest(range(1, 10)) as A, unnest(range(1, 10)) as B;

# the CTE is referenced twice: once as the scanned relation and once inside the MAX() subquery
query II
WITH cte AS (
    SELECT A, B
    FROM df
    WHERE A >= 7
)
SELECT *
FROM cte
WHERE A = (SELECT MAX(A) FROM cte);
----
9	9

View File

@@ -0,0 +1,21 @@
# name: test/optimizer/joins/get_cardinality_from_limit.test_slow
# description: when a limit is encountered, use the limit to know the cardinality
# group: [joins]

# 40000 random integers
statement ok
create table t_left as select (random() * 10000000)::INT a from range(40000);

# ten million rows, but the query below only reads it through LIMIT 10000
statement ok
create table t_right as select range b from range(10000000);

# only show the optimized logical plan, which is what the regex below matches against
statement ok
pragma explain_output='optimized_only';

# expected operator order in the rendered plan: a SEQ_SCAN, then the LIMIT with its SEQ_SCAN
query II
explain select * from t_left, (select * from t_right limit 10000) where a = b;
----
logical_opt	<REGEX>:.*SEQ_SCAN.*LIMIT.*SEQ_SCAN.*

View File

@@ -0,0 +1,30 @@
# name: test/optimizer/joins/join_bug.test
# description: Reproducers mixing INNER, SEMI, NATURAL and ASOF joins on empty tables; the statements only have to plan and run without error
# group: [joins]

statement ok
CREATE TABLE v00 (c01 INT, c02 STRING);

# don't delete numerator relations
statement ok
SELECT 1
FROM
    v00 AS t
    INNER JOIN
    (
        v00 AS t2
        SEMI JOIN v00 AS t3
        USING (c02)
    )
    USING (c01)
    SEMI JOIN v00 AS t4
    USING (c02, c01)
    NATURAL JOIN v00 AS t5;

# second reproducer uses a single-column version of the table
statement ok
CREATE or replace TABLE v00 (c01 STRING);

# Asof joins should swap
statement ok
FROM v00 AS ta02 NATURAL JOIN v00 AS ta03 ASOF JOIN v00 AS ta04 USING ( c01 );

View File

@@ -0,0 +1,39 @@
# name: test/optimizer/joins/join_on_doubles.test
# description: tests for #16901 / #16965 - equality joins on DOUBLE must match 0.0 with -0.0 and NaN with -NaN
# group: [joins]

statement ok
create table x (a double);

statement ok
create table y (b double);

statement ok
insert into x values ('0.0'), ('NaN');

# same values as x, but with the sign bit set
statement ok
insert into y values ('-0.0'), ('-NaN');

# The queries have no ORDER BY, so `rowsort` is used to make the comparison independent of the
# order in which the join emits its two rows.

query TT rowsort
select a::text, b::text from x inner join y on a = b;
----
0.0	-0.0
nan	-nan

# same join with the sides swapped
query TT rowsort
select a::text, b::text from y inner join x on a = b;
----
0.0	-0.0
nan	-nan

# outer joins: every row has a partner, so no NULL-padded rows are expected
query TT rowsort
select a::text, b::text from x right join y on a = b;
----
0.0	-0.0
nan	-nan

query TT rowsort
select a::text, b::text from x left join y on a = b;
----
0.0	-0.0
nan	-nan

View File

@@ -0,0 +1,39 @@
# name: test/optimizer/joins/join_on_floats.test
# description: tests for #16901 / #16965 - equality joins on FLOAT must match 0.0 with -0.0 and NaN with -NaN
# group: [joins]

statement ok
create table x (a float);

statement ok
create table y (b float);

statement ok
insert into x values ('0.0'), ('NaN');

# same values as x, but with the sign bit set
statement ok
insert into y values ('-0.0'), ('-NaN');

# The queries have no ORDER BY, so `rowsort` is used to make the comparison independent of the
# order in which the join emits its two rows.

query TT rowsort
select a::text, b::text from x inner join y on a = b;
----
0.0	-0.0
nan	-nan

# same join with the sides swapped
query TT rowsort
select a::text, b::text from y inner join x on a = b;
----
0.0	-0.0
nan	-nan

# outer joins: every row has a partner, so no NULL-padded rows are expected
query TT rowsort
select a::text, b::text from x right join y on a = b;
----
0.0	-0.0
nan	-nan

query TT rowsort
select a::text, b::text from x left join y on a = b;
----
0.0	-0.0
nan	-nan

View File

@@ -0,0 +1,27 @@
# name: test/optimizer/joins/joins_with_correlated_subqueries.test
# description: issue duckdblabs/duckdb-internal #840 - cross join of two correlated LIMIT 1 subqueries inside a scalar subquery
# group: [joins]

statement ok
CREATE TABLE df (x NUMERIC, y NUMERIC);

# y is missing (NULL) for x = 1 and x = 4
statement ok
INSERT INTO df VALUES (0, 2), (1, NULL), (2, 4), (3, 5), (4, NULL);

# Fills a missing y by linear interpolation between the closest non-NULL neighbours:
# `prev` is the nearest row at or below parent.x, `next` the nearest row at or above it.
# Both subqueries are correlated on parent.x; the statement only has to run without error.
statement ok
SELECT
    x,
    COALESCE(
        y,
        (
            SELECT
                prev.y + ( (next.y - prev.y) * (parent.x - prev.x) / (next.x - prev.x) )
            FROM
                ( SELECT x, y FROM df WHERE x <= parent.x and y is not null ORDER BY x DESC LIMIT 1 ) AS prev
            CROSS JOIN
                ( SELECT x, y FROM df WHERE x >= parent.x and y is not null ORDER BY x ASC LIMIT 1 ) AS next
        )
    ) AS y
FROM
    df parent;

View File

@@ -0,0 +1,49 @@
# name: test/optimizer/joins/lateral_cross_join.test
# description: test to string of complex lateral cross join
# group: [joins]

statement ok
PRAGMA enable_verification

statement ok
CREATE TABLE all_time_periods (
    start_date DATE,
    end_date DATE
);

statement ok
CREATE TABLE weekly_trading_cube (
    ship_date DATE,
    vendor_name VARCHAR,
    master_league VARCHAR,
    net_demand DECIMAL
);

statement ok
CREATE TABLE league_mapping (
    wtc_league VARCHAR,
    finance_league VARCHAR
);

statement ok
INSERT INTO all_time_periods VALUES
    ('2024-01-01', '2024-12-31');

statement ok
INSERT INTO weekly_trading_cube VALUES
    ('2024-06-15', 'F Branded', 'MLB', 100.0),
    ('2024-07-15', 'M & Ness', 'NBA', 200.0);

statement ok
INSERT INTO league_mapping VALUES
    ('MLB', 'Major League Baseball'),
    ('NBA', 'National Basketball Association');

# The LEFT JOIN condition references both sides of the preceding CROSS JOIN (wtc and dr).
# Only the 'F Branded' row passes the WHERE filter, so a single row is expected.
query III
WITH date_range AS (SELECT min(start_date) AS min_start_date, max(end_date) AS max_end_date FROM all_time_periods)
SELECT wtc.vendor_name, wtc.ship_date, lm.finance_league
FROM weekly_trading_cube AS wtc CROSS JOIN date_range AS dr
LEFT JOIN league_mapping AS lm ON (((upper(wtc.master_league) = upper(lm.wtc_league)) AND (wtc.ship_date BETWEEN dr.min_start_date AND dr.max_end_date)))
WHERE (wtc.vendor_name = 'F Branded')
----
F Branded	2024-06-15	Major League Baseball

View File

@@ -0,0 +1,21 @@
# name: test/optimizer/joins/many_joins_and_one_non_reorderable_join.test_slow
# description: A LEFT JOIN whose right side is a cross product of several tables must plan and run without error
# group: [joins]

statement ok
Create table test0 as select * from range(1000);

statement ok
Create table test1 as select * from range(100);

statement ok
Create table test2 as select * from range(100);

statement ok
Create table test3 as select * from range(100);

# NOTE(review): test4 is not referenced by the query below - confirm whether it is still needed
statement ok
Create table test4 as select * from range(400);

# the subquery has no join predicates, so test1, test2 and test3 form a cross product under the LEFT JOIN
statement ok
Select * from test0 LEFT JOIN (Select test1.range as the_range from test1, test2, test3) other ON other.the_range = test0.range;

View File

@@ -0,0 +1,54 @@
# name: test/optimizer/joins/no_cross_product_reordering.test
# description: cannot create cross product between LHS and RHS of semi/anti joins
# group: [joins]

statement ok
create table t1 as select range a from range(1000);

statement ok
create table t2 as select range b from range(1);

statement ok
create table t3 as select range c from range(100);

# the cross product must stay between t1 and t2; t3 may only appear as the RHS of the semi join
query II
explain select * from t1, t2 where a in (select * from t3);
----
physical_plan	<REGEX>:.*CROSS_PRODUCT.*HASH_JOIN.*SEMI.*

# An incorrect join plan looks like this (t2 cross-joined with the semi join's RHS):
#
# PROJECTION                      Expressions: a, b
# +-- COMPARISON_JOIN             Join Type: SEMI, Conditions: (a = #[9.0])
#     +-- CROSS_PRODUCT
#     |   +-- SEQ_SCAN            Stringified: t2
#     |   +-- PROJECTION          Expressions: c
#     |       +-- SEQ_SCAN        Stringified: t3
#     +-- SEQ_SCAN                Stringified: t1
#

View File

@@ -0,0 +1,28 @@
# name: test/optimizer/joins/no_duplicate_elimination_join.test
# description: Five-way joins with multi-relation join conditions must plan and run with debug_force_no_cross_product enabled
# group: [joins]

# applies to both join queries below
statement ok
pragma debug_force_no_cross_product=true;

statement ok
Create table test0 as select * from range(1000);

statement ok
Create table test1 as select * from range(1000);

statement ok
Create table test2 as select * from range(1000);

statement ok
Create table test3 as select * from range(100);

statement ok
Create table test4 as select * from range(400);

# the expression test1.range + test4.range is compared against both test2.range and test3.range
statement ok
SELECT * FROM test0, test1, test2,test3, test4
WHERE test1.range + test4.range = test2.range AND test1.range + test4.range = test3.range AND test1.range = test4.range AND test1.range = test0.range;

# same query with one extra condition (test1.range + test3.range = test0.range)
statement ok
SELECT * FROM test0, test1, test2, test3, test4 WHERE test1.range + test4.range = test2.range AND test1.range + test4.range = test3.range AND test1.range = test4.range AND test1.range = test0.range AND test1.range + test3.range = test0.range;

View File

@@ -0,0 +1,6 @@
# name: test/optimizer/joins/order_optimizer_bindings.test
# description: In the join order optimizer queries need to have the correct bindings
# group: [joins]

# both join inputs are summary() table functions over a subquery; the statement only has to run without error
statement ok
SELECT * FROM summary((select 5)) tbl1(i) JOIN summary((select 5)) tbl2(i) ON tbl1.i=tbl2.i;

View File

@@ -0,0 +1,29 @@
# name: test/optimizer/joins/pushdown_semi_anti.test
# description: Verify semi anti joins are pushed down
# group: [joins]

statement ok
create table tbl1 as select range a from range(10000);

statement ok
create table tbl2 as select range b from range(1000);

statement ok
create table tbl3 as select range c from range(100);

statement ok
set disabled_optimizers='statistics_propagation';

# with the join order optimizer enabled, INNER appears before SEMI in the rendered plan
query II
EXPLAIN select * from tbl1, tbl2 where b in (select * from tbl3) and tbl1.a = tbl2.b;
----
physical_plan	<REGEX>:.*INNER.*SEMI.*

statement ok
set disabled_optimizers='statistics_propagation,join_order';

# make sure non-optimized plan has semi on top of the inner
query II
EXPLAIN select * from tbl1, tbl2 where b in (select * from tbl3) and tbl1.a = tbl2.b;
----
physical_plan	<REGEX>:.*SEMI.*INNER.*

View File

@@ -0,0 +1,15 @@
# name: test/optimizer/joins/test_delim_join_with_cross_product_in_rhs.test
# description: Verify that a delim join with a correlated column in the RHS of a cross product (on the RHS of the delim GET) is properly bound
# group: [joins]

statement ok
CREATE TABLE t1(c0 DOUBLE, c1 INT8);

# t3 stays empty
statement ok
CREATE TABLE t3(c0 VARCHAR);

# only c1 is set; t1.c0 is NULL
statement ok
INSERT INTO t1(c1) VALUES (1);

# the subquery `SELECT t3.c0` is correlated with t3 from the outer comma join; the statement only has to run without error
statement ok
SELECT * FROM t3, t1 INNER JOIN ( SELECT t3.c0 ) as subQuery1 ON ( t1.c0 > (t3.c0::DOUBLE) );

View File

@@ -0,0 +1,55 @@
# name: test/optimizer/joins/test_issue_5265.test_slow
# description: Verify expected cardinality of multiple cross products
# group: [joins]

require tpch

statement ok
call dbgen(sf=0.1);

# this should run quickly
# (the single-row subquery is attached with the constant condition 1 = 1)
statement ok
SELECT n.n_name,
    SUM(l.l_extendedprice * (1 - l.l_discount)) AS revenue
FROM region r
JOIN nation n
    ON n.n_regionkey = r.r_regionkey
JOIN supplier s
    ON s.s_nationkey = n.n_nationkey
JOIN lineitem l
    ON l.l_suppkey = s.s_suppkey
JOIN orders o
    ON o.o_orderkey = l.l_orderkey
JOIN customer c
    ON c.c_custkey = o.o_custkey
    AND c.c_nationkey = s.s_nationkey
JOIN (SELECT 1 AS dummy) single_row ON 1 = 1
WHERE r.r_name = 'ASIA'
    AND o.o_orderdate >= DATE '1994-01-01'
    AND o.o_orderdate < DATE '1995-01-01'
GROUP BY n.n_name
ORDER BY revenue DESC;

# same query under EXPLAIN: the plan must not contain two CROSS_PRODUCT operators
query II
explain SELECT n.n_name,
    SUM(l.l_extendedprice * (1 - l.l_discount)) AS revenue
FROM region r
JOIN nation n
    ON n.n_regionkey = r.r_regionkey
JOIN supplier s
    ON s.s_nationkey = n.n_nationkey
JOIN lineitem l
    ON l.l_suppkey = s.s_suppkey
JOIN orders o
    ON o.o_orderkey = l.l_orderkey
JOIN customer c
    ON c.c_custkey = o.o_custkey
    AND c.c_nationkey = s.s_nationkey
JOIN (SELECT 1 AS dummy) single_row ON 1 = 1
WHERE r.r_name = 'ASIA'
    AND o.o_orderdate >= DATE '1994-01-01'
    AND o.o_orderdate < DATE '1995-01-01'
GROUP BY n.n_name
ORDER BY revenue DESC;
----
physical_plan	<!REGEX>:.*CROSS_PRODUCT.*CROSS_PRODUCT.*

View File

@@ -0,0 +1,3 @@
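# name: test/optimizer/joins/test_tpcds_pushdown.test
# description: placeholder - this file currently contains no test records (TODO: add the intended cases or remove the file)
# group: [joins]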

View File

@@ -0,0 +1,205 @@
# name: test/optimizer/joins/tpcds_nofail.test
# description: TPCDS queries that force the Join optimizer to emit 10000+ pairs. Tests the approx algorithm throws no errors
# group: [joins]

require tpcds

statement ok
CALL dsdgen(sf=0.01);

# Eighteen-relation join inside the cross_sales CTE, which is then self-joined.
# In this query `store` and `promotion` appear in the FROM list without any join predicate.
# The statement only has to run without error; no result is checked.
statement ok
WITH cs_ui AS
(SELECT cs_item_sk,
sum(cs_ext_list_price) AS sale,
sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit) AS refund
FROM catalog_sales,
catalog_returns
WHERE cs_item_sk = cr_item_sk
AND cs_order_number = cr_order_number
GROUP BY cs_item_sk
HAVING sum(cs_ext_list_price)>2*sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit)),
cross_sales AS
(SELECT i_product_name product_name,
i_item_sk item_sk,
s_store_name store_name,
s_zip store_zip,
ad1.ca_street_number b_street_number,
ad1.ca_street_name b_street_name,
ad1.ca_city b_city,
ad1.ca_zip b_zip,
ad2.ca_street_number c_street_number,
ad2.ca_street_name c_street_name,
ad2.ca_city c_city,
ad2.ca_zip c_zip,
d1.d_year AS syear,
d2.d_year AS fsyear,
d3.d_year s2year,
count(*) cnt,
sum(ss_wholesale_cost) s1,
sum(ss_list_price) s2,
sum(ss_coupon_amt) s3
FROM store_sales,
store_returns,
cs_ui,
date_dim d1,
date_dim d2,
date_dim d3,
store,
customer,
customer_demographics cd1,
customer_demographics cd2,
promotion,
household_demographics hd1,
household_demographics hd2,
customer_address ad1,
customer_address ad2,
income_band ib1,
income_band ib2,
item
WHERE ss_sold_date_sk = d1.d_date_sk
AND ss_customer_sk = c_customer_sk
AND ss_cdemo_sk= cd1.cd_demo_sk
AND ss_hdemo_sk = hd1.hd_demo_sk
AND ss_addr_sk = ad1.ca_address_sk
AND ss_item_sk = i_item_sk
AND ss_item_sk = sr_item_sk
AND ss_ticket_number = sr_ticket_number
AND ss_item_sk = cs_ui.cs_item_sk
AND c_current_cdemo_sk = cd2.cd_demo_sk
AND c_current_hdemo_sk = hd2.hd_demo_sk
AND c_current_addr_sk = ad2.ca_address_sk
AND c_first_sales_date_sk = d2.d_date_sk
AND c_first_shipto_date_sk = d3.d_date_sk
AND hd1.hd_income_band_sk = ib1.ib_income_band_sk
AND hd2.hd_income_band_sk = ib2.ib_income_band_sk
AND cd1.cd_marital_status <> cd2.cd_marital_status
AND i_color IN ('purple',
'burlywood',
'indian',
'spring',
'floral',
'medium')
AND i_current_price BETWEEN 64 AND 64 + 10
AND i_current_price BETWEEN 64 + 1 AND 64 + 15
GROUP BY i_product_name,
i_item_sk,
s_store_name,
s_zip,
ad1.ca_street_number,
ad1.ca_street_name,
ad1.ca_city,
ad1.ca_zip,
ad2.ca_street_number,
ad2.ca_street_name,
ad2.ca_city,
ad2.ca_zip,
d1.d_year,
d2.d_year,
d3.d_year)
SELECT cs1.product_name,
cs1.store_name,
cs1.store_zip,
cs1.b_street_number,
cs1.b_street_name,
cs1.b_city,
cs1.b_zip,
cs1.c_street_number,
cs1.c_street_name,
cs1.c_city,
cs1.c_zip,
cs1.syear cs1syear,
cs1.cnt cs1cnt,
cs1.s1 AS s11,
cs1.s2 AS s21,
cs1.s3 AS s31,
cs2.s1 AS s12,
cs2.s2 AS s22,
cs2.s3 AS s32,
cs2.syear,
cs2.cnt
FROM cross_sales cs1,
cross_sales cs2
WHERE cs1.item_sk=cs2.item_sk
AND cs1.syear = 1999
AND cs2.syear = 1999 + 1
AND cs2.cnt <= cs1.cnt
AND cs1.store_name = cs2.store_name
AND cs1.store_zip = cs2.store_zip
ORDER BY cs1.product_name,
cs1.store_name,
cs2.cnt,
cs1.s1,
cs2.s1;

# Five CASE buckets, each built from three uncorrelated scalar subqueries over store_sales.
# The statement only has to run without error; no result is checked.
statement ok
SELECT CASE
WHEN
(SELECT count(*)
FROM store_sales
WHERE ss_quantity BETWEEN 1 AND 20) > 74129 THEN
(SELECT avg(ss_ext_discount_amt)
FROM store_sales
WHERE ss_quantity BETWEEN 1 AND 20)
ELSE
(SELECT avg(ss_net_paid)
FROM store_sales
WHERE ss_quantity BETWEEN 1 AND 20)
END bucket1,
CASE
WHEN
(SELECT count(*)
FROM store_sales
WHERE ss_quantity BETWEEN 21 AND 40) > 122840 THEN
(SELECT avg(ss_ext_discount_amt)
FROM store_sales
WHERE ss_quantity BETWEEN 21 AND 40)
ELSE
(SELECT avg(ss_net_paid)
FROM store_sales
WHERE ss_quantity BETWEEN 21 AND 40)
END bucket2,
CASE
WHEN
(SELECT count(*)
FROM store_sales
WHERE ss_quantity BETWEEN 41 AND 60) > 56580 THEN
(SELECT avg(ss_ext_discount_amt)
FROM store_sales
WHERE ss_quantity BETWEEN 41 AND 60)
ELSE
(SELECT avg(ss_net_paid)
FROM store_sales
WHERE ss_quantity BETWEEN 41 AND 60)
END bucket3,
CASE
WHEN
(SELECT count(*)
FROM store_sales
WHERE ss_quantity BETWEEN 61 AND 80) > 10097 THEN
(SELECT avg(ss_ext_discount_amt)
FROM store_sales
WHERE ss_quantity BETWEEN 61 AND 80)
ELSE
(SELECT avg(ss_net_paid)
FROM store_sales
WHERE ss_quantity BETWEEN 61 AND 80)
END bucket4,
CASE
WHEN
(SELECT count(*)
FROM store_sales
WHERE ss_quantity BETWEEN 81 AND 100) > 165306 THEN
(SELECT avg(ss_ext_discount_amt)
FROM store_sales
WHERE ss_quantity BETWEEN 81 AND 100)
ELSE
(SELECT avg(ss_net_paid)
FROM store_sales
WHERE ss_quantity BETWEEN 81 AND 100)
END bucket5
FROM reason
WHERE r_reason_sk = 1 ;

View File

@@ -0,0 +1,42 @@
# name: test/optimizer/joins/update_nodes_in_full_path.test_slow
# description: updating nodes in full path should throw no errors
# group: [joins]

require tpch

statement ok
call dbgen(sf=0.01);

# Deeply nested join tree: most ON clauses are stacked after the innermost join rather than
# following their own JOIN keyword, and several contain scalar subqueries.
# The statement is kept verbatim as a reproducer and only has to run without error.
statement ok
SELECT NULL
FROM main.supplier AS ref_0
LEFT JOIN main.nation AS ref_1
LEFT JOIN main.nation AS ref_2
INNER JOIN main.customer AS ref_3
INNER JOIN main.supplier AS ref_4 ON (ref_3.c_phone = ref_4.s_name)
ON ((SELECT l_linestatus FROM main.lineitem LIMIT 1 OFFSET 2) IS NULL)
INNER JOIN main.orders AS ref_5
INNER JOIN main.orders AS ref_6 ON (ref_5.o_clerk ~~~ ref_5.o_comment)
ON (1)
ON (ref_3.c_mktsegment ~~~ ref_4.s_phone)
ON (ref_0.s_acctbal = ref_5.o_totalprice)
INNER JOIN main.lineitem AS ref_7 ON (ref_4.s_suppkey = ref_7.l_orderkey)
INNER JOIN main.supplier AS ref_8
INNER JOIN main.partsupp AS ref_9
INNER JOIN main.supplier AS ref_10
INNER JOIN main.supplier AS ref_11
INNER JOIN main.lineitem AS ref_12
INNER JOIN main.customer AS ref_13 ON (ref_12.l_linestatus = ref_13.c_name)
ON ((SELECT ps_comment FROM main.partsupp LIMIT 1 OFFSET 4) ^@ ref_11.s_address)
ON (ref_13.c_phone ~~~ ref_10.s_address)
ON (ref_9.ps_partkey = ref_11.s_suppkey)
ON ((SELECT ps_comment FROM main.partsupp LIMIT 1 OFFSET 6) ~~* ref_12.l_linestatus)
ON ((ref_6.o_orderpriority IS NULL) OR (ref_7.l_linestatus ~~* (SELECT s_name FROM main.supplier LIMIT 1 OFFSET 6)))
INNER JOIN (
SELECT ref_14.p_container AS c0, ref_14.p_mfgr AS c1, ref_14.p_container AS c2, ref_15.c_custkey AS c3
FROM main.part AS ref_14
INNER JOIN main.customer AS ref_15 ON (ref_14.p_brand ~~* ref_15.c_mktsegment)
WHERE (ref_14.p_comment ~~~ ref_14.p_container)
LIMIT 101
) AS subq_0 ON (ref_6.o_orderstatus ~~* ref_6.o_comment)
WHERE (ref_8.s_address ~~* ref_8.s_address);

View File

@@ -0,0 +1,54 @@
# name: test/optimizer/joins/updating_the_join_node_hash_map_has_no_errors.test_slow
# description: Deeply nested inner joins with scalar subqueries in ON clauses: errors on multi-row subqueries by default, runs once that check is disabled
# group: [joins]

require tpch

statement ok
call dbgen(sf=0.05);

# with default settings the unrestricted scalar subqueries (no LIMIT) trigger the multiple-rows error
statement error
SELECT NULL
FROM main.supplier AS ref_0
INNER JOIN main.nation
INNER JOIN main.nation AS ref_2
INNER JOIN main.customer AS ref_3
INNER JOIN main.supplier AS ref_4 ON ((ref_3.c_phone = ref_4.s_name)) ON (
(SELECT NULL))
INNER JOIN main.orders AS ref_5
INNER JOIN main.orders AS ref_6 ON (ref_5.o_clerk like '%0000%') ON (1) ON (ref_3.c_mktsegment NOT NULL) ON ((ref_0.s_acctbal = ref_5.o_totalprice))
INNER JOIN main.lineitem AS ref_7 ON ((ref_4.s_suppkey = ref_7.l_orderkey))
INNER JOIN main.supplier
INNER JOIN main.supplier AS ref_11
INNER JOIN main.lineitem AS ref_12 ON (
(SELECT NULL)) ON ((
(SELECT ps_comment FROM main.partsupp) ~~* ref_12.l_linestatus)) ON
((ref_7.l_linestatus ~~* (SELECT s_name FROM main.supplier)))
INNER JOIN
(SELECT NULL) ON (ref_6.o_orderstatus NOT NULL);
----
More than one row returned by a subquery used as an expression

# turn the multiple-rows check off so the same query reaches the join order optimizer and executes
statement ok
SET scalar_subquery_error_on_multiple_rows=false

# identical query; it now only has to run without error
statement ok
SELECT NULL
FROM main.supplier AS ref_0
INNER JOIN main.nation
INNER JOIN main.nation AS ref_2
INNER JOIN main.customer AS ref_3
INNER JOIN main.supplier AS ref_4 ON ((ref_3.c_phone = ref_4.s_name)) ON (
(SELECT NULL))
INNER JOIN main.orders AS ref_5
INNER JOIN main.orders AS ref_6 ON (ref_5.o_clerk like '%0000%') ON (1) ON (ref_3.c_mktsegment NOT NULL) ON ((ref_0.s_acctbal = ref_5.o_totalprice))
INNER JOIN main.lineitem AS ref_7 ON ((ref_4.s_suppkey = ref_7.l_orderkey))
INNER JOIN main.supplier
INNER JOIN main.supplier AS ref_11
INNER JOIN main.lineitem AS ref_12 ON (
(SELECT NULL)) ON ((
(SELECT ps_comment FROM main.partsupp) ~~* ref_12.l_linestatus)) ON
((ref_7.l_linestatus ~~* (SELECT s_name FROM main.supplier)))
INNER JOIN
(SELECT NULL) ON (ref_6.o_orderstatus NOT NULL);

View File

@@ -0,0 +1,52 @@
# name: test/optimizer/joins/wide_build_skinny_probe.test
# description: If the build side is wide, and the probe side is skinny, the children of the hash join should flip
# group: [joins]

# skinny: 10000 rows, three integer columns
statement ok
create table skinny as select range a, range b, range c from range(10000);

# wide: 8000 rows, a key column plus 34 VARCHAR columns
statement ok
create table wide as select
    range pk,
    (range::VARCHAR || '1111') a,
    (range::VARCHAR || '2222') b,
    (range::VARCHAR || '3333') c,
    (range::VARCHAR || '4444') d,
    (range::VARCHAR || '5555') e,
    (range::VARCHAR || '6666') f,
    (range::VARCHAR || '7777') g,
    (range::VARCHAR || '8888') h,
    (range::VARCHAR || '9999') i,
    (range::VARCHAR || '0000') j,
    (range::VARCHAR || '0011') k,
    (range::VARCHAR || '0022') l,
    (range::VARCHAR || '0033') m,
    (range::VARCHAR || '9999') o,
    (range::VARCHAR || '0000') p,
    (range::VARCHAR || '0011') q,
    (range::VARCHAR || '0022') u,
    (range::VARCHAR || '0033') r,
    (range::VARCHAR || '9999') s,
    (range::VARCHAR || '0000') t,
    (range::VARCHAR || '0011') w,
    (range::VARCHAR || '0022') y,
    (range::VARCHAR || '0033') z,
    (range::VARCHAR || '9999') aa,
    (range::VARCHAR || '0000') bb,
    (range::VARCHAR || '0011') cc,
    (range::VARCHAR || '0022') dd,
    (range::VARCHAR || '0033') ee,
    (range::VARCHAR || '9999') ff,
    (range::VARCHAR || '0000') gg,
    (range::VARCHAR || '0011') hh,
    (range::VARCHAR || '0022') ii,
    (range::VARCHAR || '0033') jj,
    (range::VARCHAR || '0044') kk from range(8000);

# wide should be the probe side, skinny should be on the build side
# (the regex requires `wide` to appear before `skinny` in the rendered plan)
query II
explain select * from wide w, skinny s where w.pk=s.a;
----
physical_plan	<REGEX>:.*wide.*skinny.*