should be it

This commit is contained in:
2025-10-24 19:21:19 -05:00
parent a4b23fc57c
commit f09560c7b1
14047 changed files with 3161551 additions and 1 deletions

View File

@@ -0,0 +1,24 @@
# name: benchmark/micro/join/asof_join.benchmark
# description: AsOf Join with 50 keys of 100K times
# group: [join]
# build: one row per (minute offset v, key k) for 100000 offsets and 50 keys.
# probe: every build row with the key doubled and the timestamp moved back 30 seconds.
name AsOf Join scaling
group join

load
PRAGMA debug_asof_iejoin=False;
CREATE TABLE build AS (
    SELECT
        k,
        '2001-01-01 00:00:00'::TIMESTAMP + INTERVAL (v) MINUTE AS t,
        v
    FROM range(0, 100000) vals(v), range(0, 50) keys(k)
);
CREATE TABLE probe AS (
    SELECT
        k * 2 AS k,
        t - INTERVAL (30) SECOND AS t
    FROM build
);

run
SELECT SUM(v)
FROM probe
ASOF JOIN build USING (k, t);

result I
124996250025

View File

@@ -0,0 +1,30 @@
# name: benchmark/micro/join/asof_join_small_probe.benchmark
# description: AsOf Join with probe smaller than build
# group: [join]
# probe has 100000 rows and build has 1000000 rows; ids on both sides fall in
# [0, 100000) and each timestamp is 2021-01-01 plus a random number of seconds
# (up to one year). SETSEED is called first so the random data is reproducible
# for the checked result.
name AsOf Join with |probe| << |build|
group join

load
SELECT SETSEED(0.8675309);
PRAGMA debug_asof_iejoin=False;
CREATE TABLE probe AS
    SELECT
        r AS id,
        '2021-01-01T00:00:00'::TIMESTAMP + INTERVAL (random() * 60 * 60 * 24 * 365) SECOND AS probe_ts_1_0,
    FROM range(0, 100000) tbl(r);
CREATE TABLE build AS
    SELECT
        r % 100000 AS id,
        '2021-01-01T00:00:00'::TIMESTAMP + INTERVAL (random() * 60 * 60 * 24 * 365) SECOND AS build_ts_1_0,
        (random() * 100000)::INTEGER AS build_int_1_0
    FROM range(0, 1000000) tbl(r);

run
SELECT SUM(build_int_1_0)
FROM "probe"
ASOF JOIN "build"
    ON "probe"."id" = "build"."id"
    AND "probe"."probe_ts_1_0" >= "build"."build_ts_1_0"
;

result I
4560929499

View File

@@ -0,0 +1,23 @@
# name: benchmark/micro/join/blockwise_nl_join.benchmark
# description: Left join between two tables, where the left table is significantly smaller than the right table.
# group: [join]
# lhs: 10000 sequential ids with a random enroll_date.
# rhs: 100000 rows with random ids, claim dates and claim costs.
# No seed is set and no result is checked, so the data differs between loads.
name Left Join (big RHS, small LHS)
group join

load
CREATE TABLE lhs AS SELECT * FROM range(10000) lhs(id);
ALTER TABLE lhs ADD COLUMN enroll_date DATE;
UPDATE lhs SET enroll_date = DATE '2000-01-01' + CAST(round(random()*3000) AS INTEGER);
CREATE TABLE rhs AS SELECT * FROM range(100000) rhs(id);
UPDATE rhs SET id = random() * 1000000;
ALTER TABLE rhs ADD COLUMN claim_date DATE;
UPDATE rhs SET claim_date = DATE '2000-01-01' + CAST(random()*3000 AS INTEGER);
ALTER TABLE rhs ADD COLUMN claim_cost DOUBLE;
UPDATE rhs SET claim_cost = random() * 10000;

run
SELECT
    lhs.id,
    sum(coalesce(claim_cost,0))
FROM lhs
LEFT JOIN rhs
    ON lhs.id = rhs.id
    AND datediff('month',enroll_date,claim_date) BETWEEN 1 AND 12
GROUP BY lhs.id
ORDER BY lhs.id;

View File

@@ -0,0 +1,32 @@
# name: benchmark/micro/join/delim_join_no_blowup.benchmark
# description: Delim joins should not blow up the intermediate result and therefore take forever
# group: [join]
# big_table holds 10000000 ids; medium_1 and medium_2 hold 10000 rows each.
# The load section also disables the statistics_propagation optimizer.
# The run query is a correlated EXISTS over the join of the two medium tables.
name High Cardinality Duplicate elimination join
group join

load
CREATE TABLE big_table (id INTEGER);
INSERT INTO big_table SELECT range FROM range(10000000);
CREATE TABLE medium_1 (id INTEGER, fk_to_big INTEGER, fk_to_medium_2 INTEGER);
INSERT INTO medium_1 (
    SELECT
        range,
        CASE WHEN range<10 THEN 0 ELSE range END,
        range + 9999,
    FROM range(10000)
);
CREATE TABLE medium_2 (id INTEGER);
INSERT INTO medium_2 (SELECT range FROM range(10000));
PRAGMA disabled_optimizers='statistics_propagation';

run
SELECT *
FROM big_table AS bt
WHERE EXISTS (
    SELECT *
    FROM medium_2
    INNER JOIN medium_1
        ON ((medium_2.id = medium_1.fk_to_medium_2))
    WHERE (medium_1.fk_to_big % 7 = bt.id % 7)
)

View File

@@ -0,0 +1,23 @@
# name: benchmark/micro/join/external_join_partition_order.benchmark
# description: Test that the external hash join partition selection selects even partitions sequentially
# group: [join]
# build and probe each hold the integers produced by range(1000e5).
# init sets 4 threads, a temp_directory and a 1000mb memory_limit before the run.
name External Join Partition Order
group join
cache external_join_partition_order.duckdb

load
CREATE TABLE build AS SELECT range AS c FROM range(1000e5::BIGINT);
CREATE TABLE probe AS SELECT range AS c FROM range(1000e5::BIGINT);

init
SET threads=4;
SET temp_directory='${BENCHMARK_DIR}/external_join_partition_order.duckdb.tmp';
SET memory_limit='1000mb';

run
SELECT count(*)
FROM probe
JOIN build USING (c)

result I
100000000

View File

@@ -0,0 +1,24 @@
# name: benchmark/micro/join/external_join_partition_selection.benchmark
# description: Test that the external hash join partition selection selects the large partition last
# group: [join]
# build holds the even numbers from range(100e5) * 2 plus one extra row with
# value 42 per element of range(100e5); probe holds range(1000e5).
# init sets 4 threads, a temp_directory and a 500mb memory_limit before the run.
name External Join Partition Selection
group join
cache external_join_partition_selection.duckdb

load
CREATE TABLE build AS SELECT range * 2 AS c FROM range(100e5::BIGINT);
INSERT INTO build SELECT 42 AS c FROM range(100e5::BIGINT);
CREATE TABLE probe AS SELECT range AS c FROM range(1000e5::BIGINT);

init
SET threads=4;
SET temp_directory='${BENCHMARK_DIR}/external_join_partition_selection.duckdb.tmp';
SET memory_limit='500mb';

run
SELECT count(*)
FROM probe
JOIN build USING (c)

result I
20000000

View File

@@ -0,0 +1,16 @@
# name: benchmark/micro/join/hashjoin_dups_rhs.benchmark
# description: Inner hash join using string comparisons with 4x duplicates on the rhs and 4096x duplicates on the lhs
# group: [join]
# t1 has 131072 rows and t2 has 134217728 rows; both cycle through the same
# 32768 distinct string keys ('verylargestring' followed by range % 32768).
name Inner Join (dups on rhs)
group join

load
CREATE TABLE t1 AS
    SELECT 'verylargestring' || range % 32768 AS i
    FROM range(131072);
CREATE TABLE t2 AS
    SELECT 'verylargestring' || range % 32768 AS i
    FROM range(134217728);

run
SELECT count(*)
FROM t1
JOIN t2 USING (i)

result I
536870912

View File

@@ -0,0 +1,20 @@
# name: benchmark/micro/join/hashjoin_highcardinality.benchmark
# description: Hash Join where RHS has high cardinality
# group: [join]
# t1 has 1000 rows and t2 has 10000000 rows; v1 and v2 both equal the row number.
name High Cardinality Join (No Index, Count Only)
group join

load
CREATE TABLE t1 AS
    SELECT i AS v1, i AS v2
    FROM range (0,1000) t(i);
CREATE TABLE t2 AS
    SELECT i AS v1, i AS v2
    FROM range (0,10000000) t(i);

run
SELECT
    t1.v2 AS c0,
    t2.v2 AS c1,
    count(*) AS c2
FROM t1
INNER JOIN t2
    ON (t1.v1 = t2.v1)
GROUP BY t1.v2, t2.v2
ORDER BY t1.v2
LIMIT 5

result III
0 0 1
1 1 1
2 2 1
3 3 1
4 4 1

View File

@@ -0,0 +1,13 @@
# name: benchmark/micro/join/hashjoin_lhsarithmetic.benchmark
# description: Hash Join where LHS performs case operation
# group: [join]
# t1 has 10000 rows and t2 has 10000000 rows; v1 and v2 both equal the row number.
# The projection evaluates a CASE over t1's columns for every joined row.
name Equality Join + Arithmetic (No Index)
group join

load
CREATE TABLE t1 AS
    SELECT i AS v1, i AS v2
    FROM range (0,10000) t(i);
CREATE TABLE t2 AS
    SELECT i AS v1, i AS v2
    FROM range (0,10000000) t(i);

run
SELECT
    CASE
        WHEN t1.v1 > 50 THEN t1.v1+t1.v2
        ELSE t1.v1*t1.v2
    END
FROM t1
JOIN t2 USING (v1);

View File

@@ -0,0 +1,67 @@
# name: benchmark/micro/join/iejoin_employees.benchmark
# description: Range join between integers
# group: [join]
name Range Join
group join

# (1) Employees. A dataset that contains employees salary and tax information [3] with eight attributes:
# state, married, dependents, salary, tax, and three others for notes.
# The relation has been populated with real-life data: tax rates, income brackets, and exemptions
# for each state in the USA have been manually collected to generate synthetic tax records.
# We used the following self-join query to identify anomalies [7]:
# Q1 : SELECT r.id, s.id
# FROM Employees r, Employees s
# WHERE r.salary < s.salary AND r.tax > s.tax;
# The above query returns a set of employee pairs, where one employee earns higher salary than the other but pays less tax.
# To make sure that we generate output for Q1, we selected 10% random rows and increased their tax values.
# Employees2 is a group of larger input datasets with up to 6 Billion records, but with only 0.001% random changes to tax values.
# The higher selectivity is used to test the distributed algorithm on large input files.
#
# Note on the surname pick: DuckDB list indexing is 1-based, so the random
# 0..19 offset is shifted by 1 to address all 20 enum values. An index of 0
# would yield a NULL surname and the last enum value would never be chosen.
# The surname is not read by the run query.
load
CREATE TYPE surname_t AS ENUM (
    'Smith',
    'Johnson',
    'Williams',
    'Jones',
    'Brown',
    'Davis',
    'Miller',
    'Wilson',
    'Moore',
    'Taylor',
    'Anderson',
    'Thomas',
    'Jackson',
    'White',
    'Harris',
    'Martin',
    'Thompson',
    'Garcia',
    'Martinez',
    'Robinson'
);
SELECT SETSEED(0.8675309);
CREATE TABLE employees AS
    SELECT
        facts.id AS id,
        surname AS "name",
        dept,
        salary,
        (salary / 10 - CASE WHEN random() <= 0.01 THEN (10 + 1) ELSE 0 END)::INTEGER AS tax
    FROM (
        SELECT
            id,
            enum_range(NULL::surname_t)[1 + (round(random() * 19))::INTEGER] AS surname,
            round(random() * 5)::INTEGER AS dept,
            100 * id AS salary
        FROM (SELECT UNNEST(range(1, 10000000))) tbl(id)
    ) facts
;

run
SELECT COUNT(*) FROM (
    SELECT r.id, s.id
    FROM employees r, employees s
    WHERE r.salary < s.salary AND r.tax > s.tax
) q1;

View File

@@ -0,0 +1,43 @@
# name: benchmark/micro/join/iejoin_events.benchmark
# description: Range self-join between event dates
# group: [join]
name IEJoin Events
group join

# (2) Events. A synthetic dataset that contains start and end time information for a set of independent events.
# Each event contains the name of the event, event ID, number of attending people, and the sponsor ID.
# We used this dataset with a self-join query that collects pairs of overlapping events:
# Q2 : SELECT r.id, s.id
# FROM Events r, Events s
# WHERE r.start ≤ s.end AND r.end ≥ s.start AND r.id ≠ s.id;
# Again, to make sure we generate output for Q2, we selected 10% random events and extended their end values.
# We also generate Events2 as larger datasets with up to 6 Billion records, but with 0.001% extended random events.
#
# In this file: ids come from range(1, 30000); an event whose random() draw is
# below 0.1 lasts 120 minutes, every other event lasts 5 plus a random 0..50 minutes.
load
SELECT SETSEED(0.8675309);
CREATE TABLE events AS (
    SELECT
        *,
        "start" + INTERVAL (
            CASE
                WHEN random() < 0.1 THEN 120
                ELSE (5 + round(random() * 50, 0)::BIGINT)
            END
        ) MINUTE AS "end"
    FROM (
        SELECT
            id,
            'Event ' || id::VARCHAR AS "name",
            (5 + round(random() * 5000, 0)::BIGINT) AS audience,
            '1992-01-01'::TIMESTAMP
                + INTERVAL (round(random() * 40 * 365, 0)::BIGINT) DAY
                + INTERVAL (round(random() * 23, 0)::BIGINT) HOUR
                AS "start",
            'Sponsor ' || (1 + round(random() * 10, 0)::BIGINT) AS sponsor
        FROM range(1, 30000) tbl(id)
    ) q
);

run
SELECT COUNT(*) FROM (
    SELECT r.id, s.id
    FROM events r, events s
    WHERE r.start <= s.end
        AND r.end >= s.start
        AND r.id <> s.id
) q2;

View File

@@ -0,0 +1,29 @@
# name: benchmark/micro/join/iejoin_selectivity.benchmark
# description: Mixed predicate where range is faster
# group: [join]
# df has 5000000 rows of seeded random data: id in 1..1001, id2 in 1..11,
# id3 in 1..501 and a random value. prefer_range_joins is switched off in load.
# The self-join combines two equality predicates with a bounded range on id3.
name IEJoin Selectivity
group join

load
SELECT SETSEED(0.8675309);
PRAGMA prefer_range_joins=False;
CREATE TABLE df AS
    SELECT
        (random() * 1000)::INTEGER + 1 AS id,
        (random() * 10)::INTEGER + 1 AS id2,
        (random() * 500)::INTEGER + 1 AS id3,
        random() AS value,
    FROM range(5000000);

run
SELECT
    id2,
    id3,
    id3_right,
    corr(value, value_right) AS value
FROM (
    SELECT
        df.*,
        df2.id3 AS id3_right,
        df2.value AS value_right
    FROM df
    JOIN df AS df2
        ON (df.id = df2.id
            AND df.id2 = df2.id2
            AND df.id3 > df2.id3
            AND df.id3 < df2.id3 + 30)
) tbl
GROUP BY ALL

View File

@@ -0,0 +1,19 @@
# name: benchmark/micro/join/join_from_parquet.benchmark
# description: Join between two parquet files. We want to put the smaller parquet file on the build side
# group: [join]
# load generates TPC-H at scale factor 1, exports lineitem to a parquet file
# and writes a second parquet file holding the single row 42.
# run reads both files in one FROM clause with no join condition.
# The display name describes this parquet join; it previously repeated the
# name of the right-outer-join benchmark.
name Join From Parquet (single-row file x lineitem)
group join
require tpch
cache tpch_sf1_join_from_parquet.duckdb

load
CALL dbgen(sf=1);
COPY lineitem TO 'lineitem.parquet';
COPY (SELECT 42) TO 'singlerow.parquet';

run
SELECT *
FROM 'singlerow.parquet', 'lineitem.parquet';

View File

@@ -0,0 +1,13 @@
# name: benchmark/micro/join/join_order_optimizer_should_respect_limit.benchmark
# description: If a constant value limit operator exists, it should have influence on the estimated cardinality
# group: [join]
# t_left has 400000 random integers; t_right has 1000000000 sequential integers,
# of which the run query only takes a LIMIT 10000 subquery before joining.
# No seed is set and no result is checked.
name join limit
group join

load
CREATE TABLE t_left AS
    SELECT (random() * 1000000000)::INT AS a
    FROM range(400000);
CREATE TABLE t_right AS
    SELECT range AS b
    FROM range(1000000000);

run
SELECT *
FROM
    t_left,
    (SELECT * FROM t_right LIMIT 10000)
WHERE a = b;

View File

@@ -0,0 +1,23 @@
# name: benchmark/micro/join/left_outer_join_right_big.benchmark
# description: LEFT OUTER JOIN between two tables, where the right table is significantly bigger than the LEFT table
# group: [join]
# small_table has 10000 rows and big_table has 100000000 rows with the same
# column layout: a sequential pkey, a two-character c0 and six random integer
# columns of growing range. setseed fixes the random data for the checked result.
name LEFT OUTER JOIN (big RHS, small LHS)
group join

load
SELECT setseed(0.4);
CREATE TABLE small_table (
    pkey INTEGER,
    c0 CHAR(2),
    c_1k INTEGER,
    c_10k INTEGER,
    c_100k INTEGER,
    c_1m INTEGER,
    c_10m INTEGER,
    c_100m INTEGER
);
INSERT INTO small_table
    SELECT
        i,
        concat('A',mod(i,2)),
        (random()* 1000)::INT,
        (random()* 10000)::INT,
        (random()* 100000)::INT,
        (random()* 1000000)::INT,
        (random()* 10000000)::INT,
        (random()* 100000000)::INT
    FROM range(0,10000) tbl(i) ;
CREATE TABLE big_table (
    pkey INTEGER,
    c0 CHAR(2),
    c_1k INTEGER,
    c_10k INTEGER,
    c_100k INTEGER,
    c_1m INTEGER,
    c_10m INTEGER,
    c_100m INTEGER
);
INSERT INTO big_table
    SELECT
        i,
        concat('A',mod(i,2)),
        (random()* 1000)::INT,
        (random()* 10000)::INT,
        (random()* 100000)::INT,
        (random()* 1000000)::INT,
        (random()* 10000000)::INT,
        (random()* 100000000)::INT
    FROM range(0,100000000) tbl(i);

run
SELECT count(*)
FROM small_table d
LEFT OUTER JOIN big_table f
    ON ( d.pkey=f.c_10k);

result I
99995018

View File

@@ -0,0 +1,39 @@
# name: benchmark/micro/join/many_inner_joins.benchmark
# description: Optimizer benchmark to ensure joins are not over-optimized
# group: [join]
# Both tables are created empty, so the run query returns no rows; only the
# planning of the 20-way join is exercised.
# Every alias Tn joins MainTable on its own Value<n>_Id column. T19 previously
# reused Value16_Id, which left Value19_Id unreferenced.
group join
subgroup optimizer

load
CREATE TABLE MainTable (Id INT,
    Value1_Id INT, Value2_Id INT, Value3_Id INT, Value4_Id INT, Value5_Id INT,
    Value6_Id INT, Value7_Id INT, Value8_Id INT, Value9_Id INT, Value10_Id INT,
    Value11_Id INT, Value12_Id INT, Value13_Id INT, Value14_Id INT, Value15_Id INT,
    Value16_Id INT, Value17_Id INT, Value18_Id INT, Value19_Id INT, Value20_Id INT);
CREATE TABLE ValueTable (Id INT, Value TEXT);

run
SELECT T.Id FROM MainTable T
    INNER JOIN ValueTable T1 ON T.Value1_Id = T1.Id
    INNER JOIN ValueTable T2 ON T.Value2_Id = T2.Id
    INNER JOIN ValueTable T3 ON T.Value3_Id = T3.Id
    INNER JOIN ValueTable T4 ON T.Value4_Id = T4.Id
    INNER JOIN ValueTable T5 ON T.Value5_Id = T5.Id
    INNER JOIN ValueTable T6 ON T.Value6_Id = T6.Id
    INNER JOIN ValueTable T7 ON T.Value7_Id = T7.Id
    INNER JOIN ValueTable T8 ON T.Value8_Id = T8.Id
    INNER JOIN ValueTable T9 ON T.Value9_Id = T9.Id
    INNER JOIN ValueTable T10 ON T.Value10_Id = T10.Id
    INNER JOIN ValueTable T11 ON T.Value11_Id = T11.Id
    INNER JOIN ValueTable T12 ON T.Value12_Id = T12.Id
    INNER JOIN ValueTable T13 ON T.Value13_Id = T13.Id
    INNER JOIN ValueTable T14 ON T.Value14_Id = T14.Id
    INNER JOIN ValueTable T15 ON T.Value15_Id = T15.Id
    INNER JOIN ValueTable T16 ON T.Value16_Id = T16.Id
    INNER JOIN ValueTable T17 ON T.Value17_Id = T17.Id
    INNER JOIN ValueTable T18 ON T.Value18_Id = T18.Id
    INNER JOIN ValueTable T19 ON T.Value19_Id = T19.Id
    INNER JOIN ValueTable T20 ON T.Value20_Id = T20.Id;

result I

View File

@@ -0,0 +1,115 @@
# name: benchmark/micro/join/many_left_joins.benchmark
# description: Optimizer benchmark to ensure joins are not over-optimized
# group: [join]
# Creates 48 empty tables t1..t48, listed in numeric order. t1 has columns
# c1..c3 plus info; every other table has c1..c9 plus info.
group micro
subgroup join

load
CREATE TABLE t1(c1 INTEGER NOT NULL, c2 INTEGER, c3 INTEGER, info VARCHAR);
CREATE TABLE t2(c1 INTEGER NOT NULL, c2 INTEGER, c3 INTEGER, c4 INTEGER, c5 INTEGER, c6 INTEGER, c7 INTEGER, c8 INTEGER, c9 INTEGER, info VARCHAR);
CREATE TABLE t3(c1 INTEGER NOT NULL, c2 INTEGER, c3 INTEGER, c4 INTEGER, c5 INTEGER, c6 INTEGER, c7 INTEGER, c8 INTEGER, c9 INTEGER, info VARCHAR);
CREATE TABLE t4(c1 INTEGER NOT NULL, c2 INTEGER, c3 INTEGER, c4 INTEGER, c5 INTEGER, c6 INTEGER, c7 INTEGER, c8 INTEGER, c9 INTEGER, info VARCHAR);
CREATE TABLE t5(c1 INTEGER NOT NULL, c2 INTEGER, c3 INTEGER, c4 INTEGER, c5 INTEGER, c6 INTEGER, c7 INTEGER, c8 INTEGER, c9 INTEGER, info VARCHAR);
CREATE TABLE t6(c1 INTEGER NOT NULL, c2 INTEGER, c3 INTEGER, c4 INTEGER, c5 INTEGER, c6 INTEGER, c7 INTEGER, c8 INTEGER, c9 INTEGER, info VARCHAR);
CREATE TABLE t7(c1 INTEGER NOT NULL, c2 INTEGER, c3 INTEGER, c4 INTEGER, c5 INTEGER, c6 INTEGER, c7 INTEGER, c8 INTEGER, c9 INTEGER, info VARCHAR);
CREATE TABLE t8(c1 INTEGER NOT NULL, c2 INTEGER, c3 INTEGER, c4 INTEGER, c5 INTEGER, c6 INTEGER, c7 INTEGER, c8 INTEGER, c9 INTEGER, info VARCHAR);
CREATE TABLE t9(c1 INTEGER NOT NULL, c2 INTEGER, c3 INTEGER, c4 INTEGER, c5 INTEGER, c6 INTEGER, c7 INTEGER, c8 INTEGER, c9 INTEGER, info VARCHAR);
CREATE TABLE t10(c1 INTEGER NOT NULL, c2 INTEGER, c3 INTEGER, c4 INTEGER, c5 INTEGER, c6 INTEGER, c7 INTEGER, c8 INTEGER, c9 INTEGER, info VARCHAR);
CREATE TABLE t11(c1 INTEGER NOT NULL, c2 INTEGER, c3 INTEGER, c4 INTEGER, c5 INTEGER, c6 INTEGER, c7 INTEGER, c8 INTEGER, c9 INTEGER, info VARCHAR);
CREATE TABLE t12(c1 INTEGER NOT NULL, c2 INTEGER, c3 INTEGER, c4 INTEGER, c5 INTEGER, c6 INTEGER, c7 INTEGER, c8 INTEGER, c9 INTEGER, info VARCHAR);
CREATE TABLE t13(c1 INTEGER NOT NULL, c2 INTEGER, c3 INTEGER, c4 INTEGER, c5 INTEGER, c6 INTEGER, c7 INTEGER, c8 INTEGER, c9 INTEGER, info VARCHAR);
CREATE TABLE t14(c1 INTEGER NOT NULL, c2 INTEGER, c3 INTEGER, c4 INTEGER, c5 INTEGER, c6 INTEGER, c7 INTEGER, c8 INTEGER, c9 INTEGER, info VARCHAR);
CREATE TABLE t15(c1 INTEGER NOT NULL, c2 INTEGER, c3 INTEGER, c4 INTEGER, c5 INTEGER, c6 INTEGER, c7 INTEGER, c8 INTEGER, c9 INTEGER, info VARCHAR);
CREATE TABLE t16(c1 INTEGER NOT NULL, c2 INTEGER, c3 INTEGER, c4 INTEGER, c5 INTEGER, c6 INTEGER, c7 INTEGER, c8 INTEGER, c9 INTEGER, info VARCHAR);
CREATE TABLE t17(c1 INTEGER NOT NULL, c2 INTEGER, c3 INTEGER, c4 INTEGER, c5 INTEGER, c6 INTEGER, c7 INTEGER, c8 INTEGER, c9 INTEGER, info VARCHAR);
CREATE TABLE t18(c1 INTEGER NOT NULL, c2 INTEGER, c3 INTEGER, c4 INTEGER, c5 INTEGER, c6 INTEGER, c7 INTEGER, c8 INTEGER, c9 INTEGER, info VARCHAR);
CREATE TABLE t19(c1 INTEGER NOT NULL, c2 INTEGER, c3 INTEGER, c4 INTEGER, c5 INTEGER, c6 INTEGER, c7 INTEGER, c8 INTEGER, c9 INTEGER, info VARCHAR);
CREATE TABLE t20(c1 INTEGER NOT NULL, c2 INTEGER, c3 INTEGER, c4 INTEGER, c5 INTEGER, c6 INTEGER, c7 INTEGER, c8 INTEGER, c9 INTEGER, info VARCHAR);
CREATE TABLE t21(c1 INTEGER NOT NULL, c2 INTEGER, c3 INTEGER, c4 INTEGER, c5 INTEGER, c6 INTEGER, c7 INTEGER, c8 INTEGER, c9 INTEGER, info VARCHAR);
CREATE TABLE t22(c1 INTEGER NOT NULL, c2 INTEGER, c3 INTEGER, c4 INTEGER, c5 INTEGER, c6 INTEGER, c7 INTEGER, c8 INTEGER, c9 INTEGER, info VARCHAR);
CREATE TABLE t23(c1 INTEGER NOT NULL, c2 INTEGER, c3 INTEGER, c4 INTEGER, c5 INTEGER, c6 INTEGER, c7 INTEGER, c8 INTEGER, c9 INTEGER, info VARCHAR);
CREATE TABLE t24(c1 INTEGER NOT NULL, c2 INTEGER, c3 INTEGER, c4 INTEGER, c5 INTEGER, c6 INTEGER, c7 INTEGER, c8 INTEGER, c9 INTEGER, info VARCHAR);
CREATE TABLE t25(c1 INTEGER NOT NULL, c2 INTEGER, c3 INTEGER, c4 INTEGER, c5 INTEGER, c6 INTEGER, c7 INTEGER, c8 INTEGER, c9 INTEGER, info VARCHAR);
CREATE TABLE t26(c1 INTEGER NOT NULL, c2 INTEGER, c3 INTEGER, c4 INTEGER, c5 INTEGER, c6 INTEGER, c7 INTEGER, c8 INTEGER, c9 INTEGER, info VARCHAR);
CREATE TABLE t27(c1 INTEGER NOT NULL, c2 INTEGER, c3 INTEGER, c4 INTEGER, c5 INTEGER, c6 INTEGER, c7 INTEGER, c8 INTEGER, c9 INTEGER, info VARCHAR);
CREATE TABLE t28(c1 INTEGER NOT NULL, c2 INTEGER, c3 INTEGER, c4 INTEGER, c5 INTEGER, c6 INTEGER, c7 INTEGER, c8 INTEGER, c9 INTEGER, info VARCHAR);
CREATE TABLE t29(c1 INTEGER NOT NULL, c2 INTEGER, c3 INTEGER, c4 INTEGER, c5 INTEGER, c6 INTEGER, c7 INTEGER, c8 INTEGER, c9 INTEGER, info VARCHAR);
CREATE TABLE t30(c1 INTEGER NOT NULL, c2 INTEGER, c3 INTEGER, c4 INTEGER, c5 INTEGER, c6 INTEGER, c7 INTEGER, c8 INTEGER, c9 INTEGER, info VARCHAR);
CREATE TABLE t31(c1 INTEGER NOT NULL, c2 INTEGER, c3 INTEGER, c4 INTEGER, c5 INTEGER, c6 INTEGER, c7 INTEGER, c8 INTEGER, c9 INTEGER, info VARCHAR);
CREATE TABLE t32(c1 INTEGER NOT NULL, c2 INTEGER, c3 INTEGER, c4 INTEGER, c5 INTEGER, c6 INTEGER, c7 INTEGER, c8 INTEGER, c9 INTEGER, info VARCHAR);
CREATE TABLE t33(c1 INTEGER NOT NULL, c2 INTEGER, c3 INTEGER, c4 INTEGER, c5 INTEGER, c6 INTEGER, c7 INTEGER, c8 INTEGER, c9 INTEGER, info VARCHAR);
CREATE TABLE t34(c1 INTEGER NOT NULL, c2 INTEGER, c3 INTEGER, c4 INTEGER, c5 INTEGER, c6 INTEGER, c7 INTEGER, c8 INTEGER, c9 INTEGER, info VARCHAR);
CREATE TABLE t35(c1 INTEGER NOT NULL, c2 INTEGER, c3 INTEGER, c4 INTEGER, c5 INTEGER, c6 INTEGER, c7 INTEGER, c8 INTEGER, c9 INTEGER, info VARCHAR);
CREATE TABLE t36(c1 INTEGER NOT NULL, c2 INTEGER, c3 INTEGER, c4 INTEGER, c5 INTEGER, c6 INTEGER, c7 INTEGER, c8 INTEGER, c9 INTEGER, info VARCHAR);
CREATE TABLE t37(c1 INTEGER NOT NULL, c2 INTEGER, c3 INTEGER, c4 INTEGER, c5 INTEGER, c6 INTEGER, c7 INTEGER, c8 INTEGER, c9 INTEGER, info VARCHAR);
CREATE TABLE t38(c1 INTEGER NOT NULL, c2 INTEGER, c3 INTEGER, c4 INTEGER, c5 INTEGER, c6 INTEGER, c7 INTEGER, c8 INTEGER, c9 INTEGER, info VARCHAR);
CREATE TABLE t39(c1 INTEGER NOT NULL, c2 INTEGER, c3 INTEGER, c4 INTEGER, c5 INTEGER, c6 INTEGER, c7 INTEGER, c8 INTEGER, c9 INTEGER, info VARCHAR);
CREATE TABLE t40(c1 INTEGER NOT NULL, c2 INTEGER, c3 INTEGER, c4 INTEGER, c5 INTEGER, c6 INTEGER, c7 INTEGER, c8 INTEGER, c9 INTEGER, info VARCHAR);
CREATE TABLE t41(c1 INTEGER NOT NULL, c2 INTEGER, c3 INTEGER, c4 INTEGER, c5 INTEGER, c6 INTEGER, c7 INTEGER, c8 INTEGER, c9 INTEGER, info VARCHAR);
CREATE TABLE t42(c1 INTEGER NOT NULL, c2 INTEGER, c3 INTEGER, c4 INTEGER, c5 INTEGER, c6 INTEGER, c7 INTEGER, c8 INTEGER, c9 INTEGER, info VARCHAR);
CREATE TABLE t43(c1 INTEGER NOT NULL, c2 INTEGER, c3 INTEGER, c4 INTEGER, c5 INTEGER, c6 INTEGER, c7 INTEGER, c8 INTEGER, c9 INTEGER, info VARCHAR);
CREATE TABLE t44(c1 INTEGER NOT NULL, c2 INTEGER, c3 INTEGER, c4 INTEGER, c5 INTEGER, c6 INTEGER, c7 INTEGER, c8 INTEGER, c9 INTEGER, info VARCHAR);
CREATE TABLE t45(c1 INTEGER NOT NULL, c2 INTEGER, c3 INTEGER, c4 INTEGER, c5 INTEGER, c6 INTEGER, c7 INTEGER, c8 INTEGER, c9 INTEGER, info VARCHAR);
CREATE TABLE t46(c1 INTEGER NOT NULL, c2 INTEGER, c3 INTEGER, c4 INTEGER, c5 INTEGER, c6 INTEGER, c7 INTEGER, c8 INTEGER, c9 INTEGER, info VARCHAR);
CREATE TABLE t47(c1 INTEGER NOT NULL, c2 INTEGER, c3 INTEGER, c4 INTEGER, c5 INTEGER, c6 INTEGER, c7 INTEGER, c8 INTEGER, c9 INTEGER, info VARCHAR);
CREATE TABLE t48(c1 INTEGER NOT NULL, c2 INTEGER, c3 INTEGER, c4 INTEGER, c5 INTEGER, c6 INTEGER, c7 INTEGER, c8 INTEGER, c9 INTEGER, info VARCHAR);

# If operators are unnecessarily re-optimized, this query will take forever and most likely time out the test runner.
# The join order below is part of the benchmark and is kept exactly as written.
run
SELECT t1.c2
FROM t1
INNER JOIN t2 ON (t1.c2 = t2.c1)
LEFT JOIN t4 ON (t1.c3 = t4.c1)
LEFT JOIN t5 ON (t4.c2 = t5.c1)
LEFT JOIN t6 ON (t5.c2 = t6.c1)
LEFT JOIN t7 ON (t6.c2 = t7.c1)
LEFT JOIN t8 ON (t7.c2 = t8.c1)
LEFT JOIN t9 ON (t8.c2 = t9.c1)
LEFT JOIN t10 ON (t9.c2 = t10.c1)
LEFT JOIN t11 ON (t10.c2 = t11.c1)
LEFT JOIN t12 ON (t11.c2 = t12.c1)
LEFT JOIN t13 ON (t12.c2 = t13.c1)
LEFT JOIN t14 ON (t13.c2 = t14.c1)
LEFT JOIN t15 ON (t14.c2 = t15.c1)
LEFT JOIN t16 ON (t15.c2 = t16.c1)
LEFT JOIN t17 ON (t16.c2 = t17.c1)
LEFT JOIN t18 ON (t17.c2 = t18.c1)
LEFT JOIN t19 ON (t18.c2 = t19.c1)
LEFT JOIN t20 ON (t19.c2 = t20.c1)
LEFT JOIN t21 ON (t20.c2 = t21.c1)
LEFT JOIN t22 ON (t21.c2 = t22.c1)
LEFT JOIN t23 ON (t22.c2 = t23.c1)
LEFT JOIN t24 ON (t23.c2 = t24.c1)
LEFT JOIN t25 ON (t24.c2 = t25.c1)
LEFT JOIN t26 ON (t25.c2 = t26.c1)
LEFT JOIN t27 ON (t26.c2 = t27.c1)
LEFT JOIN t28 ON (t27.c2 = t28.c1)
LEFT JOIN t29 ON (t28.c2 = t29.c1)
LEFT JOIN t30 ON (t29.c2 = t30.c1)
LEFT JOIN t31 ON (t30.c2 = t31.c1)
LEFT JOIN t32 ON (t31.c2 = t32.c1)
LEFT JOIN t33 ON (t32.c2 = t33.c1)
LEFT JOIN t34 ON (t33.c2 = t34.c1)
LEFT JOIN t35 ON (t34.c2 = t35.c1)
LEFT JOIN t36 ON (t35.c2 = t36.c1)
LEFT JOIN t37 ON (t36.c2 = t37.c1)
LEFT JOIN t38 ON (t37.c2 = t38.c1)
LEFT JOIN t39 ON (t38.c2 = t39.c1)
LEFT JOIN t3 AS ttttt3 ON (ttttt3.c6 = t33.c5)
LEFT JOIN t40 ON (t39.c2 = t40.c1)
LEFT JOIN t41 ON (t40.c2 = t41.c1)
LEFT JOIN t42 ON (t41.c2 = t42.c1)
LEFT JOIN t43 ON (t42.c2 = t43.c1)
LEFT JOIN t44 ON (t43.c2 = t44.c1)
LEFT JOIN t45 ON (t44.c2 = t45.c1)
LEFT JOIN t46 ON (t45.c2 = t46.c1)
LEFT JOIN t47 ON (t46.c2 = t47.c1)
LEFT JOIN t48 ON (t47.c2 = t48.c1)
LEFT JOIN t5 AS ttt5 ON (t42.c1 = ttt5.c3)
LEFT JOIN t5 AS ttt6 ON (ttt5.c4 = ttt6.c5)
LEFT JOIN t5 AS ttt7 ON (ttt6.c6 = ttt7.c7)
INNER JOIN t3 ON (t2.c2 = t3.c1)
LEFT JOIN t4 AS tt4 ON (t47.c1 = tt4.c3)
LEFT JOIN t4 AS tt5 ON (tt4.c4 = tt5.c5);

result I

View File

@@ -0,0 +1,35 @@
# name: benchmark/micro/join/nary_positional_join.benchmark
# description: N-ary positional join that reassembles lineitem columns, checked with TPC-H Q1
# group: [join]
# load generates TPC-H at scale factor 1 with the table suffix '_positional',
# then defines a lineitem view that stitches seven single-column projections
# of lineitem_positional back together with positional joins.
# run executes TPC-H query 1 through the tpch pragma.
name N-Ary Positional Join
group join
require tpch
cache tpch_sf1_positional.duckdb

load
CALL dbgen(sf=1, suffix='_positional');
CREATE VIEW lineitem AS (
    SELECT *
    FROM
        (SELECT l_returnflag FROM lineitem_positional)
        POSITIONAL JOIN
        (SELECT l_linestatus FROM lineitem_positional)
        POSITIONAL JOIN
        (SELECT l_quantity FROM lineitem_positional)
        POSITIONAL JOIN
        (SELECT l_extendedprice FROM lineitem_positional)
        POSITIONAL JOIN
        (SELECT l_discount FROM lineitem_positional)
        POSITIONAL JOIN
        (SELECT l_tax FROM lineitem_positional)
        POSITIONAL JOIN
        (SELECT l_shipdate FROM lineitem_positional)
)

run
PRAGMA tpch(1)

result extension/tpch/dbgen/answers/sf1/q01.csv
#<FILE>:extension/tpch/dbgen/answers/sf1/q01.csv

View File

@@ -0,0 +1,24 @@
# name: benchmark/micro/join/positional_join.benchmark
# description: Positional join between two tables of random integers
# group: [join]
# df1 and df2 each hold 10000000 seeded random integers produced by
# round(random()*100); the run query pairs them row by row.
name Positional Join
group join

load
SELECT SETSEED(0.8675309);
CREATE TABLE df1 AS
    SELECT round(random()*100)::INTEGER AS df1_0
    FROM range(0, 10000000)
;
CREATE TABLE df2 AS
    SELECT round(random()*100)::INTEGER AS df2_0
    FROM range(0, 10000000)
;

run
SELECT
    COUNT(*),
    SUM(df1_0),
    SUM(df2_0),
FROM df1
POSITIONAL JOIN df2;

result III
10000000 499831718 499971590

View File

@@ -0,0 +1,11 @@
# name: benchmark/micro/join/prefer_right_deep_join_trees.benchmark
# description: Prefer right deep join trees, so we can rebuild the same tables.
# group: [join]
# t1 and t2 hold 48183280 sequential integers each; t3 holds 2000000.
# The run query nests one RIGHT JOIN inside another.
load
CREATE OR REPLACE TABLE t1 AS SELECT range AS a FROM range(48183280);
CREATE OR REPLACE TABLE t2 AS SELECT range AS b FROM range(48183280);
CREATE OR REPLACE TABLE t3 AS SELECT range AS c FROM range(2000000);

run
SELECT *
FROM t1
RIGHT JOIN (
    SELECT *
    FROM t2
    RIGHT JOIN t3 ON b = c
) ON a = b;

View File

@@ -0,0 +1,15 @@
# name: benchmark/micro/join/range_join.benchmark
# description: Range join between integers
# group: [join]
# integers has 100000 rows; column i is reduced modulo 1000 and column j modulo 100.
# Because i stays below 1000, a.i // 1000 is 0 for every row, so no pair
# satisfies 0 > b.j and the expected count is 0.
name Range Join
group join

load
CREATE TABLE integers AS
    SELECT
        ((i * 9582398353) % 1000)::INTEGER AS i,
        ((i * 847892347987) % 100)::INTEGER AS j
    FROM range(0, 100000) tbl(i);

run
SELECT COUNT(*)
FROM integers a, integers b
WHERE (a.i // 1000) > b.j
ORDER BY 1;

result I
0

View File

@@ -0,0 +1,15 @@
# name: benchmark/micro/join/range_join_big_result.benchmark
# description: Range join between integers that produces a large result
# group: [join]
# integers has 50000 rows; column i is reduced modulo 1000 and column j modulo 100.
# Because i stays below 1000, a.i // 1000 is 0 for every row, so each row of a
# pairs with every row of b whose j is greater than 0.
name Range Join with big result
group join

load
CREATE TABLE integers AS
    SELECT
        ((i * 9582398353) % 1000)::INTEGER AS i,
        ((i * 847892347987) % 100)::INTEGER AS j
    FROM range(0, 50000) tbl(i);

run
SELECT COUNT(*)
FROM integers a, integers b
WHERE (a.i // 1000) < b.j
ORDER BY 1;

result I
2475000000

View File

@@ -0,0 +1,26 @@
# name: benchmark/micro/join/range_join_small_rhs.benchmark
# description: Range Join with a small RHS
# group: [join]
# trip_data has 100000000 rows of computed fare_amount and trip_distance values.
# The run query compares every row with a one-row aggregate subquery
# (mean plus three sample standard deviations of each column).
name Range Join with a small RHS
group join

load
CREATE TABLE trip_data(fare_amount DOUBLE, trip_distance DOUBLE);
INSERT INTO trip_data
    SELECT
        ((i * 458736436.734) % 100.0::DOUBLE),
        ((i * 745986489.963) % 1000.0::DOUBLE)
    FROM range(100000000) t(i);

run
SELECT COUNT(*)
FROM
    trip_data,
    (
        SELECT
            AVG(fare_amount) + 3 * STDDEV_SAMP(fare_amount) AS max_fare,
            AVG(trip_distance) + 3 * STDDEV_SAMP(trip_distance) AS max_distance
        FROM trip_data
    ) AS sub
WHERE fare_amount > 0
    AND fare_amount < sub.max_fare
    AND trip_distance > 0
    AND trip_distance < sub.max_distance

View File

@@ -0,0 +1,23 @@
# name: benchmark/micro/join/right_outer_join_left_big.benchmark
# description: RIGHT OUTER JOIN between two tables, where the left table is significantly bigger than the right table
# group: [join]
# small_table has 10000 rows and big_table has 100000000 rows with the same
# column layout: a sequential pkey, a two-character c0 and six random integer
# columns of growing range. setseed fixes the random data for the checked result.
name Right Outer Join (big LHS, small RHS)
group join

load
SELECT setseed(0.4);
CREATE TABLE small_table (
    pkey INTEGER,
    c0 CHAR(2),
    c_1k INTEGER,
    c_10k INTEGER,
    c_100k INTEGER,
    c_1m INTEGER,
    c_10m INTEGER,
    c_100m INTEGER
);
INSERT INTO small_table
    SELECT
        i,
        concat('A',mod(i,2)),
        (random()* 1000)::INT,
        (random()* 10000)::INT,
        (random()* 100000)::INT,
        (random()* 1000000)::INT,
        (random()* 10000000)::INT,
        (random()* 100000000)::INT
    FROM range(0,10000) tbl(i) ;
CREATE TABLE big_table (
    pkey INTEGER,
    c0 CHAR(2),
    c_1k INTEGER,
    c_10k INTEGER,
    c_100k INTEGER,
    c_1m INTEGER,
    c_10m INTEGER,
    c_100m INTEGER
);
INSERT INTO big_table
    SELECT
        i,
        concat('A',mod(i,2)),
        (random()* 1000)::INT,
        (random()* 10000)::INT,
        (random()* 100000)::INT,
        (random()* 1000000)::INT,
        (random()* 10000000)::INT,
        (random()* 100000000)::INT
    FROM range(0,100000000) tbl(i);

run
SELECT count(*)
FROM big_table f
RIGHT OUTER JOIN small_table d
    ON ( d.pkey=f.c_10k);

result I
99995018

View File

@@ -0,0 +1,46 @@
# name: benchmark/micro/join/skinny_probe_wide_build.benchmark
# description: Join between a skinny table and a wide table, where the wide table should be the probe side
# group: [join]
# skinny has 1000000 rows of three integer columns; wide has 300000 rows with
# an integer pk followed by 34 string columns built from the row number plus a
# four-digit suffix.
load
CREATE TABLE skinny AS
    SELECT range AS a, range AS b, range AS c
    FROM range(1000000);
CREATE TABLE wide AS
    SELECT
        range AS pk,
        (range::VARCHAR || '1111') AS a,
        (range::VARCHAR || '2222') AS b,
        (range::VARCHAR || '3333') AS c,
        (range::VARCHAR || '4444') AS d,
        (range::VARCHAR || '5555') AS e,
        (range::VARCHAR || '6666') AS f,
        (range::VARCHAR || '7777') AS g,
        (range::VARCHAR || '8888') AS h,
        (range::VARCHAR || '9999') AS i,
        (range::VARCHAR || '0000') AS j,
        (range::VARCHAR || '0011') AS k,
        (range::VARCHAR || '0022') AS l,
        (range::VARCHAR || '0033') AS m,
        (range::VARCHAR || '9999') AS o,
        (range::VARCHAR || '0000') AS p,
        (range::VARCHAR || '0011') AS q,
        (range::VARCHAR || '0022') AS u,
        (range::VARCHAR || '0033') AS r,
        (range::VARCHAR || '9999') AS s,
        (range::VARCHAR || '0000') AS t,
        (range::VARCHAR || '0011') AS w,
        (range::VARCHAR || '0022') AS y,
        (range::VARCHAR || '0033') AS z,
        (range::VARCHAR || '9999') AS aa,
        (range::VARCHAR || '0000') AS bb,
        (range::VARCHAR || '0011') AS cc,
        (range::VARCHAR || '0022') AS dd,
        (range::VARCHAR || '0033') AS ee,
        (range::VARCHAR || '9999') AS ff,
        (range::VARCHAR || '0000') AS gg,
        (range::VARCHAR || '0011') AS hh,
        (range::VARCHAR || '0022') AS ii,
        (range::VARCHAR || '0033') AS jj,
        (range::VARCHAR || '0044') AS kk
    FROM range(300000);

# wide should be the probe side, skinny should be on the build side
run
SELECT *
FROM wide w, skinny s
WHERE w.pk=s.a;