Files
email-tracker/external/duckdb/test/db-benchmark/join.test_slow
2025-10-24 19:21:19 -05:00

101 lines
2.5 KiB
Plaintext

# name: test/db-benchmark/join.test_slow
# description: Join benchmark (0.5GB - small dataset) from h2oai db-benchmark (https://github.com/h2oai/db-benchmark)
# group: [db-benchmark]
# Fixture setup: load the four benchmark CSVs over HTTPS into local tables.
# Remote reads need the httpfs extension, so the test is skipped without it.
# Every record below is terminated by a blank line; the sqllogictest format
# uses the blank line to know where a statement ends.
require httpfs

statement ok
PRAGMA threads=16

# x is the left-hand table of each of the joins q1-q5.
# SELECT * is deliberate here: the whole CSV is ingested as-is.
statement ok
CREATE TABLE x AS
SELECT *
FROM read_csv_auto('https://github.com/duckdb/duckdb-data/releases/download/v1.0/J1_1e7_NA_0_0.csv.gz');

# small, medium and big are the right-hand tables joined against x.
statement ok
CREATE TABLE small AS
SELECT *
FROM read_csv_auto('https://github.com/duckdb/duckdb-data/releases/download/v1.0/J1_1e7_1e1_0_0.csv.gz');

statement ok
CREATE TABLE medium AS
SELECT *
FROM read_csv_auto('https://github.com/duckdb/duckdb-data/releases/download/v1.0/J1_1e7_1e4_0_0.csv.gz');

statement ok
CREATE TABLE big AS
SELECT *
FROM read_csv_auto('https://github.com/duckdb/duckdb-data/releases/download/v1.0/J1_1e7_1e7_0_0.csv.gz');

# End of fixture setup.
# Row-count checks: confirm each CSV was ingested completely before any join
# result is compared. Each query record ends at a blank line so that the
# expected-result section does not absorb the record that follows it.
query I
SELECT COUNT(*) FROM x;
----
10000000

query I
SELECT COUNT(*) FROM small;
----
10

query I
SELECT COUNT(*) FROM medium;
----
10000

query I
SELECT COUNT(*) FROM big;
----
10000000

# End of row-count checks.
# q1
# Inner join of x with small on id1. The join output is materialized in ans,
# summarized by the query below, and then dropped.
statement ok
CREATE TABLE ans AS
SELECT
    x.*,
    small.id4 AS small_id4,
    v2
FROM x
INNER JOIN small USING (id1);

# Expected values are listed one per line, in column order:
# COUNT(*), SUM(v1), SUM(v2). This layout does not depend on tab separators.
query III
SELECT
    COUNT(*),
    SUM(v1) AS v1,
    SUM(v2) AS v2
FROM ans;
----
8998860
450015153.57734203
347720187.39596415

statement ok
DROP TABLE ans;
# q2
# Inner join of x with medium on id2. The join output is materialized in ans,
# summarized by the query below, and then dropped.
statement ok
CREATE TABLE ans AS
SELECT
    x.*,
    medium.id1 AS medium_id1,
    medium.id4 AS medium_id4,
    medium.id5 AS medium_id5,
    v2
FROM x
INNER JOIN medium USING (id2);

# Expected values are listed one per line, in column order:
# COUNT(*), SUM(v1), SUM(v2). This layout does not depend on tab separators.
query III
SELECT
    COUNT(*),
    SUM(v1) AS v1,
    SUM(v2) AS v2
FROM ans;
----
8998412
449954076.0263213
449999844.93746006

statement ok
DROP TABLE ans;
# q3
# Left join of x with medium on id2: rows of x without a match are kept, so
# the expected COUNT(*) below is 10000000 rather than the inner-join count.
# The join output is materialized in ans, summarized, and then dropped.
statement ok
CREATE TABLE ans AS
SELECT
    x.*,
    medium.id1 AS medium_id1,
    medium.id4 AS medium_id4,
    medium.id5 AS medium_id5,
    v2
FROM x
LEFT JOIN medium USING (id2);

# Expected values are listed one per line, in column order:
# COUNT(*), SUM(v1), SUM(v2). This layout does not depend on tab separators.
query III
SELECT
    COUNT(*),
    SUM(v1) AS v1,
    SUM(v2) AS v2
FROM ans;
----
10000000
500043740.7523774
449999844.93746

statement ok
DROP TABLE ans;
# q4
# Inner join of x with medium on id5. The join output is materialized in ans,
# summarized by the query below, and then dropped.
statement ok
CREATE TABLE ans AS
SELECT
    x.*,
    medium.id1 AS medium_id1,
    medium.id2 AS medium_id2,
    medium.id4 AS medium_id4,
    v2
FROM x
INNER JOIN medium USING (id5);

# Expected values are listed one per line, in column order:
# COUNT(*), SUM(v1), SUM(v2). This layout does not depend on tab separators.
query III
SELECT
    COUNT(*),
    SUM(v1) AS v1,
    SUM(v2) AS v2
FROM ans;
----
8998412
449954076.02631813
449999844.93746257

statement ok
DROP TABLE ans;
# q5
# Inner join of x with big on id3. The join output is materialized in ans,
# summarized by the query below, and then dropped.
statement ok
CREATE TABLE ans AS
SELECT
    x.*,
    big.id1 AS big_id1,
    big.id2 AS big_id2,
    big.id4 AS big_id4,
    big.id5 AS big_id5,
    big.id6 AS big_id6,
    v2
FROM x
INNER JOIN big USING (id3);

# Expected values are listed one per line, in column order:
# COUNT(*), SUM(v1), SUM(v2). This layout does not depend on tab separators.
query III
SELECT
    COUNT(*),
    SUM(v1) AS v1,
    SUM(v2) AS v2
FROM ans;
----
9000000
450032091.8405316
449860428.6155452

statement ok
DROP TABLE ans;