# name: test/db-benchmark/join.test_slow
# description: Join benchmark (0.5GB - small dataset) from h2oai db-benchmark (https://github.com/h2oai/db-benchmark)
# group: [db-benchmark]

# The input files below are fetched over HTTPS, so the httpfs extension must be available.
require httpfs

# Pin the worker thread count used for the rest of this file.
statement ok
PRAGMA threads=16

# x is the left-hand table of every join in this file.
statement ok
CREATE TABLE x AS
SELECT *
FROM read_csv_auto('https://github.com/duckdb/duckdb-data/releases/download/v1.0/J1_1e7_NA_0_0.csv.gz');

# small, medium and big are the right-hand tables joined against x.
statement ok
CREATE TABLE small AS
SELECT *
FROM read_csv_auto('https://github.com/duckdb/duckdb-data/releases/download/v1.0/J1_1e7_1e1_0_0.csv.gz');

statement ok
CREATE TABLE medium AS
SELECT *
FROM read_csv_auto('https://github.com/duckdb/duckdb-data/releases/download/v1.0/J1_1e7_1e4_0_0.csv.gz');

statement ok
CREATE TABLE big AS
SELECT *
FROM read_csv_auto('https://github.com/duckdb/duckdb-data/releases/download/v1.0/J1_1e7_1e7_0_0.csv.gz');

# Sanity checks: each loaded table must contain the expected number of rows
# before any join is run.
query I
SELECT COUNT(*) FROM x;
----
10000000

query I
SELECT COUNT(*) FROM small;
----
10

query I
SELECT COUNT(*) FROM medium;
----
10000

query I
SELECT COUNT(*) FROM big;
----
10000000

# q1: inner join of x with small on id1, keeping every column of x plus
# small.id4 and v2. The result is materialized as ans.
statement ok
CREATE TABLE ans AS
SELECT
    x.*,
    small.id4 AS small_id4,
    v2
FROM x
INNER JOIN small USING (id1);

# Validate the join output through its row count and the sums of v1 and v2.
query III
SELECT COUNT(*), SUM(v1) AS v1, SUM(v2) AS v2 FROM ans;
----
8998860	450015153.57734203	347720187.39596415

# Drop ans so the name is free for the next query.
statement ok
DROP TABLE ans;

# q2: inner join of x with medium on id2, keeping every column of x plus
# medium.id1, medium.id4, medium.id5 and v2. The result is materialized as ans.
statement ok
CREATE TABLE ans AS
SELECT
    x.*,
    medium.id1 AS medium_id1,
    medium.id4 AS medium_id4,
    medium.id5 AS medium_id5,
    v2
FROM x
INNER JOIN medium USING (id2);

# Validate the join output through its row count and the sums of v1 and v2.
query III
SELECT COUNT(*), SUM(v1) AS v1, SUM(v2) AS v2 FROM ans;
----
8998412	449954076.0263213	449999844.93746006

# Drop ans so the name is free for the next query.
statement ok
DROP TABLE ans;

# q3: same projection and join key (id2) as q2, but as a LEFT JOIN, so rows of
# x without a match in medium are kept. The expected count below is 10000000.
statement ok
CREATE TABLE ans AS
SELECT
    x.*,
    medium.id1 AS medium_id1,
    medium.id4 AS medium_id4,
    medium.id5 AS medium_id5,
    v2
FROM x
LEFT JOIN medium USING (id2);

# Validate the join output through its row count and the sums of v1 and v2.
query III
SELECT COUNT(*), SUM(v1) AS v1, SUM(v2) AS v2 FROM ans;
----
10000000	500043740.7523774	449999844.93746

# Drop ans so the name is free for the next query.
statement ok
DROP TABLE ans;

# q4: inner join of x with medium on id5, keeping every column of x plus
# medium.id1, medium.id2, medium.id4 and v2. The result is materialized as ans.
statement ok
CREATE TABLE ans AS
SELECT
    x.*,
    medium.id1 AS medium_id1,
    medium.id2 AS medium_id2,
    medium.id4 AS medium_id4,
    v2
FROM x
INNER JOIN medium USING (id5);

# Validate the join output through its row count and the sums of v1 and v2.
query III
SELECT COUNT(*), SUM(v1) AS v1, SUM(v2) AS v2 FROM ans;
----
8998412	449954076.02631813	449999844.93746257

# Drop ans so the name is free for the next query.
statement ok
DROP TABLE ans;

# q5: inner join of x with big on id3, keeping every column of x plus
# big.id1, big.id2, big.id4, big.id5, big.id6 and v2. The result is
# materialized as ans.
statement ok
CREATE TABLE ans AS
SELECT
    x.*,
    big.id1 AS big_id1,
    big.id2 AS big_id2,
    big.id4 AS big_id4,
    big.id5 AS big_id5,
    big.id6 AS big_id6,
    v2
FROM x
INNER JOIN big USING (id3);

# Validate the join output through its row count and the sums of v1 and v2.
query III
SELECT COUNT(*), SUM(v1) AS v1, SUM(v2) AS v2 FROM ans;
----
9000000	450032091.8405316	449860428.6155452

# Drop ans once the result has been checked.
statement ok
DROP TABLE ans;
