should be it

This commit is contained in:
2025-10-24 19:21:19 -05:00
parent a4b23fc57c
commit f09560c7b1
14047 changed files with 3161551 additions and 1 deletions

View File

@@ -0,0 +1,26 @@
# name: ${FILE_PATH}
# description: ${DESCRIPTION}
# group: [h2oaijoin]
require httpfs load_only
name Q${QUERY_NUMBER_PADDED}
group H2OAI Join
storage persistent
cache h2oaijoin.duckdb
retry load 5
load benchmark/h2oai/join/queries/load.sql
run benchmark/h2oai/join/queries/q${QUERY_NUMBER_PADDED}.sql
result_query ${RESULT_COLUMNS}
${RESULT_QUERY}
----
${RESULT_ANSWER}
cleanup
DROP TABLE ans

View File

@@ -0,0 +1,10 @@
# name: benchmark/h2oai/join/q01.benchmark
# description: Run query 01 from the H2OAI join benchmark
# group: [join]
template benchmark/h2oai/join/h2oai.benchmark.in
QUERY_NUMBER=1
QUERY_NUMBER_PADDED=01
RESULT_COLUMNS=III
RESULT_QUERY=SELECT COUNT(DISTINCT id4), SUM(v2), COUNT(*) FROM ans
RESULT_ANSWER=9 347720187.3928393 8998860

View File

@@ -0,0 +1,10 @@
# name: benchmark/h2oai/join/q02.benchmark
# description: Run query 02 from the H2OAI join benchmark
# group: [join]
template benchmark/h2oai/join/h2oai.benchmark.in
QUERY_NUMBER=2
QUERY_NUMBER_PADDED=02
RESULT_COLUMNS=IIIII
RESULT_QUERY=SELECT COUNT(DISTINCT medium_id1), COUNT(DISTINCT medium_id4), COUNT(DISTINCT medium_id5), SUM(v2), COUNT(*) FROM ans
RESULT_ANSWER=10 10 9000 449999844.93783 8998412

View File

@@ -0,0 +1,10 @@
# name: benchmark/h2oai/join/q03.benchmark
# description: Run query 03 from the H2OAI join benchmark
# group: [join]
template benchmark/h2oai/join/h2oai.benchmark.in
QUERY_NUMBER=3
QUERY_NUMBER_PADDED=03
RESULT_COLUMNS=IIIII
RESULT_QUERY=SELECT COUNT(DISTINCT medium_id1), COUNT(DISTINCT medium_id4), COUNT(DISTINCT medium_id5), SUM(v2), COUNT(*) FROM ans
RESULT_ANSWER=10 10 9000 449999844.9375197 10000000

View File

@@ -0,0 +1,10 @@
# name: benchmark/h2oai/join/q04.benchmark
# description: Run query 04 from the H2OAI join benchmark
# group: [join]
template benchmark/h2oai/join/h2oai.benchmark.in
QUERY_NUMBER=4
QUERY_NUMBER_PADDED=04
RESULT_COLUMNS=IIIII
RESULT_QUERY=SELECT COUNT(DISTINCT medium_id1), COUNT(DISTINCT medium_id2), COUNT(DISTINCT medium_id4), SUM(v2), COUNT(*) FROM ans
RESULT_ANSWER=10 9000 10 449999844.9375197 8998412

View File

@@ -0,0 +1,10 @@
# name: benchmark/h2oai/join/q05.benchmark
# description: Run query 05 from the H2OAI join benchmark
# group: [join]
template benchmark/h2oai/join/h2oai.benchmark.in
QUERY_NUMBER=5
QUERY_NUMBER_PADDED=05
RESULT_COLUMNS=IIIIIII
RESULT_QUERY=SELECT COUNT(DISTINCT big_id1), COUNT(DISTINCT big_id2), COUNT(DISTINCT big_id4), COUNT(DISTINCT big_id5), COUNT(DISTINCT big_id6), SUM(v2), COUNT(*) FROM ans
RESULT_ANSWER=10 10000 10 10000 9000000 449860428.61554617 9000000

View File

@@ -0,0 +1,4 @@
-- Create the four benchmark input tables (x, small, medium, big) from the
-- gzipped CSV files attached to the duckdb-data v1.0 GitHub release.
-- Every statement uses IF NOT EXISTS, so a table that is already present in
-- the database is left untouched.

CREATE TABLE IF NOT EXISTS x AS
SELECT *
FROM read_csv_auto('https://github.com/duckdb/duckdb-data/releases/download/v1.0/J1_1e7_NA_0_0.csv.gz');

CREATE TABLE IF NOT EXISTS small AS
SELECT *
FROM read_csv_auto('https://github.com/duckdb/duckdb-data/releases/download/v1.0/J1_1e7_1e1_0_0.csv.gz');

CREATE TABLE IF NOT EXISTS medium AS
SELECT *
FROM read_csv_auto('https://github.com/duckdb/duckdb-data/releases/download/v1.0/J1_1e7_1e4_0_0.csv.gz');

CREATE TABLE IF NOT EXISTS big AS
SELECT *
FROM read_csv_auto('https://github.com/duckdb/duckdb-data/releases/download/v1.0/J1_1e7_1e7_0_0.csv.gz');

View File

@@ -0,0 +1 @@
-- Inner join of x with small on the shared id1 column.
-- The temporary result table ans holds every column of x, small's id4
-- (exposed as small_id4) and v2.
CREATE TEMP TABLE ans AS
SELECT
    x.*,
    small.id4 AS small_id4,
    v2
FROM x
INNER JOIN small
    USING (id1);

View File

@@ -0,0 +1 @@
-- Inner join of x with medium on the shared id2 column.
-- The temporary result table ans holds every column of x, medium's id1, id4
-- and id5 (prefixed with medium_) and v2.
CREATE TEMP TABLE ans AS
SELECT
    x.*,
    medium.id1 AS medium_id1,
    medium.id4 AS medium_id4,
    medium.id5 AS medium_id5,
    v2
FROM x
INNER JOIN medium
    USING (id2);

View File

@@ -0,0 +1 @@
-- Left join of x with medium on the shared id2 column: every row of x is
-- kept, and the medium_* columns and v2 are NULL where medium has no match.
-- The result is stored in the temporary table ans.
CREATE TEMP TABLE ans AS
SELECT
    x.*,
    medium.id1 AS medium_id1,
    medium.id4 AS medium_id4,
    medium.id5 AS medium_id5,
    v2
FROM x
LEFT JOIN medium
    USING (id2);

View File

@@ -0,0 +1 @@
-- Inner join of x with medium on the shared id5 column.
-- The temporary result table ans holds every column of x, medium's id1, id2
-- and id4 (prefixed with medium_) and v2.
CREATE TEMP TABLE ans AS
SELECT
    x.*,
    medium.id1 AS medium_id1,
    medium.id2 AS medium_id2,
    medium.id4 AS medium_id4,
    v2
FROM x
INNER JOIN medium
    USING (id5);

View File

@@ -0,0 +1 @@
-- Inner join of x with big on the shared id3 column.
-- The temporary result table ans holds every column of x, big's id1, id2,
-- id4, id5 and id6 (prefixed with big_) and v2.
CREATE TEMP TABLE ans AS
SELECT
    x.*,
    big.id1 AS big_id1,
    big.id2 AS big_id2,
    big.id4 AS big_id4,
    big.id5 AS big_id5,
    big.id6 AS big_id6,
    v2
FROM x
INNER JOIN big
    USING (id3);