should be it

This commit is contained in:
2025-10-24 19:21:19 -05:00
parent a4b23fc57c
commit f09560c7b1
14047 changed files with 3161551 additions and 1 deletions

View File

@@ -0,0 +1,24 @@
# name: ${FILE_PATH}
# description: ${DESCRIPTION}
# group: [h2oai]

# Shared template for the H2OAI group-by benchmarks. Each qNN.benchmark file
# that references this template supplies QUERY_NUMBER_PADDED, RESULT_COLUMNS,
# RESULT_QUERY and RESULT_ANSWER. Directive blocks are separated by blank lines.

require httpfs load_only

name Q${QUERY_NUMBER_PADDED}
group H2OAI
cache h2oai.duckdb

retry load 5

# Loads the input data into table x_group.
load benchmark/h2oai/group/queries/load.sql

# The benchmarked query; it writes its output to the temp table `ans`.
run benchmark/h2oai/group/queries/q${QUERY_NUMBER_PADDED}.sql

# Correctness check evaluated against `ans`.
result_query ${RESULT_COLUMNS}
${RESULT_QUERY}
----
${RESULT_ANSWER}

# Removes the `ans` table created by the run query.
cleanup
DROP TABLE ans

View File

@@ -0,0 +1,10 @@
# name: benchmark/h2oai/group/q01.benchmark
# description: Run query 01 from the H2OAI benchmark
# group: [group]

# Check: distinct id1 count, total of v1 and row count of `ans`.
# RESULT_ANSWER holds one tab-separated value per result column.
template benchmark/h2oai/group/h2oai.benchmark.in
QUERY_NUMBER=1
QUERY_NUMBER_PADDED=01
RESULT_COLUMNS=III
RESULT_QUERY=SELECT COUNT(DISTINCT id1), SUM(v1), COUNT(*) FROM ans
RESULT_ANSWER=95	28498857	96

View File

@@ -0,0 +1,10 @@
# name: benchmark/h2oai/group/q02.benchmark
# description: Run query 02 from the H2OAI benchmark
# group: [group]

# Check: distinct id1 and id2 counts, total of v1 and row count of `ans`.
# RESULT_ANSWER holds one tab-separated value per result column.
template benchmark/h2oai/group/h2oai.benchmark.in
QUERY_NUMBER=2
QUERY_NUMBER_PADDED=02
RESULT_COLUMNS=IIII
RESULT_QUERY=SELECT COUNT(DISTINCT id1), COUNT(DISTINCT id2), SUM(v1), COUNT(*) FROM ans
RESULT_ANSWER=95	95	28498857	9216

View File

@@ -0,0 +1,10 @@
# name: benchmark/h2oai/group/q03.benchmark
# description: Run query 03 from the H2OAI benchmark
# group: [group]

# Check: distinct id3 count, totals of v1 and v3, and row count of `ans`.
# RESULT_ANSWER holds one tab-separated value per result column.
template benchmark/h2oai/group/h2oai.benchmark.in
QUERY_NUMBER=3
QUERY_NUMBER_PADDED=03
RESULT_COLUMNS=IIII
RESULT_QUERY=SELECT COUNT(DISTINCT id3), SUM(v1), SUM(v3), COUNT(*) FROM ans
RESULT_ANSWER=95000	28498857	4749467.631946747	95001

View File

@@ -0,0 +1,10 @@
# name: benchmark/h2oai/group/q04.benchmark
# description: Run query 04 from the H2OAI benchmark
# group: [group]

# Check: distinct id4 count, totals of v1, v2 and v3, and row count of `ans`.
# RESULT_ANSWER holds one tab-separated value per result column.
template benchmark/h2oai/group/h2oai.benchmark.in
QUERY_NUMBER=4
QUERY_NUMBER_PADDED=04
RESULT_COLUMNS=IIIII
RESULT_QUERY=SELECT COUNT(DISTINCT id4), SUM(v1), SUM(v2), SUM(v3), COUNT(*) FROM ans
RESULT_ANSWER=95	287.9894309270616821	767.8529216923457105	4799.873270453372	96

View File

@@ -0,0 +1,10 @@
# name: benchmark/h2oai/group/q05.benchmark
# description: Run query 05 from the H2OAI benchmark
# group: [group]

# Check: distinct id6 count, totals of v1, v2 and v3, and row count of `ans`.
# RESULT_ANSWER holds one tab-separated value per result column.
template benchmark/h2oai/group/h2oai.benchmark.in
QUERY_NUMBER=5
QUERY_NUMBER_PADDED=05
RESULT_COLUMNS=IIIII
RESULT_QUERY=SELECT COUNT(DISTINCT id6), SUM(v1), SUM(v2), SUM(v3), COUNT(*) FROM ans
RESULT_ANSWER=95000	28498857	75988394	474969574.04777884	95001

View File

@@ -0,0 +1,10 @@
# name: benchmark/h2oai/group/q06.benchmark
# description: Run query 06 from the H2OAI benchmark
# group: [group]

# Check: distinct id4 and id5 counts, totals of median_v3 and sd_v3, and row count of `ans`.
# RESULT_ANSWER holds one tab-separated value per result column.
template benchmark/h2oai/group/h2oai.benchmark.in
QUERY_NUMBER=6
QUERY_NUMBER_PADDED=06
RESULT_COLUMNS=IIIII
RESULT_QUERY=SELECT COUNT(DISTINCT id4), COUNT(DISTINCT id5), SUM(median_v3), SUM(sd_v3), COUNT(*) FROM ans
RESULT_ANSWER=95	95	460771.2164439997	266006.9046221105	9216

View File

@@ -0,0 +1,10 @@
# name: benchmark/h2oai/group/q07.benchmark
# description: Run query 07 from the H2OAI benchmark
# group: [group]

# Check: distinct id3 count, total of range_v1_v2 and row count of `ans`.
# RESULT_ANSWER holds one tab-separated value per result column.
template benchmark/h2oai/group/h2oai.benchmark.in
QUERY_NUMBER=7
QUERY_NUMBER_PADDED=07
RESULT_COLUMNS=III
RESULT_QUERY=SELECT COUNT(DISTINCT id3), SUM(range_v1_v2), COUNT(*) FROM ans
RESULT_ANSWER=95000	379850	95001

View File

@@ -0,0 +1,10 @@
# name: benchmark/h2oai/group/q08.benchmark
# description: Run query 08 from the H2OAI benchmark
# group: [group]

# Check: distinct id6 count, total of largest2_v3 and row count of `ans`.
# RESULT_ANSWER holds one tab-separated value per result column.
template benchmark/h2oai/group/h2oai.benchmark.in
QUERY_NUMBER=8
QUERY_NUMBER_PADDED=08
RESULT_COLUMNS=III
RESULT_QUERY=SELECT COUNT(DISTINCT id6), SUM(largest2_v3), COUNT(*) FROM ans
RESULT_ANSWER=95000	18700554.77963206	190002

View File

@@ -0,0 +1,10 @@
# name: benchmark/h2oai/group/q09.benchmark
# description: Run query 09 from the H2OAI benchmark
# group: [group]

# Check: distinct id2 and id4 counts, total of r2 and row count of `ans`.
# RESULT_ANSWER holds one tab-separated value per result column.
template benchmark/h2oai/group/h2oai.benchmark.in
QUERY_NUMBER=9
QUERY_NUMBER_PADDED=09
RESULT_COLUMNS=IIII
RESULT_QUERY=SELECT COUNT(DISTINCT id2), COUNT(DISTINCT id4), SUM(r2), COUNT(*) FROM ans
RESULT_ANSWER=95	95	9.940515516534347	9216

View File

@@ -0,0 +1,10 @@
# name: benchmark/h2oai/group/q10.benchmark
# description: Run query 10 from the H2OAI benchmark
# group: [group]

# Check: distinct counts of id1..id6, totals of v3 and count, and row count of `ans`.
# RESULT_ANSWER holds one tab-separated value per result column.
template benchmark/h2oai/group/h2oai.benchmark.in
QUERY_NUMBER=10
QUERY_NUMBER_PADDED=10
RESULT_COLUMNS=IIIIIIIII
RESULT_QUERY=SELECT COUNT(DISTINCT id1), COUNT(DISTINCT id2), COUNT(DISTINCT id3), COUNT(DISTINCT id4), COUNT(DISTINCT id5), COUNT(DISTINCT id6), SUM(v3), SUM(count), COUNT(*) FROM ans
RESULT_ANSWER=95	95	95000	95	95	95000	474969574.04781127	10000000	9999993

View File

@@ -0,0 +1 @@
/* Input for the H2OAI group-by benchmark: reads the gzipped CSV at the URL below into table x_group. */
CREATE TABLE x_group AS
SELECT *
FROM read_csv_auto('https://github.com/duckdb/duckdb-data/releases/download/v1.0/G1_1e7_1e2_5_0.csv.gz');

View File

@@ -0,0 +1 @@
/* H2OAI group-by Q01: sum of v1 for each id1. */
CREATE TEMP TABLE ans AS
SELECT
    id1,
    sum(v1) AS v1
FROM x_group
GROUP BY id1;

View File

@@ -0,0 +1,2 @@
/* H2OAI group-by Q02: sum of v1 for each (id1, id2) pair. */
CREATE TEMP TABLE ans AS
SELECT
    id1,
    id2,
    sum(v1) AS v1
FROM x_group
GROUP BY
    id1,
    id2;

View File

@@ -0,0 +1 @@
/* H2OAI group-by Q03: sum of v1 and mean of v3 for each id3. */
CREATE TEMP TABLE ans AS
SELECT
    id3,
    sum(v1) AS v1,
    avg(v3) AS v3
FROM x_group
GROUP BY id3;

View File

@@ -0,0 +1 @@
/* H2OAI group-by Q04: means of v1, v2 and v3 for each id4. */
CREATE TEMP TABLE ans AS
SELECT
    id4,
    avg(v1) AS v1,
    avg(v2) AS v2,
    avg(v3) AS v3
FROM x_group
GROUP BY id4;

View File

@@ -0,0 +1 @@
/* H2OAI group-by Q05: sums of v1, v2 and v3 for each id6. */
CREATE TEMP TABLE ans AS
SELECT
    id6,
    sum(v1) AS v1,
    sum(v2) AS v2,
    sum(v3) AS v3
FROM x_group
GROUP BY id6;

View File

@@ -0,0 +1 @@
/* H2OAI group-by Q06: 0.5 continuous quantile and standard deviation of v3 for each (id4, id5) pair. */
CREATE TEMP TABLE ans AS
SELECT
    id4,
    id5,
    quantile_cont(v3, 0.5) AS median_v3,
    stddev(v3) AS sd_v3
FROM x_group
GROUP BY
    id4,
    id5;

View File

@@ -0,0 +1 @@
/* H2OAI group-by Q07: largest v1 minus smallest v2 for each id3. */
CREATE TEMP TABLE ans AS
SELECT
    id3,
    max(v1) - min(v2) AS range_v1_v2
FROM x_group
GROUP BY id3;

View File

@@ -0,0 +1 @@
/* H2OAI group-by Q08: the two largest non-NULL v3 values for each id6.
   Rows are numbered per id6 by descending v3 and only the first two are kept. */
CREATE TEMP TABLE ans AS
SELECT
    id6,
    v3 AS largest2_v3
FROM (
    SELECT
        id6,
        v3,
        row_number() OVER (PARTITION BY id6 ORDER BY v3 DESC) AS order_v3
    FROM x_group
    WHERE v3 IS NOT NULL
) AS sub_query
WHERE order_v3 <= 2;

View File

@@ -0,0 +1 @@
/* H2OAI group-by Q09: squared correlation of v1 and v2 for each (id2, id4) pair. */
CREATE TEMP TABLE ans AS
SELECT
    id2,
    id4,
    pow(corr(v1, v2), 2) AS r2
FROM x_group
GROUP BY
    id2,
    id4;

View File

@@ -0,0 +1 @@
/* H2OAI group-by Q10: sum of v3 and row count for each combination of id1..id6. */
CREATE TEMP TABLE ans AS
SELECT
    id1,
    id2,
    id3,
    id4,
    id5,
    id6,
    sum(v3) AS v3,
    count(*) AS count
FROM x_group
GROUP BY
    id1,
    id2,
    id3,
    id4,
    id5,
    id6;

View File

@@ -0,0 +1,26 @@
# name: ${FILE_PATH}
# description: ${DESCRIPTION}
# group: [h2oaijoin]

# Shared template for the H2OAI join benchmarks. Each qNN.benchmark file
# that references this template supplies QUERY_NUMBER_PADDED, RESULT_COLUMNS,
# RESULT_QUERY and RESULT_ANSWER. Directive blocks are separated by blank lines.

require httpfs load_only

name Q${QUERY_NUMBER_PADDED}
group H2OAI Join
storage persistent
cache h2oaijoin.duckdb

retry load 5

# Loads the input tables x, small, medium and big.
load benchmark/h2oai/join/queries/load.sql

# The benchmarked query; it writes its output to the temp table `ans`.
run benchmark/h2oai/join/queries/q${QUERY_NUMBER_PADDED}.sql

# Correctness check evaluated against `ans`.
result_query ${RESULT_COLUMNS}
${RESULT_QUERY}
----
${RESULT_ANSWER}

# Removes the `ans` table created by the run query.
cleanup
DROP TABLE ans

View File

@@ -0,0 +1,10 @@
# name: benchmark/h2oai/join/q01.benchmark
# description: Run query 01 from the H2OAI join benchmark
# group: [join]

# Check: distinct id4 count, total of v2 and row count of `ans`.
# RESULT_ANSWER holds one tab-separated value per result column.
template benchmark/h2oai/join/h2oai.benchmark.in
QUERY_NUMBER=1
QUERY_NUMBER_PADDED=01
RESULT_COLUMNS=III
RESULT_QUERY=SELECT COUNT(DISTINCT id4), SUM(v2), COUNT(*) FROM ans
RESULT_ANSWER=9	347720187.3928393	8998860

View File

@@ -0,0 +1,10 @@
# name: benchmark/h2oai/join/q02.benchmark
# description: Run query 02 from the H2OAI join benchmark
# group: [join]

# Check: distinct medium_id1, medium_id4 and medium_id5 counts, total of v2 and row count of `ans`.
# RESULT_ANSWER holds one tab-separated value per result column.
template benchmark/h2oai/join/h2oai.benchmark.in
QUERY_NUMBER=2
QUERY_NUMBER_PADDED=02
RESULT_COLUMNS=IIIII
RESULT_QUERY=SELECT COUNT(DISTINCT medium_id1), COUNT(DISTINCT medium_id4), COUNT(DISTINCT medium_id5), SUM(v2), COUNT(*) FROM ans
RESULT_ANSWER=10	10	9000	449999844.93783	8998412

View File

@@ -0,0 +1,10 @@
# name: benchmark/h2oai/join/q03.benchmark
# description: Run query 03 from the H2OAI join benchmark
# group: [join]

# Check: distinct medium_id1, medium_id4 and medium_id5 counts, total of v2 and row count of `ans`.
# RESULT_ANSWER holds one tab-separated value per result column.
template benchmark/h2oai/join/h2oai.benchmark.in
QUERY_NUMBER=3
QUERY_NUMBER_PADDED=03
RESULT_COLUMNS=IIIII
RESULT_QUERY=SELECT COUNT(DISTINCT medium_id1), COUNT(DISTINCT medium_id4), COUNT(DISTINCT medium_id5), SUM(v2), COUNT(*) FROM ans
RESULT_ANSWER=10	10	9000	449999844.9375197	10000000

View File

@@ -0,0 +1,10 @@
# name: benchmark/h2oai/join/q04.benchmark
# description: Run query 04 from the H2OAI join benchmark
# group: [join]

# Check: distinct medium_id1, medium_id2 and medium_id4 counts, total of v2 and row count of `ans`.
# RESULT_ANSWER holds one tab-separated value per result column.
template benchmark/h2oai/join/h2oai.benchmark.in
QUERY_NUMBER=4
QUERY_NUMBER_PADDED=04
RESULT_COLUMNS=IIIII
RESULT_QUERY=SELECT COUNT(DISTINCT medium_id1), COUNT(DISTINCT medium_id2), COUNT(DISTINCT medium_id4), SUM(v2), COUNT(*) FROM ans
RESULT_ANSWER=10	9000	10	449999844.9375197	8998412

View File

@@ -0,0 +1,10 @@
# name: benchmark/h2oai/join/q05.benchmark
# description: Run query 05 from the H2OAI join benchmark
# group: [join]

# Check: distinct big_id1, big_id2, big_id4, big_id5 and big_id6 counts, total of v2 and row count of `ans`.
# RESULT_ANSWER holds one tab-separated value per result column.
template benchmark/h2oai/join/h2oai.benchmark.in
QUERY_NUMBER=5
QUERY_NUMBER_PADDED=05
RESULT_COLUMNS=IIIIIII
RESULT_QUERY=SELECT COUNT(DISTINCT big_id1), COUNT(DISTINCT big_id2), COUNT(DISTINCT big_id4), COUNT(DISTINCT big_id5), COUNT(DISTINCT big_id6), SUM(v2), COUNT(*) FROM ans
RESULT_ANSWER=10	10000	10	10000	9000000	449860428.61554617	9000000

View File

@@ -0,0 +1,4 @@
/* Inputs for the H2OAI join benchmark: one probe table (x) and three tables it is joined against.
   IF NOT EXISTS leaves a table untouched when it is already present. */
CREATE TABLE IF NOT EXISTS x AS
SELECT *
FROM read_csv_auto('https://github.com/duckdb/duckdb-data/releases/download/v1.0/J1_1e7_NA_0_0.csv.gz');

CREATE TABLE IF NOT EXISTS small AS
SELECT *
FROM read_csv_auto('https://github.com/duckdb/duckdb-data/releases/download/v1.0/J1_1e7_1e1_0_0.csv.gz');

CREATE TABLE IF NOT EXISTS medium AS
SELECT *
FROM read_csv_auto('https://github.com/duckdb/duckdb-data/releases/download/v1.0/J1_1e7_1e4_0_0.csv.gz');

CREATE TABLE IF NOT EXISTS big AS
SELECT *
FROM read_csv_auto('https://github.com/duckdb/duckdb-data/releases/download/v1.0/J1_1e7_1e7_0_0.csv.gz');

View File

@@ -0,0 +1 @@
/* H2OAI join Q01: inner join of x with small on id1. */
CREATE TEMP TABLE ans AS
SELECT
    x.*,
    small.id4 AS small_id4,
    v2
FROM x
INNER JOIN small USING (id1);

View File

@@ -0,0 +1 @@
/* H2OAI join Q02: inner join of x with medium on id2. */
CREATE TEMP TABLE ans AS
SELECT
    x.*,
    medium.id1 AS medium_id1,
    medium.id4 AS medium_id4,
    medium.id5 AS medium_id5,
    v2
FROM x
INNER JOIN medium USING (id2);

View File

@@ -0,0 +1 @@
/* H2OAI join Q03: left join of x with medium on id2, so every row of x is kept. */
CREATE TEMP TABLE ans AS
SELECT
    x.*,
    medium.id1 AS medium_id1,
    medium.id4 AS medium_id4,
    medium.id5 AS medium_id5,
    v2
FROM x
LEFT JOIN medium USING (id2);

View File

@@ -0,0 +1 @@
/* H2OAI join Q04: inner join of x with medium on id5. */
CREATE TEMP TABLE ans AS
SELECT
    x.*,
    medium.id1 AS medium_id1,
    medium.id2 AS medium_id2,
    medium.id4 AS medium_id4,
    v2
FROM x
INNER JOIN medium USING (id5);

View File

@@ -0,0 +1 @@
/* H2OAI join Q05: inner join of x with big on id3. */
CREATE TEMP TABLE ans AS
SELECT
    x.*,
    big.id1 AS big_id1,
    big.id2 AS big_id2,
    big.id4 AS big_id4,
    big.id5 AS big_id5,
    big.id6 AS big_id6,
    v2
FROM x
INNER JOIN big USING (id3);