should be it

This commit is contained in:
2025-10-24 19:21:19 -05:00
parent a4b23fc57c
commit f09560c7b1
14047 changed files with 3161551 additions and 1 deletions

View File

@@ -0,0 +1,23 @@
# name: benchmark/tpch/parquet/parquet_load.benchmark
# description: Import data from Parquet
# group: [parquet]
# Setup generates TPC-H at sf=1, exports lineitem to a Parquet file and swaps
# the table for an empty one with the same column list. The timed step copies
# the Parquet file into that empty table; cleanup drops and recreates it.
# NOTE(review): l_quantity is declared INTEGER while the other measures are
# DECIMAL(15,2) - confirm this matches the type dbgen produces.
name Parquet Data Import (In-Memory)
group parquet
subgroup tpch
require parquet
require tpch
load
CALL dbgen(sf=1);
COPY lineitem
TO '${BENCHMARK_DIR}/lineitem.parquet' (FORMAT PARQUET);
DROP TABLE lineitem;
CREATE TABLE lineitem(
    l_orderkey INTEGER NOT NULL,
    l_partkey INTEGER NOT NULL,
    l_suppkey INTEGER NOT NULL,
    l_linenumber INTEGER NOT NULL,
    l_quantity INTEGER NOT NULL,
    l_extendedprice DECIMAL(15,2) NOT NULL,
    l_discount DECIMAL(15,2) NOT NULL,
    l_tax DECIMAL(15,2) NOT NULL,
    l_returnflag VARCHAR NOT NULL,
    l_linestatus VARCHAR NOT NULL,
    l_shipdate DATE NOT NULL,
    l_commitdate DATE NOT NULL,
    l_receiptdate DATE NOT NULL,
    l_shipinstruct VARCHAR NOT NULL,
    l_shipmode VARCHAR NOT NULL,
    l_comment VARCHAR NOT NULL
);
run
COPY lineitem
FROM '${BENCHMARK_DIR}/lineitem.parquet';
cleanup
DROP TABLE lineitem;
CREATE TABLE lineitem(
    l_orderkey INTEGER NOT NULL,
    l_partkey INTEGER NOT NULL,
    l_suppkey INTEGER NOT NULL,
    l_linenumber INTEGER NOT NULL,
    l_quantity INTEGER NOT NULL,
    l_extendedprice DECIMAL(15,2) NOT NULL,
    l_discount DECIMAL(15,2) NOT NULL,
    l_tax DECIMAL(15,2) NOT NULL,
    l_returnflag VARCHAR NOT NULL,
    l_linestatus VARCHAR NOT NULL,
    l_shipdate DATE NOT NULL,
    l_commitdate DATE NOT NULL,
    l_receiptdate DATE NOT NULL,
    l_shipinstruct VARCHAR NOT NULL,
    l_shipmode VARCHAR NOT NULL,
    l_comment VARCHAR NOT NULL
);

View File

@@ -0,0 +1,25 @@
# name: benchmark/tpch/parquet/parquet_load_encrypted.benchmark
# description: Import data from encrypted Parquet
# group: [parquet]
# Setup registers a 16-byte key under the name key128, exports lineitem to a
# Parquet file encrypted with that footer key, and swaps the table for an empty
# one. The timed step copies the encrypted file back in using the same key;
# cleanup drops and recreates the empty table.
# NOTE(review): l_quantity is declared INTEGER while the other measures are
# DECIMAL(15,2) - confirm this matches the type dbgen produces.
name Parquet Data Import (In-Memory, Encrypted)
group parquet
subgroup tpch
require parquet
require tpch
# NOTE(review): httpfs is presumably required for faster en/decryption - confirm
require httpfs
load
CALL dbgen(sf=1);
PRAGMA add_parquet_key('key128', '0123456789112345');
COPY lineitem TO '${BENCHMARK_DIR}/lineitem_encrypted.parquet' (ENCRYPTION_CONFIG {footer_key: 'key128'});
DROP TABLE lineitem;
CREATE TABLE lineitem(l_orderkey INTEGER NOT NULL, l_partkey INTEGER NOT NULL, l_suppkey INTEGER NOT NULL, l_linenumber INTEGER NOT NULL, l_quantity INTEGER NOT NULL, l_extendedprice DECIMAL(15,2) NOT NULL, l_discount DECIMAL(15,2) NOT NULL, l_tax DECIMAL(15,2) NOT NULL, l_returnflag VARCHAR NOT NULL, l_linestatus VARCHAR NOT NULL, l_shipdate DATE NOT NULL, l_commitdate DATE NOT NULL, l_receiptdate DATE NOT NULL, l_shipinstruct VARCHAR NOT NULL, l_shipmode VARCHAR NOT NULL, l_comment VARCHAR NOT NULL);
run
COPY lineitem FROM '${BENCHMARK_DIR}/lineitem_encrypted.parquet' (ENCRYPTION_CONFIG {footer_key: 'key128'});
cleanup
DROP TABLE lineitem;
CREATE TABLE lineitem(l_orderkey INTEGER NOT NULL, l_partkey INTEGER NOT NULL, l_suppkey INTEGER NOT NULL, l_linenumber INTEGER NOT NULL, l_quantity INTEGER NOT NULL, l_extendedprice DECIMAL(15,2) NOT NULL, l_discount DECIMAL(15,2) NOT NULL, l_tax DECIMAL(15,2) NOT NULL, l_returnflag VARCHAR NOT NULL, l_linestatus VARCHAR NOT NULL, l_shipdate DATE NOT NULL, l_commitdate DATE NOT NULL, l_receiptdate DATE NOT NULL, l_shipinstruct VARCHAR NOT NULL, l_shipmode VARCHAR NOT NULL, l_comment VARCHAR NOT NULL);

View File

@@ -0,0 +1,24 @@
# name: benchmark/tpch/parquet/parquet_load_no_order.benchmark
# description: Import data from Parquet
# group: [parquet]
# Same shape as the plain Parquet import benchmark, except that setup switches
# preserve_insertion_order off after the export, so the timed COPY ... FROM runs
# with that setting disabled. Cleanup drops and recreates the empty table.
# NOTE(review): l_quantity is declared INTEGER while the other measures are
# DECIMAL(15,2) - confirm this matches the type dbgen produces.
name Parquet Data Import (In-Memory, Unordered)
group parquet
subgroup tpch
require parquet
require tpch
load
CALL dbgen(sf=1);
COPY lineitem
TO '${BENCHMARK_DIR}/lineitem.parquet' (FORMAT PARQUET);
SET preserve_insertion_order=false;
DROP TABLE lineitem;
CREATE TABLE lineitem(
    l_orderkey INTEGER NOT NULL,
    l_partkey INTEGER NOT NULL,
    l_suppkey INTEGER NOT NULL,
    l_linenumber INTEGER NOT NULL,
    l_quantity INTEGER NOT NULL,
    l_extendedprice DECIMAL(15,2) NOT NULL,
    l_discount DECIMAL(15,2) NOT NULL,
    l_tax DECIMAL(15,2) NOT NULL,
    l_returnflag VARCHAR NOT NULL,
    l_linestatus VARCHAR NOT NULL,
    l_shipdate DATE NOT NULL,
    l_commitdate DATE NOT NULL,
    l_receiptdate DATE NOT NULL,
    l_shipinstruct VARCHAR NOT NULL,
    l_shipmode VARCHAR NOT NULL,
    l_comment VARCHAR NOT NULL
);
run
COPY lineitem
FROM '${BENCHMARK_DIR}/lineitem.parquet';
cleanup
DROP TABLE lineitem;
CREATE TABLE lineitem(
    l_orderkey INTEGER NOT NULL,
    l_partkey INTEGER NOT NULL,
    l_suppkey INTEGER NOT NULL,
    l_linenumber INTEGER NOT NULL,
    l_quantity INTEGER NOT NULL,
    l_extendedprice DECIMAL(15,2) NOT NULL,
    l_discount DECIMAL(15,2) NOT NULL,
    l_tax DECIMAL(15,2) NOT NULL,
    l_returnflag VARCHAR NOT NULL,
    l_linestatus VARCHAR NOT NULL,
    l_shipdate DATE NOT NULL,
    l_commitdate DATE NOT NULL,
    l_receiptdate DATE NOT NULL,
    l_shipinstruct VARCHAR NOT NULL,
    l_shipmode VARCHAR NOT NULL,
    l_comment VARCHAR NOT NULL
);

View File

@@ -0,0 +1,25 @@
# name: benchmark/tpch/parquet/parquet_load_persistent.benchmark
# description: Import data from Parquet
# group: [parquet]
# Parquet import benchmark declared with "storage persistent". Setup exports the
# generated lineitem table to Parquet and swaps it for an empty table; the timed
# step copies the file back in; cleanup drops and recreates the empty table.
# NOTE(review): l_quantity is declared INTEGER while the other measures are
# DECIMAL(15,2) - confirm this matches the type dbgen produces.
name Parquet Data Import (Persistent)
group parquet
subgroup tpch
require parquet
require tpch
storage persistent
load
CALL dbgen(sf=1);
COPY lineitem
TO '${BENCHMARK_DIR}/lineitem.parquet' (FORMAT PARQUET);
DROP TABLE lineitem;
CREATE TABLE lineitem(
    l_orderkey INTEGER NOT NULL,
    l_partkey INTEGER NOT NULL,
    l_suppkey INTEGER NOT NULL,
    l_linenumber INTEGER NOT NULL,
    l_quantity INTEGER NOT NULL,
    l_extendedprice DECIMAL(15,2) NOT NULL,
    l_discount DECIMAL(15,2) NOT NULL,
    l_tax DECIMAL(15,2) NOT NULL,
    l_returnflag VARCHAR NOT NULL,
    l_linestatus VARCHAR NOT NULL,
    l_shipdate DATE NOT NULL,
    l_commitdate DATE NOT NULL,
    l_receiptdate DATE NOT NULL,
    l_shipinstruct VARCHAR NOT NULL,
    l_shipmode VARCHAR NOT NULL,
    l_comment VARCHAR NOT NULL
);
run
COPY lineitem
FROM '${BENCHMARK_DIR}/lineitem.parquet';
cleanup
DROP TABLE lineitem;
CREATE TABLE lineitem(
    l_orderkey INTEGER NOT NULL,
    l_partkey INTEGER NOT NULL,
    l_suppkey INTEGER NOT NULL,
    l_linenumber INTEGER NOT NULL,
    l_quantity INTEGER NOT NULL,
    l_extendedprice DECIMAL(15,2) NOT NULL,
    l_discount DECIMAL(15,2) NOT NULL,
    l_tax DECIMAL(15,2) NOT NULL,
    l_returnflag VARCHAR NOT NULL,
    l_linestatus VARCHAR NOT NULL,
    l_shipdate DATE NOT NULL,
    l_commitdate DATE NOT NULL,
    l_receiptdate DATE NOT NULL,
    l_shipinstruct VARCHAR NOT NULL,
    l_shipmode VARCHAR NOT NULL,
    l_comment VARCHAR NOT NULL
);

View File

@@ -0,0 +1,26 @@
# name: benchmark/tpch/parquet/parquet_load_persistent_no_order.benchmark
# description: Import data from Parquet
# group: [parquet]
# Parquet import benchmark declared with "storage persistent" and with
# preserve_insertion_order switched off during setup (after the export), so the
# timed COPY ... FROM runs with that setting disabled. Cleanup drops and
# recreates the empty table.
# NOTE(review): l_quantity is declared INTEGER while the other measures are
# DECIMAL(15,2) - confirm this matches the type dbgen produces.
name Parquet Data Import (Persistent, Unordered)
group parquet
subgroup tpch
require parquet
require tpch
storage persistent
load
CALL dbgen(sf=1);
COPY lineitem
TO '${BENCHMARK_DIR}/lineitem.parquet' (FORMAT PARQUET);
SET preserve_insertion_order=false;
DROP TABLE lineitem;
CREATE TABLE lineitem(
    l_orderkey INTEGER NOT NULL,
    l_partkey INTEGER NOT NULL,
    l_suppkey INTEGER NOT NULL,
    l_linenumber INTEGER NOT NULL,
    l_quantity INTEGER NOT NULL,
    l_extendedprice DECIMAL(15,2) NOT NULL,
    l_discount DECIMAL(15,2) NOT NULL,
    l_tax DECIMAL(15,2) NOT NULL,
    l_returnflag VARCHAR NOT NULL,
    l_linestatus VARCHAR NOT NULL,
    l_shipdate DATE NOT NULL,
    l_commitdate DATE NOT NULL,
    l_receiptdate DATE NOT NULL,
    l_shipinstruct VARCHAR NOT NULL,
    l_shipmode VARCHAR NOT NULL,
    l_comment VARCHAR NOT NULL
);
run
COPY lineitem
FROM '${BENCHMARK_DIR}/lineitem.parquet';
cleanup
DROP TABLE lineitem;
CREATE TABLE lineitem(
    l_orderkey INTEGER NOT NULL,
    l_partkey INTEGER NOT NULL,
    l_suppkey INTEGER NOT NULL,
    l_linenumber INTEGER NOT NULL,
    l_quantity INTEGER NOT NULL,
    l_extendedprice DECIMAL(15,2) NOT NULL,
    l_discount DECIMAL(15,2) NOT NULL,
    l_tax DECIMAL(15,2) NOT NULL,
    l_returnflag VARCHAR NOT NULL,
    l_linestatus VARCHAR NOT NULL,
    l_shipdate DATE NOT NULL,
    l_commitdate DATE NOT NULL,
    l_receiptdate DATE NOT NULL,
    l_shipinstruct VARCHAR NOT NULL,
    l_shipmode VARCHAR NOT NULL,
    l_comment VARCHAR NOT NULL
);

View File

@@ -0,0 +1,24 @@
# name: benchmark/tpch/parquet/parquet_load_small_row_groups.benchmark
# description: Import data from Parquet
# group: [parquet]
# Parquet import benchmark whose input file is written with ROW_GROUP_SIZE 5000.
# Setup exports lineitem with that option and swaps the table for an empty one;
# the timed step copies the file back in; cleanup drops and recreates the table.
# NOTE(review): l_quantity is declared INTEGER while the other measures are
# DECIMAL(15,2) - confirm this matches the type dbgen produces.
name Parquet Data Import (Small Row Groups)
group parquet
subgroup tpch
require parquet
require tpch
load
CALL dbgen(sf=1);
COPY lineitem
TO '${BENCHMARK_DIR}/lineitem.parquet' (FORMAT PARQUET, ROW_GROUP_SIZE 5000);
DROP TABLE lineitem;
CREATE TABLE lineitem(
    l_orderkey INTEGER NOT NULL,
    l_partkey INTEGER NOT NULL,
    l_suppkey INTEGER NOT NULL,
    l_linenumber INTEGER NOT NULL,
    l_quantity INTEGER NOT NULL,
    l_extendedprice DECIMAL(15,2) NOT NULL,
    l_discount DECIMAL(15,2) NOT NULL,
    l_tax DECIMAL(15,2) NOT NULL,
    l_returnflag VARCHAR NOT NULL,
    l_linestatus VARCHAR NOT NULL,
    l_shipdate DATE NOT NULL,
    l_commitdate DATE NOT NULL,
    l_receiptdate DATE NOT NULL,
    l_shipinstruct VARCHAR NOT NULL,
    l_shipmode VARCHAR NOT NULL,
    l_comment VARCHAR NOT NULL
);
run
COPY lineitem
FROM '${BENCHMARK_DIR}/lineitem.parquet';
cleanup
DROP TABLE lineitem;
CREATE TABLE lineitem(
    l_orderkey INTEGER NOT NULL,
    l_partkey INTEGER NOT NULL,
    l_suppkey INTEGER NOT NULL,
    l_linenumber INTEGER NOT NULL,
    l_quantity INTEGER NOT NULL,
    l_extendedprice DECIMAL(15,2) NOT NULL,
    l_discount DECIMAL(15,2) NOT NULL,
    l_tax DECIMAL(15,2) NOT NULL,
    l_returnflag VARCHAR NOT NULL,
    l_linestatus VARCHAR NOT NULL,
    l_shipdate DATE NOT NULL,
    l_commitdate DATE NOT NULL,
    l_receiptdate DATE NOT NULL,
    l_shipinstruct VARCHAR NOT NULL,
    l_shipmode VARCHAR NOT NULL,
    l_comment VARCHAR NOT NULL
);

View File

@@ -0,0 +1,45 @@
# name: benchmark/tpch/parquet/parquet_mixed_struct_projection.benchmark
# description: Execute a projection over lineitem stored in several parquet files with mixed structs
# group: [parquet]
# Setup writes lineitem_normal into three Parquet files. Each file holds its
# rows as a single struct column named lineitem, and each lists the struct
# fields in a different order: the first 1,000,000 rows, the next 2,000,000,
# and everything from offset 3,000,000 onwards. The lineitem view unnests the
# struct across all three files; the timed query projects one field from it.
# NOTE(review): the LIMIT/OFFSET slices have no ORDER BY, so they rely on a
# stable scan order of lineitem_normal - confirm that is acceptable here.
name Projection (Parquet, Mixed Structs)
group parquet
subgroup tpch
require parquet
require tpch
load
CALL dbgen(sf=1, suffix='_normal');
COPY (
SELECT subq AS lineitem
FROM (
SELECT l_suppkey, l_linestatus, l_commitdate, l_receiptdate, l_comment, l_extendedprice, l_tax, l_partkey, l_shipdate, l_linenumber, l_shipinstruct, l_shipmode, l_discount, l_orderkey, l_returnflag, l_quantity
FROM lineitem_normal
LIMIT 1000000) subq
)
TO '${BENCHMARK_DIR}/lineitem_mixed_struct1.parquet';
COPY (
SELECT subq AS lineitem
FROM (
SELECT l_linenumber, l_tax, l_orderkey, l_discount, l_linestatus, l_quantity, l_shipmode, l_returnflag, l_receiptdate, l_partkey, l_shipdate, l_suppkey, l_commitdate, l_extendedprice, l_comment, l_shipinstruct
FROM lineitem_normal
LIMIT 2000000
OFFSET 1000000) subq
)
TO '${BENCHMARK_DIR}/lineitem_mixed_struct2.parquet';
COPY (
SELECT subq AS lineitem
FROM (
SELECT l_extendedprice, l_linenumber, l_shipdate, l_comment, l_shipmode, l_orderkey, l_partkey, l_shipinstruct, l_commitdate, l_returnflag, l_quantity, l_tax, l_linestatus, l_receiptdate, l_discount, l_suppkey
FROM lineitem_normal
OFFSET 3000000) subq
)
TO '${BENCHMARK_DIR}/lineitem_mixed_struct3.parquet';
CREATE VIEW lineitem AS SELECT UNNEST(lineitem) FROM '${BENCHMARK_DIR}/lineitem_mixed_struct*.parquet';
run
SELECT MAX(l_linenumber) FROM lineitem
result I
7

View File

@@ -0,0 +1,20 @@
# name: benchmark/tpch/parquet/parquet_projection_direct.benchmark
# description: Execute a simple aggregate + projection over lineitem to test projection pushdown
# group: [parquet]
# Setup generates the TPC-H tables with a _normal suffix and exports
# lineitem_normal to Parquet. The timed query sums an expression over three
# columns, reading the file directly through read_parquet.
name Lineitem Projection Pushdown (Direct)
group parquet
subgroup tpch
require parquet
require tpch
load
CALL dbgen(sf=1, suffix='_normal');
COPY lineitem_normal
TO '${BENCHMARK_DIR}/lineitem.parquet' (FORMAT PARQUET);
run
SELECT SUM(l_extendedprice * (1 - l_discount) * (1 + l_tax))
FROM read_parquet('${BENCHMARK_DIR}/lineitem.parquet');
result I
226829357828.867781

View File

@@ -0,0 +1,15 @@
# name: benchmark/tpch/parquet/parquet_projection_view.benchmark
# description: Execute aggregate + projection over lineitem with a view on top to test projection pushdown
# group: [parquet]
# The timed query sums an expression over three lineitem columns.
# NOTE(review): lineitem is presumably defined by the included
# tpch_load_parquet.benchmark.in (as a view over Parquet) - verify there.
include benchmark/tpch/tpch_load_parquet.benchmark.in
name Lineitem Projection Pushdown (View)
group parquet
subgroup tpch
run
SELECT SUM(l_extendedprice * (1 - l_discount) * (1 + l_tax))
FROM lineitem
result I
226829357828.867781

View File

@@ -0,0 +1,23 @@
# name: benchmark/tpch/parquet/parquet_q1_encrypted.benchmark
# description: Execute Q1 over lineitem stored in an encrypted parquet file
# group: [parquet]
# Setup registers a 32-byte key under the name key256, exports lineitem_normal
# to a Parquet file encrypted with that footer key, and defines lineitem as a
# view that reads the file back with the same key. The timed step runs TPC-H
# query 1 and checks it against the sf1 answer file.
name Q1 (Parquet, Encrypted)
group parquet
subgroup tpch
# if httpfs is required, en/decryption is 2x as fast
require httpfs
require parquet
require tpch
load
CALL dbgen(sf=1, suffix='_normal');
PRAGMA add_parquet_key('key256', '01234567891123450123456789112345');
COPY lineitem_normal TO '${BENCHMARK_DIR}/lineitem_encrypted.parquet' (ENCRYPTION_CONFIG {footer_key: 'key256'});
CREATE VIEW lineitem AS SELECT * FROM read_parquet('${BENCHMARK_DIR}/lineitem_encrypted.parquet', encryption_config={footer_key: 'key256'});
run
PRAGMA tpch(1)
result extension/tpch/dbgen/answers/sf1/q01.csv

View File

@@ -0,0 +1,19 @@
# name: benchmark/tpch/parquet/parquet_top_n.benchmark
# description: Top-N over lineitem Parquet
# group: [parquet]
# The timed query returns the five lineitem rows with the highest
# l_extendedprice, breaking ties by ascending l_orderkey, and compares all 16
# columns against the expected rows below.
# NOTE(review): lineitem is presumably defined by the included
# tpch_load_parquet.benchmark.in - verify there.
# NOTE(review): the group directive is topn while the header says [parquet] -
# confirm which is intended.
include benchmark/tpch/tpch_load_parquet.benchmark.in
name Lineitem Top N (order key)
group topn
subgroup tpch
run
SELECT * FROM lineitem ORDER BY l_extendedprice DESC, l_orderkey LIMIT 5;
result IIIIIIIIIIIIIIII
2513090 199999 5038 4 50.00 104949.50 0.02 0.04 A F 1993-10-05 1993-10-17 1993-10-28 TAKE BACK RETURN FOB - ironic, pending pinto be
82823 199998 5037 2 50.00 104899.50 0.04 0.05 A F 1992-04-30 1992-07-05 1992-05-29 COLLECT COD SHIP orbits. bold fox
644100 199998 5037 2 50.00 104899.50 0.05 0.04 A F 1994-08-29 1994-08-05 1994-09-20 COLLECT COD FOB ges nag carefully silent r
3811460 199998 5037 1 50.00 104899.50 0.05 0.05 R F 1993-10-18 1993-08-10 1993-11-13 TAKE BACK RETURN MAIL usly. blithely fin
2077184 198998 6556 2 50.00 104849.50 0.06 0.01 R F 1993-07-20 1993-06-19 1993-07-23 DELIVER IN PERSON MAIL ffily even pinto beans thrash slyly final t

View File

@@ -0,0 +1,24 @@
# name: benchmark/tpch/parquet/parquet_union_all_pushdown.benchmark
# description: Read parquet files through a union all where we don't use any columns (pushdown left to the optimizer)
# group: [parquet]
# Setup exports lineitem_normal to Parquet and defines two views, p1 and p2,
# over the same file. The timed query counts the rows of a UNION ALL that
# selects every column from both views, so none of the columns are needed for
# the result. The expected count is twice the sf=1 lineitem row count.
name Parquet Union All
group parquet
subgroup tpch
require parquet
require tpch
load
CALL dbgen(sf=1, suffix='_normal');
COPY lineitem_normal
TO '${BENCHMARK_DIR}/lineitem.parquet' (FORMAT PARQUET);
CREATE VIEW p1 AS
SELECT * FROM read_parquet('${BENCHMARK_DIR}/lineitem.parquet');
CREATE VIEW p2 AS
SELECT * FROM read_parquet('${BENCHMARK_DIR}/lineitem.parquet');
run
SELECT COUNT(*)
FROM (
    SELECT * FROM p1
    UNION ALL
    SELECT * FROM p2
) t1
result I
12002430

View File

@@ -0,0 +1,23 @@
# name: benchmark/tpch/parquet/parquet_union_all_pushdown_manual.benchmark
# description: Read parquet files through a union all where we don't use any columns (manual pushdown)
# group: [parquet]
# Setup exports lineitem_normal to Parquet and defines two views, p1 and p2,
# over the same file. The timed query counts the rows of a UNION ALL in which
# each side selects only l_orderkey, i.e. the projection is written by hand.
# The expected count is twice the sf=1 lineitem row count.
name Parquet Union All (Manual Pushdown)
group parquet
subgroup tpch
require parquet
require tpch
load
CALL dbgen(sf=1, suffix='_normal');
COPY lineitem_normal
TO '${BENCHMARK_DIR}/lineitem.parquet' (FORMAT PARQUET);
CREATE VIEW p1 AS
SELECT * FROM read_parquet('${BENCHMARK_DIR}/lineitem.parquet');
CREATE VIEW p2 AS
SELECT * FROM read_parquet('${BENCHMARK_DIR}/lineitem.parquet');
run
SELECT COUNT(*)
FROM (
    SELECT l_orderkey FROM p1
    UNION ALL
    SELECT l_orderkey FROM p2
) t1
result I
12002430

View File

@@ -0,0 +1,16 @@
# name: benchmark/tpch/parquet/write_lineitem_parquet.benchmark
# description: Write the lineitem of TPC-H SF1 to a Parquet file
# group: [parquet]
# The timed step copies lineitem to a Parquet file; the expected result is the
# number of rows written at sf=1.
# NOTE(review): lineitem is presumably created by the included
# tpch_load.benchmark.in - verify there.
include benchmark/tpch/tpch_load.benchmark.in
name Write Lineitem Parquet
group parquet
require parquet
run
COPY lineitem
TO '${BENCHMARK_DIR}/lineitem.parquet';
result I sf=1
6001215

View File

@@ -0,0 +1,19 @@
# name: benchmark/tpch/parquet/write_lineitem_parquet_memory_limited.benchmark
# description: Write the lineitem of TPC-H SF1 to a Parquet file with limited memory
# group: [parquet]
# The init section caps memory_limit at 500MB; the timed step then copies
# lineitem to a Parquet file. The expected result is the number of rows written
# at sf=1.
# NOTE(review): lineitem is presumably created by the included
# tpch_load.benchmark.in - verify there.
include benchmark/tpch/tpch_load.benchmark.in
name Write Lineitem Parquet (Limited Memory)
group parquet
require parquet
init
SET memory_limit='500MB';
run
COPY lineitem
TO '${BENCHMARK_DIR}/lineitem.parquet';
result I sf=1
6001215

View File

@@ -0,0 +1,17 @@
# name: benchmark/tpch/parquet/write_lineitem_parquet_no_order.benchmark
# description: Write the lineitem of TPC-H SF1 to a Parquet file without preserving insertion order
# group: [parquet]
# The init section switches preserve_insertion_order off; the timed step then
# copies lineitem to a Parquet file. The expected result is the number of rows
# written at sf=1.
# NOTE(review): lineitem is presumably created by the included
# tpch_load.benchmark.in - verify there.
include benchmark/tpch/tpch_load.benchmark.in
name Write Lineitem Parquet (Non-Order Preserving)
group parquet
require parquet
init
SET preserve_insertion_order=false;
run
COPY lineitem TO '${BENCHMARK_DIR}/lineitem.parquet';
result I sf=1
6001215