should be it

This commit is contained in:
2025-10-24 19:21:19 -05:00
parent a4b23fc57c
commit f09560c7b1
14047 changed files with 3161551 additions and 1 deletions

View File

@@ -0,0 +1,21 @@
# name: benchmark/tpch/csv/lineitem_csv_auto_detect.benchmark
# description: Run CSV auto-detection on the lineitem CSV
# group: [csv]

name Read Lineitem CSV Auto-Detect
group csv

require tpch

# Setup: generate TPC-H data at scale factor 0.1 into tables suffixed
# `_normal`, export lineitem_normal as a pipe-delimited CSV with a header
# row, then create a `tpch` schema and call dbgen on it with scale factor 0.
# NOTE(review): the run query below never references the `tpch` schema --
# confirm whether that part of the setup is still needed.
# Every SQL section must be followed by an empty line; comments belong
# between sections only, never inside one.
load
CALL dbgen(sf=0.1, suffix='_normal');
COPY lineitem_normal TO '${BENCHMARK_DIR}/lineitem.csv' (FORMAT CSV, DELIMITER '|', HEADER);
CREATE SCHEMA tpch;
CALL dbgen(sf=0, schema='tpch');

# Benchmark query: read the CSV without passing any reader options and
# count l_orderkey over the first 5 rows only.
run
SELECT COUNT(l_orderkey) FROM (FROM '${BENCHMARK_DIR}/lineitem.csv' LIMIT 5)

# Expected output: a single integer column holding the LIMIT of 5.
result I
5

View File

@@ -0,0 +1,24 @@
# name: benchmark/tpch/csv/lineitem_csv_gzip.benchmark
# description: Read Lineitem CSV gzipped with auto-detection
# group: [csv]

name Read Lineitem CSV gzipped with auto-detection
group csv

require tpch

# Setup: generate TPC-H SF1 into a scratch schema, export its lineitem table
# to a pipe-delimited `.csv.gz` file with a header row, then drop the scratch
# schema again so only the exported file remains.
# FORMAT CSV is spelled out to match the other CSV benchmarks in this group.
# Every SQL section must be followed by an empty line; comments belong
# between sections only, never inside one.
load
CREATE SCHEMA tpch_schema;
CALL dbgen(sf=1, schema='tpch_schema');
COPY tpch_schema.lineitem TO '${BENCHMARK_DIR}/lineitem.csv.gz' (FORMAT CSV, DELIMITER '|', HEADER);
DROP SCHEMA tpch_schema CASCADE;

# Benchmark query: materialise the gzipped CSV into a table via
# read_csv_auto. The file is passed as a one-element list.
# NOTE(review): presumably the list form is deliberate -- confirm.
run
CREATE OR REPLACE TABLE lineitem AS (SELECT * FROM read_csv_auto(['${BENCHMARK_DIR}/lineitem.csv.gz']));

# Remove the table created by the run section.
cleanup
DROP TABLE IF EXISTS lineitem;

# Expected output: a single integer column with the number of rows loaded.
result I
6001215

View File

@@ -0,0 +1,24 @@
# name: benchmark/tpch/csv/lineitem_csv_gzip_sample.benchmark
# description: Read Lineitem CSV gzipped with auto-detection sampling the entire file
# group: [csv]

name Read Lineitem CSV gzipped with auto-detection sampling the entire file
group csv

require tpch

# Setup: generate TPC-H SF1 into a scratch schema, export its lineitem table
# to a pipe-delimited `.csv.gz` file with a header row, then drop the scratch
# schema again so only the exported file remains.
# Every SQL section must be followed by an empty line; comments belong
# between sections only, never inside one.
load
CREATE SCHEMA tpch_schema;
CALL dbgen(sf=1, schema='tpch_schema');
COPY tpch_schema.lineitem TO '${BENCHMARK_DIR}/lineitem.csv.gz' (FORMAT CSV, DELIMITER '|', HEADER);
DROP SCHEMA tpch_schema CASCADE;

# Benchmark query: materialise the gzipped CSV into a table via
# read_csv_auto with sample_size=-1 (per the description above, this makes
# auto-detection sample the entire file).
run
CREATE OR REPLACE TABLE lineitem AS (SELECT * FROM read_csv_auto('${BENCHMARK_DIR}/lineitem.csv.gz', sample_size=-1));

# Remove the table created by the run section.
cleanup
DROP TABLE IF EXISTS lineitem;

# Expected output: a single integer column with the number of rows loaded.
result I
6001215

View File

@@ -0,0 +1,49 @@
# name: benchmark/tpch/csv/lineitem_csv_many_files.benchmark
# description: Read the lineitem CSV with many files
# group: [csv]

name Read Lineitem CSV Many Files
group csv

require tpch

# Setup: generate TPC-H SF1, then write lineitem out as 31 pipe-delimited
# CSV files with header rows (lineitem-split-0.csv .. lineitem-split-30.csv).
# Each COPY takes a 200000-row window via LIMIT/OFFSET; the last window
# starts at offset 6000000 and therefore holds the remaining 1215 rows of
# the 6001215 expected below.
# Every SQL section must be followed by an empty line; comments belong
# between sections only, never inside one.
load
CALL dbgen(sf=1);
COPY (FROM lineitem LIMIT 200000 OFFSET 0) TO '${BENCHMARK_DIR}/lineitem-split-0.csv' (HEADER, DELIMITER '|');
COPY (FROM lineitem LIMIT 200000 OFFSET 200000) TO '${BENCHMARK_DIR}/lineitem-split-1.csv' (HEADER, DELIMITER '|');
COPY (FROM lineitem LIMIT 200000 OFFSET 400000) TO '${BENCHMARK_DIR}/lineitem-split-2.csv' (HEADER, DELIMITER '|');
COPY (FROM lineitem LIMIT 200000 OFFSET 600000) TO '${BENCHMARK_DIR}/lineitem-split-3.csv' (HEADER, DELIMITER '|');
COPY (FROM lineitem LIMIT 200000 OFFSET 800000) TO '${BENCHMARK_DIR}/lineitem-split-4.csv' (HEADER, DELIMITER '|');
COPY (FROM lineitem LIMIT 200000 OFFSET 1000000) TO '${BENCHMARK_DIR}/lineitem-split-5.csv' (HEADER, DELIMITER '|');
COPY (FROM lineitem LIMIT 200000 OFFSET 1200000) TO '${BENCHMARK_DIR}/lineitem-split-6.csv' (HEADER, DELIMITER '|');
COPY (FROM lineitem LIMIT 200000 OFFSET 1400000) TO '${BENCHMARK_DIR}/lineitem-split-7.csv' (HEADER, DELIMITER '|');
COPY (FROM lineitem LIMIT 200000 OFFSET 1600000) TO '${BENCHMARK_DIR}/lineitem-split-8.csv' (HEADER, DELIMITER '|');
COPY (FROM lineitem LIMIT 200000 OFFSET 1800000) TO '${BENCHMARK_DIR}/lineitem-split-9.csv' (HEADER, DELIMITER '|');
COPY (FROM lineitem LIMIT 200000 OFFSET 2000000) TO '${BENCHMARK_DIR}/lineitem-split-10.csv' (HEADER, DELIMITER '|');
COPY (FROM lineitem LIMIT 200000 OFFSET 2200000) TO '${BENCHMARK_DIR}/lineitem-split-11.csv' (HEADER, DELIMITER '|');
COPY (FROM lineitem LIMIT 200000 OFFSET 2400000) TO '${BENCHMARK_DIR}/lineitem-split-12.csv' (HEADER, DELIMITER '|');
COPY (FROM lineitem LIMIT 200000 OFFSET 2600000) TO '${BENCHMARK_DIR}/lineitem-split-13.csv' (HEADER, DELIMITER '|');
COPY (FROM lineitem LIMIT 200000 OFFSET 2800000) TO '${BENCHMARK_DIR}/lineitem-split-14.csv' (HEADER, DELIMITER '|');
COPY (FROM lineitem LIMIT 200000 OFFSET 3000000) TO '${BENCHMARK_DIR}/lineitem-split-15.csv' (HEADER, DELIMITER '|');
COPY (FROM lineitem LIMIT 200000 OFFSET 3200000) TO '${BENCHMARK_DIR}/lineitem-split-16.csv' (HEADER, DELIMITER '|');
COPY (FROM lineitem LIMIT 200000 OFFSET 3400000) TO '${BENCHMARK_DIR}/lineitem-split-17.csv' (HEADER, DELIMITER '|');
COPY (FROM lineitem LIMIT 200000 OFFSET 3600000) TO '${BENCHMARK_DIR}/lineitem-split-18.csv' (HEADER, DELIMITER '|');
COPY (FROM lineitem LIMIT 200000 OFFSET 3800000) TO '${BENCHMARK_DIR}/lineitem-split-19.csv' (HEADER, DELIMITER '|');
COPY (FROM lineitem LIMIT 200000 OFFSET 4000000) TO '${BENCHMARK_DIR}/lineitem-split-20.csv' (HEADER, DELIMITER '|');
COPY (FROM lineitem LIMIT 200000 OFFSET 4200000) TO '${BENCHMARK_DIR}/lineitem-split-21.csv' (HEADER, DELIMITER '|');
COPY (FROM lineitem LIMIT 200000 OFFSET 4400000) TO '${BENCHMARK_DIR}/lineitem-split-22.csv' (HEADER, DELIMITER '|');
COPY (FROM lineitem LIMIT 200000 OFFSET 4600000) TO '${BENCHMARK_DIR}/lineitem-split-23.csv' (HEADER, DELIMITER '|');
COPY (FROM lineitem LIMIT 200000 OFFSET 4800000) TO '${BENCHMARK_DIR}/lineitem-split-24.csv' (HEADER, DELIMITER '|');
COPY (FROM lineitem LIMIT 200000 OFFSET 5000000) TO '${BENCHMARK_DIR}/lineitem-split-25.csv' (HEADER, DELIMITER '|');
COPY (FROM lineitem LIMIT 200000 OFFSET 5200000) TO '${BENCHMARK_DIR}/lineitem-split-26.csv' (HEADER, DELIMITER '|');
COPY (FROM lineitem LIMIT 200000 OFFSET 5400000) TO '${BENCHMARK_DIR}/lineitem-split-27.csv' (HEADER, DELIMITER '|');
COPY (FROM lineitem LIMIT 200000 OFFSET 5600000) TO '${BENCHMARK_DIR}/lineitem-split-28.csv' (HEADER, DELIMITER '|');
COPY (FROM lineitem LIMIT 200000 OFFSET 5800000) TO '${BENCHMARK_DIR}/lineitem-split-29.csv' (HEADER, DELIMITER '|');
COPY (FROM lineitem LIMIT 200000 OFFSET 6000000) TO '${BENCHMARK_DIR}/lineitem-split-30.csv' (HEADER, DELIMITER '|');

# Benchmark query: count the rows across every split file matched by the
# glob pattern.
run
SELECT COUNT(*) FROM read_csv_auto('${BENCHMARK_DIR}/lineitem-split-*.csv');

# Expected output: a single integer column with the combined row count.
result I
6001215

View File

@@ -0,0 +1,27 @@
# name: benchmark/tpch/csv/read_lineitem_csv.benchmark
# description: Read the lineitem of TPC-H SF0.1 from a CSV file with an ASCII delimiter
# group: [csv]

name Read Lineitem CSV (ASCII Delimiter)
group csv

require tpch

# Setup: generate TPC-H data at scale factor 0.1 into tables suffixed
# `_normal`, export lineitem_normal as a pipe-delimited CSV with a header
# row, then create a `tpch` schema and call dbgen on it with scale factor 0
# so that tpch.lineitem exists as the COPY target of the run section.
# Every SQL section must be followed by an empty line; comments belong
# between sections only, never inside one.
load
CALL dbgen(sf=0.1, suffix='_normal');
COPY lineitem_normal TO '${BENCHMARK_DIR}/lineitem.csv' (FORMAT CSV, DELIMITER '|', HEADER);
CREATE SCHEMA tpch;
CALL dbgen(sf=0, schema='tpch');

# Benchmark query: load the exported CSV into tpch.lineitem with the
# delimiter and header options given explicitly.
run
COPY tpch.lineitem FROM '${BENCHMARK_DIR}/lineitem.csv' (FORMAT CSV, DELIMITER '|', HEADER);

# cleanup: delete and re-create the lineitem table, so the rows copied in by
# the run section do not remain in tpch.lineitem
cleanup
DROP SCHEMA tpch CASCADE;
CREATE SCHEMA tpch;
CALL dbgen(sf=0, schema='tpch');

# Expected output: a single integer column with the number of rows copied.
result I
600572

View File

@@ -0,0 +1,27 @@
# name: benchmark/tpch/csv/read_lineitem_csv_unicode.benchmark
# description: Read the lineitem of TPC-H SF0.1 from a CSV file with a unicode delimiter
# group: [csv]

name Read Lineitem CSV (Unicode Delimiter)
group csv

require tpch

# Setup: generate TPC-H data at scale factor 0.1 into tables suffixed
# `_normal`, export lineitem_normal as a CSV with a header row using a
# non-ASCII delimiter character, then create a `tpch` schema and call dbgen
# on it with scale factor 0 so that tpch.lineitem exists as the COPY target
# of the run section.
# Every SQL section must be followed by an empty line; comments belong
# between sections only, never inside one.
load
CALL dbgen(sf=0.1, suffix='_normal');
COPY lineitem_normal TO '${BENCHMARK_DIR}/lineitem.csv' (FORMAT CSV, DELIMITER '🦆', HEADER);
CREATE SCHEMA tpch;
CALL dbgen(sf=0, schema='tpch');

# Benchmark query: load the exported CSV into tpch.lineitem, passing the
# same non-ASCII delimiter that was used for the export.
run
COPY tpch.lineitem FROM '${BENCHMARK_DIR}/lineitem.csv' (FORMAT CSV, DELIMITER '🦆', HEADER);

# cleanup: delete and re-create the lineitem table, so the rows copied in by
# the run section do not remain in tpch.lineitem
cleanup
DROP SCHEMA tpch CASCADE;
CREATE SCHEMA tpch;
CALL dbgen(sf=0, schema='tpch');

# Expected output: a single integer column with the number of rows copied.
result I
600572

View File

@@ -0,0 +1,14 @@
# name: benchmark/tpch/csv/write_lineitem_csv.benchmark
# description: Write the lineitem of TPC-H SF1 to a CSV file
# group: [csv]

# NOTE(review): the included template presumably loads the TPC-H tables and
# defines the `sf` parameter referenced by the result line -- confirm
# against benchmark/tpch/tpch_load.benchmark.in.
include benchmark/tpch/tpch_load.benchmark.in

name Write Lineitem CSV
group csv

# Benchmark query: export lineitem as a pipe-delimited CSV with a header row.
# Every SQL section must be followed by an empty line; comments belong
# between sections only, never inside one.
run
COPY lineitem TO '${BENCHMARK_DIR}/lineitem.csv' (FORMAT CSV, DELIMITER '|', HEADER);

# Expected output for sf=1: a single integer column with the number of rows
# written.
result I sf=1
6001215

View File

@@ -0,0 +1,17 @@
# name: benchmark/tpch/csv/write_lineitem_csv_no_order.benchmark
# description: Write the lineitem of TPC-H SF1 to a CSV file (without preserving insertion order)
# group: [csv]

# NOTE(review): the included template presumably loads the TPC-H tables and
# defines the `sf` parameter referenced by the result line -- confirm
# against benchmark/tpch/tpch_load.benchmark.in.
include benchmark/tpch/tpch_load.benchmark.in

name Write Lineitem CSV (Non-Order Preserving)
group csv

# Session setup: turn off insertion-order preservation, which is the
# variation this benchmark measures against the order-preserving CSV write.
# Every SQL section must be followed by an empty line; comments belong
# between sections only, never inside one.
init
SET preserve_insertion_order=false;

# Benchmark query: export lineitem as a pipe-delimited CSV with a header row.
run
COPY lineitem TO '${BENCHMARK_DIR}/lineitem.csv' (FORMAT CSV, DELIMITER '|', HEADER);

# Expected output for sf=1: a single integer column with the number of rows
# written.
result I sf=1
6001215