should be it

2025-10-24 19:21:19 -05:00
parent a4b23fc57c
commit f09560c7b1
14047 changed files with 3161551 additions and 1 deletion


@@ -0,0 +1,23 @@
# name: benchmark/parquet/clickbench_write.benchmark
# description: Write ClickBench data to Parquet
# group: [parquet]
require httpfs
require parquet
name ClickBench Write Parquet
group Clickbench
cache clickbench.duckdb
load benchmark/clickbench/queries/load.sql
init
set preserve_insertion_order=false;
run
COPY hits TO '${BENCHMARK_DIR}/hits.parquet';
result I
10000000
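
Note: COPY ... TO returns the number of rows written as a single integer column, which is what the "result I" line is checked against (10000000 rows here). A rough standalone sketch of the same step in a plain DuckDB shell, assuming a clickbench.duckdb database with the hits table already loaded (the output path is illustrative):

ATTACH 'clickbench.duckdb' AS cb (READ_ONLY);
SET preserve_insertion_order = false;
-- COPY reports the written row count, which the benchmark's expected result matches
COPY (SELECT * FROM cb.hits) TO '/tmp/hits.parquet' (FORMAT parquet);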


@@ -0,0 +1,7 @@
# name: benchmark/parquet/dictionary_read-long-100.benchmark
# description: Read dictionary-encoded data from Parquet, 100 unique long strings, 10% NULLs
# group: [parquet]
template benchmark/parquet/dictionary_read.benchmark.in
UNIQUE_COUNT=100
PREFIX=veryveryverylongstring_


@@ -0,0 +1,7 @@
# name: benchmark/parquet/dictionary_read-long-1000000.benchmark
# description: Read dictionary-encoded data from Parquet, 1000000 unique long strings, 10% NULLs
# group: [parquet]
template benchmark/parquet/dictionary_read.benchmark.in
UNIQUE_COUNT=1000000
PREFIX=veryveryverylongstring_


@@ -0,0 +1,7 @@
# name: benchmark/parquet/dictionary_read-short-100.benchmark
# description: Read dictionary-encoded data from Parquet, 100 unique short strings, 10% NULLs
# group: [parquet]
template benchmark/parquet/dictionary_read.benchmark.in
UNIQUE_COUNT=100
PREFIX=''


@@ -0,0 +1,7 @@
# name: benchmark/parquet/dictionary_read-short-1000000.benchmark
# description: Read dictionary-encoded data from Parquet, 1000000 unique short strings, 10% NULLs
# group: [parquet]
template benchmark/parquet/dictionary_read.benchmark.in
UNIQUE_COUNT=1000000
PREFIX=''
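
The four instantiations above differ only in UNIQUE_COUNT and PREFIX: the long variants prepend 'veryveryverylongstring_' to every value, while the short variants pass an empty prefix. As a sketch, the distinct values the long-100 variant dictionary-encodes can be previewed in a plain DuckDB shell:

-- the 100 distinct long strings: veryveryverylongstring_0 ... veryveryverylongstring_99
SELECT 'veryveryverylongstring_' || range::varchar AS v
FROM range(0, 100);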


@@ -0,0 +1,15 @@
# name: benchmark/parquet/dictionary_read.benchmark.in
# description: Template for reading dictionary-encoded data from Parquet, parameterized by UNIQUE_COUNT and PREFIX, 10% NULLs
# group: [parquet]
name Parquet Dictionary Read Benchmark
group parquet
load
set variable total_rows = 100000000;
set variable null_rows_factor = 0.1;
set variable unique_values = ${UNIQUE_COUNT};
copy (
    select * from (
        select '${PREFIX}' || r1.range::varchar v
        from range(0, getvariable('unique_values')) r1,
             range(0, ((getvariable('total_rows') * (1 - getvariable('null_rows_factor'))) // getvariable('unique_values'))::INTEGER) r2
        union all
        select null from range((getvariable('total_rows') * getvariable('null_rows_factor'))::INTEGER)
    ) order by random()
) to '${BENCHMARK_DIR}/dictionary_read.parquet' (dictionary_compression_ratio_threshold 0);
run
select count(v) from '${BENCHMARK_DIR}/dictionary_read.parquet';
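
The load step above can be reproduced outside the benchmark harness by substituting concrete template parameters. A scaled-down sketch with UNIQUE_COUNT=100, an empty PREFIX, and 1000000 rows instead of 100000000 (output path is illustrative):

SET VARIABLE total_rows = 1000000;
SET VARIABLE null_rows_factor = 0.1;
SET VARIABLE unique_values = 100;
COPY (
    SELECT * FROM (
        SELECT r1.range::varchar AS v
        FROM range(0, getvariable('unique_values')) r1,
             range(0, ((getvariable('total_rows') * (1 - getvariable('null_rows_factor')))
                       // getvariable('unique_values'))::INTEGER) r2
        UNION ALL
        SELECT NULL
        FROM range((getvariable('total_rows') * getvariable('null_rows_factor'))::INTEGER)
    ) ORDER BY random()
) TO '/tmp/dictionary_read.parquet' (dictionary_compression_ratio_threshold 0);
-- count(v) skips NULLs, so this should report 900000 non-NULL rows
SELECT count(v) FROM '/tmp/dictionary_read.parquet';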