should be it

This commit is contained in:
2025-10-24 19:21:19 -05:00
parent a4b23fc57c
commit f09560c7b1
14047 changed files with 3161551 additions and 1 deletions

View File

@@ -0,0 +1,24 @@
# name: benchmark/micro/compression/alp/alp_read.benchmark
# description: Scanning a large amount of doubles
# group: [alp]
# FIX: the load step previously dropped a leftover 'integers' table; it must drop
# this benchmark's own 'temperatures' table so the load is idempotent on a
# persistent database (re-running would otherwise fail on CREATE TABLE).
name Alp Scan
group alp
storage persistent
require parquet
require httpfs
load
DROP TABLE IF EXISTS temperatures;
PRAGMA force_compression='alp';
CREATE TABLE temperatures (
temperature DOUBLE
);
INSERT INTO temperatures SELECT temp FROM 'https://github.com/duckdb/duckdb-data/releases/download/v1.0/city_temperature.parquet' t(temp), range(28);
checkpoint;
run
select avg(temperature) from temperatures;
result I
56.028391124637494

View File

@@ -0,0 +1,16 @@
# name: benchmark/micro/compression/alp/alp_read_best_case.benchmark
# description: ALP best case scenario is when it finds low precision decimals within a limited absolute range
# group: [alp]
# round(random(), 1) yields one-decimal values in [0, 1] -- low precision, small range
name Alp Scan
group alp
storage persistent
load
DROP TABLE IF EXISTS alp_random_doubles;
PRAGMA force_compression='alp';
create table alp_random_doubles as select round(random(), 1)::DOUBLE as data from range(200000000) tbl(i);
checkpoint;
run
select avg(data) from alp_random_doubles;

View File

@@ -0,0 +1,16 @@
# name: benchmark/micro/compression/alp/alp_read_worst_case.benchmark
# description: ALP slowest scenario is when it finds high precision decimals. Here, ALP achieves no compression and everything is encoded as exception
# group: [alp]
# raw random() doubles have full precision, so every value becomes an ALP exception
name Alp Scan
group alp
storage persistent
load
DROP TABLE IF EXISTS alp_random_doubles;
PRAGMA force_compression='alp';
create table alp_random_doubles as select random()::DOUBLE as data from range(200000000) tbl(i);
checkpoint;
run
select avg(data) from alp_random_doubles;

View File

@@ -0,0 +1,27 @@
# name: benchmark/micro/compression/alp/alp_store.benchmark
# description: Storing a large amount of doubles (the timed run inserts from an uncompressed table into an ALP-compressed one)
# group: [alp]
name Alp Insert
group alp
storage persistent
require_reinit
require parquet
require httpfs
load
PRAGMA force_compression='uncompressed';
DROP TABLE IF EXISTS temperatures_uncompressed;
CREATE TABLE temperatures_uncompressed (
temperature DOUBLE
);
INSERT INTO temperatures_uncompressed SELECT temp FROM 'https://github.com/duckdb/duckdb-data/releases/download/v1.0/city_temperature.parquet' t(temp), range(28);
CREATE TABLE temperatures_alp (
temperature DOUBLE
);
PRAGMA force_compression='alp';
checkpoint;
run
INSERT INTO temperatures_alp SELECT * FROM temperatures_uncompressed;
checkpoint;

View File

@@ -0,0 +1,16 @@
# name: benchmark/micro/compression/alp/alp_store_best_case.benchmark
# description: ALP best case scenario is when it finds low precision decimals within a limited absolute range.
# group: [alp]
name Alp Insert
group alp
storage persistent
require_reinit
load
PRAGMA force_compression='alp';
DROP TABLE IF EXISTS alp_random_doubles;
run
create table alp_random_doubles as select round(random(), 1)::DOUBLE as data from range(50000000) tbl(i);
checkpoint;

View File

@@ -0,0 +1,16 @@
# name: benchmark/micro/compression/alp/alp_store_worst_case.benchmark
# description: ALP slowest scenario is when it finds high precision decimals. Here, ALP achieves no compression and everything is encoded as exception
# group: [alp]
name Alp Insert
group alp
storage persistent
require_reinit
load
PRAGMA force_compression='alp';
DROP TABLE IF EXISTS alp_random_doubles;
run
create table alp_random_doubles as select random()::DOUBLE as data from range(50000000) tbl(i);
checkpoint;

View File

@@ -0,0 +1,24 @@
# name: benchmark/micro/compression/alprd/alprd_read.benchmark
# description: Scanning a large amount of doubles
# group: [alprd]
# FIX: the load step previously dropped a leftover 'integers' table; it must drop
# this benchmark's own 'temperatures' table so the load is idempotent on a
# persistent database (re-running would otherwise fail on CREATE TABLE).
name Alprd Scan
group alprd
storage persistent
require parquet
require httpfs
load
DROP TABLE IF EXISTS temperatures;
PRAGMA force_compression='alprd';
CREATE TABLE temperatures (
temperature DOUBLE
);
INSERT INTO temperatures SELECT temp FROM 'https://github.com/duckdb/duckdb-data/releases/download/v1.0/city_temperature.parquet' t(temp), range(28);
checkpoint;
run
select avg(temperature) from temperatures;
result I
56.028391124637494

View File

@@ -0,0 +1,16 @@
# name: benchmark/micro/compression/alprd/alprd_read_best_case.benchmark
# description: ALPRD best case scenario is when all the floats share their front bits
# group: [alprd]
# random() + 10 keeps every value in [10, 11), so the doubles share their leading bits
name Alprd Scan
group alprd
storage persistent
load
DROP TABLE IF EXISTS alprd_random_doubles;
PRAGMA force_compression='alprd';
create table alprd_random_doubles as select (random() + 10)::DOUBLE as data from range(200000000) tbl(i);
checkpoint;
run
select avg(data) from alprd_random_doubles;

View File

@@ -0,0 +1,16 @@
# name: benchmark/micro/compression/alprd/alprd_read_worst_case.benchmark
# description: ALPRD worst case scenario is when all the float have unique front bits. Multiplying by different powers of two ensures us unique front bits
# group: [alprd]
name Alprd Scan
group alprd
storage persistent
load
DROP TABLE IF EXISTS alprd_random_doubles;
PRAGMA force_compression='alprd';
create table alprd_random_doubles as select (random() * pow(2, (i % 1000)) * (CASE WHEN i%2=0 THEN 1 ELSE -1 END))::DOUBLE as data from range(200000000) tbl(i);
checkpoint;
run
select avg(data) from alprd_random_doubles;

View File

@@ -0,0 +1,27 @@
# name: benchmark/micro/compression/alprd/alprd_store.benchmark
# description: Storing a large amount of doubles (the timed run inserts from an uncompressed table into an ALPRD-compressed one)
# group: [alprd]
name Alprd Insert
group alprd
storage persistent
require_reinit
require parquet
require httpfs
load
PRAGMA force_compression='uncompressed';
DROP TABLE IF EXISTS temperatures_uncompressed;
CREATE TABLE temperatures_uncompressed (
temperature DOUBLE
);
INSERT INTO temperatures_uncompressed SELECT temp FROM 'https://github.com/duckdb/duckdb-data/releases/download/v1.0/city_temperature.parquet' t(temp), range(28);
CREATE TABLE temperatures_alprd (
temperature DOUBLE
);
PRAGMA force_compression='alprd';
checkpoint;
run
INSERT INTO temperatures_alprd SELECT * FROM temperatures_uncompressed;
checkpoint;

View File

@@ -0,0 +1,17 @@
# name: benchmark/micro/compression/alprd/alprd_store_best_case.benchmark
# description: ALPRD best case scenario is when all the floats share their front bits.
# group: [alprd]
# random() + 10 keeps every value in [10, 11), so the doubles share their leading bits
name Alprd Insert
group alprd
storage persistent
require_reinit
load
DROP TABLE IF EXISTS alprd_random_doubles;
PRAGMA force_compression='alprd';
checkpoint;
run
create table alprd_random_doubles as select (random() + 10)::DOUBLE as data from range(50000000) tbl(i);
checkpoint;

View File

@@ -0,0 +1,17 @@
# name: benchmark/micro/compression/alprd/alprd_store_worst_case.benchmark
# description: ALPRD worst case scenario is when all the floats have unique front bits. Multiplying by different powers of two ensures unique front bits
# group: [alprd]
name Alprd Insert
group alprd
storage persistent
require_reinit
load
DROP TABLE IF EXISTS alprd_random_doubles;
PRAGMA force_compression='alprd';
checkpoint;
run
create table alprd_random_doubles as select (random() * pow(2, (i % 1000)) * (CASE WHEN i%2=0 THEN 1 ELSE -1 END))::DOUBLE as data from range(50000000) tbl(i);
checkpoint;

View File

@@ -0,0 +1,20 @@
# name: benchmark/micro/compression/bitpacking/bitpacking_read_constant.benchmark
# description: Scanning 1GB of ints compressed mostly with the CONSTANT bitpacking mode
# group: [bitpacking]
# i/119000 (integer division) produces long runs of a single repeated value
name Bitpacking Scan Constant Mode
group bitpacking
storage persistent
load
DROP TABLE IF EXISTS integers;
PRAGMA force_compression='bitpacking';
PRAGMA force_bitpacking_mode='constant';
CREATE TABLE integers AS SELECT (i/119000)::INT32 as i FROM range(0, 250000000) tbl(i);
checkpoint;
run
select avg(i) from integers;
result I
1049.9202

View File

@@ -0,0 +1,20 @@
# name: benchmark/micro/compression/bitpacking/bitpacking_read_constant_delta.benchmark
# description: Scanning 1GB of ints compressed mostly with the CONSTANT_DELTA bitpacking mode
# group: [bitpacking]
# consecutive integers form a constant-delta (delta = 1) sequence
name Bitpacking Scan Constant Delta Mode
group bitpacking
storage persistent
load
DROP TABLE IF EXISTS integers;
PRAGMA force_compression='bitpacking';
PRAGMA force_bitpacking_mode='constant_delta';
CREATE TABLE integers AS SELECT i::INT32 as i FROM range(0, 250000000) tbl(i);
checkpoint;
run
select avg(i) from integers;
result I
124999999.5

View File

@@ -0,0 +1,20 @@
# name: benchmark/micro/compression/bitpacking/bitpacking_read_dfor.benchmark
# description: Scanning 1GB of ints compressed mostly with the Delta FOR bitpacking mode
# group: [bitpacking]
# i%4000000 yields steadily increasing values with an occasional reset to 0
name Bitpacking Scan Delta For Mode
group bitpacking
storage persistent
load
DROP TABLE IF EXISTS integers;
PRAGMA force_compression='bitpacking';
PRAGMA force_bitpacking_mode='delta_for';
CREATE TABLE integers AS SELECT (i%4000000)::INT32 AS i FROM range(0, 250000000) tbl(i);
checkpoint;
run
select avg(i) from integers;
result I
1991999.5

View File

@@ -0,0 +1,20 @@
# name: benchmark/micro/compression/bitpacking/bitpacking_read_for.benchmark
# description: Scanning 1GB of ints compressed mostly with the FOR bitpacking mode
# group: [bitpacking]
# values are bounded to [0, 4000000), keeping the frame-of-reference offsets small
name Bitpacking Scan For Mode
group bitpacking
storage persistent
load
DROP TABLE IF EXISTS integers;
PRAGMA force_compression='bitpacking';
PRAGMA force_bitpacking_mode='for';
CREATE TABLE integers AS SELECT (i%4000000)::INT32 AS i FROM range(0, 250000000) tbl(i);
checkpoint;
run
select avg(i) from integers;
result I
1991999.5

View File

@@ -0,0 +1,17 @@
# name: benchmark/micro/compression/bitpacking/bitpacking_store_constant.benchmark
# description: Storing 1GB of ints compressed mostly with the CONSTANT bitpacking mode
# group: [bitpacking]
# i/119000 (integer division) produces long runs of a single repeated value
name Bitpacking Insert Constant Mode
group bitpacking
storage persistent
require_reinit
load
PRAGMA force_compression='bitpacking';
PRAGMA force_bitpacking_mode='constant';
DROP TABLE IF EXISTS integers;
run
CREATE TABLE integers AS SELECT (i/119000)::INT32 as i FROM range(0, 250000000) tbl(i);
checkpoint;

View File

@@ -0,0 +1,17 @@
# name: benchmark/micro/compression/bitpacking/bitpacking_store_constant_delta.benchmark
# description: Storing 1GB of ints compressed mostly with the CONSTANT DELTA bitpacking mode
# group: [bitpacking]
# NOTE(review): i%250000000 equals i over range(0, 250000000); the modulo is a no-op
name Bitpacking Insert Constant Delta Mode
group bitpacking
storage persistent
require_reinit
load
PRAGMA force_compression='bitpacking';
PRAGMA force_bitpacking_mode='constant_delta';
DROP TABLE IF EXISTS integers;
run
CREATE TABLE integers AS SELECT (i%250000000)::INT32 as i FROM range(0, 250000000) tbl(i);
checkpoint;

View File

@@ -0,0 +1,17 @@
# name: benchmark/micro/compression/bitpacking/bitpacking_store_dfor.benchmark
# description: Storing 1GB of ints compressed mostly with the DELTA FOR bitpacking mode
# group: [bitpacking]
# values alternate between 0 and 2048, so consecutive deltas are +/-2048
name Bitpacking Insert Delta For Mode
group bitpacking
storage persistent
require_reinit
load
PRAGMA force_compression='bitpacking';
PRAGMA force_bitpacking_mode='delta_for';
DROP TABLE IF EXISTS integers;
run
CREATE TABLE integers AS SELECT CASE WHEN i%2=0 THEN 0 ELSE 2048 END AS i FROM range(0, 250000000) tbl(i);
checkpoint;

View File

@@ -0,0 +1,17 @@
# name: benchmark/micro/compression/bitpacking/bitpacking_store_for.benchmark
# description: Storing 1GB of ints compressed mostly with the FOR bitpacking mode
# group: [bitpacking]
# NOTE(review): i%250000000 equals i over range(0, 250000000); the modulo is a no-op
name Bitpacking Insert For Mode
group bitpacking
storage persistent
require_reinit
load
PRAGMA force_compression='bitpacking';
PRAGMA force_bitpacking_mode='for';
DROP TABLE IF EXISTS integers;
run
CREATE TABLE integers AS SELECT (i%250000000)::INT32 AS i FROM range(0, 250000000) tbl(i);
checkpoint;

View File

@@ -0,0 +1,20 @@
# name: benchmark/micro/compression/bitpacking_hugeint/bitpacking_hugeint_read_constant.benchmark
# description: Scanning hugeints compressed mostly with the CONSTANT bitpacking mode
# group: [bitpacking_hugeint]
# same data as bitpacking_read_constant, but cast to HUGEINT instead of INT32
name Bitpacking Scan Constant Mode Hugeint
group bitpacking
storage persistent
load
DROP TABLE IF EXISTS integers;
PRAGMA force_compression='bitpacking';
PRAGMA force_bitpacking_mode='constant';
CREATE TABLE integers AS SELECT (i/119000)::HUGEINT as i FROM range(0, 250000000) tbl(i);
checkpoint;
run
select avg(i) from integers;
result I
1049.9202

View File

@@ -0,0 +1,17 @@
# name: benchmark/micro/compression/bitpacking_hugeint/bitpacking_hugeint_store_constant.benchmark
# description: Storing hugeints compressed mostly with the CONSTANT bitpacking mode
# group: [bitpacking_hugeint]
# same data as bitpacking_store_constant, but cast to HUGEINT instead of INT32
name Bitpacking Insert Constant Mode Hugeint
group bitpacking
storage persistent
require_reinit
load
PRAGMA force_compression='bitpacking';
PRAGMA force_bitpacking_mode='constant';
DROP TABLE IF EXISTS integers;
run
CREATE TABLE integers AS SELECT (i/119000)::HUGEINT as i FROM range(0, 250000000) tbl(i);
checkpoint;

View File

@@ -0,0 +1,19 @@
# name: benchmark/micro/compression/dictionary/dictionary_read.benchmark
# description: Scanning strings at ~4.3x compression
# group: [dictionary]
# i%1000 gives exactly 1000 distinct string values
name Dictionary Compression Scan
group dictionary
storage persistent
load
DROP TABLE IF EXISTS test;
PRAGMA force_compression='dict_fsst';
CREATE TABLE test AS SELECT (100 + (i%1000))::VARCHAR AS i FROM range(0, 200_000_000) tbl(i);
checkpoint;
run
select avg(i::INT) from test;
result I
599.500000

View File

@@ -0,0 +1,24 @@
# name: benchmark/micro/compression/dictionary/dictionary_read_best_case.benchmark
# description: Scanning strings at best case compression of only 2 unique values
# group: [dictionary]
# 200 rows alternating between the values '100' and '101'; the assert verifies
# that DICT_FSST was actually chosen for the column
name Dictionary Compression Scan
group dictionary
storage persistent v1.3.0
load
DROP TABLE IF EXISTS test;
PRAGMA force_compression='dict_fsst';
CREATE TABLE test AS SELECT (100 + (i%2))::VARCHAR AS i FROM range(0, 200) tbl(i);
checkpoint;
assert I
select compression from pragma_storage_info('test') where segment_type in ('VARCHAR')
----
DICT_FSST
run
select avg(i::INT) from test;
result I
100.500000

View File

@@ -0,0 +1,16 @@
# name: benchmark/micro/compression/dictionary/dictionary_read_worst_case.benchmark
# description: Scanning data that is uncompressible with dictionary encoding
# group: [dictionary]
# FIX: group directive said 'aggregate'; aligned with the [dictionary] header above
name Dictionary Compression Scan
group dictionary
storage persistent
load
DROP TABLE IF EXISTS test;
PRAGMA force_compression='dict_fsst';
CREATE TABLE test AS SELECT i::VARCHAR AS i FROM range(0, 200_000_000) tbl(i);
checkpoint;
run
select avg(i::INT) from test;

View File

@@ -0,0 +1,16 @@
# name: benchmark/micro/compression/dictionary/dictionary_read_worst_case_with_null.benchmark
# description: Scanning data that is uncompressible with dictionary encoding, with NULLs interspersed
# group: [dictionary]
# FIX: group directive said 'aggregate'; aligned with the [dictionary] header above
name Dictionary Compression Scan
group dictionary
storage persistent
load
DROP TABLE IF EXISTS test;
PRAGMA force_compression='dict_fsst';
CREATE TABLE test AS SELECT if((i % 200) = 0, NULL, i::VARCHAR) AS i FROM range(0, 200_000_000) tbl(i);
checkpoint;
run
select avg(i::INT) from test;

View File

@@ -0,0 +1,17 @@
# name: benchmark/micro/compression/dictionary/dictionary_store.benchmark
# description: Storing strings compressed at ~4.3x compression
# group: [dictionary]
# FIX: group directive said 'aggregate'; aligned with the [dictionary] header above
name Dictionary Compression Write
group dictionary
storage persistent
require_reinit
load
PRAGMA force_compression='dict_fsst';
DROP TABLE IF EXISTS test;
run
CREATE TABLE test AS SELECT (100 + (i%1000))::VARCHAR AS i FROM range(0, 100_000_000) tbl(i);
checkpoint;

View File

@@ -0,0 +1,16 @@
# name: benchmark/micro/compression/dictionary/dictionary_store_worst_case.benchmark
# description: Storing a column containing only unique strings.
# group: [dictionary]
# every string is distinct, so dictionary encoding gains nothing
name Dictionary Compression Write
group dictionary
storage persistent
require_reinit
load
PRAGMA force_compression='dict_fsst';
DROP TABLE IF EXISTS test;
run
CREATE TABLE test AS SELECT i::VARCHAR AS i FROM range(0, 50_000_000) tbl(i);
checkpoint;

View File

@@ -0,0 +1,16 @@
# name: benchmark/micro/compression/dictionary/dictionary_store_worst_case_with_null.benchmark
# description: Storing a column of unique strings with NULLs interspersed.
# group: [dictionary]
# FIX: force_compression was 'dictionary' while every sibling benchmark in this
# directory uses 'dict_fsst'; aligned for consistency
name Dictionary Compression Write
group dictionary
storage persistent
require_reinit
load
PRAGMA force_compression='dict_fsst';
DROP TABLE IF EXISTS test;
run
CREATE TABLE test AS SELECT if((i % 200) = 0, NULL, i::VARCHAR) AS i FROM range(0, 50_000_000) tbl(i);
checkpoint;

View File

@@ -0,0 +1,20 @@
# name: benchmark/micro/compression/fsst/fsst_late_decompression.benchmark
# description: Using a filter on another column to make use of late decompression
# group: [fsst]
# NOTE(review): enable_fsst_vectors=false appears to be what enables the late
# (per-value) decompression path being measured -- confirm against the setting docs
name fsst late decompression benefit
group fsst
storage persistent
load
DROP TABLE IF EXISTS test;
PRAGMA force_compression='fsst';
CREATE TABLE test AS SELECT i as id, (100 + (i%2))::VARCHAR AS value FROM range(0, 50000000) tbl(i);
checkpoint;
SET enable_fsst_vectors=false;
run
select avg(value::INT) from test where id%10=0;
result I
100.500000

View File

@@ -0,0 +1,19 @@
# name: benchmark/micro/compression/fsst/fsst_read.benchmark
# description: Scanning strings at ~3.35x compression
# group: [fsst]
name fsst Compression Scan
group fsst
storage persistent
load
DROP TABLE IF EXISTS test;
PRAGMA force_compression='fsst';
CREATE TABLE test AS SELECT (100 + (i%1000))::VARCHAR AS i FROM range(0, 50000000) tbl(i);
checkpoint;
run
select avg(i::INT) from test;
result I
599.500000

View File

@@ -0,0 +1,16 @@
# name: benchmark/micro/compression/fsst/fsst_read_worst_case.benchmark
# description: Scanning data that is not with fsst encoding, note that compresssion ratio is still 1.9x due to bitpacking
# group: [fsst]
name fsst Compression Scan
group aggregate
storage persistent
load
DROP TABLE IF EXISTS test;
PRAGMA force_compression='fsst';
CREATE TABLE test AS SELECT gen_random_uuid()::VARCHAR AS i FROM range(0, 20000000) tbl(i);
checkpoint;
run
select max(i[2]) from test;

View File

@@ -0,0 +1,16 @@
# name: benchmark/micro/compression/fsst/fsst_store.benchmark
# description: Storing strings compressed at ~3.3x compression
# group: [fsst]
name fsst Compression Write
group aggregate
storage persistent
require_reinit
load
PRAGMA force_compression='fsst';
run
CREATE TABLE test_compressed AS SELECT (100 + (i%1000))::VARCHAR AS i FROM range(0, 2500000) tbl(i);
checkpoint;

View File

@@ -0,0 +1,16 @@
# name: benchmark/micro/compression/fsst/fsst_store_worst_case.benchmark
# description: Storing a column containing only unique strings.
# group: [fsst]
name name fsst Compression Write
group fsst
storage persistent
require_reinit
load
PRAGMA force_compression='fsst';
DROP TABLE IF EXISTS test;
run
CREATE TABLE test AS SELECT gen_random_uuid()::VARCHAR AS i FROM range(0, 2000000) tbl(i);
checkpoint;

View File

@@ -0,0 +1,17 @@
# name: benchmark/micro/compression/fsst/select_fsst_selective.benchmark
# description: FSST String selective filter
# group: [fsst]
name Select FSST String Columns (0.1% selectivity)
group fsst
storage persistent
load
CREATE TABLE string_values(filter_val INTEGER USING COMPRESSION UNCOMPRESSED, str_val VARCHAR USING COMPRESSION FSST, str_val_nulls VARCHAR USING COMPRESSION FSST);
INSERT INTO string_values SELECT i % 1000, case when i%7=0 then concat('thisisalongstring', i) else concat('shortstr', i // 10) end str_val, case when i%3=0 then null else str_val end str_val_nulls FROM range(100_000_000) t(i);
run
SELECT COUNT(*), SUM(strlen(str_val)), SUM(strlen(str_val_nulls)), COUNT(str_val_nulls) FROM string_values WHERE filter_val=77;
result IIII
100000 1631748 1087835 66667

View File

@@ -0,0 +1,17 @@
# name: benchmark/micro/compression/fsst/select_fsst_unselective.benchmark
# description: FSST String unselective filter
# group: [fsst]
name Select FSST String Columns (33% selectivity)
group fsst
storage persistent
load
CREATE TABLE string_values(filter_val INTEGER USING COMPRESSION UNCOMPRESSED, str_val VARCHAR USING COMPRESSION FSST, str_val_nulls VARCHAR USING COMPRESSION FSST);
INSERT INTO string_values SELECT i % 3, case when i%7=0 then concat('thisisalongstring', i) else concat('shortstr', i // 10) end str_val, case when i%3=0 then null else str_val end str_val_nulls FROM range(100_000_000) t(i);
run
SELECT COUNT(*), SUM(strlen(str_val)), SUM(strlen(str_val_nulls)), COUNT(str_val_nulls) FROM string_values WHERE filter_val=1;
result IIII
33333333 543915346 543915346 33333333

View File

@@ -0,0 +1,17 @@
# name: benchmark/micro/compression/rle/select_rle_selective.benchmark
# description: RLE selective filter
# group: [rle]
name Select RLE Columns (0.1% selectivity)
group rle
storage persistent
load
CREATE TABLE rle_values(filter_val INTEGER USING COMPRESSION UNCOMPRESSED, rle_val INTEGER USING COMPRESSION RLE, rle_val_nulls INTEGER USING COMPRESSION RLE);
INSERT INTO rle_values SELECT i % 1000, i // 10 rle_val, case when i%9=0 then null else i // 10 end rle_val_null FROM range(100_000_000) t(i);
run
SELECT COUNT(*), SUM(rle_val), SUM(rle_val_nulls), COUNT(rle_val_nulls) FROM rle_values WHERE filter_val=77;
result IIII
100000 499995700000 444441733323 88889

View File

@@ -0,0 +1,17 @@
# name: benchmark/micro/compression/rle/select_rle_unselective.benchmark
# description: RLE selective filter
# group: [rle]
name Select RLE Columns (33% selectivity)
group rle
storage persistent
load
CREATE TABLE rle_values(filter_val INTEGER USING COMPRESSION UNCOMPRESSED, rle_val INTEGER USING COMPRESSION RLE, rle_val_nulls INTEGER USING COMPRESSION RLE);
INSERT INTO rle_values SELECT i % 3, i // 10 rle_val, case when i%9=0 then null else i // 10 end rle_val_null FROM range(100_000_000) t(i);
run
SELECT COUNT(*), SUM(rle_val), SUM(rle_val_nulls), COUNT(rle_val_nulls) FROM rle_values WHERE filter_val=1;
result IIII
33333333 166666646666667 166666646666667 33333333

View File

@@ -0,0 +1,24 @@
# name: benchmark/micro/compression/roaring/roaring_array_read.benchmark
# description: Scanning 1GB of ints compressed with Array Containers
# group: [roaring]
name Roaring Scan Array Container
group roaring
storage persistent v1.2.0
load
DROP TABLE IF EXISTS tbl;
PRAGMA force_compression='Roaring';
CREATE TABLE tbl AS SELECT case when i%25=0 then 1337 else null end as a FROM range(0, 250_000_000) tbl(i);
checkpoint;
assert I
select DISTINCT compression from pragma_storage_info('tbl') where segment_type in ('VALIDITY')
----
Roaring
run
select count(*) from tbl WHERE a IS NOT NULL;
result I
10000000

View File

@@ -0,0 +1,26 @@
# name: benchmark/micro/compression/roaring/roaring_array_store.benchmark
# description: Writing 250m tuples, with validity masks compressed with Roaring Bitmap Compression (Array Containers)
# group: [roaring]
name Roaring Write Array Container
group roaring
storage persistent v1.2.0
load
CREATE TABLE data_source AS SELECT case when i%25=0 then 1337 else null end as a FROM range(0, 250_000_000) tbl(i);
PRAGMA force_compression='Roaring';
SET checkpoint_threshold = '10.0 GB';
CREATE TABLE test_compression as FROM data_source;
checkpoint;
assert I
select DISTINCT compression from pragma_storage_info('test_compression') where segment_type in ('VALIDITY')
----
Roaring
run
CREATE TABLE tbl AS FROM data_source;
checkpoint;
cleanup
DROP TABLE IF EXISTS tbl;

View File

@@ -0,0 +1,24 @@
# name: benchmark/micro/compression/roaring/roaring_bitset_read.benchmark
# description: Scanning 1GB of ints compressed with Run Containers (inverted)
# group: [roaring]
name Roaring Scan Run Container Inverted
group roaring
storage persistent v1.2.0
load
DROP TABLE IF EXISTS tbl;
PRAGMA force_compression='Roaring';
CREATE TABLE tbl AS SELECT case when i%3=0 then 1337 else null end as a FROM range(0, 250_000_000) tbl(i);
checkpoint;
assert I
select DISTINCT compression from pragma_storage_info('tbl') where segment_type in ('VALIDITY')
----
Roaring
run
select count(*) from tbl WHERE a IS NOT NULL;
result I
83333334

View File

@@ -0,0 +1,26 @@
# name: benchmark/micro/compression/roaring/roaring_bitset_store.benchmark
# description: Writing 250m tuples, with validity masks compressed with Roaring Bitmap Compression (Run Containers (inverted))
# group: [roaring]
name Roaring Write Run Container Inverted
group roaring
storage persistent v1.2.0
load
CREATE TABLE data_source AS SELECT case when i%3=0 then 1337 else null end as a FROM range(0, 250_000_000) tbl(i);
PRAGMA force_compression='roaring';
SET checkpoint_threshold = '10.0 GB';
CREATE TABLE test_compression as FROM data_source;
checkpoint;
assert I
select DISTINCT compression from pragma_storage_info('test_compression') where segment_type in ('VALIDITY')
----
Roaring
run
CREATE TABLE tbl AS FROM data_source;
checkpoint;
cleanup
DROP TABLE IF EXISTS tbl;

View File

@@ -0,0 +1,24 @@
# name: benchmark/micro/compression/roaring/roaring_inverted_array_read.benchmark
# description: Scanning 1GB of ints compressed with Array Containers (inverted)
# group: [roaring]
name Roaring Scan Array Container Inverted
group roaring
storage persistent v1.2.0
load
DROP TABLE IF EXISTS tbl;
PRAGMA force_compression='Roaring';
CREATE TABLE tbl AS SELECT case when i%25=0 then null else 1337 end as a FROM range(0, 250_000_000) tbl(i);
checkpoint;
assert I
select DISTINCT compression from pragma_storage_info('tbl') where segment_type in ('VALIDITY')
----
Roaring
run
select count(*) from tbl WHERE a IS NOT NULL;
result I
240000000

View File

@@ -0,0 +1,26 @@
# name: benchmark/micro/compression/roaring/roaring_inverted_array_store.benchmark
# description: Writing 250m tuples, with validity masks compressed with Roaring Bitmap Compression (Array Containers (inverted))
# group: [roaring]
name Roaring Scan Array Container Inverted
group roaring
storage persistent v1.2.0
load
CREATE TABLE data_source AS SELECT case when i%25=0 then null else 1337 end as a FROM range(0, 250_000_000) tbl(i);
PRAGMA force_compression='roaring';
SET checkpoint_threshold = '10.0 GB';
CREATE TABLE test_compression as FROM data_source;
checkpoint;
assert I
select DISTINCT compression from pragma_storage_info('test_compression') where segment_type in ('VALIDITY')
----
Roaring
run
CREATE TABLE tbl AS FROM data_source;
checkpoint;
cleanup
DROP TABLE IF EXISTS tbl;

View File

@@ -0,0 +1,24 @@
# name: benchmark/micro/compression/roaring/roaring_inverted_run_read.benchmark
# description: Scanning 1GB of ints compressed with Run Containers (inverted)
# group: [roaring]
name Roaring Scan Run Container Inverted
group roaring
storage persistent v1.2.0
load
DROP TABLE IF EXISTS tbl;
PRAGMA force_compression='Roaring';
CREATE TABLE tbl AS SELECT case when i = 0 or (i % 512 != 0 and (i % 512) < 350 or (i % 512) > 450) then 1337 else null end as a FROM range(0, 250_000_000) tbl(i);
checkpoint;
assert I
select DISTINCT compression from pragma_storage_info('tbl') where segment_type in ('VALIDITY')
----
Roaring
run
select count(*) from tbl WHERE a IS NOT NULL;
result I
200195338

View File

@@ -0,0 +1,26 @@
# name: benchmark/micro/compression/roaring/roaring_inverted_run_store.benchmark
# description: Writing 250m tuples, with validity masks compressed with Roaring Bitmap Compression (Run Containers (inverted))
# group: [roaring]
name Roaring Write Run Container Inverted
group roaring
storage persistent v1.2.0
load
CREATE TABLE data_source AS SELECT case when i = 0 or (i % 512 != 0 and (i % 512) < 350 or (i % 512) > 450) then 1337 else null end as a FROM range(0, 250_000_000) tbl(i);
PRAGMA force_compression='Roaring';
SET checkpoint_threshold = '10.0 GB';
CREATE TABLE test_compression as FROM data_source;
checkpoint;
assert I
select DISTINCT compression from pragma_storage_info('test_compression') where segment_type in ('VALIDITY')
----
Roaring
run
CREATE TABLE tbl AS FROM data_source;
checkpoint;
cleanup
DROP TABLE IF EXISTS tbl;

View File

@@ -0,0 +1,24 @@
# name: benchmark/micro/compression/roaring/roaring_run_read.benchmark
# description: Scanning 1GB of ints compressed with Run Containers (inverted)
# group: [roaring]
name Roaring Scan Run Container Inverted
group roaring
storage persistent v1.2.0
load
DROP TABLE IF EXISTS tbl;
PRAGMA force_compression='Roaring';
CREATE TABLE tbl AS SELECT case when i = 0 or (i % 512 != 0 and (i % 512) < 350 or (i % 512) > 450) then null else 1337 end as a FROM range(0, 250_000_000) tbl(i);
checkpoint;
assert I
select DISTINCT compression from pragma_storage_info('tbl') where segment_type in ('VALIDITY')
----
Roaring
run
select count(*) from tbl WHERE a IS NOT NULL;
result I
49804662

View File

@@ -0,0 +1,27 @@
# name: benchmark/micro/compression/roaring/roaring_run_store.benchmark
# description: Writing 250m tuples, with validity masks compressed with Roaring Bitmap Compression (Run Containers)
# group: [roaring]
name Roaring Write Run Container
group roaring
storage persistent v1.2.0
# Roughly 8 runs per Vector
load
CREATE TABLE data_source AS SELECT case when i = 0 or (i % 512 != 0 and (i % 512) < 350 or (i % 512) > 450) then null else 1337 end as a FROM range(0, 250_000_000) tbl(i);
PRAGMA force_compression='Roaring';
SET checkpoint_threshold = '10.0 GB';
CREATE TABLE test_compression as FROM data_source;
checkpoint;
assert I
select DISTINCT compression from pragma_storage_info('test_compression') where segment_type in ('VALIDITY')
----
Roaring
run
CREATE TABLE tbl AS FROM data_source;
checkpoint;
cleanup
DROP TABLE IF EXISTS tbl;

View File

@@ -0,0 +1,18 @@
# name: benchmark/micro/compression/store_tpch_sf1.benchmark
# description: Generating and storing a tpc-h sf1 database using default compression
# group: [compression]
name TPC-H Write benchmark
group aggregate
storage persistent
require_reinit
require tpch
load
PRAGMA force_compression='none';
run
call DBGEN(sf=1);
checkpoint;

View File

@@ -0,0 +1,17 @@
# name: benchmark/micro/compression/string/string_select_selective.benchmark
# description: Uncompressed String selective filter
# group: [string]
name Select Uncompressed String Columns (0.1% selectivity)
group string
storage persistent
load
CREATE TABLE string_values(filter_val INTEGER USING COMPRESSION UNCOMPRESSED, str_val VARCHAR USING COMPRESSION UNCOMPRESSED, str_val_nulls VARCHAR USING COMPRESSION UNCOMPRESSED);
INSERT INTO string_values SELECT i % 1000, case when i%7=0 then concat('thisisalongstring', i) else concat('shortstr', i // 10) end str_val, case when i%3=0 then null else str_val end str_val_nulls FROM range(100_000_000) t(i);
run
SELECT COUNT(*), SUM(strlen(str_val)), SUM(strlen(str_val_nulls)), COUNT(str_val_nulls) FROM string_values WHERE filter_val=77;
result IIII
100000 1631748 1087835 66667

View File

@@ -0,0 +1,17 @@
# name: benchmark/micro/compression/string/string_select_unselective.benchmark
# description: Uncompressed String unselective filter
# group: [string]
name Select Uncompressed String Columns (33% selectivity)
group string
storage persistent
load
CREATE TABLE string_values(filter_val INTEGER USING COMPRESSION UNCOMPRESSED, str_val VARCHAR USING COMPRESSION UNCOMPRESSED, str_val_nulls VARCHAR USING COMPRESSION UNCOMPRESSED);
INSERT INTO string_values SELECT i % 3, case when i%7=0 then concat('thisisalongstring', i) else concat('shortstr', i // 10) end str_val, case when i%3=0 then null else str_val end str_val_nulls FROM range(100_000_000) t(i);
run
SELECT COUNT(*), SUM(strlen(str_val)), SUM(strlen(str_val_nulls)), COUNT(str_val_nulls) FROM string_values WHERE filter_val=1;
result IIII
33333333 543915346 543915346 33333333

View File

@@ -0,0 +1,22 @@
# name: benchmark/micro/compression/zstd/zstd_read.benchmark
# description: ZSTD decompression speed of relatively big (8000 byte) strings
# group: [zstd]
name ZSTD Scan
group zstd
storage persistent v1.2.0
load
DROP TABLE IF EXISTS zstd_strings;
PRAGMA force_compression='zstd';
set variable my_string = (list_reduce([chr(((i % 26) + ord('a'))::INTEGER) for i in range(8000)], (x, y) -> concat(x, y)));
create table zstd_strings as select getvariable('my_string') as data from range(2_500_000) tbl(i);
checkpoint;
assert I
select DISTINCT compression from pragma_storage_info('zstd_strings') where segment_type in ('VARCHAR')
----
ZSTD
run
select avg(strlen(data)) from zstd_strings;

View File

@@ -0,0 +1,24 @@
# name: benchmark/micro/compression/zstd/zstd_store.benchmark
# description: ZSTD decompression speed of relatively big (>= overflow string) strings
# group: [zstd]
name ZSTD Compression Write
group zstd
storage persistent v1.2.0
require_reinit
load
DROP TABLE IF EXISTS zstd_strings;
PRAGMA force_compression='zstd';
set variable my_string = (list_reduce([chr(((i % 26) + ord('a'))::INTEGER) for i in range(4096)], (x, y) -> concat(x, y)));
create table test_compression as select getvariable('my_string') as data from range(2_500_000) tbl(i);
checkpoint;
assert I
select DISTINCT compression from pragma_storage_info('test_compression') where segment_type in ('VARCHAR')
----
ZSTD
run
create table zstd_strings as select getvariable('my_string') as data from range(2_500_000) tbl(i);
checkpoint;