should be it

This commit is contained in:
2025-10-24 19:21:19 -05:00
parent a4b23fc57c
commit f09560c7b1
14047 changed files with 3161551 additions and 1 deletions

View File

@@ -0,0 +1,36 @@
# name: test/sql/storage/compression/dictionary/dictionary_compression_ratio.test_slow
# description: Assert dictionary compression ratio is within reasonable margins
# group: [dictionary]

load __TEST_DIR__/test_dictionary.db readwrite v1.0.0

# First test: detailed compression ratio
statement ok
PRAGMA force_compression='dictionary';

# Assuming 10 chars at 1 byte, with a 4byte offset and a 2byte length per string uncompressed:
# Ratio absolute max at 3 bits per value (ignoring dict size) = (16/(3/8)) = 42.6666666667
statement ok
CREATE TABLE test_dictionary AS SELECT concat('BEEPBOOP-', (i%3)::VARCHAR) AS i FROM range(0, 1250000) tbl(i);

# Checkpoint before the storage info of this table is inspected further down.
statement ok
CHECKPOINT;

# Baseline: the exact same data, written with compression forced to 'uncompressed'.
statement ok
PRAGMA force_compression='uncompressed';

statement ok
CREATE TABLE test_uncompressed AS SELECT concat('BEEPBOOP-', (i%3)::VARCHAR) AS i FROM range(0, 1250000) tbl(i);

statement ok
CHECKPOINT;

# keep a wide margin for the compression ratio to account for changes (like the block size) that
# influence the compression ratio
# The ratio is computed from the number of distinct block ids that hold VARCHAR segments
# in each table (uncompressed block count divided by dictionary block count).
query I
SELECT uncompressed::FLOAT / dictionary::FLOAT > 30 AND uncompressed::FLOAT / dictionary::FLOAT < 55 FROM
(SELECT count(DISTINCT block_id) AS dictionary FROM pragma_storage_info('test_dictionary') WHERE segment_type IN ('VARCHAR')) AS dictionary,
(SELECT count(DISTINCT block_id) AS uncompressed FROM pragma_storage_info('test_uncompressed') WHERE segment_type IN ('VARCHAR')) AS uncompressed;
----
True

View File

@@ -0,0 +1,22 @@
# name: test/sql/storage/compression/dictionary/dictionary_read.test_slow
# description: Read a pre-built database file and verify its VARCHAR segments use Dictionary compression
# group: [dictionary]

# The database is written with a vector size of 2048.
require vector_size 2048

# Unpack the stored database file into the test directory, then open it read-only.
unzip data/storage/dictionary.db.gz __TEST_DIR__/dictionary.db

load __TEST_DIR__/dictionary.db readonly

# Number of non-NULL street values in the stored table.
query I
select count(street) from tbl;
----
397527

# Scan the whole column. No inline values are listed here; the result is tracked
# under the 'res' label instead.
query I nosort res
select street from tbl;
----

# Expect no rows: every VARCHAR segment of tbl must report 'Dictionary' as its compression.
query I
SELECT compression FROM pragma_storage_info('tbl') WHERE segment_type == 'VARCHAR' AND compression != 'Dictionary';
----

View File

@@ -0,0 +1,25 @@
# name: test/sql/storage/compression/dictionary/dictionary_storage_info.test
# description: Test storage with Dictionary compression
# group: [dictionary]

# NOTE(review): presumably skips this test when the latest storage format is forced - confirm.
require no_latest_storage

# load the DB from disk
load __TEST_DIR__/test_dictionary.db readwrite v1.0.0

statement ok
PRAGMA force_compression = 'dictionary'

statement ok
CREATE TABLE test (a VARCHAR, b VARCHAR);

# Repeated values plus one all-NULL row.
statement ok
INSERT INTO test VALUES ('11', '22'), ('11', '22'), ('12', '21'), (NULL, NULL)

# Checkpoint before inspecting the storage info of the table.
statement ok
CHECKPOINT

# A VARCHAR segment of the table must report Dictionary compression.
query I
SELECT compression FROM pragma_storage_info('test') WHERE segment_type ILIKE 'VARCHAR' LIMIT 1
----
Dictionary

View File

@@ -0,0 +1,46 @@
# name: test/sql/storage/compression/dictionary/fetch_row.test
# description: Test storage with Dictionary compression
# group: [dictionary]

load __TEST_DIR__/test_dictionary_fetchrow.db readwrite v1.0.0

statement ok
PRAGMA force_compression = 'dictionary'

statement ok
CREATE TABLE test (
	a INTEGER,
	b VARCHAR
);

# 10,000 rows; b cycles through four distinct strings and NULL, keyed on a % 5.
statement ok
INSERT INTO test (a, b)
SELECT
	x AS a,
	CASE x % 5
		WHEN 0 THEN 'aaaa'
		WHEN 1 THEN 'bbbb'
		WHEN 2 THEN 'cccc'
		WHEN 3 THEN 'dddd'
		WHEN 4 THEN NULL
	END AS b
FROM range(10_000) t(x);

statement ok
CHECKPOINT

# Restart the database before the queries below run.
restart

# A VARCHAR segment of the table must report Dictionary compression.
query I
SELECT compression FROM pragma_storage_info('test') WHERE segment_type ILIKE 'VARCHAR' LIMIT 1
----
Dictionary

# Each value of a % 5 maps to exactly one value of b (see the CASE above), so ordering the
# distinct values by a % 5 yields them in the order the CASE branches are listed.
query I
select distinct b from test order by a % 5;
----
aaaa
bbbb
cccc
dddd
NULL

View File

@@ -0,0 +1,26 @@
# name: test/sql/storage/compression/dictionary/force_dictionary.test
# description: Test forcing dictionary encoding as the compression scheme
# group: [dictionary]

# NOTE(review): presumably skips this test when the latest storage format is forced - confirm.
require no_latest_storage

require vector_size 2048

load __TEST_DIR__/force_dictionary.db readwrite v1.0.0

statement ok
PRAGMA force_compression = 'dictionary'

statement ok
CREATE TABLE test_dict (a VARCHAR);

# 2000 rows, every value distinct (the string form of 0..1999).
statement ok
INSERT INTO test_dict SELECT i::VARCHAR FROM range(0, 2000) tbl(i);

# Checkpoint before inspecting the storage info of the table.
statement ok
CHECKPOINT

# The forced scheme must be reported even though no value repeats.
query I
SELECT compression FROM pragma_storage_info('test_dict') WHERE segment_type ILIKE 'VARCHAR' LIMIT 1
----
Dictionary