should be it

This commit is contained in:
2025-10-24 19:21:19 -05:00
parent a4b23fc57c
commit f09560c7b1
14047 changed files with 3161551 additions and 1 deletions

View File

@@ -0,0 +1,24 @@
# name: test/sql/storage/compression/rle/force_rle.test
# description: Force RLE as the compression scheme and verify it is reported by the storage info
# group: [rle]

require vector_size 2048

load __TEST_DIR__/force_rle.db

# request RLE for everything written from here on
statement ok
PRAGMA force_compression = 'rle'

statement ok
CREATE TABLE test_rle (a INTEGER);

# 2000 rows holding the values 0..1999
statement ok
INSERT INTO test_rle
SELECT i
FROM range(0, 2000) AS tbl(i);

# checkpoint before inspecting the storage info
statement ok
CHECKPOINT

# the INTEGER segment must report RLE
query I
SELECT compression
FROM pragma_storage_info('test_rle')
WHERE segment_type ILIKE 'INTEGER'
----
RLE

View File

@@ -0,0 +1,56 @@
# name: test/sql/storage/compression/rle/list_rle.test_slow
# description: Test storage with RLE inside lists
# group: [rle]

# load the DB from disk
load __TEST_DIR__/test_rle.db

statement ok
PRAGMA force_compression = 'rle'

statement ok
CREATE TABLE test (id INTEGER, l INTEGER[]);

# rows come in blocks of 1000 that alternate between [1, 1, 1] and [2, 2]
statement ok
INSERT INTO test SELECT i, case when (i//1000)%2=0 then [1, 1, 1] else [2, 2] end FROM range(200000) tbl(i)

# checkpoint before inspecting the storage info, so the compression check
# does not depend on when freshly appended data happens to be compressed
statement ok
CHECKPOINT

query I
SELECT compression FROM pragma_storage_info('test') WHERE segment_type ILIKE 'INTEGER' LIMIT 1
----
RLE

# full unnest: 100000 rows x 3 ones + 100000 rows x 2 twos
query II
SELECT COUNT(*), SUM(i) FROM (SELECT UNNEST(l) FROM test) tbl(i)
----
500000	700000

# filters/skips: ids 5000..5999 all hold [2, 2]
query II
SELECT COUNT(*), SUM(i) FROM (SELECT UNNEST(l) FROM test WHERE id>=5000 AND id<6000) tbl(i)
----
2000	4000

# zonemaps: ids 150000..159999 cover five [1, 1, 1] blocks and five [2, 2] blocks
query II
SELECT COUNT(*), SUM(i) FROM (SELECT UNNEST(l) FROM test WHERE id>=150000 AND id<160000) tbl(i)
----
25000	35000

statement ok
CREATE INDEX i_index ON test(id)

# index lookup in lists
query II
SELECT * FROM test WHERE id=150001
----
150001	[1, 1, 1]

# large lists: 10 groups, each collecting 10000 negated values
statement ok
CREATE TABLE test_large_list AS SELECT i%10 AS id, LIST(-i) AS list FROM range(0,100000) tbl(i) GROUP BY id;

query II
SELECT COUNT(*), SUM(i) FROM (SELECT UNNEST(list) FROM test_large_list) tbl(i)
----
100000	-4999950000

View File

@@ -0,0 +1,29 @@
# name: test/sql/storage/compression/rle/rle_bool.test
# description: Test RLE with booleans
# group: [rle]

# load the DB from disk
load __TEST_DIR__/test_rle_bool.db

statement ok
PRAGMA force_compression = 'rle'

# simple RLE with few values: one run of 2048 false followed by one run of 2048 true
statement ok
CREATE TABLE test (a BOOLEAN);

statement ok
INSERT INTO test select false from range(2048);

statement ok
INSERT INTO test select true from range(2048);

# each boolean value must be found exactly 2048 times
query I
SELECT COUNT(*) FROM test WHERE a=false
----
2048

# the original repeated the a=false filter here, leaving the true run unchecked
query I
SELECT COUNT(*) FROM test WHERE a=true
----
2048

View File

@@ -0,0 +1,84 @@
# name: test/sql/storage/compression/rle/rle_compression_ratio.test_slow
# description: Assert rle compression ratio is within reasonable margins
# group: [rle]

# load the DB from disk
load __TEST_DIR__/test_rle.db

statement ok
PRAGMA force_compression='rle'

# Maximum rle compression for this column:
# Uncompressed size: 5 x 64bit
# Compressed size: 1 x 64bit + 1x 16bit (sizeof rle_count_t)
# Ratio: (5*64) / (64 + 16) = 4
statement ok
CREATE TABLE test_rle AS SELECT (i//5)::INT64 FROM range(0, 2500000) tbl(i);

statement ok
checkpoint

statement ok
PRAGMA force_compression='uncompressed'

# same data as test_rle (runs of 5 identical values), stored uncompressed,
# so the block counts below compare like with like
statement ok
CREATE TABLE test_uncompressed AS SELECT (i//5)::INT64 FROM range(0, 2500000) tbl(i);

statement ok
checkpoint

# This query keeps a pretty wide margin in compression ratio on purpose to account for possible implementation changes
# that influence compression ratio.
# The ratio is derived from the number of distinct blocks each table occupies.
query II
select (uncompressed::FLOAT // rle::FLOAT) > 2.5, (uncompressed::FLOAT // rle::FLOAT) < 4.5 FROM (
select
(select count(distinct block_id) from pragma_storage_info('test_rle') where segment_type not in('VARCHAR', 'VALIDITY')) as rle,
(select count(distinct block_id) from pragma_storage_info('test_uncompressed') where segment_type not in('VARCHAR', 'VALIDITY')) as uncompressed
)
----
True	True

statement ok
drop table test_rle

statement ok
drop table test_uncompressed

# Assert that all supported types do in fact compress
foreach type <numeric>

statement ok
PRAGMA force_compression='uncompressed';

# runs of 10 identical values, cycling through 0..99
statement ok
CREATE TABLE test_uncompressed AS SELECT ((i//10)%100)::${type} FROM range(0, 2500000) tbl(i);

statement ok
checkpoint

statement ok
PRAGMA force_compression='rle'

statement ok
CREATE TABLE test_rle AS SELECT ((i//10)%100)::${type} FROM range(0, 2500000) tbl(i);

statement ok
checkpoint

# assert compression ratio >2 which should be achieved for even the smallest types for this data
# the second column only checks that the value 1 can be cast to the type under test
query II
select (uncompressed::FLOAT // rle::FLOAT) > 2, CAST(1 as ${type}) FROM (
select
(select count(distinct block_id) from pragma_storage_info('test_rle') where segment_type not in('VARCHAR', 'VALIDITY')) as rle,
(select count(distinct block_id) from pragma_storage_info('test_uncompressed') where segment_type not in('VARCHAR', 'VALIDITY')) as uncompressed
)
----
True	1

statement ok
drop table test_rle

statement ok
drop table test_uncompressed

endloop

View File

@@ -0,0 +1,74 @@
# name: test/sql/storage/compression/rle/rle_constant.test
# description: Test RLE where we can emit ConstantVectors when scanning
# group: [rle]

load __TEST_DIR__/test_rle.db

require vector_size 2048

# vector types are checked explicitly in this test
require no_vector_verification

statement ok
PRAGMA force_compression = 'rle'

# simple RLE with few values
statement ok
CREATE TABLE test (a INTEGER);

# One run of 4096 values: two full vectors
statement ok
INSERT INTO test SELECT 0 FROM range(4096);

# One run of 2048 values: one full vector
statement ok
INSERT INTO test SELECT 1 FROM range(2048);

# Ditto
statement ok
INSERT INTO test SELECT 2 FROM range(2048);

# The runs below are shorter than a vector, so they don't produce ConstantVectors
statement ok
INSERT INTO test SELECT 3 FROM range(1024)

statement ok
INSERT INTO test SELECT 4 FROM range(1024)

statement ok
INSERT INTO test SELECT 5 FROM range(512)

statement ok
INSERT INTO test SELECT 6 FROM range(512)

statement ok
INSERT INTO test SELECT 7 FROM range(512)

statement ok
INSERT INTO test SELECT 8 FROM range(512)

statement ok
CHECKPOINT;

# Across the whole table both constant and flat vectors show up
query I
SELECT DISTINCT ON (types) vector_type(a) AS types
FROM test
ORDER BY ALL;
----
CONSTANT_VECTOR
FLAT_VECTOR

statement ok
PRAGMA disable_optimizer

# The first 8192 rows (4 vectors) are all constant
query I
SELECT DISTINCT ON (types) types
FROM (SELECT vector_type(a) FROM test LIMIT 8192) AS tbl(types)
----
CONSTANT_VECTOR

# Everything after the first 8192 rows is not constant
query I
SELECT DISTINCT ON (types) types
FROM (SELECT vector_type(a) FROM test OFFSET 8192) AS tbl(types)
----
FLAT_VECTOR

View File

@@ -0,0 +1,113 @@
# name: test/sql/storage/compression/rle/rle_filter.test
# description: Test filtering from RLE compression
# group: [rle]

# load the DB from disk
load __TEST_DIR__/test_rle_filter.db

statement ok
PRAGMA enable_verification

statement ok
PRAGMA force_compression = 'rle'

# rle_val holds runs of 50 equal values; rle_val_null is the same column with every 8th row set to NULL
statement ok
CREATE TABLE tbl AS
SELECT
    i AS id,
    i // 50 AS rle_val,
    CASE WHEN i % 8 = 0 THEN NULL ELSE i // 50 END AS rle_val_null
FROM range(100000) AS t(i);

# point lookup combined with an equality filter on the RLE column
query III
SELECT *
FROM tbl
WHERE id = 5040
  AND rle_val = 100
----
5040	100	NULL

# same lookup, with the RLE column filtered through an expression
query III
SELECT *
FROM tbl
WHERE id = 5040
  AND substr(rle_val::VARCHAR, 1, 3) = '100'
----
5040	100	NULL

# range filter on id that starts in the middle of the rle_val = 100 run
query III
SELECT *
FROM tbl
WHERE id >= 5020
  AND rle_val = 100
----
5020	100	100
5021	100	100
5022	100	100
5023	100	100
5024	100	NULL
5025	100	100
5026	100	100
5027	100	100
5028	100	100
5029	100	100
5030	100	100
5031	100	100
5032	100	NULL
5033	100	100
5034	100	100
5035	100	100
5036	100	100
5037	100	100
5038	100	100
5039	100	100
5040	100	NULL
5041	100	100
5042	100	100
5043	100	100
5044	100	100
5045	100	100
5046	100	100
5047	100	100
5048	100	NULL
5049	100	100

# filter on the RLE column only: returns the complete run of 50 rows
query III
SELECT *
FROM tbl
WHERE rle_val = 100
----
5000	100	NULL
5001	100	100
5002	100	100
5003	100	100
5004	100	100
5005	100	100
5006	100	100
5007	100	100
5008	100	NULL
5009	100	100
5010	100	100
5011	100	100
5012	100	100
5013	100	100
5014	100	100
5015	100	100
5016	100	NULL
5017	100	100
5018	100	100
5019	100	100
5020	100	100
5021	100	100
5022	100	100
5023	100	100
5024	100	NULL
5025	100	100
5026	100	100
5027	100	100
5028	100	100
5029	100	100
5030	100	100
5031	100	100
5032	100	NULL
5033	100	100
5034	100	100
5035	100	100
5036	100	100
5037	100	100
5038	100	100
5039	100	100
5040	100	NULL
5041	100	100
5042	100	100
5043	100	100
5044	100	100
5045	100	100
5046	100	100
5047	100	100
5048	100	NULL
5049	100	100

View File

@@ -0,0 +1,37 @@
# name: test/sql/storage/compression/rle/rle_filter.test_slow
# description: Test filtering from RLE compression
# group: [rle]

# load the DB from disk
load __TEST_DIR__/test_rle_filter.db

statement ok
pragma enable_verification

statement ok
SET force_compression = 'rle'

# value i occurs exactly i times, i.e. run lengths grow from 0 to 4999
statement ok
CREATE TABLE tbl AS select unnest(repeat([i], i)) i from range(5000) t(i);

foreach test_val 0 7 993 2525 4375 4999

# the number of rows matching a value must equal the value itself
query I
SELECT COUNT(*) = ${test_val} FROM tbl WHERE i=${test_val}
----
true

endloop

# now with NULL values: every run of i is followed by a single NULL row,
# so the column contains NULLs while the count per value stays equal to i
statement ok
CREATE TABLE tbl2 AS select unnest(list_concat(repeat([i], i), [NULL::BIGINT])) i from range(5000) t(i);

foreach test_val 1 12 736 1237 2314 3333

query I
SELECT COUNT(*) = ${test_val} FROM tbl2 WHERE i=${test_val}
----
true

endloop

View File

@@ -0,0 +1,33 @@
# name: test/sql/storage/compression/rle/rle_filter_pushdown.test
# description: Filter pushdown with RLE columns
# group: [rle]

# load the DB from disk
load __TEST_DIR__/test_rle.db

statement ok
PRAGMA force_compression = 'rle'

statement ok
CREATE TABLE test (id VARCHAR, col INTEGER);

# first 5000 rows: ids '0'..'4999', col = 1
statement ok
INSERT INTO test
SELECT i::VARCHAR AS id, 1 AS b
FROM range(5000) AS tbl(i)

# next 5000 rows: ids '5000'..'9999', col = 2
statement ok
INSERT INTO test
SELECT (5000 + i)::VARCHAR AS id, 2 AS b
FROM range(5000) AS tbl(i)

statement ok
CHECKPOINT

# filter on the RLE column
query IIII
SELECT SUM(col), MIN(col), MAX(col), COUNT(*)
FROM test
WHERE col = 2
----
10000	2	2	5000

# filter on non-rle column
query IIIIII
SELECT MIN(id), MAX(id), SUM(col), MIN(col), MAX(col), COUNT(*)
FROM test
WHERE id = '5000'
----
5000	5000	2	2	2	1

View File

@@ -0,0 +1,31 @@
# name: test/sql/storage/compression/rle/rle_index_fetch.test
# description: Fetch from RLE column with index
# group: [rle]

# load the DB from disk
load __TEST_DIR__/test_rle.db

statement ok
PRAGMA force_compression = 'rle'

statement ok
CREATE TABLE test(id INTEGER PRIMARY KEY, col INTEGER);

# NOTE(review): id is declared INTEGER but is populated and filtered with VARCHAR
# values below, which relies on implicit casts - confirm this is intended
statement ok
INSERT INTO test
SELECT i::VARCHAR AS id, 1 AS b
FROM range(5000) AS tbl(i)

statement ok
INSERT INTO test
SELECT (5000 + i)::VARCHAR AS id, 2 AS b
FROM range(5000) AS tbl(i)

statement ok
CHECKPOINT

# the INTEGER data must be stored with RLE
query I
SELECT compression
FROM pragma_storage_info('test')
WHERE segment_type ILIKE 'INTEGER'
LIMIT 1
----
RLE

# single-row lookup on the primary key column
query IIIIII
SELECT MIN(id), MAX(id), SUM(col), MIN(col), MAX(col), COUNT(*)
FROM test
WHERE id = '5000'
----
5000	5000	2	2	2	1

View File

@@ -0,0 +1,25 @@
# name: test/sql/storage/compression/rle/rle_many_repeated.test_slow
# description: Test RLE with very long runs of a repeated value (65535 and 100000 rows)
# group: [rle]

require vector_size 2048

load __TEST_DIR__/rle_many_repeated.db

statement ok
PRAGMA force_compression = 'rle'

statement ok
CREATE TABLE test_rle (a BIGINT);

# one run of 65535 threes and one run of 100000 fours
statement ok
INSERT INTO test_rle SELECT 3::BIGINT FROM range(0, 65535) UNION ALL SELECT 4::BIGINT FROM range(100000);

statement ok
CHECKPOINT

# ORDER BY makes the row order of the grouped result deterministic
query II
SELECT a, COUNT(*) FROM test_rle GROUP BY ALL ORDER BY a
----
3	65535
4	100000

View File

@@ -0,0 +1,33 @@
# name: test/sql/storage/compression/rle/rle_medium.test
# description: Test medium storage with RLE (> vector size)
# group: [rle]

# load the DB from disk
load __TEST_DIR__/test_rle.db

statement ok
PRAGMA force_compression = 'rle'

# simple RLE with few values
statement ok
CREATE TABLE test (a INTEGER);

# six runs of 1000 identical values each (1 through 6)
statement ok
INSERT INTO test SELECT * FROM repeat(1, 1000);

statement ok
INSERT INTO test SELECT * FROM repeat(2, 1000);

statement ok
INSERT INTO test SELECT * FROM repeat(3, 1000);

statement ok
INSERT INTO test SELECT * FROM repeat(4, 1000);

statement ok
INSERT INTO test SELECT * FROM repeat(5, 1000);

statement ok
INSERT INTO test SELECT * FROM repeat(6, 1000);

# aggregates before the restart
query IIII
SELECT SUM(a), MIN(a), MAX(a), COUNT(*)
FROM test
----
21000	1	6	6000

restart

# the same aggregates must hold after the restart
query IIII
SELECT SUM(a), MIN(a), MAX(a), COUNT(*)
FROM test
----
21000	1	6	6000

View File

@@ -0,0 +1,44 @@
# name: test/sql/storage/compression/rle/rle_nulls_edge_case.test
# description: Test RLE compression with the edge case of having exactly UINT16_MAX null values
# group: [rle]

# load the DB from disk
load __TEST_DIR__/test_rle_nulls_edge_case.db

statement ok
PRAGMA enable_verification

statement ok
PRAGMA force_compression = 'rle'

# all inserts happen inside a single transaction
statement ok
BEGIN;

statement ok
PRAGMA force_compression='RLE';

statement ok
CREATE TABLE integers(i INTEGER);

# exactly 65535 NULLs ...
statement ok
INSERT INTO integers SELECT NULL FROM range(65535);

# ... followed by three distinct non-NULL values
statement ok
INSERT INTO integers SELECT 1;

statement ok
INSERT INTO integers SELECT 2;

statement ok
INSERT INTO integers SELECT 3;

statement ok
COMMIT;

statement ok
CHECKPOINT;

# 65535 NULL rows plus 3 value rows; only the 3 values are counted by COUNT(i)
query IIII
SELECT MIN(i), MAX(i), COUNT(*), COUNT(i)
FROM integers;
----
1	3	65538	3

View File

@@ -0,0 +1,75 @@
# name: test/sql/storage/compression/rle/rle_select.test
# description: Test selecting from RLE compression
# group: [rle]

# load the DB from disk
load __TEST_DIR__/test_rle_select.db

statement ok
PRAGMA enable_verification

statement ok
PRAGMA force_compression = 'rle'

# rle_val holds runs of 50 equal values; rle_val_null is the same column with every 8th row set to NULL
statement ok
CREATE TABLE tbl AS
SELECT
    i AS id,
    i // 50 AS rle_val,
    CASE WHEN i % 8 = 0 THEN NULL ELSE i // 50 END AS rle_val_null
FROM range(100000) AS t(i);

# every 4th row of a small id range that crosses the boundary between two runs
query III
SELECT *
FROM tbl
WHERE id >= 75
  AND id <= 125
  AND id % 4 = 0
----
76	1	1
80	1	NULL
84	1	1
88	1	NULL
92	1	1
96	1	NULL
100	2	2
104	2	NULL
108	2	2
112	2	NULL
116	2	2
120	2	NULL
124	2	2

# the same selection a second time
query III
SELECT *
FROM tbl
WHERE id >= 75
  AND id <= 125
  AND id % 4 = 0
----
76	1	1
80	1	NULL
84	1	1
88	1	NULL
92	1	1
96	1	NULL
100	2	2
104	2	NULL
108	2	2
112	2	NULL
116	2	2
120	2	NULL
124	2	2

# the same pattern further into the table
query III
SELECT *
FROM tbl
WHERE id >= 33380
  AND id <= 33410
  AND id % 4 = 0
----
33380	667	667
33384	667	NULL
33388	667	667
33392	667	NULL
33396	667	667
33400	668	NULL
33404	668	668
33408	668	NULL

# non-consecutive select
statement ok
CREATE TABLE tbl2 AS
SELECT
    i AS id,
    i % 5 AS id_modulo,
    i // 50 AS rle_val,
    CASE WHEN i % 8 = 0 THEN NULL ELSE i // 50 END AS rle_val_null
FROM range(100000) AS t(i);

query IIIIII
SELECT COUNT(*), SUM(rle_val), MIN(rle_val), MAX(rle_val), SUM(rle_val_null), COUNT(rle_val_null)
FROM tbl2
WHERE id >= 1500
  AND id <= 2500
  AND id_modulo = 3;
----
200	7900	30	49	6910	175

query IIIIII
SELECT COUNT(*), SUM(rle_val), MIN(rle_val), MAX(rle_val), SUM(rle_val_null), COUNT(rle_val_null)
FROM tbl2
WHERE id >= 1500
  AND id <= 19500
  AND id_modulo <= 2;
----
10801	2262990	30	390	1980030	9451

View File

@@ -0,0 +1,23 @@
# name: test/sql/storage/compression/rle/rle_storage_info.test
# description: Test storage with RLE
# group: [rle]

# load the DB from disk
load __TEST_DIR__/test_rle.db

statement ok
PRAGMA force_compression = 'rle'

statement ok
CREATE TABLE test (a INTEGER, b INTEGER);

# four rows, including one duplicated row and one all-NULL row
statement ok
INSERT INTO test
VALUES (11, 22), (11, 22), (12, 21), (NULL, NULL)

statement ok
CHECKPOINT

# the INTEGER data must be reported as RLE
query I
SELECT compression
FROM pragma_storage_info('test')
WHERE segment_type ILIKE 'INTEGER'
LIMIT 1
----
RLE

View File

@@ -0,0 +1,31 @@
# name: test/sql/storage/compression/rle/struct_rle.test
# description: Test storage with RLE inside structs
# group: [rle]

load __TEST_DIR__/test_rle.db

statement ok
PRAGMA force_compression = 'rle'

statement ok
CREATE TABLE test (s ROW(a INTEGER));

# six runs of 1000 identical struct values each (a = 1 through 6)
statement ok
INSERT INTO test SELECT {'a': i} AS s FROM repeat(1, 1000) AS tbl(i);

statement ok
INSERT INTO test SELECT {'a': i} AS s FROM repeat(2, 1000) AS tbl(i);

statement ok
INSERT INTO test SELECT {'a': i} AS s FROM repeat(3, 1000) AS tbl(i);

statement ok
INSERT INTO test SELECT {'a': i} AS s FROM repeat(4, 1000) AS tbl(i);

statement ok
INSERT INTO test SELECT {'a': i} AS s FROM repeat(5, 1000) AS tbl(i);

statement ok
INSERT INTO test SELECT {'a': i} AS s FROM repeat(6, 1000) AS tbl(i);

# aggregates over the struct field before the restart
query IIII
SELECT SUM(s['a']), MIN(s['a']), MAX(s['a']), COUNT(*)
FROM test
----
21000	1	6	6000

restart

# the same aggregates must hold after the restart
query IIII
SELECT SUM(s['a']), MIN(s['a']), MAX(s['a']), COUNT(*)
FROM test
----
21000	1	6	6000