should be it
This commit is contained in:
93
external/duckdb/test/sql/storage/compression/roaring/fetch_row.test
vendored
Normal file
93
external/duckdb/test/sql/storage/compression/roaring/fetch_row.test
vendored
Normal file
@@ -0,0 +1,93 @@
|
||||
# name: test/sql/storage/compression/roaring/fetch_row.test
|
||||
# group: [roaring]
|
||||
|
||||
require block_size 262144
|
||||
|
||||
# load the DB from disk
|
||||
load __TEST_DIR__/test_roaring_compression_fetch_row.db readwrite v1.2.0
|
||||
|
||||
statement ok
|
||||
CREATE TABLE test (
|
||||
a INT
|
||||
);
|
||||
|
||||
statement ok
|
||||
pragma force_compression='roaring'
|
||||
|
||||
# Array Container
|
||||
statement ok
|
||||
INSERT INTO test SELECT case when i%25=0 then 1337 else null end FROM range(0,10000) tbl(i);
|
||||
|
||||
statement ok
|
||||
checkpoint
|
||||
|
||||
restart
|
||||
|
||||
query I
|
||||
SELECT compression FROM pragma_storage_info('test') WHERE segment_type ILIKE 'VALIDITY' and compression != 'Roaring';
|
||||
----
|
||||
|
||||
query I
|
||||
select count(*) from test WHERE a IS NOT NULL;
|
||||
----
|
||||
400
|
||||
|
||||
query III
|
||||
select sum(a), min(a), max(a) from test;
|
||||
----
|
||||
534800 1337 1337
|
||||
|
||||
statement ok
|
||||
delete from test;
|
||||
|
||||
# Run Container
|
||||
statement ok
|
||||
INSERT INTO test
SELECT
	CASE
		-- AND binds tighter than OR; the parentheses only spell out the grouping already in effect.
		WHEN i = 0 OR (
			((i % 512) != 0 AND (i % 512) < 350)
			OR (i % 512) > 450
		)
		THEN NULL
		ELSE 1337
	END
FROM range(0, 10000) tbl(i);
|
||||
|
||||
statement ok
|
||||
checkpoint
|
||||
|
||||
restart
|
||||
|
||||
query I
|
||||
SELECT compression FROM pragma_storage_info('test') WHERE segment_type ILIKE 'VALIDITY' and compression != 'Roaring';
|
||||
----
|
||||
|
||||
query I
|
||||
select count(*) from test WHERE a IS NOT NULL;
|
||||
----
|
||||
1938
|
||||
|
||||
query III
|
||||
select sum(a), min(a), max(a) from test;
|
||||
----
|
||||
2591106 1337 1337
|
||||
|
||||
statement ok
|
||||
delete from test;
|
||||
|
||||
# Bitset Container
|
||||
statement ok
|
||||
INSERT INTO test SELECT case when i%3=0 then 1337 else null end FROM range(0,10000) tbl(i);
|
||||
|
||||
statement ok
|
||||
checkpoint
|
||||
|
||||
restart
|
||||
|
||||
query I
|
||||
SELECT compression FROM pragma_storage_info('test') WHERE segment_type ILIKE 'VALIDITY' and compression != 'Roaring';
|
||||
----
|
||||
|
||||
query I
|
||||
select count(*) from test WHERE a IS NOT NULL;
|
||||
----
|
||||
3334
|
||||
|
||||
query III
|
||||
select sum(a), min(a), max(a) from test;
|
||||
----
|
||||
4457558 1337 1337
|
||||
|
||||
statement ok
|
||||
delete from test;
|
||||
83
external/duckdb/test/sql/storage/compression/roaring/roaring_analyze_array.test
vendored
Normal file
83
external/duckdb/test/sql/storage/compression/roaring/roaring_analyze_array.test
vendored
Normal file
@@ -0,0 +1,83 @@
|
||||
# name: test/sql/storage/compression/roaring/roaring_analyze_array.test
|
||||
# description: Check the produced (final_)analyze result
|
||||
# group: [roaring]
|
||||
|
||||
require block_size 262144
|
||||
|
||||
require noforcestorage
|
||||
|
||||
load __TEST_DIR__/test_roaring.db readwrite v1.2.0
|
||||
|
||||
statement ok
|
||||
set logging_level='info';
|
||||
|
||||
# 1 rowgroup
|
||||
statement ok
|
||||
set variable dataset_size = 122880;
|
||||
|
||||
statement ok
|
||||
PRAGMA force_compression='uncompressed'
|
||||
|
||||
statement ok
|
||||
set enable_logging=true;
|
||||
|
||||
statement ok
|
||||
CREATE TABLE test_uncompressed AS SELECT
|
||||
case
|
||||
when i%25=0
|
||||
then 1337
|
||||
else null
|
||||
end
|
||||
FROM range(getvariable('dataset_size')) tbl(i);
|
||||
|
||||
statement ok
|
||||
checkpoint
|
||||
|
||||
statement ok
|
||||
set enable_logging=false;
|
||||
|
||||
query I
|
||||
SELECT message.split(': ')[2]::INTEGER FROM duckdb_logs
|
||||
where
|
||||
message.starts_with('ColumnDataCheckpointer FinalAnalyze') and
|
||||
message.contains('test_uncompressed') and
|
||||
message.contains('VALIDITY') and
|
||||
message.contains('COMPRESSION_UNCOMPRESSED');
|
||||
----
|
||||
15360
|
||||
|
||||
statement ok
|
||||
PRAGMA force_compression='roaring'
|
||||
|
||||
statement ok
|
||||
set enable_logging=true;
|
||||
|
||||
statement ok
|
||||
CREATE TABLE test_roaring AS select * from test_uncompressed;
|
||||
|
||||
statement ok
|
||||
checkpoint
|
||||
|
||||
statement ok
|
||||
set enable_logging=false;
|
||||
|
||||
# For single row group
|
||||
# 60 vectors with 82 non-null values per vector
|
||||
# Total compressed bytes:
|
||||
# 2 bits (is_inverted + is_run) + 8 bits (cardinality) = 10 bits per Vector
|
||||
# 10 * 60 = 600 bits == 75 bytes of metadata per RowGroup
|
||||
#
|
||||
# 8 (compressed overhead) + (82 * sizeof(uint8_t)) = 90 bytes per Vector
|
||||
# 90 * 60 = 5400 bytes of data per RowGroup
|
||||
# 5475 bytes
|
||||
|
||||
# We 2x the actual result, to pay for the slower decompression speed
|
||||
query I
|
||||
SELECT message.split(': ')[2]::INTEGER FROM duckdb_logs
|
||||
where
|
||||
message.starts_with('ColumnDataCheckpointer FinalAnalyze') and
|
||||
message.contains('test_roaring') and
|
||||
message.contains('VALIDITY') and
|
||||
message.contains('COMPRESSION_ROARING');
|
||||
----
|
||||
10944
|
||||
83
external/duckdb/test/sql/storage/compression/roaring/roaring_analyze_bitset.test
vendored
Normal file
83
external/duckdb/test/sql/storage/compression/roaring/roaring_analyze_bitset.test
vendored
Normal file
@@ -0,0 +1,83 @@
|
||||
# name: test/sql/storage/compression/roaring/roaring_analyze_bitset.test
|
||||
# description: Check the produced (final_)analyze result
|
||||
# group: [roaring]
|
||||
|
||||
require block_size 262144
|
||||
|
||||
require noforcestorage
|
||||
|
||||
load __TEST_DIR__/test_roaring.db readwrite v1.2.0
|
||||
|
||||
statement ok
|
||||
set logging_level='info';
|
||||
|
||||
# 1 rowgroup
|
||||
statement ok
|
||||
set variable dataset_size = 122880;
|
||||
|
||||
statement ok
|
||||
PRAGMA force_compression='uncompressed'
|
||||
|
||||
statement ok
|
||||
set enable_logging=true;
|
||||
|
||||
statement ok
|
||||
CREATE TABLE test_uncompressed AS SELECT
|
||||
case
|
||||
when i%3=0
|
||||
then 1337
|
||||
else null
|
||||
end
|
||||
FROM range(getvariable('dataset_size')) tbl(i);
|
||||
|
||||
statement ok
|
||||
checkpoint
|
||||
|
||||
statement ok
|
||||
set enable_logging=false;
|
||||
|
||||
query I
|
||||
SELECT message.split(': ')[2]::INTEGER FROM duckdb_logs
|
||||
where
|
||||
message.starts_with('ColumnDataCheckpointer FinalAnalyze') and
|
||||
message.contains('test_uncompressed') and
|
||||
message.contains('VALIDITY') and
|
||||
message.contains('COMPRESSION_UNCOMPRESSED');
|
||||
----
|
||||
15360
|
||||
|
||||
statement ok
|
||||
PRAGMA force_compression='roaring'
|
||||
|
||||
statement ok
|
||||
set enable_logging=true;
|
||||
|
||||
statement ok
|
||||
CREATE TABLE test_roaring AS select * from test_uncompressed;
|
||||
|
||||
statement ok
|
||||
checkpoint
|
||||
|
||||
statement ok
|
||||
set enable_logging=false;
|
||||
|
||||
# For single row group
|
||||
# 60 vectors, each stored as a 256-byte bitset (every 3rd value is non-null)
|
||||
# Total compressed bytes:
|
||||
# 2 bits (is_inverted + is_run) = 2 bits per Vector
|
||||
# 2 * 60 = 120 bits == 15 bytes of metadata per RowGroup
|
||||
#
|
||||
# 256 bytes per Vector
|
||||
# 256 * 60 = 15360 bytes of data per RowGroup
|
||||
# 15375 bytes
|
||||
|
||||
# We 2x the actual result, to pay for the slower decompression speed
|
||||
query I
|
||||
SELECT message.split(': ')[2]::INTEGER FROM duckdb_logs
|
||||
where
|
||||
message.starts_with('ColumnDataCheckpointer FinalAnalyze') and
|
||||
message.contains('test_roaring') and
|
||||
message.contains('VALIDITY') and
|
||||
message.contains('COMPRESSION_ROARING');
|
||||
----
|
||||
30872
|
||||
83
external/duckdb/test/sql/storage/compression/roaring/roaring_analyze_run.test
vendored
Normal file
83
external/duckdb/test/sql/storage/compression/roaring/roaring_analyze_run.test
vendored
Normal file
@@ -0,0 +1,83 @@
|
||||
# name: test/sql/storage/compression/roaring/roaring_analyze_run.test
|
||||
# description: Check the produced (final_)analyze result
|
||||
# group: [roaring]
|
||||
|
||||
require block_size 262144
|
||||
|
||||
require noforcestorage
|
||||
|
||||
load __TEST_DIR__/test_roaring.db readwrite v1.2.0
|
||||
|
||||
statement ok
|
||||
set logging_level='info';
|
||||
|
||||
# 1 rowgroup
|
||||
statement ok
|
||||
set variable dataset_size = 122880;
|
||||
|
||||
statement ok
|
||||
PRAGMA force_compression='uncompressed'
|
||||
|
||||
statement ok
|
||||
set enable_logging=true;
|
||||
|
||||
statement ok
|
||||
CREATE TABLE test_uncompressed AS
SELECT
	CASE
		-- AND binds tighter than OR; the parentheses only spell out the grouping already in effect.
		WHEN i = 0 OR (
			((i % 512) != 0 AND (i % 512) < 350)
			OR (i % 512) > 450
		)
		THEN NULL
		ELSE 1337
	END
FROM range(getvariable('dataset_size')) tbl(i);
|
||||
|
||||
statement ok
|
||||
checkpoint
|
||||
|
||||
statement ok
|
||||
set enable_logging=false;
|
||||
|
||||
query I
|
||||
SELECT message.split(': ')[2]::INTEGER FROM duckdb_logs
|
||||
where
|
||||
message.starts_with('ColumnDataCheckpointer FinalAnalyze') and
|
||||
message.contains('test_uncompressed') and
|
||||
message.contains('VALIDITY') and
|
||||
message.contains('COMPRESSION_UNCOMPRESSED');
|
||||
----
|
||||
15360
|
||||
|
||||
statement ok
|
||||
PRAGMA force_compression='roaring'
|
||||
|
||||
statement ok
|
||||
set enable_logging=true;
|
||||
|
||||
statement ok
|
||||
CREATE TABLE test_roaring AS select * from test_uncompressed;
|
||||
|
||||
statement ok
|
||||
checkpoint
|
||||
|
||||
statement ok
|
||||
set enable_logging=false;
|
||||
|
||||
# For single row group
|
||||
# 60 vectors with 7 or 8 runs of nulls per vector
|
||||
# Total compressed bytes:
|
||||
# 2 bits (is_inverted + is_run) + 7 bits (run_size) = 9 bits per Vector
|
||||
# 9 * 60 = 540 bits == 67 bytes of metadata per RowGroup
|
||||
#
|
||||
# 8 (compressed overhead) + (8 * sizeof(uint16_t)) = 24 bytes per Vector
|
||||
# 24 * 60 = 1440 bytes of data per RowGroup
|
||||
# 1507 bytes
|
||||
|
||||
# We 2x the actual result, to pay for the slower decompression speed
|
||||
query I
|
||||
SELECT message.split(': ')[2]::INTEGER FROM duckdb_logs
|
||||
where
|
||||
message.starts_with('ColumnDataCheckpointer FinalAnalyze') and
|
||||
message.contains('test_roaring') and
|
||||
message.contains('VALIDITY') and
|
||||
message.contains('COMPRESSION_ROARING');
|
||||
----
|
||||
3024
|
||||
39
external/duckdb/test/sql/storage/compression/roaring/roaring_appends.test_slow
vendored
Normal file
39
external/duckdb/test/sql/storage/compression/roaring/roaring_appends.test_slow
vendored
Normal file
@@ -0,0 +1,39 @@
|
||||
# name: test/sql/storage/compression/roaring/roaring_appends.test_slow
|
||||
# group: [roaring]
|
||||
|
||||
require block_size 262144
|
||||
|
||||
load __TEST_DIR__/test_roaring_appends.db readwrite v1.2.0
|
||||
|
||||
statement ok
|
||||
PRAGMA force_compression='roaring';
|
||||
|
||||
statement ok
|
||||
set checkpoint_threshold = '100mb';
|
||||
|
||||
statement ok
|
||||
CREATE TABLE test (a BIGINT);
|
||||
|
||||
foreach size 50 100 250 1025 1500
|
||||
|
||||
statement ok
|
||||
delete from test;
|
||||
|
||||
loop i 1 30
|
||||
|
||||
statement ok
|
||||
INSERT INTO test SELECT case when i%25=0 then 1337 else null end FROM range(0,${size}) tbl(i);
|
||||
|
||||
statement ok
|
||||
checkpoint
|
||||
|
||||
query I
|
||||
select count(*) = (${size} / 25) * ${i} from test WHERE a IS NOT NULL;
|
||||
----
|
||||
true
|
||||
|
||||
#i
|
||||
endloop
|
||||
|
||||
#size
|
||||
endloop
|
||||
67
external/duckdb/test/sql/storage/compression/roaring/roaring_array_simple.test
vendored
Normal file
67
external/duckdb/test/sql/storage/compression/roaring/roaring_array_simple.test
vendored
Normal file
@@ -0,0 +1,67 @@
|
||||
# name: test/sql/storage/compression/roaring/roaring_array_simple.test
|
||||
# description: Test roaring compression (array containers) with nulls
|
||||
# group: [roaring]
|
||||
|
||||
load __TEST_DIR__/test_roaring.db readwrite v1.2.0
|
||||
|
||||
statement ok
|
||||
PRAGMA force_compression='roaring'
|
||||
|
||||
# simple compression with few values
|
||||
statement ok
|
||||
CREATE TABLE test (a BIGINT);
|
||||
|
||||
# 82 values stored in the Array Container
|
||||
statement ok
|
||||
INSERT INTO test SELECT case when i%25=0 then 1337 else null end FROM range(0,10000) tbl(i);
|
||||
|
||||
statement ok
|
||||
checkpoint
|
||||
|
||||
query I
|
||||
SELECT compression FROM pragma_storage_info('test') WHERE segment_type ILIKE 'VALIDITY' and compression != 'Roaring';
|
||||
----
|
||||
|
||||
query I
|
||||
select count(*) from test WHERE a IS NOT NULL;
|
||||
----
|
||||
400
|
||||
|
||||
query III
|
||||
select sum(a), min(a), max(a) from test;
|
||||
----
|
||||
534800 1337 1337
|
||||
|
||||
statement ok
|
||||
delete from test
|
||||
|
||||
# 5 non-null values per Vector, uses uncompressed arrays
|
||||
statement ok
|
||||
with intermediates as (
|
||||
select i % 2048 as i
|
||||
from range(0, 10_000) t(i)
|
||||
)
|
||||
insert into test select case when
|
||||
i = 0 or
|
||||
i = 6 or
|
||||
i = 1000 or
|
||||
i = 1500 or
|
||||
i = 2000
|
||||
then 1337
|
||||
else null end from intermediates;
|
||||
|
||||
statement ok
|
||||
checkpoint;
|
||||
|
||||
query I
|
||||
select count(*) from test WHERE a IS NOT NULL;
|
||||
----
|
||||
24
|
||||
|
||||
query III
|
||||
select sum(a), min(a), max(a) from test;
|
||||
----
|
||||
32088 1337 1337
|
||||
|
||||
statement ok
|
||||
DROP TABLE test;
|
||||
35
external/duckdb/test/sql/storage/compression/roaring/roaring_bitset_simple.test
vendored
Normal file
35
external/duckdb/test/sql/storage/compression/roaring/roaring_bitset_simple.test
vendored
Normal file
@@ -0,0 +1,35 @@
|
||||
# name: test/sql/storage/compression/roaring/roaring_bitset_simple.test
|
||||
# description: Test roaring compression (bitset containers) with NULLs.
|
||||
# group: [roaring]
|
||||
|
||||
load __TEST_DIR__/test_roaring.db readwrite v1.2.0
|
||||
|
||||
statement ok
|
||||
PRAGMA force_compression='roaring'
|
||||
|
||||
# Simple compression with a few values.
|
||||
statement ok
|
||||
CREATE TABLE test (a BIGINT);
|
||||
|
||||
statement ok
|
||||
INSERT INTO test SELECT CASE WHEN i % 3 = 0 THEN 1337 ELSE NULL END FROM range(0, 10000) tbl(i);
|
||||
|
||||
statement ok
|
||||
CHECKPOINT;
|
||||
|
||||
query I
|
||||
SELECT compression FROM pragma_storage_info('test') WHERE segment_type ILIKE 'VALIDITY' AND compression != 'Roaring';
|
||||
----
|
||||
|
||||
query I
|
||||
SELECT COUNT(*) FROM test WHERE a IS NOT NULL;
|
||||
----
|
||||
3334
|
||||
|
||||
query III
|
||||
SELECT SUM(a), MIN(a), MAX(a) FROM test;
|
||||
----
|
||||
4457558 1337 1337
|
||||
|
||||
statement ok
|
||||
DROP TABLE test;
|
||||
220
external/duckdb/test/sql/storage/compression/roaring/roaring_compression_ratio.test_slow
vendored
Normal file
220
external/duckdb/test/sql/storage/compression/roaring/roaring_compression_ratio.test_slow
vendored
Normal file
@@ -0,0 +1,220 @@
|
||||
# name: test/sql/storage/compression/roaring/roaring_compression_ratio.test_slow
|
||||
# description: Assert roaring compression ratio is within reasonable margins for each container type
|
||||
# group: [roaring]
|
||||
|
||||
require block_size 262144
|
||||
|
||||
load __TEST_DIR__/test_roaring.db readwrite v1.2.0
|
||||
|
||||
statement ok
|
||||
set variable dataset_size = 120_000_000;
|
||||
|
||||
#### Array container Roaring Compression ratio calculation:
|
||||
# For single row group
|
||||
# 60 vectors with 82 non-null values per vector
|
||||
# Total compressed bytes:
|
||||
# metadata: (64 / (8 / 2 (bitwidth))) + 60 = 76
|
||||
# data: (8 + (82 * 1)) * 60 = 5400
|
||||
# 5476 bytes
|
||||
|
||||
# Total uncompressed bytes = (60 * 256) = 15360 bytes
|
||||
# Expected Ratio ~= 2.8x
|
||||
statement ok
|
||||
PRAGMA force_compression='roaring'
|
||||
|
||||
statement ok
|
||||
CREATE TABLE test_roaring AS SELECT case when i%25=0 then 1337 else null end FROM range(getvariable('dataset_size')) tbl(i);
|
||||
|
||||
statement ok
|
||||
checkpoint
|
||||
|
||||
statement ok
|
||||
PRAGMA force_compression='uncompressed'
|
||||
|
||||
statement ok
|
||||
CREATE TABLE test_uncompressed AS SELECT case when i%25=0 then 1337 else null end FROM range(getvariable('dataset_size')) tbl(i);
|
||||
|
||||
statement ok
|
||||
checkpoint
|
||||
|
||||
query I
|
||||
SELECT compression FROM pragma_storage_info('test_roaring') WHERE segment_type = 'VALIDITY' AND compression != 'Roaring';
|
||||
----
|
||||
|
||||
query I
|
||||
SELECT compression FROM pragma_storage_info('test_uncompressed') WHERE segment_type = 'VALIDITY' AND compression != 'Uncompressed';
|
||||
----
|
||||
|
||||
statement ok
|
||||
CREATE TYPE test_result AS UNION (
|
||||
ok BOOL,
|
||||
err STRUCT(
|
||||
uncompressed HUGEINT,
|
||||
compressed HUGEINT,
|
||||
allowed_minimum_ratio DECIMAL(2,1),
|
||||
allowed_maximum_ratio DECIMAL(2,1),
|
||||
actual_ratio FLOAT
|
||||
)
|
||||
);
|
||||
|
||||
statement ok
|
||||
set variable min_ratio = 2.6;
|
||||
set variable max_ratio = 2.8;
|
||||
|
||||
query I
|
||||
SELECT
|
||||
CASE
|
||||
WHEN (uncompressed::FLOAT / compressed::FLOAT) > getvariable('min_ratio') AND (uncompressed::FLOAT / compressed::FLOAT) <= getvariable('max_ratio')
|
||||
THEN True::test_result
|
||||
ELSE {
|
||||
'uncompressed': uncompressed,
|
||||
'compressed': compressed,
|
||||
'allowed_minimum_ratio': getvariable('min_ratio'),
|
||||
'allowed_maximum_ratio': getvariable('max_ratio'),
|
||||
'actual_ratio': uncompressed::FLOAT / compressed::FLOAT
|
||||
}::test_result
|
||||
END
|
||||
FROM (
|
||||
select
|
||||
(select count(distinct block_id) from pragma_storage_info('test_roaring') where segment_type in ('VALIDITY')) as compressed,
|
||||
(select count(distinct block_id) from pragma_storage_info('test_uncompressed') where segment_type in ('VALIDITY')) as uncompressed
|
||||
) AS blocks_tbl;
|
||||
----
|
||||
true
|
||||
|
||||
statement ok
|
||||
drop table test_roaring;
|
||||
drop table test_uncompressed;
|
||||
|
||||
#### Run container Roaring Compression ratio calculation:
|
||||
# For single row group
|
||||
# 60 vectors with 7/8 runs of nulls per vector
|
||||
# Total compressed bytes:
|
||||
# metadata: (64 / (8 / 2 (bitwidth))) + ((64 * 7) / 8) = 72
|
||||
# data: (8 + (8 * 2)) * 60 = 1440
|
||||
# 1512 bytes
|
||||
|
||||
# Total uncompressed bytes = (60 * 256) = 15360 bytes
|
||||
# Expected Ratio ~= 10.15x
|
||||
statement ok
|
||||
PRAGMA force_compression='roaring'
|
||||
|
||||
statement ok
|
||||
CREATE TABLE test_roaring AS
SELECT
	CASE
		-- AND binds tighter than OR; the parentheses only spell out the grouping already in effect.
		WHEN i = 0 OR (
			((i % 512) != 0 AND (i % 512) < 350)
			OR (i % 512) > 450
		)
		THEN NULL
		ELSE 1337
	END
FROM range(0, getvariable('dataset_size')) tbl(i);
|
||||
|
||||
statement ok
|
||||
checkpoint
|
||||
|
||||
statement ok
|
||||
PRAGMA force_compression='uncompressed'
|
||||
|
||||
statement ok
|
||||
CREATE TABLE test_uncompressed AS
SELECT
	CASE
		-- AND binds tighter than OR; the parentheses only spell out the grouping already in effect.
		WHEN i = 0 OR (
			((i % 512) != 0 AND (i % 512) < 350)
			OR (i % 512) > 450
		)
		THEN NULL
		ELSE 1337
	END
FROM range(0, getvariable('dataset_size')) tbl(i);
|
||||
|
||||
statement ok
|
||||
checkpoint
|
||||
|
||||
query I
|
||||
SELECT compression FROM pragma_storage_info('test_roaring') WHERE segment_type = 'VALIDITY' AND compression != 'Roaring';
|
||||
----
|
||||
|
||||
query I
|
||||
SELECT compression FROM pragma_storage_info('test_uncompressed') WHERE segment_type = 'VALIDITY' AND compression != 'Uncompressed';
|
||||
----
|
||||
|
||||
statement ok
|
||||
checkpoint
|
||||
|
||||
# Hmm, this doesn't actually match the result from the back-of-the-napkin calculation
|
||||
statement ok
|
||||
set variable min_ratio = 8.6;
|
||||
set variable max_ratio = 8.8;
|
||||
|
||||
query I
|
||||
SELECT
|
||||
CASE
|
||||
WHEN (uncompressed::FLOAT / compressed::FLOAT) > getvariable('min_ratio') AND (uncompressed::FLOAT / compressed::FLOAT) <= getvariable('max_ratio')
|
||||
THEN True::test_result
|
||||
ELSE {
|
||||
'uncompressed': uncompressed,
|
||||
'compressed': compressed,
|
||||
'allowed_minimum_ratio': getvariable('min_ratio'),
|
||||
'allowed_maximum_ratio': getvariable('max_ratio'),
|
||||
'actual_ratio': uncompressed::FLOAT / compressed::FLOAT
|
||||
}::test_result
|
||||
END
|
||||
FROM (
|
||||
select
|
||||
(select count(distinct block_id) from pragma_storage_info('test_roaring') where segment_type in ('VALIDITY')) as compressed,
|
||||
(select count(distinct block_id) from pragma_storage_info('test_uncompressed') where segment_type in ('VALIDITY')) as uncompressed
|
||||
) AS blocks_tbl;
|
||||
----
|
||||
true
|
||||
|
||||
statement ok
|
||||
drop table test_roaring;
|
||||
drop table test_uncompressed;
|
||||
|
||||
#### Bitset container Roaring Compression ratio calculation:
|
||||
# For single row group
|
||||
# 60 vectors stored uncompressed, + metadata
|
||||
# Total compressed bytes = (60 * 2 (metadata)) + (60 * 256) = 15480 bytes
|
||||
# Total uncompressed bytes = (60 * 256) = 15360 bytes
|
||||
# Expected Ratio ~= 0.99x (15360 / 15480)
|
||||
statement ok
|
||||
PRAGMA force_compression='roaring'
|
||||
|
||||
statement ok
|
||||
CREATE TABLE test_roaring AS SELECT case when i%3=0 then 1337 else null end FROM range(getvariable('dataset_size')) tbl(i);
|
||||
|
||||
statement ok
|
||||
checkpoint
|
||||
|
||||
statement ok
|
||||
PRAGMA force_compression='uncompressed'
|
||||
|
||||
statement ok
|
||||
CREATE TABLE test_uncompressed AS SELECT case when i%3=0 then 1337 else null end FROM range(getvariable('dataset_size')) tbl(i);
|
||||
|
||||
statement ok
|
||||
checkpoint
|
||||
|
||||
query I
|
||||
SELECT compression FROM pragma_storage_info('test_roaring') WHERE segment_type = 'VALIDITY' AND compression != 'Roaring';
|
||||
----
|
||||
|
||||
query I
|
||||
SELECT compression FROM pragma_storage_info('test_uncompressed') WHERE segment_type = 'VALIDITY' AND compression != 'Uncompressed';
|
||||
----
|
||||
|
||||
statement ok
|
||||
checkpoint
|
||||
|
||||
statement ok
|
||||
set variable min_ratio = 0.9;
|
||||
set variable max_ratio = 1;
|
||||
|
||||
query I
|
||||
SELECT
|
||||
CASE
|
||||
WHEN (uncompressed::FLOAT / compressed::FLOAT) > getvariable('min_ratio') AND (uncompressed::FLOAT / compressed::FLOAT) <= getvariable('max_ratio')
|
||||
THEN True::test_result
|
||||
ELSE {
|
||||
'uncompressed': uncompressed,
|
||||
'compressed': compressed,
|
||||
'allowed_minimum_ratio': getvariable('min_ratio'),
|
||||
'allowed_maximum_ratio': getvariable('max_ratio'),
|
||||
'actual_ratio': uncompressed::FLOAT / compressed::FLOAT
|
||||
}::test_result
|
||||
END
|
||||
FROM (
|
||||
select
|
||||
(select count(distinct block_id) from pragma_storage_info('test_roaring') where segment_type in ('VALIDITY')) as compressed,
|
||||
(select count(distinct block_id) from pragma_storage_info('test_uncompressed') where segment_type in ('VALIDITY')) as uncompressed
|
||||
) AS blocks_tbl;
|
||||
----
|
||||
true
|
||||
|
||||
statement ok
|
||||
drop table test_roaring;
|
||||
drop table test_uncompressed;
|
||||
61
external/duckdb/test/sql/storage/compression/roaring/roaring_inverted_array_simple.test
vendored
Normal file
61
external/duckdb/test/sql/storage/compression/roaring/roaring_inverted_array_simple.test
vendored
Normal file
@@ -0,0 +1,61 @@
|
||||
# name: test/sql/storage/compression/roaring/roaring_inverted_array_simple.test
|
||||
# description: Test roaring compression (inverted array containers) with nulls
|
||||
# group: [roaring]
|
||||
|
||||
load __TEST_DIR__/test_roaring.db readwrite v1.2.0
|
||||
|
||||
statement ok
|
||||
PRAGMA force_compression='roaring'
|
||||
|
||||
# simple compression with few values
|
||||
statement ok
|
||||
CREATE TABLE test (a BIGINT);
|
||||
|
||||
statement ok
|
||||
INSERT INTO test SELECT case when i%25=0 then null else 1337 end FROM range(0,10000) tbl(i);
|
||||
|
||||
statement ok
|
||||
checkpoint
|
||||
|
||||
query I
|
||||
SELECT compression FROM pragma_storage_info('test') WHERE segment_type ILIKE 'VALIDITY' and compression != 'Roaring';
|
||||
----
|
||||
|
||||
query III
|
||||
select sum(a), min(a), max(a) from test;
|
||||
----
|
||||
12835200 1337 1337
|
||||
|
||||
statement ok
|
||||
delete from test
|
||||
|
||||
# 5 null values per Vector, uses inverted uncompressed arrays
|
||||
statement ok
|
||||
with intermediates as (
|
||||
select i % 2048 as i
|
||||
from range(0, 10_000) t(i)
|
||||
)
|
||||
insert into test select case when
|
||||
i = 0 or
|
||||
i = 6 or
|
||||
i = 1000 or
|
||||
i = 1500 or
|
||||
i = 2000
|
||||
then null
|
||||
else 1337 end from intermediates;
|
||||
|
||||
statement ok
|
||||
checkpoint;
|
||||
|
||||
query I
|
||||
select count(*) from test WHERE a IS NOT NULL;
|
||||
----
|
||||
9976
|
||||
|
||||
query III
|
||||
select sum(a), min(a), max(a) from test;
|
||||
----
|
||||
13337912 1337 1337
|
||||
|
||||
statement ok
|
||||
DROP TABLE test;
|
||||
36
external/duckdb/test/sql/storage/compression/roaring/roaring_inverted_run_simple.test
vendored
Normal file
36
external/duckdb/test/sql/storage/compression/roaring/roaring_inverted_run_simple.test
vendored
Normal file
@@ -0,0 +1,36 @@
|
||||
# name: test/sql/storage/compression/roaring/roaring_inverted_run_simple.test
|
||||
# description: Test roaring compression with an inverted run pattern of nulls
|
||||
# group: [roaring]
|
||||
|
||||
load __TEST_DIR__/test_roaring.db readwrite v1.2.0
|
||||
|
||||
statement ok
|
||||
PRAGMA force_compression='roaring'
|
||||
|
||||
# simple compression with few values
|
||||
statement ok
|
||||
CREATE TABLE test (a BIGINT);
|
||||
|
||||
# Runs can't be inverted, this would be better off being inverted but the space saving is only 1 run at most.
|
||||
statement ok
|
||||
INSERT INTO test
SELECT
	CASE
		-- AND binds tighter than OR; the parentheses only spell out the grouping already in effect.
		WHEN i = 0 OR (
			((i % 512) != 0 AND (i % 512) < 350)
			OR (i % 512) > 450
		)
		THEN 1337
		ELSE NULL
	END
FROM range(0, 10000) tbl(i);
|
||||
|
||||
statement ok
|
||||
checkpoint
|
||||
|
||||
query I
|
||||
SELECT compression FROM pragma_storage_info('test') WHERE segment_type ILIKE 'VALIDITY' and compression != 'Roaring';
|
||||
----
|
||||
|
||||
query I
|
||||
select count(*) from test WHERE a IS NOT NULL;
|
||||
----
|
||||
8062
|
||||
|
||||
query III
|
||||
select sum(a), min(a), max(a) from test;
|
||||
----
|
||||
10778894 1337 1337
|
||||
|
||||
statement ok
|
||||
DROP TABLE test;
|
||||
72
external/duckdb/test/sql/storage/compression/roaring/roaring_run_simple.test
vendored
Normal file
72
external/duckdb/test/sql/storage/compression/roaring/roaring_run_simple.test
vendored
Normal file
@@ -0,0 +1,72 @@
|
||||
# name: test/sql/storage/compression/roaring/roaring_run_simple.test
|
||||
# description: Test roaring compression (run containers) with nulls
|
||||
# group: [roaring]
|
||||
|
||||
load __TEST_DIR__/test_roaring.db readwrite v1.2.0
|
||||
|
||||
statement ok
|
||||
PRAGMA force_compression='roaring'
|
||||
|
||||
# simple compression with few values
|
||||
statement ok
|
||||
CREATE TABLE test (a BIGINT);
|
||||
|
||||
# 8 runs per Vector
|
||||
statement ok
|
||||
INSERT INTO test
SELECT
	CASE
		-- AND binds tighter than OR; the parentheses only spell out the grouping already in effect.
		WHEN i = 0 OR (
			((i % 512) != 0 AND (i % 512) < 350)
			OR (i % 512) > 450
		)
		THEN NULL
		ELSE 1337
	END
FROM range(0, 10000) tbl(i);
|
||||
# runs:
|
||||
# (0,350)
|
||||
# (451,512)
|
||||
# (513,862)
|
||||
# (963,1024)
|
||||
# (1025,1374)
|
||||
# (1475,1536)
|
||||
# (1537,1886)
|
||||
# (1987,2048)
|
||||
|
||||
statement ok
|
||||
checkpoint
|
||||
|
||||
query I
|
||||
SELECT compression FROM pragma_storage_info('test') WHERE segment_type ILIKE 'VALIDITY' and compression != 'Roaring';
|
||||
----
|
||||
|
||||
query I
|
||||
select count(*) from test WHERE a IS NOT NULL;
|
||||
----
|
||||
1938
|
||||
|
||||
query III
|
||||
select sum(a), min(a), max(a) from test;
|
||||
----
|
||||
2591106 1337 1337
|
||||
|
||||
statement ok
|
||||
delete from test;
|
||||
|
||||
# 3 runs per Vector (uses uncompressed runs)
|
||||
statement ok
|
||||
with intermediates as (
|
||||
select i % 2048 as i
|
||||
from range(0, 10_000) t(i)
|
||||
)
|
||||
INSERT INTO test SELECT case when (i >= 0 and i < 110) or (i >= 1500 and i < 1800) or (i >= 2000) then null else 1337 end FROM intermediates;
|
||||
# (0,110)
|
||||
# (1500,1800)
|
||||
# (2000,2048)
|
||||
|
||||
statement ok
|
||||
checkpoint;
|
||||
|
||||
query I
|
||||
select count(*) from test WHERE a IS NOT NULL;
|
||||
----
|
||||
7758
|
||||
|
||||
query III
|
||||
select sum(a), min(a), max(a) from test;
|
||||
----
|
||||
10372446 1337 1337
|
||||
|
||||
statement ok
|
||||
DROP TABLE test;
|
||||
39
external/duckdb/test/sql/storage/compression/roaring/roaring_smaller_than_vector.test
vendored
Normal file
39
external/duckdb/test/sql/storage/compression/roaring/roaring_smaller_than_vector.test
vendored
Normal file
@@ -0,0 +1,39 @@
|
||||
# name: test/sql/storage/compression/roaring/roaring_smaller_than_vector.test
|
||||
# group: [roaring]
|
||||
|
||||
load __TEST_DIR__/test_roaring2.db readwrite v1.2.0
|
||||
|
||||
statement ok
|
||||
PRAGMA force_compression='roaring';
|
||||
|
||||
statement ok
|
||||
set checkpoint_threshold = '10mb';
|
||||
|
||||
# simple compression with few values
|
||||
statement ok
|
||||
CREATE TABLE test (a BIGINT);
|
||||
|
||||
statement ok
|
||||
INSERT INTO test SELECT case when i%25=0 then 1337 else null end FROM range(0,1025) tbl(i);
|
||||
|
||||
statement ok
|
||||
checkpoint
|
||||
|
||||
query I
|
||||
select count(*) from test WHERE a IS NOT NULL;
|
||||
----
|
||||
41
|
||||
|
||||
statement ok
|
||||
INSERT INTO test SELECT case when i%25=0 then 1337 else null end FROM range(0,1025) tbl(i);
|
||||
|
||||
statement ok
|
||||
checkpoint;
|
||||
|
||||
query I
|
||||
select count(*) from test WHERE a IS NOT NULL;
|
||||
----
|
||||
82
|
||||
|
||||
statement ok
|
||||
DROP TABLE test;
|
||||
Reference in New Issue
Block a user