should be it

This commit is contained in:
2025-10-24 19:21:19 -05:00
parent a4b23fc57c
commit f09560c7b1
14047 changed files with 3161551 additions and 1 deletions

View File

@@ -0,0 +1,335 @@
# name: test/sql/storage/compression/alp/alp_inf_null_nan.test
# description: Compare ALP against uncompressed storage for DOUBLE/FLOAT columns holding Infinity, -Infinity, NaN and NULL
# group: [alp]
# load the DB from disk
load __TEST_DIR__/test_alp_nulls.db
# Build the same three tables once per compression method so their scans can be compared afterwards
foreach compression uncompressed alp
# Set the compression algorithm
statement ok
pragma force_compression='${compression}'
# Create tables
# Only d (DOUBLE) and f (FLOAT) are named in the inserts below; a, b, c and e fall back to their defaults
statement ok
create table tbl1_${compression}(
a INTEGER DEFAULT 5,
b VARCHAR DEFAULT 'test',
c BOOL DEFAULT false,
d DOUBLE,
e TEXT default 'null',
f FLOAT
);
statement ok
create table tbl2_${compression}(
a INTEGER DEFAULT 5,
b VARCHAR DEFAULT 'test',
c BOOL DEFAULT false,
d DOUBLE,
e TEXT default 'null',
f FLOAT
);
statement ok
create table tbl3_${compression}(
a INTEGER DEFAULT 5,
b VARCHAR DEFAULT 'test',
c BOOL DEFAULT false,
d DOUBLE,
e TEXT default 'null',
f FLOAT
);
# Populate tables
# tbl1: every row is (Infinity, Infinity); no NULLs
statement ok
insert into tbl1_${compression}(d,f) VALUES
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity');
# tbl2: a leading NULL row, then Infinity / -Infinity rows and a single (0, 0), followed by a long run of NULLs
statement ok
insert into tbl2_${compression}(d,f) VALUES
(NULL, NULL),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', '-Infinity'),
('Infinity', '-Infinity'),
('Infinity', '-Infinity'),
('Infinity', '-Infinity'),
('Infinity', '-Infinity'),
('-Infinity', '-Infinity'),
('-Infinity', '-Infinity'),
('-Infinity', '-Infinity'),
('-Infinity', '-Infinity'),
('-Infinity', '-Infinity'),
('-Infinity', '-Infinity'),
(0, 0),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL);
# Starting with NULLS
statement ok
insert into tbl3_${compression}(d,f) VALUES
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
('-Infinity', '-Infinity'),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
('-Infinity', '-Infinity'),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
('-Infinity', '-Infinity'),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
('-Infinity', '-Infinity'),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
('-Infinity', '-Infinity'),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
('-Infinity', 'Infinity'),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
('-Infinity', 'Infinity'),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
('-Infinity', 'Infinity'),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
('-Infinity', 'Infinity'),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
('Infinity', 'Infinity'),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
('Infinity', 'Infinity'),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
('Infinity', 'Infinity'),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
('Infinity', 'Infinity'),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
('Infinity', 'Infinity'),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
('Infinity', 'Infinity'),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(NULL, NULL),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
('NaN', 'NaN'),
('NaN', 'NaN'),
('NaN', 'NaN');
# Force a checkpoint
statement ok
checkpoint
endloop
# Assert that the scanned results are the same
#tbl1
query II nosort r1
select d, f from tbl1_uncompressed;
----
query II nosort r1
select d, f from tbl1_alp;
----
#tbl2
query II nosort r2
select d, f from tbl2_uncompressed;
----
query II nosort r2
select d, f from tbl2_alp;
----
# tbl3
query II nosort r3
select d, f from tbl3_uncompressed;
----
query II nosort r3
select d, f from tbl3_alp;
----

View File

@@ -0,0 +1,127 @@
# name: test/sql/storage/compression/alp/alp_list_skip.test
# description: Test skipping of small lists in alp
# group: [alp]
# load the DB from disk
load __TEST_DIR__/test_alp_list_skip.db
# Run the same scenario for both ALP variants
foreach comp alp alprd
statement ok
SET force_compression='${comp}'
# Build a table of DOUBLE lists:
#   - one leading row (5700, [5700.0])
#   - 10000 rows from range(10000) whose list is empty, except every 128th row which holds [i]
#   - 100 trailing rows that all have i = 5700 and hold the single-element lists [0] .. [99]
statement ok
create or replace table list_doubles as select 5700 i, [5700.0] l UNION ALL select i, CASE WHEN i%128=0 THEN [i::DOUBLE] ELSE []::DOUBLE[] END as data from range(10000) tbl(i) union all select 5700, [i] FROM range(100) tbl(i);
# Checkpoint before the query below reads the table back
statement ok
checkpoint
query II
SELECT * FROM list_doubles WHERE i=5700
----
5700 [5700.0]
5700 []
5700 [0.0]
5700 [1.0]
5700 [2.0]
5700 [3.0]
5700 [4.0]
5700 [5.0]
5700 [6.0]
5700 [7.0]
5700 [8.0]
5700 [9.0]
5700 [10.0]
5700 [11.0]
5700 [12.0]
5700 [13.0]
5700 [14.0]
5700 [15.0]
5700 [16.0]
5700 [17.0]
5700 [18.0]
5700 [19.0]
5700 [20.0]
5700 [21.0]
5700 [22.0]
5700 [23.0]
5700 [24.0]
5700 [25.0]
5700 [26.0]
5700 [27.0]
5700 [28.0]
5700 [29.0]
5700 [30.0]
5700 [31.0]
5700 [32.0]
5700 [33.0]
5700 [34.0]
5700 [35.0]
5700 [36.0]
5700 [37.0]
5700 [38.0]
5700 [39.0]
5700 [40.0]
5700 [41.0]
5700 [42.0]
5700 [43.0]
5700 [44.0]
5700 [45.0]
5700 [46.0]
5700 [47.0]
5700 [48.0]
5700 [49.0]
5700 [50.0]
5700 [51.0]
5700 [52.0]
5700 [53.0]
5700 [54.0]
5700 [55.0]
5700 [56.0]
5700 [57.0]
5700 [58.0]
5700 [59.0]
5700 [60.0]
5700 [61.0]
5700 [62.0]
5700 [63.0]
5700 [64.0]
5700 [65.0]
5700 [66.0]
5700 [67.0]
5700 [68.0]
5700 [69.0]
5700 [70.0]
5700 [71.0]
5700 [72.0]
5700 [73.0]
5700 [74.0]
5700 [75.0]
5700 [76.0]
5700 [77.0]
5700 [78.0]
5700 [79.0]
5700 [80.0]
5700 [81.0]
5700 [82.0]
5700 [83.0]
5700 [84.0]
5700 [85.0]
5700 [86.0]
5700 [87.0]
5700 [88.0]
5700 [89.0]
5700 [90.0]
5700 [91.0]
5700 [92.0]
5700 [93.0]
5700 [94.0]
5700 [95.0]
5700 [96.0]
5700 [97.0]
5700 [98.0]
5700 [99.0]
endloop

View File

@@ -0,0 +1,44 @@
# name: test/sql/storage/compression/alp/alp_many_segments.test_slow
# description: Test storage of alp on a large (500000 row) DOUBLE column
# group: [alp]
# load the DB from disk
load __TEST_DIR__/test_alp.db
statement ok
PRAGMA force_compression='uncompressed'
# Reference table: 500000 random doubles rounded to 6 decimals, written with compression forced to 'uncompressed'
statement ok
create table random_double as select round(random(), 6)::DOUBLE as data from range(500000) tbl(i);
statement ok
checkpoint
# Expect no DOUBLE segment with a compression other than Uncompressed.
# segment_type is matched case-insensitively: with an exact-case comparison a casing mismatch
# would select no rows at all and this empty-result expectation would pass without checking anything.
query I
SELECT compression FROM pragma_storage_info('random_double') WHERE segment_type ILIKE 'double' AND compression != 'Uncompressed';
----
# Now create a duplicate of this table, compressed with ALP instead
statement ok
PRAGMA force_compression='alp'
statement ok
create table random_alp_double as select * from random_double;
statement ok
checkpoint
# Expect no DOUBLE segment with a compression other than ALP (same case-insensitive match as above)
query I
SELECT compression FROM pragma_storage_info('random_alp_double') WHERE segment_type ILIKE 'double' AND compression != 'ALP';
----
# Assert that the data was not corrupted by compressing to ALP:
# both sorted scans are registered under the label r1 and must produce the same result
query I sort r1
select * from random_double;
----
query I sort r1
select * from random_alp_double;
----

View File

@@ -0,0 +1,44 @@
# name: test/sql/storage/compression/alp/alp_many_segments_float.test_slow
# description: Test storage of alp on a large (500000 row) FLOAT column
# group: [alp]
# load the DB from disk
load __TEST_DIR__/test_alp.db
statement ok
PRAGMA force_compression='uncompressed'
# Reference table: 500000 random floats rounded to 3 decimals, written with compression forced to 'uncompressed'
statement ok
create table random_float as select round(random(), 3)::FLOAT as data from range(500000) tbl(i);
statement ok
checkpoint
# Expect no FLOAT segment with a compression other than Uncompressed.
# segment_type is matched case-insensitively: with an exact-case comparison a casing mismatch
# would select no rows at all and this empty-result expectation would pass without checking anything.
query I
SELECT compression FROM pragma_storage_info('random_float') WHERE segment_type ILIKE 'float' AND compression != 'Uncompressed';
----
# Now create a duplicate of this table, compressed with ALP instead
statement ok
PRAGMA force_compression='alp'
statement ok
create table random_alp_float as select * from random_float;
statement ok
checkpoint
# Expect no FLOAT segment with a compression other than ALP (same case-insensitive match as above)
query I
SELECT compression FROM pragma_storage_info('random_alp_float') WHERE segment_type ILIKE 'float' AND compression != 'ALP';
----
# Assert that the data was not corrupted by compressing to ALP:
# both sorted scans are registered under the label r1 and must produce the same result
query I sort r1
select * from random_float;
----
query I sort r1
select * from random_alp_float;
----

View File

@@ -0,0 +1,44 @@
# name: test/sql/storage/compression/alp/alp_middle_flush.test_slow
# description: Test storage of alp on a 110000 row DOUBLE column
# group: [alp]
# NOTE(review): the row count is presumably chosen so that a flush happens part-way through the data - confirm
# load the DB from disk
load __TEST_DIR__/test_alp.db
statement ok
PRAGMA force_compression='uncompressed'
# Reference table: 110000 random doubles rounded to 6 decimals, written with compression forced to 'uncompressed'
statement ok
create table random_double as select round(random(), 6)::DOUBLE as data from range(110000) tbl(i);
statement ok
checkpoint
# Expect no DOUBLE segment with a compression other than Uncompressed.
# segment_type is matched case-insensitively: with an exact-case comparison a casing mismatch
# would select no rows at all and this empty-result expectation would pass without checking anything.
query I
SELECT compression FROM pragma_storage_info('random_double') WHERE segment_type ILIKE 'double' AND compression != 'Uncompressed';
----
# Now create a duplicate of this table, compressed with ALP instead
statement ok
PRAGMA force_compression='alp'
statement ok
create table random_alp_double as select * from random_double;
statement ok
checkpoint
# Expect no DOUBLE segment with a compression other than ALP (same case-insensitive match as above)
query I
SELECT compression FROM pragma_storage_info('random_alp_double') WHERE segment_type ILIKE 'double' AND compression != 'ALP';
----
# Assert that the data was not corrupted by compressing to ALP:
# both sorted scans are registered under the label r1 and must produce the same result
query I sort r1
select * from random_double;
----
query I sort r1
select * from random_alp_double;
----

View File

@@ -0,0 +1,36 @@
# name: test/sql/storage/compression/alp/alp_min_max.test
# description: Test that ALP is used for the DOUBLE and FLOAT columns of test_all_types() while the table is repeatedly re-inserted into itself
# group: [alp]
load __TEST_DIR__/alp_min_max.db
statement ok
PRAGMA enable_verification
statement ok
PRAGMA force_compression='alp';
# Run the same scenario once per floating point type
foreach type DOUBLE FLOAT
# Seed the table with the matching column of test_all_types()
statement ok
CREATE TABLE all_types AS SELECT ${type} FROM test_all_types();
# Each iteration appends the table to itself and checkpoints, then inspects the storage info
loop i 0 15
statement ok
INSERT INTO all_types SELECT ${type} FROM all_types;
statement ok
checkpoint
# Expect no segment of the tested type with a compression other than ALP.
# segment_type is matched case-insensitively so the check does not depend on the casing of the
# loop variable (the sibling ALP tests spell the type in lowercase).
query I
SELECT compression FROM pragma_storage_info('all_types') WHERE segment_type ILIKE '${type}' AND compression != 'ALP';
----
# i
endloop
# Drop the table so the next type starts from scratch
statement ok
DROP TABLE all_types;
#type
endloop

View File

@@ -0,0 +1,45 @@
# name: test/sql/storage/compression/alp/alp_negative_numbers.test
# description: Test storage of alp with negative numbers and negative zero
# group: [alp]
# load the DB from disk
load __TEST_DIR__/test_alp.db
statement ok
PRAGMA force_compression='uncompressed'
# Reference table: 1023 values of -round(cos(...), 5), i.e. values in [-1, 1] including negatives,
# written with compression forced to 'uncompressed'
statement ok
create table random_double as select round(cos(1 / (random() + 0.001)), 5)::DOUBLE * -1 as data from range(1023) tbl(i);
# Add an explicit negative zero. The insert gets its own 'statement ok' directive so that it is
# run and checked as a separate statement instead of trailing the CREATE TABLE record.
statement ok
insert into random_double values (-0.0::DOUBLE);
statement ok
checkpoint
# Expect no DOUBLE segment with a compression other than Uncompressed.
# segment_type is matched case-insensitively: with an exact-case comparison a casing mismatch
# would select no rows at all and this empty-result expectation would pass without checking anything.
query I
SELECT compression FROM pragma_storage_info('random_double') WHERE segment_type ILIKE 'double' AND compression != 'Uncompressed';
----
# Now create a duplicate of this table, compressed with ALP instead
statement ok
PRAGMA force_compression='alp'
statement ok
create table random_alp_double as select * from random_double;
statement ok
checkpoint
# Expect no DOUBLE segment with a compression other than ALP (same case-insensitive match as above)
query I
SELECT compression FROM pragma_storage_info('random_alp_double') WHERE segment_type ILIKE 'double' AND compression != 'ALP';
----
# Assert that the data was not corrupted by compressing to ALP:
# both sorted scans are registered under the label r1 and must produce the same result
query I sort r1
select * from random_double;
----
query I sort r1
select * from random_alp_double;
----

View File

@@ -0,0 +1,342 @@
# name: test/sql/storage/compression/alp/alp_nulls.test
# description: Compare ALP against uncompressed storage for DOUBLE/FLOAT columns containing NULLs
# group: [alp]
# load the DB from disk
load __TEST_DIR__/test_alp_nulls.db
# Build the same three tables once per compression method so their scans can be compared afterwards
foreach compression uncompressed alp
# Set the compression algorithm
statement ok
pragma force_compression='${compression}'
# Create tables
# Only d (DOUBLE) and f (FLOAT) are named in the inserts below; a, b, c and e fall back to their defaults
statement ok
create table tbl1_${compression}(
a INTEGER DEFAULT 5,
b VARCHAR DEFAULT 'test',
c BOOL DEFAULT false,
d DOUBLE,
e TEXT default 'null',
f FLOAT
);
statement ok
create table tbl2_${compression}(
a INTEGER DEFAULT 5,
b VARCHAR DEFAULT 'test',
c BOOL DEFAULT false,
d DOUBLE,
e TEXT default 'null',
f FLOAT
);
statement ok
create table tbl3_${compression}(
a INTEGER DEFAULT 5,
b VARCHAR DEFAULT 'test',
c BOOL DEFAULT false,
d DOUBLE,
e TEXT default 'null',
f FLOAT
);
# Populate tables
# tbl1: NULLs interleaved with regular values in both columns
statement ok
insert into tbl1_${compression}(d,f) VALUES
(NULL, 1.2314234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(NULL, 1.2314234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(NULL, 1.2314234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(NULL, 1.2314234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(NULL, 1.2314234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(NULL, 1.2314234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(NULL, NULL);
# Only NULLS
statement ok
insert into tbl2_${compression}(d,f) VALUES
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL);
# Starting with NULLS
statement ok
insert into tbl3_${compression}(d,f) VALUES
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142);
# Force a checkpoint
statement ok
checkpoint
endloop
# Assert that the scanned results are the same
#tbl1
query II nosort r1
select d, f from tbl1_uncompressed;
----
query II nosort r1
select d, f from tbl1_alp;
----
#tbl2
query II nosort r2
select d, f from tbl2_uncompressed;
----
query II nosort r2
select d, f from tbl2_alp;
----
# tbl3
query II nosort r3
select d, f from tbl3_uncompressed;
----
query II nosort r3
select d, f from tbl3_alp;
----

View File

@@ -0,0 +1,129 @@
# name: test/sql/storage/compression/alp/alp_nulls_simple.test
# description: Compare ALP against uncompressed storage for a single table whose DOUBLE/FLOAT columns mix NULLs and values
# group: [alp]
# load the DB from disk
load __TEST_DIR__/test_alp_nulls.db
# Build the same table once per compression method so the two scans can be compared afterwards
foreach compression uncompressed alp
# Set the compression algorithm
statement ok
pragma force_compression='${compression}'
# Create tables
# Only d (DOUBLE) and f (FLOAT) are named in the insert below; a, b, c and e fall back to their defaults
statement ok
create table tbl1_${compression}(
a INTEGER DEFAULT 5,
b VARCHAR DEFAULT 'test',
c BOOL DEFAULT false,
d DOUBLE,
e TEXT default 'null',
f FLOAT
);
# Populate tables
# tbl1: NULLs interleaved with regular values in both columns
statement ok
insert into tbl1_${compression}(d,f) VALUES
(NULL, 1.2314234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(NULL, 1.2314234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(NULL, 1.2314234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(NULL, 1.2314234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(NULL, 1.2314234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(NULL, 1.2314234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(NULL, NULL);
# Force a checkpoint
statement ok
checkpoint
endloop
# Assert that the scanned results are the same
#tbl1
query II nosort r1
select d, f from tbl1_uncompressed;
----
query II nosort r1
select d, f from tbl1_alp;
----

View File

@@ -0,0 +1,51 @@
# name: test/sql/storage/compression/alp/alp_read.test_slow
# description: Load the city_temperature parquet file once with ALP and once uncompressed, then compare the scans
# group: [alp]
require parquet
require httpfs
load __TEST_DIR__/test_alp.db
statement ok
DROP TABLE IF EXISTS temperatures;
statement ok
PRAGMA threads=1;
# First copy: written while ALP compression is forced
statement ok
PRAGMA force_compression='alp';
statement ok
CREATE TABLE temperatures (temperature DOUBLE);
statement ok
INSERT INTO temperatures SELECT temp FROM 'https://github.com/duckdb/duckdb-data/releases/download/v1.0/city_temperature.parquet' t(temp), range(1);
statement ok
CHECKPOINT;
# Second copy: same source file, written while compression is forced to 'uncompressed'
statement ok
CREATE TABLE reference_temperatures (temperature DOUBLE);
statement ok
PRAGMA force_compression='uncompressed';
statement ok
INSERT INTO reference_temperatures SELECT temp FROM 'https://github.com/duckdb/duckdb-data/releases/download/v1.0/city_temperature.parquet' t(temp), range(1);
statement ok
CHECKPOINT;
# Both scans are registered under the label r1 and must produce the same result, in the same order
query I nosort r1
SELECT temperature FROM reference_temperatures;
----
query I nosort r1
SELECT temperature FROM temperatures
----

View File

@@ -0,0 +1,44 @@
# name: test/sql/storage/compression/alp/alp_simple.test
# description: Test storage of alp, but simple
# group: [alp]
# load the DB from disk
load __TEST_DIR__/test_alp.db
statement ok
PRAGMA force_compression='uncompressed'
# Reference table: 1024 random doubles rounded to 6 decimals, written with compression forced to 'uncompressed'
statement ok
create table random_double as select round(random(), 6)::DOUBLE as data from range(1024) tbl(i);
statement ok
checkpoint
# Expect no DOUBLE segment with a compression other than Uncompressed.
# segment_type is matched case-insensitively: with an exact-case comparison a casing mismatch
# would select no rows at all and this empty-result expectation would pass without checking anything.
query I
SELECT compression FROM pragma_storage_info('random_double') WHERE segment_type ILIKE 'double' AND compression != 'Uncompressed';
----
# Now create a duplicate of this table, compressed with ALP instead
statement ok
PRAGMA force_compression='alp'
statement ok
create table random_alp_double as select * from random_double;
statement ok
checkpoint
# Expect no DOUBLE segment with a compression other than ALP (same case-insensitive match as above)
query I
SELECT compression FROM pragma_storage_info('random_alp_double') WHERE segment_type ILIKE 'double' AND compression != 'ALP';
----
# Assert that the data was not corrupted by compressing to ALP:
# both sorted scans are registered under the label r1 and must produce the same result
query I sort r1
select * from random_double;
----
query I sort r1
select * from random_alp_double;
----

View File

@@ -0,0 +1,44 @@
# name: test/sql/storage/compression/alp/alp_simple_float.test
# description: Test storage of alp on FLOAT data, but simple
# group: [alp]
# load the DB from disk
load __TEST_DIR__/test_alp.db
statement ok
PRAGMA force_compression='uncompressed'
# Reference table: 1024 random floats rounded to 6 decimals, written with compression forced to 'uncompressed'
statement ok
create table random_float as select round(random(), 6)::FLOAT as data from range(1024) tbl(i);
statement ok
checkpoint
# Expect no FLOAT segment with a compression other than Uncompressed.
# segment_type is matched case-insensitively: with an exact-case comparison a casing mismatch
# would select no rows at all and this empty-result expectation would pass without checking anything.
query I
SELECT compression FROM pragma_storage_info('random_float') WHERE segment_type ILIKE 'float' AND compression != 'Uncompressed';
----
# Now create a duplicate of this table, compressed with ALP instead
statement ok
PRAGMA force_compression='alp'
statement ok
create table random_alp_float as select * from random_float;
statement ok
checkpoint
# Expect no FLOAT segment with a compression other than ALP (same case-insensitive match as above)
query I
SELECT compression FROM pragma_storage_info('random_alp_float') WHERE segment_type ILIKE 'float' AND compression != 'ALP';
----
# Assert that the data was not corrupted by compressing to ALP:
# both sorted scans are registered under the label r1 and must produce the same result
query I sort r1
select * from random_float;
----
query I sort r1
select * from random_alp_float;
----

View File

@@ -0,0 +1,55 @@
# name: test/sql/storage/compression/alp/alp_skip.test_slow
# description: Compare ALP and uncompressed scans that start at increasing row offsets
# group: [alp]
# load the DB from disk
load __TEST_DIR__/test_alp.db
statement ok
pragma enable_verification;
statement ok
pragma disable_optimizer;
statement ok
pragma force_compression='uncompressed'
# Create the data for the columns: 10240 rows of (random double scaled by 100, row number j)
statement ok
create table temp_table as select round(random(), 6)::DOUBLE * 100 as col, j from range(10240) tbl(j);
statement ok
checkpoint
# Materialize the same data once per compression method (tbl_ALP and tbl_Uncompressed)
foreach compression ALP Uncompressed
# Ensure the correct compression is used
statement ok
pragma force_compression='${compression}'
# Setup
statement ok
create table tbl_${compression} as select * from temp_table;
statement ok
checkpoint
# Expect no DOUBLE segment with a compression other than the forced one.
# segment_type is matched case-insensitively: with an exact-case comparison a casing mismatch
# would select no rows at all and this empty-result expectation would pass without checking anything.
query I
SELECT compression FROM pragma_storage_info('tbl_${compression}') WHERE segment_type ILIKE 'double' AND compression != '${compression}';
----
# compression
endloop
# For every offset, the rows with j > i * 1024 must be the same in both tables:
# each EXCEPT is expected to be empty, so both scalar subqueries yield NULL.
# NOTE(review): j only ranges over [0, 10240), so for i >= 10 the filter selects no rows and
# both sides are trivially empty - confirm whether the upper bound of 1024 is intended.
loop i 1 1024
query II
select x as x_${i}, y as y_${i} from (
select
(select col from tbl_alp where (j > (${i} * 1024)) except select col from tbl_uncompressed where (j > (${i} * 1024))) as x,
(select col from tbl_uncompressed where (j > (${i} * 1024)) except select col from tbl_alp where (j > (${i} * 1024))) as y
);
----
NULL	NULL
# i
endloop

View File

@@ -0,0 +1,94 @@
# name: test/sql/storage/compression/alp/alp_stress_test.test_slow
# description: Append single values to an ALP and an uncompressed table, checkpointing and comparing after every insert
# group: [alp]
load __TEST_DIR__/alp_min_max.db
# Run the whole scenario once per floating point type
foreach type DOUBLE FLOAT
statement ok
PRAGMA force_compression='uncompressed'
# Seed data shared by both tables (without this Constant compression will be used at first)
statement ok
CREATE TABLE temp_table AS SELECT round(random(), 6)::${type} * 100 FROM range(5);
statement ok
CHECKPOINT
# Create one table per compression method and fill it with the seed data
foreach compression ALP Uncompressed
# Force the compression method for this table
statement ok
PRAGMA force_compression='${compression}'
# Setup
statement ok
CREATE TABLE ${compression}_tbl (data ${type});
statement ok
INSERT INTO ${compression}_tbl SELECT * FROM temp_table;
statement ok
CHECKPOINT
# compression
endloop
statement ok
DROP TABLE temp_table;
# ---------------- MAIN LOOP ----------------
loop i 0 10240
# Stage the single new value in a temporary table so both tables receive exactly the same value
statement ok
CREATE TABLE temp_table AS SELECT random()::${type} * 100 + ${i};
foreach compression ALP Uncompressed
# Force the compression method for this table
statement ok
PRAGMA force_compression='${compression}'
# Append the staged value
statement ok
INSERT INTO ${compression}_tbl SELECT * FROM temp_table;
# Checkpoint so the table is rewritten including the new value
statement ok
CHECKPOINT
# compression
endloop
statement ok
DROP TABLE temp_table;
# ---------------- VERIFY CORRECTNESS ----------------
# Both EXCEPT results are expected to be empty, so both scalar subqueries yield NULL
query II
SELECT x AS x_${i}, y AS y_${i} FROM (
SELECT
(SELECT data FROM alp_tbl EXCEPT SELECT data FROM uncompressed_tbl) AS x,
(SELECT data FROM uncompressed_tbl EXCEPT SELECT data FROM alp_tbl) AS y
);
----
NULL	NULL
# i
endloop
# Teardown: drop both tables so the next type starts from scratch
foreach compression alp uncompressed
statement ok
DROP TABLE ${compression}_tbl;
# compression
endloop
# type
endloop

View File

@@ -0,0 +1,753 @@
# name: test/sql/storage/compression/alp/alp_tpcds.test_slow
# description: Run the TPC-DS queries on tables whose numeric columns are DOUBLE/FLOAT and stored with ALP compression forced
# group: [alp]
require tpcds
# load the DB from disk
load __TEST_DIR__/test_alp.db
#statement ok
#pragma threads=1
# Force ALP for everything written from here on
statement ok
PRAGMA force_compression='alp';
# Generate the scale factor 1 data set into tables suffixed with '_original';
# the typed copies created further down are populated from these tables
statement ok
call dsdgen(sf=1, suffix='_original');
# NOTE(review): presumably needed so NULL ordering matches the reference answer
# files the queries are compared against - confirm
statement ok
PRAGMA default_null_order='NULLS LAST'
# Test both DOUBLE and FLOAT
foreach type DOUBLE FLOAT
# Create tables
statement ok
CREATE TABLE web_site(
web_site_sk ${type},
web_site_id VARCHAR,
web_rec_start_date DATE,
web_rec_end_date DATE,
web_name VARCHAR,
web_open_date_sk ${type},
web_close_date_sk ${type},
web_class VARCHAR,
web_manager VARCHAR,
web_mkt_id ${type},
web_mkt_class VARCHAR,
web_mkt_desc VARCHAR,
web_market_manager VARCHAR,
web_company_id ${type},
web_company_name VARCHAR,
web_street_number VARCHAR,
web_street_name VARCHAR,
web_street_type VARCHAR,
web_suite_number VARCHAR,
web_city VARCHAR,
web_county VARCHAR,
web_state VARCHAR,
web_zip VARCHAR,
web_country VARCHAR,
web_gmt_offset ${type},
web_tax_percentage ${type}
);
statement ok
CREATE TABLE web_sales(
ws_sold_date_sk ${type},
ws_sold_time_sk ${type},
ws_ship_date_sk ${type},
ws_item_sk ${type},
ws_bill_customer_sk ${type},
ws_bill_cdemo_sk ${type},
ws_bill_hdemo_sk ${type},
ws_bill_addr_sk ${type},
ws_ship_customer_sk ${type},
ws_ship_cdemo_sk ${type},
ws_ship_hdemo_sk ${type},
ws_ship_addr_sk ${type},
ws_web_page_sk ${type},
ws_web_site_sk ${type},
ws_ship_mode_sk ${type},
ws_warehouse_sk ${type},
ws_promo_sk ${type},
ws_order_number ${type},
ws_quantity ${type},
ws_wholesale_cost ${type},
ws_list_price ${type},
ws_sales_price ${type},
ws_ext_discount_amt ${type},
ws_ext_sales_price ${type},
ws_ext_wholesale_cost ${type},
ws_ext_list_price ${type},
ws_ext_tax ${type},
ws_coupon_amt ${type},
ws_ext_ship_cost ${type},
ws_net_paid ${type},
ws_net_paid_inc_tax ${type},
ws_net_paid_inc_ship ${type},
ws_net_paid_inc_ship_tax ${type},
ws_net_profit ${type}
);
statement ok
CREATE TABLE web_returns(
wr_returned_date_sk ${type},
wr_returned_time_sk ${type},
wr_item_sk ${type},
wr_refunded_customer_sk ${type},
wr_refunded_cdemo_sk ${type},
wr_refunded_hdemo_sk ${type},
wr_refunded_addr_sk ${type},
wr_returning_customer_sk ${type},
wr_returning_cdemo_sk ${type},
wr_returning_hdemo_sk ${type},
wr_returning_addr_sk ${type},
wr_web_page_sk ${type},
wr_reason_sk ${type},
wr_order_number ${type},
wr_return_quantity ${type},
wr_return_amt ${type},
wr_return_tax ${type},
wr_return_amt_inc_tax ${type},
wr_fee ${type},
wr_return_ship_cost ${type},
wr_refunded_cash ${type},
wr_reversed_charge ${type},
wr_account_credit ${type},
wr_net_loss ${type}
);
statement ok
CREATE TABLE web_page(
wp_web_page_sk ${type},
wp_web_page_id VARCHAR,
wp_rec_start_date DATE,
wp_rec_end_date DATE,
wp_creation_date_sk ${type},
wp_access_date_sk ${type},
wp_autogen_flag VARCHAR,
wp_customer_sk ${type},
wp_url VARCHAR,
wp_type VARCHAR,
wp_char_count ${type},
wp_link_count ${type},
wp_image_count ${type},
wp_max_ad_count ${type}
);
statement ok
CREATE TABLE warehouse(
w_warehouse_sk ${type},
w_warehouse_id VARCHAR,
w_warehouse_name VARCHAR,
w_warehouse_sq_ft ${type},
w_street_number VARCHAR,
w_street_name VARCHAR,
w_street_type VARCHAR,
w_suite_number VARCHAR,
w_city VARCHAR,
w_county VARCHAR,
w_state VARCHAR,
w_zip VARCHAR,
w_country VARCHAR,
w_gmt_offset ${type}
);
statement ok
CREATE TABLE time_dim(
t_time_sk ${type},
t_time_id VARCHAR,
t_time ${type},
t_hour ${type},
t_minute ${type},
t_second ${type},
t_am_pm VARCHAR,
t_shift VARCHAR,
t_sub_shift VARCHAR,
t_meal_time VARCHAR
);
statement ok
CREATE TABLE store_sales(
ss_sold_date_sk ${type},
ss_sold_time_sk ${type},
ss_item_sk ${type},
ss_customer_sk ${type},
ss_cdemo_sk ${type},
ss_hdemo_sk ${type},
ss_addr_sk ${type},
ss_store_sk ${type},
ss_promo_sk ${type},
ss_ticket_number ${type},
ss_quantity ${type},
ss_wholesale_cost ${type},
ss_list_price ${type},
ss_sales_price ${type},
ss_ext_discount_amt ${type},
ss_ext_sales_price ${type},
ss_ext_wholesale_cost ${type},
ss_ext_list_price ${type},
ss_ext_tax ${type},
ss_coupon_amt ${type},
ss_net_paid ${type},
ss_net_paid_inc_tax ${type},
ss_net_profit ${type}
);
statement ok
CREATE TABLE store_returns(
sr_returned_date_sk ${type},
sr_return_time_sk ${type},
sr_item_sk ${type},
sr_customer_sk ${type},
sr_cdemo_sk ${type},
sr_hdemo_sk ${type},
sr_addr_sk ${type},
sr_store_sk ${type},
sr_reason_sk ${type},
sr_ticket_number ${type},
sr_return_quantity ${type},
sr_return_amt ${type},
sr_return_tax ${type},
sr_return_amt_inc_tax ${type},
sr_fee ${type},
sr_return_ship_cost ${type},
sr_refunded_cash ${type},
sr_reversed_charge ${type},
sr_store_credit ${type},
sr_net_loss ${type}
);
statement ok
CREATE TABLE store(
s_store_sk ${type},
s_store_id VARCHAR,
s_rec_start_date DATE,
s_rec_end_date DATE,
s_closed_date_sk ${type},
s_store_name VARCHAR,
s_number_employees ${type},
s_floor_space ${type},
s_hours VARCHAR,
s_manager VARCHAR,
s_market_id ${type},
s_geography_class VARCHAR,
s_market_desc VARCHAR,
s_market_manager VARCHAR,
s_division_id ${type},
s_division_name VARCHAR,
s_company_id ${type},
s_company_name VARCHAR,
s_street_number VARCHAR,
s_street_name VARCHAR,
s_street_type VARCHAR,
s_suite_number VARCHAR,
s_city VARCHAR,
s_county VARCHAR,
s_state VARCHAR,
s_zip VARCHAR,
s_country VARCHAR,
s_gmt_offset ${type},
s_tax_percentage ${type}
);
statement ok
CREATE TABLE ship_mode(
sm_ship_mode_sk ${type},
sm_ship_mode_id VARCHAR,
sm_type VARCHAR,
sm_code VARCHAR,
sm_carrier VARCHAR,
sm_contract VARCHAR
);
statement ok
CREATE TABLE reason(
r_reason_sk ${type},
r_reason_id VARCHAR,
r_reason_desc VARCHAR
);
statement ok
CREATE TABLE promotion(
p_promo_sk ${type},
p_promo_id VARCHAR,
p_start_date_sk ${type},
p_end_date_sk ${type},
p_item_sk ${type},
p_cost ${type},
p_response_target ${type},
p_promo_name VARCHAR,
p_channel_dmail VARCHAR,
p_channel_email VARCHAR,
p_channel_catalog VARCHAR,
p_channel_tv VARCHAR,
p_channel_radio VARCHAR,
p_channel_press VARCHAR,
p_channel_event VARCHAR,
p_channel_demo VARCHAR,
p_channel_details VARCHAR,
p_purpose VARCHAR,
p_discount_active VARCHAR
);
statement ok
CREATE TABLE item(
i_item_sk ${type},
i_item_id VARCHAR,
i_rec_start_date DATE,
i_rec_end_date DATE,
i_item_desc VARCHAR,
i_current_price ${type},
i_wholesale_cost ${type},
i_brand_id ${type},
i_brand VARCHAR,
i_class_id ${type},
i_class VARCHAR,
i_category_id ${type},
i_category VARCHAR,
i_manufact_id ${type},
i_manufact VARCHAR,
i_size VARCHAR,
i_formulation VARCHAR,
i_color VARCHAR,
i_units VARCHAR,
i_container VARCHAR,
i_manager_id ${type},
i_product_name VARCHAR
);
statement ok
CREATE TABLE inventory(
inv_date_sk ${type},
inv_item_sk ${type},
inv_warehouse_sk ${type},
inv_quantity_on_hand ${type}
);
statement ok
CREATE TABLE income_band(
ib_income_band_sk ${type},
ib_lower_bound ${type},
ib_upper_bound ${type}
);
statement ok
CREATE TABLE household_demographics(
hd_demo_sk ${type},
hd_income_band_sk ${type},
hd_buy_potential VARCHAR,
hd_dep_count ${type},
hd_vehicle_count ${type}
);
statement ok
CREATE TABLE date_dim(
d_date_sk ${type},
d_date_id VARCHAR,
d_date DATE,
d_month_seq ${type},
d_week_seq ${type},
d_quarter_seq ${type},
d_year ${type},
d_dow ${type},
d_moy ${type},
d_dom ${type},
d_qoy ${type},
d_fy_year ${type},
d_fy_quarter_seq ${type},
d_fy_week_seq ${type},
d_day_name VARCHAR,
d_quarter_name VARCHAR,
d_holiday VARCHAR,
d_weekend VARCHAR,
d_following_holiday VARCHAR,
d_first_dom ${type},
d_last_dom ${type},
d_same_day_ly ${type},
d_same_day_lq ${type},
d_current_day VARCHAR,
d_current_week VARCHAR,
d_current_month VARCHAR,
d_current_quarter VARCHAR,
d_current_year VARCHAR
);
statement ok
CREATE TABLE customer_demographics(
cd_demo_sk ${type},
cd_gender VARCHAR,
cd_marital_status VARCHAR,
cd_education_status VARCHAR,
cd_purchase_estimate ${type},
cd_credit_rating VARCHAR,
cd_dep_count ${type},
cd_dep_employed_count ${type},
cd_dep_college_count ${type}
);
statement ok
CREATE TABLE customer_address(
ca_address_sk ${type},
ca_address_id VARCHAR,
ca_street_number VARCHAR,
ca_street_name VARCHAR,
ca_street_type VARCHAR,
ca_suite_number VARCHAR,
ca_city VARCHAR,
ca_county VARCHAR,
ca_state VARCHAR,
ca_zip VARCHAR,
ca_country VARCHAR,
ca_gmt_offset ${type},
ca_location_type VARCHAR
);
statement ok
CREATE TABLE customer(
c_customer_sk ${type},
c_customer_id VARCHAR,
c_current_cdemo_sk ${type},
c_current_hdemo_sk ${type},
c_current_addr_sk ${type},
c_first_shipto_date_sk ${type},
c_first_sales_date_sk ${type},
c_salutation VARCHAR,
c_first_name VARCHAR,
c_last_name VARCHAR,
c_preferred_cust_flag VARCHAR,
c_birth_day ${type},
c_birth_month ${type},
c_birth_year ${type},
c_birth_country VARCHAR,
c_login VARCHAR,
c_email_address VARCHAR,
c_last_review_date_sk ${type}
);
statement ok
CREATE TABLE catalog_sales(
cs_sold_date_sk ${type},
cs_sold_time_sk ${type},
cs_ship_date_sk ${type},
cs_bill_customer_sk ${type},
cs_bill_cdemo_sk ${type},
cs_bill_hdemo_sk ${type},
cs_bill_addr_sk ${type},
cs_ship_customer_sk ${type},
cs_ship_cdemo_sk ${type},
cs_ship_hdemo_sk ${type},
cs_ship_addr_sk ${type},
cs_call_center_sk ${type},
cs_catalog_page_sk ${type},
cs_ship_mode_sk ${type},
cs_warehouse_sk ${type},
cs_item_sk ${type},
cs_promo_sk ${type},
cs_order_number ${type},
cs_quantity ${type},
cs_wholesale_cost ${type},
cs_list_price ${type},
cs_sales_price ${type},
cs_ext_discount_amt ${type},
cs_ext_sales_price ${type},
cs_ext_wholesale_cost ${type},
cs_ext_list_price ${type},
cs_ext_tax ${type},
cs_coupon_amt ${type},
cs_ext_ship_cost ${type},
cs_net_paid ${type},
cs_net_paid_inc_tax ${type},
cs_net_paid_inc_ship ${type},
cs_net_paid_inc_ship_tax ${type},
cs_net_profit ${type}
);
statement ok
CREATE TABLE catalog_returns(
cr_returned_date_sk ${type},
cr_returned_time_sk ${type},
cr_item_sk ${type},
cr_refunded_customer_sk ${type},
cr_refunded_cdemo_sk ${type},
cr_refunded_hdemo_sk ${type},
cr_refunded_addr_sk ${type},
cr_returning_customer_sk ${type},
cr_returning_cdemo_sk ${type},
cr_returning_hdemo_sk ${type},
cr_returning_addr_sk ${type},
cr_call_center_sk ${type},
cr_catalog_page_sk ${type},
cr_ship_mode_sk ${type},
cr_warehouse_sk ${type},
cr_reason_sk ${type},
cr_order_number ${type},
cr_return_quantity ${type},
cr_return_amount ${type},
cr_return_tax ${type},
cr_return_amt_inc_tax ${type},
cr_fee ${type},
cr_return_ship_cost ${type},
cr_refunded_cash ${type},
cr_reversed_charge ${type},
cr_store_credit ${type},
cr_net_loss ${type}
);
statement ok
CREATE TABLE catalog_page(
cp_catalog_page_sk ${type},
cp_catalog_page_id VARCHAR,
cp_start_date_sk ${type},
cp_end_date_sk ${type},
cp_department VARCHAR,
cp_catalog_number ${type},
cp_catalog_page_number ${type},
cp_description VARCHAR,
cp_type VARCHAR
);
statement ok
CREATE TABLE call_center(
cc_call_center_sk ${type},
cc_call_center_id VARCHAR,
cc_rec_start_date DATE,
cc_rec_end_date DATE,
cc_closed_date_sk ${type},
cc_open_date_sk ${type},
cc_name VARCHAR,
cc_class VARCHAR,
cc_employees ${type},
cc_sq_ft ${type},
cc_hours VARCHAR,
cc_manager VARCHAR,
cc_mkt_id ${type},
cc_mkt_class VARCHAR,
cc_mkt_desc VARCHAR,
cc_market_manager VARCHAR,
cc_division ${type},
cc_division_name VARCHAR,
cc_company ${type},
cc_company_name VARCHAR,
cc_street_number VARCHAR,
cc_street_name VARCHAR,
cc_street_type VARCHAR,
cc_suite_number VARCHAR,
cc_city VARCHAR,
cc_county VARCHAR,
cc_state VARCHAR,
cc_zip VARCHAR,
cc_country VARCHAR,
cc_gmt_offset ${type},
cc_tax_percentage ${type}
);
# Populate tables
statement ok
insert into web_site select * from web_site_original;
statement ok
insert into web_sales select * from web_sales_original;
statement ok
insert into web_returns select * from web_returns_original;
statement ok
insert into web_page select * from web_page_original;
statement ok
insert into warehouse select * from warehouse_original;
statement ok
insert into time_dim select * from time_dim_original;
statement ok
insert into store_sales select * from store_sales_original;
statement ok
insert into store_returns select * from store_returns_original;
statement ok
insert into store select * from store_original;
statement ok
insert into ship_mode select * from ship_mode_original;
statement ok
insert into reason select * from reason_original;
statement ok
insert into promotion select * from promotion_original;
statement ok
insert into item select * from item_original;
statement ok
insert into inventory select * from inventory_original;
statement ok
insert into income_band select * from income_band_original;
statement ok
insert into household_demographics select * from household_demographics_original;
statement ok
insert into date_dim select * from date_dim_original;
statement ok
insert into customer_demographics select * from customer_demographics_original;
statement ok
insert into customer_address select * from customer_address_original;
statement ok
insert into customer select * from customer_original;
statement ok
insert into catalog_sales select * from catalog_sales_original;
statement ok
insert into catalog_returns select * from catalog_returns_original;
statement ok
insert into catalog_page select * from catalog_page_original;
statement ok
insert into call_center select * from call_center_original;
# Checkpoint to compress the data
statement ok
checkpoint
# And verify that no other compression is used
foreach tbl web_site web_sales web_returns web_page warehouse time_dim store_sales store_returns store ship_mode reason promotion item inventory income_band household_demographics date_dim customer_demographics customer_address customer catalog_sales catalog_returns catalog_page call_center
# Can't turn off the creation of Constant segments, so we have to accept that some of the segments are Constant instead of ALP
query I
SELECT compression FROM pragma_storage_info('${tbl}') WHERE segment_type == '${type}' AND compression != 'ALP' AND compression != 'Constant';
----
endloop
# Run the tpcds queries and compare each result with its reference answer file
# NOTE(review): the loop end bound is exclusive (the '10 49' loop below relies
# on that to skip query 49), so this loop runs queries 1..8 and query 9 is
# never executed - confirm whether that is intended
loop i 1 9
query I
PRAGMA tpcds(${i})
----
<FILE>:extension/tpcds/dsdgen/answers/sf1/0${i}.csv
endloop
# Runs queries 10..48
loop i 10 49
#Skip tpcds 49 because it doesn't work without decimals
query I
PRAGMA tpcds(${i})
----
<FILE>:extension/tpcds/dsdgen/answers/sf1/${i}.csv
endloop
# skip tpcds 67 - inconsistent without decimals
# NOTE(review): with an exclusive end bound this loop runs 50..65, so query 66
# is skipped as well as 67 - confirm whether that is intended
loop i 50 66
query I
PRAGMA tpcds(${i})
----
<FILE>:extension/tpcds/dsdgen/answers/sf1/${i}.csv
endloop
# NOTE(review): runs queries 68..98; query 99 is not executed - confirm
loop i 68 99
query I
PRAGMA tpcds(${i})
----
<FILE>:extension/tpcds/dsdgen/answers/sf1/${i}.csv
endloop
# Drop tables
statement ok
DROP TABLE web_site;
statement ok
DROP TABLE web_sales;
statement ok
DROP TABLE web_returns;
statement ok
DROP TABLE web_page;
statement ok
DROP TABLE warehouse;
statement ok
DROP TABLE time_dim;
statement ok
DROP TABLE store_sales;
statement ok
DROP TABLE store_returns;
statement ok
DROP TABLE store;
statement ok
DROP TABLE ship_mode;
statement ok
DROP TABLE reason;
statement ok
DROP TABLE promotion;
statement ok
DROP TABLE item;
statement ok
DROP TABLE inventory;
statement ok
DROP TABLE income_band;
statement ok
DROP TABLE household_demographics;
statement ok
DROP TABLE date_dim;
statement ok
DROP TABLE customer_demographics;
statement ok
DROP TABLE customer_address;
statement ok
DROP TABLE customer;
statement ok
DROP TABLE catalog_sales;
statement ok
DROP TABLE catalog_returns;
statement ok
DROP TABLE catalog_page;
statement ok
DROP TABLE call_center;
endloop

View File

@@ -0,0 +1,247 @@
# name: test/sql/storage/compression/alp/alp_tpch.test_slow
# description: Run the TPC-H queries on tables whose numeric columns are DOUBLE/FLOAT and stored with ALP compression forced
# group: [alp]
require tpch
# load the DB from disk
load __TEST_DIR__/test_alp.db
# This needs to be single-threaded to be consistent (because of floating point issues)
statement ok
pragma threads=1
# Force ALP for everything written from here on
statement ok
PRAGMA force_compression='alp';
# Generate the scale factor 1 data set into tables suffixed with '_original';
# the typed copies created further down are populated from these tables
statement ok
call dbgen(sf=1, suffix='_original');
# Test both DOUBLE and FLOAT
foreach type DOUBLE FLOAT
# Create tables
statement ok
CREATE TABLE lineitem(
l_orderkey ${type} NOT NULL,
l_partkey ${type} NOT NULL,
l_suppkey ${type} NOT NULL,
l_linenumber ${type} NOT NULL,
l_quantity ${type} NOT NULL,
l_extendedprice ${type} NOT NULL,
l_discount ${type} NOT NULL,
l_tax ${type} NOT NULL,
l_returnflag VARCHAR NOT NULL,
l_linestatus VARCHAR NOT NULL,
l_shipdate DATE NOT NULL,
l_commitdate DATE NOT NULL,
l_receiptdate DATE NOT NULL,
l_shipinstruct VARCHAR NOT NULL,
l_shipmode VARCHAR NOT NULL,
l_comment VARCHAR NOT NULL
);
statement ok
CREATE TABLE orders(
o_orderkey ${type} NOT NULL,
o_custkey ${type} NOT NULL,
o_orderstatus VARCHAR NOT NULL,
o_totalprice ${type} NOT NULL,
o_orderdate DATE NOT NULL,
o_orderpriority VARCHAR NOT NULL,
o_clerk VARCHAR NOT NULL,
o_shippriority ${type} NOT NULL,
o_comment VARCHAR NOT NULL
);
statement ok
CREATE TABLE partsupp(
ps_partkey ${type} NOT NULL,
ps_suppkey ${type} NOT NULL,
ps_availqty ${type} NOT NULL,
ps_supplycost ${type} NOT NULL,
ps_comment VARCHAR NOT NULL
);
# 'p_partkey' being INTEGER is imperative to TPCH(17)
statement ok
CREATE TABLE part(
p_partkey INTEGER NOT NULL,
p_name VARCHAR NOT NULL,
p_mfgr VARCHAR NOT NULL,
p_brand VARCHAR NOT NULL,
p_type VARCHAR NOT NULL,
p_size ${type} NOT NULL,
p_container VARCHAR NOT NULL,
p_retailprice ${type} NOT NULL,
p_comment VARCHAR NOT NULL
);
statement ok
CREATE TABLE customer(
c_custkey ${type} NOT NULL,
c_name VARCHAR NOT NULL,
c_address VARCHAR NOT NULL,
c_nationkey ${type} NOT NULL,
c_phone VARCHAR NOT NULL,
c_acctbal ${type} NOT NULL,
c_mktsegment VARCHAR NOT NULL,
c_comment VARCHAR NOT NULL
);
statement ok
CREATE TABLE supplier(
s_suppkey ${type} NOT NULL,
s_name VARCHAR NOT NULL,
s_address VARCHAR NOT NULL,
s_nationkey ${type} NOT NULL,
s_phone VARCHAR NOT NULL,
s_acctbal ${type} NOT NULL,
s_comment VARCHAR NOT NULL
);
statement ok
CREATE TABLE nation(
n_nationkey ${type} NOT NULL,
n_name VARCHAR NOT NULL,
n_regionkey ${type} NOT NULL,
n_comment VARCHAR NOT NULL
);
statement ok
CREATE TABLE region(
r_regionkey ${type} NOT NULL,
r_name VARCHAR NOT NULL,
r_comment VARCHAR NOT NULL
);
# Populate tables
statement ok
insert into lineitem select * from lineitem_original;
statement ok
insert into orders select * from orders_original;
statement ok
insert into partsupp select * from partsupp_original;
statement ok
insert into part select * from part_original;
statement ok
insert into customer select * from customer_original;
statement ok
insert into supplier select * from supplier_original;
statement ok
insert into nation select * from nation_original;
statement ok
insert into region select * from region_original;
# Checkpoint to compress the data
statement ok
checkpoint
# Run the tpch queries and compare each result with its reference answer file
# NOTE(review): the loop end bound is exclusive (the final '16 23' loop covers
# queries 16..22), so this loop runs queries 1..8 and query 9 is never
# executed - confirm whether that is intended
loop i 1 9
query I
PRAGMA tpch(${i})
----
<FILE>:extension/tpch/dbgen/answers/sf1/q0${i}.csv
endloop
# Runs queries 10..14; query 15 is handled separately below
loop i 10 15
query I
PRAGMA tpch(${i})
----
<FILE>:extension/tpch/dbgen/answers/sf1/q${i}.csv
endloop
#TPCH 15 - 'sum' replaced with 'kahan_sum'
# The query text is spelled out here instead of using PRAGMA tpch(15) so that
# both revenue aggregations can use kahan_sum; the result is still compared
# with the regular q15 answer file
query I
SELECT
s_suppkey,
s_name,
s_address,
s_phone,
total_revenue
FROM
supplier,
(
SELECT
l_suppkey AS supplier_no,
kahan_sum(l_extendedprice * (1 - l_discount)) AS total_revenue
FROM
lineitem
WHERE
l_shipdate >= CAST('1996-01-01' AS date)
AND l_shipdate < CAST('1996-04-01' AS date)
GROUP BY
supplier_no) revenue0
WHERE
s_suppkey = supplier_no
AND total_revenue = (
SELECT
max(total_revenue)
FROM (
SELECT
l_suppkey AS supplier_no,
kahan_sum(l_extendedprice * (1 - l_discount)) AS total_revenue
FROM
lineitem
WHERE
l_shipdate >= CAST('1996-01-01' AS date)
AND l_shipdate < CAST('1996-04-01' AS date)
GROUP BY
supplier_no) revenue1)
ORDER BY
s_suppkey;
----
<FILE>:extension/tpch/dbgen/answers/sf1/q15.csv
# Runs queries 16..22
loop i 16 23
query I
PRAGMA tpch(${i})
----
<FILE>:extension/tpch/dbgen/answers/sf1/q${i}.csv
endloop
# Drop tables
statement ok
DROP TABLE lineitem;
statement ok
DROP TABLE orders;
statement ok
DROP TABLE partsupp;
statement ok
DROP TABLE part;
statement ok
DROP TABLE customer;
statement ok
DROP TABLE supplier;
statement ok
DROP TABLE nation;
statement ok
DROP TABLE region;
endloop

View File

@@ -0,0 +1,44 @@
# name: test/sql/storage/compression/alp/alp_zeros.test
# description: Test ALP storage of a column that contains only zeros
# group: [alp]
# load the DB from disk
load __TEST_DIR__/test_alp.db
# Baseline: write the reference table with compression forced to 'uncompressed'
statement ok
PRAGMA force_compression='uncompressed'
# Create a table of 1024 rows that all hold the DOUBLE value 0
statement ok
create table random_double as select 0::DOUBLE as data from range(1024) tbl(i);
statement ok
checkpoint
# Expect no rows: no 'double' segment may use anything other than Uncompressed
# NOTE(review): the filter compares segment_type with lower-case 'double' and
# string '==' is case-sensitive; if pragma_storage_info reports the type in
# upper case this check matches nothing and passes vacuously - confirm
query I
SELECT compression FROM pragma_storage_info('random_double') WHERE segment_type == 'double' AND compression != 'Uncompressed';
----
# Now create a duplicate of this table, compressed with ALP instead
statement ok
PRAGMA force_compression='alp'
statement ok
create table random_alp_double as select * from random_double;
statement ok
checkpoint
# Expect no rows: no 'double' segment may use anything other than ALP
# (the casing note above applies to this filter as well)
query I
SELECT compression FROM pragma_storage_info('random_alp_double') WHERE segment_type == 'double' AND compression != 'ALP';
----
# Assert that the data was not corrupted by compressing to ALP
# Both queries carry the label r1 and list no expected rows, so the runner is
# expected to compare the two results with each other
query I sort r1
select * from random_double;
----
query I sort r1
select * from random_alp_double;
----

View File

@@ -0,0 +1,335 @@
# name: test/sql/storage/compression/alprd/alprd_inf_null_nan.test
# group: [alprd]
# load the DB from disk
load __TEST_DIR__/test_alprd_inf.db
foreach compression uncompressed alprd
# Set the compression algorithm
statement ok
pragma force_compression='${compression}'
# Create tables
statement ok
create table tbl1_${compression}(
a INTEGER DEFAULT 5,
b VARCHAR DEFAULT 'test',
c BOOL DEFAULT false,
d DOUBLE,
e TEXT default 'null',
f FLOAT
);
statement ok
create table tbl2_${compression}(
a INTEGER DEFAULT 5,
b VARCHAR DEFAULT 'test',
c BOOL DEFAULT false,
d DOUBLE,
e TEXT default 'null',
f FLOAT
);
statement ok
create table tbl3_${compression}(
a INTEGER DEFAULT 5,
b VARCHAR DEFAULT 'test',
c BOOL DEFAULT false,
d DOUBLE,
e TEXT default 'null',
f FLOAT
);
# Populate tables
# Only positive Infinity values (no NULLs)
statement ok
insert into tbl1_${compression}(d,f) VALUES
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity');
# Mostly NULLs, with a run of Infinity / -Infinity values and a single zero near the start
statement ok
insert into tbl2_${compression}(d,f) VALUES
(NULL, NULL),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', 'Infinity'),
('Infinity', '-Infinity'),
('Infinity', '-Infinity'),
('Infinity', '-Infinity'),
('Infinity', '-Infinity'),
('Infinity', '-Infinity'),
('-Infinity', '-Infinity'),
('-Infinity', '-Infinity'),
('-Infinity', '-Infinity'),
('-Infinity', '-Infinity'),
('-Infinity', '-Infinity'),
('-Infinity', '-Infinity'),
(0, 0),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL);
# Starting with NULLs, then finite values interleaved with -Infinity / Infinity, one more NULL, and NaNs at the end
statement ok
insert into tbl3_${compression}(d,f) VALUES
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
('-Infinity', '-Infinity'),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
('-Infinity', '-Infinity'),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
('-Infinity', '-Infinity'),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
('-Infinity', '-Infinity'),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
('-Infinity', '-Infinity'),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
('-Infinity', 'Infinity'),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
('-Infinity', 'Infinity'),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
('-Infinity', 'Infinity'),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
('-Infinity', 'Infinity'),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
('Infinity', 'Infinity'),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
('Infinity', 'Infinity'),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
('Infinity', 'Infinity'),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
('Infinity', 'Infinity'),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
('Infinity', 'Infinity'),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
('Infinity', 'Infinity'),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(NULL, NULL),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
('NaN', 'NaN'),
('NaN', 'NaN'),
('NaN', 'NaN');
# Force a checkpoint
statement ok
checkpoint
endloop
# Assert that the scanned results are the same
# Each pair of queries below shares a result label (r1, r2, r3) and lists no
# expected rows, so the runner is expected to compare the two results that
# carry the same label with each other: the uncompressed table is the
# reference and the alprd table must return exactly the same rows
#tbl1
query II nosort r1
select d, f from tbl1_uncompressed;
----
query II nosort r1
select d, f from tbl1_alprd;
----
#tbl2
query II nosort r2
select d, f from tbl2_uncompressed;
----
query II nosort r2
select d, f from tbl2_alprd;
----
# tbl3
query II nosort r3
select d, f from tbl3_uncompressed;
----
query II nosort r3
select d, f from tbl3_alprd;
----

View File

@@ -0,0 +1,44 @@
# name: test/sql/storage/compression/alprd/alprd_many_segments.test_slow
# description: Test ALPRD storage of 500000 random doubles
# group: [alprd]
# load the DB from disk
load __TEST_DIR__/test_alprd.db
# Baseline: write the reference table with compression forced to 'uncompressed'
statement ok
PRAGMA force_compression='uncompressed'
# Create a table with 500000 doubles taken straight from random() (no rounding)
statement ok
create table random_double as select random()::DOUBLE as data from range(500000) tbl(i);
statement ok
checkpoint
# Expect no rows: no 'double' segment may use anything other than Uncompressed
# NOTE(review): the filter compares segment_type with lower-case 'double' and
# string '==' is case-sensitive; if pragma_storage_info reports the type in
# upper case this check matches nothing and passes vacuously - confirm
query I
SELECT compression FROM pragma_storage_info('random_double') WHERE segment_type == 'double' AND compression != 'Uncompressed';
----
# Now create a duplicate of this table, compressed with ALPRD instead
statement ok
PRAGMA force_compression='alprd'
statement ok
create table random_alp_double as select * from random_double;
statement ok
checkpoint
# Expect no rows: no 'double' segment may use anything other than ALPRD
# (the casing note above applies to this filter as well)
query I
SELECT compression FROM pragma_storage_info('random_alp_double') WHERE segment_type == 'double' AND compression != 'ALPRD';
----
# Assert that the data was not corrupted by compressing to ALPRD
# Both queries carry the label r1 and list no expected rows, so the runner is
# expected to compare the two results with each other
query I sort r1
select * from random_double;
----
query I sort r1
select * from random_alp_double;
----

View File

@@ -0,0 +1,44 @@
# name: test/sql/storage/compression/alprd/alprd_many_segments_float.test_slow
# description: Test ALPRD storage of 500000 random floats
# group: [alprd]
# load the DB from disk
load __TEST_DIR__/test_alprd.db
# Baseline: write the reference table with compression forced to 'uncompressed'
statement ok
PRAGMA force_compression='uncompressed'
# Create a table with 500000 floats taken straight from random() (no rounding)
statement ok
create table random_float as select random()::FLOAT as data from range(500000) tbl(i);
statement ok
checkpoint
# Expect no rows: no 'float' segment may use anything other than Uncompressed
# NOTE(review): the filter compares segment_type with lower-case 'float' and
# string '==' is case-sensitive; if pragma_storage_info reports the type in
# upper case this check matches nothing and passes vacuously - confirm
query I
SELECT compression FROM pragma_storage_info('random_float') WHERE segment_type == 'float' AND compression != 'Uncompressed';
----
# Now create a duplicate of this table, compressed with ALPRD instead
statement ok
PRAGMA force_compression='alprd'
statement ok
create table random_alp_float as select * from random_float;
statement ok
checkpoint
# Expect no rows: no 'float' segment may use anything other than ALPRD
# (the casing note above applies to this filter as well)
query I
SELECT compression FROM pragma_storage_info('random_alp_float') WHERE segment_type == 'float' AND compression != 'ALPRD';
----
# Assert that the data was not corrupted by compressing to ALPRD
# Both queries carry the label r1 and list no expected rows, so the runner is
# expected to compare the two results with each other
query I sort r1
select * from random_float;
----
query I sort r1
select * from random_alp_float;
----

View File

@@ -0,0 +1,44 @@
# name: test/sql/storage/compression/alprd/alprd_middle_flush.test_slow
# description: Test ALPRD storage of 110000 random doubles
# group: [alprd]
# load the DB from disk
load __TEST_DIR__/test_alprd.db
# Baseline: write the reference table with compression forced to 'uncompressed'
statement ok
PRAGMA force_compression='uncompressed'
# Create a table with 110000 doubles taken straight from random() (no rounding)
# NOTE(review): the row count is presumably chosen to trigger the "middle
# flush" the file name refers to - confirm against the ALPRD implementation
statement ok
create table random_double as select random()::DOUBLE as data from range(110000) tbl(i);
statement ok
checkpoint
# Expect no rows: no 'double' segment may use anything other than Uncompressed
# NOTE(review): the filter compares segment_type with lower-case 'double' and
# string '==' is case-sensitive; if pragma_storage_info reports the type in
# upper case this check matches nothing and passes vacuously - confirm
query I
SELECT compression FROM pragma_storage_info('random_double') WHERE segment_type == 'double' AND compression != 'Uncompressed';
----
# Now create a duplicate of this table, compressed with ALPRD instead
statement ok
PRAGMA force_compression='alprd'
statement ok
create table random_alp_double as select * from random_double;
statement ok
checkpoint
# Expect no rows: no 'double' segment may use anything other than ALPRD
# (the casing note above applies to this filter as well)
query I
SELECT compression FROM pragma_storage_info('random_alp_double') WHERE segment_type == 'double' AND compression != 'ALPRD';
----
# Assert that the data was not corrupted by compressing to ALPRD
# Both queries carry the label r1 and list no expected rows, so the runner is
# expected to compare the two results with each other
query I sort r1
select * from random_double;
----
query I sort r1
select * from random_alp_double;
----

View File

@@ -0,0 +1,37 @@
# name: test/sql/storage/compression/alprd/alprd_min_max.test
# description: Check that the DOUBLE/FLOAT columns of test_all_types() are stored as ALPRD while the table is repeatedly doubled in size
# group: [alprd]
load __TEST_DIR__/alprd_min_max.db
statement ok
PRAGMA enable_verification
# Force ALPRD for everything written from here on
statement ok
PRAGMA force_compression='alprd';
# The scenario is repeated for both floating point types
foreach type DOUBLE FLOAT
# Start from the column of test_all_types() that is named after the type
# NOTE(review): presumably this column holds the extreme values the test name
# refers to - confirm against test_all_types()
statement ok
CREATE TABLE all_types AS SELECT ${type} FROM test_all_types();
# Each pass re-inserts the table into itself (doubling the row count),
# checkpoints, and checks the compression that was used
loop i 0 15
statement ok
INSERT INTO all_types SELECT ${type} FROM all_types;
statement ok
checkpoint
# Expect no rows: no segment of this type may use anything other than ALPRD
# NOTE(review): the filter compares segment_type with the upper-case loop value
# and string '==' is case-sensitive; confirm this is the casing that
# pragma_storage_info reports, otherwise the check passes vacuously
query I
SELECT compression FROM pragma_storage_info('all_types') WHERE segment_type == '${type}' AND compression != 'ALPRD';
----
# i
endloop
statement ok
DROP TABLE all_types;
#type
endloop

View File

@@ -0,0 +1,44 @@
# name: test/sql/storage/compression/alprd/alprd_negative_numbers.test
# description: Round-trip sign-flipped cos() values through ALPRD and compare with uncompressed storage
# group: [alprd]
# load the DB from disk
load __TEST_DIR__/test_alprd.db
statement ok
PRAGMA force_compression='uncompressed'
# Reference table: 1024 values of cos(...) rounded to 15 decimals and multiplied by -1,
# checkpointed while Uncompressed is forced
statement ok
create table random_double as select round(cos(1 / (random() + 0.001)), 15)::DOUBLE * -1 as data from range(1024) tbl(i);
statement ok
checkpoint
# Expect an empty result: no DOUBLE segment may use anything but Uncompressed.
# The segment_type filter is case-insensitive (ILIKE) on purpose: a case-sensitive match
# against lower-case 'double' selects nothing if pragma_storage_info spells the type in
# upper case, and the check could then never fail.
query I
SELECT compression FROM pragma_storage_info('random_double') WHERE segment_type ILIKE 'double' AND compression != 'Uncompressed';
----
# Now create a duplicate of this table, compressed with ALPRD instead
statement ok
PRAGMA force_compression='alprd'
statement ok
create table random_alp_double as select * from random_double;
statement ok
checkpoint
# Expect an empty result: every DOUBLE segment of the copy must be ALPRD
query I
SELECT compression FROM pragma_storage_info('random_alp_double') WHERE segment_type ILIKE 'double' AND compression != 'ALPRD';
----
# Assert that the data was not corrupted by compressing to ALPRD:
# both sorted results are recorded under the shared label r1 and must be identical
query I sort r1
select * from random_double;
----
query I sort r1
select * from random_alp_double;
----

View File

@@ -0,0 +1,343 @@
# name: test/sql/storage/compression/alprd/alprd_nulls.test
# group: [alprd]
# load the DB from disk
load __TEST_DIR__/test_alprd_nulls.db
foreach compression uncompressed alprd
# Select the compression method for this pass
statement ok
PRAGMA force_compression='${compression}';
# Three identically shaped tables per pass; the inserts further down only supply d and f,
# every other column falls back to its default
statement ok
CREATE TABLE tbl1_${compression} (
    a INTEGER DEFAULT 5,
    b VARCHAR DEFAULT 'test',
    c BOOL DEFAULT false,
    d DOUBLE,
    e TEXT DEFAULT 'null',
    f FLOAT
);
statement ok
CREATE TABLE tbl2_${compression} (
    a INTEGER DEFAULT 5,
    b VARCHAR DEFAULT 'test',
    c BOOL DEFAULT false,
    d DOUBLE,
    e TEXT DEFAULT 'null',
    f FLOAT
);
statement ok
CREATE TABLE tbl3_${compression} (
    a INTEGER DEFAULT 5,
    b VARCHAR DEFAULT 'test',
    c BOOL DEFAULT false,
    d DOUBLE,
    e TEXT DEFAULT 'null',
    f FLOAT
);
# Populate tables
# Mixed NULLs
statement ok
insert into tbl1_${compression}(d,f) VALUES
(NULL, 1.2314234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(NULL, 1.2314234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(NULL, 1.2314234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(NULL, 1.2314234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(NULL, 1.2314234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(NULL, 1.2314234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(NULL, NULL);
# Only NULLS
statement ok
insert into tbl2_${compression}(d,f) VALUES
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL);
# Starting with NULLS
statement ok
insert into tbl3_${compression}(d,f) VALUES
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(NULL, NULL),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142),
(7034.34968234, 93472948.98034),
(1.213123, 1.232142);
# Checkpoint at the end of each pass of the compression loop
statement ok
CHECKPOINT;
endloop
# Compare the scans: queries sharing a label (r1, r2, r3) must return identical results
# tbl1
query II nosort r1
SELECT d, f
FROM tbl1_uncompressed;
----
query II nosort r1
SELECT d, f
FROM tbl1_alprd;
----
# tbl2
query II nosort r2
SELECT d, f
FROM tbl2_uncompressed;
----
query II nosort r2
SELECT d, f
FROM tbl2_alprd;
----
# tbl3
query II nosort r3
SELECT d, f
FROM tbl3_uncompressed;
----
query II nosort r3
SELECT d, f
FROM tbl3_alprd;
----

View File

@@ -0,0 +1,129 @@
# name: test/sql/storage/compression/alprd/alprd_nulls_simple.test
# group: [alprd]
# load the DB from disk
load __TEST_DIR__/test_alprd_nulls.db
foreach compression uncompressed alprd
# Select the compression method for this pass
statement ok
PRAGMA force_compression='${compression}';
# One table per pass; the insert further down only supplies d and f,
# every other column falls back to its default
statement ok
CREATE TABLE tbl1_${compression} (
    a INTEGER DEFAULT 5,
    b VARCHAR DEFAULT 'test',
    c BOOL DEFAULT false,
    d DOUBLE,
    e TEXT DEFAULT 'null',
    f FLOAT
);
# Populate tables
# Mixed NULLs
statement ok
insert into tbl1_${compression}(d,f) VALUES
(NULL, 1.2314234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(NULL, 1.2314234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(NULL, 1.2314234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(NULL, 1.2314234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(NULL, 1.2314234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(NULL, 1.2314234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(324213.23123, NULL),
(NULL, NULL),
(21312.23412, 12.123234),
(NULL, NULL);
# Checkpoint at the end of each pass of the compression loop
statement ok
CHECKPOINT;
endloop
# Compare the scans: both queries share the label r1, so their results must be identical
# tbl1
query II nosort r1
SELECT d, f
FROM tbl1_uncompressed;
----
query II nosort r1
SELECT d, f
FROM tbl1_alprd;
----

View File

@@ -0,0 +1,51 @@
# name: test/sql/storage/compression/alprd/alprd_read.test_slow
# group: [alprd]
require parquet
require httpfs
load __TEST_DIR__/test_alprd.db
statement ok
drop table if exists temperatures;
statement ok
pragma threads=1;
statement ok
pragma force_compression='alprd';
# Table that is filled and checkpointed while ALPRD is forced
statement ok
create table temperatures (
    temperature DOUBLE
);
statement ok
insert into temperatures
select temp
from 'https://github.com/duckdb/duckdb-data/releases/download/v1.0/city_temperature.parquet' t(temp), range(1);
statement ok
checkpoint;
# Reference table: filled from the same file and checkpointed while Uncompressed is forced
statement ok
create table reference_temperatures (
    temperature DOUBLE
);
statement ok
pragma force_compression='uncompressed';
statement ok
insert into reference_temperatures
select temp
from 'https://github.com/duckdb/duckdb-data/releases/download/v1.0/city_temperature.parquet' t(temp), range(1);
statement ok
checkpoint;
# Both scans share the label r1, so their results must be identical
query I nosort r1
select temperature
from reference_temperatures;
----
query I nosort r1
select temperature
from temperatures
----

View File

@@ -0,0 +1,43 @@
# name: test/sql/storage/compression/alprd/alprd_simple.test
# description: Test storage of alprd, but simple
# group: [alprd]
# load the DB from disk
load __TEST_DIR__/test_alprd.db
statement ok
PRAGMA force_compression='uncompressed'
# Reference table: 1024 random doubles, checkpointed while Uncompressed is forced
statement ok
create table random_double as select random()::DOUBLE as data from range(1024) tbl(i);
statement ok
checkpoint
# Expect an empty result: no DOUBLE segment may use anything but Uncompressed.
# The segment_type filter is case-insensitive (ILIKE) on purpose: a case-sensitive match
# against lower-case 'double' selects nothing if pragma_storage_info spells the type in
# upper case, and the check could then never fail.
query I
SELECT compression FROM pragma_storage_info('random_double') WHERE segment_type ILIKE 'double' AND compression != 'Uncompressed';
----
# Now create a duplicate of this table, compressed with ALPRD instead
statement ok
PRAGMA force_compression='alprd'
statement ok
create table random_alprd_double as select * from random_double;
statement ok
checkpoint
# Expect an empty result: every DOUBLE segment of the copy must be ALPRD
query I
SELECT compression FROM pragma_storage_info('random_alprd_double') WHERE segment_type ILIKE 'double' AND compression != 'ALPRD';
----
# Assert that the data was not corrupted by compressing to ALPRD:
# both sorted results are recorded under the shared label r1 and must be identical
query I sort r1
select * from random_double;
----
query I sort r1
select * from random_alprd_double;
----

View File

@@ -0,0 +1,43 @@
# name: test/sql/storage/compression/alprd/alprd_simple_float.test
# description: Test storage of alprd on FLOAT data, but simple
# group: [alprd]
# load the DB from disk (same database name as the other single-table ALPRD tests)
load __TEST_DIR__/test_alprd.db
statement ok
PRAGMA force_compression='uncompressed'
# Reference table: 1024 random floats, checkpointed while Uncompressed is forced
statement ok
create table random_float as select random()::FLOAT as data from range(1024) tbl(i);
statement ok
checkpoint
# Expect an empty result: no FLOAT segment may use anything but Uncompressed.
# The segment_type filter is case-insensitive (ILIKE) on purpose: a case-sensitive match
# against lower-case 'float' selects nothing if pragma_storage_info spells the type in
# upper case, and the check could then never fail.
query I
SELECT compression FROM pragma_storage_info('random_float') WHERE segment_type ILIKE 'float' AND compression != 'Uncompressed';
----
# Now create a duplicate of this table, compressed with ALPRD instead
statement ok
PRAGMA force_compression='alprd'
statement ok
create table random_alp_float as select * from random_float;
statement ok
checkpoint
# Expect an empty result: every FLOAT segment of the copy must be ALPRD
query I
SELECT compression FROM pragma_storage_info('random_alp_float') WHERE segment_type ILIKE 'float' AND compression != 'ALPRD';
----
# Assert that the data was not corrupted by compressing to ALPRD:
# both sorted results are recorded under the shared label r1 and must be identical
query I sort r1
select * from random_float;
----
query I sort r1
select * from random_alp_float;
----

View File

@@ -0,0 +1,55 @@
# name: test/sql/storage/compression/alprd/alprd_skip.test_slow
# description: Compare filtered scans over ALPRD and uncompressed copies of the same data
# group: [alprd]
# load the DB from disk
load __TEST_DIR__/test_alprd.db
statement ok
pragma enable_verification;
statement ok
pragma disable_optimizer;
statement ok
pragma force_compression='uncompressed'
# Create the data for the columns: 10240 rows, j runs from 0 to 10239
statement ok
create table temp_table as select random()::DOUBLE * 100 as col, j from range(10240) tbl(j);
statement ok
checkpoint
foreach compression ALPRD Uncompressed
# Ensure the correct compression is used
statement ok
pragma force_compression='${compression}'
# Setup
statement ok
create table tbl_${compression} as select * from temp_table;
statement ok
checkpoint
# Expect an empty result: every DOUBLE segment must use the forced compression.
# The segment_type filter is case-insensitive (ILIKE) on purpose: a case-sensitive match
# against lower-case 'double' selects nothing if pragma_storage_info spells the type in
# upper case, and the check could then never fail.
query I
SELECT compression FROM pragma_storage_info('tbl_${compression}') WHERE segment_type ILIKE 'double' AND compression != '${compression}';
----
# compression
endloop
# Rows with j > i * 1024 only exist for i = 1..9 (the table holds j = 0..10239), so the loop
# stops there (the end bound is exclusive); larger i would only compare two empty sets.
# Each EXCEPT must be empty in both directions, which the scalar subqueries report as NULL.
loop i 1 10
query II
select x as x_${i}, y as y_${i} from (
select
(select col from tbl_alprd where (j > (${i} * 1024)) except select col from tbl_uncompressed where (j > (${i} * 1024))) as x,
(select col from tbl_uncompressed where (j > (${i} * 1024)) except select col from tbl_alprd where (j > (${i} * 1024))) as y
);
----
NULL NULL
# i
endloop

View File

@@ -0,0 +1,94 @@
# name: test/sql/storage/compression/alprd/alprd_stress_test.test_slow
# group: [alprd]
load __TEST_DIR__/alprd_min_max.db
foreach type DOUBLE FLOAT
statement ok
PRAGMA force_compression='uncompressed';
# Seed rows copied into both tables (without them Constant compression will be used at first)
statement ok
CREATE TABLE temp_table AS
SELECT random()::${type} * 100
FROM range(5);
statement ok
CHECKPOINT;
foreach compression ALPRD Uncompressed
# Force the compression method of this pass
statement ok
PRAGMA force_compression='${compression}';
# Create the per-compression table and copy the seed rows into it
statement ok
CREATE TABLE ${compression}_tbl (
    data ${type}
);
statement ok
INSERT INTO ${compression}_tbl
SELECT *
FROM temp_table;
statement ok
CHECKPOINT;
# compression
endloop
statement ok
DROP TABLE temp_table;
# ---------------- MAIN LOOP ----------------
loop i 0 10240
# Stage a single new value so that both tables receive exactly the same one
statement ok
CREATE TABLE temp_table AS
SELECT random()::${type} * 100 + ${i};
foreach compression ALPRD Uncompressed
# Force the compression method of this pass
statement ok
PRAGMA force_compression='${compression}';
# Append the staged value
statement ok
INSERT INTO ${compression}_tbl
SELECT *
FROM temp_table;
# Checkpoint so the table is written out including the new value
statement ok
CHECKPOINT;
# compression
endloop
statement ok
DROP TABLE temp_table;
# ---------------- VERIFY CORRECTNESS ----------------
# Neither table may hold a value the other lacks: both EXCEPT results must be empty (NULL)
query II
SELECT x AS x_${i}, y AS y_${i}
FROM (
    SELECT
        (SELECT data FROM alprd_tbl EXCEPT SELECT data FROM uncompressed_tbl) AS x,
        (SELECT data FROM uncompressed_tbl EXCEPT SELECT data FROM alprd_tbl) AS y
);
----
NULL NULL
# i
endloop
# Teardown
foreach compression alprd uncompressed
statement ok
DROP TABLE ${compression}_tbl;
# compression
endloop
# type
endloop

View File

@@ -0,0 +1,753 @@
# name: test/sql/storage/compression/alprd/alprd_tpcds.test_slow
# group: [alprd]
require tpcds
# load the DB from disk
load __TEST_DIR__/test_alprd.db
#statement ok
#pragma threads=1
statement ok
pragma force_compression='alprd';
# Generate TPC-DS at scale factor 1; the generated tables carry the suffix '_original'
statement ok
CALL dsdgen(sf=1, suffix='_original');
statement ok
pragma default_null_order='NULLS LAST';
# Run everything below once with DOUBLE columns and once with FLOAT columns
foreach type DOUBLE FLOAT
# Create tables
statement ok
CREATE TABLE web_site(
web_site_sk ${type},
web_site_id VARCHAR,
web_rec_start_date DATE,
web_rec_end_date DATE,
web_name VARCHAR,
web_open_date_sk ${type},
web_close_date_sk ${type},
web_class VARCHAR,
web_manager VARCHAR,
web_mkt_id ${type},
web_mkt_class VARCHAR,
web_mkt_desc VARCHAR,
web_market_manager VARCHAR,
web_company_id ${type},
web_company_name VARCHAR,
web_street_number VARCHAR,
web_street_name VARCHAR,
web_street_type VARCHAR,
web_suite_number VARCHAR,
web_city VARCHAR,
web_county VARCHAR,
web_state VARCHAR,
web_zip VARCHAR,
web_country VARCHAR,
web_gmt_offset ${type},
web_tax_percentage ${type}
);
statement ok
CREATE TABLE web_sales(
ws_sold_date_sk ${type},
ws_sold_time_sk ${type},
ws_ship_date_sk ${type},
ws_item_sk ${type},
ws_bill_customer_sk ${type},
ws_bill_cdemo_sk ${type},
ws_bill_hdemo_sk ${type},
ws_bill_addr_sk ${type},
ws_ship_customer_sk ${type},
ws_ship_cdemo_sk ${type},
ws_ship_hdemo_sk ${type},
ws_ship_addr_sk ${type},
ws_web_page_sk ${type},
ws_web_site_sk ${type},
ws_ship_mode_sk ${type},
ws_warehouse_sk ${type},
ws_promo_sk ${type},
ws_order_number ${type},
ws_quantity ${type},
ws_wholesale_cost ${type},
ws_list_price ${type},
ws_sales_price ${type},
ws_ext_discount_amt ${type},
ws_ext_sales_price ${type},
ws_ext_wholesale_cost ${type},
ws_ext_list_price ${type},
ws_ext_tax ${type},
ws_coupon_amt ${type},
ws_ext_ship_cost ${type},
ws_net_paid ${type},
ws_net_paid_inc_tax ${type},
ws_net_paid_inc_ship ${type},
ws_net_paid_inc_ship_tax ${type},
ws_net_profit ${type}
);
statement ok
CREATE TABLE web_returns(
wr_returned_date_sk ${type},
wr_returned_time_sk ${type},
wr_item_sk ${type},
wr_refunded_customer_sk ${type},
wr_refunded_cdemo_sk ${type},
wr_refunded_hdemo_sk ${type},
wr_refunded_addr_sk ${type},
wr_returning_customer_sk ${type},
wr_returning_cdemo_sk ${type},
wr_returning_hdemo_sk ${type},
wr_returning_addr_sk ${type},
wr_web_page_sk ${type},
wr_reason_sk ${type},
wr_order_number ${type},
wr_return_quantity ${type},
wr_return_amt ${type},
wr_return_tax ${type},
wr_return_amt_inc_tax ${type},
wr_fee ${type},
wr_return_ship_cost ${type},
wr_refunded_cash ${type},
wr_reversed_charge ${type},
wr_account_credit ${type},
wr_net_loss ${type}
);
statement ok
CREATE TABLE web_page(
wp_web_page_sk ${type},
wp_web_page_id VARCHAR,
wp_rec_start_date DATE,
wp_rec_end_date DATE,
wp_creation_date_sk ${type},
wp_access_date_sk ${type},
wp_autogen_flag VARCHAR,
wp_customer_sk ${type},
wp_url VARCHAR,
wp_type VARCHAR,
wp_char_count ${type},
wp_link_count ${type},
wp_image_count ${type},
wp_max_ad_count ${type}
);
statement ok
CREATE TABLE warehouse(
w_warehouse_sk ${type},
w_warehouse_id VARCHAR,
w_warehouse_name VARCHAR,
w_warehouse_sq_ft ${type},
w_street_number VARCHAR,
w_street_name VARCHAR,
w_street_type VARCHAR,
w_suite_number VARCHAR,
w_city VARCHAR,
w_county VARCHAR,
w_state VARCHAR,
w_zip VARCHAR,
w_country VARCHAR,
w_gmt_offset ${type}
);
statement ok
CREATE TABLE time_dim(
t_time_sk ${type},
t_time_id VARCHAR,
t_time ${type},
t_hour ${type},
t_minute ${type},
t_second ${type},
t_am_pm VARCHAR,
t_shift VARCHAR,
t_sub_shift VARCHAR,
t_meal_time VARCHAR
);
statement ok
CREATE TABLE store_sales(
ss_sold_date_sk ${type},
ss_sold_time_sk ${type},
ss_item_sk ${type},
ss_customer_sk ${type},
ss_cdemo_sk ${type},
ss_hdemo_sk ${type},
ss_addr_sk ${type},
ss_store_sk ${type},
ss_promo_sk ${type},
ss_ticket_number ${type},
ss_quantity ${type},
ss_wholesale_cost ${type},
ss_list_price ${type},
ss_sales_price ${type},
ss_ext_discount_amt ${type},
ss_ext_sales_price ${type},
ss_ext_wholesale_cost ${type},
ss_ext_list_price ${type},
ss_ext_tax ${type},
ss_coupon_amt ${type},
ss_net_paid ${type},
ss_net_paid_inc_tax ${type},
ss_net_profit ${type}
);
statement ok
CREATE TABLE store_returns(
sr_returned_date_sk ${type},
sr_return_time_sk ${type},
sr_item_sk ${type},
sr_customer_sk ${type},
sr_cdemo_sk ${type},
sr_hdemo_sk ${type},
sr_addr_sk ${type},
sr_store_sk ${type},
sr_reason_sk ${type},
sr_ticket_number ${type},
sr_return_quantity ${type},
sr_return_amt ${type},
sr_return_tax ${type},
sr_return_amt_inc_tax ${type},
sr_fee ${type},
sr_return_ship_cost ${type},
sr_refunded_cash ${type},
sr_reversed_charge ${type},
sr_store_credit ${type},
sr_net_loss ${type}
);
statement ok
CREATE TABLE store(
s_store_sk ${type},
s_store_id VARCHAR,
s_rec_start_date DATE,
s_rec_end_date DATE,
s_closed_date_sk ${type},
s_store_name VARCHAR,
s_number_employees ${type},
s_floor_space ${type},
s_hours VARCHAR,
s_manager VARCHAR,
s_market_id ${type},
s_geography_class VARCHAR,
s_market_desc VARCHAR,
s_market_manager VARCHAR,
s_division_id ${type},
s_division_name VARCHAR,
s_company_id ${type},
s_company_name VARCHAR,
s_street_number VARCHAR,
s_street_name VARCHAR,
s_street_type VARCHAR,
s_suite_number VARCHAR,
s_city VARCHAR,
s_county VARCHAR,
s_state VARCHAR,
s_zip VARCHAR,
s_country VARCHAR,
s_gmt_offset ${type},
s_tax_percentage ${type}
);
statement ok
CREATE TABLE ship_mode(
sm_ship_mode_sk ${type},
sm_ship_mode_id VARCHAR,
sm_type VARCHAR,
sm_code VARCHAR,
sm_carrier VARCHAR,
sm_contract VARCHAR
);
statement ok
CREATE TABLE reason(
r_reason_sk ${type},
r_reason_id VARCHAR,
r_reason_desc VARCHAR
);
statement ok
CREATE TABLE promotion(
p_promo_sk ${type},
p_promo_id VARCHAR,
p_start_date_sk ${type},
p_end_date_sk ${type},
p_item_sk ${type},
p_cost ${type},
p_response_target ${type},
p_promo_name VARCHAR,
p_channel_dmail VARCHAR,
p_channel_email VARCHAR,
p_channel_catalog VARCHAR,
p_channel_tv VARCHAR,
p_channel_radio VARCHAR,
p_channel_press VARCHAR,
p_channel_event VARCHAR,
p_channel_demo VARCHAR,
p_channel_details VARCHAR,
p_purpose VARCHAR,
p_discount_active VARCHAR
);
statement ok
CREATE TABLE item(
i_item_sk ${type},
i_item_id VARCHAR,
i_rec_start_date DATE,
i_rec_end_date DATE,
i_item_desc VARCHAR,
i_current_price ${type},
i_wholesale_cost ${type},
i_brand_id ${type},
i_brand VARCHAR,
i_class_id ${type},
i_class VARCHAR,
i_category_id ${type},
i_category VARCHAR,
i_manufact_id ${type},
i_manufact VARCHAR,
i_size VARCHAR,
i_formulation VARCHAR,
i_color VARCHAR,
i_units VARCHAR,
i_container VARCHAR,
i_manager_id ${type},
i_product_name VARCHAR
);
statement ok
CREATE TABLE inventory(
inv_date_sk ${type},
inv_item_sk ${type},
inv_warehouse_sk ${type},
inv_quantity_on_hand ${type}
);
statement ok
CREATE TABLE income_band(
ib_income_band_sk ${type},
ib_lower_bound ${type},
ib_upper_bound ${type}
);
statement ok
CREATE TABLE household_demographics(
hd_demo_sk ${type},
hd_income_band_sk ${type},
hd_buy_potential VARCHAR,
hd_dep_count ${type},
hd_vehicle_count ${type}
);
statement ok
CREATE TABLE date_dim(
d_date_sk ${type},
d_date_id VARCHAR,
d_date DATE,
d_month_seq ${type},
d_week_seq ${type},
d_quarter_seq ${type},
d_year ${type},
d_dow ${type},
d_moy ${type},
d_dom ${type},
d_qoy ${type},
d_fy_year ${type},
d_fy_quarter_seq ${type},
d_fy_week_seq ${type},
d_day_name VARCHAR,
d_quarter_name VARCHAR,
d_holiday VARCHAR,
d_weekend VARCHAR,
d_following_holiday VARCHAR,
d_first_dom ${type},
d_last_dom ${type},
d_same_day_ly ${type},
d_same_day_lq ${type},
d_current_day VARCHAR,
d_current_week VARCHAR,
d_current_month VARCHAR,
d_current_quarter VARCHAR,
d_current_year VARCHAR
);
statement ok
CREATE TABLE customer_demographics(
cd_demo_sk ${type},
cd_gender VARCHAR,
cd_marital_status VARCHAR,
cd_education_status VARCHAR,
cd_purchase_estimate ${type},
cd_credit_rating VARCHAR,
cd_dep_count ${type},
cd_dep_employed_count ${type},
cd_dep_college_count ${type}
);
statement ok
CREATE TABLE customer_address(
ca_address_sk ${type},
ca_address_id VARCHAR,
ca_street_number VARCHAR,
ca_street_name VARCHAR,
ca_street_type VARCHAR,
ca_suite_number VARCHAR,
ca_city VARCHAR,
ca_county VARCHAR,
ca_state VARCHAR,
ca_zip VARCHAR,
ca_country VARCHAR,
ca_gmt_offset ${type},
ca_location_type VARCHAR
);
statement ok
CREATE TABLE customer(
c_customer_sk ${type},
c_customer_id VARCHAR,
c_current_cdemo_sk ${type},
c_current_hdemo_sk ${type},
c_current_addr_sk ${type},
c_first_shipto_date_sk ${type},
c_first_sales_date_sk ${type},
c_salutation VARCHAR,
c_first_name VARCHAR,
c_last_name VARCHAR,
c_preferred_cust_flag VARCHAR,
c_birth_day ${type},
c_birth_month ${type},
c_birth_year ${type},
c_birth_country VARCHAR,
c_login VARCHAR,
c_email_address VARCHAR,
c_last_review_date_sk ${type}
);
statement ok
CREATE TABLE catalog_sales(
cs_sold_date_sk ${type},
cs_sold_time_sk ${type},
cs_ship_date_sk ${type},
cs_bill_customer_sk ${type},
cs_bill_cdemo_sk ${type},
cs_bill_hdemo_sk ${type},
cs_bill_addr_sk ${type},
cs_ship_customer_sk ${type},
cs_ship_cdemo_sk ${type},
cs_ship_hdemo_sk ${type},
cs_ship_addr_sk ${type},
cs_call_center_sk ${type},
cs_catalog_page_sk ${type},
cs_ship_mode_sk ${type},
cs_warehouse_sk ${type},
cs_item_sk ${type},
cs_promo_sk ${type},
cs_order_number ${type},
cs_quantity ${type},
cs_wholesale_cost ${type},
cs_list_price ${type},
cs_sales_price ${type},
cs_ext_discount_amt ${type},
cs_ext_sales_price ${type},
cs_ext_wholesale_cost ${type},
cs_ext_list_price ${type},
cs_ext_tax ${type},
cs_coupon_amt ${type},
cs_ext_ship_cost ${type},
cs_net_paid ${type},
cs_net_paid_inc_tax ${type},
cs_net_paid_inc_ship ${type},
cs_net_paid_inc_ship_tax ${type},
cs_net_profit ${type}
);
statement ok
CREATE TABLE catalog_returns(
cr_returned_date_sk ${type},
cr_returned_time_sk ${type},
cr_item_sk ${type},
cr_refunded_customer_sk ${type},
cr_refunded_cdemo_sk ${type},
cr_refunded_hdemo_sk ${type},
cr_refunded_addr_sk ${type},
cr_returning_customer_sk ${type},
cr_returning_cdemo_sk ${type},
cr_returning_hdemo_sk ${type},
cr_returning_addr_sk ${type},
cr_call_center_sk ${type},
cr_catalog_page_sk ${type},
cr_ship_mode_sk ${type},
cr_warehouse_sk ${type},
cr_reason_sk ${type},
cr_order_number ${type},
cr_return_quantity ${type},
cr_return_amount ${type},
cr_return_tax ${type},
cr_return_amt_inc_tax ${type},
cr_fee ${type},
cr_return_ship_cost ${type},
cr_refunded_cash ${type},
cr_reversed_charge ${type},
cr_store_credit ${type},
cr_net_loss ${type}
);
statement ok
CREATE TABLE catalog_page(
cp_catalog_page_sk ${type},
cp_catalog_page_id VARCHAR,
cp_start_date_sk ${type},
cp_end_date_sk ${type},
cp_department VARCHAR,
cp_catalog_number ${type},
cp_catalog_page_number ${type},
cp_description VARCHAR,
cp_type VARCHAR
);
statement ok
CREATE TABLE call_center(
cc_call_center_sk ${type},
cc_call_center_id VARCHAR,
cc_rec_start_date DATE,
cc_rec_end_date DATE,
cc_closed_date_sk ${type},
cc_open_date_sk ${type},
cc_name VARCHAR,
cc_class VARCHAR,
cc_employees ${type},
cc_sq_ft ${type},
cc_hours VARCHAR,
cc_manager VARCHAR,
cc_mkt_id ${type},
cc_mkt_class VARCHAR,
cc_mkt_desc VARCHAR,
cc_market_manager VARCHAR,
cc_division ${type},
cc_division_name VARCHAR,
cc_company ${type},
cc_company_name VARCHAR,
cc_street_number VARCHAR,
cc_street_name VARCHAR,
cc_street_type VARCHAR,
cc_suite_number VARCHAR,
cc_city VARCHAR,
cc_county VARCHAR,
cc_state VARCHAR,
cc_zip VARCHAR,
cc_country VARCHAR,
cc_gmt_offset ${type},
cc_tax_percentage ${type}
);
# Copy every generated '_original' table into its retyped counterpart
statement ok
INSERT INTO web_site SELECT * FROM web_site_original;
statement ok
INSERT INTO web_sales SELECT * FROM web_sales_original;
statement ok
INSERT INTO web_returns SELECT * FROM web_returns_original;
statement ok
INSERT INTO web_page SELECT * FROM web_page_original;
statement ok
INSERT INTO warehouse SELECT * FROM warehouse_original;
statement ok
INSERT INTO time_dim SELECT * FROM time_dim_original;
statement ok
INSERT INTO store_sales SELECT * FROM store_sales_original;
statement ok
INSERT INTO store_returns SELECT * FROM store_returns_original;
statement ok
INSERT INTO store SELECT * FROM store_original;
statement ok
INSERT INTO ship_mode SELECT * FROM ship_mode_original;
statement ok
INSERT INTO reason SELECT * FROM reason_original;
statement ok
INSERT INTO promotion SELECT * FROM promotion_original;
statement ok
INSERT INTO item SELECT * FROM item_original;
statement ok
INSERT INTO inventory SELECT * FROM inventory_original;
statement ok
INSERT INTO income_band SELECT * FROM income_band_original;
statement ok
INSERT INTO household_demographics SELECT * FROM household_demographics_original;
statement ok
INSERT INTO date_dim SELECT * FROM date_dim_original;
statement ok
INSERT INTO customer_demographics SELECT * FROM customer_demographics_original;
statement ok
INSERT INTO customer_address SELECT * FROM customer_address_original;
statement ok
INSERT INTO customer SELECT * FROM customer_original;
statement ok
INSERT INTO catalog_sales SELECT * FROM catalog_sales_original;
statement ok
INSERT INTO catalog_returns SELECT * FROM catalog_returns_original;
statement ok
INSERT INTO catalog_page SELECT * FROM catalog_page_original;
statement ok
INSERT INTO call_center SELECT * FROM call_center_original;
# Checkpoint to compress the data
statement ok
CHECKPOINT;
# Per table: expect no segment of the looped type with an unexpected compression
foreach tbl web_site web_sales web_returns web_page warehouse time_dim store_sales store_returns store ship_mode reason promotion item inventory income_band household_demographics date_dim customer_demographics customer_address customer catalog_sales catalog_returns catalog_page call_center
# Constant segments cannot be turned off, so Constant is tolerated next to ALPRD
query I
SELECT compression
FROM pragma_storage_info('${tbl}')
WHERE segment_type = '${type}'
  AND compression NOT IN ('ALPRD', 'Constant');
----
endloop
# Run the tpcds queries and compare each result with its stored sf1 answer file.
# The loops are split so that single-digit query numbers get the zero-padded file name
# (0${i}.csv) and so that the queries named in the skip comments below are left out.
# NOTE(review): the loop end bound is exclusive (that is how 'loop i 10 49' skips query 49),
# so 'loop i 1 9' stops at 8, 'loop i 50 66' at 65 and 'loop i 68 99' at 98. Queries 9, 66
# and 99 are therefore not run either, although no comment says so - confirm whether that
# is intended before widening the bounds.
loop i 1 9
query I
PRAGMA tpcds(${i})
----
<FILE>:extension/tpcds/dsdgen/answers/sf1/0${i}.csv
endloop
loop i 10 49
# Skip tpcds 49 because it doesn't work without decimals (the exclusive end bound leaves it out)
query I
PRAGMA tpcds(${i})
----
<FILE>:extension/tpcds/dsdgen/answers/sf1/${i}.csv
endloop
# skip tpcds 67 - inconsistent without decimals
loop i 50 66
query I
PRAGMA tpcds(${i})
----
<FILE>:extension/tpcds/dsdgen/answers/sf1/${i}.csv
endloop
loop i 68 99
query I
PRAGMA tpcds(${i})
----
<FILE>:extension/tpcds/dsdgen/answers/sf1/${i}.csv
endloop
# Remove the retyped tables so the next pass of the type loop can recreate them
statement ok
drop table web_site;
statement ok
drop table web_sales;
statement ok
drop table web_returns;
statement ok
drop table web_page;
statement ok
drop table warehouse;
statement ok
drop table time_dim;
statement ok
drop table store_sales;
statement ok
drop table store_returns;
statement ok
drop table store;
statement ok
drop table ship_mode;
statement ok
drop table reason;
statement ok
drop table promotion;
statement ok
drop table item;
statement ok
drop table inventory;
statement ok
drop table income_band;
statement ok
drop table household_demographics;
statement ok
drop table date_dim;
statement ok
drop table customer_demographics;
statement ok
drop table customer_address;
statement ok
drop table customer;
statement ok
drop table catalog_sales;
statement ok
drop table catalog_returns;
statement ok
drop table catalog_page;
statement ok
drop table call_center;
endloop

View File

@@ -0,0 +1,247 @@
# name: test/sql/storage/compression/alprd/alprd_tpch.test_slow
# group: [alprd]

require tpch

# load the DB from disk
load __TEST_DIR__/test_alprd.db

# Floating point results depend on evaluation order, so run single-threaded to keep them consistent
statement ok
PRAGMA threads=1

statement ok
PRAGMA force_compression='alprd';

# Generate the scale factor 1 data set into tables carrying the '_original' suffix
statement ok
CALL dbgen(sf=1, suffix='_original');

# Test both DOUBLE and FLOAT
foreach type DOUBLE FLOAT

# Create the TPC-H tables, storing every numeric column as ${type}
statement ok
CREATE TABLE lineitem(
    l_orderkey      ${type} NOT NULL,
    l_partkey       ${type} NOT NULL,
    l_suppkey       ${type} NOT NULL,
    l_linenumber    ${type} NOT NULL,
    l_quantity      ${type} NOT NULL,
    l_extendedprice ${type} NOT NULL,
    l_discount      ${type} NOT NULL,
    l_tax           ${type} NOT NULL,
    l_returnflag    VARCHAR NOT NULL,
    l_linestatus    VARCHAR NOT NULL,
    l_shipdate      DATE NOT NULL,
    l_commitdate    DATE NOT NULL,
    l_receiptdate   DATE NOT NULL,
    l_shipinstruct  VARCHAR NOT NULL,
    l_shipmode      VARCHAR NOT NULL,
    l_comment       VARCHAR NOT NULL
);

statement ok
CREATE TABLE orders(
    o_orderkey      ${type} NOT NULL,
    o_custkey       ${type} NOT NULL,
    o_orderstatus   VARCHAR NOT NULL,
    o_totalprice    ${type} NOT NULL,
    o_orderdate     DATE NOT NULL,
    o_orderpriority VARCHAR NOT NULL,
    o_clerk         VARCHAR NOT NULL,
    o_shippriority  ${type} NOT NULL,
    o_comment       VARCHAR NOT NULL
);

statement ok
CREATE TABLE partsupp(
    ps_partkey    ${type} NOT NULL,
    ps_suppkey    ${type} NOT NULL,
    ps_availqty   ${type} NOT NULL,
    ps_supplycost ${type} NOT NULL,
    ps_comment    VARCHAR NOT NULL
);

# 'p_partkey' being INTEGER is imperative to TPCH(17)
statement ok
CREATE TABLE part(
    p_partkey     INTEGER NOT NULL,
    p_name        VARCHAR NOT NULL,
    p_mfgr        VARCHAR NOT NULL,
    p_brand       VARCHAR NOT NULL,
    p_type        VARCHAR NOT NULL,
    p_size        ${type} NOT NULL,
    p_container   VARCHAR NOT NULL,
    p_retailprice ${type} NOT NULL,
    p_comment     VARCHAR NOT NULL
);

statement ok
CREATE TABLE customer(
    c_custkey    ${type} NOT NULL,
    c_name       VARCHAR NOT NULL,
    c_address    VARCHAR NOT NULL,
    c_nationkey  ${type} NOT NULL,
    c_phone      VARCHAR NOT NULL,
    c_acctbal    ${type} NOT NULL,
    c_mktsegment VARCHAR NOT NULL,
    c_comment    VARCHAR NOT NULL
);

statement ok
CREATE TABLE supplier(
    s_suppkey   ${type} NOT NULL,
    s_name      VARCHAR NOT NULL,
    s_address   VARCHAR NOT NULL,
    s_nationkey ${type} NOT NULL,
    s_phone     VARCHAR NOT NULL,
    s_acctbal   ${type} NOT NULL,
    s_comment   VARCHAR NOT NULL
);

statement ok
CREATE TABLE nation(
    n_nationkey ${type} NOT NULL,
    n_name      VARCHAR NOT NULL,
    n_regionkey ${type} NOT NULL,
    n_comment   VARCHAR NOT NULL
);

statement ok
CREATE TABLE region(
    r_regionkey ${type} NOT NULL,
    r_name      VARCHAR NOT NULL,
    r_comment   VARCHAR NOT NULL
);

# Populate tables: copy each generated '_original' table into its counterpart created above
foreach tbl lineitem orders partsupp part customer supplier nation region

statement ok
INSERT INTO ${tbl} SELECT * FROM ${tbl}_original;

endloop

# Checkpoint to compress the data
statement ok
CHECKPOINT

# Run the tpch queries and compare each result with the stored sf1 answer file.
# NOTE(review): the loop upper bound appears to be exclusive (query 15 is run separately
# below, after `loop i 10 15`), which would mean `loop i 1 9` never runs query 9 - confirm
# whether that is intended.
loop i 1 9

query I
PRAGMA tpch(${i})
----
<FILE>:extension/tpch/dbgen/answers/sf1/q0${i}.csv

endloop

loop i 10 15

query I
PRAGMA tpch(${i})
----
<FILE>:extension/tpch/dbgen/answers/sf1/q${i}.csv

endloop

#TPCH 15 - 'sum' replaced with 'kahan_sum'
query I
SELECT
    s_suppkey,
    s_name,
    s_address,
    s_phone,
    total_revenue
FROM
    supplier,
    (
        SELECT
            l_suppkey AS supplier_no,
            kahan_sum(l_extendedprice * (1 - l_discount)) AS total_revenue
        FROM
            lineitem
        WHERE
            l_shipdate >= CAST('1996-01-01' AS date)
            AND l_shipdate < CAST('1996-04-01' AS date)
        GROUP BY
            supplier_no) revenue0
WHERE
    s_suppkey = supplier_no
    AND total_revenue = (
        SELECT
            max(total_revenue)
        FROM (
            SELECT
                l_suppkey AS supplier_no,
                kahan_sum(l_extendedprice * (1 - l_discount)) AS total_revenue
            FROM
                lineitem
            WHERE
                l_shipdate >= CAST('1996-01-01' AS date)
                AND l_shipdate < CAST('1996-04-01' AS date)
            GROUP BY
                supplier_no) revenue1)
ORDER BY
    s_suppkey;
----
<FILE>:extension/tpch/dbgen/answers/sf1/q15.csv

loop i 16 23

query I
PRAGMA tpch(${i})
----
<FILE>:extension/tpch/dbgen/answers/sf1/q${i}.csv

endloop

# Drop tables so that the next iteration of the enclosing loop can recreate them
foreach tbl lineitem orders partsupp part customer supplier nation region

statement ok
DROP TABLE ${tbl};

endloop

endloop

View File

@@ -0,0 +1,43 @@
# name: test/sql/storage/compression/alprd/alprd_zeros.test
# description: Test storage of alprd, but simple
# group: [alprd]

# load the DB from disk
load __TEST_DIR__/test_alprd.db

statement ok
PRAGMA force_compression='uncompressed'

# Create a table holding 1024 zero-valued doubles, stored with compression disabled
statement ok
create table random_double as select 0::DOUBLE as data from range(1024) tbl(i);

statement ok
checkpoint

# The query must return no rows, i.e. no DOUBLE segment may use an unexpected compression.
# segment_type is reported in upper case, so it is matched case-insensitively; the previous
# `segment_type == 'double'` filter never matched and made this check vacuous.
# All-zero data may be stored as Constant segments, so that outcome is accepted as well.
query I
SELECT compression FROM pragma_storage_info('random_double') WHERE segment_type ILIKE 'double' AND compression NOT IN ('Uncompressed', 'Constant');
----

# Now create a duplicate of this table, compressed with ALPRD instead
statement ok
PRAGMA force_compression='alprd'

statement ok
create table random_alp_double as select * from random_double;

statement ok
checkpoint

# Same check for the ALPRD copy (Constant segments are accepted here too)
query I
SELECT compression FROM pragma_storage_info('random_alp_double') WHERE segment_type ILIKE 'double' AND compression NOT IN ('ALPRD', 'Constant');
----

# Assert that the data was not corrupted by compressing to ALPRD:
# both queries share the label r1, so their results have to be identical
query I sort r1
select * from random_double;
----

query I sort r1
select * from random_alp_double;
----


View File

@@ -0,0 +1,119 @@
# name: test/sql/storage/compression/bitpacking/bitpacking_bitwidths.test_slow
# description: Test bitpacking with values that compress to all different widths
# group: [bitpacking]

# load the DB from disk
load __TEST_DIR__/test_bitpacking.db

statement ok
PRAGMA force_compression = 'bitpacking'

foreach bitpacking_mode delta_for for constant_delta constant

foreach typesize 8 16 32 64

statement ok
PRAGMA force_bitpacking_mode='${bitpacking_mode}'

# Unsigned: 2048 copies of every power of two 2^0 .. 2^(typesize-1)
statement ok
CREATE TABLE test_unsigned AS SELECT cast(2**(i//2048) as UINT${typesize}) i FROM range(${typesize}*2048) tbl(i);

statement ok
CHECKPOINT;

# There are exactly ${typesize} distinct values ...
query I
SELECT count(*)=${typesize} FROM (SELECT count(i) FROM test_unsigned GROUP BY i);
----
1

# ... each of which occurs 2048 times
query I
SELECT DISTINCT(c_i) FROM (SELECT count(i) AS c_i FROM test_unsigned GROUP BY i);
----
2048

# Every value is twice its predecessor; only the smallest value has no predecessor (NULL)
query IIII
select count(*)=1, count(*)=(${typesize}-1) , AVG(c_i), (i//delta) AS diff_to_next_row from (
SELECT i, count(i) as c_i, lag(i, 1) OVER (ORDER BY i) delta FROM test_unsigned GROUP BY i
) GROUP BY diff_to_next_row ORDER BY ALL;
----
False	True	2048.000000	2
True	False	2048.000000	NULL

# Signed, negative: 2048 copies of every value -(2^0) .. -(2^(typesize-1))
statement ok
CREATE TABLE test_signed_neg AS SELECT cast(-(2**(i//2048)) as INT${typesize}) i FROM range(${typesize}*2048) tbl(i);

statement ok
CHECKPOINT;

query I
SELECT count(*)=${typesize} FROM (SELECT count(i) FROM test_signed_neg GROUP BY i);
----
1

query I
SELECT DISTINCT(c_i) FROM (SELECT count(i) AS c_i FROM test_signed_neg GROUP BY i);
----
2048

# Check the signed table itself (this query used to read test_unsigned by mistake).
# In ascending order every value is half its predecessor, so the integer quotient is 0;
# only the most negative value has no predecessor (NULL).
query IIII
select count(*)=1, count(*)=(${typesize}-1) , AVG(c_i), (i//delta) AS diff_to_next_row from (
SELECT i, count(i) as c_i, lag(i, 1) OVER (ORDER BY i) delta FROM test_signed_neg GROUP BY i
) GROUP BY diff_to_next_row ORDER BY ALL;
----
False	True	2048.000000	0
True	False	2048.000000	NULL

# Signed, positive: only typesize-1 powers of two fit, 2^0 .. 2^(typesize-2)
statement ok
CREATE TABLE test_signed_pos AS SELECT cast(2**(i//2048) as INT${typesize}) i FROM range((${typesize}-1)*2048) tbl(i);

statement ok
CHECKPOINT;

query I
SELECT count(*)=(${typesize}-1) FROM (SELECT count(i) FROM test_signed_pos GROUP BY i);
----
1

# Check test_signed_pos (this query used to read test_signed_neg by mistake)
query I
SELECT DISTINCT(c_i) FROM (SELECT count(i) AS c_i FROM test_signed_pos GROUP BY i);
----
2048

# Check test_signed_pos (this query used to read test_unsigned by mistake).
# With typesize-1 distinct values, typesize-2 of them have a predecessor (quotient 2).
query IIII
select count(*)=1, count(*)=(${typesize}-2) , AVG(c_i), (i//delta) AS diff_to_next_row from (
SELECT i, count(i) as c_i, lag(i, 1) OVER (ORDER BY i) delta FROM test_signed_pos GROUP BY i
) GROUP BY diff_to_next_row ORDER BY ALL;
----
False	True	2048.000000	2
True	False	2048.000000	NULL

statement ok
DROP TABLE test_unsigned

statement ok
DROP TABLE test_signed_neg

statement ok
DROP TABLE test_signed_pos

endloop

# Columns that alternate between 0 and 1 in runs of 3000 rows, for every integral type and bool
foreach type <integral> bool

statement ok
CREATE TABLE test_nullpack AS SELECT CAST((i//3000)%2 as ${type}) as i FROM range(0,12000) tbl(i);

statement ok
CHECKPOINT;

query I
SELECT AVG(cast (i as int)) FROM test_nullpack
----
0.5

statement ok
drop table test_nullpack

endloop

endloop


View File

@@ -0,0 +1,253 @@
# name: test/sql/storage/compression/bitpacking/bitpacking_compression_ratio.test_slow
# description: Assert bitpacking compression ratio is within reasonable margins for each mode
# group: [bitpacking]

# This test defaults to another compression function for smaller block sizes,
# because the bitpacking groups no longer fit the blocks.
require block_size 262144

load __TEST_DIR__/test_bitpacking.db

#### CONSTANT MODE Compression ratio calculation:
# For single row group (note we choose values such that we don't create the constant segments):
# 59 vectors with CONSTANT mode, the last one will be FOR mode
# Total compressed bytes = 59*(8+4) + 1*(2048/8 + 8 + 8 + 4) = 984
# Total uncompressed bytes = 120000*8 = 960000
# Ratio ~= 975x
# However, because this completely fills up a block and we do not support block sharing yet, we waste a lot of space

statement ok
PRAGMA force_compression='bitpacking'

statement ok
PRAGMA force_bitpacking_mode='constant'

statement ok
CREATE TABLE test_bitpacked AS SELECT (i//119000::INT64)::INT64 AS i FROM range(0, 120000000) tbl(i);

statement ok
CHECKPOINT

statement ok
PRAGMA force_compression='uncompressed'

statement ok
CREATE TABLE test_uncompressed AS SELECT i::INT64 FROM range(0, 120000000) tbl(i);

statement ok
CHECKPOINT

# Neither query may return rows: every data segment must use the forced compression
query I
SELECT compression FROM pragma_storage_info('test_bitpacked') WHERE segment_type != 'VALIDITY' AND compression != 'BitPacking';
----

query I
SELECT compression FROM pragma_storage_info('test_uncompressed') WHERE segment_type != 'VALIDITY' AND compression != 'Uncompressed';
----

# The ratio is measured as the number of distinct blocks used by each table
query II
WITH block_counts AS (
    SELECT
        (SELECT count(DISTINCT block_id) FROM pragma_storage_info('test_bitpacked') WHERE segment_type NOT IN ('VARCHAR', 'VALIDITY')) AS bitpacked,
        (SELECT count(DISTINCT block_id) FROM pragma_storage_info('test_uncompressed') WHERE segment_type NOT IN ('VARCHAR', 'VALIDITY')) AS uncompressed
)
SELECT (uncompressed::FLOAT / bitpacked::FLOAT) > 700, (uncompressed::FLOAT / bitpacked::FLOAT) < 1000
FROM block_counts
----
True	True

statement ok
DROP TABLE test_bitpacked;

statement ok
DROP TABLE test_uncompressed;

#### CONSTANT DELTA MODE Compression ratio calculation:
# For single row group
# 60 vectors with a constant increase (1)
# Total compressed bytes = 60*(8+8+4) = 1200
# Total uncompressed bytes = 120000*8 = 960000
# Expected Ratio ~= 800x

statement ok
PRAGMA force_compression='bitpacking'

statement ok
PRAGMA force_bitpacking_mode='constant_delta'

statement ok
CREATE TABLE test_bitpacked AS SELECT i::INT64 AS i FROM range(0, 120000000) tbl(i);

statement ok
CHECKPOINT

statement ok
PRAGMA force_compression='uncompressed'

statement ok
CREATE TABLE test_uncompressed AS SELECT i::INT64 AS i FROM range(0, 120000000) tbl(i);

statement ok
CHECKPOINT

# Neither query may return rows: every data segment must use the forced compression
query I
SELECT compression FROM pragma_storage_info('test_bitpacked') WHERE segment_type != 'VALIDITY' AND compression != 'BitPacking';
----

query I
SELECT compression FROM pragma_storage_info('test_uncompressed') WHERE segment_type != 'VALIDITY' AND compression != 'Uncompressed';
----

statement ok
CHECKPOINT

# The ratio is measured as the number of distinct blocks used by each table
query II
WITH block_counts AS (
    SELECT
        (SELECT count(DISTINCT block_id) FROM pragma_storage_info('test_bitpacked') WHERE segment_type NOT IN ('VARCHAR', 'VALIDITY')) AS bitpacked,
        (SELECT count(DISTINCT block_id) FROM pragma_storage_info('test_uncompressed') WHERE segment_type NOT IN ('VARCHAR', 'VALIDITY')) AS uncompressed
)
SELECT (uncompressed::FLOAT / bitpacked::FLOAT) > 600, (uncompressed::FLOAT / bitpacked::FLOAT) < 800
FROM block_counts
----
True	True

statement ok
DROP TABLE test_bitpacked;

statement ok
DROP TABLE test_uncompressed;

#### DELTA FOR MODE Compression ratio calculation:
# For single row group
# 60 vectors with DELTA_FOR mode smallest possible compression
# Total compressed bytes = 60*(8+8+4+(2048/8)) = 16560
# Total uncompressed bytes = 120000*8 = 960000
# Expected Ratio ~= 58x

statement ok
PRAGMA force_compression='bitpacking'

statement ok
PRAGMA force_bitpacking_mode='delta_for'

statement ok
CREATE TABLE test_bitpacked AS SELECT i//2::INT64 AS i FROM range(0, 120000000) tbl(i);

statement ok
CHECKPOINT

statement ok
PRAGMA force_compression='uncompressed'

statement ok
CREATE TABLE test_uncompressed AS SELECT i AS i FROM range(0, 120000000) tbl(i);

statement ok
CHECKPOINT

# Neither query may return rows: every data segment must use the forced compression
query I
SELECT compression FROM pragma_storage_info('test_bitpacked') WHERE segment_type != 'VALIDITY' AND compression != 'BitPacking';
----

query I
SELECT compression FROM pragma_storage_info('test_uncompressed') WHERE segment_type != 'VALIDITY' AND compression != 'Uncompressed';
----

statement ok
CHECKPOINT

# The ratio is measured as the number of distinct blocks used by each table
query II
WITH block_counts AS (
    SELECT
        (SELECT count(DISTINCT block_id) FROM pragma_storage_info('test_bitpacked') WHERE segment_type NOT IN ('VARCHAR', 'VALIDITY')) AS bitpacked,
        (SELECT count(DISTINCT block_id) FROM pragma_storage_info('test_uncompressed') WHERE segment_type NOT IN ('VARCHAR', 'VALIDITY')) AS uncompressed
)
SELECT (uncompressed::FLOAT / bitpacked::FLOAT) > 50, (uncompressed::FLOAT / bitpacked::FLOAT) < 60
FROM block_counts
----
True	True

statement ok
DROP TABLE test_bitpacked;

statement ok
DROP TABLE test_uncompressed;

#### FOR MODE Compression ratio calculation:
# For single row group
# 60 vectors with FOR mode smallest possible compression
# Total compressed bytes = 60*(8+4+(2048/8)) = 16080
# Total uncompressed bytes = 120000*8 = 960000
# Expected Ratio ~= 60x

statement ok
PRAGMA force_compression='bitpacking'

statement ok
PRAGMA force_bitpacking_mode='for'

statement ok
CREATE TABLE test_bitpacked AS SELECT i%2::INT64 AS i FROM range(0, 120000000) tbl(i);

statement ok
CHECKPOINT

statement ok
PRAGMA force_compression='uncompressed'

statement ok
CREATE TABLE test_uncompressed AS SELECT i::INT64 AS i FROM range(0, 120000000) tbl(i);

statement ok
CHECKPOINT

# Neither query may return rows: every data segment must use the forced compression
query I
SELECT compression FROM pragma_storage_info('test_bitpacked') WHERE segment_type != 'VALIDITY' AND compression != 'BitPacking';
----

query I
SELECT compression FROM pragma_storage_info('test_uncompressed') WHERE segment_type != 'VALIDITY' AND compression != 'Uncompressed';
----

statement ok
CHECKPOINT

# The ratio is measured as the number of distinct blocks used by each table
query II
WITH block_counts AS (
    SELECT
        (SELECT count(DISTINCT block_id) FROM pragma_storage_info('test_bitpacked') WHERE segment_type NOT IN ('VARCHAR', 'VALIDITY')) AS bitpacked,
        (SELECT count(DISTINCT block_id) FROM pragma_storage_info('test_uncompressed') WHERE segment_type NOT IN ('VARCHAR', 'VALIDITY')) AS uncompressed
)
SELECT (uncompressed::FLOAT / bitpacked::FLOAT) > 50, (uncompressed::FLOAT / bitpacked::FLOAT) < 60
FROM block_counts
----
True	True

statement ok
DROP TABLE test_bitpacked;

statement ok
DROP TABLE test_uncompressed;

statement ok
PRAGMA force_bitpacking_mode='none'

# Assert that all supported types do in fact compress
foreach type int8 int16 int32 int64 uint8 uint16 uint32 uint64 decimal(4,1) decimal(8,1) decimal(12,1) decimal(18,1) bool

statement ok
PRAGMA force_compression='uncompressed';

statement ok
CREATE TABLE test_uncompressed AS SELECT (i%2)::${type} FROM range(0, 2500000) tbl(i);

statement ok
CHECKPOINT

statement ok
PRAGMA force_compression='bitpacking'

statement ok
CREATE TABLE test_bitpacked AS SELECT (i%2)::${type} FROM range(0, 2500000) tbl(i);

statement ok
CHECKPOINT

# assert compression ratio >2 which should be achieved for even the smallest types for this data
query II
WITH block_counts AS (
    SELECT
        (SELECT count(DISTINCT block_id) FROM pragma_storage_info('test_bitpacked') WHERE segment_type NOT IN ('VARCHAR', 'VALIDITY')) AS bitpacked,
        (SELECT count(DISTINCT block_id) FROM pragma_storage_info('test_uncompressed') WHERE segment_type NOT IN ('VARCHAR', 'VALIDITY')) AS uncompressed
)
SELECT (uncompressed::FLOAT / bitpacked::FLOAT) > 2, CAST(1 AS ${type})
FROM block_counts
----
1	1

statement ok
DROP TABLE test_bitpacked

statement ok
DROP TABLE test_uncompressed

endloop


View File

@@ -0,0 +1,194 @@
# name: test/sql/storage/compression/bitpacking/bitpacking_compression_ratio_hugeint.test_slow
# description: Assert bitpacking compression ratio is within reasonable margins for each mode
# group: [bitpacking]

# This test defaults to another compression function for smaller block sizes,
# because the bitpacking groups no longer fit the blocks.
require block_size 262144

load __TEST_DIR__/test_bitpacking.db

#### CONSTANT MODE:
# Ratio ~= 1000x

statement ok
PRAGMA force_compression='bitpacking'

statement ok
PRAGMA force_bitpacking_mode='constant'

statement ok
CREATE TABLE test_bitpacked AS SELECT (i//119000::INT64)::HUGEINT AS i FROM range(0, 120000000) tbl(i);

statement ok
CHECKPOINT

statement ok
PRAGMA force_compression='uncompressed'

statement ok
CREATE TABLE test_uncompressed AS SELECT i::HUGEINT FROM range(0, 120000000) tbl(i);

statement ok
CHECKPOINT

# Neither query may return rows: every data segment must use the forced compression
query I
SELECT compression FROM pragma_storage_info('test_bitpacked') WHERE segment_type != 'VALIDITY' AND compression != 'BitPacking';
----

query I
SELECT compression FROM pragma_storage_info('test_uncompressed') WHERE segment_type != 'VALIDITY' AND compression != 'Uncompressed';
----

# The ratio is measured as the number of distinct blocks used by each table
query II
WITH block_counts AS (
    SELECT
        (SELECT count(DISTINCT block_id) FROM pragma_storage_info('test_bitpacked') WHERE segment_type NOT IN ('VARCHAR', 'VALIDITY')) AS bitpacked,
        (SELECT count(DISTINCT block_id) FROM pragma_storage_info('test_uncompressed') WHERE segment_type NOT IN ('VARCHAR', 'VALIDITY')) AS uncompressed
)
SELECT (uncompressed::FLOAT / bitpacked::FLOAT) > 900, (uncompressed::FLOAT / bitpacked::FLOAT) < 1200
FROM block_counts
----
True	True

statement ok
DROP TABLE test_bitpacked;

statement ok
DROP TABLE test_uncompressed;

#### CONSTANT DELTA MODE:
# Expected Ratio ~= 800x

statement ok
PRAGMA force_compression='bitpacking'

statement ok
PRAGMA force_bitpacking_mode='constant_delta'

statement ok
CREATE TABLE test_bitpacked AS SELECT i::HUGEINT AS i FROM range(0, 120000000) tbl(i);

statement ok
CHECKPOINT

statement ok
PRAGMA force_compression='uncompressed'

statement ok
CREATE TABLE test_uncompressed AS SELECT i::HUGEINT AS i FROM range(0, 120000000) tbl(i);

statement ok
CHECKPOINT

# Neither query may return rows: every data segment must use the forced compression
query I
SELECT compression FROM pragma_storage_info('test_bitpacked') WHERE segment_type != 'VALIDITY' AND compression != 'BitPacking';
----

query I
SELECT compression FROM pragma_storage_info('test_uncompressed') WHERE segment_type != 'VALIDITY' AND compression != 'Uncompressed';
----

statement ok
CHECKPOINT

# The ratio is measured as the number of distinct blocks used by each table
query II
WITH block_counts AS (
    SELECT
        (SELECT count(DISTINCT block_id) FROM pragma_storage_info('test_bitpacked') WHERE segment_type NOT IN ('VARCHAR', 'VALIDITY')) AS bitpacked,
        (SELECT count(DISTINCT block_id) FROM pragma_storage_info('test_uncompressed') WHERE segment_type NOT IN ('VARCHAR', 'VALIDITY')) AS uncompressed
)
SELECT (uncompressed::FLOAT / bitpacked::FLOAT) > 600, (uncompressed::FLOAT / bitpacked::FLOAT) < 800
FROM block_counts
----
True	True

statement ok
DROP TABLE test_bitpacked;

statement ok
DROP TABLE test_uncompressed;

#### DELTA FOR MODE:
# Expected Ratio ~= 50x

statement ok
PRAGMA force_compression='bitpacking'

statement ok
PRAGMA force_bitpacking_mode='delta_for'

statement ok
CREATE TABLE test_bitpacked AS SELECT i//2::HUGEINT AS i FROM range(0, 120000000) tbl(i);

statement ok
CHECKPOINT

statement ok
PRAGMA force_compression='uncompressed'

statement ok
CREATE TABLE test_uncompressed AS SELECT i AS i FROM range(0, 120000000) tbl(i);

statement ok
CHECKPOINT

# Neither query may return rows: every data segment must use the forced compression
query I
SELECT compression FROM pragma_storage_info('test_bitpacked') WHERE segment_type != 'VALIDITY' AND compression != 'BitPacking';
----

query I
SELECT compression FROM pragma_storage_info('test_uncompressed') WHERE segment_type != 'VALIDITY' AND compression != 'Uncompressed';
----

statement ok
CHECKPOINT

# The ratio is measured as the number of distinct blocks used by each table
query II
WITH block_counts AS (
    SELECT
        (SELECT count(DISTINCT block_id) FROM pragma_storage_info('test_bitpacked') WHERE segment_type NOT IN ('VARCHAR', 'VALIDITY')) AS bitpacked,
        (SELECT count(DISTINCT block_id) FROM pragma_storage_info('test_uncompressed') WHERE segment_type NOT IN ('VARCHAR', 'VALIDITY')) AS uncompressed
)
SELECT (uncompressed::FLOAT / bitpacked::FLOAT) > 40, (uncompressed::FLOAT / bitpacked::FLOAT) < 60
FROM block_counts
----
True	True

statement ok
DROP TABLE test_bitpacked;

statement ok
DROP TABLE test_uncompressed;

#### FOR MODE:
# Expected Ratio ~= 95x

statement ok
PRAGMA force_compression='bitpacking'

statement ok
PRAGMA force_bitpacking_mode='for'

statement ok
CREATE TABLE test_bitpacked AS SELECT i%2::HUGEINT AS i FROM range(0, 120000000) tbl(i);

statement ok
CHECKPOINT

statement ok
PRAGMA force_compression='uncompressed'

statement ok
CREATE TABLE test_uncompressed AS SELECT i::HUGEINT AS i FROM range(0, 120000000) tbl(i);

statement ok
CHECKPOINT

# Neither query may return rows: every data segment must use the forced compression
query I
SELECT compression FROM pragma_storage_info('test_bitpacked') WHERE segment_type != 'VALIDITY' AND compression != 'BitPacking';
----

query I
SELECT compression FROM pragma_storage_info('test_uncompressed') WHERE segment_type != 'VALIDITY' AND compression != 'Uncompressed';
----

statement ok
CHECKPOINT

# The ratio is measured as the number of distinct blocks used by each table
query II
WITH block_counts AS (
    SELECT
        (SELECT count(DISTINCT block_id) FROM pragma_storage_info('test_bitpacked') WHERE segment_type NOT IN ('VARCHAR', 'VALIDITY')) AS bitpacked,
        (SELECT count(DISTINCT block_id) FROM pragma_storage_info('test_uncompressed') WHERE segment_type NOT IN ('VARCHAR', 'VALIDITY')) AS uncompressed
)
SELECT (uncompressed::FLOAT / bitpacked::FLOAT) > 90, (uncompressed::FLOAT / bitpacked::FLOAT) < 100
FROM block_counts
----
True	True

statement ok
DROP TABLE test_bitpacked;

statement ok
DROP TABLE test_uncompressed;


View File

@@ -0,0 +1,69 @@
# name: test/sql/storage/compression/bitpacking/bitpacking_constant_delta.test
# description: Test that will use the BitpackingMode::CONSTANT_DELTA compression mode
# group: [bitpacking]

# This test defaults to another compression function for smaller block sizes,
# because the bitpacking groups no longer fit the blocks.
require block_size 262144

load __TEST_DIR__/test_bitpacking.db

statement ok
PRAGMA force_compression = 'bitpacking'

foreach bitpacking_mode delta_for for constant_delta constant

statement ok
PRAGMA force_bitpacking_mode='${bitpacking_mode}'

# Five evenly spaced values (2, 4, 6, 8, 10) for every supported type
foreach type int8 int16 int32 int64 uint8 uint16 uint32 uint64 hugeint decimal(4,1) decimal(8,1) decimal(12,1) decimal(18,1)

statement ok
CREATE TABLE test (c ${type});

statement ok
INSERT INTO test SELECT 2+i*2::${type} FROM range(0,5) tbl(i);

statement ok
CHECKPOINT

# Must return no rows: every data segment is expected to be bitpacked
query I
SELECT compression FROM pragma_storage_info('test') WHERE segment_type != 'VALIDITY' AND compression != 'BitPacking'
----

query I
SELECT * FROM test;
----
2
4
6
8
10

statement ok
DROP TABLE test

endloop

# A longer strictly increasing sequence (0 .. 129999)
statement ok
CREATE TABLE test (c INT64);

statement ok
INSERT INTO test SELECT i FROM range(0,130000) tbl(i);

statement ok
CHECKPOINT

query I
SELECT compression FROM pragma_storage_info('test') WHERE segment_type != 'VALIDITY' AND compression != 'BitPacking'
----

query I
SELECT avg(c) FROM test;
----
64999.5

statement ok
DROP TABLE test

endloop


View File

@@ -0,0 +1,68 @@
# name: test/sql/storage/compression/bitpacking/bitpacking_delta.test_slow
# description: Test some large incompressible data
# group: [bitpacking]

# This test defaults to another compression function for smaller block sizes,
# because the bitpacking groups no longer fit the blocks.
require block_size 262144

load __TEST_DIR__/bitpacking_uncompressible.db

statement ok
PRAGMA force_compression='bitpacking'

foreach bitpacking_mode delta_for for constant_delta constant

statement ok
PRAGMA force_bitpacking_mode='${bitpacking_mode}'

# simple compression with few values
statement ok
CREATE TABLE test_delta_full_range (a UINT64);

# Alternate between 0 and the largest UINT64 value, so consecutive rows span the full value range
statement ok
INSERT INTO test_delta_full_range SELECT CASE WHEN i%2=0 THEN 0 ELSE 18446744073709551615 END FROM range(0,1000000) tbl(i);

query II
SELECT a, count(*) FROM test_delta_full_range GROUP BY a ORDER BY a;
----
0	500000
18446744073709551615	500000

query I
SELECT DISTINCT compression FROM pragma_storage_info('test_delta_full_range') WHERE segment_type = 'UBIGINT'
----
BitPacking

statement ok
DROP TABLE test_delta_full_range

endloop

# Do the same thing and confirm we don't bitpack here
statement ok
PRAGMA force_compression='none'

# simple compression with few values
statement ok
CREATE TABLE test_delta_full_range (a UINT64);

# Same alternating data as above
statement ok
INSERT INTO test_delta_full_range SELECT CASE WHEN i%2=0 THEN 0 ELSE 18446744073709551615 END FROM range(0,1000000) tbl(i);

query II
SELECT a, count(*) FROM test_delta_full_range GROUP BY a ORDER BY a;
----
0	500000
18446744073709551615	500000

query I
SELECT DISTINCT compression FROM pragma_storage_info('test_delta_full_range') WHERE segment_type = 'UBIGINT'
----
Uncompressed

statement ok
DROP TABLE test_delta_full_range


View File

@@ -0,0 +1,26 @@
# name: test/sql/storage/compression/bitpacking/bitpacking_delta_for.test
# description: Test bitpacking delta for
# group: [bitpacking]

# This test defaults to another compression function for smaller block sizes,
# because the bitpacking groups no longer fit the blocks.
require block_size 262144

load __TEST_DIR__/test_bitpacking.db

statement ok
PRAGMA force_compression='bitpacking'

# Evenly spaced integers (step 2_000_000) from -2_000_000_000 up to, but excluding, 2_000_000_000
statement ok
CREATE TABLE aux AS SELECT range::INT x FROM range(-2_000_000_000, 2_000_000_000, 2_000_000);

# Shift every value by +1 or -1 at random, so the step between rows is no longer constant
statement ok
CREATE TABLE tt AS SELECT (x + if (random() > 0.5, 1, -1)) x FROM aux;

statement ok
CHECKPOINT;

query I
SELECT compression FROM pragma_storage_info('tt') WHERE segment_type != 'VALIDITY';
----
BitPacking


View File

@@ -0,0 +1,42 @@
# name: test/sql/storage/compression/bitpacking/bitpacking_fatal_forced.test_slow
# description: Test forced bitpacking, with value ranges that are rejected by the bitpacking compression analyze step
# group: [bitpacking]

# load the DB from disk
load __TEST_DIR__/test_bitpacking.db

foreach bitpacking_mode delta_for for constant_delta constant

statement ok
PRAGMA force_bitpacking_mode='${bitpacking_mode}'

statement ok
PRAGMA force_compression = 'bitpacking'

statement ok
CREATE TABLE test (x INT128, a INT64, b INT32, c INT16, d TINYINT);

# Data too big to be compressed with bitpacking: the minimum and maximum of every column type
statement ok
INSERT INTO test VALUES
    (-170141183460469231731687303715884105728, -9223372036854775808, -2147483648, -32768, -128),
    (170141183460469231731687303715884105727, 9223372036854775807, 2147483647, 32767, 127);

# One row per integer column; each is expected to have fallen back to Uncompressed
query I
SELECT compression
FROM pragma_storage_info('test')
WHERE (
    segment_type ILIKE 'HUGEINT' OR
    segment_type ILIKE 'BIGINT' OR
    segment_type ILIKE 'INTEGER' OR
    segment_type ILIKE 'SMALLINT' OR
    segment_type ILIKE 'TINYINT')
----
Uncompressed
Uncompressed
Uncompressed
Uncompressed
Uncompressed

statement ok
DROP TABLE test;

endloop


View File

@@ -0,0 +1,60 @@
# name: test/sql/storage/compression/bitpacking/bitpacking_filter_pushdown.test
# description: Filter pushdown with Bitpacking columns
# group: [bitpacking]

# This test defaults to another compression function for smaller block sizes,
# because the bitpacking groups no longer fit the blocks.
require block_size 262144

load __TEST_DIR__/test_bitpacking.db

statement ok
PRAGMA force_compression = 'bitpacking'

foreach bitpacking_mode auto delta_for for constant_delta constant

statement ok
PRAGMA force_bitpacking_mode='${bitpacking_mode}'

statement ok
CREATE TABLE test (id VARCHAR, col INTEGER)

# Insert various data to ensure there's something compressible for all bitpacking modes:
# an increasing run, a run of the constant 1337, and another increasing run
statement ok
INSERT INTO test SELECT i::VARCHAR id, i b FROM range(10000) tbl(i)

statement ok
INSERT INTO test SELECT i::VARCHAR id, 1337 FROM range(20000, 30000) tbl(i)

statement ok
INSERT INTO test SELECT i::VARCHAR id, i b FROM range(30000,40000) tbl(i)

statement ok
CHECKPOINT

# Must return no rows: every INTEGER segment is expected to be bitpacked
query I
SELECT compression FROM pragma_storage_info('test') WHERE segment_type = 'INTEGER' AND compression != 'BitPacking'
----

# filter on the bitpacking column (10000 constant rows plus the single 1337 of the first run)
query IIII
SELECT SUM(col), MIN(col), MAX(col), COUNT(*) FROM test WHERE col=1337
----
13371337	1337	1337	10001

# filter on non-bitpacking column (equality)
query IIIIII
SELECT MIN(id), MAX(id), SUM(col), MIN(col), MAX(col), COUNT(*) FROM test WHERE id='5000'
----
5000	5000	5000	5000	5000	1

# filter on non-bitpacking column (expression over the id)
query IIIIII
SELECT MIN(id), MAX(id), SUM(col), MIN(col), MAX(col), COUNT(*) FROM test WHERE id::INT64%1000=0;
----
0	9000	403370	0	39000	30

statement ok
DROP TABLE test

endloop


View File

@@ -0,0 +1,255 @@
# name: test/sql/storage/compression/bitpacking/bitpacking_hugeint.test
# description: Test hugeint bitpacking at multiple bitwidths
# group: [bitpacking]

# This test defaults to another compression function for smaller block sizes,
# because the bitpacking groups no longer fit the blocks.
require block_size 262144

load __TEST_DIR__/test_bitpacking.db

statement ok
PRAGMA force_compression='bitpacking'

statement ok
PRAGMA force_bitpacking_mode='constant'

# bit-width < 32
# ----------------------------------------

statement ok
CREATE TABLE test (id VARCHAR, a HUGEINT);

# 16 increasing values followed by 16 copies of a fixed value
statement ok
INSERT INTO test SELECT i::VARCHAR, i::HUGEINT FROM range(0, 16) tbl(i);

statement ok
INSERT INTO test SELECT i::VARCHAR, 13371337 FROM range(0, 16) tbl(i);

statement ok
CHECKPOINT

query I
SELECT a FROM test LIMIT 5;
----
0
1
2
3
4

# make sure compression is being used
query I
SELECT compression FROM pragma_storage_info('test') WHERE segment_type ILIKE 'HUGEINT';
----
BitPacking

statement ok
DROP TABLE test;

# bit-width == 32
# ----------------------------------------

statement ok
CREATE TABLE test (id VARCHAR, a HUGEINT);

# 16 increasing values followed by 16 copies of a fixed value
statement ok
INSERT INTO test SELECT i::VARCHAR, i::HUGEINT + 3000000000 FROM range(0, 16) tbl(i);

statement ok
INSERT INTO test SELECT i::VARCHAR, 13371337 FROM range(0, 16) tbl(i);

statement ok
CHECKPOINT

query I
SELECT a FROM test LIMIT 5;
----
3000000000
3000000001
3000000002
3000000003
3000000004

# make sure compression is being used
query I
SELECT compression FROM pragma_storage_info('test') WHERE segment_type ILIKE 'HUGEINT';
----
BitPacking

statement ok
DROP TABLE test;

# 32 < bit-width < 64
# ----------------------------------------

statement ok
CREATE TABLE test (id VARCHAR, a HUGEINT);

# 16 increasing values followed by 16 copies of a fixed value
statement ok
INSERT INTO test SELECT i::VARCHAR, i::HUGEINT + 200000000000 FROM range(0, 16) tbl(i);

statement ok
INSERT INTO test SELECT i::VARCHAR, 13371337 FROM range(0, 16) tbl(i);

statement ok
CHECKPOINT

query I
SELECT a FROM test LIMIT 5;
----
200000000000
200000000001
200000000002
200000000003
200000000004

# make sure compression is being used
query I
SELECT compression FROM pragma_storage_info('test') WHERE segment_type ILIKE 'HUGEINT';
----
BitPacking

statement ok
DROP TABLE test;

# bit-width == 64
# ----------------------------------------

statement ok
CREATE TABLE test (id VARCHAR, a HUGEINT);

# 16 increasing values followed by 16 copies of a fixed value
statement ok
INSERT INTO test SELECT i::VARCHAR, i::HUGEINT + 10000000000000000000 FROM range(0, 16) tbl(i);

statement ok
INSERT INTO test SELECT i::VARCHAR, 13371337 FROM range(0, 16) tbl(i);

statement ok
CHECKPOINT

query I
SELECT a FROM test LIMIT 5;
----
10000000000000000000
10000000000000000001
10000000000000000002
10000000000000000003
10000000000000000004

# make sure compression is being used
query I
SELECT compression FROM pragma_storage_info('test') WHERE segment_type ILIKE 'HUGEINT';
----
BitPacking

statement ok
DROP TABLE test;

# 64 < bit-width < 96
# ----------------------------------------

statement ok
CREATE TABLE test (id VARCHAR, a HUGEINT);

# 16 increasing values followed by 16 copies of a fixed value
statement ok
INSERT INTO test SELECT i::VARCHAR, i::HUGEINT + 500000000000000000000 FROM range(0, 16) tbl(i);

statement ok
INSERT INTO test SELECT i::VARCHAR, 13371337 FROM range(0, 16) tbl(i);

statement ok
CHECKPOINT

query I
SELECT a FROM test LIMIT 5;
----
500000000000000000000
500000000000000000001
500000000000000000002
500000000000000000003
500000000000000000004

# make sure compression is being used
query I
SELECT compression FROM pragma_storage_info('test') WHERE segment_type ILIKE 'HUGEINT';
----
BitPacking

statement ok
DROP TABLE test;

# bit-width == 96
# ----------------------------------------

statement ok
CREATE TABLE test (id VARCHAR, a HUGEINT);

# 16 increasing values followed by 16 copies of a fixed value
statement ok
INSERT INTO test SELECT i::VARCHAR, i::HUGEINT + 50000000000000000000000000000 FROM range(0, 16) tbl(i);

statement ok
INSERT INTO test SELECT i::VARCHAR, 13371337 FROM range(0, 16) tbl(i);

statement ok
CHECKPOINT

query I
SELECT a FROM test LIMIT 5;
----
50000000000000000000000000000
50000000000000000000000000001
50000000000000000000000000002
50000000000000000000000000003
50000000000000000000000000004

# make sure compression is being used
query I
SELECT compression FROM pragma_storage_info('test') WHERE segment_type ILIKE 'HUGEINT';
----
BitPacking

statement ok
DROP TABLE test;

# 96 < bit-width < 128
# ----------------------------------------

statement ok
CREATE TABLE test (id VARCHAR, a HUGEINT);

# 16 increasing values followed by 16 copies of a fixed value
statement ok
INSERT INTO test SELECT i::VARCHAR, i::HUGEINT + 300000000000000000000000000000000 FROM range(0, 16) tbl(i);

statement ok
INSERT INTO test SELECT i::VARCHAR, 13371337 FROM range(0, 16) tbl(i);

statement ok
CHECKPOINT

query I
SELECT a FROM test LIMIT 5;
----
300000000000000000000000000000000
300000000000000000000000000000001
300000000000000000000000000000002
300000000000000000000000000000003
300000000000000000000000000000004

# make sure compression is being used
query I
SELECT compression FROM pragma_storage_info('test') WHERE segment_type ILIKE 'HUGEINT';
----
BitPacking

statement ok
DROP TABLE test;

# bit-width == 128
# ----------------------------------------

statement ok
CREATE TABLE test (id VARCHAR, a HUGEINT);

# 16 increasing values followed by 16 copies of a fixed value
statement ok
INSERT INTO test SELECT i::VARCHAR, i::HUGEINT + 20000000000000000000000000000000000000 FROM range(0, 16) tbl(i);

statement ok
INSERT INTO test SELECT i::VARCHAR, 13371337 FROM range(0, 16) tbl(i);

statement ok
CHECKPOINT

query I
SELECT a FROM test LIMIT 5;
----
20000000000000000000000000000000000000
20000000000000000000000000000000000001
20000000000000000000000000000000000002
20000000000000000000000000000000000003
20000000000000000000000000000000000004

# make sure compression is being used
query I
SELECT compression FROM pragma_storage_info('test') WHERE segment_type ILIKE 'HUGEINT';
----
BitPacking

statement ok
DROP TABLE test;


View File

@@ -0,0 +1,62 @@
# name: test/sql/storage/compression/bitpacking/bitpacking_index_fetch.test_slow
# description: Fetch from Bitpacking column with index
# group: [bitpacking]
# This test defaults to another compression function for smaller block sizes,
# because the bitpacking groups no longer fit the blocks.
require block_size 262144
load __TEST_DIR__/test_bitpacking.db
statement ok
PRAGMA force_compression = 'bitpacking'
# run the whole scenario once per forced bitpacking mode and per column type
foreach bitpacking_mode delta_for for constant_delta constant
statement ok
PRAGMA force_bitpacking_mode='${bitpacking_mode}'
foreach type INTEGER UINT16
# id is the primary key, so the equality lookups below can be answered through its index
statement ok
CREATE TABLE test(id INTEGER PRIMARY KEY, col ${type})
# ids 0..9999: col equals id (the SELECT produces VARCHAR ids, presumably cast back to INTEGER on insert)
statement ok
INSERT INTO test SELECT i::VARCHAR id, i b FROM range(10000) tbl(i)
# ids 10000..19999: col is the constant 1337
statement ok
INSERT INTO test SELECT i::VARCHAR id, 1337 FROM range(10000, 20000) tbl(i)
# ids 20000..29999: col equals id again
statement ok
INSERT INTO test SELECT i::VARCHAR id, i b FROM range(20000, 30000) tbl(i)
statement ok
CHECKPOINT
# expects no rows: no INTEGER segment may use a compression other than BitPacking
# NOTE(review): the filter is fixed to 'INTEGER'; in the UINT16 iteration this presumably only covers the id column - confirm
query I
SELECT compression FROM pragma_storage_info('test') WHERE segment_type ILIKE 'INTEGER' and compression != 'BitPacking'
----
# single-row lookup in the first range (col = id)
query IIIIII
SELECT MIN(id), MAX(id), SUM(col), MIN(col), MAX(col), COUNT(*) FROM test WHERE id='5000'
----
5000 5000 5000 5000 5000 1
# single-row lookup in the constant range (col = 1337)
query IIIIII
SELECT MIN(id), MAX(id), SUM(col), MIN(col), MAX(col), COUNT(*) FROM test WHERE id='12000'
----
12000 12000 1337 1337 1337 1
# single-row lookup in the last range (col = id)
query IIIIII
SELECT MIN(id), MAX(id), SUM(col), MIN(col), MAX(col), COUNT(*) FROM test WHERE id='22000'
----
22000 22000 22000 22000 22000 1
statement ok
DROP TABLE test;
endloop
endloop

View File

@@ -0,0 +1,72 @@
# name: test/sql/storage/compression/bitpacking/bitpacking_lists.test_slow
# description: Test storage with Bitpacking inside lists
# group: [bitpacking]
# This test defaults to another compression function for smaller block sizes,
# because the bitpacking groups no longer fit the blocks.
require block_size 262144
# NOTE(review): the database file is named test_rle.db although this test forces bitpacking - presumably a copy-paste leftover
load __TEST_DIR__/test_rle.db
statement ok
PRAGMA force_compression = 'bitpacking'
# repeat the scenario for every forced bitpacking mode
foreach bitpacking_mode delta_for for constant_delta constant
statement ok
PRAGMA force_bitpacking_mode='${bitpacking_mode}'
statement ok
CREATE TABLE test (id INTEGER, l INTEGER[]);
# alternating runs of 1000 rows: [1, 1, 1] when (id // 1000) is even, [2, 2] when it is odd
statement ok
INSERT INTO test SELECT i, case when (i//1000)%2=0 then [1, 1, 1] else [2, 2] end FROM range(200000) tbl(i)
query I
SELECT compression FROM pragma_storage_info('test') WHERE segment_type ILIKE 'INTEGER' LIMIT 1
----
BitPacking
# full unnest
# 100000 rows * 3 elements of value 1 + 100000 rows * 2 elements of value 2
query II
SELECT COUNT(*), SUM(i) FROM (SELECT UNNEST(l) FROM test) tbl(i)
----
500000 700000
# filters/skips
# ids 5000..5999 all fall in an odd run, so every row is [2, 2]
query II
SELECT COUNT(*), SUM(i) FROM (SELECT UNNEST(l) FROM test WHERE id>=5000 AND id<6000) tbl(i)
----
2000 4000
# zonemaps
# ids 150000..159999 cover five even runs (15000 ones) and five odd runs (10000 twos)
query II
SELECT COUNT(*), SUM(i) FROM (SELECT UNNEST(l) FROM test WHERE id>=150000 AND id<160000) tbl(i)
----
25000 35000
statement ok
CREATE INDEX i_index ON test(id)
# index lookup in lists
query II
SELECT * FROM test WHERE id=150001
----
150001 [1, 1, 1]
# large lists
# 100 groups, each collecting 1000 negated values into one list
statement ok
CREATE TABLE test_large_list AS SELECT i%100 AS id, LIST(-i) AS list FROM range(0,100000) tbl(i) GROUP BY id;
query II
SELECT COUNT(*), SUM(i) FROM (SELECT UNNEST(list) FROM test_large_list) tbl(i)
----
100000 -4999950000
statement ok
drop table test_large_list
statement ok
drop table test
endloop

View File

@@ -0,0 +1,29 @@
# name: test/sql/storage/compression/bitpacking/bitpacking_mode.test
# description: Test bitpacking mode
# group: [bitpacking]
statement ok
PRAGMA force_compression = 'bitpacking'
# before any explicit setting the mode reads back as 'auto'
query I
SELECT current_setting('force_bitpacking_mode')
----
auto
# an unknown mode name must be rejected
statement error
PRAGMA force_bitpacking_mode='xxx'
----
Unrecognized option
# every supported mode can be set and reads back unchanged
foreach mode auto constant constant_delta delta_for for
statement ok
PRAGMA force_bitpacking_mode='${mode}'
query I
SELECT current_setting('force_bitpacking_mode')='${mode}'
----
true
endloop

View File

@@ -0,0 +1,50 @@
# name: test/sql/storage/compression/bitpacking/bitpacking_nulls.test
# description: Test bitpacking with nulls
# group: [bitpacking]
# This test defaults to another compression function for smaller block sizes,
# because the bitpacking groups no longer fit the blocks.
require block_size 262144
load __TEST_DIR__/test_bitpacking.db
statement ok
PRAGMA force_compression='bitpacking'
# repeat the scenario for every forced bitpacking mode
foreach bitpacking_mode delta_for for constant_delta constant
statement ok
PRAGMA force_bitpacking_mode='${bitpacking_mode}'
# simple compression with few values
statement ok
CREATE TABLE test (a BIGINT);
# in each of the three inserts below every fifth row (i % 5 = 0) is NULL
# Constant compressible range
statement ok
INSERT INTO test SELECT case when i%5=0 then null else 1337 end FROM range(0,10000) tbl(i);
# Constant delta compressible range
statement ok
INSERT INTO test SELECT case when i%5=0 then null else i end FROM range(0,10000) tbl(i);
# FOR/FOR-delta compressible range
statement ok
INSERT INTO test SELECT case when i%5=0 then null else i//2 end FROM range(0,10000) tbl(i);
statement ok
checkpoint
# expects no rows: no BIGINT segment may use a compression other than BitPacking
query I
SELECT compression FROM pragma_storage_info('test') WHERE segment_type ILIKE 'BIGINT' and compression != 'BitPacking';
----
# sum over the non-NULL rows: 10696000 (constant) + 40000000 (i) + 19998000 (i//2)
query III
select sum(a), min(a), max(a) from test;
----
70694000 0 9999
statement ok
DROP TABLE test;
endloop

View File

@@ -0,0 +1,65 @@
# name: test/sql/storage/compression/bitpacking/bitpacking_simple.test
# description: Test storage bitpacking, but simple
# group: [bitpacking]
# This test defaults to another compression function for smaller block sizes,
# because the bitpacking groups no longer fit the blocks.
require block_size 262144
load __TEST_DIR__/test_bitpacking.db
statement ok
PRAGMA force_compression='bitpacking'
# repeat the scenario for every forced bitpacking mode
foreach bitpacking_mode delta_for for constant_delta constant
statement ok
PRAGMA force_bitpacking_mode='${bitpacking_mode}'
# simple compression with few values
statement ok
CREATE TABLE test (id VARCHAR, a BIGINT);
# insert multiple ranges so that each method can be used on at least one of the ranges
# rows 0..9999: descending values 0, -1, ..., -9999
statement ok
INSERT INTO test SELECT i::VARCHAR, -i FROM range(0,10000) tbl(i);
# rows 10000..19999: the constant 13371337
statement ok
INSERT INTO test SELECT i::VARCHAR, 13371337 FROM range(0,10000) tbl(i);
statement ok
checkpoint
# start of the descending range
query I
select a from test limit 5;
----
0
-1
-2
-3
-4
# offset 12000 lands inside the constant range
query I
select a from test limit 5 offset 12000;
----
13371337
13371337
13371337
13371337
13371337
# (-49995000 + 10000 * 13371337) / 20000
query I
select avg(a) from test;
----
6683168.75
query I
SELECT compression FROM pragma_storage_info('test') WHERE segment_type ILIKE 'BIGINT';
----
BitPacking
statement ok
DROP TABLE test;
endloop

View File

@@ -0,0 +1,60 @@
# name: test/sql/storage/compression/bitpacking/bitpacking_simple_hugeint.test
# description: Test storage bitpacking, but simple and for hugeints
# group: [bitpacking]
# This test defaults to another compression function for smaller block sizes,
# because the bitpacking groups no longer fit the blocks.
require block_size 262144
load __TEST_DIR__/test_bitpacking.db
statement ok
PRAGMA force_compression='bitpacking'
# repeat the scenario for every forced bitpacking mode
foreach bitpacking_mode delta_for for constant_delta constant
statement ok
PRAGMA force_bitpacking_mode='${bitpacking_mode}'
# simple compression with few values
statement ok
CREATE TABLE test (id VARCHAR, a HUGEINT);
# insert multiple ranges so that each method can be used on at least one of the ranges; bit-width > 96
# rows 0..9999: descending values starting at a negative offset of about -1.23 * 10^30
statement ok
INSERT INTO test SELECT i::VARCHAR, -i::HUGEINT + -1234567891011121314151617180000::HUGEINT FROM range(0, 10000) tbl(i);
# rows 10000..19999: the constant 13371337
statement ok
INSERT INTO test SELECT i::VARCHAR, 13371337 FROM range(0,10000) tbl(i);
statement ok
checkpoint
# start of the descending range
query I
select a from test limit 5;
----
-1234567891011121314151617180000
-1234567891011121314151617180001
-1234567891011121314151617180002
-1234567891011121314151617180003
-1234567891011121314151617180004
# offset 12000 lands inside the constant range
query I
select a from test limit 5 offset 12000;
----
13371337
13371337
13371337
13371337
13371337
query I
SELECT compression FROM pragma_storage_info('test') WHERE segment_type ILIKE 'HUGEINT';
----
BitPacking
statement ok
DROP TABLE test;
endloop

View File

@@ -0,0 +1,20 @@
# name: test/sql/storage/compression/bitpacking/bitpacking_size_calculation.test
# description: Test for a bug found in the size calculation
# group: [bitpacking]
# the data set is a parquet file fetched over HTTPS, hence both extensions are required
require parquet
require httpfs
load __TEST_DIR__/test_bitpacking.db
statement ok
pragma force_compression='bitpacking'
# load the remote reproduction data into a local table
statement ok
CREATE OR REPLACE TABLE toy_table AS
SELECT *
FROM 'https://github.com/duckdb/duckdb-data/releases/download/v1.0/bp_bug.parquet' ;
# the test passes when the checkpoint completes without an error; no result is inspected
statement ok
CHECKPOINT;

View File

@@ -0,0 +1,71 @@
# name: test/sql/storage/compression/bitpacking/bitpacking_storage_info.test
# description: Test storage info with Bitpacking
# group: [bitpacking]
# This test defaults to another compression function for smaller block sizes,
# because the bitpacking groups no longer fit the blocks.
require block_size 262144
# the expected per-mode counts below depend on the vector size
require vector_size 2048
load __TEST_DIR__/test_bitpacking.db
statement ok
PRAGMA force_compression = 'bitpacking'
# part 1: a tiny table with a NULL row still reports BitPacking
statement ok
CREATE TABLE test (a INTEGER, b INTEGER);
statement ok
INSERT INTO test VALUES (11, 22), (11, 22), (12, 21), (NULL, NULL)
statement ok
CHECKPOINT
query I
SELECT compression FROM pragma_storage_info('test') WHERE segment_type ILIKE 'INTEGER' LIMIT 1
----
BitPacking
# part 2: segment_info lists how many groups used each bitpacking mode
# 10000 ones followed by 10000 twos
statement ok
CREATE TABLE test_bp (a INTEGER)
statement ok
INSERT INTO test_bp SELECT 1 FROM range(0, 10000) tbl(i)
statement ok
INSERT INTO test_bp SELECT 2 FROM range(0, 10000) tbl(i)
statement ok
CHECKPOINT
# ten groups in total; presumably the single DELTA_FOR group is the one straddling the switch from 1 to 2
query I
SELECT
segment_info
FROM
pragma_storage_info('test_bp')
WHERE segment_type NOT IN ('VALIDITY')
----
CONSTANT: 9, DELTA_FOR: 1
# part 3: forcing a mode makes every group use it
statement ok
PRAGMA force_bitpacking_mode = 'delta_for'
statement ok
CREATE OR REPLACE TABLE test_bp (a INTEGER)
statement ok
INSERT INTO test_bp SELECT 3*(i // 1000) + (i%10) FROM range(0, 10000) tbl(i)
statement ok
CHECKPOINT
# Should have all 5 blocks for 10K integers (1 block per 2048 tuples) forced to DELTA_FOR
query I
SELECT
segment_info
FROM
pragma_storage_info('test_bp')
WHERE segment_type NOT IN ('VALIDITY')
----
DELTA_FOR: 5

View File

@@ -0,0 +1,49 @@
# name: test/sql/storage/compression/bitpacking/bitpacking_table_copy.test
# description: Tests a table copy on a table spanning multiple segments
# group: [bitpacking]
# NOTE(review): no tpch function is called in the statements below - confirm the requirement is still needed
require tpch
# load the DB from disk
load __TEST_DIR__/test_bitpacking_struct_bug.db
# 'constant' is not part of this mode list, unlike the other bitpacking tests
foreach bitpacking_mode delta_for for constant_delta
statement ok
PRAGMA force_bitpacking_mode='${bitpacking_mode}'
statement ok
PRAGMA force_compression = 'bitpacking'
# source table: the integers 0..149999
statement ok
CREATE TABLE test (a integer);
statement ok
INSERT INTO test SELECT i FROM range(0,150000) tbl(i);
statement ok
checkpoint
# copy the checkpointed table and checkpoint the copy as well
statement ok
CREATE TABLE test_2 AS SELECT a FROM test;
statement ok
checkpoint
# both tables must hold the same data: sum(0..149999) = 11249925000
query I
select sum(a) from test;
----
11249925000
query I
select sum(a) from test_2;
----
11249925000
statement ok
drop table test
statement ok
drop table test_2
endloop

View File

@@ -0,0 +1,126 @@
# name: test/sql/storage/compression/bitpacking/bitpacking_types.test_slow
# description: Test bitpacking with different types, especially around the numerical limits
# group: [bitpacking]
# This test defaults to another compression function for smaller block sizes,
# because the bitpacking groups no longer fit the blocks.
require block_size 262144
load __TEST_DIR__/test_bitpacking.db
statement ok
PRAGMA force_compression = 'bitpacking'
# repeat everything below for every forced bitpacking mode
foreach bitpacking_mode delta_for for constant_delta constant
statement ok
PRAGMA force_bitpacking_mode='${bitpacking_mode}'
# <numeric> is a placeholder supplied by the test runner, presumably expanding to the numeric types
foreach type <numeric> decimal(4,1) decimal(8,1) decimal(12,1) decimal(18,1)
# values cycle through 0, 1, 2; of the 10000 rows, 3334 are 0 and 3333 each are 1 and 2
statement ok
CREATE TABLE a AS SELECT MOD(i,3)::${type} i FROM range(10000) tbl(i)
query IIIII
SELECT MIN(i), MAX(i), AVG(i), COUNT(*), COUNT(i) FROM a
----
0 2 0.999900 10000 10000
query IIIII
SELECT MIN(i), MAX(i), AVG(i), COUNT(*), COUNT(i) FROM a WHERE i=1
----
1 1 1.000000 3333 3333
statement ok
DROP TABLE a
endloop
statement ok
CREATE TABLE test (d INT32);
# Range too big to force bitpacking
# NOTE(review): there is no checkpoint between this insert and the storage check - confirm the check sees persisted segments
statement ok
INSERT INTO test VALUES (-2147483648), (2147483647);
query I
SELECT compression FROM pragma_storage_info('test') WHERE (
segment_type ILIKE 'INTEGER')
----
Uncompressed
statement ok
DROP TABLE IF EXISTS test;
statement ok
CREATE TABLE test (x INT128, a INT64, b INT32, c INT16, d TINYINT);
# Only using (SIZEOF(TYPE)*8)-1
# (All but 1 bit)
# Because bitpacking won't accept a value range bigger than that
# each column holds -2^(N-2) and 2^(N-2)-1 for its N-bit type, so every column averages -0.5
statement ok
INSERT INTO test VALUES (-85070591730234615865843651857942052864, -4611686018427387904, -1073741824, -16384, -64), (85070591730234615865843651857942052863, 4611686018427387903, 1073741823, 16383, 63);
statement ok
checkpoint
# one BitPacking row is expected for each of the five signed integer columns
query I
SELECT compression FROM pragma_storage_info('test') WHERE (
segment_type ILIKE 'HUGEINT' OR
segment_type ILIKE 'BIGINT' OR
segment_type ILIKE 'INTEGER' OR
segment_type ILIKE 'SMALLINT' OR
segment_type ILIKE 'TINYINT')
----
BitPacking
BitPacking
BitPacking
BitPacking
BitPacking
query IIIIII
SELECT AVG(x), AVG(a), AVG(b), AVG(c), AVG(d), COUNT(*) FROM test
----
-0.5 -0.5 -0.5 -0.5 -0.5 2
statement ok
drop table test
# unsigned types: each insert uses a range near the top of the next wider type and leaves the narrower columns NULL
statement ok
CREATE TABLE test (a UINT64, b UINT32, c UINT16, d UINT8);
statement ok
INSERT INTO test SELECT i, i, i, i FROM range(0, 256) tbl(i);
INSERT INTO test SELECT i, i, i, NULL FROM range(31768, 32768) tbl(i);
INSERT INTO test SELECT i, i, NULL, NULL FROM range(4294966295, 4294967295) tbl(i);
INSERT INTO test SELECT 18446744073709551615 - i, NULL, NULL, NULL FROM range(0, 1000) tbl(i);
statement ok
checkpoint
# 256 + 3 * 1000 rows in total
query IIIII
SELECT AVG(a), AVG(b), AVG(c), AVG(d), COUNT(*) FROM test
----
5.665461940419088e+18 1903811655.4255319 25716.671974522294 127.5 3256
# booleans: alternating false/true, so half of the 10000 rows are TRUE
statement ok
CREATE TABLE test_bool (id VARCHAR, col BOOL)
statement ok
INSERT INTO test_bool SELECT i::VARCHAR id, CAST (i%2 as BOOL) col FROM range(10000) tbl(i)
statement ok
CHECKPOINT
query I
SELECT COUNT(*) FROM test_bool WHERE col = TRUE
----
5000
statement ok
drop table test
statement ok
drop table test_bool
endloop

View File

@@ -0,0 +1,255 @@
# name: test/sql/storage/compression/bitpacking/bitpacking_uhugeint.test
# description: Test uhugeint bitpacking at multiple bitwidths
# group: [bitpacking]
# This test defaults to another compression function for smaller block sizes,
# because the bitpacking groups no longer fit the blocks.
require block_size 262144
load __TEST_DIR__/test_bitpacking.db
statement ok
PRAGMA force_compression='bitpacking'
# this file sets the mode to 'constant' once and does not loop over the modes
statement ok
PRAGMA force_bitpacking_mode='constant'
# bit-width < 32
# ----------------------------------------
statement ok
CREATE TABLE test (id VARCHAR, a UHUGEINT);
# one record, two inserts: the values 0..15, then 16 copies of the constant 13371337
statement ok
INSERT INTO test SELECT i::VARCHAR, i::UHUGEINT FROM range(0, 16) tbl(i);
INSERT INTO test SELECT i::VARCHAR, 13371337 FROM range(0, 16) tbl(i);
statement ok
checkpoint
# the values must read back unchanged after the checkpoint
query I
select a from test limit 5;
----
0
1
2
3
4
# make sure compression is being used
query I
SELECT compression FROM pragma_storage_info('test') WHERE segment_type ILIKE 'UHUGEINT';
----
BitPacking
statement ok
DROP TABLE test;
# bit-width == 32
# ----------------------------------------
# 3 * 10^9 lies between 2^31 and 2^32
statement ok
CREATE TABLE test (id VARCHAR, a UHUGEINT);
# one record, two inserts: 16 ascending offset values, then 16 copies of the constant 13371337
statement ok
INSERT INTO test SELECT i::VARCHAR, i::UHUGEINT + 3000000000::UHUGEINT FROM range(0, 16) tbl(i);
INSERT INTO test SELECT i::VARCHAR, 13371337 FROM range(0, 16) tbl(i);
statement ok
checkpoint
# the values must read back unchanged after the checkpoint
query I
select a from test limit 5;
----
3000000000
3000000001
3000000002
3000000003
3000000004
# make sure compression is being used
query I
SELECT compression FROM pragma_storage_info('test') WHERE segment_type ILIKE 'UHUGEINT';
----
BitPacking
statement ok
DROP TABLE test;
# 32 < bit-width < 64
# ----------------------------------------
# 2 * 10^11 lies between 2^37 and 2^38
statement ok
CREATE TABLE test (id VARCHAR, a UHUGEINT);
# one record, two inserts: 16 ascending offset values, then 16 copies of the constant 13371337
statement ok
INSERT INTO test SELECT i::VARCHAR, i::UHUGEINT + 200000000000::UHUGEINT FROM range(0, 16) tbl(i);
INSERT INTO test SELECT i::VARCHAR, 13371337 FROM range(0, 16) tbl(i);
statement ok
checkpoint
# the values must read back unchanged after the checkpoint
query I
select a from test limit 5;
----
200000000000
200000000001
200000000002
200000000003
200000000004
# make sure compression is being used
query I
SELECT compression FROM pragma_storage_info('test') WHERE segment_type ILIKE 'UHUGEINT';
----
BitPacking
statement ok
DROP TABLE test;
# bit-width == 64
# ----------------------------------------
# 10^19 lies between 2^63 and 2^64
statement ok
CREATE TABLE test (id VARCHAR, a UHUGEINT);
# one record, two inserts: 16 ascending offset values, then 16 copies of the constant 13371337
statement ok
INSERT INTO test SELECT i::VARCHAR, i::UHUGEINT + 10000000000000000000::UHUGEINT FROM range(0, 16) tbl(i);
INSERT INTO test SELECT i::VARCHAR, 13371337 FROM range(0, 16) tbl(i);
statement ok
checkpoint
# the values must read back unchanged after the checkpoint
query I
select a from test limit 5;
----
10000000000000000000
10000000000000000001
10000000000000000002
10000000000000000003
10000000000000000004
# make sure compression is being used
query I
SELECT compression FROM pragma_storage_info('test') WHERE segment_type ILIKE 'UHUGEINT';
----
BitPacking
statement ok
DROP TABLE test;
# 64 < bit-width < 96
# ----------------------------------------
# 5 * 10^20 lies between 2^68 and 2^69
statement ok
CREATE TABLE test (id VARCHAR, a UHUGEINT);
# one record, two inserts: 16 ascending offset values, then 16 copies of the constant 13371337
statement ok
INSERT INTO test SELECT i::VARCHAR, i::UHUGEINT + 500000000000000000000::UHUGEINT FROM range(0, 16) tbl(i);
INSERT INTO test SELECT i::VARCHAR, 13371337 FROM range(0, 16) tbl(i);
statement ok
checkpoint
# the values must read back unchanged after the checkpoint
query I
select a from test limit 5;
----
500000000000000000000
500000000000000000001
500000000000000000002
500000000000000000003
500000000000000000004
# make sure compression is being used
query I
SELECT compression FROM pragma_storage_info('test') WHERE segment_type ILIKE 'UHUGEINT';
----
BitPacking
statement ok
DROP TABLE test;
# bit-width == 96
# ----------------------------------------
# 5 * 10^28 lies between 2^95 and 2^96
statement ok
CREATE TABLE test (id VARCHAR, a UHUGEINT);
# one record, two inserts: 16 ascending offset values, then 16 copies of the constant 13371337
statement ok
INSERT INTO test SELECT i::VARCHAR, i::UHUGEINT + 50000000000000000000000000000::UHUGEINT FROM range(0, 16) tbl(i);
INSERT INTO test SELECT i::VARCHAR, 13371337 FROM range(0, 16) tbl(i);
statement ok
checkpoint
# the values must read back unchanged after the checkpoint
query I
select a from test limit 5;
----
50000000000000000000000000000
50000000000000000000000000001
50000000000000000000000000002
50000000000000000000000000003
50000000000000000000000000004
# make sure compression is being used
query I
SELECT compression FROM pragma_storage_info('test') WHERE segment_type ILIKE 'UHUGEINT';
----
BitPacking
statement ok
DROP TABLE test;
# 96 < bit-width < 128
# ----------------------------------------
# 3 * 10^32 lies between 2^107 and 2^108
statement ok
CREATE TABLE test (id VARCHAR, a UHUGEINT);
# one record, two inserts: 16 ascending offset values, then 16 copies of the constant 13371337
statement ok
INSERT INTO test SELECT i::VARCHAR, i::UHUGEINT + 300000000000000000000000000000000::UHUGEINT FROM range(0, 16) tbl(i);
INSERT INTO test SELECT i::VARCHAR, 13371337 FROM range(0, 16) tbl(i);
statement ok
checkpoint
# the values must read back unchanged after the checkpoint
query I
select a from test limit 5;
----
300000000000000000000000000000000
300000000000000000000000000000001
300000000000000000000000000000002
300000000000000000000000000000003
300000000000000000000000000000004
# make sure compression is being used
query I
SELECT compression FROM pragma_storage_info('test') WHERE segment_type ILIKE 'UHUGEINT';
----
BitPacking
statement ok
DROP TABLE test;
# bit-width == 128
# ----------------------------------------
# NOTE(review): 2 * 10^37 lies between 2^123 and 2^124, so the raw magnitude is below 128 bits;
# presumably the packer rounds such wide values up to the full width - confirm against the implementation
statement ok
CREATE TABLE test (id VARCHAR, a UHUGEINT);
# one record, two inserts: 16 ascending offset values, then 16 copies of the constant 13371337
statement ok
INSERT INTO test SELECT i::VARCHAR, i::UHUGEINT + 20000000000000000000000000000000000000::UHUGEINT FROM range(0, 16) tbl(i);
INSERT INTO test SELECT i::VARCHAR, 13371337 FROM range(0, 16) tbl(i);
statement ok
checkpoint
# the values must read back unchanged after the checkpoint
query I
select a from test limit 5;
----
20000000000000000000000000000000000000
20000000000000000000000000000000000001
20000000000000000000000000000000000002
20000000000000000000000000000000000003
20000000000000000000000000000000000004
# make sure compression is being used
query I
SELECT compression FROM pragma_storage_info('test') WHERE segment_type ILIKE 'UHUGEINT';
----
BitPacking
statement ok
DROP TABLE test;

View File

@@ -0,0 +1,31 @@
# name: test/sql/storage/compression/bitpacking/force_bitpacking.test
# description: Test forcing bitpacking as the compression scheme
# group: [bitpacking]
# This test defaults to another compression function for smaller block sizes,
# because the bitpacking groups no longer fit the blocks.
require block_size 262144
require vector_size 2048
load __TEST_DIR__/force_bitpacking.db
statement ok
PRAGMA force_compression = 'bitpacking'
# 1000 ones followed by 1000 twos
statement ok
CREATE TABLE test_bp (a INTEGER);
statement ok
INSERT INTO test_bp SELECT 1 FROM range(0, 1000) tbl(i);
statement ok
INSERT INTO test_bp SELECT 2 FROM range(0, 1000) tbl(i);
statement ok
CHECKPOINT
# the forced scheme must be reported for the INTEGER segment
query I
SELECT compression FROM pragma_storage_info('test_bp') WHERE segment_type ILIKE 'INTEGER' LIMIT 1
----
BitPacking

View File

@@ -0,0 +1,40 @@
# name: test/sql/storage/compression/bitpacking/struct_bitpacking.test
# description: Test storage with Bitpacking inside structs
# group: [bitpacking]
# load the DB from disk
load __TEST_DIR__/test_bitpacking.db
statement ok
PRAGMA force_compression = 'bitpacking'
# repeat the scenario for every forced bitpacking mode
foreach bitpacking_mode delta_for for constant_delta constant
statement ok
PRAGMA force_bitpacking_mode='${bitpacking_mode}'
statement ok
CREATE TABLE test (s ROW(a INTEGER));
# one record, four inserts of 10000 rows each: small values, values just below 32767,
# the constant 1337, and values just below 2147483647
statement ok
INSERT INTO test SELECT {'a': i} FROM range(0, 10000) tbl(i);
INSERT INTO test SELECT {'a': i} FROM range(22767, 32767) tbl(i);
INSERT INTO test SELECT {'a': 1337} FROM range(2147473647, 2147483647) tbl(i);
INSERT INTO test SELECT {'a': i} FROM range(2147473647, 2147483647) tbl(i);
# aggregates before the restart
query IIII
SELECT SUM(s['a']), MIN(s['a']), MAX(s['a']), COUNT(*) FROM test
----
21475127495000 0 2147483646 40000
restart
# the same aggregates must hold after the database is reopened
query IIII
SELECT SUM(s['a']), MIN(s['a']), MAX(s['a']), COUNT(*) FROM test
----
21475127495000 0 2147483646 40000
statement ok
DROP TABLE test;
endloop

View File

@@ -0,0 +1,39 @@
# name: test/sql/storage/compression/chimp/chimp_read.test_slow
# group: [chimp]
# The database is written with a vector size of 2048.
require vector_size 2048
# a pre-built database file from the repository, opened read-only
load test/sql/storage/compression/chimp/chimp.db readonly
# both tables hold 245000 non-NULL temperatures
query I
select count(temperature) from temperatures_double;
----
245000
query I
select count(temperature) from temperatures_float;
----
245000
# both scans use the result label `res`; presumably the runner requires their results to match - confirm
query I nosort res
select temperature from temperatures_double;
----
query I nosort res
select temperature from temperatures_float;
----
# expects no rows: no matching segment may use a compression other than Chimp
query I
SELECT compression FROM pragma_storage_info('temperatures_double') WHERE segment_type == 'double' AND compression != 'Chimp';
----
query I
SELECT compression FROM pragma_storage_info('temperatures_float') WHERE segment_type == 'float' AND compression != 'Chimp';
----
# Verify that the compression method is deprecated
statement error
pragma force_compression='chimp'
----
deprecated

View File

@@ -0,0 +1,75 @@
# name: test/sql/storage/compression/compression_null.test_slow
# description: Test storage with compression and many null values
# group: [compression]
# load the DB from disk
load __TEST_DIR__/test_rle.db
# <compression> is a placeholder supplied by the test runner, presumably expanding to the compression methods
foreach compression <compression>
statement ok
PRAGMA force_compression = '${compression}'
# single NULL value
statement ok
CREATE TABLE nulls(i INTEGER)
statement ok
INSERT INTO nulls VALUES (NULL)
query I
SELECT * FROM nulls
----
NULL
# the NULL must still be there after the database is reopened
restart
query I
SELECT * FROM nulls
----
NULL
statement ok
DROP TABLE nulls
# many null values
statement ok
CREATE TABLE nulls(i INTEGER)
statement ok
INSERT INTO nulls SELECT NULL FROM range(70000)
# COUNT(*) counts rows, COUNT(i) counts non-NULL values, and SUM over only NULLs is NULL
query III
SELECT COUNT(*), COUNT(i), SUM(i) FROM nulls
----
70000 0 NULL
restart
query III
SELECT COUNT(*), COUNT(i), SUM(i) FROM nulls
----
70000 0 NULL
# mix with non-null values
statement ok
INSERT INTO nulls VALUES (1), (1), (1), (2), (2), (2)
# six non-NULL values summing to 9, on top of the 70000 NULL rows
query III
SELECT COUNT(*), COUNT(i), SUM(i) FROM nulls
----
70006 6 9
restart
query III
SELECT COUNT(*), COUNT(i), SUM(i) FROM nulls
----
70006 6 9
statement ok
DROP TABLE nulls
endloop

View File

@@ -0,0 +1,75 @@
# name: test/sql/storage/compression/compression_selection.test
# description: Test that the right scheme is chosen for the right data.
# group: [compression]
require no_latest_storage
require vector_size 2048
# the trailing v1.0.0 presumably pins the storage version of the database file - confirm against the runner docs
load __TEST_DIR__/test_compression_simple.db readwrite v1.0.0
# no force_compression is set in this file, so each check observes the automatically selected scheme
# RLE: two long runs of large values
statement ok
CREATE TABLE test_rle (a INTEGER);
statement ok
INSERT INTO test_rle SELECT 2147480000 FROM range(0, 10000) tbl(i);
statement ok
INSERT INTO test_rle SELECT 2147480001 FROM range(0, 10000) tbl(i);
statement ok
CHECKPOINT
query I
SELECT compression FROM pragma_storage_info('test_rle') WHERE segment_type ILIKE 'INTEGER' LIMIT 1
----
RLE
# Constant
statement ok
CREATE TABLE test_constant (a INTEGER);
statement ok
INSERT INTO test_constant SELECT 1 FROM range(0, 2000) tbl(i);
statement ok
CHECKPOINT
query I
SELECT compression FROM pragma_storage_info('test_constant') WHERE segment_type ILIKE 'INTEGER' LIMIT 1
----
Constant
# Dictionary
# only two distinct strings: 'foobar-0' and 'foobar-1'
statement ok
CREATE TABLE test_dict (a VARCHAR);
statement ok
INSERT INTO test_dict SELECT concat('foobar-', (i%2)::VARCHAR) FROM range(0, 2000) tbl(i);
statement ok
CHECKPOINT
query I
SELECT compression FROM pragma_storage_info('test_dict') WHERE segment_type ILIKE 'VARCHAR' LIMIT 1
----
Dictionary
# This test defaults to another compression function for smaller block sizes,
# because the bitpacking groups no longer fit the blocks.
require block_size 262144
# Bitpacking
# 2000 distinct ascending values
statement ok
CREATE TABLE test_bp (a INTEGER);
statement ok
INSERT INTO test_bp SELECT i FROM range(0, 2000) tbl(i);
statement ok
CHECKPOINT
query I
SELECT compression FROM pragma_storage_info('test_bp') WHERE segment_type ILIKE 'INTEGER' LIMIT 1
----
BitPacking

View File

@@ -0,0 +1,73 @@
# name: test/sql/storage/compression/compression_selection_dict_fsst.test
# description: Test that the right scheme is chosen for the right data.
# group: [compression]
require vector_size 2048
# the trailing v1.3.0 presumably pins the storage version of the database file - confirm against the runner docs
load __TEST_DIR__/test_compression_simple.db readwrite v1.3.0
# no force_compression is set in this file, so each check observes the automatically selected scheme
# RLE: two long runs of large values
statement ok
CREATE TABLE test_rle (a INTEGER);
statement ok
INSERT INTO test_rle SELECT 2147480000 FROM range(0, 10000) tbl(i);
statement ok
INSERT INTO test_rle SELECT 2147480001 FROM range(0, 10000) tbl(i);
statement ok
CHECKPOINT
query I
SELECT compression FROM pragma_storage_info('test_rle') WHERE segment_type ILIKE 'INTEGER' LIMIT 1
----
RLE
# Constant
statement ok
CREATE TABLE test_constant (a INTEGER);
statement ok
INSERT INTO test_constant SELECT 1 FROM range(0, 2000) tbl(i);
statement ok
CHECKPOINT
query I
SELECT compression FROM pragma_storage_info('test_constant') WHERE segment_type ILIKE 'INTEGER' LIMIT 1
----
Constant
# Dictionary
# same two-string data as the v1.0.0 selection test, but the expected scheme here is DICT_FSST
statement ok
CREATE TABLE test_dict (a VARCHAR);
statement ok
INSERT INTO test_dict SELECT concat('foobar-', (i%2)::VARCHAR) FROM range(0, 2000) tbl(i);
statement ok
CHECKPOINT
query I
SELECT compression FROM pragma_storage_info('test_dict') WHERE segment_type ILIKE 'VARCHAR' LIMIT 1
----
DICT_FSST
# This test defaults to another compression function for smaller block sizes,
# because the bitpacking groups no longer fit the blocks.
require block_size 262144
# Bitpacking
# 2000 distinct ascending values
statement ok
CREATE TABLE test_bp (a INTEGER);
statement ok
INSERT INTO test_bp SELECT i FROM range(0, 2000) tbl(i);
statement ok
CHECKPOINT
query I
SELECT compression FROM pragma_storage_info('test_bp') WHERE segment_type ILIKE 'INTEGER' LIMIT 1
----
BitPacking

View File

@@ -0,0 +1,110 @@
# name: test/sql/storage/compression/constant/constant_columns.test_slow
# description: Test storage of constant columns (columns that all have the same value)
# group: [constant]
# load the DB from disk
load __TEST_DIR__/constant_columns.db
# each scenario is queried before and after a restart, so the data is checked again after the database is reopened
# simple constant
statement ok
CREATE TABLE integers AS SELECT 1 i FROM range(1000000)
query IIIII
SELECT MIN(i), MAX(i), AVG(i), COUNT(*), COUNT(i) FROM integers
----
1 1 1 1000000 1000000
query IIIII
SELECT MIN(i), MAX(i), AVG(i), COUNT(*), COUNT(i) FROM integers WHERE i=1
----
1 1 1 1000000 1000000
restart
query IIIII
SELECT MIN(i), MAX(i), AVG(i), COUNT(*), COUNT(i) FROM integers
----
1 1 1 1000000 1000000
# constant null
statement ok
CREATE TABLE nulls AS SELECT NULL i FROM range(1000000)
# aggregates over only NULLs are NULL; COUNT(i) is 0 while COUNT(*) still counts every row
query IIIII
SELECT MIN(i), MAX(i), AVG(i), COUNT(*), COUNT(i) FROM nulls
----
NULL NULL NULL 1000000 0
query IIIII
SELECT MIN(i), MAX(i), AVG(i), COUNT(*), COUNT(i) FROM nulls WHERE i IS NULL
----
NULL NULL NULL 1000000 0
restart
query IIIII
SELECT MIN(i), MAX(i), AVG(i), COUNT(*), COUNT(i) FROM nulls
----
NULL NULL NULL 1000000 0
# mix of constant and non-constant
# first million rows are the constant 1, second million rows carry their own row number
statement ok
CREATE TABLE mixed_table AS SELECT CASE WHEN i < 1000000 then 1 else i end i FROM range(2000000) tbl(i)
query IIII
SELECT MIN(i), MAX(i), COUNT(*), COUNT(i) FROM mixed_table
----
1 1999999 2000000 2000000
query IIII
SELECT MIN(i), MAX(i), COUNT(*), COUNT(i) FROM mixed_table WHERE i=1
----
1 1 1000000 1000000
restart
query IIII
SELECT MIN(i), MAX(i), COUNT(*), COUNT(i) FROM mixed_table
----
1 1999999 2000000 2000000
query IIII
SELECT MIN(i), MAX(i), COUNT(*), COUNT(i) FROM mixed_table WHERE i=1
----
1 1 1000000 1000000
# mix of constant and non-constant NULL
# first million rows are the constant 1, second million rows are NULL
statement ok
CREATE TABLE mixed_nulls AS SELECT CASE WHEN i < 1000000 then 1 else NULL end i FROM range(2000000) tbl(i)
query IIII
SELECT MIN(i), MAX(i), COUNT(*), COUNT(i) FROM mixed_nulls
----
1 1 2000000 1000000
query IIII
SELECT MIN(i), MAX(i), COUNT(*), COUNT(i) FROM mixed_nulls WHERE i=1
----
1 1 1000000 1000000
query IIII
SELECT MIN(i), MAX(i), COUNT(*), COUNT(i) FROM mixed_nulls WHERE i IS NULL
----
NULL NULL 1000000 0
restart
query IIII
SELECT MIN(i), MAX(i), COUNT(*), COUNT(i) FROM mixed_nulls
----
1 1 2000000 1000000
query IIII
SELECT MIN(i), MAX(i), COUNT(*), COUNT(i) FROM mixed_nulls WHERE i=1
----
1 1 1000000 1000000
query IIII
SELECT MIN(i), MAX(i), COUNT(*), COUNT(i) FROM mixed_nulls WHERE i IS NULL
----
NULL NULL 1000000 0

View File

@@ -0,0 +1,51 @@
# name: test/sql/storage/compression/constant/constant_columns_indexes.test_slow
# description: Test storage of constant columns with indexes
# group: [constant]
# load the DB from disk
load __TEST_DIR__/constant_columns.db
# simple constant
# id is the row number; i is 1 for the first 500000 rows and NULL for the rest
statement ok
CREATE TABLE integers AS SELECT i id,
CASE WHEN i < 500000 THEN 1 ELSE NULL END i FROM range(1000000) tbl(i);
statement ok
CREATE INDEX i_index ON integers(id);
query IIIII
SELECT MIN(i), MAX(i), AVG(i), COUNT(*), COUNT(i) FROM integers;
----
1 1 1 1000000 500000
# point lookups on the indexed id column: two in the constant half, one in the NULL half
query II
SELECT * FROM integers WHERE id = 1;
----
1 1
query II
SELECT * FROM integers WHERE id = 2;
----
2 1
query II
SELECT * FROM integers WHERE id = 999999;
----
999999 NULL
# the same lookups must return the same rows after the database is reopened
restart
query II
SELECT * FROM integers WHERE id = 1;
----
1 1
query II
SELECT * FROM integers WHERE id = 2;
----
2 1
query II
SELECT * FROM integers WHERE id = 999999;
----
999999 NULL

View File

@@ -0,0 +1,31 @@
# name: test/sql/storage/compression/constant/constant_columns_top_n.test_slow
# description: Test Top-N operation on constant columns
# group: [constant]
# load the DB from disk
load __TEST_DIR__/constant_columns.db
# simple constant
# one million rows of 1 followed by one million rows of 2
statement ok
CREATE TABLE integers AS SELECT 1 i FROM range(1000000)
statement ok
INSERT INTO integers SELECT 2 FROM range(1000000)
# ascending top 5 comes entirely from the 1s
query I
SELECT * FROM integers ORDER BY i LIMIT 5
----
1
1
1
1
1
# descending top 5 comes entirely from the 2s
query I
SELECT * FROM integers ORDER BY i DESC LIMIT 5
----
2
2
2
2
2

View File

@@ -0,0 +1,60 @@
# name: test/sql/storage/compression/constant/constant_columns_types.test_slow
# description: Test storage of constant columns with various types
# group: [constant]
# load the DB from disk
load __TEST_DIR__/constant_columns_types.db
# <numeric> is a placeholder supplied by the test runner, presumably expanding to the numeric types
foreach type <numeric> decimal(4,1) decimal(8,1) decimal(12,1) decimal(18,1)
# one million copies of the value 1 in the current type
statement ok
CREATE TABLE a AS SELECT 1::${type} i FROM range(1000000)
query IIIII
SELECT MIN(i), MAX(i), AVG(i), COUNT(*), COUNT(i) FROM a
----
1 1 1 1000000 1000000
query IIIII
SELECT MIN(i), MAX(i), AVG(i), COUNT(*), COUNT(i) FROM a WHERE i=1
----
1 1 1 1000000 1000000
statement ok
DROP TABLE a
endloop
# interval
# AVG is left out of the interval and bool checks below
statement ok
CREATE TABLE a AS SELECT interval 1 year i FROM range(1000000)
query IIII
SELECT MIN(i), MAX(i), COUNT(*), COUNT(i) FROM a
----
1 year 1 year 1000000 1000000
query IIII
SELECT MIN(i), MAX(i), COUNT(*), COUNT(i) FROM a WHERE i=interval 1 year
----
1 year 1 year 1000000 1000000
statement ok
DROP TABLE a
# bool
statement ok
CREATE TABLE a AS SELECT false i FROM range(1000000)
query IIII
SELECT MIN(i), MAX(i), COUNT(*), COUNT(i) FROM a
----
false false 1000000 1000000
# `not i` keeps every row because i is always false
query IIII
SELECT MIN(i), MAX(i), COUNT(*), COUNT(i) FROM a WHERE not i
----
false false 1000000 1000000
statement ok
DROP TABLE a

View File

@@ -0,0 +1,32 @@
# name: test/sql/storage/compression/constant/constant_columns_updates.test_slow
# description: Test storage of constant columns with updates
# group: [constant]
# load the DB from disk
load __TEST_DIR__/constant_columns.db
# simple constant
# id runs from 0 to 999999, i is the constant 1 for every row
statement ok
CREATE TABLE integers AS SELECT i id, 1 i FROM range(1000000) tbl(i)
query IIIII
SELECT MIN(i), MAX(i), AVG(i), COUNT(*), COUNT(i) FROM integers
----
1 1 1 1000000 1000000
# bump i to 2 for every even id, i.e. half of the rows; the column is no longer constant
query I
UPDATE integers SET i=i+1 WHERE id%2=0
----
500000
# the update is visible right away
query IIIII
SELECT MIN(i), MAX(i), AVG(i), COUNT(*), COUNT(i) FROM integers
----
1 2 1.5 1000000 1000000
# the same result is expected after restarting the database
restart
query IIIII
SELECT MIN(i), MAX(i), AVG(i), COUNT(*), COUNT(i) FROM integers
----
1 2 1.5 1000000 1000000

View File

@@ -0,0 +1,30 @@
# name: test/sql/storage/compression/dict_fsst/dict_fsst_fsst_only.test_slow
# description: Test dict_fsst compression on the distinct l_comment strings of TPC-H lineitem
# group: [dict_fsst]
# NOTE(review): no compression ratio is measured in this file; only query results on the compressed table are checked
# load the DB from disk
load __TEST_DIR__/test_dict_fsst_unique.db readwrite v1.3.0
require tpch
# generate the TPC-H source data without compression
statement ok
SET force_compression='uncompressed'
statement ok
CALL dbgen(sf=1);
# build the table under test with dict_fsst forced
statement ok
SET force_compression='dict_fsst'
# every l_comment value appears exactly once; k buckets the rows into 27 groups
statement ok
CREATE TABLE lineitem_unique AS SELECT row_number() OVER () % 27 AS k, * FROM (SELECT DISTINCT l_comment FROM lineitem)
# filter on the integer column, aggregate on the compressed string column
query I
SELECT COUNT(DISTINCT l_comment) FROM lineitem_unique WHERE k = 13
----
133731
# substring filter directly on the compressed string column
query I
SELECT COUNT(*) FROM lineitem_unique WHERE contains(l_comment, 'fox')
----
197137

View File

@@ -0,0 +1,52 @@
# name: test/sql/storage/compression/dict_fsst/dict_fsst_test.test
# description: Compare dict_fsst compressed string data against the same data stored uncompressed
# group: [dict_fsst]
load __TEST_DIR__/dict_fsst_test readwrite v1.3.0
statement ok
pragma force_compression='uncompressed';
# 20000 rows built from 200 base strings (i % 200), each repeated to at most 2047 characters
statement ok
create table uncompressed_data as
select
i, repeat(
(i % 200)::INTEGER::VARCHAR,
2047 // len((i % 200)::INTEGER::VARCHAR)
) a
from range(20000) t(i);
statement ok
checkpoint;
# record the uncompressed result under the label expected_result
# NOTE(review): queries sharing a nosort label are presumably required by the runner to return identical results - confirm
query I nosort expected_result
select * from uncompressed_data order by i;
----
statement ok
pragma force_compression='dict_fsst';
# copy the data into a table that is written with dict_fsst
statement ok
create table compressed_data as select * from uncompressed_data;
statement ok
checkpoint;
# 11 and 111 produce the same string so it's 199 not 200
query I
select count(distinct a) from compressed_data;
----
199
# the compressed table is read back under the same label as the uncompressed one
query I nosort expected_result
select * from compressed_data order by i;
----
# substring filter on the compressed column
query I
select count(distinct a) from compressed_data where contains(a, '11')
----
20
# filter on the integer column: i%10=0 leaves the 20 base values 0, 10, ..., 190
query I
select count(distinct a) from compressed_data where i%10=0
----
20

View File

@@ -0,0 +1,36 @@
# name: test/sql/storage/compression/dict_fsst/dictionary_compression_ratio.test_slow
# description: Assert dictionary compression ratio is within reasonable margins
# group: [dict_fsst]
load __TEST_DIR__/test_dictionary.db readwrite v1.3.0
# First test: detailed compression ratio
statement ok
PRAGMA force_compression='dict_fsst';
# Assuming 10 chars at 1 byte, with a 4byte offset and a 2byte length per string uncompressed:
# Ratio absolute max at 3 bits per value (ignoring dict size) = (16/(3/8)) = 42.6666666667
# 1,250,000 rows drawn from only three distinct strings: BEEPBOOP-0, BEEPBOOP-1, BEEPBOOP-2
statement ok
CREATE TABLE test_dictionary AS SELECT concat('BEEPBOOP-', (i%3)::VARCHAR) AS i FROM range(0, 1250000) tbl(i);
statement ok
CHECKPOINT;
# baseline: the same data written without compression
statement ok
PRAGMA force_compression='uncompressed';
statement ok
CREATE TABLE test_uncompressed AS SELECT concat('BEEPBOOP-', (i%3)::VARCHAR) AS i FROM range(0, 1250000) tbl(i);
statement ok
CHECKPOINT;
# keep a wide margin for the compression ratio to account for changes (like the block size) that
# influence the compression ratio
# the ratio is computed from the number of distinct blocks holding VARCHAR segments in each table
query I
SELECT uncompressed::FLOAT / dictionary::FLOAT > 30 AND uncompressed::FLOAT / dictionary::FLOAT < 55 FROM
(SELECT count(DISTINCT block_id) AS dictionary FROM pragma_storage_info('test_dictionary') WHERE segment_type IN ('VARCHAR')) AS dictionary,
(SELECT count(DISTINCT block_id) AS uncompressed FROM pragma_storage_info('test_uncompressed') WHERE segment_type IN ('VARCHAR')) AS uncompressed;
----
True

View File

@@ -0,0 +1,75 @@
# name: test/sql/storage/compression/dict_fsst/dictionary_covers_validity.test
# description: Test that dict_fsst covers the validity mask and that switching to zstd restores a separate validity segment
# group: [dict_fsst]
load __TEST_DIR__/dictionary_covers_validity readwrite v1.3.0
statement ok
set checkpoint_threshold='10mb';
# 5000 struct rows whose 'b' field is NULL, plus a single row with a non-NULL 'b'
statement ok
CREATE TABLE tbl AS SELECT
{
'a': i,
'b': NULL::VARCHAR
} col
FROM range(5000) t(i)
union all
select
{
'a': 10000,
'b': 'hello'
}
statement ok
set force_compression='dict_fsst';
# write the table out with the compression forced above
statement ok
force checkpoint;
# Dictionary FSST covers the validity mask replacing it with "Empty Validity".
# The BIGINT segment type is either bitpacking (256kB blocks), or uncompressed (16kB blocks).
query II
SELECT segment_type, compression FROM pragma_storage_info('tbl') WHERE segment_type != 'BIGINT';
----
VALIDITY Constant
VALIDITY Constant
VARCHAR DICT_FSST
VALIDITY Empty Validity
# Now force a different compression method, that doesn't cover the validity
statement ok
set force_compression='zstd';
# same shape as before, except that the non-NULL row is produced twice (range(2))
statement ok
CREATE OR REPLACE TABLE tbl AS SELECT
{
'a': i,
'b': NULL::VARCHAR
} col
FROM range(5000) t(i)
union all
select
{
'a': 10000,
'b': 'hello'
} FROM range(2)
statement ok
force checkpoint;
# During checkpoint this will scan the dictionary compressed segments to get the validity
# this then gets compressed as normal (since FSST does not cover the validity)
query II
select segment_type, compression from pragma_storage_info('tbl') where segment_type IN ('VARCHAR', 'VALIDITY') order by all;
----
VALIDITY Constant
VALIDITY Constant
VALIDITY Roaring
VARCHAR ZSTD
# the data itself must read back correctly: the two 'hello' rows first, then the largest row with a NULL 'b'
query I
SELECT col FROM tbl ORDER BY col.a DESC LIMIT 3;
----
{'a': 10000, 'b': hello}
{'a': 10000, 'b': hello}
{'a': 4999, 'b': NULL}

View File

@@ -0,0 +1,23 @@
# name: test/sql/storage/compression/dict_fsst/dictionary_storage_info.test
# description: Test that pragma_storage_info reports DICT_FSST for VARCHAR columns when dict_fsst is forced
# group: [dict_fsst]
# load the DB from disk
load __TEST_DIR__/test_dictionary.db readwrite v1.3.0
statement ok
PRAGMA force_compression = 'dict_fsst'
statement ok
CREATE TABLE test (a VARCHAR, b VARCHAR);
# a few repeated short strings plus one all-NULL row
statement ok
INSERT INTO test VALUES ('11', '22'), ('11', '22'), ('12', '21'), (NULL, NULL)
# checkpoint before inspecting the storage info below
statement ok
CHECKPOINT
# only the first VARCHAR segment reported is inspected (LIMIT 1)
query I
SELECT compression FROM pragma_storage_info('test') WHERE segment_type ILIKE 'VARCHAR' LIMIT 1
----
DICT_FSST

View File

@@ -0,0 +1,52 @@
# name: test/sql/storage/compression/dict_fsst/fetch_row.test
# description: Test reading back dict_fsst compressed strings (including NULLs) after a restart
# group: [dict_fsst]
load __TEST_DIR__/test_dictionary_fetchrow.db readwrite v1.3.0
statement ok
PRAGMA force_compression = 'dict_fsst';
statement ok
CREATE TABLE test (a INTEGER, b VARCHAR);
# 80 rows cycling through three short strings, one longer string and NULL
statement ok
INSERT INTO test (a, b)
SELECT
x AS a,
CASE x % 5
WHEN 0 THEN 'aaaa'
WHEN 1 THEN 'bbbb'
WHEN 2 THEN 'cccc'
WHEN 3 THEN 'this is not an inlined string'
WHEN 4 THEN NULL
END AS b
FROM range(80) t(x);
statement ok
CHECKPOINT
# restart before running the checks below
restart
# make sure the column really is stored with dict_fsst
query I
SELECT compression FROM pragma_storage_info('test') WHERE segment_type ILIKE 'VARCHAR' LIMIT 1
----
DICT_FSST
# each of the five inserted values is expected once, in the order of a % 5
query I
SELECT DISTINCT b FROM test ORDER BY a % 5;
----
aaaa
bbbb
cccc
this is not an inlined string
NULL
# NOTE(review): identical to the previous query - presumably repeated on purpose to read the segment a second time; confirm
query I
SELECT DISTINCT b FROM test ORDER BY a % 5;
----
aaaa
bbbb
cccc
this is not an inlined string
NULL

View File

@@ -0,0 +1,24 @@
# name: test/sql/storage/compression/dict_fsst/force_dictionary.test
# description: Test forcing dictionary encoding as the compression scheme
# group: [dict_fsst]
require vector_size 2048
load __TEST_DIR__/force_dictionary.db readwrite v1.3.0
statement ok
PRAGMA force_compression = 'dict_fsst';
statement ok
CREATE TABLE test_dict (a VARCHAR);
# 2000 distinct values: every string is unique, yet the forced scheme must still be used
statement ok
INSERT INTO test_dict SELECT i::VARCHAR FROM range(0, 2000) tbl(i);
statement ok
CHECKPOINT
# only the first VARCHAR segment reported is inspected (LIMIT 1)
query I
SELECT compression FROM pragma_storage_info('test_dict') WHERE segment_type ILIKE 'VARCHAR' LIMIT 1
----
DICT_FSST

View File

@@ -0,0 +1,127 @@
# name: test/sql/storage/compression/dict_fsst/fsst_compression_ratio.test_slow
# description: Assert fsst compression ratio is within reasonable margins
# group: [dict_fsst]
# load the DB from disk
load __TEST_DIR__/test_dictionary.db readwrite v1.3.0
require tpch
# First test: detailed compression ratio
statement ok
PRAGMA force_compression='dict_fsst'
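# Uncompressed size per value: 10 chars + 4 bytes for dict offset = 14
# Compressed size per values: 2 bytes for 2 symbols (BEEPBOOP) and (-{num}) + 0.25 byte = 2.25 bytes per value
# Ignoring overhead for symbol table and bitwidth storage, we would expect a ratio of maximally 14/2.25 = 6.22
# NOTE(review): this estimate does not match the bounds asserted further down (35.0 - 36.5); it presumably describes
# plain FSST, while dict_fsst can dictionary-encode the three distinct values - confirm and update the estimate.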
statement ok
CREATE TABLE test_compressed AS SELECT concat('BEEPBOOP-', (i%3)::VARCHAR) AS i FROM range(0, 1250000) tbl(i);
statement ok
checkpoint
statement ok
PRAGMA force_compression='uncompressed'
statement ok
CREATE TABLE test_uncompressed AS SELECT concat('BEEPBOOP-', (i%3)::VARCHAR) AS i FROM range(0, 1250000) tbl(i);
statement ok
checkpoint
# Result type of the ratio checks below: `ok` when the measured ratio is within bounds, otherwise
# an `err` struct carrying the block counts, the allowed bounds and the actual ratio, so that a
# failing check shows why it failed.
# The bounds are DECIMAL(4,1): DECIMAL(2,1) can only represent values up to 9.9, so bounds such as
# 35.0 / 36.5 would make the `err` branch fail with a cast error instead of returning the diagnostics.
statement ok
CREATE TYPE test_result AS UNION (
ok BOOL,
err STRUCT(
uncompressed HUGEINT,
compressed HUGEINT,
allowed_minimum_ratio DECIMAL(4,1),
allowed_maximum_ratio DECIMAL(4,1),
actual_ratio FLOAT
)
);
statement ok
set variable min_ratio = 35.0;
set variable max_ratio = 36.5;
# This query keeps a pretty wide margin in compression ratio on purpose to account for possible changes that
# influence compression ratio.
query I
SELECT
CASE
WHEN (uncompressed::FLOAT / compressed::FLOAT) > getvariable('min_ratio') AND (uncompressed::FLOAT / compressed::FLOAT) < getvariable('max_ratio')
THEN True::test_result
ELSE {
'uncompressed': uncompressed,
'compressed': compressed,
'allowed_minimum_ratio': getvariable('min_ratio'),
'allowed_maximum_ratio': getvariable('max_ratio'),
'actual_ratio': uncompressed::FLOAT / compressed::FLOAT
}::test_result
END
FROM (
select
(select count(distinct block_id) from pragma_storage_info('test_compressed') where segment_type in('VARCHAR')) as compressed,
(select count(distinct block_id) from pragma_storage_info('test_uncompressed') where segment_type in('VARCHAR')) as uncompressed
) AS blocks_tbl;
----
true
statement ok
CALL dbgen(sf=0.1)
statement ok
PRAGMA force_compression='uncompressed'
statement ok
CREATE TABLE l_comment_uncompressed AS SELECT l_orderkey % 27 as k, l_comment FROM lineitem;
statement ok
checkpoint
statement ok
PRAGMA force_compression='dict_fsst'
statement ok
CREATE TABLE l_comment_compressed AS SELECT l_orderkey % 27 as k, l_comment FROM lineitem;
statement ok
checkpoint
statement ok
set variable min_ratio = 1.5;
set variable max_ratio = 2.5;
# We expect dict_fsst to compress l_comment by a ratio between 1.5x and 2.5x (the bounds set above)
query I
SELECT
CASE
WHEN (uncompressed::FLOAT / compressed::FLOAT) > getvariable('min_ratio') AND (uncompressed::FLOAT / compressed::FLOAT) < getvariable('max_ratio')
THEN True::test_result
ELSE {
'uncompressed': uncompressed,
'compressed': compressed,
'allowed_minimum_ratio': getvariable('min_ratio'),
'allowed_maximum_ratio': getvariable('max_ratio'),
'actual_ratio': uncompressed::FLOAT / compressed::FLOAT
}::test_result
END
FROM (
select
(select count(distinct block_id) from pragma_storage_info('l_comment_compressed') where segment_type in('VARCHAR')) as compressed,
(select count(distinct block_id) from pragma_storage_info('l_comment_uncompressed') where segment_type in('VARCHAR')) as uncompressed
) AS blocks_tbl;
----
true
query I
SELECT COUNT(DISTINCT l_comment) FROM l_comment_compressed WHERE k = 13
----
21983
query I
SELECT COUNT(*) FROM l_comment_compressed WHERE contains(l_comment, 'fox')
----
27771

View File

@@ -0,0 +1,23 @@
# name: test/sql/storage/compression/dict_fsst/fsst_storage_info.test
# description: Test storage with dict_fsst compression
# group: [dict_fsst]
# load the DB from disk
load __TEST_DIR__/test_dictionary.db readwrite v1.3.0
statement ok
PRAGMA force_compression = 'dict_fsst'
statement ok
CREATE TABLE test (a VARCHAR, b VARCHAR);
# a few repeated short strings plus one all-NULL row
statement ok
INSERT INTO test VALUES ('11', '22'), ('11', '22'), ('12', '21'), (NULL, NULL)
# checkpoint before inspecting the storage info below
statement ok
CHECKPOINT
# only the first VARCHAR segment reported is inspected (LIMIT 1)
query I
SELECT compression FROM pragma_storage_info('test') WHERE segment_type ILIKE 'VARCHAR' LIMIT 1
----
DICT_FSST

View File

@@ -0,0 +1,43 @@
# name: test/sql/storage/compression/dict_fsst/issue_5675.test
# description: fix for issue #5675
# group: [dict_fsst]
load __TEST_DIR__/issue_5675.db readwrite v1.3.0
require parquet
statement ok
pragma threads=1
statement ok
CREATE TABLE TEST (col VARCHAR);
# A very long random string -> this will compress very poorly
statement ok
INSERT INTO TEST
SELECT '33y93uhg3qi3f13hnh8xjrvvbz7iwevroaaif8v5ecfetwnd5yqde9mna8753399lj5r2u5rps5tuu0xsetmsxi80dacku7uz0q3r7h3r9gboq4c41t4h8brm6t7hcb2zg41g7elknivchc3ff409nhczhc7ciledplgcql5sfjb1x2ctuvf6cjdckfkq56ranj67qkzdnvj8guw8rxyd55298ziig0adhqsi0jdx60exhq6vv8hfqarfpvfma8qoqtv5kzte4mofm41w5uy0zdk4tjd5i7673pyigseb4mb3g6u5rwac1s8s1xcoobjoexp340hbr7vk1fwqdfisdm94967tvqxmirn6ml4ccaw3r7bh04e2p5txvea0vi965t29vbv1858ystdqpc8s9aalij445brgn55gpwtgmpbg8tkg7plw05aw8auijbkl9v4go9azrbqcb3yipi1hphmazfrzuta6qdys89usu672vs18qbricebs2rjhsqud8iemug1fr31hoaxu0lg7yfg2i8i9ufmrkgg8dtv5dw5edxptw48meec88iunibox2bc72eq5d59fmg2db7r8xtj1wptoak72on1v5jis1s024hvebx3nudmx78s32dszihs8mvkpt37fxsza9nccbo0jmyu7wzspkl1tqtbkx17lc043weyu2qavtp7vzvuyk9j7l7gha5kr6x295l9sk610ya8820atujz6dpcnkp4fuyatyefo4acc1g05ipcf2bwjgtz48ewzivm8p6kh7e1dp715x5as9bvs5i9n9pxvhp6jo7clnj987iojgztfk2fk9980gr8i69frtci4cj6rxlf9ig228tyljlqd53nwxnz7mvqjymohyqjsuunjdnioo199cl8m6fvv3hu8vh0v8otcohrskwan81p1ncqkf2svf97gyz2cu7bfrvbv03o6bflqdm60ojl0z1yta41m416mkdljpuxzk7gs9tj121jnvfe9piw1ow3ibu66eyrs2cmxopyhio4h7toekptm4xysmx96pdbjxx0kugwj4mfgjwj40ijkpwz0kjs39eumuexyooldizgzidhzk7hecfqamlnolbescpiuqrfyewm4evn2tx3cnoun9y3mciact4ont84q9z99r4wt76wusjtoz11xv5rgkz1t2dy3wtzog1ifhmprfymrzt0lzlsugjru5m6ubq87dmyzu9fwo8sivuea2zedogsm93sgwgrbomalj213ejyic4w9wgobnqjogd9j0h24u8dddr238elpqwk4sr2dznvw2fthow2l5ckua9z022nj3hktzae4jnyu7ckohdowy91lmh6ns10sialoa146x5sfje0fc4i1dt1dwbcygtmadjwjhytsvsq879y9tfud08fg2c1a01yzb5yy1z5cj99aherty940w91qjl0zl27p58enhtdvr0wzqpkgkxno93vj9qm7fiszrzcbmlowwfeusty9dkm9i4vskzovrgjpp91op2654acbblntyxra484e8ejbmehtecdh25patwrs016w7gqtvtzgufg9znmuxwcczrwgqpfkgwj27pfu56z5sd9241vjz8364zxfwfogjvhdxcxfgh6u4lbqmu1870ei2snna70atxputk4ybpj1r1e0ut4zcz8786eofale8on8uysws7mfs8o3uen21zjtqnc260s6e9tc1lpw2qk2vjsg10escmlj8hliba24sauabdc7ttj2pmv2xjh0photfpywqfbw7f9av3jxyplfap42nc9ttz1xqbrb3seieu2cnnwjc8vqxuacprwcdznwzrc99e28zf5q6y9c2exm9rbofy09w854xkplwlx2v2qjfm36r4jhwi953i993xhgu3um0qeisyfw8aicc52itcc53e98wer9iluwqrttx25vjr2ukk7m0olyzksbj9gaiu4tii3
vcmk994ouvzse4khfd4ud9uxea4n8ahdshzpflx0feh0s1r9nv6e6fxipxiclaqs4hul93pbka016cq2l9cogc3tueqhwuximt11gws89uhjf81dhkowdrr0lh19yso5r2hclhblr3c0ykac7ieecfth4xbyz31hr3ug4gd8lbgkvh0hm5tgtyy84qnptuif7l6do0ksktathok43eyflzcq7z8wulniynjhqmnlrdjisosqt3mr02iuahfg7ldzvop71ezhxzgix6xzn93e2pjfo9ejjp5aqgocif4vob6t1be3hg0hn5597h5f66v2m6swkjed482ggfnw894rhxaurk67dsj2albqndwkz25rjxrahlj2eeinxqq93a30o86i865jter4g7wm0brhz5csy5kgb51sawjkc9kfcsqz92c4d7cf7f28kgtruroohlxli6bve96r2ff3eoqqupwifodag20lfp25t0fx61254kwm3s6runk0hcfn9opo2fsduoibmqdrzfnmmf431af441tkur3rhx1o08ovl9uf4xxachfj7acy58g7fnfkk4od5pfjvr20l85dfes3tq9vyom2vbx0m2r5sokx6c7chetqpphdk9etnpqxwwspcnqrq8hcrjka52lgc1y41fvnhxcdisx7w9b9l19p2rmdj4tqx4ezjghgi06b9vil6hkvllt3m2ym9e21mlqy1v0e5yoypwj32k5ri8et1bn2uqmjaw8gpcegsvv9wz45i2ggnntszj0nx05dl1u46mcb66y43xd2tyow6v6wokbeputut9ybaxz7o3hcq41s2p97mxpv34guz7xxbdhezhvja06m298nqx0qsh4tn14vcwt7kdv9rvpcwljzese17dwmnhmi38iqxf8hp5dannyffr8ijrwmxn3ezmfg6f4l8a4mwyhfcjvziu4uy9a62d8wrfskakvwh9h89rfjpa4k8edrq6jjkf9a7xu0vibp3g3escq0jjt209xxyztpy1u1yf9r2hgmw7mwa4j4565w74ib5ocn8q5wrawc9m17o1i8e3mvoc6bkj96nwm5iti5krq5xjwy8rd7j0tjkjzvupus5d64d1zt4fezbuvhieygyl0jlv8s8m24u3jj822tf1wwukafoksuigt1ttsni935ly51cqokl5k09t4y3hjszbj1cisjg18cqsqwqcrlai7exaefpknsa6euj7k0kaqbf4lya7f29zmytpzc32t3vqs2b0967a80ybxykz6fvyjdraj43831rnq19xh4m000gyi0bu5jgp8x53eibhya5xtqdglmdjrhen0md1fha4k0y95ze3v5cxssdeyqwj9y8a8eco858nf9uj6f83z3uk9yk7zi90lmcy8bm6es36hxwj8g1yygj5evne8ire0q6pa2e0jh9wmubpsv53tcof1pvhvvd7b03i2srdeykcq7sn88bv2huz39pmd4m012nx6jib7c53ape70i6gwefvatqetrvj94oq6bm50eqo4c72csqdwn55xpoq27pqot90zfzpnb690eund78eoss6ltg7zhkc4hk0qjvrl8me8cgzy0py4btyhhsks9i0veou0ia84nxbkwo758dn3m0kgp60jxcrgdjuyojbh5u67qlu69lthdzmzshij0mhlaa05rdrn9vdv1440v2rozpwdtvxzfvykb0tjx700eqdr164zy7d3ji8g82souaiui7n96my35ocgt0xmdrss'
FROM range(0,1) tbl(i);
# Now several, slightly shorter, but still near 4k limit strings
statement ok
INSERT INTO TEST
SELECT '5yqde9mna8753399lj5r2u5rps5tuu0xsetmsxi80dacku7uz0q3r7h3r9gboq4c41t4h8brm6t7hcb2zg41g7elknivchc3ff409nhczhc7ciledplgcql5sfjb1x2ctuvf6cjdckfkq56ranj67qkzdnvj8guw8rxyd55298ziig0adhqsi0jdx60exhq6vv8hfqarfpvfma8qoqtv5kzte4mofm41w5uy0zdk4tjd5i7673pyigseb4mb3g6u5rwac1s8s1xcoobjoexp340hbr7vk1fwqdfisdm94967tvqxmirn6ml4ccaw3r7bh04e2p5txvea0vi965t29vbv1858ystdqpc8s9aalij445brgn55gpwtgmpbg8tkg7plw05aw8auijbkl9v4go9azrbqcb3yipi1hphmazfrzuta6qdys89usu672vs18qbricebs2rjhsqud8iemug1fr31hoaxu0lg7yfg2i8i9ufmrkgg8dtv5dw5edxptw48meec88iunibox2bc72eq5d59fmg2db7r8xtj1wptoak72on1v5jis1s024hvebx3nudmx78s32dszihs8mvkpt37fxsza9nccbo0jmyu7wzspkl1tqtbkx17lc043weyu2qavtp7vzvuyk9j7l7gha5kr6x295l9sk610ya8820atujz6dpcnkp4fuyatyefo4acc1g05ipcf2bwjgtz48ewzivm8p6kh7e1dp715x5as9bvs5i9n9pxvhp6jo7clnj987iojgztfk2fk9980gr8i69frtci4cj6rxlf9ig228tyljlqd53nwxnz7mvqjymohyqjsuunjdnioo199cl8m6fvv3hu8vh0v8otcohrskwan81p1ncqkf2svf97gyz2cu7bfrvbv03o6bflqdm60ojl0z1yta41m416mkdljpuxzk7gs9tj121jnvfe9piw1ow3ibu66eyrs2cmxopyhio4h7toekptm4xysmx96pdbjxx0kugwj4mfgjwj40ijkpwz0kjs39eumuexyooldizgzidhzk7hecfqamlnolbescpiuqrfyewm4evn2tx3cnoun9y3mciact4ont84q9z99r4wt76wusjtoz11xv5rgkz1t2dy3wtzog1ifhmprfymrzt0lzlsugjru5m6ubq87dmyzu9fwo8sivuea2zedogsm93sgwgrbomalj213ejyic4w9wgobnqjogd9j0h24u8dddr238elpqwk4sr2dznvw2fthow2l5ckua9z022nj3hktzae4jnyu7ckohdowy91lmh6ns10sialoa146x5sfje0fc4i1dt1dwbcygtmadjwjhytsvsq879y9tfud08fg2c1a01yzb5yy1z5cj99aherty940w91qjl0zl27p58enhtdvr0wzqpkgkxno93vj9qm7fiszrzcbmlowwfeusty9dkm9i4vskzovrgjpp91op2654acbblntyxra484e8ejbmehtecdh25patwrs016w7gqtvtzgufg9znmuxwcczrwgqpfkgwj27pfu56z5sd9241vjz8364zxfwfogjvhdxcxfgh6u4lbqmu1870ei2snna70atxputk4ybpj1r1e0ut4zcz8786eofale8on8uysws7mfs8o3uen21zjtqnc260s6e9tc1lpw2qk2vjsg10escmlj8hliba24sauabdc7ttj2pmv2xjh0photfpywqfbw7f9av3jxyplfap42nc9ttz1xqbrb3seieu2cnnwjc8vqxuacprwcdznwzrc99e28zf5q6y9c2exm9rbofy09w854xkplwlx2v2qjfm36r4jhwi953i993xhgu3um0qeisyfw8aicc52itcc53e98wer9iluwqrttx25vjr2ukk7m0olyzksbj9gaiu4tii3vcmk994ouvzse4khfd4ud9uxea4n8ahdshzpflx0feh0s1r9
nv6e6fxipxiclaqs4hul93pbka016cq2l9cogc3tueqhwuximt11gws89uhjf81dhkowdrr0lh19yso5r2hclhblr3c0ykac7ieecfth4xbyz31hr3ug4gd8lbgkvh0hm5tgtyy84qnptuif7l6do0ksktathok43eyflzcq7z8wulniynjhqmnlrdjisosqt3mr02iuahfg7ldzvop71ezhxzgix6xzn93e2pjfo9ejjp5aqgocif4vob6t1be3hg0hn5597h5f66v2m6swkjed482ggfnw894rhxaurk67dsj2albqndwkz25rjxrahlj2eeinxqq93a30o86i865jter4g7wm0brhz5csy5kgb51sawjkc9kfcsqz92c4d7cf7f28kgtruroohlxli6bve96r2ff3eoqqupwifodag20lfp25t0fx61254kwm3s6runk0hcfn9opo2fsduoibmqdrzfnmmf431af441tkur3rhx1o08ovl9uf4xxachfj7acy58g7fnfkk4od5pfjvr20l85dfes3tq9vyom2vbx0m2r5sokx6c7chetqpphdk9etnpqxwwspcnqrq8hcrjka52lgc1y41fvnhxcdisx7w9b9l19p2rmdj4tqx4ezjghgi06b9vil6hkvllt3m2ym9e21mlqy1v0e5yoypwj32k5ri8et1bn2uqmjaw8gpcegsvv9wz45i2ggnntszj0nx05dl1u46mcb66y43xd2tyow6v6wokbeputut9ybaxz7o3hcq41s2p97mxpv34guz7xxbdhezhvja06m298nqx0qsh4tn14vcwt7kdv9rvpcwljzese17dwmnhmi38iqxf8hp5dannyffr8ijrwmxn3ezmfg6f4l8a4mwyhfcjvziu4uy9a62d8wrfskakvwh9h89rfjpa4k8edrq6jjkf9a7xu0vibp3g3escq0jjt209xxyztpy1u1yf9r2hgmw7mwa4j4565w74ib5ocn8q5wrawc9m17o1i8e3mvoc6bkj96nwm5iti5krq5xjwy8rd7j0tjkjzvupus5d64d1zt4fezbuvhieygyl0jlv8s8m24u3jj822tf1wwukafoksuigt1ttsni935ly51cqokl5k09t4y3hjszbj1cisjg18cqsqwqcrlai7exaefpknsa6euj7k0kaqbf4lya7f29zmytpzc32t3vqs2b0967a80ybxykz6fvyjdraj43831rnq19xh4m000gyi0bu5jgp8x53eibhya5xtqdglmdjrhen0md1fha4k0y95ze3v5cxssdeyqwj9y8a8eco858nf9uj6f83z3uk9yk7zi90lmcy8bm6es36hxwj8g1yygj5evne8ire0q6pa2e0jh9wmubpsv53tcof1pvhvvd7b03i2srdeykcq7sn88bv2huz39pmd4m012nx6jib7c53ape70i6gwefvatqetrvj94oq6bm50eqo4c72csqdwn55xpoq27pqot90zfzpnb690eund78eoss6ltg7zhkc4hk0qjvrl8me8cgzy0py4btyhhsks9i0veou0ia84nxbkwo758dn3m0kgp60jxcrgdjuyojbh5u67qlu69lthdzmzshij0mhlaa05rdrn9vdv1440v2rozpwdtvxzfvykb0tjx700eqdr164zy7d3ji8g82souaiui7n96my35ocgt0xmdrss'
FROM range(0,90) tbl(i);
# Followed by many (100,000) empty strings
statement ok
INSERT INTO TEST SELECT '' FROM range(0,100000) tbl(i);
# With the finishing move of a single, very long string
statement ok
INSERT INTO TEST values ('33y93uhg3qi3f13hnh8xjrvvbz7iwevroaaif8v5ecfetwnd5yqde9mna8753399lj5r2u5rps5tuu0xsetmsxi80dacku7uz0q3r7h3r9gboq4c41t4h8brm6t7hcb2zg41g7elknivchc3ff409nhczhc7ciledplgcql5sfjb1x2ctuvf6cjdckfkq56ranj67qkzdnvj8guw8rxyd55298ziig0adhqsi0jdx60exhq6vv8hfqarfpvfma8qoqtv5kzte4mofm41w5uy0zdk4tjd5i7673pyigseb4mb3g6u5rwac1s8s1xcoobjoexp340hbr7vk1fwqdfisdm94967tvqxmirn6ml4ccaw3r7bh04e2p5txvea0vi965t29vbv1858ystdqpc8s9aalij445brgn55gpwtgmpbg8tkg7plw05aw8auijbkl9v4go9azrbqcb3yipi1hphmazfrzuta6qdys89usu672vs18qbricebs2rjhsqud8iemug1fr31hoaxu0lg7yfg2i8i9ufmrkgg8dtv5dw5edxptw48meec88iunibox2bc72eq5d59fmg2db7r8xtj1wptoak72on1v5jis1s024hvebx3nudmx78s32dszihs8mvkpt37fxsza9nccbo0jmyu7wzspkl1tqtbkx17lc043weyu2qavtp7vzvuyk9j7l7gha5kr6x295l9sk610ya8820atujz6dpcnkp4fuyatyefo4acc1g05ipcf2bwjgtz48ewzivm8p6kh7e1dp715x5as9bvs5i9n9pxvhp6jo7clnj987iojgztfk2fk9980gr8i69frtci4cj6rxlf9ig228tyljlqd53nwxnz7mvqjymohyqjsuunjdnioo199cl8m6fvv3hu8vh0v8otcohrskwan81p1ncqkf2svf97gyz2cu7bfrvbv03o6bflqdm60ojl0z1yta41m416mkdljpuxzk7gs9tj121jnvfe9piw1ow3ibu66eyrs2cmxopyhio4h7toekptm4xysmx96pdbjxx0kugwj4mfgjwj40ijkpwz0kjs39eumuexyooldizgzidhzk7hecfqamlnolbescpiuqrfyewm4evn2tx3cnoun9y3mciact4ont84q9z99r4wt76wusjtoz11xv5rgkz1t2dy3wtzog1ifhmprfymrzt0lzlsugjru5m6ubq87dmyzu9fwo8sivuea2zedogsm93sgwgrbomalj213ejyic4w9wgobnqjogd9j0h24u8dddr238elpqwk4sr2dznvw2fthow2l5ckua9z022nj3hktzae4jnyu7ckohdowy91lmh6ns10sialoa146x5sfje0fc4i1dt1dwbcygtmadjwjhytsvsq879y9tfud08fg2c1a01yzb5yy1z5cj99aherty940w91qjl0zl27p58enhtdvr0wzqpkgkxno93vj9qm7fiszrzcbmlowwfeusty9dkm9i4vskzovrgjpp91op2654acbblntyxra484e8ejbmehtecdh25patwrs016w7gqtvtzgufg9znmuxwcczrwgqpfkgwj27pfu56z5sd9241vjz8364zxfwfogjvhdxcxfgh6u4lbqmu1870ei2snna70atxputk4ybpj1r1e0ut4zcz8786eofale8on8uysws7mfs8o3uen21zjtqnc260s6e9tc1lpw2qk2vjsg10escmlj8hliba24sauabdc7ttj2pmv2xjh0photfpywqfbw7f9av3jxyplfap42nc9ttz1xqbrb3seieu2cnnwjc8vqxuacprwcdznwzrc99e28zf5q6y9c2exm9rbofy09w854xkplwlx2v2qjfm36r4jhwi953i993xhgu3um0qeisyfw8aicc52itcc53e98wer9iluwqrttx25vjr2ukk7m0
olyzksbj9gaiu4tii3vcmk994ouvzse4khfd4ud9uxea4n8ahdshzpflx0feh0s1r9nv6e6fxipxiclaqs4hul93pbka016cq2l9cogc3tueqhwuximt11gws89uhjf81dhkowdrr0lh19yso5r2hclhblr3c0ykac7ieecfth4xbyz31hr3ug4gd8lbgkvh0hm5tgtyy84qnptuif7l6do0ksktathok43eyflzcq7z8wulniynjhqmnlrdjisosqt3mr02iuahfg7ldzvop71ezhxzgix6xzn93e2pjfo9ejjp5aqgocif4vob6t1be3hg0hn5597h5f66v2m6swkjed482ggfnw894rhxaurk67dsj2albqndwkz25rjxrahlj2eeinxqq93a30o86i865jter4g7wm0brhz5csy5kgb51sawjkc9kfcsqz92c4d7cf7f28kgtruroohlxli6bve96r2ff3eoqqupwifodag20lfp25t0fx61254kwm3s6runk0hcfn9opo2fsduoibmqdrzfnmmf431af441tkur3rhx1o08ovl9uf4xxachfj7acy58g7fnfkk4od5pfjvr20l85dfes3tq9vyom2vbx0m2r5sokx6c7chetqpphdk9etnpqxwwspcnqrq8hcrjka52lgc1y41fvnhxcdisx7w9b9l19p2rmdj4tqx4ezjghgi06b9vil6hkvllt3m2ym9e21mlqy1v0e5yoypwj32k5ri8et1bn2uqmjaw8gpcegsvv9wz45i2ggnntszj0nx05dl1u46mcb66y43xd2tyow6v6wokbeputut9ybaxz7o3hcq41s2p97mxpv34guz7xxbdhezhvja06m298nqx0qsh4tn14vcwt7kdv9rvpcwljzese17dwmnhmi38iqxf8hp5dannyffr8ijrwmxn3ezmfg6f4l8a4mwyhfcjvziu4uy9a62d8wrfskakvwh9h89rfjpa4k8edrq6jjkf9a7xu0vibp3g3escq0jjt209xxyztpy1u1yf9r2hgmw7mwa4j4565w74ib5ocn8q5wrawc9m17o1i8e3mvoc6bkj96nwm5iti5krq5xjwy8rd7j0tjkjzvupus5d64d1zt4fezbuvhieygyl0jlv8s8m24u3jj822tf1wwukafoksuigt1ttsni935ly51cqokl5k09t4y3hjszbj1cisjg18cqsqwqcrlai7exaefpknsa6euj7k0kaqbf4lya7f29zmytpzc32t3vqs2b0967a80ybxykz6fvyjdraj43831rnq19xh4m000gyi0bu5jgp8x53eibhya5xtqdglmdjrhen0md1fha4k0y95ze3v5cxssdeyqwj9y8a8eco858nf9uj6f83z3uk9yk7zi90lmcy8bm6es36hxwj8g1yygj5evne8ire0q6pa2e0jh9wmubpsv53tcof1pvhvvd7b03i2srdeykcq7sn88bv2huz39pmd4m012nx6jib7c53ape70i6gwefvatqetrvj94oq6bm50eqo4c72csqdwn55xpoq27pqot90zfzpnb690eund78eoss6ltg7zhkc4hk0qjvrl8me8cgzy0py4btyhhsks9i0veou0ia84nxbkwo758dn3m0kgp60jxcrgdjuyojbh5u67qlu69lthdzmzshij0mhlaa05rdrn9vdv1440v2rozpwdtvxzfvykb0tjx700eqdr164zy7d3ji8g82souaiui7n96my35ocgt0xmdrss');
# Now create our FSST table
statement ok
pragma force_compression='dict_fsst';
statement ok
CREATE TABLE TEST2 as SELECT * FROM TEST;
statement ok
CHECKPOINT;

View File

@@ -0,0 +1,29 @@
# name: test/sql/storage/compression/dict_fsst/issue_5675_followup.test
# description: Issue #5675: Follow up test to cover issue when the 1+th segment of a rowgroup is flushed with 1 value
# group: [dict_fsst]
load __TEST_DIR__/issue_5675_followup.db readwrite v1.3.0
require parquet
statement ok
pragma threads=1
statement ok
CREATE TABLE TEST (col VARCHAR);
# This will fill up a segment completely with 1 value
statement ok
INSERT INTO TEST SELECT CASE WHEN i%2==0
THEN
'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'
ELSE
'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'
END FROM range(0,2214) tbl(i);
# Now create our FSST table
statement ok
pragma force_compression='dict_fsst';
statement ok
CREATE TABLE TEST2 as SELECT * FROM TEST;

View File

@@ -0,0 +1,12 @@
# name: test/sql/storage/compression/dict_fsst/issue_5759.test
# description: Issue #5759: segfault on sample creation
# group: [dict_fsst]
load __TEST_DIR__/issue_5759.db readwrite v1.3.0
statement ok
pragma force_compression='dict_fsst'
# With many short strings ('c') and a few long ones (3000 characters), the sample calculation would overflow
# Roughly 5% of the 1000 rows get the long string (RANDOM() > 0.95); the statement only has to succeed
statement ok
CREATE TABLE trigger5759 AS SELECT CASE WHEN RANDOM() > 0.95 THEN repeat('ab', 1500) ELSE 'c' END FROM range(0,1000);

View File

@@ -0,0 +1,31 @@
# name: test/sql/storage/compression/dict_fsst/test_dict_fsst_with_smaller_block_size.test
# description: Test storage with dictionary compression and a smaller block size.
# group: [dict_fsst]
statement ok
SET storage_compatibility_version='latest';
# create the database file with a block size of 16384 bytes
statement ok
ATTACH '__TEST_DIR__/partial_manager.db' AS db (BLOCK_SIZE 16384);
# load the CSV fixture into the attached database
statement ok
CREATE TABLE db.t AS FROM read_csv('data/csv/rabo-anon.csv.gz', strict_mode=FALSE);
# detach and re-attach, so that the queries below run against a freshly attached file
statement ok
DETACH db;
statement ok
ATTACH '__TEST_DIR__/partial_manager.db' AS db;
# non-NULL values of the column
query I
SELECT COUNT("XXX XXX/XXX") FROM db.t WHERE "XXX XXX/XXX" IS NOT NULL;
----
3767
# NULL values of the same column
query I
SELECT COUNT(*) FROM db.t WHERE "XXX XXX/XXX" IS NULL;
----
5460
# a full scan of every column must succeed as well
statement ok
SELECT * FROM db.t;

View File

@@ -0,0 +1,36 @@
# name: test/sql/storage/compression/dictionary/dictionary_compression_ratio.test_slow
# description: Assert dictionary compression ratio is within reasonable margins
# group: [dictionary]
load __TEST_DIR__/test_dictionary.db readwrite v1.0.0
# First test: detailed compression ratio
statement ok
PRAGMA force_compression='dictionary';
# Assuming 10 chars at 1 byte, with a 4byte offset and a 2byte length per string uncompressed:
# Ratio absolute max at 3 bits per value (ignoring dict size) = (16/(3/8)) = 42.6666666667
# 1,250,000 rows drawn from only three distinct strings: BEEPBOOP-0, BEEPBOOP-1, BEEPBOOP-2
statement ok
CREATE TABLE test_dictionary AS SELECT concat('BEEPBOOP-', (i%3)::VARCHAR) AS i FROM range(0, 1250000) tbl(i);
statement ok
CHECKPOINT;
# baseline: the same data written without compression
statement ok
PRAGMA force_compression='uncompressed';
statement ok
CREATE TABLE test_uncompressed AS SELECT concat('BEEPBOOP-', (i%3)::VARCHAR) AS i FROM range(0, 1250000) tbl(i);
statement ok
CHECKPOINT;
# keep a wide margin for the compression ratio to account for changes (like the block size) that
# influence the compression ratio
# the ratio is computed from the number of distinct blocks holding VARCHAR segments in each table
query I
SELECT uncompressed::FLOAT / dictionary::FLOAT > 30 AND uncompressed::FLOAT / dictionary::FLOAT < 55 FROM
(SELECT count(DISTINCT block_id) AS dictionary FROM pragma_storage_info('test_dictionary') WHERE segment_type IN ('VARCHAR')) AS dictionary,
(SELECT count(DISTINCT block_id) AS uncompressed FROM pragma_storage_info('test_uncompressed') WHERE segment_type IN ('VARCHAR')) AS uncompressed;
----
True

View File

@@ -0,0 +1,22 @@
# name: test/sql/storage/compression/dictionary/dictionary_read.test_slow
# description: Test reading a pre-built database file whose VARCHAR segments are dictionary compressed
# group: [dictionary]
# The database is written with a vector size of 2048.
require vector_size 2048
# unpack the stored database file into the test directory and open it read-only
unzip data/storage/dictionary.db.gz __TEST_DIR__/dictionary.db
load __TEST_DIR__/dictionary.db readonly
# number of non-NULL street values
query I
select count(street) from tbl;
----
397527
# full scan of the string column; no expected rows are listed here, the result is only tagged with the label res
query I nosort res
select street from tbl;
----
# every VARCHAR segment must use Dictionary compression, so this query returns no rows
query I
SELECT compression FROM pragma_storage_info('tbl') WHERE segment_type == 'VARCHAR' AND compression != 'Dictionary';
----

View File

@@ -0,0 +1,25 @@
# name: test/sql/storage/compression/dictionary/dictionary_storage_info.test
# description: Test storage with Dictionary compression
# group: [dictionary]
require no_latest_storage
# load the DB from disk
load __TEST_DIR__/test_dictionary.db readwrite v1.0.0
statement ok
PRAGMA force_compression = 'dictionary'
statement ok
CREATE TABLE test (a VARCHAR, b VARCHAR);
# a few repeated short strings plus one all-NULL row
statement ok
INSERT INTO test VALUES ('11', '22'), ('11', '22'), ('12', '21'), (NULL, NULL)
# checkpoint before inspecting the storage info below
statement ok
CHECKPOINT
# only the first VARCHAR segment reported is inspected (LIMIT 1)
query I
SELECT compression FROM pragma_storage_info('test') WHERE segment_type ILIKE 'VARCHAR' LIMIT 1
----
Dictionary

View File

@@ -0,0 +1,46 @@
# name: test/sql/storage/compression/dictionary/fetch_row.test
# description: Test storage with Dictionary compression
# group: [dictionary]
load __TEST_DIR__/test_dictionary_fetchrow.db readwrite v1.0.0
statement ok
PRAGMA force_compression = 'dictionary'
statement ok
CREATE TABLE test (
a INTEGER,
b VARCHAR
);
# 10,000 rows cycling through four short strings and NULL
statement ok
INSERT INTO test (a, b)
SELECT
x AS a,
CASE x % 5
WHEN 0 THEN 'aaaa'
WHEN 1 THEN 'bbbb'
WHEN 2 THEN 'cccc'
WHEN 3 THEN 'dddd'
WHEN 4 THEN NULL
END AS b
FROM range(10_000) t(x);
statement ok
CHECKPOINT
# restart before running the checks below
restart
# make sure the column really is stored with Dictionary compression
query I
SELECT compression FROM pragma_storage_info('test') WHERE segment_type ILIKE 'VARCHAR' LIMIT 1
----
Dictionary
# each of the five inserted values is expected once, in the order of a % 5
query I
select distinct b from test order by a % 5;
----
aaaa
bbbb
cccc
dddd
NULL

View File

@@ -0,0 +1,26 @@
# name: test/sql/storage/compression/dictionary/force_dictionary.test
# description: Test forcing dictionary encoding as the compression scheme
# group: [dictionary]
require no_latest_storage
require vector_size 2048
load __TEST_DIR__/force_dictionary.db readwrite v1.0.0
statement ok
PRAGMA force_compression = 'dictionary'
statement ok
CREATE TABLE test_dict (a VARCHAR);
# 2000 distinct values: every string is unique, yet the forced scheme must still be used
statement ok
INSERT INTO test_dict SELECT i::VARCHAR FROM range(0, 2000) tbl(i);
statement ok
CHECKPOINT
# only the first VARCHAR segment reported is inspected (LIMIT 1)
query I
SELECT compression FROM pragma_storage_info('test_dict') WHERE segment_type ILIKE 'VARCHAR' LIMIT 1
----
Dictionary

View File

@@ -0,0 +1,87 @@
# name: test/sql/storage/compression/fsst/fsst_compression_ratio.test_slow
# description: Assert fsst compression ratio is within reasonable margins
# group: [fsst]
require no_latest_storage
# load the DB from disk
load __TEST_DIR__/test_dictionary.db readwrite v1.0.0
require tpch
# First test: detailed compression ratio
statement ok
PRAGMA force_compression='fsst'
# Uncompressed size per value: 10 chars + 4 bytes for dict offset = 14
# Compressed size per value: 2 bytes for 2 symbols (BEEPBOOP) and (-{num}) + 0.25 byte = 2.25 bytes per value
# Ignoring overhead for symbol table and bitwidth storage, we would expect a ratio of maximally 14/2.25 = 6.22
statement ok
CREATE TABLE test_compressed AS SELECT concat('BEEPBOOP-', (i%3)::VARCHAR) AS i FROM range(0, 1250000) tbl(i);
statement ok
checkpoint
# every VARCHAR segment of the compressed table must report fsst
query I
SELECT DISTINCT lower(compression) FROM pragma_storage_info('test_compressed') where segment_type = 'VARCHAR'
----
fsst
# build the same data again without compression as the baseline
statement ok
PRAGMA force_compression='uncompressed'
statement ok
CREATE TABLE test_uncompressed AS SELECT concat('BEEPBOOP-', (i%3)::VARCHAR) AS i FROM range(0, 1250000) tbl(i);
statement ok
checkpoint
#mode output_result
# debugging aid: computes the ratio (distinct blocks uncompressed / compressed) without asserting on it
statement ok
select (uncompressed::FLOAT / compressed::FLOAT) as compression_ratio FROM (
select
(select count(distinct block_id) from pragma_storage_info('test_compressed') where segment_type in('VARCHAR')) as compressed,
(select count(distinct block_id) from pragma_storage_info('test_uncompressed') where segment_type in('VARCHAR')) as uncompressed
)
# This query keeps a pretty wide margin in compression ratio on purpose to account for possible changes that
# influence compression ratio.
query II
select (uncompressed::FLOAT / compressed::FLOAT) > 5, (uncompressed::FLOAT / compressed::FLOAT) < 6.5 FROM (
select
(select count(distinct block_id) from pragma_storage_info('test_compressed') where segment_type in('VARCHAR')) as compressed,
(select count(distinct block_id) from pragma_storage_info('test_uncompressed') where segment_type in('VARCHAR')) as uncompressed
)
----
True True
# Second test: compression ratio on the TPC-H lineitem.l_comment column
statement ok
CALL dbgen(sf=0.1)
statement ok
PRAGMA force_compression='uncompressed'
statement ok
CREATE TABLE l_comment_uncompressed AS SELECT l_comment FROM lineitem;
statement ok
checkpoint
statement ok
PRAGMA force_compression='fsst'
statement ok
CREATE TABLE l_comment_compressed AS SELECT l_comment FROM lineitem;
statement ok
checkpoint
# We compress l_comment with roughly ~3x compression ratio with fsst
# NOTE(review): this runs as 'statement ok', so the two boolean bounds are computed but never
# compared against an expected result - confirm whether a 'query II' assertion was intended.
statement ok
select (uncompressed::FLOAT / compressed::FLOAT) > 2.5, (uncompressed::FLOAT / compressed::FLOAT) < 3.5 as compression_ratio FROM (
select
(select count(distinct block_id) from pragma_storage_info('l_comment_compressed') where segment_type in('VARCHAR')) as compressed,
(select count(distinct block_id) from pragma_storage_info('l_comment_uncompressed') where segment_type in('VARCHAR')) as uncompressed
)

View File

@@ -0,0 +1,39 @@
# name: test/sql/storage/compression/fsst/fsst_disable_compression.test
# description: Test disabling compression
# group: [fsst]
require no_latest_storage
require skip_reload
# load the DB from disk
load __TEST_DIR__/test_disabled_compression_methods.db readwrite v1.0.0
# baseline: with no method disabled, this data is expected to be stored with FSST
statement ok
CREATE TABLE test AS SELECT concat('longprefix', i) FROM range(30000) t(i);
statement ok
CHECKPOINT
query I
SELECT DISTINCT compression FROM pragma_storage_info('test') where segment_type = 'VARCHAR';
----
FSST
statement ok
DROP TABLE test
# disable FSST and write the same data again
statement ok
SET disabled_compression_methods='fsst'
# verify FSST is disabled
statement ok
CREATE TABLE test AS SELECT concat('longprefix', i) FROM range(30000) t(i);
statement ok
CHECKPOINT
# no segment of the table may report a compression starting with 'fsst' (case-insensitive)
query I
SELECT BOOL_OR(compression ILIKE 'fsst%') FROM pragma_storage_info('test')
----
false

View File

@@ -0,0 +1,22 @@
# name: test/sql/storage/compression/fsst/fsst_read.test_slow
# group: [fsst]
# The database is written with a vector size of 2048.
require vector_size 2048
# unpack the pre-built database file and open it read-only
unzip data/storage/fsst.db.gz __TEST_DIR__/fsst.db
load __TEST_DIR__/fsst.db readonly
query I
select count(street) from tbl;
----
397527
# scan the whole column; no expected values are listed, the result is only tagged with the label 'res'
# NOTE(review): no other query in this file reuses the label 'res' - confirm this is intended
query I nosort res
select street from tbl;
----
# expects no rows: every VARCHAR segment must report FSST
query I
SELECT compression FROM pragma_storage_info('tbl') WHERE segment_type == 'VARCHAR' AND compression != 'FSST';
----

View File

@@ -0,0 +1,25 @@
# name: test/sql/storage/compression/fsst/fsst_storage_info.test
# description: Test storage with fsst compression
# group: [fsst]
require no_latest_storage
# load the DB from disk
load __TEST_DIR__/test_dictionary.db readwrite v1.0.0
# request FSST compression for the data written below
statement ok
PRAGMA force_compression = 'fsst'
statement ok
CREATE TABLE test (a VARCHAR, b VARCHAR);
# a few repeated values plus one all-NULL row
statement ok
INSERT INTO test VALUES ('11', '22'), ('11', '22'), ('12', '21'), (NULL, NULL)
# checkpoint before inspecting the storage info
statement ok
CHECKPOINT
# the first VARCHAR segment reported must be FSST-compressed
query I
SELECT compression FROM pragma_storage_info('test') WHERE segment_type ILIKE 'VARCHAR' LIMIT 1
----
FSST

View File

@@ -0,0 +1,43 @@
# name: test/sql/storage/compression/fsst/issue_5675.test
# description: fix for issue #5675
# group: [fsst]
load __TEST_DIR__/issue_5675.db readwrite v1.0.0
require parquet
# run single-threaded
statement ok
pragma threads=1
# staging table; it is copied into an FSST-compressed table at the end of the test
statement ok
CREATE TABLE TEST (col VARCHAR);
# A very long random string -> this will compress very poorly
statement ok
INSERT INTO TEST
SELECT '33y93uhg3qi3f13hnh8xjrvvbz7iwevroaaif8v5ecfetwnd5yqde9mna8753399lj5r2u5rps5tuu0xsetmsxi80dacku7uz0q3r7h3r9gboq4c41t4h8brm6t7hcb2zg41g7elknivchc3ff409nhczhc7ciledplgcql5sfjb1x2ctuvf6cjdckfkq56ranj67qkzdnvj8guw8rxyd55298ziig0adhqsi0jdx60exhq6vv8hfqarfpvfma8qoqtv5kzte4mofm41w5uy0zdk4tjd5i7673pyigseb4mb3g6u5rwac1s8s1xcoobjoexp340hbr7vk1fwqdfisdm94967tvqxmirn6ml4ccaw3r7bh04e2p5txvea0vi965t29vbv1858ystdqpc8s9aalij445brgn55gpwtgmpbg8tkg7plw05aw8auijbkl9v4go9azrbqcb3yipi1hphmazfrzuta6qdys89usu672vs18qbricebs2rjhsqud8iemug1fr31hoaxu0lg7yfg2i8i9ufmrkgg8dtv5dw5edxptw48meec88iunibox2bc72eq5d59fmg2db7r8xtj1wptoak72on1v5jis1s024hvebx3nudmx78s32dszihs8mvkpt37fxsza9nccbo0jmyu7wzspkl1tqtbkx17lc043weyu2qavtp7vzvuyk9j7l7gha5kr6x295l9sk610ya8820atujz6dpcnkp4fuyatyefo4acc1g05ipcf2bwjgtz48ewzivm8p6kh7e1dp715x5as9bvs5i9n9pxvhp6jo7clnj987iojgztfk2fk9980gr8i69frtci4cj6rxlf9ig228tyljlqd53nwxnz7mvqjymohyqjsuunjdnioo199cl8m6fvv3hu8vh0v8otcohrskwan81p1ncqkf2svf97gyz2cu7bfrvbv03o6bflqdm60ojl0z1yta41m416mkdljpuxzk7gs9tj121jnvfe9piw1ow3ibu66eyrs2cmxopyhio4h7toekptm4xysmx96pdbjxx0kugwj4mfgjwj40ijkpwz0kjs39eumuexyooldizgzidhzk7hecfqamlnolbescpiuqrfyewm4evn2tx3cnoun9y3mciact4ont84q9z99r4wt76wusjtoz11xv5rgkz1t2dy3wtzog1ifhmprfymrzt0lzlsugjru5m6ubq87dmyzu9fwo8sivuea2zedogsm93sgwgrbomalj213ejyic4w9wgobnqjogd9j0h24u8dddr238elpqwk4sr2dznvw2fthow2l5ckua9z022nj3hktzae4jnyu7ckohdowy91lmh6ns10sialoa146x5sfje0fc4i1dt1dwbcygtmadjwjhytsvsq879y9tfud08fg2c1a01yzb5yy1z5cj99aherty940w91qjl0zl27p58enhtdvr0wzqpkgkxno93vj9qm7fiszrzcbmlowwfeusty9dkm9i4vskzovrgjpp91op2654acbblntyxra484e8ejbmehtecdh25patwrs016w7gqtvtzgufg9znmuxwcczrwgqpfkgwj27pfu56z5sd9241vjz8364zxfwfogjvhdxcxfgh6u4lbqmu1870ei2snna70atxputk4ybpj1r1e0ut4zcz8786eofale8on8uysws7mfs8o3uen21zjtqnc260s6e9tc1lpw2qk2vjsg10escmlj8hliba24sauabdc7ttj2pmv2xjh0photfpywqfbw7f9av3jxyplfap42nc9ttz1xqbrb3seieu2cnnwjc8vqxuacprwcdznwzrc99e28zf5q6y9c2exm9rbofy09w854xkplwlx2v2qjfm36r4jhwi953i993xhgu3um0qeisyfw8aicc52itcc53e98wer9iluwqrttx25vjr2ukk7m0olyzksbj9gaiu4tii3
vcmk994ouvzse4khfd4ud9uxea4n8ahdshzpflx0feh0s1r9nv6e6fxipxiclaqs4hul93pbka016cq2l9cogc3tueqhwuximt11gws89uhjf81dhkowdrr0lh19yso5r2hclhblr3c0ykac7ieecfth4xbyz31hr3ug4gd8lbgkvh0hm5tgtyy84qnptuif7l6do0ksktathok43eyflzcq7z8wulniynjhqmnlrdjisosqt3mr02iuahfg7ldzvop71ezhxzgix6xzn93e2pjfo9ejjp5aqgocif4vob6t1be3hg0hn5597h5f66v2m6swkjed482ggfnw894rhxaurk67dsj2albqndwkz25rjxrahlj2eeinxqq93a30o86i865jter4g7wm0brhz5csy5kgb51sawjkc9kfcsqz92c4d7cf7f28kgtruroohlxli6bve96r2ff3eoqqupwifodag20lfp25t0fx61254kwm3s6runk0hcfn9opo2fsduoibmqdrzfnmmf431af441tkur3rhx1o08ovl9uf4xxachfj7acy58g7fnfkk4od5pfjvr20l85dfes3tq9vyom2vbx0m2r5sokx6c7chetqpphdk9etnpqxwwspcnqrq8hcrjka52lgc1y41fvnhxcdisx7w9b9l19p2rmdj4tqx4ezjghgi06b9vil6hkvllt3m2ym9e21mlqy1v0e5yoypwj32k5ri8et1bn2uqmjaw8gpcegsvv9wz45i2ggnntszj0nx05dl1u46mcb66y43xd2tyow6v6wokbeputut9ybaxz7o3hcq41s2p97mxpv34guz7xxbdhezhvja06m298nqx0qsh4tn14vcwt7kdv9rvpcwljzese17dwmnhmi38iqxf8hp5dannyffr8ijrwmxn3ezmfg6f4l8a4mwyhfcjvziu4uy9a62d8wrfskakvwh9h89rfjpa4k8edrq6jjkf9a7xu0vibp3g3escq0jjt209xxyztpy1u1yf9r2hgmw7mwa4j4565w74ib5ocn8q5wrawc9m17o1i8e3mvoc6bkj96nwm5iti5krq5xjwy8rd7j0tjkjzvupus5d64d1zt4fezbuvhieygyl0jlv8s8m24u3jj822tf1wwukafoksuigt1ttsni935ly51cqokl5k09t4y3hjszbj1cisjg18cqsqwqcrlai7exaefpknsa6euj7k0kaqbf4lya7f29zmytpzc32t3vqs2b0967a80ybxykz6fvyjdraj43831rnq19xh4m000gyi0bu5jgp8x53eibhya5xtqdglmdjrhen0md1fha4k0y95ze3v5cxssdeyqwj9y8a8eco858nf9uj6f83z3uk9yk7zi90lmcy8bm6es36hxwj8g1yygj5evne8ire0q6pa2e0jh9wmubpsv53tcof1pvhvvd7b03i2srdeykcq7sn88bv2huz39pmd4m012nx6jib7c53ape70i6gwefvatqetrvj94oq6bm50eqo4c72csqdwn55xpoq27pqot90zfzpnb690eund78eoss6ltg7zhkc4hk0qjvrl8me8cgzy0py4btyhhsks9i0veou0ia84nxbkwo758dn3m0kgp60jxcrgdjuyojbh5u67qlu69lthdzmzshij0mhlaa05rdrn9vdv1440v2rozpwdtvxzfvykb0tjx700eqdr164zy7d3ji8g82souaiui7n96my35ocgt0xmdrss'
FROM range(0,1) tbl(i);
# Now several, slightly shorter, but still near 4k limit strings
statement ok
INSERT INTO TEST
SELECT '5yqde9mna8753399lj5r2u5rps5tuu0xsetmsxi80dacku7uz0q3r7h3r9gboq4c41t4h8brm6t7hcb2zg41g7elknivchc3ff409nhczhc7ciledplgcql5sfjb1x2ctuvf6cjdckfkq56ranj67qkzdnvj8guw8rxyd55298ziig0adhqsi0jdx60exhq6vv8hfqarfpvfma8qoqtv5kzte4mofm41w5uy0zdk4tjd5i7673pyigseb4mb3g6u5rwac1s8s1xcoobjoexp340hbr7vk1fwqdfisdm94967tvqxmirn6ml4ccaw3r7bh04e2p5txvea0vi965t29vbv1858ystdqpc8s9aalij445brgn55gpwtgmpbg8tkg7plw05aw8auijbkl9v4go9azrbqcb3yipi1hphmazfrzuta6qdys89usu672vs18qbricebs2rjhsqud8iemug1fr31hoaxu0lg7yfg2i8i9ufmrkgg8dtv5dw5edxptw48meec88iunibox2bc72eq5d59fmg2db7r8xtj1wptoak72on1v5jis1s024hvebx3nudmx78s32dszihs8mvkpt37fxsza9nccbo0jmyu7wzspkl1tqtbkx17lc043weyu2qavtp7vzvuyk9j7l7gha5kr6x295l9sk610ya8820atujz6dpcnkp4fuyatyefo4acc1g05ipcf2bwjgtz48ewzivm8p6kh7e1dp715x5as9bvs5i9n9pxvhp6jo7clnj987iojgztfk2fk9980gr8i69frtci4cj6rxlf9ig228tyljlqd53nwxnz7mvqjymohyqjsuunjdnioo199cl8m6fvv3hu8vh0v8otcohrskwan81p1ncqkf2svf97gyz2cu7bfrvbv03o6bflqdm60ojl0z1yta41m416mkdljpuxzk7gs9tj121jnvfe9piw1ow3ibu66eyrs2cmxopyhio4h7toekptm4xysmx96pdbjxx0kugwj4mfgjwj40ijkpwz0kjs39eumuexyooldizgzidhzk7hecfqamlnolbescpiuqrfyewm4evn2tx3cnoun9y3mciact4ont84q9z99r4wt76wusjtoz11xv5rgkz1t2dy3wtzog1ifhmprfymrzt0lzlsugjru5m6ubq87dmyzu9fwo8sivuea2zedogsm93sgwgrbomalj213ejyic4w9wgobnqjogd9j0h24u8dddr238elpqwk4sr2dznvw2fthow2l5ckua9z022nj3hktzae4jnyu7ckohdowy91lmh6ns10sialoa146x5sfje0fc4i1dt1dwbcygtmadjwjhytsvsq879y9tfud08fg2c1a01yzb5yy1z5cj99aherty940w91qjl0zl27p58enhtdvr0wzqpkgkxno93vj9qm7fiszrzcbmlowwfeusty9dkm9i4vskzovrgjpp91op2654acbblntyxra484e8ejbmehtecdh25patwrs016w7gqtvtzgufg9znmuxwcczrwgqpfkgwj27pfu56z5sd9241vjz8364zxfwfogjvhdxcxfgh6u4lbqmu1870ei2snna70atxputk4ybpj1r1e0ut4zcz8786eofale8on8uysws7mfs8o3uen21zjtqnc260s6e9tc1lpw2qk2vjsg10escmlj8hliba24sauabdc7ttj2pmv2xjh0photfpywqfbw7f9av3jxyplfap42nc9ttz1xqbrb3seieu2cnnwjc8vqxuacprwcdznwzrc99e28zf5q6y9c2exm9rbofy09w854xkplwlx2v2qjfm36r4jhwi953i993xhgu3um0qeisyfw8aicc52itcc53e98wer9iluwqrttx25vjr2ukk7m0olyzksbj9gaiu4tii3vcmk994ouvzse4khfd4ud9uxea4n8ahdshzpflx0feh0s1r9
nv6e6fxipxiclaqs4hul93pbka016cq2l9cogc3tueqhwuximt11gws89uhjf81dhkowdrr0lh19yso5r2hclhblr3c0ykac7ieecfth4xbyz31hr3ug4gd8lbgkvh0hm5tgtyy84qnptuif7l6do0ksktathok43eyflzcq7z8wulniynjhqmnlrdjisosqt3mr02iuahfg7ldzvop71ezhxzgix6xzn93e2pjfo9ejjp5aqgocif4vob6t1be3hg0hn5597h5f66v2m6swkjed482ggfnw894rhxaurk67dsj2albqndwkz25rjxrahlj2eeinxqq93a30o86i865jter4g7wm0brhz5csy5kgb51sawjkc9kfcsqz92c4d7cf7f28kgtruroohlxli6bve96r2ff3eoqqupwifodag20lfp25t0fx61254kwm3s6runk0hcfn9opo2fsduoibmqdrzfnmmf431af441tkur3rhx1o08ovl9uf4xxachfj7acy58g7fnfkk4od5pfjvr20l85dfes3tq9vyom2vbx0m2r5sokx6c7chetqpphdk9etnpqxwwspcnqrq8hcrjka52lgc1y41fvnhxcdisx7w9b9l19p2rmdj4tqx4ezjghgi06b9vil6hkvllt3m2ym9e21mlqy1v0e5yoypwj32k5ri8et1bn2uqmjaw8gpcegsvv9wz45i2ggnntszj0nx05dl1u46mcb66y43xd2tyow6v6wokbeputut9ybaxz7o3hcq41s2p97mxpv34guz7xxbdhezhvja06m298nqx0qsh4tn14vcwt7kdv9rvpcwljzese17dwmnhmi38iqxf8hp5dannyffr8ijrwmxn3ezmfg6f4l8a4mwyhfcjvziu4uy9a62d8wrfskakvwh9h89rfjpa4k8edrq6jjkf9a7xu0vibp3g3escq0jjt209xxyztpy1u1yf9r2hgmw7mwa4j4565w74ib5ocn8q5wrawc9m17o1i8e3mvoc6bkj96nwm5iti5krq5xjwy8rd7j0tjkjzvupus5d64d1zt4fezbuvhieygyl0jlv8s8m24u3jj822tf1wwukafoksuigt1ttsni935ly51cqokl5k09t4y3hjszbj1cisjg18cqsqwqcrlai7exaefpknsa6euj7k0kaqbf4lya7f29zmytpzc32t3vqs2b0967a80ybxykz6fvyjdraj43831rnq19xh4m000gyi0bu5jgp8x53eibhya5xtqdglmdjrhen0md1fha4k0y95ze3v5cxssdeyqwj9y8a8eco858nf9uj6f83z3uk9yk7zi90lmcy8bm6es36hxwj8g1yygj5evne8ire0q6pa2e0jh9wmubpsv53tcof1pvhvvd7b03i2srdeykcq7sn88bv2huz39pmd4m012nx6jib7c53ape70i6gwefvatqetrvj94oq6bm50eqo4c72csqdwn55xpoq27pqot90zfzpnb690eund78eoss6ltg7zhkc4hk0qjvrl8me8cgzy0py4btyhhsks9i0veou0ia84nxbkwo758dn3m0kgp60jxcrgdjuyojbh5u67qlu69lthdzmzshij0mhlaa05rdrn9vdv1440v2rozpwdtvxzfvykb0tjx700eqdr164zy7d3ji8g82souaiui7n96my35ocgt0xmdrss'
FROM range(0,90) tbl(i);
# Followed by many empty strings ->
statement ok
INSERT INTO TEST SELECT '' FROM range(0,100000) tbl(i);
# With the finishing move of one more very long string
statement ok
INSERT INTO TEST values ('33y93uhg3qi3f13hnh8xjrvvbz7iwevroaaif8v5ecfetwnd5yqde9mna8753399lj5r2u5rps5tuu0xsetmsxi80dacku7uz0q3r7h3r9gboq4c41t4h8brm6t7hcb2zg41g7elknivchc3ff409nhczhc7ciledplgcql5sfjb1x2ctuvf6cjdckfkq56ranj67qkzdnvj8guw8rxyd55298ziig0adhqsi0jdx60exhq6vv8hfqarfpvfma8qoqtv5kzte4mofm41w5uy0zdk4tjd5i7673pyigseb4mb3g6u5rwac1s8s1xcoobjoexp340hbr7vk1fwqdfisdm94967tvqxmirn6ml4ccaw3r7bh04e2p5txvea0vi965t29vbv1858ystdqpc8s9aalij445brgn55gpwtgmpbg8tkg7plw05aw8auijbkl9v4go9azrbqcb3yipi1hphmazfrzuta6qdys89usu672vs18qbricebs2rjhsqud8iemug1fr31hoaxu0lg7yfg2i8i9ufmrkgg8dtv5dw5edxptw48meec88iunibox2bc72eq5d59fmg2db7r8xtj1wptoak72on1v5jis1s024hvebx3nudmx78s32dszihs8mvkpt37fxsza9nccbo0jmyu7wzspkl1tqtbkx17lc043weyu2qavtp7vzvuyk9j7l7gha5kr6x295l9sk610ya8820atujz6dpcnkp4fuyatyefo4acc1g05ipcf2bwjgtz48ewzivm8p6kh7e1dp715x5as9bvs5i9n9pxvhp6jo7clnj987iojgztfk2fk9980gr8i69frtci4cj6rxlf9ig228tyljlqd53nwxnz7mvqjymohyqjsuunjdnioo199cl8m6fvv3hu8vh0v8otcohrskwan81p1ncqkf2svf97gyz2cu7bfrvbv03o6bflqdm60ojl0z1yta41m416mkdljpuxzk7gs9tj121jnvfe9piw1ow3ibu66eyrs2cmxopyhio4h7toekptm4xysmx96pdbjxx0kugwj4mfgjwj40ijkpwz0kjs39eumuexyooldizgzidhzk7hecfqamlnolbescpiuqrfyewm4evn2tx3cnoun9y3mciact4ont84q9z99r4wt76wusjtoz11xv5rgkz1t2dy3wtzog1ifhmprfymrzt0lzlsugjru5m6ubq87dmyzu9fwo8sivuea2zedogsm93sgwgrbomalj213ejyic4w9wgobnqjogd9j0h24u8dddr238elpqwk4sr2dznvw2fthow2l5ckua9z022nj3hktzae4jnyu7ckohdowy91lmh6ns10sialoa146x5sfje0fc4i1dt1dwbcygtmadjwjhytsvsq879y9tfud08fg2c1a01yzb5yy1z5cj99aherty940w91qjl0zl27p58enhtdvr0wzqpkgkxno93vj9qm7fiszrzcbmlowwfeusty9dkm9i4vskzovrgjpp91op2654acbblntyxra484e8ejbmehtecdh25patwrs016w7gqtvtzgufg9znmuxwcczrwgqpfkgwj27pfu56z5sd9241vjz8364zxfwfogjvhdxcxfgh6u4lbqmu1870ei2snna70atxputk4ybpj1r1e0ut4zcz8786eofale8on8uysws7mfs8o3uen21zjtqnc260s6e9tc1lpw2qk2vjsg10escmlj8hliba24sauabdc7ttj2pmv2xjh0photfpywqfbw7f9av3jxyplfap42nc9ttz1xqbrb3seieu2cnnwjc8vqxuacprwcdznwzrc99e28zf5q6y9c2exm9rbofy09w854xkplwlx2v2qjfm36r4jhwi953i993xhgu3um0qeisyfw8aicc52itcc53e98wer9iluwqrttx25vjr2ukk7m0
olyzksbj9gaiu4tii3vcmk994ouvzse4khfd4ud9uxea4n8ahdshzpflx0feh0s1r9nv6e6fxipxiclaqs4hul93pbka016cq2l9cogc3tueqhwuximt11gws89uhjf81dhkowdrr0lh19yso5r2hclhblr3c0ykac7ieecfth4xbyz31hr3ug4gd8lbgkvh0hm5tgtyy84qnptuif7l6do0ksktathok43eyflzcq7z8wulniynjhqmnlrdjisosqt3mr02iuahfg7ldzvop71ezhxzgix6xzn93e2pjfo9ejjp5aqgocif4vob6t1be3hg0hn5597h5f66v2m6swkjed482ggfnw894rhxaurk67dsj2albqndwkz25rjxrahlj2eeinxqq93a30o86i865jter4g7wm0brhz5csy5kgb51sawjkc9kfcsqz92c4d7cf7f28kgtruroohlxli6bve96r2ff3eoqqupwifodag20lfp25t0fx61254kwm3s6runk0hcfn9opo2fsduoibmqdrzfnmmf431af441tkur3rhx1o08ovl9uf4xxachfj7acy58g7fnfkk4od5pfjvr20l85dfes3tq9vyom2vbx0m2r5sokx6c7chetqpphdk9etnpqxwwspcnqrq8hcrjka52lgc1y41fvnhxcdisx7w9b9l19p2rmdj4tqx4ezjghgi06b9vil6hkvllt3m2ym9e21mlqy1v0e5yoypwj32k5ri8et1bn2uqmjaw8gpcegsvv9wz45i2ggnntszj0nx05dl1u46mcb66y43xd2tyow6v6wokbeputut9ybaxz7o3hcq41s2p97mxpv34guz7xxbdhezhvja06m298nqx0qsh4tn14vcwt7kdv9rvpcwljzese17dwmnhmi38iqxf8hp5dannyffr8ijrwmxn3ezmfg6f4l8a4mwyhfcjvziu4uy9a62d8wrfskakvwh9h89rfjpa4k8edrq6jjkf9a7xu0vibp3g3escq0jjt209xxyztpy1u1yf9r2hgmw7mwa4j4565w74ib5ocn8q5wrawc9m17o1i8e3mvoc6bkj96nwm5iti5krq5xjwy8rd7j0tjkjzvupus5d64d1zt4fezbuvhieygyl0jlv8s8m24u3jj822tf1wwukafoksuigt1ttsni935ly51cqokl5k09t4y3hjszbj1cisjg18cqsqwqcrlai7exaefpknsa6euj7k0kaqbf4lya7f29zmytpzc32t3vqs2b0967a80ybxykz6fvyjdraj43831rnq19xh4m000gyi0bu5jgp8x53eibhya5xtqdglmdjrhen0md1fha4k0y95ze3v5cxssdeyqwj9y8a8eco858nf9uj6f83z3uk9yk7zi90lmcy8bm6es36hxwj8g1yygj5evne8ire0q6pa2e0jh9wmubpsv53tcof1pvhvvd7b03i2srdeykcq7sn88bv2huz39pmd4m012nx6jib7c53ape70i6gwefvatqetrvj94oq6bm50eqo4c72csqdwn55xpoq27pqot90zfzpnb690eund78eoss6ltg7zhkc4hk0qjvrl8me8cgzy0py4btyhhsks9i0veou0ia84nxbkwo758dn3m0kgp60jxcrgdjuyojbh5u67qlu69lthdzmzshij0mhlaa05rdrn9vdv1440v2rozpwdtvxzfvykb0tjx700eqdr164zy7d3ji8g82souaiui7n96my35ocgt0xmdrss');
# Now create our FSST table
statement ok
pragma force_compression='fsst';
# copy the staged rows into a new table while FSST is forced
statement ok
CREATE TABLE TEST2 as SELECT * FROM TEST;
# the test passes if the checkpoint completes without error
statement ok
CHECKPOINT;

View File

@@ -0,0 +1,29 @@
# name: test/sql/storage/compression/fsst/issue_5675_followup.test
# description: Issue #5675: Follow up test to cover issue when the 1+th segment of a rowgroup is flushed with 1 value
# group: [fsst]
load __TEST_DIR__/issue_5675_followup.db readwrite v1.0.0
require parquet
# run single-threaded
statement ok
pragma threads=1
# staging table; it is copied into an FSST-compressed table at the end of the test
statement ok
CREATE TABLE TEST (col VARCHAR);
# This will fill up a segment completely with 1 value
statement ok
INSERT INTO TEST SELECT CASE WHEN i%2==0
THEN
'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'
ELSE
'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'
END FROM range(0,2214) tbl(i);
# Now create our FSST table
statement ok
pragma force_compression='fsst';
# copy the staged rows into a new table while FSST is forced; the test passes if this succeeds
statement ok
CREATE TABLE TEST2 as SELECT * FROM TEST;

View File

@@ -0,0 +1,12 @@
# name: test/sql/storage/compression/fsst/issue_5759.test
# description: Issue #5759: segfault on sample creation
# group: [fsst]
load __TEST_DIR__/issue_5759.db readwrite v1.0.0
statement ok
pragma force_compression='fsst'
# With many short and a few long strings, the sample calculation would overflow
# (roughly 5% of the 1000 rows are 3000-character strings, the rest are the single character 'c')
statement ok
CREATE TABLE trigger5759 AS SELECT CASE WHEN RANDOM() > 0.95 THEN repeat('ab', 1500) ELSE 'c' END FROM range(0,1000);

View File

@@ -0,0 +1,39 @@
# name: test/sql/storage/compression/patas/patas_read.test_slow
# description: Test reading a pre-built database file whose floating point columns use Patas compression
# group: [patas]
# The database is written with a vector size of 2048.
require vector_size 2048
load test/sql/storage/compression/patas/patas.db readonly
query I
select count(temperature) from temperatures_double;
----
245000
query I
select count(temperature) from temperatures_float;
----
245000
# scan both columns completely; no expected values are listed, the results are only tagged with the label 'res'
query I nosort res
select temperature from temperatures_double;
----
query I nosort res
select temperature from temperatures_float;
----
# Expect no rows: every DOUBLE / FLOAT data segment must report Patas.
# The segment type is matched case-insensitively: pragma_storage_info reports upper-case type
# names, so a case-sensitive comparison against 'double' / 'float' would never match a row
# and the check would pass vacuously.
query I
SELECT compression FROM pragma_storage_info('temperatures_double') WHERE segment_type ILIKE 'double' AND compression != 'Patas';
----
query I
SELECT compression FROM pragma_storage_info('temperatures_float') WHERE segment_type ILIKE 'float' AND compression != 'Patas';
----
# Verify that the compression method is deprecated
statement error
pragma force_compression='patas'
----
deprecated

View File

@@ -0,0 +1,24 @@
# name: test/sql/storage/compression/rle/force_rle.test
# description: Test forcing RLE as the compression scheme
# group: [rle]
require vector_size 2048
load __TEST_DIR__/force_rle.db
statement ok
PRAGMA force_compression = 'rle'
statement ok
CREATE TABLE test_rle (a INTEGER);
# 2000 distinct values (0 .. 1999): there are no runs to exploit, yet RLE is still forced
statement ok
INSERT INTO test_rle SELECT i FROM range(0, 2000) tbl(i);
statement ok
CHECKPOINT
# exactly one INTEGER segment is expected, and it must report RLE
query I
SELECT compression FROM pragma_storage_info('test_rle') WHERE segment_type ILIKE 'INTEGER'
----
RLE

View File

@@ -0,0 +1,56 @@
# name: test/sql/storage/compression/rle/list_rle.test_slow
# description: Test storage with RLE inside lists
# group: [rle]
# load the DB from disk
load __TEST_DIR__/test_rle.db
statement ok
PRAGMA force_compression = 'rle'
statement ok
CREATE TABLE test (id INTEGER, l INTEGER[]);
# 200000 rows; blocks of 1000 consecutive ids alternate between the lists [1, 1, 1] and [2, 2]
statement ok
INSERT INTO test SELECT i, case when (i//1000)%2=0 then [1, 1, 1] else [2, 2] end FROM range(200000) tbl(i)
# the first INTEGER segment reported must be RLE-compressed
query I
SELECT compression FROM pragma_storage_info('test') WHERE segment_type ILIKE 'INTEGER' LIMIT 1
----
RLE
# full unnest
# 100000 rows x 3 elements of value 1 + 100000 rows x 2 elements of value 2
query II
SELECT COUNT(*), SUM(i) FROM (SELECT UNNEST(l) FROM test) tbl(i)
----
500000 700000
# filters/skips
# ids 5000..5999 all hold [2, 2]
query II
SELECT COUNT(*), SUM(i) FROM (SELECT UNNEST(l) FROM test WHERE id>=5000 AND id<6000) tbl(i)
----
2000 4000
# zonemaps
# ids 150000..159999: 5000 rows of [1, 1, 1] and 5000 rows of [2, 2]
query II
SELECT COUNT(*), SUM(i) FROM (SELECT UNNEST(l) FROM test WHERE id>=150000 AND id<160000) tbl(i)
----
25000 35000
statement ok
CREATE INDEX i_index ON test(id)
# index lookup in lists
query II
SELECT * FROM test WHERE id=150001
----
150001 [1, 1, 1]
# large lists
# 10 groups (i % 10), each aggregating 10000 negated values into a single list
statement ok
CREATE TABLE test_large_list AS SELECT i%10 AS id, LIST(-i) AS list FROM range(0,100000) tbl(i) GROUP BY id;
# the sum of -i for i in 0..99999 is -4999950000
query II
SELECT COUNT(*), SUM(i) FROM (SELECT UNNEST(list) FROM test_large_list) tbl(i)
----
100000 -4999950000

View File

@@ -0,0 +1,29 @@
# name: test/sql/storage/compression/rle/rle_bool.test
# description: Test RLE with booleans
# group: [rle]
# load the DB from disk
load __TEST_DIR__/test_rle_bool.db
statement ok
PRAGMA force_compression = 'rle'
# simple RLE with few values
statement ok
CREATE TABLE test (a BOOLEAN);
# one run of 2048 x false followed by one run of 2048 x true
statement ok
INSERT INTO test select false from range(2048);
statement ok
INSERT INTO test select true from range(2048);
# both runs must be read back completely
query I
SELECT COUNT(*) FROM test WHERE a=false
----
2048
# the second check previously repeated a=false, leaving the run of true values unverified
query I
SELECT COUNT(*) FROM test WHERE a=true
----
2048

View File

@@ -0,0 +1,84 @@
# name: test/sql/storage/compression/rle/rle_compression_ratio.test_slow
# description: Assert rle compression ratio is within reasonable margins
# group: [rle]
# load the DB from disk
load __TEST_DIR__/test_rle.db
statement ok
PRAGMA force_compression='rle'
# Maximum rle compression for this column:
# Uncompressed size: 5 x 64bit
# Compressed size: 1 x 64bit + 1x 16bit (sizeof rle_count_t)
# Ratio: (5*64) / (64 + 16) = 4
statement ok
CREATE TABLE test_rle AS SELECT (i//5)::INT64 FROM range(0, 2500000) tbl(i);
statement ok
checkpoint
# Uncompressed baseline built from exactly the same data as test_rle (runs of 5 equal values),
# so the ratio computed below matches the calculation above.
statement ok
PRAGMA force_compression='uncompressed'
statement ok
CREATE TABLE test_uncompressed AS SELECT (i//5)::INT64 FROM range(0, 2500000) tbl(i);
statement ok
checkpoint
# This query keeps a pretty wide margin in compression ratio on purpose to account for possible implementation changes
# that influence compression ratio.
# The ratio is measured as the number of distinct blocks used by the data segments of each table.
query II
select (uncompressed::FLOAT // rle::FLOAT) > 2.5, (uncompressed::FLOAT // rle::FLOAT) < 4.5 FROM (
select
(select count(distinct block_id) from pragma_storage_info('test_rle') where segment_type not in('VARCHAR', 'VALIDITY')) as rle,
(select count(distinct block_id) from pragma_storage_info('test_uncompressed') where segment_type not in('VARCHAR', 'VALIDITY')) as uncompressed
)
----
True True
statement ok
drop table test_rle
statement ok
drop table test_uncompressed
# Assert that all supported types do in fact compress
foreach type <numeric>
statement ok
PRAGMA force_compression='uncompressed';
# runs of 10 equal values, cycling through 0..99 so the data fits every numeric type
statement ok
CREATE TABLE test_uncompressed AS SELECT ((i//10)%100)::${type} FROM range(0, 2500000) tbl(i);
statement ok
checkpoint
statement ok
PRAGMA force_compression='rle'
statement ok
CREATE TABLE test_rle AS SELECT ((i//10)%100)::${type} FROM range(0, 2500000) tbl(i);
statement ok
checkpoint
# assert compression ratio >2 which should be achieved for even the smallest types for this data
# (the second column only echoes the type under test via a cast of 1)
query II
select (uncompressed::FLOAT // rle::FLOAT) > 2, CAST(1 as ${type}) FROM (
select
(select count(distinct block_id) from pragma_storage_info('test_rle') where segment_type not in('VARCHAR', 'VALIDITY')) as rle,
(select count(distinct block_id) from pragma_storage_info('test_uncompressed') where segment_type not in('VARCHAR', 'VALIDITY')) as uncompressed
)
----
True 1
statement ok
drop table test_rle
statement ok
drop table test_uncompressed
endloop

View File

@@ -0,0 +1,74 @@
# name: test/sql/storage/compression/rle/rle_constant.test
# description: Test RLE where we can emit ConstantVectors when scanning
# group: [rle]
load __TEST_DIR__/test_rle.db
require vector_size 2048
# we check vector types explicitly in this test
require no_vector_verification
statement ok
PRAGMA force_compression = 'rle'
# simple RLE with few values
statement ok
CREATE TABLE test (a INTEGER);
# Produces two full vectors from one run
statement ok
INSERT INTO test select 0 from range(4096);
# Produces one full vector from one run
statement ok
INSERT INTO test select 1 from range(2048);
# Ditto
statement ok
INSERT INTO test select 2 from range(2048);
# These do not fully fill the Vector, so they don't produce ConstantVectors
statement ok
INSERT INTO test select 3 from range(1024)
statement ok
INSERT INTO test select 4 from range(1024)
statement ok
INSERT INTO test select 5 from range(512)
statement ok
INSERT INTO test select 6 from range(512)
statement ok
INSERT INTO test select 7 from range(512)
statement ok
INSERT INTO test select 8 from range(512)
statement ok
checkpoint;
# Some of them produce constant vectors, but not all
query I
select distinct on (types) vector_type(a) as types from test order by all;
----
CONSTANT_VECTOR
FLAT_VECTOR
statement ok
PRAGMA disable_optimizer
# The first 4 vectors are constant
# (8192 rows = 4096 + 2048 + 2048, i.e. the three runs that fill whole vectors of 2048)
query I
select distinct on (types) types from (select vector_type(a) from test limit 8192) tbl(types)
----
CONSTANT_VECTOR
# The other vectors are not constant
query I
select distinct on (types) types from (select vector_type(a) from test offset 8192) tbl(types)
----
FLAT_VECTOR

View File

@@ -0,0 +1,113 @@
# name: test/sql/storage/compression/rle/rle_filter.test
# description: Test filtering from RLE compression
# group: [rle]
# load the DB from disk
load __TEST_DIR__/test_rle_filter.db
statement ok
pragma enable_verification
statement ok
PRAGMA force_compression = 'rle'
# 100000 rows: id is unique, rle_val forms runs of 50 equal values (i // 50),
# rle_val_null is the same value but NULL for every 8th row (i % 8 = 0)
statement ok
CREATE TABLE tbl AS SELECT i id, i // 50 rle_val, case when i%8=0 then null else i // 50 end rle_val_null FROM range(100000) t(i);
# point lookup combined with a filter on the RLE column
query III
SELECT * FROM tbl WHERE id = 5040 AND rle_val=100
----
5040 100 NULL
# same lookup, but the RLE column is filtered through an expression instead of a plain comparison
query III
SELECT * FROM tbl WHERE id = 5040 AND substr(rle_val::VARCHAR, 1, 3)='100'
----
5040 100 NULL
# range filter on id combined with the RLE filter: returns the tail of the run (ids 5020..5049)
query III
SELECT * FROM tbl WHERE id >= 5020 AND rle_val=100
----
5020 100 100
5021 100 100
5022 100 100
5023 100 100
5024 100 NULL
5025 100 100
5026 100 100
5027 100 100
5028 100 100
5029 100 100
5030 100 100
5031 100 100
5032 100 NULL
5033 100 100
5034 100 100
5035 100 100
5036 100 100
5037 100 100
5038 100 100
5039 100 100
5040 100 NULL
5041 100 100
5042 100 100
5043 100 100
5044 100 100
5045 100 100
5046 100 100
5047 100 100
5048 100 NULL
5049 100 100
# filter on the RLE column only: returns the complete run (ids 5000..5049)
query III
SELECT * FROM tbl WHERE rle_val=100
----
5000 100 NULL
5001 100 100
5002 100 100
5003 100 100
5004 100 100
5005 100 100
5006 100 100
5007 100 100
5008 100 NULL
5009 100 100
5010 100 100
5011 100 100
5012 100 100
5013 100 100
5014 100 100
5015 100 100
5016 100 NULL
5017 100 100
5018 100 100
5019 100 100
5020 100 100
5021 100 100
5022 100 100
5023 100 100
5024 100 NULL
5025 100 100
5026 100 100
5027 100 100
5028 100 100
5029 100 100
5030 100 100
5031 100 100
5032 100 NULL
5033 100 100
5034 100 100
5035 100 100
5036 100 100
5037 100 100
5038 100 100
5039 100 100
5040 100 NULL
5041 100 100
5042 100 100
5043 100 100
5044 100 100
5045 100 100
5046 100 100
5047 100 100
5048 100 NULL
5049 100 100

View File

@@ -0,0 +1,37 @@
# name: test/sql/storage/compression/rle/rle_filter.test_slow
# description: Test filtering from RLE compression
# group: [rle]
# load the DB from disk
load __TEST_DIR__/test_rle_filter.db
statement ok
pragma enable_verification
statement ok
SET force_compression = 'rle'
# every value i in 0..4999 appears exactly i times, i.e. runs of increasing length
statement ok
CREATE TABLE tbl AS select unnest(repeat([i], i)) i from range(5000) t(i);
# filtering on a value must return exactly as many rows as the value itself (0 rows for 0)
foreach test_val 0 7 993 2525 4375 4999
query I
SELECT COUNT(*) = ${test_val} FROM tbl WHERE i=${test_val}
----
true
endloop
# now with NULL values
# NOTE(review): the statement below is identical to the one that creates tbl and does not
# produce any NULL values - confirm whether NULLs were meant to be mixed into tbl2.
statement ok
CREATE TABLE tbl2 AS select unnest(repeat([i], i)) i from range(5000) t(i);
foreach test_val 1 12 736 1237 2314 3333
query I
SELECT COUNT(*) = ${test_val} FROM tbl2 WHERE i=${test_val}
----
true
endloop

View File

@@ -0,0 +1,33 @@
# name: test/sql/storage/compression/rle/rle_filter_pushdown.test
# description: Filter pushdown with RLE columns
# group: [rle]
# load the DB from disk
load __TEST_DIR__/test_rle.db
statement ok
PRAGMA force_compression = 'rle'
statement ok
CREATE TABLE test (id VARCHAR, col INTEGER);
# ids '0'..'4999' get col = 1, ids '5000'..'9999' get col = 2: col consists of two long runs
statement ok
INSERT INTO test SELECT i::VARCHAR id, 1 b FROM range(5000) tbl(i)
statement ok
INSERT INTO test SELECT (5000 + i)::VARCHAR id, 2 b FROM range(5000) tbl(i)
statement ok
CHECKPOINT
# filter on the RLE column
# 5000 rows with col = 2, so the sum is 10000
query IIII
SELECT SUM(col), MIN(col), MAX(col), COUNT(*) FROM test WHERE col=2
----
10000 2 2 5000
# filter on non-rle column
# id '5000' is the first row of the second insert, so col = 2
query IIIIII
SELECT MIN(id), MAX(id), SUM(col), MIN(col), MAX(col), COUNT(*) FROM test WHERE id='5000'
----
5000 5000 2 2 2 1

View File

@@ -0,0 +1,31 @@
# name: test/sql/storage/compression/rle/rle_index_fetch.test
# description: Fetch from RLE column with index
# group: [rle]
# load the DB from disk
load __TEST_DIR__/test_rle.db
statement ok
PRAGMA force_compression = 'rle'
# id is an INTEGER primary key here
statement ok
CREATE TABLE test(id INTEGER PRIMARY KEY, col INTEGER);
# ids 0..4999 get col = 1, ids 5000..9999 get col = 2: col consists of two long runs
# NOTE(review): the ids are inserted as VARCHAR and the final filter compares against the string
# '5000', so the test depends on casts to the INTEGER key column - confirm this is intended.
statement ok
INSERT INTO test SELECT i::VARCHAR id, 1 b FROM range(5000) tbl(i)
statement ok
INSERT INTO test SELECT (5000 + i)::VARCHAR id, 2 b FROM range(5000) tbl(i)
statement ok
CHECKPOINT
# the first INTEGER segment reported must be RLE-compressed
query I
SELECT compression FROM pragma_storage_info('test') WHERE segment_type ILIKE 'INTEGER' LIMIT 1
----
RLE
# look up a single row by its key; id 5000 is the first row of the second insert, so col = 2
query IIIIII
SELECT MIN(id), MAX(id), SUM(col), MIN(col), MAX(col), COUNT(*) FROM test WHERE id='5000'
----
5000 5000 2 2 2 1

View File

@@ -0,0 +1,25 @@
# name: test/sql/storage/compression/rle/rle_many_repeated.test_slow
# description: Test RLE with very long runs of repeated values
# group: [rle]
require vector_size 2048
load __TEST_DIR__/rle_many_repeated.db
statement ok
PRAGMA force_compression = 'rle'
statement ok
CREATE TABLE test_rle (a BIGINT);
# one run of 65535 x 3 followed by one run of 100000 x 4
# (presumably sized around the limit of a 16-bit run-length counter - TODO confirm)
statement ok
INSERT INTO test_rle SELECT 3::BIGINT FROM range(0, 65535) UNION ALL SELECT 4::BIGINT FROM range(100000);
statement ok
CHECKPOINT
# both runs must be read back with their full length;
# ORDER BY makes the row order of the grouped result deterministic
query II
SELECT a, COUNT(*) FROM test_rle GROUP BY ALL ORDER BY a
----
3 65535
4 100000

Some files were not shown because too many files have changed in this diff Show More