# name: test/optimizer/compressed_materialization.test_slow
# description: Compressed materialization test
# group: [optimizer]

# Layout notes for maintainers:
#  * every record (statement/query) is terminated by a blank line
#  * multi-column expected rows are TAB-separated
#  * EXPLAIN expectations use the <REGEX>: prefix so the plan text is pattern-matched
#    instead of being compared literally

statement ok
pragma enable_verification

statement ok
PRAGMA explain_output = OPTIMIZED_ONLY

# these functions live in the catalog, but cannot be called directly
statement error
select __internal_compress_string_utinyint('L')
----
Binder Error: Compressed materialization functions are for internal use only!

# internal issue 1576
statement ok
create table t0 as select range%400000 a, range%400000 b from range(500000);

query III rowsort
select * from (
    select *, row_number() OVER () as row_number from (
        SELECT * FROM t0 ORDER BY 1) ta
    ) tb
where b > 2
order by a
limit 2;
----
3	3	7
3	3	8

# tricky tests taken from test/sql/subquery/scalar/test_issue_6136.test
# we run these with one thread since they are order dependent
statement ok
create table r as select * from values
    (1, 1, 'a', 'A'),
    (1, null, 'b', 'B'),
    (1, 2, 'c', 'C'),
    (2, null, 'd', 'D')
    t(ra, rb, x, y);

statement ok
create table b as select * from values
    (1, 1, 1),
    (2, 1, 2),
    (3, 1, 3),
    (4, 1, null),
    (5, 2, 1),
    (6, 2, null),
    (7, 99, 99)
    t(id, ba, bb);

statement ok
set threads=1

# correlated scalar subquery: one struct per row of b, NULL when no row of r matches
query T
select (
    select {'x': first(x order by x), 'y': first(y order by y), '__matches': count(*)}
    from (
        select * from r
        where ba = ra and (bb = rb or rb is null)
        order by all
    )
    group by ra, rb
    order by all
    limit 1)
from b
order by all
----
{'x': a, 'y': A, '__matches': 1}
{'x': b, 'y': B, '__matches': 1}
{'x': b, 'y': B, '__matches': 1}
{'x': b, 'y': B, '__matches': 1}
{'x': d, 'y': D, '__matches': 1}
{'x': d, 'y': D, '__matches': 1}
NULL

# same correlation, but unmatched rows fall back to an explicit "no match" struct
query T
select coalesce((
    select {'x': first(x), 'y': first(y), '__matches': count(*)}
    from r
    where ba = ra and (bb = rb or rb is null)
    group by ra, rb
    order by bb = rb
    limit 1), {'x': null, 'y': null, '__matches': 0}) as ref2
from b
----
{'x': a, 'y': A, '__matches': 1}
{'x': c, 'y': C, '__matches': 1}
{'x': b, 'y': B, '__matches': 1}
{'x': b, 'y': B, '__matches': 1}
{'x': d, 'y': D, '__matches': 1}
{'x': d, 'y': D, '__matches': 1}
{'x': NULL, 'y': NULL, '__matches': 0}

statement ok
set threads=4

# NOTE(review): in the plan patterns below, `.*` can absorb additional occurrences of the
# function name, so a `{n}` quantifier only enforces "at least n" matches - it cannot by
# itself prove the "exactly n" that the comments describe.

# we should see compress twice (in the ORDER BY expression and payload) and decompress once (just the payload)
statement ok
create table t1 as select range i from range(10)

# NOTE(review): decompress is listed before compress in the pattern - presumably because the
# plan is rendered root-first; confirm against actual EXPLAIN output
query II
explain select i from t1 order by 10-i
----
logical_opt	<REGEX>:(.*__internal_decompress.*){1}(.*__internal_compress.*){2}

statement ok
create table test as select (range + 7) % 4 i, (range + 7) % 11 j from range(10)

# should see compress exactly twice (for columns i and j)
# if we see less than twice we're not compressing,
# and if we see it more than twice we're likely compressing and decompressing twice (once for each ORDER BY)
# but we can compress once, then do both ORDER BYs, then decompress
query II
explain select count(i), count(j) from (select i, j from (select i, j from test order by j offset 1) order by j offset 1)
----
logical_opt	<REGEX>:(.*__internal_compress.*){2}

# should see it exactly once here, as we can only compress the group (i), not the value being summed (j)
# after the GROUP BY we do the ORDER BY, and finally decompress
query II
explain select i, sum(j) from test group by i order by i
----
logical_opt	<REGEX>:(.*__internal_compress.*){1}

# We can't deal with duplicate projections (yet) so this should see 3 compresses instead of 1
query II
explain select count(j1), count(j2) from (select j j1, j j2 from (select j from test order by j offset 1) order by j1, j2 offset 1)
----
logical_opt	<REGEX>:(.*__internal_compress.*){3}

query II
explain select distinct i, j from test order by i, j
----
logical_opt	<REGEX>:(.*__internal_compress.*){2}

# taken from third_party/sqllogictest/test/index/orderby_nosort/10/slt_good_27.test
# the problem was that statistics propagation created an index join after "filter_prune" happened
statement ok
CREATE TABLE tab3(pk INTEGER PRIMARY KEY, col0 INTEGER, col1 FLOAT, col2 TEXT, col3 INTEGER, col4 FLOAT, col5 TEXT)

statement ok
INSERT INTO tab3 VALUES
    (0,461,479.93,'idmdh',456,464.90,'nczyk'),
    (1,473,482.60,'bguxh',460,466.25,'oseln'),
    (2,474,484.45,'bnzmd',461,467.13,'kvwna'),
    (3,475,485.1,'obtlj',462,468.73,'jkjbo'),
    (4,477,486.62,'gjtbr',463,469.9,'bhers'),
    (5,479,489.59,'bkxfm',464,470.29,'aklru'),
    (6,481,495.30,'owirt',466,471.55,'lysig'),
    (7,482,496.31,'yergm',467,473.31,'rkpxn'),
    (8,484,497.51,'fszui',468,474.44,'ztexm'),
    (9,486,498.24,'eueji',469,477.28,'amvcc')

statement ok
CREATE UNIQUE INDEX idx_tab3_4 ON tab3 (col3)

# only pk 0 qualifies: its col0 (461) is the single col0 value that also occurs in col3
query I
SELECT pk FROM tab3 WHERE col0 IN (SELECT col3 FROM tab3 WHERE (col1 > 93.79)) ORDER BY 1 DESC
----
0

# test that we compress all-NULL (from multiple Parquet files) to utinyint too (if union_by_name is true)
require parquet

# NOTE(review): verification is switched off for the remainder of the file - presumably
# because the stats() check below does not hold under query verification; confirm
statement ok
pragma disable_verification

# one column without NULL, and two columns (varchar and bigint) that are all NULL
statement ok
copy (select hash(range + 1) i, null::varchar j, null::bigint k from range(100)) to '__TEST_DIR__/cm1.parquet'

statement ok
copy (select hash(range + 1) i, null::varchar j, null::bigint k from range(100,200)) to '__TEST_DIR__/cm2.parquet'

# has NULL, and does not have non-NULL
query II
select
    stats(j) LIKE '%[Has Null: true, Has No Null: false]%',
    stats(k) LIKE '%[Has Null: true, Has No Null: false]%'
from read_parquet('__TEST_DIR__/cm*.parquet', union_by_name=true)
limit 1
----
true	true

# this should lead to a plan where both all-NULL columns (varchar j and bigint k) are compressed
statement ok
PRAGMA explain_output = PHYSICAL_ONLY

query II
explain select * from read_parquet('__TEST_DIR__/cm*.parquet', union_by_name=true) order by i
----
physical_plan	<REGEX>:.*__internal_decompress.*__internal_decompress.*__internal_compress.*__internal_compress.*

# and of course some tpch stuff
require tpch

statement ok
call dbgen(sf=0.01)

# NOTE(review): explain_output is already PHYSICAL_ONLY at this point (set in the parquet
# section above); the repeat is kept so this section does not depend on the previous one
statement ok
PRAGMA explain_output = PHYSICAL_ONLY

# tpch q1 should use perfect hash aggregate
query II
EXPLAIN SELECT
    l_returnflag,
    l_linestatus,
    sum(l_quantity) AS sum_qty,
    sum(l_extendedprice) AS sum_base_price,
    sum(l_extendedprice * (1 - l_discount)) AS sum_disc_price,
    sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) AS sum_charge,
    avg(l_quantity) AS avg_qty,
    avg(l_extendedprice) AS avg_price,
    avg(l_discount) AS avg_disc,
    count(*) AS count_order
FROM
    lineitem
WHERE
    l_shipdate <= CAST('1998-09-02' AS date)
GROUP BY
    l_returnflag,
    l_linestatus
ORDER BY
    l_returnflag,
    l_linestatus;
----
physical_plan	<REGEX>:.*PERFECT_HASH_GROUP_BY.*

statement ok
PRAGMA explain_output = OPTIMIZED_ONLY

# test that we're compressing lineitem
query II
explain select * from lineitem order by l_shipdate
----
logical_opt	<REGEX>:.*__internal_decompress.*__internal_compress.*

# test that we get the same result with and without compressed materialization
# (both queries share the label q0; no expected values are listed under ----)
query IIIIIIIIIIIIIII nosort q0
select * from lineitem order by l_shipdate
----

statement ok
set disabled_optimizers to 'compressed_materialization'

query IIIIIIIIIIIIIII nosort q0
select * from lineitem order by l_shipdate
----