should be it
238 external/duckdb/test/optimizer/compressed_materialization.test_slow (vendored, Normal file)
@@ -0,0 +1,238 @@
# name: test/optimizer/compressed_materialization.test_slow
# description: Compressed materialization test
# group: [optimizer]

statement ok
pragma enable_verification

statement ok
PRAGMA explain_output = OPTIMIZED_ONLY

# these functions live in the catalog, but cannot be called directly
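# (the compressed materialization optimizer inserts the __internal_compress_*/__internal_decompress_* calls itself; the EXPLAIN checks below look for those calls in the plan)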
statement error
select __internal_compress_string_utinyint('L')
----
Binder Error: Compressed materialization functions are for internal use only!

# internal issue 1576
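# note: a = b = range % 400000, so the filter b > 2 removes a in {0, 1, 2}; values 0..99999 each occur twice in the 500000 rows,
# hence after ORDER BY a the first two qualifying rows (a = 3) carry row numbers 7 and 8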
statement ok
create table t0 as select range%400000 a, range%400000 b from range(500000);

query III rowsort
select * from (
select *, row_number() OVER () as row_number from (
SELECT * FROM t0 ORDER BY 1) ta
) tb where b > 2
order by a limit 2;
----
3 3 7
3 3 8

# tricky tests taken from test/sql/subquery/scalar/test_issue_6136.test
# we run these with one thread since they are order dependent
statement ok
create table r as select * from values (1, 1, 'a', 'A'), (1, null, 'b', 'B'), (1, 2, 'c', 'C'), (2, null, 'd', 'D') t(ra, rb, x, y);

statement ok
create table b as select * from values (1, 1, 1), (2, 1, 2), (3, 1, 3), (4, 1, null), (5, 2, 1), (6, 2, null), (7, 99, 99) t(id, ba, bb);

statement ok
set threads=1

query T
select (
select {'x': first(x order by x), 'y': first(y order by y), '__matches': count(*)}
from (
select *
from r
where ba = ra
and (bb = rb or rb is null)
order by all
)
group by ra, rb
order by all
limit 1)
from b
order by all
----
{'x': a, 'y': A, '__matches': 1}
{'x': b, 'y': B, '__matches': 1}
{'x': b, 'y': B, '__matches': 1}
{'x': b, 'y': B, '__matches': 1}
{'x': d, 'y': D, '__matches': 1}
{'x': d, 'y': D, '__matches': 1}
NULL

query T
select
coalesce((select {'x': first(x), 'y': first(y), '__matches': count(*)} from r where ba = ra and (bb = rb or rb is null) group by ra, rb order by bb = rb limit 1), {'x': null, 'y': null, '__matches': 0}) as ref2
from b
----
{'x': a, 'y': A, '__matches': 1}
{'x': c, 'y': C, '__matches': 1}
{'x': b, 'y': B, '__matches': 1}
{'x': b, 'y': B, '__matches': 1}
{'x': d, 'y': D, '__matches': 1}
{'x': d, 'y': D, '__matches': 1}
{'x': NULL, 'y': NULL, '__matches': 0}

statement ok
set threads=4

# we should see compress twice (in the ORDER BY expression and payload) and decompress once (just the payload)
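# (the sort key 10-i is only used for ordering and is not in the SELECT list, so presumably only the payload column i needs decompressing)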
statement ok
create table t1 as select range i from range(10)

query II
explain select i from t1 order by 10-i
----
logical_opt <REGEX>:(.*__internal_decompress.*){1}(.*__internal_compress.*){2}

statement ok
create table test as
select (range + 7) % 4 i,
(range + 7) % 11 j
from range(10)

# should see compress exactly twice (for columns i and j)
# if we see it fewer than two times, we're not compressing,
# and if we see it more than twice, we're likely compressing and decompressing twice (once for each ORDER BY)
# but we can compress once, then do both ORDER BYs, then decompress
query II
explain select count(i), count(j) from (select i, j from (select i, j from test order by j offset 1) order by j offset 1)
----
logical_opt <REGEX>:(.*__internal_compress.*){2}

# should see it exactly once here, as we can only compress the group (i), not the value being summed (j)
# after the GROUP BY we do the ORDER BY, and finally decompress
query II
explain select i, sum(j) from test group by i order by i
----
logical_opt <REGEX>:(.*__internal_compress.*){1}

# We can't deal with duplicate projections (yet), so this should see 3 compresses instead of 1
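# (presumably j is compressed once for the inner ORDER BY, and j1/j2 are compressed separately for the outer one, giving 3)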
query II
explain select count(j1), count(j2) from (select j j1, j j2 from (select j from test order by j offset 1) order by j1, j2 offset 1)
----
logical_opt <REGEX>:(.*__internal_compress.*){3}

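# both the DISTINCT and the ORDER BY use i and j, so each column should be compressed once (two compress calls in total)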
query II
explain select distinct i, j from test order by i, j
----
logical_opt <REGEX>:(.*__internal_compress.*){2}

# taken from third_party/sqllogictest/test/index/orderby_nosort/10/slt_good_27.test
# the problem was that statistics propagation created an index join after "filter_prune" happened
statement ok
CREATE TABLE tab3(pk INTEGER PRIMARY KEY, col0 INTEGER, col1 FLOAT, col2 TEXT, col3 INTEGER, col4 FLOAT, col5 TEXT)

statement ok
INSERT INTO tab3 VALUES
(0,461,479.93,'idmdh',456,464.90,'nczyk'),
(1,473,482.60,'bguxh',460,466.25,'oseln'),
(2,474,484.45,'bnzmd',461,467.13,'kvwna'),
(3,475,485.1,'obtlj',462,468.73,'jkjbo'),
(4,477,486.62,'gjtbr',463,469.9,'bhers'),
(5,479,489.59,'bkxfm',464,470.29,'aklru'),
(6,481,495.30,'owirt',466,471.55,'lysig'),
(7,482,496.31,'yergm',467,473.31,'rkpxn'),
(8,484,497.51,'fszui',468,474.44,'ztexm'),
(9,486,498.24,'eueji',469,477.28,'amvcc')

statement ok
CREATE UNIQUE INDEX idx_tab3_4 ON tab3 (col3)

query I
SELECT pk FROM tab3 WHERE col0 IN (SELECT col3 FROM tab3 WHERE (col1 > 93.79)) ORDER BY 1 DESC
----
0

# test that we compress all-NULL (from multiple Parquet files) to utinyint too (if union_by_name is true)
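# (an all-NULL column has no distinct non-NULL values, so it can presumably be compressed to utinyint regardless of its declared type)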
require parquet

#
statement ok
pragma disable_verification

# one column without NULL, and two columns (varchar and bigint) that are all NULL
statement ok
copy (select hash(range + 1) i, null::varchar j, null::bigint k from range(100)) to '__TEST_DIR__/cm1.parquet'

statement ok
copy (select hash(range + 1) i, null::varchar j, null::bigint k from range(100,200)) to '__TEST_DIR__/cm2.parquet'

# has NULL, and does not have non-NULL
query II
select
stats(j) LIKE '%[Has Null: true, Has No Null: false]%',
stats(k) LIKE '%[Has Null: true, Has No Null: false]%'
from read_parquet('__TEST_DIR__/cm*.parquet', union_by_name=true) limit 1
----
true true

# this should lead to a plan where both all-NULL columns (varchar j and bigint k) are compressed
statement ok
PRAGMA explain_output = PHYSICAL_ONLY

query II
explain select * from read_parquet('__TEST_DIR__/cm*.parquet', union_by_name=true) order by i
----
physical_plan <REGEX>:.*__internal_decompress.*__internal_decompress.*__internal_compress.*__internal_compress.*

# and of course some tpch stuff

require tpch

statement ok
call dbgen(sf=0.01)

statement ok
PRAGMA explain_output = PHYSICAL_ONLY

# tpch q1 should use perfect hash aggregate
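# (compressing the l_returnflag/l_linestatus group columns presumably shrinks the key domain enough for PERFECT_HASH_GROUP_BY)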
query II
EXPLAIN
SELECT
l_returnflag,
l_linestatus,
sum(l_quantity) AS sum_qty,
sum(l_extendedprice) AS sum_base_price,
sum(l_extendedprice * (1 - l_discount)) AS sum_disc_price,
sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) AS sum_charge,
avg(l_quantity) AS avg_qty,
avg(l_extendedprice) AS avg_price,
avg(l_discount) AS avg_disc,
count(*) AS count_order
FROM
lineitem
WHERE
l_shipdate <= CAST('1998-09-02' AS date)
GROUP BY
l_returnflag,
l_linestatus
ORDER BY
l_returnflag,
l_linestatus;
----
physical_plan <REGEX>:.*PERFECT_HASH_GROUP_BY.*

statement ok
PRAGMA explain_output = OPTIMIZED_ONLY

# test that we're compressing lineitem
query II
explain select * from lineitem order by l_shipdate
----
logical_opt <REGEX>:.*__internal_decompress.*__internal_compress.*

# test that we get the same result with and without compressed materialization
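# (both queries are tagged with the nosort label q0, so the test runner checks that they produce identical results)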
query IIIIIIIIIIIIIII nosort q0
select * from lineitem order by l_shipdate
----

statement ok
set disabled_optimizers to 'compressed_materialization'

query IIIIIIIIIIIIIII nosort q0
select * from lineitem order by l_shipdate
----