should be it
This commit is contained in:
53
external/duckdb/test/sql/storage/parallel/batch_insert_filtered_row_groups.test_slow
vendored
Normal file
53
external/duckdb/test/sql/storage/parallel/batch_insert_filtered_row_groups.test_slow
vendored
Normal file
@@ -0,0 +1,53 @@
|
||||
# name: test/sql/storage/parallel/batch_insert_filtered_row_groups.test_slow
|
||||
# description: Test batch insert with small batches
|
||||
# group: [parallel]
|
||||
|
||||
require vector_size 512
|
||||
|
||||
require parquet
|
||||
|
||||
load __TEST_DIR__/insert_mix_batches.db
|
||||
|
||||
loop i 0 2
|
||||
|
||||
statement ok
|
||||
COPY (FROM range(10000000)) TO '__TEST_DIR__/many_small_batches.parquet' (row_group_size 5000)
|
||||
|
||||
statement ok
|
||||
CREATE TABLE test AS FROM '__TEST_DIR__/many_small_batches.parquet' t(i) WHERE (i // 6144) % 3 = 0;
|
||||
|
||||
query I
|
||||
SELECT * FROM test LIMIT 5 OFFSET 500000
|
||||
----
|
||||
1495328
|
||||
1495329
|
||||
1495330
|
||||
1495331
|
||||
1495332
|
||||
|
||||
query I
|
||||
SELECT * FROM test QUALIFY i <= lag(i) over ()
|
||||
----
|
||||
|
||||
# ensure that we still write close to our row group size as our row group size count, even for different block sizes
|
||||
query I
|
||||
SELECT MAX(count) > 100000 FROM pragma_storage_info('test')
|
||||
----
|
||||
true
|
||||
|
||||
# The median differs between block sizes because the upper bound of the segment size is the block size.
|
||||
require block_size 262144
|
||||
|
||||
query I
|
||||
SELECT MEDIAN(count) > 100000 FROM pragma_storage_info('test')
|
||||
----
|
||||
true
|
||||
|
||||
statement ok
|
||||
DROP TABLE test
|
||||
|
||||
# repeat with a low memory limit
|
||||
statement ok
|
||||
SET memory_limit='500MB'
|
||||
|
||||
endloop
|
||||
223
external/duckdb/test/sql/storage/parallel/batch_insert_mix_batches.test_slow
vendored
Normal file
223
external/duckdb/test/sql/storage/parallel/batch_insert_mix_batches.test_slow
vendored
Normal file
@@ -0,0 +1,223 @@
|
||||
# name: test/sql/storage/parallel/batch_insert_mix_batches.test_slow
|
||||
# description: Test batch insert with small batches
|
||||
# group: [parallel]
|
||||
|
||||
require parquet
|
||||
|
||||
load __TEST_DIR__/insert_mix_batches.db
|
||||
|
||||
statement ok
|
||||
COPY (FROM range(100000) tbl(i)) TO '__TEST_DIR__/mix_batches_small.parquet' (ROW_GROUP_SIZE 5000)
|
||||
|
||||
statement ok
|
||||
COPY (FROM range(100000, 400000) tbl(i)) TO '__TEST_DIR__/mix_batches_large.parquet' (ROW_GROUP_SIZE 200000)
|
||||
|
||||
statement ok
|
||||
COPY (FROM range(400000, 700000) tbl(i)) TO '__TEST_DIR__/mix_batches_odd.parquet' (ROW_GROUP_SIZE 999)
|
||||
|
||||
statement ok
|
||||
COPY (FROM range(700000, 1000000) tbl(i)) TO '__TEST_DIR__/mix_batches_odd_again.parquet' (ROW_GROUP_SIZE 99979)
|
||||
|
||||
|
||||
# create views that read the batches
|
||||
statement ok
|
||||
CREATE VIEW v1 AS SELECT * FROM parquet_scan(['__TEST_DIR__/mix_batches_small.parquet', '__TEST_DIR__/mix_batches_large.parquet', '__TEST_DIR__/mix_batches_odd.parquet', '__TEST_DIR__/mix_batches_odd_again.parquet'])
|
||||
|
||||
statement ok
|
||||
CREATE VIEW v2 AS FROM v1 WHERE (i//10000)%2=0;
|
||||
|
||||
statement ok
|
||||
CREATE VIEW v3 AS FROM v1 WHERE (i//10000)%2=0 OR (i>200000 AND i < 400000) OR (i>600000 AND i < 800000);
|
||||
|
||||
loop i 0 2
|
||||
|
||||
query I
|
||||
CREATE TABLE integers AS FROM v1;
|
||||
----
|
||||
1000000
|
||||
|
||||
# verify that we are not consuming an unnecessarily giant amount of blocks
|
||||
# we have a total of 1.1M values - this should not be more than 20 row groups (ideally it is 10)
|
||||
query I
|
||||
select count(distinct row_group_id) < 20 from pragma_storage_info('integers');
|
||||
----
|
||||
true
|
||||
|
||||
query IIIII
|
||||
SELECT SUM(i), MIN(i), MAX(i), COUNT(i), COUNT(*) FROM v1
|
||||
----
|
||||
499999500000 0 999999 1000000 1000000
|
||||
|
||||
query IIIII
|
||||
SELECT SUM(i), MIN(i), MAX(i), COUNT(i), COUNT(*) FROM integers
|
||||
----
|
||||
499999500000 0 999999 1000000 1000000
|
||||
|
||||
query I
|
||||
SELECT * FROM v1 LIMIT 5 OFFSET 99998
|
||||
----
|
||||
99998
|
||||
99999
|
||||
100000
|
||||
100001
|
||||
100002
|
||||
|
||||
query I
|
||||
SELECT * FROM integers LIMIT 5 OFFSET 99998
|
||||
----
|
||||
99998
|
||||
99999
|
||||
100000
|
||||
100001
|
||||
100002
|
||||
|
||||
# now do the same, but filter out half of the values
|
||||
query I
|
||||
CREATE TABLE integers2 AS FROM v2
|
||||
----
|
||||
500000
|
||||
|
||||
# verify that we are not consuming an unnecessarily giant amount of blocks
|
||||
# we have a total of 500K values - this should not be more than 20 row groups (ideally it is 5)
|
||||
query I
|
||||
select count(distinct row_group_id) < 20 from pragma_storage_info('integers2');
|
||||
----
|
||||
true
|
||||
|
||||
query IIIII
|
||||
SELECT SUM(i), MIN(i), MAX(i), COUNT(i), COUNT(*) FROM v2
|
||||
----
|
||||
247499750000 0 989999 500000 500000
|
||||
|
||||
query IIIII
|
||||
SELECT SUM(i), MIN(i), MAX(i), COUNT(i), COUNT(*) FROM integers2
|
||||
----
|
||||
247499750000 0 989999 500000 500000
|
||||
|
||||
query I
|
||||
SELECT * FROM v2 LIMIT 5 OFFSET 99998
|
||||
----
|
||||
189998
|
||||
189999
|
||||
200000
|
||||
200001
|
||||
200002
|
||||
|
||||
query I
|
||||
SELECT * FROM integers2 LIMIT 5 OFFSET 99998
|
||||
----
|
||||
189998
|
||||
189999
|
||||
200000
|
||||
200001
|
||||
200002
|
||||
|
||||
# do it again, but this time only filter out SOME small batches
|
||||
query I
|
||||
CREATE TABLE integers3 AS FROM v3
|
||||
----
|
||||
700000
|
||||
|
||||
# verify that we are not consuming an unnecessarily giant amount of blocks
|
||||
# we have a total of 750K values - this should not be more than 20 row groups (ideally it is 7)
|
||||
query I
|
||||
select count(distinct row_group_id) < 20 from pragma_storage_info('integers3');
|
||||
----
|
||||
true
|
||||
|
||||
query IIIII
|
||||
SELECT SUM(i), MIN(i), MAX(i), COUNT(i), COUNT(*) FROM v3
|
||||
----
|
||||
348499650000 0 989999 700000 700000
|
||||
|
||||
query IIIII
|
||||
SELECT SUM(i), MIN(i), MAX(i), COUNT(i), COUNT(*) FROM integers3
|
||||
----
|
||||
348499650000 0 989999 700000 700000
|
||||
|
||||
query I
|
||||
SELECT * FROM v3 LIMIT 5 OFFSET 9999
|
||||
----
|
||||
9999
|
||||
20000
|
||||
20001
|
||||
20002
|
||||
20003
|
||||
|
||||
query I
|
||||
SELECT * FROM integers3 LIMIT 5 OFFSET 9999
|
||||
----
|
||||
9999
|
||||
20000
|
||||
20001
|
||||
20002
|
||||
20003
|
||||
|
||||
statement ok
|
||||
DROP TABLE integers
|
||||
|
||||
statement ok
|
||||
DROP TABLE integers2
|
||||
|
||||
statement ok
|
||||
DROP TABLE integers3
|
||||
|
||||
statement ok
|
||||
drop view if exists v2;
|
||||
|
||||
statement ok
|
||||
drop view if exists v3;
|
||||
|
||||
# create views that read the batches using unions
|
||||
statement ok
|
||||
CREATE OR REPLACE VIEW v1 AS FROM '__TEST_DIR__/mix_batches_small.parquet' UNION ALL FROM '__TEST_DIR__/mix_batches_large.parquet' UNION ALL FROM '__TEST_DIR__/mix_batches_odd.parquet' UNION ALL FROM '__TEST_DIR__/mix_batches_odd_again.parquet'
|
||||
|
||||
statement ok
|
||||
CREATE OR REPLACE VIEW v2 AS FROM v1 WHERE (i//10000)%2=0;
|
||||
|
||||
statement ok
|
||||
CREATE OR REPLACE VIEW v3 AS FROM v1 WHERE (i//10000)%2=0 OR (i>200000 AND i < 400000) OR (i>600000 AND i < 800000);
|
||||
|
||||
endloop
|
||||
|
||||
|
||||
statement ok
|
||||
CREATE TABLE integers4(i INTEGER)
|
||||
|
||||
# mix batches transaction
|
||||
statement ok
|
||||
BEGIN TRANSACTION
|
||||
|
||||
statement ok
|
||||
INSERT INTO integers4 FROM '__TEST_DIR__/mix_batches_small.parquet'
|
||||
|
||||
statement ok
|
||||
INSERT INTO integers4 FROM '__TEST_DIR__/mix_batches_large.parquet'
|
||||
|
||||
statement ok
|
||||
INSERT INTO integers4 FROM '__TEST_DIR__/mix_batches_odd.parquet'
|
||||
|
||||
statement ok
|
||||
INSERT INTO integers4 FROM '__TEST_DIR__/mix_batches_odd_again.parquet'
|
||||
|
||||
statement ok
|
||||
COMMIT
|
||||
|
||||
query I
|
||||
select count(distinct row_group_id) < 20 from pragma_storage_info('integers4');
|
||||
----
|
||||
true
|
||||
|
||||
query IIIII
|
||||
SELECT SUM(i), MIN(i), MAX(i), COUNT(i), COUNT(*) FROM integers4
|
||||
----
|
||||
499999500000 0 999999 1000000 1000000
|
||||
|
||||
query I
|
||||
SELECT * FROM integers4 LIMIT 5 OFFSET 99998
|
||||
----
|
||||
99998
|
||||
99999
|
||||
100000
|
||||
100001
|
||||
100002
|
||||
185
external/duckdb/test/sql/storage/parallel/batch_insert_small_batches.test_slow
vendored
Normal file
185
external/duckdb/test/sql/storage/parallel/batch_insert_small_batches.test_slow
vendored
Normal file
@@ -0,0 +1,185 @@
|
||||
# name: test/sql/storage/parallel/batch_insert_small_batches.test_slow
|
||||
# description: Test batch insert with small batches
|
||||
# group: [parallel]
|
||||
|
||||
require parquet
|
||||
|
||||
load __TEST_DIR__/insert_small_batches.db
|
||||
|
||||
foreach row_group_size 5000 100000
|
||||
|
||||
statement ok
|
||||
COPY (FROM range(1000000) tbl(i)) TO '__TEST_DIR__/small_batches.parquet' (ROW_GROUP_SIZE ${row_group_size})
|
||||
|
||||
statement ok
|
||||
drop view if exists v2;
|
||||
|
||||
statement ok
|
||||
drop view if exists v3;
|
||||
|
||||
statement ok
|
||||
CREATE VIEW v1 AS SELECT * FROM '__TEST_DIR__/small_batches.parquet'
|
||||
|
||||
statement ok
|
||||
CREATE VIEW v2 AS FROM v1 WHERE (i//10000)%2=0;
|
||||
|
||||
statement ok
|
||||
CREATE VIEW v3 AS FROM v1 WHERE (i//10000)%2=0 OR (i>200000 AND i < 400000) OR (i>600000 AND i < 800000);
|
||||
|
||||
query I
|
||||
CREATE TABLE integers AS FROM v1;
|
||||
----
|
||||
1000000
|
||||
|
||||
# verify that we are not consuming an unnecessarily giant amount of blocks
|
||||
# we have a total of 1.1M values - this should not be more than 20 row groups (ideally it is 10)
|
||||
query I
|
||||
select count(distinct row_group_id) < 20 from pragma_storage_info('integers');
|
||||
----
|
||||
true
|
||||
|
||||
query IIIII
|
||||
SELECT SUM(i), MIN(i), MAX(i), COUNT(i), COUNT(*) FROM v1
|
||||
----
|
||||
499999500000 0 999999 1000000 1000000
|
||||
|
||||
query IIIII
|
||||
SELECT SUM(i), MIN(i), MAX(i), COUNT(i), COUNT(*) FROM integers
|
||||
----
|
||||
499999500000 0 999999 1000000 1000000
|
||||
|
||||
query II
|
||||
SELECT * FROM (select i, lag(i) over () from v1) t(i, lag) WHERE i <= lag
|
||||
----
|
||||
|
||||
query II
|
||||
SELECT * FROM (select i, lag(i) over () from integers) t(i, lag) WHERE i <= lag
|
||||
----
|
||||
|
||||
query I
|
||||
SELECT * FROM v1 LIMIT 5 OFFSET 99998
|
||||
----
|
||||
99998
|
||||
99999
|
||||
100000
|
||||
100001
|
||||
100002
|
||||
|
||||
query I
|
||||
SELECT * FROM integers LIMIT 5 OFFSET 99998
|
||||
----
|
||||
99998
|
||||
99999
|
||||
100000
|
||||
100001
|
||||
100002
|
||||
|
||||
# now do the same, but filter out half of the values
|
||||
query I
|
||||
CREATE TABLE integers2 AS FROM v2;
|
||||
----
|
||||
500000
|
||||
|
||||
# verify that we are not consuming an unnecessarily giant amount of blocks
|
||||
# we have a total of 500K values - this should not be more than 20 row groups (ideally it is 5)
|
||||
query I
|
||||
select count(distinct row_group_id) < 20 from pragma_storage_info('integers2');
|
||||
----
|
||||
true
|
||||
|
||||
query IIIII
|
||||
SELECT SUM(i), MIN(i), MAX(i), COUNT(i), COUNT(*) FROM v2;
|
||||
----
|
||||
247499750000 0 989999 500000 500000
|
||||
|
||||
query IIIII
|
||||
SELECT SUM(i), MIN(i), MAX(i), COUNT(i), COUNT(*) FROM integers2
|
||||
----
|
||||
247499750000 0 989999 500000 500000
|
||||
|
||||
query II
|
||||
SELECT * FROM (select i, lag(i) over () from v2) t(i, lag) WHERE i <= lag
|
||||
----
|
||||
|
||||
query II
|
||||
SELECT * FROM (select i, lag(i) over () from integers2) t(i, lag) WHERE i <= lag
|
||||
----
|
||||
|
||||
query I
|
||||
SELECT * FROM v2 LIMIT 5 OFFSET 99998
|
||||
----
|
||||
189998
|
||||
189999
|
||||
200000
|
||||
200001
|
||||
200002
|
||||
|
||||
query I
|
||||
SELECT * FROM integers2 LIMIT 5 OFFSET 99998
|
||||
----
|
||||
189998
|
||||
189999
|
||||
200000
|
||||
200001
|
||||
200002
|
||||
|
||||
# do it again, but this time only filter out SOME small batches
|
||||
query I
|
||||
CREATE TABLE integers3 AS FROM v3
|
||||
----
|
||||
700000
|
||||
|
||||
# verify that we are not consuming an unnecessarily giant amount of blocks
|
||||
# we have a total of 750K values - this should not be more than 20 row groups (ideally it is 7)
|
||||
query I
|
||||
select count(distinct row_group_id) < 20 from pragma_storage_info('integers3');
|
||||
----
|
||||
true
|
||||
|
||||
query IIIII nosort full_match3
|
||||
SELECT SUM(i), MIN(i), MAX(i), COUNT(i), COUNT(*) FROM v3
|
||||
----
|
||||
261256463520 0 802815 602816 602816
|
||||
|
||||
query IIIII nosort full_match3
|
||||
SELECT SUM(i), MIN(i), MAX(i), COUNT(i), COUNT(*) FROM integers3
|
||||
----
|
||||
261256463520 0 802815 602816 602816
|
||||
|
||||
query I
|
||||
SELECT * FROM v3 LIMIT 5 OFFSET 9999
|
||||
----
|
||||
9999
|
||||
20000
|
||||
20001
|
||||
20002
|
||||
20003
|
||||
|
||||
query I
|
||||
SELECT * FROM integers3 LIMIT 5 OFFSET 9999
|
||||
----
|
||||
9999
|
||||
20000
|
||||
20001
|
||||
20002
|
||||
20003
|
||||
|
||||
statement ok
|
||||
DROP VIEW v2
|
||||
|
||||
statement ok
|
||||
DROP VIEW v3
|
||||
|
||||
statement ok
|
||||
DROP VIEW v1;
|
||||
|
||||
statement ok
|
||||
DROP TABLE integers
|
||||
|
||||
statement ok
|
||||
DROP TABLE integers2
|
||||
|
||||
statement ok
|
||||
DROP TABLE integers3
|
||||
|
||||
endloop
|
||||
52
external/duckdb/test/sql/storage/parallel/batch_row_group_size_plus_one.test_slow
vendored
Normal file
52
external/duckdb/test/sql/storage/parallel/batch_row_group_size_plus_one.test_slow
vendored
Normal file
@@ -0,0 +1,52 @@
|
||||
# name: test/sql/storage/parallel/batch_row_group_size_plus_one.test_slow
|
||||
# description: Test batches that are slightly larger than a single row group
|
||||
# group: [parallel]
|
||||
|
||||
require parquet
|
||||
|
||||
load __TEST_DIR__/insert_row_group_size_plus_one.db
|
||||
|
||||
# write many batches of row group size plus one vector
|
||||
statement ok
|
||||
COPY (FROM range(10000000) tbl(i)) TO '__TEST_DIR__/row_group_size_plus_one.parquet' (ROW_GROUP_SIZE 124928)
|
||||
|
||||
# create a view that reads the batch
|
||||
statement ok
|
||||
CREATE VIEW v1 AS SELECT * FROM parquet_scan('__TEST_DIR__/row_group_size_plus_one.parquet')
|
||||
|
||||
query I
|
||||
CREATE TABLE integers AS FROM v1;
|
||||
----
|
||||
10000000
|
||||
|
||||
# verify that we are not consuming an unnecessarily giant amount of blocks
|
||||
# we have a total of 10M values - this should not be more than 200 row groups (ideally it is 100)
|
||||
query I
|
||||
select count(distinct row_group_id) < 200 from pragma_storage_info('integers');
|
||||
----
|
||||
true
|
||||
|
||||
query IIIII
|
||||
SELECT SUM(i), MIN(i), MAX(i), COUNT(i), COUNT(*) FROM v1
|
||||
----
|
||||
49999995000000 0 9999999 10000000 10000000
|
||||
|
||||
query IIIII
|
||||
SELECT SUM(i), MIN(i), MAX(i), COUNT(i), COUNT(*) FROM integers
|
||||
----
|
||||
49999995000000 0 9999999 10000000 10000000
|
||||
|
||||
query I
|
||||
SELECT * FROM integers LIMIT 5 OFFSET 99998
|
||||
----
|
||||
99998
|
||||
99999
|
||||
100000
|
||||
100001
|
||||
100002
|
||||
|
||||
query I
|
||||
SELECT * FROM integers LIMIT 5 OFFSET 9999998
|
||||
----
|
||||
9999998
|
||||
9999999
|
||||
165
external/duckdb/test/sql/storage/parallel/custom_row_group_size.test_slow
vendored
Normal file
165
external/duckdb/test/sql/storage/parallel/custom_row_group_size.test_slow
vendored
Normal file
@@ -0,0 +1,165 @@
|
||||
# name: test/sql/storage/parallel/custom_row_group_size.test_slow
|
||||
# description: Test batch insert with small batches
|
||||
# group: [parallel]
|
||||
|
||||
require parquet
|
||||
|
||||
statement ok
|
||||
ATTACH '__TEST_DIR__/custom_row_group_size.db' AS custom_row_group_size (ROW_GROUP_SIZE 204800, STORAGE_VERSION 'v1.2.0')
|
||||
|
||||
statement ok
|
||||
USE custom_row_group_size
|
||||
|
||||
statement ok
|
||||
COPY (FROM range(100000) tbl(i)) TO '__TEST_DIR__/mix_batches_small.parquet' (ROW_GROUP_SIZE 5000)
|
||||
|
||||
statement ok
|
||||
COPY (FROM range(100000, 400000) tbl(i)) TO '__TEST_DIR__/mix_batches_large.parquet' (ROW_GROUP_SIZE 200000)
|
||||
|
||||
statement ok
|
||||
COPY (FROM range(400000, 700000) tbl(i)) TO '__TEST_DIR__/mix_batches_odd.parquet' (ROW_GROUP_SIZE 999)
|
||||
|
||||
statement ok
|
||||
COPY (FROM range(700000, 1000000) tbl(i)) TO '__TEST_DIR__/mix_batches_odd_again.parquet' (ROW_GROUP_SIZE 99979)
|
||||
|
||||
|
||||
# create views that read the batches
|
||||
statement ok
|
||||
CREATE VIEW v1 AS SELECT * FROM parquet_scan(['__TEST_DIR__/mix_batches_small.parquet', '__TEST_DIR__/mix_batches_large.parquet', '__TEST_DIR__/mix_batches_odd.parquet', '__TEST_DIR__/mix_batches_odd_again.parquet'])
|
||||
|
||||
statement ok
|
||||
CREATE VIEW v2 AS FROM v1 WHERE (i//10000)%2=0;
|
||||
|
||||
statement ok
|
||||
CREATE VIEW v3 AS FROM v1 WHERE (i//10000)%2=0 OR (i>200000 AND i < 400000) OR (i>600000 AND i < 800000);
|
||||
|
||||
query I
|
||||
CREATE TABLE integers AS FROM v1;
|
||||
----
|
||||
1000000
|
||||
|
||||
# verify we are actually creating larger row groups
|
||||
query I
|
||||
SELECT MAX(count) FROM pragma_storage_info('integers')
|
||||
----
|
||||
204800
|
||||
|
||||
# we have a total of 1M values - this should not be more than 10 row groups (ideally it is 5)
|
||||
query I
|
||||
select count(distinct row_group_id) < 10 from pragma_storage_info('integers');
|
||||
----
|
||||
true
|
||||
|
||||
query IIIII
|
||||
SELECT SUM(i), MIN(i), MAX(i), COUNT(i), COUNT(*) FROM v1
|
||||
----
|
||||
499999500000 0 999999 1000000 1000000
|
||||
|
||||
query IIIII
|
||||
SELECT SUM(i), MIN(i), MAX(i), COUNT(i), COUNT(*) FROM integers
|
||||
----
|
||||
499999500000 0 999999 1000000 1000000
|
||||
|
||||
# now do the same, but filter out half of the values
|
||||
query I
|
||||
CREATE TABLE integers2 AS FROM v2
|
||||
----
|
||||
500000
|
||||
|
||||
# also test deletions
|
||||
query I
|
||||
DELETE FROM integers WHERE (i//10000)%2<>0;
|
||||
----
|
||||
500000
|
||||
|
||||
query IIIII
|
||||
SELECT SUM(i), MIN(i), MAX(i), COUNT(i), COUNT(*) FROM integers
|
||||
----
|
||||
247499750000 0 989999 500000 500000
|
||||
|
||||
query IIIII
|
||||
SELECT SUM(i), MIN(i), MAX(i), COUNT(i), COUNT(*) FROM integers2
|
||||
----
|
||||
247499750000 0 989999 500000 500000
|
||||
|
||||
# test updates
|
||||
query I
|
||||
UPDATE integers SET i=i+1 WHERE i%2=0
|
||||
----
|
||||
250000
|
||||
|
||||
query IIIII
|
||||
SELECT SUM(i), MIN(i), MAX(i), COUNT(i), COUNT(*) FROM integers
|
||||
----
|
||||
247500000000 1 989999 500000 500000
|
||||
|
||||
query I
|
||||
CREATE TABLE integers3 AS FROM v3
|
||||
----
|
||||
700000
|
||||
|
||||
# verify that we are not consuming an unnecessarily giant amount of blocks
|
||||
# we have a total of 750K values - this should not be more than 10 row groups (ideally it is 4)
|
||||
query I
|
||||
select count(distinct row_group_id) < 10 from pragma_storage_info('integers3');
|
||||
----
|
||||
true
|
||||
|
||||
query IIIII
|
||||
SELECT SUM(i), MIN(i), MAX(i), COUNT(i), COUNT(*) FROM integers3
|
||||
----
|
||||
348499650000 0 989999 700000 700000
|
||||
|
||||
# non-batch insert
|
||||
statement ok
|
||||
SET preserve_insertion_order = false
|
||||
|
||||
statement ok
|
||||
CREATE TABLE integers4 AS FROM integers
|
||||
|
||||
query IIIII
|
||||
SELECT SUM(i), MIN(i), MAX(i), COUNT(i), COUNT(*) FROM integers4
|
||||
----
|
||||
247500000000 1 989999 500000 500000
|
||||
|
||||
# re-attach without the parameter
|
||||
statement ok
|
||||
ATTACH ':memory:' AS mem
|
||||
|
||||
statement ok
|
||||
USE mem
|
||||
|
||||
statement ok
|
||||
DETACH custom_row_group_size
|
||||
|
||||
statement ok
|
||||
ATTACH '__TEST_DIR__/custom_row_group_size.db' AS custom_row_group_size
|
||||
|
||||
statement ok
|
||||
USE custom_row_group_size
|
||||
|
||||
query IIIII
|
||||
SELECT SUM(i), MIN(i), MAX(i), COUNT(i), COUNT(*) FROM integers
|
||||
----
|
||||
247500000000 1 989999 500000 500000
|
||||
|
||||
query IIIII
|
||||
SELECT SUM(i), MIN(i), MAX(i), COUNT(i), COUNT(*) FROM integers2
|
||||
----
|
||||
247499750000 0 989999 500000 500000
|
||||
|
||||
query IIIII
|
||||
SELECT SUM(i), MIN(i), MAX(i), COUNT(i), COUNT(*) FROM integers3
|
||||
----
|
||||
348499650000 0 989999 700000 700000
|
||||
|
||||
# invalid row group size parameters
|
||||
statement error
|
||||
ATTACH '__TEST_DIR__/custom_row_group_size_xx.db' AS custom_row_group_size_x1 (ROW_GROUP_SIZE 0)
|
||||
----
|
||||
row group size must be bigger than 0
|
||||
|
||||
statement error
|
||||
ATTACH '__TEST_DIR__/custom_row_group_size_xx.db' AS custom_row_group_size_x2 (ROW_GROUP_SIZE 77)
|
||||
----
|
||||
row group size must be divisible by the vector size
|
||||
119
external/duckdb/test/sql/storage/parallel/insert_many_compressible_batches.test_slow
vendored
Normal file
119
external/duckdb/test/sql/storage/parallel/insert_many_compressible_batches.test_slow
vendored
Normal file
@@ -0,0 +1,119 @@
|
||||
# name: test/sql/storage/parallel/insert_many_compressible_batches.test_slow
|
||||
# description: Test writing many compressible batches
|
||||
# group: [parallel]
|
||||
|
||||
require vector_size 512
|
||||
|
||||
require parquet
|
||||
|
||||
load __TEST_DIR__/insert_many_compressible_batches.db
|
||||
|
||||
# 50M values, extremely compressible
|
||||
query I
|
||||
CREATE TABLE integers AS SELECT CASE WHEN i % 50000 = 0 THEN 1 ELSE 0 END AS i FROM range(50000000) tbl(i);
|
||||
----
|
||||
50000000
|
||||
|
||||
# check the block count and median number of rows per row group
|
||||
query I
|
||||
SELECT COUNT(DISTINCT block_id) < 4 FROM pragma_storage_info('integers');
|
||||
----
|
||||
true
|
||||
|
||||
query I
|
||||
SELECT MEDIAN(count) FROM pragma_storage_info('integers');
|
||||
----
|
||||
122880
|
||||
|
||||
statement ok
|
||||
COPY integers TO '__TEST_DIR__/integers.parquet'
|
||||
|
||||
# verify that reading while preserving insertion order creates the same size table,
|
||||
# with very small block variations for compact block sizes
|
||||
|
||||
statement ok
|
||||
CREATE TABLE integers_parquet AS FROM '__TEST_DIR__/integers.parquet';
|
||||
|
||||
query I
|
||||
SELECT * FROM integers_parquet LIMIT 5
|
||||
----
|
||||
1
|
||||
0
|
||||
0
|
||||
0
|
||||
0
|
||||
|
||||
query II
|
||||
SELECT i, COUNT(*) FROM integers_parquet GROUP BY ALL ORDER BY ALL
|
||||
----
|
||||
0 49999000
|
||||
1 1000
|
||||
|
||||
query I
|
||||
SELECT COUNT(DISTINCT block_id) < 5 FROM pragma_storage_info('integers_parquet');
|
||||
----
|
||||
true
|
||||
|
||||
# verify that loading in separate SQL statements within the same transaction generates the same size table
|
||||
statement ok
|
||||
CREATE TABLE integers_batched_load(i INTEGER)
|
||||
|
||||
statement ok
|
||||
BEGIN TRANSACTION
|
||||
|
||||
loop i 0 50
|
||||
|
||||
query I
|
||||
INSERT INTO integers_batched_load SELECT CASE WHEN i % 50000 = 0 THEN 1 ELSE 0 END AS i FROM range(1000000) tbl(i);
|
||||
----
|
||||
1000000
|
||||
|
||||
endloop
|
||||
|
||||
statement ok
|
||||
COMMIT
|
||||
|
||||
query I
|
||||
SELECT COUNT(DISTINCT block_id) < 4 FROM pragma_storage_info('integers_batched_load');
|
||||
----
|
||||
true
|
||||
|
||||
query III
|
||||
SELECT COUNT(*), COUNT(i), SUM(i) FROM integers_batched_load
|
||||
----
|
||||
50000000 50000000 1000
|
||||
|
||||
# now with NULL values
|
||||
|
||||
statement ok
|
||||
CREATE TABLE integers_batched_load_nulls(i INTEGER)
|
||||
|
||||
statement ok
|
||||
BEGIN TRANSACTION
|
||||
|
||||
loop i 0 50
|
||||
|
||||
query I
|
||||
INSERT INTO integers_batched_load_nulls SELECT CASE WHEN i % 50000 = 0 THEN 1 ELSE NULL END AS i FROM range(1000000) tbl(i);
|
||||
----
|
||||
1000000
|
||||
|
||||
endloop
|
||||
|
||||
statement ok
|
||||
COMMIT
|
||||
|
||||
# NULLs are RLE compressed (with Roaring)
|
||||
# So even with nulls we reach a similar compression ratio
|
||||
|
||||
mode skip
|
||||
|
||||
query I
|
||||
SELECT COUNT(DISTINCT block_id) < 8 FROM pragma_storage_info('integers_batched_load_nulls');
|
||||
----
|
||||
true
|
||||
|
||||
query III
|
||||
SELECT COUNT(*), COUNT(i), SUM(i) FROM integers_batched_load_nulls
|
||||
----
|
||||
50000000 1000 1000
|
||||
53
external/duckdb/test/sql/storage/parallel/insert_many_grouping_sets.test_slow
vendored
Normal file
53
external/duckdb/test/sql/storage/parallel/insert_many_grouping_sets.test_slow
vendored
Normal file
@@ -0,0 +1,53 @@
|
||||
# name: test/sql/storage/parallel/insert_many_grouping_sets.test_slow
|
||||
# description: Test parallel insert from many groups
|
||||
# group: [parallel]
|
||||
|
||||
load __TEST_DIR__/insert_many_grouping_sets.db
|
||||
|
||||
|
||||
query I
|
||||
CREATE TABLE integers AS SELECT i, i%2 as j FROM generate_series(0,9999999,1) tbl(i);
|
||||
----
|
||||
10000000
|
||||
|
||||
query I
|
||||
CREATE TABLE integers2 AS SELECT * FROM integers GROUP BY GROUPING SETS ((), (i), (i, j), (j));
|
||||
----
|
||||
20000003
|
||||
|
||||
query IIIII
|
||||
SELECT SUM(i), SUM(j), COUNT(*), COUNT(i), COUNT(j) FROM integers;
|
||||
----
|
||||
49999995000000 5000000 10000000 10000000 10000000
|
||||
|
||||
query IIIII
|
||||
SELECT SUM(i), SUM(j), COUNT(*), COUNT(i), COUNT(j) FROM integers2;
|
||||
----
|
||||
99999990000000 5000001 20000003 20000000 10000002
|
||||
|
||||
statement ok
|
||||
DROP TABLE integers;
|
||||
|
||||
statement ok
|
||||
DROP TABLE integers2;
|
||||
|
||||
# now with null values
|
||||
query I
|
||||
CREATE TABLE integers AS SELECT case when i%2=0 then null else i end AS i, i%2 as j FROM generate_series(0,9999999,1) tbl(i);
|
||||
----
|
||||
10000000
|
||||
|
||||
query I
|
||||
CREATE TABLE integers2 AS SELECT * FROM integers GROUP BY GROUPING SETS ((), (i), (i, j), (j));
|
||||
----
|
||||
10000005
|
||||
|
||||
query IIIII
|
||||
SELECT SUM(i), SUM(j), COUNT(*), COUNT(i), COUNT(j) FROM integers
|
||||
----
|
||||
25000000000000 5000000 10000000 5000000 10000000
|
||||
|
||||
query IIIII
|
||||
SELECT SUM(i), SUM(j), COUNT(*), COUNT(i), COUNT(j) FROM integers2
|
||||
----
|
||||
50000000000000 5000001 10000005 10000000 5000003
|
||||
51
external/duckdb/test/sql/storage/parallel/insert_non_order_preserving.test_slow
vendored
Normal file
51
external/duckdb/test/sql/storage/parallel/insert_non_order_preserving.test_slow
vendored
Normal file
@@ -0,0 +1,51 @@
|
||||
# name: test/sql/storage/parallel/insert_non_order_preserving.test_slow
|
||||
# description: Test parallel non order-preserving insert
|
||||
# group: [parallel]
|
||||
|
||||
load __TEST_DIR__/insert_non_order_preserving.db
|
||||
|
||||
statement ok
|
||||
PRAGMA preserve_insertion_order=false
|
||||
|
||||
query I
|
||||
CREATE TABLE integers AS SELECT * FROM range(10000000) tbl(i);
|
||||
----
|
||||
10000000
|
||||
|
||||
query I
|
||||
CREATE TABLE integers2 AS SELECT * FROM integers
|
||||
----
|
||||
10000000
|
||||
|
||||
query I
|
||||
SELECT SUM(i) FROM integers
|
||||
----
|
||||
49999995000000
|
||||
|
||||
query I
|
||||
SELECT SUM(i) FROM integers2
|
||||
----
|
||||
49999995000000
|
||||
|
||||
statement ok
|
||||
DROP TABLE integers
|
||||
|
||||
statement ok
|
||||
DROP TABLE integers2
|
||||
|
||||
# now with null values
|
||||
statement ok
|
||||
CREATE TABLE integers AS SELECT case when i%2=0 then null else i end AS i FROM range(10000000) tbl(i);
|
||||
|
||||
statement ok
|
||||
CREATE TABLE integers2 AS SELECT * FROM integers
|
||||
|
||||
query I
|
||||
SELECT SUM(i) FROM integers
|
||||
----
|
||||
25000000000000
|
||||
|
||||
query I
|
||||
SELECT SUM(i) FROM integers2
|
||||
----
|
||||
25000000000000
|
||||
84
external/duckdb/test/sql/storage/parallel/insert_order_preserving.test_slow
vendored
Normal file
84
external/duckdb/test/sql/storage/parallel/insert_order_preserving.test_slow
vendored
Normal file
@@ -0,0 +1,84 @@
|
||||
# name: test/sql/storage/parallel/insert_order_preserving.test_slow
|
||||
# description: Test parallel order-preserving insert
|
||||
# group: [parallel]
|
||||
|
||||
load __TEST_DIR__/insert_order_preserving.db
|
||||
|
||||
query I
|
||||
CREATE TABLE integers AS SELECT * FROM range(10000000) tbl(i);
|
||||
----
|
||||
10000000
|
||||
|
||||
query I
|
||||
CREATE TABLE integers2 AS SELECT * FROM integers
|
||||
----
|
||||
10000000
|
||||
|
||||
query I
|
||||
SELECT SUM(i) FROM integers
|
||||
----
|
||||
49999995000000
|
||||
|
||||
query I
|
||||
SELECT SUM(i) FROM integers2
|
||||
----
|
||||
49999995000000
|
||||
|
||||
query I
|
||||
SELECT * FROM integers2 LIMIT 5
|
||||
----
|
||||
0
|
||||
1
|
||||
2
|
||||
3
|
||||
4
|
||||
|
||||
query I
|
||||
SELECT * FROM integers2 LIMIT 5 OFFSET 777778
|
||||
----
|
||||
777778
|
||||
777779
|
||||
777780
|
||||
777781
|
||||
777782
|
||||
|
||||
statement ok
|
||||
DROP TABLE integers
|
||||
|
||||
statement ok
|
||||
DROP TABLE integers2
|
||||
|
||||
# now with null values
|
||||
statement ok
|
||||
CREATE TABLE integers AS SELECT case when i%2=0 then null else i end AS i FROM range(10000000) tbl(i);
|
||||
|
||||
statement ok
|
||||
CREATE TABLE integers2 AS SELECT * FROM integers
|
||||
|
||||
query I
|
||||
SELECT SUM(i) FROM integers
|
||||
----
|
||||
25000000000000
|
||||
|
||||
query I
|
||||
SELECT SUM(i) FROM integers2
|
||||
----
|
||||
25000000000000
|
||||
|
||||
query I
|
||||
SELECT * FROM integers2 LIMIT 5
|
||||
----
|
||||
NULL
|
||||
1
|
||||
NULL
|
||||
3
|
||||
NULL
|
||||
|
||||
query I
|
||||
SELECT * FROM integers2 LIMIT 5 OFFSET 777778
|
||||
----
|
||||
NULL
|
||||
777779
|
||||
NULL
|
||||
777781
|
||||
NULL
|
||||
81
external/duckdb/test/sql/storage/parallel/insert_order_preserving_odd_sized_batches.test_slow
vendored
Normal file
81
external/duckdb/test/sql/storage/parallel/insert_order_preserving_odd_sized_batches.test_slow
vendored
Normal file
@@ -0,0 +1,81 @@
|
||||
# name: test/sql/storage/parallel/insert_order_preserving_odd_sized_batches.test_slow
|
||||
# description: Test parallel order-preserving insert
|
||||
# group: [parallel]
|
||||
|
||||
# There are different numbers of distinct blocks for smaller block sizes,
|
||||
# because the segment size is bound by the block size.
|
||||
require block_size 262144
|
||||
|
||||
require vector_size 512
|
||||
|
||||
require parquet
|
||||
|
||||
load __TEST_DIR__/insert_odd_sized_batches.db
|
||||
|
||||
query I
|
||||
CREATE TABLE integers AS SELECT * FROM range(10_000_000) tbl(i);
|
||||
----
|
||||
10000000
|
||||
|
||||
# Check the block count and median number of rows per row group.
|
||||
query I
|
||||
SELECT COUNT(DISTINCT block_id) < 4 FROM pragma_storage_info('integers');
|
||||
----
|
||||
true
|
||||
|
||||
query I
|
||||
SELECT MEDIAN(count) FROM pragma_storage_info('integers');
|
||||
----
|
||||
122880
|
||||
|
||||
statement ok
|
||||
COPY integers TO '__TEST_DIR__/integers.parquet' (ROW_GROUP_SIZE 77777)
|
||||
|
||||
statement ok
|
||||
CREATE TABLE integers_parquet AS FROM '__TEST_DIR__/integers.parquet';
|
||||
|
||||
query I
|
||||
SELECT * FROM integers_parquet LIMIT 5
|
||||
----
|
||||
0
|
||||
1
|
||||
2
|
||||
3
|
||||
4
|
||||
|
||||
query I
|
||||
SELECT * FROM integers_parquet LIMIT 5 OFFSET 773654
|
||||
----
|
||||
773654
|
||||
773655
|
||||
773656
|
||||
773657
|
||||
773658
|
||||
|
||||
query I
|
||||
SELECT COUNT(DISTINCT block_id) < 4 FROM pragma_storage_info('integers_parquet');
|
||||
----
|
||||
true
|
||||
|
||||
query I
|
||||
SELECT MEDIAN(count) > 100000 FROM pragma_storage_info('integers_parquet');
|
||||
----
|
||||
true
|
||||
|
||||
# FIXME: does this even make sense?
|
||||
# Verify that reading without preserving insertion order creates a same size table.
|
||||
statement ok
|
||||
SET preserve_insertion_order=false
|
||||
|
||||
statement ok
|
||||
CREATE TABLE integers_parquet_no_order AS FROM '__TEST_DIR__/integers.parquet'
|
||||
|
||||
query I
|
||||
SELECT COUNT(DISTINCT block_id) < 30 FROM pragma_storage_info('integers_parquet_no_order');
|
||||
----
|
||||
true
|
||||
|
||||
query I
|
||||
SELECT MEDIAN(count) > 100000 FROM pragma_storage_info('integers_parquet_no_order');
|
||||
----
|
||||
true
|
||||
63
external/duckdb/test/sql/storage/parallel/memory_limit_batch_load.test_slow
vendored
Normal file
63
external/duckdb/test/sql/storage/parallel/memory_limit_batch_load.test_slow
vendored
Normal file
@@ -0,0 +1,63 @@
|
||||
# name: test/sql/storage/parallel/memory_limit_batch_load.test_slow
# description: Test batch streaming to disk with different row group sizes
# group: [parallel]

require parquet

load __TEST_DIR__/memory_limit_batch_load.db

# in this test we load data of around 100M rows - uncompressed this will be 1.4GB~2GB (without/with NULLs)
# we do these operations with a low memory limit to verify the data is streamed to and from disk correctly
statement ok
SET memory_limit='300MB'

foreach row_group_size 5000 150000 1000000

statement ok
COPY (FROM range(100000000) tbl(i)) TO '__TEST_DIR__/giant_row_groups.parquet' (ROW_GROUP_SIZE ${row_group_size})

statement ok
CREATE TABLE integers AS FROM '__TEST_DIR__/giant_row_groups.parquet'

query IIIII
SELECT SUM(i), MIN(i), MAX(i), COUNT(i), COUNT(*) FROM integers
----
4999999950000000	0	99999999	100000000	100000000

query I
SELECT * FROM integers LIMIT 5 OFFSET 99998
----
99998
99999
100000
100001
100002

statement ok
DROP TABLE integers

# now with NULL values
statement ok
COPY (SELECT CASE WHEN i%2=0 THEN NULL ELSE i END AS i FROM range(100000000) tbl(i)) TO '__TEST_DIR__/giant_row_groups_nulls.parquet' (ROW_GROUP_SIZE ${row_group_size})

statement ok
CREATE TABLE integers AS FROM '__TEST_DIR__/giant_row_groups_nulls.parquet'

query IIIII
SELECT SUM(i), MIN(i), MAX(i), COUNT(i), COUNT(*) FROM integers
----
2500000000000000	1	99999999	50000000	100000000

query I
SELECT * FROM integers LIMIT 5 OFFSET 99998
----
NULL
99999
NULL
100001
NULL

statement ok
DROP TABLE integers

endloop
|
||||
89
external/duckdb/test/sql/storage/parallel/memory_limit_batch_load_list.test_slow
vendored
Normal file
89
external/duckdb/test/sql/storage/parallel/memory_limit_batch_load_list.test_slow
vendored
Normal file
@@ -0,0 +1,89 @@
|
||||
# name: test/sql/storage/parallel/memory_limit_batch_load_list.test_slow
# description: Test batch streaming to disk with different row group sizes
# group: [parallel]

require parquet

load __TEST_DIR__/memory_limit_batch_load_list.db

# in this test we load data of around 100M rows - uncompressed this will be 1.4GB~2GB (without/with NULLs)
# we do these operations with a low memory limit to verify the data is streamed to and from disk correctly
statement ok
SET memory_limit='300MB'

foreach row_group_size 5000 150000 1000000

statement ok
COPY (
    SELECT [i] AS l FROM range(10000000) tbl(i)
) TO '__TEST_DIR__/giant_row_groups.parquet' (
    ROW_GROUP_SIZE ${row_group_size}
)

statement ok
CREATE TABLE list AS FROM '__TEST_DIR__/giant_row_groups.parquet'

query IIIII
SELECT
    SUM(i),
    MIN(i),
    MAX(i),
    COUNT(i),
    COUNT(*)
FROM (
    SELECT UNNEST(l) AS i FROM list
)
----
49999995000000	0	9999999	10000000	10000000

query I
SELECT * FROM list LIMIT 5 OFFSET 99998
----
[99998]
[99999]
[100000]
[100001]
[100002]

statement ok
DROP TABLE list

# now with NULL values
statement ok
COPY (
    SELECT CASE WHEN i%2=0 THEN NULL ELSE [i] END AS l FROM range(10000000) tbl(i)
) TO '__TEST_DIR__/giant_row_groups_nulls.parquet' (
    ROW_GROUP_SIZE ${row_group_size}
)

statement ok
CREATE TABLE list AS FROM '__TEST_DIR__/giant_row_groups_nulls.parquet'

query IIIII
SELECT
    SUM(i),
    MIN(i),
    MAX(i),
    COUNT(i),
    COUNT(*)
FROM (
    SELECT UNNEST(l) AS i FROM list
)
----
25000000000000	1	9999999	5000000	5000000

query I
SELECT
    *
FROM list LIMIT 5 OFFSET 99998
----
NULL
[99999]
NULL
[100001]
NULL

statement ok
DROP TABLE list

endloop
|
||||
103
external/duckdb/test/sql/storage/parallel/memory_limit_mixed_batches.test_slow
vendored
Normal file
103
external/duckdb/test/sql/storage/parallel/memory_limit_mixed_batches.test_slow
vendored
Normal file
@@ -0,0 +1,103 @@
|
||||
# name: test/sql/storage/parallel/memory_limit_mixed_batches.test_slow
# description: Test batch streaming to disk with mixed batch sizes
# group: [parallel]

require parquet

load __TEST_DIR__/memory_limit_mixed_batches.db

# in this test we load data of around 100M rows - uncompressed this will be 1.4GB~2GB (without/with NULLs)
# we do these operations with a low memory limit to verify the data is streamed to and from disk correctly
statement ok
SET memory_limit='300MB'

statement ok
COPY (FROM range(20000000) tbl(i)) TO '__TEST_DIR__/mixed_batches_1.parquet' (ROW_GROUP_SIZE 5000)

statement ok
COPY (FROM range(20000000,30000000) tbl(i)) TO '__TEST_DIR__/mixed_batches_2.parquet' (ROW_GROUP_SIZE 200000)

statement ok
COPY (FROM range(30000000,50000000) tbl(i)) TO '__TEST_DIR__/mixed_batches_3.parquet' (ROW_GROUP_SIZE 999)

statement ok
COPY (FROM range(50000000,70000000) tbl(i)) TO '__TEST_DIR__/mixed_batches_4.parquet' (ROW_GROUP_SIZE 99979)

statement ok
COPY (FROM range(70000000,90000000) tbl(i)) TO '__TEST_DIR__/mixed_batches_5.parquet' (ROW_GROUP_SIZE 99979)

statement ok
COPY (FROM range(90000000,100000000) tbl(i)) TO '__TEST_DIR__/mixed_batches_6.parquet' (ROW_GROUP_SIZE 33445)

statement ok
CREATE TABLE integers AS FROM read_parquet([
    '__TEST_DIR__/mixed_batches_1.parquet',
    '__TEST_DIR__/mixed_batches_2.parquet',
    '__TEST_DIR__/mixed_batches_3.parquet',
    '__TEST_DIR__/mixed_batches_4.parquet',
    '__TEST_DIR__/mixed_batches_5.parquet',
    '__TEST_DIR__/mixed_batches_6.parquet'
])

query IIIII
SELECT SUM(i), MIN(i), MAX(i), COUNT(i), COUNT(*) FROM integers
----
4999999950000000	0	99999999	100000000	100000000

query I
SELECT * FROM integers LIMIT 5 OFFSET 99998
----
99998
99999
100000
100001
100002

statement ok
DROP TABLE integers

# now with NULL values
statement ok
COPY (SELECT CASE WHEN i%2=0 THEN NULL ELSE i END AS i FROM range(20000000) tbl(i)) TO '__TEST_DIR__/mixed_batches_1.parquet' (ROW_GROUP_SIZE 5000)

statement ok
COPY (SELECT CASE WHEN i%2=0 THEN NULL ELSE i END AS i FROM range(20000000,30000000) tbl(i)) TO '__TEST_DIR__/mixed_batches_2.parquet' (ROW_GROUP_SIZE 200000)

statement ok
COPY (SELECT CASE WHEN i%2=0 THEN NULL ELSE i END AS i FROM range(30000000,50000000) tbl(i)) TO '__TEST_DIR__/mixed_batches_3.parquet' (ROW_GROUP_SIZE 999)

statement ok
COPY (SELECT CASE WHEN i%2=0 THEN NULL ELSE i END AS i FROM range(50000000,70000000) tbl(i)) TO '__TEST_DIR__/mixed_batches_4.parquet' (ROW_GROUP_SIZE 99979)

statement ok
COPY (SELECT CASE WHEN i%2=0 THEN NULL ELSE i END AS i FROM range(70000000,90000000) tbl(i)) TO '__TEST_DIR__/mixed_batches_5.parquet' (ROW_GROUP_SIZE 99979)

statement ok
COPY (SELECT CASE WHEN i%2=0 THEN NULL ELSE i END AS i FROM range(90000000,100000000) tbl(i)) TO '__TEST_DIR__/mixed_batches_6.parquet' (ROW_GROUP_SIZE 33445)

statement ok
CREATE TABLE integers AS FROM read_parquet([
    '__TEST_DIR__/mixed_batches_1.parquet',
    '__TEST_DIR__/mixed_batches_2.parquet',
    '__TEST_DIR__/mixed_batches_3.parquet',
    '__TEST_DIR__/mixed_batches_4.parquet',
    '__TEST_DIR__/mixed_batches_5.parquet',
    '__TEST_DIR__/mixed_batches_6.parquet'
])

query IIIII
SELECT SUM(i), MIN(i), MAX(i), COUNT(i), COUNT(*) FROM integers
----
2500000000000000	1	99999999	50000000	100000000

query I
SELECT * FROM integers LIMIT 5 OFFSET 99998
----
NULL
99999
NULL
100001
NULL

statement ok
DROP TABLE integers
|
||||
77
external/duckdb/test/sql/storage/parallel/parallel_insert_selective_filter.test_slow
vendored
Normal file
77
external/duckdb/test/sql/storage/parallel/parallel_insert_selective_filter.test_slow
vendored
Normal file
@@ -0,0 +1,77 @@
|
||||
# name: test/sql/storage/parallel/parallel_insert_selective_filter.test_slow
# description: Test parallel insert with a selective filter
# group: [parallel]

load __TEST_DIR__/parallel_insert_selective.db

query I
CREATE TABLE integers AS SELECT * FROM range(10000000) tbl(i);
----
10000000

# loop and test both with and without preserving insertion order

loop attempts 0 2

# insert values with a selective filter
# not many values are inserted
# verify that we are not creating many small row-groups due to the parallel insertion
query I
CREATE TABLE integers2 AS SELECT * FROM integers WHERE i%100=0
----
100000

loop i 0 10

query I
INSERT INTO integers2 SELECT * FROM integers WHERE i%100=0
----
100000

endloop

query I
SELECT SUM(i) FROM integers
----
49999995000000

query I
SELECT SUM(i) FROM integers2
----
5499945000000

# we have a total of 1.1M values - this should not be more than 20 row groups (ideally it is 11)
query I
select count(distinct row_group_id) < 20 from pragma_storage_info('integers2');
----
true

statement ok
DROP TABLE integers2

# now test a mix of selective and non-selective filters
# we insert all values that have a modulo 100 of 0
# AND all values between 3 and 4 million
query I
CREATE TABLE integers2 AS SELECT * FROM integers WHERE i%100=0 OR (i >= 3000000 AND i <= 4000000)
----
1090000

query I
SELECT SUM(i) FROM integers2
----
3964995000000

# we have a total of 1.1M values - this should not be more than 20 row groups (ideally it is 11)
query I
select count(distinct row_group_id) < 20 from pragma_storage_info('integers2');
----
true

statement ok
DROP TABLE integers2

statement ok
PRAGMA preserve_insertion_order=false

endloop
|
||||
83
external/duckdb/test/sql/storage/parallel/reclaim_space_batch_insert.test_slow
vendored
Normal file
83
external/duckdb/test/sql/storage/parallel/reclaim_space_batch_insert.test_slow
vendored
Normal file
@@ -0,0 +1,83 @@
|
||||
# name: test/sql/storage/parallel/reclaim_space_batch_insert.test_slow
# description: Test space reclamation of optimistic writing with batch inserts and a primary key.
# group: [parallel]

require parquet

load __TEST_DIR__/reclaim_space_batch_insert.db

statement ok
COPY (FROM range(10000000) t(i)) TO '__TEST_DIR__/integers.parquet' (FORMAT PARQUET, ROW_GROUP_SIZE 200000);

statement ok
CREATE VIEW integers AS FROM '__TEST_DIR__/integers.parquet';

statement ok
CREATE TABLE integers2 (i INTEGER PRIMARY KEY);

statement ok
INSERT INTO integers2 VALUES (9999999);

statement error
INSERT INTO integers2 SELECT * FROM integers;
----
<REGEX>:Constraint Error.*violates primary key constraint.*

statement ok
CREATE TABLE block_count (count INT);

loop i 0 10

statement error
INSERT INTO integers2 SELECT * FROM integers;
----
<REGEX>:Constraint Error.*violates primary key constraint.*

statement ok
BEGIN;

statement ok
INSERT INTO integers2 VALUES (9999998);

statement error
INSERT INTO integers2 SELECT * FROM integers WHERE i <= 9999998;
----
<REGEX>:Constraint Error.*PRIMARY KEY or UNIQUE constraint violation.*

statement ok
ROLLBACK

query I
SELECT COUNT(*) - ${i} FROM integers2;
----
1

statement ok
INSERT INTO integers2 VALUES (10000000 + ${i});

statement ok
CHECKPOINT;

statement ok
INSERT INTO block_count
SELECT total_blocks FROM pragma_database_size();

query I
SELECT COUNT(*) - ${i} FROM integers2;
----
2

# Ensure there is only a small difference between the MIN and MAX block counts.
# Example table contents for 16kB blocks:
# count: 4766, 4788, 4846, 4749, 4770, 4730, 4711, 4732, 4751, 4711
query I
SELECT
    CASE WHEN get_block_size('reclaim_space_batch_insert') = 16384
    THEN (MAX(count / 100) - MIN(count / 100)) < 3
    ELSE (MAX(count) - MIN(count)) < 3
    END FROM block_count;
----
True

endloop
|
||||
74
external/duckdb/test/sql/storage/parallel/reclaim_space_insert_unique_idx_optimistic.test_slow
vendored
Normal file
74
external/duckdb/test/sql/storage/parallel/reclaim_space_insert_unique_idx_optimistic.test_slow
vendored
Normal file
@@ -0,0 +1,74 @@
|
||||
# name: test/sql/storage/parallel/reclaim_space_insert_unique_idx_optimistic.test_slow
# description: Test space reclamation of optimistic writing with a UNIQUE constraint violation.
# group: [parallel]

load __TEST_DIR__/reclaim_space_unique_index.db

statement ok
CREATE TABLE integers AS SELECT * FROM range(1_000_000) t(i);

statement ok
CREATE TABLE integers2 (i INTEGER);

statement ok
INSERT INTO integers2 VALUES (9999999);

statement ok
CREATE UNIQUE INDEX idx ON integers2(i);

# For smaller block sizes (16KB) the total blocks increase (to twice the original amount) in the first
# iteration, and then stay mostly constant.

statement ok
CREATE TABLE total_blocks_tbl (total_blocks UBIGINT);

loop i 0 20

statement ok
BEGIN;

statement ok
CHECKPOINT;

statement ok
INSERT INTO integers2 VALUES (999_998);

# Invalidate the transaction.

statement error
INSERT INTO integers2 SELECT * FROM integers WHERE i <= 999_998;
----
<REGEX>:Constraint Error.*PRIMARY KEY or UNIQUE constraint violation.*

statement ok
ROLLBACK

# Track the block count of each iteration.

statement ok
INSERT INTO total_blocks_tbl SELECT current.total_blocks FROM pragma_database_size() AS current

endloop

# Ensure that the blocks don't grow between iterations.

query I
WITH tbl(w) AS (
    SELECT struct_pack(
        total_blocks := total_blocks,
        sum := SUM (total_blocks) OVER (ROWS BETWEEN 0 PRECEDING AND 1 FOLLOWING)
    ) AS w
    FROM total_blocks_tbl
    LIMIT 19)
SELECT list_filter(LIST(w), lambda x: x.total_blocks * 2.5 < x.sum) FROM tbl;
----
[]

# Ensure that the blocks don't grow 'quietly' between iterations.

query I
WITH tbl(l) AS (
    SELECT LIST(total_blocks) AS l FROM total_blocks_tbl)
SELECT list_sum(l) < (list_count(l) * l[3] + 2 * l[3]) FROM tbl;
----
True
|
||||
101
external/duckdb/test/sql/storage/parallel/reclaim_space_primary_key_optimistic.test_slow
vendored
Normal file
101
external/duckdb/test/sql/storage/parallel/reclaim_space_primary_key_optimistic.test_slow
vendored
Normal file
@@ -0,0 +1,101 @@
|
||||
# name: test/sql/storage/parallel/reclaim_space_primary_key_optimistic.test_slow
# description: Test space reclamation of optimistic writing with a PK constraint violation.
# group: [parallel]

load __TEST_DIR__/reclaim_space_primary_key.db

statement ok
SET preserve_insertion_order=false;

statement ok
CREATE TABLE integers AS SELECT * FROM range(10000000) t(i);

statement ok
CREATE TABLE integers2 (i INTEGER PRIMARY KEY);

statement ok
INSERT INTO integers2 VALUES (9999999);

statement error
INSERT INTO integers2 SELECT * FROM integers;
----
<REGEX>:Constraint Error.*violates primary key constraint.*

# For smaller block sizes (16KB) the total blocks increase (to twice the original amount) in the first
# iteration, and then stay constant.

statement ok
CREATE TABLE total_blocks_tbl AS SELECT total_blocks FROM pragma_database_size();

statement ok
CREATE TYPE test_result AS UNION (
    ok BOOL,
    err STRUCT(
        old BIGINT,
        allowed_max DECIMAL(21,1),
        actual BIGINT)
);

loop i 0 10

statement error
INSERT INTO integers2 SELECT * FROM integers;
----
<REGEX>:Constraint Error.*violates primary key constraint.*

statement ok
BEGIN;

statement ok
INSERT INTO integers2 VALUES (9999998);

statement error
INSERT INTO integers2 SELECT * FROM integers WHERE i <= 9999998;
----
<REGEX>:Constraint Error.*PRIMARY KEY or UNIQUE constraint violation.*

statement ok
ROLLBACK

query I
SELECT COUNT(*) - ${i} FROM integers2;
----
1

statement ok
INSERT INTO integers2 VALUES (10000000 + ${i});

statement ok
CHECKPOINT;

query I
SELECT COUNT(*) - ${i} FROM integers2;
----
2

# Ensure that the total blocks don't exceed the total blocks after the first iteration by a factor of more than 1.6.

query I
SELECT
    CASE WHEN ${i} = 0 THEN True::test_result
    WHEN current.total_blocks <= total_blocks_tbl.total_blocks * 1.6 THEN True::test_result
    ELSE {
        'old': total_blocks_tbl.total_blocks,
        'allowed_max': total_blocks_tbl.total_blocks * 1.6,
        'actual': current.total_blocks
    }::test_result
    END
FROM pragma_database_size() AS current, total_blocks_tbl;
----
true

# Adjust the total_blocks_tbl once to the count after the first iteration.

statement ok
UPDATE total_blocks_tbl SET total_blocks = (
    SELECT
        CASE WHEN ${i} = 0 THEN (SELECT current.total_blocks FROM pragma_database_size() AS current)
        ELSE (total_blocks)END
);

endloop
|
||||
67
external/duckdb/test/sql/storage/parallel/reclaim_space_rollback_mixed_batches.test_slow
vendored
Normal file
67
external/duckdb/test/sql/storage/parallel/reclaim_space_rollback_mixed_batches.test_slow
vendored
Normal file
@@ -0,0 +1,67 @@
|
||||
# name: test/sql/storage/parallel/reclaim_space_rollback_mixed_batches.test_slow
# description: Test space reclamation of optimistic writing when mixing appends of different batch sizes
# group: [parallel]

load __TEST_DIR__/reclaim_space_mixed_batches.db

require parquet

statement ok
COPY (FROM range(100000) tbl(i)) TO '__TEST_DIR__/mix_batches_small.parquet' (ROW_GROUP_SIZE 5000)

statement ok
COPY (FROM range(100000, 400000) tbl(i)) TO '__TEST_DIR__/mix_batches_large.parquet' (ROW_GROUP_SIZE 200000)

statement ok
COPY (FROM range(400000, 700000) tbl(i)) TO '__TEST_DIR__/mix_batches_odd.parquet' (ROW_GROUP_SIZE 999)

statement ok
COPY (FROM range(700000, 1000000) tbl(i)) TO '__TEST_DIR__/mix_batches_odd_again.parquet' (ROW_GROUP_SIZE 99979)

statement ok
CREATE TABLE integers(i INTEGER);

statement ok
CREATE TABLE block_count(count int)

loop i 0 10

# one big insert
statement ok
BEGIN TRANSACTION

statement ok
INSERT INTO integers FROM read_parquet(['__TEST_DIR__/mix_batches_small.parquet', '__TEST_DIR__/mix_batches_large.parquet', '__TEST_DIR__/mix_batches_odd.parquet', '__TEST_DIR__/mix_batches_odd_again.parquet'])

statement ok
ROLLBACK

# multiple separate inserts
statement ok
BEGIN TRANSACTION

statement ok
INSERT INTO integers FROM '__TEST_DIR__/mix_batches_small.parquet'

statement ok
INSERT INTO integers FROM '__TEST_DIR__/mix_batches_large.parquet'

statement ok
INSERT INTO integers FROM '__TEST_DIR__/mix_batches_odd.parquet'

statement ok
INSERT INTO integers FROM '__TEST_DIR__/mix_batches_odd_again.parquet'

statement ok
ROLLBACK

statement ok
insert into block_count select total_blocks from pragma_database_size();

# ensure there is a small diff between min and max block counts
query I
select (max(count)-min(count))<20 from block_count
----
true

endloop
|
||||
42
external/duckdb/test/sql/storage/parallel/tiny_row_group_size.test_slow
vendored
Normal file
42
external/duckdb/test/sql/storage/parallel/tiny_row_group_size.test_slow
vendored
Normal file
@@ -0,0 +1,42 @@
|
||||
# name: test/sql/storage/parallel/tiny_row_group_size.test_slow
# description: Test tiny row group size
# group: [parallel]

statement ok
ATTACH '__TEST_DIR__/tiny_row_group_size.db' (ROW_GROUP_SIZE 2048)

statement ok
USE tiny_row_group_size

statement ok
CREATE TABLE t AS FROM range(1000000) t(i)

query IIIII
SELECT SUM(i), MIN(i), MAX(i), COUNT(i), COUNT(*) FROM t
----
499999500000	0	999999	1000000	1000000

# we have a total of 1M values, ideally this is 488 row groups
query II
select count(distinct row_group_id) < 1000, max(count) from pragma_storage_info('t');
----
true	2048

query I
SELECT * FROM t OFFSET 77777 LIMIT 5
----
77777
77778
77779
77780
77781

query II
SELECT i, row_number() OVER () FROM t OFFSET 777776 LIMIT 5
----
777776	777777
777777	777778
777778	777779
777779	777780
777780	777781
|
||||
|
||||
Reference in New Issue
Block a user