should be it

This commit is contained in:
2025-10-24 19:21:19 -05:00
parent a4b23fc57c
commit f09560c7b1
14047 changed files with 3161551 additions and 1 deletions

View File

@@ -0,0 +1,65 @@
# name: test/sql/storage/vacuum/repeated_deletes_and_checkpoints.test_slow
# description: Test running repeated deletes and checkpoints
# group: [vacuum]
# Scenario: fill a single-column table, then interleave restarts with two range deletes and one insert.
# COUNT(*) and SUM(pk) are re-checked after every step so that lost or resurrected rows are detected.
load __TEST_DIR__/repeated_deletes_and_checkpoints.db
statement ok
CREATE TABLE test (pk INT);
# generate_series includes both bounds here: the counts below add up to 1000001 rows (0..1000000)
statement ok
INSERT INTO test SELECT * FROM generate_series(0, 1000000);
statement ok
CHECKPOINT;
restart
# first range delete: both bounds are exclusive, so pk 738646..978907 (240262 rows) are removed
query I
DELETE FROM test WHERE pk > 738645 AND pk < 978908;
----
240262
query II
SELECT COUNT(*), SUM(pk) FROM test;
----
759739 293669140557
restart
# second range delete, again with exclusive bounds: pk 282476..522737 (240262 rows)
query I
DELETE FROM test WHERE pk > 282475 AND pk < 522738;
----
240262
query II
SELECT COUNT(*), SUM(pk) FROM test;
----
519477 196938097654
restart
# append 101 new values that lie above the original key range
query I
INSERT INTO test SELECT * FROM generate_series(1201414, 1201514);
----
101
query II
SELECT COUNT(*), SUM(pk) FROM test;
----
519578 197059445518
restart
# the contents must be unchanged after another restart ...
query II
SELECT COUNT(*), SUM(pk) FROM test;
----
519578 197059445518
# ... and after an explicit checkpoint
statement ok
CHECKPOINT;
query II
SELECT COUNT(*), SUM(pk) FROM test;
----
519578 197059445518

View File

@@ -0,0 +1,97 @@
# name: test/sql/storage/vacuum/test_truncate_after_delete.test_slow
# description: Test truncating of the database file after data is deleted
# group: [vacuum]
load __TEST_DIR__/truncate_after_delete.db
statement ok
CREATE TABLE uuids (i VARCHAR);
# Record a baseline block count in a table so that it can be refined during the warm-up iterations below.
# For smaller block sizes (16KB) the total blocks alternate between a few values in the loop.
statement ok
CREATE TABLE blocks AS SELECT total_blocks AS b FROM pragma_database_size();
loop i 0 20
# integers is re-created further down in every iteration, so drop the copy left by the previous one
statement ok
DROP TABLE IF EXISTS integers;
statement ok
INSERT INTO uuids SELECT uuid()::varchar FROM range(1000000);
statement ok
CHECKPOINT;
# note that just deleting does not free the space yet
# that is because deleting + checkpointing causes the metadata to be written at the END of the file
# as we cannot override the deleted data yet
statement ok
DELETE FROM uuids;
statement ok
CHECKPOINT;
# doing ANOTHER action and checkpointing again causes the data to be truncated
# since the metadata can then overwrite the previously written data
statement ok
CREATE TABLE integers(i INTEGER);
statement ok
INSERT INTO integers VALUES (1), (2), (3);
statement ok
CHECKPOINT;
# Ensure that the current block count does not exceed the recorded maximum by more than a factor of 1.2.
# Iterations 0-9 are warm-up: they always pass and are only used to establish the maximum (see below).
query I
SELECT CASE WHEN ${i} < 10 THEN True
WHEN c.total_blocks <= blocks.b * 1.2 THEN True
ELSE False END
FROM pragma_database_size() AS c, blocks;
----
1
# Adjust blocks to the maximum of the first 10 warm-up iterations.
# From iteration 10 onwards the stored value is left untouched.
statement ok
UPDATE blocks SET b = (
SELECT CASE WHEN ${i} < 10 THEN
(SELECT list_max(list_value(c.total_blocks, b)) FROM pragma_database_size() AS c)
ELSE (b) END);
endloop
restart
# after a restart, integers still holds the three rows from the last iteration and uuids is empty
query I
FROM integers;
----
1
2
3
query I
FROM uuids;
----
# resume operation after truncation
restart
statement ok
DELETE FROM integers;
statement ok
INSERT INTO integers FROM range(1000000);
query I
SELECT SUM(i) FROM integers;
----
499999500000
restart
# the freshly written data must still be there after one more restart
query I
SELECT SUM(i) FROM integers;
----
499999500000

View File

@@ -0,0 +1,112 @@
# name: test/sql/storage/vacuum/vacuum_deletes_cleanup.test_slow
# description: Verify that deleting rows and re-appending does not increase storage size
# group: [vacuum]
load __TEST_DIR__/vacuum_deletes_cleanup.db
statement ok
CREATE TABLE integers(i INTEGER);
# verify that deleting an entire table in a loop doesn't increase database size (i.e. deletes are vacuumed correctly)
loop i 0 10
statement ok
INSERT INTO integers SELECT * FROM range(1000000);
query I
SELECT SUM(i) FROM integers;
----
499999500000
statement ok
DELETE FROM integers;
query I
SELECT SUM(i) FROM integers;
----
NULL
# ensure that the expected total storage size is the same as in the first iteration of the loop
# (the query has no expected output of its own; results are compared through the shared label)
query I nosort expected_blocks_delete_table
SELECT total_blocks FROM pragma_database_size();
endloop
# do the same but delete in segments
# for smaller block sizes (16KB) the total blocks alternate between a few values in the loop,
# therefore, we need to compare to a range of total block counts
statement ok
CREATE TABLE total_blocks_tbl AS SELECT total_blocks FROM pragma_database_size();
loop i 0 10
statement ok
INSERT INTO integers SELECT * FROM range(1000000);
query I
SELECT SUM(i) FROM integers;
----
499999500000
# the deletes below are cumulative: each one removes the next 200000 values that are still present
statement ok
DELETE FROM integers WHERE i < 200000;
query I
SELECT SUM(i) FROM integers;
----
479999600000
statement ok
DELETE FROM integers WHERE i < 400000;
query I
SELECT SUM(i) FROM integers;
----
419999700000
statement ok
DELETE FROM integers WHERE i < 600000;
query I
SELECT SUM(i) FROM integers;
----
319999800000
statement ok
DELETE FROM integers WHERE i < 800000;
query I
SELECT SUM(i) FROM integers;
----
179999900000
statement ok
DELETE FROM integers;
query I
SELECT SUM(i) FROM integers;
----
NULL
# ensure that the total blocks don't exceed the total blocks after the first iteration
# by more than a factor of 1.2; iteration 0 always passes because it only establishes that baseline
query I
SELECT CASE WHEN ${i} = 0 THEN True
WHEN current.total_blocks <= total_blocks_tbl.total_blocks * 1.2 THEN True
ELSE False END
FROM pragma_database_size() AS current, total_blocks_tbl;
----
1
# adjust total_blocks_tbl once to the count after the first iteration
statement ok
UPDATE total_blocks_tbl SET total_blocks = (
SELECT CASE WHEN ${i} = 0 THEN (SELECT current.total_blocks FROM pragma_database_size() AS current)
ELSE (total_blocks) END);
endloop

View File

@@ -0,0 +1,19 @@
# name: test/sql/storage/vacuum/vacuum_deletes_index.test_slow
# description: Test vacuuming of deletes in combination with indexes
# group: [vacuum]
# use a database file named after this test so that it cannot collide with the file
# used by vacuum_deletes_cleanup.test_slow when both tests share __TEST_DIR__
load __TEST_DIR__/vacuum_deletes_index.db
statement ok
CREATE TABLE integers(i INTEGER PRIMARY KEY);
statement ok
INSERT INTO integers SELECT * FROM range(1000000);
# delete the lower half of the key range
statement ok
DELETE FROM integers WHERE i < 500000
# a key from the surviving half must still be found
query I
SELECT * FROM integers WHERE i=600000
----
600000
# a key from the deleted half must no longer be found
query I
SELECT COUNT(*) FROM integers WHERE i=100000
----
0

View File

@@ -0,0 +1,45 @@
# name: test/sql/storage/vacuum/vacuum_partial_deletes.test_slow
# description: Verify that deletes get vacuumed correctly through merging of adjacent row groups
# group: [vacuum]
load __TEST_DIR__/vacuum_partial_deletes.db
statement ok
CREATE TABLE integers(i INTEGER);
statement ok
INSERT INTO integers SELECT * FROM range(1000000);
# NOTE(review): presumably raises the limit on vacuum tasks so that all row groups can be merged
# in a single checkpoint - confirm against the documentation of this setting
statement ok
SET max_vacuum_tasks=99
# sum of the odd values only, i.e. of the rows that survive the delete further down
query I
SELECT SUM(i) FROM integers WHERE i%2<>0
----
250000000000
statement ok
CHECKPOINT
# 1M rows, 128K each is around ~9 row groups
query I
SELECT COUNT(DISTINCT row_group_id) > 6 AND COUNT(DISTINCT row_group_id) <= 10 FROM pragma_storage_info('integers')
----
true
# delete every even value, leaving each row group roughly half empty
statement ok
DELETE FROM integers WHERE i%2=0
statement ok
CHECKPOINT
# the remaining data must match the sum of the odd values computed before the delete
query I
SELECT SUM(i) FROM integers
----
250000000000
# after deleting we have 500K rows left, which should be 4~5 row groups
query I
SELECT COUNT(DISTINCT row_group_id) > 3 AND COUNT(DISTINCT row_group_id) <= 6 FROM pragma_storage_info('integers')
----
true

View File

@@ -0,0 +1,63 @@
# name: test/sql/storage/vacuum/vacuum_partial_deletes_cleanup.test_slow
# description: Verify that deleting rows and re-appending does not increase storage size
# group: [vacuum]
load __TEST_DIR__/vacuum_deletes_partial_cleanup.db
statement ok
CREATE TABLE integers(i INTEGER);
# verify that emptying a table in two partial deletes (even values, checkpoint, odd values) in a loop
# doesn't increase database size (i.e. deletes are vacuumed correctly)
# for smaller block sizes (16KB) the total blocks alternate between a few values in the loop,
# therefore, we need to compare to a range of total block counts
statement ok
CREATE TABLE total_blocks_tbl AS SELECT total_blocks FROM pragma_database_size();
loop i 0 10
statement ok
INSERT INTO integers SELECT * FROM range(1000000);
query I
SELECT SUM(i) FROM integers;
----
499999500000
# first half: delete the even values and checkpoint while the odd values are still present
query I
DELETE FROM integers WHERE i%2=0
----
500000
statement ok
CHECKPOINT
# second half: delete the odd values, which leaves the table empty
query I
DELETE FROM integers WHERE i%2<>0
----
500000
query I
SELECT SUM(i) FROM integers
----
NULL
# ensure that the total blocks don't exceed the total blocks after the first iteration
# by more than a factor of 2; iteration 0 always passes because it only establishes that baseline
# on failure the returned map also carries both block counts, so they show up in the test output
# NOTE(review): the expected value prints 1 rather than true, presumably because the CASE branches
# are unified to a map with integer values - confirm
query I
SELECT CASE WHEN ${i} = 0 THEN MAP {'within_limits': True}
WHEN current.total_blocks <= total_blocks_tbl.total_blocks * 2 THEN MAP {'within_limits': True}
ELSE MAP {'within_limits': False, 'current.total_blocks': current.total_blocks, 'total_blocks_tbl.total_blocks': total_blocks_tbl.total_blocks} END
FROM pragma_database_size() AS current, total_blocks_tbl;
----
{within_limits=1}
# adjust total_blocks_tbl once to the count after the first iteration
statement ok
UPDATE total_blocks_tbl SET total_blocks = (
SELECT CASE WHEN ${i} = 0 THEN (SELECT current.total_blocks FROM pragma_database_size() AS current)
ELSE (total_blocks) END);
endloop

View File

@@ -0,0 +1,44 @@
# name: test/sql/storage/vacuum/vacuum_partial_deletes_complex.test_slow
# description: Verify that deletes get vacuumed correctly through merging of adjacent row groups
# group: [vacuum]
load __TEST_DIR__/vacuum_partial_deletes_complex.db
statement ok
CREATE TABLE integers(i INTEGER);
statement ok
INSERT INTO integers SELECT * FROM range(1000000);
# sum of the values that are not multiples of 3, i.e. of the rows that survive the delete further down
query I
SELECT SUM(i) FROM integers WHERE i%3<>0
----
333332666667
statement ok
CHECKPOINT
# 1M rows, 128K each is around ~9 row groups
query I
SELECT COUNT(DISTINCT row_group_id) > 6 AND COUNT(DISTINCT row_group_id) <= 10 FROM pragma_storage_info('integers')
----
true
# delete every third value, so each row group keeps about two thirds of its rows
statement ok
DELETE FROM integers WHERE i%3=0
statement ok
CHECKPOINT
# the remaining data must match the sum computed before the delete
query I
SELECT SUM(i) FROM integers
----
333332666667
# after deleting we have 666K rows left, which should be 6~7 row groups
# note that this is more difficult, since after deleting each row group has ~80K rows
# this means that we need to merge 3 row groups into 2 row groups
query I
SELECT COUNT(DISTINCT row_group_id) > 4 AND COUNT(DISTINCT row_group_id) <= 7 FROM pragma_storage_info('integers')
----
true

View File

@@ -0,0 +1,100 @@
# name: test/sql/storage/vacuum/vacuum_partial_deletes_mixed.test_slow
# description: Verify that deletes get vacuumed correctly through merging of adjacent row groups
# group: [vacuum]
load __TEST_DIR__/vacuum_partial_deletes_mixed.db
statement ok
CREATE TABLE integers(i INTEGER);
statement ok
INSERT INTO integers SELECT * FROM range(1000000);
# 1M rows, 128K each is around ~9 row groups
query I
SELECT COUNT(DISTINCT row_group_id) > 6 AND COUNT(DISTINCT row_group_id) <= 10 FROM pragma_storage_info('integers')
----
true
# mix of deletions
# we use weird/odd numbers here as well for testing purposes
# after every delete, COUNT/SUM/MIN/MAX are re-checked to pin down exactly which rows remain
# 0..157K - delete every odd entry
query I
DELETE FROM integers WHERE i%2 AND i<157353;
----
78676
query IIII
SELECT COUNT(*), SUM(i), MIN(i), MAX(i) FROM integers
----
921324 493809587024 0 999999
# 157K..433K - delete ALL entries
query I
DELETE FROM integers WHERE i>=157353 AND i<433427;
----
276074
query IIII
SELECT COUNT(*), SUM(i), MIN(i), MAX(i) FROM integers
----
645250 412260226201 0 999999
# 433K..512K - delete every other block of ~5K (4973) consecutive entries
query I
DELETE FROM integers WHERE (i//4973)%2=0 AND i>=433427 AND i<512933;
----
39784
query IIII
SELECT COUNT(*), SUM(i), MIN(i), MAX(i) FROM integers
----
605466 393365969137 0 999999
# 512K..721K - delete every 7th entry
query I
DELETE FROM integers WHERE i%7=0 AND i>=512933 AND i<721377
----
29777
query IIII
SELECT COUNT(*), SUM(i), MIN(i), MAX(i) FROM integers
----
575689 374988944702 0 999999
# 721K..910K - delete roughly every 3rd entry, but based on the hash to make it more random
query I
DELETE FROM integers WHERE hash(i)::DOUBLE%3=0 AND i>=721377 AND i<909999
----
62853
query IIII
SELECT COUNT(*), SUM(i), MIN(i), MAX(i) FROM integers
----
512836 323738360425 0 999999
# 910K..1M - delete roughly every 2nd entry, but based on the hash to make it more random
# (this also removes the largest values, which is why MAX(i) drops below 999999)
query I
DELETE FROM integers WHERE hash(i)::DOUBLE%2=0 AND i>=909999
----
89983
query IIII
SELECT COUNT(*), SUM(i), MIN(i), MAX(i) FROM integers
----
422853 237804579942 0 987388
statement ok
CHECKPOINT
# the checkpoint must not change the visible contents of the table
query IIII
SELECT COUNT(*), SUM(i), MIN(i), MAX(i) FROM integers
----
422853 237804579942 0 987388
# after deleting we have 422K rows left, which should be 4 row groups
# note that achieving exactly 4 row groups is difficult because of the mixed nature of the deletes
query I
SELECT COUNT(DISTINCT row_group_id) >= 4 AND COUNT(DISTINCT row_group_id) <= 7 FROM pragma_storage_info('integers')
----
true