should be it

2025-10-24 19:21:19 -05:00
parent a4b23fc57c
commit f09560c7b1
14047 changed files with 3161551 additions and 1 deletion

@@ -0,0 +1,44 @@
# name: test/sql/index/art/vacuum/test_art_vacuum.test_slow
# description: Test vacuuming leaves.
# group: [vacuum]
statement ok
PRAGMA enable_verification
statement ok
CREATE TABLE delete_vacuum (id INT);
statement ok
INSERT INTO delete_vacuum SELECT 10 FROM range(10000);
statement ok
INSERT INTO delete_vacuum SELECT 11 FROM range(10000);
statement ok
INSERT INTO delete_vacuum SELECT 12 FROM range(10000);
statement ok
INSERT INTO delete_vacuum SELECT 13 FROM range(10000);
statement ok
CREATE INDEX idx ON delete_vacuum(id);
statement ok
DELETE FROM delete_vacuum WHERE id = 11 OR id = 10;
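# illustrative sketch: after the vacuum, the 20000 remaining rows (ids 12 and 13) should still be present
query I
SELECT COUNT(*) FROM delete_vacuum WHERE id >= 12;
----
20000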
# Reuse the buffer IDs of vacuumed buffers.
statement ok
CREATE TABLE reuse_buffer_tbl (i INTEGER);
statement ok
INSERT INTO reuse_buffer_tbl SELECT range FROM range(200000);
statement ok
CREATE INDEX idx_reuse_buffer ON reuse_buffer_tbl(i);
statement ok
DELETE FROM reuse_buffer_tbl WHERE i < 100000;
statement ok
INSERT INTO reuse_buffer_tbl SELECT range FROM range(200000);
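# illustrative sketch: a re-inserted key (150000 chosen arbitrarily) should now appear twice,
# once from the surviving first insert and once from the second
query I
SELECT COUNT(*) FROM reuse_buffer_tbl WHERE i = 150000;
----
2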

@@ -0,0 +1,125 @@
# name: test/sql/index/art/vacuum/test_art_vacuum_integers.test_slow
# description: Test checkpointing for vacuum operations with integers
# group: [vacuum]
statement ok
PRAGMA enable_verification
statement ok
CREATE FUNCTION mem_to_bytes(x) AS CASE
WHEN CONTAINS(x, 'KiB') THEN REPLACE(x, 'KiB', '')::INT * 1024.0
WHEN CONTAINS(x, 'MiB') THEN REPLACE(x, 'MiB', '')::INT * 1024.0 * 1024
WHEN CONTAINS(x, 'GiB') THEN REPLACE(x, 'GiB', '')::INT * 1024.0 * 1024 * 1024
WHEN CONTAINS(x, 'TiB') THEN REPLACE(x, 'TiB', '')::INT * 1024.0 * 1024 * 1024 * 1024
WHEN x = '0 bytes' THEN 0
ELSE x::INT END;
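# illustrative sketch: mem_to_bytes should map strings like '512KiB' or '2MiB' to their byte counts
query I
SELECT mem_to_bytes('512KiB') = 512 * 1024 AND mem_to_bytes('2MiB') = 2 * 1024 * 1024;
----
true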
# store the memory usage of 1M integers in the 'base' table
# and verify that the memory increases and drops again
# this is the setup phase of the memory tests in this file
statement ok
CREATE TABLE temp (i integer);
statement ok
CREATE TABLE empty AS
SELECT mem_to_bytes(memory_usage) AS usage FROM pragma_database_size();
statement ok
INSERT INTO temp SELECT * FROM range(1000000);
statement ok
CREATE TABLE base AS
SELECT mem_to_bytes(memory_usage) AS usage FROM pragma_database_size();
query I
SELECT base.usage > empty.usage
FROM base, empty;
----
true
statement ok
DROP TABLE temp;
statement ok
UPDATE empty SET usage = (SELECT mem_to_bytes(current.memory_usage) FROM pragma_database_size() AS current);
# create a table with an index, then restart the database
# due to serialization + lazy loading, the index size after the reload must be almost zero
statement ok
CREATE TABLE t (i integer);
statement ok
INSERT INTO t SELECT * FROM range(1000000);
statement ok
CREATE INDEX idx ON t(i);
query I
SELECT mem_to_bytes(current.memory_usage) < 4 * base.usage
FROM base, pragma_database_size() current;
----
1
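# illustrative sketch: an equality lookup on the indexed column (arbitrary key 123456) should still return exactly one row
query I
SELECT COUNT(*) FROM t WHERE i = 123456;
----
1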
# insert 250K values into every fourth leaf, partially deserializing the ART
# the size of the database also increases more significantly, as we now have duplicates in leaves,
# i.e., we no longer inline row IDs
statement ok
INSERT INTO t SELECT range * 4 FROM range(250000);
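# illustrative sketch: keys that are multiples of 4 (e.g., 8) should now map to two row IDs in their leaf
query I
SELECT COUNT(*) FROM t WHERE i = 8;
----
2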
# store the current size of the DB
statement ok
CREATE TABLE db_size AS
SELECT mem_to_bytes(current.memory_usage) AS usage
FROM pragma_database_size() AS current;
# now perform one bulk deletion of half the values
# and then loop and perform some smaller deletions
statement ok
DELETE FROM t WHERE i > 500000;
# the previous bulk deletion triggers a vacuum operation: we delete excess_buffer_count in-memory buffers
# and move their data into buffers that still have free space and that are potentially not yet
# deserialized, which deserializes them. Therefore, the overall ART size decreases, but the in-memory
# ART size can temporarily increase, as we deserialize additional buffers.
# hence, the check below allows a margin of roughly 10MB
query I
SELECT mem_to_bytes(current.memory_usage) < db_size.usage + 10000000
FROM db_size, pragma_database_size() current;
----
1
statement ok
UPDATE db_size SET usage = (SELECT mem_to_bytes(current.memory_usage) AS usage
FROM pragma_database_size() AS current);
loop threshold 0 10
statement ok
DELETE FROM t WHERE i < (${threshold} * 25000);
endloop
query I
SELECT mem_to_bytes(current.memory_usage) < db_size.usage
FROM db_size, pragma_database_size() current;
----
1
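# illustrative sketch: the looped deletions should have removed all keys below the last threshold (9 * 25000 = 225000)
query I
SELECT COUNT(*) FROM t WHERE i < 225000;
----
0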
statement ok
UPDATE db_size SET usage = (SELECT mem_to_bytes(current.memory_usage) AS usage
FROM pragma_database_size() AS current);
statement ok
DELETE FROM t;
query I
SELECT mem_to_bytes(current.memory_usage) < db_size.usage
FROM db_size, pragma_database_size() current;
----
1
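# illustrative sketch: the table, and with it the index, should now be empty
query I
SELECT COUNT(*) FROM t;
----
0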

@@ -0,0 +1,64 @@
# name: test/sql/index/art/vacuum/test_art_vacuum_rollback.test
# description: Test rolling back writes to many tables
# group: [vacuum]
load __TEST_DIR__/art_vacuum_rollback.db
loop i 0 10
statement ok
CREATE TABLE t${i}(i INTEGER UNIQUE);
endloop
statement ok con1
BEGIN
loop i 0 10
statement ok con1
INSERT INTO t${i} SELECT CASE WHEN i%2<>0 THEN NULL ELSE i END FROM range(10000) t(i);
endloop
loop i 0 10
statement ok
INSERT INTO t${i} FROM range(10000, 10077);
endloop
statement ok
INSERT INTO t7 VALUES (42);
statement error con1
COMMIT
----
42
loop i 0 10
query I con1
SELECT COUNT(i) - (CASE WHEN ${i}==7 THEN 1 ELSE 0 END) FROM t${i}
----
77
endloop
# after the rolled-back transaction, insert values that actually commit
loop i 0 10
statement ok
INSERT INTO t${i} FROM range(20000, 30000);
endloop
loop i 0 10
query I con1
SELECT COUNT(i) - (CASE WHEN ${i}==7 THEN 1 ELSE 0 END) FROM t${i}
----
10077
endloop
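# illustrative sketch: none of the rows from the rolled-back transaction should be visible (checked on t0)
query I
SELECT COUNT(*) FROM t0 WHERE i < 10000;
----
0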

@@ -0,0 +1,137 @@
# name: test/sql/index/art/vacuum/test_art_vacuum_strings.test_slow
# description: Test checkpointing for vacuum operations with strings
# group: [vacuum]
statement ok
PRAGMA enable_verification
statement ok
CREATE FUNCTION mem_to_bytes(x) AS CASE
WHEN CONTAINS(x, 'KiB') THEN REPLACE(x, 'KiB', '')::INT * 1024.0
WHEN CONTAINS(x, 'MiB') THEN REPLACE(x, 'MiB', '')::INT * 1024.0 * 1024
WHEN CONTAINS(x, 'GiB') THEN REPLACE(x, 'GiB', '')::INT * 1024.0 * 1024 * 1024
WHEN CONTAINS(x, 'TiB') THEN REPLACE(x, 'TiB', '')::INT * 1024.0 * 1024 * 1024 * 1024
WHEN x = '0 bytes' THEN 0
ELSE x::INT END;
# store the memory usage of 100K strings in the 'base' table
# and verify that the memory increases and drops again
# this is the setup phase of the memory tests in this file
statement ok
CREATE TABLE temp (i varchar);
statement ok
CREATE TABLE empty AS
SELECT mem_to_bytes(memory_usage) AS usage FROM pragma_database_size();
statement ok
INSERT INTO temp SELECT range || 'I am' || range || 'a long not' || range || 'inlined string' || range FROM range(100000) AS range;
statement ok
CREATE TABLE base AS
SELECT mem_to_bytes(memory_usage) AS usage FROM pragma_database_size();
query I
SELECT base.usage > empty.usage
FROM base, empty;
----
true
statement ok
DROP TABLE temp;
statement ok
UPDATE empty SET usage = (SELECT mem_to_bytes(current.memory_usage) FROM pragma_database_size() AS current);
# create a table with an index, then restart the database
# due to serialization + lazy loading, the index size after the reload must be almost zero
statement ok
CREATE TABLE t (i varchar);
statement ok
INSERT INTO t SELECT range || 'I am' || range || 'a long not' || range || 'inlined string' || range FROM range(100000) AS range;
statement ok
CREATE INDEX idx ON t(i);
query I
SELECT mem_to_bytes(current.memory_usage) > 2 * base.usage AND mem_to_bytes(current.memory_usage) < 4 * base.usage
FROM base, pragma_database_size() current;
----
1
# insert 100K values into every fourth leaf, deserializing a significant part of the ART
statement ok
INSERT INTO t SELECT (range * 4) || 'I am' || (range * 4) || 'a long not' || (range * 4) || 'inlined string' || (range * 4) FROM range(100000) AS range;
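# illustrative sketch: a key whose numeric component is a multiple of 4 should now appear twice in its leaf
query I
SELECT COUNT(*) FROM t WHERE i = 4 || 'I am' || 4 || 'a long not' || 4 || 'inlined string' || 4;
----
2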
query I
SELECT
case
when mem_to_bytes(current.memory_usage) > 4 * base.usage AND mem_to_bytes(current.memory_usage) <= 8 * base.usage
then true
else
concat('current mem usage not between 4X and 8X base (current ', current.memory_usage, ', base ', base.usage, ')')::union(err varchar, b bool)
end
FROM base, pragma_database_size() current;
----
true
# store the current size of the DB
statement ok
CREATE TABLE db_size AS
SELECT mem_to_bytes(current.memory_usage) AS usage
FROM pragma_database_size() AS current;
# now perform one bulk deletion of half the values
# and then loop and perform some smaller deletions
statement ok
DELETE FROM t WHERE rowid > (SELECT AVG(rowid) FROM t);
# the previous bulk deletion triggers a vacuum operation: we delete excess_buffer_count in-memory buffers
# and move their data into buffers that still have free space and that are potentially not yet
# deserialized, which deserializes them. Therefore, the overall ART size decreases, but the in-memory
# ART size can temporarily increase, as we deserialize additional buffers.
# hence, the check below allows a margin of roughly 10MB
query I
SELECT mem_to_bytes(current.memory_usage) < db_size.usage + 10000000
FROM db_size, pragma_database_size() current;
----
1
statement ok
UPDATE db_size SET usage = (SELECT mem_to_bytes(current.memory_usage) AS usage
FROM pragma_database_size() AS current);
loop threshold 0 4
statement ok
DELETE FROM t WHERE rowid > (SELECT AVG(rowid) FROM t);
endloop
query I
SELECT mem_to_bytes(current.memory_usage) < db_size.usage
FROM db_size, pragma_database_size() current;
----
1
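# illustrative sketch: roughly 6250 rows should remain after the halving deletions, assuming sequential rowids
query I
SELECT COUNT(*) BETWEEN 6000 AND 7000 FROM t;
----
true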
statement ok
UPDATE db_size SET usage = (SELECT mem_to_bytes(current.memory_usage) AS usage
FROM pragma_database_size() AS current);
# only ~6250 values remain (~4MB), and we are only slightly under 4MB after the vacuum (~3.6MB),
# which mem_to_bytes rounds back up to 4MB; hence, the check below uses <= instead of <
statement ok
DELETE FROM t;
query I
SELECT mem_to_bytes(current.memory_usage) <= db_size.usage
FROM db_size, pragma_database_size() current;
----
1