should be it

This commit is contained in:
2025-10-24 19:21:19 -05:00
parent a4b23fc57c
commit f09560c7b1
14047 changed files with 3161551 additions and 1 deletions

View File

@@ -0,0 +1,103 @@
# name: test/sql/index/art/memory/test_art_linear.test_slow
# description: Test the memory usage of the ART for linear integer insertions
# group: [memory]
require noforcestorage
require skip_reload
statement ok
PRAGMA enable_verification;

# Baseline: load 1M sequential integers into a primary-key table, delete every row,
# and snapshot the ART_INDEX memory tag into `base`.
# A single empty buffer is kept alive per allocator, so this snapshot is not expected to be zero.

statement ok
CREATE TABLE t (i INTEGER PRIMARY KEY);

statement ok
INSERT INTO t
SELECT * FROM range(1000000);

statement ok
DELETE FROM t;

statement ok
CREATE TABLE base AS
SELECT memory_usage_bytes
FROM duckdb_memory()
WHERE tag = 'ART_INDEX';

# Repeat the insert-all / delete-all cycle five times.
# After each cycle the ART_INDEX usage must equal the baseline exactly; on a mismatch
# the query returns a descriptive string (via the UNION type) instead of `true`.

loop i 0 5

statement ok
INSERT INTO t
SELECT * FROM range(1000000);

statement ok
DELETE FROM t;

query I
SELECT
    CASE
        WHEN cur.memory_usage_bytes = base.memory_usage_bytes THEN true
        ELSE concat('Current memory usage ', cur.memory_usage_bytes, ' is not equal to base usage ', base.memory_usage_bytes)::UNION(error VARCHAR, b BOOLEAN)
    END
FROM duckdb_memory() AS cur
CROSS JOIN base
WHERE cur.tag = 'ART_INDEX';
----
true

endloop

# An explicit VACUUM of the table must bring the ART_INDEX memory tag down to zero bytes.

statement ok
VACUUM t;

query I
SELECT memory_usage_bytes = 0
FROM duckdb_memory()
WHERE tag = 'ART_INDEX';
----
true

# Clean up so that the next section can recreate `t` from scratch.

statement ok
DROP TABLE t;

# Build a fresh primary-key table holding 1M sequential integers.
# This time drop the whole table instead of deleting its entries,
# and verify that the ART_INDEX memory usage falls to zero.

statement ok
CREATE TABLE t (i INTEGER PRIMARY KEY);

statement ok
INSERT INTO t
SELECT * FROM range(1000000);

statement ok
DROP TABLE t;

query I
SELECT memory_usage_bytes = 0
FROM duckdb_memory()
WHERE tag = 'ART_INDEX';
----
true

# Create a table with a primary key over 1M sequential integers and store the
# ART_INDEX memory usage of the fully populated index in `full_idx`.
# Then delete the upper half of the keys (i > 500000) and verify that the index
# memory shrinks to at most 60% of the stored value, i.e. by approximately half.

statement ok
CREATE TABLE t (i INTEGER PRIMARY KEY);

statement ok
INSERT INTO t
SELECT * FROM range(1000000);

statement ok
CREATE TABLE full_idx AS
SELECT memory_usage_bytes
FROM duckdb_memory()
WHERE tag = 'ART_INDEX';

statement ok
DELETE FROM t WHERE i > 500000;

# On failure the query returns a message (via the UNION type) instead of `true`.
# The message states the actual condition that was violated: the check is an upper
# bound of 60% of the full index usage, not an equality with a base value.

query I
SELECT
    CASE
        WHEN cur.memory_usage_bytes <= full_idx.memory_usage_bytes * 0.6 THEN true
        ELSE concat('Current memory usage ', cur.memory_usage_bytes, ' exceeds 60% of the full index usage ', full_idx.memory_usage_bytes)::UNION(error VARCHAR, b BOOLEAN)
    END
FROM duckdb_memory() AS cur
CROSS JOIN full_idx
WHERE cur.tag = 'ART_INDEX';
----
true

View File

@@ -0,0 +1,108 @@
# name: test/sql/index/art/memory/test_art_non_linear.test_slow
# description: Test the memory usage of the ART for various workloads
# group: [memory]
require skip_reload
statement ok
PRAGMA enable_verification;

# mem_to_bytes(x): convert a human-readable size string (the tests below pass
# pragma_database_size().memory_usage) into a number of bytes.
#   - 'KiB' / 'MiB' / 'GiB' / 'TiB' suffix: strip the unit, cast the remainder to INT
#     (any fractional part is not preserved) and scale by the matching power of 1024.
#   - 'byte' / 'bytes' suffix: strip the suffix and use the number as-is. This covers
#     '0 bytes' as well as any other sub-KiB value, which a bare integer cast of the
#     whole string would reject.
#     NOTE(review): presumably sub-KiB sizes are rendered as '<n> bytes' / '1 byte' - confirm.
#   - anything else: treated as a plain integer byte count.

statement ok
CREATE FUNCTION mem_to_bytes(x) AS CASE
    WHEN CONTAINS(x, 'KiB') THEN REPLACE(x, 'KiB', '')::INT * 1024.0
    WHEN CONTAINS(x, 'MiB') THEN REPLACE(x, 'MiB', '')::INT * 1024.0 * 1024
    WHEN CONTAINS(x, 'GiB') THEN REPLACE(x, 'GiB', '')::INT * 1024.0 * 1024 * 1024
    WHEN CONTAINS(x, 'TiB') THEN REPLACE(x, 'TiB', '')::INT * 1024.0 * 1024 * 1024 * 1024
    WHEN CONTAINS(x, 'byte') THEN REPLACE(REPLACE(x, 'bytes', ''), 'byte', '')::BIGINT
    ELSE x::BIGINT END;

# test 100K short strings
# Each id has the shape '<s1>a<s2>b<s3>c', where s1/s2/s3 are derived from the row
# number by multiplying with a large constant and reducing modulo 100000 / 1000 / 100.

statement ok
CREATE TABLE strings_temp AS
SELECT
    ((i * 95823983533) % 100000)::VARCHAR AS s1,
    ((i * 547892347987) % 1000)::VARCHAR AS s2,
    ((i * 847892347987) % 100)::VARCHAR AS s3
FROM range(100000) AS tbl(i);

statement ok
CREATE TABLE art AS
SELECT s1 || 'a' || s2 || 'b' || s3 || 'c' AS id
FROM strings_temp;

statement ok
DROP TABLE strings_temp;

statement ok
CREATE INDEX idx ON art USING ART(id);

# With the index in place, the reported memory usage must stay below 7500000 bytes.

query I
SELECT mem_to_bytes(memory_usage) < 7500000
FROM pragma_database_size();
----
true

statement ok
DROP TABLE art;

# test 100K long strings
# Same s1/s2/s3 building blocks as the short-string test, but each id additionally
# embeds s2 repeated s3 times, which makes the keys considerably longer.

statement ok
CREATE TABLE strings_temp AS
SELECT
    ((i * 95823983533) % 100000)::VARCHAR AS s1,
    ((i * 547892347987) % 1000)::VARCHAR AS s2,
    ((i * 847892347987) % 100)::VARCHAR AS s3
FROM range(100000) AS tbl(i);

statement ok
CREATE TABLE art AS
SELECT
    s3 || 'a' || s2 || 'b' || s3 || 'c'
        || repeat(s2, s3::INT)
        || s1 || 'a' || s3 || 'c' AS id
FROM strings_temp;

statement ok
DROP TABLE strings_temp;

statement ok
CREATE INDEX idx ON art USING ART(id);

# Expected index footprint: 11 blocks for prefixes + 2 blocks for Node4 + 6 blocks for Node16
# = 19 blocks * 256KB = 4864KB.
# The database is already approximately 17MB WITHOUT the index, hence the 23000000-byte bound.

query I
SELECT mem_to_bytes(memory_usage) < 23000000
FROM pragma_database_size();
----
true

statement ok
DROP TABLE art;

# test 100K mostly distinct BIGINT keys
# Keys are (row number * 9876983769044) modulo 10000000, computed in INT128 and stored as INT64.

statement ok
CREATE TABLE art AS
SELECT ((range * 9876983769044::INT128) % 10000000)::INT64 AS id
FROM range(100000);

statement ok
CREATE INDEX idx ON art USING ART(id);

# Expected index footprint: 1 block for prefixes, 6 blocks for Node4, 2 blocks for Node256,
# recorded as 8 blocks * 256KB = 2048KB.
# NOTE(review): the per-node counts above add up to 9 blocks (2304KB), not 8 - confirm
# which figure is current. The 4000000-byte bound below holds for either total.

query I
SELECT mem_to_bytes(memory_usage) < 4000000
FROM pragma_database_size();
----
true

statement ok
DROP TABLE art;

# test 100K mostly duplicate keys
# Keys are reduced modulo 1000, so the 100K rows share at most 1000 distinct values.
# The column is cast to INT64, i.e. it is a BIGINT key just like in the previous test.

statement ok
CREATE TABLE art AS
SELECT ((range * 9876983769044::INT128) % 1000)::INT64 AS id
FROM range(100000);

statement ok
CREATE INDEX idx ON art USING ART(id);

# The reported memory usage must stay below 4000000 bytes.

query I
SELECT mem_to_bytes(memory_usage) < 4000000
FROM pragma_database_size();
----
true

statement ok
DROP TABLE art;

View File

@@ -0,0 +1,76 @@
# name: test/sql/index/art/memory/test_art_varchar.test_slow
# description: Test the memory usage of the ART for a big table with a VARCHAR column
# group: [memory]
# test issue #7760
require 64bit
require vector_size 2048
statement ok
PRAGMA enable_verification;

# mem_to_bytes(x): convert a human-readable size string (the tests below pass
# pragma_database_size().memory_usage) into a number of bytes.
#   - 'KiB' / 'MiB' / 'GiB' / 'TiB' suffix: strip the unit, cast the remainder to INT
#     (any fractional part is not preserved) and scale by the matching power of 1024.
#   - 'byte' / 'bytes' suffix: strip the suffix and use the number as-is. This covers
#     '0 bytes' as well as any other sub-KiB value, which a bare integer cast of the
#     whole string would reject.
#     NOTE(review): presumably sub-KiB sizes are rendered as '<n> bytes' / '1 byte' - confirm.
#   - anything else: treated as a plain integer byte count.

statement ok
CREATE FUNCTION mem_to_bytes(x) AS CASE
    WHEN CONTAINS(x, 'KiB') THEN REPLACE(x, 'KiB', '')::INT * 1024.0
    WHEN CONTAINS(x, 'MiB') THEN REPLACE(x, 'MiB', '')::INT * 1024.0 * 1024
    WHEN CONTAINS(x, 'GiB') THEN REPLACE(x, 'GiB', '')::INT * 1024.0 * 1024 * 1024
    WHEN CONTAINS(x, 'TiB') THEN REPLACE(x, 'TiB', '')::INT * 1024.0 * 1024 * 1024 * 1024
    WHEN CONTAINS(x, 'byte') THEN REPLACE(REPLACE(x, 'bytes', ''), 'byte', '')::BIGINT
    ELSE x::BIGINT END;

# Seed the table with 7200000 unique strings.
# Each id is a number below 86000000 rendered as text and right-padded with '-' to 10 characters.

statement ok
CREATE TABLE art AS
SELECT rpad(((i * 95823983533) % 86000000)::VARCHAR, 10, '-') AS id
FROM range(7200000) AS tbl(i);

# Double the table three times by re-inserting its own contents:
# 7200k -> 2 * 7200k entries

statement ok
INSERT INTO art
SELECT id FROM art;

# 2 * 7200k -> 4 * 7200k entries

statement ok
INSERT INTO art
SELECT id FROM art;

# 4 * 7200k -> 8 * 7200k entries (57.6M)

statement ok
INSERT INTO art
SELECT id FROM art;

# Top up with another 28.4M rows to reach 86M entries in total.

statement ok
INSERT INTO art
SELECT id FROM art
LIMIT 28400000;

# Sanity checks on the generated data: total row count, number of distinct keys,
# and that every key is exactly 10 characters long.

query I
SELECT count(*)
FROM art;
----
86000000

query I
SELECT count(DISTINCT id)
FROM art;
----
7200000

query II
SELECT
    min(length(id)),
    max(length(id))
FROM art;
----
10	10

# Record the memory usage (in bytes) before the index exists.

statement ok
CREATE TABLE base AS
SELECT mem_to_bytes(memory_usage)::BIGINT AS usage
FROM pragma_database_size();

# Build the ART index on the VARCHAR column under a 12GB memory limit.

statement ok
SET memory_limit = '12GB';

statement ok
CREATE INDEX idx ON art USING ART(id);

# With the index in place, memory usage must have grown beyond the recorded value
# while staying below five times that value.

query I
SELECT
    mem_to_bytes(db.memory_usage) > base.usage
    AND mem_to_bytes(db.memory_usage) < 5 * base.usage
FROM base
CROSS JOIN pragma_database_size() AS db;
----
1