should be it

This commit is contained in:
2025-10-24 19:21:19 -05:00
parent a4b23fc57c
commit f09560c7b1
14047 changed files with 3161551 additions and 1 deletions

View File

@@ -0,0 +1,15 @@
# name: benchmark/micro/list/column_data_collection_copy/large_lists.benchmark
# description: Column data collection copy benchmark for structs that wrap large lists
# group: [column_data_collection_copy]

# load: "stage" holds the 20000 values that every list is built from.
# run: for each of the 5000 outer rows a correlated subquery aggregates all of
#      "stage" (shifted by j) into one list, which is then packed into a struct.

name Column Data Collection Copy Large Lists
group micro
subgroup list

load
CREATE TABLE stage AS
SELECT range AS len
FROM range(20000);

run
SELECT a
FROM (
    SELECT STRUCT_PACK(
        a := (SELECT LIST(len + j) FROM stage),
        b := j
    ) AS a
    FROM range(5000) tbl2(j)
) AS t;

View File

@@ -0,0 +1,11 @@
# name: benchmark/micro/list/column_data_collection_copy/small_lists.benchmark
# description: Column data collection copy benchmark for structs that wrap small lists
# group: [column_data_collection_copy]

# run: 5000000 rows, each a struct holding the same 27-element constant list
#      plus the row number; no load step is needed.

name Column Data Collection Copy Small Lists
group micro
subgroup list

run
SELECT a
FROM (
    SELECT STRUCT_PACK(
        a := LIST_VALUE(1,2,3,4,5,6,7,8,9,10,2,2,2,2,2,1,2,3,4,5,6,7,8,9,0,11,42),
        b := i
    ) AS a
    FROM range(5000000) tbl(i)
) AS t;

View File

@@ -0,0 +1,16 @@
# name: benchmark/micro/list/column_data_collection_copy/unnest.benchmark
# description: Column data collection copy benchmark for an UNNEST joined back to its source table
# group: [column_data_collection_copy]

# load: "tbl" has 50 rows; each row gets a 500-element list derived from "stage".
#       The UPDATE deliberately has no WHERE clause: every row is filled.
# run: cross product of "tbl" (50 rows) with the unnested lists (50 * 500 rows).

name Column Data Collection Copy Unnest
group micro
subgroup list

load
CREATE TABLE stage AS
    SELECT range AS len
    FROM range(500);
CREATE TABLE tbl AS
    SELECT range AS i
    FROM range(50);
ALTER TABLE tbl
    ADD COLUMN l INTEGER[];
UPDATE tbl
    SET l = (SELECT LIST(len + i) FROM stage);

run
SELECT *
FROM tbl
CROSS JOIN (
    SELECT i, UNNEST(l)
    FROM tbl
);

View File

@@ -0,0 +1,16 @@
# name: benchmark/micro/list/list_extract.benchmark
# description: list_extract on plain integer lists
# group: [list]

# Every one of the 10000 rows holds the list [0, 999]. Position 500 yields the
# value 499 (the expected sum below is 10000 * 499).

name list_extract micro
group micro
subgroup list

load
CREATE TABLE t1 AS
SELECT range(0, 1000) AS l
FROM range(0, 10000) AS r(e);

run
SELECT sum(list_extract(l, 500))
FROM t1;

result I
4990000

View File

@@ -0,0 +1,16 @@
# name: benchmark/micro/list/list_extract_null.benchmark
# description: Benchmark for the list_extract function on lists that contain NULL elements
# group: [list]

# Row e holds the values 0..999, with value a replaced by NULL when e % a = 0.
# Position 5 holds the value 4, which is NULL for the 2500 rows where e is a
# multiple of 4, so count() sees 10000 - 2500 = 7500 non-NULL results.
# The display name is kept distinct from list_extract.benchmark so the two can
# be told apart in benchmark reports.

name list_extract micro (NULL elements)
group micro
subgroup list

load
CREATE TABLE t1 AS
SELECT list_transform(range(0, 1000), a -> if(e % a = 0, NULL, a)) AS l
FROM range(0, 10000) AS r(e);

run
SELECT count(list_extract(l, 5))
FROM t1;

result I
7500

View File

@@ -0,0 +1,17 @@
# name: benchmark/micro/list/list_extract_struct.benchmark
# description: Benchmark for the list_extract function on lists of structs
# group: [list]

# Every row holds 1000 structs {'foo': x, 'bar': '-x'} for x in 0..999.
# Position 500 holds x = 499, so the expected sum is 10000 * 499.
# The display name is kept distinct from list_extract.benchmark so the two can
# be told apart in benchmark reports.

name list_extract micro (struct)
group micro
subgroup list

load
CREATE TABLE t1 AS
SELECT list_transform(range(0, 1000), x -> {'foo': x, 'bar': (-x)::VARCHAR}) AS l
FROM range(0, 10000) AS r(e);

run
SELECT sum(list_extract(l, 500).foo)
FROM t1;

result I
4990000

View File

@@ -0,0 +1,17 @@
# name: benchmark/micro/list/list_extract_struct_null.benchmark
# description: Benchmark for the list_extract function on lists of structs that contain NULL elements
# group: [list]

# Row e holds 1000 structs for x in 0..999, with the struct replaced by NULL
# when e % x = 0. Position 500 holds x = 499, which is NULL for the 21 rows
# where e is a multiple of 499, so the expected sum is (10000 - 21) * 499.
# The display name is kept distinct from the other list_extract benchmarks so
# they can be told apart in benchmark reports.

name list_extract micro (struct, NULL elements)
group micro
subgroup list

load
CREATE TABLE t1 AS
SELECT list_transform(range(0, 1000), x -> if(e % x = 0, NULL, {'foo': x, 'bar': (-x)::VARCHAR})) AS l
FROM range(0, 10000) AS r(e);

run
SELECT sum(list_extract(l, 500).foo)
FROM t1;

result I
4979521

View File

@@ -0,0 +1,20 @@
# name: benchmark/micro/list/list_few_rows.benchmark
# description: LIST aggregate producing many groups with only a few rows each
# group: [list]

# Groups TPC-H SF1 lineitem by order key; the expected row below says this
# gives 1500000 lists of between 1 and 7 ship dates.

name List aggregate (small)
group micro
subgroup list

require tpch

cache tpch_sf1.duckdb

load
CALL dbgen(sf=1);

run
SELECT
    MIN(LENGTH(l)),
    MAX(LENGTH(l)),
    COUNT(*)
FROM (
    SELECT l_orderkey, LIST(l_shipdate) AS l
    FROM lineitem
    GROUP BY l_orderkey
)

result III
1	7	1500000

View File

@@ -0,0 +1,16 @@
# name: benchmark/micro/list/list_has_any.benchmark
# description: list_has_any on pairs of overlapping integer lists
# group: [list]

# For each of the 3000 rows, l1 covers [s, s + 3000) and l2 covers
# [s + 1499, s + 3000), so l2 is always contained in l1 and every row
# evaluates to true.

name list_has_any micro
group micro
subgroup list

load
CREATE TABLE t1 AS
SELECT
    range(s, s + 3000) AS l1,
    range(s + 1499, s + 3000) AS l2
FROM range(0, 3000) r(s);

run
SELECT bool_and(list_has_any(l1, l2))
FROM t1;

result I
true

View File

@@ -0,0 +1,20 @@
# name: benchmark/micro/list/list_many_rows.benchmark
# description: LIST aggregate producing a few groups with very many rows each
# group: [list]

# Groups TPC-H SF1 lineitem by return flag; the expected row below says this
# gives 3 lists of between 1478493 and 3043852 ship dates.

name List aggregate (large)
group micro
subgroup list

require tpch

cache tpch_sf1.duckdb

load
CALL dbgen(sf=1);

run
SELECT
    MIN(LENGTH(l)),
    MAX(LENGTH(l)),
    COUNT(*)
FROM (
    SELECT l_returnflag, LIST(l_shipdate) AS l
    FROM lineitem
    GROUP BY l_returnflag
)

result III
1478493	3043852	3

View File

@@ -0,0 +1,16 @@
# name: benchmark/micro/list/list_min_max.benchmark
# description: MIN/MAX aggregates over a column of three-element lists
# group: [list]

# range(100000001) yields i = 0 .. 100000000, so the smallest list is
# [0, 1, 2] and the largest is [100000000, 100000001, 100000002].

name List min/max
group micro
subgroup list

load
CREATE TABLE lists AS
SELECT [i, i + 1, i + 2] l
FROM range(100000001) t(i);

run
SELECT
    MIN(l),
    MAX(l)
FROM lists;

result II
[0, 1, 2]	[100000000, 100000001, 100000002]

View File

@@ -0,0 +1,30 @@
# name: benchmark/micro/list/list_order_by.benchmark
# description: LIST aggregation with an ORDER BY inside the aggregate
# group: [list]

# The table is the cross product 500 x 500 x 50 = 12500000 rows. Grouping by
# (a, b) gives 250000 lists of 50 strings each, so the summed list length
# equals the row count.

name List Order By
group micro
subgroup list

load
CREATE TABLE issue5920 AS
SELECT
    a.*,
    b.*,
    c.generate_series::VARCHAR AS c
FROM generate_series(1, 500) AS a(a)
JOIN generate_series(1, 500) AS b(b)
    ON TRUE
JOIN generate_series(1, 50) AS c
    ON TRUE
;

run
SELECT sum(length(l))
FROM (
    SELECT
        a,
        b,
        list(c ORDER BY c) l
    FROM issue5920
    GROUP BY a, b
) t;

result I
12500000

View File

@@ -0,0 +1,16 @@
# name: benchmark/micro/list/list_value.benchmark
# description: LIST_VALUE performance when wrapping a single UUID per row
# group: [list]

# load: 100000000 generated UUIDs plus one all-zero UUID.
# run: wraps each UUID in a one-element list and takes the minimum of the
#      lists' text form; the expected row is the all-zero UUID's list.

name List Value
group micro
subgroup list

load
CREATE TABLE uuids AS
SELECT uuid() AS uuid
FROM range(100000000) tbl(i)
UNION ALL
SELECT UUID '00000000-0000-0000-0000-000000000000';

run
SELECT MIN(l::VARCHAR)
FROM (
    SELECT [uuid] AS l
    FROM uuids
)

result I
[00000000-0000-0000-0000-000000000000]

View File

@@ -0,0 +1,13 @@
# name: benchmark/micro/list/list_value_large_list.benchmark
# description: LIST_VALUE performance with very large lists
# group: [list]

# load: a single row holding one list of 1000000 integers.
# run: builds a list containing that list five times.
# The display name is kept distinct from list_value.benchmark so the two can
# be told apart in benchmark reports.

name List Value Large List
group micro
subgroup list

load
CREATE TABLE large_list AS
SELECT list(i) AS a
FROM range(1000000) t(i);

run
SELECT list_value(a, a, a, a, a)
FROM large_list;

View File

@@ -0,0 +1,13 @@
# name: benchmark/micro/list/list_value_nested_list.benchmark
# description: LIST_VALUE performance with nested lists
# group: [list]

# load: 10000 rows with two list-of-list columns, a and b.
# run: builds a three-level list by alternating a and b fourteen times.
# The display name is kept distinct from list_value.benchmark so the two can
# be told apart in benchmark reports.

name List Value Nested List
group micro
subgroup list

load
CREATE TABLE nested_lists AS
SELECT
    [[i], [i + 1]] AS a,
    [[i, i], [i + 1, i + 1]] AS b
FROM range(10000) t(i);

run
SELECT list_value(a, b, a, b, a, b, a, b, a, b, a, b, a, b)
FROM nested_lists;

View File

@@ -0,0 +1,14 @@
# name: benchmark/micro/list/list_value_struct.benchmark
# description: LIST_VALUE performance over a large table of struct columns
# group: [list]

# load: 10000000 rows with three two-field struct columns (a, b, c).
# run: combines the three structs of each row into one list.
# The display name is kept distinct from list_value.benchmark so the two can
# be told apart in benchmark reports.

name List Value Struct
group micro
subgroup list

load
CREATE TABLE large_struct_table AS
SELECT
    {'a': i - 5, 'b': i - 4} AS a,
    {'a': i - 3, 'b': i - 2} AS b,
    {'a': i - 1,'b': i} AS c
FROM range(10000000) tbl(i);

run
SELECT LIST_VALUE(a, b, c)
FROM large_struct_table;

View File

@@ -0,0 +1,20 @@
# name: benchmark/micro/list/string_split.benchmark
# description: str_split on a single-space separator over TPC-H comments
# group: [list]

# Splits every lineitem comment (TPC-H SF1) on ' ' and sums the number of
# resulting parts.

name String Split
group micro
subgroup list

require tpch

cache tpch_sf1.duckdb

load
CALL dbgen(sf=1);

run
SELECT SUM(LENGTH(str_split(l_comment, ' ')))
FROM lineitem

result I
27116609

View File

@@ -0,0 +1,20 @@
# name: benchmark/micro/list/string_split_regexp.benchmark
# description: str_split_regex with a character-class separator over TPC-H comments
# group: [list]

# Splits every lineitem comment (TPC-H SF1) on the pattern '[z ]' (a 'z' or a
# space) and sums the number of resulting parts.

name String Split Regexp
group micro
subgroup list

require tpch

cache tpch_sf1.duckdb

load
CALL dbgen(sf=1);

run
SELECT SUM(LENGTH(str_split_regex(l_comment, '[z ]')))
FROM lineitem

result I
27179168

View File

@@ -0,0 +1,19 @@
# name: benchmark/micro/list/string_split_unicode.benchmark
# description: str_split on a single-space separator over comments containing a non-ASCII character
# group: [list]

# load: copies every lineitem comment (TPC-H SF1) with a duck emoji appended.
# run: splits on ' '; the appended emoji adds no separators, so the expected
#      total matches the one recorded in string_split.benchmark.

name String Split Unicode
group micro
subgroup list

require tpch

load
CALL dbgen(sf=1);
CREATE TABLE duck_comments AS
    SELECT concat(l_comment, '🦆') l_comment
    FROM lineitem

run
SELECT SUM(LENGTH(str_split(l_comment, ' ')))
FROM duck_comments

result I
27116609

View File

@@ -0,0 +1,13 @@
# name: benchmark/micro/list/unnest.benchmark
# description: UNNEST of a single very large list
# group: [list]

# Aggregates 1000000 integers into one list, unnests it again and counts the
# resulting rows; no load step is needed.

name Unnest
group micro
subgroup list

run
SELECT COUNT(k)
FROM (
    SELECT UNNEST(l)
    FROM (
        SELECT LIST(i) l
        FROM RANGE(1000000) tbl(i)
    ) tbl2(l)
) tbl3(k)

result I
1000000

View File

@@ -0,0 +1,27 @@
# name: benchmark/micro/list/unnest_rewrite.benchmark
# description: Nested UNNESTs over lists of structs (unnest rewriter benchmark)
# group: [list]

# stage1: 100 rows (r, h, id).
# stage2: one row per stage1 row, carrying a list of 100 structs.
# stage3: one row per (r, h), carrying a list of the 100 stage2 rows as structs.
# The final query unnests both list levels: 100 * 100 * 100 = 1000000 rows.
# The comma-separated FROM entries are kept as written; only layout and
# keyword case differ from the original formulation.

name Unnest Rewriter
group micro
subgroup list

run
WITH stage1 AS (
    SELECT r, md5(r::VARCHAR) AS h, gen_random_uuid() AS id
    FROM (SELECT UNNEST(GENERATE_SERIES(1,100,1)) r )
),
stage2 AS (
    SELECT a.*, list({r:b.r, h:b.h, id: gen_random_uuid()}) AS sub
    FROM stage1 AS a, stage1 AS b
    GROUP BY ALL
),
stage3 AS (
    SELECT a.r, a.h, list({r:b.r, h:b.h, sub:b.sub, id: gen_random_uuid()}) AS sub
    FROM stage1 AS a, stage2 AS b
    GROUP BY ALL
)
SELECT count(*) AS r
FROM stage3,
    (SELECT UNNEST(stage3.sub) sub) AS s1(sub),
    (SELECT UNNEST(s1.sub.sub) sub) AS s2(sub);

result I
1000000