should be it

This commit is contained in:
2025-10-24 19:21:19 -05:00
parent a4b23fc57c
commit f09560c7b1
14047 changed files with 3161551 additions and 1 deletions

View File

@@ -0,0 +1,15 @@
# name: benchmark/micro/string/bitstring.benchmark
# description: get_bit on BIT values built from the binary text ('%b') of the integers 0..9999999
# group: [string]
name Bit string
group string
load
CREATE TABLE bits AS SELECT printf('%b', i)::BIT AS col FROM range(0, 10000000) tbl(i);
run
SELECT COUNT(get_bit(col, 0)) FROM bits;
result I
10000000

View File

@@ -0,0 +1,14 @@
# name: benchmark/micro/string/concat_long.benchmark
# description: Minimum length of s1 || s2 over the table loaded from strings_long.sql
# group: [string]
name String Concat (Long)
group string
load benchmark/micro/string/strings_long.sql
run
SELECT min(strlen(s1 || s2))
FROM strings
result I
10

View File

@@ -0,0 +1,14 @@
# name: benchmark/micro/string/concat_short.benchmark
# description: Minimum length of s1 || s2 over the table loaded from strings_small.sql
# group: [string]
name String Concat
group string
load benchmark/micro/string/strings_small.sql
run
SELECT min(strlen(s1 || s2))
FROM strings
result I
2

View File

@@ -0,0 +1,19 @@
# name: benchmark/micro/string/contains_integers.benchmark
# description: Contains the substring '1234' in the decimal text of generated integers (4000 matching rows)
# group: [string]
name Contains ('1234')
group string
load
CREATE TABLE strs AS SELECT (i * 9 % 10000000)::VARCHAR AS s FROM range(0, 10000000) t(i);
run
SELECT COUNT(*) FROM strs WHERE contains(s, '1234')
result I
4000

View File

@@ -0,0 +1,19 @@
# name: benchmark/micro/string/contains_long.benchmark
# description: Contains the string 'riously. regular, express pinto ' in the l_comment (3)
# group: [string]
name Contains ('riously. regular, express pinto ')
group string
require tpch
cache tpch_sf1.duckdb
load
CALL dbgen(sf=1);
run
SELECT COUNT(*) FROM lineitem WHERE contains(l_comment, 'riously. regular, express pinto ')
result I
3

View File

@@ -0,0 +1,19 @@
# name: benchmark/micro/string/contains_r.benchmark
# description: Count lineitem rows whose l_comment contains the single character 'r'
# group: [string]
name Contains ('r')
group string
require tpch
cache tpch_sf1.duckdb
load
CALL dbgen(sf = 1);
run
SELECT count(*)
FROM lineitem
WHERE contains(l_comment, 'r')
result I
4239956

View File

@@ -0,0 +1,19 @@
# name: benchmark/micro/string/contains_re.benchmark
# description: Count lineitem rows whose l_comment contains the two-character needle 're'
# group: [string]
name Contains ('re')
group string
require tpch
cache tpch_sf1.duckdb
load
CALL dbgen(sf = 1);
run
SELECT count(*)
FROM lineitem
WHERE contains(l_comment, 're')
result I
2453562

View File

@@ -0,0 +1,19 @@
# name: benchmark/micro/string/contains_reg.benchmark
# description: Count lineitem rows whose l_comment contains the three-character needle 'reg'
# group: [string]
name Contains ('reg')
group string
require tpch
cache tpch_sf1.duckdb
load
CALL dbgen(sf = 1);
run
SELECT count(*)
FROM lineitem
WHERE contains(l_comment, 'reg')
result I
816933

View File

@@ -0,0 +1,19 @@
# name: benchmark/micro/string/contains_regular.benchmark
# description: Count lineitem rows whose l_comment contains the word 'regular' (11.5%~)
# group: [string]
name Contains ('regular')
group string
require tpch
cache tpch_sf1.duckdb
load
CALL dbgen(sf = 1);
run
SELECT count(*)
FROM lineitem
WHERE contains(l_comment, 'regular')
result I
687323

View File

@@ -0,0 +1,14 @@
# name: benchmark/micro/string/equality_long.benchmark
# description: Row-wise s1 == s2 comparison over the table loaded from strings_long.sql
# group: [string]
name String Equality (Long)
group string
load benchmark/micro/string/strings_long.sql
run
SELECT min(s1 == s2)
FROM strings
result I
false

View File

@@ -0,0 +1,14 @@
# name: benchmark/micro/string/equality_short.benchmark
# description: Row-wise s1 == s2 comparison over the table loaded from strings_small.sql
# group: [string]
name String Equality
group string
load benchmark/micro/string/strings_small.sql
run
SELECT min(s1 == s2)
FROM strings
result I
false

View File

@@ -0,0 +1,14 @@
# name: benchmark/micro/string/greater_than_long.benchmark
# description: Row-wise s1 > s2 comparison over the table loaded from strings_long.sql
# group: [string]
name String Greater Than (Long)
group string
load benchmark/micro/string/strings_long.sql
run
SELECT min(s1 > s2)
FROM strings
result I
false

View File

@@ -0,0 +1,14 @@
# name: benchmark/micro/string/greater_than_short.benchmark
# description: Row-wise s1 > s2 comparison over the table loaded from strings_small.sql
# group: [string]
name String Greater Than
group string
load benchmark/micro/string/strings_small.sql
run
SELECT min(s1 > s2)
FROM strings
result I
false

View File

@@ -0,0 +1,14 @@
# name: benchmark/micro/string/inet_escape_function.benchmark
# description: html_escape (inet extension) over 1M strings made of 0 to 9 repeated '&' characters
# group: [string]
name html_escape benchmark
group string
require inet
load
CREATE TABLE html_text_tbl AS
SELECT repeat('&', i % 10) AS html_text
FROM range(1000000) t(i);
run
SELECT html_escape(html_text)
FROM html_text_tbl;

View File

@@ -0,0 +1,16 @@
# name: benchmark/micro/string/inet_unescape_charrefs.benchmark
# description: inet's extension unescape function benchmark with character references
# group: [string]
# Setup: 10 literal rows from the VALUES list plus 1000 + 1000 + 997990 generated rows
# of repeated characters; the run applies html_unescape to the single column (col0).
# NOTE(review): several literals below are plain characters rather than '&name;' references;
# presumably they were entity-decoded when this listing was rendered - confirm against upstream.
name html_unescape benchmark for character references
group string
require inet
load
CREATE TABLE charrefs AS SELECT * FROM (VALUES ('&amp'), ('∷'), ('&CounterClockwiseContourIntegral;'), ('&;'), ('≷'), ('⇆'), ('↓'), ('not &notin'), ('";'), ('&no charref'));
INSERT INTO charrefs SELECT repeat('⪰̸', i%10) charref FROM range(1000) t(i);
INSERT INTO charrefs SELECT repeat('𝔷', i%10) charref FROM range(1000) t(i);
INSERT INTO charrefs SELECT repeat('É', i%7) html_text FROM range(997990) t(i);
run
SELECT html_unescape(charrefs.col0) FROM charrefs;

View File

@@ -0,0 +1,13 @@
# name: benchmark/micro/string/inet_unescape_codepoints.benchmark
# description: html_unescape (inet extension) over 1M hexadecimal numeric references of the form '&#x<hex>'
# group: [string]
name html_unescape benchmark with hexadecimal values
group string
require inet
load
CREATE TABLE html_hex_tbl AS
SELECT format('&#x{:x}', i) AS html_text
FROM range(1000000) t(i);
run
SELECT html_unescape(html_text)
FROM html_hex_tbl;

View File

@@ -0,0 +1,14 @@
# name: benchmark/micro/string/length_long.benchmark
# description: Minimum of LENGTH(s1) + LENGTH(s2) over the table loaded from strings_long.sql
# group: [string]
name String Length (Long)
group string
load benchmark/micro/string/strings_long.sql
run
SELECT min(length(s1) + length(s2))
FROM strings
result I
10

View File

@@ -0,0 +1,14 @@
# name: benchmark/micro/string/length_short.benchmark
# description: Minimum of LENGTH(s1) + LENGTH(s2) over the table loaded from strings_small.sql
# group: [string]
name String Length
group string
load benchmark/micro/string/strings_small.sql
run
SELECT min(length(s1) + length(s2))
FROM strings
result I
2

View File

@@ -0,0 +1,19 @@
# name: benchmark/micro/string/like_regular.benchmark
# description: Count lineitem rows whose l_comment matches LIKE '%regular%' (11.5%~)
# group: [string]
name Like ('%regular%')
group string
require tpch
cache tpch_sf1.duckdb
load
CALL dbgen(sf = 1);
run
SELECT count(*)
FROM lineitem
WHERE l_comment LIKE '%regular%'
result I
687323

View File

@@ -0,0 +1,14 @@
# name: benchmark/micro/string/regexp_matches.benchmark
# description: Regexp Matches with the single-character pattern '0' over the table loaded from strings_small.sql
# group: [string]
name Regexp Matches ('0')
group string
load benchmark/micro/string/strings_small.sql
run
SELECT MIN(REGEXP_MATCHES(s1, '0')) FROM strings
result I
false

View File

@@ -0,0 +1,14 @@
# name: benchmark/micro/string/string_agg_long.benchmark
# description: Length of STRING_AGG(s1, ' ') over the table loaded from strings_long.sql
# group: [string]
name String Agg (Long)
group string
load benchmark/micro/string/strings_long.sql
run
SELECT strlen(string_agg(s1, ' '))
FROM strings
result I
154499999

View File

@@ -0,0 +1,30 @@
# name: benchmark/micro/string/string_agg_order_by.benchmark
# description: Ordered STRING_AGG aggregation (string_agg with ORDER BY, grouped by two keys)
# group: [string]
name String Agg (Order By)
group micro
subgroup list
load
CREATE TABLE issue5920 AS
SELECT
    a.a,
    b.b,
    c.generate_series::VARCHAR AS c
FROM generate_series(1, 500) AS a(a)
CROSS JOIN generate_series(1, 500) AS b(b)
CROSS JOIN generate_series(1, 50) AS c
;
run
SELECT sum(length(s)) FROM (
    SELECT a, b, string_agg(c, ',' ORDER BY c) AS s
    FROM issue5920
    GROUP BY a, b
) t;
result I
35000000

View File

@@ -0,0 +1,14 @@
# name: benchmark/micro/string/string_agg_short.benchmark
# description: Length of STRING_AGG(s1, ' ') over the table loaded from strings_small.sql
# group: [string]
name String Agg
group string
load benchmark/micro/string/strings_small.sql
run
SELECT strlen(string_agg(s1, ' '))
FROM strings
result I
38899999

View File

@@ -0,0 +1,2 @@
/* Stage two text columns per row; each is (row index * constant) % 1000 cast to VARCHAR. */
CREATE TEMPORARY TABLE strings_temp AS
SELECT
    ((i * 9582398353) % 1000)::VARCHAR AS s1,
    ((i * 847892347987) % 1000)::VARCHAR AS s2
FROM range(0, 10000000) AS tbl(i);
/* Build the final table by repeating every staged value five times. */
CREATE TABLE strings AS
SELECT
    repeat(s1, 5) AS s1,
    repeat(s2, 5) AS s2
FROM strings_temp;

View File

@@ -0,0 +1 @@
/* 10M rows of two short text columns; each is (row index * constant) % 1000 cast to VARCHAR. */
CREATE TABLE strings AS
SELECT
    ((i * 9582398353) % 1000)::VARCHAR AS s1,
    ((i * 847892347987) % 1000)::VARCHAR AS s2
FROM range(0, 10000000) AS tbl(i);

View File

@@ -0,0 +1,16 @@
# name: benchmark/micro/string/url_encode.benchmark
# description: url_encode followed by url_decode over 100M generated strings containing '/', '\' and '%'
# group: [string]
name URL Encode
group string
load
CREATE TABLE strings(s VARCHAR);
INSERT INTO strings
SELECT concat('this is a string with encodings /\% - ', i)
FROM range(100000000) t(i);
run
SELECT min(url_decode(url_encode(s)))
FROM strings
result I
this is a string with encodings /\% - 0