should be it

This commit is contained in:
2025-10-24 19:21:19 -05:00
parent a4b23fc57c
commit f09560c7b1
14047 changed files with 3161551 additions and 1 deletions

View File

@@ -0,0 +1,131 @@
# name: test/sql/function/string/format_bytes.test
# description: Test format_bytes and the related pg_size_pretty / formatReadableSize / formatReadableDecimalSize functions
# group: [string]

statement ok
PRAGMA enable_verification

# below 1024 the value is printed as a plain byte count ("byte" is singular for 1)
query I
SELECT format_bytes(0);
----
0 bytes

query I
SELECT format_bytes(1);
----
1 byte

query I
SELECT format_bytes(1023);
----
1023 bytes

# from 1024 upwards binary units with one decimal digit are used
query I
SELECT format_bytes(1024);
----
1.0 KiB

# pg_size_pretty renders 1024 the same way as format_bytes
query I
SELECT pg_size_pretty(1024);
----
1.0 KiB

# one byte below a unit boundary stays in the smaller unit and is not rounded up
query I
SELECT format_bytes(1024*1024-1);
----
1023.9 KiB

query I
SELECT format_bytes(1024*1024);
----
1.0 MiB

query I
SELECT format_bytes(1024*1024 + 555555);
----
1.5 MiB

query I
SELECT format_bytes(1024*1024*1024-1);
----
1023.9 MiB

query I
SELECT format_bytes(1e9::BIGINT);
----
953.6 MiB

query I
SELECT format_bytes(pow(1024,3)::BIGINT);
----
1.0 GiB

query I
SELECT format_bytes(pow(1024.0,4)::BIGINT);
----
1.0 TiB

query I
SELECT format_bytes((pow(1024.0,4) - 1)::BIGINT);
----
1023.9 GiB

query I
SELECT format_bytes(1e15::BIGINT);
----
909.4 TiB

# largest BIGINT: PiB is the biggest unit used, so the number itself exceeds 1024
query I
SELECT format_bytes(9223372036854775807);
----
8191.9 PiB

# NULL in, NULL out
query I
SELECT format_bytes(NULL);
----
NULL

# NOTE(review): repeats the format_bytes(1) check from the top of the file
query I
SELECT format_bytes(1);
----
1 byte

# negative inputs keep their sign, including the smallest BIGINT
query I
SELECT format_bytes(-1);
----
-1 byte

query I
SELECT format_bytes(-9223372036854775808);
----
-8192.0 PiB

# formatReadableDecimalSize scales by powers of 1000 (kB, MB);
# formatReadableSize scales by powers of 1024 (KiB, MiB)
query I
SELECT formatReadableDecimalSize(500);
----
500 bytes

query I
SELECT formatReadableSize(500);
----
500 bytes

query I
SELECT formatReadableDecimalSize(500*1000);
----
500.0 kB

query I
SELECT formatReadableSize(500*1000);
----
488.2 KiB

query I
SELECT formatReadableDecimalSize(500*1000*1000);
----
500.0 MB

query I
SELECT formatReadableSize(500*1000*1000);
----
476.8 MiB

View File

@@ -0,0 +1,80 @@
# name: test/sql/function/string/hex.test
# description: Test the to_hex/from_hex function
# group: [string]

statement ok
PRAGMA enable_verification

# to_hex / hex emit upper-case hex digits
query I
SELECT to_hex('duckdb');
----
6475636B6462

query I
SELECT hex(unhex('abcd'));
----
ABCD

query I
SELECT hex(blob '\x00\x00\x80');
----
000080

query I
SELECT from_hex('6475636B6462');
----
duckdb

# an odd number of hex digits is accepted: '5' decodes to the single byte 0x05
query I
SELECT from_hex('5');
----
\x05

# Test hex/unhex round trip
query I
SELECT unhex(hex('duckdb'))
----
duckdb

# Test invalid input
# NOTE(review): no expected error text is pinned after the ---- line; confirm whether a message should be asserted
statement error
SELECT from_hex('duckdb');
----

query IIIIIIIIIIII
SELECT to_hex(columns('^(.*int|varchar|bignum)$')) FROM test_all_types();
----
FFFFFFFFFFFFFF80 FFFFFFFFFFFF8000 FFFFFFFF80000000 8000000000000000 80000000000000000000000000000000 0 0 0 0 0 7FFF7F00000000000007FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF F09FA686F09FA686F09FA686F09FA686F09FA686F09FA686
7F 7FFF 7FFFFFFF 7FFFFFFFFFFFFFFF 7FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF FF FFFF FFFFFFFF FFFFFFFFFFFFFFFF 800080FFFFFFFFFFFFF800000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 676F6F007365
NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
query IIIIIIIIIIII
SELECT from_hex(to_hex(columns('^(.*int|varchar|bignum)$'))) FROM test_all_types();
----
\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x80 \xFF\xFF\xFF\xFF\xFF\xFF\x80\x00 \xFF\xFF\xFF\xFF\x80\x00\x00\x00 \x80\x00\x00\x00\x00\x00\x00\x00 \x80\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00 \x00 \x00 \x00 \x00 \x00 \x7F\xFF\x7F\x00\x00\x00\x00\x00\x00\x07\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF \xF0\x9F\xA6\x86\xF0\x9F\xA6\x86\xF0\x9F\xA6\x86\xF0\x9F\xA6\x86\xF0\x9F\xA6\x86\xF0\x9F\xA6\x86
\x7F \x7F\xFF \x7F\xFF\xFF\xFF \x7F\xFF\xFF\xFF\xFF\xFF\xFF\xFF \x7F\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF \xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF \xFF \xFF\xFF \xFF\xFF\xFF\xFF \xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF \x80\x00\x80\xFF\xFF\xFF\xFF\xFF\xFF\xF8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00 goo\x00se
NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
query I
SELECT to_binary('duckdb');
----
011001000111010101100011011010110110010001100010
query I
SELECT from_binary('011001000111010101100011011010110110010001100010');
----
duckdb
query IIIIIIIIIIII
SELECT to_binary(columns('^(.*int|varchar|bignum)$')) FROM test_all_types();
----
1111111111111111111111111111111111111111111111111111111110000000 1111111111111111111111111111111111111111111111111000000000000000 1111111111111111111111111111111110000000000000000000000000000000 1000000000000000000000000000000000000000000000000000000000000000 10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 0 0 0 0 0 0111111111111111011111110000000000000000000000000000000000000000000000000000011111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111 111100001001111110100110100001101111000010011111101001101000011011110000100111111010011010000110111100001001111110100110100001101111000010011111101001101000011011110000100111111010011010000110
1111111 111111111111111 1111111111111111111111111111111 111111111111111111111111111111111111111111111111111111111111111 1111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111 11111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111 11111111 1111111111111111 11111111111111111111111111111111 1111111111111111111111111111111111111111111111111111111111111111 1000000000000000100000001111111111111111111111111111111111111111111111111111100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 011001110110111101101111000000000111001101100101
NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
query IIIIIIIIIIII
SELECT from_binary(to_binary(columns('^(.*int|varchar|bignum)$'))) FROM test_all_types();
----
\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x80 \xFF\xFF\xFF\xFF\xFF\xFF\x80\x00 \xFF\xFF\xFF\xFF\x80\x00\x00\x00 \x80\x00\x00\x00\x00\x00\x00\x00 \x80\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00 \x00 \x00 \x00 \x00 \x00 \x7F\xFF\x7F\x00\x00\x00\x00\x00\x00\x07\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF \xF0\x9F\xA6\x86\xF0\x9F\xA6\x86\xF0\x9F\xA6\x86\xF0\x9F\xA6\x86\xF0\x9F\xA6\x86\xF0\x9F\xA6\x86
\x7F \x7F\xFF \x7F\xFF\xFF\xFF \x7F\xFF\xFF\xFF\xFF\xFF\xFF\xFF \x7F\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF \xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF \xFF \xFF\xFF \xFF\xFF\xFF\xFF \xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF \x80\x00\x80\xFF\xFF\xFF\xFF\xFF\xFF\xF8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00 goo\x00se
NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL

View File

@@ -0,0 +1,52 @@
# name: test/sql/function/string/like_unicode.test
# description: Test that the LIKE/ILIKE '_' wildcard matches exactly one character, including multi-byte unicode characters
# group: [string]

statement ok
PRAGMA enable_verification

statement ok
CREATE TABLE t0 (c0 VARCHAR);

# every row holds exactly one character (CJK, ASCII letter, '%', emoji),
# so a lone '_' pattern has to match all six rows
statement ok
INSERT INTO t0 VALUES ('票'),('t'),('%'),('丑'),('多'), ('🦆');

query I
SELECT count(*) FROM t0 WHERE t0.c0 LIKE '_';
----
6

query I
SELECT count(*) FROM t0 WHERE t0.c0 ILIKE '_';
----
6

# '_' on either side of an ASCII letter consumes one emoji each
query I
SELECT '🦆a🦆' LIKE '_a_'
----
true

# ILIKE: same, with the literal part compared case-insensitively
query I
SELECT '🦆a🦆' ILIKE '_A_'
----
true

query I
SELECT 'BaB' ILIKE '_A_'
----
true

# the number of '_' must equal the number of characters, not the number of bytes
query I
SELECT '🦆🦆' ILIKE '_'
----
false

query I
SELECT '🦆🦆' ILIKE '__'
----
true

query I
SELECT '🦆🦆' ILIKE '___'
----
false

View File

@@ -0,0 +1,60 @@
# name: test/sql/function/string/md5.test
# description: Test the md5 function
# group: [string]

statement ok
PRAGMA enable_verification

# md5 returns the lower-case hex digest; NULL input gives NULL
query II
select md5('hello'), md5(NULL)
----
5d41402abc4b2a76b9719d911017c592
NULL

# BLOB input is accepted as well
query I
select md5('\xff\xff'::BLOB)
----
ab2a0d28de6b77ffdd6c72afead099ab

# numeric variants of the digest
# NOTE(review): the NULL probe here calls md5_number_upper, so md5_number(NULL) is never exercised; confirm this is intentional
query II
select md5_number('hello'), md5_number_upper(NULL)
----
195090637673866227529774941829999903069
NULL

query II
select md5_number_upper('hello'), md5_number_upper(NULL)
----
8514701317032132957
NULL

query II
select md5_number_lower('hello'), md5_number_lower(NULL)
----
10575884659879408057
NULL

# table input: s is VARCHAR, so ORDER BY s is lexicographic ('10' sorts between '1' and '2')
statement ok
CREATE TABLE strings AS SELECT s::VARCHAR s FROM generate_series(0,10,1) t(s)

# per-row digest next to a constant digest
query II
select md5(s), md5('1') from strings ORDER BY s
----
cfcd208495d565ef66e7dff9f98764da	c4ca4238a0b923820dcc509a6f75849b
c4ca4238a0b923820dcc509a6f75849b	c4ca4238a0b923820dcc509a6f75849b
d3d9446802a44259755d38e6d163e820	c4ca4238a0b923820dcc509a6f75849b
c81e728d9d4c2f636f067f89cc14862c	c4ca4238a0b923820dcc509a6f75849b
eccbc87e4b5ce2fe28308fd9f2a7baf3	c4ca4238a0b923820dcc509a6f75849b
a87ff679a2f3e71d9181a67b7542122c	c4ca4238a0b923820dcc509a6f75849b
e4da3b7fbbce2345d7772b0674a318d5	c4ca4238a0b923820dcc509a6f75849b
1679091c5a880faf6fb5e6087eb1b2dc	c4ca4238a0b923820dcc509a6f75849b
8f14e45fceea167a5a36dedd4bea2543	c4ca4238a0b923820dcc509a6f75849b
c9f0f895fb98ab9159f51fd0297e236d	c4ca4238a0b923820dcc509a6f75849b
45c48cce2e2d7fbdea1afc51c7c6ad26	c4ca4238a0b923820dcc509a6f75849b

# same, restricted by a filter on the integer value of s
query II
select md5(s), md5('1') from strings where s::INTEGER BETWEEN 1 AND 3 ORDER BY s
----
c4ca4238a0b923820dcc509a6f75849b	c4ca4238a0b923820dcc509a6f75849b
c81e728d9d4c2f636f067f89cc14862c	c4ca4238a0b923820dcc509a6f75849b
eccbc87e4b5ce2fe28308fd9f2a7baf3	c4ca4238a0b923820dcc509a6f75849b

View File

@@ -0,0 +1,129 @@
# name: test/sql/function/string/null_byte.test
# description: Test string functions with null bytes
# group: [string]

statement ok
PRAGMA enable_verification

# chr(0) is shown as \0 in VARCHAR results and as \x00 once cast to BLOB
query I
SELECT chr(0)
----
\0

query I
SELECT chr(0)::blob
----
\x00

query I
select ascii(chr(0));
----
0

# fixture: a string with a null byte embedded in the middle
query I
CREATE TABLE null_byte AS SELECT concat('goo', chr(0), 'se') AS v
----
1

query I
SELECT * FROM null_byte
----
goo\0se

# searching and pattern matching with a null byte as the needle
query I
SELECT * FROM null_byte WHERE contains(v, chr(0))
----
goo\0se

query I
SELECT instr(v, chr(0)) FROM null_byte
----
4

query I
SELECT * FROM null_byte WHERE v LIKE concat('%', chr(0), '%')
----
goo\0se

query I
SELECT * FROM null_byte WHERE regexp_matches(v, chr(0))
----
goo\0se

query I
SELECT * FROM null_byte WHERE regexp_full_match(v, concat('goo', chr(0), 'se'))
----
goo\0se

# nested types
query I
SELECT {'a': v} FROM null_byte
----
{'a': goo\0se}

query I
SELECT [v] FROM null_byte
----
[goo\0se]

# the null byte counts as one character and does not terminate the string
query I
SELECT LENGTH(v) FROM null_byte
----
6

query I
SELECT STRLEN(v) FROM null_byte
----
6

query I
SELECT LENGTH_GRAPHEME(v) FROM null_byte
----
6

query I
SELECT v||v FROM null_byte
----
goo\0segoo\0se

query I
SELECT printf('%s - zzz', v) FROM null_byte
----
goo\0se - zzz

query I
SELECT substr(v, 4) FROM null_byte
----
\0se

# fixture for ordering / grouping / joining: ids 1 and 3 share the full string,
# id 2 holds just the null byte
statement ok
CREATE TABLE more_null_bytes AS
	SELECT 1 AS id, v FROM null_byte
	UNION ALL
	SELECT 2 AS id, substr(v, 4, 1) FROM null_byte
	UNION ALL
	SELECT 3 AS id, v FROM null_byte

query II
SELECT * FROM more_null_bytes ORDER BY v, id
----
2	\0
1	goo\0se
3	goo\0se

query II
SELECT v, SUM(id) FROM more_null_bytes GROUP BY v ORDER BY ALL
----
\0	2
goo\0se	4

# NOTE(review): identical to the previous query; confirm whether a different variant was intended
query II
SELECT v, SUM(id) FROM more_null_bytes GROUP BY v ORDER BY ALL
----
\0	2
goo\0se	4

# the join key compares the whole string, null byte included: ids 1 and 3 match
query I
SELECT COUNT(*) FROM null_byte JOIN more_null_bytes USING(v)
----
2

View File

@@ -0,0 +1,485 @@
# name: test/sql/function/string/parse_path.test
# description: parse path functions test
# group: [string]

statement ok
PRAGMA enable_verification

# expectations for the 'system' separator below assume a non-Windows host
require notwindows

# all separators option
query T
SELECT * FROM (VALUES (parse_path('path/to/file.csv', 'system')), (parse_path('path/to/file.csv\file2.csv', 'both_slash')), (parse_path('path/to/file.csv', 'forward_slash')), (parse_path('path\to\file.csv/file2.csv', 'backslash'))) tbl(i);
----
[path, to, file.csv]
[path, to, file.csv, file2.csv]
[path, to, file.csv]
[path, to, file.csv/file2.csv]

# default separator
query T
SELECT parse_path('path/to/file.csv\file2.csv');
----
[path, to, file.csv, file2.csv]

# no separators in path
query T
SELECT parse_path('file.csv', 'both_slash');
----
[file.csv]

# start with separator
query T
SELECT parse_path('/path/to/file.csv', 'forward_slash');
----
[/, path, to, file.csv]

query T
select parse_path('\path\to\file', 'forward_slash');
----
[\path\to\file]

# consecutive separators
query T
SELECT parse_path('//path/to//file.csv', 'forward_slash');
----
[/, path, to, file.csv]

# special characters
query T
SELECT parse_path('p@th\t0\wh@t3ve%\42/12 ch,ars.sth', 'both_slash');
----
[p@th, t0, wh@t3ve%, 42, '12 ch,ars.sth']

# custom separator
# ('@' is not used as a literal separator; the path is still split on slashes)
query T
SELECT parse_path('path/to/file.csv','@');
----
[path, to, file.csv]

# NULL separator still splits the path; NULL path yields NULL
query T
SELECT parse_path('path/to/file.csv', NULL);
----
[path, to, file.csv]

query T
SELECT parse_path(NULL, NULL);
----
NULL

query T
SELECT parse_path(NULL, '');
----
NULL

# empty path yields an empty list
query T
SELECT parse_path('');
----
[]

query T
SELECT parse_path('', '');
----
[]

query T
select parse_path('/');
----
[/]

query T
select parse_path('///');
----
[/]

# only separators in path
query T
SELECT parse_path('/\\/', 'both_slash');
----
[/]

# test incorrect usage
statement error
SELECT parse_path();
----
Binder Error: No function matches the given name and argument types 'parse_path()'. You might need to add explicit type casts.

statement error
SELECT parse_path('/path/to', true, 'system');
----
Binder Error: No function matches the given name and argument types 'parse_path(STRING_LITERAL, BOOLEAN, STRING_LITERAL)'.

# test parse_dirname

# all separators option
query T
SELECT * FROM (VALUES (parse_dirname('path/to/file.csv', 'system')), (parse_dirname('path/to/file.csv\file2.csv', 'both_slash')), (parse_dirname('path/to/file.csv', 'forward_slash')), (parse_dirname('path\to\file.csv/file2.csv', 'backslash'))) tbl(i);
----
path
path
path
path

# default separator
query T
SELECT parse_dirname('path/to/file.csv\file2.csv');
----
path

# no separator in path
query T
SELECT parse_dirname('file.csv', 'backslash');
----
(empty)

# only separators in path
query T
SELECT parse_dirname('\', 'backslash');
----
\

# path with only one separator, different from the separator option
query T
SELECT parse_dirname('/', 'backslash');
----
(empty)

# start with separator
query T
SELECT parse_dirname('/path/to/file.csv', 'forward_slash');
----
/

# no forward slashes in the path
query T
SELECT parse_dirname('\path\to\file', 'forward_slash');
----
(empty)

# have consecutive separators
query T
SELECT parse_dirname('///path/to//file.csv', 'forward_slash');
----
/

# special characters
query T
SELECT parse_dirname('wh@t3ve%\42/12 ch,ars.sth', 'both_slash');
----
wh@t3ve%

# custom separator
query T
SELECT parse_dirname('path/to/file.csv','@');
----
path

query T
SELECT parse_dirname('path/to/file.csv', NULL);
----
path

query T
SELECT parse_dirname(NULL, NULL);
----
NULL

query T
SELECT parse_dirname(NULL, '');
----
NULL

query T
SELECT parse_dirname('');
----
(empty)

query T
SELECT parse_dirname('', '');
----
(empty)

# test incorrect usage
statement error
SELECT parse_dirname();
----
Binder Error: No function matches the given name and argument types 'parse_dirname()'.

statement error
SELECT parse_dirname('/path/to', true, 'system');
----
Binder Error: No function matches the given name and argument types 'parse_dirname(STRING_LITERAL, BOOLEAN, STRING_LITERAL)'.

# test parse_dirpath

# all separators option
query T
SELECT * FROM (VALUES (parse_dirpath('path/to/file.csv', 'system')), (parse_dirpath('path/to/file.csv\file2.csv', 'both_slash')), (parse_dirpath('path/to/file.csv', 'forward_slash')), (parse_dirpath('path\to\file.csv/file2.csv', 'backslash'))) tbl(i);
----
path/to
path/to/file.csv
path/to
path\to

# default separator
query T
SELECT parse_dirpath('path/to/file.csv\file2.csv');
----
path/to/file.csv

# no separator in path
query T
SELECT parse_dirpath('file.csv', 'backslash');
----
(empty)

# only separators in path
query T
SELECT parse_dirpath('\', 'backslash');
----
\

# path with only one separator, different from the separator option
query T
SELECT parse_dirpath('/', 'backslash');
----
(empty)

# start with separator
query T
SELECT parse_dirpath('/path/to/file.csv', 'forward_slash');
----
/path/to

# no forward slashes in the path
query T
SELECT parse_dirpath('\path\to\file', 'forward_slash');
----
(empty)

# have consecutive separators
query T
SELECT parse_dirpath('///path/to//file.csv', 'forward_slash');
----
///path/to/

# special characters
query T
SELECT parse_dirpath('wh@t3ve%\42/12 ch,ars.sth', 'both_slash');
----
wh@t3ve%\42

# custom separator
query T
SELECT parse_dirpath('path/to/file.csv','@');
----
path/to

query T
SELECT parse_dirpath('path/to/file.csv', NULL);
----
path/to

query T
SELECT parse_dirpath(NULL, NULL);
----
NULL

query T
SELECT parse_dirpath(NULL, '');
----
NULL

query T
SELECT parse_dirpath('');
----
(empty)

query T
SELECT parse_dirpath('', '');
----
(empty)

# test incorrect usage
statement error
SELECT parse_dirpath();
----
Binder Error: No function matches the given name and argument types 'parse_dirpath()'.

statement error
SELECT parse_dirpath('/path/to', true, 'system');
----
Binder Error: No function matches the given name and argument types 'parse_dirpath(STRING_LITERAL, BOOLEAN, STRING_LITERAL)'.

# test parse_filename

# all separators option, default trim_extension
query T
SELECT * FROM (VALUES (parse_filename('path/to/file.csv', 'system')), (parse_filename('path/to/file.csv\file2.csv', 'both_slash')), (parse_filename('path/to/file.csv', 'forward_slash')), (parse_filename('path\to\file.csv/file2.csv', 'backslash'))) tbl(i);
----
file.csv
file2.csv
file.csv
file.csv/file2.csv

# default separator
query T
SELECT parse_filename('path/to/file.csv\file2.csv');
----
file2.csv

query T
SELECT parse_filename('/path/to/file.csv\file2.csv', true);
----
file2

query T
SELECT parse_filename('/path/to/file.csv\file2.csv', false);
----
file2.csv

# trim extension
query T
SELECT * FROM (VALUES (parse_filename('path/to/file.csv', true, 'system')), (parse_filename('path/to/file.csv\file2.csv', true, 'both_slash')), (parse_filename('path/to/file.csv', true, 'forward_slash')), (parse_filename('path\to\file.csv/file2.csv', true, 'backslash'))) tbl(i);
----
file
file2
file
file.csv/file2

query T
SELECT parse_filename('path/to/file.csv\file2', true, 'forward_slash');
----
file

# use varchar type for boolean values as 2nd argument
query T
SELECT parse_filename('path/to/file.csv', 'true', 'system');
----
file

query T
SELECT parse_filename('path/to/file.csv', 'false', 'system');
----
file.csv

# no separators in path
query T
SELECT parse_filename('file.csv', 'backslash');
----
file.csv

query T
SELECT parse_filename('file.csv', true, 'backslash');
----
file

# only separators in path
query T
select parse_filename('/');
----
(empty)

query T
select parse_filename('//');
----
(empty)

query T
select parse_filename('/', 'backslash');
----
/

query T
SELECT parse_filename('/', true, 'forward_slash');
----
(empty)

# separator at the end
query T
SELECT parse_filename('path/to/', 'forward_slash');
----
(empty)

query T
SELECT parse_filename('path/to///', true, 'forward_slash');
----
(empty)

# special characters
query T
SELECT parse_filename('wh@t3ve%\42/12 ch,ars.sth', 'both_slash');
----
12 ch,ars.sth

query T
SELECT parse_filename('wh@t3ve%\42/12 ch,ars.sth', true, 'both_slash');
----
12 ch,ars

# custom separator
query T
SELECT parse_filename('path/to/file.csv','@');
----
file.csv

# no extension
query T
SELECT parse_filename('path/to/file', true, 'both_slash');
----
file

query T
SELECT parse_filename(NULL, true, 'system');
----
NULL

query T
SELECT parse_filename('path/to/file.csv', NULL);
----
file.csv

query T
SELECT parse_filename('path/to/file.csv', NULL, NULL);
----
file.csv

query T
SELECT parse_filename(NULL, NULL, NULL);
----
NULL

query T
SELECT parse_filename(NULL, '');
----
NULL

query T
SELECT parse_filename('', '');
----
(empty)

query T
SELECT parse_filename('');
----
(empty)

# test incorrect usage
statement error
SELECT parse_filename(true);
----
Binder Error: No function matches the given name and argument types 'parse_filename(BOOLEAN)'.

statement error
SELECT parse_filename('path/to/file.csv', 'system', true);
----
Binder Error: No function matches the given name and argument types 'parse_filename(STRING_LITERAL, STRING_LITERAL, BOOLEAN)'.

# with three string arguments the 2nd one is converted to a boolean, which fails for 'system'
statement error
SELECT parse_filename('path/to/file.csv', 'system', 'true');
----
Conversion Error: Could not convert string 'system' to BOOL

statement error
SELECT parse_filename();
----
Binder Error: No function matches the given name and argument types 'parse_filename()'.

View File

@@ -0,0 +1,173 @@
# name: test/sql/function/string/parse_path_windows.test
# description: test parse path functions in windows
# group: [string]

statement ok
PRAGMA enable_verification

# on this platform the 'system' separator splits on backslashes only (see first row of each "all separators" query)
require windows

# all separators option
query T
SELECT * FROM (VALUES (parse_path('path\to\file.csv/file2.csv', 'system')), (parse_path('path/to/file.csv\file2.csv', 'both_slash')), (parse_path('path/to/file.csv', 'forward_slash')), (parse_path('path\to\file.csv/file2.csv', 'backslash'))) tbl(i);
----
[path, to, file.csv/file2.csv]
[path, to, file.csv, file2.csv]
[path, to, file.csv]
[path, to, file.csv/file2.csv]

# default separator
query T
SELECT parse_path('home/user/documents/file.csv\file2.csv');
----
[home, user, documents, file.csv, file2.csv]

# no separators in path
query T
SELECT parse_path('file.csv', 'both_slash');
----
[file.csv]

# start with & have consecutive separators
query T
SELECT parse_path('//path/to///file.csv', 'forward_slash');
----
[/, path, to, file.csv]

query T
SELECT parse_path(NULL, NULL);
----
NULL

query T
SELECT parse_path('');
----
[]

# test incorrect usage
statement error
SELECT parse_path();
----
Binder Error: No function matches the given name and argument types 'parse_path()'. You might need to add explicit type casts.

# test parse_dirname

# all separators option
query T
SELECT * FROM (VALUES (parse_dirname('path\to\file.csv/file2.csv', 'system')), (parse_dirname('path/to/file.csv\file2.csv', 'both_slash')), (parse_dirname('path/to/file.csv', 'forward_slash')), (parse_dirname('path\to\file.csv/file2.csv', 'backslash'))) tbl(i);
----
path
path
path
path

# default separator
query T
SELECT parse_dirname('path/to/file.csv\file2.csv');
----
path

# start with & have consecutive separators
query T
SELECT parse_dirname('///path/to//file.csv', 'forward_slash');
----
/

query T
select parse_dirname('file.csv');
----
(empty)

query T
SELECT parse_dirname('');
----
(empty)

# test incorrect usage
statement error
SELECT parse_dirname();
----
Binder Error: No function matches the given name and argument types 'parse_dirname()'.

# test parse_dirpath

# all separators option
query T
SELECT * FROM (VALUES (parse_dirpath('path\to\file.csv/file2.csv', 'system')), (parse_dirpath('path/to/file.csv\file2.csv', 'both_slash')), (parse_dirpath('path/to/file.csv', 'forward_slash')), (parse_dirpath('path\to\file.csv/file2.csv', 'backslash'))) tbl(i);
----
path\to
path/to/file.csv
path/to
path\to

# default separator
query T
SELECT parse_dirpath('path/to/file.csv\file2.csv');
----
path/to/file.csv

# start with & have consecutive separators
query T
SELECT parse_dirpath('///path/to//file.csv', 'forward_slash');
----
///path/to/

query T
select parse_dirpath('file.csv');
----
(empty)

query T
SELECT parse_dirpath('');
----
(empty)

# test incorrect usage
statement error
SELECT parse_dirpath();
----
Binder Error: No function matches the given name and argument types 'parse_dirpath()'.

# test parse_filename

# all separators option
query T
SELECT * FROM (VALUES (parse_filename('path\to\file.csv/file2.csv', 'system')), (parse_filename('path/to/file.csv\file2.csv', 'both_slash')), (parse_filename('path/to/file.csv', 'forward_slash')), (parse_filename('path\to\file.csv/file2.csv', 'backslash'))) tbl(i);
----
file.csv/file2.csv
file2.csv
file.csv
file.csv/file2.csv

# end with separator
query T
SELECT parse_filename('file2.csv/', 'forward_slash');
----
(empty)

# default args
query T
SELECT parse_filename('path/to/file.csv\file2.csv');
----
file2.csv

query T
SELECT parse_filename('path/to/file.csv\file2', true, 'backslash');
----
file2

# only the last extension is trimmed; the backslash is part of the file name under 'forward_slash'
query T
SELECT parse_filename('/path/to/file.csv\file2.csv', true, 'forward_slash');
----
file.csv\file2

query T
SELECT parse_filename('');
----
(empty)

# test incorrect usage
statement error
SELECT parse_filename();
----
Binder Error: No function matches the given name and argument types 'parse_filename()'.

View File

@@ -0,0 +1,143 @@
# name: test/sql/function/string/regex_capture.test
# description: Test regexp_extract with a list of capture group names
# group: [string]

statement ok
PRAGMA enable_verification

statement ok
CREATE TABLE filenames (filename VARCHAR);

# six well-formed names plus one non-matching string and one NULL
statement ok
INSERT INTO filenames VALUES
	('rundate_2023-01-01_pass_1'),
	('rundate_2023-01-01_pass_2'),
	('rundate_2023-01-01_pass_3'),
	('rundate_2023-01-10_pass_1'),
	('rundate_2023-01-10_pass_2'),
	('rundate_2023-02-14_pass_1'),
	('invalid'),
	(NULL)
;

# Single chunk
# (3 payload rows per filename; SUM(payload) = 0 + 1 + 2 = 3)
query III rowsort
WITH files AS (
	SELECT f.*, payload FROM filenames f, range(3) t(payload)
), extracted AS (
	SELECT regexp_extract(filename, 'rundate_(\d+-\d+-\d+)_pass_(\d+)', ['rundate', 'pass']) AS groups, payload
	FROM files
)
SELECT groups.rundate::DATE AS rundate, groups.pass::SMALLINT AS PASS, SUM(payload)
FROM extracted
WHERE LENGTH(groups.rundate) > 0
GROUP BY ALL
----
2023-01-01	1	3
2023-01-01	2	3
2023-01-01	3	3
2023-01-10	1	3
2023-01-10	2	3
2023-02-14	1	3

# Scaled up
# (1000 payload rows per filename; SUM(payload) = 0 + ... + 999 = 499500)
query III rowsort
WITH files AS (
	SELECT f.*, payload FROM filenames f, range(1000) t(payload)
), extracted AS (
	SELECT regexp_extract(filename, 'rundate_(\d+-\d+-\d+)_pass_(\d+)', ['rundate', 'pass']) AS groups, payload
	FROM files
)
SELECT groups.rundate::DATE AS rundate, groups.pass::SMALLINT AS PASS, SUM(payload)
FROM extracted
WHERE LENGTH(groups.rundate) > 0
GROUP BY ALL
----
2023-01-01	1	499500
2023-01-01	2	499500
2023-01-01	3	499500
2023-01-10	1	499500
2023-01-10	2	499500
2023-02-14	1	499500

# Optional capture success
# (the middle group [a-z]+? matches the literal "pass")
query IIII rowsort
WITH files AS (
	SELECT f.*, payload FROM filenames f, range(3) t(payload)
), extracted AS (
	SELECT regexp_extract(filename, 'rundate_(\d+-\d+-\d+)_([a-z]+?)_(\d+)', ['rundate', 'opt', 'pass']) AS groups, payload
	FROM files
)
SELECT groups.rundate::DATE AS rundate, groups.opt AS opt, groups.pass::SMALLINT AS pass, SUM(payload)
FROM extracted
WHERE LENGTH(groups.rundate) > 0
GROUP BY ALL
----
2023-01-01	pass	1	3
2023-01-01	pass	2	3
2023-01-01	pass	3	3
2023-01-10	pass	1	3
2023-01-10	pass	2	3
2023-02-14	pass	1	3

# Optional capture failure
# (the middle group only accepts digits, so no filename matches and the filter removes every row)
query IIII
WITH files AS (
	SELECT f.*, payload FROM filenames f, range(3) t(payload)
), extracted AS (
	SELECT regexp_extract(filename, 'rundate_(\d+-\d+-\d+)_([0-9]+?)_(\d+)', ['rundate', 'opt', 'pass']) AS groups, payload
	FROM files
)
SELECT groups.rundate::DATE AS rundate, groups.opt AS opt, groups.pass::SMALLINT AS pass, SUM(payload)
FROM extracted
WHERE LENGTH(groups.rundate) > 0
GROUP BY ALL
----

#
# Errors
#

# empty name list
statement error
SELECT regexp_extract(filename, 'rundate_(\d+-\d+-\d+)_pass_(\d+)', []) AS groups
FROM filenames
----
non-empty lists of capture names

# pattern supplied as a column instead of a constant
statement error
WITH patterns AS (
	SELECT 'rundate_(\d+-\d+-\d+)_pass_(\d+)' AS pattern FROM range(3)
)
SELECT regexp_extract(filename, pattern, ['rundate', 'pass']) AS groups
FROM filenames, patterns
----
constant pattern

# NULL inside the name list
statement error
SELECT regexp_extract(filename, 'rundate_(\d+-\d+-\d+)_pass_(\d+)', ['rundate', NULL]) AS groups
FROM filenames
----
NULL group name

# duplicate names, exact and differing only in case
statement error
SELECT regexp_extract(filename, 'rundate_(\d+-\d+-\d+)_pass_(\d+)', ['rundate', 'rundate']) AS groups
FROM filenames
----
Duplicate group name

statement error
SELECT regexp_extract(filename, 'rundate_(\d+-\d+-\d+)_pass_(\d+)', ['rundate', 'RUNDATE']) AS groups
FROM filenames
----
Duplicate group name

# three names for a pattern with two capture groups
statement error
SELECT regexp_extract(filename, 'rundate_(\d+-\d+-\d+)_pass_(\d+)', ['rundate', 'pass', 'overflow']) AS groups
FROM filenames
----
Not enough group names

# NULL pattern
statement error
SELECT regexp_extract(filename, NULL, ['rundate', 'pass']) AS groups
FROM filenames
----
constant pattern

View File

@@ -0,0 +1,99 @@
# name: test/sql/function/string/regex_escape.test
# description: regex escape test
# group: [string]
statement ok
PRAGMA enable_verification
# test the example
query T
SELECT regexp_escape('https://duckdb.org');
----
https\:\/\/duckdb\.org
# no special chars
query T
SELECT regexp_escape('abc123ABC');
----
abc123ABC
# metacharacters
query T
SELECT regexp_escape('a.b[c]*');
----
a\.b\[c\]\*
# whitespaces
query T
SELECT regexp_escape('a b c');
----
a\ b\ c
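# backslash-n sequence: the literal holds a backslash followed by 'n' (not an actual newline), and only the backslash is escaped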
query T
SELECT regexp_escape('\n');
----
\\n
query T
SELECT regexp_escape('line1\nline2');
----
line1\\nline2
# at sign
query T
SELECT regexp_escape('@');
----
\@
# backslashes
query T
SELECT regexp_escape('path\to\wonderland');
----
path\\to\\wonderland
# more special characters
query T
SELECT regexp_escape('$()*+.?[\]^{|}-');
----
\$\(\)\*\+\.\?\[\\\]\^\{\|\}\-
# mode output_hash
# test a table of 1000 strings with special characters
statement ok
CREATE TABLE tbl (c VARCHAR(255));
statement ok
INSERT INTO tbl SELECT 'a)*.?[\]b^{2.+_c' FROM generate_series(1, 500);
statement ok
INSERT INTO tbl(c) SELECT '1?ch@racter$' FROM generate_series(1, 500);
query I
SELECT regexp_escape(c) FROM tbl;
----
1000 values hashing to d9c29c89fadac59fb2be2397a94af1ee
query I
WITH cte AS (
SELECT c
FROM tbl
LIMIT 500
)
SELECT sum(cast(regexp_escape(c) = 'a\)\*\.\?\[\\\]b\^\{2\.\+_c' as int))
FROM cte
----
500
query I
WITH cte AS (
SELECT c
FROM tbl
OFFSET 500 LIMIT 500
)
SELECT sum(cast(regexp_escape(c) = '1\?ch\@racter\$' as int))
FROM cte
----
500

View File

@@ -0,0 +1,112 @@
# name: test/sql/function/string/regex_extract.test
# description: regex extract test
# group: [string]
statement ok
PRAGMA enable_verification
# without a group argument the whole match is returned
query T
SELECT regexp_extract('foobarbaz', 'b..')
----
bar
# matching is case sensitive by default: no match yields an empty string
query T
SELECT regexp_extract('foobarbaz', 'B..')
----
(empty)
# pass in regex options
query T
SELECT regexp_extract('foobarbaz', 'B..', 0, 'i')
----
bar
# group 1 requested from a pattern without capture groups: empty string
query T
SELECT regexp_extract('foobarbaz', 'b..', 1)
----
(empty)
# pattern with two capture groups: no group argument returns the whole match
query T
SELECT regexp_extract('foobarbaz', '(b..)(b..)')
----
barbaz
# group 1 and group 2 select the individual captures
query T
SELECT regexp_extract('foobarbaz', '(b..)(b..)', 1)
----
bar
query T
SELECT regexp_extract('foobarbaz', '(b..)(b..)', 2)
----
baz
# a negative group index is rejected
statement error
SELECT regexp_extract('foobarbaz', '(b..)(b..)', -1)
----
<REGEX>:.*Invalid Input Error: Group index must be.*
# so is a group index of 42
statement error
SELECT regexp_extract('foobarbaz', '(b..)(b..)', 42)
----
<REGEX>:.*Invalid Input Error: Group index must be.*
# table with per-row input string (s), pattern (p) and group index (i)
statement ok
CREATE TABLE test (s VARCHAR, p VARCHAR, i INT)
statement ok
INSERT INTO test VALUES
('foobarbaz', 'b..', 0),
('foobarbaz', 'b..', 1),
('foobarbaz', '(b..)(b..)', 0),
('foobarbaz', '(b..)(b..)', 1),
('foobarbaz', '(b..)(b..)', 2)
# the group index must be a constant: reading it from a column is an error
statement error
SELECT regexp_extract(s, p, i) FROM test
----
<REGEX>:.*Invalid Input Error.*must be a constant.*
# a per-row pattern is accepted as long as the group index is constant
query T
SELECT regexp_extract(s, p, 0) FROM test
----
bar
bar
barbaz
barbaz
barbaz
# constant pattern and constant group applied to every row
query T
SELECT regexp_extract(s, 'b..', 0) FROM test
----
bar
bar
bar
bar
bar
# a constant pattern does not lift the constant-group requirement
statement error
SELECT regexp_extract(s, '(b..)(b..)', i) FROM test
----
<REGEX>:.*Invalid Input Error.*must be a constant.*
# null values
# NULL pattern: the result is NULL
query T
SELECT regexp_extract('foobarbaz', NULL, 0)
----
NULL
# NULL group index: the result is an empty string rather than NULL
query T
SELECT regexp_extract('foobarbaz', 'b..', NULL)
----
(empty)
# NULL input string: the result is NULL
query T
SELECT regexp_extract(NULL, 'b..')
----
NULL
# passing the group index as the string '1' fails at bind time
statement error
SELECT regexp_extract('foobarbaz', 'b..', '1')
----
<REGEX>:.*Binder Error.*Could not choose a best candidate.*

View File

@@ -0,0 +1,709 @@
# name: test/sql/function/string/regex_extract_all.test
# description: regex extract all test
# group: [string]
statement ok
PRAGMA enable_verification
query I
SELECT regexp_extract_all('1a 2b 14m', '(\d+)', 1);
----
[1, 2, 14]
query I
SELECT regexp_extract_all('1a 2b 14m', '(\d+)([a-z]+)', 2)
----
[a, b, m]
query I
SELECT REGEXP_EXTRACT_ALL('test', '.')
----
[t, e, s, t]
# The patterns in the following queries contain a doubled backslash ('\\d').
# Unlike the single-backslash '(\d+)' patterns at the top of this file, they never match the digits in the input:
# the expected results are empty lists, or lists of NULLs where the digit group is optional.
# NOTE(review): presumably '\\d' reaches the regex engine as an escaped literal backslash followed by 'd' - confirm
query I
SELECT regexp_extract_all('1a 2b 14m', '(\\d+)([a-z]+)', -1);
----
[]
query I
SELECT regexp_extract_all('1a 2b 14m', '\\d+');
----
[]
query I
SELECT regexp_extract_all('1a 2b 14m', '\\d+', 0);
----
[]
query I
SELECT regexp_extract_all('1a 2b 14m', '\\d+', 1);
----
[]
query I
SELECT regexp_extract_all('1a 2b 14m', '\\d+', 2);
----
[]
query I
SELECT regexp_extract_all('1a 2b 14m', '\\d+', -1);
----
[]
# the whole pattern is optional: one entry per position of the input (10 entries for the 9-character string),
# and group 1 is NULL in each of them
query I
SELECT regexp_extract_all('1a 2b 14m', '(\\d+)?', 1);
----
[NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL]
# same for the 8-character string: 9 entries
query I
SELECT regexp_extract_all('a 2b 14m', '(\\d+)?', 1);
----
[NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL]
# mandatory digit group followed by letters: no match for any group index
query I
SELECT regexp_extract_all('1a 2b 14m', '(\\d+)([a-z]+)');
----
[]
query I
SELECT regexp_extract_all('1a 2b 14m', '(\\d+)([a-z]+)', 0);
----
[]
query I
SELECT regexp_extract_all('1a 2b 14m', '(\\d+)([a-z]+)', 1);
----
[]
query I
SELECT regexp_extract_all('1a 2b 14m', '(\\d+)([a-z]+)', 2);
----
[]
query I
SELECT regexp_extract_all('1a 2b 14m', '(\\d+)([a-z]+)', 3);
----
[]
query I
SELECT regexp_extract_all('1a 2b 14m', '(\\d+)([a-z]+)', -1);
----
[]
# optional digit group followed by letters: three matches, with group 1 NULL in each of them
query I
SELECT regexp_extract_all('1a 2b 14m', '(\\d+)?([a-z]+)', 1);
----
[NULL, NULL, NULL]
query I
SELECT regexp_extract_all('a 2b 14m', '(\\d+)?([a-z]+)', 1);
----
[NULL, NULL, NULL]
# source:
# https://github.com/ep-infosec/33_apache_doris/blob/9edb4e8735fd0c56ee63a3d1d2560af867d80440/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/regexp/regexp_extract_all.md
query I
select regexp_extract_all('AbCdE', '([[:lower:]]+)C([[:lower:]]+)', 1);
----
[b]
query I
select regexp_extract_all('AbCdEfCg', '([[:lower:]]+)C([[:lower:]]+)', 1);
----
[b, f]
query I
select regexp_extract_all('abc=111, def=222, ghi=333', '("[^"]+"|\w+)=("[^"]+"|\w+)', 1);
----
[abc, def, ghi]
query I
select regexp_extract_all('', NULL)
----
NULL
query I
select regexp_extract_all(NULL, '')
----
NULL
query I
select regexp_extract_all('', '')
----
['']
query I
select regexp_extract_all('', 'abc')
----
[]
query I
select regexp_extract_all('abc', '.')
----
[a, b, c]
query I
select regexp_extract_all('aaa', '^a')
----
[a]
query I
select regexp_extract_all('abc', 'abc')
----
[abc]
query I
select regexp_extract_all('abcdef', 'a.c.*f')
----
[abcdef]
query I
select regexp_extract_all('abcdef', 'ac.*e.')
----
[]
query I
select regexp_extract_all('abcdef', 'bcde')
----
[bcde]
query I
select regexp_extract_all('aabca', 'a*')
----
[aa, '', '', a, '']
query I
select regexp_extract_all('aaba', 'a?')
----
[a, a, '', a, '']
query I
select regexp_extract_all('baac', 'a*')
----
['', aa, '', '']
query I
select regexp_extract_all('abcd', 'a(bc)*')
----
[abc]
query I
select regexp_extract_all('щцф', '.')
----
[щ, ц, ф]
query I
select regexp_extract_all('щцф', '.{3}')
----
[щцф]
query I
select regexp_extract_all('щцф', '.{6}')
----
[]
query I
select regexp_extract_all('щццф', 'ц*')
----
['', цц, '', '']
query I
select regexp_extract_all('abba', 'b*')
----
['', bb, '', '']
query I
select regexp_extract_all('', '()')
----
['']
query I
select regexp_extract_all('', '(abc)')
----
[]
query I
select regexp_extract_all('', '(abc)?')
----
['']
query I
select regexp_extract_all('abc', 'a(b)c')
----
[abc]
query I
select regexp_extract_all('abbb', '^a*(b)')
----
[ab]
query I
select regexp_extract_all('XbASDZb', '(.)b')
----
[Xb, Zb]
query I
select regexp_extract_all('abcdef', 'a(.c.*)f')
----
[abcdef]
query I
select regexp_extract_all('abcdef', '(bcde)')
----
[bcde]
query I
select regexp_extract_all('this_is__a___Test', '(.*?)(?:_|$)')
----
[this_, is_, _, a_, _, _, Test, '']
query I
select regexp_extract_all('щцф', 'щ(.).', 1)
----
[ц]
query I
select regexp_extract_all('щцф', '(.{6})')
----
[]
query I
select regexp_extract_all('abc', '((a))')
----
[a]
query I
select regexp_extract_all('abc', '(a)(b)')
----
[ab]
statement error
select regexp_extract_all('', '(')
----
Invalid Input Error: missing )
query I
select regexp_extract_all('abcdef', 'ac.*e.')
----
[]
query I
select regexp_extract_all('щцф', '.{2}')
----
[щц]
query I
select regexp_extract_all('abc', '.{2}')
----
[ab]
# The expected result holds 13 empty matches: one per position of a 12-character input,
# i.e. the optional pattern never matches an actual character.
# NOTE(review): presumably the backslash sequences in the input literal are not turned into control characters - confirm
query I
select regexp_extract_all('\001\002\003', '\002?')
----
['', '', '', '', '', '', '', '', '', '', '', '', '']
query I
select regexp_extract_all('', '()')
----
['']
query I
select regexp_extract_all('', '(abc)')
----
[]
query I
select regexp_extract_all('', '(abc)?')
----
['']
query I
select regexp_extract_all('this__test_case', '(.*?)(?:_|$)')
----
[this_, _, test_, case, '']
query I
select regexp_extract_all('щцф', 'щ(..)..')
----
[]
query II
SELECT str,
REGEXP_EXTRACT_ALL(str,'ab?cd') AS matched
FROM (
VALUES ('acd'), ('abcd'), ('abcdacd'), ('abbcd'), ('abbbcd'), ('ab1cd')
) AS t(str)
----
acd [acd]
abcd [abcd]
abcdacd [abcd, acd]
abbcd []
abbbcd []
ab1cd []
query II
SELECT str,
REGEXP_EXTRACT_ALL(str,'a(bc)?d') AS matched
FROM (
VALUES ('ad'), ('abd'), ('acd'), ('abcd'), ('abce'), ('abcxd'), ('abcbcd')
) AS t(str)
----
ad [ad]
abd []
acd []
abcd [abcd]
abce []
abcxd []
abcbcd []
query II
SELECT str,
REGEXP_EXTRACT_ALL(str,'a[bc]?d') AS matched
FROM (
VALUES ('ad'), ('abd'), ('acd'), ('acde'), ('abcd'), ('abbd')
) AS t(str)
----
ad [ad]
abd [abd]
acd [acd]
acde [acd]
abcd []
abbd []
query II
SELECT str,
REGEXP_EXTRACT_ALL(str,'X[\d]?Y[\d]?') AS matched
FROM (
VALUES ('XY'),('X1Y'),('X123Y'),('X1Y23')
) AS t(str)
----
XY [XY]
X1Y [X1Y]
X123Y []
X1Y23 [X1Y2]
query II
SELECT str,
REGEXP_EXTRACT_ALL(str,'ab*cd') AS matched
FROM (
VALUES ('acd'), ('abcd'), ('abcdacd'), ('abbcd'), ('abbbcd'), ('ab1cd')
) AS t(str)
----
acd [acd]
abcd [abcd]
abcdacd [abcd, acd]
abbcd [abbcd]
abbbcd [abbbcd]
ab1cd []
query II
SELECT str,
REGEXP_EXTRACT_ALL(str,'a(bc)*d') AS matched
FROM (
VALUES ('ad'), ('abd'), ('acd'), ('abcd'), ('abcxd'), ('abcbcd'), ('abcbce')
) AS t(str)
----
ad [ad]
abd []
acd []
abcd [abcd]
abcxd []
abcbcd [abcbcd]
abcbce []
query II
SELECT str,
REGEXP_EXTRACT_ALL(str,'a[bc]*d') AS matched
FROM (
VALUES ('ad'), ('abd'), ('acd'), ('abcd'), ('abcbbcd'), ('abce')
) AS t(str)
----
ad [ad]
abd [abd]
acd [acd]
abcd [abcd]
abcbbcd [abcbbcd]
abce []
query II
SELECT str,
REGEXP_EXTRACT_ALL(str,'X[\d]*Y[\d]*') AS matched
FROM (
VALUES ('XY'),('X1Y'),('X123Y'),('X1Y23'),('X12Z34Y')
) AS t(str)
----
XY [XY]
X1Y [X1Y]
X123Y [X123Y]
X1Y23 [X1Y23]
X12Z34Y []
query II
SELECT str,
REGEXP_EXTRACT_ALL(str,'ab+cd') AS matched
FROM (
VALUES ('acd'), ('abcd'), ('abcdacd'), ('abbcd'), ('abbbcd'), ('ab1cd')
) AS t(str)
----
acd []
abcd [abcd]
abcdacd [abcd]
abbcd [abbcd]
abbbcd [abbbcd]
ab1cd []
query II
SELECT str,
REGEXP_EXTRACT_ALL(str,'(ab)+cd') AS matched
FROM (
VALUES ('bcd'), ('abcd'), ('abbcd'), ('ababcd'), ('ababxcd')
) AS t(str)
----
bcd []
abcd [abcd]
abbcd []
ababcd [ababcd]
ababxcd []
query II
SELECT str,
REGEXP_EXTRACT_ALL(str,'a[bc]+d') AS matched
FROM (
VALUES ('ad'), ('abd'), ('acd'), ('abcd'), ('abce'), ('abcbd')
) AS t(str)
----
ad []
abd [abd]
acd [acd]
abcd [abcd]
abce []
abcbd [abcbd]
query II
SELECT str,
REGEXP_EXTRACT_ALL(str,'X[\d]+Y[\d]*') AS matched
FROM (
VALUES ('XY'),('X1Y'),('X123Y'),('X1Y23'),('X12Z34Y')
) AS t(str)
----
XY []
X1Y [X1Y]
X123Y [X123Y]
X1Y23 [X1Y23]
X12Z34Y []
query IIIII
SELECT str,
REGEXP_EXTRACT_ALL(str,'ab{3}cd') AS m1,
REGEXP_EXTRACT_ALL(str,'ab{1,}cd') AS m2,
REGEXP_EXTRACT_ALL(str,'ab{1,2}cd') AS m3,
REGEXP_EXTRACT_ALL(str,'ab{0,2}cd') AS m4
FROM (
VALUES ('acd'), ('abcd'), ('abcdacd'), ('abbcd'), ('abbbcd'), ('ab1cd')
) AS t(str)
----
acd [] [] [] [acd]
abcd [] [abcd] [abcd] [abcd]
abcdacd [] [abcd] [abcd] [abcd, acd]
abbcd [] [abbcd] [abbcd] [abbcd]
abbbcd [abbbcd] [abbbcd] [] []
ab1cd [] [] [] []
query IIIII
SELECT str,
REGEXP_EXTRACT_ALL(str,'(ab){2}cd') AS m1,
REGEXP_EXTRACT_ALL(str,'(ab){1,}cd') AS m2,
REGEXP_EXTRACT_ALL(str,'(ab){1,2}cd') AS m3,
REGEXP_EXTRACT_ALL(str,'(ab){0,2}cd') AS m4
FROM (
VALUES ('acd'), ('bcd'),('abcd'), ('abbcd'), ('ababcd'), ('ab1cd')
) AS t(str)
----
acd [] [] [] [cd]
bcd [] [] [] [cd]
abcd [] [abcd] [abcd] [abcd]
abbcd [] [] [] [cd]
ababcd [ababcd] [ababcd] [ababcd] [ababcd]
ab1cd [] [] [] [cd]
query IIIIII
SELECT str,
REGEXP_EXTRACT_ALL(str,'[\d]{3}') AS m1,
REGEXP_EXTRACT_ALL(str,'[\d]{5}') AS m2,
REGEXP_EXTRACT_ALL(str,'[\d]{1,}') AS m3,
REGEXP_EXTRACT_ALL(str,'[\d]{1,2}') AS m4,
REGEXP_EXTRACT_ALL(str,'[\d]{0,2}') AS m5
FROM (
VALUES ('03-123-4567')
) AS t(str)
----
03-123-4567 [123, 456] [] [03, 123, 4567] [03, 12, 3, 45, 67] [03, '', 12, 3, '', 45, 67, '']
statement error
SELECT str,
REGEXP_EXTRACT_ALL(str,'ab++') AS m1_long,
FROM (
VALUES ('acd'), ('abcd'), ('abbcd'), ('abbbcd')
) AS t(str)
----
Invalid Input Error: bad repetition operator: ++
query IIIIIII
SELECT
REGEXP_EXTRACT_ALL('123456789', '^[0-9]*$') AS m1,
REGEXP_EXTRACT_ALL('abcdefg', '^[a-z]*$') AS m2,
REGEXP_EXTRACT_ALL('ABCDEFG', '^[A-Z]*$') AS m3,
REGEXP_EXTRACT_ALL('ABCdefg', '^[a-zA-Z]*$') AS m4,
REGEXP_EXTRACT_ALL('12aaAA', '^[0-9a-zA-Z]*$') AS m5,
REGEXP_EXTRACT_ALL('123-1234', '^[0-9]{3}-[0-9]{4}$') AS m6,
REGEXP_EXTRACT_ALL('2009/7/29', '^[0-9]{4}/[01]?[0-9]/[0123]?[0-9]$') AS m7
----
[123456789] [abcdefg] [ABCDEFG] [ABCdefg] [12aaAA] [123-1234] [2009/7/29]
query III
WITH sample AS
(
SELECT s FROM
(
VALUES
('https://docs.fluentd.org/v0.12/articles/out_file'),
('https://docs.fluentd.org/v0.12/articles/out_forward'),
('https://www.fluentd.org/v0.12/articles/out_file'),
('out_file/article/docs.fluentd.org/')
) AS t(s)
)
SELECT
s, REGEXP_EXTRACT_ALL(s,'docs.fluentd.org.*out_file') AS match_strs1,
REGEXP_EXTRACT_ALL(s,'docs.fluentd.org|out_file') AS match_strs2
FROM sample
----
https://docs.fluentd.org/v0.12/articles/out_file [docs.fluentd.org/v0.12/articles/out_file] [docs.fluentd.org, out_file]
https://docs.fluentd.org/v0.12/articles/out_forward [] [docs.fluentd.org]
https://www.fluentd.org/v0.12/articles/out_file [] [out_file]
out_file/article/docs.fluentd.org/ [] [out_file, docs.fluentd.org]
statement error
select REGEXP_EXTRACT_ALL('hello', '.', 2);
----
Pattern has 0 groups. Cannot access group 2
query II
SELECT
REGEXP_EXTRACT_ALL('https://www.emakina.nl/?utm_source=sf_mail&user_id=7h87hte51kj_9866c', '([^\?&]+)=') AS parameter_key,
REGEXP_EXTRACT_ALL('https://www.emakina.nl/?utm_source=sf_mail&user_id=7h87hte51kj_9866c', '=([^&]+)') AS parameter_value
----
['utm_source=', 'user_id='] ['=sf_mail', '=7h87hte51kj_9866c']
# Double anchor:
query I
select regexp_extract_all('si1si2', 'si\d$');
----
[si2]
query I
select regexp_extract_all('si1si2', '^(si\d)(?:.*)$', 1);
----
[si1]
query I
select regexp_extract_all('aabb', '^((aa)(bb))$', 3);
----
[bb]
# Different size patterns
query I
WITH t(pattern) AS (
VALUES
(NULL),
('(a)'),
('(a)(a)(a)'),
('()'),
('(a)(a)'),
(NULL),
('(a)'),
('(a)(b)?(a)'),
('(a)(a)(a)(a)(a)(a)(a)'),
(NULL),
)
select regexp_extract_all('aaaaaaaa', pattern) from t;
----
NULL
[a, a, a, a, a, a, a, a]
[aaa, aaa]
['', '', '', '', '', '', '', '', '']
[aa, aa, aa, aa]
NULL
[a, a, a, a, a, a, a, a]
[aa, aa, aa, aa]
[aaaaaaa]
NULL
query I
WITH t(input, pattern, g) AS (
VALUES
(NULL, NULL, 0),
(NULL, NULL, 1),
('aaaaaaaa', '(a)', 0),
('aaaaaaaa', '(a)(a)(a)', 0),
('aaaaaaaa', '()', 1),
('aaaaaaaa', '(a)(a)', 0),
(NULL, NULL, NULL),
('aaaaaaaa', '(a)', 0),
('aaaaaaaa', '(a)(b)?(a)', NULL),
('aaa', '(a)(a)(a)(a)(a)(a)(a)', 0),
('aaaaaaaaaaa', '(a)(a)(a)(a)(a)(a)(a)', 0),
(NULL, '()', NULL)
)
select regexp_extract_all(input, pattern, g) from t;
----
NULL
NULL
[a, a, a, a, a, a, a, a]
[aaa, aaa]
['', '', '', '', '', '', '', '', '']
[aa, aa, aa, aa]
NULL
[a, a, a, a, a, a, a, a]
NULL
[]
[aaaaaaa]
NULL
# With regex options
# 'i' makes the upper-case pattern match the lower-case input
# note: inside the character class the '|' is a literal member, so [R|Z] accepts 'R', '|' or 'Z'
query I
select regexp_extract_all('foobarbaz', '(BA[R|Z])', 1, 'i');
----
[bar, baz]
query I
WITH t(input, pattern, g) AS (
VALUES
(NULL, NULL, 0),
(NULL, NULL, 1),
('aaaaaaaa', '(A)', 0),
('aaaaaaaa', '(a)(A)(a)', 0),
('aaaaaaaa', '()', 1),
('aaaaaaaa', '(a)(A)', 0),
(NULL, NULL, NULL),
('aaaaaaaa', '(a)', 0),
('aaaaaaaa', '(a)(B)?(a)', NULL),
('aaa', '(a)(a)(A)(A)(a)(A)(a)', 0),
('aaaaaaaaaaa', '(a)(A)(a)(a)(a)(a)(a)', 0),
(NULL, '()', NULL)
)
select regexp_extract_all(input, pattern, g, 'i') from t;
----
NULL
NULL
[a, a, a, a, a, a, a, a]
[aaa, aaa]
['', '', '', '', '', '', '', '', '']
[aa, aa, aa, aa]
NULL
[a, a, a, a, a, a, a, a]
NULL
[]
[aaaaaaa]
NULL

View File

@@ -0,0 +1,34 @@
# name: test/sql/function/string/regex_filter_pushdown.test
# description: regex filter push test
# group: [string]
statement ok
PRAGMA enable_verification
# three rows, only 'asdf' matches the pattern used in the filters below
statement ok
CREATE TABLE regex(s STRING)
statement ok
INSERT INTO regex VALUES ('asdf'), ('xxxx'), ('aaaa')
# regex used as the only filter
query T
SELECT s FROM regex WHERE REGEXP_MATCHES(s, 'as(c|d|e)f')
----
asdf
# negated regex filter returns the remaining rows
query T
SELECT s FROM regex WHERE NOT REGEXP_MATCHES(s, 'as(c|d|e)f')
----
xxxx
aaaa
# regex filter combined with an equality filter
query T
SELECT s FROM regex WHERE REGEXP_MATCHES(s, 'as(c|d|e)f') AND s = 'asdf'
----
asdf
# two regex filters combined
query T
SELECT s FROM regex WHERE REGEXP_MATCHES(s, 'as(c|d|e)f') AND REGEXP_MATCHES(s, 'as[a-z]f')
----
asdf

View File

@@ -0,0 +1,121 @@
# name: test/sql/function/string/regex_replace.test
# description: regex replace test
# group: [string]
statement ok
PRAGMA enable_verification
# standard replace
query T
SELECT regexp_replace('foobarbaz', 'b..', 'X')
----
fooXbaz
# global replace
query T
SELECT regexp_replace('ana ana', 'ana', 'banana', 'g')
----
banana banana
query T
SELECT regexp_replace('ANA ana', 'ana', 'banana', 'gi')
----
banana banana
# case sensitivity
query T
SELECT regexp_replace('ana', 'ana', 'banana', 'c')
----
banana
query T
SELECT regexp_replace('ANA', 'ana', 'banana', 'i')
----
banana
# literal match
query T
SELECT regexp_replace('as^/$df', '^/$', '', 'l')
----
asdf
query T
SELECT regexp_replace('as^/$df', '^/$', '')
----
as^/$df
# dot matches newline
query T
SELECT regexp_replace('hello
world', '.*', 'x', 'sg')
----
x
# the result here is a single row with a newline ('x\nx')
# this is a bit complicated to check in sqllogictest, so we use a JOIN with a count
# to verify the correct result
query T
SELECT COUNT(*) FROM (SELECT 'x
x') t1(a) JOIN (SELECT regexp_replace('hello
world', '.*', 'x', 'ng')) t2(a) USING (a)
----
1
# this also works with tables
statement ok
CREATE TABLE test(v VARCHAR);
statement ok
INSERT INTO test VALUES ('hello'), ('HELLO');
query T
SELECT regexp_replace(v, 'h.*', 'world', 'i') FROM test ORDER BY v
----
world
world
query T
SELECT regexp_replace(v, 'h.*', 'world', 'c') FROM test ORDER BY v
----
HELLO
world
# we cannot use non-constant options (currently)
# the expected message is pinned so that an unrelated failure cannot satisfy the test;
# it is the same message the regexp_matches option test in regex_search.test asserts
statement error
SELECT regexp_replace(v, 'h.*', 'world', v) FROM test ORDER BY v
----
<REGEX>:.*Invalid Input Error.*must be a constant.*
# throw on invalid options
# 'q' is not a known regex option; the message matches the regexp_matches counterpart in regex_search.test
statement error
SELECT regexp_replace('asdf', '.*SD.*', 'a', 'q')
----
<REGEX>:.*Invalid Input Error.*Unrecognized.*
# this used to fail, as it should, but let's make sure it still fails
statement error
select regexp_matches('abc', '*');
----
no argument for repetition operator: *
# this used to silently swallow the error from the invalid regex
statement error
select regexp_replace('abc', '*', 'X');
----
no argument for repetition operator: *
# make sure this also holds for non-constant case
statement ok
create table regex (s string, r string);
statement ok
insert into regex values ('abc', '*');
statement error
select regexp_matches(s, r) from regex;
----
no argument for repetition operator: *
statement error
select regexp_replace(s, r, 'X') from regex;
----
no argument for repetition operator: *

View File

@@ -0,0 +1,205 @@
# name: test/sql/function/string/regex_search.test
# description: regex search test
# group: [string]
statement ok
PRAGMA enable_verification
statement ok
CREATE TABLE t0 as FROM VALUES('asdf') t(c0);
# null
query I
SELECT regexp_matches(c0, NULL) from t0
----
NULL
# constant strings
query T
SELECT regexp_matches(c0, '.*sd.*') from t0;
----
1
query T
SELECT regexp_matches(c0, '.*yu.*') from t0;
----
0
query T
SELECT regexp_matches(c0, '') from t0;
----
1
# partial matches okay
query T
SELECT regexp_matches(c0, 'sd') from t0;
----
1
query T
SELECT regexp_full_match(c0, 'sd') from t0;
----
0
query T
SELECT regexp_full_match(c0, '.sd.') from t0;
----
1
query T
SELECT regexp_matches(c0, '^sdf$') from t0;
----
0
# empty strings
query T
SELECT regexp_matches('', '.*yu.*')
----
0
query T
SELECT regexp_matches('', '.*')
----
1
# NULLs
query T
SELECT regexp_matches(c0, CAST(NULL AS STRING)) from t0;
----
NULL
query T
SELECT regexp_matches(CAST(NULL AS STRING), '.*sd.*')
----
NULL
query T
SELECT regexp_matches(CAST(NULL AS STRING), CAST(NULL AS STRING))
----
NULL
query T
SELECT regexp_matches('foobarbequebaz', '(bar)(beque)')
----
1
# postgres says throw error on invalid regex
statement error
SELECT regexp_matches('', '\X')
----
<REGEX>:.*Invalid Input Error: invalid escape sequence.*
statement ok
CREATE TABLE regex(s STRING, p STRING)
statement ok
INSERT INTO regex VALUES ('asdf', 'sd'), ('asdf', '^sd'), (NULL, '^sd'), ('asdf', NULL)
query T
SELECT regexp_matches(s, '.*') FROM regex
----
1
1
NULL
1
query T
SELECT regexp_matches(s, p) FROM regex
----
1
0
NULL
NULL
# test regex_matches with options
# case sensitivity
query T
SELECT regexp_matches(c0, '.*SD.*', 'i') from t0;
----
1
query T
SELECT regexp_matches(c0, '.*SD.*', 'c') from t0;
----
0
# literal match
query T
SELECT regexp_matches('as^/$df', '^/$', 'l')
----
1
query T
SELECT regexp_matches('as^/$df', '^/$')
----
0
# dot matches newline
query T
SELECT regexp_matches('hello
world', '.*', 's')
----
1
query T
SELECT regexp_full_match('hello
world', '.*', 'n')
----
0
# whitespace is ignored
query T
SELECT regexp_matches(c0, '.*SD.*', ' i ') from t0;
----
1
# NULL in options is an error
statement error
SELECT regexp_matches(c0, '.*SD.*', NULL) from t0;
----
<REGEX>:.*Invalid Input Error.*must not be NULL.*
# this also works with tables
statement ok
CREATE TABLE test(v VARCHAR);
statement ok
INSERT INTO test VALUES ('hello'), ('HELLO');
query T
SELECT regexp_matches(v, 'h.*', 'i') FROM test ORDER BY v
----
1
1
query T
SELECT regexp_matches(v, 'h.*', 'c') FROM test ORDER BY v
----
0
1
statement error
SELECT regexp_matches(v, 'h.*', v) FROM test ORDER BY v
----
<REGEX>:.*Invalid Input Error.*must be a constant.*
# throw on invalid options
statement error
SELECT regexp_matches(c0, '.*SD.*', 'q') from t0;
----
<REGEX>:.*Invalid Input Error.*Unrecognized.*
# can only use "g" with regexp replace
statement error
SELECT regexp_matches(c0, '.*SD.*', 'g') from t0;
----
<REGEX>:.*Invalid Input Error.*only valid for regexp_replace.*
# error in non-constant regex
statement ok
INSERT INTO regex VALUES ('asdf', '(')
statement error
SELECT regexp_matches(s, p) FROM regex
----
<REGEX>:.*Invalid Input Error.*missing.*

View File

@@ -0,0 +1,30 @@
# name: test/sql/function/string/regexp_split_to_table.test
# description: regexp_split_to_table test
# group: [string]
statement ok
PRAGMA enable_verification
# non-regex split
query T
SELECT regexp_split_to_table('a b c', ' ')
----
a
b
c
# regex-based split
# note: inside the character class the '|' is a literal member, so [x|y] splits on 'x', '|' or 'y'
query T
SELECT regexp_split_to_table('axbyc', '[x|y]')
----
a
b
c
# regex-based split with an unaffected column
query II
SELECT regexp_split_to_table('axbyc', '[x|y]'), 42
----
a 42
b 42
c 42

View File

@@ -0,0 +1,55 @@
# name: test/sql/function/string/regexp_unicode_literal.test
# description: Issue #10058: Regex match turns non-breakable space into regular space
# group: [string]
statement ok
PRAGMA enable_verification
statement ok
CREATE TABLE data(wsc INT, zipcode VARCHAR)
statement ok
INSERT INTO data VALUES (32, '00' || chr(32) || '001'), (160, '00' || chr(160) || '001'), (0, '00🦆001');
query II
from data
where regexp_matches(zipcode, '^00\x{0020}001$')
----
32 00 001
query II
from data
where regexp_matches(zipcode, '^00\x{00A0}001$')
----
160 00 001
query II
from data
where regexp_matches(zipcode, '\x{00A0}001$')
----
160 00 001
query II
from data
where regexp_matches(zipcode, '^00\x{1F986}001$')
----
0 00🦆001
query II
from data
where regexp_matches(zipcode, '\x{1F986}')
----
0 00🦆001
query II
select *
from data
where regexp_matches(zipcode, '^00\x{00A0}001$')
and regexp_matches(zipcode, '^00\x{0020}001$')
----
statement error
select regexp_matches(zipcode, '^00\x{FFFFFFFF}001$') from data
----
invalid escape sequence

View File

@@ -0,0 +1,62 @@
# name: test/sql/function/string/sha1.test
# description: Test the sha1 function
# group: [string]
statement ok
PRAGMA enable_verification
query II
SELECT sha1('hello'), sha1(NULL)
----
aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d
NULL
query I
SELECT sha1('')
----
da39a3ee5e6b4b0d3255bfef95601890afd80709
query I
SELECT sha1('\xff\xff'::BLOB)
----
a19f987b885f5a96069f4bc7f12b9e84ceba7dfa
statement ok
CREATE TABLE strings AS SELECT s::VARCHAR s FROM generate_series(0,10,1) t(s)
query II
SELECT sha1(s), sha1('1') FROM strings ORDER BY s
----
b6589fc6ab0dc82cf12099d1c2d40ab994e8410c 356a192b7913b04c54574d18c28d46e6395428ab
356a192b7913b04c54574d18c28d46e6395428ab 356a192b7913b04c54574d18c28d46e6395428ab
b1d5781111d84f7b3fe45a0852e59758cd7a87e5 356a192b7913b04c54574d18c28d46e6395428ab
da4b9237bacccdf19c0760cab7aec4a8359010b0 356a192b7913b04c54574d18c28d46e6395428ab
77de68daecd823babbb58edb1c8e14d7106e83bb 356a192b7913b04c54574d18c28d46e6395428ab
1b6453892473a467d07372d45eb05abc2031647a 356a192b7913b04c54574d18c28d46e6395428ab
ac3478d69a3c81fa62e60f5c3696165a4e5e6ac4 356a192b7913b04c54574d18c28d46e6395428ab
c1dfd96eea8cc2b62785275bca38ac261256e278 356a192b7913b04c54574d18c28d46e6395428ab
902ba3cda1883801594b6e1b452790cc53948fda 356a192b7913b04c54574d18c28d46e6395428ab
fe5dbbcea5ce7e2988b8c69bcfdfde8904aabc1f 356a192b7913b04c54574d18c28d46e6395428ab
0ade7c2cf97f75d009975f4d720d1fa6c19f4897 356a192b7913b04c54574d18c28d46e6395428ab
query II
SELECT sha1(s), sha1('1') FROM strings WHERE s::INTEGER BETWEEN 1 AND 3 ORDER BY s
----
356a192b7913b04c54574d18c28d46e6395428ab 356a192b7913b04c54574d18c28d46e6395428ab
da4b9237bacccdf19c0760cab7aec4a8359010b0 356a192b7913b04c54574d18c28d46e6395428ab
77de68daecd823babbb58edb1c8e14d7106e83bb 356a192b7913b04c54574d18c28d46e6395428ab
statement error
SELECT sha1()
----
<REGEX>:.*Binder Error: No function matches.*
query I
SELECT sha1(''::blob)
----
da39a3ee5e6b4b0d3255bfef95601890afd80709
statement error
SELECT sha1(42)
----
<REGEX>:.*Binder Error: No function matches.*

View File

@@ -0,0 +1,52 @@
# name: test/sql/function/string/sha256.test
# description: Test the sha256 function
# group: [string]
statement ok
PRAGMA enable_verification
query II
SELECT sha256('hello'), sha256(NULL)
----
2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824
NULL
query I
SELECT sha256('')
----
e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
query I
SELECT sha256('\xff\xff'::BLOB)
----
ca2fd00fa001190744c15c317643ab092e7048ce086a243e2be9437c898de1bb
statement ok
CREATE TABLE strings AS SELECT s::VARCHAR s FROM generate_series(0,10,1) t(s)
query II
SELECT sha256(s), sha256('1') FROM strings ORDER BY s
----
5feceb66ffc86f38d952786c6d696c79c2dbc239dd4e91b46729d73a27fb57e9 6b86b273ff34fce19d6b804eff5a3f5747ada4eaa22f1d49c01e52ddb7875b4b
6b86b273ff34fce19d6b804eff5a3f5747ada4eaa22f1d49c01e52ddb7875b4b 6b86b273ff34fce19d6b804eff5a3f5747ada4eaa22f1d49c01e52ddb7875b4b
4a44dc15364204a80fe80e9039455cc1608281820fe2b24f1e5233ade6af1dd5 6b86b273ff34fce19d6b804eff5a3f5747ada4eaa22f1d49c01e52ddb7875b4b
d4735e3a265e16eee03f59718b9b5d03019c07d8b6c51f90da3a666eec13ab35 6b86b273ff34fce19d6b804eff5a3f5747ada4eaa22f1d49c01e52ddb7875b4b
4e07408562bedb8b60ce05c1decfe3ad16b72230967de01f640b7e4729b49fce 6b86b273ff34fce19d6b804eff5a3f5747ada4eaa22f1d49c01e52ddb7875b4b
4b227777d4dd1fc61c6f884f48641d02b4d121d3fd328cb08b5531fcacdabf8a 6b86b273ff34fce19d6b804eff5a3f5747ada4eaa22f1d49c01e52ddb7875b4b
ef2d127de37b942baad06145e54b0c619a1f22327b2ebbcfbec78f5564afe39d 6b86b273ff34fce19d6b804eff5a3f5747ada4eaa22f1d49c01e52ddb7875b4b
e7f6c011776e8db7cd330b54174fd76f7d0216b612387a5ffcfb81e6f0919683 6b86b273ff34fce19d6b804eff5a3f5747ada4eaa22f1d49c01e52ddb7875b4b
7902699be42c8a8e46fbbb4501726517e86b22c56a189f7625a6da49081b2451 6b86b273ff34fce19d6b804eff5a3f5747ada4eaa22f1d49c01e52ddb7875b4b
2c624232cdd221771294dfbb310aca000a0df6ac8b66b696d90ef06fdefb64a3 6b86b273ff34fce19d6b804eff5a3f5747ada4eaa22f1d49c01e52ddb7875b4b
19581e27de7ced00ff1ce50b2047e7a567c76b1cbaebabe5ef03f7c3017bb5b7 6b86b273ff34fce19d6b804eff5a3f5747ada4eaa22f1d49c01e52ddb7875b4b
query II
SELECT sha256(s), sha256('1') FROM strings WHERE s::INTEGER BETWEEN 1 AND 3 ORDER BY s
----
6b86b273ff34fce19d6b804eff5a3f5747ada4eaa22f1d49c01e52ddb7875b4b 6b86b273ff34fce19d6b804eff5a3f5747ada4eaa22f1d49c01e52ddb7875b4b
d4735e3a265e16eee03f59718b9b5d03019c07d8b6c51f90da3a666eec13ab35 6b86b273ff34fce19d6b804eff5a3f5747ada4eaa22f1d49c01e52ddb7875b4b
4e07408562bedb8b60ce05c1decfe3ad16b72230967de01f640b7e4729b49fce 6b86b273ff34fce19d6b804eff5a3f5747ada4eaa22f1d49c01e52ddb7875b4b
statement error
SELECT sha256()
----
<REGEX>:.*Binder Error: No function matches.*

View File

@@ -0,0 +1,37 @@
# name: test/sql/function/string/strip_accents.test
# description: Test strip accents function
# group: [string]
query TT
SELECT strip_accents('hello'), strip_accents('héllo')
----
hello hello
query TT
SELECT strip_accents('mühleisen'), strip_accents('hannes mühleisen')
----
muhleisen hannes muhleisen
statement ok
CREATE TABLE collate_test(s VARCHAR, str VARCHAR)
statement ok
INSERT INTO collate_test VALUES ('äää', 'aaa')
statement ok
INSERT INTO collate_test VALUES ('hännës mühlëïsën', 'hannes muhleisen')
statement ok
INSERT INTO collate_test VALUES ('olá', 'ola')
statement ok
INSERT INTO collate_test VALUES ('ôâêóáëòõç', 'oaeoaeooc')
query T
SELECT strip_accents(s)=strip_accents(str) FROM collate_test
----
1
1
1
1

View File

@@ -0,0 +1,164 @@
# name: test/sql/function/string/test_array_extract.test
# description: array_extract test on strings
# group: [string]
statement ok
PRAGMA enable_verification
statement ok
CREATE TABLE strings(s VARCHAR, off INTEGER);
statement ok
INSERT INTO strings VALUES ('hello', 1), ('world', 2), ('b', 1), (NULL, 2)
# an index past the end of the string yields an empty string
query TT
SELECT array_extract('🦆ab', 4), array_extract('abc', 4)
----
(empty) (empty)
# constant index
# normal array_extract
query T
SELECT array_extract(s, 2) FROM strings
----
e
o
(empty)
NULL
# array_extract out of range for the one-character string 'b'
query T
SELECT array_extract(s, 3) FROM strings
----
l
r
(empty)
NULL
# variable (per-row) index
query T
SELECT array_extract(s, off) FROM strings
----
h
o
b
NULL
query T
SELECT array_extract(s, 2) FROM strings
----
e
o
(empty)
NULL
query T
SELECT array_extract('hello', off) FROM strings
----
h
e
h
e
# test array_extract with constant NULLs in different places
query T
SELECT array_extract(NULL::VARCHAR, off) FROM strings
----
NULL
NULL
NULL
NULL
query T
SELECT array_extract('hello', NULL) FROM strings
----
NULL
NULL
NULL
NULL
query T
SELECT array_extract(NULL::VARCHAR, NULL) FROM strings
----
NULL
NULL
NULL
NULL
query T
SELECT array_extract(NULL::VARCHAR, off) FROM strings
----
NULL
NULL
NULL
NULL
query T
SELECT array_extract(NULL::VARCHAR, NULL) FROM strings
----
NULL
NULL
NULL
NULL
# negative offset
query T
SELECT array_extract(s, -1) FROM strings
----
o
d
b
NULL
# index 1 (first character)
query T
SELECT array_extract(s, 1) FROM strings
----
h
w
b
NULL
# index one past the end of the longest strings
query T
SELECT array_extract(s, 6) FROM strings
----
(empty)
(empty)
(empty)
NULL
# very large positive and negative index
query T
SELECT array_extract(s, 2147483646) FROM strings
----
(empty)
(empty)
(empty)
NULL
query T
SELECT array_extract(s, -2147483647) FROM strings
----
(empty)
(empty)
(empty)
NULL
# Issue #4978 -- Substring overflow 3
query I
SELECT list_extract('1', -1);
----
1
statement error
SELECT array_extract('1', 9223372036854775807);
----
Out of Range Error: Substring offset outside of supported range (> 4294967295)
statement error
SELECT array_extract('0', -9223372036854775808);
----
Out of Range Error: Substring offset outside of supported range (< -4294967296)

View File

@@ -0,0 +1,88 @@
# name: test/sql/function/string/test_ascii.test
# description: test ascii() and chr() functions
# group: [string]
statement ok
PRAGMA enable_verification
# Some ascii checks
query I
SELECT ascii('x')
----
120
# the function name is also accepted in upper case
query I
SELECT ASCII('a')
----
97
# for a multi-character string the code of the first character is returned
query I
SELECT ASCII('ABC')
----
65
# characters outside the ASCII range return a larger code (937 for 'Ω')
query I
SELECT ASCII('Ω')
----
937
query I
SELECT ASCII('ΩΩ')
----
937
query I
SELECT ASCII('Ä')
----
196
# a digit character returns its character code, not its numeric value
query I
SELECT ASCII('5')
----
53
# NULL input yields NULL
query I
SELECT ASCII(NULL)
----
NULL
# calling the function without arguments fails at bind time
statement error
SELECT ASCII()
----
<REGEX>:.*Binder Error: No function matches.*
# chr() checks: the inverse direction of the ascii() values tested above (97, 196, 937)
query T
SELECT CHR(97)
----
a
query T
SELECT CHR(196)
----
Ä
query T
SELECT CHR(937)
----
Ω
# NULL input yields NULL
query T
SELECT CHR(NULL)
----
NULL
# a negative code is rejected
statement error
SELECT CHR(-10)
----
<REGEX>:.*Invalid Input Error: Invalid UTF8.*
# so is the very large code 1073741824
statement error
SELECT CHR(1073741824)
----
<REGEX>:.*Invalid Input Error: Invalid UTF8.*
# calling the function without arguments fails at bind time
statement error
SELECT CHR()
----
<REGEX>:.*Binder Error: No function matches.*

View File

@@ -0,0 +1,188 @@
# name: test/sql/function/string/test_bar.test
# description: BAR test
# group: [string]
statement ok
pragma enable_verification
query I
select bar(x * x, 0, 100) from range(0, 11) t(x)
----
query I
select bar(9, 10, 20)
----
query I
select bar(120, -10, 100, 10)
----
query I
select bar(40, 20, 0)
----
query I
select bar(100, 200, 0)
----
query I
select bar(-10, 20, 0)
----
query I
select bar('nan'::double, 0, 10)
----
query I
select bar('infinity'::double, 0, 10)
----
query I
select bar('-infinity'::double, 0, 10)
----
query I
select bar(null, 0, 10)
----
NULL
query I
select bar(1, 'nan'::double, 10)
----
statement error
select bar(1, '-infinity'::double, 10)
----
query I
select bar(1, 'infinity'::double, 10)
----
query I
select bar(1, null, 10)
----
NULL
query I
select bar(1, 0, 'nan'::double)
----
query I
select bar(1, 0, '-infinity'::double)
----
query I
select bar(1, 0, 'infinity'::double)
----
statement error
select bar(1, 0, 10, 'nan'::double)
----
statement error
select bar(1, 0, 10, 'infinity'::double)
----
statement error
select bar(1, 0, 10, '-infinity'::double)
----
query I
select bar(1, 0, 10, 1000)
----
statement error
select bar(1, 0, 10, 1001)
----
query I
select bar(1, 0, 10, 1)
----
query I
select bar(10, 10, 10, 10)
----
statement error
select bar(1, 0, 10, 0.99)
----
query I
select bar(1, 0, 1, 1.125)
----
query I
select bar(1, 0, 1, 1.25)
----
query I
select bar(1, 0, 1, 1.375)
----
query I
select bar(1, 0, 1, 1.5)
----
query I
select bar(1, 0, 1, 1.625)
----
query I
select bar(1, 0, 1, 1.75)
----
query I
select bar(1, 0, 1, 1.875)
----
query I
select bar(1, 0, 1, 2)
----
query I
select bar(1, 0, 1, width) from (values (1), (1.125), (1.25), (1.375), (1.5), (1.625), (1.75), (1.875), (2)) as _(width);
----

View File

@@ -0,0 +1,54 @@
# name: test/sql/function/string/test_bit_length.test
# description: BIT_LENGTH test
# group: [string]
statement ok
PRAGMA enable_verification
# test on scalars: NULL, then one string per UTF-8 encoded width (0, 1, 2, 3 and 4 bytes)
# the 3-byte case uses the euro sign so that the expected 24 bits are actually produced
query IIIIII
select BIT_LENGTH(NULL), BIT_LENGTH(''), BIT_LENGTH('$'), BIT_LENGTH('¢'), BIT_LENGTH('€'), BIT_LENGTH('𐍈')
----
NULL 0 8 16 24 32
# test on tables
# column a holds one string per UTF-8 encoded width (0 to 4 bytes); column b mixes values and NULLs
statement ok
CREATE TABLE strings(a STRING, b STRING)
statement ok
INSERT INTO strings VALUES ('', 'Zero'), ('$', NULL), ('¢','Two'), ('€', NULL), ('𐍈','Four')
query I
select BIT_LENGTH(a) FROM strings
----
0
8
16
24
32
query I
select BIT_LENGTH(b) FROM strings
----
32
NULL
24
NULL
32
query I
select BIT_LENGTH(a) FROM strings WHERE b IS NOT NULL
----
0
16
32
# test incorrect usage
statement error
select BIT_LENGTH()
----
statement error
select BIT_LENGTH(1, 2)
----

View File

@@ -0,0 +1,76 @@
# name: test/sql/function/string/test_caseconvert.test
# description: UPPER/LOWER test
# group: [string]
statement ok
PRAGMA enable_verification
# unicode: accented letters, a letter carrying a combining diaeresis, and a greek letter
# NOTE(review): the third argument and its result were restored from a stripped combining sequence - confirm against upstream
query TTTT
select UPPER('áaaá'), UPPER('ö'), LOWER('S̈'), UPPER('ω')
----
ÁAAÁ Ö s̈ Ω
# greek
query TT
SELECT UPPER('Αα Ββ Γγ Δδ Εε Ζζ Ηη Θθ Ιι Κκ Λλ Μμ Νν Ξξ Οο Ππ Ρρ Σσς Ττ Υυ Φφ Χχ Ψψ Ωω'), LOWER('Αα Ββ Γγ Δδ Εε Ζζ Ηη Θθ Ιι Κκ Λλ Μμ Νν Ξξ Οο Ππ Ρρ Σσς Ττ Υυ Φφ Χχ Ψψ Ωω')
----
ΑΑ ΒΒ ΓΓ ΔΔ ΕΕ ΖΖ ΗΗ ΘΘ ΙΙ ΚΚ ΛΛ ΜΜ ΝΝ ΞΞ ΟΟ ΠΠ ΡΡ ΣΣΣ ΤΤ ΥΥ ΦΦ ΧΧ ΨΨ ΩΩ αα ββ γγ δδ εε ζζ ηη θθ ιι κκ λλ μμ νν ξξ οο ππ ρρ σσς ττ υυ φφ χχ ψψ ωω
# test upper/lower on scalar values
query TTTT
select UPPER(''), UPPER('hello'), UPPER('MotörHead'), UPPER(NULL)
----
(empty) HELLO MOTÖRHEAD NULL
query TTTT
select LOWER(''), LOWER('hello'), LOWER('MotörHead'), LOWER(NULL)
----
(empty) hello motörhead NULL
# test ucase/lcase on scalar values
query TTTT
select UCASE(''), UCASE('hello'), UCASE('MotörHead'), UCASE(NULL)
----
(empty) HELLO MOTÖRHEAD NULL
query TTTT
select LCASE(''), LCASE('hello'), LCASE('MotörHead'), LCASE(NULL)
----
(empty) hello motörhead NULL
# test on entire tables
statement ok
CREATE TABLE strings(a STRING, b STRING)
statement ok
INSERT INTO strings VALUES ('Hello', 'World'), ('HuLlD', NULL), ('MotörHead','RÄcks')
query TT
select UPPER(a), UCASE(a) FROM strings
----
HELLO HELLO
HULLD HULLD
MOTÖRHEAD MOTÖRHEAD
query TT
select LOWER(a), LCASE(a) FROM strings
----
hello hello
hulld hulld
motörhead motörhead
query TT
select LOWER(b), LCASE(b) FROM strings
----
world world
NULL NULL
räcks räcks
# test with selection vector
query TTTT
select UPPER(a), LOWER(a), UCASE(a), LCASE(a) FROM strings WHERE b IS NOT NULL
----
HELLO hello HELLO hello
MOTÖRHEAD motörhead MOTÖRHEAD motörhead

View File

@@ -0,0 +1,101 @@
# name: test/sql/function/string/test_complex_unicode.test
# description: Test correct behavior of various string functions under complex unicode characters
# group: [string]
statement ok
PRAGMA enable_verification
# length_grapheme returns the number of grapheme clusters
query I
SELECT length_grapheme('S̈a')
----
2
query I
SELECT length_grapheme('🤦🏼‍♂️')
----
1
query I
SELECT length_grapheme('🤦🏼‍♂️ L🤦🏼R 🤦🏼‍♂️')
----
7
# length returns the number of unicode codepoints
query I
SELECT length('S̈a')
----
3
query I
SELECT length('🤦🏼‍♂️')
----
5
query I
SELECT length('🤦🏼‍♂️ L🤦🏼R 🤦🏼‍♂️')
----
19
# strlen returns size in bytes
query I
SELECT strlen('🤦🏼‍♂️')
----
17
query I
SELECT strlen('S̈a')
----
4
# reverse with grapheme clusters
# the combining diaeresis must stay attached to its base letter after reversal
query T
SELECT REVERSE('S̈a')
----
aS̈
query T
SELECT REVERSE('Z͑ͫ̓ͪ̂ͫ̽͏̴̙̤̞͉͚̯̞̠͍A̴̵̜̰͔ͫ͗͢')
----
A̴̵̜̰͔ͫ͗͢Z͑ͫ̓ͪ̂ͫ̽͏̴̙̤̞͉͚̯̞̠͍
# a single emoji grapheme cluster (base + skin tone + ZWJ + gender sign) reverses to itself
query T
SELECT REVERSE('🤦🏼‍♂️')
----
🤦🏼‍♂️
# clusters are moved as a whole; only their order within the string is reversed
query T
SELECT REVERSE('🤦🏼‍♂️ L🤦🏼R 🤦🏼‍♂️')
----
🤦🏼‍♂️ R🤦🏼L 🤦🏼‍♂️
query T
SELECT REVERSE('MotörHead')
----
daeHrötoM
# substring with grapheme clusters
# offset and length count grapheme clusters, so the first cluster is returned in full
query T
SELECT substring_grapheme('🤦🏼‍♂️🤦🏼‍♂️🤦🏼‍♂️', 1, 1)
----
🤦🏼‍♂️
query T
SELECT substring_grapheme('S̈a', 2, 1)
----
a
query T
SELECT substring_grapheme('test: 🤦🏼hello🤦🏼 world', 7, 7)
----
🤦🏼hello🤦🏼
# the first grapheme cluster is the base letter together with its combining mark
query T
SELECT substring_grapheme('S̈a', 1, 1)
----
S̈
query T
SELECT substring_grapheme('S̈a', -1, 1)
----
a

View File

@@ -0,0 +1,83 @@
# name: test/sql/function/string/test_concat.test
# description: Test concat function
# group: [string]
statement ok
SET default_null_order='nulls_first';
# Test Case disclaimer
# Assertions built using the Domain Testing technique
# at: https://bbst.courses/wp-content/uploads/2018/01/Kaner-Intro-to-Domain-Testing-2018.pdf
statement ok
PRAGMA enable_verification
statement ok
CREATE TABLE strings(s VARCHAR)
statement ok
INSERT INTO strings VALUES ('hello'), ('world'), (NULL)
# normal concat
query T
SELECT s || ' ' || s FROM strings ORDER BY s
----
NULL
hello hello
world world
# unicode concat
query T
SELECT s || ' ' || '🦆' FROM strings ORDER BY s
----
NULL
hello 🦆
world 🦆
# unicode concat with the unicode literal on the left-hand side
# (the previous query already covers the literal on the right-hand side)
query T
SELECT '🦆' || ' ' || s FROM strings ORDER BY s
----
NULL
🦆 hello
🦆 world
# concat with constant NULL
query T
SELECT s || ' ' || '🦆' || NULL FROM strings ORDER BY s
----
NULL
NULL
NULL
# concat requires at least one argument
statement error
SELECT CONCAT()
----
# concat with one argument works
query T
SELECT CONCAT('hello')
----
hello
# varargs concat
query T
SELECT CONCAT('hello', 33, 22)
----
hello3322
# CONCAT ignores null values
query T
SELECT CONCAT('hello', 33, NULL, 22, NULL)
----
hello3322
# this also applies to non-constant null values
query T
SELECT CONCAT('hello', ' ', s) FROM strings ORDER BY s
----
hello
hello hello
hello world

View File

@@ -0,0 +1,79 @@
# name: test/sql/function/string/test_concat_binding.test
# description: Test the binding of the concat function
# group: [string]
statement ok
PRAGMA enable_verification
statement ok
SET default_null_order='nulls_first';
query I
select [1] || [2];
----
[1, 2]
query I
select [1] || NULL;
----
NULL
query I
select list_concat([1], NULL);
----
[1]
query I
SELECT CONCAT('hello')
----
hello
query I
select array[1] || array[2];
----
[1, 2]
query I
select array[1] || array[NULL];
----
[1, NULL]
query I
select list_concat(array[1], array[NULL]);
----
[1, NULL]
query I
select array[1] || cast(NULL as int array);
----
NULL
statement error
select concat([1], 'hello');
----
Binder Error: Cannot concatenate types INTEGER[] and VARCHAR
statement error
SELECT list_concat([1, 2], ['3', '4'])
----
Binder Error: Cannot concatenate lists of types INTEGER[] and VARCHAR[]
statement error
SELECT list_concat([1, 2], 4)
----
Binder Error: No function matches the given name and argument types 'list_concat(INTEGER[], INTEGER_LITERAL)'. You might need to add explicit type casts.
query I
select 'hi' || NULL;
----
NULL
query I
select list_concat([1], [2], [3]);
----
[1, 2, 3]
query I
select [1] || [2] || [3];
----
[1, 2, 3]

View File

@@ -0,0 +1,64 @@
# name: test/sql/function/string/test_concat_function.test
# description: CONCAT test
# group: [string]
statement ok
PRAGMA enable_verification
statement ok
CREATE TABLE strings(a STRING, b STRING)
statement ok
INSERT INTO strings VALUES ('Hello', 'World'), ('HuLlD', NULL), ('MotörHead','RÄcks')
query T
select CONCAT(a, 'SUFFIX') FROM strings
----
HelloSUFFIX
HuLlDSUFFIX
MotörHeadSUFFIX
query T
select CONCAT('PREFIX', b) FROM strings
----
PREFIXWorld
PREFIX
PREFIXRÄcks
query T
select CONCAT(a, b) FROM strings
----
HelloWorld
HuLlD
MotörHeadRÄcks
query T
select CONCAT(a, b, 'SUFFIX') FROM strings
----
HelloWorldSUFFIX
HuLlDSUFFIX
MotörHeadRÄcksSUFFIX
query T
select CONCAT(a, b, a) FROM strings
----
HelloWorldHello
HuLlDHuLlD
MotörHeadRÄcksMotörHead
query T
select CONCAT('1', '2', '3', '4', '5', '6', '7', '8', '9', '0')
----
1234567890
# concat a long string
query TT
select '1234567890' || '1234567890', '1234567890' || NULL
----
12345678901234567890 NULL
query TT
select CONCAT('1234567890', '1234567890'), CONCAT('1234567890', NULL)
----
12345678901234567890 1234567890

View File

@@ -0,0 +1,160 @@
# name: test/sql/function/string/test_concat_ws.test
# description: CONCAT_WS test
# group: [string]
statement ok
PRAGMA enable_verification
statement ok
CREATE TABLE strings(a STRING, b STRING)
statement ok
INSERT INTO strings VALUES ('Hello', 'World'), ('HuLlD', NULL), ('MotörHead','RÄcks')
query T
select CONCAT_WS(',',a, 'SUFFIX') FROM strings
----
Hello,SUFFIX
HuLlD,SUFFIX
MotörHead,SUFFIX
query T
select CONCAT_WS('@','PREFIX', b) FROM strings
----
PREFIX@World
PREFIX
PREFIX@RÄcks
query T
select CONCAT_WS('$',a, b) FROM strings
----
Hello$World
HuLlD
MotörHead$RÄcks
query T
select CONCAT_WS(a, b, 'SUFFIX') FROM strings
----
WorldHelloSUFFIX
SUFFIX
RÄcksMotörHeadSUFFIX
query T
select CONCAT_WS(a, b, b) FROM strings
----
WorldHelloWorld
(empty)
RÄcksMotörHeadRÄcks
query T
select CONCAT_WS('@','1', '2', '3', '4', '5', '6', '7', '8', '9')
----
1@2@3@4@5@6@7@8@9
query T
select CONCAT_WS(b, '[', ']') FROM strings ORDER BY a
----
[World]
NULL
[RÄcks]
# filters
query T
select CONCAT_WS(',', a, 'SUFFIX') FROM strings WHERE a != 'Hello'
----
HuLlD,SUFFIX
MotörHead,SUFFIX
# concat WS needs at least two parameters
statement error
select CONCAT_WS()
----
statement error
select CONCAT_WS(',')
----
# one entry: just returns the entry
query T
select CONCAT_WS(',', 'hello')
----
hello
# NULL in separator results in null
query T
select CONCAT_WS(NULL, 'hello')
----
NULL
# NULL in data results in empty string
query T
select CONCAT_WS(',', NULL)
----
(empty)
# NULL separator results in the entire column being NULL
query T
select CONCAT_WS(NULL, b, 'SUFFIX') FROM strings
----
NULL
NULL
NULL
# NULL in data is just ignored
query T
select CONCAT_WS(',', NULL, 'SUFFIX') FROM strings
----
SUFFIX
SUFFIX
SUFFIX
# empty strings still get split up by the separator
query T
select CONCAT_WS(',', '', '')
----
,
query T
select CONCAT_WS(',', '', '', '')
----
,,
# but NULLs do not
query T
select CONCAT_WS(',', NULL, NULL)
----
(empty)
query T
select CONCAT_WS(',', NULL, NULL, NULL)
----
(empty)
query T
select CONCAT_WS(',', NULL, NULL, 'hello')
----
hello
# now test for non-constant separators
query T
select CONCAT_WS(a, '', NULL, '') FROM strings ORDER BY a
----
Hello
HuLlD
MotörHead
query T
select CONCAT_WS(a, NULL, '', '') FROM strings ORDER BY a;
----
Hello
HuLlD
MotörHead
# now non-constant separator with a mix of constant and non-constant strings to concatenate
query T
select CONCAT_WS(a, NULL, b, '') FROM strings ORDER BY a
----
WorldHello
(empty)
RÄcksMotörHead

View File

@@ -0,0 +1,152 @@
# name: test/sql/function/string/test_contains.test
# description: Contains test
# group: [string]
statement ok
PRAGMA enable_verification
# contains of various lengths
query IIIIIIIIII
SELECT CONTAINS('hello world', 'h'),
CONTAINS('hello world', 'he'),
CONTAINS('hello world', 'hel'),
CONTAINS('hello world', 'hell'),
CONTAINS('hello world', 'hello'),
CONTAINS('hello world', 'hello '),
CONTAINS('hello world', 'hello w'),
CONTAINS('hello world', 'hello wo'),
CONTAINS('hello world', 'hello wor'),
CONTAINS('hello world', 'hello worl')
----
1 1 1 1 1 1 1 1 1 1
query IIIIIIIIII
SELECT CONTAINS('hello world', 'a'),
CONTAINS('hello world', 'ha'),
CONTAINS('hello world', 'hea'),
CONTAINS('hello world', 'hela'),
CONTAINS('hello world', 'hella'),
CONTAINS('hello world', 'helloa'),
CONTAINS('hello world', 'hello a'),
CONTAINS('hello world', 'hello wa'),
CONTAINS('hello world', 'hello woa'),
CONTAINS('hello world', 'hello wora')
----
0 0 0 0 0 0 0 0 0 0
# empty contains
query III
select contains('hello', ''), contains('', ''), contains(NULL, '')
----
1 1 NULL
statement ok
CREATE TABLE strings(s VARCHAR, off INTEGER, length INTEGER);
statement ok
INSERT INTO strings VALUES ('hello', 1, 2), ('world', 2, 3), ('b', 1, 1), (NULL, 2, 2)
# Test first letter
query T
SELECT contains(s,'h') FROM strings
----
1
0
0
NULL
# Test second letter
query T
SELECT contains(s,'e') FROM strings
----
1
0
0
NULL
# Test last letter
query T
SELECT contains(s,'d') FROM strings
----
0
1
0
NULL
# Test multiple letters
query T
SELECT contains(s,'he') FROM strings
----
1
0
0
NULL
# Test multiple letters in the middle
query T
SELECT contains(s,'ello') FROM strings
----
1
0
0
NULL
# Test multiple letters at the end
query T
SELECT contains(s,'lo') FROM strings
----
1
0
0
NULL
# Test no match
query T
SELECT contains(s,'he-man') FROM strings
----
0
0
0
NULL
# Test matching needle in multiple rows
query T
SELECT contains(s,'o') FROM strings
----
1
1
0
NULL
# Test NULL constant in different places
query T
SELECT contains(NULL,'o') FROM strings
----
NULL
NULL
NULL
NULL
query T
SELECT contains(s,NULL) FROM strings
----
NULL
NULL
NULL
NULL
statement error
SELECT contains(NULL,NULL) FROM strings
----
Binder Error: Could not choose a best candidate function
# Test empty pattern
query T
SELECT contains(s,'') FROM strings
----
1
1
1
NULL

View File

@@ -0,0 +1,85 @@
# name: test/sql/function/string/test_contains_utf8.test
# description: Contains test with UTF8
# group: [string]
statement ok
PRAGMA enable_verification
statement ok
CREATE TABLE strings(s VARCHAR);
statement ok
INSERT INTO strings VALUES ('átomo')
statement ok
INSERT INTO strings VALUES ('olá mundo')
statement ok
INSERT INTO strings VALUES ('你好世界')
statement ok
INSERT INTO strings VALUES ('two ñ three ₡ four 🦆 end')
# Test one matching UTF8 letter
query T
SELECT contains(s,'á') FROM strings
----
1
1
0
0
# Test a sentence containing a UTF-8 character
query T
SELECT contains(s,'olá mundo') FROM strings
----
0
1
0
0
# Test an entire UTF-8 word
query T
SELECT contains(s,'你好世界') FROM strings
----
0
0
1
0
# Test a substring of the haystack from the beginning
query T
SELECT contains(s,'two ñ thr') FROM strings
----
0
0
0
1
# Test a single UTF8 character in the middle of the haystack
query T
SELECT contains(s,'ñ') FROM strings
----
0
0
0
1
# Test a substring with multiple UTF8 characters in the middle of the haystack
query T
SELECT contains(s,'₡ four 🦆 e') FROM strings
----
0
0
0
1
# Test a substring of the haystack from the middle to the end
query T
SELECT contains(s,'🦆 end') FROM strings
----
0
0
0
1

View File

@@ -0,0 +1,361 @@
# name: test/sql/function/string/test_damerau_levenshtein.test
# description: Test damerau_levenshtein function
# group: [string]
statement ok
PRAGMA enable_verification
# normal queries
query I
SELECT damerau_levenshtein('out', 'out')
----
0
query I
SELECT damerau_levenshtein('three', 'there')
----
1
query I
SELECT damerau_levenshtein('potion', 'option')
----
1
query I
SELECT damerau_levenshtein('letter', 'lettre')
----
1
# same pair as above with the arguments swapped: the distance is symmetric
query I
SELECT damerau_levenshtein('there', 'three')
----
1
query I
SELECT damerau_levenshtein('out', 'to')
----
2
query I
SELECT damerau_levenshtein('to', 'out')
----
2
query I
SELECT damerau_levenshtein('laos', 'also')
----
2
query I
SELECT damerau_levenshtein('tomato', 'otamot')
----
3
query I
SELECT damerau_levenshtein('abcdefg', 'bacedgf')
----
3
# same pair as above with the arguments swapped: the distance is symmetric
query I
SELECT damerau_levenshtein('bacedgf', 'abcdefg')
----
3
query I
SELECT damerau_levenshtein('abcdefghi', 'bzacdefig')
----
4
query I
SELECT damerau_levenshtein('bzacdefig', 'abcdefghi')
----
4
query I
SELECT damerau_levenshtein('at', 'tarokk')
----
5
query I
SELECT damerau_levenshtein('tarokk', 'at')
----
5
query I
SELECT damerau_levenshtein('organ', 'no')
----
4
query I
SELECT damerau_levenshtein('trips', 'strip')
----
2
query I
SELECT damerau_levenshtein('cat', 'cats')
----
1
query I
SELECT damerau_levenshtein('rat', 'brat')
----
1
query I
SELECT damerau_levenshtein('amanaplanacanalpanama', 'm23aanaplancaanaalnama')
----
6
query I
SELECT damerau_levenshtein('', 'great')
----
5
query I
SELECT damerau_levenshtein('great', '')
----
5
query I
SELECT damerau_levenshtein('', '')
----
0
query I
SELECT damerau_levenshtein(NULL, 'drive')
----
NULL
query I
SELECT damerau_levenshtein('drive', NULL)
----
NULL
query I
SELECT damerau_levenshtein(NULL, NULL)
----
NULL
query I
SELECT damerau_levenshtein('', NULL)
----
NULL
query I
SELECT damerau_levenshtein(NULL, '')
----
NULL
statement error
SELECT damerau_levenshtein('one', 'two', 'three')
----
statement error
SELECT damerau_levenshtein('one')
----
statement error
SELECT damerau_levenshtein()
----
statement ok
CREATE TABLE strings(s VARCHAR)
statement ok
INSERT INTO strings VALUES ('here'), ('heres'), ('there'), ('three'), ('threes')
query I
SELECT damerau_levenshtein(s, 'theres') FROM strings ORDER BY s
----
2
1
1
2
1
query I
SELECT damerau_levenshtein('herse', s) FROM strings ORDER BY s
----
1
1
2
3
3
query I
SELECT damerau_levenshtein(NULL, s) FROM strings
----
NULL
NULL
NULL
NULL
NULL
# constant NULL as the second argument also yields NULL for every row
query I
SELECT damerau_levenshtein(s, NULL) FROM strings
----
NULL
NULL
NULL
NULL
NULL
query I
SELECT damerau_levenshtein('', s) FROM strings ORDER BY s
----
4
5
5
5
6
query I
SELECT damerau_levenshtein(s, '') FROM strings ORDER BY s
----
4
5
5
5
6
statement ok
DROP TABLE strings
statement ok
CREATE TABLE strings(s VARCHAR)
statement ok
INSERT INTO strings VALUES (NULL)
query I
SELECT damerau_levenshtein(s, NULL) from strings
----
NULL
query I
SELECT damerau_levenshtein(NULL, s) from strings
----
NULL
query I
SELECT damerau_levenshtein('test', s) from strings
----
NULL
query I
SELECT damerau_levenshtein(s, 'test') from strings
----
NULL
query I
SELECT damerau_levenshtein('null', s) from strings
----
NULL
query I
SELECT damerau_levenshtein('', s) FROM strings
----
NULL
query I
SELECT damerau_levenshtein(s, '') FROM strings
----
NULL
statement ok
DROP TABLE strings
statement ok
CREATE TABLE strings(s VARCHAR)
statement ok
INSERT INTO strings VALUES ('')
query I
SELECT damerau_levenshtein(NULL, s) FROM strings
----
NULL
query I
SELECT damerau_levenshtein(s, NULL) FROM strings
----
NULL
query I
SELECT damerau_levenshtein(s, '') FROM strings
----
0
query I
SELECT damerau_levenshtein('', s) FROM strings
----
0
query I
SELECT damerau_levenshtein(s, 'test') FROM strings
----
4
query I
SELECT damerau_levenshtein('test', s) FROM strings
----
4
query I
SELECT damerau_levenshtein('null', s) FROM strings
----
4
statement ok
DROP TABLE strings
statement ok
CREATE TABLE strings(s_left VARCHAR, s_right VARCHAR)
statement ok
INSERT INTO strings VALUES ('identical', 'identical'), ('dientical', 'identical'),
('dinetcila', 'identical'), ('abcdefghijk', 'bacdfzzeghki'),
('abcd', 'bcda'), ('great', 'greta'),
('abcdefghijklmnopqrstuvwxyz', 'abdcpoxwz'),
('a_considerably_longer_string', 'a_ocnsiderably_longre_tsrig'),
('another-quite-long-string', 'naothre-quit-elongstrnig'),
('littlehampton', 'littlerhamptoner'),
('an_incredibly_long_string_to_compare', 'na_incerdibl_ylong_sr56ting_ot_ocmrpe'),
('smaller', 'notsmaller,longer'),
('againalongerstring', 'string'),
(NULL, NULL), ('', ''),
(NULL, 'test'), ('test', NULL),
('four', ''), ('', 'four'),
(NULL, ''), ('', NULL)
query I
SELECT damerau_levenshtein(s_left, s_right) FROM strings
----
0
1
4
6
2
1
20
4
5
3
10
10
12
NULL
0
NULL
NULL
4
4
NULL
NULL

View File

@@ -0,0 +1,127 @@
# name: test/sql/function/string/test_format.test
# description: Test format
# group: [string]
statement ok
PRAGMA enable_verification
# format without format specifiers
query TT
SELECT format('hello'), format(NULL)
----
hello NULL
# format strings
query TT
SELECT format('{}', 'hello'), format('{}: {}', 'hello', 'world')
----
hello hello: world
# format strings with NULL values
query TT
SELECT format('{}', NULL), format(NULL, 'hello', 'world')
----
NULL NULL
# booleans
query T
SELECT format('{} {}', TRUE, FALSE)
----
true false
# integers
query TT
SELECT format('{}', 33), format('{} + {} = {}', 3, 5, 3 + 5)
----
33 3 + 5 = 8
# exotic types
# dates, times and timestamps are strings
query T
SELECT format('{} {} = {}', DATE '1992-01-01', TIME '12:01:00', TIMESTAMP '1992-01-01 12:01:00')
----
1992-01-01 12:01:00 = 1992-01-01 12:01:00
# blob
query T
SELECT format('{}', BLOB '\x00hello')
----
\x00hello
# hugeint
query T
SELECT format('{}', 120381902481294715712::HUGEINT)
----
120381902481294715712
# uhugeint
query T
SELECT format('{}', 120381902481294715712::UHUGEINT)
----
120381902481294715712
# decimal
query T
SELECT format('{:.3f}', '1.234'::DECIMAL)
----
1.234
# integers with special formatting specifiers
query TTT
SELECT format('{:04d}', 33), format('{} {:02d}:{:02d}:{:02d} {}', 'time', 12, 3, 16, 'AM'), format('{:10d}', 1992)
----
0033 time 12:03:16 AM 1992
# numeric input of arguments
query T
SELECT format('{1} {1} {0} {0}', 1, 2)
----
2 2 1 1
# hexadecimal
query I
select format('{:x}', 123456789)
----
75bcd15
# binary
query I
select format('{:b}', 123456789)
----
111010110111100110100010101
query I
select format('{:.2}', 0.00023404094995959);
----
0.00023
query I
select format('{:.3}', 0.0);
----
0.00
# incorrect number of parameters
# too few parameters
statement error
SELECT format('{}')
----
statement error
SELECT format('{} {}', 'hello')
----
# excess parameters are ignored
query T
SELECT format('{}', 'hello', 'world')
----
hello
# incorrect types
statement error
SELECT format('{:s}', 42)
----
statement error
SELECT format('{:d}', 'hello')
----

View File

@@ -0,0 +1,145 @@
# name: test/sql/function/string/test_format_extensions.test
# description: Test format extensions
# group: [string]
statement ok
PRAGMA enable_verification
# thousands separator
query I
select printf('%,d', 123456789)
----
123,456,789
query I
select format('{:d}', 123456789)
----
123456789
query I
select printf('%,d', 123456789123456789123456789::HUGEINT)
----
123,456,789,123,456,789,123,456,789
# other supported thousand separators
query I
select printf('%.d', 123456789)
----
123.456.789
query I
select printf('%.d', -123456789123456789123456789::HUGEINT)
----
-123.456.789.123.456.789.123.456.789
query I
select printf('%_d', 123456789)
----
123_456_789
query I
select printf('%''d', 123456789)
----
123'456'789
query I
select printf('%.0d', 123456789)
----
123456789
# prints a thousands separator as well
query I
select format('{:,}', 123456789)
----
123,456,789
query I
select format('{:_}', 123456789)
----
123_456_789
query I
select format('{:''}', 123456789)
----
123'456'789
query I
select format('{:,}', 123456789123456789123456789::UHUGEINT)
----
123,456,789,123,456,789,123,456,789
# custom thousand separator
query I
select format('{:t }', 123456789)
----
123 456 789
query I
select format('{:t|}', 123456789)
----
123|456|789
query I
select format('{:tss}', 123456789)
----
123s456s789
# thousands separator for floats
query I
select format('{:,}', 123456789.123)
----
123,456,789.123
query I
select format('{:_}', 123456789.123)
----
123_456_789.123
query I
select printf('%,.3f', 123456.789::DOUBLE)
----
123,456.789
foreach val 1.724e12 1.7e12 1.72456e12 1.723456e11 1.724567e12 1.234e4
query I
select printf('%,.f', ${val}::double) = printf('%,d', ${val}::bigint);
----
true
endloop
statement error
select format('{:t}', 123456789)
----
query I
select format('{0:d} {0:L}', 123456789)
----
123456789 123456789
statement error
select format('{1}', 123456789)
----
Argument index "1" out of range
statement error
select format('{L}', 123456789)
----
Argument with name
# better error messages
statement error
select printf('%:', 123456789)
----
Invalid type specifier ":"
statement error
select printf('%:', 123456789.123)
----
Invalid type specifier ":"
statement error
select printf('%:', 'str')
----
Invalid type specifier ":"

View File

@@ -0,0 +1,256 @@
# name: test/sql/function/string/test_glob.test
# description: Test GLOB statement
# group: [string]
statement ok
PRAGMA enable_verification
# scalar glob
query T
SELECT 'aaa' GLOB 'bbb'
----
0
query T
SELECT 'aaa' GLOB 'aaa'
----
1
query T
SELECT 'aaa' GLOB '*'
----
1
query T
SELECT 'aaa' GLOB '*a'
----
1
query T
SELECT 'aaa' GLOB '*b'
----
0
query T
SELECT 'aaa' GLOB 'a*'
----
1
query T
SELECT 'aaa' GLOB 'b*'
----
0
query T
SELECT 'aaa' GLOB 'a?a'
----
1
query T
SELECT 'aaa' GLOB 'a?'
----
0
query T
SELECT 'aaa' GLOB '??*'
----
1
query T
SELECT 'aaa' GLOB '????*'
----
0
query T
SELECT 'ababac' GLOB '*abac'
----
1
# bracket matching
query T
SELECT '3' GLOB '[0-9]'
----
1
query T
SELECT 'a' GLOB '[0-9]'
----
0
# multiple brackets in a row
query T
SELECT '012' GLOB '[0-9][0-9][0-9]'
----
1
# trailing in pattern after brackets
query T
SELECT '012' GLOB '[0-9][0-9][0-9]a'
----
0
# trailing in string after brackets
query T
SELECT '012a' GLOB '[0-9][0-9][0-9]'
----
0
# more complicated brackets
query T
SELECT 'b3' GLOB '[abc0-9][abc0-9]'
----
1
query T
SELECT 'd3' GLOB '[abc0-9][abc0-9]'
----
0
# inverse brackets
query T
SELECT 'a' GLOB '[!0-9]'
----
1
query T
SELECT '1' GLOB '[!0-9]'
----
0
# escapes
query T
SELECT '*' GLOB '\*'
----
1
query T
SELECT 'a' GLOB '\*'
----
0
# escaped escape
query T
SELECT '\' GLOB '\\'
----
1
# '
query T
SELECT 'a' GLOB '\\'
----
0
# bracket in a bracket
# this is valid as long as the closing bracket is the first non-exclamation mark character
query T
SELECT '3]' GLOB '[]3][]]'
----
1
query T
SELECT '3]' GLOB '[]3][]]'
----
1
# bracket is not properly closed
query T
SELECT '3' GLOB '[3'
----
0
# trailing range
query T
SELECT '3' GLOB '[3-'
----
0
# trailing escape
query T
SELECT '3' GLOB '\\'
----
0
query T
SELECT '3' GLOB '\'
----
0
# a bunch of asterisks
query T
SELECT '3' GLOB '3***'
----
1
query T
SELECT '1245' GLOB '**1***2*******4*5***'
----
1
query T
SELECT 'aaaaaaaaaaaaaaaaaaaaaaa' GLOB '*a'
----
1
# special characters in brackets
query T
SELECT '?' GLOB '[?]'
----
1
query T
SELECT '3' GLOB '[?]'
----
0
query T
SELECT '*' GLOB '[*]'
----
1
query T
SELECT '3' GLOB '[*]'
----
0
# multiple ranges in a bracket
query T
SELECT '6' GLOB '[1-35-7]'
----
1
query T
SELECT '4' GLOB '[1-35-7]'
----
0
# trailing bracket
query T
SELECT '4' GLOB '['
----
0
# glob with table
statement ok
CREATE TABLE strings(s STRING, pat STRING);
statement ok
INSERT INTO strings VALUES ('abab', 'ab*'), ('aaa', 'a?a'), ('aaa', '*b*')
query T
SELECT s FROM strings WHERE s GLOB 'ab*'
----
abab
query T
SELECT s FROM strings WHERE 'aba' GLOB pat
----
abab
aaa
aaa
query T
SELECT s FROM strings WHERE s GLOB pat
----
abab
aaa

View File

@@ -0,0 +1,150 @@
# name: test/sql/function/string/test_ilike.test
# description: Test ILIKE statement
# group: [string]
statement ok
PRAGMA enable_verification
# scalar ilike
query T
SELECT 'aaa' ILIKE 'bbb'
----
0
query T
SELECT 'aaa' ILIKE 'aAa'
----
1
query T
SELECT 'aaa' ILIKE '%'
----
1
query T
SELECT 'aaa' ILIKE '%A'
----
1
query T
SELECT 'aaa' ILIKE '%b'
----
0
query T
SELECT 'aaa' ILIKE 'A%'
----
1
query T
SELECT 'aaa' ILIKE 'b%'
----
0
query T
SELECT 'aaa' ILIKE 'A_a'
----
1
query T
SELECT 'aaa' ILIKE 'a_'
----
0
query T
SELECT 'aaa' ILIKE '__%'
----
1
query T
SELECT 'aaa' ILIKE '____%'
----
0
query T
SELECT 'ababac' ILIKE '%abac'
----
1
query T
SELECT 'ababac' ILIKE '%%%aBac'
----
1
query T
SELECT 'ababac' ILIKE 'abab%%%%%'
----
1
query T
SELECT 'ababac' ILIKE '%%%a%%%b%%a%b%%%%%a%c%%'
----
1
query T
SELECT 'ababac' ILIKE '%%%a%%%b%%a%b%%%%%a%d%%'
----
0
query T
SELECT 'ababac' NOT ILIKE '%Abac'
----
0
# ilike with table
statement ok
CREATE TABLE strings(s STRING, pat STRING);
statement ok
INSERT INTO strings VALUES ('abab', 'Ab%'), ('aaa', 'A_a'), ('aaa', '%b%')
# case-insensitive match of a column against a constant pattern in a different case
query T
SELECT s FROM strings WHERE s ILIKE 'AB%'
----
abab
query T
SELECT s FROM strings WHERE 'aba' ILIKE pat
----
abab
aaa
aaa
query T
SELECT s FROM strings WHERE 'aba' NOT ILIKE pat
----
query T
SELECT s FROM strings WHERE s ILIKE pat
----
abab
aaa
query T
SELECT s FROM strings WHERE s NOT ILIKE pat
----
aaa
# unicode
query T
SELECT 'MÜHLEISEN' ILIKE 'mühleisen'
----
1
statement ok
CREATE TABLE unicode_strings(s STRING, pat STRING);
statement ok
INSERT INTO unicode_strings VALUES ('öäb', 'Ö%B'), ('aaÄ', 'A_ä'), ('aaa', '%b%')
query T
SELECT s FROM unicode_strings WHERE s ILIKE pat
----
öäb
aaÄ
query T
SELECT s FROM unicode_strings WHERE s NOT ILIKE pat
----
aaa

View File

@@ -0,0 +1,82 @@
# name: test/sql/function/string/test_ilike_escape.test
# description: Test ILIKE statement with ESCAPE
# group: [string]
statement ok
PRAGMA enable_verification
# scalar ilike
query T
select 'a%c' ilike 'a$%C' escape '$';
----
true
query T
select 'A%C' ilike 'a$%c' escape '$';
----
true
query T
select 'a%c' ilike 'a$%C' escape '/';
----
false
# nulls as parameter
query T
select NULL ilike 'a$%C' escape '/';
----
NULL
query T
select 'a%c' ilike NULL escape '$';
----
NULL
query T
select 'a%c' ilike 'a$%C' escape NULL;
----
NULL
# non-scalar ilike
statement ok
CREATE TABLE tbl(str VARCHAR, pat VARCHAR);
statement ok
INSERT INTO tbl VALUES ('a%c', 'a$%C');
query T
SELECT str ILIKE pat ESCAPE '$' FROM tbl
----
true
query T
SELECT str NOT ILIKE pat ESCAPE '$' FROM tbl
----
false
query T
SELECT NULL ILIKE pat ESCAPE '$' FROM tbl
----
NULL
query T
SELECT str ILIKE NULL ESCAPE '$' FROM tbl
----
NULL
query T
SELECT str ILIKE pat ESCAPE NULL FROM tbl
----
NULL
# multi-byte escape not supported
statement error
select 'a%c' ilike 'a$%C' escape '///';
----
<REGEX>:.*Syntax Error: Invalid escape string.*
# escape must be a constant
statement error
SELECT str ILIKE pat ESCAPE str FROM tbl
----
<REGEX>:.*Syntax Error: Invalid escape string.*

View File

@@ -0,0 +1,127 @@
# name: test/sql/function/string/test_instr.test
# description: Instr test
# group: [string]
statement ok
PRAGMA enable_verification
statement ok
CREATE TABLE strings(s VARCHAR, off INTEGER, length INTEGER);
statement ok
INSERT INTO strings VALUES ('hello', 1, 2), ('world', 2, 3), ('b', 1, 1), (NULL, 2, 2)
# Test first letter
query I
SELECT instr(s,'h') FROM strings
----
1
0
0
NULL
# position is an alias for instr
query I
SELECT position('h' in s) FROM strings
----
1
0
0
NULL
# Test second letter
query I
SELECT instr(s,'e') FROM strings
----
2
0
0
NULL
# Test last letter
query I
SELECT instr(s,'d') FROM strings
----
0
5
0
NULL
# Test multiple letters
query I
SELECT instr(s,'he') FROM strings
----
1
0
0
NULL
query I
SELECT position('he' in s) FROM strings
----
1
0
0
NULL
# Test multiple letters in the middle
query I
SELECT instr(s,'ello') FROM strings
----
2
0
0
NULL
# Test multiple letters at the end
query I
SELECT instr(s,'lo') FROM strings
----
4
0
0
NULL
# Test no match
query I
SELECT instr(s,'he-man') FROM strings
----
0
0
0
NULL
# Test matching needle in multiple rows
query IT
SELECT instr(s,'o'),s FROM strings
----
5 hello
2 world
0 b
NULL NULL
# Test NULL constant in different places
query I
SELECT instr(NULL,'o') FROM strings
----
NULL
NULL
NULL
NULL
query I
SELECT instr(s,NULL) FROM strings
----
NULL
NULL
NULL
NULL
query I
SELECT instr(NULL,NULL) FROM strings
----
NULL
NULL
NULL
NULL

View File

@@ -0,0 +1,93 @@
# name: test/sql/function/string/test_instr_utf8.test
# description: Instr test with UTF8
# group: [string]
statement ok
PRAGMA enable_verification
statement ok
CREATE TABLE strings(s VARCHAR);
statement ok
INSERT INTO strings VALUES ('átomo')
statement ok
INSERT INTO strings VALUES ('olá mundo')
statement ok
INSERT INTO strings VALUES ('你好世界')
statement ok
INSERT INTO strings VALUES ('two ñ three ₡ four 🦆 end')
# Test one matching UTF8 letter
query I
SELECT INSTR(s,'á') FROM strings
----
1
3
0
0
query I
SELECT POSITION('á' in s) FROM strings
----
1
3
0
0
# Test a sentence with UTF-8
query I
SELECT INSTR(s,'olá mundo') FROM strings
----
0
1
0
0
# Test an entire UTF-8 word
query I
SELECT INSTR(s,'你好世界') FROM strings
----
0
0
1
0
# Test a substring of the haystack from the beginning
query I
SELECT instr(s,'two ñ thr') FROM strings
----
0
0
0
1
# Test a single UTF8 substring of the haystack in the middle
query I
SELECT instr(s,'ñ') FROM strings
----
0
0
0
5
# Test a substring containing multiple UTF8 characters in the middle of the haystack
query I
SELECT instr(s,'₡ four 🦆 e') FROM strings
----
0
0
0
13
# Test a substring of the haystack from the middle to the end
query I
SELECT instr(s,'🦆 end') FROM strings
----
0
0
0
20

View File

@@ -0,0 +1,58 @@
# name: test/sql/function/string/test_issue_1812.test
# description: Test LIKE predicates combined with AND/OR where branches repeat the same predicate (issue 1812)
# group: [string]
statement ok
PRAGMA enable_verification
# fixture: three 'hello*' rows and three 'world*' rows; every row contains an 'o'
statement ok
CREATE TABLE t (str VARCHAR);
statement ok
INSERT INTO t VALUES ('hello1'), ('hello2'), ('hello3'), ('world1'), ('world2'), ('world3');
# baseline: each LIKE predicate on its own
query I
SELECT COUNT(*) FROM t WHERE str LIKE '%o%'; -- 6
----
6
query I
SELECT COUNT(*) FROM t WHERE str LIKE '%rld%'; -- 3
----
3
# '%o%' appears on both sides of the OR; the count must still equal the plain '%o%' count
query I
SELECT COUNT(*) FROM t WHERE str LIKE '%o%' OR (str LIKE '%o%' AND str LIKE '%rld%');
----
6
# same disjunction with the operands swapped
query I
SELECT COUNT(*) FROM t
WHERE (str LIKE '%o%' AND str LIKE '%rld%')
OR str LIKE '%o%';
----
6
# '%o%' repeated across three OR branches
query I
SELECT COUNT(*) FROM t
WHERE (str LIKE '%o%' AND str LIKE '%rld%')
OR (str LIKE '%o%')
OR (str LIKE '%o%');
----
6
# an additional branch that matches no row ('blabla%') must not change the count
query I
SELECT COUNT(*) FROM t
WHERE (str LIKE '%o%' AND str LIKE '%rld%')
OR (str LIKE '%o%')
OR (str LIKE '%o%' AND str LIKE 'blabla%');
----
6
# every branch shares '%o%' AND '%1%'; only 'hello1' and 'world1' satisfy both
query I
SELECT COUNT(*) FROM t
WHERE (str LIKE '%o%' AND str LIKE '%1%')
OR (str LIKE '%o%' AND str LIKE '%1%' AND str LIKE 'blabla%')
OR (str LIKE '%o%' AND str LIKE '%1%' AND str LIKE 'blabla2%')
----
2

View File

@@ -0,0 +1,149 @@
# name: test/sql/function/string/test_jaccard.test
# description: Test jaccard function
# group: [string]
statement ok
PRAGMA enable_verification
# jaccard
query I
SELECT jaccard('hello', 'hello')
----
1
query I
SELECT jaccard('hello', NULL)
----
NULL
query I
SELECT jaccard(NULL, 'hello')
----
NULL
query I
SELECT jaccard(NULL, NULL)
----
NULL
query I
SELECT jaccard('ab', 'aabb')
----
1.0
query I
SELECT jaccard('aabb', 'ab')
----
1.0
query I
SELECT jaccard('ab', 'cd')
----
0.0
query I
SELECT jaccard('cd', 'ab')
----
0.0
query I
SELECT round(jaccard('ab', 'aabbcc'), 3)
----
0.667
query I
SELECT round(jaccard('aabbcc', 'ab'), 3)
----
0.667
query I
SELECT round(jaccard('aabbccddeeff', 'ab'), 3)
----
0.333
query I
SELECT round(jaccard('ab', 'aabbccddeeff'), 3)
----
0.333
query I
SELECT round(jaccard('aabbccddeeffg', 'ab'), 3)
----
0.286
query I
SELECT round(jaccard('ab', 'aaaabbbccddeeffgabcccc'), 3)
----
0.286
query I
SELECT round(jaccard('ababababababba', 'aaaabbbccddeeffgabcccc'), 3)
----
0.286
# an empty string as either argument (or both) is expected to raise an error;
# no message is pinned after '----', so any error satisfies these checks
statement error
SELECT jaccard('hello', '')
----
statement error
SELECT jaccard('', 'hello')
----
statement error
SELECT jaccard('', '')
----
statement ok
CREATE TABLE strings(s VARCHAR, t VARCHAR)
statement ok
INSERT INTO strings VALUES ('hello', 'hallo'), ('aloha', 'fello'), ('fellow', 'ducks'), (NULL, NULL)
query I
select round(jaccard(s, t), 1) from strings
----
0.6
0.3
0.0
NULL
query I
select round(jaccard(s, 'hallo'), 1) from strings
----
0.6
1.0
0.3
NULL
query I
select round(jaccard('hallo', t), 1) from strings
----
1.0
0.3
0.0
NULL
query I
select round(jaccard(NULL, t), 1) from strings
----
NULL
NULL
NULL
NULL
query I
select round(jaccard(s, NULL), 1) from strings
----
NULL
NULL
NULL
NULL
statement error
select round(jaccard('', t), 1) from strings
----
statement error
select round(jaccard(s, ''), 1) from strings
----

View File

@@ -0,0 +1,210 @@
# name: test/sql/function/string/test_jaro_winkler.test
# description: Test jaro_winkler_similarity function
# group: [string]
statement ok
PRAGMA enable_verification
query T
select jaro_winkler_similarity('CRATE', 'TRACE')
----
0.733333
query T
select jaro_winkler_similarity('DwAyNE', 'DuANE')
----
0.8400000000000001
# stole these from the jaro winkler library
query T
select jaro_winkler_similarity('0', '0')
----
1
query T
select jaro_winkler_similarity('00', '00')
----
1
query T
select jaro_winkler_similarity('0', '00')
----
0.85
query T
select jaro_winkler_similarity('00000000000000000000000000000000000000000000000000000000000000000', '00000000000000000000000000000000000000000000000000000000000000000')
----
1
query T
select jaro_winkler_similarity('0000000000000000000000000000000000000000000000000000000000000000', '00000000000000000000000000000000000000000000000000000000000000000')
----
0.9969
query T
select jaro_winkler_similarity('000000000000000000000000000000000000000000000000000000000000000', '00000000000000000000000000000000000000000000000000000000000000000')
----
0.9938
query T
select jaro_winkler_similarity('10000000000000000000000000000000000000000000000000000000000000020', '00000000000000000000000000000000000000000000000000000000000000000')
----
0.97948
query T
select jaro_winkler_similarity('0000000000000000000000000000000000000000000000000000000000000000000000000000001', '00000000000000100000000000000000000000010000000000000000000000000')
----
0.95333
query T
select jaro_winkler_similarity('01000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000', '00000000000000000000000000000000000000000000000000000000000000000')
----
0.85234
# stole these from the Apache Commons library jaro winkler implementation
query T
select jaro_winkler_similarity(null, null)
----
NULL
query T
select jaro_winkler_similarity('foo', null)
----
NULL
query T
select jaro_winkler_similarity(null, 'foo')
----
NULL
# this is 1.0 in the apache commons library, but 0.0 in our library
query T
select jaro_winkler_similarity('', '')
----
0.0
query T
select jaro_winkler_similarity('foo', 'foo')
----
1.0
query T
select jaro_winkler_similarity('foo', 'foo ')
----
0.9416666666666667
# 'foo' against 'foo' followed by TWO spaces: all three characters match (jaro = 13/15)
# and the 3-character common prefix lifts the score to 0.9066...
query T
select jaro_winkler_similarity('foo', 'foo  ')
----
0.9066666666666667
query T
select jaro_winkler_similarity('foo', ' foo ')
----
0.8666666666666667
# 'foo' against 'foo' preceded by TWO spaces: the two-position shift leaves a single
# character inside the match window and there is no common prefix, hence the low score
query T
select jaro_winkler_similarity('foo', '  foo')
----
0.5111111111111111
query T
select jaro_winkler_similarity('', 'a')
----
0.0
query T
select jaro_winkler_similarity('aaapppp', '')
----
0.0
query T
select jaro_winkler_similarity('frog', 'fog')
----
0.9249999999999999
query T
select jaro_winkler_similarity('fly', 'ant')
----
0.0
query T
select jaro_winkler_similarity('elephant', 'hippo')
----
0.44166666666666665
query T
select jaro_winkler_similarity('hippo', 'elephant')
----
0.44166666666666665
query T
select jaro_winkler_similarity('hippo', 'zzzzzzzz')
----
0.0
query T
select jaro_winkler_similarity('hello', 'hallo')
----
0.88
query T
select jaro_winkler_similarity('ABC Corporation', 'ABC Corp')
----
0.9066666666666666
# this is 0.95 in the apache commons library but 0.8648324514991181 in ours
query T
select jaro_winkler_similarity('D N H Enterprises Inc', 'D &amp; H Enterprises, Inc.')
----
0.8648324514991181
# this is 0.92 in the apache commons library but 0.942 in ours
query T
select jaro_winkler_similarity('My Gym Children''s Fitness Center', 'My Gym. Childrens Fitness')
----
0.942
# this is 0.88 in the apache commons library but 0.8980186480186481 in ours
query T
select jaro_winkler_similarity('PENNSYLVANIA', 'PENNCISYLVNIA')
----
0.8980186480186481
# test score cutoff
query T
select jaro_winkler_similarity('CRATE', 'TRACE', 0.6)
----
0.733333
query T
select jaro_winkler_similarity('CRATE', 'TRACE', 0.8)
----
0.0
query T
select jaro_winkler_similarity('000000000000000000000000000000000000000000000000000000000000000', '00000000000000000000000000000000000000000000000000000000000000000', 0.9)
----
0.9938
query T
select jaro_winkler_similarity('000000000000000000000000000000000000000000000000000000000000000', '00000000000000000000000000000000000000000000000000000000000000000', 0.995)
----
0.0
# test with table just in case
statement ok
create table test as select '0000' || range::varchar s from range(10000);
# results should be the same regardless of the constant being first or second arg!
# because of floating point arithmetic this is off slightly at smaller vector sizes
require vector_size 1024
query T nosort q0
select avg(jaro_winkler_similarity('00000000', s)) from test;
----
query T nosort q0
select avg(jaro_winkler_similarity(s, '00000000')) from test;
----

View File

@@ -0,0 +1,107 @@
# name: test/sql/function/string/test_left.test
# description: LEFT test
# group: [string]
statement ok
PRAGMA enable_verification
foreach FUN LEFT LEFT_GRAPHEME
# test LEFT on positive positions
query TTTTT
SELECT ${FUN}('abcd', 0), ${FUN}('abc', 1), ${FUN}('abc', 2), ${FUN}('abc', 3), ${FUN}('abc', 4)
----
(empty) a ab abc abc
query TTTTT
SELECT ${FUN}('🦆ab', 0), ${FUN}('🦆ab', 1), ${FUN}('🦆ab', 2), ${FUN}('🦆ab', 3), ${FUN}('🦆ab', 4)
----
(empty) 🦆 🦆a 🦆ab 🦆ab
# test LEFT on negative positions
query TTTTT
SELECT ${FUN}('abcd', 0), ${FUN}('abc', -1), ${FUN}('abc', -2), ${FUN}('abc', -3), ${FUN}('abc', -4)
----
(empty) ab a (empty) (empty)
query TTTTT
SELECT ${FUN}('🦆ab', 0), ${FUN}('🦆ab', -1), ${FUN}('🦆ab', -2), ${FUN}('🦆ab', -3), ${FUN}('🦆ab', -4)
----
(empty) 🦆a 🦆 (empty) (empty)
# test LEFT on NULL values
query TTT
SELECT ${FUN}(NULL, 0), ${FUN}('abc', NULL), ${FUN}(NULL, NULL)
----
NULL NULL NULL
query TTT
SELECT ${FUN}(NULL, 0), ${FUN}('🦆ab', NULL), ${FUN}(NULL, NULL)
----
NULL NULL NULL
# test on tables
statement ok
DROP TABLE IF EXISTS strings
statement ok
CREATE TABLE strings(a STRING, b BIGINT)
statement ok
INSERT INTO STRINGS VALUES ('abcd', 0), ('abc', 1), ('abc', 2), ('abc', 3), ('abc', 4)
query T
SELECT ${FUN}(a, b) FROM strings
----
(empty)
a
ab
abc
abc
statement ok
DROP TABLE IF EXISTS strings
statement ok
CREATE TABLE strings(a STRING, b BIGINT)
statement ok
INSERT INTO STRINGS VALUES ('abcd', 0), ('abc', -1), ('abc', -2), ('abc', -3), ('abc', -4)
query T
SELECT ${FUN}(a, b) FROM strings
----
(empty)
ab
a
(empty)
(empty)
statement ok
DROP TABLE IF EXISTS strings
statement ok
CREATE TABLE strings(a STRING, b BIGINT)
statement ok
INSERT INTO STRINGS VALUES (NULL, 0), ('abc', NULL), (NULL, NULL)
query T
SELECT ${FUN}(a, b) FROM strings
----
NULL
NULL
NULL
endloop
# grapheme clusters: the negative-count query below shows the string is split into three
# graphemes (🦆, 🤦, S̈), so a count of 3 must return the string in full
# (expected values are listed one per line, in column order)
query TTTT
SELECT LEFT_GRAPHEME('🦆🤦S̈', 0), LEFT_GRAPHEME('🦆🤦S̈', 1), LEFT_GRAPHEME('🦆🤦S̈', 2), LEFT_GRAPHEME('🦆🤦S̈', 3)
----
(empty)
🦆
🦆🤦
🦆🤦S̈
query TTTT
SELECT LEFT_GRAPHEME('🦆🤦S̈', 0), LEFT_GRAPHEME('🦆🤦S̈', -1), LEFT_GRAPHEME('🦆🤦S̈', -2), LEFT_GRAPHEME('🦆🤦S̈', -3)
----
(empty) 🦆🤦 🦆 (empty)

View File

@@ -0,0 +1,38 @@
# name: test/sql/function/string/test_length.test
# description: Test length function
# group: [string]
statement ok
SET default_null_order='nulls_first';
statement ok
PRAGMA enable_verification
statement ok
CREATE TABLE strings(s VARCHAR)
statement ok
INSERT INTO strings VALUES ('hello'), ('world'), (NULL)
# normal length
query I
SELECT length(s) FROM strings ORDER BY s
----
NULL
5
5
# length after concat
query I
SELECT length(s || ' ' || '🦆') FROM strings ORDER BY s
----
NULL
7
7
# aliases work
query II
SELECT char_length('asdf'), CHARACTER_LENGTH('asdf')
----
4
4

View File

@@ -0,0 +1,287 @@
# name: test/sql/function/string/test_levenshtein.test
# description: Test levenshtein function
# group: [string]
statement ok
PRAGMA enable_verification
# normal queries
query I
SELECT levenshtein('hallo', 'hallo')
----
0
query I
SELECT levenshtein('hallo', 'hello')
----
1
query I
SELECT levenshtein('hello', 'hallo')
----
1
query I
SELECT levenshtein('lawn', 'flaw')
----
2
query I
SELECT levenshtein('flaw', 'lawn')
----
2
query I
SELECT levenshtein('kitten', 'sitting')
----
3
query I
SELECT levenshtein('sitting', 'kitten')
----
3
query I
SELECT levenshtein('hallo', 'hoi')
----
4
query I
SELECT levenshtein('hoi', 'hallo')
----
4
query I
SELECT levenshtein(NULL, 'hi')
----
NULL
query I
SELECT levenshtein('hi', NULL)
----
NULL
query I
SELECT levenshtein(NULL, NULL)
----
NULL
query I
SELECT levenshtein('', NULL)
----
NULL
query I
SELECT levenshtein(NULL, '')
----
NULL
query I
SELECT levenshtein('', 'hi')
----
2
query I
SELECT levenshtein('hi', '')
----
2
query I
SELECT levenshtein('', '')
----
0
statement ok
CREATE TABLE strings(s VARCHAR)
statement ok
INSERT INTO strings VALUES ('hello'), ('hallo'), ('aloha'), ('fello'), ('fellow'), ('ducks')
query I
SELECT levenshtein(s, 'hallo') FROM strings ORDER BY s
----
4
5
2
3
0
1
query I
SELECT levenshtein('hallo', s) FROM strings ORDER BY s
----
4
5
2
3
0
1
query I
SELECT levenshtein(NULL, s) FROM strings
----
NULL
NULL
NULL
NULL
NULL
NULL
# NULL as the second argument (mirror of the previous query, which passes NULL first)
query I
SELECT levenshtein(s, NULL) FROM strings
----
NULL
NULL
NULL
NULL
NULL
NULL
query I
SELECT levenshtein('', s) FROM strings ORDER BY s
----
5
5
5
6
5
5
query I
SELECT levenshtein(s, '') FROM strings ORDER BY s
----
5
5
5
6
5
5
statement ok
DROP TABLE strings
statement ok
CREATE TABLE strings(s VARCHAR)
statement ok
INSERT INTO strings VALUES (NULL)
query I
SELECT levenshtein(s, NULL) from strings
----
NULL
query I
SELECT levenshtein(NULL, s) from strings
----
NULL
query I
SELECT levenshtein('hi', s) from strings
----
NULL
query I
SELECT levenshtein(s, 'hi') from strings
----
NULL
query I
SELECT levenshtein('', s) FROM strings
----
NULL
query I
SELECT levenshtein(s, '') FROM strings
----
NULL
statement ok
DROP TABLE strings
statement ok
CREATE TABLE strings(s VARCHAR)
statement ok
INSERT INTO strings VALUES ('')
query I
SELECT levenshtein(NULL, s) FROM strings
----
NULL
query I
SELECT levenshtein(s, NULL) FROM strings
----
NULL
query I
SELECT levenshtein(s, '') FROM strings
----
0
query I
SELECT levenshtein('', s) FROM strings
----
0
query I
SELECT levenshtein(s, 'hi') FROM strings
----
2
query I
SELECT levenshtein('hi', s) FROM strings
----
2
# editdist3
query I
SELECT editdist3('hallo', 'hello')
----
1
query I
SELECT editdist3(s, 'hello') FROM strings
----
5
# Comparing fields from two columns row wise
statement ok
DROP TABLE strings
statement ok
CREATE TABLE strings(s VARCHAR, t VARCHAR)
statement ok
INSERT INTO strings VALUES ('hello', 'hello'), ('hello', 'hallo'), ('flaw', 'lawn'),
('sitting', 'kitten'), ('hallo', 'aloha'), ('hello', 'aloha'),
(NULL, NULL), ('', ''),
(NULL, 'bora'), ('bora', NULL),
('hi', ''), ('', 'hi'),
(NULL, ''), ('', NULL)
query I
SELECT levenshtein(s, t) ld FROM strings
----
0
1
2
3
4
5
NULL
0
NULL
NULL
2
2
NULL
NULL

View File

@@ -0,0 +1,293 @@
# name: test/sql/function/string/test_like.test
# description: Test LIKE statement
# group: [string]
statement ok
PRAGMA enable_verification
# scalar like
query T
SELECT 'aaa' LIKE 'bbb'
----
0
query T
SELECT 'aaa' LIKE 'abab'
----
0
query T
SELECT 'aaa' LIKE 'aaa'
----
1
query T
SELECT 'aaa' LIKE '%'
----
1
query T
SELECT 'aaa' LIKE '%a'
----
1
query T
SELECT 'aaa' LIKE '%b'
----
0
query T
SELECT 'aaa' LIKE 'a%'
----
1
query T
SELECT 'aaa' LIKE 'b%'
----
0
query T
SELECT 'aaa' LIKE 'a_a'
----
1
query T
SELECT 'aaa' LIKE 'a_'
----
0
query T
SELECT 'aaa' LIKE '__%'
----
1
query T
SELECT 'aaa' LIKE '____%'
----
0
query T
SELECT 'ababac' LIKE '%abac'
----
1
query T
SELECT 'ababac' LIKE '%%%abac'
----
1
query T
SELECT 'ababac' LIKE 'abab%%%%%'
----
1
query T
SELECT 'ababac' LIKE '%%%a%%%b%%a%b%%%%%a%c%%'
----
1
query T
SELECT 'ababac' LIKE '%%%a%%%b%%a%b%%%%%a%d%%'
----
0
query T
SELECT 'ababac' NOT LIKE '%abac'
----
0
query T
SELECT 'aabbccc' LIKE '%aa%bb%cc'
----
1
query T
SELECT 'zebra elephant tiger horse' LIKE ''
----
0
query T
SELECT 'zebra elephant tiger horse' LIKE '%'
----
1
query T
SELECT 'zebra elephant tiger horse' LIKE 'zebra'
----
0
query T
SELECT 'zebra elephant tiger horse' LIKE 'zebra elephant tiger horse'
----
1
query T
SELECT 'zebra elephant tiger horse' LIKE 'zebra elephant tiger horse%'
----
1
query T
SELECT 'zebra elephant tiger horse' LIKE '%zebra elephant tiger horse%'
----
1
query T
SELECT 'zebra elephant tiger horse' LIKE '%zebra elephant tiger horse blabla'
----
0
query T
SELECT 'zebra elephant tiger horse' LIKE 'zebra elephant tiger horse blabla%'
----
0
query T
SELECT 'zebra elephant tiger horse' LIKE 'zebra%'
----
1
query T
SELECT 'zebra elephant tiger horse' LIKE '%horse'
----
1
query T
SELECT 'zebra elephant tiger horse' LIKE 'zebra%elephant%horse'
----
1
query T
SELECT 'zebra elephant tiger horse' LIKE 'zebra%elephant%tiger%horse'
----
1
query T
SELECT 'zebra elephant tiger horse' LIKE '%zebra%elephant%tiger%horse'
----
1
query T
SELECT 'zebra elephant tiger horse' LIKE 'zebra%elephant%tiger%horse%'
----
1
query T
SELECT 'zebra elephant tiger horse' LIKE '%zebra%elephant%tiger%horse%'
----
1
query T
SELECT 'zebra elephant tiger horse' LIKE '%%zebra %%%ele%phan%t t%ig%er% horse%'
----
1
query T
SELECT 'zebra elephant tiger horse' LIKE 'zebra%tiger%horse'
----
1
query T
SELECT 'zebra elephant tiger horse' LIKE 'zebra%tiger%elephant%horse'
----
0
query T
SELECT 'zebra elephant tiger horse' NOT LIKE ''
----
1
query T
SELECT 'zebra elephant tiger horse' NOT LIKE '%'
----
0
query T
SELECT 'zebra elephant tiger horse' NOT LIKE 'zebra'
----
1
query T
SELECT 'zebra elephant tiger horse' NOT LIKE 'zebra elephant tiger horse'
----
0
query T
SELECT 'zebra elephant tiger horse' NOT LIKE 'zebra%'
----
0
query T
SELECT 'zebra elephant tiger horse' NOT LIKE '%horse'
----
0
query T
SELECT 'zebra elephant tiger horse' NOT LIKE 'zebra%elephant%horse'
----
0
query T
SELECT 'zebra elephant tiger horse' NOT LIKE 'zebra%elephant%tiger%horse'
----
0
query T
SELECT 'zebra elephant tiger horse' NOT LIKE '%zebra%elephant%tiger%horse'
----
0
query T
SELECT 'zebra elephant tiger horse' NOT LIKE 'zebra%elephant%tiger%horse%'
----
0
query T
SELECT 'zebra elephant tiger horse' NOT LIKE '%zebra%elephant%tiger%horse%'
----
0
query T
SELECT 'zebra elephant tiger horse' NOT LIKE '%%zebra %%%ele%phan%t t%ig%er% horse%'
----
0
query T
SELECT 'zebra elephant tiger horse' NOT LIKE 'zebra%tiger%horse'
----
0
query T
SELECT 'zebra elephant tiger horse' NOT LIKE 'zebra%tiger%elephant%horse'
----
1
# like with table
statement ok
CREATE TABLE strings(s STRING, pat STRING);
statement ok
INSERT INTO strings VALUES ('abab', 'ab%'), ('aaa', 'a_a'), ('aaa', '%b%')
query T
SELECT s FROM strings WHERE s LIKE 'ab%'
----
abab
query T
SELECT s FROM strings WHERE 'aba' LIKE pat
----
abab
aaa
aaa
query T
SELECT s FROM strings WHERE s LIKE pat
----
abab
aaa
# Reject invalid collations
statement error
SELECT 'hello' LIKE 'hê?llo' COLLATE idontexist;
----
<REGEX>:.*Catalog Error.*does not exist.*

View File

@@ -0,0 +1,114 @@
# name: test/sql/function/string/test_like_escape.test
# description: Test LIKE statement with custom ESCAPE
# group: [string]
# scalar like with escape
query T
SELECT '%++' LIKE '*%++' ESCAPE '*';
----
1
# Not Like
query T
SELECT '%++' NOT LIKE '*%++' ESCAPE '*';
----
0
# unterminated escapes
statement error
SELECT '%' LIKE '%' ESCAPE '%';
----
statement error
SELECT '%' LIKE '*' ESCAPE '*';
----
# Default tests
query T
SELECT '\' LIKE '\\' ESCAPE '\';
----
1
query T
SELECT '\\' LIKE '\\' ESCAPE '\';
----
0
query T
SELECT '%' LIKE '*%' ESCAPE '*';
----
1
query T
SELECT '_ ' LIKE '*_ ' ESCAPE '*';
----
1
query T
SELECT ' a ' LIKE '*_ ' ESCAPE '*';
----
0
query T
SELECT '%_' LIKE '%_' ESCAPE '';
----
1
query T
SELECT '*%' NOT LIKE '*%' ESCAPE '*';
----
1
# It should fail when more than one escape character is specified
statement error
SELECT '%_' LIKE '%_' ESCAPE '\\';
----
statement error
SELECT '%_' LIKE '%_' ESCAPE '**';
----
# Test LIKE statement with ESCAPE in the middle of the pattern
statement ok
CREATE TABLE strings(s STRING, pat STRING);
statement ok
INSERT INTO strings VALUES ('abab', 'ab%'), ('aaa', 'a*_a'), ('aaa', '*%b'), ('bbb', 'a%');
query T
SELECT s FROM strings;
----
abab
aaa
aaa
bbb
query T
SELECT pat FROM strings;
----
ab%
a*_a
*%b
a%
query T
SELECT s FROM strings WHERE pat LIKE 'a*%' ESCAPE '*';
----
bbb
query T
SELECT s FROM strings WHERE 'aba' LIKE pat ESCAPE '*';
----
abab
bbb
query T
SELECT s FROM strings WHERE s LIKE pat ESCAPE '*';
----
abab
query I
select 'a' like 'a' escape NULL;
----
NULL

View File

@@ -0,0 +1,192 @@
# name: test/sql/function/string/test_mismatches.test
# description: Test mismatches function
# group: [string]
statement ok
SET default_null_order='nulls_first';
statement ok
PRAGMA enable_verification
query I
SELECT mismatches('hallo', 'hallo')
----
0
query I
SELECT mismatches('hello', 'hallo')
----
1
query I
SELECT mismatches('hallo', 'hello')
----
1
query I
SELECT mismatches('aloha', 'hallo')
----
5
query I
SELECT mismatches('hallo', 'aloha')
----
5
query I
SELECT mismatches(NULL, 'hallo')
----
NULL
query I
SELECT mismatches('hello', NULL)
----
NULL
query I
SELECT mismatches(NULL, NULL)
----
NULL
statement ok
CREATE TABLE strings(s VARCHAR)
statement ok
INSERT INTO strings VALUES ('hello'), ('hallo'), ('aloha'), ('world'), (NULL)
# normal counts
query I
SELECT mismatches(s, 'hallo') FROM strings ORDER BY s
----
NULL
5
0
1
4
query I
SELECT mismatches('hallo', s) FROM strings ORDER BY s
----
NULL
5
0
1
4
# special cases
statement error
SELECT mismatches('', '')
----
query I
SELECT mismatches(NULL, s) FROM strings ORDER BY s
----
NULL
NULL
NULL
NULL
NULL
query I
SELECT mismatches(s, NULL) FROM strings ORDER BY s
----
NULL
NULL
NULL
NULL
NULL
# incorrect usages
statement error
SELECT mismatches('hoi', 'hallo')
----
statement error
SELECT mismatches('hallo', 'hoi')
----
statement error
SELECT mismatches('', 'hallo')
----
statement error
SELECT mismatches('hi', '')
----
statement error
SELECT mismatches('', s) FROM strings ORDER BY s
----
statement error
SELECT mismatches(s, '') FROM strings ORDER BY s
----
statement ok
DROP TABLE strings
statement ok
CREATE TABLE strings(s VARCHAR)
statement ok
INSERT INTO strings VALUES ('hello'), ('halo'), (NULL)
# incorrect usage
statement error
SELECT mismatches(s, 'hallo') FROM strings
----
statement error
SELECT mismatches('hallo', s) FROM strings
----
# hamming is called with the same inputs as the mismatches checks at the top of this file
# and is expected to return the same counts
# NOTE(review): presumably hamming is an alias of mismatches — confirm
query I
SELECT hamming('hallo', 'hallo')
----
0
query I
SELECT hamming('hello', 'hallo')
----
1
# restricted to the 'hello' row, the only row whose length matches 'hallo'
query I
SELECT hamming(s, 'hallo') FROM strings WHERE s = 'hello'
----
1
# Comparing fields from two columns row wise
statement ok
DROP TABLE strings;
statement ok
CREATE TABLE strings(s VARCHAR, t VARCHAR)
statement ok
INSERT INTO strings VALUES ('hello', 'world'), ('hallo', 'ola'), ('hello', ''), (NULL, NULL), ('', ''), ('bora', 'bora')
statement error
SELECT s, t, hamming(s, t) hd FROM strings WHERE length(s) = length(t)
----
statement error
SELECT hamming(s, t) FROM strings
----

View File

@@ -0,0 +1,112 @@
# name: test/sql/function/string/test_pad.test
# description: LPAD/RPAD test
# group: [string]
statement ok
PRAGMA enable_verification
# test lpad on NULLs
query TTTTTTT
select LPAD(NULL, 7, '-'), LPAD('Base', NULL, '-'), LPAD('Base', 7, NULL), LPAD(NULL, NULL, '-'), LPAD(NULL, 7, NULL), LPAD('Base', NULL, NULL), LPAD(NULL, NULL, NULL)
----
NULL NULL NULL NULL NULL NULL NULL
# test rpad on NULLs
query TTTTTTT
select RPAD(NULL, 7, '-'), RPAD('Base', NULL, '-'), RPAD('Base', 7, NULL), RPAD(NULL, NULL, '-'), RPAD(NULL, 7, NULL), RPAD('Base', NULL, NULL), RPAD(NULL, NULL, NULL)
----
NULL NULL NULL NULL NULL NULL NULL
# test lpad/rpad on scalar values
query TTTT
select LPAD('Base', 7, '-'), LPAD('Base', 4, '-'), LPAD('Base', 2, ''), LPAD('Base', -1, '-')
----
---Base Base Ba (empty)
query TTTT
select RPAD('Base', 7, '-'), RPAD('Base', 4, '-'), RPAD('Base', 2, ''), RPAD('Base', -1, '-')
----
Base--- Base Ba (empty)
query TTTT
select LPAD('Base', 7, '-|'), LPAD('Base', 6, '-|'), LPAD('Base', 5, '-|'), LPAD('Base', 4, '-|')
----
-|-Base -|Base -Base Base
query TTTT
select RPAD('Base', 7, '-|'), RPAD('Base', 6, '-|'), RPAD('Base', 5, '-|'), RPAD('Base', 4, '-|')
----
Base-|- Base-| Base- Base
query TTT
select LPAD('MotörHead', 16, 'RÄcks'), LPAD('MotörHead', 12, 'RÄcks'), LPAD('MotörHead', 10, 'RÄcks')
----
RÄcksRÄMotörHead RÄcMotörHead RMotörHead
query TTT
select RPAD('MotörHead', 16, 'RÄcks'), RPAD('MotörHead', 12, 'RÄcks'), RPAD('MotörHead', 10, 'RÄcks')
----
MotörHeadRÄcksRÄ MotörHeadRÄc MotörHeadR
# test on entire tables
statement ok
CREATE TABLE strings(a STRING, b STRING)
statement ok
INSERT INTO strings VALUES ('Hello', 'World'), ('HuLlD', NULL), ('MotörHead','RÄcks')
query TT
select LPAD(a, 16, b), RPAD(a, 16, b) FROM strings
----
WorldWorldWHello HelloWorldWorldW
NULL NULL
RÄcksRÄMotörHead MotörHeadRÄcksRÄ
# test with selection vector
query TTTT
select LPAD(a, 12, b), RPAD(a, 12, b), UCASE(a), LCASE(a) FROM strings WHERE b IS NOT NULL
----
WorldWoHello HelloWorldWo HELLO hello
RÄcMotörHead MotörHeadRÄc MOTÖRHEAD motörhead
# test incorrect usage
statement error
select LPAD()
----
statement error
select LPAD(1)
----
statement error
select LPAD(1, 2)
----
statement error
select LPAD('Hello', 10, '')
----
statement error
select LPAD('a', 100000000000000000, 0)
----
statement error
select RPAD()
----
statement error
select RPAD(1)
----
statement error
select RPAD(1, 2)
----
statement error
select RPAD('Hello', 10, '')
----
statement error
select RPAD('a', 100000000000000000, 0)
----

View File

@@ -0,0 +1,249 @@
# name: test/sql/function/string/test_prefix.test
# description: Prefix test
# group: [string]
statement ok
SET default_null_order='nulls_first';
statement ok
PRAGMA enable_verification
# "Early out prefix"
query T
SELECT prefix('abcd', 'a')
----
1
query T
SELECT prefix('abcd', 'ab')
----
1
query T
SELECT prefix('abcd', 'abc')
----
1
query T
SELECT prefix('abcd', 'abcd')
----
1
query T
SELECT prefix('abcd', 'b')
----
0
# "Inlined string"
query T
SELECT prefix('abcdefgh', 'a')
----
1
query T
SELECT prefix('abcdefgh', 'ab')
----
1
query T
SELECT prefix('abcdefgh', 'abc')
----
1
query T
SELECT prefix('abcdefgh', 'abcd')
----
1
query T
SELECT prefix('abcdefgh', 'abcde')
----
1
query T
SELECT prefix('abcdefgh', 'b')
----
0
# Longer (non-inlined) strings
query T
SELECT prefix('abcdefghijklmnopqrstuvwxyz', 'a')
----
1
query T
SELECT prefix('abcdefghijklmnopqrstuvwxyz', 'ab')
----
1
query T
SELECT prefix('abcdefghijklmnopqrstuvwxyz', 'abc')
----
1
query T
SELECT prefix('abcdefghijklmnopqrstuvwxyz', 'abcd')
----
1
query T
SELECT prefix('abcdefghijklmnopqrstuvwxyz', 'abcde')
----
1
query T
SELECT prefix('abcdefghijklmnopqrstuvwxyz', 'b')
----
0
query T
SELECT prefix('abcdefghijklmnopqrstuvwxyz', 'abcdefghijklmnopqrstuvwx')
----
1
query T
SELECT prefix('abcdefghijklmnopqrstuvwxyz', 'abcfefghijklmnopqrstuvwx')
----
0
# "Empty string and prefix"
query T
SELECT prefix('', 'aaa')
----
0
query T
SELECT prefix('aaa', '')
----
1
# "Issue #572 alloc exception on empty table"
statement ok
CREATE TABLE t0(c0 VARCHAR)
query T
SELECT * FROM t0 WHERE PREFIX(t0.c0, '')
----
# "Prefix test with UTF8"
query T
SELECT prefix('átomo', 'á')
----
1
query T
SELECT prefix('átomo', 'á')
----
1
query T
SELECT prefix('átomo', 'a')
----
0
query T
SELECT prefix('olá mundo', 'olá')
----
1
query T
SELECT prefix('olá mundo', 'olá')
----
1
query T
SELECT prefix('olá mundo', 'ola')
----
0
query T
SELECT prefix('ñeft', 'ñ')
----
1
query T
SELECT prefix('ñeft', 'ñ')
----
1
query T
SELECT prefix('ñeft', 'ñeft')
----
1
query T
SELECT prefix('ñeft', 'neft')
----
0
query T
SELECT prefix('two ñ three ₡ four 🦆 end', 'two ñ')
----
1
query T
SELECT prefix('two ñ three ₡ four 🦆 end', 'two ñ')
----
1
query T
SELECT prefix('two ñ three ₡ four 🦆 end', 'two n')
----
0
query T
SELECT prefix('two ñ three ₡ four 🦆 end', 'two ñ three')
----
1
query T
SELECT prefix('two ñ three ₡ four 🦆 end', 'two ñ three ₡')
----
1
query T
SELECT prefix('two ñ three ₡ four 🦆 end', 'two ñ three ₡ four 🦆')
----
1
query T
SELECT prefix('two ñ three ₡ four 🦆 end', 'two ñ three ₡ four 🦆 end')
----
1
query T
SELECT prefix('two ñ three ₡ four 🦆 end', 'two ñ three ₡ four 🦆 end')
----
1
# test empty prefix removal
statement ok
INSERT INTO t0 VALUES ('a'), ('b'), ('d')
query I
SELECT COUNT(*) FROM t0 WHERE prefix(t0.c0, '');
----
3
query II
SELECT c0, prefix(t0.c0, '') FROM t0 WHERE c0 <> 'b' ORDER BY t0.c0;
----
a 1
d 1
# now with a NULL value
statement ok
INSERT INTO t0 VALUES (NULL)
query I
SELECT COUNT(*) FROM t0 WHERE prefix(t0.c0, '');
----
3
query II
SELECT c0, prefix(t0.c0, '') FROM t0 WHERE c0 IS NULL OR c0 <> 'b' ORDER BY t0.c0;
----
NULL NULL
a 1
d 1

View File

@@ -0,0 +1,253 @@
# name: test/sql/function/string/test_printf.test
# description: Test printf
# group: [string]
statement ok
PRAGMA enable_verification
# printf without format specifiers
query TT
SELECT printf('hello'), printf(NULL)
----
hello NULL
# format strings
query TT
SELECT printf('%s', 'hello'), printf('%s: %s', 'hello', 'world')
----
hello hello: world
# format strings with NULL values
query TT
SELECT printf('%s', NULL), printf(NULL, 'hello', 'world')
----
NULL NULL
# booleans
query T
SELECT printf('%d', TRUE)
----
1
# integers
query TT
SELECT printf('%d', 33), printf('%d + %d = %d', 3, 5, 3 + 5)
----
33 3 + 5 = 8
# maximum of ubigint
query I
SELECT printf('%d', 18446744073709551615::UBIGINT);
----
18446744073709551615
# integers with special formatting specifiers: zero padding, mixed string/integer fields, space padding
# expected values are listed one per line: the second value contains spaces and
# '%10d' right-aligns 1992 in a 10-character field (six leading spaces)
query TTT
SELECT printf('%04d', 33), printf('%s %02d:%02d:%02d %s', 'time', 12, 3, 16, 'AM'), printf('%10d', 1992)
----
0033
time 12:03:16 AM
      1992
# different integer types
query T
SELECT printf('%hhd %hd %d %lld', 33::TINYINT, 12::SMALLINT, 40::INTEGER, 80::BIGINT)
----
33 12 40 80
# ...but really any of these can be used
query T
SELECT printf('%d %lld %hhd %hd', 33::TINYINT, 12::SMALLINT, 40::INTEGER, 80::BIGINT)
----
33 12 40 80
# exotic types
# dates, times and timestamps are strings
query T
SELECT printf('%s %s = %s', DATE '1992-01-01', TIME '12:01:00', TIMESTAMP '1992-01-01 12:01:00')
----
1992-01-01 12:01:00 = 1992-01-01 12:01:00
# blob
query T
SELECT printf('%s', BLOB '\x01\xa0')
----
\x01\xA0
# hugeint
query T
SELECT printf('%d', 120381902481294715712::HUGEINT)
----
120381902481294715712
query I
SELECT printf('%d', '-170141183460469231731687303715884105728'::HUGEINT)
----
-170141183460469231731687303715884105728
query I
SELECT printf('%d', '170141183460469231731687303715884105727'::HUGEINT)
----
170141183460469231731687303715884105727
# uhugeint
query T
SELECT printf('%d', 120381902481294715712::UHUGEINT)
----
120381902481294715712
query I
SELECT printf('%d', '340282366920938463463374607431768211455'::UHUGEINT)
----
340282366920938463463374607431768211455
query I
select printf('%x', 255::utinyint);
----
ff
query I
select printf('%x', 65535::usmallint);
----
ffff
query I
select printf('%x', 4294967295::uinteger);
----
ffffffff
query I
select printf('%x', 18446744073709551615::ubigint);
----
ffffffffffffffff
query I
select printf('%x', '340282366920938463463374607431768211455'::uhugeint);
----
ffffffffffffffffffffffffffffffff
# decimal
query T
SELECT printf('%.3f', '1.234'::DECIMAL)
----
1.234
# octal hex etc
query T
SELECT printf('%d %x %o %#x %#o', 100, 100, 100, 100, 100)
----
100 64 144 0x64 0144
query I
select printf('%b', '-170141183460469231731687303715884105728'::HUGEINT);
----
10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
# ascii characters
query T
SELECT printf('%c', 65)
----
A
# width trick: '*' takes the field width from the argument list (5), so 10 is
# right-aligned in a 5-character field (three leading spaces)
query T
SELECT printf('%*d', 5, 10)
----
   10
# floating point numbers
query TT
SELECT printf('%.2f', 10.0::FLOAT), printf('%.4f', 0.5)
----
10.00 0.5000
# weird float stuff
query T
SELECT printf('floats: %4.2f %+.0e %E', 3.1416, 3.1416, 3.1416)
----
floats: 3.14 +3e+00 3.141600E+00
# incorrect number of parameters
# too few parameters
statement error
SELECT printf('%s')
----
statement error
SELECT printf('%s %s', 'hello')
----
# excess parameters are ignored
query T
SELECT printf('%s', 'hello', 'world')
----
hello
# incorrect types
statement error
SELECT printf('%s', 42)
----
statement error
SELECT printf('%d', 'hello')
----
# Test printf with vectors
statement ok
CREATE TABLE strings(idx INTEGER, fmt STRING, pint INTEGER, pstring STRING)
statement ok
INSERT INTO strings VALUES (1, '%d: %s', 10, 'hello')
statement ok
INSERT INTO strings VALUES (2, 'blabla %d blabla %s', 20, 'blabla')
statement ok
INSERT INTO strings VALUES (3, NULL, 30, 'abcde')
# printf with only a format string column and no arguments: too few parameters
statement error
SELECT printf(fmt) FROM strings ORDER BY idx
----
query T
SELECT printf(CASE WHEN pint < 15 THEN NULL ELSE pint::VARCHAR END) FROM strings ORDER BY idx
----
NULL
20
30
# standard vectorized printf
query T
SELECT printf(fmt, pint, pstring) FROM strings ORDER BY idx
----
10: hello
blabla 20 blabla blabla
NULL
# printf with constants in format arguments
query T
SELECT printf(fmt, 10, pstring) FROM strings ORDER BY idx
----
10: hello
blabla 10 blabla blabla
NULL
# printf with constant format string
query T
SELECT printf('%s: %s', pstring, pstring) FROM strings ORDER BY idx
----
hello: hello
blabla: blabla
abcde: abcde
# printf with selection vector
query T
SELECT printf('%s: %s', pstring, pstring) FROM strings WHERE idx <> 2 ORDER BY idx
----
hello: hello
abcde: abcde
# cast integers correctly
query T
SELECT printf('%d %d %d %d', 100::tinyint, 1000::smallint, 1000::int, 1000::bigint);
----
100 1000 1000 1000

View File

@@ -0,0 +1,89 @@
# name: test/sql/function/string/test_repeat.test
# description: REPEAT test
# group: [string]
statement ok
PRAGMA enable_verification
# test repeat on NULLs
query TTT
select REPEAT(NULL, NULL), REPEAT(NULL, 3), REPEAT('MySQL', NULL)
----
NULL NULL NULL
# test repeat on scalars
query TTTT
select REPEAT('', 3), REPEAT('MySQL', 3), REPEAT('MotörHead', 2), REPEAT('Hello', -1)
----
(empty) MySQLMySQLMySQL MotörHeadMotörHead (empty)
# test repeat on tables
statement ok
CREATE TABLE strings(a STRING, b STRING)
statement ok
INSERT INTO strings VALUES ('Hello', 'World'), ('HuLlD', NULL), ('MotörHead','RÄcks'), ('', NULL)
query T
select REPEAT(a, 3) FROM strings
----
HelloHelloHello
HuLlDHuLlDHuLlD
MotörHeadMotörHeadMotörHead
(empty)
query T
select REPEAT(b, 2) FROM strings
----
WorldWorld
NULL
RÄcksRÄcks
NULL
query T
select REPEAT(a, 4) FROM strings WHERE b IS NOT NULL
----
HelloHelloHelloHello
MotörHeadMotörHeadMotörHeadMotörHead
# empty string
query I
SELECT repeat('', 99);
----
(empty)
# no repeat
query I
SELECT repeat('hello world', 0);
----
(empty)
# negative repeat
query I
SELECT repeat('hello world', -1);
----
(empty)
# repeat blob
query I
SELECT repeat(blob '00', 2);
----
0000
# test incorrect usage of repeat
statement error
select REPEAT()
----
statement error
select REPEAT(1)
----
statement error
select REPEAT('hello', 'world')
----
statement error
select REPEAT('hello', 'world', 3)
----

View File

@@ -0,0 +1,83 @@
# name: test/sql/function/string/test_replace.test
# description: REPLACE test
# group: [string]
statement ok
PRAGMA enable_verification
# test replace on NULLs
query T
select REPLACE('This is the main test string', NULL, 'ALT')
----
NULL
query T
select REPLACE(NULL, 'main', 'ALT')
----
NULL
query T
select REPLACE('This is the main test string', 'main', NULL)
----
NULL
# test replace on scalars
query T
select REPLACE('This is the main test string', 'main', 'ALT')
----
This is the ALT test string
query T
select REPLACE('This is the main test string', 'main', 'larger-main')
----
This is the larger-main test string
query T
select REPLACE('aaaaaaa', 'a', '0123456789')
----
0123456789012345678901234567890123456789012345678901234567890123456789
# test replace on tables
statement ok
CREATE TABLE strings(a STRING, b STRING)
statement ok
INSERT INTO strings VALUES ('Hello', 'World'), ('HuLlD', NULL), ('MotörHead','RÄcks'), ('', NULL)
query T
select REPLACE(a, 'l', '-') FROM strings
----
He--o
HuL-D
MotörHead
(empty)
query T
select REPLACE(b, 'Ä', '--') FROM strings
----
World
NULL
R--cks
NULL
query T
select REPLACE(a, 'H', '') FROM strings WHERE b IS NOT NULL
----
ello
Motöread
# test incorrect usage of replace
statement error
select REPLACE(1)
----
<REGEX>:.*Binder Error: No function matches.*
statement error
select REPLACE(1, 2)
----
<REGEX>:.*Binder Error: No function matches.*
statement error
select REPLACE(1, 2, 3, 4)
----
<REGEX>:.*Binder Error: No function matches.*

View File

@@ -0,0 +1,57 @@
# name: test/sql/function/string/test_reverse.test
# description: REVERSE test
# group: [string]
statement ok
PRAGMA enable_verification
# test reverse on scalars
query TTTT
select REVERSE(''), REVERSE('Hello'), REVERSE('MotörHead'), REVERSE(NULL)
----
(empty) olleH daeHrötoM NULL
# test reverse on tables
statement ok
CREATE TABLE strings(a STRING, b STRING)
statement ok
INSERT INTO strings VALUES ('Hello', 'World'), ('HuLlD', NULL), ('MotörHead','RÄcks'), ('', NULL)
query T
select REVERSE(a) FROM strings
----
olleH
DlLuH
daeHrötoM
(empty)
query T
select REVERSE(b) FROM strings
----
dlroW
NULL
skcÄR
NULL
query T
select REVERSE(a) FROM strings WHERE b IS NOT NULL
----
olleH
daeHrötoM
# test incorrect usage of reverse
statement error
select REVERSE()
----
<REGEX>:^Binder Error: No function matches.*
statement error
select REVERSE(1, 2)
----
<REGEX>:^Binder Error: No function matches.*
statement error
select REVERSE('hello', 'world')
----
<REGEX>:^Binder Error: No function matches.*

View File

@@ -0,0 +1,119 @@
# name: test/sql/function/string/test_right.test
# description: RIGHT test
# group: [string]
statement ok
PRAGMA enable_verification
foreach FUN RIGHT RIGHT_GRAPHEME
# test RIGHT on positive positions
query TTTTT
SELECT ${FUN}('abcd', 0), ${FUN}('abc', 1), ${FUN}('abc', 2), ${FUN}('abc', 3), ${FUN}('abc', 4)
----
(empty) c bc abc abc
query TTTTT
SELECT ${FUN}('🦆ab', 0), ${FUN}('🦆ab', 1), ${FUN}('🦆ab', 2), ${FUN}('🦆ab', 3), ${FUN}('🦆ab', 4)
----
(empty) b ab 🦆ab 🦆ab
# test RIGHT on negative positions
query TTTTT
SELECT ${FUN}('abcd', 0), ${FUN}('abc', -1), ${FUN}('abc', -2), ${FUN}('abc', -3), ${FUN}('abc', -4)
----
(empty) bc c (empty) (empty)
query TTTTT
SELECT ${FUN}('🦆ab', 0), ${FUN}('🦆ab', -1), ${FUN}('🦆ab', -2), ${FUN}('🦆ab', -3), ${FUN}('🦆ab', -4)
----
(empty) ab b (empty) (empty)
# test RIGHT on NULL values
query TTT
SELECT ${FUN}(NULL, 0), ${FUN}('abc', NULL), ${FUN}(NULL, NULL)
----
NULL NULL NULL
query TTT
SELECT ${FUN}(NULL, 0), ${FUN}('🦆ab', NULL), ${FUN}(NULL, NULL)
----
NULL NULL NULL
# test on tables
statement ok
DROP TABLE IF EXISTS strings
statement ok
CREATE TABLE strings(a STRING, b BIGINT)
statement ok
INSERT INTO STRINGS VALUES ('abcd', 0), ('abc', 1), ('abc', 2), ('abc', 3), ('abc', 4)
query T
SELECT ${FUN}(a, b) FROM strings
----
(empty)
c
bc
abc
abc
statement ok
DROP TABLE IF EXISTS strings
statement ok
CREATE TABLE strings(a STRING, b BIGINT)
statement ok
INSERT INTO STRINGS VALUES ('abcd', 0), ('abc', -1), ('abc', -2), ('abc', -3), ('abc', -4)
query T
SELECT ${FUN}(a, b) FROM strings
----
(empty)
bc
c
(empty)
(empty)
statement ok
DROP TABLE IF EXISTS strings
statement ok
CREATE TABLE strings(a STRING, b BIGINT)
statement ok
INSERT INTO STRINGS VALUES (NULL, 0), ('abc', NULL), (NULL, NULL)
query T
SELECT ${FUN}(a, b) FROM strings
----
NULL
NULL
NULL
endloop
# grapheme clusters
query TTTT
SELECT RIGHT_GRAPHEME('🦆🤦S̈', 0), RIGHT_GRAPHEME('🦆🤦S̈', 1), RIGHT_GRAPHEME('🦆🤦S̈', 2), RIGHT_GRAPHEME('🦆🤦S̈', 3)
----
(empty) 🤦 🦆🤦
query TTTT
SELECT RIGHT_GRAPHEME('🦆🤦S̈', 0), RIGHT_GRAPHEME('🦆🤦S̈', -1), RIGHT_GRAPHEME('🦆🤦S̈', -2), RIGHT_GRAPHEME('🦆🤦S̈', -3)
----
(empty) 🤦 (empty)
# grapheme overflow
statement ok
SELECT right_grapheme('a', -9223372036854775808);
statement ok
SELECT "right"('a', -9223372036854775808);
statement error
SELECT right_grapheme('a', 9223372036854775808);
----
<REGEX>:^Binder Error: No function matches.*

View File

@@ -0,0 +1,137 @@
# name: test/sql/function/string/test_similar_to.test
# description: Test SIMILAR TO statement
# group: [string]
query T
SELECT 'aaa' SIMILAR TO 'bbb'
----
0
query T
SELECT 'aaa' SIMILAR TO 'aaa'
----
1
query T
SELECT 'aaa' SIMILAR TO '.*'
----
1
query T
SELECT 'aaa' SIMILAR TO 'a.*'
----
1
query T
SELECT 'aaa' SIMILAR TO '.*a'
----
1
query T
SELECT 'aaa' SIMILAR TO '.*b'
----
0
query T
SELECT 'aaa' SIMILAR TO 'b.*'
----
0
query T
SELECT 'aaa' SIMILAR TO 'a[a-z]a'
----
1
query T
SELECT 'aaa' SIMILAR TO 'a[a-z]{2}'
----
1
query T
SELECT 'aaa' SIMILAR TO 'a[a-z].*'
----
1
query T
SELECT 'aaa' SIMILAR TO '[a-z][a-z].*'
----
1
query T
SELECT 'aaa' SIMILAR TO '[a-z]{3}'
----
1
query T
SELECT 'aaa' NOT SIMILAR TO '[b-z]{3}'
----
1
query T
SELECT 'aaa' ~ 'aaa'
----
1
query T
SELECT 'aaa' !~ 'bbb'
----
1
# similar to must match entire expression
query T
SELECT 'aaa' ~ '^a'
----
0
query T
SELECT 'aaa' ~ '^a+'
----
1
query T
SELECT 'aaa' ~ '(a|b)*'
----
1
query T
SELECT 'abc' ~ '^(b|c)'
----
0
# Test SIMILAR TO statement with expressions
statement ok
CREATE TABLE strings (s STRING, p STRING);
statement ok
INSERT INTO strings VALUES('aaa', 'a[a-z]a'), ('abab', 'ab.*'), ('aaa', 'a[a-z]a'), ('aaa', '.*b.*');
query T
SELECT s FROM strings WHERE s SIMILAR TO 'ab.*'
----
abab
query T
SELECT s FROM strings WHERE 'aba' SIMILAR TO p
----
aaa
abab
aaa
aaa
query T
SELECT s FROM strings WHERE s SIMILAR TO p
----
aaa
abab
aaa
query T
SELECT s FROM strings WHERE s NOT SIMILAR TO p
----
aaa
# invalid SIMILAR TO expression
statement error
SELECT s FROM strings WHERE s SIMILAR TO 'ab.*%' {escape ''}
----
<REGEX>:.*Parser Error: syntax error.*

View File

@@ -0,0 +1,107 @@
# name: test/sql/function/string/test_split_part.test
# description: split part test
# group: [string]
statement ok
PRAGMA enable_verification
# test core functionality
query T
select split_part('a,b,c',',',1)
----
a
query T
select split_part('a,b,c',',',2)
----
b
query T
select split_part('a,,b,,c',',,',2)
----
b
query T
SELECT split_part('a,b,c','|',1)
----
a,b,c
# test negative indexes
query T
select split_part('a,b,c',',',-1)
----
c
query T
select split_part('a,b,c',',',-2)
----
b
# test exceeding the bounds of the list generated by splitting, both negative and positive, and 0 position
# should always return an empty string in these cases (to match Postgres behavior)
query T
select split_part('a,b,c',',',0)
----
(empty)
query T
select split_part('a,b,c',',',5)
----
(empty)
query T
select split_part('a,b,c',',',-5)
----
(empty)
# test empty string inputs and null inputs
query T
select split_part('','',1)
----
(empty)
query T
select split_part('a,b,c','',3)
----
b
query T
select split_part('',',',1)
----
(empty)
query T
select split_part(NULL,NULL,1)
----
NULL
query T
select split_part('a,b,c',NULL,1)
----
NULL
query T
select split_part(NULL,',',1)
----
NULL
query T
select split_part('a,b,c', ',', NULL)
----
NULL
# test incorrect usage
statement error
select split_part()
----
<REGEX>:.*Binder Error.*does not support the supplied arguments.*
statement error
select split_part('a')
----
<REGEX>:.*Binder Error.*does not support the supplied arguments.*
statement error
select split_part('a','a')
----
<REGEX>:.*Binder Error.*does not support the supplied arguments.*

View File

@@ -0,0 +1,109 @@
# name: test/sql/function/string/test_starts_with_function.test
# description: starts_with function test
# group: [string]
statement ok
PRAGMA enable_verification
# starts_with of various lengths
query IIIIIIIIII
SELECT STARTS_WITH('hello world', 'h'),
STARTS_WITH('hello world', 'he'),
STARTS_WITH('hello world', 'hel'),
STARTS_WITH('hello world', 'hell'),
STARTS_WITH('hello world', 'hello'),
STARTS_WITH('hello world', 'hello '),
STARTS_WITH('hello world', 'hello w'),
STARTS_WITH('hello world', 'hello wo'),
STARTS_WITH('hello world', 'hello wor'),
STARTS_WITH('hello world', 'hello worl')
----
1 1 1 1 1 1 1 1 1 1
query IIIIIIIIII
SELECT STARTS_WITH('hello world', 'a'),
STARTS_WITH('hello world', 'ha'),
STARTS_WITH('hello world', 'hea'),
STARTS_WITH('hello world', 'hela'),
STARTS_WITH('hello world', 'hella'),
STARTS_WITH('hello world', 'helloa'),
STARTS_WITH('hello world', 'hello a'),
STARTS_WITH('hello world', 'hello wa'),
STARTS_WITH('hello world', 'hello woa'),
STARTS_WITH('hello world', 'hello wora')
----
0 0 0 0 0 0 0 0 0 0
# empty starts_with
query III
select starts_with('hello', ''), starts_with('', ''), starts_with(NULL, '')
----
1 1 NULL
statement ok
CREATE TABLE strings(s VARCHAR, off INTEGER, length INTEGER);
statement ok
INSERT INTO strings VALUES ('hello', 1, 2), ('world', 2, 3), ('h', 1, 1), (NULL, 2, 2)
# Test first letter
query T
SELECT starts_with(s,'h') FROM strings
----
1
0
1
NULL
# Test multiple letters
query T
SELECT starts_with(s,'he') FROM strings
----
1
0
0
NULL
# Test no match
query T
SELECT starts_with(s,'he-man') FROM strings
----
0
0
0
NULL
# Test NULL constant in different places
query T
SELECT starts_with(NULL,'h') FROM strings
----
NULL
NULL
NULL
NULL
query T
SELECT starts_with(s,NULL) FROM strings
----
NULL
NULL
NULL
NULL
query T
SELECT starts_with(NULL,NULL) FROM strings
----
NULL
NULL
NULL
NULL
# Test empty pattern
query T
SELECT starts_with(s,'') FROM strings
----
1
1
1
NULL

View File

@@ -0,0 +1,76 @@
# name: test/sql/function/string/test_starts_with_function_utf8.test
# description: starts_with function test with UTF8
# group: [string]
statement ok
PRAGMA enable_verification
statement ok
CREATE TABLE strings(s VARCHAR);
statement ok
INSERT INTO strings VALUES ('átomo')
statement ok
INSERT INTO strings VALUES ('olá mundo')
statement ok
INSERT INTO strings VALUES ('你好世界')
statement ok
INSERT INTO strings VALUES ('two ñ three ₡ four 🦆 end')
# Test one matching UTF8 letter
query T
SELECT starts_with(s,'á') FROM strings
----
1
0
0
0
# Test a sentence containing a UTF-8 character
query T
SELECT starts_with(s,'olá mundo') FROM strings
----
0
1
0
0
# Test an entire UTF-8 word
query T
SELECT starts_with(s,'你好世界') FROM strings
----
0
0
1
0
# Test a substring of the haystack from the beginning
query T
SELECT starts_with(s,'two ñ thr') FROM strings
----
0
0
0
1
# Test a single UTF8 substring of the haystack in the middle
query T
SELECT starts_with(s,'ñ') FROM strings
----
0
0
0
0
# Test a multi-character UTF-8 substring from the middle of the haystack (must not match)
query T
SELECT starts_with(s,'₡ four 🦆 e') FROM strings
----
0
0
0
0

View File

@@ -0,0 +1,109 @@
# name: test/sql/function/string/test_starts_with_operator.test
# description: ^@ starts_with operator test
# group: [string]
statement ok
PRAGMA enable_verification
# starts_with of various lengths
query IIIIIIIIII
SELECT 'hello world' ^@ 'h',
'hello world' ^@ 'he',
'hello world' ^@ 'hel',
'hello world' ^@ 'hell',
'hello world' ^@ 'hello',
'hello world' ^@ 'hello ',
'hello world' ^@ 'hello w',
'hello world' ^@ 'hello wo',
'hello world' ^@ 'hello wor',
'hello world' ^@ 'hello worl'
----
1 1 1 1 1 1 1 1 1 1
query IIIIIIIIII
SELECT 'hello world' ^@ 'a',
'hello world' ^@ 'ha',
'hello world' ^@ 'hea',
'hello world' ^@ 'hela',
'hello world' ^@ 'hella',
'hello world' ^@ 'helloa',
'hello world' ^@ 'hello a',
'hello world' ^@ 'hello wa',
'hello world' ^@ 'hello woa',
'hello world' ^@ 'hello wora'
----
0 0 0 0 0 0 0 0 0 0
# empty starts_with
query III
select 'hello' ^@ '', '' ^@ '', NULL ^@ ''
----
1 1 NULL
statement ok
CREATE TABLE strings(s VARCHAR, off INTEGER, length INTEGER);
statement ok
INSERT INTO strings VALUES ('hello', 1, 2), ('world', 2, 3), ('h', 1, 1), (NULL, 2, 2)
# Test first letter
query T
SELECT s ^@ 'h' FROM strings
----
1
0
1
NULL
# Test multiple letters
query T
SELECT s ^@ 'he' FROM strings
----
1
0
0
NULL
# Test no match
query T
SELECT s ^@ 'he-man' FROM strings
----
0
0
0
NULL
# Test NULL constant in different places
query T
SELECT NULL ^@ 'h' FROM strings
----
NULL
NULL
NULL
NULL
query T
SELECT s ^@ NULL FROM strings
----
NULL
NULL
NULL
NULL
query T
SELECT NULL ^@ NULL FROM strings
----
NULL
NULL
NULL
NULL
# Test empty pattern
query T
SELECT s ^@ '' FROM strings
----
1
1
1
NULL

View File

@@ -0,0 +1,76 @@
# name: test/sql/function/string/test_starts_with_operator_utf8.test
# description: ^@ starts_with operator test with UTF8
# group: [string]
statement ok
PRAGMA enable_verification
statement ok
CREATE TABLE strings(s VARCHAR);
statement ok
INSERT INTO strings VALUES ('átomo')
statement ok
INSERT INTO strings VALUES ('olá mundo')
statement ok
INSERT INTO strings VALUES ('你好世界')
statement ok
INSERT INTO strings VALUES ('two ñ three ₡ four 🦆 end')
# Test one matching UTF8 letter
query T
SELECT s ^@ 'á' FROM strings
----
1
0
0
0
# Test a sentence containing a UTF-8 character
query T
SELECT s ^@ 'olá mundo' FROM strings
----
0
1
0
0
# Test an entire UTF-8 word
query T
SELECT s ^@ '你好世界' FROM strings
----
0
0
1
0
# Test a substring of the haystack from the beginning
query T
SELECT s ^@ 'two ñ thr' FROM strings
----
0
0
0
1
# Test a single UTF8 substring of the haystack in the middle
query T
SELECT s ^@ 'ñ' FROM strings
----
0
0
0
0
# Test a multi-character UTF-8 substring from the middle of the haystack (must not match)
query T
SELECT s ^@ '₡ four 🦆 e' FROM strings
----
0
0
0
0

View File

@@ -0,0 +1,256 @@
# name: test/sql/function/string/test_string_array_slice.test
# description: String slicing test
# group: [string]
statement ok
PRAGMA enable_verification
query I
SELECT 'hello'[0:2]
----
he
query I
SELECT ('hello')[0:2]
----
he
statement ok
CREATE TABLE strings(s VARCHAR, off INTEGER, length INTEGER);
statement ok
INSERT INTO strings VALUES ('hello', 0, 2), ('world', 1, 3), ('b', 0, 1), (NULL, 1, 2)
# test zero length
query TT
SELECT array_slice('🦆ab', 0, 0), array_slice('abc', 0, 0)
----
(empty) (empty)
# constant offset/length
# normal slice
query T
SELECT array_slice(s, 0, 2) FROM strings
----
he
wo
b
NULL
# list_slice alias
query T
SELECT list_slice(s, 0, 2) FROM strings
----
he
wo
b
NULL
query T
SELECT array_slice(s, 1, 3) FROM strings
----
hel
wor
b
NULL
# index out of range
query T
SELECT array_slice(s, 2, 3) FROM strings
----
el
or
(empty)
NULL
# variable length offset/length
query T
SELECT array_slice(s, off, length+off) FROM strings
----
he
worl
b
NULL
query T
SELECT array_slice(s, off, 2+off) FROM strings
----
he
wor
b
NULL
query T
SELECT array_slice(s, 0, length) FROM strings
----
he
wor
b
NULL
query T
SELECT array_slice('hello', off, length+off) FROM strings
----
he
hell
h
hel
# test substrings with constant nulls in different places
query T
SELECT array_slice(NULL::varchar, off, length+off) FROM strings
----
NULL
NULL
NULL
NULL
query T
SELECT array_slice('hello', NULL, length+NULL) FROM strings
----
NULL
NULL
NULL
NULL
query T
SELECT array_slice('hello', off+1, NULL+off) FROM strings
----
NULL
NULL
NULL
NULL
query T
SELECT array_slice(NULL::varchar, NULL, length+NULL) FROM strings
----
NULL
NULL
NULL
NULL
query T
SELECT array_slice('hello', NULL, NULL+NULL) FROM strings
----
NULL
NULL
NULL
NULL
query T
SELECT array_slice(NULL::varchar, off, NULL+off) FROM strings
----
NULL
NULL
NULL
NULL
query T
SELECT array_slice(NULL::varchar, NULL, NULL+NULL) FROM strings
----
NULL
NULL
NULL
NULL
# fixed slice
query T
SELECT array_slice(s, -2, NULL) FROM strings
----
NULL
NULL
NULL
NULL
# length 1
query T
SELECT array_slice(s, 0, 1) FROM strings
----
h
w
b
NULL
# negative offset and negative length
query T
SELECT array_slice(s, -4, -2) FROM strings
----
ell
orl
(empty)
NULL
# length 0
query T
SELECT array_slice(s, 1, 0) FROM strings
----
(empty)
(empty)
(empty)
NULL
# no end
query T
SELECT array_slice(s, 2, NULL) FROM strings
----
NULL
NULL
NULL
NULL
# very large offset and length
query T
SELECT array_slice(s, (2147483647-1), 1) FROM strings
----
(empty)
(empty)
(empty)
NULL
query T
SELECT array_slice(s, (2147483647-1), -1) FROM strings
----
(empty)
(empty)
(empty)
NULL
query T
SELECT array_slice(s, (-2147483646-1), -1) FROM strings
----
hello
world
b
NULL
query T
SELECT array_slice(s, (-2147483646-1), -2147483647) FROM strings
----
(empty)
(empty)
(empty)
NULL
# out of bounds array slice
query I
select array_slice([], -1, -9223372036854775808)
----
[]
# With constant null
query I
select * from (SELECT list_slice(NULL, 1, 3, 2));
----
NULL
# With constant null
query I
select s[1:2] from (SELECT NULL) as t(s);
----
NULL
# Also for arrays
query I
select * from (SELECT list_slice(NULL::INT[3], 1, 3, 2));
----
NULL

View File

@@ -0,0 +1,253 @@
# name: test/sql/function/string/test_string_slice.test
# description: String slicing test
# group: [string]
statement ok
PRAGMA enable_verification
statement ok
CREATE TABLE strings(s VARCHAR, off INTEGER, length INTEGER);
statement ok
INSERT INTO strings VALUES ('hello', 0, 2), ('world', 1, 3), ('b', 0, 1), (NULL, 1, 2)
# Slicing NULLs is not supported
statement ok
CREATE TABLE nulltable(n VARCHAR);
statement ok
INSERT INTO nulltable VALUES (NULL)
statement error
SELECT NULL::VARCHAR[off:length+off] FROM strings
----
statement error
SELECT NULL::VARCHAR[NULL:length+NULL] FROM strings
----
statement error
SELECT NULL::VARCHAR[off:NULL+off] FROM strings
----
statement error
SELECT NULL::VARCHAR[off:NULL+off] FROM strings
----
statement error
SELECT NULL::VARCHAR[NULL:NULL+NULL] FROM strings
----
# test zero length
query II
SELECT '🦆ab'[0:0], 'abc'[0:0]
----
(empty) (empty)
query I
SELECT 'MotörHead'[:5]
----
Motör
# constant offset/length
# normal slice
query T
SELECT s[0:2] FROM strings
----
he
wo
b
NULL
# index out of range
query T
SELECT s[1:3] FROM strings
----
hel
wor
b
NULL
query T
SELECT s[2:3] FROM strings
----
el
or
(empty)
NULL
# variable length offset/length
query T
SELECT s[off:length+off] FROM strings
----
he
worl
b
NULL
query T
SELECT s[off:2+off] FROM strings
----
he
wor
b
NULL
query T
SELECT s[0:length] FROM strings
----
he
wor
b
NULL
query T
SELECT 'hello'[off:length+off] FROM strings
----
he
hell
h
hel
# test substrings with constant nulls in different places
query T
SELECT n[off:length+off] FROM strings, nulltable
----
NULL
NULL
NULL
NULL
query T
SELECT 'hello'[NULL:length+NULL] FROM strings
----
NULL
NULL
NULL
NULL
query T
SELECT 'hello'[off:NULL+off] FROM strings
----
NULL
NULL
NULL
NULL
query T
SELECT 'hello'[off+1:NULL+off] FROM strings
----
NULL
NULL
NULL
NULL
query T
SELECT n[NULL:length+NULL] FROM strings, nulltable
----
NULL
NULL
NULL
NULL
query T
SELECT 'hello'[NULL:NULL+NULL] FROM strings
----
NULL
NULL
NULL
NULL
query T
SELECT n[off:NULL+off] FROM strings, nulltable
----
NULL
NULL
NULL
NULL
query T
SELECT n[NULL:NULL+NULL] FROM strings, nulltable
----
NULL
NULL
NULL
NULL
# fixed slice
query T
SELECT s[-2:] FROM strings
----
lo
ld
b
NULL
# length 1
query T
SELECT s[0:1] FROM strings
----
h
w
b
NULL
# negative offset and negative length
query T
SELECT s[-4:-2] FROM strings
----
ell
orl
(empty)
NULL
# length 0
query T
SELECT s[1:0] FROM strings
----
(empty)
(empty)
(empty)
NULL
# no end
query T
SELECT s[2:] FROM strings
----
ello
orld
(empty)
NULL
# very large offset and length
query T
SELECT s[(2147483647-1):1] FROM strings
----
(empty)
(empty)
(empty)
NULL
query T
SELECT s[(2147483647-1):-1] FROM strings
----
(empty)
(empty)
(empty)
NULL
query T
SELECT s[(-2147483646-1):-1] FROM strings
----
hello
world
b
NULL
query T
SELECT s[(-2147483646-1):-2147483647] FROM strings
----
(empty)
(empty)
(empty)
NULL

View File

@@ -0,0 +1,360 @@
# name: test/sql/function/string/test_string_split.test
# description: String split test
# group: [string]
statement ok
PRAGMA enable_verification
# test unnesting of null values a bit
query T
SELECT string_split(NULL, NULL)
----
NULL
query T
SELECT * FROM (VALUES (string_split('hello world', ' ')), (string_split(NULL, ' ')), (string_split('a b c', NULL)), (string_split('a b c', ' '))) tbl(i)
----
[hello, world]
NULL
[a b c]
[a, b, c]
statement ok
CREATE TABLE strings_with_null (s VARCHAR)
statement ok
INSERT INTO strings_with_null VALUES ('aba'), (NULL), ('ababa')
query T
SELECT UNNEST(string_split(s, 'b')) FROM strings_with_null
----
a
a
a
a
a
# splitting a NULL string gives a NULL list; unnesting it produces no rows,
# so the expected result below is intentionally empty
query T
SELECT UNNEST(string_split(NULL, ' ')) IS NULL LIMIT 5
----
query T
SELECT UNNEST(string_split('üüüüü', '◌̈'))
----
üüüüü
query T
SELECT UNNEST(string_split('üüüüü', ''))
----
üüüüü
query T
SELECT UNNEST(string_split_regex('üüüüü', '◌̈'))
----
üüüüü
query T
SELECT UNNEST(string_split_regex('üüüüü', ''))
----
üüüüü
query T
SELECT UNNEST(string_split(' 🦆🦆 🦆🦆', ' '))
----
🦆🦆
🦆🦆
query T
SELECT UNNEST(string_split('a a a a a', ' '))
----
a
a
a
a
a
query T
SELECT UNNEST(string_split('🦆 🦆 🦆 🦆 🦆', ' '))
----
🦆
🦆
🦆
🦆
🦆
query T
SELECT UNNEST(string_split('🦆🐈🐈🦆🐈🐈🦆🐈🐈🦆🐈🐈🦆', '🐈🐈'))
----
🦆
🦆
🦆
🦆
🦆
query T
SELECT UNNEST(string_split('', 'delim'))
----
(empty)
query T
SELECT UNNEST(string_split('aaaaa', ''))
----
a
a
a
a
a
query T
SELECT UNNEST(string_split('🦆🦆🦆🦆🦆', ''))
----
🦆
🦆
🦆
🦆
🦆
query T
SELECT UNNEST(string_split('abab', 'b'))
----
a
a
(empty)
query T
SELECT UNNEST(string_split('🦆b🦆b', 'b'))
----
🦆
🦆
(empty)
statement ok
CREATE TABLE documents(s VARCHAR)
statement ok
INSERT INTO documents VALUES ('baabbaa'), ('aabbaab'), ('ababababa'), ('b🦆🦆bb🦆🦆'), ('🦆🦆bb🦆🦆b'), ('🦆b🦆b🦆b🦆b🦆')
query T
SELECT UNNEST(string_split(s, 'bb')) FROM documents WHERE 1
----
baa
aa
aa
aab
ababababa
b🦆🦆
🦆🦆
🦆🦆
🦆🦆b
🦆b🦆b🦆b🦆b🦆
query T
SELECT UNNEST(string_split(s, 'bb')) FROM documents WHERE s LIKE 'b%'
----
baa
aa
b🦆🦆
🦆🦆
query T
SELECT string_agg(ss, 'bb') FROM (SELECT rowid AS id, UNNEST(string_split(s, 'bb')) AS ss FROM documents) AS q GROUP BY id ORDER BY id
----
baabbaa
aabbaab
ababababa
b🦆🦆bb🦆🦆
🦆🦆bb🦆🦆b
🦆b🦆b🦆b🦆b🦆
query T
SELECT UNNEST(string_split_regex('a1a11a111a', '[0-9]+'))
----
a
a
a
a
query T
SELECT UNNEST(string_split_regex('aaaaa', ''))
----
a
a
a
a
a
query T
SELECT UNNEST(string_split_regex('a a a a', '\s+'))
----
a
a
a
a
query T
SELECT UNNEST(string_split('aaaaa', NULL))
----
aaaaa
# taken from postgres string_to_array tests
query T
select UNNEST(string_split('1|2|3', '|'))
----
1
2
3
query T
select UNNEST(string_split('1|2|3|', '|'))
----
1
2
3
(empty)
query T
select UNNEST(string_split('1||2|3||', '||'))
----
1
2|3
(empty)
query T
select UNNEST(string_split('1|2|3', ''))
----
1
|
2
|
3
query T
select UNNEST(string_split('', '|'))
----
(empty)
query T
select UNNEST(string_split('1|2|3', NULL))
----
1|2|3
query T
select string_split(NULL, '|') IS NULL
----
1
query T
select UNNEST(string_split('abc', ''))
----
a
b
c
query T
select UNNEST(string_split_regex('abc', '(|abc)'))
----
a
b
c
query T
select UNNEST(string_split_regex('abc', '(abc|)'))
----
(empty)
(empty)
query T
select UNNEST(string_split('abc', ','))
----
abc
query T
select UNNEST(string_split_regex('abc', '(,|abc)'))
----
(empty)
(empty)
query T
select UNNEST(string_split_regex('abc', '(abc|,)'))
----
(empty)
(empty)
query T
select UNNEST(string_split('1,2,3,4,,6', ','))
----
1
2
3
4
(empty)
6
query T
select UNNEST(string_split_regex('1,2,3,4,,6', '(,|)'))
----
1
(empty)
2
(empty)
3
(empty)
4
(empty)
(empty)
6
query T
select UNNEST(string_split_regex('1,2,3,4,,6', '(|,)'))
----
1
,
2
,
3
,
4
,
,
6
query T
select UNNEST(string_split_regex('1,2,3,4,*,6', '(,|\*)'))
----
1
2
3
4
(empty)
(empty)
6
query T
select UNNEST(string_split_regex('1,2,3,4,*,6', '(\*|,)'))
----
1
2
3
4
(empty)
(empty)
6
# test incorrect usage
statement error
select string_split()
----
<REGEX>:.*Binder Error.*No function matches.*
statement error
select string_split('a')
----
<REGEX>:.*Binder Error.*No function matches.*
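# incorrect regex
# NOTE(review): this file never creates a table named "test", and the expected error is a
# Catalog Error, so the malformed regex '[' does not appear to be exercised here - confirm intent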
statement error
SELECT string_split_regex(a, '[') FROM test ORDER BY a;
----
<REGEX>:.*Catalog Error.*does not exist.*

View File

@@ -0,0 +1,18 @@
# name: test/sql/function/string/test_string_split_large.test_slow
# description: String split test with many strings
# group: [string]
# Each query aggregates range(50000) into 100 groups (mod(range, 100)), joins every group
# into one long string, splits it on a space and unnests the parts.
# Results are compared by hash (100000 values) rather than row by row.
# ASCII separator 'DUCK ', split on a literal space
query I
SELECT UNNEST(string_split(string_agg(range, 'DUCK '), ' ')) AS s, mod(range, 100) xx FROM range(50000) GROUP BY xx ORDER BY s
----
100000 values hashing to 4df57751b24295162836fcb48d04aa04
# same as above, but the separator contains a multi-byte UTF-8 character
query I
SELECT UNNEST(string_split(string_agg(range, '🦆 '), ' ')) AS s, mod(range, 100) xx FROM range(50000) GROUP BY xx ORDER BY s
----
100000 values hashing to 6a8c3f073a6f4bb5c62cb51ef2389dcf
# regex variant: splitting on '\s' is expected to hash identically to the literal-space split of the first query
query I
SELECT UNNEST(string_split_regex(string_agg(range, 'DUCK '), '\s')) AS s, mod(range, 100) xx FROM range(50000) GROUP BY xx ORDER BY s
----
100000 values hashing to 4df57751b24295162836fcb48d04aa04

View File

@@ -0,0 +1,136 @@
# name: test/sql/function/string/test_subscript.test
# description: String subscript test
# group: [string]
statement ok
PRAGMA enable_verification
statement ok
CREATE TABLE strings(s VARCHAR, off INTEGER);
statement ok
INSERT INTO strings VALUES ('hello', 1), ('world', 2), ('b', 1), (NULL, 2)
# test direct subscript
query TT
SELECT '🦆ab'[1], 'abc'[2]
----
🦆 b
# constant offset/length
# normal array_extract
query T
SELECT s[2] FROM strings
----
e
o
(empty)
NULL
# array_extract out of range
query T
SELECT s[3] FROM strings
----
l
r
(empty)
NULL
# variable offset
query T
SELECT s[off] FROM strings
----
h
o
b
NULL
query T
SELECT s[2] FROM strings
----
e
o
(empty)
NULL
query T
SELECT 'hello'[off] FROM strings
----
h
e
h
e
# test subscripts with constant NULLs in different places
statement error
SELECT NULL::VARCHAR[off] FROM strings
----
query T
SELECT 'hello'[NULL] FROM strings
----
NULL
NULL
NULL
NULL
statement error
SELECT NULL::VARCHAR[NULL] FROM strings
----
statement error
SELECT NULL::VARCHAR[off] FROM strings
----
statement error
SELECT NULL::VARCHAR[NULL] FROM strings
----
# negative offset
query T
SELECT s[-1] FROM strings
----
o
d
b
NULL
# first character (subscript 1 returns the first character)
query T
SELECT s[1] FROM strings
----
h
w
b
NULL
# offset past the end of every string
query T
SELECT s[6] FROM strings
----
(empty)
(empty)
(empty)
NULL
# very large positive and negative offsets
query T
SELECT s[2147483646] FROM strings
----
(empty)
(empty)
(empty)
NULL
query T
SELECT s[-2147483647] FROM strings
----
(empty)
(empty)
(empty)
NULL
query T
SELECT ([1,2,3])[-2147483647]
----
NULL

View File

@@ -0,0 +1,303 @@
# name: test/sql/function/string/test_substring.test
# description: Substring test
# group: [string]
statement ok
PRAGMA enable_verification
statement ok
CREATE TABLE strings(s VARCHAR, off INTEGER, length INTEGER);
statement ok
INSERT INTO strings VALUES ('hello', 1, 2), ('world', 2, 3), ('b', 1, 1), (NULL, 2, 2)
foreach FUN substring substring_grapheme
# test zero length
query TT
SELECT ${FUN}('🦆ab', 1, 0), ${FUN}('abc', 1, 0)
----
(empty) (empty)
# constant offset/length
# normal substring
query T
SELECT substring(s from 1 for 2) FROM strings
----
he
wo
b
NULL
# substring out of range
query T
SELECT substring(s from 2 for 2) FROM strings
----
el
or
(empty)
NULL
# variable length offset/length
query T
SELECT substring(s from off for length) FROM strings
----
he
orl
b
NULL
query T
SELECT substring(s from off for 2) FROM strings
----
he
or
b
NULL
query T
SELECT substring(s from 1 for length) FROM strings
----
he
wor
b
NULL
query T
SELECT substring('hello' from off for length) FROM strings
----
he
ell
h
el
# test substrings with constant nulls in different places
query T
SELECT substring(NULL from off for length) FROM strings
----
NULL
NULL
NULL
NULL
query T
SELECT substring('hello' from NULL for length) FROM strings
----
NULL
NULL
NULL
NULL
query T
SELECT substring('hello' from off for NULL) FROM strings
----
NULL
NULL
NULL
NULL
query T
SELECT substring(NULL from NULL for length) FROM strings
----
NULL
NULL
NULL
NULL
query T
SELECT substring('hello' from NULL for NULL) FROM strings
----
NULL
NULL
NULL
NULL
query T
SELECT substring(NULL from off for NULL) FROM strings
----
NULL
NULL
NULL
NULL
query T
SELECT substring(NULL from NULL for NULL) FROM strings
----
NULL
NULL
NULL
NULL
# fixed slice
query T
SELECT substring(s from -2 for 2) FROM strings
----
lo
ld
b
NULL
# zero offset (this is accepted by SQLite)
query T
SELECT substring(s from 0 for length) FROM strings
----
h
wo
(empty)
NULL
# negative length
query T
SELECT ${FUN}(s, 2, -2) FROM strings
----
h
w
b
NULL
# negative offset and negative length
query T
SELECT ${FUN}(s, -2, -2) FROM strings
----
el
or
(empty)
NULL
# length 0
query T
SELECT ${FUN}(s, 2, 0) FROM strings
----
(empty)
(empty)
(empty)
NULL
# no length
query T
SELECT ${FUN}(s, 2) FROM strings
----
ello
orld
(empty)
NULL
query T
SELECT ${FUN}(${FUN}(s, 2), 2) FROM strings
----
llo
rld
(empty)
NULL
# very large offset and length
query T
SELECT ${FUN}(s, 2147483647, 2147483647) FROM strings
----
(empty)
(empty)
(empty)
NULL
query T
SELECT ${FUN}(s, 2147483647, -2147483648) FROM strings
----
hello
world
b
NULL
query T
SELECT ${FUN}(s, -2147483647, 2147483647) FROM strings
----
hello
world
b
NULL
query T
SELECT ${FUN}(s, -2147483648, -2147483648) FROM strings
----
(empty)
(empty)
(empty)
NULL
# Issue #2553 - accept BIGINT arguments
query I
SELECT ${FUN}('abc', INSTR('abc', 'b'));
----
bc
# Issue #4978 - substring integer overflow
query I
SELECT ${FUN}('a', -1)
----
a
query I
SELECT ${FUN}('abcd', -1)
----
d
query I
SELECT ${FUN}('abcd', -7)
----
abcd
# Even though we accept bigints, we don't allow offsets and lengths outside the range
# [-4294967296, 4294967295], since we need to be able to do the internal resulting string
# length calculations within a 64-bit integer to avoid overflows.
statement error
SELECT ${FUN}(s, 9223372036854775807, -9223372036854775808) FROM strings
----
Out of Range Error: Substring offset outside of supported range (> 4294967295)
statement error
SELECT ${FUN}(s, -9223372036854775808, -9223372036854775808) FROM strings
----
Out of Range Error: Substring offset outside of supported range (< -4294967296)
statement error
SELECT ${FUN}(s, 9223372036854775807, 9223372036854775807) FROM strings
----
Out of Range Error: Substring offset outside of supported range (> 4294967295)
statement error
SELECT ${FUN}(s, -9223372036854775808, 9223372036854775807) FROM strings
----
Out of Range Error: Substring offset outside of supported range (< -4294967296)
statement error
SELECT ${FUN}(s, 0, 9223372036854775807) FROM strings
----
Out of Range Error: Substring length outside of supported range (> 4294967295)
statement error
SELECT ${FUN}(s, 0, -9223372036854775808) FROM strings
----
Out of Range Error: Substring length outside of supported range (< -4294967296)
# values just outside the supported range [-4294967296, 4294967295]
statement error
SELECT ${FUN}(s, 4294967296, 2147483647) FROM strings
----
Out of Range Error: Substring offset outside of supported range (> 4294967295)
statement error
SELECT ${FUN}(s, -4294967297, 2147483647) FROM strings
----
Out of Range Error: Substring offset outside of supported range (< -4294967296)
statement error
SELECT ${FUN}(s, 0, 4294967296) FROM strings
----
Out of Range Error: Substring length outside of supported range (> 4294967295)
statement error
SELECT ${FUN}(s, 0, -4294967297) FROM strings
----
Out of Range Error: Substring length outside of supported range (< -4294967296)
endloop

View File

@@ -0,0 +1,63 @@
# name: test/sql/function/string/test_substring_utf8.test
# description: Substring test with UTF8
# group: [string]
statement ok
PRAGMA enable_verification
# Fixture: one 18-character string mixing 1-byte (ASCII), 2-byte (ñ), 3-byte (₡)
# and 4-byte (🦆) UTF-8 sequences. 1-based character positions:
#   1-3 "two" | 4 "ñ" | 5-9 "three" | 10 "₡" | 11-14 "four" | 15 "🦆" | 16-18 "end"
statement ok
CREATE TABLE strings(s VARCHAR);
statement ok
INSERT INTO strings VALUES ('twoñthree₡four🦆end')
# offsets and lengths count characters, not bytes
query T
SELECT substring(s from 1 for 7) FROM strings
----
twoñthr
# characters 10..16: the window starts on the 3-byte "₡", which must be part of the result
query T
SELECT substring(s from 10 for 7) FROM strings
----
₡four🦆e
# the requested window runs past the end of the string, so only 4 characters come back
query T
SELECT substring(s from 15 for 7) FROM strings
----
🦆end
foreach FUN substr substring_grapheme
# negative lengths and offsets
# a negative offset counts back from the end of the string
query T
SELECT ${FUN}(s, -4, 4) FROM strings
----
🦆end
# a negative length selects the characters in front of the offset
query T
SELECT ${FUN}(s, -1, -4) FROM strings
----
r🦆en
query T
SELECT ${FUN}(s, 0, -4) FROM strings
----
(empty)
# offset 0 lies before the first character, so only 4 of the 5 requested characters exist
query T
SELECT ${FUN}(s, 0, 5) FROM strings
----
twoñ
query T
SELECT ${FUN}(s, 5, -5) FROM strings
----
twoñ
# length is optional
# without a length everything from the offset to the end is returned, "₡" included
query T
SELECT ${FUN}(s, 5) FROM strings
----
three₡four🦆end
endloop

View File

@@ -0,0 +1,232 @@
# name: test/sql/function/string/test_suffix.test
# description: Suffix test
# group: [string]
# suffix(string, search) is expected to yield 1 (true) when "string" ends with
# "search" and 0 (false) otherwise; the groups below repeat the same checks for
# strings of increasing length.
statement ok
PRAGMA enable_verification
# Short string (4bytes)
query T
SELECT suffix('abcd', 'd')
----
1
query T
SELECT suffix('abcd', 'cd')
----
1
query T
SELECT suffix('abcd', 'bcd')
----
1
# the whole string counts as a suffix of itself
query T
SELECT suffix('abcd', 'abcd')
----
1
# a character that does not occur in the string never matches
query T
SELECT suffix('abcd', 'X')
----
0
# Medium string (8bytes)
query T
SELECT suffix('abcdefgh', 'h')
----
1
query T
SELECT suffix('abcdefgh', 'gh')
----
1
query T
SELECT suffix('abcdefgh', 'fgh')
----
1
query T
SELECT suffix('abcdefgh', 'efgh')
----
1
query T
SELECT suffix('abcdefgh', 'defgh')
----
1
query T
SELECT suffix('abcdefgh', 'X')
----
0
query T
SELECT suffix('abcdefgh', 'abcdefgh')
----
1
# Long string (> 15bytes)
query T
SELECT suffix('abcdefghijklmnopqrstuvwxyz', 'z')
----
1
query T
SELECT suffix('abcdefghijklmnopqrstuvwxyz', 'yz')
----
1
query T
SELECT suffix('abcdefghijklmnopqrstuvwxyz', 'xyz')
----
1
query T
SELECT suffix('abcdefghijklmnopqrstuvwxyz', 'wxyz')
----
1
query T
SELECT suffix('abcdefghijklmnopqrstuvwxyz', 'vwxyz')
----
1
query T
SELECT suffix('abcdefghijklmnopqrstuvwxyz', 'X')
----
0
# a long suffix (23 of the 26 characters)
query T
SELECT suffix('abcdefghijklmnopqrstuvwxyz', 'defghijklmnopqrstuvwxyz')
----
1
# Empty string and suffix
# a non-empty suffix cannot match an empty string ...
query T
SELECT suffix('', 'aaa')
----
0
# ... while the empty suffix matches
query T
SELECT suffix('aaa', '')
----
1
# NULL string and suffix
# NULL in either argument (or both) yields NULL rather than 0
query T
SELECT suffix(NULL, 'aaa')
----
NULL
query T
SELECT suffix('aaa', NULL)
----
NULL
query T
SELECT suffix(NULL, NULL)
----
NULL
# Suffix test with UTF8
# inverse "átomo" (atom)
query T
SELECT suffix('omotá', 'á')
----
1
query T
SELECT suffix('omotá', 'á')
----
1
query T
SELECT suffix('omotá', 'a')
----
0
# inverse "olá mundo" (hello world)
query T
SELECT suffix('mundo olá', 'olá')
----
1
query T
SELECT suffix('mundo olá', 'olá')
----
1
query T
SELECT suffix('mundo olá', 'mundo olá')
----
1
query T
SELECT suffix('mundo olá', 'ola')
----
0
# eftñ
query T
SELECT suffix('eftñ', 'ñ')
----
1
query T
SELECT suffix('ñeft', 'ñeft')
----
1
query T
SELECT suffix('ñeft', 'neft')
----
0
# two ñ three ₡ four 🦆 end
query T
SELECT suffix('two ñ three ₡ four 🦆 end', '🦆 end')
----
1
query T
SELECT suffix('two ñ three ₡ four 🦆 end', '🦆 end')
----
1
query T
SELECT suffix('two ñ three ₡ four 🦆 end', 'three ₡ four 🦆 end')
----
1
query T
SELECT suffix('two ñ three ₡ four 🦆 end', 'three ₡ four 🦆 end')
----
1
query T
SELECT suffix('two ñ three ₡ four 🦆 end', 'two ñ three ₡ four 🦆 end')
----
1
query T
SELECT suffix('two ñ three ₡ four 🦆 end', 'two ñ three ₡ four 🦆 end')
----
1
query T
SELECT suffix('two ñ three ₡ four 🦆 end', 'two ñ three ₡ four 🦆 end')
----
1
query T
SELECT suffix('two ñ three ₡ four 🦆 end', 'two n three ₡ four 🦆 end')
----
0
query T
SELECT suffix('two ñ three ₡ four 🦆 end', 'XXXtwo ñ three ₡ four 🦆 end')
----
0

View File

@@ -0,0 +1,248 @@
# name: test/sql/function/string/test_to_base.test
# description: to_base tests
# group: [string]
statement ok
PRAGMA enable_verification
# Argument validation. Two call shapes are exercised in this file:
#   to_base(number, radix) and to_base(number, radix, min_length)
# negative numbers are rejected by both the 2- and the 3-argument form
statement error
SELECT to_base(-10, 2)
----
Invalid Input Error: 'to_base' number must be greater than or equal to 0
statement error
SELECT to_base(-10, 2, 64)
----
Invalid Input Error: 'to_base' number must be greater than or equal to 0
# radix below (1) and above (37) the accepted range, 2-argument form
statement error
SELECT to_base(10, 1)
----
radix must be between 2 and 36
statement error
SELECT to_base(10, 37)
----
radix must be between 2 and 36
# radix below (0) and above (37) the accepted range, 3-argument form
statement error
SELECT to_base(10, 0, 10)
----
radix must be between 2 and 36
statement error
SELECT to_base(10, 37, 10)
----
radix must be between 2 and 36
# a negative min_length is rejected
statement error
SELECT to_base(10, 2, -10)
----
min_length must be between 0 and 64
# Basic checks
# decimal 10 rendered in several radixes; digits above 9 are upper-case letters
query I
SELECT to_base(10, 2)
----
1010
# min_length = 64 left-pads the result with zeros to 64 characters
query I
SELECT to_base(10, 2, 64)
----
0000000000000000000000000000000000000000000000000000000000001010
query I
SELECT to_base(10, 3)
----
101
query I
SELECT to_base(10, 16)
----
A
query I
SELECT to_base(10, 36)
----
A
# 42 = 1 * 36 + 6
query I
SELECT to_base(42, 36)
----
16
# Check some ranges
# One row per value 1..42 (range(1, 43) excludes the upper bound). Columns:
#   binary | binary, min_length 8 | hex | hex, min_length 2 | base 36 | base 36, min_length 2
# min_length only pads with leading zeros; results that are already long enough are unchanged.
query IIIIII
SELECT
to_base(range, 2),
to_base(range, 2, 8),
to_base(range, 16),
to_base(range, 16, 2),
to_base(range, 36),
to_base(range, 36, 2)
FROM range(1, 43)
ORDER BY range
----
1 00000001 1 01 1 01
10 00000010 2 02 2 02
11 00000011 3 03 3 03
100 00000100 4 04 4 04
101 00000101 5 05 5 05
110 00000110 6 06 6 06
111 00000111 7 07 7 07
1000 00001000 8 08 8 08
1001 00001001 9 09 9 09
1010 00001010 A 0A A 0A
1011 00001011 B 0B B 0B
1100 00001100 C 0C C 0C
1101 00001101 D 0D D 0D
1110 00001110 E 0E E 0E
1111 00001111 F 0F F 0F
10000 00010000 10 10 G 0G
10001 00010001 11 11 H 0H
10010 00010010 12 12 I 0I
10011 00010011 13 13 J 0J
10100 00010100 14 14 K 0K
10101 00010101 15 15 L 0L
10110 00010110 16 16 M 0M
10111 00010111 17 17 N 0N
11000 00011000 18 18 O 0O
11001 00011001 19 19 P 0P
11010 00011010 1A 1A Q 0Q
11011 00011011 1B 1B R 0R
11100 00011100 1C 1C S 0S
11101 00011101 1D 1D T 0T
11110 00011110 1E 1E U 0U
11111 00011111 1F 1F V 0V
100000 00100000 20 20 W 0W
100001 00100001 21 21 X 0X
100010 00100010 22 22 Y 0Y
100011 00100011 23 23 Z 0Z
100100 00100100 24 24 10 10
100101 00100101 25 25 11 11
100110 00100110 26 26 12 12
100111 00100111 27 27 13 13
101000 00101000 28 28 14 14
101001 00101001 29 29 15 15
101010 00101010 2A 2A 16 16
# Check some fib numbers
# Fixture: the Fibonacci numbers 0 .. 46368 (25 rows, the value 1 appears twice).
# The VALUES column is unnamed; the queries below refer to it as col0.
statement ok
CREATE TABLE fib AS SELECT * FROM (VALUES
(0),
(1),
(1),
(2),
(3),
(5),
(8),
(13),
(21),
(34),
(55),
(89),
(144),
(233),
(377),
(610),
(987),
(1597),
(2584),
(4181),
(6765),
(10946),
(17711),
(28657),
(46368)
)
# base 2; 0 renders as "0"
query I
SELECT to_base(col0, 2) FROM fib ORDER BY col0;
----
0
1
1
10
11
101
1000
1101
10101
100010
110111
1011001
10010000
11101001
101111001
1001100010
1111011011
11000111101
101000011000
1000001010101
1101001101101
10101011000010
100010100101111
110111111110001
1011010100100000
# base 16
query I
SELECT to_base(col0, 16) FROM fib ORDER BY col0;
----
0
1
1
2
3
5
8
D
15
22
37
59
90
E9
179
262
3DB
63D
A18
1055
1A6D
2AC2
452F
6FF1
B520
# base 36 (digits 0-9 followed by A-Z)
query I
SELECT to_base(col0, 36) FROM fib ORDER BY col0;
----
0
1
1
2
3
5
8
D
L
Y
1J
2H
40
6H
AH
GY
RF
18D
1ZS
385
57X
8G2
DNZ
M41
ZS0

View File

@@ -0,0 +1,140 @@
# name: test/sql/function/string/test_translate.test
# description: TRANSLATE test
# group: [string]
statement ok
PRAGMA enable_verification
# test translate on NULLs
# a NULL in any of the three argument positions makes the result NULL
query T
select TRANSLATE('This is the main test string', NULL, 'ALT')
----
NULL
query T
select TRANSLATE(NULL, 'main', 'ALT')
----
NULL
query T
select TRANSLATE('This is the main test string', 'main', NULL)
----
NULL
# test translate on scalars
# TRANSLATE(string, from, to) replaces every character of "string" that occurs in
# "from" with the character at the same position in "to". Characters of "from"
# that have no counterpart in "to" are deleted; surplus characters in "to" are ignored.
query T
select TRANSLATE('12', '2', 'a')
----
1a
query T
select TRANSLATE('abcde', 'abcde', 'fghij')
----
fghij
# a character repeated in "from" keeps its first mapping (a -> 1, c -> 3)
query T
select TRANSLATE('abcde', 'aabcc', '14235')
----
123de
# "from" longer than "to": z/y/x are mapped, every other listed character is deleted
query T
select TRANSLATE('https://dxyzdb.org', 'zyx.orghttps:/', 'kcu')
----
duckdb
query T
select TRANSLATE('12345', '14367', 'ax')
----
a2x5
query T
select TRANSLATE('hacco worcdxxx', 'acx2', 'el')
----
hello world
# "to" longer than "from": the extra characters are never used
query T
select TRANSLATE('hacCo worcd', 'acC', 'ellaabb')
----
hello world
# multi-byte characters on either side of the mapping
query T
select TRANSLATE('RÄcks', 'Ä', 'A')
----
RAcks
query T
select TRANSLATE('🦆', '🦆', 'D')
----
D
# a -> e and e -> Ä are applied in a single pass (no re-translation); Ω has no
# counterpart in "to" and is deleted
query T
select TRANSLATE('MotörHeadΩ', 'aeΩ', 'eÄ')
----
MotörHÄed
query T
select TRANSLATE('This is 🐱.', '🐱', '🦆')
----
This is 🦆.
# ñ -> n, á -> a, 世 -> D, 界 -> B, 我 -> á; x/y/z would be deleted but do not occur
# (the upper-case X does not match); ₡ is not listed in "from" and is kept
query T
select TRANSLATE('Äañt₡Xá你好世界我', 'ñá世界我xyz', 'naDBá')
----
Äant₡Xa你好DBá
# test translate on tables
# Fixture rows mix ASCII, multi-byte, empty-string and NULL values. The queries
# have no ORDER BY, so the expected rows are listed in insertion order.
statement ok
CREATE TABLE strings(a STRING, b STRING)
statement ok
INSERT INTO strings VALUES ('Hello', 'World'), ('HuLlD', NULL), ('MotörHead','RÄcks'), ('', NULL), ('Hi', '🦆')
# pure replacement (ö -> o, H -> A, l -> -); the empty string stays empty
query T
select TRANSLATE(a, 'öHl', 'oA-') FROM strings
----
Ae--o
AuL-D
MotorAead
(empty)
Ai
# l -> 🦆, while o and D have no counterpart and are deleted; matching is
# case-sensitive (L is kept) and ö is a different character from o
query T
select TRANSLATE(a, 'loD', '🦆') FROM strings
----
He🦆🦆
HuL🦆
MtörHead
(empty)
Hi
# NULL rows of column b stay NULL; l has no counterpart and is deleted
query T
select TRANSLATE(b, 'ÄW🦆l', 'ow🐱') FROM strings
----
word
NULL
Rocks
NULL
🐱
# combined with a filter: only the three rows with a non-NULL b remain
query T
select TRANSLATE(a, 'oel', 'OEL') FROM strings WHERE b IS NOT NULL
----
HELLO
MOtörHEad
Hi
# test incorrect usage of translate
# only the three-argument form is exercised successfully in this file; calls with
# 1, 2 or 4 arguments are expected to fail at bind time
statement error
select TRANSLATE(1)
----
<REGEX>:.*Binder Error.*No function matches.*
statement error
select TRANSLATE(1, 2)
----
<REGEX>:.*Binder Error.*No function matches.*
statement error
select TRANSLATE(1, 2, 3, 4)
----
<REGEX>:.*Binder Error.*No function matches.*

View File

@@ -0,0 +1,141 @@
# name: test/sql/function/string/test_trim.test
# description: LTRIM/RTRIM/TRIM test
# group: [string]
statement ok
PRAGMA enable_verification
# Each query below feeds the same seven inputs to one function: empty string,
# no surrounding whitespace, leading only, trailing only, both sides, NULL, and
# whitespace only.
# NOTE(review): these expectations depend on the exact leading/trailing
# whitespace of both the literals and the expected row - confirm it is intact.
# test ltrim on scalars
query TTTTTTT
select LTRIM(''), LTRIM('Neither'), LTRIM(' Leading'), LTRIM('Trailing '), LTRIM(' Both '), LTRIM(NULL), LTRIM(' ')
----
(empty) Neither Leading Trailing Both NULL (empty)
# test rtrim on scalars
query TTTTTTT
select RTRIM(''), RTRIM('Neither'), RTRIM(' Leading'), RTRIM('Trailing '), RTRIM(' Both '), RTRIM(NULL), RTRIM(' ')
----
(empty) Neither Leading Trailing Both NULL (empty)
# test trim on scalars
query TTTTTTT
select TRIM(''), TRIM('Neither'), TRIM(' Leading'), TRIM('Trailing '), TRIM(' Both '), TRIM(NULL), TRIM(' ')
----
(empty) Neither Leading Trailing Both NULL (empty)
# test on tables
# Fixture shared by the ltrim and rtrim queries below; column b contains NULLs.
# The queries have no ORDER BY, so expected rows are listed in insertion order.
statement ok
CREATE TABLE strings(a STRING, b STRING)
statement ok
INSERT INTO strings VALUES ('', 'Neither'), (' Leading', NULL), (' Both ','Trailing '), ('', NULL)
# test ltrim on tables
query T
select LTRIM(a) FROM strings
----
(empty)
Leading
Both
(empty)
# NULL inputs stay NULL
query T
select LTRIM(b) FROM strings
----
Neither
NULL
Trailing
NULL
# combined with a filter: only the two rows with a non-NULL b remain
query T
select LTRIM(a) FROM strings WHERE b IS NOT NULL
----
(empty)
Both
# test rtrim on tables
query T
select RTRIM(a) FROM strings
----
(empty)
Leading
Both
(empty)
query T
select RTRIM(b) FROM strings
----
Neither
NULL
Trailing
NULL
query T
select RTRIM(a) FROM strings WHERE b IS NOT NULL
----
(empty)
Both
# test ltrim/rtrim/trim with custom trim filter
# The second argument is a set of characters to strip, not a literal prefix or
# suffix: 'papapapa' with filter 'pa' is stripped completely, and 'hello' with
# filter 'ho' loses its 'h' and/or 'o'. A NULL in either argument yields NULL;
# an empty filter leaves the string unchanged.
query TTTTTTT
select LTRIM('', 'ho'), LTRIM('hello', 'ho'), LTRIM('papapapa', 'pa'), LTRIM('blaHblabla', 'bla'), LTRIM('blabla', NULL), LTRIM(NULL, 'blabla'), LTRIM('blabla', '')
----
(empty) ello (empty) Hblabla NULL NULL blabla
query TTTTTTT
select RTRIM('', 'ho'), RTRIM('hello', 'ho'), RTRIM('papapapa', 'pa'), RTRIM('blaHblabla', 'bla'), RTRIM('blabla', NULL), RTRIM(NULL, 'blabla'), RTRIM('blabla', '')
----
(empty) hell (empty) blaH NULL NULL blabla
query TTTTTTT
select TRIM('', 'ho'), TRIM('hello', 'ho'), TRIM('papapapa', 'pa'), TRIM('blaHblabla', 'bla'), TRIM('blabla', NULL), TRIM(NULL, 'blabla'), TRIM('blabla', '')
----
(empty) ell (empty) H NULL NULL blabla
# test on tables
# per-row filters, including a multi-byte filter character (ü) and every
# NULL / empty-string combination of value and filter
statement ok
CREATE TABLE trim_test(a VARCHAR, b VARCHAR)
statement ok
INSERT INTO trim_test VALUES ('hello', 'ho'), ('test', 't'), ('mühleisen','mün'), (NULL, ' '), ('', NULL), ('', ''), (NULL, NULL)
# rows are expected in insertion order (no ORDER BY)
query TTT
SELECT LTRIM(a, b), RTRIM(a, b), TRIM(a, b) FROM trim_test
----
ello hell ell
est tes es
hleisen mühleise hleise
NULL NULL NULL
NULL NULL NULL
(empty) (empty) (empty)
NULL NULL NULL
# test incorrect usage of ltrim/rtrim/trim
# Zero and three arguments are rejected. Note that the unescaped "()" in the
# patterns below is an empty regex group, so each pattern effectively matches the
# function name followed by anything (which also covers the 3-argument message).
statement error
select LTRIM()
----
<REGEX>:.*Binder Error: No function matches.*ltrim().*
statement error
select LTRIM('hello', 'world', 'aaa')
----
<REGEX>:.*Binder Error: No function matches.*ltrim().*
statement error
select RTRIM()
----
<REGEX>:.*Binder Error: No function matches.*rtrim().*
statement error
select RTRIM('hello', 'world', 'aaa')
----
<REGEX>:.*Binder Error: No function matches.*rtrim().*
# unlike LTRIM()/RTRIM(), an argument-less TRIM() is expected to fail in the
# parser rather than in the binder
statement error
select TRIM()
----
Parser Error: syntax error at or near ")"
statement error
select TRIM('hello', 'world', 'aaa')
----
<REGEX>:.*Binder Error: No function matches.*trim().*

View File

@@ -0,0 +1,54 @@
# name: test/sql/function/string/test_unicode.test
# description: UNICODE test
# group: [string]
statement ok
PRAGMA enable_verification
# UNICODE(s) yields the code point of the first character of s: NULL for NULL
# input and -1 for the empty string. The literals cover 1-, 2-, 3- and 4-byte
# UTF-8 encodings: '$' (U+0024), '¢' (U+00A2), '€' (U+20AC), '𐍈' (U+10348).
# test on scalars
query IIIIII
select UNICODE(NULL), UNICODE(''), UNICODE('$'), UNICODE('¢'), UNICODE('€'), UNICODE('𐍈')
----
NULL -1 36 162 8364 66376
# test on tables
# the queries have no ORDER BY, so expected rows are listed in insertion order
statement ok
CREATE TABLE strings(a STRING, b STRING)
statement ok
INSERT INTO strings VALUES ('', 'Zero'), ('$', NULL), ('¢','Two'), ('€', NULL), ('𐍈','Four')
query I
select UNICODE(a) FROM strings
----
-1
36
162
8364
66376
# only the first character counts ('Z' = 90, 'T' = 84, 'F' = 70); NULL stays NULL
query I
select UNICODE(b) FROM strings
----
90
NULL
84
NULL
70
# combined with a filter: only the three rows with a non-NULL b remain
query I
select UNICODE(a) FROM strings WHERE b IS NOT NULL
----
-1
162
66376
# test incorrect usage
# UNICODE takes exactly one argument; pin the bind failure instead of accepting any error
statement error
select UNICODE()
----
<REGEX>:.*Binder Error.*No function matches.*
statement error
select UNICODE(1, 2)
----
<REGEX>:.*Binder Error.*No function matches.*

View File

@@ -0,0 +1,39 @@
# name: test/sql/function/string/test_url_encode.test
# description: Test url_encode/url_decode
# group: [string]
statement ok
PRAGMA enable_verification
# empty input yields empty output for both functions
query II
SELECT url_encode(''), url_decode('')
----
(empty) (empty)
# NULL input yields NULL
query II
SELECT url_encode(NULL), url_decode(NULL)
----
NULL NULL
# decoding an encoded URL (with ':', '/' and spaces) restores the original text
query I
SELECT url_decode(url_encode('http://www.google.com/this is a long url'))
----
http://www.google.com/this is a long url
# verify round trips
# every code point from 0 to 999 must survive encode -> decode unchanged
query I
SELECT COUNT(*) from range(1000) t(n) WHERE url_decode(url_encode(chr(n::INT))) = chr(n::INT)
----
1000
# partial escapes and invalid escapes are included as literals
query IIII
SELECT url_decode('%'), url_decode('%5'), url_decode('%X'), url_decode('%%')
----
% %5 %X %%
# check invalid UTF8
# well-formed escapes whose decoded bytes (0xFF 0xFF 0xFF) are not valid UTF-8 must raise an error
statement error
select url_decode('%FF%FF%FF');
----
decoded value is invalid UTF8