should be it

This commit is contained in:
2025-10-24 19:21:19 -05:00
parent a4b23fc57c
commit f09560c7b1
14047 changed files with 3161551 additions and 1 deletions

View File

@@ -0,0 +1,20 @@
# name: test/sql/collate/collate_filter_pushdown.test
# description: Test collation interacting with filter pushdown
# group: [collate]

# t0.c0 is BOOLEAN, t63.c0 is VARCHAR with an explicit C collation
statement ok
CREATE TABLE t0(c0 BOOLEAN, PRIMARY KEY(c0));

statement ok
CREATE TABLE t63(c0 VARCHAR COLLATE C, PRIMARY KEY(c0));

statement ok
insert into t0(c0) values (0.7);

statement ok
insert into t63(c0) values ('1');

# the NATURAL join matches the two tables on their shared column name c0
query I
SELECT t63.c0 FROM t0 NATURAL LEFT JOIN t63;
----
1

View File

@@ -0,0 +1,35 @@
# name: test/sql/collate/collate_in_subquery.test
# description: Test collations with IN
# group: [collate]

# uncorrelated
query I
select 'ABC' collate nocase in (select 'aBc');
----
true

# correlated
query I
select upper in (select lower) from (values ('ABC' COLLATE nocase, 'aBc' COLLATE nocase)) t(upper, lower);
----
true

# collation only on the left-hand side of IN
query I
select 'ABC' collate nocase in (select s) from (values ('aBc')) t(s);
----
true

# collation only inside the subquery
query I
select 'ABC' in (select s collate nocase) from (values ('aBc')) t(s);
----
true

# byte-identical strings on both sides
query I
select 'AB' collate nocase in (select 'AB');
----
true

query I
select 'Ab' collate nocase in (select 'Ab');
----
true

View File

@@ -0,0 +1,98 @@
# name: test/sql/collate/collate_like.test
# description: Test collations in the LIKE clause
# group: [collate]

statement ok
PRAGMA enable_verification

query I
SELECT 'a' LIKE 'A' COLLATE NOCASE
----
true

query I
SELECT 'a' NOT LIKE 'A' COLLATE NOCASE
----
false

query I
SELECT 'A' COLLATE NOCASE LIKE 'a'
----
true

# like optimization rules
query I
SELECT 'a' LIKE 'A%' COLLATE NOCASE
----
true

query I
SELECT 'A' COLLATE NOCASE LIKE '%A' COLLATE NOCASE
----
true

query I
SELECT 'a' COLLATE NOCASE LIKE '%A%' COLLATE NOCASE
----
true

query I
SELECT 'OX' COLLATE NOACCENT.NOCASE LIKE 'ö%'
----
true

#### Testing LIKE with collated columns
statement ok
CREATE TABLE t1 (c1 VARCHAR, pattern VARCHAR)

statement ok
INSERT INTO t1 VALUES('A', 'a'),('a', 'A'),('AAAA', 'AaAa'),('aaaa', 'baba')

# without a collation no row matches its own pattern
query I
SELECT c1 FROM t1 WHERE c1 LIKE pattern
----

query I
SELECT c1 FROM t1 WHERE c1 LIKE pattern COLLATE NOCASE
----
A
a
AAAA

# like escape
query I
SELECT 'a%ö' COLLATE NOACCENT LIKE 'a$%ö' ESCAPE '$'
----
true

query I
SELECT 'a%ö' COLLATE NOACCENT NOT LIKE 'a$%ö' ESCAPE '$'
----
false

# ilike
query I
SELECT 'oX' ILIKE 'Ö%'
----
false

query I
SELECT 'OX' COLLATE NOACCENT ILIKE 'ö%'
----
true

query I
SELECT 'öX' COLLATE NOACCENT NOT ILIKE 'Ö%'
----
false

# glob
query I
SELECT 'oX' GLOB 'O*'
----
false

query I
SELECT 'oX' COLLATE NOCASE GLOB 'O*'
----
true

View File

@@ -0,0 +1,14 @@
# name: test/sql/collate/collate_non_varchar.test
# description: Test collation of non-varchar columns
# group: [collate]

statement ok
PRAGMA enable_verification

statement ok
PRAGMA default_collation=NOCASE;

# grouping on an INT column must keep its type with a default collation set
query I
select typeof(x) from (select 1::INT as x group by x);
----
INTEGER

View File

@@ -0,0 +1,37 @@
# name: test/sql/collate/collate_order_by_alias.test
# description: Test collation of ORDER BY with alias
# group: [collate]

statement ok
CREATE TABLE CreditCardTable(id BIGINT, creditCard_number VARCHAR);

statement ok
CREATE TABLE CustomerTable(id BIGINT, pk BIGINT);

statement ok
INSERT INTO CreditCardTable VALUES (1, 'A'), (2, 'z');

statement ok
INSERT INTO CustomerTable VALUES (1, 100), (2, 0);

# the alias "pk" shadows the name of the CustomerTable.pk column;
# the expected rows are ordered by the aliased VARCHAR column
query II
select
    creditCard_number as "pk", CustomerTable.pk AS inner_pk
from
    CreditCardTable JOIN CustomerTable USING (id)
order by
    "pk" COLLATE NOCASE;
----
A	100
z	0

# positional reference
query II
select
    creditCard_number as "pk", CustomerTable.pk AS inner_pk
from
    CreditCardTable JOIN CustomerTable USING (id)
order by #1 COLLATE NOCASE;
----
A	100
z	0

View File

@@ -0,0 +1,14 @@
# name: test/sql/collate/collate_ordered_aggregate.test
# description: Test collation for ordered aggregates
# group: [collate]

statement ok
create table tbl(id int, val varchar);

statement ok
insert into tbl values (0, 'a'), (1, 'B');

# expects 'a' (id 0) to be ordered before 'B' (id 1) under nocase
query I
select list(id order by val collate nocase) from tbl;
----
[0, 1]

View File

@@ -0,0 +1,19 @@
# name: test/sql/collate/collate_subquery.test
# description: Test collate statement over a subquery
# group: [collate]

statement ok
create table t0(c1 varchar);

statement ok
insert into t0 values ('XXX');

# COLLATE applied to the result of a scalar subquery
query I
select (select c1 from t0) collate nocase;
----
XXX

query I
select (select c1 from t0) collate nocase='xxx';
----
true

View File

@@ -0,0 +1,106 @@
# name: test/sql/collate/icu_collation_propagation.test
# description: Test collations with string functions
# group: [collate]

statement ok
create table tbl (a varchar, b varchar);

statement ok
insert into tbl values ('ö', '>>>>>ö<<<<<'), ('o', '>>>>>o<<<<<'), ('p', '>>>>>p<<<<<');

require icu

# test propagation of collation through string functions
query I
select concat(a collate de, a) from tbl order by all;
----
oo
öö
pp

query I
select lower(a collate de) from tbl order by all;
----
o
ö
p

query I
select upper(a collate de) from tbl order by all;
----
O
Ö
P

query I
select trim(b collate de, '<>') from tbl order by all
----
o
ö
p

query I
select ltrim(b collate de, '<>') from tbl order by all
----
o<<<<<
ö<<<<<
p<<<<<

query I
select rtrim(b collate de, '<>') from tbl order by all
----
>>>>>o
>>>>>ö
>>>>>p

query I
select repeat(a collate de, 10) from tbl order by all;
----
oooooooooo
öööööööööö
pppppppppp

query I
select left(b collate de, 6) from tbl order by all;
----
>>>>>o
>>>>>ö
>>>>>p

query I
select right(b collate de, 6) from tbl order by all;
----
o<<<<<
ö<<<<<
p<<<<<

query I
select right(left(b collate de, 6), 1) from tbl order by all;
----
o
ö
p

query I
select reverse(a collate de) from tbl order by all;
----
o
ö
p

# test application of collation
query I
select a from tbl where contains(b collate de, 'o') order by all
----
o

query I
select a from tbl where starts_with(b collate de, '>>>>>o') order by all
----
o

query I
select a from tbl where b collate de like '%>o<%' order by all
----
o

View File

@@ -0,0 +1,69 @@
# name: test/sql/collate/test_collate_accent_insensitive.test
# description: Test accent insensitive collation
# group: [collate]

statement ok
CREATE TABLE collate_test(s VARCHAR COLLATE NOACCENT)

statement ok
INSERT INTO collate_test VALUES ('Mühleisen'), ('Hëllö')

# collate in equality
query T
SELECT * FROM collate_test WHERE s='Muhleisen'
----
Mühleisen

# differs in case from the stored value; only checked for successful execution
statement ok
SELECT * FROM collate_test WHERE s='mühleisen'

query T
SELECT * FROM collate_test WHERE s='Hello'
----
Hëllö

# join with collation
statement ok
CREATE TABLE collate_join_table(s VARCHAR, i INTEGER)

statement ok
INSERT INTO collate_join_table VALUES ('Hello', 1), ('Muhleisen', 3)

query TTI
SELECT collate_test.s, collate_join_table.s, i FROM collate_test JOIN collate_join_table ON (collate_test.s=collate_join_table.s) ORDER BY 1
----
Hëllö	Hello	1
Mühleisen	Muhleisen	3

statement ok
DROP TABLE collate_test

# ORDER BY with collation
statement ok
CREATE TABLE collate_test(s VARCHAR COLLATE NOACCENT)

statement ok
INSERT INTO collate_test VALUES ('Hällo'), ('Hallo'), ('Hello')

query T
SELECT * FROM collate_test ORDER BY s
----
Hällo
Hallo
Hello

# DISTINCT with collation
statement ok
DROP TABLE collate_test

statement ok
CREATE TABLE collate_test(s VARCHAR COLLATE NOACCENT)

statement ok
INSERT INTO collate_test VALUES ('Hällo'), ('Hallo')

query T
SELECT DISTINCT s FROM collate_test
----
Hällo

View File

@@ -0,0 +1,82 @@
# name: test/sql/collate/test_collate_and_grouping_sets.test
# description: Test collation and grouping sets.
# group: [collate]

require icu

statement ok
set default_collation=c;

statement ok
CREATE TABLE sales (
    product_id INT,
    region VARCHAR(50),
    year INT,
    amount_sold DECIMAL(10,2)
);

statement ok
INSERT INTO sales VALUES
    (1, 'North', 2020, 1000.00),
    (1, 'North', 2021, 1500.00),
    (1, 'South', 2020, 800.00),
    (1, 'South', 2021, 700.00),
    (2, 'North', 2020, 500.00),
    (2, 'North', 2021, 600.00),
    (2, 'South', 2020, 400.00),
    (2, 'South', 2021, 550.00);

statement ok
set default_collation=c;

# queries sharing a nosort label must produce identical results;
# here the same grouping-sets query runs under the c and en_us default collations
query III nosort grouping_sets_collation_result
SELECT product_id, region, SUM(amount_sold) AS total_amount
FROM sales
GROUP BY GROUPING SETS ((product_id), (region), ())
ORDER BY product_id, region, total_amount;

statement ok
set default_collation=en_us;

query III nosort grouping_sets_collation_result
SELECT product_id, region, SUM(amount_sold) AS total_amount
FROM sales
GROUP BY GROUPING SETS ((product_id), (region), ())
ORDER BY product_id, region, total_amount;

statement ok
set default_collation=c

# same comparison for the UNION ALL formulation of the grouping sets
query III nosort union_groups_collation_result
select NULL product_id, region, sum(amount_sold) from sales group by region
UNION ALL
select NULL product_id, NULL region, sum(amount_sold) from sales
UNION ALL
select product_id, NULL region, sum(amount_sold) from sales group by product_id order by 1,2;

statement ok
set default_collation=en_us;

query III nosort union_groups_collation_result
select NULL product_id, region, sum(amount_sold) from sales group by region
UNION ALL
select NULL product_id, NULL region, sum(amount_sold) from sales
UNION ALL
select product_id, NULL region, sum(amount_sold) from sales group by product_id order by 1,2;

# also test that union all is the same as using grouping sets
query III nosort grouping_sets_collation
SELECT product_id, region, SUM(amount_sold) AS total_amount
FROM sales
GROUP BY GROUPING SETS ((product_id), (region), ())
ORDER BY product_id, region, total_amount;

query III nosort grouping_sets_collation
select NULL product_id, region, sum(amount_sold) from sales group by region
UNION ALL
select NULL product_id, NULL region, sum(amount_sold) from sales
UNION ALL
select product_id, NULL region, sum(amount_sold) from sales group by product_id order by 1,2;

View File

@@ -0,0 +1,37 @@
# name: test/sql/collate/test_collate_between.test
# description: Test collations with the BETWEEN operator
# group: [collate]

statement ok
CREATE TABLE collate_test(s VARCHAR, t VARCHAR)

statement ok
INSERT INTO collate_test VALUES ('mark', 'muhleisen')

# no collation: the accented input is not within the range
query I
SELECT COUNT(*) FROM collate_test WHERE 'mórritz' BETWEEN s AND t
----
0

# a collation on any one of the three BETWEEN operands gives a match
query I
SELECT COUNT(*) FROM collate_test WHERE 'mórritz' COLLATE NOACCENT BETWEEN s AND t
----
1

query I
SELECT COUNT(*) FROM collate_test WHERE 'mórritz' BETWEEN (s COLLATE NOACCENT) AND t
----
1

query I
SELECT COUNT(*) FROM collate_test WHERE 'mórritz' BETWEEN s AND (t COLLATE NOACCENT)
----
1

# the same unannotated query as the first one, now with a default collation set
statement ok
PRAGMA default_collation='NOACCENT'

query I
SELECT COUNT(*) FROM collate_test WHERE 'mórritz' BETWEEN s AND t
----
1

View File

@@ -0,0 +1,83 @@
# name: test/sql/collate/test_collate_case_insensitive.test
# description: Test case insensitive collation
# group: [collate]

statement ok
CREATE TABLE collate_test(s VARCHAR COLLATE NOCASE)

statement ok
INSERT INTO collate_test VALUES ('hello'), ('WoRlD'), ('world'), ('Mühleisen')

# collate in equality
query T
SELECT * FROM collate_test WHERE s='HeLlo'
----
hello

query T
SELECT * FROM collate_test WHERE s='MÜhleisen'
----
Mühleisen

query T
SELECT * FROM collate_test WHERE s='world'
----
WoRlD
world

# collate in join
statement ok
CREATE TABLE collate_join_table(s VARCHAR, i INTEGER)

statement ok
INSERT INTO collate_join_table VALUES ('HeLlO', 1), ('mÜHLEISEN', 3)

query TTI
SELECT collate_test.s, collate_join_table.s, i FROM collate_test JOIN collate_join_table ON (collate_test.s=collate_join_table.s) ORDER BY i
----
hello	HeLlO	1
Mühleisen	mÜHLEISEN	3

statement ok
DROP TABLE collate_test

# ORDER BY with collation
statement ok
CREATE TABLE collate_test(s VARCHAR COLLATE NOCASE)

statement ok
INSERT INTO collate_test VALUES ('Hallo'), ('ham'), ('HELLO'), ('hElp')

query T
SELECT * FROM collate_test ORDER BY s
----
Hallo
ham
HELLO
hElp

statement ok
DROP TABLE collate_test

# DISTINCT with collation
statement ok
CREATE TABLE collate_test(s VARCHAR COLLATE NOCASE)

statement ok
INSERT INTO collate_test VALUES ('Hallo'), ('hallo')

query T
SELECT DISTINCT s FROM collate_test
----
Hallo

# LIKE with collation: not yet supported
# NOTE(review): the disabled checks below are leftover C++ test-harness calls, not
# sqllogictest records. test/sql/collate/collate_like.test exercises LIKE together with
# COLLATE expressions; confirm whether LIKE on a NOCASE column can now be enabled here.
# REQUIRE_NO_FAIL(con.Query("DROP TABLE collate_test"));
# REQUIRE_NO_FAIL(con.Query("CREATE TABLE collate_test(s VARCHAR COLLATE NOCASE)"));
# REQUIRE_NO_FAIL(con.Query("INSERT INTO collate_test VALUES ('Hallo'), ('hallo')"));
# result = con.Query("SELECT * FROM collate_test WHERE s LIKE 'h%'");
# REQUIRE(CHECK_COLUMN(result, 0, {"Hallo", "hallo"}));
# result = con.Query("SELECT * FROM collate_test WHERE s LIKE 'HA%'");
# REQUIRE(CHECK_COLUMN(result, 0, {"Hallo", "hallo"}));

View File

@@ -0,0 +1,56 @@
# name: test/sql/collate/test_collate_expression.test
# description: Test COLLATE in individual expressions
# group: [collate]

statement ok
CREATE TABLE collate_test(s VARCHAR)

statement ok
INSERT INTO collate_test VALUES ('hEllO'), ('WöRlD'), ('wozld')

# COLLATE on a constant comparison
query T
SELECT 'hëllo' COLLATE NOACCENT='hello'
----
1

query T
SELECT 'hëllo' COLLATE POSIX='hello'
----
0

query T
SELECT 'hëllo' COLLATE C='hello'
----
0

# no collation on the column or the comparison; only checked for successful execution
statement ok
SELECT * FROM collate_test WHERE s='hello'

# COLLATE on either side of the comparison
query T
SELECT * FROM collate_test WHERE s='hello' COLLATE NOCASE
----
hEllO

query T
SELECT * FROM collate_test WHERE s COLLATE NOCASE='hello'
----
hEllO

# different collations on the two sides of one comparison are rejected
statement error
SELECT * FROM collate_test WHERE s COLLATE NOCASE='hello' COLLATE NOACCENT
----

# COLLATE in ORDER BY
query T
SELECT * FROM collate_test ORDER BY s COLLATE NOCASE
----
hEllO
wozld
WöRlD

query T
SELECT * FROM collate_test ORDER BY s COLLATE NOCASE.NOACCENT
----
hEllO
WöRlD
wozld

View File

@@ -0,0 +1,11 @@
# name: test/sql/collate/test_collate_list.test
# description: Get list of collations
# group: [collate]

statement ok
PRAGMA collations

# assigning a value to the collations pragma is expected to fail
statement error
PRAGMA collations=3
----
<REGEX>:.*Catalog Error: unrecognized configuration parameter.*

View File

@@ -0,0 +1,118 @@
# name: test/sql/collate/test_collate_orderby_column_number.test
# description: Test collation on ORDER BY with column number
# group: [collate]

query I
SELECT 'a' AS c1 ORDER BY 1 COLLATE NOCASE;
----
a

query I
SELECT 'a' ORDER BY 1 COLLATE NOCASE;
----
a

query II
SELECT 'A', 'B' ORDER BY 2 COLLATE NOCASE;
----
A	B

query I
SELECT 999::VARCHAR ORDER BY 1 COLLATE NOCASE;
----
999

# FROM without a table reference: expected to fail in the parser
statement error
SELECT 999 FROM ORDER BY 1 COLLATE NOCASE;
----
Parser Error: syntax error at or near "ORDER"

statement error
SELECT 'a' FROM ORDER BY -1 COLLATE NOCASE;
----
Parser Error: syntax error at or near "ORDER"

statement error
SELECT 'a' FROM ORDER BY 0 COLLATE NOCASE;
----
Parser Error: syntax error at or near "ORDER"

statement error
SELECT 'a' FROM ORDER BY -0 COLLATE NOCASE;
----
Parser Error: syntax error at or near "ORDER"

statement ok
CREATE TABLE collate_test(s VARCHAR);

statement ok
INSERT INTO collate_test VALUES ('ã'),('B'),('a'),('A');

query I
SELECT s FROM collate_test ORDER BY 1 COLLATE NOCASE;
----
a
A
B
ã

query I
SELECT s FROM collate_test ORDER BY s COLLATE NOCASE;
----
a
A
B
ã

query I
SELECT CONCAT(s, s) FROM collate_test ORDER BY 1 COLLATE NOCASE;
----
aa
AA
BB
ãã

query II
SELECT CONCAT(s, s) AS concat, concat FROM collate_test ORDER BY 2 COLLATE NOCASE;
----
aa	aa
AA	AA
BB	BB
ãã	ãã

query I
SELECT collate_test.s FROM collate_test ORDER BY 1 COLLATE NOCASE;
----
a
A
B
ã

query I
SELECT CASE WHEN s[1]='a' THEN s ELSE NULL END FROM collate_test ORDER BY 1 COLLATE NOCASE;
----
a
NULL
NULL
NULL

query I
SELECT (SELECT s) FROM collate_test ORDER BY 1 COLLATE NOCASE;
----
a
A
B
ã

# c11 does not match the alias c1
statement error
SELECT (SELECT s) AS c1 FROM collate_test ORDER BY c11 COLLATE NOCASE;
----

# one output row per table row: four rows, in the same order as the plain column above
query I
SELECT concat('a', (SELECT s)) FROM collate_test ORDER BY 1 COLLATE NOCASE;
----
aa
aA
aB
aã

View File

@@ -0,0 +1,57 @@
# name: test/sql/collate/test_collate_pivot.test
# description: Test collation and PIVOT
# group: [collate]

statement ok
PRAGMA default_collation=NOACCENT;

statement ok
CREATE TABLE Cities (
    Country VARCHAR, Name VARCHAR, Year INTEGER, Population INTEGER
);

statement ok
INSERT INTO Cities VALUES ('NL', 'Amsterdam', 2010, 1005);

statement ok
INSERT INTO Cities VALUES ('NL', 'Amsterdam', 2011, 1065);

statement ok
INSERT INTO Cities VALUES ('NL', 'Amsterdam', 2012, 1158);

statement ok
INSERT INTO Cities VALUES ('US', 'Seattle', 2013, 564);

statement ok
INSERT INTO Cities VALUES ('US', 'Seattle', 2014, 608);

statement ok
INSERT INTO Cities VALUES ('US', 'Seattle', 2015, 738);

statement ok
INSERT INTO Cities VALUES ('US', 'New York City', 2016, 8015);

statement ok
INSERT INTO Cities VALUES ('US', 'New York City', 2017, 8175);

statement ok
INSERT INTO Cities VALUES ('US', 'New York City', 2018, 8772);

statement ok
INSERT INTO Cities VALUES ('US', 'New York City', 2019, 8772);

statement ok
INSERT INTO Cities VALUES ('US', 'New York City', 2020, 8772);

statement ok
SET pivot_filter_threshold=99

# the loop body runs the PIVOT once with threshold 99 and once with threshold 0,
# because the threshold is lowered at the end of the first iteration
loop i 0 2

statement ok
PIVOT Cities ON Year USING sum(Population);

statement ok
SET pivot_filter_threshold=0

endloop

View File

@@ -0,0 +1,116 @@
# name: test/sql/collate/test_collation_propagation.test
# description: Test collations with string functions.
# group: [collate]

statement ok
create table tbl (a varchar, b varchar);

statement ok
insert into tbl values ('ö', '>>>>>ö<<<<<'), ('o', '>>>>>o<<<<<'), ('p', '>>>>>p<<<<<');

query I
select a from tbl where contains(b collate nocase, 'O') order by all
----
o

# test propagation of collation through string functions
query I
select concat(a collate noaccent, a) from tbl order by all;
----
öö
oo
pp

query I
select lower(a collate noaccent) from tbl order by all;
----
ö
o
p

query I
select upper(a collate noaccent) from tbl order by all;
----
Ö
O
P

query I
select trim(b collate noaccent, '<>') from tbl order by all
----
ö
o
p

query I
select ltrim(b collate noaccent, '<>') from tbl order by all
----
ö<<<<<
o<<<<<
p<<<<<

query I
select rtrim(b collate noaccent, '<>') from tbl order by all
----
>>>>>ö
>>>>>o
>>>>>p

query I
select repeat(a collate noaccent, 10) from tbl order by all;
----
öööööööööö
oooooooooo
pppppppppp

query I
select left(b collate noaccent, 6) from tbl order by all;
----
>>>>>ö
>>>>>o
>>>>>p

query I
select right(b collate noaccent, 6) from tbl order by all;
----
ö<<<<<
o<<<<<
p<<<<<

query I
select right(left(b collate noaccent, 6), 1) from tbl order by all;
----
ö
o
p

query I
select reverse(a collate noaccent) from tbl order by all;
----
ö
o
p

# test pushing collations
query I
select a from tbl where contains(b collate noaccent, 'o') order by all
----
o
ö

# collation attached to the search string instead of the column
query I
select a from tbl where contains(b, 'ö' collate noaccent) order by all
----
o
ö

query I
select a from tbl where contains(b collate nocase, 'O') order by all
----
o

query I
select a from tbl where starts_with(b collate noaccent, '>>>>>o') order by all
----
o
ö

View File

@@ -0,0 +1,25 @@
# name: test/sql/collate/test_combined_collation.test
# description: Test combined collations
# group: [collate]

statement ok
CREATE TABLE collate_test(s VARCHAR COLLATE NOACCENT.NOCASE)

statement ok
INSERT INTO collate_test VALUES ('Mühleisen'), ('Hëllö')

# differs from the stored value by accent only
query T
SELECT * FROM collate_test WHERE s='Muhleisen'
----
Mühleisen

# differs by accent and case
query T
SELECT * FROM collate_test WHERE s='muhleisen'
----
Mühleisen

# differs by case and uses other accents than the stored value
query T
SELECT * FROM collate_test WHERE s='hEllô'
----
Hëllö

View File

@@ -0,0 +1,40 @@
# name: test/sql/collate/test_default_collations.test
# description: Test default collations
# group: [collate]

statement ok
PRAGMA default_collation='NOCASE'

statement ok
CREATE TABLE collate_test(s VARCHAR)

statement ok
INSERT INTO collate_test VALUES ('hEllO'), ('WöRlD'), ('wozld')

# the default collation also applies to a comparison of two constants:
# the filter holds for all three rows
query I
SELECT COUNT(*) FROM collate_test WHERE 'BlA'='bLa'
----
3

query T
SELECT * FROM collate_test WHERE s='hello'
----
hEllO

query T
SELECT * FROM collate_test ORDER BY s
----
hEllO
wozld
WöRlD

# changing the default collation changes the order of the same query
statement ok
PRAGMA default_collation='NOCASE.NOACCENT'

query T
SELECT * FROM collate_test ORDER BY s
----
hEllO
WöRlD
wozld

View File

@@ -0,0 +1,152 @@
# name: test/sql/collate/test_icu_collate.test
# description: Test basic ICU extension usage
# group: [collate]

require icu

statement ok
CREATE TABLE strings(s VARCHAR);

statement ok
INSERT INTO strings VALUES ('Gabel'), ('Göbel'), ('Goethe'), ('Goldmann'), ('Göthe'), ('Götz');

# ordering
query T
SELECT * FROM strings ORDER BY s COLLATE de;
----
Gabel
Göbel
Goethe
Goldmann
Göthe
Götz

# range filter
query T
SELECT * FROM strings WHERE 'Goethe' > s COLLATE de ORDER BY 1
----
Gabel
Göbel

# default binary collation, Göbel is not smaller than Gabel in UTF8 encoding
query T
SELECT * FROM strings WHERE 'Goethe' > s ORDER BY 1
----
Gabel

# we can also combine this collation with NOCASE
query T
SELECT * FROM strings WHERE 'goethe' > s COLLATE de.NOCASE ORDER BY 1
----
Gabel
Göbel

query T
SELECT * FROM strings WHERE 'goethe' > s COLLATE NOCASE.de ORDER BY 1
----
Gabel
Göbel

# and with NOACCENT
query I
SELECT * FROM strings WHERE 'goethe' > s COLLATE NOACCENT.de ORDER BY 1
----
Gabel
Göbel

# japanese collation
statement ok
DELETE FROM strings

statement ok
INSERT INTO strings VALUES ('賃貸人側連絡先 (Lessor side contact)'), ('賃借人側連絡先 (Lessee side contact)'), ('解約連絡先 (Termination contacts)'), ('更新連絡先 (Update contact)')

# no COLLATE clause; the expected rows are the full inserted strings
query T
SELECT * FROM strings ORDER BY s
----
更新連絡先 (Update contact)
解約連絡先 (Termination contacts)
賃借人側連絡先 (Lessee side contact)
賃貸人側連絡先 (Lessor side contact)

# with the ja collation the first two rows swap places
query T
SELECT * FROM strings ORDER BY s COLLATE ja.NOCASE
----
解約連絡先 (Termination contacts)
更新連絡先 (Update contact)
賃借人側連絡先 (Lessee side contact)
賃貸人側連絡先 (Lessor side contact)

# test icu_sort_key function
statement ok
select icu_sort_key('Ş', 'ro');

statement error
SELECT icu_sort_key('goose', 'DUCK_DUCK_ENUM');
----
Invalid Input Error

statement ok
select icu_sort_key('æ', 'icu_noaccent');

statement ok
select icu_sort_key('Æ', 'icu_noaccent');

# issue duckdb/duckdb#9692
# chr(2*16*256+1*256+2*16+11) is code point 0x212B and chr(12*16+5) is code point 0xC5
query I
select chr(2*16*256+1*256+2*16+11) collate da =chr(12*16+5) collate da;
----
true

query I
select icu_sort_key(chr(2*16*256+1*256+2*16+11),'da')=icu_sort_key(chr(12*16+5),'da');
----
true

# the two strings compare equal under da (checked above), so neither > nor < may hold
query I
select chr(2*16*256+1*256+2*16+11) collate da > chr(12*16+5) collate da;
----
false

query I
select chr(2*16*256+1*256+2*16+11) collate da < chr(12*16+5) collate da;
----
false

query I
select count(*) from (select chr(2*16*256+1*256+2*16+11) union select chr(12*16+5)) as t(s) group by s collate da;
----
2

query I
select nfc_normalize(chr(2*16*256+1*256+2*16+11))=nfc_normalize(chr(12*16+5));
----
true

query I
select count(*) from (select chr(2*16*256+1*256+2*16+11) union select chr(12*16+5)) as t(s) group by s collate nfc;
----
2

# ICU noaccent collate
statement ok
CREATE TABLE t1 (c1 CHAR(10))

statement ok
INSERT INTO t1 VALUES('z'),('Z'),('a'),('A'),('æ'),('Æ'),('à'),('À'),('á'),('Á'),('â'),('Â'),
('ã'),('Ã'),('ä'),('Ä'),('å'),('Å'),('b'),('B')

query I
SELECT GROUP_CONCAT(c1, '') as group_c1 FROM t1 GROUP BY c1 COLLATE "NOCASE.ICU_NOACCENT" ORDER BY group_c1 COLLATE "NOCASE.ICU_NOACCENT"
----
aAàÀáÁâÂãÃäÄåÅ
æÆ
bB
zZ

# ICU_NOACCENT and the built-in NOACCENT can be combined in either order
statement ok
SELECT 'Á' COLLATE "ICU_NOACCENT.NOACCENT"

statement ok
SELECT 'Á' COLLATE "NOACCENT.ICU_NOACCENT"

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,176 @@
# name: test/sql/collate/test_strpos_collate.test
# description: Test that strpos/instr/position functions properly support COLLATE NOCASE
# group: [collate]

# Test that strpos works with COLLATE NOCASE
query I
SELECT strpos('HELLO' COLLATE NOCASE, 'el')
----
2

# Test with different case combinations
query I
SELECT strpos('HELLO' COLLATE NOCASE, 'EL')
----
2

query I
SELECT strpos('hello' COLLATE NOCASE, 'EL')
----
2

query I
SELECT strpos('HeLLo' COLLATE NOCASE, 'el')
----
2

# Test instr function (alias for strpos)
query I
SELECT instr('HELLO' COLLATE NOCASE, 'el')
----
2

query I
SELECT instr('hello' COLLATE NOCASE, 'EL')
----
2

# Test position function (alias for strpos)
query I
SELECT position('el' IN ('HELLO' COLLATE NOCASE))
----
2

query I
SELECT position('EL' IN ('hello' COLLATE NOCASE))
----
2

# Test edge cases
query I
SELECT strpos('HELLO' COLLATE NOCASE, '')
----
1

query I
SELECT strpos('HELLO' COLLATE NOCASE, 'xyz')
----
0

query I
SELECT strpos('HELLO' COLLATE NOCASE, 'HELLO')
----
1

# Test with longer strings
query I
SELECT strpos('Hello World' COLLATE NOCASE, 'world')
----
7

query I
SELECT strpos('HELLO WORLD' COLLATE NOCASE, 'o w')
----
5

# Test that other string functions still work (regression test)
query T
SELECT contains('HELLO' COLLATE NOCASE, 'hEllO')
----
true

query T
SELECT starts_with('HELLO' COLLATE NOCASE, 'heL')
----
true

# Test with table data
statement ok
CREATE TABLE collate_test(s VARCHAR COLLATE NOCASE)

statement ok
INSERT INTO collate_test VALUES ('Hello World'), ('HELLO WORLD'), ('hElLo WoRlD')

# Test basic functionality
query I
SELECT strpos(s COLLATE NOCASE, 'hello') FROM collate_test ORDER BY s
----
1
1
1

query I
SELECT strpos(s COLLATE NOCASE, 'world') FROM collate_test ORDER BY s
----
7
7
7

# Test with NOCASE on both arguments, then on the first argument only
query I
SELECT strpos('HELLO' COLLATE NOCASE, 'el' COLLATE NOCASE)
----
2

query I
SELECT strpos('HELLO' COLLATE NOCASE, 'EL')
----
2

# Test that non-collated versions still work
query I
SELECT strpos('HELLO', 'el')
----
0

query I
SELECT strpos('HELLO', 'EL')
----
2

# Test with empty strings and edge cases
query I
SELECT strpos('' COLLATE NOCASE, '')
----
1

query I
SELECT strpos('' COLLATE NOCASE, 'a')
----
0

query I
SELECT strpos('a' COLLATE NOCASE, '')
----
1

# Test with special characters
query I
SELECT strpos('HéLLO' COLLATE NOCASE, 'éll')
----
2

query I
SELECT strpos('HÉLLO' COLLATE NOCASE, 'éll')
----
2

# Test that the fix doesn't break existing behavior
query I
SELECT strpos('HELLO', 'HELLO')
----
1

query I
SELECT strpos('HELLO', '')
----
1

query I
SELECT strpos('HELLO', 'xyz')
----
0

# Clean up
statement ok
DROP TABLE collate_test

View File

@@ -0,0 +1,34 @@
# name: test/sql/collate/test_unsupported_collations.test
# description: Test unsupported collations
# group: [collate]

# unrecognized collation
statement error
CREATE TABLE collate_test(s VARCHAR COLLATE blabla)
----
Catalog Error: Collation with name blabla does not exist!

# non-varchar columns cannot have collations
statement error
CREATE TABLE collate_test(s INTEGER COLLATE blabla)
----
Parser Error: Only VARCHAR columns can have collations!

# we can combine multiple of the same collation
statement ok
CREATE TABLE collate_test(s VARCHAR COLLATE NOACCENT.NOACCENT)

# a collation name must be an identifier: numeric and string literals are syntax errors
statement error
CREATE TABLE collate_test(s VARCHAR COLLATE 1)
----
Parser Error: syntax error at or near "1"

statement error
CREATE TABLE collate_test(s VARCHAR COLLATE 'hello')
----
Parser Error: syntax error at or near "'hello'"

# unrecognized default collation
statement error
PRAGMA default_collation='blabla'
----
Catalog Error: Collation with name blabla does not exist!