Files
email-tracker/external/duckdb/test/sql/collate/test_icu_collate.test
2025-10-24 19:21:19 -05:00

153 lines
3.2 KiB
SQL

# name: test/sql/collate/test_icu_collate.test
# description: Test basic ICU extension usage
# group: [collate]
# every test below relies on collations registered by the ICU extension
require icu

# shared fixture: strings that differ by umlaut (ö) versus plain 'o' / 'oe'
statement ok
CREATE TABLE strings(s VARCHAR);

statement ok
INSERT INTO strings VALUES ('Gabel'), ('Göbel'), ('Goethe'), ('Goldmann'), ('Göthe'), ('Götz');
# ordering: with the German (de) collation the umlaut strings interleave with
# the plain 'Go…' strings instead of all sorting after them
query T
SELECT s
FROM strings
ORDER BY s COLLATE de;
----
Gabel
Göbel
Goethe
Goldmann
Göthe
Götz
# range filter: under the de collation both Gabel and Göbel compare below 'Goethe'
query T
SELECT * FROM strings WHERE 'Goethe' > s COLLATE de ORDER BY 1
----
Gabel
Göbel

# default binary collation: Göbel is not smaller than Goethe in UTF-8 byte order
# (the first byte of 'ö' is greater than 'o'), so only Gabel qualifies
query T
SELECT * FROM strings WHERE 'Goethe' > s ORDER BY 1
----
Gabel
# we can also combine this collation with NOCASE; the lowercase literal
# 'goethe' must then behave like 'Goethe' in the range filter above
query T
SELECT * FROM strings WHERE 'goethe' > s COLLATE de.NOCASE ORDER BY 1
----
Gabel
Göbel

# the order in which the two collations are chained does not change the result
query T
SELECT * FROM strings WHERE 'goethe' > s COLLATE NOCASE.de ORDER BY 1
----
Gabel
Göbel

# and with NOACCENT
query T
SELECT * FROM strings WHERE 'goethe' > s COLLATE NOACCENT.de ORDER BY 1
----
Gabel
Göbel
# japanese collation
# the unfiltered DELETE is intentional: it empties the shared fixture table for reuse
statement ok
DELETE FROM strings

statement ok
INSERT INTO strings VALUES ('賃貸人側連絡先 (Lessor side contact)'), ('賃借人側連絡先 (Lessee side contact)'), ('解約連絡先 (Termination contacts)'), ('更新連絡先 (Update contact)')

# default binary collation: rows come back in UTF-8 byte order of the leading kanji
query T
SELECT * FROM strings ORDER BY s
----
更新連絡先 (Update contact)
解約連絡先 (Termination contacts)
賃借人側連絡先 (Lessee side contact)
賃貸人側連絡先 (Lessor side contact)

# with the ja collation the first two rows swap relative to the binary order
query T
SELECT * FROM strings ORDER BY s COLLATE ja.NOCASE
----
解約連絡先 (Termination contacts)
更新連絡先 (Update contact)
賃借人側連絡先 (Lessee side contact)
賃貸人側連絡先 (Lessor side contact)
# test icu_sort_key function
statement ok
SELECT icu_sort_key('Ş', 'ro');

# 'DUCK_DUCK_ENUM' as the collation argument is expected to be rejected
statement error
SELECT icu_sort_key('goose', 'DUCK_DUCK_ENUM');
----
Invalid Input Error

# icu_noaccent is accepted as the collation argument for both cases of the ligature
statement ok
SELECT icu_sort_key('æ', 'icu_noaccent');

statement ok
SELECT icu_sort_key('Æ', 'icu_noaccent');
# issue duckdb/duckdb#9692
# chr(2*16*256+1*256+2*16+11) is code point 8491 (U+212B) and chr(12*16+5) is
# code point 197 (U+00C5); the records below expect the two to be treated as
# the same string under the da collation and after NFC normalization
query I
SELECT chr(2*16*256+1*256+2*16+11) COLLATE da = chr(12*16+5) COLLATE da;
----
true

query I
SELECT icu_sort_key(chr(2*16*256+1*256+2*16+11),'da')=icu_sort_key(chr(12*16+5),'da');
----
true

# equal strings must not compare as greater ...
query I
SELECT chr(2*16*256+1*256+2*16+11) COLLATE da > chr(12*16+5) COLLATE da;
----
false

# ... nor as smaller
query I
SELECT chr(2*16*256+1*256+2*16+11) COLLATE da < chr(12*16+5) COLLATE da;
----
false

# both rows are expected to fall into a single group of size 2
query I
SELECT count(*) FROM (SELECT chr(2*16*256+1*256+2*16+11) UNION SELECT chr(12*16+5)) AS t(s) GROUP BY s COLLATE da;
----
2

query I
SELECT nfc_normalize(chr(2*16*256+1*256+2*16+11))=nfc_normalize(chr(12*16+5));
----
true

query I
SELECT count(*) FROM (SELECT chr(2*16*256+1*256+2*16+11) UNION SELECT chr(12*16+5)) AS t(s) GROUP BY s COLLATE nfc;
----
2
# ICU noaccent collate
statement ok
CREATE TABLE t1 (c1 CHAR(10))

# upper- and lower-case letters with several accent variants of 'a', plus the æ ligature
statement ok
INSERT INTO t1 VALUES('z'),('Z'),('a'),('A'),('æ'),('Æ'),('à'),('À'),('á'),('Á'),('â'),('Â'),
('ã'),('Ã'),('ä'),('Ä'),('å'),('Å'),('b'),('B')

# grouping with NOCASE.ICU_NOACCENT is expected to fold every case/accent variant
# of 'a' into one group while æ/Æ form a group of their own
query I
SELECT GROUP_CONCAT(c1, '') as group_c1 FROM t1 GROUP BY c1 COLLATE "NOCASE.ICU_NOACCENT" ORDER BY group_c1 COLLATE "NOCASE.ICU_NOACCENT"
----
aAàÀáÁâÂãÃäÄåÅ
æÆ
bB
zZ

# chaining ICU_NOACCENT with NOACCENT must be accepted in either order
statement ok
SELECT 'Á' COLLATE "ICU_NOACCENT.NOACCENT"

statement ok
SELECT 'Á' COLLATE "NOACCENT.ICU_NOACCENT"