# name: test/sql/collate/test_icu_collate.test
# description: Test basic ICU extension usage
# group: [collate]

# Every record below relies on the ICU extension: language collations
# (de, ja, da), the icu_noaccent collation and the icu_sort_key function.
require icu

statement ok
CREATE TABLE strings(s VARCHAR);

statement ok
INSERT INTO strings VALUES ('Gabel'), ('Göbel'), ('Goethe'), ('Goldmann'), ('Göthe'), ('Götz');

# ordering
query T
SELECT * FROM strings ORDER BY s COLLATE de;
----
Gabel
Göbel
Goethe
Goldmann
Göthe
Götz

# range filter
query T
SELECT * FROM strings WHERE 'Goethe' > s COLLATE de ORDER BY 1
----
Gabel
Göbel

# default binary collation, Göbel is not smaller than Gabel in UTF8 encoding
query T
SELECT * FROM strings WHERE 'Goethe' > s ORDER BY 1
----
Gabel

# we can also combine this collation with NOCASE
# (the lowercase literal 'goethe' only compares as expected because of NOCASE;
# both orderings of the combined collation name are exercised)
query T
SELECT * FROM strings WHERE 'goethe' > s COLLATE de.NOCASE ORDER BY 1
----
Gabel
Göbel

query T
SELECT * FROM strings WHERE 'goethe' > s COLLATE NOCASE.de ORDER BY 1
----
Gabel
Göbel

# and with NOACCENT
query T
SELECT * FROM strings WHERE 'goethe' > s COLLATE NOACCENT.de ORDER BY 1
----
Gabel
Göbel

# japanese collation
# the table is intentionally emptied (no WHERE clause) so that it can be reused
statement ok
DELETE FROM strings

statement ok
INSERT INTO strings VALUES
	('賃貸人側連絡先 (Lessor side contact)'),
	('賃借人側連絡先 (Lessee side contact)'),
	('解約連絡先 (Termination contacts)'),
	('更新連絡先 (Update contact)')

# default binary ordering
query T
SELECT * FROM strings ORDER BY s
----
更新連絡先 (Update contact)
解約連絡先 (Termination contacts)
賃借人側連絡先 (Lessee side contact)
賃貸人側連絡先 (Lessor side contact)

# the ja collation swaps the first two rows compared to the binary ordering
query T
SELECT * FROM strings ORDER BY s COLLATE ja.NOCASE
----
解約連絡先 (Termination contacts)
更新連絡先 (Update contact)
賃借人側連絡先 (Lessee side contact)
賃貸人側連絡先 (Lessor side contact)

# test icu_sort_key function
statement ok
select icu_sort_key('Ş', 'ro');

# an unknown collation name must be rejected
statement error
SELECT icu_sort_key('goose', 'DUCK_DUCK_ENUM');
----
Invalid Input Error

statement ok
select icu_sort_key('æ', 'icu_noaccent');

statement ok
select icu_sort_key('Æ', 'icu_noaccent');

# issue duckdb/duckdb#9692
# chr(2*16*256+1*256+2*16+11) is U+212B (ANGSTROM SIGN) and chr(12*16+5) is
# U+00C5 (LATIN CAPITAL LETTER A WITH RING ABOVE); the records below expect
# the two code points to be treated as the same string under a collation.
query I
select chr(2*16*256+1*256+2*16+11) collate da =chr(12*16+5) collate da;
----
True

query I
select icu_sort_key(chr(2*16*256+1*256+2*16+11),'da')=icu_sort_key(chr(12*16+5),'da');
----
True

# the values compare equal, so neither strict comparison may hold
query I
select chr(2*16*256+1*256+2*16+11) collate da > chr(12*16+5) collate da;
----
FALSE

query I
select chr(2*16*256+1*256+2*16+11) collate da < chr(12*16+5) collate da;
----
FALSE

# both values must end up in a single group containing two rows
query I
select count(*) from (select chr(2*16*256+1*256+2*16+11) union select chr(12*16+5)) as t(s) group by s collate da;
----
2

query I
select nfc_normalize(chr(2*16*256+1*256+2*16+11))=nfc_normalize(chr(12*16+5));
----
TRUE

query I
select count(*) from (select chr(2*16*256+1*256+2*16+11) union select chr(12*16+5)) as t(s) group by s collate nfc;
----
2

# ICU noaccent collate
statement ok
CREATE TABLE t1 (c1 CHAR(10))

statement ok
INSERT INTO t1 VALUES('z'),('Z'),('a'),('A'),('æ'),('Æ'),('à'),('À'),('á'),('Á'),('â'),('Â'),
	('ã'),('Ã'),('ä'),('Ä'),('å'),('Å'),('b'),('B')

# case and accent variants of a letter collapse into one group;
# æ/Æ form a group of their own instead of being merged with a/A
query T
SELECT GROUP_CONCAT(c1, '') as group_c1 FROM t1 GROUP BY c1 COLLATE "NOCASE.ICU_NOACCENT" ORDER BY group_c1 COLLATE "NOCASE.ICU_NOACCENT"
----
aAàÀáÁâÂãÃäÄåÅ
æÆ
bB
zZ

# ICU_NOACCENT and NOACCENT can be combined in either order
statement ok
SELECT 'Á' COLLATE "ICU_NOACCENT.NOACCENT"

statement ok
SELECT 'Á' COLLATE "NOACCENT.ICU_NOACCENT"