# name: test/sql/function/generic/hash_func.test # description: Test HASH function # group: [generic] # Unsupported types foreach datatype ANY HASH POINTER statement error SELECT HASH(NULL::${datatype}); ---- endloop # NULLS of all scalar types should produce the same value on all platforms foreach datatype CHAR BLOB DATE TIME TIMETZ TIMESTAMP TIMESTAMPTZ DECIMAL(16,4) UUID query I SELECT HASH(NULL::${datatype}); ---- 13787848793156543929 endloop # # Nested types # # Structs statement ok CREATE TABLE structs AS SELECT * FROM (VALUES ({'i': 5, 's': 'string'}), ({'i': -2, 's': NULL}), ({'i': NULL, 's': 'not null'}), ({'i': NULL, 's': NULL}), (NULL) ) tbl(s); query II SELECT s, HASH(s) FROM structs ---- {'i': 5, 's': string} 312378390946197788 {'i': -2, 's': NULL} 13311620765177879553 {'i': NULL, 's': not null} 12187543307399756733 {'i': NULL, 's': NULL} 18212156630472451589 NULL 18212156630472451589 # Lists statement ok CREATE TABLE lists AS SELECT * FROM (VALUES ([1], ['TGTA']), ([1, 2], ['CGGT']), ([], ['CCTC']), ([1, 2, 3], ['TCTA']), ([1, 2, 3, 4, 5], ['AGGG']), (NULL, NULL) ) tbl(li, lg); query II SELECT li, HASH(li) FROM lists ---- [1] 4717996019076358352 [1, 2] 6530802887144669425 [] 13787848793156543929 [1, 2, 3] 12722334483198565868 [1, 2, 3, 4, 5] 6649915151332802727 NULL 13787848793156543929 # These should all be different query II SELECT lg, HASH(lg) FROM lists ---- [TGTA] 2473061308111828075 [CGGT] 17252230290449032892 [CCTC] 12469451733100292545 [TCTA] 16441147910138644840 [AGGG] 6734708784738468094 NULL 13787848793156543929 # Maps statement ok CREATE TABLE maps AS SELECT * FROM (VALUES (MAP([1], ['TGTA'])), (MAP([1, 2], ['CGGT', 'CCTC'])), (MAP([], [])), (MAP([1, 2, 3], ['TCTA', NULL, 'CGGT'])), (MAP([1, 2, 3, 4, 5], ['TGTA', 'CGGT', 'CCTC', 'TCTA', 'AGGG'])), (NULL) ) tbl(m); query II SELECT m, HASH(m) FROM maps ---- {1=TGTA} 7235425910004250312 {1=CGGT, 2=CCTC} 1011047862598495049 {} 13787848793156543929 {1=TCTA, 2=NULL, 3=CGGT} 6001596667924474868 {1=TGTA, 2=CGGT, 3=CCTC, 4=TCTA, 5=AGGG} 16287978232011168685 NULL 13787848793156543929 statement ok CREATE TABLE map_as_list AS SELECT * FROM (VALUES ([{'key':1, 'value':'TGTA'}]), ([{'key':1, 'value':'CGGT'}, {'key':2, 'value':'CCTC'}]), ([]), ([{'key':1, 'value':'TCTA'}, {'key':2, 'value':NULL}, {'key':3, 'value':'CGGT'}]), ([{'key':1, 'value':'TGTA'}, {'key':2, 'value':'CGGT'}, {'key':3, 'value':'CCTC'}, {'key':4, 'value':'TCTA'}, {'key':5, 'value':'AGGG'}]), (NULL) ) tbl(m); # Because the map has physical type LIST, it creates an identical hash when the same values are stored as list of key/val structs query I nosort map_hashes SELECT HASH(m) FROM maps query I nosort map_hashes SELECT HASH(m) FROM map_as_list # Enums statement ok CREATE TYPE resistor AS ENUM ( 'black', 'brown', 'red', 'orange', 'yellow', 'green', 'blue', 'violet', 'grey', 'white' ); statement ok CREATE TABLE enums (r resistor); statement ok INSERT INTO enums VALUES ('black'), ('brown'), ('red'), ('orange'), ('yellow'), ('green'), ('blue'), ('violet'), ('grey'), ('white'), (NULL) ; query II SELECT r, HASH(r) FROM enums; ---- black 0 brown 4717996019076358352 red 2060787363917578834 orange 8131803788478518982 yellow 8535942711051191036 green 4244145009296420692 blue 8888402906861678137 violet 8736873150706563146 grey 14111048738911615569 white 17319221087726947361 NULL 13787848793156543929 # # Variadic arguments # # Zero arguments are not allowed statement error SELECT HASH(); ---- statement error SELECT r, HASH() FROM enums; ---- # Multiple arguments of any kind are accepted query II SELECT r, HASH(r, 'capacitor') FROM enums; ---- black 16797622758688705282 brown 12620868779234625953 red 17584344400128560708 orange 268160620305560594 yellow 895888387990267895 green 16089427619650030004 blue 10156864916169405730 violet 3549084991787980581 grey 17281098274178594641 white 1655957553588749778 NULL 12320705626460735678 query II SELECT r, HASH('2022-02-12'::DATE, r) FROM enums; ---- black 4250466044961212059 brown 8900520483163022923 red 2766849995292148937 orange 5342755900462846045 yellow 5515065604690625639 green 7471453529827791 blue 4730260654388144290 violet 4882794310426623697 grey 17953657405078846666 white 14602512259699608250 NULL 9630093706189153058 query II SELECT r, HASH(r, r) FROM enums; ---- black 0 brown 523193599206204019 red 111573794787247892 orange 11131893570948557270 yellow 10594212293773127177 green 914862583577390562 blue 2211471294594404377 violet 11628961430775669869 grey 14203064203985765890 white 1133846801649713905 NULL 18212156630472451589 # # Bugs # # Issue #2498: Identical nested lists should have the same hash statement ok CREATE TABLE issue2498 AS SELECT * FROM (VALUES (24, {'x': [{'l4': [52, 53]}, {'l4': [54, 55]}]}), (34, {'x': [{'l4': [52, 53]}, {'l4': [54, 55]}]}) ) tbl(v, k); query II SELECT k, HASH(k) FROM issue2498 ---- {'x': [{'l4': [52, 53]}, {'l4': [54, 55]}]} 14225696893928945203 {'x': [{'l4': [52, 53]}, {'l4': [54, 55]}]} 14225696893928945203 # CombineHashScalar used to have an issue that made combining a hash with itself yield hashes that all have # a similar number of trailing zero's, so approx_count_distinct was off by a lot (and we had more collisions in HTs) query I select approx_count_distinct((range, range)) > 800_000 from range(1_000_000) ---- 1