# name: test/sql/aggregate/aggregates/histogram_exact.test
# description: Test histogram_exact
# group: [aggregates]

statement ok
PRAGMA enable_verification

# fixture: 15 observations, of which only the value 20 coincides with a bin used below
statement ok
CREATE TABLE obs(n BIGINT);

statement ok
INSERT INTO obs VALUES (0), (5), (7), (12), (20), (23), (24), (25), (26), (28), (31), (34), (36), (41), (47)

# histogram_exact only counts values that are equal to one of the supplied bins;
# every other value is collected in a single "other" bin
# the key used for that "other" bin depends on the data type of the bins

# integer bins: the "other" key is the largest value of the type
query I
SELECT histogram_exact(n, [10, 20, 30, 40, 50])
FROM obs
----
{10=0, 20=1, 30=0, 40=0, 50=0, 9223372036854775807=14}

# double bins: the "other" key is infinity
query I
SELECT histogram_exact(n::DOUBLE, [10, 20, 30, 40, 50])
FROM obs
----
{10.0=0, 20.0=1, 30.0=0, 40.0=0, 50.0=0, inf=14}

# date bins: the "other" key is infinity as well
query I
SELECT histogram_exact(
    (DATE '2000-01-01' + INTERVAL (n) DAYS)::DATE,
    [DATE '2000-01-01' + INTERVAL (x) DAYS FOR x IN [10, 20, 30, 40, 50]]
)
FROM obs
----
{2000-01-11=0, 2000-01-21=1, 2000-01-31=0, 2000-02-10=0, 2000-02-20=0, infinity=14}

# string bins: the "other" key is the empty string
query I
SELECT histogram_exact(n::VARCHAR, [10, 20, 30, 40, 50])
FROM obs
----
{10=0, 20=1, 30=0, 40=0, 50=0, ''=14}

# list bins: the "other" key is the empty list
query I
SELECT histogram_exact([n], [[x] FOR x IN [10, 20, 30, 40, 50]])
FROM obs
----
{[10]=0, [20]=1, [30]=0, [40]=0, [50]=0, []=14}

# is_histogram_other_bin reports whether a key is the "other" bin,
# which lets us replace it with a readable label
# (expected rows below are tab-separated: bin, count)
query II
SELECT
    CASE WHEN is_histogram_other_bin(bin) THEN '(other values)' ELSE bin::VARCHAR END AS bin,
    count
FROM (
    SELECT
        UNNEST(map_keys(hist)) AS bin,
        UNNEST(map_values(hist)) AS count
    FROM (SELECT histogram_exact(n, [10, 20, 30, 40, 50]) AS hist FROM obs)
)
----
10	0
20	1
30	0
40	0
50	0
(other values)	14

# the same relabeling applied to the output of the regular histogram aggregate
query II
SELECT
    CASE WHEN is_histogram_other_bin(bin) THEN '(other values)' ELSE bin::VARCHAR END AS bin,
    count
FROM (
    SELECT
        UNNEST(map_keys(hist)) AS bin,
        UNNEST(map_values(hist)) AS count
    FROM (SELECT histogram(n, [10, 20, 30, 40]) AS hist FROM obs)
)
----
10	3
20	2
30	5
40	3
(other values)	2

# if every value matches a bin, no "other" bin appears in the result
query I
SELECT histogram_exact(r, [0, 1, 2, 3])
FROM range(4) t(r);
----
{0=1, 1=1, 2=1, 3=1}

# is_histogram_other_bin on a NULL input yields NULL
query I
SELECT is_histogram_other_bin(NULL)
----
NULL

# a non-empty nested list is not the "other" bin
query I
SELECT is_histogram_other_bin([[1]])
----
false

# an empty list of a nested list type is the "other" bin
query I
SELECT is_histogram_other_bin([]::INT[][][])
----
true

# a struct whose only field is a NULL list is reported as the "other" bin
query I
SELECT is_histogram_other_bin({'i': NULL::INT[][]})
----
true