Files
email-tracker/external/duckdb/test/sql/aggregate/aggregates/histogram_exact.test
2025-10-24 19:21:19 -05:00

100 lines
2.5 KiB
SQL

# name: test/sql/aggregate/aggregates/histogram_exact.test
# description: Test histogram_exact
# group: [aggregates]
# enable query verification for the statements in this file
statement ok
PRAGMA enable_verification

# fixture: one BIGINT column with 15 observations
# of these values only 20 is equal to one of the bins [10, 20, 30, 40, 50] used by the queries on this table
statement ok
CREATE TABLE obs(n BIGINT);

statement ok
INSERT INTO obs (n) VALUES (0), (5), (7), (12), (20), (23), (24), (25), (26), (28), (31), (34), (36), (41), (47);
# histogram_exact only counts values that are exactly equal to one of the bins;
# all remaining values are collected in a single "other" bin
# the key of the other bin depends on the data type of the bins:
# for integer bins it is the largest value of the type
query I
SELECT histogram_exact(n, [10, 20, 30, 40, 50]) AS hist
FROM obs
----
{10=0, 20=1, 30=0, 40=0, 50=0, 9223372036854775807=14}
# for doubles, dates and timestamps the other bin is infinity
query I
SELECT histogram_exact(CAST(n AS DOUBLE), [10, 20, 30, 40, 50]) AS hist
FROM obs
----
{10.0=0, 20.0=1, 30.0=0, 40.0=0, 50.0=0, inf=14}
# same check with DATE values: the other bin is rendered as infinity
query I
SELECT histogram_exact(
    CAST(DATE '2000-01-01' + INTERVAL (n) DAYS AS DATE),
    [DATE '2000-01-01' + INTERVAL (x) DAYS for x in [10, 20, 30, 40, 50]]
) AS hist
FROM obs
----
{2000-01-11=0, 2000-01-21=1, 2000-01-31=0, 2000-02-10=0, 2000-02-20=0, infinity=14}
# for strings the other bin is the empty string
query I
SELECT histogram_exact(CAST(n AS VARCHAR), [10, 20, 30, 40, 50]) AS hist
FROM obs
----
{10=0, 20=1, 30=0, 40=0, 50=0, ''=14}
# for lists the other bin is an empty list
query I
SELECT histogram_exact(
    [n],
    [[x] for x in [10, 20, 30, 40, 50]]
) AS hist
FROM obs
----
{[10]=0, [20]=1, [30]=0, [40]=0, [50]=0, []=14}
# is_histogram_other_bin reports whether a bin is the other bin,
# which allows replacing its type-dependent key with a readable label
query II
WITH hist_result AS (
    SELECT histogram_exact(n, [10, 20, 30, 40, 50]) AS hist
    FROM obs
),
hist_entries AS (
    SELECT
        UNNEST(map_keys(hist)) AS bin,
        UNNEST(map_values(hist)) AS count
    FROM hist_result
)
SELECT
    CASE
        WHEN is_histogram_other_bin(bin) THEN '(other values)'
        ELSE CAST(bin AS VARCHAR)
    END AS bin,
    count
FROM hist_entries
----
10 0
20 1
30 0
40 0
50 0
(other values) 14
# is_histogram_other_bin also recognizes the other bin produced by the regular histogram function
# with bins [10, 20, 30, 40] the observations 41 and 47 lie above the last bin and end up in the other bin
query II
WITH hist_result AS (
    SELECT histogram(n, [10, 20, 30, 40]) AS hist
    FROM obs
),
hist_entries AS (
    SELECT
        UNNEST(map_keys(hist)) AS bin,
        UNNEST(map_values(hist)) AS count
    FROM hist_result
)
SELECT
    CASE
        WHEN is_histogram_other_bin(bin) THEN '(other values)'
        ELSE CAST(bin AS VARCHAR)
    END AS bin,
    count
FROM hist_entries
----
10 3
20 2
30 5
40 3
(other values) 2
# if every value is equal to one of the bins there is nothing left for the other bin,
# so the other bin does not appear in the result at all
query I
SELECT histogram_exact(r, [0, 1, 2, 3]) AS hist
FROM range(4) AS t(r);
----
{0=1, 1=1, 2=1, 3=1}
# a NULL argument yields NULL rather than true/false
query I
SELECT is_histogram_other_bin(NULL) AS is_other
----
NULL
# a non-empty nested list is not the other bin
query I
SELECT is_histogram_other_bin([[1]]) AS is_other
----
false
# an empty list is the other bin, also when the list type is nested several levels deep
query I
SELECT is_histogram_other_bin(CAST([] AS INT[][][])) AS is_other
----
true
# a struct whose only field is a NULL nested list is reported as the other bin
query I
SELECT is_histogram_other_bin({'i': CAST(NULL AS INT[][])}) AS is_other
----
true