100 lines
2.5 KiB
SQL
100 lines
2.5 KiB
SQL
# name: test/sql/aggregate/aggregates/histogram_exact.test
|
|
# description: Test histogram_exact
|
|
# group: [aggregates]
|
|
|
|
# switch on the verification pragma before any of the test queries run
statement ok
PRAGMA enable_verification;
|
|
|
|
# fixture table: a single BIGINT column holding the observed values
statement ok
CREATE TABLE obs (
    n BIGINT
);
|
|
|
|
# load the 15 observations used by the histogram queries below
# (20 is the only value that exactly equals one of the bins 10/20/30/40/50)
# the target column is named explicitly so the fixture does not depend on the table's column order
statement ok
INSERT INTO obs (n) VALUES
    (0), (5), (7), (12), (20),
    (23), (24), (25), (26), (28),
    (31), (34), (36), (41), (47);
|
|
|
|
# histogram_exact only counts values that are exactly equal to one of the bins;
# every remaining value is counted in a single "other" bin
# the key used for the other bin depends on the data type of the bins:
# for integer bins it is the largest value of the type
query I
SELECT histogram_exact(n, [10, 20, 30, 40, 50]) AS hist
FROM obs
----
{10=0, 20=1, 30=0, 40=0, 50=0, 9223372036854775807=14}
|
|
|
|
# doubles, dates and timestamps use infinity as the key of the other bin
query I
SELECT histogram_exact(CAST(n AS DOUBLE), [10, 20, 30, 40, 50]) AS hist
FROM obs
----
{10.0=0, 20.0=1, 30.0=0, 40.0=0, 50.0=0, inf=14}
|
|
|
|
# date variant: each observation is turned into an offset in days from 2000-01-01,
# and the other bin is keyed by "infinity"
query I
SELECT histogram_exact(
    CAST(DATE '2000-01-01' + INTERVAL (n) DAYS AS DATE),
    [DATE '2000-01-01' + INTERVAL (x) DAYS FOR x IN [10, 20, 30, 40, 50]]
) AS hist
FROM obs
----
{2000-01-11=0, 2000-01-21=1, 2000-01-31=0, 2000-02-10=0, 2000-02-20=0, infinity=14}
|
|
|
|
# string bins: the other bin is keyed by the empty string
query I
SELECT histogram_exact(CAST(n AS VARCHAR), [10, 20, 30, 40, 50]) AS hist
FROM obs
----
{10=0, 20=1, 30=0, 40=0, 50=0, ''=14}
|
|
|
|
# list bins: the other bin is keyed by an empty list
query I
SELECT histogram_exact([n], [[x] FOR x IN [10, 20, 30, 40, 50]]) AS hist
FROM obs
----
{[10]=0, [20]=1, [30]=0, [40]=0, [50]=0, []=14}
|
|
|
|
# is_histogram_other_bin tells us whether a histogram key is the special other bin,
# which lets us replace that key with a readable label
query II
WITH hist_result AS (
    SELECT histogram_exact(n, [10, 20, 30, 40, 50]) AS hist
    FROM obs
),
bins AS (
    SELECT
        UNNEST(map_keys(hist)) AS bin,
        UNNEST(map_values(hist)) AS count
    FROM hist_result
)
SELECT
    CASE
        WHEN is_histogram_other_bin(bin) THEN '(other values)'
        ELSE CAST(bin AS VARCHAR)
    END AS bin,
    count
FROM bins
----
10	0
20	1
30	0
40	0
50	0
(other values)	14
|
|
|
|
# the same labelling applied to the result of histogram() with explicit bins:
# the two observations above the last bin (41 and 47) end up in the other bin
query II
WITH hist_result AS (
    SELECT histogram(n, [10, 20, 30, 40]) AS hist
    FROM obs
),
bins AS (
    SELECT
        UNNEST(map_keys(hist)) AS bin,
        UNNEST(map_values(hist)) AS count
    FROM hist_result
)
SELECT
    CASE
        WHEN is_histogram_other_bin(bin) THEN '(other values)'
        ELSE CAST(bin AS VARCHAR)
    END AS bin,
    count
FROM bins
----
10	3
20	2
30	5
40	3
(other values)	2
|
|
|
|
# if every input value matches a bin, the result contains no other bin at all
query I
SELECT histogram_exact(r, [0, 1, 2, 3]) AS hist
FROM range(4) AS t(r);
----
{0=1, 1=1, 2=1, 3=1}
|
|
|
|
# a NULL argument yields NULL
query I
SELECT is_histogram_other_bin(NULL) AS is_other;
----
NULL
|
|
|
|
# a non-empty nested list is not the other bin
query I
SELECT is_histogram_other_bin([[1]]) AS is_other;
----
false
|
|
|
|
# an empty list typed as a triply nested INT list is reported as the other bin
query I
SELECT is_histogram_other_bin(CAST([] AS INT[][][])) AS is_other;
----
true
|
|
|
|
# a struct whose only field is a NULL nested list is reported as the other bin
query I
SELECT is_histogram_other_bin({'i': CAST(NULL AS INT[][])}) AS is_other;
----
true
|