Files
email-tracker/external/duckdb/test/sql/window/test_window_distinct.test
2025-10-24 19:21:19 -05:00

313 lines
4.7 KiB
SQL

# name: test/sql/window/test_window_distinct.test
# description: Windowed distinct aggregates functionality
# group: [window]
# Switch on DuckDB's verification mode before the queries below are run.
statement ok
PRAGMA enable_verification

# Smallest case: a DISTINCT count of a constant over an empty OVER () clause yields 1.
query I
SELECT
    COUNT(DISTINCT 42) OVER () AS distinct_constants
----
1

# Partition-wide frames: COUNT(b) keeps duplicate b values, COUNT(DISTINCT b) collapses them.
query IIII
WITH t AS (
    SELECT
        v.col0 AS a,
        v.col1 AS b
    FROM (
        VALUES (1, 2), (1, 1), (1, 2), (2, 1), (2, 1), (2, 2), (2, 3), (2, 4)
    ) AS v
)
SELECT
    a,
    b,
    COUNT(b) OVER (PARTITION BY a) AS total_b,
    COUNT(DISTINCT b) OVER (PARTITION BY a) AS distinct_b
FROM t
ORDER BY a, b
----
1 1 3 2
1 2 3 2
1 2 3 2
2 1 5 4
2 1 5 4
2 2 5 4
2 3 5 4
2 4 5 4

# Fixture table: eight rows keyed by i = 1..8 whose s column only takes the values 'a', 'b' and 'c'.
statement ok
CREATE TABLE figure1 AS
SELECT
    v.i,
    v.s
FROM (
    VALUES
        (1, 'a'),
        (2, 'b'),
        (3, 'b'),
        (4, 'c'),
        (5, 'c'),
        (6, 'b'),
        (7, 'c'),
        (8, 'a')
) AS v(i, s);

# Sliding ROWS frame: distinct s values among the current row and up to two rows on either side.
query III
SELECT
    i,
    s,
    COUNT(DISTINCT s) OVER (
        ORDER BY i
        ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING
    ) AS c
FROM figure1
ORDER BY i
----
1 a 2
2 b 3
3 b 3
4 c 2
5 c 2
6 b 3
7 c 3
8 a 3

# 1000 generated rows with v = i % 29 under a 25-preceding / 25-following ROWS frame.
# The 3000 output values are compared through their hash rather than listed.
# NOTE(review): the CTE name "uncascaded" presumably refers to an engine-internal strategy for small inputs - confirm.
query III
WITH uncascaded AS (
    SELECT
        i,
        i % 29 AS v
    FROM range(1000) AS tbl(i)
)
SELECT
    i,
    v,
    COUNT(DISTINCT v) OVER (
        ORDER BY i
        ROWS BETWEEN 25 PRECEDING AND 25 FOLLOWING
    ) AS w
FROM uncascaded
ORDER BY i
----
3000 values hashing to cb9c296986f7b9eaeee380bbc049ab39

# Same query shape as the 1000-row case, scaled to 10000 generated rows (30000 hashed output values).
# NOTE(review): the CTE name "cascaded" presumably refers to an engine-internal strategy for larger inputs - confirm.
query III
WITH cascaded AS (
    SELECT
        i,
        i % 29 AS v
    FROM range(10000) AS tbl(i)
)
SELECT
    i,
    v,
    COUNT(DISTINCT v) OVER (
        ORDER BY i
        ROWS BETWEEN 25 PRECEDING AND 25 FOLLOWING
    ) AS w
FROM cascaded
ORDER BY i
----
30000 values hashing to 673869e81fecab82f0bcec032236115a

# Frames that carry an EXCLUDE clause fall back to the naive evaluation.
# Ordering by i // 2 creates pairs of peer rows, so EXCLUDE TIES has something to drop.
query IIII
SELECT
    i,
    s,
    i // 2 AS o,
    COUNT(DISTINCT s) OVER (
        ORDER BY i // 2
        ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING
        EXCLUDE TIES
    ) AS c
FROM figure1
ORDER BY i
----
1 a 0 2
2 b 1 3
3 b 1 3
4 c 2 2
5 c 2 2
6 b 3 3
7 c 3 2
8 a 4 3

# DISTINCT aggregate with NULL values in the dataset.
# Adds three rows: NULL only in s, NULL only in i, and NULL in both columns.
# The target columns are named explicitly and the value list carries no dangling comma,
# so the statement does not depend on column order or on parser leniency.
statement ok
INSERT INTO figure1 (i, s)
VALUES
    (9, NULL),
    (NULL, 'b'),
    (NULL, NULL);

# Sliding ROWS frame over figure1 once it contains NULLs.
# (i, s NULLS LAST) is used both inside the window and for the final output order.
# The last expected row (frame values NULL, b, NULL) counts 1: NULL s values are not counted.
query III
SELECT
    i,
    s,
    COUNT(DISTINCT s) OVER (
        ORDER BY i, s NULLS LAST
        ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING
    ) AS c
FROM figure1
ORDER BY i, s NULLS LAST
----
1 a 2
2 b 3
3 b 3
4 c 2
5 c 2
6 b 3
7 c 3
8 a 3
9 NULL 3
NULL b 2
NULL NULL 1

# DISTINCT over nested types, e.g. LIST/STRUCT.
# Fixture derived from figure1: n is a STRUCT and l a LIST, both built from i % 2 and s; r is i squared.
statement ok
CREATE TABLE nested AS
SELECT
    i,
    s,
    {"m": i % 2, "s": s} AS n,
    [CAST(i % 2 AS VARCHAR), s] AS l,
    i * i AS r
FROM figure1

# COUNT(DISTINCT ...) over the STRUCT column n with the five-row sliding frame.
query III
SELECT
    i,
    n,
    COUNT(DISTINCT n) OVER (
        ORDER BY i, s NULLS LAST
        ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING
    ) AS c
FROM nested
ORDER BY i, s NULLS LAST
----
1 {'m': 1, 's': a} 3
2 {'m': 0, 's': b} 4
3 {'m': 1, 's': b} 5
4 {'m': 0, 's': c} 4
5 {'m': 1, 's': c} 4
6 {'m': 0, 's': b} 4
7 {'m': 1, 's': c} 4
8 {'m': 0, 's': a} 5
9 {'m': 1, 's': NULL} 5
NULL {'m': NULL, 's': b} 4
NULL {'m': NULL, 's': NULL} 3

# COUNT(DISTINCT ...) over the LIST column l with the five-row sliding frame.
query III
SELECT
    i,
    l,
    COUNT(DISTINCT l) OVER (
        ORDER BY i, s NULLS LAST
        ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING
    ) AS c
FROM nested
ORDER BY i, s NULLS LAST
----
1 [1, a] 3
2 [0, b] 4
3 [1, b] 5
4 [0, c] 4
5 [1, c] 4
6 [0, b] 4
7 [1, c] 4
8 [0, a] 5
9 [1, NULL] 5
NULL [NULL, b] 4
NULL [NULL, NULL] 3

# DISTINCT with RANGE instead of ROWS.
# The frame is every row whose r lies within 10 of the current r; r = i * i, so frames shrink as r grows.
# Output rows are still ordered by (i, s NULLS LAST), although r is the displayed column.
query III
SELECT
    r,
    s,
    COUNT(DISTINCT s) OVER (
        ORDER BY r
        RANGE BETWEEN 10 PRECEDING AND 10 FOLLOWING
    ) AS c
FROM nested
ORDER BY i, s NULLS LAST
----
1 a 2
4 b 2
9 b 3
16 c 2
25 c 1
36 b 1
49 c 1
64 a 1
81 NULL 0
NULL b 1
NULL NULL 1

# DISTINCT with an aggregate that has a destructor (e.g. LIST or STRING_AGG).
# Here STRING_AGG joins the distinct s values of each five-row frame with ', '.
query III
SELECT
    i,
    s,
    STRING_AGG(DISTINCT s, ', ') OVER (
        ORDER BY i, s NULLS LAST
        ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING
    ) AS c
FROM nested
ORDER BY i, s NULLS LAST
----
1 a a, b
2 b a, b, c
3 b a, b, c
4 c b, c
5 c b, c
6 b c, b, a
7 c c, b, a
8 a b, c, a
9 NULL c, a, b
NULL b a, b
NULL NULL b

# DISTINCT combined with MEDIAN.
# Per the original note, this stands in for aggregates that may have a special window implementation.
query III
SELECT
    i,
    s,
    MEDIAN(DISTINCT s) OVER (
        ORDER BY i, s NULLS LAST
        ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING
    ) AS c
FROM nested
ORDER BY i, s NULLS LAST
----
1 a a
2 b b
3 b b
4 c b
5 c b
6 b b
7 c b
8 a b
9 NULL b
NULL b a
NULL NULL b

# DISTINCT FILTER: only rows with i % 3 = 0 feed the distinct count of each frame.
query III
SELECT
    i,
    s,
    COUNT(DISTINCT s) FILTER (WHERE i % 3 = 0) OVER (
        ORDER BY i, s NULLS LAST
        ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING
    ) AS c
FROM nested
ORDER BY i, s NULLS LAST
----
1 a 1
2 b 1
3 b 1
4 c 1
5 c 1
6 b 1
7 c 1
8 a 1
9 NULL 0
NULL b 0
NULL NULL 0

# DISTINCT FILTER variant: only rows with i % 3 = 1 feed the distinct count of each frame.
# Two rows of nested have a NULL i, so ordering by i alone leaves them tied.
# s NULLS LAST breaks the tie in both the window and the final ORDER BY,
# which makes the frame contents and the output row order deterministic.
query III
SELECT
    i,
    s,
    COUNT(DISTINCT s) FILTER (WHERE i % 3 = 1) OVER (
        ORDER BY i, s NULLS LAST
        ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING
    ) AS c
FROM nested
ORDER BY i, s NULLS LAST
----
1 a 1
2 b 2
3 b 2
4 c 1
5 c 1
6 b 1
7 c 1
8 a 1
9 NULL 1
NULL b 0
NULL NULL 0

# DISTINCT FILTER variant: only rows with i % 3 = 2 feed the distinct count of each frame.
# Two rows of nested have a NULL i, and their expected counts differ (1 vs 0) depending on
# which of them comes first in the frame order. s NULLS LAST breaks that tie in both the
# window and the final ORDER BY, so the expected output no longer relies on an unspecified sort order.
query III
SELECT
    i,
    s,
    COUNT(DISTINCT s) FILTER (WHERE i % 3 = 2) OVER (
        ORDER BY i, s NULLS LAST
        ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING
    ) AS c
FROM nested
ORDER BY i, s NULLS LAST
----
1 a 1
2 b 1
3 b 2
4 c 2
5 c 1
6 b 2
7 c 2
8 a 1
9 NULL 1
NULL b 1
NULL NULL 0
