# Source listing: email-tracker/external/duckdb/test/sql/window/test_quantile_window.test
# (2025-10-24 19:21:19 -05:00; 538 lines; 8.1 KiB; SQL)

# name: test/sql/window/test_quantile_window.test
# description: Test MEDIAN and QUANTILE aggregates as window functions
# group: [window]
# Sort NULLs ahead of non-NULL values; the expected row orders below rely on it.
statement ok
SET default_null_order='nulls_first';

# Verification pragmas applied to the statements that follow.
statement ok
PRAGMA enable_verification

statement ok
PRAGMA verify_external

# Fixture: the integers 0..9 followed by three NULL rows, in a single column r.
statement ok
CREATE TABLE quantiles AS
SELECT range AS r FROM range(10)
UNION ALL
VALUES (NULL), (NULL), (NULL);

# Median per parity partition with no explicit frame: each expected value is the
# median of the partition's rows up to and including the current one.
# The NULL partition contains only NULL values and therefore yields NULL.
query III
SELECT
    r % 2,
    r,
    MEDIAN(r) OVER (PARTITION BY r % 2 ORDER BY r)
FROM quantiles
ORDER BY 1, 2
----
NULL	NULL	NULL
NULL	NULL	NULL
NULL	NULL	NULL
0	0	0.0
0	2	1.0
0	4	2.0
0	6	3.0
0	8	4.0
1	1	1.0
1	3	2.0
1	5	3.0
1	7	4.0
1	9	5.0

# Centered 3-row frame ordered by r (NULLs first).
# A frame holding only NULLs gives NULL; otherwise the NULLs are skipped.
query II
SELECT
    r,
    MEDIAN(r) OVER (ORDER BY r ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING)
FROM quantiles
ORDER BY 1, 2
----
NULL	NULL
NULL	NULL
NULL	0.0
0	0.5
1	1.0
2	2.0
3	3.0
4	4.0
5	5.0
6	6.0
7	7.0
8	8.0
9	8.5

# Asymmetric 5-row frame (1 row back, 3 rows ahead); the frame shrinks at both
# ends of the ordered input, and even-sized frames interpolate (x.5 values).
query II
SELECT
    r,
    MEDIAN(r) OVER (ORDER BY r ROWS BETWEEN 1 PRECEDING AND 3 FOLLOWING)
FROM quantiles
ORDER BY 1, 2
----
NULL	0.0
NULL	0.5
NULL	1.0
0	1.5
1	2.0
2	3.0
3	4.0
4	5.0
5	6.0
6	7.0
7	7.5
8	8.0
9	8.5

# Same 5-row frame, but with QUANTILE(r, 0.5): the expected values are always
# elements of the frame (no interpolation), taking the lower middle element
# when the frame holds an even number of non-NULL values.
query II
SELECT
    r,
    QUANTILE(r, 0.5) OVER (ORDER BY r ROWS BETWEEN 1 PRECEDING AND 3 FOLLOWING)
FROM quantiles
ORDER BY 1, 2
----
NULL	0
NULL	0
NULL	1
0	1
1	2
2	3
3	4
4	5
5	6
6	7
7	7
8	8
9	8

#
# VARCHAR
#

# Median over string input, per parity partition and without an explicit frame.
# Strings are not interpolated: an even-sized set yields its lower middle value.
query III
SELECT
    r % 2,
    r,
    MEDIAN(CAST(r AS VARCHAR)) OVER (PARTITION BY r % 2 ORDER BY r)
FROM quantiles
ORDER BY 1, 2
----
NULL	NULL	NULL
NULL	NULL	NULL
NULL	NULL	NULL
0	0	0
0	2	0
0	4	2
0	6	2
0	8	4
1	1	1
1	3	1
1	5	3
1	7	3
1	9	5

# String median over a centered 3-row frame ordered by r (NULLs first).
query II
SELECT
    r,
    MEDIAN(CAST(r AS VARCHAR)) OVER (ORDER BY r ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING)
FROM quantiles
ORDER BY 1, 2
----
NULL	NULL
NULL	NULL
NULL	0
0	0
1	1
2	2
3	3
4	4
5	5
6	6
7	7
8	8
9	8

# String QUANTILE(…, 0.5) over the asymmetric frame (1 row back, 3 rows ahead).
query II
SELECT
    r,
    QUANTILE(CAST(r AS VARCHAR), 0.5) OVER (ORDER BY r ROWS BETWEEN 1 PRECEDING AND 3 FOLLOWING)
FROM quantiles
ORDER BY 1, 2
----
NULL	0
NULL	0
NULL	1
0	1
1	2
2	3
3	4
4	5
5	6
6	7
7	7
8	8
9	8

# Non-inlined
# Same 3-row string median, with each value wrapped in 'prefix-' / '-suffix'.
# NOTE(review): presumably the padding makes the strings too long for inline
# storage — confirm against the engine's string representation.
query II
SELECT
    r,
    MEDIAN('prefix-' || CAST(r AS VARCHAR) || '-suffix') OVER (ORDER BY r ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING)
FROM quantiles
ORDER BY 1, 2
----
NULL	NULL
NULL	NULL
NULL	prefix-0-suffix
0	prefix-0-suffix
1	prefix-1-suffix
2	prefix-2-suffix
3	prefix-3-suffix
4	prefix-4-suffix
5	prefix-5-suffix
6	prefix-6-suffix
7	prefix-7-suffix
8	prefix-8-suffix
9	prefix-8-suffix

# Scattered NULLs
# n keeps r when r is even and is NULL otherwise. Median per (r % 3) partition
# with no explicit frame: NULL inputs are skipped, and a set with no non-NULL
# value (first row of partition 1) yields NULL.
query IIII
WITH nulls AS (
    SELECT
        r,
        CASE r % 2 WHEN 0 THEN r ELSE NULL END AS n
    FROM quantiles
)
SELECT
    r % 3,
    r,
    n,
    MEDIAN(n) OVER (PARTITION BY r % 3 ORDER BY r)
FROM nulls
ORDER BY 1, 2
----
NULL	NULL	NULL	NULL
NULL	NULL	NULL	NULL
NULL	NULL	NULL	NULL
0	0	0	0.0
0	3	NULL	0.0
0	6	6	3.0
0	9	NULL	3.0
1	1	NULL	NULL
1	4	4	4.0
1	7	NULL	4.0
2	2	2	2.0
2	5	NULL	2.0
2	8	8	5.0

# Sliding 3-row median over n (r when r is even, NULL otherwise).
# The three NULL-r rows tie on the first sort key but carry different medians
# (NULL, NULL, 0.0), so the median column is added as a tie-breaker; with
# NULLs sorted first this pins the expected row order.
query III
SELECT r, n, median(n) over (order by r rows between 1 preceding and 1 following)
FROM (SELECT r, CASE r % 2 WHEN 0 THEN r ELSE NULL END AS n FROM quantiles) nulls
ORDER BY 1, 3
----
NULL	NULL	NULL
NULL	NULL	NULL
NULL	NULL	0.0
0	0	0.0
1	NULL	1.0
2	2	2.0
3	NULL	3.0
4	4	4.0
5	NULL	5.0
6	6	6.0
7	NULL	7.0
8	8	8.0
9	NULL	8.0

# Sliding 5-row median (1 row back, 3 rows ahead) over n (r when r is even,
# NULL otherwise).
# The three NULL-r rows tie on the first sort key but carry different medians
# (0.0, 0.0, 1.0), so the median column is added as a tie-breaker to pin the
# expected row order.
query III
SELECT r, n, median(n) over (order by r rows between 1 preceding and 3 following)
FROM (SELECT r, CASE r % 2 WHEN 0 THEN r ELSE NULL END AS n FROM quantiles) nulls
ORDER BY 1, 3
----
NULL	NULL	0.0
NULL	NULL	0.0
NULL	NULL	1.0
0	0	1.0
1	NULL	2.0
2	2	3.0
3	NULL	4.0
4	4	5.0
5	NULL	6.0
6	6	7.0
7	NULL	7.0
8	8	8.0
9	NULL	8.0

# Frame spans every row, so each row sees the median of all non-NULL n values
# {0, 2, 4, 6, 8}. The expected value is written as 4.0 to match how the same
# median(n) expression is rendered by the other queries in this file.
query III
SELECT r, n, median(n) over (order by r rows between unbounded preceding and unbounded following)
FROM (SELECT r, CASE r % 2 WHEN 0 THEN r ELSE NULL END AS n FROM quantiles) nulls
ORDER BY 1
----
NULL	NULL	4.0
NULL	NULL	4.0
NULL	NULL	4.0
0	0	4.0
1	NULL	4.0
2	2	4.0
3	NULL	4.0
4	4	4.0
5	NULL	4.0
6	6	4.0
7	NULL	4.0
8	8	4.0
9	NULL	4.0

#
# Coverage tests
#

# ReuseIndexes, backwards moving frame
# Frame bounds come from the row itself (p PRECEDING, f FOLLOWING). With p = 3
# at i = 3 and i = 5, the frame start moves backwards relative to the previous row.
query II
WITH t(i, p, f) AS (
    VALUES
        (0, 1, 1),
        (1, 1, 1),
        (2, 1, 1),
        (3, 3, 1),
        (4, 1, 1),
        (5, 3, 1)
)
SELECT
    i,
    MEDIAN(i) OVER (ORDER BY i ROWS BETWEEN p PRECEDING AND f FOLLOWING)
FROM t
ORDER BY 1
----
0	0.5
1	1.0
2	2.0
3	2.0
4	4.0
5	3.5

# CanReplace
# 3-row sliding median over the repeating values 0, 1, 2, 0, 1, 2: from r = 2
# onwards each step drops one value and adds one.
query II
WITH t(r, i, p, f) AS (
    VALUES
        (0, 0, 1, 1),
        (1, 1, 1, 1),
        (2, 2, 1, 1),
        (3, 0, 1, 1),
        (4, 1, 1, 1),
        (5, 2, 1, 1)
)
SELECT
    r,
    MEDIAN(i) OVER (ORDER BY r ROWS BETWEEN p PRECEDING AND f FOLLOWING)
FROM t
ORDER BY 1
----
0	0.5
1	1.0
2	1.0
3	1.0
4	1.0
5	1.5

# Moving discrete list
# Three discrete quantiles at once over a 4-row frame (1 row back, 2 ahead);
# every returned element is a value present in the frame.
query II
WITH t(r, i, p, f) AS (
    VALUES
        (0, 0, 1, 2),
        (1, 1, 1, 2),
        (2, 2, 1, 2),
        (3, 3, 1, 2),
        (4, 4, 1, 2),
        (5, 5, 1, 2)
)
SELECT
    r,
    QUANTILE_DISC(i, [0.25, 0.5, 0.75]) OVER (ORDER BY r ROWS BETWEEN p PRECEDING AND f FOLLOWING)
FROM t
ORDER BY 1
----
0	[0, 1, 2]
1	[0, 1, 2]
2	[1, 2, 3]
3	[2, 3, 4]
4	[3, 4, 5]
5	[4, 4, 5]

# Moving discrete list with NULLs
# As the moving discrete list case, but the first input value is NULL; the
# frames that contain it (r = 0 and r = 1) are computed from the remaining values.
query II
WITH t(r, i, p, f) AS (
    VALUES
        (0, NULL, 1, 2),
        (1, 1, 1, 2),
        (2, 2, 1, 2),
        (3, 3, 1, 2),
        (4, 4, 1, 2),
        (5, 5, 1, 2)
)
SELECT
    r,
    QUANTILE_DISC(i, [0.25, 0.5, 0.75]) OVER (ORDER BY r ROWS BETWEEN p PRECEDING AND f FOLLOWING)
FROM t
ORDER BY 1
----
0	[1, 1, 2]
1	[1, 2, 3]
2	[1, 2, 3]
3	[2, 3, 4]
4	[3, 4, 5]
5	[4, 4, 5]

# Moving discrete list with all NULLs
# Every input value is NULL, so each frame has nothing to aggregate and the
# result is a single NULL rather than a list.
query II
WITH t(r, i, p, f) AS (
    VALUES
        (0, NULL, 1, 2),
        (1, NULL, 1, 2),
        (2, NULL, 1, 2),
        (3, NULL, 1, 2),
        (4, NULL, 1, 2),
        (5, NULL, 1, 2)
)
SELECT
    r,
    QUANTILE_DISC(i, [0.25, 0.5, 0.75]) OVER (ORDER BY r ROWS BETWEEN p PRECEDING AND f FOLLOWING)
FROM t
ORDER BY 1
----
0	NULL
1	NULL
2	NULL
3	NULL
4	NULL
5	NULL

# Moving continuous list
# Three interpolated quantiles at once over a 4-row frame (1 row back, 2 ahead)
# on strictly increasing input.
query II
WITH t(r, i, p, f) AS (
    VALUES
        (0, 0, 1, 2),
        (1, 1, 1, 2),
        (2, 2, 1, 2),
        (3, 3, 1, 2),
        (4, 4, 1, 2),
        (5, 5, 1, 2)
)
SELECT
    r,
    QUANTILE_CONT(i, [0.25, 0.5, 0.75]) OVER (ORDER BY r ROWS BETWEEN p PRECEDING AND f FOLLOWING)
FROM t
ORDER BY 1
----
0	[0.5, 1.0, 1.5]
1	[0.75, 1.5, 2.25]
2	[1.75, 2.5, 3.25]
3	[2.75, 3.5, 4.25]
4	[3.5, 4.0, 4.5]
5	[4.25, 4.5, 4.75]

# Moving continuous list with replacement
# Interpolated quantile list over the repeating values 0, 1, 2, 0, 1, 2, so
# values entering the 4-row frame are not always larger than those leaving it.
query II
WITH t(r, i, p, f) AS (
    VALUES
        (0, 0, 1, 2),
        (1, 1, 1, 2),
        (2, 2, 1, 2),
        (3, 0, 1, 2),
        (4, 1, 1, 2),
        (5, 2, 1, 2)
)
SELECT
    r,
    QUANTILE_CONT(i, [0.25, 0.5, 0.75]) OVER (ORDER BY r ROWS BETWEEN p PRECEDING AND f FOLLOWING)
FROM t
ORDER BY 1
----
0	[0.5, 1.0, 1.5]
1	[0.0, 0.5, 1.25]
2	[0.75, 1.0, 1.25]
3	[0.75, 1.5, 2.0]
4	[0.5, 1.0, 1.5]
5	[1.25, 1.5, 1.75]

#
# Coverage
#

# Discrete replace coverage
# Scalar discrete median over a fixed 3-row frame on the values 0, 1, 2, 0, 1.
query II
SELECT
    r,
    QUANTILE_DISC(i, 0.5) OVER (ORDER BY r ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) AS q
FROM (
    VALUES
        (0, 0),
        (1, 1),
        (2, 2),
        (3, 0),
        (4, 1)
) tbl(r, i)
ORDER BY 1, 2
----
0	0
1	1
2	1
3	1
4	0

# Continuous interpolated replace coverage
# Scalar interpolated median over a fixed frame of 2 rows back and 1 ahead on
# the values 0, 1, 2, 3, 0, 1; full 4-row frames interpolate between two values.
query II
SELECT
    r,
    QUANTILE_CONT(i, 0.5) OVER (ORDER BY r ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING) AS q
FROM (
    VALUES
        (0, 0),
        (1, 1),
        (2, 2),
        (3, 3),
        (4, 0),
        (5, 1)
) tbl(r, i)
ORDER BY 1, 2
----
0	0.5
1	1.0
2	1.5
3	1.5
4	1.5
5	1.0

# NULL replace coverage
# Same frame (2 rows back, 1 ahead) with NULL inputs at r = 0 and r = 4, so
# NULLs both enter and leave the frame as it slides.
query II
SELECT
    r,
    QUANTILE_CONT(i, 0.5) OVER (ORDER BY r ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING) AS q
FROM (
    VALUES
        (0, NULL),
        (1, 1),
        (2, 2),
        (3, 3),
        (4, NULL),
        (5, 1)
) tbl(r, i)
ORDER BY 1, 2
----
0	1.0
1	1.5
2	2.0
3	2.0
4	2.0
5	2.0

#
# Compare implementations
#

# Run the same four queries once per debug window mode; the expected output is
# shared, so every mode has to produce identical results.
foreach windowmode "window" "combine" "separate"

statement ok
PRAGMA debug_window_mode=${windowmode}

# Interpolated median, 3-row frame, repeating values 0, 1, 2, 0, 1, 2.
query II
WITH t(r, i, p, f) AS (
    VALUES
        (0, 0, 1, 1),
        (1, 1, 1, 1),
        (2, 2, 1, 1),
        (3, 0, 1, 1),
        (4, 1, 1, 1),
        (5, 2, 1, 1)
)
SELECT
    r,
    MEDIAN(i) OVER (ORDER BY r ROWS BETWEEN p PRECEDING AND f FOLLOWING)
FROM t
ORDER BY 1
----
0	0.5
1	1.0
2	1.0
3	1.0
4	1.0
5	1.5

# Discrete median on the same input and frame.
query II
WITH t(r, i, p, f) AS (
    VALUES
        (0, 0, 1, 1),
        (1, 1, 1, 1),
        (2, 2, 1, 1),
        (3, 0, 1, 1),
        (4, 1, 1, 1),
        (5, 2, 1, 1)
)
SELECT
    r,
    QUANTILE_DISC(i, 0.5) OVER (ORDER BY r ROWS BETWEEN p PRECEDING AND f FOLLOWING)
FROM t
ORDER BY 1
----
0	0
1	1
2	1
3	1
4	1
5	1

# Discrete quantile list, 4-row frame, first input value NULL.
query II
WITH t(r, i, p, f) AS (
    VALUES
        (0, NULL, 1, 2),
        (1, 1, 1, 2),
        (2, 2, 1, 2),
        (3, 3, 1, 2),
        (4, 4, 1, 2),
        (5, 5, 1, 2)
)
SELECT
    r,
    QUANTILE_DISC(i, [0.25, 0.5, 0.75]) OVER (ORDER BY r ROWS BETWEEN p PRECEDING AND f FOLLOWING)
FROM t
ORDER BY 1
----
0	[1, 1, 2]
1	[1, 2, 3]
2	[1, 2, 3]
3	[2, 3, 4]
4	[3, 4, 5]
5	[4, 4, 5]

# Interpolated quantile list, 4-row frame, repeating values.
query II
WITH t(r, i, p, f) AS (
    VALUES
        (0, 0, 1, 2),
        (1, 1, 1, 2),
        (2, 2, 1, 2),
        (3, 0, 1, 2),
        (4, 1, 1, 2),
        (5, 2, 1, 2)
)
SELECT
    r,
    QUANTILE_CONT(i, [0.25, 0.5, 0.75]) OVER (ORDER BY r ROWS BETWEEN p PRECEDING AND f FOLLOWING)
FROM t
ORDER BY 1
----
0	[0.5, 1.0, 1.5]
1	[0.0, 0.5, 1.25]
2	[0.75, 1.0, 1.25]
3	[0.75, 1.5, 2.0]
4	[0.5, 1.0, 1.5]
5	[1.25, 1.5, 1.75]

endloop