should be it

2025-10-24 19:21:19 -05:00
parent a4b23fc57c
commit f09560c7b1
14047 changed files with 3161551 additions and 1 deletions
--- a/external/duckdb/test/sql/aggregate/distinct/distinct_on_nulls.test
+++ b/external/duckdb/test/sql/aggregate/distinct/distinct_on_nulls.test
@@ -0,0 +1,83 @@
+# name: test/sql/aggregate/distinct/distinct_on_nulls.test
+# description: Test DISTINCT ON with NULL values
+# group: [distinct]
+
+statement ok
+PRAGMA enable_verification
+
+statement ok
+CREATE TABLE integers(i INTEGER, j INTEGER);
+
+statement ok
+INSERT INTO integers VALUES (2, 3), (4, 5), (2, NULL), (NULL, NULL);
+
+query II
+SELECT DISTINCT ON (i) i, j FROM integers ORDER BY j
+----
+2	3
+4	5
+NULL	NULL
+
+query II
+SELECT DISTINCT ON (i) i, j FROM integers ORDER BY i, j
+----
+2	3
+4	5
+NULL	NULL
+
+query II
+SELECT DISTINCT ON (i) i, j FROM integers ORDER BY i NULLS FIRST, j NULLS FIRST
+----
+NULL	NULL
+2	NULL
+4	5
+
+query II
+SELECT DISTINCT ON (i) i, j FROM integers ORDER BY i, j NULLS FIRST
+----
+2	NULL
+4	5
+NULL	NULL
+
+# multi-way sort and ties
+statement ok
+CREATE TABLE distinct_on_test(key INTEGER, v1 VARCHAR, v2 INTEGER[], v3 INTEGER);
+
+statement ok
+INSERT INTO distinct_on_test VALUES
+	(1, 'hello', ARRAY[1], 42), -- ASC
+	(1, 'hello', ARRAY[1], 42),
+	(1, 'hello', ARRAY[1], 43), -- DESC
+	(2, NULL, NULL, 0),     -- ASC
+	(2, NULL, NULL, 1),
+	(2, NULL, NULL, NULL),  -- DESC
+	(3, 'thisisalongstring', NULL, 0),     -- ASC
+	(3, 'thisisalongstringbutlonger', NULL, 1),
+	(3, 'thisisalongstringbutevenlonger', ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9], 2)  -- DESC
+;
+
+query IIII
+SELECT DISTINCT ON (key) * FROM distinct_on_test ORDER BY key, v1, v2, v3
+----
+1	hello	[1]	42
+2	NULL	NULL	0
+3	thisisalongstring	NULL	0
+
+query IIII
+SELECT DISTINCT ON (key) * FROM distinct_on_test  WHERE key <> 2 ORDER BY key, v1, v2, v3
+----
+1	hello	[1]	42
+3	thisisalongstring	NULL	0
+
+query IIII
+SELECT DISTINCT ON (key) * FROM distinct_on_test ORDER BY key, v1 DESC NULLS FIRST, v2 DESC NULLS FIRST, v3 DESC NULLS FIRST
+----
+1	hello	[1]	43
+2	NULL	NULL	NULL
+3	thisisalongstringbutlonger	NULL	1
+
+query IIII
+SELECT DISTINCT ON (key) * FROM distinct_on_test WHERE key <> 2 ORDER BY key, v1 DESC NULLS FIRST, v2 DESC NULLS FIRST, v3 DESC NULLS FIRST
+----
+1	hello	[1]	43
+3	thisisalongstringbutlonger	NULL	1
--- a/external/duckdb/test/sql/aggregate/distinct/distinct_on_order_by.test
+++ b/external/duckdb/test/sql/aggregate/distinct/distinct_on_order_by.test
@@ -0,0 +1,161 @@
+# name: test/sql/aggregate/distinct/distinct_on_order_by.test
+# description: Test DISTINCT ON ORDER BY
+# group: [distinct]
+
+statement ok
+PRAGMA enable_verification
+
+statement ok
+CREATE TABLE integers(i INTEGER, j INTEGER, k INTEGER);
+
+statement ok
+INSERT INTO integers VALUES (2, 3, 5), (4, 5, 6), (2, 7, 6);
+
+query II
+SELECT DISTINCT ON (i) i, j FROM integers ORDER BY i, j DESC;
+----
+2	7
+4	5
+
+query II
+SELECT DISTINCT ON (i) i, j FROM integers ORDER BY i, j;
+----
+2	3
+4	5
+
+# we don't need to ORDER BY i
+query II
+SELECT DISTINCT ON (i) i, j FROM integers ORDER BY j DESC;
+----
+2	7
+4	5
+
+query II
+SELECT DISTINCT ON (i) i, j FROM integers ORDER BY j;
+----
+2	3
+4	5
+
+# DISTINCT ON in correlated subqueries
+query III
+SELECT i, j, (SELECT DISTINCT ON(i) j) AS k FROM integers ORDER BY i, j;
+----
+2	3	3
+2	7	7
+4	5	5
+
+query III
+SELECT i, j, (SELECT DISTINCT ON(i) j ORDER BY i, j DESC) AS k FROM integers ORDER BY i, j;
+----
+2	3	3
+2	7	7
+4	5	5
+
+query III
+SELECT i, j, (SELECT DISTINCT ON(i) j ORDER BY i, k) AS k FROM integers ORDER BY i, j;
+----
+2	3	3
+2	7	7
+4	5	5
+
+# DISTINCT ON with multiple parameters
+statement ok
+INSERT INTO integers VALUES (2, 3, 7), (4, 5, 11);
+
+query III
+SELECT DISTINCT ON(i) i, j, k FROM integers ORDER BY i, j ASC, k ASC
+----
+2	3	5
+4	5	6
+
+query III
+SELECT DISTINCT ON(i) i, j, k FROM integers ORDER BY i, j ASC, k DESC
+----
+2	3	7
+4	5	11
+
+# DISTINCT ON with NULL values
+statement ok
+INSERT INTO integers VALUES (2, NULL, 27), (4, 88, NULL);
+
+query III
+SELECT DISTINCT ON(i) i, j, k FROM integers ORDER BY i, j NULLS FIRST, k DESC NULLS LAST;
+----
+2	NULL	27
+4	5	11
+
+query III
+SELECT DISTINCT ON(i) i, j, k FROM integers ORDER BY i, j NULLS FIRST, k NULLS FIRST;
+----
+2	NULL	27
+4	5	6
+
+query III
+SELECT DISTINCT ON(i) i, j, k FROM integers ORDER BY i, k NULLS FIRST, j NULLS FIRST;
+----
+2	3	5
+4	88	NULL
+
+# examples from the original issue
+statement ok
+create table foo(a real, b real);
+
+statement ok
+insert into foo values (1, 69), (1, 420), (2, 69), (2, 420);
+
+query II rowsort
+select distinct on(a) a, b from foo order by b asc;
+----
+1	69
+2	69
+
+query II rowsort
+select distinct on(a) a, b from foo order by b desc;
+----
+1	420
+2	420
+
+statement ok
+CREATE TABLE example (
+    id               INT,
+    person_id        INT,
+    address_id       INT,
+    effective_date   DATE
+);
+
+statement ok
+INSERT INTO
+    example (id, person_id, address_id, effective_date)
+VALUES
+    (1, 2, 1, '2000-01-01'),  -- Moved to first house
+    (5, 2, 2, '2004-08-19'),  -- Went to uni
+    (9, 2, 1, '2007-06-12'),  -- Moved back home
+    (2, 4, 3, '2007-05-18'),  -- Moved to first house
+    (3, 4, 4, '2016-02-09')   -- Moved to new house
+;
+
+query IIII
+SELECT DISTINCT ON (person_id)
+    *
+FROM
+    example
+ORDER BY
+    person_id,
+    effective_date ASC
+;
+----
+1	2	1	2000-01-01
+2	4	3	2007-05-18
+
+query IIII
+SELECT DISTINCT ON (person_id)
+    *
+FROM
+    example
+ORDER BY
+    person_id,
+    effective_date DESC
+;
+----
+9	2	1	2007-06-12
+3	4	4	2016-02-09
--- a/external/duckdb/test/sql/aggregate/distinct/grouped/combined_with_grouping.test
+++ b/external/duckdb/test/sql/aggregate/distinct/grouped/combined_with_grouping.test
@@ -0,0 +1,274 @@
+# name: test/sql/aggregate/distinct/grouped/combined_with_grouping.test
+# group: [grouped]
+
+statement ok
+SET default_null_order='nulls_first';
+
+statement ok
+PRAGMA enable_verification
+
+statement ok
+PRAGMA verify_parallelism
+
+statement ok
+create table students (
+	course VARCHAR,
+	type VARCHAR,
+	value BIGINT
+);
+
+statement ok
+insert into students
+		(course, type, value)
+	values
+		('CS', 'Bachelor', 34),
+		('CS', 'Bachelor', 34),
+		('CS', 'PhD', 12),
+		('Math', 'Masters', 12),
+		('CS', NULL, 10),
+		('CS', NULL, 12),
+		('Math', NULL, 12),
+		('Math', NULL, NULL);
+
+query IIII
+SELECT GROUPING(course), course, sum(distinct value), COUNT(*) FROM students GROUP BY course ORDER BY all;
+----
+0	CS	56	5
+0	Math	12	3
+
+query IIII
+SELECT sum(distinct value), GROUPING_ID(course), course, COUNT(*) FROM students GROUP BY course ORDER BY all;
+----
+12	0	Math	3
+56	0	CS	5
+
+query IIIIIII
+SELECT GROUPING(course), GROUPING(type), course, type, sum(distinct value), COUNT(*), sum(distinct value), FROM students GROUP BY course, type ORDER BY all;
+----
+0	0	CS	NULL	22	2	22
+0	0	CS	Bachelor	34	2	34
+0	0	CS	PhD	12	1	12
+0	0	Math	NULL	12	2	12
+0	0	Math	Masters	12	1	12
+
+query IIIIIII
+SELECT GROUPING(course), GROUPING(type), avg(distinct value), course, type, COUNT(*), sum(distinct value), FROM students GROUP BY CUBE(course, type) ORDER BY all;
+----
+0	0	11.0	CS	NULL	2	22
+0	0	12.0	CS	PhD	1	12
+0	0	12.0	Math	NULL	2	12
+0	0	12.0	Math	Masters	1	12
+0	0	34.0	CS	Bachelor	2	34
+0	1	12.0	Math	NULL	3	12
+0	1	18.666666666666668	CS	NULL	5	56
+1	0	11.0	NULL	NULL	4	22
+1	0	12.0	NULL	Masters	1	12
+1	0	12.0	NULL	PhD	1	12
+1	0	34.0	NULL	Bachelor	2	34
+1	1	18.666666666666668	NULL	NULL	8	56
+
+query IIIIII
+SELECT sum(distinct value), GROUPING(course, type), course, type, COUNT(*), sum(distinct value), FROM students GROUP BY CUBE(course, type) ORDER BY all;
+----
+12	0	CS	PhD	1	12
+12	0	Math	NULL	2	12
+12	0	Math	Masters	1	12
+12	1	Math	NULL	3	12
+12	2	NULL	Masters	1	12
+12	2	NULL	PhD	1	12
+22	0	CS	NULL	2	22
+22	2	NULL	NULL	4	22
+34	0	CS	Bachelor	2	34
+34	2	NULL	Bachelor	2	34
+56	1	CS	NULL	5	56
+56	3	NULL	NULL	8	56
+
+query IIIIIIII
+SELECT GROUPING(course), GROUPING(type), sum(distinct value), GROUPING(course)+GROUPING(type), course, type, count(distinct value), COUNT(*) FROM students GROUP BY CUBE(course, type) ORDER BY all;
+----
+0	0	12	0	CS	PhD	1	1
+0	0	12	0	Math	NULL	1	2
+0	0	12	0	Math	Masters	1	1
+0	0	22	0	CS	NULL	2	2
+0	0	34	0	CS	Bachelor	1	2
+0	1	12	1	Math	NULL	1	3
+0	1	56	1	CS	NULL	3	5
+1	0	12	1	NULL	Masters	1	1
+1	0	12	1	NULL	PhD	1	1
+1	0	22	1	NULL	NULL	2	4
+1	0	34	1	NULL	Bachelor	1	2
+1	1	56	2	NULL	NULL	3	8
+
+# many repeated groupings
+query IIIIIII
+SELECT GROUPING(course, type, course, course, type, value, type, course), avg(distinct value), avg(value), avg(distinct value), course, type, COUNT(*) FROM students GROUP BY CUBE(course, type, value) ORDER BY all;
+----
+0	NULL	NULL	NULL	Math	NULL	1
+0	10.0	10.0	10.0	CS	NULL	1
+0	12.0	12.0	12.0	CS	NULL	1
+0	12.0	12.0	12.0	CS	PhD	1
+0	12.0	12.0	12.0	Math	NULL	1
+0	12.0	12.0	12.0	Math	Masters	1
+0	34.0	34.0	34.0	CS	Bachelor	2
+4	11.0	11.0	11.0	CS	NULL	2
+4	12.0	12.0	12.0	CS	PhD	1
+4	12.0	12.0	12.0	Math	NULL	2
+4	12.0	12.0	12.0	Math	Masters	1
+4	34.0	34.0	34.0	CS	Bachelor	2
+74	NULL	NULL	NULL	Math	NULL	1
+74	10.0	10.0	10.0	CS	NULL	1
+74	12.0	12.0	12.0	CS	NULL	2
+74	12.0	12.0	12.0	Math	NULL	2
+74	34.0	34.0	34.0	CS	NULL	2
+78	12.0	12.0	12.0	Math	NULL	3
+78	18.666666666666668	20.4	18.666666666666668	CS	NULL	5
+177	NULL	NULL	NULL	NULL	NULL	1
+177	10.0	10.0	10.0	NULL	NULL	1
+177	12.0	12.0	12.0	NULL	NULL	2
+177	12.0	12.0	12.0	NULL	Masters	1
+177	12.0	12.0	12.0	NULL	PhD	1
+177	34.0	34.0	34.0	NULL	Bachelor	2
+181	11.0	11.333333333333334	11.0	NULL	NULL	4
+181	12.0	12.0	12.0	NULL	Masters	1
+181	12.0	12.0	12.0	NULL	PhD	1
+181	34.0	34.0	34.0	NULL	Bachelor	2
+251	NULL	NULL	NULL	NULL	NULL	1
+251	10.0	10.0	10.0	NULL	NULL	1
+251	12.0	12.0	12.0	NULL	NULL	4
+251	34.0	34.0	34.0	NULL	NULL	2
+255	18.666666666666668	18.0	18.666666666666668	NULL	NULL	8
+
+# GROUPING with different table qualifications
+query IIIIIIII
+SELECT GROUPING(students.course), GROUPING(students.type), sum(distinct value), GROUPING(course)+GROUPING(type), course, avg(distinct value), type, COUNT(*) FROM students GROUP BY CUBE(course, type, value) ORDER BY all;
+----
+0	0	NULL	0	Math	NULL	NULL	1
+0	0	10	0	CS	10.0	NULL	1
+0	0	12	0	CS	12.0	NULL	1
+0	0	12	0	CS	12.0	PhD	1
+0	0	12	0	CS	12.0	PhD	1
+0	0	12	0	Math	12.0	NULL	1
+0	0	12	0	Math	12.0	NULL	2
+0	0	12	0	Math	12.0	Masters	1
+0	0	12	0	Math	12.0	Masters	1
+0	0	22	0	CS	11.0	NULL	2
+0	0	34	0	CS	34.0	Bachelor	2
+0	0	34	0	CS	34.0	Bachelor	2
+0	1	NULL	1	Math	NULL	NULL	1
+0	1	10	1	CS	10.0	NULL	1
+0	1	12	1	CS	12.0	NULL	2
+0	1	12	1	Math	12.0	NULL	2
+0	1	12	1	Math	12.0	NULL	3
+0	1	34	1	CS	34.0	NULL	2
+0	1	56	1	CS	18.666666666666668	NULL	5
+1	0	NULL	1	NULL	NULL	NULL	1
+1	0	10	1	NULL	10.0	NULL	1
+1	0	12	1	NULL	12.0	NULL	2
+1	0	12	1	NULL	12.0	Masters	1
+1	0	12	1	NULL	12.0	Masters	1
+1	0	12	1	NULL	12.0	PhD	1
+1	0	12	1	NULL	12.0	PhD	1
+1	0	22	1	NULL	11.0	NULL	4
+1	0	34	1	NULL	34.0	Bachelor	2
+1	0	34	1	NULL	34.0	Bachelor	2
+1	1	NULL	2	NULL	NULL	NULL	1
+1	1	10	2	NULL	10.0	NULL	1
+1	1	12	2	NULL	12.0	NULL	4
+1	1	34	2	NULL	34.0	NULL	2
+1	1	56	2	NULL	18.666666666666668	NULL	8
+
+query IIIIIIII
+SELECT GROUPING(course), GROUPING(type), avg(value), GROUPING(course)+GROUPING(type), avg(distinct value), course, type, COUNT(*) FROM students GROUP BY CUBE(students.course, students.type) ORDER BY all;
+----
+0	0	11.0	0	11.0	CS	NULL	2
+0	0	12.0	0	12.0	CS	PhD	1
+0	0	12.0	0	12.0	Math	NULL	2
+0	0	12.0	0	12.0	Math	Masters	1
+0	0	34.0	0	34.0	CS	Bachelor	2
+0	1	12.0	1	12.0	Math	NULL	3
+0	1	20.4	1	18.666666666666668	CS	NULL	5
+1	0	11.333333333333334	1	11.0	NULL	NULL	4
+1	0	12.0	1	12.0	NULL	Masters	1
+1	0	12.0	1	12.0	NULL	PhD	1
+1	0	34.0	1	34.0	NULL	Bachelor	2
+1	1	18.0	2	18.666666666666668	NULL	NULL	8
+
+# GROUPING in HAVING clause
+query IIIII
+SELECT GROUPING(course), GROUPING(value), course, sum(distinct value), COUNT(*) FROM students GROUP BY CUBE(course, value) HAVING GROUPING(course)=0 ORDER BY all;
+----
+0	0	CS	10	1
+0	0	CS	12	2
+0	0	CS	34	2
+0	0	Math	NULL	1
+0	0	Math	12	2
+0	1	CS	56	5
+0	1	Math	12	3
+
+query IIIIIIII
+SELECT GROUPING(course), GROUPING(type), sum(distinct value), course, type, sum(distinct value), avg(distinct value), COUNT(*) FROM students GROUP BY CUBE(course, value, type, value) HAVING GROUPING(students.course)=0 ORDER BY all;
+----
+0	0	NULL	Math	NULL	NULL	NULL	1
+0	0	NULL	Math	NULL	NULL	NULL	1
+0	0	NULL	Math	NULL	NULL	NULL	1
+0	0	10	CS	NULL	10	10.0	1
+0	0	10	CS	NULL	10	10.0	1
+0	0	10	CS	NULL	10	10.0	1
+0	0	12	CS	NULL	12	12.0	1
+0	0	12	CS	NULL	12	12.0	1
+0	0	12	CS	NULL	12	12.0	1
+0	0	12	CS	PhD	12	12.0	1
+0	0	12	CS	PhD	12	12.0	1
+0	0	12	CS	PhD	12	12.0	1
+0	0	12	CS	PhD	12	12.0	1
+0	0	12	Math	NULL	12	12.0	1
+0	0	12	Math	NULL	12	12.0	1
+0	0	12	Math	NULL	12	12.0	1
+0	0	12	Math	NULL	12	12.0	2
+0	0	12	Math	Masters	12	12.0	1
+0	0	12	Math	Masters	12	12.0	1
+0	0	12	Math	Masters	12	12.0	1
+0	0	12	Math	Masters	12	12.0	1
+0	0	22	CS	NULL	22	11.0	2
+0	0	34	CS	Bachelor	34	34.0	2
+0	0	34	CS	Bachelor	34	34.0	2
+0	0	34	CS	Bachelor	34	34.0	2
+0	0	34	CS	Bachelor	34	34.0	2
+0	1	NULL	Math	NULL	NULL	NULL	1
+0	1	NULL	Math	NULL	NULL	NULL	1
+0	1	NULL	Math	NULL	NULL	NULL	1
+0	1	10	CS	NULL	10	10.0	1
+0	1	10	CS	NULL	10	10.0	1
+0	1	10	CS	NULL	10	10.0	1
+0	1	12	CS	NULL	12	12.0	2
+0	1	12	CS	NULL	12	12.0	2
+0	1	12	CS	NULL	12	12.0	2
+0	1	12	Math	NULL	12	12.0	2
+0	1	12	Math	NULL	12	12.0	2
+0	1	12	Math	NULL	12	12.0	2
+0	1	12	Math	NULL	12	12.0	3
+0	1	34	CS	NULL	34	34.0	2
+0	1	34	CS	NULL	34	34.0	2
+0	1	34	CS	NULL	34	34.0	2
+0	1	56	CS	NULL	56	18.666666666666668	5
+
+
+# GROUPING in ORDER BY clause
+query IIIII
+SELECT type, COUNT(*), avg(value), sum(distinct value), avg(distinct value), FROM students GROUP BY CUBE(value, type) ORDER BY GROUPING(value), GROUPING(type), 1, 2, 3, 4, 5;
+----
+NULL	1	NULL	NULL	NULL
+NULL	1	10.0	10	10.0
+NULL	2	12.0	12	12.0
+Bachelor	2	34.0	34	34.0
+Masters	1	12.0	12	12.0
+PhD	1	12.0	12	12.0
+NULL	1	NULL	NULL	NULL
+NULL	1	10.0	10	10.0
+NULL	2	34.0	34	34.0
+NULL	4	12.0	12	12.0
+NULL	4	11.333333333333334	22	11.0
+Bachelor	2	34.0	34	34.0
+Masters	1	12.0	12	12.0
+PhD	1	12.0	12	12.0
+NULL	8	18.0	56	18.666666666666668
--- a/external/duckdb/test/sql/aggregate/distinct/grouped/coverage.test_slow
+++ b/external/duckdb/test/sql/aggregate/distinct/grouped/coverage.test_slow
@@ -0,0 +1,122 @@
+# name: test/sql/aggregate/distinct/grouped/coverage.test_slow
+# description: DISTINCT aggregations
+# group: [grouped]
+
+# These tests target the grouped aggregate operator rather than the aggregate functions themselves,
+# so they focus mostly on the order and mixing of distinct and non-distinct aggregates
+# and not on variation between types and functions.
+
+#Recursive CTE
+
+query I
+with recursive t as (select 1 as x union select sum(distinct x+1) from t where x < 3 group by x) select * from t order by x;
+----
+1
+2
+3
+
+# Prepared statement
+
+statement ok
+CREATE TABLE tbl AS SELECT i, i%5 as j FROM range(1000000) tbl(i);
+
+statement ok
+PREPARE v1 AS SELECT SUM(DISTINCT i%5+?::INT) FROM tbl group by j order by all;
+
+query I
+EXECUTE v1(1);
+----
+1
+2
+3
+4
+5
+
+query I
+EXECUTE v1(2);
+----
+2
+3
+4
+5
+6
+
+query I
+EXECUTE v1(3);
+----
+3
+4
+5
+6
+7
+
+# DISTINCT aggregate parameter as expression
+
+query I
+SELECT COUNT(distinct i % 5) from tbl group by j;
+----
+1
+1
+1
+1
+1
+
+# Correlated subquery
+
+query I
+SELECT COUNT(distinct (SELECT i%5)) from tbl group by j;
+----
+1
+1
+1
+1
+1
+
+# Aggregate with multiple parameters
+
+query I
+SELECT ARG_MIN(distinct i%5, i) from tbl group by j order by all;
+----
+0
+1
+2
+3
+4
+
+# Distinct lists
+
+statement ok
+CREATE TABLE lists_tbl AS SELECT i%20 as groups, [x + i for x in range(280)] AS l FROM range(200000) tmp(i);
+
+query IIII
+SELECT COUNT(l), avg(groups), COUNT(DISTINCT l), groups FROM lists_tbl group by groups order by groups limit 10;
+----
+10000	0.0	10000	0
+10000	1.0	10000	1
+10000	2.0	10000	2
+10000	3.0	10000	3
+10000	4.0	10000	4
+10000	5.0	10000	5
+10000	6.0	10000	6
+10000	7.0	10000	7
+10000	8.0	10000	8
+10000	9.0	10000	9
+
+# Non-inlined (>12 length) strings
+
+statement ok
+create table strings_tbl as select gen_random_uuid() as strings, i as groups from range(200000) tbl(i);
+
+query II
+select count(strings), count(distinct strings) from strings_tbl group by groups order by groups limit 10;
+----
+1	1
+1	1
+1	1
+1	1
+1	1
+1	1
+1	1
+1	1
+1	1
+1	1
--- a/external/duckdb/test/sql/aggregate/distinct/grouped/distinct_and_non_distinct_mixed.test_slow
+++ b/external/duckdb/test/sql/aggregate/distinct/grouped/distinct_and_non_distinct_mixed.test_slow
@@ -0,0 +1,255 @@
+# name: test/sql/aggregate/distinct/grouped/distinct_and_non_distinct_mixed.test_slow
+# description: DISTINCT aggregations
+# group: [grouped]
+
+# These tests target the grouped aggregate operator rather than the aggregate functions themselves,
+# so they focus mostly on the order and mixing of distinct and non-distinct aggregates
+# and not on variation between types and functions.
+
+#distinct aggregate =	'D'
+#regular aggregate =	'-'
+
+statement ok
+PRAGMA enable_verification
+
+statement ok
+create table tbl as
+	(select i%50 as i, i%100 as j, i%5 as groups from range(50000) tbl(i))
+;
+
+# D
+query I
+select
+	count(distinct i)
+from tbl group by groups;;
+----
+10
+10
+10
+10
+10
+
+# D--
+query III rowsort
+select
+	sum(distinct i),
+	sum(i),
+	sum(j)
+from tbl group by groups;;
+----
+225	225000	475000
+235	235000	485000
+245	245000	495000
+255	255000	505000
+265	265000	515000
+
+# --D
+query III rowsort
+select
+	sum(i),
+	sum(j),
+	sum(distinct i)
+from tbl group by groups;;
+----
+225000	475000	225
+235000	485000	235
+245000	495000	245
+255000	505000	255
+265000	515000	265
+
+# -D-
+query III rowsort
+select
+	sum(i),
+	sum(distinct i),
+	sum(j)
+from tbl group by groups;;
+----
+225000	225	475000
+235000	235	485000
+245000	245	495000
+255000	255	505000
+265000	265	515000
+
+# D-D
+query III rowsort
+select
+	sum(distinct i),
+	count(j),
+	sum(distinct j)
+from tbl group by groups;;
+----
+225	10000	950
+235	10000	970
+245	10000	990
+255	10000	1010
+265	10000	1030
+
+#-D-D
+query IIII rowsort
+select
+	sum(j),
+	sum(distinct i),
+	count(j),
+	sum(distinct j)
+from tbl group by groups;;
+----
+475000	225	10000	950
+485000	235	10000	970
+495000	245	10000	990
+505000	255	10000	1010
+515000	265	10000	1030
+
+#-D-D (same query as the previous case, repeated)
+query IIII rowsort
+select
+	sum(j),
+	sum(distinct i),
+	count(j),
+	sum(distinct j)
+from tbl group by groups;;
+----
+475000	225	10000	950
+485000	235	10000	970
+495000	245	10000	990
+505000	255	10000	1010
+515000	265	10000	1030
+
+#D-D-
+query IIII rowsort
+select
+	sum(distinct i),
+	count(j),
+	sum(distinct j),
+	sum(j)
+from tbl group by groups;;
+----
+225	10000	950	475000
+235	10000	970	485000
+245	10000	990	495000
+255	10000	1010	505000
+265	10000	1030	515000
+
+# These next tests will repeat the previous test, with the addition of filters
+# filtered =		'F'
+# not filtered =	'-'
+
+# D
+# F
+query I
+select
+	count(distinct i) FILTER (WHERE i >= 20)
+from tbl group by groups;;
+----
+6
+6
+6
+6
+6
+
+# D--
+# -FF
+query III rowsort
+select
+	sum(distinct i),
+	sum(i) FILTER (WHERE j < 20),
+	sum(j) FILTER (WHERE i >= 20)
+from tbl group by groups;;
+----
+225	15000	345000
+235	17000	351000
+245	19000	357000
+255	21000	363000
+265	23000	369000
+
+# --D
+# -FF
+query III rowsort
+select
+	sum(i),
+	sum(j) FILTER (WHERE j == 0),
+	sum(distinct i) FILTER (WHERE i == 0)
+from tbl group by groups;;
+----
+225000	0	0
+235000	NULL	NULL
+245000	NULL	NULL
+255000	NULL	NULL
+265000	NULL	NULL
+
+# -D-
+# F-F
+query III rowsort
+select
+	sum(i) FILTER (WHERE j == 5),
+	sum(distinct i),
+	sum(j) FILTER (WHERE i == 5)
+from tbl group by groups;;
+----
+2500	225	30000
+NULL	235	NULL
+NULL	245	NULL
+NULL	255	NULL
+NULL	265	NULL
+
+# D-D
+# F-F
+query III rowsort
+select
+	sum(distinct i) FILTER (WHERE i == 5),
+	count(j),
+	sum(distinct j) FILTER (WHERE i == 5)
+from tbl group by groups;;
+----
+5	10000	60
+NULL	10000	NULL
+NULL	10000	NULL
+NULL	10000	NULL
+NULL	10000	NULL
+
+#-D-D
+#FF--
+query IIII rowsort
+select
+	sum(j) FILTER (WHERE j == 5),
+	sum(distinct i) FILTER (WHERE j == 5),
+	count(j),
+	sum(distinct j)
+from tbl group by groups;;
+----
+2500	5	10000	950
+NULL	NULL	10000	1010
+NULL	NULL	10000	1030
+NULL	NULL	10000	970
+NULL	NULL	10000	990
+
+#-D-D
+#F--F
+query IIII rowsort
+select
+	sum(j) FILTER (WHERE i == 5),
+	sum(distinct i),
+	count(j),
+	sum(distinct j) FILTER (WHERE j == 5)
+from tbl group by groups;;
+----
+30000	225	10000	5
+NULL	235	10000	NULL
+NULL	245	10000	NULL
+NULL	255	10000	NULL
+NULL	265	10000	NULL
+
+#D-D- (filters: --FF)
+query IIII rowsort
+select
+	sum(distinct i),
+	count(j),
+	sum(distinct j) FILTER (WHERE j == 5),
+	sum(j) FILTER (WHERE j == 5)
+from tbl group by groups;;
+----
+225	10000	5	2500
+235	10000	NULL	NULL
+245	10000	NULL	NULL
+255	10000	NULL	NULL
+265	10000	NULL	NULL
--- a/external/duckdb/test/sql/aggregate/distinct/grouped/distinct_grouping_tpch.test_slow
+++ b/external/duckdb/test/sql/aggregate/distinct/grouped/distinct_grouping_tpch.test_slow
@@ -0,0 +1,64 @@
+# name: test/sql/aggregate/distinct/grouped/distinct_grouping_tpch.test_slow
+# group: [grouped]
+
+require tpch
+
+require ram 8gb
+
+require disk_space 40gb
+
+statement ok
+pragma enable_verification
+
+statement ok
+pragma verify_parallelism
+
+statement ok
+PRAGMA verify_external
+
+statement ok
+CALL dbgen(sf=1);
+
+query IIIII
+select
+	grouping(l_returnflag, l_linestatus),
+	l_returnflag,
+	l_linestatus,
+	count(distinct l_orderkey),
+	count(distinct l_comment)
+from lineitem
+group by cube(l_returnflag, l_linestatus)
+order by all;
+----
+0	A	F	644207	1181362
+0	N	F	30908	37987
+0	N	O	770587	2146525
+0	R	F	645527	1181807
+1	A	NULL	644207	1181362
+1	N	NULL	780997	2168690
+1	R	NULL	645527	1181807
+2	NULL	F	767956	2142221
+2	NULL	O	770587	2146525
+3	NULL	NULL	1500000	3610733
+
+query IIIIIIIII
+SELECT
+	COUNT(DISTINCT l_orderkey),
+	COUNT(DISTINCT l_partkey),
+	COUNT(*),
+	MIN(l_orderkey),
+	MAX(l_orderkey),
+	MIN(l_partkey),
+	MAX(l_partkey),
+	SUM(distinct_comment),
+	AVG(distinct_comment)
+FROM (
+	select
+		l_orderkey,
+		l_partkey,
+		count(distinct l_comment) AS distinct_comment
+	from lineitem
+	group by cube(l_orderkey, l_partkey)
+);
+----
+1500000	200000	7701170	1	6000000	1	200000	21614257	2.8066199032095125
--- a/external/duckdb/test/sql/aggregate/distinct/grouped/identical_inputs.test
+++ b/external/duckdb/test/sql/aggregate/distinct/grouped/identical_inputs.test
@@ -0,0 +1,15 @@
+# name: test/sql/aggregate/distinct/grouped/identical_inputs.test
+# description: DISTINCT aggregations
+# group: [grouped]
+
+statement ok
+create table tbl as select i%50::BIGINT as i, i%5::BIGINT as j from range(1000000) tbl(i);
+
+query IIIII
+select count(distinct i), min(distinct i), max(distinct i), sum(distinct i), product(distinct i) from tbl group by j order by all;
+----
+10	0	45	225	0.0
+10	1	46	235	1213563326976.0
+10	2	47	245	3965002804224.0
+10	3	48	255	9360955828224.0
+10	4	49	265	19053977918976.0
--- a/external/duckdb/test/sql/aggregate/distinct/grouped/issue_5070.test
+++ b/external/duckdb/test/sql/aggregate/distinct/grouped/issue_5070.test
@@ -0,0 +1,23 @@
+# name: test/sql/aggregate/distinct/grouped/issue_5070.test
+# group: [grouped]
+
+statement ok
+pragma enable_verification
+
+statement ok
+pragma verify_parallelism
+
+query II
+WITH evs AS (
+  SELECT * FROM (VALUES
+    ('1','123','7'),
+    ('1','456','7')
+  ) AS t("id", "type", "value" )
+)
+SELECT "id"
+, COUNT(DISTINCT "value") FILTER (WHERE "type" = '456') AS type_456_count
+FROM evs
+GROUP BY "id"
+----
+1	1
+
--- a/external/duckdb/test/sql/aggregate/distinct/grouped/long_input.test_slow
+++ b/external/duckdb/test/sql/aggregate/distinct/grouped/long_input.test_slow
@@ -0,0 +1,23 @@
+# name: test/sql/aggregate/distinct/grouped/long_input.test_slow
+# description: DISTINCT aggregations
+# group: [grouped]
+
+# This is string aggr, which goes through the HashAggregateOperator, so this is in fact 'grouped'
+
+statement ok
+create or replace table tbl as select * FROM ( VALUES
+	([repeat('a', 1000000)]),
+	([repeat('a', 1000000)]),
+	([repeat('a', 1000000)]),
+	([repeat('a', 1000000)]),
+	([repeat('b', 1000000)]),
+	([repeat('b', 1000000)]),
+	([repeat('b', 1000000)]),
+	([repeat('b', 1000000)]),
+) tbl(i)
+
+query I
+select (min(distinct i)::TEXT)[2:2] from tbl group by i order by all;
+----
+a
+b
--- a/external/duckdb/test/sql/aggregate/distinct/grouped/many_distinct_groups.test_slow
+++ b/external/duckdb/test/sql/aggregate/distinct/grouped/many_distinct_groups.test_slow
@@ -0,0 +1,56 @@
+# name: test/sql/aggregate/distinct/grouped/many_distinct_groups.test_slow
+# group: [grouped]
+
+statement ok
+pragma enable_verification
+
+statement ok
+pragma verify_parallelism
+
+statement ok
+PRAGMA verify_external
+
+# 10000 is the radix_limit
+
+# Many distinct grouping values + many distinct aggregate inputs
+
+# Create the tables
+
+statement ok
+create table all_valid as select i%81982 as value, i%20000 as groups from range(200000) tbl(i);
+
+statement ok
+create table with_nulls as select CASE WHEN i%2343=0 THEN NULL ELSE i%63423 END as value, i%20000 as groups from range(200000) tbl(i);
+
+statement ok
+create table mostly_nulls as select CASE WHEN i%7=0 THEN i ELSE NULL END as value, i%20000 as groups from range(200000) tbl(i);
+
+# Perform the checks
+
+query IIII
+select avg(distinct value), sum(distinct value), avg(value), count(distinct value) from all_valid group by groups order by groups;
+----
+<FILE>:test/sql/aggregate/distinct/grouped/results/all_valid.csv
+
+query IIII
+select avg(distinct value), sum(distinct value), avg(value), count(distinct value) from with_nulls group by groups order by groups;
+----
+<FILE>:test/sql/aggregate/distinct/grouped/results/with_nulls.csv
+
+query IIII
+select avg(distinct value), sum(distinct value), avg(value), count(distinct value) from mostly_nulls group by groups order by groups;
+----
+<FILE>:test/sql/aggregate/distinct/grouped/results/mostly_nulls.csv
+
+# Only many distinct grouping values
+
+statement ok
+create table all_distinct as select 1 as value, i as groups from range(200000) tbl(i);
+
+query I nosort q1
+select sum(distinct value), count(distinct value), avg(distinct value), sum(distinct value) from all_distinct group by groups order by groups;
+----
+
+query I nosort q1
+select sum(value), count(value), avg(value), sum(value) from all_distinct group by groups order by groups;
+----
--- a/external/duckdb/test/sql/aggregate/distinct/grouped/memory_consumption.test_slow
+++ b/external/duckdb/test/sql/aggregate/distinct/grouped/memory_consumption.test_slow
@@ -0,0 +1,43 @@
+# name: test/sql/aggregate/distinct/grouped/memory_consumption.test_slow
+# group: [grouped]
+
+require tpch
+
+statement ok
+pragma enable_verification
+
+statement ok
+pragma verify_parallelism
+
+statement ok
+pragma threads=4
+
+# This same test will fail on current master (01c074b55e), as it requires 3.2gb minimum
+# UPDATE: PR #5263 found issues with unpinned blocks - this now requires more memory
+statement ok
+pragma memory_limit='4gb'
+
+statement ok
+CALL dbgen(sf=1);
+
+query II
+select
+	l_comment,
+	count(distinct l_orderkey),
+from
+	lineitem
+group by 1
+order by all desc
+limit 10
+;
+----
+zzle? furiously iro	1
+zzle; furiously regular ac	2
+zzle; fluffily special a	1
+zzle: slyly even ideas wake fu	1
+zzle. slyly final Tiresias nag a	1
+zzle. slyly	2
+zzle. silently ironic deposits mu	2
+zzle. regular, regular foxes 	1
+zzle. regular, express theodol	1
+zzle. regular, express packages hagg	1
--- a/external/duckdb/test/sql/aggregate/distinct/grouped/multiple_grouping_sets.test
+++ b/external/duckdb/test/sql/aggregate/distinct/grouped/multiple_grouping_sets.test
@@ -0,0 +1,283 @@
+# name: test/sql/aggregate/distinct/grouped/multiple_grouping_sets.test
+# group: [grouped]
+
+statement ok
+SET default_null_order='nulls_first';
+
+statement ok
+pragma enable_verification
+
+statement ok
+pragma verify_parallelism
+
+# Distinct aggregates mixed with multiple grouping sets
+
+statement ok
+create table students (
+	course VARCHAR,
+	type VARCHAR,
+	value BIGINT
+);
+
+statement ok
+insert into students
+		(course, type, value)
+	values
+		('CS', 'Bachelor', 20),
+		('CS', 'Bachelor', 10),
+		('CS', 'PhD', -20),
+		('Math', 'Masters', 10),
+		('CS', NULL, -15),
+		('CS', NULL, 10),
+		('Math', NULL, 15);
+
+query IIII
+select course, type, count(*), sum(distinct value) from students group by course, type order by all;
+----
+CS	NULL	2	-5
+CS	Bachelor	2	30
+CS	PhD	1	-20
+Math	NULL	1	15
+Math	Masters	1	10
+
+query IIII
+select course, type, count(*), sum(distinct value) from students group by (course, type) order by all;
+----
+CS	NULL	2	-5
+CS	Bachelor	2	30
+CS	PhD	1	-20
+Math	NULL	1	15
+Math	Masters	1	10
+
+query III
+select course, count(*), sum(distinct value) from students group by (), course, () order by all;
+----
+CS	5	-5
+Math	2	25
+
+# multiple grouping sets
+query IIII
+select count(*), course, type, sum(distinct value)
+	from students
+	group by grouping sets ((course), (type))
+	order by all;
+----
+1	NULL	Masters	10
+1	NULL	PhD	-20
+2	NULL	Bachelor	30
+2	Math	NULL	25
+3	NULL	NULL	10
+5	CS	NULL	-5
+
+# multiple grouping sets
+# these are combined as if they are regular GROUP BY expressions
+# i.e. the result is just GROUP BY course, type
+
+# If multiple grouping items are specified in a single GROUP BY clause,
+# then the final list of grouping sets is the cross product of the individual items.
+query IIIII
+select sum(distinct value), count(*), course, avg(distinct value), type
+	from students
+	group by grouping sets (course), grouping sets(type)
+	order by all;
+----
+-20	1	CS	-20.0	PhD
+-5	2	CS	-2.5	NULL
+10	1	Math	10.0	Masters
+15	1	Math	15.0	NULL
+30	2	CS	15.0	Bachelor
+
+# combining grouping sets with non-grouping sets
+query IIIII
+select sum(distinct value), count(*), count(distinct value), course, type
+	from students
+	group by course, grouping sets(type)
+	order by all;
+----
+-20	1	1	CS	PhD
+-5	2	2	CS	NULL
+10	1	1	Math	Masters
+15	1	1	Math	NULL
+30	2	2	CS	Bachelor
+
+# with multiple grouping sets...
+query IIIII
+select count(*), ARG_MIN(distinct value%5, value), course, sum(distinct value), type
+	from students
+	group by course, grouping sets(type, ())
+	order by all;
+----
+1	0	CS	-20	PhD
+1	0	Math	10	Masters
+1	0	Math	15	NULL
+2	0	CS	-5	NULL
+2	0	CS	30	Bachelor
+2	0	Math	25	NULL
+5	0	CS	-5	NULL
+
+query IIII
+select sum(distinct value), count(*), course, type
+	from students
+	group by grouping sets((course, type), (course))
+	order by all;
+----
+-20	1	CS	PhD
+-5	2	CS	NULL
+-5	5	CS	NULL
+10	1	Math	Masters
+15	1	Math	NULL
+25	2	Math	NULL
+30	2	CS	Bachelor
+
+# nested grouping sets
+# If one GROUPING SETS clause is nested inside another,
+# the effect is the same as if all the elements of the inner clause had been written directly in the outer clause.
+query IIIIII
+select count(*), count(distinct value), count(value), course, sum(distinct value), type
+	from students
+	group by grouping sets (grouping sets(course), grouping sets(type))
+	order by all;
+----
+1	1	1	NULL	-20	PhD
+1	1	1	NULL	10	Masters
+2	2	2	NULL	30	Bachelor
+2	2	2	Math	25	NULL
+3	3	3	NULL	10	NULL
+5	4	5	CS	-5	NULL
+
+query IIIIII
+select count(*), avg(distinct value) FILTER (where value < 5), avg(distinct value), course, avg(value), type
+        from students
+        group by grouping sets (grouping sets(course, ()), grouping sets(type))
+        order by all;
+----
+1	NULL	10.0	NULL	10.0	Masters
+1	-20.0	-20.0	NULL	-20.0	PhD
+2	NULL	12.5	Math	12.5	NULL
+2	NULL	15.0	NULL	15.0	Bachelor
+3	-15.0	3.3333333333333335	NULL	3.3333333333333335	NULL
+5	-17.5	-1.25	CS	1.0	NULL
+7	-17.5	2.0	NULL	4.285714285714286	NULL
+
+query IIII
+select count(*), sum(distinct value), course, type
+        from students
+        group by grouping sets ((course), (), (type))
+        order by all;
+----
+1	-20	NULL	PhD
+1	10	NULL	Masters
+2	25	Math	NULL
+2	30	NULL	Bachelor
+3	10	NULL	NULL
+5	-5	CS	NULL
+7	10	NULL	NULL
+
+query IIIII
+select count(*), count(distinct value), sum(distinct value), course, type
+        from students
+        group by grouping sets(course, ()), grouping sets(type)
+        order by all;
+----
+1	1	-20	NULL	PhD
+1	1	-20	CS	PhD
+1	1	10	NULL	Masters
+1	1	10	Math	Masters
+1	1	15	Math	NULL
+2	2	-5	CS	NULL
+2	2	30	NULL	Bachelor
+2	2	30	CS	Bachelor
+3	3	10	NULL	NULL
+
+query IIIII
+select sum(distinct value), count(*), course, type, sum(distinct value)
+        from students
+        group by grouping sets(course, ()), type
+        order by all;
+----
+-20	1	NULL	PhD	-20
+-20	1	CS	PhD	-20
+-5	2	CS	NULL	-5
+10	1	NULL	Masters	10
+10	1	Math	Masters	10
+10	3	NULL	NULL	10
+15	1	Math	NULL	15
+30	2	NULL	Bachelor	30
+30	2	CS	Bachelor	30
+
+query IIIII
+select sum(distinct value) FILTER (where value % 10 != 0), count(*), course, type, sum(distinct value)
+        from students
+        group by grouping sets((course, type), (type))
+        order by all;
+----
+NULL	1	NULL	Masters	10
+NULL	1	NULL	PhD	-20
+NULL	1	CS	PhD	-20
+NULL	1	Math	Masters	10
+NULL	2	NULL	Bachelor	30
+NULL	2	CS	Bachelor	30
+-15	2	CS	NULL	-5
+0	3	NULL	NULL	10
+15	1	Math	NULL	15
+
+# references to group ids by index
+query IIII
+select count(*), sum(distinct value), course, type
+        from students
+        group by grouping sets((3, 4), (4))
+        order by all;
+----
+1	-20	NULL	PhD
+1	-20	CS	PhD
+1	10	NULL	Masters
+1	10	Math	Masters
+1	15	Math	NULL
+2	-5	CS	NULL
+2	30	NULL	Bachelor
+2	30	CS	Bachelor
+3	10	NULL	NULL
+
+query IIII
+select count(*), course AS crs, sum(distinct value), type AS tp
+        from students
+        group by grouping sets((crs, tp), (tp))
+        order by all;
+----
+1	NULL	-20	PhD
+1	NULL	10	Masters
+1	CS	-20	PhD
+1	Math	10	Masters
+1	Math	15	NULL
+2	NULL	30	Bachelor
+2	CS	-5	NULL
+2	CS	30	Bachelor
+3	NULL	10	NULL
+
+query IIII
+select sum(distinct value), count(*), course, type
+        from students
+        group by grouping sets (grouping sets(course, ()), grouping sets(type, ()))
+        order by all;
+----
+-20	1	NULL	PhD
+-5	5	CS	NULL
+10	1	NULL	Masters
+10	3	NULL	NULL
+10	7	NULL	NULL
+10	7	NULL	NULL
+25	2	Math	NULL
+30	2	NULL	Bachelor
+
+# re-do the first query with one thread (internal issue 2046)
+statement ok
+set threads=1
+
+query IIII
+select course, type, count(*), sum(distinct value) from students group by course, type order by all;
+----
+CS	NULL	2	-5
+CS	Bachelor	2	30
+CS	PhD	1	-20
+Math	NULL	1	15
+Math	Masters	1	10
--- a/external/duckdb/test/sql/aggregate/distinct/grouped/partitioned_case.test_slow
+++ b/external/duckdb/test/sql/aggregate/distinct/grouped/partitioned_case.test_slow
@@ -0,0 +1,64 @@
+# name: test/sql/aggregate/distinct/grouped/partitioned_case.test_slow
+# group: [grouped]
+
+# force parallelism of the queries
+statement ok
+PRAGMA verify_parallelism
+
+statement ok
+create table tbl as select i%100 as few, i%100000 as many, i%5 as groups from range(1000000) tbl(i);
+
+# Few distinct values, not partitioned
+
+query I
+select count(distinct few) from tbl group by groups;
+----
+20
+20
+20
+20
+20
+
+# Many distinct values, partitioned
+
+query I
+select count(distinct many) from tbl group by groups;
+----
+20000
+20000
+20000
+20000
+20000
+
+# Mix of few and many distinct values
+
+query II
+select count(distinct few), count(distinct many) from tbl group by groups;
+----
+20	20000
+20	20000
+20	20000
+20	20000
+20	20000
+
+# Mixed, different order
+
+query II
+select count(distinct many), count(distinct few) from tbl group by groups;
+----
+20000	20
+20000	20
+20000	20
+20000	20
+20000	20
+
+# Mixed, with a non-distinct aggregate in between
+
+query III
+select count(distinct many), count(few), count(distinct few) from tbl group by groups;
+----
+20000	200000	20
+20000	200000	20
+20000	200000	20
+20000	200000	20
+20000	200000	20
--- a/external/duckdb/test/sql/aggregate/distinct/grouped/results/all_valid.csv
+++ b/external/duckdb/test/sql/aggregate/distinct/grouped/results/all_valid.csv
--- a/external/duckdb/test/sql/aggregate/distinct/grouped/results/mostly_nulls.csv
+++ b/external/duckdb/test/sql/aggregate/distinct/grouped/results/mostly_nulls.csv
--- a/external/duckdb/test/sql/aggregate/distinct/grouped/results/with_nulls.csv
+++ b/external/duckdb/test/sql/aggregate/distinct/grouped/results/with_nulls.csv
--- a/external/duckdb/test/sql/aggregate/distinct/grouped/simple.test_slow
+++ b/external/duckdb/test/sql/aggregate/distinct/grouped/simple.test_slow
@@ -0,0 +1,52 @@
+# name: test/sql/aggregate/distinct/grouped/simple.test_slow
+# description: Test aggregation/group by statements
+# group: [grouped]
+
+statement ok
+pragma enable_verification
+
+statement ok
+pragma verify_parallelism
+
+statement ok
+PRAGMA verify_external
+
+statement ok
+CREATE TABLE test (a INTEGER, b INTEGER);
+
+statement ok
+INSERT INTO test VALUES
+	(11, 22),
+	(12, 19),
+	(13, 23),
+	(13, 22),
+	(12, 21),
+	(12, 19)
+
+# aggregations with group by
+query IIII
+SELECT b, SUM(distinct a), SUM(distinct a+2), AVG(a) FROM test GROUP BY b ORDER BY b;
+----
+19	12	14	12.0
+21	12	14	12.0
+22	24	28	12.0
+23	13	15	13.0
+
+# This test fails on master if we parallelize distinct aggregates with no extra work: the 8-thread and 1-thread runs below share the label test1 and must produce the same result
+
+statement ok
+create table distinct_sum as select ((random() * 100) % 2)::BIGINT as i, (i % 10) as j from range(1000000) tbl(i);
+
+statement ok
+pragma threads=8;
+
+query I nosort test1
+SELECT sum(distinct i) from distinct_sum group by j order by j;
+----
+
+statement ok
+pragma threads=1;
+
+query I nosort test1
+SELECT sum(distinct i) from distinct_sum group by j order by j;
+----
--- a/external/duckdb/test/sql/aggregate/distinct/grouped/string_agg.test
+++ b/external/duckdb/test/sql/aggregate/distinct/grouped/string_agg.test
@@ -0,0 +1,77 @@
+# name: test/sql/aggregate/distinct/grouped/string_agg.test
+# group: [grouped]
+
+statement ok
+SET default_null_order='nulls_first';
+
+statement ok
+pragma enable_verification
+
+statement ok
+pragma verify_parallelism
+
+# test string aggregation on a set of values
+statement ok
+CREATE TABLE strings(
+	g INTEGER,
+	x VARCHAR,
+	y VARCHAR
+);
+
+statement ok
+INSERT INTO strings VALUES
+	(1,'a','/'),
+	(1,'b','-'),
+	(2,'i','/'),
+	(2,NULL,'-'),
+	(2,'j','+'),
+	(3,'p','/'),
+	(4,'x','/'),
+	(4,'y','-'),
+	(4,'z','+');
+
+# ORDER + FILTER + DISTINCT
+query II
+SELECT g, STRING_AGG(DISTINCT y, ',' ORDER BY y DESC) FILTER (WHERE g < 4)
+FROM strings
+GROUP BY g
+ORDER BY 1
+----
+1	/,-
+2	/,-,+
+3	/
+4	NULL
+
+# ORDER + FILTER + DISTINCT, mixed with non-distinct aggregates
+query IIII
+SELECT g, count(y), STRING_AGG(DISTINCT y, ',' ORDER BY y DESC) FILTER (WHERE g < 4), sum(1)
+FROM strings
+GROUP BY g
+ORDER BY 1
+----
+1	2	/,-	2
+2	3	/,-,+	3
+3	1	/	1
+4	3	NULL	3
+
+statement error
+SELECT g, STRING_AGG(DISTINCT y ORDER BY y, '_' ) FILTER (WHERE g < 4)
+FROM strings
+GROUP BY g
+ORDER BY 1
+----
+ORDER BY non-integer literal has no effect
+
+statement ok
+SET order_by_non_integer_literal=true
+
+query II
+SELECT g, STRING_AGG(DISTINCT y ORDER BY y, '_' ) FILTER (WHERE g < 4)
+FROM strings
+GROUP BY g
+ORDER BY 1
+----
+1	-,/
+2	+,-,/
+3	/
+4	NULL
--- a/external/duckdb/test/sql/aggregate/distinct/issue2656.test
+++ b/external/duckdb/test/sql/aggregate/distinct/issue2656.test
@@ -0,0 +1,43 @@
+# name: test/sql/aggregate/distinct/issue2656.test
+# description: Issue #2656: DISTINCT + ORDER produces incorrect result
+# group: [distinct]
+
+statement ok
+PRAGMA enable_verification
+
+statement ok
+CREATE TABLE T (t1 int, t2 int);
+
+statement ok
+INSERT INTO t VALUES (1, 1), (1, 2);
+
+query I
+SELECT DISTINCT t1
+FROM T
+ORDER BY t1, t2;
+----
+1
+
+query II
+SELECT DISTINCT ON (1) t1, t2
+FROM T
+ORDER BY t1, t2;
+----
+1	1
+
+query I
+SELECT DISTINCT t1 FROM T
+UNION
+SELECT DISTINCT t1 FROM T
+ORDER BY t1;
+----
+1
+
+query I
+SELECT DISTINCT t1 FROM T
+UNION ALL
+SELECT DISTINCT t1 FROM T
+ORDER BY t1;
+----
+1
+1
--- a/external/duckdb/test/sql/aggregate/distinct/issue8505.test
+++ b/external/duckdb/test/sql/aggregate/distinct/issue8505.test
@@ -0,0 +1,24 @@
+# name: test/sql/aggregate/distinct/issue8505.test
+# description: Issue #8505: Distinct On Memory Issues
+# group: [distinct]
+
+statement ok
+create table test (id int, provider int, record_key int, record_rank int, record_date int)
+
+# this subquery was fast, we select only record_key from it so it's equivalent to the next query
+query II
+explain select record_key from (
+    select distinct on (id, provider) id, provider, record_key from test order by id, provider, record_rank desc, record_date
+)
+----
+physical_plan	<REGEX>:.*HASH_GROUP_BY.*#0.*#1.*arg_min_null.*
+
+
+# this query was slow, but should be equivalent to the first query
+query II
+explain select distinct on (id, provider) record_key from test order by id, provider, record_rank desc, record_date
+----
+physical_plan	<REGEX>:.*HASH_GROUP_BY.*#0.*#1.*arg_min_null.*
+
+# the problem was that our aggregate became way too big in the second case because we didn't de-duplicate columns
+# this regex checks that both plans have the same 'minimal' aggregate (this test is a bit fragile to binder changes, though)
--- a/external/duckdb/test/sql/aggregate/distinct/issue9241.test
+++ b/external/duckdb/test/sql/aggregate/distinct/issue9241.test
@@ -0,0 +1,23 @@
+# name: test/sql/aggregate/distinct/issue9241.test
+# description: Test DISTINCT ON
+# group: [distinct]
+
+statement ok
+PRAGMA enable_verification
+
+statement ok
+create table foo (a int, b int);
+
+statement ok
+insert into foo values (1, 1), (2, 1), (2, 2);
+
+query II
+select * from (select distinct on (a) a, b from foo order by a, b desc) sub;
+----
+1	1
+2	2
+
+query II
+select * from (select distinct on (a) a, b from foo order by a, b desc) sub where b <> 2;
+----
+1	1
--- a/external/duckdb/test/sql/aggregate/distinct/test_distinct.test
+++ b/external/duckdb/test/sql/aggregate/distinct/test_distinct.test
@@ -0,0 +1,68 @@
+# name: test/sql/aggregate/distinct/test_distinct.test
+# description: Test DISTINCT keyword
+# group: [distinct]
+
+statement ok
+PRAGMA enable_verification
+
+statement ok
+CREATE TABLE test (a INTEGER, b INTEGER);
+
+statement ok
+INSERT INTO test VALUES (11, 22), (13, 22), (11, 21), (11, 22)
+
+query II
+SELECT DISTINCT a, b FROM test ORDER BY a, b
+----
+11	21
+11	22
+13	22
+
+query II
+SELECT DISTINCT test.a, b FROM test ORDER BY a, b
+----
+11	21
+11	22
+13	22
+
+query I
+SELECT DISTINCT a FROM test ORDER BY a
+----
+11
+13
+
+query I
+SELECT DISTINCT b FROM test ORDER BY b
+----
+21
+22
+
+query IR
+SELECT DISTINCT a, SUM(B) FROM test GROUP BY a ORDER BY a
+----
+11	65.000000
+13	22.000000
+
+query I
+SELECT DISTINCT MAX(b) FROM test GROUP BY a
+----
+22
+
+query I
+SELECT DISTINCT CASE WHEN a > 11 THEN 11 ELSE a END FROM test
+----
+11
+
+# Distinct LIST<VARCHAR> with NULL in a subsequent position (Issue #3056)
+statement ok
+CREATE TABLE issue3056 AS (SELECT * FROM (VALUES
+	(['TGTA']),
+	(['CGGT']),
+	(['CCTC']),
+	(['TCTA']),
+	(['AGGG']),
+	(NULL))
+tbl(genes));
+
+statement ok
+SELECT DISTINCT genes FROM issue3056;
--- a/external/duckdb/test/sql/aggregate/distinct/test_distinct_on.test
+++ b/external/duckdb/test/sql/aggregate/distinct/test_distinct_on.test
@@ -0,0 +1,153 @@
+# name: test/sql/aggregate/distinct/test_distinct_on.test
+# description: Test DISTINCT ON
+# group: [distinct]
+
+statement ok
+PRAGMA enable_verification
+
+statement ok
+CREATE TABLE integers(i INTEGER, j INTEGER, k INTEGER);
+
+statement ok
+INSERT INTO integers VALUES (2, 3, 5), (4, 5, 6), (2, 7, 6)
+
+query II
+SELECT DISTINCT ON (i) i, j FROM integers WHERE i <> 2
+----
+4	5
+
+query II
+SELECT DISTINCT ON (j) i, j FROM integers WHERE i <> 2
+----
+4	5
+
+query II
+SELECT DISTINCT ON (j, i) i, j FROM integers WHERE i <> 2
+----
+4	5
+
+query II
+SELECT DISTINCT ON (j + 1, i * 3) i, j FROM integers WHERE i <> 2
+----
+4	5
+
+query II
+SELECT DISTINCT ON (1) i, j FROM integers ORDER BY i
+----
+2	3
+4	5
+
+query II
+SELECT DISTINCT ON (1) i, j FROM integers ORDER BY i LIMIT 1
+----
+2	3
+
+query II
+SELECT DISTINCT ON (1) i, j FROM integers ORDER BY i LIMIT 1 OFFSET 1
+----
+4	5
+
+query II
+SELECT DISTINCT ON (2) i, j FROM integers ORDER BY 2
+----
+2	3
+4	5
+2	7
+
+query II
+SELECT DISTINCT ON (2) j, k FROM integers ORDER BY 2
+----
+3	5
+5	6
+
+query III
+SELECT DISTINCT ON (3) i, j, k FROM integers ORDER BY 2
+----
+2	3	5
+4	5	6
+
+query III
+SELECT DISTINCT ON (3) i, j, k FROM integers ORDER BY 3
+----
+2	3	5
+4	5	6
+
+query II
+SELECT DISTINCT ON (2) j, (SELECT i FROM integers WHERE i=2 LIMIT 1) FROM integers ORDER BY 2
+----
+3	2
+
+query II
+SELECT DISTINCT ON (2) j, (SELECT DISTINCT ON (i) i FROM integers ORDER BY 1 LIMIT 1) FROM integers ORDER BY 2
+----
+3	2
+
+query II
+SELECT DISTINCT ON (i) i, j FROM integers ORDER BY j
+----
+2	3
+4	5
+
+query II
+SELECT * FROM (SELECT DISTINCT ON (i) i, j FROM integers) tbl1 WHERE i <> 2
+----
+4	5
+
+# order by a column that does not exist in the SELECT clause
+query II
+SELECT DISTINCT ON (i) i, j FROM integers ORDER BY k
+----
+2	3
+4	5
+
+# the previous query is equivalent to this one, but without projecting k
+query III
+SELECT DISTINCT ON (i) i, j, k FROM integers ORDER BY k
+----
+2	3	5
+4	5	6
+
+# binding of DISTINCT ON with different column names
+query II
+SELECT DISTINCT ON (integers.i) i, j FROM integers ORDER BY 1, 2
+----
+2	3
+4	5
+
+query II
+SELECT DISTINCT ON (i) integers.i, integers.j FROM integers ORDER BY 1, 2
+----
+2	3
+4	5
+
+query II
+SELECT DISTINCT ON (integers.i) integers.i, integers.j FROM integers ORDER BY i, j
+----
+2	3
+4	5
+
+# DISTINCT ON position out of bounds: only one column is selected
+statement error
+SELECT DISTINCT ON (2) i FROM integers
+----
+Binder Error: ORDER term out of range - should be between 1 and 1
+
+# DISTINCT ON constant returns an error
+statement error
+SELECT DISTINCT ON(i, 'literal') i FROM integers
+----
+DISTINCT ON non-integer literal has no effect
+
+statement ok
+SET order_by_non_integer_literal=true
+
+query I
+SELECT DISTINCT ON(i, 'literal') i FROM integers ORDER BY ALL
+----
+2
+4
+
+statement error
+PREPARE v1 AS select distinct on (?) 42;
+----
+Parameter not supported in DISTINCT ON clause
--- a/external/duckdb/test/sql/aggregate/distinct/test_distinct_order_by.test
+++ b/external/duckdb/test/sql/aggregate/distinct/test_distinct_order_by.test
@@ -0,0 +1,57 @@
+# name: test/sql/aggregate/distinct/test_distinct_order_by.test
+# description: Test DISTINCT and ORDER BY
+# group: [distinct]
+
+statement ok
+PRAGMA enable_verification
+
+statement ok
+CREATE TABLE integers(i INTEGER);
+
+statement ok
+INSERT INTO integers VALUES (1), (2), (3)
+
+query I
+SELECT DISTINCT i%2 FROM integers ORDER BY 1
+----
+0
+1
+
+# controversial: Postgres fails here with the error "with SELECT DISTINCT columns from ORDER BY must appear in the
+# SELECT clause" but SQLite succeeds
+# we also succeed here, even though it can give unintuitive results
+# this is transformed into SELECT DISTINCT ON (1) i % 2, i
+query I
+SELECT DISTINCT i % 2 FROM integers WHERE i<3 ORDER BY i
+----
+1
+0
+
+query II
+SELECT DISTINCT ON (1) i % 2, i FROM integers WHERE i<3 ORDER BY i
+----
+1	1
+0	2
+
+# binding of DISTINCT with column names
+query I
+SELECT DISTINCT integers.i FROM integers ORDER BY i DESC
+----
+3
+2
+1
+
+query I
+SELECT DISTINCT i FROM integers ORDER BY integers.i DESC
+----
+3
+2
+1
+
+query I
+SELECT DISTINCT integers.i FROM integers ORDER BY integers.i DESC
+----
+3
+2
+1
+
--- a/external/duckdb/test/sql/aggregate/distinct/ungrouped/test_distinct_ungrouped.test
+++ b/external/duckdb/test/sql/aggregate/distinct/ungrouped/test_distinct_ungrouped.test
@@ -0,0 +1,194 @@
+# name: test/sql/aggregate/distinct/ungrouped/test_distinct_ungrouped.test
+# description: DISTINCT aggregations, without GROUP BY
+# group: [ungrouped]
+
+# These tests target the ungrouped aggregate operator rather than the aggregate functions themselves,
+# so they mostly focus on the order and mixing of distinct and non-distinct aggregates
+# and not on variation between types and functions
+
+#distinct aggregate =	'D'
+#regular aggregate =	'-'
+
+statement ok
+PRAGMA enable_verification
+
+statement ok
+PRAGMA verify_external
+
+statement ok
+create table tbl as
+	(select i%50 as i, i%100 as j from range(50000) tbl(i))
+;
+
+# D
+query I
+select
+	count(distinct i)
+from tbl;
+----
+50
+
+# D--
+query III
+select
+	sum(distinct i),
+	sum(i),
+	sum(j)
+from tbl;
+----
+1225	1225000	2475000
+
+# --D
+query III
+select
+	sum(i),
+	sum(j),
+	sum(distinct i)
+from tbl;
+----
+1225000	2475000	1225
+
+# -D-
+query III
+select
+	sum(i),
+	sum(distinct i),
+	sum(j)
+from tbl;
+----
+1225000	1225	2475000
+
+# D-D
+query III
+select
+	sum(distinct i),
+	count(j),
+	sum(distinct j)
+from tbl;
+----
+1225	50000	4950
+
+#-D-D
+query IIII
+select
+	sum(j),
+	sum(distinct i),
+	count(j),
+	sum(distinct j)
+from tbl;
+----
+2475000	1225	50000	4950
+
+#-D-D (same query as the previous test)
+query IIII
+select
+	sum(j),
+	sum(distinct i),
+	count(j),
+	sum(distinct j)
+from tbl;
+----
+2475000	1225	50000	4950
+
+#D-D-
+query IIII
+select
+	sum(distinct i),
+	count(j),
+	sum(distinct j),
+	sum(j)
+from tbl;
+----
+1225	50000	4950	2475000
+
+# The next tests repeat the previous tests, with filters added to some of the aggregates
+# filtered =		'F'
+# not filtered =	'-'
+
+# D
+# F
+query I
+select
+	count(distinct i) FILTER (WHERE i >= 20)
+from tbl;
+----
+30
+
+# D--
+# -FF
+query III
+select
+	sum(distinct i),
+	sum(i) FILTER (WHERE j < 20),
+	sum(j) FILTER (WHERE i >= 20)
+from tbl;
+----
+1225	95000	1785000
+
+# --D
+# -FF
+query III
+select
+	sum(i),
+	sum(j) FILTER (WHERE j == 0),
+	sum(distinct i) FILTER (WHERE i == 0)
+from tbl;
+----
+1225000	0	0
+
+# -D-
+# F-F
+query III
+select
+	sum(i) FILTER (WHERE j == 5),
+	sum(distinct i),
+	sum(j) FILTER (WHERE i == 5)
+from tbl;
+----
+2500	1225	30000
+
+# D-D
+# F-F
+query III
+select
+	sum(distinct i) FILTER (WHERE i == 5),
+	count(j),
+	sum(distinct j) FILTER (WHERE i == 5)
+from tbl;
+----
+5	50000	60
+
+#-D-D
+#FF--
+query IIII
+select
+	sum(j) FILTER (WHERE j == 5),
+	sum(distinct i) FILTER (WHERE j == 5),
+	count(j),
+	sum(distinct j)
+from tbl;
+----
+2500	5	50000	4950
+
+#-D-D
+#F--F
+query IIII
+select
+	sum(j) FILTER (WHERE i == 5),
+	sum(distinct i),
+	count(j),
+	sum(distinct j) FILTER (WHERE j == 5)
+from tbl;
+----
+30000	1225	50000	5
+
+#D-D- (filters: --FF)
+query IIII
+select
+	sum(distinct i),
+	count(j),
+	sum(distinct j) FILTER (WHERE j == 5),
+	sum(j) FILTER (WHERE j == 5)
+from tbl;
+----
+1225	50000	5	2500
--- a/external/duckdb/test/sql/aggregate/distinct/ungrouped/test_distinct_ungrouped.test_slow
+++ b/external/duckdb/test/sql/aggregate/distinct/ungrouped/test_distinct_ungrouped.test_slow
@@ -0,0 +1,64 @@
+# name: test/sql/aggregate/distinct/ungrouped/test_distinct_ungrouped.test_slow
+# description: DISTINCT aggregations, without GROUP BY
+# group: [ungrouped]
+
+statement ok
+SET default_null_order='nulls_first';
+
+# Since these tests are made to test the ungrouped operator, and not necessarily the functions themselves
+# This test will mostly focus on the order and mixing of distinct and non-distinct aggregates
+# And not on variation between types and functions
+
+#Recursive CTE
+
+query I
+with recursive t as (select 1 as x union select sum(distinct x+1) from t where x < 3) select * from t order by x;
+----
+NULL
+1
+2
+3
+
+# Prepared statement
+
+statement ok
+CREATE TABLE tbl AS SELECT * FROM range(1000000) tbl(i);
+
+statement ok
+PREPARE v1 AS SELECT SUM(DISTINCT i%5+?::INT) FROM tbl;
+
+query I
+EXECUTE v1(1);
+----
+15
+
+query I
+EXECUTE v1(2);
+----
+20
+
+query I
+EXECUTE v1(3);
+----
+25
+
+# DISTINCT aggregate parameter as expression
+
+query I
+SELECT COUNT(distinct i % 5) from tbl;
+----
+5
+
+# Correlated subquery
+
+query I
+SELECT COUNT(distinct (SELECT i%5)) from tbl;
+----
+5
+
+## Aggregate with multiple parameters
+
+query I
+SELECT ARG_MIN(distinct i%5, i) from tbl;
+----
+0
--- a/external/duckdb/test/sql/aggregate/distinct/ungrouped/test_distinct_ungrouped_parallel.test_slow
+++ b/external/duckdb/test/sql/aggregate/distinct/ungrouped/test_distinct_ungrouped_parallel.test_slow
@@ -0,0 +1,45 @@
+# name: test/sql/aggregate/distinct/ungrouped/test_distinct_ungrouped_parallel.test_slow
+# description: DISTINCT aggregations, without GROUP BY
+# group: [ungrouped]
+
+# force parallelism of the queries
+statement ok
+PRAGMA verify_parallelism
+
+# Few distinct values, not partitioned
+
+statement ok
+create table tbl as select i%100 as few, i%100000 as many from range(1000000) tbl(i);
+
+query I
+select count(distinct few) from tbl;
+----
+100
+
+# Many distinct values, partitioned
+
+query I
+select count(distinct many) from tbl;
+----
+100000
+
+# Mix of few and many distinct values
+
+query II
+select count(distinct few), count(distinct many) from tbl;
+----
+100	100000
+
+# Mixed, different order
+
+query II
+select count(distinct many), count(distinct few) from tbl;
+----
+100000	100
+
+# Mixed, with a non-distinct aggregate in between
+
+query III
+select count(distinct many), count(few), count(distinct few) from tbl;
+----
+100000	1000000	100
--- a/external/duckdb/test/sql/aggregate/distinct/ungrouped/test_distinct_ungrouped_shared_input.test
+++ b/external/duckdb/test/sql/aggregate/distinct/ungrouped/test_distinct_ungrouped_shared_input.test
@@ -0,0 +1,11 @@
+# name: test/sql/aggregate/distinct/ungrouped/test_distinct_ungrouped_shared_input.test
+# description: DISTINCT aggregations, without GROUP BY
+# group: [ungrouped]
+
+statement ok
+create table tbl as select i%50 as i from range(1000000) tbl(i);
+
+query IIIII
+select count(distinct i), min(distinct i), max(distinct i), sum(distinct i), product(distinct i) from tbl;
+----
+50	0	49	1225	0.0