should be it

This commit is contained in:
2025-10-24 19:21:19 -05:00
parent a4b23fc57c
commit f09560c7b1
14047 changed files with 3161551 additions and 1 deletions

View File

@@ -0,0 +1,83 @@
# name: test/sql/aggregate/distinct/distinct_on_nulls.test
# description: Test DISTINCT ON with NULL values
# group: [distinct]
statement ok
PRAGMA enable_verification
statement ok
CREATE TABLE integers(i INTEGER, j INTEGER);
statement ok
INSERT INTO integers VALUES (2, 3), (4, 5), (2, NULL), (NULL, NULL);
query II
SELECT DISTINCT ON (i) i, j FROM integers ORDER BY j
----
2 3
4 5
NULL NULL
query II
SELECT DISTINCT ON (i) i, j FROM integers ORDER BY i, j
----
2 3
4 5
NULL NULL
query II
SELECT DISTINCT ON (i) i, j FROM integers ORDER BY i NULLS FIRST, j NULLS FIRST
----
NULL NULL
2 NULL
4 5
query II
SELECT DISTINCT ON (i) i, j FROM integers ORDER BY i, j NULLS FIRST
----
2 NULL
4 5
NULL NULL
# multi-way sort and ties
statement ok
CREATE TABLE distinct_on_test(key INTEGER, v1 VARCHAR, v2 INTEGER[], v3 INTEGER);
# The trailing "-- ASC" / "-- DESC" markers flag the row that DISTINCT ON (key) keeps for
# each key under the ascending and the DESC NULLS FIRST orderings queried below.
statement ok
INSERT INTO distinct_on_test VALUES
(1, 'hello', ARRAY[1], 42), -- ASC
(1, 'hello', ARRAY[1], 42),
(1, 'hello', ARRAY[1], 43), -- DESC
(2, NULL, NULL, 0), -- ASC
(2, NULL, NULL, 1),
(2, NULL, NULL, NULL), -- DESC
(3, 'thisisalongstring', NULL, 0), -- ASC
(3, 'thisisalongstringbutlonger', NULL, 1), -- DESC ('...butlonger' sorts after '...butevenlonger')
(3, 'thisisalongstringbutevenlonger', ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9], 2)
;
query IIII
SELECT DISTINCT ON (key) * FROM distinct_on_test ORDER BY key, v1, v2, v3
----
1 hello [1] 42
2 NULL NULL 0
3 thisisalongstring NULL 0
query IIII
SELECT DISTINCT ON (key) * FROM distinct_on_test WHERE key <> 2 ORDER BY key, v1, v2, v3
----
1 hello [1] 42
3 thisisalongstring NULL 0
query IIII
SELECT DISTINCT ON (key) * FROM distinct_on_test ORDER BY key, v1 DESC NULLS FIRST, v2 DESC NULLS FIRST, v3 DESC NULLS FIRST
----
1 hello [1] 43
2 NULL NULL NULL
3 thisisalongstringbutlonger NULL 1
query IIII
SELECT DISTINCT ON (key) * FROM distinct_on_test WHERE key <> 2 ORDER BY key, v1 DESC NULLS FIRST, v2 DESC NULLS FIRST, v3 DESC NULLS FIRST
----
1 hello [1] 43
3 thisisalongstringbutlonger NULL 1

View File

@@ -0,0 +1,161 @@
# name: test/sql/aggregate/distinct/distinct_on_order_by.test
# description: Test DISTINCT ON ORDER BY
# group: [distinct]
statement ok
PRAGMA enable_verification
statement ok
CREATE TABLE integers(i INTEGER, j INTEGER, k INTEGER);
statement ok
INSERT INTO integers VALUES (2, 3, 5), (4, 5, 6), (2, 7, 6);
query II
SELECT DISTINCT ON (i) i, j FROM integers ORDER BY i, j DESC;
----
2 7
4 5
query II
SELECT DISTINCT ON (i) i, j FROM integers ORDER BY i, j;
----
2 3
4 5
# we don't need to ORDER BY i
query II
SELECT DISTINCT ON (i) i, j FROM integers ORDER BY j DESC;
----
2 7
4 5
query II
SELECT DISTINCT ON (i) i, j FROM integers ORDER BY j;
----
2 3
4 5
# DISTINCT ON in correlated subqueries
# Each subquery has no FROM clause of its own, so it only sees the current outer row;
# DISTINCT ON (with or without ORDER BY) has nothing to collapse and k simply echoes j.
query III
SELECT i, j, (SELECT DISTINCT ON(i) j) AS k FROM integers ORDER BY i, j;
----
2 3 3
2 7 7
4 5 5
query III
SELECT i, j, (SELECT DISTINCT ON(i) j ORDER BY i, j DESC) AS k FROM integers ORDER BY i, j;
----
2 3 3
2 7 7
4 5 5
query III
SELECT i, j, (SELECT DISTINCT ON(i) j ORDER BY i, k) AS k FROM integers ORDER BY i, j;
----
2 3 3
2 7 7
4 5 5
# DISTINCT ON with multiple parameters
statement ok
INSERT INTO integers VALUES (2, 3, 7), (4, 5, 11);
query III
SELECT DISTINCT ON(i) i, j, k FROM integers ORDER BY i, j ASC, k ASC
----
2 3 5
4 5 6
query III
SELECT DISTINCT ON(i) i, j, k FROM integers ORDER BY i, j ASC, k DESC
----
2 3 7
4 5 11
# DISTINCT ON with NULL values
statement ok
INSERT INTO integers VALUES (2, NULL, 27), (4, 88, NULL);
query III
SELECT DISTINCT ON(i) i, j, k FROM integers ORDER BY i, j NULLS FIRST, k DESC NULLS LAST;
----
2 NULL 27
4 5 11
query III
SELECT DISTINCT ON(i) i, j, k FROM integers ORDER BY i, j NULLS FIRST, k NULLS FIRST;
----
2 NULL 27
4 5 6
query III
SELECT DISTINCT ON(i) i, j, k FROM integers ORDER BY i, k NULLS FIRST, j NULLS FIRST;
----
2 3 5
4 88 NULL
# examples from the original issue
statement ok
create table foo(a real, b real);
statement ok
insert into foo values (1, 69), (1, 420), (2, 69), (2, 420);
query II rowsort
select distinct on(a) a, b from foo order by b asc;
----
1 69
2 69
query II rowsort
select distinct on(a) a, b from foo order by b desc;
----
1 420
2 420
statement ok
CREATE TABLE example (
id INT,
person_id INT,
address_id INT,
effective_date DATE
);
statement ok
INSERT INTO
example (id, person_id, address_id, effective_date)
VALUES
(1, 2, 1, '2000-01-01'), -- Moved to first house
(5, 2, 2, '2004-08-19'), -- Went to uni
(9, 2, 1, '2007-06-12'), -- Moved back home
(2, 4, 3, '2007-05-18'), -- Moved to first house
(3, 4, 4, '2016-02-09') -- Moved to new house
;
query IIII
SELECT DISTINCT ON (person_id)
*
FROM
example
ORDER BY
person_id,
effective_date ASC
;
----
1 2 1 2000-01-01
2 4 3 2007-05-18
query IIII
SELECT DISTINCT ON (person_id)
*
FROM
example
ORDER BY
person_id,
effective_date DESC
;
----
9 2 1 2007-06-12
3 4 4 2016-02-09

View File

@@ -0,0 +1,274 @@
# name: test/sql/aggregate/distinct/grouped/combined_with_grouping.test
# group: [grouped]
statement ok
SET default_null_order='nulls_first';
statement ok
PRAGMA enable_verification
statement ok
PRAGMA verify_parallelism
statement ok
create table students (
course VARCHAR,
type VARCHAR,
value BIGINT
);
statement ok
insert into students
(course, type, value)
values
('CS', 'Bachelor', 34),
('CS', 'Bachelor', 34),
('CS', 'PhD', 12),
('Math', 'Masters', 12),
('CS', NULL, 10),
('CS', NULL, 12),
('Math', NULL, 12),
('Math', NULL, NULL);
query IIII
SELECT GROUPING(course), course, sum(distinct value), COUNT(*) FROM students GROUP BY course ORDER BY all;
----
0 CS 56 5
0 Math 12 3
query IIII
SELECT sum(distinct value), GROUPING_ID(course), course, COUNT(*) FROM students GROUP BY course ORDER BY all;
----
12 0 Math 3
56 0 CS 5
query IIIIIII
SELECT GROUPING(course), GROUPING(type), course, type, sum(distinct value), COUNT(*), sum(distinct value), FROM students GROUP BY course, type ORDER BY all;
----
0 0 CS NULL 22 2 22
0 0 CS Bachelor 34 2 34
0 0 CS PhD 12 1 12
0 0 Math NULL 12 2 12
0 0 Math Masters 12 1 12
query IIIIIII
SELECT GROUPING(course), GROUPING(type), avg(distinct value), course, type, COUNT(*), sum(distinct value), FROM students GROUP BY CUBE(course, type) ORDER BY all;
----
0 0 11.0 CS NULL 2 22
0 0 12.0 CS PhD 1 12
0 0 12.0 Math NULL 2 12
0 0 12.0 Math Masters 1 12
0 0 34.0 CS Bachelor 2 34
0 1 12.0 Math NULL 3 12
0 1 18.666666666666668 CS NULL 5 56
1 0 11.0 NULL NULL 4 22
1 0 12.0 NULL Masters 1 12
1 0 12.0 NULL PhD 1 12
1 0 34.0 NULL Bachelor 2 34
1 1 18.666666666666668 NULL NULL 8 56
query IIIIII
SELECT sum(distinct value), GROUPING(course, type), course, type, COUNT(*), sum(distinct value), FROM students GROUP BY CUBE(course, type) ORDER BY all;
----
12 0 CS PhD 1 12
12 0 Math NULL 2 12
12 0 Math Masters 1 12
12 1 Math NULL 3 12
12 2 NULL Masters 1 12
12 2 NULL PhD 1 12
22 0 CS NULL 2 22
22 2 NULL NULL 4 22
34 0 CS Bachelor 2 34
34 2 NULL Bachelor 2 34
56 1 CS NULL 5 56
56 3 NULL NULL 8 56
query IIIIIIII
SELECT GROUPING(course), GROUPING(type), sum(distinct value), GROUPING(course)+GROUPING(type), course, type, count(distinct value), COUNT(*) FROM students GROUP BY CUBE(course, type) ORDER BY all;
----
0 0 12 0 CS PhD 1 1
0 0 12 0 Math NULL 1 2
0 0 12 0 Math Masters 1 1
0 0 22 0 CS NULL 2 2
0 0 34 0 CS Bachelor 1 2
0 1 12 1 Math NULL 1 3
0 1 56 1 CS NULL 3 5
1 0 12 1 NULL Masters 1 1
1 0 12 1 NULL PhD 1 1
1 0 22 1 NULL NULL 2 4
1 0 34 1 NULL Bachelor 1 2
1 1 56 2 NULL NULL 3 8
# many repeated groupings
query IIIIIII
SELECT GROUPING(course, type, course, course, type, value, type, course), avg(distinct value), avg(value), avg(distinct value), course, type, COUNT(*) FROM students GROUP BY CUBE(course, type, value) ORDER BY all;
----
0 NULL NULL NULL Math NULL 1
0 10.0 10.0 10.0 CS NULL 1
0 12.0 12.0 12.0 CS NULL 1
0 12.0 12.0 12.0 CS PhD 1
0 12.0 12.0 12.0 Math NULL 1
0 12.0 12.0 12.0 Math Masters 1
0 34.0 34.0 34.0 CS Bachelor 2
4 11.0 11.0 11.0 CS NULL 2
4 12.0 12.0 12.0 CS PhD 1
4 12.0 12.0 12.0 Math NULL 2
4 12.0 12.0 12.0 Math Masters 1
4 34.0 34.0 34.0 CS Bachelor 2
74 NULL NULL NULL Math NULL 1
74 10.0 10.0 10.0 CS NULL 1
74 12.0 12.0 12.0 CS NULL 2
74 12.0 12.0 12.0 Math NULL 2
74 34.0 34.0 34.0 CS NULL 2
78 12.0 12.0 12.0 Math NULL 3
78 18.666666666666668 20.4 18.666666666666668 CS NULL 5
177 NULL NULL NULL NULL NULL 1
177 10.0 10.0 10.0 NULL NULL 1
177 12.0 12.0 12.0 NULL NULL 2
177 12.0 12.0 12.0 NULL Masters 1
177 12.0 12.0 12.0 NULL PhD 1
177 34.0 34.0 34.0 NULL Bachelor 2
181 11.0 11.333333333333334 11.0 NULL NULL 4
181 12.0 12.0 12.0 NULL Masters 1
181 12.0 12.0 12.0 NULL PhD 1
181 34.0 34.0 34.0 NULL Bachelor 2
251 NULL NULL NULL NULL NULL 1
251 10.0 10.0 10.0 NULL NULL 1
251 12.0 12.0 12.0 NULL NULL 4
251 34.0 34.0 34.0 NULL NULL 2
255 18.666666666666668 18.0 18.666666666666668 NULL NULL 8
# GROUPING with different table qualifications
query IIIIIIII
SELECT GROUPING(students.course), GROUPING(students.type), sum(distinct value), GROUPING(course)+GROUPING(type), course, avg(distinct value), type, COUNT(*) FROM students GROUP BY CUBE(course, type, value) ORDER BY all;
----
0 0 NULL 0 Math NULL NULL 1
0 0 10 0 CS 10.0 NULL 1
0 0 12 0 CS 12.0 NULL 1
0 0 12 0 CS 12.0 PhD 1
0 0 12 0 CS 12.0 PhD 1
0 0 12 0 Math 12.0 NULL 1
0 0 12 0 Math 12.0 NULL 2
0 0 12 0 Math 12.0 Masters 1
0 0 12 0 Math 12.0 Masters 1
0 0 22 0 CS 11.0 NULL 2
0 0 34 0 CS 34.0 Bachelor 2
0 0 34 0 CS 34.0 Bachelor 2
0 1 NULL 1 Math NULL NULL 1
0 1 10 1 CS 10.0 NULL 1
0 1 12 1 CS 12.0 NULL 2
0 1 12 1 Math 12.0 NULL 2
0 1 12 1 Math 12.0 NULL 3
0 1 34 1 CS 34.0 NULL 2
0 1 56 1 CS 18.666666666666668 NULL 5
1 0 NULL 1 NULL NULL NULL 1
1 0 10 1 NULL 10.0 NULL 1
1 0 12 1 NULL 12.0 NULL 2
1 0 12 1 NULL 12.0 Masters 1
1 0 12 1 NULL 12.0 Masters 1
1 0 12 1 NULL 12.0 PhD 1
1 0 12 1 NULL 12.0 PhD 1
1 0 22 1 NULL 11.0 NULL 4
1 0 34 1 NULL 34.0 Bachelor 2
1 0 34 1 NULL 34.0 Bachelor 2
1 1 NULL 2 NULL NULL NULL 1
1 1 10 2 NULL 10.0 NULL 1
1 1 12 2 NULL 12.0 NULL 4
1 1 34 2 NULL 34.0 NULL 2
1 1 56 2 NULL 18.666666666666668 NULL 8
query IIIIIIII
SELECT GROUPING(course), GROUPING(type), avg(value), GROUPING(course)+GROUPING(type), avg(distinct value), course, type, COUNT(*) FROM students GROUP BY CUBE(students.course, students.type) ORDER BY all;
----
0 0 11.0 0 11.0 CS NULL 2
0 0 12.0 0 12.0 CS PhD 1
0 0 12.0 0 12.0 Math NULL 2
0 0 12.0 0 12.0 Math Masters 1
0 0 34.0 0 34.0 CS Bachelor 2
0 1 12.0 1 12.0 Math NULL 3
0 1 20.4 1 18.666666666666668 CS NULL 5
1 0 11.333333333333334 1 11.0 NULL NULL 4
1 0 12.0 1 12.0 NULL Masters 1
1 0 12.0 1 12.0 NULL PhD 1
1 0 34.0 1 34.0 NULL Bachelor 2
1 1 18.0 2 18.666666666666668 NULL NULL 8
# GROUPING in HAVING clause
query IIIII
SELECT GROUPING(course), GROUPING(value), course, sum(distinct value), COUNT(*) FROM students GROUP BY CUBE(course, value) HAVING GROUPING(course)=0 ORDER BY all;
----
0 0 CS 10 1
0 0 CS 12 2
0 0 CS 34 2
0 0 Math NULL 1
0 0 Math 12 2
0 1 CS 56 5
0 1 Math 12 3
query IIIIIIII
SELECT GROUPING(course), GROUPING(type), sum(distinct value), course, type, sum(distinct value), avg(distinct value), COUNT(*) FROM students GROUP BY CUBE(course, value, type, value) HAVING GROUPING(students.course)=0 ORDER BY all;
----
0 0 NULL Math NULL NULL NULL 1
0 0 NULL Math NULL NULL NULL 1
0 0 NULL Math NULL NULL NULL 1
0 0 10 CS NULL 10 10.0 1
0 0 10 CS NULL 10 10.0 1
0 0 10 CS NULL 10 10.0 1
0 0 12 CS NULL 12 12.0 1
0 0 12 CS NULL 12 12.0 1
0 0 12 CS NULL 12 12.0 1
0 0 12 CS PhD 12 12.0 1
0 0 12 CS PhD 12 12.0 1
0 0 12 CS PhD 12 12.0 1
0 0 12 CS PhD 12 12.0 1
0 0 12 Math NULL 12 12.0 1
0 0 12 Math NULL 12 12.0 1
0 0 12 Math NULL 12 12.0 1
0 0 12 Math NULL 12 12.0 2
0 0 12 Math Masters 12 12.0 1
0 0 12 Math Masters 12 12.0 1
0 0 12 Math Masters 12 12.0 1
0 0 12 Math Masters 12 12.0 1
0 0 22 CS NULL 22 11.0 2
0 0 34 CS Bachelor 34 34.0 2
0 0 34 CS Bachelor 34 34.0 2
0 0 34 CS Bachelor 34 34.0 2
0 0 34 CS Bachelor 34 34.0 2
0 1 NULL Math NULL NULL NULL 1
0 1 NULL Math NULL NULL NULL 1
0 1 NULL Math NULL NULL NULL 1
0 1 10 CS NULL 10 10.0 1
0 1 10 CS NULL 10 10.0 1
0 1 10 CS NULL 10 10.0 1
0 1 12 CS NULL 12 12.0 2
0 1 12 CS NULL 12 12.0 2
0 1 12 CS NULL 12 12.0 2
0 1 12 Math NULL 12 12.0 2
0 1 12 Math NULL 12 12.0 2
0 1 12 Math NULL 12 12.0 2
0 1 12 Math NULL 12 12.0 3
0 1 34 CS NULL 34 34.0 2
0 1 34 CS NULL 34 34.0 2
0 1 34 CS NULL 34 34.0 2
0 1 56 CS NULL 56 18.666666666666668 5
# GROUPING in ORDER BY clause
query IIIII
SELECT type, COUNT(*), avg(value), sum(distinct value), avg(distinct value), FROM students GROUP BY CUBE(value, type) ORDER BY GROUPING(value), GROUPING(type), 1, 2, 3, 4, 5;
----
NULL 1 NULL NULL NULL
NULL 1 10.0 10 10.0
NULL 2 12.0 12 12.0
Bachelor 2 34.0 34 34.0
Masters 1 12.0 12 12.0
PhD 1 12.0 12 12.0
NULL 1 NULL NULL NULL
NULL 1 10.0 10 10.0
NULL 2 34.0 34 34.0
NULL 4 12.0 12 12.0
NULL 4 11.333333333333334 22 11.0
Bachelor 2 34.0 34 34.0
Masters 1 12.0 12 12.0
PhD 1 12.0 12 12.0
NULL 8 18.0 56 18.666666666666668

View File

@@ -0,0 +1,122 @@
# name: test/sql/aggregate/distinct/grouped/coverage.test_slow
# description: DISTINCT aggregations
# group: [grouped]
# These tests target the grouped aggregate operator rather than the aggregate functions themselves,
# so they focus on the ordering and mixing of distinct and non-distinct aggregates
# and not on variation between types and functions.
#Recursive CTE
query I
with recursive t as (select 1 as x union select sum(distinct x+1) from t where x < 3 group by x) select * from t order by x;
----
1
2
3
# Prepared statement
statement ok
CREATE TABLE tbl AS SELECT i, i%5 as j FROM range(1000000) tbl(i);
statement ok
PREPARE v1 AS SELECT SUM(DISTINCT i%5+?::INT) FROM tbl group by j order by all;
query I
EXECUTE v1(1);
----
1
2
3
4
5
query I
EXECUTE v1(2);
----
2
3
4
5
6
query I
EXECUTE v1(3);
----
3
4
5
6
7
# DISTINCT aggregate parameter as expression
query I
SELECT COUNT(distinct i % 5) from tbl group by j;
----
1
1
1
1
1
# Correlated subquery
query I
SELECT COUNT(distinct (SELECT i%5)) from tbl group by j;
----
1
1
1
1
1
# Aggregate with multiple parameters
query I
SELECT ARG_MIN(distinct i%5, i) from tbl group by j order by all;
----
0
1
2
3
4
# Distinct lists
statement ok
CREATE TABLE lists_tbl AS SELECT i%20 as groups, [x + i for x in range(280)] AS l FROM range(200000) tmp(i);
query IIII
SELECT COUNT(l), avg(groups), COUNT(DISTINCT l), groups FROM lists_tbl group by groups order by groups limit 10;
----
10000 0.0 10000 0
10000 1.0 10000 1
10000 2.0 10000 2
10000 3.0 10000 3
10000 4.0 10000 4
10000 5.0 10000 5
10000 6.0 10000 6
10000 7.0 10000 7
10000 8.0 10000 8
10000 9.0 10000 9
# Non-inlined (>12 length) strings
statement ok
create table strings_tbl as select gen_random_uuid() as strings, i as groups from range(200000) tbl(i);
query II
select count(strings), count(distinct strings) from strings_tbl group by groups order by groups limit 10;
----
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1

View File

@@ -0,0 +1,255 @@
# name: test/sql/aggregate/distinct/grouped/distinct_and_non_distinct_mixed.test_slow
# description: DISTINCT aggregations
# group: [grouped]
# These tests target the grouped aggregate operator rather than the aggregate functions themselves,
# so they focus on the ordering and mixing of distinct and non-distinct aggregates
# and not on variation between types and functions.
#distinct aggregate = 'D'
#regular aggregate = '-'
statement ok
PRAGMA enable_verification
statement ok
create table tbl as
(select i%50 as i, i%100 as j, i%5 as groups from range(50000) tbl(i))
;
# D
query I
select
count(distinct i)
from tbl group by groups;;
----
10
10
10
10
10
# D--
query III rowsort
select
sum(distinct i),
sum(i),
sum(j)
from tbl group by groups;;
----
225 225000 475000
235 235000 485000
245 245000 495000
255 255000 505000
265 265000 515000
# --D
query III rowsort
select
sum(i),
sum(j),
sum(distinct i)
from tbl group by groups;;
----
225000 475000 225
235000 485000 235
245000 495000 245
255000 505000 255
265000 515000 265
# -D-
query III rowsort
select
sum(i),
sum(distinct i),
sum(j)
from tbl group by groups;;
----
225000 225 475000
235000 235 485000
245000 245 495000
255000 255 505000
265000 265 515000
# D-D
query III rowsort
select
sum(distinct i),
count(j),
sum(distinct j)
from tbl group by groups;;
----
225 10000 950
235 10000 970
245 10000 990
255 10000 1010
265 10000 1030
#-D-D
query IIII rowsort
select
sum(j),
sum(distinct i),
count(j),
sum(distinct j)
from tbl group by groups;;
----
475000 225 10000 950
485000 235 10000 970
495000 245 10000 990
505000 255 10000 1010
515000 265 10000 1030
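#-D-D
# NOTE(review): this record repeats the preceding -D-D query verbatim; confirm whether a different pattern was intended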
query IIII rowsort
select
sum(j),
sum(distinct i),
count(j),
sum(distinct j)
from tbl group by groups;;
----
475000 225 10000 950
485000 235 10000 970
495000 245 10000 990
505000 255 10000 1010
515000 265 10000 1030
#D-D-
query IIII rowsort
select
sum(distinct i),
count(j),
sum(distinct j),
sum(j)
from tbl group by groups;;
----
225 10000 950 475000
235 10000 970 485000
245 10000 990 495000
255 10000 1010 505000
265 10000 1030 515000
# These next tests will repeat the previous test, with the addition of filters
# filtered = 'F'
# not filtered = '-'
# D
# F
query I
select
count(distinct i) FILTER (WHERE i >= 20)
from tbl group by groups;;
----
6
6
6
6
6
# D--
# -FF
query III rowsort
select
sum(distinct i),
sum(i) FILTER (WHERE j < 20),
sum(j) FILTER (WHERE i >= 20)
from tbl group by groups;;
----
225 15000 345000
235 17000 351000
245 19000 357000
255 21000 363000
265 23000 369000
# --D
# -FF
query III rowsort
select
sum(i),
sum(j) FILTER (WHERE j == 0),
sum(distinct i) FILTER (WHERE i == 0)
from tbl group by groups;;
----
225000 0 0
235000 NULL NULL
245000 NULL NULL
255000 NULL NULL
265000 NULL NULL
# -D-
# F-F
query III rowsort
select
sum(i) FILTER (WHERE j == 5),
sum(distinct i),
sum(j) FILTER (WHERE i == 5)
from tbl group by groups;;
----
2500 225 30000
NULL 235 NULL
NULL 245 NULL
NULL 255 NULL
NULL 265 NULL
# D-D
# F-F
query III rowsort
select
sum(distinct i) FILTER (WHERE i == 5),
count(j),
sum(distinct j) FILTER (WHERE i == 5)
from tbl group by groups;;
----
5 10000 60
NULL 10000 NULL
NULL 10000 NULL
NULL 10000 NULL
NULL 10000 NULL
#-D-D
#FF--
query IIII rowsort
select
sum(j) FILTER (WHERE j == 5),
sum(distinct i) FILTER (WHERE j == 5),
count(j),
sum(distinct j)
from tbl group by groups;;
----
2500 5 10000 950
NULL NULL 10000 1010
NULL NULL 10000 1030
NULL NULL 10000 970
NULL NULL 10000 990
#-D-D
#F--F
query IIII rowsort
select
sum(j) FILTER (WHERE i == 5),
sum(distinct i),
count(j),
sum(distinct j) FILTER (WHERE j == 5)
from tbl group by groups;;
----
30000 225 10000 5
NULL 235 10000 NULL
NULL 245 10000 NULL
NULL 255 10000 NULL
NULL 265 10000 NULL
#D-D-
#--FF
query IIII rowsort
select
sum(distinct i),
count(j),
sum(distinct j) FILTER (WHERE j == 5),
sum(j) FILTER (WHERE j == 5)
from tbl group by groups;;
----
225 10000 5 2500
235 10000 NULL NULL
245 10000 NULL NULL
255 10000 NULL NULL
265 10000 NULL NULL

View File

@@ -0,0 +1,64 @@
# name: test/sql/aggregate/distinct/grouped/distinct_grouping_tpch.test_slow
# group: [grouped]
require tpch
require ram 8gb
require disk_space 40gb
statement ok
pragma enable_verification
statement ok
pragma verify_parallelism
statement ok
PRAGMA verify_external
statement ok
CALL dbgen(sf=1);
query IIIII
select
grouping(l_returnflag, l_linestatus),
l_returnflag,
l_linestatus,
count(distinct l_orderkey),
count(distinct l_comment)
from lineitem
group by cube(l_returnflag, l_linestatus)
order by all;
----
0 A F 644207 1181362
0 N F 30908 37987
0 N O 770587 2146525
0 R F 645527 1181807
1 A NULL 644207 1181362
1 N NULL 780997 2168690
1 R NULL 645527 1181807
2 NULL F 767956 2142221
2 NULL O 770587 2146525
3 NULL NULL 1500000 3610733
query IIIIIIIII
SELECT
COUNT(DISTINCT l_orderkey),
COUNT(DISTINCT l_partkey),
COUNT(*),
MIN(l_orderkey),
MAX(l_orderkey),
MIN(l_partkey),
MAX(l_partkey),
SUM(distinct_comment),
AVG(distinct_comment)
FROM (
select
l_orderkey,
l_partkey,
count(distinct l_comment) AS distinct_comment
from lineitem
group by cube(l_orderkey, l_partkey)
);
----
1500000 200000 7701170 1 6000000 1 200000 21614257 2.8066199032095125

View File

@@ -0,0 +1,15 @@
# name: test/sql/aggregate/distinct/grouped/identical_inputs.test
# description: DISTINCT aggregations
# group: [grouped]
statement ok
create table tbl as select i%50::BIGINT as i, i%5::BIGINT as j from range(1000000) tbl(i);
query IIIII
select count(distinct i), min(distinct i), max(distinct i), sum(distinct i), product(distinct i) from tbl group by j order by all;
----
10 0 45 225 0.0
10 1 46 235 1213563326976.0
10 2 47 245 3965002804224.0
10 3 48 255 9360955828224.0
10 4 49 265 19053977918976.0

View File

@@ -0,0 +1,23 @@
# name: test/sql/aggregate/distinct/grouped/issue_5070.test
# group: [grouped]
statement ok
pragma enable_verification
statement ok
pragma verify_parallelism
query II
WITH evs AS (
SELECT * FROM (VALUES
('1','123','7'),
('1','456','7')
) AS t("id", "type", "value" )
)
SELECT "id"
, COUNT(DISTINCT "value") FILTER (WHERE "type" = '456') AS type_456_count
FROM evs
GROUP BY "id"
----
1 1

View File

@@ -0,0 +1,23 @@
# name: test/sql/aggregate/distinct/grouped/long_input.test_slow
# description: DISTINCT aggregations
# group: [grouped]
# This is string aggr, which goes through the HashAggregateOperator, so this is in fact 'grouped'
statement ok
create or replace table tbl as select * FROM ( VALUES
([repeat('a', 1000000)]),
([repeat('a', 1000000)]),
([repeat('a', 1000000)]),
([repeat('a', 1000000)]),
([repeat('b', 1000000)]),
([repeat('b', 1000000)]),
([repeat('b', 1000000)]),
([repeat('b', 1000000)]),
) tbl(i)
query I
select (min(distinct i)::TEXT)[2:2] from tbl group by i order by all;
----
a
b

View File

@@ -0,0 +1,56 @@
# name: test/sql/aggregate/distinct/grouped/many_distinct_groups.test_slow
# group: [grouped]
statement ok
pragma enable_verification
statement ok
pragma verify_parallelism
statement ok
PRAGMA verify_external
# 10000 is the radix_limit
# Many distinct grouping values + many distinct aggregate inputs
# Create the tables
statement ok
create table all_valid as select i%81982 as value, i%20000 as groups from range(200000) tbl(i);
statement ok
create table with_nulls as select CASE WHEN i%2343=0 THEN NULL ELSE i%63423 END as value, i%20000 as groups from range(200000) tbl(i);
statement ok
create table mostly_nulls as select CASE WHEN i%7=0 THEN i ELSE NULL END as value, i%20000 as groups from range(200000) tbl(i);
# Perform the checks
query IIII
select avg(distinct value), sum(distinct value), avg(value), count(distinct value) from all_valid group by groups order by groups;
----
<FILE>:test/sql/aggregate/distinct/grouped/results/all_valid.csv
query IIII
select avg(distinct value), sum(distinct value), avg(value), count(distinct value) from with_nulls group by groups order by groups;
----
<FILE>:test/sql/aggregate/distinct/grouped/results/with_nulls.csv
query IIII
select avg(distinct value), sum(distinct value), avg(value), count(distinct value) from mostly_nulls group by groups order by groups;
----
<FILE>:test/sql/aggregate/distinct/grouped/results/mostly_nulls.csv
# Only many distinct grouping values
statement ok
create table all_distinct as select 1 as value, i as groups from range(200000) tbl(i);
# Every group holds exactly one row (groups = i), so the DISTINCT and non-DISTINCT
# forms of each aggregate must agree. Both queries carry the label q1 and no expected
# values, so the runner compares their results against each other.
# Both queries project four columns, hence IIII.
query IIII nosort q1
select sum(distinct value), count(distinct value), avg(distinct value), sum(distinct value) from all_distinct group by groups order by groups;
----
query IIII nosort q1
select sum(value), count(value), avg(value), sum(value) from all_distinct group by groups order by groups;
----

View File

@@ -0,0 +1,43 @@
# name: test/sql/aggregate/distinct/grouped/memory_consumption.test_slow
# group: [grouped]
require tpch
statement ok
pragma enable_verification
statement ok
pragma verify_parallelism
statement ok
pragma threads=4
# This same test will fail on current master (01c074b55e), as it requires 3.2gb minimum
# UPDATE: PR #5263 found issues with unpinned blocks - this now requires more memory
statement ok
pragma memory_limit='4gb'
statement ok
CALL dbgen(sf=1);
query II
select
l_comment,
count(distinct l_orderkey),
from
lineitem
group by 1
order by all desc
limit 10
;
----
zzle? furiously iro 1
zzle; furiously regular ac 2
zzle; fluffily special a 1
zzle: slyly even ideas wake fu 1
zzle. slyly final Tiresias nag a 1
zzle. slyly 2
zzle. silently ironic deposits mu 2
zzle. regular, regular foxes 1
zzle. regular, express theodol 1
zzle. regular, express packages hagg 1

View File

@@ -0,0 +1,283 @@
# name: test/sql/aggregate/distinct/grouped/multiple_grouping_sets.test
# group: [grouped]
statement ok
SET default_null_order='nulls_first';
statement ok
pragma enable_verification
statement ok
pragma verify_parallelism
# Distinct aggregates mixed with multiple grouping sets
statement ok
create table students (
course VARCHAR,
type VARCHAR,
value BIGINT
);
statement ok
insert into students
(course, type, value)
values
('CS', 'Bachelor', 20),
('CS', 'Bachelor', 10),
('CS', 'PhD', -20),
('Math', 'Masters', 10),
('CS', NULL, -15),
('CS', NULL, 10),
('Math', NULL, 15);
query IIII
select course, type, count(*), sum(distinct value) from students group by course, type order by all;
----
CS NULL 2 -5
CS Bachelor 2 30
CS PhD 1 -20
Math NULL 1 15
Math Masters 1 10
query IIII
select course, type, count(*), sum(distinct value) from students group by (course, type) order by all;
----
CS NULL 2 -5
CS Bachelor 2 30
CS PhD 1 -20
Math NULL 1 15
Math Masters 1 10
query III
select course, count(*), sum(distinct value) from students group by (), course, () order by all;
----
CS 5 -5
Math 2 25
# multiple grouping sets
query IIII
select count(*), course, type, sum(distinct value)
from students
group by grouping sets ((course), (type))
order by all;
----
1 NULL Masters 10
1 NULL PhD -20
2 NULL Bachelor 30
2 Math NULL 25
3 NULL NULL 10
5 CS NULL -5
# multiple grouping sets
# these are combined as if they are regular GROUP BY expressions
# i.e. the result is just GROUP BY course, type
# If multiple grouping items are specified in a single GROUP BY clause,
# then the final list of grouping sets is the cross product of the individual items.
query IIIII
select sum(distinct value), count(*), course, avg(distinct value), type
from students
group by grouping sets (course), grouping sets(type)
order by all;
----
-20 1 CS -20.0 PhD
-5 2 CS -2.5 NULL
10 1 Math 10.0 Masters
15 1 Math 15.0 NULL
30 2 CS 15.0 Bachelor
# combining grouping sets with non-grouping sets
query IIIII
select sum(distinct value), count(*), count(distinct value), course, type
from students
group by course, grouping sets(type)
order by all;
----
-20 1 1 CS PhD
-5 2 2 CS NULL
10 1 1 Math Masters
15 1 1 Math NULL
30 2 2 CS Bachelor
# with multiple grouping sets...
query IIIII
select count(*), ARG_MIN(distinct value%5, value), course, sum(distinct value), type
from students
group by course, grouping sets(type, ())
order by all;
----
1 0 CS -20 PhD
1 0 Math 10 Masters
1 0 Math 15 NULL
2 0 CS -5 NULL
2 0 CS 30 Bachelor
2 0 Math 25 NULL
5 0 CS -5 NULL
query IIII
select sum(distinct value), count(*), course, type
from students
group by grouping sets((course, type), (course))
order by all;
----
-20 1 CS PhD
-5 2 CS NULL
-5 5 CS NULL
10 1 Math Masters
15 1 Math NULL
25 2 Math NULL
30 2 CS Bachelor
# nested grouping sets
# If one GROUPING SETS clause is nested inside another,
# the effect is the same as if all the elements of the inner clause had been written directly in the outer clause.
query IIIIII
select count(*), count(distinct value), count(value), course, sum(distinct value), type
from students
group by grouping sets (grouping sets(course), grouping sets(type))
order by all;
----
1 1 1 NULL -20 PhD
1 1 1 NULL 10 Masters
2 2 2 NULL 30 Bachelor
2 2 2 Math 25 NULL
3 3 3 NULL 10 NULL
5 4 5 CS -5 NULL
query IIIIII
select count(*), avg(distinct value) FILTER (where value < 5), avg(distinct value), course, avg(value), type
from students
group by grouping sets (grouping sets(course, ()), grouping sets(type))
order by all;
----
1 NULL 10.0 NULL 10.0 Masters
1 -20.0 -20.0 NULL -20.0 PhD
2 NULL 12.5 Math 12.5 NULL
2 NULL 15.0 NULL 15.0 Bachelor
3 -15.0 3.3333333333333335 NULL 3.3333333333333335 NULL
5 -17.5 -1.25 CS 1.0 NULL
7 -17.5 2.0 NULL 4.285714285714286 NULL
query IIII
select count(*), sum(distinct value), course, type
from students
group by grouping sets ((course), (), (type))
order by all;
----
1 -20 NULL PhD
1 10 NULL Masters
2 25 Math NULL
2 30 NULL Bachelor
3 10 NULL NULL
5 -5 CS NULL
7 10 NULL NULL
query IIIII
select count(*), count(distinct value), sum(distinct value), course, type
from students
group by grouping sets(course, ()), grouping sets(type)
order by all;
----
1 1 -20 NULL PhD
1 1 -20 CS PhD
1 1 10 NULL Masters
1 1 10 Math Masters
1 1 15 Math NULL
2 2 -5 CS NULL
2 2 30 NULL Bachelor
2 2 30 CS Bachelor
3 3 10 NULL NULL
query IIIII
select sum(distinct value), count(*), course, type, sum(distinct value)
from students
group by grouping sets(course, ()), type
order by all;
----
-20 1 NULL PhD -20
-20 1 CS PhD -20
-5 2 CS NULL -5
10 1 NULL Masters 10
10 1 Math Masters 10
10 3 NULL NULL 10
15 1 Math NULL 15
30 2 NULL Bachelor 30
30 2 CS Bachelor 30
query IIIII
select sum(distinct value) FILTER (where value % 10 != 0), count(*), course, type, sum(distinct value)
from students
group by grouping sets((course, type), (type))
order by all;
----
NULL 1 NULL Masters 10
NULL 1 NULL PhD -20
NULL 1 CS PhD -20
NULL 1 Math Masters 10
NULL 2 NULL Bachelor 30
NULL 2 CS Bachelor 30
-15 2 CS NULL -5
0 3 NULL NULL 10
15 1 Math NULL 15
# references to group ids by index
query IIII
select count(*), sum(distinct value), course, type
from students
group by grouping sets((3, 4), (4))
order by all;
----
1 -20 NULL PhD
1 -20 CS PhD
1 10 NULL Masters
1 10 Math Masters
1 15 Math NULL
2 -5 CS NULL
2 30 NULL Bachelor
2 30 CS Bachelor
3 10 NULL NULL
query IIII
select count(*), course AS crs, sum(distinct value), type AS tp
from students
group by grouping sets((crs, tp), (tp))
order by all;
----
1 NULL -20 PhD
1 NULL 10 Masters
1 CS -20 PhD
1 Math 10 Masters
1 Math 15 NULL
2 NULL 30 Bachelor
2 CS -5 NULL
2 CS 30 Bachelor
3 NULL 10 NULL
query IIII
select sum(distinct value), count(*), course, type
from students
group by grouping sets (grouping sets(course, ()), grouping sets(type, ()))
order by all;
----
-20 1 NULL PhD
-5 5 CS NULL
10 1 NULL Masters
10 3 NULL NULL
10 7 NULL NULL
10 7 NULL NULL
25 2 Math NULL
30 2 NULL Bachelor
# re-do the first query with one thread (internal issue 2046)
statement ok
set threads=1
query IIII
select course, type, count(*), sum(distinct value) from students group by course, type order by all;
----
CS NULL 2 -5
CS Bachelor 2 30
CS PhD 1 -20
Math NULL 1 15
Math Masters 1 10

View File

@@ -0,0 +1,64 @@
# name: test/sql/aggregate/distinct/grouped/partitioned_case.test_slow
# group: [grouped]
# force parallelism of the queries
statement ok
PRAGMA verify_parallelism
statement ok
create table tbl as select i%100 as few, i%100000 as many, i%5 as groups from range(1000000) tbl(i);
# Small number of distinct values (i%100), not partitioned
query I
select count(distinct few) from tbl group by groups;
----
20
20
20
20
20
# Large number of distinct values (i%100000), partitioned
query I
select count(distinct many) from tbl group by groups;
----
20000
20000
20000
20000
20000
# Mixed small and large numbers of distinct values
query II
select count(distinct few), count(distinct many) from tbl group by groups;
----
20 20000
20 20000
20 20000
20 20000
20 20000
# Mixed, different order
query II
select count(distinct many), count(distinct few) from tbl group by groups;
----
20000 20
20000 20
20000 20
20000 20
20000 20
# Mixed, with non-distinct inbetween
query III
select count(distinct many), count(few), count(distinct few) from tbl group by groups;
----
20000 200000 20
20000 200000 20
20000 200000 20
20000 200000 20
20000 200000 20

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,52 @@
# name: test/sql/aggregate/distinct/grouped/simple.test_slow
# description: Test aggregation/group by statements
# group: [grouped]
statement ok
pragma enable_verification
statement ok
pragma verify_parallelism
statement ok
PRAGMA verify_external
statement ok
CREATE TABLE test (a INTEGER, b INTEGER);
# (12, 19) is inserted twice: it is the only duplicate 'a' value within a single 'b' group
statement ok
INSERT INTO test VALUES
(11, 22),
(12, 19),
(13, 23),
(13, 22),
(12, 21),
(12, 19)
# aggregations with group by
# for b=19 the distinct sums count the duplicated 12 once (12 and 14, not 24 and 28)
# AVG(a) is a regular (non-distinct) aggregate mixed in between the distinct ones
query IIII
SELECT b, SUM(distinct a), SUM(distinct a+2), AVG(a) FROM test GROUP BY b ORDER BY b;
----
19 12 14 12.0
21 12 14 12.0
22 24 28 12.0
23 13 15 13.0
# test that fails on master if we parallelize distinct aggregates with no extra work
# column i is generated with random(), so no fixed expected rows are listed for the queries below
# instead both queries carry the label 'test1': the 8-thread result is compared against the 1-thread result
statement ok
create table distinct_sum as select ((random() * 100) % 2)::BIGINT as i, (i % 10) as j from range(1000000) tbl(i);
statement ok
pragma threads=8;
query I nosort test1
SELECT sum(distinct i) from distinct_sum group by j order by j;
----
statement ok
pragma threads=1;
query I nosort test1
SELECT sum(distinct i) from distinct_sum group by j order by j;
----

View File

@@ -0,0 +1,77 @@
# name: test/sql/aggregate/distinct/grouped/string_agg.test
# description: Test grouped STRING_AGG with DISTINCT combined with ORDER BY and FILTER
# group: [grouped]
statement ok
SET default_null_order='nulls_first';
statement ok
pragma enable_verification
statement ok
pragma verify_parallelism
# test string aggregation on a set of values
statement ok
CREATE TABLE strings(
g INTEGER,
x VARCHAR,
y VARCHAR
);
statement ok
INSERT INTO strings VALUES
(1,'a','/'),
(1,'b','-'),
(2,'i','/'),
(2,NULL,'-'),
(2,'j','+'),
(3,'p','/'),
(4,'x','/'),
(4,'y','-'),
(4,'z','+');
# ORDER + FILTER + DISTINCT
# the filter (g < 4) removes every row of group 4, so its aggregate is NULL
query II
SELECT g, STRING_AGG(DISTINCT y, ',' ORDER BY y DESC) FILTER (WHERE g < 4)
FROM strings
GROUP BY g
ORDER BY 1
----
1 /,-
2 /,-,+
3 /
4 NULL
# ORDER + FILTER + DISTINCT, surrounded by regular (non-distinct, unfiltered) aggregates
# count(y) and sum(1) still see all rows of group 4
query IIII
SELECT g, count(y), STRING_AGG(DISTINCT y, ',' ORDER BY y DESC) FILTER (WHERE g < 4), sum(1)
FROM strings
GROUP BY g
ORDER BY 1
----
1 2 /,- 2
2 3 /,-,+ 3
3 1 / 1
4 3 NULL 3
# here '_' comes after ORDER BY, so it is an ORDER BY term and not the separator
# ordering by a non-integer literal is rejected by default
statement error
SELECT g, STRING_AGG(DISTINCT y ORDER BY y, '_' ) FILTER (WHERE g < 4)
FROM strings
GROUP BY g
ORDER BY 1
----
ORDER BY non-integer literal has no effect
# with order_by_non_integer_literal enabled the same query is accepted
statement ok
SET order_by_non_integer_literal=true
# the values are joined with ',' (no separator argument was given) and sorted ascending by y
query II
SELECT g, STRING_AGG(DISTINCT y ORDER BY y, '_' ) FILTER (WHERE g < 4)
FROM strings
GROUP BY g
ORDER BY 1
----
1 -,/
2 +,-,/
3 /
4 NULL

View File

@@ -0,0 +1,43 @@
# name: test/sql/aggregate/distinct/issue2656.test
# description: Issue #2656: DISTINCT + ORDER produces incorrect result
# group: [distinct]
statement ok
PRAGMA enable_verification
statement ok
CREATE TABLE T (t1 int, t2 int);
# two rows with the same t1 but different t2
statement ok
INSERT INTO t VALUES (1, 1), (1, 2);
# ordering by t2, which is not in the select list, must not bring back the duplicate t1
query I
SELECT DISTINCT t1
FROM T
ORDER BY t1, t2;
----
1
# the same query written as DISTINCT ON with t2 projected: the row with the smallest t2 is kept
query II
SELECT DISTINCT ON (1) t1, t2
FROM T
ORDER BY t1, t2;
----
1 1
# UNION removes duplicates across both sides: a single row remains
query I
SELECT DISTINCT t1 FROM T
UNION
SELECT DISTINCT t1 FROM T
ORDER BY t1;
----
1
# UNION ALL keeps one row from each side
query I
SELECT DISTINCT t1 FROM T
UNION ALL
SELECT DISTINCT t1 FROM T
ORDER BY t1;
----
1
1
1

View File

@@ -0,0 +1,24 @@
# name: test/sql/aggregate/distinct/issue8505.test
# description: Issue #8505: Distinct On Memory Issues
# group: [distinct]
# no rows are inserted: the queries below only inspect the physical plan via EXPLAIN
statement ok
create table test (id int, provider int, record_key int, record_rank int, record_date int)
# this subquery was fast, we select only record_key from it so it's equivalent to the next query
query II
explain select record_key from (
select distinct on (id, provider) id, provider, record_key from test order by id, provider, record_rank desc, record_date
)
----
physical_plan <REGEX>:.*HASH_GROUP_BY.*#0.*#1.*arg_min_null.*
# this query was slow, but should be equivalent to the first query
query II
explain select distinct on (id, provider) record_key from test order by id, provider, record_rank desc, record_date
----
physical_plan <REGEX>:.*HASH_GROUP_BY.*#0.*#1.*arg_min_null.*
# the problem was that our aggregate became way too big in the second case because we didn't de-duplicate columns
# the regex checks that both plans have the same 'minimal' aggregate (this test is a bit fragile to binder changes though)

View File

@@ -0,0 +1,23 @@
# name: test/sql/aggregate/distinct/issue9241.test
# description: Test DISTINCT ON inside a subquery with a filter applied on top of it
# group: [distinct]
statement ok
PRAGMA enable_verification
statement ok
create table foo (a int, b int);
statement ok
insert into foo values (1, 1), (2, 1), (2, 2);
# baseline: per value of a, the row with the largest b is kept
query II
select * from (select distinct on (a) a, b from foo order by a, b desc) sub;
----
1 1
2 2
# the outer filter applies to the result of DISTINCT ON: a=2 resolves to (2, 2) and is then removed
# the row (2, 1) must not show up in its place
query II
select * from (select distinct on (a) a, b from foo order by a, b desc) sub where b <> 2;
----
1 1

View File

@@ -0,0 +1,68 @@
# name: test/sql/aggregate/distinct/test_distinct.test
# description: Test DISTINCT keyword
# group: [distinct]
statement ok
PRAGMA enable_verification
statement ok
CREATE TABLE test (a INTEGER, b INTEGER);
# (11, 22) is inserted twice: it is the only fully duplicated row
statement ok
INSERT INTO test VALUES (11, 22), (13, 22), (11, 21), (11, 22)
# DISTINCT over both columns removes the duplicated (11, 22)
query II
SELECT DISTINCT a, b FROM test ORDER BY a, b
----
11 21
11 22
13 22
# same query with a table-qualified column in the select list
query II
SELECT DISTINCT test.a, b FROM test ORDER BY a, b
----
11 21
11 22
13 22
# DISTINCT over a single column
query I
SELECT DISTINCT a FROM test ORDER BY a
----
11
13
query I
SELECT DISTINCT b FROM test ORDER BY b
----
21
22
# DISTINCT on top of a grouped aggregate: SUM(b) still sees all rows of the group (22 + 21 + 22 = 65)
query IR
SELECT DISTINCT a, SUM(B) FROM test GROUP BY a ORDER BY a
----
11 65.000000
13 22.000000
# both groups have MAX(b) = 22, so DISTINCT collapses the two group rows into one
query I
SELECT DISTINCT MAX(b) FROM test GROUP BY a
----
22
# DISTINCT over an expression: every value of a maps to 11
query I
SELECT DISTINCT CASE WHEN a > 11 THEN 11 ELSE a END FROM test
----
11
# Distinct LIST<VARCHAR> with NULL in a subsequent position (Issue #3056)
statement ok
CREATE TABLE issue3056 AS (SELECT * FROM (VALUES
(['TGTA']),
(['CGGT']),
(['CCTC']),
(['TCTA']),
(['AGGG']),
(NULL))
tbl(genes));
# only checks that the query runs without error; no result is compared
statement ok
SELECT DISTINCT genes FROM issue3056;

View File

@@ -0,0 +1,153 @@
# name: test/sql/aggregate/distinct/test_distinct_on.test
# description: Test DISTINCT ON
# group: [distinct]
statement ok
PRAGMA enable_verification
statement ok
CREATE TABLE integers(i INTEGER, j INTEGER, k INTEGER);
# i=2 occurs twice (with j=3 and j=7), k=6 occurs twice (with j=5 and j=7)
statement ok
INSERT INTO integers VALUES (2, 3, 5), (4, 5, 6), (2, 7, 6)
# with i <> 2 only the row (4, 5, 6) is left, so every DISTINCT ON target below yields the same single row
query II
SELECT DISTINCT ON (i) i, j FROM integers WHERE i <> 2
----
4 5
query II
SELECT DISTINCT ON (j) i, j FROM integers WHERE i <> 2
----
4 5
# multiple DISTINCT ON targets
query II
SELECT DISTINCT ON (j, i) i, j FROM integers WHERE i <> 2
----
4 5
# expressions as DISTINCT ON targets
query II
SELECT DISTINCT ON (j + 1, i * 3) i, j FROM integers WHERE i <> 2
----
4 5
# integer literals in DISTINCT ON are positions in the SELECT list
# NOTE(review): in several queries below the ORDER BY does not break ties inside a DISTINCT ON group
# (e.g. i=2 has j=3 and j=7 under ORDER BY i); the expected rows assume the first inserted row is kept - confirm
query II
SELECT DISTINCT ON (1) i, j FROM integers ORDER BY i
----
2 3
4 5
# LIMIT / OFFSET are applied to the DISTINCT ON result
query II
SELECT DISTINCT ON (1) i, j FROM integers ORDER BY i LIMIT 1
----
2 3
query II
SELECT DISTINCT ON (1) i, j FROM integers ORDER BY i LIMIT 1 OFFSET 1
----
4 5
# position 2 is j, which is unique: all three rows are returned
query II
SELECT DISTINCT ON (2) i, j FROM integers ORDER BY 2
----
2 3
4 5
2 7
# position 2 is k here: the two rows with k=6 collapse into one
query II
SELECT DISTINCT ON (2) j, k FROM integers ORDER BY 2
----
3 5
5 6
# DISTINCT ON k, ordered by j: for k=6 the row with the smaller j is kept
query III
SELECT DISTINCT ON (3) i, j, k FROM integers ORDER BY 2
----
2 3 5
4 5 6
query III
SELECT DISTINCT ON (3) i, j, k FROM integers ORDER BY 3
----
2 3 5
4 5 6
# DISTINCT ON a scalar subquery: the second column is the same for every row, so one row remains
query II
SELECT DISTINCT ON (2) j, (SELECT i FROM integers WHERE i=2 LIMIT 1) FROM integers ORDER BY 2
----
3 2
# DISTINCT ON nested inside the scalar subquery
query II
SELECT DISTINCT ON (2) j, (SELECT DISTINCT ON (i) i FROM integers ORDER BY 1 LIMIT 1) FROM integers ORDER BY 2
----
3 2
# DISTINCT ON i, ordered by j: for i=2 the row with the smaller j is kept
query II
SELECT DISTINCT ON (i) i, j FROM integers ORDER BY j
----
2 3
4 5
# DISTINCT ON inside a subquery, filtered from the outside
query II
SELECT * FROM (SELECT DISTINCT ON (i) i, j FROM integers) tbl1 WHERE i <> 2
----
4 5
# order by a column that does not exist in the SELECT clause
query II
SELECT DISTINCT ON (i) i, j FROM integers ORDER BY k
----
2 3
4 5
# equivalent to this, but without projecting the k
query III
SELECT DISTINCT ON (i) i, j, k FROM integers ORDER BY k
----
2 3 5
4 5 6
# binding of DISTINCT ON with different column names
query II
SELECT DISTINCT ON (integers.i) i, j FROM integers ORDER BY 1, 2
----
2 3
4 5
query II
SELECT DISTINCT ON (i) integers.i, integers.j FROM integers ORDER BY 1, 2
----
2 3
4 5
query II
SELECT DISTINCT ON (integers.i) integers.i, integers.j FROM integers ORDER BY i, j
----
2 3
4 5
# out of bounds
statement error
SELECT DISTINCT ON (2) i FROM integers
----
Binder Error: ORDER term out of range - should be between 1 and 1
# DISTINCT ON constant returns an error
statement error
SELECT DISTINCT ON(i, 'literal') i FROM integers
----
DISTINCT ON non-integer literal has no effect
# with order_by_non_integer_literal enabled the same DISTINCT ON clause is accepted
statement ok
SET order_by_non_integer_literal=true
query I
SELECT DISTINCT ON(i, 'literal') i FROM integers ORDER BY ALL
----
2
4
# prepared statement parameters are rejected in DISTINCT ON
statement error
PREPARE v1 AS select distinct on (?) 42;
----
Parameter not supported in DISTINCT ON clause

View File

@@ -0,0 +1,57 @@
# name: test/sql/aggregate/distinct/test_distinct_order_by.test
# description: Test DISTINCT and ORDER BY
# group: [distinct]
statement ok
PRAGMA enable_verification
statement ok
CREATE TABLE integers(i INTEGER);
statement ok
INSERT INTO integers VALUES (1), (2), (3)
# ORDER BY position 1 refers to the DISTINCT expression itself
query I
SELECT DISTINCT i%2 FROM integers ORDER BY 1
----
0
1
# controversial: Postgres fails here with the error "with SELECT DISTINCT columns from ORDER BY must appear in the
# SELECT clause" but SQLite succeeds
# we also succeed here, even though it can give unintuitive results
# this is transformed into SELECT DISTINCT ON (1) i % 2, i (the next query), without projecting i
query I
SELECT DISTINCT i % 2 FROM integers WHERE i<3 ORDER BY i
----
1
0
# the explicit DISTINCT ON form of the previous query, with i projected
query II
SELECT DISTINCT ON (1) i % 2, i FROM integers WHERE i<3 ORDER BY i
----
1 1
0 2
# binding of DISTINCT with column names
# qualified and unqualified references to the same column must bind to each other
query I
SELECT DISTINCT integers.i FROM integers ORDER BY i DESC
----
3
2
1
query I
SELECT DISTINCT i FROM integers ORDER BY integers.i DESC
----
3
2
1
query I
SELECT DISTINCT integers.i FROM integers ORDER BY integers.i DESC
----
3
2
1

View File

@@ -0,0 +1,194 @@
# name: test/sql/aggregate/distinct/ungrouped/test_distinct_ungrouped.test
# description: DISTINCT aggregations, without GROUP BY
# group: [ungrouped]
# Since these tests are made to test the ungrouped operator, and not necessarily the functions themselves
# This test will mostly focus on the order and mixing of distinct and non-distinct aggregates
# And not on variation between types and functions
# Legend for the pattern comments above each query (one character per aggregate, in SELECT order):
# distinct aggregate = 'D'
# regular aggregate = '-'
statement ok
PRAGMA enable_verification
statement ok
PRAGMA verify_external
# 50000 rows: i cycles through 0..49 and j through 0..99
# hence sum(distinct i) = 1225 and sum(distinct j) = 4950 in the expected results below
statement ok
create table tbl as
(select i%50 as i, i%100 as j from range(50000) tbl(i))
;
# D
query I
select
count(distinct i)
from tbl;
----
50
# D--
query III
select
sum(distinct i),
sum(i),
sum(j)
from tbl;
----
1225 1225000 2475000
# --D
query III
select
sum(i),
sum(j),
sum(distinct i)
from tbl;
----
1225000 2475000 1225
# -D-
query III
select
sum(i),
sum(distinct i),
sum(j)
from tbl;
----
1225000 1225 2475000
# D-D
query III
select
sum(distinct i),
count(j),
sum(distinct j)
from tbl;
----
1225 50000 4950
# -D-D
query IIII
select
sum(j),
sum(distinct i),
count(j),
sum(distinct j)
from tbl;
----
2475000 1225 50000 4950
# -D-D
# NOTE(review): this query is identical to the previous one (same aggregates in the same order)
query IIII
select
sum(j),
sum(distinct i),
count(j),
sum(distinct j)
from tbl;
----
2475000 1225 50000 4950
# D-D-
query IIII
select
sum(distinct i),
count(j),
sum(distinct j),
sum(j)
from tbl;
----
1225 50000 4950 2475000
# These next tests will repeat the previous test, with the addition of filters
# Each query is preceded by two pattern lines, one character per aggregate in SELECT order:
# first line: 'D' = distinct aggregate, '-' = regular aggregate
# second line: 'F' = filtered, '-' = not filtered
# filtered = 'F'
# not filtered = '-'
# D
# F
query I
select
count(distinct i) FILTER (WHERE i >= 20)
from tbl;
----
30
# D--
# -FF
query III
select
sum(distinct i),
sum(i) FILTER (WHERE j < 20),
sum(j) FILTER (WHERE i >= 20)
from tbl;
----
1225 95000 1785000
# --D
# -FF
query III
select
sum(i),
sum(j) FILTER (WHERE j == 0),
sum(distinct i) FILTER (WHERE i == 0)
from tbl;
----
1225000 0 0
# -D-
# F-F
query III
select
sum(i) FILTER (WHERE j == 5),
sum(distinct i),
sum(j) FILTER (WHERE i == 5)
from tbl;
----
2500 1225 30000
# D-D
# F-F
query III
select
sum(distinct i) FILTER (WHERE i == 5),
count(j),
sum(distinct j) FILTER (WHERE i == 5)
from tbl;
----
5 50000 60
# -D-D
# FF--
query IIII
select
sum(j) FILTER (WHERE j == 5),
sum(distinct i) FILTER (WHERE j == 5),
count(j),
sum(distinct j)
from tbl;
----
2500 5 50000 4950
# -D-D
# F--F
query IIII
select
sum(j) FILTER (WHERE i == 5),
sum(distinct i),
count(j),
sum(distinct j) FILTER (WHERE j == 5)
from tbl;
----
30000 1225 50000 5
# D-D-
# --FF
query IIII
select
sum(distinct i),
count(j),
sum(distinct j) FILTER (WHERE j == 5),
sum(j) FILTER (WHERE j == 5)
from tbl;
----
1225 50000 5 2500

View File

@@ -0,0 +1,64 @@
# name: test/sql/aggregate/distinct/ungrouped/test_distinct_ungrouped.test_slow
# description: DISTINCT aggregations, without GROUP BY
# group: [ungrouped]
statement ok
SET default_null_order='nulls_first';
# Since these tests are made to test the ungrouped operator, and not necessarily the functions themselves
# This test will mostly focus on the order and mixing of distinct and non-distinct aggregates
# And not on variation between types and functions
# Recursive CTE
# once x reaches 3 the filter x < 3 leaves no input rows, so the ungrouped sum() yields NULL
# the NULL row is part of the result and, because of nulls_first, is listed before 1, 2, 3
query I
with recursive t as (select 1 as x union select sum(distinct x+1) from t where x < 3) select * from t order by x;
----
NULL
1
2
3
# Prepared statement
statement ok
CREATE TABLE tbl AS SELECT * FROM range(1000000) tbl(i);
# i%5 takes the values 0..4, so the distinct sum is 10 + 5 * parameter
statement ok
PREPARE v1 AS SELECT SUM(DISTINCT i%5+?::INT) FROM tbl;
query I
EXECUTE v1(1);
----
15
query I
EXECUTE v1(2);
----
20
query I
EXECUTE v1(3);
----
25
# DISTINCT aggregate parameter as expression
query I
SELECT COUNT(distinct i % 5) from tbl;
----
5
# Correlated subquery
query I
SELECT COUNT(distinct (SELECT i%5)) from tbl;
----
5
# Aggregate with multiple parameters
query I
SELECT ARG_MIN(distinct i%5, i) from tbl;
----
0

View File

@@ -0,0 +1,45 @@
# name: test/sql/aggregate/distinct/ungrouped/test_distinct_ungrouped_parallel.test_slow
# description: DISTINCT aggregations, without GROUP BY
# group: [ungrouped]
# force parallelism of the queries
statement ok
PRAGMA verify_parallelism
# 1M rows: 'few' has 100 distinct values, 'many' has 100000 distinct values
statement ok
create table tbl as select i%100 as few, i%100000 as many from range(1000000) tbl(i);
# Small number of rows, not partitioned
query I
select count(distinct few) from tbl;
----
100
# Large number of rows, partitioned
query I
select count(distinct many) from tbl;
----
100000
# Mixed small and large number of rows
query II
select count(distinct few), count(distinct many) from tbl;
----
100 100000
# Mixed, different order
query II
select count(distinct many), count(distinct few) from tbl;
----
100000 100
# Mixed, with a non-distinct aggregate in between (count(few) counts all 1000000 rows)
query III
select count(distinct many), count(few), count(distinct few) from tbl;
----
100000 1000000 100

View File

@@ -0,0 +1,11 @@
# name: test/sql/aggregate/distinct/ungrouped/test_distinct_ungrouped_shared_input.test
# description: DISTINCT aggregations, without GROUP BY
# group: [ungrouped]
# 1M rows, i cycles through 0..49
statement ok
create table tbl as select i%50 as i from range(1000000) tbl(i);
# five different distinct aggregates that all take the same input column i
# product is 0.0 because 0 is one of the distinct values
query IIIII
select count(distinct i), min(distinct i), max(distinct i), sum(distinct i), product(distinct i) from tbl;
----
50 0 49 1225 0.0