should be it

This commit is contained in:
2025-10-24 19:21:19 -05:00
parent a4b23fc57c
commit f09560c7b1
14047 changed files with 3161551 additions and 1 deletions

10
external/duckdb/test/sql/CMakeLists.txt vendored Normal file
View File

@@ -0,0 +1,10 @@
# Descend into the SQL test subdirectories listed here; each one must provide its own CMakeLists.txt.
add_subdirectory(filter)
add_subdirectory(function)
add_subdirectory(index)
add_subdirectory(parallelism)
add_subdirectory(pivot)
add_subdirectory(storage)
# Re-export ALL_OBJECT_FILES to the parent directory scope.
# NOTE(review): presumably the subdirectories above append their objects to this variable - confirm.
set(ALL_OBJECT_FILES
${ALL_OBJECT_FILES}
PARENT_SCOPE)

View File

@@ -0,0 +1,16 @@
# name: test/sql/aggregate/aggregates/aggregate_limit.test_slow
# description: Test running a limit over a big aggregate
# group: [aggregates]
# run the aggregation with 8 threads
statement ok
SET threads=8
# 10M rows: an integer column and a string column built by appending that integer to a fixed prefix
statement ok
CREATE TABLE big_tbl AS SELECT i, concat('thisisalongstring', i) as str FROM range(10000000) t(i);
# re-run the same grouped LIMIT query on every loop iteration (the loop variable is never substituted into the SQL)
# only successful execution is asserted; the returned rows are not checked
loop i 0 10
statement ok
SELECT i, str, COUNT(*) FROM big_tbl GROUP BY ALL LIMIT 10
endloop

View File

@@ -0,0 +1,60 @@
# name: test/sql/aggregate/aggregates/approx_top_k.test
# description: Test approx top K operator
# group: [aggregates]
statement ok
PRAGMA enable_verification
# even_groups takes the values 0..4 (i%5); skewed_groups is log(1 + i*i) cast to int
statement ok
CREATE TABLE integers AS SELECT i%5 as even_groups, log(1 + i*i)::int as skewed_groups FROM range(10000) t(i);
# select all groups
# k (10) exceeds the number of distinct values (5); list_sort fixes the order of the returned list for comparison
query I
SELECT list_sort(approx_top_k(even_groups, 10)) FROM integers
----
[0, 1, 2, 3, 4]
# select a subset of the groups
# they should be ordered from most frequent to least frequent
query I
SELECT approx_top_k(skewed_groups, 5) FROM integers
----
[8, 7, 6, 5, 4]
# varchars
query I
SELECT approx_top_k(concat('this is a long prefix', skewed_groups::VARCHAR), 5) FROM integers
----
[this is a long prefix8, this is a long prefix7, this is a long prefix6, this is a long prefix5, this is a long prefix4]
# lists
query I
SELECT approx_top_k([skewed_groups], 5) FROM integers
----
[[8], [7], [6], [5], [4]]
# structs
query I
SELECT approx_top_k({'i': skewed_groups}, 5) FROM integers
----
[{'i': 8}, {'i': 7}, {'i': 6}, {'i': 5}, {'i': 4}]
# invalid k values: k must be strictly positive, so 0 is rejected
statement error
select approx_top_k(i, 0) from range(5) t(i)
----
k value must be > 0
# negative k is rejected with the same message
statement error
select approx_top_k(i, -1) from range(5) t(i)
----
k value must be > 0
# k has an upper limit
statement error
select approx_top_k(i, 999999999999999) from range(5) t(i)
----
k value must be < 1000000
# a NULL k is rejected; the expected error text given here is just NULL
statement error
select approx_top_k(i, NULL) from range(5) t(i)
----
NULL

View File

@@ -0,0 +1,32 @@
# name: test/sql/aggregate/aggregates/approx_top_k_big.test_slow
# description: Test approx top K operator
# group: [aggregates]
# approx top k where we first have many of one element, then the most frequent element, then a non frequent element
# 10000 x 0, then 100000 x 1, then 10 x 2
statement ok
CREATE TABLE alternating_sequences AS SELECT 0 i FROM range(10000) UNION ALL SELECT 1 FROM range(100000) UNION ALL SELECT 2 FROM range(10);
# expected order is by frequency, not by order of appearance
query I
SELECT approx_top_k(i, 3) FROM alternating_sequences
----
[1, 0, 2]
# alternating pattern (1, 0, 0, 1, 0, 0, ...)
statement ok
CREATE TABLE alternating AS SELECT case when i%3=0 then 1 else 0 end i FROM range(100000) t(i)
# 0 occurs twice as often as 1, so it is expected first
query I
SELECT approx_top_k(i, 2) FROM alternating
----
[0, 1]
# large top-k value
# k (100000) is larger than the number of distinct input values (50000)
statement ok
CREATE TABLE topk_approx AS SELECT unnest(approx_top_k(i, 100000)) from range(50000) t(i) order by 1
statement ok
CREATE TABLE topk_exact AS SELECT * from range(50000) t(i) order by 1
# the approximate result must not contain any value that is missing from the exact set
# NOTE(review): this EXCEPT only checks one direction; values missing from topk_approx would not be detected
query I
FROM topk_approx EXCEPT FROM topk_exact
----

View File

@@ -0,0 +1,23 @@
# name: test/sql/aggregate/aggregates/arg_min_max_all_types.test_slow
# description: Test argmin and argmax operator for all types
# group: [aggregates]
statement ok
PRAGMA enable_verification
statement ok
create table all_types as from test_all_types()
# run the two checks below once per column
# NOTE(review): <all_types_columns> is presumably expanded by the test runner to the column names of test_all_types() - confirm
foreach col <all_types_columns>
# using a column as both argument and ordering key must give the same value as MIN of that column
# IS NOT DISTINCT FROM is used so that two NULLs also compare as equal
query I
SELECT MIN("${col}") IS NOT DISTINCT FROM ARG_MIN("${col}", "${col}") FROM all_types
----
true
# same identity for MAX / ARG_MAX
query I
SELECT MAX("${col}") IS NOT DISTINCT FROM ARG_MAX("${col}", "${col}") FROM all_types
----
true
endloop

View File

@@ -0,0 +1,77 @@
# name: test/sql/aggregate/aggregates/arg_min_max_n.test
# description: Test for arg_max with N
# group: [aggregates]
statement ok
PRAGMA enable_verification
statement ok
CREATE TABLE t1 (val VARCHAR, arg INT);
# 'a' carries the arg values 1..3, 'b' carries 4..6
statement ok
INSERT INTO t1 VALUES ('a', 2), ('a', 1), ('b', 5), ('b', 4), ('a', 3), ('b', 6);
# three-argument form: val of the three rows with the largest arg (6, 5 and 4 all belong to 'b')
query I
SELECT arg_max(val, arg, 3 ORDER BY val DESC) FROM t1;
----
[b, b, b]
# the same result computed with row_number() over arg DESC
query I
SELECT list(rs.val) FROM (SELECT val, arg, row_number() OVER (ORDER BY arg DESC) as rid FROM t1 ORDER BY val) as rs WHERE rid < 4;
----
[b, b, b]
# grouped variant; rowsort makes the comparison independent of the order in which groups are returned
query I rowsort
SELECT arg_max(arg, val, 2 ORDER BY arg) FROM t1 GROUP BY val;
----
[2, 1]
[5, 4]
statement ok
CREATE TABLE t2 AS SELECT i%5 as even_groups, i FROM range(10000) t(i);
# even_groups of the three largest i values: 9999%5=4, 9998%5=3, 9997%5=2
query I
SELECT arg_max(even_groups, i, 3) FROM t2;
----
[4, 3, 2]
# Test limits
# two-argument min/max: n must not be NULL, must be > 0 and must be < 1000000
foreach FUNC min max
statement error
select ${FUNC}(arg, NULL) from t1;
----
Invalid Input Error: Invalid input for MIN/MAX: n value cannot be NULL
statement error
SELECT ${FUNC}(arg, -1) FROM t1;
----
Invalid Input Error: Invalid input for MIN/MAX: n value must be > 0
statement error
select ${FUNC}(arg, 1000000) from t1;
----
Invalid Input Error: Invalid input for MIN/MAX: n value must be < 1000000
endloop
# same limits for the three-argument functions; min_by/max_by are expected to raise the arg_min/arg_max messages too
foreach FUNC arg_min arg_max min_by max_by
statement error
select ${FUNC}(arg, val, NULL) from t1;
----
Invalid Input Error: Invalid input for arg_min/arg_max: n value cannot be NULL
statement error
SELECT ${FUNC}(arg, val, -1) FROM t1;
----
Invalid Input Error: Invalid input for arg_min/arg_max: n value must be > 0
statement error
select ${FUNC}(arg, val, 1000000) from t1;
----
Invalid Input Error: Invalid input for arg_min/arg_max: n value must be < 1000000
endloop

View File

@@ -0,0 +1,57 @@
# name: test/sql/aggregate/aggregates/arg_min_max_n_all_types.test_slow
# description: Test the ARG_MIN and ARG_MAX "N" overloads with all types
# group: [aggregates]
statement ok
PRAGMA enable_verification
statement ok
create table all_types as from test_all_types()
# every check below is repeated for each listed column name
foreach col bool tinyint smallint int bigint hugeint uhugeint utinyint usmallint uint ubigint date time timestamp timestamp_s timestamp_ms timestamp_ns time_tz timestamp_tz float double dec_4_1 dec_9_4 dec_18_6 dec38_10 uuid interval varchar blob bit small_enum medium_enum large_enum int_array double_array date_array timestamp_array timestamptz_array varchar_array nested_int_array struct struct_of_arrays array_of_structs map union fixed_int_array fixed_varchar_array fixed_nested_int_array fixed_nested_varchar_array fixed_struct_array struct_of_fixed_array fixed_array_of_int_list list_of_fixed_int_array
# with n = 1, MIN(X, 1) must equal ARG_MIN(X, X, 1)
query I
SELECT MIN("${col}", 1) IS NOT DISTINCT FROM ARG_MIN("${col}", "${col}", 1) FROM all_types
----
true
# with n = 1, MAX(X, 1) must equal ARG_MAX(X, X, 1)
query I
SELECT MAX("${col}", 1) IS NOT DISTINCT FROM ARG_MAX("${col}", "${col}", 1) FROM all_types
----
true
# Ensure the MIN(X,N) and MAX(X,N) produce the same result as the window function equivalent
query I
SELECT MAX("${col}", 2)
IS NOT DISTINCT FROM
(SELECT list(rs."${col}") FROM (SELECT "${col}", row_number() OVER (ORDER BY "${col}" DESC) as rid FROM all_types ORDER BY "${col}" DESC) as rs WHERE rid < 3)
FROM all_types;
----
true
query I
SELECT MIN("${col}", 2)
IS NOT DISTINCT FROM
(SELECT list(rs."${col}") FROM (SELECT "${col}", row_number() OVER (ORDER BY "${col}" ASC) as rid FROM all_types ORDER BY "${col}" ASC) as rs WHERE rid < 3)
FROM all_types;
----
true
# Ensure ARG_MIN(X,X,N) and ARG_MAX(X,X,N) produce the same result as the window function equivalent
query I
SELECT ARG_MAX("${col}", "${col}", 2)
IS NOT DISTINCT FROM
(SELECT list(rs."${col}") FROM (SELECT "${col}", row_number() OVER (ORDER BY "${col}" DESC) as rid FROM all_types ORDER BY "${col}" DESC) as rs WHERE rid < 3)
FROM all_types;
----
true
query I
SELECT ARG_MIN("${col}", "${col}", 2)
IS NOT DISTINCT FROM
(SELECT list(rs."${col}") FROM (SELECT "${col}", row_number() OVER (ORDER BY "${col}" ASC) as rid FROM all_types ORDER BY "${col}" ASC) as rs WHERE rid < 3)
FROM all_types;
----
true
endloop

View File

@@ -0,0 +1,72 @@
# name: test/sql/aggregate/aggregates/arg_min_max_n_tpch.test
# description: Test max/min N overloads with TPCH data
# group: [aggregates]
# this test depends on the tpch extension
require tpch
statement ok
PRAGMA enable_verification
# generate TPC-H data at scale factor 0.001
statement ok
CALL dbgen(sf=0.001);
# min/max with n = 3 return lists; repeated key values are kept rather than de-duplicated
query I
select min(l_orderkey, 3) from lineitem;
----
[1, 1, 1]
query I
select max(l_orderkey, 3) from lineitem;
----
[5988, 5987, 5987]
# Test with different N values
# n is given as an expression (5 for 'N', otherwise 3); the group whose values are all NULL ('R') yields NULL
query II
SELECT l_returnflag, max(
CASE WHEN l_returnflag='R' THEN null ELSE l_orderkey END,
CASE WHEN l_returnflag='N' THEN 5 ELSE 3 END)
FROM lineitem GROUP BY ALL ORDER BY ALL;
----
A [5986, 5986, 5986]
N [5987, 5987, 5987, 5987, 5958]
R NULL
# This should be equivalent to "SELECT max(val_col, k) FROM table_name GROUP BY group_col"
statement ok
CREATE MACRO compute_top_k(table_name, group_col, val_col, k) AS TABLE
SELECT rs.grp, array_agg(rs.val ORDER BY rid)
FROM (
SELECT group_col AS grp, val_col AS val, row_number() OVER (PARTITION BY group_col ORDER BY val_col DESC) as rid
FROM query_table(table_name::VARCHAR) ORDER BY group_col DESC
) as rs
WHERE rid <= k
GROUP BY ALL
ORDER BY ALL;
# Disable top_n_window_elimination to prevent comparing max and max_by
statement ok
SET disabled_optimizers = 'top_n_window_elimination'
# the next two queries share the label top_resultset, so their results are compared with each other
query II nosort top_resultset
SELECT * FROM compute_top_k(lineitem, l_returnflag, l_orderkey, 3);
query II nosort top_resultset
SELECT l_returnflag, max(l_orderkey, 3) FROM lineitem GROUP BY ALL ORDER BY ALL;
# This should be equivalent to "SELECT min(val_col, k) FROM table_name GROUP BY group_col"
statement ok
CREATE MACRO compute_bottom_k(table_name, group_col, val_col, k) AS TABLE
SELECT rs.grp, array_agg(rs.val ORDER BY rid)
FROM (
SELECT group_col AS grp, val_col AS val, row_number() OVER (PARTITION BY group_col ORDER BY val_col ASC) as rid
FROM query_table(table_name::VARCHAR) ORDER BY group_col ASC
) as rs
WHERE rid <= k
GROUP BY ALL
ORDER BY ALL;
# same label-based comparison for the bottom-k macro and min(l_orderkey, 3)
query II nosort bottom_resultset
SELECT * FROM compute_bottom_k(lineitem, l_returnflag, l_orderkey, 3);
query II nosort bottom_resultset
SELECT l_returnflag, min(l_orderkey, 3) FROM lineitem GROUP BY ALL ORDER BY ALL;

View File

@@ -0,0 +1,64 @@
# name: test/sql/aggregate/aggregates/arg_min_max_nulls_last.test
# description: Test arg_min_nulls_last and arg_max_nulls_last
# group: [aggregates]
# rows are (grp, arg, val); val is NULL in three of the five rows and arg is NULL in two
statement ok
CREATE TABLE tbl AS SELECT * FROM VALUES (1, 5, 1), (1, NULL, 2), (1, 3, NULL), (2, NULL, NULL), (3, 1, NULL) t(grp, arg, val)
# the largest non-NULL val (2) belongs to the row whose arg is NULL, so the expected result is NULL
query I
SELECT arg_max_nulls_last(arg, val) FROM tbl
----
NULL
# the same lookup with n = 1 returns a one-element list
query I
SELECT arg_max_nulls_last(arg, val, 1) FROM tbl
----
[NULL]
# n is larger than the number of non-NULL vals (2): the NULLs fill the tail of the list
query I
SELECT arg_max_nulls_last(val, val, 4) FROM tbl
----
[2, 1, NULL, NULL]
# grouped: groups 2 and 3 only have NULL vals and still return the arg of their single row
query II
SELECT grp, arg_max_nulls_last(arg, val) FROM tbl GROUP BY grp ORDER BY grp
----
1 NULL
2 NULL
3 1
query II
SELECT grp, arg_max_nulls_last(arg, val, 2) FROM tbl GROUP BY grp ORDER BY grp
----
1 [NULL, 5]
2 [NULL]
3 [1]
# arg_min: the smallest non-NULL val (1) belongs to the row with arg 5
query I
SELECT arg_min_nulls_last(arg, val) FROM tbl
----
5
query I
SELECT arg_min_nulls_last(arg, val, 1) FROM tbl
----
[5]
# ascending order for the non-NULL vals, NULLs again at the end
query I
SELECT arg_min_nulls_last(val, val, 4) FROM tbl
----
[1, 2, NULL, NULL]
# grouped arg_min variants
query II
SELECT grp, arg_min_nulls_last(arg, val) FROM tbl GROUP BY grp ORDER BY grp
----
1 5
2 NULL
3 1
query II
SELECT grp, arg_min_nulls_last(arg, val, 2) FROM tbl GROUP BY grp ORDER BY grp
----
1 [5, NULL]
2 [NULL]
3 [1]

View File

@@ -0,0 +1,43 @@
# name: test/sql/aggregate/aggregates/arg_min_max_nulls_last_all_types.test_slow
# description: Test the ARG_MIN_NULLS_LAST and ARG_MAX_NULLS_LAST overloads with all types
# group: [aggregates]
statement ok
PRAGMA enable_verification
statement ok
create table all_types as from test_all_types()
# every step below is repeated for each listed column name
foreach col bool tinyint smallint int bigint hugeint uhugeint utinyint usmallint uint ubigint date time timestamp timestamp_s timestamp_ms timestamp_ns time_tz timestamp_tz float double dec_4_1 dec_9_4 dec_18_6 dec38_10 uuid interval varchar blob bit small_enum medium_enum large_enum int_array double_array date_array timestamp_array timestamptz_array varchar_array nested_int_array struct struct_of_arrays array_of_structs map union fixed_int_array fixed_varchar_array fixed_nested_int_array fixed_nested_varchar_array fixed_struct_array struct_of_fixed_array fixed_array_of_int_list list_of_fixed_int_array
# reference orderings of the column, ascending and descending, both with NULLS LAST
statement ok
CREATE OR REPLACE TABLE asc_ordered AS SELECT "${col}" FROM all_types ORDER BY "${col}" ASC NULLS LAST
statement ok
CREATE OR REPLACE TABLE desc_ordered AS SELECT "${col}" FROM all_types ORDER BY "${col}" DESC NULLS LAST
# n = 3 results of the aggregates, unnested into one row per list element
statement ok
CREATE OR REPLACE TABLE arg_min_result AS SELECT unnest(arg_min_nulls_last("${col}", "${col}", 3)) FROM all_types
statement ok
CREATE OR REPLACE TABLE arg_max_result AS SELECT unnest(arg_max_nulls_last("${col}", "${col}", 3)) FROM all_types
# compare (rowid, value) pairs: EXCEPT returns the reference rows that have no identical pair in the aggregate result
# NOTE(review): no expected-result section is visible for these EXCEPT queries here - confirm what the runner asserts
query II
SELECT * FROM (SELECT rowid, * FROM asc_ordered ORDER BY rowid) EXCEPT SELECT * FROM (SELECT rowid, * FROM arg_min_result ORDER BY rowid);
query II
SELECT * FROM (SELECT rowid, * FROM desc_ordered ORDER BY rowid) EXCEPT SELECT * FROM (SELECT rowid, * FROM arg_max_result ORDER BY rowid);
# repeat with the two-argument form, which returns a single value instead of a list
statement ok
CREATE OR REPLACE TABLE arg_min_result AS SELECT arg_min_nulls_last("${col}", "${col}") FROM all_types
statement ok
CREATE OR REPLACE TABLE arg_max_result AS SELECT arg_max_nulls_last("${col}", "${col}") FROM all_types
# only the first row of each reference ordering is compared against the single aggregate value
query II
SELECT * FROM (SELECT rowid, * FROM asc_ordered ORDER BY rowid LIMIT 1) EXCEPT SELECT * FROM (SELECT rowid, * FROM arg_min_result ORDER BY rowid);
query II
SELECT * FROM (SELECT rowid, * FROM desc_ordered ORDER BY rowid LIMIT 1) EXCEPT SELECT * FROM (SELECT rowid, * FROM arg_max_result ORDER BY rowid);
endloop

View File

@@ -0,0 +1,349 @@
# name: test/sql/aggregate/aggregates/binning.test
# description: Test binning functions
# group: [aggregates]
require 64bit
statement ok
PRAGMA enable_verification
# arguments as used throughout this file: (min, max, requested bin count, flag)
# NOTE(review): the flag presumably requests "nice" rounded boundaries (see the "not nice" case below) - confirm
query I
SELECT equi_width_bins(0, 10, 2, true)
----
[5, 10]
# small bins with big base
query I
SELECT equi_width_bins(1000000, 1000010, 2, true)
----
[1000005, 1000010]
# bounds cannot be nice because of step size
query I
SELECT equi_width_bins(99, 101, 2, true)
----
[100, 101]
query I
SELECT equi_width_bins(9, 11, 2, true)
----
[10, 11]
query I
SELECT equi_width_bins(10, 11, 2, true)
----
[10, 11]
# we cannot have duplicate bin boundaries
# 10 bins are requested but the integer range 0..5 only yields 6 distinct boundaries
query I
SELECT equi_width_bins(0, 5, 10, true)
----
[0, 1, 2, 3, 4, 5]
query I
SELECT equi_width_bins(0, 10, 5, true)
----
[2, 4, 6, 8, 10]
# negative and mixed-sign ranges
query I
SELECT equi_width_bins(-10, 0, 5, true)
----
[-8, -6, -4, -2, 0]
query I
SELECT equi_width_bins(-10, 10, 5, true)
----
[-5, 0, 5, 10]
# the last boundary (10) lies above the requested max (9)
query I
SELECT equi_width_bins(0, 9, 5, true)
----
[2, 4, 6, 8, 10]
# the max values 1734 and 1724 are expected to give identical boundaries
query I
SELECT equi_width_bins(0, 1734, 10, true)
----
[200, 400, 600, 800, 1000, 1200, 1400, 1600, 1800]
query I
SELECT equi_width_bins(0, 1724, 10, true)
----
[200, 400, 600, 800, 1000, 1200, 1400, 1600, 1800]
# not nice
query I
SELECT equi_width_bins(0, 1734, 10, false)
----
[173, 346, 520, 693, 867, 1040, 1213, 1387, 1560, 1734]
query I
SELECT equi_width_bins(0, 39343341, 10, true)
----
[5000000, 10000000, 15000000, 20000000, 25000000, 30000000, 35000000, 40000000]
# same range with the flag set to true and to false
query I
SELECT equi_width_bins(1, 6000000, 7, true)
----
[1000000, 2000000, 3000000, 4000000, 5000000, 6000000]
query I
SELECT equi_width_bins(1, 6000000, 7, false)
----
[857143, 1714286, 2571429, 3428571, 4285714, 5142857, 6000000]
# big numbers
# min and max are the literals -9223372036854775808 and 9223372036854775807
# in every expected list the final boundary is the max argument itself
query I
SELECT equi_width_bins(-9223372036854775808, 9223372036854775807, 5, true)
----
[-5000000000000000000, 0, 5000000000000000000, 9223372036854775807]
query I
SELECT equi_width_bins(-9223372036854775808, 9223372036854775807, 10, true)
----
[-8000000000000000000, -6000000000000000000, -4000000000000000000, -2000000000000000000, 0, 2000000000000000000, 4000000000000000000, 6000000000000000000, 8000000000000000000, 9223372036854775807]
query I
SELECT equi_width_bins(-9223372036854775808, 9223372036854775807, 20, true)
----
[-9000000000000000000, -8000000000000000000, -7000000000000000000, -6000000000000000000, -5000000000000000000, -4000000000000000000, -3000000000000000000, -2000000000000000000, -1000000000000000000, 0, 1000000000000000000, 2000000000000000000, 3000000000000000000, 4000000000000000000, 5000000000000000000, 6000000000000000000, 7000000000000000000, 8000000000000000000, 9000000000000000000, 9223372036854775807]
query I
SELECT equi_width_bins(-9223372036854775808, 9223372036854775807, 30, true)
----
[-9000000000000000000, -8500000000000000000, -8000000000000000000, -7500000000000000000, -7000000000000000000, -6500000000000000000, -6000000000000000000, -5500000000000000000, -5000000000000000000, -4500000000000000000, -4000000000000000000, -3500000000000000000, -3000000000000000000, -2500000000000000000, -2000000000000000000, -1500000000000000000, -1000000000000000000, -500000000000000000, 0, 500000000000000000, 1000000000000000000, 1500000000000000000, 2000000000000000000, 2500000000000000000, 3000000000000000000, 3500000000000000000, 4000000000000000000, 4500000000000000000, 5000000000000000000, 5500000000000000000, 6000000000000000000, 6500000000000000000, 7000000000000000000, 7500000000000000000, 8000000000000000000, 8500000000000000000, 9000000000000000000, 9223372036854775807]
# floating point numbers
query I
SELECT equi_width_bins(0.0, 9.0, 5, true);
----
[2.0, 4.0, 6.0, 8.0, 10.0]
# 7 bins requested, 9 boundaries expected when the flag is true
query I
SELECT equi_width_bins(0.0, 9.0, 7, true);
----
[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]
# with the flag false exactly 7 boundaries are expected; unnest puts each value on its own result row
query I
SELECT unnest(equi_width_bins(0.0, 9.0, 7, false));
----
1.2857142857142863
2.571428571428572
3.8571428571428577
5.142857142857143
6.428571428571429
7.714285714285714
9.0
query I
SELECT equi_width_bins(0.0, 90.0, 5, true);
----
[20.0, 40.0, 60.0, 80.0, 100.0]
query I
SELECT equi_width_bins(0.0, 1.0, 5, true);
----
[0.2, 0.4, 0.6, 0.8, 1.0]
# NOTE(review): this query and its expected result are identical to the preceding one
query I
SELECT equi_width_bins(0.0, 1.0, 5, true);
----
[0.2, 0.4, 0.6, 0.8, 1.0]
query I
SELECT equi_width_bins(-1.0, 0.0, 5, true);
----
[-0.8, -0.6, -0.4, -0.2, 0.0]
query I
SELECT equi_width_bins(-1.0, 1.0, 5, true);
----
[-0.5, 0.0, 0.5, 1.0]
# test giant numbers
# the expected output contains -0.0 and a final boundary (1.5e+308) above the requested max
query I
SELECT unnest(equi_width_bins(-1e308, 1e308, 5, true));
----
-5e+307
-0.0
5e+307
1e+308
1.5e+308
# more exhaustive nice bin tests
# fixed range 0.0 .. 6.347 with an increasing requested bin count
query I
select equi_width_bins(0.0, 6.347, 3, true) AS boundaries;
----
[2.0, 4.0, 6.0, 8.0]
query I
select equi_width_bins(0.0, 6.347, 7, true) AS boundaries;
----
[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]
# requesting 10 or 20 bins is expected to give the same 0.5-wide boundaries
query I
select equi_width_bins(0.0, 6.347, 10, true) AS boundaries;
----
[0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5, 6.0, 6.5]
query I
select equi_width_bins(0.0, 6.347, 20, true) AS boundaries;
----
[0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5, 6.0, 6.5]
query I
select equi_width_bins(0.0, 6.347, 30, true) AS boundaries;
----
[0.2, 0.4, 0.6, 0.8, 1.0, 1.2, 1.4, 1.6, 1.8, 2.0, 2.2, 2.4, 2.6, 2.8, 3.0, 3.2, 3.4, 3.6, 3.8, 4.0, 4.2, 4.4, 4.6, 4.8, 5.0, 5.2, 5.4, 5.6, 5.8, 6.0, 6.2, 6.4]
# fixed range 0.0 .. 3.974 with an increasing requested bin count
query I
select equi_width_bins(0.0, 3.974, 5, true) AS boundaries;
----
[1.0, 2.0, 3.0, 4.0]
query I
select equi_width_bins(0.0, 3.974, 7, true) AS boundaries;
----
[0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0]
query I
select equi_width_bins(0.0, 3.974, 10, true) AS boundaries;
----
[0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0]
query I
select equi_width_bins(0.0, 3.974, 20, true) AS boundaries;
----
[0.2, 0.4, 0.6, 0.8, 1.0, 1.2, 1.4, 1.6, 1.8, 2.0, 2.2, 2.4, 2.6, 2.8, 3.0, 3.2, 3.4, 3.6, 3.8, 4.0]
query I
select equi_width_bins(0.0, 3.974, 40, true) AS boundaries;
----
[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0, 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, 4.0]
# last bin should always be bigger than the input max
# a max of 101 / 101.5 is just above the boundary 100, so an extra boundary at 120 is expected
query I
select equi_width_bins(0, 101, 5, true);
----
[20, 40, 60, 80, 100, 120]
query I
select equi_width_bins(0, 101.5, 5, true);
----
[20.0, 40.0, 60.0, 80.0, 100.0, 120.0]
# dates/timestamps
query I
SELECT equi_width_bins(date '1992-01-01', date '2000-01-01', 2, true)
----
[1996-01-01, 2000-01-01]
query I
SELECT equi_width_bins(timestamp '1992-01-01', timestamp '2000-01-01', 2, true)
----
['1996-01-01 00:00:00', '2000-01-01 00:00:00']
# min/max with a time-of-day component: the expected boundaries fall on the first day of a month
query I
SELECT equi_width_bins(timestamp '1992-01-01 12:23:37', timestamp '2000-01-01 04:03:21', 2, true)
----
['1996-02-01 00:00:00', '2000-02-01 00:00:00']
query I
SELECT equi_width_bins(timestamp '1992-01-01 12:23:37', timestamp '2000-01-01 04:03:21', 5, true)
----
['1993-10-01 00:00:00', '1995-05-01 00:00:00', '1996-12-01 00:00:00', '1998-07-01 00:00:00', '2000-02-01 00:00:00']
# bins within a year
query I
SELECT equi_width_bins(timestamp '1992-01-01 12:23:37', timestamp '1992-12-01 04:03:21', 4, true)
----
['1992-03-27 00:00:00', '1992-06-18 00:00:00', '1992-09-10 00:00:00', '1992-12-02 00:00:00']
# bins within a month
query I
SELECT equi_width_bins(timestamp '1992-01-01 12:23:37', timestamp '1992-01-31 04:03:21', 4, true)
----
['1992-01-11 00:00:00', '1992-01-18 00:00:00', '1992-01-25 00:00:00', '1992-02-01 00:00:00']
# bins within a day
query I
SELECT equi_width_bins(timestamp '1992-01-01 01:23:37.999', timestamp '1992-01-01 23:03:21.3', 4, true)
----
['1992-01-01 07:30:00', '1992-01-01 13:00:00', '1992-01-01 18:30:00', '1992-01-02 00:00:00']
# bins within an hour
query I
SELECT equi_width_bins(timestamp '1992-01-01 01:23:37.999', timestamp '1992-01-01 01:53:21.3', 4, true)
----
['1992-01-01 01:31:30', '1992-01-01 01:39:00', '1992-01-01 01:46:30', '1992-01-01 01:54:00']
# bins within a minute
query I
SELECT equi_width_bins(timestamp '1992-01-01 01:23:01.999', timestamp '1992-01-01 01:23:49.377', 4, true)
----
['1992-01-01 01:23:14', '1992-01-01 01:23:26', '1992-01-01 01:23:38', '1992-01-01 01:23:50']
# bins within a second
# here the last boundary equals the max argument exactly
query I
SELECT equi_width_bins(timestamp '1992-01-01 01:23:01.2', timestamp '1992-01-01 01:23:01.943', 4, true)
----
['1992-01-01 01:23:01.38575', '1992-01-01 01:23:01.5715', '1992-01-01 01:23:01.75725', '1992-01-01 01:23:01.943']
# difference is more than one day, but step size is less than one day
query I
select equi_width_bins(timestamp '2024-06-21 15:00:00', timestamp '2024-06-22 9:00:00', 4, true);
----
['2024-06-21 19:30:00', '2024-06-22 00:00:00', '2024-06-22 04:30:00', '2024-06-22 09:00:00']
# difference is more than one month, but step size is less than one month
query I
select equi_width_bins(timestamp '2024-06-21 15:00:00', timestamp '2024-07-21 9:00:00', 4, true);
----
['2024-07-01 00:00:00', '2024-07-08 00:00:00', '2024-07-15 00:00:00', '2024-07-22 00:00:00']
# what if we create more partitions than there are microseconds
# min and max are 2 microseconds apart and 10 bins are requested; 3 boundaries are expected
query I
select equi_width_bins(timestamp '2024-06-21 15:00:00.123456', timestamp '2024-06-21 15:00:00.123458', 10, true);
----
['2024-06-21 15:00:00.123456', '2024-06-21 15:00:00.123457', '2024-06-21 15:00:00.123458']
# far more bins requested (5999) than integers in the range 0..10: one boundary per integer is expected,
# regardless of the value of the last argument
query I
SELECT EQUI_WIDTH_BINS(0, 10, 5999, TRUE)
----
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
query I
SELECT EQUI_WIDTH_BINS(0, 10, 5999, false)
----
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
# invalid inputs
# max below min
statement error
SELECT equi_width_bins(-0.0, -1.0, 5, true);
----
max value is smaller than min value
# infinite or NaN bounds
statement error
SELECT equi_width_bins(0.0, 'inf'::double, 5, true);
----
does not support infinite or nan as min/max value
statement error
SELECT equi_width_bins(0.0, 'nan'::double, 5, true);
----
does not support infinite or nan as min/max value
# non-positive bin count
statement error
SELECT equi_width_bins(0.0, 1.0, -1, true);
----
there must be > 0 bins
# bin count above the supported maximum
statement error
SELECT equi_width_bins(0.0, 1.0, 99999999, true);
----
max bin count
# unsupported input type
statement error
SELECT equi_width_bins('a'::VARCHAR, 'z'::VARCHAR, 2, true)
----
Unsupported type "VARCHAR"

View File

@@ -0,0 +1,25 @@
# name: test/sql/aggregate/aggregates/bitstring_agg_empty.test
# description: Test BITSTRING_AGG operator
# group: [aggregates]
statement ok
PRAGMA verify_external
statement ok
CREATE TABLE t1 (k VARCHAR, el VARCHAR);
# the view always returns zero rows (LIMIT 0)
statement ok
CREATE VIEW t1_v AS (SELECT * FROM t1 LIMIT 0);
statement ok
CREATE TABLE el_ids (el VARCHAR, idx INTEGER);
statement ok
INSERT INTO el_ids VALUES ('el', 10);
# joining against the empty view feeds no rows into bitstring_agg; the grouped query must succeed and return an empty result
query II
SELECT k, bitstring_agg(idx)
FROM t1_v
JOIN el_ids USING (el)
GROUP BY k;
----

View File

@@ -0,0 +1,17 @@
# name: test/sql/aggregate/aggregates/first_memory_usage.test_slow
# description: Issue 14132 - Out of memory on basic hash aggregations with large values/aggregates
# group: [aggregates]
# run against a database file inside the test directory
load __TEST_DIR__/first_memory_usage.db
# single thread and a 500mb memory limit for the query below
statement ok
set threads=1;
statement ok
set memory_limit='500mb';
# this query uses the first() aggregate, which used to use too much memory (it did redundant allocation in Combine)
# we also limit the number of threads in RadixPartitionedHashtable to limit memory usage when close to the limit
# we can now easily complete this query
# only successful completion is asserted; the 10 returned rows are not checked
statement ok
select distinct on (a) b from (select s a, md5(s::text) b from generate_series(1,5_000_000) as g(s)) limit 10;

View File

@@ -0,0 +1,22 @@
# name: test/sql/aggregate/aggregates/first_test_all_types.test_slow
# description: Test the first aggregate on all types
# group: [aggregates]
statement ok
pragma enable_verification
# verify that first produces the same result as limit 1 for all types
statement ok
CREATE TABLE all_types AS FROM test_all_types();
# queries sharing a nosort label are compared with each other: FIRST over every column vs. the first row
query I nosort all_types_first
SELECT * FROM all_types LIMIT 1
query I nosort all_types_first
SELECT FIRST(COLUMNS(*)) FROM all_types
# LAST over every column vs. the row at OFFSET 2
# NOTE(review): assumes all_types has exactly three rows, so that OFFSET 2 is the last row - confirm
query I nosort all_types_last
SELECT * FROM all_types LIMIT 1 OFFSET 2
query I nosort all_types_last
SELECT LAST(COLUMNS(*)) FROM all_types

View File

@@ -0,0 +1,99 @@
# name: test/sql/aggregate/aggregates/histogram_exact.test
# description: Test histogram_exact
# group: [aggregates]
statement ok
PRAGMA enable_verification
statement ok
CREATE TABLE obs(n BIGINT);
# 15 observations; 20 is the only value that matches one of the bins used below
statement ok
INSERT INTO obs VALUES (0), (5), (7), (12), (20), (23), (24), (25), (26), (28), (31), (34), (36), (41), (47)
# histogram_exact finds exact matches only, and puts everything else into the "other" category
# the value of the other category depends on the data type of the bin
# for integer values it is the highest value of the type
query I
SELECT histogram_exact(n, [10, 20, 30, 40, 50]) FROM obs
----
{10=0, 20=1, 30=0, 40=0, 50=0, 9223372036854775807=14}
# for doubles/dates/timestamps it is infinite
query I
SELECT histogram_exact(n::double, [10, 20, 30, 40, 50]) FROM obs
----
{10.0=0, 20.0=1, 30.0=0, 40.0=0, 50.0=0, inf=14}
query I
SELECT histogram_exact((date '2000-01-01' + interval (n) days)::date, [date '2000-01-01' + interval (x) days for x in [10, 20, 30, 40, 50]]) FROM obs
----
{2000-01-11=0, 2000-01-21=1, 2000-01-31=0, 2000-02-10=0, 2000-02-20=0, infinity=14}
# for strings it is the empty string
query I
SELECT histogram_exact(n::varchar, [10, 20, 30, 40, 50]) FROM obs
----
{10=0, 20=1, 30=0, 40=0, 50=0, ''=14}
# for lists it is an empty list
query I
SELECT histogram_exact([n], [[x] for x in [10, 20, 30, 40, 50]]) FROM obs
----
{[10]=0, [20]=1, [30]=0, [40]=0, [50]=0, []=14}
# we can use the function "is_histogram_other_bin" to check if it is this other bin
query II
SELECT case when is_histogram_other_bin(bin) then '(other values)' else bin::varchar end as bin,
count
FROM (
SELECT UNNEST(map_keys(hist)) AS bin, UNNEST(map_values(hist)) AS count
FROM (SELECT histogram_exact(n, [10, 20, 30, 40, 50]) AS hist FROM obs)
)
----
10 0
20 1
30 0
40 0
50 0
(other values) 14
# same check using histogram() instead of histogram_exact(): each value is counted in the first bin whose
# bound is >= the value, and the two values above the last bound (41, 47) land in the other bin
query II
SELECT case when is_histogram_other_bin(bin) then '(other values)' else bin::varchar end as bin,
count
FROM (
SELECT UNNEST(map_keys(hist)) AS bin, UNNEST(map_values(hist)) AS count
FROM (SELECT histogram(n, [10, 20, 30, 40]) AS hist FROM obs)
)
----
10 3
20 2
30 5
40 3
(other values) 2
# when there are no other values the other bin is omitted from the result
query I
SELECT histogram_exact(r, [0, 1, 2, 3]) FROM range(4) t(r);
----
{0=1, 1=1, 2=1, 3=1}
# is_histogram_other_bin edge cases: NULL input yields NULL
query I
SELECT is_histogram_other_bin(NULL)
----
NULL
# a non-empty nested list is not the other bin
query I
SELECT is_histogram_other_bin([[1]])
----
false
# an empty list of a nested list type is the other bin
query I
SELECT is_histogram_other_bin([]::INT[][][])
----
true
# a struct whose only field is NULL is reported as the other bin
query I
SELECT is_histogram_other_bin({'i': NULL::INT[][]})
----
true

View File

@@ -0,0 +1,133 @@
# name: test/sql/aggregate/aggregates/histogram_table_function.test
# description: Test the histogram table function
# group: [aggregates]
require 64bit
statement ok
pragma enable_verification
# integers
statement ok
create table integers(i int);
statement ok
insert into integers values (42);
statement ok
insert into integers values (84);
# two values with bin_count := 2; three result rows are expected, the middle one with count 0
query II
SELECT * FROM histogram_values(integers, i, bin_count := 2)
----
60 1
80 0
100 1
# missing column
statement error
SELECT * FROM histogram_values(integers, k)
----
<REGEX>:.*Binder Error.*not found in FROM clause.*
# add the values 0..126, for 129 rows in total
statement ok
INSERT INTO integers FROM range(127)
# named arguments passed with "=>"; the expected counts sum to 129
query II
SELECT * FROM histogram_values(integers, i, bin_count => 10, technique => 'equi-width')
----
12 13
25 13
37 12
50 14
63 13
75 12
88 14
100 12
113 13
126 13
# histogram() is expected to report the same bins as range labels, with the same counts
query II
SELECT bin, count FROM histogram(integers, i, bin_count := 10, technique := 'equi-width')
----
x <= 12 13
12 < x <= 25 13
25 < x <= 37 12
37 < x <= 50 14
50 < x <= 63 13
63 < x <= 75 12
75 < x <= 88 14
88 < x <= 100 12
100 < x <= 113 13
113 < x <= 126 13
# add one value far above the rest (130 rows in total)
statement ok
INSERT INTO integers VALUES (99999999)
# equi-height: 10 bins with an average count of 13 are expected (130 rows / 10)
query II
SELECT COUNT(*), AVG(count) FROM histogram_values(integers, i, technique := 'equi-height')
----
10 13
# sample integers
query II
SELECT * FROM histogram_values(integers, i%2, technique := 'sample')
----
0 66
1 64
# varchar
query II
SELECT * FROM histogram_values(integers, (i%2)::VARCHAR)
----
0 66
1 64
# FIXME: there are some minor rounding problems on ARM64 with the below tests
# mode skip
# varchar does not work with equi-width-bins
statement error
SELECT * FROM histogram_values(integers, (i%2)::VARCHAR, technique := 'equi-width')
----
<REGEX>:.*Binder Error.*Unsupported type "VARCHAR" for equi_width_bins.*
# but it works with equi-height
query II
SELECT COUNT(*), AVG(count) FROM histogram_values(integers, i::VARCHAR, technique := 'equi-height')
----
10 13.0
# histogram with ranges
query II
SELECT COUNT(bin), AVG(count) FROM histogram(integers, i::VARCHAR, technique := 'equi-height')
----
10 13
# booleans
statement ok
create table booleans(b bool);
# every fourth row is true: 25 true and 75 false values
statement ok
insert into booleans select case when i%4=0 then true else false end from range(100) t(i)
# booleans cast to INTEGER
query II
SELECT * FROM histogram_values(booleans, b::INTEGER)
----
0 75
1 25
# the records between "mode skip" and "mode unskip" document known gaps and carry no expected rows
mode skip
# FIXME: booleans do not work yet because of quantile turning any unsupported type into VARCHAR
query II
SELECT * FROM histogram_values(booleans, b)
----
# lists
# FIXME: lists do not work yet because of quantile turning any unsupported type into VARCHAR
query II
SELECT * FROM histogram_values(integers, [i%2])
----
mode unskip

View File

@@ -0,0 +1,30 @@
# name: test/sql/aggregate/aggregates/histogram_test_all_types.test_slow
# description: Test histogram operator for all types
# group: [aggregates]
statement ok
PRAGMA enable_verification
statement ok
create table all_types as from test_all_types()
# run both checks once per column
# NOTE(review): <all_types_columns> is presumably expanded by the test runner to the column names of test_all_types() - confirm
foreach col <all_types_columns>
# look up the histogram entries for the column's min and max value; each is expected to have a count of 1
query II
SELECT histogram[min], histogram[max] FROM (
SELECT HISTOGRAM("${col}") histogram, MIN("${col}") min, MAX("${col}") max
FROM all_types
)
----
1 1
# binned histogram
# the bin list consists of the column's min and max value
query II
SELECT histogram[min], histogram[max] FROM (
SELECT HISTOGRAM("${col}", [(select min("${col}") from all_types), ((select max("${col}") from all_types))]) histogram, MIN("${col}") min, MAX("${col}") max
FROM all_types
)
----
1 1
endloop

View File

@@ -0,0 +1,125 @@
# name: test/sql/aggregate/aggregates/histogram_tpch.test_slow
# description: Test histogram operator on TPC-H
# group: [aggregates]
require tpch
statement ok
CALL dbgen(sf=1);
query I
SELECT histogram(l_orderkey, range(0, 7000000, 1000000))
FROM lineitem
----
{0=0, 1000000=1000049, 2000000=1000448, 3000000=999174, 4000000=1000987, 5000000=1000496, 6000000=1000061}
query I
SELECT histogram(l_orderkey // 1000000)
FROM lineitem
----
{0=1000048, 1=1000447, 2=999171, 3=1000989, 4=1000498, 5=1000060, 6=2}
query I
SELECT histogram(l_shipdate, range((SELECT MIN(l_shipdate) FROM lineitem), (SELECT MAX(l_shipdate) FROM lineitem), interval '1' year))
FROM lineitem
----
{1992-01-02=17, 1993-01-02=761193, 1994-01-02=908785, 1995-01-02=909464, 1996-01-02=914963, 1997-01-02=913658, 1998-01-02=911349, infinity=681786}
# grouped histogram
query II nosort grouped_map
SELECT l_returnflag, histogram(l_orderkey, range(0, 7000000, 1000000))
FROM lineitem
GROUP BY l_returnflag
ORDER BY l_returnflag
----
# compute using filtered aggregates - this should produce the same result
# Shares the label "grouped_map" with the grouped histogram query, so the runner
# requires both to render identically. Every bucket is wrapped in coalesce(..., 0)
# because the histogram reports 0 for an empty bucket while SUM over no rows is NULL.
# (The previous "case when ... is null then 0 end" form for bucket 0 would have
# returned NULL as soon as that bucket contained a row.)
query II nosort grouped_map
SELECT l_returnflag,
    map {
        '0': coalesce(sum(1) FILTER (WHERE l_orderkey <= 0), 0),
        '1000000': coalesce(sum(1) FILTER (WHERE l_orderkey > 0 AND l_orderkey <= 1000000), 0),
        '2000000': coalesce(sum(1) FILTER (WHERE l_orderkey > 1000000 AND l_orderkey <= 2000000), 0),
        '3000000': coalesce(sum(1) FILTER (WHERE l_orderkey > 2000000 AND l_orderkey <= 3000000), 0),
        '4000000': coalesce(sum(1) FILTER (WHERE l_orderkey > 3000000 AND l_orderkey <= 4000000), 0),
        '5000000': coalesce(sum(1) FILTER (WHERE l_orderkey > 4000000 AND l_orderkey <= 5000000), 0),
        '6000000': coalesce(sum(1) FILTER (WHERE l_orderkey > 5000000 AND l_orderkey <= 6000000), 0)
    }
FROM lineitem
GROUP BY l_returnflag
ORDER BY l_returnflag
----
# histogram table function
# decimals
query II
SELECT bin, count FROM histogram(lineitem, l_extendedprice)
----
x <= 10000.0 773545
10000.0 < x <= 20000.0 831988
20000.0 < x <= 30000.0 833410
30000.0 < x <= 40000.0 835155
40000.0 < x <= 50000.0 829356
50000.0 < x <= 60000.0 724657
60000.0 < x <= 70000.0 528755
70000.0 < x <= 80000.0 358127
80000.0 < x <= 90000.0 207206
90000.0 < x <= 100000.0 74894
100000.0 < x <= 110000.0 4122
# dates
query II
SELECT bin, count FROM histogram(lineitem, l_shipdate)
----
x <= 1992-12-01 682114
1992-12-01 < x <= 1993-08-01 603468
1993-08-01 < x <= 1994-04-01 606065
1994-04-01 < x <= 1994-12-01 608515
1994-12-01 < x <= 1995-08-01 608265
1995-08-01 < x <= 1996-04-01 609527
1996-04-01 < x <= 1996-12-01 609621
1996-12-01 < x <= 1997-08-01 607911
1997-08-01 < x <= 1998-04-01 605815
1998-04-01 < x <= 1998-12-01 459914
# varchar
query II
SELECT bin, count FROM histogram(lineitem, l_returnflag)
----
A 1478493
N 3043852
R 1478870
# list
query II
SELECT bin, count FROM histogram(lineitem, [l_returnflag])
----
[A] 1478493
[N] 3043852
[R] 1478870
# struct
query II
SELECT bin, count FROM histogram(lineitem, {'i': l_returnflag})
----
{'i': A} 1478493
{'i': N} 3043852
{'i': R} 1478870
# string stress test
# both queries carry the label "histstrings", so the runner requires identical output:
# the keys of the histogram over l_comment must equal the sorted distinct comments
query I nosort histstrings
SELECT unnest(map_keys(histogram(l_comment))) FROM lineitem
----
query I nosort histstrings
SELECT DISTINCT l_comment FROM lineitem ORDER BY l_comment
----
# approx_top_k stress test
# k=5 is larger than the three return flags expected below, so all of them come back;
# list_sort makes the order of the returned list deterministic
query I
SELECT list_sort(approx_top_k(l_returnflag, 5)) FROM lineitem
----
[A, N, R]
# high-cardinality input: only checks that the query runs, the result is not compared
statement ok
SELECT approx_top_k(l_comment, 5) FROM lineitem

View File

@@ -0,0 +1,50 @@
# name: test/sql/aggregate/aggregates/max_n_all_types_grouped.test
# description: Test max "n" aggregate function for multiple types grouped
# group: [aggregates]
statement ok
PRAGMA enable_verification
# Reference implementation used by the loop below: rank the values of every group
# in descending order with row_number(), keep the rows ranked <= k and collect them
# into one list per group. table_name is resolved at run time via query_table().
statement ok
CREATE MACRO compute_top_k(table_name, group_col, val_col, k) AS TABLE
SELECT rs.grp, array_agg(rs.val)
FROM (
SELECT group_col AS grp, val_col AS val, row_number() OVER (PARTITION BY group_col ORDER BY val_col DESC) as rid
FROM query_table(table_name::VARCHAR) ORDER BY group_col DESC
) as rs
WHERE rid <= k
GROUP BY ALL
ORDER BY ALL;
statement ok
create table all_types as from test_all_types()
foreach val_col bool int bigint hugeint date time timestamp float double dec_4_1 uuid interval varchar
# Cross product of the selected value column with a two-valued group column
# (i % 2 over range(5)), so every value appears in both groups
statement ok
CREATE OR REPLACE TABLE tbl AS SELECT * FROM
(SELECT ${val_col} as val_col FROM all_types)
CROSS JOIN (SELECT i % 2 as grp_col FROM range(5) as r(i));
# Disable top_n_window_elimination to prevent comparing max and max_by
statement ok
SET disabled_optimizers = 'top_n_window_elimination'
# reference result computed with the window-function macro
statement ok
CREATE OR REPLACE TABLE window_table AS SELECT * FROM compute_top_k(tbl, grp_col, val_col, 2) as rs(grp, res);
statement ok
SET disabled_optimizers = ''
# result under test: the two-argument max aggregate
statement ok
CREATE OR REPLACE TABLE agg_table AS SELECT grp_col as grp, max(val_col, 2) as res FROM tbl GROUP BY ALL ORDER BY ALL;
# Check that the two tables are equal (expecting no rows).
# The difference is taken in both directions: a single EXCEPT would only prove that
# window_table is contained in agg_table and would miss extra rows in agg_table.
query II
(SELECT * FROM window_table EXCEPT SELECT * FROM agg_table)
UNION ALL
(SELECT * FROM agg_table EXCEPT SELECT * FROM window_table);
----
endloop

View File

@@ -0,0 +1,29 @@
# name: test/sql/aggregate/aggregates/mode_test_all_types.test_slow
# description: Test mode operator for all types
# group: [aggregates]
statement ok
PRAGMA enable_verification
statement ok
create table all_types as from test_all_types()
# both checks run once per column; ${col} is replaced by the column name
foreach col <all_types_columns>
# one extra copy of the column's MIN is appended, so MODE is expected to return MIN
# (assumes every other value of the column occurs only once - confirm against test_all_types())
# IS NOT DISTINCT FROM is used so the comparison yields true/false rather than NULL
query I
SELECT mode IS NOT DISTINCT FROM min_val FROM (
SELECT MODE(v) AS mode, MIN(v) AS min_val
FROM (SELECT "${col}" AS v FROM all_types UNION ALL SELECT MIN("${col}"), FROM all_types)
)
----
true
# same check with an extra copy of the column's MAX
query I
SELECT mode IS NOT DISTINCT FROM max_val FROM (
SELECT MODE(v) AS mode, MAX(v) AS max_val
FROM (SELECT "${col}" AS v FROM all_types UNION ALL SELECT MAX("${col}"), FROM all_types)
)
----
true
endloop

View File

@@ -0,0 +1,34 @@
# name: test/sql/aggregate/aggregates/mode_tpch.test_slow
# description: Test mode function with large data sets
# group: [aggregates]
require tpch
statement ok
CALL dbgen(sf=1);
# grouped mode over the comment strings; ORDER BY keeps the row order of the
# two groups deterministic so it can be compared against the expected output
query II
select l_returnflag, mode(l_comment) from lineitem where l_returnflag <> 'N' group by l_returnflag order by l_returnflag;
----
A furiously
R furiously
# run a windowed mode
query I
SELECT avg(strlen(padded_mode)) FROM
(SELECT mode(l_comment) OVER (ORDER BY rowid ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING) AS padded_mode FROM lineitem)
----
26.4927
# mode on nested types
query III
SELECT mode(l_shipdate), mode([l_shipdate]), mode({'i': l_shipdate}) from lineitem;
----
1997-06-01 [1997-06-01] {'i': 1997-06-01}
query IIII
SELECT l_returnflag, mode(l_shipdate), mode([l_shipdate]), mode({'i': l_shipdate}) from lineitem group by l_returnflag order by l_returnflag
----
A 1995-03-24 [1995-03-24] {'i': 1995-03-24}
N 1997-06-01 [1997-06-01] {'i': 1997-06-01}
R 1994-08-23 [1994-08-23] {'i': 1994-08-23}

View File

@@ -0,0 +1,28 @@
# name: test/sql/aggregate/aggregates/quantile_test_all_types.test_slow
# description: Test quantile operator for all types
# group: [aggregates]
statement ok
PRAGMA enable_verification
statement ok
create table all_types as from test_all_types()
# date is not tested here because the extreme date values overflow timestamps
# and median(date) results in a timestamp
foreach col <all_types_columns> !date
# quantile_disc
# the 0.2 and 0.8 quantiles are expected to equal the column's MIN and MAX,
# both in the scalar form and in the list form ([0.8, 0.2] -> [MAX, MIN])
query III
SELECT quantile_disc("${col}", 0.2) IS NOT DISTINCT FROM MIN("${col}"),
quantile_disc("${col}", 0.8) IS NOT DISTINCT FROM MAX("${col}"),
quantile_disc("${col}", [0.8, 0.2]) IS NOT DISTINCT FROM [MAX("${col}"), MIN("${col}")]
FROM all_types
----
true true true
# median
# "statement ok" only checks that the query executes; the boolean it computes is not compared
statement ok
SELECT median("${col}") >= min("${col}") AND median("${col}") <= max("${col}") FROM all_types
endloop

View File

@@ -0,0 +1,30 @@
# name: test/sql/aggregate/aggregates/string_agg_union.test
# description: Issue #2591: string_agg only returns final row if there is no Group By clause
# group: [aggregates]
statement ok
PRAGMA enable_verification
# ungrouped string_agg over three UNION ALL branches must concatenate all three rows
query I
WITH my_data AS (
    SELECT 'text1'::VARCHAR(1000) AS my_column
    UNION ALL
    SELECT 'text1'::VARCHAR(1000) AS my_column
    UNION ALL
    SELECT 'text1'::VARCHAR(1000) AS my_column
)
SELECT STRING_AGG(my_column, ', ') AS my_string_agg
FROM my_data
----
text1, text1, text1
# same data with a constant grouping column: the grouped result must match the ungrouped one
query I
WITH my_data AS (
    SELECT 1 AS dummy, 'text1'::VARCHAR(1000) AS my_column
    UNION ALL
    SELECT 1 AS dummy, 'text1'::VARCHAR(1000) AS my_column
    UNION ALL
    SELECT 1 AS dummy, 'text1'::VARCHAR(1000) AS my_column
)
SELECT STRING_AGG(my_column, ', ') AS my_string_agg
FROM my_data
GROUP BY dummy
----
text1, text1, text1

View File

@@ -0,0 +1,73 @@
# name: test/sql/aggregate/aggregates/test_aggr_string.test
# description: Test aggregations on strings
# group: [aggregates]
statement ok
SET default_null_order='nulls_first';
# UNION of an all-NULL row with a row of non-ASCII strings (Cyrillic, Latin with umlaut, CJK)
# plus one ASCII string; with default_null_order='nulls_first' the NULL row sorts first.
# Each expected row carries five values, one per selected column.
query TTTTI
SELECT NULL as a, NULL as b, NULL as c, NULL as d, 1 as id UNION SELECT 'Кирилл' as a, 'Müller' as b, '我是谁' as c, 'ASCII' as d, 2 as id ORDER BY 1
----
NULL NULL NULL NULL 1
Кирилл Müller 我是谁 ASCII 2
statement ok
CREATE TABLE test (a INTEGER, s VARCHAR);
statement ok
INSERT INTO test VALUES (11, 'hello'), (12, 'world'), (11, NULL)
# scalar aggregation on string
query II
SELECT COUNT(*), COUNT(s) FROM test;
----
3 2
# grouped aggregation on string
query III
SELECT a, COUNT(*), COUNT(s) FROM test GROUP BY a ORDER BY a;
----
11 2 1
12 1 1
# group by the strings
query TR
SELECT s, SUM(a) FROM test GROUP BY s ORDER BY s;
----
NULL 11.000000
hello 11.000000
world 12.000000
# distinct aggregations on string
statement ok
INSERT INTO test VALUES (11, 'hello'), (12, 'world')
# scalar distinct
query III
SELECT COUNT(*), COUNT(s), COUNT(DISTINCT s) FROM test;
----
5 4 2
# grouped distinct
query IIII
SELECT a, COUNT(*), COUNT(s), COUNT(DISTINCT s) FROM test GROUP BY a ORDER BY a;
----
11 3 2 1
12 2 2 1
# now with WHERE clause
query IIII
SELECT a, COUNT(*), COUNT(s), COUNT(DISTINCT s) FROM test WHERE s IS NOT NULL GROUP BY a ORDER BY a;
----
11 2 2 1
12 2 2 1
# string min/max with long strings
# table creation and data load are separate records so that a failure is
# attributed to the statement that actually failed
statement ok
CREATE TABLE test_strings(s VARCHAR);
statement ok
INSERT INTO test_strings VALUES ('aaaaaaaahello'), ('bbbbbbbbbbbbbbbbbbbbhello'), ('ccccccccccccccchello'), ('aaaaaaaaaaaaaaaaaaaaaaaahello');
query II
SELECT MIN(s), MAX(s) FROM test_strings;
----
aaaaaaaaaaaaaaaaaaaaaaaahello ccccccccccccccchello

View File

@@ -0,0 +1,230 @@
# name: test/sql/aggregate/aggregates/test_aggregate_types.test
# description: Test aggregates with many different types
# group: [aggregates]
statement ok
CREATE TABLE strings(s STRING, g INTEGER)
statement ok
INSERT INTO strings VALUES ('hello', 0), ('world', 1), (NULL, 0), ('r', 1)
# simple aggregates only
query IITT
SELECT COUNT(*), COUNT(s), MIN(s), MAX(s) FROM strings
----
4
3
hello
world
query IITT
SELECT COUNT(*), COUNT(s), MIN(s), MAX(s) FROM strings WHERE s IS NULL
----
1
0
NULL
NULL
# add string agg
query TTTT
SELECT STRING_AGG(s, ' '), STRING_AGG(s, ''), STRING_AGG('', ''), STRING_AGG('hello', ' ') FROM strings
----
hello world r
helloworldr
(empty)
hello hello hello hello
# more complex agg (groups)
query IIITTT
SELECT g, COUNT(*), COUNT(s), MIN(s), MAX(s), STRING_AGG(s, ' ') FROM strings GROUP BY g ORDER BY g
----
0
2
1
hello
hello
hello
1
2
2
r
world
world r
# complex agg with distinct and order by and conversion from int to string
query IIITTT
SELECT g, COUNT(*), COUNT(s), MIN(s), MAX(s), STRING_AGG(DISTINCT g::VARCHAR ORDER BY g::VARCHAR DESC) FROM strings GROUP BY g ORDER BY g;
----
0
2
1
hello
hello
0
1
2
2
r
world
1
# complex agg with distinct and order by with NULL
query IIITTT
SELECT g, COUNT(*), COUNT(s), MIN(s), MAX(s), STRING_AGG(DISTINCT s ORDER BY s ASC) FROM strings GROUP BY g ORDER BY g;
----
0
2
1
hello
hello
hello
1
2
2
r
world
r,world
# empty group
query IIITTT
SELECT g, COUNT(*), COUNT(s), MIN(s), MAX(s), STRING_AGG(s, ' ') FROM strings WHERE s IS NULL OR s <> 'hello' GROUP BY g ORDER BY g
----
0
1
0
NULL
NULL
NULL
1
2
2
r
world
world r
# unsupported aggregates
statement error
SELECT SUM(s) FROM strings GROUP BY g ORDER BY g
----
statement error
SELECT AVG(s) FROM strings GROUP BY g ORDER BY g
----
# booleans
statement ok
CREATE TABLE booleans(b BOOLEAN, g INTEGER)
statement ok
INSERT INTO booleans VALUES (false, 0), (true, 1), (NULL, 0), (false, 1)
query IITT
SELECT COUNT(*), COUNT(b), MIN(b), MAX(b) FROM booleans
----
4
3
0
1
query IITT
SELECT COUNT(*), COUNT(b), MIN(b), MAX(b) FROM booleans WHERE b IS NULL
----
1
0
NULL
NULL
query IIITT
SELECT g, COUNT(*), COUNT(b), MIN(b), MAX(b) FROM booleans GROUP BY g ORDER BY g
----
0
2
1
0
0
1
2
2
0
1
query IIITT
SELECT g, COUNT(*), COUNT(b), MIN(b), MAX(b) FROM booleans WHERE b IS NULL OR b=true GROUP BY g ORDER BY g
----
0
1
0
NULL
NULL
1
1
1
1
1
# SUM over a BOOLEAN column is accepted: group 0 holds (false, NULL) and gives 0,
# group 1 holds (true, false) and gives 1
query I
SELECT SUM(b) FROM booleans GROUP BY g ORDER BY g
----
0
1
# AVG over a BOOLEAN column is expected to be rejected
statement error
SELECT AVG(b) FROM booleans GROUP BY g ORDER BY g
----
statement ok
CREATE TABLE integers(i INTEGER, g INTEGER)
statement ok
INSERT INTO integers VALUES (12, 0), (22, 1), (NULL, 0), (14, 1)
query IIIIR
SELECT COUNT(*), COUNT(i), MIN(i), MAX(i), SUM(i) FROM integers
----
4
3
12
22
48.000000
query IIIIR
SELECT COUNT(*), COUNT(i), MIN(i), MAX(i), SUM(i) FROM INTEGERS WHERE i IS NULL
----
1
0
NULL
NULL
NULL
query IIIIIR
SELECT g, COUNT(*), COUNT(i), MIN(i), MAX(i), SUM(i) FROM integers GROUP BY g ORDER BY g
----
0
2
1
12
12
12.000000
1
2
2
14
22
36.000000
query IIIIIR
SELECT g, COUNT(*), COUNT(i), MIN(i), MAX(i), SUM(i) FROM integers WHERE i IS NULL OR i > 15 GROUP BY g ORDER BY g
----
0
1
0
NULL
NULL
NULL
1
1
1
22
22
22.000000

View File

@@ -0,0 +1,161 @@
# name: test/sql/aggregate/aggregates/test_aggregate_types_scalar.test
# description: Test scalar aggregates with many different types
# group: [aggregates]
query IIIIII
SELECT COUNT(), COUNT(1), COUNT(*), COUNT(NULL), COUNT('hello'), COUNT(DATE '1992-02-02')
----
1
1
1
0
1
1
statement error
SELECT COUNT(1, 2)
----
query RRR
SELECT SUM(1), SUM(NULL), SUM(33.3)
----
1.000000
NULL
33.3
query I
SELECT SUM(True)
----
1
statement error
SELECT SUM('hello')
----
statement error
SELECT SUM(DATE '1992-02-02')
----
statement error
SELECT SUM()
----
statement error
SELECT SUM(1, 2)
----
query IIRTTTT
SELECT MIN(1), MIN(NULL), MIN(33.3), MIN('hello'), MIN(True), MIN(DATE '1992-02-02'), MIN(TIMESTAMP '2008-01-01 00:00:01')
----
1 NULL 33.3 hello 1 1992-02-02 2008-01-01 00:00:01
statement error
SELECT MIN()
----
# unlike MIN() with no arguments, the two-argument form is accepted and returns a list
# (presumably the second argument is the number of values to return - confirm in the MIN docs);
# with a single input row that list is [1]
query I
SELECT MIN(1, 2)
----
[1]
query IIRTTTT
SELECT MAX(1), MAX(NULL), MAX(33.3), MAX('hello'), MAX(True), MAX(DATE '1992-02-02'), MAX(TIMESTAMP '2008-01-01 00:00:01')
----
1 NULL 33.3 hello 1 1992-02-02 2008-01-01 00:00:01
statement error
SELECT MAX()
----
# unlike MAX() with no arguments, the two-argument form is accepted and returns a list
# (presumably the second argument is the number of values to return - confirm in the MAX docs);
# with a single input row that list is [1]
query I
SELECT MAX(1, 2)
----
[1]
query IIRTTTT
SELECT FIRST(1), FIRST(NULL), FIRST(33.3), FIRST('hello'), FIRST(True), FIRST(DATE '1992-02-02'), FIRST(TIMESTAMP '2008-01-01 00:00:01')
----
1 NULL 33.3 hello 1 1992-02-02 2008-01-01 00:00:01
statement error
SELECT FIRST()
----
statement error
SELECT FIRST(1, 2)
----
query IIRTTTT
SELECT LAST(1), LAST(NULL), LAST(33.3), LAST('hello'), LAST(True), LAST(DATE '1992-02-02'), LAST(TIMESTAMP '2008-01-01 00:00:01')
----
1 NULL 33.3 hello 1 1992-02-02 2008-01-01 00:00:01
statement error
SELECT LAST()
----
statement error
SELECT LAST(1, 2)
----
query RRR
SELECT AVG(1), AVG(NULL), AVG(33.3)
----
1.000000 NULL 33.3
statement error
SELECT AVG(True)
----
statement error
SELECT AVG('hello')
----
query I
SELECT AVG(DATE '1992-02-02')
----
1992-02-02 00:00:00
statement error
SELECT AVG()
----
statement error
SELECT AVG(1, 2)
----
query T
SELECT STRING_AGG('hello')
----
hello
query TTTTT
SELECT STRING_AGG('hello', ' '), STRING_AGG('hello', NULL), STRING_AGG(NULL, ' '), STRING_AGG(NULL, NULL), STRING_AGG('', '')
----
hello NULL NULL NULL (empty)
statement error
SELECT STRING_AGG()
----
statement error
SELECT STRING_AGG(1, 2, 3)
----
statement ok
CREATE TABLE test_val(val INT)
statement ok
INSERT INTO test_val VALUES(1), (2), (3), (3), (2)
query T
SELECT STRING_AGG(DISTINCT val::VARCHAR ORDER BY val::VARCHAR DESC) from test_val;
----
3,2,1
query IT
SELECT COUNT(NULL), STRING_AGG(DISTINCT val::VARCHAR ORDER BY val::VARCHAR ASC) from test_val;
----
0
1,2,3

View File

@@ -0,0 +1,228 @@
# name: test/sql/aggregate/aggregates/test_any_value.test
# description: Test the ANY_VALUE function
# group: [aggregates]
statement ok
PRAGMA enable_verification
statement ok
CREATE TABLE tbl(i INTEGER)
statement ok
INSERT INTO tbl VALUES (NULL), (2), (3)
# FIRST() would return the leading NULL here; ANY_VALUE returns a non-NULL value instead
query I
SELECT ANY_VALUE(i) AS a FROM tbl
----
2
statement ok
DROP TABLE tbl
#
# Test all types
#
# Numerics
# the loop body runs once per type; ${type} is replaced by the type name
# (presumably <numeric> expands to all numeric types - confirm in the runner docs)
foreach type <numeric>
# five holds the values 1..5 cast to the current type
statement ok
CREATE TABLE five AS SELECT i::${type} AS i FROM range(1, 6, 1) t1(i)
# without ORDER BY the expected value is the first one inserted
query I
SELECT ANY_VALUE(i) FROM five
----
1
# grouped by i % 3: the expected value per group is again the first one of that group
query II
SELECT i % 3 AS g, ANY_VALUE(i) FROM five GROUP BY 1 ORDER BY 1
----
0 3
1 1
2 2
# ORDER BY 5-i sorts the aggregate input descending by i, so the largest value is picked
query I
SELECT ANY_VALUE(i ORDER BY 5-i) FROM five
----
5
# same ordering per group: the largest i of each group is expected
query II
SELECT i % 3 AS g, ANY_VALUE(i ORDER BY 5-i) FROM five GROUP BY 1 ORDER BY 1
----
0 3
1 4
2 5
statement ok
DROP TABLE five
endloop
# Decimals
foreach type decimal(4,1) decimal(8,1) decimal(12,1) decimal(18,1)
statement ok
CREATE TABLE five AS SELECT i::${type} AS i FROM range(1, 6, 1) t1(i)
query I
SELECT ANY_VALUE(i ORDER BY 5-i) FROM five
----
5.0
query II
SELECT i::INTEGER % 3 AS g, ANY_VALUE(i ORDER BY 5-i) FROM five GROUP BY 1 ORDER BY 1
----
0 3.0
1 4.0
2 5.0
statement ok
DROP TABLE five
endloop
# Temporal
statement ok
CREATE TABLE five_dates AS
SELECT 1 AS i,
NULL::DATE AS d,
NULL::TIMESTAMP AS dt,
NULL::TIME AS t,
NULL::INTERVAL AS s
UNION ALL
SELECT
i::integer AS i,
'2021-08-20'::DATE + i::INTEGER AS d,
'2021-08-20'::TIMESTAMP + INTERVAL (i) HOUR AS dt,
'14:59:37'::TIME + INTERVAL (i) MINUTE AS t,
INTERVAL (i) SECOND AS s
FROM range(1, 6, 1) t1(i)
query IIII
SELECT ANY_VALUE(d), ANY_VALUE(dt), ANY_VALUE(t), ANY_VALUE(s) FROM five_dates
----
2021-08-21 2021-08-20 01:00:00 15:00:37 00:00:01
query IIIII
SELECT i % 3 AS g, ANY_VALUE(d), ANY_VALUE(dt), ANY_VALUE(t), ANY_VALUE(s)
FROM five_dates
GROUP BY 1
ORDER BY 1
----
0 2021-08-23 2021-08-20 03:00:00 15:02:37 00:00:03
1 2021-08-21 2021-08-20 01:00:00 15:00:37 00:00:01
2 2021-08-22 2021-08-20 02:00:00 15:01:37 00:00:02
query IIII
SELECT ANY_VALUE(d ORDER BY 5-i), ANY_VALUE(dt ORDER BY 5-i), ANY_VALUE(t ORDER BY 5-i), ANY_VALUE(s ORDER BY 5-i) FROM five_dates
----
2021-08-25 2021-08-20 05:00:00 15:04:37 00:00:05
query IIIII
SELECT i % 3 AS g, ANY_VALUE(d ORDER BY 5-i), ANY_VALUE(dt ORDER BY 5-i), ANY_VALUE(t ORDER BY 5-i), ANY_VALUE(s ORDER BY 5-i)
FROM five_dates
GROUP BY 1
ORDER BY 1
----
0 2021-08-23 2021-08-20 03:00:00 15:02:37 00:00:03
1 2021-08-24 2021-08-20 04:00:00 15:03:37 00:00:04
2 2021-08-25 2021-08-20 05:00:00 15:04:37 00:00:05
# WITH TIME ZONE
query II
SELECT ANY_VALUE(dt::TIMESTAMPTZ), ANY_VALUE(t::TIMETZ) FROM five_dates
----
2021-08-20 01:00:00+00 15:00:37+00
query III
SELECT i % 3 AS g, ANY_VALUE(dt::TIMESTAMPTZ), ANY_VALUE(t::TIMETZ)
FROM five_dates
GROUP BY 1
ORDER BY 1
----
0 2021-08-20 03:00:00+00 15:02:37+00
1 2021-08-20 01:00:00+00 15:00:37+00
2 2021-08-20 02:00:00+00 15:01:37+00
query II
SELECT ANY_VALUE(dt::TIMESTAMPTZ ORDER BY 5-i), ANY_VALUE(t::TIMETZ ORDER BY 5-i) FROM five_dates
----
2021-08-20 05:00:00+00 15:04:37+00
query III
SELECT i % 3 AS g, ANY_VALUE(dt::TIMESTAMPTZ ORDER BY 5-i), ANY_VALUE(t::TIMETZ ORDER BY 5-i)
FROM five_dates
GROUP BY 1
ORDER BY 1
----
0 2021-08-20 03:00:00+00 15:02:37+00
1 2021-08-20 04:00:00+00 15:03:37+00
2 2021-08-20 05:00:00+00 15:04:37+00
statement ok
DROP TABLE five_dates
# Complex
statement ok
CREATE TABLE five_complex AS
SELECT
1 AS i,
NULL::VARCHAR AS s,
NULL::BIGINT[] AS l,
NULL AS r
UNION ALL
SELECT
i::integer AS i,
i::VARCHAR AS s,
[i] AS l,
{'a': i} AS r
FROM range(1, 6, 1) t1(i)
query III
SELECT ANY_VALUE(s), ANY_VALUE(l), ANY_VALUE(r)
FROM five_complex
----
1 [1] {'a': 1}
query IIII
SELECT i % 3 AS g, ANY_VALUE(s), ANY_VALUE(l), ANY_VALUE(r)
FROM five_complex
GROUP BY 1
ORDER BY 1
----
0 3 [3] {'a': 3}
1 1 [1] {'a': 1}
2 2 [2] {'a': 2}
query III
SELECT ANY_VALUE(s ORDER BY 5-i), ANY_VALUE(l ORDER BY 5-i), ANY_VALUE(r ORDER BY 5-i)
FROM five_complex
----
5 [5] {'a': 5}
query IIII
SELECT i % 3 AS g, ANY_VALUE(s ORDER BY 5-i), ANY_VALUE(l ORDER BY 5-i), ANY_VALUE(r ORDER BY 5-i)
FROM five_complex
GROUP BY 1
ORDER BY 1
----
0 3 [3] {'a': 3}
1 4 [4] {'a': 4}
2 5 [5] {'a': 5}
statement ok
DROP TABLE five_complex
# Window Function
query I
SELECT ANY_VALUE(i) OVER (PARTITION BY i) AS a
FROM generate_series(1, 5) t(i)
ORDER BY ALL
----
1
2
3
4
5

View File

@@ -0,0 +1,58 @@
# name: test/sql/aggregate/aggregates/test_any_value_noninlined.test
# description: Test ANY_VALUE with non-inlined strings
# group: [aggregates]
statement ok
PRAGMA enable_verification
statement ok
CREATE TABLE tbl(a INTEGER, b VARCHAR)
statement ok
INSERT INTO tbl VALUES (1, NULL), (2, 'thisisalongstring'), (3, 'thisisalsoalongstring')
# non-grouped aggregate
query T
SELECT ANY_VALUE(b) FROM tbl
----
thisisalongstring
query T
SELECT ANY_VALUE(b) FROM tbl WHERE a=2
----
thisisalongstring
query T
SELECT ANY_VALUE(b) FROM tbl WHERE a=1
----
NULL
query T
SELECT ANY_VALUE(b) FROM tbl WHERE a=1 GROUP BY a
----
NULL
# no row has a=0: the ungrouped aggregate still returns one row containing NULL ...
query T
SELECT ANY_VALUE(b) FROM tbl WHERE a=0
----
NULL
# ... while the grouped aggregate over the same empty input returns no rows at all
query T
SELECT ANY_VALUE(b) FROM tbl WHERE a=0 GROUP BY b
----
# grouped aggregate
query IT
SELECT a, ANY_VALUE(b) FROM tbl GROUP BY a ORDER BY a
----
1
NULL
2
thisisalongstring
3
thisisalsoalongstring
query I
SELECT ANY_VALUE(i) FROM (VALUES (NULL::INT32)) tbl(i)
----
NULL

View File

@@ -0,0 +1,308 @@
# name: test/sql/aggregate/aggregates/test_approx_quantile.test
# description: Test approx quantile operator
# group: [aggregates]
statement ok
PRAGMA enable_verification
statement ok
PRAGMA verify_external
set seed 0.8675309
statement ok
create table quantile as select range r, random() from range(10000) union all values (NULL, 0.1), (NULL, 0.5), (NULL, 0.9) order by 2;
# Verify that there are two versions of each return type (with and without the count)
query II
SELECT return_type, count(*) AS defined
FROM duckdb_functions()
WHERE function_name = 'reservoir_quantile'
GROUP BY ALL
HAVING defined <> 2
ORDER BY ALL;
----
query I
SELECT CASE
WHEN ( approx_quantile between (true_quantile - 100) and (true_quantile + 100) )
THEN TRUE
ELSE FALSE
END
FROM (SELECT approx_quantile(r, 0.5) as approx_quantile ,quantile(r,0.5) as true_quantile FROM quantile) AS T
----
1
query I
SELECT CASE
WHEN ( approx_quantile between (true_quantile - 100) and (true_quantile + 100) )
THEN TRUE
ELSE FALSE
END
FROM (SELECT approx_quantile(r, 1.0) as approx_quantile ,quantile(r, 1.0) as true_quantile FROM quantile) AS T
----
1
query I
SELECT CASE
WHEN ( approx_quantile between (true_quantile - 100) and (true_quantile + 100) )
THEN TRUE
ELSE FALSE
END
FROM (SELECT approx_quantile(r, 0.0) as approx_quantile ,quantile(r, 0.0) as true_quantile from quantile) AS T
----
1
query II
SELECT approx_quantile(NULL, 0.5) as approx_quantile ,quantile(NULL, 0.5) as true_quantile
----
NULL NULL
query I
SELECT CASE
WHEN ( approx_quantile between (true_quantile - 100) and (true_quantile + 100) )
THEN TRUE
ELSE FALSE
END
FROM (SELECT approx_quantile(42, 0.5) as approx_quantile ,quantile(42, 0.5) as true_quantile) AS T
----
1
query II
SELECT approx_quantile(NULL, 0.5) as approx_quantile ,quantile(NULL, 0.5) as true_quantile FROM quantile
----
NULL NULL
query II
SELECT approx_quantile(1, 0.5) as approx_quantile ,quantile(1, 0.5) as true_quantile FROM quantile
----
1 1
query I
SELECT CASE
WHEN ( approx_quantile between (true_quantile - 100) and (true_quantile + 100) )
THEN TRUE
ELSE FALSE
END
FROM (SELECT approx_quantile(42, 0.5) as approx_quantile ,quantile(42, 0.5) as true_quantile) AS T
----
1
query I
SELECT CASE
WHEN ( approx_quantile between (true_quantile - 100) and (true_quantile + 100) )
THEN TRUE
ELSE FALSE
END
FROM (SELECT approx_quantile(r, 0.1) as approx_quantile ,quantile(r, 0.1) as true_quantile from quantile) AS T
----
1
query I
SELECT CASE
WHEN ( approx_quantile between (true_quantile - 100) and (true_quantile + 100) )
THEN TRUE
ELSE FALSE
END
FROM (SELECT approx_quantile(r, 0.9) as approx_quantile ,quantile(r, 0.9) as true_quantile from quantile) AS T
----
1
# TIMETZ Support. Note this is not exact because we use DOUBLEs internally.
query I
SELECT approx_quantile('1:02:03.000000+05:30'::TIMETZ, 0.5);
----
01:02:42+05:30:39
# List versions
query I
SELECT [
(a[1] BETWEEN (q[1] - 100) AND (q[1] + 100)),
(a[2] BETWEEN (q[2] - 100) AND (q[2] + 100)),
(a[3] BETWEEN (q[3] - 100) AND (q[3] + 100)),
]
FROM (
SELECT approx_quantile(r, [0.25, 0.5, 0.75]) AS a,
quantile(r, [0.25, 0.5, 0.75]) AS q,
FROM quantile
) tbl;
----
[true, true, true]
query I
SELECT [
(a[1] BETWEEN (q[1] - 100) AND (q[1] + 100)),
(a[2] BETWEEN (q[2] - 100) AND (q[2] + 100)),
(a[3] BETWEEN (q[3] - 100) AND (q[3] + 100)),
]
FROM (
SELECT reservoir_quantile(r, [0.25, 0.5, 0.75], 4096) AS a,
quantile(r, [0.25, 0.5, 0.75]) AS q,
FROM quantile
) tbl;
----
[true, true, true]
# Array lists
query I
SELECT approx_quantile(col, [0.5, 0.4, 0.1]) AS percentile
FROM VALUES (0), (1), (2), (10) AS tab(col);
----
[2, 1, 0]
query I
SELECT approx_quantile(col, ARRAY_VALUE(0.5, 0.4, 0.1)) AS percentile
FROM VALUES (0), (1), (2), (10) AS tab(col);
----
[2, 1, 0]
# Errors
statement error
SELECT approx_quantile(r, -0.1) FROM quantile
----
statement error
SELECT approx_quantile(r, 1.1) FROM quantile
----
statement error
SELECT approx_quantile(r, NULL) FROM quantile
----
statement error
SELECT approx_quantile(r, r) FROM quantile
----
statement error
SELECT approx_quantile(r::string, 0.5) FROM quantile
----
statement error
SELECT approx_quantile(r) FROM quantile
----
statement error
SELECT approx_quantile(r, 0.1, 0.2) FROM quantile
----
statement error
SELECT approx_quantile(42, CAST(NULL AS INT[]));
----
APPROXIMATE QUANTILE parameter list cannot be NULL
statement ok
pragma threads=4
statement ok
PRAGMA verify_parallelism
# Re-check the 0.1, 0.9 and 0.5 quantiles after threads=4 and verify_parallelism were set above.
# NOTE(review): the tolerance is 1% of SUM(r). r is filled from range(10000), so SUM(r) is
# 49,995,000 and the tolerance is about 499,950 - far wider than the 0..9999 value range,
# so these checks can hardly fail. Confirm whether a tighter bound (e.g. the +/- 100 used
# earlier in this file) was intended.
query I
SELECT CASE
WHEN (approx_quantile between (true_quantile - (sumr * 0.01)) and (true_quantile + (sumr * 0.01)))
THEN TRUE
ELSE FALSE
END
FROM (SELECT approx_quantile(r, 0.1) as approx_quantile, quantile(r, 0.1) as true_quantile, SUM(r) as sumr from quantile) AS T
----
1
query I
SELECT CASE
WHEN (approx_quantile between (true_quantile - (sumr * 0.01)) and (true_quantile + (sumr * 0.01)))
THEN TRUE
ELSE FALSE
END
FROM (SELECT approx_quantile(r, 0.9) as approx_quantile, quantile(r, 0.9) as true_quantile, SUM(r) as sumr from quantile) AS T
----
1
query I
SELECT CASE
WHEN (approx_quantile between (true_quantile - (sumr * 0.01)) and (true_quantile + (sumr * 0.01)))
THEN TRUE
ELSE FALSE
END
FROM (SELECT approx_quantile(r, 0.5) as approx_quantile, quantile(r, 0.5) as true_quantile, SUM(r) as sumr from quantile) AS T
----
1
# test with a DECIMAL column
statement ok
CREATE TABLE repro (i DECIMAL(15,2));
statement ok
SELECT approx_quantile(i, 0.5) FROM repro;
statement ok
SELECT approx_quantile(i, [0.5]) FROM repro;
# Test reservoir quantile
statement ok
PRAGMA disable_verification;
statement ok
PRAGMA disable_verify_external;
statement ok
PRAGMA disable_verify_fetch_row;
statement ok
SELECT reservoir_quantile(r, 0.9) from quantile
statement ok
SELECT reservoir_quantile(r, 0.9,1000) from quantile
# reservoir = 1
query I
SELECT reservoir_quantile(1, 0.5, 1) FROM quantile
----
1
statement ok
SELECT RESERVOIR_QUANTILE(b, 0.5)
FROM (SELECT 'a' AS a, 1.0 AS b) y
GROUP BY a
statement ok
SELECT APPROX_QUANTILE(b, 0.5)
FROM (
SELECT 'a' AS a, 1.0 AS b
UNION ALL SELECT 'a' AS a, 1.0 AS b
UNION ALL SELECT 'b' AS a, 1.0 AS b
) y
GROUP BY a
statement error
SELECT reservoir_quantile(r, r) from quantile
----
statement error
SELECT reservoir_quantile(r, NULL) from quantile
----
statement error
SELECT reservoir_quantile(r, r, r) from quantile
----
statement error
SELECT reservoir_quantile(r, 0.9, NULL) from quantile
----
statement error
SELECT reservoir_quantile(r, 0.9, r) from quantile
----
statement error
SELECT reservoir_quantile(r, random()::float) from quantile
----
statement error
SELECT reservoir_quantile(r, 0.9, random()::float) from quantile
----
# DECIMAL binding
query I
SELECT RESERVOIR_QUANTILE(0., 0.9, 1000);
----
0

View File

@@ -0,0 +1,126 @@
# name: test/sql/aggregate/aggregates/test_approximate_distinct_count.test
# description: Test approx_count_distinct operator
# group: [aggregates]
statement ok
PRAGMA enable_verification
statement ok
PRAGMA verify_external
statement error
select approx_count_distinct(*)
----
query I
select approx_count_distinct(1)
----
1
query I
select approx_count_distinct(NULL)
----
0
query I
select approx_count_distinct('hello')
----
1
query II
select approx_count_distinct(10), approx_count_distinct('hello') from range(100);
----
1 1
query I
select approx_count_distinct(i) from range (100) tbl(i) WHERE 1 == 0;
----
0
statement ok
CREATE TABLE IF NOT EXISTS dates (t date);
statement ok
INSERT INTO dates VALUES ('2008-01-01'), (NULL), ('2007-01-01'), ('2008-02-01'), ('2008-01-02'), ('2008-01-01'), ('2008-01-01'), ('2008-01-01')
statement ok
CREATE TABLE IF NOT EXISTS timestamp (t TIMESTAMP);
statement ok
INSERT INTO timestamp VALUES ('2008-01-01 00:00:01'), (NULL), ('2007-01-01 00:00:01'), ('2008-02-01 00:00:01'), ('2008-01-02 00:00:01'), ('2008-01-01 10:00:00'), ('2008-01-01 00:10:00'), ('2008-01-01 00:00:10')
statement ok
CREATE TABLE IF NOT EXISTS names (t string);
statement ok
INSERT INTO names VALUES ('Pedro'), (NULL), ('Pedro'), ('Pedro'), ('Mark'), ('Mark'),('Mark'),('Hannes-Muehleisen'),('Hannes-Muehleisen')
# test counts on a set of values
# a takes the 2000 values of range(2000), b = a mod 10 takes 10 distinct values
statement ok
create table t as select range a, mod(range,10) b from range(2000);
# COUNT(a) is exact (2000); the two approx_count_distinct columns are estimates,
# which is why the expected values are 2322 and 11 rather than 2000 and 10
query III
SELECT COUNT( a),approx_count_distinct(a),approx_count_distinct(b) from t
----
2000 2322 11
# each parity group contains 1000 distinct values; the expected values are again estimates
query I
SELECT approx_count_distinct(a) from t group by a %2 order by all;
----
1006
1230
query I
SELECT count(*) from t where a < 10;
----
10
query I
SELECT approx_count_distinct(a) over (partition by a%2) from t where a < 10;
----
5
5
5
5
5
5
5
5
5
5
query II
SELECT COUNT( t),approx_count_distinct(t) from timestamp
----
7 6
query II
SELECT COUNT( t),approx_count_distinct(t) from dates
----
7 4
query II
SELECT COUNT(t),approx_count_distinct(t) from names
----
8 3
statement ok
create table customers (cname varchar)
statement ok
insert into customers values ('Customer#000000001'), ('Customer#000000002'), ('Customer#000000003'), ('Customer#000000004')
query T
select approx_count_distinct(cname) from customers
----
4
# ORDER BY STRUCT
statement ok
create table issue5259(c0 int);
statement ok
insert into issue5259 values (1),(2),(3);
statement ok
SELECT approx_count_distinct(c0 ORDER BY (c0, 1)) FROM issue5259;

View File

@@ -0,0 +1,214 @@
# name: test/sql/aggregate/aggregates/test_arg_min_max.test
# description: Test argmin and argmax operator
# group: [aggregates]
# Turn on the enable_verification and verify_external pragmas before any checks run.
statement ok
PRAGMA enable_verification
statement ok
PRAGMA verify_external
# Corner cases
# argmin with no arguments is expected to be rejected
statement error
select argmin()
----
# NULL arguments give NULL
query I
select argmin(NULL,NULL)
----
NULL
# constant arguments without a FROM clause
query I
select argmin(1,1)
----
1
# a star argument is expected to be rejected
statement error
select argmin(*)
----
# value and ordering key are the same column, so the smallest value of the range is returned
query I
select argmin(i,i) from range (100) tbl(i);
----
0
# the filter removes every row; the aggregate over no rows is NULL
query I
select argmin(i,i) from range (100) tbl(i) where 1 = 0;
----
NULL
# the same sequence of checks, repeated for argmax
statement error
select argmax()
----
query I
select argmax(NULL,NULL)
----
NULL
query I
select argmax(1,1)
----
1
statement error
select argmax(*)
----
# value and ordering key are the same column, so the largest value of the range is returned
query I
select argmax(i,i) from range (100) tbl(i);
----
99
query I
select argmax(i,i) from range (100) tbl(i) where 1 = 0;
----
NULL
# Small fixture where a and b are equal in every row, so the returned arg equals the ordering key.
statement ok
create table args (a integer, b integer)
statement ok
insert into args values (1,1), (2,2), (8,8), (10,10)
# NOTE(review): both columns are INTEGER but the expected values are written with decimals;
# presumably the runner compares them numerically - confirm
query II
select argmin(a,b), argmax(a,b) from args;
----
1.000000 10.000000
# grouped by parity of a: the odd group holds only 1, the even group holds 2, 8 and 10
query II
select argmin(a,b), argmax(a,b) from args group by a%2 ORDER BY argmin(a,b);
----
1 1
2 10
# Avoid lossy promotion from hugeint to double
# The four values differ only in their last digits; min(z) and arg_min(z,z)
# must agree exactly, so their difference is expected to be 0.
statement ok
CREATE TABLE hugeints (z HUGEINT);
statement ok
insert into hugeints values
(-168123123123200005565479978461862821890),
(-168123123123200005565479978461862821889),
(-168123123123200005565479978461862821888),
(-168123123123200005565479978461862821893)
query I
SELECT min(z) - arg_min(z,z) FROM hugeints;
----
0
# Blob fixture: three blobs of increasing length paired with the keys 5, 30 and 20.
statement ok
CREATE TABLE blobs (b BYTEA, a BIGINT);
statement ok
INSERT INTO blobs VALUES('\xaa\xff\xaa',5), ('\xAA\xFF\xAA\xAA\xFF\xAA',30), ('\xAA\xFF\xAA\xAA\xFF\xAA\xAA\xFF\xAA',20)
# blob chosen by the smallest / largest a (5 and 30)
query II
select argmin(b,a), argmax(b,a) from blobs ;
----
\xAA\xFF\xAA \xAA\xFF\xAA\xAA\xFF\xAA
# a chosen by the smallest / largest blob
query II
select argmin(a,b), argmax(a,b) from blobs;
----
5 20
# Window Function
# Uses the args table (1, 2, 8, 10), partitioned by parity of a: one odd row and three even rows.
# rowsort makes the comparison independent of the order in which rows are produced.
query I rowsort
select argmin(a,b) over ( partition by a%2) from args;
----
1
2
2
2
query I rowsort
select argmax(a,b) over ( partition by a%2) from args;
----
1
10
10
10
# String fixture: short names plus one long name, each with a distinct salary.
statement ok
create table names (name string, salary integer)
statement ok
insert into names values ('Pedro',10), ('Hannes',20), ('Mark',15), ('Hubert-Blaine-Wolfeschlegelsteinhausenbergerdorff',30)
# name chosen by lowest / highest salary (10 and 30)
query II
select argmin(name,salary),argmax(name,salary) from names;
----
Pedro Hubert-Blaine-Wolfeschlegelsteinhausenbergerdorff
# salary chosen by smallest / largest name ('Hannes' and 'Pedro')
query II
select argmin(salary,name),argmax(salary,name) from names;
----
20 10
# test min_by max_by alias
query II
select min_by(name,salary),max_by(name,salary) from names;
----
Pedro Hubert-Blaine-Wolfeschlegelsteinhausenbergerdorff
# test arg_min arg_max alias
query II
select arg_min(name,salary),arg_max(name,salary) from names;
----
Pedro Hubert-Blaine-Wolfeschlegelsteinhausenbergerdorff
# drop the table so the name can be reused by a later fixture in this file
statement ok
drop table names;
# NUMERIC fixture with ties and a NULL key: employees 900 and 2010 share the top salary (15000),
# employees 1030 and 2020 share the lowest (8000), and employee 2000 has a NULL salary.
statement ok
CREATE OR REPLACE TABLE employees(
employee_id NUMERIC,
department_id NUMERIC,
salary NUMERIC);
statement ok
INSERT INTO employees VALUES
(1001, 10, 10000),
(1020, 10, 9000),
(1030, 10, 8000),
(900, 20, 15000),
(2000, 20, NULL),
(2010, 20, 15000),
(2020, 20, 8000);
# run the same two checks with old_implicit_casting set to true and then to false
foreach casting true false
statement ok
SET old_implicit_casting=${casting};
# of the two employees tied on the top salary, the one inserted first (900) is expected
query I
SELECT MAX_BY(employee_id, salary) as employee_with_biggest_salary
FROM employees;
----
900
# of the two employees tied on the lowest salary, the one inserted first (1030) is expected;
# the row with the NULL salary is not returned
query I
SELECT MIN_BY(employee_id, salary) as employee_with_least_salary
FROM employees;
----
1030
endloop
# the only row has a NULL ordering key, so the result is NULL
query I
SELECT max_by(c0, c1) FROM (values (1, null)) t(c0,c1);
----
NULL
# Both the value and the ordering key are long strings built from repeated names.
# The smallest last_name starts with 'Holanda' and the largest with 'Wolfeschlegel'.
statement ok
create table names (first_name string, last_name string)
statement ok
insert into names values ('PedroPedroPedroPedroPedro','HolandaHolandaHolandaHolandaHolanda'), ('HannesHannesHannesHannesHannesHannes','MuhleisenMuhleisenMuhleisenMuhleisenMuhleisenMuhleisen'), ('MarkMarkMark','RaasveldtRaasveldtRaasveldtRaasveldtRaasveldtRaasveldt'), ('Hubert-BlaineHubert-BlaineHubert-BlaineHubert-Blaine','WolfeschlegelsteinhausenbergerdorffWolfeschlegelsteinhausenbergerdorffWolfeschlegelsteinhausenbergerdorff')
query II
select arg_min(first_name,last_name),arg_max(first_name,last_name) from names;
----
PedroPedroPedroPedroPedro Hubert-BlaineHubert-BlaineHubert-BlaineHubert-Blaine

View File

@@ -0,0 +1,146 @@
# name: test/sql/aggregate/aggregates/test_arg_min_max_nested.test_slow
# description: Test arg_min/arg_max with nested types
# group: [aggregates]
# Fixture for the nested-type checks: 10000 random rows plus two hand-written
# rows that are meant to hold the minimum and the maximum of every column.
statement ok
PRAGMA enable_verification
statement ok
PRAGMA verify_external
# presumably pins the RANDOM() sequence so the generated rows are repeatable - confirm
set seed 0.8675309
# columns are named after their types, hence the quoted identifiers
statement ok
CREATE TABLE tbl(
"DATE" DATE,
"TIMESTAMP" TIMESTAMP,
"INTEGER" INTEGER,
"BIGINT" BIGINT,
"DOUBLE" DOUBLE,
"VARCHAR" VARCHAR);
# fill up the table with some random gunk in the middle
# (assuming RANDOM() yields values in [0, 1), every generated value lies strictly
# between the two boundary rows inserted below - TODO confirm)
statement ok
INSERT INTO tbl
SELECT
DATE '1992-01-02' + INTERVAL ((RANDOM() * 300)::INT) DAYS d,
TIMESTAMP '1992-01-02 23:20:11' + INTERVAL ((RANDOM() * 300)::INT) DAYS + INTERVAL ((RANDOM() * 60 * 60)::INT) SECONDS ts,
50 + (RANDOM() * 6000)::INT i,
10 + (RANDOM() * 899999999)::BIGINT bi,
1 + RANDOM() * 99 dbl,
concat(chr(98 + (RANDOM() * 24)::INT), chr(98 + (RANDOM() * 24)::INT), chr(98 + (RANDOM() * 24)::INT), chr(98 + (RANDOM() * 24)::INT), repeat(chr(98 + (RANDOM() * 24)::INT), 29)) str
FROM
range(10000);
# insert the min and the max values
statement ok
INSERT INTO tbl VALUES (
DATE '1992-01-01',
TIMESTAMP '1992-01-01 23:20:11',
42,
0,
0.5,
'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'
)
statement ok
INSERT INTO tbl VALUES (
DATE '1993-01-01',
TIMESTAMP '1993-01-01 23:20:11',
8400,
999999999,
100.5,
'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz'
)
#
# STRUCTs
#
# Each query packs the other columns into a struct and expects the struct built
# from the hand-inserted minimum row (arg_min) or maximum row (arg_max).
# By VARCHAR
query I
SELECT arg_min({d: "DATE", ts: "TIMESTAMP", i: "INTEGER", b: "BIGINT", f: "DOUBLE"}, "VARCHAR")
FROM tbl;
----
{'d': 1992-01-01, 'ts': '1992-01-01 23:20:11', 'i': 42, 'b': 0, 'f': 0.5}
query I
SELECT arg_max({d: "DATE", ts: "TIMESTAMP", i: "INTEGER", b: "BIGINT", f: "DOUBLE"}, "VARCHAR")
FROM tbl;
----
{'d': 1993-01-01, 'ts': '1993-01-01 23:20:11', 'i': 8400, 'b': 999999999, 'f': 100.5}
# By Scalar
# (the ordering key is the DOUBLE column; the struct carries the VARCHAR instead)
query I
SELECT arg_min({d: "DATE", ts: "TIMESTAMP", i: "INTEGER", b: "BIGINT", s: "VARCHAR"}, "DOUBLE")
FROM tbl;
----
{'d': 1992-01-01, 'ts': '1992-01-01 23:20:11', 'i': 42, 'b': 0, 's': aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa}
query I
SELECT arg_max({d: "DATE", ts: "TIMESTAMP", i: "INTEGER", b: "BIGINT", s: "VARCHAR"}, "DOUBLE")
FROM tbl;
----
{'d': 1993-01-01, 'ts': '1993-01-01 23:20:11', 'i': 8400, 'b': 999999999, 's': zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz}
#
# Lists
#
# The list holds the date, the date plus one day, a NULL element and the timestamp;
# the expected output shows all non-NULL elements rendered as timestamps.
# By VARCHAR
query I
SELECT arg_min(["DATE", "DATE" + INTERVAL 1 DAY, NULL, "TIMESTAMP"], "VARCHAR")
FROM tbl;
----
['1992-01-01 00:00:00', '1992-01-02 00:00:00', NULL, '1992-01-01 23:20:11']
query I
SELECT arg_max(["DATE", "DATE" + INTERVAL 1 DAY, NULL, "TIMESTAMP"], "VARCHAR")
FROM tbl;
----
['1993-01-01 00:00:00', '1993-01-02 00:00:00', NULL, '1993-01-01 23:20:11']
# By Scalar
query I
SELECT arg_min(["DATE", "DATE" + INTERVAL 1 DAY, NULL, "TIMESTAMP"], "DOUBLE")
FROM tbl;
----
['1992-01-01 00:00:00', '1992-01-02 00:00:00', NULL, '1992-01-01 23:20:11']
query I
SELECT arg_max(["DATE", "DATE" + INTERVAL 1 DAY, NULL, "TIMESTAMP"], "DOUBLE")
FROM tbl;
----
['1993-01-01 00:00:00', '1993-01-02 00:00:00', NULL, '1993-01-01 23:20:11']
#
# Nested
#
# Struct values whose first field is the list used in the list checks of this file.
# By VARCHAR
query I
SELECT arg_min({l: ["DATE", "DATE" + INTERVAL 1 DAY, NULL, "TIMESTAMP"], i: "INTEGER", b: "BIGINT", f: "DOUBLE"}, "VARCHAR")
FROM tbl;
----
{'l': ['1992-01-01 00:00:00', '1992-01-02 00:00:00', NULL, '1992-01-01 23:20:11'], 'i': 42, 'b': 0, 'f': 0.5}
query I
SELECT arg_max({l: ["DATE", "DATE" + INTERVAL 1 DAY, NULL, "TIMESTAMP"], i: "INTEGER", b: "BIGINT", f: "DOUBLE"}, "VARCHAR")
FROM tbl;
----
{'l': ['1993-01-01 00:00:00', '1993-01-02 00:00:00', NULL, '1993-01-01 23:20:11'], 'i': 8400, 'b': 999999999, 'f': 100.5}
# By Scalar
query I
SELECT arg_min({l: ["DATE", "DATE" + INTERVAL 1 DAY, NULL, "TIMESTAMP"], i: "INTEGER", b: "BIGINT", s: "VARCHAR"}, "DOUBLE")
FROM tbl;
----
{'l': ['1992-01-01 00:00:00', '1992-01-02 00:00:00', NULL, '1992-01-01 23:20:11'], 'i': 42, 'b': 0, 's': aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa}
query I
SELECT arg_max({l: ["DATE", "DATE" + INTERVAL 1 DAY, NULL, "TIMESTAMP"], i: "INTEGER", b: "BIGINT", s: "VARCHAR"}, "DOUBLE")
FROM tbl;
----
{'l': ['1993-01-01 00:00:00', '1993-01-02 00:00:00', NULL, '1993-01-01 23:20:11'], 'i': 8400, 'b': 999999999, 's': zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz}

View File

@@ -0,0 +1,152 @@
# name: test/sql/aggregate/aggregates/test_arg_min_max_null.test
# description: Test arg_min_null and arg_max_null operator
# group: [aggregates]
# Turn on the enable_verification and verify_external pragmas before any checks run.
statement ok
PRAGMA enable_verification
statement ok
PRAGMA verify_external
# Corner cases
# arg_min_null with no arguments is expected to be rejected
statement error
select arg_min_null()
----
# NULL arguments give NULL
query I
select arg_min_null(NULL,NULL)
----
NULL
# constant arguments without a FROM clause
query I
select arg_min_null(1,1)
----
1
# a star argument is expected to be rejected
statement error
select arg_min_null(*)
----
# value and ordering key are the same column, so the smallest value of the range is returned
query I
select arg_min_null(i,i) from range (100) tbl(i);
----
0
# the filter removes every row; the aggregate over no rows is NULL
query I
select arg_min_null(i,i) from range (100) tbl(i) where 1 = 0;
----
NULL
# the same sequence of checks, repeated for arg_max_null
statement error
select arg_max_null()
----
query I
select arg_max_null(NULL,NULL)
----
NULL
query I
select arg_max_null(1,1)
----
1
statement error
select arg_max_null(*)
----
# value and ordering key are the same column, so the largest value of the range is returned
query I
select arg_max_null(i,i) from range (100) tbl(i);
----
99
query I
select arg_max_null(i,i) from range (100) tbl(i) where 1 = 0;
----
NULL
# Small fixture where a and b are equal in every row, so the returned arg equals the ordering key.
statement ok
create table args (a integer, b integer)
statement ok
insert into args values (1,1), (2,2), (8,8), (10,10)
# NOTE(review): both columns are INTEGER but the expected values are written with decimals;
# presumably the runner compares them numerically - confirm
query II
select arg_min_null(a,b), arg_max_null(a,b) from args;
----
1.000000 10.000000
# grouped by parity of a: the odd group holds only 1, the even group holds 2, 8 and 10
query II
select arg_min_null(a,b), arg_max_null(a,b) from args group by a%2 ORDER BY arg_min_null(a,b);
----
1 1
2 10
# add two rows whose a is NULL and whose b values (0 and 12) are the new smallest and largest keys
statement ok
insert into args values (NULL, 0), (NULL, 12)
# the rows with the extreme b now carry a NULL a, and that NULL is what is returned
query II
select arg_min_null(a,b), arg_max_null(a,b) from args;
----
NULL NULL
# the NULL-a rows form a third group (a%2 is NULL), listed last in the expected output
query II
select arg_min_null(a,b), arg_max_null(a,b) from args group by a%2 ORDER BY arg_min_null(a,b);
----
1 1
2 10
NULL NULL
# Blob fixture: three blobs of increasing length paired with the keys 5, 30 and 20.
statement ok
CREATE TABLE blobs (b BYTEA, a BIGINT);
statement ok
INSERT INTO blobs VALUES('\xaa\xff\xaa',5), ('\xAA\xFF\xAA\xAA\xFF\xAA',30), ('\xAA\xFF\xAA\xAA\xFF\xAA\xAA\xFF\xAA',20)
# blob chosen by the smallest / largest a (5 and 30)
query II
select arg_min_null(b,a), arg_max_null(b,a) from blobs ;
----
\xAA\xFF\xAA \xAA\xFF\xAA\xAA\xFF\xAA
# a chosen by the smallest / largest blob
query II
select arg_min_null(a,b), arg_max_null(a,b) from blobs;
----
5 20
# Window Function
# Uses the args table including its two NULL-a rows: one odd row, three even rows
# and two rows in the NULL partition. rowsort makes the comparison order-independent.
query I rowsort
select arg_min_null(a,b) over ( partition by a%2) from args;
----
1
2
2
2
NULL
NULL
query I rowsort
select arg_max_null(a,b) over ( partition by a%2) from args;
----
1
10
10
10
NULL
NULL
# String fixture: short names plus one long name, each with a distinct salary.
statement ok
create table names (name string, salary integer)
statement ok
insert into names values ('Pedro',10), ('Hannes',20), ('Mark',15), ('Hubert-Blaine-Wolfeschlegelsteinhausenbergerdorff',30)
# name chosen by lowest / highest salary (10 and 30)
query II
select arg_min_null(name,salary),arg_max_null(name,salary) from names;
----
Pedro Hubert-Blaine-Wolfeschlegelsteinhausenbergerdorff
# salary chosen by smallest / largest name ('Hannes' and 'Pedro')
query II
select arg_min_null(salary,name),arg_max_null(salary,name) from names;
----
20 10

View File

@@ -0,0 +1,354 @@
# name: test/sql/aggregate/aggregates/test_arg_min_max_null_nested.test_slow
# description: Test arg_min_null/arg_max_null with nested types
# group: [aggregates]
# Fixture for the nested-type checks: 10000 random rows plus two hand-written
# rows that are meant to hold the minimum and the maximum of every column.
statement ok
PRAGMA enable_verification
statement ok
PRAGMA verify_external
# presumably pins the RANDOM() sequence so the generated rows are repeatable - confirm
set seed 0.8675309
# columns are named after their types, hence the quoted identifiers
statement ok
CREATE TABLE tbl(
"DATE" DATE,
"TIMESTAMP" TIMESTAMP,
"INTEGER" INTEGER,
"BIGINT" BIGINT,
"DOUBLE" DOUBLE,
"VARCHAR" VARCHAR);
# fill up the table with some random gunk in the middle
# (assuming RANDOM() yields values in [0, 1), every generated value lies strictly
# between the two boundary rows inserted below - TODO confirm)
statement ok
INSERT INTO tbl
SELECT
DATE '1992-01-02' + INTERVAL ((RANDOM() * 300)::INT) DAYS d,
TIMESTAMP '1992-01-02 23:20:11' + INTERVAL ((RANDOM() * 300)::INT) DAYS + INTERVAL ((RANDOM() * 60 * 60)::INT) SECONDS ts,
50 + (RANDOM() * 6000)::INT i,
10 + (RANDOM() * 899999999)::BIGINT bi,
1 + RANDOM() * 99 dbl,
concat(chr(98 + (RANDOM() * 24)::INT), chr(98 + (RANDOM() * 24)::INT), chr(98 + (RANDOM() * 24)::INT), chr(98 + (RANDOM() * 24)::INT), repeat(chr(98 + (RANDOM() * 24)::INT), 29)) str
FROM
range(10000);
# insert the min and the max values
statement ok
INSERT INTO tbl VALUES (
DATE '1992-01-01',
TIMESTAMP '1992-01-01 23:20:11',
42,
0,
0.5,
'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'
)
statement ok
INSERT INTO tbl VALUES (
DATE '1993-01-01',
TIMESTAMP '1993-01-01 23:20:11',
8400,
999999999,
100.5,
'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz'
)
#
# STRUCTs
#
# Baseline round: no NULLs have been inserted yet, so every query is expected to
# return the value built from the hand-inserted minimum or maximum row.
# By VARCHAR
query I
SELECT arg_min_null({d: "DATE", ts: "TIMESTAMP", i: "INTEGER", b: "BIGINT", f: "DOUBLE"}, "VARCHAR")
FROM tbl;
----
{'d': 1992-01-01, 'ts': '1992-01-01 23:20:11', 'i': 42, 'b': 0, 'f': 0.5}
query I
SELECT arg_max_null({d: "DATE", ts: "TIMESTAMP", i: "INTEGER", b: "BIGINT", f: "DOUBLE"}, "VARCHAR")
FROM tbl;
----
{'d': 1993-01-01, 'ts': '1993-01-01 23:20:11', 'i': 8400, 'b': 999999999, 'f': 100.5}
# By Scalar
query I
SELECT arg_min_null({d: "DATE", ts: "TIMESTAMP", i: "INTEGER", b: "BIGINT", s: "VARCHAR"}, "DOUBLE")
FROM tbl;
----
{'d': 1992-01-01, 'ts': '1992-01-01 23:20:11', 'i': 42, 'b': 0, 's': aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa}
query I
SELECT arg_max_null({d: "DATE", ts: "TIMESTAMP", i: "INTEGER", b: "BIGINT", s: "VARCHAR"}, "DOUBLE")
FROM tbl;
----
{'d': 1993-01-01, 'ts': '1993-01-01 23:20:11', 'i': 8400, 'b': 999999999, 's': zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz}
#
# Lists
#
# By VARCHAR
query I
SELECT arg_min_null(["DATE", "DATE" + INTERVAL 1 DAY, NULL, "TIMESTAMP"], "VARCHAR")
FROM tbl;
----
['1992-01-01 00:00:00', '1992-01-02 00:00:00', NULL, '1992-01-01 23:20:11']
query I
SELECT arg_max_null(["DATE", "DATE" + INTERVAL 1 DAY, NULL, "TIMESTAMP"], "VARCHAR")
FROM tbl;
----
['1993-01-01 00:00:00', '1993-01-02 00:00:00', NULL, '1993-01-01 23:20:11']
# By Scalar
query I
SELECT arg_min_null(["DATE", "DATE" + INTERVAL 1 DAY, NULL, "TIMESTAMP"], "DOUBLE")
FROM tbl;
----
['1992-01-01 00:00:00', '1992-01-02 00:00:00', NULL, '1992-01-01 23:20:11']
query I
SELECT arg_max_null(["DATE", "DATE" + INTERVAL 1 DAY, NULL, "TIMESTAMP"], "DOUBLE")
FROM tbl;
----
['1993-01-01 00:00:00', '1993-01-02 00:00:00', NULL, '1993-01-01 23:20:11']
#
# Nested
#
# By VARCHAR
query I
SELECT arg_min_null({l: ["DATE", "DATE" + INTERVAL 1 DAY, NULL, "TIMESTAMP"], i: "INTEGER", b: "BIGINT", f: "DOUBLE"}, "VARCHAR")
FROM tbl;
----
{'l': ['1992-01-01 00:00:00', '1992-01-02 00:00:00', NULL, '1992-01-01 23:20:11'], 'i': 42, 'b': 0, 'f': 0.5}
query I
SELECT arg_max_null({l: ["DATE", "DATE" + INTERVAL 1 DAY, NULL, "TIMESTAMP"], i: "INTEGER", b: "BIGINT", f: "DOUBLE"}, "VARCHAR")
FROM tbl;
----
{'l': ['1993-01-01 00:00:00', '1993-01-02 00:00:00', NULL, '1993-01-01 23:20:11'], 'i': 8400, 'b': 999999999, 'f': 100.5}
# By Scalar
query I
SELECT arg_min_null({l: ["DATE", "DATE" + INTERVAL 1 DAY, NULL, "TIMESTAMP"], i: "INTEGER", b: "BIGINT", s: "VARCHAR"}, "DOUBLE")
FROM tbl;
----
{'l': ['1992-01-01 00:00:00', '1992-01-02 00:00:00', NULL, '1992-01-01 23:20:11'], 'i': 42, 'b': 0, 's': aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa}
query I
SELECT arg_max_null({l: ["DATE", "DATE" + INTERVAL 1 DAY, NULL, "TIMESTAMP"], i: "INTEGER", b: "BIGINT", s: "VARCHAR"}, "DOUBLE")
FROM tbl;
----
{'l': ['1993-01-01 00:00:00', '1993-01-02 00:00:00', NULL, '1993-01-01 23:20:11'], 'i': 8400, 'b': 999999999, 's': zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz}
# Now insert NULLs for min
# The new row is below every existing value in the non-string columns and has a NULL "VARCHAR".
# Expected effect, as the results below show: queries ordered by "VARCHAR" are unchanged,
# while arg_min_null ordered by "DOUBLE" now returns the new row, including its NULL string field.
statement ok
INSERT INTO tbl VALUES (
DATE '1991-01-01',
TIMESTAMP '1991-01-01 23:20:11',
41,
-1,
0.25,
NULL
)
#
# STRUCTs
#
# By VARCHAR
query I
SELECT arg_min_null({d: "DATE", ts: "TIMESTAMP", i: "INTEGER", b: "BIGINT", f: "DOUBLE"}, "VARCHAR")
FROM tbl;
----
{'d': 1992-01-01, 'ts': '1992-01-01 23:20:11', 'i': 42, 'b': 0, 'f': 0.5}
query I
SELECT arg_max_null({d: "DATE", ts: "TIMESTAMP", i: "INTEGER", b: "BIGINT", f: "DOUBLE"}, "VARCHAR")
FROM tbl;
----
{'d': 1993-01-01, 'ts': '1993-01-01 23:20:11', 'i': 8400, 'b': 999999999, 'f': 100.5}
# By Scalar
query I
SELECT arg_min_null({d: "DATE", ts: "TIMESTAMP", i: "INTEGER", b: "BIGINT", s: "VARCHAR"}, "DOUBLE")
FROM tbl;
----
{'d': 1991-01-01, 'ts': '1991-01-01 23:20:11', 'i': 41, 'b': -1, 's': NULL}
query I
SELECT arg_max_null({d: "DATE", ts: "TIMESTAMP", i: "INTEGER", b: "BIGINT", s: "VARCHAR"}, "DOUBLE")
FROM tbl;
----
{'d': 1993-01-01, 'ts': '1993-01-01 23:20:11', 'i': 8400, 'b': 999999999, 's': zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz}
#
# Lists
#
# By VARCHAR
query I
SELECT arg_min_null(["DATE", "DATE" + INTERVAL 1 DAY, NULL, "TIMESTAMP"], "VARCHAR")
FROM tbl;
----
['1992-01-01 00:00:00', '1992-01-02 00:00:00', NULL, '1992-01-01 23:20:11']
query I
SELECT arg_max_null(["DATE", "DATE" + INTERVAL 1 DAY, NULL, "TIMESTAMP"], "VARCHAR")
FROM tbl;
----
['1993-01-01 00:00:00', '1993-01-02 00:00:00', NULL, '1993-01-01 23:20:11']
# By Scalar
query I
SELECT arg_min_null(["DATE", "DATE" + INTERVAL 1 DAY, NULL, "TIMESTAMP"], "DOUBLE")
FROM tbl;
----
['1991-01-01 00:00:00', '1991-01-02 00:00:00', NULL, '1991-01-01 23:20:11']
query I
SELECT arg_max_null(["DATE", "DATE" + INTERVAL 1 DAY, NULL, "TIMESTAMP"], "DOUBLE")
FROM tbl;
----
['1993-01-01 00:00:00', '1993-01-02 00:00:00', NULL, '1993-01-01 23:20:11']
#
# Nested
#
# By VARCHAR
query I
SELECT arg_min_null({l: ["DATE", "DATE" + INTERVAL 1 DAY, NULL, "TIMESTAMP"], i: "INTEGER", b: "BIGINT", f: "DOUBLE"}, "VARCHAR")
FROM tbl;
----
{'l': ['1992-01-01 00:00:00', '1992-01-02 00:00:00', NULL, '1992-01-01 23:20:11'], 'i': 42, 'b': 0, 'f': 0.5}
query I
SELECT arg_max_null({l: ["DATE", "DATE" + INTERVAL 1 DAY, NULL, "TIMESTAMP"], i: "INTEGER", b: "BIGINT", f: "DOUBLE"}, "VARCHAR")
FROM tbl;
----
{'l': ['1993-01-01 00:00:00', '1993-01-02 00:00:00', NULL, '1993-01-01 23:20:11'], 'i': 8400, 'b': 999999999, 'f': 100.5}
# By Scalar
query I
SELECT arg_min_null({l: ["DATE", "DATE" + INTERVAL 1 DAY, NULL, "TIMESTAMP"], i: "INTEGER", b: "BIGINT", s: "VARCHAR"}, "DOUBLE")
FROM tbl;
----
{'l': ['1991-01-01 00:00:00', '1991-01-02 00:00:00', NULL, '1991-01-01 23:20:11'], 'i': 41, 'b': -1, 's': NULL}
query I
SELECT arg_max_null({l: ["DATE", "DATE" + INTERVAL 1 DAY, NULL, "TIMESTAMP"], i: "INTEGER", b: "BIGINT", s: "VARCHAR"}, "DOUBLE")
FROM tbl;
----
{'l': ['1993-01-01 00:00:00', '1993-01-02 00:00:00', NULL, '1993-01-01 23:20:11'], 'i': 8400, 'b': 999999999, 's': zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz}
# And insert NULLs for max
# The new row is above every existing value in the non-string columns and has a NULL "VARCHAR".
# Expected effect, as the results below show: queries ordered by "VARCHAR" are still unchanged,
# while arg_max_null ordered by "DOUBLE" now returns the new row, including its NULL string field.
statement ok
INSERT INTO tbl VALUES (
DATE '1994-01-01',
TIMESTAMP '1994-01-01 23:20:11',
9400,
9999999999,
101.5,
NULL
)
#
# STRUCTs
#
# By VARCHAR
query I
SELECT arg_min_null({d: "DATE", ts: "TIMESTAMP", i: "INTEGER", b: "BIGINT", f: "DOUBLE"}, "VARCHAR")
FROM tbl;
----
{'d': 1992-01-01, 'ts': '1992-01-01 23:20:11', 'i': 42, 'b': 0, 'f': 0.5}
query I
SELECT arg_max_null({d: "DATE", ts: "TIMESTAMP", i: "INTEGER", b: "BIGINT", f: "DOUBLE"}, "VARCHAR")
FROM tbl;
----
{'d': 1993-01-01, 'ts': '1993-01-01 23:20:11', 'i': 8400, 'b': 999999999, 'f': 100.5}
# By Scalar
query I
SELECT arg_min_null({d: "DATE", ts: "TIMESTAMP", i: "INTEGER", b: "BIGINT", s: "VARCHAR"}, "DOUBLE")
FROM tbl;
----
{'d': 1991-01-01, 'ts': '1991-01-01 23:20:11', 'i': 41, 'b': -1, 's': NULL}
query I
SELECT arg_max_null({d: "DATE", ts: "TIMESTAMP", i: "INTEGER", b: "BIGINT", s: "VARCHAR"}, "DOUBLE")
FROM tbl;
----
{'d': 1994-01-01, 'ts': '1994-01-01 23:20:11', 'i': 9400, 'b': 9999999999, 's': NULL}
#
# Lists
#
# By VARCHAR
query I
SELECT arg_min_null(["DATE", "DATE" + INTERVAL 1 DAY, NULL, "TIMESTAMP"], "VARCHAR")
FROM tbl;
----
['1992-01-01 00:00:00', '1992-01-02 00:00:00', NULL, '1992-01-01 23:20:11']
query I
SELECT arg_max_null(["DATE", "DATE" + INTERVAL 1 DAY, NULL, "TIMESTAMP"], "VARCHAR")
FROM tbl;
----
['1993-01-01 00:00:00', '1993-01-02 00:00:00', NULL, '1993-01-01 23:20:11']
# By Scalar
query I
SELECT arg_min_null(["DATE", "DATE" + INTERVAL 1 DAY, NULL, "TIMESTAMP"], "DOUBLE")
FROM tbl;
----
['1991-01-01 00:00:00', '1991-01-02 00:00:00', NULL, '1991-01-01 23:20:11']
query I
SELECT arg_max_null(["DATE", "DATE" + INTERVAL 1 DAY, NULL, "TIMESTAMP"], "DOUBLE")
FROM tbl;
----
['1994-01-01 00:00:00', '1994-01-02 00:00:00', NULL, '1994-01-01 23:20:11']
#
# Nested
#
# By VARCHAR
query I
SELECT arg_min_null({l: ["DATE", "DATE" + INTERVAL 1 DAY, NULL, "TIMESTAMP"], i: "INTEGER", b: "BIGINT", f: "DOUBLE"}, "VARCHAR")
FROM tbl;
----
{'l': ['1992-01-01 00:00:00', '1992-01-02 00:00:00', NULL, '1992-01-01 23:20:11'], 'i': 42, 'b': 0, 'f': 0.5}
query I
SELECT arg_max_null({l: ["DATE", "DATE" + INTERVAL 1 DAY, NULL, "TIMESTAMP"], i: "INTEGER", b: "BIGINT", f: "DOUBLE"}, "VARCHAR")
FROM tbl;
----
{'l': ['1993-01-01 00:00:00', '1993-01-02 00:00:00', NULL, '1993-01-01 23:20:11'], 'i': 8400, 'b': 999999999, 'f': 100.5}
# By Scalar
query I
SELECT arg_min_null({l: ["DATE", "DATE" + INTERVAL 1 DAY, NULL, "TIMESTAMP"], i: "INTEGER", b: "BIGINT", s: "VARCHAR"}, "DOUBLE")
FROM tbl;
----
{'l': ['1991-01-01 00:00:00', '1991-01-02 00:00:00', NULL, '1991-01-01 23:20:11'], 'i': 41, 'b': -1, 's': NULL}
query I
SELECT arg_max_null({l: ["DATE", "DATE" + INTERVAL 1 DAY, NULL, "TIMESTAMP"], i: "INTEGER", b: "BIGINT", s: "VARCHAR"}, "DOUBLE")
FROM tbl;
----
{'l': ['1994-01-01 00:00:00', '1994-01-02 00:00:00', NULL, '1994-01-01 23:20:11'], 'i': 9400, 'b': 9999999999, 's': NULL}

View File

@@ -0,0 +1,126 @@
# name: test/sql/aggregate/aggregates/test_arg_min_max_null_strings.test_slow
# description: Test arg_min_null/arg_max_null with strings
# group: [aggregates]
# Fixture for the string checks: 10000 random rows plus two hand-written rows
# that are meant to hold the minimum and the maximum of every column.
statement ok
PRAGMA enable_verification
statement ok
PRAGMA verify_external
# columns are named after their types, hence the quoted identifiers
statement ok
CREATE TABLE tbl(
"DATE" DATE,
"TIMESTAMP" TIMESTAMP,
"INTEGER" INTEGER,
"BIGINT" BIGINT,
"DOUBLE" DOUBLE,
"VARCHAR" VARCHAR
);
# fill up the table with some random gunk in the middle
# (assuming RANDOM() yields values in [0, 1), every generated value lies strictly
# between the two boundary rows inserted below - TODO confirm)
statement ok
INSERT INTO tbl
SELECT
DATE '1992-01-02' + INTERVAL ((RANDOM() * 300)::INT) DAYS d,
TIMESTAMP '1992-01-02 23:20:11' + INTERVAL ((RANDOM() * 300)::INT) DAYS + INTERVAL ((RANDOM() * 60 * 60)::INT) SECONDS ts,
50 + (RANDOM() * 6000)::INT i,
10 + (RANDOM() * 899999999)::BIGINT bi,
1 + RANDOM() * 99 dbl,
concat(chr(98 + (RANDOM() * 24)::INT), chr(98 + (RANDOM() * 24)::INT), chr(98 + (RANDOM() * 24)::INT), chr(98 + (RANDOM() * 24)::INT), repeat(chr(98 + (RANDOM() * 24)::INT), 29)) str
FROM
range(10000);
# insert non-NULL min and max values
statement ok
INSERT INTO tbl VALUES (
DATE '1992-01-01',
TIMESTAMP '1992-01-01 23:20:11',
42,
0,
0.5,
'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'
)
statement ok
INSERT INTO tbl VALUES (
DATE '1993-01-01',
TIMESTAMP '1993-01-01 23:20:11',
8400,
999999999,
100.5,
'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz'
)
# Repeat the four checks once per listed column, substituted for ${type}.
foreach type DATE TIMESTAMP INTEGER BIGINT VARCHAR
# string picked by the smallest ${type}: expected to be the all-'a' string of the minimum row
query I
SELECT arg_min_null("VARCHAR", "${type}") FROM tbl
----
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
# ${type} picked by the smallest string must equal MIN(${type}); the EXCEPT result is expected to be empty
query I
SELECT arg_min_null("${type}", "VARCHAR") FROM tbl EXCEPT SELECT MIN("${type}") FROM tbl
----
# string picked by the largest ${type}: expected to be the all-'z' string of the maximum row
query I
SELECT arg_max_null("VARCHAR", "${type}") FROM tbl
----
zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz
# ${type} picked by the largest string must equal MAX(${type}); the EXCEPT result is expected to be empty
query I
SELECT arg_max_null("${type}", "VARCHAR") FROM tbl EXCEPT SELECT MAX("${type}") FROM tbl
----
endloop
# Now insert NULLs for min
# The new row is below every existing value in the non-string columns and has a NULL "VARCHAR".
statement ok
INSERT INTO tbl VALUES (
DATE '1991-01-01',
TIMESTAMP '1991-01-01 23:20:11',
41,
-1,
0.25,
NULL
)
# VARCHAR is left out of the type list here: only the non-string columns are used as ordering keys
foreach type DATE TIMESTAMP INTEGER BIGINT
# the row with the smallest ${type} now has a NULL string, and that NULL is the expected result
query I
SELECT arg_min_null("VARCHAR", "${type}") FROM tbl
----
NULL
# the maximum row is untouched
query I
SELECT arg_max_null("VARCHAR", "${type}") FROM tbl
----
zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz
endloop
# And insert NULLs for max
# The new row is above every existing value in the non-string columns and has a NULL "VARCHAR".
statement ok
INSERT INTO tbl VALUES (
DATE '1994-01-01',
TIMESTAMP '1994-01-01 23:20:11',
9400,
9999999999,
101.5,
NULL
)
# both extremes of every non-string column now belong to rows with a NULL string
foreach type DATE TIMESTAMP INTEGER BIGINT
query I
SELECT arg_min_null("VARCHAR", "${type}") FROM tbl
----
NULL
query I
SELECT arg_max_null("VARCHAR", "${type}") FROM tbl
----
NULL
endloop

View File

@@ -0,0 +1,76 @@
# name: test/sql/aggregate/aggregates/test_arg_min_max_strings.test_slow
# description: Test arg_min/arg_max with strings
# group: [aggregates]
# Fixture for the string checks: 10000 random rows plus two hand-written rows
# that are meant to hold the minimum and the maximum of every column.
statement ok
PRAGMA enable_verification
statement ok
PRAGMA verify_external
# columns are named after their types, hence the quoted identifiers
statement ok
CREATE TABLE tbl(
"DATE" DATE,
"TIMESTAMP" TIMESTAMP,
"INTEGER" INTEGER,
"BIGINT" BIGINT,
"DOUBLE" DOUBLE,
"VARCHAR" VARCHAR);
# fill up the table with some random gunk in the middle
# (assuming RANDOM() yields values in [0, 1), every generated value lies strictly
# between the two boundary rows inserted below - TODO confirm)
statement ok
INSERT INTO tbl
SELECT
DATE '1992-01-02' + INTERVAL ((RANDOM() * 300)::INT) DAYS d,
TIMESTAMP '1992-01-02 23:20:11' + INTERVAL ((RANDOM() * 300)::INT) DAYS + INTERVAL ((RANDOM() * 60 * 60)::INT) SECONDS ts,
50 + (RANDOM() * 6000)::INT i,
10 + (RANDOM() * 899999999)::BIGINT bi,
1 + RANDOM() * 99 dbl,
concat(chr(98 + (RANDOM() * 24)::INT), chr(98 + (RANDOM() * 24)::INT), chr(98 + (RANDOM() * 24)::INT), chr(98 + (RANDOM() * 24)::INT), repeat(chr(98 + (RANDOM() * 24)::INT), 29)) str
FROM
range(10000);
# insert the min and the max values
statement ok
INSERT INTO tbl VALUES (
DATE '1992-01-01',
TIMESTAMP '1992-01-01 23:20:11',
42,
0,
0.5,
'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'
)
statement ok
INSERT INTO tbl VALUES (
DATE '1993-01-01',
TIMESTAMP '1993-01-01 23:20:11',
8400,
999999999,
100.5,
'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz'
)
# Repeat the four checks once per listed column, substituted for ${type}.
foreach type DATE TIMESTAMP INTEGER BIGINT VARCHAR
# string picked by the smallest ${type}: expected to be the all-'a' string of the minimum row
query I
SELECT arg_min("VARCHAR", "${type}") FROM tbl
----
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
# ${type} picked by the smallest string must equal MIN(${type}); the EXCEPT result is expected to be empty
query I
SELECT arg_min("${type}", "VARCHAR") FROM tbl EXCEPT SELECT MIN("${type}") FROM tbl
----
# string picked by the largest ${type}: expected to be the all-'z' string of the maximum row
query I
SELECT arg_max("VARCHAR", "${type}") FROM tbl
----
zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz
# ${type} picked by the largest string must equal MAX(${type}); the EXCEPT result is expected to be empty
query I
SELECT arg_max("${type}", "VARCHAR") FROM tbl EXCEPT SELECT MAX("${type}") FROM tbl
----
endloop

View File

@@ -0,0 +1,179 @@
# name: test/sql/aggregate/aggregates/test_avg.test
# description: Test AVG operator
# group: [aggregates]
# scalar average
# (a single constant averages to itself; a NULL input averages to NULL)
query RR
SELECT AVG(3), AVG(NULL)
----
3
NULL
# same check with the inputs cast to SMALLINT
query RR
SELECT AVG(3::SMALLINT), AVG(NULL::SMALLINT)
----
3
NULL
# same check with the inputs cast to DOUBLE
query RR
SELECT AVG(3::DOUBLE), AVG(NULL::DOUBLE)
----
3
NULL
# test average on sequence
# (each query reads one value from the sequence, so consecutive queries see 1 and then 2)
statement ok
CREATE SEQUENCE seq;
query R
SELECT AVG(nextval('seq'))
----
1
query R
SELECT AVG(nextval('seq'))
----
2
# average over a small integer table: column, constant, DISTINCT and NULL in a single query
statement ok
CREATE TABLE integers(i INTEGER);
statement ok
INSERT INTO integers VALUES (1), (2), (3)
query RRRR
SELECT AVG(i), AVG(1), AVG(DISTINCT i), AVG(NULL) FROM integers
----
2
1
2
NULL
# no row satisfies the filter, so the average is NULL
query R
SELECT AVG(i) FROM integers WHERE i > 100
----
NULL
# Intervals
# One 1-day value and four 30-day values: the plain average is 121/5 = 24.2 days,
# the DISTINCT average is (1 + 30)/2 = 15.5 days.
statement ok
CREATE TABLE intervals(itvl INTERVAL)
statement ok
INSERT INTO intervals VALUES
('1 day'),
('30 days'),
('30 days'),
('30 days'),
('30 days')
query II
SELECT AVG(itvl), AVG(DISTINCT itvl) FROM intervals
----
24 days 04:48:00 15 days 12:00:00
# The only test of AVG(INTERVAL) in the PG test suite...
# (mixes '@'-prefixed literals, an 'ago' literal and multi-unit literals)
statement ok
CREATE TABLE interval_tbl (f1 interval);
statement ok
INSERT INTO interval_tbl (f1) VALUES
('@ 1 minute'),
('@ 5 hour'),
('@ 10 day'),
('@ 34 year'),
('@ 3 months'),
('@ 14 seconds ago'),
('1 day 2 hours 3 minutes 4 seconds'),
('6 years'),
('5 months'),
('5 months 12 hours');
query I
select avg(f1) from interval_tbl;
----
4 years 1 month 10 days 04:18:23
# invalid use of average
# (no arguments, too many arguments, and an aggregate nested inside an aggregate)
statement error
SELECT AVG()
----
statement error
SELECT AVG(1, 2, 3)
----
statement error
SELECT AVG(AVG(1))
----
# empty average
# (the table holds a single all-NULL row, so every column averages to NULL)
statement ok
CREATE TABLE vals(i INTEGER, j DOUBLE, k HUGEINT);
statement ok
INSERT INTO vals VALUES (NULL, NULL, NULL)
query III
SELECT AVG(i), AVG(j), AVG(k) FROM vals;
----
NULL NULL NULL
# Temporal values
# One row per day from 2024-11-01 up to, but not including, 2024-12-01.
statement ok
CREATE OR REPLACE TABLE timestamps AS
SELECT range AS ts
FROM range('2024-11-01'::DATE, '2024-12-01'::DATE, INTERVAL 1 DAY)
# averaging the values cast to DATE is expected to give a value with a time-of-day part
query I
SELECT AVG(ts::DATE)
FROM timestamps
----
2024-11-15 12:00:00
query I
SELECT AVG(ts)
FROM timestamps
----
2024-11-15 12:00:00
# same average with the values cast to TIMESTAMPTZ; the expected value carries a +00 offset
query I
SELECT AVG(ts::TIMESTAMPTZ)
FROM timestamps
----
2024-11-15 12:00:00+00
# one row every 7 minutes within the single day 2024-11-01, averaged as TIME values
statement ok
CREATE OR REPLACE TABLE times AS
SELECT range AS ts
FROM range('2024-11-01'::DATE, '2024-11-02'::DATE, INTERVAL 7 MINUTES)
query I
SELECT AVG(ts::TIME)
FROM times
----
11:57:30
# TIMETZ - just average the normalised times
# The fixture mixes a NULL, values without an offset, and offsets given with hour,
# hour:minute and hour:minute:second precision, including the +/-15:59 extremes.
statement ok
CREATE TABLE timetzs (ttz TIMETZ);
# NOTE(review): the VALUES list ends with a trailing comma before the semicolon; the record
# is 'statement ok', so the parser is expected to accept it - confirm this is deliberate
statement ok
INSERT INTO timetzs VALUES
(NULL),
('00:00:00+1559'),
('00:00:00+1558'),
('02:30:00'),
('02:30:00+04'),
('02:30:00+04:30'),
('02:30:00+04:30:45'),
('16:15:03.123456'),
('02:30:00+1200'),
('02:30:00-1200'),
('24:00:00-1558'),
('24:00:00-1559'),
;
# the expected average is reported with a +00 offset
query I
SELECT AVG(ttz) FROM timetzs;
----
14:44:56.193041+00

View File

@@ -0,0 +1,17 @@
# name: test/sql/aggregate/aggregates/test_bigint_avg.test
# description: Test AVG on integers with no exact float64 representation
# group: [aggregates]
# Three values whose sum is 9007199254740993 (2^53 + 1); the exact average is 3002399751580331.
statement ok
CREATE TABLE bigints(n HUGEINT);
statement ok
INSERT INTO bigints (n) VALUES ('9007199254740992'::HUGEINT), (1::HUGEINT), (0::HUGEINT);
# this would give the wrong result with 'double' precision
# (presumably the records below are skipped when the longdouble requirement is not met - confirm)
require longdouble
# the difference from the exact average must be 0
query R
SELECT AVG(n)::DOUBLE - '3002399751580331'::DOUBLE FROM bigints;
----
0

View File

@@ -0,0 +1,133 @@
# name: test/sql/aggregate/aggregates/test_binned_histogram.test
# description: Test binned histograms
# group: [aggregates]
# Fixture: 15 observations between 0 and 47 used by most histogram checks in this file.
statement ok
PRAGMA enable_verification
statement ok
CREATE TABLE obs(n BIGINT);
statement ok
INSERT INTO obs VALUES (0), (5), (7), (12), (20), (23), (24), (25), (26), (28), (31), (34), (36), (41), (47)
# each key in the result is an upper bound; a value equal to a bound is counted in that
# bound's bin (20 is counted under 20, not under 30)
query I
SELECT histogram(n, [10, 20, 30, 40, 50]) FROM obs
----
{10=3, 20=2, 30=5, 40=3, 50=2}
# other values are placed into the other bin
# (41 and 47 exceed the top bound 40; the extra entry is keyed by 9223372036854775807)
query I
SELECT histogram(n, [10, 20, 30, 40]) FROM obs
----
{10=3, 20=2, 30=5, 40=3, 9223372036854775807=2}
# for DOUBLE input the extra entry is keyed by inf
query I
SELECT histogram(n::double, [10, 20, 30, 40]) FROM obs
----
{10.0=3, 20.0=2, 30.0=5, 40.0=3, inf=2}
# empty bins
# (with no bounds at all, all 15 values land in the extra entry)
query I
SELECT histogram(n, []) FROM obs
----
{9223372036854775807=15}
# bounds that are not sorted
# (the result is the same as for the sorted bounds)
query I
SELECT histogram(n, [10, 40, 50, 30, 20]) FROM obs
----
{10=3, 20=2, 30=5, 40=3, 50=2}
# grouped aggregation
# uneven: 5, 7, 23, 25, 31, 41, 47
# even: 0, 12, 20, 24, 26, 28, 34, 36
query II
SELECT n%2=0 is_even, histogram(n, [10, 20, 30, 40, 50]) FROM obs GROUP BY is_even ORDER BY is_even
----
false {10=2, 20=0, 30=2, 40=1, 50=2}
true {10=1, 20=2, 30=3, 40=2, 50=0}
# different bounds per group
# NOTE(review): the boolean column is written as 0/1 here but as false/true in the
# previous query; presumably the runner accepts both spellings - confirm
query II
SELECT n%2=0 is_even, histogram(n, case when n%2=0 then [10, 20, 30, 40, 50] else [11, 21, 31, 41, 51] end) FROM obs GROUP BY is_even ORDER BY is_even
----
0 {11=2, 21=0, 31=3, 41=1, 51=1}
1 {10=1, 20=2, 30=3, 40=2, 50=0}
# all values fit below the top bound here (largest n is 47 < 50), so no overflow entry is expected
# NOTE(review): this comment previously read "values bigger than the max bin are ignored",
# which this query does not exercise - confirm the intended coverage
query I
SELECT histogram(n, [10, 20, 30, 40, 50]) FROM obs
----
{10=3, 20=2, 30=5, 40=3, 50=2}
# larger bins
# (ten bounds 999, 1999, ..., 9999 over the values 0..9999: 1000 values per bin)
query I
SELECT histogram(i, range(999, 10000, 1000)) FROM range(10000) t(i)
----
{999=1000, 1999=1000, 2999=1000, 3999=1000, 4999=1000, 5999=1000, 6999=1000, 7999=1000, 8999=1000, 9999=1000}
# extreme values
# (bounds and inputs at the smallest and largest 64-bit integers; 0 falls into the top bin)
query I
SELECT histogram(v, [-9223372036854775808, -9223372036854775807, 9223372036854775807]) FROM
(VALUES (-9223372036854775808), (-9223372036854775807), (0), (9223372036854775807)) t(v)
----
{-9223372036854775808=1, -9223372036854775807=1, 9223372036854775807=2}
# extreme doubles/negative values
# (only '-inf' lands in the -inf bin; -1e308 goes to the -10 bin; 'inf' lands in the inf bin)
query I
SELECT histogram(v, ['-infinity'::double, -10, 0, 10, 'infinity']) FROM
(VALUES (-1e308), (-0.5), (0), ('inf'), ('-inf'), (0.5)) t(v)
----
{-inf=1, -10.0=1, 0.0=2, 10.0=1, inf=1}
# timestamps
# (yearly bounds from 2000 through 2004; both inputs sit exactly on a bound)
query I
SELECT histogram(v, range(timestamp '2000-01-01', timestamp '2005-01-01', interval '1 year')) FROM
(VALUES (timestamp '2000-01-01'), (timestamp '2003-01-01')) t(v)
----
{'2000-01-01 00:00:00'=1, '2001-01-01 00:00:00'=0, '2002-01-01 00:00:00'=0, '2003-01-01 00:00:00'=1, '2004-01-01 00:00:00'=0}
# strings
# ('aaaa' sorts after 'a', so it is counted under 'b'; 'd' is counted under 'z')
query I
SELECT histogram(v, ['a', 'b', 'c', 'z']) FROM
(VALUES ('a'), ('aaaa'), ('b'), ('c'), ('d')) t(v)
----
{a=1, b=2, c=1, z=1}
# non-inlined strings
# (same data with a long common prefix added to both the values and the bounds)
query I
SELECT histogram(concat('thisisalongprefix_', v), ['thisisalongprefix_'||x for x in ['a', 'b', 'c', 'z']]) FROM
(VALUES ('a'), ('aaaa'), ('b'), ('c'), ('d')) t(v)
----
{thisisalongprefix_a=1, thisisalongprefix_b=2, thisisalongprefix_c=1, thisisalongprefix_z=1}
# structs
# (values and bounds wrapped in a single-field struct; counts match the plain BIGINT case)
query I
SELECT histogram({'i': n}, [{'i': x} for x in [10, 20, 30, 40, 50]]) FROM obs
----
{{'i': 10}=3, {'i': 20}=2, {'i': 30}=5, {'i': 40}=3, {'i': 50}=2}
# lists
# (values and bounds wrapped in a single-element list; counts match the plain BIGINT case)
query I
SELECT histogram([n], [[x] for x in [10, 20, 30, 40, 50]]) FROM obs
----
{[10]=3, [20]=2, [30]=5, [40]=3, [50]=2}
# duplicate bounds in bins
# NOTE(review): only the single entry for 10 is expected; the 12 values above 10 are not
# reported in an overflow entry here, unlike the [10, 20, 30, 40] case earlier in this
# file - confirm this is intended
query I
SELECT histogram(n, [10, 10, 10, 10]) FROM obs
----
{10=3}
# null WITHIN bins
# (the text after ---- is the error message the statement is expected to produce)
statement error
SELECT histogram(n, [10, 20, NULL]) FROM obs
----
Histogram bin entry cannot be NULL
# NULL bins
statement error
SELECT histogram(n, NULL::BIGINT[]) FROM obs
----
Histogram bin list cannot be NULL

View File

@@ -0,0 +1,80 @@
# name: test/sql/aggregate/aggregates/test_bit_and.test
# description: Test BIT_AND operator
# group: [aggregates]
# test on scalar values
query II
SELECT BIT_AND(3), BIT_AND(NULL)
----
3
NULL
# test on a sequence
statement ok
CREATE SEQUENCE seq;
query I
SELECT BIT_AND(nextval('seq'))
----
1
query I
SELECT BIT_AND(nextval('seq'))
----
2
# test on a set of integers
statement ok
CREATE TABLE integers(i INTEGER);
statement ok
INSERT INTO integers VALUES (3), (7), (15), (31), (3), (15)
query IIII
SELECT BIT_AND(i), BIT_AND(1), BIT_AND(DISTINCT i), BIT_AND(NULL) FROM integers
----
3
1
3
NULL
# test on an empty set
query I
SELECT BIT_AND(i) FROM integers WHERE i > 100
----
NULL
# test incorrect usage
statement error
SELECT BIT_AND()
----
statement error
SELECT BIT_AND(1, 2, 3)
----
statement error
SELECT BIT_AND(BIT_AND(1))
----
# test on BIT (bitstring) values
statement ok
CREATE TABLE bits(b BIT);
statement ok
INSERT INTO bits VALUES ('1110101011'), ('0111010101'), ('0101011101'), ('1111111111'), ('0100010011'), ('1100110011')
query I
SELECT BIT_AND(b) FROM bits
----
0100000001
query I
SELECT BIT_AND(b) FROM bits WHERE get_bit(b, 2) = 1;
----
0110000001
query I
SELECT BIT_AND('010110'::BIT)
----
010110

View File

@@ -0,0 +1,80 @@
# name: test/sql/aggregate/aggregates/test_bit_or.test
# description: Test BIT_OR operator
# group: [aggregates]
# test on scalar values
query II
SELECT BIT_OR(3), BIT_OR(NULL)
----
3
NULL
# test on a sequence
statement ok
CREATE SEQUENCE seq;
query I
SELECT BIT_OR(nextval('seq'))
----
1
query I
SELECT BIT_OR(nextval('seq'))
----
2
# test on a set of integers
statement ok
CREATE TABLE integers(i INTEGER);
statement ok
INSERT INTO integers VALUES (3), (7), (15), (31), (3), (15)
query IIII
SELECT BIT_OR(i), BIT_OR(1), BIT_OR(DISTINCT i), BIT_OR(NULL) FROM integers
----
31
1
31
NULL
# test on an empty set
query I
SELECT BIT_OR(i) FROM integers WHERE i > 100
----
NULL
# test incorrect usage
statement error
SELECT BIT_OR()
----
statement error
SELECT BIT_OR(1, 2, 3)
----
statement error
SELECT BIT_OR(BIT_AND(1))
----
# test on BIT (bitstring) values
statement ok
CREATE TABLE bits(b BIT);
statement ok
INSERT INTO bits VALUES ('1010101001'), ('0011010101'), ('0001011101'), ('1011111101'), ('0000010001'), ('1000110001')
query I
SELECT BIT_OR(b) FROM bits
----
1011111101
query I
SELECT BIT_OR(b) FROM bits WHERE get_bit(b, 3) = 0;
----
1010111001
query I
SELECT BIT_OR('111010'::BIT)
----
111010

View File

@@ -0,0 +1,87 @@
# name: test/sql/aggregate/aggregates/test_bit_xor.test
# description: Test BIT_XOR operator
# group: [aggregates]
# test on scalar values
query II
SELECT BIT_XOR(3), BIT_XOR(NULL)
----
3
NULL
# test on a sequence
statement ok
CREATE SEQUENCE seq;
query I
SELECT BIT_XOR(nextval('seq'))
----
1
query I
SELECT BIT_XOR(nextval('seq'))
----
2
# test on a set of integers
statement ok
CREATE TABLE integers(i INTEGER);
statement ok
INSERT INTO integers VALUES (3), (7), (15), (31), (3), (15)
query IIII
SELECT BIT_XOR(i), BIT_XOR(1), BIT_XOR(DISTINCT i), BIT_XOR(NULL) FROM integers
----
24
0
20
NULL
# test on an empty set
query I
SELECT BIT_XOR(i) FROM integers WHERE i > 100
----
NULL
# test incorrect usage
statement error
SELECT BIT_XOR()
----
statement error
SELECT BIT_XOR(1, 2, 3)
----
statement error
SELECT BIT_XOR(BIT_XOR(1))
----
# test on BIT (bitstring) values
statement ok
CREATE TABLE bits(b BIT);
statement ok
INSERT INTO bits VALUES ('1010101001'), ('0011010101'), ('0001011101'), ('1011111101'), ('0000010001'), ('1000110001')
query I
SELECT BIT_XOR(b) FROM bits
----
1011111100
query I
SELECT BIT_XOR(b) FROM bits WHERE get_bit(b, 3) = 1;
----
1001110101
query I
SELECT BIT_XOR('101011'::BIT)
----
101011
query I
SELECT BIT_XOR('0010101010101010101101011'::BIT) from bits
----
0000000000000000000000000

View File

@@ -0,0 +1,247 @@
# name: test/sql/aggregate/aggregates/test_bitstring_agg.test
# description: Test BITSTRING_AGG operator
# group: [aggregates]
statement ok
PRAGMA verify_external
# test tinyints
statement ok
CREATE TABLE tinyints(i TINYINT)
statement ok
INSERT INTO tinyints VALUES(1), (8), (3), (12), (7), (1), (2), (8)
query I
SELECT BITSTRING_AGG(i) FROM tinyints
----
111000110001
query I
SELECT bit_count(BITSTRING_AGG(i)) FROM tinyints WHERE i <= 7
----
4
# test smallints
statement ok
CREATE TABLE smallints(i SMALLINT)
statement ok
INSERT INTO smallints VALUES(1), (8), (-3), (12), (7), (1), (-1), (-9), (NULL), (-2), (8)
query I
SELECT BITSTRING_AGG(i) FROM smallints
----
1000001110100000110001
query I
SELECT bit_count(BITSTRING_AGG(i)) FROM smallints WHERE i = 8
----
1
# test integers
statement ok
CREATE TABLE ints(i INTEGER);
statement ok
INSERT INTO ints VALUES(10), (-5), (11), (NULL), (30), (11), (23), (17), (27), (15), (5), (14)
query I
SELECT BITSTRING_AGG(i) FROM ints
----
100000000010000110011010000010001001
query I
SELECT bit_count(BITSTRING_AGG(i)) FROM ints WHERE i > 20 AND i < 28
----
2
# test bigints
statement ok
CREATE TABLE bigints(i BIGINT);
statement ok
INSERT INTO bigints VALUES(2378097), (2378100), (2378095), (2378104), (NULL), (2378113), (2378100), (2378095), (2378105), (2378097)
query I
SELECT BITSTRING_AGG(i) FROM bigints
----
1010010001100000001
query I
SELECT bit_count(BITSTRING_AGG(i)) FROM bigints WHERE i = 100
----
NULL
# test hugeints
statement ok
CREATE TABLE hugeints(i HUGEINT);
statement ok
INSERT INTO hugeints VALUES(12243372036854775807), (12243372036854778191), (12243372036854730332), (12243372036854773737), (12243372036854737711), (12243372036854722124), (12243372036854778191)
query I
SELECT bit_length(BITSTRING_AGG(i)) FROM hugeints
----
56068
# test uhugeints
statement ok
CREATE TABLE uhugeints(i UHUGEINT);
statement ok
INSERT INTO uhugeints VALUES(12243372036854775807), (12243372036854778191), (12243372036854730332), (12243372036854773737), (12243372036854737711), (12243372036854722124), (12243372036854778191)
query I
SELECT bit_length(BITSTRING_AGG(i)) FROM uhugeints
----
56068
# bit_count of BITSTRING_AGG should give the same result as COUNT(DISTINCT ...)
query I nosort distinct_small
SELECT bit_count(BITSTRING_AGG(i)) FROM smallints
----
query I nosort distinct_small
SELECT COUNT(DISTINCT i) FROM smallints
----
query I nosort distinct_ints
SELECT bit_count(BITSTRING_AGG(i)) FROM ints
----
query I nosort distinct_ints
SELECT COUNT(DISTINCT i) FROM ints
----
query I nosort distinct_bigints
SELECT bit_count(BITSTRING_AGG(i)) FROM bigints
----
query I nosort distinct_bigints
SELECT COUNT(DISTINCT i) FROM bigints
----
query I nosort distinct_hugeints
SELECT bit_count(BITSTRING_AGG(i)) FROM hugeints
----
query I nosort distinct_hugeints
SELECT COUNT(DISTINCT i) FROM hugeints
----
# Overload that takes min and max as function arguments
query I
SELECT BITSTRING_AGG(i, -5, 30) FROM ints
----
100000000010000110011010000010001001
query I
SELECT BITSTRING_AGG(i, -10, 40) FROM ints
----
000001000000000100001100110100000100010010000000000
statement error
SELECT BITSTRING_AGG(i, -10, 20) FROM ints
----
Out of Range Error: Value 30 is outside of provided min and max range (-10 <-> 20)
query I
SELECT BITSTRING_AGG(i, 0, 15) FROM tinyints
----
0111000110001000
statement error
SELECT BITSTRING_AGG(i, 2, 15) FROM tinyints
----
Out of Range Error: Value 1 is outside of provided min and max range (2 <-> 15)
query I
SELECT BITSTRING_AGG(i, 2378080, 2378150) FROM bigints
----
00000000000000010100100011000000010000000000000000000000000000000000000
# test on scalar values
query I
SELECT BITSTRING_AGG(3)
----
1
query I
SELECT BITSTRING_AGG(2, 0, 5)
----
001000
# test with NULL
statement ok
CREATE TABLE null_table(i INT);
statement ok
INSERT INTO null_table VALUES(NULL)
query I
SELECT BITSTRING_AGG(i) FROM null_table
----
NULL
statement ok
INSERT INTO null_table VALUES(6), (NULL), (NULL), (NULL), (NULL)
query I
SELECT BITSTRING_AGG(i) FROM null_table
----
1
# test on csv files - csvs do not provide column statistics
statement ok
COPY (SELECT i FROM ints) TO '__TEST_DIR__/bitstring_agg.csv' (HEADER 0);
statement error
SELECT BITSTRING_AGG(column0) FROM '__TEST_DIR__/bitstring_agg.csv';
----
Binder Error: Could not retrieve required statistics. Alternatively, try by providing the statistics explicitly: BITSTRING_AGG(col, min, max)
query I
SELECT BITSTRING_AGG(column0, -10, 40) FROM '__TEST_DIR__/bitstring_agg.csv';
----
000001000000000100001100110100000100010010000000000
# test incorrect usage
statement error
SELECT BITSTRING_AGG()
----
statement error
SELECT BITSTRING_AGG(1, 3, 4, 8, 0)
----
# group by
statement ok
CREATE TABLE groups(i INT, g VARCHAR);
statement ok
INSERT INTO groups VALUES(10, 'a'), (13, 'b'), (9, 'a'), (16, 'c'), (NULL, 'd'), (2, 'a'), (6, 'c'), (9, 'b')
query II
SELECT g, BITSTRING_AGG(i) FROM groups GROUP BY g ORDER BY g
----
a 100000011000000
b 000000010001000
c 000010000000001
d NULL
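# with the optimizer disabled, BITSTRING_AGG cannot retrieve column statistics,
# so the single-argument form fails and min/max have to be passed explicitly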
statement ok
PRAGMA disable_optimizer
statement error
SELECT BITSTRING_AGG(i) FROM ints
----
Binder Error: Could not retrieve required statistics. Alternatively, try by providing the statistics explicitly: BITSTRING_AGG(col, min, max)
query I
SELECT BITSTRING_AGG(i, -5, 32) FROM ints
----
10000000001000011001101000001000100100

View File

@@ -0,0 +1,103 @@
# name: test/sql/aggregate/aggregates/test_bool.test
# description: Test Bool operator
# group: [aggregates]
statement ok
SET default_null_order='nulls_first';
# Corner Cases
statement error
select bool_or(0)
----
statement error
select bool_and(0)
----
query I
select bool_or(NULL)
----
NULL
query I
select bool_and(NULL)
----
NULL
statement error
select bool_or()
----
statement error
select bool_and()
----
statement error
select bool_or(*)
----
statement error
select bool_and(*)
----
query I
SELECT bool_or(True) FROM range(100);
----
1
query I
SELECT bool_and(True) FROM range(100);
----
1
query I
SELECT bool_or(True) FROM range(100) tbl(i) WHERE 1=0;
----
NULL
query I
SELECT bool_and(True) FROM range(100) tbl(i) WHERE 1=0;
----
NULL
statement ok
create table t (d date)
statement ok
insert into t values (DATE'2021-02-09'-1),(DATE'2021-02-09'+1),(NULL)
query II
select bool_or(d > '2021-02-09') AS or_result,
bool_and(d > '2021-02-09') AS and_result
from t;
----
1 0
query III
select d,bool_or(d > '2021-02-09') AS or_result,
bool_and(d > '2021-02-09') AS and_result
from t
group by d
order by d;
----
NULL NULL NULL
2021-02-08 0 0
2021-02-10 1 1
# Window Function
query I
select bool_or(d > '2021-02-09') over (partition by d)
from t order by d;
----
NULL
0
1
query I
select bool_and(d > '2021-02-09') over (partition by d)
from t order by d;
----
NULL
0
1

View File

@@ -0,0 +1,61 @@
# name: test/sql/aggregate/aggregates/test_corr.test
# description: Test CORR operator
# group: [aggregates]
statement ok
PRAGMA enable_verification
# Corner cases
statement error
select corr()
----
query I
select corr(NULL,NULL)
----
NULL
query I
select corr(1,1)
----
NAN
statement error
select corr(*)
----
statement ok
create table aggr(k int, v decimal(10,2), v2 decimal(10, 2));
statement ok
insert into aggr values(1, 10, null),(2, 10, 11), (2, 20, 22), (2, 25, null), (2, 30, 35);
query II
select k, corr(v, v2) from aggr group by k ORDER BY ALL;
----
1 NULL
2 0.9988445981
query I
select corr(v, v2) from aggr
----
0.9988445981
# Window Function
query I rowsort
select corr(v, v2) over (partition by k)
from aggr;
----
0.998845
0.998845
0.998845
0.998845
NULL
statement error
SELECT corr(a,b) FROM (values (1e301, 0), (-1e301, 0)) tbl(a,b);
----
statement error
SELECT corr(b,a) FROM (values (1e301, 0), (-1e301, 0)) tbl(a,b);
----

View File

@@ -0,0 +1,45 @@
# name: test/sql/aggregate/aggregates/test_count.test
# description: Test COUNT operator
# group: [aggregates]
statement ok
PRAGMA enable_verification
# test counts on scalar values
query IIIII
SELECT COUNT(*), COUNT(1), COUNT(100), COUNT(NULL), COUNT(DISTINCT 1)
----
1
1
1
0
1
# test counts on a set of values
statement ok
CREATE TABLE integers(i INTEGER);
statement ok
INSERT INTO integers VALUES (1), (2), (NULL)
query IIIIII
SELECT COUNT(*), COUNT(1), COUNT(i), COUNT(COALESCE(i, 1)), COUNT(DISTINCT i), COUNT(DISTINCT 1) FROM integers
----
3
3
2
3
2
1
# an ORDER BY inside COUNT is accepted; it does not change the count
query I
SELECT COUNT(1 ORDER BY 1)
----
1
# cannot do DISTINCT *
statement error
SELECT COUNT(DISTINCT *) FROM integers
----
Binder Error: STAR expression is only allowed as the root element

View File

@@ -0,0 +1,62 @@
# name: test/sql/aggregate/aggregates/test_count_all_types.test
# description: Test COUNT operator with different vector types
# group: [aggregates]
statement ok
PRAGMA enable_verification
foreach flatten false true
query I nosort count_int
SELECT COUNT(n) FROM test_vector_types(NULL::INT, all_flat=${flatten}) t(n);
----
query I nosort count_int_list
SELECT list_aggr(n, 'count') FROM test_vector_types(NULL::INT[], all_flat=${flatten}) t(n);
----
query I nosort count_varchar
SELECT COUNT(n) FROM test_vector_types(NULL::VARCHAR, all_flat=${flatten}) t(n);
----
query I nosort count_distinct_int
SELECT COUNT(DISTINCT n) FROM test_vector_types(NULL::INT, all_flat=${flatten}) t(n);
----
query I nosort count_distinct_varchar
SELECT COUNT(DISTINCT n) FROM test_vector_types(NULL::VARCHAR, all_flat=${flatten}) t(n);
----
query I nosort count_int_grouped
SELECT n, COUNT(n) FROM test_vector_types(NULL::INT, all_flat=${flatten}) t(n) GROUP BY n ORDER BY ALL;
----
endloop
statement ok
CREATE TABLE int(i INT);
statement ok
INSERT INTO int FROM range(128);
INSERT INTO int SELECT NULL FROM range(128);
INSERT INTO int FROM range(77);
INSERT INTO int SELECT NULL FROM range(61);
INSERT INTO int FROM range(88);
INSERT INTO int SELECT NULL FROM range(33);
INSERT INTO int FROM range(44);
INSERT INTO int SELECT NULL FROM range(11);
INSERT INTO int FROM range(13);
INSERT INTO int SELECT NULL FROM range(27);
query II
SELECT COUNT(i), COUNT(rowid) FROM int
----
350 610
query III rowsort
SELECT rowid // 200 AS g, COUNT(i), COUNT(rowid) FROM int GROUP BY g
----
0 128 200
1 83 200
2 139 200
3 0 10

View File

@@ -0,0 +1,22 @@
# name: test/sql/aggregate/aggregates/test_count_star.test
# description: Aggregate only COUNT STAR
# group: [aggregates]
statement ok
CREATE TABLE integers(i INTEGER, j INTEGER);
statement ok
INSERT INTO integers VALUES (3, 4), (3, 4), (2, 4);
query II
SELECT i, COUNT(*) FROM integers GROUP BY i ORDER BY i
----
2 1
3 2
# test COUNT without the *
query II
SELECT i, COUNT() FROM integers GROUP BY i ORDER BY i
----
2 1
3 2

View File

@@ -0,0 +1,104 @@
# name: test/sql/aggregate/aggregates/test_covar.test
# description: Test COVAR operators
# group: [aggregates]
# test incorrect usage of COVAR_POP function
statement error
SELECT COVAR_POP()
----
statement error
SELECT COVAR_POP(1, 2, 3)
----
statement error
SELECT COVAR_POP(COVAR_POP(1))
----
# test incorrect usage of COVAR_SAMP function
statement error
SELECT COVAR_SAMP()
----
statement error
SELECT COVAR_SAMP(1, 2, 3)
----
statement error
SELECT COVAR_SAMP(COVAR_SAMP(1))
----
# test population covariance on scalar values
query RRRR
SELECT COVAR_POP(3,3), COVAR_POP(NULL,3), COVAR_POP(3,NULL), COVAR_POP(NULL,NULL)
----
0.000000
NULL
NULL
NULL
# test sample covariance on scalar values
query RRRR
SELECT COVAR_SAMP(3,3), COVAR_SAMP(NULL,3), COVAR_SAMP(3,NULL), COVAR_SAMP(NULL,NULL)
----
NULL
NULL
NULL
NULL
# test population covariance on a sequence
statement ok
CREATE SEQUENCE seqx;
statement ok
CREATE SEQUENCE seqy;
query R
SELECT COVAR_POP(nextval('seqx'),nextval('seqy'))
----
0.000000
query R
SELECT COVAR_POP(nextval('seqx'),nextval('seqy'))
----
0.000000
# test population covariance on a set of values
statement ok
CREATE TABLE integers(x INTEGER, y INTEGER);
statement ok
INSERT INTO integers VALUES (10,NULL), (10,11), (20,22), (25,NULL), (30,35)
query RRRRR
SELECT COVAR_POP(x,y), COVAR_POP(x,1), COVAR_POP(1,y), COVAR_POP(x,NULL), COVAR_POP(NULL,y) FROM integers
----
80.000000
0.000000
0.000000
NULL
NULL
query RRRRR
SELECT COVAR_SAMP(x,y), COVAR_SAMP(x,1), COVAR_SAMP(1,y), COVAR_SAMP(x,NULL), COVAR_SAMP(NULL,y) FROM integers
----
120.000000
0.000000
0.000000
NULL
NULL
# test covar on empty set
query RR
SELECT COVAR_POP(x,y), COVAR_SAMP(x,y) FROM integers WHERE x > 100
----
NULL
NULL
# test covar with only null inputs
query RR
SELECT COVAR_POP(NULL, NULL), COVAR_SAMP(NULL, NULL) FROM integers
----
NULL
NULL

View File

@@ -0,0 +1,32 @@
# name: test/sql/aggregate/aggregates/test_empty_aggregate.test
# description: Test aggregate operators on empty set
# group: [aggregates]
statement ok
CREATE TABLE integers(i INTEGER);
query IIRRRIIII
SELECT COUNT(*), COUNT(i), STDDEV_SAMP(i), SUM(i), SUM(DISTINCT i), FIRST(i), LAST(i), MAX(i), MIN(i) FROM integers WHERE i > 100
----
0
0
NULL
NULL
NULL
NULL
NULL
NULL
NULL
statement ok
CREATE TABLE emptyaggr(i INTEGER);
query I
SELECT COUNT(*) FROM emptyaggr
----
0
query RIIIRIIIII
SELECT SUM(i), COUNT(i), COUNT(DISTINCT i), COUNT(*), AVG(i), COUNT(*)+1, COUNT(i)+1, MIN(i), MIN(i+1), MIN(i)+1 FROM emptyaggr
----
NULL 0 0 0 NULL 1 1 NULL NULL NULL

View File

@@ -0,0 +1,116 @@
# name: test/sql/aggregate/aggregates/test_entropy.test
# description: Test entropy function
# group: [aggregates]
statement ok
PRAGMA enable_verification
statement ok
PRAGMA verify_external
# Corner cases
statement error
select entropy()
----
query I
select entropy(NULL)
----
0
query I
select entropy(1)
----
0
statement error
select entropy(*)
----
statement ok
create table aggr(k int);
statement ok
insert into aggr values (0),(1),(1),(1),(4),(0),(3),(3),(2),(2),(4),(4),(2),(4),(0),(0),(0),(1),(2),(3),(4),(2),(3),(3),(1);
query I
select entropy(k) from aggr ;
----
2.321928
query I
SELECT entropy(2) FROM range(100);
----
0
query I
select entropy(k) from aggr group by k%2 order by all
----
1.000000
1.584963
statement ok
create table names (name string)
statement ok
insert into names values ('pedro'), ('pedro'), ('pedro'),('hannes'),('hannes'),('mark'),(null);
query I
select entropy(name) from names;
----
1.459148
# arrays
statement ok
create table array_names as select case when name is null then null else [name] end l from names
query I
select entropy(l) from array_names;
----
1.459148
# array of structs
statement ok
create table array_of_structs as select case when name is null then null else [{'name': name}] end l from names
query I
select entropy(l) from array_of_structs;
----
1.459148
query I rowsort
select entropy(k) over (partition by k%2)
from aggr;
----
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.000000
1.584963
1.584963
1.584963
1.584963
1.584963
1.584963
1.584963
1.584963
1.584963
1.584963
1.584963
1.584963
1.584963
1.584963
1.584963
# Empty Table
query I
SELECT entropy(i) FROM range(100) tbl(i) WHERE 1=0;
----
0

View File

@@ -0,0 +1,77 @@
# name: test/sql/aggregate/aggregates/test_first_last_any_ordered.test
# description: Test first/last/any_value and ordered aggregates
# group: [aggregates]
statement ok
PRAGMA enable_verification
statement ok
CREATE TABLE integers(i INTEGER, grp INTEGER);
statement ok
INSERT INTO integers VALUES (1, NULL), (2, 3), (3, 2), (NULL, 1);
query I
SELECT FIRST(i ORDER BY grp NULLS LAST) FROM integers
----
NULL
query I
SELECT FIRST(i ORDER BY grp NULLS FIRST) FROM integers
----
1
query I
SELECT ANY_VALUE(i ORDER BY grp NULLS FIRST) FROM integers
----
1
query I
SELECT ANY_VALUE(i ORDER BY grp NULLS LAST) FROM integers
----
3
query I
SELECT ARG_MIN(i, grp) FROM integers
----
3
query I
SELECT FIRST(i ORDER BY grp DESC NULLS LAST) FROM integers
----
2
query I
SELECT ANY_VALUE(i ORDER BY grp DESC NULLS FIRST) FROM integers
----
1
query I
SELECT ANY_VALUE(i ORDER BY grp DESC NULLS LAST) FROM integers
----
2
query I
SELECT ARG_MAX(i, grp) FROM integers
----
2
query I
SELECT LAST(i ORDER BY grp NULLS FIRST) FROM integers
----
2
query I
SELECT ARG_MAX(i, grp) FROM integers
----
2
query I
SELECT LAST(i ORDER BY grp DESC NULLS FIRST) FROM integers
----
NULL
query I
SELECT ARG_MIN(i, grp) FROM integers
----
3

View File

@@ -0,0 +1,59 @@
# name: test/sql/aggregate/aggregates/test_first_noninlined.test
# description: Test FIRST with non-inlined strings
# group: [aggregates]
statement ok
PRAGMA verify_external
statement ok
CREATE TABLE tbl(a INTEGER, b VARCHAR)
statement ok
INSERT INTO tbl VALUES (1, NULL), (2, 'thisisalongstring'), (3, 'thisisalsoalongstring')
# non-grouped aggregate
query T
SELECT FIRST(b) FROM tbl WHERE a=2
----
thisisalongstring
# Check that the ARBITRARY alias behaves like FIRST
query T
SELECT ARBITRARY(b) FROM tbl WHERE a=2
----
thisisalongstring
query T
SELECT FIRST(b) FROM tbl WHERE a=1
----
NULL
query T
SELECT FIRST(b) FROM tbl WHERE a=1 GROUP BY a
----
NULL
query T
SELECT FIRST(b) FROM tbl WHERE a=0
----
NULL
query T
SELECT FIRST(b) FROM tbl WHERE a=0 GROUP BY b
----
# grouped aggregate
query IT
SELECT a, FIRST(b) FROM tbl GROUP BY a ORDER BY a
----
1
NULL
2
thisisalongstring
3
thisisalsoalongstring
query I
SELECT FIRST(i) FROM (VALUES (NULL::INT32)) tbl(i)
----
NULL

View File

@@ -0,0 +1,37 @@
# name: test/sql/aggregate/aggregates/test_group_by_many_groups.test_slow
# description: Test GROUP BY with many groups
# group: [aggregates]
statement ok
CREATE TABLE integers AS SELECT i, 1 AS j FROM range(0, 10000, 1) t1(i) UNION ALL SELECT i, 2 j FROM range(0, 10000, 1) t1(i);
query RR
SELECT SUM(i), SUM(sums) FROM (SELECT i, SUM(j) AS sums FROM integers GROUP BY i) tbl1
----
49995000
30000
# Multiple 8-bit bitmasks
query RR
SELECT SUM(a0 + a1 + a2 + a3 + a4 + a5 + a6 + a7 + a8 + a9 + a10), SUM(sums)
FROM (
SELECT
i+0 as a0,
i+1 as a1,
i+2 as a2,
i+3 as a3,
i+4 as a4,
i+5 as a5,
i+6 as a6,
i+7 as a7,
i+8 as a8,
i+9 as a9,
i+10 as a10,
sum(j) as sums
FROM integers
GROUP BY 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
) s
----
550495000
30000

View File

@@ -0,0 +1,75 @@
# name: test/sql/aggregate/aggregates/test_group_on_expression.test
# description: Test GROUP BY on expression
# group: [aggregates]
statement ok
CREATE TABLE integer(i INTEGER, j INTEGER);
statement ok
INSERT INTO integer VALUES (3, 4), (3, 5), (3, 7);
# group by on expression
query I
SELECT j * 2 FROM integer GROUP BY j * 2 ORDER BY j * 2;
----
8
10
14
# verify that adding or removing the table name does not impact the validity of the query
query I
SELECT integer.j * 2 FROM integer GROUP BY j * 2 ORDER BY j * 2;
----
8
10
14
query I
SELECT j * 2 FROM integer GROUP BY integer.j * 2 ORDER BY j * 2;
----
8
10
14
query I
SELECT j * 2 FROM integer GROUP BY j * 2 ORDER BY integer.j * 2;
----
8
10
14
query I
SELECT integer.j * 2 FROM integer GROUP BY j * 2 ORDER BY integer.j * 2;
----
8
10
14
query I
SELECT j * 2 FROM integer GROUP BY integer.j * 2 ORDER BY integer.j * 2;
----
8
10
14
query I
SELECT integer.j * 2 FROM integer GROUP BY integer.j * 2 ORDER BY j * 2;
----
8
10
14
query I
SELECT integer.j * 2 FROM integer GROUP BY integer.j * 2 ORDER BY integer.j * 2;
----
8
10
14
query I
SELECT j * 2 AS i FROM integer GROUP BY j * 2 ORDER BY i;
----
8
10
14

View File

@@ -0,0 +1,146 @@
# name: test/sql/aggregate/aggregates/test_histogram.test
# description: Test histogram aggregation
# group: [aggregates]
statement ok
PRAGMA enable_verification
statement ok
PRAGMA verify_external
query I
select histogram(NULL)
----
NULL
# Empty Table
query I
SELECT histogram(i) FROM range(100) tbl(i) WHERE 1=0;
----
NULL
query I
select histogram(1)
----
{1=1}
# Allow ascii characters in strings
query I
SELECT histogram('')
----
{=1}
query I
SELECT histogram(2) FROM range(100);
----
{2=100}
statement ok
CREATE TABLE hist_data (g INTEGER, e INTEGER)
statement ok
INSERT INTO hist_data VALUES (1, 1), (1, 2), (2, 3), (2, 4), (2, 5), (3, 6), (5, NULL)
query T
SELECT histogram(g) from hist_data
----
{1=2, 2=3, 3=1, 5=1}
query T
SELECT histogram(e) from hist_data
----
{1=1, 2=1, 3=1, 4=1, 5=1, 6=1}
query I
select histogram(g)
from hist_data
group by g%2==0 ORDER BY g%2==0
----
{1=2, 3=1, 5=1}
{2=3}
query I
select histogram(g)
from hist_data
where g < 3
----
{1=2, 2=3}
statement ok
create table names (name string)
statement ok
insert into names values ('pedro'), ('pedro'), ('pedro'),('hannes'),('hannes'),('mark'),(null),('Hubert Blaine Wolfeschlegelsteinhausenbergerdorff Sr.');
query I
select histogram(name) from names;
----
{Hubert Blaine Wolfeschlegelsteinhausenbergerdorff Sr.=1, hannes=2, mark=1, pedro=3}
# Variant time type binding (Issue #3290)
query I
SELECT histogram(CAST('2021-08-20' AS TIMESTAMP_S));
----
{'2021-08-20 00:00:00'=1}
query I
SELECT histogram(CAST('2021-08-20' AS TIMESTAMP_MS));
----
{'2021-08-20 00:00:00'=1}
query I
SELECT histogram(CAST('2021-08-20' AS TIMESTAMP_NS));
----
{'2021-08-20 00:00:00'=1}
query I
SELECT histogram(CAST('15:05:42' AS TIME));
----
{'15:05:42'=1}
query I
SELECT histogram(CAST('15:05:42+00' AS TIME WITH TIME ZONE));
----
{'15:05:42+00'=1}
query I
SELECT histogram(CAST('2022-01-02' AS DATE));
----
{2022-01-02=1}
query II rowsort
select g,histogram(g) over (partition by g%2)
from hist_data;
----
1 {1=2, 3=1, 5=1}
1 {1=2, 3=1, 5=1}
2 {2=3}
2 {2=3}
2 {2=3}
3 {1=2, 3=1, 5=1}
5 {1=2, 3=1, 5=1}
# Corner cases
statement error
select histogram()
----
statement error
select histogram(*)
----
# enums
statement ok
CREATE TYPE mood AS ENUM ('sad', 'ok', 'happy')
statement ok
CREATE TABLE enums (e mood)
statement ok
INSERT INTO enums VALUES ('happy'), ('ok')
query I
SELECT histogram(e) FROM enums
----
{ok=1, happy=1}

View File

@@ -0,0 +1,29 @@
# name: test/sql/aggregate/aggregates/test_histogram_3529.test
# description: Test histogram aggregation issue 3529
# group: [aggregates]
statement ok
PRAGMA enable_verification
statement ok
create table tmp (c0 integer, c1 integer);
statement ok
insert into tmp values
(0, 0),
(1, 1),
(2, 0),
(0, 1),
(1, 0),
(2, 1),
(0, 0),
(1, 1),
(2, 0),
(0, 1);
query II
SELECT c0, histogram(c1) FROM tmp GROUP BY c0 ORDER BY ALL
----
0 {0=2, 1=2}
1 {0=1, 1=2}
2 {0=2, 1=1}

View File

@@ -0,0 +1,88 @@
# name: test/sql/aggregate/aggregates/test_incorrect_aggregate.test
# description: Test incorrect usages of aggregates
# group: [aggregates]
statement error
SELECT COUNT(1, 2, 3)
----
Binder Error: No function matches the given name and argument types 'count
statement error
SELECT COUNT(COUNT(1))
----
Binder Error: aggregate function calls cannot be nested
statement error
SELECT STDDEV_SAMP()
----
Binder Error: No function matches the given name and argument types 'stddev_samp
statement error
SELECT STDDEV_SAMP(1, 2, 3)
----
Binder Error: No function matches the given name and argument types 'stddev_samp
statement error
SELECT STDDEV_SAMP(STDDEV_SAMP(1))
----
Binder Error: aggregate function calls cannot be nested
statement error
SELECT SUM()
----
Binder Error: No function matches the given name and argument types 'sum
statement error
SELECT SUM(1, 2, 3)
----
Binder Error: No function matches the given name and argument types 'sum
statement error
SELECT SUM(SUM(1))
----
Binder Error: aggregate function calls cannot be nested
statement error
SELECT FIRST()
----
Binder Error: No function matches the given name and argument types 'first
statement error
SELECT FIRST(1, 2, 3)
----
Binder Error: No function matches the given name and argument types 'first
statement error
SELECT FIRST(FIRST(1))
----
Binder Error: aggregate function calls cannot be nested
statement error
SELECT MAX()
----
Binder Error: No function matches the given name and argument types 'max
statement error
SELECT MAX(1, 2, 3)
----
Binder Error: No function matches the given name and argument types 'max
statement error
SELECT MAX(MAX(1))
----
Binder Error: aggregate function calls cannot be nested
statement error
SELECT MIN()
----
Binder Error: No function matches the given name and argument types 'min
statement error
SELECT MIN(1, 2, 3)
----
Binder Error: No function matches the given name and argument types 'min
statement error
SELECT MIN(MIN(1))
----
Binder Error: aggregate function calls cannot be nested

View File

@@ -0,0 +1,15 @@
# name: test/sql/aggregate/aggregates/test_kahan_avg.test
# description: Test averages in which the intermediate sums are not exact
# group: [aggregates]
statement ok
CREATE TABLE doubles(n DOUBLE);
statement ok
INSERT INTO doubles (n) VALUES ('9007199254740992'::DOUBLE), (1::DOUBLE), (1::DOUBLE), (0::DOUBLE);
# 9007199254740992 is 2^53; adding 1 to it in DOUBLE arithmetic rounds back to 2^53,
# so an uncompensated running sum loses both 1s and a simple sum-and-divide
# yields 2251799813685248 instead of the exact (2^53 + 2) / 4 = 2251799813685248.5.
# The difference from the exact average must therefore be 0.
query R
SELECT FAVG(n) - '2251799813685248.5'::DOUBLE FROM doubles;
----
0

View File

@@ -0,0 +1,25 @@
# name: test/sql/aggregate/aggregates/test_kahan_sum.test
# description: Test sums in which temporary results are not exact
# group: [aggregates]
statement ok
CREATE TABLE doubles(n DOUBLE);
statement ok
INSERT INTO doubles (n) VALUES ('9007199254740992'::DOUBLE), (1::DOUBLE), (1::DOUBLE), (0::DOUBLE);
# 9007199254740992 is 2^53; adding 1 to it in DOUBLE arithmetic rounds back to 2^53,
# so an uncompensated running sum would return 9007199254740992.
# The exact total is 2^53 + 2 = 9007199254740994.
query I
SELECT FSUM(n)::BIGINT FROM doubles;
----
9007199254740994
# the sumKahan spelling must return the same exact total
query I
SELECT sumKahan(n)::BIGINT FROM doubles;
----
9007199254740994
# the kahan_sum spelling must return the same exact total
query I
SELECT kahan_sum(n)::BIGINT FROM doubles;
----
9007199254740994

View File

@@ -0,0 +1,104 @@
# name: test/sql/aggregate/aggregates/test_kurtosis.test
# description: Test kurtosis aggregate
# group: [aggregates]
statement ok
PRAGMA enable_verification
# Corner cases
statement error
select kurtosis()
----
query I
select kurtosis(NULL)
----
NULL
query I
select kurtosis(1)
----
NULL
statement error
select kurtosis(*)
----
query I
select kurtosis(i) from (values (0), (0), (0), (0), (0), (0)) tbl(i)
----
NULL
# out of range
statement error
select kurtosis(i) from (values (2e304), (2e305), (2e306), (2e307)) tbl(i)
----
# Constant value: every input is identical, so the result is NULL
# (arguably this should be an error instead)
query I
select kurtosis(10) from range (5)
----
NULL
# Empty table
query I
select kurtosis(10) from range (5) where 1 == 0
----
NULL
statement ok
create table aggr(k int, v int, v2 int);
statement ok
insert into aggr values
(1, 10, null),
(2, 10, 11),
(2, 10, 15),
(2, 10, 18),
(2, 20, 22),
(2, 20, 25),
(2, 25, null),
(2, 30, 35),
(2, 30, 40),
(2, 30, 50),
(2, 30, 51);
query III
select kurtosis(k), kurtosis(v), kurtosis(v2) from aggr;
----
11.000000 -1.961428 -1.445120
query III
select kurtosis_pop(k), kurtosis_pop(v), kurtosis_pop(v2) from aggr;
----
6.100000 -1.676857 -1.358688
query I
with onetwo as (select range::float as v from range(1,3)) select kurtosis_pop(v) from onetwo;
----
-2.0
query I
select kurtosis(v2) from aggr group by v ORDER BY ALL;
----
-3.977599
NULL
NULL
NULL
# Window Function
query I rowsort
select kurtosis(v2) over (partition by v)
from aggr;
----
-3.977599
-3.977599
-3.977599
-3.977599
NULL
NULL
NULL
NULL
NULL
NULL
NULL

View File

@@ -0,0 +1,188 @@
# name: test/sql/aggregate/aggregates/test_last.test
# description: Test the LAST function
# group: [aggregates]
statement ok
PRAGMA enable_verification
#
# Test all types
#
# Numerics
foreach type <numeric>
statement ok
CREATE TABLE five AS SELECT i::${type} AS i FROM range(1, 6, 1) t1(i)
query I
SELECT LAST(i) FROM five
----
5
query II
SELECT i % 3 AS g, LAST(i) FROM five GROUP BY 1 ORDER BY 1
----
0 3
1 4
2 5
query I
SELECT LAST(i ORDER BY 5-i) FROM five
----
1
query II
SELECT i % 3 AS g, LAST(i ORDER BY 5-i) FROM five GROUP BY 1 ORDER BY 1
----
0 3
1 1
2 2
statement ok
DROP TABLE five
endloop
# Decimals
foreach type decimal(4,1) decimal(8,1) decimal(12,1) decimal(18,1)
statement ok
CREATE TABLE five AS SELECT i::${type} AS i FROM range(1, 6, 1) t1(i)
query I
SELECT LAST(i ORDER BY 5-i) FROM five
----
1.0
query II
SELECT i::INTEGER % 3 AS g, LAST(i ORDER BY 5-i) FROM five GROUP BY 1 ORDER BY 1
----
0 3.0
1 1.0
2 2.0
statement ok
DROP TABLE five
endloop
# Temporal
statement ok
CREATE TABLE five_dates AS
SELECT
i::integer AS i,
'2021-08-20'::DATE + i::INTEGER AS d,
'2021-08-20'::TIMESTAMP + INTERVAL (i) HOUR AS dt,
'14:59:37'::TIME + INTERVAL (i) MINUTE AS t,
INTERVAL (i) SECOND AS s
FROM range(1, 6, 1) t1(i)
query IIII
SELECT LAST(d), LAST(dt), LAST(t), LAST(s) FROM five_dates
----
2021-08-25 2021-08-20 05:00:00 15:04:37 00:00:05
query IIIII
SELECT i % 3 AS g, LAST(d), LAST(dt), LAST(t), LAST(s)
FROM five_dates
GROUP BY 1
ORDER BY 1
----
0 2021-08-23 2021-08-20 03:00:00 15:02:37 00:00:03
1 2021-08-24 2021-08-20 04:00:00 15:03:37 00:00:04
2 2021-08-25 2021-08-20 05:00:00 15:04:37 00:00:05
query IIII
SELECT LAST(d ORDER BY 5-i), LAST(dt ORDER BY 5-i), LAST(t ORDER BY 5-i), LAST(s ORDER BY 5-i) FROM five_dates
----
2021-08-21 2021-08-20 01:00:00 15:00:37 00:00:01
query IIIII
SELECT i % 3 AS g, LAST(d ORDER BY 5-i), LAST(dt ORDER BY 5-i), LAST(t ORDER BY 5-i), LAST(s ORDER BY 5-i)
FROM five_dates
GROUP BY 1
ORDER BY 1
----
0 2021-08-23 2021-08-20 03:00:00 15:02:37 00:00:03
1 2021-08-21 2021-08-20 01:00:00 15:00:37 00:00:01
2 2021-08-22 2021-08-20 02:00:00 15:01:37 00:00:02
# WITH TIME ZONE
query II
SELECT LAST(dt::TIMESTAMPTZ), LAST(t::TIMETZ) FROM five_dates
----
2021-08-20 05:00:00+00 15:04:37+00
query III
SELECT i % 3 AS g, LAST(dt::TIMESTAMPTZ), LAST(t::TIMETZ)
FROM five_dates
GROUP BY 1
ORDER BY 1
----
0 2021-08-20 03:00:00+00 15:02:37+00
1 2021-08-20 04:00:00+00 15:03:37+00
2 2021-08-20 05:00:00+00 15:04:37+00
query II
SELECT LAST(dt::TIMESTAMPTZ ORDER BY 5-i), LAST(t::TIMETZ ORDER BY 5-i) FROM five_dates
----
2021-08-20 01:00:00+00 15:00:37+00
query III
SELECT i % 3 AS g, LAST(dt::TIMESTAMPTZ ORDER BY 5-i), LAST(t::TIMETZ ORDER BY 5-i)
FROM five_dates
GROUP BY 1
ORDER BY 1
----
0 2021-08-20 03:00:00+00 15:02:37+00
1 2021-08-20 01:00:00+00 15:00:37+00
2 2021-08-20 02:00:00+00 15:01:37+00
statement ok
DROP TABLE five_dates
# Complex
statement ok
CREATE TABLE five_complex AS
SELECT
i::integer AS i,
i::VARCHAR AS s,
[i] AS l,
{'a': i} AS r
FROM range(1, 6, 1) t1(i)
query III
SELECT LAST(s), LAST(l), LAST(r)
FROM five_complex
----
5 [5] {'a': 5}
query IIII
SELECT i % 3 AS g, LAST(s), LAST(l), LAST(r)
FROM five_complex
GROUP BY 1
ORDER BY 1
----
0 3 [3] {'a': 3}
1 4 [4] {'a': 4}
2 5 [5] {'a': 5}
query III
SELECT LAST(s ORDER BY 5-i), LAST(l ORDER BY 5-i), LAST(r ORDER BY 5-i)
FROM five_complex
----
1 [1] {'a': 1}
query IIII
SELECT i % 3 AS g, LAST(s ORDER BY 5-i), LAST(l ORDER BY 5-i), LAST(r ORDER BY 5-i)
FROM five_complex
GROUP BY 1
ORDER BY 1
----
0 3 [3] {'a': 3}
1 1 [1] {'a': 1}
2 2 [2] {'a': 2}
statement ok
DROP TABLE five_complex

View File

@@ -0,0 +1,53 @@
# name: test/sql/aggregate/aggregates/test_last_noninlined.test
# description: Test LAST with non-inlined strings
# group: [aggregates]
statement ok
PRAGMA enable_verification
# fixture: one row with a NULL string and two rows with long strings (17 and 21 characters)
statement ok
CREATE TABLE tbl(a INTEGER, b VARCHAR)
statement ok
INSERT INTO tbl VALUES (1, NULL), (2, 'thisisalongstring'), (3, 'thisisalsoalongstring')
# non-grouped aggregate
# one matching row holding a long string
query T
SELECT LAST(b) FROM tbl WHERE a=2
----
thisisalongstring
# the only matching row holds NULL, so LAST is NULL
query T
SELECT LAST(b) FROM tbl WHERE a=1
----
NULL
# same filter with GROUP BY: a single group whose value is NULL
query T
SELECT LAST(b) FROM tbl WHERE a=1 GROUP BY a
----
NULL
# no matching rows: the ungrouped aggregate still returns one row containing NULL
query T
SELECT LAST(b) FROM tbl WHERE a=0
----
NULL
# no matching rows with GROUP BY: there are no groups, so the result is empty
query T
SELECT LAST(b) FROM tbl WHERE a=0 GROUP BY b
----
# grouped aggregate
# expected values below are listed one per line, row by row (a, then LAST(b))
query IT
SELECT a, LAST(b) FROM tbl GROUP BY a ORDER BY a
----
1
NULL
2
thisisalongstring
3
thisisalsoalongstring
# a single NULL integer input yields NULL
query I
SELECT LAST(i) FROM (VALUES (NULL::INT32)) tbl(i)
----
NULL

View File

@@ -0,0 +1,120 @@
# name: test/sql/aggregate/aggregates/test_list_aggregate.test_slow
# description: Test aggregate list
# group: [aggregates]
statement ok
SET default_null_order='nulls_first';
statement ok
PRAGMA enable_verification
# without an ORDER BY the element order inside list() is arbitrary under parallelism, so run these checks single-threaded
statement ok
PRAGMA threads=1
statement ok
CREATE TABLE list_extract_test(i INTEGER, g INTEGER);
statement ok
INSERT INTO list_extract_test VALUES (1, 1), (2, 1), (3, 2), (NULL, 3), (42, 3);
query II
SELECT g, LIST_EXTRACT(LIST(i), 1) FROM list_extract_test GROUP BY g ORDER BY ALL;
----
1 1
2 3
3 NULL
query II
SELECT g, LIST_EXTRACT(LIST(i), 2) FROM list_extract_test GROUP BY g ORDER BY ALL;
----
1 2
2 NULL
3 42
query II
SELECT g, LIST_EXTRACT(LIST(i), 3) FROM list_extract_test GROUP BY g ORDER BY ALL;
----
1 NULL
2 NULL
3 NULL
# Use ORDER BY, which is robust to parallelism
statement ok
PRAGMA threads=4
query I
with t as (
values ('a',1), ('c',3), ('b',2)
)
select list(col0 order by col1) from t
----
[a, b, c]
query II
SELECT g, LIST(i ORDER BY i ASC) FROM list_extract_test GROUP BY g ORDER BY ALL;
----
1 [1, 2]
2 [3]
3 [NULL, 42]
query II
SELECT g, LIST(i ORDER BY i ASC NULLS FIRST) FROM list_extract_test GROUP BY g ORDER BY ALL;
----
1 [1, 2]
2 [3]
3 [NULL, 42]
query II
SELECT g, LIST(i ORDER BY i ASC NULLS LAST) FROM list_extract_test GROUP BY g ORDER BY ALL;
----
1 [1, 2]
2 [3]
3 [42, NULL]
query II
SELECT g, LIST(i ORDER BY i DESC) FROM list_extract_test GROUP BY g ORDER BY ALL;
----
1 [2, 1]
2 [3]
3 [NULL, 42]
query II
SELECT g, LIST(i ORDER BY i DESC NULLS FIRST) FROM list_extract_test GROUP BY g ORDER BY ALL;
----
1 [2, 1]
2 [3]
3 [NULL, 42]
query II
SELECT g, LIST(i ORDER BY i DESC NULLS LAST) FROM list_extract_test GROUP BY g ORDER BY ALL;
----
1 [2, 1]
2 [3]
3 [42, NULL]
query II
SELECT g, LIST(i ORDER BY i ASC) FILTER (WHERE i <> 3) FROM list_extract_test GROUP BY g ORDER BY ALL;
----
1 [1, 2]
2 NULL
3 [42]
query II
SELECT g, LIST(i ORDER BY i ASC) FILTER (WHERE i IS NULL) FROM list_extract_test GROUP BY g ORDER BY ALL;
----
1 NULL
2 NULL
3 [NULL]
query II
SELECT g, LIST(i ORDER BY i ASC) FILTER (WHERE i = 1337) FROM list_extract_test GROUP BY g ORDER BY ALL;
----
1 NULL
2 NULL
3 NULL
# test forcing reallocation of lists
statement ok
CREATE TABLE test_realloc AS (SELECT range % 4 g, list(range || 'some_string_longer_than_12') l FROM range(500000) GROUP BY range % 4);

View File

@@ -0,0 +1,231 @@
# name: test/sql/aggregate/aggregates/test_list_aggregate_function.test_slow
# description: Test the list aggregate function for all data types
# group: [aggregates]
statement ok
PRAGMA enable_verification
statement ok
PRAGMA verify_external
# INTEGER types
foreach type tinyint smallint integer bigint utinyint usmallint uinteger ubigint
statement ok
CREATE TABLE test (g INTEGER, i ${type})
query I
SELECT LIST(i) FROM test GROUP BY g ORDER BY g
----
statement ok
INSERT INTO test VALUES (1, 2), (1, 3), (2, 4)
query I
SELECT LIST(i) FROM test GROUP BY g ORDER BY g
----
[2, 3]
[4]
statement ok
DROP TABLE test
endloop
# FLOAT, DOUBLE
foreach type float double
statement ok
CREATE TABLE test (g INTEGER, i ${type})
statement ok
INSERT INTO test VALUES (1, 2), (1, 3), (2, 4)
query I
SELECT LIST(i) FROM test GROUP BY g ORDER BY g
----
[2.0, 3.0]
[4.0]
statement ok
DROP TABLE test
endloop
# VARCHAR
statement ok
CREATE TABLE varch (g integer, str varchar)
statement ok
INSERT INTO varch VALUES (1, 'hello'), (1, 'was'), (2, 'geht')
query I
SELECT LIST(str) FROM varch GROUP BY g ORDER BY g
----
[hello, was]
[geht]
# LIST
# one level nested LIST
statement ok
CREATE TABLE nested_lists (g INTEGER, i INTEGER[])
statement ok
INSERT INTO nested_lists VALUES (1, [2]), (1, [3]), (2, [4])
query I
SELECT LIST(i) FROM nested_lists GROUP BY g ORDER BY g
----
[[2], [3]]
[[4]]
# bigger lists than standard vector size
statement ok
CREATE TABLE bigger_lists AS SELECT range % 4 g, range i FROM range(100000)
query I
SELECT list_count(LIST(i)) FROM bigger_lists GROUP BY g ORDER BY g
----
25000
25000
25000
25000
# many groups with small lists
statement ok
CREATE TABLE lists AS SELECT range % 10000 g, range i FROM range(100000);
query I
SELECT list_count(LIST(i)) FROM lists GROUP BY g ORDER BY g LIMIT 2;
----
10
10
# STRUCTS
statement ok
CREATE TABLE structs AS SELECT 1 AS g, {'x': 3, 'b': 2} AS s
statement ok
INSERT INTO structs VALUES (1, {'x': 5, 'b': 4}), (2, {'x': 50, 'b': 40})
query I
SELECT LIST(s) FROM structs GROUP BY g ORDER BY g
----
[{'x': 3, 'b': 2}, {'x': 5, 'b': 4}]
[{'x': 50, 'b': 40}]
statement ok
CREATE TABLE structs_list_strings AS SELECT 1 AS g, {'a': ['strr', 'strrr']} AS s
statement ok
INSERT INTO structs_list_strings VALUES (1, {'a': ['str1', 'str2']}), (1, {'a': ['str3', 'str4']}), (2, {'a': ['str1', 'str2']})
query I
SELECT LIST(s) FROM structs_list_strings GROUP BY g ORDER BY g
----
[{'a': [strr, strrr]}, {'a': [str1, str2]}, {'a': [str3, str4]}]
[{'a': [str1, str2]}]
statement ok
CREATE TABLE structs_ints AS SELECT 1 AS g, {'a': [1, 2]} AS s
statement ok
INSERT INTO structs_ints VALUES (1, {'a': [3, 4]}), (1, {'a': [5, 6]}), (2, {'a': [1, 2]})
query I
SELECT LIST(s) FROM structs_ints GROUP BY g ORDER BY g
----
[{'a': [1, 2]}, {'a': [3, 4]}, {'a': [5, 6]}]
[{'a': [1, 2]}]
statement ok
CREATE TABLE structs_strings AS SELECT 1 AS g, {'n': 'nm1588970'} AS s
statement ok
INSERT INTO structs_strings VALUES (1, {'n': 'nm0005690'})
query I
SELECT LIST(s) FROM structs_strings GROUP BY g ORDER BY g
----
[{'n': nm1588970}, {'n': nm0005690}]
# ISSUE 3734
require parquet
statement ok
SELECT tconst, list(principals) as principals FROM parquet_scan('data/parquet-testing/bug3734.parquet') GROUP BY 1 limit 10;
# test all data types
foreach type bool tinyint smallint int bigint hugeint uhugeint utinyint usmallint uint ubigint date time timestamp timestamp_s timestamp_ms timestamp_ns time_tz timestamp_tz float double dec_4_1 dec_9_4 dec_18_6 dec38_10 uuid interval varchar blob small_enum medium_enum large_enum int_array double_array date_array timestamp_array timestamptz_array varchar_array nested_int_array struct struct_of_arrays array_of_structs map
statement ok
CREATE TABLE t1 AS SELECT ${type} AS c1 FROM test_all_types();
statement ok
CREATE TABLE t2 AS SELECT UNNEST(LIST(${type})) AS c2 FROM test_all_types();
query I
SELECT * FROM t1 EXCEPT SELECT * FROM t2;
----
query I
SELECT * FROM t2 EXCEPT SELECT * FROM t1;
----
query I
SELECT t1.c1 IS NOT DISTINCT FROM t2.c2
FROM t1, t2
WHERE t1.rowid = t2.rowid;
----
True
True
True
statement ok
DROP TABLE t1;
statement ok
DROP TABLE t2;
endloop
# testing really long strings
statement ok
CREATE TABLE long_str AS SELECT range % 4 g, repeat(range::VARCHAR, 10000) i FROM range (10);
query II
SELECT g, list_count(LIST(i)) FROM long_str GROUP BY g ORDER BY g
----
0 3
1 3
2 2
3 2
# fix issue 5523
statement ok
SELECT LIST(i) OVER (PARTITION BY i % 10 ORDER BY i) FROM range(10000) t(i);
# test linked list capacity of 0
statement ok
CREATE TABLE list_extract_test(i INTEGER, g INTEGER);
statement ok
INSERT INTO list_extract_test VALUES (1, 1), (2, 1), (3, 2), (NULL, 3), (42, 3);
query II
SELECT g, LIST(i ORDER BY i ASC) FILTER (WHERE i <> 3) FROM list_extract_test GROUP BY g ORDER BY ALL;
----
1 [1, 2]
2 NULL
3 [42]

View File

@@ -0,0 +1,135 @@
# name: test/sql/aggregate/aggregates/test_mad.test
# description: Test MAD (Median Absolute Deviation) aggregate
# group: [aggregates]
statement ok
PRAGMA enable_verification
# scalar mad is zero
query II
SELECT mad(NULL), mad(1)
----
NULL 0
# constant mad is zero
query II
SELECT mad(NULL), mad(1) FROM range(2000)
----
NULL 0
#
# Small numerics
#
statement ok
create table tinys as
select range r, random()
from range(100)
union all values (NULL, 0.1), (NULL, 0.5), (NULL, 0.9)
order by 2;
foreach type tinyint decimal(4,1)
query I
SELECT mad(r::${type}) FROM tinys
----
25
query I
SELECT mad(NULL::${type}) FROM tinys
----
NULL
query I
SELECT mad(42::${type}) FROM tinys
----
0
endloop
#
# Large numerics
#
statement ok
create table numerics as
select range r, random()
from range(10000)
union all values (NULL, 0.1), (NULL, 0.5), (NULL, 0.9)
order by 2;
foreach type smallint integer bigint hugeint uhugeint float double decimal(8,1) decimal(12,1) decimal(18,1) decimal(24,1)
query I
SELECT mad(r::${type}) FROM numerics
----
2500
query I
SELECT mad(NULL::${type}) FROM numerics
----
NULL
query I
SELECT mad(42::${type}) FROM numerics
----
0
endloop
#
# Temporal
#
query I
SELECT mad(('2018-01-01'::DATE + INTERVAL (r) DAY)::DATE) FROM numerics
----
2500 days
query I
SELECT mad('2018-01-01'::TIMESTAMP + INTERVAL (r) HOUR) FROM numerics
----
104 days 04:00:00
query I
SELECT mad('00:00:00'::TIME + INTERVAL (r) SECOND) FROM numerics
----
00:41:40
#
# Extreme values
#
query I
select mad(x) from (values ('127'::DECIMAL(3,0)), ('-128'::DECIMAL(3,0))) tbl(x);
----
127
query I
select mad(x) from (values ('32767'::DECIMAL(5,0)), ('-32768'::DECIMAL(5,0))) tbl(x);
----
32767
query I
select mad(x) from (values ('2147483647'::DECIMAL(10,0)), ('-2147483648'::DECIMAL(10,0))) tbl(x);
----
2147483647
statement ok
select mad(x) from (values (-1e308), (1e308)) tbl(x);
query I
select mad(x) from (values ('294247-01-10'::date), ('290309-12-22 (BC)'::date)) tbl(x);
----
106751991 days
query I
select mad(x) from (values
('294247-01-10 04:00:54.775806'::timestamp),
('290309-12-22 (BC) 00:00:00'::timestamp)
) tbl(x);
----
106751991 days 02:00:27.387903
query I
select mad(x) from (values ('23:59:59.999999'::time), ('00:00:00'::time)) tbl(x);
----
12:00:00

View File

@@ -0,0 +1,168 @@
# name: test/sql/aggregate/aggregates/test_median.test
# description: Test MEDIAN aggregate
# group: [aggregates]
statement ok
PRAGMA enable_verification
statement ok
PRAGMA verify_external
# scalar quantiles
query II
SELECT median(NULL), median(1)
----
NULL 1
query II
SELECT median(NULL), median(1) FROM range(2000)
----
NULL 1
statement ok
create table quantile as select range r, random() from range(10000) union all values (NULL, 0.1), (NULL, 0.5), (NULL, 0.9) order by 2;
query I
SELECT median(r)::VARCHAR FROM quantile
----
4999.5
query R
SELECT median(r::float)::VARCHAR FROM quantile
----
4999.5
query R
SELECT median(r::double)::VARCHAR FROM quantile
----
4999.5
query I
SELECT median(r::tinyint)::VARCHAR FROM quantile where r < 100
----
49.5
query I
SELECT median(r::smallint)::VARCHAR FROM quantile
----
4999.5
query I
SELECT median(r::integer)::VARCHAR FROM quantile
----
4999.5
query I
SELECT median(r::bigint)::VARCHAR FROM quantile
----
4999.5
query I
SELECT median(r::hugeint)::VARCHAR FROM quantile
----
4999.5
query I
SELECT median(r::decimal(10,2))::VARCHAR FROM quantile
----
4999.50
query I
SELECT median(case when r is null then null else [r] end)::VARCHAR FROM quantile
----
[4999]
query I
SELECT median(case when r is null then null else {'i': r} end)::VARCHAR FROM quantile
----
{'i': 4999}
# sorting order is different for varchars and numbers
# so a different result here is expected
query I
SELECT median(r::varchar) FROM quantile
----
5498
# adding a prefix
query I
SELECT median(case when r is null then null else concat('thishasalongprefix_', r::varchar) end) FROM quantile
----
thishasalongprefix_5498
query I
SELECT median(NULL) FROM quantile
----
NULL
query I
SELECT median(42) FROM quantile
----
42
#
# Validate even number of values for all types.
#
# Interpolated
foreach type <integral> DECIMAL(4,1) DECIMAL(9,1) DECIMAL(18,1) DECIMAL(38,1) float double
# We don't need to cast these values because
# they are too small for the 1% floating point margin to affect the result
query I
SELECT MEDIAN(range::${type})
FROM range(0,10);
----
4.5
endloop
foreach type date timestamp timestamp_s timestamp_ms timestamp_ns
query I
SELECT MEDIAN(range::${type})
FROM range('2024-01-01'::date, '2024-01-11'::date, INTERVAL 1 DAY);
----
2024-01-05 12:00:00
endloop
query I
SELECT MEDIAN(range::timestamptz)
FROM range('2024-01-01'::date, '2024-01-11'::date, INTERVAL 1 DAY);
----
2024-01-05 12:00:00+00
query I
SELECT MEDIAN('00:00:00'::TIME + INTERVAL (range) HOUR)
FROM range(0,10);
----
04:30:00
query I
SELECT MEDIAN(ttz::TIMETZ)
FROM (VALUES
('00:00:00-09'),
('00:00:00-08'),
('00:00:00-07'),
('00:00:00-06'),
('00:00:00-05'),
('00:00:00-04'),
('00:00:00-03'),
('00:00:00-02'),
('00:00:00-01'),
('00:00:00+00'),
) tbl(ttz);
----
00:00:00-04:30
# Floored
foreach type varchar
query I
SELECT MEDIAN(range::${type}) FROM range(0,10);
----
4
endloop

View File

@@ -0,0 +1,23 @@
# name: test/sql/aggregate/aggregates/test_minmax.test
# description: Test min/max aggregate
# group: [aggregates]
statement ok
PRAGMA enable_verification
statement ok
PRAGMA verify_external
# List slicing
# fixture: single-element lists built from generate_series(0,5,1)
statement ok
create table lists as select array[i] l from generate_series(0,5,1) tbl(i);
# l[1] addresses the single element; the smallest list whose element exceeds 2 is [3]
query I
select min(l) from lists where l[1]>2;
----
[3]
# with index 0 the filter matches no row, so min() returns NULL
# NOTE(review): presumably l[0] evaluates to NULL because list indexing is 1-based -- confirm
query I
select min(l) from lists where l[0]>2;
----
NULL

View File

@@ -0,0 +1,14 @@
# name: test/sql/aggregate/aggregates/test_minmax_14145.test
# description: Test minmax aggregation issue 14145
# group: [aggregates]
require json
statement ok
PRAGMA enable_verification
query IIIIII
DESCRIBE SELECT max(l) from (select unnest( [{'a':1}::JSON, [2]::JSON ]) as l);
----
max(l) JSON YES NULL NULL NULL

View File

@@ -0,0 +1,308 @@
# name: test/sql/aggregate/aggregates/test_mode.test
# description: Test mode function
# group: [aggregates]
statement ok
SET default_null_order='nulls_first';
statement ok
PRAGMA enable_verification
statement ok
PRAGMA verify_external
#Corner cases
statement error
select mode()
----
query I
select mode(NULL)
----
NULL
query I
select mode(1)
----
1
statement error
select mode(*)
----
statement ok
create table aggr(k int, v decimal(10,2));
statement ok
insert into aggr (k, v) values (1, 10), (1, 10), (1, 20), (1, 21);
query I
select mode(v) from aggr;
----
10
query I
SELECT mode(2) FROM range(100);
----
2
statement ok
insert into aggr (k, v) values (2, 20),(2, 20), (2, 25), (2, 30);
query I
SELECT CASE
WHEN ( value = 10 or value = 20)
THEN TRUE
ELSE FALSE
END
FROM (select mode(v) as value from aggr) AS T
----
1
statement ok
insert into aggr (k, v) values (3, null);
query II
select k, mode(v)
from aggr
group by k
order by k;
----
1 10.000000
2 20.000000
3 NULL
statement ok
create table names (name string)
statement ok
insert into names values ('pedro'), ('pedro'), ('pedro'),('hannes'),('hannes'),('mark'),(null);
query I
select mode(name) from names;
----
pedro
query III
select k, v, mode(v) over (partition by k)
from aggr
order by k, v;
----
1 10.00 10.000000
1 10.00 10.000000
1 20.00 10.000000
1 21.00 10.000000
2 20.00 20.000000
2 20.00 20.000000
2 25.00 20.000000
2 30.00 20.000000
3 NULL NULL
# Empty Table
query I
SELECT mode(i) FROM range(100) tbl(i) WHERE 1=0;
----
NULL
# Temporal types
statement ok
create table dates (k int, v date)
statement ok
insert into dates values
(1, '2021-05-02'),
(1, '2021-05-02'),
(2, '2021-05-02'),
(2, '2020-02-29'),
(2, '2020-02-29'),
(3, '2004-09-01'),
(1, null);
query I
select mode(v) from dates;
----
2021-05-02
query II
select k, mode(v) from dates group by k ORDER BY ALL;
----
1 2021-05-02
2 2020-02-29
3 2004-09-01
query III
select k, v, mode(v) over (partition by k)
from dates
order by k, v;
----
1 NULL 2021-05-02
1 2021-05-02 2021-05-02
1 2021-05-02 2021-05-02
2 2020-02-29 2020-02-29
2 2020-02-29 2020-02-29
2 2021-05-02 2020-02-29
3 2004-09-01 2004-09-01
statement ok
create table times (k int, v time)
statement ok
insert into times values
(1, '12:11:49.5'),
(1, '12:11:49.5'),
(2, '12:11:49.5'),
(2, '06:30:00'),
(2, '06:30:00'),
(3, '21:15:22'),
(1, null);
query I
select mode(v) from times;
----
12:11:49.5
query II
select k, mode(v) from times group by k ORDER BY ALL;
----
1 12:11:49.5
2 06:30:00
3 21:15:22
query III
select k, v, mode(v) over (partition by k)
from times
order by k, v;
----
1 NULL 12:11:49.5
1 12:11:49.5 12:11:49.5
1 12:11:49.5 12:11:49.5
2 06:30:00 06:30:00
2 06:30:00 06:30:00
2 12:11:49.5 06:30:00
3 21:15:22 21:15:22
statement ok
create table timestamps (k int, v timestamp)
statement ok
insert into timestamps values
(1, '2021-05-02 12:11:49.5'),
(1, '2021-05-02 12:11:49.5'),
(2, '2021-05-02 12:11:49.5'),
(2,'2020-02-29 06:30:00'),
(2,'2020-02-29 06:30:00'),
(3,'2004-09-01 21:15:22'),
(1, null);
query I
select mode(v) from timestamps;
----
2021-05-02 12:11:49.5
query II
select k, mode(v) from timestamps group by k ORDER BY ALL;
----
1 2021-05-02 12:11:49.5
2 2020-02-29 06:30:00
3 2004-09-01 21:15:22
query III
select k, v, mode(v) over (partition by k)
from timestamps
order by k, v;
----
1 NULL 2021-05-02 12:11:49.5
1 2021-05-02 12:11:49.5 2021-05-02 12:11:49.5
1 2021-05-02 12:11:49.5 2021-05-02 12:11:49.5
2 2020-02-29 06:30:00 2020-02-29 06:30:00
2 2020-02-29 06:30:00 2020-02-29 06:30:00
2 2021-05-02 12:11:49.5 2020-02-29 06:30:00
3 2004-09-01 21:15:22 2004-09-01 21:15:22
statement ok
create table intervals (k int, v interval)
statement ok
insert into intervals values
(1, INTERVAL '5 months 2 days 12 hours 11 minutes 49 seconds'),
(1, INTERVAL '5 months 2 days 12 hours 11 minutes 49 seconds'),
(2, INTERVAL '5 months 2 days 12 hours 11 minutes 49 seconds'),
(2, INTERVAL '2 months 29 days 6 hours 30 minutes'),
(2, INTERVAL '2 months 29 days 6 hours 30 minutes'),
(3, INTERVAL '9 months 1 day 21 hours 15 minutes 22 seconds'),
(1, null);
query I
select mode(v) from intervals;
----
5 months 2 days 12:11:49
query II
select k, mode(v) from intervals group by k ORDER BY ALL;
----
1 5 months 2 days 12:11:49
2 2 months 29 days 06:30:00
3 9 months 1 day 21:15:22
query III
select k, v, mode(v) over (partition by k)
from intervals
order by k, v;
----
1 NULL 5 months 2 days 12:11:49
1 5 months 2 days 12:11:49 5 months 2 days 12:11:49
1 5 months 2 days 12:11:49 5 months 2 days 12:11:49
2 2 months 29 days 06:30:00 2 months 29 days 06:30:00
2 2 months 29 days 06:30:00 2 months 29 days 06:30:00
2 5 months 2 days 12:11:49 2 months 29 days 06:30:00
3 9 months 1 day 21:15:22 9 months 1 day 21:15:22
# Huge integers
statement ok
create table hugeints (k int, v hugeint)
statement ok
insert into hugeints values (1, 5), (1, 5), (2, 5), (2, 2), (2, 2), (3, 1), (1, null);
query I
select mode(v) from hugeints;
----
5
query II
select k, mode(v) from hugeints group by k ORDER BY ALL;
----
1 5
2 2
3 1
query III
select k, v, mode(v) over (partition by k)
from hugeints
order by k, v;
----
1 NULL 5
1 5 5
1 5 5
2 2 2
2 2 2
2 5 2
3 1 1
# MODE is order-sensitive, so this should bind and return the larger value
query I
SELECT MODE(order_occurrences ORDER BY order_occurrences DESC) FROM (
VALUES
(500, 1),
(1000, 2),
(800, 3),
(1000, 4),
(500, 5),
(550, 6),
(400, 7),
(200, 8),
(10, 9)
)items_per_order(order_occurrences, item_count);
----
1000

View File

@@ -0,0 +1,409 @@
# name: test/sql/aggregate/aggregates/test_null_aggregates.test
# description: Test aggregates over many nulls on smallint column
# group: [aggregates]
statement ok
SET default_null_order='nulls_first';
statement ok
PRAGMA enable_verification
statement ok
CREATE TABLE t1(c0 BIGINT, c1 SMALLINT);
statement ok
INSERT INTO t1 VALUES(NULL,NULL);
statement ok
INSERT INTO t1 VALUES(NULL,NULL);
statement ok
INSERT INTO t1 VALUES(NULL,NULL);
statement ok
INSERT INTO t1 VALUES(NULL,NULL);
statement ok
INSERT INTO t1 VALUES(NULL,NULL);
statement ok
INSERT INTO t1 VALUES(NULL,NULL);
statement ok
INSERT INTO t1 VALUES(NULL,NULL);
statement ok
INSERT INTO t1 VALUES(NULL,NULL);
statement ok
INSERT INTO t1 VALUES(-9121942514766415310,NULL);
statement ok
INSERT INTO t1 VALUES(-9113483941634330359,NULL);
statement ok
INSERT INTO t1 VALUES(-8718457747090493475,NULL);
statement ok
INSERT INTO t1 VALUES(-7650527153348320600,NULL);
statement ok
INSERT INTO t1 VALUES(-7511073704802549520,NULL);
statement ok
INSERT INTO t1 VALUES(-7342137292157212364,NULL);
statement ok
INSERT INTO t1 VALUES(-7003121677824953185,NULL);
statement ok
INSERT INTO t1 VALUES(-6971852266038069200,NULL);
statement ok
INSERT INTO t1 VALUES(-6873545755554765972,NULL);
statement ok
INSERT INTO t1 VALUES(-6355311124878824053,NULL);
statement ok
INSERT INTO t1 VALUES(-6350463272352412486,NULL);
statement ok
INSERT INTO t1 VALUES(-5908442705000090253,NULL);
statement ok
INSERT INTO t1 VALUES(-5897662788702027960,NULL);
statement ok
INSERT INTO t1 VALUES(-5877879044803815845,NULL);
statement ok
INSERT INTO t1 VALUES(-5732980609151508408,NULL);
statement ok
INSERT INTO t1 VALUES(-5361272612100082873,NULL);
statement ok
INSERT INTO t1 VALUES(-5336571579832669145,NULL);
statement ok
INSERT INTO t1 VALUES(-4928993529687100359,NULL);
statement ok
INSERT INTO t1 VALUES(-4468905900574568755,NULL);
statement ok
INSERT INTO t1 VALUES(-4170492860397664351,NULL);
statement ok
INSERT INTO t1 VALUES(-3684174996218175685,NULL);
statement ok
INSERT INTO t1 VALUES(-3550425917959859111,NULL);
statement ok
INSERT INTO t1 VALUES(-3538537641982313134,NULL);
statement ok
INSERT INTO t1 VALUES(-3509778083052175642,NULL);
statement ok
INSERT INTO t1 VALUES(-3297429447844697659,NULL);
statement ok
INSERT INTO t1 VALUES(-3285304895013369375,NULL);
statement ok
INSERT INTO t1 VALUES(-2783073089603195828,NULL);
statement ok
INSERT INTO t1 VALUES(-2422155131602272083,NULL);
statement ok
INSERT INTO t1 VALUES(-2411133157184452856,NULL);
statement ok
INSERT INTO t1 VALUES(-2353272908390735004,NULL);
statement ok
INSERT INTO t1 VALUES(-2242558770815087701,NULL);
statement ok
INSERT INTO t1 VALUES(-1554405226393925625,NULL);
statement ok
INSERT INTO t1 VALUES(-1337520990873830579,NULL);
statement ok
INSERT INTO t1 VALUES(-1217288122333132479,NULL);
statement ok
INSERT INTO t1 VALUES(-829779308050048379,NULL);
statement ok
INSERT INTO t1 VALUES(-783860634233596188,NULL);
statement ok
INSERT INTO t1 VALUES(-750940733896551510,NULL);
statement ok
INSERT INTO t1 VALUES(-595923232719547231,NULL);
statement ok
INSERT INTO t1 VALUES(-542467477806120649,NULL);
statement ok
INSERT INTO t1 VALUES(-424237581585430344,NULL);
statement ok
INSERT INTO t1 VALUES(-214362279664766533,NULL);
statement ok
INSERT INTO t1 VALUES(-71301914094672848,NULL);
statement ok
INSERT INTO t1 VALUES(85486376371946746,NULL);
statement ok
INSERT INTO t1 VALUES(88239714065746993,NULL);
statement ok
INSERT INTO t1 VALUES(587212336705139504,NULL);
statement ok
INSERT INTO t1 VALUES(672222439154311688,NULL);
statement ok
INSERT INTO t1 VALUES(831201880315087268,NULL);
statement ok
INSERT INTO t1 VALUES(995204053540447006,NULL);
statement ok
INSERT INTO t1 VALUES(1246914698489704287,NULL);
statement ok
INSERT INTO t1 VALUES(1546231510864932275,NULL);
statement ok
INSERT INTO t1 VALUES(1791765016181687769,NULL);
statement ok
INSERT INTO t1 VALUES(1799302827895858725,NULL);
statement ok
INSERT INTO t1 VALUES(2026591599286391832,NULL);
statement ok
INSERT INTO t1 VALUES(2195119737828970803,NULL);
statement ok
INSERT INTO t1 VALUES(2342493223442167775,NULL);
statement ok
INSERT INTO t1 VALUES(2453343748991321803,NULL);
statement ok
INSERT INTO t1 VALUES(2499109626526694126,NULL);
statement ok
INSERT INTO t1 VALUES(2753988324592681474,NULL);
statement ok
INSERT INTO t1 VALUES(2810878285747130284,NULL);
statement ok
INSERT INTO t1 VALUES(2848885963459816804,NULL);
statement ok
INSERT INTO t1 VALUES(2915647809434477614,NULL);
statement ok
INSERT INTO t1 VALUES(3475034101394730335,NULL);
statement ok
INSERT INTO t1 VALUES(3626542162137919338,NULL);
statement ok
INSERT INTO t1 VALUES(3877673001272535186,NULL);
statement ok
INSERT INTO t1 VALUES(4007330825134180665,NULL);
statement ok
INSERT INTO t1 VALUES(4077358421272316858,NULL);
statement ok
INSERT INTO t1 VALUES(4690678276679226532,NULL);
statement ok
INSERT INTO t1 VALUES(4866304904348119643,NULL);
statement ok
INSERT INTO t1 VALUES(5214401850561094529,NULL);
statement ok
INSERT INTO t1 VALUES(5272799208960207736,NULL);
statement ok
INSERT INTO t1 VALUES(5530918740051863299,NULL);
statement ok
INSERT INTO t1 VALUES(5569314186296520615,NULL);
statement ok
INSERT INTO t1 VALUES(5740904173463435848,NULL);
statement ok
INSERT INTO t1 VALUES(5849452934504718062,NULL);
statement ok
INSERT INTO t1 VALUES(6218815181136940951,NULL);
statement ok
INSERT INTO t1 VALUES(6275945720557189700,NULL);
statement ok
INSERT INTO t1 VALUES(6279008355318181000,NULL);
statement ok
INSERT INTO t1 VALUES(7017987158241964732,NULL);
statement ok
INSERT INTO t1 VALUES(7237035290160030660,NULL);
statement ok
INSERT INTO t1 VALUES(7374688146326987272,NULL);
statement ok
INSERT INTO t1 VALUES(7612353589185494102,NULL);
statement ok
INSERT INTO t1 VALUES(7958180433948844465,NULL);
statement ok
INSERT INTO t1 VALUES(8093404925372580611,NULL);
statement ok
INSERT INTO t1 VALUES(8165972772169640480,NULL);
statement ok
INSERT INTO t1 VALUES(8531143325322891078,NULL);
statement ok
INSERT INTO t1 VALUES(8658728983219000078,NULL);
statement ok
INSERT INTO t1 VALUES(8730638167239698291,NULL);
statement ok
INSERT INTO t1 VALUES(8757751876611013998,NULL);
statement ok
INSERT INTO t1 VALUES(8994059213096666367,NULL);
statement ok
INSERT INTO t1 VALUES(9034558451786630908,NULL);
statement ok
INSERT INTO t1 VALUES(9049770455330813268,NULL);
statement ok
INSERT INTO t1 VALUES(9196517019233481682,NULL);
query IIII
SELECT c0, sum(c1), min(c1), max(c1) FROM t1 GROUP BY c0 ORDER BY 1, 2, 3, 4
----
NULL NULL NULL NULL
-9121942514766415310 NULL NULL NULL
-9113483941634330359 NULL NULL NULL
-8718457747090493475 NULL NULL NULL
-7650527153348320600 NULL NULL NULL
-7511073704802549520 NULL NULL NULL
-7342137292157212364 NULL NULL NULL
-7003121677824953185 NULL NULL NULL
-6971852266038069200 NULL NULL NULL
-6873545755554765972 NULL NULL NULL
-6355311124878824053 NULL NULL NULL
-6350463272352412486 NULL NULL NULL
-5908442705000090253 NULL NULL NULL
-5897662788702027960 NULL NULL NULL
-5877879044803815845 NULL NULL NULL
-5732980609151508408 NULL NULL NULL
-5361272612100082873 NULL NULL NULL
-5336571579832669145 NULL NULL NULL
-4928993529687100359 NULL NULL NULL
-4468905900574568755 NULL NULL NULL
-4170492860397664351 NULL NULL NULL
-3684174996218175685 NULL NULL NULL
-3550425917959859111 NULL NULL NULL
-3538537641982313134 NULL NULL NULL
-3509778083052175642 NULL NULL NULL
-3297429447844697659 NULL NULL NULL
-3285304895013369375 NULL NULL NULL
-2783073089603195828 NULL NULL NULL
-2422155131602272083 NULL NULL NULL
-2411133157184452856 NULL NULL NULL
-2353272908390735004 NULL NULL NULL
-2242558770815087701 NULL NULL NULL
-1554405226393925625 NULL NULL NULL
-1337520990873830579 NULL NULL NULL
-1217288122333132479 NULL NULL NULL
-829779308050048379 NULL NULL NULL
-783860634233596188 NULL NULL NULL
-750940733896551510 NULL NULL NULL
-595923232719547231 NULL NULL NULL
-542467477806120649 NULL NULL NULL
-424237581585430344 NULL NULL NULL
-214362279664766533 NULL NULL NULL
-71301914094672848 NULL NULL NULL
85486376371946746 NULL NULL NULL
88239714065746993 NULL NULL NULL
587212336705139504 NULL NULL NULL
672222439154311688 NULL NULL NULL
831201880315087268 NULL NULL NULL
995204053540447006 NULL NULL NULL
1246914698489704287 NULL NULL NULL
1546231510864932275 NULL NULL NULL
1791765016181687769 NULL NULL NULL
1799302827895858725 NULL NULL NULL
2026591599286391832 NULL NULL NULL
2195119737828970803 NULL NULL NULL
2342493223442167775 NULL NULL NULL
2453343748991321803 NULL NULL NULL
2499109626526694126 NULL NULL NULL
2753988324592681474 NULL NULL NULL
2810878285747130284 NULL NULL NULL
2848885963459816804 NULL NULL NULL
2915647809434477614 NULL NULL NULL
3475034101394730335 NULL NULL NULL
3626542162137919338 NULL NULL NULL
3877673001272535186 NULL NULL NULL
4007330825134180665 NULL NULL NULL
4077358421272316858 NULL NULL NULL
4690678276679226532 NULL NULL NULL
4866304904348119643 NULL NULL NULL
5214401850561094529 NULL NULL NULL
5272799208960207736 NULL NULL NULL
5530918740051863299 NULL NULL NULL
5569314186296520615 NULL NULL NULL
5740904173463435848 NULL NULL NULL
5849452934504718062 NULL NULL NULL
6218815181136940951 NULL NULL NULL
6275945720557189700 NULL NULL NULL
6279008355318181000 NULL NULL NULL
7017987158241964732 NULL NULL NULL
7237035290160030660 NULL NULL NULL
7374688146326987272 NULL NULL NULL
7612353589185494102 NULL NULL NULL
7958180433948844465 NULL NULL NULL
8093404925372580611 NULL NULL NULL
8165972772169640480 NULL NULL NULL
8531143325322891078 NULL NULL NULL
8658728983219000078 NULL NULL NULL
8730638167239698291 NULL NULL NULL
8757751876611013998 NULL NULL NULL
8994059213096666367 NULL NULL NULL
9034558451786630908 NULL NULL NULL
9049770455330813268 NULL NULL NULL
9196517019233481682 NULL NULL NULL

View File

@@ -0,0 +1,96 @@
# name: test/sql/aggregate/aggregates/test_order_by_aggregate.test
# description: Test ORDER BY in AGGREGATE clause
# group: [aggregates]
statement ok
PRAGMA enable_verification
statement ok
CREATE TABLE integers(grp INTEGER, i INTEGER);
statement ok
INSERT INTO integers VALUES (1, 10), (2, 15), (1, 30), (2, 20)
query I
SELECT FIRST(i ORDER BY i) FROM integers
----
10
# ordering by the same aggregate multiple times has no effect
query I
SELECT FIRST(i ORDER BY i, i, i) FROM integers
----
10
# a repeated sort key with a conflicting direction does not change the outcome:
# the first occurrence (ASC) decides, so the smallest value is still returned
query I
SELECT FIRST(i ORDER BY i, i DESC, i) FROM integers
----
10
query I
SELECT FIRST(i ORDER BY i DESC) FROM integers
----
30
# same as above with the directions swapped: the leading DESC decides
query I
SELECT FIRST(i ORDER BY i DESC, i ASC) FROM integers
----
30
query II
SELECT FIRST(i ORDER BY i), FIRST(i ORDER BY i DESC) FROM integers
----
10 30
query II
SELECT grp, FIRST(i ORDER BY i) FROM integers GROUP BY grp ORDER BY ALL
----
1 10
2 15
# ordering by the group has no effect
query II
SELECT grp, FIRST(i ORDER BY grp, i, grp DESC, i DESC) FROM integers GROUP BY grp ORDER BY ALL
----
1 10
2 15
query II
SELECT grp, FIRST(i ORDER BY i DESC) FROM integers GROUP BY grp ORDER BY ALL
----
1 30
2 20
# DISTINCT aggregates combined with ORDER BY and FILTER
# fixture: one user with a duplicated cause ('Environmental') and one NULL cause
statement ok
CREATE TABLE user_causes (
user_id INT,
cause VARCHAR,
"date" DATE
);
statement ok
INSERT INTO user_causes (user_id, cause, "date") VALUES
(1, 'Environmental', '2024-03-18'),
(1, 'Environmental', '2024-02-18'),
(1, 'Health', '2024-01-18'),
(1, 'Social', '2023-12-18'),
(1, NULL, '2023-11-19');
# ordering a DISTINCT aggregate by a column that is not one of its arguments is rejected
statement error
SELECT
user_id,
list(DISTINCT cause ORDER BY "date" DESC) FILTER(cause IS NOT NULL) AS causes
FROM user_causes
GROUP BY user_id;
----
Binder Error: In a DISTINCT aggregate, ORDER BY expressions must appear in the argument list
# ordering by the aggregated column itself is accepted; the duplicate and the NULL are dropped
query II
SELECT
user_id,
list(DISTINCT cause ORDER BY cause DESC) FILTER(cause IS NOT NULL) AS causes
FROM user_causes
GROUP BY user_id;
----
1 [Social, Health, Environmental]

View File

@@ -0,0 +1,196 @@
# name: test/sql/aggregate/aggregates/test_ordered_aggregates.test
# description: Test rewrites of ordered aggregates
# group: [aggregates]
statement ok
PRAGMA enable_verification
statement ok
CREATE TABLE flights(
"year" INTEGER,
"month" INTEGER,
"day" INTEGER,
dep_time INTEGER,
sched_dep_time INTEGER,
dep_delay DOUBLE,
arr_time INTEGER,
sched_arr_time INTEGER,
arr_delay DOUBLE,
carrier VARCHAR,
flight INTEGER,
tailnum VARCHAR,
origin VARCHAR,
dest VARCHAR,
air_time DOUBLE,
distance DOUBLE,
"hour" DOUBLE,
"minute" DOUBLE,
time_hour TIMESTAMP);
statement ok
SELECT "dest", mode() WITHIN GROUP (ORDER BY "arr_delay") AS "median_delay"
FROM "flights"
GROUP BY "dest"
statement ok
SELECT "dest", percentile_cont(0.5) WITHIN GROUP (ORDER BY "arr_delay") AS "median_delay"
FROM "flights"
GROUP BY "dest"
statement ok
SELECT "dest", percentile_cont([0.25, 0.5, 0.75]) WITHIN GROUP (ORDER BY "arr_delay") AS "iqr_delay"
FROM "flights"
GROUP BY "dest"
statement ok
SELECT "dest", percentile_disc(0.5) WITHIN GROUP (ORDER BY "arr_delay") AS "median_delay"
FROM "flights"
GROUP BY "dest"
statement ok
SELECT "dest", percentile_disc([0.25, 0.5, 0.75]) WITHIN GROUP (ORDER BY "arr_delay") AS "iqr_delay"
FROM "flights"
GROUP BY "dest"
# ORDER BY DESC
query I
select percentile_disc(0.25) within group(order by i desc) from generate_series(0,100) tbl(i);
----
75
query I
select percentile_disc([0.25, 0.5, 0.75]) within group(order by i desc) from generate_series(0,100) tbl(i);
----
[75, 50, 25]
query I
select percentile_cont(0.25) within group(order by i desc) from generate_series(0,100) tbl(i);
----
75.000000
query I
select percentile_cont([0.25, 0.5, 0.75]) within group(order by i desc) from generate_series(0,100) tbl(i);
----
[75.0, 50.0, 25.0]
# DESC boundary condition
query I
SELECT percentile_disc(.5) WITHIN GROUP (order by col desc)
FROM VALUES (11000), (3100), (2900), (2800), (2600), (2500) AS tab(col);
----
2900
query I
SELECT percentile_disc([.25, .5, .75]) WITHIN GROUP (order by col desc)
FROM VALUES (11000), (3100), (2900), (2800), (2600), (2500) AS tab(col);
----
[3100, 2900, 2600]
# MODE is order-sensitive
query I
SELECT MODE() WITHIN GROUP (ORDER BY order_occurrences DESC) FROM (
VALUES
(500, 1),
(1000, 2),
(800, 3),
(1000, 4),
(500, 5),
(550, 6),
(400, 7),
(200, 8),
(10, 9)
) items_per_order(order_occurrences, item_count);
----
1000
#
# Error checking
#
# Cannot use multiple ORDER BY clauses with WITHIN GROUP
statement error
SELECT "dest", mode() WITHIN GROUP (ORDER BY "arr_delay", "arr_time") AS "median_delay"
FROM "flights"
GROUP BY "dest"
----
# Unknown ordered aggregate "duck".
statement error
SELECT "dest", duck(0.5) WITHIN GROUP (ORDER BY "arr_delay") AS "duck_delay"
FROM "flights"
GROUP BY "dest"
----
# Wrong number of arguments for PERCENTILE_DISC
statement error
select percentile_disc() within group(order by i) from generate_series(0,100) tbl(i);
----
statement error
select percentile_disc(0.25, 0.5) within group(order by i) from generate_series(0,100) tbl(i);
----
# Wrong number of arguments for PERCENTILE_CONT
statement error
select percentile_cont() within group(order by i) from generate_series(0,100) tbl(i);
----
statement error
select percentile_cont(0.25, 0.5) within group(order by i) from generate_series(0,100) tbl(i);
----
# NaN is not allowed
statement error
SELECT percentile_disc(CAST('NaN' AS REAL)) WITHIN GROUP (ORDER BY 1);
----
# Empty list is also not allowed
statement error
SELECT percentile_disc([]) WITHIN GROUP (ORDER BY LAST);
----
# Wrong number of arguments for MODE
statement error
select mode(0.25) within group(order by i) from generate_series(0,100) tbl(i);
----
# No function matches the given name and argument types 'quantile_disc(BIGINT, VARCHAR)'
statement error
select percentile_disc('duck') within group(order by i) from generate_series(0,100) tbl(i);
----
# No function matches the given name and argument types 'quantile_cont(BIGINT, VARCHAR)'
statement error
select percentile_cont('duck') within group(order by i) from generate_series(0,100) tbl(i);
----
# aggregate function calls cannot be nested
statement error
SELECT percentile_disc(sum(1)) WITHIN GROUP (ORDER BY 1 DESC);
----
# NULL fractions with DESC should not be inverted
statement error
SELECT percentile_disc(strftime(DATE '1-11-25',NULL)) WITHIN GROUP (ORDER BY 1 DESC);
----
statement error
SELECT percentile_cont(CASE 1 WHEN 2 THEN 3 END) WITHIN GROUP (ORDER BY 1 DESC);
----
# Negative fractions are not allowed
foreach sense ASC DESC
statement error
SELECT percentile_disc(-.5) WITHIN GROUP (order by col ${sense})
FROM VALUES (11000), (3100), (2900), (2800), (2600), (2500) AS tab(col);
----
statement error
SELECT percentile_disc([-.25, .5, .75]) WITHIN GROUP (order by col ${sense})
FROM VALUES (11000), (3100), (2900), (2800), (2600), (2500) AS tab(col);
----
endloop

View File

@@ -0,0 +1,189 @@
# name: test/sql/aggregate/aggregates/test_perfect_ht.test
# description: Test aggregates that can trigger a perfect HT
# group: [aggregates]
statement ok
SET default_null_order='nulls_first';
statement ok
PRAGMA enable_verification
statement ok
PRAGMA perfect_ht_threshold=20;
foreach type INTEGER UINTEGER BIGINT UBIGINT
foreach optimizer 'compressed_materialization' ''
# we run these tests with and without compressed materialization to make sure we cover all types
statement ok
SET disabled_optimizers to ${optimizer}
statement ok
CREATE OR REPLACE TABLE timeseries(year ${type}, val ${type});
statement ok
INSERT INTO timeseries VALUES (1996, 10), (1997, 12), (1996, 20), (2001, 30), (NULL, 1), (1996, NULL);
query IIII
SELECT year, SUM(val), COUNT(val), COUNT(*) FROM timeseries GROUP BY year ORDER BY year;
----
NULL 1 1 1
1996 30 2 3
1997 12 1 1
2001 30 1 1
# use aggregates with destructors
query III
SELECT year, LIST(val), STRING_AGG(val::VARCHAR, ',') FROM timeseries GROUP BY year ORDER BY year;
----
NULL [1] 1
1996 [10, 20, NULL] 10,20
1997 [12] 12
2001 [30] 30
endloop
endloop
# many small columns each having only the values 0 and 1
# total possible combinations is 2^10, but there are only 2 groups
statement ok
create table manycolumns as select i a, i b, i c, i d, i e from range(0,2) tbl(i);
query IIIII rowsort
select a, b, c, d, e FROM manycolumns GROUP BY 1, 2, 3, 4, 5
----
0 0 0 0 0
1 1 1 1 1
# test edge cases: tinyints without statistics
# create a table of tinyints in [-127, 127] stored as varchar
# (the upper bound 128 passed to range() is exclusive, hence 255 distinct values)
# by forcing a varchar to tinyint cast we lose statistics
statement ok
CREATE TABLE tinyints AS SELECT i::TINYINT::VARCHAR AS t FROM range(-127, 128) tbl(i);
query IIII
SELECT COUNT(DISTINCT i), MIN(i), MAX(i), SUM(i) / COUNT(i) FROM (SELECT t::TINYINT t1 FROM tinyints GROUP BY t1) tbl(i)
----
255 -127 127 0
# now do the same with a single smallint column
statement ok
CREATE TABLE smallints AS SELECT i::SMALLINT::VARCHAR AS t FROM range(-32767, 32768) tbl(i);
query IIII
SELECT COUNT(DISTINCT i), MIN(i), MAX(i), SUM(i) / COUNT(i) FROM (SELECT t::SMALLINT t1 FROM smallints GROUP BY t1) tbl(i)
----
65535 -32767 32767 0
# test result ordering of perfect HT
statement ok
PRAGMA disable_verification
statement ok
create table dates as select date '1992-01-01' + concat(i, ' months')::interval as d from range(100) tbl(i);
query II
select extract(year from d), extract(month from d) from dates group by 1, 2 ORDER BY ALL;
----
1992 1
1992 2
1992 3
1992 4
1992 5
1992 6
1992 7
1992 8
1992 9
1992 10
1992 11
1992 12
1993 1
1993 2
1993 3
1993 4
1993 5
1993 6
1993 7
1993 8
1993 9
1993 10
1993 11
1993 12
1994 1
1994 2
1994 3
1994 4
1994 5
1994 6
1994 7
1994 8
1994 9
1994 10
1994 11
1994 12
1995 1
1995 2
1995 3
1995 4
1995 5
1995 6
1995 7
1995 8
1995 9
1995 10
1995 11
1995 12
1996 1
1996 2
1996 3
1996 4
1996 5
1996 6
1996 7
1996 8
1996 9
1996 10
1996 11
1996 12
1997 1
1997 2
1997 3
1997 4
1997 5
1997 6
1997 7
1997 8
1997 9
1997 10
1997 11
1997 12
1998 1
1998 2
1998 3
1998 4
1998 5
1998 6
1998 7
1998 8
1998 9
1998 10
1998 11
1998 12
1999 1
1999 2
1999 3
1999 4
1999 5
1999 6
1999 7
1999 8
1999 9
1999 10
1999 11
1999 12
2000 1
2000 2
2000 3
2000 4

View File

@@ -0,0 +1,68 @@
# name: test/sql/aggregate/aggregates/test_product.test
# description: Test Product operator
# group: [aggregates]
#Corner cases
statement error
select product()
----
query I
select product(NULL)
----
NULL
query I
select product(1)
----
1
statement error
select product(*)
----
statement ok
CREATE TABLE integers(i INTEGER);
statement ok
INSERT INTO integers VALUES (1), (2),(4), (NULL)
# NULL inputs are skipped: 1 * 2 * 4 = 8
query I
SELECT product(i) FROM integers
----
8
# a constant 2 multiplied over 100 rows: 2^100
query I
SELECT PRODUCT(2) FROM range(100);
----
1267650600228229401496703205376
# only the 50 odd values of i pass the filter: 2^50
query I
SELECT PRODUCT(2) FROM range(100) tbl(i) WHERE i % 2 != 0;
----
1125899906842624
# Grouped Aggregation
query I
select product(i) from integers group by i%2 order by all
----
1.000000
8.000000
NULL
# Empty Table
query I
SELECT PRODUCT(i) FROM range(100) tbl(i) WHERE 1=0;
----
NULL
# Window Function
query I rowsort
select product(i) over (partition by i%2)
from integers;
----
1
8
8
NULL

View File

@@ -0,0 +1,332 @@
# name: test/sql/aggregate/aggregates/test_quantile_cont.test
# description: Test QUANTILE_CONT aggregate
# group: [aggregates]
statement ok
SET default_null_order='nulls_first';
statement ok
PRAGMA enable_verification
statement ok
PRAGMA verify_external
# Get around 1% approximately equal for R
statement ok
create table quantile as select range r, random() from range(0,1000000,100) union all values (NULL, 0.1), (NULL, 0.5), (NULL, 0.9) order by 2;
statement error
SELECT quantile_cont(r, NULL) FROM quantile
----
Binder Error: QUANTILE argument must not be NULL
query R
SELECT quantile_cont(r, 0.5) FROM quantile
----
499950
query R
SELECT quantile_cont(r::decimal(10,2), 0.5) FROM quantile
----
499950
query R
SELECT quantile_cont(r, 1.0) FROM quantile
----
999900
query R
SELECT quantile_cont(r, 0.0) FROM quantile
----
0
query R
SELECT quantile_cont(NULL, 0.5) FROM quantile
----
NULL
query R
SELECT quantile_cont(42, 0.5) FROM quantile
----
42
query R
SELECT quantile_cont(NULL, 0.5)
----
NULL
query R
SELECT quantile_cont(42, 0.5)
----
42
# single GROUP
query RRR
SELECT quantile_cont(r, 0.25), quantile_cont(r, 0.5), quantile_cont(r, 0.75) from quantile
----
249975
499950
749925
foreach type decimal(4,1) decimal(8,1) decimal(12,1) decimal(18,1) decimal(24,1)
query III
SELECT quantile_cont(d::${type}, 0.25), quantile_cont(d::${type}, 0.5), quantile_cont(d::${type}, 0.75)
FROM range(0,100) tbl(d)
----
24.7 49.5 74.2
endloop
# multiple groups
query RR
SELECT mod(r,1000) as g, quantile_cont(r, 0.25) FROM quantile GROUP BY 1 ORDER BY 1
----
NULL NULL
0 249750
100 249850
200 249950
300 250050
400 250150
500 250250
600 250350
700 250450
800 250550
900 250650
# temporal types
query I
SELECT quantile_cont('2021-01-01'::TIMESTAMP + interval (r) second, 0.5) FROM quantile
----
2021-01-06 18:52:30
query I
SELECT quantile_cont(('1990-01-01'::DATE + interval (r/100) day)::DATE, 0.5) FROM quantile
----
2003-09-09 12:00:00
query I
SELECT quantile_cont('00:00:00'::TIME + interval (r/100) second, 0.5) FROM quantile
----
01:23:19.5
statement error
SELECT quantile_cont(interval (r/100) second, 0.5) FROM quantile
----
<REGEX>:.*Binder Error: No function matches.*'quantile_cont.*
# WITH TIME ZONE
query I
SELECT quantile_cont(('2021-01-01'::TIMESTAMP + interval (r) second)::TIMESTAMPTZ, 0.5) FROM quantile
----
2021-01-06 18:52:30+00
# constant input
query R
SELECT quantile_cont(1, 0.1) FROM quantile
----
1
# Negative/Descending fractions
query I
SELECT quantile_cont(r, -0.1) FROM quantile
----
899910.0
# ORDER BY ... DESC
query II
SELECT
percentile_cont(0.8) WITHIN GROUP (ORDER BY x DESC),
quantile_cont(x, 0.8 ORDER BY x DESC),
FROM
(VALUES (2), (1)) _(x);
----
1.2 1.2
# empty input
query R
SELECT quantile_cont(r, 0.1) FROM quantile WHERE 1=0
----
NULL
statement error
SELECT quantile_cont(r, -1.1) FROM quantile
----
Binder Error: QUANTILE can only take parameters in the range [-1, 1]
statement error
SELECT quantile_cont(r, 1.1) FROM quantile
----
Binder Error: QUANTILE can only take parameters in the range [-1, 1]
statement error
SELECT quantile_cont(r, "string") FROM quantile
----
Binder Error: Referenced column "string" not found in FROM clause
statement error
SELECT quantile_cont(r, NULL) FROM quantile
----
Binder Error: QUANTILE argument must not be NULL
statement error
SELECT quantile_cont(r::string, 0.5) FROM quantile
----
<REGEX>:.*Binder Error: No function matches.*'quantile_cont.*
statement error
SELECT quantile_cont(r) FROM quantile
----
<REGEX>:.*Binder Error: No function matches.*'quantile_cont.*
statement error
SELECT quantile_cont(r, 0.1, 50) FROM quantile
----
<REGEX>:.*Binder Error: No function matches.*'quantile_cont.*
statement ok
pragma threads=4
statement ok
PRAGMA verify_parallelism
# single GROUP
query RRR
SELECT quantile_cont(r, 0.25), quantile_cont(r, 0.5), quantile_cont(r, 0.75) from quantile
----
249975
499950
749925
# multiple groups
query RR
SELECT mod(r,1000) as g, quantile_cont(r, 0.25) FROM quantile GROUP BY 1 ORDER BY 1
----
NULL NULL
0 249750
100 249850
200 249950
300 250050
400 250150
500 250250
600 250350
700 250450
800 250550
900 250650
# constant input
query R
SELECT quantile_cont(1, 0.1) FROM quantile
----
1
# empty input
query R
SELECT quantile_cont(r, 0.1) FROM quantile WHERE 1=0
----
NULL
# TINYINT extremes
query I
SELECT quantile_cont(t, 0.5) FROM (VALUES (120::TINYINT), (122::TINYINT)) tbl(t)
----
121
statement ok
CREATE TABLE tinyints(t TINYINT);
statement ok
INSERT INTO tinyints VALUES (-127), (-127);
query I
SELECT quantile_cont(t, 0.5) FROM tinyints;
----
-127
statement ok
UPDATE tinyints SET t=-t;
query I
SELECT quantile_cont(t, 0.5) FROM tinyints;
----
127
# SMALLINT extremes
query I
SELECT quantile_cont(t, 0.5) FROM (VALUES (32764::SMALLINT), (32766::SMALLINT)) tbl(t)
----
32765
statement ok
CREATE TABLE smallints(t SMALLINT);
statement ok
INSERT INTO smallints VALUES (-32767), (-32767);
query I
SELECT quantile_cont(t, 0.5) FROM smallints;
----
-32767
statement ok
UPDATE smallints SET t=-t;
query I
SELECT quantile_cont(t, 0.5) FROM smallints;
----
32767
# INTEGER extremes
query I
SELECT quantile_cont(t, 0.5) FROM (VALUES (2147483642::INTEGER), (2147483644::INTEGER)) tbl(t)
----
2147483643
statement ok
CREATE TABLE integers(t INTEGER);
statement ok
INSERT INTO integers VALUES (-2147483647), (-2147483647);
query I
SELECT quantile_cont(t, 0.5) FROM integers;
----
-2147483647
statement ok
UPDATE integers SET t=-t;
query I
SELECT quantile_cont(t, 0.5) FROM integers;
----
2147483647
# BIGINT extremes
query I
SELECT quantile_cont(t, 0.5) FROM (VALUES (9223372036854775794::BIGINT), (9223372036854775796::BIGINT)) tbl(t)
----
9223372036854775795
statement ok
CREATE TABLE bigints(t BIGINT);
statement ok
INSERT INTO bigints VALUES (-9223372036854775800), (-9223372036854775800);
query I
SELECT quantile_cont(t, 0.5) FROM bigints;
----
-9223372036854775800
statement ok
UPDATE bigints SET t=-t;
query I
SELECT quantile_cont(t, 0.5) FROM bigints;
----
9223372036854775800
statement error
SELECT quantile_cont(r, random()) FROM quantile
----
Binder Error: QUANTILE can only take constant parameters

View File

@@ -0,0 +1,164 @@
# name: test/sql/aggregate/aggregates/test_quantile_cont_list.test
# description: Test QUANTILE_CONT operator with LIST parameters
# group: [aggregates]
statement ok
SET default_null_order='nulls_first';
statement ok
PRAGMA enable_verification
statement ok
PRAGMA verify_external
# scalar quantiles
statement ok
create table quantiles as select range r, random() FROM range(0,1000000,100) union all values (NULL, 0.25), (NULL, 0.5), (NULL, 0.75) order by 2;
# temporal types
query I
SELECT quantile_cont('2021-01-01'::TIMESTAMP + interval (r/100) hour, [0.25, 0.5, 0.75]) FROM quantiles
----
['2021-04-15 03:45:00', '2021-07-28 07:30:00', '2021-11-09 11:15:00']
query I
SELECT quantile_cont('1990-01-01'::DATE + interval (r/100) day, [0.25, 0.5, 0.75]) FROM quantiles
----
['1996-11-04 18:00:00', '2003-09-09 12:00:00', '2010-07-14 06:00:00']
query I
SELECT quantile_cont('00:00:00'::TIME + interval (r/100) second, [0.25, 0.5, 0.75]) FROM quantiles
----
['00:41:39.75', '01:23:19.5', '02:04:59.25']
# WITH TIME ZONE
query I
SELECT quantile_cont(('2021-01-01'::TIMESTAMP + interval (r/100) hour)::TIMESTAMPTZ, [0.25, 0.5, 0.75])
FROM quantiles
----
['2021-04-15 03:45:00+00', '2021-07-28 07:30:00+00', '2021-11-09 11:15:00+00']
statement error
SELECT quantile_cont(interval (r/100) second, [0.25, 0.5, 0.75]) FROM quantiles
----
# single GROUP
query R
SELECT quantile_cont(r, [0.25, 0.5, 0.75]) FROM quantiles
----
[249975.0, 499950.0, 749925.0]
# Decimals
foreach type decimal(4,1) decimal(8,1) decimal(12,1) decimal(18,1) decimal(24,1)
query I
SELECT quantile_cont(d::${type}, [0.25, 0.5, 0.75])
FROM range(0,100) tbl(d)
----
[24.7, 49.5, 74.2]
endloop
# multiple groups
query RR
SELECT mod(r,1000) as g, quantile_cont(r, [0.25, 0.5, 0.75]) FROM quantiles GROUP BY 1 ORDER BY 1
----
NULL NULL
0 [249750.0, 499500.0, 749250.0]
100 [249850.0, 499600.0, 749350.0]
200 [249950.0, 499700.0, 749450.0]
300 [250050.0, 499800.0, 749550.0]
400 [250150.0, 499900.0, 749650.0]
500 [250250.0, 500000.0, 749750.0]
600 [250350.0, 500100.0, 749850.0]
700 [250450.0, 500200.0, 749950.0]
800 [250550.0, 500300.0, 750050.0]
900 [250650.0, 500400.0, 750150.0]
# constant input
query R
SELECT quantile_cont(1, [0.25, 0.5, 0.75]) FROM quantiles
----
[1.0, 1.0, 1.0]
# empty input
query R
SELECT quantile_cont(r, [0.25, 0.5, 0.75]) FROM quantiles WHERE 1=0
----
NULL
# empty list
query R
SELECT quantile_cont(r, []) FROM quantiles
----
[]
statement ok
pragma threads=4
statement ok
PRAGMA verify_parallelism
# single GROUP
query R
SELECT quantile_cont(r, [0.25, 0.5, 0.75]) FROM quantiles
----
[249975.0, 499950.0, 749925.0]
# multiple groups
query RR
SELECT mod(r,1000) as g, quantile_cont(r, [0.25, 0.5, 0.75]) FROM quantiles GROUP BY 1 ORDER BY 1
----
NULL NULL
0 [249750.0, 499500.0, 749250.0]
100 [249850.0, 499600.0, 749350.0]
200 [249950.0, 499700.0, 749450.0]
300 [250050.0, 499800.0, 749550.0]
400 [250150.0, 499900.0, 749650.0]
500 [250250.0, 500000.0, 749750.0]
600 [250350.0, 500100.0, 749850.0]
700 [250450.0, 500200.0, 749950.0]
800 [250550.0, 500300.0, 750050.0]
900 [250650.0, 500400.0, 750150.0]
# constant input
query R
SELECT quantile_cont(1, [0.25, 0.5, 0.75]) FROM quantiles
----
[1.0, 1.0, 1.0]
# empty input
query R
SELECT quantile_cont(r, [0.25, 0.5, 0.75]) FROM quantiles WHERE 1=0
----
NULL
# empty list
query R
SELECT quantile_cont(r, []) FROM quantiles
----
[]
statement error
SELECT quantile_cont(r, [-0.25, 0.5, 0.75]) FROM quantiles
----
statement error
SELECT quantile_cont(r, (0.25, 0.5, 1.1)) FROM quantiles
----
statement error
SELECT quantile_cont(r, [0.25, 0.5, NULL]) FROM quantiles
----
statement error
SELECT quantile_cont(r, ["0.25", "0.5", "0.75"]) FROM quantiles
----
statement error
SELECT quantile_cont(r::string, [0.25, 0.5, 0.75]) FROM quantiles
----
statement error
SELECT quantile_cont(r, [0.25, 0.5, 0.75], 50) FROM quantiles
----

View File

@@ -0,0 +1,292 @@
# name: test/sql/aggregate/aggregates/test_quantile_disc.test
# description: Test QUANTILE_DISC aggregate
# group: [aggregates]
statement ok
SET default_null_order='nulls_first';
statement ok
PRAGMA enable_verification
statement ok
PRAGMA verify_external
statement ok
CREATE TABLE quantile as
SELECT range r, random() AS q
FROM range(10000)
UNION ALL VALUES (NULL, 0.1), (NULL, 0.5), (NULL, 0.9)
ORDER BY 2;
query I
SELECT quantile_disc(r, 0.5) FROM quantile
----
4999
query I
SELECT quantile_disc(r::decimal(10,2), 0.5) FROM quantile
----
4999
query I
SELECT quantile_disc(case when r is null then null else [r] end, 0.5) FROM quantile
----
[4999]
query I
SELECT quantile_disc(case when r is null then null else {'i': r} end, 0.5) FROM quantile
----
{'i': 4999}
query I
SELECT quantile_disc(r, 1.0) FROM quantile
----
9999
query I
SELECT quantile_disc(r, 0.0) FROM quantile
----
0
query I
SELECT quantile_disc(NULL, 0.5) FROM quantile
----
NULL
query I
SELECT quantile_disc(42, 0.5) FROM quantile
----
42
query I
SELECT quantile_disc(NULL, 0.5)
----
NULL
query I
SELECT quantile_disc(42, 0.5)
----
42
# single GROUP
query III
SELECT quantile_disc(r, 0.1), quantile_disc(r, 0.5), quantile_disc(r, 0.9) from quantile
----
999
4999
8999
foreach type decimal(4,1) decimal(8,1) decimal(12,1) decimal(18,1) decimal(24,1)
query III
SELECT quantile_disc(d::${type}, 0.1), quantile_disc(d::${type}, 0.5), quantile_disc(d::${type}, 0.9)
FROM range(0,100) tbl(d)
----
9.0 49.0 89.0
endloop
# Negative quantiles (use descending intervals)
query I
SELECT quantile_disc(col, -0.5)
FROM VALUES (11000), (3100), (2900), (2800), (2600), (2500) AS tab(col);
----
2900
# ORDER BY ... DESC
# the discrete quantile never interpolates: it returns one of the input values,
# so the 0.8 quantile of the descending input (2, 1) is 1
# (1.2 is the interpolated quantile_cont / percentile_cont answer)
query II
SELECT
percentile_disc(0.8) WITHIN GROUP (ORDER BY x DESC),
quantile_disc(x, 0.8 ORDER BY x DESC),
FROM
(VALUES (2), (1)) _(x);
----
1 1
#
# VARCHAR. Remember, this is dictionary ordering, not numeric ordering!
#
query III
SELECT quantile_disc(d::VARCHAR, 0.1), quantile_disc(d::VARCHAR, 0.5), quantile_disc(d::VARCHAR, 0.9)
FROM range(0,100) tbl(d)
----
17 53 9
# A string column that has only NULL values
query I
SELECT quantile_disc(NULL::VARCHAR, 0.1)
FROM range(0,100) tbl(d)
----
NULL
# A string column that has long strings (>12 characters)
query I
SELECT quantile_disc('prefix-' || d::VARCHAR || '-suffix', 0.1)
FROM range(0,100) tbl(d)
----
prefix-17-suffix
# Using median itself on a string (just an alias for quantile_disc)
query I
SELECT median(d::VARCHAR)
FROM range(0,100) tbl(d)
----
53
# Median on an empty table
query I
SELECT median(d::VARCHAR)
FROM range(0,100) tbl(d)
WHERE d > 100
----
NULL
#
# Multiple groups
#
query II
SELECT mod(r,10) as g, quantile_disc(r, 0.1) FROM quantile GROUP BY 1 ORDER BY 1
----
NULL NULL
0 990
1 991
2 992
3 993
4 994
5 995
6 996
7 997
8 998
9 999
# temporal types
query I
SELECT quantile_disc('2021-01-01'::TIMESTAMP + interval (r) hour, 0.5) FROM quantile
----
2021-07-28 07:00:00
query I
SELECT quantile_disc('1990-01-01'::DATE + interval (r) day, 0.5) FROM quantile
----
2003-09-09 00:00:00
query I
SELECT quantile_disc('00:00:00'::TIME + interval (r) second, 0.5) FROM quantile
----
01:23:19
query I
SELECT quantile_disc(interval (r) second, 0.5) FROM quantile
----
01:23:19
# WITH TIME ZONE
query I
SELECT quantile_disc(('2021-01-01'::TIMESTAMP + interval (r) hour)::TIMESTAMPTZ, 0.5) FROM quantile
----
2021-07-28 07:00:00+00
# constant input
query I
SELECT quantile_disc(1, 0.1) FROM quantile
----
1
# empty input
query I
SELECT quantile_disc(r, 0.1) FROM quantile WHERE 1=0
----
NULL
# Invalid usage
statement error
SELECT quantile_disc(r, -1.1) FROM quantile
----
statement error
SELECT quantile_disc(r, 1.1) FROM quantile
----
statement error
SELECT quantile_disc(r, "string") FROM quantile
----
statement error
SELECT quantile_disc(r, NULL) FROM quantile
----
statement error
SELECT quantile_disc(r) FROM quantile
----
statement error
SELECT quantile_disc(r, 0.1, 50) FROM quantile
----
# the quantile argument is a column (q) rather than a constant, which is expected to be rejected;
# exercise the DISC variant here, the CONT variant has its own check in test_quantile_cont.test
statement error
SELECT quantile_disc(r, q) FROM quantile
----
statement ok
pragma threads=4
statement ok
PRAGMA verify_parallelism
# single GROUP
query III
SELECT quantile_disc(r, 0.1), quantile_disc(r, 0.5), quantile_disc(r, 0.9) from quantile
----
999
4999
8999
# multiple groups
query II
SELECT mod(r,10) as g, quantile_disc(r, 0.1) FROM quantile GROUP BY 1 ORDER BY 1
----
NULL NULL
0 990
1 991
2 992
3 993
4 994
5 995
6 996
7 997
8 998
9 999
# constant input
query I
SELECT quantile_disc(1, 0.1) FROM quantile
----
1
# empty input
query I
SELECT quantile_disc(r, 0.1) FROM quantile WHERE 1=0
----
NULL
# NaNs
foreach fp float double
loop i 0 10
query I
with a as (
select 'NaN'::${fp} as num
union all
select num::${fp} as num from generate_series(1,99) as tbl(num)
union all
select 'NaN'::${fp} as num
)
select quantile_disc(num, 0.9) c1
from a;
----
91.0
endloop
endloop

View File

@@ -0,0 +1,209 @@
# name: test/sql/aggregate/aggregates/test_quantile_disc_list.test
# description: Test QUANTILE_DISC operator with LIST quantiles
# group: [aggregates]
statement ok
SET default_null_order='nulls_first';
statement ok
PRAGMA enable_verification
statement ok
PRAGMA verify_external
# scalar quantiles
statement ok
create table quantiles as select range r, random() FROM range(10000) union all values (NULL, 0.1), (NULL, 0.5), (NULL, 0.9) order by 2;
# single GROUP
query I
SELECT quantile_disc(r, [0.1, 0.5, 0.9]) FROM quantiles
----
[999, 4999, 8999]
query I
SELECT quantile_disc(case when r is null then null else [r] end, [0.1, 0.5, 0.9]) FROM quantiles
----
[[999], [4999], [8999]]
query I
SELECT quantile_disc(case when r is null then null else {'i': r} end, [0.1, 0.5, 0.9]) FROM quantiles
----
[{'i': 999}, {'i': 4999}, {'i': 8999}]
foreach type decimal(4,1) decimal(8,1) decimal(12,1) decimal(18,1) decimal(24,1)
query I
SELECT quantile_disc(d::${type}, [0.1, 0.5, 0.9])
FROM range(0,100) tbl(d)
----
[9.0, 49.0, 89.0]
endloop
# Negative quantiles (use descending intervals)
query I
SELECT quantile_disc(col, [-.25, -.5, -.75])
FROM VALUES (11000), (3100), (2900), (2800), (2600), (2500) AS tab(col);
----
[3100, 2900, 2600]
# VARCHAR. Remember, this is dictionary ordering, not numeric ordering!
query I
SELECT quantile_disc(d::VARCHAR, [0.1, 0.5, 0.9])
FROM range(0,100) tbl(d)
----
[17, 53, 9]
# multiple groups
query II
SELECT mod(r,10) as g, quantile_disc(r, [0.1, 0.5, 0.9]) FROM quantiles GROUP BY 1 ORDER BY 1
----
NULL NULL
0 [990, 4990, 8990]
1 [991, 4991, 8991]
2 [992, 4992, 8992]
3 [993, 4993, 8993]
4 [994, 4994, 8994]
5 [995, 4995, 8995]
6 [996, 4996, 8996]
7 [997, 4997, 8997]
8 [998, 4998, 8998]
9 [999, 4999, 8999]
# constant input
query I
SELECT quantile_disc(1, [0.1, 0.5, 0.9]) FROM quantiles
----
[1, 1, 1]
# empty input
query I
SELECT quantile_disc(r, [0.1, 0.5, 0.9]) FROM quantiles WHERE 1=0
----
NULL
# empty list
query I
SELECT quantile_disc(r, []) FROM quantiles
----
[]
# temporal types
query I
SELECT quantile_disc('2021-01-01'::TIMESTAMP + interval (r) hour, [0.1, 0.5, 0.9]) FROM quantiles
----
['2021-02-11 15:00:00', '2021-07-28 07:00:00', '2022-01-10 23:00:00']
query I
SELECT quantile_disc('1990-01-01'::DATE + interval (r) day, [0.1, 0.5, 0.9]) FROM quantiles
----
['1992-09-26 00:00:00', '2003-09-09 00:00:00', '2014-08-22 00:00:00']
query I
SELECT quantile_disc('00:00:00'::TIME + interval (r) second, [0.1, 0.5, 0.9]) FROM quantiles
----
['00:16:39', '01:23:19', '02:29:59']
query I
SELECT quantile_disc(interval (r) second, [0.1, 0.5, 0.9]) FROM quantiles
----
['00:16:39', '01:23:19', '02:29:59']
# WITH TIME ZONE
query I
SELECT quantile_disc(('2021-01-01'::TIMESTAMP + interval (r) hour)::TIMESTAMPTZ, [0.1, 0.5, 0.9]) FROM quantiles
----
['2021-02-11 15:00:00+00', '2021-07-28 07:00:00+00', '2022-01-10 23:00:00+00']
statement ok
pragma threads=4
statement ok
PRAGMA verify_parallelism
# single GROUP
query I
SELECT quantile_disc(r, [0.1, 0.5, 0.9]) FROM quantiles
----
[999, 4999, 8999]
# multiple groups
query II
SELECT mod(r,10) as g, quantile_disc(r, [0.1, 0.5, 0.9]) FROM quantiles GROUP BY 1 ORDER BY 1
----
NULL NULL
0 [990, 4990, 8990]
1 [991, 4991, 8991]
2 [992, 4992, 8992]
3 [993, 4993, 8993]
4 [994, 4994, 8994]
5 [995, 4995, 8995]
6 [996, 4996, 8996]
7 [997, 4997, 8997]
8 [998, 4998, 8998]
9 [999, 4999, 8999]
# constant input
query I
SELECT quantile_disc(1, [0.1, 0.5, 0.9]) FROM quantiles
----
[1, 1, 1]
# empty input
query I
SELECT quantile_disc(r, [0.1, 0.5, 0.9]) FROM quantiles WHERE 1=0
----
NULL
# empty list
query I
SELECT quantile_disc(r, []) FROM quantiles
----
[]
# Oracle boundaries
# with four inputs, the fractions 0.5 and 0.75 still select the lower value (1 and 2),
# while 0.51 and 0.9 already step up to the next one (2 and 10)
query I
SELECT quantile_disc(col, [0.1, 0.32, 0.33, 0.34, 0.49, .5, .51, 0.75, 0.9, 0.999, 1])
FROM VALUES (0), (1), (2), (10) AS tab(col);
----
[0, 1, 1, 1, 1, 1, 2, 2, 10, 10, 10]
query I
SELECT quantile_disc(42::UTINYINT, 0.5);
----
42
# Array arguments
query I
SELECT quantile_disc(col, ARRAY_VALUE(0.5, 0.4, 0.1)) AS percentile
FROM VALUES (0), (1), (2), (10) AS tab(col);
----
[1, 1, 0]
# Invalid use
statement error
SELECT quantile_disc(r, [-0.1, 0.5, 0.9]) FROM quantiles
----
Binder Error: QUANTILE parameters must have consistent signs
statement error
SELECT quantile_disc(r, (0.1, 0.5, 1.1)) FROM quantiles
----
<REGEX>:.*Binder Error: No function matches.*'quantile_disc.*
statement error
SELECT quantile_disc(r, [0.1, 0.5, NULL]) FROM quantiles
----
Binder Error: QUANTILE parameter cannot be NULL
statement error
SELECT quantile_disc(r, ["0.1", "0.5", "0.9"]) FROM quantiles
----
Binder Error: Referenced column "0.1" not found in FROM clause
statement error
SELECT quantile_disc(r, [0.1, 0.5, 0.9], 50) FROM quantiles
----
<REGEX>:.*Binder Error: No function matches.*'quantile_disc.*

View File

@@ -0,0 +1,392 @@
# name: test/sql/aggregate/aggregates/test_regression.test
# description: Test Regression Functions
# group: [aggregates]
statement ok
PRAGMA enable_verification
query I
select regr_avgx(NULL,NULL)
----
NULL
query I
select regr_avgx(1,1)
----
1
#Corner cases
statement error
select regr_avgx()
----
statement error
select regr_avgx(*)
----
statement error
select regr_avgy()
----
query I
select regr_avgy(NULL,NULL)
----
NULL
query I
select regr_avgy(1,1)
----
1
statement error
select regr_avgy(*)
----
statement error
select regr_count()
----
query I
select regr_count(NULL,NULL)
----
0
query I
select regr_count(1,1)
----
1
statement error
select regr_count(*)
----
statement error
select regr_slope()
----
query I
select regr_slope(NULL,NULL)
----
NULL
query I
select regr_slope(1,1)
----
NAN
statement error
select regr_slope(*)
----
statement error
select regr_r2()
----
query I
select regr_r2(NULL,NULL)
----
NULL
query I
select regr_r2(1,1)
----
NULL
#Corner cases
statement error
select regr_r2(0, 1e230*i) from range(5) tbl(i);
----
statement error
select regr_r2(1e230*i, i) from range(5) tbl(i);
----
query I
select regr_r2(1e230*i, 0) from range(5) tbl(i);
----
NULL
query I
select regr_r2(0, i) from range(5) tbl(i);
----
1
statement error
select regr_r2(*)
----
# regr_sxx: missing arguments are rejected
statement error
select regr_sxx()
----
# huge values in the second argument make the query fail ...
statement error
select regr_sxx(0, 2e230*i) from range(5) tbl(i)
----
# ... while the same values in the first argument do not
query I
select regr_sxx(2e230*i, 0) from range(5) tbl(i)
----
0
query I
select regr_sxx(NULL,NULL)
----
NULL
query I
select regr_sxx(1,1)
----
0
# star argument is rejected, as for the other regr_* aggregates
statement error
select regr_sxx(*)
----
# regr_syy: no arguments, NULL input, single row, star argument
statement error
select regr_syy()
----
query I
select regr_syy(NULL,NULL)
----
NULL
query I
select regr_syy(1,1)
----
0
statement error
select regr_syy(*)
----
# regr_sxy: no arguments, NULL input, single row, star argument
statement error
select regr_sxy()
----
query I
select regr_sxy(NULL,NULL)
----
NULL
query I
select regr_sxy(1,1)
----
0
statement error
select regr_sxy(*)
----
statement error
select regr_intercept()
----
query I
select regr_intercept(NULL,NULL)
----
NULL
query I
select regr_intercept(1,1)
----
NULL
statement error
select regr_intercept(*)
----
# fixture: k=1 has one row whose v2 is NULL; k=2 has four rows, one of them with a NULL v2
statement ok
CREATE TABLE aggr(
k INT,
v DECIMAL(10, 2),
v2 DECIMAL(10, 2)
);
statement ok
INSERT INTO aggr VALUES
(1, 10, NULL),
(2, 10, 11),
(2, 20, 22),
(2, 25, NULL),
(2, 30, 35);
query II
select k, regr_avgx(v, v2) from aggr group by k ORDER BY ALL;
----
1 NULL
2 22.666667
query II
select k, regr_avgy(v, v2) from aggr group by k ORDER BY ALL;
----
1 NULL
2 20
query III
select k, count(*), regr_count(v, v2) from aggr group by k ORDER BY ALL;
----
1 1 0
2 4 3
query II
select k, regr_slope(v, v2) from aggr group by k ORDER BY ALL;
----
1 NULL
2 0.831409
query II
select k, regr_r2(v, v2) from aggr group by k ORDER BY ALL;
----
1 NULL
2 0.997691
query II
select k, regr_sxx(v, v2) from aggr group by k ORDER BY ALL;
----
1 NULL
2 288.666667
query II
select k, regr_syy(v, v2) from aggr group by k ORDER BY ALL;
----
1 NULL
2 200.000000
query II
select k, regr_sxy(v, v2) from aggr group by k ORDER BY ALL;
----
1 NULL
2 240.000000
query II
select k, regr_intercept(v, v2) from aggr group by k ORDER BY ALL;
----
1 NULL
2 1.154734
query I
select regr_avgx(v, v2) from aggr ;
----
22.666667
query I
select regr_avgy(v, v2) from aggr ;
----
20
query I
select regr_count(v, v2) from aggr ;
----
3
query I
select regr_slope(v, v2) from aggr ;
----
0.831409
query I
select regr_r2(v, v2) from aggr ;
----
0.997691
query I
select regr_sxx(v, v2) from aggr ;
----
288.666667
query I
select regr_syy(v, v2) from aggr ;
----
200.000000
query I
select regr_sxy(v, v2) from aggr;
----
240.000000
query I
select regr_intercept(v, v2) from aggr;
----
1.154734
# Window Function
query I rowsort
select regr_avgx(v, v2) over (partition by k)
from aggr;
----
22.666667
22.666667
22.666667
22.666667
NULL
query I rowsort
select regr_avgy(v, v2) over (partition by k)
from aggr;
----
20.000000
20.000000
20.000000
20.000000
NULL
query I rowsort
select regr_count(v, v2) over (partition by k)
from aggr;
----
0
3
3
3
3
query I rowsort
select regr_slope(v, v2) over (partition by k)
from aggr;
----
0.831409
0.831409
0.831409
0.831409
NULL
query I rowsort
select regr_r2(v, v2) over (partition by k)
from aggr;
----
0.997691
0.997691
0.997691
0.997691
NULL
query I rowsort
select regr_sxx(v, v2) over (partition by k)
from aggr;
----
288.666667
288.666667
288.666667
288.666667
NULL
query I rowsort
select regr_syy(v, v2) over (partition by k)
from aggr;
----
200.000000
200.000000
200.000000
200.000000
NULL
query I rowsort
select regr_sxy(v, v2) over (partition by k)
from aggr;
----
240.000000
240.000000
240.000000
240.000000
NULL
query I rowsort
select regr_intercept(v, v2) over (partition by k)
from aggr;
----
1.154734
1.154734
1.154734
1.154734
NULL

View File

@@ -0,0 +1,77 @@
# name: test/sql/aggregate/aggregates/test_scalar_aggr.test
# description: Test aggregates with scalar inputs
# group: [aggregates]
statement ok
SET default_null_order='nulls_first';
# test aggregate on scalar values
query IIIIIIT
SELECT COUNT(1), MIN(1), FIRST(1), LAST(1),MAX(1), SUM(1), STRING_AGG('hello', ',')
----
1
1
1
1
1
1
hello
# test aggregate on scalar NULLs
query IIIIIIT
SELECT COUNT(NULL), MIN(NULL), FIRST(NULL), LAST(NULL), MAX(NULL), SUM(NULL), STRING_AGG(NULL, NULL)
----
0
NULL
NULL
NULL
NULL
NULL
NULL
query I
SELECT FIRST(NULL)
----
NULL
query I
SELECT LAST(NULL)
----
NULL
query III
SELECT NULL as a, NULL as b, 1 as id UNION SELECT CAST('00:00:00' AS TIME) as a, CAST('12:34:56' AS TIME) as b, 2 as id ORDER BY 1
----
NULL NULL 1
00:00:00 12:34:56 2
# test aggregates on a set of values with scalar inputs
statement ok
CREATE TABLE integers(i INTEGER);
statement ok
INSERT INTO integers VALUES (1), (2), (NULL)
query IIIIIIT
SELECT COUNT(1), MIN(1), FIRST(1), LAST(1), MAX(1), SUM(1), STRING_AGG('hello', ',') FROM integers
----
3
1
1
1
1
3
hello,hello,hello
# test aggregates on a set of values with scalar NULL values as inputs
query IIIIIIT
SELECT COUNT(NULL), MIN(NULL), FIRST(NULL), LAST(NULL), MAX(NULL), SUM(NULL), STRING_AGG(NULL, NULL) FROM integers
----
0
NULL
NULL
NULL
NULL
NULL
NULL

View File

@@ -0,0 +1,55 @@
# name: test/sql/aggregate/aggregates/test_sem.test
# description: Test SEM operator
# group: [aggregates]
statement ok
PRAGMA enable_verification
#Corner cases
statement error
select sem()
----
query I
select sem(NULL)
----
NULL
query I
select sem(1)
----
0.000000
statement error
select sem(*)
----
# fixture: k=1 has one row whose v2 is NULL; k=2 has four rows, one of them with a NULL v2
statement ok
CREATE TABLE aggr(
k INT,
v DECIMAL(10, 2),
v2 DECIMAL(10, 2)
);
statement ok
INSERT INTO aggr VALUES
(1, 10, NULL),
(2, 10, 11),
(2, 20, 22),
(2, 25, NULL),
(2, 30, 35);
query III
select k, sem(v),sem(v2) from aggr group by k ORDER BY ALL;
----
1 0.000000 NULL
2 3.697550 5.663398
query II
select sem(v),sem(v2) from aggr
----
3.577709 5.663398
# Window Function
query II
select k, sem(v) over (partition by k)
from aggr
order by all
----
1 0.0
2 3.6975498644372604
2 3.6975498644372604
2 3.6975498644372604
2 3.6975498644372604

View File

@@ -0,0 +1,81 @@
# name: test/sql/aggregate/aggregates/test_simple_filter.test
# description: Test simple aggregate filtering
# group: [aggregates]
statement ok
PRAGMA enable_verification
query III
SELECT
count(*) as total_rows,
count(*) FILTER (WHERE i <= 5) as lte_five,
count(*) FILTER (WHERE i % 2 = 1) as odds
FROM generate_series(1,11) tbl(i)
----
11 5 6
query III
SELECT
count(*) FILTER (WHERE i % 2 = 1) as odds,
count(*) FILTER (WHERE i <= 5) as lte_five,
count(*) as total_rows
FROM generate_series(1,11) tbl(i)
----
6 5 11
query III
SELECT
count(*) FILTER (WHERE i <= 5) as lte_five,
count(*) FILTER (WHERE i % 2 = 1) as odds,
count(*) as total_rows
FROM generate_series(1,11) tbl(i)
----
5 6 11
# Test filtered aggregates with arguments
statement ok
CREATE TABLE issue3105(gender VARCHAR, pay FLOAT);
statement ok
INSERT INTO issue3105 VALUES
('male', 100),
('male', 200),
('male', 300),
('female', 150),
('female', 250);
query III
SELECT
SUM(pay) FILTER (WHERE gender = 'male'),
SUM(pay) FILTER (WHERE gender = 'female'),
SUM(pay)
FROM issue3105;
----
600.000000 400.000000 1000.000000
query III
SELECT
SUM(pay),
SUM(pay) FILTER (WHERE gender = 'male'),
SUM(pay) FILTER (WHERE gender = 'female')
FROM issue3105;
----
1000.000000 600.000000 400.000000
query III
SELECT
SUM(pay) FILTER (WHERE gender = 'male'),
SUM(pay),
SUM(pay) FILTER (WHERE gender = 'female')
FROM issue3105;
----
600.000000 1000.000000 400.000000
query III
SELECT
SUM(pay) FILTER (gender = 'male'),
SUM(pay),
SUM(pay) FILTER (gender = 'female')
FROM issue3105;
----
600.000000 1000.000000 400.000000

View File

@@ -0,0 +1,89 @@
# name: test/sql/aggregate/aggregates/test_skewness.test
# description: Test skewness aggregate
# group: [aggregates]
statement ok
PRAGMA enable_verification
#Corner cases
statement error
select skewness()
----
query I
select skewness(NULL)
----
NULL
query I
select skewness(1)
----
NULL
statement error
select skewness(*)
----
# Constant Value
query I
select skewness (10) from range (5)
----
NAN
#Empty Table
query I
select skewness (10) from range (5) where 1 == 0
----
NULL
# out of range
statement error
select skewness(i) from (values (-2e307), (0), (2e307)) tbl(i)
----
statement ok
create table aggr(k int, v decimal(10,2), v2 decimal(10, 2));
statement ok
insert into aggr values
(1, 10, null),
(2, 10, 11),
(2, 10, 15),
(2, 10, 18),
(2, 20, 22),
(2, 20, 25),
(2, 25, null),
(2, 30, 35),
(2, 30, 40),
(2, 30, 50),
(2, 30, 51);
query III
select skewness(k), skewness(v), skewness(v2) from aggr
----
-3.316625 -0.163444 0.365401
query I
select skewness(v2) from aggr group by v ORDER BY ALL
----
-0.423273
-0.330141
NULL
NULL
# Window Function
query I
select skewness(v2) over (partition by v)
from aggr order by v;
----
-0.423273
-0.423273
-0.423273
-0.423273
NULL
NULL
NULL
-0.330141
-0.330141
-0.330141
-0.330141

View File

@@ -0,0 +1,268 @@
# name: test/sql/aggregate/aggregates/test_state_export.test
# description: Test the state export functionality
# group: [aggregates]
statement ok
PRAGMA enable_verification
statement ok
create table dummy as select range % 10 g, range d from range(100);
# ungrouped aggr
query IIIIII nosort res0
SELECT count(*), count(d), sum(d), avg(d)::integer, min(d), max(d) FROM dummy;
----
query IIIIII nosort res0
SELECT finalize(count(*) EXPORT_STATE), finalize(count(d) EXPORT_STATE), finalize(sum(d) EXPORT_STATE), finalize(avg(d) EXPORT_STATE)::integer, finalize(min(d) EXPORT_STATE), finalize(max(d) EXPORT_STATE) FROM dummy;
----
# grouped aggr
query IIIIIII nosort res1
SELECT g, count(*), count(d), sum(d), avg(d)::integer, min(d), max(d) FROM dummy GROUP BY g ORDER BY g;
----
query IIIIIII nosort res1
SELECT g, finalize(count(*) EXPORT_STATE), finalize(count(d) EXPORT_STATE), finalize(sum(d) EXPORT_STATE), finalize(avg(d) EXPORT_STATE)::integer, finalize(min(d) EXPORT_STATE), finalize(max(d) EXPORT_STATE) FROM dummy GROUP BY g ORDER BY g;
----
# we can persist this
statement ok
CREATE TABLE state AS SELECT g, count(*) EXPORT_STATE count_star_state, count(d) EXPORT_STATE count_state, sum(d) EXPORT_STATE sum_state, avg(d) EXPORT_STATE avg_state, min(d) EXPORT_STATE min_state, max(d) EXPORT_STATE max_state FROM dummy GROUP BY g ORDER BY g;
query IIIIIII nosort res1
SELECT g, finalize(count_star_state),finalize(count_state), finalize(sum_state), finalize(avg_state)::integer, finalize(min_state), finalize(max_state) FROM state ORDER BY g;
----
# combining an exported SUM state with itself must equal twice the plain sum
# (both queries return a single column)
query I nosort res2
SELECT sum(d)*2 FROM dummy;
----
query I nosort res2
SELECT FINALIZE(COMBINE(SUM(d) EXPORT_STATE, SUM(d) EXPORT_STATE)) FROM dummy;
----
query II nosort res3
SELECT g, sum(d)*2 combined_sum FROM dummy GROUP BY g ORDER BY g;
----
query II nosort res3
select g, finalize(combine(sum(d) EXPORT_STATE, sum_state)) combined_sum from dummy join state using (g) group by g, sum_state ORDER BY g;
----
# combine aggregate state in UNION
statement ok
CREATE TABLE state2 AS SELECT g, sum(d) EXPORT_STATE sum_state FROM dummy WHERE g < 5 GROUP BY g ORDER BY g;
query II rowsort res3
select g, finalize(sum_state) * 2 combined_sum from (select g, sum(d) EXPORT_STATE sum_state from dummy where g >= 5 GROUP BY g union all SELECT * FROM state2) ORDER BY g;
----
# combine aggregate states in JOINs with NULLs
query II rowsort res3
with groups as (select distinct g from dummy)
select g, FINALIZE(COMBINE(sum_state, sum_state2)) * 2 from groups left join state2 using(g) left join (select g, sum(d) EXPORT_STATE sum_state2 from dummy where g >= 5 GROUP BY g) using (g)
----
query IIII
with groups as (select distinct g from dummy)
select g, FINALIZE(sum_state), FINALIZE(sum_state2), FINALIZE(COMBINE(sum_state, sum_state2)) from groups left join state2 using(g) left join (select g, sum(d) EXPORT_STATE sum_state2 from dummy where g >= 3 GROUP BY g) using (g) order by g
----
0 450 NULL 450
1 460 NULL 460
2 470 NULL 470
3 480 480 960
4 490 490 980
5 NULL 500 500
6 NULL 510 510
7 NULL 520 520
8 NULL 530 530
9 NULL 540 540
# empty groups
query IIIIII nosort res4
SELECT count(*), count(d), sum(d), avg(d)::integer, min(d), max(d) FROM dummy WHERE FALSE;
----
query IIIIII nosort res4
SELECT finalize(count(*) EXPORT_STATE), finalize(count(d) EXPORT_STATE), finalize(sum(d) EXPORT_STATE), finalize(avg(d) EXPORT_STATE)::integer, finalize(min(d) EXPORT_STATE), finalize(max(d) EXPORT_STATE) FROM dummy WHERE FALSE;
----
# only null scanned
query IIIIII nosort res5
SELECT count(*), count(d), sum(d), avg(d)::integer, min(d), max(d) FROM (SELECT NULL::integer d);
----
query IIIIII nosort res5
SELECT finalize(count(*) EXPORT_STATE), finalize(count(d) EXPORT_STATE), finalize(sum(d) EXPORT_STATE), finalize(avg(d) EXPORT_STATE)::integer, finalize(min(d) EXPORT_STATE), finalize(max(d) EXPORT_STATE) FROM (SELECT NULL::integer d);
----
# only null scanned, over multiple rows (g is projected but no GROUP BY is applied)
query IIIIII nosort res6
SELECT count(*), count(d), sum(d), avg(d)::integer, min(d), max(d) FROM (SELECT NULL::integer d, g FROM dummy);
----
query IIIIII nosort res6
SELECT finalize(count(*) EXPORT_STATE), finalize(count(d) EXPORT_STATE), finalize(sum(d) EXPORT_STATE), finalize(avg(d) EXPORT_STATE)::integer, finalize(min(d) EXPORT_STATE), finalize(max(d) EXPORT_STATE) FROM (SELECT NULL::integer d, g FROM dummy);
----
# more aggregates
# we skip these for now as argmin/argmax now has a custom binder (so that it can work with extension types like JSON)
# otherwise we get "Binder Error: Cannot use EXPORT_STATE on aggregate functions with custom binders"
mode skip
query II nosort res7
select argmin(a,b), argmax(a,b) from (values (1,1), (2,2), (8,8), (10,10)) s(a,b);
----
query II nosort res7
select FINALIZE(argmin(a,b) EXPORT_STATE), FINALIZE(argmax(a,b) EXPORT_STATE) from (values (1,1), (2,2), (8,8), (10,10)) s(a,b);
----
mode unskip
# further aggregates: plain result vs. FINALIZE of the exported state
# (eight columns: g plus seven aggregates)
query IIIIIIII nosort res8
SELECT g, first(d), last(d), fsum(d), favg(d), product(d), bit_xor(d), bool_and(d > 5) FROM dummy GROUP BY g ORDER BY g;
----
query IIIIIIII nosort res8
SELECT g, FINALIZE(first(d) EXPORT_STATE), FINALIZE(last(d) EXPORT_STATE), FINALIZE(fsum(d) EXPORT_STATE), FINALIZE(favg(d) EXPORT_STATE), FINALIZE(product(d) EXPORT_STATE), FINALIZE(bit_xor(d) EXPORT_STATE), FINALIZE(bool_and(d > 5) EXPORT_STATE) FROM dummy GROUP BY g ORDER BY g;
----
query II nosort res9
SELECT corr(d, d+1), covar_pop(d, d+1)FROM dummy;
----
query II nosort res9
SELECT FINALIZE(corr(d, d+1) EXPORT_STATE), FINALIZE(covar_pop(d, d+1) EXPORT_STATE) from dummy;
----
# you're holding it wrong:
statement error
SELECT list(d) EXPORT_STATE from dummy;
----
statement error
SELECT string_agg(d, ',') EXPORT_STATE from dummy;
----
statement error
SELECT string_agg(d) EXPORT_STATE from dummy;
----
statement error
SELECT FINALIZE(COMBINE(SUM(d) EXPORT_STATE, AVG(d) EXPORT_STATE)) FROM dummy;
----
statement error
SELECT combine(NULL, NULL);
----
statement error
SELECT combine(42, 42);
----
statement error
SELECT finalize(NULL);
----
statement error
SELECT finalize(42);
----
statement error
SELECT finalize(sum(d)) from dummy;
----
statement error
SELECT finalize(sum(d)) from dummy group by g;
----
# can't finalize twice
statement error
SELECT finalize(finalize(sum(d) EXPORT_STATE)) from dummy;
----
statement error
select sum(42) EXPORT_STATE over ();
----
# tests with persistence (on-disk database)
load __TEST_DIR__/test_state_export.db
statement ok
create table dummy as select range % 10 g, range d from range(100);
# we can persist this
statement ok
CREATE TABLE state AS SELECT g, count(*) EXPORT_STATE count_star_state, count(d) EXPORT_STATE count_state, sum(d) EXPORT_STATE sum_state, avg(d) EXPORT_STATE avg_state, min(d) EXPORT_STATE min_state, max(d) EXPORT_STATE max_state FROM dummy GROUP BY g ORDER BY g;
statement ok
CREATE VIEW state_view AS SELECT g, count(*) EXPORT_STATE count_star_state, count(d) EXPORT_STATE count_state, sum(d) EXPORT_STATE sum_state, avg(d) EXPORT_STATE avg_state, min(d) EXPORT_STATE min_state, max(d) EXPORT_STATE max_state FROM dummy GROUP BY g ORDER BY g;
restart
query IIIIIII nosort res10
SELECT g, count(*), count(d), sum(d), avg(d)::integer, min(d), max(d) FROM dummy GROUP BY g ORDER BY g;
----
query IIIIIII nosort res10
SELECT g, finalize(count_star_state),finalize(count_state), finalize(sum_state), finalize(avg_state)::integer, finalize(min_state), finalize(max_state) FROM state ORDER BY g;
----
query IIIIIII nosort res10
SELECT g, finalize(count_star_state),finalize(count_state), finalize(sum_state), finalize(avg_state)::integer, finalize(min_state), finalize(max_state) FROM state_view ORDER BY g;
----
# BLOB casting back and forth
statement ok
SELECT (SUM(42) EXPORT_STATE)::BLOB;
query I
SELECT FINALIZE(COMBINE(SUM(42) EXPORT_STATE, (SUM(42) EXPORT_STATE)::BLOB));
----
84
statement error
SELECT (SUM(42) EXPORT_STATE)::INTEGER;
----
statement error
SELECT COMBINE(SUM(42) EXPORT_STATE, 42);
----
statement error
SELECT COMBINE(SUM(42) EXPORT_STATE, 'ASDF'::BLOB);
----
statement error
SELECT COMBINE((SUM(42) EXPORT_STATE)::BLOB, SUM(42) EXPORT_STATE);
----
# simulate round tripping
require parquet
statement ok
COPY (SELECT g, (SUM(d) EXPORT_STATE)::BLOB s1 FROM dummy GROUP BY g) TO '__TEST_DIR__/state.parquet' (FORMAT PARQUET);
query II
SELECT g, FINALIZE(COMBINE(s2, s1)) FROM (SELECT g, SUM(d) EXPORT_STATE s2 FROM dummy GROUP BY g) q1 JOIN '__TEST_DIR__/state.parquet' USING(g) ORDER BY g;
----
0 900
1 920
2 940
3 960
4 980
5 1000
6 1020
7 1040
8 1060
9 1080

View File

@@ -0,0 +1,152 @@
# name: test/sql/aggregate/aggregates/test_stddev.test
# description: STDDEV aggregations
# group: [aggregates]
# fixture: groups 1 and 2 hold two non-NULL values each (group 1 also has a NULL); group 3 holds only a NULL
statement ok
CREATE TABLE stddev_test(
val INTEGER,
grp INTEGER
)
statement ok
INSERT INTO stddev_test VALUES
(42, 1),
(43, 1),
(42, 2),
(1000, 2),
(NULL, 1),
(NULL, 3)
query I
SELECT stddev_samp(1)
----
NULL
query I
SELECT var_samp(1)
----
NULL
# stddev_samp
query R
select round(stddev_samp(val), 1) from stddev_test
----
478.800000
query R
select round(stddev_samp(val), 1) from stddev_test where val is not null
----
478.800000
query IRRI
select grp, sum(val), round(stddev_samp(val), 1), min(val) from stddev_test group by grp order by grp
----
1 85.000000 0.700000 42
2 1042.000000 677.400000 42
3 NULL NULL NULL
query IRRI
select grp, sum(val), round(stddev_samp(val), 1), min(val) from stddev_test where val is not null group by grp order by grp
----
1 85.000000 0.700000 42
2 1042.000000 677.400000 42
# stddev_pop
query R
select round(stddev_pop(val), 1) from stddev_test
----
414.700000
query R
select round(stddev_pop(val), 1) from stddev_test where val is not null
----
414.700000
query IRRI
select grp, sum(val), round(stddev_pop(val), 1), min(val) from stddev_test group by grp order by grp
----
1 85.000000 0.500000 42
2 1042.000000 479.000000 42
3 NULL NULL NULL
query IRRI
select grp, sum(val), round(stddev_pop(val), 1), min(val) from stddev_test where val is not null group by grp order by grp
----
1 85.000000 0.500000 42
2 1042.000000 479.000000 42
# var_samp
query R
select round(var_samp(val), 1) from stddev_test
----
229281.600000
query R
select round(variance(val), 1) from stddev_test
----
229281.600000
query R
select round(var_samp(val), 1) from stddev_test where val is not null
----
229281.600000
query IRRI
select grp, sum(val), round(var_samp(val), 1), min(val) from stddev_test group by grp order by grp
----
1 85.000000 0.500000 42
2 1042.000000 458882.000000 42
3 NULL NULL NULL
query IRRI
select grp, sum(val), round(var_samp(val), 1), min(val) from stddev_test where val is not null group by grp order by grp
----
1 85.000000 0.500000 42
2 1042.000000 458882.000000 42
# var_pop
query R
select round(var_pop(val), 1) from stddev_test
----
171961.200000
query R
select round(var_pop(val), 1) from stddev_test where val is not null
----
171961.200000
query IRRI
select grp, sum(val), round(var_pop(val), 2), min(val) from stddev_test group by grp order by grp
----
1 85.000000 0.250000 42
2 1042.000000 229441.000000 42
3 NULL NULL NULL
query IRRI
select grp, sum(val), round(var_pop(val), 2), min(val) from stddev_test where val is not null group by grp order by grp
----
1 85.000000 0.250000 42
2 1042.000000 229441.000000 42
statement ok
create table stddev_test_alias(val integer, grp integer)
statement ok
insert into stddev_test_alias values (42, 1), (43, 1), (42, 2), (1000, 2), (NULL, 1), (NULL, 3)
# stddev_samp
query R
select round(stddev(val), 1) from stddev_test_alias
----
478.800000
query I
select stddev(0) from range(10)
----
0
statement error
select stddev(a) from (values (1e301), (-1e301)) tbl(a)
----
Out of Range Error: STDDEV_SAMP is out of range
statement error
select var_samp(a) from (values (1e301), (-1e301)) tbl(a)
----
Out of Range Error: VARSAMP is out of range
statement error
select var_pop(a) from (values (1e301), (-1e301)) tbl(a)
----
Out of Range Error: VARPOP is out of range

View File

@@ -0,0 +1,233 @@
# name: test/sql/aggregate/aggregates/test_string_agg.test
# description: Test STRING_AGG operator
# group: [aggregates]
statement ok
SET default_null_order='nulls_first';
statement ok
PRAGMA enable_verification
statement ok
PRAGMA verify_external
# test incorrect usage of STRING_AGG function
statement error
SELECT STRING_AGG()
----
statement error
SELECT STRING_AGG('a', 'b', 'c')
----
statement error
SELECT STRING_AGG(STRING_AGG('a',','))
----
# test string aggregation on scalar values
query T
SELECT STRING_AGG('a',',')
----
a
# test string aggregation on scalar values
query TTTT
SELECT STRING_AGG('a',','), STRING_AGG(NULL,','), STRING_AGG('a', NULL), STRING_AGG(NULL,NULL)
----
a
NULL
NULL
NULL
# test string aggregation on a set of values
# fixture: g is the group key, x the value to concatenate (one NULL in group 2), y a per-row separator candidate
statement ok
CREATE TABLE strings(
g INTEGER,
x VARCHAR,
y VARCHAR
);
statement ok
INSERT INTO strings VALUES
(1,'a','/'),
(1,'b','-'),
(2,'i','/'),
(2,NULL,'-'),
(2,'j','+'),
(3,'p','/'),
(4,'x','/'),
(4,'y','-'),
(4,'z','+')
# string agg separator must be a constant
statement error
SELECT STRING_AGG(x,','), STRING_AGG(x,y) FROM strings
----
query II
SELECT g, STRING_AGG(x,'|') FROM strings GROUP BY g ORDER BY g
----
1 a|b
2 i|j
3 p
4 x|y|z
# test agg on empty set
query T
SELECT STRING_AGG(x,',') FROM strings WHERE g > 100
----
NULL
# numerics are not auto cast to strings
statement error
SELECT STRING_AGG(1, 2)
----
No function matches
# group concat is an alias for string_agg
query T
SELECT GROUP_CONCAT('a', ',')
----
a
query T
SELECT GROUP_CONCAT('a')
----
a
query TT
SELECT g, GROUP_CONCAT(x) FROM strings GROUP BY g ORDER BY g
----
1 a,b
2 i,j
3 p
4 x,y,z
#
# Test ORDER BY
#
# Impervious to threading
statement ok
PRAGMA verify_parallelism
# Single group
query TT
SELECT STRING_AGG(x ORDER BY x ASC), STRING_AGG(x, '|' ORDER BY x ASC) FROM strings
----
a,b,i,j,p,x,y,z
a|b|i|j|p|x|y|z
query TT
SELECT STRING_AGG(x ORDER BY x DESC), STRING_AGG(x,'|' ORDER BY x DESC) FROM strings
----
z,y,x,p,j,i,b,a
z|y|x|p|j|i|b|a
# Constant separator
query III
SELECT g, STRING_AGG(x ORDER BY x ASC), STRING_AGG(x, '|' ORDER BY x ASC) FROM strings GROUP BY g ORDER BY 1
----
1 a,b a|b
2 i,j i|j
3 p p
4 x,y,z x|y|z
query III
SELECT g, STRING_AGG(x ORDER BY x DESC), STRING_AGG(x, '|' ORDER BY x DESC) FROM strings GROUP BY g ORDER BY 1
----
1 b,a b|a
2 j,i j|i
3 p p
4 z,y,x z|y|x
# Variable separator
statement error
SELECT g, STRING_AGG(x, y ORDER BY x ASC) FROM strings GROUP BY g ORDER BY 1
----
statement error
SELECT g, STRING_AGG(x, y ORDER BY x DESC) FROM strings GROUP BY g ORDER BY 1
----
# A more complex ORDER BY expression
# Ordering [NULL a b i j p x y z]
query I
SELECT STRING_AGG(g::VARCHAR, ',' ORDER BY CONCAT(x, y) ASC) FROM strings ORDER BY 1
----
2,1,1,2,2,3,4,4,4
# Ordering by multiple columns
query I
SELECT STRING_AGG(g::VARCHAR, ',' ORDER BY x, y) FROM strings ORDER BY 1
----
2,1,1,2,2,3,4,4,4
# Two expressions with a different ORDER BY clause
query II
SELECT STRING_AGG(x, ',' ORDER BY x DESC), STRING_AGG(x, ',' ORDER BY x ASC) FROM strings;
----
z,y,x,p,j,i,b,a a,b,i,j,p,x,y,z
# ORDER + FILTER
query II
SELECT y, STRING_AGG(x, ',' ORDER BY x DESC) FILTER (WHERE g < 3)
FROM strings
GROUP BY y
ORDER BY 1
----
+ j
- b
/ i,a
# ORDER + FILTER + DISTINCT
statement error
SELECT g, STRING_AGG(DISTINCT y, ',' ORDER BY x DESC) FILTER (WHERE g < 4)
FROM strings
GROUP BY g
ORDER BY 1
----
Binder Error: In a DISTINCT aggregate, ORDER BY expressions must appear in the argument list
# ORDER BY on a correlated column
statement ok
CREATE TABLE integers(i INTEGER);
statement ok
INSERT INTO integers (VALUES (1), (2), (3), (NULL));
query II
SELECT i1.i, (SELECT STRING_AGG(i::VARCHAR, ',' ORDER BY i1.i+i) FROM integers WHERE i<=i1.i) c1
FROM integers i1
ORDER BY 1 NULLS LAST
----
1 1
2 1,2
3 1,2,3
NULL NULL
# DISTINCT + ORDER BY on non-volatile functional dependencies
query I
SELECT string_agg(DISTINCT CellType, '&' ORDER BY list_position(['L900','L1800','L2100','L2600'], CellType))
FROM (VALUES
('L900'),
('L2600'),
('L2100'),
('L2100'),
('L1800')
) AS t(CellType);
----
L900&L1800&L2100&L2600
# DISTINCT + ORDER BY on volatile functional dependencies
statement error
SELECT first(DISTINCT i ORDER BY random() * i)
FROM (VALUES
(900),
(2600),
(2100),
(2100),
(1800)
) AS t(i);
----
Binder Error: In a DISTINCT aggregate, ORDER BY expressions must appear in the argument list
statement error
SELECT first(DISTINCT random() * i ORDER BY i)
FROM (VALUES
(900),
(2600),
(2100),
(2100),
(1800)
) AS t(i);
----
Binder Error: In a DISTINCT aggregate, ORDER BY expressions must appear in the argument list

View File

@@ -0,0 +1,31 @@
# name: test/sql/aggregate/aggregates/test_string_agg_big.test
# description: STRING_AGG big
# group: [aggregates]
statement ok
PRAGMA enable_verification
statement ok
PRAGMA verify_external
statement ok
PRAGMA verify_parallelism
statement ok
CREATE TABLE strings AS SELECT c::VARCHAR g, (c*10+e)::VARCHAR x FROM range(0, 100, 1) t1(c), range(0, 100, 1) t2(e);
query I
SELECT COUNT(*) FROM (SELECT g, STRING_AGG(x,',') FROM strings GROUP BY g) t1
----
100
# ordered STRING_AGG with the default and with an explicit ',' separator must hash identically
# (two columns x 100 groups = 200 values)
query II
SELECT g, STRING_AGG(x ORDER BY x DESC) FROM strings GROUP BY g ORDER BY 1, 2
----
200 values hashing to d9069f92649cccb595d30934b2c25cbc
query II
SELECT g, STRING_AGG(x,',' ORDER BY x DESC) FROM strings GROUP BY g ORDER BY 1, 2
----
200 values hashing to d9069f92649cccb595d30934b2c25cbc

View File

@@ -0,0 +1,44 @@
# name: test/sql/aggregate/aggregates/test_string_agg_many_groups.test_slow
# description: Test STRING_AGG operator with many groups
# group: [aggregates]
statement ok
PRAGMA enable_verification
statement ok
PRAGMA verify_parallelism
# generate a table
statement ok
CREATE TABLE strings AS SELECT g, 'hello' x FROM range(0, 10000, 1) t1(g);
query IT
SELECT g, STRING_AGG(x, ',') FROM strings GROUP BY g ORDER BY g
----
20000 values hashing to acd848208cc35c7324ece9fcdd507823
query IT
SELECT 1, STRING_AGG(x, ',') FROM strings GROUP BY 1 ORDER BY 1
----
2 values hashing to 3bd0d16b476d2ffe18a77ebe1098b89c
statement error
SELECT STRING_AGG(k, ','), SUM(CAST(k AS BIGINT)) FROM (SELECT CAST(g AS VARCHAR) FROM strings UNION ALL SELECT CAST(x AS VARCHAR) FROM strings) tbl1(k)
----
query IT
SELECT 1, STRING_AGG(x, ',' ORDER BY g DESC) FROM strings GROUP BY 1 ORDER BY 1
----
2 values hashing to 3bd0d16b476d2ffe18a77ebe1098b89c
# Scans over the range function do not support parallelism,
# so a temporary table is needed
statement ok
CREATE TABLE many_strings AS SELECT i id, i::VARCHAR s FROM range(1000000) tbl(i);
query I
SELECT STRING_AGG(s, ',' ORDER BY id DESC)
FROM many_strings
WHERE id%100000=0;
----
900000,800000,700000,600000,500000,400000,300000,200000,100000,0

View File

@@ -0,0 +1,98 @@
# name: test/sql/aggregate/aggregates/test_sum.test
# description: Test sum aggregate
# group: [aggregates]
statement ok
CREATE TABLE integers(i INTEGER);
statement ok
INSERT INTO integers SELECT * FROM range(0, 1000, 1);
# positive numbers
query I
SELECT SUM(i) FROM integers;
----
499500
# negative numbers
statement ok
INSERT INTO integers SELECT * FROM range(0, -1000, -1);
query I
SELECT SUM(i) FROM integers;
----
0
# more negative numbers
statement ok
INSERT INTO integers SELECT * FROM range(0, -1000, -1);
query I
SELECT SUM(i) FROM integers;
----
-499500
# now perform sum of a constant
query I
SELECT SUM(1) FROM integers;
----
3000
# negative constant
query I
SELECT SUM(-1) FROM integers;
----
-3000
# negative constant with a low amount of values
query I
SELECT SUM(-1) FROM integers WHERE i=-1;
----
-2
# no values
query I
SELECT SUM(-1) FROM integers WHERE i>10000;
----
NULL
# bigint sum
statement ok
CREATE TABLE bigints(b BIGINT);
# a bunch of huge values
statement ok
INSERT INTO bigints SELECT * FROM range(4611686018427387904, 4611686018427388904, 1);
# sum them up
query I
SELECT SUM(b) FROM bigints
----
4611686018427388403500
# this is too big for a bigint
statement error
SELECT SUM(b)::BIGINT FROM bigints
----
Conversion Error: Type INT128 with value 4611686018427388403500
#
# Order by
#
statement ok
CREATE TABLE doubles(n DOUBLE);
statement ok
INSERT INTO doubles (n) VALUES ('9007199254740992'::DOUBLE), (1::DOUBLE), (1::DOUBLE), (0::DOUBLE);
# Correct result when ordered by size
query I
SELECT sum(n ORDER BY ABS(n))::BIGINT FROM doubles;
----
9007199254740994
# Error as an ordered aggregate
statement error
SELECT (sum(n) WITHIN GROUP(ORDER BY ABS(n)))::BIGINT FROM doubles;
----
Parser Error: Unknown ordered aggregate "sum"

View File

@@ -0,0 +1,72 @@
# name: test/sql/aggregate/aggregates/test_weighted_avg.test
# description: Test weighted_avg operator
# group: [aggregates]
# scalar weighted average with NULLs
query RRRR
SELECT weighted_avg(3, 3), weighted_avg(3, NULL), weighted_avg(NULL, 3), weighted_avg(NULL, NULL)
----
3
NULL
NULL
NULL
# scalar weighted with zero weight will result in nan
query RRRR
SELECT weighted_avg(3, 0), weighted_avg(3, 0.0), weighted_avg(0, 3), weighted_avg(0.0, 3)
----
nan
nan
0.0
0.0
# test alias 'wavg'
query R
SELECT wavg(3, 3)
----
3
# test weighted average on real world example
# fixture: one row per (student, grade, weight); etcs is used as the weight of each grade
statement ok
CREATE TABLE students(
name TEXT,
grade INTEGER,
etcs INTEGER
);
statement ok
INSERT INTO students VALUES
('Alice', 8, 6),
('Alice', 6, 2),
('Bob', 6, 3),
('Bob', 8, 3),
('Bob', 6, 6);
# Alice: (8*6 + 6*2) / (6 + 2) = 60 / 8 = 7.5
# Bob: (6*3 + 8*3 + 6*6) / (3 + 3 + 6) = (18 + 24 + 36) / 12 = 78 / 12 = 6.5
query II
SELECT name, weighted_avg(grade, etcs) FROM students GROUP BY name ORDER BY name
----
Alice 7.5
Bob 6.5
# adding an entry with weight 0 should not change the result
statement ok
INSERT INTO students VALUES ('Alice', 42, 0);
query II
SELECT name, weighted_avg(grade, etcs) FROM students GROUP BY name ORDER BY name
----
Alice 7.5
Bob 6.5
# weighted_avg skips rows where the weight is NULL, so adding a row with NULL weight should not change the result
statement ok
INSERT INTO students VALUES ('Alice', 42, NULL);
query II
SELECT name, weighted_avg(grade, etcs) FROM students GROUP BY name ORDER BY name
----
Alice 7.5
Bob 6.5
# weighted_avg skips rows where the value is NULL, so adding a row with NULL value should not change the result
statement ok
INSERT INTO students VALUES ('Alice', NULL, 42);
query II
SELECT name, weighted_avg(grade, etcs) FROM students GROUP BY name ORDER BY name
----
Alice 7.5
Bob 6.5

View File

@@ -0,0 +1,83 @@
# name: test/sql/aggregate/distinct/distinct_on_nulls.test
# description: Test DISTINCT ON with NULL values
# group: [distinct]
statement ok
PRAGMA enable_verification
statement ok
CREATE TABLE integers(i INTEGER, j INTEGER);
statement ok
INSERT INTO integers VALUES (2, 3), (4, 5), (2, NULL), (NULL, NULL);
query II
SELECT DISTINCT ON (i) i, j FROM integers ORDER BY j
----
2 3
4 5
NULL NULL
query II
SELECT DISTINCT ON (i) i, j FROM integers ORDER BY i, j
----
2 3
4 5
NULL NULL
query II
SELECT DISTINCT ON (i) i, j FROM integers ORDER BY i NULLS FIRST, j NULLS FIRST
----
NULL NULL
2 NULL
4 5
query II
SELECT DISTINCT ON (i) i, j FROM integers ORDER BY i, j NULLS FIRST
----
2 NULL
4 5
NULL NULL
# multi-way sort and ties
statement ok
CREATE TABLE distinct_on_test(key INTEGER, v1 VARCHAR, v2 INTEGER[], v3 INTEGER);
statement ok
INSERT INTO distinct_on_test VALUES
(1, 'hello', ARRAY[1], 42), -- ASC
(1, 'hello', ARRAY[1], 42),
(1, 'hello', ARRAY[1], 43), -- DESC
(2, NULL, NULL, 0), -- ASC
(2, NULL, NULL, 1),
(2, NULL, NULL, NULL), -- DESC
(3, 'thisisalongstring', NULL, 0), -- ASC
(3, 'thisisalongstringbutlonger', NULL, 1),
(3, 'thisisalongstringbutevenlonger', ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9], 2) -- DESC
;
query IIII
SELECT DISTINCT ON (key) * FROM distinct_on_test ORDER BY key, v1, v2, v3
----
1 hello [1] 42
2 NULL NULL 0
3 thisisalongstring NULL 0
query IIII
SELECT DISTINCT ON (key) * FROM distinct_on_test WHERE key <> 2 ORDER BY key, v1, v2, v3
----
1 hello [1] 42
3 thisisalongstring NULL 0
query IIII
SELECT DISTINCT ON (key) * FROM distinct_on_test ORDER BY key, v1 DESC NULLS FIRST, v2 DESC NULLS FIRST, v3 DESC NULLS FIRST
----
1 hello [1] 43
2 NULL NULL NULL
3 thisisalongstringbutlonger NULL 1
query IIII
SELECT DISTINCT ON (key) * FROM distinct_on_test WHERE key <> 2 ORDER BY key, v1 DESC NULLS FIRST, v2 DESC NULLS FIRST, v3 DESC NULLS FIRST
----
1 hello [1] 43
3 thisisalongstringbutlonger NULL 1

View File

@@ -0,0 +1,161 @@
# name: test/sql/aggregate/distinct/distinct_on_order_by.test
# description: Test DISTINCT ON ORDER BY
# group: [distinct]
statement ok
PRAGMA enable_verification
statement ok
CREATE TABLE integers(i INTEGER, j INTEGER, k INTEGER);
statement ok
INSERT INTO integers VALUES (2, 3, 5), (4, 5, 6), (2, 7, 6);
query II
SELECT DISTINCT ON (i) i, j FROM integers ORDER BY i, j DESC;
----
2 7
4 5
query II
SELECT DISTINCT ON (i) i, j FROM integers ORDER BY i, j;
----
2 3
4 5
# we don't need to ORDER BY i
query II
SELECT DISTINCT ON (i) i, j FROM integers ORDER BY j DESC;
----
2 7
4 5
query II
SELECT DISTINCT ON (i) i, j FROM integers ORDER BY j;
----
2 3
4 5
# DISTINCT ON in correlated subqueries
query III
SELECT i, j, (SELECT DISTINCT ON(i) j) AS k FROM integers ORDER BY i, j;
----
2 3 3
2 7 7
4 5 5
query III
SELECT i, j, (SELECT DISTINCT ON(i) j ORDER BY i, j DESC) AS k FROM integers ORDER BY i, j;
----
2 3 3
2 7 7
4 5 5
query III
SELECT i, j, (SELECT DISTINCT ON(i) j ORDER BY i, k) AS k FROM integers ORDER BY i, j;
----
2 3 3
2 7 7
4 5 5
# DISTINCT ON with multiple parameters
statement ok
INSERT INTO integers VALUES (2, 3, 7), (4, 5, 11);
query III
SELECT DISTINCT ON(i) i, j, k FROM integers ORDER BY i, j ASC, k ASC
----
2 3 5
4 5 6
query III
SELECT DISTINCT ON(i) i, j, k FROM integers ORDER BY i, j ASC, k DESC
----
2 3 7
4 5 11
# DISTINCT ON with NULL values
statement ok
INSERT INTO integers VALUES (2, NULL, 27), (4, 88, NULL);
query III
SELECT DISTINCT ON(i) i, j, k FROM integers ORDER BY i, j NULLS FIRST, k DESC NULLS LAST;
----
2 NULL 27
4 5 11
query III
SELECT DISTINCT ON(i) i, j, k FROM integers ORDER BY i, j NULLS FIRST, k NULLS FIRST;
----
2 NULL 27
4 5 6
query III
SELECT DISTINCT ON(i) i, j, k FROM integers ORDER BY i, k NULLS FIRST, j NULLS FIRST;
----
2 3 5
4 88 NULL
# examples from the original issue
statement ok
create table foo(a real, b real);
statement ok
insert into foo values (1, 69), (1, 420), (2, 69), (2, 420);
query II rowsort
select distinct on(a) a, b from foo order by b asc;
----
1 69
2 69
query II rowsort
select distinct on(a) a, b from foo order by b desc;
----
1 420
2 420
statement ok
CREATE TABLE example (
id INT,
person_id INT,
address_id INT,
effective_date DATE
);
statement ok
INSERT INTO
example (id, person_id, address_id, effective_date)
VALUES
(1, 2, 1, '2000-01-01'), -- Moved to first house
(5, 2, 2, '2004-08-19'), -- Went to uni
(9, 2, 1, '2007-06-12'), -- Moved back home
(2, 4, 3, '2007-05-18'), -- Moved to first house
(3, 4, 4, '2016-02-09') -- Moved to new house
;
query IIII
SELECT DISTINCT ON (person_id)
*
FROM
example
ORDER BY
person_id,
effective_date ASC
;
----
1 2 1 2000-01-01
2 4 3 2007-05-18
query IIII
SELECT DISTINCT ON (person_id)
*
FROM
example
ORDER BY
person_id,
effective_date DESC
;
----
9 2 1 2007-06-12
3 4 4 2016-02-09

View File

@@ -0,0 +1,274 @@
# name: test/sql/aggregate/distinct/grouped/combined_with_grouping.test
# description: Test DISTINCT aggregates combined with GROUPING / GROUPING_ID over plain and CUBE groupings
# group: [grouped]
# NULLs sort first, so NULL group keys precede non-NULL ones in the ORDER BY ALL results
statement ok
SET default_null_order='nulls_first';
statement ok
PRAGMA enable_verification
statement ok
PRAGMA verify_parallelism
# Fixture: contains a duplicated row, NULL types and a NULL value
statement ok
create table students (
course VARCHAR,
type VARCHAR,
value BIGINT
);
statement ok
insert into students
(course, type, value)
values
('CS', 'Bachelor', 34),
('CS', 'Bachelor', 34),
('CS', 'PhD', 12),
('Math', 'Masters', 12),
('CS', NULL, 10),
('CS', NULL, 12),
('Math', NULL, 12),
('Math', NULL, NULL);
query IIII
SELECT GROUPING(course), course, sum(distinct value), COUNT(*) FROM students GROUP BY course ORDER BY all;
----
0 CS 56 5
0 Math 12 3
query IIII
SELECT sum(distinct value), GROUPING_ID(course), course, COUNT(*) FROM students GROUP BY course ORDER BY all;
----
12 0 Math 3
56 0 CS 5
query IIIIIII
SELECT GROUPING(course), GROUPING(type), course, type, sum(distinct value), COUNT(*), sum(distinct value), FROM students GROUP BY course, type ORDER BY all;
----
0 0 CS NULL 22 2 22
0 0 CS Bachelor 34 2 34
0 0 CS PhD 12 1 12
0 0 Math NULL 12 2 12
0 0 Math Masters 12 1 12
query IIIIIII
SELECT GROUPING(course), GROUPING(type), avg(distinct value), course, type, COUNT(*), sum(distinct value), FROM students GROUP BY CUBE(course, type) ORDER BY all;
----
0 0 11.0 CS NULL 2 22
0 0 12.0 CS PhD 1 12
0 0 12.0 Math NULL 2 12
0 0 12.0 Math Masters 1 12
0 0 34.0 CS Bachelor 2 34
0 1 12.0 Math NULL 3 12
0 1 18.666666666666668 CS NULL 5 56
1 0 11.0 NULL NULL 4 22
1 0 12.0 NULL Masters 1 12
1 0 12.0 NULL PhD 1 12
1 0 34.0 NULL Bachelor 2 34
1 1 18.666666666666668 NULL NULL 8 56
query IIIIII
SELECT sum(distinct value), GROUPING(course, type), course, type, COUNT(*), sum(distinct value), FROM students GROUP BY CUBE(course, type) ORDER BY all;
----
12 0 CS PhD 1 12
12 0 Math NULL 2 12
12 0 Math Masters 1 12
12 1 Math NULL 3 12
12 2 NULL Masters 1 12
12 2 NULL PhD 1 12
22 0 CS NULL 2 22
22 2 NULL NULL 4 22
34 0 CS Bachelor 2 34
34 2 NULL Bachelor 2 34
56 1 CS NULL 5 56
56 3 NULL NULL 8 56
query IIIIIIII
SELECT GROUPING(course), GROUPING(type), sum(distinct value), GROUPING(course)+GROUPING(type), course, type, count(distinct value), COUNT(*) FROM students GROUP BY CUBE(course, type) ORDER BY all;
----
0 0 12 0 CS PhD 1 1
0 0 12 0 Math NULL 1 2
0 0 12 0 Math Masters 1 1
0 0 22 0 CS NULL 2 2
0 0 34 0 CS Bachelor 1 2
0 1 12 1 Math NULL 1 3
0 1 56 1 CS NULL 3 5
1 0 12 1 NULL Masters 1 1
1 0 12 1 NULL PhD 1 1
1 0 22 1 NULL NULL 2 4
1 0 34 1 NULL Bachelor 1 2
1 1 56 2 NULL NULL 3 8
# many repeated groupings
query IIIIIII
SELECT GROUPING(course, type, course, course, type, value, type, course), avg(distinct value), avg(value), avg(distinct value), course, type, COUNT(*) FROM students GROUP BY CUBE(course, type, value) ORDER BY all;
----
0 NULL NULL NULL Math NULL 1
0 10.0 10.0 10.0 CS NULL 1
0 12.0 12.0 12.0 CS NULL 1
0 12.0 12.0 12.0 CS PhD 1
0 12.0 12.0 12.0 Math NULL 1
0 12.0 12.0 12.0 Math Masters 1
0 34.0 34.0 34.0 CS Bachelor 2
4 11.0 11.0 11.0 CS NULL 2
4 12.0 12.0 12.0 CS PhD 1
4 12.0 12.0 12.0 Math NULL 2
4 12.0 12.0 12.0 Math Masters 1
4 34.0 34.0 34.0 CS Bachelor 2
74 NULL NULL NULL Math NULL 1
74 10.0 10.0 10.0 CS NULL 1
74 12.0 12.0 12.0 CS NULL 2
74 12.0 12.0 12.0 Math NULL 2
74 34.0 34.0 34.0 CS NULL 2
78 12.0 12.0 12.0 Math NULL 3
78 18.666666666666668 20.4 18.666666666666668 CS NULL 5
177 NULL NULL NULL NULL NULL 1
177 10.0 10.0 10.0 NULL NULL 1
177 12.0 12.0 12.0 NULL NULL 2
177 12.0 12.0 12.0 NULL Masters 1
177 12.0 12.0 12.0 NULL PhD 1
177 34.0 34.0 34.0 NULL Bachelor 2
181 11.0 11.333333333333334 11.0 NULL NULL 4
181 12.0 12.0 12.0 NULL Masters 1
181 12.0 12.0 12.0 NULL PhD 1
181 34.0 34.0 34.0 NULL Bachelor 2
251 NULL NULL NULL NULL NULL 1
251 10.0 10.0 10.0 NULL NULL 1
251 12.0 12.0 12.0 NULL NULL 4
251 34.0 34.0 34.0 NULL NULL 2
255 18.666666666666668 18.0 18.666666666666668 NULL NULL 8
# GROUPING with different table qualifications
query IIIIIIII
SELECT GROUPING(students.course), GROUPING(students.type), sum(distinct value), GROUPING(course)+GROUPING(type), course, avg(distinct value), type, COUNT(*) FROM students GROUP BY CUBE(course, type, value) ORDER BY all;
----
0 0 NULL 0 Math NULL NULL 1
0 0 10 0 CS 10.0 NULL 1
0 0 12 0 CS 12.0 NULL 1
0 0 12 0 CS 12.0 PhD 1
0 0 12 0 CS 12.0 PhD 1
0 0 12 0 Math 12.0 NULL 1
0 0 12 0 Math 12.0 NULL 2
0 0 12 0 Math 12.0 Masters 1
0 0 12 0 Math 12.0 Masters 1
0 0 22 0 CS 11.0 NULL 2
0 0 34 0 CS 34.0 Bachelor 2
0 0 34 0 CS 34.0 Bachelor 2
0 1 NULL 1 Math NULL NULL 1
0 1 10 1 CS 10.0 NULL 1
0 1 12 1 CS 12.0 NULL 2
0 1 12 1 Math 12.0 NULL 2
0 1 12 1 Math 12.0 NULL 3
0 1 34 1 CS 34.0 NULL 2
0 1 56 1 CS 18.666666666666668 NULL 5
1 0 NULL 1 NULL NULL NULL 1
1 0 10 1 NULL 10.0 NULL 1
1 0 12 1 NULL 12.0 NULL 2
1 0 12 1 NULL 12.0 Masters 1
1 0 12 1 NULL 12.0 Masters 1
1 0 12 1 NULL 12.0 PhD 1
1 0 12 1 NULL 12.0 PhD 1
1 0 22 1 NULL 11.0 NULL 4
1 0 34 1 NULL 34.0 Bachelor 2
1 0 34 1 NULL 34.0 Bachelor 2
1 1 NULL 2 NULL NULL NULL 1
1 1 10 2 NULL 10.0 NULL 1
1 1 12 2 NULL 12.0 NULL 4
1 1 34 2 NULL 34.0 NULL 2
1 1 56 2 NULL 18.666666666666668 NULL 8
query IIIIIIII
SELECT GROUPING(course), GROUPING(type), avg(value), GROUPING(course)+GROUPING(type), avg(distinct value), course, type, COUNT(*) FROM students GROUP BY CUBE(students.course, students.type) ORDER BY all;
----
0 0 11.0 0 11.0 CS NULL 2
0 0 12.0 0 12.0 CS PhD 1
0 0 12.0 0 12.0 Math NULL 2
0 0 12.0 0 12.0 Math Masters 1
0 0 34.0 0 34.0 CS Bachelor 2
0 1 12.0 1 12.0 Math NULL 3
0 1 20.4 1 18.666666666666668 CS NULL 5
1 0 11.333333333333334 1 11.0 NULL NULL 4
1 0 12.0 1 12.0 NULL Masters 1
1 0 12.0 1 12.0 NULL PhD 1
1 0 34.0 1 34.0 NULL Bachelor 2
1 1 18.0 2 18.666666666666668 NULL NULL 8
# GROUPING in HAVING clause
query IIIII
SELECT GROUPING(course), GROUPING(value), course, sum(distinct value), COUNT(*) FROM students GROUP BY CUBE(course, value) HAVING GROUPING(course)=0 ORDER BY all;
----
0 0 CS 10 1
0 0 CS 12 2
0 0 CS 34 2
0 0 Math NULL 1
0 0 Math 12 2
0 1 CS 56 5
0 1 Math 12 3
query IIIIIIII
SELECT GROUPING(course), GROUPING(type), sum(distinct value), course, type, sum(distinct value), avg(distinct value), COUNT(*) FROM students GROUP BY CUBE(course, value, type, value) HAVING GROUPING(students.course)=0 ORDER BY all;
----
0 0 NULL Math NULL NULL NULL 1
0 0 NULL Math NULL NULL NULL 1
0 0 NULL Math NULL NULL NULL 1
0 0 10 CS NULL 10 10.0 1
0 0 10 CS NULL 10 10.0 1
0 0 10 CS NULL 10 10.0 1
0 0 12 CS NULL 12 12.0 1
0 0 12 CS NULL 12 12.0 1
0 0 12 CS NULL 12 12.0 1
0 0 12 CS PhD 12 12.0 1
0 0 12 CS PhD 12 12.0 1
0 0 12 CS PhD 12 12.0 1
0 0 12 CS PhD 12 12.0 1
0 0 12 Math NULL 12 12.0 1
0 0 12 Math NULL 12 12.0 1
0 0 12 Math NULL 12 12.0 1
0 0 12 Math NULL 12 12.0 2
0 0 12 Math Masters 12 12.0 1
0 0 12 Math Masters 12 12.0 1
0 0 12 Math Masters 12 12.0 1
0 0 12 Math Masters 12 12.0 1
0 0 22 CS NULL 22 11.0 2
0 0 34 CS Bachelor 34 34.0 2
0 0 34 CS Bachelor 34 34.0 2
0 0 34 CS Bachelor 34 34.0 2
0 0 34 CS Bachelor 34 34.0 2
0 1 NULL Math NULL NULL NULL 1
0 1 NULL Math NULL NULL NULL 1
0 1 NULL Math NULL NULL NULL 1
0 1 10 CS NULL 10 10.0 1
0 1 10 CS NULL 10 10.0 1
0 1 10 CS NULL 10 10.0 1
0 1 12 CS NULL 12 12.0 2
0 1 12 CS NULL 12 12.0 2
0 1 12 CS NULL 12 12.0 2
0 1 12 Math NULL 12 12.0 2
0 1 12 Math NULL 12 12.0 2
0 1 12 Math NULL 12 12.0 2
0 1 12 Math NULL 12 12.0 3
0 1 34 CS NULL 34 34.0 2
0 1 34 CS NULL 34 34.0 2
0 1 34 CS NULL 34 34.0 2
0 1 56 CS NULL 56 18.666666666666668 5
# GROUPING in ORDER BY clause
query IIIII
SELECT type, COUNT(*), avg(value), sum(distinct value), avg(distinct value), FROM students GROUP BY CUBE(value, type) ORDER BY GROUPING(value), GROUPING(type), 1, 2, 3, 4, 5;
----
NULL 1 NULL NULL NULL
NULL 1 10.0 10 10.0
NULL 2 12.0 12 12.0
Bachelor 2 34.0 34 34.0
Masters 1 12.0 12 12.0
PhD 1 12.0 12 12.0
NULL 1 NULL NULL NULL
NULL 1 10.0 10 10.0
NULL 2 34.0 34 34.0
NULL 4 12.0 12 12.0
NULL 4 11.333333333333334 22 11.0
Bachelor 2 34.0 34 34.0
Masters 1 12.0 12 12.0
PhD 1 12.0 12 12.0
NULL 8 18.0 56 18.666666666666668

View File

@@ -0,0 +1,122 @@
# name: test/sql/aggregate/distinct/grouped/coverage.test_slow
# description: DISTINCT aggregations
# group: [grouped]
# These tests target the grouped aggregate operator rather than the aggregate functions themselves,
# so they focus on the order and mixing of distinct and non-distinct aggregates
# and not on variation between types and functions
#Recursive CTE
query I
with recursive t as (select 1 as x union select sum(distinct x+1) from t where x < 3 group by x) select * from t order by x;
----
1
2
3
# Prepared statement
statement ok
CREATE TABLE tbl AS SELECT i, i%5 as j FROM range(1000000) tbl(i);
statement ok
PREPARE v1 AS SELECT SUM(DISTINCT i%5+?::INT) FROM tbl group by j order by all;
query I
EXECUTE v1(1);
----
1
2
3
4
5
query I
EXECUTE v1(2);
----
2
3
4
5
6
query I
EXECUTE v1(3);
----
3
4
5
6
7
# DISTINCT aggregate parameter as expression
query I
SELECT COUNT(distinct i % 5) from tbl group by j;
----
1
1
1
1
1
# Correlated subquery
query I
SELECT COUNT(distinct (SELECT i%5)) from tbl group by j;
----
1
1
1
1
1
## Aggregate with multiple parameters
query I
SELECT ARG_MIN(distinct i%5, i) from tbl group by j order by all;
----
0
1
2
3
4
# Distinct lists
statement ok
CREATE TABLE lists_tbl AS SELECT i%20 as groups, [x + i for x in range(280)] AS l FROM range(200000) tmp(i);
query IIII
SELECT COUNT(l), avg(groups), COUNT(DISTINCT l), groups FROM lists_tbl group by groups order by groups limit 10;
----
10000 0.0 10000 0
10000 1.0 10000 1
10000 2.0 10000 2
10000 3.0 10000 3
10000 4.0 10000 4
10000 5.0 10000 5
10000 6.0 10000 6
10000 7.0 10000 7
10000 8.0 10000 8
10000 9.0 10000 9
# Non-inlined (>12 length) strings
statement ok
create table strings_tbl as select gen_random_uuid() as strings, i as groups from range(200000) tbl(i);
query II
select count(strings), count(distinct strings) from strings_tbl group by groups order by groups limit 10;
----
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1

View File

@@ -0,0 +1,255 @@
# name: test/sql/aggregate/distinct/grouped/distinct_and_non_distinct_mixed.test_slow
# description: Test mixing DISTINCT and non-DISTINCT aggregates (with and without FILTER) in grouped aggregation
# group: [grouped]
# These tests target the grouped aggregate operator rather than the aggregate functions themselves,
# so they focus on the order and mixing of distinct and non-distinct aggregates
# and not on variation between types and functions
# Legend for the per-test markers (one character per aggregate, in select-list order):
#distinct aggregate = 'D'
#regular aggregate = '-'
statement ok
PRAGMA enable_verification
# Fixture: 50000 rows; i cycles over 0..49, j over 0..99 and groups over 0..4
statement ok
create table tbl as
(select i%50 as i, i%100 as j, i%5 as groups from range(50000) tbl(i))
;
# D
query I
select
count(distinct i)
from tbl group by groups;
----
10
10
10
10
10
# D--
query III rowsort
select
sum(distinct i),
sum(i),
sum(j)
from tbl group by groups;
----
225 225000 475000
235 235000 485000
245 245000 495000
255 255000 505000
265 265000 515000
# --D
query III rowsort
select
sum(i),
sum(j),
sum(distinct i)
from tbl group by groups;
----
225000 475000 225
235000 485000 235
245000 495000 245
255000 505000 255
265000 515000 265
# -D-
query III rowsort
select
sum(i),
sum(distinct i),
sum(j)
from tbl group by groups;
----
225000 225 475000
235000 235 485000
245000 245 495000
255000 255 505000
265000 265 515000
# D-D
query III rowsort
select
sum(distinct i),
count(j),
sum(distinct j)
from tbl group by groups;
----
225 10000 950
235 10000 970
245 10000 990
255 10000 1010
265 10000 1030
#-D-D
query IIII rowsort
select
sum(j),
sum(distinct i),
count(j),
sum(distinct j)
from tbl group by groups;
----
475000 225 10000 950
485000 235 10000 970
495000 245 10000 990
505000 255 10000 1010
515000 265 10000 1030
#-D-D
# NOTE(review): identical to the preceding -D-D test - possibly an unintended duplicate
query IIII rowsort
select
sum(j),
sum(distinct i),
count(j),
sum(distinct j)
from tbl group by groups;
----
475000 225 10000 950
485000 235 10000 970
495000 245 10000 990
505000 255 10000 1010
515000 265 10000 1030
#D-D-
query IIII rowsort
select
sum(distinct i),
count(j),
sum(distinct j),
sum(j)
from tbl group by groups;
----
225 10000 950 475000
235 10000 970 485000
245 10000 990 495000
255 10000 1010 505000
265 10000 1030 515000
# These next tests will repeat the previous test, with the addition of filters
# The second marker line under each test shows which aggregates carry a FILTER clause:
# filtered = 'F'
# not filtered = '-'
# D
# F
query I
select
count(distinct i) FILTER (WHERE i >= 20)
from tbl group by groups;
----
6
6
6
6
6
# D--
# -FF
query III rowsort
select
sum(distinct i),
sum(i) FILTER (WHERE j < 20),
sum(j) FILTER (WHERE i >= 20)
from tbl group by groups;
----
225 15000 345000
235 17000 351000
245 19000 357000
255 21000 363000
265 23000 369000
# --D
# -FF
query III rowsort
select
sum(i),
sum(j) FILTER (WHERE j == 0),
sum(distinct i) FILTER (WHERE i == 0)
from tbl group by groups;
----
225000 0 0
235000 NULL NULL
245000 NULL NULL
255000 NULL NULL
265000 NULL NULL
# -D-
# F-F
query III rowsort
select
sum(i) FILTER (WHERE j == 5),
sum(distinct i),
sum(j) FILTER (WHERE i == 5)
from tbl group by groups;
----
2500 225 30000
NULL 235 NULL
NULL 245 NULL
NULL 255 NULL
NULL 265 NULL
# D-D
# F-F
query III rowsort
select
sum(distinct i) FILTER (WHERE i == 5),
count(j),
sum(distinct j) FILTER (WHERE i == 5)
from tbl group by groups;
----
5 10000 60
NULL 10000 NULL
NULL 10000 NULL
NULL 10000 NULL
NULL 10000 NULL
#-D-D
#FF--
query IIII rowsort
select
sum(j) FILTER (WHERE j == 5),
sum(distinct i) FILTER (WHERE j == 5),
count(j),
sum(distinct j)
from tbl group by groups;
----
2500 5 10000 950
NULL NULL 10000 1010
NULL NULL 10000 1030
NULL NULL 10000 970
NULL NULL 10000 990
#-D-D
#F--F
query IIII rowsort
select
sum(j) FILTER (WHERE i == 5),
sum(distinct i),
count(j),
sum(distinct j) FILTER (WHERE j == 5)
from tbl group by groups;
----
30000 225 10000 5
NULL 235 10000 NULL
NULL 245 10000 NULL
NULL 255 10000 NULL
NULL 265 10000 NULL
#D-D-
#--FF
query IIII rowsort
select
sum(distinct i),
count(j),
sum(distinct j) FILTER (WHERE j == 5),
sum(j) FILTER (WHERE j == 5)
from tbl group by groups;
----
225 10000 5 2500
235 10000 NULL NULL
245 10000 NULL NULL
255 10000 NULL NULL
265 10000 NULL NULL

View File

@@ -0,0 +1,64 @@
# name: test/sql/aggregate/distinct/grouped/distinct_grouping_tpch.test_slow
# description: Test COUNT(DISTINCT) with GROUP BY CUBE on the TPC-H lineitem table
# group: [grouped]
require tpch
require ram 8gb
require disk_space 40gb
statement ok
pragma enable_verification
statement ok
pragma verify_parallelism
statement ok
PRAGMA verify_external
# Generate the TPC-H tables at scale factor 1
statement ok
CALL dbgen(sf=1);
# First result column is the GROUPING value: 0 = both columns grouped, 1 = l_linestatus rolled up,
# 2 = l_returnflag rolled up, 3 = grand total
query IIIII
select
grouping(l_returnflag, l_linestatus),
l_returnflag,
l_linestatus,
count(distinct l_orderkey),
count(distinct l_comment)
from lineitem
group by cube(l_returnflag, l_linestatus)
order by all;
----
0 A F 644207 1181362
0 N F 30908 37987
0 N O 770587 2146525
0 R F 645527 1181807
1 A NULL 644207 1181362
1 N NULL 780997 2168690
1 R NULL 645527 1181807
2 NULL F 767956 2142221
2 NULL O 770587 2146525
3 NULL NULL 1500000 3610733
# The CUBE(l_orderkey, l_partkey) output is aggregated again by the outer query,
# so the expected result is a single summary row
query IIIIIIIII
SELECT
COUNT(DISTINCT l_orderkey),
COUNT(DISTINCT l_partkey),
COUNT(*),
MIN(l_orderkey),
MAX(l_orderkey),
MIN(l_partkey),
MAX(l_partkey),
SUM(distinct_comment),
AVG(distinct_comment)
FROM (
select
l_orderkey,
l_partkey,
count(distinct l_comment) AS distinct_comment
from lineitem
group by cube(l_orderkey, l_partkey)
);
----
1500000 200000 7701170 1 6000000 1 200000 21614257 2.8066199032095125

View File

@@ -0,0 +1,15 @@
# name: test/sql/aggregate/distinct/grouped/identical_inputs.test
# description: DISTINCT aggregations that all take the same input column
# group: [grouped]
statement ok
create table tbl as select i%50::BIGINT as i, i%5::BIGINT as j from range(1000000) tbl(i);
# Five different DISTINCT aggregates over the same column i, grouped by j;
# each group is expected to hold 10 distinct values of i
query IIIII
select count(distinct i), min(distinct i), max(distinct i), sum(distinct i), product(distinct i) from tbl group by j order by all;
----
10 0 45 225 0.0
10 1 46 235 1213563326976.0
10 2 47 245 3965002804224.0
10 3 48 255 9360955828224.0
10 4 49 265 19053977918976.0

View File

@@ -0,0 +1,23 @@
# name: test/sql/aggregate/distinct/grouped/issue_5070.test
# description: Test COUNT(DISTINCT ...) with a FILTER clause in a grouped aggregate
# group: [grouped]
statement ok
pragma enable_verification
statement ok
pragma verify_parallelism
# Both rows share id '1' and value '7', but only the second has type '456',
# so the filtered distinct count for the single group is expected to be 1
query II
WITH evs AS (
SELECT * FROM (VALUES
('1','123','7'),
('1','456','7')
) AS t("id", "type", "value" )
)
SELECT "id"
, COUNT(DISTINCT "value") FILTER (WHERE "type" = '456') AS type_456_count
FROM evs
GROUP BY "id"
----
1 1

View File

@@ -0,0 +1,23 @@
# name: test/sql/aggregate/distinct/grouped/long_input.test_slow
# description: DISTINCT aggregations
# group: [grouped]
# This is string aggr, which goes through the HashAggregateOperator, so this is in fact 'grouped'
statement ok
create or replace table tbl as select * FROM ( VALUES
([repeat('a', 1000000)]),
([repeat('a', 1000000)]),
([repeat('a', 1000000)]),
([repeat('a', 1000000)]),
([repeat('b', 1000000)]),
([repeat('b', 1000000)]),
([repeat('b', 1000000)]),
([repeat('b', 1000000)]),
) tbl(i)
query I
select (min(distinct i)::TEXT)[2:2] from tbl group by i order by all;
----
a
b

Some files were not shown because too many files have changed in this diff Show More