# name: test/sql/aggregate/aggregates/binning.test
# description: Test binning functions
# group: [aggregates]

require 64bit

statement ok
PRAGMA enable_verification

# Arguments used throughout this file appear to be (min, max, bin count, nice flag);
# TODO confirm against the equi_width_bins documentation.

# simplest case: two bins over [0, 10]
query I
SELECT equi_width_bins(0, 10, 2, true)
----
[5, 10]

# narrow range located far away from zero
query I
SELECT equi_width_bins(1000000, 1000010, 2, true)
----
[1000005, 1000010]

# the step size prevents the boundaries from being nice
query I
SELECT equi_width_bins(99, 101, 2, true)
----
[100, 101]

query I
SELECT equi_width_bins(9, 11, 2, true)
----
[10, 11]

query I
SELECT equi_width_bins(10, 11, 2, true)
----
[10, 11]

# asking for more bins than the range can hold must not produce duplicate boundaries
query I
SELECT equi_width_bins(0, 5, 10, true)
----
[0, 1, 2, 3, 4, 5]

# five requested bins over ranges below, above and around zero
query I
SELECT equi_width_bins(0, 10, 5, true)
----
[2, 4, 6, 8, 10]

query I
SELECT equi_width_bins(-10, 0, 5, true)
----
[-8, -6, -4, -2, 0]

query I
SELECT equi_width_bins(-10, 10, 5, true)
----
[-5, 0, 5, 10]

query I
SELECT equi_width_bins(0, 9, 5, true)
----
[2, 4, 6, 8, 10]

# ten requested bins: 1734 and 1724 as max produce the same boundaries
query I
SELECT equi_width_bins(0, 1734, 10, true)
----
[200, 400, 600, 800, 1000, 1200, 1400, 1600, 1800]

query I
SELECT equi_width_bins(0, 1724, 10, true)
----
[200, 400, 600, 800, 1000, 1200, 1400, 1600, 1800]

# same range with the nice flag switched off
query I
SELECT equi_width_bins(0, 1734, 10, false)
----
[173, 346, 520, 693, 867, 1040, 1213, 1387, 1560, 1734]

query I
SELECT equi_width_bins(0, 39343341, 10, true)
----
[5000000, 10000000, 15000000, 20000000, 25000000, 30000000, 35000000, 40000000]

# seven requested bins over [1, 6000000], first nice and then not nice
query I
SELECT equi_width_bins(1, 6000000, 7, true)
----
[1000000, 2000000, 3000000, 4000000, 5000000, 6000000]

query I
SELECT equi_width_bins(1, 6000000, 7, false)
----
[857143, 1714286, 2571429, 3428571, 4285714, 5142857, 6000000]

# extreme integer bounds; the last boundary equals the requested max
query I
SELECT equi_width_bins(-9223372036854775808, 9223372036854775807, 5, true)
----
[-5000000000000000000, 0, 5000000000000000000, 9223372036854775807]

query I
SELECT equi_width_bins(-9223372036854775808, 9223372036854775807, 10, true)
----
[-8000000000000000000, -6000000000000000000, -4000000000000000000, -2000000000000000000, 0, 2000000000000000000, 4000000000000000000, 6000000000000000000, 8000000000000000000, 9223372036854775807]

query I
SELECT equi_width_bins(-9223372036854775808, 9223372036854775807, 20, true)
----
[-9000000000000000000, -8000000000000000000, -7000000000000000000, -6000000000000000000, -5000000000000000000, -4000000000000000000, -3000000000000000000, -2000000000000000000, -1000000000000000000, 0, 1000000000000000000, 2000000000000000000, 3000000000000000000, 4000000000000000000, 5000000000000000000, 6000000000000000000, 7000000000000000000, 8000000000000000000, 9000000000000000000, 9223372036854775807]

query I
SELECT equi_width_bins(-9223372036854775808, 9223372036854775807, 30, true)
----
[-9000000000000000000, -8500000000000000000, -8000000000000000000, -7500000000000000000, -7000000000000000000, -6500000000000000000, -6000000000000000000, -5500000000000000000, -5000000000000000000, -4500000000000000000, -4000000000000000000, -3500000000000000000, -3000000000000000000, -2500000000000000000, -2000000000000000000, -1500000000000000000, -1000000000000000000, -500000000000000000, 0, 500000000000000000, 1000000000000000000, 1500000000000000000, 2000000000000000000, 2500000000000000000, 3000000000000000000, 3500000000000000000, 4000000000000000000, 4500000000000000000, 5000000000000000000, 5500000000000000000, 6000000000000000000, 6500000000000000000, 7000000000000000000, 7500000000000000000, 8000000000000000000, 8500000000000000000, 9000000000000000000, 9223372036854775807]

# floating point inputs
query I
SELECT equi_width_bins(0.0, 9.0, 5, true);
----
[2.0, 4.0, 6.0, 8.0, 10.0]

query I
SELECT equi_width_bins(0.0, 9.0, 7, true);
----
[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]

# unnest emits one boundary per result row
query I
SELECT unnest(equi_width_bins(0.0, 9.0, 7, false));
----
1.2857142857142863
2.571428571428572
3.8571428571428577
5.142857142857143
6.428571428571429
7.714285714285714
9.0

query I
SELECT equi_width_bins(0.0, 90.0, 5, true);
----
[20.0, 40.0, 60.0, 80.0, 100.0]

# floating point ranges bounded by -1.0, 0.0 and 1.0
query I
SELECT equi_width_bins(0.0, 1.0, 5, true);
----
[0.2, 0.4, 0.6, 0.8, 1.0]

# NOTE(review): identical to the preceding record - possibly a different input was intended; confirm
query I
SELECT equi_width_bins(0.0, 1.0, 5, true);
----
[0.2, 0.4, 0.6, 0.8, 1.0]

query I
SELECT equi_width_bins(-1.0, 0.0, 5, true);
----
[-0.8, -0.6, -0.4, -0.2, 0.0]

query I
SELECT equi_width_bins(-1.0, 1.0, 5, true);
----
[-0.5, 0.0, 0.5, 1.0]

# giant floating point bounds; unnest emits one boundary per result row
query I
SELECT unnest(equi_width_bins(-1e308, 1e308, 5, true));
----
-5e+307
-0.0
5e+307
1e+308
1.5e+308

# more exhaustive nice bin tests: max = 6.347 with a growing requested bin count
query I
SELECT equi_width_bins(0.0, 6.347, 3, true) AS boundaries;
----
[2.0, 4.0, 6.0, 8.0]

query I
SELECT equi_width_bins(0.0, 6.347, 7, true) AS boundaries;
----
[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]

query I
SELECT equi_width_bins(0.0, 6.347, 10, true) AS boundaries;
----
[0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5, 6.0, 6.5]

query I
SELECT equi_width_bins(0.0, 6.347, 20, true) AS boundaries;
----
[0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5, 6.0, 6.5]

query I
SELECT equi_width_bins(0.0, 6.347, 30, true) AS boundaries;
----
[0.2, 0.4, 0.6, 0.8, 1.0, 1.2, 1.4, 1.6, 1.8, 2.0, 2.2, 2.4, 2.6, 2.8, 3.0, 3.2, 3.4, 3.6, 3.8, 4.0, 4.2, 4.4, 4.6, 4.8, 5.0, 5.2, 5.4, 5.6, 5.8, 6.0, 6.2, 6.4]

# same sweep with max = 3.974
query I
SELECT equi_width_bins(0.0, 3.974, 5, true) AS boundaries;
----
[1.0, 2.0, 3.0, 4.0]

query I
SELECT equi_width_bins(0.0, 3.974, 7, true) AS boundaries;
----
[0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0]

query I
SELECT equi_width_bins(0.0, 3.974, 10, true) AS boundaries;
----
[0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0]

query I
SELECT equi_width_bins(0.0, 3.974, 20, true) AS boundaries;
----
[0.2, 0.4, 0.6, 0.8, 1.0, 1.2, 1.4, 1.6, 1.8, 2.0, 2.2, 2.4, 2.6, 2.8, 3.0, 3.2, 3.4, 3.6, 3.8, 4.0]

query I
SELECT equi_width_bins(0.0, 3.974, 40, true) AS boundaries;
----
[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0, 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, 4.0]

# last bin should always be bigger than the input max
query I
SELECT equi_width_bins(0, 101, 5, true);
----
[20, 40, 60, 80, 100, 120]

query I
SELECT equi_width_bins(0, 101.5, 5, true);
----
[20.0, 40.0, 60.0, 80.0, 100.0, 120.0]

# date and timestamp inputs
query I
SELECT equi_width_bins(date '1992-01-01', date '2000-01-01', 2, true)
----
[1996-01-01, 2000-01-01]

query I
SELECT equi_width_bins(timestamp '1992-01-01', timestamp '2000-01-01', 2, true)
----
['1996-01-01 00:00:00', '2000-01-01 00:00:00']

query I
SELECT equi_width_bins(timestamp '1992-01-01 12:23:37', timestamp '2000-01-01 04:03:21', 2, true)
----
['1996-02-01 00:00:00', '2000-02-01 00:00:00']

query I
SELECT equi_width_bins(timestamp '1992-01-01 12:23:37', timestamp '2000-01-01 04:03:21', 5, true)
----
['1993-10-01 00:00:00', '1995-05-01 00:00:00', '1996-12-01 00:00:00', '1998-07-01 00:00:00', '2000-02-01 00:00:00']

# range confined to a single year
query I
SELECT equi_width_bins(timestamp '1992-01-01 12:23:37', timestamp '1992-12-01 04:03:21', 4, true)
----
['1992-03-27 00:00:00', '1992-06-18 00:00:00', '1992-09-10 00:00:00', '1992-12-02 00:00:00']

# range confined to a single month
query I
SELECT equi_width_bins(timestamp '1992-01-01 12:23:37', timestamp '1992-01-31 04:03:21', 4, true)
----
['1992-01-11 00:00:00', '1992-01-18 00:00:00', '1992-01-25 00:00:00', '1992-02-01 00:00:00']

# range confined to a single day
query I
SELECT equi_width_bins(timestamp '1992-01-01 01:23:37.999', timestamp '1992-01-01 23:03:21.3', 4, true)
----
['1992-01-01 07:30:00', '1992-01-01 13:00:00', '1992-01-01 18:30:00', '1992-01-02 00:00:00']

# range confined to a single hour
query I
SELECT equi_width_bins(timestamp '1992-01-01 01:23:37.999', timestamp '1992-01-01 01:53:21.3', 4, true)
----
['1992-01-01 01:31:30', '1992-01-01 01:39:00', '1992-01-01 01:46:30', '1992-01-01 01:54:00']

# range confined to a single minute
query I
SELECT equi_width_bins(timestamp '1992-01-01 01:23:01.999', timestamp '1992-01-01 01:23:49.377', 4, true)
----
['1992-01-01 01:23:14', '1992-01-01 01:23:26', '1992-01-01 01:23:38', '1992-01-01 01:23:50']

# range confined to a single second
query I
SELECT equi_width_bins(timestamp '1992-01-01 01:23:01.2', timestamp '1992-01-01 01:23:01.943', 4, true)
----
['1992-01-01 01:23:01.38575', '1992-01-01 01:23:01.5715', '1992-01-01 01:23:01.75725', '1992-01-01 01:23:01.943']

# difference is more than one day, but step size is less than one day
query I
SELECT equi_width_bins(timestamp '2024-06-21 15:00:00', timestamp '2024-06-22 9:00:00', 4, true);
----
['2024-06-21 19:30:00', '2024-06-22 00:00:00', '2024-06-22 04:30:00', '2024-06-22 09:00:00']

# difference is more than one month, but step size is less than one month
query I
SELECT equi_width_bins(timestamp '2024-06-21 15:00:00', timestamp '2024-07-21 9:00:00', 4, true);
----
['2024-07-01 00:00:00', '2024-07-08 00:00:00', '2024-07-15 00:00:00', '2024-07-22 00:00:00']

# what if we create more partitions than there are microseconds
query I
SELECT equi_width_bins(timestamp '2024-06-21 15:00:00.123456', timestamp '2024-06-21 15:00:00.123458', 10, true);
----
['2024-06-21 15:00:00.123456', '2024-06-21 15:00:00.123457', '2024-06-21 15:00:00.123458']

# far more requested bins than integers in the range, with the nice flag on and off
# (upper-case spelling of the call kept exactly as originally written)
query I
SELECT EQUI_WIDTH_BINS(0, 10, 5999, TRUE)
----
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

query I
SELECT EQUI_WIDTH_BINS(0, 10, 5999, false)
----
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

# invalid inputs: each record lists the error text expected from the engine

# max below min
statement error
SELECT equi_width_bins(-0.0, -1.0, 5, true);
----
max value is smaller than min value

# infinite bound
statement error
SELECT equi_width_bins(0.0, 'inf'::double, 5, true);
----
does not support infinite or nan as min/max value

# NaN bound
statement error
SELECT equi_width_bins(0.0, 'nan'::double, 5, true);
----
does not support infinite or nan as min/max value

# negative bin count
statement error
SELECT equi_width_bins(0.0, 1.0, -1, true);
----
there must be > 0 bins

# bin count above the allowed maximum
statement error
SELECT equi_width_bins(0.0, 1.0, 99999999, true);
----
max bin count

# string inputs are rejected
statement error
SELECT equi_width_bins('a'::VARCHAR, 'z'::VARCHAR, 2, true)
----
Unsupported type "VARCHAR"