should be it

This commit is contained in:
2025-10-24 19:21:19 -05:00
parent a4b23fc57c
commit f09560c7b1
14047 changed files with 3161551 additions and 1 deletions

View File

@@ -0,0 +1,31 @@
# name: benchmark/micro/window/mode_fixed_orderby.benchmark
# description: Measure performance of moving MODE (with an ORDER BY argument) over a fixed-size frame
# group: [window]

# NOTE(review): the display name says "Fixed 200", but the frame in the query
# spans 175 rows preceding and 175 rows following - confirm which is intended.
name Windowed MODE, Fixed 200
group micro
subgroup window

require tpch

# Generate the TPC-H data at scale factor 0.01.
load
CALL dbgen(sf=0.01);

# Every row uses the same frame shape: 175 rows before through 175 rows after
# the current row, in (l_orderkey, l_shipdate) order. Only rows whose windowed
# mode exceeds 1 are kept.
run
SELECT
    l_orderkey,
    l_shipdate,
    l_linenumber,
    MODE(l_linenumber ORDER BY l_linenumber DESC) OVER w AS l_mode
FROM lineitem
WINDOW w AS (
    ORDER BY l_orderkey, l_shipdate
    ROWS BETWEEN 175 PRECEDING AND 175 FOLLOWING
)
QUALIFY l_mode > 1
ORDER BY ALL
;

result IIII
8610 1994-07-06 5 2
36738 1996-10-05 6 2

View File

@@ -0,0 +1,68 @@
# name: benchmark/micro/window/mode_variable_400.benchmark
# description: Measure performance of moving MODE with variable frames
# group: [window]

name Windowed MODE, Variable 400
group micro
subgroup window

require tpch

# Generate the TPC-H data at scale factor 1.
load
CALL dbgen(sf=1);

# The frame bounds are computed from rowid, so they differ from row to row:
# mod(rowid * 47, 521) rows preceding and 400 minus that value following.
# Only rows whose windowed mode exceeds 1 are kept.
run
SELECT
    l_orderkey,
    l_shipdate,
    l_linenumber,
    MODE(l_linenumber) OVER w AS l_mode
FROM lineitem
WINDOW w AS (
    ORDER BY l_orderkey, l_shipdate
    ROWS BETWEEN MOD(rowid * 47, 521) PRECEDING AND 400 - MOD(rowid * 47, 521) FOLLOWING
)
QUALIFY l_mode > 1
ORDER BY ALL
;

result IIII
39 1996-12-08 6 3
707618 1993-12-31 2 2
707649 1995-11-03 5 2
707680 1998-02-28 3 2
1701765 1996-09-01 3 2
1701767 1994-06-08 2 2
1701889 1992-10-26 3 2
1701923 1993-09-29 4 2
1702080 1993-06-25 5 2
1702240 1995-03-21 1 2
1805063 1992-04-30 6 2
2596640 1995-09-14 3 2
2596647 1992-05-23 3 2
2596736 1998-04-17 4 2
4578916 1997-06-15 4 2
4578982 1996-09-02 4 2
5646241 1997-02-12 3 2
5646278 1993-07-01 4 2
5646337 1996-11-03 2 2
5646373 1996-02-14 6 2
5646403 1998-03-29 4 2
5646404 1992-07-23 5 2
5646435 1994-08-16 5 2
5646531 1992-12-19 5 2
5646531 1993-04-08 1 2
5646533 1994-12-02 5 2
5646535 1998-05-07 3 2
5646567 1992-10-08 3 2
5646594 1996-07-20 1 2
5646597 1996-12-06 1 2
5646656 1996-06-15 1 2
5646756 1995-02-12 7 2
5783364 1992-06-03 6 2
5783457 1993-02-03 1 2
5783489 1998-07-18 2 2
5783524 1996-02-06 2 2
5783555 1995-03-11 1 2
5783588 1995-06-15 2 2
5894308 1995-12-31 1 2
5894439 1994-09-15 2 2
5894499 1996-05-19 1 2
5894532 1992-04-23 4 2
5894722 1997-03-18 3 2

View File

@@ -0,0 +1,38 @@
# name: benchmark/micro/window/mode_variable_orderby.benchmark
# description: Measure performance of moving MODE (with an ORDER BY argument) with variable frames
# group: [window]

# NOTE(review): this display name is identical to the one used by
# mode_variable_400.benchmark - consider making it unique.
name Windowed MODE, Variable 400
group micro
subgroup window

require tpch

# Generate the TPC-H data at scale factor 0.1.
load
CALL dbgen(sf=0.1);

# The frame bounds are computed from rowid, so they differ from row to row:
# mod(rowid * 47, 521) rows preceding and 400 minus that value following.
# The aggregate carries its own ORDER BY argument (l_linenumber DESC).
run
SELECT
    l_orderkey,
    l_shipdate,
    l_linenumber,
    MODE(l_linenumber ORDER BY l_linenumber DESC) OVER w AS l_mode
FROM lineitem
WINDOW w AS (
    ORDER BY l_orderkey, l_shipdate
    ROWS BETWEEN MOD(rowid * 47, 521) PRECEDING AND 400 - MOD(rowid * 47, 521) FOLLOWING
)
QUALIFY l_mode > 1
ORDER BY ALL
;

result IIII
39 1996-12-08 6 3
599938 1995-07-03 6 2
599942 1995-08-08 1 2
599968 1997-03-18 1 2
599971 1996-01-07 2 2
599973 1996-09-27 6 2
599974 1995-09-05 1 2
599975 1997-05-08 4 2
600000 1998-05-10 1 2

View File

@@ -0,0 +1,27 @@
# name: benchmark/micro/window/streaming_lag.benchmark
# description: Verify performance of streaming LAG
# group: [window]

# Seed the random generator, then build a 10M-row table of three random columns.
load
select setseed(0.8675309);
create or replace table df as
    select
        random() as a,
        random() as b,
        random() as c
    from range(10_000_000);

# LAG at offsets 1 and 2 on columns a and b, each over an empty OVER ()
# (no PARTITION BY, no ORDER BY); the four values are added and summed.
run
select sum(a_1 + a_2 + b_1 + b_2)
from (
    select
        lag(a, 1) over () as a_1,
        lag(a, 2) over () as a_2,
        lag(b, 1) over () as b_1,
        lag(b, 2) over () as b_2
    from df
) t
;

result I
20000902.549240764

View File

@@ -0,0 +1,27 @@
# name: benchmark/micro/window/streaming_lead.benchmark
# description: Verify performance of streaming LEAD
# group: [window]

# Seed the random generator, then build a 10M-row table of three random columns.
load
select setseed(0.8675309);
create or replace table df as
    select
        random() as a,
        random() as b,
        random() as c
    from range(10_000_000);

# LEAD at offsets 1 and 2 on columns a and b, each over an empty OVER ()
# (no PARTITION BY, no ORDER BY); the four values are added and summed.
run
select sum(a_1 + a_2 + b_1 + b_2)
from (
    select
        lead(a, 1) over () as a_1,
        lead(a, 2) over () as a_2,
        lead(b, 1) over () as b_1,
        lead(b, 2) over () as b_2
    from df
) t
;

result I
20000902.549240764

View File

@@ -0,0 +1,34 @@
# name: benchmark/micro/window/window_constant_aggregates.benchmark
# description: Constant-frame aggregate performance: QUANTILE over whole partitions (PARTITION BY only, no ORDER BY or frame clause)
# group: [window]

# "dates" has one row per day in the generated range with three random values.
# "issue6728" scales those values by the row count of "dates" and repeats every
# row 3000 times through the cross join with range(3000).
load
select setseed(0.8675309);
create table dates as
    select
        ts::date as date,
        random() as v1,
        random() as v2,
        random() as v3
    from range('2017-01-01'::timestamp, '2020-12-31'::timestamp, interval 1 day) r(ts);
create table issue6728 as
    select d.*
    from (
        select
            rowid as id,
            date,
            v1 * (select count(*) from dates) as v1,
            v2 * (select count(*) from dates) as v2,
            v3 * (select count(*) from dates) as v3
        from dates
    ) d
    cross join range(3000)
;

# The window has only PARTITION BY id, so the 0.7 quantile is computed over
# the entire partition. The alias is called sum_v1 although it holds a quantile.
run
select sum(sum_v1)
from (
    select
        date,
        id,
        v1,
        v2,
        v3,
        quantile(v1, 0.7) over (partition by id) as sum_v1
    from issue6728
    order by id, date
) df
;

result I
3219979503.458688

View File

@@ -0,0 +1,24 @@
# name: benchmark/micro/window/window_constant_count.benchmark
# description: Constant-frame COUNT(*) FILTER performance over whole partitions (PARTITION BY only, no ORDER BY or frame clause)
# group: [window]

# One million consecutive dates starting at 1970-01-01, each combined with
# items 1 through 4, plus a random "sale" value per row.
load
select setseed(0.8675309);
create table arrow_001 as
    select
        date,
        item,
        round(100 * exp(-random() ** 2))::integer as sale
    from (
        select '1970-01-01'::date + interval (range) day as date
        from range(0, 1000000)
    ) dates
    cross join range(1, 5) items(item)
    order by 1, 2
;

# Per date, count the rows whose item is divisible by 3; the window has only
# PARTITION BY, so the count is taken over the entire partition.
run
select sum(total)
from (
    select
        date,
        item,
        count(*) filter (where item % 3 = 0) over (partition by "date") as "total"
    from arrow_001
) df
;

result I
4000000

View File

@@ -0,0 +1,22 @@
# name: benchmark/micro/window/window_count_star_fixed_100.benchmark
# description: Moving COUNT(*) performance, fixed 100 element window
# group: [window]

name Windowed COUNT(*), Fixed 100
group window

# rank100: 10M rows where b comes from range(10000000) and a is b modulo 100.
load
CREATE TABLE rank100 AS
    SELECT b % 100 AS a, b FROM range(10000000) tbl(b)

# Count the rows in a frame of up to 100 preceding rows plus the current row,
# ordered by b, and sum the per-row counts.
run
SELECT SUM(c)
FROM (
    SELECT COUNT(*) OVER (
        ORDER BY b ASC
        ROWS BETWEEN 100 PRECEDING AND CURRENT ROW
    ) AS c
    FROM rank100
) q;

result I
1009994950

View File

@@ -0,0 +1,22 @@
# name: benchmark/micro/window/window_count_star_variable_100.benchmark
# description: Moving COUNT(*) performance, variable 100 element window
# group: [window]

name Windowed COUNT(*), Variable 100
group window

# rank100: 10M rows where b comes from range(10000000) and a is b modulo 100.
load
CREATE TABLE rank100 AS
    SELECT b % 100 AS a, b FROM range(10000000) tbl(b)

# NOTE(review): despite the "Variable 100" name, the frame below is the fixed
# "100 PRECEDING .. CURRENT ROW" frame, and the expected value matches that
# fixed frame. Confirm whether a row-dependent frame was intended; switching
# the frame would require regenerating the expected result.
run
SELECT SUM(c)
FROM (
    SELECT COUNT(*) OVER (
        ORDER BY b ASC
        ROWS BETWEEN 100 PRECEDING AND CURRENT ROW
    ) AS c
    FROM rank100
) q;

result I
1009994950

View File

@@ -0,0 +1,33 @@
# name: benchmark/micro/window/window_fill.benchmark
# description: Measure the performance of FILL
# group: [window]

name FillPerformance
group micro
subgroup window

# Template arguments substituted into the SQL below as ${sf}, ${errors}, ${keys}.
argument sf 10
argument errors 0.1
argument keys 4

# m steps by 10 from 1704067200000; v is m minus that base value, or NULL when
# random() is not greater than ${errors}. Every m is paired with each key k.
load
SELECT SETSEED(0.8675309);
CREATE OR REPLACE TABLE data AS (
    SELECT
        k::TINYINT AS k,
        (CASE WHEN RANDOM() > ${errors} THEN m - 1704067200000 ELSE NULL END) AS v,
        m
    FROM range(1704067200000, 1704067200000 + ${sf} * 1_000_000 * 10, 10) times(m)
    CROSS JOIN range(${keys}) keys(k)
);

# Keep only rows where v differs from m minus the base value; the expected
# result below lists no rows.
run
SELECT
    m,
    k,
    FILL(v) OVER (PARTITION BY k ORDER BY m) AS v
FROM data
QUALIFY v <> m - 1704067200000;

result III

View File

@@ -0,0 +1,23 @@
# name: benchmark/micro/window/window_iqr_fixed_100.benchmark
# description: Moving IQR performance, fixed 100 element window
# group: [window]

name Windowed IQR, Fixed 100
group window

# rank100: 10M rows where b comes from range(10000000) and a is b modulo 100.
load
CREATE TABLE rank100 AS
    SELECT b % 100 AS a, b FROM range(10000000) tbl(b)

# Continuous quartiles (0.25, 0.5, 0.75) of a over a frame of up to 100
# preceding rows plus the current row; report the smallest and largest lists.
run
SELECT MIN(iqr), MAX(iqr)
FROM (
    SELECT QUANTILE_CONT(a, [0.25, 0.5, 0.75]) OVER (
        ORDER BY b ASC
        ROWS BETWEEN 100 PRECEDING AND CURRENT ROW
    ) AS iqr
    FROM rank100
) q;

result II
[0.000000, 0.000000, 0.000000] [25.000000, 50.000000, 75.000000]

View File

@@ -0,0 +1,23 @@
# name: benchmark/micro/window/window_iqr_variable_100.benchmark
# description: Moving IQR performance, variable 100 element window
# group: [window]

name Windowed IQR, Variable 100
group window

# rank100: 10M rows where b comes from range(10000000) and a is b modulo 100.
load
CREATE TABLE rank100 AS
    SELECT b % 100 AS a, b FROM range(10000000) tbl(b)

# Continuous quartiles (0.25, 0.5, 0.75) of a over a frame whose bounds depend
# on b: mod(b * 47, 521) rows preceding and 100 minus that value following.
run
SELECT MIN(iqr), MAX(iqr)
FROM (
    SELECT QUANTILE_CONT(a, [0.25, 0.5, 0.75]) OVER (
        ORDER BY b ASC
        ROWS BETWEEN MOD(b * 47, 521) PRECEDING AND 100 - MOD(b * 47, 521) FOLLOWING
    ) AS iqr
    FROM rank100
) q;

result II
[0.000000, 0.000000, 0.000000] [76.500000, 84.000000, 91.500000]

View File

@@ -0,0 +1,12 @@
# name: benchmark/micro/window/window_list_aggr.benchmark
# description: List aggregate window performance
# group: [window]

name List aggregate window
group window

# tbl: a single column i holding the values of range(10000000).
load
create table tbl as select range as i from range(10000000)

# Collect a three-row list (previous, current, next) for every row, with rows
# partitioned by the parity of i and ordered by i. No expected result is listed.
run
select
    i,
    list(i) over (
        partition by i % 2
        order by i
        rows between 1 preceding and 1 following
    )
from tbl;

View File

@@ -0,0 +1,23 @@
# name: benchmark/micro/window/window_mad_fixed_100.benchmark
# description: Moving MAD performance, fixed 100 element frame
# group: [window]

name Windowed MAD, Fixed 100
group window

# rank100: 10M rows where b comes from range(10000000) and a is b modulo 100.
load
CREATE TABLE rank100 AS
    SELECT b % 100 AS a, b FROM range(10000000) tbl(b)

# MAD of a over a frame of up to 100 preceding rows plus the current row,
# ordered by b; the per-row values are summed.
run
SELECT SUM(m)
FROM (
    SELECT MAD(a) OVER (
        ORDER BY b ASC
        ROWS BETWEEN 100 PRECEDING AND CURRENT ROW
    ) AS m
    FROM rank100
) q;

result I
249998762.5

View File

@@ -0,0 +1,23 @@
# name: benchmark/micro/window/window_mad_variable_100.benchmark
# description: Moving MAD performance, variable 100 element frame
# group: [window]

name Windowed MAD, Variable 100
group window

# rank100: 10M rows where b comes from range(10000000) and a is b modulo 100.
load
CREATE TABLE rank100 AS
    SELECT b % 100 AS a, b FROM range(10000000) tbl(b)

# MAD of a over a frame whose bounds depend on b: mod(b * 47, 521) rows
# preceding and 100 minus that value following; per-row values are summed.
run
SELECT SUM(m)
FROM (
    SELECT MAD(a) OVER (
        ORDER BY b ASC
        ROWS BETWEEN MOD(b * 47, 521) PRECEDING AND 100 - MOD(b * 47, 521) FOLLOWING
    ) AS m
    FROM rank100
) q;

result I
249994596.000000

View File

@@ -0,0 +1,23 @@
# name: benchmark/micro/window/window_median_fixed_100.benchmark
# description: Moving MEDIAN performance, fixed 100 element frame
# group: [window]

name Windowed MEDIAN, Fixed 100
group window

# rank100: 10M rows where b comes from range(10000000) and a is b modulo 100.
load
CREATE TABLE rank100 AS
    SELECT b % 100 AS a, b FROM range(10000000) tbl(b)

# Median of a over a frame of up to 100 preceding rows plus the current row,
# ordered by b; the per-row medians are summed.
run
SELECT SUM(m)
FROM (
    SELECT MEDIAN(a) OVER (
        ORDER BY b ASC
        ROWS BETWEEN 100 PRECEDING AND CURRENT ROW
    ) AS m
    FROM rank100
) q;

result I
494997500

View File

@@ -0,0 +1,23 @@
# name: benchmark/micro/window/window_median_variable_100.benchmark
# description: Moving MEDIAN performance, variable 100 element frame
# group: [window]

name Windowed MEDIAN, Variable 100
group window

# rank100: 10M rows where b comes from range(10000000) and a is b modulo 100.
load
CREATE TABLE rank100 AS
    SELECT b % 100 AS a, b FROM range(10000000) tbl(b)

# Median of a over a frame whose bounds depend on b: mod(b * 47, 521) rows
# preceding and 100 minus that value following; per-row medians are summed.
run
SELECT SUM(m)
FROM (
    SELECT MEDIAN(a) OVER (
        ORDER BY b ASC
        ROWS BETWEEN MOD(b * 47, 521) PRECEDING AND 100 - MOD(b * 47, 521) FOLLOWING
    ) AS m
    FROM rank100
) q;

result I
494989867

View File

@@ -0,0 +1,36 @@
# name: benchmark/micro/window/window_mode_constant.benchmark
# description: Measure performance of moving MODE with constant frame
# group: [window]

name Windowed MODE, Single value per frame
group micro
subgroup window

require tpch

# Generate the TPC-H data at scale factor 0.1.
load
CALL dbgen(sf=0.1);

# The window has only PARTITION BY l_shipmode, so the mode is computed over
# the entire partition. Only the first 10 rows of the sorted output are kept.
run
SELECT
    l_orderkey,
    l_shipmode,
    l_linenumber,
    MODE(l_linenumber ORDER BY l_linenumber DESC) OVER w AS l_mode
FROM lineitem
WINDOW w AS (PARTITION BY l_shipmode)
ORDER BY ALL
LIMIT 10
;

result IIII
1 AIR 4 1
1 FOB 5 1
1 MAIL 2 1
1 MAIL 6 1
1 REG AIR 3 1
1 TRUCK 1 1
2 RAIL 1 1
3 AIR 1 1
3 FOB 5 1
3 RAIL 2 1

View File

@@ -0,0 +1,23 @@
# name: benchmark/micro/window/window_mode_fixed_100.benchmark
# description: Moving MODE performance, fixed 100 element frame
# group: [window]

name Windowed MODE, Fixed 100
group window

# rank100: 10M rows where b comes from range(10000000) and a is b modulo 100.
load
CREATE TABLE rank100 AS
    SELECT b % 100 AS a, b FROM range(10000000) tbl(b)

# Mode of a over a frame of up to 100 preceding rows plus the current row,
# ordered by b; the per-row modes are summed.
run
SELECT SUM(m)
FROM (
    SELECT MODE(a) OVER (
        ORDER BY b ASC
        ROWS BETWEEN 100 PRECEDING AND CURRENT ROW
    ) AS m
    FROM rank100
) q;

result I
494995050

View File

@@ -0,0 +1,28 @@
# name: benchmark/micro/window/window_partition.benchmark
# description: Running SUM over a window partitioned by a random integer key
# group: [window]

name Window Partition
group window

# df: 1M rows with an index, two random doubles and a random integer key c
# obtained by rounding random() * 10000.
load
select setseed(0.8675309);
create table df as
    select
        idx,
        random() as a,
        random() as b,
        round(random() * 10000)::integer as c
    from range(1000000) tbl(idx)
;

# Cumulative sum of a within each c partition, ordered by idx; the per-row
# running sums are then added together.
run
select sum(a)
from (
    select
        idx,
        sum(a) over (
            partition by c
            order by idx
            rows between unbounded preceding and current row
        ) as a
    from df
);

result I
25474151.276410

View File

@@ -0,0 +1,24 @@
# name: benchmark/micro/window/window_rownumber_orderbys.benchmark
# description: ROW_NUMBER duplicate ORDER BY optimisation
# group: [window]

# example: 10M rows; customer_id is i modulo 47 and date is 2024-01-01 plus
# (i modulo 367) days.
load
create or replace table example as
    select
        i % 47 as customer_id,
        '2024-01-01'::date + (i % 367)::integer as date
    from range(10_000_000) tbl(i);

# The ORDER BY argument of row_number casts to TIMESTAMP because the window
# uses a RANGE frame, and DATE + INTERVAL => TIMESTAMP.
run
select
    customer_id,
    date,
    row_number(order by date::timestamp asc) over win as row_by_partition
from example
window win as (
    partition by customer_id
    order by date asc
    range between current row and interval 1 week following
);

View File

@@ -0,0 +1,23 @@
# name: benchmark/micro/window/window_streamed_sum.benchmark
# description: Running SUM over an unpartitioned, unordered window (streamed sum)
# group: [window]

# The display name and description previously read "Window Partition" and
# "Range join between integers", which describe other benchmarks: this query
# has no PARTITION BY and no join.
name Window Streamed Sum
group window

# Seed the random generator, then build 10M rows of exp(-random() ** 2).
load
SELECT SETSEED(0.8675309);
CREATE TABLE streamed AS
    SELECT exp(-random() ** 2) AS p
    FROM range(0, 10000000)
;

# Cumulative sum of p from the first row through the current row. The OVER
# clause has no PARTITION BY or ORDER BY; the per-row running sums are summed.
run
SELECT SUM(s)
FROM (
    SELECT SUM(p) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS s
    FROM streamed
) tbl
;

result I
73137822900325.6

View File

@@ -0,0 +1,15 @@
# name: benchmark/micro/window/window_sum.benchmark
# description: Window Sum
# group: [window]

name Window Sum
group window

# integers: 100000 rows; each value is (i * 9582398353) modulo 10000, cast to INTEGER.
load
create table integers as
    select ((i * 9582398353) % 10000)::integer as i
    from range(0, 100000) tbl(i);

# Sum of i over a frame of 1000 rows on either side of the current row,
# ordered by i; the smallest such sum is reported.
run
select min(i)
from (
    select sum(i) over (
        order by i
        rows between 1000 preceding and 1000 following
    )
    from integers
) tbl(i)

result I
49600