should be it

This commit is contained in:
2025-10-24 19:21:19 -05:00
parent a4b23fc57c
commit f09560c7b1
14047 changed files with 3161551 additions and 1 deletions

View File

@@ -0,0 +1,27 @@
# name: benchmark/realnest/micro/01_aggregate-first-level-struct-members.benchmark
# description: Aggregate functions on the struct, group by one parameter
# group: [micro]
# Groups run2012B_singleMu by PV.npvs, aggregates first-level members of PV
# and MET, and keeps only the groups whose summed PV.x is greater than 1.
# Keep the query free of blank lines and line comments: it is read as one unit.
name aggregate-first-level-struct-members
group real_nest
require json
require httpfs
cache real_nest.duckdb
load benchmark/realnest/micro/load.sql
run
SELECT
    PV.npvs,
    sum(PV.x)      AS sum_x,
    sum(PV.y)      AS sum_y,
    sum(PV.z)      AS sum_z,
    avg(MET.pt)    AS avg_pt,
    min(MET.phi)   AS min_phi,
    max(MET.sumet) AS max_sumet
FROM run2012B_singleMu
GROUP BY PV.npvs
HAVING sum_x > 1;

View File

@@ -0,0 +1,17 @@
# name: benchmark/realnest/micro/02_list_sort.benchmark
# description: list_sort text entries
# group: [micro]
# Applies list_sort with default ordering to the body_text column of every
# row in the cord table.
name list_sort
group real_nest
require json
require httpfs
cache real_nest.duckdb
load benchmark/realnest/micro/load.sql
run
SELECT
    list_sort(body_text)
FROM cord;

View File

@@ -0,0 +1,43 @@
# name: benchmark/realnest/micro/03_create_table_from_unnested_structs.benchmark
# description: Create a table by unnesting and joining the structs in a JSON file
# group: [micro]
# Builds (or replaces) the table "combined" from eight subqueries over
# run2012B_singleMu. Each subquery unnests one column (Muon, Electron, Tau,
# Photon and Jet recursively) and is LEFT JOINed back to the HLT subquery on
# rowid. The joined result is capped at 100000 rows; there is no ORDER BY, so
# which rows survive the LIMIT is not pinned down by the query itself.
name create_table_from_unnested_structs
group real_nest
require json
require httpfs
cache real_nest.duckdb
load benchmark/realnest/micro/load.sql
run
CREATE OR REPLACE TABLE combined AS (
    SELECT
        unnested_hlt.*,
        unnested_pv.*,
        unnested_met.*,
        unnested_muon.*,
        unnested_electron.*,
        unnested_tau.*,
        unnested_photon.*,
        unnested_jet.*
    FROM (
        SELECT rowid, UNNEST(HLT) AS hlt
        FROM run2012B_singleMu
    ) AS unnested_hlt
    LEFT JOIN (
        SELECT rowid, UNNEST(PV) AS pv
        FROM run2012B_singleMu
    ) AS unnested_pv
        ON unnested_hlt.rowid = unnested_pv.rowid
    LEFT JOIN (
        SELECT rowid, UNNEST(MET) AS met
        FROM run2012B_singleMu
    ) AS unnested_met
        ON unnested_hlt.rowid = unnested_met.rowid
    LEFT JOIN (
        SELECT rowid, UNNEST(Muon, recursive:=true) AS muon
        FROM run2012B_singleMu
    ) AS unnested_muon
        ON unnested_hlt.rowid = unnested_muon.rowid
    LEFT JOIN (
        SELECT rowid, UNNEST(Electron, recursive:=true) AS electron
        FROM run2012B_singleMu
    ) AS unnested_electron
        ON unnested_hlt.rowid = unnested_electron.rowid
    LEFT JOIN (
        SELECT rowid, UNNEST(Tau, recursive:=true) AS tau
        FROM run2012B_singleMu
    ) AS unnested_tau
        ON unnested_hlt.rowid = unnested_tau.rowid
    LEFT JOIN (
        SELECT rowid, UNNEST(Photon, recursive:=true) AS photon
        FROM run2012B_singleMu
    ) AS unnested_photon
        ON unnested_hlt.rowid = unnested_photon.rowid
    LEFT JOIN (
        SELECT rowid, UNNEST(Jet, recursive:=true) AS jet
        FROM run2012B_singleMu
    ) AS unnested_jet
        ON unnested_hlt.rowid = unnested_jet.rowid
    LIMIT 100000
);

View File

@@ -0,0 +1,60 @@
# name: benchmark/realnest/micro/04_list_transform_and_list_aggregate.benchmark
# description: Per-row averages of struct members via list_transform + list_aggregate
# group: [micro]
# For every row of run2012B_singleMu, each expression extracts one member from
# every element of a particle list (Jet, Photon, Tau, Electron, Muon) with
# list_transform and averages the extracted values with list_aggregate(..., 'avg').
# The query has no GROUP BY / HAVING: one output row per input row.
# Alias prefixes: (none) = Jet, ph_ = Photon, t_ = Tau, el_ = Electron, mu_ = Muon.
name list_transform plus list_aggregate
group real_nest
require json
require httpfs
cache real_nest.duckdb
load benchmark/realnest/micro/load.sql
run
SELECT
    list_aggregate(list_transform(Jet, lambda x: x.pt), 'avg') AS avg_pt,
    list_aggregate(list_transform(Jet, lambda x: x.eta), 'avg') AS avg_eta,
    list_aggregate(list_transform(Jet, lambda x: x.phi), 'avg') AS avg_phi,
    list_aggregate(list_transform(Jet, lambda x: x.mass), 'avg') AS avg_mass,
    list_aggregate(list_transform(Jet, lambda x: x.btag), 'avg') AS avg_btag,
    list_aggregate(list_transform(Photon, lambda x: x.pt), 'avg') AS ph_avg_pt,
    list_aggregate(list_transform(Photon, lambda x: x.eta), 'avg') AS ph_avg_eta,
    list_aggregate(list_transform(Photon, lambda x: x.phi), 'avg') AS ph_avg_phi,
    list_aggregate(list_transform(Photon, lambda x: x.mass), 'avg') AS ph_avg_mass,
    list_aggregate(list_transform(Photon, lambda x: x.pfreliso03_all), 'avg') AS ph_avg_pf,
    list_aggregate(list_transform(Photon, lambda x: x.jetidx), 'avg') AS ph_avg_jet,
    list_aggregate(list_transform(Photon, lambda x: x.genpartidx), 'avg') AS ph_avg_gen,
    list_aggregate(list_transform(Tau, lambda x: x.pt), 'avg') AS t_avg_pt,
    list_aggregate(list_transform(Tau, lambda x: x.eta), 'avg') AS t_avg_eta,
    list_aggregate(list_transform(Tau, lambda x: x.mass), 'avg') AS t_avg_mass,
    list_aggregate(list_transform(Tau, lambda x: x.decaymode), 'avg') AS t_avg_dec,
    list_aggregate(list_transform(Tau, lambda x: x.reliso_all), 'avg') AS t_avg_rel,
    list_aggregate(list_transform(Tau, lambda x: x.jetidx), 'avg') AS t_avg_jet,
    list_aggregate(list_transform(Tau, lambda x: x.genpartidx), 'avg') AS t_avg_gen,
    list_aggregate(list_transform(Electron, lambda x: x.pt), 'avg') AS el_avg_pt,
    list_aggregate(list_transform(Electron, lambda x: x.eta), 'avg') AS el_avg_eta,
    list_aggregate(list_transform(Electron, lambda x: x.phi), 'avg') AS el_avg_phi,
    list_aggregate(list_transform(Electron, lambda x: x.mass), 'avg') AS el_avg_mass,
    list_aggregate(list_transform(Electron, lambda x: x.pfreliso03_all), 'avg') AS el_avg_pf,
    list_aggregate(list_transform(Electron, lambda x: x.dxy), 'avg') AS el_avg_dxy,
    list_aggregate(list_transform(Electron, lambda x: x.dxyerr), 'avg') AS el_avg_dxyer,
    list_aggregate(list_transform(Electron, lambda x: x.dz), 'avg') AS el_avg_dz,
    list_aggregate(list_transform(Electron, lambda x: x.dzerr), 'avg') AS el_avg_dzer,
    list_aggregate(list_transform(Electron, lambda x: x.jetidx), 'avg') AS el_avg_jet,
    list_aggregate(list_transform(Electron, lambda x: x.genpartidx), 'avg') AS el_avg_gen,
    list_aggregate(list_transform(Muon, lambda x: x.pt), 'avg') AS mu_avg_pt,
    list_aggregate(list_transform(Muon, lambda x: x.eta), 'avg') AS mu_avg_eta,
    list_aggregate(list_transform(Muon, lambda x: x.phi), 'avg') AS mu_avg_phi,
    list_aggregate(list_transform(Muon, lambda x: x.mass), 'avg') AS mu_avg_mass,
    list_aggregate(list_transform(Muon, lambda x: x.pfreliso03_all), 'avg') AS mu_avg_pf3,
    list_aggregate(list_transform(Muon, lambda x: x.pfreliso04_all), 'avg') AS mu_avg_pf4,
    list_aggregate(list_transform(Muon, lambda x: x.dxy), 'avg') AS mu_avg_dxy,
    list_aggregate(list_transform(Muon, lambda x: x.dxyerr), 'avg') AS mu_avg_dxyer,
    list_aggregate(list_transform(Muon, lambda x: x.dz), 'avg') AS mu_avg_dz,
    list_aggregate(list_transform(Muon, lambda x: x.dzerr), 'avg') AS mu_avg_dzer,
    list_aggregate(list_transform(Muon, lambda x: x.jetidx), 'avg') AS mu_avg_jet,
    list_aggregate(list_transform(Muon, lambda x: x.genpartidx), 'avg') AS mu_avg_gen
FROM run2012B_singleMu;

View File

@@ -0,0 +1,28 @@
# name: benchmark/realnest/micro/05_list_filter.benchmark
# description: Multiple list_filters
# group: [micro]
# Runs ten aggregates in a single pass over run2012B_singleMu: a row count plus
# nine sums of the lengths of list_filter results over the Tau, Muon, Electron,
# Photon and Jet lists.
# NOTE(review): the "idIsoMatch" alias counts Tau entries whose idIsoVLoose and
# idIsoLoose values DIFFER (the predicate is !=); the alias is kept as-is.
# NOTE(review): "odds" keeps entries where (pt % 2) - 1 > 0; whether that is the
# intended notion of "odd" depends on the type of pt -- confirm.
name list_filters
group real_nest
require json
require httpfs
cache real_nest.duckdb
load benchmark/realnest/micro/load.sql
run
SELECT
    count(*) AS total_rows,
    sum(len(list_filter(Tau, lambda x: x.charge < 0))) AS negatives,
    sum(len(list_filter(Tau, lambda x: x.charge > 0))) AS positives,
    sum(len(list_filter(Tau, lambda x: x.charge = 0))) AS neutral,
    sum(len(list_filter(Tau, lambda x: (x.pt % 2) - 1 > 0))) AS odds,
    sum(len(list_filter(Tau, lambda x: x.idIsoVLoose != x.idIsoLoose))) AS idIsoMatch,
    sum(len(list_filter(Muon, lambda x: x.tightId == true))) AS muon,
    sum(len(list_filter(Electron, lambda x: x.mass > x.eta + x.phi))) AS electron,
    sum(len(list_filter(Photon, lambda x: x.mass > 0))) AS photon,
    sum(len(list_filter(Jet, lambda x: x.puId != false))) AS jet
FROM run2012B_singleMu;

View File

@@ -0,0 +1,23 @@
# name: benchmark/realnest/micro/06_list_filter_on_unnested_structure.benchmark
# description: list_filter on unnested_muon structure
# group: [micro]
# The inner query recursively unnests Muon from run2012B_singleMu. The outer
# query packs ten of the resulting columns into a list literal and keeps only
# the entries greater than 0.01.
name list_filter_on_unnested_structure
group real_nest
require json
require httpfs
cache real_nest.duckdb
load benchmark/realnest/micro/load.sql
run
SELECT
    list_filter(
        [pt, eta, phi, mass, pfRelIso03_all, pfRelIso04_all, dxy, dxyErr, jetIdx, genPartIdx],
        lambda x: x > 0.01
    )
FROM (
    SELECT UNNEST(Muon, recursive:=true) AS unnested_muon
    FROM run2012B_singleMu
);

View File

@@ -0,0 +1,28 @@
# name: benchmark/realnest/micro/07_list_unique_on_transformed_and_aggregated_list.benchmark
# description: Creates a list by unnesting and re-aggregating it, then performs list_transform and list_unique on that list
# group: [micro]
# Innermost to outermost: unnest cord.back_matter into rows, project the text
# member of each entry, re-aggregate all texts into one list with list(), map
# every string to its length, and finally apply list_unique to those lengths.
name list_operations_on_strings
group real_nest
require json
require httpfs
cache real_nest.duckdb
load benchmark/realnest/micro/load.sql
run
SELECT
    list_unique(list_transform(lt, lambda s: length(s)))
FROM (
    SELECT list(text) AS lt
    FROM (
        SELECT bm.text
        FROM (
            SELECT UNNEST(back_matter) AS bm
            FROM cord
        )
    )
);

View File

@@ -0,0 +1,23 @@
# name: benchmark/realnest/micro/08_count_map_keys.benchmark
# description: Count map keys and aggregate them
# group: [micro]
# Unnests the keys of the tags map of every open_street_map row, counts how
# often each key occurs, and lists the keys from most to least mentioned.
name count_map_keys
group real_nest
require json
require httpfs
cache real_nest.duckdb
load benchmark/realnest/micro/load.sql
run
SELECT
    keys,
    count(*) AS mentions
FROM (
    SELECT UNNEST(map_keys(tags)) AS keys
    FROM open_street_map
)
GROUP BY keys
ORDER BY mentions DESC;

View File

@@ -0,0 +1,22 @@
# name: benchmark/realnest/micro/09_array_agg.benchmark
# description: Aggregate nested structs
# group: [micro]
# Collects five members of data.entities (hashtags, mentions, urls,
# annotations, cashtags) across all twitter rows, one array_agg per member,
# producing a single output row.
name aggregate_nested_structs
group real_nest
require json
require httpfs
cache real_nest.duckdb
load benchmark/realnest/micro/load.sql
run
SELECT
    array_agg(data.entities.hashtags),
    array_agg(data.entities.mentions),
    array_agg(data.entities.urls),
    array_agg(data.entities.annotations),
    array_agg(data.entities.cashtags)
FROM twitter;

View File

@@ -0,0 +1,25 @@
# name: benchmark/realnest/micro/11_list_sort_reduce_transform.benchmark
# description: Transform, aggregate, reduce and sort a list
# group: [micro]
# For every row with a non-empty Photon list: extract pt from each element,
# fold the values with a three-parameter lambda ((x + y)^z), gather the
# per-row results with array_agg, and sort the gathered list.
name list_sort_reduce_transform
group real_nest
require json
require httpfs
cache real_nest.duckdb
load benchmark/realnest/micro/load.sql
run
SELECT
    list_sort(
        array_agg(
            list_reduce(
                list_transform(Photon, lambda x: x.pt),
                lambda x, y, z: (x + y)^z
            )
        )
    ) AS List
FROM run2012B_singleMu
WHERE len(Photon) != 0;

View File

@@ -0,0 +1,22 @@
# name: benchmark/realnest/micro/12_map_list_values.benchmark
# description: Map list values
# group: [micro]
# Builds one map per particle type from the paired *_pt and *_eta list columns
# of singleMu, then orders the result by all four output columns, descending.
name map_list_values
group real_nest
require json
require httpfs
cache real_nest.duckdb
load benchmark/realnest/micro/load.sql
run
SELECT
    map(tau_pt, tau_eta),
    map(jet_pt, jet_eta),
    map(muon_pt, muon_eta),
    map(ph_pt, ph_eta)
FROM singleMu
ORDER BY ALL DESC;

View File

@@ -0,0 +1,28 @@
# name: benchmark/realnest/micro/13_multi_join_nested_data_with_filtering.benchmark
# description: Multiple join conditions and filtering on merged and closed pull requests
# group: [micro]
# Comma join of gh_issue and gh_pull. Two predicates relate the tables (issue
# user and issue assignee must each equal the pull request's base repo owner);
# the remaining predicates restrict gh_pull alone (author and assignee equal
# the base repo owner, merged = 'true', state = 'closed').
# Output is sorted by language, then title.
name multi_join_nested_data_with_filtering
group real_nest
require json
require httpfs
cache real_nest.duckdb
load benchmark/realnest/micro/load.sql
run
SELECT
    gh_pull.pull_request.base.repo.language AS language,
    gh_issue.issue.user.login               AS login,
    gh_pull.pull_request.title              AS title,
    gh_pull.pull_request.html_url           AS url
FROM gh_issue, gh_pull
WHERE gh_pull.pull_request.base.repo.owner = gh_issue.issue.user
    AND gh_pull.pull_request.user = gh_pull.pull_request.base.repo.owner
    AND gh_issue.issue.assignee = gh_pull.pull_request.base.repo.owner
    AND gh_pull.pull_request.assignee = gh_pull.pull_request.base.repo.owner
    AND gh_pull.pull_request.merged = 'true'
    AND gh_pull.pull_request.state = 'closed'
ORDER BY language, title;

View File

@@ -0,0 +1,25 @@
# name: benchmark/realnest/micro/14_list_slice.benchmark
# description: Benchmark the list_slice function
# group: [micro]
# Two expressions per particle list (Jet, Muon, Photon, Tau) of
# single_mu_lists: a plain list_slice(list, begin, end), and a list_slice whose
# result is sliced again with bracket syntax using a step of -1.
name list_slice
group real_nest
require json
require httpfs
cache real_nest.duckdb
load benchmark/realnest/micro/load.sql
run
SELECT
    list_slice(Jet, 2, 9),
    list_slice(Jet, 1, 6)[:3:-1],
    list_slice(Muon, 1, 5),
    list_slice(Muon, 2, 3)[:-4:-1],
    list_slice(Photon, 1, 3),
    list_slice(Photon, 1, 6)[:6:-1],
    list_slice(Tau, 5, 9),
    list_slice(Tau, 2, 9)[:-8:-1]
FROM single_mu_lists;

View File

@@ -0,0 +1,27 @@
# name: benchmark/realnest/micro/15_list_sort.benchmark
# description: Benchmarks list_sort function
# group: [micro]
# Sorts the Jet, Muon, Photon and Tau lists of single_mu_lists with different
# combinations of the sort-order argument ('ASC' / 'DESC') and the optional
# null-order argument ('NULLS FIRST' / 'NULLS LAST').
name list_sort
group real_nest
require json
require httpfs
cache real_nest.duckdb
load benchmark/realnest/micro/load.sql
run
SELECT
    list_sort(Jet, 'ASC'),
    list_sort(Muon, 'DESC'),
    list_sort(Muon, 'ASC', 'NULLS FIRST'),
    list_sort(Muon, 'ASC', 'NULLS LAST'),
    list_sort(Photon, 'ASC'),
    list_sort(Photon, 'DESC', 'NULLS FIRST'),
    list_sort(Photon, 'DESC', 'NULLS LAST'),
    list_sort(Tau, 'DESC'),
    list_sort(Tau, 'ASC', 'NULLS FIRST'),
    list_sort(Tau, 'ASC', 'NULLS LAST')
FROM single_mu_lists;

View File

@@ -0,0 +1,82 @@
# name: benchmark/realnest/micro/16_most_common_list_aggregates.benchmark
# description: Combination of the most common list_aggregate functions
# group: [micro]
# Applies eight list_aggregate functions -- list, sum, min, max, count,
# string_agg (separator '|'), avg, median -- to each of the eight list columns
# of singleMu (tau/jet/muon/ph x pt/eta). Each query line below holds the
# pt/eta pair of one particle type; the output column order is
# aggregate-major, then tau, jet, muon, ph.
name most_common_list_aggregates
group real_nest
require json
require httpfs
cache real_nest.duckdb
load benchmark/realnest/micro/load.sql
run
SELECT
    list_aggregate(tau_pt, 'list'), list_aggregate(tau_eta, 'list'),
    list_aggregate(jet_pt, 'list'), list_aggregate(jet_eta, 'list'),
    list_aggregate(muon_pt, 'list'), list_aggregate(muon_eta, 'list'),
    list_aggregate(ph_pt, 'list'), list_aggregate(ph_eta, 'list'),
    list_aggregate(tau_pt, 'sum'), list_aggregate(tau_eta, 'sum'),
    list_aggregate(jet_pt, 'sum'), list_aggregate(jet_eta, 'sum'),
    list_aggregate(muon_pt, 'sum'), list_aggregate(muon_eta, 'sum'),
    list_aggregate(ph_pt, 'sum'), list_aggregate(ph_eta, 'sum'),
    list_aggregate(tau_pt, 'min'), list_aggregate(tau_eta, 'min'),
    list_aggregate(jet_pt, 'min'), list_aggregate(jet_eta, 'min'),
    list_aggregate(muon_pt, 'min'), list_aggregate(muon_eta, 'min'),
    list_aggregate(ph_pt, 'min'), list_aggregate(ph_eta, 'min'),
    list_aggregate(tau_pt, 'max'), list_aggregate(tau_eta, 'max'),
    list_aggregate(jet_pt, 'max'), list_aggregate(jet_eta, 'max'),
    list_aggregate(muon_pt, 'max'), list_aggregate(muon_eta, 'max'),
    list_aggregate(ph_pt, 'max'), list_aggregate(ph_eta, 'max'),
    list_aggregate(tau_pt, 'count'), list_aggregate(tau_eta, 'count'),
    list_aggregate(jet_pt, 'count'), list_aggregate(jet_eta, 'count'),
    list_aggregate(muon_pt, 'count'), list_aggregate(muon_eta, 'count'),
    list_aggregate(ph_pt, 'count'), list_aggregate(ph_eta, 'count'),
    list_aggregate(tau_pt, 'string_agg', '|'), list_aggregate(tau_eta, 'string_agg', '|'),
    list_aggregate(jet_pt, 'string_agg', '|'), list_aggregate(jet_eta, 'string_agg', '|'),
    list_aggregate(muon_pt, 'string_agg', '|'), list_aggregate(muon_eta, 'string_agg', '|'),
    list_aggregate(ph_pt, 'string_agg', '|'), list_aggregate(ph_eta, 'string_agg', '|'),
    list_aggregate(tau_pt, 'avg'), list_aggregate(tau_eta, 'avg'),
    list_aggregate(jet_pt, 'avg'), list_aggregate(jet_eta, 'avg'),
    list_aggregate(muon_pt, 'avg'), list_aggregate(muon_eta, 'avg'),
    list_aggregate(ph_pt, 'avg'), list_aggregate(ph_eta, 'avg'),
    list_aggregate(tau_pt, 'median'), list_aggregate(tau_eta, 'median'),
    list_aggregate(jet_pt, 'median'), list_aggregate(jet_eta, 'median'),
    list_aggregate(muon_pt, 'median'), list_aggregate(muon_eta, 'median'),
    list_aggregate(ph_pt, 'median'), list_aggregate(ph_eta, 'median')
FROM singleMu;

View File

@@ -0,0 +1,42 @@
# name: benchmark/realnest/micro/17_list_aggregates_histogram_stddev_mode.benchmark
# description: Combination of list_aggregate functions histogram, stddev, mode
# group: [micro]
# Applies list_aggregate with 'stddev', 'mode' and 'histogram' to each of the
# eight list columns of singleMu (tau/jet/muon/ph x pt/eta). Each query line
# below holds the pt/eta pair of one particle type; the output column order is
# aggregate-major, then tau, jet, muon, ph.
name list_aggregates_histogram_stddev_mode
group real_nest
require json
require httpfs
cache real_nest.duckdb
load benchmark/realnest/micro/load.sql
run
SELECT
    list_aggregate(tau_pt, 'stddev'), list_aggregate(tau_eta, 'stddev'),
    list_aggregate(jet_pt, 'stddev'), list_aggregate(jet_eta, 'stddev'),
    list_aggregate(muon_pt, 'stddev'), list_aggregate(muon_eta, 'stddev'),
    list_aggregate(ph_pt, 'stddev'), list_aggregate(ph_eta, 'stddev'),
    list_aggregate(tau_pt, 'mode'), list_aggregate(tau_eta, 'mode'),
    list_aggregate(jet_pt, 'mode'), list_aggregate(jet_eta, 'mode'),
    list_aggregate(muon_pt, 'mode'), list_aggregate(muon_eta, 'mode'),
    list_aggregate(ph_pt, 'mode'), list_aggregate(ph_eta, 'mode'),
    list_aggregate(tau_pt, 'histogram'), list_aggregate(tau_eta, 'histogram'),
    list_aggregate(jet_pt, 'histogram'), list_aggregate(jet_eta, 'histogram'),
    list_aggregate(muon_pt, 'histogram'), list_aggregate(muon_eta, 'histogram'),
    list_aggregate(ph_pt, 'histogram'), list_aggregate(ph_eta, 'histogram')
FROM singleMu;

View File

@@ -0,0 +1,27 @@
/* Load script for the realnest micro benchmarks.
   Each remote DuckDB database is attached read-only over HTTPS and its single
   table is copied into a local table of the current database. Two helper
   tables (single_mu_lists, singleMu) are then derived from the local copy of
   run2012B_singleMu, so the remote singleMu file is scanned only once. */
ATTACH 'https://blobs.duckdb.org/data/realnest/cord_10k.duckdb' AS cord (READ_ONLY);
CREATE TABLE cord AS SELECT * FROM cord.cord;
ATTACH 'https://blobs.duckdb.org/data/realnest/open_street_map_524k.duckdb' AS osm (READ_ONLY);
CREATE TABLE open_street_map AS SELECT * FROM osm.open_street_map;
ATTACH 'https://blobs.duckdb.org/data/realnest/pull_131k.duckdb' AS gh_pull (READ_ONLY);
CREATE TABLE gh_pull AS SELECT * FROM gh_pull.gh_pull;
ATTACH 'https://blobs.duckdb.org/data/realnest/issue_131k.duckdb' AS gh_issue (READ_ONLY);
CREATE TABLE gh_issue AS SELECT * FROM gh_issue.gh_issue;
ATTACH 'https://blobs.duckdb.org/data/realnest/twitter_131k.duckdb' AS tw (READ_ONLY);
CREATE TABLE twitter AS SELECT * FROM tw.twitter;
ATTACH 'https://blobs.duckdb.org/data/realnest/singleMu_524k.duckdb' AS rn_singleMu (READ_ONLY);
CREATE TABLE run2012B_singleMu AS SELECT * FROM rn_singleMu.run2012B_singleMu;
/* single_mu_lists: every column of run2012B_singleMu, with the Jet, Muon,
   Photon and Tau lists resized to exactly 10 entries (padding value NULL).
   Queried by the list_slice and list_sort benchmarks. */
CREATE TABLE single_mu_lists AS
SELECT * REPLACE (
    list_resize(Jet, 10, NULL) AS Jet,
    list_resize(Muon, 10, NULL) AS Muon,
    list_resize(Photon, 10, NULL) AS Photon,
    list_resize(Tau, 10, NULL) AS Tau
)
FROM run2012B_singleMu;
/* singleMu: for each particle list, the distinct pt values and the distinct
   eta values as two separate list columns. Uses the lambda keyword syntax,
   matching the benchmark queries that run against these tables. */
CREATE OR REPLACE TABLE singleMu AS
SELECT
    list_distinct(list_transform("Tau", lambda x: x.pt)) AS tau_pt,
    list_distinct(list_transform("Tau", lambda x: x.eta)) AS tau_eta,
    list_distinct(list_transform("Jet", lambda x: x.pt)) AS jet_pt,
    list_distinct(list_transform("Jet", lambda x: x.eta)) AS jet_eta,
    list_distinct(list_transform("Muon", lambda x: x.pt)) AS muon_pt,
    list_distinct(list_transform("Muon", lambda x: x.eta)) AS muon_eta,
    list_distinct(list_transform("Photon", lambda x: x.pt)) AS ph_pt,
    list_distinct(list_transform("Photon", lambda x: x.eta)) AS ph_eta
FROM run2012B_singleMu
ORDER BY all DESC;
/* The pt and eta lists are de-duplicated independently, so their lengths can
   differ. Each eta list is resized to the length of its pt list; the
   map(pt, eta) calls in 12_map_list_values pair the two columns.
   These UPDATEs intentionally have no WHERE clause: every row is adjusted. */
UPDATE singleMu SET jet_eta = list_resize(jet_eta, len(jet_pt));
UPDATE singleMu SET muon_eta = list_resize(muon_eta, len(muon_pt));
UPDATE singleMu SET ph_eta = list_resize(ph_eta, len(ph_pt));
UPDATE singleMu SET tau_eta = list_resize(tau_eta, len(tau_pt));