Files
email-tracker/external/duckdb/scripts/regression_check.py
2025-10-24 19:21:19 -05:00

116 lines
3.1 KiB
Python

import os
import sys
import duckdb
import numpy
import subprocess
from io import StringIO
import csv
import statistics
old_file = None
new_file = None
# the threshold at which we consider something a regression (percentage)
regression_threshold_percentage = 0.1
# minimal seconds diff for something to be a regression (for very fast benchmarks)
regression_threshold_seconds = 0.01
for arg in sys.argv:
if arg.startswith("--old="):
old_file = arg.replace("--old=", "")
elif arg.startswith("--new="):
new_file = arg.replace("--new=", "")
if old_file is None or new_file is None:
print("Usage: python scripts/regression_check.py --old=<old_file> --new-<new_file>")
exit(1)
con = duckdb.connect()
old_timings_l = con.execute(
f"SELECT name, median(time) FROM read_csv_auto('{old_file}') t(name, nrun, time) GROUP BY ALL ORDER BY ALL"
).fetchall()
new_timings_l = con.execute(
f"SELECT name, median(time) FROM read_csv_auto('{new_file}') t(name, nrun, time) GROUP BY ALL ORDER BY ALL"
).fetchall()
old_timings = {}
new_timings = {}
for entry in old_timings_l:
name = entry[0]
timing = entry[1]
old_timings[name] = timing
for entry in new_timings_l:
name = entry[0]
timing = entry[1]
new_timings[name] = timing
slow_keys = []
multiply_percentage = 1.0 + regression_threshold_percentage
test_keys = list(new_timings.keys())
test_keys.sort()
for key in test_keys:
new_timing = new_timings[key]
old_timing = old_timings[key]
if (old_timing + regression_threshold_seconds) * multiply_percentage < new_timing:
slow_keys.append(key)
return_code = 0
if len(slow_keys) > 0:
print(
'''====================================================
============== REGRESSIONS DETECTED =============
====================================================
'''
)
return_code = 1
for key in slow_keys:
new_timing = new_timings[key]
old_timing = old_timings[key]
print(key)
print(f"Old timing: {old_timing}")
print(f"New timing: {new_timing}")
print("")
print(
'''====================================================
================== New Timings ==================
====================================================
'''
)
with open(new_file, 'r') as f:
print(f.read())
print(
'''====================================================
================== Old Timings ==================
====================================================
'''
)
with open(old_file, 'r') as f:
print(f.read())
else:
print(
'''====================================================
============== NO REGRESSIONS DETECTED =============
====================================================
'''
)
print(
'''====================================================
=================== ALL TIMINGS ===================
====================================================
'''
)
for key in test_keys:
new_timing = new_timings[key]
old_timing = old_timings[key]
print(key)
print(f"Old timing: {old_timing}")
print(f"New timing: {new_timing}")
print("")
exit(return_code)