email-tracker/external/duckdb/scripts/regression/benchmark.py
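"""Run a list of benchmarks through DuckDB's benchmark_runner executable.

Each benchmark named in the benchmark file is executed as a separate
subprocess; the tab-separated timing output it writes to stderr is parsed and
the median timing (or a failure message) is recorded per benchmark.
"""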

import subprocess
import statistics
from io import StringIO
import csv
from dataclasses import dataclass
import argparse
from typing import Optional, Union, Tuple, List
import functools
print = functools.partial(print, flush=True)
STDERR_HEADER = '''====================================================
============== STDERR =============
====================================================
'''
STDOUT_HEADER = '''====================================================
============== STDOUT =============
====================================================
'''
# timeouts in seconds
MAX_TIMEOUT = 3600
DEFAULT_TIMEOUT = 600
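# Note: --disable-timeout does not remove the Python-side subprocess timeout;
# it raises it from DEFAULT_TIMEOUT to max_timeout (see run_benchmark below).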
@dataclass
class BenchmarkRunnerConfig:
    "Configuration for a BenchmarkRunner"

    benchmark_runner: str
    benchmark_file: str
    verbose: bool = False
    threads: Optional[int] = None
    memory_limit: Optional[str] = None
    disable_timeout: bool = False
    max_timeout: int = MAX_TIMEOUT
    root_dir: str = ""
    no_summary: bool = False

    @classmethod
    def from_params(cls, benchmark_runner, benchmark_file, **kwargs) -> "BenchmarkRunnerConfig":
        verbose = kwargs.get("verbose", False)
        threads = kwargs.get("threads", None)
        memory_limit = kwargs.get("memory_limit", None)
        disable_timeout = kwargs.get("disable_timeout", False)
        max_timeout = kwargs.get("max_timeout", MAX_TIMEOUT)
        root_dir = kwargs.get("root_dir", "")
        no_summary = kwargs.get("no_summary", False)
        config = cls(
            benchmark_runner=benchmark_runner,
            benchmark_file=benchmark_file,
            verbose=verbose,
            threads=threads,
            memory_limit=memory_limit,
            disable_timeout=disable_timeout,
            max_timeout=max_timeout,
            root_dir=root_dir,
            no_summary=no_summary,
        )
        return config

    @classmethod
    def from_args(cls) -> "BenchmarkRunnerConfig":
        parser = argparse.ArgumentParser(description="Benchmark script with old and new runners.")
        # Define the arguments
        parser.add_argument("--path", type=str, help="Path to the benchmark_runner executable.", required=True)
        parser.add_argument("--benchmarks", type=str, help="Path to the benchmark file.", required=True)
        parser.add_argument("--verbose", action="store_true", help="Enable verbose output.")
        parser.add_argument("--threads", type=int, help="Number of threads to use.")
        parser.add_argument("--memory_limit", type=str, help="Memory limit to use.")
        parser.add_argument("--disable-timeout", action="store_true", help="Disable timeout.")
        parser.add_argument(
            "--max-timeout", type=int, default=MAX_TIMEOUT, help=f"Set maximum timeout in seconds (default: {MAX_TIMEOUT})."
        )
        parser.add_argument("--root-dir", type=str, default="", help="Root directory.")
        parser.add_argument(
            "--no-summary", action="store_true", help="Do not output a summary of failures."
        )
        # Parse arguments
        parsed_args = parser.parse_args()
        # Create an instance of BenchmarkRunnerConfig using parsed arguments
        config = cls(
            benchmark_runner=parsed_args.path,
            benchmark_file=parsed_args.benchmarks,
            verbose=parsed_args.verbose,
            threads=parsed_args.threads,
            memory_limit=parsed_args.memory_limit,
            disable_timeout=parsed_args.disable_timeout,
            max_timeout=parsed_args.max_timeout,
            root_dir=parsed_args.root_dir,
            no_summary=parsed_args.no_summary,
        )
        return config


class BenchmarkRunner:
    def __init__(self, config: BenchmarkRunnerConfig):
        self.config = config
        self.complete_timings = []
        self.benchmark_list: List[str] = []
        # one benchmark name per non-empty line in the benchmark file
        with open(self.config.benchmark_file, 'r') as f:
            self.benchmark_list = [x.strip() for x in f.read().split('\n') if len(x) > 0]

    def construct_args(self, benchmark_path):
        # build the command line for a single benchmark_runner invocation
        benchmark_args = []
        benchmark_args.extend([self.config.benchmark_runner, benchmark_path])
        if self.config.root_dir:
            benchmark_args.extend(['--root-dir', self.config.root_dir])
        if self.config.threads:
            benchmark_args.extend([f"--threads={self.config.threads}"])
        if self.config.memory_limit:
            benchmark_args.extend([f"--memory_limit={self.config.memory_limit}"])
        if self.config.disable_timeout:
            benchmark_args.extend(["--disable-timeout"])
        if self.config.no_summary:
            benchmark_args.extend(["--no-summary"])
        return benchmark_args

    def run_benchmark(self, benchmark) -> Tuple[Union[float, str], Optional[str]]:
        benchmark_args = self.construct_args(benchmark)
        timeout_seconds = DEFAULT_TIMEOUT
        if self.config.disable_timeout:
            timeout_seconds = self.config.max_timeout
        try:
            proc = subprocess.run(
                benchmark_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, timeout=timeout_seconds
            )
            out = proc.stdout.decode('utf8')
            err = proc.stderr.decode('utf8')
            returncode = proc.returncode
        except subprocess.TimeoutExpired:
            print("Failed to run benchmark " + benchmark)
            print(f"Aborted due to exceeding the limit of {timeout_seconds} seconds")
            return (
                'Failed to run benchmark ' + benchmark,
                f"Aborted due to exceeding the limit of {timeout_seconds} seconds",
            )
        if returncode != 0:
            print("Failed to run benchmark " + benchmark)
            print(STDERR_HEADER)
            print(err)
            print(STDOUT_HEADER)
            print(out)
            if 'HTTP' in err:
                print("Ignoring HTTP error and terminating the regression test run")
                exit(0)
            return 'Failed to run benchmark ' + benchmark, err
        if self.config.verbose:
            print(err)
        # parse the tab-separated timing output that benchmark_runner writes to stderr
        f = StringIO(err)
        csv_reader = csv.reader(f, delimiter='\t')
        header = True
        timings = []
        try:
            for row in csv_reader:
                if len(row) == 0:
                    continue
                if header:
                    header = False
                else:
                    timing = float(row[2])
                    timings.append(timing)
                    self.complete_timings.append(timing)
            return statistics.median(timings), None
        except Exception:
            print("Failed to run benchmark " + benchmark)
            print(err)
            return 'Failed to run benchmark ' + benchmark, err

    def run_benchmarks(self, benchmark_list: List[str]):
        results = {}
        failures = {}
        for benchmark in benchmark_list:
            result, failure_message = self.run_benchmark(benchmark)
            results[benchmark] = result
            failures[benchmark] = failure_message if failure_message else None
        return results, failures


def main():
    config = BenchmarkRunnerConfig.from_args()
    runner = BenchmarkRunner(config)
    runner.run_benchmarks(runner.benchmark_list)


if __name__ == "__main__":
    main()
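# A minimal sketch of driving the runner programmatically instead of through the
# CLI, using the from_params constructor above. The runner binary and benchmark
# list file below are placeholders, not paths defined by this script:
#
#     config = BenchmarkRunnerConfig.from_params(
#         "build/release/benchmark/benchmark_runner",  # hypothetical runner binary
#         "benchmark_list.csv",  # hypothetical file with one benchmark per line
#         threads=4,
#         verbose=True,
#     )
#     runner = BenchmarkRunner(config)
#     results, failures = runner.run_benchmarks(runner.benchmark_list)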