#include "benchmark_runner.hpp"
|
|
|
|
#include "duckdb/common/profiler.hpp"
|
|
#include "duckdb/common/file_system.hpp"
|
|
#include "duckdb/common/string_util.hpp"
|
|
#include "duckdb.hpp"
|
|
#include "duckdb_benchmark.hpp"
|
|
#include "interpreted_benchmark.hpp"
|
|
|
|
#define CATCH_CONFIG_RUNNER
|
|
#include "catch.hpp"
|
|
#include "re2/re2.h"
|
|
|
|
#include <fstream>
|
|
#include <sstream>
|
|
#include <thread>
|
|
|
|
using namespace duckdb;
|
|
|
|
void BenchmarkRunner::RegisterBenchmark(Benchmark *benchmark) {
    GetInstance().benchmarks.push_back(benchmark);
}

Benchmark::Benchmark(bool register_benchmark, string name, string group) : name(name), group(group) {
    if (register_benchmark) {
        BenchmarkRunner::RegisterBenchmark(this);
    }
}

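// Recursively walks the given path and invokes the callback for every regular file it finds.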
static void listFiles(FileSystem &fs, const string &path, std::function<void(const string &)> cb) {
    fs.ListFiles(path, [&](const string &fname, bool is_dir) {
        string full_path = fs.JoinPath(path, fname);
        if (is_dir) {
            // recurse into directory
            listFiles(fs, full_path, cb);
        } else {
            cb(full_path);
        }
    });
}

static bool endsWith(const string &mainStr, const string &toMatch) {
    return (mainStr.size() >= toMatch.size() &&
            mainStr.compare(mainStr.size() - toMatch.size(), toMatch.size(), toMatch) == 0);
}

BenchmarkRunner::BenchmarkRunner() {
}

void BenchmarkRunner::InitializeBenchmarkDirectory() {
    auto fs = FileSystem::CreateLocal();
    // check if the database directory exists; if not create it
    if (!fs->DirectoryExists(DUCKDB_BENCHMARK_DIRECTORY)) {
        fs->CreateDirectory(DUCKDB_BENCHMARK_DIRECTORY);
    }
}

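// State shared with the timeout watchdog thread (see sleep_thread below), plus the failure
// summary that is printed at the end of main().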
atomic<bool> is_active;
atomic<bool> timeout;
atomic<bool> summarize;
std::vector<std::string> summary;

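// Watchdog that runs alongside every benchmark run. It polls is_active in 10 ms steps until the
// configured timeout expires, then asks the benchmark to interrupt itself. If the benchmark is
// still active after waiting for a second timeout period, the interrupt did not take effect and
// the runner aborts the whole process.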
void sleep_thread(Benchmark *benchmark, BenchmarkRunner *runner, BenchmarkState *state, bool hotrun,
                  const optional_idx &optional_timeout) {
    if (!optional_timeout.IsValid()) {
        return;
    }
    auto timeout_duration = optional_timeout.GetIndex();

    // timeout is given in seconds
    // we wait 10ms per iteration, so timeout * 100 gives us the number of iterations
    for (size_t i = 0; i < (size_t)(timeout_duration * 100) && is_active; i++) {
        std::this_thread::sleep_for(std::chrono::milliseconds(10));
    }
    if (is_active) {
        timeout = true;
        benchmark->Interrupt(state);

        // wait again after interrupting
        for (size_t i = 0; i < (size_t)(timeout_duration * 100) && is_active; i++) {
            std::this_thread::sleep_for(std::chrono::milliseconds(10));
        }
        if (is_active) {
            // still active - we might be stuck in an infinite loop
            // our interrupt is not working
            if (!hotrun) {
                runner->Log(StringUtil::Format("%s\t%d\t", benchmark->name, 0));
            }
            runner->LogResult("Benchmark timeout reached; Interrupt failed. Benchmark killed by benchmark runner");
            exit(1);
        }
    }
}

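// Output channels: Log and LogLine write progress to stderr, LogResult additionally appends to the
// file given via --out (if any), and LogOutput appends to the file given via --log (if any).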
void BenchmarkRunner::Log(string message) {
    fprintf(stderr, "%s", message.c_str());
    fflush(stderr);
}

void BenchmarkRunner::LogLine(string message) {
    fprintf(stderr, "%s\n", message.c_str());
    fflush(stderr);
}

void BenchmarkRunner::LogResult(string message) {
    LogLine(message);
    if (out_file.good()) {
        out_file << message << endl;
        out_file.flush();
    }
}

void BenchmarkRunner::LogOutput(string message) {
    if (log_file.good()) {
        log_file << message << endl;
        log_file.flush();
    }
}

void BenchmarkRunner::LogSummary(string benchmark, string message, size_t i) {
    string log_result_line = StringUtil::Format("%s\t%d\t", benchmark, i) + "\tINCORRECT\n";
    string failure_message = benchmark + "\nname\trun\ttiming\n" + log_result_line + message;
    summary.push_back(failure_message);
}

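// Runs a single benchmark: one unlogged cold run (i == 0) followed by NRuns() timed hot runs,
// each guarded by the sleep_thread watchdog. A hot run logs its elapsed time, or ERROR / TIMEOUT /
// INCORRECT if it fails, in which case the remaining runs are skipped.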
void BenchmarkRunner::RunBenchmark(Benchmark *benchmark) {
    Profiler profiler;
    auto display_name = benchmark->DisplayName();

    duckdb::unique_ptr<BenchmarkState> state;
    try {
        state = benchmark->Initialize(configuration);
        benchmark->Assert(state.get());
    } catch (std::exception &ex) {
        Log(StringUtil::Format("%s\t1\t", benchmark->name));
        LogResult("ERROR");
        duckdb::ErrorData error_data(ex);
        LogLine(error_data.Message());
        return;
    }
    auto nruns = benchmark->NRuns();
    for (size_t i = 0; i < nruns + 1; i++) {
        bool hotrun = i > 0;
        if (hotrun) {
            Log(StringUtil::Format("%s\t%d\t", benchmark->name, i));
        }
        if (hotrun && benchmark->RequireReinit()) {
            state = benchmark->Initialize(configuration);
        }
        is_active = true;
        timeout = false;
        std::thread interrupt_thread(sleep_thread, benchmark, this, state.get(), hotrun,
                                     benchmark->Timeout(configuration));

        string error;
        try {
            profiler.Start();
            benchmark->Run(state.get());
            profiler.End();
        } catch (std::exception &ex) {
            duckdb::ErrorData error_data(ex);
            error = error_data.Message();
        }

        is_active = false;
        interrupt_thread.join();
        if (hotrun) {
            LogOutput(benchmark->GetLogOutput(state.get()));
            if (!error.empty()) {
                LogResult("ERROR");
                LogLine(error);
                break;
            } else if (timeout) {
                LogResult("TIMEOUT");
                break;
            } else {
                // write time
                auto verify = benchmark->Verify(state.get());
                if (!verify.empty()) {
                    LogResult("INCORRECT");
                    LogLine("INCORRECT RESULT: " + verify);
                    LogOutput("INCORRECT RESULT: " + verify);
                    LogSummary(benchmark->name, "INCORRECT RESULT: " + verify, i);
                    break;
                } else {
                    LogResult(std::to_string(profiler.Elapsed()));
                }
            }
        }
        benchmark->Cleanup(state.get());
    }
    benchmark->Finalize();
}

void BenchmarkRunner::RunBenchmarks() {
    LogLine("Starting benchmark run.");
    LogLine("name\trun\ttiming");
    for (auto &benchmark : benchmarks) {
        RunBenchmark(benchmark);
    }
}

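// Prints the command line usage. An illustrative invocation (the name pattern is an RE2 regular
// expression matched against benchmark names and groups, see run_benchmarks below):
//     benchmark_runner "DS.*" --threads=4 --out=results.log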
void print_help() {
    fprintf(stderr, "Usage: benchmark_runner\n");
    fprintf(stderr, "  --list                Show a list of all benchmarks\n");
    fprintf(stderr, "  --profile             Prints the query profile information\n");
    fprintf(stderr, "  --detailed-profile    Prints detailed query profile information\n");
    fprintf(stderr, "  --threads=n           Sets the number of threads to use during execution (default: "
                    "hardware concurrency)\n");
    fprintf(stderr, "  --memory_limit=n      Sets the memory limit to use during execution (default: 0.8 "
                    "* system memory)\n");
    fprintf(stderr, "  --out=[file]          Move benchmark output to file\n");
    fprintf(stderr, "  --log=[file]          Move log output to file\n");
    fprintf(stderr, "  --info                Prints info about the benchmark\n");
    fprintf(stderr, "  --query               Prints the query of the benchmark\n");
    fprintf(stderr, "  --root-dir            Sets the root directory for where to store temp data and "
                    "look for the 'benchmarks' directory\n");
    fprintf(stderr, "  --disable-timeout     Disables killing the run after a certain amount of time has "
                    "passed (30 seconds by default)\n");
    fprintf(stderr,
            "  [name_pattern]        Run only the benchmarks whose names match the specified name pattern, "
            "e.g., DS.* for TPC-DS benchmarks\n");
}

enum ConfigurationError { None, BenchmarkNotFound, InfoWithoutBenchmarkName };

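// Every *.benchmark file found under the "benchmark" directory is loaded as an InterpretedBenchmark.
// The instances are allocated with a bare new and never freed explicitly: the Benchmark constructor
// registers each one with the BenchmarkRunner singleton, which keeps raw pointers for the lifetime
// of the process.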
void LoadInterpretedBenchmarks(FileSystem &fs) {
    // load interpreted benchmarks
    listFiles(fs, "benchmark", [](const string &path) {
        if (endsWith(path, ".benchmark")) {
            new InterpretedBenchmark(path);
        }
    });
}

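// --root-dir is handled here, separately from parse_arguments, because main() must set the working
// directory before the interpreted benchmarks are loaded and before the remaining arguments are parsed.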
string parse_root_dir_or_default(const int arg_counter, char const *const *arg_values, FileSystem &fs) {
    // check if the user specified a different root directory
    for (int arg_index = 1; arg_index < arg_counter; ++arg_index) {
        string arg = arg_values[arg_index];
        if (arg == "--root-dir") {
            if (arg_index + 1 >= arg_counter) {
                fprintf(stderr, "Missing argument for --root-dir\n");
                print_help();
                exit(1);
            }
            auto path = arg_values[arg_index + 1];
            if (fs.IsPathAbsolute(path)) {
                return path;
            } else {
                return fs.JoinPath(FileSystem::GetWorkingDirectory(), path);
            }
        }
    }
    // default root directory is the duckdb root directory
    return DUCKDB_ROOT_DIRECTORY;
}

/**
 * Builds a configuration based on the passed arguments.
 */
void parse_arguments(const int arg_counter, char const *const *arg_values) {
    auto &instance = BenchmarkRunner::GetInstance();
    auto &benchmarks = instance.benchmarks;
    for (int arg_index = 1; arg_index < arg_counter; ++arg_index) {
        // summarize failures by default
        summarize = true;
        string arg = arg_values[arg_index];
        if (arg == "--list") {
            // list the names of all benchmarks
            for (auto &benchmark : benchmarks) {
                fprintf(stdout, "%s\n", benchmark->name.c_str());
            }
            exit(0);
        } else if (arg == "--info") {
            // print info about the benchmark
            instance.configuration.meta = BenchmarkMetaType::INFO;
        } else if (arg == "--profile") {
            // enable the query profiler
            instance.configuration.profile_info = BenchmarkProfileInfo::NORMAL;
        } else if (arg == "--detailed-profile") {
            // enable the detailed query profiler
            instance.configuration.profile_info = BenchmarkProfileInfo::DETAILED;
        } else if (StringUtil::StartsWith(arg, "--threads=")) {
            // set the number of threads
            auto splits = StringUtil::Split(arg, '=');
            instance.threads = Value(splits[1]).DefaultCastAs(LogicalType::UINTEGER).GetValue<uint32_t>();
        } else if (StringUtil::StartsWith(arg, "--memory_limit=")) {
            // set the memory limit
            auto splits = StringUtil::Split(arg, '=');
            instance.memory_limit = splits[1];
        } else if (arg == "--root-dir") {
            // already handled in parse_root_dir_or_default; skip the value
            arg_index++;
        } else if (arg == "--query") {
            // print the query of the benchmark
            instance.configuration.meta = BenchmarkMetaType::QUERY;
        } else if (arg == "--disable-timeout") {
            instance.configuration.timeout_duration = optional_idx();
        } else if (StringUtil::StartsWith(arg, "--out=") || StringUtil::StartsWith(arg, "--log=")) {
            auto splits = StringUtil::Split(arg, '=');
            if (splits.size() != 2) {
                print_help();
                exit(1);
            }
            auto &file = StringUtil::StartsWith(arg, "--out=") ? instance.out_file : instance.log_file;
            file.open(splits[1]);
            if (!file.good()) {
                fprintf(stderr, "Could not open file %s for writing\n", splits[1].c_str());
                exit(1);
            }
        } else if (arg == "--no-summary") {
            summarize = false;
        } else if (StringUtil::StartsWith(arg, "--")) {
            // custom argument: --<name> <value>
            auto arg_name = arg.substr(2);
            if (arg_index + 1 >= arg_counter) {
                fprintf(stderr, "Benchmark argument %s requires an argument\n", arg_name.c_str());
                print_help();
                exit(1);
            }
            arg_index++;
            auto arg_value = arg_values[arg_index];
            instance.custom_arguments.emplace(std::move(arg_name), std::move(arg_value));
        } else {
            if (!instance.configuration.name_pattern.empty()) {
                fprintf(stderr, "Only one benchmark can be specified.\n");
                print_help();
                exit(1);
            }
            instance.configuration.name_pattern = arg;
        }
    }
}

/**
 * Runs the benchmarks specified by the configuration, if possible.
 * Returns a configuration error code.
 */
ConfigurationError run_benchmarks() {
    BenchmarkRunner::InitializeBenchmarkDirectory();

    auto &instance = BenchmarkRunner::GetInstance();
    auto &benchmarks = instance.benchmarks;
    if (!instance.configuration.name_pattern.empty()) {
        // run only the benchmarks whose names match the passed name pattern
        std::vector<int> benchmark_indices {};
        benchmark_indices.reserve(benchmarks.size());
        for (idx_t index = 0; index < benchmarks.size(); ++index) {
            if (RE2::FullMatch(benchmarks[index]->name, instance.configuration.name_pattern)) {
                benchmark_indices.emplace_back(index);
            } else if (RE2::FullMatch(benchmarks[index]->group, instance.configuration.name_pattern)) {
                benchmark_indices.emplace_back(index);
            }
        }
        benchmark_indices.shrink_to_fit();
        if (benchmark_indices.empty()) {
            return ConfigurationError::BenchmarkNotFound;
        }
        std::sort(benchmark_indices.begin(), benchmark_indices.end(),
                  [&](const int a, const int b) -> bool { return benchmarks[a]->name < benchmarks[b]->name; });
        if (instance.configuration.meta == BenchmarkMetaType::INFO) {
            // print info of benchmarks
            for (const auto &benchmark_index : benchmark_indices) {
                auto display_name = benchmarks[benchmark_index]->DisplayName();
                auto display_group = benchmarks[benchmark_index]->Group();
                auto subgroup = benchmarks[benchmark_index]->Subgroup();
                fprintf(stdout, "display_name:%s\ngroup:%s\nsubgroup:%s\n", display_name.c_str(),
                        display_group.c_str(), subgroup.c_str());
            }
        } else if (instance.configuration.meta == BenchmarkMetaType::QUERY) {
            for (const auto &benchmark_index : benchmark_indices) {
                auto query = benchmarks[benchmark_index]->GetQuery();
                if (query.empty()) {
                    continue;
                }
                fprintf(stdout, "%s\n", query.c_str());
            }
        } else {
            instance.LogLine("name\trun\ttiming");
            for (const auto &benchmark_index : benchmark_indices) {
                instance.RunBenchmark(benchmarks[benchmark_index]);
            }
        }
    } else {
        if (instance.configuration.meta != BenchmarkMetaType::NONE) {
            return ConfigurationError::InfoWithoutBenchmarkName;
        }
        // default: run all benchmarks
        instance.RunBenchmarks();
    }
    return ConfigurationError::None;
}

void print_error_message(const ConfigurationError &error) {
    switch (error) {
    case ConfigurationError::BenchmarkNotFound:
        fprintf(stderr, "Benchmark to run could not be found.\n");
        break;
    case ConfigurationError::InfoWithoutBenchmarkName:
        fprintf(stderr, "Info requires benchmark name pattern.\n");
        break;
    case ConfigurationError::None:
        break;
    }
    print_help();
}

int main(int argc, char **argv) {
    duckdb::unique_ptr<FileSystem> fs = FileSystem::CreateLocal();
    // Set the working directory. We need to scan this before loading the benchmarks or parsing the other arguments
    string root_dir = parse_root_dir_or_default(argc, argv, *fs);
    FileSystem::SetWorkingDirectory(root_dir);
    // load interpreted benchmarks before doing anything else
    LoadInterpretedBenchmarks(*fs);
    parse_arguments(argc, argv);
    const auto configuration_error = run_benchmarks();

    if (!summary.empty() && summarize) {
        std::cout << "\n====================================================" << std::endl;
        std::cout << "================ FAILURES SUMMARY ================" << std::endl;
        std::cout << "====================================================\n" << std::endl;
        for (size_t i = 0; i < summary.size(); i++) {
            std::cout << i + 1 << ": " << summary[i] << std::endl;
            std::cout << "----------------------------------------------------" << std::endl;
        }
    }

    if (configuration_error != ConfigurationError::None) {
        print_error_message(configuration_error);
        exit(1);
    }
    return 0;
}