should be it

This commit is contained in:
2025-10-24 19:21:19 -05:00
parent a4b23fc57c
commit f09560c7b1
14047 changed files with 3161551 additions and 1 deletions

View File

@@ -0,0 +1,95 @@
//===----------------------------------------------------------------------===//
//
// DuckDB
//
// benchmark.hpp
//
// Author: Mark Raasveldt
//
//===----------------------------------------------------------------------===//
#pragma once
#include <memory>
#include "benchmark_configuration.hpp"
#include "duckdb/common/vector.hpp"
#include "duckdb/common/string.hpp"
#include "duckdb/common/helper.hpp"
#include "duckdb/common/unique_ptr.hpp"
namespace duckdb {
//! Polymorphic root type for the state a Benchmark keeps between its
//! Initialize/Run/Cleanup/Verify calls; concrete benchmarks derive their own state from it
struct BenchmarkState {
	//! Virtual so that a derived state is destroyed correctly through a BenchmarkState pointer
	virtual ~BenchmarkState() = default;
};
//! Abstract base class for benchmarks: concrete benchmarks derive from it and implement
//! the pure virtual hooks (Run, Cleanup, Verify, Interrupt, BenchmarkInfo, GetLogOutput)
class Benchmark {
	constexpr static size_t DEFAULT_NRUNS = 5;
	//! Benchmarks cannot be copy-constructed
	Benchmark(const Benchmark &) = delete;

public:
	//! The name of the benchmark
	string name;
	//! The benchmark group this benchmark belongs to
	string group;

	//! NOTE(review): defined elsewhere; presumably registers the benchmark with the
	//! BenchmarkRunner when register_benchmark is true - confirm against the implementation
	Benchmark(bool register_benchmark, string name, string group);
	//! Benchmarks are handled through Benchmark pointers, so the destructor must be virtual
	//! for destruction through the base type to run the derived destructor
	virtual ~Benchmark() = default;

	//! Initialize the benchmark state; the default implementation keeps no state and returns nullptr
	virtual duckdb::unique_ptr<BenchmarkState> Initialize(BenchmarkConfiguration &config) {
		return nullptr;
	}
	//! Assert correctness after load, before run; does nothing by default
	virtual void Assert(BenchmarkState *state) {
	}
	//! Run the benchmark
	virtual void Run(BenchmarkState *state) = 0;
	//! Cleanup the benchmark, called after each Run
	virtual void Cleanup(BenchmarkState *state) = 0;
	//! Verify that the output of the benchmark was correct
	virtual string Verify(BenchmarkState *state) = 0;
	//! Finalize the benchmark runner; does nothing by default
	virtual void Finalize() {
	}
	//! The query of the benchmark; empty by default
	virtual string GetQuery() {
		return string();
	}
	//! The name to display for the benchmark; defaults to `name`
	virtual string DisplayName() {
		return name;
	}
	//! The group to display for the benchmark; defaults to `group`
	virtual string Group() {
		return group;
	}
	//! The subgroup of the benchmark; empty by default
	virtual string Subgroup() {
		return string();
	}
	//! Interrupt the benchmark because of a timeout
	virtual void Interrupt(BenchmarkState *state) = 0;
	//! Returns information about the benchmark
	virtual string BenchmarkInfo() = 0;
	//! Returns "<name> - <group>" followed by a newline and the text of BenchmarkInfo()
	string GetInfo() {
		return name + " - " + group + "\n" + BenchmarkInfo();
	}
	//! Returns the log output for the given state
	virtual string GetLogOutput(BenchmarkState *state) = 0;
	//! Whether or not Initialize() should be called once for every run or just
	//! once; defaults to just once (false)
	virtual bool RequireReinit() {
		return false;
	}
	//! The amount of runs to do for this benchmark; defaults to DEFAULT_NRUNS
	virtual size_t NRuns() {
		return DEFAULT_NRUNS;
	}
	//! The timeout for this benchmark (in seconds); defaults to the configured timeout_duration
	virtual optional_idx Timeout(const BenchmarkConfiguration &config) {
		return config.timeout_duration;
	}
};
} // namespace duckdb

View File

@@ -0,0 +1,33 @@
//===----------------------------------------------------------------------===//
//
// DuckDB
//
// benchmark_configuration.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include "duckdb/common/string.hpp"
#include "duckdb/common/vector.hpp"
#include "duckdb/common/helper.hpp"
#include "duckdb/common/optional_idx.hpp"
namespace duckdb {
//! Meta request attached to a benchmark invocation
//! NOTE(review): presumably INFO/QUERY ask for the benchmark's info/query text instead of a
//! normal run - confirm against the runner
enum class BenchmarkMetaType {
	NONE,
	INFO,
	QUERY
};
//! Level of query profiling requested for a benchmark run (NONE = no profiling mode is set)
enum class BenchmarkProfileInfo {
	NONE,
	NORMAL,
	DETAILED
};
//! Settings that control a benchmark run
struct BenchmarkConfiguration {
	//! Timeout used when none is configured explicitly
	static constexpr size_t DEFAULT_TIMEOUT = 30;

	//! NOTE(review): presumably a pattern used to select benchmarks by name - confirm against the runner
	string name_pattern;
	//! Meta request for this invocation; NONE by default
	BenchmarkMetaType meta = BenchmarkMetaType::NONE;
	//! Requested profiling level; NONE by default
	BenchmarkProfileInfo profile_info = BenchmarkProfileInfo::NONE;
	//! Timeout of a benchmark; initialised to DEFAULT_TIMEOUT
	optional_idx timeout_duration = optional_idx(DEFAULT_TIMEOUT);
};
} // namespace duckdb

View File

@@ -0,0 +1,58 @@
//===----------------------------------------------------------------------===//
//
// DuckDB
//
// benchmark_runner.hpp
//
// Author: Mark Raasveldt
//
//===----------------------------------------------------------------------===//
#pragma once
#include "benchmark_configuration.hpp"
#include "benchmark.hpp"
#include "duckdb/common/constants.hpp"
#include "duckdb/common/fstream.hpp"
#include <thread>
namespace duckdb {
class DuckDB;
//! The benchmark runner is responsible for running benchmarks. It is a singleton:
//! the constructor is private and the only instance is obtained through GetInstance()
class BenchmarkRunner {
private:
	BenchmarkRunner();

public:
	static constexpr const char *DUCKDB_BENCHMARK_DIRECTORY = "duckdb_benchmark_data";

	//! Returns the single runner instance, which is constructed on the first call
	static BenchmarkRunner &GetInstance() {
		static BenchmarkRunner runner;
		return runner;
	}

	static void InitializeBenchmarkDirectory();
	//! Register a benchmark in the Benchmark Runner, this is done automatically
	//! as long as the proper macros are used
	static void RegisterBenchmark(Benchmark *benchmark);

	// Logging helpers (defined elsewhere)
	// NOTE(review): presumably these write to out_file / log_file - confirm against the implementation
	void Log(string message);
	void LogLine(string message);
	void LogResult(string message);
	void LogOutput(string message);
	void LogSummary(string benchmark, string message, size_t i);

	//! Runs a single benchmark
	void RunBenchmark(Benchmark *benchmark);
	//! Runs the benchmarks (defined elsewhere)
	void RunBenchmarks();

	//! Configuration of the current run
	BenchmarkConfiguration configuration;
	//! The benchmarks known to the runner (non-owning raw pointers)
	vector<Benchmark *> benchmarks;
	ofstream out_file;
	ofstream log_file;
	//! Thread count to use: the reported hardware concurrency, but never less than 1
	uint32_t threads = MaxValue<uint32_t>(std::thread::hardware_concurrency(), 1u);
	//! Memory limit to apply; left empty when no limit is requested
	string memory_limit;
	//! NOTE(review): presumably extra name/value arguments passed through to benchmarks - confirm
	unordered_map<string, string> custom_arguments;
};
} // namespace duckdb

View File

@@ -0,0 +1,135 @@
//===----------------------------------------------------------------------===//
//
// DuckDB
//
// duckdb_benchmark.hpp
//
// Author: Mark Raasveldt
//
//===----------------------------------------------------------------------===//
#pragma once
#include "benchmark.hpp"
#include "duckdb.hpp"
#include "duckdb/main/client_context.hpp"
#include "test_helpers.hpp"
#include "duckdb/main/query_profiler.hpp"
#include "duckdb/common/helper.hpp"
namespace duckdb {
//! Benchmark state backed by DuckDB: holds the database, a connection to it and the
//! result of the benchmark query
struct DuckDBBenchmarkState : public BenchmarkState {
	DuckDB db;
	Connection conn;
	duckdb::unique_ptr<QueryResult> result;

	//! Opens the database at `path` (a null path is passed to DuckDB when `path` is empty) and
	//! applies the runner's thread count, optional memory limit and optional profiling mode.
	//! Failures of these PRAGMAs are only checked through D_ASSERT.
	//! Throws InternalException for a BenchmarkProfileInfo value that is not handled below.
	//! NOTE(review): BenchmarkRunner is used here although this header does not include
	//! benchmark_runner.hpp directly - presumably it arrives through another include; confirm
	DuckDBBenchmarkState(string path) : db(path.empty() ? nullptr : path.c_str()), conn(db) {
		auto &instance = BenchmarkRunner::GetInstance();
		auto res = conn.Query("PRAGMA threads=" + to_string(instance.threads));
		// check the threads PRAGMA right away: `res` is reused for the queries below
		D_ASSERT(!res->HasError());
		if (!instance.memory_limit.empty()) {
			res = conn.Query("PRAGMA memory_limit='" + instance.memory_limit + "'");
			D_ASSERT(!res->HasError());
		}
		string profiling_mode;
		switch (instance.configuration.profile_info) {
		case BenchmarkProfileInfo::NONE:
			// leave the mode empty: no profiling PRAGMA is issued
			profiling_mode = "";
			break;
		case BenchmarkProfileInfo::NORMAL:
			profiling_mode = "standard";
			break;
		case BenchmarkProfileInfo::DETAILED:
			profiling_mode = "detailed";
			break;
		default:
			throw InternalException("Unknown profiling option \"%s\"", instance.configuration.profile_info);
		}
		if (!profiling_mode.empty()) {
			res = conn.Query("PRAGMA profiling_mode=" + profiling_mode);
			D_ASSERT(!res->HasError());
		}
	}
	virtual ~DuckDBBenchmarkState() {
	}
};
//! The base Benchmark class is a base class that is used to create and register
//! new benchmarks
class DuckDBBenchmark : public Benchmark {
public:
DuckDBBenchmark(bool register_benchmark, string name, string group) : Benchmark(register_benchmark, name, group) {
}
virtual ~DuckDBBenchmark() {
}
//! Load data into DuckDB
virtual void Load(DuckDBBenchmarkState *state) = 0;
//! Run a bunch of queries, only called if GetQuery() returns an empty string
virtual void RunBenchmark(DuckDBBenchmarkState *state) {
}
//! This function gets called after the GetQuery() method
virtual void Cleanup(DuckDBBenchmarkState *state) {};
//! Verify a result
virtual string VerifyResult(QueryResult *result) = 0;
//! Whether or not the benchmark is performed on an in-memory database
virtual bool InMemory() {
return true;
}
string GetDatabasePath() {
if (!InMemory()) {
string path = "duckdb_benchmark_db.db";
DeleteDatabase(path);
return path;
} else {
return string();
}
}
virtual duckdb::unique_ptr<DuckDBBenchmarkState> CreateBenchmarkState() {
return make_uniq<DuckDBBenchmarkState>(GetDatabasePath());
}
duckdb::unique_ptr<BenchmarkState> Initialize(BenchmarkConfiguration &config) override {
auto state = CreateBenchmarkState();
Load(state.get());
return std::move(state);
}
void Run(BenchmarkState *state_p) override {
auto state = (DuckDBBenchmarkState *)state_p;
string query = GetQuery();
if (query.empty()) {
RunBenchmark(state);
} else {
state->result = state->conn.Query(query);
}
}
void Cleanup(BenchmarkState *state_p) override {
auto state = (DuckDBBenchmarkState *)state_p;
Cleanup(state);
}
string Verify(BenchmarkState *state_p) override {
auto state = (DuckDBBenchmarkState *)state_p;
return VerifyResult(state->result.get());
}
string GetLogOutput(BenchmarkState *state_p) override {
auto state = (DuckDBBenchmarkState *)state_p;
auto &profiler = QueryProfiler::Get(*state->conn.context);
return profiler.ToJSON();
}
//! Interrupt the benchmark because of a timeout
void Interrupt(BenchmarkState *state_p) override {
auto state = (DuckDBBenchmarkState *)state_p;
state->conn.Interrupt();
}
};
} // namespace duckdb

View File

@@ -0,0 +1,34 @@
//===----------------------------------------------------------------------===//
//
// DuckDB
//
// duckdb_benchmark_macro.hpp
//
// Author: Mark Raasveldt
//
//===----------------------------------------------------------------------===//
#pragma once
#include "duckdb_benchmark.hpp"
//! Opens the definition of a class NAME##Benchmark deriving from DuckDBBenchmark, whose
//! benchmark name is the stringified NAME and whose group is GROUP. The class body is left
//! open: benchmark-specific members follow the macro and FINISH_BENCHMARK(NAME) closes it.
//! The constructor is private; GetInstance() returns the address of a function-local static
//! instance constructed with register_benchmark = true.
//! NOTE(review): GetInstance() also heap-allocates a second instance (register_benchmark =
//! false) on every call and destroys it on return without using it - this looks like dead
//! work; confirm that the constructor has no needed side effect before removing it.
#define DUCKDB_BENCHMARK(NAME, GROUP) \
class NAME##Benchmark : public DuckDBBenchmark { \
NAME##Benchmark(bool register_benchmark) : DuckDBBenchmark(register_benchmark, "" #NAME, GROUP) { \
} \
\
public: \
static NAME##Benchmark *GetInstance() { \
static NAME##Benchmark singleton(true); \
auto benchmark = duckdb::unique_ptr<DuckDBBenchmark>(new NAME##Benchmark(false)); \
return &singleton; \
}
//! Declares a variable global_instance_<NAME> initialised from NAME##Benchmark::GetInstance().
//! The expansion has no trailing semicolon; the use site supplies it.
#define REGISTER_BENCHMARK(NAME) auto global_instance_##NAME = NAME##Benchmark::GetInstance()
//! Closes the class body opened by DUCKDB_BENCHMARK(NAME) and then expands
//! REGISTER_BENCHMARK(NAME) to create the instance variable for that benchmark
#define FINISH_BENCHMARK(NAME) \
} \
; \
REGISTER_BENCHMARK(NAME);
//! Empty declaration of the duckdb namespace
//! NOTE(review): purpose not evident from this header - presumably guarantees the namespace
//! is declared for code that includes only this file; confirm
namespace duckdb {}

View File

@@ -0,0 +1,122 @@
//===----------------------------------------------------------------------===//
//
// benchmark/include/interpreted_benchmark.hpp
//
//===----------------------------------------------------------------------===//
#pragma once
#include "benchmark.hpp"
#include "duckdb/main/query_result.hpp"
#include <unordered_map>
#include <unordered_set>
namespace duckdb {
struct BenchmarkFileReader;
class MaterializedQueryResult;
struct InterpretedBenchmarkState;
//! File name of the default benchmark database
//! NOTE(review): presumably the path used for benchmarks that are not in-memory - confirm against GetDatabasePath()
const string DEFAULT_DB_PATH("duckdb_benchmark_db.db");
//! A query together with the result it is expected to produce
struct BenchmarkQuery {
	//! The query text
	string query;
	//! Number of columns of the expected result; 0 until set
	idx_t column_count = 0;
	//! Expected values as strings
	//! NOTE(review): whether the outer vector holds rows or columns is not visible here - confirm
	vector<vector<string>> expected_result;

	BenchmarkQuery() {
	}
};
//! A benchmark whose definition is read from a file instead of being compiled in
class InterpretedBenchmark : public Benchmark {
public:
	InterpretedBenchmark(string full_path);

	//! NOTE(review): defined elsewhere; presumably parses the benchmark file and fills in the
	//! members below - confirm against the implementation
	void LoadBenchmark();

	// Implementation of the Benchmark interface
	//! Initialize the benchmark state
	duckdb::unique_ptr<BenchmarkState> Initialize(BenchmarkConfiguration &config) override;
	//! Assert correct/expected state of the db, before Run
	void Assert(BenchmarkState *state) override;
	//! Run the benchmark
	void Run(BenchmarkState *state) override;
	//! Cleanup the benchmark, called after each Run
	void Cleanup(BenchmarkState *state) override;
	//! Verify that the output of the benchmark was correct
	string Verify(BenchmarkState *state) override;
	//! Interrupt the benchmark because of a timeout
	void Interrupt(BenchmarkState *state) override;
	//! Returns information about the benchmark
	string BenchmarkInfo() override;
	string GetQuery() override;
	string GetLogOutput(BenchmarkState *state) override;
	string DisplayName() override;
	string Group() override;
	string Subgroup() override;
	//! Whether Initialize() has to be called again for every run
	bool RequireReinit() override {
		return require_reinit;
	}

	// Accessors for settings of this benchmark
	string GetDatabasePath();
	//! Whether the benchmark runs on an in-memory database
	bool InMemory() {
		return in_memory;
	}
	//! The kind of query result the benchmark works with
	QueryResultType ResultMode() const {
		return result_type;
	}
	//! The configured Arrow batch size
	idx_t ArrowBatchSize() const {
		return arrow_batch_size;
	}

private:
	string VerifyInternal(BenchmarkState *state_p, const BenchmarkQuery &query, MaterializedQueryResult &result);
	BenchmarkQuery ReadQueryFromFile(BenchmarkFileReader &reader, string file);
	BenchmarkQuery ReadQueryFromReader(BenchmarkFileReader &reader, const string &sql, const string &header);
	duckdb::unique_ptr<QueryResult> RunLoadQuery(InterpretedBenchmarkState &state, const string &load_query);
	void ProcessFile(const string &path);

private:
	bool is_loaded = false;
	std::unordered_map<string, string> replacement_mapping;
	unordered_set<string> handled_arguments;

	std::unordered_map<string, string> queries;
	string run_query;

	string benchmark_path;
	string cache_db;
	string cache_file;
	//! Only check that a cached db exists, without connecting to it; can be used to test
	//! accessing data from a different db in a non-persistent connection
	bool cache_no_connect = false;
	std::unordered_set<string> extensions;
	std::unordered_set<string> load_extensions;

	//! Queries used to assert a given state of the data
	vector<BenchmarkQuery> assert_queries;
	//! Queries whose results are compared against their expected results
	vector<BenchmarkQuery> result_queries;
	//! How many times to retry the load, if any
	idx_t retry_load = 0;

	string display_name;
	string display_group;
	string subgroup;

	bool in_memory = true;
	string storage_version;
	QueryResultType result_type = QueryResultType::MATERIALIZED_RESULT;
	idx_t arrow_batch_size = STANDARD_VECTOR_SIZE;
	bool require_reinit = false;
};
} // namespace duckdb