# Script that takes src/include/duckdb/common/enums/optimizer_type.hpp, extracts the optimizer types
# and adds them to the metrics types.
# It then creates src/include/duckdb/common/enums/metric_type.hpp and src/common/enums/metric_type.cpp
# with the new metrics types as enums, and generates both
# test/sql/pragma/profiling/test_default_profiling_settings.test
# and test/sql/pragma/profiling/test_custom_profiling_optimizer.test
#
# All paths below are relative to the directory containing this script; main() changes into it.

import re
import os

metrics_header_file = os.path.join("..", "src", "include", "duckdb", "common", "enums", "metric_type.hpp")
metrics_cpp_file = os.path.join("..", "src", "common", "enums", "metric_type.cpp")
optimizer_file = os.path.join("..", "src", "include", "duckdb", "common", "enums", "optimizer_type.hpp")

metrics = [
    "ATTACH_LOAD_STORAGE_LATENCY",
    "ATTACH_REPLAY_WAL_LATENCY",
    "BLOCKED_THREAD_TIME",
    "CHECKPOINT_LATENCY",
    "CPU_TIME",
    "CUMULATIVE_CARDINALITY",
    "CUMULATIVE_ROWS_SCANNED",
    "EXTRA_INFO",
    "LATENCY",
    "OPERATOR_CARDINALITY",
    "OPERATOR_NAME",
    "OPERATOR_ROWS_SCANNED",
    "OPERATOR_TIMING",
    "OPERATOR_TYPE",
    "QUERY_NAME",
    "RESULT_SET_SIZE",
    "ROWS_RETURNED",
    "SYSTEM_PEAK_BUFFER_MEMORY",
    "SYSTEM_PEAK_TEMP_DIR_SIZE",
    "TOTAL_BYTES_READ",
    "TOTAL_BYTES_WRITTEN",
    "WAITING_TO_ATTACH_LATENCY",
]

phase_timing_metrics = [
    "ALL_OPTIMIZERS",
    "CUMULATIVE_OPTIMIZER_TIMING",
    "PHYSICAL_PLANNER",
    "PHYSICAL_PLANNER_COLUMN_BINDING",
    "PHYSICAL_PLANNER_CREATE_PLAN",
    "PHYSICAL_PLANNER_RESOLVE_TYPES",
    "PLANNER",
    "PLANNER_BINDING",
]

query_global_metrics = [
    "ATTACH_LOAD_STORAGE_LATENCY",
    "ATTACH_REPLAY_WAL_LATENCY",
    "BLOCKED_THREAD_TIME",
    "CHECKPOINT_LATENCY",
    "SYSTEM_PEAK_BUFFER_MEMORY",
    "SYSTEM_PEAK_TEMP_DIR_SIZE",
    "WAITING_TO_ATTACH_LATENCY",
]

# Filled by main() with the enumerators parsed from optimizer_file (INVALID excluded).
optimizer_types = []

# Regular expression to match the enum values, either `NAME = <number>,` or `NAME,`.
# Digits are allowed after the first character so such enumerators are not truncated.
enum_pattern = r'\s*([A-Z_][A-Z0-9_]*)\s*=\s*\d+,?|\s*([A-Z_][A-Z0-9_]*),?'

header = """//-------------------------------------------------------------------------
//                         DuckDB
//
//
// duckdb/common/enums/metrics_type.hpp
//
// This file is automatically generated by scripts/generate_metric_enums.py
// Do not edit this file manually, your changes will be overwritten
//-------------------------------------------------------------------------\n
"""

# The hash goes through uint8_t because MetricsType is emitted as `enum class MetricsType : uint8_t`.
typedefs = """struct MetricsTypeHashFunction {
    uint64_t operator()(const MetricsType &index) const {
        return std::hash<uint8_t>()(static_cast<uint8_t>(index));
    }
};

typedef unordered_set<MetricsType, MetricsTypeHashFunction> profiler_settings_t;
typedef unordered_map<MetricsType, Value, MetricsTypeHashFunction> profiler_metrics_t;

"""

get_optimizer_metric_fun = 'GetOptimizerMetrics()'
get_phase_timing_metric_fun = 'GetPhaseTimingMetrics()'
get_optimizer_metric_by_type_fun = 'GetOptimizerMetricByType(OptimizerType type)'
get_optimizer_type_by_metric_fun = 'GetOptimizerTypeByMetric(MetricsType type)'
is_optimizer_metric_fun = 'IsOptimizerMetric(MetricsType type)'
is_phase_timing_metric_fun = 'IsPhaseTimingMetric(MetricsType type)'
is_query_global_metric_fun = 'IsQueryGlobalMetric(MetricsType type)'

metrics_class = 'MetricsUtils'

# Headers included by the generated metric_type.hpp, in emission order.
metrics_header_includes = (
    "duckdb/common/types/value.hpp",
    "duckdb/common/unordered_set.hpp",
    "duckdb/common/unordered_map.hpp",
    "duckdb/common/constants.hpp",
    "duckdb/common/enum_util.hpp",
    "duckdb/common/enums/optimizer_type.hpp",
)

# Generate the test files
test_names = ["test_default_profiling_settings", "test_custom_profiling_optimizer"]
test_descriptions = ["default", "custom optimizer"]
test_dir_parts = ("test", "sql", "pragma", "profiling")
test_files = [os.path.join("..", *test_dir_parts, f"{name}.test") for name in test_names]

CREATE_METRICS_OUTPUT_TABLE = (
    "CREATE OR REPLACE TABLE metrics_output AS SELECT * FROM '__TEST_DIR__/profiling_output.json';"
)


def parse_optimizer_types(path):
    """Return the enumerator names of `enum class OptimizerType` found in the file at *path*.

    Scanning starts at the line beginning with ``enum class OptimizerType`` and stops at
    the first following line beginning with ``};``. Lines that do not match
    ``enum_pattern`` are ignored, and the ``INVALID`` enumerator is skipped.
    """
    found = []
    inside_enum = False
    with open(path, "r") as f:
        for line in f:
            line = line.strip()
            if line.startswith("enum class OptimizerType"):
                inside_enum = True
                continue
            if not inside_enum:
                continue
            if line.startswith("};"):
                break
            match = re.match(enum_pattern, line)
            if not match:
                continue
            # Group 1 is set for `NAME = value`, group 2 for a bare `NAME`.
            optimizer_type = match[1] if match[1] else match[2]
            if optimizer_type != "INVALID":
                found.append(optimizer_type)
    return found


def write_metrics_header(f, optimizer_type_names):
    """Write the metric_type.hpp contents to *f*.

    The MetricsType enum lists ``metrics``, then ``phase_timing_metrics``, then one
    ``OPTIMIZER_<name>`` entry per element of *optimizer_type_names*.
    """
    f.write(header)
    f.write('#pragma once\n\n')
    for include in metrics_header_includes:
        f.write(f'#include "{include}"\n')
    f.write("\n")

    f.write("namespace duckdb {\n\n")

    f.write("enum class MetricsType : uint8_t {\n")
    for metric in metrics:
        f.write(f"    {metric},\n")
    for metric in phase_timing_metrics:
        f.write(f"    {metric},\n")
    for metric in optimizer_type_names:
        f.write(f"    OPTIMIZER_{metric},\n")
    f.write("};\n\n")

    f.write(typedefs)

    f.write('class MetricsUtils {\n')
    f.write('public:\n')
    f.write(f'    static profiler_settings_t {get_optimizer_metric_fun};\n')
    f.write(f'    static profiler_settings_t {get_phase_timing_metric_fun};\n\n')
    f.write(f'    static MetricsType {get_optimizer_metric_by_type_fun};\n')
    f.write(f'    static OptimizerType {get_optimizer_type_by_metric_fun};\n\n')
    f.write(f'    static bool {is_optimizer_metric_fun};\n')
    f.write(f'    static bool {is_phase_timing_metric_fun};\n')
    f.write(f'    static bool {is_query_global_metric_fun};\n')
    f.write('};\n\n')

    f.write("} // namespace duckdb\n")


def write_metric_set_function(f, signature, members):
    """Write a MetricsUtils function returning a profiler_settings_t of the given MetricsType *members*."""
    f.write(f'profiler_settings_t {metrics_class}::{signature} {{\n')
    f.write("    return {\n")
    for member in members:
        f.write(f"        MetricsType::{member},\n")
    f.write("    };\n")
    f.write("}\n\n")


def write_metric_predicate_function(f, signature, members):
    """Write a MetricsUtils bool function that returns true exactly for the given MetricsType *members*."""
    f.write(f'bool {metrics_class}::{signature} {{\n')
    f.write('    switch(type) {\n')
    for member in members:
        f.write(f"        case MetricsType::{member}:\n")
    f.write('            return true;\n')
    f.write('        default:\n')
    f.write('            return false;\n')
    f.write('    };\n')
    f.write('}\n\n')


def write_metrics_cpp(f, optimizer_type_names):
    """Write the metric_type.cpp contents (the MetricsUtils member definitions) to *f*."""
    optimizer_metrics = [f"OPTIMIZER_{metric}" for metric in optimizer_type_names]

    f.write(header)
    f.write('#include "duckdb/common/enums/metric_type.hpp"\n')
    f.write("namespace duckdb {\n\n")

    write_metric_set_function(f, get_optimizer_metric_fun, optimizer_metrics)
    write_metric_set_function(f, get_phase_timing_metric_fun, phase_timing_metrics)

    # OptimizerType -> MetricsType; unknown optimizer types raise an InternalException.
    f.write(f'MetricsType {metrics_class}::{get_optimizer_metric_by_type_fun} {{\n')
    f.write('    switch(type) {\n')
    for metric in optimizer_type_names:
        f.write(f"        case OptimizerType::{metric}:\n")
        f.write(f"            return MetricsType::OPTIMIZER_{metric};\n")
    f.write('        default:\n')
    f.write(
        '            throw InternalException("OptimizerType %s cannot be converted to a MetricsType", '
        'EnumUtil::ToString(type));\n'
    )
    f.write('    };\n')
    f.write('}\n\n')

    # MetricsType -> OptimizerType; non-optimizer metrics map to OptimizerType::INVALID.
    f.write(f'OptimizerType {metrics_class}::{get_optimizer_type_by_metric_fun} {{\n')
    f.write('    switch(type) {\n')
    for metric in optimizer_type_names:
        f.write(f"        case MetricsType::OPTIMIZER_{metric}:\n")
        f.write(f"            return OptimizerType::{metric};\n")
    f.write('        default:\n')
    f.write('            return OptimizerType::INVALID;\n')
    f.write('    };\n')
    f.write('}\n\n')

    write_metric_predicate_function(f, is_optimizer_metric_fun, optimizer_metrics)
    write_metric_predicate_function(f, is_phase_timing_metric_fun, phase_timing_metrics)
    write_metric_predicate_function(f, is_query_global_metric_fun, query_global_metrics)

    f.write("} // namespace duckdb\n")


def write_statement(f, statement_type, statement):
    """Write a `statement <statement_type>` record followed by a blank line."""
    f.write(f"statement {statement_type}\n")
    f.write(statement + "\n\n")


def write_query(f, options, query):
    """Write a `query <options>` record up to and including the `----` separator.

    The caller writes the expected result rows after it.
    """
    f.write(f"query {options}\n")
    f.write(query + "\n")
    f.write("----\n")


def write_default_query(f):
    """Write the sample query to be profiled, then a statement that disables profiling."""
    query = "SELECT unnest(['Maia', 'Thijs', 'Mark', 'Hannes', 'Tom', 'Max', 'Carlo', 'Sam', 'Tania']) AS names ORDER BY random();"
    write_statement(f, "ok", query)
    write_statement(f, "ok", "PRAGMA disable_profiling;")


def write_get_custom_profiling_settings(f):
    """Write a query that lists every entry of the `custom_profiling_settings` setting, one per row."""
    query = (
        "SELECT unnest(res) FROM (\n"
        "\tSELECT current_setting('custom_profiling_settings') AS raw_setting,\n"
        "\traw_setting.trim('{}') AS setting,\n"
        "\tstring_split(setting, ', ') AS res\n"
        ") ORDER BY ALL;"
    )
    write_query(f, "I", query)


def write_custom_profiling_optimizer(f):
    """Write the test body specific to test_custom_profiling_optimizer.

    Covers, in order: ALL_OPTIMIZERS, an empty settings object, a single optimizer
    metric, the same metric with its optimizer disabled, CUMULATIVE_OPTIMIZER_TIMING,
    and the detailed profiling mode.
    """
    write_statement(f, "ok", "PRAGMA custom_profiling_settings='{\"ALL_OPTIMIZERS\": \"true\"}';")
    write_default_query(f)

    # Selects every settings entry that is not "true"; the expected result is empty.
    query = (
        "SELECT * FROM (\n"
        "\tSELECT unnest(res) str FROM (\n"
        "\t\tSELECT current_setting('custom_profiling_settings') as raw_setting,\n"
        "\t\traw_setting.trim('{}') AS setting,\n"
        "\t\tstring_split(setting, ', ') AS res\n"
        "\t)\n"
        ") WHERE '\"true\"' NOT in str\n"
        "ORDER BY ALL"
    )
    write_query(f, "I", query)
    f.write("\n")

    write_statement(f, "ok", "PRAGMA custom_profiling_settings='{}'")
    write_default_query(f)
    write_get_custom_profiling_settings(f)
    f.write("(empty)\n\n")

    write_statement(f, "ok", "PRAGMA custom_profiling_settings='{\"OPTIMIZER_JOIN_ORDER\": \"true\"}'")
    write_default_query(f)
    write_get_custom_profiling_settings(f)
    f.write("\"OPTIMIZER_JOIN_ORDER\": \"true\"\n\n")

    write_statement(f, "ok", CREATE_METRICS_OUTPUT_TABLE)
    query = (
        "SELECT\n"
        "\tCASE WHEN optimizer_join_order > 0 THEN 'true'\n"
        "\tELSE 'false' END\n"
        "FROM metrics_output;"
    )
    write_query(f, "I", query)
    f.write("true\n\n")

    # With the optimizer disabled, the generated test expects the setting to be dropped.
    write_statement(f, "ok", "SET disabled_optimizers = 'JOIN_ORDER';")
    write_statement(f, "ok", "PRAGMA custom_profiling_settings='{\"OPTIMIZER_JOIN_ORDER\": \"true\"}'")
    write_default_query(f)
    write_get_custom_profiling_settings(f)
    f.write("(empty)\n\n")

    write_statement(f, "ok", "PRAGMA custom_profiling_settings='{\"CUMULATIVE_OPTIMIZER_TIMING\": \"true\"}';")
    write_default_query(f)

    write_statement(f, "ok", CREATE_METRICS_OUTPUT_TABLE)
    query = (
        "SELECT\n"
        "\tCASE WHEN cumulative_optimizer_timing > 0 THEN 'true'\n"
        "\tELSE 'false' END\n"
        "FROM metrics_output;"
    )
    write_query(f, "I", query)
    f.write("true\n\n")

    f.write("# All phase timings must be collected when using detailed profiling mode.\n\n")

    write_statement(f, "ok", "RESET custom_profiling_settings;")
    write_statement(f, "ok", "SET profiling_mode = 'detailed';")
    write_default_query(f)

    query = (
        "SELECT * FROM (\n"
        "\tSELECT unnest(res) str FROM (\n"
        "\t\tSELECT current_setting('custom_profiling_settings') AS raw_setting,\n"
        "\t\traw_setting.trim('{}') AS setting,\n"
        "\t\tstring_split(setting, ', ') AS res\n"
        "\t)\n"
        ") WHERE '\"true\"' NOT IN str\n"
        "ORDER BY ALL"
    )
    write_query(f, "I", query)
    f.write("\n")

    write_statement(f, "ok", "RESET custom_profiling_settings;")
    write_statement(f, "ok", "SET profiling_mode = 'standard';")


def write_test_file(f, name, description):
    """Write one complete .test file to *f*.

    *name* is the test's base name (an entry of ``test_names``) and *description* the
    matching entry of ``test_descriptions``.
    """
    # The `# name:` line always uses forward slashes, whatever the platform separator is.
    display_name = "/".join(test_dir_parts + (f"{name}.test",))
    f.write(f"# name: {display_name}\n")
    f.write(f"# description: Test {description} profiling settings.\n")
    f.write("# group: [profiling]\n\n")
    f.write("# This file is automatically generated by scripts/generate_metric_enums.py\n")
    f.write("# Do not edit this file manually, your changes will be overwritten\n\n")
    f.write("require json\n\n")

    write_statement(f, "ok", "PRAGMA enable_verification;")
    write_statement(f, "ok", "PRAGMA enable_profiling = 'json';")
    write_statement(f, "ok", "PRAGMA profiling_output = '__TEST_DIR__/profiling_output.json';")

    if name == "test_custom_profiling_optimizer":
        write_custom_profiling_optimizer(f)

    write_default_query(f)
    write_get_custom_profiling_settings(f)
    # The settings query uses ORDER BY ALL, so the expected rows are listed in sorted
    # order; sorted() leaves the module-level list untouched.
    for metric in sorted(metrics):
        f.write(f'"{metric}": "true"\n')
    f.write("\n")

    write_statement(f, "ok", CREATE_METRICS_OUTPUT_TABLE)
    write_statement(f, "ok", "SELECT cpu_time, extra_info, rows_returned, latency FROM metrics_output;")


def main():
    """Regenerate metric_type.hpp, metric_type.cpp and both profiling test files."""
    # abspath guards against __file__ being a bare relative name, for which dirname()
    # returns '' and os.chdir('') raises.
    os.chdir(os.path.dirname(os.path.abspath(__file__)))

    # open the optimizer file and extract the optimizer types
    optimizer_types[:] = parse_optimizer_types(optimizer_file)

    # Write the metric type header file
    with open(metrics_header_file, "w") as f:
        write_metrics_header(f, optimizer_types)

    # Write the metric_type.cpp file
    with open(metrics_cpp_file, "w") as f:
        write_metrics_cpp(f, optimizer_types)

    # Create the test files
    for test_file, name, description in zip(test_files, test_names, test_descriptions):
        with open(test_file, "w") as f:
            write_test_file(f, name, description)


if __name__ == "__main__":
    main()