400 lines
13 KiB
Python
400 lines
13 KiB
Python
# Script that takes src/include/duckdb/common/enums/optimizer_type.hpp, extracts the optimizer types
|
|
# and adds them to the metrics types.
|
|
# Then it creates a new file src/include/duckdb/common/enums/metric_type.hpp with the new metrics types as enums.
|
|
# and generates both test/sql/pragma/profiling/test_default_profiling_settings.test
|
|
# and test/sql/pragma/profiling/test_custom_profiling_optimizer.test
|
|
|
|
import re
|
|
import os
|
|
|
|
os.chdir(os.path.dirname(__file__))
|
|
|
|
metrics_header_file = os.path.join("..", "src", "include", "duckdb", "common", "enums", "metric_type.hpp")
|
|
metrics_cpp_file = os.path.join("..", "src", "common", "enums", "metric_type.cpp")
|
|
optimizer_file = os.path.join("..", "src", "include", "duckdb", "common", "enums", "optimizer_type.hpp")
|
|
|
|
metrics = [
|
|
"ATTACH_LOAD_STORAGE_LATENCY",
|
|
"ATTACH_REPLAY_WAL_LATENCY",
|
|
"BLOCKED_THREAD_TIME",
|
|
"CHECKPOINT_LATENCY",
|
|
"CPU_TIME",
|
|
"CUMULATIVE_CARDINALITY",
|
|
"CUMULATIVE_ROWS_SCANNED",
|
|
"EXTRA_INFO",
|
|
"LATENCY",
|
|
"OPERATOR_CARDINALITY",
|
|
"OPERATOR_NAME",
|
|
"OPERATOR_ROWS_SCANNED",
|
|
"OPERATOR_TIMING",
|
|
"OPERATOR_TYPE",
|
|
"QUERY_NAME",
|
|
"RESULT_SET_SIZE",
|
|
"ROWS_RETURNED",
|
|
"SYSTEM_PEAK_BUFFER_MEMORY",
|
|
"SYSTEM_PEAK_TEMP_DIR_SIZE",
|
|
"TOTAL_BYTES_READ",
|
|
"TOTAL_BYTES_WRITTEN",
|
|
"WAITING_TO_ATTACH_LATENCY",
|
|
]
|
|
|
|
phase_timing_metrics = [
|
|
"ALL_OPTIMIZERS",
|
|
"CUMULATIVE_OPTIMIZER_TIMING",
|
|
"PHYSICAL_PLANNER",
|
|
"PHYSICAL_PLANNER_COLUMN_BINDING",
|
|
"PHYSICAL_PLANNER_CREATE_PLAN",
|
|
"PHYSICAL_PLANNER_RESOLVE_TYPES",
|
|
"PLANNER",
|
|
"PLANNER_BINDING",
|
|
]
|
|
|
|
query_global_metrics = [
|
|
"ATTACH_LOAD_STORAGE_LATENCY",
|
|
"ATTACH_REPLAY_WAL_LATENCY",
|
|
"BLOCKED_THREAD_TIME",
|
|
"CHECKPOINT_LATENCY",
|
|
"SYSTEM_PEAK_BUFFER_MEMORY",
|
|
"SYSTEM_PEAK_TEMP_DIR_SIZE",
|
|
"WAITING_TO_ATTACH_LATENCY",
|
|
]
|
|
|
|
optimizer_types = []
|
|
|
|
# Regular expression to match the enum values
|
|
enum_pattern = r'\s*([A-Z_]+)\s*=\s*\d+,?|\s*([A-Z_]+),?'
|
|
|
|
inside_enum = False
|
|
|
|
# open the optimizer file and extract the optimizer types
|
|
with open(optimizer_file, "r") as f:
|
|
for line in f:
|
|
line = line.strip()
|
|
|
|
if line.startswith("enum class OptimizerType"):
|
|
inside_enum = True
|
|
continue
|
|
|
|
if inside_enum and line.startswith("};"):
|
|
break
|
|
|
|
if inside_enum:
|
|
match = re.match(enum_pattern, line)
|
|
if match:
|
|
optimizer_type = match[1] if match[1] else match[2]
|
|
if optimizer_type == "INVALID":
|
|
continue
|
|
optimizer_types.append(optimizer_type)
|
|
|
|
header = """//-------------------------------------------------------------------------
|
|
// DuckDB
|
|
//
|
|
//
|
|
// duckdb/common/enums/metrics_type.hpp
|
|
//
|
|
// This file is automatically generated by scripts/generate_metric_enums.py
|
|
// Do not edit this file manually, your changes will be overwritten
|
|
//-------------------------------------------------------------------------\n
|
|
"""
|
|
|
|
typedefs = """struct MetricsTypeHashFunction {
|
|
uint64_t operator()(const MetricsType &index) const {
|
|
return std::hash<uint8_t>()(static_cast<uint8_t>(index));
|
|
}
|
|
};
|
|
|
|
typedef unordered_set<MetricsType, MetricsTypeHashFunction> profiler_settings_t;
|
|
typedef unordered_map<MetricsType, Value, MetricsTypeHashFunction> profiler_metrics_t;
|
|
|
|
"""
|
|
|
|
get_optimizer_metric_fun = 'GetOptimizerMetrics()'
|
|
get_phase_timing_metric_fun = 'GetPhaseTimingMetrics()'
|
|
get_optimizer_metric_by_type_fun = 'GetOptimizerMetricByType(OptimizerType type)'
|
|
get_optimizer_type_by_metric_fun = 'GetOptimizerTypeByMetric(MetricsType type)'
|
|
is_optimizer_metric_fun = 'IsOptimizerMetric(MetricsType type)'
|
|
is_phase_timing_metric_fun = 'IsPhaseTimingMetric(MetricsType type)'
|
|
is_query_global_metric_fun = 'IsQueryGlobalMetric(MetricsType type)'
|
|
|
|
metrics_class = 'MetricsUtils'
|
|
|
|
# Write the metric type header file
|
|
with open(metrics_header_file, "w") as f:
|
|
f.write(header)
|
|
|
|
f.write('#pragma once\n\n')
|
|
f.write('#include "duckdb/common/types/value.hpp"\n')
|
|
f.write('#include "duckdb/common/unordered_set.hpp"\n')
|
|
f.write('#include "duckdb/common/unordered_map.hpp"\n')
|
|
f.write('#include "duckdb/common/constants.hpp"\n')
|
|
f.write('#include "duckdb/common/enum_util.hpp"\n')
|
|
f.write('#include "duckdb/common/enums/optimizer_type.hpp"\n\n')
|
|
|
|
f.write("namespace duckdb {\n\n")
|
|
|
|
f.write("enum class MetricsType : uint8_t {\n")
|
|
|
|
for metric in metrics:
|
|
f.write(f" {metric},\n")
|
|
|
|
for metric in phase_timing_metrics:
|
|
f.write(f" {metric},\n")
|
|
|
|
for metric in optimizer_types:
|
|
f.write(f" OPTIMIZER_{metric},\n")
|
|
|
|
f.write("};\n\n")
|
|
|
|
f.write(typedefs)
|
|
|
|
f.write('class MetricsUtils {\n')
|
|
f.write('public:\n')
|
|
f.write(f' static profiler_settings_t {get_optimizer_metric_fun};\n')
|
|
f.write(f' static profiler_settings_t {get_phase_timing_metric_fun};\n\n')
|
|
f.write(f' static MetricsType {get_optimizer_metric_by_type_fun};\n')
|
|
f.write(f' static OptimizerType {get_optimizer_type_by_metric_fun};\n\n')
|
|
f.write(f' static bool {is_optimizer_metric_fun};\n')
|
|
f.write(f' static bool {is_phase_timing_metric_fun};\n')
|
|
f.write(f' static bool {is_query_global_metric_fun};\n')
|
|
f.write('};\n\n')
|
|
|
|
f.write("} // namespace duckdb\n")
|
|
|
|
# Write the metric_type.cpp file
|
|
with open(metrics_cpp_file, "w") as f:
|
|
f.write(header)
|
|
|
|
f.write('#include "duckdb/common/enums/metric_type.hpp"\n')
|
|
f.write("namespace duckdb {\n\n")
|
|
|
|
f.write(f'profiler_settings_t {metrics_class}::{get_optimizer_metric_fun} {{\n')
|
|
f.write(f" return {{\n")
|
|
for metric in optimizer_types:
|
|
f.write(f" MetricsType::OPTIMIZER_{metric},\n")
|
|
f.write(" };\n")
|
|
f.write("}\n\n")
|
|
|
|
f.write(f'profiler_settings_t {metrics_class}::{get_phase_timing_metric_fun} {{\n')
|
|
f.write(f" return {{\n")
|
|
for metric in phase_timing_metrics:
|
|
f.write(f" MetricsType::{metric},\n")
|
|
f.write(" };\n")
|
|
f.write("}\n\n")
|
|
|
|
f.write(f'MetricsType {metrics_class}::{get_optimizer_metric_by_type_fun} {{\n')
|
|
f.write(' switch(type) {\n')
|
|
for metric in optimizer_types:
|
|
f.write(f" case OptimizerType::{metric}:\n")
|
|
f.write(f" return MetricsType::OPTIMIZER_{metric};\n")
|
|
f.write(' default:\n')
|
|
f.write(
|
|
' throw InternalException("OptimizerType %s cannot be converted to a MetricsType", '
|
|
'EnumUtil::ToString(type));\n'
|
|
)
|
|
f.write(' };\n')
|
|
f.write('}\n\n')
|
|
|
|
f.write(f'OptimizerType {metrics_class}::{get_optimizer_type_by_metric_fun} {{\n')
|
|
f.write(' switch(type) {\n')
|
|
for metric in optimizer_types:
|
|
f.write(f" case MetricsType::OPTIMIZER_{metric}:\n")
|
|
f.write(f" return OptimizerType::{metric};\n")
|
|
f.write(' default:\n')
|
|
f.write(' return OptimizerType::INVALID;\n')
|
|
f.write(' };\n')
|
|
f.write('}\n\n')
|
|
|
|
f.write(f'bool {metrics_class}::{is_optimizer_metric_fun} {{\n')
|
|
f.write(' switch(type) {\n')
|
|
for metric in optimizer_types:
|
|
f.write(f" case MetricsType::OPTIMIZER_{metric}:\n")
|
|
|
|
f.write(' return true;\n')
|
|
f.write(' default:\n')
|
|
f.write(' return false;\n')
|
|
f.write(' };\n')
|
|
f.write('}\n\n')
|
|
|
|
f.write(f'bool {metrics_class}::{is_phase_timing_metric_fun} {{\n')
|
|
f.write(' switch(type) {\n')
|
|
for metric in phase_timing_metrics:
|
|
f.write(f" case MetricsType::{metric}:\n")
|
|
|
|
f.write(' return true;\n')
|
|
f.write(' default:\n')
|
|
f.write(' return false;\n')
|
|
f.write(' };\n')
|
|
f.write('}\n\n')
|
|
|
|
f.write(f'bool {metrics_class}::{is_query_global_metric_fun} {{\n')
|
|
f.write(' switch(type) {\n')
|
|
for metric in query_global_metrics:
|
|
f.write(f" case MetricsType::{metric}:\n")
|
|
|
|
f.write(' return true;\n')
|
|
f.write(' default:\n')
|
|
f.write(' return false;\n')
|
|
f.write(' };\n')
|
|
f.write('}\n\n')
|
|
|
|
f.write("} // namespace duckdb\n")
|
|
|
|
# Generate the test files
|
|
test_names = ["test_default_profiling_settings", "test_custom_profiling_optimizer"]
|
|
|
|
test_descriptions = ["default", "custom optimizer"]
|
|
|
|
test_files = [os.path.join("..", "test", "sql", "pragma", "profiling", f"{name}.test") for name in test_names]
|
|
|
|
|
|
def write_statement(f, statement_type, statement):
|
|
f.write(f"statement {statement_type}\n")
|
|
f.write(statement + "\n\n")
|
|
|
|
|
|
def write_query(f, options, query):
|
|
f.write(f"query {options}\n")
|
|
f.write(query + "\n")
|
|
f.write("----\n")
|
|
|
|
|
|
def write_default_query(f):
|
|
query = "SELECT unnest(['Maia', 'Thijs', 'Mark', 'Hannes', 'Tom', 'Max', 'Carlo', 'Sam', 'Tania']) AS names ORDER BY random();"
|
|
write_statement(f, "ok", query)
|
|
write_statement(f, "ok", "PRAGMA disable_profiling;")
|
|
|
|
|
|
def write_get_custom_profiling_settings(f):
|
|
query = """
|
|
SELECT unnest(res) FROM (
|
|
SELECT current_setting('custom_profiling_settings') AS raw_setting,
|
|
raw_setting.trim('{}') AS setting,
|
|
string_split(setting, ', ') AS res
|
|
) ORDER BY ALL;
|
|
""".strip()
|
|
write_query(f, "I", query)
|
|
|
|
|
|
def write_custom_profiling_optimizer(f):
|
|
write_statement(f, "ok", "PRAGMA custom_profiling_settings='{\"ALL_OPTIMIZERS\": \"true\"}';")
|
|
|
|
write_default_query(f)
|
|
|
|
query = """
|
|
SELECT * FROM (
|
|
SELECT unnest(res) str FROM (
|
|
SELECT current_setting('custom_profiling_settings') as raw_setting,
|
|
raw_setting.trim('{}') AS setting,
|
|
string_split(setting, ', ') AS res
|
|
)
|
|
) WHERE '"true"' NOT in str
|
|
ORDER BY ALL \
|
|
""".strip()
|
|
write_query(f, "I", query)
|
|
f.write("\n")
|
|
|
|
write_statement(f, "ok", "PRAGMA custom_profiling_settings='{}'")
|
|
write_default_query(f)
|
|
|
|
write_get_custom_profiling_settings(f)
|
|
f.write("(empty)\n\n")
|
|
|
|
write_statement(f, "ok", "PRAGMA custom_profiling_settings='{\"OPTIMIZER_JOIN_ORDER\": \"true\"}'")
|
|
write_default_query(f)
|
|
|
|
write_get_custom_profiling_settings(f)
|
|
f.write("\"OPTIMIZER_JOIN_ORDER\": \"true\"\n\n")
|
|
|
|
write_statement(
|
|
f, "ok", "CREATE OR REPLACE TABLE metrics_output AS SELECT * FROM '__TEST_DIR__/profiling_output.json';"
|
|
)
|
|
|
|
query = """
|
|
SELECT
|
|
CASE WHEN optimizer_join_order > 0 THEN 'true'
|
|
ELSE 'false' END
|
|
FROM metrics_output;
|
|
""".strip()
|
|
write_query(f, "I", query)
|
|
f.write("true\n\n")
|
|
|
|
write_statement(f, "ok", "SET disabled_optimizers = 'JOIN_ORDER';")
|
|
write_statement(f, "ok", "PRAGMA custom_profiling_settings='{\"OPTIMIZER_JOIN_ORDER\": \"true\"}'")
|
|
write_default_query(f)
|
|
|
|
write_get_custom_profiling_settings(f)
|
|
f.write("(empty)\n\n")
|
|
|
|
write_statement(f, "ok", "PRAGMA custom_profiling_settings='{\"CUMULATIVE_OPTIMIZER_TIMING\": \"true\"}';")
|
|
write_default_query(f)
|
|
|
|
write_statement(
|
|
f, "ok", "CREATE OR REPLACE TABLE metrics_output AS SELECT * FROM '__TEST_DIR__/profiling_output.json';"
|
|
)
|
|
|
|
query = """
|
|
SELECT
|
|
CASE WHEN cumulative_optimizer_timing > 0 THEN 'true'
|
|
ELSE 'false' END
|
|
FROM metrics_output;
|
|
""".strip()
|
|
write_query(f, "I", query)
|
|
f.write("true\n\n")
|
|
|
|
f.write("# All phase timings must be collected when using detailed profiling mode.\n\n")
|
|
|
|
write_statement(f, "ok", "RESET custom_profiling_settings;")
|
|
write_statement(f, "ok", "SET profiling_mode = 'detailed';")
|
|
write_default_query(f)
|
|
|
|
query = """
|
|
SELECT * FROM (
|
|
SELECT unnest(res) str FROM (
|
|
SELECT current_setting('custom_profiling_settings') AS raw_setting,
|
|
raw_setting.trim('{}') AS setting,
|
|
string_split(setting, ', ') AS res
|
|
)
|
|
)
|
|
WHERE '"true"' NOT IN str
|
|
ORDER BY ALL
|
|
""".strip()
|
|
write_query(f, "I", query)
|
|
f.write("\n")
|
|
|
|
write_statement(f, "ok", "RESET custom_profiling_settings;")
|
|
write_statement(f, "ok", "SET profiling_mode = 'standard';")
|
|
|
|
|
|
# Create the test files
|
|
for test_file, name, description in zip(test_files, test_names, test_descriptions):
|
|
with open(test_file, "w") as f:
|
|
display_name = test_file.replace("../", "")
|
|
f.write(f"# name: {display_name}\n")
|
|
f.write(f"# description: Test {description} profiling settings.\n")
|
|
f.write("# group: [profiling]\n\n")
|
|
f.write("# This file is automatically generated by scripts/generate_metric_enums.py\n")
|
|
f.write("# Do not edit this file manually, your changes will be overwritten\n\n")
|
|
|
|
f.write("require json\n\n")
|
|
|
|
write_statement(f, "ok", "PRAGMA enable_verification;")
|
|
write_statement(f, "ok", "PRAGMA enable_profiling = 'json';")
|
|
write_statement(f, "ok", "PRAGMA profiling_output = '__TEST_DIR__/profiling_output.json';")
|
|
|
|
if name == "test_custom_profiling_optimizer":
|
|
write_custom_profiling_optimizer(f)
|
|
|
|
write_default_query(f)
|
|
|
|
write_get_custom_profiling_settings(f)
|
|
metrics.sort()
|
|
|
|
for metric in metrics:
|
|
f.write(f'"{metric}": "true"\n')
|
|
f.write("\n")
|
|
|
|
write_statement(
|
|
f, "ok", "CREATE OR REPLACE TABLE metrics_output AS SELECT * FROM '__TEST_DIR__/profiling_output.json';"
|
|
)
|
|
write_statement(f, "ok", "SELECT cpu_time, extra_info, rows_returned, latency FROM metrics_output;")
|