should be it

This commit is contained in:
2025-10-24 19:21:19 -05:00
parent a4b23fc57c
commit f09560c7b1
14047 changed files with 3161551 additions and 1 deletions

View File

@@ -0,0 +1,4 @@
from .parse_and_sort_settings_in_json import add_all_settings_to_global_list as parse_and_sort_json_file
from .update_settings_header_file import generate as update_header_file
from .update_settings_scopes import generate as update_scopes
from .update_settings_src_code import generate as update_src_code

View File

@@ -0,0 +1,197 @@
import os
import re
import subprocess
import tempfile
from pathlib import Path
from typing import Set, List
from functools import total_ordering
# Paths of the files the settings generator reads and rewrites.
# DUCKDB_DIR is the directory three levels above this file.
DUCKDB_DIR = Path(__file__).resolve().parent.parent.parent
DUCKDB_SETTINGS_HEADER_FILE = os.path.join(
    DUCKDB_DIR,
    "src/include/duckdb/main",
    "settings.hpp",
)
DUCKDB_AUTOGENERATED_SETTINGS_FILE = os.path.join(
    DUCKDB_DIR,
    "src/main/settings",
    "autogenerated_settings.cpp",
)
DUCKDB_SETTINGS_SCOPE_FILE = os.path.join(
    DUCKDB_DIR,
    "src/main",
    "config.cpp",
)
JSON_PATH = os.path.join(
    DUCKDB_DIR,
    "src/common",
    "settings.json",
)

# Scope names accepted for a setting, and the sentinel stored for any other value.
VALID_SCOPE_VALUES = [
    "GLOBAL",
    "LOCAL",
    "GLOBAL_LOCAL",
]
INVALID_SCOPE_VALUE = "INVALID"

# Supported SQL type names mapped to the C++ type used for the setting value.
SQL_TYPE_MAP = {
    "UBIGINT": "idx_t",
    "BIGINT": "int64_t",
    "BOOLEAN": "bool",
    "DOUBLE": "double",
    "VARCHAR": "string",
}
# global Setting structure
@total_ordering
class Setting:
    """A single setting definition consumed by the settings code generators.

    The constructor validates the raw values and derives the attributes the
    generators read:

    * ``sql_type`` / ``return_type`` / ``is_enum`` from the declared type,
    * ``on_set`` / ``on_reset`` from ``on_callbacks``,
    * ``custom_implementation`` / ``all_custom`` from the custom-implementation spec,
    * ``struct_name`` from the setting name when no explicit name is given.

    A setting created without a scope is flagged as ``is_generic_setting``.
    Equality, ordering and hashing are based on ``name`` only.
    """

    # track names of written settings to prevent duplicates
    __written_settings: Set[str] = set()

    def __init__(
        self,
        name: str,
        description: str,
        sql_type: str,
        scope: str,
        internal_setting: str,
        on_callbacks: List[str],
        custom_implementation,
        struct_name: str,
        aliases: List[str],
        default_scope: str,
        default_value: str,
    ):
        """Validate the raw values and populate the derived attributes.

        ``custom_implementation`` is either a bool (all or none of set/reset/get
        are hand-written) or a list naming the hand-written callbacks.

        Raises:
            ValueError: for an invalid or duplicate name/alias, an unsupported
                SQL type, or an unknown ``on_callbacks`` /
                ``custom_implementation`` entry.
            Exception: if ``default_scope`` is given but is neither GLOBAL nor LOCAL.
        """
        self.name = self._get_valid_name(name)
        self.description = description
        self.sql_type = self._get_sql_type(sql_type)
        self.return_type = self._get_setting_type(sql_type)
        self.is_enum = sql_type.startswith('ENUM')
        self.internal_setting = internal_setting
        self.scope = self._get_valid_scope(scope) if scope is not None else None
        self.on_set, self.on_reset = self._get_on_callbacks(on_callbacks)
        self.is_generic_setting = self.scope is None
        if self.is_enum and self.is_generic_setting:
            # generic enum settings always get an OnSet callback
            self.on_set = True
        custom_callbacks = ['set', 'reset', 'get']
        if isinstance(custom_implementation, bool):
            self.all_custom = custom_implementation
            self.custom_implementation = custom_callbacks if custom_implementation else []
        else:
            for entry in custom_implementation:
                if entry not in custom_callbacks:
                    raise ValueError(
                        f"Setting {self.name} - incorrect input for custom_implementation - expected set/reset/get, got {entry}"
                    )
            self.all_custom = len(set(custom_implementation)) == 3
            self.custom_implementation = custom_implementation
        self.aliases = self._get_aliases(aliases)
        self.struct_name = self._get_struct_name() if len(struct_name) == 0 else struct_name
        self.default_scope = self._get_valid_default_scope(default_scope) if default_scope is not None else None
        self.default_value = default_value

    # define all comparisons to be based on the setting's name attribute
    def __eq__(self, other) -> bool:
        return isinstance(other, Setting) and self.name == other.name

    def __lt__(self, other) -> bool:
        return isinstance(other, Setting) and self.name < other.name

    def __hash__(self) -> int:
        return hash(self.name)

    def __repr__(self):
        # The C++ type is stored in ``return_type``; there is no ``type`` attribute.
        return f"struct {self.struct_name} -> {self.name}, {self.sql_type}, {self.return_type}, {self.scope}, {self.description} {self.aliases}"

    # validate setting name for correct format and uniqueness
    def _get_valid_name(self, name: str) -> str:
        """Return ``name`` after checking its format and registering it as used.

        Raises:
            ValueError: if the name is not a valid identifier-like string or
                was already registered by another setting or alias.
        """
        if not re.match(r'^[a-zA-Z_][a-zA-Z0-9_]*$', name):
            raise ValueError(f"'{name}' cannot be used as setting name - invalid character")
        if name in Setting.__written_settings:
            raise ValueError(f"'{name}' cannot be used as setting name - already exists")
        Setting.__written_settings.add(name)
        return name

    # ensure the setting scope is valid based on the accepted values
    def _get_valid_scope(self, scope: str) -> str:
        """Return the upper-cased scope, or INVALID_SCOPE_VALUE if it is not accepted."""
        scope = scope.upper()
        if scope in VALID_SCOPE_VALUES:
            return scope
        return INVALID_SCOPE_VALUE

    def _get_valid_default_scope(self, scope: str) -> str:
        """Map a default scope to its SetScope name (GLOBAL -> GLOBAL, LOCAL -> SESSION).

        Raises:
            Exception: for any other value.
        """
        scope = scope.upper()
        if scope == 'GLOBAL':
            return scope
        elif scope == 'LOCAL':
            return 'SESSION'
        raise Exception(f"Invalid default scope value {scope}")

    # validate and return the correct type format
    def _get_sql_type(self, sql_type) -> str:
        """Return the SQL type string exposed for the setting.

        ENUM types are exposed as VARCHAR; list types (``T[]``) are returned
        unchanged once their element type has been validated.

        Raises:
            ValueError: if the (element) type is not supported.
        """
        if sql_type.startswith('ENUM'):
            return 'VARCHAR'
        if sql_type.endswith('[]'):
            # recurse into child-element purely to validate it
            self._get_sql_type(sql_type[:-2])
            return sql_type
        if sql_type in SQL_TYPE_MAP:
            return sql_type
        raise ValueError(f"Invalid SQL type: '{sql_type}' - supported types are {', '.join(SQL_TYPE_MAP.keys())}")

    # validate and return the cpp input type
    def _get_setting_type(self, type) -> str:
        """Return the C++ type for a declared type (``ENUM<T>`` -> ``T``, ``T[]`` -> ``vector<...>``)."""
        if type.startswith('ENUM'):
            return type[len('ENUM<') : -1]
        if type.endswith('[]'):
            subtype = self._get_setting_type(type[:-2])
            return "vector<" + subtype + ">"
        return SQL_TYPE_MAP[type]

    # translate the on_callbacks list into (on_set, on_reset) flags
    def _get_on_callbacks(self, callbacks) -> (bool, bool):
        """Return ``(on_set, on_reset)`` for the given callback names.

        Raises:
            ValueError: if an entry is neither 'set' nor 'reset'.
        """
        on_set = False
        on_reset = False
        for entry in callbacks:
            if entry == 'set':
                on_set = True
            elif entry == 'reset':
                on_reset = True
            else:
                raise ValueError(f"Invalid entry in on_callbacks list: {entry} (expected set or reset)")
        return (on_set, on_reset)

    # validate and return the set of the aliases
    def _get_aliases(self, aliases: List[str]) -> List[str]:
        """Validate every alias like a setting name (format and uniqueness)."""
        return [self._get_valid_name(alias) for alias in aliases]

    # generate a function name
    def _get_struct_name(self) -> str:
        """Derive the struct name: CamelCase of ``name`` with a single ``Setting`` suffix."""
        camel_case_name = ''.join(word.capitalize() for word in re.split(r'[-_]', self.name))
        if camel_case_name.endswith("Setting"):
            return camel_case_name
        return f"{camel_case_name}Setting"
# Module-level list of Setting objects, imported by the other generator modules.
# It is emptied and refilled from the settings JSON file each time the file is parsed.
SettingsList: List[Setting] = []
# global method that finds the indexes of a start and an end marker in a file
def find_start_end_indexes(source_code, start_marker, end_marker, file_path):
    """Locate the region delimited by two markers inside ``source_code``.

    Both markers are used as regular expressions. The start marker must match
    exactly once in the whole text; the end marker must match exactly once in
    the text that follows the start marker.

    Returns:
        ``(start_index, end_index)``: the offset just past the start marker
        and the offset at which the end marker begins.

    Raises:
        ValueError: if either marker is missing or matches more than once.
            ``file_path`` is only used in the error message.
    """
    start_hits = [hit for hit in re.finditer(start_marker, source_code)]
    if not start_hits:
        raise ValueError(f"Couldn't find start marker {start_marker} in {file_path}")
    if len(start_hits) > 1:
        raise ValueError(f"Start marker found more than once in {file_path}")
    start_index = start_hits[0].end()

    # the end marker is only searched for after the start marker
    remainder = source_code[start_index:]
    end_hits = [hit for hit in re.finditer(end_marker, remainder)]
    if not end_hits:
        raise ValueError(f"Couldn't find end marker {end_marker} in {file_path}")
    if len(end_hits) > 1:
        raise ValueError(f"End marker found more than once in {file_path}")

    # translate the offset within the remainder back into the full text
    return start_index, start_index + end_hits[0].start()
# global markers
# SEPARATOR is the comment rule placed above and below each setting heading.
SEPARATOR = "//===----------------------------------------------------------------------===//\n"
# Markers delimiting the region of the autogenerated settings source that gets regenerated.
SRC_CODE_START_MARKER = "namespace duckdb {"
SRC_CODE_END_MARKER = "} // namespace duckdb"
# global method
def write_content_to_file(new_content, path):
    """Overwrite the file at ``path`` with ``new_content``.

    ``new_content`` is joined with the empty string before being written, so
    it may be a single string or an iterable of strings.
    """
    with open(path, 'w') as target:
        text = "".join(new_content)
        target.write(text)
def get_setting_heading(setting_struct_name):
    """Build the comment heading placed above a setting's generated code.

    A trailing ``Setting`` is removed from the struct name and a space is
    inserted before every capital letter except a leading one; the result is
    framed by two SEPARATOR lines.
    """
    base_name = re.sub(r'Setting$', '', setting_struct_name)
    spaced_name = re.sub(r'(?<!^)(?=[A-Z])', ' ', base_name)
    title_line = f"// {spaced_name}\n"
    return SEPARATOR + title_line + SEPARATOR
def make_format():
    """Run ``scripts/format.py`` over the three generated files.

    The formatter script path is relative, so this must be called with the
    working directory set so that ``scripts/format.py`` resolves. Each file is
    passed as its own argument (no shell involved), so a checkout path
    containing spaces or shell metacharacters is handled correctly. As before,
    the formatter's exit status is ignored.
    """
    generated_files = (
        DUCKDB_SETTINGS_HEADER_FILE,
        DUCKDB_SETTINGS_SCOPE_FILE,
        DUCKDB_AUTOGENERATED_SETTINGS_FILE,
    )
    for generated_file in generated_files:
        subprocess.run(
            ["python3", "scripts/format.py", str(generated_file), "--fix", "--force", "--noconfirm"],
            check=False,
        )

View File

@@ -0,0 +1,58 @@
import json
from .config import Setting, SettingsList, JSON_PATH
# sort settings in json by name
def sort_json_data(path):
    """Sort the entries of the JSON file at ``path`` by their ``'name'`` key.

    The file is rewritten in place with the sorted entries (indent of 4) and
    the sorted list is returned.
    """

    def entry_name(entry):
        return entry['name']

    with open(path, 'r') as json_file:
        entries = json.load(json_file)
    ordered_entries = sorted(entries, key=entry_name)
    with open(path, 'w') as json_file:
        json.dump(ordered_entries, json_file, indent=4)
    return ordered_entries
# parse the JSON data and store each entry as a Setting object in the global list SettingsList
def add_all_settings_to_global_list():
    """Rebuild ``SettingsList`` from the settings JSON file.

    The JSON file at ``JSON_PATH`` is first sorted by name (and rewritten),
    the global list is cleared, and one ``Setting`` is appended per entry.
    ``name``, ``description`` and ``type`` are required; every other field
    falls back to a default.

    Raises:
        ValueError: if an entry contains a field that is not recognised, or if
            ``Setting`` rejects one of the values.
        KeyError: if an entry lacks ``name``, ``description`` or ``type``.
    """
    valid_entries = [
        'name',
        'description',
        'type',
        'scope',
        'internal_setting',
        'on_callbacks',
        'custom_implementation',
        'struct',
        'aliases',
        'default_scope',
        'default_value',
    ]

    print(f"Parsing and sorting the settings data in {JSON_PATH}")
    clear_global_settings_list()
    json_data = sort_json_data(JSON_PATH)
    # store all the settings in the SettingsList
    for entry in json_data:
        # reject unknown fields up front so typos in the JSON are not silently ignored
        for field_entry in entry:
            if field_entry not in valid_entries:
                raise ValueError(
                    f"Found unexpected entry \"{field_entry}\" in setting, expected entry to be in {', '.join(valid_entries)}"
                )
        setting = Setting(
            name=entry['name'],
            description=entry['description'],
            sql_type=entry['type'],
            # the internal field name defaults to the setting name
            internal_setting=entry.get('internal_setting', entry['name']),
            scope=entry.get('scope', None),
            struct_name=entry.get('struct', ''),
            on_callbacks=entry.get('on_callbacks', []),
            custom_implementation=entry.get('custom_implementation', False),
            aliases=entry.get('aliases', []),
            default_scope=entry.get('default_scope', None),
            default_value=entry.get('default_value', None),
        )
        SettingsList.append(setting)
def clear_global_settings_list():
    """Remove every entry from the shared ``SettingsList`` in place."""
    del SettingsList[:]

View File

@@ -0,0 +1,132 @@
from .config import (
SRC_CODE_START_MARKER,
SRC_CODE_END_MARKER,
SettingsList,
find_start_end_indexes,
get_setting_heading,
)
def generate_create_value(setting):
    """Return the C++ expression prefix used to wrap the setting's value.

    VARCHAR settings use the plain ``Value`` constructor; every other SQL type
    uses ``Value::<SQL type>``.
    """
    sql_type = setting.sql_type
    return 'Value' if sql_type == 'VARCHAR' else f'Value::{sql_type}'
def add_autogenerated_global_functions(setting):
    """Render the C++ SetGlobal / ResetGlobal / GetSetting definitions for ``setting``.

    A function is emitted only when its callback name ('set', 'reset', 'get')
    is absent from ``setting.custom_implementation``. Returns the generated
    C++ source as one string (empty when all three are custom).
    """
    pieces = []

    if 'set' not in setting.custom_implementation:
        pieces.append(
            f"void {setting.struct_name}::SetGlobal(DatabaseInstance *db, DBConfig &config, const Value &input) {{\n"
        )
        if setting.on_set:
            # the generated setter returns early when the OnGlobalSet hook returns false
            pieces.append(f"\tif (!OnGlobalSet(db, config, input)) {{\n")
            pieces.append(f"\t\treturn;\n\t}}\n")
        if not setting.is_enum:
            pieces.append(
                f"\tconfig.options.{setting.internal_setting} = input.GetValue<{setting.return_type}>();\n"
            )
        else:
            # enum values are parsed from the upper-cased string input
            pieces.append(f"\tauto str_input = StringUtil::Upper(input.GetValue<string>());\n")
            pieces.append(
                f"\tconfig.options.{setting.internal_setting} = EnumUtil::FromString<{setting.return_type}>(str_input);\n"
            )
        pieces.append(f"}}\n\n")

    if 'reset' not in setting.custom_implementation:
        pieces.append(f"void {setting.struct_name}::ResetGlobal(DatabaseInstance *db, DBConfig &config) {{\n")
        if setting.on_reset:
            pieces.append(f"\tif (!OnGlobalReset(db, config)) {{\n")
            pieces.append(f"\t\treturn;\n\t}}\n")
        # reset copies the value from a default-constructed DBConfigOptions
        pieces.append(
            f"\tconfig.options.{setting.internal_setting} = DBConfigOptions().{setting.internal_setting};\n"
        )
        pieces.append(f"}}\n\n")

    if 'get' not in setting.custom_implementation:
        pieces.append(f"Value {setting.struct_name}::GetSetting(const ClientContext &context) {{\n")
        pieces.append(f"\tauto &config = DBConfig::GetConfig(context);\n")
        if not setting.is_enum:
            pieces.append(
                f"\treturn {generate_create_value(setting)}(config.options.{setting.internal_setting});\n"
            )
        else:
            pieces.append(
                f"\treturn {generate_create_value(setting)}(StringUtil::Lower(EnumUtil::ToString(config.options.{setting.internal_setting})));\n"
            )
        pieces.append(f"}}\n\n")

    return "".join(pieces)
def add_autogenerated_local_functions(setting):
    """Render the C++ SetLocal / ResetLocal / GetSetting definitions for ``setting``.

    A function is emitted only when its callback name ('set', 'reset', 'get')
    is absent from ``setting.custom_implementation``. Returns the generated
    C++ source as one string (empty when all three are custom).
    """
    pieces = []

    if 'set' not in setting.custom_implementation:
        pieces.append(f"void {setting.struct_name}::SetLocal(ClientContext &context, const Value &input) {{\n")
        if setting.on_set:
            # the generated setter returns early when the OnLocalSet hook returns false
            pieces.append(f"\tif (!OnLocalSet(context, input)) {{\n")
            pieces.append(f"\t\treturn;\n\t}}\n")
        pieces.append(f"\tauto &config = ClientConfig::GetConfig(context);\n")
        if not setting.is_enum:
            pieces.append(f"\tconfig.{setting.internal_setting} = input.GetValue<{setting.return_type}>();\n")
        else:
            # enum values are parsed from the upper-cased string input
            pieces.append(f"\tauto str_input = StringUtil::Upper(input.GetValue<string>());\n")
            pieces.append(
                f"\tconfig.{setting.internal_setting} = EnumUtil::FromString<{setting.return_type}>(str_input);\n"
            )
        pieces.append(f"}}\n\n")

    if 'reset' not in setting.custom_implementation:
        pieces.append(f"void {setting.struct_name}::ResetLocal(ClientContext &context) {{\n")
        if setting.on_reset:
            pieces.append(f"\tif (!OnLocalReset(context)) {{\n")
            pieces.append(f"\t\treturn;\n\t}}\n")
        # reset copies the value from a default-constructed ClientConfig
        pieces.append(
            f"\tClientConfig::GetConfig(context).{setting.internal_setting} = ClientConfig().{setting.internal_setting};\n"
        )
        pieces.append(f"}}\n\n")

    if 'get' not in setting.custom_implementation:
        pieces.append(f"Value {setting.struct_name}::GetSetting(const ClientContext &context) {{\n")
        pieces.append(f"\tauto &config = ClientConfig::GetConfig(context);\n")
        if not setting.is_enum:
            pieces.append(f"\treturn {generate_create_value(setting)}(config.{setting.internal_setting});\n")
        else:
            pieces.append(
                f"\treturn {generate_create_value(setting)}(StringUtil::Lower(EnumUtil::ToString(config.{setting.internal_setting})));\n"
            )
        pieces.append(f"}}\n\n")

    return "".join(pieces)
def add_autogenerated_enum_set(setting):
    """Render the C++ ``OnSet`` definition for an enum setting.

    Returns an empty string unless the setting has an on-set callback, is an
    enum, and does not list 'set' in ``custom_implementation``. The generated
    body passes the parameter's string through ``EnumUtil::FromString``.
    """
    if not setting.on_set or not setting.is_enum or 'set' in setting.custom_implementation:
        return ""
    signature = f"void {setting.struct_name}::OnSet(SettingCallbackInfo &info, Value &parameter) {{\n"
    body = f"\tEnumUtil::FromString<{setting.return_type}>(StringValue::Get(parameter));\n"
    closing = f"}}\n\n"
    return signature + body + closing
def add_autogenerated_functions(path):
    """Rebuild the autogenerated region of the C++ source file at ``path``.

    The file is read, the region between SRC_CODE_START_MARKER and
    SRC_CODE_END_MARKER is located, and new content is generated for every
    setting in ``SettingsList`` that is not fully custom. Nothing is written
    here; the caller receives the new text.

    Returns:
        ``(new_source, added)``: the complete new file content and the number
        of settings for which code was generated.
    """
    with open(path, 'r') as cpp_file:
        existing_source = cpp_file.read()

    # find start and end indexes of the auto-generated section
    begin, finish = find_start_end_indexes(existing_source, SRC_CODE_START_MARKER, SRC_CODE_END_MARKER, path)

    # keep one character past the start marker, then a newline, ahead of the generated code
    head = existing_source[: begin + 1] + "\n"
    tail = existing_source[finish:]

    sections = []
    for setting in SettingsList:
        # fully custom settings get no autogenerated code at all
        if setting.all_custom:
            continue
        heading = get_setting_heading(setting.struct_name)
        if setting.is_generic_setting:
            body = add_autogenerated_enum_set(setting)
        else:
            body = ""
            if setting.scope in ("GLOBAL", "GLOBAL_LOCAL"):
                body += add_autogenerated_global_functions(setting)
            if setting.scope in ("LOCAL", "GLOBAL_LOCAL"):
                body += add_autogenerated_local_functions(setting)
        # a heading is only emitted when something was generated beneath it
        if body:
            sections.append(heading + body)

    return head + "".join(sections) + tail, len(sections)
# Running this file directly is not supported: fail and point at generate_settings.py.
if __name__ == '__main__':
    raise ValueError("Please use 'generate_settings.py' instead of running the individual script(s)")

View File

@@ -0,0 +1,73 @@
from .config import SEPARATOR, SettingsList, find_start_end_indexes, write_content_to_file
# markers
# Banner that opens the autogenerated region of the settings header (passed to
# find_start_end_indexes, which treats it as a regular expression).
START_MARKER = "".join(
    [
        "//===----------------------------------------------------------------------===//\n",
        "// This code is autogenerated from 'update_settings_header_file.py'.\n",
        "// Please do not make any changes directly here, as they will be overwritten.\n",
        "//\n",
        "// Start of the auto-generated list of settings structures\n",
        "//===----------------------------------------------------------------------===//\n",
    ]
)
# Comment line that closes the autogenerated region.
END_MARKER = "// End of the auto-generated list of settings structures"
def extract_declarations(setting) -> str:
    """Render the C++ struct declaration for ``setting`` in the settings header.

    The struct always carries the return type, name, description and input
    type. Scoped settings additionally declare their Set/Reset functions (and
    the On*Set / On*Reset hooks when requested) plus ``GetSetting``; generic
    settings declare a default value, a default scope and, when requested,
    ``OnSet``.
    """
    pieces = [
        f"struct {setting.struct_name} {{\n",
        f" using RETURN_TYPE = {setting.return_type};\n",
        f" static constexpr const char *Name = \"{setting.name}\";\n",
        f" static constexpr const char *Description = \"{setting.description}\";\n",
        f" static constexpr const char *InputType = \"{setting.sql_type}\";\n",
    ]

    if setting.scope in ("GLOBAL", "GLOBAL_LOCAL"):
        pieces.append(
            f" static void SetGlobal(DatabaseInstance *db, DBConfig &config, const Value &parameter);\n"
        )
        pieces.append(f" static void ResetGlobal(DatabaseInstance *db, DBConfig &config);\n")
        if setting.on_set:
            pieces.append(f"static bool OnGlobalSet(DatabaseInstance *db, DBConfig &config, const Value &input);\n")
        if setting.on_reset:
            pieces.append(f"static bool OnGlobalReset(DatabaseInstance *db, DBConfig &config);\n")

    if setting.scope in ("LOCAL", "GLOBAL_LOCAL"):
        pieces.append(f" static void SetLocal(ClientContext &context, const Value &parameter);\n")
        pieces.append(f" static void ResetLocal(ClientContext &context);\n")
        if setting.on_set:
            pieces.append(f"static bool OnLocalSet(ClientContext &context, const Value &input);\n")
        if setting.on_reset:
            pieces.append(f"static bool OnLocalReset(ClientContext &context);\n")

    # any scoped setting exposes a getter
    if setting.scope is not None:
        pieces.append(f" static Value GetSetting(const ClientContext &context);\n")

    if setting.is_generic_setting:
        pieces.append(f" static constexpr const char *DefaultValue = \"{setting.default_value}\";\n")
        pieces.append(f" static constexpr SetScope DefaultScope = SetScope::{setting.default_scope};\n")
        if setting.on_set:
            pieces.append(f" static void OnSet(SettingCallbackInfo &info, Value &input);\n")

    pieces.append(f"}};\n\n")
    return "".join(pieces)
# generate code for all the settings for the header file
def generate_content(header_file_path):
    """Return the settings header text with its autogenerated region rebuilt.

    The header is read, the region between START_MARKER and END_MARKER is
    located, and it is replaced by the declarations of every setting in
    ``SettingsList`` followed by a SEPARATOR line. Nothing is written here.
    """
    with open(header_file_path, 'r') as header:
        existing_source = header.read()

    # find start and end indexes of the auto-generated section
    begin, finish = find_start_end_indexes(existing_source, START_MARKER, END_MARKER, header_file_path)

    # keep everything up to one character past the start marker
    head = existing_source[: begin + 1]
    tail = SEPARATOR + existing_source[finish:]
    declarations = [extract_declarations(setting) for setting in SettingsList]
    return head + "".join(declarations) + tail
def generate():
    """Regenerate the settings header file in place and report which file is updated."""
    from .config import DUCKDB_SETTINGS_HEADER_FILE

    print(f"Updating {DUCKDB_SETTINGS_HEADER_FILE}")
    write_content_to_file(
        generate_content(DUCKDB_SETTINGS_HEADER_FILE),
        DUCKDB_SETTINGS_HEADER_FILE,
    )
# Running this file directly is not supported: fail and point at generate_settings.py.
if __name__ == '__main__':
    raise ValueError("Please use 'generate_settings.py' instead of running the individual script(s)")

View File

@@ -0,0 +1,61 @@
from .config import SettingsList, VALID_SCOPE_VALUES, find_start_end_indexes, write_content_to_file
# markers
# Both are regular expressions (note the escaped brackets and \s*) handed to find_start_end_indexes.
# START_MARKER matches the opening line of the internal_options array.
START_MARKER = r'static const ConfigurationOption internal_options\[\] = \{\n'
# END_MARKER matches the trailing ", FINAL_ALIAS};" that follows the generated entries.
END_MARKER = r',\s*FINAL_ALIAS};'
# generate the scope code for the ConfigurationOption array and insert into the config file
def generate_scope_code(file):
    """Return the text of ``file`` with the settings and alias arrays regenerated.

    The region between START_MARKER and END_MARKER is replaced by one macro
    entry per setting in ``SettingsList`` (sorted by name), the
    ``FINAL_SETTING`` terminator, and the opening of the ``setting_aliases``
    array with one ``DUCKDB_SETTING_ALIAS`` entry per alias (sorted by alias).
    Each alias entry refers to its setting by that setting's index in the
    sorted entry list. Nothing is written here.

    Raises:
        ValueError: if a scoped setting has a scope outside VALID_SCOPE_VALUES.
    """
    with open(file, 'r') as config_file:
        existing_source = config_file.read()

    # find the start and end indexes of the settings' scope array
    array_start, array_end = find_start_end_indexes(existing_source, START_MARKER, END_MARKER, file)
    before_array = existing_source[:array_start] + "\n "
    after_array = existing_source[array_end:]

    # collect (setting name, macro) pairs and (alias, setting name) pairs
    entries = []
    alias_pairs = []
    for setting in SettingsList:
        if setting.is_generic_setting:
            macro = "DUCKDB_SETTING_CALLBACK" if setting.on_set else "DUCKDB_SETTING"
            entries.append((setting.name, f"{macro}({setting.struct_name})"))
        elif setting.scope in VALID_SCOPE_VALUES:  # valid setting_scope values
            entries.append((setting.name, f"DUCKDB_{setting.scope}({setting.struct_name})"))
        else:
            raise ValueError(f"Setting {setting.name} has invalid input scope value")
        alias_pairs.extend((alias, setting.name) for alias in setting.aliases)

    entries.sort(key=lambda pair: pair[0])
    alias_pairs.sort(key=lambda pair: pair[0])

    # aliases point at their setting by position in the sorted entry list
    position_of = {name: position for position, (name, _) in enumerate(entries)}
    alias_macros = [
        f"DUCKDB_SETTING_ALIAS(\"{alias}\", {position_of[target]})" for alias, target in alias_pairs
    ]

    new_array_section = ',\n '.join(macro for _, macro in entries)
    new_array_section += ', FINAL_SETTING};\n\n'
    new_array_section += 'static const ConfigurationAlias setting_aliases[] = {'
    new_array_section += ',\n '.join(alias_macros)
    return before_array + new_array_section + after_array
def generate():
    """Regenerate the settings arrays in the config source file in place."""
    from .config import DUCKDB_SETTINGS_SCOPE_FILE

    print(f"Updating {DUCKDB_SETTINGS_SCOPE_FILE}")
    write_content_to_file(
        generate_scope_code(DUCKDB_SETTINGS_SCOPE_FILE),
        DUCKDB_SETTINGS_SCOPE_FILE,
    )
# Running this file directly is not supported: fail and point at generate_settings.py.
if __name__ == '__main__':
    raise ValueError("Please use 'generate_settings.py' instead of running the individual script(s)")

View File

@@ -0,0 +1,18 @@
import re
from .config import SettingsList, write_content_to_file, find_start_end_indexes
from .update_autogenerated_functions import add_autogenerated_functions
def generate():
    """Regenerate the autogenerated settings source file in place."""
    from .config import DUCKDB_AUTOGENERATED_SETTINGS_FILE

    print(f"Updating {DUCKDB_AUTOGENERATED_SETTINGS_FILE}")
    # add_autogenerated_functions returns the new file text and the number of generated settings
    updated_source, generated = add_autogenerated_functions(DUCKDB_AUTOGENERATED_SETTINGS_FILE)
    write_content_to_file(updated_source, DUCKDB_AUTOGENERATED_SETTINGS_FILE)
    # NOTE: for debugging purposes
    # print(f"The total number of settings is {len(SettingsList)}, and {generated} settings are added in {DUCKDB_AUTOGENERATED_SETTINGS_FILE} and, {added_custom} new and {existing_custom} existing added in {DUCKDB_CUSTOM_DEFINED_SETTINGS_FILE}")
# Running this file directly is not supported: fail and point at generate_settings.py.
if __name__ == '__main__':
    raise ValueError("Please use 'generate_settings.py' instead of running the individual script(s)")