Files
email-tracker/external/duckdb/scripts/generate_functions.py
2025-10-24 19:21:19 -05:00

260 lines
9.6 KiB
Python

import os
import json
from pathlib import Path
function_groups = {
('src', 'include/duckdb', 'function'): ['scalar', 'aggregate'],
('extension', 'core_functions/include', 'core_functions'): ['scalar', 'aggregate'],
}
def get_header():
return '''//===----------------------------------------------------------------------===//
// DuckDB
//
// {HEADER}_functions.hpp
//
//
//===----------------------------------------------------------------------===//
// This file is automatically generated by scripts/generate_functions.py
// Do not edit this file manually, your changes will be overwritten
//===----------------------------------------------------------------------===//
#pragma once
#include "duckdb/function/function_set.hpp"
namespace duckdb {
'''
def get_footer():
return '''} // namespace duckdb
'''
def main():
function_type_set = {}
for (root, include_dir, group), function_types in sorted(function_groups.items()):
all_functions_group = []
group_dir = Path(group)
for function_type in function_types:
type_dir = Path(root).joinpath(group_dir.joinpath(function_type))
relative_function_paths = sorted(
[f'{group}/{function_type}/{f.name}' for f in type_dir.iterdir() if f.is_dir()]
)
for function_path in relative_function_paths:
if Path(normalize_path_separators(f'{root}/{function_path}/functions.json')).exists():
create_header_file(root, include_dir, function_path, all_functions_group, function_type_set)
create_function_list_file(root, group, all_functions_group)
def normalize_path_separators(x):
return os.path.sep.join(x.split('/'))
def legal_struct_name(name):
return name.isalnum()
def get_struct_name(function_name):
return function_name.replace('_', ' ').title().replace(' ', '') + 'Fun'
def get_parameter_line(variants):
if not all(
isinstance(variant['parameters'], list)
and all(isinstance(param, dict) for param in variant['parameters'])
and all('name' in param.keys() for param in variant['parameters'])
for variant in variants
):
raise ValueError(
f"invalid parameters for variants {variants}\nParameters should have format: \"parameters\": [{{\"name\": <param_name>, \"type\": <param_type>}}, ...]"
)
return "\\001".join(
",".join(
param['name'] + "::" + param['type'] if ('type' in param) else param['name']
for param in variant['parameters']
)
for variant in variants
)
def get_description_line(variants):
return "\\001".join([variant['description'] for variant in variants])
def get_example_line(variants):
return "\\001".join([example_from_json(variant) for variant in variants])
def example_from_json(json_record):
if 'example' in json_record:
example_line = sanitize_string(json_record['example'])
elif 'examples' in json_record:
example_line = examples_to_line(json_record['examples'])
else:
example_line = ''
return example_line
def examples_to_line(example_list):
return "\\002".join([sanitize_string(example) for example in example_list])
def get_category_line(variants):
return "\\001".join([categories_from_json(variant) for variant in variants])
def categories_from_json(json_record):
if 'categories' in json_record:
category_line = ','.join([category.strip() for category in json_record['categories']])
else:
category_line = ''
return category_line
def sanitize_string(text):
return text.replace('\\', '\\\\').replace('"', '\\"')
def create_header_file(root, include_dir, path, all_function_list, function_type_set):
header_path = normalize_path_separators(f'{root}/{include_dir}/{path}_functions.hpp')
json_path = normalize_path_separators(f'{root}/{path}/functions.json')
with open(json_path, 'r') as f:
parsed_json = json.load(f)
new_text = get_header().replace('{HEADER}', path)
for entry in parsed_json:
function_text = ''
if 'struct' in entry:
struct_name = entry['struct']
else:
struct_name = get_struct_name(entry['name'])
if not legal_struct_name(struct_name):
print(f'Struct name {struct_name} is not a valid struct name!')
exit(1)
if struct_name in function_type_set:
raise Exception("Duplicate entry " + struct_name)
function_type_set[struct_name] = entry['type']
if entry['type'] == 'scalar_function':
function_text = 'static ScalarFunction GetFunction();'
all_function_list.append([entry['name'], f"DUCKDB_SCALAR_FUNCTION({struct_name})"])
elif entry['type'] == 'scalar_function_set':
function_text = 'static ScalarFunctionSet GetFunctions();'
all_function_list.append([entry['name'], f"DUCKDB_SCALAR_FUNCTION_SET({struct_name})"])
elif entry['type'] == 'aggregate_function':
function_text = 'static AggregateFunction GetFunction();'
all_function_list.append([entry['name'], f"DUCKDB_AGGREGATE_FUNCTION({struct_name})"])
elif entry['type'] == 'aggregate_function_set':
function_text = 'static AggregateFunctionSet GetFunctions();'
all_function_list.append([entry['name'], f"DUCKDB_AGGREGATE_FUNCTION_SET({struct_name})"])
else:
print("Unknown entry type " + entry['type'] + ' for entry ' + struct_name)
exit(1)
if 'variants' in entry:
parameter_line = get_parameter_line(entry['variants'])
description_line = get_description_line(entry['variants'])
example_line = get_example_line(entry['variants'])
category_line = get_category_line(entry['variants'])
else:
parameter_line = entry['parameters'].replace(' ', '') if 'parameters' in entry else ''
description_line = sanitize_string(entry['description'])
example_line = example_from_json(entry)
category_line = categories_from_json(entry)
if 'extra_functions' in entry:
for func_text in entry['extra_functions']:
function_text += '\n ' + func_text
new_text += (
'''struct {STRUCT} {
static constexpr const char *Name = "{NAME}";
static constexpr const char *Parameters = "{PARAMETERS}";
static constexpr const char *Description = "{DESCRIPTION}";
static constexpr const char *Example = "{EXAMPLE}";
static constexpr const char *Categories = "{CATEGORIES}";
{FUNCTION}
};
'''.replace(
'{STRUCT}', struct_name
)
.replace('{NAME}', entry['name'])
.replace('{PARAMETERS}', parameter_line)
.replace('{DESCRIPTION}', description_line)
.replace('{EXAMPLE}', example_line)
.replace('{CATEGORIES}', category_line)
.replace('{FUNCTION}', function_text)
)
alias_count = 1
if 'aliases' in entry:
for alias in entry['aliases']:
alias_struct_name = get_struct_name(alias)
if not legal_struct_name(alias_struct_name):
alias_struct_name = struct_name + 'Alias'
if alias_count > 1:
alias_struct_name += str(alias_count)
alias_count += 1
aliased_type = entry['type']
if aliased_type == 'scalar_function':
all_function_list.append([alias, f"DUCKDB_SCALAR_FUNCTION_ALIAS({alias_struct_name})"])
elif aliased_type == 'scalar_function_set':
all_function_list.append([alias, f"DUCKDB_SCALAR_FUNCTION_SET_ALIAS({alias_struct_name})"])
elif aliased_type == 'aggregate_function':
all_function_list.append([alias, f"DUCKDB_AGGREGATE_FUNCTION_ALIAS({alias_struct_name})"])
elif aliased_type == 'aggregate_function_set':
all_function_list.append([alias, f"DUCKDB_AGGREGATE_FUNCTION_SET_ALIAS({alias_struct_name})"])
else:
print("Unknown entry type " + aliased_type + ' for entry ' + struct_name)
exit(1)
function_type_set[alias_struct_name] = aliased_type
new_text += (
'''struct {STRUCT} {
using ALIAS = {ALIAS};
static constexpr const char *Name = "{NAME}";
};
'''.replace(
'{STRUCT}', alias_struct_name
)
.replace('{NAME}', alias)
.replace('{ALIAS}', struct_name)
)
new_text += get_footer()
with open(header_path, 'w+') as f:
f.write(new_text)
def create_function_list_file(root, group, all_function_list):
function_list_file = normalize_path_separators(f'{root}/{group}/function_list.cpp')
with open(function_list_file, 'r') as f:
text = f.read()
static_function = f'static const StaticFunctionDefinition {group}[]' ' = {'
pos = text.find(static_function)
header = text[:pos]
footer_lines = text[pos:].split('\n')
footer = ''
for i in range(len(footer_lines)):
if len(footer_lines[i]) == 0:
footer = '\n'.join(footer_lines[i:])
break
new_text = header
new_text += static_function + '\n'
all_function_list = sorted(all_function_list, key=lambda x: x[0])
for entry in all_function_list:
new_text += '\t' + entry[1] + ',\n'
new_text += '\tFINAL_FUNCTION\n};\n'
new_text += footer
with open(function_list_file, 'w+') as f:
f.write(new_text)
if __name__ == "__main__":
main()