import os
import argparse
from pathlib import Path

parser = argparse.ArgumentParser(description='Inline the auto-complete PEG grammar files')
parser.add_argument(
    '--print', action='store_true', help='Print the grammar instead of writing to a file', default=False
)
parser.add_argument(
    '--grammar-file',
    action='store_true',
    help='Write the grammar to a .gram file instead of a C++ header',
    default=False,
)

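# Typical invocations:
#   python inline_grammar.py                 -> writes include/inlined_grammar.hpp
#   python inline_grammar.py --print         -> prints the combined grammar to stdout
#   python inline_grammar.py --grammar-file  -> writes include/inlined_grammar.gram instead
# Note that keyword_map.cpp is regenerated on every run, regardless of flags.
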
args = parser.parse_args()

autocomplete_dir = Path(__file__).parent
statements_dir = os.path.join(autocomplete_dir, 'grammar', 'statements')
keywords_dir = os.path.join(autocomplete_dir, 'grammar', 'keywords')
target_file = os.path.join(autocomplete_dir, 'include', 'inlined_grammar.hpp')

contents = ""

# Maps keyword .list filenames to keyword categories
FILENAME_TO_CATEGORY = {
    "reserved_keyword.list": "RESERVED_KEYWORD",
    "unreserved_keyword.list": "UNRESERVED_KEYWORD",
    "column_name_keyword.list": "COL_NAME_KEYWORD",
    "func_name_keyword.list": "TYPE_FUNC_NAME_KEYWORD",
    "type_name_keyword.list": "TYPE_FUNC_NAME_KEYWORD",
}

# Maps category names to their C++ map variable names
CPP_MAP_NAMES = {
    "RESERVED_KEYWORD": "reserved_keyword_map",
    "UNRESERVED_KEYWORD": "unreserved_keyword_map",
    "COL_NAME_KEYWORD": "colname_keyword_map",
    "TYPE_FUNC_NAME_KEYWORD": "typefunc_keyword_map",
}

# Collect the keywords for each category in a set, preventing duplicates
keyword_sets = {category: set() for category in CPP_MAP_NAMES}

# --- Validation and Loading ---
# Track RESERVED/UNRESERVED keywords separately so conflicting entries can be caught
reserved_set = set()
unreserved_set = set()


def load_keywords(filepath):
    with open(filepath, "r") as f:
        return [line.strip().lower() for line in f if line.strip()]


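# Keywords are normalized on load: e.g. a list entry "ABORT\n" is stored as "abort".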
for filename in os.listdir(keywords_dir):
    if filename not in FILENAME_TO_CATEGORY:
        continue

    category = FILENAME_TO_CATEGORY[filename]
    keywords = load_keywords(os.path.join(keywords_dir, filename))

    for kw in keywords:
        # A keyword must not appear in both the RESERVED and UNRESERVED lists
        if category == "RESERVED_KEYWORD":
            if kw in reserved_set or kw in unreserved_set:
                print(f"Keyword '{kw}' has conflicting RESERVED/UNRESERVED categories")
                exit(1)
            reserved_set.add(kw)
        elif category == "UNRESERVED_KEYWORD":
            if kw in reserved_set or kw in unreserved_set:
                print(f"Keyword '{kw}' has conflicting RESERVED/UNRESERVED categories")
                exit(1)
            unreserved_set.add(kw)

        # Add the keyword to its category's set
        keyword_sets[category].add(kw)

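# keyword_sets now maps each category to its keywords, e.g. (illustrative):
#   keyword_sets["RESERVED_KEYWORD"] == {"all", "and", ...}
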
# --- C++ Code Generation ---
output_path = os.path.join(autocomplete_dir, "keyword_map.cpp")
with open(output_path, "w") as f:
    f.write("/* THIS FILE WAS AUTOMATICALLY GENERATED BY inline_grammar.py */\n")
    f.write('#include "keyword_helper.hpp"\n\n')
    f.write("namespace duckdb {\n")
    f.write("void PEGKeywordHelper::InitializeKeywordMaps() {\n")
    f.write("\tif (initialized) {\n\t\treturn;\n\t}\n")
    f.write("\tinitialized = true;\n\n")

    # Track the category count so the last block can skip its trailing newline
    num_categories = len(keyword_sets)

    # Emit the insert calls that populate each category's map
    for i, (category, keywords) in enumerate(keyword_sets.items()):
        cpp_map_name = CPP_MAP_NAMES[category]
        f.write(f"\t// Populating {cpp_map_name}\n")
        # Sort keywords for deterministic output
        for kw in sorted(keywords):
            f.write(f'\t{cpp_map_name}.insert("{kw}");\n')

        # Separate the blocks with a blank line, except after the last one
        if i < num_categories - 1:
            f.write("\n")
    f.write("}\n")
    f.write("} // namespace duckdb\n")

print(f"Successfully generated {output_path}")

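# The generated keyword_map.cpp has roughly this shape (illustrative sketch;
# the actual keywords come from the .list files, and the real output is
# tab-indented):
#
#   /* THIS FILE WAS AUTOMATICALLY GENERATED BY inline_grammar.py */
#   #include "keyword_helper.hpp"
#
#   namespace duckdb {
#   void PEGKeywordHelper::InitializeKeywordMaps() {
#       if (initialized) {
#           return;
#       }
#       initialized = true;
#
#       // Populating reserved_keyword_map
#       reserved_keyword_map.insert("all");
#       ...
#   }
#   } // namespace duckdb
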
def filename_to_upper_camel(file):
    name, _ = os.path.splitext(file)  # 'column_name_keyword.list' -> 'column_name_keyword'
    parts = name.split('_')  # ['column', 'name', 'keyword']
    return ''.join(p.capitalize() for p in parts)


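# e.g. filename_to_upper_camel('reserved_keyword.list') == 'ReservedKeyword'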
for file in os.listdir(keywords_dir):
    if not file.endswith('.list'):
        continue
    rule_name = filename_to_upper_camel(file)
    rule = f"{rule_name} <- "
    with open(os.path.join(keywords_dir, file), 'r') as f:
        lines = [f"'{line.strip()}'" for line in f if line.strip()]
    rule += " /\n".join(lines) + "\n"
    contents += rule

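# Each keyword list becomes one PEG choice rule, e.g. with illustrative keywords:
#   ReservedKeyword <- 'all' /
#   'and' /
#   'any'
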
for file in os.listdir(statements_dir):
    if not file.endswith('.gram'):
        raise Exception(f"File {file} does not end with .gram")
    with open(os.path.join(statements_dir, file), 'r') as f:
        contents += f.read() + "\n"

if args.print:
    print(contents)
    exit(0)

if args.grammar_file:
    grammar_file = target_file.replace('.hpp', '.gram')
    with open(grammar_file, 'w+') as f:
        f.write(contents)
    exit(0)


def get_grammar_bytes(contents):
    # Emit each non-empty grammar line as an escaped, tab-indented C++ string literal
    result_text = ""
    for line in contents.split('\n'):
        if len(line) == 0:
            continue
        result_text += "\t\"" + line.replace('\\', '\\\\').replace('"', '\\"') + "\\n\"\n"
    return result_text


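# For example, a grammar line such as
#     Rule <- 'a' "b"
# is emitted as the tab-indented C++ source line
#     "Rule <- 'a' \"b\"\n"
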
with open(target_file, 'w+') as f:
    f.write(
        '''/* THIS FILE WAS AUTOMATICALLY GENERATED BY inline_grammar.py */
#pragma once

namespace duckdb {

const char INLINED_PEG_GRAMMAR[] = {
'''
        + get_grammar_bytes(contents)
        + '''
};

} // namespace duckdb
'''
    )
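
# Adjacent string literals concatenate, so the generated header defines a single
# null-terminated C string. Roughly (keywords illustrative):
#   const char INLINED_PEG_GRAMMAR[] = {
#       "ReservedKeyword <- 'all' /\n"
#       "'and' /\n"
#       ...
#   };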