608
external/duckdb/scripts/amalgamation.py
vendored
Normal file
@@ -0,0 +1,608 @@
|
||||
# this script creates a single header + source file combination out of the DuckDB sources
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import shutil
|
||||
import subprocess
|
||||
from python_helpers import open_utf8, normalize_path
|
||||
|
||||
amal_dir = os.path.join('src', 'amalgamation')
|
||||
header_file = os.path.join(amal_dir, "duckdb.hpp")
|
||||
source_file = os.path.join(amal_dir, "duckdb.cpp")
|
||||
temp_header = 'duckdb.hpp.tmp'
|
||||
temp_source = 'duckdb.cpp.tmp'
|
||||
|
||||
skip_duckdb_includes = False
|
||||
|
||||
src_dir = 'src'
|
||||
include_dir = os.path.join('src', 'include')
|
||||
|
||||
# files included in the amalgamated "duckdb.hpp" file
|
||||
main_header_files = [
|
||||
os.path.join(include_dir, 'duckdb.hpp'),
|
||||
os.path.join(include_dir, 'duckdb.h'),
|
||||
os.path.join(include_dir, 'duckdb', 'common', 'types', 'date.hpp'),
|
||||
os.path.join(include_dir, 'duckdb', 'common', 'adbc', 'adbc.h'),
|
||||
os.path.join(include_dir, 'duckdb', 'common', 'adbc', 'adbc.hpp'),
|
||||
os.path.join(include_dir, 'duckdb', 'common', 'arrow', 'arrow.hpp'),
|
||||
os.path.join(include_dir, 'duckdb', 'common', 'arrow', 'arrow_converter.hpp'),
|
||||
os.path.join(include_dir, 'duckdb', 'common', 'arrow', 'arrow_wrapper.hpp'),
|
||||
os.path.join(include_dir, 'duckdb', 'common', 'types', 'blob.hpp'),
|
||||
os.path.join(include_dir, 'duckdb', 'common', 'types', 'decimal.hpp'),
|
||||
os.path.join(include_dir, 'duckdb', 'common', 'types', 'hugeint.hpp'),
|
||||
os.path.join(include_dir, 'duckdb', 'common', 'types', 'uhugeint.hpp'),
|
||||
os.path.join(include_dir, 'duckdb', 'common', 'types', 'uuid.hpp'),
|
||||
os.path.join(include_dir, 'duckdb', 'common', 'types', 'interval.hpp'),
|
||||
os.path.join(include_dir, 'duckdb', 'common', 'types', 'timestamp.hpp'),
|
||||
os.path.join(include_dir, 'duckdb', 'common', 'types', 'time.hpp'),
|
||||
os.path.join(include_dir, 'duckdb', 'common', 'serializer', 'buffered_file_writer.hpp'),
|
||||
os.path.join(include_dir, 'duckdb', 'common', 'serializer', 'memory_stream.hpp'),
|
||||
os.path.join(include_dir, 'duckdb', 'main', 'appender.hpp'),
|
||||
os.path.join(include_dir, 'duckdb', 'main', 'client_context.hpp'),
|
||||
os.path.join(include_dir, 'duckdb', 'main', 'extension', 'extension_loader.hpp'),
|
||||
os.path.join(include_dir, 'duckdb', 'function', 'function.hpp'),
|
||||
os.path.join(include_dir, 'duckdb', 'function', 'table_function.hpp'),
|
||||
os.path.join(include_dir, 'duckdb', 'parser', 'parsed_data', 'create_table_function_info.hpp'),
|
||||
os.path.join(include_dir, 'duckdb', 'parser', 'parsed_data', 'create_copy_function_info.hpp'),
|
||||
]
|
||||
extended_amalgamation = False
|
||||
if '--extended' in sys.argv:
|
||||
|
||||
def add_include_dir(dirpath):
|
||||
return [os.path.join(dirpath, x) for x in os.listdir(dirpath)]
|
||||
|
||||
extended_amalgamation = True
|
||||
main_header_files += [
|
||||
os.path.join(include_dir, x)
|
||||
for x in [
|
||||
'duckdb/planner/expression/bound_constant_expression.hpp',
|
||||
'duckdb/planner/expression/bound_function_expression.hpp',
|
||||
'duckdb/catalog/catalog_entry/scalar_function_catalog_entry.hpp',
|
||||
'duckdb/parser/parsed_data/create_table_info.hpp',
|
||||
'duckdb/planner/parsed_data/bound_create_table_info.hpp',
|
||||
'duckdb/parser/constraints/not_null_constraint.hpp',
|
||||
'duckdb/storage/data_table.hpp',
|
||||
'duckdb/function/pragma_function.hpp',
|
||||
'duckdb/parser/qualified_name.hpp',
|
||||
'duckdb/parser/parser.hpp',
|
||||
'duckdb/planner/binder.hpp',
|
||||
'duckdb/storage/object_cache.hpp',
|
||||
'duckdb/planner/table_filter.hpp',
|
||||
"duckdb/storage/statistics/base_statistics.hpp",
|
||||
"duckdb/planner/filter/conjunction_filter.hpp",
|
||||
"duckdb/planner/filter/constant_filter.hpp",
|
||||
"duckdb/common/types/vector_cache.hpp",
|
||||
"duckdb/common/string_map_set.hpp",
|
||||
"duckdb/planner/filter/null_filter.hpp",
|
||||
"duckdb/common/arrow/arrow_wrapper.hpp",
|
||||
"duckdb/common/hive_partitioning.hpp",
|
||||
"duckdb/common/multi_file/union_by_name.hpp",
|
||||
"duckdb/planner/operator/logical_get.hpp",
|
||||
"duckdb/common/compressed_file_system.hpp",
|
||||
]
|
||||
]
|
||||
main_header_files += add_include_dir(os.path.join(include_dir, 'duckdb/parser/expression'))
|
||||
main_header_files += add_include_dir(os.path.join(include_dir, 'duckdb/parser/parsed_data'))
|
||||
main_header_files += add_include_dir(os.path.join(include_dir, 'duckdb/parser/tableref'))
|
||||
main_header_files = normalize_path(main_header_files)
|
||||
|
||||
import package_build
|
||||
|
||||
# include paths for where to search for include files during amalgamation
|
||||
include_paths = [include_dir] + package_build.third_party_includes()
|
||||
# paths of where to look for files to compile and include to the final amalgamation
|
||||
compile_directories = [src_dir] + package_build.third_party_sources()
|
||||
|
||||
# files always excluded
|
||||
always_excluded = normalize_path(
|
||||
[
|
||||
'src/amalgamation/duckdb.cpp',
|
||||
'src/amalgamation/duckdb.hpp',
|
||||
'src/amalgamation/parquet-amalgamation.cpp',
|
||||
'src/amalgamation/parquet-amalgamation.hpp',
|
||||
]
|
||||
)
|
||||
# files excluded from the amalgamation
|
||||
excluded_files = ['grammar.cpp', 'grammar.hpp', 'symbols.cpp']
|
||||
# files excluded from individual file compilation during test_compile
|
||||
excluded_compilation_files = excluded_files + ['gram.hpp', 'kwlist.hpp', "duckdb-c.cpp"]
|
||||
|
||||
linenumbers = False
|
||||
|
||||
|
||||
def get_includes(fpath, text):
|
||||
# find all the includes referred to in the directory
|
||||
regex_include_statements = re.findall("(^[\t ]*[#][\t ]*include[\t ]+[\"]([^\"]+)[\"])", text, flags=re.MULTILINE)
|
||||
include_statements = []
|
||||
include_files = []
|
||||
# figure out where they are located
|
||||
for x in regex_include_statements:
|
||||
included_file = x[1]
|
||||
if skip_duckdb_includes and 'duckdb' in included_file:
|
||||
continue
|
||||
if (
|
||||
'extension_helper.cpp' in fpath
|
||||
and (included_file.endswith('_extension.hpp'))
|
||||
or included_file == 'generated_extension_loader.hpp'
|
||||
or included_file == 'generated_extension_headers.hpp'
|
||||
):
|
||||
continue
|
||||
if 'allocator.cpp' in fpath and included_file.endswith('jemalloc_extension.hpp'):
|
||||
continue
|
||||
if x[0] in include_statements:
|
||||
raise Exception(f"duplicate include {x[0]} in file {fpath}")
|
||||
include_statements.append(x[0])
|
||||
included_file = os.sep.join(included_file.split('/'))
|
||||
found = False
|
||||
for include_path in include_paths:
|
||||
ipath = os.path.join(include_path, included_file)
|
||||
if os.path.isfile(ipath):
|
||||
include_files.append(ipath)
|
||||
found = True
|
||||
break
|
||||
if not found:
|
||||
raise Exception('Could not find include file "' + included_file + '", included from file "' + fpath + '"')
|
||||
return (include_statements, include_files)
|
||||
|
||||
|
||||
def cleanup_file(text):
|
||||
# remove all "#pragma once" notifications
|
||||
text = re.sub('#pragma once', '', text)
|
||||
return text
|
||||
|
||||
|
||||
# recursively get all includes and write them
|
||||
written_files = {}
|
||||
|
||||
# licenses
|
||||
licenses = []
|
||||
|
||||
|
||||
def need_to_write_file(current_file, ignore_excluded=False):
|
||||
if amal_dir in current_file:
|
||||
return False
|
||||
if current_file in always_excluded:
|
||||
return False
|
||||
if current_file.split(os.sep)[-1] in excluded_files and not ignore_excluded:
|
||||
# file is in ignored files set
|
||||
return False
|
||||
if current_file in written_files:
|
||||
# file is already written
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def find_license(original_file):
|
||||
global licenses
|
||||
file = original_file
|
||||
license = ""
|
||||
while True:
|
||||
(file, end) = os.path.split(file)
|
||||
if file == "":
|
||||
break
|
||||
potential_license = os.path.join(file, "LICENSE")
|
||||
if os.path.exists(potential_license):
|
||||
license = potential_license
|
||||
if license == "":
|
||||
raise Exception("Could not find license for %s" % original_file)
|
||||
|
||||
if license not in licenses:
|
||||
licenses += [license]
|
||||
|
||||
return licenses.index(license)
|
||||
|
||||
|
||||
def write_file(current_file, ignore_excluded=False):
|
||||
global linenumbers
|
||||
global written_files
|
||||
if not need_to_write_file(current_file, ignore_excluded):
|
||||
return ""
|
||||
written_files[current_file] = True
|
||||
|
||||
# first read this file
|
||||
with open_utf8(current_file, 'r') as f:
|
||||
text = f.read()
|
||||
|
||||
if current_file.startswith("third_party") and not current_file.endswith("LICENSE"):
|
||||
lic_idx = find_license(current_file)
|
||||
text = (
|
||||
"\n\n// LICENSE_CHANGE_BEGIN\n// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #%s\n// See the end of this file for a list\n\n"
|
||||
% str(lic_idx + 1)
|
||||
+ text
|
||||
+ "\n\n// LICENSE_CHANGE_END\n"
|
||||
)
|
||||
|
||||
(statements, includes) = get_includes(current_file, text)
|
||||
# find the linenr of the final #include statement we parsed
|
||||
if len(statements) > 0:
|
||||
index = text.find(statements[-1])
|
||||
linenr = len(text[:index].split('\n'))
|
||||
|
||||
# now write all the dependencies of this header first
|
||||
for i in range(len(includes)):
|
||||
include_text = write_file(includes[i])
|
||||
if linenumbers and i == len(includes) - 1:
|
||||
# for the last include statement, we also include a #line directive
|
||||
include_text += '\n#line %d "%s"\n' % (linenr, current_file)
|
||||
text = text.replace(statements[i], include_text)
|
||||
|
||||
# add the initial line here
|
||||
if linenumbers:
|
||||
text = '\n#line 1 "%s"\n' % (current_file,) + text
|
||||
# print(current_file)
|
||||
# now read the header and write it
|
||||
return cleanup_file(text)
|
||||
|
||||
|
||||
def write_dir(dir):
|
||||
files = os.listdir(dir)
|
||||
files.sort()
|
||||
text = ""
|
||||
for fname in files:
|
||||
if fname in excluded_files:
|
||||
continue
|
||||
# print(fname)
|
||||
fpath = os.path.join(dir, fname)
|
||||
if os.path.isdir(fpath):
|
||||
text += write_dir(fpath)
|
||||
elif fname.endswith('.cpp') or fname.endswith('.c') or fname.endswith('.cc'):
|
||||
text += write_file(fpath)
|
||||
return text
|
||||
|
||||
|
||||
def copy_if_different(src, dest):
|
||||
if os.path.isfile(dest):
|
||||
# dest exists, check if the files are different
|
||||
with open_utf8(src, 'r') as f:
|
||||
source_text = f.read()
|
||||
with open_utf8(dest, 'r') as f:
|
||||
dest_text = f.read()
|
||||
if source_text == dest_text:
|
||||
# print("Skipping copy of " + src + ", identical copy already exists at " + dest)
|
||||
return
|
||||
# print("Copying " + src + " to " + dest)
|
||||
shutil.copyfile(src, dest)
|
||||
|
||||
|
||||
def git_commit_hash():
|
||||
git_describe = package_build.get_git_describe()
|
||||
hash = git_describe.split('-')[2].lstrip('g')
|
||||
return hash
|
||||
|
||||
|
||||
######
|
||||
# MAIN_BRANCH_VERSIONING default should be 'True' for main branch and feature branches
|
||||
# MAIN_BRANCH_VERSIONING default should be 'False' for release branches
|
||||
# MAIN_BRANCH_VERSIONING default value needs to keep in sync between:
|
||||
# - CMakeLists.txt
|
||||
# - scripts/amalgamation.py
|
||||
# - scripts/package_build.py
|
||||
######
|
||||
MAIN_BRANCH_VERSIONING = True
|
||||
if os.getenv('MAIN_BRANCH_VERSIONING') == "0":
|
||||
MAIN_BRANCH_VERSIONING = False
|
||||
if os.getenv('MAIN_BRANCH_VERSIONING') == "1":
|
||||
MAIN_BRANCH_VERSIONING = True
|
||||
|
||||
|
||||
def git_dev_version():
|
||||
try:
|
||||
long_version = package_build.get_git_describe()
|
||||
version_splits = long_version.split('-')[0].lstrip('v').split('.')
|
||||
dev_version = long_version.split('-')[1]
|
||||
if int(dev_version) == 0:
|
||||
# directly on a tag: emit the regular version
|
||||
return "v" + '.'.join(version_splits)
|
||||
else:
|
||||
# not on a tag: increment the version by one and add a -devX suffix
|
||||
# this needs to keep in sync with changes to CMakeLists.txt
|
||||
if MAIN_BRANCH_VERSIONING == True:
|
||||
# increment minor version
|
||||
version_splits[1] = str(int(version_splits[1]) + 1)
|
||||
else:
|
||||
# increment patch version
|
||||
version_splits[2] = str(int(version_splits[2]) + 1)
|
||||
return "v" + '.'.join(version_splits) + "-dev" + dev_version
|
||||
except:
|
||||
return "v0.0.0"
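# Worked example: a git describe of "v1.2.0-15-gabcdef1" (hypothetical) gives version_splits
# ['1', '2', '0'] and dev_version '15', so this returns "v1.3.0-dev15" under main-branch
# versioning and "v1.2.1-dev15" on a release branch; exactly on a tag ("...-0-g...") it
# returns the plain "v1.2.0".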
|
||||
|
||||
|
||||
def generate_duckdb_hpp(header_file):
|
||||
print("-----------------------")
|
||||
print("-- Writing " + header_file + " --")
|
||||
print("-----------------------")
|
||||
with open_utf8(temp_header, 'w+') as hfile:
|
||||
hfile.write("/*\n")
|
||||
hfile.write(write_file("LICENSE"))
|
||||
hfile.write("*/\n\n")
|
||||
|
||||
hfile.write("#pragma once\n")
|
||||
hfile.write("#define DUCKDB_AMALGAMATION 1\n")
|
||||
if extended_amalgamation:
|
||||
hfile.write("#define DUCKDB_AMALGAMATION_EXTENDED 1\n")
|
||||
hfile.write("#define DUCKDB_SOURCE_ID \"%s\"\n" % git_commit_hash())
|
||||
|
||||
dev_version = git_dev_version()
|
||||
dev_v_parts = dev_version.lstrip('v').split('.')
|
||||
hfile.write("#define DUCKDB_VERSION \"%s\"\n" % dev_version)
|
||||
hfile.write("#define DUCKDB_MAJOR_VERSION %d\n" % int(dev_v_parts[0]))
|
||||
hfile.write("#define DUCKDB_MINOR_VERSION %d\n" % int(dev_v_parts[1]))
|
||||
hfile.write("#define DUCKDB_PATCH_VERSION \"%s\"\n" % dev_v_parts[2])
|
||||
|
||||
for fpath in main_header_files:
|
||||
hfile.write(write_file(fpath))
|
||||
|
||||
|
||||
def generate_amalgamation(source_file, header_file):
|
||||
# construct duckdb.hpp from these headers
|
||||
generate_duckdb_hpp(header_file)
|
||||
|
||||
# now construct duckdb.cpp
|
||||
print("------------------------")
|
||||
print("-- Writing " + source_file + " --")
|
||||
print("------------------------")
|
||||
|
||||
# scan all the .cpp files
|
||||
with open_utf8(temp_source, 'w+') as sfile:
|
||||
header_file_name = header_file.split(os.sep)[-1]
|
||||
sfile.write('#include "' + header_file_name + '"\n\n')
|
||||
sfile.write("#ifndef DUCKDB_AMALGAMATION\n#error header mismatch\n#endif\n\n")
|
||||
sfile.write("#if (!defined(DEBUG) && !defined NDEBUG)\n#define NDEBUG\n#endif\n\n")
|
||||
for compile_dir in compile_directories:
|
||||
sfile.write(write_dir(compile_dir))
|
||||
|
||||
sfile.write('\n\n/*\n')
|
||||
license_idx = 0
|
||||
for license in licenses:
|
||||
sfile.write("\n\n\n### THIRD PARTY LICENSE #%s ###\n\n" % str(license_idx + 1))
|
||||
sfile.write(write_file(license))
|
||||
license_idx += 1
|
||||
sfile.write('\n\n*/\n')
|
||||
|
||||
copy_if_different(temp_header, header_file)
|
||||
copy_if_different(temp_source, source_file)
|
||||
try:
|
||||
os.remove(temp_header)
|
||||
os.remove(temp_source)
|
||||
except:
|
||||
pass
|
||||
|
||||
|
||||
def list_files(dname, file_list):
|
||||
files = os.listdir(dname)
|
||||
files.sort()
|
||||
for fname in files:
|
||||
if fname in excluded_files:
|
||||
continue
|
||||
fpath = os.path.join(dname, fname)
|
||||
if os.path.isdir(fpath):
|
||||
list_files(fpath, file_list)
|
||||
elif fname.endswith(('.cpp', '.c', '.cc')):
|
||||
if need_to_write_file(fpath):
|
||||
file_list.append(fpath)
|
||||
|
||||
|
||||
def list_sources():
|
||||
file_list = []
|
||||
for compile_dir in compile_directories:
|
||||
list_files(compile_dir, file_list)
|
||||
return file_list
|
||||
|
||||
|
||||
def list_include_files_recursive(dname, file_list):
|
||||
files = os.listdir(dname)
|
||||
files.sort()
|
||||
for fname in files:
|
||||
if fname in excluded_files:
|
||||
continue
|
||||
fpath = os.path.join(dname, fname)
|
||||
if os.path.isdir(fpath):
|
||||
list_include_files_recursive(fpath, file_list)
|
||||
elif fname.endswith(('.hpp', '.ipp', '.h', '.hh', '.tcc', '.inc')):
|
||||
file_list.append(fpath)
|
||||
|
||||
|
||||
def list_includes_files(include_dirs):
|
||||
file_list = []
|
||||
for include_dir in include_dirs:
|
||||
list_include_files_recursive(include_dir, file_list)
|
||||
return file_list
|
||||
|
||||
|
||||
def list_includes():
|
||||
return list_includes_files(include_paths)
|
||||
|
||||
|
||||
def gather_file(current_file, source_files, header_files):
|
||||
global linenumbers
|
||||
global written_files
|
||||
if not need_to_write_file(current_file, False):
|
||||
return ""
|
||||
written_files[current_file] = True
|
||||
|
||||
# first read this file
|
||||
with open_utf8(current_file, 'r') as f:
|
||||
text = f.read()
|
||||
|
||||
(statements, includes) = get_includes(current_file, text)
|
||||
# find the linenr of the final #include statement we parsed
|
||||
if len(statements) > 0:
|
||||
index = text.find(statements[-1])
|
||||
linenr = len(text[:index].split('\n'))
|
||||
|
||||
# now write all the dependencies of this header first
|
||||
for i in range(len(includes)):
|
||||
# source file inclusions are inlined into the main text
|
||||
include_text = write_file(includes[i])
|
||||
if linenumbers and i == len(includes) - 1:
|
||||
# for the last include statement, we also include a #line directive
|
||||
include_text += '\n#line %d "%s"\n' % (linenr, current_file)
|
||||
if includes[i].endswith('.cpp') or includes[i].endswith('.cc') or includes[i].endswith('.c'):
|
||||
# source file inclusions are inlined into the main text
|
||||
text = text.replace(statements[i], include_text)
|
||||
else:
|
||||
text = text.replace(statements[i], '')
|
||||
header_files.append(include_text)
|
||||
|
||||
# add the initial line here
|
||||
if linenumbers:
|
||||
text = '\n#line 1 "%s"\n' % (current_file,) + text
|
||||
source_files.append(cleanup_file(text))
|
||||
|
||||
|
||||
def gather_files(dir, source_files, header_files):
|
||||
files = os.listdir(dir)
|
||||
files.sort()
|
||||
for fname in files:
|
||||
if fname in excluded_files:
|
||||
continue
|
||||
fpath = os.path.join(dir, fname)
|
||||
if os.path.isdir(fpath):
|
||||
gather_files(fpath, source_files, header_files)
|
||||
elif fname.endswith('.cpp') or fname.endswith('.c') or fname.endswith('.cc'):
|
||||
gather_file(fpath, source_files, header_files)
|
||||
|
||||
|
||||
def write_license(hfile):
|
||||
hfile.write("// See https://raw.githubusercontent.com/duckdb/duckdb/main/LICENSE for licensing information\n\n")
|
||||
|
||||
|
||||
def generate_amalgamation_splits(source_file, header_file, nsplits):
|
||||
# construct duckdb.hpp from these headers
|
||||
generate_duckdb_hpp(header_file)
|
||||
|
||||
# gather all files to read and write
|
||||
source_files = []
|
||||
header_files = []
|
||||
for compile_dir in compile_directories:
|
||||
if compile_dir != src_dir:
|
||||
continue
|
||||
gather_files(compile_dir, source_files, header_files)
|
||||
|
||||
# write duckdb-internal.hpp
|
||||
if '.hpp' in header_file:
|
||||
internal_header_file = header_file.replace('.hpp', '-internal.hpp')
|
||||
elif '.h' in header_file:
|
||||
internal_header_file = header_file.replace('.h', '-internal.h')
|
||||
else:
|
||||
raise Exception("Unknown extension of header file")
|
||||
|
||||
temp_internal_header = internal_header_file + '.tmp'
|
||||
|
||||
with open_utf8(temp_internal_header, 'w+') as f:
|
||||
write_license(f)
|
||||
for hfile in header_files:
|
||||
f.write(hfile)
|
||||
|
||||
# count the total amount of bytes in the source files
|
||||
total_bytes = 0
|
||||
for sfile in source_files:
|
||||
total_bytes += len(sfile)
|
||||
|
||||
# now write the individual splits
|
||||
# we approximate the splitting up by making every file have roughly the same amount of bytes
|
||||
split_bytes = total_bytes / nsplits
|
||||
current_bytes = 0
|
||||
partitions = []
|
||||
partition_names = []
|
||||
current_partition = []
|
||||
current_partition_idx = 1
|
||||
for sfile in source_files:
|
||||
current_partition.append(sfile)
|
||||
current_bytes += len(sfile)
|
||||
if current_bytes >= split_bytes:
|
||||
partition_names.append(str(current_partition_idx))
|
||||
partitions.append(current_partition)
|
||||
current_partition = []
|
||||
current_bytes = 0
|
||||
current_partition_idx += 1
|
||||
if len(current_partition) > 0:
|
||||
partition_names.append(str(current_partition_idx))
|
||||
partitions.append(current_partition)
|
||||
current_partition = []
|
||||
current_bytes = 0
|
||||
# generate partitions from the third party libraries
|
||||
for compile_dir in compile_directories:
|
||||
if compile_dir != src_dir:
|
||||
partition_names.append(compile_dir.split(os.sep)[-1])
|
||||
partitions.append(write_dir(compile_dir))
|
||||
|
||||
header_file_name = header_file.split(os.sep)[-1]
|
||||
internal_header_file_name = internal_header_file.split(os.sep)[-1]
|
||||
|
||||
partition_fnames = []
|
||||
current_partition = 0
|
||||
for partition in partitions:
|
||||
partition_name = source_file.replace('.cpp', '-%s.cpp' % (partition_names[current_partition],))
|
||||
temp_partition_name = partition_name + '.tmp'
|
||||
partition_fnames.append([partition_name, temp_partition_name])
|
||||
with open_utf8(temp_partition_name, 'w+') as f:
|
||||
write_license(f)
|
||||
f.write('#include "%s"\n#include "%s"' % (header_file_name, internal_header_file_name))
|
||||
f.write(
|
||||
'''
|
||||
#ifndef DUCKDB_AMALGAMATION
|
||||
#error header mismatch
|
||||
#endif
|
||||
'''
|
||||
)
|
||||
for sfile in partition:
|
||||
f.write(sfile)
|
||||
current_partition += 1
|
||||
|
||||
copy_if_different(temp_header, header_file)
|
||||
copy_if_different(temp_internal_header, internal_header_file)
|
||||
try:
|
||||
os.remove(temp_header)
|
||||
os.remove(temp_internal_header)
|
||||
except:
|
||||
pass
|
||||
for p in partition_fnames:
|
||||
copy_if_different(p[1], p[0])
|
||||
try:
|
||||
os.remove(p[1])
|
||||
except:
|
||||
pass
|
||||
|
||||
|
||||
def list_include_dirs():
|
||||
return include_paths
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
nsplits = 1
|
||||
for arg in sys.argv:
|
||||
if arg == '--linenumbers':
|
||||
linenumbers = True
|
||||
elif arg == '--no-linenumbers':
|
||||
linenumbers = False
|
||||
elif arg.startswith('--header='):
|
||||
header_file = os.path.join(*arg.split('=', 1)[1].split('/'))
|
||||
elif arg.startswith('--source='):
|
||||
source_file = os.path.join(*arg.split('=', 1)[1].split('/'))
|
||||
elif arg.startswith('--splits='):
|
||||
nsplits = int(arg.split('=', 1)[1])
|
||||
elif arg.startswith('--list-sources'):
|
||||
file_list = list_sources()
|
||||
print('\n'.join(file_list))
|
||||
exit(1)
|
||||
elif arg.startswith('--list-objects'):
|
||||
file_list = list_sources()
|
||||
print(' '.join([x.rsplit('.', 1)[0] + '.o' for x in file_list]))
|
||||
exit(1)
|
||||
elif arg.startswith('--includes'):
|
||||
include_dirs = list_include_dirs()
|
||||
print(' '.join(['-I' + x for x in include_dirs]))
|
||||
exit(1)
|
||||
elif arg.startswith('--include-directories'):
|
||||
include_dirs = list_include_dirs()
|
||||
print('\n'.join(include_dirs))
|
||||
exit(1)
|
||||
if os.path.exists(amal_dir):
|
||||
shutil.rmtree(amal_dir)
|
||||
os.makedirs(amal_dir)
|
||||
|
||||
if nsplits > 1:
|
||||
generate_amalgamation_splits(source_file, header_file, nsplits)
|
||||
else:
|
||||
generate_amalgamation(source_file, header_file)
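For reference, generate_amalgamation_splits above balances the generated source splits by byte count with a greedy strategy; a minimal standalone sketch of that idea (names are illustrative, not part of the script):

def split_by_bytes(chunks, nsplits):
    # greedy partitioning: start a new partition once the running size reaches total/nsplits
    target = sum(len(c) for c in chunks) / nsplits
    partitions, current, current_bytes = [], [], 0
    for chunk in chunks:
        current.append(chunk)
        current_bytes += len(chunk)
        if current_bytes >= target:
            partitions.append(current)
            current, current_bytes = [], 0
    if current:
        partitions.append(current)
    return partitions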
68
external/duckdb/scripts/append_metadata.cmake
vendored
Normal file
@@ -0,0 +1,68 @@
cmake_minimum_required(VERSION 3.15...3.29)

# Usage: cmake -DEXTENSION=path/to/extension.duckdb_extension -DPLATFORM_FILE=README.md -DDUCKDB_VERSION=tag1 -DEXTENSION_VERSION=tag2 -P scripts/append_metadata.cmake
# Currently hardcoded to host up to 8 fields
# Example: ./scripts/append_metadata.sh file.duckdb_extension git_hash_duckdb_file git_hash_extension_file platform_file

set(EXTENSION "" CACHE PATH "Path to the extension where to add metadata")
set(NULL_FILE "" CACHE PATH "Path to file containing a single 0 byte")
set(META1 "4" CACHE STRING "Metadata field" FORCE)
set(PLATFORM_FILE "" CACHE PATH "Metadata field: path of file containing duckdb_platform")
set(VERSION_FIELD "" CACHE STRING "Metadata field: path of file containing duckdb_version")
set(EXTENSION_VERSION "" CACHE STRING "Metadata field: path of file containing extension_version")
set(ABI_TYPE "" CACHE STRING "Metadata field: the ABI type of the extension")
set(META6 "" CACHE STRING "Metadata field")
set(META7 "" CACHE STRING "Metadata field")
set(META8 "" CACHE STRING "Metadata field")

# null.txt should contain exactly 1 byte of value \x00
file(READ "${NULL_FILE}" EMPTY_BYTE)

string(REPEAT "${EMPTY_BYTE}" 32 EMPTY_32)
string(REPEAT "${EMPTY_BYTE}" 256 EMPTY_256)

# 0 for custom section
string(APPEND CUSTOM_SECTION "${EMPTY_BYTE}")
# 213 in hex = 531 in decimal, total length of what follows (1 + 16 + 2 + 8x32 + 256)
# [1(continuation) + 0010011(payload) = \x93 -> 147, 0(continuation) + 0000100(payload) = \x04 -> 4]
# 10 in hex = 16 in decimal, length of name, 1 byte
string(ASCII 147 4 16 CUSTOM_SECTION_2)
string(APPEND CUSTOM_SECTION "${CUSTOM_SECTION_2}")

# the name of the WebAssembly custom section, 16 bytes
string(APPEND CUSTOM_SECTION "duckdb_signature")

# 200 in hex = 512 in decimal, length of the metadata + signature that follows (8x32 + 256)
# [1(continuation) + 0000000(payload) = \x80 -> 128, 0(continuation) + 0000100(payload) = \x04 -> 4],
# for a grand total of 2 bytes
string(ASCII 128 4 CUSTOM_SECTION_3)
string(APPEND CUSTOM_SECTION "${CUSTOM_SECTION_3}")

# The second metadata field is special, since its content comes from a file
file(READ "${PLATFORM_FILE}" META2)

# Build the METADATAx variables by appending padding and then truncating to 32 bytes
string(SUBSTRING "${META1}${EMPTY_32}" 0 32 METADATA1)
string(SUBSTRING "${META2}${EMPTY_32}" 0 32 METADATA2)
string(SUBSTRING "${VERSION_FIELD}${EMPTY_32}" 0 32 METADATA3)
string(SUBSTRING "${EXTENSION_VERSION}${EMPTY_32}" 0 32 METADATA4)
string(SUBSTRING "${ABI_TYPE}${EMPTY_32}" 0 32 METADATA5)
string(SUBSTRING "${META6}${EMPTY_32}" 0 32 METADATA6)
string(SUBSTRING "${META7}${EMPTY_32}" 0 32 METADATA7)
string(SUBSTRING "${META8}${EMPTY_32}" 0 32 METADATA8)

# Append metadata fields, backwards
string(APPEND CUSTOM_SECTION "${METADATA8}")
string(APPEND CUSTOM_SECTION "${METADATA7}")
string(APPEND CUSTOM_SECTION "${METADATA6}")
string(APPEND CUSTOM_SECTION "${METADATA5}")
string(APPEND CUSTOM_SECTION "${METADATA4}")
string(APPEND CUSTOM_SECTION "${METADATA3}")
string(APPEND CUSTOM_SECTION "${METADATA2}")
string(APPEND CUSTOM_SECTION "${METADATA1}")

# Append signature (yet to be computed)
string(APPEND CUSTOM_SECTION "${EMPTY_256}")

# Append generated custom section to the extension
file(APPEND "${EXTENSION}" "${CUSTOM_SECTION}")
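The two hard-coded byte pairs above (147 4 and 128 4) are the unsigned LEB128 encodings of the custom-section payload length (531) and of the metadata-plus-signature length (512). A minimal Python sketch of that encoding, for illustration only (the function name is not part of the build scripts):

def uleb128(value: int) -> bytes:
    # unsigned LEB128: emit 7 payload bits per byte, setting the high bit on all but the last byte
    out = bytearray()
    while True:
        byte = value & 0x7F
        value >>= 7
        if value:
            out.append(byte | 0x80)
        else:
            out.append(byte)
            return bytes(out)

assert uleb128(531) == bytes([147, 4])  # matches string(ASCII 147 4 16 ...)
assert uleb128(512) == bytes([128, 4])  # matches string(ASCII 128 4 ...)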
99
external/duckdb/scripts/apply_extension_patches.py
vendored
Normal file
@@ -0,0 +1,99 @@
#!/usr/bin/env python3
import sys
import glob
import subprocess
import os
import tempfile

# Get the directory and construct the patch file pattern
directory = sys.argv[1]
patch_pattern = f"{directory}*.patch"

# Find patch files matching the pattern
patches = glob.glob(patch_pattern)


def raise_error(error_msg):
    sys.stderr.write(error_msg + '\n')
    sys.exit(1)


patches = sorted(os.listdir(directory))
for patch in patches:
    if not patch.endswith('.patch'):
        raise_error(
            f'Patch file {patch} found in directory {directory} does not end in ".patch" - rename the patch file'
        )


# Exit if no patches are found
if not patches:
    error_message = (
        f"\nERROR: Extension patching enabled, but no patches found in '{directory}'. "
        "Please make sure APPLY_PATCHES is only enabled when there are actually patches present. "
        "See .github/patches/extensions/README.md for more details."
    )
    raise_error(error_message)


current_dir = os.getcwd()
print(f"Applying patches at '{current_dir}'")
print(f"Resetting patches in {directory}\n")

# capture the current diff
diff_proc = subprocess.run(["git", "diff"], capture_output=True, check=True)
prev_diff = diff_proc.stdout

output_proc = subprocess.run(["git", "diff", "--numstat"], capture_output=True, check=True)
prev_output_lines = output_proc.stdout.decode('utf8').split('\n')
prev_output_lines.sort()

subprocess.run(["git", "clean", "-f"], check=True)
subprocess.run(["git", "reset", "--hard", "HEAD"], check=True)


def apply_patch(patch_file):
    ARGUMENTS = ["patch", "-p1", "--forward", "-i"]
    arguments = []
    arguments.extend(ARGUMENTS)
    arguments.append(patch_file)
    try:
        subprocess.run(arguments, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except subprocess.CalledProcessError as e:
        arguments[1:1] = ['-d', current_dir]
        command = " ".join(arguments)
        print(f"Failed to apply patch, command to reproduce locally:\n{command}")
        print("\nError output:")
        print(e.stderr.decode('utf-8'))
        print("\nStandard output:")
        print(e.stdout.decode('utf-8'))
        print("Exiting")
        exit(1)


# Apply each patch file using patch
for patch in patches:
    print(f"Applying patch: {patch}\n")
    apply_patch(os.path.join(directory, patch))

# all patches have applied - check the current diff
output_proc = subprocess.run(["git", "diff", "--numstat"], capture_output=True, check=True)
output_lines = output_proc.stdout.decode('utf8').split('\n')
output_lines.sort()

if len(output_lines) <= len(prev_output_lines) and prev_output_lines != output_lines:
    print("Detected local changes - rolling back patch application")

    subprocess.run(["git", "clean", "-f"], check=True)
    subprocess.run(["git", "reset", "--hard", "HEAD"], check=True)
    with tempfile.NamedTemporaryFile() as f:
        f.write(prev_diff)
        apply_patch(f.name)

    print("--------------------------------------------------")
    print("Generate a patch file using the following command:")
    print("--------------------------------------------------")
    print(f"(cd {os.getcwd()} && git diff > {os.path.join(directory, 'fix.patch')})")
    print("--------------------------------------------------")

    exit(1)
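For reference, each patch above is applied with GNU patch in forward-only mode; a minimal standalone sketch of that invocation (the patch path is hypothetical):

import subprocess

# hypothetical patch path; the script derives real paths from sys.argv[1]
patch_file = ".github/patches/extensions/example/0001-fix.patch"
# mirrors apply_patch(): -p1 strips one path component, --forward skips already-applied hunks
subprocess.run(["patch", "-p1", "--forward", "-i", patch_file], check=True)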
126
external/duckdb/scripts/asset-upload-gha.py
vendored
Normal file
@@ -0,0 +1,126 @@
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import glob
|
||||
import time
|
||||
import urllib.request
|
||||
|
||||
api_url = 'https://api.github.com/repos/duckdb/duckdb/'
|
||||
|
||||
if len(sys.argv) < 2:
|
||||
print("Usage: [filename1] [filename2] ... ")
|
||||
exit(1)
|
||||
|
||||
# this essentially should run on release tag builds to fill up release assets and master
|
||||
|
||||
repo = os.getenv("GITHUB_REPOSITORY", "")
|
||||
if repo != "duckdb/duckdb":
|
||||
print("Not running on forks. Exiting.")
|
||||
exit(0)
|
||||
|
||||
ref = os.getenv("GITHUB_REF", '') # this env var is always present just not always used
|
||||
if ref == 'refs/heads/main':
|
||||
print("Not running on main branch. Exiting.")
|
||||
exit(0)
|
||||
elif ref.startswith('refs/tags/'):
|
||||
tag = ref.replace('refs/tags/', '')
|
||||
else:
|
||||
print("Not running on branches. Exiting.")
|
||||
exit(0)
|
||||
|
||||
|
||||
print("Running on tag %s" % tag)
|
||||
|
||||
|
||||
token = os.getenv("GH_TOKEN", "")
|
||||
if token == "":
|
||||
raise ValueError('need a GitHub token in GH_TOKEN')
|
||||
|
||||
|
||||
def internal_gh_api(suburl, filename='', method='GET'):
|
||||
url = api_url + suburl
|
||||
headers = {"Content-Type": "application/json", 'Authorization': 'token ' + token}
|
||||
|
||||
body_data = b''
|
||||
raw_resp = None
|
||||
if len(filename) > 0:
|
||||
method = 'POST'
|
||||
body_data = open(filename, 'rb')
|
||||
headers["Content-Type"] = "binary/octet-stream"
|
||||
headers["Content-Length"] = os.path.getsize(filename)
|
||||
url = suburl # cough
|
||||
|
||||
req = urllib.request.Request(url, body_data, headers)
|
||||
req.get_method = lambda: method
|
||||
print(f'GH API URL: "{url}" Filename: "{filename}" Method: "{method}"')
|
||||
raw_resp = urllib.request.urlopen(req).read().decode()
|
||||
|
||||
if method != 'DELETE':
|
||||
return json.loads(raw_resp)
|
||||
else:
|
||||
return {}
|
||||
|
||||
|
||||
def gh_api(suburl, filename='', method='GET'):
|
||||
timeout = 1
|
||||
nretries = 10
|
||||
success = False
|
||||
for i in range(nretries + 1):
|
||||
try:
|
||||
response = internal_gh_api(suburl, filename, method)
|
||||
success = True
|
||||
except urllib.error.HTTPError as e:
|
||||
print(e.read().decode()) # gah
|
||||
except Exception as e:
|
||||
print(e)
|
||||
if success:
|
||||
break
|
||||
print(f"Failed upload, retrying in {timeout} seconds... ({i}/{nretries})")
|
||||
time.sleep(timeout)
|
||||
timeout = timeout * 2
|
||||
if not success:
|
||||
raise Exception("Failed to open URL " + suburl)
|
||||
return response
|
||||
|
||||
|
||||
# check if tag exists
|
||||
resp = gh_api('git/ref/tags/%s' % tag)
|
||||
if 'object' not in resp or 'sha' not in resp['object']: # or resp['object']['sha'] != sha
|
||||
raise ValueError('tag %s not found' % tag)
|
||||
|
||||
resp = gh_api('releases/tags/%s' % tag)
|
||||
if 'id' not in resp or 'upload_url' not in resp:
|
||||
raise ValueError('release does not exist for tag %s' % tag)
|
||||
|
||||
|
||||
# double-check that release exists and has correct sha
|
||||
# disabled to not spam people watching releases
|
||||
# if 'id' not in resp or 'upload_url' not in resp or 'target_commitish' not in resp or resp['target_commitish'] != sha:
|
||||
# raise ValueError('release does not point to requested commit %s' % sha)
|
||||
|
||||
# TODO this could be a paged response!
|
||||
assets = gh_api('releases/%s/assets' % resp['id'])
|
||||
|
||||
upload_url = resp['upload_url'].split('{')[0] # gah
|
||||
files = sys.argv[1:]
|
||||
for filename in files:
|
||||
if '=' in filename:
|
||||
parts = filename.split("=")
|
||||
asset_filename = parts[0]
|
||||
paths = glob.glob(parts[1])
|
||||
if len(paths) != 1:
|
||||
raise ValueError("Could not find file for pattern %s" % parts[1])
|
||||
local_filename = paths[0]
|
||||
else:
|
||||
asset_filename = os.path.basename(filename)
|
||||
local_filename = filename
|
||||
|
||||
# delete if present
|
||||
for asset in assets:
|
||||
if asset['name'] == asset_filename:
|
||||
gh_api('releases/assets/%s' % asset['id'], method='DELETE')
|
||||
|
||||
resp = gh_api(f'{upload_url}?name={asset_filename}', filename=local_filename)
|
||||
if 'id' not in resp:
|
||||
raise ValueError('upload failed :/ ' + str(resp))
|
||||
print("%s -> %s" % (local_filename, resp['browser_download_url']))
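# Note on the positional arguments above: each may be either a plain path (uploaded under its
# basename) or an "asset_name=glob" pair, where the glob must match exactly one file, e.g.
# (hypothetical names):
#     python3 scripts/asset-upload-gha.py duckdb_cli-linux-amd64.zip=build/release/duckdb_cli*.zip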
106
external/duckdb/scripts/asset-upload.py
vendored
Normal file
@@ -0,0 +1,106 @@
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import glob
|
||||
import mimetypes
|
||||
import urllib.request
|
||||
|
||||
api_url = 'https://api.github.com/repos/duckdb/duckdb/'
|
||||
|
||||
if len(sys.argv) < 2:
|
||||
print("Usage: [filename1] [filename2] ... ")
|
||||
exit(1)
|
||||
|
||||
# this essentially should run on release tag builds to fill up release assets and main
|
||||
|
||||
pr = os.getenv("TRAVIS_PULL_REQUEST", "")
|
||||
if pr != "false":
|
||||
print("Not running on PRs. Exiting.")
|
||||
exit(0)
|
||||
|
||||
tag = os.getenv("TRAVIS_TAG", '') # this env var is always present just not always used
|
||||
if tag == '':
|
||||
tag = 'main-builds'
|
||||
print("Running on tag %s" % tag)
|
||||
|
||||
if tag == "main-builds" and os.getenv("TRAVIS_BRANCH", "") != "main":
|
||||
print("Only running on main branch for %s tag. Exiting." % tag)
|
||||
exit(0)
|
||||
|
||||
|
||||
token = os.getenv("GH_TOKEN", "")
|
||||
if token == "":
|
||||
raise ValueError('need a GitHub token in GH_TOKEN')
|
||||
|
||||
|
||||
def gh_api(suburl, filename='', method='GET'):
|
||||
url = api_url + suburl
|
||||
headers = {"Content-Type": "application/json", 'Authorization': 'token ' + token}
|
||||
|
||||
body_data = b''
|
||||
|
||||
if len(filename) > 0:
|
||||
method = 'POST'
|
||||
body_data = open(filename, 'rb')
|
||||
|
||||
mime_type = mimetypes.guess_type(filename)[0]
|
||||
if mime_type is None:
|
||||
mime_type = "application/octet-stream"
|
||||
headers["Content-Type"] = mime_type
|
||||
headers["Content-Length"] = os.path.getsize(filename)
|
||||
|
||||
url = suburl # cough
|
||||
|
||||
req = urllib.request.Request(url, body_data, headers)
|
||||
req.get_method = lambda: method
|
||||
try:
|
||||
raw_resp = urllib.request.urlopen(req).read().decode()
|
||||
except urllib.error.HTTPError as e:
|
||||
raw_resp = e.read().decode() # gah
|
||||
|
||||
if method != 'DELETE':
|
||||
return json.loads(raw_resp)
|
||||
else:
|
||||
return {}
|
||||
|
||||
|
||||
# check if tag exists
|
||||
resp = gh_api('git/ref/tags/%s' % tag)
|
||||
if 'object' not in resp or 'sha' not in resp['object']: # or resp['object']['sha'] != sha
|
||||
raise ValueError('tag %s not found' % tag)
|
||||
|
||||
resp = gh_api('releases/tags/%s' % tag)
|
||||
if 'id' not in resp or 'upload_url' not in resp:
|
||||
raise ValueError('release does not exist for tag %s' % tag)
|
||||
|
||||
# double-check that release exists and has correct sha
|
||||
# disabled to not spam people watching releases
|
||||
# if 'id' not in resp or 'upload_url' not in resp or 'target_commitish' not in resp or resp['target_commitish'] != sha:
|
||||
# raise ValueError('release does not point to requested commit %s' % sha)
|
||||
|
||||
# TODO this could be a paged response!
|
||||
assets = gh_api('releases/%s/assets' % resp['id'])
|
||||
|
||||
upload_url = resp['upload_url'].split('{')[0] # gah
|
||||
files = sys.argv[1:]
|
||||
for filename in files:
|
||||
if '=' in filename:
|
||||
parts = filename.split("=")
|
||||
asset_filename = parts[0]
|
||||
paths = glob.glob(parts[1])
|
||||
if len(paths) != 1:
|
||||
raise ValueError("Could not find file for pattern %s" % parts[1])
|
||||
local_filename = paths[0]
|
||||
else:
|
||||
asset_filename = os.path.basename(filename)
|
||||
local_filename = filename
|
||||
|
||||
# delete if present
|
||||
for asset in assets:
|
||||
if asset['name'] == asset_filename:
|
||||
gh_api('releases/assets/%s' % asset['id'], method='DELETE')
|
||||
|
||||
resp = gh_api(upload_url + '?name=%s' % asset_filename, filename=local_filename)
|
||||
if 'id' not in resp:
|
||||
raise ValueError('upload failed :/ ' + str(resp))
|
||||
print("%s -> %s" % (local_filename, resp['browser_download_url']))
|
||||
28
external/duckdb/scripts/build_peg_grammar.sh
vendored
Executable file
@@ -0,0 +1,28 @@
#!/usr/bin/env bash
set -euo pipefail
# Print each command before executing (optional, for debug)
# set -x

# Activate virtual environment
if [[ -d ".venv" ]]; then
    source .venv/bin/activate
else
    echo "Error: .venv directory not found"
    exit 1
fi

# Run grammar inlining with and without argument
GRAMMAR_FILE="extension/autocomplete/inline_grammar.py"
if [[ ! -f "$GRAMMAR_FILE" ]]; then
    echo "Error: $GRAMMAR_FILE not found"
    deactivate
    exit 1
fi

python "$GRAMMAR_FILE" --grammar-file
python "$GRAMMAR_FILE"

echo "Successfully built PEG grammar files"

# Deactivate virtual environment
deactivate
63
external/duckdb/scripts/cancel_workflows.py
vendored
Normal file
@@ -0,0 +1,63 @@
import subprocess
import duckdb
import os
import pandas as pd
import argparse
from io import StringIO


parser = argparse.ArgumentParser(description='Cancel all workflows related to a PR.')
parser.add_argument(
    '--title',
    dest='title',
    action='store',
    help='The title of the PR for which we want to rerun workflows (or part of the title) - or "master" for all pushes',
    required=True,
)
parser.add_argument(
    '--repo', dest='repo', action='store', help='The repository to run this workflow on', default='duckdb/duckdb'
)
parser.add_argument(
    '--max_workflows',
    dest='max_workflows',
    action='store',
    help='The maximum number of workflows to look at (starting from the latest)',
    default=200,
)
args = parser.parse_args()

nlimit = args.max_workflows
query = args.title


proc = subprocess.Popen(
    [
        'gh',
        'run',
        '-R',
        args.repo,
        'list',
        '--json',
        'displayTitle,databaseId,status,conclusion,headSha,event',
        f'--limit={nlimit}',
    ],
    stdout=subprocess.PIPE,
)
text = proc.stdout.read().decode('utf8')
df = pd.read_json(StringIO(text))

if query == 'master':
    result = duckdb.query(
        f"select databaseId from df WHERE status IN ('queued', 'in_progress') AND event='push'"
    ).fetchall()
else:
    result = duckdb.query(
        f"select databaseId from df WHERE status IN ('queued', 'in_progress') AND displayTitle LIKE '%{query}%'"
    ).fetchall()
if len(result) == 0:
    print(
        f"No workflows found in the latest {nlimit} workflows that contain the text {query}.\nPerhaps try running with a higher --max_workflows parameter?"
    )
    exit(1)
for databaseId in [x[0] for x in result]:
    os.system(f'gh run -R {args.repo} cancel {databaseId}')
33
external/duckdb/scripts/check-issue-for-code-formatting.py
vendored
Normal file
@@ -0,0 +1,33 @@
import re
import sys

post_text = sys.stdin.read()

sql_keyword_list = ["select", "from", "where", "join", "group by", "order by", "having", "with recursive", "union"]
sql_keyword_regex = f"({'|'.join(sql_keyword_list)})"

sql_keywords = len(re.findall(rf"{sql_keyword_regex}", post_text, flags=re.MULTILINE | re.IGNORECASE))

backticked_code_blocks = len(re.findall(r"^```", post_text))

indented_sql_code_lines = len(re.findall(rf"^{sql_keyword_regex}", post_text, flags=re.MULTILINE | re.IGNORECASE))
indented_python_code_lines = len(re.findall(r"^ (import|duckdb)", post_text, flags=re.MULTILINE | re.IGNORECASE))
indented_r_code_lines = len(re.findall(r"^ (library|dbExecute)", post_text, flags=re.MULTILINE | re.IGNORECASE))
indented_hashbang_code_lines = len(re.findall(r"^ #!", post_text, flags=re.MULTILINE | re.IGNORECASE))

indented_code_lines = indented_sql_code_lines + indented_python_code_lines + indented_r_code_lines
inline_code_snippets = len(re.findall(r"`", post_text)) // 2

print("Metrics computed by 'check-issue-for-code-formatting.py':")
print(f"- {sql_keywords} SQL keyword(s)")
print(f"- {backticked_code_blocks} backticked code block(s)")
print(
    f"- {indented_code_lines} indented code line(s): {indented_sql_code_lines} SQL, {indented_python_code_lines} Python, {indented_r_code_lines} R, {indented_hashbang_code_lines} hashbangs"
)
print(f"- {inline_code_snippets} inline code snippet(s)")

if sql_keywords > 2 and backticked_code_blocks == 0 and indented_code_lines == 0 and inline_code_snippets == 0:
    print("The post is likely not properly formatted.")
    exit(1)
else:
    print("The post is likely properly formatted.")
129
external/duckdb/scripts/check_coverage.py
vendored
Normal file
@@ -0,0 +1,129 @@
|
||||
import argparse
|
||||
import os
|
||||
import math
|
||||
import re
|
||||
|
||||
parser = argparse.ArgumentParser(description='Check code coverage results')
|
||||
|
||||
parser.add_argument(
|
||||
'--uncovered_files',
|
||||
action='store',
|
||||
help='Set of files that are not 100%% covered',
|
||||
default=os.path.join(".github", "config", "uncovered_files.csv"),
|
||||
)
|
||||
parser.add_argument('--directory', help='Directory of generated HTML files', action='store', default='coverage_html')
|
||||
parser.add_argument('--fix', help='Fill up the uncovered_files.csv with all files', action='store_true', default=False)
|
||||
|
||||
|
||||
args = parser.parse_args()
|
||||
if not os.path.exists(args.directory):
|
||||
print(f"The provided directory ({args.directory}) does not exist, please create it first")
|
||||
exit(1)
|
||||
|
||||
covered_regex = (
|
||||
r'<a name="(\d+)">[ \t\n]*<span class="lineNum">[ \t\n0-9]+</span><span class="{COVERED_CLASS}">[ \t\n0-9]+:([^<]+)'
|
||||
)
|
||||
|
||||
|
||||
def get_original_path(path):
|
||||
return (
|
||||
path.replace('.gcov.html', '')
|
||||
.replace(os.getcwd(), '')
|
||||
.replace('coverage_html' + os.path.sep, '')
|
||||
.replace('home/runner/work/duckdb/duckdb/', '')
|
||||
)
|
||||
|
||||
|
||||
def cleanup_line(line):
|
||||
return line.replace('&amp;', '&').replace('&lt;', '<').replace('&gt;', '>').replace('&quot;', '"')
|
||||
|
||||
|
||||
partial_coverage_dict = {}
|
||||
with open(args.uncovered_files, 'r') as f:
|
||||
for line in f.readlines():
|
||||
splits = line.split('\t')
|
||||
partial_coverage_dict[splits[0]] = int(splits[1].strip())
|
||||
|
||||
if args.fix:
|
||||
uncovered_file = open(args.uncovered_files, 'w+')
|
||||
|
||||
DASH_COUNT = 80
|
||||
total_difference = 0
|
||||
allowed_difference = 0
|
||||
|
||||
|
||||
def check_file(path, partial_coverage_dict):
|
||||
global any_failed
|
||||
global total_difference
|
||||
if not '.cpp' in path and not '.hpp' in path:
|
||||
# files are named [path].[ch]pp
|
||||
return
|
||||
if not '.html' in path:
|
||||
return
|
||||
with open(path, 'r') as f:
|
||||
text = f.read()
|
||||
original_path = get_original_path(path)
|
||||
uncovered_lines = re.findall(covered_regex.replace('{COVERED_CLASS}', 'lineNoCov'), text)
|
||||
covered_lines = re.findall(covered_regex.replace('{COVERED_CLASS}', 'lineCov'), text)
|
||||
|
||||
total_lines = len(uncovered_lines) + len(covered_lines)
|
||||
if total_lines == 0:
|
||||
# no lines to cover - skip
|
||||
return
|
||||
|
||||
coverage_percentage = round(len(covered_lines) / (total_lines) * 100, 2)
|
||||
expected_uncovered_lines = 0
|
||||
if original_path in partial_coverage_dict:
|
||||
expected_uncovered_lines = partial_coverage_dict[original_path]
|
||||
if args.fix:
|
||||
if expected_uncovered_lines == 0 and len(uncovered_lines) == 0:
|
||||
return
|
||||
expected_uncovered = max(expected_uncovered_lines, len(uncovered_lines) + 1)
|
||||
uncovered_file.write(f'{original_path}\t{expected_uncovered}\n')
|
||||
return
|
||||
|
||||
if len(uncovered_lines) > expected_uncovered_lines:
|
||||
total_difference += len(uncovered_lines) - expected_uncovered_lines
|
||||
|
||||
print("-" * DASH_COUNT)
|
||||
print(f"Coverage failure in file {original_path}")
|
||||
print("-" * DASH_COUNT)
|
||||
print(f"Coverage percentage: {coverage_percentage}%")
|
||||
print(f"Uncovered lines: {len(uncovered_lines)}")
|
||||
print(f"Covered lines: {len(covered_lines)}")
|
||||
print("-" * DASH_COUNT)
|
||||
print(f"Expected uncovered lines: {expected_uncovered_lines}")
|
||||
print("-" * DASH_COUNT)
|
||||
print("Uncovered lines")
|
||||
print("-" * DASH_COUNT)
|
||||
for e in uncovered_lines:
|
||||
print(e[0] + ' ' * 8 + cleanup_line(e[1]))
|
||||
|
||||
|
||||
def scan_directory(path):
|
||||
file_list = []
|
||||
if os.path.isfile(path):
|
||||
file_list.append(path)
|
||||
else:
|
||||
files = os.listdir(path)
|
||||
for file in files:
|
||||
file_list += scan_directory(os.path.join(path, file))
|
||||
return file_list
|
||||
|
||||
|
||||
files = scan_directory(args.directory)
|
||||
files.sort()
|
||||
|
||||
for file in files:
|
||||
check_file(file, partial_coverage_dict)
|
||||
|
||||
if args.fix:
|
||||
uncovered_file.close()
|
||||
|
||||
if total_difference > allowed_difference:
|
||||
exit(1)
|
||||
elif total_difference > 0:
|
||||
print("-" * DASH_COUNT)
|
||||
print("SUCCESS-ish")
|
||||
print("-" * DASH_COUNT)
|
||||
print(f"{total_difference} lines were uncovered but this falls within the margin of {allowed_difference}")
|
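# Note on the uncovered-files list read above: it is a tab-separated file with one
# "<path>\t<allowed uncovered line count>" entry per line, e.g. (hypothetical entries):
#     src/common/exception.cpp	12
#     src/storage/data_table.cpp	3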
369
external/duckdb/scripts/clang-tidy-diff.py
vendored
Normal file
@@ -0,0 +1,369 @@
|
||||
#!/usr/bin/env python3
|
||||
#
|
||||
# ===- clang-tidy-diff.py - ClangTidy Diff Checker -----------*- python -*--===#
|
||||
#
|
||||
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
# See https://llvm.org/LICENSE.txt for license information.
|
||||
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
#
|
||||
# ===-----------------------------------------------------------------------===#
|
||||
|
||||
r"""
|
||||
ClangTidy Diff Checker
|
||||
======================
|
||||
|
||||
This script reads input from a unified diff, runs clang-tidy on all changed
|
||||
files and outputs clang-tidy warnings in changed lines only. This is useful to
|
||||
detect clang-tidy regressions in the lines touched by a specific patch.
|
||||
Example usage for git/svn users:
|
||||
|
||||
git diff -U0 HEAD^ | clang-tidy-diff.py -p1
|
||||
svn diff --diff-cmd=diff -x-U0 | \
|
||||
clang-tidy-diff.py -fix -checks=-*,modernize-use-override
|
||||
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import glob
|
||||
import json
|
||||
import multiprocessing
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
import threading
|
||||
import traceback
|
||||
|
||||
try:
|
||||
import yaml
|
||||
except ImportError:
|
||||
yaml = None
|
||||
|
||||
is_py2 = sys.version[0] == "2"
|
||||
|
||||
if is_py2:
|
||||
import Queue as queue
|
||||
else:
|
||||
import queue as queue
|
||||
|
||||
|
||||
def run_tidy(task_queue, lock, timeout, failed_files):
|
||||
watchdog = None
|
||||
while True:
|
||||
command = task_queue.get()
|
||||
try:
|
||||
proc = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
|
||||
if timeout is not None:
|
||||
watchdog = threading.Timer(timeout, proc.kill)
|
||||
watchdog.start()
|
||||
|
||||
stdout, stderr = proc.communicate()
|
||||
if proc.returncode != 0:
|
||||
if proc.returncode < 0:
|
||||
msg = "Terminated by signal %d : %s\n" % (
|
||||
-proc.returncode,
|
||||
" ".join(command),
|
||||
)
|
||||
stderr += msg.encode("utf-8")
|
||||
failed_files.append(command)
|
||||
|
||||
with lock:
|
||||
sys.stdout.write(stdout.decode("utf-8") + "\n")
|
||||
sys.stdout.flush()
|
||||
if stderr:
|
||||
sys.stderr.write(stderr.decode("utf-8") + "\n")
|
||||
sys.stderr.flush()
|
||||
except Exception as e:
|
||||
with lock:
|
||||
sys.stderr.write("Failed: " + str(e) + ": ".join(command) + "\n")
|
||||
finally:
|
||||
with lock:
|
||||
if not (timeout is None or watchdog is None):
|
||||
if not watchdog.is_alive():
|
||||
sys.stderr.write("Terminated by timeout: " + " ".join(command) + "\n")
|
||||
watchdog.cancel()
|
||||
task_queue.task_done()
|
||||
|
||||
|
||||
def start_workers(max_tasks, tidy_caller, arguments):
|
||||
for _ in range(max_tasks):
|
||||
t = threading.Thread(target=tidy_caller, args=arguments)
|
||||
t.daemon = True
|
||||
t.start()
|
||||
|
||||
|
||||
def merge_replacement_files(tmpdir, mergefile):
|
||||
"""Merge all replacement files in a directory into a single file"""
|
||||
# The fixes suggested by clang-tidy >= 4.0.0 are given under
|
||||
# the top level key 'Diagnostics' in the output yaml files
|
||||
mergekey = "Diagnostics"
|
||||
merged = []
|
||||
for replacefile in glob.iglob(os.path.join(tmpdir, "*.yaml")):
|
||||
content = yaml.safe_load(open(replacefile, "r"))
|
||||
if not content:
|
||||
continue # Skip empty files.
|
||||
merged.extend(content.get(mergekey, []))
|
||||
|
||||
if merged:
|
||||
# MainSourceFile: The key is required by the definition inside
|
||||
# include/clang/Tooling/ReplacementsYaml.h, but the value
|
||||
# is actually never used inside clang-apply-replacements,
|
||||
# so we set it to '' here.
|
||||
output = {"MainSourceFile": "", mergekey: merged}
|
||||
with open(mergefile, "w") as out:
|
||||
yaml.safe_dump(output, out)
|
||||
else:
|
||||
# Empty the file:
|
||||
open(mergefile, "w").close()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Run clang-tidy against changed files, and " "output diagnostics only for modified " "lines."
|
||||
)
|
||||
parser.add_argument(
|
||||
"-clang-tidy-binary",
|
||||
metavar="PATH",
|
||||
default="clang-tidy",
|
||||
help="path to clang-tidy binary",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-p",
|
||||
metavar="NUM",
|
||||
default=0,
|
||||
help="strip the smallest prefix containing P slashes",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-regex",
|
||||
metavar="PATTERN",
|
||||
default=None,
|
||||
help="custom pattern selecting file paths to check " "(case sensitive, overrides -iregex)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-iregex",
|
||||
metavar="PATTERN",
|
||||
default=r".*\.(cpp|cc|c\+\+|cxx|c|cl|h|hpp|m|mm|inc)",
|
||||
help="custom pattern selecting file paths to check " "(case insensitive, overridden by -regex)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-j",
|
||||
type=int,
|
||||
default=1,
|
||||
help="number of tidy instances to be run in parallel.",
|
||||
)
|
||||
parser.add_argument("-timeout", type=int, default=None, help="timeout per each file in seconds.")
|
||||
parser.add_argument("-fix", action="store_true", default=False, help="apply suggested fixes")
|
||||
parser.add_argument(
|
||||
"-checks",
|
||||
help="checks filter, when not specified, use clang-tidy " "default",
|
||||
default="",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-config-file",
|
||||
dest="config_file",
|
||||
help="Specify the path of .clang-tidy or custom config file",
|
||||
default="",
|
||||
)
|
||||
parser.add_argument("-use-color", action="store_true", help="Use colors in output")
|
||||
parser.add_argument("-path", dest="build_path", help="Path used to read a compile command database.")
|
||||
if yaml:
|
||||
parser.add_argument(
|
||||
"-export-fixes",
|
||||
metavar="FILE_OR_DIRECTORY",
|
||||
dest="export_fixes",
|
||||
help="A directory or a yaml file to store suggested fixes in, "
|
||||
"which can be applied with clang-apply-replacements. If the "
|
||||
"parameter is a directory, the fixes of each compilation unit are "
|
||||
"stored in individual yaml files in the directory.",
|
||||
)
|
||||
else:
|
||||
parser.add_argument(
|
||||
"-export-fixes",
|
||||
metavar="DIRECTORY",
|
||||
dest="export_fixes",
|
||||
help="A directory to store suggested fixes in, which can be applied "
|
||||
"with clang-apply-replacements. The fixes of each compilation unit are "
|
||||
"stored in individual yaml files in the directory.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-extra-arg",
|
||||
dest="extra_arg",
|
||||
action="append",
|
||||
default=[],
|
||||
help="Additional argument to append to the compiler " "command line.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-extra-arg-before",
|
||||
dest="extra_arg_before",
|
||||
action="append",
|
||||
default=[],
|
||||
help="Additional argument to prepend to the compiler " "command line.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-quiet",
|
||||
action="store_true",
|
||||
default=False,
|
||||
help="Run clang-tidy in quiet mode",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-load",
|
||||
dest="plugins",
|
||||
action="append",
|
||||
default=[],
|
||||
help="Load the specified plugin in clang-tidy.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-allow-no-checks",
|
||||
action="store_true",
|
||||
help="Allow empty enabled checks.",
|
||||
)
|
||||
|
||||
clang_tidy_args = []
|
||||
argv = sys.argv[1:]
|
||||
if "--" in argv:
|
||||
clang_tidy_args.extend(argv[argv.index("--") :])
|
||||
argv = argv[: argv.index("--")]
|
||||
|
||||
args = parser.parse_args(argv)
|
||||
|
||||
# Extract changed lines for each file.
|
||||
filename = None
|
||||
lines_by_file = {}
|
||||
for line in sys.stdin:
|
||||
match = re.search('^\\+\\+\\+\\ "?(.*?/){%s}([^ \t\n"]*)' % args.p, line)
|
||||
if match:
|
||||
filename = match.group(2)
|
||||
if filename is None:
|
||||
continue
|
||||
|
||||
if args.regex is not None:
|
||||
if not re.match("^%s$" % args.regex, filename):
|
||||
continue
|
||||
else:
|
||||
if not re.match("^%s$" % args.iregex, filename, re.IGNORECASE):
|
||||
continue
|
||||
|
||||
match = re.search(r"^@@.*\+(\d+)(,(\d+))?", line)
|
||||
if match:
|
||||
start_line = int(match.group(1))
|
||||
line_count = 1
|
||||
if match.group(3):
|
||||
line_count = int(match.group(3))
|
||||
if line_count == 0:
|
||||
continue
|
||||
end_line = start_line + line_count - 1
|
||||
lines_by_file.setdefault(filename, []).append([start_line, end_line])
|
||||
|
||||
if not any(lines_by_file):
|
||||
print("No relevant changes found.")
|
||||
sys.exit(0)
|
||||
|
||||
max_task_count = args.j
|
||||
if max_task_count == 0:
|
||||
max_task_count = multiprocessing.cpu_count()
|
||||
max_task_count = min(len(lines_by_file), max_task_count)
|
||||
|
||||
combine_fixes = False
|
||||
export_fixes_dir = None
|
||||
delete_fixes_dir = False
|
||||
if args.export_fixes is not None:
|
||||
# if a directory is given, create it if it does not exist
|
||||
if args.export_fixes.endswith(os.path.sep) and not os.path.isdir(args.export_fixes):
|
||||
os.makedirs(args.export_fixes)
|
||||
|
||||
if not os.path.isdir(args.export_fixes):
|
||||
if not yaml:
|
||||
raise RuntimeError(
|
||||
"Cannot combine fixes in one yaml file. Either install PyYAML or specify an output directory."
|
||||
)
|
||||
|
||||
combine_fixes = True
|
||||
|
||||
if os.path.isdir(args.export_fixes):
|
||||
export_fixes_dir = args.export_fixes
|
||||
|
||||
if combine_fixes:
|
||||
export_fixes_dir = tempfile.mkdtemp()
|
||||
delete_fixes_dir = True
|
||||
|
||||
# Tasks for clang-tidy.
|
||||
task_queue = queue.Queue(max_task_count)
|
||||
# A lock for console output.
|
||||
lock = threading.Lock()
|
||||
|
||||
# List of files with a non-zero return code.
|
||||
failed_files = []
|
||||
|
||||
# Run a pool of clang-tidy workers.
|
||||
start_workers(max_task_count, run_tidy, (task_queue, lock, args.timeout, failed_files))
|
||||
|
||||
# Form the common args list.
|
||||
common_clang_tidy_args = []
|
||||
if args.fix:
|
||||
common_clang_tidy_args.append("-fix")
|
||||
if args.checks != "":
|
||||
common_clang_tidy_args.append("-checks=" + args.checks)
|
||||
if args.config_file != "":
|
||||
common_clang_tidy_args.append("-config-file=" + args.config_file)
|
||||
if args.quiet:
|
||||
common_clang_tidy_args.append("-quiet")
|
||||
if args.build_path is not None:
|
||||
common_clang_tidy_args.append("-p=%s" % args.build_path)
|
||||
if args.use_color:
|
||||
common_clang_tidy_args.append("--use-color")
|
||||
if args.allow_no_checks:
|
||||
common_clang_tidy_args.append("--allow-no-checks")
|
||||
for arg in args.extra_arg:
|
||||
common_clang_tidy_args.append("-extra-arg=%s" % arg)
|
||||
for arg in args.extra_arg_before:
|
||||
common_clang_tidy_args.append("-extra-arg-before=%s" % arg)
|
||||
for plugin in args.plugins:
|
||||
common_clang_tidy_args.append("-load=%s" % plugin)
|
||||
|
||||
for name in lines_by_file:
|
||||
line_filter_json = json.dumps([{"name": name, "lines": lines_by_file[name]}], separators=(",", ":"))
|
||||
|
||||
# Run clang-tidy on files containing changes.
|
||||
command = [args.clang_tidy_binary]
|
||||
command.append("-line-filter=" + line_filter_json)
|
||||
if args.export_fixes is not None:
|
||||
# Get a temporary file. We immediately close the handle so clang-tidy can
|
||||
# overwrite it.
|
||||
(handle, tmp_name) = tempfile.mkstemp(suffix=".yaml", dir=export_fixes_dir)
|
||||
os.close(handle)
|
||||
command.append("-export-fixes=" + tmp_name)
|
||||
command.extend(common_clang_tidy_args)
|
||||
command.append(name)
|
||||
command.extend(clang_tidy_args)
|
||||
|
||||
task_queue.put(command)
|
||||
|
||||
# Wait for all threads to be done.
|
||||
task_queue.join()
|
||||
# Application return code
|
||||
return_code = 0
|
||||
if failed_files:
|
||||
return_code = 1
|
||||
|
||||
if combine_fixes:
|
||||
print("Writing fixes to " + args.export_fixes + " ...")
|
||||
try:
|
||||
merge_replacement_files(export_fixes_dir, args.export_fixes)
|
||||
except:
|
||||
sys.stderr.write("Error exporting fixes.\n")
|
||||
traceback.print_exc()
|
||||
return_code = 1
|
||||
|
||||
if delete_fixes_dir:
|
||||
shutil.rmtree(export_fixes_dir)
|
||||
sys.exit(return_code)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
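For reference, the -line-filter argument assembled in main() above is just a JSON array mapping each changed file to the [start, end] line ranges taken from the diff hunks on stdin. A minimal standalone sketch of that construction (the helper name and the sample diff are illustrative, not part of this script):

import json
import re

def line_filter_from_diff(diff_text, strip_prefix=1):
    # Mirror the "+++ " and "@@" parsing done in main(): collect the new-file
    # line ranges touched by each hunk, keyed by file name.
    lines_by_file = {}
    filename = None
    for line in diff_text.splitlines():
        m = re.search(r'^\+\+\+ "?(?:.*?/){%d}([^ \t\n"]*)' % strip_prefix, line)
        if m:
            filename = m.group(1)
            continue
        m = re.search(r"^@@.*\+(\d+)(,(\d+))?", line)
        if m and filename:
            start = int(m.group(1))
            count = int(m.group(3) or 1)
            if count:
                lines_by_file.setdefault(filename, []).append([start, start + count - 1])
    return json.dumps([{"name": n, "lines": r} for n, r in lines_by_file.items()],
                      separators=(",", ":"))

# One hunk that rewrites lines 42-44 of src/foo.cpp:
print(line_filter_from_diff('+++ b/src/foo.cpp\n@@ -40,2 +42,3 @@\n'))
# [{"name":"src/foo.cpp","lines":[[42,44]]}]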
19
external/duckdb/scripts/compute-extension-hash.sh
vendored
Executable file
@@ -0,0 +1,19 @@
#!/bin/bash

rm -f hash_concats
touch hash_concats

split -b 1M $1

FILES="x*"
for f in $FILES
do
  # sha256 a segment
  openssl dgst -binary -sha256 $f >> hash_concats
  rm $f
done

# sha256 the concatenation
openssl dgst -binary -sha256 hash_concats > hash_composite

cat hash_composite
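The hash printed above is not a plain SHA-256 of the file: it is SHA-256 over the concatenation of the SHA-256 digests of each 1 MiB segment. A rough Python equivalent, for illustration only (the function name is made up):

import hashlib

def composite_extension_hash(path, segment_size=1024 * 1024):
    # sha256 each 1 MiB segment, concatenate the raw digests,
    # then sha256 the concatenation -- the same scheme as the script above.
    digest_concat = b""
    with open(path, "rb") as f:
        while True:
            segment = f.read(segment_size)
            if not segment:
                break
            digest_concat += hashlib.sha256(segment).digest()
    return hashlib.sha256(digest_concat).digest()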
27
external/duckdb/scripts/coverage_check.sh
vendored
Executable file
@@ -0,0 +1,27 @@
#!/bin/bash

set -e

# prepare coverage file
lcov --config-file .github/workflows/lcovrc --zerocounters --directory .
lcov --config-file .github/workflows/lcovrc --capture --initial --directory . --base-directory . --no-external --output-file coverage.info

# build with coverage enabled
mkdir -p build/coverage
(cd build/coverage && cmake -E env CXXFLAGS="--coverage" cmake -DBUILD_EXTENSIONS="parquet;json;jemalloc;autocomplete;icu" -DENABLE_SANITIZER=0 -DCMAKE_BUILD_TYPE=Debug ../.. && cmake --build .)

# run tests
build/coverage/test/unittest
build/coverage/test/unittest "[detailed_profiler]"
build/coverage/test/unittest test/sql/tpch/tpch_sf01.test_slow
python3 -m pytest --shell-binary build/coverage/duckdb tools/shell/tests/

# finalize coverage file
lcov --config-file .github/workflows/lcovrc --directory . --base-directory . --no-external --capture --output-file coverage.info
lcov --config-file .github/workflows/lcovrc --remove coverage.info $(< .github/workflows/lcov_exclude) -o lcov.info

# generate coverage html
genhtml -o coverage_html lcov.info

# check that coverage passes threshold
# python3 scripts/check_coverage.py
63
external/duckdb/scripts/create-release-notes.py
vendored
Normal file
@@ -0,0 +1,63 @@
|
||||
import json, os, sys, glob, mimetypes, urllib.request, re
|
||||
|
||||
api_url = 'https://api.github.com/repos/duckdb/duckdb/'
|
||||
|
||||
if len(sys.argv) < 2:
|
||||
print("Usage: [last_tag] ")
|
||||
exit(1)
|
||||
|
||||
|
||||
token = os.getenv("GH_TOKEN", "")
|
||||
if token == "":
|
||||
raise ValueError('need a GitHub token in GH_TOKEN')
|
||||
|
||||
|
||||
# amazingly this is the entire code of the PyPI package `linkheader-parser`
|
||||
def extract(link_header):
|
||||
"""Extract links and their relations from a Link Header Field."""
|
||||
links = [l.strip() for l in link_header.split(',')]
|
||||
rels = {}
|
||||
pattern = r'<(?P<url>.*)>;\s*rel="(?P<rel>.*)"'
|
||||
for link in links:
|
||||
group_dict = re.match(pattern, link).groupdict()
|
||||
rels[group_dict['rel']] = group_dict['url']
|
||||
return rels
|
||||
|
||||
|
||||
def gh_api(suburl, full_url=''):
|
||||
if full_url == '':
|
||||
url = api_url + suburl
|
||||
else:
|
||||
url = full_url
|
||||
headers = {"Content-Type": "application/json", 'Authorization': 'token ' + token}
|
||||
|
||||
req = urllib.request.Request(url, b'', headers)
|
||||
req.get_method = lambda: 'GET'
|
||||
next_link = None
|
||||
try:
|
||||
resp = urllib.request.urlopen(req)
|
||||
if not resp.getheader("Link") is None:
|
||||
link_data = extract(resp.getheader("Link"))
|
||||
if "next" in link_data:
|
||||
next_link = link_data["next"]
|
||||
raw_resp = resp.read().decode()
|
||||
except urllib.error.HTTPError as e:
|
||||
raw_resp = e.read().decode() # gah
|
||||
|
||||
ret_json = json.loads(raw_resp)
|
||||
if next_link is not None:
|
||||
return ret_json + gh_api('', full_url=next_link)
|
||||
return ret_json
|
||||
|
||||
|
||||
# get time of tag
|
||||
old_release = gh_api('releases/tags/%s' % sys.argv[1])
|
||||
print(old_release["published_at"])
|
||||
|
||||
pulls = gh_api('pulls?base=main&state=closed')
|
||||
for p in pulls:
|
||||
if p["merged_at"] is None:
|
||||
continue
|
||||
if p["merged_at"] < old_release["published_at"]:
|
||||
continue
|
||||
print(" - #%s: %s" % (p["number"], p["title"]))
|
||||
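extract() above is a bare-bones Link-header parser that gh_api() uses to follow GitHub's pagination; each comma-separated entry becomes a rel -> URL pair. A self-contained illustration of the same parse (the URLs are made up):

import re

link_header = ('<https://api.github.com/repositories/1/pulls?page=2>; rel="next", '
               '<https://api.github.com/repositories/1/pulls?page=10>; rel="last"')
rels = {}
for link in (l.strip() for l in link_header.split(',')):
    m = re.match(r'<(?P<url>.*)>;\s*rel="(?P<rel>.*)"', link)
    rels[m.group('rel')] = m.group('url')
print(rels['next'])  # https://api.github.com/repositories/1/pulls?page=2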
43
external/duckdb/scripts/create_local_extension_repo.py
vendored
Normal file
@@ -0,0 +1,43 @@
|
||||
###
|
||||
# This script copies all extensions in a build folder from their cmake-produced structure into the extension repository
# structure of ./<duckdb_version>/<build_architecture>/<extension_name>.duckdb_extension
# Note that it requires the duckdb_platform_out file to be populated with the platform
|
||||
|
||||
import os
|
||||
import sys
|
||||
import subprocess
|
||||
import glob
|
||||
import shutil
|
||||
|
||||
if len(sys.argv) != 6:
|
||||
print(
|
||||
"Usage: scripts/create_local_extension_repo.py <duckdb_version> <duckdb_platform_out> <path/to/duckdb/build> <path/to/local_repo> <postfix>"
|
||||
)
|
||||
exit(1)
|
||||
|
||||
duckdb_version = sys.argv[1]
|
||||
duckdb_platform_out = sys.argv[2]
|
||||
extension_path = sys.argv[3]
|
||||
dst_path = sys.argv[4]
|
||||
postfix = sys.argv[5]
|
||||
|
||||
if os.name == 'nt':
|
||||
duckdb_platform_out = duckdb_platform_out.replace("/", "\\")
|
||||
extension_path = extension_path.replace("/", "\\")
|
||||
dst_path = dst_path.replace("/", "\\")
|
||||
|
||||
with open(duckdb_platform_out, 'r') as f:
|
||||
lines = f.readlines()
|
||||
duckdb_platform = lines[0]
|
||||
|
||||
# Create destination path
|
||||
dest_path = os.path.join(dst_path, duckdb_version, duckdb_platform)
|
||||
if not os.path.exists(dest_path):
|
||||
os.makedirs(dest_path)
|
||||
|
||||
# Now copy over the extensions to the correct path
|
||||
glob_string = os.path.join(extension_path, 'extension', '*', '*.' + postfix)
|
||||
|
||||
for file in glob.glob(glob_string):
|
||||
dest_file = os.path.join(dest_path, os.path.basename(file))
|
||||
shutil.copy(file, dest_file)
|
||||
147
external/duckdb/scripts/create_patch.py
vendored
Normal file
@@ -0,0 +1,147 @@
|
||||
import os
|
||||
import argparse
|
||||
import sys
|
||||
import re
|
||||
import subprocess
|
||||
from typing import List, Dict
|
||||
from pathlib import Path
|
||||
|
||||
SCRIPT_DIR = os.path.dirname(__file__)
|
||||
|
||||
parser = argparse.ArgumentParser(description="Generate a patch file for a DuckDB extension.")
|
||||
|
||||
parser.add_argument(
|
||||
"repository_path",
|
||||
type=str,
|
||||
help="Path to the repository where the changes live that should be turned into a patch.",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"extension_name",
|
||||
type=str,
|
||||
help="Name of the extension to patch, should match the name in `.github/config/extensions/<extension_name>.cmake`.",
|
||||
)
|
||||
|
||||
parser.add_argument("patch_name", type=str, help="Name for the patch file to create.")
|
||||
|
||||
parser.add_argument("--overwrite", action="store_true", help="Allow overwriting the patch file if it already exists.")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
|
||||
def verify_git_tag():
|
||||
# Locate the cmake file to extract the GIT_TAG from
|
||||
cmake_path = Path(SCRIPT_DIR) / '..' / ".github" / "config" / "extensions" / f"{args.extension_name}.cmake"
|
||||
if not cmake_path.is_file():
|
||||
print(f"Error: Extension CMake file not found: {cmake_path}")
|
||||
sys.exit(1)
|
||||
|
||||
cmake_content = cmake_path.read_text()
|
||||
|
||||
# Extract GIT_TAG from the cmake file
|
||||
match = re.search(r"\bGIT_TAG\s+([^\s\)]+)", cmake_content)
|
||||
if not match:
|
||||
print(f"Error: Could not find GIT_TAG in {cmake_path}")
|
||||
sys.exit(1)
|
||||
|
||||
git_tag_in_cmake = match.group(1)
|
||||
|
||||
# Get the current commit hash in repository_path
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["git", "rev-parse", "HEAD"],
|
||||
cwd=args.repository_path,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
text=True,
|
||||
check=True,
|
||||
)
|
||||
current_commit = result.stdout.strip()
|
||||
except subprocess.CalledProcessError as e:
|
||||
print(f"Error: Failed to run git in {args.repository_path} — {e.stderr.strip()}")
|
||||
sys.exit(1)
|
||||
|
||||
# Compare the tags
|
||||
if git_tag_in_cmake != current_commit:
|
||||
print(
|
||||
f"Error: GIT_TAG in {cmake_path} is {git_tag_in_cmake}, "
|
||||
f"but repository {args.repository_path} is checked out at {current_commit}."
|
||||
)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def create_patch():
|
||||
# Collect changes with git diff
|
||||
try:
|
||||
diff_result = subprocess.run(
|
||||
["git", "diff", "--ignore-submodules"],
|
||||
cwd=args.repository_path,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
text=True,
|
||||
check=True,
|
||||
)
|
||||
except subprocess.CalledProcessError as e:
|
||||
print(f"Error: Failed to run git diff — {e.stderr.strip()}")
|
||||
sys.exit(1)
|
||||
|
||||
new_patch_content = diff_result.stdout
|
||||
if not new_patch_content.strip():
|
||||
print("⚠️ No changes detected in repository; no patch will be created.")
|
||||
sys.exit(0)
|
||||
|
||||
def parse_patch_files_and_lines(patch_text):
|
||||
changes = {}
|
||||
current_file = None
|
||||
for line in patch_text.splitlines():
|
||||
if line.startswith("diff --git"):
|
||||
parts = line.split()
|
||||
if len(parts) >= 3:
|
||||
# Format: diff --git a/file b/file
|
||||
current_file = parts[2][2:] # remove 'a/'
|
||||
changes.setdefault(current_file, set())
|
||||
elif line.startswith("@@") and current_file:
|
||||
# Format: @@ -old_start,old_count +new_start,new_count @@
|
||||
m = re.match(r"@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@", line)
|
||||
if m:
|
||||
start = int(m.group(1))
|
||||
length = int(m.group(2) or "1")
|
||||
for l in range(start, start + length):
|
||||
changes[current_file].add(l)
|
||||
return changes
|
||||
|
||||
new_changes = parse_patch_files_and_lines(new_patch_content)
|
||||
|
||||
# Check conflicts with existing patches
|
||||
patch_dir = (Path(SCRIPT_DIR) / ".." / ".github" / "patches" / "extensions" / args.extension_name).resolve()
|
||||
patch_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
for existing_patch in patch_dir.glob("*.patch"):
|
||||
if existing_patch.name == f"{args.patch_name}.patch":
|
||||
if not args.overwrite:
|
||||
print(f"A patch by the name '{args.patch_name}.patch' already exists, failed to create patch")
|
||||
sys.exit(1)
|
||||
else:
|
||||
continue
|
||||
existing_changes = parse_patch_files_and_lines(existing_patch.read_text())
|
||||
|
||||
for file, lines in new_changes.items():
|
||||
if file in existing_changes:
|
||||
overlap = lines & existing_changes[file]
|
||||
if overlap:
|
||||
print(f"❌ Conflict detected with existing patch: {existing_patch.name}")
|
||||
print(f" File: {file}")
|
||||
print(f" Overlapping lines: {sorted(overlap)}")
|
||||
sys.exit(1)
|
||||
|
||||
# Save patch file
|
||||
patch_dir = (Path(SCRIPT_DIR) / ".." / ".github" / "patches" / "extensions" / args.extension_name).resolve()
|
||||
patch_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
patch_path = patch_dir / f"{args.patch_name}.patch"
|
||||
patch_path.write_text(diff_result.stdout)
|
||||
|
||||
|
||||
verify_git_tag()
|
||||
|
||||
create_patch()
|
||||
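The conflict check in create_patch() works purely on hunk headers: two patches conflict if they touch any of the same (file, line) pairs on the new-file side. A small self-contained example of that idea (the two patch texts are made up):

import re

def touched_lines(patch_text):
    # file -> set of new-file line numbers covered by its @@ hunk headers
    changes, current = {}, None
    for line in patch_text.splitlines():
        if line.startswith("diff --git"):
            current = line.split()[2][2:]  # strip the leading 'a/'
            changes.setdefault(current, set())
        elif line.startswith("@@") and current:
            m = re.match(r"@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@", line)
            if m:
                start, length = int(m.group(1)), int(m.group(2) or "1")
                changes[current].update(range(start, start + length))
    return changes

a = "diff --git a/src/x.cpp b/src/x.cpp\n@@ -10,2 +10,3 @@\n"
b = "diff --git a/src/x.cpp b/src/x.cpp\n@@ -11,1 +11,1 @@\n"
print(sorted(touched_lines(a)["src/x.cpp"] & touched_lines(b)["src/x.cpp"]))
# [11] -> an overlap, so the second patch would be rejected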
71
external/duckdb/scripts/exported_symbols_check.py
vendored
Normal file
@@ -0,0 +1,71 @@
|
||||
import subprocess
|
||||
import sys
|
||||
import os
|
||||
|
||||
if len(sys.argv) < 2 or not os.path.isfile(sys.argv[1]):
|
||||
print("Usage: [libduckdb dynamic library file, release build]")
|
||||
exit(1)
|
||||
|
||||
res = subprocess.run('nm -g -C -P'.split(' ') + [sys.argv[1]], check=True, capture_output=True)
|
||||
if res.returncode != 0:
|
||||
raise ValueError('Failed to run `nm`')
|
||||
|
||||
culprits = []
|
||||
|
||||
whitelist = [
|
||||
'@GLIBC',
|
||||
'@CXXABI',
|
||||
'__gnu_cxx::',
|
||||
'std::',
|
||||
'N6duckdb',
|
||||
'duckdb::',
|
||||
'duckdb_miniz::',
|
||||
'duckdb_fmt::',
|
||||
'duckdb_hll::',
|
||||
'duckdb_moodycamel::',
|
||||
'duckdb_yyjson::',
|
||||
'duckdb_',
|
||||
'RefCounter',
|
||||
'registerTMCloneTable',
|
||||
'RegisterClasses',
|
||||
'Unwind_Resume',
|
||||
'__gmon_start',
|
||||
'_fini',
|
||||
'_init',
|
||||
'_version',
|
||||
'_end',
|
||||
'_edata',
|
||||
'__bss_start',
|
||||
'__udivti3',
|
||||
'__popcount',
|
||||
'Adbc',
|
||||
'ErrorArrayStream',
|
||||
'ErrorFromArrayStream',
|
||||
]
|
||||
|
||||
for symbol in res.stdout.decode('utf-8').split('\n'):
|
||||
if len(symbol.strip()) == 0:
|
||||
continue
|
||||
if symbol.endswith(' U'): # undefined because dynamic linker
|
||||
continue
|
||||
if symbol.endswith(' U 0 0') and "random_device" not in symbol: # undefined because dynamic linker
|
||||
continue
|
||||
|
||||
is_whitelisted = False
|
||||
for entry in whitelist:
|
||||
if entry in symbol and "random_device" not in symbol:
|
||||
is_whitelisted = True
|
||||
if is_whitelisted:
|
||||
continue
|
||||
|
||||
culprits.append(symbol)
|
||||
|
||||
|
||||
if len(culprits) > 0:
|
||||
print("Found leaked symbols. Either white-list above or change visibility:")
|
||||
for symbol in culprits:
|
||||
print(symbol)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
sys.exit(0)
|
||||
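In short, the check above flags any defined, exported symbol that matches none of the whitelisted substrings. A simplified sketch of that filter, ignoring the special cases for undefined symbols and random_device (the sample nm lines are made up):

def leaked_symbols(nm_lines, whitelist):
    # Report symbols that are not covered by any whitelist substring.
    return [s.strip() for s in nm_lines
            if s.strip() and not any(entry in s for entry in whitelist)]

nm_lines = [
    "duckdb::DuckDB::~DuckDB() T 0 0",  # covered by 'duckdb::'
    "my_helper_function T 0 0",         # not covered -> reported as a leak
]
print(leaked_symbols(nm_lines, ['duckdb::', 'std::']))  # ['my_helper_function T 0 0']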
51
external/duckdb/scripts/extension-upload-all.sh
vendored
Executable file
@@ -0,0 +1,51 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Uploads all extensions found in <base_dir_glob> (default: build/release/extension/*)
|
||||
# this script is used by DuckDB CI to upload all extensions at once
|
||||
|
||||
# Usage: ./extension-upload-all.sh <architecture> <duckdb_version> [<base_dir_glob>]
|
||||
|
||||
# The directory that the script lives in, thanks @Tishj
|
||||
script_dir="$(dirname "$(readlink -f "$0")")"
|
||||
|
||||
if [ -z "$1" ] || [ -z "$2" ]; then
|
||||
echo "Usage: ./extension-upload-all.sh <architecture> <duckdb_version> [<base_dir_glob>]"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ -z "$3" ]; then
|
||||
BASE_DIR="build/release/extension/*"
|
||||
else
|
||||
BASE_DIR="$3"
|
||||
fi
|
||||
|
||||
set -e
|
||||
|
||||
# Ensure we do nothing on failed globs
|
||||
shopt -s nullglob
|
||||
|
||||
# Print dry run / real run
|
||||
if [ "$DUCKDB_DEPLOY_SCRIPT_MODE" == "for_real" ]; then
|
||||
echo "Deploying extensions.."
|
||||
else
|
||||
echo "Deploying extensions.. (DRY RUN)"
|
||||
fi
|
||||
|
||||
if [[ $1 == wasm* ]]; then
|
||||
FILES="$BASE_DIR/*.duckdb_extension.wasm"
|
||||
else
|
||||
FILES="$BASE_DIR/*.duckdb_extension"
|
||||
fi
|
||||
|
||||
for f in $FILES
|
||||
do
|
||||
if [[ $1 == wasm* ]]; then
|
||||
ext_name=`basename $f .duckdb_extension.wasm`
|
||||
else
|
||||
ext_name=`basename $f .duckdb_extension`
|
||||
fi
|
||||
echo "found extension: '$ext_name'"
|
||||
|
||||
# args: <name> <extension_version> <duckdb_version> <architecture> <s3_bucket> <copy_to_latest> <copy_to_versioned> [<path_to_ext>]
|
||||
$script_dir/extension-upload-single.sh $ext_name "" "$2" "$1" "duckdb-core-extensions" true false "$(dirname "$f")"
|
||||
done
|
||||
109
external/duckdb/scripts/extension-upload-from-nightly.sh
vendored
Executable file
@@ -0,0 +1,109 @@
|
||||
#!/bin/bash
|
||||
|
||||
# This script deploys the extension binaries that are currently deployed to the nightly bucket to the main bucket
|
||||
|
||||
# WARNING: don't use this script if you don't know exactly what you're doing. To deploy a binary:
|
||||
# - Run the script with ./extension-upload-from-nightly.sh <extension_name> <duckdb_version> (<nightly_commit>)
|
||||
# - CHECK the output of the dry run thoroughly
|
||||
# - If successful, set the DUCKDB_DEPLOY_SCRIPT_MODE env variable to the correct value
|
||||
# - run the script again now deploying for real
|
||||
# - check the output
|
||||
# - unset the DUCKDB_DEPLOY_SCRIPT_MODE env var
|
||||
|
||||
if [ -z "$1" ] || [ -z "$2" ]; then
|
||||
echo "Usage: ./extension-upload-from-nightly.sh <extension_name> <duckdb_version> (<nightly_commit>)"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ -z "$3" ]; then
|
||||
BASE_NIGHTLY_DIR="$2"
|
||||
else
|
||||
BASE_NIGHTLY_DIR="$1/$3/$2"
|
||||
fi
|
||||
|
||||
# CONFIG
|
||||
FROM_BUCKET=duckdb-extensions-nightly
|
||||
TO_BUCKET=duckdb-core-extensions
|
||||
CLOUDFRONT_DISTRIBUTION_ID=E2Z28NDMI4PVXP
|
||||
|
||||
### COPY THE FILES
|
||||
## REAL_RUN is to be used to move non-Wasm extensions
|
||||
REAL_RUN="aws s3 cp s3://$FROM_BUCKET/$BASE_NIGHTLY_DIR s3://$TO_BUCKET/$2 --recursive --exclude '*' --include '*/$1.duckdb_extension.gz' --acl public-read --region us-east-2"
|
||||
DRY_RUN="$REAL_RUN --dryrun"
|
||||
## REAL_RUN_WASM is to be used to move Wasm extensions to new style path (no extra duckdb-wasm)
|
||||
REAL_RUN_WASM="aws s3 cp s3://$FROM_BUCKET/$BASE_NIGHTLY_DIR s3://$TO_BUCKET/$2 --recursive --exclude '*' --include '*/$1.duckdb_extension.wasm' --acl public-read --content-encoding br --content-type='application/wasm' --region us-east-2"
|
||||
DRY_RUN_WASM="$REAL_RUN_WASM --dryrun"
|
||||
|
||||
if [ "$DUCKDB_DEPLOY_SCRIPT_MODE" == "for_real" ]; then
|
||||
echo "DEPLOYING"
|
||||
echo "> FROM: $FROM_BUCKET"
|
||||
echo "> TO : $TO_BUCKET"
|
||||
echo "> AWS CLI deploy: "
|
||||
eval "$REAL_RUN"
|
||||
eval "$REAL_RUN_WASM"
|
||||
else
|
||||
echo "DEPLOYING (DRY RUN)"
|
||||
echo "> FROM: $FROM_BUCKET"
|
||||
echo "> TO : $TO_BUCKET"
|
||||
echo "> AWS CLI Dry run: "
|
||||
eval "$DRY_RUN"
|
||||
eval "$DRY_RUN_WASM"
|
||||
fi
|
||||
|
||||
echo ""
|
||||
|
||||
### INVALIDATE THE CLOUDFRONT CACHE AND CLOUDFLARE
|
||||
# For double checking we are invalidating the correct domain
|
||||
CLOUDFRONT_ORIGINS=`aws cloudfront get-distribution --id $CLOUDFRONT_DISTRIBUTION_ID --query 'Distribution.DistributionConfig.Origins.Items[*].DomainName' --output text`
|
||||
|
||||
# Parse the dry run output
|
||||
output=$(eval "$DRY_RUN" && eval "$DRY_RUN_WASM")
|
||||
s3_paths=()
|
||||
while IFS= read -r line; do
|
||||
if [[ $line == *"copy:"* ]]; then
|
||||
s3_path=$(echo $line | grep -o 's3://[^ ]*' | awk 'NR%2==0' | awk -F "s3://$TO_BUCKET" '{print $2}' | cut -d' ' -f1)
|
||||
s3_paths+=("$s3_path")
|
||||
fi
|
||||
done <<< "$output"
|
||||
|
||||
if [ "$DUCKDB_DEPLOY_SCRIPT_MODE" == "for_real" ]; then
|
||||
echo "CLOUDFRONT INVALIDATION"
|
||||
echo "> Total files: ${#s3_paths[@]}"
|
||||
echo "> Domain: $CLOUDFRONT_ORIGINS"
|
||||
for path in "${s3_paths[@]}"; do
|
||||
aws cloudfront create-invalidation --distribution-id "$CLOUDFRONT_DISTRIBUTION_ID" --paths "$path"
|
||||
done
|
||||
else
|
||||
echo "INVALIDATION (DRY RUN)"
|
||||
echo "> Total files: ${#s3_paths[@]}"
|
||||
echo "> Domain: $CLOUDFRONT_ORIGINS"
|
||||
echo "> Paths:"
|
||||
for path in "${s3_paths[@]}"; do
|
||||
echo " $path"
|
||||
done
|
||||
fi
|
||||
|
||||
echo ""
|
||||
|
||||
if [ ! -z "$CLOUDFLARE_CACHE_PURGE_TOKEN" ]; then
|
||||
if [ "$DUCKDB_DEPLOY_SCRIPT_MODE" == "for_real" ]; then
|
||||
echo "CLOUDFLARE INVALIDATION"
|
||||
echo "> Total files: ${#s3_paths[@]}"
|
||||
for path in "${s3_paths[@]}"; do
|
||||
curl --request POST --url https://api.cloudflare.com/client/v4/zones/84f631c38b77d4631b561207f2477332/purge_cache --header 'Content-Type: application/json' --header "Authorization: Bearer $CLOUDFLARE_CACHE_PURGE_TOKEN" --data "{\"files\": [\"http://extensions.duckdb.org$path\"]}"
|
||||
echo ""
|
||||
done
|
||||
else
|
||||
echo "CLOUDFLARE INVALIDATION (DRY RUN)"
|
||||
echo "> Total files: ${#s3_paths[@]}"
|
||||
echo "> Domain: $CLOUDFRONT_ORIGINS"
|
||||
echo "> Paths:"
|
||||
for path in "${s3_paths[@]}"; do
|
||||
echo " http://extensions.duckdb.org$path"
|
||||
done
|
||||
fi
|
||||
else
|
||||
echo "##########################################"
|
||||
echo "WARNING! CLOUDFLARE INVALIDATION DISABLED!"
|
||||
echo "##########################################"
|
||||
fi
|
||||
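The purge list above comes from parsing the aws s3 cp dry-run output: every 'copy:' line names a source and a destination object, and the key below the destination bucket is the path to invalidate on CloudFront/Cloudflare. A rough Python rendering of that parsing (the sample output line is illustrative):

import re

def paths_to_invalidate(dry_run_output, to_bucket):
    paths = []
    for line in dry_run_output.splitlines():
        if "copy:" not in line:
            continue
        # The second s3:// URL on the line is the destination; keep the key below the bucket.
        urls = re.findall(r"s3://\S+", line)
        if len(urls) >= 2 and urls[1].startswith("s3://" + to_bucket):
            paths.append(urls[1][len("s3://" + to_bucket):])
    return paths

sample = ("(dryrun) copy: s3://duckdb-extensions-nightly/abc123/v1.3.0/linux_amd64/json.duckdb_extension.gz"
          " to s3://duckdb-core-extensions/v1.3.0/linux_amd64/json.duckdb_extension.gz")
print(paths_to_invalidate(sample, "duckdb-core-extensions"))
# ['/v1.3.0/linux_amd64/json.duckdb_extension.gz']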
56
external/duckdb/scripts/extension-upload-repository.sh
vendored
Executable file
@@ -0,0 +1,56 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Uploads all extensions found in <base_dir_glob> (default: build/release/repository/*)
# this script is used by DuckDB CI to upload all extensions at once

# Usage: ./extension-upload-repository.sh <base_dir_glob>
# Expected directory structure: <base_dir_glob>/<duckdb_version>/<architecture>/
|
||||
|
||||
# The directory that the script lives in, thanks @Tishj
|
||||
script_dir="$(dirname "$(readlink -f "$0")")"
|
||||
|
||||
if [ -z "$1" ]; then
|
||||
BASE_DIR="build/release/repository/*"
|
||||
else
|
||||
BASE_DIR="$1"
|
||||
fi
|
||||
|
||||
echo $BASE_DIR
|
||||
|
||||
set -e
|
||||
|
||||
# Ensure we do nothing on failed globs
|
||||
shopt -s nullglob
|
||||
|
||||
if [ "$DUCKDB_DEPLOY_SCRIPT_MODE" == "for_real" ]; then
|
||||
echo "Deploying extensions.."
|
||||
else
|
||||
echo "Deploying extensions.. (DRY RUN)"
|
||||
fi
|
||||
|
||||
for version_dir in $BASE_DIR/*; do
|
||||
duckdb_version=$(basename "$version_dir")
|
||||
for arch_dir in "$version_dir"/*; do
|
||||
architecture=$(basename "$arch_dir")
|
||||
if [[ $architecture == wasm* ]]; then
|
||||
FILES="$arch_dir/*.duckdb_extension.wasm"
|
||||
else
|
||||
FILES="$arch_dir/*.duckdb_extension"
|
||||
fi
|
||||
|
||||
for f in $FILES; do
|
||||
if [[ $architecture == wasm* ]]; then
|
||||
ext_name=`basename $f .duckdb_extension.wasm`
|
||||
else
|
||||
ext_name=`basename $f .duckdb_extension`
|
||||
fi
|
||||
|
||||
echo "Processing extension: $ext_name (architecture: $architecture, version: $duckdb_version, path: $f)"
|
||||
|
||||
# args: <name> <extension_version> <duckdb_version> <architecture> <s3_bucket> <copy_to_latest> <copy_to_versioned> [<path_to_ext>]
|
||||
$script_dir/extension-upload-single.sh $ext_name "" "$duckdb_version" "$architecture" "duckdb-core-extensions" true false "$(dirname "$f")"
|
||||
done
|
||||
echo ""
|
||||
done
|
||||
done
|
||||
|
||||
94
external/duckdb/scripts/extension-upload-single.sh
vendored
Executable file
@@ -0,0 +1,94 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Main extension uploading script
|
||||
|
||||
# Note: use the DUCKDB_DEPLOY_SCRIPT_MODE variable to disable dryrun mode
|
||||
|
||||
# Usage: ./extension-upload-single.sh <name> <extension_version> <duckdb_version> <architecture> <s3_bucket> <copy_to_latest> <copy_to_versioned> [<path_to_ext>]
|
||||
# <name> : Name of the extension
|
||||
# <extension_version> : Version (commit / version tag) of the extension
|
||||
# <duckdb_version> : Version (commit / version tag) of DuckDB
|
||||
# <architecture> : Architecture target of the extension binary
|
||||
# <s3_bucket> : S3 bucket to upload to
|
||||
# <copy_to_latest> : Set this as the latest version ("true" / "false", default: "false")
|
||||
# <copy_to_versioned> : Set this as a versioned version that will not be overwritten
|
||||
# <path_to_ext> : (optional) Search this path for the extension
|
||||
|
||||
set -e
|
||||
|
||||
if [ -z "$8" ]; then
|
||||
BASE_EXT_DIR="/tmp/extension"
|
||||
else
|
||||
BASE_EXT_DIR="$8"
|
||||
fi
|
||||
|
||||
if [[ $4 == wasm* ]]; then
|
||||
ext="$BASE_EXT_DIR/$1.duckdb_extension.wasm"
|
||||
else
|
||||
ext="$BASE_EXT_DIR/$1.duckdb_extension"
|
||||
fi
|
||||
|
||||
script_dir="$(dirname "$(readlink -f "$0")")"
|
||||
|
||||
# calculate SHA256 hash of extension binary
|
||||
cat $ext > $ext.append
|
||||
|
||||
( command -v truncate && truncate -s -256 $ext.append ) || ( command -v gtruncate && gtruncate -s -256 $ext.append ) || exit 1
|
||||
|
||||
# (Optionally) Sign binary
|
||||
if [ "$DUCKDB_EXTENSION_SIGNING_PK" != "" ]; then
|
||||
echo "$DUCKDB_EXTENSION_SIGNING_PK" > private.pem
|
||||
$script_dir/compute-extension-hash.sh $ext.append > $ext.hash
|
||||
openssl pkeyutl -sign -in $ext.hash -inkey private.pem -pkeyopt digest:sha256 -out $ext.sign
|
||||
rm -f private.pem
|
||||
else
|
||||
# Default to 256 zeros
|
||||
dd if=/dev/zero of=$ext.sign bs=256 count=1
|
||||
fi
|
||||
|
||||
# append signature to extension binary
|
||||
cat $ext.sign >> $ext.append
|
||||
|
||||
# compress extension binary
|
||||
if [[ $4 == wasm_* ]]; then
|
||||
brotli < $ext.append > "$ext.compressed"
|
||||
else
|
||||
gzip < $ext.append > "$ext.compressed"
|
||||
fi
|
||||
|
||||
set -e
|
||||
|
||||
# Abort if AWS key is not set
|
||||
if [ -z "$AWS_ACCESS_KEY_ID" ]; then
|
||||
echo "No AWS key found, skipping.."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Set dry run unless guard var is set
|
||||
DRY_RUN_PARAM="--dryrun"
|
||||
if [ "$DUCKDB_DEPLOY_SCRIPT_MODE" == "for_real" ]; then
|
||||
DRY_RUN_PARAM=""
|
||||
fi
|
||||
|
||||
# upload versioned version
|
||||
if [[ $7 = 'true' ]]; then
|
||||
if [ -z "$3" ]; then
|
||||
echo "extension-upload-single.sh called with upload_versioned=true but no extension version was passed"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [[ $4 == wasm* ]]; then
|
||||
aws s3 cp $ext.compressed s3://$5/$1/$2/$3/$4/$1.duckdb_extension.wasm $DRY_RUN_PARAM --acl public-read --content-encoding br --content-type="application/wasm"
|
||||
else
|
||||
aws s3 cp $ext.compressed s3://$5/$1/$2/$3/$4/$1.duckdb_extension.gz $DRY_RUN_PARAM --acl public-read
|
||||
fi
|
||||
fi
|
||||
|
||||
# upload to latest version
|
||||
if [[ $6 = 'true' ]]; then
|
||||
if [[ $4 == wasm* ]]; then
|
||||
aws s3 cp $ext.compressed s3://$5/$3/$4/$1.duckdb_extension.wasm $DRY_RUN_PARAM --acl public-read --content-encoding br --content-type="application/wasm"
|
||||
else
|
||||
aws s3 cp $ext.compressed s3://$5/$3/$4/$1.duckdb_extension.gz $DRY_RUN_PARAM --acl public-read
|
||||
fi
|
||||
fi
|
||||
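Two details of the script above are worth spelling out. First, the artifact it uploads is the extension binary with its trailing 256-byte signature slot rewritten: the last 256 bytes are truncated off, the composite hash of the remainder is signed (or zero-filled when no key is configured), and the 256-byte block is appended back before compression. Second, the versioned and "latest" uploads differ only in the S3 key layout; a hedged sketch of those keys (the function name is illustrative):

def s3_keys(name, ext_version, duckdb_version, arch, wasm=False):
    # Mirrors the two destinations used above: a versioned copy and a "latest" copy.
    suffix = "duckdb_extension.wasm" if wasm else "duckdb_extension.gz"
    versioned = f"{name}/{ext_version}/{duckdb_version}/{arch}/{name}.{suffix}"
    latest = f"{duckdb_version}/{arch}/{name}.{suffix}"
    return versioned, latest

print(s3_keys("json", "v1.0.0", "v1.3.0", "linux_amd64"))
# ('json/v1.0.0/v1.3.0/linux_amd64/json.duckdb_extension.gz',
#  'v1.3.0/linux_amd64/json.duckdb_extension.gz')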
64
external/duckdb/scripts/extension-upload-test.sh
vendored
Executable file
@@ -0,0 +1,64 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -e
|
||||
set -x
|
||||
|
||||
CMAKE_CONFIG=Release
|
||||
EXT_BASE_PATH=build/release
|
||||
|
||||
if [ "${FORCE_32_BIT:0}" == "1" ]; then
|
||||
FORCE_32_BIT_FLAG="-DFORCE_32_BIT=1"
|
||||
else
|
||||
FORCE_32_BIT_FLAG=""
|
||||
fi
|
||||
|
||||
FILES="${EXT_BASE_PATH}/extension/*/*.duckdb_extension"
|
||||
|
||||
EXTENSION_LIST=""
|
||||
for f in $FILES
|
||||
do
|
||||
ext=`basename $f .duckdb_extension`
|
||||
EXTENSION_LIST="${EXTENSION_LIST}-$ext"
|
||||
done
|
||||
mkdir -p testext
|
||||
cd testext
|
||||
|
||||
if [ "$2" = "oote" ]; then
|
||||
CMAKE_ROOT="../duckdb"
|
||||
else
|
||||
CMAKE_ROOT=".."
|
||||
fi
|
||||
|
||||
cmake -DCMAKE_BUILD_TYPE=${CMAKE_CONFIG} ${FORCE_32_BIT_FLAG} -DEXTENSION_TESTS_ONLY=1 -DDUCKDB_EXTENSION_CONFIGS=".github/config/in_tree_extensions.cmake;.github/config/out_of_tree_extensions.cmake" ${CMAKE_ROOT}
|
||||
cmake --build . --config ${CMAKE_CONFIG}
|
||||
cd ..
|
||||
|
||||
duckdb_path="testext/duckdb"
|
||||
unittest_path="testext/test/unittest"
|
||||
if [ ! -f "${duckdb_path}" ]; then
|
||||
duckdb_path="testext/${CMAKE_CONFIG}/duckdb.exe"
|
||||
unittest_path="testext/test/${CMAKE_CONFIG}/unittest.exe"
|
||||
fi
|
||||
|
||||
${duckdb_path} -c "FROM duckdb_extensions()"
|
||||
|
||||
for f in $FILES
|
||||
do
|
||||
ext=`basename $f .duckdb_extension`
|
||||
install_path=${ext}
|
||||
unsigned_flag=
|
||||
if [ "$1" = "local" ]
|
||||
then
|
||||
install_path=${f}
|
||||
unsigned_flag=-unsigned
|
||||
fi
|
||||
echo ${install_path}
|
||||
${duckdb_path} ${unsigned_flag} -c "FORCE INSTALL '${install_path}'"
|
||||
${duckdb_path} ${unsigned_flag} -c "LOAD '${ext}'"
|
||||
done
|
||||
|
||||
# Only run tests for non-local, we have tested in enough other ways
|
||||
if [ "$1" != "local" ]
|
||||
then
|
||||
${unittest_path} --autoloading all --skip-compiled
|
||||
fi
|
||||
59
external/duckdb/scripts/extension-upload-wasm.sh
vendored
Normal file
@@ -0,0 +1,59 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Usage: ./extension-upload-wasm.sh <architecture> <commithash or version_tag>
|
||||
|
||||
set -e
|
||||
|
||||
# Ensure we do nothing on failed globs
|
||||
shopt -s nullglob
|
||||
|
||||
if [[ -z "${DUCKDB_EXTENSION_SIGNING_PK}" ]]; then
|
||||
# no private key provided, use the test private key (NOT SAFE)
|
||||
# this is made so private.pem at the end of the block will be in
|
||||
# a valid state, and the rest of the signing process can be tested
|
||||
# even without providing the key
|
||||
cp test/mbedtls/private.pem private.pem
|
||||
else
|
||||
# actual private key provided
|
||||
echo "$DUCKDB_EXTENSION_SIGNING_PK" > private.pem
|
||||
fi
|
||||
|
||||
FILES="build/to_be_deployed/$2/$1/*.duckdb_extension.wasm"
|
||||
for f in $FILES
|
||||
do
|
||||
ext=`basename $f .duckdb_extension.wasm`
|
||||
echo $ext
|
||||
# calculate SHA256 hash of extension binary
|
||||
cat $f > $f.append
|
||||
# 0 for custom section
|
||||
# 113 in hex = 275 in decimal, total length of what follows (1 + 16 + 2 + 256)
|
||||
# [1(continuation) + 0010011(payload) = \x93, 0(continuation) + 10(payload) = \x02]
|
||||
echo -n -e '\x00' >> $f.append
|
||||
echo -n -e '\x93\x02' >> $f.append
|
||||
# 10 in hex = 16 in decimal, length of name, 1 byte
|
||||
echo -n -e '\x10' >> $f.append
|
||||
echo -n -e 'duckdb_signature' >> $f.append
|
||||
# the name of the WebAssembly custom section, 16 bytes
|
||||
# 100 in hex, 256 in decimal
|
||||
# [1(continuation) + 0000000(payload) = \x80, 0(continuation) + 10(payload) = \x02],
|
||||
# for a grand total of 2 bytes
|
||||
echo -n -e '\x80\x02' >> $f.append
|
||||
# the actual payload, 256 bytes, to be added later
|
||||
scripts/compute-extension-hash.sh $f.append > $f.hash
|
||||
# encrypt hash with extension signing private key to create signature
|
||||
openssl pkeyutl -sign -in $f.hash -inkey private.pem -pkeyopt digest:sha256 -out $f.sign
|
||||
# append signature to extension binary
|
||||
cat $f.sign >> $f.append
|
||||
# compress extension binary
|
||||
brotli < $f.append > "$f.brotli"
|
||||
# upload compressed extension binary to S3
|
||||
if [[ -z "${AWS_SECRET_ACCESS_KEY}" ]]; then
|
||||
#AWS_SECRET_ACCESS_KEY is empty -> dry run
|
||||
aws s3 cp $f.brotli s3://duckdb-core-extensions/$2/$1/$ext.duckdb_extension.wasm --acl public-read --content-encoding br --content-type="application/wasm" --dryrun
|
||||
else
|
||||
aws s3 cp $f.brotli s3://duckdb-core-extensions/$2/$1/$ext.duckdb_extension.wasm --acl public-read --content-encoding br --content-type="application/wasm"
|
||||
fi
|
||||
done
|
||||
|
||||
# remove private key
|
||||
rm private.pem
|
||||
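The bytes hand-written above form a WebAssembly custom section: id 0, a ULEB128 section size of 275 (1 name-length byte + 16-byte name + 2-byte payload length + 256-byte signature), the section name "duckdb_signature", and a ULEB128 payload length of 256. A short Python check of those encodings, for illustration:

def uleb128(value):
    # Unsigned LEB128: 7 bits per byte, high bit set on every byte but the last.
    out = bytearray()
    while True:
        byte = value & 0x7F
        value >>= 7
        if value:
            out.append(byte | 0x80)
        else:
            out.append(byte)
            return bytes(out)

name = b"duckdb_signature"
section_size = 1 + len(name) + 2 + 256
assert uleb128(section_size) == b"\x93\x02"   # the '\x93\x02' written above (275)
assert uleb128(256) == b"\x80\x02"            # the '\x80\x02' payload length
header = b"\x00" + uleb128(section_size) + bytes([len(name)]) + name + uleb128(256)
# header is the 22-byte prefix the script emits before the 256-byte signature payload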
461
external/duckdb/scripts/format.py
vendored
Normal file
@@ -0,0 +1,461 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
# this script is used to format the source directory
|
||||
|
||||
import os
|
||||
import time
|
||||
import sys
|
||||
import inspect
|
||||
import subprocess
|
||||
import difflib
|
||||
import re
|
||||
import tempfile
|
||||
import uuid
|
||||
import concurrent.futures
|
||||
import argparse
|
||||
import shutil
|
||||
import traceback
|
||||
from python_helpers import open_utf8
|
||||
|
||||
try:
|
||||
ver = subprocess.check_output(('black', '--version'), text=True)
|
||||
if int(ver.split(' ')[1].split('.')[0]) < 24:
|
||||
print('you need to run `pip install "black>=24"`', ver)
|
||||
exit(-1)
|
||||
except Exception as e:
|
||||
print('you need to run `pip install "black>=24"`', e)
|
||||
exit(-1)
|
||||
|
||||
try:
|
||||
ver = subprocess.check_output(('clang-format', '--version'), text=True)
|
||||
if '11.' not in ver:
|
||||
print('you need to run `pip install clang_format==11.0.1 - `', ver)
|
||||
exit(-1)
|
||||
except Exception as e:
|
||||
print('you need to run `pip install clang_format==11.0.1 - `', e)
|
||||
exit(-1)
|
||||
|
||||
cpp_format_command = 'clang-format --sort-includes=0 -style=file'
|
||||
cmake_format_command = 'cmake-format'
|
||||
|
||||
try:
|
||||
subprocess.check_output(('cmake-format', '--version'), text=True)
|
||||
except Exception as e:
|
||||
print('you need to run `pip install cmake-format`', e)
|
||||
exit(-1)
|
||||
|
||||
extensions = [
|
||||
'.cpp',
|
||||
'.ipp',
|
||||
'.c',
|
||||
'.hpp',
|
||||
'.h',
|
||||
'.cc',
|
||||
'.hh',
|
||||
'CMakeLists.txt',
|
||||
'.test',
|
||||
'.test_slow',
|
||||
'.test_coverage',
|
||||
'.benchmark',
|
||||
'.py',
|
||||
'.java',
|
||||
]
|
||||
formatted_directories = ['src', 'benchmark', 'test', 'tools', 'examples', 'extension', 'scripts']
|
||||
ignored_files = [
|
||||
'tpch_constants.hpp',
|
||||
'tpcds_constants.hpp',
|
||||
'_generated',
|
||||
'tpce_flat_input.hpp',
|
||||
'test_csv_header.hpp',
|
||||
'duckdb.cpp',
|
||||
'duckdb.hpp',
|
||||
'json.hpp',
|
||||
'sqlite3.h',
|
||||
'shell.c',
|
||||
'termcolor.hpp',
|
||||
'test_insert_invalid.test',
|
||||
'httplib.hpp',
|
||||
'os_win.c',
|
||||
'glob.c',
|
||||
'printf.c',
|
||||
'helper.hpp',
|
||||
'single_thread_ptr.hpp',
|
||||
'types.hpp',
|
||||
'default_views.cpp',
|
||||
'default_functions.cpp',
|
||||
'release.h',
|
||||
'genrand.cpp',
|
||||
'address.cpp',
|
||||
'visualizer_constants.hpp',
|
||||
'icu-collate.cpp',
|
||||
'icu-collate.hpp',
|
||||
'yyjson.cpp',
|
||||
'yyjson.hpp',
|
||||
'duckdb_pdqsort.hpp',
|
||||
'pdqsort.h',
|
||||
'stubdata.cpp',
|
||||
'nf_calendar.cpp',
|
||||
'nf_calendar.h',
|
||||
'nf_localedata.cpp',
|
||||
'nf_localedata.h',
|
||||
'nf_zformat.cpp',
|
||||
'nf_zformat.h',
|
||||
'expr.cc',
|
||||
'function_list.cpp',
|
||||
'inlined_grammar.hpp',
|
||||
]
|
||||
ignored_directories = [
|
||||
'.eggs',
|
||||
'__pycache__',
|
||||
'dbgen',
|
||||
os.path.join('tools', 'rpkg', 'src', 'duckdb'),
|
||||
os.path.join('tools', 'rpkg', 'inst', 'include', 'cpp11'),
|
||||
os.path.join('extension', 'tpcds', 'dsdgen'),
|
||||
os.path.join('extension', 'jemalloc', 'jemalloc'),
|
||||
os.path.join('extension', 'icu', 'third_party'),
|
||||
os.path.join('tools', 'nodejs', 'src', 'duckdb'),
|
||||
]
|
||||
format_all = False
|
||||
check_only = True
|
||||
confirm = True
|
||||
silent = False
|
||||
force = False
|
||||
|
||||
|
||||
parser = argparse.ArgumentParser(prog='python scripts/format.py', description='Format source directory files')
|
||||
parser.add_argument(
|
||||
'revision', nargs='?', default='HEAD', help='Revision number or --all to format all files (default: HEAD)'
|
||||
)
|
||||
parser.add_argument('--check', action='store_true', help='Only print differences (default)')
|
||||
parser.add_argument('--fix', action='store_true', help='Fix the files')
|
||||
parser.add_argument('-a', '--all', action='store_true', help='Format all files')
|
||||
parser.add_argument('-d', '--directories', nargs='*', default=[], help='Format specified directories')
|
||||
parser.add_argument('-y', '--noconfirm', action='store_true', help='Skip confirmation prompt')
|
||||
parser.add_argument('-q', '--silent', action='store_true', help='Suppress output')
|
||||
parser.add_argument('-f', '--force', action='store_true', help='Force formatting')
|
||||
args = parser.parse_args()
|
||||
|
||||
revision = args.revision
|
||||
if args.check and args.fix:
|
||||
parser.print_usage()
|
||||
exit(1)
|
||||
check_only = not args.fix
|
||||
confirm = not args.noconfirm
|
||||
silent = args.silent
|
||||
force = args.force
|
||||
format_all = args.all
|
||||
if args.directories:
|
||||
formatted_directories = args.directories
|
||||
|
||||
|
||||
def file_is_ignored(full_path):
|
||||
if os.path.basename(full_path) in ignored_files:
|
||||
return True
|
||||
dirnames = os.path.sep.join(full_path.split(os.path.sep)[:-1])
|
||||
for ignored_directory in ignored_directories:
|
||||
if ignored_directory in dirnames:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def can_format_file(full_path):
|
||||
global extensions, formatted_directories, ignored_files
|
||||
if not os.path.isfile(full_path):
|
||||
return False
|
||||
fname = full_path.split(os.path.sep)[-1]
|
||||
found = False
|
||||
# check file extension
|
||||
for ext in extensions:
|
||||
if full_path.endswith(ext):
|
||||
found = True
|
||||
break
|
||||
if not found:
|
||||
return False
|
||||
# check ignored files
|
||||
if file_is_ignored(full_path):
|
||||
return False
|
||||
# now check file directory
|
||||
for dname in formatted_directories:
|
||||
if full_path.startswith(dname):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
action = "Formatting"
|
||||
if check_only:
|
||||
action = "Checking"
|
||||
|
||||
|
||||
def get_changed_files(revision):
|
||||
proc = subprocess.Popen(['git', 'diff', '--name-only', revision], stdout=subprocess.PIPE)
|
||||
files = proc.stdout.read().decode('utf8').split('\n')
|
||||
changed_files = []
|
||||
for f in files:
|
||||
if not can_format_file(f):
|
||||
continue
|
||||
if file_is_ignored(f):
|
||||
continue
|
||||
changed_files.append(f)
|
||||
return changed_files
|
||||
|
||||
|
||||
if os.path.isfile(revision):
|
||||
print(action + " individual file: " + revision)
|
||||
changed_files = [revision]
|
||||
elif os.path.isdir(revision):
|
||||
print(action + " files in directory: " + revision)
|
||||
changed_files = [os.path.join(revision, x) for x in os.listdir(revision)]
|
||||
|
||||
print("Changeset:")
|
||||
for fname in changed_files:
|
||||
print(fname)
|
||||
elif not format_all:
|
||||
if revision == 'main':
|
||||
# fetch new changes when comparing to the master
|
||||
os.system("git fetch origin main:main")
|
||||
print(action + " since branch or revision: " + revision)
|
||||
changed_files = get_changed_files(revision)
|
||||
if len(changed_files) == 0:
|
||||
print("No changed files found!")
|
||||
exit(0)
|
||||
|
||||
print("Changeset:")
|
||||
for fname in changed_files:
|
||||
print(fname)
|
||||
else:
|
||||
print(action + " all files")
|
||||
|
||||
if confirm and not check_only:
|
||||
print("The files listed above will be reformatted.")
|
||||
result = input("Continue with changes (y/n)?\n")
|
||||
if result != 'y':
|
||||
print("Aborting.")
|
||||
exit(0)
|
||||
|
||||
format_commands = {
|
||||
'.cpp': cpp_format_command,
|
||||
'.ipp': cpp_format_command,
|
||||
'.c': cpp_format_command,
|
||||
'.hpp': cpp_format_command,
|
||||
'.h': cpp_format_command,
|
||||
'.hh': cpp_format_command,
|
||||
'.cc': cpp_format_command,
|
||||
'.txt': cmake_format_command,
|
||||
'.py': 'black --quiet - --skip-string-normalization --line-length 120 --stdin-filename',
|
||||
'.java': cpp_format_command,
|
||||
}
|
||||
|
||||
difference_files = []
|
||||
|
||||
header_top = "//===----------------------------------------------------------------------===//\n"
|
||||
header_top += "// DuckDB\n" + "//\n"
|
||||
header_bottom = "//\n" + "//\n"
|
||||
header_bottom += "//===----------------------------------------------------------------------===//\n\n"
|
||||
base_dir = os.path.join(os.getcwd(), 'src/include')
|
||||
|
||||
|
||||
def get_formatted_text(f, full_path, directory, ext):
|
||||
if not can_format_file(full_path):
|
||||
if not force:
|
||||
print(
|
||||
"File "
|
||||
+ full_path
|
||||
+ " is not normally formatted - but attempted to format anyway. Use --force if formatting is desirable"
|
||||
)
|
||||
exit(1)
|
||||
if f == 'list.hpp':
|
||||
# fill in list file
|
||||
file_list = [
|
||||
os.path.join(dp, f)
|
||||
for dp, dn, filenames in os.walk(directory)
|
||||
for f in filenames
|
||||
if os.path.splitext(f)[1] == '.hpp' and not f.endswith("list.hpp")
|
||||
]
|
||||
file_list = [x.replace('src/include/', '') for x in file_list]
|
||||
file_list.sort()
|
||||
result = ""
|
||||
for x in file_list:
|
||||
result += '#include "%s"\n' % (x)
|
||||
return result
|
||||
|
||||
if ext == ".hpp" and directory.startswith("src/include"):
|
||||
with open_utf8(full_path, 'r') as f:
|
||||
lines = f.readlines()
|
||||
|
||||
# format header in files
|
||||
header_middle = "// " + os.path.relpath(full_path, base_dir) + "\n"
|
||||
text = header_top + header_middle + header_bottom
|
||||
is_old_header = True
|
||||
for line in lines:
|
||||
if not (line.startswith("//") or line.startswith("\n")) and is_old_header:
|
||||
is_old_header = False
|
||||
if not is_old_header:
|
||||
text += line
|
||||
|
||||
if ext == '.test' or ext == '.test_slow' or ext == '.test_coverage' or ext == '.benchmark':
|
||||
f = open_utf8(full_path, 'r')
|
||||
lines = f.readlines()
|
||||
f.close()
|
||||
|
||||
found_name = False
|
||||
found_group = False
|
||||
group_name = full_path.split('/')[-2]
|
||||
new_path_line = '# name: ' + full_path + '\n'
|
||||
new_group_line = '# group: [' + group_name + ']' + '\n'
|
||||
found_diff = False
|
||||
# Find description.
|
||||
found_description = False
|
||||
for line in lines:
|
||||
if line.lower().startswith('# description:') or line.lower().startswith('#description:'):
|
||||
if found_description:
|
||||
print("Error formatting file " + full_path + ", multiple lines starting with # description found")
|
||||
exit(1)
|
||||
found_description = True
|
||||
new_description_line = '# description: ' + line.split(':', 1)[1].strip() + '\n'
|
||||
# Filter old meta.
|
||||
meta = ['#name:', '# name:', '#description:', '# description:', '#group:', '# group:']
|
||||
lines = [line for line in lines if not any(line.lower().startswith(m) for m in meta)]
|
||||
# Clean up empty leading lines.
|
||||
while lines and not lines[0].strip():
|
||||
lines.pop(0)
|
||||
# Ensure header is prepended.
|
||||
header = [new_path_line]
|
||||
if found_description:
|
||||
header.append(new_description_line)
|
||||
header.append(new_group_line)
|
||||
header.append('\n')
|
||||
return ''.join(header + lines)
|
||||
proc_command = format_commands[ext].split(' ') + [full_path]
|
||||
proc = subprocess.Popen(
|
||||
proc_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=open(full_path) if ext == '.py' else None
|
||||
)
|
||||
new_text = proc.stdout.read().decode('utf8')
|
||||
stderr = proc.stderr.read().decode('utf8')
|
||||
if len(stderr) > 0:
|
||||
print(os.getcwd())
|
||||
print("Failed to format file " + full_path)
|
||||
print(' '.join(proc_command))
|
||||
print(stderr)
|
||||
exit(1)
|
||||
new_text = new_text.replace('\r', '')
|
||||
new_text = re.sub(r'\n*$', '', new_text)
|
||||
return new_text + '\n'
|
||||
|
||||
|
||||
def file_is_generated(text):
|
||||
if '// This file is automatically generated by scripts/' in text:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def format_file(f, full_path, directory, ext):
|
||||
global difference_files
|
||||
with open_utf8(full_path, 'r') as f:
|
||||
old_text = f.read()
|
||||
# do not format auto-generated files
|
||||
if file_is_generated(old_text) and ext != '.py':
|
||||
return
|
||||
old_lines = old_text.split('\n')
|
||||
|
||||
new_text = get_formatted_text(f, full_path, directory, ext)
|
||||
if ext in ('.cpp', '.hpp'):
|
||||
new_text = new_text.replace('ARGS &&...args', 'ARGS &&... args')
|
||||
if check_only:
|
||||
new_lines = new_text.split('\n')
|
||||
old_lines = [x for x in old_lines if '...' not in x]
|
||||
new_lines = [x for x in new_lines if '...' not in x]
|
||||
diff_result = difflib.unified_diff(old_lines, new_lines)
|
||||
total_diff = ""
|
||||
for diff_line in diff_result:
|
||||
total_diff += diff_line + "\n"
|
||||
total_diff = total_diff.strip()
|
||||
|
||||
if len(total_diff) > 0:
|
||||
print("----------------------------------------")
|
||||
print("----------------------------------------")
|
||||
print("Found differences in file " + full_path)
|
||||
print("----------------------------------------")
|
||||
print("----------------------------------------")
|
||||
print(total_diff)
|
||||
difference_files.append(full_path)
|
||||
else:
|
||||
tmpfile = os.path.join(tempfile.gettempdir(), str(uuid.uuid4()))
|
||||
with open_utf8(tmpfile, 'w+') as f:
|
||||
f.write(new_text)
|
||||
shutil.move(tmpfile, full_path)
|
||||
|
||||
|
||||
class ToFormatFile:
|
||||
def __init__(self, filename, full_path, directory):
|
||||
self.filename = filename
|
||||
self.full_path = full_path
|
||||
self.directory = directory
|
||||
self.ext = '.' + filename.split('.')[-1]
|
||||
|
||||
|
||||
def format_directory(directory):
|
||||
files = os.listdir(directory)
|
||||
files.sort()
|
||||
result = []
|
||||
for f in files:
|
||||
full_path = os.path.join(directory, f)
|
||||
if os.path.isdir(full_path):
|
||||
if f in ignored_directories or full_path in ignored_directories:
|
||||
continue
|
||||
result += format_directory(full_path)
|
||||
elif can_format_file(full_path):
|
||||
result += [ToFormatFile(f, full_path, directory)]
|
||||
return result
|
||||
|
||||
|
||||
files = []
|
||||
if format_all:
|
||||
try:
|
||||
os.system(cmake_format_command.replace("${FILE}", "CMakeLists.txt"))
|
||||
except:
|
||||
pass
|
||||
|
||||
for direct in formatted_directories:
|
||||
files += format_directory(direct)
|
||||
|
||||
else:
|
||||
for full_path in changed_files:
|
||||
splits = full_path.split(os.path.sep)
|
||||
fname = splits[-1]
|
||||
dirname = os.path.sep.join(splits[:-1])
|
||||
files.append(ToFormatFile(fname, full_path, dirname))
|
||||
|
||||
|
||||
def process_file(f):
|
||||
if not silent:
|
||||
print(f.full_path)
|
||||
try:
|
||||
format_file(f.filename, f.full_path, f.directory, f.ext)
|
||||
except:
|
||||
print(traceback.format_exc())
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
# Create thread for each file
|
||||
with concurrent.futures.ThreadPoolExecutor() as executor:
|
||||
try:
|
||||
threads = [executor.submit(process_file, f) for f in files]
|
||||
# Wait for all tasks to complete
|
||||
concurrent.futures.wait(threads)
|
||||
except KeyboardInterrupt:
|
||||
executor.shutdown(wait=True, cancel_futures=True)
|
||||
raise
|
||||
|
||||
if check_only:
|
||||
if len(difference_files) > 0:
|
||||
print("")
|
||||
print("")
|
||||
print("")
|
||||
print("Failed format-check: differences were found in the following files:")
|
||||
for fname in difference_files:
|
||||
print("- " + fname)
|
||||
print('Run "make format-fix" to fix these differences automatically')
|
||||
exit(1)
|
||||
else:
|
||||
print("Passed format-check")
|
||||
exit(0)
|
||||
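One behavior of format.py above that is easy to miss: for .test, .test_slow, .test_coverage and .benchmark files it rewrites the metadata header, dropping any existing # name / # description / # group lines and regenerating them from the file path, with the group taken from the parent directory. A small illustration of that header construction (the paths are made up):

def test_header(full_path, description=None):
    # Rebuild the header the same way get_formatted_text does for .test files.
    group_name = full_path.split('/')[-2]
    header = ['# name: ' + full_path + '\n']
    if description:
        header.append('# description: ' + description + '\n')
    header.append('# group: [' + group_name + ']\n')
    header.append('\n')
    return ''.join(header)

print(test_header('test/sql/join/test_left_join.test', 'Test LEFT JOIN'), end='')
# # name: test/sql/join/test_left_join.test
# # description: Test LEFT JOIN
# # group: [join]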
81
external/duckdb/scripts/generate_builtin_types.py
vendored
Normal file
@@ -0,0 +1,81 @@
|
||||
import os
|
||||
import re
|
||||
import json
|
||||
|
||||
header = '''//===----------------------------------------------------------------------===//
|
||||
// DuckDB
|
||||
//
|
||||
// duckdb/catalog/default/builtin_types/types.hpp
|
||||
//
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
// This file is generated by scripts/generate_builtin_types.py
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "duckdb/common/types.hpp"
|
||||
#include "duckdb/common/array.hpp"
|
||||
|
||||
namespace duckdb {
|
||||
'''
|
||||
|
||||
footer = '''} // namespace duckdb
|
||||
'''
|
||||
|
||||
|
||||
def normalize_path_separators(x):
|
||||
return os.path.sep.join(x.split('/'))
|
||||
|
||||
|
||||
def legal_struct_name(name):
|
||||
return name.isalnum()
|
||||
|
||||
|
||||
def get_struct_name(function_name):
|
||||
return function_name.replace('_', ' ').title().replace(' ', '') + 'Fun'
|
||||
|
||||
|
||||
def sanitize_string(text):
|
||||
return text.replace('"', '\\"')
|
||||
|
||||
|
||||
new_text = header
|
||||
|
||||
type_entries = []
|
||||
json_path = normalize_path_separators(f'src/include/duckdb/catalog/default/builtin_types/types.json')
|
||||
with open(json_path, 'r') as f:
|
||||
parsed_json = json.load(f)
|
||||
|
||||
# Extract all the types from the json
|
||||
for type in parsed_json:
|
||||
names = type['names']
|
||||
|
||||
type_id = type['id']
|
||||
|
||||
type_entries += ['\t{' + f'''"{name}", LogicalTypeId::{type_id}''' + '}' for name in names]
|
||||
|
||||
TYPE_COUNT = len(type_entries)
|
||||
new_text += '''
|
||||
struct DefaultType {
|
||||
const char *name;
|
||||
LogicalTypeId type;
|
||||
};
|
||||
'''
|
||||
new_text += f'''
|
||||
using builtin_type_array = std::array<DefaultType, {TYPE_COUNT}>;
|
||||
'''
|
||||
new_text += '''
|
||||
static constexpr const builtin_type_array BUILTIN_TYPES{{
|
||||
'''
|
||||
|
||||
type_text = ",\n".join(type_entries)
|
||||
new_text += type_text
|
||||
new_text += '''
|
||||
}};
|
||||
|
||||
'''
|
||||
|
||||
new_text += footer
|
||||
|
||||
with open('src/include/duckdb/catalog/default/builtin_types/types.hpp', 'w+') as f:
|
||||
f.write(new_text)
|
||||
1002
external/duckdb/scripts/generate_c_api.py
vendored
Normal file
File diff suppressed because it is too large
98
external/duckdb/scripts/generate_csv_header.py
vendored
Normal file
@@ -0,0 +1,98 @@
|
||||
# this script generates data for the TPC-H dbgen
|
||||
import os
|
||||
from python_helpers import open_utf8
|
||||
|
||||
|
||||
def get_csv_text(fpath, add_null_terminator=False):
|
||||
with open(fpath, 'rb') as f:
|
||||
text = bytearray(f.read())
|
||||
result_text = ""
|
||||
first = True
|
||||
for byte in text:
|
||||
if first:
|
||||
result_text += str(byte)
|
||||
else:
|
||||
result_text += ", " + str(byte)
|
||||
first = False
|
||||
if add_null_terminator:
|
||||
result_text += ", 0"
|
||||
return result_text
|
||||
|
||||
|
||||
def write_dir(dirname, varname):
|
||||
files = os.listdir(dirname)
|
||||
files.sort()
|
||||
result = ""
|
||||
aggregated_result = "const char *%s[] = {\n" % (varname,)
|
||||
for fname in files:
|
||||
file_varname = "%s_%s" % (varname, fname.split('.')[0])
|
||||
result += "const uint8_t %s[] = {" % (file_varname,) + get_csv_text(os.path.join(dirname, fname), True) + "};\n"
|
||||
aggregated_result += "\t(const char*) %s,\n" % (file_varname,)
|
||||
aggregated_result = aggregated_result[:-2] + "\n};\n"
|
||||
return result + aggregated_result
|
||||
|
||||
|
||||


# ------------------------------------------- #
# ------------------------------------------- #
# ------------- TPC-H ------------ #
# ------------------------------------------- #
# ------------------------------------------- #
tpch_dir = 'extension/tpch/dbgen'
tpch_queries = os.path.join(tpch_dir, 'queries')
tpch_answers_sf001 = os.path.join(tpch_dir, 'answers', 'sf0.01')
tpch_answers_sf01 = os.path.join(tpch_dir, 'answers', 'sf0.1')
tpch_answers_sf1 = os.path.join(tpch_dir, 'answers', 'sf1')
tpch_header = os.path.join(tpch_dir, 'include', 'tpch_constants.hpp')


def create_tpch_header(tpch_dir):
    result = """/* THIS FILE WAS AUTOMATICALLY GENERATED BY generate_csv_header.py */

#pragma once

const int TPCH_QUERIES_COUNT = 22;
"""
    # write the queries and answers
    result += write_dir(tpch_queries, "TPCH_QUERIES")
    result += write_dir(tpch_answers_sf001, "TPCH_ANSWERS_SF0_01")
    result += write_dir(tpch_answers_sf01, "TPCH_ANSWERS_SF0_1")
    result += write_dir(tpch_answers_sf1, "TPCH_ANSWERS_SF1")

    with open_utf8(tpch_header, 'w+') as f:
        f.write(result)


print(tpch_header)
create_tpch_header(tpch_dir)

# ------------------------------------------- #
# ------------------------------------------- #
# ------------- TPC-DS ------------ #
# ------------------------------------------- #
# ------------------------------------------- #
tpcds_dir = 'extension/tpcds/dsdgen'
tpcds_queries = os.path.join(tpcds_dir, 'queries')
tpcds_answers_sf001 = os.path.join(tpcds_dir, 'answers', 'sf0.01')
tpcds_answers_sf1 = os.path.join(tpcds_dir, 'answers', 'sf1')
tpcds_header = os.path.join(tpcds_dir, 'include', 'tpcds_constants.hpp')


def create_tpcds_header(tpcds_dir):
    result = """/* THIS FILE WAS AUTOMATICALLY GENERATED BY generate_csv_header.py */

#pragma once

const int TPCDS_QUERIES_COUNT = 99;
const int TPCDS_TABLE_COUNT = 24;
"""
    # write the queries and answers
    result += write_dir(tpcds_queries, "TPCDS_QUERIES")
    result += write_dir(tpcds_answers_sf001, "TPCDS_ANSWERS_SF0_01")
    result += write_dir(tpcds_answers_sf1, "TPCDS_ANSWERS_SF1")

    with open_utf8(tpcds_header, 'w+') as f:
        f.write(result)


print(tpcds_header)
create_tpcds_header(tpcds_dir)
245
external/duckdb/scripts/generate_enum_util.py
vendored
Normal file
@@ -0,0 +1,245 @@
import os
import csv
import re
import argparse
import glob

os.chdir(os.path.dirname(__file__))

# Don't generate serialization for these enums
blacklist = [
    "RegexOptions",
    "Flags",
    "ContainerType",
    "Type",
    "DictionaryAppendState",
    "DictFSSTMode",
    "ComplexJSONType",
]

enum_util_header_file = os.path.join("..", "src", "include", "duckdb", "common", "enum_util.hpp")
enum_util_source_file = os.path.join("..", "src", "common", "enum_util.cpp")

# Override the string conversions for the following enums:
overrides = {
    "LogicalTypeId": {
        "SQLNULL": "NULL",
        "TIMESTAMP_TZ": "TIMESTAMP WITH TIME ZONE",
        "TIME_TZ": "TIME WITH TIME ZONE",
        "TIMESTAMP_SEC": "TIMESTAMP_S",
    },
    "JoinType": {"OUTER": "FULL"},
    "OrderType": {
        "ORDER_DEFAULT": ["ORDER_DEFAULT", "DEFAULT"],
        "DESCENDING": ["DESCENDING", "DESC"],
        "ASCENDING": ["ASCENDING", "ASC"],
    },
    "OrderByNullType": {
        "ORDER_DEFAULT": ["ORDER_DEFAULT", "DEFAULT"],
        "NULLS_FIRST": ["NULLS FIRST", "NULLS_FIRST"],
        "NULLS_LAST": ["NULLS LAST", "NULLS_LAST"],
    },
    "CheckpointAbort": {
        "NO_ABORT": "NONE",
        "DEBUG_ABORT_BEFORE_TRUNCATE": "BEFORE_TRUNCATE",
        "DEBUG_ABORT_BEFORE_HEADER": "BEFORE_HEADER",
        "DEBUG_ABORT_AFTER_FREE_LIST_WRITE": "AFTER_FREE_LIST_WRITE",
    },
    "SampleMethod": {"SYSTEM_SAMPLE": "System", "BERNOULLI_SAMPLE": "Bernoulli", "RESERVOIR_SAMPLE": "Reservoir"},
    "TableReferenceType": {"EMPTY_FROM": "EMPTY"},
    "LogLevel": {
        "LOG_TRACE": "TRACE",
        "LOG_DEBUG": "DEBUG",
        "LOG_INFO": "INFO",
        "LOG_WARN": "WARN",
        "LOG_ERROR": "ERROR",
        "LOG_FATAL": "FATAL",
    },
    "RequestType": {
        "GET_REQUEST": "GET",
        "PUT_REQUEST": "PUT",
        "HEAD_REQUEST": "HEAD",
        "DELETE_REQUEST": "DELETE",
        "POST_REQUEST": "POST",
    },
    "ArrowFormatVersion": {"V1_0": "1.0", "V1_1": "1.1", "V1_2": "1.2", "V1_3": "1.3", "V1_4": "1.4", "V1_5": "1.5"},
}

# get all the headers
hpp_files = []
for root, dirs, files in os.walk(os.path.join("..", "src")):
    for file in files:
        # Don't include the generated header itself recursively
        if file == "enum_util.hpp":
            continue
        if 'amalgamation' in root:
            continue

        if file.endswith(".hpp"):
            hpp_files.append(os.path.join(root, file))


def remove_prefix(str, prefix):
    if str.startswith(prefix):
        return str[len(prefix) :]
    return str


# get all the enum classes
enums = []
enum_paths = []
enum_path_set = set()

for hpp_file in hpp_files:
    with open(hpp_file, "r") as f:
        text = f.read()
    for res in re.finditer(r"enum class (\w*)\s*:\s*(\w*)\s*{((?:\s*[^}])*)}", text, re.MULTILINE):
        file_path = remove_prefix(os.path.relpath(hpp_file, os.path.join("..", "src")), "include/")
        enum_name = res.group(1)

        if enum_name in blacklist:
            print(f"Skipping {enum_name} because it is blacklisted")
            continue

        enum_type = res.group(2)

        enum_members = []
        # Capture all members: \w+(\s*\=\s*-?\w*)?
        # group one is the member name
        # group two is the member value
        # First clean the captured group from comments
        s = res.group(3)
        s = re.sub(r"\/\/.*", "", s)
        s = re.sub(r"\/\*.*\*\/", "", s)

        enum_values = {}
        for member in re.finditer(r"(\w+)(\s*\=\s*-?\w*)?", s):
            key = member.group(1)
            strings = [key]
            if enum_name in overrides and key in overrides[enum_name]:
                override = overrides[enum_name][key]
                if isinstance(override, list):
                    print(f"Overriding {enum_name}::{key} to one of {override}")
                    strings = override
                else:
                    print(f"Overriding {enum_name}::{key} to {override}")
                    strings = [override]

            if member.group(2):
                # If the member has a value, make sure it isn't already covered by another member
                # If it is, we can't do anything other than ignore it
                value = remove_prefix(member.group(2).strip(), "=").strip()
                if value not in enum_values and value not in dict(enum_members):
                    enum_members.append((key, strings))
                else:
                    print(f"Skipping {enum_name}::{key} because it has a duplicate value {value}")
            else:
                enum_members.append((key, strings))

        if file_path not in enum_path_set:
            enum_path_set.add(file_path)
            enum_paths.append(file_path)

        enums.append((enum_name, enum_type, enum_members))
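# Illustrative sketch only (hypothetical header contents, not part of the original script):
# a declaration such as
#   enum class JoinType : uint8_t { INNER = 1, OUTER = 2 };
# is recorded above as
#   ("JoinType", "uint8_t", [("INNER", ["INNER"]), ("OUTER", ["FULL"])])
# since the "JoinType" entry in `overrides` replaces the string for OUTER with FULL.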
enum_paths.sort()
enums.sort(key=lambda x: x[0])

header = """//-------------------------------------------------------------------------
// This file is automatically generated by scripts/generate_enum_util.py
// Do not edit this file manually, your changes will be overwritten
// If you want to exclude an enum from serialization, add it to the blacklist in the script
//
// Note: The generated code will only work properly if the enum is a top level item in the duckdb namespace
// If the enum is nested in a class, or in another namespace, the generated code will not compile.
// You should move the enum to the duckdb namespace, manually write a specialization or add it to the blacklist
//-------------------------------------------------------------------------\n\n
"""

# Write the enum util header
with open(enum_util_header_file, "w") as f:
    f.write(header)

    f.write('#pragma once\n\n')
    f.write('#include <stdint.h>\n')
    f.write('#include "duckdb/common/string.hpp"\n\n')

    f.write("namespace duckdb {\n\n")

    f.write(
        """struct EnumUtil {
    // String -> Enum
    template <class T>
    static T FromString(const char *value) = delete;

    template <class T>
    static T FromString(const string &value) { return FromString<T>(value.c_str()); }

    // Enum -> String
    template <class T>
    static const char *ToChars(T value) = delete;

    template <class T>
    static string ToString(T value) { return string(ToChars<T>(value)); }
};\n\n"""
    )

    # Forward declare all enums
    for enum_name, enum_type, _ in enums:
        f.write(f"enum class {enum_name} : {enum_type};\n\n")
    f.write("\n")

    # Forward declare all enum serialization functions
    for enum_name, enum_type, _ in enums:
        f.write(f"template<>\nconst char* EnumUtil::ToChars<{enum_name}>({enum_name} value);\n\n")
    f.write("\n")

    # Forward declare all enum deserialization functions
    for enum_name, enum_type, _ in enums:
        f.write(f"template<>\n{enum_name} EnumUtil::FromString<{enum_name}>(const char *value);\n\n")
    f.write("\n")

    f.write("}\n")


with open(enum_util_source_file, "w") as f:
    f.write(header)

    f.write('#include "duckdb/common/enum_util.hpp"\n')

    # Write the includes
    for enum_path in enum_paths:
        f.write(f'#include "{enum_path}"\n')
    f.write("\n")

    f.write("namespace duckdb {\n\n")

    for enum_name, enum_type, enum_members in enums:
        enum_string_array = "Get" + enum_name + "Values()"
        # Write the enum string literal array
        f.write(f"const StringUtil::EnumStringLiteral *{enum_string_array} {{\n")
        f.write(f"\tstatic constexpr StringUtil::EnumStringLiteral values[] {{\n")
        member_count = 0
        for key, strings in enum_members:
            for str_val in strings:
                if member_count != 0:
                    f.write(",\n")
                f.write(f"\t\t{{ static_cast<uint32_t>({enum_name}::{key}), \"{str_val}\" }}")
                member_count += 1
        f.write("\n\t};")
        f.write("\n\treturn values;")
        f.write("\n}\n\n")
        f.write(f"template<>\nconst char* EnumUtil::ToChars<{enum_name}>({enum_name} value) {{\n")
        f.write(
            f"\treturn StringUtil::EnumToString({enum_string_array}, {member_count}, \"{enum_name}\", static_cast<uint32_t>(value));\n"
        )
        f.write("}\n\n")

        # Write the string to enum conversion
        f.write(f"template<>\n{enum_name} EnumUtil::FromString<{enum_name}>(const char *value) {{\n")
        f.write(
            f"\treturn static_cast<{enum_name}>(StringUtil::StringToEnum({enum_string_array}, {member_count}, \"{enum_name}\", value));"
        )
        f.write("\n}\n\n")

    f.write("}\n\n")
161
external/duckdb/scripts/generate_enums.py
vendored
Normal file
@@ -0,0 +1,161 @@
import os
import json
import re

targets = [{'source': 'extension/json/include/', 'target': 'extension/json'}]

file_list = []
for target in targets:
    source_base = os.path.sep.join(target['source'].split('/'))
    target_base = os.path.sep.join(target['target'].split('/'))
    for fname in os.listdir(source_base):
        if '_enums.json' not in fname:
            continue
        file_list.append(
            {
                'source': os.path.join(source_base, fname),
                'include_path': fname.replace('.json', '.hpp'),
                'target_hpp': os.path.join(source_base, fname.replace('.json', '.hpp')),
                'target_cpp': os.path.join(target_base, fname.replace('.json', '.cpp')),
            }
        )

header = '''//===----------------------------------------------------------------------===//
// This file is automatically generated by scripts/generate_enums.py
// Do not edit this file manually, your changes will be overwritten
//===----------------------------------------------------------------------===//

${INCLUDE_LIST}
namespace duckdb {
'''

footer = '''
} // namespace duckdb
'''

include_base = '#include "${FILENAME}"\n'

enum_header = '\nenum class ${ENUM_NAME} : ${ENUM_TYPE} {\n'

enum_footer = '};'

enum_value = '\t${ENUM_MEMBER} = ${ENUM_VALUE},\n'

enum_util_header = '''
template<>
const char* EnumUtil::ToChars<${ENUM_NAME}>(${ENUM_NAME} value);

template<>
${ENUM_NAME} EnumUtil::FromString<${ENUM_NAME}>(const char *value);
'''

enum_util_conversion_begin = '''
template<>
const char* EnumUtil::ToChars<${ENUM_NAME}>(${ENUM_NAME} value) {
	switch(value) {
'''

enum_util_switch = '\tcase ${ENUM_NAME}::${ENUM_MEMBER}:\n\t\treturn "${ENUM_MEMBER}";\n'

enum_util_conversion_end = '''	default:
		throw NotImplementedException(StringUtil::Format("Enum value of type ${ENUM_NAME}: '%d' not implemented", value));
	}
}
'''

from_string_begin = '''
template<>
${ENUM_NAME} EnumUtil::FromString<${ENUM_NAME}>(const char *value) {
'''

from_string_comparison = '''	if (StringUtil::Equals(value, "${ENUM_MEMBER}")) {
		return ${ENUM_NAME}::${ENUM_MEMBER};
	}
'''

from_string_end = '''	throw NotImplementedException(StringUtil::Format("Enum value of type ${ENUM_NAME}: '%s' not implemented", value));
}
'''


class EnumMember:
    def __init__(self, entry, index):
        self.comment = None
        self.index = index
        if type(entry) == str:
            self.name = entry
        else:
            self.name = entry['name']
            if 'comment' in entry:
                self.comment = entry['comment']
            if 'index' in entry:
                self.index = int(entry['index'])


class EnumClass:
    def __init__(self, entry):
        self.name = entry['name']
        self.type = 'uint8_t'
        self.values = []
        index = 0
        for value_entry in entry['values']:
            self.values.append(EnumMember(value_entry, index))
            index += 1
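# Illustrative sketch only (hypothetical JSON entry, not part of the original script): an element
# of a *_enums.json file such as
#   {"name": "SampleUnit", "values": ["ROWS", {"name": "PERCENT", "comment": "sample by percentage"}]}
# is rendered by the loop below into roughly
#   enum class SampleUnit : uint8_t {
#       ROWS = 0,
#       //! sample by percentage
#       PERCENT = 1,
#   };
# plus the EnumUtil::ToChars/FromString declarations and definitions built from the templates above.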
for entry in file_list:
    source_path = entry['source']
    target_header = entry['target_hpp']
    target_source = entry['target_cpp']
    include_path = entry['include_path']
    with open(source_path, 'r') as f:
        json_data = json.load(f)

    include_list = ['duckdb/common/constants.hpp', 'duckdb/common/enum_util.hpp']
    enums = []

    for entry in json_data:
        if 'includes' in entry:
            include_list += entry['includes']
        enums.append(EnumClass(entry))

    with open(target_header, 'w+') as f:
        include_text = '#pragma once\n\n'
        include_text += ''.join([include_base.replace('${FILENAME}', x) for x in include_list])
        f.write(header.replace('${INCLUDE_LIST}', include_text))

        for enum in enums:
            f.write(enum_header.replace('${ENUM_NAME}', enum.name).replace('${ENUM_TYPE}', enum.type))
            for value in enum.values:
                if value.comment is not None:
                    f.write('\t//! ' + value.comment + '\n')
                f.write(enum_value.replace('${ENUM_MEMBER}', value.name).replace('${ENUM_VALUE}', str(value.index)))

            f.write(enum_footer)
            f.write('\n')

        for enum in enums:
            f.write(enum_util_header.replace('${ENUM_NAME}', enum.name))

        f.write(footer)

    with open(target_source, 'w+') as f:
        source_include_list = [include_path, 'duckdb/common/string_util.hpp']
        f.write(
            header.replace(
                '${INCLUDE_LIST}', ''.join([include_base.replace('${FILENAME}', x) for x in source_include_list])
            )
        )

        for enum in enums:
            f.write(enum_util_conversion_begin.replace('${ENUM_NAME}', enum.name))
            for value in enum.values:
                f.write(enum_util_switch.replace('${ENUM_MEMBER}', value.name).replace('${ENUM_NAME}', enum.name))

            f.write(enum_util_conversion_end.replace('${ENUM_NAME}', enum.name))
            f.write(from_string_begin.replace('${ENUM_NAME}', enum.name))
            for value in enum.values:
                f.write(from_string_comparison.replace('${ENUM_MEMBER}', value.name).replace('${ENUM_NAME}', enum.name))

            f.write(from_string_end.replace('${ENUM_NAME}', enum.name))
        f.write(footer)
972
external/duckdb/scripts/generate_extensions_function.py
vendored
Normal file
@@ -0,0 +1,972 @@
import os
import csv
import re
import argparse
import glob
from typing import Set, Tuple, cast
import pathlib
from typing import NamedTuple
from typing import List, Dict
import json

os.chdir(os.path.join(os.path.dirname(__file__), '..'))

# Example usage:
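# (invocation taken from the header comment this script emits further below; adjust the extension name as needed)
# GENERATE_EXTENSION_ENTRIES=1 make debug
# python3 scripts/generate_extensions_function.py --extensions icu --shell build/debug/duckdb --extension_repository build/debug/repository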
parser = argparse.ArgumentParser(description='Generates/Validates extension_functions.hpp file')

parser.add_argument(
    '--validate',
    action=argparse.BooleanOptionalAction,
    help='If set will validate that extension_entries.hpp is up to date, otherwise it generates the extension_functions.hpp file.',
)
parser.add_argument(
    '--extension_repository',
    action='store',
    help="The repository to look for the '**/<extension>.duckdb_extension' files",
    default='build/release/repository',
)
parser.add_argument(
    '--shell',
    action='store',
    help="Path to the DuckDB shell",
    default='build/release/duckdb',
)
parser.add_argument(
    '--extensions',
    action='store',
    help="Comma separated list of extensions - if not provided this is read from the extension configuration",
    default='',
)

args = parser.parse_args()

EXTENSIONS_PATH = os.path.join("build", "extension_configuration", "extensions.csv")
DUCKDB_PATH = os.path.join(*args.shell.split('/'))
HEADER_PATH = os.path.join("src", "include", "duckdb", "main", "extension_entries.hpp")

EXTENSION_DEPENDENCIES = {
    'iceberg': [
        'avro',
        'parquet',
    ]
}

from enum import Enum


class CatalogType(str, Enum):
    SCALAR = "CatalogType::SCALAR_FUNCTION_ENTRY"
    TABLE = "CatalogType::TABLE_FUNCTION_ENTRY"
    AGGREGATE = "CatalogType::AGGREGATE_FUNCTION_ENTRY"
    PRAGMA = "CatalogType::PRAGMA_FUNCTION_ENTRY"
    MACRO = "CatalogType::MACRO_ENTRY"
    TABLE_MACRO = "CatalogType::TABLE_MACRO_ENTRY"


parameter_type_map = {"TIMESTAMP WITH TIME ZONE": "TIMESTAMPTZ", "TIME WITH TIME ZONE": "TIMETZ"}


def catalog_type_from_type(catalog_type: str) -> CatalogType:
    TYPE_MAP = {
        CatalogType.SCALAR.value: CatalogType.SCALAR,
        CatalogType.TABLE.value: CatalogType.TABLE,
        CatalogType.AGGREGATE.value: CatalogType.AGGREGATE,
        CatalogType.PRAGMA.value: CatalogType.PRAGMA,
        CatalogType.MACRO.value: CatalogType.MACRO,
        CatalogType.TABLE_MACRO.value: CatalogType.TABLE_MACRO,
    }
    if catalog_type not in TYPE_MAP:
        raise Exception(f"Unrecognized function type: '{catalog_type}'")
    return TYPE_MAP[catalog_type]


def catalog_type_from_string(catalog_type: str) -> CatalogType:
    TYPE_MAP = {
        CatalogType.SCALAR.name.lower(): CatalogType.SCALAR,
        CatalogType.TABLE.name.lower(): CatalogType.TABLE,
        CatalogType.AGGREGATE.name.lower(): CatalogType.AGGREGATE,
        CatalogType.PRAGMA.name.lower(): CatalogType.PRAGMA,
        CatalogType.MACRO.name.lower(): CatalogType.MACRO,
        CatalogType.TABLE_MACRO.name.lower(): CatalogType.TABLE_MACRO,
    }
    if catalog_type not in TYPE_MAP:
        raise Exception(f"Unrecognized function type: '{catalog_type}'")
    return TYPE_MAP[catalog_type]


def parse_records(text):
    records = []  # Will hold all parsed records
    current_record = []  # Holds items for the current record
    current_item = []  # Accumulates characters for the current item
    in_quote = False  # True if we're inside a double-quoted string
    inside_braces = False  # True if we're inside a { ... } block

    for char in text:
        if char == '"':
            # Toggle the quote state; the quote character itself is not stored.
            in_quote = not in_quote
        elif char == '{' and not in_quote:
            # Start of a new record.
            inside_braces = True
            # Reset any previous record state.
            current_record = []
            current_item = []
        elif char == '}' and not in_quote and inside_braces:
            # End of the current record.
            token = ''.join(current_item).strip()
            if token:
                current_record.append(token)
            records.append(current_record)
            # Reset state for subsequent records.
            current_record = []
            current_item = []
            inside_braces = False
        elif char == ',' and not in_quote and inside_braces:
            # A comma outside quotes indicates the end of the current item.
            token = ''.join(current_item).strip()
            if token:
                current_record.append(token)
            current_item = []
        else:
            # Otherwise, just add the character if we're inside braces.
            if inside_braces:
                current_item.append(char)
    return records
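# Illustrative sketch only (hypothetical input, not part of the original script): given the text
#   {"func_a", "ext_x", CatalogType::SCALAR_FUNCTION_ENTRY}, {"func_b", "ext_y"}
# parse_records returns
#   [['func_a', 'ext_x', 'CatalogType::SCALAR_FUNCTION_ENTRY'], ['func_b', 'ext_y']]
# i.e. one list per {...} block, with the surrounding quotes stripped from each item.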
class LogicalType(NamedTuple):
    type: str


class Function(NamedTuple):
    name: str
    type: CatalogType


class FunctionOverload(NamedTuple):
    name: str
    type: CatalogType
    parameters: Tuple
    return_type: LogicalType


class ExtensionFunctionOverload(NamedTuple):
    extension: str
    name: str
    type: CatalogType
    parameters: Tuple
    return_type: LogicalType

    @staticmethod
    def create_map(input: List[Tuple[str, str, str, str]]) -> Dict[Function, List["ExtensionFunctionOverload"]]:
        output: Dict[Function, List["ExtensionFunctionOverload"]] = {}
        for x in input:
            function = Function(x[0], catalog_type_from_type(x[2]))
            # parse the signature
            signature = x[3]
            splits = signature.split('>')
            return_type = LogicalType(splits[1])
            parameters = [LogicalType(param) for param in splits[0][1:-1].split(',')]
            extension_function = ExtensionFunctionOverload(x[1], function.name, function.type, parameters, return_type)
            if function not in output:
                output[function] = []
            output[function].append(extension_function)
        return output


class ExtensionFunction(NamedTuple):
    extension: str
    name: str
    type: CatalogType

    @staticmethod
    def create_map(input: List[Tuple[str, str, str]]) -> Dict[Function, "ExtensionFunction"]:
        output: Dict[Function, "ExtensionFunction"] = {}
        for x in input:
            key = Function(x[0], catalog_type_from_type(x[2]))
            output[key] = ExtensionFunction(x[1], key.name, key.type)
        return output


class ExtensionSetting(NamedTuple):
    extension: str
    name: str

    @staticmethod
    def create_map(input: List[Tuple[str, str]]) -> Dict[str, "ExtensionSetting"]:
        output: Dict[str, "ExtensionSetting"] = {}
        for x in input:
            output[x[0]] = ExtensionSetting(x[1], x[0])
        return output


class ExtensionSecretType(NamedTuple):
    extension: str
    name: str

    @staticmethod
    def create_map(input: List[Tuple[str, str]]) -> Dict[str, "ExtensionSecretType"]:
        output: Dict[str, "ExtensionSecretType"] = {}
        for x in input:
            output[x[0]] = ExtensionSecretType(x[1], x[0])
        return output


class ExtensionCopyFunction(NamedTuple):
    extension: str
    name: str

    @staticmethod
    def create_map(input: List[Tuple[str, str]]) -> Dict[str, "ExtensionCopyFunction"]:
        output: Dict[str, "ExtensionCopyFunction"] = {}
        for x in input:
            output[x[0]] = ExtensionCopyFunction(x[1], x[0])
        return output


class ExtensionType(NamedTuple):
    extension: str
    name: str

    @staticmethod
    def create_map(input: List[Tuple[str, str]]) -> Dict[str, "ExtensionType"]:
        output: Dict[str, "ExtensionType"] = {}
        for x in input:
            output[x[0]] = ExtensionType(x[1], x[0])
        return output


class ParsedEntries:
    def __init__(self, file_path):
        self.path = file_path
        self.functions = {}
        self.function_overloads = {}
        self.settings = {}
        self.secret_types = {}
        self.types = {}
        self.copy_functions = {}

        file = open(file_path, 'r')
        file_blob = file.read()

        # Get the extension functions
        ext_functions_file_blob = get_slice_of_file("EXTENSION_FUNCTIONS", file_blob)
        res = parse_records(ext_functions_file_blob)
        res = [(x[0], x[1], x[2]) for x in res]
        self.functions = ExtensionFunction.create_map(res)

        # Get the extension function overloads
        ext_function_overloads_file_blob = get_slice_of_file("EXTENSION_FUNCTION_OVERLOADS", file_blob)
        res = parse_records(ext_function_overloads_file_blob)
        res = [(x[0], x[1], x[2], x[3]) for x in res]
        self.function_overloads = ExtensionFunctionOverload.create_map(res)

        # Get the extension settings
        ext_settings_file_blob = get_slice_of_file("EXTENSION_SETTINGS", file_blob)
        res = parse_records(ext_settings_file_blob)
        res = [(x[0], x[1]) for x in res]
        self.settings = ExtensionSetting.create_map(res)

        # Get the extension secret types
        ext_secret_types_file_blob = get_slice_of_file("EXTENSION_SECRET_TYPES", file_blob)
        res = parse_records(ext_secret_types_file_blob)
        res = [(x[0], x[1]) for x in res]
        self.secret_types = ExtensionSecretType.create_map(res)

        # Get the extension copy functions
        ext_copy_functions_blob = get_slice_of_file("EXTENSION_COPY_FUNCTIONS", file_blob)
        res = parse_records(ext_copy_functions_blob)
        res = [(x[0], x[1]) for x in res]
        self.copy_functions = ExtensionCopyFunction.create_map(res)

        # Get the extension types
        ext_types_file_blob = get_slice_of_file("EXTENSION_TYPES", file_blob)
        res = parse_records(ext_types_file_blob)
        res = [(x[0], x[1]) for x in res]
        self.types = ExtensionType.create_map(res)

    def strip_unloaded_extensions(self, extensions: List[str], functions):
        return [x for x in functions if x.extension not in extensions]

    def filter_entries(self, extensions: List[str]):
        self.functions = {k: v for k, v in self.functions.items() if v.extension not in extensions}
        self.function_overloads = {
            k: self.strip_unloaded_extensions(extensions, v)
            for k, v in self.function_overloads.items()
            if len(self.strip_unloaded_extensions(extensions, v)) > 0
        }
        self.copy_functions = {k: v for k, v in self.copy_functions.items() if v.extension not in extensions}
        self.settings = {k: v for k, v in self.settings.items() if v.extension not in extensions}
        self.secret_types = {k: v for k, v in self.secret_types.items() if v.extension not in extensions}
        self.types = {k: v for k, v in self.types.items() if v.extension not in extensions}


def check_prerequisites():
    if not os.path.isfile(DUCKDB_PATH):
        print(f"{DUCKDB_PATH} not found")
        print(
            "please run 'GENERATE_EXTENSION_ENTRIES=1 BUILD_ALL_EXT=1 make release', you might have to manually add DONT_LINK to all extension_configs"
        )
        exit(1)
    if len(args.extensions) == 0 and not os.path.isfile(EXTENSIONS_PATH):
        print(f"{EXTENSIONS_PATH} not found and --extensions is not set")
        print("Either:")
        print(
            "* run 'GENERATE_EXTENSION_ENTRIES=1 BUILD_ALL_EXT=1 make release', you might have to manually add DONT_LINK to all extension_configs"
        )
        print("* Specify a comma separated list of extensions using --extensions")
        exit(1)
    if not os.path.isdir(args.extension_repository):
        print(f"provided --extension_repository '{args.extension_repository}' is not a valid directory")
        exit(1)


# Parse the extension config files to determine which extension names to expect
def get_extension_names() -> List[str]:
    if len(args.extensions) > 0:
        return args.extensions.split(',')
    extension_names = []
    with open(EXTENSIONS_PATH) as f:
        # Skip the csv header
        next(f)
        for line in f:
            extension_name = line.split(',')[0].rstrip()
            if "jemalloc" in extension_name:
                # We skip jemalloc as it doesn't produce a loadable extension but is in the config
                continue
            extension_names.append(extension_name)
    return extension_names


def get_query(sql_query, load_query) -> list:
    # Optionally perform a LOAD of an extension,
    # then perform a SQL query and fetch the output
    query = f'{DUCKDB_PATH} -json -unsigned -c "{load_query}{sql_query}" '
    query_result = os.popen(query).read()
    result = [x for x in query_result[1:-2].split("\n") if x != '']
    return result
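# Illustrative sketch only (the exact shape depends on the DuckDB shell): with -json the shell prints
# a JSON array, so query_result looks roughly like
#   [{"function_name":"read_json","function_type":"table",...},
#   {"function_name":"json_extract","function_type":"scalar",...}]
# get_query drops the enclosing brackets and returns the individual object lines, which callers
# such as get_functions() strip of trailing commas and json.loads() one by one.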
def transform_parameter(parameter) -> LogicalType:
    parameter = parameter.upper()
    if parameter.endswith('[]'):
        return LogicalType(transform_parameter(parameter[0 : len(parameter) - 2]).type + '[]')
    if parameter in parameter_type_map:
        return LogicalType(parameter_type_map[parameter])
    return LogicalType(parameter)


def transform_parameters(parameters) -> FunctionOverload:
    parameters = parameters[1:-1].split(', ')
    return tuple(transform_parameter(param) for param in parameters)


def get_functions(load="") -> (Set[Function], Dict[Function, List[FunctionOverload]]):
    GET_FUNCTIONS_QUERY = """
    select distinct
        function_name,
        function_type,
        parameter_types,
        return_type
    from duckdb_functions()
    ORDER BY function_name, function_type;
    """
    # ['name_1,type_1', ..., 'name_n,type_n']
    results = set(get_query(GET_FUNCTIONS_QUERY, load))

    functions = set()
    function_overloads = {}
    for x in results:
        if x[-1] == ',':
            # Remove the trailing comma
            x = x[:-1]
        function_name, function_type, parameter_types, return_type = [
            x.lower() if x else "null" for x in json.loads(x).values()
        ]
        function_parameters = transform_parameters(parameter_types)
        function_return = transform_parameter(return_type)
        function = Function(function_name, catalog_type_from_string(function_type))
        function_overload = FunctionOverload(
            function_name, catalog_type_from_string(function_type), function_parameters, function_return
        )
        if function not in functions:
            functions.add(function)
            function_overloads[function] = [function_overload]
        else:
            function_overloads[function].append(function_overload)

    return (functions, function_overloads)


def get_settings(load="") -> Set[str]:
    GET_SETTINGS_QUERY = """
    select distinct
        name
    from duckdb_settings();
    """
    settings = set(get_query(GET_SETTINGS_QUERY, load))
    res = set()
    for x in settings:
        if x[-1] == ',':
            # Remove the trailing comma
            x = x[:-1]
        name = json.loads(x)['name']
        res.add(name)
    return res


def get_secret_types(load="") -> Set[str]:
    GET_SECRET_TYPES_QUERY = """
    select distinct
        type
    from duckdb_secret_types();
    """
    secret_types = set(get_query(GET_SECRET_TYPES_QUERY, load))
    res = set()
    for x in secret_types:
        if x[-1] == ',':
            # Remove the trailing comma
            x = x[:-1]
        type = json.loads(x)['type']
        res.add(type)
    return res
class ExtensionData:
    def __init__(self):
        # Map of Function -> ExtensionFunction
        self.function_map: Dict[Function, ExtensionFunction] = {}
        # Map of setting name -> ExtensionSetting
        self.settings_map: Dict[str, ExtensionSetting] = {}
        # Map of secret type name -> ExtensionSecretType
        self.secret_types_map: Dict[str, ExtensionSecretType] = {}
        # Map of Function -> extension function overloads
        self.function_overloads: Dict[Function, List[ExtensionFunctionOverload]] = {}
        # All function overloads (also ones that will not be written to the file)
        self.all_function_overloads: Dict[Function, List[ExtensionFunctionOverload]] = {}

        self.base_settings: Set[str] = set()
        self.base_secret_types: Set[str] = set()
        self.base_functions: Set[Function] = set()

        self.extension_settings: Dict[str, Set[str]] = {}
        self.extension_secret_types: Dict[str, Set[str]] = {}
        self.extension_functions: Dict[str, Set[Function]] = {}

        self.added_extensions: Set[str] = set()

        # Map of extension -> extension_path
        self.extensions: Dict[str, str] = get_extension_path_map()

        self.stored_functions: Dict[str, List[Function]] = {
            'arrow': [Function("scan_arrow_ipc", CatalogType.TABLE), Function("to_arrow_ipc", CatalogType.TABLE)],
            'spatial': [],
        }
        self.stored_settings: Dict[str, List[str]] = {'arrow': [], 'spatial': []}
        self.stored_secret_types: Dict[str, List[str]] = {'arrow': [], 'spatial': []}

    def set_base(self):
        (functions, function_overloads) = get_functions()
        self.base_functions: Set[Function] = functions
        self.base_settings: Set[str] = get_settings()
        self.base_secret_types: Set[str] = get_secret_types()

    def add_entries(self, entries: ParsedEntries):
        self.function_map.update(entries.functions)
        self.function_overloads.update(entries.function_overloads)
        self.settings_map.update(entries.settings)
        self.secret_types_map.update(entries.secret_types)

    def load_dependencies(self, extension_name: str) -> str:
        if extension_name not in EXTENSION_DEPENDENCIES:
            return ''

        res = ''
        dependencies = EXTENSION_DEPENDENCIES[extension_name]
        for item in dependencies:
            if item not in self.extensions:
                print(f"Could not load extension '{extension_name}', dependency '{item}' is missing")
                exit(1)
            extension_path = self.extensions[item]
            print(f"Load {item} at {extension_path}")
            res += f"LOAD '{extension_path}';"
        return res

    def add_extension(self, extension_name: str):
        if extension_name in EXTENSION_DEPENDENCIES:
            for item in EXTENSION_DEPENDENCIES[extension_name]:
                if item not in self.added_extensions:
                    self.add_extension(item)

        if extension_name in self.extensions:
            # Perform a LOAD and add the added settings/functions/secret_types
            extension_path = self.extensions[extension_name]

            print(f"Load {extension_name} at {extension_path}")
            load = self.load_dependencies(extension_name)
            load += f"LOAD '{extension_path}';"

            (functions, function_overloads) = get_functions(load)
            extension_functions = list(functions)
            extension_settings = list(get_settings(load))
            extension_secret_types = list(get_secret_types(load))

            self.add_settings(extension_name, extension_settings)
            self.add_secret_types(extension_name, extension_secret_types)
            self.add_functions(extension_name, extension_functions, function_overloads)
        elif extension_name in self.stored_functions or extension_name in self.stored_settings:
            # Retrieve the list of settings/functions from our hardcoded list
            extension_functions = self.stored_functions[extension_name]
            extension_settings = self.stored_settings[extension_name]
            extension_secret_types = self.stored_secret_types[extension_name]

            print(f"Loading {extension_name} from stored functions: {extension_functions}")
            self.add_settings(extension_name, extension_settings)
            self.add_secret_types(extension_name, extension_secret_types)
            self.add_functions(extension_name, extension_functions, {})
        else:
            error = f"""Missing extension {extension_name} and not found in stored_functions/stored_settings/stored_secret_types
Please double check if '{args.extension_repository}' is the right location to look for ./**/*.duckdb_extension files"""
            print(error)
            exit(1)
        self.added_extensions.add(extension_name)

    def add_settings(self, extension_name: str, settings_list: List[str]):
        extension_name = extension_name.lower()

        base_settings = set()
        base_settings.update(self.base_settings)
        if extension_name in EXTENSION_DEPENDENCIES:
            dependencies = EXTENSION_DEPENDENCIES[extension_name]
            for item in dependencies:
                assert item in self.extension_settings
                base_settings.update(self.extension_settings[item])

        added_settings: Set[str] = set(settings_list) - base_settings

        self.extension_settings[extension_name] = added_settings

        settings_to_add: Dict[str, ExtensionSetting] = {}
        for setting in added_settings:
            setting_name = setting.lower()
            settings_to_add[setting_name] = ExtensionSetting(extension_name, setting_name)

        self.settings_map.update(settings_to_add)

    def add_secret_types(self, extension_name: str, secret_types_list: List[str]):
        extension_name = extension_name.lower()

        base_secret_types = set()
        base_secret_types.update(self.base_secret_types)
        if extension_name in EXTENSION_DEPENDENCIES:
            dependencies = EXTENSION_DEPENDENCIES[extension_name]
            for item in dependencies:
                assert item in self.extension_secret_types
                base_secret_types.update(self.extension_secret_types[item])

        added_secret_types: Set[str] = set(secret_types_list) - base_secret_types

        self.extension_secret_types[extension_name] = added_secret_types

        secret_types_to_add: Dict[str, ExtensionSecretType] = {}
        for secret_type in added_secret_types:
            secret_type_name = secret_type.lower()
            secret_types_to_add[secret_type_name] = ExtensionSecretType(extension_name, secret_type_name)

        self.secret_types_map.update(secret_types_to_add)

    def get_extension_overloads(
        self, extension_name: str, overloads: Dict[Function, List[FunctionOverload]]
    ) -> Dict[Function, List[ExtensionFunctionOverload]]:
        result = {}
        for function, function_overloads in overloads.items():
            extension_overloads = []
            for overload in function_overloads:
                extension_overloads.append(
                    ExtensionFunctionOverload(
                        extension_name, overload.name, overload.type, overload.parameters, overload.return_type
                    )
                )
            result[function] = extension_overloads
        return result

    def add_functions(
        self, extension_name: str, function_list: List[Function], overloads: Dict[Function, List[FunctionOverload]]
    ):
        extension_name = extension_name.lower()

        base_functions = set()
        base_functions.update(self.base_functions)
        if extension_name in EXTENSION_DEPENDENCIES:
            dependencies = EXTENSION_DEPENDENCIES[extension_name]
            for item in dependencies:
                assert item in self.extension_functions
                base_functions.update(self.extension_functions[item])

        overloads = self.get_extension_overloads(extension_name, overloads)
        added_functions: Set[Function] = set(function_list) - base_functions

        self.extension_functions[extension_name] = added_functions

        functions_to_add: Dict[Function, ExtensionFunction] = {}
        for function in added_functions:
            if function in self.function_overloads:
                # function is in overload map - add overloads
                self.function_overloads[function] += overloads[function]
            elif function in self.function_map:
                # function is in function map and we are trying to add it again
                # this means the function is present in multiple extensions
                # remove from function map, and add to overload map
                self.function_overloads[function] = self.all_function_overloads[function] + overloads[function]
                del self.function_map[function]
            else:
                functions_to_add[function] = ExtensionFunction(extension_name, function.name, function.type)

        self.all_function_overloads.update(overloads)
        self.function_map.update(functions_to_add)

    def validate(self):
        parsed_entries = ParsedEntries(HEADER_PATH)
        if self.function_map != parsed_entries.functions:
            print("Function map mismatches:")
            print_map_diff(self.function_map, parsed_entries.functions)
            exit(1)
        if self.settings_map != parsed_entries.settings:
            print("Settings map mismatches:")
            print_map_diff(self.settings_map, parsed_entries.settings)
            exit(1)
        if self.secret_types_map != parsed_entries.secret_types:
            print("SecretTypes map mismatches:")
            print_map_diff(self.secret_types_map, parsed_entries.secret_types)
            exit(1)

        print("All entries found: ")
        print(" > functions: " + str(len(parsed_entries.functions)))
        print(" > settings: " + str(len(parsed_entries.settings)))
        print(" > secret_types: " + str(len(parsed_entries.secret_types)))

    def verify_export(self):
        if len(self.function_map) == 0 or len(self.settings_map) == 0 or len(self.secret_types_map) == 0:
            print(
                """
The provided configuration produced an empty function map or empty settings map or empty secret types map
This is likely caused by building DuckDB with extensions linked in
"""
            )
            exit(1)
    def export_functions(self) -> str:
        result = """
static constexpr ExtensionFunctionEntry EXTENSION_FUNCTIONS[] = {\n"""
        sorted_function = sorted(self.function_map)

        for func in sorted_function:
            function: ExtensionFunction = self.function_map[func]
            result += "\t{"
            result += f'"{function.name}", "{function.extension}", {function.type.value}'
            result += "},\n"
        result += "}; // END_OF_EXTENSION_FUNCTIONS\n"
        return result

    def export_function_overloads(self) -> str:
        result = """
static constexpr ExtensionFunctionOverloadEntry EXTENSION_FUNCTION_OVERLOADS[] = {\n"""
        sorted_function = sorted(self.function_overloads)

        for func in sorted_function:
            overloads: List[ExtensionFunctionOverload] = sorted(self.function_overloads[func])
            for overload in overloads:
                result += "\t{"
                result += f'"{overload.name}", "{overload.extension}", {overload.type.value}, "'
                signature = "["
                signature += ",".join([parameter.type for parameter in overload.parameters])
                signature += "]>" + overload.return_type.type
                result += signature
                result += '"},\n'
        result += "}; // END_OF_EXTENSION_FUNCTION_OVERLOADS\n"
        return result

    def export_settings(self) -> str:
        result = """
static constexpr ExtensionEntry EXTENSION_SETTINGS[] = {\n"""
        sorted_settings = sorted(self.settings_map)

        for settings_name in sorted_settings:
            setting: ExtensionSetting = self.settings_map[settings_name]
            result += "\t{"
            result += f'"{settings_name.lower()}", "{setting.extension}"'
            result += "},\n"
        result += "}; // END_OF_EXTENSION_SETTINGS\n"
        return result

    def export_secret_types(self) -> str:
        result = """
static constexpr ExtensionEntry EXTENSION_SECRET_TYPES[] = {\n"""
        sorted_secret_types = sorted(self.secret_types_map)

        for secret_types_name in sorted_secret_types:
            secret_type: ExtensionSecretType = self.secret_types_map[secret_types_name]
            result += "\t{"
            result += f'"{secret_types_name.lower()}", "{secret_type.extension}"'
            result += "},\n"
        result += "}; // END_OF_EXTENSION_SECRET_TYPES\n"
        return result
# Get the slice of the file containing the var (assumes // END_OF_<varname> comment after var)
def get_slice_of_file(var_name, file_str):
    begin = file_str.find(var_name)
    end = file_str.find("END_OF_" + var_name)
    return file_str[begin:end]


def print_map_diff(d1, d2):
    s1 = sorted(set(d1.items()))
    s2 = sorted(set(d2.items()))

    diff1 = str(set(s1) - set(s2))
    diff2 = str(set(s2) - set(s1))
    print("Diff between maps: " + diff1 + "\n")
    print("Diff between maps: " + diff2 + "\n")


def get_extension_path_map() -> Dict[str, str]:
    extension_paths: Dict[str, str] = {}
    # extension_repository = pathlib.Path('../build/release/repository')
    extension_repository = args.extension_repository
    for location in glob.iglob(extension_repository + '/**/*.duckdb_extension', recursive=True):
        name, _ = os.path.splitext(os.path.basename(location))
        print(f"Located extension: {name} in path: '{location}'")
        extension_paths[name] = location
    return extension_paths
def write_header(data: ExtensionData):
    INCLUDE_HEADER = """//===----------------------------------------------------------------------===//
// DuckDB
//
// duckdb/main/extension_entries.hpp
//
//
//===----------------------------------------------------------------------===//

#pragma once

#include \"duckdb/common/unordered_map.hpp\"
#include \"duckdb/common/enums/catalog_type.hpp\"

// NOTE: this file is generated by scripts/generate_extensions_function.py.
// Example usage to refresh one extension (replace "icu" with the desired extension):
// GENERATE_EXTENSION_ENTRIES=1 make debug
// python3 scripts/generate_extensions_function.py --extensions icu --shell build/debug/duckdb --extension_repository build/debug/repository

// Check out the check-load-install-extensions job in .github/workflows/LinuxRelease.yml for more details

namespace duckdb {

struct ExtensionEntry {
    char name[48];
    char extension[48];
};

struct ExtensionFunctionEntry {
    char name[48];
    char extension[48];
    CatalogType type;
};

struct ExtensionFunctionOverloadEntry {
    char name[48];
    char extension[48];
    CatalogType type;
    char signature[96];
};
"""

    INCLUDE_FOOTER = """
// Note: these are currently hardcoded in scripts/generate_extensions_function.py
// TODO: automate by passing through to script via duckdb
static constexpr ExtensionEntry EXTENSION_COPY_FUNCTIONS[] = {
    {"parquet", "parquet"},
    {"json", "json"},
    {"avro", "avro"}
}; // END_OF_EXTENSION_COPY_FUNCTIONS

// Note: these are currently hardcoded in scripts/generate_extensions_function.py
// TODO: automate by passing through to script via duckdb
static constexpr ExtensionEntry EXTENSION_TYPES[] = {
    {"json", "json"},
    {"inet", "inet"},
    {"geometry", "spatial"}
}; // END_OF_EXTENSION_TYPES

// Note: these are currently hardcoded in scripts/generate_extensions_function.py
// TODO: automate by passing through to script via duckdb
static constexpr ExtensionEntry EXTENSION_COLLATIONS[] = {
    {"af", "icu"}, {"am", "icu"}, {"ar", "icu"}, {"ar_sa", "icu"}, {"as", "icu"}, {"az", "icu"},
    {"be", "icu"}, {"bg", "icu"}, {"bn", "icu"}, {"bo", "icu"}, {"br", "icu"}, {"bs", "icu"},
    {"ca", "icu"}, {"ceb", "icu"}, {"chr", "icu"}, {"cs", "icu"}, {"cy", "icu"}, {"da", "icu"},
    {"de", "icu"}, {"de_at", "icu"}, {"dsb", "icu"}, {"dz", "icu"}, {"ee", "icu"}, {"el", "icu"},
    {"en", "icu"}, {"en_us", "icu"}, {"eo", "icu"}, {"es", "icu"}, {"et", "icu"}, {"fa", "icu"},
    {"fa_af", "icu"}, {"ff", "icu"}, {"fi", "icu"}, {"fil", "icu"}, {"fo", "icu"}, {"fr", "icu"},
    {"fr_ca", "icu"}, {"fy", "icu"}, {"ga", "icu"}, {"gl", "icu"}, {"gu", "icu"}, {"ha", "icu"},
    {"haw", "icu"}, {"he", "icu"}, {"he_il", "icu"}, {"hi", "icu"}, {"hr", "icu"}, {"hsb", "icu"},
    {"hu", "icu"}, {"hy", "icu"}, {"id", "icu"}, {"id_id", "icu"}, {"ig", "icu"}, {"is", "icu"},
    {"it", "icu"}, {"ja", "icu"}, {"ka", "icu"}, {"kk", "icu"}, {"kl", "icu"}, {"km", "icu"},
    {"kn", "icu"}, {"ko", "icu"}, {"kok", "icu"}, {"ku", "icu"}, {"ky", "icu"}, {"lb", "icu"},
    {"lkt", "icu"}, {"ln", "icu"}, {"lo", "icu"}, {"lt", "icu"}, {"lv", "icu"}, {"mk", "icu"},
    {"ml", "icu"}, {"mn", "icu"}, {"mr", "icu"}, {"ms", "icu"}, {"mt", "icu"}, {"my", "icu"},
    {"nb", "icu"}, {"nb_no", "icu"}, {"ne", "icu"}, {"nl", "icu"}, {"nn", "icu"}, {"om", "icu"},
    {"or", "icu"}, {"pa", "icu"}, {"pa_in", "icu"}, {"pl", "icu"}, {"ps", "icu"}, {"pt", "icu"},
    {"ro", "icu"}, {"ru", "icu"}, {"sa", "icu"}, {"se", "icu"}, {"si", "icu"}, {"sk", "icu"},
    {"sl", "icu"}, {"smn", "icu"}, {"sq", "icu"}, {"sr", "icu"}, {"sr_ba", "icu"}, {"sr_me", "icu"},
    {"sr_rs", "icu"}, {"sv", "icu"}, {"sw", "icu"}, {"ta", "icu"}, {"te", "icu"}, {"th", "icu"},
    {"tk", "icu"}, {"to", "icu"}, {"tr", "icu"}, {"ug", "icu"}, {"uk", "icu"}, {"ur", "icu"},
    {"uz", "icu"}, {"vi", "icu"}, {"wae", "icu"}, {"wo", "icu"}, {"xh", "icu"}, {"yi", "icu"},
    {"yo", "icu"}, {"yue", "icu"}, {"yue_cn", "icu"}, {"zh", "icu"}, {"zh_cn", "icu"}, {"zh_hk", "icu"},
    {"zh_mo", "icu"}, {"zh_sg", "icu"}, {"zh_tw", "icu"}, {"zu", "icu"}}; // END_OF_EXTENSION_COLLATIONS

// Note: these are currently hardcoded in scripts/generate_extensions_function.py
// TODO: automate by passing through to script via duckdb
static constexpr ExtensionEntry EXTENSION_FILE_PREFIXES[] = {
    {"http://", "httpfs"}, {"https://", "httpfs"}, {"s3://", "httpfs"}, {"s3a://", "httpfs"}, {"s3n://", "httpfs"},
    {"gcs://", "httpfs"}, {"gs://", "httpfs"}, {"r2://", "httpfs"}, {"azure://", "azure"}, {"az://", "azure"},
    {"abfss://", "azure"}, {"hf://", "httpfs"}
}; // END_OF_EXTENSION_FILE_PREFIXES

// Note: these are currently hardcoded in scripts/generate_extensions_function.py
// TODO: automate by passing through to script via duckdb
static constexpr ExtensionEntry EXTENSION_FILE_POSTFIXES[] = {
    {".parquet", "parquet"},
    {".json", "json"},
    {".jsonl", "json"},
    {".ndjson", "json"},
    {".shp", "spatial"},
    {".gpkg", "spatial"},
    {".fgb", "spatial"},
    {".xlsx", "excel"},
    {".avro", "avro"},
}; // END_OF_EXTENSION_FILE_POSTFIXES

// Note: these are currently hardcoded in scripts/generate_extensions_function.py
// TODO: automate by passing through to script via duckdb
static constexpr ExtensionEntry EXTENSION_FILE_CONTAINS[] = {
    {".parquet?", "parquet"},
    {".json?", "json"},
    {".ndjson?", ".jsonl?"},
    {".jsonl?", ".ndjson?"}
}; // EXTENSION_FILE_CONTAINS

// Note: these are currently hardcoded in scripts/generate_extensions_function.py
// TODO: automate by passing through to script via duckdb
static constexpr ExtensionEntry EXTENSION_SECRET_PROVIDERS[] = {{"s3/config", "httpfs"},
                                                                {"gcs/config", "httpfs"},
                                                                {"r2/config", "httpfs"},
                                                                {"s3/credential_chain", "aws"},
                                                                {"gcs/credential_chain", "aws"},
                                                                {"r2/credential_chain", "aws"},
                                                                {"aws/credential_chain", "aws"},
                                                                {"azure/access_token", "azure"},
                                                                {"azure/config", "azure"},
                                                                {"azure/credential_chain", "azure"},
                                                                {"azure/service_principal", "azure"},
                                                                {"huggingface/config", "httfps"},
                                                                {"huggingface/credential_chain", "httpfs"},
                                                                {"bearer/config", "httpfs"},
                                                                {"mysql/config", "mysql_scanner"},
                                                                {"postgres/config", "postgres_scanner"}
}; // EXTENSION_SECRET_PROVIDERS

static constexpr const char *AUTOLOADABLE_EXTENSIONS[] = {
    "avro",
    "aws",
    "azure",
    "autocomplete",
    "core_functions",
    "delta",
    "ducklake",
    "encodings",
    "excel",
    "fts",
    "httpfs",
    "iceberg",
    "inet",
    "icu",
    "json",
    "motherduck",
    "mysql_scanner",
    "parquet",
    "sqlite_scanner",
    "sqlsmith",
    "postgres_scanner",
    "tpcds",
    "tpch",
    "uc_catalog",
    "ui"
}; // END_OF_AUTOLOADABLE_EXTENSIONS

} // namespace duckdb"""

    data.verify_export()

    file = open(HEADER_PATH, 'w')
    file.write(INCLUDE_HEADER)

    exported_functions = data.export_functions()
    file.write(exported_functions)

    exported_overloads = data.export_function_overloads()
    file.write(exported_overloads)

    exported_settings = data.export_settings()
    file.write(exported_settings)

    exported_secret_types = data.export_secret_types()
    file.write(exported_secret_types)

    file.write(INCLUDE_FOOTER)
    file.close()
# Extensions that can be autoloaded, but are not buildable by DuckDB CI
HARDCODED_EXTENSION_FUNCTIONS = ExtensionFunction.create_map(
    [
        ("delta_scan", "delta", "CatalogType::TABLE_FUNCTION_ENTRY"),
    ]
)


def main():
    check_prerequisites()

    extension_names: List[str] = get_extension_names()

    extension_data = ExtensionData()
    # Collect the list of functions/settings without any extensions loaded
    extension_data.set_base()

    # TODO: add 'purge' option to ignore existing entries ??
    parsed_entries = ParsedEntries(HEADER_PATH)
    parsed_entries.filter_entries(extension_names)

    # Add the entries we parsed from the HEADER_PATH
    extension_data.add_entries(parsed_entries)

    for extension_name in extension_names:
        print(extension_name)
        # For every extension, add the functions/settings added by the extension
        extension_data.add_extension(extension_name)

    # Add hardcoded extension entries
    for key, value in HARDCODED_EXTENSION_FUNCTIONS.items():
        extension_data.function_map[key] = value

    if args.validate:
        extension_data.validate()
        return

    write_header(extension_data)


if __name__ == '__main__':
    main()
93
external/duckdb/scripts/generate_flex.py
vendored
Normal file
@@ -0,0 +1,93 @@
|
||||
# use flex to generate the scanner file for the parser
|
||||
# the following version of bison is used:
|
||||
# flex 2.5.35 Apple(flex-32)
|
||||
import os
|
||||
import subprocess
|
||||
import re
|
||||
from sys import platform
|
||||
import sys
|
||||
from python_helpers import open_utf8
|
||||
|
||||
flex_bin = 'flex'
|
||||
pg_path = os.path.join('third_party', 'libpg_query')
|
||||
namespace = 'duckdb_libpgquery'
|
||||
|
||||
for arg in sys.argv[1:]:
|
||||
if arg.startswith("--flex="):
|
||||
flex_bin = arg.replace("--flex=", "")
|
||||
elif arg.startswith("--custom_dir_prefix"):
|
||||
pg_path = arg.split("=")[1] + pg_path
|
||||
elif arg.startswith("--namespace"):
|
||||
namespace = arg.split("=")[1]
|
||||
else:
|
||||
raise Exception("Unrecognized argument: " + arg + ", expected --flex, --custom_dir_prefix, --namespace")
|
||||
|
||||
flex_file_path = os.path.join(pg_path, 'scan.l')
|
||||
target_file = os.path.join(pg_path, 'src_backend_parser_scan.cpp')
|
||||
|
||||
proc = subprocess.Popen(
|
||||
[flex_bin, '--nounistd', '-o', target_file, flex_file_path], stdout=subprocess.PIPE, stderr=subprocess.PIPE
|
||||
)
|
||||
stdout = proc.stdout.read().decode('utf8')
|
||||
stderr = proc.stderr.read().decode('utf8')
|
||||
if proc.wait() != 0 or len(stderr) > 0:
|
||||
print("Flex failed")
|
||||
print("stdout: ", stdout)
|
||||
print("stderr: ", stderr)
|
||||
exit(1)
|
||||
|
||||
with open_utf8(target_file, 'r') as f:
|
||||
text = f.read()
|
||||
|
||||
# convert this from 'int' to 'yy_size_t' to avoid triggering a warning
|
||||
text = text.replace('int yy_buf_size;\n', 'yy_size_t yy_buf_size;\n')
|
||||
|
||||
# add the libpg_query namespace
|
||||
text = text.replace(
|
||||
'''
|
||||
#ifndef FLEXINT_H
|
||||
#define FLEXINT_H
|
||||
''',
|
||||
'''
|
||||
#ifndef FLEXINT_H
|
||||
#define FLEXINT_H
|
||||
namespace '''
|
||||
+ namespace
|
||||
+ ''' {
|
||||
''',
|
||||
)
|
||||
text = text.replace('register ', '')
|
||||
|
||||
text = text + "\n} /* " + namespace + " */\n"
|
||||
|
||||
text = re.sub('(?:[(]void[)][ ]*)?fprintf', '//', text)
|
||||
text = re.sub('exit[(]', 'throw std::runtime_error(msg); //', text)
|
||||
text = re.sub(r'\n\s*if\s*[(]\s*!\s*yyin\s*[)]\s*\n\s*yyin\s*=\s*stdin;\s*\n', '\n', text)
|
||||
text = re.sub(r'\n\s*if\s*[(]\s*!\s*yyout\s*[)]\s*\n\s*yyout\s*=\s*stdout;\s*\n', '\n', text)
|
||||
|
||||
file_null = 'NULL' if platform == 'linux' else '[(]FILE [*][)] 0'
|
||||
|
||||
text = re.sub(
|
||||
rf'[#]ifdef\s*YY_STDINIT\n\s*yyin = stdin;\n\s*yyout = stdout;\n[#]else\n\s*yyin = {file_null};\n\s*yyout = {file_null};\n[#]endif',
|
||||
' yyin = (FILE *) 0;\n yyout = (FILE *) 0;',
|
||||
text,
|
||||
)
|
||||
|
||||
if 'stdin;' in text:
|
||||
print("STDIN not removed!")
|
||||
# exit(1)
|
||||
|
||||
if 'stdout' in text:
|
||||
print("STDOUT not removed!")
|
||||
# exit(1)
|
||||
|
||||
if 'fprintf(' in text:
|
||||
print("PRINTF not removed!")
|
||||
# exit(1)
|
||||
|
||||
if 'exit(' in text:
|
||||
print("EXIT not removed!")
|
||||
# exit(1)
|
||||
|
||||
with open_utf8(target_file, 'w+') as f:
|
||||
f.write(text)
|
||||
259
external/duckdb/scripts/generate_functions.py
vendored
Normal file
@@ -0,0 +1,259 @@
|
||||
import os
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
function_groups = {
|
||||
('src', 'include/duckdb', 'function'): ['scalar', 'aggregate'],
|
||||
('extension', 'core_functions/include', 'core_functions'): ['scalar', 'aggregate'],
|
||||
}
|
||||
|
||||
|
||||
def get_header():
|
||||
return '''//===----------------------------------------------------------------------===//
|
||||
// DuckDB
|
||||
//
|
||||
// {HEADER}_functions.hpp
|
||||
//
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
// This file is automatically generated by scripts/generate_functions.py
|
||||
// Do not edit this file manually, your changes will be overwritten
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "duckdb/function/function_set.hpp"
|
||||
|
||||
namespace duckdb {
|
||||
|
||||
'''
|
||||
|
||||
|
||||
def get_footer():
|
||||
return '''} // namespace duckdb
|
||||
'''
|
||||
|
||||
|
||||
def main():
|
||||
function_type_set = {}
|
||||
for (root, include_dir, group), function_types in sorted(function_groups.items()):
|
||||
all_functions_group = []
|
||||
group_dir = Path(group)
|
||||
for function_type in function_types:
|
||||
type_dir = Path(root).joinpath(group_dir.joinpath(function_type))
|
||||
relative_function_paths = sorted(
|
||||
[f'{group}/{function_type}/{f.name}' for f in type_dir.iterdir() if f.is_dir()]
|
||||
)
|
||||
for function_path in relative_function_paths:
|
||||
if Path(normalize_path_separators(f'{root}/{function_path}/functions.json')).exists():
|
||||
create_header_file(root, include_dir, function_path, all_functions_group, function_type_set)
|
||||
create_function_list_file(root, group, all_functions_group)
|
||||
|
||||
|
||||
def normalize_path_separators(x):
|
||||
return os.path.sep.join(x.split('/'))
|
||||
|
||||
|
||||
def legal_struct_name(name):
|
||||
return name.isalnum()
|
||||
|
||||
|
||||
def get_struct_name(function_name):
|
||||
return function_name.replace('_', ' ').title().replace(' ', '') + 'Fun'
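# For illustration: get_struct_name('array_agg') yields 'ArrayAggFun'.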
|
||||
|
||||
|
||||
def get_parameter_line(variants):
|
||||
if not all(
|
||||
isinstance(variant['parameters'], list)
|
||||
and all(isinstance(param, dict) for param in variant['parameters'])
|
||||
and all('name' in param.keys() for param in variant['parameters'])
|
||||
for variant in variants
|
||||
):
|
||||
raise ValueError(
|
||||
f"invalid parameters for variants {variants}\nParameters should have format: \"parameters\": [{{\"name\": <param_name>, \"type\": <param_type>}}, ...]"
|
||||
)
|
||||
return "\\001".join(
|
||||
",".join(
|
||||
param['name'] + "::" + param['type'] if ('type' in param) else param['name']
|
||||
for param in variant['parameters']
|
||||
)
|
||||
for variant in variants
|
||||
)
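# Illustrative (hypothetical) input satisfying the format checked above:
#   variants = [{"parameters": [{"name": "col", "type": "ANY"}, {"name": "n"}]}]
# would produce "col::ANY,n"; multiple variants are joined with the "\001" separator used above.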
|
||||
|
||||
|
||||
def get_description_line(variants):
|
||||
return "\\001".join([variant['description'] for variant in variants])
|
||||
|
||||
|
||||
def get_example_line(variants):
|
||||
return "\\001".join([example_from_json(variant) for variant in variants])
|
||||
|
||||
|
||||
def example_from_json(json_record):
|
||||
if 'example' in json_record:
|
||||
example_line = sanitize_string(json_record['example'])
|
||||
elif 'examples' in json_record:
|
||||
example_line = examples_to_line(json_record['examples'])
|
||||
else:
|
||||
example_line = ''
|
||||
return example_line
|
||||
|
||||
|
||||
def examples_to_line(example_list):
|
||||
return "\\002".join([sanitize_string(example) for example in example_list])
|
||||
|
||||
|
||||
def get_category_line(variants):
|
||||
return "\\001".join([categories_from_json(variant) for variant in variants])
|
||||
|
||||
|
||||
def categories_from_json(json_record):
|
||||
if 'categories' in json_record:
|
||||
category_line = ','.join([category.strip() for category in json_record['categories']])
|
||||
else:
|
||||
category_line = ''
|
||||
return category_line
|
||||
|
||||
|
||||
def sanitize_string(text):
|
||||
return text.replace('\\', '\\\\').replace('"', '\\"')
|
||||
|
||||
|
||||
def create_header_file(root, include_dir, path, all_function_list, function_type_set):
|
||||
header_path = normalize_path_separators(f'{root}/{include_dir}/{path}_functions.hpp')
|
||||
json_path = normalize_path_separators(f'{root}/{path}/functions.json')
|
||||
with open(json_path, 'r') as f:
|
||||
parsed_json = json.load(f)
|
||||
new_text = get_header().replace('{HEADER}', path)
|
||||
for entry in parsed_json:
|
||||
function_text = ''
|
||||
if 'struct' in entry:
|
||||
struct_name = entry['struct']
|
||||
else:
|
||||
struct_name = get_struct_name(entry['name'])
|
||||
if not legal_struct_name(struct_name):
|
||||
print(f'Struct name {struct_name} is not a valid struct name!')
|
||||
exit(1)
|
||||
if struct_name in function_type_set:
|
||||
raise Exception("Duplicate entry " + struct_name)
|
||||
function_type_set[struct_name] = entry['type']
|
||||
if entry['type'] == 'scalar_function':
|
||||
function_text = 'static ScalarFunction GetFunction();'
|
||||
all_function_list.append([entry['name'], f"DUCKDB_SCALAR_FUNCTION({struct_name})"])
|
||||
elif entry['type'] == 'scalar_function_set':
|
||||
function_text = 'static ScalarFunctionSet GetFunctions();'
|
||||
all_function_list.append([entry['name'], f"DUCKDB_SCALAR_FUNCTION_SET({struct_name})"])
|
||||
elif entry['type'] == 'aggregate_function':
|
||||
function_text = 'static AggregateFunction GetFunction();'
|
||||
all_function_list.append([entry['name'], f"DUCKDB_AGGREGATE_FUNCTION({struct_name})"])
|
||||
elif entry['type'] == 'aggregate_function_set':
|
||||
function_text = 'static AggregateFunctionSet GetFunctions();'
|
||||
all_function_list.append([entry['name'], f"DUCKDB_AGGREGATE_FUNCTION_SET({struct_name})"])
|
||||
else:
|
||||
print("Unknown entry type " + entry['type'] + ' for entry ' + struct_name)
|
||||
exit(1)
|
||||
if 'variants' in entry:
|
||||
parameter_line = get_parameter_line(entry['variants'])
|
||||
description_line = get_description_line(entry['variants'])
|
||||
example_line = get_example_line(entry['variants'])
|
||||
category_line = get_category_line(entry['variants'])
|
||||
else:
|
||||
parameter_line = entry['parameters'].replace(' ', '') if 'parameters' in entry else ''
|
||||
description_line = sanitize_string(entry['description'])
|
||||
example_line = example_from_json(entry)
|
||||
category_line = categories_from_json(entry)
|
||||
if 'extra_functions' in entry:
|
||||
for func_text in entry['extra_functions']:
|
||||
function_text += '\n ' + func_text
|
||||
new_text += (
|
||||
'''struct {STRUCT} {
|
||||
static constexpr const char *Name = "{NAME}";
|
||||
static constexpr const char *Parameters = "{PARAMETERS}";
|
||||
static constexpr const char *Description = "{DESCRIPTION}";
|
||||
static constexpr const char *Example = "{EXAMPLE}";
|
||||
static constexpr const char *Categories = "{CATEGORIES}";
|
||||
|
||||
{FUNCTION}
|
||||
};
|
||||
|
||||
'''.replace(
|
||||
'{STRUCT}', struct_name
|
||||
)
|
||||
.replace('{NAME}', entry['name'])
|
||||
.replace('{PARAMETERS}', parameter_line)
|
||||
.replace('{DESCRIPTION}', description_line)
|
||||
.replace('{EXAMPLE}', example_line)
|
||||
.replace('{CATEGORIES}', category_line)
|
||||
.replace('{FUNCTION}', function_text)
|
||||
)
|
||||
alias_count = 1
|
||||
if 'aliases' in entry:
|
||||
for alias in entry['aliases']:
|
||||
alias_struct_name = get_struct_name(alias)
|
||||
if not legal_struct_name(alias_struct_name):
|
||||
alias_struct_name = struct_name + 'Alias'
|
||||
if alias_count > 1:
|
||||
alias_struct_name += str(alias_count)
|
||||
alias_count += 1
|
||||
|
||||
aliased_type = entry['type']
|
||||
if aliased_type == 'scalar_function':
|
||||
all_function_list.append([alias, f"DUCKDB_SCALAR_FUNCTION_ALIAS({alias_struct_name})"])
|
||||
elif aliased_type == 'scalar_function_set':
|
||||
all_function_list.append([alias, f"DUCKDB_SCALAR_FUNCTION_SET_ALIAS({alias_struct_name})"])
|
||||
elif aliased_type == 'aggregate_function':
|
||||
all_function_list.append([alias, f"DUCKDB_AGGREGATE_FUNCTION_ALIAS({alias_struct_name})"])
|
||||
elif aliased_type == 'aggregate_function_set':
|
||||
all_function_list.append([alias, f"DUCKDB_AGGREGATE_FUNCTION_SET_ALIAS({alias_struct_name})"])
|
||||
else:
|
||||
print("Unknown entry type " + aliased_type + ' for entry ' + struct_name)
|
||||
exit(1)
|
||||
function_type_set[alias_struct_name] = aliased_type
|
||||
new_text += (
|
||||
'''struct {STRUCT} {
|
||||
using ALIAS = {ALIAS};
|
||||
|
||||
static constexpr const char *Name = "{NAME}";
|
||||
};
|
||||
|
||||
'''.replace(
|
||||
'{STRUCT}', alias_struct_name
|
||||
)
|
||||
.replace('{NAME}', alias)
|
||||
.replace('{ALIAS}', struct_name)
|
||||
)
|
||||
new_text += get_footer()
|
||||
with open(header_path, 'w+') as f:
|
||||
f.write(new_text)
|
||||
|
||||
|
||||
def create_function_list_file(root, group, all_function_list):
|
||||
function_list_file = normalize_path_separators(f'{root}/{group}/function_list.cpp')
|
||||
with open(function_list_file, 'r') as f:
|
||||
text = f.read()
|
||||
|
||||
static_function = f'static const StaticFunctionDefinition {group}[]' ' = {'
|
||||
pos = text.find(static_function)
|
||||
header = text[:pos]
|
||||
footer_lines = text[pos:].split('\n')
|
||||
footer = ''
|
||||
for i in range(len(footer_lines)):
|
||||
if len(footer_lines[i]) == 0:
|
||||
footer = '\n'.join(footer_lines[i:])
|
||||
break
|
||||
|
||||
new_text = header
|
||||
new_text += static_function + '\n'
|
||||
all_function_list = sorted(all_function_list, key=lambda x: x[0])
|
||||
for entry in all_function_list:
|
||||
new_text += '\t' + entry[1] + ',\n'
|
||||
new_text += '\tFINAL_FUNCTION\n};\n'
|
||||
new_text += footer
|
||||
|
||||
with open(function_list_file, 'w+') as f:
|
||||
f.write(new_text)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
309
external/duckdb/scripts/generate_grammar.py
vendored
Normal file
@@ -0,0 +1,309 @@
|
||||
# use bison to generate the parser files
|
||||
# the following version of bison is used:
|
||||
# bison (GNU Bison) 2.3
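# Example invocation (illustrative; the bison path depends on your installation):
#   python3 scripts/generate_grammar.py --bison=/opt/homebrew/opt/bison/bin/bison --counterexamples
# This regenerates src_backend_parser_gram.cpp and include/parser/kwlist.hpp under third_party/libpg_query.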
|
||||
import os
|
||||
import subprocess
|
||||
import re
|
||||
import sys
|
||||
from python_helpers import open_utf8
|
||||
|
||||
bison_location = "bison"
|
||||
base_dir = 'third_party/libpg_query/grammar'
|
||||
pg_dir = 'third_party/libpg_query'
|
||||
namespace = 'duckdb_libpgquery'
|
||||
|
||||
counterexamples = False
|
||||
run_update = False
|
||||
verbose = False
|
||||
for arg in sys.argv[1:]:
|
||||
if arg.startswith("--bison="):
|
||||
bison_location = arg.replace("--bison=", "")
|
||||
elif arg.startswith("--counterexamples"):
|
||||
counterexamples = True
|
||||
elif arg.startswith("--update"):
|
||||
run_update = True
|
||||
# allow a prefix to the source and target directories
|
||||
elif arg.startswith("--custom_dir_prefix"):
|
||||
base_dir = arg.split("=")[1] + base_dir
|
||||
pg_dir = arg.split("=")[1] + pg_dir
|
||||
elif arg.startswith("--namespace"):
|
||||
namespace = arg.split("=")[1]
|
||||
elif arg.startswith("--verbose"):
|
||||
verbose = True
|
||||
else:
|
||||
raise Exception(
|
||||
"Unrecognized argument: "
|
||||
+ arg
|
||||
+ ", expected --counterexamples, --bison=/loc/to/bison, --custom_dir_prefix, --namespace, --verbose"
|
||||
)
|
||||
|
||||
template_file = os.path.join(base_dir, 'grammar.y')
|
||||
target_file = os.path.join(base_dir, 'grammar.y.tmp')
|
||||
header_file = os.path.join(base_dir, 'grammar.hpp')
|
||||
source_file = os.path.join(base_dir, 'grammar.cpp')
|
||||
type_dir = os.path.join(base_dir, 'types')
|
||||
rule_dir = os.path.join(base_dir, 'statements')
|
||||
result_source = os.path.join(base_dir, 'grammar_out.cpp')
|
||||
result_header = os.path.join(base_dir, 'grammar_out.hpp')
|
||||
target_source_loc = os.path.join(pg_dir, 'src_backend_parser_gram.cpp')
|
||||
target_header_loc = os.path.join(pg_dir, 'include/parser/gram.hpp')
|
||||
kwlist_header = os.path.join(pg_dir, 'include/parser/kwlist.hpp')
|
||||
|
||||
|
||||
# parse the keyword lists
|
||||
def read_list_from_file(fname):
|
||||
with open_utf8(fname, 'r') as f:
|
||||
return [x.strip() for x in f.read().split('\n') if len(x.strip()) > 0]
|
||||
|
||||
|
||||
kwdir = os.path.join(base_dir, 'keywords')
|
||||
unreserved_keywords = read_list_from_file(os.path.join(kwdir, 'unreserved_keywords.list'))
|
||||
colname_keywords = read_list_from_file(os.path.join(kwdir, 'column_name_keywords.list'))
|
||||
func_name_keywords = read_list_from_file(os.path.join(kwdir, 'func_name_keywords.list'))
|
||||
type_name_keywords = read_list_from_file(os.path.join(kwdir, 'type_name_keywords.list'))
|
||||
reserved_keywords = read_list_from_file(os.path.join(kwdir, 'reserved_keywords.list'))
|
||||
|
||||
|
||||
def strip_p(x):
|
||||
if x.endswith("_P"):
|
||||
return x[:-2]
|
||||
else:
|
||||
return x
|
||||
|
||||
|
||||
unreserved_keywords.sort(key=lambda x: strip_p(x))
|
||||
colname_keywords.sort(key=lambda x: strip_p(x))
|
||||
func_name_keywords.sort(key=lambda x: strip_p(x))
|
||||
type_name_keywords.sort(key=lambda x: strip_p(x))
|
||||
reserved_keywords.sort(key=lambda x: strip_p(x))
|
||||
|
||||
statements = read_list_from_file(os.path.join(base_dir, 'statements.list'))
|
||||
statements.sort()
|
||||
if len(statements) == 0:
|
||||
print("Need at least one statement")
|
||||
exit(1)
|
||||
|
||||
# verify there are no duplicate keywords and create big sorted list of keywords
|
||||
kwdict = {}
|
||||
for kw in unreserved_keywords:
|
||||
kwdict[kw] = 'UNRESERVED_KEYWORD'
|
||||
|
||||
for kw in colname_keywords:
|
||||
kwdict[kw] = 'COL_NAME_KEYWORD'
|
||||
|
||||
for kw in func_name_keywords:
|
||||
kwdict[kw] = 'TYPE_FUNC_NAME_KEYWORD'
|
||||
|
||||
for kw in type_name_keywords:
|
||||
kwdict[kw] = 'TYPE_FUNC_NAME_KEYWORD'
|
||||
|
||||
for kw in reserved_keywords:
|
||||
kwdict[kw] = 'RESERVED_KEYWORD'
|
||||
|
||||
kwlist = [(x, kwdict[x]) for x in kwdict.keys()]
|
||||
kwlist.sort(key=lambda x: strip_p(x[0]))
|
||||
|
||||
# now generate kwlist.h
|
||||
# PG_KEYWORD("abort", ABORT_P, UNRESERVED_KEYWORD)
|
||||
kwtext = (
|
||||
"""
|
||||
namespace """
|
||||
+ namespace
|
||||
+ """ {
|
||||
#define PG_KEYWORD(a,b,c) {a,b,c},
|
||||
|
||||
const PGScanKeyword ScanKeywords[] = {
|
||||
"""
|
||||
)
|
||||
for tpl in kwlist:
|
||||
kwtext += 'PG_KEYWORD("%s", %s, %s)\n' % (strip_p(tpl[0]).lower(), tpl[0], tpl[1])
|
||||
kwtext += (
|
||||
"""
|
||||
};
|
||||
|
||||
const int NumScanKeywords = lengthof(ScanKeywords);
|
||||
} // namespace """
|
||||
+ namespace
|
||||
+ """
|
||||
"""
|
||||
)
|
||||
|
||||
with open_utf8(kwlist_header, 'w+') as f:
|
||||
f.write(kwtext)
|
||||
|
||||
|
||||
# generate the final main.y.tmp file
|
||||
# first read the template file
|
||||
with open_utf8(template_file, 'r') as f:
|
||||
text = f.read()
|
||||
|
||||
# now perform a series of replacements in the file to construct the final yacc file
|
||||
|
||||
|
||||
def get_file_contents(fpath, add_line_numbers=False):
|
||||
with open_utf8(fpath, 'r') as f:
|
||||
result = f.read()
|
||||
if add_line_numbers:
|
||||
return '#line 1 "%s"\n' % (fpath,) + result
|
||||
else:
|
||||
return result
|
||||
|
||||
|
||||
# grammar.hpp
|
||||
text = text.replace("{{{ GRAMMAR_HEADER }}}", get_file_contents(header_file, True))
|
||||
|
||||
# grammar.cpp
|
||||
text = text.replace("{{{ GRAMMAR_SOURCE }}}", get_file_contents(source_file, True))
|
||||
|
||||
# keyword list
|
||||
kw_token_list = "%token <keyword> " + " ".join([x[0] for x in kwlist])
|
||||
|
||||
text = text.replace("{{{ KEYWORDS }}}", kw_token_list)
|
||||
|
||||
# statements
|
||||
stmt_list = "stmt: " + "\n\t| ".join(statements) + "\n\t| /*EMPTY*/\n\t{ $$ = NULL; }\n"
|
||||
text = text.replace("{{{ STATEMENTS }}}", stmt_list)
|
||||
|
||||
# keywords
|
||||
# keywords can EITHER be reserved, unreserved, or some combination of (col_name, type_name, func_name)
|
||||
# that means duplicates are ONLY allowed between (col_name, type_name and func_name)
|
||||
# having a keyword be both reserved and unreserved is an error
|
||||
# as is having a keyword both reserved and col_name, for example
|
||||
# verify that this is the case
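# For example, a keyword may appear in both column_name_keywords.list and func_name_keywords.list,
# but a keyword listed in reserved_keywords.list may not also appear in unreserved_keywords.list
# (or in any of the other lists); such duplicates are rejected below.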
|
||||
reserved_dict = {}
|
||||
unreserved_dict = {}
|
||||
other_dict = {}
|
||||
for r in reserved_keywords:
|
||||
if r in reserved_dict:
|
||||
print("Duplicate keyword " + r + " in reserved keywords")
|
||||
exit(1)
|
||||
reserved_dict[r] = True
|
||||
|
||||
for ur in unreserved_keywords:
|
||||
if ur in unreserved_dict:
|
||||
print("Duplicate keyword " + ur + " in unreserved keywords")
|
||||
exit(1)
|
||||
if ur in reserved_dict:
|
||||
print("Keyword " + ur + " is marked as both unreserved and reserved")
|
||||
exit(1)
|
||||
unreserved_dict[ur] = True
|
||||
|
||||
|
||||
def add_to_other_keywords(kw, list_name):
|
||||
global unreserved_dict
|
||||
global reserved_dict
|
||||
global other_dict
|
||||
if kw in unreserved_dict:
|
||||
print("Keyword " + kw + " is marked as both unreserved and " + list_name)
|
||||
exit(1)
|
||||
if kw in reserved_dict:
|
||||
print("Keyword " + kw + " is marked as both reserved and " + list_name)
|
||||
exit(1)
|
||||
other_dict[kw] = True
|
||||
|
||||
|
||||
for cr in colname_keywords:
|
||||
add_to_other_keywords(cr, "colname")
|
||||
|
||||
type_func_name_dict = {}
|
||||
for tr in type_name_keywords:
|
||||
add_to_other_keywords(tr, "typename")
|
||||
type_func_name_dict[tr] = True
|
||||
|
||||
for fr in func_name_keywords:
|
||||
add_to_other_keywords(fr, "funcname")
|
||||
type_func_name_dict[fr] = True
|
||||
|
||||
type_func_name_keywords = list(type_func_name_dict.keys())
|
||||
type_func_name_keywords.sort()
|
||||
|
||||
all_keywords = list(reserved_dict.keys()) + list(unreserved_dict.keys()) + list(other_dict.keys())
|
||||
all_keywords.sort()
|
||||
|
||||
other_keyword = list(other_dict.keys())
|
||||
other_keyword.sort()
|
||||
|
||||
kw_definitions = "unreserved_keyword: " + " | ".join(unreserved_keywords) + "\n"
|
||||
kw_definitions += "col_name_keyword: " + " | ".join(colname_keywords) + "\n"
|
||||
kw_definitions += "func_name_keyword: " + " | ".join(func_name_keywords) + "\n"
|
||||
kw_definitions += "type_name_keyword: " + " | ".join(type_name_keywords) + "\n"
|
||||
kw_definitions += "other_keyword: " + " | ".join(other_keyword) + "\n"
|
||||
kw_definitions += "type_func_name_keyword: " + " | ".join(type_func_name_keywords) + "\n"
|
||||
kw_definitions += "reserved_keyword: " + " | ".join(reserved_keywords) + "\n"
|
||||
text = text.replace("{{{ KEYWORD_DEFINITIONS }}}", kw_definitions)
|
||||
|
||||
|
||||
# types
|
||||
def concat_dir(dname, extension, add_line_numbers=False):
|
||||
result = ""
|
||||
for fname in os.listdir(dname):
|
||||
fpath = os.path.join(dname, fname)
|
||||
if os.path.isdir(fpath):
|
||||
result += concat_dir(fpath, extension)
|
||||
else:
|
||||
if not fname.endswith(extension):
|
||||
continue
|
||||
result += get_file_contents(fpath, add_line_numbers)
|
||||
return result
|
||||
|
||||
|
||||
type_definitions = concat_dir(type_dir, ".yh")
|
||||
# add statement types as well
|
||||
for stmt in statements:
|
||||
type_definitions += "%type <node> " + stmt + "\n"
|
||||
|
||||
text = text.replace("{{{ TYPES }}}", type_definitions)
|
||||
|
||||
# grammar rules
|
||||
grammar_rules = concat_dir(rule_dir, ".y", True)
|
||||
|
||||
text = text.replace("{{{ GRAMMAR RULES }}}", grammar_rules)
|
||||
|
||||
# finally write the yacc file into the target file
|
||||
with open_utf8(target_file, 'w+') as f:
|
||||
f.write(text)
|
||||
|
||||
# generate the bison
|
||||
cmd = [bison_location]
|
||||
if counterexamples:
|
||||
print("Attempting to print counterexamples (-Wcounterexamples)")
|
||||
cmd += ["-Wcounterexamples"]
|
||||
if run_update:
|
||||
cmd += ["--update"]
|
||||
if verbose:
|
||||
cmd += ["--verbose"]
|
||||
cmd += ["-o", result_source, "-d", target_file]
|
||||
print(' '.join(cmd))
|
||||
proc = subprocess.Popen(cmd, stderr=subprocess.PIPE)
|
||||
res = proc.wait(timeout=10) # ensure CI does not hang as was seen when running with Bison 3.x release.
|
||||
|
||||
if res != 0:
|
||||
text = proc.stderr.read().decode('utf8')
|
||||
print(text)
|
||||
if 'shift/reduce' in text and not counterexamples:
|
||||
print("---------------------------------------------------------------------")
|
||||
print("In case of shift/reduce conflicts, try re-running with --counterexamples")
|
||||
print("Note: this requires a more recent version of Bison (e.g. version 3.8)")
|
||||
print("On a Macbook you can obtain this using \"brew install bison\"")
|
||||
if counterexamples and 'time limit exceeded' in text:
|
||||
print("---------------------------------------------------------------------")
|
||||
print(
|
||||
"The counterexamples time limit was exceeded. This likely means that no useful counterexample was generated."
|
||||
)
|
||||
print("")
|
||||
print("The counterexamples time limit can be increased by setting the TIME_LIMIT environment variable, e.g.:")
|
||||
print("export TIME_LIMIT=100")
|
||||
exit(1)
|
||||
|
||||
|
||||
os.rename(result_source, target_source_loc)
|
||||
os.rename(result_header, target_header_loc)
|
||||
|
||||
with open_utf8(target_source_loc, 'r') as f:
|
||||
text = f.read()
|
||||
|
||||
text = text.replace('#include "grammar_out.hpp"', '#include "include/parser/gram.hpp"')
|
||||
text = text.replace('yynerrs = 0;', 'yynerrs = 0; (void)yynerrs;')
|
||||
|
||||
with open_utf8(target_source_loc, 'w+') as f:
|
||||
f.write(text)
|
||||
399
external/duckdb/scripts/generate_metric_enums.py
vendored
Normal file
@@ -0,0 +1,399 @@
|
||||
# Script that takes src/include/duckdb/common/enums/optimizer_type.hpp, extracts the optimizer types
|
||||
# and adds them to the metrics types.
|
||||
# Then it creates a new file src/include/duckdb/common/enums/metric_type.hpp with the new metrics types as enums.
|
||||
# and generates both test/sql/pragma/profiling/test_default_profiling_settings.test
|
||||
# and test/sql/pragma/profiling/test_custom_profiling_optimizer.test
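# For illustration: an optimizer type such as JOIN_ORDER in optimizer_type.hpp becomes the
# MetricsType member OPTIMIZER_JOIN_ORDER, together with conversion helpers in MetricsUtils
# (OPTIMIZER_JOIN_ORDER is also referenced in the generated profiling tests below).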
|
||||
|
||||
import re
|
||||
import os
|
||||
|
||||
os.chdir(os.path.dirname(__file__))
|
||||
|
||||
metrics_header_file = os.path.join("..", "src", "include", "duckdb", "common", "enums", "metric_type.hpp")
|
||||
metrics_cpp_file = os.path.join("..", "src", "common", "enums", "metric_type.cpp")
|
||||
optimizer_file = os.path.join("..", "src", "include", "duckdb", "common", "enums", "optimizer_type.hpp")
|
||||
|
||||
metrics = [
|
||||
"ATTACH_LOAD_STORAGE_LATENCY",
|
||||
"ATTACH_REPLAY_WAL_LATENCY",
|
||||
"BLOCKED_THREAD_TIME",
|
||||
"CHECKPOINT_LATENCY",
|
||||
"CPU_TIME",
|
||||
"CUMULATIVE_CARDINALITY",
|
||||
"CUMULATIVE_ROWS_SCANNED",
|
||||
"EXTRA_INFO",
|
||||
"LATENCY",
|
||||
"OPERATOR_CARDINALITY",
|
||||
"OPERATOR_NAME",
|
||||
"OPERATOR_ROWS_SCANNED",
|
||||
"OPERATOR_TIMING",
|
||||
"OPERATOR_TYPE",
|
||||
"QUERY_NAME",
|
||||
"RESULT_SET_SIZE",
|
||||
"ROWS_RETURNED",
|
||||
"SYSTEM_PEAK_BUFFER_MEMORY",
|
||||
"SYSTEM_PEAK_TEMP_DIR_SIZE",
|
||||
"TOTAL_BYTES_READ",
|
||||
"TOTAL_BYTES_WRITTEN",
|
||||
"WAITING_TO_ATTACH_LATENCY",
|
||||
]
|
||||
|
||||
phase_timing_metrics = [
|
||||
"ALL_OPTIMIZERS",
|
||||
"CUMULATIVE_OPTIMIZER_TIMING",
|
||||
"PHYSICAL_PLANNER",
|
||||
"PHYSICAL_PLANNER_COLUMN_BINDING",
|
||||
"PHYSICAL_PLANNER_CREATE_PLAN",
|
||||
"PHYSICAL_PLANNER_RESOLVE_TYPES",
|
||||
"PLANNER",
|
||||
"PLANNER_BINDING",
|
||||
]
|
||||
|
||||
query_global_metrics = [
|
||||
"ATTACH_LOAD_STORAGE_LATENCY",
|
||||
"ATTACH_REPLAY_WAL_LATENCY",
|
||||
"BLOCKED_THREAD_TIME",
|
||||
"CHECKPOINT_LATENCY",
|
||||
"SYSTEM_PEAK_BUFFER_MEMORY",
|
||||
"SYSTEM_PEAK_TEMP_DIR_SIZE",
|
||||
"WAITING_TO_ATTACH_LATENCY",
|
||||
]
|
||||
|
||||
optimizer_types = []
|
||||
|
||||
# Regular expression to match the enum values
|
||||
enum_pattern = r'\s*([A-Z_]+)\s*=\s*\d+,?|\s*([A-Z_]+),?'
|
||||
|
||||
inside_enum = False
|
||||
|
||||
# open the optimizer file and extract the optimizer types
|
||||
with open(optimizer_file, "r") as f:
|
||||
for line in f:
|
||||
line = line.strip()
|
||||
|
||||
if line.startswith("enum class OptimizerType"):
|
||||
inside_enum = True
|
||||
continue
|
||||
|
||||
if inside_enum and line.startswith("};"):
|
||||
break
|
||||
|
||||
if inside_enum:
|
||||
match = re.match(enum_pattern, line)
|
||||
if match:
|
||||
optimizer_type = match[1] if match[1] else match[2]
|
||||
if optimizer_type == "INVALID":
|
||||
continue
|
||||
optimizer_types.append(optimizer_type)
|
||||
|
||||
header = """//-------------------------------------------------------------------------
|
||||
// DuckDB
|
||||
//
|
||||
//
|
||||
// duckdb/common/enums/metrics_type.hpp
|
||||
//
|
||||
// This file is automatically generated by scripts/generate_metric_enums.py
|
||||
// Do not edit this file manually, your changes will be overwritten
|
||||
//-------------------------------------------------------------------------\n
|
||||
"""
|
||||
|
||||
typedefs = """struct MetricsTypeHashFunction {
|
||||
uint64_t operator()(const MetricsType &index) const {
|
||||
return std::hash<uint8_t>()(static_cast<uint8_t>(index));
|
||||
}
|
||||
};
|
||||
|
||||
typedef unordered_set<MetricsType, MetricsTypeHashFunction> profiler_settings_t;
|
||||
typedef unordered_map<MetricsType, Value, MetricsTypeHashFunction> profiler_metrics_t;
|
||||
|
||||
"""
|
||||
|
||||
get_optimizer_metric_fun = 'GetOptimizerMetrics()'
|
||||
get_phase_timing_metric_fun = 'GetPhaseTimingMetrics()'
|
||||
get_optimizer_metric_by_type_fun = 'GetOptimizerMetricByType(OptimizerType type)'
|
||||
get_optimizer_type_by_metric_fun = 'GetOptimizerTypeByMetric(MetricsType type)'
|
||||
is_optimizer_metric_fun = 'IsOptimizerMetric(MetricsType type)'
|
||||
is_phase_timing_metric_fun = 'IsPhaseTimingMetric(MetricsType type)'
|
||||
is_query_global_metric_fun = 'IsQueryGlobalMetric(MetricsType type)'
|
||||
|
||||
metrics_class = 'MetricsUtils'
|
||||
|
||||
# Write the metric type header file
|
||||
with open(metrics_header_file, "w") as f:
|
||||
f.write(header)
|
||||
|
||||
f.write('#pragma once\n\n')
|
||||
f.write('#include "duckdb/common/types/value.hpp"\n')
|
||||
f.write('#include "duckdb/common/unordered_set.hpp"\n')
|
||||
f.write('#include "duckdb/common/unordered_map.hpp"\n')
|
||||
f.write('#include "duckdb/common/constants.hpp"\n')
|
||||
f.write('#include "duckdb/common/enum_util.hpp"\n')
|
||||
f.write('#include "duckdb/common/enums/optimizer_type.hpp"\n\n')
|
||||
|
||||
f.write("namespace duckdb {\n\n")
|
||||
|
||||
f.write("enum class MetricsType : uint8_t {\n")
|
||||
|
||||
for metric in metrics:
|
||||
f.write(f" {metric},\n")
|
||||
|
||||
for metric in phase_timing_metrics:
|
||||
f.write(f" {metric},\n")
|
||||
|
||||
for metric in optimizer_types:
|
||||
f.write(f" OPTIMIZER_{metric},\n")
|
||||
|
||||
f.write("};\n\n")
|
||||
|
||||
f.write(typedefs)
|
||||
|
||||
f.write('class MetricsUtils {\n')
|
||||
f.write('public:\n')
|
||||
f.write(f' static profiler_settings_t {get_optimizer_metric_fun};\n')
|
||||
f.write(f' static profiler_settings_t {get_phase_timing_metric_fun};\n\n')
|
||||
f.write(f' static MetricsType {get_optimizer_metric_by_type_fun};\n')
|
||||
f.write(f' static OptimizerType {get_optimizer_type_by_metric_fun};\n\n')
|
||||
f.write(f' static bool {is_optimizer_metric_fun};\n')
|
||||
f.write(f' static bool {is_phase_timing_metric_fun};\n')
|
||||
f.write(f' static bool {is_query_global_metric_fun};\n')
|
||||
f.write('};\n\n')
|
||||
|
||||
f.write("} // namespace duckdb\n")
|
||||
|
||||
# Write the metric_type.cpp file
|
||||
with open(metrics_cpp_file, "w") as f:
|
||||
f.write(header)
|
||||
|
||||
f.write('#include "duckdb/common/enums/metric_type.hpp"\n')
|
||||
f.write("namespace duckdb {\n\n")
|
||||
|
||||
f.write(f'profiler_settings_t {metrics_class}::{get_optimizer_metric_fun} {{\n')
|
||||
f.write(f" return {{\n")
|
||||
for metric in optimizer_types:
|
||||
f.write(f" MetricsType::OPTIMIZER_{metric},\n")
|
||||
f.write(" };\n")
|
||||
f.write("}\n\n")
|
||||
|
||||
f.write(f'profiler_settings_t {metrics_class}::{get_phase_timing_metric_fun} {{\n')
|
||||
f.write(f" return {{\n")
|
||||
for metric in phase_timing_metrics:
|
||||
f.write(f" MetricsType::{metric},\n")
|
||||
f.write(" };\n")
|
||||
f.write("}\n\n")
|
||||
|
||||
f.write(f'MetricsType {metrics_class}::{get_optimizer_metric_by_type_fun} {{\n')
|
||||
f.write(' switch(type) {\n')
|
||||
for metric in optimizer_types:
|
||||
f.write(f" case OptimizerType::{metric}:\n")
|
||||
f.write(f" return MetricsType::OPTIMIZER_{metric};\n")
|
||||
f.write(' default:\n')
|
||||
f.write(
|
||||
' throw InternalException("OptimizerType %s cannot be converted to a MetricsType", '
|
||||
'EnumUtil::ToString(type));\n'
|
||||
)
|
||||
f.write(' };\n')
|
||||
f.write('}\n\n')
|
||||
|
||||
f.write(f'OptimizerType {metrics_class}::{get_optimizer_type_by_metric_fun} {{\n')
|
||||
f.write(' switch(type) {\n')
|
||||
for metric in optimizer_types:
|
||||
f.write(f" case MetricsType::OPTIMIZER_{metric}:\n")
|
||||
f.write(f" return OptimizerType::{metric};\n")
|
||||
f.write(' default:\n')
|
||||
f.write(' return OptimizerType::INVALID;\n')
|
||||
f.write(' };\n')
|
||||
f.write('}\n\n')
|
||||
|
||||
f.write(f'bool {metrics_class}::{is_optimizer_metric_fun} {{\n')
|
||||
f.write(' switch(type) {\n')
|
||||
for metric in optimizer_types:
|
||||
f.write(f" case MetricsType::OPTIMIZER_{metric}:\n")
|
||||
|
||||
f.write(' return true;\n')
|
||||
f.write(' default:\n')
|
||||
f.write(' return false;\n')
|
||||
f.write(' };\n')
|
||||
f.write('}\n\n')
|
||||
|
||||
f.write(f'bool {metrics_class}::{is_phase_timing_metric_fun} {{\n')
|
||||
f.write(' switch(type) {\n')
|
||||
for metric in phase_timing_metrics:
|
||||
f.write(f" case MetricsType::{metric}:\n")
|
||||
|
||||
f.write(' return true;\n')
|
||||
f.write(' default:\n')
|
||||
f.write(' return false;\n')
|
||||
f.write(' };\n')
|
||||
f.write('}\n\n')
|
||||
|
||||
f.write(f'bool {metrics_class}::{is_query_global_metric_fun} {{\n')
|
||||
f.write(' switch(type) {\n')
|
||||
for metric in query_global_metrics:
|
||||
f.write(f" case MetricsType::{metric}:\n")
|
||||
|
||||
f.write(' return true;\n')
|
||||
f.write(' default:\n')
|
||||
f.write(' return false;\n')
|
||||
f.write(' };\n')
|
||||
f.write('}\n\n')
|
||||
|
||||
f.write("} // namespace duckdb\n")
|
||||
|
||||
# Generate the test files
|
||||
test_names = ["test_default_profiling_settings", "test_custom_profiling_optimizer"]
|
||||
|
||||
test_descriptions = ["default", "custom optimizer"]
|
||||
|
||||
test_files = [os.path.join("..", "test", "sql", "pragma", "profiling", f"{name}.test") for name in test_names]
|
||||
|
||||
|
||||
def write_statement(f, statement_type, statement):
|
||||
f.write(f"statement {statement_type}\n")
|
||||
f.write(statement + "\n\n")
|
||||
|
||||
|
||||
def write_query(f, options, query):
|
||||
f.write(f"query {options}\n")
|
||||
f.write(query + "\n")
|
||||
f.write("----\n")
|
||||
|
||||
|
||||
def write_default_query(f):
|
||||
query = "SELECT unnest(['Maia', 'Thijs', 'Mark', 'Hannes', 'Tom', 'Max', 'Carlo', 'Sam', 'Tania']) AS names ORDER BY random();"
|
||||
write_statement(f, "ok", query)
|
||||
write_statement(f, "ok", "PRAGMA disable_profiling;")
|
||||
|
||||
|
||||
def write_get_custom_profiling_settings(f):
|
||||
query = """
|
||||
SELECT unnest(res) FROM (
|
||||
SELECT current_setting('custom_profiling_settings') AS raw_setting,
|
||||
raw_setting.trim('{}') AS setting,
|
||||
string_split(setting, ', ') AS res
|
||||
) ORDER BY ALL;
|
||||
""".strip()
|
||||
write_query(f, "I", query)
|
||||
|
||||
|
||||
def write_custom_profiling_optimizer(f):
|
||||
write_statement(f, "ok", "PRAGMA custom_profiling_settings='{\"ALL_OPTIMIZERS\": \"true\"}';")
|
||||
|
||||
write_default_query(f)
|
||||
|
||||
query = """
|
||||
SELECT * FROM (
|
||||
SELECT unnest(res) str FROM (
|
||||
SELECT current_setting('custom_profiling_settings') as raw_setting,
|
||||
raw_setting.trim('{}') AS setting,
|
||||
string_split(setting, ', ') AS res
|
||||
)
|
||||
) WHERE '"true"' NOT in str
|
||||
ORDER BY ALL \
|
||||
""".strip()
|
||||
write_query(f, "I", query)
|
||||
f.write("\n")
|
||||
|
||||
write_statement(f, "ok", "PRAGMA custom_profiling_settings='{}'")
|
||||
write_default_query(f)
|
||||
|
||||
write_get_custom_profiling_settings(f)
|
||||
f.write("(empty)\n\n")
|
||||
|
||||
write_statement(f, "ok", "PRAGMA custom_profiling_settings='{\"OPTIMIZER_JOIN_ORDER\": \"true\"}'")
|
||||
write_default_query(f)
|
||||
|
||||
write_get_custom_profiling_settings(f)
|
||||
f.write("\"OPTIMIZER_JOIN_ORDER\": \"true\"\n\n")
|
||||
|
||||
write_statement(
|
||||
f, "ok", "CREATE OR REPLACE TABLE metrics_output AS SELECT * FROM '__TEST_DIR__/profiling_output.json';"
|
||||
)
|
||||
|
||||
query = """
|
||||
SELECT
|
||||
CASE WHEN optimizer_join_order > 0 THEN 'true'
|
||||
ELSE 'false' END
|
||||
FROM metrics_output;
|
||||
""".strip()
|
||||
write_query(f, "I", query)
|
||||
f.write("true\n\n")
|
||||
|
||||
write_statement(f, "ok", "SET disabled_optimizers = 'JOIN_ORDER';")
|
||||
write_statement(f, "ok", "PRAGMA custom_profiling_settings='{\"OPTIMIZER_JOIN_ORDER\": \"true\"}'")
|
||||
write_default_query(f)
|
||||
|
||||
write_get_custom_profiling_settings(f)
|
||||
f.write("(empty)\n\n")
|
||||
|
||||
write_statement(f, "ok", "PRAGMA custom_profiling_settings='{\"CUMULATIVE_OPTIMIZER_TIMING\": \"true\"}';")
|
||||
write_default_query(f)
|
||||
|
||||
write_statement(
|
||||
f, "ok", "CREATE OR REPLACE TABLE metrics_output AS SELECT * FROM '__TEST_DIR__/profiling_output.json';"
|
||||
)
|
||||
|
||||
query = """
|
||||
SELECT
|
||||
CASE WHEN cumulative_optimizer_timing > 0 THEN 'true'
|
||||
ELSE 'false' END
|
||||
FROM metrics_output;
|
||||
""".strip()
|
||||
write_query(f, "I", query)
|
||||
f.write("true\n\n")
|
||||
|
||||
f.write("# All phase timings must be collected when using detailed profiling mode.\n\n")
|
||||
|
||||
write_statement(f, "ok", "RESET custom_profiling_settings;")
|
||||
write_statement(f, "ok", "SET profiling_mode = 'detailed';")
|
||||
write_default_query(f)
|
||||
|
||||
query = """
|
||||
SELECT * FROM (
|
||||
SELECT unnest(res) str FROM (
|
||||
SELECT current_setting('custom_profiling_settings') AS raw_setting,
|
||||
raw_setting.trim('{}') AS setting,
|
||||
string_split(setting, ', ') AS res
|
||||
)
|
||||
)
|
||||
WHERE '"true"' NOT IN str
|
||||
ORDER BY ALL
|
||||
""".strip()
|
||||
write_query(f, "I", query)
|
||||
f.write("\n")
|
||||
|
||||
write_statement(f, "ok", "RESET custom_profiling_settings;")
|
||||
write_statement(f, "ok", "SET profiling_mode = 'standard';")
|
||||
|
||||
|
||||
# Create the test files
|
||||
for test_file, name, description in zip(test_files, test_names, test_descriptions):
|
||||
with open(test_file, "w") as f:
|
||||
display_name = test_file.replace("../", "")
|
||||
f.write(f"# name: {display_name}\n")
|
||||
f.write(f"# description: Test {description} profiling settings.\n")
|
||||
f.write("# group: [profiling]\n\n")
|
||||
f.write("# This file is automatically generated by scripts/generate_metric_enums.py\n")
|
||||
f.write("# Do not edit this file manually, your changes will be overwritten\n\n")
|
||||
|
||||
f.write("require json\n\n")
|
||||
|
||||
write_statement(f, "ok", "PRAGMA enable_verification;")
|
||||
write_statement(f, "ok", "PRAGMA enable_profiling = 'json';")
|
||||
write_statement(f, "ok", "PRAGMA profiling_output = '__TEST_DIR__/profiling_output.json';")
|
||||
|
||||
if name == "test_custom_profiling_optimizer":
|
||||
write_custom_profiling_optimizer(f)
|
||||
|
||||
write_default_query(f)
|
||||
|
||||
write_get_custom_profiling_settings(f)
|
||||
metrics.sort()
|
||||
|
||||
for metric in metrics:
|
||||
f.write(f'"{metric}": "true"\n')
|
||||
f.write("\n")
|
||||
|
||||
write_statement(
|
||||
f, "ok", "CREATE OR REPLACE TABLE metrics_output AS SELECT * FROM '__TEST_DIR__/profiling_output.json';"
|
||||
)
|
||||
write_statement(f, "ok", "SELECT cpu_time, extra_info, rows_returned, latency FROM metrics_output;")
|
||||
39
external/duckdb/scripts/generate_plan_storage_version.py
vendored
Normal file
@@ -0,0 +1,39 @@
|
||||
# this script re-generates the binary file used for Test deserialized plans from file
|
||||
# before running this script, increment the version number in src/planner/logical_operator.cpp and
|
||||
# recompile (make debug)
|
||||
# Note that the test is not linked unless you BUILD_TPCH=1
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
from python_helpers import open_utf8
|
||||
|
||||
shell_proc = os.path.join('build', 'debug', 'test', 'unittest')
|
||||
gen_binary_file = os.path.join('test', 'api', 'serialized_plans', 'serialized_plans.binary')
|
||||
|
||||
|
||||
def try_remove_file(fname):
|
||||
try:
|
||||
os.remove(fname)
|
||||
except:
|
||||
pass
|
||||
|
||||
|
||||
try_remove_file(gen_binary_file)
|
||||
|
||||
|
||||
def run_test(test):
|
||||
print(test)
|
||||
env = os.environ.copy()
|
||||
env["GEN_PLAN_STORAGE"] = "1"
|
||||
res = subprocess.run([shell_proc, test], capture_output=True, env=env)
|
||||
stdout = res.stdout.decode('utf8').strip()
|
||||
stderr = res.stderr.decode('utf8').strip()
|
||||
if res.returncode != 0:
|
||||
print("Failed to create binary file!")
|
||||
print("----STDOUT----")
|
||||
print(stdout)
|
||||
print("----STDERR----")
|
||||
print(stderr)
|
||||
|
||||
|
||||
run_test("Generate serialized plans file")
|
||||
30
external/duckdb/scripts/generate_presigned_url.sh
vendored
Executable file
@@ -0,0 +1,30 @@
|
||||
#!/usr/bin/env bash
|
||||
#Note: DONT run as root
|
||||
|
||||
set -e
|
||||
|
||||
DUCKDB_PATH=duckdb
|
||||
if test -f build/release/duckdb; then
|
||||
DUCKDB_PATH=build/release/duckdb
|
||||
elif test -f build/reldebug/duckdb; then
|
||||
DUCKDB_PATH=build/reldebug/duckdb
|
||||
elif test -f build/debug/duckdb; then
|
||||
DUCKDB_PATH=build/debug/duckdb
|
||||
fi
|
||||
|
||||
mkdir -p data/parquet-testing/presigned
|
||||
|
||||
generate_large_parquet_query=$(cat <<EOF
|
||||
|
||||
CALL DBGEN(sf=1);
|
||||
COPY lineitem TO 'data/parquet-testing/presigned/presigned-url-lineitem.parquet' (FORMAT 'parquet');
|
||||
|
||||
EOF
|
||||
)
|
||||
$DUCKDB_PATH -c "$generate_large_parquet_query"
|
||||
|
||||
mkdir -p data/attach_test/
|
||||
|
||||
# Generate Storage Version
|
||||
$DUCKDB_PATH data/attach_test/attach.db < test/sql/storage_version/generate_storage_version.sql
|
||||
$DUCKDB_PATH data/attach_test/lineitem_sf1.db -c "CALL dbgen(sf=1)"
|
||||
858
external/duckdb/scripts/generate_serialization.py
vendored
Normal file
@@ -0,0 +1,858 @@
|
||||
import os
|
||||
import json
|
||||
import re
|
||||
import argparse
|
||||
from enum import Enum
|
||||
|
||||
from typing import Dict, Optional, Tuple, List
|
||||
|
||||
parser = argparse.ArgumentParser(description='Generate serialization code')
|
||||
parser.add_argument('--source', type=str, help='Source directory')
|
||||
parser.add_argument('--target', type=str, help='Target directory')
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
|
||||
class MemberVariableStatus(Enum):
|
||||
# Both serialized and deserialized
|
||||
EXISTING = 1
|
||||
# Not serialized, but is deserialized
|
||||
READ_ONLY = 2
|
||||
# Not serialized, not deserialized
|
||||
DELETED = 3
|
||||
|
||||
|
||||
def get_file_list():
|
||||
if args.source is None:
|
||||
targets = [
|
||||
{'source': 'src/include/duckdb/storage/serialization', 'target': 'src/storage/serialization'},
|
||||
{'source': 'extension/parquet/include/', 'target': 'extension/parquet'},
|
||||
{'source': 'extension/json/include/', 'target': 'extension/json'},
|
||||
]
|
||||
else:
|
||||
targets = [
|
||||
{'source': args.source, 'target': args.target},
|
||||
]
|
||||
|
||||
file_list = []
|
||||
for target in targets:
|
||||
source_base = os.path.sep.join(target['source'].split('/'))
|
||||
target_base = os.path.sep.join(target['target'].split('/'))
|
||||
for fname in os.listdir(source_base):
|
||||
if '.json' not in fname:
|
||||
continue
|
||||
if '_enums.json' in fname:
|
||||
continue
|
||||
file_list.append(
|
||||
{
|
||||
'source': os.path.join(source_base, fname),
|
||||
'target': os.path.join(target_base, 'serialize_' + fname.replace('.json', '.cpp')),
|
||||
}
|
||||
)
|
||||
return file_list
|
||||
|
||||
|
||||
scripts_dir = os.path.dirname(os.path.abspath(__file__))
|
||||
version_map_path = os.path.join(scripts_dir, '..', 'src', 'storage', 'version_map.json')
|
||||
version_map_file = open(version_map_path)
|
||||
version_map = json.load(version_map_file)
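# Sketch of the expected shape of version_map.json, inferred from the lookups below;
# the concrete versions and numbers are illustrative, not the real file contents:
#   {"serialization": {"values": {"v0.10.2": 1, "v1.1.0": 2, "latest": 3}}}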
|
||||
|
||||
|
||||
def verify_serialization_versions(version_map):
|
||||
serialization = version_map['serialization']['values']
|
||||
if list(serialization.keys())[-1] != 'latest':
|
||||
print(f"The version map ({version_map_path}) for serialization versions must end in 'latest'!")
|
||||
exit(1)
|
||||
|
||||
|
||||
verify_serialization_versions(version_map)
|
||||
|
||||
|
||||
def lookup_serialization_version(version: str):
|
||||
if version.lower() == "latest":
|
||||
print(
|
||||
f"'latest' is not an allowed 'version' to use in serialization JSON files, please provide a duckdb version"
|
||||
)
exit(1)
|
||||
|
||||
versions = version_map['serialization']['values']
|
||||
if version not in versions:
|
||||
from packaging.version import Version
|
||||
|
||||
current_version = Version(version)
|
||||
|
||||
# This version does not exist in the version map
|
||||
# Which is allowed for unreleased versions, they will get mapped to 'latest' instead
|
||||
|
||||
last_registered_version = Version(list(versions.keys())[-2])
|
||||
if current_version < last_registered_version:
|
||||
# The version was lower than the last defined version, which is not allowed
|
||||
print(
|
||||
f"Specified version ({current_version}) could not be found in the version_map.json, and it is lower than the last defined version ({last_registered_version})!"
|
||||
)
|
||||
exit(1)
|
||||
if hasattr(lookup_serialization_version, 'latest'):
|
||||
# We have already mapped a version to 'latest', check that the versions match
|
||||
latest_version = getattr(lookup_serialization_version, 'latest')
|
||||
if current_version != latest_version:
|
||||
print(
|
||||
f"Found more than one version that is not present in the version_map.json!: Current: {current_version}, Latest: {latest_version}"
|
||||
)
|
||||
exit(1)
|
||||
else:
|
||||
setattr(lookup_serialization_version, 'latest', current_version)
|
||||
return versions['latest']
|
||||
return versions[version]
|
||||
|
||||
|
||||
INCLUDE_FORMAT = '#include "{filename}"\n'
|
||||
|
||||
HEADER = '''//===----------------------------------------------------------------------===//
|
||||
// This file is automatically generated by scripts/generate_serialization.py
|
||||
// Do not edit this file manually, your changes will be overwritten
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
{include_list}
|
||||
namespace duckdb {{
|
||||
'''
|
||||
|
||||
FOOTER = '''
|
||||
} // namespace duckdb
|
||||
'''
|
||||
|
||||
TEMPLATED_BASE_FORMAT = '''
|
||||
template <typename {template_name}>'''
|
||||
|
||||
SERIALIZE_BASE_FORMAT = '''
|
||||
void {class_name}::Serialize(Serializer &serializer) const {{
|
||||
{members}}}
|
||||
'''
|
||||
|
||||
SERIALIZE_ELEMENT_FORMAT = (
|
||||
'\tserializer.WriteProperty<{property_type}>({property_id}, "{property_key}", {property_name}{property_default});\n'
|
||||
)
|
||||
|
||||
BASE_SERIALIZE_FORMAT = '\t{base_class_name}::Serialize(serializer);\n'
|
||||
|
||||
POINTER_RETURN_FORMAT = '{pointer}<{class_name}>'
|
||||
|
||||
DESERIALIZE_BASE_FORMAT = '''
|
||||
{deserialize_return} {class_name}::Deserialize(Deserializer &deserializer) {{
|
||||
{members}
|
||||
}}
|
||||
'''
|
||||
|
||||
SWITCH_CODE_FORMAT = '''\tswitch ({switch_variable}) {{
|
||||
{case_statements}\tdefault:
|
||||
\t\tthrow SerializationException("Unsupported type for deserialization of {base_class}!");
|
||||
\t}}
|
||||
'''
|
||||
|
||||
SET_DESERIALIZE_PARAMETER_FORMAT = '\tdeserializer.Set<{property_type}>({property_name});\n'
|
||||
UNSET_DESERIALIZE_PARAMETER_FORMAT = '\tdeserializer.Unset<{property_type}>();\n'
|
||||
GET_DESERIALIZE_PARAMETER_FORMAT = 'deserializer.Get<{property_type}>()'
|
||||
TRY_GET_DESERIALIZE_PARAMETER_FORMAT = 'deserializer.TryGet<{property_type}>()'
|
||||
|
||||
SWITCH_HEADER_FORMAT = '\tcase {enum_type}::{enum_value}:\n'
|
||||
|
||||
SWITCH_STATEMENT_FORMAT = (
|
||||
SWITCH_HEADER_FORMAT
|
||||
+ '''\t\tresult = {class_deserialize}::Deserialize(deserializer);
|
||||
\t\tbreak;
|
||||
'''
|
||||
)
|
||||
|
||||
DESERIALIZE_ELEMENT_FORMAT = '\tauto {property_name} = deserializer.ReadProperty<{property_type}>({property_id}, "{property_key}"{property_default});\n'
|
||||
DESERIALIZE_ELEMENT_BASE_FORMAT = '\tauto {property_name} = deserializer.ReadProperty<unique_ptr<{base_property}>>({property_id}, "{property_key}"{property_default});\n'
|
||||
DESERIALIZE_ELEMENT_CLASS_FORMAT = '\tdeserializer.ReadProperty<{property_type}>({property_id}, "{property_key}", result{assignment}{property_name}{property_default});\n'
|
||||
DESERIALIZE_ELEMENT_CLASS_BASE_FORMAT = '\tauto {property_name} = deserializer.ReadProperty<unique_ptr<{base_property}>>({property_id}, "{property_key}"{property_default});\n\tresult{assignment}{property_name} = unique_ptr_cast<{base_property}, {derived_property}>(std::move({property_name}));\n'
|
||||
|
||||
MOVE_LIST = [
|
||||
'string',
|
||||
'ParsedExpression*',
|
||||
'CommonTableExpressionMap',
|
||||
'LogicalType',
|
||||
'ColumnDefinition',
|
||||
'BaseStatistics',
|
||||
'BoundLimitNode',
|
||||
]
|
||||
|
||||
REFERENCE_LIST = ['ClientContext', 'bound_parameter_map_t', 'Catalog']
|
||||
|
||||
|
||||
def is_container(type):
|
||||
return '<' in type and 'CSVOption' not in type
|
||||
|
||||
|
||||
def is_pointer(type):
|
||||
return type.endswith('*') or type.startswith('shared_ptr<')
|
||||
|
||||
|
||||
def is_zeroable(type):
|
||||
return type in [
|
||||
'bool',
|
||||
'int8_t',
|
||||
'int16_t',
|
||||
'int32_t',
|
||||
'int64_t',
|
||||
'uint8_t',
|
||||
'uint16_t',
|
||||
'uint32_t',
|
||||
'uint64_t',
|
||||
'idx_t',
|
||||
'size_t',
|
||||
'int',
|
||||
]
|
||||
|
||||
|
||||
def requires_move(type):
|
||||
return is_container(type) or is_pointer(type) or type in MOVE_LIST
|
||||
|
||||
|
||||
def replace_pointer(type):
|
||||
return re.sub('([a-zA-Z0-9]+)[*]', 'unique_ptr<\\1>', type)
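# For illustration: replace_pointer('ParsedExpression*') returns 'unique_ptr<ParsedExpression>'.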
|
||||
|
||||
|
||||
def get_default_argument(default_value):
|
||||
return f'{default_value}'.lower() if type(default_value) == bool else f'{default_value}'
|
||||
|
||||
|
||||
def get_deserialize_element_template(
|
||||
template,
|
||||
property_name,
|
||||
property_key,
|
||||
property_id,
|
||||
property_type,
|
||||
has_default,
|
||||
default_value,
|
||||
status: MemberVariableStatus,
|
||||
pointer_type,
|
||||
):
|
||||
if status == MemberVariableStatus.READ_ONLY and not has_default:
|
||||
print("'read_only' status is not allowed without a default value")
|
||||
exit(1)
|
||||
|
||||
# read_method = 'ReadProperty'
|
||||
assignment = '.' if pointer_type == 'none' else '->'
|
||||
default_argument = '' if default_value is None else f', {get_default_argument(default_value)}'
|
||||
if status == MemberVariableStatus.DELETED:
|
||||
template = template.replace(', result{assignment}{property_name}', '').replace(
|
||||
'ReadProperty', 'ReadDeletedProperty'
|
||||
)
|
||||
elif has_default and default_value is None:
|
||||
template = template.replace('ReadProperty', 'ReadPropertyWithDefault')
|
||||
elif has_default and default_value is not None:
|
||||
template = template.replace('ReadProperty', 'ReadPropertyWithExplicitDefault')
|
||||
template = template.format(
|
||||
property_name=property_name,
|
||||
property_key=property_key,
|
||||
property_id=str(property_id),
|
||||
property_default=default_argument,
|
||||
property_type=property_type,
|
||||
assignment=assignment,
|
||||
)
|
||||
if status == MemberVariableStatus.DELETED:
|
||||
template = template.replace(f'auto {property_name} = ', '')
|
||||
return template
|
||||
|
||||
|
||||
def get_deserialize_assignment(property_name, property_type, pointer_type):
|
||||
assignment = '.' if pointer_type == 'none' else '->'
|
||||
property = property_name.replace('.', '_')
|
||||
if requires_move(property_type):
|
||||
property = f'std::move({property})'
|
||||
return f'\tresult{assignment}{property_name} = {property};\n'
|
||||
|
||||
|
||||
def get_return_value(pointer_type, class_name):
|
||||
if pointer_type == 'none':
|
||||
return class_name
|
||||
return POINTER_RETURN_FORMAT.format(pointer=pointer_type, class_name=class_name)
|
||||
|
||||
|
||||
def generate_return(class_entry):
|
||||
if class_entry.base is None or class_entry.constructor_method is not None:
|
||||
return '\treturn result;'
|
||||
else:
|
||||
return '\treturn std::move(result);'
|
||||
|
||||
|
||||
def parse_status(status: str):
|
||||
if status == 'deleted':
|
||||
return MemberVariableStatus.DELETED
|
||||
if status == 'read_only':
|
||||
return MemberVariableStatus.READ_ONLY
|
||||
if status == 'existing':
|
||||
return MemberVariableStatus.EXISTING
|
||||
valid_options = ['deleted', 'read_only', 'existing']
|
||||
valid_options_string = ", ".join(valid_options)
|
||||
print(f"Invalid 'status' ('{status}') encountered, valid options are: {valid_options_string}")
|
||||
exit(1)
|
||||
|
||||
|
||||
# FIXME: python has __slots__ for this, so it's enforced by Python itself
|
||||
# see: https://wiki.python.org/moin/UsingSlots
|
||||
supported_member_entries = [
|
||||
'id',
|
||||
'name',
|
||||
'type',
|
||||
'property',
|
||||
'serialize_property',
|
||||
'deserialize_property',
|
||||
'base',
|
||||
'default',
|
||||
'status',
|
||||
'version',
|
||||
]
|
||||
|
||||
|
||||
def has_default_by_default(type):
|
||||
if is_pointer(type):
|
||||
return True
|
||||
if is_container(type):
|
||||
if 'IndexVector' in type:
|
||||
return False
|
||||
if 'CSVOption' in type:
|
||||
return False
|
||||
return True
|
||||
if type == 'string':
|
||||
return True
|
||||
if is_zeroable(type):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
class MemberVariable:
|
||||
def __init__(self, entry):
|
||||
self.id = entry['id']
|
||||
self.name = entry['name']
|
||||
self.type = entry['type']
|
||||
self.base = None
|
||||
self.has_default = False
|
||||
self.default = None
|
||||
self.status: MemberVariableStatus = MemberVariableStatus.EXISTING
|
||||
self.version: str = 'v0.10.2'
|
||||
if 'property' in entry:
|
||||
self.serialize_property = entry['property']
|
||||
self.deserialize_property = entry['property']
|
||||
else:
|
||||
self.serialize_property = self.name
|
||||
self.deserialize_property = self.name
|
||||
if 'version' in entry:
|
||||
self.version = entry['version']
|
||||
if 'serialize_property' in entry:
|
||||
self.serialize_property = entry['serialize_property']
|
||||
if 'deserialize_property' in entry:
|
||||
self.deserialize_property = entry['deserialize_property']
|
||||
if 'default' in entry:
|
||||
self.has_default = True
|
||||
self.default = entry['default']
|
||||
if 'status' in entry:
|
||||
self.status = parse_status(entry['status'])
|
||||
if self.default is None:
|
||||
# default default
|
||||
self.has_default = has_default_by_default(self.type)
|
||||
if 'base' in entry:
|
||||
self.base = entry['base']
|
||||
for key in entry.keys():
|
||||
if key not in supported_member_entries:
|
||||
print(
|
||||
f"Unsupported key \"{key}\" in member variable, key should be in set {str(supported_member_entries)}"
|
||||
)
|
||||
|
||||
|
||||
supported_serialize_entries = [
|
||||
'class',
|
||||
'class_type',
|
||||
'pointer_type',
|
||||
'base',
|
||||
'enum',
|
||||
'constructor',
|
||||
'constructor_method',
|
||||
'custom_implementation',
|
||||
'custom_switch_code',
|
||||
'members',
|
||||
'return_type',
|
||||
'set_parameters',
|
||||
'includes',
|
||||
'finalize_deserialization',
|
||||
]
|
||||
|
||||
|
||||
class SerializableClass:
|
||||
def __init__(self, entry):
|
||||
self.name = entry['class']
|
||||
self.is_base_class = 'class_type' in entry
|
||||
self.base = None
|
||||
self.base_object = None
|
||||
self.enum_value = None
|
||||
self.enum_entries = []
|
||||
self.set_parameter_names = []
|
||||
self.set_parameters = []
|
||||
self.pointer_type = 'unique_ptr'
|
||||
self.constructor: Optional[List[str]] = None
|
||||
self.constructor_method = None
|
||||
self.members: Optional[List[MemberVariable]] = None
|
||||
self.custom_implementation = False
|
||||
self.custom_switch_code = None
|
||||
self.children: Dict[str, SerializableClass] = {}
|
||||
self.return_type = self.name
|
||||
self.return_class = self.name
|
||||
self.finalize_deserialization = None
|
||||
if 'finalize_deserialization' in entry:
|
||||
self.finalize_deserialization = entry['finalize_deserialization']
|
||||
if self.is_base_class:
|
||||
self.enum_value = entry['class_type']
|
||||
if 'pointer_type' in entry:
|
||||
self.pointer_type = entry['pointer_type']
|
||||
if 'base' in entry:
|
||||
self.base = entry['base']
|
||||
self.enum_entries = entry['enum']
|
||||
if type(self.enum_entries) is str:
|
||||
self.enum_entries = [self.enum_entries]
|
||||
self.return_type = self.base
|
||||
if 'constructor' in entry:
|
||||
self.constructor = entry['constructor']
|
||||
if not isinstance(self.constructor, list):
|
||||
print(f"constructor for {self.name}, must be of type [], but is of type {str(type(self.constructor))}")
|
||||
exit(1)
|
||||
if 'constructor_method' in entry:
|
||||
self.constructor_method = entry['constructor_method']
|
||||
if self.constructor is not None:
|
||||
print(
|
||||
"Not allowed to mix 'constructor_method' and 'constructor', 'constructor_method' will implicitly receive all parameters"
|
||||
)
|
||||
exit(1)
|
||||
if 'custom_implementation' in entry and entry['custom_implementation']:
|
||||
self.custom_implementation = True
|
||||
if 'custom_switch_code' in entry:
|
||||
self.custom_switch_code = entry['custom_switch_code']
|
||||
if 'members' in entry:
|
||||
self.members = [MemberVariable(x) for x in entry['members']]
|
||||
if 'return_type' in entry:
|
||||
self.return_type = entry['return_type']
|
||||
self.return_class = self.return_type
|
||||
if 'set_parameters' in entry:
|
||||
self.set_parameter_names = entry['set_parameters']
|
||||
for set_parameter_name in self.set_parameter_names:
|
||||
found = False
|
||||
assert self.members is not None
|
||||
for member in self.members:
|
||||
if member.name == set_parameter_name:
|
||||
self.set_parameters.append(member)
|
||||
found = True
|
||||
break
|
||||
if not found:
|
||||
raise Exception(f'Set parameter {set_parameter_name} not found in member list')
|
||||
for key in entry.keys():
|
||||
if key not in supported_serialize_entries:
|
||||
print(
f"Unsupported key \"{key}\" in serialization entry; key should be one of {str(supported_serialize_entries)}"
)
|
||||
|
||||
def inherit(self, base_class):
|
||||
self.base_object = base_class
|
||||
self.pointer_type = base_class.pointer_type
|
||||
|
||||
def get_deserialize_element(
|
||||
self, entry: MemberVariable, *, base: Optional[str] = None, pointer_type: Optional[str] = None
|
||||
):
|
||||
property_name = entry.deserialize_property
|
||||
property_id = entry.id
|
||||
property_key = entry.name
|
||||
property_type = replace_pointer(entry.type)
|
||||
if not pointer_type:
|
||||
pointer_type = self.pointer_type
|
||||
|
||||
property_name = property_name.replace('.', '_')
|
||||
template = DESERIALIZE_ELEMENT_FORMAT
|
||||
if base:
|
||||
template = DESERIALIZE_ELEMENT_BASE_FORMAT.replace('{base_property}', base.replace('*', ''))
|
||||
|
||||
return get_deserialize_element_template(
|
||||
template,
|
||||
property_name,
|
||||
property_key,
|
||||
property_id,
|
||||
property_type,
|
||||
entry.has_default,
|
||||
entry.default,
|
||||
entry.status,
|
||||
pointer_type,
|
||||
)
|
||||
|
||||
def get_serialize_element(self, entry: MemberVariable):
|
||||
property_name = entry.serialize_property
|
||||
property_id = entry.id
|
||||
property_key = entry.name
|
||||
property_type = replace_pointer(entry.type)
|
||||
default_value = entry.default
|
||||
|
||||
assignment = '.' if self.pointer_type == 'none' else '->'
|
||||
default_argument = '' if default_value is None else f', {get_default_argument(default_value)}'
|
||||
storage_version = lookup_serialization_version(entry.version)
|
||||
conditional_serialization = storage_version != 1
|
||||
template = SERIALIZE_ELEMENT_FORMAT
|
||||
if entry.status != MemberVariableStatus.EXISTING and not conditional_serialization:
|
||||
template = "\t/* [Deleted] ({property_type}) \"{property_name}\" */\n"
|
||||
elif entry.has_default:
|
||||
template = template.replace('WriteProperty', 'WritePropertyWithDefault')
|
||||
serialization_code = template.format(
|
||||
property_name=property_name,
|
||||
property_type=property_type,
|
||||
property_id=str(property_id),
|
||||
property_key=property_key,
|
||||
property_default=default_argument,
|
||||
assignment=assignment,
|
||||
)
|
||||
|
||||
if conditional_serialization:
|
||||
code = []
|
||||
if entry.status != MemberVariableStatus.EXISTING:
|
||||
# conditional delete
|
||||
code.append(f'\tif (!serializer.ShouldSerialize({storage_version})) {{')
|
||||
else:
|
||||
# conditional serialization
|
||||
code.append(f'\tif (serializer.ShouldSerialize({storage_version})) {{')
|
||||
code.append('\t' + serialization_code)
|
||||
|
||||
result = '\n'.join(code) + '\t}\n'
|
||||
return result
|
||||
return serialization_code
|
||||
|
||||
def generate_constructor(self, constructor_parameters: List[str]):
|
||||
parameters = ", ".join(constructor_parameters)
|
||||
|
||||
if self.constructor_method is not None:
|
||||
return f'\tauto result = {self.constructor_method}({parameters});\n'
|
||||
if self.pointer_type == 'none':
|
||||
if parameters != '':
|
||||
parameters = f'({parameters})'
|
||||
return f'\t{self.return_class} result{parameters};\n'
|
||||
return f'\tauto result = duckdb::{self.pointer_type}<{self.return_class}>(new {self.return_class}({parameters}));\n'
|
||||
|
||||
|
||||
def generate_base_class_code(base_class: SerializableClass):
|
||||
base_class_serialize = ''
|
||||
base_class_deserialize = ''
|
||||
|
||||
# properties
|
||||
enum_type = ''
|
||||
for entry in base_class.members:
|
||||
if entry.serialize_property == base_class.enum_value:
|
||||
enum_type = entry.type
|
||||
base_class_serialize += base_class.get_serialize_element(entry)
|
||||
|
||||
type_name = replace_pointer(entry.type)
|
||||
base_class_deserialize += base_class.get_deserialize_element(entry)
|
||||
expressions = [x for x in base_class.children.items()]
|
||||
expressions = sorted(expressions, key=lambda x: x[0])
|
||||
|
||||
# set parameters
|
||||
for entry in base_class.set_parameters:
|
||||
base_class_deserialize += SET_DESERIALIZE_PARAMETER_FORMAT.format(
|
||||
property_type=entry.type, property_name=entry.name
|
||||
)
|
||||
|
||||
base_class_deserialize += f'\t{base_class.pointer_type}<{base_class.name}> result;\n'
|
||||
switch_cases = ''
|
||||
for expr in expressions:
|
||||
enum_value = expr[0]
|
||||
child_data = expr[1]
|
||||
if child_data.custom_switch_code is not None:
|
||||
switch_cases += SWITCH_HEADER_FORMAT.format(
|
||||
enum_type=enum_type, enum_value=enum_value, class_deserialize=child_data.name
|
||||
)
|
||||
switch_cases += '\n'.join(
|
||||
['\t\t' + x for x in child_data.custom_switch_code.replace('\\n', '\n').split('\n')]
|
||||
)
|
||||
switch_cases += '\n'
|
||||
continue
|
||||
switch_cases += SWITCH_STATEMENT_FORMAT.format(
|
||||
enum_type=enum_type, enum_value=enum_value, class_deserialize=child_data.name
|
||||
)
|
||||
|
||||
assign_entries = []
|
||||
for entry in base_class.members:
|
||||
skip = False
|
||||
for check_entry in [entry.name, entry.serialize_property]:
|
||||
if check_entry in base_class.set_parameter_names:
|
||||
skip = True
|
||||
if check_entry == base_class.enum_value:
|
||||
skip = True
|
||||
if skip:
|
||||
continue
|
||||
assign_entries.append(entry)
|
||||
|
||||
# class switch statement
|
||||
base_class_deserialize += SWITCH_CODE_FORMAT.format(
|
||||
switch_variable=base_class.enum_value, case_statements=switch_cases, base_class=base_class.name
|
||||
)
|
||||
|
||||
deserialize_return = get_return_value(base_class.pointer_type, base_class.return_type)
|
||||
|
||||
for entry in base_class.set_parameters:
|
||||
base_class_deserialize += UNSET_DESERIALIZE_PARAMETER_FORMAT.format(property_type=entry.type)
|
||||
|
||||
for entry in assign_entries:
|
||||
if entry.status != MemberVariableStatus.EXISTING:
|
||||
continue
|
||||
move = False
|
||||
if entry.type in MOVE_LIST or is_container(entry.type) or is_pointer(entry.type):
|
||||
move = True
|
||||
if move:
|
||||
base_class_deserialize += (
|
||||
f'\tresult->{entry.deserialize_property} = std::move({entry.deserialize_property});\n'
|
||||
)
|
||||
else:
|
||||
base_class_deserialize += f'\tresult->{entry.deserialize_property} = {entry.deserialize_property};\n'
|
||||
if base_class.finalize_deserialization is not None:
|
||||
for line in base_class.finalize_deserialization:
|
||||
base_class_deserialize += "\t" + line + "\n"
|
||||
base_class_deserialize += generate_return(base_class)
|
||||
base_class_generation = ''
|
||||
serialization = ''
|
||||
if base_class.base is not None:
|
||||
serialization += BASE_SERIALIZE_FORMAT.format(base_class_name=base_class.base)
|
||||
base_class_generation += SERIALIZE_BASE_FORMAT.format(
|
||||
class_name=base_class.name, members=serialization + base_class_serialize
|
||||
)
|
||||
base_class_generation += DESERIALIZE_BASE_FORMAT.format(
|
||||
deserialize_return=deserialize_return, class_name=base_class.name, members=base_class_deserialize
|
||||
)
|
||||
return base_class_generation
|
||||
|
||||
|
||||
def generate_class_code(class_entry: SerializableClass):
|
||||
if class_entry.custom_implementation:
|
||||
return None
|
||||
class_serialize = ''
|
||||
class_deserialize = ''
|
||||
|
||||
constructor_parameters: List[str] = []
|
||||
constructor_entries = set()
|
||||
last_constructor_index = -1
|
||||
if class_entry.constructor is not None:
|
||||
for constructor_entry_ in class_entry.constructor:
|
||||
if constructor_entry_.endswith('&'):
|
||||
constructor_entry = constructor_entry_[:-1]
|
||||
is_reference = True
|
||||
else:
|
||||
constructor_entry = constructor_entry_
|
||||
is_reference = False
|
||||
constructor_entries.add(constructor_entry)
|
||||
found = False
|
||||
for entry_idx, entry in enumerate(class_entry.members):
|
||||
if entry.name == constructor_entry:
|
||||
if entry_idx > last_constructor_index:
|
||||
last_constructor_index = entry_idx
|
||||
type_name = replace_pointer(entry.type)
|
||||
entry.deserialize_property = entry.deserialize_property.replace('.', '_')
|
||||
if requires_move(type_name) and not is_reference:
|
||||
constructor_parameters.append(f'std::move({entry.deserialize_property})')
|
||||
else:
|
||||
constructor_parameters.append(entry.deserialize_property)
|
||||
found = True
|
||||
break
|
||||
|
||||
if constructor_entry.startswith('$') or constructor_entry.startswith('?'):
|
||||
is_optional = constructor_entry.startswith('?')
|
||||
if is_optional:
|
||||
param_type = constructor_entry.replace('?', '')
|
||||
get_format = TRY_GET_DESERIALIZE_PARAMETER_FORMAT
|
||||
else:
|
||||
param_type = constructor_entry.replace('$', '')
|
||||
get_format = GET_DESERIALIZE_PARAMETER_FORMAT
|
||||
if param_type in REFERENCE_LIST:
|
||||
param_type += ' &'
|
||||
constructor_parameters.append(get_format.format(property_type=param_type))
|
||||
found = True
|
||||
|
||||
if class_entry.base_object is not None:
|
||||
for entry in class_entry.base_object.set_parameters:
|
||||
if entry.name == constructor_entry:
|
||||
constructor_parameters.append(GET_DESERIALIZE_PARAMETER_FORMAT.format(property_type=entry.type))
|
||||
found = True
|
||||
break
|
||||
if not found:
|
||||
print(f"Constructor member \"{constructor_entry}\" was not found in members list")
|
||||
exit(1)
|
||||
elif class_entry.constructor_method is not None:
|
||||
for entry_idx, entry in enumerate(class_entry.members):
|
||||
if entry_idx > last_constructor_index:
|
||||
last_constructor_index = entry_idx
|
||||
constructor_entries.add(entry.name)
|
||||
type_name = replace_pointer(entry.type)
|
||||
entry.deserialize_property = entry.deserialize_property.replace('.', '_')
|
||||
if requires_move(type_name):
|
||||
constructor_parameters.append(f'std::move({entry.deserialize_property})')
|
||||
else:
|
||||
constructor_parameters.append(entry.deserialize_property)
|
||||
|
||||
if class_entry.base is not None:
|
||||
class_serialize += BASE_SERIALIZE_FORMAT.format(base_class_name=class_entry.base)
|
||||
for entry_idx in range(last_constructor_index + 1):
|
||||
entry = class_entry.members[entry_idx]
|
||||
class_deserialize += class_entry.get_deserialize_element(entry, base=entry.base, pointer_type='unique_ptr')
|
||||
|
||||
class_deserialize += class_entry.generate_constructor(constructor_parameters)
|
||||
if class_entry.members is None:
|
||||
return None
|
||||
for entry_idx, entry in enumerate(class_entry.members):
|
||||
write_property_name = entry.serialize_property
|
||||
deserialize_template_str = DESERIALIZE_ELEMENT_CLASS_FORMAT
|
||||
if entry.base:
|
||||
deserialize_template_str = DESERIALIZE_ELEMENT_CLASS_BASE_FORMAT.replace(
|
||||
'{base_property}', entry.base.replace('*', '')
|
||||
).replace('{derived_property}', entry.type.replace('*', ''))
|
||||
|
||||
class_serialize += class_entry.get_serialize_element(entry)
|
||||
|
||||
type_name = replace_pointer(entry.type)
|
||||
if entry_idx > last_constructor_index:
|
||||
class_deserialize += get_deserialize_element_template(
|
||||
deserialize_template_str,
|
||||
entry.deserialize_property,
|
||||
entry.name,
|
||||
entry.id,
|
||||
type_name,
|
||||
entry.has_default,
|
||||
entry.default,
|
||||
entry.status,
|
||||
class_entry.pointer_type,
|
||||
)
|
||||
elif entry.name not in constructor_entries and entry.status == MemberVariableStatus.EXISTING:
|
||||
class_deserialize += get_deserialize_assignment(
|
||||
entry.deserialize_property, entry.type, class_entry.pointer_type
|
||||
)
|
||||
if entry.name in class_entry.set_parameter_names and entry.status == MemberVariableStatus.EXISTING:
|
||||
class_deserialize += SET_DESERIALIZE_PARAMETER_FORMAT.format(
|
||||
property_type=entry.type, property_name=entry.name
|
||||
)
|
||||
|
||||
for entry in class_entry.set_parameters:
|
||||
class_deserialize += UNSET_DESERIALIZE_PARAMETER_FORMAT.format(
|
||||
property_type=entry.type, property_name=entry.name
|
||||
)
|
||||
if class_entry.finalize_deserialization is not None:
|
||||
for line in class_entry.finalize_deserialization:
|
||||
class_deserialize += "\t" + line + "\n"
|
||||
class_deserialize += generate_return(class_entry)
|
||||
deserialize_return = get_return_value(class_entry.pointer_type, class_entry.return_type)
|
||||
|
||||
class_generation = ''
|
||||
pattern = re.compile(r'<\w+>')
|
||||
templated_type = ''
|
||||
|
||||
# Check if is a templated class
|
||||
is_templated = pattern.search(class_entry.name)
|
||||
if is_templated:
|
||||
templated_type = TEMPLATED_BASE_FORMAT.format(template_name=is_templated.group()[1:-1])
|
||||
|
||||
class_generation += templated_type + SERIALIZE_BASE_FORMAT.format(
|
||||
class_name=class_entry.name, members=class_serialize
|
||||
)
|
||||
|
||||
class_generation += templated_type + DESERIALIZE_BASE_FORMAT.format(
|
||||
deserialize_return=deserialize_return,
|
||||
class_name=class_entry.name,
|
||||
members=class_deserialize,
|
||||
)
|
||||
return class_generation
|
||||
|
||||
|
||||
def check_children_for_duplicate_members(node: SerializableClass, parents: list, seen_names: set, seen_ids: set):
|
||||
# Check for duplicate names
|
||||
if node.members is not None:
|
||||
for member in node.members:
|
||||
if member.name in seen_names:
|
||||
# Print the inheritance tree
|
||||
exit(
|
||||
f"Error: Duplicate member name \"{member.name}\" in class \"{node.name}\" ({' -> '.join(map(lambda x: x.name, parents))} -> {node.name})"
|
||||
)
|
||||
seen_names.add(member.name)
|
||||
if member.id in seen_ids:
|
||||
exit(
|
||||
f"Error: Duplicate member id \"{member.id}\" in class \"{node.name}\" ({' -> '.join(map(lambda x: x.name, parents))} -> {node.name})"
|
||||
)
|
||||
seen_ids.add(member.id)
|
||||
|
||||
# Recurse
|
||||
for child in node.children.values():
|
||||
check_children_for_duplicate_members(child, parents + [node], seen_names.copy(), seen_ids.copy())
|
||||
|
||||
|
||||
file_list = get_file_list()
|
||||
|
||||
for entry in file_list:
|
||||
source_path = entry['source']
|
||||
target_path = entry['target']
|
||||
with open(source_path, 'r') as f:
|
||||
try:
|
||||
json_data = json.load(f)
|
||||
except Exception as e:
|
||||
print(f"Failed to parse {source_path}: {str(e)}")
|
||||
exit(1)
|
||||
|
||||
include_list = [
|
||||
'duckdb/common/serializer/serializer.hpp',
|
||||
'duckdb/common/serializer/deserializer.hpp',
|
||||
]
|
||||
base_classes: List[SerializableClass] = []
|
||||
classes: List[SerializableClass] = []
|
||||
base_class_data: Dict[str, SerializableClass] = {}
|
||||
|
||||
for entry in json_data:
|
||||
if 'includes' in entry:
|
||||
if type(entry['includes']) != type([]):
|
||||
print(f"Include list must be a list, found {type(entry['includes'])} (in {str(entry)})")
|
||||
exit(1)
|
||||
for include_entry in entry['includes']:
|
||||
if include_entry not in include_list:
|
||||
include_list.append(include_entry)
|
||||
new_class = SerializableClass(entry)
|
||||
if new_class.is_base_class:
|
||||
# this class is a base class itself - construct the base class list
|
||||
if new_class.name in base_class_data:
|
||||
raise Exception(f"Duplicate base class \"{new_class.name}\"")
|
||||
base_class_data[new_class.name] = new_class
|
||||
base_classes.append(new_class)
|
||||
else:
|
||||
classes.append(new_class)
|
||||
if new_class.base is not None:
|
||||
# this class inherits from a base class - add the enum value
|
||||
if new_class.base not in base_class_data:
|
||||
raise Exception(f"Unknown base class \"{new_class.base}\" for entry \"{new_class.name}\"")
|
||||
base_class_object = base_class_data[new_class.base]
|
||||
new_class.inherit(base_class_object)
|
||||
for enum_entry in new_class.enum_entries:
|
||||
if enum_entry in base_class_object.children:
|
||||
raise Exception(f"Duplicate enum entry \"{enum_entry}\"")
|
||||
base_class_object.children[enum_entry] = new_class
|
||||
|
||||
# Ensure that there are no duplicate names in the inheritance tree
|
||||
for base_class in base_classes:
|
||||
if base_class.base is None:
|
||||
# Root base class, now traverse the children
|
||||
check_children_for_duplicate_members(base_class, [], set(), set())
|
||||
|
||||
with open(target_path, 'w+') as f:
|
||||
include_list = ''.join([INCLUDE_FORMAT.format(filename=x) for x in include_list])
|
||||
header = HEADER.format(include_list=include_list)
|
||||
f.write(header)
|
||||
|
||||
# generate the base class serialization
|
||||
for base_class in base_classes:
|
||||
base_class_generation = generate_base_class_code(base_class)
|
||||
f.write(base_class_generation)
|
||||
|
||||
# generate the class serialization
|
||||
classes = sorted(classes, key=lambda x: x.name)
|
||||
for class_entry in classes:
|
||||
class_generation = generate_class_code(class_entry)
|
||||
if class_generation is None:
|
||||
continue
|
||||
f.write(class_generation)
|
||||
|
||||
f.write(FOOTER)
|
||||
10
external/duckdb/scripts/generate_settings.py
vendored
Normal file
10
external/duckdb/scripts/generate_settings.py
vendored
Normal file
@@ -0,0 +1,10 @@
|
||||
from settings_scripts import parse_and_sort_json_file, update_header_file, update_scopes, update_src_code
|
||||
from settings_scripts.config import SettingsList, make_format
|
||||
|
||||
if __name__ == '__main__':
|
||||
parse_and_sort_json_file()
|
||||
update_header_file()
|
||||
update_scopes()
|
||||
update_src_code()
|
||||
make_format()
|
||||
print(f"- Successfully parsed and included {len(SettingsList)} setting(s)!")
|
||||
77
external/duckdb/scripts/generate_storage_info.py
vendored
Normal file
77
external/duckdb/scripts/generate_storage_info.py
vendored
Normal file
@@ -0,0 +1,77 @@
|
||||
import json
|
||||
import os
|
||||
|
||||
scripts_dir = os.path.dirname(os.path.abspath(__file__))
|
||||
VERSION_MAP_PATH = scripts_dir + "/../src/storage/version_map.json"
|
||||
STORAGE_INFO_PATH = scripts_dir + "/../src/storage/storage_info.cpp"
|
||||
START_MARKER = "// START OF {type} VERSION INFO"
|
||||
END_MARKER = "// END OF {type} VERSION INFO"
|
||||
|
||||
|
||||
def generate_version_info_array(storage_versions, type, name, default):
|
||||
result = []
|
||||
name_upper = name.upper()
|
||||
if 'latest' in storage_versions:
|
||||
latest_value = storage_versions['latest']
|
||||
result.append(f"const uint64_t LATEST_{name_upper} = {latest_value};")
|
||||
|
||||
result.append(f"const uint64_t DEFAULT_{name_upper} = {default};")
|
||||
|
||||
result.append(f"static const {type} {name}[] = {{")
|
||||
|
||||
for version_name, storage_version in storage_versions.items():
|
||||
result.append(f'\t{{"{version_name}", {storage_version}}},')
|
||||
|
||||
result.append("\t{nullptr, 0}")
|
||||
result.append("};\n")
|
||||
|
||||
return "\n".join(result)
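# Illustrative sketch only (hypothetical input values, not taken from version_map.json):
# generate_version_info_array({'latest': 4, 'v1.0.0': 1}, 'StorageVersionInfo', 'storage_version_info', 1)
# would return roughly:
#   const uint64_t LATEST_STORAGE_VERSION_INFO = 4;
#   const uint64_t DEFAULT_STORAGE_VERSION_INFO = 1;
#   static const StorageVersionInfo storage_version_info[] = {
#       {"latest", 4},
#       {"v1.0.0", 1},
#       {nullptr, 0}
#   };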
|
||||
|
||||
|
||||
def main():
|
||||
|
||||
with open(VERSION_MAP_PATH, 'r') as json_file:
|
||||
version_map = json.load(json_file)
|
||||
|
||||
with open(STORAGE_INFO_PATH, "r") as cpp_file:
|
||||
content = cpp_file.read()
|
||||
|
||||
for key in version_map['serialization']['values'].keys():
|
||||
if key in ['latest']:
|
||||
continue
|
||||
if key not in version_map['storage']['values'].keys():
|
||||
print(f'Key {key} found in serialization version but not in storage version')
|
||||
exit(1)
|
||||
types = ['storage', 'serialization']
|
||||
for type in version_map:
|
||||
if type not in types:
|
||||
print(f"Unexpected key {type}")
|
||||
exit(1)
|
||||
capitalized_type = type.capitalize()
|
||||
upper_type = type.upper()
|
||||
array_code = generate_version_info_array(
|
||||
version_map[type]['values'],
|
||||
f'{capitalized_type}VersionInfo',
|
||||
f'{type}_version_info',
|
||||
version_map[type]['default'],
|
||||
)
|
||||
|
||||
start_marker = START_MARKER.format(type=upper_type)
|
||||
start_index = content.find(start_marker)
|
||||
if start_index == -1:
|
||||
print(f"storage_info.cpp is corrupted, could not find the START_MARKER for {type}")
|
||||
exit(1)
|
||||
|
||||
end_marker = END_MARKER.format(type=upper_type)
|
||||
end_index = content.find(end_marker)
|
||||
if end_index == -1:
|
||||
print(f"storage_info.cpp is corrupted, could not find the END_MARKER for {type}")
|
||||
exit(1)
|
||||
content = content[: start_index + len(start_marker)] + "\n" + array_code + content[end_index:]
|
||||
|
||||
with open(STORAGE_INFO_PATH, "w") as cpp_file:
|
||||
cpp_file.write(content)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
49
external/duckdb/scripts/generate_storage_version.py
vendored
Normal file
49
external/duckdb/scripts/generate_storage_version.py
vendored
Normal file
@@ -0,0 +1,49 @@
|
||||
# this script re-generates the storage used for storage_version.test_slow
|
||||
# before running this script, increment the version number in src/storage/storage_info.cpp and recompile (`make`)
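# illustrative workflow (hypothetical shell session, assuming a release build of the shell exists):
#   1. bump the version number in src/storage/storage_info.cpp
#   2. make
#   3. python scripts/generate_storage_version.py
# this re-creates test/sql/storage_version/storage_version.db from generate_storage_version.sql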
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
from python_helpers import open_utf8
|
||||
|
||||
shell_proc = os.path.join('build', 'release', 'duckdb')
|
||||
|
||||
gen_storage_script = os.path.join('test', 'sql', 'storage_version', 'generate_storage_version.sql')
|
||||
gen_storage_target = os.path.join('test', 'sql', 'storage_version', 'storage_version.db')
|
||||
|
||||
|
||||
def try_remove_file(fname):
|
||||
try:
|
||||
os.remove(fname)
|
||||
except:
|
||||
pass
|
||||
|
||||
|
||||
try_remove_file(gen_storage_target)
|
||||
try_remove_file(gen_storage_target + '.wal')
|
||||
|
||||
|
||||
def run_command_in_shell(cmd):
|
||||
print(cmd)
|
||||
res = subprocess.run(
|
||||
[shell_proc, '--batch', '-init', '/dev/null', gen_storage_target],
|
||||
capture_output=True,
|
||||
input=bytearray(cmd, 'utf8'),
|
||||
)
|
||||
stdout = res.stdout.decode('utf8').strip()
|
||||
stderr = res.stderr.decode('utf8').strip()
|
||||
if res.returncode != 0:
|
||||
print("Failed to create database file!")
|
||||
print("----STDOUT----")
|
||||
print(stdout)
|
||||
print("----STDERR----")
|
||||
print(stderr)
|
||||
|
||||
|
||||
with open_utf8(gen_storage_script, 'r') as f:
|
||||
cmd = f.read()
|
||||
|
||||
run_command_in_shell(cmd)
|
||||
run_command_in_shell('select * from integral_values')
|
||||
run_command_in_shell('select * from integral_values')
|
||||
|
||||
try_remove_file(gen_storage_target + '.wal')
|
||||
137
external/duckdb/scripts/generate_tpcds_results.py
vendored
Normal file
137
external/duckdb/scripts/generate_tpcds_results.py
vendored
Normal file
@@ -0,0 +1,137 @@
|
||||
import psycopg2
|
||||
import argparse
|
||||
import os
|
||||
import platform
|
||||
import shutil
|
||||
import sys
|
||||
import subprocess
|
||||
import multiprocessing.pool
|
||||
|
||||
parser = argparse.ArgumentParser(description='Generate TPC-DS reference results from Postgres.')
|
||||
parser.add_argument(
|
||||
'--sf', dest='sf', action='store', help='The TPC-DS scale factor reference results to generate', default=1
|
||||
)
|
||||
parser.add_argument(
|
||||
'--query-dir',
|
||||
dest='query_dir',
|
||||
action='store',
|
||||
help='The directory with queries to run',
|
||||
default='extension/tpcds/dsdgen/queries',
|
||||
)
|
||||
parser.add_argument(
|
||||
'--answer-dir',
|
||||
dest='answer_dir',
|
||||
action='store',
|
||||
help='The directory where to store the answers',
|
||||
default='extension/tpcds/dsdgen/answers/sf${SF}',
|
||||
)
|
||||
parser.add_argument(
|
||||
'--duckdb-path',
|
||||
dest='duckdb_path',
|
||||
action='store',
|
||||
help='The path to the DuckDB executable',
|
||||
default='build/reldebug/duckdb',
|
||||
)
|
||||
parser.add_argument(
|
||||
'--skip-load',
|
||||
dest='skip_load',
|
||||
action='store_const',
|
||||
const=True,
|
||||
help='Whether or not to skip loading',
|
||||
default=False,
|
||||
)
|
||||
parser.add_argument(
|
||||
'--query-list', dest='query_list', action='store', help='The list of queries to run (default = all)', default=''
|
||||
)
|
||||
parser.add_argument('--nthreads', dest='nthreads', action='store', type=int, help='The number of threads', default=0)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
con = psycopg2.connect(database='postgres')
|
||||
c = con.cursor()
|
||||
if not args.skip_load:
|
||||
tpcds_dir = f'tpcds_sf{args.sf}'
|
||||
|
||||
q = f"""
|
||||
CALL dsdgen(sf={args.sf});
|
||||
EXPORT DATABASE '{tpcds_dir}' (DELIMITER '|');
|
||||
"""
|
||||
proc = subprocess.Popen([args.duckdb_path, "-c", q])
|
||||
proc.wait()
|
||||
if proc.returncode != 0:
|
||||
exit(1)
|
||||
|
||||
# drop the previous tables
|
||||
tables = [
|
||||
'name',
|
||||
'web_site',
|
||||
'web_sales',
|
||||
'web_returns',
|
||||
'web_page',
|
||||
'warehouse',
|
||||
'time_dim',
|
||||
'store_sales',
|
||||
'store_returns',
|
||||
'store',
|
||||
'ship_mode',
|
||||
'reason',
|
||||
'promotion',
|
||||
'item',
|
||||
'inventory',
|
||||
'income_band',
|
||||
'household_demographics',
|
||||
'date_dim',
|
||||
'customer_demographics',
|
||||
'customer_address',
|
||||
'customer',
|
||||
'catalog_sales',
|
||||
'catalog_returns',
|
||||
'catalog_page',
|
||||
'call_center',
|
||||
]
|
||||
for table in tables:
|
||||
c.execute(f'DROP TABLE IF EXISTS {table};')
|
||||
|
||||
with open(os.path.join(tpcds_dir, 'schema.sql'), 'r') as f:
|
||||
schema = f.read()
|
||||
|
||||
c.execute(schema)
|
||||
|
||||
with open(os.path.join(tpcds_dir, 'load.sql'), 'r') as f:
|
||||
load = f.read()
|
||||
|
||||
load = load.replace(f'{tpcds_dir}/', f'{os.getcwd()}/{tpcds_dir}/')
|
||||
|
||||
c.execute(load)
|
||||
|
||||
con.commit()
|
||||
|
||||
# get a list of all queries
|
||||
queries = os.listdir(args.query_dir)
|
||||
queries.sort()
|
||||
|
||||
answer_dir = args.answer_dir.replace('${SF}', str(args.sf))
|
||||
|
||||
if len(args.query_list) > 0:
|
||||
passing_queries = [x + '.sql' for x in args.query_list.split(',')]
|
||||
queries = [x for x in queries if x in passing_queries]
|
||||
queries.sort()
|
||||
|
||||
|
||||
def run_query(q):
|
||||
print(q)
|
||||
with open(os.path.join(args.query_dir, q), 'r') as f:
|
||||
sql_query = f.read()
|
||||
answer_path = os.path.join(os.getcwd(), answer_dir, q.replace('.sql', '.csv'))
|
||||
c.execute(f'DROP TABLE IF EXISTS "query_result{q}"')
|
||||
c.execute(f'CREATE TABLE "query_result{q}" AS ' + sql_query)
|
||||
c.execute(f"COPY \"query_result{q}\" TO '{answer_path}' (FORMAT CSV, DELIMITER '|', HEADER, NULL 'NULL')")
|
||||
|
||||
|
||||
if args.nthreads == 0:
|
||||
for q in queries:
|
||||
run_query(q)
|
||||
else:
|
||||
pool = multiprocessing.pool.ThreadPool(processes=args.nthreads)
|
||||
|
||||
pool.map(run_query, queries)
|
||||
116
external/duckdb/scripts/generate_tpcds_schema.py
vendored
Normal file
116
external/duckdb/scripts/generate_tpcds_schema.py
vendored
Normal file
@@ -0,0 +1,116 @@
|
||||
import os
|
||||
import subprocess
|
||||
|
||||
# note: hard-coded path to a local DuckDB shell build; adjust this path before running the script
duckdb_program = '/Users/myth/Programs/duckdb-bugfix/build/release/duckdb'
|
||||
|
||||
struct_def = '''struct $STRUCT_NAME {
|
||||
static constexpr char *Name = "$NAME";
|
||||
static const char *Columns[];
|
||||
static constexpr idx_t ColumnCount = $COLUMN_COUNT;
|
||||
static const LogicalType Types[];
|
||||
static constexpr idx_t PrimaryKeyCount = $PK_COLUMN_COUNT;
|
||||
static const char *PrimaryKeyColumns[];
|
||||
};
|
||||
'''
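# for illustration only (the concrete counts come from pragma_table_info at run time): for the
# 'call_center' table the filled-in template would look roughly like
#   struct CallCenterInfo {
#       static constexpr char *Name = "call_center";
#       static const char *Columns[];
#       static constexpr idx_t ColumnCount = ...;      # filled in from the column count query
#       static const LogicalType Types[];
#       static constexpr idx_t PrimaryKeyCount = ...;  # filled in from the primary key count query
#       static const char *PrimaryKeyColumns[];
#   };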
|
||||
|
||||
initcode = '''
|
||||
call dsdgen(sf=0);
|
||||
.mode csv
|
||||
.header 0
|
||||
'''
|
||||
|
||||
column_count_query = '''
|
||||
select count(*) from pragma_table_info('$NAME');
|
||||
'''
|
||||
|
||||
pk_column_count_query = '''
|
||||
select count(*) from pragma_table_info('$NAME') where pk=true;
|
||||
'''
|
||||
|
||||
gen_names = '''
|
||||
select concat('const char *', '$STRUCT_NAME', '::Columns[] = {', STRING_AGG('"' || name || '"', ', ') || '};') from pragma_table_info('$NAME');
|
||||
'''
|
||||
|
||||
gen_types = '''
|
||||
select concat('const LogicalType ', '$STRUCT_NAME', '::Types[] = {', STRING_AGG('LogicalType::' || type, ', ') || '};') from pragma_table_info('$NAME');
|
||||
'''
|
||||
|
||||
pk_columns = '''
|
||||
select concat('const char *', '$STRUCT_NAME', '::PrimaryKeyColumns[] = {', STRING_AGG('"' || name || '"', ', ') || '};') from pragma_table_info('$NAME') where pk=true;
|
||||
'''
|
||||
|
||||
|
||||
def run_query(sql):
|
||||
input_sql = initcode + '\n' + sql
|
||||
res = subprocess.run(duckdb_program, input=input_sql.encode('utf8'), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
|
||||
stdout = res.stdout.decode('utf8').strip()
|
||||
stderr = res.stderr.decode('utf8').strip()
|
||||
if res.returncode != 0:
|
||||
print("FAILED TO RUN QUERY")
|
||||
print(stderr)
|
||||
exit(1)
|
||||
return stdout
|
||||
|
||||
|
||||
def prepare_query(sql, table_name, struct_name):
|
||||
return sql.replace('$NAME', table_name).replace('$STRUCT_NAME', struct_name)
|
||||
|
||||
|
||||
header = '''
|
||||
#pragma once
|
||||
|
||||
#include "duckdb.hpp"
|
||||
|
||||
#ifndef DUCKDB_AMALGAMATION
|
||||
#include "duckdb/common/exception.hpp"
|
||||
#include "duckdb/common/types/date.hpp"
|
||||
#include "duckdb/parser/column_definition.hpp"
|
||||
#include "duckdb/storage/data_table.hpp"
|
||||
#include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp"
|
||||
#include "duckdb/planner/parsed_data/bound_create_table_info.hpp"
|
||||
#include "duckdb/parser/parsed_data/create_table_info.hpp"
|
||||
#include "duckdb/parser/constraints/not_null_constraint.hpp"
|
||||
#include "duckdb/catalog/catalog.hpp"
|
||||
#include "duckdb/planner/binder.hpp"
|
||||
#endif
|
||||
|
||||
namespace tpcds {
|
||||
|
||||
using duckdb::LogicalType;
|
||||
using duckdb::idx_t;
|
||||
'''
|
||||
|
||||
footer = '''
|
||||
}
|
||||
'''
|
||||
|
||||
print(header)
|
||||
|
||||
table_list = run_query('show tables')
|
||||
for table_name in table_list.split('\n'):
|
||||
table_name = table_name.strip()
|
||||
print(
|
||||
'''
|
||||
//===--------------------------------------------------------------------===//
|
||||
// $NAME
|
||||
//===--------------------------------------------------------------------===//'''.replace(
|
||||
'$NAME', table_name
|
||||
)
|
||||
)
|
||||
struct_name = str(table_name.title().replace('_', '')) + 'Info'
|
||||
column_count = int(run_query(prepare_query(column_count_query, table_name, struct_name)).strip())
|
||||
pk_column_count = int(run_query(prepare_query(pk_column_count_query, table_name, struct_name)).strip())
|
||||
print(
|
||||
prepare_query(struct_def, table_name, struct_name)
|
||||
.replace('$COLUMN_COUNT', str(column_count))
|
||||
.replace('$PK_COLUMN_COUNT', str(pk_column_count))
|
||||
)
|
||||
|
||||
print(run_query(prepare_query(gen_names, table_name, struct_name)).replace('""', '"').strip('"'))
|
||||
print("")
|
||||
print(run_query(prepare_query(gen_types, table_name, struct_name)).strip('"'))
|
||||
print("")
|
||||
print(run_query(prepare_query(pk_columns, table_name, struct_name)).replace('""', '"').strip('"'))
|
||||
|
||||
print(footer)
|
||||
21
external/duckdb/scripts/generate_vector_sizes.py
vendored
Normal file
21
external/duckdb/scripts/generate_vector_sizes.py
vendored
Normal file
@@ -0,0 +1,21 @@
|
||||
supported_vector_sizes = [2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096]
|
||||
|
||||
result = ""
|
||||
for i in range(len(supported_vector_sizes)):
|
||||
vsize = supported_vector_sizes[i]
|
||||
if i == 0:
|
||||
result += "#if"
|
||||
else:
|
||||
result += "#elif"
|
||||
result += " STANDARD_VECTOR_SIZE == " + str(vsize) + "\n"
|
||||
result += "const sel_t FlatVector::incremental_vector[] = {"
|
||||
for idx in range(vsize):
|
||||
if idx != 0:
|
||||
result += ", "
|
||||
result += str(idx)
|
||||
result += "};\n"
|
||||
|
||||
result += """#else
|
||||
#error Unsupported VECTOR_SIZE!
|
||||
#endif"""
|
||||
print(result)
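# for illustration (hypothetical excerpt of the printed output): with the sizes above, the block starts with
#   #if STANDARD_VECTOR_SIZE == 2
#   const sel_t FlatVector::incremental_vector[] = {0, 1};
#   #elif STANDARD_VECTOR_SIZE == 4
#   const sel_t FlatVector::incremental_vector[] = {0, 1, 2, 3};
# and ends with the "#error Unsupported VECTOR_SIZE!" fallback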
|
||||
327
external/duckdb/scripts/gentpcecode.py
vendored
Normal file
327
external/duckdb/scripts/gentpcecode.py
vendored
Normal file
@@ -0,0 +1,327 @@
|
||||
import os
|
||||
from python_helpers import open_utf8
|
||||
|
||||
GENERATED_HEADER = 'include/tpce_generated.hpp'
|
||||
GENERATED_SOURCE = 'tpce_generated.cpp'
|
||||
TPCE_DIR = os.path.join('third_party', 'tpce-tool')
|
||||
|
||||
GENERATED_HEADER = os.path.join(TPCE_DIR, GENERATED_HEADER)
|
||||
GENERATED_SOURCE = os.path.join(TPCE_DIR, GENERATED_SOURCE)
|
||||
|
||||
current_table = None
|
||||
|
||||
tables = {}
|
||||
|
||||
print(GENERATED_HEADER)
|
||||
print(GENERATED_SOURCE)
|
||||
|
||||
header = open_utf8(GENERATED_HEADER, 'w+')
|
||||
source = open_utf8(GENERATED_SOURCE, 'w+')
|
||||
|
||||
for fp in [header, source]:
|
||||
fp.write(
|
||||
"""
|
||||
////////////////////////////////////////////////////////////////////
|
||||
////////////////////////////////////////////////////////////////////
|
||||
// THIS FILE IS GENERATED BY gentpcecode.py, DO NOT EDIT MANUALLY //
|
||||
////////////////////////////////////////////////////////////////////
|
||||
////////////////////////////////////////////////////////////////////
|
||||
|
||||
"""
|
||||
)
|
||||
|
||||
header.write(
|
||||
"""
|
||||
#include "duckdb/catalog/catalog.hpp"
|
||||
#include "duckdb/main/appender.hpp"
|
||||
#include "duckdb/main/connection.hpp"
|
||||
#include "duckdb/main/database.hpp"
|
||||
|
||||
#include "main/BaseLoader.h"
|
||||
#include "main/BaseLoaderFactory.h"
|
||||
#include "main/NullLoader.h"
|
||||
#include "main/TableRows.h"
|
||||
|
||||
namespace TPCE {
|
||||
class DuckDBLoaderFactory : public CBaseLoaderFactory {
|
||||
duckdb::Connection &con;
|
||||
std::string schema;
|
||||
std::string suffix;
|
||||
|
||||
public:
|
||||
DuckDBLoaderFactory(duckdb::Connection &con, std::string schema,
|
||||
std::string suffix)
|
||||
: con(con), schema(schema), suffix(suffix) {
|
||||
}
|
||||
|
||||
// Functions to create loader classes for individual tables.
|
||||
virtual CBaseLoader<ACCOUNT_PERMISSION_ROW> *
|
||||
CreateAccountPermissionLoader();
|
||||
virtual CBaseLoader<ADDRESS_ROW> *CreateAddressLoader();
|
||||
virtual CBaseLoader<BROKER_ROW> *CreateBrokerLoader();
|
||||
virtual CBaseLoader<CASH_TRANSACTION_ROW> *
|
||||
CreateCashTransactionLoader();
|
||||
virtual CBaseLoader<CHARGE_ROW> *CreateChargeLoader();
|
||||
virtual CBaseLoader<COMMISSION_RATE_ROW> *CreateCommissionRateLoader();
|
||||
virtual CBaseLoader<COMPANY_COMPETITOR_ROW> *
|
||||
CreateCompanyCompetitorLoader();
|
||||
virtual CBaseLoader<COMPANY_ROW> *CreateCompanyLoader();
|
||||
virtual CBaseLoader<CUSTOMER_ACCOUNT_ROW> *
|
||||
CreateCustomerAccountLoader();
|
||||
virtual CBaseLoader<CUSTOMER_ROW> *CreateCustomerLoader();
|
||||
virtual CBaseLoader<CUSTOMER_TAXRATE_ROW> *
|
||||
CreateCustomerTaxrateLoader();
|
||||
virtual CBaseLoader<DAILY_MARKET_ROW> *CreateDailyMarketLoader();
|
||||
virtual CBaseLoader<EXCHANGE_ROW> *CreateExchangeLoader();
|
||||
virtual CBaseLoader<FINANCIAL_ROW> *CreateFinancialLoader();
|
||||
virtual CBaseLoader<HOLDING_ROW> *CreateHoldingLoader();
|
||||
virtual CBaseLoader<HOLDING_HISTORY_ROW> *CreateHoldingHistoryLoader();
|
||||
virtual CBaseLoader<HOLDING_SUMMARY_ROW> *CreateHoldingSummaryLoader();
|
||||
virtual CBaseLoader<INDUSTRY_ROW> *CreateIndustryLoader();
|
||||
virtual CBaseLoader<LAST_TRADE_ROW> *CreateLastTradeLoader();
|
||||
virtual CBaseLoader<NEWS_ITEM_ROW> *CreateNewsItemLoader();
|
||||
virtual CBaseLoader<NEWS_XREF_ROW> *CreateNewsXRefLoader();
|
||||
virtual CBaseLoader<SECTOR_ROW> *CreateSectorLoader();
|
||||
virtual CBaseLoader<SECURITY_ROW> *CreateSecurityLoader();
|
||||
virtual CBaseLoader<SETTLEMENT_ROW> *CreateSettlementLoader();
|
||||
virtual CBaseLoader<STATUS_TYPE_ROW> *CreateStatusTypeLoader();
|
||||
virtual CBaseLoader<TAX_RATE_ROW> *CreateTaxRateLoader();
|
||||
virtual CBaseLoader<TRADE_HISTORY_ROW> *CreateTradeHistoryLoader();
|
||||
virtual CBaseLoader<TRADE_ROW> *CreateTradeLoader();
|
||||
virtual CBaseLoader<TRADE_REQUEST_ROW> *CreateTradeRequestLoader();
|
||||
virtual CBaseLoader<TRADE_TYPE_ROW> *CreateTradeTypeLoader();
|
||||
virtual CBaseLoader<WATCH_ITEM_ROW> *CreateWatchItemLoader();
|
||||
virtual CBaseLoader<WATCH_LIST_ROW> *CreateWatchListLoader();
|
||||
virtual CBaseLoader<ZIP_CODE_ROW> *CreateZipCodeLoader();
|
||||
};
|
||||
|
||||
"""
|
||||
)
|
||||
|
||||
source.write(
|
||||
"""
|
||||
#include "tpce_generated.hpp"
|
||||
|
||||
using namespace duckdb;
|
||||
using namespace std;
|
||||
|
||||
namespace TPCE {
|
||||
struct tpce_append_information {
|
||||
tpce_append_information(Connection &con, string schema, string table) :
|
||||
appender(con, schema, table) {}
|
||||
|
||||
Appender appender;
|
||||
};
|
||||
|
||||
static void append_value(tpce_append_information &info, int32_t value) {
|
||||
info.appender.Append<int32_t>(value);
|
||||
}
|
||||
|
||||
static void append_bigint(tpce_append_information &info, int64_t value) {
|
||||
info.appender.Append<int64_t>(value);
|
||||
}
|
||||
|
||||
static void append_string(tpce_append_information &info, const char *value) {
|
||||
info.appender.Append<Value>(Value(value));
|
||||
}
|
||||
|
||||
static void append_double(tpce_append_information &info, double value) {
|
||||
info.appender.Append<double>(value);
|
||||
}
|
||||
|
||||
static void append_bool(tpce_append_information &info, bool value) {
|
||||
info.appender.Append<bool>(value);
|
||||
}
|
||||
|
||||
static void append_timestamp(tpce_append_information &info, CDateTime time) {
|
||||
int32_t year = 0, month = 0, day = 0, hour = 0, minute = 0, second = 0, msec = 0;
|
||||
time.GetYMDHMS(&year, &month, &day, &hour, &minute, &second, &msec);
|
||||
info.appender.Append<Value>(Value::TIMESTAMP(year, month, day, hour, minute, second, msec * 1000));
|
||||
}
|
||||
|
||||
void append_char(tpce_append_information &info, char value) {
|
||||
char val[2];
|
||||
val[0] = value;
|
||||
val[1] = '\\0';
|
||||
append_string(info, val);
|
||||
}
|
||||
|
||||
template <typename T> class DuckDBBaseLoader : public CBaseLoader<T> {
|
||||
protected:
|
||||
tpce_append_information info;
|
||||
|
||||
public:
|
||||
DuckDBBaseLoader(Connection &con, string schema, string table) :
|
||||
info(con, schema, table) {
|
||||
}
|
||||
|
||||
void FinishLoad() {
|
||||
|
||||
}
|
||||
};
|
||||
|
||||
"""
|
||||
)
|
||||
|
||||
with open(os.path.join(TPCE_DIR, 'include/main/TableRows.h'), 'r') as f:
|
||||
for line in f:
|
||||
line = line.strip()
|
||||
if line.startswith('typedef struct '):
|
||||
line = line.replace('typedef struct ', '')
|
||||
current_table = line.split(' ')[0].replace('_ROW', ' ').replace('_', ' ').lower().strip()
|
||||
tables[current_table] = []
|
||||
elif line.startswith('}'):
|
||||
current_table = None
|
||||
elif current_table is not None:
|
||||
# row
|
||||
# get type
|
||||
splits = line.strip().split(' ')
|
||||
if len(splits) < 2:
|
||||
continue
|
||||
line = splits[0]
|
||||
name = splits[1].split(';')[0].split('[')[0].lower()
|
||||
is_single_char = False
|
||||
if 'TIdent' in line or 'INT64' in line or 'TTrade' in line:
|
||||
tpe = "TypeId::BIGINT"
|
||||
sqltpe = "BIGINT"
|
||||
elif 'double' in line or 'float' in line:
|
||||
tpe = "TypeId::DECIMAL"
|
||||
sqltpe = "DECIMAL"
|
||||
elif 'int' in line:
|
||||
tpe = "TypeId::INTEGER"
|
||||
sqltpe = "INTEGER"
|
||||
elif 'CDateTime' in line:
|
||||
tpe = "TypeId::TIMESTAMP"
|
||||
sqltpe = "TIMESTAMP"
|
||||
elif 'bool' in line:
|
||||
tpe = 'TypeId::BOOLEAN'
|
||||
sqltpe = "BOOLEAN"
|
||||
elif 'char' in line:
|
||||
if '[' not in splits[1]:
|
||||
is_single_char = True
|
||||
tpe = "TypeId::VARCHAR"
|
||||
sqltpe = "VARCHAR"
|
||||
else:
|
||||
continue
|
||||
tables[current_table].append([name, tpe, is_single_char, sqltpe])
|
||||
|
||||
|
||||
def get_tablename(name):
|
||||
name = name.title().replace(' ', '')
|
||||
if name == 'NewsXref':
|
||||
return 'NewsXRef'
|
||||
return name
|
||||
|
||||
|
||||
for table in tables.keys():
|
||||
source.write(
|
||||
"""
|
||||
class DuckDB${TABLENAME}Load : public DuckDBBaseLoader<${ROW_TYPE}> {
|
||||
public:
|
||||
DuckDB${TABLENAME}Load(Connection &con, string schema, string table) :
|
||||
DuckDBBaseLoader(con, schema, table) {
|
||||
|
||||
}
|
||||
|
||||
void WriteNextRecord(const ${ROW_TYPE} &next_record) {
|
||||
info.appender.BeginRow();""".replace(
|
||||
"${TABLENAME}", get_tablename(table)
|
||||
).replace(
|
||||
"${ROW_TYPE}", table.upper().replace(' ', '_') + '_ROW'
|
||||
)
|
||||
)
|
||||
source.write("\n")
|
||||
collist = tables[table]
|
||||
for i in range(len(collist)):
|
||||
entry = collist[i]
|
||||
name = entry[0].upper()
|
||||
tpe = entry[1]
|
||||
if tpe == "TypeId::BIGINT":
|
||||
funcname = "bigint"
|
||||
elif tpe == "TypeId::DECIMAL":
|
||||
funcname = "double"
|
||||
elif tpe == "TypeId::INTEGER":
|
||||
funcname = "value"
|
||||
elif tpe == "TypeId::TIMESTAMP":
|
||||
funcname = "timestamp"
|
||||
elif tpe == 'TypeId::BOOLEAN':
|
||||
funcname = "bool"
|
||||
elif tpe == "TypeId::VARCHAR":
|
||||
if entry[2]:
|
||||
funcname = "char"
|
||||
else:
|
||||
funcname = "string"
|
||||
else:
|
||||
print("Unknown type " + tpe)
|
||||
exit(1)
|
||||
source.write("\t\tappend_%s(info, next_record.%s);" % (funcname, name))
|
||||
if i != len(collist) - 1:
|
||||
source.write("\n")
|
||||
source.write(
|
||||
"""
|
||||
info.appender.EndRow();
|
||||
}
|
||||
|
||||
};"""
|
||||
)
|
||||
|
||||
|
||||
for table in tables.keys():
|
||||
source.write(
|
||||
"""
|
||||
CBaseLoader<${ROW_TYPE}> *
|
||||
DuckDBLoaderFactory::Create${TABLENAME}Loader() {
|
||||
return new DuckDB${TABLENAME}Load(con, schema, "${TABLEINDB}" + suffix);
|
||||
}
|
||||
""".replace(
|
||||
"${TABLENAME}", get_tablename(table)
|
||||
)
|
||||
.replace("${ROW_TYPE}", table.upper().replace(' ', '_') + '_ROW')
|
||||
.replace("${TABLEINDB}", table.replace(' ', '_'))
|
||||
)
|
||||
|
||||
source.write("\n")
|
||||
|
||||
# static string RegionSchema(string schema, string suffix) {
|
||||
# return "CREATE TABLE " + schema + ".region" + suffix + " ("
|
||||
# "r_regionkey INT NOT NULL,"
|
||||
# "r_name VARCHAR(25) NOT NULL,"
|
||||
# "r_comment VARCHAR(152) NOT NULL);";
|
||||
# }
|
||||
|
||||
|
||||
for table in tables.keys():
|
||||
tname = table.replace(' ', '_')
|
||||
str = 'static string ' + table.title().replace(' ', '') + 'Schema(string schema, string suffix) {\n'
|
||||
str += '\treturn "CREATE TABLE " + schema + ".%s" + suffix + " ("\n' % (tname,)
|
||||
columns = tables[table]
|
||||
for i in range(len(columns)):
|
||||
column = columns[i]
|
||||
str += '\t "' + column[0] + " " + column[3]
|
||||
if i == len(columns) - 1:
|
||||
str += ')";'
|
||||
else:
|
||||
str += ',"'
|
||||
str += "\n"
|
||||
str += "}\n\n"
|
||||
source.write(str)
|
||||
|
||||
|
||||
func = 'void CreateTPCESchema(duckdb::DuckDB &db, duckdb::Connection &con, std::string &schema, std::string &suffix)'
|
||||
header.write(func + ';\n\n')
|
||||
source.write(func + ' {\n')
|
||||
|
||||
|
||||
# con.Query(RegionSchema(schema, suffix));
|
||||
|
||||
for table in tables.keys():
|
||||
tname = table.replace(' ', '_')
|
||||
source.write('\tcon.Query(%sSchema(schema, suffix));\n' % (table.title().replace(' ', '')))
|
||||
|
||||
|
||||
source.write('}\n\n')
|
||||
|
||||
|
||||
for fp in [header, source]:
|
||||
fp.write("} /* namespace TPCE */\n")
|
||||
fp.close()
|
||||
61
external/duckdb/scripts/get_test_list.py
vendored
Normal file
61
external/duckdb/scripts/get_test_list.py
vendored
Normal file
@@ -0,0 +1,61 @@
|
||||
import argparse
|
||||
import sys
|
||||
import subprocess
|
||||
import re
|
||||
import os
|
||||
|
||||
DEFAULT_UNITTEST_PATH = 'build/release/test/unittest'
|
||||
|
||||
parser = argparse.ArgumentParser(description='Print a list of tests to run.')
|
||||
parser.add_argument(
|
||||
'--file-contains',
|
||||
dest='file_contains',
|
||||
action='store',
|
||||
help='Filter based on a string contained in the text',
|
||||
default=None,
|
||||
)
|
||||
parser.add_argument(
|
||||
'--unittest',
|
||||
dest='unittest',
|
||||
action='store',
|
||||
help='The path to the unittest program',
|
||||
default=DEFAULT_UNITTEST_PATH,
|
||||
)
|
||||
parser.add_argument('--list', dest='filter', action='store', help='The unittest filter to apply', default='')
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
file_contains = args.file_contains
|
||||
extra_args = [args.filter]
|
||||
unittest_program = args.unittest
|
||||
|
||||
# Override default for windows
|
||||
if os.name == 'nt' and unittest_program == DEFAULT_UNITTEST_PATH:
|
||||
unittest_program = 'build/release/test/Release/unittest.exe'
|
||||
|
||||
proc = subprocess.Popen([unittest_program, '-l'] + extra_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
stdout = proc.stdout.read().decode('utf8')
|
||||
stderr = proc.stderr.read().decode('utf8')
|
||||
proc.wait()
if proc.returncode != 0:
|
||||
print("Failed to run program " + unittest_program)
|
||||
print(proc.returncode)
|
||||
print(stdout)
|
||||
print(stderr)
|
||||
exit(1)
|
||||
|
||||
test_cases = []
|
||||
for line in stdout.splitlines()[1:]:
|
||||
if not line.strip():
|
||||
continue
|
||||
splits = line.rsplit('\t', 1)
|
||||
if file_contains is not None:
|
||||
if not os.path.isfile(splits[0]):
|
||||
continue
|
||||
try:
|
||||
with open(splits[0], 'r') as f:
|
||||
text = f.read()
|
||||
except UnicodeDecodeError:
|
||||
continue
|
||||
if file_contains not in text:
|
||||
continue
|
||||
print(splits[0])
|
||||
78
external/duckdb/scripts/include_analyzer.py
vendored
Normal file
78
external/duckdb/scripts/include_analyzer.py
vendored
Normal file
@@ -0,0 +1,78 @@
|
||||
import amalgamation
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import shutil
|
||||
from python_helpers import open_utf8
|
||||
|
||||
include_counts = {}
|
||||
include_chains = {}
|
||||
cached_includes = {}
|
||||
|
||||
|
||||
def analyze_include_file(fpath, already_included_files, prev_include=""):
|
||||
if fpath in already_included_files:
|
||||
return
|
||||
if fpath in amalgamation.always_excluded:
|
||||
return
|
||||
if fpath not in cached_includes:
|
||||
# print(fpath)
|
||||
with open_utf8(fpath, 'r') as f:
|
||||
text = f.read()
|
||||
(statements, includes) = amalgamation.get_includes(fpath, text)
|
||||
cached_includes[fpath] = includes
|
||||
else:
|
||||
includes = cached_includes[fpath]
|
||||
|
||||
if fpath in include_counts:
|
||||
include_counts[fpath] += 1
|
||||
else:
|
||||
include_counts[fpath] = 1
|
||||
|
||||
if fpath not in include_chains:
|
||||
include_chains[fpath] = {}
|
||||
if prev_include not in include_chains[fpath]:
|
||||
include_chains[fpath][prev_include] = 0
|
||||
include_chains[fpath][prev_include] += 1
|
||||
|
||||
already_included_files.append(fpath)
|
||||
if fpath.endswith('.h') or fpath.endswith('.hpp'):
|
||||
prev_include = fpath
|
||||
for include in includes:
|
||||
analyze_include_file(include, already_included_files, prev_include)
|
||||
|
||||
|
||||
def analyze_includes(dir):
|
||||
files = os.listdir(dir)
|
||||
files.sort()
|
||||
for fname in files:
|
||||
if fname in amalgamation.excluded_files:
|
||||
continue
|
||||
fpath = os.path.join(dir, fname)
|
||||
if os.path.isdir(fpath):
|
||||
analyze_includes(fpath)
|
||||
elif fname.endswith('.cpp') or fname.endswith('.c') or fname.endswith('.cc'):
|
||||
analyze_include_file(fpath, [])
|
||||
|
||||
|
||||
for compile_dir in amalgamation.compile_directories:
|
||||
analyze_includes(compile_dir)
|
||||
|
||||
kws = []
|
||||
for entry in include_counts.keys():
|
||||
kws.append([entry, include_counts[entry]])
|
||||
|
||||
kws.sort(key=lambda tup: -tup[1])
|
||||
for k in range(0, len(kws)):
|
||||
include_file = kws[k][0]
|
||||
include_count = kws[k][1]
|
||||
print("------------------------------------------------------------")
|
||||
print(include_file + " (" + str(include_count) + ")")
|
||||
print("------------------------------------------------------------")
|
||||
print("FILE INCLUDED FROM:")
|
||||
chainkws = []
|
||||
for chain in include_chains[include_file]:
|
||||
chainkws.append([chain, include_chains[include_file][chain]])
|
||||
chainkws.sort(key=lambda tup: -tup[1])
|
||||
for l in range(0, min(5, len(chainkws))):
|
||||
print(chainkws[l])
|
||||
21
external/duckdb/scripts/install_node.sh
vendored
Executable file
21
external/duckdb/scripts/install_node.sh
vendored
Executable file
@@ -0,0 +1,21 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
if [[ ${1:-false} == 'false' ]]; then
|
||||
echo "Error: pass node version as first argument"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
NODE_VERSION=$1
|
||||
|
||||
# if an existing nvm is already installed we need to unload it
|
||||
nvm unload || true
|
||||
|
||||
# here we set up the node version on the fly based on the matrix value.
|
||||
# This is done manually so that the build works the same on OS X
|
||||
rm -rf ./__nvm/ && git clone --depth 1 https://github.com/creationix/nvm.git ./__nvm
|
||||
source ./__nvm/nvm.sh
|
||||
nvm install ${NODE_VERSION}
|
||||
nvm use --delete-prefix ${NODE_VERSION}
|
||||
node --version
|
||||
npm --version
|
||||
which node
|
||||
33
external/duckdb/scripts/list_vcpkg_registry_packages.py
vendored
Normal file
33
external/duckdb/scripts/list_vcpkg_registry_packages.py
vendored
Normal file
@@ -0,0 +1,33 @@
|
||||
import argparse
|
||||
import requests
|
||||
|
||||
parser = argparse.ArgumentParser(description='Generate the list of packages provided by the registry at <baseline>.')
|
||||
parser.add_argument(
|
||||
'--baseline',
|
||||
action='store',
|
||||
help='The baseline (git commit) of the vcpkg-duckdb-ports',
|
||||
required=True,
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
GITHUB_API = "https://api.github.com/repos/duckdb/vcpkg-duckdb-ports/git/trees"
|
||||
|
||||
|
||||
def main():
|
||||
# Get the tree recursively for the commit
|
||||
response = requests.get(f"{GITHUB_API}/{args.baseline}?recursive=1")
|
||||
response.raise_for_status()
|
||||
|
||||
# Extract package names from ports directory
|
||||
packages = set()
|
||||
for item in response.json()['tree']:
|
||||
path = item['path']
|
||||
if path.startswith('ports/'):
|
||||
parts = path.split('/')
|
||||
if len(parts) > 2:
|
||||
packages.add(parts[1])
|
||||
print(sorted(list(packages)))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
97
external/duckdb/scripts/merge_vcpkg_deps.py
vendored
Normal file
97
external/duckdb/scripts/merge_vcpkg_deps.py
vendored
Normal file
@@ -0,0 +1,97 @@
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
|
||||
# Pass vcpkg.json files to merge their dependencies and produce a single vcpkg.json with their
|
||||
# combined & deduplicated dependencies. Note that this script is very dumb and some manual merging may be required
|
||||
# to combine extensions from multiple builds in the case of colliding dependencies.
|
||||
|
||||
# Also note: because the httpfs extension currently cannot use the latest openssl version (3.1), we need to pin
# the openssl version, which also requires pinning the vcpkg version here. When updating the vcpkg git hash we
# probably want to change it both here and in '.github/actions/build_extensions/action.yml' at the same time.
|
||||
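# Illustrative invocation (hypothetical extension paths, for illustration only):
#   python scripts/merge_vcpkg_deps.py extension_a/vcpkg.json extension_b/vcpkg.json
# which writes the merged dependency list, the pinned openssl override and the registry
# configuration to build/extension_configuration/vcpkg.json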
|
||||
dependencies_str = []
|
||||
dependencies_dict = []
|
||||
merged_overlay_ports = []
|
||||
merged_overlay_triplets = []
|
||||
|
||||
|
||||
def prefix_overlay_ports_or_triples(overlay_dir, path_to_vcpkg_json):
|
||||
def prefix_overlay_port_or_triplet(overlay_port_or_triplet):
|
||||
vcpkg_prefix_path = path_to_vcpkg_json[0 : path_to_vcpkg_json.find("/vcpkg.json")]
|
||||
if len(vcpkg_prefix_path) == 0:
|
||||
return overlay_port_or_triplet
|
||||
return vcpkg_prefix_path + '/' + overlay_port_or_triplet
|
||||
|
||||
return map(prefix_overlay_port_or_triplet, overlay_dir)
|
||||
|
||||
|
||||
for file in sys.argv[1:]:
|
||||
f = open(file)
|
||||
data = json.load(f)
|
||||
|
||||
if 'dependencies' in data:
|
||||
for dep in data['dependencies']:
|
||||
if type(dep) is str:
|
||||
dependencies_str.append(dep)
|
||||
elif type(dep) is dict:
|
||||
dependencies_dict.append(dep)
|
||||
else:
|
||||
raise Exception(f"Unknown entry type found in dependencies: '{dep}'")
|
||||
|
||||
if 'vcpkg-configuration' in data:
|
||||
if 'overlay-ports' in data['vcpkg-configuration']:
|
||||
merged_overlay_ports += prefix_overlay_ports_or_triples(data['vcpkg-configuration']['overlay-ports'], file)
|
||||
if 'overlay-triplets' in data['vcpkg-configuration']:
|
||||
merged_overlay_triplets += prefix_overlay_ports_or_triples(
|
||||
data['vcpkg-configuration']['overlay-triplets'], file
|
||||
)
|
||||
|
||||
final_deduplicated_deps = list()
|
||||
dedup_set = set()
|
||||
|
||||
for dep in dependencies_dict:
|
||||
if dep['name'] not in dedup_set:
|
||||
final_deduplicated_deps.append(dep)
|
||||
# TODO: deduplication is disabled for now, just let vcpkg handle duplicates in deps
|
||||
# dedup_set.add(dep['name'])
|
||||
|
||||
for dep in dependencies_str:
|
||||
if dep not in dedup_set:
|
||||
final_deduplicated_deps.append(dep)
|
||||
# TODO: deduplication is disabled for now, just let vcpkg handle duplicates in deps
|
||||
# dedup_set.add(dep)
|
||||
|
||||
opensslVersion = os.getenv("OPENSSL_VERSION_OVERRIDE", "3.0.8")
|
||||
data = {
|
||||
"description": f"Auto-generated vcpkg.json for combined DuckDB extension build, generated by 'scripts/merge_vcpkg_deps.py'",
|
||||
"builtin-baseline": "ce613c41372b23b1f51333815feb3edd87ef8a8b",
|
||||
"dependencies": final_deduplicated_deps,
|
||||
"overrides": [{"name": "openssl", "version": opensslVersion}],
|
||||
}
|
||||
|
||||
data['vcpkg-configuration'] = {}
|
||||
|
||||
if merged_overlay_ports:
|
||||
data['vcpkg-configuration']['overlay-ports'] = merged_overlay_ports
|
||||
|
||||
if merged_overlay_triplets:
|
||||
data['vcpkg-configuration']['overlay-triplets'] = merged_overlay_triplets
|
||||
|
||||
REGISTRY_BASELINE = '869bddccca976e0abe25894356e7f49e77765169'
|
||||
# NOTE: use 'scripts/list_vcpkg_registry_packages.py --baseline <baseline>' to generate the list of packages
|
||||
data['vcpkg-configuration']['registries'] = [
|
||||
{
|
||||
"kind": "git",
|
||||
"repository": "https://github.com/duckdb/vcpkg-duckdb-ports",
|
||||
"baseline": REGISTRY_BASELINE,
|
||||
"packages": ['avro-c', 'vcpkg-cmake'],
|
||||
}
|
||||
]
|
||||
|
||||
# Print output
|
||||
print("Writing to 'build/extension_configuration/vcpkg.json': ")
|
||||
print(data["dependencies"])
|
||||
|
||||
with open('build/extension_configuration/vcpkg.json', 'w', encoding='utf-8') as f:
|
||||
json.dump(data, f, ensure_ascii=False, indent=4)
|
||||
75
external/duckdb/scripts/modify_distribution_matrix.py
vendored
Normal file
75
external/duckdb/scripts/modify_distribution_matrix.py
vendored
Normal file
@@ -0,0 +1,75 @@
|
||||
# This script is used by CI to modify the deployment matrix for the extension distribution
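# Illustrative invocation (hypothetical file name and arch values):
#   python scripts/modify_distribution_matrix.py --input distribution_matrix.json \
#       --exclude "wasm_mvp;wasm_threads" --select_os linux --pretty
# prints the 'linux' entry of the matrix with the excluded duckdb_arch values filtered out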
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
import logging
|
||||
|
||||
# Define command-line arguments
|
||||
parser = argparse.ArgumentParser(description="Filter a JSON file based on excluded duckdb_arch values and select an OS")
|
||||
parser.add_argument("--input", required=True, help="Input JSON file path")
|
||||
parser.add_argument("--exclude", required=True, help="Semicolon-separated list of excluded duckdb_arch values")
|
||||
parser.add_argument("--output", help="Output JSON file path")
|
||||
parser.add_argument("--pretty", action="store_true", help="Pretty print the output JSON")
|
||||
parser.add_argument("--select_os", help="Select an OS to include in the output JSON")
|
||||
parser.add_argument("--deploy_matrix", action="store_true", help="Create a merged list used in deploy step")
|
||||
args = parser.parse_args()
|
||||
|
||||
# Parse the input file path, excluded arch values, and output file path
|
||||
input_json_file_path = args.input
|
||||
excluded_arch_values = args.exclude.split(";")
|
||||
output_json_file_path = args.output
|
||||
select_os = args.select_os
|
||||
|
||||
# Read the input JSON file
|
||||
with open(input_json_file_path, "r") as json_file:
|
||||
data = json.load(json_file)
|
||||
|
||||
|
||||
# Function to filter entries based on duckdb_arch values
|
||||
def filter_entries(data, arch_values):
|
||||
for os, config in data.items():
|
||||
if "include" in config:
|
||||
config["include"] = [entry for entry in config["include"] if entry["duckdb_arch"] not in arch_values]
|
||||
if not config["include"]:
|
||||
del config["include"]
|
||||
|
||||
return data
|
||||
|
||||
|
||||
# Filter the JSON data
|
||||
filtered_data = filter_entries(data, excluded_arch_values)
|
||||
|
||||
# Select an OS if specified
|
||||
if select_os:
|
||||
found = False
|
||||
for os in filtered_data.keys():
|
||||
if os == select_os:
|
||||
filtered_data = filtered_data[os]
|
||||
found = True
|
||||
break
|
||||
if found == False:
|
||||
logging.warning('A selection OS was provided but not found')
|
||||
filtered_data = []
|
||||
|
||||
# When deploy_matrix is specified, we only output a single merged include list with all the duckdb_archs
|
||||
elif args.deploy_matrix:
|
||||
deploy_archs = []
|
||||
|
||||
for os, config in filtered_data.items():
|
||||
if "include" in config:
|
||||
for item in config["include"]:
|
||||
deploy_archs.append({"duckdb_arch": item["duckdb_arch"]})
|
||||
|
||||
filtered_data = {"include": deploy_archs}
|
||||
|
||||
# Determine the JSON formatting
|
||||
indent = 2 if args.pretty else None
|
||||
|
||||
# If no output file is provided, print to stdout
|
||||
if output_json_file_path:
|
||||
with open(output_json_file_path, "w") as output_json_file:
|
||||
if filtered_data:
|
||||
json.dump(filtered_data, output_json_file, indent=indent)
|
||||
else:
|
||||
json.dump(filtered_data, sys.stdout, indent=indent)
|
||||
BIN
external/duckdb/scripts/null.txt
vendored
Normal file
Binary file not shown.
15
external/duckdb/scripts/osx_import_codesign_certificate.sh
vendored
Normal file
@@ -0,0 +1,15 @@
# create variables
export CERTIFICATE_PATH=$RUNNER_TEMP/build_certificate.p12
export KEYCHAIN_PATH=$RUNNER_TEMP/app-signing.keychain-db

# import certificate and provisioning profile from secrets
echo -n "$BUILD_CERTIFICATE_BASE64" | base64 --decode -o $CERTIFICATE_PATH

# create temporary keychain
security create-keychain -p "$KEYCHAIN_PASSWORD" $KEYCHAIN_PATH
security set-keychain-settings -lut 21600 $KEYCHAIN_PATH
security unlock-keychain -p "$KEYCHAIN_PASSWORD" $KEYCHAIN_PATH

# import certificate to keychain
security import $CERTIFICATE_PATH -P "$P12_PASSWORD" -A -t cert -f pkcs12 -k $KEYCHAIN_PATH
security list-keychain -d user -s $KEYCHAIN_PATH
417
external/duckdb/scripts/package_build.py
vendored
Normal file
@@ -0,0 +1,417 @@
|
||||
import os
|
||||
import sys
|
||||
import shutil
|
||||
import subprocess
|
||||
from python_helpers import open_utf8
|
||||
import re
|
||||
|
||||
excluded_objects = ['utf8proc_data.cpp']
|
||||
|
||||
|
||||
def third_party_includes():
|
||||
includes = []
|
||||
includes += [os.path.join('third_party', 'concurrentqueue')]
|
||||
includes += [os.path.join('third_party', 'fast_float')]
|
||||
includes += [os.path.join('third_party', 'fastpforlib')]
|
||||
includes += [os.path.join('third_party', 'fmt', 'include')]
|
||||
includes += [os.path.join('third_party', 'fsst')]
|
||||
includes += [os.path.join('third_party', 'httplib')]
|
||||
includes += [os.path.join('third_party', 'hyperloglog')]
|
||||
includes += [os.path.join('third_party', 'jaro_winkler')]
|
||||
includes += [os.path.join('third_party', 'jaro_winkler', 'details')]
|
||||
includes += [os.path.join('third_party', 'libpg_query')]
|
||||
includes += [os.path.join('third_party', 'libpg_query', 'include')]
|
||||
includes += [os.path.join('third_party', 'lz4')]
|
||||
includes += [os.path.join('third_party', 'brotli', 'include')]
|
||||
includes += [os.path.join('third_party', 'brotli', 'common')]
|
||||
includes += [os.path.join('third_party', 'brotli', 'dec')]
|
||||
includes += [os.path.join('third_party', 'brotli', 'enc')]
|
||||
includes += [os.path.join('third_party', 'mbedtls', 'include')]
|
||||
includes += [os.path.join('third_party', 'mbedtls', 'library')]
|
||||
includes += [os.path.join('third_party', 'miniz')]
|
||||
includes += [os.path.join('third_party', 'pcg')]
|
||||
includes += [os.path.join('third_party', 'pdqsort')]
|
||||
includes += [os.path.join('third_party', 're2')]
|
||||
includes += [os.path.join('third_party', 'ska_sort')]
|
||||
includes += [os.path.join('third_party', 'skiplist')]
|
||||
includes += [os.path.join('third_party', 'tdigest')]
|
||||
includes += [os.path.join('third_party', 'utf8proc')]
|
||||
includes += [os.path.join('third_party', 'utf8proc', 'include')]
|
||||
includes += [os.path.join('third_party', 'vergesort')]
|
||||
includes += [os.path.join('third_party', 'yyjson', 'include')]
|
||||
includes += [os.path.join('third_party', 'zstd', 'include')]
|
||||
return includes
|
||||
|
||||
|
||||
def third_party_sources():
|
||||
sources = []
|
||||
sources += [os.path.join('third_party', 'fmt')]
|
||||
sources += [os.path.join('third_party', 'fsst')]
|
||||
sources += [os.path.join('third_party', 'miniz')]
|
||||
sources += [os.path.join('third_party', 're2')]
|
||||
sources += [os.path.join('third_party', 'hyperloglog')]
|
||||
sources += [os.path.join('third_party', 'skiplist')]
|
||||
sources += [os.path.join('third_party', 'fastpforlib')]
|
||||
sources += [os.path.join('third_party', 'utf8proc')]
|
||||
sources += [os.path.join('third_party', 'libpg_query')]
|
||||
sources += [os.path.join('third_party', 'mbedtls')]
|
||||
sources += [os.path.join('third_party', 'yyjson')]
|
||||
sources += [os.path.join('third_party', 'zstd')]
|
||||
return sources
|
||||
|
||||
|
||||
def file_is_lib(fname, libname):
|
||||
libextensions = ['.a', '.lib']
|
||||
libprefixes = ['', 'lib']
|
||||
for ext in libextensions:
|
||||
for prefix in libprefixes:
|
||||
potential_libname = prefix + libname + ext
|
||||
if fname == potential_libname:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def get_libraries(binary_dir, libraries, extensions):
|
||||
result_libs = []
|
||||
|
||||
def find_library_recursive(search_dir, libname):
|
||||
flist = os.listdir(search_dir)
|
||||
for fname in flist:
|
||||
fpath = os.path.join(search_dir, fname)
|
||||
if os.path.isdir(fpath):
|
||||
entry = find_library_recursive(fpath, libname)
|
||||
if entry != None:
|
||||
return entry
|
||||
elif os.path.isfile(fpath) and file_is_lib(fname, libname):
|
||||
return search_dir
|
||||
return None
|
||||
|
||||
def find_library(search_dir, libname, result_libs, required=False):
|
||||
if libname == 'Threads::Threads':
|
||||
result_libs += [(None, 'pthread')]
|
||||
return
|
||||
libdir = find_library_recursive(binary_dir, libname)
|
||||
if libdir is None and required:
|
||||
raise Exception(f"Failed to locate required library {libname} in {binary_dir}")
|
||||
|
||||
result_libs += [(libdir, libname)]
|
||||
|
||||
duckdb_lib_name = 'duckdb_static'
|
||||
if os.name == 'nt':
|
||||
duckdb_lib_name = 'duckdb'
|
||||
find_library(os.path.join(binary_dir, 'src'), duckdb_lib_name, result_libs, True)
|
||||
for ext in extensions:
|
||||
find_library(os.path.join(binary_dir, 'extension', ext), ext + '_extension', result_libs, True)
|
||||
|
||||
for libname in libraries:
|
||||
find_library(binary_dir, libname, result_libs)
|
||||
|
||||
return result_libs
|
||||
|
||||
|
||||
def includes(extensions):
|
||||
scripts_dir = os.path.dirname(os.path.abspath(__file__))
|
||||
# add includes for duckdb and extensions
|
||||
includes = []
|
||||
includes.append(os.path.join(scripts_dir, '..', 'src', 'include'))
|
||||
includes.append(os.path.join(scripts_dir, '..'))
|
||||
includes.append(os.path.join(scripts_dir, '..', 'third_party', 'utf8proc', 'include'))
|
||||
for ext in extensions:
|
||||
includes.append(os.path.join(scripts_dir, '..', 'extension', ext, 'include'))
|
||||
return includes
|
||||
|
||||
|
||||
def include_flags(extensions):
|
||||
return ' ' + ' '.join(['-I' + x for x in includes(extensions)])
|
||||
|
||||
|
||||
def convert_backslashes(x):
|
||||
return '/'.join(x.split(os.path.sep))
|
||||
|
||||
|
||||
def get_relative_path(source_dir, target_file):
|
||||
source_dir = convert_backslashes(source_dir)
|
||||
target_file = convert_backslashes(target_file)
|
||||
|
||||
# absolute path: try to convert
|
||||
if source_dir in target_file:
|
||||
target_file = target_file.replace(source_dir, "").lstrip('/')
|
||||
return target_file
|
||||
|
||||
|
||||
######
|
||||
# MAIN_BRANCH_VERSIONING default should be 'True' for main branch and feature branches
|
||||
# MAIN_BRANCH_VERSIONING default should be 'False' for release branches
|
||||
# MAIN_BRANCH_VERSIONING default value needs to keep in sync between:
|
||||
# - CMakeLists.txt
|
||||
# - scripts/amalgamation.py
|
||||
# - scripts/package_build.py
|
||||
######
|
||||
MAIN_BRANCH_VERSIONING = True
|
||||
if os.getenv('MAIN_BRANCH_VERSIONING') == "0":
|
||||
MAIN_BRANCH_VERSIONING = False
|
||||
if os.getenv('MAIN_BRANCH_VERSIONING') == "1":
|
||||
MAIN_BRANCH_VERSIONING = True
|
||||
|
||||
|
||||
def get_git_describe():
|
||||
override_git_describe = os.getenv('OVERRIDE_GIT_DESCRIBE') or ''
|
||||
versioning_tag_match = 'v*.*.*'
|
||||
if MAIN_BRANCH_VERSIONING:
|
||||
versioning_tag_match = 'v*.*.0'
|
||||
# empty override_git_describe, either since env was empty string or not existing
|
||||
# -> ask git (that can fail, so except in place)
|
||||
if len(override_git_describe) == 0:
|
||||
try:
|
||||
return (
|
||||
subprocess.check_output(
|
||||
['git', 'describe', '--tags', '--long', '--debug', '--match', versioning_tag_match]
|
||||
)
|
||||
.strip()
|
||||
.decode('utf8')
|
||||
)
|
||||
except subprocess.CalledProcessError:
|
||||
return "v0.0.0-0-gdeadbeeff"
|
||||
if len(override_git_describe.split('-')) == 3:
|
||||
return override_git_describe
|
||||
if len(override_git_describe.split('-')) == 1:
|
||||
override_git_describe += "-0"
|
||||
assert len(override_git_describe.split('-')) == 2
|
||||
try:
|
||||
return (
|
||||
override_git_describe
|
||||
+ "-g"
|
||||
+ subprocess.check_output(['git', 'log', '-1', '--format=%h']).strip().decode('utf8')
|
||||
)
|
||||
except subprocess.CalledProcessError:
|
||||
return override_git_describe + "-g" + "deadbeeff"
|
||||
|
||||
|
||||
def git_commit_hash():
|
||||
if 'SETUPTOOLS_SCM_PRETEND_HASH' in os.environ:
|
||||
return os.environ['SETUPTOOLS_SCM_PRETEND_HASH']
|
||||
try:
|
||||
git_describe = get_git_describe()
|
||||
hash = git_describe.split('-')[2].lstrip('g')
|
||||
return hash
|
||||
except:
|
||||
return "deadbeeff"
|
||||
|
||||
|
||||
def prefix_version(version):
|
||||
"""Make sure the version is prefixed with 'v' to be of the form vX.Y.Z"""
|
||||
if version.startswith('v'):
|
||||
return version
|
||||
return 'v' + version
|
||||
|
||||
|
||||
def git_dev_version():
|
||||
if 'SETUPTOOLS_SCM_PRETEND_VERSION' in os.environ:
|
||||
return prefix_version(os.environ['SETUPTOOLS_SCM_PRETEND_VERSION'])
|
||||
try:
|
||||
long_version = get_git_describe()
|
||||
version_splits = long_version.split('-')[0].lstrip('v').split('.')
|
||||
dev_version = long_version.split('-')[1]
|
||||
if int(dev_version) == 0:
|
||||
# directly on a tag: emit the regular version
|
||||
return "v" + '.'.join(version_splits)
|
||||
else:
|
||||
# not on a tag: increment the version by one and add a -devX suffix
|
||||
# this needs to keep in sync with changes to CMakeLists.txt
|
||||
if MAIN_BRANCH_VERSIONING == True:
|
||||
# increment minor version
|
||||
version_splits[1] = str(int(version_splits[1]) + 1)
|
||||
else:
|
||||
# increment patch version
|
||||
version_splits[2] = str(int(version_splits[2]) + 1)
|
||||
return "v" + '.'.join(version_splits) + "-dev" + dev_version
|
||||
except:
|
||||
return "v0.0.0"
|
||||
|
||||
|
||||
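# Load the extension's '<pkg_name>_config' module from pkg_dir and append its include directories and source files to the running lists.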
def include_package(pkg_name, pkg_dir, include_files, include_list, source_list):
|
||||
import amalgamation
|
||||
|
||||
original_path = sys.path
|
||||
# append the directory
|
||||
sys.path.append(pkg_dir)
|
||||
ext_pkg = __import__(pkg_name + '_config')
|
||||
|
||||
ext_include_dirs = ext_pkg.include_directories
|
||||
ext_source_files = ext_pkg.source_files
|
||||
|
||||
include_files += amalgamation.list_includes_files(ext_include_dirs)
|
||||
include_list += ext_include_dirs
|
||||
source_list += ext_source_files
|
||||
|
||||
sys.path = original_path
|
||||
|
||||
|
||||
def build_package(target_dir, extensions, linenumbers=False, unity_count=32, folder_name='duckdb', short_paths=False):
|
||||
if not os.path.isdir(target_dir):
|
||||
os.mkdir(target_dir)
|
||||
|
||||
scripts_dir = os.path.dirname(os.path.abspath(__file__))
|
||||
sys.path.append(scripts_dir)
|
||||
import amalgamation
|
||||
|
||||
prev_wd = os.getcwd()
|
||||
os.chdir(os.path.join(scripts_dir, '..'))
|
||||
|
||||
# obtain the list of source files from the amalgamation
|
||||
source_list = amalgamation.list_sources()
|
||||
include_list = amalgamation.list_include_dirs()
|
||||
include_files = amalgamation.list_includes()
|
||||
|
||||
def copy_file(src, target_dir):
|
||||
# get the path
|
||||
full_path = src.split(os.path.sep)
|
||||
current_path = target_dir
|
||||
for i in range(len(full_path) - 1):
|
||||
current_path = os.path.join(current_path, full_path[i])
|
||||
if not os.path.isdir(current_path):
|
||||
os.mkdir(current_path)
|
||||
target_name = full_path[-1]
|
||||
target_file = os.path.join(current_path, target_name)
|
||||
amalgamation.copy_if_different(src, target_file)
|
||||
|
||||
# include the main extension helper
|
||||
include_files += [os.path.join('src', 'include', 'duckdb', 'main', 'extension_helper.hpp')]
|
||||
# include the separate extensions
|
||||
for ext in extensions:
|
||||
ext_path = os.path.join(scripts_dir, '..', 'extension', ext)
|
||||
include_package(ext, ext_path, include_files, include_list, source_list)
|
||||
|
||||
for src in source_list:
|
||||
copy_file(src, target_dir)
|
||||
|
||||
for inc in include_files:
|
||||
copy_file(inc, target_dir)
|
||||
|
||||
# handle pragma_version.cpp: paste #define DUCKDB_SOURCE_ID and DUCKDB_VERSION there
|
||||
curdir = os.getcwd()
|
||||
os.chdir(os.path.join(scripts_dir, '..'))
|
||||
githash = git_commit_hash()
|
||||
dev_version = git_dev_version()
|
||||
dev_v_parts = dev_version.lstrip('v').split('.')
|
||||
os.chdir(curdir)
|
||||
# open the file and read the current contents
|
||||
fpath = os.path.join(target_dir, 'src', 'function', 'table', 'version', 'pragma_version.cpp')
|
||||
with open_utf8(fpath, 'r') as f:
|
||||
text = f.read()
|
||||
# now add the DUCKDB_SOURCE_ID define, if it is not there already
|
||||
found_hash = False
|
||||
found_dev = False
|
||||
found_major = False
|
||||
found_minor = False
|
||||
found_patch = False
|
||||
lines = text.split('\n')
|
||||
for i in range(len(lines)):
|
||||
if '#define DUCKDB_SOURCE_ID ' in lines[i]:
|
||||
lines[i] = '#define DUCKDB_SOURCE_ID "{}"'.format(githash)
|
||||
found_hash = True
|
||||
if '#define DUCKDB_VERSION ' in lines[i]:
|
||||
lines[i] = '#define DUCKDB_VERSION "{}"'.format(dev_version)
|
||||
found_dev = True
|
||||
if '#define DUCKDB_MAJOR_VERSION ' in lines[i]:
|
||||
lines[i] = '#define DUCKDB_MAJOR_VERSION {}'.format(int(dev_v_parts[0]))
|
||||
found_major = True
|
||||
if '#define DUCKDB_MINOR_VERSION ' in lines[i]:
|
||||
lines[i] = '#define DUCKDB_MINOR_VERSION {}'.format(int(dev_v_parts[1]))
|
||||
found_minor = True
|
||||
if '#define DUCKDB_PATCH_VERSION ' in lines[i]:
|
||||
lines[i] = '#define DUCKDB_PATCH_VERSION "{}"'.format(dev_v_parts[2])
|
||||
found_patch = True
|
||||
if not found_hash:
|
||||
lines = ['#ifndef DUCKDB_SOURCE_ID', '#define DUCKDB_SOURCE_ID "{}"'.format(githash), '#endif'] + lines
|
||||
if not found_dev:
|
||||
lines = ['#ifndef DUCKDB_VERSION', '#define DUCKDB_VERSION "{}"'.format(dev_version), '#endif'] + lines
|
||||
if not found_major:
|
||||
lines = [
|
||||
'#ifndef DUCKDB_MAJOR_VERSION',
|
||||
'#define DUCKDB_MAJOR_VERSION {}'.format(int(dev_v_parts[0])),
|
||||
'#endif',
|
||||
] + lines
|
||||
if not found_minor:
|
||||
lines = [
|
||||
'#ifndef DUCKDB_MINOR_VERSION',
|
||||
'#define DUCKDB_MINOR_VERSION {}'.format(int(dev_v_parts[1])),
|
||||
'#endif',
|
||||
] + lines
|
||||
if not found_patch:
|
||||
lines = [
|
||||
'#ifndef DUCKDB_PATCH_VERSION',
|
||||
'#define DUCKDB_PATCH_VERSION "{}"'.format(dev_v_parts[2]),
|
||||
'#endif',
|
||||
] + lines
|
||||
text = '\n'.join(lines)
|
||||
with open_utf8(fpath, 'w+') as f:
|
||||
f.write(text)
|
||||
|
||||
def file_is_excluded(fname):
|
||||
for entry in excluded_objects:
|
||||
if entry in fname:
|
||||
return True
|
||||
return False
|
||||
|
||||
def generate_unity_build(entries, unity_name, linenumbers):
|
||||
ub_file = os.path.join(target_dir, unity_name)
|
||||
with open_utf8(ub_file, 'w+') as f:
|
||||
for entry in entries:
|
||||
if linenumbers:
|
||||
f.write('#line 0 "{}"\n'.format(convert_backslashes(entry)))
|
||||
f.write('#include "{}"\n\n'.format(convert_backslashes(entry)))
|
||||
return ub_file
|
||||
|
||||
def generate_unity_builds(source_list, nsplits, linenumbers):
|
||||
files_per_directory = {}
|
||||
for source in source_list:
|
||||
dirname = os.path.dirname(source)
|
||||
if dirname not in files_per_directory:
|
||||
files_per_directory[dirname] = []
|
||||
files_per_directory[dirname].append(source)
|
||||
|
||||
new_source_files = []
|
||||
for dirname in files_per_directory.keys():
|
||||
current_files = files_per_directory[dirname]
|
||||
cmake_file = os.path.join(dirname, 'CMakeLists.txt')
|
||||
unity_build = False
|
||||
if os.path.isfile(cmake_file):
|
||||
with open(cmake_file, 'r') as f:
|
||||
text = f.read()
|
||||
if 'add_library_unity' in text:
|
||||
unity_build = True
|
||||
# re-order the files in the unity build so that they follow the same order as the CMake
|
||||
scores = {}
|
||||
filenames = [x[0] for x in re.findall('([a-zA-Z0-9_]+[.](cpp|cc|c|cxx))', text)]
|
||||
score = 0
|
||||
for filename in filenames:
|
||||
scores[filename] = score
|
||||
score += 1
|
||||
current_files.sort(
|
||||
key=lambda x: scores[os.path.basename(x)] if os.path.basename(x) in scores else 99999
|
||||
)
|
||||
if not unity_build:
|
||||
if short_paths:
|
||||
# replace source files with "__"
|
||||
for file in current_files:
|
||||
unity_filename = os.path.basename(file)
|
||||
new_source_files.append(generate_unity_build([file], unity_filename, linenumbers))
|
||||
else:
|
||||
# directly use the source files
|
||||
new_source_files += [os.path.join(folder_name, file) for file in current_files]
|
||||
else:
|
||||
unity_base = dirname.replace(os.path.sep, '_')
|
||||
unity_name = f'ub_{unity_base}.cpp'
|
||||
new_source_files.append(generate_unity_build(current_files, unity_name, linenumbers))
|
||||
return new_source_files
|
||||
|
||||
original_sources = source_list
|
||||
source_list = generate_unity_builds(source_list, unity_count, linenumbers)
|
||||
|
||||
os.chdir(prev_wd)
|
||||
return (
|
||||
[convert_backslashes(x) for x in source_list if not file_is_excluded(x)],
|
||||
[convert_backslashes(x) for x in include_list],
|
||||
[convert_backslashes(x) for x in original_sources],
|
||||
)
|
||||
21
external/duckdb/scripts/parser_test.py
vendored
Normal file
@@ -0,0 +1,21 @@
from sqllogictest import SQLParserException, SQLLogicParser, SQLLogicTest

from typing import Optional
import argparse


def main():
    parser = argparse.ArgumentParser(description="SQL Logic Parser")
    parser.add_argument("filename", type=str, help="Path to the SQL logic file")
    args = parser.parse_args()

    filename = args.filename

    parser = SQLLogicParser()
    out: Optional[SQLLogicTest] = parser.parse(filename)
    if not out:
        raise SQLParserException(f"Test {filename} could not be parsed")


if __name__ == "__main__":
    main()
207
external/duckdb/scripts/plan_cost_runner.py
vendored
Normal file
@@ -0,0 +1,207 @@
|
||||
import argparse
|
||||
import glob
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
from tqdm import tqdm
|
||||
|
||||
|
||||
OLD_DB_NAME = "old.duckdb"
|
||||
NEW_DB_NAME = "new.duckdb"
|
||||
PROFILE_FILENAME = "duckdb_profile.json"
|
||||
|
||||
ENABLE_PROFILING = "PRAGMA enable_profiling=json"
|
||||
PROFILE_OUTPUT = f"PRAGMA profile_output='{PROFILE_FILENAME}'"
|
||||
|
||||
BANNER_SIZE = 52
|
||||
|
||||
|
||||
def init_db(cli, dbname, benchmark_dir):
|
||||
print(f"INITIALIZING {dbname} ...")
|
||||
subprocess.run(
|
||||
f"{cli} {dbname} < {benchmark_dir}/init/schema.sql", shell=True, check=True, stdout=subprocess.DEVNULL
|
||||
)
|
||||
subprocess.run(f"{cli} {dbname} < {benchmark_dir}/init/load.sql", shell=True, check=True, stdout=subprocess.DEVNULL)
|
||||
print("INITIALIZATION DONE")
|
||||
|
||||
|
||||
class PlanCost:
|
||||
def __init__(self):
|
||||
self.total = 0
|
||||
self.build_side = 0
|
||||
self.probe_side = 0
|
||||
self.time = 0
|
||||
|
||||
def __add__(self, other):
|
||||
self.total += other.total
|
||||
self.build_side += other.build_side
|
||||
self.probe_side += other.probe_side
|
||||
return self
|
||||
|
||||
def __gt__(self, other):
|
||||
if self == other or self.total < other.total:
|
||||
return False
|
||||
# if the total intermediate cardinalities is greater, also inspect time.
|
||||
# it's possible a plan reordering increased cardinalities, but overall execution time
|
||||
# was not greatly affected
|
||||
total_card_increased = self.total > other.total
|
||||
build_card_increased = self.build_side > other.build_side
|
||||
if total_card_increased and build_card_increased:
|
||||
return True
|
||||
# we know the total cardinality is either the same or higher and the build side has not increased
|
||||
# in this case fall back to the timing. It's possible that even if the probe side is higher
|
||||
# since the tuples are in flight, the plan executes faster
|
||||
return self.time > other.time * 1.03
|
||||
|
||||
def __lt__(self, other):
|
||||
if self == other:
|
||||
return False
|
||||
return not (self > other)
|
||||
|
||||
def __eq__(self, other):
|
||||
return self.total == other.total and self.build_side == other.build_side and self.probe_side == other.probe_side
|
||||
|
||||
|
||||
def is_measured_join(op) -> bool:
|
||||
if 'name' not in op:
|
||||
return False
|
||||
if op['name'] != 'HASH_JOIN':
|
||||
return False
|
||||
if 'Join Type' not in op['extra_info']:
|
||||
return False
|
||||
if op['extra_info']['Join Type'].startswith('MARK'):
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
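# Recursively walk the profiled query tree, accumulating hash-join cardinalities (total, build side, probe side) and the top-level query timing into a PlanCost.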
def op_inspect(op) -> PlanCost:
|
||||
cost = PlanCost()
|
||||
if 'Query' in op:
|
||||
cost.time = op['operator_timing']
|
||||
if is_measured_join(op):
|
||||
cost.total = op['operator_cardinality']
|
||||
if 'operator_cardinality' in op['children'][0]:
|
||||
cost.probe_side += op['children'][0]['operator_cardinality']
|
||||
if 'operator_cardinality' in op['children'][1]:
|
||||
cost.build_side += op['children'][1]['operator_cardinality']
|
||||
|
||||
left_cost = op_inspect(op['children'][0])
|
||||
right_cost = op_inspect(op['children'][1])
|
||||
cost.probe_side += left_cost.probe_side + right_cost.probe_side
|
||||
cost.build_side += left_cost.build_side + right_cost.build_side
|
||||
cost.total += left_cost.total + right_cost.total
|
||||
return cost
|
||||
|
||||
for child_op in op['children']:
|
||||
cost += op_inspect(child_op)
|
||||
|
||||
return cost
|
||||
|
||||
|
||||
def query_plan_cost(cli, dbname, query):
|
||||
try:
|
||||
subprocess.run(
|
||||
f"{cli} --readonly {dbname} -c \"{ENABLE_PROFILING};{PROFILE_OUTPUT};{query}\"",
|
||||
shell=True,
|
||||
check=True,
|
||||
capture_output=True,
|
||||
)
|
||||
except subprocess.CalledProcessError as e:
|
||||
print("-------------------------")
|
||||
print("--------Failure----------")
|
||||
print("-------------------------")
|
||||
print(e.stderr.decode('utf8'))
|
||||
print("-------------------------")
|
||||
print("--------Output----------")
|
||||
print("-------------------------")
|
||||
print(e.output.decode('utf8'))
|
||||
print("-------------------------")
|
||||
raise e
|
||||
with open(PROFILE_FILENAME, 'r') as file:
|
||||
return op_inspect(json.load(file))
|
||||
|
||||
|
||||
def print_banner(text):
|
||||
text_len = len(text)
|
||||
rest = BANNER_SIZE - text_len - 10
|
||||
l_width = int(rest / 2)
|
||||
r_width = l_width
|
||||
if rest % 2 != 0:
|
||||
l_width += 1
|
||||
print("")
|
||||
print("=" * BANNER_SIZE)
|
||||
print("=" * l_width + " " * 5 + text + " " * 5 + "=" * r_width)
|
||||
print("=" * BANNER_SIZE)
|
||||
|
||||
|
||||
def print_diffs(diffs):
|
||||
for query_name, old_cost, new_cost in diffs:
|
||||
print("")
|
||||
print("Query:", query_name)
|
||||
print("Old total cost:", old_cost.total)
|
||||
print("Old build cost:", old_cost.build_side)
|
||||
print("Old probe cost:", old_cost.probe_side)
|
||||
print("New total cost:", new_cost.total)
|
||||
print("New build cost:", new_cost.build_side)
|
||||
print("New probe cost:", new_cost.probe_side)
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Plan cost regression test script with old and new versions.")
|
||||
|
||||
parser.add_argument("--old", type=str, help="Path to the old runner.", required=True)
|
||||
parser.add_argument("--new", type=str, help="Path to the new runner.", required=True)
|
||||
parser.add_argument("--dir", type=str, help="Path to the benchmark directory.", required=True)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
old = args.old
|
||||
new = args.new
|
||||
benchmark_dir = args.dir
|
||||
|
||||
init_db(old, OLD_DB_NAME, benchmark_dir)
|
||||
init_db(new, NEW_DB_NAME, benchmark_dir)
|
||||
|
||||
improvements = []
|
||||
regressions = []
|
||||
|
||||
files = glob.glob(f"{benchmark_dir}/queries/*.sql")
|
||||
files.sort()
|
||||
|
||||
print("")
|
||||
print("RUNNING BENCHMARK QUERIES")
|
||||
for f in tqdm(files):
|
||||
query_name = f.split("/")[-1].replace(".sql", "")
|
||||
|
||||
with open(f, "r") as file:
|
||||
query = file.read()
|
||||
|
||||
old_cost = query_plan_cost(old, OLD_DB_NAME, query)
|
||||
new_cost = query_plan_cost(new, NEW_DB_NAME, query)
|
||||
|
||||
if old_cost > new_cost:
|
||||
improvements.append((query_name, old_cost, new_cost))
|
||||
elif new_cost > old_cost:
|
||||
regressions.append((query_name, old_cost, new_cost))
|
||||
|
||||
exit_code = 0
|
||||
if improvements:
|
||||
print_banner("IMPROVEMENTS DETECTED")
|
||||
print_diffs(improvements)
|
||||
if regressions:
|
||||
exit_code = 1
|
||||
print_banner("REGRESSIONS DETECTED")
|
||||
print_diffs(regressions)
|
||||
if not improvements and not regressions:
|
||||
print_banner("NO DIFFERENCES DETECTED")
|
||||
|
||||
os.remove(OLD_DB_NAME)
|
||||
os.remove(NEW_DB_NAME)
|
||||
os.remove(PROFILE_FILENAME)
|
||||
|
||||
exit(exit_code)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
23
external/duckdb/scripts/python_helpers.py
vendored
Normal file
@@ -0,0 +1,23 @@
def open_utf8(fpath, flags):
    import sys

    if sys.version_info[0] < 3:
        return open(fpath, flags)
    else:
        return open(fpath, flags, encoding="utf8")


def normalize_path(path):
    import os

    def normalize(p):
        return os.path.sep.join(p.split('/'))

    if isinstance(path, list):
        normed = map(lambda p: normalize(p), path)
        return list(normed)

    if isinstance(path, str):
        return normalize(path)

    raise Exception("Can only be called with a str or list argument")
17
external/duckdb/scripts/raspberry-pi-cmake-toolchain.cmake
vendored
Normal file
@@ -0,0 +1,17 @@
SET(CMAKE_SYSTEM_NAME Linux)

# Define our host system
SET(CMAKE_SYSTEM_NAME Linux)
SET(CMAKE_SYSTEM_VERSION 1)
# Define the cross compiler locations
SET(CMAKE_C_COMPILER ${DUCKDB_RPI_TOOLCHAIN_PREFIX}/arm-bcm2708/arm-rpi-4.9.3-linux-gnueabihf/bin/arm-linux-gnueabihf-gcc)
SET(CMAKE_CXX_COMPILER ${DUCKDB_RPI_TOOLCHAIN_PREFIX}/arm-bcm2708/arm-rpi-4.9.3-linux-gnueabihf/bin/arm-linux-gnueabihf-gcc)
# Define the sysroot path for the RaspberryPi distribution in our tools folder
SET(CMAKE_FIND_ROOT_PATH ${DUCKDB_RPI_TOOLCHAIN_PREFIX}/arm-bcm2708/arm-rpi-4.9.3-linux-gnueabihf/arm-linux-gnueabihf/sysroot/)
# Use our definitions for compiler tools
SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
# Search for libraries and headers in the target directories only
SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
SET(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)

SET(DUCKDB_EXTRA_LINK_FLAGS -lstdc++ -lgcc -lm)
1
external/duckdb/scripts/regression/__init__.py
vendored
Normal file
@@ -0,0 +1 @@
193
external/duckdb/scripts/regression/benchmark.py
vendored
Normal file
@@ -0,0 +1,193 @@
|
||||
import subprocess
|
||||
import statistics
|
||||
from io import StringIO
|
||||
import csv
|
||||
from dataclasses import dataclass
|
||||
import argparse
|
||||
from typing import Optional, Union, Tuple, List
|
||||
import functools
|
||||
|
||||
print = functools.partial(print, flush=True)
|
||||
|
||||
STDERR_HEADER = '''====================================================
|
||||
============== STDERR =============
|
||||
====================================================
|
||||
'''
|
||||
|
||||
STDOUT_HEADER = '''====================================================
|
||||
============== STDOUT =============
|
||||
====================================================
|
||||
'''
|
||||
|
||||
# timeouts in seconds
|
||||
MAX_TIMEOUT = 3600
|
||||
DEFAULT_TIMEOUT = 600
|
||||
|
||||
|
||||
@dataclass
|
||||
class BenchmarkRunnerConfig:
|
||||
"Configuration for a BenchmarkRunner"
|
||||
|
||||
benchmark_runner: str
|
||||
benchmark_file: str
|
||||
verbose: bool = False
|
||||
threads: Optional[int] = None
|
||||
memory_limit: Optional[str] = None
|
||||
disable_timeout: bool = False
|
||||
max_timeout: int = MAX_TIMEOUT
|
||||
root_dir: str = ""
|
||||
no_summary: bool = False
|
||||
|
||||
@classmethod
|
||||
def from_params(cls, benchmark_runner, benchmark_file, **kwargs) -> "BenchmarkRunnerConfig":
|
||||
verbose = kwargs.get("verbose", False)
|
||||
threads = kwargs.get("threads", None)
|
||||
memory_limit = kwargs.get("memory_limit", None)
|
||||
disable_timeout = kwargs.get("disable_timeout", False)
|
||||
max_timeout = kwargs.get("max_timeout", MAX_TIMEOUT)
|
||||
root_dir = kwargs.get("root_dir", "")
|
||||
no_summary = kwargs.get("no_summary", False)
|
||||
|
||||
config = cls(
|
||||
benchmark_runner=benchmark_runner,
|
||||
benchmark_file=benchmark_file,
|
||||
verbose=verbose,
|
||||
threads=threads,
|
||||
memory_limit=memory_limit,
|
||||
disable_timeout=disable_timeout,
|
||||
max_timeout=max_timeout,
|
||||
root_dir=root_dir,
|
||||
no_summary=no_summary,
|
||||
)
|
||||
return config
|
||||
|
||||
@classmethod
|
||||
def from_args(cls) -> "BenchmarkRunnerConfig":
|
||||
parser = argparse.ArgumentParser(description="Benchmark script with old and new runners.")
|
||||
|
||||
# Define the arguments
|
||||
parser.add_argument("--path", type=str, help="Path to the benchmark_runner executable", required=True)
|
||||
parser.add_argument("--benchmarks", type=str, help="Path to the benchmark file.", required=True)
|
||||
parser.add_argument("--verbose", action="store_true", help="Enable verbose output.")
|
||||
parser.add_argument("--threads", type=int, help="Number of threads to use.")
|
||||
parser.add_argument("--memory_limit", type=str, help="Memory limit to use.")
|
||||
parser.add_argument("--disable-timeout", action="store_true", help="Disable timeout.")
|
||||
parser.add_argument(
|
||||
"--max-timeout", type=int, default=3600, help="Set maximum timeout in seconds (default: 3600)."
|
||||
)
|
||||
parser.add_argument("--root-dir", type=str, default="", help="Root directory.")
|
||||
parser.add_argument(
|
||||
"--no-summary", type=str, default=False, help="No failures summary is outputed when passing this flag."
|
||||
)
|
||||
|
||||
# Parse arguments
|
||||
parsed_args = parser.parse_args()
|
||||
|
||||
# Create an instance of BenchmarkRunnerConfig using parsed arguments
|
||||
config = cls(
|
||||
benchmark_runner=parsed_args.path,
|
||||
benchmark_file=parsed_args.benchmarks,
|
||||
verbose=parsed_args.verbose,
|
||||
threads=parsed_args.threads,
|
||||
memory_limit=parsed_args.memory_limit,
|
||||
disable_timeout=parsed_args.disable_timeout,
|
||||
max_timeout=parsed_args.max_timeout,
|
||||
root_dir=parsed_args.root_dir,
|
||||
no_summary=parsed_args.no_summary,
|
||||
)
|
||||
return config
|
||||
|
||||
|
||||
class BenchmarkRunner:
|
||||
def __init__(self, config: BenchmarkRunnerConfig):
|
||||
self.config = config
|
||||
self.complete_timings = []
|
||||
self.benchmark_list: List[str] = []
|
||||
with open(self.config.benchmark_file, 'r') as f:
|
||||
self.benchmark_list = [x.strip() for x in f.read().split('\n') if len(x) > 0]
|
||||
|
||||
def construct_args(self, benchmark_path):
|
||||
benchmark_args = []
|
||||
benchmark_args.extend([self.config.benchmark_runner, benchmark_path])
|
||||
if self.config.root_dir:
|
||||
benchmark_args.extend(['--root-dir', self.config.root_dir])
|
||||
if self.config.threads:
|
||||
benchmark_args.extend([f"--threads={self.config.threads}"])
|
||||
if self.config.memory_limit:
|
||||
benchmark_args.extend([f"--memory_limit={self.config.memory_limit}"])
|
||||
if self.config.disable_timeout:
|
||||
benchmark_args.extend(["--disable-timeout"])
|
||||
if self.config.no_summary:
|
||||
benchmark_args.extend(["--no-summary"])
|
||||
return benchmark_args
|
||||
|
||||
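# Run a single benchmark file; returns (median timing, None) on success, or (error message, stderr output) when the run fails or times out.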
def run_benchmark(self, benchmark) -> Tuple[Union[float, str], Optional[str]]:
|
||||
benchmark_args = self.construct_args(benchmark)
|
||||
timeout_seconds = DEFAULT_TIMEOUT
|
||||
if self.config.disable_timeout:
|
||||
timeout_seconds = self.config.max_timeout
|
||||
|
||||
try:
|
||||
proc = subprocess.run(
|
||||
benchmark_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, timeout=timeout_seconds
|
||||
)
|
||||
out = proc.stdout.decode('utf8')
|
||||
err = proc.stderr.decode('utf8')
|
||||
returncode = proc.returncode
|
||||
except subprocess.TimeoutExpired:
|
||||
print("Failed to run benchmark " + benchmark)
|
||||
print(f"Aborted due to exceeding the limit of {timeout_seconds} seconds")
|
||||
return (
|
||||
'Failed to run benchmark ' + benchmark,
|
||||
f"Aborted due to exceeding the limit of {timeout_seconds} seconds",
|
||||
)
|
||||
if returncode != 0:
|
||||
print("Failed to run benchmark " + benchmark)
|
||||
print(STDERR_HEADER)
|
||||
print(err)
|
||||
print(STDOUT_HEADER)
|
||||
print(out)
|
||||
if 'HTTP' in err:
|
||||
print("Ignoring HTTP error and terminating the running of the regression tests")
|
||||
exit(0)
|
||||
return 'Failed to run benchmark ' + benchmark, err
|
||||
if self.config.verbose:
|
||||
print(err)
|
||||
# read the input CSV
|
||||
f = StringIO(err)
|
||||
csv_reader = csv.reader(f, delimiter='\t')
|
||||
header = True
|
||||
timings = []
|
||||
try:
|
||||
for row in csv_reader:
|
||||
if len(row) == 0:
|
||||
continue
|
||||
if header:
|
||||
header = False
|
||||
else:
|
||||
timings.append(row[2])
|
||||
self.complete_timings.append(row[2])
|
||||
return float(statistics.median(timings)), None
|
||||
except:
|
||||
print("Failed to run benchmark " + benchmark)
|
||||
print(err)
|
||||
return 'Failed to run benchmark ' + benchmark, err
|
||||
|
||||
def run_benchmarks(self, benchmark_list: List[str]):
|
||||
results = {}
|
||||
failures = {}
|
||||
for benchmark in benchmark_list:
|
||||
result, failure_message = self.run_benchmark(benchmark)
|
||||
results[benchmark] = result
|
||||
failures[benchmark] = failure_message if failure_message else None
|
||||
return results, failures
|
||||
|
||||
|
||||
def main():
|
||||
config = BenchmarkRunnerConfig.from_args()
|
||||
runner = BenchmarkRunner(config)
|
||||
runner.run_benchmarks(runner.benchmark_list)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
227
external/duckdb/scripts/regression/test_runner.py
vendored
Normal file
@@ -0,0 +1,227 @@
|
||||
import os
|
||||
import math
|
||||
import functools
|
||||
import shutil
|
||||
from benchmark import BenchmarkRunner, BenchmarkRunnerConfig
|
||||
from dataclasses import dataclass
|
||||
from typing import Optional, List, Union
|
||||
import subprocess
|
||||
|
||||
print = functools.partial(print, flush=True)
|
||||
|
||||
|
||||
def is_number(s):
|
||||
try:
|
||||
float(s)
|
||||
return True
|
||||
except ValueError:
|
||||
return False
|
||||
|
||||
|
||||
# Geometric mean of an array of numbers
|
||||
def geomean(xs):
|
||||
if len(xs) == 0:
|
||||
return 'EMPTY'
|
||||
for entry in xs:
|
||||
if not is_number(entry):
|
||||
return entry
|
||||
return math.exp(math.fsum(math.log(float(x)) for x in xs) / len(xs))
|
||||
|
||||
|
||||
import argparse
|
||||
|
||||
# Set up the argument parser
|
||||
parser = argparse.ArgumentParser(description="Benchmark script with old and new runners.")
|
||||
|
||||
# Define the arguments
|
||||
parser.add_argument("--old", type=str, help="Path to the old runner.", required=True)
|
||||
parser.add_argument("--new", type=str, help="Path to the new runner.", required=True)
|
||||
parser.add_argument("--benchmarks", type=str, help="Path to the benchmark file.", required=True)
|
||||
parser.add_argument("--verbose", action="store_true", help="Enable verbose output.")
|
||||
parser.add_argument("--threads", type=int, help="Number of threads to use.")
|
||||
parser.add_argument("--memory_limit", type=str, help="Memory limit to use.")
|
||||
parser.add_argument("--nofail", action="store_true", help="Do not fail on regression.")
|
||||
parser.add_argument("--disable-timeout", action="store_true", help="Disable timeout.")
|
||||
parser.add_argument("--max-timeout", type=int, default=3600, help="Set maximum timeout in seconds (default: 3600).")
|
||||
parser.add_argument("--root-dir", type=str, default="", help="Root directory.")
|
||||
parser.add_argument("--no-summary", type=str, default=False, help="No summary in the end.")
|
||||
parser.add_argument(
|
||||
"--regression-threshold-seconds",
|
||||
type=float,
|
||||
default=0.05,
|
||||
help="REGRESSION_THRESHOLD_SECONDS value for large benchmarks.",
|
||||
)
|
||||
|
||||
# Parse the arguments
|
||||
args = parser.parse_args()
|
||||
|
||||
# Assign parsed arguments to variables
|
||||
old_runner_path = args.old
|
||||
new_runner_path = args.new
|
||||
benchmark_file = args.benchmarks
|
||||
verbose = args.verbose
|
||||
threads = args.threads
|
||||
memory_limit = args.memory_limit
|
||||
no_regression_fail = args.nofail
|
||||
disable_timeout = args.disable_timeout
|
||||
max_timeout = args.max_timeout
|
||||
root_dir = args.root_dir
|
||||
no_summary = args.no_summary
|
||||
regression_threshold_seconds = args.regression_threshold_seconds
|
||||
|
||||
|
||||
# how many times we will run the experiment, to be sure of the regression
|
||||
NUMBER_REPETITIONS = 5
|
||||
# the threshold at which we consider something a regression (percentage)
|
||||
REGRESSION_THRESHOLD_PERCENTAGE = 0.1
|
||||
# minimal seconds diff for something to be a regression (for very fast benchmarks)
|
||||
REGRESSION_THRESHOLD_SECONDS = regression_threshold_seconds
|
||||
|
||||
if not os.path.isfile(old_runner_path):
|
||||
print(f"Failed to find old runner {old_runner_path}")
|
||||
exit(1)
|
||||
|
||||
if not os.path.isfile(new_runner_path):
|
||||
print(f"Failed to find new runner {new_runner_path}")
|
||||
exit(1)
|
||||
|
||||
config_dict = vars(args)
|
||||
old_runner = BenchmarkRunner(BenchmarkRunnerConfig.from_params(old_runner_path, benchmark_file, **config_dict))
|
||||
new_runner = BenchmarkRunner(BenchmarkRunnerConfig.from_params(new_runner_path, benchmark_file, **config_dict))
|
||||
|
||||
benchmark_list = old_runner.benchmark_list
|
||||
|
||||
summary = []
|
||||
|
||||
|
||||
@dataclass
|
||||
class BenchmarkResult:
|
||||
benchmark: str
|
||||
old_result: Union[float, str]
|
||||
new_result: Union[float, str]
|
||||
old_failure: Optional[str] = None
|
||||
new_failure: Optional[str] = None
|
||||
|
||||
|
||||
multiply_percentage = 1.0 + REGRESSION_THRESHOLD_PERCENTAGE
|
||||
other_results: List[BenchmarkResult] = []
|
||||
error_list: List[BenchmarkResult] = []
|
||||
for i in range(NUMBER_REPETITIONS):
|
||||
regression_list: List[BenchmarkResult] = []
|
||||
if len(benchmark_list) == 0:
|
||||
break
|
||||
print(
|
||||
f'''====================================================
|
||||
============== ITERATION {i} =============
|
||||
============== REMAINING {len(benchmark_list)} =============
|
||||
====================================================
|
||||
'''
|
||||
)
|
||||
|
||||
old_results, old_failures = old_runner.run_benchmarks(benchmark_list)
|
||||
new_results, new_failures = new_runner.run_benchmarks(benchmark_list)
|
||||
|
||||
for benchmark in benchmark_list:
|
||||
old_res = old_results[benchmark]
|
||||
new_res = new_results[benchmark]
|
||||
|
||||
old_fail = old_failures[benchmark]
|
||||
new_fail = new_failures[benchmark]
|
||||
|
||||
if isinstance(old_res, str) or isinstance(new_res, str):
|
||||
# benchmark failed to run - always a regression
|
||||
error_list.append(BenchmarkResult(benchmark, old_res, new_res, old_fail, new_fail))
|
||||
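# a benchmark counts as a regression only if the new timing exceeds the old timing by both the absolute threshold (seconds) and the relative threshold (percentage)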
elif (no_regression_fail == False) and (
|
||||
(old_res + REGRESSION_THRESHOLD_SECONDS) * multiply_percentage < new_res
|
||||
):
|
||||
regression_list.append(BenchmarkResult(benchmark, old_res, new_res))
|
||||
else:
|
||||
other_results.append(BenchmarkResult(benchmark, old_res, new_res))
|
||||
benchmark_list = [res.benchmark for res in regression_list]
|
||||
|
||||
exit_code = 0
|
||||
regression_list.extend(error_list)
|
||||
summary = []
|
||||
if len(regression_list) > 0:
|
||||
exit_code = 1
|
||||
print(
|
||||
'''====================================================
|
||||
============== REGRESSIONS DETECTED =============
|
||||
====================================================
|
||||
'''
|
||||
)
|
||||
for regression in regression_list:
|
||||
print(f"{regression.benchmark}")
|
||||
print(f"Old timing: {regression.old_result}")
|
||||
print(f"New timing: {regression.new_result}")
|
||||
if regression.old_failure or regression.new_failure:
|
||||
new_data = {
|
||||
"benchmark": regression.benchmark,
|
||||
"old_failure": regression.old_failure,
|
||||
"new_failure": regression.new_failure,
|
||||
}
|
||||
summary.append(new_data)
|
||||
print("")
|
||||
print(
|
||||
'''====================================================
|
||||
============== OTHER TIMINGS =============
|
||||
====================================================
|
||||
'''
|
||||
)
|
||||
else:
|
||||
print(
|
||||
'''====================================================
|
||||
============== NO REGRESSIONS DETECTED =============
|
||||
====================================================
|
||||
'''
|
||||
)
|
||||
|
||||
other_results.sort(key=lambda x: x.benchmark)
|
||||
for res in other_results:
|
||||
print(f"{res.benchmark}")
|
||||
print(f"Old timing: {res.old_result}")
|
||||
print(f"New timing: {res.new_result}")
|
||||
print("")
|
||||
|
||||
time_a = geomean(old_runner.complete_timings)
|
||||
time_b = geomean(new_runner.complete_timings)
|
||||
|
||||
|
||||
print("")
|
||||
if isinstance(time_a, str) or isinstance(time_b, str):
|
||||
print(f"Old: {time_a}")
|
||||
print(f"New: {time_b}")
|
||||
elif time_a > time_b * 1.01:
|
||||
print(f"Old timing geometric mean: {time_a}")
|
||||
print(f"New timing geometric mean: {time_b}, roughly {int((time_a - time_b) * 100.0 / time_a)}% faster")
|
||||
elif time_b > time_a * 1.01:
|
||||
print(f"Old timing geometric mean: {time_a}, roughly {int((time_b - time_a) * 100.0 / time_b)}% faster")
|
||||
print(f"New timing geometric mean: {time_b}")
|
||||
else:
|
||||
print(f"Old timing geometric mean: {time_a}")
|
||||
print(f"New timing geometric mean: {time_b}")
|
||||
|
||||
# nuke cached benchmark data between runs
|
||||
if os.path.isdir("duckdb_benchmark_data"):
|
||||
shutil.rmtree('duckdb_benchmark_data')
|
||||
|
||||
if summary and not no_summary:
|
||||
print(
|
||||
'''\n\n====================================================
|
||||
================ FAILURES SUMMARY ================
|
||||
====================================================
|
||||
'''
|
||||
)
|
||||
# check the value is "true" otherwise you'll see the prefix in local run outputs
|
||||
prefix = "::error::" if ('CI' in os.environ and os.getenv('CI') == 'true') else ""
|
||||
for i, failure_message in enumerate(summary, start=1):
|
||||
prefix_str = f"{prefix}{i}" if len(prefix) > 0 else f"{i}"
|
||||
print(f"{prefix_str}: ", failure_message["benchmark"])
|
||||
if failure_message["old_failure"] != failure_message["new_failure"]:
|
||||
print("Old:\n", failure_message["old_failure"])
|
||||
print("New:\n", failure_message["new_failure"])
|
||||
else:
|
||||
print(failure_message["old_failure"])
|
||||
print("-", 52)
|
||||
|
||||
exit(exit_code)
|
||||
115
external/duckdb/scripts/regression_check.py
vendored
Normal file
@@ -0,0 +1,115 @@
|
||||
import os
|
||||
import sys
|
||||
import duckdb
|
||||
import numpy
|
||||
import subprocess
|
||||
from io import StringIO
|
||||
import csv
|
||||
import statistics
|
||||
|
||||
old_file = None
|
||||
new_file = None
|
||||
# the threshold at which we consider something a regression (percentage)
|
||||
regression_threshold_percentage = 0.1
|
||||
# minimal seconds diff for something to be a regression (for very fast benchmarks)
|
||||
regression_threshold_seconds = 0.01
|
||||
|
||||
for arg in sys.argv:
|
||||
if arg.startswith("--old="):
|
||||
old_file = arg.replace("--old=", "")
|
||||
elif arg.startswith("--new="):
|
||||
new_file = arg.replace("--new=", "")
|
||||
|
||||
if old_file is None or new_file is None:
|
||||
print("Usage: python scripts/regression_check.py --old=<old_file> --new-<new_file>")
|
||||
exit(1)
|
||||
|
||||
con = duckdb.connect()
|
||||
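# compute the median timing per benchmark from the tab-separated result files (columns: name, nrun, time)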
old_timings_l = con.execute(
|
||||
f"SELECT name, median(time) FROM read_csv_auto('{old_file}') t(name, nrun, time) GROUP BY ALL ORDER BY ALL"
|
||||
).fetchall()
|
||||
new_timings_l = con.execute(
|
||||
f"SELECT name, median(time) FROM read_csv_auto('{new_file}') t(name, nrun, time) GROUP BY ALL ORDER BY ALL"
|
||||
).fetchall()
|
||||
|
||||
old_timings = {}
|
||||
new_timings = {}
|
||||
|
||||
for entry in old_timings_l:
|
||||
name = entry[0]
|
||||
timing = entry[1]
|
||||
old_timings[name] = timing
|
||||
|
||||
for entry in new_timings_l:
|
||||
name = entry[0]
|
||||
timing = entry[1]
|
||||
new_timings[name] = timing
|
||||
|
||||
slow_keys = []
|
||||
multiply_percentage = 1.0 + regression_threshold_percentage
|
||||
|
||||
test_keys = list(new_timings.keys())
|
||||
test_keys.sort()
|
||||
|
||||
for key in test_keys:
|
||||
new_timing = new_timings[key]
|
||||
old_timing = old_timings[key]
|
||||
if (old_timing + regression_threshold_seconds) * multiply_percentage < new_timing:
|
||||
slow_keys.append(key)
|
||||
|
||||
return_code = 0
|
||||
if len(slow_keys) > 0:
|
||||
print(
|
||||
'''====================================================
|
||||
============== REGRESSIONS DETECTED =============
|
||||
====================================================
|
||||
'''
|
||||
)
|
||||
return_code = 1
|
||||
for key in slow_keys:
|
||||
new_timing = new_timings[key]
|
||||
old_timing = old_timings[key]
|
||||
print(key)
|
||||
print(f"Old timing: {old_timing}")
|
||||
print(f"New timing: {new_timing}")
|
||||
print("")
|
||||
|
||||
print(
|
||||
'''====================================================
|
||||
================== New Timings ==================
|
||||
====================================================
|
||||
'''
|
||||
)
|
||||
with open(new_file, 'r') as f:
|
||||
print(f.read())
|
||||
print(
|
||||
'''====================================================
|
||||
================== Old Timings ==================
|
||||
====================================================
|
||||
'''
|
||||
)
|
||||
with open(old_file, 'r') as f:
|
||||
print(f.read())
|
||||
else:
|
||||
print(
|
||||
'''====================================================
|
||||
============== NO REGRESSIONS DETECTED =============
|
||||
====================================================
|
||||
'''
|
||||
)
|
||||
|
||||
print(
|
||||
'''====================================================
|
||||
=================== ALL TIMINGS ===================
|
||||
====================================================
|
||||
'''
|
||||
)
|
||||
for key in test_keys:
|
||||
new_timing = new_timings[key]
|
||||
old_timing = old_timings[key]
|
||||
print(key)
|
||||
print(f"Old timing: {old_timing}")
|
||||
print(f"New timing: {new_timing}")
|
||||
print("")
|
||||
|
||||
exit(return_code)
|
||||
80
external/duckdb/scripts/regression_test_extension_size.py
vendored
Normal file
@@ -0,0 +1,80 @@
|
||||
import os
|
||||
import argparse
|
||||
import subprocess
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
# the threshold at which we consider something a regression (percentage)
|
||||
regression_threshold_percentage = 0.20
|
||||
|
||||
parser = argparse.ArgumentParser(description='Check extension binary sizes for regressions between two builds.')
|
||||
parser.add_argument(
|
||||
'--old', dest='old_extension_dir', action='store', help='Path to the old extension dir', required=True
|
||||
)
|
||||
parser.add_argument(
|
||||
'--new', dest='new_extension_dir', action='store', help='Path to the new extension dir', required=True
|
||||
)
|
||||
parser.add_argument(
|
||||
'--expect',
|
||||
dest='expected_extensions_raw',
|
||||
action='store',
|
||||
help='Comma separated list of expected extensions',
|
||||
required=True,
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
|
||||
expected_extensions = args.expected_extensions_raw.split(',')
|
||||
|
||||
exit_code = 0
|
||||
|
||||
|
||||
def parse_extensions(dir):
|
||||
result = {}
|
||||
|
||||
for root, dirs, files in os.walk(dir):
|
||||
for filename in files:
|
||||
if filename.endswith(".duckdb_extension"):
|
||||
result[Path(filename).stem] = os.path.join(root, filename)
|
||||
|
||||
# Check all expected extensions are there
|
||||
for expected_extension in expected_extensions:
|
||||
if expected_extension not in result.keys():
|
||||
print(f"Did not find expected extension {expected_extension} in {dir}")
|
||||
exit(1)
|
||||
|
||||
return result
|
||||
|
||||
|
||||
old_extensions = parse_extensions(args.old_extension_dir)
|
||||
new_extensions = parse_extensions(args.new_extension_dir)
|
||||
|
||||
matching_extensions = []
|
||||
|
||||
for extension in old_extensions.keys():
|
||||
if extension in new_extensions:
|
||||
matching_extensions.append(extension)
|
||||
|
||||
check_passed = True
|
||||
error_message = ""
|
||||
|
||||
for extension in matching_extensions:
|
||||
old_size = os.path.getsize(old_extensions[extension])
|
||||
new_size = os.path.getsize(new_extensions[extension])
|
||||
|
||||
print(f" - checking '{extension}': old size={old_size}, new_size={new_size}")
|
||||
|
||||
if new_size / (old_size + 0.1) > (1.0 + regression_threshold_percentage):
|
||||
check_passed = False
|
||||
error_message += f" - Extension '{extension}' was bigger than expected {new_size}\n"
|
||||
error_message += f" - old size: {old_size}\n"
|
||||
error_message += f" - new size: {new_size}\n"
|
||||
|
||||
print()
|
||||
if not check_passed:
|
||||
print("Extension size regression check failed:\n")
|
||||
print(error_message)
|
||||
exit(1)
|
||||
else:
|
||||
print(f"All extensions passed the check!")
|
||||
402
external/duckdb/scripts/regression_test_python.py
vendored
Normal file
@@ -0,0 +1,402 @@
|
||||
import os
|
||||
import sys
|
||||
import duckdb
|
||||
import pandas as pd
|
||||
import pyarrow as pa
|
||||
import time
|
||||
import argparse
|
||||
from typing import Dict, List, Any
|
||||
import numpy as np
|
||||
|
||||
TPCH_QUERIES = []
|
||||
res = duckdb.execute(
|
||||
"""
|
||||
select query from tpch_queries()
|
||||
"""
|
||||
).fetchall()
|
||||
for x in res:
|
||||
TPCH_QUERIES.append(x[0])
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--verbose", action="store_true", help="Enable verbose mode", default=False)
|
||||
parser.add_argument("--threads", type=int, help="Number of threads", default=None)
|
||||
parser.add_argument("--nruns", type=int, help="Number of runs", default=10)
|
||||
parser.add_argument("--out-file", type=str, help="Output file path", default=None)
|
||||
parser.add_argument("--scale-factor", type=float, help="Set the scale factor TPCH is generated at", default=1.0)
|
||||
args, unknown_args = parser.parse_known_args()
|
||||
|
||||
verbose = args.verbose
|
||||
threads = args.threads
|
||||
nruns = args.nruns
|
||||
out_file = args.out_file
|
||||
scale_factor = args.scale_factor
|
||||
|
||||
if unknown_args:
|
||||
parser.error(f"Unrecognized parameter(s): {', '.join(unknown_args)}")
|
||||
|
||||
|
||||
def print_msg(message: str):
|
||||
if not verbose:
|
||||
return
|
||||
print(message)
|
||||
|
||||
|
||||
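# append one "name<TAB>nrun<TAB>time" line to out_file; the file handle is opened lazily on the first call and cached as an attribute on the function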
def write_result(benchmark_name, nrun, t):
|
||||
bench_result = f"{benchmark_name}\t{nrun}\t{t}"
|
||||
if out_file is not None:
|
||||
if not hasattr(write_result, 'file'):
|
||||
write_result.file = open(out_file, 'w+')
|
||||
write_result.file.write(bench_result)
|
||||
write_result.file.write('\n')
|
||||
else:
|
||||
print_msg(bench_result)
|
||||
|
||||
|
||||
def close_result():
|
||||
if not hasattr(write_result, 'file'):
|
||||
return
|
||||
write_result.file.close()
|
||||
|
||||
|
||||
class BenchmarkResult:
|
||||
def __init__(self, name):
|
||||
self.name = name
|
||||
self.runs: List[float] = []
|
||||
|
||||
def add(self, duration: float):
|
||||
self.runs.append(duration)
|
||||
|
||||
def write(self):
|
||||
for i, run in enumerate(self.runs):
|
||||
write_result(self.name, i, run)
|
||||
|
||||
|
||||
class TPCHData:
|
||||
TABLES = ["customer", "lineitem", "nation", "orders", "part", "partsupp", "region", "supplier"]
|
||||
|
||||
def __init__(self, scale_factor):
|
||||
self.conn = duckdb.connect()
|
||||
self.conn.execute(f'CALL dbgen(sf={scale_factor})')
|
||||
|
||||
def get_tables(self, convertor) -> Dict[str, Any]:
|
||||
res = {}
|
||||
for table in self.TABLES:
|
||||
res[table] = convertor(self.conn, table)
|
||||
return res
|
||||
|
||||
def load_lineitem(self, collector, benchmark_name) -> BenchmarkResult:
|
||||
query = 'SELECT * FROM lineitem'
|
||||
result = BenchmarkResult(benchmark_name)
|
||||
for _ in range(nruns):
|
||||
duration = 0.0
|
||||
start = time.time()
|
||||
rel = self.conn.sql(query)
|
||||
res = collector(rel)
|
||||
end = time.time()
|
||||
duration = float(end - start)
|
||||
del res
|
||||
padding = " " * len(str(nruns))
|
||||
print_msg(f"T{padding}: {duration}s")
|
||||
result.add(duration)
|
||||
return result
|
||||
|
||||
|
||||
class TPCHBenchmarker:
|
||||
def __init__(self, name: str):
|
||||
self.initialize_connection()
|
||||
self.name = name
|
||||
|
||||
def initialize_connection(self):
|
||||
self.con = duckdb.connect()
|
||||
if not threads:
|
||||
return
|
||||
print_msg(f'Limiting threads to {threads}')
|
||||
self.con.execute(f"SET threads={threads}")
|
||||
|
||||
def register_tables(self, tables: Dict[str, Any]):
|
||||
for name, table in tables.items():
|
||||
self.con.register(name, table)
|
||||
|
||||
def run_tpch(self, collector, benchmark_name) -> BenchmarkResult:
|
||||
print_msg("")
|
||||
print_msg(TPCH_QUERIES)
|
||||
result = BenchmarkResult(benchmark_name)
|
||||
for _ in range(nruns):
|
||||
duration = 0.0
|
||||
# Execute all queries
|
||||
for i, query in enumerate(TPCH_QUERIES):
|
||||
start = time.time()
|
||||
rel = self.con.sql(query)
|
||||
if rel:
|
||||
res = collector(rel)
|
||||
del res
|
||||
else:
|
||||
print_msg(f"Query '{query}' did not produce output")
|
||||
end = time.time()
|
||||
query_time = float(end - start)
|
||||
print_msg(f"Q{str(i).ljust(len(str(nruns)), ' ')}: {query_time}")
|
||||
duration += float(end - start)
|
||||
padding = " " * len(str(nruns))
|
||||
print_msg(f"T{padding}: {duration}s")
|
||||
result.add(duration)
|
||||
return result
|
||||
|
||||
|
||||
def test_tpch():
|
||||
print_msg(f"Generating TPCH (sf={scale_factor})")
|
||||
tpch = TPCHData(scale_factor)
|
||||
|
||||
## -------- Benchmark converting LineItem to different formats ---------
|
||||
|
||||
def fetch_native(rel: duckdb.DuckDBPyRelation):
|
||||
return rel.fetchall()
|
||||
|
||||
def fetch_pandas(rel: duckdb.DuckDBPyRelation):
|
||||
return rel.df()
|
||||
|
||||
def fetch_arrow(rel: duckdb.DuckDBPyRelation):
|
||||
return rel.arrow()
|
||||
|
||||
COLLECTORS = {'native': fetch_native, 'pandas': fetch_pandas, 'arrow': fetch_arrow}
|
||||
# For every collector, load lineitem 'nrun' times
|
||||
for collector in COLLECTORS:
|
||||
result: BenchmarkResult = tpch.load_lineitem(COLLECTORS[collector], collector + "_load_lineitem")
|
||||
print_msg(result.name)
|
||||
print_msg(collector)
|
||||
result.write()
|
||||
|
||||
## ------- Benchmark running TPCH queries on top of different formats --------
|
||||
|
||||
def convert_pandas(conn: duckdb.DuckDBPyConnection, table_name: str):
|
||||
return conn.execute(f"SELECT * FROM {table_name}").df()
|
||||
|
||||
def convert_arrow(conn: duckdb.DuckDBPyConnection, table_name: str):
|
||||
df = convert_pandas(conn, table_name)
|
||||
return pa.Table.from_pandas(df)
|
||||
|
||||
CONVERTORS = {'pandas': convert_pandas, 'arrow': convert_arrow}
|
||||
# Convert TPCH data to the right format, then run TPCH queries on that data
|
||||
for convertor in CONVERTORS:
|
||||
tables = tpch.get_tables(CONVERTORS[convertor])
|
||||
tester = TPCHBenchmarker(convertor)
|
||||
tester.register_tables(tables)
|
||||
collector = COLLECTORS[convertor]
|
||||
result: BenchmarkResult = tester.run_tpch(collector, f"{convertor}tpch")
|
||||
result.write()
|
||||
|
||||
|
||||
def generate_string(seed: int):
|
||||
output = ''
|
||||
for _ in range(10):
|
||||
output += chr(ord('A') + int(seed % 26))
|
||||
seed /= 26
|
||||
return output
|
||||
|
||||
|
||||
class ArrowDictionary:
|
||||
def __init__(self, unique_values):
|
||||
self.size = unique_values
|
||||
self.dict = [generate_string(x) for x in range(unique_values)]
|
||||
|
||||
|
||||
class ArrowDictionaryBenchmark:
|
||||
def __init__(self, unique_values, values, arrow_dict: ArrowDictionary):
|
||||
assert unique_values <= arrow_dict.size
|
||||
self.initialize_connection()
|
||||
self.generate(unique_values, values, arrow_dict)
|
||||
|
||||
def initialize_connection(self):
|
||||
self.con = duckdb.connect()
|
||||
if not threads:
|
||||
return
|
||||
print_msg(f'Limiting threads to {threads}')
|
||||
self.con.execute(f"SET threads={threads}")
|
||||
|
||||
def generate(self, unique_values, values, arrow_dict: ArrowDictionary):
|
||||
self.input = []
|
||||
self.expected = []
|
||||
for x in range(values):
|
||||
value = arrow_dict.dict[x % unique_values]
|
||||
self.input.append(value)
|
||||
self.expected.append((value,))
|
||||
|
||||
array = pa.array(
|
||||
self.input,
|
||||
type=pa.dictionary(pa.int64(), pa.string()),
|
||||
)
|
||||
self.table = pa.table([array], names=["x"])
|
||||
|
||||
def benchmark(self, benchmark_name) -> BenchmarkResult:
|
||||
self.con.register('arrow_table', self.table)
|
||||
result = BenchmarkResult(benchmark_name)
|
||||
for _ in range(nruns):
|
||||
duration = 0.0
|
||||
start = time.time()
|
||||
res = self.con.execute(
|
||||
"""
|
||||
select * from arrow_table
|
||||
"""
|
||||
).fetchall()
|
||||
end = time.time()
|
||||
duration = float(end - start)
|
||||
assert self.expected == res
|
||||
del res
|
||||
padding = " " * len(str(nruns))
|
||||
print_msg(f"T{padding}: {duration}s")
|
||||
result.add(duration)
|
||||
return result
|
||||
|
||||
|
||||
class SelectAndCallBenchmark:
|
||||
def __init__(self):
|
||||
"""
|
||||
SELECT statements become QueryRelations, any other statement type becomes a MaterializedRelation.
|
||||
We use SELECT and CALL here because their execution plans are identical
|
||||
"""
|
||||
self.initialize_connection()
|
||||
|
||||
def initialize_connection(self):
|
||||
self.con = duckdb.connect()
|
||||
if not threads:
|
||||
return
|
||||
print_msg(f'Limiting threads to {threads}')
|
||||
self.con.execute(f"SET threads={threads}")
|
||||
|
||||
def benchmark(self, name, query) -> List[BenchmarkResult]:
|
||||
results: List[BenchmarkResult] = []
|
||||
methods = {'select': 'select * from ', 'call': 'call '}
|
||||
for key, value in methods.items():
|
||||
for rowcount in [2048, 50000, 2500000]:
|
||||
result = BenchmarkResult(f'{key}_{name}_{rowcount}')
|
||||
query_string = query.format(rows=rowcount)
|
||||
query_string = value + query_string
|
||||
rel = self.con.sql(query_string)
|
||||
print_msg(rel.type)
|
||||
for _ in range(nruns):
|
||||
duration = 0.0
|
||||
start = time.time()
|
||||
rel.fetchall()
|
||||
end = time.time()
|
||||
duration = float(end - start)
|
||||
padding = " " * len(str(nruns))
|
||||
print_msg(f"T{padding}: {duration}s")
|
||||
result.add(duration)
|
||||
results.append(result)
|
||||
return results
|
||||
|
||||
|
||||
class PandasDFLoadBenchmark:
|
||||
def __init__(self):
|
||||
self.initialize_connection()
|
||||
self.generate()
|
||||
|
||||
def initialize_connection(self):
|
||||
self.con = duckdb.connect()
|
||||
if not threads:
|
||||
return
|
||||
print_msg(f'Limiting threads to {threads}')
|
||||
self.con.execute(f"SET threads={threads}")
|
||||
|
||||
def generate(self):
|
||||
self.con.execute("call dbgen(sf=0.1)")
|
||||
new_table = "*, " + ", ".join(["l_shipdate"] * 300)
|
||||
self.con.execute(f"create table wide as select {new_table} from lineitem limit 500")
|
||||
self.con.execute(f"copy wide to 'wide_table.csv' (FORMAT CSV)")
|
||||
|
||||
def benchmark(self, benchmark_name) -> BenchmarkResult:
|
||||
result = BenchmarkResult(benchmark_name)
|
||||
for _ in range(nruns):
|
||||
duration = 0.0
|
||||
pandas_df = pd.read_csv('wide_table.csv')
|
||||
start = time.time()
|
||||
for _ in range(30):
|
||||
res = self.con.execute("""select * from pandas_df""").df()
|
||||
end = time.time()
|
||||
duration = float(end - start)
|
||||
del res
|
||||
result.add(duration)
|
||||
return result
|
||||
|
||||
|
||||
class PandasAnalyzerBenchmark:
|
||||
def __init__(self):
|
||||
self.initialize_connection()
|
||||
self.generate()
|
||||
|
||||
def initialize_connection(self):
|
||||
self.con = duckdb.connect()
|
||||
if not threads:
|
||||
return
|
||||
print_msg(f'Limiting threads to {threads}')
|
||||
self.con.execute(f"SET threads={threads}")
|
||||
|
||||
def generate(self):
|
||||
return
|
||||
|
||||
def benchmark(self, benchmark_name) -> BenchmarkResult:
|
||||
result = BenchmarkResult(benchmark_name)
|
||||
data = [None] * 9999999 + [1] # Last element is 1, others are None
|
||||
|
||||
# Create the DataFrame with the specified data and column type as object
|
||||
pandas_df = pd.DataFrame(data, columns=['Column'], dtype=object)
|
||||
for _ in range(nruns):
|
||||
duration = 0.0
|
||||
start = time.time()
|
||||
for _ in range(30):
|
||||
res = self.con.execute("""select * from pandas_df""").df()
|
||||
end = time.time()
|
||||
duration = float(end - start)
|
||||
del res
|
||||
result.add(duration)
|
||||
return result
|
||||
|
||||
|
||||
def test_arrow_dictionaries_scan():
|
||||
DICT_SIZE = 26 * 1000
|
||||
print_msg(f"Generating a unique dictionary of size {DICT_SIZE}")
|
||||
arrow_dict = ArrowDictionary(DICT_SIZE)
|
||||
DATASET_SIZE = 10000000
|
||||
for unique_values in [2, 1000, DICT_SIZE]:
|
||||
test = ArrowDictionaryBenchmark(unique_values, DATASET_SIZE, arrow_dict)
|
||||
benchmark_name = f"arrow_dict_unique_{unique_values}_total_{DATASET_SIZE}"
|
||||
result = test.benchmark(benchmark_name)
|
||||
result.write()
|
||||
|
||||
|
||||
def test_loading_pandas_df_many_times():
|
||||
test = PandasDFLoadBenchmark()
|
||||
benchmark_name = f"load_pandas_df_many_times"
|
||||
result = test.benchmark(benchmark_name)
|
||||
result.write()
|
||||
|
||||
|
||||
def test_pandas_analyze():
|
||||
test = PandasAnalyzerBenchmark()
|
||||
benchmark_name = f"pandas_analyze"
|
||||
result = test.benchmark(benchmark_name)
|
||||
result.write()
|
||||
|
||||
|
||||
def test_call_and_select_statements():
|
||||
test = SelectAndCallBenchmark()
|
||||
queries = {
|
||||
'repeat_row': "repeat_row(42, 'test', True, 'this is a long string', num_rows={rows})",
|
||||
}
|
||||
for key, value in queries.items():
|
||||
results = test.benchmark(key, value)
|
||||
for res in results:
|
||||
res.write()
|
||||
|
||||
|
||||
def main():
|
||||
test_tpch()
|
||||
test_arrow_dictionaries_scan()
|
||||
test_loading_pandas_df_many_times()
|
||||
test_pandas_analyze()
|
||||
test_call_and_select_statements()
|
||||
|
||||
close_result()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
87
external/duckdb/scripts/regression_test_storage_size.py
vendored
Normal file
87
external/duckdb/scripts/regression_test_storage_size.py
vendored
Normal file
@@ -0,0 +1,87 @@
|
||||
import os
|
||||
import argparse
|
||||
import subprocess
|
||||
import tempfile
|
||||
|
||||
# the threshold at which we consider something a regression (percentage)
|
||||
regression_threshold_percentage = 0.05
|
||||
|
||||
parser = argparse.ArgumentParser(description='Generate TPC-DS reference results from Postgres.')
|
||||
parser.add_argument('--old', dest='old_runner', action='store', help='Path to the old shell executable')
|
||||
parser.add_argument('--new', dest='new_runner', action='store', help='Path to the new shell executable')
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
old_runner = args.old_runner
|
||||
new_runner = args.new_runner
|
||||
exit_code = 0
|
||||
|
||||
if not os.path.isfile(old_runner):
|
||||
print(f"Failed to find old runner {old_runner}")
|
||||
exit(1)
|
||||
|
||||
if not os.path.isfile(new_runner):
|
||||
print(f"Failed to find new runner {new_runner}")
|
||||
exit(1)
|
||||
|
||||
|
||||
def load_data(shell_path, load_script):
|
||||
with tempfile.NamedTemporaryFile() as f:
|
||||
filename = f.name
|
||||
proc = subprocess.Popen(
|
||||
[
|
||||
shell_path,
|
||||
'-storage_version',
|
||||
'latest',
|
||||
'-c',
|
||||
"set storage_compatibility_version='latest'",
|
||||
'-c',
|
||||
load_script,
|
||||
filename,
|
||||
]
|
||||
)
|
||||
proc.wait()
|
||||
if proc.returncode != 0:
|
||||
print('----------------------------')
|
||||
print('FAILED TO RUN')
|
||||
print('----------------------------')
|
||||
return None
|
||||
return os.path.getsize(filename)
|
||||
|
||||
|
||||
def run_benchmark(load_script, benchmark_name):
|
||||
print('----------------------------')
|
||||
print(f'Running benchmark {benchmark_name}')
|
||||
print('----------------------------')
|
||||
old_size = load_data(old_runner, load_script)
|
||||
if old_size is None:
|
||||
return False
|
||||
new_size = load_data(new_runner, load_script)
|
||||
if new_size is None:
|
||||
return False
|
||||
print(f'Database size with old runner: {old_size}')
|
||||
print(f'Database size with new runner: {new_size}')
|
||||
if new_size - new_size * regression_threshold_percentage > old_size:
|
||||
print('----------------------------')
|
||||
print('FAILURE: SIZE INCREASE')
|
||||
print('----------------------------')
|
||||
return False
|
||||
else:
|
||||
print('----------------------------')
|
||||
print('SUCCESS!')
|
||||
print('----------------------------')
|
||||
return True
|
||||
|
||||
|
||||
tpch_load = 'CALL dbgen(sf=1);'
|
||||
tpcds_load = 'CALL dsdgen(sf=1);'
|
||||
|
||||
|
||||
benchmarks = [[tpch_load, 'TPC-H SF1'], [tpcds_load, 'TPC-DS SF1']]
|
||||
|
||||
for benchmark in benchmarks:
|
||||
if not run_benchmark(benchmark[0], benchmark[1]):
|
||||
print(f'Database size increased in {benchmark[1]}')
|
||||
exit_code = 1
|
||||
|
||||
exit(exit_code)
|
||||
48
external/duckdb/scripts/rename-slow-tests.R
vendored
Normal file
48
external/duckdb/scripts/rename-slow-tests.R
vendored
Normal file
@@ -0,0 +1,48 @@
|
||||
library(tidyverse)
|
||||
|
||||
here <- rprojroot::is_git_root$find_file
|
||||
|
||||
# build/debug/test/unittest -d yes 2>&1 > timings.txt
|
||||
timings <- readLines(here("timings.txt"))
|
||||
|
||||
timings
|
||||
timings_df <- rematch2::re_match(timings, "^.*(?<time>[0-9][.][0-9][0-9][0-9]) s: (?<desc>.*)$")
|
||||
|
||||
cum_timings_df <-
|
||||
timings_df %>%
|
||||
filter(!is.na(time)) %>%
|
||||
mutate(time = as.numeric(time)) %>%
|
||||
count(desc, wt = time, name = "time") %>%
|
||||
arrange(time) %>%
|
||||
mutate(cum_time = cumsum(time), id = row_number())
|
||||
|
||||
cum_timings_df %>%
|
||||
ggplot(aes(x = time, y = cum_time, color = id)) +
|
||||
geom_line() +
|
||||
scale_x_log10()
|
||||
|
||||
cum_timings_df %>%
|
||||
ggplot(aes(x = id, y = cum_time, color = time)) +
|
||||
geom_line() +
|
||||
scale_colour_continuous(trans = "log10")
|
||||
|
||||
cum_timings_cut <-
|
||||
cum_timings_df %>%
|
||||
filter(cum_time >= 200, str_detect(desc, "[.]test$"))
|
||||
|
||||
slow <- cum_timings_cut$desc
|
||||
slow_renamed <- paste0(slow, "_coverage")
|
||||
|
||||
slow_renamed[fs::file_exists(here(slow_renamed))]
|
||||
stopifnot(!any(fs::file_exists(here(slow_renamed))))
|
||||
|
||||
withr::with_dir(
|
||||
here(),
|
||||
fs::file_move(slow, slow_renamed)
|
||||
)
|
||||
|
||||
walk2(slow_renamed, slow, ~ {
|
||||
text <- brio::read_lines(here(.x))
|
||||
text <- str_replace_all(text, fixed(.y), .x)
|
||||
brio::write_lines(text, here(.x))
|
||||
})
|
||||
20
external/duckdb/scripts/repeat_until_success.py
vendored
Normal file
20
external/duckdb/scripts/repeat_until_success.py
vendored
Normal file
@@ -0,0 +1,20 @@
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
|
||||
if len(sys.argv) <= 1:
|
||||
print("Expected usage: python3 repeat_until_success.py [command]")
|
||||
exit(1)
|
||||
|
||||
ntries = 10
|
||||
sleep_duration = 3
|
||||
cmd = sys.argv[1]
|
||||
|
||||
for i in range(ntries):
|
||||
ret = os.system(cmd)
|
||||
if ret is None or ret == 0:
|
||||
exit(0)
|
||||
print("Command {{ " + cmd + " }} failed, retrying (" + str(i + 1) + "/" + str(ntries) + ")")
|
||||
time.sleep(sleep_duration)
|
||||
|
||||
exit(1)
|
||||
62
external/duckdb/scripts/rerun_workflows.py
vendored
Normal file
62
external/duckdb/scripts/rerun_workflows.py
vendored
Normal file
@@ -0,0 +1,62 @@
|
||||
import subprocess
|
||||
import duckdb
|
||||
import os
|
||||
import pandas as pd
|
||||
import argparse
|
||||
from io import StringIO
|
||||
|
||||
parser = argparse.ArgumentParser(description='Rerun failed workflows from a PR.')
|
||||
parser.add_argument(
|
||||
'--title',
|
||||
dest='title',
|
||||
action='store',
|
||||
help='The title of the PR for which we want to rerun workflows (or part of the title)',
|
||||
required=True,
|
||||
)
|
||||
parser.add_argument(
|
||||
'--repo', dest='repo', action='store', help='The repository to run this workflow on', default='duckdb/duckdb'
|
||||
)
|
||||
parser.add_argument(
|
||||
'--max_workflows',
|
||||
dest='max_workflows',
|
||||
action='store',
|
||||
help='The maximum number of workflows to look at (starting from the latest)',
|
||||
default=200,
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
nlimit = args.max_workflows
|
||||
query = args.title
|
||||
|
||||
|
||||
proc = subprocess.Popen(
|
||||
[
|
||||
'gh',
|
||||
'run',
|
||||
'-R',
|
||||
args.repo,
|
||||
'list',
|
||||
'--json',
|
||||
'displayTitle,databaseId,status,conclusion,headSha',
|
||||
f'--limit={nlimit}',
|
||||
],
|
||||
stdout=subprocess.PIPE,
|
||||
)
|
||||
text = proc.stdout.read().decode('utf8')
|
||||
df = pd.read_json(StringIO(text))
|
||||
result = duckdb.query(f"select headSha from df where displayTitle LIKE '%{query}%' limit 1").fetchall()
|
||||
if len(result) == 0:
|
||||
print(
|
||||
f"No workflows found in the latest {nlimit} workflows that contain the text {query}.\nPerhaps try running with a higher --max_workflows parameter?"
|
||||
)
|
||||
exit(1)
|
||||
|
||||
headSha = result[0][0]
|
||||
|
||||
result = duckdb.query(
|
||||
f"select databaseId from df where conclusion IN ('failure', 'cancelled') AND displayTitle LIKE '%{query}%' and headSha='{headSha}'"
|
||||
).fetchall()
|
||||
if len(result) == 0:
|
||||
print(f"Found runs that match the text {query} but no failing or cancelled runs were found")
|
||||
for databaseId in [x[0] for x in result]:
|
||||
os.system(f'gh run -R {args.repo} rerun {databaseId}')
|
||||
347
external/duckdb/scripts/run-clang-tidy.py
vendored
Normal file
347
external/duckdb/scripts/run-clang-tidy.py
vendored
Normal file
@@ -0,0 +1,347 @@
|
||||
#!/usr/bin/env python
|
||||
#
|
||||
# ===- run-clang-tidy.py - Parallel clang-tidy runner ---------*- python -*--===#
|
||||
#
|
||||
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
# See https://llvm.org/LICENSE.txt for license information.
|
||||
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
#
|
||||
# ===------------------------------------------------------------------------===#
|
||||
# FIXME: Integrate with clang-tidy-diff.py
|
||||
|
||||
"""
|
||||
Parallel clang-tidy runner
|
||||
==========================
|
||||
|
||||
Runs clang-tidy over all files in a compilation database. Requires clang-tidy
|
||||
and clang-apply-replacements in $PATH.
|
||||
|
||||
Example invocations.
|
||||
- Run clang-tidy on all files in the current working directory with a default
|
||||
set of checks and show warnings in the cpp files and all project headers.
|
||||
run-clang-tidy.py $PWD
|
||||
|
||||
- Fix all header guards.
|
||||
run-clang-tidy.py -fix -checks=-*,llvm-header-guard
|
||||
|
||||
- Fix all header guards included from clang-tidy and header guards
|
||||
for clang-tidy headers.
|
||||
run-clang-tidy.py -fix -checks=-*,llvm-header-guard extra/clang-tidy \
|
||||
-header-filter=extra/clang-tidy
|
||||
|
||||
Compilation database setup:
|
||||
http://clang.llvm.org/docs/HowToSetupToolingForLLVM.html
|
||||
"""
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
import argparse
|
||||
import glob
|
||||
import json
|
||||
import multiprocessing
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
import threading
|
||||
import traceback
|
||||
|
||||
try:
|
||||
import yaml
|
||||
except ImportError:
|
||||
yaml = None
|
||||
|
||||
is_py2 = sys.version[0] == '2'
|
||||
|
||||
if is_py2:
|
||||
import Queue as queue
|
||||
else:
|
||||
import queue as queue
|
||||
|
||||
|
||||
def find_compilation_database(path):
|
||||
"""Adjusts the directory until a compilation database is found."""
|
||||
result = './'
|
||||
while not os.path.isfile(os.path.join(result, path)):
|
||||
if os.path.realpath(result) == '/':
|
||||
print('Error: could not find compilation database.')
|
||||
sys.exit(1)
|
||||
result += '../'
|
||||
return os.path.realpath(result)
|
||||
|
||||
|
||||
def make_absolute(f, directory):
|
||||
if os.path.isabs(f):
|
||||
return f
|
||||
return os.path.normpath(os.path.join(directory, f))
|
||||
|
||||
|
||||
def get_tidy_invocation(
|
||||
f, clang_tidy_binary, checks, tmpdir, build_path, header_filter, extra_arg, extra_arg_before, quiet, config
|
||||
):
|
||||
"""Gets a command line for clang-tidy."""
|
||||
start = [clang_tidy_binary]
|
||||
if header_filter is not None:
|
||||
start.append('-header-filter=' + header_filter)
|
||||
if checks:
|
||||
start.append('-checks=' + checks)
|
||||
if tmpdir is not None:
|
||||
start.append('-export-fixes')
|
||||
# Get a temporary file. We immediately close the handle so clang-tidy can
|
||||
# overwrite it.
|
||||
(handle, name) = tempfile.mkstemp(suffix='.yaml', dir=tmpdir)
|
||||
os.close(handle)
|
||||
start.append(name)
|
||||
for arg in extra_arg:
|
||||
start.append('-extra-arg=%s' % arg)
|
||||
for arg in extra_arg_before:
|
||||
start.append('-extra-arg-before=%s' % arg)
|
||||
start.append('-p=' + build_path)
|
||||
if quiet:
|
||||
start.append('--quiet')
|
||||
if config:
|
||||
start.append('-config=' + config)
|
||||
start.append(f)
|
||||
return start
|
||||
|
||||
|
||||
def merge_replacement_files(tmpdir, mergefile):
|
||||
"""Merge all replacement files in a directory into a single file"""
|
||||
# The fixes suggested by clang-tidy >= 4.0.0 are given under
|
||||
# the top level key 'Diagnostics' in the output yaml files
|
||||
mergekey = "Diagnostics"
|
||||
merged = []
|
||||
for replacefile in glob.iglob(os.path.join(tmpdir, '*.yaml')):
|
||||
content = yaml.safe_load(open(replacefile, 'r'))
|
||||
if not content:
|
||||
continue # Skip empty files.
|
||||
merged.extend(content.get(mergekey, []))
|
||||
|
||||
if merged:
|
||||
# MainSourceFile: The key is required by the definition inside
|
||||
# include/clang/Tooling/ReplacementsYaml.h, but the value
|
||||
# is actually never used inside clang-apply-replacements,
|
||||
# so we set it to '' here.
|
||||
output = {'MainSourceFile': '', mergekey: merged}
|
||||
with open(mergefile, 'w') as out:
|
||||
yaml.safe_dump(output, out)
|
||||
else:
|
||||
# Empty the file:
|
||||
open(mergefile, 'w').close()
|
||||
|
||||
|
||||
def check_clang_apply_replacements_binary(args):
|
||||
"""Checks if invoking supplied clang-apply-replacements binary works."""
|
||||
try:
|
||||
subprocess.check_call([args.clang_apply_replacements_binary, '--version'])
|
||||
except:
|
||||
print(
|
||||
'Unable to run clang-apply-replacements. Is clang-apply-replacements ' 'binary correctly specified?',
|
||||
file=sys.stderr,
|
||||
)
|
||||
traceback.print_exc()
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def apply_fixes(args, tmpdir):
|
||||
"""Calls clang-apply-fixes on a given directory."""
|
||||
invocation = [args.clang_apply_replacements_binary]
|
||||
if args.format:
|
||||
invocation.append('-format')
|
||||
if args.style:
|
||||
invocation.append('-style=' + args.style)
|
||||
invocation.append(tmpdir)
|
||||
subprocess.call(invocation)
|
||||
|
||||
|
||||
def run_tidy(args, tmpdir, build_path, queue, lock, failed_files):
|
||||
"""Takes filenames out of queue and runs clang-tidy on them."""
|
||||
while True:
|
||||
name = queue.get()
|
||||
invocation = get_tidy_invocation(
|
||||
name,
|
||||
args.clang_tidy_binary,
|
||||
args.checks,
|
||||
tmpdir,
|
||||
build_path,
|
||||
args.header_filter,
|
||||
args.extra_arg,
|
||||
args.extra_arg_before,
|
||||
args.quiet,
|
||||
args.config,
|
||||
)
|
||||
|
||||
proc = subprocess.Popen(invocation, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
output, err = proc.communicate()
|
||||
if proc.returncode != 0:
|
||||
failed_files.append(name)
|
||||
with lock:
|
||||
sys.stdout.write(' '.join(invocation) + '\n' + output.decode('utf-8'))
|
||||
if len(err) > 0:
|
||||
sys.stdout.flush()
|
||||
sys.stderr.write(err.decode('utf-8'))
|
||||
queue.task_done()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(
|
||||
description='Runs clang-tidy over all files '
|
||||
'in a compilation database. Requires '
|
||||
'clang-tidy and clang-apply-replacements in '
|
||||
'$PATH.'
|
||||
)
|
||||
parser.add_argument('-clang-tidy-binary', metavar='PATH', default='clang-tidy', help='path to clang-tidy binary')
|
||||
parser.add_argument(
|
||||
'-clang-apply-replacements-binary',
|
||||
metavar='PATH',
|
||||
default='clang-apply-replacements',
|
||||
help='path to clang-apply-replacements binary',
|
||||
)
|
||||
parser.add_argument('-checks', default=None, help='checks filter, when not specified, use clang-tidy ' 'default')
|
||||
parser.add_argument(
|
||||
'-config',
|
||||
default=None,
|
||||
help='Specifies a configuration in YAML/JSON format: '
|
||||
' -config="{Checks: \'*\', '
|
||||
' CheckOptions: [{key: x, '
|
||||
' value: y}]}" '
|
||||
'When the value is empty, clang-tidy will '
|
||||
'attempt to find a file named .clang-tidy for '
|
||||
'each source file in its parent directories.',
|
||||
)
|
||||
parser.add_argument(
|
||||
'-header-filter',
|
||||
default=None,
|
||||
help='regular expression matching the names of the '
|
||||
'headers to output diagnostics from. Diagnostics from '
|
||||
'the main file of each translation unit are always '
|
||||
'displayed.',
|
||||
)
|
||||
if yaml:
|
||||
parser.add_argument(
|
||||
'-export-fixes',
|
||||
metavar='filename',
|
||||
dest='export_fixes',
|
||||
help='Create a yaml file to store suggested fixes in, '
|
||||
'which can be applied with clang-apply-replacements.',
|
||||
)
|
||||
parser.add_argument('-j', type=int, default=0, help='number of tidy instances to be run in parallel.')
|
||||
parser.add_argument('files', nargs='*', default=['.*'], help='files to be processed (regex on path)')
|
||||
parser.add_argument('-fix', action='store_true', help='apply fix-its')
|
||||
parser.add_argument('-format', action='store_true', help='Reformat code ' 'after applying fixes')
|
||||
parser.add_argument('-style', default='file', help='The style of reformat ' 'code after applying fixes')
|
||||
parser.add_argument('-p', dest='build_path', help='Path used to read a compile command database.')
|
||||
parser.add_argument(
|
||||
'-extra-arg',
|
||||
dest='extra_arg',
|
||||
action='append',
|
||||
default=[],
|
||||
help='Additional argument to append to the compiler ' 'command line.',
|
||||
)
|
||||
parser.add_argument(
|
||||
'-extra-arg-before',
|
||||
dest='extra_arg_before',
|
||||
action='append',
|
||||
default=[],
|
||||
help='Additional argument to prepend to the compiler ' 'command line.',
|
||||
)
|
||||
parser.add_argument('-quiet', action='store_true', help='Run clang-tidy in quiet mode')
|
||||
args = parser.parse_args()
|
||||
|
||||
db_path = 'compile_commands.json'
|
||||
|
||||
if args.build_path is not None:
|
||||
build_path = args.build_path
|
||||
else:
|
||||
# Find our database
|
||||
build_path = find_compilation_database(db_path)
|
||||
|
||||
try:
|
||||
invocation = [args.clang_tidy_binary, '-list-checks']
|
||||
invocation.append('-p=' + build_path)
|
||||
if args.checks:
|
||||
invocation.append('-checks=' + args.checks)
|
||||
invocation.append('-')
|
||||
if args.quiet:
|
||||
# Even with -quiet we still want to check if we can call clang-tidy.
|
||||
with open(os.devnull, 'w') as dev_null:
|
||||
subprocess.check_call(invocation, stdout=dev_null)
|
||||
else:
|
||||
subprocess.check_call(invocation)
|
||||
except:
|
||||
print("Unable to run clang-tidy, consider running `pip install clang-tidy`", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
# Load the database and extract all files.
|
||||
database = json.load(open(os.path.join(build_path, db_path)))
|
||||
files = [make_absolute(entry['file'], entry['directory']) for entry in database]
|
||||
|
||||
max_task = args.j
|
||||
if max_task == 0:
|
||||
max_task = multiprocessing.cpu_count()
|
||||
|
||||
tmpdir = None
|
||||
if args.fix or (yaml and args.export_fixes):
|
||||
check_clang_apply_replacements_binary(args)
|
||||
tmpdir = tempfile.mkdtemp()
|
||||
|
||||
# Build up a big regexy filter from all command line arguments.
|
||||
file_name_re = re.compile('|'.join(args.files))
|
||||
|
||||
return_code = 0
|
||||
try:
|
||||
# Spin up a bunch of tidy-launching threads.
|
||||
task_queue = queue.Queue(max_task)
|
||||
# List of files with a non-zero return code.
|
||||
failed_files = []
|
||||
lock = threading.Lock()
|
||||
for _ in range(max_task):
|
||||
t = threading.Thread(target=run_tidy, args=(args, tmpdir, build_path, task_queue, lock, failed_files))
|
||||
t.daemon = True
|
||||
t.start()
|
||||
|
||||
# Fill the queue with files.
|
||||
for name in files:
|
||||
if file_name_re.search(name):
|
||||
task_queue.put(name)
|
||||
|
||||
# Wait for all threads to be done.
|
||||
task_queue.join()
|
||||
if len(failed_files):
|
||||
return_code = 1
|
||||
|
||||
except KeyboardInterrupt:
|
||||
# This is a sad hack. Unfortunately subprocess goes
|
||||
# bonkers with ctrl-c and we start forking merrily.
|
||||
print('\nCtrl-C detected, goodbye.')
|
||||
if tmpdir:
|
||||
shutil.rmtree(tmpdir)
|
||||
os.kill(0, 9)
|
||||
|
||||
if yaml and args.export_fixes:
|
||||
print('Writing fixes to ' + args.export_fixes + ' ...')
|
||||
try:
|
||||
merge_replacement_files(tmpdir, args.export_fixes)
|
||||
except:
|
||||
print('Error exporting fixes.\n', file=sys.stderr)
|
||||
traceback.print_exc()
|
||||
return_code = 1
|
||||
|
||||
if args.fix:
|
||||
print('Applying fixes ...')
|
||||
try:
|
||||
apply_fixes(args, tmpdir)
|
||||
except:
|
||||
print('Error applying fixes.\n', file=sys.stderr)
|
||||
traceback.print_exc()
|
||||
return_code = 1
|
||||
|
||||
if tmpdir:
|
||||
shutil.rmtree(tmpdir)
|
||||
sys.exit(return_code)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
52
external/duckdb/scripts/run_benchmark.py
vendored
Normal file
52
external/duckdb/scripts/run_benchmark.py
vendored
Normal file
@@ -0,0 +1,52 @@
|
||||
import argparse
|
||||
import os
|
||||
import subprocess
|
||||
import re
|
||||
|
||||
parser = argparse.ArgumentParser(description='Run a full benchmark using the CLI and report the results.')
|
||||
parser.add_argument('--shell', action='store', help='Path to the CLI', default='build/reldebug/duckdb')
|
||||
parser.add_argument('--database', action='store', help='Path to the database file to load data from')
|
||||
parser.add_argument(
|
||||
'--queries', action='store', help='Path to the list of queries to run (e.g. benchmark/clickbench/queries)'
|
||||
)
|
||||
parser.add_argument('--nrun', action='store', help='The number of runs', default=3)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
queries = os.listdir(args.queries)
|
||||
queries.sort()
|
||||
ran_queries = []
|
||||
timings = []
|
||||
for q in queries:
|
||||
if 'load.sql' in q:
|
||||
continue
|
||||
command = [args.shell, args.database]
|
||||
command += ['-c', '.timer on']
|
||||
for i in range(args.nrun):
|
||||
command += ['-c', '.read ' + os.path.join(args.queries, q)]
|
||||
res = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
|
||||
stdout = res.stdout.decode('utf8').strip()
|
||||
stderr = res.stderr.decode('utf8').strip()
|
||||
results = re.findall(r'Run Time \(s\): real (\d+.\d+)', stdout)
|
||||
if res.returncode != 0 or 'Error:\n' in stderr or len(results) != args.nrun:
|
||||
print("------- Failed to run query -------")
|
||||
print(q)
|
||||
print("------- stdout -------")
|
||||
print(stdout)
|
||||
print("------- stderr -------")
|
||||
print(stderr)
|
||||
exit(1)
|
||||
results = [float(x) for x in results]
|
||||
print(f"Timings for {q}: " + str(results))
|
||||
ran_queries.append(q)
|
||||
timings.append(results[1])
|
||||
|
||||
print('')
|
||||
sql_query = 'SELECT UNNEST(['
|
||||
sql_query += ','.join(["'" + x + "'" for x in ran_queries]) + ']) as query'
|
||||
sql_query += ","
|
||||
sql_query += "UNNEST(["
|
||||
sql_query += ','.join([str(x) for x in timings])
|
||||
sql_query += "]) as timing;"
|
||||
print(sql_query)
|
||||
177
external/duckdb/scripts/run_extension_medata_tests.sh
vendored
Executable file
177
external/duckdb/scripts/run_extension_medata_tests.sh
vendored
Executable file
@@ -0,0 +1,177 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Generates a bunch of directories to be used for testing extension updating related behaviour used in `test/extension/update_extensions_ci.test`
|
||||
|
||||
# Please consider your energy footprint by only running this script with ccache.
|
||||
# note that subsequent runs used cached artifacts, use `make clean` or rm -rf build/debug to clean
|
||||
|
||||
set -x
|
||||
set -e
|
||||
|
||||
DUCKDB_BUILD_DIR="./build/debug"
|
||||
|
||||
TEST_DIR="./build/extension_metadata_test_data"
|
||||
TEST_DIR_COPY="./build/extension_metadata_test_data_copy"
|
||||
|
||||
### Directories to use
|
||||
# Used as the extension installation directory for DuckDB
|
||||
export LOCAL_EXTENSION_DIR="$TEST_DIR/extension_dir"
|
||||
# Repository for testing successfully updating extensions
|
||||
export LOCAL_EXTENSION_REPO_UPDATED="$TEST_DIR/repository"
|
||||
# Repository for testing incorrect platform
|
||||
export LOCAL_EXTENSION_REPO_INCORRECT_PLATFORM="$TEST_DIR/repository_incorrect_platform"
|
||||
# Repository for testing incorrect version
|
||||
export LOCAL_EXTENSION_REPO_INCORRECT_DUCKDB_VERSION="$TEST_DIR/repository_incorrect_version"
|
||||
# Repository where both platform and version mismatch
|
||||
export LOCAL_EXTENSION_REPO_VERSION_AND_PLATFORM_INCORRECT="$TEST_DIR/repository_incorrect_version_and_platform"
|
||||
# Directory containing the extensions for direct installing
|
||||
export DIRECT_INSTALL_DIR="$TEST_DIR/direct_install"
|
||||
|
||||
# Extension dir with a malformed info file for an extension
|
||||
export LOCAL_EXTENSION_DIR_MALFORMED_INFO="$TEST_DIR/extension_dir_malformed_info"
|
||||
# Extension dir with a metadata install version that mismatches the files metadata
|
||||
export LOCAL_EXTENSION_DIR_INFO_INCORRECT_VERSION="$TEST_DIR/extension_dir_malformed_info_incorrect_version"
|
||||
|
||||
if [ -d "$TEST_DIR_COPY" ]; then
|
||||
# REUSE PREVIOUSLY GENERATED DATA
|
||||
rm -r $TEST_DIR
|
||||
cp -R $TEST_DIR_COPY $TEST_DIR
|
||||
else
|
||||
# GENERATE FRESH DATA
|
||||
mkdir -p $TEST_DIR
|
||||
mkdir -p $DIRECT_INSTALL_DIR
|
||||
mkdir -p $LOCAL_EXTENSION_DIR
|
||||
mkdir -p $LOCAL_EXTENSION_REPO_UPDATED
|
||||
mkdir -p $LOCAL_EXTENSION_REPO_INCORRECT_PLATFORM
|
||||
mkdir -p $LOCAL_EXTENSION_REPO_INCORRECT_DUCKDB_VERSION
|
||||
|
||||
#################################################
|
||||
### First repo: successfully updating extensions.
|
||||
#################################################
|
||||
# Set extension config
|
||||
cat > $TEST_DIR/extension_config_before.cmake <<EOL
|
||||
duckdb_extension_load(json DONT_LINK EXTENSION_VERSION v0.0.1)
|
||||
duckdb_extension_load(tpch DONT_LINK EXTENSION_VERSION v0.0.1)
|
||||
duckdb_extension_load(tpcds DONT_LINK EXTENSION_VERSION v0.0.1)
|
||||
duckdb_extension_load(icu DONT_LINK EXTENSION_VERSION v0.0.1)
|
||||
EOL
|
||||
|
||||
# Build the extensions using the first config
|
||||
LOCAL_EXTENSION_REPO=$LOCAL_EXTENSION_REPO_UPDATED EXTENSION_CONFIGS=$TEST_DIR/extension_config_before.cmake make debug
|
||||
|
||||
# Set the version and platform now that we have a build
|
||||
DUCKDB_VERSION=`$DUCKDB_BUILD_DIR/duckdb -csv -noheader -c 'select source_id from pragma_version()'`
|
||||
DUCKDB_PLATFORM=`cat $DUCKDB_BUILD_DIR/duckdb_platform_out`
|
||||
|
||||
# Install the extension from the initial config
|
||||
$DUCKDB_BUILD_DIR/duckdb -unsigned -c "set extension_directory='$LOCAL_EXTENSION_DIR'; set custom_extension_repository='$LOCAL_EXTENSION_REPO_UPDATED'; install tpch; install json; INSTALL icu;"
|
||||
|
||||
# Delete the info file from the icu extension
|
||||
rm $LOCAL_EXTENSION_DIR/$DUCKDB_VERSION/$DUCKDB_PLATFORM/icu.duckdb_extension.info
|
||||
|
||||
# Install tpcds directly
|
||||
cp $DUCKDB_BUILD_DIR/extension/tpcds/tpcds.duckdb_extension $DIRECT_INSTALL_DIR/tpcds.duckdb_extension
|
||||
$DUCKDB_BUILD_DIR/duckdb -unsigned -c "set extension_directory='$LOCAL_EXTENSION_DIR'; install '$DIRECT_INSTALL_DIR/tpcds.duckdb_extension';"
|
||||
|
||||
# Set updated extension config where we update the tpch extension but not the json extension
|
||||
cat > $TEST_DIR/extension_config_after.cmake <<EOL
|
||||
duckdb_extension_load(json DONT_LINK EXTENSION_VERSION v0.0.1)
|
||||
duckdb_extension_load(tpch DONT_LINK EXTENSION_VERSION v0.0.2)
|
||||
duckdb_extension_load(icu DONT_LINK EXTENSION_VERSION v0.0.2)
|
||||
EOL
|
||||
|
||||
# Build the extensions using the second config
|
||||
LOCAL_EXTENSION_REPO=$LOCAL_EXTENSION_REPO_UPDATED EXTENSION_CONFIGS=$TEST_DIR/extension_config_after.cmake BUILD_EXTENSIONS_ONLY=1 make debug
|
||||
|
||||
# For good measure, we also gzip one of the files in the repo to ensure we can do both gzipped and non gzipped
|
||||
gzip -1 $LOCAL_EXTENSION_REPO_UPDATED/$DUCKDB_VERSION/$DUCKDB_PLATFORM/icu.duckdb_extension
|
||||
|
||||
##########################################
|
||||
### Second repo: Incorrect DuckDB platform
|
||||
##########################################
|
||||
rm -rf $DUCKDB_BUILD_DIR
|
||||
# Set extension config
|
||||
cat > $TEST_DIR/extension_config_incorrect_platform.cmake <<EOL
|
||||
duckdb_extension_load(json DONT_LINK EXTENSION_VERSION v0.0.3)
|
||||
EOL
|
||||
|
||||
# Build the extensions using the incorrect platform
|
||||
DUCKDB_PLATFORM=test_platform EXTENSION_CONFIGS=$TEST_DIR/extension_config_incorrect_platform.cmake BUILD_EXTENSIONS_ONLY=1 make debug
|
||||
|
||||
cp $DUCKDB_BUILD_DIR/extension/json/json.duckdb_extension $DIRECT_INSTALL_DIR/json_incorrect_platform.duckdb_extension
|
||||
|
||||
########################################
|
||||
### Third repo: Incorrect DuckDB version
|
||||
########################################
|
||||
rm -rf $DUCKDB_BUILD_DIR
|
||||
# Set extension config
|
||||
cat > $TEST_DIR/extension_config_incorrect_version.cmake <<EOL
|
||||
duckdb_extension_load(json DONT_LINK EXTENSION_VERSION v0.0.4)
|
||||
EOL
|
||||
|
||||
# Build the extensions using the incorrect platform
|
||||
DUCKDB_EXPLICIT_VERSION=v1337 EXTENSION_CONFIGS=$TEST_DIR/extension_config_before.cmake BUILD_EXTENSIONS_ONLY=1 make debug
|
||||
|
||||
cp $DUCKDB_BUILD_DIR/extension/json/json.duckdb_extension $DIRECT_INSTALL_DIR/json_incorrect_version.duckdb_extension
|
||||
|
||||
####################################################
|
||||
### Fourth repo: Both platform and version incorrect
|
||||
####################################################
|
||||
rm -rf $DUCKDB_BUILD_DIR
|
||||
# Set extension config
|
||||
cat > $TEST_DIR/extension_config_incorrect_version.cmake <<EOL
|
||||
duckdb_extension_load(json DONT_LINK EXTENSION_VERSION v0.0.4)
|
||||
EOL
|
||||
|
||||
# Build the extensions using the incorrect platform
|
||||
DUCKDB_PLATFORM=test_platform DUCKDB_EXPLICIT_VERSION=v1337 EXTENSION_CONFIGS=$TEST_DIR/extension_config_before.cmake BUILD_EXTENSIONS_ONLY=1 make debug
|
||||
|
||||
cp $DUCKDB_BUILD_DIR/extension/json/json.duckdb_extension $DIRECT_INSTALL_DIR/json_incorrect_version_and_platform.duckdb_extension
|
||||
|
||||
# Note that we set the "double wrong" extension to have the proper name, so we can actually load it during testing with
|
||||
# SET allow_extensions_metadata_mismatch=true;
|
||||
cp $DUCKDB_BUILD_DIR/extension/json/json.duckdb_extension $DIRECT_INSTALL_DIR/json.duckdb_extension
|
||||
|
||||
###########################
|
||||
### Prepare malformed repos/dirs
|
||||
###########################
|
||||
# Build clean duckdb
|
||||
rm -rf $DUCKDB_BUILD_DIR
|
||||
make debug
|
||||
|
||||
# Use duckdb to install the extensions into the repositories (note that we are doing a trick here by setting the extension_directory to the local repo dir)
|
||||
$DUCKDB_BUILD_DIR/duckdb -unsigned -c "set allow_extensions_metadata_mismatch=true; set extension_directory='$LOCAL_EXTENSION_REPO_INCORRECT_PLATFORM'; install '$DIRECT_INSTALL_DIR/json_incorrect_platform.duckdb_extension'"
|
||||
$DUCKDB_BUILD_DIR/duckdb -unsigned -c "set allow_extensions_metadata_mismatch=true; set extension_directory='$LOCAL_EXTENSION_REPO_INCORRECT_DUCKDB_VERSION'; install '$DIRECT_INSTALL_DIR/json_incorrect_version.duckdb_extension'"
|
||||
$DUCKDB_BUILD_DIR/duckdb -unsigned -c "set allow_extensions_metadata_mismatch=true; set extension_directory='$LOCAL_EXTENSION_REPO_VERSION_AND_PLATFORM_INCORRECT'; install '$DIRECT_INSTALL_DIR/json_incorrect_version_and_platform.duckdb_extension'"
|
||||
|
||||
# Create dir with malformed info file
|
||||
$DUCKDB_BUILD_DIR/duckdb -unsigned -c "set extension_directory='$LOCAL_EXTENSION_DIR_MALFORMED_INFO'; install '$DIRECT_INSTALL_DIR/tpcds.duckdb_extension';"
|
||||
echo blablablab > $LOCAL_EXTENSION_DIR_MALFORMED_INFO/$DUCKDB_VERSION/$DUCKDB_PLATFORM/tpcds.duckdb_extension.info
|
||||
|
||||
# Create dir with malformed info file: we install a new version from LOCAL_EXTENSION_REPO_UPDATED but preserve the old info file
|
||||
$DUCKDB_BUILD_DIR/duckdb -unsigned -c "set extension_directory='$LOCAL_EXTENSION_DIR_INFO_INCORRECT_VERSION'; install 'tpch' from '$LOCAL_EXTENSION_REPO_UPDATED'"
|
||||
cp $LOCAL_EXTENSION_DIR/$DUCKDB_VERSION/$DUCKDB_PLATFORM/tpch.duckdb_extension.info $LOCAL_EXTENSION_DIR_INFO_INCORRECT_VERSION/$DUCKDB_VERSION/$DUCKDB_PLATFORM/tpch.duckdb_extension.info
|
||||
|
||||
###################################################################
|
||||
### Allow using copy instead of regenerating test data on every run
|
||||
###################################################################
|
||||
cp -R $TEST_DIR $TEST_DIR_COPY
|
||||
fi
|
||||
|
||||
###########################
|
||||
### Set version and platform
|
||||
###########################
|
||||
DUCKDB_VERSION=`$DUCKDB_BUILD_DIR/duckdb -csv -noheader -c 'select source_id from pragma_version()'`
|
||||
DUCKDB_PLATFORM=`cat $DUCKDB_BUILD_DIR/duckdb_platform_out`
|
||||
|
||||
###########################
|
||||
### Populate the minio repositories
|
||||
###########################
|
||||
AWS_DEFAULT_REGION=eu-west-1 AWS_ACCESS_KEY_ID=minio_duckdb_user AWS_SECRET_ACCESS_KEY=minio_duckdb_user_password aws --endpoint-url http://duckdb-minio.com:9000 s3 sync $LOCAL_EXTENSION_REPO_UPDATED s3://test-bucket-public/ci-test-repo
|
||||
export REMOTE_EXTENSION_REPO_UPDATED=http://duckdb-minio.com:9000/test-bucket-public/ci-test-repo
|
||||
export REMOTE_EXTENSION_REPO_DIRECT_PATH=http://duckdb-minio.com:9000/test-bucket-public/ci-test-repo/$DUCKDB_VERSION/$DUCKDB_PLATFORM
|
||||
|
||||
################
|
||||
### Run test
|
||||
################
|
||||
RUN_EXTENSION_UPDATE_TEST=1 $DUCKDB_BUILD_DIR/test/unittest test/extension/update_extensions_ci.test
|
||||
318
external/duckdb/scripts/run_tests_one_by_one.py
vendored
Normal file
318
external/duckdb/scripts/run_tests_one_by_one.py
vendored
Normal file
@@ -0,0 +1,318 @@
|
||||
import argparse
|
||||
import sys
|
||||
import subprocess
|
||||
import time
|
||||
import threading
|
||||
import tempfile
|
||||
import os
|
||||
import shutil
|
||||
import re
|
||||
|
||||
|
||||
class ErrorContainer:
|
||||
def __init__(self):
|
||||
self._lock = threading.Lock()
|
||||
self._errors = []
|
||||
|
||||
def append(self, item):
|
||||
with self._lock:
|
||||
self._errors.append(item)
|
||||
|
||||
def get_errors(self):
|
||||
with self._lock:
|
||||
return list(self._errors)
|
||||
|
||||
def __len__(self):
|
||||
with self._lock:
|
||||
return len(self._errors)
|
||||
|
||||
|
||||
error_container = ErrorContainer()
|
||||
|
||||
|
||||
def valid_timeout(value):
|
||||
try:
|
||||
timeout_float = float(value)
|
||||
if timeout_float <= 0:
|
||||
raise argparse.ArgumentTypeError("Timeout value must be a positive float")
|
||||
return timeout_float
|
||||
except ValueError:
|
||||
raise argparse.ArgumentTypeError("Timeout value must be a float")
|
||||
|
||||
|
||||
parser = argparse.ArgumentParser(description='Run tests one by one with optional flags.')
|
||||
parser.add_argument('unittest_program', help='Path to the unittest program')
|
||||
parser.add_argument('--no-exit', action='store_true', help='Execute all tests, without stopping on first error')
|
||||
parser.add_argument('--fast-fail', action='store_true', help='Terminate on first error')
|
||||
parser.add_argument('--profile', action='store_true', help='Enable profiling')
|
||||
parser.add_argument('--no-assertions', action='store_false', help='Disable assertions')
|
||||
parser.add_argument('--time_execution', action='store_true', help='Measure and print the execution time of each test')
|
||||
parser.add_argument('--list', action='store_true', help='Print the list of tests to run')
|
||||
parser.add_argument('--summarize-failures', action='store_true', help='Summarize failures', default=None)
|
||||
parser.add_argument(
|
||||
'--tests-per-invocation', type=int, help='The amount of tests to run per invocation of the runner', default=1
|
||||
)
|
||||
parser.add_argument(
|
||||
'--print-interval', action='store', help='Prints "Still running..." every N seconds', default=300.0, type=float
|
||||
)
|
||||
parser.add_argument(
|
||||
'--timeout',
|
||||
action='store',
|
||||
help='Add a timeout for each test (in seconds, default: 3600s - i.e. one hour)',
|
||||
default=3600,
|
||||
type=valid_timeout,
|
||||
)
|
||||
parser.add_argument('--valgrind', action='store_true', help='Run the tests with valgrind', default=False)
|
||||
|
||||
args, extra_args = parser.parse_known_args()
|
||||
|
||||
if not args.unittest_program:
|
||||
parser.error('Path to unittest program is required')
|
||||
|
||||
# Access the arguments
|
||||
unittest_program = args.unittest_program
|
||||
no_exit = args.no_exit
|
||||
fast_fail = args.fast_fail
|
||||
tests_per_invocation = args.tests_per_invocation
|
||||
|
||||
if no_exit:
|
||||
if fast_fail:
|
||||
print("--no-exit and --fast-fail can't be combined")
|
||||
exit(1)
|
||||
|
||||
profile = args.profile
|
||||
assertions = args.no_assertions
|
||||
time_execution = args.time_execution
|
||||
timeout = args.timeout
|
||||
|
||||
summarize_failures = args.summarize_failures
|
||||
if summarize_failures is None:
|
||||
# get from env
|
||||
summarize_failures = False
|
||||
if 'SUMMARIZE_FAILURES' in os.environ:
|
||||
summarize_failures = os.environ['SUMMARIZE_FAILURES'] == '1'
|
||||
elif 'CI' in os.environ:
|
||||
# enable by default in CI if not set explicitly
|
||||
summarize_failures = True
|
||||
|
||||
# Use the '-l' parameter to output the list of tests to run
|
||||
proc = subprocess.run([unittest_program, '-l'] + extra_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
stdout = proc.stdout.decode('utf8').strip()
|
||||
stderr = proc.stderr.decode('utf8').strip()
|
||||
if len(stderr) > 0:
|
||||
print("Failed to run program " + unittest_program)
|
||||
print("Returncode:", proc.returncode)
|
||||
print(stdout)
|
||||
print(stderr)
|
||||
exit(1)
|
||||
|
||||
# The output is in the format of 'PATH\tGROUP', we're only interested in the PATH portion
|
||||
test_cases = []
|
||||
first_line = True
|
||||
for line in stdout.splitlines():
|
||||
if first_line:
|
||||
first_line = False
|
||||
continue
|
||||
if len(line.strip()) == 0:
|
||||
continue
|
||||
splits = line.rsplit('\t', 1)
|
||||
test_cases.append(splits[0])
|
||||
|
||||
|
||||
test_count = len(test_cases)
|
||||
if args.list:
|
||||
for test_number, test_case in enumerate(test_cases):
|
||||
print(print(f"[{test_number}/{test_count}]: {test_case}"))
|
||||
|
||||
all_passed = True
|
||||
|
||||
|
||||
def fail():
|
||||
global all_passed
|
||||
all_passed = False
|
||||
if fast_fail:
|
||||
exit(1)
|
||||
|
||||
|
||||
def parse_assertions(stdout):
|
||||
for line in stdout.splitlines():
|
||||
if 'All tests were skipped' in line:
|
||||
return "SKIPPED"
|
||||
if line == 'assertions: - none -':
|
||||
return "0 assertions"
|
||||
|
||||
# Parse assertions in format
|
||||
pos = line.find("assertion")
|
||||
if pos != -1:
|
||||
space_before_num = line.rfind(' ', 0, pos - 2)
|
||||
return line[space_before_num + 2 : pos + 10]
|
||||
|
||||
return "ERROR"
|
||||
|
||||
|
||||
is_active = False
|
||||
|
||||
|
||||
def get_test_name_from(text):
|
||||
match = re.findall(r'\((.*?)\)\!', text)
|
||||
return match[0] if match else ''
|
||||
|
||||
|
||||
def get_clean_error_message_from(text):
|
||||
match = re.split(r'^=+\n', text, maxsplit=1, flags=re.MULTILINE)
|
||||
return match[1] if len(match) > 1 else text
|
||||
|
||||
|
||||
def print_interval_background(interval):
|
||||
global is_active
|
||||
current_ticker = 0.0
|
||||
while is_active:
|
||||
time.sleep(0.1)
|
||||
current_ticker += 0.1
|
||||
if current_ticker >= interval:
|
||||
print("Still running...")
|
||||
current_ticker = 0
|
||||
|
||||
|
||||
def launch_test(test, list_of_tests=False):
|
||||
global is_active
|
||||
# start the background thread
|
||||
is_active = True
|
||||
background_print_thread = threading.Thread(target=print_interval_background, args=[args.print_interval])
|
||||
background_print_thread.start()
|
||||
|
||||
unittest_stdout = sys.stdout if list_of_tests else subprocess.PIPE
|
||||
unittest_stderr = subprocess.PIPE
|
||||
|
||||
start = time.time()
|
||||
try:
|
||||
test_cmd = [unittest_program] + test
|
||||
if args.valgrind:
|
||||
test_cmd = ['valgrind'] + test_cmd
|
||||
# should unset SUMMARIZE_FAILURES to avoid producing exceeding failure logs
|
||||
env = os.environ.copy()
|
||||
# pass env variables globally
|
||||
if list_of_tests or no_exit or tests_per_invocation:
|
||||
env['SUMMARIZE_FAILURES'] = '0'
|
||||
env['NO_DUPLICATING_HEADERS'] = '1'
|
||||
else:
|
||||
env['SUMMARIZE_FAILURES'] = '0'
|
||||
res = subprocess.run(test_cmd, stdout=unittest_stdout, stderr=unittest_stderr, timeout=timeout, env=env)
|
||||
except subprocess.TimeoutExpired as e:
|
||||
if list_of_tests:
|
||||
print("[TIMED OUT]", flush=True)
|
||||
else:
|
||||
print(" (TIMED OUT)", flush=True)
|
||||
test_name = test[0] if not list_of_tests else str(test)
|
||||
error_msg = f'TIMEOUT - exceeded specified timeout of {timeout} seconds'
|
||||
new_data = {"test": test_name, "return_code": 1, "stdout": '', "stderr": error_msg}
|
||||
error_container.append(new_data)
|
||||
fail()
|
||||
return
|
||||
|
||||
stdout = res.stdout.decode('utf8') if not list_of_tests else ''
|
||||
stderr = res.stderr.decode('utf8')
|
||||
|
||||
if len(stderr) > 0:
|
||||
# when list_of_tests test name gets transformed, but we can get it from stderr
|
||||
test_name = test[0] if not list_of_tests else get_test_name_from(stderr)
|
||||
error_message = get_clean_error_message_from(stderr)
|
||||
new_data = {"test": test_name, "return_code": res.returncode, "stdout": stdout, "stderr": error_message}
|
||||
error_container.append(new_data)
|
||||
|
||||
end = time.time()
|
||||
|
||||
# join the background print thread
|
||||
is_active = False
|
||||
background_print_thread.join()
|
||||
|
||||
additional_data = ""
|
||||
if assertions:
|
||||
additional_data += " (" + parse_assertions(stdout) + ")"
|
||||
if args.time_execution:
|
||||
additional_data += f" (Time: {end - start:.4f} seconds)"
|
||||
print(additional_data, flush=True)
|
||||
if profile:
|
||||
print(f'{test_case} {end - start}')
|
||||
if res.returncode is None or res.returncode == 0:
|
||||
return
|
||||
|
||||
print("FAILURE IN RUNNING TEST")
|
||||
print(
|
||||
"""--------------------
|
||||
RETURNCODE
|
||||
--------------------"""
|
||||
)
|
||||
print(res.returncode)
|
||||
print(
|
||||
"""--------------------
|
||||
STDOUT
|
||||
--------------------"""
|
||||
)
|
||||
print(stdout)
|
||||
print(
|
||||
"""--------------------
|
||||
STDERR
|
||||
--------------------"""
|
||||
)
|
||||
print(stderr)
|
||||
|
||||
# if a test closes unexpectedly (e.g., SEGV), test cleanup doesn't happen,
|
||||
# causing us to run out of space on subsequent tests in GH Actions (not much disk space there)
|
||||
duckdb_unittest_tempdir = os.path.join(
|
||||
os.path.dirname(unittest_program), '..', '..', '..', 'duckdb_unittest_tempdir'
|
||||
)
|
||||
if os.path.exists(duckdb_unittest_tempdir) and os.listdir(duckdb_unittest_tempdir):
|
||||
shutil.rmtree(duckdb_unittest_tempdir)
|
||||
fail()
|
||||
|
||||
|
||||
def run_tests_one_by_one():
|
||||
for test_number, test_case in enumerate(test_cases):
|
||||
if not profile:
|
||||
print(f"[{test_number}/{test_count}]: {test_case}", end="", flush=True)
|
||||
launch_test([test_case])
|
||||
|
||||
|
||||
def escape_test_case(test_case):
|
||||
return test_case.replace(',', '\\,')
|
||||
|
||||
|
||||
def run_tests_batched(batch_count):
|
||||
tmp = tempfile.NamedTemporaryFile()
|
||||
# write the test list to a temporary file
|
||||
with open(tmp.name, 'w') as f:
|
||||
for test_case in test_cases:
|
||||
f.write(escape_test_case(test_case) + '\n')
|
||||
# use start_offset/end_offset to cycle through the test list
|
||||
test_number = 0
|
||||
while test_number < len(test_cases):
|
||||
# gather test cases
|
||||
next_entry = test_number + batch_count
|
||||
if next_entry > len(test_cases):
|
||||
next_entry = len(test_cases)
|
||||
|
||||
launch_test(['-f', tmp.name, '--start-offset', str(test_number), '--end-offset', str(next_entry)], True)
|
||||
test_number = next_entry
|
||||
|
||||
|
||||
if args.tests_per_invocation == 1:
|
||||
run_tests_one_by_one()
|
||||
else:
|
||||
assertions = False
|
||||
run_tests_batched(args.tests_per_invocation)
|
||||
|
||||
if all_passed:
|
||||
exit(0)
|
||||
if summarize_failures and len(error_container):
|
||||
print(
|
||||
'''\n\n====================================================
|
||||
================ FAILURES SUMMARY ================
|
||||
====================================================\n
|
||||
'''
|
||||
)
|
||||
for i, error in enumerate(error_container.get_errors(), start=1):
|
||||
print(f"\n{i}:", error["test"], "\n")
|
||||
print(error["stderr"])
|
||||
|
||||
exit(1)
|
||||
4
external/duckdb/scripts/settings_scripts/__init__.py
vendored
Normal file
4
external/duckdb/scripts/settings_scripts/__init__.py
vendored
Normal file
@@ -0,0 +1,4 @@
|
||||
from .parse_and_sort_settings_in_json import add_all_settings_to_global_list as parse_and_sort_json_file
|
||||
from .update_settings_header_file import generate as update_header_file
|
||||
from .update_settings_scopes import generate as update_scopes
|
||||
from .update_settings_src_code import generate as update_src_code
|
||||
197
external/duckdb/scripts/settings_scripts/config.py
vendored
Normal file
197
external/duckdb/scripts/settings_scripts/config.py
vendored
Normal file
@@ -0,0 +1,197 @@
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from typing import Set, List
|
||||
from functools import total_ordering
|
||||
|
||||
# define file paths and global variables
|
||||
DUCKDB_DIR = Path(__file__).resolve().parent.parent.parent
|
||||
DUCKDB_SETTINGS_HEADER_FILE = os.path.join(DUCKDB_DIR, "src/include/duckdb/main", "settings.hpp")
|
||||
DUCKDB_AUTOGENERATED_SETTINGS_FILE = os.path.join(DUCKDB_DIR, "src/main/settings", "autogenerated_settings.cpp")
|
||||
DUCKDB_SETTINGS_SCOPE_FILE = os.path.join(DUCKDB_DIR, "src/main", "config.cpp")
|
||||
JSON_PATH = os.path.join(DUCKDB_DIR, "src/common", "settings.json")
|
||||
|
||||
# define scope values
|
||||
VALID_SCOPE_VALUES = ["GLOBAL", "LOCAL", "GLOBAL_LOCAL"]
|
||||
INVALID_SCOPE_VALUE = "INVALID"
|
||||
SQL_TYPE_MAP = {"UBIGINT": "idx_t", "BIGINT": "int64_t", "BOOLEAN": "bool", "DOUBLE": "double", "VARCHAR": "string"}
|
||||
|
||||
|
||||
# global Setting structure
|
||||
@total_ordering
|
||||
class Setting:
|
||||
# track names of written settings to prevent duplicates
|
||||
__written_settings: Set[str] = set()
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
name: str,
|
||||
description: str,
|
||||
sql_type: str,
|
||||
scope: str,
|
||||
internal_setting: str,
|
||||
on_callbacks: List[str],
|
||||
custom_implementation,
|
||||
struct_name: str,
|
||||
aliases: List[str],
|
||||
default_scope: str,
|
||||
default_value: str,
|
||||
):
|
||||
self.name = self._get_valid_name(name)
|
||||
self.description = description
|
||||
self.sql_type = self._get_sql_type(sql_type)
|
||||
self.return_type = self._get_setting_type(sql_type)
|
||||
self.is_enum = sql_type.startswith('ENUM')
|
||||
self.internal_setting = internal_setting
|
||||
self.scope = self._get_valid_scope(scope) if scope is not None else None
|
||||
self.on_set, self.on_reset = self._get_on_callbacks(on_callbacks)
|
||||
self.is_generic_setting = self.scope is None
|
||||
if self.is_enum and self.is_generic_setting:
|
||||
self.on_set = True
|
||||
custom_callbacks = ['set', 'reset', 'get']
|
||||
if type(custom_implementation) is bool:
|
||||
self.all_custom = custom_implementation
|
||||
self.custom_implementation = custom_callbacks if custom_implementation else []
|
||||
else:
|
||||
for entry in custom_implementation:
|
||||
if entry not in custom_callbacks:
|
||||
raise ValueError(
|
||||
f"Setting {self.name} - incorrect input for custom_implementation - expected set/reset/get, got {entry}"
|
||||
)
|
||||
self.all_custom = len(set(custom_implementation)) == 3
|
||||
self.custom_implementation = custom_implementation
|
||||
self.aliases = self._get_aliases(aliases)
|
||||
self.struct_name = self._get_struct_name() if len(struct_name) == 0 else struct_name
|
||||
self.default_scope = self._get_valid_default_scope(default_scope) if default_scope is not None else None
|
||||
self.default_value = default_value
|
||||
|
||||
# define all comparisons to be based on the setting's name attribute
|
||||
def __eq__(self, other) -> bool:
|
||||
return isinstance(other, Setting) and self.name == other.name
|
||||
|
||||
def __lt__(self, other) -> bool:
|
||||
return isinstance(other, Setting) and self.name < other.name
|
||||
|
||||
def __hash__(self) -> int:
|
||||
return hash(self.name)
|
||||
|
||||
def __repr__(self):
|
||||
return f"struct {self.struct_name} -> {self.name}, {self.sql_type}, {self.type}, {self.scope}, {self.description} {self.aliases}"
|
||||
|
||||
# validate setting name for correct format and uniqueness
|
||||
def _get_valid_name(self, name: str) -> str:
|
||||
if not re.match(r'^[a-zA-Z_][a-zA-Z0-9_]*$', name):
|
||||
raise ValueError(f"'{name}' cannot be used as setting name - invalid character")
|
||||
if name in Setting.__written_settings:
|
||||
raise ValueError(f"'{name}' cannot be used as setting name - already exists")
|
||||
Setting.__written_settings.add(name)
|
||||
return name
|
||||
|
||||
# ensure the setting scope is valid based on the accepted values
|
||||
def _get_valid_scope(self, scope: str) -> str:
|
||||
scope = scope.upper()
|
||||
if scope in VALID_SCOPE_VALUES:
|
||||
return scope
|
||||
return INVALID_SCOPE_VALUE
|
||||
|
||||
def _get_valid_default_scope(self, scope: str) -> str:
|
||||
scope = scope.upper()
|
||||
if scope == 'GLOBAL':
|
||||
return scope
|
||||
elif scope == 'LOCAL':
|
||||
return 'SESSION'
|
||||
raise Exception(f"Invalid default scope value {scope}")
|
||||
|
||||
# validate and return the correct type format
|
||||
def _get_sql_type(self, sql_type) -> str:
|
||||
if sql_type.startswith('ENUM'):
|
||||
return 'VARCHAR'
|
||||
if sql_type.endswith('[]'):
|
||||
# validate the child element type (raises on an unsupported type); the array type itself is returned unchanged
|
||||
sub_type = self._get_sql_type(sql_type[:-2])
|
||||
return sql_type
|
||||
if sql_type in SQL_TYPE_MAP:
|
||||
return sql_type
|
||||
raise ValueError(f"Invalid SQL type: '{sql_type}' - supported types are {', '.join(SQL_TYPE_MAP.keys())}")
|
||||
|
||||
# validate and return the cpp input type
|
||||
def _get_setting_type(self, type) -> str:
|
||||
if type.startswith('ENUM'):
|
||||
return type[len('ENUM<') : -1]
|
||||
if type.endswith('[]'):
|
||||
subtype = self._get_setting_type(type[:-2])
|
||||
return "vector<" + subtype + ">"
|
||||
return SQL_TYPE_MAP[type]
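# A minimal sketch (hypothetical type strings) of how the mapping above resolves:
#   _get_setting_type('BOOLEAN') -> 'bool'
#   _get_setting_type('UBIGINT[]') -> 'vector<idx_t>'
#   _get_setting_type('ENUM<ActivityLevel>') -> 'ActivityLevel'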
|
||||
|
||||
# validate and return the correct type format
|
||||
def _get_on_callbacks(self, callbacks) -> Tuple[bool, bool]:
|
||||
set = False
|
||||
reset = False
|
||||
for entry in callbacks:
|
||||
if entry == 'set':
|
||||
set = True
|
||||
elif entry == 'reset':
|
||||
reset = True
|
||||
else:
|
||||
raise ValueError(f"Invalid entry in on_callbacks list: {entry} (expected set or reset)")
|
||||
return (set, reset)
|
||||
|
||||
# validate and return the set of the aliases
|
||||
def _get_aliases(self, aliases: List[str]) -> List[str]:
|
||||
return [self._get_valid_name(alias) for alias in aliases]
|
||||
|
||||
# generate a function name
|
||||
def _get_struct_name(self) -> str:
|
||||
camel_case_name = ''.join(word.capitalize() for word in re.split(r'[-_]', self.name))
|
||||
if camel_case_name.endswith("Setting"):
|
||||
return f"{camel_case_name}"
|
||||
return f"{camel_case_name}Setting"
|
||||
|
||||
|
||||
# this global list (accessible across all files) stores all the settings definitions in the json file
|
||||
SettingsList: List[Setting] = []
|
||||
|
||||
|
||||
# global method that finds the indexes of a start and an end marker in a file
|
||||
def find_start_end_indexes(source_code, start_marker, end_marker, file_path):
|
||||
start_matches = list(re.finditer(start_marker, source_code))
|
||||
if len(start_matches) == 0:
|
||||
raise ValueError(f"Couldn't find start marker {start_marker} in {file_path}")
|
||||
elif len(start_matches) > 1:
|
||||
raise ValueError(f"Start marker found more than once in {file_path}")
|
||||
start_index = start_matches[0].end()
|
||||
|
||||
end_matches = list(re.finditer(end_marker, source_code[start_index:]))
|
||||
if len(end_matches) == 0:
|
||||
raise ValueError(f"Couldn't find end marker {end_marker} in {file_path}")
|
||||
elif len(end_matches) > 1:
|
||||
raise ValueError(f"End marker found more than once in {file_path}")
|
||||
end_index = start_index + end_matches[0].start()
|
||||
return start_index, end_index
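# Illustrative usage sketch (the markers are treated as regex patterns, so literal characters may need escaping):
#   start, end = find_start_end_indexes(code, SRC_CODE_START_MARKER, SRC_CODE_END_MARKER, path)
#   generated_region = code[start:end]  # text between the two markers, excluding the markers themselves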
|
||||
|
||||
|
||||
# global markers
|
||||
SEPARATOR = "//===----------------------------------------------------------------------===//\n"
|
||||
SRC_CODE_START_MARKER = "namespace duckdb {"
|
||||
SRC_CODE_END_MARKER = "} // namespace duckdb"
|
||||
|
||||
|
||||
# global method
|
||||
def write_content_to_file(new_content, path):
|
||||
with open(path, 'w') as source_file:
|
||||
source_file.write("".join(new_content))
|
||||
|
||||
|
||||
def get_setting_heading(setting_struct_name):
|
||||
struct_name_wt_Setting = re.sub(r'Setting$', '', setting_struct_name)
|
||||
heading_name = re.sub(r'(?<!^)(?=[A-Z])', ' ', struct_name_wt_Setting)
|
||||
heading = SEPARATOR + f"// {heading_name}\n" + SEPARATOR
|
||||
return heading
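# Illustrative example (hypothetical struct name): get_setting_heading("MemoryLimitSetting")
# returns SEPARATOR + "// Memory Limit\n" + SEPARATOR.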
|
||||
|
||||
|
||||
def make_format():
|
||||
os.system(f"python3 scripts/format.py {DUCKDB_SETTINGS_HEADER_FILE} --fix --force --noconfirm")
|
||||
os.system(f"python3 scripts/format.py {DUCKDB_SETTINGS_SCOPE_FILE} --fix --force --noconfirm")
|
||||
os.system(f"python3 scripts/format.py {DUCKDB_AUTOGENERATED_SETTINGS_FILE} --fix --force --noconfirm")
|
||||
58
external/duckdb/scripts/settings_scripts/parse_and_sort_settings_in_json.py
vendored
Normal file
@@ -0,0 +1,58 @@
|
||||
import json
|
||||
from .config import Setting, SettingsList, JSON_PATH
|
||||
|
||||
|
||||
# sort settings in json by name
|
||||
def sort_json_data(path):
|
||||
with open(path, 'r') as file:
|
||||
data = json.load(file)
|
||||
sorted_data = sorted(data, key=lambda x: x['name'])
|
||||
with open(path, 'w') as file:
|
||||
json.dump(sorted_data, file, indent=4)
|
||||
return sorted_data
|
||||
|
||||
|
||||
# parse the json data and store each entry as a Setting object in the global list SettingsList
|
||||
def add_all_settings_to_global_list():
|
||||
valid_entries = [
|
||||
'name',
|
||||
'description',
|
||||
'type',
|
||||
'scope',
|
||||
'internal_setting',
|
||||
'on_callbacks',
|
||||
'custom_implementation',
|
||||
'struct',
|
||||
'aliases',
|
||||
'default_scope',
|
||||
'default_value',
|
||||
]
|
||||
|
||||
print(f"Parsing and sorting the settings data in {JSON_PATH}")
|
||||
clear_global_settings_list()
|
||||
json_data = sort_json_data(JSON_PATH)
|
||||
# store all the settings in the SettingsList
|
||||
for entry in json_data:
|
||||
for field_entry in entry:
|
||||
if field_entry not in valid_entries:
|
||||
raise ValueError(
|
||||
f"Found entry unexpected entry \"{field_entry}\" in setting, expected entry to be in {', '.join(valid_entries)}"
|
||||
)
|
||||
setting = Setting(
|
||||
name=entry['name'],
|
||||
description=entry['description'],
|
||||
sql_type=entry['type'],
|
||||
internal_setting=entry.get('internal_setting', entry['name']),
|
||||
scope=entry.get('scope', None),
|
||||
struct_name=entry.get('struct', ''),
|
||||
on_callbacks=entry.get('on_callbacks', []),
|
||||
custom_implementation=entry.get('custom_implementation', False),
|
||||
aliases=entry.get('aliases', []),
|
||||
default_scope=entry.get('default_scope', None),
|
||||
default_value=entry.get('default_value', None),
|
||||
)
|
||||
SettingsList.append(setting)
|
||||
|
||||
|
||||
def clear_global_settings_list():
|
||||
SettingsList.clear()
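# Sketch of a single settings.json entry accepted by add_all_settings_to_global_list above
# (hypothetical setting; any key not listed falls back to the defaults used in the parser):
# {
#     "name": "enable_example_feature",
#     "description": "Enables the (hypothetical) example feature",
#     "type": "BOOLEAN",
#     "scope": "GLOBAL"
# }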
|
||||
132
external/duckdb/scripts/settings_scripts/update_autogenerated_functions.py
vendored
Normal file
@@ -0,0 +1,132 @@
|
||||
from .config import (
|
||||
SRC_CODE_START_MARKER,
|
||||
SRC_CODE_END_MARKER,
|
||||
SettingsList,
|
||||
find_start_end_indexes,
|
||||
get_setting_heading,
|
||||
)
|
||||
|
||||
|
||||
def generate_create_value(setting):
|
||||
if setting.sql_type == 'VARCHAR':
|
||||
return 'Value'
|
||||
else:
|
||||
return f'Value::{setting.sql_type}'
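# Illustrative note: a BOOLEAN setting produces "Value::BOOLEAN(...)" constructor calls in the generated
# code, while VARCHAR settings (including ENUMs, which are stored as VARCHAR) produce plain "Value(...)".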
|
||||
|
||||
|
||||
def add_autogenerated_global_functions(setting):
|
||||
cpp_code = ""
|
||||
if 'set' not in setting.custom_implementation:
|
||||
cpp_code += (
|
||||
f"void {setting.struct_name}::SetGlobal(DatabaseInstance *db, DBConfig &config, const Value &input) {{\n"
|
||||
)
|
||||
if setting.on_set:
|
||||
cpp_code += f"\tif (!OnGlobalSet(db, config, input)) {{\n"
|
||||
cpp_code += f"\t\treturn;\n\t}}\n"
|
||||
if setting.is_enum:
|
||||
cpp_code += f"\tauto str_input = StringUtil::Upper(input.GetValue<string>());\n"
|
||||
cpp_code += f"\tconfig.options.{setting.internal_setting} = EnumUtil::FromString<{setting.return_type}>(str_input);\n"
|
||||
else:
|
||||
cpp_code += f"\tconfig.options.{setting.internal_setting} = input.GetValue<{setting.return_type}>();\n"
|
||||
cpp_code += f"}}\n\n"
|
||||
if 'reset' not in setting.custom_implementation:
|
||||
cpp_code += f"void {setting.struct_name}::ResetGlobal(DatabaseInstance *db, DBConfig &config) {{\n"
|
||||
if setting.on_reset:
|
||||
cpp_code += f"\tif (!OnGlobalReset(db, config)) {{\n"
|
||||
cpp_code += f"\t\treturn;\n\t}}\n"
|
||||
cpp_code += f"\tconfig.options.{setting.internal_setting} = DBConfigOptions().{setting.internal_setting};\n"
|
||||
cpp_code += f"}}\n\n"
|
||||
if 'get' not in setting.custom_implementation:
|
||||
cpp_code += f"Value {setting.struct_name}::GetSetting(const ClientContext &context) {{\n"
|
||||
cpp_code += f"\tauto &config = DBConfig::GetConfig(context);\n"
|
||||
if setting.is_enum:
|
||||
cpp_code += f"\treturn {generate_create_value(setting)}(StringUtil::Lower(EnumUtil::ToString(config.options.{setting.internal_setting})));\n"
|
||||
else:
|
||||
cpp_code += f"\treturn {generate_create_value(setting)}(config.options.{setting.internal_setting});\n"
|
||||
cpp_code += f"}}\n\n"
|
||||
return cpp_code
|
||||
|
||||
|
||||
def add_autogenerated_local_functions(setting):
|
||||
cpp_code = ""
|
||||
if 'set' not in setting.custom_implementation:
|
||||
cpp_code += f"void {setting.struct_name}::SetLocal(ClientContext &context, const Value &input) {{\n"
|
||||
if setting.on_set:
|
||||
cpp_code += f"\tif (!OnLocalSet(context, input)) {{\n"
|
||||
cpp_code += f"\t\treturn;\n\t}}\n"
|
||||
cpp_code += f"\tauto &config = ClientConfig::GetConfig(context);\n"
|
||||
if setting.is_enum:
|
||||
cpp_code += f"\tauto str_input = StringUtil::Upper(input.GetValue<string>());\n"
|
||||
cpp_code += (
|
||||
f"\tconfig.{setting.internal_setting} = EnumUtil::FromString<{setting.return_type}>(str_input);\n"
|
||||
)
|
||||
else:
|
||||
cpp_code += f"\tconfig.{setting.internal_setting} = input.GetValue<{setting.return_type}>();\n"
|
||||
cpp_code += f"}}\n\n"
|
||||
if 'reset' not in setting.custom_implementation:
|
||||
cpp_code += f"void {setting.struct_name}::ResetLocal(ClientContext &context) {{\n"
|
||||
if setting.on_reset:
|
||||
cpp_code += f"\tif (!OnLocalReset(context)) {{\n"
|
||||
cpp_code += f"\t\treturn;\n\t}}\n"
|
||||
cpp_code += f"\tClientConfig::GetConfig(context).{setting.internal_setting} = ClientConfig().{setting.internal_setting};\n"
|
||||
cpp_code += f"}}\n\n"
|
||||
if 'get' not in setting.custom_implementation:
|
||||
cpp_code += f"Value {setting.struct_name}::GetSetting(const ClientContext &context) {{\n"
|
||||
cpp_code += f"\tauto &config = ClientConfig::GetConfig(context);\n"
|
||||
if setting.is_enum:
|
||||
cpp_code += f"\treturn {generate_create_value(setting)}(StringUtil::Lower(EnumUtil::ToString(config.{setting.internal_setting})));\n"
|
||||
else:
|
||||
cpp_code += f"\treturn {generate_create_value(setting)}(config.{setting.internal_setting});\n"
|
||||
cpp_code += f"}}\n\n"
|
||||
return cpp_code
|
||||
|
||||
|
||||
def add_autogenerated_enum_set(setting):
|
||||
if not setting.on_set:
|
||||
return ""
|
||||
if not setting.is_enum:
|
||||
return ""
|
||||
if 'set' in setting.custom_implementation:
|
||||
return ""
|
||||
cpp_code = ""
|
||||
|
||||
cpp_code += f"void {setting.struct_name}::OnSet(SettingCallbackInfo &info, Value ¶meter) {{\n"
|
||||
cpp_code += f"\tEnumUtil::FromString<{setting.return_type}>(StringValue::Get(parameter));\n"
|
||||
cpp_code += f"}}\n\n"
|
||||
return cpp_code
|
||||
|
||||
|
||||
def add_autogenerated_functions(path):
|
||||
with open(path, 'r') as source_file:
|
||||
source_code = source_file.read()
|
||||
|
||||
# find start and end indexes of the auto-generated section
|
||||
start_index, end_index = find_start_end_indexes(source_code, SRC_CODE_START_MARKER, SRC_CODE_END_MARKER, path)
|
||||
|
||||
# split source code into sections
|
||||
start_section = source_code[: start_index + 1] + "\n"
|
||||
end_section = source_code[end_index:]
|
||||
|
||||
new_content = ""
|
||||
added = 0
|
||||
for setting in SettingsList:
|
||||
# if the setting doesn't need custom implementation, an autogenerated one will be included
|
||||
if not setting.all_custom:
|
||||
header = get_setting_heading(setting.struct_name)
|
||||
content = ""
|
||||
if setting.is_generic_setting:
|
||||
content += add_autogenerated_enum_set(setting)
|
||||
else:
|
||||
if setting.scope == "GLOBAL" or setting.scope == "GLOBAL_LOCAL":
|
||||
content += add_autogenerated_global_functions(setting)
|
||||
if setting.scope == "LOCAL" or setting.scope == "GLOBAL_LOCAL":
|
||||
content += add_autogenerated_local_functions(setting)
|
||||
if len(content) > 0:
|
||||
new_content += header
|
||||
new_content += content
|
||||
added += 1
|
||||
return start_section + new_content + end_section, added
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
raise ValueError("Please use 'generate_settings.py' instead of running the individual script(s)")
|
||||
73
external/duckdb/scripts/settings_scripts/update_settings_header_file.py
vendored
Normal file
@@ -0,0 +1,73 @@
|
||||
from .config import SEPARATOR, SettingsList, find_start_end_indexes, write_content_to_file
|
||||
|
||||
# markers
|
||||
START_MARKER = (
|
||||
f"//===----------------------------------------------------------------------===//\n"
|
||||
f"// This code is autogenerated from 'update_settings_header_file.py'.\n"
|
||||
f"// Please do not make any changes directly here, as they will be overwritten.\n//\n"
|
||||
f"// Start of the auto-generated list of settings structures\n"
|
||||
f"//===----------------------------------------------------------------------===//\n"
|
||||
)
|
||||
END_MARKER = "// End of the auto-generated list of settings structures"
|
||||
|
||||
|
||||
def extract_declarations(setting) -> str:
|
||||
definition = (
|
||||
f"struct {setting.struct_name} {{\n"
|
||||
f" using RETURN_TYPE = {setting.return_type};\n"
|
||||
f" static constexpr const char *Name = \"{setting.name}\";\n"
|
||||
f" static constexpr const char *Description = \"{setting.description}\";\n"
|
||||
f" static constexpr const char *InputType = \"{setting.sql_type}\";\n"
|
||||
)
|
||||
if setting.scope == "GLOBAL" or setting.scope == "GLOBAL_LOCAL":
|
||||
definition += f" static void SetGlobal(DatabaseInstance *db, DBConfig &config, const Value ¶meter);\n"
|
||||
definition += f" static void ResetGlobal(DatabaseInstance *db, DBConfig &config);\n"
|
||||
if setting.on_set:
|
||||
definition += f"static bool OnGlobalSet(DatabaseInstance *db, DBConfig &config, const Value &input);\n"
|
||||
if setting.on_reset:
|
||||
definition += f"static bool OnGlobalReset(DatabaseInstance *db, DBConfig &config);\n"
|
||||
if setting.scope == "LOCAL" or setting.scope == "GLOBAL_LOCAL":
|
||||
definition += f" static void SetLocal(ClientContext &context, const Value ¶meter);\n"
|
||||
definition += f" static void ResetLocal(ClientContext &context);\n"
|
||||
if setting.on_set:
|
||||
definition += f"static bool OnLocalSet(ClientContext &context, const Value &input);\n"
|
||||
if setting.on_reset:
|
||||
definition += f"static bool OnLocalReset(ClientContext &context);\n"
|
||||
if setting.scope is not None:
|
||||
definition += f" static Value GetSetting(const ClientContext &context);\n"
|
||||
if setting.is_generic_setting:
|
||||
definition += f" static constexpr const char *DefaultValue = \"{setting.default_value}\";\n"
|
||||
definition += f" static constexpr SetScope DefaultScope = SetScope::{setting.default_scope};\n"
|
||||
if setting.on_set:
|
||||
definition += f" static void OnSet(SettingCallbackInfo &info, Value &input);\n"
|
||||
|
||||
definition += f"}};\n\n"
|
||||
return definition
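# A rough sketch (hypothetical GLOBAL BOOLEAN setting "enable_example_feature", no callbacks) of the
# declaration emitted above; exact whitespace is normalized later by make_format:
# struct EnableExampleFeatureSetting {
#     using RETURN_TYPE = bool;
#     static constexpr const char *Name = "enable_example_feature";
#     static constexpr const char *Description = "Enables the (hypothetical) example feature";
#     static constexpr const char *InputType = "BOOLEAN";
#     static void SetGlobal(DatabaseInstance *db, DBConfig &config, const Value &parameter);
#     static void ResetGlobal(DatabaseInstance *db, DBConfig &config);
#     static Value GetSetting(const ClientContext &context);
# };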
|
||||
|
||||
|
||||
# generate code for all the settings for the header file
|
||||
def generate_content(header_file_path):
|
||||
with open(header_file_path, 'r') as source_file:
|
||||
source_code = source_file.read()
|
||||
|
||||
# find start and end indexes of the auto-generated section
|
||||
start_index, end_index = find_start_end_indexes(source_code, START_MARKER, END_MARKER, header_file_path)
|
||||
|
||||
# split source code into sections
|
||||
start_section = source_code[: start_index + 1]
|
||||
end_section = SEPARATOR + source_code[end_index:]
|
||||
|
||||
new_content = "".join(extract_declarations(setting) for setting in SettingsList)
|
||||
return start_section + new_content + end_section
|
||||
|
||||
|
||||
def generate():
|
||||
from .config import DUCKDB_SETTINGS_HEADER_FILE
|
||||
|
||||
print(f"Updating {DUCKDB_SETTINGS_HEADER_FILE}")
|
||||
new_content = generate_content(DUCKDB_SETTINGS_HEADER_FILE)
|
||||
write_content_to_file(new_content, DUCKDB_SETTINGS_HEADER_FILE)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
raise ValueError("Please use 'generate_settings.py' instead of running the individual script(s)")
|
||||
61
external/duckdb/scripts/settings_scripts/update_settings_scopes.py
vendored
Normal file
@@ -0,0 +1,61 @@
|
||||
from .config import SettingsList, VALID_SCOPE_VALUES, find_start_end_indexes, write_content_to_file
|
||||
|
||||
# markers
|
||||
START_MARKER = r'static const ConfigurationOption internal_options\[\] = \{\n'
|
||||
END_MARKER = r',\s*FINAL_ALIAS};'
|
||||
|
||||
|
||||
# generate the scope code for the ConfigurationOption array and insert into the config file
|
||||
def generate_scope_code(file):
|
||||
with open(file, 'r') as source_file:
|
||||
source_code = source_file.read()
|
||||
|
||||
# find the start and end indexes of the settings' scope array
|
||||
start_index, end_index = find_start_end_indexes(source_code, START_MARKER, END_MARKER, file)
|
||||
|
||||
# split source code into sections
|
||||
before_array = source_code[:start_index] + "\n "
|
||||
after_array = source_code[end_index:]
|
||||
|
||||
# generate new entries for the settings array
|
||||
new_entries = []
|
||||
new_aliases = []
|
||||
for setting in SettingsList:
|
||||
if setting.is_generic_setting:
|
||||
if setting.on_set:
|
||||
new_entries.append([setting.name, f"DUCKDB_SETTING_CALLBACK({setting.struct_name})"])
|
||||
else:
|
||||
new_entries.append([setting.name, f"DUCKDB_SETTING({setting.struct_name})"])
|
||||
elif setting.scope in VALID_SCOPE_VALUES: # valid setting_scope values
|
||||
new_entries.append([setting.name, f"DUCKDB_{setting.scope}({setting.struct_name})"])
|
||||
else:
|
||||
raise ValueError(f"Setting {setting.name} has invalid input scope value")
|
||||
for alias in setting.aliases:
|
||||
new_aliases.append([alias, setting.name])
|
||||
new_entries.sort(key=lambda x: x[0])
|
||||
new_aliases.sort(key=lambda x: x[0])
|
||||
entry_indexes = {}
|
||||
for i in range(len(new_entries)):
|
||||
entry_indexes[new_entries[i][0]] = i
|
||||
for alias in new_aliases:
|
||||
alias_index = entry_indexes[alias[1]]
|
||||
alias.append(f"DUCKDB_SETTING_ALIAS(\"{alias[0]}\", {alias_index})")
|
||||
|
||||
new_array_section = ',\n '.join([x[1] for x in new_entries])
|
||||
new_array_section += ', FINAL_SETTING};\n\n'
|
||||
new_array_section += 'static const ConfigurationAlias setting_aliases[] = {'
|
||||
new_array_section += ',\n '.join([x[2] for x in new_aliases])
|
||||
|
||||
return before_array + new_array_section + after_array
|
||||
|
||||
|
||||
def generate():
|
||||
from .config import DUCKDB_SETTINGS_SCOPE_FILE
|
||||
|
||||
print(f"Updating {DUCKDB_SETTINGS_SCOPE_FILE}")
|
||||
new_content = generate_scope_code(DUCKDB_SETTINGS_SCOPE_FILE)
|
||||
write_content_to_file(new_content, DUCKDB_SETTINGS_SCOPE_FILE)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
raise ValueError("Please use 'generate_settings.py' instead of running the individual script(s)")
|
||||
18
external/duckdb/scripts/settings_scripts/update_settings_src_code.py
vendored
Normal file
@@ -0,0 +1,18 @@
|
||||
import re
|
||||
from .config import SettingsList, write_content_to_file, find_start_end_indexes
|
||||
from .update_autogenerated_functions import add_autogenerated_functions
|
||||
|
||||
|
||||
def generate():
|
||||
from .config import DUCKDB_AUTOGENERATED_SETTINGS_FILE
|
||||
|
||||
print(f"Updating {DUCKDB_AUTOGENERATED_SETTINGS_FILE}")
|
||||
new_autogenerated_content, generated = add_autogenerated_functions(DUCKDB_AUTOGENERATED_SETTINGS_FILE)
|
||||
write_content_to_file(new_autogenerated_content, DUCKDB_AUTOGENERATED_SETTINGS_FILE)
|
||||
|
||||
# NOTE: for debugging purposes
|
||||
# print(f"The total number of settings is {len(SettingsList)}, and {generated} settings are added in {DUCKDB_AUTOGENERATED_SETTINGS_FILE} and, {added_custom} new and {existing_custom} existing added in {DUCKDB_CUSTOM_DEFINED_SETTINGS_FILE}")
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
raise ValueError("Please use 'generate_settings.py' instead of running the individual script(s)")
|
||||
25
external/duckdb/scripts/setup_ubuntu1804.sh
vendored
Executable file
@@ -0,0 +1,25 @@
|
||||
#!/bin/bash
|
||||
|
||||
# install base build dependencies
|
||||
apt-get update -y -qq
|
||||
apt-get install -y -qq software-properties-common
|
||||
add-apt-repository ppa:git-core/ppa
|
||||
apt-get update -y -qq
|
||||
apt-get install -y -qq --fix-missing ninja-build make gcc-multilib g++-multilib libssl-dev wget openjdk-8-jdk zip maven unixodbc-dev libc6-dev-i386 lib32readline6-dev libssl-dev libcurl4-gnutls-dev libexpat1-dev gettext unzip build-essential checkinstall libffi-dev curl libz-dev openssh-client pkg-config
|
||||
|
||||
# cross compilation stuff
|
||||
apt-get install -y -qq gcc-aarch64-linux-gnu g++-aarch64-linux-gnu
|
||||
|
||||
# git
|
||||
wget https://github.com/git/git/archive/refs/tags/v2.18.5.tar.gz
|
||||
tar xvf v2.18.5.tar.gz
|
||||
cd git-2.18.5
|
||||
make
|
||||
make prefix=/usr install
|
||||
git --version
|
||||
|
||||
# cmake
|
||||
wget https://github.com/Kitware/CMake/releases/download/v3.21.3/cmake-3.21.3-linux-x86_64.sh
|
||||
chmod +x cmake-3.21.3-linux-x86_64.sh
|
||||
./cmake-3.21.3-linux-x86_64.sh --skip-license --prefix=/usr/local
|
||||
cmake --version
|
||||
86
external/duckdb/scripts/test_compile.py
vendored
Normal file
@@ -0,0 +1,86 @@
|
||||
import os
|
||||
import sys
|
||||
import amalgamation
|
||||
import pickle
|
||||
import subprocess
|
||||
|
||||
# where to cache which files have already been compiled
|
||||
cache_file = 'amalgamation.cache'
|
||||
ignored_files = ['utf8proc_data.cpp']
|
||||
|
||||
RESUME_AUTO = 0
|
||||
RESUME_ALWAYS = 1
|
||||
RESUME_NEVER = 2
|
||||
|
||||
# resume behavior
|
||||
# by default, we resume if the previous test_compile was run on the same commit hash as this one
|
||||
resume = RESUME_AUTO
|
||||
for arg in sys.argv:
|
||||
if arg == '--resume':
|
||||
resume = RESUME_ALWAYS
|
||||
elif arg == '--restart':
|
||||
resume = RESUME_NEVER
|
||||
|
||||
if resume == RESUME_NEVER:
|
||||
try:
|
||||
os.remove(cache_file)
|
||||
except:
|
||||
pass
|
||||
|
||||
|
||||
def get_git_hash():
|
||||
proc = subprocess.Popen(['git', 'rev-parse', 'HEAD'], stdout=subprocess.PIPE)
|
||||
return proc.stdout.read().strip()
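# note: the hash is returned as bytes (stdout is not decoded); the cache stores and compares
# the same bytes value, so the commit-hash comparison below stays consistent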
|
||||
|
||||
|
||||
current_hash = get_git_hash()
|
||||
|
||||
# load the cache, and check the commit hash
|
||||
try:
|
||||
with open(cache_file, 'rb') as cf:
|
||||
cache = pickle.load(cf)
|
||||
if resume == RESUME_AUTO:
|
||||
# auto resume, check
|
||||
if cache['commit_hash'] != current_hash:
|
||||
cache = {}
|
||||
except:
|
||||
cache = {}
|
||||
|
||||
cache['commit_hash'] = current_hash
|
||||
|
||||
|
||||
def try_compilation(fpath, cache):
|
||||
if fpath in cache:
|
||||
return
|
||||
print(fpath)
|
||||
|
||||
cmd = (
|
||||
'clang++ -std=c++11 -Wno-deprecated -Wno-writable-strings -S -MMD -MF dependencies.d -o deps.s '
|
||||
+ fpath
|
||||
+ ' '
|
||||
+ ' '.join(["-I" + x for x in amalgamation.include_paths])
|
||||
)
|
||||
ret = os.system(cmd)
|
||||
if ret != 0:
|
||||
raise Exception('Failed compilation of file "' + fpath + '"!\n Command: ' + cmd)
|
||||
cache[fpath] = True
|
||||
with open(cache_file, 'wb') as cf:
|
||||
pickle.dump(cache, cf)
|
||||
|
||||
|
||||
def compile_dir(dir, cache):
|
||||
files = os.listdir(dir)
|
||||
files.sort()
|
||||
for fname in files:
|
||||
if fname in amalgamation.excluded_compilation_files or fname in ignored_files:
|
||||
continue
|
||||
fpath = os.path.join(dir, fname)
|
||||
if os.path.isdir(fpath):
|
||||
compile_dir(fpath, cache)
|
||||
elif fname.endswith('.cpp') or fname.endswith('.hpp') or fname.endswith('.c') or fname.endswith('.cc'):
|
||||
try_compilation(fpath, cache)
|
||||
|
||||
|
||||
# compile all files in the src directory (including headers!) individually
|
||||
for cdir in amalgamation.compile_directories:
|
||||
compile_dir(cdir, cache)
|
||||
22
external/duckdb/scripts/test_docker_images.sh
vendored
Executable file
@@ -0,0 +1,22 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
|
||||
TEST="./build/release/duckdb -c 'PRAGMA platform;' && make clean && echo 'DOCKER TEST RESULT: SUCCESS' || (echo 'DOCKER TEST RESULT: FAILURE' && make clean)"
|
||||
|
||||
make clean
|
||||
|
||||
# Currently not working due to cmake version being too low
|
||||
# docker run -i --rm -v $(pwd):/duckdb --workdir /duckdb amazonlinux:2 <<< "yum install gcc gcc-c++ git make cmake ninja-build -y && GEN=ninja make && $TEST" 2>&1
|
||||
|
||||
docker run -i --rm -v $(pwd):/duckdb --workdir /duckdb alpine:latest <<< "apk add g++ git make cmake ninja python3 && cmake -Bbuild . && cmake --build build && cmake --install build && g++ -std=c++11 examples/embedded-c++/main.cpp"
|
||||
docker run -i --rm -v $(pwd):/duckdb --workdir /duckdb amazonlinux:latest <<< "yum install clang git make cmake ninja-build -y && GEN=ninja make && $TEST" 2>&1
|
||||
docker run -i --platform linux/arm64 --rm -v $(pwd):/duckdb --workdir /duckdb alpine:latest <<< "apk add g++ git make cmake ninja && GEN=ninja make && $TEST" 2>&1
|
||||
docker run -i --platform linux/amd64 --rm -v $(pwd):/duckdb --workdir /duckdb alpine:latest <<< "apk add g++ git make cmake ninja && GEN=ninja make && $TEST" 2>&1
|
||||
docker run -i --rm -v $(pwd):/duckdb --workdir /duckdb alpine:latest <<< "apk add g++ git make cmake ninja && GEN=ninja make && $TEST" 2>&1
|
||||
docker run -i --rm -v $(pwd):/duckdb --workdir /duckdb alpine:latest <<< "apk add g++ git make cmake ninja python3 && GEN=ninja make && $TEST" 2>&1
|
||||
docker run -i --rm -v $(pwd):/duckdb --workdir /duckdb alpine:latest <<< "apk add g++ git make cmake ninja && CXX_STANDARD=23 GEN=ninja make && $TEST" 2>&1
|
||||
docker run -i --rm -v $(pwd):/duckdb --workdir /duckdb ubuntu:20.04 <<< "apt-get update && export DEBIAN_FRONTEND=noninteractive && apt-get install g++ git make cmake ninja-build -y && GEN=ninja make && $TEST" 2>&1
|
||||
docker run -i --rm -v $(pwd):/duckdb --workdir /duckdb ubuntu:devel <<< "apt-get update && export DEBIAN_FRONTEND=noninteractive && apt-get install g++ git make cmake ninja-build -y && GEN=ninja make && $TEST" 2>&1
|
||||
docker run -i --rm -v $(pwd):/duckdb --workdir /duckdb centos <<< "sed -i 's/mirrorlist/#mirrorlist/g' /etc/yum.repos.d/CentOS-* && sed -i 's|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g' /etc/yum.repos.d/CentOS-* && yum install git make cmake clang -y && make && $TEST" 2>&1
|
||||
docker run -i --rm -v $(pwd):/duckdb --workdir /duckdb fedora <<< "dnf install make cmake ninja-build gcc g++ -y && make && $TEST" 2>&1
|
||||
docker run -i --rm -v $(pwd):/duckdb --workdir /duckdb ghcr.io/mocusez/duckdb-riscv-ci/duckdb-riscv-ci <<< "apt-get update && export DEBIAN_FRONTEND=noninteractive && apt-get install cmake ninja-build libssl-dev g++-riscv64-linux-gnu -y && GEN=ninja CC='riscv64-linux-gnu-gcc -march=rv64gcv_zicsr_zifencei_zihintpause_zvl256b' CXX='riscv64-linux-gnu-g++ -march=rv64gcv_zicsr_zifencei_zihintpause_zvl256b' DUCKDB_PLATFORM=linux_riscv make && cd / && ./start_qemu.sh && cd /duckdb && make clean && echo 'DOCKER TEST RESULT: SUCCESS' || (echo 'DOCKER TEST RESULT: FAILURE' && make clean)" 2>&1
|
||||
230
external/duckdb/scripts/test_peg_parser.py
vendored
Normal file
@@ -0,0 +1,230 @@
|
||||
import argparse
|
||||
import os
|
||||
import sqllogictest
|
||||
from sqllogictest import SQLParserException, SQLLogicParser, SQLLogicTest
from typing import Optional
|
||||
import subprocess
|
||||
import multiprocessing
|
||||
import tempfile
|
||||
import re
|
||||
|
||||
parser = argparse.ArgumentParser(description="Test serialization")
|
||||
parser.add_argument("--shell", type=str, help="Shell binary to run", default=os.path.join('build', 'debug', 'duckdb'))
|
||||
parser.add_argument("--offset", type=int, help="File offset", default=None)
|
||||
parser.add_argument("--count", type=int, help="File count", default=None)
|
||||
parser.add_argument('--no-exit', action='store_true', help='Do not exit after a test fails', default=False)
|
||||
parser.add_argument('--print-failing-only', action='store_true', help='Print failing tests only', default=False)
|
||||
parser.add_argument(
|
||||
'--include-extensions', action='store_true', help='Include test files of out-of-tree extensions', default=False
|
||||
)
|
||||
group = parser.add_mutually_exclusive_group(required=True)
|
||||
group.add_argument("--test-file", type=str, help="Path to the SQL logic file", default='')
|
||||
group.add_argument(
|
||||
"--test-list", type=str, help="Path to the file that contains a newline separated list of test files", default=''
|
||||
)
|
||||
group.add_argument("--all-tests", action='store_true', help="Run all tests", default=False)
|
||||
args = parser.parse_args()
|
||||
|
||||
|
||||
def extract_git_urls(script: str):
|
||||
pattern = r'GIT_URL\s+(https?://\S+)'
|
||||
return re.findall(pattern, script)
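# Illustrative sketch (hypothetical cmake line): an entry such as
#   GIT_URL https://github.com/duckdb/duckdb_example_extension
# would be matched and its URL returned in the resulting list.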
|
||||
|
||||
|
||||
import os
|
||||
import requests
|
||||
from urllib.parse import urlparse
|
||||
|
||||
|
||||
def download_directory_contents(api_url, local_path, headers):
|
||||
response = requests.get(api_url, headers=headers)
|
||||
if response.status_code != 200:
|
||||
print(f"⚠️ Could not access {api_url}: {response.status_code}")
|
||||
return
|
||||
|
||||
os.makedirs(local_path, exist_ok=True)
|
||||
|
||||
for item in response.json():
|
||||
item_type = item.get("type")
|
||||
item_name = item.get("name")
|
||||
if item_type == "file":
|
||||
download_url = item.get("download_url")
|
||||
if not download_url:
|
||||
continue
|
||||
file_path = os.path.join(local_path, item_name)
|
||||
file_resp = requests.get(download_url)
|
||||
if file_resp.status_code == 200:
|
||||
with open(file_path, "wb") as f:
|
||||
f.write(file_resp.content)
|
||||
print(f" - Downloaded {file_path}")
|
||||
else:
|
||||
print(f" - Failed to download {file_path}")
|
||||
elif item_type == "dir":
|
||||
subdir_api_url = item.get("url")
|
||||
subdir_local_path = os.path.join(local_path, item_name)
|
||||
download_directory_contents(subdir_api_url, subdir_local_path, headers)
|
||||
|
||||
|
||||
def download_test_sql_folder(repo_url, base_folder="extension-test-files"):
|
||||
repo_name = urlparse(repo_url).path.strip("/").split("/")[-1]
|
||||
target_folder = os.path.join(base_folder, repo_name)
|
||||
|
||||
if os.path.exists(target_folder):
|
||||
print(f"✓ Skipping {repo_name}, already exists.")
|
||||
return
|
||||
|
||||
print(f"⬇️ Downloading test/sql from {repo_name}...")
|
||||
|
||||
api_url = f"https://api.github.com/repos/duckdb/{repo_name}/contents/test/sql?ref=main"
|
||||
GITHUB_TOKEN = os.environ["GITHUB_TOKEN"]
|
||||
headers = {"Accept": "application/vnd.github.v3+json", "Authorization": f"Bearer {GITHUB_TOKEN}"}
|
||||
|
||||
download_directory_contents(api_url, target_folder, headers)
|
||||
|
||||
|
||||
def batch_download_all_test_sql():
|
||||
filename = ".github/config/out_of_tree_extensions.cmake"
|
||||
if not os.path.isfile(filename):
|
||||
raise Exception(f"File {filename} not found")
|
||||
with open(filename, "r") as f:
|
||||
content = f.read()
|
||||
urls = extract_git_urls(content)
|
||||
if urls == []:
|
||||
print("No URLs found.")
|
||||
for url in urls:
|
||||
download_test_sql_folder(url)
|
||||
|
||||
|
||||
def find_tests_recursive(dir, excluded_paths):
|
||||
test_list = []
|
||||
for f in os.listdir(dir):
|
||||
path = os.path.join(dir, f)
|
||||
if path in excluded_paths:
|
||||
continue
|
||||
if os.path.isdir(path):
|
||||
test_list += find_tests_recursive(path, excluded_paths)
|
||||
elif path.endswith('.test') or path.endswith('.test_slow'):
|
||||
test_list.append(path)
|
||||
return test_list
|
||||
|
||||
|
||||
def parse_test_file(filename):
|
||||
if not os.path.isfile(filename):
|
||||
raise Exception(f"File {filename} not found")
|
||||
parser = SQLLogicParser()
|
||||
try:
|
||||
out: Optional[SQLLogicTest] = parser.parse(filename)
|
||||
if not out:
|
||||
raise SQLParserException(f"Test {filename} could not be parsed")
|
||||
except:
|
||||
return []
|
||||
loop_count = 0
|
||||
statements = []
|
||||
for stmt in out.statements:
|
||||
if type(stmt) is sqllogictest.statement.skip.Skip:
|
||||
# mode skip - just skip entire test
|
||||
break
|
||||
if type(stmt) is sqllogictest.statement.loop.Loop or type(stmt) is sqllogictest.statement.foreach.Foreach:
|
||||
loop_count += 1
|
||||
if type(stmt) is sqllogictest.statement.endloop.Endloop:
|
||||
loop_count -= 1
|
||||
if loop_count > 0:
|
||||
# loops are ignored currently
|
||||
continue
|
||||
if not (
|
||||
type(stmt) is sqllogictest.statement.query.Query or type(stmt) is sqllogictest.statement.statement.Statement
|
||||
):
|
||||
# only handle query and statement nodes for now
|
||||
continue
|
||||
if type(stmt) is sqllogictest.statement.statement.Statement:
|
||||
# skip expected errors
|
||||
if stmt.expected_result.type == sqllogictest.ExpectedResult.Type.ERROR:
|
||||
if any(
|
||||
"parser error" in line.lower() or "syntax error" in line.lower()
|
||||
for line in stmt.expected_result.lines
|
||||
):
|
||||
continue
|
||||
query = ' '.join(stmt.lines)
|
||||
statements.append(query)
|
||||
return statements
|
||||
|
||||
|
||||
def run_test_case(args_tuple):
|
||||
i, file, shell, print_failing_only = args_tuple
|
||||
results = []
|
||||
if not print_failing_only:
|
||||
print(f"Run test {i}: {file}")
|
||||
|
||||
statements = parse_test_file(file)
|
||||
for statement in statements:
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
peg_sql_path = os.path.join(tmpdir, 'peg_test.sql')
|
||||
with open(peg_sql_path, 'w') as f:
|
||||
f.write(f'CALL check_peg_parser($TEST_PEG_PARSER${statement}$TEST_PEG_PARSER$);\n')
|
||||
|
||||
proc = subprocess.run([shell, '-init', peg_sql_path, '-c', '.exit'], capture_output=True)
|
||||
stderr = proc.stderr.decode('utf8')
|
||||
|
||||
if proc.returncode == 0 and ' Error:' not in stderr:
|
||||
continue
|
||||
|
||||
if print_failing_only:
|
||||
print(f"Failed test {i}: {file}")
|
||||
else:
|
||||
print(f'Failed')
|
||||
print(f'-- STDOUT --')
|
||||
print(proc.stdout.decode('utf8'))
|
||||
print(f'-- STDERR --')
|
||||
print(stderr)
|
||||
|
||||
results.append((file, statement))
|
||||
break
|
||||
return results
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
files = []
|
||||
excluded_tests = {
|
||||
'test/sql/peg_parser', # Fail for some reason
|
||||
'test/sql/prepared/parameter_variants.test', # PostgreSQL parser bug with ?1
|
||||
'test/sql/copy/s3/download_config.test', # Unknown why this passes in SQLLogicTest
|
||||
'test/sql/function/list/lambdas/arrow/lambda_scope_deprecated.test', # Error in the tokenization of *+*
|
||||
'test/sql/catalog/function/test_simple_macro.test', # Bug when mixing named parameters and non-named
|
||||
}
|
||||
if args.all_tests:
|
||||
# run all tests
|
||||
test_dir = os.path.join('test', 'sql')
|
||||
files = find_tests_recursive(test_dir, excluded_tests)
|
||||
if args.include_extensions:
|
||||
batch_download_all_test_sql()
|
||||
extension_files = find_tests_recursive('extension-test-files', {})
|
||||
files = files + extension_files
|
||||
elif len(args.test_list) > 0:
|
||||
with open(args.test_list, 'r') as f:
|
||||
files = [x.strip() for x in f.readlines() if x.strip() not in excluded_tests]
|
||||
else:
|
||||
# run a single test
|
||||
files.append(args.test_file)
|
||||
files.sort()
|
||||
|
||||
start = args.offset if args.offset is not None else 0
|
||||
end = start + args.count if args.count is not None else len(files)
|
||||
work_items = [(i, files[i], args.shell, args.print_failing_only) for i in range(start, end)]
|
||||
|
||||
if not args.no_exit:
|
||||
# Disable multiprocessing for --no-exit behavior
|
||||
failed_test_list = []
|
||||
for item in work_items:
|
||||
res = run_test_case(item)
|
||||
if res:
|
||||
failed_test_list.extend(res)
|
||||
exit(1)
|
||||
else:
|
||||
with multiprocessing.Pool() as pool:
|
||||
results = pool.map(run_test_case, work_items)
|
||||
failed_test_list = [item for sublist in results for item in sublist]
|
||||
|
||||
failed_tests = len(failed_test_list)
|
||||
print("List of failed tests: ")
|
||||
for test, statement in failed_test_list:
|
||||
print(f"{test}\n{statement}\n\n")
|
||||
print(f"Total of {failed_tests} out of {len(files)} failed ({round(failed_tests/len(files) * 100,2)}%). ")
|
||||
226
external/duckdb/scripts/test_serialization_bwc.py
vendored
Normal file
@@ -0,0 +1,226 @@
|
||||
import sqllogictest
|
||||
from sqllogictest import SQLParserException, SQLLogicParser, SQLLogicTest
|
||||
import duckdb
|
||||
from typing import Optional
|
||||
import argparse
|
||||
import shutil
|
||||
import os
|
||||
import subprocess
|
||||
|
||||
# example usage: python3 scripts/test_serialization_bwc.py --old-source ../duckdb-bugfix --test-file test/sql/aggregate/aggregates/test_median.test
|
||||
|
||||
serialized_path = os.path.join('test', 'api', 'serialized_plans')
|
||||
db_load_path = os.path.join(serialized_path, 'db_load.sql')
|
||||
queries_path = os.path.join(serialized_path, 'queries.sql')
|
||||
result_binary = os.path.join(serialized_path, 'serialized_plans.binary')
|
||||
unittest_binary = os.path.join('build', 'debug', 'test', 'unittest')
|
||||
|
||||
|
||||
def complete_query(q):
|
||||
q = q.strip()
|
||||
if q.endswith(';'):
|
||||
return q
|
||||
return q + ';'
|
||||
|
||||
|
||||
def parse_test_file(filename):
|
||||
parser = SQLLogicParser()
|
||||
try:
|
||||
out: Optional[SQLLogicTest] = parser.parse(filename)
|
||||
if not out:
|
||||
raise SQLParserException(f"Test {filename} could not be parsed")
|
||||
except:
|
||||
return {'load': [], 'query': []}
|
||||
loop_count = 0
|
||||
load_statements = []
|
||||
query_statements = []
|
||||
for stmt in out.statements:
|
||||
if type(stmt) is sqllogictest.statement.skip.Skip:
|
||||
# mode skip - just skip entire test
|
||||
break
|
||||
if type(stmt) is sqllogictest.statement.loop.Loop or type(stmt) is sqllogictest.statement.foreach.Foreach:
|
||||
loop_count += 1
|
||||
if type(stmt) is sqllogictest.statement.endloop.Endloop:
|
||||
loop_count -= 1
|
||||
if loop_count > 0:
|
||||
# loops are ignored currently
|
||||
continue
|
||||
if not (
|
||||
type(stmt) is sqllogictest.statement.query.Query or type(stmt) is sqllogictest.statement.statement.Statement
|
||||
):
|
||||
# only handle query and statement nodes for now
|
||||
continue
|
||||
if type(stmt) is sqllogictest.statement.statement.Statement:
|
||||
# skip expected errors
|
||||
if stmt.expected_result.type == sqllogictest.ExpectedResult.Type.ERROR:
|
||||
continue
|
||||
query = ' '.join(stmt.lines)
|
||||
try:
|
||||
sql_stmt_list = duckdb.extract_statements(query)
|
||||
except KeyboardInterrupt:
|
||||
raise
|
||||
except:
|
||||
continue
|
||||
for sql_stmt in sql_stmt_list:
|
||||
if sql_stmt.type == duckdb.StatementType.SELECT:
|
||||
query_statements.append(query)
|
||||
elif sql_stmt.type == duckdb.StatementType.PRAGMA:
|
||||
continue
|
||||
else:
|
||||
load_statements.append(query)
|
||||
return {'load': load_statements, 'query': query_statements}
|
||||
|
||||
|
||||
def build_sources(old_source, new_source):
|
||||
# generate the sources
|
||||
current_path = os.getcwd()
|
||||
os.chdir(old_source)
|
||||
# build if not yet built
|
||||
if not os.path.isfile(unittest_binary):
|
||||
res = subprocess.run(['make', 'debug']).returncode
|
||||
if res != 0:
|
||||
raise Exception("Failed to build old sources")
|
||||
|
||||
# run the verification
|
||||
os.chdir(current_path)
|
||||
os.chdir(new_source)
|
||||
|
||||
# build if not yet built
|
||||
if not os.path.isfile(unittest_binary):
|
||||
res = subprocess.run(['make', 'debug']).returncode
|
||||
if res != 0:
|
||||
raise Exception("Failed to build new sources")
|
||||
os.chdir(current_path)
|
||||
|
||||
|
||||
def run_test(filename, old_source, new_source, no_exit):
|
||||
statements = parse_test_file(filename)
|
||||
|
||||
# generate the sources
|
||||
current_path = os.getcwd()
|
||||
os.chdir(old_source)
|
||||
# write the files
|
||||
with open(os.path.join(old_source, db_load_path), 'w+') as f:
|
||||
for stmt in statements['load']:
|
||||
f.write(complete_query(stmt) + '\n')
|
||||
|
||||
with open(os.path.join(old_source, queries_path), 'w+') as f:
|
||||
for stmt in statements['query']:
|
||||
f.write(complete_query(stmt) + '\n')
|
||||
|
||||
# generate the serialization
|
||||
my_env = os.environ.copy()
|
||||
my_env['GEN_PLAN_STORAGE'] = '1'
|
||||
res = subprocess.run(['build/debug/test/unittest', 'Generate serialized plans file'], env=my_env).returncode
|
||||
if res != 0:
|
||||
print(f"SKIPPING TEST {filename}")
|
||||
return True
|
||||
|
||||
os.chdir(current_path)
|
||||
|
||||
# copy over the files
|
||||
for f in [db_load_path, queries_path, result_binary]:
|
||||
shutil.copy(os.path.join(old_source, f), os.path.join(new_source, f))
|
||||
|
||||
# run the verification
|
||||
os.chdir(new_source)
|
||||
|
||||
res = subprocess.run(['build/debug/test/unittest', "Test deserialized plans from file"]).returncode
|
||||
if res != 0:
|
||||
if no_exit:
|
||||
print("BROKEN TEST")
|
||||
with open('broken_tests.list', 'a') as f:
|
||||
f.write(filename + '\n')
|
||||
return False
|
||||
raise Exception("Deserialization failure")
|
||||
os.chdir(current_path)
|
||||
return True
|
||||
|
||||
|
||||
def parse_excluded_tests(path):
|
||||
exclusion_list = {}
|
||||
with open(path) as f:
|
||||
for line in f:
|
||||
if len(line.strip()) == 0 or line[0] == '#':
|
||||
continue
|
||||
exclusion_list[line.strip()] = True
|
||||
return exclusion_list
|
||||
|
||||
|
||||
def find_tests_recursive(dir, excluded_paths):
|
||||
test_list = []
|
||||
for f in os.listdir(dir):
|
||||
path = os.path.join(dir, f)
|
||||
if path in excluded_paths:
|
||||
continue
|
||||
if os.path.isdir(path):
|
||||
test_list += find_tests_recursive(path, excluded_paths)
|
||||
elif path.endswith('.test'):
|
||||
test_list.append(path)
|
||||
return test_list
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Test serialization")
|
||||
parser.add_argument("--new-source", type=str, help="Path to the new source", default='.')
|
||||
parser.add_argument("--old-source", type=str, help="Path to the old source")
|
||||
parser.add_argument("--start-at", type=str, help="Start running tests at this specific test", default=None)
|
||||
parser.add_argument("--no-exit", action="store_true", help="Keep running even if a test fails", default=False)
|
||||
group = parser.add_mutually_exclusive_group(required=True)
|
||||
group.add_argument("--test-file", type=str, help="Path to the SQL logic file", default='')
|
||||
group.add_argument("--all-tests", action='store_true', help="Run all tests", default=False)
|
||||
group.add_argument("--test-list", type=str, help="Load tests to run from a file list", default=None)
|
||||
args = parser.parse_args()
|
||||
|
||||
old_source = args.old_source
|
||||
new_source = args.new_source
|
||||
files = []
|
||||
if args.all_tests:
|
||||
# run all tests
|
||||
excluded_tests = parse_excluded_tests(
|
||||
os.path.join(new_source, 'test', 'api', 'serialized_plans', 'excluded_tests.list')
|
||||
)
|
||||
test_dir = os.path.join('test', 'sql')
|
||||
if new_source != '.':
|
||||
test_dir = os.path.join(new_source, test_dir)
|
||||
files = find_tests_recursive(test_dir, excluded_tests)
|
||||
elif args.test_list is not None:
|
||||
with open(args.test_list, 'r') as f:
|
||||
for line in f:
|
||||
if len(line.strip()) == 0:
|
||||
continue
|
||||
files.append(line.strip())
|
||||
else:
|
||||
# run a single test
|
||||
files.append(args.test_file)
|
||||
files.sort()
|
||||
|
||||
current_path = os.getcwd()
|
||||
try:
|
||||
build_sources(old_source, new_source)
|
||||
|
||||
all_succeeded = True
|
||||
started = False
|
||||
if args.start_at is None:
|
||||
started = True
|
||||
for filename in files:
|
||||
if not started:
|
||||
if filename == args.start_at:
|
||||
started = True
|
||||
else:
|
||||
continue
|
||||
|
||||
print(f"Run test {filename}")
|
||||
os.chdir(current_path)
|
||||
if not run_test(filename, old_source, new_source, args.no_exit):
|
||||
all_succeeded = False
|
||||
if not all_succeeded:
|
||||
exit(1)
|
||||
except:
|
||||
raise
|
||||
finally:
|
||||
os.chdir(current_path)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
229
external/duckdb/scripts/test_storage_compatibility.py
vendored
Normal file
@@ -0,0 +1,229 @@
|
||||
import argparse
|
||||
import os
|
||||
import subprocess
|
||||
import re
|
||||
import csv
|
||||
from pathlib import Path
|
||||
|
||||
parser = argparse.ArgumentParser(description='Run a full benchmark using the CLI and report the results.')
|
||||
group = parser.add_mutually_exclusive_group(required=True)
|
||||
group.add_argument('--old-cli', action='store', help='Path to the CLI of the old DuckDB version to test')
|
||||
group.add_argument('--versions', type=str, action='store', help='DuckDB versions to test')
|
||||
parser.add_argument('--new-unittest', action='store', help='Path to the new unittester to run', required=True)
|
||||
parser.add_argument('--new-cli', action='store', help='Path to the new CLI to run', default=None)
|
||||
parser.add_argument('--compatibility', action='store', help='Storage compatibility version', default='v1.0.0')
|
||||
parser.add_argument(
|
||||
'--test-config', action='store', help='Test config script to run', default='test/configs/storage_compatibility.json'
|
||||
)
|
||||
parser.add_argument('--db-name', action='store', help='Database name to write to', default='bwc_storage_test.db')
|
||||
parser.add_argument('--abort-on-failure', action='store_true', help='Abort on first failure', default=False)
|
||||
parser.add_argument('--start-offset', type=int, action='store', help='Test start offset', default=None)
|
||||
parser.add_argument('--end-offset', type=int, action='store', help='Test end offset', default=None)
|
||||
parser.add_argument('--no-summarize-failures', action='store_true', help='Skip failure summary', default=False)
|
||||
parser.add_argument('--list-versions', action='store_true', help='Only list versions to test', default=False)
|
||||
parser.add_argument(
|
||||
'--run-empty-tests',
|
||||
action='store_true',
|
||||
help="Run tests that don't have a CREATE TABLE or CREATE VIEW statement",
|
||||
default=False,
|
||||
)
|
||||
|
||||
args, extra_args = parser.parse_known_args()
|
||||
|
||||
programs_to_test = []
|
||||
if args.versions is not None:
|
||||
version_splits = args.versions.split('|')
|
||||
for version in version_splits:
|
||||
cli_path = os.path.join(Path.home(), '.duckdb', 'cli', version, 'duckdb')
|
||||
if not os.path.isfile(cli_path):
|
||||
os.system(f'curl https://install.duckdb.org | DUCKDB_VERSION={version} sh')
|
||||
programs_to_test.append(cli_path)
|
||||
else:
|
||||
programs_to_test.append(args.old_cli)
|
||||
|
||||
unittest_program = args.new_unittest
|
||||
db_name = args.db_name
|
||||
new_cli = args.new_unittest.replace('test/unittest', 'duckdb') if args.new_cli is None else args.new_cli
|
||||
summarize_failures = not args.no_summarize_failures
|
||||
|
||||
# Use the '-l' parameter to output the list of tests to run
|
||||
proc = subprocess.run(
|
||||
[unittest_program, '--test-config', args.test_config, '-l'] + extra_args,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
)
|
||||
stdout = proc.stdout.decode('utf8').strip()
|
||||
stderr = proc.stderr.decode('utf8').strip()
|
||||
if len(stderr) > 0:
|
||||
print("Failed to run program " + unittest_program)
|
||||
print("Returncode:", proc.returncode)
|
||||
print(stdout)
|
||||
print(stderr)
|
||||
exit(1)
|
||||
|
||||
|
||||
# The output is in the format of 'PATH\tGROUP'; we're only interested in the PATH portion
|
||||
test_cases = []
|
||||
first_line = True
|
||||
for line in stdout.splitlines():
|
||||
if first_line:
|
||||
first_line = False
|
||||
continue
|
||||
if len(line.strip()) == 0:
|
||||
continue
|
||||
splits = line.rsplit('\t', 1)
|
||||
test_cases.append(splits[0])
|
||||
|
||||
test_cases.sort()
|
||||
if args.compatibility != 'v1.0.0':
|
||||
raise Exception("Only v1.0.0 is supported for now (FIXME)")
|
||||
|
||||
|
||||
def escape_cmd_arg(arg):
|
||||
if '"' in arg or '\'' in arg or ' ' in arg or '\\' in arg:
|
||||
arg = arg.replace('\\', '\\\\')
|
||||
arg = arg.replace('"', '\\"')
|
||||
arg = arg.replace("'", "\\'")
|
||||
return f'"{arg}"'
|
||||
return arg
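# Illustrative sketch: escape_cmd_arg('say "hi"') returns '"say \\"hi\\""', while a plain
# argument such as 'bwc_storage_test.db' is returned unchanged.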
|
||||
|
||||
|
||||
error_container = []
|
||||
|
||||
|
||||
def handle_failure(test, cmd, msg, stdout, stderr, returncode):
|
||||
print(f"==============FAILURE============")
|
||||
print(test)
|
||||
print(f"==============MESSAGE============")
|
||||
print(msg)
|
||||
print(f"==============COMMAND============")
|
||||
cmd_str = ''
|
||||
for entry in cmd:
|
||||
cmd_str += escape_cmd_arg(entry) + ' '
|
||||
print(cmd_str.strip())
|
||||
print(f"==============RETURNCODE=========")
|
||||
print(str(returncode))
|
||||
print(f"==============STDOUT=============")
|
||||
print(stdout)
|
||||
print(f"==============STDERR=============")
|
||||
print(stderr)
|
||||
print(f"=================================")
|
||||
if args.abort_on_failure:
|
||||
exit(1)
|
||||
else:
|
||||
error_container.append({'test': test, 'stderr': stderr})
|
||||
|
||||
|
||||
def run_program(cmd, description):
|
||||
proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
stdout = proc.stdout.decode('utf8').strip()
|
||||
stderr = proc.stderr.decode('utf8').strip()
|
||||
if proc.returncode != 0:
|
||||
return {
|
||||
'test': test,
|
||||
'cmd': cmd,
|
||||
'msg': f'Failed to {description}',
|
||||
'stdout': stdout,
|
||||
'stderr': stderr,
|
||||
'returncode': proc.returncode,
|
||||
}
|
||||
return None
|
||||
|
||||
|
||||
def try_run_program(cmd, description):
|
||||
result = run_program(cmd, description)
|
||||
if result is None:
|
||||
return True
|
||||
handle_failure(**result)
|
||||
return False
|
||||
|
||||
|
||||
index = 0
|
||||
start = 0 if args.start_offset is None else args.start_offset
|
||||
end = len(test_cases) if args.end_offset is None else args.end_offset
|
||||
for i in range(start, end):
|
||||
test = test_cases[i]
|
||||
skipped = ''
|
||||
if not args.run_empty_tests:
|
||||
with open(test, 'r') as f:
|
||||
test_contents = f.read().lower()
|
||||
if 'create table' not in test_contents and 'create view' not in test_contents:
|
||||
skipped = ' (SKIPPED)'
|
||||
|
||||
print(f'[{i}/{len(test_cases)}]: {test}{skipped}')
|
||||
if skipped != '':
|
||||
continue
|
||||
# remove the old db
|
||||
try:
|
||||
os.remove(db_name)
|
||||
except:
|
||||
pass
|
||||
cmd = [unittest_program, '--test-config', args.test_config, test]
|
||||
if not try_run_program(cmd, 'Run Test'):
|
||||
continue
|
||||
|
||||
if not os.path.isfile(db_name):
|
||||
# db not created
|
||||
continue
|
||||
|
||||
cmd = [
|
||||
programs_to_test[-1],
|
||||
db_name,
|
||||
'-c',
|
||||
'.headers off',
|
||||
'-csv',
|
||||
'-c',
|
||||
'.output table_list.csv',
|
||||
'-c',
|
||||
'SHOW ALL TABLES',
|
||||
]
|
||||
if not try_run_program(cmd, 'List Tables'):
|
||||
continue
|
||||
|
||||
tables = []
|
||||
with open('table_list.csv', newline='') as f:
|
||||
reader = csv.reader(f)
|
||||
for row in reader:
|
||||
tables.append((row[1], row[2]))
|
||||
# no tables / views
|
||||
if len(tables) == 0:
|
||||
continue
|
||||
|
||||
# read all tables / views
|
||||
failures = []
|
||||
for cli in programs_to_test:
|
||||
cmd = [cli, db_name]
|
||||
for table in tables:
|
||||
schema_name = table[0].replace('"', '""')
|
||||
table_name = table[1].replace('"', '""')
|
||||
cmd += ['-c', f'FROM "{schema_name}"."{table_name}"']
|
||||
failure = run_program(cmd, 'Query Tables')
|
||||
if failure is not None:
|
||||
failures.append(failure)
|
||||
if len(failures) > 0:
|
||||
# we failed to query the tables
|
||||
# this MIGHT be expected - e.g. we might have views that reference stale state (e.g. files that are deleted)
|
||||
# try to run it with the new CLI - if this succeeds we have a problem
|
||||
new_cmd = [new_cli] + cmd[1:]
|
||||
new_failure = run_program(new_cmd, 'Query Tables (New)')
|
||||
if new_failure is None:
|
||||
# we succeeded with the new CLI - report the failure
|
||||
for failure in failures:
|
||||
handle_failure(**failure)
|
||||
continue
|
||||
|
||||
if len(error_container) == 0:
|
||||
exit(0)
|
||||
|
||||
if summarize_failures:
|
||||
print(
|
||||
'''\n\n====================================================
|
||||
================ FAILURES SUMMARY ================
|
||||
====================================================\n
|
||||
'''
|
||||
)
|
||||
for i, error in enumerate(error_container, start=1):
|
||||
print(f"\n{i}:", error["test"], "\n")
|
||||
print(error["stderr"])
|
||||
|
||||
exit(1)
|
||||
162
external/duckdb/scripts/test_zero_initialize.py
vendored
Normal file
@@ -0,0 +1,162 @@
import os
import argparse
import subprocess
import shutil

parser = argparse.ArgumentParser(
    description='''Runs storage tests both with explicit one-initialization and with explicit zero-initialization, and verifies that the final storage files are the same.
The purpose of this is to verify that all memory is correctly initialized before it is written to disk - which prevents in-memory data from leaking into storage files through uninitialized memory.'''
)
parser.add_argument('--unittest', default='build/debug/test/unittest', help='path to unittest', dest='unittest')
parser.add_argument(
    '--zero_init_dir',
    default='test_zero_init_db',
    help='directory to write zero-initialized databases to',
    dest='zero_init_dir',
)
parser.add_argument(
    '--standard_dir', default='test_standard_db', help='directory to write regular databases to', dest='standard_dir'
)

args = parser.parse_args()

test_list = [
    'test/sql/index/art/storage/test_art_checkpoint.test',
    'test/sql/storage/compression/simple_compression.test',
    'test/sql/storage/delete/test_store_deletes.test',
    'test/sql/storage/mix/test_update_delete_string.test',
    'test/sql/storage/nested/struct_of_lists_unaligned.test',
    'test/sql/storage/test_store_integers.test',
    'test/sql/storage/test_store_nulls_strings.test',
    'test/sql/storage/update/test_store_null_updates.test',
]


def run_test(args):
    res = subprocess.run(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout = res.stdout.decode('utf8').strip()
    stderr = res.stderr.decode('utf8').strip()
    if res.returncode != 0:
        print("Failed to run test!")
        print("----------COMMAND-----------")
        print(' '.join(args))
        print("----------STDOUT-----------")
        print(stdout)
        print("----------STDERR-----------")
        print(stderr)
        print("---------------------")
        exit(1)


header_size = 4096 * 3
block_size = 262144
checksum_size = 8


def handle_error(i, standard_db, zero_init_db, standard_data, zero_data):
    print("------------------------------------------------------------------")
    print(f"FAIL - Mismatch between one-initialized and zero-initialized databases at byte position {i}")
    print("------------------------------------------------------------------")
    print(f"One-initialized database {standard_db} - byte value {standard_data}")
    print(f"Zero-initialized database {zero_init_db} - byte value {zero_data}")
    if i < header_size:
        print("This byte is in the initial headers of the file")
    else:
        byte_pos = (i - header_size) % block_size
        if byte_pos >= checksum_size:
            print(
                f"This byte is in block id {(i - header_size) // block_size} at byte position {byte_pos - checksum_size} (position {byte_pos} including the block checksum)"
            )
        else:
            print(f"This byte is in block id {(i - header_size) // block_size} at byte position {byte_pos}")
            print("This is in the checksum part of the block")
    print("------------------------------------------------------------------")
    print(
        "This error likely means that memory was not correctly zero-initialized in a block before being written out to disk."
    )


def compare_database(standard_db, zero_init_db):
    with open(standard_db, 'rb') as f:
        standard_data = f.read()
    with open(zero_init_db, 'rb') as f:
        zero_data = f.read()
    if len(standard_data) != len(zero_data):
        print(
            f"FAIL - Length mismatch between database {standard_db} ({str(len(standard_data))}) and {zero_init_db} ({str(len(zero_data))})"
        )
        return False
    found_error = None
    for i in range(len(standard_data)):
        if standard_data[i] != zero_data[i]:
            if i > header_size:
                byte_pos = (i - header_size) % block_size
                if byte_pos <= 8:
                    # different checksum, skip because it does not tell us anything!
                    if found_error is None:
                        found_error = i
                    continue
            handle_error(i, standard_db, zero_init_db, standard_data[i], zero_data[i])
            return False
    if found_error is not None:
        i = found_error
        handle_error(i, standard_db, zero_init_db, standard_data[i], zero_data[i])
        return False
    print("Success!")
    return True


def compare_files(standard_dir, zero_init_dir):
    standard_list = os.listdir(standard_dir)
    zero_init_list = os.listdir(zero_init_dir)
    standard_list.sort()
    zero_init_list.sort()
    if standard_list != zero_init_list:
        print(
            f"FAIL - Directories contain mismatching files (standard - {str(standard_list)}, zero init - {str(zero_init_list)})"
        )
        return False
    if len(standard_list) == 0:
        print("FAIL - Directory is empty!")
        return False
    success = True
    for entry in standard_list:
        if not compare_database(os.path.join(standard_dir, entry), os.path.join(zero_init_dir, entry)):
            success = False
    return success


def clear_directories(directories):
    for dir in directories:
        try:
            shutil.rmtree(dir)
        except FileNotFoundError:
            pass


test_dirs = [args.standard_dir, args.zero_init_dir]

success = True
for test in test_list:
    print(f"Running test {test}")
    clear_directories(test_dirs)
    standard_args = [args.unittest, '--test-temp-dir', args.standard_dir, '--one-initialize', '--single-threaded', test]
    zero_init_args = [
        args.unittest,
        '--test-temp-dir',
        args.zero_init_dir,
        '--zero-initialize',
        '--single-threaded',
        test,
    ]
    print("Running test in one-initialize mode")
    run_test(standard_args)
    print("Running test in zero-initialize mode")
    run_test(zero_init_args)
    if not compare_files(args.standard_dir, args.zero_init_dir):
        success = False

clear_directories(test_dirs)

if not success:
    exit(1)
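To make the block arithmetic in handle_error concrete: with header_size = 3 * 4096 = 12288 bytes, block_size = 262144 bytes and an 8-byte per-block checksum, a mismatch at a given byte offset maps to a block id and an in-block position as sketched below (the offset itself is a made-up example):

# Worked example of the arithmetic used by handle_error above; the offset is hypothetical.
header_size = 4096 * 3
block_size = 262144
checksum_size = 8

i = header_size + block_size + 100          # example mismatch offset
block_id = (i - header_size) // block_size  # 1
byte_pos = (i - header_size) % block_size   # 100, counted from the start of the block (checksum included)
data_pos = byte_pos - checksum_size         # 92, counted from the start of the block's data area
print(block_id, byte_pos, data_pos)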
48
external/duckdb/scripts/try_timeout.py
vendored
Normal file
@@ -0,0 +1,48 @@
import os
import sys
import subprocess
import threading

if len(sys.argv) < 3:
    print("Expected python3 scripts/try_timeout.py --timeout=[timeout] --retry=[retries] [cmd] [options...]")
    print("Timeout should be given in seconds")
    exit(1)

timeout = int(sys.argv[1].replace("--timeout=", ""))
retries = int(sys.argv[2].replace("--retry=", ""))
cmd = sys.argv[3:]


class Command(object):
    def __init__(self, cmd):
        self.cmd = cmd
        self.process = None

    def run(self, timeout):
        self.process = None

        def target():
            self.process = subprocess.Popen(self.cmd)
            self.process.communicate()

        thread = threading.Thread(target=target)
        thread.start()

        thread.join(timeout)
        if thread.is_alive():
            print('Terminating process: process exceeded timeout of ' + str(timeout) + ' seconds')
            self.process.terminate()
            thread.join()
        if self.process is None:
            return 1
        return self.process.returncode


for i in range(retries):
    print("Attempting to run command \"" + ' '.join(cmd) + '"')
    command = Command(cmd)
    returncode = command.run(timeout)
    if returncode == 0:
        exit(0)

exit(1)
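For comparison only: on Python 3 the same kill-after-timeout behaviour could also be expressed with subprocess.run's built-in timeout parameter instead of the thread-based wrapper above. This is a hedged alternative sketch, not what try_timeout.py does:

import subprocess

def run_with_timeout(cmd, timeout_seconds):
    # subprocess.run kills the child and raises TimeoutExpired once the timeout elapses.
    try:
        return subprocess.run(cmd, timeout=timeout_seconds).returncode
    except subprocess.TimeoutExpired:
        print('Terminating process: process exceeded timeout of ' + str(timeout_seconds) + ' seconds')
        return 1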
64
external/duckdb/scripts/upload-assets-to-staging.sh
vendored
Executable file
@@ -0,0 +1,64 @@
#!/bin/bash

# Main extension uploading script

# Usage: ./scripts/upload-staging-asset.sh <folder> <file>*
# <folder> : Folder to upload to
# <file>   : File to be uploaded

if [ -z "$1" ] || [ -z "$2" ]; then
    echo "Usage: ./scripts/upload-staging-asset.sh <folder> <file1> [... <fileN>]"
    exit 1
fi

set -e

# skip if repo is not in duckdb organization
if [ "$GITHUB_REPOSITORY_OWNER" != "duckdb" ]; then
    echo "Repository is $GITHUB_REPOSITORY_OWNER (not duckdb)"
    exit 0
fi

FOLDER="$1"
DRY_RUN_PARAM=""

# dryrun if repo is not duckdb/duckdb
if [ "$GITHUB_REPOSITORY" != "duckdb/duckdb" ]; then
    echo "Repository is $GITHUB_REPOSITORY (not duckdb/duckdb)"
    DRY_RUN_PARAM="--dryrun"
fi
# dryrun if we are not on main
if [ "$GITHUB_REF" != "refs/heads/main" ]; then
    echo "git ref is $GITHUB_REF (not refs/heads/main)"
    DRY_RUN_PARAM="--dryrun"
fi

if [ "$GITHUB_EVENT_NAME" == "workflow_dispatch" ]; then
    echo "overriding DRY_RUN_PARAM, forcing upload"
    DRY_RUN_PARAM=""
fi

# dryrun if AWS key is not set
if [ -z "$AWS_ACCESS_KEY_ID" ]; then
    echo "No access key available"
    DRY_RUN_PARAM="--dryrun"
fi


TARGET=$(git log -1 --format=%h)

if [ "$UPLOAD_ASSETS_TO_STAGING_TARGET" ]; then
    TARGET="$UPLOAD_ASSETS_TO_STAGING_TARGET"
fi

# decide target for staging
if [ "$OVERRIDE_GIT_DESCRIBE" ]; then
    TARGET="$TARGET/$OVERRIDE_GIT_DESCRIBE"
fi

python3 -m pip install awscli

for var in "${@: 2}"
do
    aws s3 cp $var s3://duckdb-staging/$TARGET/$GITHUB_REPOSITORY/$FOLDER/ $DRY_RUN_PARAM --region us-east-2
done
62
external/duckdb/scripts/verify_enum_integrity.py
vendored
Normal file
@@ -0,0 +1,62 @@
from cxxheaderparser.parser import CxxParser, ParserOptions
from cxxheaderparser.visitor import CxxVisitor
from cxxheaderparser.preprocessor import make_pcpp_preprocessor
from cxxheaderparser.parserstate import NamespaceBlockState
from cxxheaderparser.types import EnumDecl
import textwrap
import os


class Visitor:
    def on_enum(self, state: NamespaceBlockState, cursor: EnumDecl) -> None:
        enum_name = cursor.typename.segments[0].format()
        if '<' in enum_name:
            raise Exception(
                "Enum '{}' is an anonymous enum, please name it\n".format(cursor.doxygen[3:] if cursor.doxygen else '')
            )

        enum_constants = dict()
        for enum_const in cursor.values:
            name = enum_const.name.format()
            if enum_const.value is None:
                raise Exception(f"Enum constant '{name}' in '{enum_name}' does not have an explicit value assignment.")
            value = enum_const.value.format()
            if value in enum_constants:
                other_constant = enum_constants[value]
                error = f"""
                Enum '{enum_name}' contains a duplicate value:
                Value {value} is defined for both '{other_constant}' and '{name}'
                """
                error = textwrap.dedent(error)
                raise Exception(error)
            enum_constants[value] = name
        print(f"Successfully verified the integrity of enum {enum_name} ({len(enum_constants)} entries)")

    def __getattr__(self, name):
        return lambda *args, **kwargs: True


def parse_enum(file_path):
    # Parse the header; the visitor raises on any enum integrity violation
    parser = CxxParser(
        file_path,
        None,
        visitor=Visitor(),
        options=ParserOptions(preprocessor=make_pcpp_preprocessor()),
    )
    parser.parse()


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Parse a C header file and check enum integrity.")
    parser.add_argument("file_path", type=str, help="Path to the C header file")

    args = parser.parse_args()
    file_path = args.file_path

    if not os.path.exists(file_path):
        raise Exception(f"Error: file '{file_path}' does not exist")

    parse_enum(file_path)
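As a quick smoke test of the checker, one could hand it a header containing a duplicate enum value and expect an exception. The snippet below is a sketch only - it assumes cxxheaderparser is installed and that parse_enum is importable from this script; the header contents are made up:

# Sketch: exercise the duplicate-value check with a throwaway header.
import os
import tempfile

header = '''
enum class MyEnum : unsigned char {
    FIRST = 0,
    SECOND = 1,
    DUPLICATE = 1  // same value as SECOND -> should be rejected
};
'''
with tempfile.NamedTemporaryFile('w', suffix='.hpp', delete=False) as f:
    f.write(header)
    path = f.name
try:
    parse_enum(path)  # expected to raise: value 1 is defined twice
except Exception as e:
    print(e)
finally:
    os.remove(path)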
21
external/duckdb/scripts/windows_ci.py
vendored
Normal file
@@ -0,0 +1,21 @@
import os

common_path = os.path.join('src', 'include', 'duckdb', 'common', 'common.hpp')
with open(common_path, 'r') as f:
    text = f.read()


text = text.replace(
    '#pragma once',
    '''#pragma once

#ifdef _WIN32
#ifdef DUCKDB_MAIN_LIBRARY
#include "duckdb/common/windows.hpp"
#endif
#endif
''',
)

with open(common_path, 'w+') as f:
    f.write(text)