email-tracker/external/duckdb/scripts/package_build.py

import os
import sys
import shutil
import subprocess
from python_helpers import open_utf8
import re
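
# source files that are dropped from the packaged source list (see file_is_excluded in build_package below)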
excluded_objects = ['utf8proc_data.cpp']
def third_party_includes():
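    """Return the third-party include directories, relative to the DuckDB source root."""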
    includes = []
    includes += [os.path.join('third_party', 'concurrentqueue')]
    includes += [os.path.join('third_party', 'fast_float')]
    includes += [os.path.join('third_party', 'fastpforlib')]
    includes += [os.path.join('third_party', 'fmt', 'include')]
    includes += [os.path.join('third_party', 'fsst')]
    includes += [os.path.join('third_party', 'httplib')]
    includes += [os.path.join('third_party', 'hyperloglog')]
    includes += [os.path.join('third_party', 'jaro_winkler')]
    includes += [os.path.join('third_party', 'jaro_winkler', 'details')]
    includes += [os.path.join('third_party', 'libpg_query')]
    includes += [os.path.join('third_party', 'libpg_query', 'include')]
    includes += [os.path.join('third_party', 'lz4')]
    includes += [os.path.join('third_party', 'brotli', 'include')]
    includes += [os.path.join('third_party', 'brotli', 'common')]
    includes += [os.path.join('third_party', 'brotli', 'dec')]
    includes += [os.path.join('third_party', 'brotli', 'enc')]
    includes += [os.path.join('third_party', 'mbedtls', 'include')]
    includes += [os.path.join('third_party', 'mbedtls', 'library')]
    includes += [os.path.join('third_party', 'miniz')]
    includes += [os.path.join('third_party', 'pcg')]
    includes += [os.path.join('third_party', 'pdqsort')]
    includes += [os.path.join('third_party', 're2')]
    includes += [os.path.join('third_party', 'ska_sort')]
    includes += [os.path.join('third_party', 'skiplist')]
    includes += [os.path.join('third_party', 'tdigest')]
    includes += [os.path.join('third_party', 'utf8proc')]
    includes += [os.path.join('third_party', 'utf8proc', 'include')]
    includes += [os.path.join('third_party', 'vergesort')]
    includes += [os.path.join('third_party', 'yyjson', 'include')]
    includes += [os.path.join('third_party', 'zstd', 'include')]
    return includes


def third_party_sources():
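    """Return the third-party source directories that are bundled into the package."""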
    sources = []
    sources += [os.path.join('third_party', 'fmt')]
    sources += [os.path.join('third_party', 'fsst')]
    sources += [os.path.join('third_party', 'miniz')]
    sources += [os.path.join('third_party', 're2')]
    sources += [os.path.join('third_party', 'hyperloglog')]
    sources += [os.path.join('third_party', 'skiplist')]
    sources += [os.path.join('third_party', 'fastpforlib')]
    sources += [os.path.join('third_party', 'utf8proc')]
    sources += [os.path.join('third_party', 'libpg_query')]
    sources += [os.path.join('third_party', 'mbedtls')]
    sources += [os.path.join('third_party', 'yyjson')]
    sources += [os.path.join('third_party', 'zstd')]
    return sources


def file_is_lib(fname, libname):
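    """Check whether fname is a library file for libname (e.g. foo.a, libfoo.a or foo.lib for libname 'foo')."""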
    libextensions = ['.a', '.lib']
    libprefixes = ['', 'lib']
    for ext in libextensions:
        for prefix in libprefixes:
            potential_libname = prefix + libname + ext
            if fname == potential_libname:
                return True
    return False


def get_libraries(binary_dir, libraries, extensions):
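    """Locate the built DuckDB static library, the extension libraries and any additional
    libraries under binary_dir. Returns a list of (directory, library name) tuples, where
    the directory is None for system libraries such as pthread."""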
    result_libs = []

    def find_library_recursive(search_dir, libname):
        flist = os.listdir(search_dir)
        for fname in flist:
            fpath = os.path.join(search_dir, fname)
            if os.path.isdir(fpath):
                entry = find_library_recursive(fpath, libname)
                if entry is not None:
                    return entry
            elif os.path.isfile(fpath) and file_is_lib(fname, libname):
                return search_dir
        return None

    def find_library(search_dir, libname, result_libs, required=False):
        if libname == 'Threads::Threads':
            result_libs += [(None, 'pthread')]
            return
        libdir = find_library_recursive(binary_dir, libname)
        if libdir is None and required:
            raise Exception(f"Failed to locate required library {libname} in {binary_dir}")
        result_libs += [(libdir, libname)]

    duckdb_lib_name = 'duckdb_static'
    if os.name == 'nt':
        duckdb_lib_name = 'duckdb'
    find_library(os.path.join(binary_dir, 'src'), duckdb_lib_name, result_libs, True)
    for ext in extensions:
        find_library(os.path.join(binary_dir, 'extension', ext), ext + '_extension', result_libs, True)
    for libname in libraries:
        find_library(binary_dir, libname, result_libs)
    return result_libs


def includes(extensions):
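    """Return the include directories needed to compile against the DuckDB sources and the given extensions."""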
    scripts_dir = os.path.dirname(os.path.abspath(__file__))
    # add includes for duckdb and extensions
    includes = []
    includes.append(os.path.join(scripts_dir, '..', 'src', 'include'))
    includes.append(os.path.join(scripts_dir, '..'))
    includes.append(os.path.join(scripts_dir, '..', 'third_party', 'utf8proc', 'include'))
    for ext in extensions:
        includes.append(os.path.join(scripts_dir, '..', 'extension', ext, 'include'))
    return includes


def include_flags(extensions):
    return ' ' + ' '.join(['-I' + x for x in includes(extensions)])


def convert_backslashes(x):
    return '/'.join(x.split(os.path.sep))


def get_relative_path(source_dir, target_file):
    source_dir = convert_backslashes(source_dir)
    target_file = convert_backslashes(target_file)
    # absolute path: try to convert
    if source_dir in target_file:
        target_file = target_file.replace(source_dir, "").lstrip('/')
    return target_file


######
# MAIN_BRANCH_VERSIONING default should be 'True' for the main branch and feature branches
# MAIN_BRANCH_VERSIONING default should be 'False' for release branches
# MAIN_BRANCH_VERSIONING default value needs to be kept in sync between:
# - CMakeLists.txt
# - scripts/amalgamation.py
# - scripts/package_build.py
######
MAIN_BRANCH_VERSIONING = True
if os.getenv('MAIN_BRANCH_VERSIONING') == "0":
    MAIN_BRANCH_VERSIONING = False
if os.getenv('MAIN_BRANCH_VERSIONING') == "1":
    MAIN_BRANCH_VERSIONING = True


def get_git_describe():
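    """Return a 'git describe'-style string of the form vX.Y.Z-<commits since tag>-g<short hash>.
    Honors the OVERRIDE_GIT_DESCRIBE environment variable and falls back to a placeholder
    when git is unavailable."""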
    override_git_describe = os.getenv('OVERRIDE_GIT_DESCRIBE') or ''
    versioning_tag_match = 'v*.*.*'
    if MAIN_BRANCH_VERSIONING:
        versioning_tag_match = 'v*.*.0'
    # override_git_describe is empty (env var unset or set to the empty string):
    # ask git; that can fail, so handle the exception in place
    if len(override_git_describe) == 0:
        try:
            return (
                subprocess.check_output(
                    ['git', 'describe', '--tags', '--long', '--debug', '--match', versioning_tag_match]
                )
                .strip()
                .decode('utf8')
            )
        except subprocess.CalledProcessError:
            return "v0.0.0-0-gdeadbeeff"
    if len(override_git_describe.split('-')) == 3:
        return override_git_describe
    if len(override_git_describe.split('-')) == 1:
        override_git_describe += "-0"
    assert len(override_git_describe.split('-')) == 2
    try:
        return (
            override_git_describe
            + "-g"
            + subprocess.check_output(['git', 'log', '-1', '--format=%h']).strip().decode('utf8')
        )
    except subprocess.CalledProcessError:
        return override_git_describe + "-g" + "deadbeeff"


def git_commit_hash():
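    """Return the short commit hash, taken from SETUPTOOLS_SCM_PRETEND_HASH if set, otherwise from git describe."""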
    if 'SETUPTOOLS_SCM_PRETEND_HASH' in os.environ:
        return os.environ['SETUPTOOLS_SCM_PRETEND_HASH']
    try:
        git_describe = get_git_describe()
        hash = git_describe.split('-')[2].lstrip('g')
        return hash
    except:
        return "deadbeeff"


def prefix_version(version):
    """Make sure the version is prefixed with 'v' to be of the form vX.Y.Z"""
    if version.startswith('v'):
        return version
    return 'v' + version


def git_dev_version():
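    """Return the version string for this build: the tag itself when building exactly on a tag,
    otherwise the next version with a -devN suffix (N = commits since the tag)."""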
    if 'SETUPTOOLS_SCM_PRETEND_VERSION' in os.environ:
        return prefix_version(os.environ['SETUPTOOLS_SCM_PRETEND_VERSION'])
    try:
        long_version = get_git_describe()
        version_splits = long_version.split('-')[0].lstrip('v').split('.')
        dev_version = long_version.split('-')[1]
        if int(dev_version) == 0:
            # directly on a tag: emit the regular version
            return "v" + '.'.join(version_splits)
        else:
            # not on a tag: increment the version by one and add a -devN suffix
            # this needs to be kept in sync with CMakeLists.txt
            if MAIN_BRANCH_VERSIONING:
                # increment the minor version
                version_splits[1] = str(int(version_splits[1]) + 1)
            else:
                # increment the patch version
                version_splits[2] = str(int(version_splits[2]) + 1)
            return "v" + '.'.join(version_splits) + "-dev" + dev_version
    except:
        return "v0.0.0"


def include_package(pkg_name, pkg_dir, include_files, include_list, source_list):
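    """Import the <pkg_name>_config module from pkg_dir and append the extension's include
    directories, include files and source files to the given lists (modified in place)."""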
    import amalgamation

    original_path = sys.path
    # temporarily add the package directory to the import path
    sys.path.append(pkg_dir)
    ext_pkg = __import__(pkg_name + '_config')
    ext_include_dirs = ext_pkg.include_directories
    ext_source_files = ext_pkg.source_files
    include_files += amalgamation.list_includes_files(ext_include_dirs)
    include_list += ext_include_dirs
    source_list += ext_source_files
    sys.path = original_path


def build_package(target_dir, extensions, linenumbers=False, unity_count=32, folder_name='duckdb', short_paths=False):
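    """Copy the DuckDB sources (and the requested in-tree extensions) into target_dir,
    patch the version defines in pragma_version.cpp and generate unity-build files.

    Returns a tuple (source files, include directories, original source files),
    all using forward slashes.

    Illustrative call (the target directory and extension list are just examples):
        sources, include_dirs, original = build_package('duckdb_pkg', ['parquet'])
    """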
    if not os.path.isdir(target_dir):
        os.mkdir(target_dir)

    scripts_dir = os.path.dirname(os.path.abspath(__file__))
    sys.path.append(scripts_dir)
    import amalgamation

    prev_wd = os.getcwd()
    os.chdir(os.path.join(scripts_dir, '..'))
    # obtain the list of source files from the amalgamation
    source_list = amalgamation.list_sources()
    include_list = amalgamation.list_include_dirs()
    include_files = amalgamation.list_includes()

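    # helper: copy src into target_dir, creating intermediate directories as needed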
    def copy_file(src, target_dir):
        # get the path
        full_path = src.split(os.path.sep)
        current_path = target_dir
        for i in range(len(full_path) - 1):
            current_path = os.path.join(current_path, full_path[i])
            if not os.path.isdir(current_path):
                os.mkdir(current_path)
        target_name = full_path[-1]
        target_file = os.path.join(current_path, target_name)
        amalgamation.copy_if_different(src, target_file)

    # include the main extension helper
    include_files += [os.path.join('src', 'include', 'duckdb', 'main', 'extension_helper.hpp')]
    # include the separate extensions
    for ext in extensions:
        ext_path = os.path.join(scripts_dir, '..', 'extension', ext)
        include_package(ext, ext_path, include_files, include_list, source_list)

    for src in source_list:
        copy_file(src, target_dir)
    for inc in include_files:
        copy_file(inc, target_dir)

    # handle pragma_version.cpp: paste #define DUCKDB_SOURCE_ID and DUCKDB_VERSION there
    curdir = os.getcwd()
    os.chdir(os.path.join(scripts_dir, '..'))
    githash = git_commit_hash()
    dev_version = git_dev_version()
    dev_v_parts = dev_version.lstrip('v').split('.')
    os.chdir(curdir)
    # open the file and read the current contents
    fpath = os.path.join(target_dir, 'src', 'function', 'table', 'version', 'pragma_version.cpp')
    with open_utf8(fpath, 'r') as f:
        text = f.read()
    # now add the DUCKDB_SOURCE_ID define, if it is not there already
    found_hash = False
    found_dev = False
    found_major = False
    found_minor = False
    found_patch = False
    lines = text.split('\n')
    for i in range(len(lines)):
        if '#define DUCKDB_SOURCE_ID ' in lines[i]:
            lines[i] = '#define DUCKDB_SOURCE_ID "{}"'.format(githash)
            found_hash = True
        if '#define DUCKDB_VERSION ' in lines[i]:
            lines[i] = '#define DUCKDB_VERSION "{}"'.format(dev_version)
            found_dev = True
        if '#define DUCKDB_MAJOR_VERSION ' in lines[i]:
            lines[i] = '#define DUCKDB_MAJOR_VERSION {}'.format(int(dev_v_parts[0]))
            found_major = True
        if '#define DUCKDB_MINOR_VERSION ' in lines[i]:
            lines[i] = '#define DUCKDB_MINOR_VERSION {}'.format(int(dev_v_parts[1]))
            found_minor = True
        if '#define DUCKDB_PATCH_VERSION ' in lines[i]:
            lines[i] = '#define DUCKDB_PATCH_VERSION "{}"'.format(dev_v_parts[2])
            found_patch = True
    if not found_hash:
        lines = ['#ifndef DUCKDB_SOURCE_ID', '#define DUCKDB_SOURCE_ID "{}"'.format(githash), '#endif'] + lines
    if not found_dev:
        lines = ['#ifndef DUCKDB_VERSION', '#define DUCKDB_VERSION "{}"'.format(dev_version), '#endif'] + lines
    if not found_major:
        lines = [
            '#ifndef DUCKDB_MAJOR_VERSION',
            '#define DUCKDB_MAJOR_VERSION {}'.format(int(dev_v_parts[0])),
            '#endif',
        ] + lines
    if not found_minor:
        lines = [
            '#ifndef DUCKDB_MINOR_VERSION',
            '#define DUCKDB_MINOR_VERSION {}'.format(int(dev_v_parts[1])),
            '#endif',
        ] + lines
    if not found_patch:
        lines = [
            '#ifndef DUCKDB_PATCH_VERSION',
            '#define DUCKDB_PATCH_VERSION "{}"'.format(dev_v_parts[2]),
            '#endif',
        ] + lines
    text = '\n'.join(lines)
    with open_utf8(fpath, 'w+') as f:
        f.write(text)

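    # helper: check whether a source file should be excluded from the returned source list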
    def file_is_excluded(fname):
        for entry in excluded_objects:
            if entry in fname:
                return True
        return False

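    # helper: write a unity-build file into target_dir that #includes the given source files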
    def generate_unity_build(entries, unity_name, linenumbers):
        ub_file = os.path.join(target_dir, unity_name)
        with open_utf8(ub_file, 'w+') as f:
            for entry in entries:
                if linenumbers:
                    f.write('#line 0 "{}"\n'.format(convert_backslashes(entry)))
                f.write('#include "{}"\n\n'.format(convert_backslashes(entry)))
        return ub_file

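    # helper: group the sources per directory; directories that use add_library_unity in their
    # CMakeLists.txt get a single unity-build file, other sources are used directly (or wrapped
    # individually when short_paths is set)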
    def generate_unity_builds(source_list, nsplits, linenumbers):
        files_per_directory = {}
        for source in source_list:
            dirname = os.path.dirname(source)
            if dirname not in files_per_directory:
                files_per_directory[dirname] = []
            files_per_directory[dirname].append(source)

        new_source_files = []
        for dirname in files_per_directory.keys():
            current_files = files_per_directory[dirname]
            cmake_file = os.path.join(dirname, 'CMakeLists.txt')
            unity_build = False
            if os.path.isfile(cmake_file):
                with open(cmake_file, 'r') as f:
                    text = f.read()
                if 'add_library_unity' in text:
                    unity_build = True
                    # re-order the files in the unity build so that they follow the same order as the CMakeLists.txt
                    scores = {}
                    filenames = [x[0] for x in re.findall('([a-zA-Z0-9_]+[.](cpp|cc|c|cxx))', text)]
                    score = 0
                    for filename in filenames:
                        scores[filename] = score
                        score += 1
                    current_files.sort(
                        key=lambda x: scores[os.path.basename(x)] if os.path.basename(x) in scores else 99999
                    )
            if not unity_build:
                if short_paths:
                    # write a stand-alone stub per source file into the target directory (keeps the generated paths short)
                    for file in current_files:
                        unity_filename = os.path.basename(file)
                        new_source_files.append(generate_unity_build([file], unity_filename, linenumbers))
                else:
                    # directly use the source files
                    new_source_files += [os.path.join(folder_name, file) for file in current_files]
            else:
                unity_base = dirname.replace(os.path.sep, '_')
                unity_name = f'ub_{unity_base}.cpp'
                new_source_files.append(generate_unity_build(current_files, unity_name, linenumbers))
        return new_source_files

    original_sources = source_list
    source_list = generate_unity_builds(source_list, unity_count, linenumbers)

    os.chdir(prev_wd)
    return (
        [convert_backslashes(x) for x in source_list if not file_is_excluded(x)],
        [convert_backslashes(x) for x in include_list],
        [convert_backslashes(x) for x in original_sources],
    )