418 lines
16 KiB
Python
418 lines
16 KiB
Python
import os
|
|
import sys
|
|
import shutil
|
|
import subprocess
|
|
from python_helpers import open_utf8
|
|
import re
|
|
|
|
# Files skipped when assembling the packaged source list: build_package's
# file_is_excluded drops any source whose path contains one of these
# substrings. utf8proc_data.cpp is a large generated data table.
excluded_objects = ['utf8proc_data.cpp']
|
def third_party_includes():
    """Return the relative include directories of all bundled third-party libraries."""
    subdirs = [
        ('concurrentqueue',),
        ('fast_float',),
        ('fastpforlib',),
        ('fmt', 'include'),
        ('fsst',),
        ('httplib',),
        ('hyperloglog',),
        ('jaro_winkler',),
        ('jaro_winkler', 'details'),
        ('libpg_query',),
        ('libpg_query', 'include'),
        ('lz4',),
        ('brotli', 'include'),
        ('brotli', 'common'),
        ('brotli', 'dec'),
        ('brotli', 'enc'),
        ('mbedtls', 'include'),
        ('mbedtls', 'library'),
        ('miniz',),
        ('pcg',),
        ('pdqsort',),
        ('re2',),
        ('ska_sort',),
        ('skiplist',),
        ('tdigest',),
        ('utf8proc',),
        ('utf8proc', 'include'),
        ('vergesort',),
        ('yyjson', 'include'),
        ('zstd', 'include'),
    ]
    return [os.path.join('third_party', *parts) for parts in subdirs]
|
|
|
|
|
def third_party_sources():
    """Return the relative source directories of the bundled third-party libraries."""
    dirs = [
        'fmt',
        'fsst',
        'miniz',
        're2',
        'hyperloglog',
        'skiplist',
        'fastpforlib',
        'utf8proc',
        'libpg_query',
        'mbedtls',
        'yyjson',
        'zstd',
    ]
    return [os.path.join('third_party', name) for name in dirs]
|
|
|
|
|
def file_is_lib(fname, libname):
    """Return True when fname is a static-library filename for libname.

    Accepts any combination of an optional 'lib' prefix and a '.a' or
    '.lib' extension (e.g. libfoo.a, foo.a, libfoo.lib, foo.lib).
    """
    candidates = {
        prefix + libname + ext
        for prefix in ('', 'lib')
        for ext in ('.a', '.lib')
    }
    return fname in candidates
|
|
|
|
|
def get_libraries(binary_dir, libraries, extensions):
    """Locate built static libraries inside a CMake binary directory.

    Parameters:
        binary_dir: root of the build tree to search recursively.
        libraries: additional library names to look up (missing ones are
            tolerated and reported with a None directory).
        extensions: extension names; each '<ext>_extension' library is required.

    Returns a list of (directory, library_name) tuples. The directory is
    None for 'pthread' (resolved by the linker) or for an optional library
    that was not found.

    Raises:
        Exception: when the main duckdb library or a required extension
            library cannot be located under binary_dir.
    """
    result_libs = []

    def find_library_recursive(search_dir, libname):
        # depth-first search; returns the directory containing a file that
        # matches <prefix><libname><ext> per file_is_lib, or None
        flist = os.listdir(search_dir)
        for fname in flist:
            fpath = os.path.join(search_dir, fname)
            if os.path.isdir(fpath):
                entry = find_library_recursive(fpath, libname)
                # fixed: was `entry != None` — identity comparison with None
                if entry is not None:
                    return entry
            elif os.path.isfile(fpath) and file_is_lib(fname, libname):
                return search_dir
        return None

    def find_library(search_dir, libname, result_libs, required=False):
        # Threads::Threads is a CMake pseudo-target, not a file on disk;
        # map it straight to the pthread linker name
        if libname == 'Threads::Threads':
            result_libs += [(None, 'pthread')]
            return
        # NOTE(review): the search deliberately starts from binary_dir, not
        # search_dir — the parameter is effectively unused; preserved as-is
        # to keep behavior identical
        libdir = find_library_recursive(binary_dir, libname)
        if libdir is None and required:
            raise Exception(f"Failed to locate required library {libname} in {binary_dir}")

        result_libs += [(libdir, libname)]

    # the main DuckDB library target is named differently on Windows
    duckdb_lib_name = 'duckdb_static'
    if os.name == 'nt':
        duckdb_lib_name = 'duckdb'
    find_library(os.path.join(binary_dir, 'src'), duckdb_lib_name, result_libs, True)
    for ext in extensions:
        find_library(os.path.join(binary_dir, 'extension', ext), ext + '_extension', result_libs, True)

    for libname in libraries:
        find_library(binary_dir, libname, result_libs)

    return result_libs
|
|
|
|
|
def includes(extensions):
    """Return the include directories for duckdb itself plus the given extensions."""
    scripts_dir = os.path.dirname(os.path.abspath(__file__))
    repo_root = os.path.join(scripts_dir, '..')
    # add includes for duckdb and extensions
    paths = [
        os.path.join(repo_root, 'src', 'include'),
        repo_root,
        os.path.join(repo_root, 'third_party', 'utf8proc', 'include'),
    ]
    paths.extend(os.path.join(repo_root, 'extension', ext, 'include') for ext in extensions)
    return paths
|
|
|
|
|
def include_flags(extensions):
    """Render the include directories as a compiler flag string (with a leading space)."""
    return ' ' + ' '.join('-I' + path for path in includes(extensions))
|
|
|
|
|
def convert_backslashes(x):
    """Normalize a path to forward slashes regardless of the host separator."""
    return x.replace(os.path.sep, '/')
|
|
|
|
|
def get_relative_path(source_dir, target_file):
    """Return target_file relative to source_dir, in forward-slash form.

    Both arguments are normalized to forward slashes. If target_file does
    not start with source_dir it is returned unchanged (but normalized).
    """
    source_dir = convert_backslashes(source_dir)
    target_file = convert_backslashes(target_file)

    # absolute path: try to convert.
    # fixed: the previous `source_dir in target_file` + str.replace removed
    # *every* occurrence of source_dir anywhere in the path; only the leading
    # prefix should be stripped
    if target_file.startswith(source_dir):
        target_file = target_file[len(source_dir):].lstrip('/')
    return target_file
|
|
|
|
|
######
# MAIN_BRANCH_VERSIONING default should be 'True' for main branch and feature branches
# MAIN_BRANCH_VERSIONING default should be 'False' for release branches
# MAIN_BRANCH_VERSIONING default value needs to keep in sync between:
# - CMakeLists.txt
# - scripts/amalgamation.py
# - scripts/package_build.py
######
# the environment variable, when set to "0" or "1", overrides the default
_main_branch_env = os.getenv('MAIN_BRANCH_VERSIONING')
if _main_branch_env == "0":
    MAIN_BRANCH_VERSIONING = False
elif _main_branch_env == "1":
    MAIN_BRANCH_VERSIONING = True
else:
    # unset or unrecognized: keep the in-repo default
    MAIN_BRANCH_VERSIONING = True
|
|
|
|
|
def get_git_describe():
    """Return a `git describe`-style version string: <tag>-<ndev>-g<hash>.

    Honors the OVERRIDE_GIT_DESCRIBE environment variable; otherwise asks
    git directly. On main/feature branches only vX.Y.0 tags are matched,
    on release branches any vX.Y.Z tag. Falls back to placeholder values
    when git commands fail.
    """
    override = os.getenv('OVERRIDE_GIT_DESCRIBE') or ''
    tag_pattern = 'v*.*.0' if MAIN_BRANCH_VERSIONING else 'v*.*.*'
    if not override:
        # no override (env unset or empty) -> ask git; this can fail, e.g.
        # outside a repository, hence the except
        cmd = ['git', 'describe', '--tags', '--long', '--debug', '--match', tag_pattern]
        try:
            return subprocess.check_output(cmd).strip().decode('utf8')
        except subprocess.CalledProcessError:
            return "v0.0.0-0-gdeadbeeff"
    # override given: normalize it to the three-part <tag>-<ndev>-g<hash> form
    if len(override.split('-')) == 3:
        return override
    if len(override.split('-')) == 1:
        # tag only: assume zero commits since the tag
        override += "-0"
    assert len(override.split('-')) == 2
    try:
        head_hash = subprocess.check_output(['git', 'log', '-1', '--format=%h']).strip().decode('utf8')
        return override + "-g" + head_hash
    except subprocess.CalledProcessError:
        return override + "-g" + "deadbeeff"
|
|
|
|
|
def git_commit_hash():
    """Return the short git commit hash of the current checkout.

    Resolution order:
    1. the SETUPTOOLS_SCM_PRETEND_HASH environment variable, if set;
    2. the hash field of get_git_describe() (the 'g'-prefixed component);
    3. the placeholder "deadbeeff" when the describe output cannot be parsed.
    """
    if 'SETUPTOOLS_SCM_PRETEND_HASH' in os.environ:
        return os.environ['SETUPTOOLS_SCM_PRETEND_HASH']
    try:
        git_describe = get_git_describe()
        # describe format is "<tag>-<ndev>-g<hash>"; strip the 'g' marker
        # (renamed from `hash`, which shadowed the builtin)
        commit = git_describe.split('-')[2].lstrip('g')
        return commit
    except Exception:
        # narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # still propagate; parsing failures fall back to a placeholder
        return "deadbeeff"
|
|
|
|
|
def prefix_version(version):
    """Make sure the version is prefixed with 'v' to be of the form vX.Y.Z"""
    return version if version.startswith('v') else 'v' + version
|
|
|
|
|
def git_dev_version():
    """Compute the duckdb dev version string from git describe output.

    Returns 'vX.Y.Z' when the checkout sits exactly on a tag, otherwise the
    next version with a '-devN' suffix (minor bump on main/feature branches,
    patch bump on release branches). SETUPTOOLS_SCM_PRETEND_VERSION, if set,
    wins. Falls back to "v0.0.0" when the describe output cannot be parsed.
    """
    if 'SETUPTOOLS_SCM_PRETEND_VERSION' in os.environ:
        return prefix_version(os.environ['SETUPTOOLS_SCM_PRETEND_VERSION'])
    try:
        long_version = get_git_describe()
        # describe format: "v<major>.<minor>.<patch>-<ndev>-g<hash>"
        version_splits = long_version.split('-')[0].lstrip('v').split('.')
        dev_version = long_version.split('-')[1]
        if int(dev_version) == 0:
            # directly on a tag: emit the regular version
            return "v" + '.'.join(version_splits)
        else:
            # not on a tag: increment the version by one and add a -devX suffix
            # this needs to keep in sync with changes to CMakeLists.txt
            if MAIN_BRANCH_VERSIONING:
                # increment minor version
                version_splits[1] = str(int(version_splits[1]) + 1)
            else:
                # increment patch version
                version_splits[2] = str(int(version_splits[2]) + 1)
            return "v" + '.'.join(version_splits) + "-dev" + dev_version
    except Exception:
        # narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # still propagate
        return "v0.0.0"
|
|
|
|
|
def include_package(pkg_name, pkg_dir, include_files, include_list, source_list):
    """Import an extension's '<pkg_name>_config' module from pkg_dir and append
    its include files, include directories, and source files to the given lists
    (all three are mutated in place).
    """
    import amalgamation

    # fixed: the original did `original_path = sys.path`, which aliases the
    # same list object — appending below mutated it too, so the final
    # "restore" was a no-op and pkg_dir leaked into sys.path permanently
    original_path = sys.path.copy()
    try:
        # append the directory so the config module can be imported
        sys.path.append(pkg_dir)
        ext_pkg = __import__(pkg_name + '_config')

        ext_include_dirs = ext_pkg.include_directories
        ext_source_files = ext_pkg.source_files

        include_files += amalgamation.list_includes_files(ext_include_dirs)
        include_list += ext_include_dirs
        source_list += ext_source_files
    finally:
        # restore sys.path even when the import or config access fails
        sys.path = original_path
|
|
|
|
|
def build_package(target_dir, extensions, linenumbers=False, unity_count=32, folder_name='duckdb', short_paths=False):
    """Copy the DuckDB sources (plus the given in-tree extensions) into
    target_dir and prepare them for standalone compilation.

    Parameters:
        target_dir: output directory for the package (created if missing).
        extensions: names of in-tree extensions to bundle.
        linenumbers: emit '#line' directives in generated unity-build files.
        unity_count: forwarded to generate_unity_builds as nsplits
            (currently unused there — grouping is per-directory).
        folder_name: prefix prepended to non-unity source paths in the result.
        short_paths: wrap each source file in its own single-file unity stub
            named after its basename, keeping result paths short.

    Returns a tuple (compile sources, include directories, original sources),
    all paths converted to forward slashes.

    NOTE(review): this function chdirs into the repository root while running
    and restores the previous working directory before returning.
    """
    if not os.path.isdir(target_dir):
        os.mkdir(target_dir)

    # make the amalgamation helper importable from the scripts directory
    scripts_dir = os.path.dirname(os.path.abspath(__file__))
    sys.path.append(scripts_dir)
    import amalgamation

    # work relative to the repository root so all collected paths are relative
    prev_wd = os.getcwd()
    os.chdir(os.path.join(scripts_dir, '..'))

    # obtain the list of source files from the amalgamation
    source_list = amalgamation.list_sources()
    include_list = amalgamation.list_include_dirs()
    include_files = amalgamation.list_includes()

    def copy_file(src, target_dir):
        """Copy src into target_dir, recreating its directory structure."""
        # get the path
        full_path = src.split(os.path.sep)
        current_path = target_dir
        for i in range(len(full_path) - 1):
            current_path = os.path.join(current_path, full_path[i])
            if not os.path.isdir(current_path):
                os.mkdir(current_path)
        target_name = full_path[-1]
        target_file = os.path.join(current_path, target_name)
        # only rewrites the target when the contents actually differ
        amalgamation.copy_if_different(src, target_file)

    # include the main extension helper
    include_files += [os.path.join('src', 'include', 'duckdb', 'main', 'extension_helper.hpp')]
    # include the separate extensions
    for ext in extensions:
        ext_path = os.path.join(scripts_dir, '..', 'extension', ext)
        include_package(ext, ext_path, include_files, include_list, source_list)

    for src in source_list:
        copy_file(src, target_dir)

    for inc in include_files:
        copy_file(inc, target_dir)

    # handle pragma_version.cpp: paste #define DUCKDB_SOURCE_ID and DUCKDB_VERSION there
    curdir = os.getcwd()
    os.chdir(os.path.join(scripts_dir, '..'))
    githash = git_commit_hash()
    dev_version = git_dev_version()
    dev_v_parts = dev_version.lstrip('v').split('.')
    os.chdir(curdir)
    # open the file and read the current contents
    fpath = os.path.join(target_dir, 'src', 'function', 'table', 'version', 'pragma_version.cpp')
    with open_utf8(fpath, 'r') as f:
        text = f.read()
    # now add the DUCKDB_SOURCE_ID define, if it is not there already
    found_hash = False
    found_dev = False
    found_major = False
    found_minor = False
    found_patch = False
    lines = text.split('\n')
    # rewrite any existing #define lines in place with the computed values
    for i in range(len(lines)):
        if '#define DUCKDB_SOURCE_ID ' in lines[i]:
            lines[i] = '#define DUCKDB_SOURCE_ID "{}"'.format(githash)
            found_hash = True
        if '#define DUCKDB_VERSION ' in lines[i]:
            lines[i] = '#define DUCKDB_VERSION "{}"'.format(dev_version)
            found_dev = True
        if '#define DUCKDB_MAJOR_VERSION ' in lines[i]:
            lines[i] = '#define DUCKDB_MAJOR_VERSION {}'.format(int(dev_v_parts[0]))
            found_major = True
        if '#define DUCKDB_MINOR_VERSION ' in lines[i]:
            lines[i] = '#define DUCKDB_MINOR_VERSION {}'.format(int(dev_v_parts[1]))
            found_minor = True
        if '#define DUCKDB_PATCH_VERSION ' in lines[i]:
            # quoted (not int-converted): the patch part may carry a -devN suffix
            lines[i] = '#define DUCKDB_PATCH_VERSION "{}"'.format(dev_v_parts[2])
            found_patch = True
    # any define not present yet is prepended, guarded by #ifndef
    if not found_hash:
        lines = ['#ifndef DUCKDB_SOURCE_ID', '#define DUCKDB_SOURCE_ID "{}"'.format(githash), '#endif'] + lines
    if not found_dev:
        lines = ['#ifndef DUCKDB_VERSION', '#define DUCKDB_VERSION "{}"'.format(dev_version), '#endif'] + lines
    if not found_major:
        lines = [
            '#ifndef DUCKDB_MAJOR_VERSION',
            '#define DUCKDB_MAJOR_VERSION {}'.format(int(dev_v_parts[0])),
            '#endif',
        ] + lines
    if not found_minor:
        lines = [
            '#ifndef DUCKDB_MINOR_VERSION',
            '#define DUCKDB_MINOR_VERSION {}'.format(int(dev_v_parts[1])),
            '#endif',
        ] + lines
    if not found_patch:
        lines = [
            '#ifndef DUCKDB_PATCH_VERSION',
            '#define DUCKDB_PATCH_VERSION "{}"'.format(dev_v_parts[2]),
            '#endif',
        ] + lines
    text = '\n'.join(lines)
    with open_utf8(fpath, 'w+') as f:
        f.write(text)

    def file_is_excluded(fname):
        """Return True when fname matches any excluded_objects substring."""
        for entry in excluded_objects:
            if entry in fname:
                return True
        return False

    def generate_unity_build(entries, unity_name, linenumbers):
        """Write a unity-build .cpp in target_dir that #includes every entry."""
        ub_file = os.path.join(target_dir, unity_name)
        with open_utf8(ub_file, 'w+') as f:
            for entry in entries:
                if linenumbers:
                    f.write('#line 0 "{}"\n'.format(convert_backslashes(entry)))
                f.write('#include "{}"\n\n'.format(convert_backslashes(entry)))
        return ub_file

    def generate_unity_builds(source_list, nsplits, linenumbers):
        """Group sources per directory; directories whose CMakeLists.txt uses
        add_library_unity become one generated ub_<dir>.cpp each.

        NOTE(review): nsplits is accepted but never read — grouping is
        purely per-directory.
        """
        files_per_directory = {}
        for source in source_list:
            dirname = os.path.dirname(source)
            if dirname not in files_per_directory:
                files_per_directory[dirname] = []
            files_per_directory[dirname].append(source)

        new_source_files = []
        for dirname in files_per_directory.keys():
            current_files = files_per_directory[dirname]
            cmake_file = os.path.join(dirname, 'CMakeLists.txt')
            unity_build = False
            if os.path.isfile(cmake_file):
                with open(cmake_file, 'r') as f:
                    text = f.read()
                if 'add_library_unity' in text:
                    unity_build = True
                    # re-order the files in the unity build so that they follow the same order as the CMake
                    scores = {}
                    filenames = [x[0] for x in re.findall('([a-zA-Z0-9_]+[.](cpp|cc|c|cxx))', text)]
                    score = 0
                    for filename in filenames:
                        scores[filename] = score
                        score += 1
                    # files not mentioned in the CMakeLists sort last (99999)
                    current_files.sort(
                        key=lambda x: scores[os.path.basename(x)] if os.path.basename(x) in scores else 99999
                    )
            if not unity_build:
                if short_paths:
                    # replace source files with "__"
                    for file in current_files:
                        unity_filename = os.path.basename(file)
                        new_source_files.append(generate_unity_build([file], unity_filename, linenumbers))
                else:
                    # directly use the source files
                    new_source_files += [os.path.join(folder_name, file) for file in current_files]
            else:
                unity_base = dirname.replace(os.path.sep, '_')
                unity_name = f'ub_{unity_base}.cpp'
                new_source_files.append(generate_unity_build(current_files, unity_name, linenumbers))
        return new_source_files

    original_sources = source_list
    source_list = generate_unity_builds(source_list, unity_count, linenumbers)

    os.chdir(prev_wd)
    # excluded objects are dropped from the compile sources only; all paths
    # are normalized to forward slashes
    return (
        [convert_backslashes(x) for x in source_list if not file_is_excluded(x)],
        [convert_backslashes(x) for x in include_list],
        [convert_backslashes(x) for x in original_sources],
    )
|