Files
email-tracker/external/duckdb/scripts/format.py
2025-10-24 19:21:19 -05:00

462 lines
14 KiB
Python

#!/usr/bin/python
# this script is used to format the source directory
import os
import time
import sys
import inspect
import subprocess
import difflib
import re
import tempfile
import uuid
import concurrent.futures
import argparse
import shutil
import traceback
from python_helpers import open_utf8
def _require_tool(command, install_hint, version_ok=None):
    """Run ``command`` and abort the script if the tool is missing or unsuitable.

    ``command`` is the argv tuple handed to ``subprocess.check_output``.
    ``install_hint`` is printed (followed by the tool output or the error)
    right before exiting with status -1. ``version_ok`` is an optional
    predicate over the tool's output; any exception it raises is reported the
    same way as a failure to launch the tool.

    Returns the text printed by the tool.
    """
    try:
        output = subprocess.check_output(command, text=True)
        if version_ok is not None and not version_ok(output):
            print(install_hint, output)
            exit(-1)
    except Exception as e:
        print(install_hint, e)
        exit(-1)
    return output


# black: the second space-separated token of `black --version` must have a major version >= 24
ver = _require_tool(
    ('black', '--version'),
    'you need to run `pip install "black>=24"`',
    lambda out: int(out.split(' ')[1].split('.')[0]) >= 24,
)

# clang-format: the version banner has to contain "11."
ver = _require_tool(
    ('clang-format', '--version'),
    'you need to run `pip install clang_format==11.0.1 - `',
    lambda out: '11.' in out,
)

cpp_format_command = 'clang-format --sort-includes=0 -style=file'
cmake_format_command = 'cmake-format'

# cmake-format: only has to be runnable, no version constraint
_require_tool(('cmake-format', '--version'), 'you need to run `pip install cmake-format`')
# File name suffixes that this script knows how to format.
# Matching is done with str.endswith, so 'CMakeLists.txt' matches by full name.
extensions = [
    # C / C++ sources and headers
    '.cpp', '.ipp', '.c', '.hpp', '.h', '.cc', '.hh',
    # CMake build files
    'CMakeLists.txt',
    # test and benchmark definition files
    '.test', '.test_slow', '.test_coverage', '.benchmark',
    # other languages
    '.py', '.java',
]

# Path prefixes (relative to the working directory) that are formatted by default.
formatted_directories = ['src', 'benchmark', 'test', 'tools', 'examples', 'extension', 'scripts']

# Base names that are never formatted, wherever they are located.
ignored_files = [
    'tpch_constants.hpp', 'tpcds_constants.hpp', '_generated', 'tpce_flat_input.hpp',
    'test_csv_header.hpp', 'duckdb.cpp', 'duckdb.hpp', 'json.hpp',
    'sqlite3.h', 'shell.c', 'termcolor.hpp', 'test_insert_invalid.test',
    'httplib.hpp', 'os_win.c', 'glob.c', 'printf.c',
    'helper.hpp', 'single_thread_ptr.hpp', 'types.hpp', 'default_views.cpp',
    'default_functions.cpp', 'release.h', 'genrand.cpp', 'address.cpp',
    'visualizer_constants.hpp', 'icu-collate.cpp', 'icu-collate.hpp', 'yyjson.cpp',
    'yyjson.hpp', 'duckdb_pdqsort.hpp', 'pdqsort.h', 'stubdata.cpp',
    'nf_calendar.cpp', 'nf_calendar.h', 'nf_localedata.cpp', 'nf_localedata.h',
    'nf_zformat.cpp', 'nf_zformat.h', 'expr.cc', 'function_list.cpp',
    'inlined_grammar.hpp',
]

# Directory names / path fragments whose contents are never formatted.
# The multi-component entries are assembled with the platform path separator.
ignored_directories = ['.eggs', '__pycache__', 'dbgen'] + [
    os.path.join(*parts)
    for parts in (
        ('tools', 'rpkg', 'src', 'duckdb'),
        ('tools', 'rpkg', 'inst', 'include', 'cpp11'),
        ('extension', 'tpcds', 'dsdgen'),
        ('extension', 'jemalloc', 'jemalloc'),
        ('extension', 'icu', 'third_party'),
        ('tools', 'nodejs', 'src', 'duckdb'),
    )
]
# Command-line interface. Every option ends up in a module-level variable that
# the rest of the script reads directly.
parser = argparse.ArgumentParser(prog='python scripts/format.py', description='Format source directory files')
parser.add_argument(
    'revision',
    nargs='?',
    default='HEAD',
    help='Revision number or --all to format all files (default: HEAD)',
)
parser.add_argument('--check', action='store_true', help='Only print differences (default)')
parser.add_argument('--fix', action='store_true', help='Fix the files')
parser.add_argument('-a', '--all', action='store_true', help='Format all files')
parser.add_argument('-d', '--directories', nargs='*', default=[], help='Format specified directories')
parser.add_argument('-y', '--noconfirm', action='store_true', help='Skip confirmation prompt')
parser.add_argument('-q', '--silent', action='store_true', help='Suppress output')
parser.add_argument('-f', '--force', action='store_true', help='Force formatting')
args = parser.parse_args()

# --check and --fix contradict each other
if args.check and args.fix:
    parser.print_usage()
    exit(1)

revision = args.revision
format_all = args.all
# checking is the default mode; files are only rewritten when --fix is given
check_only = not args.fix
confirm = not args.noconfirm
silent = args.silent
force = args.force
# an explicit -d list replaces the default set of formatted directories
if args.directories:
    formatted_directories = args.directories
def file_is_ignored(full_path):
    """Return True when ``full_path`` is excluded from formatting.

    A file is excluded when its base name is listed in ``ignored_files`` or
    when any entry of ``ignored_directories`` occurs as a substring of the
    directory part of the path.
    """
    if os.path.basename(full_path) in ignored_files:
        return True
    # everything before the last path separator
    parent = os.path.sep.join(full_path.split(os.path.sep)[:-1])
    return any(fragment in parent for fragment in ignored_directories)
def can_format_file(full_path):
    """Return True when ``full_path`` is a file this script should format.

    The path has to be an existing regular file, end in one of ``extensions``,
    not be excluded by ``file_is_ignored`` and start with one of the
    ``formatted_directories`` prefixes.
    """
    if not os.path.isfile(full_path):
        return False
    # check file extension
    if not any(full_path.endswith(suffix) for suffix in extensions):
        return False
    # check ignored files
    if file_is_ignored(full_path):
        return False
    # now check file directory
    return any(full_path.startswith(prefix) for prefix in formatted_directories)
# verb used as the prefix of the progress messages printed below
action = "Checking" if check_only else "Formatting"
def get_changed_files(revision):
    """Return the formattable files that ``git diff --name-only <revision>`` reports.

    ``revision`` is passed to git unchanged. Only paths accepted by
    ``can_format_file`` are returned; that check already rejects ignored
    files, so no separate ``file_is_ignored`` pass is needed.

    A failing git invocation is not treated as an error: its (empty) output
    simply yields an empty list, and git's own message goes to stderr.
    """
    # subprocess.run waits for git to exit and closes the pipe, so no child
    # process or file handle is left dangling
    completed = subprocess.run(['git', 'diff', '--name-only', revision], stdout=subprocess.PIPE)
    candidates = completed.stdout.decode('utf8').split('\n')
    return [path for path in candidates if can_format_file(path)]
# Decide what to work on. The positional argument may name a single file, a
# directory (direct children only, not recursive) or a git revision; with
# --all the changeset is not needed at all.
if os.path.isfile(revision):
    print(f"{action} individual file: {revision}")
    changed_files = [revision]
elif os.path.isdir(revision):
    print(f"{action} files in directory: {revision}")
    changed_files = [os.path.join(revision, entry) for entry in os.listdir(revision)]
    print("Changeset:")
    for fname in changed_files:
        print(fname)
elif format_all:
    print(f"{action} all files")
else:
    if revision == 'main':
        # fetch new changes when comparing to the main branch
        os.system("git fetch origin main:main")
    print(f"{action} since branch or revision: {revision}")
    changed_files = get_changed_files(revision)
    if not changed_files:
        print("No changed files found!")
        exit(0)
    print("Changeset:")
    for fname in changed_files:
        print(fname)

# Rewriting files needs an explicit "y" unless --noconfirm was given.
if not check_only and confirm:
    print("The files listed above will be reformatted.")
    result = input("Continue with changes (y/n)?\n")
    if result != 'y':
        print("Aborting.")
        exit(0)
# Maps a file extension to the command line of the external formatter that
# handles it. The command is split on single spaces and the file path is
# appended as the last argument.
format_commands = {suffix: cpp_format_command for suffix in ('.cpp', '.ipp', '.c', '.hpp', '.h', '.hh', '.cc')}
format_commands['.txt'] = cmake_format_command
format_commands['.py'] = 'black --quiet - --skip-string-normalization --line-length 120 --stdin-filename'
format_commands['.java'] = cpp_format_command

# paths of files for which the check found differences
difference_files = []

# Pieces of the banner comment of header files; the path line goes in between.
header_top = (
    "//===----------------------------------------------------------------------===//\n"
    "// DuckDB\n"
    "//\n"
)
header_bottom = (
    "//\n"
    "//\n"
    "//===----------------------------------------------------------------------===//\n\n"
)
# absolute path of the include root, relative to the current working directory
base_dir = os.path.join(os.getcwd(), 'src/include')
def _format_test_file(full_path):
    """Return the content of a test/benchmark file with a regenerated metadata header."""
    with open_utf8(full_path, 'r') as handle:
        lines = handle.readlines()

    # the group is the name of the directory that directly contains the file
    group_name = full_path.split('/')[-2]
    header = ['# name: ' + full_path + '\n']

    # Find description; at most one such line is allowed.
    descriptions = [line for line in lines if line.lower().startswith(('# description:', '#description:'))]
    if len(descriptions) > 1:
        print("Error formatting file " + full_path + ", multiple lines starting with # description found")
        sys.exit(1)
    if descriptions:
        header.append('# description: ' + descriptions[0].split(':', 1)[1].strip() + '\n')
    header.append('# group: [' + group_name + ']' + '\n')
    header.append('\n')

    # Filter old meta.
    meta = ('#name:', '# name:', '#description:', '# description:', '#group:', '# group:')
    body = [line for line in lines if not line.lower().startswith(meta)]
    # Clean up empty leading lines.
    first_content = next((idx for idx, line in enumerate(body) if line.strip()), len(body))
    # Ensure header is prepended.
    return ''.join(header + body[first_content:])


def _run_external_formatter(full_path, ext):
    """Run the formatter registered for ``ext`` in ``format_commands`` and return its output."""
    proc_command = format_commands[ext].split(' ') + [full_path]
    # subprocess.run drains stdout and stderr together; reading the two pipes
    # one after the other can block forever once the child fills the pipe
    # that is not being read
    if ext == '.py':
        # the source is fed to the formatter through stdin; the context
        # manager makes sure the handle is closed again
        with open(full_path, 'rb') as source:
            proc = subprocess.run(proc_command, stdin=source, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    else:
        proc = subprocess.run(proc_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    new_text = proc.stdout.decode('utf8')
    stderr = proc.stderr.decode('utf8')
    # any output on stderr counts as a failure
    if len(stderr) > 0:
        print(os.getcwd())
        print("Failed to format file " + full_path)
        print(' '.join(proc_command))
        print(stderr)
        sys.exit(1)
    # normalise line endings and finish with exactly one trailing newline
    new_text = new_text.replace('\r', '')
    new_text = re.sub(r'\n*$', '', new_text)
    return new_text + '\n'


def get_formatted_text(f, full_path, directory, ext):
    """Return the text that ``full_path`` should contain after formatting.

    Parameters:
        f: file name as supplied by the caller; only compared against 'list.hpp'.
        full_path: path of the file to format.
        directory: directory that is walked when regenerating a 'list.hpp'.
        ext: extension selecting the formatting strategy.

    Files that ``can_format_file`` rejects are only processed when ``force``
    is set; otherwise a message is printed and the script exits with status 1.
    Formatter failures also print diagnostics and exit with status 1.
    """
    if not can_format_file(full_path) and not force:
        print(
            "File "
            + full_path
            + " is not normally formatted - but attempted to format anyway. Use --force if formatting is desirable"
        )
        sys.exit(1)

    if f == 'list.hpp':
        # fill in list file: one include per .hpp found below `directory`
        includes = sorted(
            os.path.join(dirpath, name).replace('src/include/', '')
            for dirpath, _dirnames, filenames in os.walk(directory)
            for name in filenames
            if os.path.splitext(name)[1] == '.hpp' and not name.endswith("list.hpp")
        )
        return ''.join('#include "%s"\n' % (x) for x in includes)

    # NOTE: headers under src/include used to have their banner comment rebuilt
    # at this point, but the rebuilt text was never returned or written back.
    # That dead computation (and its extra file read) is gone; such headers go
    # through the external formatter exactly as before.

    if ext in ('.test', '.test_slow', '.test_coverage', '.benchmark'):
        return _format_test_file(full_path)

    return _run_external_formatter(full_path, ext)
def file_is_generated(text):
    """Return True when ``text`` contains the marker comment of script-generated files."""
    return '// This file is automatically generated by scripts/' in text
def format_file(f, full_path, directory, ext):
    """Check or rewrite a single file, depending on ``check_only``.

    In check mode a unified diff between the current and the formatted content
    is printed and the path is recorded in ``difference_files`` when they
    differ. Otherwise the formatted content replaces the file. Files carrying
    the "automatically generated" marker are skipped unless they are Python.
    """
    # NOTE(review): the `with` statement rebinds `f`, so what get_formatted_text
    # receives below is the (already closed) file object, not the file name the
    # caller passed in. Kept as is on purpose - changing it would change which
    # branch get_formatted_text takes for files named 'list.hpp'.
    with open_utf8(full_path, 'r') as f:
        current_text = f.read()

    # do not format auto-generated files
    if ext != '.py' and file_is_generated(current_text):
        return

    formatted_text = get_formatted_text(f, full_path, directory, ext)
    if ext in ('.cpp', '.hpp'):
        formatted_text = formatted_text.replace('ARGS &&...args', 'ARGS &&... args')

    if not check_only:
        # write to a scratch file first, then move it over the original
        scratch_path = os.path.join(tempfile.gettempdir(), str(uuid.uuid4()))
        with open_utf8(scratch_path, 'w+') as scratch:
            scratch.write(formatted_text)
        shutil.move(scratch_path, full_path)
        return

    # lines containing '...' are left out of the comparison on both sides
    before = [line for line in current_text.split('\n') if '...' not in line]
    after = [line for line in formatted_text.split('\n') if '...' not in line]
    total_diff = "".join(entry + "\n" for entry in difflib.unified_diff(before, after)).strip()
    if not total_diff:
        return

    rule = "----------------------------------------"
    print(rule)
    print(rule)
    print("Found differences in file " + full_path)
    print(rule)
    print(rule)
    print(total_diff)
    difference_files.append(full_path)
class ToFormatFile:
    """A file queued for formatting: its name, its path and the directory it was found in."""

    def __init__(self, filename, full_path, directory):
        self.filename = filename
        self.full_path = full_path
        self.directory = directory
        # everything after the last '.', with the dot put back in front;
        # a name without any dot yields '.' + the whole name
        suffix = filename.rsplit('.', 1)[-1]
        self.ext = '.' + suffix
def format_directory(directory):
    """Recursively collect the formattable files below ``directory``.

    Entries are visited in sorted order. A sub-directory is skipped when its
    name or its joined path is listed in ``ignored_directories``. Returns a
    list of ``ToFormatFile`` objects.
    """
    collected = []
    for entry in sorted(os.listdir(directory)):
        entry_path = os.path.join(directory, entry)
        if os.path.isdir(entry_path):
            if entry not in ignored_directories and entry_path not in ignored_directories:
                collected.extend(format_directory(entry_path))
            continue
        if can_format_file(entry_path):
            collected.append(ToFormatFile(entry, entry_path, directory))
    return collected
# Build the list of ToFormatFile objects that will be processed.
files = []
if format_all:
    # NOTE(review): as long as cmake_format_command contains no "${FILE}"
    # placeholder, the replace below changes nothing and the formatter is
    # started without a file argument. Kept unchanged - confirm whether the
    # top-level CMakeLists.txt is meant to be formatted here.
    try:
        os.system(cmake_format_command.replace("${FILE}", "CMakeLists.txt"))
    except:
        pass
    for direct in formatted_directories:
        files.extend(format_directory(direct))
else:
    for full_path in changed_files:
        # split at the last path separator: directory part and file name
        parent, _sep, leaf = full_path.rpartition(os.path.sep)
        files.append(ToFormatFile(leaf, full_path, parent))
def process_file(f):
    """Format or check one ``ToFormatFile``; the unit of work given to the thread pool.

    Prints the path first unless ``silent`` is set. An unexpected error is
    reported with its traceback and converted into ``SystemExit(1)``. A
    ``SystemExit`` raised on purpose further down (those code paths have
    already printed their own message) is passed through untouched instead of
    being reported a second time with a meaningless traceback.
    """
    if not silent:
        print(f.full_path)
    try:
        format_file(f.filename, f.full_path, f.directory, f.ext)
    except Exception:
        print(traceback.format_exc())
        sys.exit(1)
# Create thread for each file
with concurrent.futures.ThreadPoolExecutor() as executor:
    try:
        threads = [executor.submit(process_file, f) for f in files]
        # Wait for all tasks to complete
        concurrent.futures.wait(threads)
    except KeyboardInterrupt:
        executor.shutdown(wait=True, cancel_futures=True)
        raise

# process_file signals failure by raising SystemExit inside its worker thread.
# The executor stores that exception on the future instead of stopping the
# program, so the futures have to be inspected explicitly - otherwise a run in
# which files failed to format would still report success.
failed_tasks = [task for task in threads if not task.cancelled() and task.exception() is not None]
if failed_tasks:
    print("")
    print("Failed to process %d file(s), see the errors reported above" % len(failed_tasks))
    sys.exit(1)

if check_only:
    if len(difference_files) > 0:
        print("")
        print("")
        print("")
        print("Failed format-check: differences were found in the following files:")
        for fname in difference_files:
            print("- " + fname)
        print('Run "make format-fix" to fix these differences automatically')
        sys.exit(1)
    else:
        print("Passed format-check")
        sys.exit(0)