Files
email-tracker/external/duckdb/scripts/generate_csv_header.py
2025-10-24 19:21:19 -05:00

99 lines
3.2 KiB
Python

# this script generates data for the TPC-H dbgen
import os
from python_helpers import open_utf8
def get_csv_text(fpath, add_null_terminator=False):
with open(fpath, 'rb') as f:
text = bytearray(f.read())
result_text = ""
first = True
for byte in text:
if first:
result_text += str(byte)
else:
result_text += ", " + str(byte)
first = False
if add_null_terminator:
result_text += ", 0"
return result_text
def write_dir(dirname, varname):
files = os.listdir(dirname)
files.sort()
result = ""
aggregated_result = "const char *%s[] = {\n" % (varname,)
for fname in files:
file_varname = "%s_%s" % (varname, fname.split('.')[0])
result += "const uint8_t %s[] = {" % (file_varname,) + get_csv_text(os.path.join(dirname, fname), True) + "};\n"
aggregated_result += "\t(const char*) %s,\n" % (file_varname,)
aggregated_result = aggregated_result[:-2] + "\n};\n"
return result + aggregated_result
# ------------------------------------------- #
# ------------------------------------------- #
# ------------- TPC-H ------------ #
# ------------------------------------------- #
# ------------------------------------------- #
tpch_dir = 'extension/tpch/dbgen'
tpch_queries = os.path.join(tpch_dir, 'queries')
tpch_answers_sf001 = os.path.join(tpch_dir, 'answers', 'sf0.01')
tpch_answers_sf01 = os.path.join(tpch_dir, 'answers', 'sf0.1')
tpch_answers_sf1 = os.path.join(tpch_dir, 'answers', 'sf1')
tpch_header = os.path.join(tpch_dir, 'include', 'tpch_constants.hpp')
def create_tpch_header(tpch_dir):
result = """/* THIS FILE WAS AUTOMATICALLY GENERATED BY generate_csv_header.py */
#pragma once
const int TPCH_QUERIES_COUNT = 22;
"""
# write the queries
result += write_dir(tpch_queries, "TPCH_QUERIES")
result += write_dir(tpch_answers_sf001, "TPCH_ANSWERS_SF0_01")
result += write_dir(tpch_answers_sf01, "TPCH_ANSWERS_SF0_1")
result += write_dir(tpch_answers_sf1, "TPCH_ANSWERS_SF1")
with open_utf8(tpch_header, 'w+') as f:
f.write(result)
print(tpch_header)
create_tpch_header(tpch_dir)
# ------------------------------------------- #
# ------------------------------------------- #
# ------------- TPC-DS ------------ #
# ------------------------------------------- #
# ------------------------------------------- #
tpcds_dir = 'extension/tpcds/dsdgen'
tpcds_queries = os.path.join(tpcds_dir, 'queries')
tpcds_answers_sf001 = os.path.join(tpcds_dir, 'answers', 'sf0.01')
tpcds_answers_sf1 = os.path.join(tpcds_dir, 'answers', 'sf1')
tpcds_header = os.path.join(tpcds_dir, 'include', 'tpcds_constants.hpp')
def create_tpcds_header(tpch_dir):
result = """/* THIS FILE WAS AUTOMATICALLY GENERATED BY generate_csv_header.py */
#pragma once
const int TPCDS_QUERIES_COUNT = 99;
const int TPCDS_TABLE_COUNT = 24;
"""
# write the queries
result += write_dir(tpcds_queries, "TPCDS_QUERIES")
result += write_dir(tpcds_answers_sf001, "TPCDS_ANSWERS_SF0_01")
result += write_dir(tpcds_answers_sf1, "TPCDS_ANSWERS_SF1")
with open_utf8(tpcds_header, 'w+') as f:
f.write(result)
print(tpcds_header)
create_tpcds_header(tpcds_dir)