Files
email-tracker/external/duckdb/scripts/gentpcecode.py
2025-10-24 19:21:19 -05:00

328 lines
10 KiB
Python

import os
from python_helpers import open_utf8
# Both generated files are written inside the bundled TPC-E tool tree.
TPCE_DIR = os.path.join('third_party', 'tpce-tool')
GENERATED_HEADER = os.path.join(TPCE_DIR, 'include/tpce_generated.hpp')
GENERATED_SOURCE = os.path.join(TPCE_DIR, 'tpce_generated.cpp')

# Parser state filled in while reading TableRows.h further down:
# `tables` maps a table name to its list of column entries, and
# `current_table` is the table being read (None outside a struct body).
tables = {}
current_table = None

# Report the output paths before the files are created/truncated.
for generated_path in (GENERATED_HEADER, GENERATED_SOURCE):
    print(generated_path)

# Both handles stay open for the whole script and are closed by the final
# loop that writes the closing namespace brace.
header = open_utf8(GENERATED_HEADER, 'w+')
source = open_utf8(GENERATED_SOURCE, 'w+')
# Stamp both generated files with the same do-not-edit banner. The literal
# is kept at column 0 because its leading whitespace would be emitted as-is.
for fp in (header, source):
    fp.write(
        """
////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////
// THIS FILE IS GENERATED BY gentpcecode.py, DO NOT EDIT MANUALLY //
////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////
"""
    )
# Emit the fixed part of the generated header: the DuckDB / TPC-E includes and
# the DuckDBLoaderFactory class, which declares one Create<Table>Loader()
# method per table. These declarations are hard-coded here, whereas the
# matching definitions are generated later from the structs parsed out of
# TableRows.h. The TPCE namespace opened in this text is closed by the final
# write at the end of the script.
header.write(
"""
#include "duckdb/catalog/catalog.hpp"
#include "duckdb/main/appender.hpp"
#include "duckdb/main/connection.hpp"
#include "duckdb/main/database.hpp"
#include "main/BaseLoader.h"
#include "main/BaseLoaderFactory.h"
#include "main/NullLoader.h"
#include "main/TableRows.h"
namespace TPCE {
class DuckDBLoaderFactory : public CBaseLoaderFactory {
duckdb::Connection &con;
std::string schema;
std::string suffix;
public:
DuckDBLoaderFactory(duckdb::Connection &con, std::string schema,
std::string suffix)
: con(con), schema(schema), suffix(suffix) {
}
// Functions to create loader classes for individual tables.
virtual CBaseLoader<ACCOUNT_PERMISSION_ROW> *
CreateAccountPermissionLoader();
virtual CBaseLoader<ADDRESS_ROW> *CreateAddressLoader();
virtual CBaseLoader<BROKER_ROW> *CreateBrokerLoader();
virtual CBaseLoader<CASH_TRANSACTION_ROW> *
CreateCashTransactionLoader();
virtual CBaseLoader<CHARGE_ROW> *CreateChargeLoader();
virtual CBaseLoader<COMMISSION_RATE_ROW> *CreateCommissionRateLoader();
virtual CBaseLoader<COMPANY_COMPETITOR_ROW> *
CreateCompanyCompetitorLoader();
virtual CBaseLoader<COMPANY_ROW> *CreateCompanyLoader();
virtual CBaseLoader<CUSTOMER_ACCOUNT_ROW> *
CreateCustomerAccountLoader();
virtual CBaseLoader<CUSTOMER_ROW> *CreateCustomerLoader();
virtual CBaseLoader<CUSTOMER_TAXRATE_ROW> *
CreateCustomerTaxrateLoader();
virtual CBaseLoader<DAILY_MARKET_ROW> *CreateDailyMarketLoader();
virtual CBaseLoader<EXCHANGE_ROW> *CreateExchangeLoader();
virtual CBaseLoader<FINANCIAL_ROW> *CreateFinancialLoader();
virtual CBaseLoader<HOLDING_ROW> *CreateHoldingLoader();
virtual CBaseLoader<HOLDING_HISTORY_ROW> *CreateHoldingHistoryLoader();
virtual CBaseLoader<HOLDING_SUMMARY_ROW> *CreateHoldingSummaryLoader();
virtual CBaseLoader<INDUSTRY_ROW> *CreateIndustryLoader();
virtual CBaseLoader<LAST_TRADE_ROW> *CreateLastTradeLoader();
virtual CBaseLoader<NEWS_ITEM_ROW> *CreateNewsItemLoader();
virtual CBaseLoader<NEWS_XREF_ROW> *CreateNewsXRefLoader();
virtual CBaseLoader<SECTOR_ROW> *CreateSectorLoader();
virtual CBaseLoader<SECURITY_ROW> *CreateSecurityLoader();
virtual CBaseLoader<SETTLEMENT_ROW> *CreateSettlementLoader();
virtual CBaseLoader<STATUS_TYPE_ROW> *CreateStatusTypeLoader();
virtual CBaseLoader<TAX_RATE_ROW> *CreateTaxRateLoader();
virtual CBaseLoader<TRADE_HISTORY_ROW> *CreateTradeHistoryLoader();
virtual CBaseLoader<TRADE_ROW> *CreateTradeLoader();
virtual CBaseLoader<TRADE_REQUEST_ROW> *CreateTradeRequestLoader();
virtual CBaseLoader<TRADE_TYPE_ROW> *CreateTradeTypeLoader();
virtual CBaseLoader<WATCH_ITEM_ROW> *CreateWatchItemLoader();
virtual CBaseLoader<WATCH_LIST_ROW> *CreateWatchListLoader();
virtual CBaseLoader<ZIP_CODE_ROW> *CreateZipCodeLoader();
};
"""
)
# Emit the fixed preamble of the generated source file: the
# tpce_append_information wrapper around a duckdb Appender, one append_*
# helper per column kind (the per-table loaders generated below call these by
# name), and the DuckDBBaseLoader<T> template those loaders derive from.
# The doubled backslash in '\\0' is a Python escape: the emitted C++ contains
# the single-backslash character literal '\0'.
source.write(
"""
#include "tpce_generated.hpp"
using namespace duckdb;
using namespace std;
namespace TPCE {
struct tpce_append_information {
tpce_append_information(Connection &con, string schema, string table) :
appender(con, schema, table) {}
Appender appender;
};
static void append_value(tpce_append_information &info, int32_t value) {
info.appender.Append<int32_t>(value);
}
static void append_bigint(tpce_append_information &info, int64_t value) {
info.appender.Append<int64_t>(value);
}
static void append_string(tpce_append_information &info, const char *value) {
info.appender.Append<Value>(Value(value));
}
static void append_double(tpce_append_information &info, double value) {
info.appender.Append<double>(value);
}
static void append_bool(tpce_append_information &info, bool value) {
info.appender.Append<bool>(value);
}
static void append_timestamp(tpce_append_information &info, CDateTime time) {
int32_t year = 0, month = 0, day = 0, hour = 0, minute = 0, second = 0, msec = 0;
time.GetYMDHMS(&year, &month, &day, &hour, &minute, &second, &msec);
info.appender.Append<Value>(Value::TIMESTAMP(year, month, day, hour, minute, second, msec * 1000));
}
void append_char(tpce_append_information &info, char value) {
char val[2];
val[0] = value;
val[1] = '\\0';
append_string(info, val);
}
template <typename T> class DuckDBBaseLoader : public CBaseLoader<T> {
protected:
tpce_append_information info;
public:
DuckDBBaseLoader(Connection &con, string schema, string table) :
info(con, schema, table) {
}
void FinishLoad() {
}
};
"""
)
def _classify_c_type(c_type):
    """Map the type token of a struct member to its generator types.

    Returns a ``(TypeId name, SQL type name)`` pair, or ``None`` when the
    token matches none of the known substrings. The substrings are tested in
    a fixed order and the first match wins.
    """
    if 'TIdent' in c_type or 'INT64' in c_type or 'TTrade' in c_type:
        return "TypeId::BIGINT", "BIGINT"
    if 'double' in c_type or 'float' in c_type:
        return "TypeId::DECIMAL", "DECIMAL"
    if 'int' in c_type:
        return "TypeId::INTEGER", "INTEGER"
    if 'CDateTime' in c_type:
        return "TypeId::TIMESTAMP", "TIMESTAMP"
    if 'bool' in c_type:
        return "TypeId::BOOLEAN", "BOOLEAN"
    if 'char' in c_type:
        return "TypeId::VARCHAR", "VARCHAR"
    return None


# Scan TableRows.h and record, for every `typedef struct X_ROW`, the list of
# its members as [name, TypeId name, is_single_char, SQL type] entries in
# `tables`. The table key is the struct name without `_ROW`, lower-cased, with
# underscores turned into spaces (e.g. ACCOUNT_PERMISSION_ROW ->
# "account permission").
# The input is opened with open_utf8, the same helper used for the output
# files, so reading does not depend on the platform's default encoding
# (presumably UTF-8, going by the helper's name).
with open_utf8(os.path.join(TPCE_DIR, 'include/main/TableRows.h'), 'r') as f:
    for line in f:
        line = line.strip()
        if line.startswith('typedef struct '):
            # Start of a row struct: the first token after the prefix is its name.
            struct_name = line[len('typedef struct '):].split()[0]
            current_table = struct_name.replace('_ROW', ' ').replace('_', ' ').lower().strip()
            tables[current_table] = []
        elif line.startswith('}'):
            # End of the struct body.
            current_table = None
        elif current_table is not None:
            # Member declaration: "<type> <name>[...];". Splitting on any run
            # of whitespace keeps aligned declarations from yielding an empty
            # name token.
            splits = line.split()
            if len(splits) < 2:
                continue
            classified = _classify_c_type(splits[0])
            if classified is None:
                # Unrecognised type token: the member is skipped.
                continue
            tpe, sqltpe = classified
            name = splits[1].split(';')[0].split('[')[0].lower()
            # A char member without an array suffix is a single character and
            # is later appended through append_char instead of append_string.
            is_single_char = tpe == "TypeId::VARCHAR" and '[' not in splits[1]
            tables[current_table].append([name, tpe, is_single_char, sqltpe])
def get_tablename(name):
    """Return the CamelCase form of a space-separated table name.

    Each word is title-cased and the spaces are removed, e.g.
    ``"account permission"`` becomes ``"AccountPermission"``.

    ``"news xref"`` is special-cased to ``"NewsXRef"`` (capital R) so the
    result matches the hard-coded ``CreateNewsXRefLoader`` declaration in the
    generated header, which plain title-casing cannot produce.
    """
    camel = name.title().replace(' ', '')
    return 'NewsXRef' if camel == 'NewsXref' else camel
# Suffix of the C++ append_* helper used for each non-VARCHAR column type.
_APPEND_SUFFIX_BY_TYPE = {
    "TypeId::BIGINT": "bigint",
    "TypeId::DECIMAL": "double",
    "TypeId::INTEGER": "value",
    "TypeId::TIMESTAMP": "timestamp",
    "TypeId::BOOLEAN": "bool",
}


def _append_suffix(tpe, is_single_char):
    """Return the append_* helper suffix for a column.

    VARCHAR columns use ``char`` for single characters and ``string``
    otherwise. An unknown type is reported on stdout and terminates the
    script with exit status 1.
    """
    if tpe == "TypeId::VARCHAR":
        return "char" if is_single_char else "string"
    if tpe not in _APPEND_SUFFIX_BY_TYPE:
        print("Unknown type " + tpe)
        # Same effect as exit(1), without relying on the `exit` helper that
        # is only injected by the `site` module.
        raise SystemExit(1)
    return _APPEND_SUFFIX_BY_TYPE[tpe]


# Emit one DuckDB<Table>Load class per parsed table. Its WriteNextRecord()
# appends every column of the row, in declaration order, between BeginRow()
# and EndRow().
for table in tables:
    row_type = table.upper().replace(' ', '_') + '_ROW'
    source.write(
        """
class DuckDB${TABLENAME}Load : public DuckDBBaseLoader<${ROW_TYPE}> {
public:
DuckDB${TABLENAME}Load(Connection &con, string schema, string table) :
DuckDBBaseLoader(con, schema, table) {
}
void WriteNextRecord(const ${ROW_TYPE} &next_record) {
info.appender.BeginRow();""".replace(
            "${TABLENAME}", get_tablename(table)
        ).replace("${ROW_TYPE}", row_type)
    )
    source.write("\n")
    # One append call per column, newline-separated with no trailing newline;
    # the closing text below begins with its own newline.
    source.write(
        "\n".join(
            "\t\tappend_%s(info, next_record.%s);" % (_append_suffix(entry[1], entry[2]), entry[0].upper())
            for entry in tables[table]
        )
    )
    source.write(
        """
info.appender.EndRow();
}
};"""
    )
# Emit the definition of DuckDBLoaderFactory::Create<Table>Loader() for every
# parsed table. Each one returns a new DuckDB<Table>Load bound to the table
# named "<table_with_underscores>" + suffix. These are expected to line up
# with the declarations hard-coded in the generated header.
for table in tables:
    row_type = table.upper().replace(' ', '_') + '_ROW'
    table_in_db = table.replace(' ', '_')
    source.write(
        """
CBaseLoader<${ROW_TYPE}> *
DuckDBLoaderFactory::Create${TABLENAME}Loader() {
return new DuckDB${TABLENAME}Load(con, schema, "${TABLEINDB}" + suffix);
}
""".replace("${TABLENAME}", get_tablename(table))
        .replace("${ROW_TYPE}", row_type)
        .replace("${TABLEINDB}", table_in_db)
    )
# Blank line after the last factory function.
# NOTE(review): confirm this write belongs after the loop, not inside it.
source.write("\n")
# Emit one `static string <Table>Schema(string schema, string suffix)` C++
# function per parsed table; it returns the CREATE TABLE statement built from
# the column names and SQL types collected above. The emitted functions have
# roughly this shape:
# static string RegionSchema(string schema, string suffix) {
# return "CREATE TABLE " + schema + ".region" + suffix + " ("
# "r_regionkey INT NOT NULL,"
# "r_name VARCHAR(25) NOT NULL,"
# "r_comment VARCHAR(152) NOT NULL);";
# }
for table in tables:
    tname = table.replace(' ', '_')
    columns = tables[table]
    # Collect the pieces and join once; this also avoids the original local
    # named `str`, which shadowed the builtin.
    pieces = [
        'static string ' + table.title().replace(' ', '') + 'Schema(string schema, string suffix) {\n',
        '\treturn "CREATE TABLE " + schema + ".%s" + suffix + " ("\n' % (tname,),
    ]
    last_index = len(columns) - 1
    for i, column in enumerate(columns):
        # The last column closes the column list and the statement; every
        # other column ends with a comma inside the C++ string literal.
        terminator = ')";' if i == last_index else ',"'
        pieces.append('\t "' + column[0] + " " + column[3] + terminator + "\n")
    pieces.append("}\n\n")
    source.write("".join(pieces))
# Declare CreateTPCESchema() in the header and define it in the source: it
# runs the <Table>Schema() statement of every parsed table through
# con.Query(), e.g. `con.Query(RegionSchema(schema, suffix));`.
func = 'void CreateTPCESchema(duckdb::DuckDB &db, duckdb::Connection &con, std::string &schema, std::string &suffix)'
header.write(func + ';\n\n')
source.write(func + ' {\n')
for table in tables:
    # Must use the same name derivation as the <Table>Schema definitions
    # (plain title-casing, not get_tablename).
    source.write('\tcon.Query(%sSchema(schema, suffix));\n' % (table.title().replace(' ', '')))
source.write('}\n\n')

# Close the TPCE namespace opened in both preambles, then release the files.
for fp in (header, source):
    fp.write("} /* namespace TPCE */\n")
    fp.close()