"""Generate the DuckDB glue code for the TPC-E tool.

The script reads the row structs declared in ``include/main/TableRows.h``
below ``TPCE_DIR`` and writes two C++ files next to it:

* ``include/tpce_generated.hpp`` - the ``DuckDBLoaderFactory`` declaration
  and the ``CreateTPCESchema`` prototype;
* ``tpce_generated.cpp`` - one appender-backed loader class per struct, the
  factory methods that instantiate them, one ``<Table>Schema`` function per
  struct returning a ``CREATE TABLE`` statement, and ``CreateTPCESchema``.

Paths are relative to the current working directory.
"""
import os
import sys
from contextlib import closing
from string import Template

from python_helpers import open_utf8

GENERATED_HEADER = 'include/tpce_generated.hpp'
GENERATED_SOURCE = 'tpce_generated.cpp'
TPCE_DIR = os.path.join('third_party', 'tpce-tool')

GENERATED_HEADER = os.path.join(TPCE_DIR, GENERATED_HEADER)
GENERATED_SOURCE = os.path.join(TPCE_DIR, GENERATED_SOURCE)
TABLE_ROWS_HEADER = os.path.join(TPCE_DIR, 'include/main/TableRows.h')

# Written verbatim at the top of both generated files.
BANNER = """
////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////
// THIS FILE IS GENERATED BY gentpcecode.py, DO NOT EDIT MANUALLY //
////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////

"""

# Opening part of the generated header. The namespace is left open on
# purpose; it is closed by the last write of the script.
# The CBaseLoader<..._ROW> return types must match the out-of-class
# definitions produced from _FACTORY_METHOD, i.e. the row type is the
# upper-cased, underscore-joined table name followed by "_ROW".
HEADER_PREAMBLE = """
#include "duckdb/catalog/catalog.hpp"
#include "duckdb/main/appender.hpp"
#include "duckdb/main/connection.hpp"
#include "duckdb/main/database.hpp"

#include "main/BaseLoader.h"
#include "main/BaseLoaderFactory.h"
#include "main/NullLoader.h"
#include "main/TableRows.h"

namespace TPCE {
class DuckDBLoaderFactory : public CBaseLoaderFactory {
    duckdb::Connection &con;
    std::string schema;
    std::string suffix;

public:
    DuckDBLoaderFactory(duckdb::Connection &con, std::string schema, std::string suffix)
        : con(con), schema(schema), suffix(suffix) {
    }

    // Functions to create loader classes for individual tables.
    virtual CBaseLoader<ACCOUNT_PERMISSION_ROW> *CreateAccountPermissionLoader();
    virtual CBaseLoader<ADDRESS_ROW> *CreateAddressLoader();
    virtual CBaseLoader<BROKER_ROW> *CreateBrokerLoader();
    virtual CBaseLoader<CASH_TRANSACTION_ROW> *CreateCashTransactionLoader();
    virtual CBaseLoader<CHARGE_ROW> *CreateChargeLoader();
    virtual CBaseLoader<COMMISSION_RATE_ROW> *CreateCommissionRateLoader();
    virtual CBaseLoader<COMPANY_COMPETITOR_ROW> *CreateCompanyCompetitorLoader();
    virtual CBaseLoader<COMPANY_ROW> *CreateCompanyLoader();
    virtual CBaseLoader<CUSTOMER_ACCOUNT_ROW> *CreateCustomerAccountLoader();
    virtual CBaseLoader<CUSTOMER_ROW> *CreateCustomerLoader();
    virtual CBaseLoader<CUSTOMER_TAXRATE_ROW> *CreateCustomerTaxrateLoader();
    virtual CBaseLoader<DAILY_MARKET_ROW> *CreateDailyMarketLoader();
    virtual CBaseLoader<EXCHANGE_ROW> *CreateExchangeLoader();
    virtual CBaseLoader<FINANCIAL_ROW> *CreateFinancialLoader();
    virtual CBaseLoader<HOLDING_ROW> *CreateHoldingLoader();
    virtual CBaseLoader<HOLDING_HISTORY_ROW> *CreateHoldingHistoryLoader();
    virtual CBaseLoader<HOLDING_SUMMARY_ROW> *CreateHoldingSummaryLoader();
    virtual CBaseLoader<INDUSTRY_ROW> *CreateIndustryLoader();
    virtual CBaseLoader<LAST_TRADE_ROW> *CreateLastTradeLoader();
    virtual CBaseLoader<NEWS_ITEM_ROW> *CreateNewsItemLoader();
    virtual CBaseLoader<NEWS_XREF_ROW> *CreateNewsXRefLoader();
    virtual CBaseLoader<SECTOR_ROW> *CreateSectorLoader();
    virtual CBaseLoader<SECURITY_ROW> *CreateSecurityLoader();
    virtual CBaseLoader<SETTLEMENT_ROW> *CreateSettlementLoader();
    virtual CBaseLoader<STATUS_TYPE_ROW> *CreateStatusTypeLoader();
    virtual CBaseLoader<TAX_RATE_ROW> *CreateTaxRateLoader();
    virtual CBaseLoader<TRADE_HISTORY_ROW> *CreateTradeHistoryLoader();
    virtual CBaseLoader<TRADE_ROW> *CreateTradeLoader();
    virtual CBaseLoader<TRADE_REQUEST_ROW> *CreateTradeRequestLoader();
    virtual CBaseLoader<TRADE_TYPE_ROW> *CreateTradeTypeLoader();
    virtual CBaseLoader<WATCH_ITEM_ROW> *CreateWatchItemLoader();
    virtual CBaseLoader<WATCH_LIST_ROW> *CreateWatchListLoader();
    virtual CBaseLoader<ZIP_CODE_ROW> *CreateZipCodeLoader();
};

"""

# Opening part of the generated source: the append_* helpers referenced by
# the per-table loaders and their common base class. DuckDBBaseLoader has to
# be a class template because every generated loader derives from
# DuckDBBaseLoader<SOME_ROW>.
SOURCE_PREAMBLE = """
#include "tpce_generated.hpp"

using namespace duckdb;
using namespace std;

namespace TPCE {
struct tpce_append_information {
    tpce_append_information(Connection &con, string schema, string table) :
        appender(con, schema, table) {}

    Appender appender;
};

static void append_value(tpce_append_information &info, int32_t value) {
    info.appender.Append(value);
}

static void append_bigint(tpce_append_information &info, int64_t value) {
    info.appender.Append(value);
}

static void append_string(tpce_append_information &info, const char *value) {
    info.appender.Append(Value(value));
}

static void append_double(tpce_append_information &info, double value) {
    info.appender.Append(value);
}

static void append_bool(tpce_append_information &info, bool value) {
    info.appender.Append(value);
}

static void append_timestamp(tpce_append_information &info, CDateTime time) {
    int32_t year = 0, month = 0, day = 0, hour = 0, minute = 0, second = 0, msec = 0;
    time.GetYMDHMS(&year, &month, &day, &hour, &minute, &second, &msec);
    info.appender.Append(Value::TIMESTAMP(year, month, day, hour, minute, second, msec * 1000));
}

void append_char(tpce_append_information &info, char value) {
    char val[2];
    val[0] = value;
    val[1] = '\\0';
    append_string(info, val);
}

template <typename T>
class DuckDBBaseLoader : public CBaseLoader<T> {
protected:
    tpce_append_information info;

public:
    DuckDBBaseLoader(Connection &con, string schema, string table) :
        info(con, schema, table) {

    }

    void FinishLoad() {

    }
};

"""

# Head of one generated loader class; the append_* calls follow it.
_LOADER_CLASS_HEAD = Template("""
class DuckDB${TABLENAME}Load : public DuckDBBaseLoader<${ROW_TYPE}> {
public:
    DuckDB${TABLENAME}Load(Connection &con, string schema, string table) :
        DuckDBBaseLoader(con, schema, table) {

    }

    void WriteNextRecord(const ${ROW_TYPE} &next_record) {
        info.appender.BeginRow();""")

# Tail of one generated loader class.
_LOADER_CLASS_TAIL = """
        info.appender.EndRow();
    }

};"""

# Out-of-class definition of one DuckDBLoaderFactory::Create...Loader method.
_FACTORY_METHOD = Template("""
CBaseLoader<${ROW_TYPE}> *
DuckDBLoaderFactory::Create${TABLENAME}Loader() {
    return new DuckDB${TABLENAME}Load(con, schema, "${TABLEINDB}" + suffix);
}
""")

# (substrings looked for in the C++ member type, TypeId tag, SQL type).
# The first matching rule wins, so the order is significant: e.g. 'INT64'
# has to be tried before the generic 'int' rule.
_TYPE_RULES = (
    (('TIdent', 'INT64', 'TTrade'), 'TypeId::BIGINT', 'BIGINT'),
    (('double', 'float'), 'TypeId::DECIMAL', 'DECIMAL'),
    (('int',), 'TypeId::INTEGER', 'INTEGER'),
    (('CDateTime',), 'TypeId::TIMESTAMP', 'TIMESTAMP'),
    (('bool',), 'TypeId::BOOLEAN', 'BOOLEAN'),
    (('char',), 'TypeId::VARCHAR', 'VARCHAR'),
)

# TypeId tag -> suffix of the append_* helper declared in SOURCE_PREAMBLE.
# VARCHAR is absent because it depends on whether the member is an array.
_APPEND_SUFFIX = {
    'TypeId::BIGINT': 'bigint',
    'TypeId::DECIMAL': 'double',
    'TypeId::INTEGER': 'value',
    'TypeId::TIMESTAMP': 'timestamp',
    'TypeId::BOOLEAN': 'bool',
}


def get_tablename(name):
    """Return the CamelCase form of a space-separated table name.

    Each word is title-cased and the spaces are removed; 'NewsXref' is
    special-cased to 'NewsXRef' to match the factory method name.
    """
    name = name.title().replace(' ', '')
    if name == 'NewsXref':
        return 'NewsXRef'
    return name


def _row_type(table):
    """Return the C++ row struct name for a table, e.g. 'ZIP_CODE_ROW'."""
    return table.upper().replace(' ', '_') + '_ROW'


def _schema_function_name(table):
    """Return the name of the generated schema function for a table."""
    return table.title().replace(' ', '') + 'Schema'


def _classify_column(declared_type, declarator):
    """Describe one struct member as ``[name, tpe, is_single_char, sqltpe]``.

    ``declared_type`` is the first token of the member line and
    ``declarator`` the second one (member name, possibly followed by ``;``
    or the start of an array extent). Returns None when no rule in
    ``_TYPE_RULES`` matches, in which case the member is skipped.
    """
    for markers, tpe, sqltpe in _TYPE_RULES:
        if any(marker in declared_type for marker in markers):
            break
    else:
        return None
    name = declarator.split(';')[0].split('[')[0].lower()
    # A char member without an array extent is a single character and is
    # appended through append_char instead of append_string.
    is_single_char = tpe == 'TypeId::VARCHAR' and '[' not in declarator
    return [name, tpe, is_single_char, sqltpe]


def parse_table_rows(path):
    """Parse the row structs in ``path`` into ``{table name: [columns]}``.

    A table starts at a ``typedef struct NAME_ROW`` line and ends at the
    next line starting with ``}``. The table name is the struct name with
    ``_ROW`` removed, underscores turned into spaces, lower-cased. Member
    lines with fewer than two tokens or an unrecognised type are ignored.
    """
    parsed = {}
    current = None
    with closing(open_utf8(path, 'r')) as rows_file:
        for raw_line in rows_file:
            line = raw_line.strip()
            if line.startswith('typedef struct '):
                struct_name = line.replace('typedef struct ', '').split(' ')[0]
                current = struct_name.replace('_ROW', ' ').replace('_', ' ').lower().strip()
                parsed[current] = []
            elif line.startswith('}'):
                current = None
            elif current is not None:
                # split() rather than split(' ') so that tab- or
                # multi-space-aligned members do not yield empty tokens.
                tokens = line.split()
                if len(tokens) < 2:
                    continue
                column = _classify_column(tokens[0], tokens[1])
                if column is not None:
                    parsed[current].append(column)
    return parsed


def _append_call(column):
    """Return the C++ statement appending one column of ``next_record``."""
    name, tpe, is_single_char = column[0], column[1], column[2]
    if tpe == 'TypeId::VARCHAR':
        funcname = 'char' if is_single_char else 'string'
    elif tpe in _APPEND_SUFFIX:
        funcname = _APPEND_SUFFIX[tpe]
    else:
        print("Unknown type " + tpe)
        sys.exit(1)
    return "\t\tappend_%s(info, next_record.%s);" % (funcname, name.upper())


def _loader_class(table, columns):
    """Return the C++ loader class appending every column of ``table``."""
    head = _LOADER_CLASS_HEAD.substitute(TABLENAME=get_tablename(table), ROW_TYPE=_row_type(table))
    calls = "\n".join(_append_call(column) for column in columns)
    return head + "\n" + calls + _LOADER_CLASS_TAIL


def _factory_method(table):
    """Return the C++ factory method that creates the loader of ``table``."""
    return _FACTORY_METHOD.substitute(
        TABLENAME=get_tablename(table),
        ROW_TYPE=_row_type(table),
        TABLEINDB=table.replace(' ', '_'),
    )


def _schema_function(table, columns):
    """Return the C++ function building the CREATE TABLE text of ``table``.

    The emitted function concatenates ``schema``, the underscore-joined
    table name and ``suffix``, followed by one adjacent string literal per
    column holding ``<name> <sql type>``.
    """
    tname = table.replace(' ', '_')
    pieces = [
        'static string ' + _schema_function_name(table) + '(string schema, string suffix) {\n',
        '\treturn "CREATE TABLE " + schema + ".%s" + suffix + " ("\n' % (tname,),
    ]
    column_defs = ['\t "' + column[0] + " " + column[3] for column in columns]
    if column_defs:
        pieces.append(',"\n'.join(column_defs) + ')";\n')
    else:
        # No recognised member: still terminate the statement so that the
        # generated C++ stays syntactically valid.
        pieces.append('\t ")";\n')
    pieces.append("}\n\n")
    return "".join(pieces)


print(GENERATED_HEADER)
print(GENERATED_SOURCE)

# Parse before opening the outputs so that an unreadable TableRows.h does
# not leave truncated generated files behind.
tables = parse_table_rows(TABLE_ROWS_HEADER)

# closing() only relies on the close() method of whatever open_utf8 returns
# and guarantees both files are closed even if a write fails.
with closing(open_utf8(GENERATED_HEADER, 'w+')) as header, \
        closing(open_utf8(GENERATED_SOURCE, 'w+')) as source:
    for fp in (header, source):
        fp.write(BANNER)

    header.write(HEADER_PREAMBLE)
    source.write(SOURCE_PREAMBLE)

    for table, columns in tables.items():
        source.write(_loader_class(table, columns))

    for table in tables:
        source.write(_factory_method(table))
    source.write("\n")

    for table, columns in tables.items():
        source.write(_schema_function(table, columns))

    func = 'void CreateTPCESchema(duckdb::DuckDB &db, duckdb::Connection &con, std::string &schema, std::string &suffix)'
    header.write(func + ';\n\n')
    source.write(func + ' {\n')
    for table in tables:
        source.write('\tcon.Query(%sSchema(schema, suffix));\n' % (_schema_function_name(table)))
    source.write('}\n\n')

    for fp in (header, source):
        fp.write("} /* namespace TPCE */\n")