should be it
This commit is contained in:
718
external/duckdb/extension/tpch/dbgen/dbgen.cpp
vendored
Normal file
718
external/duckdb/extension/tpch/dbgen/dbgen.cpp
vendored
Normal file
@@ -0,0 +1,718 @@
|
||||
#include "dbgen/dbgen.hpp"
|
||||
#include "dbgen/dbgen_gunk.hpp"
|
||||
#include "tpch_constants.hpp"
|
||||
#include "duckdb/common/exception.hpp"
|
||||
#include "duckdb/common/types/date.hpp"
|
||||
#include "duckdb/parser/column_definition.hpp"
|
||||
#include "duckdb/parser/parsed_data/create_table_info.hpp"
|
||||
#include "duckdb/parser/constraints/not_null_constraint.hpp"
|
||||
#include "duckdb/catalog/catalog.hpp"
|
||||
#include "duckdb/main/appender.hpp"
|
||||
#include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp"
|
||||
#ifndef DUCKDB_NO_THREADS
|
||||
#include "duckdb/common/thread.hpp"
|
||||
#endif
|
||||
|
||||
#define DECLARER /* EXTERN references get defined here */
|
||||
|
||||
#include "dbgen/dss.h"
|
||||
#include "dbgen/dsstypes.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <cmath>
|
||||
#include <mutex>
|
||||
|
||||
using namespace duckdb;
|
||||
|
||||
namespace tpch {
|
||||
|
||||
struct tpch_append_information {
|
||||
duckdb::unique_ptr<InternalAppender> appender;
|
||||
};
|
||||
|
||||
// Forwards a 32-bit integer column value to the appender held by `info`.
void append_int32(tpch_append_information &info, int32_t value) {
	auto &target = *info.appender;
	target.Append<int32_t>(value);
}
|
||||
|
||||
// Forwards a 64-bit integer column value to the appender held by `info`.
void append_int64(tpch_append_information &info, int64_t value) {
	auto &target = *info.appender;
	target.Append<int64_t>(value);
}
|
||||
|
||||
// Forwards a NUL-terminated string column value to the appender held by `info`.
void append_string(tpch_append_information &info, const char *value) {
	auto &target = *info.appender;
	target.Append<const char *>(value);
}
|
||||
|
||||
// Forwards a decimal column value to the appender held by `info`.
// The value is handed over as a plain 64-bit integer.
void append_decimal(tpch_append_information &info, int64_t value) {
	auto &target = *info.appender;
	target.Append<int64_t>(value);
}
|
||||
|
||||
// Parses `value` with Date::FromString and forwards the resulting date to the appender held by `info`.
void append_date(tpch_append_information &info, string value) {
	const date_t parsed = Date::FromString(value);
	info.appender->Append<date_t>(parsed);
}
|
||||
|
||||
// Appends a single character as a one-character string.
void append_char(tpch_append_information &info, char value) {
	// build a NUL-terminated buffer so the character can go through append_string
	const char buffer[2] = {value, '\0'};
	append_string(info, buffer);
}
|
||||
|
||||
// Writes one row for the order `o` through the ORDER appender of `info`.
// Values are appended in the column order o_orderkey .. o_comment.
static void append_order(order_t *o, tpch_append_information *info) {
	tpch_append_information &orders = info[ORDER];
	auto &row = *orders.appender;

	row.BeginRow();
	append_int64(orders, o->okey);         // o_orderkey
	append_int64(orders, o->custkey);      // o_custkey
	append_char(orders, o->orderstatus);   // o_orderstatus
	append_decimal(orders, o->totalprice); // o_totalprice
	append_date(orders, o->odate);         // o_orderdate
	append_string(orders, o->opriority);   // o_orderpriority
	append_string(orders, o->clerk);       // o_clerk
	append_int32(orders, o->spriority);    // o_shippriority
	append_string(orders, o->comment);     // o_comment
	row.EndRow();
}
|
||||
|
||||
// Writes one row per line item of the order `o` through the LINE appender of `info`.
// Values are appended in the column order l_orderkey .. l_comment.
static void append_line(order_t *o, tpch_append_information *info) {
	tpch_append_information &lineitems = info[LINE];
	auto &row = *lineitems.appender;

	for (DSS_HUGE line_idx = 0; line_idx < o->lines; line_idx++) {
		auto &item = o->l[line_idx];

		row.BeginRow();
		append_int64(lineitems, item.okey);          // l_orderkey
		append_int64(lineitems, item.partkey);       // l_partkey
		append_int64(lineitems, item.suppkey);       // l_suppkey
		append_int64(lineitems, item.lcnt);          // l_linenumber
		append_decimal(lineitems, item.quantity);    // l_quantity
		append_decimal(lineitems, item.eprice);      // l_extendedprice
		append_decimal(lineitems, item.discount);    // l_discount
		append_decimal(lineitems, item.tax);         // l_tax
		append_char(lineitems, item.rflag[0]);       // l_returnflag
		append_char(lineitems, item.lstatus[0]);     // l_linestatus
		append_date(lineitems, item.sdate);          // l_shipdate
		append_date(lineitems, item.cdate);          // l_commitdate
		append_date(lineitems, item.rdate);          // l_receiptdate
		append_string(lineitems, item.shipinstruct); // l_shipinstruct
		append_string(lineitems, item.shipmode);     // l_shipmode
		append_string(lineitems, item.comment);      // l_comment
		row.EndRow();
	}
}
|
||||
|
||||
// Appends the order row first and then all of its line item rows.
static void append_order_line(order_t *o, tpch_append_information *info) {
	append_order(o, info); // row for the ORDER appender
	append_line(o, info);  // rows for the LINE appender
}
|
||||
|
||||
// Writes one row for the supplier `supp` through the SUPP appender of `info`.
static void append_supp(supplier_t *supp, tpch_append_information *info) {
	tpch_append_information &suppliers = info[SUPP];
	auto &row = *suppliers.appender;

	row.BeginRow();
	append_int64(suppliers, supp->suppkey);     // s_suppkey
	append_string(suppliers, supp->name);       // s_name
	append_string(suppliers, supp->address);    // s_address
	append_int32(suppliers, supp->nation_code); // s_nationkey
	append_string(suppliers, supp->phone);      // s_phone
	append_decimal(suppliers, supp->acctbal);   // s_acctbal
	append_string(suppliers, supp->comment);    // s_comment
	row.EndRow();
}
|
||||
|
||||
// Writes one row for the customer `c` through the CUST appender of `info`.
static void append_cust(customer_t *c, tpch_append_information *info) {
	tpch_append_information &customers = info[CUST];
	auto &row = *customers.appender;

	row.BeginRow();
	append_int64(customers, c->custkey);     // c_custkey
	append_string(customers, c->name);       // c_name
	append_string(customers, c->address);    // c_address
	append_int32(customers, c->nation_code); // c_nationkey
	append_string(customers, c->phone);      // c_phone
	append_decimal(customers, c->acctbal);   // c_acctbal
	append_string(customers, c->mktsegment); // c_mktsegment
	append_string(customers, c->comment);    // c_comment
	row.EndRow();
}
|
||||
|
||||
// Writes one row for `part` through the PART appender of `info`.
static void append_part(part_t *part, tpch_append_information *info) {
	tpch_append_information &parts = info[PART];
	auto &row = *parts.appender;

	row.BeginRow();
	append_int64(parts, part->partkey);       // p_partkey
	append_string(parts, part->name);         // p_name
	append_string(parts, part->mfgr);         // p_mfgr
	append_string(parts, part->brand);        // p_brand
	append_string(parts, part->type);         // p_type
	append_int32(parts, part->size);          // p_size
	append_string(parts, part->container);    // p_container
	append_decimal(parts, part->retailprice); // p_retailprice
	append_string(parts, part->comment);      // p_comment
	row.EndRow();
}
|
||||
|
||||
// Writes SUPP_PER_PART rows (one per supplier entry of `part`) through the PSUPP appender of `info`.
static void append_psupp(part_t *part, tpch_append_information *info) {
	tpch_append_information &partsupps = info[PSUPP];
	auto &row = *partsupps.appender;

	for (size_t supp_idx = 0; supp_idx < SUPP_PER_PART; supp_idx++) {
		auto &supply = part->s[supp_idx];

		row.BeginRow();
		append_int64(partsupps, supply.partkey); // ps_partkey
		append_int64(partsupps, supply.suppkey); // ps_suppkey
		append_int64(partsupps, supply.qty);     // ps_availqty
		append_decimal(partsupps, supply.scost); // ps_supplycost
		append_string(partsupps, supply.comment); // ps_comment
		row.EndRow();
	}
}
|
||||
|
||||
// Appends the part row first and then its part-supplier rows.
static void append_part_psupp(part_t *part, tpch_append_information *info) {
	append_part(part, info);  // row for the PART appender
	append_psupp(part, info); // rows for the PSUPP appender
}
|
||||
|
||||
// Writes one row for the nation described by `c` through the NATION appender of `info`.
static void append_nation(code_t *c, tpch_append_information *info) {
	tpch_append_information &nations_info = info[NATION];
	auto &row = *nations_info.appender;

	row.BeginRow();
	append_int32(nations_info, c->code);     // n_nationkey
	append_string(nations_info, c->text);    // n_name
	append_int32(nations_info, c->join);     // n_regionkey
	append_string(nations_info, c->comment); // n_comment
	row.EndRow();
}
|
||||
|
||||
// Writes one row for the region described by `c` through the REGION appender of `info`.
static void append_region(code_t *c, tpch_append_information *info) {
	tpch_append_information &regions_info = info[REGION];
	auto &row = *regions_info.appender;

	row.BeginRow();
	append_int32(regions_info, c->code);     // r_regionkey
	append_string(regions_info, c->text);    // r_name
	append_string(regions_info, c->comment); // r_comment
	row.EndRow();
}
|
||||
|
||||
// Generates `count` rows of table `tnum` and appends them through the appenders in `info`.
// Row numbers passed to the mk_* generators start at `offset + 1`.
// The client's interrupt flag is polled whenever the remaining row count is a multiple of 1000;
// generation stops early when it is set.
static void gen_tbl(ClientContext &context, int tnum, DSS_HUGE count, tpch_append_information *info,
                    DBGenContext *dbgen_ctx, idx_t offset = 0) {
	order_t order;
	supplier_t supplier;
	customer_t customer;
	part_t part;
	code_t code;

	DSS_HUGE row_number = offset + 1;
	while (count) {
		if (count % 1000 == 0 && context.interrupted) {
			return;
		}
		row_start(tnum, dbgen_ctx);
		switch (tnum) {
		// orders and line items are always generated together
		case LINE:
		case ORDER:
		case ORDER_LINE:
			mk_order(row_number, &order, dbgen_ctx, 0);
			append_order_line(&order, info);
			break;
		case SUPP:
			mk_supp(row_number, &supplier, dbgen_ctx);
			append_supp(&supplier, info);
			break;
		case CUST:
			mk_cust(row_number, &customer, dbgen_ctx);
			append_cust(&customer, info);
			break;
		// parts and part-suppliers are always generated together
		case PSUPP:
		case PART:
		case PART_PSUPP:
			mk_part(row_number, &part, dbgen_ctx);
			append_part_psupp(&part, info);
			break;
		case NATION:
			mk_nation(row_number, &code, dbgen_ctx);
			append_nation(&code, info);
			break;
		case REGION:
			mk_region(row_number, &code, dbgen_ctx);
			append_region(&code, info);
			break;
		}
		row_stop_h(tnum, dbgen_ctx);

		count--;
		row_number++;
	}
}
|
||||
|
||||
// Maps a dbgen table number to its table name.
// Returns an empty string for numbers that do not name a single table
// (anything other than PART, PSUPP, SUPP, CUST, ORDER, LINE, NATION or REGION).
string get_table_name(int num) {
	const char *name = "";
	switch (num) {
	case PART:
		name = "part";
		break;
	case PSUPP:
		name = "partsupp";
		break;
	case SUPP:
		name = "supplier";
		break;
	case CUST:
		name = "customer";
		break;
	case ORDER:
		name = "orders";
		break;
	case LINE:
		name = "lineitem";
		break;
	case NATION:
		name = "nation";
		break;
	case REGION:
		name = "region";
		break;
	default:
		break;
	}
	return string(name);
}
|
||||
|
||||
struct RegionInfo {
|
||||
static constexpr char *Name = "region";
|
||||
static constexpr idx_t ColumnCount = 3;
|
||||
static const char *Columns[];
|
||||
static const LogicalType Types[];
|
||||
};
|
||||
const char *RegionInfo::Columns[] = {"r_regionkey", "r_name", "r_comment"};
|
||||
const LogicalType RegionInfo::Types[] = {LogicalType(LogicalTypeId::INTEGER), LogicalType(LogicalTypeId::VARCHAR),
|
||||
LogicalType(LogicalTypeId::VARCHAR)};
|
||||
|
||||
struct NationInfo {
|
||||
static constexpr char *Name = "nation";
|
||||
static const char *Columns[];
|
||||
static constexpr idx_t ColumnCount = 4;
|
||||
static const LogicalType Types[];
|
||||
};
|
||||
const char *NationInfo::Columns[] = {"n_nationkey", "n_name", "n_regionkey", "n_comment"};
|
||||
const LogicalType NationInfo::Types[] = {LogicalType(LogicalTypeId::INTEGER), LogicalType(LogicalTypeId::VARCHAR),
|
||||
LogicalType(LogicalTypeId::INTEGER), LogicalType(LogicalTypeId::VARCHAR)};
|
||||
|
||||
struct SupplierInfo {
|
||||
static constexpr char *Name = "supplier";
|
||||
static const char *Columns[];
|
||||
static constexpr idx_t ColumnCount = 7;
|
||||
static const LogicalType Types[];
|
||||
};
|
||||
const char *SupplierInfo::Columns[] = {"s_suppkey", "s_name", "s_address", "s_nationkey",
|
||||
"s_phone", "s_acctbal", "s_comment"};
|
||||
const LogicalType SupplierInfo::Types[] = {LogicalType(LogicalTypeId::BIGINT), LogicalType(LogicalTypeId::VARCHAR),
|
||||
LogicalType(LogicalTypeId::VARCHAR), LogicalType(LogicalTypeId::INTEGER),
|
||||
LogicalType(LogicalTypeId::VARCHAR), LogicalType::DECIMAL(15, 2),
|
||||
LogicalType(LogicalTypeId::VARCHAR)};
|
||||
|
||||
struct CustomerInfo {
|
||||
static constexpr char *Name = "customer";
|
||||
static const char *Columns[];
|
||||
static constexpr idx_t ColumnCount = 8;
|
||||
static const LogicalType Types[];
|
||||
};
|
||||
const char *CustomerInfo::Columns[] = {"c_custkey", "c_name", "c_address", "c_nationkey",
|
||||
"c_phone", "c_acctbal", "c_mktsegment", "c_comment"};
|
||||
const LogicalType CustomerInfo::Types[] = {LogicalType(LogicalTypeId::BIGINT), LogicalType(LogicalTypeId::VARCHAR),
|
||||
LogicalType(LogicalTypeId::VARCHAR), LogicalType(LogicalTypeId::INTEGER),
|
||||
LogicalType(LogicalTypeId::VARCHAR), LogicalType::DECIMAL(15, 2),
|
||||
LogicalType(LogicalTypeId::VARCHAR), LogicalType(LogicalTypeId::VARCHAR)};
|
||||
|
||||
struct PartInfo {
|
||||
static constexpr char *Name = "part";
|
||||
static const char *Columns[];
|
||||
static constexpr idx_t ColumnCount = 9;
|
||||
static const LogicalType Types[];
|
||||
};
|
||||
const char *PartInfo::Columns[] = {"p_partkey", "p_name", "p_mfgr", "p_brand", "p_type",
|
||||
"p_size", "p_container", "p_retailprice", "p_comment"};
|
||||
const LogicalType PartInfo::Types[] = {
|
||||
LogicalType(LogicalTypeId::BIGINT), LogicalType(LogicalTypeId::VARCHAR), LogicalType(LogicalTypeId::VARCHAR),
|
||||
LogicalType(LogicalTypeId::VARCHAR), LogicalType(LogicalTypeId::VARCHAR), LogicalType(LogicalTypeId::INTEGER),
|
||||
LogicalType(LogicalTypeId::VARCHAR), LogicalType::DECIMAL(15, 2), LogicalType(LogicalTypeId::VARCHAR)};
|
||||
|
||||
struct PartsuppInfo {
|
||||
static constexpr char *Name = "partsupp";
|
||||
static const char *Columns[];
|
||||
static constexpr idx_t ColumnCount = 5;
|
||||
static const LogicalType Types[];
|
||||
};
|
||||
const char *PartsuppInfo::Columns[] = {"ps_partkey", "ps_suppkey", "ps_availqty", "ps_supplycost", "ps_comment"};
|
||||
const LogicalType PartsuppInfo::Types[] = {LogicalType(LogicalTypeId::BIGINT), LogicalType(LogicalTypeId::BIGINT),
|
||||
LogicalType(LogicalTypeId::BIGINT), LogicalType::DECIMAL(15, 2),
|
||||
LogicalType(LogicalTypeId::VARCHAR)};
|
||||
|
||||
struct OrdersInfo {
|
||||
static constexpr char *Name = "orders";
|
||||
static const char *Columns[];
|
||||
static constexpr idx_t ColumnCount = 9;
|
||||
static const LogicalType Types[];
|
||||
};
|
||||
const char *OrdersInfo::Columns[] = {"o_orderkey", "o_custkey", "o_orderstatus", "o_totalprice", "o_orderdate",
|
||||
"o_orderpriority", "o_clerk", "o_shippriority", "o_comment"};
|
||||
const LogicalType OrdersInfo::Types[] = {
|
||||
LogicalType(LogicalTypeId::BIGINT), LogicalType(LogicalTypeId::BIGINT), LogicalType(LogicalTypeId::VARCHAR),
|
||||
LogicalType::DECIMAL(15, 2), LogicalType(LogicalTypeId::DATE), LogicalType(LogicalTypeId::VARCHAR),
|
||||
LogicalType(LogicalTypeId::VARCHAR), LogicalType(LogicalTypeId::INTEGER), LogicalType(LogicalTypeId::VARCHAR)};
|
||||
|
||||
struct LineitemInfo {
|
||||
static constexpr char *Name = "lineitem";
|
||||
static const char *Columns[];
|
||||
static constexpr idx_t ColumnCount = 16;
|
||||
static const LogicalType Types[];
|
||||
};
|
||||
const char *LineitemInfo::Columns[] = {"l_orderkey", "l_partkey", "l_suppkey", "l_linenumber",
|
||||
"l_quantity", "l_extendedprice", "l_discount", "l_tax",
|
||||
"l_returnflag", "l_linestatus", "l_shipdate", "l_commitdate",
|
||||
"l_receiptdate", "l_shipinstruct", "l_shipmode", "l_comment"};
|
||||
const LogicalType LineitemInfo::Types[] = {
|
||||
LogicalType(LogicalTypeId::BIGINT), LogicalType(LogicalTypeId::BIGINT), LogicalType(LogicalTypeId::BIGINT),
|
||||
LogicalType(LogicalTypeId::BIGINT), LogicalType::DECIMAL(15, 2), LogicalType::DECIMAL(15, 2),
|
||||
LogicalType::DECIMAL(15, 2), LogicalType::DECIMAL(15, 2), LogicalType(LogicalTypeId::VARCHAR),
|
||||
LogicalType(LogicalTypeId::VARCHAR), LogicalType(LogicalTypeId::DATE), LogicalType(LogicalTypeId::DATE),
|
||||
LogicalType(LogicalTypeId::DATE), LogicalType(LogicalTypeId::VARCHAR), LogicalType(LogicalTypeId::VARCHAR),
|
||||
LogicalType(LogicalTypeId::VARCHAR)};
|
||||
|
||||
template <class T>
|
||||
static void CreateTPCHTable(ClientContext &context, string catalog_name, string schema, string suffix) {
|
||||
auto info = make_uniq<CreateTableInfo>();
|
||||
info->catalog = catalog_name;
|
||||
info->schema = schema;
|
||||
info->table = T::Name + suffix;
|
||||
info->on_conflict = OnCreateConflict::IGNORE_ON_CONFLICT;
|
||||
info->temporary = false;
|
||||
for (idx_t i = 0; i < T::ColumnCount; i++) {
|
||||
info->columns.AddColumn(ColumnDefinition(T::Columns[i], T::Types[i]));
|
||||
info->constraints.push_back(make_uniq<NotNullConstraint>(LogicalIndex(i)));
|
||||
}
|
||||
auto &catalog = Catalog::GetCatalog(context, catalog_name);
|
||||
catalog.CreateTable(context, std::move(info));
|
||||
}
|
||||
|
||||
// Creates all eight TPC-H tables in `catalog`.`schema`; `suffix` is appended to every table name.
void DBGenWrapper::CreateTPCHSchema(ClientContext &context, string catalog, string schema, string suffix) {
	// fixed-size tables
	CreateTPCHTable<RegionInfo>(context, catalog, schema, suffix);
	CreateTPCHTable<NationInfo>(context, catalog, schema, suffix);
	// remaining tables
	CreateTPCHTable<SupplierInfo>(context, catalog, schema, suffix);
	CreateTPCHTable<CustomerInfo>(context, catalog, schema, suffix);
	CreateTPCHTable<PartInfo>(context, catalog, schema, suffix);
	CreateTPCHTable<PartsuppInfo>(context, catalog, schema, suffix);
	CreateTPCHTable<OrdersInfo>(context, catalog, schema, suffix);
	CreateTPCHTable<LineitemInfo>(context, catalog, schema, suffix);
}
|
||||
|
||||
void skip(int table, int children, DSS_HUGE step, DBGenContext &dbgen_ctx) {
|
||||
switch (table) {
|
||||
case CUST:
|
||||
sd_cust(children, step, &dbgen_ctx);
|
||||
break;
|
||||
case SUPP:
|
||||
sd_supp(children, step, &dbgen_ctx);
|
||||
break;
|
||||
case NATION:
|
||||
sd_nation(children, step, &dbgen_ctx);
|
||||
break;
|
||||
case REGION:
|
||||
sd_region(children, step, &dbgen_ctx);
|
||||
break;
|
||||
case ORDER_LINE:
|
||||
sd_line(children, step, &dbgen_ctx);
|
||||
sd_order(children, step, &dbgen_ctx);
|
||||
break;
|
||||
case PART_PSUPP:
|
||||
sd_part(children, step, &dbgen_ctx);
|
||||
sd_psupp(children, step, &dbgen_ctx);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
struct TPCHDBgenParameters {
|
||||
TPCHDBgenParameters(ClientContext &context, Catalog &catalog, const string &schema, const string &suffix) {
|
||||
tables.resize(REGION + 1);
|
||||
for (size_t i = PART; i <= REGION; i++) {
|
||||
auto tname = get_table_name(i);
|
||||
if (!tname.empty()) {
|
||||
string full_tname = string(tname) + string(suffix);
|
||||
auto &tbl_catalog = catalog.GetEntry<TableCatalogEntry>(context, schema, full_tname);
|
||||
tables[i] = &tbl_catalog;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
vector<optional_ptr<TableCatalogEntry>> tables;
|
||||
};
|
||||
|
||||
class TPCHDataAppender {
|
||||
public:
|
||||
TPCHDataAppender(ClientContext &context, TPCHDBgenParameters ¶meters, DBGenContext base_context, idx_t flush_count) :
|
||||
context(context), parameters(parameters) {
|
||||
dbgen_ctx = base_context;
|
||||
append_info = duckdb::unique_ptr<tpch_append_information[]>(new tpch_append_information[REGION + 1]);
|
||||
memset(append_info.get(), 0, sizeof(tpch_append_information) * REGION + 1);
|
||||
for (size_t i = PART; i <= REGION; i++) {
|
||||
if (parameters.tables[i]) {
|
||||
auto &tbl_catalog = *parameters.tables[i];
|
||||
if (!tbl_catalog.IsDuckTable()) {
|
||||
throw InvalidInputException("dbgen is only supported for DuckDB database files");
|
||||
}
|
||||
append_info[i].appender = make_uniq<InternalAppender>(context, tbl_catalog, flush_count);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void GenerateTableData(int table_index, idx_t row_count, idx_t offset) {
|
||||
gen_tbl(context, table_index, static_cast<DSS_HUGE>(row_count), append_info.get(), &dbgen_ctx, offset);
|
||||
}
|
||||
|
||||
void AppendData(int children, int current_step) {
|
||||
DSS_HUGE i;
|
||||
DSS_HUGE rowcnt = 0;
|
||||
for (i = PART; i <= REGION; i++) {
|
||||
if (table & (1 << i)) {
|
||||
if (i < NATION) {
|
||||
rowcnt = dbgen_ctx.tdefs[i].base * dbgen_ctx.scale_factor;
|
||||
} else {
|
||||
rowcnt = dbgen_ctx.tdefs[i].base;
|
||||
}
|
||||
if (context.interrupted) {
|
||||
return;
|
||||
}
|
||||
if (children > 1 && current_step != -1) {
|
||||
size_t part_size = std::ceil((double)rowcnt / (double)children);
|
||||
auto part_offset = part_size * current_step;
|
||||
auto part_end = part_offset + part_size;
|
||||
rowcnt = part_end > rowcnt ? rowcnt - part_offset : part_size;
|
||||
skip(i, children, part_offset, dbgen_ctx);
|
||||
if (rowcnt > 0) {
|
||||
// generate part of the table
|
||||
GenerateTableData((int) i, rowcnt, part_offset);
|
||||
}
|
||||
} else {
|
||||
// generate full table
|
||||
GenerateTableData((int) i, rowcnt, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Flush() {
|
||||
// flush any incomplete chunks
|
||||
for (idx_t i = PART; i <= REGION; i++) {
|
||||
if (append_info[i].appender) {
|
||||
append_info[i].appender->Flush();
|
||||
append_info[i].appender.reset();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
ClientContext &context;
|
||||
TPCHDBgenParameters ¶meters;
|
||||
unique_ptr<tpch_append_information[]> append_info;
|
||||
DBGenContext dbgen_ctx;
|
||||
};
|
||||
|
||||
// Thread entry point: lets `appender` generate slice `current_step` out of `children` slices.
static void ParallelTPCHAppend(TPCHDataAppender *appender, int children, int current_step) {
	auto &worker = *appender;
	worker.AppendData(children, current_step);
}
|
||||
|
||||
// Generates TPC-H data at scale factor `flt_scale` and appends it to the tables
// `<name><suffix>` in `catalog_name`.`schema`.
//
// Does nothing when `flt_scale` is 0 or when `current_step >= children`.
// When `children > 1` and `current_step != -1` only that slice is generated, on the calling thread.
// Otherwise, if threads are available, the work is split into steps that run on worker threads in
// batches, and each batch is flushed in order while the next batch is generating.
void DBGenWrapper::LoadTPCHData(ClientContext &context, double flt_scale, string catalog_name, string schema,
                                string suffix, int children, int current_step) {
	if (flt_scale == 0) {
		return;
	}

	// select all tables and reset the remaining dbgen globals
	table = (1 << CUST) | (1 << SUPP) | (1 << NATION) | (1 << REGION) | (1 << PART_PSUPP) | (1 << ORDER_LINE);
	force = 0;
	insert_segments = 0;
	delete_segments = 0;
	insert_orders_segment = 0;
	insert_lineitem_segment = 0;
	delete_segment = 0;
	verbose = 0;
	set_seeds = 0;
	updates = 0;

	d_path = NULL;

	// base row counts per table
	DBGenContext base_context;
	tdef *tdefs = base_context.tdefs;
	tdefs[PART].base = 200000;
	tdefs[PSUPP].base = 200000;
	tdefs[SUPP].base = 10000;
	tdefs[CUST].base = 150000;
	tdefs[ORDER].base = 150000 * ORDERS_PER_CUST;
	tdefs[LINE].base = 150000 * ORDERS_PER_CUST;
	tdefs[ORDER_LINE].base = 150000 * ORDERS_PER_CUST;
	tdefs[PART_PSUPP].base = 200000;
	tdefs[NATION].base = NATIONS_MAX;
	tdefs[REGION].base = NATIONS_MAX;

	if (flt_scale < MIN_SCALE) {
		// fractional scale factor: shrink the base counts instead (in thousandths), at least one row each
		base_context.scale_factor = 1;
		const int int_scale = static_cast<int>(1000 * flt_scale);
		for (int tbl = PART; tbl < REGION; tbl++) {
			tdefs[tbl].base = static_cast<DSS_HUGE>(int_scale * tdefs[tbl].base) / 1000;
			if (tdefs[tbl].base < 1) {
				tdefs[tbl].base = 1;
			}
		}
	} else {
		base_context.scale_factor = static_cast<long>(flt_scale);
	}

	if (current_step >= children) {
		return;
	}

	load_dists(10 * 1024 * 1024, &base_context); // 10MiB
	/* have to do this after init */
	tdefs[NATION].base = nations.count;
	tdefs[REGION].base = regions.count;

	auto &catalog = Catalog::GetCatalog(context, catalog_name);

	TPCHDBgenParameters parameters(context, catalog, schema, suffix);
#ifndef DUCKDB_NO_THREADS
	const bool explicit_partial_generation = children > 1 && current_step != -1;
	auto thread_count = TaskScheduler::GetScheduler(context).NumberOfThreads();
	if (explicit_partial_generation || thread_count <= 1) {
#endif
		// if we are doing explicit partial generation the parallelism is managed outside of dbgen
		// only generate the chunk we are interested in
		TPCHDataAppender appender(context, parameters, base_context, BaseAppender::DEFAULT_FLUSH_COUNT);
		appender.AppendData(children, current_step);
		appender.Flush();
#ifndef DUCKDB_NO_THREADS
	} else {
		// we split into 20 children per scale factor by default
		static constexpr idx_t CHILDREN_PER_SCALE_FACTOR = 20;
		idx_t child_count;
		if (flt_scale < 1) {
			child_count = 1;
		} else {
			child_count = MinValue<idx_t>(static_cast<idx_t>(CHILDREN_PER_SCALE_FACTOR * flt_scale), MAX_CHILDREN);
		}
		idx_t step = 0;
		vector<TPCHDataAppender> finished_appenders;
		while (step < child_count) {
			// launch N threads
			vector<TPCHDataAppender> new_appenders;
			vector<std::thread> threads;
			idx_t launched_step = step;
			// initialize the appenders for each thread
			// note we prevent the threads themselves from flushing the appenders by specifying a very high flush
			// count here
			for (idx_t thr_idx = 0; thr_idx < thread_count && launched_step < child_count;
			     thr_idx++, launched_step++) {
				new_appenders.emplace_back(context, parameters, base_context, NumericLimits<int64_t>::Maximum());
			}
			// launch the threads only after all appenders exist, so the element addresses stay stable
			for (idx_t thr_idx = 0; thr_idx < new_appenders.size(); thr_idx++) {
				threads.emplace_back(ParallelTPCHAppend, &new_appenders[thr_idx], child_count, step);
				step++;
			}
			ErrorData error;
			try {
				// flush the previous batch of appenders while waiting (if any are there)
				// now flush the appenders in-order
				for (auto &previous : finished_appenders) {
					previous.Flush();
				}
			} catch (std::exception &ex) {
				// remember the error; it is rethrown once the worker threads have been joined
				error = ErrorData(ex);
			}
			finished_appenders.clear();
			// wait for all threads to finish
			for (auto &worker : threads) {
				worker.join();
			}
			if (error.HasError()) {
				error.Throw();
			}
			finished_appenders = std::move(new_appenders);
		}
		// flush the final batch of appenders
		for (auto &previous : finished_appenders) {
			previous.Flush();
		}
	}
#endif
	cleanup_dists();
}
|
||||
|
||||
// Returns the text of TPC-H query number `query` (1-based).
// Throws SyntaxException when `query` is outside [1, TPCH_QUERIES_COUNT].
string DBGenWrapper::GetQuery(int query) {
	const bool in_range = query > 0 && query <= TPCH_QUERIES_COUNT;
	if (!in_range) {
		throw SyntaxException("Out of range TPC-H query number %d", query);
	}
	// the query table is 0-based
	return TPCH_QUERIES[query - 1];
}
|
||||
|
||||
// Returns the stored answer of TPC-H query number `query` (1-based) for scale factor `sf`.
// Answers exist only for the scale factors 0.01, 0.1 and 1.
// Throws SyntaxException when `query` is outside [1, TPCH_QUERIES_COUNT] and
// NotImplementedException for any other scale factor.
string DBGenWrapper::GetAnswer(double sf, int query) {
	const bool in_range = query > 0 && query <= TPCH_QUERIES_COUNT;
	if (!in_range) {
		throw SyntaxException("Out of range TPC-H query number %d", query);
	}
	// the answer tables are 0-based
	if (sf == 0.01) {
		return TPCH_ANSWERS_SF0_01[query - 1];
	}
	if (sf == 0.1) {
		return TPCH_ANSWERS_SF0_1[query - 1];
	}
	if (sf == 1) {
		return TPCH_ANSWERS_SF1[query - 1];
	}
	throw NotImplementedException("Don't have TPC-H answers for SF %llf!", sf);
}
|
||||
|
||||
} // namespace tpch
|
||||
Reference in New Issue
Block a user