should be it

2025-10-24 19:21:19 -05:00
parent a4b23fc57c
commit f09560c7b1
14047 changed files with 3161551 additions and 1 deletions
--- a/external/duckdb/extension/tpcds/dsdgen/dsdgen-c/scaling.cpp
+++ b/external/duckdb/extension/tpcds/dsdgen/dsdgen-c/scaling.cpp
@@ -0,0 +1,749 @@
+/*
+ * Legal Notice
+ *
+ * This document and associated source code (the "Work") is a part of a
+ * benchmark specification maintained by the TPC.
+ *
+ * The TPC reserves all right, title, and interest to the Work as provided
+ * under U.S. and international laws, including without limitation all patent
+ * and trademark rights therein.
+ *
+ * No Warranty
+ *
+ * 1.1 TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THE INFORMATION
+ *     CONTAINED HEREIN IS PROVIDED "AS IS" AND WITH ALL FAULTS, AND THE
+ *     AUTHORS AND DEVELOPERS OF THE WORK HEREBY DISCLAIM ALL OTHER
+ *     WARRANTIES AND CONDITIONS, EITHER EXPRESS, IMPLIED OR STATUTORY,
+ *     INCLUDING, BUT NOT LIMITED TO, ANY (IF ANY) IMPLIED WARRANTIES,
+ *     DUTIES OR CONDITIONS OF MERCHANTABILITY, OF FITNESS FOR A PARTICULAR
+ *     PURPOSE, OF ACCURACY OR COMPLETENESS OF RESPONSES, OF RESULTS, OF
+ *     WORKMANLIKE EFFORT, OF LACK OF VIRUSES, AND OF LACK OF NEGLIGENCE.
+ *     ALSO, THERE IS NO WARRANTY OR CONDITION OF TITLE, QUIET ENJOYMENT,
+ *     QUIET POSSESSION, CORRESPONDENCE TO DESCRIPTION OR NON-INFRINGEMENT
+ *     WITH REGARD TO THE WORK.
+ * 1.2 IN NO EVENT WILL ANY AUTHOR OR DEVELOPER OF THE WORK BE LIABLE TO
+ *     ANY OTHER PARTY FOR ANY DAMAGES, INCLUDING BUT NOT LIMITED TO THE
+ *     COST OF PROCURING SUBSTITUTE GOODS OR SERVICES, LOST PROFITS, LOSS
+ *     OF USE, LOSS OF DATA, OR ANY INCIDENTAL, CONSEQUENTIAL, DIRECT,
+ *     INDIRECT, OR SPECIAL DAMAGES WHETHER UNDER CONTRACT, TORT, WARRANTY,
+ *     OR OTHERWISE, ARISING IN ANY WAY OUT OF THIS OR ANY OTHER AGREEMENT
+ *     RELATING TO THE WORK, WHETHER OR NOT SUCH AUTHOR OR DEVELOPER HAD
+ *     ADVANCE NOTICE OF THE POSSIBILITY OF SUCH DAMAGES.
+ *
+ * Contributors:
+ * Gradient Systems
+ */
+#include "config.h"
+#include "porting.h"
+#include "init.h"
+#include <stdio.h>
+#include <assert.h>
+#include <stdio.h>
+#include "config.h"
+#include "porting.h"
+#include "dist.h"
+#include "constants.h"
+#include "genrand.h"
+#include "columns.h"
+#include "tdefs.h"
+#include "error_msg.h"
+#include "r_params.h"
+#include "tdefs.h"
+#include "tdef_functions.h"
+#include "w_inventory.h"
+#include "scaling.h"
+#include "tpcds.idx.h"
+#include "parallel.h"
+#include "scd.h"
+
+static struct SCALING_T {
+	ds_key_t kBaseRowcount;
+	ds_key_t kNextInsertValue;
+	int nUpdatePercentage;
+	ds_key_t kDayRowcount[6];
+} arRowcount[MAX_TABLE + 1];
+static int arUpdateDates[6];
+static int arInventoryUpdateDates[6];
+
+static int arScaleVolume[9] = {1, 10, 100, 300, 1000, 3000, 10000, 30000, 100000};
+
+void setUpdateScaling(int table);
+int row_skip(int tbl, ds_key_t count);
+
+/*
+ * Routine:
+ * Purpose:
+ * Algorithm:
+ * Data Structures:
+ *
+ * Params:
+ * Returns:
+ * Called By:
+ * Calls:
+ * Assumptions:
+ * Side Effects:
+ * TODO: None
+ */
+int getScaleSlot(int nTargetGB) {
+	int i;
+
+	for (i = 0; nTargetGB > arScaleVolume[i]; i++)
+		;
+
+	return (i);
+}
+
+/*
+ * Routine: LogScale(void)
+ * Purpose: use the command line volume target, in GB, to calculate the global
+ * rowcount multiplier Algorithm: Data Structures:
+ *
+ * Params:
+ * Returns:
+ * Called By:
+ * Calls:
+ * Assumptions:
+ * Side Effects: arRowcounts are set to the appropriate number of rows for the
+ * target scale factor
+ * TODO: None
+ */
+static ds_key_t LogScale(int nTable, int nTargetGB) {
+	int nIndex = 1, nDelta, i;
+	float fOffset;
+	ds_key_t hgRowcount = 0;
+
+	i = getScaleSlot(nTargetGB);
+
+	nDelta = dist_weight(NULL, "rowcounts", nTable + 1, i + 1) - dist_weight(NULL, "rowcounts", nTable + 1, i);
+	fOffset = (float)(nTargetGB - arScaleVolume[i - 1]) / (float)(arScaleVolume[i] - arScaleVolume[i - 1]);
+
+	hgRowcount = (int)(fOffset * (float)nDelta);
+	hgRowcount += dist_weight(NULL, "rowcounts", nTable + 1, nIndex);
+
+	return (hgRowcount);
+}
+
+/*
+ * Routine: StaticScale(void)
+ * Purpose: use the command line volume target, in GB, to calculate the global
+ * rowcount multiplier Algorithm: Data Structures:
+ *
+ * Params:
+ * Returns:
+ * Called By:
+ * Calls:
+ * Assumptions:
+ * Side Effects: arRowcounts are set to the appropriate number of rows for the
+ * target scale factor
+ * TODO: None
+ */
+static ds_key_t StaticScale(int nTable, int nTargetGB) {
+	return (dist_weight(NULL, "rowcounts", nTable + 1, 1));
+}
+
+/*
+ * Routine: LinearScale(void)
+ * Purpose: use the command line volume target, in GB, to calculate the global
+ *rowcount multiplier Algorithm: Data Structures:
+ *
+ * Params:
+ * Returns:
+ * Called By:
+ * Calls:
+ * Assumptions: scale factors defined in rowcounts distribution define
+ *1/10/100/1000/... GB with sufficient accuracy Side Effects: arRowcounts are
+ *set to the appropriate number of rows for the target scale factor
+ * TODO: None
+ */
+static ds_key_t LinearScale(int nTable, int nTargetGB) {
+	int i;
+	ds_key_t hgRowcount = 0;
+
+	for (i = 8; i >= 0; i--) /* work from large scales down)*/
+	{
+		/*
+		 * use the defined rowcounts to build up the target GB volume
+		 */
+		while (nTargetGB >= arScaleVolume[i]) {
+			hgRowcount += dist_weight(NULL, "rowcounts", nTable + 1, i + 1);
+			nTargetGB -= arScaleVolume[i];
+		}
+	}
+
+	return (hgRowcount);
+}
+/*
+ * Routine:
+ * Purpose:
+ * Algorithm:
+ * Data Structures:
+ *
+ * Params:
+ * Returns:
+ * Called By:
+ * Calls:
+ * Assumptions:
+ * Side Effects:
+ * TODO: None
+ */
+ds_key_t getIDCount(int nTable) {
+	ds_key_t kRowcount, kUniqueCount;
+	tdef *pTdef;
+
+	kRowcount = get_rowcount(nTable);
+	if (nTable >= PSEUDO_TABLE_START)
+		return (kRowcount);
+	pTdef = getSimpleTdefsByNumber(nTable);
+	if (pTdef->flags & FL_TYPE_2) {
+		kUniqueCount = (kRowcount / 6) * 3;
+		switch (kRowcount % 6) {
+		case 1:
+			kUniqueCount += 1;
+			break;
+		case 2:
+		case 3:
+			kUniqueCount += 2;
+			break;
+		case 4:
+		case 5:
+			kUniqueCount += 3;
+			break;
+		}
+		return (kUniqueCount);
+	} else {
+		return (kRowcount);
+	}
+}
+
+/*
+ * Routine: get_rowcount(int table)
+ * Purpose:
+ * Algorithm:
+ * Data Structures:
+ *
+ * Params:
+ * Returns:
+ * Called By:
+ * Calls:
+ * Assumptions:
+ * Side Effects:
+ * TODO: 20040820 jms Need to address special case scaling in a more general
+ * fashion
+ */
+ds_key_t get_rowcount(int table) {
+
+	static double nScale;
+	int nTable, nMultiplier, i, nBadScale = 0, nRowcountOffset = 0;
+	tdef *pTdef;
+
+	if (!InitConstants::get_rowcount_init) {
+		nScale = get_dbl("SCALE");
+		if (nScale > 100000)
+			ReportErrorNoLine(QERR_BAD_SCALE, NULL, 1);
+
+		memset(arRowcount, 0, sizeof(long) * MAX_TABLE);
+		int iScale = nScale < 1 ? 1 : int(nScale);
+		for (nTable = CALL_CENTER; nTable <= MAX_TABLE; nTable++) {
+			switch (iScale) {
+			case 100000:
+				arRowcount[nTable].kBaseRowcount = dist_weight(NULL, "rowcounts", nTable + nRowcountOffset + 1, 9);
+				break;
+			case 30000:
+				arRowcount[nTable].kBaseRowcount = dist_weight(NULL, "rowcounts", nTable + nRowcountOffset + 1, 8);
+				break;
+			case 10000:
+				arRowcount[nTable].kBaseRowcount = dist_weight(NULL, "rowcounts", nTable + nRowcountOffset + 1, 7);
+				break;
+			case 3000:
+				arRowcount[nTable].kBaseRowcount = dist_weight(NULL, "rowcounts", nTable + nRowcountOffset + 1, 6);
+				break;
+			case 1000:
+				arRowcount[nTable].kBaseRowcount = dist_weight(NULL, "rowcounts", nTable + nRowcountOffset + 1, 5);
+				break;
+			case 300:
+				nBadScale = QERR_BAD_SCALE;
+				arRowcount[nTable].kBaseRowcount = dist_weight(NULL, "rowcounts", nTable + nRowcountOffset + 1, 4);
+				break;
+			case 100:
+				nBadScale = QERR_BAD_SCALE;
+				arRowcount[nTable].kBaseRowcount = dist_weight(NULL, "rowcounts", nTable + nRowcountOffset + 1, 3);
+				break;
+			case 10:
+				nBadScale = QERR_BAD_SCALE;
+				arRowcount[nTable].kBaseRowcount = dist_weight(NULL, "rowcounts", nTable + nRowcountOffset + 1, 2);
+				break;
+			case 1:
+				nBadScale = QERR_QUALIFICATION_SCALE;
+				arRowcount[nTable].kBaseRowcount = dist_weight(NULL, "rowcounts", nTable + nRowcountOffset + 1, 1);
+				break;
+			default:
+				nBadScale = QERR_BAD_SCALE;
+				int mem = dist_member(NULL, "rowcounts", nTable + 1, 3);
+				switch (mem) {
+				case 2:
+					arRowcount[nTable].kBaseRowcount = LinearScale(nTable + nRowcountOffset, nScale);
+					break;
+				case 1:
+					arRowcount[nTable].kBaseRowcount = StaticScale(nTable + nRowcountOffset, nScale);
+					break;
+				case 3:
+					arRowcount[nTable].kBaseRowcount = LogScale(nTable + nRowcountOffset, nScale);
+					break;
+				} /* switch(FL_SCALE_MASK) */
+				break;
+			} /* switch(nScale) */
+
+			/* now adjust for the multiplier */
+			nMultiplier = 1;
+			if (nTable < PSEUDO_TABLE_START) {
+				pTdef = getSimpleTdefsByNumber(nTable);
+				nMultiplier = (pTdef->flags & FL_TYPE_2) ? 2 : 1;
+			}
+			for (i = 1; i <= dist_member(NULL, "rowcounts", nTable + 1, 2); i++) {
+				nMultiplier *= 10;
+			}
+			arRowcount[nTable].kBaseRowcount *= nMultiplier;
+			if (arRowcount[nTable].kBaseRowcount >= 0) {
+				if (nScale < 1) {
+					int mem = dist_member(NULL, "rowcounts", nTable + 1, 3);
+					if (!(mem == 1 && nMultiplier == 1)) {
+						arRowcount[nTable].kBaseRowcount = int(arRowcount[nTable].kBaseRowcount * nScale);
+					}
+					if (arRowcount[nTable].kBaseRowcount == 0) {
+						arRowcount[nTable].kBaseRowcount = 1;
+					}
+				}
+			}
+		} /* for each table */
+
+		//		if (nBadScale && !is_set("QUIET"))
+		//			ReportErrorNoLine(nBadScale, NULL, 0);
+
+		InitConstants::get_rowcount_init = 1;
+	}
+
+	if (table == INVENTORY)
+		return (sc_w_inventory(nScale));
+	if (table == S_INVENTORY)
+		return (getIDCount(ITEM) * get_rowcount(WAREHOUSE) * 6);
+
+	return (arRowcount[table].kBaseRowcount);
+}
+
+/*
+ * Routine: setUpdateDates
+ * Purpose: determine the dates for fact table updates
+ * Algorithm:
+ * Data Structures:
+ *
+ * Params:
+ * Returns:
+ * Called By:
+ * Calls:
+ * Assumptions:
+ * Side Effects:
+ * TODO: None
+ */
+void setUpdateDates(void) {
+	assert(0);
+	int nDay, nUpdate, i;
+	date_t dtTemp;
+
+	nUpdate = get_int("UPDATE");
+	while (nUpdate--) {
+		/* pick two adjacent days in the low density zone */
+		arUpdateDates[0] = getSkewedJulianDate(calendar_low, 0);
+		jtodt(&dtTemp, arUpdateDates[0]);
+		dist_weight(&nDay, "calendar", day_number(&dtTemp) + 1, calendar_low);
+		if (nDay)
+			arUpdateDates[1] = arUpdateDates[0] + 1;
+		else
+			arUpdateDates[1] = arUpdateDates[0] - 1;
+
+		/*
+		 * pick the related Thursdays for inventory
+		 * 1. shift first date to the Thursday in the current update week
+		 * 2. move forward/back to get into correct comparability zone
+		 * 3. set next date to next/prior Thursday based on comparability zone
+		 */
+		jtodt(&dtTemp, arUpdateDates[0] + (4 - set_dow(&dtTemp)));
+		dist_weight(&nDay, "calendar", day_number(&dtTemp), calendar_low);
+		arInventoryUpdateDates[0] = dtTemp.julian;
+		if (!nDay) {
+			jtodt(&dtTemp, dtTemp.julian - 7);
+			arInventoryUpdateDates[0] = dtTemp.julian;
+			dist_weight(&nDay, "calendar", day_number(&dtTemp), calendar_low);
+			if (!nDay)
+				arInventoryUpdateDates[0] += 14;
+		}
+
+		arInventoryUpdateDates[1] = arInventoryUpdateDates[0] + 7;
+		jtodt(&dtTemp, arInventoryUpdateDates[1]);
+		dist_weight(&nDay, "calendar", day_number(&dtTemp) + 1, calendar_low);
+		if (!nDay)
+			arInventoryUpdateDates[1] -= 14;
+
+		/* repeat for medium calendar zone */
+		arUpdateDates[2] = getSkewedJulianDate(calendar_medium, 0);
+		jtodt(&dtTemp, arUpdateDates[2]);
+		dist_weight(&nDay, "calendar", day_number(&dtTemp) + 1, calendar_medium);
+		if (nDay)
+			arUpdateDates[3] = arUpdateDates[2] + 1;
+		else
+			arUpdateDates[3] = arUpdateDates[2] - 1;
+
+		jtodt(&dtTemp, arUpdateDates[2] + (4 - set_dow(&dtTemp)));
+		dist_weight(&nDay, "calendar", day_number(&dtTemp), calendar_medium);
+		arInventoryUpdateDates[2] = dtTemp.julian;
+		if (!nDay) {
+			jtodt(&dtTemp, dtTemp.julian - 7);
+			arInventoryUpdateDates[2] = dtTemp.julian;
+			dist_weight(&nDay, "calendar", day_number(&dtTemp), calendar_medium);
+			if (!nDay)
+				arInventoryUpdateDates[2] += 14;
+		}
+
+		arInventoryUpdateDates[3] = arInventoryUpdateDates[2] + 7;
+		jtodt(&dtTemp, arInventoryUpdateDates[3]);
+		dist_weight(&nDay, "calendar", day_number(&dtTemp), calendar_medium);
+		if (!nDay)
+			arInventoryUpdateDates[3] -= 14;
+
+		/* repeat for high calendar zone */
+		arUpdateDates[4] = getSkewedJulianDate(calendar_high, 0);
+		jtodt(&dtTemp, arUpdateDates[4]);
+		dist_weight(&nDay, "calendar", day_number(&dtTemp) + 1, calendar_high);
+		if (nDay)
+			arUpdateDates[5] = arUpdateDates[4] + 1;
+		else
+			arUpdateDates[5] = arUpdateDates[4] - 1;
+
+		jtodt(&dtTemp, arUpdateDates[4] + (4 - set_dow(&dtTemp)));
+		dist_weight(&nDay, "calendar", day_number(&dtTemp), calendar_high);
+		arInventoryUpdateDates[4] = dtTemp.julian;
+		if (!nDay) {
+			jtodt(&dtTemp, dtTemp.julian - 7);
+			arInventoryUpdateDates[4] = dtTemp.julian;
+			dist_weight(&nDay, "calendar", day_number(&dtTemp), calendar_high);
+			if (!nDay)
+				arInventoryUpdateDates[4] += 14;
+		}
+
+		arInventoryUpdateDates[5] = arInventoryUpdateDates[4] + 7;
+		jtodt(&dtTemp, arInventoryUpdateDates[5]);
+		dist_weight(&nDay, "calendar", day_number(&dtTemp), calendar_high);
+		if (!nDay)
+			arInventoryUpdateDates[5] -= 14;
+	}
+
+	//	/*
+	//	 * output the update dates for this update set
+	//	 */
+	//	openDeleteFile(1);
+	//	for (i = 0; i < 6; i += 2)
+	//		print_delete(&arUpdateDates[i]);
+	//
+	//	/*
+	//	 * inventory uses separate dates
+	//	 */
+	//	openDeleteFile(2);
+	//	for (i = 0; i < 6; i += 2)
+	//		print_delete(&arInventoryUpdateDates[i]);
+	//	openDeleteFile(0);
+
+	return;
+}
+
+/*
+ * Routine:
+ * Purpose:
+ * Algorithm:
+ * Data Structures:
+ *
+ * Params:
+ * Returns:
+ * Called By:
+ * Calls:
+ * Assumptions:
+ * Side Effects:
+ * TODO: None
+ */
+int getUpdateDate(int nTable, ds_key_t kRowcount) {
+	static int nIndex = 0, nLastTable = -1;
+
+	if (nLastTable != nTable) {
+		nLastTable = nTable;
+		get_rowcount(nTable);
+		nIndex = 0;
+	}
+
+	for (nIndex = 0; kRowcount > arRowcount[nTable].kDayRowcount[nIndex]; nIndex++)
+		if (nIndex == 5)
+			break;
+
+	if (nTable == S_INVENTORY) {
+		return (arInventoryUpdateDates[nIndex]);
+	} else
+		return (arUpdateDates[nIndex]);
+}
+
+/*
+ * Routine: getUpdateID(int nTable, ds_key_t *pDest)
+ * Purpose: select the primary key for an update set row
+ * Algorithm:
+ * Data Structures:
+ *
+ * Params:
+ * Returns: 1 if the row is new, 0 if it is reusing an existing ID
+ * Called By:
+ * Calls:
+ * Assumptions:
+ * Side Effects:
+ * TODO: 20040326 jms getUpdateID() this MUST be updated for 64bit -- all usages
+ * use casts today
+ * TODO:	20060102 jms this will need to be looked at for parallelism at some
+ * point
+ */
+/*
+int
+getUpdateID(ds_key_t *pDest, int nTable, int nColumn)
+{
+    int bIsUpdate = 0,
+        nTemp;
+
+    if (genrand_integer(NULL, DIST_UNIFORM, 0, 99, 0, nColumn) <
+arRowcount[nTable].nUpdatePercentage)
+    {
+        bIsUpdate = 1;
+        genrand_integer(&nTemp, DIST_UNIFORM, 1, (int)getIDCount(nTable), 0,
+nColumn); *pDest = (ds_key_t)nTemp;
+    }
+    else
+    {
+        *pDest = ++arRowcount[nTable].kNextInsertValue;
+    }
+
+    return(bIsUpdate);
+}
+*/
+
+/*
+ * Routine: getSkewedJulianDate()
+ * Purpose: return a julian date based on the given skew and column
+ * Algorithm:
+ * Data Structures:
+ *
+ * Params:
+ * Returns:
+ * Called By:
+ * Calls:
+ * Assumptions:
+ * Side Effects:
+ * TODO: None
+ */
+int getSkewedJulianDate(int nWeight, int nColumn) {
+	int i;
+	date_t Date;
+
+	pick_distribution(&i, "calendar", 1, nWeight, nColumn);
+	genrand_integer(&Date.year, DIST_UNIFORM, YEAR_MINIMUM, YEAR_MAXIMUM, 0, nColumn);
+	dist_member(&Date.day, "calendar", i, 3);
+	dist_member(&Date.month, "calendar", i, 5);
+	return (dttoj(&Date));
+}
+
+/*
+ * Routine: initializeOrderUpdate()
+ * Purpose: skip over prior updates for the named table
+ * Algorithm:
+ * Data Structures:
+ *
+ * Params:
+ * Returns:
+ * Called By:
+ * Calls:
+ * Assumptions:
+ * Side Effects:
+ * TODO: None
+ */
+/*
+int
+initializeOrderUpdates(int nParent, int nChild, int nIDColumn, int nDateColumn,
+int *pnOrderNumber)
+{
+   int i,
+      nRowcount,
+      nRowsRemaining,
+      nStep = 0;
+   date_t Date;
+
+
+        *pnOrderNumber = 0;
+
+      for (i=0; i < (get_int("UPDATE") - 1); i++)
+        {
+            nRowsRemaining = (int)get_rowcount(nParent);
+         while (nRowsRemaining > 0)
+         {
+            nStep = nStep % 3;
+            nStep += 1;
+            Date.julian = getSkewedJulianDate((nStep++ % 3) + 8, nDateColumn);
+            nRowcount = (int)dateScaling(getTableFromColumn(nIDColumn),
+Date.julian); *pnOrderNumber += nRowcount; row_skip(nParent, nRowcount);
+            row_skip(nChild, LINES_PER_ORDER * nRowcount);
+            nRowsRemaining -= nRowcount;
+         }
+        }
+
+      return(nStep);
+}
+*/
+
+/*
+ * Routine: dateScaling(int nTable, ds_key_t jDate)
+ * Purpose: determine the number of rows to build for a given date and fact
+ * table Algorithm: Data Structures:
+ *
+ * Params:
+ * Returns:
+ * Called By:
+ * Calls:
+ * Assumptions:
+ * Side Effects:
+ * TODO: None
+ */
+ds_key_t dateScaling(int nTable, ds_key_t jDate) {
+	static dist_t *pDist;
+	d_idx_t *pDistIndex;
+	date_t Date;
+	int nDateWeight = 1, nCalendarTotal, nDayWeight;
+	ds_key_t kRowCount = -1;
+	tdef *pTdef = getSimpleTdefsByNumber(nTable);
+
+	if (!InitConstants::dateScaling_init) {
+		pDistIndex = find_dist("calendar");
+		pDist = pDistIndex->dist;
+		if (!pDist)
+			ReportError(QERR_NO_MEMORY, "dateScaling()", 1);
+		InitConstants::dateScaling_init = 1;
+	}
+
+	jtodt(&Date, (int)jDate);
+
+	switch (nTable) {
+	case STORE_SALES:
+	case CATALOG_SALES:
+	case WEB_SALES:
+		kRowCount = get_rowcount(nTable);
+		nDateWeight = calendar_sales;
+		break;
+	case S_CATALOG_ORDER:
+		kRowCount = get_rowcount(CATALOG_SALES);
+		nDateWeight = calendar_sales;
+		break;
+	case S_PURCHASE:
+		kRowCount = get_rowcount(STORE_SALES);
+		nDateWeight = calendar_sales;
+		break;
+	case S_WEB_ORDER:
+		kRowCount = get_rowcount(WEB_SALES);
+		nDateWeight = calendar_sales;
+		break;
+	case S_INVENTORY:
+	case INVENTORY:
+		nDateWeight = calendar_uniform;
+		kRowCount = get_rowcount(WAREHOUSE) * getIDCount(ITEM);
+		break;
+	default:
+		ReportErrorNoLine(QERR_TABLE_NOP, pTdef->name, 1);
+		break;
+	}
+
+	if (nTable != INVENTORY) /* inventory rowcount is uniform thorughout the year */
+	{
+		if (is_leap(Date.year))
+			nDateWeight += 1;
+
+		nCalendarTotal = dist_max(pDist, nDateWeight);
+		nCalendarTotal *= 5; /* assumes date range is 5 years */
+
+		dist_weight(&nDayWeight, "calendar", day_number(&Date), nDateWeight);
+		kRowCount *= nDayWeight;
+		kRowCount += nCalendarTotal / 2;
+		kRowCount /= nCalendarTotal;
+	}
+
+	return (kRowCount);
+}
+
+/*
+ * Routine: getUpdateBase(int nTable)
+ * Purpose: return the offset to the first order in this update set for a given
+ * table Algorithm: Data Structures:
+ *
+ * Params:
+ * Returns:
+ * Called By:
+ * Calls:
+ * Assumptions:
+ * Side Effects:
+ * TODO: None
+ */
+ds_key_t getUpdateBase(int nTable) {
+	return (arRowcount[nTable - S_BRAND].kNextInsertValue);
+}
+
+/*
+ * Routine:
+ * Purpose:
+ * Algorithm:
+ * Data Structures:
+ *
+ * Params:
+ * Returns:
+ * Called By:
+ * Calls:
+ * Assumptions:
+ * Side Effects:
+ * TODO: None
+ */
+void setUpdateScaling(int nTable) {
+	tdef *pTdef;
+	int i, nBaseTable;
+	ds_key_t kNewRowcount = 0;
+
+	pTdef = getSimpleTdefsByNumber(nTable);
+	if (!(pTdef->flags & FL_SOURCE_DDL) || !(pTdef->flags & FL_DATE_BASED) || (pTdef->flags & FL_NOP))
+		return;
+
+	switch (nTable) {
+	case S_PURCHASE:
+		nBaseTable = STORE_SALES;
+		break;
+	case S_CATALOG_ORDER:
+		nBaseTable = CATALOG_SALES;
+		break;
+	case S_WEB_ORDER:
+		nBaseTable = WEB_SALES;
+		break;
+	case S_INVENTORY:
+		nBaseTable = INVENTORY;
+		break;
+	default:
+		fprintf(stderr, "ERROR: Invalid table in setUpdateScaling\n");
+		exit(1);
+		break;
+	}
+
+	arRowcount[nTable].kNextInsertValue = arRowcount[nTable].kBaseRowcount;
+
+	for (i = 0; i < 6; i++) {
+		kNewRowcount += dateScaling(nBaseTable, arUpdateDates[i]);
+		arRowcount[nTable].kDayRowcount[i] = kNewRowcount;
+	}
+
+	arRowcount[nTable].kBaseRowcount = kNewRowcount;
+	arRowcount[nTable].kNextInsertValue += kNewRowcount * (get_int("update") - 1);
+
+	return;
+}