/*
 * Legal Notice
 *
 * This document and associated source code (the "Work") is a part of a
 * benchmark specification maintained by the TPC.
 *
 * The TPC reserves all right, title, and interest to the Work as provided
 * under U.S. and international laws, including without limitation all patent
 * and trademark rights therein.
 *
 * No Warranty
 *
 * 1.1 TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THE INFORMATION
 *     CONTAINED HEREIN IS PROVIDED "AS IS" AND WITH ALL FAULTS, AND THE
 *     AUTHORS AND DEVELOPERS OF THE WORK HEREBY DISCLAIM ALL OTHER
 *     WARRANTIES AND CONDITIONS, EITHER EXPRESS, IMPLIED OR STATUTORY,
 *     INCLUDING, BUT NOT LIMITED TO, ANY (IF ANY) IMPLIED WARRANTIES,
 *     DUTIES OR CONDITIONS OF MERCHANTABILITY, OF FITNESS FOR A PARTICULAR
 *     PURPOSE, OF ACCURACY OR COMPLETENESS OF RESPONSES, OF RESULTS, OF
 *     WORKMANLIKE EFFORT, OF LACK OF VIRUSES, AND OF LACK OF NEGLIGENCE.
 *     ALSO, THERE IS NO WARRANTY OR CONDITION OF TITLE, QUIET ENJOYMENT,
 *     QUIET POSSESSION, CORRESPONDENCE TO DESCRIPTION OR NON-INFRINGEMENT
 *     WITH REGARD TO THE WORK.
 * 1.2 IN NO EVENT WILL ANY AUTHOR OR DEVELOPER OF THE WORK BE LIABLE TO
 *     ANY OTHER PARTY FOR ANY DAMAGES, INCLUDING BUT NOT LIMITED TO THE
 *     COST OF PROCURING SUBSTITUTE GOODS OR SERVICES, LOST PROFITS, LOSS
 *     OF USE, LOSS OF DATA, OR ANY INCIDENTAL, CONSEQUENTIAL, DIRECT,
 *     INDIRECT, OR SPECIAL DAMAGES WHETHER UNDER CONTRACT, TORT, WARRANTY,
 *     OR OTHERWISE, ARISING IN ANY WAY OUT OF THIS OR ANY OTHER AGREEMENT
 *     RELATING TO THE WORK, WHETHER OR NOT SUCH AUTHOR OR DEVELOPER HAD
 *     ADVANCE NOTICE OF THE POSSIBILITY OF SUCH DAMAGES.
 *
 * Contributors:
 * Gradient Systems
 */
#include "config.h"
#include "porting.h"
#include "init.h"
#include <stdio.h>
#include "date.h"
#include "decimal.h"
#include "dist.h"
#include "constants.h"
#include "columns.h"
#include "genrand.h"
#include "tdefs.h"
#include "tables.h"
#include "build_support.h"
#include "tpcds.idx.h"
#include "scaling.h"
#include "w_web_sales.h"
#include "error_msg.h"
#include "tdefs.h"
#include "scd.h"
#include "r_params.h"
#include "sparse.h"

/* Forward declaration: date_join() hands its WEB_SITE/WEB_PAGE requests to
 * web_join(), which is defined further down in this file. */
static ds_key_t web_join(int col, ds_key_t join_key);

/*
 * Routine: date_join(int from_tbl, int from_col, ds_key_t join_count, int nYear)
 * Purpose: account for the different date-adjusted patterns in the data set
 * Algorithm:
 *	- sales tables: pick a day from the "calendar" distribution using weight
 *	  set calendar_sales + is_leap(nYear)
 *	- returns tables: add a uniformly selected lag to join_count; the lag
 *	  range is twice the channel's MIN/MAX_SHIP_DELAY
 *	- WEB_SITE/WEB_PAGE: delegate to web_join()
 *	- any other table: pick a day from the "calendar" distribution using
 *	  weight set 1 + is_leap(nYear)
 * Data Structures:
 *	jToday caches the julian value of TODAYS_DATE; it is computed while
 *	InitConstants::date_join_init is 0
 *
 * Params:
 *	from_tbl	table id that selects the date pattern
 *	from_col	column id, used as the random number stream
 *	join_count	base value for returns tables; passed through to web_join()
 *			for WEB_SITE/WEB_PAGE; unused otherwise
 *	nYear		year in which the picked day is placed
 * Returns:
 *	returns tables: join_count + lag
 *	WEB_SITE/WEB_PAGE: the result of web_join()
 *	otherwise: julian value of January 1st of nYear plus the picked day, or
 *	-1 when that value is later than TODAYS_DATE
 * Called By: mk_join()
 * Calls: strtodt(), dttoj(), pick_distribution(), genrand_integer(),
 *	web_join()
 * Assumptions:
 * Side Effects: sets InitConstants::date_join_init; consumes values from the
 *	from_col random stream
 * TODO: Relies on existing RNG code, which isn't really 64bit; will probably
 * require a rework of the genrand_xx routines
 */
static ds_key_t date_join(int from_tbl, int from_col, ds_key_t join_count, int nYear) {
	int nDay, nLag, nMinDelay, nMaxDelay, nResult;
	static int jToday;
	date_t TempDate;

	if (InitConstants::date_join_init == 0) {
		strtodt(&TempDate, TODAYS_DATE);
		jToday = dttoj(&TempDate);
		InitConstants::date_join_init = 1;
	}

	switch (from_tbl) {
	case STORE_SALES:
	case CATALOG_SALES:
	case WEB_SALES:
		pick_distribution(&nDay, "calendar", 1, calendar_sales + is_leap(nYear), from_col);
		break;

	/*
	 * returns are keyed to the sale date, with the lag between sale and return
	 * selected within a known range, based on sales channel. The bounds are
	 * chosen explicitly per table, so the selection does not depend on a
	 * sentinel value or on falling through from one case into the next.
	 */
	case STORE_RETURNS:
	case CATALOG_RETURNS:
	case WEB_RETURNS:
		if (from_tbl == STORE_RETURNS) {
			nMinDelay = SS_MIN_SHIP_DELAY;
			nMaxDelay = SS_MAX_SHIP_DELAY;
		} else if (from_tbl == CATALOG_RETURNS) {
			nMinDelay = CS_MIN_SHIP_DELAY;
			nMaxDelay = CS_MAX_SHIP_DELAY;
		} else {
			nMinDelay = WS_MIN_SHIP_DELAY;
			nMaxDelay = WS_MAX_SHIP_DELAY;
		}
		genrand_integer(&nLag, DIST_UNIFORM, nMinDelay * 2, nMaxDelay * 2, 0, from_col);
		return (join_count + nLag);

	case WEB_SITE:
	case WEB_PAGE:
		return (web_join(from_col, join_count));

	default:
		pick_distribution(&nDay, "calendar", 1, 1 + is_leap(nYear), from_col);
		break;
	}

	/* anchor the picked day on January 1st of the requested year */
	TempDate.year = nYear;
	TempDate.month = 1;
	TempDate.day = 1;

	nResult = dttoj(&TempDate) + nDay;

	/* dates later than TODAYS_DATE are reported as -1 */
	if (nResult > jToday) {
		return ((ds_key_t)-1);
	}
	return ((ds_key_t)nResult);
}

/*
 * Routine: time_join(int to_tbl, int to_col, ds_key_t join_count)
 * Purpose: create joins that are time-skewed
 * Algorithm: pick an hour from the "hours" distribution, using a weight set
 *	chosen by table, then add a uniformly selected number of seconds within
 *	that hour
 * Data Structures:
 *
 * Params:
 *	to_tbl		table id that selects the weight set; mk_join() passes the
 *			table that owns the joining column
 *	to_col		column id, used as the random number stream
 *	join_count	not used by this routine
 * Returns: hour * 3600 + seconds
 * Called By: mk_join()
 * Calls: pick_distribution(), genrand_integer()
 * Assumptions:
 * Side Effects: consumes values from the to_col random stream
 * TODO: Relies on existing RNG code, which isn't really 64bit; will probably
 * require a rework of the genrand_xx routines
 */
static ds_key_t time_join(int to_tbl, int to_col, ds_key_t join_count) {
	int nHour, nSeconds, nWeightSet;

	/* store channel uses weight set 2, catalog and web use 3, all others 1 */
	if (to_tbl == STORE_SALES || to_tbl == STORE_RETURNS) {
		nWeightSet = 2;
	} else if (to_tbl == CATALOG_SALES || to_tbl == WEB_SALES || to_tbl == CATALOG_RETURNS ||
	           to_tbl == WEB_RETURNS) {
		nWeightSet = 3;
	} else {
		nWeightSet = 1;
	}

	/* the hour must be drawn before the seconds: both use the same stream */
	pick_distribution(&nHour, "hours", 1, nWeightSet, to_col);
	genrand_integer(&nSeconds, DIST_UNIFORM, 0, 3599, 0, to_col);

	return ((ds_key_t)(nHour * 3600 + nSeconds));
}

/*
 * Routine: cp_join(int tbl, int col, ds_key_t jDate)
 * Purpose: create joins to catalog_page
 * Algorithm: pick a catalog type and a page within a catalog, work out which
 *	catalog covers jDate from the number of days since DATA_START_DATE, and
 *	combine catalog and page with CP_SK()
 * Data Structures:
 *	the pages-per-catalog count and the parsed DATA_START_DATE are cached in
 *	function statics while InitConstants::cp_join_init is unset
 *
 * Params:
 *	tbl	not used by this routine
 *	col	column id, used as the random number stream
 *	jDate	julian date that the catalog page must cover
 * Returns: CP_SK(catalog number, pages per catalog, page)
 * Called By: mk_join()
 * Calls: get_rowcount(), strtodt(), pick_distribution(), genrand_integer()
 * Assumptions:
 * Side Effects: sets InitConstants::cp_join_init; consumes values from the col
 *	random stream
 * TODO: None
 */
static ds_key_t cp_join(int tbl, int col, ds_key_t jDate) {
	static int nPagesInCatalog;
	static date_t dDataStart;
	char *szType;
	int nCatalogType, nPageInCatalog, nDayOffset, nCatalog;

	if (!InitConstants::cp_join_init) {
		nPagesInCatalog = ((int)get_rowcount(CATALOG_PAGE) / CP_CATALOGS_PER_YEAR) / (YEAR_MAXIMUM - YEAR_MINIMUM + 2);
		strtodt(&dDataStart, DATA_START_DATE);
		InitConstants::cp_join_init = 1;
	}

	/* type first, then page: both draws share the col stream */
	nCatalogType = pick_distribution(&szType, "catalog_page_type", 1, 2, col);
	genrand_integer(&nPageInCatalog, DIST_UNIFORM, 1, nPagesInCatalog, 0, col);

	/* whole 365-day years select the base catalog, the remainder refines it */
	nDayOffset = (int)jDate - dDataStart.julian - 1;
	nCatalog = (nDayOffset / 365) * CP_CATALOGS_PER_YEAR;
	nDayOffset %= 365;

	if (nCatalogType == 1) {
		/* bi-annual */
		if (nDayOffset > 183) {
			nCatalog += 1;
		}
	} else if (nCatalogType == 2) {
		/* quarterly */
		nCatalog += (nDayOffset / 91);
	} else if (nCatalogType == 3) {
		/* monthly */
		nCatalog += (nDayOffset / 31);
	}

	return (CP_SK(nCatalog, nPagesInCatalog, nPageInCatalog));
}
/*
 * Routine: getCatalogNumberFromPage(ds_key_t kPageNumber)
 * Purpose: map a catalog page number to a catalog number
 * Algorithm: integer-divide the page number by the number of pages per
 *	catalog
 * Data Structures:
 *	the pages-per-catalog count is cached in a function static while
 *	InitConstants::getCatalogNumberFromPage_init is unset
 *
 * Params:
 *	kPageNumber	page number to convert
 * Returns: kPageNumber divided by the pages-per-catalog count
 * Called By:
 * Calls: get_rowcount()
 * Assumptions:
 * Side Effects: sets InitConstants::getCatalogNumberFromPage_init
 * TODO: None
 */
ds_key_t getCatalogNumberFromPage(ds_key_t kPageNumber) {
	static int nPagesInCatalog;

	if (!InitConstants::getCatalogNumberFromPage_init) {
		int nTotalPages = (int)get_rowcount(CATALOG_PAGE);

		/* same pages-per-catalog formula as cp_join() */
		nPagesInCatalog = (nTotalPages / CP_CATALOGS_PER_YEAR) / (YEAR_MAXIMUM - YEAR_MINIMUM + 2);
		InitConstants::getCatalogNumberFromPage_init = 1;
	}

	ds_key_t kCatalog = kPageNumber / nPagesInCatalog;
	return (kCatalog);
}

/*
 * Routine: web_join(int col, ds_key_t join_key)
 * Purpose: create joins to web_site/web_page. These need to be handled
 * together, since the date of transaction must fit within the lifetime of a
 * particular page, which must fit within the lifetime of a particular site.
 * Data Structures:
 *	nConcurrentSites, nSiteDuration, nOffset, dSiteOpen and dSiteClose are
 *	function statics, computed while InitConstants::web_join_init is unset
 *
 * Params:
 *	col selects which value is produced (see the switch below)
 *	join_key is one of two things:
 *		1. the xxx_sk for a particular row in the dimension for which we need
 *		   appropriate dates
 *		2. a julian date for which we need to pick a valid xxx_sk value
 * Returns: the computed date or key; -1 when col is not one of the columns
 *	handled by the switch
 * Called By: date_join()
 * Calls: getSimpleTdefsByNumber(), get_rowcount(), strtodt(),
 *	genrand_integer()
 * Assumptions:
 * Side Effects: sets InitConstants::web_join_init; overwrites the static
 *	dSiteOpen with DATE_MINIMUM in every date-producing case; consumes values
 *	from the col random stream in the WP_CREATION_DATE_SK (replaced sites
 *	only), WR_WEB_PAGE_SK and WS_WEB_PAGE_SK cases
 * TODO: None
 */
static ds_key_t web_join(int col, ds_key_t join_key) {
	ds_key_t res = -1, kSite;
	static int nConcurrentSites, nSiteDuration, nOffset;
	static date_t dSiteOpen, /* open/close dates for current web site */
	    dSiteClose;
	int nTemp;
	/* table definitions are looked up on every call; only nParam is read */
	tdef *pWS = getSimpleTdefsByNumber(WEB_SITE);
	tdef *pWP = getSimpleTdefsByNumber(WEB_PAGE);

	if (!InitConstants::web_join_init) {
		/* nSiteDuration = (WEB_END_DATE - WEB_START_DATE, as julian values)
		 * multiplied by the CONCURRENT_WEB_SITES row count */
		strtodt(&dSiteClose, WEB_END_DATE);
		nSiteDuration = dSiteClose.julian;
		nConcurrentSites = (int)get_rowcount(CONCURRENT_WEB_SITES);
		strtodt(&dSiteOpen, WEB_START_DATE);
		nSiteDuration -= dSiteOpen.julian;
		nSiteDuration *= nConcurrentSites;
		/* NOTE(review): nSiteDuration already holds the (close - open) span
		 * times nConcurrentSites, so this integer division looks like it
		 * truncates to 0 whenever nConcurrentSites >= 1 -- confirm intent.
		 * A zero nSiteDuration would make this, and every "% nSiteDuration"
		 * below, a division by zero. */
		nOffset = (dSiteClose.julian - dSiteOpen.julian) / (2 * nSiteDuration);
		InitConstants::web_join_init = 1;
	}

	/* In the date cases below, "x % nSiteDuration / 2" groups as
	 * "(x % nSiteDuration) / 2": % and / share precedence and associate
	 * left to right. */
	switch (col) {
		/**************
		 * join_key is the xxx_sk value for a dimension
		 */
	case WEB_OPEN_DATE:
		/* base date: DATE_MINIMUM moved back by a per-key stagger */
		strtodt(&dSiteOpen, DATE_MINIMUM);
		res = dSiteOpen.julian - ((join_key * WEB_DATE_STAGGER) % nSiteDuration / 2);
		if (WEB_IS_REPLACED(join_key)) /* this site is completely replaced */
		{
			if (WEB_IS_REPLACEMENT(join_key)) /* this is the second site */
			{
				/* the open date of the second site needs to align on a revision
				 * boundary */
				res += nOffset * nSiteDuration;
			}
		}
		break;
	case WEB_CLOSE_DATE:
		/* same staggered base as WEB_OPEN_DATE, pushed forward by
		 * pWS->nParam site durations */
		strtodt(&dSiteOpen, DATE_MINIMUM);
		res = dSiteOpen.julian - ((join_key * WEB_DATE_STAGGER) % nSiteDuration / 2);
		res += pWS->nParam * nSiteDuration;
		if (WEB_IS_REPLACED(join_key)) /* this site is completely replaced */
		{
			if (!WEB_IS_REPLACEMENT(join_key)) /* this is the first site */
			{
				/* the close date of the first site needs to align on a revision
				 * boundary */
				res -= pWS->nParam * nSiteDuration / 2;
			}
		}
		break;
	case WEB_REC_START_DATE_ID:
		/* the stagger is computed from join_key - 1 for record start dates */
		strtodt(&dSiteOpen, DATE_MINIMUM);
		res = dSiteOpen.julian - (((join_key - 1) * WEB_DATE_STAGGER) % nSiteDuration / 2);
		res += (join_key % pWS->nParam) * nSiteDuration;
		break;
	case WEB_REC_END_DATE_ID:
		/* NOTE(review): this end date scales nSiteDuration by 5 while the
		 * matching WEB_REC_START_DATE_ID does not -- confirm this asymmetry
		 * is intended. */
		strtodt(&dSiteOpen, DATE_MINIMUM);
		res = dSiteOpen.julian - ((join_key * WEB_DATE_STAGGER) % nSiteDuration / 2);
		res += ((join_key + 1) % pWS->nParam) * nSiteDuration * 5 - 1;
		break;
	case WP_REC_START_DATE_ID:
		/* NOTE(review): here the start date carries the factor of 5 and the
		 * matching WP_REC_END_DATE_ID does not -- the reverse of the WEB_REC_*
		 * pair above; confirm. */
		strtodt(&dSiteOpen, DATE_MINIMUM);
		res = dSiteOpen.julian - (((join_key - 1) * WEB_DATE_STAGGER) % nSiteDuration / 2);
		res += (join_key % pWP->nParam) * nSiteDuration * 5;
		break;
	case WP_REC_END_DATE_ID:
		strtodt(&dSiteOpen, DATE_MINIMUM);
		res = dSiteOpen.julian - ((join_key * WEB_DATE_STAGGER) % nSiteDuration / 2);
		res += ((join_key + 1) % pWP->nParam) * nSiteDuration - 1;
		break;
	case WP_CREATION_DATE_SK:
		/* page creation has to happen outside of the page window, to assure a
		 * constant number of pages, so it occurs in the gap between site
		 * creation and the site's actual activity. For sites that are replaced
		 * in the time span of the data set, this will depend on whether they
		 * are the first version or the second
		 */
		strtodt(&dSiteOpen, DATE_MINIMUM);
		/* derive the owning site from the page key; the stagger arithmetic
		 * below is done on kSite narrowed to int */
		kSite = join_key / WEB_PAGES_PER_SITE + 1;
		res = dSiteOpen.julian - (((int)kSite * WEB_DATE_STAGGER) % nSiteDuration / 2);
		/* NOTE(review): "replaced" is tested here with pWP->nParam rather than
		 * WEB_IS_REPLACED() as in the cases above -- confirm the two agree. */
		if (((int)kSite % pWP->nParam) == 0) /* this is a site that gets replaced */
		{
			/* pick uniformly between the staggered date and DATE_MINIMUM */
			genrand_integer(&nTemp, DIST_UNIFORM, (int)res, dSiteOpen.julian, 0, col);
			res = nTemp;
		}
		break;
		/*****************
		 * join key from here on is a date for which a valid site/page must be
		 * found the sk for a web page is a compound value: <site id><page id>
		 * and each component is a combination of the unique site or page and
		 * the active revision to it
		 */
	case WR_WEB_PAGE_SK:
	case WS_WEB_PAGE_SK:
		/* join_key is not consulted: the result is the value returned by a
		 * uniform pick in [1, WEB_PAGES_PER_SITE] (no destination pointer is
		 * passed) */
		res = genrand_integer(NULL, DIST_UNIFORM, 1, WEB_PAGES_PER_SITE, 0, col);
		break;
	}

	/* still -1 if col matched none of the cases above */
	return (res);
}

/*
 * Routine: mk_join(int from_col, int to_tbl, ds_key_t join_count)
 * Purpose: return a primary key for to_tbl, creating a join between the table
 * that owns from_col and to_tbl
 * Algorithm: CATALOG_PAGE, DATET and TIME have dedicated join routines;
 * history keeping (FL_TYPE_2) tables go through scd_join(); sparse-keyed
 * (FL_SPARSE) tables go through randomSparseKey(); every other table gets a
 * uniformly distributed key between 1 and its row count. The calling
 * convention allows for each join in the schema to be distributed differently
 * Data Structures:
 *
 * Params:
 *	from_col	column being populated; also the random number stream
 *	to_tbl		table whose key is being generated
 *	join_count	passed through to the table-specific join routines
 * Returns: the generated key
 * Called By:
 * Calls: getTableFromColumn(), getSimpleTdefsByNumber(), initSparseKeys(),
 *	cp_join(), date_join(), time_join(), scd_join(), randomSparseKey(),
 *	genrand_integer(), genrand_key(), get_rowcount()
 * Assumptions:
 * Side Effects: initializes the sparse keys of to_tbl on first use; consumes
 *	values from the from_col random stream
 * TODO: Relies on existing RNG code, which isn't really 64bit; will probably
 * require a rework of the genrand_xx routines
 */
ds_key_t mk_join(int from_col, int to_tbl, ds_key_t join_count) {
	ds_key_t kResult;
	int nYear;
	int nSourceTable = getTableFromColumn(from_col);
	tdef *pTarget = getSimpleTdefsByNumber(to_tbl);

	/*
	 * if the table being joined to employs sparse keys, the join gets handled
	 * in sparse.c; make sure its key set exists before any join is attempted
	 */
	if ((pTarget->flags & FL_SPARSE) && pTarget->arSparseKeys == NULL) {
		initSparseKeys(to_tbl);
	}

	/* some tables require special handling */
	if (to_tbl == CATALOG_PAGE) {
		return (cp_join(nSourceTable, from_col, join_count));
	}
	if (to_tbl == DATET) {
		genrand_integer(&nYear, DIST_UNIFORM, YEAR_MINIMUM, YEAR_MAXIMUM, 0, from_col);
		return (date_join(nSourceTable, from_col, join_count, nYear));
	}
	if (to_tbl == TIME) {
		return (time_join(nSourceTable, from_col, join_count));
	}

	/*
	 * all TYPE2 tables (i.e., history keeping dimensions) need a special
	 * join algorithm; this check takes priority over the sparse one
	 */
	if (pTarget->flags & FL_TYPE_2) {
		return (scd_join(to_tbl, from_col, join_count));
	}
	if (pTarget->flags & FL_SPARSE) {
		return (randomSparseKey(to_tbl, from_col));
	}

	/* the rest of the tables use standard, uniform joins */
	genrand_key(&kResult, DIST_UNIFORM, (ds_key_t)1, get_rowcount(to_tbl), (ds_key_t)0, from_col);
	return ((ds_key_t)kResult);
}