Files
email-tracker/external/duckdb/extension/tpcds/dsdgen/dsdgen-c/dist.cpp
2025-10-24 19:21:19 -05:00

1012 lines
25 KiB
C++

/*
* Legal Notice
*
* This document and associated source code (the "Work") is a part of a
* benchmark specification maintained by the TPC.
*
* The TPC reserves all right, title, and interest to the Work as provided
* under U.S. and international laws, including without limitation all patent
* and trademark rights therein.
*
* No Warranty
*
* 1.1 TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THE INFORMATION
* CONTAINED HEREIN IS PROVIDED "AS IS" AND WITH ALL FAULTS, AND THE
* AUTHORS AND DEVELOPERS OF THE WORK HEREBY DISCLAIM ALL OTHER
* WARRANTIES AND CONDITIONS, EITHER EXPRESS, IMPLIED OR STATUTORY,
* INCLUDING, BUT NOT LIMITED TO, ANY (IF ANY) IMPLIED WARRANTIES,
* DUTIES OR CONDITIONS OF MERCHANTABILITY, OF FITNESS FOR A PARTICULAR
* PURPOSE, OF ACCURACY OR COMPLETENESS OF RESPONSES, OF RESULTS, OF
* WORKMANLIKE EFFORT, OF LACK OF VIRUSES, AND OF LACK OF NEGLIGENCE.
* ALSO, THERE IS NO WARRANTY OR CONDITION OF TITLE, QUIET ENJOYMENT,
* QUIET POSSESSION, CORRESPONDENCE TO DESCRIPTION OR NON-INFRINGEMENT
* WITH REGARD TO THE WORK.
* 1.2 IN NO EVENT WILL ANY AUTHOR OR DEVELOPER OF THE WORK BE LIABLE TO
* ANY OTHER PARTY FOR ANY DAMAGES, INCLUDING BUT NOT LIMITED TO THE
* COST OF PROCURING SUBSTITUTE GOODS OR SERVICES, LOST PROFITS, LOSS
* OF USE, LOSS OF DATA, OR ANY INCIDENTAL, CONSEQUENTIAL, DIRECT,
* INDIRECT, OR SPECIAL DAMAGES WHETHER UNDER CONTRACT, TORT, WARRANTY,
* OR OTHERWISE, ARISING IN ANY WAY OUT OF THIS OR ANY OTHER AGREEMENT
* RELATING TO THE WORK, WHETHER OR NOT SUCH AUTHOR OR DEVELOPER HAD
* ADVANCE NOTICE OF THE POSSIBILITY OF SUCH DAMAGES.
*
* Contributors:
* Gradient Systems
*/
#include "config.h"
#include "porting.h"
#include "tpcds_idx.hpp"
#include <assert.h>
#include <fcntl.h>
#include <stdio.h>
#ifdef _WIN32
#include <io.h>
#include <search.h>
#include <stdlib.h>
#include <winsock.h>
#else
#include <netinet/in.h>
#include <sys/stat.h>
#include <sys/types.h>
#endif
#ifdef NCR
#include <sys/types.h>
#endif
#ifndef USE_STDLIB_H
#include <malloc.h>
#endif
#include "config.h"
#include "date.h"
#include "dcomp.h"
#include "decimal.h"
#include "dist.h"
#include "error_msg.h"
#include "genrand.h"
#include "r_params.h"
#ifdef TEST
option_t options[] = {{"DISTRIBUTIONS", OPT_STR, 2, "read distributions from file <s>", NULL, "tester_dist.idx"}, NULL};
char params[2];
struct {
char *name;
} tdefs[] = {NULL};
#endif
/* NOTE: these need to be in sync with a_dist.h */
/* number of name bytes stored for each index entry (the terminating NUL is added after loading) */
#define D_NAME_LEN 20
/* value of d_idx_t.flags once load_dist() has populated the entry's dist pointer */
#define FL_LOADED 0x01
/* defined further down; find_dist() calls it to load a distribution on first lookup */
static int load_dist(d_idx_t *d);
/*
* Routine: di_compare()
* Purpose: comparison routine for two d_idx_t entries; used by qsort
* Algorithm:
* Data Structures:
*
* Params:
* Returns:
* Called By:
* Calls:
* Assumptions:
* Side Effects:
* TODO: None
*/
int di_compare(const void *op1, const void *op2) {
d_idx_t *ie1 = (d_idx_t *)op1, *ie2 = (d_idx_t *)op2;
return (strcasecmp(ie1->name, ie2->name));
}
/*
 * Decode the 32-bit value stored in network byte order at src. The bytes are
 * copied out before conversion, so src does not have to be aligned.
 */
static uint32_t idx_decode_u32(const void *src) {
    int32_t raw;

    memcpy(&raw, src, sizeof(raw));
    return ntohl(raw);
}

/*
 * Routine: find_dist(const char *name)
 * Purpose: translate a distribution name into its d_idx_t index entry
 * Algorithm:
 *  On the first call the index is decoded from the embedded tpcds_idx image:
 *  the first 32-bit word holds the entry count and the entries themselves
 *  occupy the last entry_count * IDX_SIZE bytes of the image. The decoded
 *  entries are sorted by name so that every call can bsearch() them.
 *  A matching entry whose distribution has not been loaded yet is passed to
 *  load_dist() before it is returned.
 * Params:
 *  name -- distribution name; matched case-insensitively (see di_compare())
 * Returns: the index entry, or NULL when name is NULL, is longer than
 *  D_NAME_LEN (no stored name can be that long), or is not in the index
 * Side Effects: allocates the index (never freed) and sets
 *  InitConstants::find_dist_init on the first call
 * Assumptions:
 *  NOTE(review): no lock is taken in this function, so the first call is
 *  assumed not to race with other callers -- confirm against the callers.
 */
d_idx_t *find_dist(const char *name) {
    static int entry_count;
    static d_idx_t *idx = NULL;
    d_idx_t key, *id = NULL;
    int i;

    /* load the index if this is the first time through */
    if (!InitConstants::find_dist_init) {
        auto read_ptr = tpcds_idx;

        entry_count = idx_decode_u32(read_ptr);
        /* the index sits at the very end of the image */
        read_ptr = tpcds_idx + tpcds_idx_len - (entry_count * IDX_SIZE);

        idx = (d_idx_t *)malloc(entry_count * sizeof(d_idx_t));
        MALLOC_CHECK(idx);
        for (i = 0; i < entry_count; i++) {
            d_idx_t *entry = idx + i;

            memset(entry, 0, sizeof(d_idx_t));

            /* fixed-width name field; terminate it ourselves */
            memcpy(entry->name, read_ptr, D_NAME_LEN);
            read_ptr += D_NAME_LEN;
            entry->name[D_NAME_LEN] = '\0';

            /* seven 32-bit fields follow, in exactly this order */
            entry->index = idx_decode_u32(read_ptr);
            read_ptr += sizeof(int32_t);
            entry->offset = idx_decode_u32(read_ptr);
            read_ptr += sizeof(int32_t);
            entry->str_space = idx_decode_u32(read_ptr);
            read_ptr += sizeof(int32_t);
            entry->length = idx_decode_u32(read_ptr);
            read_ptr += sizeof(int32_t);
            entry->w_width = idx_decode_u32(read_ptr);
            read_ptr += sizeof(int32_t);
            entry->v_width = idx_decode_u32(read_ptr);
            read_ptr += sizeof(int32_t);
            entry->name_space = idx_decode_u32(read_ptr);
            read_ptr += sizeof(int32_t);

            entry->dist = NULL;
        }
        qsort((void *)idx, entry_count, sizeof(d_idx_t), di_compare);
        InitConstants::find_dist_init = 1;
    }

    /*
     * key.name holds D_NAME_LEN characters plus the terminator, so a longer
     * name can neither be copied safely nor match any stored entry.
     */
    if (name == NULL || strlen(name) > D_NAME_LEN)
        return (NULL);

    /* find the distribution, if it exists and move to it */
    strcpy(key.name, name);
    id = (d_idx_t *)bsearch((void *)&key, (void *)idx, entry_count, sizeof(d_idx_t), di_compare);
    if (id != NULL && id->flags != FL_LOADED) {
        /* found a valid distribution, but it still needs to be loaded */
        load_dist(id);
    }
    return (id);
}
/*
 * Routine: load_dist(d_idx_t *di)
 * Purpose: load a particular distribution from the embedded tpcds_idx image
 * Algorithm:
 *  Starting at di->offset the image is read sequentially:
 *   1. v_width type codes
 *   2. w_width weight sets of `length` weights each; while loading, every
 *      set is converted to running (cumulative) totals and the grand total
 *      is kept in maximums[]
 *   3. v_width value sets of `length` offsets into the string pool
 *   4. name_space bytes of column aliases (only when name_space != 0)
 *   5. str_space bytes of string pool
 *  All integers are 32 bits wide and stored in network byte order.
 * Params:
 *  di -- index entry describing the distribution; its dist pointer and flags
 *        are updated
 * Returns: always 0
 * Side Effects: allocates the dist_t and its arrays (never freed) and sets
 *  di->flags to FL_LOADED
 */
static int load_dist(d_idx_t *di) {
    dist_t *d;
    int32_t temp;
    int i, j;

    if (di->flags == FL_LOADED) {
        /* someone already loaded it */
        return (0);
    }

    auto read_ptr = tpcds_idx + di->offset;

    di->dist = (dist_t *)malloc(sizeof(struct DIST_T));
    MALLOC_CHECK(di->dist);
    d = di->dist;
    /*
     * Start from an all-zero structure: members that are not filled in below
     * (names when no aliases exist, size) must not hold malloc() garbage,
     * because DistNameIndex() tests names against NULL.
     */
    memset(d, 0, sizeof(*d));

    /* load the type information */
    d->type_vector = (int *)malloc(sizeof(int32_t) * di->v_width);
    MALLOC_CHECK(d->type_vector);
    for (i = 0; i < di->v_width; i++) {
        memcpy(&temp, read_ptr, sizeof(int32_t));
        read_ptr += sizeof(int32_t);
        d->type_vector[i] = ntohl(temp);
    }

    /* load the weights */
    d->weight_sets = (int **)malloc(sizeof(int *) * di->w_width);
    d->maximums = (int *)malloc(sizeof(int32_t) * di->w_width);
    MALLOC_CHECK(d->weight_sets);
    MALLOC_CHECK(d->maximums);
    for (i = 0; i < di->w_width; i++) {
        d->weight_sets[i] = (int *)malloc(di->length * sizeof(int32_t));
        MALLOC_CHECK(d->weight_sets[i]);
        d->maximums[i] = 0;
        for (j = 0; j < di->length; j++) {
            memcpy(&temp, read_ptr, sizeof(int32_t));
            read_ptr += sizeof(int32_t);
            d->weight_sets[i][j] = ntohl(temp);
            /* calculate the maximum weight and convert sets to cumulative */
            d->maximums[i] += d->weight_sets[i][j];
            d->weight_sets[i][j] = d->maximums[i];
        }
    }

    /* load the value offsets */
    d->value_sets = (int **)malloc(sizeof(int *) * di->v_width);
    MALLOC_CHECK(d->value_sets);
    for (i = 0; i < di->v_width; i++) {
        d->value_sets[i] = (int *)malloc(di->length * sizeof(int32_t));
        MALLOC_CHECK(d->value_sets[i]);
        for (j = 0; j < di->length; j++) {
            memcpy(&temp, read_ptr, sizeof(int32_t));
            read_ptr += sizeof(int32_t);
            d->value_sets[i][j] = ntohl(temp);
        }
    }

    /* load the column aliases, if they were defined */
    if (di->name_space) {
        d->names = (char *)malloc(di->name_space);
        MALLOC_CHECK(d->names);
        memcpy(d->names, read_ptr, di->name_space * sizeof(char));
        read_ptr += di->name_space * sizeof(char);
    }

    /* and finally the values themselves */
    d->strings = (char *)malloc(sizeof(char) * di->str_space);
    MALLOC_CHECK(d->strings);
    memcpy(d->strings, read_ptr, di->str_space * sizeof(char));
    read_ptr += di->str_space * sizeof(char);

    di->flags = FL_LOADED;
    return (0);
}
/*
 * Routine: dist_op()
 * Purpose: select a value from a distribution, either at random according to
 *  a weight set (op == 0) or by explicit row number (op != 0)
 * Algorithm:
 *  op == 0: a DIST_UNIFORM value is requested from genrand_integer() with
 *   bounds 1 and the total of weight set `wset`; the first row whose
 *   cumulative weight reaches that value is chosen and its entry in value
 *   set `vset` is returned.
 *  op != 0: `vset` is the 1-based row number and `wset` the 1-based value
 *   set; no random number is consumed.
 * Params:
 *  dest   -- where to store the result, interpreted according to the type of
 *            the value set: char ** (TKN_VARCHAR), int * (TKN_INT),
 *            date_t ** (TKN_DATE) or decimal_t ** (TKN_DECIMAL); may be NULL
 *  op     -- 0 for a weighted random pick, anything else for a direct lookup
 *  d_name -- distribution name
 *  vset   -- see Algorithm
 *  wset   -- see Algorithm
 *  stream -- random number stream handed to genrand_integer()
 * Returns: index + 1 (the 1-based row) when dest is non-NULL on exit;
 *  otherwise the integer value for TKN_INT sets and 1 for TKN_VARCHAR sets
 * Side Effects: may load the distribution; exits the process on a row
 *  over/under-run in direct-lookup mode
 * TODO: 20000317 Need to be sure this is portable to NT and others
 */
int dist_op(void *dest, int op, const char *d_name, int vset, int wset, int stream) {
    d_idx_t *d;
    dist_t *dist;
    int level, index = 0, dt;
    char *char_val;
    int i_res = 1;

    if ((d = find_dist(d_name)) == NULL) {
        char msg[80];
        /* bounded: a long name must not overrun msg */
        snprintf(msg, sizeof(msg), "Invalid distribution name '%s'", d_name ? d_name : "(null)");
        INTERNAL(msg);
        assert(d != NULL);
    }
    dist = d->dist;

    if (op == 0) {
        genrand_integer(&level, DIST_UNIFORM, 1, dist->maximums[wset - 1], 0, stream);
        /* test the bound first so the weight array is never read past its end */
        while (index < d->length && level > dist->weight_sets[wset - 1][index])
            index += 1;
        dt = vset - 1;
        /* dt is zero-based, so v_width itself is already out of range */
        if ((index >= d->length) || (dt < 0) || (dt >= d->v_width))
            INTERNAL("Distribution overrun");
        char_val = dist->strings + dist->value_sets[dt][index];
    } else {
        index = vset - 1;
        dt = wset - 1;
        if (index >= d->length || index < 0) {
            fprintf(stderr, "Runtime ERROR: Distribution over-run/under-run\n");
            fprintf(stderr, "Check distribution definitions and usage for %s.\n", d->name);
            fprintf(stderr, "index = %d, length=%d.\n", index, d->length);
            exit(1);
        }
        char_val = dist->strings + dist->value_sets[dt][index];
    }

    switch (dist->type_vector[dt]) {
    case TKN_VARCHAR:
        /* hand back a pointer into the string pool; nothing is copied */
        if (dest)
            *(char **)dest = (char *)char_val;
        break;
    case TKN_INT:
        i_res = atoi(char_val);
        if (dest)
            *(int *)dest = i_res;
        break;
    case TKN_DATE:
        /*
         * NOTE(review): dest is treated as date_t **. When dest is NULL the
         * freshly allocated buffer is itself reinterpreted that way and is
         * never freed; left unchanged here -- confirm the intended contract.
         */
        if (dest == NULL) {
            dest = (date_t *)malloc(sizeof(date_t));
            MALLOC_CHECK(dest);
        }
        strtodt(*(date_t **)dest, char_val);
        break;
    case TKN_DECIMAL:
        /* NOTE(review): same caveat as TKN_DATE, with decimal_t ** */
        if (dest == NULL) {
            dest = (decimal_t *)malloc(sizeof(decimal_t));
            MALLOC_CHECK(dest);
        }
        strtodec(*(decimal_t **)dest, char_val);
        break;
    default:
        break;
    }

    return ((dest == NULL) ? i_res : index + 1); /* shift back to the 1-based indexing scheme */
}
/*
 * Routine: int dist_weight
 * Purpose: return the weight of a particular member of a distribution
 * Algorithm:
 *  Weight sets are stored as running totals (see load_dist()), so the weight
 *  of a row is its total minus the total of the previous row.
 * Params:
 *  dest  -- optional output location; may be NULL
 *  d     -- distribution name
 *  index -- which "row", 1-based
 *  wset  -- which set of weights, 1-based
 * Returns: the weight when dest is NULL; otherwise the weight is stored in
 *  *dest and 0 is returned
 * Assumptions: index and wset are within the distribution's length and
 *  w_width; this is enforced with assert() only
 * Side Effects: may load the distribution
 */
int dist_weight(int *dest, const char *d, int index, int wset) {
    d_idx_t *d_idx;
    dist_t *dist;
    int res;

    if ((d_idx = find_dist(d)) == NULL) {
        char msg[80];
        /* bounded: a long name must not overrun msg */
        snprintf(msg, sizeof(msg), "Invalid distribution name '%s'", d ? d : "(null)");
        INTERNAL(msg);
        /* same guard dist_op() uses, in case INTERNAL() returns */
        assert(d_idx != NULL);
    }
    dist = d_idx->dist;

    assert(index > 0);
    assert(wset > 0);
    /* the arrays hold exactly length rows and w_width weight sets */
    assert(index <= d_idx->length);
    assert(wset <= d_idx->w_width);

    res = dist->weight_sets[wset - 1][index - 1];
    /* reverse the accumulation of weights */
    if (index > 1)
        res -= dist->weight_sets[wset - 1][index - 2];

    if (dest == NULL)
        return (res);
    *dest = res;
    return (0);
}
/*
 * Routine: int DistNameIndex()
 * Purpose: return the index of a column alias
 * Algorithm:
 *  The alias block is a sequence of NUL-terminated strings; it is scanned
 *  for at most v_width + w_width entries with a case-insensitive compare.
 *  Positions below v_width are value-set aliases, the remaining positions
 *  are weight-set aliases.
 * Params:
 *  szDist    -- distribution name
 *  nNameType -- VALUE_NAME or WEIGHT_NAME: which kind of alias is wanted
 *  szName    -- alias to look up
 * Returns: the 1-based value-set or weight-set number of the alias, or -1
 *  when the distribution is unknown, has no aliases, the alias does not
 *  exist, or it is not of the requested kind
 * Side Effects: may load the distribution
 */
int DistNameIndex(const char *szDist, int nNameType, const char *szName) {
    d_idx_t *d_idx;
    dist_t *dist;
    int res, nNames;
    char *cp = NULL;

    if ((d_idx = find_dist(szDist)) == NULL)
        return (-1);
    dist = d_idx->dist;
    if (dist->names == NULL)
        return (-1);

    nNames = d_idx->v_width + d_idx->w_width;
    cp = dist->names;
    for (res = 0; res < nNames; res++) {
        if (strcasecmp(szName, cp) == 0)
            break;
        cp += strlen(cp) + 1;
    }

    /* ran off the end: the alias does not exist at all */
    if (res >= nNames)
        return (-1);

    if ((nNameType == VALUE_NAME) && (res < d_idx->v_width))
        return (res + 1);
    /* position v_width is the first weight alias, so the bound is inclusive */
    if ((nNameType == WEIGHT_NAME) && (res >= d_idx->v_width))
        return (res - d_idx->v_width + 1);
    return (-1);
}
/*
* Routine: int distsize(char *name)
* Purpose: return the size of a distribution
* Algorithm:
* Data Structures:
*
* Params:
* Returns:
* Called By:
* Calls:
* Assumptions:
* Side Effects:
* TODO:
* 20000405 need to add error checking
*/
int distsize(const char *name) {
d_idx_t *dist;
dist = find_dist(name);
if (dist == NULL)
return (-1);
return (dist->length);
}
/*
 * Routine: int IntegrateDist(const char *szDistName, int nPct,
 *  int nStartIndex, int nWeightSet)
 * Purpose: return the index of the entry which, starting from nStartIndex,
 *  would create a range comprising nPct of the total contained in nWeightSet
 * Algorithm:
 *  The goal is nPct percent of the total weight of the set. Rows following
 *  nStartIndex are visited in order, wrapping from the last row back to row
 *  1, and their weights subtracted until the goal is used up.
 * Params:
 *  szDistName  -- distribution name
 *  nPct        -- percentage of the total weight, 1..99
 *  nStartIndex -- row to start after, 0..length
 *  nWeightSet  -- which set of weights, 1-based (as for dist_weight())
 * Returns: the index reached, or QERR_RANGE_ERROR / QERR_BAD_NAME.
 *  NOTE(review): the walk wraps around the distribution but the returned
 *  index is not reduced modulo its size, so it is never smaller than
 *  nStartIndex; callers that need a row number must wrap it themselves --
 *  confirm this is what they expect.
 * Side Effects: may load the distribution
 */
int IntegrateDist(const char *szDistName, int nPct, int nStartIndex, int nWeightSet) {
    d_idx_t *pDistIndex;
    int nGoal, nSize;

    if ((nPct <= 0) || (nPct >= 100))
        return (QERR_RANGE_ERROR);

    pDistIndex = find_dist(szDistName);
    if (pDistIndex == NULL)
        return (QERR_BAD_NAME);

    nSize = pDistIndex->length;
    if ((nStartIndex < 0) || (nStartIndex > nSize))
        return (QERR_RANGE_ERROR);
    if ((nWeightSet < 1) || (nWeightSet > pDistIndex->w_width))
        return (QERR_RANGE_ERROR);

    /* weight sets are 1-based for callers, maximums[] is 0-based */
    nGoal = pDistIndex->dist->maximums[nWeightSet - 1];
    /* with no weight to integrate the loop below could never finish */
    if (nGoal <= 0)
        return (QERR_RANGE_ERROR);
    /* widen so that total * percentage cannot overflow an int */
    nGoal = (int)(((long long)nGoal * nPct) / 100);

    while (nGoal >= 0) {
        nStartIndex++;
        /* map onto 1..nSize; a plain modulo would produce row 0 */
        nGoal -= dist_weight(NULL, szDistName, ((nStartIndex - 1) % nSize) + 1, nWeightSet);
    }
    return (nStartIndex);
}
/*
* Routine: int dist_type(char *name, int nValueSet)
* Purpose: return the type of the n-th value set in a distribution
* Algorithm:
* Data Structures:
*
* Params:
* Returns:
* Called By:
* Calls:
* Assumptions:
* Side Effects:
* TODO:
*/
int dist_type(const char *name, int nValueSet) {
d_idx_t *dist;
dist = find_dist(name);
if (dist == NULL)
return (-1);
if (nValueSet < 1 || nValueSet > dist->v_width)
return (-1);
return (dist->dist->type_vector[nValueSet - 1]);
}
/*
 * Routine: void dump_dist(const char *name)
 * Purpose: print a distribution to stdout as a sequence of statements
 *  ("create", "set types", "set weights", and one "add(...)" per row)
 * Algorithm:
 *  For every row, each value is fetched with dist_member() and each weight
 *  with dist_weight(); value sets whose dist_type() is 7 are printed as
 *  integers, all others as quoted strings.
 *  NOTE(review): 7 is presumably the numeric value of TKN_INT -- confirm
 *  before replacing the literal.
 * Params:
 *  name -- distribution name
 * Returns: nothing
 * Side Effects: writes to stdout; reports QERR_BAD_NAME through
 *  ReportErrorNoLine() when the distribution is unknown
 */
void dump_dist(const char *name) {
    d_idx_t *pIndex;
    int i, j;
    char *pCharVal = NULL;
    int nVal;

    pIndex = find_dist(name);
    if (pIndex == NULL) {
        ReportErrorNoLine(QERR_BAD_NAME, name, 1);
        /* nothing to print; never dereference a missing entry */
        return;
    }

    printf("create %s;\n", pIndex->name);
    printf("set types = (");
    for (i = 0; i < pIndex->v_width; i++) {
        if (i > 0)
            printf(", ");
        printf("%s", dist_type(name, i + 1) == 7 ? "int" : "varchar");
    }
    printf(");\n");
    printf("set weights = %d;\n", pIndex->w_width);

    for (i = 0; i < pIndex->length; i++) {
        printf("add(");
        /* the values of this row ... */
        for (j = 0; j < pIndex->v_width; j++) {
            if (j)
                printf(", ");
            if (dist_type(name, j + 1) != 7) {
                dist_member(&pCharVal, name, i + 1, j + 1);
                printf("\"%s\"", pCharVal);
            } else {
                dist_member(&nVal, name, i + 1, j + 1);
                printf("%d", nVal);
            }
        }
        printf("; ");
        /* ... followed by its weight in every weight set */
        for (j = 0; j < pIndex->w_width; j++) {
            if (j)
                printf(", ");
            printf("%d", dist_weight(NULL, name, i + 1, j + 1));
        }
        printf(");\n");
    }
    return;
}
/*
 * Routine: dist_active(const char *szName, int nWeightSet)
 * Purpose: return number of entries with non-zero weight values
 * Params:
 *  szName     -- distribution name
 *  nWeightSet -- which set of weights, passed through to dist_weight()
 * Returns: the number of rows 1..distsize(szName) whose weight in the set is
 *  not zero; 0 when distsize() reports no rows
 * Side Effects: may load the distribution
 */
int dist_active(const char *szName, int nWeightSet) {
    const int nRows = distsize(szName);
    int nActive = 0;

    for (int row = 1; row <= nRows; row++) {
        if (dist_weight(NULL, szName, row, nWeightSet) == 0)
            continue;
        nActive++;
    }
    return (nActive);
}
/*
* Routine: DistSizeToShiftWidth(char *szDist)
* Purpose: Determine the number of bits required to select a member of the
* distribution Algorithm: Data Structures:
*
* Params:
* Returns:
* Called By:
* Calls:
* Assumptions:
* Side Effects:
* TODO: None
*/
int DistSizeToShiftWidth(const char *szDist, int nWeightSet) {
int nBits = 1, nTotal = 2, nMax;
d_idx_t *d;
d = find_dist(szDist);
nMax = dist_max(d->dist, nWeightSet);
while (nTotal < nMax) {
nBits += 1;
nTotal <<= 1;
}
return (nBits);
}
/*
 * Routine: MatchDistWeight()
 * Purpose: return the distribution entry selected by a given weight
 * Algorithm:
 *  nWeight is reduced modulo the total of the weight set; the first row
 *  whose cumulative weight reaches it is chosen (the last row if none does)
 *  and its entry in the requested value set is stored in dest.
 * Params:
 *  dest       -- where to store the result, interpreted as in dist_op():
 *                char ** / int * / date_t ** / decimal_t ** depending on the
 *                type of the value set; may be NULL
 *  szDist     -- distribution name
 *  nWeight    -- weight to match
 *  nWeightSet -- which set of weights, 1-based
 *  ValueSet   -- which set of values, 1-based
 * Returns: 1 plus the number of doublings of 1 needed to reach the total of
 *  the weight set
 * Side Effects: may load the distribution
 */
int MatchDistWeight(void *dest, const char *szDist, int nWeight, int nWeightSet, int ValueSet) {
    d_idx_t *d;
    dist_t *dist;
    int index = 0, dt, i_res, nRetcode;
    long long nPower; /* wider than int so the doubling cannot overflow */
    char *char_val;

    if ((d = find_dist(szDist)) == NULL) {
        char msg[80];
        /* bounded: a long name must not overrun msg */
        snprintf(msg, sizeof(msg), "Invalid distribution name '%s'", szDist ? szDist : "(null)");
        INTERNAL(msg);
        /* same guard dist_op() uses, in case INTERNAL() returns */
        assert(d != NULL);
    }
    dist = d->dist;

    nWeight %= dist->maximums[nWeightSet - 1];
    /* test the bound first so the weight array is never read past its end */
    while (index < d->length && nWeight > dist->weight_sets[nWeightSet - 1][index])
        index += 1;
    dt = ValueSet - 1;
    if (index >= d->length)
        index = d->length - 1;
    char_val = dist->strings + dist->value_sets[dt][index];

    switch (dist->type_vector[dt]) {
    case TKN_VARCHAR:
        /* hand back a pointer into the string pool; nothing is copied */
        if (dest)
            *(char **)dest = (char *)char_val;
        break;
    case TKN_INT:
        i_res = atoi(char_val);
        if (dest)
            *(int *)dest = i_res;
        break;
    case TKN_DATE:
        /*
         * NOTE(review): dest is treated as date_t **. When dest is NULL the
         * freshly allocated buffer is itself reinterpreted that way and is
         * never freed; left unchanged here -- confirm the intended contract.
         */
        if (dest == NULL) {
            dest = (date_t *)malloc(sizeof(date_t));
            MALLOC_CHECK(dest);
        }
        strtodt(*(date_t **)dest, char_val);
        break;
    case TKN_DECIMAL:
        /* NOTE(review): same caveat as TKN_DATE, with decimal_t ** */
        if (dest == NULL) {
            dest = (decimal_t *)malloc(sizeof(decimal_t));
            MALLOC_CHECK(dest);
        }
        strtodec(*(decimal_t **)dest, char_val);
        break;
    default:
        break;
    }

    nRetcode = 1;
    nPower = 1;
    while (nPower < dist->maximums[nWeightSet - 1]) {
        nRetcode += 1;
        nPower *= 2;
    }
    return (nRetcode);
}
/*
* Routine: findDistValue(char *szValue, char *szDistName, int nValueSet)
* Purpose: Return the row number where the entry is found
* Algorithm:
* Data Structures:
*
* Params:
* Returns:
* Called By:
* Calls:
* Assumptions:
* Side Effects:
* TODO:
* 20031024 jms this routine needs to handle all data types, not just varchar
*/
int findDistValue(const char *szValue, const char *szDistName, int ValueSet) {
int nRetValue = 1, nDistMax;
char szDistValue[128];
nDistMax = distsize(szDistName);
for (nRetValue = 1; nRetValue < nDistMax; nRetValue++) {
dist_member(&szDistValue, szDistName, nRetValue, ValueSet);
if (strcmp(szValue, szDistValue) == 0)
break;
}
if (nRetValue <= nDistMax)
return (nRetValue);
return (-1);
}
#ifdef TEST
/*
 * Self test, built only when TEST is defined: looks up a few members and one
 * weight of "test_dist" and exits with status 1 on the first mismatch.
 * NOTE(review): the decimal check passes &dec_res directly and reads
 * dec_res.number / dec_res.fraction as strings; dist_op() treats dest as
 * decimal_t ** for decimal sets, so this part looks stale -- confirm against
 * the current decimal_t definition before relying on it.
 */
int main(void) {
    int i_res;
    char *c_res;
    decimal_t dec_res;

    init_params();

    dist_member(&i_res, "test_dist", 1, 1);
    if (i_res != 10) {
        printf("dist_member(\"test_dist\", 1, 1): %d != 10\n", i_res);
        exit(1);
    }
    dist_member(&i_res, "test_dist", 1, 2);
    if (i_res != 60) {
        printf("dist_member(\"test_dist\", 1, 2): %d != 60\n", i_res);
        exit(1);
    }
    dist_member((void *)&c_res, "test_dist", 1, 3);
    if (strcmp(c_res, "El Camino")) {
        printf("dist_member(\"test_dist\", 1, 3): %s != El Camino\n", c_res);
        exit(1);
    }
    dist_member((void *)&dec_res, "test_dist", 1, 4);
    if (strcmp(dec_res.number, "1") || strcmp(dec_res.fraction, "23")) {
        printf("dist_member(\"test_dist\", 1, 4): %s.%s != 1.23\n", dec_res.number, dec_res.fraction);
        exit(1);
    }
    dist_weight(&i_res, "test_dist", 2, 2);
    if (3 != i_res) {
        printf("dist_weight(\"test_dist\", 2, 2): %d != 3\n", i_res);
        exit(1);
    }
    return (0);
}
#endif /* TEST */