Files
email-tracker/external/duckdb/tools/sqlite3_api_wrapper/sqlite3/strglob.c
2025-10-24 19:21:19 -05:00

263 lines
8.4 KiB
C

#include "stripped_sqlite_int.h"
#include <ctype.h>
#define sqlite3Toupper(x) toupper((unsigned char)(x))
#define sqlite3Tolower(x) tolower((unsigned char)(x))
/*
** This lookup table is used to help decode the first byte of
** a multi-byte UTF8 character.
*/
static const unsigned char sqlite3Utf8Trans1[] = {
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00,
};
u32 sqlite3Utf8Read(const unsigned char **pz /* Pointer to string from which to read char */
) {
unsigned int c;
/* Same as READ_UTF8() above but without the zTerm parameter.
** For this routine, we assume the UTF8 string is always zero-terminated.
*/
c = *((*pz)++);
if (c >= 0xc0) {
c = sqlite3Utf8Trans1[c - 0xc0];
while ((*(*pz) & 0xc0) == 0x80) {
c = (c << 6) + (0x3f & *((*pz)++));
}
if (c < 0x80 || (c & 0xFFFFF800) == 0xD800 || (c & 0xFFFFFFFE) == 0xFFFE) {
c = 0xFFFD;
}
}
return c;
}
/*
** A structure defining how to do GLOB-style comparisons.
*/
struct compareInfo {
u8 matchAll; /* "*" or "%" */
u8 matchOne; /* "?" or "_" */
u8 matchSet; /* "[" or 0 */
u8 noCase; /* true to ignore case differences */
};
/*
** For LIKE and GLOB matching on EBCDIC machines, assume that every
** character is exactly one byte in size. Also, provde the Utf8Read()
** macro for fast reading of the next character in the common case where
** the next character is ASCII.
*/
#define Utf8Read(A) (A[0] < 0x80 ? *(A++) : sqlite3Utf8Read(&A))
static const struct compareInfo globInfo = {'*', '?', '[', 0};
/* The correct SQL-92 behavior is for the LIKE operator to ignore
** case. Thus 'a' LIKE 'A' would be true. */
static const struct compareInfo likeInfoNorm = {'%', '_', 0, 1};
/* If SQLITE_CASE_SENSITIVE_LIKE is defined, then the LIKE operator
** is case sensitive causing 'a' LIKE 'A' to be false */
// static const struct compareInfo likeInfoAlt = { '%', '_', 0, 0 };
/*
** Possible error returns from patternMatch()
*/
#define SQLITE_MATCH 0
#define SQLITE_NOMATCH 1
#define SQLITE_NOWILDCARDMATCH 2
/*
** Compare two UTF-8 strings for equality where the first string is
** a GLOB or LIKE expression. Return values:
**
** SQLITE_MATCH: Match
** SQLITE_NOMATCH: No match
** SQLITE_NOWILDCARDMATCH: No match in spite of having * or % wildcards.
**
** Globbing rules:
**
** '*' Matches any sequence of zero or more characters.
**
** '?' Matches exactly one character.
**
** [...] Matches one character from the enclosed list of
** characters.
**
** [^...] Matches one character not in the enclosed list.
**
** With the [...] and [^...] matching, a ']' character can be included
** in the list by making it the first character after '[' or '^'. A
** range of characters can be specified using '-'. Example:
** "[a-z]" matches any single lower-case letter. To match a '-', make
** it the last character in the list.
**
** Like matching rules:
**
** '%' Matches any sequence of zero or more characters
**
*** '_' Matches any one character
**
** Ec Where E is the "esc" character and c is any other
** character, including '%', '_', and esc, match exactly c.
**
** The comments within this routine usually assume glob matching.
**
** This routine is usually quick, but can be N**2 in the worst case.
*/
static int patternCompare(const u8 *zPattern, /* The glob pattern */
const u8 *zString, /* The string to compare against the glob */
const struct compareInfo *pInfo, /* Information about how to do the compare */
uint32_t matchOther /* The escape char (LIKE) or '[' (GLOB) */
) {
uint32_t c, c2; /* Next pattern and input string chars */
uint32_t matchOne = pInfo->matchOne; /* "?" or "_" */
uint32_t matchAll = pInfo->matchAll; /* "*" or "%" */
u8 noCase = pInfo->noCase; /* True if uppercase==lowercase */
const u8 *zEscaped = 0; /* One past the last escaped input char */
while ((c = Utf8Read(zPattern)) != 0) {
if (c == matchAll) { /* Match "*" */
/* Skip over multiple "*" characters in the pattern. If there
** are also "?" characters, skip those as well, but consume a
** single character of the input string for each "?" skipped */
while ((c = Utf8Read(zPattern)) == matchAll || c == matchOne) {
if (c == matchOne && sqlite3Utf8Read(&zString) == 0) {
return SQLITE_NOWILDCARDMATCH;
}
}
if (c == 0) {
return SQLITE_MATCH; /* "*" at the end of the pattern matches */
} else if (c == matchOther) {
if (pInfo->matchSet == 0) {
c = sqlite3Utf8Read(&zPattern);
if (c == 0)
return SQLITE_NOWILDCARDMATCH;
} else {
/* "[...]" immediately follows the "*". We have to do a slow
** recursive search in this case, but it is an unusual case. */
assert(matchOther < 0x80); /* '[' is a single-byte character */
while (*zString) {
int bMatch = patternCompare(&zPattern[-1], zString, pInfo, matchOther);
if (bMatch != SQLITE_NOMATCH)
return bMatch;
SQLITE_SKIP_UTF8(zString);
}
return SQLITE_NOWILDCARDMATCH;
}
}
/* At this point variable c contains the first character of the
** pattern string past the "*". Search in the input string for the
** first matching character and recursively continue the match from
** that point.
**
** For a case-insensitive search, set variable cx to be the same as
** c but in the other case and search the input string for either
** c or cx.
*/
if (c <= 0x80) {
char zStop[3];
int bMatch;
if (noCase) {
zStop[0] = sqlite3Toupper(c);
zStop[1] = sqlite3Tolower(c);
zStop[2] = 0;
} else {
zStop[0] = c;
zStop[1] = 0;
}
while (1) {
zString += strcspn((const char *)zString, zStop);
if (zString[0] == 0)
break;
zString++;
bMatch = patternCompare(zPattern, zString, pInfo, matchOther);
if (bMatch != SQLITE_NOMATCH)
return bMatch;
}
} else {
int bMatch;
while ((c2 = Utf8Read(zString)) != 0) {
if (c2 != c)
continue;
bMatch = patternCompare(zPattern, zString, pInfo, matchOther);
if (bMatch != SQLITE_NOMATCH)
return bMatch;
}
}
return SQLITE_NOWILDCARDMATCH;
}
if (c == matchOther) {
if (pInfo->matchSet == 0) {
c = sqlite3Utf8Read(&zPattern);
if (c == 0)
return SQLITE_NOMATCH;
zEscaped = zPattern;
} else {
uint32_t prior_c = 0;
int seen = 0;
int invert = 0;
c = sqlite3Utf8Read(&zString);
if (c == 0)
return SQLITE_NOMATCH;
c2 = sqlite3Utf8Read(&zPattern);
if (c2 == '^') {
invert = 1;
c2 = sqlite3Utf8Read(&zPattern);
}
if (c2 == ']') {
if (c == ']')
seen = 1;
c2 = sqlite3Utf8Read(&zPattern);
}
while (c2 && c2 != ']') {
if (c2 == '-' && zPattern[0] != ']' && zPattern[0] != 0 && prior_c > 0) {
c2 = sqlite3Utf8Read(&zPattern);
if (c >= prior_c && c <= c2)
seen = 1;
prior_c = 0;
} else {
if (c == c2) {
seen = 1;
}
prior_c = c2;
}
c2 = sqlite3Utf8Read(&zPattern);
}
if (c2 == 0 || (seen ^ invert) == 0) {
return SQLITE_NOMATCH;
}
continue;
}
}
c2 = Utf8Read(zString);
if (c == c2)
continue;
if (noCase && sqlite3Tolower(c) == sqlite3Tolower(c2) && c < 0x80 && c2 < 0x80) {
continue;
}
if (c == matchOne && zPattern != zEscaped && c2 != 0)
continue;
return SQLITE_NOMATCH;
}
return *zString == 0 ? SQLITE_MATCH : SQLITE_NOMATCH;
}
/*
** The sqlite3_strglob() interface. Return 0 on a match (like strcmp()) and
** non-zero if there is no match.
*/
int sqlite3_strglob(const char *zGlobPattern, const char *zString) {
return patternCompare((u8 *)zGlobPattern, (u8 *)zString, &globInfo, '[');
}
/*
** The sqlite3_strlike() interface. Return 0 on a match and non-zero for
** a miss - like strcmp().
*/
int sqlite3_strlike(const char *zPattern, const char *zStr, unsigned int esc) {
return patternCompare((u8 *)zPattern, (u8 *)zStr, &likeInfoNorm, esc);
}