263 lines
8.4 KiB
C
263 lines
8.4 KiB
C
#include "stripped_sqlite_int.h"
|
|
#include <ctype.h>
|
|
|
|
#define sqlite3Toupper(x) toupper((unsigned char)(x))
|
|
#define sqlite3Tolower(x) tolower((unsigned char)(x))
|
|
|
|
/*
|
|
** This lookup table is used to help decode the first byte of
|
|
** a multi-byte UTF8 character.
|
|
*/
|
|
static const unsigned char sqlite3Utf8Trans1[] = {
|
|
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
|
|
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
|
|
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
|
|
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00,
|
|
};
|
|
|
|
u32 sqlite3Utf8Read(const unsigned char **pz /* Pointer to string from which to read char */
|
|
) {
|
|
unsigned int c;
|
|
|
|
/* Same as READ_UTF8() above but without the zTerm parameter.
|
|
** For this routine, we assume the UTF8 string is always zero-terminated.
|
|
*/
|
|
c = *((*pz)++);
|
|
if (c >= 0xc0) {
|
|
c = sqlite3Utf8Trans1[c - 0xc0];
|
|
while ((*(*pz) & 0xc0) == 0x80) {
|
|
c = (c << 6) + (0x3f & *((*pz)++));
|
|
}
|
|
if (c < 0x80 || (c & 0xFFFFF800) == 0xD800 || (c & 0xFFFFFFFE) == 0xFFFE) {
|
|
c = 0xFFFD;
|
|
}
|
|
}
|
|
return c;
|
|
}
|
|
|
|
/*
|
|
** A structure defining how to do GLOB-style comparisons.
|
|
*/
|
|
struct compareInfo {
|
|
u8 matchAll; /* "*" or "%" */
|
|
u8 matchOne; /* "?" or "_" */
|
|
u8 matchSet; /* "[" or 0 */
|
|
u8 noCase; /* true to ignore case differences */
|
|
};
|
|
|
|
/*
|
|
** For LIKE and GLOB matching on EBCDIC machines, assume that every
|
|
** character is exactly one byte in size. Also, provde the Utf8Read()
|
|
** macro for fast reading of the next character in the common case where
|
|
** the next character is ASCII.
|
|
*/
|
|
#define Utf8Read(A) (A[0] < 0x80 ? *(A++) : sqlite3Utf8Read(&A))
|
|
|
|
static const struct compareInfo globInfo = {'*', '?', '[', 0};
|
|
/* The correct SQL-92 behavior is for the LIKE operator to ignore
|
|
** case. Thus 'a' LIKE 'A' would be true. */
|
|
static const struct compareInfo likeInfoNorm = {'%', '_', 0, 1};
|
|
/* If SQLITE_CASE_SENSITIVE_LIKE is defined, then the LIKE operator
|
|
** is case sensitive causing 'a' LIKE 'A' to be false */
|
|
// static const struct compareInfo likeInfoAlt = { '%', '_', 0, 0 };
|
|
|
|
/*
|
|
** Possible error returns from patternMatch()
|
|
*/
|
|
#define SQLITE_MATCH 0
|
|
#define SQLITE_NOMATCH 1
|
|
#define SQLITE_NOWILDCARDMATCH 2
|
|
|
|
/*
|
|
** Compare two UTF-8 strings for equality where the first string is
|
|
** a GLOB or LIKE expression. Return values:
|
|
**
|
|
** SQLITE_MATCH: Match
|
|
** SQLITE_NOMATCH: No match
|
|
** SQLITE_NOWILDCARDMATCH: No match in spite of having * or % wildcards.
|
|
**
|
|
** Globbing rules:
|
|
**
|
|
** '*' Matches any sequence of zero or more characters.
|
|
**
|
|
** '?' Matches exactly one character.
|
|
**
|
|
** [...] Matches one character from the enclosed list of
|
|
** characters.
|
|
**
|
|
** [^...] Matches one character not in the enclosed list.
|
|
**
|
|
** With the [...] and [^...] matching, a ']' character can be included
|
|
** in the list by making it the first character after '[' or '^'. A
|
|
** range of characters can be specified using '-'. Example:
|
|
** "[a-z]" matches any single lower-case letter. To match a '-', make
|
|
** it the last character in the list.
|
|
**
|
|
** Like matching rules:
|
|
**
|
|
** '%' Matches any sequence of zero or more characters
|
|
**
|
|
*** '_' Matches any one character
|
|
**
|
|
** Ec Where E is the "esc" character and c is any other
|
|
** character, including '%', '_', and esc, match exactly c.
|
|
**
|
|
** The comments within this routine usually assume glob matching.
|
|
**
|
|
** This routine is usually quick, but can be N**2 in the worst case.
|
|
*/
|
|
static int patternCompare(const u8 *zPattern, /* The glob pattern */
|
|
const u8 *zString, /* The string to compare against the glob */
|
|
const struct compareInfo *pInfo, /* Information about how to do the compare */
|
|
uint32_t matchOther /* The escape char (LIKE) or '[' (GLOB) */
|
|
) {
|
|
uint32_t c, c2; /* Next pattern and input string chars */
|
|
uint32_t matchOne = pInfo->matchOne; /* "?" or "_" */
|
|
uint32_t matchAll = pInfo->matchAll; /* "*" or "%" */
|
|
u8 noCase = pInfo->noCase; /* True if uppercase==lowercase */
|
|
const u8 *zEscaped = 0; /* One past the last escaped input char */
|
|
|
|
while ((c = Utf8Read(zPattern)) != 0) {
|
|
if (c == matchAll) { /* Match "*" */
|
|
/* Skip over multiple "*" characters in the pattern. If there
|
|
** are also "?" characters, skip those as well, but consume a
|
|
** single character of the input string for each "?" skipped */
|
|
while ((c = Utf8Read(zPattern)) == matchAll || c == matchOne) {
|
|
if (c == matchOne && sqlite3Utf8Read(&zString) == 0) {
|
|
return SQLITE_NOWILDCARDMATCH;
|
|
}
|
|
}
|
|
if (c == 0) {
|
|
return SQLITE_MATCH; /* "*" at the end of the pattern matches */
|
|
} else if (c == matchOther) {
|
|
if (pInfo->matchSet == 0) {
|
|
c = sqlite3Utf8Read(&zPattern);
|
|
if (c == 0)
|
|
return SQLITE_NOWILDCARDMATCH;
|
|
} else {
|
|
/* "[...]" immediately follows the "*". We have to do a slow
|
|
** recursive search in this case, but it is an unusual case. */
|
|
assert(matchOther < 0x80); /* '[' is a single-byte character */
|
|
while (*zString) {
|
|
int bMatch = patternCompare(&zPattern[-1], zString, pInfo, matchOther);
|
|
if (bMatch != SQLITE_NOMATCH)
|
|
return bMatch;
|
|
SQLITE_SKIP_UTF8(zString);
|
|
}
|
|
return SQLITE_NOWILDCARDMATCH;
|
|
}
|
|
}
|
|
|
|
/* At this point variable c contains the first character of the
|
|
** pattern string past the "*". Search in the input string for the
|
|
** first matching character and recursively continue the match from
|
|
** that point.
|
|
**
|
|
** For a case-insensitive search, set variable cx to be the same as
|
|
** c but in the other case and search the input string for either
|
|
** c or cx.
|
|
*/
|
|
if (c <= 0x80) {
|
|
char zStop[3];
|
|
int bMatch;
|
|
if (noCase) {
|
|
zStop[0] = sqlite3Toupper(c);
|
|
zStop[1] = sqlite3Tolower(c);
|
|
zStop[2] = 0;
|
|
} else {
|
|
zStop[0] = c;
|
|
zStop[1] = 0;
|
|
}
|
|
while (1) {
|
|
zString += strcspn((const char *)zString, zStop);
|
|
if (zString[0] == 0)
|
|
break;
|
|
zString++;
|
|
bMatch = patternCompare(zPattern, zString, pInfo, matchOther);
|
|
if (bMatch != SQLITE_NOMATCH)
|
|
return bMatch;
|
|
}
|
|
} else {
|
|
int bMatch;
|
|
while ((c2 = Utf8Read(zString)) != 0) {
|
|
if (c2 != c)
|
|
continue;
|
|
bMatch = patternCompare(zPattern, zString, pInfo, matchOther);
|
|
if (bMatch != SQLITE_NOMATCH)
|
|
return bMatch;
|
|
}
|
|
}
|
|
return SQLITE_NOWILDCARDMATCH;
|
|
}
|
|
if (c == matchOther) {
|
|
if (pInfo->matchSet == 0) {
|
|
c = sqlite3Utf8Read(&zPattern);
|
|
if (c == 0)
|
|
return SQLITE_NOMATCH;
|
|
zEscaped = zPattern;
|
|
} else {
|
|
uint32_t prior_c = 0;
|
|
int seen = 0;
|
|
int invert = 0;
|
|
c = sqlite3Utf8Read(&zString);
|
|
if (c == 0)
|
|
return SQLITE_NOMATCH;
|
|
c2 = sqlite3Utf8Read(&zPattern);
|
|
if (c2 == '^') {
|
|
invert = 1;
|
|
c2 = sqlite3Utf8Read(&zPattern);
|
|
}
|
|
if (c2 == ']') {
|
|
if (c == ']')
|
|
seen = 1;
|
|
c2 = sqlite3Utf8Read(&zPattern);
|
|
}
|
|
while (c2 && c2 != ']') {
|
|
if (c2 == '-' && zPattern[0] != ']' && zPattern[0] != 0 && prior_c > 0) {
|
|
c2 = sqlite3Utf8Read(&zPattern);
|
|
if (c >= prior_c && c <= c2)
|
|
seen = 1;
|
|
prior_c = 0;
|
|
} else {
|
|
if (c == c2) {
|
|
seen = 1;
|
|
}
|
|
prior_c = c2;
|
|
}
|
|
c2 = sqlite3Utf8Read(&zPattern);
|
|
}
|
|
if (c2 == 0 || (seen ^ invert) == 0) {
|
|
return SQLITE_NOMATCH;
|
|
}
|
|
continue;
|
|
}
|
|
}
|
|
c2 = Utf8Read(zString);
|
|
if (c == c2)
|
|
continue;
|
|
if (noCase && sqlite3Tolower(c) == sqlite3Tolower(c2) && c < 0x80 && c2 < 0x80) {
|
|
continue;
|
|
}
|
|
if (c == matchOne && zPattern != zEscaped && c2 != 0)
|
|
continue;
|
|
return SQLITE_NOMATCH;
|
|
}
|
|
return *zString == 0 ? SQLITE_MATCH : SQLITE_NOMATCH;
|
|
}
|
|
|
|
/*
|
|
** The sqlite3_strglob() interface. Return 0 on a match (like strcmp()) and
|
|
** non-zero if there is no match.
|
|
*/
|
|
int sqlite3_strglob(const char *zGlobPattern, const char *zString) {
|
|
return patternCompare((u8 *)zGlobPattern, (u8 *)zString, &globInfo, '[');
|
|
}
|
|
|
|
/*
|
|
** The sqlite3_strlike() interface. Return 0 on a match and non-zero for
|
|
** a miss - like strcmp().
|
|
*/
|
|
int sqlite3_strlike(const char *zPattern, const char *zStr, unsigned int esc) {
|
|
return patternCompare((u8 *)zPattern, (u8 *)zStr, &likeInfoNorm, esc);
|
|
}
|