8
0
mirror of https://github.com/FirebirdSQL/firebird.git synced 2025-01-24 19:23:02 +01:00
firebird-mirror/src/jrd/evl_like.cpp
2003-11-03 23:59:24 +00:00

513 lines
13 KiB
C++

#ifndef EVL_LIKE_INCLUDED_BY_EVL_CPP
#pragma message ("Do not compile this file stand-alone. It gets included by evl.cpp!")
#else // EVL_LIKE_INCLUDED_BY_EVL_CPP
/*
* PROGRAM: JRD Access Method
* MODULE: evl_like.c
* DESCRIPTION: Expression evaluation (international)
*
* The contents of this file are subject to the Interbase Public
* License Version 1.0 (the "License"); you may not use this file
* except in compliance with the License. You may obtain a copy
* of the License at http://www.Inprise.com/IPL.html
*
* Software distributed under the License is distributed on an
* "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express
* or implied. See the License for the specific language governing
* rights and limitations under the License.
*
* The Original Code was created by Inprise Corporation
* and its predecessors. Portions created by Inprise Corporation are
* Copyright (C) Inprise Corporation.
*
* All Rights Reserved.
* Contributor(s): ______________________________________.
*/
#define SLEUTH_insensitive 1
#define COND_UPPER(obj,c) ((flags & SLEUTH_insensitive) ? (obj).to_upper(c) : (c))
#define SQL_MATCH_ONE '_'
#define SQL_MATCH_ANY '%'
#define GDML_MATCH_ONE '?'
#define GDML_MATCH_ANY '*'
#define GDML_QUOTE '@'
#define GDML_NOT '~'
#define GDML_RANGE '-'
#define GDML_CLASS_START '['
#define GDML_CLASS_END ']'
#define GDML_SUBSTITUTE '='
#define GDML_FLAG_SET '+'
#define GDML_FLAG_CLEAR '-'
#define GDML_COMMA ','
#define GDML_LPAREN '('
#define GDML_RPAREN ')'
USHORT LIKENAME(TDBB tdbb, TextType obj, LIKETYPE * p1, SSHORT l1_bytes, /* byte count */
LIKETYPE * p2, SSHORT l2_bytes, /* byte count */
UCS2_CHAR escape_char)
{
/**************************************
*
* E V L _ ? ? _ l i k e
* E V L _ w c _ l i k e
* E V L _ n c _ l i k e
*
**************************************
*
* Functional description
* Return true if a string (p1, l1) matches a given pattern (p2, l2).
* The character '_' in the pattern may match any single character
* in the the string, and the character '%' may match any sequence
* of characters.
*
* Wide char version operates on USHORT-based buffer,
* instead of UCHAR-based.
*
* Note Bene: LIKETYPE is defined by including file (evl.c) to either
* UCS2_CHAR or NCHAR, depending on the varient being compiled here.
*
* (escape_char == 0) means no escape character is specified.
*
**************************************/
LIKETYPE c;
USHORT escape;
SSHORT l1, l2;
fb_assert(p1 != NULL);
fb_assert(p2 != NULL);
fb_assert((l1_bytes % sizeof(LIKETYPE)) == 0);
fb_assert((l2_bytes % sizeof(LIKETYPE)) == 0);
l1 = l1_bytes / sizeof(LIKETYPE);
l2 = l2_bytes / sizeof(LIKETYPE);
escape = FALSE;
while (l2-- > 0) {
c = *p2++;
if (escape_char && ((UCS2_CHAR) c == escape_char)) {
if (l2-- > 0) {
c = *p2++;
/* Note: SQL II says <escape_char><escape_char> is error condition */
if (((UCS2_CHAR) c == escape_char) ||
((UCS2_CHAR) c == (UCS2_CHAR) SQL_MATCH_ANY) ||
((UCS2_CHAR) c == (UCS2_CHAR) SQL_MATCH_ONE))
escape = TRUE;
}
if (!escape)
ERR_post(gds_like_escape_invalid, 0);
}
if (!escape && (c == (LIKETYPE) SQL_MATCH_ANY)) {
while ((l2 > 0) && (*p2 == (LIKETYPE) SQL_MATCH_ANY)) {
l2--;
p2++;
}
if (l2 == 0)
return TRUE;
while (l1)
if (LIKENAME(tdbb, obj, p1++, l1-- * sizeof(LIKETYPE),
p2, l2 * sizeof(LIKETYPE), escape_char))
return TRUE;
return FALSE;
}
if (l1-- == 0)
return FALSE;
if ((escape || c != (LIKETYPE) SQL_MATCH_ONE) && c != *p1)
return FALSE;
escape = FALSE;
p1++;
}
return (l1) ? FALSE : TRUE;
}
USHORT MATCHESNAME(TDBB tdbb,
TextType obj,
MATCHESTYPE * p1,
SSHORT l1_bytes, MATCHESTYPE * p2, SSHORT l2_bytes)
{
/**************************************
*
* E V L _ ? ? _ m a t c h e s
* E V L _ w c _ m a t c h e s
* E V L _ n c _ m a t c h e s
*
**************************************
*
* Functional description
* Return true if a string (p1, l1) matches a given pattern (p2, l2).
* The character '?' in the pattern may match any single character
* in the the string, and the character '*' may match any sequence
* of characters.
*
* Wide SCHAR version operates on short-based buffer,
* instead of SCHAR-based.
*
* Matches is not a case-sensitive operation, thus it has no
* 8-bit international impact.
*
**************************************/
MATCHESTYPE c;
SSHORT l1, l2;
fb_assert(p1 != NULL);
fb_assert(p2 != NULL);
fb_assert((l1_bytes % sizeof(MATCHESTYPE)) == 0);
fb_assert((l2_bytes % sizeof(MATCHESTYPE)) == 0);
l1 = l1_bytes / sizeof(MATCHESTYPE);
l2 = l2_bytes / sizeof(MATCHESTYPE);
while (l2-- > 0) {
c = *p2++;
if (c == (MATCHESTYPE) GDML_MATCH_ANY) {
while ((l2 > 0) && (*p2 == (MATCHESTYPE) GDML_MATCH_ANY)) {
l2--;
p2++;
}
if (l2 == 0)
return TRUE;
while (l1)
if (MATCHESNAME(tdbb, obj, p1++,
l1-- * sizeof(MATCHESTYPE), p2,
l2 * sizeof(MATCHESTYPE))) return TRUE;
return FALSE;
}
if (l1-- == 0)
return FALSE;
if ((c != (MATCHESTYPE) GDML_MATCH_ONE && c != *p1))
return FALSE;
p1++;
}
return (l1) ? FALSE : TRUE;
}
USHORT SLEUTHNAME(TDBB tdbb_dummy,
TextType obj,
USHORT flags,
SLEUTHTYPE * search,
USHORT search_len, SLEUTHTYPE * match, USHORT match_len)
{
/**************************************
*
* E V L _ ? ? _ s l e u t h _ c h e c k
*
**************************************
*
* Functional description
* Evaluate the "sleuth" search operator.
*
* Turn the (pointer, byte length) input parameters into
* (pointer, end_pointer) for use in SLEUTH_AUX
*
**************************************/
SLEUTHTYPE *end_match, *end_search;
fb_assert((match_len % sizeof(SLEUTHTYPE)) == 0);
fb_assert((search_len % sizeof(SLEUTHTYPE)) == 0);
end_match = match + (match_len / sizeof(SLEUTHTYPE));
end_search = search + (search_len / sizeof(SLEUTHTYPE));
return ((USHORT)
SLEUTH_AUX(obj, flags, search, end_search, match, end_match));
}
USHORT SLEUTH_MERGE_NAME(TDBB tdbb_dummy,
TextType obj,
SLEUTHTYPE * match,
USHORT match_bytes,
SLEUTHTYPE * control,
USHORT control_bytes,
SLEUTHTYPE * combined, USHORT combined_bytes)
{
/**************************************
*
* E V L _ ? ? _ s l e u t h _ m e r g e
*
**************************************
*
* Functional description
* Merge the matching pattern and control strings to give a cannonical
* matching pattern. Return the length of the combined string.
*
* What this routine does is to take the language template, strip off
* the prefix and put it in the output string, then parse the definitions
* into an array of character pointers. The index array is the defined
* character. The routine then takes the actual match pattern and uses
* the characters in it to index into the definitions to produce an equivalent
* pattern in the cannonical language.
*
* The silly loop setting *v++ to zero initializes the array up to the
* highest character defined (also max_op). Believe it or not, that part
* is not a bug.
*
**************************************/
SLEUTHTYPE c, *comb, **v, *vector[256], **end_vector, *p, max_op,
temp[256], *t, *end_match, *end_control;
fb_assert(match != NULL);
fb_assert(control != NULL);
fb_assert(combined != NULL);
fb_assert((match_bytes % sizeof(SLEUTHTYPE)) == 0);
fb_assert((control_bytes % sizeof(SLEUTHTYPE)) == 0);
end_match = match + (match_bytes / sizeof(SLEUTHTYPE));
end_control = control + (control_bytes / sizeof(SLEUTHTYPE));
max_op = 0;
comb = combined;
v = vector;
t = temp;
/* Parse control string into substitution strings and initializing string */
while (control < end_control) {
c = *control++;
if (*control == GDML_SUBSTITUTE) {
/* Note: don't allow substitution characters larger than vector */
end_vector = vector + (((int)c < FB_NELEM(vector)) ? c : 0);
while (v <= end_vector)
*v++ = 0;
*end_vector = t;
++control;
while (control < end_control) {
c = *control++;
if ((t > temp && t[-1] == GDML_QUOTE)
|| ((c != GDML_COMMA) && (c != GDML_RPAREN)))
*t++ = c;
else
break;
}
*t++ = 0;
}
else if (c == GDML_QUOTE && control < end_control)
*comb++ = *control++;
else if (c == GDML_RPAREN)
break;
else if (c != GDML_LPAREN)
*comb++ = c;
}
max_op = v - vector;
/* Interpret matching string, substituting where appropriate */
while (match < end_match) {
c = *match++;
/* if we've got a defined character, slurp the definition */
if (c <= max_op && (p = vector[c])) {
while (*p)
*comb++ = *p++;
/* if we've got the definition of a quote character,
slurp the next character too */
if (comb > combined && comb[-1] == GDML_QUOTE && *match)
*comb++ = *match++;
}
/* at this point we've got a non-match, but as it might be one of ours,
quote it. */
else {
if ((((size_t) c) < sizeof(special)) && special[c] &&
comb > combined && comb[-1] != GDML_QUOTE)
*comb++ = GDML_QUOTE;
*comb++ = c;
}
}
/* Put in trailing stuff */
while (control < end_control)
*comb++ = *control++;
/* YYY - need to add code watching for overflow of combined */
return (comb - combined) * sizeof(SLEUTHTYPE);
}
static BOOLEAN SLEUTH_AUX(
TextType obj,
USHORT flags,
SLEUTHTYPE * search,
SLEUTHTYPE * end_search,
SLEUTHTYPE * match, SLEUTHTYPE * end_match)
{
/**************************************
*
* s l e u t h _ a u x
*
**************************************
*
* Functional description
* Evaluate the "sleuth" search operator.
*
**************************************/
SLEUTHTYPE c, d, *class_, *end_class;
fb_assert(search != NULL);
fb_assert(end_search != NULL);
fb_assert(match != NULL);
fb_assert(end_match != NULL);
fb_assert(search <= end_search);
fb_assert(match <= end_match);
while (match < end_match) {
c = *match++;
if ((c == GDML_QUOTE && (c = *match++)) ||
((((size_t) c) < sizeof(special)) && !special[c])) {
c = COND_UPPER(obj, c);
if (match >= end_match || *match != GDML_MATCH_ANY) {
if (search >= end_search)
return FALSE;
d = *search++;
if (c != COND_UPPER(obj, d))
return FALSE;
}
else {
++match;
for (;;)
if (SLEUTH_AUX
(obj, flags, search, end_search, match,
end_match)) return TRUE;
else if (search < end_search) {
d = *search++;
if (c != COND_UPPER(obj, d))
return FALSE;
}
else
return FALSE;
}
}
else if (c == GDML_MATCH_ONE)
if (match >= end_match || *match != GDML_MATCH_ANY) {
if (search >= end_search)
return FALSE;
search++;
}
else {
if (++match >= end_match)
return TRUE;
for (;;)
if (SLEUTH_AUX
(obj, flags, search, end_search, match,
end_match)) return TRUE;
else if (++search >= end_search)
return FALSE;
}
else if (c == GDML_CLASS_START) {
class_ = match;
while (*match++ != GDML_CLASS_END)
if (match >= end_match)
return FALSE;
end_class = match - 1;
if (match >= end_match || *match != GDML_MATCH_ANY) {
if (!SLEUTH_CLASS_NAME
(obj, flags, class_, end_class, *search++))
return FALSE;
}
else {
++match;
for (;;)
if (SLEUTH_AUX
(obj, flags, search, end_search, match,
end_match)) return TRUE;
else if (search < end_search) {
if (!SLEUTH_CLASS_NAME
(obj, flags, class_, end_class, *search++))
return FALSE;
}
else
return FALSE;
}
}
else if (c == GDML_FLAG_SET) {
c = *match++;
if (c == 's' || c == 'S')
flags &= ~SLEUTH_insensitive;
}
else if (c == GDML_FLAG_CLEAR) {
c = *match++;
if (c == 's' || c == 'S')
flags |= SLEUTH_insensitive;
}
}
if (search < end_search)
return FALSE;
return TRUE;
}
static BOOLEAN SLEUTH_CLASS_NAME(
TextType obj,
USHORT flags,
SLEUTHTYPE * class_,
SLEUTHTYPE * end_class, SLEUTHTYPE character)
{
/**************************************
*
* s l e u t h _ c l a s s
*
**************************************
*
* Functional description
* See if a character is a member of a class.
* Japanese version operates on short-based buffer,
* instead of SCHAR-based.
*
**************************************/
SLEUTHTYPE c;
USHORT result;
fb_assert(class_ != NULL);
fb_assert(end_class != NULL);
fb_assert(class_ <= end_class);
result = TRUE;
character = COND_UPPER(obj, character);
if (*class_ == GDML_NOT) {
++class_;
result = FALSE;
}
while (class_ < end_class) {
c = *class_++;
if (c == GDML_QUOTE) {
if (*class_++ == character)
return TRUE;
}
else if (*class_ == GDML_RANGE) {
class_ += 2;
if (character >= c && character <= class_[-1])
return result;
}
else if (character == c)
return result;
}
return (result) ? FALSE : TRUE;
}
#endif // EVL_LIKE_INCLUDED_BY_EVL_CPP