mirror of
https://github.com/FirebirdSQL/firebird.git
synced 2025-01-23 04:43:03 +01:00
Fix #6873 - SIMILAR TO does not use index when pattern starts with non-wildcard character
(in contrary to LIKE).
This commit is contained in:
parent
23607dc3cf
commit
d2addfa578
@ -142,31 +142,6 @@ namespace
|
||||
return c == '*' || c == '+' || c == '?' || c == '{';
|
||||
}
|
||||
|
||||
bool isSpecial(UChar32 c)
|
||||
{
|
||||
switch (c)
|
||||
{
|
||||
case '^':
|
||||
case '-':
|
||||
case '_':
|
||||
case '%':
|
||||
case '[':
|
||||
case ']':
|
||||
case '(':
|
||||
case ')':
|
||||
case '{':
|
||||
case '}':
|
||||
case '|':
|
||||
case '?':
|
||||
case '+':
|
||||
case '*':
|
||||
return true;
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool isRe2Special(UChar32 c)
|
||||
{
|
||||
switch (c)
|
||||
@ -401,7 +376,7 @@ namespace
|
||||
charSavePos = patternPos;
|
||||
c = getPatternChar();
|
||||
|
||||
if (!(c == escapeChar || isSpecial(c)))
|
||||
if (!(c == escapeChar || SimilarToRegex::isSpecialChar(c)))
|
||||
status_exception::raise(Arg::Gds(isc_escape_invalid));
|
||||
}
|
||||
else
|
||||
@ -470,7 +445,7 @@ namespace
|
||||
charSavePos = patternPos;
|
||||
c = getPatternChar();
|
||||
|
||||
if (!(c == escapeChar || isSpecial(c)))
|
||||
if (!(c == escapeChar || SimilarToRegex::isSpecialChar(c)))
|
||||
status_exception::raise(Arg::Gds(isc_escape_invalid));
|
||||
}
|
||||
|
||||
@ -638,12 +613,12 @@ namespace
|
||||
charSavePos = patternPos;
|
||||
op = getPatternChar();
|
||||
|
||||
if (!isSpecial(op) && op != escapeChar)
|
||||
if (!SimilarToRegex::isSpecialChar(op) && op != escapeChar)
|
||||
status_exception::raise(Arg::Gds(isc_escape_invalid));
|
||||
}
|
||||
else
|
||||
{
|
||||
if (isSpecial(op))
|
||||
if (SimilarToRegex::isSpecialChar(op))
|
||||
{
|
||||
controlChar = true;
|
||||
patternPos = charSavePos;
|
||||
|
@ -51,6 +51,31 @@ public:
|
||||
const char* patternStr, unsigned patternLen, const char* escapeStr, unsigned escapeLen);
|
||||
~SimilarToRegex();
|
||||
|
||||
public:
|
||||
static bool isSpecialChar(ULONG c)
|
||||
{
|
||||
switch (c)
|
||||
{
|
||||
case '^':
|
||||
case '-':
|
||||
case '_':
|
||||
case '%':
|
||||
case '[':
|
||||
case ']':
|
||||
case '(':
|
||||
case ')':
|
||||
case '{':
|
||||
case '}':
|
||||
case '|':
|
||||
case '?':
|
||||
case '+':
|
||||
case '*':
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
private:
|
||||
static void finalize(SimilarToRegex* self);
|
||||
|
||||
|
149
src/jrd/opt.cpp
149
src/jrd/opt.cpp
@ -352,7 +352,7 @@ static bool map_equal(const ValueExprNode*, const ValueExprNode*, const MapNode*
|
||||
static void mark_indices(CompilerScratch::csb_repeat* csbTail, SSHORT relationId);
|
||||
static bool node_equality(const ValueExprNode*, const ValueExprNode*);
|
||||
static bool node_equality(const BoolExprNode*, const BoolExprNode*);
|
||||
static ValueExprNode* optimize_like(thread_db*, CompilerScratch*, ComparativeBoolNode*);
|
||||
static ValueExprNode* optimize_like_similar(thread_db*, CompilerScratch*, ComparativeBoolNode*);
|
||||
static USHORT river_count(USHORT count, ValueExprNode** eq_class);
|
||||
static bool search_stack(const ValueExprNode*, const ValueExprNodeStack&);
|
||||
static void set_direction(SortNode*, SortNode*);
|
||||
@ -1445,12 +1445,13 @@ static SLONG decompose(thread_db* tdbb, BoolExprNode* boolNode, BoolExprNodeStac
|
||||
return 2;
|
||||
}
|
||||
|
||||
// turn a LIKE into a LIKE and a STARTING WITH, if it starts
|
||||
// turn a LIKE/SIMILAR into a LIKE/SIMILAR and a STARTING WITH, if it starts
|
||||
// with anything other than a pattern-matching character
|
||||
|
||||
ValueExprNode* arg;
|
||||
|
||||
if (cmpNode->blrOp == blr_like && (arg = optimize_like(tdbb, csb, cmpNode)))
|
||||
if ((cmpNode->blrOp == blr_like || cmpNode->blrOp == blr_similar) &&
|
||||
(arg = optimize_like_similar(tdbb, csb, cmpNode)))
|
||||
{
|
||||
ComparativeBoolNode* newCmpNode = FB_NEW_POOL(csb->csb_pool) ComparativeBoolNode(
|
||||
csb->csb_pool, blr_starting);
|
||||
@ -3389,81 +3390,80 @@ static bool node_equality(const BoolExprNode* node1, const BoolExprNode* node2)
|
||||
}
|
||||
|
||||
|
||||
static ValueExprNode* optimize_like(thread_db* tdbb, CompilerScratch* csb, ComparativeBoolNode* like_node)
|
||||
static ValueExprNode* optimize_like_similar(thread_db* tdbb, CompilerScratch* csb, ComparativeBoolNode* cmpNode)
|
||||
{
|
||||
/**************************************
|
||||
*
|
||||
* o p t i m i z e _ l i k e
|
||||
* o p t i m i z e _ l i k e _ s i m i l a r
|
||||
*
|
||||
**************************************
|
||||
*
|
||||
* Functional description
|
||||
* Optimize a LIKE expression, if possible,
|
||||
* into a "starting with" AND a "like". This
|
||||
* Optimize a LIKE/SIMILAR expression, if possible,
|
||||
* into a "starting with" AND a "LIKE/SIMILAR". This
|
||||
* will allow us to use the index for the
|
||||
* starting with, and the LIKE can just tag
|
||||
* starting with, and the LIKE/SIMILAR can just tag
|
||||
* along for the ride.
|
||||
* But on the ride it does useful work, consider
|
||||
* match LIKE "ab%c". This is optimized by adding
|
||||
* AND starting_with "ab", but the LIKE clause is
|
||||
* match LIKE/SIMILAR "ab%c". This is optimized by adding
|
||||
* AND starting_with "ab", but the LIKE/SIMILAR clause is
|
||||
* still needed.
|
||||
*
|
||||
**************************************/
|
||||
SET_TDBB(tdbb);
|
||||
|
||||
ValueExprNode* match_node = like_node->arg1;
|
||||
ValueExprNode* pattern_node = like_node->arg2;
|
||||
ValueExprNode* escape_node = like_node->arg3;
|
||||
ValueExprNode* matchNode = cmpNode->arg1;
|
||||
ValueExprNode* patternNode = cmpNode->arg2;
|
||||
ValueExprNode* escapeNode = cmpNode->arg3;
|
||||
|
||||
// if the pattern string or the escape string can't be
|
||||
// evaluated at compile time, forget it
|
||||
if (!nodeIs<LiteralNode>(pattern_node) || (escape_node && !nodeIs<LiteralNode>(escape_node)))
|
||||
return NULL;
|
||||
if (!nodeIs<LiteralNode>(patternNode) || (escapeNode && !nodeIs<LiteralNode>(escapeNode)))
|
||||
return nullptr;
|
||||
|
||||
dsc match_desc;
|
||||
match_node->getDesc(tdbb, csb, &match_desc);
|
||||
dsc matchDesc;
|
||||
matchNode->getDesc(tdbb, csb, &matchDesc);
|
||||
|
||||
dsc* pattern_desc = &nodeAs<LiteralNode>(pattern_node)->litDesc;
|
||||
dsc* escape_desc = NULL;
|
||||
dsc* patternDesc = &nodeAs<LiteralNode>(patternNode)->litDesc;
|
||||
dsc* escapeDesc = nullptr;
|
||||
|
||||
if (escape_node)
|
||||
escape_desc = &nodeAs<LiteralNode>(escape_node)->litDesc;
|
||||
if (escapeNode)
|
||||
escapeDesc = &nodeAs<LiteralNode>(escapeNode)->litDesc;
|
||||
|
||||
// if either is not a character expression, forget it
|
||||
if ((match_desc.dsc_dtype > dtype_any_text) ||
|
||||
(pattern_desc->dsc_dtype > dtype_any_text) ||
|
||||
(escape_node && escape_desc->dsc_dtype > dtype_any_text))
|
||||
if ((matchDesc.dsc_dtype > dtype_any_text) ||
|
||||
(patternDesc->dsc_dtype > dtype_any_text) ||
|
||||
(escapeNode && escapeDesc->dsc_dtype > dtype_any_text))
|
||||
{
|
||||
return NULL;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
TextType* matchTextType = INTL_texttype_lookup(tdbb, INTL_TTYPE(&match_desc));
|
||||
TextType* matchTextType = INTL_texttype_lookup(tdbb, INTL_TTYPE(&matchDesc));
|
||||
CharSet* matchCharset = matchTextType->getCharSet();
|
||||
TextType* patternTextType = INTL_texttype_lookup(tdbb, INTL_TTYPE(pattern_desc));
|
||||
TextType* patternTextType = INTL_texttype_lookup(tdbb, INTL_TTYPE(patternDesc));
|
||||
CharSet* patternCharset = patternTextType->getCharSet();
|
||||
|
||||
if (cmpNode->blrOp == blr_like)
|
||||
{
|
||||
UCHAR escape_canonic[sizeof(ULONG)];
|
||||
UCHAR first_ch[sizeof(ULONG)];
|
||||
ULONG first_len;
|
||||
UCHAR* p;
|
||||
USHORT p_count;
|
||||
MoveBuffer escapeBuffer;
|
||||
|
||||
// Get the escape character, if any
|
||||
if (escape_node)
|
||||
if (escapeNode)
|
||||
{
|
||||
// Ensure escape string is same character set as match string
|
||||
|
||||
MoveBuffer escape_buffer;
|
||||
|
||||
p_count = MOV_make_string2(tdbb, escape_desc, INTL_TTYPE(&match_desc), &p, escape_buffer);
|
||||
p_count = MOV_make_string2(tdbb, escapeDesc, INTL_TTYPE(&matchDesc), &p, escapeBuffer);
|
||||
|
||||
first_len = matchCharset->substring(p_count, p, sizeof(first_ch), first_ch, 0, 1);
|
||||
matchTextType->canonical(first_len, p, sizeof(escape_canonic), escape_canonic);
|
||||
}
|
||||
|
||||
MoveBuffer pattern_buffer;
|
||||
|
||||
p_count = MOV_make_string2(tdbb, pattern_desc, INTL_TTYPE(&match_desc), &p, pattern_buffer);
|
||||
MoveBuffer patternBuffer;
|
||||
p_count = MOV_make_string2(tdbb, patternDesc, INTL_TTYPE(&matchDesc), &p, patternBuffer);
|
||||
|
||||
first_len = matchCharset->substring(p_count, p, sizeof(first_ch), first_ch, 0, 1);
|
||||
|
||||
@ -3473,26 +3473,24 @@ static ValueExprNode* optimize_like(thread_db* tdbb, CompilerScratch* csb, Compa
|
||||
const BYTE canWidth = matchTextType->getCanonicalWidth();
|
||||
|
||||
const UCHAR* matchOneChar = matchCharset->getSqlMatchOneLength() != 0 ?
|
||||
matchTextType->getCanonicalChar(TextType::CHAR_SQL_MATCH_ONE) : NULL;
|
||||
matchTextType->getCanonicalChar(TextType::CHAR_SQL_MATCH_ONE) : nullptr;
|
||||
const UCHAR* matchAnyChar = matchCharset->getSqlMatchAnyLength() != 0 ?
|
||||
matchTextType->getCanonicalChar(TextType::CHAR_SQL_MATCH_ANY) : NULL;
|
||||
matchTextType->getCanonicalChar(TextType::CHAR_SQL_MATCH_ANY) : nullptr;
|
||||
|
||||
// If the first character is a wildcard char, forget it.
|
||||
if ((!escape_node || memcmp(first_canonic, escape_canonic, canWidth) != 0) &&
|
||||
if ((!escapeNode || memcmp(first_canonic, escape_canonic, canWidth) != 0) &&
|
||||
((matchOneChar && memcmp(first_canonic, matchOneChar, canWidth) == 0) ||
|
||||
(matchAnyChar && memcmp(first_canonic, matchAnyChar, canWidth) == 0)))
|
||||
{
|
||||
return NULL;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// allocate a literal node to store the starting with string;
|
||||
// assume it will be shorter than the pattern string
|
||||
// CVC: This assumption may not be true if we use "value like field".
|
||||
|
||||
LiteralNode* literal = FB_NEW_POOL(csb->csb_pool) LiteralNode(csb->csb_pool);
|
||||
literal->litDesc = *pattern_desc;
|
||||
UCHAR* q = literal->litDesc.dsc_address = FB_NEW_POOL(csb->csb_pool)
|
||||
UCHAR[literal->litDesc.dsc_length];
|
||||
literal->litDesc = *patternDesc;
|
||||
UCHAR* q = literal->litDesc.dsc_address = FB_NEW_POOL(csb->csb_pool) UCHAR[literal->litDesc.dsc_length];
|
||||
|
||||
// Set the string length to point till the first wildcard character.
|
||||
|
||||
@ -3504,12 +3502,11 @@ static ValueExprNode* optimize_like(thread_db* tdbb, CompilerScratch* csb, Compa
|
||||
|
||||
for (const UCHAR* patternPtr = patternCanonical.begin(); patternPtr < patternCanonical.end(); )
|
||||
{
|
||||
// if there are escape characters, skip past them and
|
||||
// don't treat the next char as a wildcard
|
||||
// if there are escape characters, skip past them and don't treat the next char as a wildcard
|
||||
const UCHAR* patternPtrStart = patternPtr;
|
||||
patternPtr += canWidth;
|
||||
|
||||
if (escape_node && (memcmp(patternPtrStart, escape_canonic, canWidth) == 0))
|
||||
if (escapeNode && (memcmp(patternPtrStart, escape_canonic, canWidth) == 0))
|
||||
{
|
||||
// Check for Escape character at end of string
|
||||
if (!(patternPtr < patternCanonical.end()))
|
||||
@ -3524,8 +3521,8 @@ static ValueExprNode* optimize_like(thread_db* tdbb, CompilerScratch* csb, Compa
|
||||
break;
|
||||
}
|
||||
|
||||
q += patternCharset->substring(pattern_desc->dsc_length,
|
||||
pattern_desc->dsc_address,
|
||||
q += patternCharset->substring(patternDesc->dsc_length,
|
||||
patternDesc->dsc_address,
|
||||
literal->litDesc.dsc_length - (q - literal->litDesc.dsc_address), q,
|
||||
(patternPtrStart - patternCanonical.begin()) / canWidth, 1);
|
||||
}
|
||||
@ -3533,6 +3530,62 @@ static ValueExprNode* optimize_like(thread_db* tdbb, CompilerScratch* csb, Compa
|
||||
literal->litDesc.dsc_length = q - literal->litDesc.dsc_address;
|
||||
|
||||
return literal;
|
||||
}
|
||||
else
|
||||
{
|
||||
fb_assert(cmpNode->blrOp == blr_similar);
|
||||
|
||||
MoveBuffer escapeBuffer;
|
||||
UCHAR* escapeStart = nullptr;
|
||||
ULONG escapeLen = 0;
|
||||
|
||||
// Get the escape character, if any
|
||||
if (escapeNode)
|
||||
{
|
||||
// Ensure escape string is same character set as match string
|
||||
escapeLen = MOV_make_string2(tdbb, escapeDesc, INTL_TTYPE(&matchDesc), &escapeStart, escapeBuffer);
|
||||
}
|
||||
|
||||
MoveBuffer patternBuffer;
|
||||
UCHAR* patternStart;
|
||||
ULONG patternLen = MOV_make_string2(tdbb, patternDesc, INTL_TTYPE(&matchDesc), &patternStart, patternBuffer);
|
||||
const auto patternEnd = patternStart + patternLen;
|
||||
const UCHAR* patternPtr = patternStart;
|
||||
|
||||
MoveBuffer prefixBuffer;
|
||||
ULONG charLen = 0;
|
||||
|
||||
while (IntlUtil::readOneChar(matchCharset, &patternPtr, patternEnd, &charLen))
|
||||
{
|
||||
if (escapeNode && charLen == escapeLen && memcmp(patternPtr, escapeStart, escapeLen) == 0)
|
||||
{
|
||||
if (!IntlUtil::readOneChar(matchCharset, &patternPtr, patternEnd, &charLen) ||
|
||||
!((charLen == escapeLen && memcmp(patternPtr, escapeStart, escapeLen) == 0) ||
|
||||
(charLen == 1 && SimilarToRegex::isSpecialChar(*patternPtr))))
|
||||
{
|
||||
// Invalid escape.
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
else if (charLen == 1 && SimilarToRegex::isSpecialChar(*patternPtr))
|
||||
break;
|
||||
|
||||
prefixBuffer.push(patternPtr, charLen);
|
||||
}
|
||||
|
||||
if (prefixBuffer.isEmpty())
|
||||
return nullptr;
|
||||
|
||||
// Allocate a literal node to store the starting with string.
|
||||
// Use the match text type as the pattern string is converted to it.
|
||||
|
||||
LiteralNode* literal = FB_NEW_POOL(csb->csb_pool) LiteralNode(csb->csb_pool);
|
||||
literal->litDesc.makeText(prefixBuffer.getCount(), INTL_TTYPE(&matchDesc),
|
||||
FB_NEW_POOL(csb->csb_pool) UCHAR[prefixBuffer.getCount()]);
|
||||
memcpy(literal->litDesc.dsc_address, prefixBuffer.begin(), prefixBuffer.getCount());
|
||||
|
||||
return literal;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user