8
0
mirror of https://github.com/FirebirdSQL/firebird.git synced 2025-01-22 20:03:02 +01:00

Fix #6839 - Weird string comparison with index

This commit is contained in:
Adriano dos Santos Fernandes 2021-06-24 11:06:54 -03:00
parent 4212706a15
commit b059703dd8
11 changed files with 383 additions and 347 deletions

View File

@ -212,23 +212,8 @@ USHORT TextType::string_to_key(USHORT srcLen, const UCHAR* src,
space = utf16Space;
}
if (tt->texttype_pad_option)
{
const UCHAR* pad;
for (pad = src + srcLen - spaceLength; pad >= src; pad -= spaceLength)
{
if (memcmp(pad, space, spaceLength) != 0)
break;
}
srcLen = pad - src + spaceLength;
}
if (getCharSet()->isMultiByte())
{
dstLen = UnicodeUtil::utf16ToKey(srcLen, Firebird::Aligner<USHORT>(src, srcLen), dstLen, dst);
}
else
{
if (dstLen >= srcLen)
@ -278,44 +263,61 @@ SSHORT TextType::compare(ULONG len1, const UCHAR* str1, ULONG len2, const UCHAR*
getCharSet()->getConvToUnicode().convert(spaceLength, space, sizeof(utf16Space), utf16Space);
fb_assert(spaceLength == 2); // space character can't be surrogate for default compare
space = utf16Space;
INTL_BOOL error_flag;
return UnicodeUtil::utf16Compare(len1, Firebird::Aligner<USHORT>(str1, len1),
len2, Firebird::Aligner<USHORT>(str2, len2), tt->texttype_pad_option, &error_flag);
}
int fill = len1 - len2;
if (len1 >= len2)
{
if (len2)
{
do
{
if (*str1++ != *str2++)
return (str1[-1] > str2[-1]) ? 1 : -1;
} while (--len2);
}
if (fill > 0)
{
if (tt->texttype_pad_option)
{
do
{
if (*str1++ != *space)
return (str1[-1] > *space) ? 1 : -1;
} while (--fill);
}
else
return 1;
}
return 0;
}
if (len1)
{
do
{
if (*str1++ != *str2++)
return (str1[-1] > str2[-1]) ? 1 : -1;
} while (--len1);
}
if (tt->texttype_pad_option)
{
const UCHAR* pad;
for (pad = str1 + len1 - spaceLength; pad >= str1; pad -= spaceLength)
do
{
if (memcmp(pad, space, spaceLength) != 0)
break;
}
len1 = pad - str1 + spaceLength;
for (pad = str2 + len2 - spaceLength; pad >= str2; pad -= spaceLength)
{
if (memcmp(pad, space, spaceLength) != 0)
break;
}
len2 = pad - str2 + spaceLength;
if (*str2++ != *space)
return (*space > str2[-1]) ? 1 : -1;
} while (++fill);
}
if (getCharSet()->isMultiByte())
{
INTL_BOOL error_flag;
return UnicodeUtil::utf16Compare(len1, Firebird::Aligner<USHORT>(str1, len1),
len2, Firebird::Aligner<USHORT>(str2, len2), &error_flag);
}
int cmp = memcmp(str1, str2, MIN(len1, len2));
if (cmp == 0)
cmp = (len1 < len2 ? -1 : (len1 > len2 ? 1 : 0));
else
cmp = (cmp < 0 ? -1 : 1);
return (SSHORT) cmp;
return fill ? -1 : 0;
}

View File

@ -905,7 +905,7 @@ ULONG UnicodeUtil::utf32ToUtf16(ULONG srcLen, const ULONG* src, ULONG dstLen, US
}
SSHORT UnicodeUtil::utf16Compare(ULONG len1, const USHORT* str1, ULONG len2, const USHORT* str2,
SSHORT UnicodeUtil::utf16Compare(ULONG len1, const USHORT* str1, ULONG len2, const USHORT* str2, bool pad,
INTL_BOOL* error_flag)
{
fb_assert(len1 % sizeof(*str1) == 0);
@ -916,11 +916,62 @@ SSHORT UnicodeUtil::utf16Compare(ULONG len1, const USHORT* str1, ULONG len2, con
*error_flag = false;
// safe casts - alignment not changed
int32_t cmp = getConversionICU().u_strCompare(reinterpret_cast<const UChar*>(str1), len1 / sizeof(*str1),
reinterpret_cast<const UChar*>(str2), len2 / sizeof(*str2), true);
len1 /= sizeof(*str1);
len2 /= sizeof(*str2);
return (cmp < 0 ? -1 : (cmp > 0 ? 1 : 0));
int32_t cmp;
if (pad)
{
int length1 = len1;
int length2 = len2;
if (len2 > len1)
{
length2 = len1;
if (len1 > 0 && UTF_IS_LEAD(str2[len1 - 1]))
++length2;
}
else if (len1 > len2)
{
length1 = len2;
if (len2 > 0 && UTF_IS_LEAD(str1[len2 - 1]))
++length1;
}
// safe casts - alignment not changed
cmp = getConversionICU().u_strCompare(reinterpret_cast<const UChar*>(str1), length1,
reinterpret_cast<const UChar*>(str2), length2, true);
if (cmp == 0)
{
if (length1 < len1)
{
for (const USHORT* p = str1 + length1; p != str1 + len1; ++p)
{
if (*p != ' ')
return *p < ' ' ? -1 : 1;
}
}
if (length2 < len2)
{
for (const USHORT* p = str2 + length2; p != str2 + len2; ++p)
{
if (*p != ' ')
return ' ' < *p ? -1 : 1;
}
}
}
}
else
{
// safe casts - alignment not changed
cmp = getConversionICU().u_strCompare(reinterpret_cast<const UChar*>(str1), len1,
reinterpret_cast<const UChar*>(str2), len2, true);
}
return cmp < 0 ? -1 : (cmp > 0 ? 1 : 0);
}
@ -1525,19 +1576,6 @@ USHORT UnicodeUtil::Utf16Collation::stringToKey(USHORT srcLen, const USHORT* src
srcLenLong /= sizeof(*src);
if (tt->texttype_pad_option)
{
const USHORT* pad;
for (pad = src + srcLenLong - 1; pad >= src; --pad)
{
if (*pad != 32)
break;
}
srcLenLong = pad - src + 1;
}
HalfStaticArray<USHORT, BUFFER_SMALL / 2> buffer;
const UCollator* coll = NULL;
@ -1624,33 +1662,6 @@ SSHORT UnicodeUtil::Utf16Collation::compare(ULONG len1, const USHORT* str1,
*error_flag = false;
len1 /= sizeof(*str1);
len2 /= sizeof(*str2);
if (tt->texttype_pad_option)
{
const USHORT* pad;
for (pad = str1 + len1 - 1; pad >= str1; --pad)
{
if (*pad != 32)
break;
}
len1 = pad - str1 + 1;
for (pad = str2 + len2 - 1; pad >= str2; --pad)
{
if (*pad != 32)
break;
}
len2 = pad - str2 + 1;
}
len1 *= sizeof(*str1);
len2 *= sizeof(*str2);
HalfStaticArray<USHORT, BUFFER_SMALL / 2> buffer1, buffer2;
normalize(&len1, &str1, true, buffer1);
normalize(&len2, &str2, true, buffer2);
@ -1658,10 +1669,61 @@ SSHORT UnicodeUtil::Utf16Collation::compare(ULONG len1, const USHORT* str1,
len1 /= sizeof(*str1);
len2 /= sizeof(*str2);
return (SSHORT) icu->ucolStrColl(compareCollator,
int32_t cmp;
if (tt->texttype_pad_option)
{
int length1 = len1;
int length2 = len2;
if (len2 > len1)
{
length2 = len1;
if (len1 > 0 && UTF_IS_LEAD(str2[len1 - 1]))
++length2;
}
else if (len1 > len2)
{
length1 = len2;
if (len2 > 0 && UTF_IS_LEAD(str1[len2 - 1]))
++length1;
}
// safe casts - alignment not changed
reinterpret_cast<const UChar*>(str1), len1,
reinterpret_cast<const UChar*>(str2), len2);
SSHORT cmp = icu->ucolStrColl(compareCollator,
reinterpret_cast<const UChar*>(str1), length1,
reinterpret_cast<const UChar*>(str2), length2);
if (cmp == 0)
{
if (length1 < len1)
{
for (const USHORT* p = str1 + length1; p != str1 + len1; ++p)
{
if (*p != ' ')
return *p < ' ' ? -1 : 1;
}
}
if (length2 < len2)
{
for (const USHORT* p = str2 + length2; p != str2 + len2; ++p)
{
if (*p != ' ')
return ' ' < *p ? -1 : 1;
}
}
}
return cmp;
}
else
{
// safe casts - alignment not changed
return (SSHORT) icu->ucolStrColl(compareCollator,
reinterpret_cast<const UChar*>(str1), len1,
reinterpret_cast<const UChar*>(str2), len2);
}
}

View File

@ -160,7 +160,7 @@ public:
USHORT* err_code, ULONG* err_position);
static ULONG utf32ToUtf16(ULONG srcLen, const ULONG* src, ULONG dstLen, USHORT* dst,
USHORT* err_code, ULONG* err_position);
static SSHORT utf16Compare(ULONG len1, const USHORT* str1, ULONG len2, const USHORT* str2,
static SSHORT utf16Compare(ULONG len1, const USHORT* str1, ULONG len2, const USHORT* str2, bool pad,
INTL_BOOL* error_flag);
static ULONG utf16Length(ULONG len, const USHORT* str);

View File

@ -527,36 +527,12 @@ USHORT famasc_string_to_key(texttype* obj, USHORT iInLen, const BYTE* pInChar, U
fb_assert(iOutLen <= LANGASCII_MAX_KEY);
fb_assert(iOutLen >= famasc_key_length(obj, iInLen));
// point inbuff at last character
const BYTE* inbuff = pInChar + iInLen - 1;
if (obj->texttype_pad_option)
{
// skip backwards over all spaces & reset input length
while ((inbuff >= pInChar) && (*inbuff == ASCII_SPACE))
inbuff--;
}
iInLen = (inbuff - pInChar + 1);
BYTE* outbuff = pOutChar;
while (iInLen-- && iOutLen--) {
while (iInLen-- && iOutLen--)
*outbuff++ = *pInChar++;
}
return (outbuff - pOutChar);
}
static bool all_spaces(const BYTE* s, SLONG len)
{
fb_assert(s != NULL);
while (len-- > 0)
{
if (*s++ != ASCII_SPACE)
return false;
}
return true;
return outbuff - pOutChar;
}
@ -570,32 +546,53 @@ SSHORT famasc_compare(texttype* obj, ULONG l1, const BYTE* s1, ULONG l2, const B
*error_flag = false;
const ULONG len = MIN(l1, l2);
for (ULONG i = 0; i < len; i++)
{
if (s1[i] == s2[i])
continue;
if (all_spaces(&s1[i], (SLONG) (l1 - i)))
return -1;
if (all_spaces(&s2[i], (SLONG) (l2 - i)))
return 1;
if (s1[i] < s2[i])
return -1;
int fill = l1 - l2;
return 1;
if (l1 >= l2)
{
if (l2)
{
do
{
if (*s1++ != *s2++)
return (s1[-1] > s2[-1]) ? 1 : -1;
} while (--l2);
}
if (fill > 0)
{
if (obj->texttype_pad_option)
{
do
{
if (*s1++ != ASCII_SPACE)
return (s1[-1] > ASCII_SPACE) ? 1 : -1;
} while (--fill);
}
else
return 1;
}
return 0;
}
if (l1 > len)
if (l1)
{
if (obj->texttype_pad_option && all_spaces(&s1[len], (SLONG) (l1 - len)))
return 0;
return 1;
do
{
if (*s1++ != *s2++)
return (s1[-1] > s2[-1]) ? 1 : -1;
} while (--l1);
}
if (l2 > len)
if (obj->texttype_pad_option)
{
if (obj->texttype_pad_option && all_spaces(&s2[len], (SLONG) (l2 - len)))
return 0;
return -1;
do
{
if (*s2++ != ASCII_SPACE)
return (ASCII_SPACE > s2[-1]) ? 1 : -1;
} while (++fill);
}
return (0);
return fill ? -1 : 0;
}

View File

@ -193,18 +193,6 @@ USHORT LC_NARROW_string_to_key(texttype* obj, USHORT iInLen, const BYTE* pInChar
BYTE tertiary[LANGFAM2_MAX_KEY];
BYTE special[LANGFAM2_MAX_KEY * 2];
// point inbuff at last character
const BYTE* inbuff = pInChar + iInLen - 1;
if (obj->texttype_pad_option)
{
// skip backwards over all spaces & reset input length
while ((inbuff >= pInChar) && (*inbuff == ASCII_SPACE))
inbuff--;
}
iInLen = (inbuff - pInChar + 1);
for (USHORT i = 0; i < iInLen; i++, pInChar++)
{
fb_assert(lprimary < iOutLen);
@ -405,10 +393,11 @@ struct coltab_status
static SSHORT special_scan(texttype* obj, ULONG l1, const BYTE* s1, ULONG l2, const BYTE* s2)
static SSHORT special_scan(texttype* obj, const UCHAR* s1, const UCHAR* end1, ULONG paddedLen1,
const UCHAR* s2, const UCHAR* end2, ULONG paddedLen2)
{
const SortOrderTblEntry* col1 = 0;
const SortOrderTblEntry* col2 = 0;
const SortOrderTblEntry* col1 = nullptr;
const SortOrderTblEntry* col2 = nullptr;
ULONG index1 = 0;
ULONG index2 = 0;
@ -419,47 +408,49 @@ static SSHORT special_scan(texttype* obj, ULONG l1, const BYTE* s1, ULONG l2, co
while (true)
{
// Scan to find ignore char from s1
while (l1)
while (paddedLen1)
{
col1 = &((const SortOrderTblEntry*) impl->texttype_collation_table)[*s1];
col1 = &((const SortOrderTblEntry*) impl->texttype_collation_table)[s1 < end1 ? *s1 : ASCII_SPACE];
if (col1->IsExpand && col1->IsCompress && noSpecialsFirst)
{
break;
}
l1--;
paddedLen1--;
s1++;
index1++;
}
// Scan to find ignore char from s2
while (l2)
while (paddedLen2)
{
col2 = &((const SortOrderTblEntry*) impl->texttype_collation_table)[*s2];
col2 = &((const SortOrderTblEntry*) impl->texttype_collation_table)[s2 < end2 ? *s2 : ASCII_SPACE];
if (col2->IsExpand && col2->IsCompress && noSpecialsFirst)
{
break;
}
l2--;
paddedLen2--;
s2++;
index2++;
}
if (!l1 && !l2) // All out of ignore characters
if (!paddedLen1 && !paddedLen2) // All out of ignore characters
return 0;
if (l1 && !l2) // Out in l2 only
if (paddedLen1 && !paddedLen2) // Out in paddedLen2 only
return 1000;
if (!l1 && l2) // Out in l1 only
if (!paddedLen1 && paddedLen2) // Out in paddedLen1 only
return -1000;
if (index1 < index2) // l1 has ignore ch before l2
if (index1 < index2) // paddedLen1 has ignore ch before paddedLen2
return -2000;
if (index1 > index2) // l2 has ignore ch before l1
if (index1 > index2) // paddedLen2 has ignore ch before paddedLen1
return 2000;
if (col1->Primary != col2->Primary)
return (col1->Primary - col2->Primary);
l1--;
l2--;
paddedLen1--;
paddedLen2--;
s1++;
s2++;
index1++;
@ -468,8 +459,8 @@ static SSHORT special_scan(texttype* obj, ULONG l1, const BYTE* s1, ULONG l2, co
}
static const SortOrderTblEntry* get_coltab_entry(texttype* obj, const UCHAR** p,
ULONG* l, coltab_status* stat, int* sum)
static const SortOrderTblEntry* get_coltab_entry(texttype* obj, const UCHAR** ptr,
const UCHAR* end, ULONG* paddedLen, coltab_status* stat, int* sum)
{
TextTypeImpl* impl = static_cast<TextTypeImpl*>(obj->texttype_impl);
@ -477,18 +468,18 @@ static const SortOrderTblEntry* get_coltab_entry(texttype* obj, const UCHAR** p,
if (stat->stat_flags & LC_HAVE_WAITING)
{
--*l;
++*p;
--*paddedLen;
++*ptr;
stat->stat_flags &= ~LC_HAVE_WAITING;
fb_assert(stat->stat_waiting);
return stat->stat_waiting;
}
stat->stat_waiting = NULL;
while (*l)
while (*paddedLen)
{
const SortOrderTblEntry* col =
&((const SortOrderTblEntry*) impl->texttype_collation_table)[**p];
&((const SortOrderTblEntry*) impl->texttype_collation_table)[*ptr < end ? **ptr : ASCII_SPACE];
if (col->IsExpand && col->IsCompress)
{
@ -497,15 +488,15 @@ static const SortOrderTblEntry* get_coltab_entry(texttype* obj, const UCHAR** p,
*sum = impl->ignore_sum;
// Have col
--*l;
++*p;
--*paddedLen;
++*ptr;
return col;
}
// Both flags set indicate a special value
// Need a new col
--*l;
++*p;
--*paddedLen;
++*ptr;
stat->stat_flags |= LC_HAVE_SPECIAL;
continue;
}
@ -514,17 +505,17 @@ static const SortOrderTblEntry* get_coltab_entry(texttype* obj, const UCHAR** p,
(col->IsCompress && !(impl->texttype_flags & TEXTTYPE_disable_compressions))))
{
// Have col
--*l;
++*p;
--*paddedLen;
++*ptr;
return col;
}
if (col->IsExpand)
{
const ExpandChar* exp = &((const ExpandChar*) impl->texttype_expand_table)[0];
while (exp->Ch && exp->Ch != **p)
while (exp->Ch && exp->Ch != (*ptr < end ? **ptr : ASCII_SPACE))
exp++;
fb_assert(exp->Ch == **p);
fb_assert(exp->Ch == (*ptr < end ? **ptr : ASCII_SPACE));
// Have coll1
// Have waiting
@ -534,29 +525,31 @@ static const SortOrderTblEntry* get_coltab_entry(texttype* obj, const UCHAR** p,
}
// (col->IsCompress)
if (*l > 1)
if (*ptr + 1 < end)
{
const CompressPair* cmp = &((const CompressPair*) impl->texttype_compress_table)[0];
while (cmp->CharPair[0])
{
if ((cmp->CharPair[0] == **p) &&
(cmp->CharPair[1] == *(*p + 1)))
if ((cmp->CharPair[0] == **ptr) &&
(cmp->CharPair[1] == *(*ptr + 1)))
{
// Have Col
col = &cmp->NoCaseWeight;
(*l) -= 2;
(*p) += 2;
*paddedLen -= 2;
*ptr += 2;
return col;
}
cmp++;
}
}
// Have col
--*l;
++*p;
--*paddedLen;
++*ptr;
return col;
}
return NULL;
return nullptr;
}
@ -575,24 +568,14 @@ SSHORT LC_NARROW_compare(texttype* obj, ULONG l1, const BYTE* s1, ULONG l2, cons
*error_flag = false;
if (obj->texttype_pad_option)
{
// Start at EOS, scan backwards to find non-space
const BYTE* p = s1 + l1 - 1;
while ((p >= s1) && (*p == ASCII_SPACE))
p--;
l1 = (p - s1 + 1);
p = s2 + l2 - 1;
while ((p >= s2) && (*p == ASCII_SPACE))
p--;
l2 = (p - s2 + 1);
}
const ULONG save_l1 = l1;
const ULONG save_l2 = l2;
const UCHAR* const end1 = s1 + l1;
const UCHAR* const end2 = s2 + l2;
const BYTE* const save_s1 = s1;
const BYTE* const save_s2 = s2;
ULONG paddedLen1 = obj->texttype_pad_option ? MAX(l1, l2) : l1;
ULONG paddedLen2 = obj->texttype_pad_option ? MAX(l1, l2) : l2;
ULONG savePaddedLen1 = paddedLen1;
ULONG savePaddedLen2 = paddedLen2;
SSHORT save_secondary = 0;
SSHORT save_tertiary = 0;
SSHORT save_quandary = 0;
@ -601,15 +584,15 @@ SSHORT LC_NARROW_compare(texttype* obj, ULONG l1, const BYTE* s1, ULONG l2, cons
stat1.stat_flags = 0;
stat2.stat_flags = 0;
const SortOrderTblEntry* col1 = 0;
const SortOrderTblEntry* col2 = 0;
const SortOrderTblEntry* col1 = nullptr;
const SortOrderTblEntry* col2 = nullptr;
while (true)
{
int sum1, sum2;
col1 = get_coltab_entry(obj, &s1, &l1, &stat1, &sum1);
col2 = get_coltab_entry(obj, &s2, &l2, &stat2, &sum2);
col1 = get_coltab_entry(obj, &s1, end1, &paddedLen1, &stat1, &sum1);
col2 = get_coltab_entry(obj, &s2, end2, &paddedLen2, &stat2, &sum2);
if (!col1 || !col2)
break;
@ -640,7 +623,7 @@ SSHORT LC_NARROW_compare(texttype* obj, ULONG l1, const BYTE* s1, ULONG l2, cons
}
// One of the strings ended
fb_assert(l1 == 0 || l2 == 0);
fb_assert(paddedLen1 == 0 || paddedLen2 == 0);
fb_assert(col1 == NULL || col2 == NULL);
if (col1 && !col2)
@ -649,7 +632,7 @@ SSHORT LC_NARROW_compare(texttype* obj, ULONG l1, const BYTE* s1, ULONG l2, cons
if (!col1 && col2)
return -500;
if (l1 == 0 && l2 == 0)
if (paddedLen1 == 0 && paddedLen2 == 0)
{
if (save_secondary)
return save_secondary;
@ -662,12 +645,12 @@ SSHORT LC_NARROW_compare(texttype* obj, ULONG l1, const BYTE* s1, ULONG l2, cons
!(impl->texttype_flags & TEXTTYPE_ignore_specials) &&
!(impl->texttype_flags & TEXTTYPE_specials_first))
{
return special_scan(obj, save_l1, save_s1, save_l2, save_s2);
return special_scan(obj, save_s1, end1, savePaddedLen1, save_s2, end2, savePaddedLen2);
}
return 0;
}
if (l1)
if (paddedLen1)
return 600;
return -600;
}

View File

@ -98,18 +98,27 @@ namespace
ValueExprNode* injectCast(CompilerScratch* csb,
ValueExprNode* value, CastNode*& cast,
const dsc& desc)
const dsc& desc, bool upperLimit)
{
// If the indexed column is of type int64, then we need to inject
// an extra cast to deliver the scale value to the BTR level
// an extra cast to deliver the scale value to the BTR level.
// If the type is text and this is an upper limit, we need to cast
// the expression to CHAR to fill the key with trailing spaces.
if (value && desc.dsc_dtype == dtype_int64)
if (value && (desc.dsc_dtype == dtype_int64 || (desc.isText() && upperLimit)))
{
if (!cast)
{
cast = FB_NEW_POOL(csb->csb_pool) CastNode(csb->csb_pool);
cast->source = value;
cast->castDesc = desc;
if (cast->castDesc.dsc_dtype == dtype_varying)
{
cast->castDesc.dsc_dtype = dtype_text;
cast->castDesc.dsc_length -= sizeof(USHORT);
}
cast->impureOffset = csb->allocImpure<impure_value>();
}
@ -1857,7 +1866,7 @@ bool OptimizerRetrieval::matchBoolean(IndexScratch* indexScratch, BoolExprNode*
const bool isDesc = (indexScratch->idx->idx_flags & idx_descending);
// Needed for int64 matches, see injectCast() function
// Needed for int64/text matches, see injectCast() function
CastNode *cast = NULL, *cast2 = NULL;
fb_assert(indexScratch->segments.getCount() == indexScratch->idx->idx_count);
@ -1885,8 +1894,8 @@ bool OptimizerRetrieval::matchBoolean(IndexScratch* indexScratch, BoolExprNode*
if (!((segment->scanType == segmentScanEqual) ||
(segment->scanType == segmentScanEquivalent)))
{
segment->lowerValue = injectCast(csb, value, cast, matchDesc);
segment->upperValue = injectCast(csb, value2, cast2, matchDesc);
segment->lowerValue = injectCast(csb, value, cast, matchDesc, false);
segment->upperValue = injectCast(csb, value2, cast2, matchDesc, true);
segment->scanType = segmentScanBetween;
segment->excludeLower = false;
segment->excludeUpper = false;
@ -1900,7 +1909,7 @@ bool OptimizerRetrieval::matchBoolean(IndexScratch* indexScratch, BoolExprNode*
if (!(segment->scanType == segmentScanEqual))
{
segment->lowerValue = segment->upperValue =
injectCast(csb, value, cast, matchDesc);
injectCast(csb, value, cast, matchDesc, false);
segment->scanType = segmentScanEquivalent;
segment->excludeLower = false;
segment->excludeUpper = false;
@ -1910,7 +1919,7 @@ bool OptimizerRetrieval::matchBoolean(IndexScratch* indexScratch, BoolExprNode*
case blr_eql:
segment->matches.add(boolean);
segment->lowerValue = segment->upperValue =
injectCast(csb, value, cast, matchDesc);
injectCast(csb, value, cast, matchDesc, false);
segment->scanType = segmentScanEqual;
segment->excludeLower = false;
segment->excludeUpper = false;
@ -1946,7 +1955,7 @@ bool OptimizerRetrieval::matchBoolean(IndexScratch* indexScratch, BoolExprNode*
if (forward)
{
segment->lowerValue = injectCast(csb, value, cast, matchDesc);
segment->lowerValue = injectCast(csb, value, cast, matchDesc, false);
if (segment->scanType == segmentScanLess)
segment->scanType = segmentScanBetween;
else
@ -1954,7 +1963,7 @@ bool OptimizerRetrieval::matchBoolean(IndexScratch* indexScratch, BoolExprNode*
}
else
{
segment->upperValue = injectCast(csb, value, cast, matchDesc);
segment->upperValue = injectCast(csb, value, cast, matchDesc, true);
if (segment->scanType == segmentScanGreater)
segment->scanType = segmentScanBetween;
else
@ -1977,7 +1986,7 @@ bool OptimizerRetrieval::matchBoolean(IndexScratch* indexScratch, BoolExprNode*
if (forward)
{
segment->upperValue = injectCast(csb, value, cast, matchDesc);
segment->upperValue = injectCast(csb, value, cast, matchDesc, true);
if (segment->scanType == segmentScanGreater)
segment->scanType = segmentScanBetween;
else
@ -1985,7 +1994,7 @@ bool OptimizerRetrieval::matchBoolean(IndexScratch* indexScratch, BoolExprNode*
}
else
{
segment->lowerValue = injectCast(csb, value, cast, matchDesc);
segment->lowerValue = injectCast(csb, value, cast, matchDesc, false);
if (segment->scanType == segmentScanLess)
segment->scanType = segmentScanBetween;
else
@ -2003,7 +2012,7 @@ bool OptimizerRetrieval::matchBoolean(IndexScratch* indexScratch, BoolExprNode*
(segment->scanType == segmentScanEquivalent)))
{
segment->lowerValue = segment->upperValue =
injectCast(csb, value, cast, matchDesc);
injectCast(csb, value, cast, matchDesc, false);
segment->scanType = segmentScanStarting;
segment->excludeLower = false;
segment->excludeUpper = false;

View File

@ -188,7 +188,7 @@ namespace
static ULONG add_node(thread_db*, WIN*, index_insertion*, temporary_key*, RecordNumber*,
ULONG*, ULONG*);
static void compress(thread_db*, const dsc*, temporary_key*, USHORT, bool, bool, USHORT);
static void compress(thread_db*, const dsc*, temporary_key*, USHORT, bool, bool, USHORT, bool);
static USHORT compress_root(thread_db*, index_root_page*);
static void copy_key(const temporary_key*, temporary_key*);
static contents delete_node(thread_db*, WIN*, UCHAR*);
@ -895,12 +895,14 @@ btree_page* BTR_find_page(thread_db* tdbb,
else
{
idx_e errorCode = idx_e_ok;
bool avoidTrailingTrim = !(retrieval->irb_generic & (irb_starting | irb_equality));
if (retrieval->irb_upper_count)
{
errorCode = BTR_make_key(tdbb, retrieval->irb_upper_count,
retrieval->irb_value + retrieval->irb_desc.idx_count,
&retrieval->irb_desc, upper,
avoidTrailingTrim && !(retrieval->irb_generic & irb_descending),
(retrieval->irb_generic & irb_starting) != 0);
}
@ -910,6 +912,7 @@ btree_page* BTR_find_page(thread_db* tdbb,
{
errorCode = BTR_make_key(tdbb, retrieval->irb_lower_count,
retrieval->irb_value, &retrieval->irb_desc, lower,
avoidTrailingTrim && (retrieval->irb_generic & irb_descending),
(retrieval->irb_generic & irb_starting) != 0);
}
}
@ -1253,7 +1256,7 @@ idx_e BTR_key(thread_db* tdbb, jrd_rel* relation, Record* record, index_desc* id
key->key_flags |= key_empty;
compress(tdbb, desc_ptr, key, tail->idx_itype, isNull, descending, keyType);
compress(tdbb, desc_ptr, key, tail->idx_itype, isNull, descending, keyType, false);
}
else
{
@ -1287,7 +1290,7 @@ idx_e BTR_key(thread_db* tdbb, jrd_rel* relation, Record* record, index_desc* id
}
}
compress(tdbb, desc_ptr, &temp, tail->idx_itype, isNull, descending, keyType);
compress(tdbb, desc_ptr, &temp, tail->idx_itype, isNull, descending, keyType, false);
const UCHAR* q = temp.key_data;
for (USHORT l = temp.key_length; l; --l, --stuff_count)
@ -1519,6 +1522,7 @@ idx_e BTR_make_key(thread_db* tdbb,
const ValueExprNode* const* exprs,
const index_desc* idx,
temporary_key* key,
bool avoidTrailingTrim,
bool fuzzy)
{
/**************************************
@ -1567,7 +1571,7 @@ idx_e BTR_make_key(thread_db* tdbb,
if (isNull)
key->key_nulls = 1;
compress(tdbb, desc, key, tail->idx_itype, isNull, descending, keyType);
compress(tdbb, desc, key, tail->idx_itype, isNull, descending, keyType, avoidTrailingTrim);
if (fuzzy && (key->key_flags & key_empty))
key->key_length = 0;
@ -1600,7 +1604,8 @@ idx_e BTR_make_key(thread_db* tdbb,
compress(tdbb, desc, &temp, tail->idx_itype, isNull, descending,
(n == count - 1 ?
keyType : ((idx->idx_flags & idx_unique) ? INTL_KEY_UNIQUE : INTL_KEY_SORT)));
keyType : ((idx->idx_flags & idx_unique) ? INTL_KEY_UNIQUE : INTL_KEY_SORT)),
avoidTrailingTrim && n == count - 1);
if (!(temp.key_flags & key_empty))
is_key_empty = false;
@ -1707,7 +1712,7 @@ void BTR_make_null_key(thread_db* tdbb, const index_desc* idx, temporary_key* ke
// If the index is a single segment index, don't sweat the compound stuff
if ((idx->idx_count == 1) || (idx->idx_flags & idx_expressn))
{
compress(tdbb, &null_desc, key, tail->idx_itype, true, descending, false);
compress(tdbb, &null_desc, key, tail->idx_itype, true, descending, false, false);
}
else
{
@ -1721,7 +1726,7 @@ void BTR_make_null_key(thread_db* tdbb, const index_desc* idx, temporary_key* ke
for (; stuff_count; --stuff_count)
*p++ = 0;
compress(tdbb, &null_desc, &temp, tail->idx_itype, true, descending, false);
compress(tdbb, &null_desc, &temp, tail->idx_itype, true, descending, false, false);
const UCHAR* q = temp.key_data;
for (USHORT l = temp.key_length; l; --l, --stuff_count)
@ -2409,11 +2414,8 @@ static ULONG add_node(thread_db* tdbb,
}
static void compress(thread_db* tdbb,
const dsc* desc,
temporary_key* key,
USHORT itype,
bool isNull, bool descending, USHORT key_type)
static void compress(thread_db* tdbb, const dsc* desc, temporary_key* key, USHORT itype,
bool isNull, bool descending, USHORT key_type, bool avoidTrailingTrim)
{
/**************************************
*
@ -2470,9 +2472,8 @@ static void compress(thread_db* tdbb,
itype == idx_decimal || itype >= idx_first_intl_string)
{
VaryStr<MAX_KEY> buffer;
const UCHAR pad = (itype == idx_string) ? ' ' : 0;
const UCHAR pad = itype == idx_string ? ' ' : 0;
UCHAR* ptr;
size_t length;
if (itype == idx_decimal)
@ -2481,11 +2482,18 @@ static void compress(thread_db* tdbb,
length = dec.makeIndexKey(&buffer);
ptr = reinterpret_cast<UCHAR*>(buffer.vary_string);
}
else if (itype >= idx_first_intl_string || itype == idx_metadata)
else
{
DSC to;
USHORT ttype = ttype_binary;
length = MOV_get_string_ptr(tdbb, desc, &ttype, &ptr, &buffer, MAX_KEY);
dsc desc2 = *desc;
desc2.dsc_dtype = dtype_text;
desc2.dsc_length = length;
desc2.dsc_address = ptr;
// convert to an international byte array
dsc to;
to.dsc_dtype = dtype_text;
to.dsc_flags = 0;
to.dsc_sub_type = 0;
@ -2493,10 +2501,9 @@ static void compress(thread_db* tdbb,
to.dsc_ttype() = ttype_sort_key;
to.dsc_length = MIN(MAX_KEY, sizeof(buffer));
ptr = to.dsc_address = reinterpret_cast<UCHAR*>(buffer.vary_string);
length = INTL_string_to_key(tdbb, itype, desc, &to, key_type);
length = INTL_string_to_key(tdbb, itype, &desc2, &to, key_type, !avoidTrailingTrim);
}
else
length = MOV_get_string(tdbb, desc, &ptr, &buffer, MAX_KEY);
if (length)
{
@ -2515,23 +2522,27 @@ static void compress(thread_db* tdbb,
memcpy(p, ptr, length);
p += length;
if (itype != idx_string)
{
while (p > key->key_data)
{
if (*--p != pad)
{
++p;
break;
}
}
}
}
else
{
// Leave key_empty flag, because the string is an empty string
if (descending && ((pad == desc_end_value_prefix) || (pad == desc_end_value_check)))
*p++ = desc_end_value_prefix;
*p++ = pad;
}
while (p > key->key_data)
{
if (*--p != pad)
break;
}
key->key_length = p + 1 - key->key_data;
key->key_length = p - key->key_data;
return;
}
@ -3443,9 +3454,9 @@ static ULONG fast_load(thread_db* tdbb,
// Detect the case when a set of duplicate keys contains more than one key
// from the primary record version. It breaks the unique constraint and must
// be rejected. Note that this cannot always be detected while sorting.
// Set to true when the primary record version is found in the current set of
// duplicate keys.
// be rejected. Note that this cannot always be detected while sorting.
// Set to true when the primary record version is found in the current set of
// duplicate keys.
bool primarySeen = false;
while (!error)
@ -6405,7 +6416,7 @@ static bool scan(thread_db* tdbb, UCHAR* pointer, RecordBitmap** bitmap, RecordB
break;
// node contains more bytes than a key, check numbers
// of last key segment and current node segment.
// of last key segment and current node segment.
fb_assert(!descending);
fb_assert(p - STUFF_COUNT - 1 >= key->key_data);
@ -6415,7 +6426,7 @@ static bool scan(thread_db* tdbb, UCHAR* pointer, RecordBitmap** bitmap, RecordB
fb_assert(keySeg <= nodeSeg);
// If current segment at node is the same as last segment
// If current segment at node is the same as last segment
// of the key then node > key.
if (keySeg == nodeSeg)
return false;

View File

@ -46,7 +46,7 @@ USHORT BTR_key_length(Jrd::thread_db*, Jrd::jrd_rel*, Jrd::index_desc*);
Ods::btree_page* BTR_left_handoff(Jrd::thread_db*, Jrd::win*, Ods::btree_page*, SSHORT);
bool BTR_lookup(Jrd::thread_db*, Jrd::jrd_rel*, USHORT, Jrd::index_desc*, Jrd::RelationPages*);
Jrd::idx_e BTR_make_key(Jrd::thread_db*, USHORT, const Jrd::ValueExprNode* const*, const Jrd::index_desc*,
Jrd::temporary_key*, bool);
Jrd::temporary_key*, bool, bool);
void BTR_make_null_key(Jrd::thread_db*, const Jrd::index_desc*, Jrd::temporary_key*);
bool BTR_next_index(Jrd::thread_db*, Jrd::jrd_rel*, Jrd::jrd_tra*, Jrd::index_desc*, Jrd::win*);
void BTR_remove(Jrd::thread_db*, Jrd::win*, Jrd::index_insertion*);

View File

@ -1207,7 +1207,8 @@ USHORT INTL_string_to_key(thread_db* tdbb,
USHORT idxType,
const dsc* pString,
DSC* pByte,
USHORT key_type)
USHORT key_type,
bool trimTrailing)
{
/**************************************
*
@ -1251,8 +1252,14 @@ USHORT INTL_string_to_key(thread_db* tdbb,
ttype = ttype_metadata;
break;
default:
pad_char = 0;
ttype = INTL_INDEX_TO_TEXT(idxType);
if (trimTrailing)
{
auto charSet = INTL_charset_lookup(tdbb, ttype);
pad_char = *charSet->getSpace();
}
break;
}
@ -1262,6 +1269,19 @@ USHORT INTL_string_to_key(thread_db* tdbb,
UCHAR* src;
USHORT len = MOV_make_string2(tdbb, pString, ttype, &src, temp);
if (trimTrailing && len)
{
const UCHAR* end = src + len;
while (--end >= src)
{
if (*end != pad_char)
break;
}
len = end + 1 - src;
}
USHORT outlen;
UCHAR* dest = pByte->dsc_address;
USHORT destLen = pByte->dsc_length;
@ -1274,15 +1294,7 @@ USHORT INTL_string_to_key(thread_db* tdbb,
case ttype_none:
while (len-- && destLen-- > 0)
*dest++ = *src++;
// strip off ending pad characters
while (dest > pByte->dsc_address)
{
if (*(dest - 1) == pad_char)
dest--;
else
break;
}
outlen = (dest - pByte->dsc_address);
outlen = dest - pByte->dsc_address;
break;
default:
TextType* obj = INTL_texttype_lookup(tdbb, ttype);

View File

@ -476,22 +476,11 @@ static USHORT internal_string_to_key(texttype* obj,
**************************************/
const UCHAR* const pStart = dest;
const UCHAR pad_char = static_cast<TextTypeImpl*>(obj->texttype_impl)->texttype_pad_char;
while (inLen-- && outLen--)
*dest++ = *src++;
if (obj->texttype_pad_option)
{
// strip off ending pad characters
while (dest > pStart)
{
if (*(dest - 1) == pad_char)
dest--;
else
break;
}
}
return (dest - pStart);
return dest - pStart;
}
static SSHORT internal_compare(texttype* obj,
@ -508,56 +497,56 @@ static SSHORT internal_compare(texttype* obj,
*
**************************************/
const UCHAR pad = static_cast<TextTypeImpl*>(obj->texttype_impl)->texttype_pad_char;
SLONG fill = length1 - length2;
int fill = length1 - length2;
if (length1 >= length2)
{
if (length2)
{
do {
do
{
if (*p1++ != *p2++)
{
if (p1[-1] > p2[-1])
return 1;
return -1;
}
return (p1[-1] > p2[-1]) ? 1 : -1;
} while (--length2);
}
if (fill > 0)
{
do {
if (!obj->texttype_pad_option || *p1++ != pad)
if (obj->texttype_pad_option)
{
do
{
if (p1[-1] > pad)
return 1;
return -1;
}
} while (--fill);
if (*p1++ != pad)
return (p1[-1] > pad) ? 1 : -1;
} while (--fill);
}
else
return 1;
}
return 0;
}
if (length1)
{
do {
do
{
if (*p1++ != *p2++)
{
if (p1[-1] > p2[-1])
return 1;
return -1;
}
return (p1[-1] > p2[-1]) ? 1 : -1;
} while (--length1);
}
do {
if (!obj->texttype_pad_option || *p2++ != pad)
if (obj->texttype_pad_option)
{
do
{
if (pad > p2[-1])
return 1;
return -1;
}
} while (++fill);
if (*p2++ != pad)
return (pad > p2[-1]) ? 1 : -1;
} while (++fill);
}
return 0;
return fill ? -1 : 0;
}
@ -638,19 +627,6 @@ static USHORT utf16_keylength(texttype* /*obj*/, USHORT len)
return UnicodeUtil::utf16KeyLength(len);
}
namespace {
	// Drops trailing UTF-16 space units (value 32) from the logical length of
	// a buffer. `len` is a byte count and must be a multiple of sizeof(USHORT);
	// it is reduced in place, one 16-bit unit at a time, until the last
	// remaining unit is not a space or the length reaches zero. The text
	// itself is never modified.
	template <typename U>
	void padUtf16(const USHORT* text, U& len)
	{
		fb_assert(len % sizeof(USHORT) == 0);

		while (len > 0 && text[len / sizeof(USHORT) - 1] == 32)
			len -= sizeof(USHORT);
	}
} //namespace
static USHORT utf16_string_to_key(texttype* obj,
USHORT srcLen,
const UCHAR* src,
@ -672,11 +648,6 @@ static USHORT utf16_string_to_key(texttype* obj,
Firebird::Aligner<USHORT> alSrc(src, srcLen);
if (obj->texttype_pad_option)
{
padUtf16(alSrc, srcLen);
}
return UnicodeUtil::utf16ToKey(srcLen, alSrc, dstLen, dst);
}
@ -703,13 +674,7 @@ static SSHORT utf16_compare(texttype* obj,
Firebird::Aligner<USHORT> al1(str1, len1);
Firebird::Aligner<USHORT> al2(str2, len2);
if (obj->texttype_pad_option)
{
padUtf16(al1, len1);
padUtf16(al2, len2);
}
return UnicodeUtil::utf16Compare(len1, al1, len2, al2, error_flag);
return UnicodeUtil::utf16Compare(len1, al1, len2, al2, obj->texttype_pad_option, error_flag);
}
static ULONG utf16_upper(texttype* obj, ULONG srcLen, const UCHAR* src, ULONG dstLen, UCHAR* dst)
@ -792,11 +757,6 @@ static USHORT utf32_string_to_key(texttype* obj,
dstLen, utf16Str.getBuffer(dstLen / sizeof(USHORT) + 1), &err_code, &err_position);
const USHORT* s = utf16Str.begin();
if (obj->texttype_pad_option)
{
padUtf16(s, sLen);
}
return UnicodeUtil::utf16ToKey(sLen, s, dstLen, dst);
}

View File

@ -52,7 +52,7 @@ Jrd::Collation* INTL_texttype_lookup(Jrd::thread_db* tdbb, USHORT parm1);
void INTL_texttype_unload(Jrd::thread_db*, USHORT);
bool INTL_texttype_validate(Jrd::thread_db*, const SubtypeInfo*);
void INTL_pad_spaces(Jrd::thread_db*, dsc*, UCHAR*, ULONG);
USHORT INTL_string_to_key(Jrd::thread_db*, USHORT, const dsc*, dsc*, USHORT);
USHORT INTL_string_to_key(Jrd::thread_db*, USHORT, const dsc*, dsc*, USHORT, bool trimTrailing = false);
// Built-in charsets/texttypes interface
INTL_BOOL INTL_builtin_lookup_charset(charset* cs, const ASCII* charset_name, const ASCII* config_info);