diff --git a/src/common/TextType.cpp b/src/common/TextType.cpp index ed16de3ba5..bd2f4d99a3 100644 --- a/src/common/TextType.cpp +++ b/src/common/TextType.cpp @@ -212,23 +212,8 @@ USHORT TextType::string_to_key(USHORT srcLen, const UCHAR* src, space = utf16Space; } - if (tt->texttype_pad_option) - { - const UCHAR* pad; - - for (pad = src + srcLen - spaceLength; pad >= src; pad -= spaceLength) - { - if (memcmp(pad, space, spaceLength) != 0) - break; - } - - srcLen = pad - src + spaceLength; - } - if (getCharSet()->isMultiByte()) - { dstLen = UnicodeUtil::utf16ToKey(srcLen, Firebird::Aligner(src, srcLen), dstLen, dst); - } else { if (dstLen >= srcLen) @@ -278,44 +263,61 @@ SSHORT TextType::compare(ULONG len1, const UCHAR* str1, ULONG len2, const UCHAR* getCharSet()->getConvToUnicode().convert(spaceLength, space, sizeof(utf16Space), utf16Space); fb_assert(spaceLength == 2); // space character can't be surrogate for default compare space = utf16Space; + + INTL_BOOL error_flag; + return UnicodeUtil::utf16Compare(len1, Firebird::Aligner(str1, len1), + len2, Firebird::Aligner(str2, len2), tt->texttype_pad_option, &error_flag); + } + + int fill = len1 - len2; + + if (len1 >= len2) + { + if (len2) + { + do + { + if (*str1++ != *str2++) + return (str1[-1] > str2[-1]) ? 1 : -1; + } while (--len2); + } + + if (fill > 0) + { + if (tt->texttype_pad_option) + { + do + { + if (*str1++ != *space) + return (str1[-1] > *space) ? 1 : -1; + } while (--fill); + } + else + return 1; + } + + return 0; + } + + if (len1) + { + do + { + if (*str1++ != *str2++) + return (str1[-1] > str2[-1]) ? 1 : -1; + } while (--len1); } if (tt->texttype_pad_option) { - const UCHAR* pad; - - for (pad = str1 + len1 - spaceLength; pad >= str1; pad -= spaceLength) + do { - if (memcmp(pad, space, spaceLength) != 0) - break; - } - - len1 = pad - str1 + spaceLength; - - for (pad = str2 + len2 - spaceLength; pad >= str2; pad -= spaceLength) - { - if (memcmp(pad, space, spaceLength) != 0) - break; - } - - len2 = pad - str2 + spaceLength; + if (*str2++ != *space) + return (*space > str2[-1]) ? 1 : -1; + } while (++fill); } - if (getCharSet()->isMultiByte()) - { - INTL_BOOL error_flag; - return UnicodeUtil::utf16Compare(len1, Firebird::Aligner(str1, len1), - len2, Firebird::Aligner(str2, len2), &error_flag); - } - - int cmp = memcmp(str1, str2, MIN(len1, len2)); - - if (cmp == 0) - cmp = (len1 < len2 ? -1 : (len1 > len2 ? 1 : 0)); - else - cmp = (cmp < 0 ? -1 : 1); - - return (SSHORT) cmp; + return fill ? -1 : 0; } diff --git a/src/common/unicode_util.cpp b/src/common/unicode_util.cpp index eee2cebff1..d8890ef2d4 100644 --- a/src/common/unicode_util.cpp +++ b/src/common/unicode_util.cpp @@ -905,7 +905,7 @@ ULONG UnicodeUtil::utf32ToUtf16(ULONG srcLen, const ULONG* src, ULONG dstLen, US } -SSHORT UnicodeUtil::utf16Compare(ULONG len1, const USHORT* str1, ULONG len2, const USHORT* str2, +SSHORT UnicodeUtil::utf16Compare(ULONG len1, const USHORT* str1, ULONG len2, const USHORT* str2, bool pad, INTL_BOOL* error_flag) { fb_assert(len1 % sizeof(*str1) == 0); @@ -916,11 +916,62 @@ SSHORT UnicodeUtil::utf16Compare(ULONG len1, const USHORT* str1, ULONG len2, con *error_flag = false; - // safe casts - alignment not changed - int32_t cmp = getConversionICU().u_strCompare(reinterpret_cast(str1), len1 / sizeof(*str1), - reinterpret_cast(str2), len2 / sizeof(*str2), true); + len1 /= sizeof(*str1); + len2 /= sizeof(*str2); - return (cmp < 0 ? -1 : (cmp > 0 ? 1 : 0)); + int32_t cmp; + + if (pad) + { + int length1 = len1; + int length2 = len2; + + if (len2 > len1) + { + length2 = len1; + if (len1 > 0 && UTF_IS_LEAD(str2[len1 - 1])) + ++length2; + } + else if (len1 > len2) + { + length1 = len2; + if (len2 > 0 && UTF_IS_LEAD(str1[len2 - 1])) + ++length1; + } + + // safe casts - alignment not changed + cmp = getConversionICU().u_strCompare(reinterpret_cast(str1), length1, + reinterpret_cast(str2), length2, true); + + if (cmp == 0) + { + if (length1 < len1) + { + for (const USHORT* p = str1 + length1; p != str1 + len1; ++p) + { + if (*p != ' ') + return *p < ' ' ? -1 : 1; + } + } + + if (length2 < len2) + { + for (const USHORT* p = str2 + length2; p != str2 + len2; ++p) + { + if (*p != ' ') + return ' ' < *p ? -1 : 1; + } + } + } + } + else + { + // safe casts - alignment not changed + cmp = getConversionICU().u_strCompare(reinterpret_cast(str1), len1, + reinterpret_cast(str2), len2, true); + } + + return cmp < 0 ? -1 : (cmp > 0 ? 1 : 0); } @@ -1525,19 +1576,6 @@ USHORT UnicodeUtil::Utf16Collation::stringToKey(USHORT srcLen, const USHORT* src srcLenLong /= sizeof(*src); - if (tt->texttype_pad_option) - { - const USHORT* pad; - - for (pad = src + srcLenLong - 1; pad >= src; --pad) - { - if (*pad != 32) - break; - } - - srcLenLong = pad - src + 1; - } - HalfStaticArray buffer; const UCollator* coll = NULL; @@ -1624,33 +1662,6 @@ SSHORT UnicodeUtil::Utf16Collation::compare(ULONG len1, const USHORT* str1, *error_flag = false; - len1 /= sizeof(*str1); - len2 /= sizeof(*str2); - - if (tt->texttype_pad_option) - { - const USHORT* pad; - - for (pad = str1 + len1 - 1; pad >= str1; --pad) - { - if (*pad != 32) - break; - } - - len1 = pad - str1 + 1; - - for (pad = str2 + len2 - 1; pad >= str2; --pad) - { - if (*pad != 32) - break; - } - - len2 = pad - str2 + 1; - } - - len1 *= sizeof(*str1); - len2 *= sizeof(*str2); - HalfStaticArray buffer1, buffer2; normalize(&len1, &str1, true, buffer1); normalize(&len2, &str2, true, buffer2); @@ -1658,10 +1669,61 @@ SSHORT UnicodeUtil::Utf16Collation::compare(ULONG len1, const USHORT* str1, len1 /= sizeof(*str1); len2 /= sizeof(*str2); - return (SSHORT) icu->ucolStrColl(compareCollator, + int32_t cmp; + + if (tt->texttype_pad_option) + { + int length1 = len1; + int length2 = len2; + + if (len2 > len1) + { + length2 = len1; + if (len1 > 0 && UTF_IS_LEAD(str2[len1 - 1])) + ++length2; + } + else if (len1 > len2) + { + length1 = len2; + if (len2 > 0 && UTF_IS_LEAD(str1[len2 - 1])) + ++length1; + } + // safe casts - alignment not changed - reinterpret_cast(str1), len1, - reinterpret_cast(str2), len2); + SSHORT cmp = icu->ucolStrColl(compareCollator, + reinterpret_cast(str1), length1, + reinterpret_cast(str2), length2); + + if (cmp == 0) + { + if (length1 < len1) + { + for (const USHORT* p = str1 + length1; p != str1 + len1; ++p) + { + if (*p != ' ') + return *p < ' ' ? -1 : 1; + } + } + + if (length2 < len2) + { + for (const USHORT* p = str2 + length2; p != str2 + len2; ++p) + { + if (*p != ' ') + return ' ' < *p ? -1 : 1; + } + } + } + + return cmp; + } + else + { + // safe casts - alignment not changed + return (SSHORT) icu->ucolStrColl(compareCollator, + reinterpret_cast(str1), len1, + reinterpret_cast(str2), len2); + } } diff --git a/src/common/unicode_util.h b/src/common/unicode_util.h index e98370a693..e9e1fb97df 100644 --- a/src/common/unicode_util.h +++ b/src/common/unicode_util.h @@ -160,7 +160,7 @@ public: USHORT* err_code, ULONG* err_position); static ULONG utf32ToUtf16(ULONG srcLen, const ULONG* src, ULONG dstLen, USHORT* dst, USHORT* err_code, ULONG* err_position); - static SSHORT utf16Compare(ULONG len1, const USHORT* str1, ULONG len2, const USHORT* str2, + static SSHORT utf16Compare(ULONG len1, const USHORT* str1, ULONG len2, const USHORT* str2, bool pad, INTL_BOOL* error_flag); static ULONG utf16Length(ULONG len, const USHORT* str); diff --git a/src/intl/lc_ascii.cpp b/src/intl/lc_ascii.cpp index 22cc9b9d9d..d1d449913c 100644 --- a/src/intl/lc_ascii.cpp +++ b/src/intl/lc_ascii.cpp @@ -527,36 +527,12 @@ USHORT famasc_string_to_key(texttype* obj, USHORT iInLen, const BYTE* pInChar, U fb_assert(iOutLen <= LANGASCII_MAX_KEY); fb_assert(iOutLen >= famasc_key_length(obj, iInLen)); - // point inbuff at last character - const BYTE* inbuff = pInChar + iInLen - 1; - - if (obj->texttype_pad_option) - { - // skip backwards over all spaces & reset input length - while ((inbuff >= pInChar) && (*inbuff == ASCII_SPACE)) - inbuff--; - } - - iInLen = (inbuff - pInChar + 1); - BYTE* outbuff = pOutChar; - while (iInLen-- && iOutLen--) { + + while (iInLen-- && iOutLen--) *outbuff++ = *pInChar++; - } - return (outbuff - pOutChar); -} - -static bool all_spaces(const BYTE* s, SLONG len) -{ - fb_assert(s != NULL); - - while (len-- > 0) - { - if (*s++ != ASCII_SPACE) - return false; - } - return true; + return outbuff - pOutChar; } @@ -570,32 +546,53 @@ SSHORT famasc_compare(texttype* obj, ULONG l1, const BYTE* s1, ULONG l2, const B *error_flag = false; - const ULONG len = MIN(l1, l2); - for (ULONG i = 0; i < len; i++) - { - if (s1[i] == s2[i]) - continue; - if (all_spaces(&s1[i], (SLONG) (l1 - i))) - return -1; - if (all_spaces(&s2[i], (SLONG) (l2 - i))) - return 1; - if (s1[i] < s2[i]) - return -1; + int fill = l1 - l2; - return 1; + if (l1 >= l2) + { + if (l2) + { + do + { + if (*s1++ != *s2++) + return (s1[-1] > s2[-1]) ? 1 : -1; + } while (--l2); + } + + if (fill > 0) + { + if (obj->texttype_pad_option) + { + do + { + if (*s1++ != ASCII_SPACE) + return (s1[-1] > ASCII_SPACE) ? 1 : -1; + } while (--fill); + } + else + return 1; + } + + return 0; } - if (l1 > len) + if (l1) { - if (obj->texttype_pad_option && all_spaces(&s1[len], (SLONG) (l1 - len))) - return 0; - return 1; + do + { + if (*s1++ != *s2++) + return (s1[-1] > s2[-1]) ? 1 : -1; + } while (--l1); } - if (l2 > len) + + if (obj->texttype_pad_option) { - if (obj->texttype_pad_option && all_spaces(&s2[len], (SLONG) (l2 - len))) - return 0; - return -1; + do + { + if (*s2++ != ASCII_SPACE) + return (ASCII_SPACE > s2[-1]) ? 1 : -1; + } while (++fill); } - return (0); + + return fill ? -1 : 0; } diff --git a/src/intl/lc_narrow.cpp b/src/intl/lc_narrow.cpp index f4e4943e4d..781fa395a0 100644 --- a/src/intl/lc_narrow.cpp +++ b/src/intl/lc_narrow.cpp @@ -193,18 +193,6 @@ USHORT LC_NARROW_string_to_key(texttype* obj, USHORT iInLen, const BYTE* pInChar BYTE tertiary[LANGFAM2_MAX_KEY]; BYTE special[LANGFAM2_MAX_KEY * 2]; - // point inbuff at last character - const BYTE* inbuff = pInChar + iInLen - 1; - - if (obj->texttype_pad_option) - { - // skip backwards over all spaces & reset input length - while ((inbuff >= pInChar) && (*inbuff == ASCII_SPACE)) - inbuff--; - } - - iInLen = (inbuff - pInChar + 1); - for (USHORT i = 0; i < iInLen; i++, pInChar++) { fb_assert(lprimary < iOutLen); @@ -405,10 +393,11 @@ struct coltab_status -static SSHORT special_scan(texttype* obj, ULONG l1, const BYTE* s1, ULONG l2, const BYTE* s2) +static SSHORT special_scan(texttype* obj, const UCHAR* s1, const UCHAR* end1, ULONG paddedLen1, + const UCHAR* s2, const UCHAR* end2, ULONG paddedLen2) { - const SortOrderTblEntry* col1 = 0; - const SortOrderTblEntry* col2 = 0; + const SortOrderTblEntry* col1 = nullptr; + const SortOrderTblEntry* col2 = nullptr; ULONG index1 = 0; ULONG index2 = 0; @@ -419,47 +408,49 @@ static SSHORT special_scan(texttype* obj, ULONG l1, const BYTE* s1, ULONG l2, co while (true) { // Scan to find ignore char from l1 - while (l1) + while (paddedLen1) { - col1 = &((const SortOrderTblEntry*) impl->texttype_collation_table)[*s1]; + col1 = &((const SortOrderTblEntry*) impl->texttype_collation_table)[s1 < end1 ? *s1 : ASCII_SPACE]; if (col1->IsExpand && col1->IsCompress && noSpecialsFirst) { break; } - l1--; + paddedLen1--; s1++; index1++; } // Scan to find ignore char from l2 - while (l2) + while (paddedLen2) { - col2 = &((const SortOrderTblEntry*) impl->texttype_collation_table)[*s2]; + col2 = &((const SortOrderTblEntry*) impl->texttype_collation_table)[s2 < end2 ? *s2 : ASCII_SPACE]; if (col2->IsExpand && col2->IsCompress && noSpecialsFirst) { break; } - l2--; + paddedLen2--; s2++; index2++; } - if (!l1 && !l2) // All out of ignore characters + + if (!paddedLen1 && !paddedLen2) // All out of ignore characters return 0; - if (l1 && !l2) // Out in l2 only + if (paddedLen1 && !paddedLen2) // Out in paddedLen2 only return 1000; - if (!l1 && l2) // Out in l1 only + if (!paddedLen1 && paddedLen2) // Out in paddedLen1 only return -1000; - if (index1 < index2) // l1 has ignore ch before l2 + if (index1 < index2) // paddedLen1 has ignore ch before paddedLen2 return -2000; - if (index1 > index2) // l2 has ignore ch before l1 + if (index1 > index2) // paddedLen2 has ignore ch before paddedLen1 return 2000; if (col1->Primary != col2->Primary) return (col1->Primary - col2->Primary); - l1--; - l2--; + + paddedLen1--; + paddedLen2--; s1++; s2++; index1++; @@ -468,8 +459,8 @@ static SSHORT special_scan(texttype* obj, ULONG l1, const BYTE* s1, ULONG l2, co } -static const SortOrderTblEntry* get_coltab_entry(texttype* obj, const UCHAR** p, - ULONG* l, coltab_status* stat, int* sum) +static const SortOrderTblEntry* get_coltab_entry(texttype* obj, const UCHAR** ptr, + const UCHAR* end, ULONG* paddedLen, coltab_status* stat, int* sum) { TextTypeImpl* impl = static_cast(obj->texttype_impl); @@ -477,18 +468,18 @@ static const SortOrderTblEntry* get_coltab_entry(texttype* obj, const UCHAR** p, if (stat->stat_flags & LC_HAVE_WAITING) { - --*l; - ++*p; + --*paddedLen; + ++*ptr; stat->stat_flags &= ~LC_HAVE_WAITING; fb_assert(stat->stat_waiting); return stat->stat_waiting; } stat->stat_waiting = NULL; - while (*l) + while (*paddedLen) { const SortOrderTblEntry* col = - &((const SortOrderTblEntry*) impl->texttype_collation_table)[**p]; + &((const SortOrderTblEntry*) impl->texttype_collation_table)[*ptr < end ? **ptr : ASCII_SPACE]; if (col->IsExpand && col->IsCompress) { @@ -497,15 +488,15 @@ static const SortOrderTblEntry* get_coltab_entry(texttype* obj, const UCHAR** p, *sum = impl->ignore_sum; // Have col - --*l; - ++*p; + --*paddedLen; + ++*ptr; return col; } // Both flags set indicate a special value // Need a new col - --*l; - ++*p; + --*paddedLen; + ++*ptr; stat->stat_flags |= LC_HAVE_SPECIAL; continue; } @@ -514,17 +505,17 @@ static const SortOrderTblEntry* get_coltab_entry(texttype* obj, const UCHAR** p, (col->IsCompress && !(impl->texttype_flags & TEXTTYPE_disable_compressions)))) { // Have col - --*l; - ++*p; + --*paddedLen; + ++*ptr; return col; } if (col->IsExpand) { const ExpandChar* exp = &((const ExpandChar*) impl->texttype_expand_table)[0]; - while (exp->Ch && exp->Ch != **p) + while (exp->Ch && exp->Ch != (*ptr < end ? **ptr : ASCII_SPACE)) exp++; - fb_assert(exp->Ch == **p); + fb_assert(exp->Ch == (*ptr < end ? **ptr : ASCII_SPACE)); // Have coll1 // Have waiting @@ -534,29 +525,31 @@ static const SortOrderTblEntry* get_coltab_entry(texttype* obj, const UCHAR** p, } // (col->IsCompress) - if (*l > 1) + if (*ptr + 1 < end) { const CompressPair* cmp = &((const CompressPair*) impl->texttype_compress_table)[0]; while (cmp->CharPair[0]) { - if ((cmp->CharPair[0] == **p) && - (cmp->CharPair[1] == *(*p + 1))) + if ((cmp->CharPair[0] == **ptr) && + (cmp->CharPair[1] == *(*ptr + 1))) { // Have Col col = &cmp->NoCaseWeight; - (*l) -= 2; - (*p) += 2; + *paddedLen -= 2; + *ptr += 2; return col; } cmp++; } } + // Have col - --*l; - ++*p; + --*paddedLen; + ++*ptr; return col; } - return NULL; + + return nullptr; } @@ -575,24 +568,14 @@ SSHORT LC_NARROW_compare(texttype* obj, ULONG l1, const BYTE* s1, ULONG l2, cons *error_flag = false; - if (obj->texttype_pad_option) - { - // Start at EOS, scan backwards to find non-space - const BYTE* p = s1 + l1 - 1; - while ((p >= s1) && (*p == ASCII_SPACE)) - p--; - l1 = (p - s1 + 1); - - p = s2 + l2 - 1; - while ((p >= s2) && (*p == ASCII_SPACE)) - p--; - l2 = (p - s2 + 1); - } - - const ULONG save_l1 = l1; - const ULONG save_l2 = l2; + const UCHAR* const end1 = s1 + l1; + const UCHAR* const end2 = s2 + l2; const BYTE* const save_s1 = s1; const BYTE* const save_s2 = s2; + ULONG paddedLen1 = obj->texttype_pad_option ? MAX(l1, l2) : l1; + ULONG paddedLen2 = obj->texttype_pad_option ? MAX(l1, l2) : l2; + ULONG savePaddedLen1 = paddedLen1; + ULONG savePaddedLen2 = paddedLen2; SSHORT save_secondary = 0; SSHORT save_tertiary = 0; SSHORT save_quandary = 0; @@ -601,15 +584,15 @@ SSHORT LC_NARROW_compare(texttype* obj, ULONG l1, const BYTE* s1, ULONG l2, cons stat1.stat_flags = 0; stat2.stat_flags = 0; - const SortOrderTblEntry* col1 = 0; - const SortOrderTblEntry* col2 = 0; + const SortOrderTblEntry* col1 = nullptr; + const SortOrderTblEntry* col2 = nullptr; while (true) { int sum1, sum2; - col1 = get_coltab_entry(obj, &s1, &l1, &stat1, &sum1); - col2 = get_coltab_entry(obj, &s2, &l2, &stat2, &sum2); + col1 = get_coltab_entry(obj, &s1, end1, &paddedLen1, &stat1, &sum1); + col2 = get_coltab_entry(obj, &s2, end2, &paddedLen2, &stat2, &sum2); if (!col1 || !col2) break; @@ -640,7 +623,7 @@ SSHORT LC_NARROW_compare(texttype* obj, ULONG l1, const BYTE* s1, ULONG l2, cons } // One of the strings ended - fb_assert(l1 == 0 || l2 == 0); + fb_assert(paddedLen1 == 0 || paddedLen2 == 0); fb_assert(col1 == NULL || col2 == NULL); if (col1 && !col2) @@ -649,7 +632,7 @@ SSHORT LC_NARROW_compare(texttype* obj, ULONG l1, const BYTE* s1, ULONG l2, cons if (!col1 && col2) return -500; - if (l1 == 0 && l2 == 0) + if (paddedLen1 == 0 && paddedLen2 == 0) { if (save_secondary) return save_secondary; @@ -662,12 +645,12 @@ SSHORT LC_NARROW_compare(texttype* obj, ULONG l1, const BYTE* s1, ULONG l2, cons !(impl->texttype_flags & TEXTTYPE_ignore_specials) && !(impl->texttype_flags & TEXTTYPE_specials_first)) { - return special_scan(obj, save_l1, save_s1, save_l2, save_s2); + return special_scan(obj, save_s1, end1, savePaddedLen1, save_s2, end2, savePaddedLen2); } return 0; } - if (l1) + if (paddedLen1) return 600; return -600; } diff --git a/src/jrd/Optimizer.cpp b/src/jrd/Optimizer.cpp index 99f895ab6e..a985b895bb 100644 --- a/src/jrd/Optimizer.cpp +++ b/src/jrd/Optimizer.cpp @@ -98,18 +98,27 @@ namespace ValueExprNode* injectCast(CompilerScratch* csb, ValueExprNode* value, CastNode*& cast, - const dsc& desc) + const dsc& desc, bool upperLimit) { // If the indexed column is of type int64, then we need to inject - // an extra cast to deliver the scale value to the BTR level + // an extra cast to deliver the scale value to the BTR level. + // If the type is text and this is an upper limit, we need to cast + // the expression to CHAR to fill the key with trailing spaces. - if (value && desc.dsc_dtype == dtype_int64) + if (value && (desc.dsc_dtype == dtype_int64 || (desc.isText() && upperLimit))) { if (!cast) { cast = FB_NEW_POOL(csb->csb_pool) CastNode(csb->csb_pool); cast->source = value; cast->castDesc = desc; + + if (cast->castDesc.dsc_dtype == dtype_varying) + { + cast->castDesc.dsc_dtype = dtype_text; + cast->castDesc.dsc_length -= sizeof(USHORT); + } + cast->impureOffset = csb->allocImpure(); } @@ -1857,7 +1866,7 @@ bool OptimizerRetrieval::matchBoolean(IndexScratch* indexScratch, BoolExprNode* const bool isDesc = (indexScratch->idx->idx_flags & idx_descending); - // Needed for int64 matches, see injectCast() function + // Needed for int64/text matches, see injectCast() function CastNode *cast = NULL, *cast2 = NULL; fb_assert(indexScratch->segments.getCount() == indexScratch->idx->idx_count); @@ -1885,8 +1894,8 @@ bool OptimizerRetrieval::matchBoolean(IndexScratch* indexScratch, BoolExprNode* if (!((segment->scanType == segmentScanEqual) || (segment->scanType == segmentScanEquivalent))) { - segment->lowerValue = injectCast(csb, value, cast, matchDesc); - segment->upperValue = injectCast(csb, value2, cast2, matchDesc); + segment->lowerValue = injectCast(csb, value, cast, matchDesc, false); + segment->upperValue = injectCast(csb, value2, cast2, matchDesc, true); segment->scanType = segmentScanBetween; segment->excludeLower = false; segment->excludeUpper = false; @@ -1900,7 +1909,7 @@ bool OptimizerRetrieval::matchBoolean(IndexScratch* indexScratch, BoolExprNode* if (!(segment->scanType == segmentScanEqual)) { segment->lowerValue = segment->upperValue = - injectCast(csb, value, cast, matchDesc); + injectCast(csb, value, cast, matchDesc, false); segment->scanType = segmentScanEquivalent; segment->excludeLower = false; segment->excludeUpper = false; @@ -1910,7 +1919,7 @@ bool OptimizerRetrieval::matchBoolean(IndexScratch* indexScratch, BoolExprNode* case blr_eql: segment->matches.add(boolean); segment->lowerValue = segment->upperValue = - injectCast(csb, value, cast, matchDesc); + injectCast(csb, value, cast, matchDesc, false); segment->scanType = segmentScanEqual; segment->excludeLower = false; segment->excludeUpper = false; @@ -1946,7 +1955,7 @@ bool OptimizerRetrieval::matchBoolean(IndexScratch* indexScratch, BoolExprNode* if (forward) { - segment->lowerValue = injectCast(csb, value, cast, matchDesc); + segment->lowerValue = injectCast(csb, value, cast, matchDesc, false); if (segment->scanType == segmentScanLess) segment->scanType = segmentScanBetween; else @@ -1954,7 +1963,7 @@ bool OptimizerRetrieval::matchBoolean(IndexScratch* indexScratch, BoolExprNode* } else { - segment->upperValue = injectCast(csb, value, cast, matchDesc); + segment->upperValue = injectCast(csb, value, cast, matchDesc, true); if (segment->scanType == segmentScanGreater) segment->scanType = segmentScanBetween; else @@ -1977,7 +1986,7 @@ bool OptimizerRetrieval::matchBoolean(IndexScratch* indexScratch, BoolExprNode* if (forward) { - segment->upperValue = injectCast(csb, value, cast, matchDesc); + segment->upperValue = injectCast(csb, value, cast, matchDesc, true); if (segment->scanType == segmentScanGreater) segment->scanType = segmentScanBetween; else @@ -1985,7 +1994,7 @@ bool OptimizerRetrieval::matchBoolean(IndexScratch* indexScratch, BoolExprNode* } else { - segment->lowerValue = injectCast(csb, value, cast, matchDesc); + segment->lowerValue = injectCast(csb, value, cast, matchDesc, false); if (segment->scanType == segmentScanLess) segment->scanType = segmentScanBetween; else @@ -2003,7 +2012,7 @@ bool OptimizerRetrieval::matchBoolean(IndexScratch* indexScratch, BoolExprNode* (segment->scanType == segmentScanEquivalent))) { segment->lowerValue = segment->upperValue = - injectCast(csb, value, cast, matchDesc); + injectCast(csb, value, cast, matchDesc, false); segment->scanType = segmentScanStarting; segment->excludeLower = false; segment->excludeUpper = false; diff --git a/src/jrd/btr.cpp b/src/jrd/btr.cpp index d720e610b7..bdbe1a4bd4 100644 --- a/src/jrd/btr.cpp +++ b/src/jrd/btr.cpp @@ -188,7 +188,7 @@ namespace static ULONG add_node(thread_db*, WIN*, index_insertion*, temporary_key*, RecordNumber*, ULONG*, ULONG*); -static void compress(thread_db*, const dsc*, temporary_key*, USHORT, bool, bool, USHORT); +static void compress(thread_db*, const dsc*, temporary_key*, USHORT, bool, bool, USHORT, bool); static USHORT compress_root(thread_db*, index_root_page*); static void copy_key(const temporary_key*, temporary_key*); static contents delete_node(thread_db*, WIN*, UCHAR*); @@ -895,12 +895,14 @@ btree_page* BTR_find_page(thread_db* tdbb, else { idx_e errorCode = idx_e_ok; + bool avoidTrailingTrim = !(retrieval->irb_generic & (irb_starting | irb_equality)); if (retrieval->irb_upper_count) { errorCode = BTR_make_key(tdbb, retrieval->irb_upper_count, retrieval->irb_value + retrieval->irb_desc.idx_count, &retrieval->irb_desc, upper, + avoidTrailingTrim && !(retrieval->irb_generic & irb_descending), (retrieval->irb_generic & irb_starting) != 0); } @@ -910,6 +912,7 @@ btree_page* BTR_find_page(thread_db* tdbb, { errorCode = BTR_make_key(tdbb, retrieval->irb_lower_count, retrieval->irb_value, &retrieval->irb_desc, lower, + avoidTrailingTrim && (retrieval->irb_generic & irb_descending), (retrieval->irb_generic & irb_starting) != 0); } } @@ -1253,7 +1256,7 @@ idx_e BTR_key(thread_db* tdbb, jrd_rel* relation, Record* record, index_desc* id key->key_flags |= key_empty; - compress(tdbb, desc_ptr, key, tail->idx_itype, isNull, descending, keyType); + compress(tdbb, desc_ptr, key, tail->idx_itype, isNull, descending, keyType, false); } else { @@ -1287,7 +1290,7 @@ idx_e BTR_key(thread_db* tdbb, jrd_rel* relation, Record* record, index_desc* id } } - compress(tdbb, desc_ptr, &temp, tail->idx_itype, isNull, descending, keyType); + compress(tdbb, desc_ptr, &temp, tail->idx_itype, isNull, descending, keyType, false); const UCHAR* q = temp.key_data; for (USHORT l = temp.key_length; l; --l, --stuff_count) @@ -1519,6 +1522,7 @@ idx_e BTR_make_key(thread_db* tdbb, const ValueExprNode* const* exprs, const index_desc* idx, temporary_key* key, + bool avoidTrailingTrim, bool fuzzy) { /************************************** @@ -1567,7 +1571,7 @@ idx_e BTR_make_key(thread_db* tdbb, if (isNull) key->key_nulls = 1; - compress(tdbb, desc, key, tail->idx_itype, isNull, descending, keyType); + compress(tdbb, desc, key, tail->idx_itype, isNull, descending, keyType, avoidTrailingTrim); if (fuzzy && (key->key_flags & key_empty)) key->key_length = 0; @@ -1600,7 +1604,8 @@ idx_e BTR_make_key(thread_db* tdbb, compress(tdbb, desc, &temp, tail->idx_itype, isNull, descending, (n == count - 1 ? - keyType : ((idx->idx_flags & idx_unique) ? INTL_KEY_UNIQUE : INTL_KEY_SORT))); + keyType : ((idx->idx_flags & idx_unique) ? INTL_KEY_UNIQUE : INTL_KEY_SORT)), + avoidTrailingTrim && n == count - 1); if (!(temp.key_flags & key_empty)) is_key_empty = false; @@ -1707,7 +1712,7 @@ void BTR_make_null_key(thread_db* tdbb, const index_desc* idx, temporary_key* ke // If the index is a single segment index, don't sweat the compound stuff if ((idx->idx_count == 1) || (idx->idx_flags & idx_expressn)) { - compress(tdbb, &null_desc, key, tail->idx_itype, true, descending, false); + compress(tdbb, &null_desc, key, tail->idx_itype, true, descending, false, false); } else { @@ -1721,7 +1726,7 @@ void BTR_make_null_key(thread_db* tdbb, const index_desc* idx, temporary_key* ke for (; stuff_count; --stuff_count) *p++ = 0; - compress(tdbb, &null_desc, &temp, tail->idx_itype, true, descending, false); + compress(tdbb, &null_desc, &temp, tail->idx_itype, true, descending, false, false); const UCHAR* q = temp.key_data; for (USHORT l = temp.key_length; l; --l, --stuff_count) @@ -2409,11 +2414,8 @@ static ULONG add_node(thread_db* tdbb, } -static void compress(thread_db* tdbb, - const dsc* desc, - temporary_key* key, - USHORT itype, - bool isNull, bool descending, USHORT key_type) +static void compress(thread_db* tdbb, const dsc* desc, temporary_key* key, USHORT itype, + bool isNull, bool descending, USHORT key_type, bool avoidTrailingTrim) { /************************************** * @@ -2470,9 +2472,8 @@ static void compress(thread_db* tdbb, itype == idx_decimal || itype >= idx_first_intl_string) { VaryStr buffer; - const UCHAR pad = (itype == idx_string) ? ' ' : 0; + const UCHAR pad = itype == idx_string ? ' ' : 0; UCHAR* ptr; - size_t length; if (itype == idx_decimal) @@ -2481,11 +2482,18 @@ static void compress(thread_db* tdbb, length = dec.makeIndexKey(&buffer); ptr = reinterpret_cast(buffer.vary_string); } - else if (itype >= idx_first_intl_string || itype == idx_metadata) + else { - DSC to; + USHORT ttype = ttype_binary; + length = MOV_get_string_ptr(tdbb, desc, &ttype, &ptr, &buffer, MAX_KEY); + + dsc desc2 = *desc; + desc2.dsc_dtype = dtype_text; + desc2.dsc_length = length; + desc2.dsc_address = ptr; // convert to an international byte array + dsc to; to.dsc_dtype = dtype_text; to.dsc_flags = 0; to.dsc_sub_type = 0; @@ -2493,10 +2501,9 @@ static void compress(thread_db* tdbb, to.dsc_ttype() = ttype_sort_key; to.dsc_length = MIN(MAX_KEY, sizeof(buffer)); ptr = to.dsc_address = reinterpret_cast(buffer.vary_string); - length = INTL_string_to_key(tdbb, itype, desc, &to, key_type); + + length = INTL_string_to_key(tdbb, itype, &desc2, &to, key_type, !avoidTrailingTrim); } - else - length = MOV_get_string(tdbb, desc, &ptr, &buffer, MAX_KEY); if (length) { @@ -2515,23 +2522,27 @@ static void compress(thread_db* tdbb, memcpy(p, ptr, length); p += length; + + if (itype != idx_string) + { + while (p > key->key_data) + { + if (*--p != pad) + { + ++p; + break; + } + } + } } else { // Leave key_empty flag, because the string is an empty string if (descending && ((pad == desc_end_value_prefix) || (pad == desc_end_value_check))) *p++ = desc_end_value_prefix; - - *p++ = pad; } - while (p > key->key_data) - { - if (*--p != pad) - break; - } - - key->key_length = p + 1 - key->key_data; + key->key_length = p - key->key_data; return; } @@ -3443,9 +3454,9 @@ static ULONG fast_load(thread_db* tdbb, // Detect the case when set of duplicate keys contains more then one key // from primary record version. It breaks the unique constraint and must - // be rejected. Note, it is not always could be detected while sorting. - // Set to true when primary record version is found in current set of - // duplicate keys. + // be rejected. Note, it is not always could be detected while sorting. + // Set to true when primary record version is found in current set of + // duplicate keys. bool primarySeen = false; while (!error) @@ -6405,7 +6416,7 @@ static bool scan(thread_db* tdbb, UCHAR* pointer, RecordBitmap** bitmap, RecordB break; // node contains more bytes than a key, check numbers - // of last key segment and current node segment. + // of last key segment and current node segment. fb_assert(!descending); fb_assert(p - STUFF_COUNT - 1 >= key->key_data); @@ -6415,7 +6426,7 @@ static bool scan(thread_db* tdbb, UCHAR* pointer, RecordBitmap** bitmap, RecordB fb_assert(keySeg <= nodeSeg); - // If current segment at node is the same as last segment + // If current segment at node is the same as last segment // of the key then node > key. if (keySeg == nodeSeg) return false; diff --git a/src/jrd/btr_proto.h b/src/jrd/btr_proto.h index 71a38586fa..9bfa9a658e 100644 --- a/src/jrd/btr_proto.h +++ b/src/jrd/btr_proto.h @@ -46,7 +46,7 @@ USHORT BTR_key_length(Jrd::thread_db*, Jrd::jrd_rel*, Jrd::index_desc*); Ods::btree_page* BTR_left_handoff(Jrd::thread_db*, Jrd::win*, Ods::btree_page*, SSHORT); bool BTR_lookup(Jrd::thread_db*, Jrd::jrd_rel*, USHORT, Jrd::index_desc*, Jrd::RelationPages*); Jrd::idx_e BTR_make_key(Jrd::thread_db*, USHORT, const Jrd::ValueExprNode* const*, const Jrd::index_desc*, - Jrd::temporary_key*, bool); + Jrd::temporary_key*, bool, bool); void BTR_make_null_key(Jrd::thread_db*, const Jrd::index_desc*, Jrd::temporary_key*); bool BTR_next_index(Jrd::thread_db*, Jrd::jrd_rel*, Jrd::jrd_tra*, Jrd::index_desc*, Jrd::win*); void BTR_remove(Jrd::thread_db*, Jrd::win*, Jrd::index_insertion*); diff --git a/src/jrd/intl.cpp b/src/jrd/intl.cpp index c9df5cd402..7bf128ea01 100644 --- a/src/jrd/intl.cpp +++ b/src/jrd/intl.cpp @@ -1207,7 +1207,8 @@ USHORT INTL_string_to_key(thread_db* tdbb, USHORT idxType, const dsc* pString, DSC* pByte, - USHORT key_type) + USHORT key_type, + bool trimTrailing) { /************************************** * @@ -1251,8 +1252,14 @@ USHORT INTL_string_to_key(thread_db* tdbb, ttype = ttype_metadata; break; default: - pad_char = 0; ttype = INTL_INDEX_TO_TEXT(idxType); + + if (trimTrailing) + { + auto charSet = INTL_charset_lookup(tdbb, ttype); + pad_char = *charSet->getSpace(); + } + break; } @@ -1262,6 +1269,19 @@ USHORT INTL_string_to_key(thread_db* tdbb, UCHAR* src; USHORT len = MOV_make_string2(tdbb, pString, ttype, &src, temp); + if (trimTrailing && len) + { + const UCHAR* end = src + len; + + while (--end >= src) + { + if (*end != pad_char) + break; + } + + len = end + 1 - src; + } + USHORT outlen; UCHAR* dest = pByte->dsc_address; USHORT destLen = pByte->dsc_length; @@ -1274,15 +1294,7 @@ USHORT INTL_string_to_key(thread_db* tdbb, case ttype_none: while (len-- && destLen-- > 0) *dest++ = *src++; - // strip off ending pad characters - while (dest > pByte->dsc_address) - { - if (*(dest - 1) == pad_char) - dest--; - else - break; - } - outlen = (dest - pByte->dsc_address); + outlen = dest - pByte->dsc_address; break; default: TextType* obj = INTL_texttype_lookup(tdbb, ttype); diff --git a/src/jrd/intl_builtin.cpp b/src/jrd/intl_builtin.cpp index da94a9ac10..d6842dd3ca 100644 --- a/src/jrd/intl_builtin.cpp +++ b/src/jrd/intl_builtin.cpp @@ -476,22 +476,11 @@ static USHORT internal_string_to_key(texttype* obj, **************************************/ const UCHAR* const pStart = dest; const UCHAR pad_char = static_cast(obj->texttype_impl)->texttype_pad_char; + while (inLen-- && outLen--) *dest++ = *src++; - if (obj->texttype_pad_option) - { - // strip off ending pad characters - while (dest > pStart) - { - if (*(dest - 1) == pad_char) - dest--; - else - break; - } - } - - return (dest - pStart); + return dest - pStart; } static SSHORT internal_compare(texttype* obj, @@ -508,56 +497,56 @@ static SSHORT internal_compare(texttype* obj, * **************************************/ const UCHAR pad = static_cast(obj->texttype_impl)->texttype_pad_char; - SLONG fill = length1 - length2; + + int fill = length1 - length2; + if (length1 >= length2) { if (length2) { - do { + do + { if (*p1++ != *p2++) - { - if (p1[-1] > p2[-1]) - return 1; - return -1; - } + return (p1[-1] > p2[-1]) ? 1 : -1; } while (--length2); } + if (fill > 0) { - do { - if (!obj->texttype_pad_option || *p1++ != pad) + if (obj->texttype_pad_option) + { + do { - if (p1[-1] > pad) - return 1; - return -1; - } - } while (--fill); + if (*p1++ != pad) + return (p1[-1] > pad) ? 1 : -1; + } while (--fill); + } + else + return 1; } + return 0; } if (length1) { - do { + do + { if (*p1++ != *p2++) - { - if (p1[-1] > p2[-1]) - return 1; - return -1; - } + return (p1[-1] > p2[-1]) ? 1 : -1; } while (--length1); } - do { - if (!obj->texttype_pad_option || *p2++ != pad) + if (obj->texttype_pad_option) + { + do { - if (pad > p2[-1]) - return 1; - return -1; - } - } while (++fill); + if (*p2++ != pad) + return (pad > p2[-1]) ? 1 : -1; + } while (++fill); + } - return 0; + return fill ? -1 : 0; } @@ -638,19 +627,6 @@ static USHORT utf16_keylength(texttype* /*obj*/, USHORT len) return UnicodeUtil::utf16KeyLength(len); } -namespace { -template -void padUtf16(const USHORT* text, U& len) -{ - fb_assert(len % sizeof(USHORT) == 0); - for (; len > 0; len -= sizeof(USHORT)) - { - if (text[len / sizeof(USHORT) - 1] != 32) - break; - } -} -} //namespace - static USHORT utf16_string_to_key(texttype* obj, USHORT srcLen, const UCHAR* src, @@ -672,11 +648,6 @@ static USHORT utf16_string_to_key(texttype* obj, Firebird::Aligner alSrc(src, srcLen); - if (obj->texttype_pad_option) - { - padUtf16(alSrc, srcLen); - } - return UnicodeUtil::utf16ToKey(srcLen, alSrc, dstLen, dst); } @@ -703,13 +674,7 @@ static SSHORT utf16_compare(texttype* obj, Firebird::Aligner al1(str1, len1); Firebird::Aligner al2(str2, len2); - if (obj->texttype_pad_option) - { - padUtf16(al1, len1); - padUtf16(al2, len2); - } - - return UnicodeUtil::utf16Compare(len1, al1, len2, al2, error_flag); + return UnicodeUtil::utf16Compare(len1, al1, len2, al2, obj->texttype_pad_option, error_flag); } static ULONG utf16_upper(texttype* obj, ULONG srcLen, const UCHAR* src, ULONG dstLen, UCHAR* dst) @@ -792,11 +757,6 @@ static USHORT utf32_string_to_key(texttype* obj, dstLen, utf16Str.getBuffer(dstLen / sizeof(USHORT) + 1), &err_code, &err_position); const USHORT* s = utf16Str.begin(); - if (obj->texttype_pad_option) - { - padUtf16(s, sLen); - } - return UnicodeUtil::utf16ToKey(sLen, s, dstLen, dst); } diff --git a/src/jrd/intl_proto.h b/src/jrd/intl_proto.h index 58c3b479f8..da4ac45f32 100644 --- a/src/jrd/intl_proto.h +++ b/src/jrd/intl_proto.h @@ -52,7 +52,7 @@ Jrd::Collation* INTL_texttype_lookup(Jrd::thread_db* tdbb, USHORT parm1); void INTL_texttype_unload(Jrd::thread_db*, USHORT); bool INTL_texttype_validate(Jrd::thread_db*, const SubtypeInfo*); void INTL_pad_spaces(Jrd::thread_db*, dsc*, UCHAR*, ULONG); -USHORT INTL_string_to_key(Jrd::thread_db*, USHORT, const dsc*, dsc*, USHORT); +USHORT INTL_string_to_key(Jrd::thread_db*, USHORT, const dsc*, dsc*, USHORT, bool trimTrailing = false); // Built-in charsets/texttypes interface INTL_BOOL INTL_builtin_lookup_charset(charset* cs, const ASCII* charset_name, const ASCII* config_info);