diff --git a/src/common/unicode_util.cpp b/src/common/unicode_util.cpp index eeda6b0bc5..ed9a5d29d7 100644 --- a/src/common/unicode_util.cpp +++ b/src/common/unicode_util.cpp @@ -1336,10 +1336,6 @@ UnicodeUtil::Utf16Collation* UnicodeUtil::Utf16Collation::create( icu->ucolSetAttribute(compareCollator, UCOL_STRENGTH, UCOL_SECONDARY, &status); } - USet* contractions = icu->usetOpen(0, 0); - // status not verified here. - icu->ucolGetContractions(partialCollator, contractions, &status); - Utf16Collation* obj = FB_NEW Utf16Collation(); obj->icu = icu; obj->tt = tt; @@ -1347,9 +1343,39 @@ UnicodeUtil::Utf16Collation* UnicodeUtil::Utf16Collation::create( obj->compareCollator = compareCollator; obj->partialCollator = partialCollator; obj->sortCollator = sortCollator; - obj->contractions = contractions; - obj->contractionsCount = icu->usetGetItemCount(contractions); obj->numericSort = isNumericSort; + obj->maxContractionsPrefixLength = 0; + + USet* contractions = icu->usetOpen(1, 0); + // status not verified here. + icu->ucolGetContractions(partialCollator, contractions, &status); + + int contractionsCount = icu->usetGetItemCount(contractions); + + for (int contractionIndex = 0; contractionIndex < contractionsCount; ++contractionIndex) + { + UChar str[10]; + UChar32 start, end; + + status = U_ZERO_ERROR; + int len = icu->usetGetItem(contractions, contractionIndex, &start, &end, str, sizeof(str), &status); + + if (len >= 2) + { + obj->maxContractionsPrefixLength = len - 1 > obj->maxContractionsPrefixLength ? + len - 1 : obj->maxContractionsPrefixLength; + + for (int currentLen = 1; currentLen < len; ++currentLen) + { + string s(reinterpret_cast(str), currentLen * 2); + + if (!obj->contractionsPrefix.exist(s)) + obj->contractionsPrefix.push(s); + } + } + } + + icu->usetClose(contractions); return obj; } @@ -1357,8 +1383,6 @@ UnicodeUtil::Utf16Collation* UnicodeUtil::Utf16Collation::create( UnicodeUtil::Utf16Collation::~Utf16Collation() { - icu->usetClose(contractions); - icu->ucolClose(compareCollator); icu->ucolClose(partialCollator); icu->ucolClose(sortCollator); @@ -1409,28 +1433,16 @@ USHORT UnicodeUtil::Utf16Collation::stringToKey(USHORT srcLen, const USHORT* src switch (key_type) { case INTL_KEY_PARTIAL: - { coll = partialCollator; // Remove last bytes of key if they are start of a contraction // to correctly find in the index. - ConversionICU& cIcu(getConversionICU()); - for (int i = 0; i < contractionsCount; ++i) + + for (int i = MIN(maxContractionsPrefixLength, srcLenLong); i > 0; --i) { - UChar str[10]; - UErrorCode status = U_ZERO_ERROR; - int len = icu->usetGetItem(contractions, i, NULL, NULL, str, sizeof(str), &status); - - if (len > SLONG(srcLenLong)) - len = srcLenLong; - else - --len; - - // safe cast - alignment not changed - if (cIcu.u_strCompare(str, len, - reinterpret_cast(src) + srcLenLong - len, len, true) == 0) + if (contractionsPrefix.exist(string(reinterpret_cast(src + srcLenLong - i), i * 2))) { - srcLenLong -= len; + srcLenLong -= i; break; } } @@ -1451,7 +1463,6 @@ USHORT UnicodeUtil::Utf16Collation::stringToKey(USHORT srcLen, const USHORT* src } break; - } case INTL_KEY_UNIQUE: coll = compareCollator; diff --git a/src/common/unicode_util.h b/src/common/unicode_util.h index 03d48f9419..c5940399f1 100644 --- a/src/common/unicode_util.h +++ b/src/common/unicode_util.h @@ -31,6 +31,7 @@ #include "../common/IntlUtil.h" #include "../common/os/mod_loader.h" #include "../common/classes/fb_string.h" +#include "../common/classes/objects_array.h" #undef U_SHOW_CPLUSPLUS_API #define U_SHOW_CPLUSPLUS_API 0 @@ -168,6 +169,11 @@ public: Firebird::IntlUtil::SpecificAttributesMap& specificAttributes, const Firebird::string& configInfo); + Utf16Collation() + : contractionsPrefix(*getDefaultMemoryPool()) + { + } + ~Utf16Collation(); USHORT keyLength(USHORT len) const; @@ -190,8 +196,8 @@ public: UCollator* compareCollator; UCollator* partialCollator; UCollator* sortCollator; - USet* contractions; - int contractionsCount; + Firebird::SortedObjectsArray contractionsPrefix; // UTF-16 string + unsigned maxContractionsPrefixLength; // number of characters bool numericSort; };