mirror of
https://github.com/FirebirdSQL/firebird.git
synced 2025-01-24 06:03:02 +01:00
Reimplementation of CORE-824 for ICU 3.0
This commit is contained in:
parent
8dcb423be3
commit
840ff6f0ac
@ -231,7 +231,11 @@ const IntlManager::CollationDefinition IntlManager::defaultCollations[] = {
|
|||||||
{CS_UTF8, 0, "UTF8", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
{CS_UTF8, 0, "UTF8", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
||||||
{CS_UTF8, 1, "UCS_BASIC", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
{CS_UTF8, 1, "UCS_BASIC", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
||||||
{CS_UTF8, 2, "UNICODE", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
{CS_UTF8, 2, "UNICODE", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
||||||
{CS_UTF8, 3, "UNICODE_CI", "UNICODE", TEXTTYPE_ATTR_PAD_SPACE | TEXTTYPE_ATTR_CASE_INSENSITIVE, NULL},
|
{CS_UTF8, 3, "UNICODE_CI", "UNICODE",
|
||||||
|
TEXTTYPE_ATTR_PAD_SPACE | TEXTTYPE_ATTR_CASE_INSENSITIVE, NULL},
|
||||||
|
{CS_UTF8, 4, "UNICODE_CI_AI", "UNICODE",
|
||||||
|
TEXTTYPE_ATTR_PAD_SPACE | TEXTTYPE_ATTR_CASE_INSENSITIVE | TEXTTYPE_ATTR_ACCENT_INSENSITIVE,
|
||||||
|
NULL},
|
||||||
#ifdef FB_NEW_INTL_ALLOW_NOT_READY
|
#ifdef FB_NEW_INTL_ALLOW_NOT_READY
|
||||||
{CS_UTF16, 0, "UTF16", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
{CS_UTF16, 0, "UTF16", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
||||||
{CS_UTF16, 1, "UCS_BASIC", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
{CS_UTF16, 1, "UCS_BASIC", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
||||||
@ -283,9 +287,15 @@ const IntlManager::CollationDefinition IntlManager::defaultCollations[] = {
|
|||||||
{CS_ISO8859_1, 12, "EN_UK", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
{CS_ISO8859_1, 12, "EN_UK", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
||||||
{CS_ISO8859_1, 14, "EN_US", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
{CS_ISO8859_1, 14, "EN_US", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
||||||
{CS_ISO8859_1, 15, "PT_PT", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
{CS_ISO8859_1, 15, "PT_PT", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
||||||
{CS_ISO8859_1, 16, "PT_BR", NULL, TEXTTYPE_ATTR_PAD_SPACE | TEXTTYPE_ATTR_CASE_INSENSITIVE | TEXTTYPE_ATTR_ACCENT_INSENSITIVE, 0},
|
{CS_ISO8859_1, 16, "PT_BR", NULL,
|
||||||
{CS_ISO8859_1, 17, "ES_ES_CI_AI", NULL, TEXTTYPE_ATTR_PAD_SPACE | TEXTTYPE_ATTR_CASE_INSENSITIVE | TEXTTYPE_ATTR_ACCENT_INSENSITIVE, "DISABLE-COMPRESSIONS=1;SPECIALS-FIRST=1"},
|
TEXTTYPE_ATTR_PAD_SPACE | TEXTTYPE_ATTR_CASE_INSENSITIVE | TEXTTYPE_ATTR_ACCENT_INSENSITIVE,
|
||||||
{CS_ISO8859_1, 18, "FR_FR_CI_AI", "FR_FR", TEXTTYPE_ATTR_PAD_SPACE | TEXTTYPE_ATTR_CASE_INSENSITIVE | TEXTTYPE_ATTR_ACCENT_INSENSITIVE, "SPECIALS-FIRST=1"},
|
NULL},
|
||||||
|
{CS_ISO8859_1, 17, "ES_ES_CI_AI", NULL,
|
||||||
|
TEXTTYPE_ATTR_PAD_SPACE | TEXTTYPE_ATTR_CASE_INSENSITIVE | TEXTTYPE_ATTR_ACCENT_INSENSITIVE,
|
||||||
|
"DISABLE-COMPRESSIONS=1;SPECIALS-FIRST=1"},
|
||||||
|
{CS_ISO8859_1, 18, "FR_FR_CI_AI", "FR_FR",
|
||||||
|
TEXTTYPE_ATTR_PAD_SPACE | TEXTTYPE_ATTR_CASE_INSENSITIVE | TEXTTYPE_ATTR_ACCENT_INSENSITIVE,
|
||||||
|
"SPECIALS-FIRST=1"},
|
||||||
{CS_ISO8859_2, 0, "ISO8859_2", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
{CS_ISO8859_2, 0, "ISO8859_2", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
||||||
{CS_ISO8859_2, 1, "CS_CZ", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
{CS_ISO8859_2, 1, "CS_CZ", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
||||||
{CS_ISO8859_2, 2, "ISO_HUN", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
{CS_ISO8859_2, 2, "ISO_HUN", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
||||||
@ -332,8 +342,10 @@ const IntlManager::CollationDefinition IntlManager::defaultCollations[] = {
|
|||||||
{CS_WIN1250, 4, "PXW_SLOV", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
{CS_WIN1250, 4, "PXW_SLOV", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
||||||
{CS_WIN1250, 5, "PXW_HUN", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
{CS_WIN1250, 5, "PXW_HUN", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
||||||
{CS_WIN1250, 6, "BS_BA", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
{CS_WIN1250, 6, "BS_BA", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
||||||
{CS_WIN1250, 7, "WIN_CZ", NULL, TEXTTYPE_ATTR_PAD_SPACE | TEXTTYPE_ATTR_CASE_INSENSITIVE, 0},
|
{CS_WIN1250, 7, "WIN_CZ", NULL, TEXTTYPE_ATTR_PAD_SPACE | TEXTTYPE_ATTR_CASE_INSENSITIVE, NULL},
|
||||||
{CS_WIN1250, 8, "WIN_CZ_CI_AI", NULL, TEXTTYPE_ATTR_PAD_SPACE | TEXTTYPE_ATTR_CASE_INSENSITIVE | TEXTTYPE_ATTR_ACCENT_INSENSITIVE, 0},
|
{CS_WIN1250, 8, "WIN_CZ_CI_AI", NULL,
|
||||||
|
TEXTTYPE_ATTR_PAD_SPACE | TEXTTYPE_ATTR_CASE_INSENSITIVE | TEXTTYPE_ATTR_ACCENT_INSENSITIVE,
|
||||||
|
NULL},
|
||||||
{CS_WIN1251, 0, "WIN1251", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
{CS_WIN1251, 0, "WIN1251", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
||||||
{CS_WIN1251, 1, "PXW_CYRL", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
{CS_WIN1251, 1, "PXW_CYRL", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
||||||
{CS_WIN1251, 2, "WIN1251_UA", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
{CS_WIN1251, 2, "WIN1251_UA", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
||||||
@ -343,7 +355,9 @@ const IntlManager::CollationDefinition IntlManager::defaultCollations[] = {
|
|||||||
{CS_WIN1252, 3, "PXW_NORDAN4", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
{CS_WIN1252, 3, "PXW_NORDAN4", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
||||||
{CS_WIN1252, 4, "PXW_SPAN", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
{CS_WIN1252, 4, "PXW_SPAN", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
||||||
{CS_WIN1252, 5, "PXW_SWEDFIN", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
{CS_WIN1252, 5, "PXW_SWEDFIN", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
||||||
{CS_WIN1252, 6, "WIN_PTBR", NULL, TEXTTYPE_ATTR_PAD_SPACE | TEXTTYPE_ATTR_CASE_INSENSITIVE | TEXTTYPE_ATTR_ACCENT_INSENSITIVE, 0},
|
{CS_WIN1252, 6, "WIN_PTBR", NULL,
|
||||||
|
TEXTTYPE_ATTR_PAD_SPACE | TEXTTYPE_ATTR_CASE_INSENSITIVE | TEXTTYPE_ATTR_ACCENT_INSENSITIVE,
|
||||||
|
NULL},
|
||||||
{CS_WIN1253, 0, "WIN1253", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
{CS_WIN1253, 0, "WIN1253", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
||||||
{CS_WIN1253, 1, "PXW_GREEK", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
{CS_WIN1253, 1, "PXW_GREEK", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
||||||
{CS_WIN1254, 0, "WIN1254", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
{CS_WIN1254, 0, "WIN1254", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
||||||
|
@ -37,7 +37,7 @@
|
|||||||
#include "../common/classes/objects_array.h"
|
#include "../common/classes/objects_array.h"
|
||||||
#include "../common/classes/rwlock.h"
|
#include "../common/classes/rwlock.h"
|
||||||
#include "unicode/ustring.h"
|
#include "unicode/ustring.h"
|
||||||
///#include "unicode/utrans.h"
|
#include "unicode/utrans.h"
|
||||||
#include "unicode/uchar.h"
|
#include "unicode/uchar.h"
|
||||||
#include "unicode/ucnv.h"
|
#include "unicode/ucnv.h"
|
||||||
#include "unicode/ucol.h"
|
#include "unicode/ucol.h"
|
||||||
@ -94,6 +94,23 @@ public:
|
|||||||
UCollationResult (U_EXPORT2 *ucolStrColl)(const UCollator* coll, const UChar* source,
|
UCollationResult (U_EXPORT2 *ucolStrColl)(const UCollator* coll, const UChar* source,
|
||||||
int32_t sourceLength, const UChar* target, int32_t targetLength);
|
int32_t sourceLength, const UChar* target, int32_t targetLength);
|
||||||
void (U_EXPORT2 *ucolGetVersion)(const UCollator* coll, UVersionInfo info);
|
void (U_EXPORT2 *ucolGetVersion)(const UCollator* coll, UVersionInfo info);
|
||||||
|
|
||||||
|
void (U_EXPORT2 *utransClose)(UTransliterator* trans);
|
||||||
|
UTransliterator* (U_EXPORT2 *utransOpen)(
|
||||||
|
const char* id,
|
||||||
|
UTransDirection dir,
|
||||||
|
const UChar* rules, /* may be Null */
|
||||||
|
int32_t rulesLength, /* -1 if null-terminated */
|
||||||
|
UParseError* parseError, /* may be Null */
|
||||||
|
UErrorCode* status);
|
||||||
|
void (U_EXPORT2 *utransTransUChars)(
|
||||||
|
const UTransliterator* trans,
|
||||||
|
UChar* text,
|
||||||
|
int32_t* textLength,
|
||||||
|
int32_t textCapacity,
|
||||||
|
int32_t start,
|
||||||
|
int32_t* limit,
|
||||||
|
UErrorCode* status);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
@ -845,6 +862,15 @@ UnicodeUtil::ICU* UnicodeUtil::loadICU(const Firebird::string& icuVersion,
|
|||||||
symbol.printf("ucol_getVersion_%s_%s", majorVersion.c_str(), minorVersion.c_str());
|
symbol.printf("ucol_getVersion_%s_%s", majorVersion.c_str(), minorVersion.c_str());
|
||||||
icu->inModule->findSymbol(symbol, icu->ucolGetVersion);
|
icu->inModule->findSymbol(symbol, icu->ucolGetVersion);
|
||||||
|
|
||||||
|
symbol.printf("utrans_open_%s_%s", majorVersion.c_str(), minorVersion.c_str());
|
||||||
|
icu->inModule->findSymbol(symbol, icu->utransOpen);
|
||||||
|
|
||||||
|
symbol.printf("utrans_close_%s_%s", majorVersion.c_str(), minorVersion.c_str());
|
||||||
|
icu->inModule->findSymbol(symbol, icu->utransClose);
|
||||||
|
|
||||||
|
symbol.printf("utrans_transUChars_%s_%s", majorVersion.c_str(), minorVersion.c_str());
|
||||||
|
icu->inModule->findSymbol(symbol, icu->utransTransUChars);
|
||||||
|
|
||||||
if (!icu->uVersionToString || !icu->ulocCountAvailable || !icu->ulocGetAvailable ||
|
if (!icu->uVersionToString || !icu->ulocCountAvailable || !icu->ulocGetAvailable ||
|
||||||
!icu->usetClose || !icu->usetGetItem || !icu->usetGetItemCount || !icu->usetOpen ||
|
!icu->usetClose || !icu->usetGetItem || !icu->usetGetItemCount || !icu->usetOpen ||
|
||||||
!icu->ucolClose || !icu->ucolGetContractions || !icu->ucolGetSortKey || !icu->ucolOpen ||
|
!icu->ucolClose || !icu->ucolGetContractions || !icu->ucolGetSortKey || !icu->ucolOpen ||
|
||||||
@ -931,7 +957,10 @@ UnicodeUtil::Utf16Collation* UnicodeUtil::Utf16Collation::create(
|
|||||||
if (error)
|
if (error)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
if ((attributes & ~(TEXTTYPE_ATTR_PAD_SPACE | TEXTTYPE_ATTR_CASE_INSENSITIVE)) ||
|
if ((attributes & ~(TEXTTYPE_ATTR_PAD_SPACE | TEXTTYPE_ATTR_CASE_INSENSITIVE |
|
||||||
|
TEXTTYPE_ATTR_ACCENT_INSENSITIVE)) ||
|
||||||
|
((attributes & (TEXTTYPE_ATTR_CASE_INSENSITIVE | TEXTTYPE_ATTR_ACCENT_INSENSITIVE)) ==
|
||||||
|
TEXTTYPE_ATTR_ACCENT_INSENSITIVE) ||
|
||||||
(specificAttributes.count() - attributeCount) != 0)
|
(specificAttributes.count() - attributeCount) != 0)
|
||||||
{
|
{
|
||||||
return NULL;
|
return NULL;
|
||||||
@ -969,7 +998,14 @@ UnicodeUtil::Utf16Collation* UnicodeUtil::Utf16Collation::create(
|
|||||||
|
|
||||||
icu->ucolSetAttribute(partialCollator, UCOL_STRENGTH, UCOL_PRIMARY, &status);
|
icu->ucolSetAttribute(partialCollator, UCOL_STRENGTH, UCOL_PRIMARY, &status);
|
||||||
|
|
||||||
if (attributes & TEXTTYPE_ATTR_CASE_INSENSITIVE)
|
if ((attributes & (TEXTTYPE_ATTR_CASE_INSENSITIVE | TEXTTYPE_ATTR_ACCENT_INSENSITIVE)) ==
|
||||||
|
(TEXTTYPE_ATTR_CASE_INSENSITIVE | TEXTTYPE_ATTR_ACCENT_INSENSITIVE))
|
||||||
|
{
|
||||||
|
icu->ucolSetAttribute(compareCollator, UCOL_STRENGTH, UCOL_PRIMARY, &status);
|
||||||
|
tt->texttype_flags |= TEXTTYPE_SEPARATE_UNIQUE;
|
||||||
|
tt->texttype_canonical_width = 4; // UTF-32
|
||||||
|
}
|
||||||
|
else if (attributes & TEXTTYPE_ATTR_CASE_INSENSITIVE)
|
||||||
{
|
{
|
||||||
icu->ucolSetAttribute(compareCollator, UCOL_STRENGTH, UCOL_SECONDARY, &status);
|
icu->ucolSetAttribute(compareCollator, UCOL_STRENGTH, UCOL_SECONDARY, &status);
|
||||||
tt->texttype_flags |= TEXTTYPE_SEPARATE_UNIQUE;
|
tt->texttype_flags |= TEXTTYPE_SEPARATE_UNIQUE;
|
||||||
@ -1144,7 +1180,38 @@ ULONG UnicodeUtil::Utf16Collation::canonical(ULONG srcLen, const USHORT* src, UL
|
|||||||
|
|
||||||
HalfStaticArray<USHORT, BUFFER_SMALL / 2> upperStr;
|
HalfStaticArray<USHORT, BUFFER_SMALL / 2> upperStr;
|
||||||
|
|
||||||
if (attributes & TEXTTYPE_ATTR_CASE_INSENSITIVE)
|
if ((attributes & (TEXTTYPE_ATTR_CASE_INSENSITIVE | TEXTTYPE_ATTR_ACCENT_INSENSITIVE)) ==
|
||||||
|
(TEXTTYPE_ATTR_CASE_INSENSITIVE | TEXTTYPE_ATTR_ACCENT_INSENSITIVE))
|
||||||
|
{
|
||||||
|
fb_assert(srcLen % sizeof(*src) == 0);
|
||||||
|
|
||||||
|
memcpy(upperStr.getBuffer(srcLen / sizeof(USHORT)), src, srcLen);
|
||||||
|
|
||||||
|
UErrorCode errorCode = U_ZERO_ERROR;
|
||||||
|
UTransliterator* trans = icu->utransOpen("Any-Upper; NFD; [:Nonspacing Mark:] Remove; NFC",
|
||||||
|
UTRANS_FORWARD, NULL, 0, NULL, &errorCode);
|
||||||
|
|
||||||
|
if (errorCode <= 0)
|
||||||
|
{
|
||||||
|
int32_t capacity = dstLen;
|
||||||
|
int32_t len = srcLen / sizeof(USHORT);
|
||||||
|
int32_t limit = len;
|
||||||
|
|
||||||
|
icu->utransTransUChars(trans, reinterpret_cast<UChar*>(upperStr.begin()),
|
||||||
|
&len, capacity, 0, &limit, &errorCode);
|
||||||
|
icu->utransClose(trans);
|
||||||
|
|
||||||
|
len *= sizeof(USHORT);
|
||||||
|
if (len > dstLen)
|
||||||
|
len = INTL_BAD_STR_LENGTH;
|
||||||
|
|
||||||
|
srcLen = len;
|
||||||
|
src = upperStr.begin();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
return INTL_BAD_STR_LENGTH;
|
||||||
|
}
|
||||||
|
else if (attributes & TEXTTYPE_ATTR_CASE_INSENSITIVE)
|
||||||
{
|
{
|
||||||
srcLen = utf16UpperCase(srcLen, src,
|
srcLen = utf16UpperCase(srcLen, src,
|
||||||
srcLen, upperStr.getBuffer(srcLen / sizeof(USHORT)), exceptions);
|
srcLen, upperStr.getBuffer(srcLen / sizeof(USHORT)), exceptions);
|
||||||
|
Loading…
Reference in New Issue
Block a user