8
0
mirror of https://github.com/FirebirdSQL/firebird.git synced 2025-01-23 22:03:03 +01:00

Reimplementation of CORE-824 for ICU 3.0

This commit is contained in:
asfernandes 2008-02-12 14:47:19 +00:00
parent 8dcb423be3
commit 840ff6f0ac
2 changed files with 92 additions and 11 deletions

View File

@ -231,7 +231,11 @@ const IntlManager::CollationDefinition IntlManager::defaultCollations[] = {
{CS_UTF8, 0, "UTF8", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
{CS_UTF8, 1, "UCS_BASIC", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
{CS_UTF8, 2, "UNICODE", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
{CS_UTF8, 3, "UNICODE_CI", "UNICODE", TEXTTYPE_ATTR_PAD_SPACE | TEXTTYPE_ATTR_CASE_INSENSITIVE, NULL},
{CS_UTF8, 3, "UNICODE_CI", "UNICODE",
TEXTTYPE_ATTR_PAD_SPACE | TEXTTYPE_ATTR_CASE_INSENSITIVE, NULL},
{CS_UTF8, 4, "UNICODE_CI_AI", "UNICODE",
TEXTTYPE_ATTR_PAD_SPACE | TEXTTYPE_ATTR_CASE_INSENSITIVE | TEXTTYPE_ATTR_ACCENT_INSENSITIVE,
NULL},
#ifdef FB_NEW_INTL_ALLOW_NOT_READY
{CS_UTF16, 0, "UTF16", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
{CS_UTF16, 1, "UCS_BASIC", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
@ -283,9 +287,15 @@ const IntlManager::CollationDefinition IntlManager::defaultCollations[] = {
{CS_ISO8859_1, 12, "EN_UK", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
{CS_ISO8859_1, 14, "EN_US", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
{CS_ISO8859_1, 15, "PT_PT", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
{CS_ISO8859_1, 16, "PT_BR", NULL, TEXTTYPE_ATTR_PAD_SPACE | TEXTTYPE_ATTR_CASE_INSENSITIVE | TEXTTYPE_ATTR_ACCENT_INSENSITIVE, 0},
{CS_ISO8859_1, 17, "ES_ES_CI_AI", NULL, TEXTTYPE_ATTR_PAD_SPACE | TEXTTYPE_ATTR_CASE_INSENSITIVE | TEXTTYPE_ATTR_ACCENT_INSENSITIVE, "DISABLE-COMPRESSIONS=1;SPECIALS-FIRST=1"},
{CS_ISO8859_1, 18, "FR_FR_CI_AI", "FR_FR", TEXTTYPE_ATTR_PAD_SPACE | TEXTTYPE_ATTR_CASE_INSENSITIVE | TEXTTYPE_ATTR_ACCENT_INSENSITIVE, "SPECIALS-FIRST=1"},
{CS_ISO8859_1, 16, "PT_BR", NULL,
TEXTTYPE_ATTR_PAD_SPACE | TEXTTYPE_ATTR_CASE_INSENSITIVE | TEXTTYPE_ATTR_ACCENT_INSENSITIVE,
NULL},
{CS_ISO8859_1, 17, "ES_ES_CI_AI", NULL,
TEXTTYPE_ATTR_PAD_SPACE | TEXTTYPE_ATTR_CASE_INSENSITIVE | TEXTTYPE_ATTR_ACCENT_INSENSITIVE,
"DISABLE-COMPRESSIONS=1;SPECIALS-FIRST=1"},
{CS_ISO8859_1, 18, "FR_FR_CI_AI", "FR_FR",
TEXTTYPE_ATTR_PAD_SPACE | TEXTTYPE_ATTR_CASE_INSENSITIVE | TEXTTYPE_ATTR_ACCENT_INSENSITIVE,
"SPECIALS-FIRST=1"},
{CS_ISO8859_2, 0, "ISO8859_2", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
{CS_ISO8859_2, 1, "CS_CZ", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
{CS_ISO8859_2, 2, "ISO_HUN", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
@ -332,8 +342,10 @@ const IntlManager::CollationDefinition IntlManager::defaultCollations[] = {
{CS_WIN1250, 4, "PXW_SLOV", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
{CS_WIN1250, 5, "PXW_HUN", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
{CS_WIN1250, 6, "BS_BA", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
{CS_WIN1250, 7, "WIN_CZ", NULL, TEXTTYPE_ATTR_PAD_SPACE | TEXTTYPE_ATTR_CASE_INSENSITIVE, 0},
{CS_WIN1250, 8, "WIN_CZ_CI_AI", NULL, TEXTTYPE_ATTR_PAD_SPACE | TEXTTYPE_ATTR_CASE_INSENSITIVE | TEXTTYPE_ATTR_ACCENT_INSENSITIVE, 0},
{CS_WIN1250, 7, "WIN_CZ", NULL, TEXTTYPE_ATTR_PAD_SPACE | TEXTTYPE_ATTR_CASE_INSENSITIVE, NULL},
{CS_WIN1250, 8, "WIN_CZ_CI_AI", NULL,
TEXTTYPE_ATTR_PAD_SPACE | TEXTTYPE_ATTR_CASE_INSENSITIVE | TEXTTYPE_ATTR_ACCENT_INSENSITIVE,
NULL},
{CS_WIN1251, 0, "WIN1251", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
{CS_WIN1251, 1, "PXW_CYRL", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
{CS_WIN1251, 2, "WIN1251_UA", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
@ -343,7 +355,9 @@ const IntlManager::CollationDefinition IntlManager::defaultCollations[] = {
{CS_WIN1252, 3, "PXW_NORDAN4", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
{CS_WIN1252, 4, "PXW_SPAN", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
{CS_WIN1252, 5, "PXW_SWEDFIN", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
{CS_WIN1252, 6, "WIN_PTBR", NULL, TEXTTYPE_ATTR_PAD_SPACE | TEXTTYPE_ATTR_CASE_INSENSITIVE | TEXTTYPE_ATTR_ACCENT_INSENSITIVE, 0},
{CS_WIN1252, 6, "WIN_PTBR", NULL,
TEXTTYPE_ATTR_PAD_SPACE | TEXTTYPE_ATTR_CASE_INSENSITIVE | TEXTTYPE_ATTR_ACCENT_INSENSITIVE,
NULL},
{CS_WIN1253, 0, "WIN1253", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
{CS_WIN1253, 1, "PXW_GREEK", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
{CS_WIN1254, 0, "WIN1254", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},

View File

@ -37,7 +37,7 @@
#include "../common/classes/objects_array.h"
#include "../common/classes/rwlock.h"
#include "unicode/ustring.h"
///#include "unicode/utrans.h"
#include "unicode/utrans.h"
#include "unicode/uchar.h"
#include "unicode/ucnv.h"
#include "unicode/ucol.h"
@ -94,6 +94,23 @@ public:
UCollationResult (U_EXPORT2 *ucolStrColl)(const UCollator* coll, const UChar* source,
int32_t sourceLength, const UChar* target, int32_t targetLength);
void (U_EXPORT2 *ucolGetVersion)(const UCollator* coll, UVersionInfo info);
void (U_EXPORT2 *utransClose)(UTransliterator* trans);
UTransliterator* (U_EXPORT2 *utransOpen)(
const char* id,
UTransDirection dir,
const UChar* rules, /* may be Null */
int32_t rulesLength, /* -1 if null-terminated */
UParseError* parseError, /* may be Null */
UErrorCode* status);
void (U_EXPORT2 *utransTransUChars)(
const UTransliterator* trans,
UChar* text,
int32_t* textLength,
int32_t textCapacity,
int32_t start,
int32_t* limit,
UErrorCode* status);
};
@ -845,6 +862,15 @@ UnicodeUtil::ICU* UnicodeUtil::loadICU(const Firebird::string& icuVersion,
symbol.printf("ucol_getVersion_%s_%s", majorVersion.c_str(), minorVersion.c_str());
icu->inModule->findSymbol(symbol, icu->ucolGetVersion);
symbol.printf("utrans_open_%s_%s", majorVersion.c_str(), minorVersion.c_str());
icu->inModule->findSymbol(symbol, icu->utransOpen);
symbol.printf("utrans_close_%s_%s", majorVersion.c_str(), minorVersion.c_str());
icu->inModule->findSymbol(symbol, icu->utransClose);
symbol.printf("utrans_transUChars_%s_%s", majorVersion.c_str(), minorVersion.c_str());
icu->inModule->findSymbol(symbol, icu->utransTransUChars);
if (!icu->uVersionToString || !icu->ulocCountAvailable || !icu->ulocGetAvailable ||
!icu->usetClose || !icu->usetGetItem || !icu->usetGetItemCount || !icu->usetOpen ||
!icu->ucolClose || !icu->ucolGetContractions || !icu->ucolGetSortKey || !icu->ucolOpen ||
@ -931,7 +957,10 @@ UnicodeUtil::Utf16Collation* UnicodeUtil::Utf16Collation::create(
if (error)
return NULL;
if ((attributes & ~(TEXTTYPE_ATTR_PAD_SPACE | TEXTTYPE_ATTR_CASE_INSENSITIVE)) ||
if ((attributes & ~(TEXTTYPE_ATTR_PAD_SPACE | TEXTTYPE_ATTR_CASE_INSENSITIVE |
TEXTTYPE_ATTR_ACCENT_INSENSITIVE)) ||
((attributes & (TEXTTYPE_ATTR_CASE_INSENSITIVE | TEXTTYPE_ATTR_ACCENT_INSENSITIVE)) ==
TEXTTYPE_ATTR_ACCENT_INSENSITIVE) ||
(specificAttributes.count() - attributeCount) != 0)
{
return NULL;
@ -969,7 +998,14 @@ UnicodeUtil::Utf16Collation* UnicodeUtil::Utf16Collation::create(
icu->ucolSetAttribute(partialCollator, UCOL_STRENGTH, UCOL_PRIMARY, &status);
if (attributes & TEXTTYPE_ATTR_CASE_INSENSITIVE)
if ((attributes & (TEXTTYPE_ATTR_CASE_INSENSITIVE | TEXTTYPE_ATTR_ACCENT_INSENSITIVE)) ==
(TEXTTYPE_ATTR_CASE_INSENSITIVE | TEXTTYPE_ATTR_ACCENT_INSENSITIVE))
{
icu->ucolSetAttribute(compareCollator, UCOL_STRENGTH, UCOL_PRIMARY, &status);
tt->texttype_flags |= TEXTTYPE_SEPARATE_UNIQUE;
tt->texttype_canonical_width = 4; // UTF-32
}
else if (attributes & TEXTTYPE_ATTR_CASE_INSENSITIVE)
{
icu->ucolSetAttribute(compareCollator, UCOL_STRENGTH, UCOL_SECONDARY, &status);
tt->texttype_flags |= TEXTTYPE_SEPARATE_UNIQUE;
@ -1144,7 +1180,38 @@ ULONG UnicodeUtil::Utf16Collation::canonical(ULONG srcLen, const USHORT* src, UL
HalfStaticArray<USHORT, BUFFER_SMALL / 2> upperStr;
if (attributes & TEXTTYPE_ATTR_CASE_INSENSITIVE)
if ((attributes & (TEXTTYPE_ATTR_CASE_INSENSITIVE | TEXTTYPE_ATTR_ACCENT_INSENSITIVE)) ==
(TEXTTYPE_ATTR_CASE_INSENSITIVE | TEXTTYPE_ATTR_ACCENT_INSENSITIVE))
{
fb_assert(srcLen % sizeof(*src) == 0);
memcpy(upperStr.getBuffer(srcLen / sizeof(USHORT)), src, srcLen);
UErrorCode errorCode = U_ZERO_ERROR;
UTransliterator* trans = icu->utransOpen("Any-Upper; NFD; [:Nonspacing Mark:] Remove; NFC",
UTRANS_FORWARD, NULL, 0, NULL, &errorCode);
if (errorCode <= 0)
{
int32_t capacity = dstLen;
int32_t len = srcLen / sizeof(USHORT);
int32_t limit = len;
icu->utransTransUChars(trans, reinterpret_cast<UChar*>(upperStr.begin()),
&len, capacity, 0, &limit, &errorCode);
icu->utransClose(trans);
len *= sizeof(USHORT);
if (len > dstLen)
len = INTL_BAD_STR_LENGTH;
srcLen = len;
src = upperStr.begin();
}
else
return INTL_BAD_STR_LENGTH;
}
else if (attributes & TEXTTYPE_ATTR_CASE_INSENSITIVE)
{
srcLen = utf16UpperCase(srcLen, src,
srcLen, upperStr.getBuffer(srcLen / sizeof(USHORT)), exceptions);