mirror of
https://github.com/FirebirdSQL/firebird.git
synced 2025-01-23 22:03:03 +01:00
UNICODE_CI_AI (case-/accent- insensitive) collation - CORE-824
This commit is contained in:
parent
fdf10df998
commit
9ffe4b124f
@ -231,7 +231,11 @@ const IntlManager::CollationDefinition IntlManager::defaultCollations[] = {
|
||||
{CS_UTF8, 0, "UTF8", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
||||
{CS_UTF8, 1, "UCS_BASIC", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
||||
{CS_UTF8, 2, "UNICODE", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
||||
{CS_UTF8, 3, "UNICODE_CI", "UNICODE", TEXTTYPE_ATTR_PAD_SPACE | TEXTTYPE_ATTR_CASE_INSENSITIVE, NULL},
|
||||
{CS_UTF8, 3, "UNICODE_CI", "UNICODE",
|
||||
TEXTTYPE_ATTR_PAD_SPACE | TEXTTYPE_ATTR_CASE_INSENSITIVE, NULL},
|
||||
{CS_UTF8, 4, "UNICODE_CI_AI", "UNICODE",
|
||||
TEXTTYPE_ATTR_PAD_SPACE | TEXTTYPE_ATTR_CASE_INSENSITIVE | TEXTTYPE_ATTR_ACCENT_INSENSITIVE,
|
||||
NULL},
|
||||
#ifdef FB_NEW_INTL_ALLOW_NOT_READY
|
||||
{CS_UTF16, 0, "UTF16", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
||||
{CS_UTF16, 1, "UCS_BASIC", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
||||
@ -283,9 +287,15 @@ const IntlManager::CollationDefinition IntlManager::defaultCollations[] = {
|
||||
{CS_ISO8859_1, 12, "EN_UK", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
||||
{CS_ISO8859_1, 14, "EN_US", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
||||
{CS_ISO8859_1, 15, "PT_PT", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
||||
{CS_ISO8859_1, 16, "PT_BR", NULL, TEXTTYPE_ATTR_PAD_SPACE | TEXTTYPE_ATTR_CASE_INSENSITIVE | TEXTTYPE_ATTR_ACCENT_INSENSITIVE, 0},
|
||||
{CS_ISO8859_1, 17, "ES_ES_CI_AI", NULL, TEXTTYPE_ATTR_PAD_SPACE | TEXTTYPE_ATTR_CASE_INSENSITIVE | TEXTTYPE_ATTR_ACCENT_INSENSITIVE, "DISABLE-COMPRESSIONS=1;SPECIALS-FIRST=1"},
|
||||
{CS_ISO8859_1, 18, "FR_FR_CI_AI", "FR_FR", TEXTTYPE_ATTR_PAD_SPACE | TEXTTYPE_ATTR_CASE_INSENSITIVE | TEXTTYPE_ATTR_ACCENT_INSENSITIVE, "SPECIALS-FIRST=1"},
|
||||
{CS_ISO8859_1, 16, "PT_BR", NULL,
|
||||
TEXTTYPE_ATTR_PAD_SPACE | TEXTTYPE_ATTR_CASE_INSENSITIVE | TEXTTYPE_ATTR_ACCENT_INSENSITIVE,
|
||||
NULL},
|
||||
{CS_ISO8859_1, 17, "ES_ES_CI_AI", NULL,
|
||||
TEXTTYPE_ATTR_PAD_SPACE | TEXTTYPE_ATTR_CASE_INSENSITIVE | TEXTTYPE_ATTR_ACCENT_INSENSITIVE,
|
||||
"DISABLE-COMPRESSIONS=1;SPECIALS-FIRST=1"},
|
||||
{CS_ISO8859_1, 18, "FR_FR_CI_AI", "FR_FR",
|
||||
TEXTTYPE_ATTR_PAD_SPACE | TEXTTYPE_ATTR_CASE_INSENSITIVE | TEXTTYPE_ATTR_ACCENT_INSENSITIVE,
|
||||
"SPECIALS-FIRST=1"},
|
||||
{CS_ISO8859_2, 0, "ISO8859_2", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
||||
{CS_ISO8859_2, 1, "CS_CZ", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
||||
{CS_ISO8859_2, 2, "ISO_HUN", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
||||
@ -332,8 +342,10 @@ const IntlManager::CollationDefinition IntlManager::defaultCollations[] = {
|
||||
{CS_WIN1250, 4, "PXW_SLOV", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
||||
{CS_WIN1250, 5, "PXW_HUN", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
||||
{CS_WIN1250, 6, "BS_BA", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
||||
{CS_WIN1250, 7, "WIN_CZ", NULL, TEXTTYPE_ATTR_PAD_SPACE | TEXTTYPE_ATTR_CASE_INSENSITIVE, 0},
|
||||
{CS_WIN1250, 8, "WIN_CZ_CI_AI", NULL, TEXTTYPE_ATTR_PAD_SPACE | TEXTTYPE_ATTR_CASE_INSENSITIVE | TEXTTYPE_ATTR_ACCENT_INSENSITIVE, 0},
|
||||
{CS_WIN1250, 7, "WIN_CZ", NULL, TEXTTYPE_ATTR_PAD_SPACE | TEXTTYPE_ATTR_CASE_INSENSITIVE, NULL},
|
||||
{CS_WIN1250, 8, "WIN_CZ_CI_AI", NULL,
|
||||
TEXTTYPE_ATTR_PAD_SPACE | TEXTTYPE_ATTR_CASE_INSENSITIVE | TEXTTYPE_ATTR_ACCENT_INSENSITIVE,
|
||||
NULL},
|
||||
{CS_WIN1251, 0, "WIN1251", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
||||
{CS_WIN1251, 1, "PXW_CYRL", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
||||
{CS_WIN1251, 2, "WIN1251_UA", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
||||
@ -343,7 +355,9 @@ const IntlManager::CollationDefinition IntlManager::defaultCollations[] = {
|
||||
{CS_WIN1252, 3, "PXW_NORDAN4", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
||||
{CS_WIN1252, 4, "PXW_SPAN", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
||||
{CS_WIN1252, 5, "PXW_SWEDFIN", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
||||
{CS_WIN1252, 6, "WIN_PTBR", NULL, TEXTTYPE_ATTR_PAD_SPACE | TEXTTYPE_ATTR_CASE_INSENSITIVE | TEXTTYPE_ATTR_ACCENT_INSENSITIVE, 0},
|
||||
{CS_WIN1252, 6, "WIN_PTBR", NULL,
|
||||
TEXTTYPE_ATTR_PAD_SPACE | TEXTTYPE_ATTR_CASE_INSENSITIVE | TEXTTYPE_ATTR_ACCENT_INSENSITIVE,
|
||||
NULL},
|
||||
{CS_WIN1253, 0, "WIN1253", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
||||
{CS_WIN1253, 1, "PXW_GREEK", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
||||
{CS_WIN1254, 0, "WIN1254", NULL, TEXTTYPE_ATTR_PAD_SPACE, NULL},
|
||||
|
@ -37,7 +37,7 @@
|
||||
#include "../common/classes/objects_array.h"
|
||||
#include "../common/classes/rwlock.h"
|
||||
#include "unicode/ustring.h"
|
||||
///#include "unicode/utrans.h"
|
||||
#include "unicode/utrans.h"
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/ucnv.h"
|
||||
#include "unicode/ucol.h"
|
||||
@ -931,7 +931,10 @@ UnicodeUtil::Utf16Collation* UnicodeUtil::Utf16Collation::create(
|
||||
if (error)
|
||||
return NULL;
|
||||
|
||||
if ((attributes & ~(TEXTTYPE_ATTR_PAD_SPACE | TEXTTYPE_ATTR_CASE_INSENSITIVE)) ||
|
||||
// Reject the request when any of the following holds:
//  - an attribute bit other than PAD_SPACE, CASE_INSENSITIVE or ACCENT_INSENSITIVE is set;
//  - ACCENT_INSENSITIVE is set while CASE_INSENSITIVE is not;
//  - specificAttributes.count() differs from attributeCount.
// The masked value in the second test must be parenthesised: `==` binds tighter than `&`,
// so without the extra parentheses the test degenerates to `attributes & 0` and never fires.
if ((attributes & ~(TEXTTYPE_ATTR_PAD_SPACE | TEXTTYPE_ATTR_CASE_INSENSITIVE |
		TEXTTYPE_ATTR_ACCENT_INSENSITIVE)) ||
	((attributes & (TEXTTYPE_ATTR_CASE_INSENSITIVE | TEXTTYPE_ATTR_ACCENT_INSENSITIVE)) ==
		TEXTTYPE_ATTR_ACCENT_INSENSITIVE) ||
	(specificAttributes.count() - attributeCount) != 0)
|
||||
{
|
||||
return NULL;
|
||||
@ -969,7 +972,14 @@ UnicodeUtil::Utf16Collation* UnicodeUtil::Utf16Collation::create(
|
||||
|
||||
icu->ucolSetAttribute(partialCollator, UCOL_STRENGTH, UCOL_PRIMARY, &status);
|
||||
|
||||
if (attributes & TEXTTYPE_ATTR_CASE_INSENSITIVE)
|
||||
if ((attributes & (TEXTTYPE_ATTR_CASE_INSENSITIVE | TEXTTYPE_ATTR_ACCENT_INSENSITIVE)) ==
|
||||
(TEXTTYPE_ATTR_CASE_INSENSITIVE | TEXTTYPE_ATTR_ACCENT_INSENSITIVE))
|
||||
{
|
||||
icu->ucolSetAttribute(compareCollator, UCOL_STRENGTH, UCOL_PRIMARY, &status);
|
||||
tt->texttype_flags |= TEXTTYPE_SEPARATE_UNIQUE;
|
||||
tt->texttype_canonical_width = 4; // UTF-32
|
||||
}
|
||||
else if (attributes & TEXTTYPE_ATTR_CASE_INSENSITIVE)
|
||||
{
|
||||
icu->ucolSetAttribute(compareCollator, UCOL_STRENGTH, UCOL_SECONDARY, &status);
|
||||
tt->texttype_flags |= TEXTTYPE_SEPARATE_UNIQUE;
|
||||
@ -1144,7 +1154,37 @@ ULONG UnicodeUtil::Utf16Collation::canonical(ULONG srcLen, const USHORT* src, UL
|
||||
|
||||
HalfStaticArray<USHORT, BUFFER_SMALL / 2> upperStr;
|
||||
|
||||
if (attributes & TEXTTYPE_ATTR_CASE_INSENSITIVE)
|
||||
// Case- and accent-insensitive branch: copy the source into upperStr, then transliterate it
// in place (upper-case, decompose, drop nonspacing marks, recompose) and redirect src/srcLen
// to the transformed copy.
if ((attributes & (TEXTTYPE_ATTR_CASE_INSENSITIVE | TEXTTYPE_ATTR_ACCENT_INSENSITIVE)) ==
		(TEXTTYPE_ATTR_CASE_INSENSITIVE | TEXTTYPE_ATTR_ACCENT_INSENSITIVE))
{
	fb_assert(srcLen % sizeof(*src) == 0);

	// utrans_transUChars() expects the buffer capacity in UTF-16 units, and the buffer must
	// really be that large because the text may grow while being transliterated. Size the
	// work buffer to hold the source and any result that still fits in dstLen bytes; longer
	// results are rejected below anyway.
	const int32_t srcUnits = static_cast<int32_t>(srcLen / sizeof(USHORT));
	const int32_t dstUnits = static_cast<int32_t>(dstLen / sizeof(USHORT));
	const int32_t capacity = (srcUnits > dstUnits) ? srcUnits : dstUnits;

	memcpy(upperStr.getBuffer(capacity), src, srcLen);

	// NOTE(review): the transliterator is opened and closed on every call that takes this
	// branch, and utrans_* is called directly while collator calls elsewhere in this file go
	// through `icu->` - confirm both are intended.
	UErrorCode errorCode = U_ZERO_ERROR;
	UTransliterator* trans = utrans_open("Any-Upper; NFD; [:Nonspacing Mark:] Remove; NFC",
		UTRANS_FORWARD, NULL, 0, NULL, &errorCode);

	if (U_FAILURE(errorCode))
		return INTL_BAD_STR_LENGTH;

	int32_t len = srcUnits;
	int32_t limit = len;

	utrans_transUChars(trans, upperStr.begin(), &len, capacity, 0, &limit, &errorCode);
	utrans_close(trans);

	// len is now the result length in UTF-16 units. A failed transliteration, or a result
	// that does not fit in dstLen bytes, is reported through INTL_BAD_STR_LENGTH.
	if (U_FAILURE(errorCode) || static_cast<ULONG>(len) * sizeof(USHORT) > dstLen)
		srcLen = INTL_BAD_STR_LENGTH;
	else
		srcLen = static_cast<ULONG>(len) * sizeof(USHORT);

	src = upperStr.begin();
}
|
||||
else if (attributes & TEXTTYPE_ATTR_CASE_INSENSITIVE)
|
||||
{
|
||||
srcLen = utf16UpperCase(srcLen, src,
|
||||
srcLen, upperStr.getBuffer(srcLen / sizeof(USHORT)), exceptions);
|
||||
|
Loading…
Reference in New Issue
Block a user