2001-05-23 15:26:42 +02:00
|
|
|
/*
|
|
|
|
* PROGRAM: InterBase International support
|
2003-11-05 10:02:33 +01:00
|
|
|
* MODULE: lc_narrow.cpp
|
2001-05-23 15:26:42 +02:00
|
|
|
* DESCRIPTION: Common base for Narrow language drivers
|
|
|
|
* (full International collation)
|
|
|
|
*
|
|
|
|
* The contents of this file are subject to the Interbase Public
|
|
|
|
* License Version 1.0 (the "License"); you may not use this file
|
|
|
|
* except in compliance with the License. You may obtain a copy
|
|
|
|
* of the License at http://www.Inprise.com/IPL.html
|
|
|
|
*
|
|
|
|
* Software distributed under the License is distributed on an
|
|
|
|
* "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express
|
|
|
|
* or implied. See the License for the specific language governing
|
|
|
|
* rights and limitations under the License.
|
|
|
|
*
|
|
|
|
* The Original Code was created by Inprise Corporation
|
|
|
|
* and its predecessors. Portions created by Inprise Corporation are
|
|
|
|
* Copyright (C) Inprise Corporation.
|
|
|
|
*
|
|
|
|
* All Rights Reserved.
|
|
|
|
* Contributor(s): ______________________________________.
|
|
|
|
*/
|
|
|
|
|
2003-02-17 11:37:42 +01:00
|
|
|
#include "firebird.h"
|
2001-05-23 15:26:42 +02:00
|
|
|
#include "../intl/ldcommon.h"
|
2006-10-16 21:17:44 +02:00
|
|
|
#include "../jrd/CharSet.h"
|
|
|
|
#include "../jrd/IntlUtil.h"
|
2003-02-20 16:47:23 +01:00
|
|
|
#include "lc_narrow.h"
|
2003-09-21 01:33:36 +02:00
|
|
|
#include "ld_proto.h"
|
2006-10-16 21:17:44 +02:00
|
|
|
#include <limits.h>
|
2006-12-01 15:36:12 +01:00
|
|
|
#include <math.h>
|
2006-10-16 21:17:44 +02:00
|
|
|
|
|
|
|
using namespace Firebird;
|
2001-05-23 15:26:42 +02:00
|
|
|
|
2006-10-16 21:17:44 +02:00
|
|
|
|
|
|
|
static ULONG fam2_str_to_upper(texttype* obj, ULONG iLen, const BYTE* pStr, ULONG iOutLen, BYTE *pOutStr);
|
|
|
|
static ULONG fam2_str_to_lower(texttype* obj, ULONG iLen, const BYTE* pStr, ULONG iOutLen, BYTE *pOutStr);
|
2001-05-23 15:26:42 +02:00
|
|
|
|
|
|
|
|
2004-05-18 23:58:19 +02:00
|
|
|
const USHORT LANGFAM2_MAX_KEY = MAX_KEY;
|
|
|
|
const BYTE ASCII_SPACE = 32;
|
|
|
|
const UINT16 NULL_WEIGHT = 0;
|
|
|
|
const UINT16 NULL_SECONDARY = 0;
|
|
|
|
const UINT16 NULL_TERTIARY = 0;
|
2001-05-23 15:26:42 +02:00
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* key_length (obj, inLen)
|
|
|
|
*
|
|
|
|
* For an input string of (inLen) bytes, return the maximum
|
|
|
|
* key buffer length.
|
|
|
|
*
|
|
|
|
* This is used for index buffer allocation within the
|
|
|
|
* Engine.
|
|
|
|
*
|
|
|
|
* Note:
|
|
|
|
* Strings containing (m) expand chars need 2*(m)*3
|
|
|
|
* bytes for key values of the expansion. This is
|
|
|
|
* offset by string values that don't have corresponding
|
|
|
|
* secondary or tertiary key values.
|
|
|
|
* Let:
|
|
|
|
* n = length of input string
|
|
|
|
* np = count of "simple" bytes in the string, alphabetic
|
|
|
|
* no secondary and no tertiary.
|
|
|
|
* ns = Has secondary or tertiary, but not both
|
|
|
|
* nt = Has tertiary and secondary.
|
|
|
|
* nc = is a COMPRESSED value.
|
|
|
|
* ne = Has an EXPAND value.
|
|
|
|
* nsp = Is a special value.
|
|
|
|
*
|
|
|
|
* n = np + ns + nt + nc + ne + nsp
|
|
|
|
*
|
|
|
|
* Key_length(n) =
|
|
|
|
* np
|
|
|
|
* + 2 * ns
|
|
|
|
* + 3 * nt
|
|
|
|
* + 3 * (nc/2)
|
|
|
|
* + 3 * 2 * ne
|
|
|
|
* + 2 * nsp
|
|
|
|
* + 1 (if nsp > 0, for separating keys from special keys)
|
|
|
|
*
|
|
|
|
* Clearly this is maximized when the string consists solely of
|
|
|
|
* EXPAND characters. This degenerate case doesn't occur in
|
|
|
|
* standard text usage, except for short strings (1-2 characters).
|
|
|
|
*
|
|
|
|
* Therefore, we compute the keylength based on the "normal" case
|
|
|
|
* of the (nt) term. It is likely we could choose a probabilistic value
|
|
|
|
* (such as 2.5 * n) for the length of the key.
|
|
|
|
*
|
|
|
|
* The degenerate case of short strings is handled by a minimal key
|
|
|
|
* length.
|
|
|
|
*/
|
2006-09-17 22:06:36 +02:00
|
|
|
// Returns the key buffer length to reserve for an input of inLen bytes,
// never more than LANGFAM2_MAX_KEY.
//
// On the first call for a collation the number of key bytes needed per
// character (1..3) is worked out and cached in texttype_bytes_per_key.
USHORT LC_NARROW_key_length(texttype* obj, USHORT inLen)
{
	/* fb_assert (inLen <= LANGFAM2_MAX_KEY); *//* almost certainly an error */

	// is it the first time key_length is called?
	if (obj->texttype_impl->texttype_bytes_per_key == 0)
	{
		BYTE bytesPerChar = 3;

		// if collation is not multi-level, the weights used are already known
		if (obj->texttype_impl->texttype_flags & TEXTTYPE_non_multi_level)
		{
			if (obj->texttype_impl->texttype_flags & TEXTTYPE_secondary_insensitive)
				--bytesPerChar;

			if (obj->texttype_impl->texttype_flags & TEXTTYPE_tertiary_insensitive)
				--bytesPerChar;
		}
		else	// scan the table to identify what weights are used
		{
			bool useSecondary = false;
			bool useTertiary = false;

			for (int ch = 0; ch <= 255; ++ch)
			{
				const SortOrderTblEntry* coll =
					&((const SortOrderTblEntry*)obj->texttype_impl->texttype_collation_table)[ch];

				if (coll->Secondary != NULL_SECONDARY)
					useSecondary = true;

				if (coll->Tertiary != NULL_TERTIARY)
					useTertiary = true;
			}

			if (!useSecondary)
				--bytesPerChar;

			if (!useTertiary)
				--bytesPerChar;
		}

		obj->texttype_impl->texttype_bytes_per_key = bytesPerChar;
	}

	// Compute in a wide type: bytes_per_key * inLen can exceed 65535, and
	// narrowing to USHORT before the clamp would wrap around and yield a
	// length that is far too small.
	const unsigned long bytesPerKey = obj->texttype_impl->texttype_bytes_per_key;

	// Short strings get a minimal key length of two characters.
	unsigned long len = bytesPerKey * ((inLen < 2) ? 2UL : (unsigned long) inLen);

	// Extra room when the collation has a non-empty expansion table.
	if (obj->texttype_impl->texttype_expand_table &&
		((const ExpandChar*) obj->texttype_impl->texttype_expand_table)[0].Ch)
	{
		len += (unsigned long) log10(inLen + 1.0) * 4 * bytesPerKey;
	}

	// Clamp first, then narrow.
	return (USHORT) ((len < LANGFAM2_MAX_KEY) ? len : LANGFAM2_MAX_KEY);
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef DEBUG
|
2004-04-29 00:36:29 +02:00
|
|
|
#include <stdio.h>
|
2001-05-23 15:26:42 +02:00
|
|
|
static ULONG do_debug = 0;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/*
|
|
|
|
*
|
|
|
|
* Convert a user string to a sequence that will collate bytewise.
|
|
|
|
*
|
|
|
|
* RETURN:
|
|
|
|
* Length, in bytes, of returned key
|
|
|
|
*/
|
2006-09-17 22:06:36 +02:00
|
|
|
USHORT LC_NARROW_string_to_key(texttype* obj, USHORT iInLen, const BYTE* pInChar, USHORT iOutLen, BYTE *pOutChar,
|
2005-05-28 00:45:31 +02:00
|
|
|
USHORT key_type)
|
2001-05-23 15:26:42 +02:00
|
|
|
{
|
2003-11-04 00:59:24 +01:00
|
|
|
fb_assert(pOutChar != NULL);
|
|
|
|
fb_assert(pInChar != NULL);
|
|
|
|
/* fb_assert (iInLen <= LANGFAM2_MAX_KEY); */
|
|
|
|
fb_assert(iOutLen <= LANGFAM2_MAX_KEY);
|
|
|
|
fb_assert(iOutLen >= LC_NARROW_key_length(obj, iInLen));
|
2001-05-23 15:26:42 +02:00
|
|
|
|
|
|
|
#ifdef DEBUG
|
|
|
|
/* Dump out the input string */
|
|
|
|
if (do_debug) {
|
2004-04-29 00:36:29 +02:00
|
|
|
printf("string: (%02d) '%*s'\n", iInLen, iInLen, pInChar);
|
|
|
|
fflush(stdout);
|
2004-03-11 06:04:26 +01:00
|
|
|
}
|
2001-05-23 15:26:42 +02:00
|
|
|
#endif /* DEBUG */
|
|
|
|
|
2004-03-11 06:04:26 +01:00
|
|
|
BYTE* outbuff = pOutChar;
|
|
|
|
USHORT lprimary = 0;
|
|
|
|
USHORT lsecondary = 0;
|
|
|
|
USHORT ltertiary = 0;
|
|
|
|
USHORT lspecial = 0;
|
|
|
|
|
|
|
|
BYTE secondary[LANGFAM2_MAX_KEY];
|
|
|
|
BYTE tertiary[LANGFAM2_MAX_KEY];
|
|
|
|
BYTE special[LANGFAM2_MAX_KEY * 2];
|
2001-05-23 15:26:42 +02:00
|
|
|
|
|
|
|
/* point inbuff at last character */
|
2004-03-11 06:04:26 +01:00
|
|
|
const BYTE* inbuff = pInChar + iInLen - 1;
|
2001-05-23 15:26:42 +02:00
|
|
|
|
2005-05-28 00:45:31 +02:00
|
|
|
if (obj->texttype_pad_option)
|
|
|
|
{
|
|
|
|
/* skip backwards over all spaces & reset input length */
|
|
|
|
while ((inbuff >= pInChar) && (*inbuff == ASCII_SPACE))
|
|
|
|
inbuff--;
|
|
|
|
}
|
|
|
|
|
2001-05-23 15:26:42 +02:00
|
|
|
iInLen = (inbuff - pInChar + 1);
|
2004-03-11 06:04:26 +01:00
|
|
|
|
|
|
|
USHORT i;
|
2001-05-23 15:26:42 +02:00
|
|
|
|
|
|
|
for (i = 0; i < iInLen; i++, pInChar++) {
|
2003-11-04 00:59:24 +01:00
|
|
|
fb_assert(lprimary < iOutLen);
|
|
|
|
fb_assert(lsecondary < sizeof(secondary));
|
|
|
|
fb_assert(ltertiary < sizeof(tertiary));
|
|
|
|
fb_assert(lspecial < sizeof(special));
|
2001-05-23 15:26:42 +02:00
|
|
|
|
2004-03-11 06:04:26 +01:00
|
|
|
const SortOrderTblEntry* coll =
|
2005-05-28 00:45:31 +02:00
|
|
|
&((const SortOrderTblEntry*) obj->texttype_impl->
|
2001-05-23 15:26:42 +02:00
|
|
|
texttype_collation_table)[*pInChar];
|
|
|
|
if (!(coll->IsExpand || coll->IsCompress)) {
|
2005-05-28 00:45:31 +02:00
|
|
|
if (coll->Primary != NULL_WEIGHT && lprimary < iOutLen)
|
2006-10-16 21:17:44 +02:00
|
|
|
outbuff[lprimary++] = coll->Primary + obj->texttype_impl->primary_sum;
|
2005-05-28 00:45:31 +02:00
|
|
|
if (coll->Secondary != NULL_SECONDARY && lsecondary < sizeof(secondary))
|
2001-05-23 15:26:42 +02:00
|
|
|
secondary[lsecondary++] = coll->Secondary;
|
2005-05-28 00:45:31 +02:00
|
|
|
if (coll->Tertiary != NULL_TERTIARY && ltertiary < sizeof(tertiary))
|
2001-05-23 15:26:42 +02:00
|
|
|
tertiary[ltertiary++] = coll->Tertiary;
|
|
|
|
}
|
|
|
|
else if (coll->IsExpand && coll->IsCompress) {
|
|
|
|
/* Both flags set indicate a special value */
|
2006-10-16 21:17:44 +02:00
|
|
|
|
|
|
|
if (obj->texttype_impl->texttype_flags & TEXTTYPE_specials_first)
|
2004-03-11 06:04:26 +01:00
|
|
|
{
|
2006-10-16 21:17:44 +02:00
|
|
|
if (coll->Primary != NULL_WEIGHT && lprimary < iOutLen)
|
|
|
|
outbuff[lprimary++] = coll->Primary + obj->texttype_impl->ignore_sum;
|
|
|
|
if (coll->Secondary != NULL_SECONDARY && lsecondary < sizeof(secondary))
|
|
|
|
secondary[lsecondary++] = coll->Secondary;
|
|
|
|
if (coll->Tertiary != NULL_TERTIARY && ltertiary < sizeof(tertiary))
|
|
|
|
tertiary[ltertiary++] = coll->Tertiary;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
if ((coll->Primary != NULL_WEIGHT) &&
|
|
|
|
!(obj->texttype_impl->texttype_flags & TEXTTYPE_ignore_specials) &&
|
|
|
|
lspecial + 1 < sizeof(special))
|
|
|
|
{
|
|
|
|
special[lspecial++] = (i + 1); /* position */
|
|
|
|
special[lspecial++] = coll->Primary;
|
|
|
|
}
|
2004-03-11 06:04:26 +01:00
|
|
|
}
|
2001-05-23 15:26:42 +02:00
|
|
|
}
|
|
|
|
else if (coll->IsExpand) {
|
2005-05-28 00:45:31 +02:00
|
|
|
const ExpandChar* exp = &((const ExpandChar*) obj->texttype_impl->texttype_expand_table)[0];
|
2001-05-23 15:26:42 +02:00
|
|
|
while (exp->Ch && exp->Ch != *pInChar)
|
|
|
|
exp++;
|
2003-11-04 00:59:24 +01:00
|
|
|
fb_assert(exp->Ch == *pInChar);
|
2004-03-11 06:04:26 +01:00
|
|
|
for (int j = 0; j < 2; j++) {
|
2001-05-23 15:26:42 +02:00
|
|
|
if (j)
|
|
|
|
coll =
|
2005-05-28 00:45:31 +02:00
|
|
|
&((const SortOrderTblEntry*) obj->texttype_impl->
|
2001-05-23 15:26:42 +02:00
|
|
|
texttype_collation_table)[exp->ExpCh2];
|
2005-05-28 00:45:31 +02:00
|
|
|
if (coll->Primary != NULL_WEIGHT && lprimary < iOutLen)
|
2001-05-23 15:26:42 +02:00
|
|
|
outbuff[lprimary++] = coll->Primary;
|
2005-05-28 00:45:31 +02:00
|
|
|
if (coll->Secondary != NULL_SECONDARY && lsecondary < sizeof(secondary))
|
2001-05-23 15:26:42 +02:00
|
|
|
secondary[lsecondary++] = coll->Secondary;
|
2005-05-28 00:45:31 +02:00
|
|
|
if (coll->Tertiary != NULL_TERTIARY && ltertiary < sizeof(tertiary))
|
2001-05-23 15:26:42 +02:00
|
|
|
tertiary[ltertiary++] = coll->Tertiary;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else { /* (col->IsCompress) */
|
2005-05-28 00:45:31 +02:00
|
|
|
bool complete = (USHORT) (i + 1) < iInLen;
|
2001-05-23 15:26:42 +02:00
|
|
|
|
2005-05-28 00:45:31 +02:00
|
|
|
if (complete) {
|
2004-03-11 06:04:26 +01:00
|
|
|
const CompressPair* cmp =
|
2005-05-28 00:45:31 +02:00
|
|
|
&((const CompressPair*) obj->texttype_impl->
|
2001-05-23 15:26:42 +02:00
|
|
|
texttype_compress_table)[0];
|
|
|
|
while (cmp->CharPair[0]) {
|
|
|
|
if ((cmp->CharPair[0] == *pInChar) &&
|
2004-03-11 06:04:26 +01:00
|
|
|
(cmp->CharPair[1] == *(pInChar + 1)))
|
|
|
|
{
|
2001-05-23 15:26:42 +02:00
|
|
|
/* Gobble the two-to-1 entry */
|
|
|
|
coll = &cmp->NoCaseWeight;
|
|
|
|
pInChar++;
|
|
|
|
i++;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
cmp++;
|
|
|
|
}
|
|
|
|
}
|
2005-05-28 00:45:31 +02:00
|
|
|
|
|
|
|
// ASF: If key_type == INTL_KEY_PARTIAL and the last CompressPair
|
|
|
|
// isn't complete, don't put the byte in the sortkey. If we put,
|
|
|
|
// incorrect results occur when using index.
|
|
|
|
if (key_type != INTL_KEY_PARTIAL || complete)
|
|
|
|
{
|
|
|
|
if (coll->Primary != NULL_WEIGHT && lprimary < iOutLen)
|
|
|
|
outbuff[lprimary++] = coll->Primary;
|
|
|
|
if (coll->Secondary != NULL_SECONDARY && lsecondary < sizeof(secondary))
|
|
|
|
secondary[lsecondary++] = coll->Secondary;
|
|
|
|
if (coll->Tertiary != NULL_TERTIARY && ltertiary < sizeof(tertiary))
|
|
|
|
tertiary[ltertiary++] = coll->Tertiary;
|
|
|
|
}
|
2001-05-23 15:26:42 +02:00
|
|
|
}
|
2004-03-11 06:04:26 +01:00
|
|
|
}
|
2001-05-23 15:26:42 +02:00
|
|
|
|
|
|
|
/* primary keys are already in output key */
|
|
|
|
|
|
|
|
outbuff += lprimary;
|
|
|
|
iOutLen -= lprimary;
|
|
|
|
|
2005-05-28 00:45:31 +02:00
|
|
|
if (key_type == INTL_KEY_PARTIAL)
|
2001-05-23 15:26:42 +02:00
|
|
|
/* return length of key */
|
|
|
|
return (outbuff - pOutChar);
|
|
|
|
|
2005-05-28 00:45:31 +02:00
|
|
|
bool useLevel = !(obj->texttype_impl->texttype_flags & TEXTTYPE_secondary_insensitive);
|
|
|
|
|
|
|
|
if (!(obj->texttype_impl->texttype_flags & TEXTTYPE_non_multi_level)) // multi-level
|
|
|
|
{
|
|
|
|
if (key_type == INTL_KEY_SORT)
|
|
|
|
useLevel = true;
|
2001-05-23 15:26:42 +02:00
|
|
|
}
|
2005-05-28 00:45:31 +02:00
|
|
|
|
|
|
|
if (useLevel)
|
|
|
|
{
|
|
|
|
/* put secondary keys into output key */
|
|
|
|
if (obj->texttype_impl->texttype_flags & TEXTTYPE_reverse_secondary) {
|
|
|
|
for (i = 0; i < lsecondary && iOutLen; i++) {
|
|
|
|
*outbuff++ = secondary[lsecondary - i - 1];
|
|
|
|
iOutLen--;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
for (i = 0; i < lsecondary && iOutLen; i++) {
|
|
|
|
*outbuff++ = secondary[i];
|
|
|
|
iOutLen--;
|
|
|
|
}
|
2004-03-11 06:04:26 +01:00
|
|
|
}
|
|
|
|
}
|
2001-05-23 15:26:42 +02:00
|
|
|
|
2005-05-28 00:45:31 +02:00
|
|
|
useLevel = !(obj->texttype_impl->texttype_flags & TEXTTYPE_tertiary_insensitive);
|
|
|
|
|
|
|
|
if (!(obj->texttype_impl->texttype_flags & TEXTTYPE_non_multi_level)) // multi-level
|
|
|
|
{
|
|
|
|
if (key_type == INTL_KEY_SORT)
|
|
|
|
useLevel = true;
|
2004-03-11 06:04:26 +01:00
|
|
|
}
|
2001-05-23 15:26:42 +02:00
|
|
|
|
2005-05-28 00:45:31 +02:00
|
|
|
if (useLevel)
|
2004-03-11 06:04:26 +01:00
|
|
|
{
|
2005-05-28 00:45:31 +02:00
|
|
|
/* put tertiary keys into output key */
|
|
|
|
for (i = 0; i < ltertiary && iOutLen; i++) {
|
|
|
|
*outbuff++ = tertiary[i];
|
2001-05-23 15:26:42 +02:00
|
|
|
iOutLen--;
|
2004-03-11 06:04:26 +01:00
|
|
|
}
|
|
|
|
}
|
2001-05-23 15:26:42 +02:00
|
|
|
|
2005-07-30 20:00:56 +02:00
|
|
|
/* put special keys into output key */
|
|
|
|
if ((lspecial && iOutLen) &&
|
|
|
|
!(obj->texttype_impl->texttype_flags & TEXTTYPE_ignore_specials))
|
2005-05-28 00:45:31 +02:00
|
|
|
{
|
2005-07-30 20:00:56 +02:00
|
|
|
/* Insert the marker-byte */
|
|
|
|
*outbuff++ = 0;
|
|
|
|
iOutLen--;
|
|
|
|
for (i = 0; i < lspecial && iOutLen; i++) {
|
|
|
|
*outbuff++ = special[i];
|
2005-05-28 00:45:31 +02:00
|
|
|
iOutLen--;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2001-05-23 15:26:42 +02:00
|
|
|
#ifdef DEBUG
|
|
|
|
/* Dump out the computed key */
|
|
|
|
if (do_debug) {
|
2004-04-29 00:36:29 +02:00
|
|
|
printf(" key: (%02d) ", (outbuff - pOutChar));
|
2004-03-11 06:04:26 +01:00
|
|
|
for (const UCHAR* p = pOutChar; p < outbuff; p++)
|
2004-04-29 00:36:29 +02:00
|
|
|
printf("%2x ", *p);
|
|
|
|
printf("\n");
|
|
|
|
fflush(stdout);
|
2004-03-11 06:04:26 +01:00
|
|
|
}
|
2001-05-23 15:26:42 +02:00
|
|
|
#endif
|
|
|
|
|
|
|
|
/* return length of key */
|
|
|
|
return (outbuff - pOutChar);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
2005-12-03 07:39:08 +01:00
|
|
|
#define LC_HAVE_WAITING 1
|
|
|
|
#define LC_HAVE_SPECIAL 2
|
2001-05-23 15:26:42 +02:00
|
|
|
|
|
|
|
/* An expansion character goes before its expansion, */
/* e.g. S-set collates before "ss". */
|
|
|
|
|
2004-03-11 06:04:26 +01:00
|
|
|
struct coltab_status {
|
2001-05-23 15:26:42 +02:00
|
|
|
USHORT stat_flags;
|
2004-03-11 06:04:26 +01:00
|
|
|
const SortOrderTblEntry* stat_waiting;
|
|
|
|
};
|
2001-05-23 15:26:42 +02:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2006-09-17 22:06:36 +02:00
|
|
|
/*
 * special_scan
 *
 *	Compare two strings by their special characters only (entries with
 *	both IsExpand and IsCompress set). Each string is advanced to its
 *	next special character; the strings are then ordered by
 *		- which one still has a special left (+/-1000),
 *		- the position of that special (+/-2000),
 *		- the difference of the specials' primary weights.
 *	Returns 0 when both strings run out of specials together.
 *
 *	When TEXTTYPE_specials_first is set no character is treated as
 *	special here and the result is always 0.
 */
static SSHORT special_scan(texttype* obj, ULONG l1, const BYTE* s1, ULONG l2, const BYTE* s2)
{
	const SortOrderTblEntry* const table =
		(const SortOrderTblEntry*) obj->texttype_impl->texttype_collation_table;
	const bool specialsFirst =
		(obj->texttype_impl->texttype_flags & TEXTTYPE_specials_first) != 0;

	const SortOrderTblEntry* entry1 = 0;
	const SortOrderTblEntry* entry2 = 0;

	ULONG pos1 = 0;
	ULONG pos2 = 0;

	for (;;)
	{
		/* Advance the first string to its next special character */
		for (; l1; --l1, ++s1, ++pos1)
		{
			entry1 = &table[*s1];

			if (entry1->IsExpand && entry1->IsCompress && !specialsFirst)
				break;
		}

		/* Advance the second string to its next special character */
		for (; l2; --l2, ++s2, ++pos2)
		{
			entry2 = &table[*s2];

			if (entry2->IsExpand && entry2->IsCompress && !specialsFirst)
				break;
		}

		if (!l1)
		{
			/* First string is exhausted: equal only if the second is too */
			return l2 ? -1000 : 0;
		}

		if (!l2)
		{
			/* Only the first string still has a special */
			return 1000;
		}

		if (pos1 != pos2)
		{
			/* The string whose special comes earlier sorts first */
			return (pos1 < pos2) ? -2000 : 2000;
		}

		if (entry1->Primary != entry2->Primary)
			return (entry1->Primary - entry2->Primary);

		/* Same special at the same position: step over it in both strings */
		--l1;
		++s1;
		++pos1;
		--l2;
		++s2;
		++pos2;
	}
}
|
|
|
|
|
|
|
|
|
2006-09-17 22:06:36 +02:00
|
|
|
/*
 * get_coltab_entry
 *
 *	Fetch the next collation entry of the string (*p, *l), advancing the
 *	pointer and length past the consumed input.
 *
 *	*sum receives the bias to add to the entry's primary weight:
 *	primary_sum, or ignore_sum for a special character when
 *	TEXTTYPE_specials_first is set.
 *
 *	An expansion character is delivered in two calls: the first returns
 *	the character's own entry without consuming input and sets
 *	LC_HAVE_WAITING; the second returns the entry of ExpCh2 and consumes
 *	the character.
 *
 *	Special characters are skipped (and LC_HAVE_SPECIAL is recorded)
 *	unless TEXTTYPE_specials_first is set.
 *
 *	Returns NULL when the input is exhausted.
 */
static const SortOrderTblEntry* get_coltab_entry(texttype* obj, const UCHAR** p,
	ULONG* l, coltab_status* stat, int* sum)
{
	const SortOrderTblEntry* const table =
		(const SortOrderTblEntry*) obj->texttype_impl->texttype_collation_table;

	*sum = obj->texttype_impl->primary_sum;

	/* Second half of an expansion left over from the previous call */
	if (stat->stat_flags & LC_HAVE_WAITING)
	{
		--(*l);
		++(*p);
		stat->stat_flags &= ~LC_HAVE_WAITING;
		fb_assert(stat->stat_waiting);
		return stat->stat_waiting;
	}

	stat->stat_waiting = NULL;

	while (*l)
	{
		const UCHAR ch = **p;
		const SortOrderTblEntry* const entry = &table[ch];
		const bool expands = entry->IsExpand;
		const bool compresses = entry->IsCompress;

		if (expands && compresses)
		{
			/* Both flags set indicate a special value */
			if (obj->texttype_impl->texttype_flags & TEXTTYPE_specials_first)
			{
				/* Treated as an ordinary character with its own bias */
				*sum = obj->texttype_impl->ignore_sum;
				--(*l);
				++(*p);
				return entry;
			}

			/* Skip it, remember that one was seen, and look at the next byte */
			--(*l);
			++(*p);
			stat->stat_flags |= LC_HAVE_SPECIAL;
			continue;
		}

		if (expands)
		{
			const ExpandChar* exp = (const ExpandChar*) obj->texttype_impl->texttype_expand_table;

			while (exp->Ch && exp->Ch != ch)
				exp++;

			fb_assert(exp->Ch == ch);

			/* Return the first half now; the second half waits for the
			   next call. The input is not consumed until then. */
			stat->stat_waiting = &table[exp->ExpCh2];
			stat->stat_flags |= LC_HAVE_WAITING;
			return entry;
		}

		if (compresses && *l > 1)
		{
			/* Look for a two-to-one pair starting at this byte */
			for (const CompressPair* pair = (const CompressPair*) obj->texttype_impl->texttype_compress_table;
				 pair->CharPair[0];
				 ++pair)
			{
				if (pair->CharPair[0] == ch && pair->CharPair[1] == (*p)[1])
				{
					(*l) -= 2;
					(*p) += 2;
					return &pair->NoCaseWeight;
				}
			}
		}

		/* Plain character, or a compress character with no matching pair */
		--(*l);
		++(*p);
		return entry;
	}

	return NULL;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#define XOR ^ /* C bitwise XOR operator - defined for readability */
|
|
|
|
|
2006-09-17 22:06:36 +02:00
|
|
|
/*
 * LC_NARROW_compare
 *
 *	Compare two strings directly, without building sort keys.
 *
 *	With the pad option set, trailing spaces of both strings are
 *	ignored. The strings are then walked entry by entry:
 *		- the first difference in (biased) primary weight decides
 *		  immediately;
 *		- otherwise a secondary, tertiary or expansion ("quandary")
 *		  difference is remembered and used only if the strings turn
 *		  out to be equal at the primary level;
 *		- if still equal and special characters were skipped, the
 *		  result comes from special_scan().
 *
 * RETURN:
 *		< 0, 0 or > 0 as s1 sorts before, equal to or after s2.
 *		*error_flag is always set to false.
 */
SSHORT LC_NARROW_compare(texttype* obj, ULONG l1, const BYTE* s1, ULONG l2, const BYTE* s2,
	INTL_BOOL* error_flag)
{
	fb_assert(obj != NULL);
	fb_assert(s1 != NULL);
	fb_assert(s2 != NULL);
	fb_assert(error_flag != NULL);

	*error_flag = false;

	if (obj->texttype_pad_option)
	{
		/* Strip trailing spaces by shrinking the lengths. Working on the
		   length avoids forming a pointer before the start of the
		   buffer when a string is empty or consists only of spaces. */
		while (l1 && s1[l1 - 1] == ASCII_SPACE)
			l1--;

		while (l2 && s2[l2 - 1] == ASCII_SPACE)
			l2--;
	}

	/* Kept for special_scan(), which restarts from the beginning */
	const ULONG save_l1 = l1;
	const ULONG save_l2 = l2;
	const BYTE* const save_s1 = s1;
	const BYTE* const save_s2 = s2;

	/* Lower-level differences, used only if the primaries are all equal */
	SSHORT save_secondary = 0;
	SSHORT save_tertiary = 0;
	SSHORT save_quandary = 0;

	coltab_status stat1, stat2;
	stat1.stat_flags = 0;
	stat2.stat_flags = 0;

	const SortOrderTblEntry* col1 = 0;
	const SortOrderTblEntry* col2 = 0;

	while (true) {
		int sum1, sum2;

		col1 = get_coltab_entry(obj, &s1, &l1, &stat1, &sum1);
		col2 = get_coltab_entry(obj, &s2, &l2, &stat2, &sum2);

		if (!col1 || !col2)
			break;

		if (col1->Primary + sum1 != col2->Primary + sum2)
			return ((col1->Primary + sum1) - (col2->Primary + sum2));

		if ((obj->texttype_impl->texttype_flags & TEXTTYPE_secondary_insensitive) == 0 &&
			col1->Secondary != col2->Secondary)
		{
			/* Normally the first secondary difference wins; with
			   reversed secondaries the last one does */
			if ((obj->texttype_impl->texttype_flags & TEXTTYPE_reverse_secondary) ||
				!save_secondary)
			{
				save_secondary = (col1->Secondary - col2->Secondary);
			}
		}
		else if ((obj->texttype_impl->texttype_flags & TEXTTYPE_tertiary_insensitive) == 0 &&
			col1->Tertiary != col2->Tertiary)
		{
			if (!save_tertiary)
				save_tertiary = (col1->Tertiary - col2->Tertiary);
		}
		else if (((stat1.stat_flags & LC_HAVE_WAITING) !=
				  (stat2.stat_flags & LC_HAVE_WAITING)) && !save_quandary)
		{
			/* Exactly one side is in the middle of an expansion */
			if (obj->texttype_impl->texttype_flags & TEXTTYPE_expand_before)
				save_quandary = (stat1.stat_flags & LC_HAVE_WAITING) ? -1 : 1;
			else
				save_quandary = (stat1.stat_flags & LC_HAVE_WAITING) ? 1 : -1;
		}
	}

	/* One of the strings ended */
	fb_assert(l1 == 0 || l2 == 0);
	fb_assert(col1 == NULL || col2 == NULL);

	/* The string that still produced an entry is the longer one */
	if (col1 && !col2)
		return 500;

	if (!col1 && col2)
		return -500;

	if (l1 == 0 && l2 == 0) {
		if (save_secondary)
			return save_secondary;
		if (save_tertiary)
			return save_tertiary;
		if (save_quandary)
			return save_quandary;
		if (
			((stat1.stat_flags & LC_HAVE_SPECIAL)
			 || (stat2.stat_flags & LC_HAVE_SPECIAL))
			&& !(obj->texttype_impl->texttype_flags & TEXTTYPE_ignore_specials)
			&& !(obj->texttype_impl->texttype_flags & TEXTTYPE_specials_first))
		{
			return special_scan(obj, save_l1, save_s1, save_l2, save_s2);
		}
		return 0;
	}

	if (l1)
		return 600;
	return -600;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef DEBUG_COMPARE
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Debugging only
|
2005-12-03 07:39:08 +01:00
|
|
|
* Routine used for comparing results from comparison algorithm
|
2001-05-23 15:26:42 +02:00
|
|
|
* to results from key creation algorithm
|
|
|
|
*/
|
2006-09-17 22:06:36 +02:00
|
|
|
static SSHORT old_fam2_compare(texttype* obj, ULONG l1, const BYTE* s1,
|
2005-05-28 00:45:31 +02:00
|
|
|
ULONG l2, const BYTE* s2, INTL_BOOL* error_flag)
|
2001-05-23 15:26:42 +02:00
|
|
|
{
|
|
|
|
BYTE key1[LANGFAM2_MAX_KEY];
|
|
|
|
BYTE key2[LANGFAM2_MAX_KEY];
|
|
|
|
|
2003-11-04 00:59:24 +01:00
|
|
|
fb_assert(obj != NULL);
|
|
|
|
fb_assert(s1 != NULL);
|
|
|
|
fb_assert(s2 != NULL);
|
2001-05-23 15:26:42 +02:00
|
|
|
|
2005-12-03 07:39:08 +01:00
|
|
|
const ULONG len1 = LC_NARROW_string_to_key(obj, l1, s1, sizeof(key1), key1, INTL_KEY_SORT);
|
|
|
|
const ULONG len2 = LC_NARROW_string_to_key(obj, l2, s2, sizeof(key2), key2, INTL_KEY_SORT);
|
2005-05-28 00:45:31 +02:00
|
|
|
const ULONG len = MIN(len1, len2);
|
|
|
|
for (ULONG i = 0; i < len; i++) {
|
2001-05-23 15:26:42 +02:00
|
|
|
if (key1[i] == key2[i])
|
|
|
|
continue;
|
|
|
|
else if (key1[i] < key2[i])
|
|
|
|
return (-1);
|
|
|
|
else
|
|
|
|
return (1);
|
2004-03-11 06:04:26 +01:00
|
|
|
}
|
2001-05-23 15:26:42 +02:00
|
|
|
if (len1 < len2)
|
|
|
|
return (-1);
|
|
|
|
else if (len1 > len2)
|
|
|
|
return (1);
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
#endif /* DEBUG_COMPARE */
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef DEBUG_COMPARE
|
|
|
|
|
2004-12-08 06:58:41 +01:00
|
|
|
#define SIGN(x) (((x) < 0) ? -1 : (((x) == 0) ? 0 : 1))
|
2001-05-23 15:26:42 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Debugging only
|
|
|
|
* Routine used for comparing results from comparision algorithm
|
|
|
|
* to results from key creation algorithm
|
|
|
|
*/
|
2006-09-17 22:06:36 +02:00
|
|
|
static SSHORT fam2_compare(texttype* obj, ULONG l1, const BYTE* s1,
|
2005-05-28 00:45:31 +02:00
|
|
|
ULONG l2, const BYTE* s2, INTL_BOOL* error_flag)
|
2001-05-23 15:26:42 +02:00
|
|
|
{
|
2004-03-11 06:04:26 +01:00
|
|
|
SSHORT res1 = old_fam2_compare(obj, l1, s1, l2, s2);
|
|
|
|
SSHORT res2 = LC_NARROW_compare(obj, l1, s1, l2, s2);
|
2001-05-23 15:26:42 +02:00
|
|
|
|
|
|
|
if (SIGN(res1) != SIGN(res2)) {
|
2004-04-29 00:36:29 +02:00
|
|
|
printf("different compares:\n%d %s\n%d %s\nold = %d new = %d\n",
|
2001-05-23 15:26:42 +02:00
|
|
|
l1, s1, l2, s2, res1, res2);
|
2004-04-29 00:36:29 +02:00
|
|
|
fflush(stdout);
|
2001-05-23 15:26:42 +02:00
|
|
|
do_debug = 1;
|
|
|
|
res1 = old_fam2_compare(obj, l1, s1, l2, s2);
|
|
|
|
res2 = LC_NARROW_compare(obj, l1, s1, l2, s2);
|
|
|
|
do_debug = 0;
|
|
|
|
}
|
|
|
|
return res2;
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif /* DEBUG_COMPARE */
|
2005-05-28 00:45:31 +02:00
|
|
|
|
2006-09-17 22:06:36 +02:00
|
|
|
/*
 * LC_NARROW_canonical
 *
 *	Write the canonical form of each source byte to dst.
 *
 *	If the collation is insensitive at both the secondary and tertiary
 *	level, one byte (the primary weight) is written per character.
 *	Otherwise a USHORT is written per character, with the primary
 *	weight in the high byte and the significant lower-level weight(s)
 *	in the low byte.
 *
 * RETURN:
 *		Number of source characters processed (srcLen).
 */
ULONG LC_NARROW_canonical(texttype* obj, ULONG srcLen, const UCHAR* src, ULONG dstLen, UCHAR* dst)
{
	fb_assert(dst != NULL);
	fb_assert(src != NULL);
	fb_assert(dstLen >= obj->texttype_canonical_width * srcLen);

	const SortOrderTblEntry* const table =
		(const SortOrderTblEntry*) obj->texttype_impl->texttype_collation_table;

	/* The flags do not change during the loop, so test them once */
	const bool useSecondary =
		(obj->texttype_impl->texttype_flags & TEXTTYPE_secondary_insensitive) == 0;
	const bool useTertiary =
		(obj->texttype_impl->texttype_flags & TEXTTYPE_tertiary_insensitive) == 0;

	const UCHAR* const end = src + srcLen;

	for (const UCHAR* cur = src; cur < end; ++cur)
	{
		const SortOrderTblEntry* entry = &table[*cur];

		if (!useSecondary && !useTertiary)
		{
			/* Only the primary weight matters: one byte per character */
			*dst++ = entry->Primary;
			continue;
		}

		USHORT* const out = reinterpret_cast<USHORT*>(dst);

		if (useSecondary && useTertiary)
			*out = (entry->Primary << 8) | (entry->Secondary << 4) | entry->Tertiary;
		else if (useSecondary)
			*out = (entry->Primary << 8) | entry->Secondary;
		else
			*out = (entry->Primary << 8) | entry->Tertiary;

		dst += sizeof(USHORT);
	}

	return end - src;
}
|
|
|
|
|
|
|
|
|
2006-09-17 22:06:36 +02:00
|
|
|
/*
 * LC_NARROW_destroy
 *
 *	Free the implementation data attached to the texttype.
 */
void LC_NARROW_destroy(texttype* obj)
{
	delete obj->texttype_impl;
}
|
2005-12-03 07:39:08 +01:00
|
|
|
|
2006-10-16 21:17:44 +02:00
|
|
|
|
|
|
|
|
|
|
|
/*
 * Initialise a texttype for a narrow collation driven by the given
 * sort-order, case-conversion, compression and expansion tables.
 *
 * Parameters:
 *   tt                        texttype to fill in
 *   cs                        charset used to parse the specific attributes
 *   country                   stored in tt->texttype_country
 *   flags                     driver flags; only REVERSE is examined here
 *   noCaseOrderTbl            collation table, indexed by byte value (0..255)
 *   toUpperConversionTbl      table used by the upper-case callback
 *   toLowerConversionTbl      table used by the lower-case callback
 *   compressTbl               compression pairs, terminated by an entry whose
 *                             CharPair[0] is zero
 *   expansionTbl              expansion table (stored, not inspected here)
 *   name                      stored in tt->texttype_name
 *   attributes                only TEXTTYPE_ATTR_PAD_SPACE is accepted
 *   specificAttributes        textual collation-specific attributes
 *   specificAttributesLength  length of specificAttributes
 *
 * Returns true on success. Returns false when:
 *   - attributes contains anything other than TEXTTYPE_ATTR_PAD_SPACE;
 *   - the specific attributes cannot be parsed (or parsing throws);
 *   - any specific attribute is present that was not accepted here.
 *
 * The only specific attribute recognised is SPECIALS-FIRST with value "0"
 * or "1", and it is accepted only when the tables leave room for it
 * (see the range check below).
 *
 * On every failure after the implementation object has been allocated,
 * that object is released and tt->texttype_impl is reset to NULL, so a
 * failed initialisation does not leave an allocation behind. The destroy
 * callback stays safe to call because deleting NULL is a no-op.
 */
bool LC_NARROW_family2(
	texttype* tt,
	charset* cs,
	SSHORT country,
	USHORT flags,
	const SortOrderTblEntry* noCaseOrderTbl,
	const BYTE* toUpperConversionTbl,
	const BYTE* toLowerConversionTbl,
	const CompressPair* compressTbl,
	const ExpandChar* expansionTbl,
	const ASCII* name,
	USHORT attributes,
	const UCHAR* specificAttributes,
	ULONG specificAttributesLength)
{
	if (attributes & ~TEXTTYPE_ATTR_PAD_SPACE)
		return false;

	tt->texttype_version = TEXTTYPE_VERSION_1;
	tt->texttype_name = name;
	tt->texttype_country = country;
	tt->texttype_pad_option = (attributes & TEXTTYPE_ATTR_PAD_SPACE) ? true : false;
	tt->texttype_fn_key_length = LC_NARROW_key_length;
	tt->texttype_fn_string_to_key = LC_NARROW_string_to_key;
	tt->texttype_fn_compare = LC_NARROW_compare;
	tt->texttype_fn_str_to_upper = fam2_str_to_upper;
	tt->texttype_fn_str_to_lower = fam2_str_to_lower;
	tt->texttype_fn_destroy = LC_NARROW_destroy;

	tt->texttype_impl = new TextTypeImpl;
	tt->texttype_impl->texttype_collation_table = (const BYTE*) noCaseOrderTbl;
	tt->texttype_impl->texttype_toupper_table = toUpperConversionTbl;
	tt->texttype_impl->texttype_tolower_table = toLowerConversionTbl;
	tt->texttype_impl->texttype_compress_table = (const BYTE*) compressTbl;
	tt->texttype_impl->texttype_expand_table = (const BYTE*) expansionTbl;
	tt->texttype_impl->texttype_flags = ((flags) & REVERSE) ? TEXTTYPE_reverse_secondary : 0;
	tt->texttype_impl->texttype_bytes_per_key = 0;

	IntlUtil::SpecificAttributesMap map;
	Jrd::CharSet* charSet = NULL;
	bool ok = false;

	// The charset wrapper is only needed while parsing; it is released at a
	// single point below regardless of how parsing ended.
	try
	{
		charSet = Jrd::CharSet::createInstance(*getDefaultMemoryPool(), 0, cs);
		ok = IntlUtil::parseSpecificAttributes(charSet, specificAttributesLength, specificAttributes, &map);
	}
	catch (...)
	{
		ok = false;
	}

	delete charSet;

	if (ok)
	{
		int validAttributeCount = 0;
		string value;

		if (map.get("SPECIALS-FIRST", value) && (value == "0" || value == "1"))
		{
			int maxPrimary = 0;
			int minPrimary = INT_MAX;
			int maxIgnore = 0;

			// Primary weight range contributed by the compression pairs.
			for (const CompressPair* pair = compressTbl; pair->CharPair[0]; ++pair)
			{
				if (pair->NoCaseWeight.Primary > maxPrimary)
					maxPrimary = pair->NoCaseWeight.Primary;

				if (pair->NoCaseWeight.Primary < minPrimary)
					minPrimary = pair->NoCaseWeight.Primary;
			}

			// Scan all 256 byte values of the collation table.
			for (int ch = 0; ch <= 255; ++ch)
			{
				const SortOrderTblEntry* coll = &noCaseOrderTbl[ch];

				if (!(coll->IsExpand || coll->IsCompress))
				{
					// Plain entry: widens the primary weight range.
					if (coll->Primary > maxPrimary)
						maxPrimary = coll->Primary;

					if (coll->Primary < minPrimary)
						minPrimary = coll->Primary;
				}
				else if (coll->IsExpand && coll->IsCompress)
				{
					// Entries carrying both flags are tracked separately
					// (the "ignore" class used by SPECIALS-FIRST).
					if (coll->Primary > maxIgnore)
						maxIgnore = coll->Primary;
				}
			}

			// SPECIALS-FIRST is accepted only if such entries exist and the
			// combined weight range still fits in 0..255.
			if (maxIgnore > 0 && maxPrimary + maxIgnore - 1 <= 255)
			{
				++validAttributeCount;

				if (value == "1")
				{
					tt->texttype_impl->texttype_flags |= TEXTTYPE_specials_first;
					tt->texttype_impl->ignore_sum = minPrimary - 1;
					tt->texttype_impl->primary_sum = maxIgnore - 1;
				}
			}
		}

		// Any attribute left over that was not accepted above is an error.
		ok = (map.count() - validAttributeCount == 0);
	}

	if (!ok)
	{
		// Initialisation failed: do not leave the implementation object
		// attached to a texttype the caller has been told is invalid.
		delete tt->texttype_impl;
		tt->texttype_impl = NULL;
	}

	return ok;
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Initialise a texttype like LC_NARROW_family2, additionally supporting
 * the case-insensitive / accent-insensitive attributes and the
 * MULTI-LEVEL specific attribute.
 *
 * MULTI-LEVEL must be "0" or "1" when present (default: not multi-level);
 * any other value makes the function return false. The attribute is
 * removed from the parsed map and the remaining specific attributes are
 * regenerated and handed to LC_NARROW_family2, together with `attributes`
 * stripped of the case/accent-insensitive bits.
 *
 * Parameters are the same as for LC_NARROW_family2.
 *
 * Returns true on success; false if the specific attributes cannot be
 * parsed (or parsing throws), MULTI-LEVEL has an invalid value, or
 * LC_NARROW_family2 rejects the remaining arguments.
 */
bool LC_NARROW_family3(
	texttype* tt,
	charset* cs,
	SSHORT country,
	USHORT flags,
	const SortOrderTblEntry* noCaseOrderTbl,
	const BYTE* toUpperConversionTbl,
	const BYTE* toLowerConversionTbl,
	const CompressPair* compressTbl,
	const ExpandChar* expansionTbl,
	const ASCII* name,
	USHORT attributes,
	const UCHAR* specificAttributes,
	ULONG specificAttributesLength)
{
	bool multiLevel = false;

	IntlUtil::SpecificAttributesMap map;
	Jrd::CharSet* charSet = NULL;
	// Must outlive the LC_NARROW_family2 call: specificAttributes is
	// re-pointed into this string's buffer below.
	string newSpecificAttributes;
	bool attributesOk = false;

	try
	{
		charSet = Jrd::CharSet::createInstance(*getDefaultMemoryPool(), 0, cs);

		if (IntlUtil::parseSpecificAttributes(charSet, specificAttributesLength, specificAttributes, &map))
		{
			attributesOk = true;

			string value;
			if (map.get("MULTI-LEVEL", value))
			{
				if (value == "0")
					multiLevel = false;
				else if (value == "1")
					multiLevel = true;
				else
					attributesOk = false;	// invalid value: fail, but still release charSet below

				if (attributesOk)
					map.remove("MULTI-LEVEL");
			}

			if (attributesOk)
			{
				// Rebuild the attribute string without MULTI-LEVEL for family2.
				newSpecificAttributes = IntlUtil::generateSpecificAttributes(charSet, map);
				specificAttributes = (const UCHAR*) newSpecificAttributes.begin();
				specificAttributesLength = newSpecificAttributes.length();
			}
		}
	}
	catch (...)
	{
		attributesOk = false;
	}

	// Single release point: every path out of the block above ends up here,
	// so the charset wrapper is freed on the invalid MULTI-LEVEL path too.
	delete charSet;

	if (!attributesOk)
		return false;

	if (!LC_NARROW_family2(tt, cs, country, flags, noCaseOrderTbl,
			toUpperConversionTbl, toLowerConversionTbl, compressTbl, expansionTbl, name,
			attributes & ~(TEXTTYPE_ATTR_CASE_INSENSITIVE | TEXTTYPE_ATTR_ACCENT_INSENSITIVE),
			specificAttributes, specificAttributesLength))
	{
		return false;
	}

	if (!multiLevel)
		tt->texttype_impl->texttype_flags |= TEXTTYPE_non_multi_level;

	const USHORT insensitivity =
		attributes & (TEXTTYPE_ATTR_CASE_INSENSITIVE | TEXTTYPE_ATTR_ACCENT_INSENSITIVE);

	if (insensitivity)
	{
		tt->texttype_impl->texttype_flags |= TEXTTYPE_ignore_specials;

		if (multiLevel)
		{
			tt->texttype_flags |= TEXTTYPE_SEPARATE_UNIQUE;

			// Accent-insensitive but case-sensitive.
			if (insensitivity == TEXTTYPE_ATTR_ACCENT_INSENSITIVE)
				tt->texttype_flags |= TEXTTYPE_UNSORTED_UNIQUE;
		}

		// One canonical byte per character when both case and accent are
		// ignored, two otherwise.
		if (insensitivity == (TEXTTYPE_ATTR_CASE_INSENSITIVE | TEXTTYPE_ATTR_ACCENT_INSENSITIVE))
			tt->texttype_canonical_width = 1;
		else
			tt->texttype_canonical_width = 2;

		tt->texttype_fn_canonical = LC_NARROW_canonical;

		if (attributes & TEXTTYPE_ATTR_ACCENT_INSENSITIVE)
			tt->texttype_impl->texttype_flags |= TEXTTYPE_secondary_insensitive;

		if (attributes & TEXTTYPE_ATTR_CASE_INSENSITIVE)
			tt->texttype_impl->texttype_flags |= TEXTTYPE_tertiary_insensitive;
	}

	return true;
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Returns INTL_BAD_STR_LENGTH if output buffer was too small
|
|
|
|
*/
|
|
|
|
static ULONG fam2_str_to_upper(texttype* obj, ULONG iLen, const BYTE* pStr, ULONG iOutLen, BYTE *pOutStr)
|
|
|
|
{
|
|
|
|
fb_assert(pStr != NULL);
|
|
|
|
fb_assert(pOutStr != NULL);
|
|
|
|
fb_assert(iOutLen >= iLen);
|
|
|
|
const BYTE* const p = pOutStr;
|
|
|
|
while (iLen && iOutLen) {
|
|
|
|
*pOutStr++ = (obj->texttype_impl->texttype_toupper_table[(unsigned) *pStr]);
|
|
|
|
pStr++;
|
|
|
|
iLen--;
|
|
|
|
iOutLen--;
|
|
|
|
}
|
|
|
|
if (iLen != 0)
|
|
|
|
return (INTL_BAD_STR_LENGTH);
|
|
|
|
return (pOutStr - p);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Returns INTL_BAD_STR_LENGTH if output buffer was too small
|
|
|
|
*/
|
|
|
|
static ULONG fam2_str_to_lower(texttype* obj, ULONG iLen, const BYTE* pStr, ULONG iOutLen, BYTE *pOutStr)
|
|
|
|
{
|
|
|
|
fb_assert(pStr != NULL);
|
|
|
|
fb_assert(pOutStr != NULL);
|
|
|
|
fb_assert(iOutLen >= iLen);
|
|
|
|
const BYTE* const p = pOutStr;
|
|
|
|
while (iLen && iOutLen) {
|
|
|
|
*pOutStr++ = (obj->texttype_impl->texttype_tolower_table[(unsigned) *pStr]);
|
|
|
|
pStr++;
|
|
|
|
iLen--;
|
|
|
|
iOutLen--;
|
|
|
|
}
|
|
|
|
if (iLen != 0)
|
|
|
|
return (INTL_BAD_STR_LENGTH);
|
|
|
|
return (pOutStr - p);
|
|
|
|
}
|