2002-06-04 21:36:48 +02:00
|
|
|
/*
 *	PROGRAM:	JRD International support
 *	MODULE:		intl_classes.h
 *	DESCRIPTION:	International text handling definitions
 *
 *  The contents of this file are subject to the Initial
 *  Developer's Public License Version 1.0 (the "License");
 *  you may not use this file except in compliance with the
 *  License. You may obtain a copy of the License at
 *  http://www.ibphoenix.com/main.nfs?a=ibphoenix&page=ibp_idpl.
 *
 *  Software distributed under the License is distributed AS IS,
 *  WITHOUT WARRANTY OF ANY KIND, either express or implied.
 *  See the License for the specific language governing rights
 *  and limitations under the License.
 *
 *  The Original Code was created by Nickolay Samofatov
 *  for the Firebird Open Source RDBMS project.
 *
 *  Copyright (c) 2004 Nickolay Samofatov <nickolay@broadviewsoftware.com>
 *  and all contributors signed below.
 *
 *  All Rights Reserved.
 *  Contributor(s): ______________________________________.
 *
 */
|
|
|
|
|
2003-10-03 03:53:34 +02:00
|
|
|
#ifndef JRD_INTL_CLASSES_H
|
|
|
|
#define JRD_INTL_CLASSES_H
|
2002-06-04 21:36:48 +02:00
|
|
|
|
2003-11-03 18:14:45 +01:00
|
|
|
#include "firebird.h"
|
|
|
|
#include "../jrd/jrd.h"
|
2005-05-28 00:45:31 +02:00
|
|
|
#include "../jrd/intlobj_new.h"
|
|
|
|
#include "../jrd/constants.h"
|
|
|
|
#include "../jrd/unicode_util.h"
|
|
|
|
|
|
|
|
typedef SSHORT CHARSET_ID;
|
|
|
|
typedef SSHORT COLLATE_ID;
|
|
|
|
typedef USHORT TTYPE_ID;
|
2002-06-04 21:36:48 +02:00
|
|
|
|
2004-03-20 15:57:40 +01:00
|
|
|
namespace Jrd {
|
|
|
|
|
2004-04-25 04:30:32 +02:00
|
|
|
class LikeObject {
|
2002-06-04 21:36:48 +02:00
|
|
|
public:
|
2004-04-25 04:30:32 +02:00
|
|
|
virtual void reset() = 0;
|
2005-05-28 00:45:31 +02:00
|
|
|
virtual bool process(thread_db*, TextType*, const UCHAR*, SLONG) = 0;
|
2004-04-25 04:30:32 +02:00
|
|
|
virtual bool result() = 0;
|
2004-11-06 08:20:44 +01:00
|
|
|
virtual ~LikeObject() {}
|
2004-04-25 04:30:32 +02:00
|
|
|
};
|
2003-12-27 05:37:23 +01:00
|
|
|
|
2004-04-25 04:30:32 +02:00
|
|
|
class ContainsObject {
|
|
|
|
public:
|
|
|
|
virtual void reset() = 0;
|
2005-05-28 00:45:31 +02:00
|
|
|
virtual bool process(Jrd::thread_db*, Jrd::TextType*, const UCHAR*, SLONG) = 0;
|
2004-04-25 04:30:32 +02:00
|
|
|
virtual bool result() = 0;
|
2004-11-06 08:20:44 +01:00
|
|
|
virtual ~ContainsObject() {}
|
2004-04-25 04:30:32 +02:00
|
|
|
};
|
2003-12-27 05:37:23 +01:00
|
|
|
|
2005-05-28 00:45:31 +02:00
|
|
|
class CsConvert
|
|
|
|
{
|
2004-04-25 04:30:32 +02:00
|
|
|
public:
|
2005-05-28 00:45:31 +02:00
|
|
|
CsConvert(csconvert* _cnvt) : cnvt(_cnvt) {}
|
|
|
|
CsConvert(const CsConvert& obj) : cnvt(obj.cnvt) {}
|
2003-11-03 18:14:45 +01:00
|
|
|
|
2005-05-28 00:45:31 +02:00
|
|
|
// CVC: Beware of this can of worms: csconvert_convert gets assigned
|
|
|
|
// different functions that not necessarily take the same argument. Typically,
|
|
|
|
// the src pointer and the dest pointer use different types.
|
|
|
|
// How does this work without crashing is a miracle of IT.
|
2003-11-03 18:14:45 +01:00
|
|
|
|
2005-05-28 00:45:31 +02:00
|
|
|
// To be used with getConvFromUnicode method of CharSet class
|
|
|
|
ULONG convert(ULONG a,
|
|
|
|
const USHORT* b,
|
|
|
|
ULONG c,
|
|
|
|
UCHAR* d,
|
|
|
|
USHORT* e,
|
|
|
|
ULONG* f)
|
|
|
|
{
|
|
|
|
fb_assert(cnvt != NULL);
|
|
|
|
return (*cnvt->csconvert_fn_convert)(cnvt, a, reinterpret_cast<const UCHAR*>(b), c, d, e, f);
|
2003-11-03 18:14:45 +01:00
|
|
|
}
|
|
|
|
|
2005-05-28 00:45:31 +02:00
|
|
|
// To be used with getConvToUnicode method of CharSet class
|
|
|
|
ULONG convert(ULONG a,
|
|
|
|
const UCHAR* b,
|
|
|
|
ULONG c,
|
|
|
|
USHORT* d,
|
|
|
|
USHORT* e,
|
|
|
|
ULONG* f)
|
2003-11-03 18:14:45 +01:00
|
|
|
{
|
2005-05-28 00:45:31 +02:00
|
|
|
fb_assert(cnvt != NULL);
|
|
|
|
return (*cnvt->csconvert_fn_convert)(cnvt, a, b, c, reinterpret_cast<UCHAR*>(d), e, f);
|
2003-11-03 18:14:45 +01:00
|
|
|
}
|
2005-05-28 00:45:31 +02:00
|
|
|
|
|
|
|
// To be used for arbitrary conversions
|
|
|
|
ULONG convert(ULONG a,
|
|
|
|
const UCHAR* b,
|
|
|
|
ULONG c,
|
|
|
|
UCHAR* d,
|
|
|
|
USHORT* e,
|
|
|
|
ULONG* f)
|
2003-11-03 18:14:45 +01:00
|
|
|
{
|
2005-05-28 00:45:31 +02:00
|
|
|
fb_assert(cnvt != NULL);
|
|
|
|
return (*cnvt->csconvert_fn_convert)(cnvt, a, b, c, d, e, f);
|
2003-11-03 18:14:45 +01:00
|
|
|
}
|
2005-05-28 00:45:31 +02:00
|
|
|
|
|
|
|
// To be used for measure length of conversion
|
|
|
|
ULONG convertLength(ULONG srcLen)
|
2003-11-03 18:14:45 +01:00
|
|
|
{
|
2005-05-28 00:45:31 +02:00
|
|
|
fb_assert(cnvt != NULL);
|
|
|
|
|
|
|
|
USHORT errCode;
|
|
|
|
ULONG errPos;
|
|
|
|
return (*cnvt->csconvert_fn_convert)(cnvt, srcLen, NULL, 0, NULL, &errCode, &errPos);
|
2003-11-03 18:14:45 +01:00
|
|
|
}
|
2005-05-28 00:45:31 +02:00
|
|
|
|
|
|
|
const char* getName() const { fb_assert(cnvt); return cnvt->csconvert_name; }
|
|
|
|
|
|
|
|
csconvert* getStruct() const { return cnvt; }
|
|
|
|
|
|
|
|
private:
|
|
|
|
csconvert* cnvt;
|
|
|
|
};
|
|
|
|
|
|
|
|
static inline bool operator ==(const CsConvert& cv1, const CsConvert& cv2)
|
|
|
|
{
|
|
|
|
return cv1.getStruct() == cv2.getStruct();
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline bool operator !=(const CsConvert& cv1, const CsConvert& cv2)
|
|
|
|
{
|
|
|
|
return cv1.getStruct() != cv2.getStruct();
|
|
|
|
}
|
|
|
|
|
|
|
|
class CharSet
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
CharSet(CHARSET_ID _id, charset* _cs)
|
|
|
|
: id(_id), cs(_cs)
|
2003-11-03 18:14:45 +01:00
|
|
|
{
|
2005-05-28 00:45:31 +02:00
|
|
|
USHORT err_code;
|
|
|
|
ULONG err_pos;
|
|
|
|
|
|
|
|
sqlMatchAnyLength = getConvFromUnicode().convert(sizeof(SQL_MATCH_ANY_CHARS), &SQL_MATCH_ANY_CHARS, sizeof(sqlMatchAny), sqlMatchAny, &err_code, &err_pos);
|
|
|
|
sqlMatchOneLength = getConvFromUnicode().convert(sizeof(SQL_MATCH_1_CHAR), &SQL_MATCH_1_CHAR, sizeof(sqlMatchOne), sqlMatchOne, &err_code, &err_pos);
|
2003-11-03 18:14:45 +01:00
|
|
|
}
|
2005-05-28 00:45:31 +02:00
|
|
|
|
|
|
|
private:
|
|
|
|
CharSet(const CharSet&) {}
|
|
|
|
|
|
|
|
public:
|
|
|
|
virtual ~CharSet() {}
|
|
|
|
|
|
|
|
CHARSET_ID getId() const { fb_assert(cs); return id; }
|
|
|
|
const char* getName() const { fb_assert(cs); return cs->charset_name; }
|
|
|
|
UCHAR minBytesPerChar() const { fb_assert(cs); return cs->charset_min_bytes_per_char; }
|
|
|
|
UCHAR maxBytesPerChar() const { fb_assert(cs); return cs->charset_max_bytes_per_char; }
|
|
|
|
UCHAR getSpaceLength() const { fb_assert(cs); return cs->charset_space_length; }
|
|
|
|
const UCHAR* getSpace() const { fb_assert(cs); return cs->charset_space_character; }
|
|
|
|
USHORT getFlags() const { fb_assert(cs); return cs->charset_flags; }
|
|
|
|
|
|
|
|
bool isMultiByte() const
|
2003-11-03 18:14:45 +01:00
|
|
|
{
|
2005-05-28 00:45:31 +02:00
|
|
|
fb_assert(cs);
|
|
|
|
return cs->charset_min_bytes_per_char != cs->charset_max_bytes_per_char;
|
2003-11-03 18:14:45 +01:00
|
|
|
}
|
2005-05-28 00:45:31 +02:00
|
|
|
|
|
|
|
bool wellFormed(ULONG len, const UCHAR* str, ULONG* offendingPos = NULL) const
|
2003-11-03 18:14:45 +01:00
|
|
|
{
|
2005-05-28 00:45:31 +02:00
|
|
|
ULONG offendingPos2;
|
|
|
|
|
|
|
|
if (offendingPos == NULL)
|
|
|
|
offendingPos = &offendingPos2;
|
|
|
|
|
|
|
|
fb_assert(cs);
|
|
|
|
if (cs->charset_fn_well_formed)
|
|
|
|
return cs->charset_fn_well_formed(cs, len, str, offendingPos);
|
|
|
|
else
|
|
|
|
return true;
|
2003-11-03 18:14:45 +01:00
|
|
|
}
|
2005-05-28 00:45:31 +02:00
|
|
|
|
|
|
|
CsConvert getConvToUnicode() const { fb_assert(cs); return &cs->charset_to_unicode; }
|
|
|
|
CsConvert getConvFromUnicode() const { fb_assert(cs); return &cs->charset_from_unicode; }
|
|
|
|
|
|
|
|
void destroy()
|
2003-11-03 18:14:45 +01:00
|
|
|
{
|
2005-05-28 00:45:31 +02:00
|
|
|
fb_assert(cs);
|
|
|
|
|
|
|
|
if (cs->charset_fn_destroy)
|
|
|
|
cs->charset_fn_destroy(cs);
|
2003-11-03 18:14:45 +01:00
|
|
|
}
|
|
|
|
|
2005-05-28 00:45:31 +02:00
|
|
|
const UCHAR* getSqlMatchAny() const { return sqlMatchAny; }
|
|
|
|
const UCHAR* getSqlMatchOne() const { return sqlMatchOne; }
|
|
|
|
const BYTE getSqlMatchAnyLength() const { return sqlMatchAnyLength; }
|
|
|
|
const BYTE getSqlMatchOneLength() const { return sqlMatchOneLength; }
|
|
|
|
|
|
|
|
charset* getStruct() const { return cs; }
|
|
|
|
|
|
|
|
ULONG removeTrailingSpaces(ULONG srcLen, const UCHAR* src) const
|
2003-12-27 05:37:23 +01:00
|
|
|
{
|
2005-05-28 00:45:31 +02:00
|
|
|
const UCHAR* p = src + srcLen - getSpaceLength();
|
|
|
|
|
|
|
|
while (p >= src && memcmp(p, getSpace(), getSpaceLength()) == 0)
|
|
|
|
p -= getSpaceLength();
|
|
|
|
|
|
|
|
p += getSpaceLength();
|
|
|
|
|
|
|
|
return p - src;
|
2003-12-27 05:37:23 +01:00
|
|
|
}
|
2005-05-28 00:45:31 +02:00
|
|
|
|
|
|
|
virtual ULONG length(thread_db* tdbb, ULONG srcLen, const UCHAR* src, bool countTrailingSpaces) const = 0;
|
|
|
|
virtual ULONG substring(thread_db* tdbb, ULONG srcLen, const UCHAR* src, ULONG dstLen, UCHAR* dst, ULONG startPos, ULONG length) const = 0;
|
|
|
|
|
|
|
|
private:
|
|
|
|
CHARSET_ID id;
|
|
|
|
UCHAR sqlMatchAny[sizeof(ULONG)];
|
|
|
|
UCHAR sqlMatchOne[sizeof(ULONG)];
|
|
|
|
BYTE sqlMatchAnyLength;
|
|
|
|
BYTE sqlMatchOneLength;
|
|
|
|
charset* cs;
|
|
|
|
};
|
|
|
|
|
|
|
|
class TextType
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
TextType(TTYPE_ID _type, texttype *_tt, CharSet* _cs)
|
|
|
|
: type(_type), tt(_tt), cs(_cs)
|
2003-12-27 05:37:23 +01:00
|
|
|
{
|
2005-05-28 00:45:31 +02:00
|
|
|
canonical(cs->getSqlMatchAnyLength(), cs->getSqlMatchAny(), sizeof(sqlMatchAnyCanonic), sqlMatchAnyCanonic);
|
|
|
|
canonical(cs->getSqlMatchOneLength(), cs->getSqlMatchOne(), sizeof(sqlMatchOneCanonic), sqlMatchOneCanonic);
|
|
|
|
|
|
|
|
struct Conversion
|
|
|
|
{
|
|
|
|
USHORT ch;
|
|
|
|
UCHAR* ptr;
|
|
|
|
};
|
|
|
|
|
|
|
|
Conversion conversions[] =
|
|
|
|
{
|
|
|
|
{GDML_MATCH_ONE, gdmlMatchOneCanonic},
|
|
|
|
{GDML_MATCH_ANY, gdmlMatchAnyCanonic},
|
|
|
|
{GDML_QUOTE, gdmlQuoteCanonic},
|
|
|
|
{GDML_NOT, gdmlNotCanonic},
|
|
|
|
{GDML_RANGE, gdmlRangeCanonic},
|
|
|
|
{GDML_CLASS_START, gdmlClassStartCanonic},
|
|
|
|
{GDML_CLASS_END, gdmlClassEndCanonic},
|
|
|
|
{GDML_SUBSTITUTE, gdmlSubstituteCanonic},
|
|
|
|
{GDML_FLAG_SET, gdmlFlagSetCanonic},
|
|
|
|
{GDML_FLAG_CLEAR, gdmlFlagClearCanonic},
|
|
|
|
{GDML_COMMA, gdmlCommaCanonic},
|
|
|
|
{GDML_LPAREN, gdmlLParenCanonic},
|
|
|
|
{GDML_RPAREN, gdmlRParenCanonic},
|
|
|
|
{'S', gdmlUpperSCanonic},
|
|
|
|
{'s', gdmlLowerSCanonic}
|
|
|
|
};
|
|
|
|
|
|
|
|
for (int i = 0; i < FB_NELEM(conversions); i++)
|
|
|
|
{
|
|
|
|
USHORT err_code;
|
|
|
|
ULONG err_position;
|
|
|
|
UCHAR temp[sizeof(ULONG)];
|
|
|
|
|
|
|
|
ULONG length = getCharSet()->getConvFromUnicode().convert(sizeof(USHORT), &conversions[i].ch, sizeof(temp), temp, &err_code, &err_position);
|
|
|
|
canonical(length, temp, sizeof(ULONG), conversions[i].ptr);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
|
|
|
TextType(const TextType&) {}
|
|
|
|
|
|
|
|
public:
|
|
|
|
virtual ~TextType() {}
|
|
|
|
|
|
|
|
USHORT key_length(USHORT len) {
|
2003-12-27 05:37:23 +01:00
|
|
|
fb_assert(tt);
|
2005-05-28 00:45:31 +02:00
|
|
|
if (tt->texttype_fn_key_length)
|
|
|
|
return (*tt->texttype_fn_key_length)(tt, len);
|
|
|
|
else
|
|
|
|
{
|
|
|
|
if (getCharSet()->isMultiByte())
|
|
|
|
return UnicodeUtil::utf16KeyLength(len);
|
|
|
|
else
|
|
|
|
return len;
|
|
|
|
}
|
2003-12-27 05:37:23 +01:00
|
|
|
}
|
2005-05-28 00:45:31 +02:00
|
|
|
|
|
|
|
USHORT string_to_key(USHORT srcLen,
|
|
|
|
const UCHAR* src,
|
|
|
|
USHORT dstLen,
|
|
|
|
UCHAR* dst,
|
|
|
|
USHORT key_type)
|
2003-11-03 18:14:45 +01:00
|
|
|
{
|
2003-11-04 00:59:24 +01:00
|
|
|
fb_assert(tt);
|
2005-05-28 00:45:31 +02:00
|
|
|
if (tt->texttype_fn_string_to_key)
|
|
|
|
return (*tt->texttype_fn_string_to_key)(tt, srcLen, src, dstLen, dst, key_type);
|
|
|
|
else
|
|
|
|
{
|
|
|
|
const UCHAR* space = getCharSet()->getSpace();
|
|
|
|
BYTE spaceLength = getCharSet()->getSpaceLength();
|
|
|
|
Firebird::HalfStaticArray<UCHAR, BUFFER_SMALL> utf16Str;
|
|
|
|
UCHAR utf16Space[sizeof(ULONG)];
|
|
|
|
|
|
|
|
if (getCharSet()->isMultiByte())
|
|
|
|
{
|
|
|
|
USHORT err_code;
|
|
|
|
ULONG err_position;
|
|
|
|
|
|
|
|
// convert src to UTF-16
|
|
|
|
ULONG utf16Length = getCharSet()->getConvToUnicode().convertLength(srcLen);
|
|
|
|
|
|
|
|
srcLen = getCharSet()->getConvToUnicode().convert(srcLen, src,
|
|
|
|
utf16Length, utf16Str.getBuffer(utf16Length), &err_code, &err_position);
|
|
|
|
src = utf16Str.begin();
|
|
|
|
|
|
|
|
// convert charset space to UTF-16
|
|
|
|
spaceLength = getCharSet()->getConvToUnicode().convert(spaceLength, space,
|
|
|
|
sizeof(utf16Space), utf16Space, &err_code, &err_position);
|
|
|
|
fb_assert(spaceLength == 2); // space character can't be surrogate for default string_to_key
|
|
|
|
space = utf16Space;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (tt->texttype_pad_option)
|
|
|
|
{
|
|
|
|
const UCHAR* pad;
|
|
|
|
|
|
|
|
for (pad = src + srcLen - spaceLength; pad >= src; pad -= spaceLength)
|
|
|
|
{
|
|
|
|
if (memcmp(pad, space, spaceLength) != 0)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
srcLen = pad - src + spaceLength;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (getCharSet()->isMultiByte())
|
|
|
|
{
|
|
|
|
dstLen = UnicodeUtil::utf16ToKey(srcLen, reinterpret_cast<const USHORT*>(src),
|
|
|
|
dstLen, dst, key_type);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
if (dstLen >= srcLen)
|
|
|
|
{
|
|
|
|
memcpy(dst, src, srcLen);
|
|
|
|
dstLen = srcLen;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
dstLen = INTL_BAD_KEY_LENGTH;
|
|
|
|
}
|
|
|
|
|
|
|
|
return dstLen;
|
|
|
|
}
|
2003-11-03 18:14:45 +01:00
|
|
|
}
|
2005-05-28 00:45:31 +02:00
|
|
|
|
|
|
|
SSHORT compare(ULONG len1,
|
|
|
|
const UCHAR* str1,
|
|
|
|
ULONG len2,
|
|
|
|
const UCHAR* str2)
|
2003-11-03 18:14:45 +01:00
|
|
|
{
|
2005-05-28 00:45:31 +02:00
|
|
|
INTL_BOOL error = false;
|
2003-11-04 00:59:24 +01:00
|
|
|
fb_assert(tt);
|
2005-05-28 00:45:31 +02:00
|
|
|
if (tt->texttype_fn_compare)
|
|
|
|
return (*tt->texttype_fn_compare)(tt, len1, str1, len2, str2, &error);
|
|
|
|
else
|
|
|
|
{
|
|
|
|
const UCHAR* space = getCharSet()->getSpace();
|
|
|
|
BYTE spaceLength = getCharSet()->getSpaceLength();
|
|
|
|
Firebird::HalfStaticArray<UCHAR, BUFFER_SMALL> utf16Str1;
|
|
|
|
Firebird::HalfStaticArray<UCHAR, BUFFER_SMALL> utf16Str2;
|
|
|
|
UCHAR utf16Space[sizeof(ULONG)];
|
|
|
|
|
|
|
|
if (getCharSet()->isMultiByte())
|
|
|
|
{
|
|
|
|
USHORT err_code;
|
|
|
|
ULONG err_position;
|
|
|
|
|
|
|
|
// convert str1 to UTF-16
|
|
|
|
ULONG utf16Length = getCharSet()->getConvToUnicode().convertLength(len1);
|
|
|
|
|
|
|
|
len1 = getCharSet()->getConvToUnicode().convert(len1, str1,
|
|
|
|
utf16Length, utf16Str1.getBuffer(utf16Length), &err_code, &err_position);
|
|
|
|
str1 = utf16Str1.begin();
|
|
|
|
|
|
|
|
// convert str2 to UTF-16
|
|
|
|
utf16Length = getCharSet()->getConvToUnicode().convertLength(len2);
|
|
|
|
|
|
|
|
len2 = getCharSet()->getConvToUnicode().convert(len2, str2,
|
|
|
|
utf16Length, utf16Str2.getBuffer(utf16Length), &err_code, &err_position);
|
|
|
|
str2 = utf16Str2.begin();
|
|
|
|
|
|
|
|
// convert charset space to UTF-16
|
|
|
|
spaceLength = getCharSet()->getConvToUnicode().convert(spaceLength, space,
|
|
|
|
sizeof(utf16Space), utf16Space, &err_code, &err_position);
|
|
|
|
fb_assert(spaceLength == 2); // space character can't be surrogate for default compare
|
|
|
|
space = utf16Space;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (tt->texttype_pad_option)
|
|
|
|
{
|
|
|
|
const UCHAR* pad;
|
|
|
|
|
|
|
|
for (pad = str1 + len1 - spaceLength; pad >= str1; pad -= spaceLength)
|
|
|
|
{
|
|
|
|
if (memcmp(pad, space, spaceLength) != 0)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
len1 = pad - str1 + spaceLength;
|
|
|
|
|
|
|
|
for (pad = str2 + len2 - spaceLength; pad >= str2; pad -= spaceLength)
|
|
|
|
{
|
|
|
|
if (memcmp(pad, space, spaceLength) != 0)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
len2 = pad - str2 + spaceLength;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (getCharSet()->isMultiByte())
|
|
|
|
{
|
|
|
|
INTL_BOOL error_flag;
|
|
|
|
return UnicodeUtil::utf16Compare(len1, reinterpret_cast<const USHORT*>(str1),
|
|
|
|
len2, reinterpret_cast<const USHORT*>(str2), &error_flag);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
SSHORT cmp = memcmp(str1, str2, MIN(len1, len2));
|
|
|
|
|
|
|
|
if (cmp == 0)
|
|
|
|
cmp = (len1 < len2 ? -1 : (len1 > len2 ? 1 : 0));
|
|
|
|
|
|
|
|
return cmp;
|
|
|
|
}
|
|
|
|
}
|
2003-11-03 18:14:45 +01:00
|
|
|
}
|
2005-05-28 00:45:31 +02:00
|
|
|
|
|
|
|
ULONG str_to_upper(ULONG srcLen,
|
|
|
|
const UCHAR* src,
|
|
|
|
ULONG dstLen,
|
|
|
|
UCHAR* dst)
|
2003-11-03 18:14:45 +01:00
|
|
|
{
|
2003-11-04 00:59:24 +01:00
|
|
|
fb_assert(tt);
|
2005-05-28 00:45:31 +02:00
|
|
|
if (tt->texttype_fn_str_to_upper)
|
|
|
|
return (*tt->texttype_fn_str_to_upper)(tt, srcLen, src, dstLen, dst);
|
|
|
|
else
|
|
|
|
{
|
|
|
|
USHORT err_code;
|
|
|
|
ULONG err_position;
|
|
|
|
|
|
|
|
ULONG utf16_length = getCharSet()->getConvToUnicode().convertLength(srcLen);
|
|
|
|
Firebird::HalfStaticArray<UCHAR, BUFFER_SMALL> utf16_str;
|
|
|
|
UCHAR* utf16_ptr;
|
|
|
|
|
|
|
|
if (dstLen >= utf16_length) // if dst buffer is sufficient large, use it as intermediate
|
|
|
|
utf16_ptr = dst;
|
|
|
|
else
|
|
|
|
utf16_ptr = utf16_str.getBuffer(utf16_length);
|
|
|
|
|
|
|
|
// convert to UTF-16
|
|
|
|
srcLen = getCharSet()->getConvToUnicode().convert(srcLen, src,
|
|
|
|
utf16_length, utf16_ptr, &err_code, &err_position);
|
|
|
|
|
|
|
|
// convert to uppercase
|
|
|
|
Firebird::HalfStaticArray<UCHAR, BUFFER_SMALL> upper_str;
|
|
|
|
srcLen = UnicodeUtil::utf16UpperCase(srcLen, reinterpret_cast<USHORT*>(utf16_ptr),
|
|
|
|
utf16_length, reinterpret_cast<USHORT*>(upper_str.getBuffer(utf16_length)));
|
|
|
|
|
|
|
|
// convert to original character set
|
|
|
|
return getCharSet()->getConvFromUnicode().convert(srcLen, upper_str.begin(),
|
|
|
|
dstLen, dst, &err_code, &err_position);
|
|
|
|
}
|
2003-11-03 18:14:45 +01:00
|
|
|
}
|
|
|
|
|
2005-05-28 00:45:31 +02:00
|
|
|
ULONG str_to_lower(ULONG srcLen,
|
|
|
|
const UCHAR* src,
|
|
|
|
ULONG dstLen,
|
|
|
|
UCHAR* dst)
|
2003-11-03 18:14:45 +01:00
|
|
|
{
|
2003-11-04 00:59:24 +01:00
|
|
|
fb_assert(tt);
|
2005-05-28 00:45:31 +02:00
|
|
|
if (tt->texttype_fn_str_to_lower)
|
|
|
|
return (*tt->texttype_fn_str_to_lower)(tt, srcLen, src, dstLen, dst);
|
|
|
|
else
|
|
|
|
{
|
|
|
|
USHORT err_code;
|
|
|
|
ULONG err_position;
|
|
|
|
|
|
|
|
ULONG utf16_length = getCharSet()->getConvToUnicode().convertLength(srcLen);
|
|
|
|
Firebird::HalfStaticArray<UCHAR, BUFFER_SMALL> utf16_str;
|
|
|
|
UCHAR* utf16_ptr;
|
|
|
|
|
|
|
|
if (dstLen >= utf16_length) // if dst buffer is sufficient large, use it as intermediate
|
|
|
|
utf16_ptr = dst;
|
|
|
|
else
|
|
|
|
utf16_ptr = utf16_str.getBuffer(utf16_length);
|
|
|
|
|
|
|
|
// convert to UTF-16
|
|
|
|
srcLen = getCharSet()->getConvToUnicode().convert(srcLen, src,
|
|
|
|
utf16_length, utf16_ptr, &err_code, &err_position);
|
|
|
|
|
|
|
|
// convert to lowercase
|
|
|
|
Firebird::HalfStaticArray<UCHAR, BUFFER_SMALL> lower_str;
|
|
|
|
srcLen = UnicodeUtil::utf16LowerCase(srcLen, reinterpret_cast<USHORT*>(utf16_ptr),
|
|
|
|
utf16_length, reinterpret_cast<USHORT*>(lower_str.getBuffer(utf16_length)));
|
|
|
|
|
|
|
|
// convert to original character set
|
|
|
|
return getCharSet()->getConvFromUnicode().convert(srcLen, lower_str.begin(),
|
|
|
|
dstLen, dst, &err_code, &err_position);
|
|
|
|
}
|
2003-11-03 18:14:45 +01:00
|
|
|
}
|
2005-05-28 00:45:31 +02:00
|
|
|
|
|
|
|
ULONG canonical(ULONG srcLen,
|
|
|
|
const UCHAR* src,
|
|
|
|
ULONG dstLen,
|
|
|
|
UCHAR* dst)
|
2003-11-03 18:14:45 +01:00
|
|
|
{
|
2003-11-04 00:59:24 +01:00
|
|
|
fb_assert(tt);
|
2005-05-28 00:45:31 +02:00
|
|
|
|
|
|
|
if (tt->texttype_fn_canonical)
|
|
|
|
return (*tt->texttype_fn_canonical)(tt, srcLen, src, dstLen, dst);
|
|
|
|
else if (getCharSet()->isMultiByte())
|
|
|
|
{
|
|
|
|
fb_assert(tt->texttype_canonical_width == sizeof(ULONG));
|
|
|
|
|
|
|
|
USHORT err_code;
|
|
|
|
ULONG err_position;
|
|
|
|
Firebird::HalfStaticArray<UCHAR, BUFFER_SMALL> utf16_str;
|
|
|
|
|
|
|
|
ULONG utf16_len = getCharSet()->getConvToUnicode().convertLength(srcLen);
|
|
|
|
|
|
|
|
// convert to UTF-16
|
|
|
|
utf16_len = getCharSet()->getConvToUnicode().convert(srcLen, src,
|
|
|
|
utf16_len, utf16_str.getBuffer(utf16_len), &err_code, &err_position);
|
|
|
|
|
|
|
|
// convert UTF-16 to UTF-32
|
|
|
|
return UnicodeUtil::utf16ToUtf32(utf16_len, reinterpret_cast<const USHORT*>(utf16_str.begin()),
|
|
|
|
dstLen, reinterpret_cast<ULONG*>(dst), &err_code, &err_position) / sizeof(ULONG);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
fb_assert(tt->texttype_canonical_width == getCharSet()->minBytesPerChar());
|
|
|
|
fb_assert(dstLen >= srcLen);
|
|
|
|
|
|
|
|
memcpy(dst, src, srcLen);
|
|
|
|
|
|
|
|
return srcLen / getCharSet()->minBytesPerChar();
|
|
|
|
}
|
2003-11-03 18:14:45 +01:00
|
|
|
}
|
|
|
|
|
2005-05-28 00:45:31 +02:00
|
|
|
const UCHAR* getSqlMatchAnyCanonic() const { return sqlMatchAnyCanonic; }
|
|
|
|
const UCHAR* getSqlMatchOneCanonic() const { return sqlMatchOneCanonic; }
|
|
|
|
const UCHAR* getGdmlMatchOneCanonic() const { return gdmlMatchOneCanonic; }
|
|
|
|
const UCHAR* getGdmlMatchAnyCanonic() const { return gdmlMatchAnyCanonic; }
|
|
|
|
const UCHAR* getGdmlQuoteCanonic() const { return gdmlQuoteCanonic; }
|
|
|
|
const UCHAR* getGdmlNotCanonic() const { return gdmlNotCanonic; }
|
|
|
|
const UCHAR* getGdmlRangeCanonic() const { return gdmlRangeCanonic; }
|
|
|
|
const UCHAR* getGdmlClassStartCanonic() const { return gdmlClassStartCanonic; }
|
|
|
|
const UCHAR* getGdmlClassEndCanonic() const { return gdmlClassEndCanonic; }
|
|
|
|
const UCHAR* getGdmlSubstituteCanonic() const { return gdmlSubstituteCanonic; }
|
|
|
|
const UCHAR* getGdmlFlagSetCanonic() const { return gdmlFlagSetCanonic; }
|
|
|
|
const UCHAR* getGdmlFlagClearCanonic() const { return gdmlFlagClearCanonic; }
|
|
|
|
const UCHAR* getGdmlCommaCanonic() const { return gdmlCommaCanonic; }
|
|
|
|
const UCHAR* getGdmlLParenCanonic() const { return gdmlLParenCanonic; }
|
|
|
|
const UCHAR* getGdmlRParenCanonic() const { return gdmlRParenCanonic; }
|
|
|
|
const UCHAR* getGdmlUpperSCanonic() const { return gdmlUpperSCanonic; }
|
|
|
|
const UCHAR* getGdmlLowerSCanonic() const { return gdmlLowerSCanonic; }
|
2003-11-03 18:14:45 +01:00
|
|
|
|
2004-03-07 08:58:55 +01:00
|
|
|
USHORT getType() const
|
|
|
|
{
|
2005-05-28 00:45:31 +02:00
|
|
|
return type;
|
2003-11-03 18:14:45 +01:00
|
|
|
}
|
|
|
|
|
2005-06-20 07:40:38 +02:00
|
|
|
CharSet* getCharSet() const
|
2005-05-28 00:45:31 +02:00
|
|
|
{
|
|
|
|
return cs;
|
2003-11-03 18:14:45 +01:00
|
|
|
}
|
|
|
|
|
2005-05-28 00:45:31 +02:00
|
|
|
BYTE getCanonicalWidth() const
|
2004-03-07 08:58:55 +01:00
|
|
|
{
|
2003-11-04 00:59:24 +01:00
|
|
|
fb_assert(tt);
|
2005-05-28 00:45:31 +02:00
|
|
|
return tt->texttype_canonical_width;
|
2003-11-03 18:14:45 +01:00
|
|
|
}
|
|
|
|
|
2005-05-28 00:45:31 +02:00
|
|
|
USHORT getFlags() const
|
2004-03-07 08:58:55 +01:00
|
|
|
{
|
2003-11-04 00:59:24 +01:00
|
|
|
fb_assert(tt);
|
2005-05-28 00:45:31 +02:00
|
|
|
return tt->texttype_flags;
|
2003-11-03 18:14:45 +01:00
|
|
|
}
|
2003-11-03 21:16:03 +01:00
|
|
|
|
2005-05-28 00:45:31 +02:00
|
|
|
void destroy()
|
2003-11-03 18:14:45 +01:00
|
|
|
{
|
2005-05-28 00:45:31 +02:00
|
|
|
fb_assert(tt);
|
|
|
|
if (tt->texttype_fn_destroy)
|
|
|
|
tt->texttype_fn_destroy(tt);
|
2003-11-03 18:14:45 +01:00
|
|
|
}
|
|
|
|
|
2005-05-28 00:45:31 +02:00
|
|
|
virtual bool matches(thread_db* tdbb, const UCHAR* a, SLONG b, const UCHAR* c, SLONG d) = 0;
|
2003-11-03 21:16:03 +01:00
|
|
|
|
2005-05-28 00:45:31 +02:00
|
|
|
virtual bool sleuth_check(thread_db* tdbb, USHORT a, const UCHAR* b, SLONG c, const UCHAR* d, SLONG e) = 0;
|
|
|
|
virtual ULONG sleuth_merge(thread_db* tdbb, const UCHAR* a, SLONG b, const UCHAR* c, SLONG d, UCHAR* e, SLONG f) = 0;
|
2003-11-04 00:59:24 +01:00
|
|
|
|
2005-05-28 00:45:31 +02:00
|
|
|
virtual bool like(thread_db* tdbb, const UCHAR* s, SLONG sl, const UCHAR* p, SLONG pl, const UCHAR* escape, SLONG escape_length) = 0;
|
|
|
|
virtual LikeObject *like_create(thread_db* tdbb, const UCHAR* p, SLONG pl, const UCHAR* escape, SLONG escape_length) = 0;
|
2002-06-04 21:36:48 +02:00
|
|
|
|
2005-05-28 00:45:31 +02:00
|
|
|
virtual bool contains(thread_db* tdbb, const UCHAR* s, SLONG sl, const UCHAR* p, SLONG pl) = 0;
|
|
|
|
virtual ContainsObject *contains_create(thread_db* tdbb, const UCHAR* p, SLONG pl) = 0;
|
2003-11-04 00:59:24 +01:00
|
|
|
|
2003-11-03 18:14:45 +01:00
|
|
|
private:
|
2005-05-28 00:45:31 +02:00
|
|
|
TTYPE_ID type;
|
|
|
|
texttype* tt;
|
|
|
|
CharSet* cs;
|
|
|
|
UCHAR sqlMatchAnyCanonic[sizeof(ULONG)];
|
|
|
|
UCHAR sqlMatchOneCanonic[sizeof(ULONG)];
|
|
|
|
UCHAR gdmlMatchOneCanonic[sizeof(ULONG)];
|
|
|
|
UCHAR gdmlMatchAnyCanonic[sizeof(ULONG)];
|
|
|
|
UCHAR gdmlQuoteCanonic[sizeof(ULONG)];
|
|
|
|
UCHAR gdmlNotCanonic[sizeof(ULONG)];
|
|
|
|
UCHAR gdmlRangeCanonic[sizeof(ULONG)];
|
|
|
|
UCHAR gdmlClassStartCanonic[sizeof(ULONG)];
|
|
|
|
UCHAR gdmlClassEndCanonic[sizeof(ULONG)];
|
|
|
|
UCHAR gdmlSubstituteCanonic[sizeof(ULONG)];
|
|
|
|
UCHAR gdmlFlagSetCanonic[sizeof(ULONG)];
|
|
|
|
UCHAR gdmlFlagClearCanonic[sizeof(ULONG)];
|
|
|
|
UCHAR gdmlCommaCanonic[sizeof(ULONG)];
|
|
|
|
UCHAR gdmlLParenCanonic[sizeof(ULONG)];
|
|
|
|
UCHAR gdmlRParenCanonic[sizeof(ULONG)];
|
|
|
|
UCHAR gdmlUpperSCanonic[sizeof(ULONG)];
|
|
|
|
UCHAR gdmlLowerSCanonic[sizeof(ULONG)];
|
2002-06-04 21:36:48 +02:00
|
|
|
};
|
|
|
|
|
2004-03-20 15:57:40 +01:00
|
|
|
} //namespace Jrd
|
|
|
|
|
2003-10-03 03:53:34 +02:00
|
|
|
#endif /* JRD_INTL_CLASSES_H */
|