mirror of
https://github.com/FirebirdSQL/firebird.git
synced 2025-01-27 20:03:03 +01:00
1766 lines
49 KiB
C++
1766 lines
49 KiB
C++
/************* history ************
|
|
*
|
|
* COMPONENT: JRD MODULE: INTL.CPP
|
|
* generated by Marion V2.5 2/6/90
|
|
* from dev db on 4-JAN-1995
|
|
*****************************************************************
|
|
*
|
|
* PR 2002-06-02 Added ugly c hack in
|
|
* intl_back_compat_alloc_func_lookup.
|
|
* When someone has time we need to change the references to
|
|
* return (void*) function to something more C++ like
|
|
*
|
|
* 42 4711 3 11 17 tamlin 2001
|
|
* Added silly numbers before my name, and converted it to C++.
|
|
*
|
|
* 18850 daves 4-JAN-1995
|
|
* Fix gds__alloc usage
|
|
*
|
|
* 18837 deej 31-DEC-1994
|
|
* fixing up HARBOR_MERGE
|
|
*
|
|
* 18821 deej 27-DEC-1994
|
|
* HARBOR MERGE
|
|
*
|
|
* 18789 jdavid 19-DEC-1994
|
|
* Cast some functions
|
|
*
|
|
* 17508 jdavid 15-JUL-1994
|
|
* Bring it up to date
|
|
*
|
|
* 17500 daves 13-JUL-1994
|
|
* Bug 6645: Different calculation of partial keys
|
|
*
|
|
* 17202 katz 24-MAY-1994
|
|
* PC_PLATFORM requires the .dll extension
|
|
*
|
|
* 17191 katz 23-MAY-1994
|
|
* OS/2 requires the .dll extension
|
|
*
|
|
* 17180 katz 23-MAY-1994
|
|
* Define location of DLL on OS/2
|
|
*
|
|
* 17149 katz 20-MAY-1994
|
|
* In JRD, isc_arg_number arguments are SLONG's not int's
|
|
*
|
|
* 16633 daves 19-APR-1994
|
|
* Bug 6202: International licensing uses INTERNATIONAL product code
|
|
*
|
|
* 16555 katz 17-APR-1994
|
|
* The last argument of calls to ERR_post should be 0
|
|
*
|
|
* 16521 katz 14-APR-1994
|
|
* Borland C needs a decorated symbol to lookup
|
|
*
|
|
* 16403 daves 8-APR-1994
|
|
* Bug 6441: Emit an error whenever transliteration from ttype_binary attempted
|
|
*
|
|
* 16141 katz 28-MAR-1994
|
|
* Don't declare return value from ISC_lookup_entrypoint as API_ROUTINE
|
|
*
|
|
* The contents of this file are subject to the Interbase Public
|
|
* License Version 1.0 (the "License"); you may not use this file
|
|
* except in compliance with the License. You may obtain a copy
|
|
* of the License at http://www.Inprise.com/IPL.html
|
|
*
|
|
* Software distributed under the License is distributed on an
|
|
* "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express
|
|
* or implied. See the License for the specific language governing
|
|
* rights and limitations under the License.
|
|
*
|
|
* The Original Code was created by Inprise Corporation
|
|
* and its predecessors. Portions created by Inprise Corporation are
|
|
* Copyright (C) Inprise Corporation.
|
|
*
|
|
* All Rights Reserved.
|
|
* Contributor(s): ______________________________________.
|
|
*
|
|
* 2002.10.29 Sean Leyne - Removed obsolete "Netware" port
|
|
*
|
|
* 2002.10.30 Sean Leyne - Removed support for obsolete "PC_PLATFORM" define
|
|
*
|
|
*/
|
|
|
|
|
|
/*
|
|
* PROGRAM: JRD Intl
|
|
* MODULE: intl.cpp
|
|
* DESCRIPTION: International text support routines
|
|
*
|
|
* copyright (c) 1992, 1993 by Borland International
|
|
*/
|
|
|
|
#include "firebird.h"
|
|
#include <string.h>
|
|
#include "../jrd/common.h"
|
|
#include <stdio.h>
|
|
#include "../jrd/jrd.h"
|
|
#include "../jrd/req.h"
|
|
#include "../jrd/val.h"
|
|
#include "gen/iberror.h"
|
|
#include "../jrd/intl.h"
|
|
#include "../jrd/intl_classes.h"
|
|
#include "../jrd/ods.h"
|
|
#include "../jrd/btr.h"
|
|
#include "../intl/charsets.h"
|
|
#include "../intl/country_codes.h"
|
|
#include "../jrd/gdsassert.h"
|
|
#include "../jrd/license.h"
|
|
#ifdef INTL_BUILTIN
|
|
#include "../intl/ld_proto.h"
|
|
#endif
|
|
#include "../jrd/all_proto.h"
|
|
#include "../jrd/cvt_proto.h"
|
|
#include "../jrd/err_proto.h"
|
|
#include "../jrd/fun_proto.h"
|
|
#include "../jrd/gds_proto.h"
|
|
#include "../jrd/iberr_proto.h"
|
|
#include "../jrd/intl_proto.h"
|
|
#include "../jrd/isc_proto.h"
|
|
#include "../jrd/met_proto.h"
|
|
#include "../jrd/thd.h"
|
|
#include "../jrd/evl_string.h"
|
|
#include "../jrd/jrd.h"
|
|
#include "../jrd/evl_like.h"
|
|
#include "../jrd/mov_proto.h"
|
|
#include "../jrd/IntlManager.h"
|
|
#include "../common/classes/init.h"
|
|
|
|
using namespace Jrd;
|
|
|
|
// True when the descriptor's dsc_dtype is one of the three string dtypes
// (dtype_text, dtype_varying, dtype_cstring). The argument is evaluated
// up to three times, so it must be free of side effects.
#define IS_TEXT(x) \
    ((((x)->dsc_dtype) == dtype_text) || \
     (((x)->dsc_dtype) == dtype_varying) || \
     (((x)->dsc_dtype) == dtype_cstring))

// Split a text type id into its two halves: the low byte is used as the
// character set id, everything above bit 7 as the collation id.
#define TTYPE_TO_CHARSET(tt)   ((SSHORT) ((tt) & 0x00FF))
#define TTYPE_TO_COLLATION(tt) ((SSHORT) ((tt) >> 8))
|
|
|
|
|
|
static bool all_spaces(thread_db*, CHARSET_ID, const BYTE*, ULONG, ULONG);
|
|
static void pad_spaces(thread_db*, CHARSET_ID, BYTE *, ULONG);
|
|
static INTL_BOOL lookup_charset(charset* cs, const SubtypeInfo* info);
|
|
static INTL_BOOL lookup_texttype(texttype* tt, const SubtypeInfo* info);
|
|
|
|
// We need all the structure definitions from the old interface
|
|
#define INTL_ENGINE_INTERNAL
|
|
#include "../jrd/intlobj_new.h"
|
|
|
|
|
|
// Classes and structures used internally to this file and intl implementation
|
|
class CharSetContainer
|
|
{
|
|
public:
|
|
CharSetContainer(MemoryPool& p, USHORT cs_id, const SubtypeInfo* info);
|
|
|
|
void destroy()
|
|
{
|
|
cs->destroy();
|
|
for (size_t i = 0; i < charset_collations.getCount(); i++)
|
|
if (charset_collations[i])
|
|
charset_collations[i]->destroy();
|
|
}
|
|
|
|
CharSet* getCharSet() { return cs; }
|
|
|
|
TextType* lookupCollation(thread_db* tdbb, USHORT tt_id);
|
|
|
|
CsConvert lookupConverter(thread_db* tdbb, CHARSET_ID to_cs);
|
|
|
|
static CharSetContainer* lookupCharset(thread_db* tdbb, SSHORT ttype, ISC_STATUS *status);
|
|
|
|
private:
|
|
Firebird::Array<TextType*> charset_collations;
|
|
CharSet* cs;
|
|
};
|
|
|
|
/* Below are templates for functions used in TextType implementation */
|
|
|
|
class NullStrConverter {
|
|
public:
|
|
NullStrConverter(thread_db* tdbb, const TextType* obj, const UCHAR *str, SLONG len) { }
|
|
};
|
|
|
|
template <typename PrevConverter>
|
|
class UpcaseConverter : public PrevConverter {
|
|
public:
|
|
UpcaseConverter(thread_db* tdbb, TextType* obj, const UCHAR* &str, SLONG &len) :
|
|
PrevConverter(tdbb, obj, str, len)
|
|
{
|
|
if (len > (int) sizeof(tempBuffer))
|
|
out_str = FB_NEW(*tdbb->getDefaultPool()) UCHAR[len];
|
|
else
|
|
out_str = tempBuffer;
|
|
obj->str_to_upper(len, str, len, out_str);
|
|
str = out_str;
|
|
}
|
|
~UpcaseConverter() {
|
|
if (out_str != tempBuffer)
|
|
delete[] out_str;
|
|
}
|
|
private:
|
|
UCHAR tempBuffer[100], *out_str;
|
|
};
|
|
|
|
template <typename PrevConverter>
|
|
class CanonicalConverter : public PrevConverter {
|
|
public:
|
|
CanonicalConverter(thread_db* tdbb, TextType* obj, const UCHAR* &str, SLONG &len) :
|
|
PrevConverter(tdbb, obj, str, len)
|
|
{
|
|
SLONG out_len = len / obj->getCharSet()->minBytesPerChar() * obj->getCanonicalWidth();
|
|
|
|
if (out_len > (int) sizeof(tempBuffer))
|
|
out_str = FB_NEW(*tdbb->getDefaultPool()) UCHAR[out_len];
|
|
else
|
|
out_str = tempBuffer;
|
|
|
|
if (str)
|
|
{
|
|
len = obj->canonical(len, str, out_len, out_str) * obj->getCanonicalWidth();
|
|
str = out_str;
|
|
}
|
|
else
|
|
len = 0;
|
|
}
|
|
~CanonicalConverter() {
|
|
if (out_str != tempBuffer)
|
|
delete[] out_str;
|
|
}
|
|
private:
|
|
UCHAR tempBuffer[100], *out_str;
|
|
};
|
|
|
|
template <typename StrConverter, typename CharType>
|
|
class LikeObjectImpl : public LikeObject {
|
|
public:
|
|
LikeObjectImpl(MemoryPool& pool, const CharType* str, SLONG str_len,
|
|
CharType escape, bool use_escape,
|
|
CharType sql_match_any, CharType sql_match_one)
|
|
: evaluator(pool, str, str_len, escape, use_escape, sql_match_any, sql_match_one)
|
|
{ }
|
|
|
|
void reset() { evaluator.reset(); }
|
|
|
|
bool result() { return evaluator.getResult(); }
|
|
|
|
bool process(thread_db* tdbb, Jrd::TextType* ttype, const UCHAR* str, SLONG length) {
|
|
StrConverter cvt(tdbb, ttype, str, length);
|
|
fb_assert(length % sizeof(CharType) == 0);
|
|
return evaluator.processNextChunk(
|
|
reinterpret_cast<const CharType*>(str), length / sizeof(CharType));
|
|
}
|
|
|
|
~LikeObjectImpl() {}
|
|
|
|
static LikeObject* create(thread_db* tdbb, TextType* ttype, const UCHAR* str, SLONG length,
|
|
const UCHAR* escape, SLONG escape_length,
|
|
const UCHAR* sql_match_any, SLONG match_any_length,
|
|
const UCHAR* sql_match_one, SLONG match_one_length)
|
|
{
|
|
StrConverter cvt(tdbb, ttype, str, length),
|
|
cvt_escape(tdbb, ttype, escape, escape_length),
|
|
cvt_match_any(tdbb, ttype, sql_match_any, match_any_length),
|
|
cvt_match_one(tdbb, ttype, sql_match_one, match_one_length);
|
|
|
|
fb_assert(length % sizeof(CharType) == 0);
|
|
return FB_NEW(*tdbb->getDefaultPool()) LikeObjectImpl(*tdbb->getDefaultPool(),
|
|
reinterpret_cast<const CharType*>(str), length / sizeof(CharType),
|
|
(escape ? *reinterpret_cast<const CharType*>(escape) : 0), escape_length != 0,
|
|
*reinterpret_cast<const CharType*>(sql_match_any),
|
|
*reinterpret_cast<const CharType*>(sql_match_one));
|
|
}
|
|
|
|
static bool evaluate(thread_db* tdbb, TextType* ttype, const UCHAR* s, SLONG sl,
|
|
const UCHAR* p, SLONG pl, const UCHAR* escape, SLONG escape_length, const UCHAR* sql_match_any, SLONG match_any_length, const UCHAR* sql_match_one, SLONG match_one_length)
|
|
{
|
|
StrConverter cvt1(tdbb, ttype, p, pl),
|
|
cvt2(tdbb, ttype, s, sl),
|
|
cvt_escape(tdbb, ttype, escape, escape_length),
|
|
cvt_match_any(tdbb, ttype, sql_match_any, match_any_length),
|
|
cvt_match_one(tdbb, ttype, sql_match_one, match_one_length);
|
|
|
|
fb_assert(pl % sizeof(CharType) == 0);
|
|
fb_assert(sl % sizeof(CharType) == 0);
|
|
Firebird::LikeEvaluator<CharType> evaluator(*tdbb->getDefaultPool(),
|
|
reinterpret_cast<const CharType*>(p), pl / sizeof(CharType),
|
|
(escape ? *reinterpret_cast<const CharType*>(escape) : 0), escape_length != 0,
|
|
*reinterpret_cast<const CharType*>(sql_match_any),
|
|
*reinterpret_cast<const CharType*>(sql_match_one));
|
|
evaluator.processNextChunk(reinterpret_cast<const CharType*>(s), sl / sizeof(CharType));
|
|
return evaluator.getResult();
|
|
}
|
|
|
|
private:
|
|
Firebird::LikeEvaluator<CharType> evaluator;
|
|
};
|
|
|
|
template <typename StrConverter, typename CharType>
|
|
class ContainsObjectImpl : public ContainsObject
|
|
{
|
|
public:
|
|
ContainsObjectImpl(MemoryPool& pool, const CharType* str, SLONG str_len)
|
|
: evaluator(pool, str, str_len)
|
|
{ }
|
|
|
|
void reset() { evaluator.reset(); }
|
|
|
|
bool result() { return evaluator.getResult(); }
|
|
|
|
bool process(thread_db* tdbb, Jrd::TextType* ttype, const UCHAR* str, SLONG length) {
|
|
StrConverter cvt(tdbb, ttype, str, length);
|
|
fb_assert(length % sizeof(CharType) == 0);
|
|
return evaluator.processNextChunk(
|
|
reinterpret_cast<const CharType*>(str), length / sizeof(CharType));
|
|
}
|
|
|
|
~ContainsObjectImpl() {}
|
|
|
|
static ContainsObject* create(thread_db* tdbb, TextType* ttype, const UCHAR* str, SLONG length) {
|
|
StrConverter cvt(tdbb, ttype, str, length);
|
|
fb_assert(length % sizeof(CharType) == 0);
|
|
return FB_NEW(*tdbb->getDefaultPool()) ContainsObjectImpl(*tdbb->getDefaultPool(),
|
|
reinterpret_cast<const CharType*>(str), length / sizeof(CharType));
|
|
}
|
|
|
|
static bool evaluate(thread_db* tdbb, TextType* ttype, const UCHAR* s, SLONG sl,
|
|
const UCHAR* p, SLONG pl)
|
|
{
|
|
StrConverter cvt1(tdbb, ttype, p, pl), cvt2(tdbb, ttype, s, sl);
|
|
fb_assert(pl % sizeof(CharType) == 0);
|
|
fb_assert(sl % sizeof(CharType) == 0);
|
|
Firebird::ContainsEvaluator<CharType> evaluator(*tdbb->getDefaultPool(),
|
|
reinterpret_cast<const CharType*>(p), pl / sizeof(CharType));
|
|
evaluator.processNextChunk(reinterpret_cast<const CharType*>(s), sl / sizeof(CharType));
|
|
return evaluator.getResult();
|
|
}
|
|
|
|
private:
|
|
Firebird::ContainsEvaluator<CharType> evaluator;
|
|
};
|
|
|
|
template <typename StrConverter, typename CharType>
|
|
class MatchesObjectImpl
|
|
{
|
|
public:
|
|
static bool evaluate(thread_db* tdbb, TextType* ttype, const UCHAR* s, SLONG sl,
|
|
const UCHAR* p, SLONG pl)
|
|
{
|
|
StrConverter cvt1(tdbb, ttype, p, pl), cvt2(tdbb, ttype, s, sl);
|
|
fb_assert(pl % sizeof(CharType) == 0);
|
|
fb_assert(sl % sizeof(CharType) == 0);
|
|
return MATCHESNAME(tdbb, ttype, reinterpret_cast<const CharType*>(s), sl,
|
|
reinterpret_cast<const CharType*>(p), pl);
|
|
}
|
|
};
|
|
|
|
template <typename StrConverter, typename CharType>
|
|
class SleuthObjectImpl
|
|
{
|
|
public:
|
|
static bool check(thread_db* tdbb, TextType* ttype, USHORT flags,
|
|
const UCHAR* search, SLONG search_len,
|
|
const UCHAR* match, SLONG match_len)
|
|
{
|
|
StrConverter cvt1(tdbb, ttype, search, search_len), cvt2(tdbb, ttype, match, match_len);
|
|
fb_assert(search_len % sizeof(CharType) == 0);
|
|
fb_assert(match_len % sizeof(CharType) == 0);
|
|
return SLEUTHNAME(tdbb, ttype, flags,
|
|
reinterpret_cast<const CharType*>(search), search_len,
|
|
reinterpret_cast<const CharType*>(match), match_len);
|
|
}
|
|
|
|
static bool merge(thread_db* tdbb, TextType* ttype,
|
|
const UCHAR* match, SLONG match_bytes,
|
|
const UCHAR* control, SLONG control_bytes,
|
|
UCHAR* combined, SLONG combined_bytes)
|
|
{
|
|
StrConverter cvt1(tdbb, ttype, match, match_bytes), cvt2(tdbb, ttype, control, control_bytes);
|
|
fb_assert(match_bytes % sizeof(CharType) == 0);
|
|
fb_assert(control_bytes % sizeof(CharType) == 0);
|
|
return SLEUTH_MERGE_NAME(tdbb, ttype,
|
|
reinterpret_cast<const CharType*>(match), match_bytes,
|
|
reinterpret_cast<const CharType*>(control), control_bytes,
|
|
reinterpret_cast<CharType*>(combined), combined_bytes);
|
|
}
|
|
};
|
|
|
|
class FixedWidthCharSet : public CharSet
|
|
{
|
|
public:
|
|
FixedWidthCharSet(CHARSET_ID _id, charset* _cs) : CharSet(_id, _cs) {}
|
|
|
|
virtual ULONG length(thread_db* tdbb, ULONG srcLen, const UCHAR* src, bool countTrailingSpaces) const
|
|
{
|
|
fb_assert(getStruct());
|
|
|
|
if (!countTrailingSpaces)
|
|
srcLen = removeTrailingSpaces(srcLen, src);
|
|
|
|
if (getStruct()->charset_fn_length)
|
|
return getStruct()->charset_fn_length(getStruct(), srcLen, src);
|
|
else
|
|
return srcLen / minBytesPerChar();
|
|
}
|
|
|
|
virtual ULONG substring(thread_db* tdbb, ULONG srcLen, const UCHAR* src, ULONG dstLen, UCHAR* dst, ULONG startPos, ULONG length) const
|
|
{
|
|
fb_assert(getStruct());
|
|
if (getStruct()->charset_fn_substring)
|
|
return getStruct()->charset_fn_substring(getStruct(), srcLen, src, dstLen, dst, startPos, length);
|
|
else
|
|
{
|
|
fb_assert(src != NULL && dst != NULL);
|
|
|
|
if (dstLen < length * minBytesPerChar())
|
|
return INTL_BAD_STR_LENGTH;
|
|
else if (startPos * minBytesPerChar() > srcLen)
|
|
return 0;
|
|
|
|
length = MIN(srcLen / minBytesPerChar() - startPos, length) * minBytesPerChar();
|
|
|
|
memcpy(dst, src + startPos * minBytesPerChar(), length);
|
|
|
|
return length;
|
|
}
|
|
}
|
|
};
|
|
|
|
class MultiByteCharSet : public CharSet
|
|
{
|
|
public:
|
|
MultiByteCharSet(CHARSET_ID _id, charset* _cs) : CharSet(_id, _cs) {}
|
|
|
|
virtual ULONG length(thread_db* tdbb, ULONG srcLen, const UCHAR* src, bool countTrailingSpaces) const
|
|
{
|
|
fb_assert(getStruct());
|
|
|
|
if (!countTrailingSpaces)
|
|
srcLen = removeTrailingSpaces(srcLen, src);
|
|
|
|
if (getStruct()->charset_fn_length)
|
|
return getStruct()->charset_fn_length(getStruct(), srcLen, src);
|
|
else
|
|
{
|
|
USHORT errCode;
|
|
ULONG errPos;
|
|
ULONG length = getConvToUnicode().convertLength(srcLen);
|
|
|
|
// convert to UTF16
|
|
Firebird::HalfStaticArray<USHORT, BUFFER_SMALL> str;
|
|
length = getConvToUnicode().convert(srcLen, src, length,
|
|
str.getBuffer(length / sizeof(USHORT)), &errCode, &errPos);
|
|
|
|
// calculate length of UTF16
|
|
return UnicodeUtil::utf16Length(length, str.begin());
|
|
}
|
|
}
|
|
|
|
virtual ULONG substring(thread_db* tdbb, ULONG srcLen, const UCHAR* src, ULONG dstLen, UCHAR* dst, ULONG startPos, ULONG length) const
|
|
{
|
|
fb_assert(getStruct());
|
|
if (getStruct()->charset_fn_substring)
|
|
return getStruct()->charset_fn_substring(getStruct(), srcLen, src, dstLen, dst, startPos, length);
|
|
else
|
|
{
|
|
fb_assert(src != NULL && dst != NULL);
|
|
|
|
if (length == 0 || startPos >= srcLen)
|
|
return 0;
|
|
|
|
USHORT errCode;
|
|
ULONG errPos;
|
|
|
|
// convert to UTF16
|
|
Firebird::HalfStaticArray<UCHAR, BUFFER_SMALL> str;
|
|
ULONG length = getConvToUnicode().convertLength(srcLen);
|
|
length = getConvToUnicode().convert(srcLen, src, length,
|
|
reinterpret_cast<USHORT*>(str.getBuffer(length)), &errCode, &errPos);
|
|
|
|
// generate substring of UTF16
|
|
Firebird::HalfStaticArray<UCHAR, BUFFER_SMALL> substr;
|
|
length = UnicodeUtil::utf16Substring(length, reinterpret_cast<const USHORT*>(str.begin()),
|
|
length, reinterpret_cast<USHORT*>(substr.getBuffer(length)), startPos, length);
|
|
|
|
// convert generated substring to original charset
|
|
return getConvFromUnicode().convert(length, substr.begin(), dstLen, dst, &errCode, &errPos);
|
|
}
|
|
}
|
|
};
|
|
|
|
template <typename pContainsObjectImpl, typename pLikeObjectImpl,
|
|
typename pMatchesObjectImpl, typename pSleuthObjectImpl>
|
|
class CollationImpl : public TextType
|
|
{
|
|
public:
|
|
CollationImpl(TTYPE_ID type, TEXTTYPE tt, CharSet* cs) : TextType(type, tt, cs) {}
|
|
|
|
virtual bool matches(thread_db* tdbb, const UCHAR* a, SLONG b, const UCHAR* c, SLONG d)
|
|
{
|
|
return pMatchesObjectImpl::evaluate(tdbb, this, a, b, c, d);
|
|
}
|
|
|
|
virtual bool sleuth_check(thread_db* tdbb, USHORT a, const UCHAR* b, SLONG c, const UCHAR* d, SLONG e)
|
|
{
|
|
return pSleuthObjectImpl::check(tdbb, this, a, b, c, d, e);
|
|
}
|
|
|
|
virtual ULONG sleuth_merge(thread_db* tdbb, const UCHAR* a, SLONG b, const UCHAR* c, SLONG d, UCHAR* e, SLONG f)
|
|
{
|
|
return pSleuthObjectImpl::merge(tdbb, this, a, b, c, d, e, f);
|
|
}
|
|
|
|
virtual bool like(thread_db* tdbb, const UCHAR* s, SLONG sl, const UCHAR* p, SLONG pl, const UCHAR* escape, SLONG escape_length)
|
|
{
|
|
return pLikeObjectImpl::evaluate(tdbb, this, s, sl, p, pl, escape, escape_length, getCharSet()->getSqlMatchAny(), getCharSet()->getSqlMatchAnyLength(), getCharSet()->getSqlMatchOne(), getCharSet()->getSqlMatchOneLength());
|
|
}
|
|
|
|
virtual LikeObject *like_create(thread_db* tdbb, const UCHAR* p, SLONG pl, const UCHAR* escape, SLONG escape_length)
|
|
{
|
|
return pLikeObjectImpl::create(tdbb, this, p, pl, escape, escape_length, getCharSet()->getSqlMatchAny(), getCharSet()->getSqlMatchAnyLength(), getCharSet()->getSqlMatchOne(), getCharSet()->getSqlMatchOneLength());
|
|
}
|
|
|
|
virtual bool contains(thread_db* tdbb, const UCHAR* s, SLONG sl, const UCHAR* p, SLONG pl)
|
|
{
|
|
return pContainsObjectImpl::evaluate(tdbb, this, s, sl, p, pl);
|
|
}
|
|
|
|
virtual ContainsObject *contains_create(thread_db* tdbb, const UCHAR* p, SLONG pl)
|
|
{
|
|
return pContainsObjectImpl::create(tdbb, this, p, pl);
|
|
}
|
|
};
|
|
|
|
typedef ContainsObjectImpl<UpcaseConverter<NullStrConverter>, UCHAR> uchar_contains_direct;
|
|
typedef ContainsObjectImpl<UpcaseConverter<NullStrConverter>, USHORT> ushort_contains_direct;
|
|
typedef ContainsObjectImpl<UpcaseConverter<NullStrConverter>, ULONG> ulong_contains_direct;
|
|
|
|
typedef MatchesObjectImpl<CanonicalConverter<NullStrConverter>, UCHAR> uchar_matches_canonical;
|
|
typedef SleuthObjectImpl<CanonicalConverter<NullStrConverter>, UCHAR> uchar_sleuth_canonical;
|
|
typedef LikeObjectImpl<CanonicalConverter<NullStrConverter>, UCHAR> uchar_like_canonical;
|
|
typedef ContainsObjectImpl<CanonicalConverter<UpcaseConverter<NullStrConverter> >, UCHAR> uchar_contains_canonical;
|
|
|
|
typedef MatchesObjectImpl<CanonicalConverter<NullStrConverter>, USHORT> ushort_matches_canonical;
|
|
typedef SleuthObjectImpl<CanonicalConverter<NullStrConverter>, USHORT> ushort_sleuth_canonical;
|
|
typedef LikeObjectImpl<CanonicalConverter<NullStrConverter>, USHORT> ushort_like_canonical;
|
|
typedef ContainsObjectImpl<CanonicalConverter<UpcaseConverter<NullStrConverter> >, USHORT> ushort_contains_canonical;
|
|
|
|
typedef MatchesObjectImpl<CanonicalConverter<NullStrConverter>, ULONG> ulong_matches_canonical;
|
|
typedef SleuthObjectImpl<CanonicalConverter<NullStrConverter>, ULONG> ulong_sleuth_canonical;
|
|
typedef LikeObjectImpl<CanonicalConverter<NullStrConverter>, ULONG> ulong_like_canonical;
|
|
typedef ContainsObjectImpl<CanonicalConverter<UpcaseConverter<NullStrConverter> >, ULONG> ulong_contains_canonical;
|
|
|
|
CharSetContainer* CharSetContainer::lookupCharset(thread_db* tdbb, SSHORT ttype, ISC_STATUS *status)
{
/**************************************
 *
 *	l o o k u p C h a r s e t
 *
 **************************************
 *
 * Functional description
 *
 *	Find the container for the character set encoded in the
 *	low byte of ttype (CS_dynamic is replaced by the
 *	attachment's character set).
 *
 *	The per-database vector dbb_charsets is consulted first
 *	and is enlarged when the id lies beyond its end. On a
 *	miss a new container is allocated from the permanent
 *	pool and cached in the vector.
 *
 *	The status argument is not used by this routine.
 *
 * Returns:
 *	the container - when found or successfully created;
 *	NULL          - when the character set is not known or
 *	                its CharSet object could not be created.
 *
 **************************************/
    SET_TDBB(tdbb);
    Database* dbb = tdbb->tdbb_database;

    USHORT id = TTYPE_TO_CHARSET(ttype);
    if (id == CS_dynamic)
        id = tdbb->tdbb_attachment->att_charset;

    // Cached lookup; an id past the end grows the vector and counts as a miss.
    CharSetContainer* cs = NULL;
    if (id < dbb->dbb_charsets.size())
        cs = dbb->dbb_charsets[id];
    else
        dbb->dbb_charsets.resize(id + 10);

    if (cs)
        return cs;

    // Not cached yet: collect the description needed to build the charset.
    // UTF16 is described by name alone; every other id is described by
    // MET_get_char_coll_subtype_info.
    SubtypeInfo info;
    const bool isUtf16 = (id == CS_UTF16);

    if (isUtf16)
        info.charsetName = "UTF16";

    if (!isUtf16 && !MET_get_char_coll_subtype_info(tdbb, id, &info))
        return NULL;

    cs = FB_NEW(*dbb->dbb_permanent) CharSetContainer(*dbb->dbb_permanent, id, &info);

    if (cs->getCharSet() == NULL)
    {
        // The constructor could not create the CharSet: discard the container.
        delete cs;
        return NULL;
    }

    dbb->dbb_charsets[id] = cs;
    return cs;
}
|
|
|
|
// Builds the container for character set `cs_id` described by `info`.
//
// A zero-filled charset structure is allocated from pool `p` and filled in by
// lookup_charset(). If the lookup fails, or the charset is not flagged
// CHARSET_ASCII_BASED, the structure is freed and `cs` stays NULL (callers
// detect this through getCharSet()). Otherwise `cs` becomes a
// MultiByteCharSet or a FixedWidthCharSet, depending on whether the minimum
// and maximum bytes per character differ.
CharSetContainer::CharSetContainer(MemoryPool& p, USHORT cs_id, const SubtypeInfo* info)
    : charset_collations(p),
      cs(NULL)
{
    charset* csL = FB_NEW(p) charset;
    memset(csL, 0, sizeof(charset));

    const bool usable =
        lookup_charset(csL, info) && (csL->charset_flags & CHARSET_ASCII_BASED);

    if (!usable)
    {
        delete csL;
        csL = NULL;
        return;
    }

    const bool fixedWidth =
        (csL->charset_min_bytes_per_char == csL->charset_max_bytes_per_char);

    if (fixedWidth)
        this->cs = FB_NEW(p) FixedWidthCharSet(cs_id, csL);
    else
        this->cs = FB_NEW(p) MultiByteCharSet(cs_id, csL);
}
|
|
|
|
// Returns a converter from this container's character set to `to_cs`.
//
// Only conversions with UTF16 on one side are handled:
//   - to UTF16: this charset's to-Unicode converter;
//   - from UTF16: the target charset's from-Unicode converter, or NULL when
//     the target charset cannot be looked up.
// Every other pair yields NULL.
CsConvert CharSetContainer::lookupConverter(thread_db* tdbb, CHARSET_ID to_cs)
{
    if (to_cs == CS_UTF16)
        return cs->getConvToUnicode();

    if (cs->getId() != CS_UTF16)
    {
        //// TODO: converters
        return NULL;
    }

    CharSet* const target = INTL_charset_lookup(tdbb, to_cs, NULL);
    if (target != NULL)
        return target->getConvFromUnicode();

    return NULL;
}
|
|
|
|
// Returns the TextType object for text type `tt_id`, creating and caching it
// in charset_collations (indexed by the collation half of the id) on first
// use.
//
// Returns NULL when the text type is not described by
// MET_get_char_coll_subtype_info, when its character set cannot be looked
// up, or when lookup_texttype() fails.
//
// For every character set except CS_UNICODE_FSS the collation's specific
// attributes are converted from CS_UNICODE_FSS into the collation's own
// character set before the lookup; a conversion failure is reported through
// ERR_post.
TextType* CharSetContainer::lookupCollation(thread_db* tdbb, USHORT tt_id)
{
    const USHORT id = TTYPE_TO_COLLATION(tt_id);

    // Already created?
    if (id < charset_collations.getCount() && charset_collations[id] != NULL)
        return charset_collations[id];

    SubtypeInfo info;
    if (!MET_get_char_coll_subtype_info(tdbb, tt_id, &info))
        return NULL;

    CharSet* charset = INTL_charset_lookup(tdbb, TTYPE_TO_CHARSET(tt_id), NULL);

    // With a NULL error handler the lookup reports failure by returning NULL
    // (lookupConverter checks for the same thing); bail out instead of
    // dereferencing it below.
    if (charset == NULL)
        return NULL;

    if (TTYPE_TO_CHARSET(tt_id) != CS_UNICODE_FSS)
    {
        Firebird::HalfStaticArray<UCHAR, 32> specificAttributes;
        ULONG size = info.specificAttributes.getCount() * charset->maxBytesPerChar();

        size = INTL_convert_bytes(tdbb, TTYPE_TO_CHARSET(tt_id),
                                  specificAttributes.getBuffer(size), size,
                                  CS_UNICODE_FSS, info.specificAttributes.begin(),
                                  info.specificAttributes.getCount(), ERR_post);
        specificAttributes.shrink(size);
        info.specificAttributes = specificAttributes;
    }

    TEXTTYPE tt = FB_NEW(*tdbb->tdbb_database->dbb_permanent) texttype;
    memset(tt, 0, sizeof(texttype));

    if (!lookup_texttype(tt, &info))
    {
        delete tt;
        return NULL;
    }

    if (charset_collations.getCount() <= id)
        charset_collations.grow(id + 1);

    if (charset_collations[id] == NULL)
    {
        // A canonical width and a canonical function must come together.
        fb_assert((tt->texttype_canonical_width == 0 && tt->texttype_fn_canonical == NULL) ||
                  (tt->texttype_canonical_width != 0 && tt->texttype_fn_canonical != NULL));

        if (tt->texttype_canonical_width == 0)
        {
            if (charset->isMultiByte())
                tt->texttype_canonical_width = sizeof(ULONG);   // UTF-32
            else
            {
                tt->texttype_canonical_width = charset->minBytesPerChar();
                // canonical is equal to string, then TEXTTYPE_DIRECT_MATCH can be turned on
                tt->texttype_flags |= TEXTTYPE_DIRECT_MATCH;
            }
        }

        fb_assert(tt->texttype_canonical_width == 1 ||
                  tt->texttype_canonical_width == 2 ||
                  tt->texttype_canonical_width == 4);

        // Pick the implementation whose character type matches the canonical
        // width. The direct-match CONTAINING matcher must use the same width
        // as the others: the 2- and 4-byte cases used to instantiate the
        // byte-wise uchar_contains_direct, which compares individual bytes
        // rather than whole characters.
        switch (tt->texttype_canonical_width)
        {
            case 1:
                if (tt->texttype_flags & TEXTTYPE_DIRECT_MATCH)
                {
                    charset_collations[id] = FB_NEW(*tdbb->tdbb_database->dbb_permanent)
                        CollationImpl<uchar_contains_direct, uchar_like_canonical,
                                      uchar_matches_canonical, uchar_sleuth_canonical>(tt_id, tt, charset);
                }
                else
                {
                    charset_collations[id] = FB_NEW(*tdbb->tdbb_database->dbb_permanent)
                        CollationImpl<uchar_contains_canonical, uchar_like_canonical,
                                      uchar_matches_canonical, uchar_sleuth_canonical>(tt_id, tt, charset);
                }
                break;

            case 2:
                if (tt->texttype_flags & TEXTTYPE_DIRECT_MATCH)
                {
                    charset_collations[id] = FB_NEW(*tdbb->tdbb_database->dbb_permanent)
                        CollationImpl<ushort_contains_direct, ushort_like_canonical,
                                      ushort_matches_canonical, ushort_sleuth_canonical>(tt_id, tt, charset);
                }
                else
                {
                    charset_collations[id] = FB_NEW(*tdbb->tdbb_database->dbb_permanent)
                        CollationImpl<ushort_contains_canonical, ushort_like_canonical,
                                      ushort_matches_canonical, ushort_sleuth_canonical>(tt_id, tt, charset);
                }
                break;

            case 4:
                if (tt->texttype_flags & TEXTTYPE_DIRECT_MATCH)
                {
                    charset_collations[id] = FB_NEW(*tdbb->tdbb_database->dbb_permanent)
                        CollationImpl<ulong_contains_direct, ulong_like_canonical,
                                      ulong_matches_canonical, ulong_sleuth_canonical>(tt_id, tt, charset);
                }
                else
                {
                    charset_collations[id] = FB_NEW(*tdbb->tdbb_database->dbb_permanent)
                        CollationImpl<ulong_contains_canonical, ulong_like_canonical,
                                      ulong_matches_canonical, ulong_sleuth_canonical>(tt_id, tt, charset);
                }
                break;

            default:
                fb_assert(false);
                return NULL;
        }
    }

    return charset_collations[id];
}
|
|
|
|
|
|
// Fills in `cs` for the character set named in `info` by delegating to
// IntlManager::lookupCharSet; the manager's result is returned unchanged.
static INTL_BOOL lookup_charset(charset* cs, const SubtypeInfo* info)
{
    const INTL_BOOL found = IntlManager::lookupCharSet(info->charsetName, cs);
    return found;
}
|
|
|
|
|
|
// Fills in `tt` for the collation described by `info` by delegating to
// IntlManager::lookupCollation; the manager's result is returned unchanged.
static INTL_BOOL lookup_texttype(texttype* tt, const SubtypeInfo* info)
{
    const INTL_BOOL found = IntlManager::lookupCollation(
        info->baseCollationName,
        info->charsetName,
        info->attributes,
        info->specificAttributes.begin(),
        info->specificAttributes.getCount(),
        info->ignore_attributes,
        tt);

    return found;
}
|
|
|
|
|
|
void Database::destroyIntlObjects()
|
|
{
|
|
for (size_t i = 0; i < dbb_charsets.size(); i++)
|
|
if (dbb_charsets[i])
|
|
dbb_charsets[i]->destroy();
|
|
}
|
|
|
|
|
|
CHARSET_ID INTL_charset(thread_db* tdbb, USHORT ttype, FPTR_ERROR err)
{
/**************************************
 *
 *	I N T L _ c h a r s e t
 *
 **************************************
 *
 * Functional description
 *	Return the character set ID for a piece of text.
 *
 *	The well-known text types map to fixed character sets,
 *	ttype_dynamic maps to the attachment's character set and
 *	any other value yields the low byte of the text type.
 *	The err argument is not used by this routine.
 *
 **************************************/

    if (ttype == ttype_none)
        return CS_NONE;

    if (ttype == ttype_ascii)
        return CS_ASCII;

    if (ttype == ttype_unicode_fss)
        return CS_UNICODE_FSS;

    if (ttype == ttype_binary)
        return CS_BINARY;

    if (ttype == ttype_dynamic)
    {
        // Only this case needs the thread context.
        SET_TDBB(tdbb);
        return tdbb->tdbb_attachment->att_charset;
    }

    return TTYPE_TO_CHARSET(ttype);
}
|
|
|
|
|
|
int INTL_compare(thread_db* tdbb,
                 const dsc* pText1,
                 const dsc* pText2,
                 FPTR_ERROR err)
{
/**************************************
 *
 *	I N T L _ c o m p a r e
 *
 **************************************
 *
 * Functional description
 *	Compare two pieces of international text.
 *
 *	Both descriptors must be text. When their text types
 *	name different character sets, one operand is converted
 *	into the character set of the other (through a local
 *	MAX_KEY-byte buffer) before the comparison. The result
 *	is whatever the compare() method of the chosen text
 *	type returns.
 *
 **************************************/
    SET_TDBB(tdbb);

    fb_assert(pText1 != NULL);
    fb_assert(pText2 != NULL);
    fb_assert(IS_TEXT(pText1) && IS_TEXT(pText2));
    fb_assert(INTL_data_or_binary(pText1) || INTL_data_or_binary(pText2));
    fb_assert(err);

    /* normal compare routine from CVT_compare */
    /* trailing spaces in strings are ignored for comparison */

    UCHAR* p1;
    USHORT t1;
    USHORT length1 = CVT_get_string_ptr(pText1, &t1, &p1, NULL, 0, err);

    UCHAR* p2;
    USHORT t2;
    USHORT length2 = CVT_get_string_ptr(pText2, &t2, &p2, NULL, 0, err);

    /* YYY - by SQL II compare_type must be explicit in the
       SQL statement if there is any doubt */

    SSHORT compare_type = MAX(t1, t2);	/* YYY */
    UCHAR buffer[MAX_KEY];

    if (t1 != t2)
    {
        const CHARSET_ID cs1 = INTL_charset(tdbb, t1, err);
        const CHARSET_ID cs2 = INTL_charset(tdbb, t2, err);

        if (cs1 != cs2)
        {
            /* The operand whose text type was NOT chosen as compare_type
               is the one that gets converted, if possible. */

            if (compare_type == t2)
            {
                /* convert pText1 to pText2's type */

                length1 = INTL_convert_bytes(tdbb, cs2,
                                             buffer, sizeof(buffer),
                                             cs1, p1, length1, err);
                p1 = buffer;
            }
            else
            {
                /* convert pText2 to pText1's type */
                /* YYY - should failure to convert really return
                   an error here?
                   Support joining a 437 & Latin1 Column, and we
                   pick the compare_type as 437, still only want the
                   equal values....
                   But then, what about < operations, which make no
                   sense if the string cannot be expressed...
                 */

                length2 = INTL_convert_bytes(tdbb, cs1,
                                             buffer, sizeof(buffer),
                                             cs2, p2, length2, err);
                p2 = buffer;
            }
        }
    }

    TextType* const obj = INTL_texttype_lookup(tdbb, compare_type, err, NULL);

    return obj->compare(length1, p1, length2, p2);
}
|
|
|
|
|
|
ULONG INTL_convert_bytes(thread_db* tdbb,
|
|
CHARSET_ID dest_type,
|
|
BYTE* dest_ptr,
|
|
ULONG dest_len,
|
|
CHARSET_ID src_type,
|
|
const BYTE* src_ptr,
|
|
ULONG src_len,
|
|
FPTR_ERROR err)
|
|
{
|
|
/**************************************
|
|
*
|
|
* I N T L _ c o n v e r t _ b y t e s
|
|
*
|
|
**************************************
|
|
*
|
|
* Functional description
|
|
* Given a string of bytes in one character set, convert it to another
|
|
* character set.
|
|
*
|
|
* If (dest_ptr) is NULL, return the count of bytes needed to convert
|
|
* the string. This does not guarantee the string can be converted,
|
|
* the purpose of this is to allocate a large enough buffer.
|
|
*
|
|
* RETURNS:
|
|
* Length of resulting string, in bytes.
|
|
* calls (err) if conversion error occurs.
|
|
*
|
|
**************************************/
|
|
ULONG len;
|
|
ULONG len2;
|
|
USHORT err_code = 0;
|
|
ULONG err_position;
|
|
|
|
SET_TDBB(tdbb);
|
|
|
|
|
|
fb_assert(src_ptr != NULL);
|
|
fb_assert(src_type != dest_type);
|
|
fb_assert(err != NULL);
|
|
|
|
const UCHAR* const start_dest_ptr = dest_ptr;
|
|
|
|
if ((dest_type == CS_BINARY) || (dest_type == CS_NONE)) {
|
|
|
|
/* See if we just need a length estimate */
|
|
if (dest_ptr == NULL)
|
|
return (src_len);
|
|
|
|
len = MIN(dest_len, src_len);
|
|
if (len)
|
|
do {
|
|
*dest_ptr++ = *src_ptr++;
|
|
} while (--len);
|
|
|
|
/* See if only space characters are remaining */
|
|
len = src_len - MIN(dest_len, src_len);
|
|
if (!len || all_spaces(tdbb, src_type, src_ptr, len, 0))
|
|
return (dest_ptr - start_dest_ptr);
|
|
else
|
|
(*err) (isc_arith_except, 0);
|
|
}
|
|
else if (src_len == 0)
|
|
return (0);
|
|
else if (src_type == CS_BINARY)
|
|
(*err)(isc_arith_except, isc_arg_gds, isc_transliteration_failed, 0);
|
|
else
|
|
/* character sets are known to be different */
|
|
{
|
|
/* Do we know an object from cs1 to cs2? */
|
|
|
|
CsConvert cs_obj = INTL_convert_lookup(tdbb, dest_type, src_type);
|
|
if (cs_obj != NULL) {
|
|
len = cs_obj.convert(src_len, src_ptr, dest_len, dest_ptr,
|
|
&err_code, &err_position);
|
|
if (!err_code || ((err_code == CS_TRUNCATION_ERROR)
|
|
&& all_spaces(tdbb, src_type, src_ptr, src_len,
|
|
err_position)))
|
|
{
|
|
return (len);
|
|
}
|
|
else if (err_code == CS_TRUNCATION_ERROR)
|
|
(*err) (isc_arith_except, 0);
|
|
else
|
|
(*err) (isc_arith_except, isc_arg_gds, isc_transliteration_failed, 0);
|
|
}
|
|
|
|
/* Find a CS1 to UNICODE object */
|
|
|
|
CharSet* from_cs = INTL_charset_lookup(tdbb, src_type, NULL);
|
|
if (from_cs == NULL)
|
|
(*err)(isc_arith_except, isc_arg_gds, isc_text_subtype, isc_arg_number,
|
|
(ISC_STATUS) src_type, 0);
|
|
|
|
/*
|
|
** allocate a temporary buffer that is large enough.
|
|
*/
|
|
BYTE* tmp_buffer =
|
|
(BYTE *) FB_NEW(*tdbb->getDefaultPool()) char[(SLONG) src_len * sizeof(ULONG)];
|
|
|
|
cs_obj = from_cs->getConvToUnicode();
|
|
fb_assert(cs_obj != NULL);
|
|
len = cs_obj.convert(src_len, src_ptr, src_len * sizeof(ULONG), tmp_buffer,
|
|
&err_code, &err_position);
|
|
if (err_code && !((err_code == CS_TRUNCATION_ERROR)
|
|
&& all_spaces(tdbb, src_type, src_ptr, src_len,
|
|
err_position)))
|
|
{
|
|
delete [] tmp_buffer;
|
|
if (err_code == CS_TRUNCATION_ERROR)
|
|
(*err) (isc_arith_except, 0);
|
|
else
|
|
(*err) (isc_arith_except, isc_arg_gds, isc_transliteration_failed, 0);
|
|
}
|
|
|
|
/* Find a UNICODE to CS2 object */
|
|
|
|
CharSet* to_cs = INTL_charset_lookup(tdbb, dest_type, NULL);
|
|
if (to_cs == NULL) {
|
|
delete [] tmp_buffer;
|
|
(*err) (isc_arith_except, isc_arg_gds, isc_text_subtype, isc_arg_number,
|
|
(ISC_STATUS) dest_type, 0);
|
|
}
|
|
cs_obj = to_cs->getConvFromUnicode();
|
|
fb_assert(cs_obj != NULL);
|
|
len2 = cs_obj.convert(len, tmp_buffer, dest_len, dest_ptr,
|
|
&err_code, &err_position);
|
|
|
|
if (err_code &&
|
|
!((err_code == CS_TRUNCATION_ERROR) &&
|
|
all_spaces(tdbb, CS_UTF16, tmp_buffer, len, err_position)))
|
|
{
|
|
delete [] tmp_buffer;
|
|
if (err_code == CS_TRUNCATION_ERROR)
|
|
(*err) (isc_arith_except, 0);
|
|
else
|
|
(*err) (isc_arith_except, isc_arg_gds, isc_transliteration_failed, 0);
|
|
}
|
|
|
|
delete [] tmp_buffer;
|
|
return (len2);
|
|
}
|
|
return (0); /* to remove compiler errors. This should never be executed */
|
|
}
|
|
|
|
|
|
CsConvert INTL_convert_lookup(thread_db* tdbb,
							  CHARSET_ID to_cs,
							  CHARSET_ID from_cs)
{
/**************************************
 *
 *      I N T L _ c o n v e r t _ l o o k u p
 *
 **************************************
 *
 * Functional description
 *      Find the converter that goes from character set (from_cs)
 *      to character set (to_cs).  CS_dynamic on either side is
 *      replaced by the attachment's character set first.
 *
 * RETURNS:
 *      What the source charset's container returns from
 *      lookupConverter(), or NULL when the source charset
 *      container cannot be found.
 *
 **************************************/
	SET_TDBB(tdbb);
	Database* dbb = tdbb->tdbb_database;
	CHECK_DBB(dbb);

	/* Resolve the "dynamic" placeholder against the attachment */
	const CHARSET_ID source_cs =
		(from_cs == CS_dynamic) ? tdbb->tdbb_attachment->att_charset : from_cs;
	const CHARSET_ID target_cs =
		(to_cs == CS_dynamic) ? tdbb->tdbb_attachment->att_charset : to_cs;

	/* Should source_cs == target_cs be handled better? YYY */

	fb_assert(source_cs != CS_dynamic);
	fb_assert(target_cs != CS_dynamic);

	CharSetContainer* const container =
		CharSetContainer::lookupCharset(tdbb, source_cs, NULL);

	if (container != NULL)
		return container->lookupConverter(tdbb, target_cs);

	return NULL;
}
|
|
|
|
|
|
int INTL_convert_string(dsc* to, const dsc* from, FPTR_ERROR err)
{
/**************************************
 *
 *      I N T L _ c o n v e r t _ s t r i n g
 *
 **************************************
 *
 * Functional description
 *      Convert the text described by (from) into the text
 *      descriptor (to), handling both the descriptor kind
 *      (text, cstring, varying) and the character set.
 *
 *      When both character sets are real and different the bytes
 *      go through INTL_convert_bytes; otherwise they are checked
 *      with wellFormed() against the target charset and copied
 *      byte for byte.
 *
 * RETURNS:
 *      0 if no error in conversion
 *      non-zero otherwise (currently only when there is no
 *      thread context).  Conversion errors are reported via (err).
 *
 * CVC: Unfortunately, this function puts the source in the 2nd param,
 *      as opposed to the CVT routines, so const helps mitigate
 *      coding mistakes.
 *
 **************************************/

/* Note: This function is called from outside the engine as
   well as inside - we likely can't get rid of JRD_get_thread_data here */
	thread_db* tdbb = JRD_get_thread_data();
	if (tdbb == NULL)			/* are we in the Engine? */
		return (1);				/* no, then can't access intl gah */

	fb_assert(to != NULL);
	fb_assert(from != NULL);
	fb_assert(IS_TEXT(to) && IS_TEXT(from));

	CHARSET_ID from_cs = INTL_charset(tdbb, INTL_TTYPE(from), err);
	CHARSET_ID to_cs = INTL_charset(tdbb, INTL_TTYPE(to), err);

	UCHAR* start = to->dsc_address;
	UCHAR* p = start;

	/* Must convert dtype(cstring,text,vary) and ttype(ascii,binary,..intl..) */

	UCHAR* from_ptr;
	USHORT from_type;
	const USHORT from_len =
		CVT_get_string_ptr(from, &from_type, &from_ptr, NULL, 0, err);

	const ULONG to_size = TEXT_LEN(to);
	ULONG to_len = to_size;

	/* These are read after the switch; give them defined values so an
	   unexpected dsc_dtype cannot lead to reading indeterminate data
	   when fb_assert is compiled out. */
	ULONG to_fill = 0;
	ULONG from_fill = 0;
	ULONG toLength = 0;

	const UCHAR* q = from_ptr;
	CharSet* toCharSet = INTL_charset_lookup(tdbb, to_cs, NULL);
	fb_assert(toCharSet != NULL);

	/* True when a real character set conversion is required; in every
	   other combination the bytes are copied unchanged. */
	const bool transliterate =
		(from_cs != to_cs) && (to_cs != CS_BINARY) &&
		(to_cs != CS_NONE) && (from_cs != CS_NONE);

	switch (to->dsc_dtype) {
	case dtype_text:
		if (transliterate) {
			to_len = INTL_convert_bytes(tdbb, to_cs, to->dsc_address, to_size,
										from_cs, from_ptr, from_len, err);
			toLength = to_len;
			to_fill = to_size - to_len;
			from_fill = 0;		/* Convert_bytes handles source truncation */
			p += to_len;
		}
		else {
			/* binary string can always be converted TO by byte-copy */

			to_len = MIN(from_len, to_size);
			if (!toCharSet->wellFormed(to_len, q))
				(*err)(isc_malformed_string, 0);
			toLength = to_len;
			from_fill = from_len - to_len;
			to_fill = to_size - to_len;
			for (; to_len; --to_len)
				*p++ = *q++;
		}

		/* Fixed-length text: fill the unused tail with the charset's space */
		if (to_fill > 0)
			pad_spaces(tdbb, to_cs, p, to_fill);
		break;

	case dtype_cstring:
		if (transliterate) {
			to_len = INTL_convert_bytes(tdbb, to_cs, to->dsc_address, to_size,
										from_cs, from_ptr, from_len, err);
			toLength = to_len;
			to->dsc_address[to_len] = 0;
			from_fill = 0;		/* Convert_bytes handles source truncation */
		}
		else {
			/* binary string can always be converted TO by byte-copy */

			to_len = MIN(from_len, to_size);
			if (!toCharSet->wellFormed(to_len, q))
				(*err)(isc_malformed_string, 0);
			toLength = to_len;
			from_fill = from_len - to_len;
			for (; to_len; --to_len)
				*p++ = *q++;
			*p = 0;
		}
		break;

	case dtype_varying:
		if (transliterate) {
			/* Data starts after the length prefix of the vary structure */
			start = reinterpret_cast<UCHAR*>(((vary*) to->dsc_address)->vary_string);
			to_len = INTL_convert_bytes(tdbb, to_cs, start, to_size,
										from_cs, from_ptr, from_len, err);
			toLength = to_len;
			((vary*) to->dsc_address)->vary_length = to_len;
			from_fill = 0;		/* Convert_bytes handles source truncation */
		}
		else {
			/* binary string can always be converted TO by byte-copy */

			to_len = MIN(from_len, to_size);
			if (!toCharSet->wellFormed(to_len, q))
				(*err)(isc_malformed_string, 0);
			toLength = to_len;
			from_fill = from_len - to_len;
			((vary*) p)->vary_length = to_len;
			start = p = reinterpret_cast<UCHAR*>(((vary*) p)->vary_string);
			for (; to_len; --to_len)
				*p++ = *q++;
		}
		break;

	default:
		/* IS_TEXT(to) was asserted above, so this should not be reached */
		fb_assert(false);
		break;
	}

	/* For multi-byte, non-legacy character sets also enforce the limit
	   in characters, not just in bytes. */
	if (toCharSet->isMultiByte() &&
		!(toCharSet->getFlags() & CHARSET_LEGACY_SEMANTICS) &&
		toLength != 31 &&	/* allow non CHARSET_LEGACY_SEMANTICS to be used as connection charset */
		toCharSet->length(tdbb, toLength, start, false) > to_size / toCharSet->maxBytesPerChar())
	{
		(*err)(isc_arith_except, 0);
	}

	if (from_fill) {
		/* Make sure remaining characters on From string are spaces */
		if (!all_spaces(tdbb, from_cs, q, from_fill, 0))
			(*err) (isc_arith_except, 0);
	}

	return 0;
}
|
|
|
|
|
|
int INTL_data(const dsc* pText)
|
|
{
|
|
/**************************************
|
|
*
|
|
* I N T L _ d a t a
|
|
*
|
|
**************************************
|
|
*
|
|
* Functional description
|
|
* Given an input text descriptor,
|
|
* return TRUE if the data pointed to represents
|
|
* international text (subject to user defined or non-binary
|
|
* collation or comparison).
|
|
*
|
|
**************************************/
|
|
|
|
fb_assert(pText != NULL);
|
|
|
|
if (!IS_TEXT(pText))
|
|
return FALSE;
|
|
|
|
if (!INTERNAL_TTYPE(pText))
|
|
return TRUE;
|
|
|
|
return FALSE;
|
|
}
|
|
|
|
int INTL_data_or_binary(const dsc* pText)
|
|
{
|
|
/**************************************
|
|
*
|
|
* I N T L _ d a t a _ o r _ b i n a r y
|
|
*
|
|
**************************************
|
|
*
|
|
* Functional description
|
|
*
|
|
**************************************/
|
|
|
|
return (INTL_data(pText) || (pText->dsc_ttype() == ttype_binary));
|
|
}
|
|
|
|
|
|
bool INTL_defined_type(thread_db* tdbb, ISC_STATUS * status, SSHORT t_type)
{
/**************************************
 *
 *      I N T L _ d e f i n e d _ t y p e
 *
 **************************************
 *
 * Functional description
 *      Is (t_type) a known text type?
 *
 * RETURNS:
 *      false   type is not defined (lookup produced no object)
 *      true    type is defined
 *      (status), when supplied, is cleared to isc_arg_end before
 *      the lookup and is handed to the lookup to describe any error.
 *
 * Note:
 *      Due to cleanup that must happen in DFW, this routine
 *      must return, and not call ERR directly.
 *
 **************************************/
	SET_TDBB(tdbb);

	if (status)
		status[0] = isc_arg_end;

	return INTL_texttype_lookup(tdbb, t_type, NULL, status) != NULL;
}
|
|
|
|
|
|
void INTL_init(thread_db* tdbb)
{
/**************************************
 *
 *      I N T L _ i n i t
 *
 **************************************
 *
 * Functional description
 *      Initialization hook; intentionally does nothing.
 *
 **************************************/
}
|
|
|
|
|
|
USHORT INTL_key_length(thread_db* tdbb, USHORT idxType, USHORT iLength)
{
/**************************************
 *
 *      I N T L _ k e y _ l e n g t h
 *
 **************************************
 *
 * Functional description
 *      Given an index type and a maximum string length (iLength),
 *      return the length of the byte string key descriptor to use
 *      when collating text of this type.
 *
 *      Internal text types use (iLength) directly; any other type
 *      asks its TextType object.  The result is first capped at
 *      MAX_KEY and then raised to at least (iLength).
 *
 **************************************/
	SET_TDBB(tdbb);

	fb_assert(idxType >= idx_first_intl_string);

	const SSHORT ttype = INTL_INDEX_TO_TEXT(idxType);
	const bool internal_type = (ttype >= 0 && ttype <= ttype_last_internal);

	USHORT result = iLength;
	if (!internal_type) {
		TextType* collation = INTL_texttype_lookup(tdbb, ttype, ERR_post, NULL);
		result = collation->key_length(iLength);
	}

	/* Validity checks on the computed length */

	if (result > MAX_KEY)
		result = MAX_KEY;

	if (result < iLength)
		result = iLength;

	return (result);
}
|
|
|
|
|
|
CharSet* INTL_charset_lookup(thread_db* tdbb, SSHORT parm1, ISC_STATUS* status)
{
/**************************************
 *
 *      I N T L _ c h a r s e t _ l o o k u p
 *
 **************************************
 *
 * Functional description
 *      Lookup a character set descriptor.
 *
 *      The work is delegated to CharSetContainer::lookupCharset;
 *      this routine only unwraps the CharSet from the container.
 *
 * RETURNS:
 *      the CharSet of the container found for (parm1), or
 *      NULL when no container is returned.
 *
 **************************************/
	CharSetContainer* const container =
		CharSetContainer::lookupCharset(tdbb, parm1, status);

	if (container != NULL)
		return container->getCharSet();

	return NULL;
}
|
|
|
|
|
|
TextType* INTL_texttype_lookup(thread_db* tdbb,
							   SSHORT parm1,
							   FPTR_ERROR err,
							   ISC_STATUS* status)
{
/**************************************
 *
 *      I N T L _ t e x t t y p e _ l o o k u p
 *
 **************************************
 *
 * Functional description
 *      Lookup the texttype (collation) object for text type
 *      (parm1).  ttype_dynamic is first mapped to the text type
 *      of the attachment's character set.
 *
 *      The character set container is obtained from
 *      CharSetContainer::lookupCharset and then asked for the
 *      collation.
 *
 * RETURNS:
 *      the collation object returned by the container, or
 *      NULL when no container is returned.
 *
 * NOTE(review): (err) is not referenced in this routine; how a
 *      failed lookup is reported depends on what lookupCharset
 *      does with (status) - confirm against its implementation.
 *
 **************************************/
	SET_TDBB(tdbb);

	if (parm1 == ttype_dynamic)
		parm1 = MAP_CHARSET_TO_TTYPE(tdbb->tdbb_attachment->att_charset);

	CharSetContainer* csc = CharSetContainer::lookupCharset(tdbb, parm1, status);
	if (!csc)
		return NULL;

	return csc->lookupCollation(tdbb, parm1);
}
|
|
|
|
|
|
bool INTL_texttype_validate(Jrd::thread_db* tdbb, const SubtypeInfo* info)
{
/**************************************
 *
 *      I N T L _ t e x t t y p e _ v a l i d a t e
 *
 **************************************
 *
 * Functional description
 *      Check that lookup_texttype succeeds for (info).  A zeroed
 *      scratch texttype is filled in by the lookup and, on
 *      success, released again through its destroy callback when
 *      one was set.
 *
 * RETURNS:
 *      the result of lookup_texttype.
 *
 **************************************/
	texttype scratch;
	memset(&scratch, 0, sizeof(scratch));

	if (!lookup_texttype(&scratch, info))
		return false;

	if (scratch.texttype_fn_destroy)
		scratch.texttype_fn_destroy(&scratch);

	return true;
}
|
|
|
|
|
|
void INTL_pad_spaces(thread_db* tdbb, DSC * type, UCHAR * string, ULONG length)
{
/**************************************
 *
 *      I N T L _ p a d _ s p a c e s
 *
 **************************************
 *
 * Functional description
 *      Fill (length) bytes at (string) with the space character
 *      defined by the character set of descriptor (type).
 *
 **************************************/
	SET_TDBB(tdbb);

	fb_assert(type != NULL);
	fb_assert(IS_TEXT(type));
	fb_assert(string != NULL);

	/* Derive the character set from the descriptor's text type */
	const USHORT cs_id = INTL_charset(tdbb, type->dsc_ttype(), NULL);

	pad_spaces(tdbb, cs_id, string, length);
}
|
|
|
|
|
|
USHORT INTL_string_to_key(thread_db* tdbb,
						  USHORT idxType,
						  const dsc* pString,
						  DSC* pByte,
						  USHORT key_type)
{
/**************************************
 *
 *      I N T L _ s t r i n g _ t o _ k e y
 *
 **************************************
 *
 * Functional description
 *      Given an input string, convert it to a byte string
 *      that will collate naturally (byte order).
 *
 *      The string is first made into text of the type implied
 *      by (idxType).  Metadata, binary, ascii and untyped text
 *      is copied into (pByte) with trailing pad characters
 *      removed; every other type is handed to the string_to_key
 *      method of its TextType object.
 *
 * RETURNS:
 *      the length of the resulting byte string.
 *
 **************************************/
	SET_TDBB(tdbb);

	fb_assert(idxType >= idx_first_intl_string || idxType == idx_string
		   || idxType == idx_byte_array || idxType == idx_metadata);
	fb_assert(pString != NULL);
	fb_assert(pByte != NULL);
	fb_assert(pString->dsc_address != NULL);
	fb_assert(pByte->dsc_address != NULL);
	fb_assert(pByte->dsc_dtype == dtype_text);

	/* Pick the text type and the pad character implied by the index type */

	UCHAR pad_char;
	SSHORT ttype;

	if (idxType == idx_string) {
		pad_char = ' ';
		ttype = ttype_none;
	}
	else if (idxType == idx_byte_array) {
		pad_char = 0;
		ttype = ttype_binary;
	}
	else if (idxType == idx_metadata) {
		pad_char = ' ';
		ttype = ttype_metadata;
	}
	else {
		pad_char = 0;
		ttype = INTL_INDEX_TO_TEXT(idxType);
	}

	/* Make a string into the proper type of text */

	MoveBuffer temp;
	UCHAR* src;
	const USHORT len = MOV_make_string2(pString, ttype, &src, temp);

	char* dest = reinterpret_cast<char*>(pByte->dsc_address);

	const bool raw_copy =
		(ttype == ttype_metadata) || (ttype == ttype_binary) ||
		(ttype == ttype_ascii) || (ttype == ttype_none);

	if (!raw_copy) {
		/* Let the collation build the key */
		TextType* collation = INTL_texttype_lookup(tdbb, ttype, ERR_post, NULL);
		return collation->string_to_key(len,
										reinterpret_cast<const unsigned char*>(src),
										pByte->dsc_length,
										reinterpret_cast<unsigned char*>(dest),
										key_type);
	}

	/* Copy as much of the string as the key buffer can hold */
	const char* const key_begin = (const char*) pByte->dsc_address;
	const USHORT capacity = pByte->dsc_length;
	for (USHORT todo = (len < capacity) ? len : capacity; todo; --todo)
		*dest++ = *src++;

	/* strip off ending pad characters */
	while (dest > key_begin && *(dest - 1) == pad_char)
		dest--;

	const USHORT outlen = (dest - key_begin);
	return (outlen);
}
|
|
|
|
|
|
int INTL_str_to_upper(thread_db* tdbb, DSC * pString)
{
/**************************************
 *
 *      I N T L _ s t r _ t o _ u p p e r
 *
 **************************************
 *
 * Functional description
 *      Uppercase, in place, the string bytes that
 *      CVT_get_string_ptr yields for (pString).
 *
 *      Binary text is left untouched, untyped and ascii text
 *      goes through UPPER7 byte by byte, anything else is
 *      delegated to the str_to_upper method of its TextType.
 *
 * RETURNS:
 *      always 0; callers are not checking the return code.
 *
 **************************************/
	SET_TDBB(tdbb);

	fb_assert(pString != NULL);
	fb_assert(pString->dsc_address != NULL);

	UCHAR* src;
	UCHAR buffer[MAX_KEY];
	USHORT ttype;
	const USHORT len =
		CVT_get_string_ptr(pString, &ttype, &src,
						   reinterpret_cast<vary*>(buffer),
						   sizeof(buffer), ERR_post);

	/* cannot uppercase binary strings */
	if (ttype == ttype_binary)
		return (0);

	if (ttype == ttype_none || ttype == ttype_ascii) {
		for (UCHAR* const stop = src + len; src < stop; ++src)
			*src = UPPER7(*src);
		return (0);
	}

	TextType* collation = INTL_texttype_lookup(tdbb, ttype, ERR_post, NULL);
	// ASF: this works for all cases? (src and dst buffers are the same)
	collation->str_to_upper(len, src, len, src);

	return (0);
}
|
|
|
|
|
|
int INTL_str_to_lower(thread_db* tdbb, DSC * pString)
{
/**************************************
 *
 *      I N T L _ s t r _ t o _ l o w e r
 *
 **************************************
 *
 * Functional description
 *      Lowercase, in place, the string bytes that
 *      CVT_get_string_ptr yields for (pString).
 *
 *      Binary text is left untouched, untyped and ascii text
 *      goes through LOWWER7 byte by byte, anything else is
 *      delegated to the str_to_lower method of its TextType.
 *
 * RETURNS:
 *      always 0; callers are not checking the return code.
 *
 **************************************/
	SET_TDBB(tdbb);

	fb_assert(pString != NULL);
	fb_assert(pString->dsc_address != NULL);

	UCHAR* src;
	UCHAR buffer[MAX_KEY];
	USHORT ttype;
	const USHORT len =
		CVT_get_string_ptr(pString, &ttype, &src,
						   reinterpret_cast<vary*>(buffer),
						   sizeof(buffer), ERR_post);

	/* cannot lowercase binary strings */
	if (ttype == ttype_binary)
		return (0);

	if (ttype == ttype_none || ttype == ttype_ascii) {
		for (UCHAR* const stop = src + len; src < stop; ++src)
			*src = LOWWER7(*src);
		return (0);
	}

	TextType* collation = INTL_texttype_lookup(tdbb, ttype, ERR_post, NULL);
	// ASF: this works for all cases? (src and dst buffers are the same)
	collation->str_to_lower(len, src, len, src);

	return (0);
}
|
|
|
|
|
|
static bool all_spaces(
					   thread_db* tdbb,
					   CHARSET_ID charset,
					   const BYTE* ptr, ULONG len, ULONG offset)
{
/**************************************
 *
 *      a l l _ s p a c e s
 *
 **************************************
 *
 * Functional description
 *      Decide whether the bytes from ptr[offset] up to (but not
 *      including) ptr[len] consist solely of repetitions of the
 *      space character of (charset).
 *      The binary representation of a space is character-set
 *      dependent (0x20 for Ascii, 0x0020 for Unicode, 0x20 for
 *      SJIS - but watch for 0x??20, which is NOT a space).
 *
 *      (offset) is assumed to point at the first byte not consumed
 *      by a conversion, and to sit on a character boundary.
 *
 **************************************/
	SET_TDBB(tdbb);

	fb_assert(ptr != NULL);

	CharSet* cs = INTL_charset_lookup(tdbb, charset, NULL);

	fb_assert(cs != NULL);

	const BYTE* cursor = ptr + offset;
	const BYTE* const limit = ptr + len;

	// Fast path for character sets whose space is a single octet

	if (cs->getSpaceLength() == 1) {
		for (; cursor < limit; ++cursor) {
			if (*cursor != *cs->getSpace())
				return false;
		}
		return true;
	}

	// General case: match the multi-octet space pattern repeatedly

	const unsigned char* pattern = cs->getSpace();
	const unsigned char* const pattern_end = &pattern[cs->getSpaceLength()];

	while (cursor < limit) {
		for (pattern = cs->getSpace();
			 cursor < limit && pattern < pattern_end;
			 ++cursor, ++pattern)
		{
			if (*cursor != *pattern)
				return false;
		}
	}

	return true;
}
|
|
|
|
|
|
static void pad_spaces(thread_db* tdbb, CHARSET_ID charset, BYTE* ptr, ULONG len)
{								/* byte count */
/**************************************
 *
 *      p a d _ s p a c e s
 *
 **************************************
 *
 * Functional description
 *      Fill (len) bytes starting at (ptr) with the space
 *      character defined by (charset), repeating the space's
 *      byte pattern as often as needed.
 *
 **************************************/
	SET_TDBB(tdbb);

	fb_assert(ptr != NULL);

	CharSet* cs = INTL_charset_lookup(tdbb, charset, NULL);

	fb_assert(cs != NULL);

	const BYTE* const limit = ptr + len;

	/* Fast path for character sets whose space is a single octet */

	if (cs->getSpaceLength() == 1) {
		for (; ptr < limit; ++ptr)
			*ptr = *cs->getSpace();
		return;
	}

	/* General case: lay down the multi-octet pattern repeatedly */

	const UCHAR* pattern = cs->getSpace();
	const UCHAR* const pattern_end = &pattern[cs->getSpaceLength()];

	while (ptr < limit) {
		for (pattern = cs->getSpace();
			 ptr < limit && pattern < pattern_end;
			 ++ptr, ++pattern)
		{
			*ptr = *pattern;
		}

		/* The buffer must not end in the middle of a space character */
		fb_assert(!(ptr == limit) || (pattern == pattern_end));
	}
}
|
|
|