8
0
mirror of https://github.com/FirebirdSQL/firebird.git synced 2025-01-27 05:23:02 +01:00
firebird-mirror/src/jrd/intl.cpp
2015-02-22 11:02:49 +00:00

1448 lines
36 KiB
C++

/************* history ************
*
* COMPONENT: JRD MODULE: INTL.CPP
* generated by Marion V2.5 2/6/90
* from dev db on 4-JAN-1995
*****************************************************************
*
* PR 2002-06-02 Added ugly c hack in
* intl_back_compat_alloc_func_lookup.
* When someone has time we need to change the references to
* return (void*) function to something more C++ like
*
* 42 4711 3 11 17 tamlin 2001
* Added silly numbers before my name, and converted it to C++.
*
* 18850 daves 4-JAN-1995
* Fix gds__alloc usage
*
* 18837 deej 31-DEC-1994
* fixing up HARBOR_MERGE
*
* 18821 deej 27-DEC-1994
* HARBOR MERGE
*
* 18789 jdavid 19-DEC-1994
* Cast some functions
*
* 17508 jdavid 15-JUL-1994
* Bring it up to date
*
* 17500 daves 13-JUL-1994
* Bug 6645: Different calculation of partial keys
*
* 17202 katz 24-MAY-1994
* PC_PLATFORM requires the .dll extension
*
* 17191 katz 23-MAY-1994
* OS/2 requires the .dll extension
*
* 17180 katz 23-MAY-1994
* Define location of DLL on OS/2
*
* 17149 katz 20-MAY-1994
* In JRD, isc_arg_number arguments are SLONG's not int's
*
* 16633 daves 19-APR-1994
* Bug 6202: International licensing uses INTERNATIONAL product code
*
* 16555 katz 17-APR-1994
* The last argument of calls to ERR_post should be 0
*
* 16521 katz 14-APR-1994
* Borland C needs a decorated symbol to lookup
*
* 16403 daves 8-APR-1994
* Bug 6441: Emit an error whenever transliteration from ttype_binary attempted
*
* 16141 katz 28-MAR-1994
* Don't declare return value from ISC_lookup_entrypoint as API_ROUTINE
*
* The contents of this file are subject to the Interbase Public
* License Version 1.0 (the "License"); you may not use this file
* except in compliance with the License. You may obtain a copy
* of the License at http://www.Inprise.com/IPL.html
*
* Software distributed under the License is distributed on an
* "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express
* or implied. See the License for the specific language governing
* rights and limitations under the License.
*
* The Original Code was created by Inprise Corporation
* and its predecessors. Portions created by Inprise Corporation are
* Copyright (C) Inprise Corporation.
*
* All Rights Reserved.
* Contributor(s): ______________________________________.
*
* 2002.10.29 Sean Leyne - Removed obsolete "Netware" port
*
* 2002.10.30 Sean Leyne - Removed support for obsolete "PC_PLATFORM" define
*
*/
/*
* PROGRAM: JRD Intl
* MODULE: intl.cpp
* DESCRIPTION: International text support routines
*
* copyright (c) 1992, 1993 by Borland International
*/
#include "firebird.h"
#include <string.h>
#include <stdio.h>
#include "../jrd/jrd.h"
#include "../jrd/req.h"
#include "../jrd/val.h"
#include "gen/iberror.h"
#include "../jrd/intl.h"
#include "../jrd/intl_classes.h"
#include "../jrd/ods.h"
#include "../jrd/btr.h"
#include "../intl/charsets.h"
#include "../intl/country_codes.h"
#include "../common/gdsassert.h"
//#include "../jrd/license.h"
#ifdef INTL_BUILTIN
#include "../intl/ld_proto.h"
#endif
#include "../jrd/cvt_proto.h"
#include "../common/cvt.h"
#include "../jrd/err_proto.h"
#include "../jrd/fun_proto.h"
#include "../yvalve/gds_proto.h"
#include "../jrd/intl_proto.h"
#include "../common/isc_proto.h"
#include "../jrd/lck_proto.h"
#include "../jrd/met_proto.h"
#include "../common/intlobj_new.h"
#include "../jrd/Collation.h"
#include "../jrd/mov_proto.h"
#include "../jrd/IntlManager.h"
#include "../common/classes/init.h"
using namespace Jrd;
using namespace Firebird;
// True when descriptor x has one of the three string dtypes:
// dtype_text, dtype_varying or dtype_cstring.
#define IS_TEXT(x) (((x)->dsc_dtype == dtype_text) ||\
((x)->dsc_dtype == dtype_varying)||\
((x)->dsc_dtype == dtype_cstring))
// Forward declarations of the file-local helpers defined later in this file.
static bool allSpaces(CharSet*, const BYTE*, ULONG, ULONG);
static int blocking_ast_collation(void* ast_object);
static void pad_spaces(thread_db*, CHARSET_ID, BYTE *, ULONG);
static INTL_BOOL lookup_texttype(texttype* tt, const SubtypeInfo* info);
// Mutex held (through a CheckoutLockGuard) by CharSetContainer::lookupCollation
// while it replaces or creates a collation object.
static GlobalPtr<Mutex> createCollationMtx;
// Classes and structures used internally to this file and intl implementation
class CharSetContainer
{
public:
CharSetContainer(MemoryPool& p, USHORT cs_id, const SubtypeInfo* info);
void release(thread_db* tdbb)
{
for (FB_SIZE_T i = 0; i < charset_collations.getCount(); i++)
{
if (charset_collations[i])
charset_collations[i]->release(tdbb);
}
}
void destroy(thread_db* tdbb)
{
cs->destroy();
for (FB_SIZE_T i = 0; i < charset_collations.getCount(); i++)
{
if (charset_collations[i])
charset_collations[i]->destroy(tdbb);
}
}
CharSet* getCharSet() { return cs; }
Collation* lookupCollation(thread_db* tdbb, USHORT tt_id);
void unloadCollation(thread_db* tdbb, USHORT tt_id);
CsConvert lookupConverter(thread_db* tdbb, CHARSET_ID to_cs);
static CharSetContainer* lookupCharset(thread_db* tdbb, USHORT ttype);
static Lock* createCollationLock(thread_db* tdbb, USHORT ttype, void* object = NULL);
private:
static bool lookupInternalCharSet(USHORT id, SubtypeInfo* info);
private:
Firebird::Array<Collation*> charset_collations;
CharSet* cs;
};
CharSetContainer* CharSetContainer::lookupCharset(thread_db* tdbb, USHORT ttype)
{
/**************************************
 *
 *	l o o k u p C h a r s e t
 *
 **************************************
 *
 * Functional description
 *
 *	Return the container for the character set part of (ttype).
 *
 *	The attachment's att_charsets array acts as the cache. On a
 *	miss the character set is described either from the built-in
 *	tables or from the metadata, a new container is allocated in
 *	the attachment pool and stored in the cache.
 *
 * Returns:
 *	the container, or posts isc_text_subtype when the
 *	character set is unknown.
 *
 **************************************/
	SET_TDBB(tdbb);

	Jrd::Attachment* const att = tdbb->getAttachment();
	fb_assert(att);

	// CS_dynamic stands for "whatever the current thread context uses"
	USHORT csId = TTYPE_TO_CHARSET(ttype);
	if (csId == CS_dynamic)
		csId = tdbb->getCharSet();

	CharSetContainer* container = NULL;

	if (csId < att->att_charsets.getCount())
		container = att->att_charsets[csId];
	else
		att->att_charsets.resize(csId + 10);	// make room for the slot (plus some spare)

	if (container)
		return container;

	// Cache miss: describe the character set and build its container.
	SubtypeInfo info;

	if (lookupInternalCharSet(csId, &info) || MET_get_char_coll_subtype_info(tdbb, csId, &info))
	{
		container = FB_NEW(*att->att_pool) CharSetContainer(*att->att_pool, csId, &info);
		att->att_charsets[csId] = container;
	}
	else
		ERR_post(Arg::Gds(isc_text_subtype) << Arg::Num(ttype));

	return container;
}
// Describe a system character set from the IntlManager default tables only,
// i.e. without reading anything from the database.
// Returns true and fills *info on success, false when (id) is not found here.
bool CharSetContainer::lookupInternalCharSet(USHORT id, SubtypeInfo* info)
{
	// UTF16 is special-cased: only its name is filled in.
	if (id == CS_UTF16)
	{
		info->charsetName = "UTF16";
		return true;
	}

	if (id > ttype_last_internal)
		return false;

	// ASF: This linear lookup appears slow, but it should be cached per database so should not
	// cause performance problem.
	const IntlManager::CharSetDefinition* csDef = IntlManager::defaultCharSets;

	for (; csDef->name; ++csDef)
	{
		if (csDef->id == id)
		{
			// Find the collation with id 0 belonging to this character set.
			const IntlManager::CollationDefinition* colDef = IntlManager::defaultCollations;

			for (; colDef->name; ++colDef)
			{
				if (colDef->charSetId != id || colDef->collationId != 0)
					continue;

				info->charsetName = csDef->name;
				info->collationName = colDef->name;
				info->attributes = colDef->attributes;
				info->ignoreAttributes = false;

				if (colDef->specificAttributes)
				{
					info->specificAttributes.push((const UCHAR*) colDef->specificAttributes,
						fb_strlen(colDef->specificAttributes));
				}

				return true;
			}
		}
	}

	return false;
}
Lock* CharSetContainer::createCollationLock(thread_db* tdbb, USHORT ttype, void* object)
{
/**************************************
 *
 *	c r e a t e C o l l a t i o n L o c k
 *
 **************************************
 *
 * Functional description
 *	Allocate (in the attachment pool) a LCK_tt_exist lock
 *	keyed by the text type id. The lock is only created here,
 *	not acquired.
 *
 **************************************/

	// An AST needs an object to work on. Without one the blocking
	// routine would fail, so in that case no AST is installed at all.
	int (*const astRoutine)(void*) = object ? blocking_ast_collation : NULL;

	Lock* const result = FB_NEW_RPT(*tdbb->getAttachment()->att_pool, 0)
		Lock(tdbb, sizeof(SLONG), LCK_tt_exist, object, astRoutine);

	result->lck_key.lck_long = ttype;

	return result;
}
// Build the container for the character set named in (info).
// Posts isc_charset_not_installed when IntlManager does not know the
// character set or when it is not flagged CHARSET_ASCII_BASED.
CharSetContainer::CharSetContainer(MemoryPool& p, USHORT cs_id, const SubtypeInfo* info)
	: charset_collations(p),
	  cs(NULL)
{
	charset* const rawCharset = FB_NEW(p) charset;
	memset(rawCharset, 0, sizeof(charset));

	const bool usable =
		IntlManager::lookupCharSet(info->charsetName.c_str(), rawCharset) &&
		(rawCharset->charset_flags & CHARSET_ASCII_BASED);

	if (!usable)
	{
		delete rawCharset;
		ERR_post(Arg::Gds(isc_charset_not_installed) << Arg::Str(info->charsetName));
	}
	else
		this->cs = CharSet::createInstance(p, cs_id, rawCharset);
}
// Build a converter from this container's character set to (toCsId).
// Whichever side is UTF16 is passed to CsConvert as NULL.
CsConvert CharSetContainer::lookupConverter(thread_db* tdbb, CHARSET_ID toCsId)
{
	// Target is UTF16: no lookup of the target character set is needed.
	if (toCsId == CS_UTF16)
		return CsConvert(cs->getStruct(), NULL);

	CharSet* const target = INTL_charset_lookup(tdbb, toCsId);
	const bool sourceIsUtf16 = (cs->getId() == CS_UTF16);

	if (!sourceIsUtf16)
		return CsConvert(cs->getStruct(), target->getStruct());

	return CsConvert(NULL, target->getStruct());
}
// Return the Collation object for text type (tt_id), loading it if needed.
//
// charset_collations is the cache, indexed by the collation part of tt_id.
// A cached entry flagged obsolete is replaced by a freshly built one; the old
// instance is destroyed immediately when unused, otherwise once the new
// instance has its existence lock (or just before the error is posted).
// Posts isc_collation_not_installed when the collation cannot be found by
// lookup_texttype, and isc_text_subtype when the metadata does not know tt_id.
Collation* CharSetContainer::lookupCollation(thread_db* tdbb, USHORT tt_id)
{
const USHORT id = TTYPE_TO_COLLATION(tt_id);
// Fast path: a cached, non-obsolete collation is returned without taking the mutex.
if (id < charset_collations.getCount() && charset_collations[id] != NULL)
{
if (!charset_collations[id]->obsolete)
return charset_collations[id];
}
Jrd::Attachment* att = tdbb->getAttachment();
Jrd::Attachment::CheckoutLockGuard guard(att, createCollationMtx, FB_FUNCTION); // do we need it ?
// Obsolete instance that is still referenced; its destruction is deferred.
Collation* to_delete = NULL;
// Same check as above, repeated now that the mutex is held.
if (id < charset_collations.getCount() && charset_collations[id] != NULL)
{
if (charset_collations[id]->obsolete)
{
// if obsolete collation is not used delete it immediately,
// else wait until all references are released
if (charset_collations[id]->useCount == 0)
{
charset_collations[id]->destroy(tdbb);
delete charset_collations[id];
}
else
to_delete = charset_collations[id];
// Either way the slot is emptied so that a new instance gets built below.
charset_collations[id] = NULL;
}
else
return charset_collations[id];
}
SubtypeInfo info;
if (MET_get_char_coll_subtype_info(tdbb, tt_id, &info))
{
CharSet* charset = INTL_charset_lookup(tdbb, TTYPE_TO_CHARSET(tt_id));
if (TTYPE_TO_CHARSET(tt_id) != CS_METADATA)
{
// Convert the specific attributes from CS_METADATA into the collation's
// own character set. The destination buffer is sized for the worst case:
// source byte count times the target's maxBytesPerChar().
Firebird::UCharBuffer specificAttributes;
ULONG size = info.specificAttributes.getCount() * charset->maxBytesPerChar();
size = INTL_convert_bytes(tdbb, TTYPE_TO_CHARSET(tt_id),
specificAttributes.getBuffer(size), size,
CS_METADATA, info.specificAttributes.begin(),
info.specificAttributes.getCount(), ERR_post);
specificAttributes.shrink(size);
info.specificAttributes = specificAttributes;
}
texttype* tt = FB_NEW(*att->att_pool) texttype;
memset(tt, 0, sizeof(texttype));
if (!lookup_texttype(tt, &info))
{
delete tt;
ERR_post(Arg::Gds(isc_collation_not_installed) << Arg::Str(info.collationName) <<
Arg::Str(info.charsetName));
}
// Make sure the cache has a slot for this collation id.
if (charset_collations.getCount() <= id)
charset_collations.grow(id + 1);
// Canonical width and canonical function must be both set or both unset.
fb_assert((tt->texttype_canonical_width == 0 && tt->texttype_fn_canonical == NULL) ||
(tt->texttype_canonical_width != 0 && tt->texttype_fn_canonical != NULL));
// The texttype did not supply a canonical form: pick a default width here.
if (tt->texttype_canonical_width == 0)
{
if (charset->isMultiByte())
tt->texttype_canonical_width = sizeof(ULONG); // UTF-32
else
{
tt->texttype_canonical_width = charset->minBytesPerChar();
// canonical is equal to string, then TEXTTYPE_DIRECT_MATCH can be turned on
tt->texttype_flags |= TEXTTYPE_DIRECT_MATCH;
}
}
charset_collations[id] = Collation::createInstance(*att->att_pool, tt_id, tt, charset);
charset_collations[id]->name = info.collationName;
// we don't need a lock in the charset
// (collation id 0 gets no existence lock)
if (id != 0)
{
Lock* lock = charset_collations[id]->existenceLock =
CharSetContainer::createCollationLock(tdbb, tt_id, charset_collations[id]);
fb_assert(charset_collations[id]->useCount == 0);
fb_assert(!charset_collations[id]->obsolete);
LCK_lock(tdbb, lock, LCK_SR, LCK_WAIT);
// as we just obtained SR lock for new collation instance
// we could safely delete obsolete instance
if (to_delete)
{
to_delete->destroy(tdbb);
delete to_delete;
}
}
}
else
{
// The metadata no longer knows this text type: dispose of a deferred
// obsolete instance (after taking its existence lock in SR mode) and
// report the error.
if (to_delete)
{
LCK_lock(tdbb, to_delete->existenceLock, LCK_SR, LCK_WAIT);
to_delete->destroy(tdbb);
delete to_delete;
}
ERR_post(Arg::Gds(isc_text_subtype) << Arg::Num(tt_id));
}
return charset_collations[id];
}
// Mark the collation (tt_id) as gone.
//
// If it is loaded in this container, it must be unused (otherwise
// isc_no_meta_update / isc_obj_in_use is posted); it is then flagged obsolete
// under an exclusive existence lock, which is released afterwards.
// If it is not loaded here, a temporary exclusive lock is taken and released.
void CharSetContainer::unloadCollation(thread_db* tdbb, USHORT tt_id)
{
	const USHORT id = TTYPE_TO_COLLATION(tt_id);
	fb_assert(id != 0);

	const bool loadedHere =
		id < charset_collations.getCount() && charset_collations[id] != NULL;

	if (!loadedHere)
	{
		// signal other processes collation is gone
		Lock* const signalLock = CharSetContainer::createCollationLock(tdbb, tt_id);

		LCK_lock(tdbb, signalLock, LCK_EX, LCK_WAIT);
		LCK_release(tdbb, signalLock);

		delete signalLock;
		return;
	}

	if (charset_collations[id]->useCount != 0)
	{
		ERR_post(Arg::Gds(isc_no_meta_update) <<
				 Arg::Gds(isc_obj_in_use) << Arg::Str(charset_collations[id]->name));
	}

	fb_assert(charset_collations[id]->existenceLock);

	// Nothing more to do when it was already flagged obsolete.
	if (charset_collations[id]->obsolete)
		return;

	LCK_convert(tdbb, charset_collations[id]->existenceLock, LCK_EX, LCK_WAIT);
	charset_collations[id]->obsolete = true;
	LCK_release(tdbb, charset_collations[id]->existenceLock);
}
// Ask IntlManager to fill (tt) for the collation described by (info).
// Note that the lookup is done by the *base* collation name.
static INTL_BOOL lookup_texttype(texttype* tt, const SubtypeInfo* info)
{
	const char* const baseCollation = info->baseCollationName.c_str();
	const char* const charSetName = info->charsetName.c_str();

	return IntlManager::lookupCollation(
		baseCollation,
		charSetName,
		info->attributes,
		info->specificAttributes.begin(),
		info->specificAttributes.getCount(),
		info->ignoreAttributes,
		tt);
}
// Call release() on every character set container cached in att_charsets.
void Jrd::Attachment::releaseIntlObjects(thread_db* tdbb)
{
	for (FB_SIZE_T n = 0; n < att_charsets.getCount(); ++n)
	{
		CharSetContainer* const container = att_charsets[n];

		if (container)
			container->release(tdbb);
	}
}
// Call destroy() on every character set container cached in att_charsets
// and clear its slot afterwards.
void Jrd::Attachment::destroyIntlObjects(thread_db* tdbb)
{
	for (FB_SIZE_T n = 0; n < att_charsets.getCount(); ++n)
	{
		CharSetContainer* const container = att_charsets[n];

		if (!container)
			continue;

		container->destroy(tdbb);
		att_charsets[n] = NULL;
	}
}
void INTL_adjust_text_descriptor(thread_db* tdbb, dsc* desc)
{
/**************************************
 *
 *	I N T L _ a d j u s t _ t e x t _ d e s c r i p t o r
 *
 **************************************
 *
 * Functional description
 *	On entry a dtype_text descriptor carries
 *		dsc_length = numberOfCharacters * maxBytesPerChar.
 *	On exit dsc_length is the number of bytes the string
 *	really occupies. Descriptors of other dtypes and
 *	single-byte character sets are left untouched.
 *
 **************************************/
	if (desc->dsc_dtype != dtype_text)
		return;

	SET_TDBB(tdbb);

	USHORT ttype = INTL_TTYPE(desc);
	CharSet* charSet = INTL_charset_lookup(tdbb, ttype);

	if (!charSet->isMultiByte())
		return;

	Firebird::HalfStaticArray<UCHAR, BUFFER_SMALL> scratch;

	if (!(charSet->getFlags() & CHARSET_LEGACY_SEMANTICS))
	{
		// New length is what substring() reports for the first
		// (byte length / maxBytesPerChar) characters.
		desc->dsc_length = charSet->substring(TEXT_LEN(desc), desc->dsc_address,
			TEXT_LEN(desc), scratch.getBuffer(TEXT_LEN(desc)), 0,
			TEXT_LEN(desc) / charSet->maxBytesPerChar());
		return;
	}

	// Legacy semantics: take the substring over the whole byte length first...
	desc->dsc_length = charSet->substring(TEXT_LEN(desc), desc->dsc_address, TEXT_LEN(desc),
		scratch.getBuffer(TEXT_LEN(desc) * charSet->maxBytesPerChar()), 0,
		TEXT_LEN(desc));

	// ...then drop trailing spaces while the character count is above the
	// limit. The limit is derived from the length that was just stored.
	const ULONG maxLength = TEXT_LEN(desc) / charSet->maxBytesPerChar();
	ULONG charLength = charSet->length(desc->dsc_length, desc->dsc_address, true);

	while (charLength > maxLength &&
		desc->dsc_address[desc->dsc_length - 1] == *charSet->getSpace())
	{
		--desc->dsc_length;
		--charLength;
	}
}
CHARSET_ID INTL_charset(thread_db* tdbb, USHORT ttype)
{
/**************************************
 *
 *	I N T L _ c h a r s e t
 *
 **************************************
 *
 * Functional description
 *	Map a text type to the id of its character set.
 *	The well known text types map to fixed ids, ttype_dynamic
 *	maps to the character set of the thread context, and
 *	anything else goes through TTYPE_TO_CHARSET.
 *
 **************************************/
	if (ttype == ttype_none)
		return (CS_NONE);

	if (ttype == ttype_ascii)
		return (CS_ASCII);

	if (ttype == ttype_unicode_fss)
		return (CS_UNICODE_FSS);

	if (ttype == ttype_binary)
		return (CS_BINARY);

	if (ttype == ttype_dynamic)
	{
		SET_TDBB(tdbb);
		return (tdbb->getCharSet());
	}

	return (TTYPE_TO_CHARSET(ttype));
}
int INTL_compare(thread_db* tdbb, const dsc* pText1, const dsc* pText2, ErrorFunction err)
{
/**************************************
 *
 *	I N T L _ c o m p a r e
 *
 **************************************
 *
 * Functional description
 *	Compare two text descriptors using the collation of
 *	the larger of their two text types. When the character
 *	sets differ, the other operand is first converted to the
 *	character set of the chosen text type.
 *	Returns whatever the collation's compare() returns.
 *
 **************************************/
	SET_TDBB(tdbb);

	fb_assert(pText1 != NULL);
	fb_assert(pText2 != NULL);
	fb_assert(IS_TEXT(pText1) && IS_TEXT(pText2));
	fb_assert(INTL_data_or_binary(pText1) || INTL_data_or_binary(pText2));
	fb_assert(err);

	// normal compare routine from CVT_compare
	// trailing spaces in strings are ignored for comparison

	UCHAR* str1;
	USHORT ttype1;
	ULONG len1 = CVT_get_string_ptr(pText1, &ttype1, &str1, NULL, 0, err);

	UCHAR* str2;
	USHORT ttype2;
	ULONG len2 = CVT_get_string_ptr(pText2, &ttype2, &str2, NULL, 0, err);

	// YYY - by SQL II compare_type must be explicit in the
	// SQL statement if there is any doubt

	USHORT compare_type = MAX(ttype1, ttype2);	// YYY

	// Holds the converted operand; must stay alive until compare() is done.
	HalfStaticArray<UCHAR, BUFFER_XLARGE> converted;

	if (ttype1 != ttype2)
	{
		const CHARSET_ID cs1 = INTL_charset(tdbb, ttype1);
		const CHARSET_ID cs2 = INTL_charset(tdbb, ttype2);

		if (cs1 != cs2)
		{
			if (compare_type == ttype2)
			{
				// convert pText1 to pText2's type, if possible
				// (first call only sizes the buffer, second call converts)
				UCHAR* const out = converted.getBuffer(
					INTL_convert_bytes(tdbb, cs2, NULL, 0, cs1, str1, len1, err));
				len1 = INTL_convert_bytes(tdbb, cs2, out, (ULONG) converted.getCount(),
					cs1, str1, len1, err);
				str1 = out;
			}
			else
			{
				// convert pText2 to pText1's type, if possible

				/* YYY - should failure to convert really return
				   an error here?
				   Support joining a 437 & Latin1 Column, and we
				   pick the compare_type as 437, still only want the
				   equal values....
				   But then, what about < operations, which make no
				   sense if the string cannot be expressed...
				 */
				UCHAR* const out = converted.getBuffer(
					INTL_convert_bytes(tdbb, cs1, NULL, 0, cs2, str2, len2, err));
				len2 = INTL_convert_bytes(tdbb, cs1, out, (ULONG) converted.getCount(),
					cs2, str2, len2, err);
				str2 = out;
			}
		}
	}

	TextType* collation = INTL_texttype_lookup(tdbb, compare_type);

	return collation->compare(len1, str1, len2, str2);
}
ULONG INTL_convert_bytes(thread_db* tdbb,
						 CHARSET_ID dest_type,
						 BYTE* dest_ptr,
						 const ULONG dest_len,
						 CHARSET_ID src_type,
						 const BYTE* src_ptr,
						 const ULONG src_len,
						 ErrorFunction err)
{
/**************************************
 *
 *	I N T L _ c o n v e r t _ b y t e s
 *
 **************************************
 *
 * Functional description
 *	Convert (src_len) bytes in character set (src_type)
 *	into character set (dest_type).
 *
 *	When either side is BINARY or NONE the bytes are copied
 *	as they are; otherwise a converter object does the work.
 *
 *	A NULL (dest_ptr) requests only the number of bytes
 *	needed for the result. That is no promise that the
 *	conversion is possible, it just allows the caller to
 *	allocate a large enough buffer.
 *
 * RETURNS:
 *	Length of resulting string, in bytes.
 *	calls (err) if conversion error occurs.
 *
 **************************************/
	SET_TDBB(tdbb);

	fb_assert(src_ptr != NULL);
	fb_assert(src_type != dest_type);
	fb_assert(err != NULL);

	dest_type = INTL_charset(tdbb, dest_type);
	src_type = INTL_charset(tdbb, src_type);

	const bool destIsRaw = (dest_type == CS_BINARY || dest_type == CS_NONE);
	const bool srcIsRaw = (src_type == CS_BINARY || src_type == CS_NONE);

	if (!destIsRaw && !srcIsRaw)
	{
		if (!src_len)
			return 0;

		// character sets are known to be different
		// Do we know an object from cs1 to cs2?

		CsConvert converter = INTL_convert_lookup(tdbb, dest_type, src_type);
		return converter.convert(src_len, src_ptr, dest_len, dest_ptr, NULL, true);
	}

	// Byte copy from here on.

	// See if we just need a length estimate
	if (dest_ptr == NULL)
		return (src_len);

	// Copying into a real character set: the source bytes must be well formed in it.
	if (!destIsRaw)
	{
		CharSet* toCharSet = INTL_charset_lookup(tdbb, dest_type);

		if (!toCharSet->wellFormed(src_len, src_ptr))
			err(Arg::Gds(isc_malformed_string));
	}

	const UCHAR* const start_dest_ptr = dest_ptr;
	const ULONG copyLen = MIN(dest_len, src_len);

	for (ULONG remaining = copyLen; remaining; --remaining)
		*dest_ptr++ = *src_ptr++;

	// See if only space characters are remaining
	const ULONG leftover = src_len - copyLen;

	if (leftover == 0 || allSpaces(INTL_charset_lookup(tdbb, src_type), src_ptr, leftover, 0))
		return dest_ptr - start_dest_ptr;

	err(Arg::Gds(isc_arith_except) << Arg::Gds(isc_string_truncation) <<
		Arg::Gds(isc_trunc_limits) << Arg::Num(dest_len) << Arg::Num(src_len));

	return 0;
}
CsConvert INTL_convert_lookup(thread_db* tdbb, CHARSET_ID to_cs, CHARSET_ID from_cs)
{
/**************************************
 *
 *	I N T L _ c o n v e r t _ l o o k u p
 *
 **************************************
 *
 * Functional description
 *	Return a converter from (from_cs) to (to_cs).
 *	CS_dynamic on either side is replaced by the
 *	character set of the thread context.
 *
 **************************************/
	SET_TDBB(tdbb);

	Database* dbb = tdbb->getDatabase();
	CHECK_DBB(dbb);

	if (to_cs == CS_dynamic)
		to_cs = tdbb->getCharSet();

	if (from_cs == CS_dynamic)
		from_cs = tdbb->getCharSet();

	// Should from_cs == to_cs? be handled better? YYY

	fb_assert(from_cs != CS_dynamic);
	fb_assert(to_cs != CS_dynamic);

	// The converter is obtained from the container of the source character set.
	CharSetContainer* const source = CharSetContainer::lookupCharset(tdbb, from_cs);

	return source->lookupConverter(tdbb, to_cs);
}
int INTL_convert_string(dsc* to, const dsc* from, ErrorFunction err)
{
/**************************************
*
* I N T L _ c o n v e r t _ s t r i n g
*
**************************************
*
* Functional description
* Convert a string from one type to another
*
* Each of the three target dtypes (text, cstring, varying) is
* handled the same way: when both character sets are real and
* different, INTL_convert_bytes does the conversion; otherwise
* the bytes are copied after a well-formedness check. Afterwards
* the result is checked against the character capacity of the
* target, and source bytes that did not fit must all be spaces.
*
* RETURNS:
* 0 if no error in conversion
* non-zero otherwise.
* (1 is returned when there is no thread context; conversion
* problems are reported by calling (err).)
* CVC: Unfortunately, this function puts the source in the 2nd param,
* as opposed to the CVT routines, so const helps mitigating coding mistakes.
*
**************************************/
// Note: This function is called from outside the engine as
// well as inside - we likely can't get rid of JRD_get_thread_data here
thread_db* tdbb = JRD_get_thread_data();
if (tdbb == NULL) // are we in the Engine?
return (1); // no, then can't access intl gah
fb_assert(to != NULL);
fb_assert(from != NULL);
fb_assert(IS_TEXT(to) && IS_TEXT(from));
const CHARSET_ID from_cs = INTL_charset(tdbb, INTL_TTYPE(from));
const CHARSET_ID to_cs = INTL_charset(tdbb, INTL_TTYPE(to));
// p is the write cursor in the target; start marks the first byte of the
// target's string data (it is moved past the length word for varying).
UCHAR* p = to->dsc_address;
const UCHAR* start = p;
// Must convert dtype(cstring,text,vary) and ttype(ascii,binary,..intl..)
UCHAR* from_ptr;
USHORT from_type;
const USHORT from_len = CVT_get_string_ptr(from, &from_type, &from_ptr, NULL, 0, err);
const ULONG to_size = TEXT_LEN(to);
// from_fill: source bytes that did not fit (byte-copy paths only).
// to_fill: target bytes left to pad (dtype_text only).
// NOTE(review): from_fill, to_fill and toLength are assigned only inside
// the switch cases below; the fb_assert(IS_TEXT(to)) above is what
// guarantees that one of the cases runs.
ULONG from_fill, to_fill;
// q is the read cursor in the source.
const UCHAR* q = from_ptr;
CharSet* const toCharSet = INTL_charset_lookup(tdbb, to_cs);
// Number of bytes of string data stored in the target.
ULONG toLength;
switch (to->dsc_dtype)
{
case dtype_text:
if (from_cs != to_cs && to_cs != CS_BINARY && to_cs != CS_NONE && from_cs != CS_NONE)
{
const ULONG to_len = INTL_convert_bytes(tdbb, to_cs, to->dsc_address, to_size,
from_cs, from_ptr, from_len, err);
toLength = to_len;
to_fill = to_size - to_len;
from_fill = 0; // Convert_bytes handles source truncation
p += to_len;
}
else
{
// binary string can always be converted TO by byte-copy
ULONG to_len = MIN(from_len, to_size);
if (!toCharSet->wellFormed(to_len, q))
err(Arg::Gds(isc_malformed_string));
toLength = to_len;
from_fill = from_len - to_len;
to_fill = to_size - to_len;
if (to_len)
{
do
{
*p++ = *q++;
} while (--to_len);
}
}
// dtype_text is the only target that gets space padding up to to_size.
if (to_fill > 0)
pad_spaces(tdbb, to_cs, p, to_fill);
break;
case dtype_cstring:
if (from_cs != to_cs && to_cs != CS_BINARY && to_cs != CS_NONE && from_cs != CS_NONE)
{
const ULONG to_len = INTL_convert_bytes(tdbb, to_cs, to->dsc_address, to_size,
from_cs, from_ptr, from_len, err);
toLength = to_len;
// terminate the C string right after the converted bytes
to->dsc_address[to_len] = 0;
from_fill = 0; // Convert_bytes handles source truncation
}
else
{
// binary string can always be converted TO by byte-copy
ULONG to_len = MIN(from_len, to_size);
if (!toCharSet->wellFormed(to_len, q))
err(Arg::Gds(isc_malformed_string));
toLength = to_len;
from_fill = from_len - to_len;
if (to_len)
{
do
{
*p++ = *q++;
} while (--to_len);
}
// terminate the C string right after the copied bytes
*p = 0;
}
break;
case dtype_varying:
if (from_cs != to_cs && to_cs != CS_BINARY && to_cs != CS_NONE && from_cs != CS_NONE)
{
// convert straight into the vary_string part, then store the length
UCHAR* vstr = reinterpret_cast<UCHAR*>(((vary*) to->dsc_address)->vary_string);
start = vstr;
const ULONG to_len = INTL_convert_bytes(tdbb, to_cs, vstr,
to_size, from_cs, from_ptr, from_len, err);
toLength = to_len;
((vary*) to->dsc_address)->vary_length = to_len;
from_fill = 0; // Convert_bytes handles source truncation
}
else
{
// binary string can always be converted TO by byte-copy
ULONG to_len = MIN(from_len, to_size);
if (!toCharSet->wellFormed(to_len, q))
err(Arg::Gds(isc_malformed_string));
toLength = to_len;
from_fill = from_len - to_len;
// store the length first, then move p (and start) to the string data
((vary*) p)->vary_length = to_len;
start = p = reinterpret_cast<UCHAR*>(((vary*) p)->vary_string);
if (to_len)
{
do
{
*p++ = *q++;
} while (--to_len);
}
}
break;
}
// Despite their names, src_len is the character count of what was just
// stored in the target and dest_len is the target's capacity in characters
// (byte size divided by maxBytesPerChar).
const ULONG src_len = toCharSet->length(toLength, start, false);
const ULONG dest_len = (ULONG) to_size / toCharSet->maxBytesPerChar();
if (toCharSet->isMultiByte() &&
!(toCharSet->getFlags() & CHARSET_LEGACY_SEMANTICS) &&
toLength != 31 && // allow non CHARSET_LEGACY_SEMANTICS to be used as connection charset
src_len > dest_len)
{
err(Arg::Gds(isc_arith_except) << Arg::Gds(isc_string_truncation) <<
Arg::Gds(isc_trunc_limits) << Arg::Num(dest_len) << Arg::Num(src_len));
}
if (from_fill)
{
// Make sure remaining characters on From string are spaces
// (q points just past the last source byte that was copied)
if (!allSpaces(INTL_charset_lookup(tdbb, from_cs), q, from_fill, 0))
err(Arg::Gds(isc_arith_except) << Arg::Gds(isc_string_truncation) <<
Arg::Gds(isc_trunc_limits) << Arg::Num(dest_len) << Arg::Num(src_len));
}
return 0;
}
bool INTL_data(const dsc* pText)
{
/**************************************
 *
 *	I N T L _ d a t a
 *
 **************************************
 *
 * Functional description
 *	Tell whether a descriptor holds international text,
 *	i.e. a string dtype whose text type is not one of the
 *	internal ones (and is therefore subject to a user
 *	defined or non-binary collation or comparison).
 *
 **************************************/
	fb_assert(pText != NULL);

	return IS_TEXT(pText) && !INTERNAL_TTYPE(pText);
}
bool INTL_data_or_binary(const dsc* pText)
{
/**************************************
 *
 *	I N T L _ d a t a _ o r _ b i n a r y
 *
 **************************************
 *
 * Functional description
 *	True for international text (see INTL_data) and
 *	also for descriptors whose text type is ttype_binary.
 *
 **************************************/
	if (INTL_data(pText))
		return true;

	return pText->dsc_ttype() == ttype_binary;
}
bool INTL_defined_type(thread_db* tdbb, USHORT t_type)
{
/**************************************
 *
 *	I N T L _ d e f i n e d _ t y p e
 *
 **************************************
 *
 * Functional description
 *	Report whether (t_type) can be looked up as a text type.
 * Return:
 *	false	type is not defined.
 *	true	type is defined
 *
 * Note:
 *	DFW has cleanup to perform, so this routine never lets
 *	an error escape: any exception raised by the lookup is
 *	swallowed and turned into "false".
 *
 **************************************/
	SET_TDBB(tdbb);

	bool defined = true;

	try
	{
		// Lookup runs under a local status guard.
		ThreadStatusGuard local_status(tdbb);

		INTL_texttype_lookup(tdbb, t_type);
	}
	catch (...)
	{
		defined = false;
	}

	return defined;
}
USHORT INTL_key_length(thread_db* tdbb, USHORT idxType, USHORT iLength)
{
/**************************************
 *
 *	I N T L _ k e y _ l e n g t h
 *
 **************************************
 *
 * Functional description
 *	For an international index type and a maximum string
 *	length (iLength), return the length of the byte string
 *	key descriptor to use when collating such text.
 *	Internal text types use (iLength) as is; others ask
 *	their collation.
 *
 **************************************/
	SET_TDBB(tdbb);

	fb_assert(idxType >= idx_first_intl_string);

	const USHORT ttype = INTL_INDEX_TO_TEXT(idxType);

	USHORT result = iLength;

	if (ttype > ttype_last_internal)
	{
		TextType* collation = INTL_texttype_lookup(tdbb, ttype);
		result = collation->key_length(iLength);
	}

	// Validity checks on the computed key length:
	// first cap it at MAX_KEY, then never go below iLength.

	if (result > MAX_KEY)
		result = MAX_KEY;

	if (result < iLength)
		result = iLength;

	return (result);
}
CharSet* INTL_charset_lookup(thread_db* tdbb, USHORT parm1)
{
/**************************************
 *
 *	I N T L _ c h a r s e t _ l o o k u p
 *
 **************************************
 *
 * Functional description
 *
 *	Return the character set object for (parm1).
 *
 *	The work is done by CharSetContainer::lookupCharset,
 *	which serves the request from the attachment's cache
 *	or creates the container on demand.
 *
 * Returns:
 *	*charset	- if no errors;
 *	<never>		- if error
 *
 **************************************/
	return CharSetContainer::lookupCharset(tdbb, parm1)->getCharSet();
}
Collation* INTL_texttype_lookup(thread_db* tdbb, USHORT parm1)
{
/**************************************
 *
 *	I N T L _ t e x t t y p e _ l o o k u p
 *
 **************************************
 *
 * Functional description
 *
 *	Return the collation object for text type (parm1).
 *	ttype_dynamic is first mapped to the text type of the
 *	thread context's character set.
 *
 *	The character set container is located first, then
 *	asked for the collation (loading it when necessary).
 *
 * Returns:
 *	*object		- if no errors;
 *	<never>		- if error
 *
 **************************************/
	SET_TDBB(tdbb);

	const USHORT ttype = (parm1 == ttype_dynamic) ?
		MAP_CHARSET_TO_TTYPE(tdbb->getCharSet()) : parm1;

	CharSetContainer* const container = CharSetContainer::lookupCharset(tdbb, ttype);

	return container->lookupCollation(tdbb, ttype);
}
void INTL_texttype_unload(thread_db* tdbb, USHORT ttype)
{
/**************************************
 *
 *	I N T L _ t e x t t y p e _ u n l o a d
 *
 **************************************
 *
 * Functional description
 *	Unload the collation (ttype) by delegating to the
 *	container of its character set.
 *
 **************************************/
	SET_TDBB(tdbb);

	CharSetContainer* const container = CharSetContainer::lookupCharset(tdbb, ttype);

	if (!container)
		return;

	container->unloadCollation(tdbb, ttype);
}
bool INTL_texttype_validate(Jrd::thread_db* tdbb, const SubtypeInfo* info)
{
/**************************************
 *
 *	I N T L _ t e x t t y p e _ v a l i d a t e
 *
 **************************************
 *
 * Functional description
 *	Check whether the collation described by (info) can be
 *	instantiated. A texttype is built in a local structure
 *	and, when that worked, disposed of again right away.
 *
 **************************************/
	SET_TDBB(tdbb);

	texttype probe;
	memset(&probe, 0, sizeof(probe));

	if (!lookup_texttype(&probe, info))
		return false;

	// The lookup succeeded: give the texttype a chance to clean up.
	if (probe.texttype_fn_destroy)
		probe.texttype_fn_destroy(&probe);

	return true;
}
void INTL_pad_spaces(thread_db* tdbb, DSC* type, UCHAR* string, ULONG length)
{
/**************************************
 *
 *	I N T L _ p a d _ s p a c e s
 *
 **************************************
 *
 * Functional description
 *	Fill (length) bytes at (string) with the space
 *	character of the character set that the descriptor
 *	(type) refers to.
 *
 **************************************/
	SET_TDBB(tdbb);

	fb_assert(type != NULL);
	fb_assert(IS_TEXT(type));
	fb_assert(string != NULL);

	const USHORT csId = INTL_charset(tdbb, type->dsc_ttype());

	pad_spaces(tdbb, csId, string, length);
}
USHORT INTL_string_to_key(thread_db* tdbb,
						  USHORT idxType,
						  const dsc* pString,
						  DSC* pByte,
						  USHORT key_type)
{
/**************************************
 *
 *	I N T L _ s t r i n g _ t o _ k e y
 *
 **************************************
 *
 * Functional description
 *	Turn the string (pString) into a byte string, stored
 *	at (pByte), that collates naturally (byte order).
 *
 *	For the metadata, binary, ascii and none text types the
 *	bytes are copied and trailing pad characters removed;
 *	for every other text type the collation builds the key.
 *
 *	Return the length of the resulting byte string.
 *
 **************************************/
	SET_TDBB(tdbb);

	fb_assert(idxType >= idx_first_intl_string || idxType == idx_string ||
			  idxType == idx_byte_array || idxType == idx_metadata);
	fb_assert(pString != NULL);
	fb_assert(pByte != NULL);
	fb_assert(pString->dsc_address != NULL);
	fb_assert(pByte->dsc_address != NULL);
	fb_assert(pByte->dsc_dtype == dtype_text);

	// Derive text type and pad character from the index type.
	UCHAR pad_char;
	USHORT ttype;

	if (idxType == idx_string)
	{
		pad_char = ' ';
		ttype = ttype_none;
	}
	else if (idxType == idx_byte_array)
	{
		pad_char = 0;
		ttype = ttype_binary;
	}
	else if (idxType == idx_metadata)
	{
		pad_char = ' ';
		ttype = ttype_metadata;
	}
	else
	{
		pad_char = 0;
		ttype = INTL_INDEX_TO_TEXT(idxType);
	}

	// Make a string into the proper type of text

	MoveBuffer temp;
	UCHAR* src;
	USHORT len = MOV_make_string2(tdbb, pString, ttype, &src, temp);

	UCHAR* dest = pByte->dsc_address;
	USHORT destLen = pByte->dsc_length;

	const bool plainCopy =
		ttype == ttype_metadata ||
		ttype == ttype_binary ||
		ttype == ttype_ascii ||
		ttype == ttype_none;

	if (!plainCopy)
	{
		TextType* collation = INTL_texttype_lookup(tdbb, ttype);
		const USHORT keyLen = collation->string_to_key(len, src, pByte->dsc_length, dest, key_type);
		return (keyLen);
	}

	// Copy as many bytes as both the source and the destination allow.
	for (USHORT count = MIN(len, destLen); count; --count)
		*dest++ = *src++;

	// strip off ending pad characters
	while (dest > pByte->dsc_address && *(dest - 1) == pad_char)
		dest--;

	const USHORT outlen = (dest - pByte->dsc_address);

	return (outlen);
}
static bool allSpaces(CharSet* charSet, const BYTE* ptr, ULONG len, ULONG offset)
{
/**************************************
 *
 *	a l l S p a c e s
 *
 **************************************
 *
 * Functional description
 *	Tell whether the bytes ptr[offset] ... ptr[len - 1]
 *	consist of nothing but space characters, using the
 *	space definition of (charSet).
 *	How a space looks in binary depends on the character
 *	set (0x20 for Ascii, 0x0020 for Unicode, 0x20 for SJIS,
 *	but must watch for 0x??20, which is NOT a space).
 *
 **************************************/
	fb_assert(ptr != NULL);

	// We are assuming offset points to the first byte which was not
	// consumed in a conversion.  And that offset is pointing
	// to a character boundary

	const BYTE* cur = &ptr[offset];
	const BYTE* const limit = &ptr[len];

	// Single-octet character sets are optimized here
	if (charSet->getSpaceLength() == 1)
	{
		for (; cur < limit; ++cur)
		{
			if (*cur != *charSet->getSpace())
				return false;
		}

		return true;
	}

	// Multi-octet space: match the space byte sequence over and over.
	const unsigned char* const spaceStart = charSet->getSpace();
	const unsigned char* const spaceEnd = &spaceStart[charSet->getSpaceLength()];

	while (cur < limit)
	{
		for (const unsigned char* sp = charSet->getSpace(); cur < limit && sp < spaceEnd; ++sp, ++cur)
		{
			if (*cur != *sp)
				return false;
		}
	}

	return true;
}
static int blocking_ast_collation(void* ast_object)
{
/**************************************
*
* b l o c k i n g _ a s t _ c o l l a t i o n
*
**************************************
*
* Functional description
* Someone is trying to drop a collation. If there
* are outstanding interests in the existence of
* the collation then just mark as blocking and return.
* Otherwise, mark the collation as obsolete
* and release the collation existence lock.
*
* NOTE(review): the code below does not check for outstanding
* interests; it always marks the collation obsolete and releases
* the existence lock.
*
* (ast_object) is the Collation that was passed as lock object
* when the existence lock was created. Always returns 0.
*
**************************************/
Collation* const tt = static_cast<Collation*>(ast_object);
try
{
Database* const dbb = tt->existenceLock->lck_dbb;
// Sets up the thread context used for the LCK_release call below.
AsyncContextHolder tdbb(dbb, FB_FUNCTION, tt->existenceLock);
tt->obsolete = true;
LCK_release(tdbb, tt->existenceLock);
}
catch (const Firebird::Exception&)
{} // no-op
// Any Firebird exception raised above is deliberately swallowed.
return 0;
}
static void pad_spaces(thread_db* tdbb, CHARSET_ID charset, BYTE* ptr, ULONG len)
{
/**************************************
 *
 *	p a d _ s p a c e s
 *
 **************************************
 *
 * Functional description
 *	Fill (len) bytes - a byte count, not a character
 *	count - starting at (ptr) with the space character
 *	defined by the character set (charset).
 *
 **************************************/
	SET_TDBB(tdbb);

	fb_assert(ptr != NULL);

	CharSet* const cs = INTL_charset_lookup(tdbb, charset);
	const BYTE* const limit = &ptr[len];

	// Single-octet character sets are optimized here
	if (cs->getSpaceLength() == 1)
	{
		for (; ptr < limit; ++ptr)
			*ptr = *cs->getSpace();

		return;
	}

	// Multi-octet space: write the space byte sequence repeatedly.
	const UCHAR* const spaceStart = cs->getSpace();
	const UCHAR* const spaceEnd = &spaceStart[cs->getSpaceLength()];

	while (ptr < limit)
	{
		const UCHAR* sp = cs->getSpace();

		while (ptr < limit && sp < spaceEnd)
			*ptr++ = *sp++;

		// This fb_assert is checking that we didn't have a buffer-end
		// in the middle of a space character
		fb_assert(!(ptr == limit) || (sp == spaceEnd));
	}
}