firebird-mirror/src/jrd/intl.cpp

/*************  history ************
*
*       COMPONENT: JRD  MODULE: INTL.CPP
*       generated by Marion V2.5     2/6/90
*       from dev              db        on 4-JAN-1995
*****************************************************************
*
*       PR	2002-06-02 Added ugly c hack in
*       intl_back_compat_alloc_func_lookup.
*       When someone has time we need to change the references to
*       return (void*) function to something more C++ like
*
*       42 4711 3 11 17  tamlin   2001
*       Added silly numbers before my name, and converted it to C++.
*
*       18850   daves   4-JAN-1995
*       Fix gds__alloc usage
*
*       18837   deej    31-DEC-1994
*       fixing up HARBOR_MERGE
*
*       18821   deej    27-DEC-1994
*       HARBOR MERGE
*
*       18789   jdavid  19-DEC-1994
*       Cast some functions
*
*       17508   jdavid  15-JUL-1994
*       Bring it up to date
*
*       17500   daves   13-JUL-1994
*       Bug 6645: Different calculation of partial keys
*
*       17202   katz    24-MAY-1994
*       PC_PLATFORM requires the .dll extension
*
*       17191   katz    23-MAY-1994
*       OS/2 requires the .dll extension
*
*       17180   katz    23-MAY-1994
*       Define location of DLL on OS/2
*
*       17149   katz    20-MAY-1994
*       In JRD, isc_arg_number arguments are SLONG's not int's
*
*       16633   daves   19-APR-1994
*       Bug 6202: International licensing uses INTERNATIONAL product code
*
*       16555   katz    17-APR-1994
*       The last argument of calls to ERR_post should be 0
*
*       16521   katz    14-APR-1994
*       Borland C needs a decorated symbol to lookup
*
*       16403   daves   8-APR-1994
*       Bug 6441: Emit an error whenever transliteration from ttype_binary attempted
*
*       16141   katz    28-MAR-1994
*       Don't declare return value from ISC_lookup_entrypoint as API_ROUTINE
*
 * The contents of this file are subject to the Interbase Public
 * License Version 1.0 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy
 * of the License at http://www.Inprise.com/IPL.html
 *
 * Software distributed under the License is distributed on an
 * "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express
 * or implied. See the License for the specific language governing
 * rights and limitations under the License.
 *
 * The Original Code was created by Inprise Corporation
 * and its predecessors. Portions created by Inprise Corporation are
 * Copyright (C) Inprise Corporation.
 *
 * All Rights Reserved.
 * Contributor(s): ______________________________________.
 *
 * 2002.10.29 Sean Leyne - Removed obsolete "Netware" port
 *
 * 2002.10.30 Sean Leyne - Removed support for obsolete "PC_PLATFORM" define
 *
*/


/*
 *      PROGRAM:        JRD Intl
 *      MODULE:         intl.cpp
 *      DESCRIPTION:    International text support routines
 *
 * copyright (c) 1992, 1993 by Borland International
 */

#include "firebird.h"
#include <string.h>
#include "../jrd/common.h"
#include <stdio.h>
#include "../jrd/jrd.h"
#include "../jrd/req.h"
#include "../jrd/val.h"
#include "gen/iberror.h"
#include "../jrd/intl.h"
#include "../jrd/intl_classes.h"
#include "../jrd/ods.h"
#include "../jrd/btr.h"
#include "../intl/charsets.h"
#include "../intl/country_codes.h"
#include "../jrd/gdsassert.h"
//#include "../jrd/license.h"
#ifdef INTL_BUILTIN
#include "../intl/ld_proto.h"
#endif
#include "../jrd/cvt_proto.h"
#include "../common/cvt.h"
#include "../jrd/err_proto.h"
#include "../jrd/fun_proto.h"
#include "../jrd/gds_proto.h"
#include "../jrd/intl_proto.h"
#include "../jrd/isc_proto.h"
#include "../jrd/lck_proto.h"
#include "../jrd/met_proto.h"
#include "../jrd/intlobj_new.h"
#include "../jrd/mov_proto.h"
#include "../jrd/IntlManager.h"
#include "../common/classes/init.h"

using namespace Jrd;
using namespace Firebird;

#define IS_TEXT(x)      (((x)->dsc_dtype == dtype_text)   ||\
			 ((x)->dsc_dtype == dtype_varying)||\
			 ((x)->dsc_dtype == dtype_cstring))


static bool all_spaces(thread_db*, CHARSET_ID, const BYTE*, ULONG, ULONG);
static int blocking_ast_collation(void* ast_object);
static void pad_spaces(thread_db*, CHARSET_ID, BYTE *, ULONG);
static INTL_BOOL lookup_charset(charset* cs, const SubtypeInfo* info);
static INTL_BOOL lookup_texttype(texttype* tt, const SubtypeInfo* info);


// Classes and structures used internally to this file and intl implementation
class CharSetContainer
{
public:
	CharSetContainer(MemoryPool& p, USHORT cs_id, const SubtypeInfo* info);

	void destroy()
	{
		cs->destroy();
		for (size_t i = 0; i < charset_collations.getCount(); i++)
		{
			if (charset_collations[i])
				charset_collations[i]->destroy();
		}
	}

	CharSet* getCharSet() { return cs; }

	Collation* lookupCollation(thread_db* tdbb, USHORT tt_id);
	void unloadCollation(thread_db* tdbb, USHORT tt_id);

	CsConvert lookupConverter(thread_db* tdbb, CHARSET_ID to_cs);

	static CharSetContainer* lookupCharset(thread_db* tdbb, USHORT ttype);
	static Lock* createCollationLock(thread_db* tdbb, USHORT ttype);

private:
	Firebird::Array<Collation*> charset_collations;
	CharSet* cs;
};


CharSetContainer* CharSetContainer::lookupCharset(thread_db* tdbb, USHORT ttype)
{
/**************************************
 *
 *      l o o k u p C h a r s e t
 *
 **************************************
 *
 * Functional description
 *
 *      Lookup a character set descriptor.
 *
 *      First, search the appropriate vector that hangs
 *      off the dbb.  If not found, then call the lower
 *      level lookup routine to allocate it, or punt
 *		if we don't know about the charset.
 *
 * Returns:
 *      *charset
 *      <never>         - if error
 *
 **************************************/
	CharSetContainer *cs = NULL;

	SET_TDBB(tdbb);
	Database* dbb = tdbb->getDatabase();

	USHORT id = TTYPE_TO_CHARSET(ttype);
	if (id == CS_dynamic)
		id = tdbb->getAttachment()->att_charset;

	if (id >= dbb->dbb_charsets.getCount())
		dbb->dbb_charsets.resize(id + 10);
	else
		cs = dbb->dbb_charsets[id];

	// allocate a new character set object if we couldn't find one.
	if (!cs) {
		SubtypeInfo info;

		if (id == CS_UTF16)
			info.charsetName = "UTF16";

		if ((id == CS_UTF16) || MET_get_char_coll_subtype_info(tdbb, id, &info))
		{
			dbb->dbb_charsets[id] = cs =
				FB_NEW(*dbb->dbb_permanent) CharSetContainer(*dbb->dbb_permanent, id, &info);
		}
		else
			ERR_post(Arg::Gds(isc_text_subtype) << Arg::Num(ttype));
	}

	return cs;
}

Lock* CharSetContainer::createCollationLock(thread_db* tdbb, USHORT ttype)
{
/**************************************
 *
 *      c r e a t e C o l l a t i o n L o c k
 *
 **************************************
 *
 * Functional description
 *      Create a collation lock.
 *
 **************************************/
	Lock* lock = FB_NEW_RPT(*tdbb->getDatabase()->dbb_permanent, 0) Lock;
	lock->lck_parent = tdbb->getDatabase()->dbb_lock;
	lock->lck_dbb = tdbb->getDatabase();
	lock->lck_key.lck_long = ttype;
	lock->lck_length = sizeof(lock->lck_key.lck_long);
	lock->lck_type = LCK_tt_exist;
	lock->lck_owner_handle = LCK_get_owner_handle(tdbb, lock->lck_type);
	lock->lck_object = NULL;
	lock->lck_ast = blocking_ast_collation;

	return lock;
}

CharSetContainer::CharSetContainer(MemoryPool& p, USHORT cs_id, const SubtypeInfo* info) :
	charset_collations(p),
	cs(NULL)
{
	charset* csL = FB_NEW(p) charset;
	memset(csL, 0, sizeof(charset));

	if (lookup_charset(csL, info) && (csL->charset_flags & CHARSET_ASCII_BASED))
		this->cs = CharSet::createInstance(p, cs_id, csL);
	else
	{
		delete csL;
		ERR_post(Arg::Gds(isc_charset_not_installed) << Arg::Str(info->charsetName));
	}
}

CsConvert CharSetContainer::lookupConverter(thread_db* tdbb, CHARSET_ID toCsId)
{
	if (toCsId == CS_UTF16)
		return CsConvert(cs->getStruct(), NULL);

	CharSet* toCs = INTL_charset_lookup(tdbb, toCsId);
	if (cs->getId() == CS_UTF16)
		return CsConvert(NULL, toCs->getStruct());

	return CsConvert(cs->getStruct(), toCs->getStruct());
}

Collation* CharSetContainer::lookupCollation(thread_db* tdbb, USHORT tt_id)
{
	const USHORT id = TTYPE_TO_COLLATION(tt_id);

	if (id < charset_collations.getCount() && charset_collations[id] != NULL)
	{
		if (charset_collations[id]->obsolete)
		{
			if (charset_collations[id]->existenceLock)
				LCK_release(tdbb, charset_collations[id]->existenceLock);

			charset_collations[id]->destroy();
			delete charset_collations[id];
			charset_collations[id] = NULL;
		}
		else
			return charset_collations[id];
	}

	SubtypeInfo info;
	if (MET_get_char_coll_subtype_info(tdbb, tt_id, &info))
	{
		CharSet* charset = INTL_charset_lookup(tdbb, TTYPE_TO_CHARSET(tt_id));

		if (TTYPE_TO_CHARSET(tt_id) != CS_METADATA)
		{
			Firebird::UCharBuffer specificAttributes;
			ULONG size = info.specificAttributes.getCount() * charset->maxBytesPerChar();

			size = INTL_convert_bytes(tdbb, TTYPE_TO_CHARSET(tt_id),
									  specificAttributes.getBuffer(size), size,
									  CS_METADATA, info.specificAttributes.begin(),
									  info.specificAttributes.getCount(), ERR_post);
			specificAttributes.shrink(size);
			info.specificAttributes = specificAttributes;
		}

		texttype* tt = FB_NEW(*tdbb->getDatabase()->dbb_permanent) texttype;
		memset(tt, 0, sizeof(texttype));

		if (!lookup_texttype(tt, &info))
		{
			delete tt;
			ERR_post(Arg::Gds(isc_collation_not_installed) << Arg::Str(info.collationName) <<
															  Arg::Str(info.charsetName));
		}

		if (charset_collations.getCount() <= id)
			charset_collations.grow(id + 1);

		fb_assert((tt->texttype_canonical_width == 0 && tt->texttype_fn_canonical == NULL) ||
				  (tt->texttype_canonical_width != 0 && tt->texttype_fn_canonical != NULL));

		if (tt->texttype_canonical_width == 0)
		{
			if (charset->isMultiByte())
				tt->texttype_canonical_width = sizeof(ULONG);	// UTF-32
			else
			{
				tt->texttype_canonical_width = charset->minBytesPerChar();
				// canonical is equal to string, then TEXTTYPE_DIRECT_MATCH can be turned on
				tt->texttype_flags |= TEXTTYPE_DIRECT_MATCH;
			}
		}

		charset_collations[id] = Collation::createInstance(*tdbb->getDatabase()->dbb_permanent, tt_id, tt, charset);
		charset_collations[id]->name = info.collationName;

		// we don't need a lock in the charset
		if (id != 0)
		{
			Lock* lock = charset_collations[id]->existenceLock =
				CharSetContainer::createCollationLock(tdbb, tt_id);
			lock->lck_object = charset_collations[id];

			LCK_lock(tdbb, lock, LCK_SR, LCK_WAIT);
		}
	}
	else
		ERR_post(Arg::Gds(isc_text_subtype) << Arg::Num(tt_id));

	return charset_collations[id];
}


void CharSetContainer::unloadCollation(thread_db* tdbb, USHORT tt_id)
{
	const USHORT id = TTYPE_TO_COLLATION(tt_id);

	if (id < charset_collations.getCount() && charset_collations[id] != NULL)
	{
		if (charset_collations[id]->useCount != 0)
		{
			ERR_post(Arg::Gds(isc_no_meta_update) <<
					 Arg::Gds(isc_obj_in_use) << Arg::Str(charset_collations[id]->name));
		}

		if (charset_collations[id]->existenceLock)
			LCK_convert(tdbb, charset_collations[id]->existenceLock, LCK_EX, LCK_WAIT);

		charset_collations[id]->obsolete = true;

		if (charset_collations[id]->existenceLock)
		{
			LCK_release(tdbb, charset_collations[id]->existenceLock);
			charset_collations[id]->existenceLock = NULL;
		}
	}
	else
	{
		Lock* lock = CharSetContainer::createCollationLock(tdbb, tt_id);

		LCK_lock(tdbb, lock, LCK_EX, LCK_WAIT);
		LCK_release(tdbb, lock);

		delete lock;
	}
}


static INTL_BOOL lookup_charset(charset* cs, const SubtypeInfo* info)
{
	return IntlManager::lookupCharSet(info->charsetName.c_str(), cs);
}


static INTL_BOOL lookup_texttype(texttype* tt, const SubtypeInfo* info)
{
	return IntlManager::lookupCollation(info->baseCollationName.c_str(), info->charsetName.c_str(),
		info->attributes, info->specificAttributes.begin(),
		info->specificAttributes.getCount(), info->ignoreAttributes, tt);
}


void Database::destroyIntlObjects()
{
	for (size_t i = 0; i < dbb_charsets.getCount(); i++)
	{
		if (dbb_charsets[i])
		{
			dbb_charsets[i]->destroy();
			dbb_charsets[i] = 0;
		}
	}
}


CHARSET_ID INTL_charset(thread_db* tdbb, USHORT ttype)
{
/**************************************
 *
 *      I N T L _ c h a r s e t
 *
 **************************************
 *
 * Functional description
 *      Return the character set ID for a piece of text.
 *
 **************************************/

	switch (ttype)
	{
	case ttype_none:
		return (CS_NONE);
	case ttype_ascii:
		return (CS_ASCII);
	case ttype_unicode_fss:
		return (CS_UNICODE_FSS);
	case ttype_binary:
		return (CS_BINARY);
	case ttype_dynamic:
		SET_TDBB(tdbb);
		return (tdbb->getAttachment()->att_charset);
	default:
		return (TTYPE_TO_CHARSET(ttype));
	}
}


int INTL_compare(thread_db* tdbb,
				const dsc* pText1,
				const dsc* pText2,
				ErrorFunction err)
{
/**************************************
 *
 *      I N T L _ c o m p a r e
 *
 **************************************
 *
 * Functional description
 *      Compare two pieces of international text.
 *
 **************************************/
	SET_TDBB(tdbb);

	fb_assert(pText1 != NULL);
	fb_assert(pText2 != NULL);
	fb_assert(IS_TEXT(pText1) && IS_TEXT(pText2));
	fb_assert(INTL_data_or_binary(pText1) || INTL_data_or_binary(pText2));
	fb_assert(err);

/* normal compare routine from CVT_compare */
/* trailing spaces in strings are ignored for comparision */

	UCHAR* p1;
	USHORT t1;
	USHORT length1 = CVT_get_string_ptr(pText1, &t1, &p1, NULL, 0, err);

	UCHAR* p2;
	USHORT t2;
	USHORT length2 = CVT_get_string_ptr(pText2, &t2, &p2, NULL, 0, err);

/* YYY - by SQL II compare_type must be explicit in the
   SQL statement if there is any doubt */

	USHORT compare_type = MAX(t1, t2);	/* YYY */
	UCHAR buffer[MAX_KEY];

	if (t1 != t2) {
		CHARSET_ID cs1 = INTL_charset(tdbb, t1);
		CHARSET_ID cs2 = INTL_charset(tdbb, t2);
		if (cs1 != cs2) {
			if (compare_type != t2) {
				/* convert pText2 to pText1's type, if possible */
				/* YYY - should failure to convert really return
				   an error here?
				   Support joining a 437 & Latin1 Column, and we
				   pick the compare_type as 437, still only want the
				   equal values....
				   But then, what about < operations, which make no
				   sense if the string cannot be expressed...
				 */

				length2 = INTL_convert_bytes(tdbb, cs1, buffer, sizeof(buffer), cs2, p2, length2, err);
				p2 = buffer;
			}
			else {
				/* convert pText1 to pText2's type, if possible */

				length1 = INTL_convert_bytes(tdbb, cs2, buffer, sizeof(buffer), cs1, p1, length1, err);
				p1 = buffer;
			}
		}
	}

	TextType* obj = INTL_texttype_lookup(tdbb, compare_type);

	return obj->compare(length1, p1, length2, p2);
}


ULONG INTL_convert_bytes(thread_db* tdbb,
						 CHARSET_ID dest_type,
						 BYTE* dest_ptr,
						 ULONG dest_len,
						 CHARSET_ID src_type,
						 const BYTE* src_ptr,
						 ULONG src_len,
						 ErrorFunction err)
{
/**************************************
 *
 *      I N T L _ c o n v e r t _ b y t e s
 *
 **************************************
 *
 * Functional description
 *      Given a string of bytes in one character set, convert it to another
 *      character set.
 *
 *      If (dest_ptr) is NULL, return the count of bytes needed to convert
 *      the string.  This does not guarantee the string can be converted,
 *      the purpose of this is to allocate a large enough buffer.
 *
 * RETURNS:
 *      Length of resulting string, in bytes.
 *      calls (err) if conversion error occurs.
 *
 **************************************/
	ULONG len;


	SET_TDBB(tdbb);

	fb_assert(src_ptr != NULL);
	fb_assert(src_type != dest_type);
	fb_assert(err != NULL);

	dest_type = INTL_charset(tdbb, dest_type);
	src_type = INTL_charset(tdbb, src_type);

	const UCHAR* const start_dest_ptr = dest_ptr;

	if ((dest_type == CS_BINARY) || (dest_type == CS_NONE) ||
		(src_type == CS_BINARY) || (src_type == CS_NONE))
	{
		/* See if we just need a length estimate */
		if (dest_ptr == NULL)
			return (src_len);

		if (dest_type != CS_BINARY && dest_type != CS_NONE)
		{
			CharSet* toCharSet = INTL_charset_lookup(tdbb, dest_type);

			if (!toCharSet->wellFormed(src_len, src_ptr))
				err(Arg::Gds(isc_malformed_string));
		}

		len = MIN(dest_len, src_len);
		if (len)
			do {
				*dest_ptr++ = *src_ptr++;
			} while (--len);

		/* See if only space characters are remaining */
		len = src_len - MIN(dest_len, src_len);
		if (!len || all_spaces(tdbb, src_type, src_ptr, len, 0))
			return (dest_ptr - start_dest_ptr);

		err(Arg::Gds(isc_arith_except) << Arg::Gds(isc_string_truncation));
	}
	else if (src_len)
	{
		/* character sets are known to be different */
		/* Do we know an object from cs1 to cs2? */

		CsConvert cs_obj = INTL_convert_lookup(tdbb, dest_type, src_type);
		return cs_obj.convert(src_len, src_ptr, dest_len, dest_ptr, NULL, true);
	}

	return 0;
}


CsConvert INTL_convert_lookup(thread_db* tdbb,
								CHARSET_ID to_cs,
								CHARSET_ID from_cs)
{
/**************************************
 *
 *      I N T L _ c o n v e r t _ l o o k u p
 *
 **************************************
 *
 * Functional description
 *
 **************************************/

	SET_TDBB(tdbb);
	Database* dbb = tdbb->getDatabase();
	CHECK_DBB(dbb);

	if (from_cs == CS_dynamic)
		from_cs = tdbb->getAttachment()->att_charset;

	if (to_cs == CS_dynamic)
		to_cs = tdbb->getAttachment()->att_charset;

/* Should from_cs == to_cs? be handled better? YYY */

	fb_assert(from_cs != CS_dynamic);
	fb_assert(to_cs != CS_dynamic);

	CharSetContainer* charset = CharSetContainer::lookupCharset(tdbb, from_cs);

	return charset->lookupConverter(tdbb, to_cs);
}


int INTL_convert_string(dsc* to, const dsc* from, ErrorFunction err)
{
/**************************************
 *
 *      I N T L _ c o n v e r t _ s t r i n g
 *
 **************************************
 *
 * Functional description
 *      Convert a string from one type to another
 *
 * RETURNS:
 *      0 if no error in conversion
 *      non-zero otherwise.
 *      CVC: Unfortunately, this function puts the source in the 2nd param,
 *      as opposed to the CVT routines, so const help mitigating coding mistakes.
 *
 **************************************/

/* Note: This function is called from outside the engine as
   well as inside - we likely can't get rid of JRD_get_thread_data here */
	thread_db* tdbb = JRD_get_thread_data();
	if (tdbb == NULL)			/* are we in the Engine? */
		return (1);				/* no, then can't access intl gah */

	fb_assert(to != NULL);
	fb_assert(from != NULL);
	fb_assert(IS_TEXT(to) && IS_TEXT(from));

	CHARSET_ID from_cs = INTL_charset(tdbb, INTL_TTYPE(from));
	CHARSET_ID to_cs = INTL_charset(tdbb, INTL_TTYPE(to));

	UCHAR* start = to->dsc_address;
	UCHAR* p = start;

/* Must convert dtype(cstring,text,vary) and ttype(ascii,binary,..intl..) */

	UCHAR* from_ptr;
	USHORT from_type;
	const USHORT from_len = CVT_get_string_ptr(from, &from_type, &from_ptr, NULL, 0, err);

	ULONG to_size, to_len, to_fill;
	to_size = to_len = TEXT_LEN(to);
	ULONG from_fill;

	const UCHAR* q = from_ptr;
	CharSet* toCharSet = INTL_charset_lookup(tdbb, to_cs);
	ULONG toLength;

	switch (to->dsc_dtype) {
	case dtype_text:
		if ((from_cs != to_cs) && (to_cs != CS_BINARY) && (to_cs != CS_NONE) && (from_cs != CS_NONE))
		{

			to_len = INTL_convert_bytes(tdbb, to_cs, to->dsc_address, to_size,
										from_cs, from_ptr, from_len, err);
			toLength = to_len;
			to_fill = to_size - to_len;
			from_fill = 0;		/* Convert_bytes handles source truncation */
			p += to_len;
		}
		else {
			/* binary string can always be converted TO by byte-copy */

			to_len = MIN(from_len, to_size);
			if (!toCharSet->wellFormed(to_len, q))
				err(Arg::Gds(isc_malformed_string));
			toLength = to_len;
			from_fill = from_len - to_len;
			to_fill = to_size - to_len;
			if (to_len)
				do
					*p++ = *q++;
				while (--to_len);
		}

		if (to_fill > 0)
			pad_spaces(tdbb, to_cs, p, to_fill);
		break;

	case dtype_cstring:
		if ((from_cs != to_cs) && (to_cs != CS_BINARY) && (to_cs != CS_NONE) && (from_cs != CS_NONE))
		{
			to_len = INTL_convert_bytes(tdbb, to_cs, to->dsc_address, to_size,
										from_cs, from_ptr, from_len, err);
			toLength = to_len;
			to->dsc_address[to_len] = 0;
			from_fill = 0;		/* Convert_bytes handles source truncation */
		}
		else {
			/* binary string can always be converted TO by byte-copy */

			to_len = MIN(from_len, to_size);
			if (!toCharSet->wellFormed(to_len, q))
				err(Arg::Gds(isc_malformed_string));
			toLength = to_len;
			from_fill = from_len - to_len;
			if (to_len)
				do
					*p++ = *q++;
				while (--to_len);
			*p = 0;
		}
		break;

	case dtype_varying:
		if ((from_cs != to_cs) && (to_cs != CS_BINARY) && (to_cs != CS_NONE) && (from_cs != CS_NONE))
		{

			to_len =
				INTL_convert_bytes(tdbb, to_cs,
								   (start = reinterpret_cast<UCHAR*>(((vary*) to->dsc_address)->vary_string)),
								   to_size, from_cs, from_ptr, from_len, err);
			toLength = to_len;
			((vary*) to->dsc_address)->vary_length = to_len;
			from_fill = 0;		/* Convert_bytes handles source truncation */
		}
		else {
			/* binary string can always be converted TO by byte-copy */
			to_len = MIN(from_len, to_size);
			if (!toCharSet->wellFormed(to_len, q))
				err(Arg::Gds(isc_malformed_string));
			toLength = to_len;
			from_fill = from_len - to_len;
			((vary*) p)->vary_length = to_len;
			start = p = reinterpret_cast<UCHAR*>(((vary*) p)->vary_string);
			if (to_len)
				do
					*p++ = *q++;
				while (--to_len);
		}
		break;
	}

	if (toCharSet->isMultiByte() &&
		!(toCharSet->getFlags() & CHARSET_LEGACY_SEMANTICS) &&
		toLength != 31 &&	/* allow non CHARSET_LEGACY_SEMANTICS to be used as connection charset */
		toCharSet->length(toLength, start, false) > to_size / toCharSet->maxBytesPerChar())
	{
		err(Arg::Gds(isc_arith_except) << Arg::Gds(isc_string_truncation));
	}

	if (from_fill)
	{
		/* Make sure remaining characters on From string are spaces */
		if (!all_spaces(tdbb, from_cs, q, from_fill, 0))
			err(Arg::Gds(isc_arith_except) << Arg::Gds(isc_string_truncation));
	}

	return 0;
}


int INTL_data(const dsc* pText)
{
/**************************************
 *
 *      I N T L _ d a t a
 *
 **************************************
 *
 * Functional description
 *      Given an input text descriptor,
 *      return TRUE if the data pointed to represents
 *      international text (subject to user defined or non-binary
 *      collation or comparison).
 *
 **************************************/

	fb_assert(pText != NULL);

	if (!IS_TEXT(pText))
		return FALSE;

	if (!INTERNAL_TTYPE(pText))
		return TRUE;

	return FALSE;
}

int INTL_data_or_binary(const dsc* pText)
{
/**************************************
 *
 *      I N T L _ d a t a _ o r _ b i n a r y
 *
 **************************************
 *
 * Functional description
 *
 **************************************/

	return (INTL_data(pText) || (pText->dsc_ttype() == ttype_binary));
}


bool INTL_defined_type(thread_db* tdbb, USHORT t_type)
{
/**************************************
 *
 *      I N T L _ d e f i n e d _ t y p e
 *
 **************************************
 *
 * Functional description
 *      Is (t_type) a known text type?
 * Return:
 *      false   type is not defined.
 *      true    type is defined
 *
 * Note:
 *      Due to cleanup that must happen in DFW, this routine
 *      must return, and not call ERR directly.
 *
 **************************************/
	SET_TDBB(tdbb);

	try
	{
		ThreadStatusGuard local_status(tdbb);
		INTL_texttype_lookup(tdbb, t_type);
	}
	catch (...)
	{
		return false;
	}

	return true;
}


void INTL_init(thread_db* tdbb)
{
/**************************************
 *
 *      I N T L _ i n i t
 *
 **************************************
 *
 * Functional description
 *
 **************************************/
}


USHORT INTL_key_length(thread_db* tdbb, USHORT idxType, USHORT iLength)
{
/**************************************
 *
 *      I N T L _ k e y _ l e n g t h
 *
 **************************************
 *
 * Functional description
 *      Given an index type, and a maximum length (iLength)
 *      return the length of the byte string key descriptor to
 *      use when collating text of this type.
 *
 **************************************/
	SET_TDBB(tdbb);

	fb_assert(idxType >= idx_first_intl_string);

	const USHORT ttype = INTL_INDEX_TO_TEXT(idxType);

	USHORT key_length;
	if (ttype <= ttype_last_internal)
		key_length = iLength;
	else {
		TextType* obj = INTL_texttype_lookup(tdbb, ttype);
		key_length = obj->key_length(iLength);
	}

/* Validity checks on the computed key_length */

	if (key_length > MAX_KEY)
		key_length = MAX_KEY;

	if (key_length < iLength)
		key_length = iLength;

	return (key_length);
}


CharSet* INTL_charset_lookup(thread_db* tdbb, USHORT parm1)
{
/**************************************
 *
 *      I N T L _ c h a r s e t _ l o o k u p
 *
 **************************************
 *
 * Functional description
 *
 *      Lookup a character set descriptor.
 *
 *      First, search the appropriate vector that hangs
 *      off the dbb.  If not found, then call the lower
 *      level lookup routine to allocate it, or punt
 *		if we don't know about the charset.
 *
 * Returns:
 *      *charset        - if no errors;
 *      <never>         - if error
 *
 **************************************/
	CharSetContainer *cs = CharSetContainer::lookupCharset(tdbb, parm1);
	return cs->getCharSet();
}


Collation* INTL_texttype_lookup(thread_db* tdbb,
								USHORT parm1)
{
/**************************************
 *
 *      I N T L _ t e x t t y p e _ l o o k u p
 *
 **************************************
 *
 * Functional description
 *
 *      Lookup either a character set descriptor or
 *      texttype descriptor object.
 *
 *      First, search the appropriate vector that hangs
 *      off the dbb.  If not found, then call the lower
 *      level lookup routine to find it in the libraries.
 *
 * Returns:
 *      *object         - if no errors;
 *      <never>         - if error
 *
 **************************************/
	SET_TDBB(tdbb);
	Database* dbb = tdbb->getDatabase();

	if (parm1 == ttype_dynamic)
		parm1 = MAP_CHARSET_TO_TTYPE(tdbb->getAttachment()->att_charset);

	CharSetContainer* csc = CharSetContainer::lookupCharset(tdbb, parm1);

	return csc->lookupCollation(tdbb, parm1);
}


void INTL_texttype_unload(thread_db* tdbb,
						  USHORT ttype)
{
/**************************************
 *
 *      I N T L _ t e x t t y p e _ u n l o a d
 *
 **************************************
 *
 * Functional description
 *  Unload a collation from memory.
 *
 **************************************/
	SET_TDBB(tdbb);

	CharSetContainer* csc = CharSetContainer::lookupCharset(tdbb, ttype);
	if (csc)
		csc->unloadCollation(tdbb, ttype);
}


bool INTL_texttype_validate(Jrd::thread_db* tdbb, const SubtypeInfo* info)
{
/**************************************
 *
 *      I N T L _ t e x t t y p e _ v a l i d a t e
 *
 **************************************
 *
 * Functional description
 *  Check if collation attributes are valid.
 *
 **************************************/
	SET_TDBB(tdbb);

	texttype tt;
	memset(&tt, 0, sizeof(tt));

	bool ret = lookup_texttype(&tt, info);

	if (ret && tt.texttype_fn_destroy)
		tt.texttype_fn_destroy(&tt);

	return ret;
}


void INTL_pad_spaces(thread_db* tdbb, DSC * type, UCHAR * string, ULONG length)
{
/**************************************
 *
 *      I N T L _ p a d _ s p a c e s
 *
 **************************************
 *
 * Functional description
 *      Pad a buffer with spaces, using the character
 *      set's defined space character.
 *
 **************************************/
	SET_TDBB(tdbb);

	fb_assert(type != NULL);
	fb_assert(IS_TEXT(type));
	fb_assert(string != NULL);

	const USHORT charset = INTL_charset(tdbb, type->dsc_ttype());
	pad_spaces(tdbb, charset, string, length);
}


USHORT INTL_string_to_key(thread_db* tdbb,
						USHORT idxType,
						const dsc* pString,
						DSC* pByte,
						USHORT key_type)
{
/**************************************
 *
 *      I N T L _ s t r i n g _ t o _ k e y
 *
 **************************************
 *
 * Functional description
 *      Given an input string, convert it to a byte string
 *      that will collate naturally (byte order).
 *
 *      Return the length of the resulting byte string.
 *
 **************************************/
	UCHAR pad_char;
	USHORT ttype;

	SET_TDBB(tdbb);

	fb_assert(idxType >= idx_first_intl_string || idxType == idx_string ||
			  idxType == idx_byte_array || idxType == idx_metadata);
	fb_assert(pString != NULL);
	fb_assert(pByte != NULL);
	fb_assert(pString->dsc_address != NULL);
	fb_assert(pByte->dsc_address != NULL);
	fb_assert(pByte->dsc_dtype == dtype_text);

	switch (idxType) {
	case idx_string:
		pad_char = ' ';
		ttype = ttype_none;
		break;
	case idx_byte_array:
		pad_char = 0;
		ttype = ttype_binary;
		break;
	case idx_metadata:
		pad_char = ' ';
		ttype = ttype_metadata;
		break;
	default:
		pad_char = 0;
		ttype = INTL_INDEX_TO_TEXT(idxType);
		break;
	}

/* Make a string into the proper type of text */

	MoveBuffer temp;
	UCHAR* src;
	USHORT len = MOV_make_string2(tdbb, pString, ttype, &src, temp);

	USHORT outlen;
	char* dest = reinterpret_cast<char*>(pByte->dsc_address);
	USHORT destLen = pByte->dsc_length;

	switch (ttype) {
	case ttype_metadata:
	case ttype_binary:
	case ttype_ascii:
	case ttype_none:
		while (len-- && destLen-- > 0)
			*dest++ = *src++;
		/* strip off ending pad characters */
		while (dest > (const char*)pByte->dsc_address) {
			if (*(dest - 1) == pad_char)
				dest--;
			else
				break;
		}
		outlen = (dest - (const char*)pByte->dsc_address);
		break;
	default:
		TextType* obj = INTL_texttype_lookup(tdbb, ttype);
		outlen = obj->string_to_key(len,
									reinterpret_cast<const unsigned char*>(src),
									pByte->dsc_length,
									reinterpret_cast<unsigned char*>(dest),
									key_type);
		break;
	}

	return (outlen);
}


static bool all_spaces(thread_db* tdbb,
					   CHARSET_ID charset,
					   const BYTE* ptr, ULONG len, ULONG offset)
{
/**************************************
 *
 *      a l l _ s p a c e s
 *
 **************************************
 *
 * Functional description
 *      determine if the string at ptr[offset] ... ptr[len] is entirely
 *      spaces, as per the space definition of (charset).
 *      The binary representation of a Space is character-set dependent.
 *      (0x20 for Ascii, 0x0020 for Unicode, 0x20 for SJIS, but must watch for
 *      0x??20, which is NOT a space.
 **************************************/
	SET_TDBB(tdbb);

	fb_assert(ptr != NULL);

	CharSet* obj = INTL_charset_lookup(tdbb, charset);

/*
 * We are assuming offset points to the first byte which was not
 * consumed in a conversion.  And that offset is pointing
 * to a character boundary
 */

// Single-octet character sets are optimized here

	if (obj->getSpaceLength() == 1) {
		const BYTE* p = &ptr[offset];
		const BYTE* const end = &ptr[len];
		while (p < end) {
			if (*p++ != *obj->getSpace())
				return false;
		}
	}
	else {
		const BYTE* p = &ptr[offset];
		const BYTE* const end = &ptr[len];
		const unsigned char* space = obj->getSpace();
		const unsigned char* const end_space = &space[obj->getSpaceLength()];
		while (p < end) {
			space = obj->getSpace();
			while (p < end && space < end_space) {
				if (*p++ != *space++)
					return false;
			}
		}
	}

	return true;
}


static int blocking_ast_collation(void* ast_object)
{
/**************************************
 *
 *      b l o c k i n g _ a s t _ c o l l a t i o n
 *
 **************************************
 *
 * Functional description
 *      Someone is trying to drop a collation. If there
 *      are outstanding interests in the existence of
 *      the collation then just mark as blocking and return.
 *      Otherwise, mark the collation as obsolete
 *      and release the collation existence lock.
 *
 **************************************/
	Collation* tt = static_cast<Collation*>(ast_object);

	if (tt && tt->useCount == 0)
	{
		tt->obsolete = true;

		if (tt->existenceLock)
		{
			try
			{
				Database* dbb = tt->existenceLock->lck_dbb;

				Database::SyncGuard dsGuard(dbb, true);

				ThreadContextHolder tdbb;
				tdbb->setDatabase(dbb);
				tdbb->setAttachment(tt->existenceLock->lck_attachment);

				Jrd::ContextPoolHolder context(tdbb, 0);

				LCK_release(tdbb, tt->existenceLock);
				tt->existenceLock = NULL;
			}
			catch (const Firebird::Exception&)
			{} // no-op
		}
	}

	return 0;
}


static void pad_spaces(thread_db* tdbb, CHARSET_ID charset, BYTE* ptr, ULONG len)
{								/* byte count */
/**************************************
 *
 *      p a d  _ s p a c e s
 *
 **************************************
 *
 * Functional description
 *      Pad a buffer with the character set defined space character.
 *
 **************************************/
	SET_TDBB(tdbb);

	fb_assert(ptr != NULL);

	CharSet* obj = INTL_charset_lookup(tdbb, charset);

/* Single-octet character sets are optimized here */
	if (obj->getSpaceLength() == 1) {
		const BYTE* const end = &ptr[len];
		while (ptr < end)
			*ptr++ = *obj->getSpace();
	}
	else {
		const BYTE* const end = &ptr[len];
		const UCHAR* space = obj->getSpace();
		const UCHAR* const end_space = &space[obj->getSpaceLength()];
		while (ptr < end) {
			space = obj->getSpace();
			while (ptr < end && space < end_space) {
				*ptr++ = *space++;
			}
			/* This fb_assert is checking that we didn't have a buffer-end
			 * in the middle of a space character
			 */
			fb_assert(!(ptr == end) || (space == end_space));
		}
	}
}