firebird-mirror/src/jrd/intl.cpp

/*************  history ************
*
*       COMPONENT: JRD  MODULE: INTL.CPP
*       generated by Marion V2.5     2/6/90
*       from dev              db        on 4-JAN-1995
*****************************************************************
*
*       PR	2002-06-02 Added ugly c hack in
*       intl_back_compat_alloc_func_lookup.
*       When someone has time we need to change the references to
*       return (void*) function to something more C++ like
*
*       42 4711 3 11 17  tamlin   2001
*       Added silly numbers before my name, and converted it to C++.
*
*       18850   daves   4-JAN-1995
*       Fix gds__alloc usage
*
*       18837   deej    31-DEC-1994
*       fixing up HARBOR_MERGE
*
*       18821   deej    27-DEC-1994
*       HARBOR MERGE
*
*       18789   jdavid  19-DEC-1994
*       Cast some functions
*
*       17508   jdavid  15-JUL-1994
*       Bring it up to date
*
*       17500   daves   13-JUL-1994
*       Bug 6645: Different calculation of partial keys
*
*       17202   katz    24-MAY-1994
*       PC_PLATFORM requires the .dll extension
*
*       17191   katz    23-MAY-1994
*       OS/2 requires the .dll extension
*
*       17180   katz    23-MAY-1994
*       Define location of DLL on OS/2
*
*       17149   katz    20-MAY-1994
*       In JRD, isc_arg_number arguments are SLONG's not int's
*
*       16633   daves   19-APR-1994
*       Bug 6202: International licensing uses INTERNATIONAL product code
*
*       16555   katz    17-APR-1994
*       The last argument of calls to ERR_post should be 0
*
*       16521   katz    14-APR-1994
*       Borland C needs a decorated symbol to lookup
*
*       16403   daves   8-APR-1994
*       Bug 6441: Emit an error whenever transliteration from ttype_binary attempted
*
*       16141   katz    28-MAR-1994
*       Don't declare return value from ISC_lookup_entrypoint as API_ROUTINE
*
 * The contents of this file are subject to the Interbase Public
 * License Version 1.0 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy
 * of the License at http://www.Inprise.com/IPL.html
 *
 * Software distributed under the License is distributed on an
 * "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express
 * or implied. See the License for the specific language governing
 * rights and limitations under the License.
 *
 * The Original Code was created by Inprise Corporation
 * and its predecessors. Portions created by Inprise Corporation are
 * Copyright (C) Inprise Corporation.
 *
 * All Rights Reserved.
 * Contributor(s): ______________________________________.
 *
 * 2002.10.29 Sean Leyne - Removed obsolete "Netware" port
 *
 * 2002.10.30 Sean Leyne - Removed support for obsolete "PC_PLATFORM" define
 *
*/


/*
 *      PROGRAM:        JRD Intl
 *      MODULE:         intl.cpp
 *      DESCRIPTION:    International text support routines
 *
 * copyright (c) 1992, 1993 by Borland International
 */

#include "firebird.h"
#include <string.h>
#include "../jrd/common.h"
#include <stdio.h>
#include "../jrd/jrd.h"
#include "../jrd/req.h"
#include "../jrd/val.h"
#include "gen/iberror.h"
#include "../jrd/intl.h"
#include "../jrd/intl_classes.h"
#include "../jrd/ods.h"
#include "../jrd/btr.h"
#include "../intl/charsets.h"
#include "../intl/country_codes.h"
#include "../jrd/gdsassert.h"
//#include "../jrd/license.h"
#ifdef INTL_BUILTIN
#include "../intl/ld_proto.h"
#endif
#include "../jrd/all_proto.h"
#include "../jrd/cvt_proto.h"
#include "../jrd/err_proto.h"
#include "../jrd/fun_proto.h"
#include "../jrd/gds_proto.h"
#include "../jrd/iberr_proto.h"
#include "../jrd/intl_proto.h"
#include "../jrd/isc_proto.h"
#include "../jrd/met_proto.h"
#include "../jrd/thd.h"
#include "../jrd/evl_string.h"
#include "../jrd/jrd.h"
#include "../jrd/evl_like.h"
#include "../jrd/mov_proto.h"
#include "../jrd/IntlManager.h"
#include "../common/classes/init.h"

using namespace Jrd;

// True when the descriptor pointed to by x has one of the three string
// dtypes: dtype_text, dtype_varying or dtype_cstring.
#define IS_TEXT(x)      (((x)->dsc_dtype == dtype_text)   ||\
			 ((x)->dsc_dtype == dtype_varying)||\
			 ((x)->dsc_dtype == dtype_cstring))

// A text type id packs two values: the low 8 bits are the character set id,
// the remaining upper bits (value >> 8) are the collation id.
#define TTYPE_TO_CHARSET(tt)    ((SSHORT)((tt) & 0x00FF))
#define TTYPE_TO_COLLATION(tt)  ((SSHORT)((tt) >> 8))


// Forward declarations of file-local helpers.
// all_spaces / pad_spaces are not defined in the visible part of this file;
// presumably they live further down - verify before relying on their semantics.
static bool all_spaces(thread_db*, CHARSET_ID, const BYTE*, ULONG, ULONG);
static void pad_spaces(thread_db*, CHARSET_ID, BYTE *, ULONG);
// Thin wrappers around the IntlManager lookups (charset / collation).
static INTL_BOOL lookup_charset(charset* cs, const SubtypeInfo* info);
static INTL_BOOL lookup_texttype(texttype* tt, const SubtypeInfo* info);

// We need all the structure definitions from the old interface
#define INTL_ENGINE_INTERNAL
#include "../jrd/intlobj_new.h"


// Classes and structures used internally to this file and intl implementation
class CharSetContainer
{
public:
	CharSetContainer(MemoryPool& p, USHORT cs_id, const SubtypeInfo* info);

	void destroy()
	{
		cs->destroy();
		for (size_t i = 0; i < charset_collations.getCount(); i++)
			if (charset_collations[i])
				charset_collations[i]->destroy();
	}

	CharSet* getCharSet() { return cs; }

	TextType* lookupCollation(thread_db* tdbb, USHORT tt_id);

	CsConvert lookupConverter(thread_db* tdbb, CHARSET_ID to_cs);

	static CharSetContainer* lookupCharset(thread_db* tdbb, SSHORT ttype);

private:
	Firebird::Array<TextType*> charset_collations;
	CharSet* cs;
};

/* Below are templates for functions used in TextType implementation */

// Terminal element of a converter chain: accepts the same constructor
// arguments as the other converters and leaves the string untouched.
class NullStrConverter
{
public:
	NullStrConverter(thread_db* tdbb, const TextType* obj, const UCHAR *str, SLONG len)
	{
	}
};

// Converter stage that upper-cases the string after PrevConverter has run.
// On return from the constructor, str points at the upper-cased copy, which
// lives either in the inline buffer or in a heap block owned by this object
// and released by the destructor.
template <typename PrevConverter>
class UpcaseConverter : public PrevConverter
{
public:
	UpcaseConverter(thread_db* tdbb, TextType* obj, const UCHAR* &str, SLONG &len)
		: PrevConverter(tdbb, obj, str, len)
	{
		// Use the inline buffer whenever the string fits into it.
		if (len <= (int) sizeof(tempBuffer))
			out_str = tempBuffer;
		else
			out_str = FB_NEW(*tdbb->getDefaultPool()) UCHAR[len];

		obj->str_to_upper(len, str, len, out_str);
		str = out_str;
	}

	~UpcaseConverter()
	{
		// Only a heap allocation needs to be given back.
		if (out_str != tempBuffer)
			delete[] out_str;
	}

private:
	UCHAR tempBuffer[100];
	UCHAR* out_str;
};

// Converter stage that replaces the string by its canonical form after
// PrevConverter has run. On return from the constructor, str/len describe
// the canonical string (len in bytes); a NULL input string yields len == 0
// and str is left NULL.
template <typename PrevConverter>
class CanonicalConverter : public PrevConverter
{
public:
	CanonicalConverter(thread_db* tdbb, TextType* obj, const UCHAR* &str, SLONG &len)
		: PrevConverter(tdbb, obj, str, len)
	{
		// Output size estimate: one canonical element per minimum-width character.
		SLONG out_len = len / obj->getCharSet()->minBytesPerChar() * obj->getCanonicalWidth();

		// Use the inline buffer whenever the canonical form fits into it.
		if (out_len <= (int) sizeof(tempBuffer))
			out_str = tempBuffer;
		else
			out_str = FB_NEW(*tdbb->getDefaultPool()) UCHAR[out_len];

		if (!str)
		{
			len = 0;
		}
		else
		{
			// canonical() result is multiplied by the canonical width to get bytes.
			len = obj->canonical(len, str, out_len, out_str) * obj->getCanonicalWidth();
			str = out_str;
		}
	}

	~CanonicalConverter()
	{
		// Only a heap allocation needs to be given back.
		if (out_str != tempBuffer)
			delete[] out_str;
	}

private:
	UCHAR tempBuffer[100];
	UCHAR* out_str;
};

// LIKE implementation parameterised by the string converter applied to every
// operand (StrConverter) and by the element type the converted strings are
// compared in (CharType). The actual matching is done by
// Firebird::LikeEvaluator<CharType>.
template <typename StrConverter, typename CharType>
class LikeObjectImpl : public LikeObject
{
public:
	LikeObjectImpl(MemoryPool& pool, const CharType* str, SLONG str_len,
				   CharType escape, bool use_escape,
				   CharType sql_match_any, CharType sql_match_one)
		: evaluator(pool, str, str_len, escape, use_escape, sql_match_any, sql_match_one)
	{
	}

	~LikeObjectImpl()
	{
	}

	// Forget everything processed so far.
	void reset()
	{
		evaluator.reset();
	}

	// Current outcome of the evaluator.
	bool result()
	{
		return evaluator.getResult();
	}

	// Convert one chunk of the searched string and feed it to the evaluator.
	bool process(thread_db* tdbb, Jrd::TextType* ttype, const UCHAR* str, SLONG length)
	{
		StrConverter converter(tdbb, ttype, str, length);
		fb_assert(length % sizeof(CharType) == 0);

		const CharType* const chunk = reinterpret_cast<const CharType*>(str);
		return evaluator.processNextChunk(chunk, length / sizeof(CharType));
	}

	// Build a reusable LIKE object for the given pattern. Pattern, escape and
	// both wildcard strings are run through StrConverter first; only the first
	// converted element of escape / wildcards is used.
	static LikeObject* create(thread_db* tdbb, TextType* ttype, const UCHAR* str, SLONG length,
		const UCHAR* escape, SLONG escape_length,
		const UCHAR* sql_match_any, SLONG match_any_length,
		const UCHAR* sql_match_one, SLONG match_one_length)
	{
		StrConverter patternCvt(tdbb, ttype, str, length);
		StrConverter escapeCvt(tdbb, ttype, escape, escape_length);
		StrConverter matchAnyCvt(tdbb, ttype, sql_match_any, match_any_length);
		StrConverter matchOneCvt(tdbb, ttype, sql_match_one, match_one_length);

		fb_assert(length % sizeof(CharType) == 0);

		MemoryPool& pool = *tdbb->getDefaultPool();

		return FB_NEW(pool) LikeObjectImpl(
			pool,
			reinterpret_cast<const CharType*>(str),
			length / sizeof(CharType),
			(escape ? *reinterpret_cast<const CharType*>(escape) : 0),
			escape_length != 0,
			*reinterpret_cast<const CharType*>(sql_match_any),
			*reinterpret_cast<const CharType*>(sql_match_one));
	}

	// One-shot LIKE: match string s against pattern p.
	static bool evaluate(thread_db* tdbb, TextType* ttype, const UCHAR* s, SLONG sl,
		const UCHAR* p, SLONG pl, const UCHAR* escape, SLONG escape_length, const UCHAR* sql_match_any, SLONG match_any_length, const UCHAR* sql_match_one, SLONG match_one_length)
	{
		// Pattern is converted before the searched string, as the evaluator
		// is constructed from the pattern.
		StrConverter patternCvt(tdbb, ttype, p, pl);
		StrConverter stringCvt(tdbb, ttype, s, sl);
		StrConverter escapeCvt(tdbb, ttype, escape, escape_length);
		StrConverter matchAnyCvt(tdbb, ttype, sql_match_any, match_any_length);
		StrConverter matchOneCvt(tdbb, ttype, sql_match_one, match_one_length);

		fb_assert(pl % sizeof(CharType) == 0);
		fb_assert(sl % sizeof(CharType) == 0);

		Firebird::LikeEvaluator<CharType> matcher(
			*tdbb->getDefaultPool(),
			reinterpret_cast<const CharType*>(p),
			pl / sizeof(CharType),
			(escape ? *reinterpret_cast<const CharType*>(escape) : 0),
			escape_length != 0,
			*reinterpret_cast<const CharType*>(sql_match_any),
			*reinterpret_cast<const CharType*>(sql_match_one));

		matcher.processNextChunk(reinterpret_cast<const CharType*>(s), sl / sizeof(CharType));
		return matcher.getResult();
	}

private:
	Firebird::LikeEvaluator<CharType> evaluator;
};

template <typename StrConverter, typename CharType>
class ContainsObjectImpl : public ContainsObject
{
public:
	ContainsObjectImpl(MemoryPool& pool, const CharType* str, SLONG str_len)
		: evaluator(pool, str, str_len)
	{ }

	void reset() { evaluator.reset(); }

	bool result() { return evaluator.getResult(); }

	bool process(thread_db* tdbb, Jrd::TextType* ttype, const UCHAR* str, SLONG length) {
		StrConverter cvt(tdbb, ttype, str, length);
		fb_assert(length % sizeof(CharType) == 0);
		return evaluator.processNextChunk(
			reinterpret_cast<const CharType*>(str), length / sizeof(CharType));
	}

	~ContainsObjectImpl() {}

	static ContainsObject* create(thread_db* tdbb, TextType* ttype, const UCHAR* str, SLONG length) {
		StrConverter cvt(tdbb, ttype, str, length);
		fb_assert(length % sizeof(CharType) == 0);
		return FB_NEW(*tdbb->getDefaultPool()) ContainsObjectImpl(*tdbb->getDefaultPool(),
			reinterpret_cast<const CharType*>(str), length / sizeof(CharType));
	}

	static bool evaluate(thread_db* tdbb, TextType* ttype, const UCHAR* s, SLONG sl,
			const UCHAR* p, SLONG pl)
	{
		StrConverter cvt1(tdbb, ttype, p, pl), cvt2(tdbb, ttype, s, sl);
		fb_assert(pl % sizeof(CharType) == 0);
		fb_assert(sl % sizeof(CharType) == 0);
		Firebird::ContainsEvaluator<CharType> evaluator(*tdbb->getDefaultPool(),
			reinterpret_cast<const CharType*>(p), pl / sizeof(CharType));
		evaluator.processNextChunk(reinterpret_cast<const CharType*>(s), sl / sizeof(CharType));
		return evaluator.getResult();
	}

private:
	Firebird::ContainsEvaluator<CharType> evaluator;
};

// MATCHES implementation: converts both operands with StrConverter and hands
// them, viewed as CharType elements, to MATCHESNAME.
template <typename StrConverter, typename CharType>
class MatchesObjectImpl
{
public:
	static bool evaluate(thread_db* tdbb, TextType* ttype, const UCHAR* s, SLONG sl,
			const UCHAR* p, SLONG pl)
	{
		// Pattern first, then the string being tested.
		StrConverter patternCvt(tdbb, ttype, p, pl);
		StrConverter stringCvt(tdbb, ttype, s, sl);

		fb_assert(pl % sizeof(CharType) == 0);
		fb_assert(sl % sizeof(CharType) == 0);

		const CharType* const text = reinterpret_cast<const CharType*>(s);
		const CharType* const pattern = reinterpret_cast<const CharType*>(p);

		// Lengths are passed on unchanged (not divided by sizeof(CharType)).
		return MATCHESNAME(tdbb, ttype, text, sl, pattern, pl);
	}
};

// SLEUTH implementation: converts operands with StrConverter and forwards
// them, viewed as CharType elements, to SLEUTHNAME / SLEUTH_MERGE_NAME.
template <typename StrConverter, typename CharType>
class SleuthObjectImpl
{
public:
	// Check the search string against an already merged match expression.
	// Only the search string is converted here; match is passed through as is.
	static bool check(thread_db* tdbb, TextType* ttype, USHORT flags,
					  const UCHAR* search, SLONG search_len,
					  const UCHAR* match, SLONG match_len)
	{
		StrConverter searchCvt(tdbb, ttype, search, search_len);

		fb_assert(search_len % sizeof(CharType) == 0);
		fb_assert(match_len % sizeof(CharType) == 0);

		const CharType* const searchChars = reinterpret_cast<const CharType*>(search);
		const CharType* const matchChars = reinterpret_cast<const CharType*>(match);

		return SLEUTHNAME(tdbb, ttype, flags,
						  searchChars, search_len,
						  matchChars, match_len);
	}

	// Merge the match and control strings into combined; both inputs are
	// converted first. Returns whatever SLEUTH_MERGE_NAME returns.
	static ULONG merge(thread_db* tdbb, TextType* ttype,
					  const UCHAR* match, SLONG match_bytes,
					  const UCHAR* control, SLONG control_bytes,
					  UCHAR* combined, SLONG combined_bytes)
	{
		StrConverter matchCvt(tdbb, ttype, match, match_bytes);
		StrConverter controlCvt(tdbb, ttype, control, control_bytes);

		fb_assert(match_bytes % sizeof(CharType) == 0);
		fb_assert(control_bytes % sizeof(CharType) == 0);

		const CharType* const matchChars = reinterpret_cast<const CharType*>(match);
		const CharType* const controlChars = reinterpret_cast<const CharType*>(control);
		CharType* const combinedChars = reinterpret_cast<CharType*>(combined);

		return SLEUTH_MERGE_NAME(tdbb, ttype,
						   matchChars, match_bytes,
						   controlChars, control_bytes,
						   combinedChars, combined_bytes);
	}
};

// Character set in which every character occupies the same number of bytes
// (minBytesPerChar()). Charset-supplied callbacks take precedence; otherwise
// length and substring are computed by plain byte arithmetic.
class FixedWidthCharSet : public CharSet
{
public:
	FixedWidthCharSet(CHARSET_ID _id, charset* _cs)
		: CharSet(_id, _cs)
	{
	}

	// Length of src in characters, optionally ignoring trailing spaces.
	virtual ULONG length(thread_db* tdbb, ULONG srcLen, const UCHAR* src, bool countTrailingSpaces) const
	{
		fb_assert(getStruct());

		if (!countTrailingSpaces)
			srcLen = removeTrailingSpaces(srcLen, src);

		if (!getStruct()->charset_fn_length)
			return srcLen / minBytesPerChar();

		return getStruct()->charset_fn_length(getStruct(), srcLen, src);
	}

	// Copy up to len characters of src, starting at character startPos, into
	// dst. Returns the number of bytes written, 0 when startPos lies beyond
	// the source, or INTL_BAD_STR_LENGTH when dst cannot hold len characters.
	virtual ULONG substring(thread_db* tdbb, ULONG srcLen, const UCHAR* src, ULONG dstLen, UCHAR* dst, ULONG startPos, ULONG len) const
	{
		fb_assert(getStruct());

		if (getStruct()->charset_fn_substring)
			return getStruct()->charset_fn_substring(getStruct(), srcLen, src, dstLen, dst, startPos, len);

		fb_assert(src != NULL && dst != NULL);

		// The destination is checked against the requested (unclamped) length.
		if (dstLen < len * minBytesPerChar())
			return INTL_BAD_STR_LENGTH;

		if (startPos * minBytesPerChar() > srcLen)
			return 0;

		// Clamp to the characters actually available, then switch to bytes.
		len = MIN(srcLen / minBytesPerChar() - startPos, len) * minBytesPerChar();

		memcpy(dst, src + startPos * minBytesPerChar(), len);

		return len;
	}
};

// Character set whose characters have a variable byte width. Charset-supplied
// callbacks take precedence; otherwise length and substring are computed by
// converting to UTF-16 and using the UnicodeUtil helpers.
class MultiByteCharSet : public CharSet
{
public:
	MultiByteCharSet(CHARSET_ID _id, charset* _cs)
		: CharSet(_id, _cs)
	{
	}

	// Length of src in characters, optionally ignoring trailing spaces.
	virtual ULONG length(thread_db* tdbb, ULONG srcLen, const UCHAR* src, bool countTrailingSpaces) const
	{
		fb_assert(getStruct());

		if (!countTrailingSpaces)
			srcLen = removeTrailingSpaces(srcLen, src);

		if (getStruct()->charset_fn_length)
			return getStruct()->charset_fn_length(getStruct(), srcLen, src);

		// Conversion status is collected but not inspected here.
		USHORT errCode;
		ULONG errPos;

		// Transcode to UTF-16 ...
		ULONG utf16Bytes = getConvToUnicode().convertLength(srcLen);
		Firebird::HalfStaticArray<USHORT, BUFFER_SMALL> utf16;
		utf16Bytes = getConvToUnicode().convert(srcLen, src, utf16Bytes,
						utf16.getBuffer(utf16Bytes / sizeof(USHORT)), &errCode, &errPos);

		// ... and count the characters of the UTF-16 string.
		return UnicodeUtil::utf16Length(utf16Bytes, utf16.begin());
	}

	// Extract up to len characters of src starting at character startPos and
	// store them, in this character set, into dst. Returns the value produced
	// by the final conversion back from UTF-16, or 0 for an empty request.
	virtual ULONG substring(thread_db* tdbb, ULONG srcLen, const UCHAR* src, ULONG dstLen, UCHAR* dst, ULONG startPos, ULONG len) const
	{
		fb_assert(getStruct());

		if (getStruct()->charset_fn_substring)
			return getStruct()->charset_fn_substring(getStruct(), srcLen, src, dstLen, dst, startPos, len);

		fb_assert(src != NULL && dst != NULL);

		if (len == 0 || startPos >= srcLen)
			return 0;

		// Conversion status is collected but not inspected here.
		USHORT errCode;
		ULONG errPos;

		// Step 1: transcode the whole source to UTF-16.
		Firebird::HalfStaticArray<UCHAR, BUFFER_SMALL> utf16;
		ULONG uniBytes = getConvToUnicode().convertLength(srcLen);
		uniBytes = getConvToUnicode().convert(srcLen, src, uniBytes,
			reinterpret_cast<USHORT*>(utf16.getBuffer(uniBytes)), &errCode, &errPos);

		// Step 2: cut the requested piece out of the UTF-16 string.
		Firebird::HalfStaticArray<UCHAR, BUFFER_SMALL> piece;
		uniBytes = UnicodeUtil::utf16Substring(uniBytes, reinterpret_cast<const USHORT*>(utf16.begin()),
			uniBytes, reinterpret_cast<USHORT*>(piece.getBuffer(uniBytes)), startPos, len);

		// Step 3: transcode the piece back into this character set.
		return getConvFromUnicode().convert(uniBytes, piece.begin(), dstLen, dst, &errCode, &errPos);
	}
};

// TextType whose string predicates (MATCHES, SLEUTH, LIKE, CONTAINING) are
// delegated to the four implementation classes given as template arguments.
// LIKE wildcards are taken from the collation's character set.
template <typename pContainsObjectImpl, typename pLikeObjectImpl,
		  typename pMatchesObjectImpl, typename pSleuthObjectImpl>
class CollationImpl : public TextType
{
public:
	CollationImpl(TTYPE_ID a_type, TEXTTYPE a_tt, CharSet* a_cs)
		: TextType(a_type, a_tt, a_cs)
	{
	}

	virtual bool matches(thread_db* tdbb, const UCHAR* a, SLONG b, const UCHAR* c, SLONG d)
	{
		return pMatchesObjectImpl::evaluate(tdbb, this, a, b, c, d);
	}

	virtual bool sleuth_check(thread_db* tdbb, USHORT a, const UCHAR* b, SLONG c, const UCHAR* d, SLONG e)
	{
		return pSleuthObjectImpl::check(tdbb, this, a, b, c, d, e);
	}

	virtual ULONG sleuth_merge(thread_db* tdbb, const UCHAR* a, SLONG b, const UCHAR* c, SLONG d, UCHAR* e, SLONG f)
	{
		return pSleuthObjectImpl::merge(tdbb, this, a, b, c, d, e, f);
	}

	// One-shot LIKE of string s against pattern p.
	virtual bool like(thread_db* tdbb, const UCHAR* s, SLONG sl, const UCHAR* p, SLONG pl, const UCHAR* escape, SLONG escape_length)
	{
		return pLikeObjectImpl::evaluate(
			tdbb, this,
			s, sl,
			p, pl,
			escape, escape_length,
			getCharSet()->getSqlMatchAny(), getCharSet()->getSqlMatchAnyLength(),
			getCharSet()->getSqlMatchOne(), getCharSet()->getSqlMatchOneLength());
	}

	// Reusable LIKE object for pattern p.
	virtual LikeObject *like_create(thread_db* tdbb, const UCHAR* p, SLONG pl, const UCHAR* escape, SLONG escape_length)
	{
		return pLikeObjectImpl::create(
			tdbb, this,
			p, pl,
			escape, escape_length,
			getCharSet()->getSqlMatchAny(), getCharSet()->getSqlMatchAnyLength(),
			getCharSet()->getSqlMatchOne(), getCharSet()->getSqlMatchOneLength());
	}

	// One-shot CONTAINING: does string s contain pattern p?
	virtual bool contains(thread_db* tdbb, const UCHAR* s, SLONG sl, const UCHAR* p, SLONG pl)
	{
		return pContainsObjectImpl::evaluate(tdbb, this, s, sl, p, pl);
	}

	// Reusable CONTAINING object for pattern p.
	virtual ContainsObject *contains_create(thread_db* tdbb, const UCHAR* p, SLONG pl)
	{
		return pContainsObjectImpl::create(tdbb, this, p, pl);
	}
};

// Named instantiations of the predicate implementations, one family per
// element width (UCHAR / USHORT / ULONG).
//
// "direct" CONTAINING variants only upper-case the operands; they compare the
// strings in their original encoding.
typedef ContainsObjectImpl<UpcaseConverter<NullStrConverter>, UCHAR> uchar_contains_direct;
typedef ContainsObjectImpl<UpcaseConverter<NullStrConverter>, USHORT> ushort_contains_direct;
typedef ContainsObjectImpl<UpcaseConverter<NullStrConverter>, ULONG> ulong_contains_direct;

// "canonical" variants convert the operands to canonical form first;
// CONTAINING additionally upper-cases them before the canonical conversion.

// 1-byte canonical elements
typedef MatchesObjectImpl<CanonicalConverter<NullStrConverter>, UCHAR> uchar_matches_canonical;
typedef SleuthObjectImpl<CanonicalConverter<NullStrConverter>, UCHAR> uchar_sleuth_canonical;
typedef LikeObjectImpl<CanonicalConverter<NullStrConverter>, UCHAR> uchar_like_canonical;
typedef ContainsObjectImpl<CanonicalConverter<UpcaseConverter<NullStrConverter> >, UCHAR> uchar_contains_canonical;

// 2-byte canonical elements
typedef MatchesObjectImpl<CanonicalConverter<NullStrConverter>, USHORT> ushort_matches_canonical;
typedef SleuthObjectImpl<CanonicalConverter<NullStrConverter>, USHORT> ushort_sleuth_canonical;
typedef LikeObjectImpl<CanonicalConverter<NullStrConverter>, USHORT> ushort_like_canonical;
typedef ContainsObjectImpl<CanonicalConverter<UpcaseConverter<NullStrConverter> >, USHORT> ushort_contains_canonical;

// 4-byte canonical elements
typedef MatchesObjectImpl<CanonicalConverter<NullStrConverter>, ULONG> ulong_matches_canonical;
typedef SleuthObjectImpl<CanonicalConverter<NullStrConverter>, ULONG> ulong_sleuth_canonical;
typedef LikeObjectImpl<CanonicalConverter<NullStrConverter>, ULONG> ulong_like_canonical;
typedef ContainsObjectImpl<CanonicalConverter<UpcaseConverter<NullStrConverter> >, ULONG> ulong_contains_canonical;

CharSetContainer* CharSetContainer::lookupCharset(thread_db* tdbb, SSHORT ttype)
{
/**************************************
 *
 *      l o o k u p C h a r s e t
 *
 **************************************
 *
 * Functional description
 *
 *      Find the container of the character set encoded in the
 *      low byte of ttype (CS_dynamic is replaced by the
 *      attachment's character set).
 *
 *      The per-database vector dbb_charsets is consulted first
 *      and grown when the id lies beyond its end.  When no
 *      container exists yet, one is created in the database's
 *      permanent pool and cached in the vector.
 *
 * Returns:
 *      the container; an error is posted when the character
 *      set is unknown.
 *
 **************************************/
	SET_TDBB(tdbb);
	Database* dbb = tdbb->tdbb_database;

	USHORT id = TTYPE_TO_CHARSET(ttype);
	if (id == CS_dynamic)
		id = tdbb->tdbb_attachment->att_charset;

	// Either fetch the cached entry or make room for one.
	CharSetContainer* cs = NULL;
	if (id < dbb->dbb_charsets.size())
		cs = dbb->dbb_charsets[id];
	else
		dbb->dbb_charsets.resize(id + 10);

	if (cs)
		return cs;

	// Nothing cached: describe the character set and build its container.
	SubtypeInfo info;

	if (id == CS_UTF16)
		info.charsetName = "UTF16";

	// UTF16 is known without asking the metadata.
	const bool known = (id == CS_UTF16) || MET_get_char_coll_subtype_info(tdbb, id, &info);

	if (!known)
	{
		ERR_post(isc_text_subtype, isc_arg_number, (ISC_STATUS) ttype, 0);
	}
	else
	{
		cs = FB_NEW(*dbb->dbb_permanent) CharSetContainer(*dbb->dbb_permanent, id, &info);
		dbb->dbb_charsets[id] = cs;
	}

	return cs;
}

// Look up the character set named in info and wrap it in the matching CharSet
// subclass: MultiByteCharSet when minimum and maximum bytes per character
// differ, FixedWidthCharSet otherwise. A character set that cannot be found,
// or that is not flagged CHARSET_ASCII_BASED, is reported through ERR_post
// as isc_charset_not_installed.
CharSetContainer::CharSetContainer(MemoryPool& p, USHORT cs_id, const SubtypeInfo* info) :
	charset_collations(p),
	cs(NULL)
{
	charset* csL = FB_NEW(p) charset;
	memset(csL, 0, sizeof(charset));

	const bool usable =
		lookup_charset(csL, info) && ((csL->charset_flags & CHARSET_ASCII_BASED) != 0);

	if (!usable)
	{
		// Release the unused descriptor before reporting the failure.
		delete csL;
		ERR_post(isc_charset_not_installed, isc_arg_string, ERR_cstring(info->charsetName.c_str()), 0);
	}
	else if (csL->charset_min_bytes_per_char == csL->charset_max_bytes_per_char)
	{
		this->cs = FB_NEW(p) FixedWidthCharSet(cs_id, csL);
	}
	else
	{
		this->cs = FB_NEW(p) MultiByteCharSet(cs_id, csL);
	}
}

// Return a converter from this character set to to_cs. Only conversions that
// have UTF16 on one side are available: to UTF16 via this charset's
// to-Unicode converter, from UTF16 via the target charset's from-Unicode
// converter. Any other pair yields NULL.
CsConvert CharSetContainer::lookupConverter(thread_db* tdbb, CHARSET_ID to_cs)
{
	if (to_cs == CS_UTF16)
		return cs->getConvToUnicode();

	if (cs->getId() != CS_UTF16)
	{
		//// TODO: converters
		return NULL;
	}

	return INTL_charset_lookup(tdbb, to_cs)->getConvFromUnicode();
}

// Return the collation (TextType) encoded in tt_id, creating and caching it
// on first use.
//
// tt_id packs the character set id (low byte) and the collation id (upper
// bits). The cache is charset_collations, indexed by collation id.
//
// On a cache miss the collation's description is read through
// MET_get_char_coll_subtype_info, its specific attributes are converted from
// CS_METADATA to the collation's own character set (unless that already is
// CS_METADATA), and the texttype is resolved via lookup_texttype. The
// CollationImpl instantiation is then chosen by the canonical element width
// (1, 2 or 4 bytes) and by whether strings can be matched directly.
//
// Errors are posted with ERR_post: isc_text_subtype when the metadata knows
// nothing about tt_id, isc_collation_not_installed when the texttype cannot
// be resolved.
TextType* CharSetContainer::lookupCollation(thread_db* tdbb, USHORT tt_id)
{
	const USHORT id = TTYPE_TO_COLLATION(tt_id);

	// Fast path: already instantiated.
	if (id < charset_collations.getCount() && charset_collations[id] != NULL)
		return charset_collations[id];

	SubtypeInfo info;
	if (MET_get_char_coll_subtype_info(tdbb, tt_id, &info))
	{
		CharSet* charset = INTL_charset_lookup(tdbb, TTYPE_TO_CHARSET(tt_id));

		if (TTYPE_TO_CHARSET(tt_id) != CS_METADATA)
		{
			// Specific attributes arrive in CS_METADATA; hand them to the
			// collation in its own character set.
			Firebird::HalfStaticArray<UCHAR, 32> specificAttributes;
			ULONG size = info.specificAttributes.getCount() * charset->maxBytesPerChar();

			size = INTL_convert_bytes(tdbb, TTYPE_TO_CHARSET(tt_id),
									  specificAttributes.getBuffer(size), size,
									  CS_METADATA, info.specificAttributes.begin(),
									  info.specificAttributes.getCount(), ERR_post);
			specificAttributes.shrink(size);
			info.specificAttributes = specificAttributes;
		}

		TEXTTYPE tt = FB_NEW(*tdbb->tdbb_database->dbb_permanent) texttype;
		memset(tt, 0, sizeof(texttype));

		if (!lookup_texttype(tt, &info))
		{
			delete tt;
			ERR_post(isc_collation_not_installed,
				isc_arg_string, ERR_cstring(info.collationName.c_str()),
				isc_arg_string, ERR_cstring(info.charsetName.c_str()), 0);
		}

		if (charset_collations.getCount() <= id)
			charset_collations.grow(id + 1);

		if (charset_collations[id] == NULL)
		{
			// Canonical width and canonical function must be given together
			// or not at all.
			fb_assert((tt->texttype_canonical_width == 0 && tt->texttype_fn_canonical == NULL) ||
					  (tt->texttype_canonical_width != 0 && tt->texttype_fn_canonical != NULL));

			if (tt->texttype_canonical_width == 0)
			{
				if (charset->isMultiByte())
					tt->texttype_canonical_width = sizeof(ULONG);	// UTF-32
				else
				{
					tt->texttype_canonical_width = charset->minBytesPerChar();
					// canonical is equal to string, then TEXTTYPE_DIRECT_MATCH can be turned on
					tt->texttype_flags |= TEXTTYPE_DIRECT_MATCH;
				}
			}

			fb_assert(tt->texttype_canonical_width == 1 ||
					  tt->texttype_canonical_width == 2 ||
					  tt->texttype_canonical_width == 4);

			// For direct matching, CONTAINING must step through the string in
			// units of the character width. A byte-wise search over 2- or
			// 4-byte characters could report a hit that starts in the middle
			// of a character, hence the width-specific *_contains_direct
			// variants below.
			switch (tt->texttype_canonical_width)
			{
				case 1:
					if (tt->texttype_flags & TEXTTYPE_DIRECT_MATCH)
					{
						charset_collations[id] = FB_NEW(*tdbb->tdbb_database->dbb_permanent)
							CollationImpl<uchar_contains_direct, uchar_like_canonical,
								uchar_matches_canonical, uchar_sleuth_canonical>(tt_id, tt, charset);
					}
					else
					{
						charset_collations[id] = FB_NEW(*tdbb->tdbb_database->dbb_permanent)
							CollationImpl<uchar_contains_canonical, uchar_like_canonical,
								uchar_matches_canonical, uchar_sleuth_canonical>(tt_id, tt, charset);
					}
					break;

				case 2:
					if (tt->texttype_flags & TEXTTYPE_DIRECT_MATCH)
					{
						charset_collations[id] = FB_NEW(*tdbb->tdbb_database->dbb_permanent)
							CollationImpl<ushort_contains_direct, ushort_like_canonical,
								ushort_matches_canonical, ushort_sleuth_canonical>(tt_id, tt, charset);
					}
					else
					{
						charset_collations[id] = FB_NEW(*tdbb->tdbb_database->dbb_permanent)
							CollationImpl<ushort_contains_canonical, ushort_like_canonical,
								ushort_matches_canonical, ushort_sleuth_canonical>(tt_id, tt, charset);
					}
					break;

				case 4:
					if (tt->texttype_flags & TEXTTYPE_DIRECT_MATCH)
					{
						charset_collations[id] = FB_NEW(*tdbb->tdbb_database->dbb_permanent)
							CollationImpl<ulong_contains_direct, ulong_like_canonical,
								ulong_matches_canonical, ulong_sleuth_canonical>(tt_id, tt, charset);
					}
					else
					{
						charset_collations[id] = FB_NEW(*tdbb->tdbb_database->dbb_permanent)
							CollationImpl<ulong_contains_canonical, ulong_like_canonical,
								ulong_matches_canonical, ulong_sleuth_canonical>(tt_id, tt, charset);
					}
					break;

				default:
					// Unsupported canonical width (asserted against above).
					fb_assert(false);
					return NULL;
			}
		}
	}
	else
		ERR_post(isc_text_subtype, isc_arg_number, (ISC_STATUS) tt_id, 0);

	return charset_collations[id];
}


// Fill *cs with the character set named in info; the result is whatever
// IntlManager::lookupCharSet reports.
static INTL_BOOL lookup_charset(charset* cs, const SubtypeInfo* info)
{
	const INTL_BOOL found = IntlManager::lookupCharSet(info->charsetName, cs);
	return found;
}


// Fill *tt with the collation described by info (base collation name, charset
// name, attributes and specific attributes); the result is whatever
// IntlManager::lookupCollation reports.
static INTL_BOOL lookup_texttype(texttype* tt, const SubtypeInfo* info)
{
	const INTL_BOOL found = IntlManager::lookupCollation(
		info->baseCollationName,
		info->charsetName,
		info->attributes,
		info->specificAttributes.begin(),
		info->specificAttributes.getCount(),
		info->ignoreAttributes,
		tt);

	return found;
}


void Database::destroyIntlObjects()
{
	for (size_t i = 0; i < dbb_charsets.size(); i++)
		if (dbb_charsets[i])
			dbb_charsets[i]->destroy();
}


CHARSET_ID INTL_charset(thread_db* tdbb, USHORT ttype)
{
/**************************************
 *
 *      I N T L _ c h a r s e t
 *
 **************************************
 *
 * Functional description
 *      Map a text type to its character set ID.
 *      The well-known text types map to fixed character
 *      sets, ttype_dynamic maps to the attachment's
 *      character set, anything else to the low byte of
 *      the text type.
 *
 **************************************/

	if (ttype == ttype_none)
		return (CS_NONE);

	if (ttype == ttype_ascii)
		return (CS_ASCII);

	if (ttype == ttype_unicode_fss)
		return (CS_UNICODE_FSS);

	if (ttype == ttype_binary)
		return (CS_BINARY);

	if (ttype == ttype_dynamic)
	{
		// Only this case needs the thread context.
		SET_TDBB(tdbb);
		return (tdbb->tdbb_attachment->att_charset);
	}

	return (TTYPE_TO_CHARSET(ttype));
}


int INTL_compare(thread_db* tdbb,
				const dsc* pText1,
				const dsc* pText2,
				FPTR_ERROR err)
{
/**************************************
 *
 *      I N T L _ c o m p a r e
 *
 **************************************
 *
 * Functional description
 *      Compare two pieces of international text.
 *      When the operands are in different character sets,
 *      one of them is first converted (through a MAX_KEY
 *      sized stack buffer) to the character set of the text
 *      type chosen for the comparison.  The result is the
 *      value of that text type's compare().
 *
 **************************************/
	SET_TDBB(tdbb);

	fb_assert(pText1 != NULL);
	fb_assert(pText2 != NULL);
	fb_assert(IS_TEXT(pText1) && IS_TEXT(pText2));
	fb_assert(INTL_data_or_binary(pText1) || INTL_data_or_binary(pText2));
	fb_assert(err);

/* Fetch address, length and text type of both operands */

	USHORT t1;
	UCHAR* p1;
	USHORT length1 = CVT_get_string_ptr(pText1, &t1, &p1, NULL, 0, err);

	USHORT t2;
	UCHAR* p2;
	USHORT length2 = CVT_get_string_ptr(pText2, &t2, &p2, NULL, 0, err);

/* YYY - by SQL II compare_type must be explicit in the
   SQL statement if there is any doubt */

	SSHORT compare_type = MAX(t1, t2);	/* YYY */
	UCHAR buffer[MAX_KEY];

	if (t1 != t2)
	{
		const CHARSET_ID cs1 = INTL_charset(tdbb, t1);
		const CHARSET_ID cs2 = INTL_charset(tdbb, t2);

		if (cs1 != cs2)
		{
			if (compare_type == t2)
			{
				/* pText2's type was chosen: bring pText1 into
				   pText2's character set, if possible */

				length1 = INTL_convert_bytes(tdbb, cs2,
											 buffer, sizeof(buffer),
											 cs1, p1, length1, err);
				p1 = buffer;
			}
			else
			{
				/* pText1's type was chosen: bring pText2 into
				   pText1's character set, if possible */
				/* YYY - should failure to convert really return
				   an error here?
				   Support joining a 437 & Latin1 Column, and we
				   pick the compare_type as 437, still only want the
				   equal values....
				   But then, what about < operations, which make no
				   sense if the string cannot be expressed...
				 */

				length2 = INTL_convert_bytes(tdbb, cs1,
											 buffer, sizeof(buffer),
											 cs2, p2, length2, err);
				p2 = buffer;
			}
		}
	}

	TextType* const collation = INTL_texttype_lookup(tdbb, compare_type);

	return collation->compare(length1, p1, length2, p2);
}


ULONG INTL_convert_bytes(thread_db* tdbb,
						 CHARSET_ID dest_type,
						 BYTE* dest_ptr,
						 ULONG dest_len,
						 CHARSET_ID src_type,
						 const BYTE* src_ptr,
						 ULONG src_len,
						 FPTR_ERROR err)
{
/**************************************
 *
 *      I N T L _ c o n v e r t _ b y t e s
 *
 **************************************
 *
 * Functional description
 *      Given a string of bytes in one character set, convert it to another
 *      character set.
 *
 *      Three strategies are used:
 *        - destination BINARY/NONE or source NONE: plain byte copy;
 *        - a direct converter between the two character sets, when
 *          INTL_convert_lookup returns one;
 *        - otherwise a two step conversion through UTF16.
 *
 *      If (dest_ptr) is NULL, return the count of bytes needed to convert
 *      the string.  This does not guarantee the string can be converted,
 *      the purpose of this is to allocate a large enough buffer.
 *      (In this function that case is handled explicitly only for the
 *      byte copy strategy; for the others it is up to the converters.)
 *
 *      Truncation is tolerated when everything that did not fit consists
 *      of spaces.
 *
 * RETURNS:
 *      Length of resulting string, in bytes.
 *      calls (err) if conversion error occurs:
 *        isc_arith_except alone for truncation,
 *        isc_arith_except + isc_transliteration_failed otherwise.
 *
 **************************************/
	ULONG len;
	ULONG len2;
	USHORT err_code = 0;
	ULONG err_position;

	SET_TDBB(tdbb);

	fb_assert(src_ptr != NULL);
	fb_assert(src_type != dest_type);
	fb_assert(err != NULL);

	const UCHAR* const start_dest_ptr = dest_ptr;

	if ((dest_type == CS_BINARY) ||
		(dest_type == CS_NONE) ||
		(src_type == CS_NONE))
	{
		/* See if we just need a length estimate */
		if (dest_ptr == NULL)
			return (src_len);

		len = MIN(dest_len, src_len);
		if (len)
			do {
				*dest_ptr++ = *src_ptr++;
			} while (--len);

		/* See if only space characters are remaining */
		len = src_len - MIN(dest_len, src_len);
		if (!len || all_spaces(tdbb, src_type, src_ptr, len, 0))
			return (dest_ptr - start_dest_ptr);
		else
			(*err) (isc_arith_except, 0);
	}
	else if (src_len == 0)
		return (0);
	else if (src_type == CS_BINARY)
		(*err)(isc_arith_except, isc_arg_gds, isc_transliteration_failed, 0);
	else
		/* character sets are known to be different */
	{
		/* Do we know an object from cs1 to cs2? */

		CsConvert cs_obj = INTL_convert_lookup(tdbb, dest_type, src_type);
		if (cs_obj != NULL) {
			len = cs_obj.convert(src_len, src_ptr, dest_len, dest_ptr,
								 &err_code, &err_position);
			if (!err_code || ((err_code == CS_TRUNCATION_ERROR)
							  && all_spaces(tdbb, src_type, src_ptr, src_len,
											err_position)))
			{
				return (len);
			}
			else if (err_code == CS_TRUNCATION_ERROR)
				(*err) (isc_arith_except, 0);
			else
				(*err) (isc_arith_except, isc_arg_gds, isc_transliteration_failed, 0);
		}

		/* Find a CS1 to UNICODE object */

		CharSet* from_cs = INTL_charset_lookup(tdbb, src_type);

		/*
		   ** Temporary buffer for the intermediate Unicode string, sized at
		   ** sizeof(ULONG) bytes per source byte.  It is owned by the array
		   ** object, so it is released on every way out of this scope -
		   ** normal return, an exception from a converter or lookup, or an
		   ** (err) callback that throws - and it stays valid should (err)
		   ** ever return to us instead.
		 */
		Firebird::HalfStaticArray<UCHAR, BUFFER_SMALL> tmp_storage;
		const ULONG tmp_size = src_len * sizeof(ULONG);
		BYTE* const tmp_buffer = tmp_storage.getBuffer(tmp_size);

		cs_obj = from_cs->getConvToUnicode();
		fb_assert(cs_obj != NULL);
		len = cs_obj.convert(src_len, src_ptr, tmp_size, tmp_buffer,
							 &err_code, &err_position);
		if (err_code && !((err_code == CS_TRUNCATION_ERROR)
						  && all_spaces(tdbb, src_type, src_ptr, src_len,
										err_position)))
		{
			if (err_code == CS_TRUNCATION_ERROR)
				(*err) (isc_arith_except, 0);
			else
				(*err) (isc_arith_except, isc_arg_gds, isc_transliteration_failed, 0);
		}

		/* Find a UNICODE to CS2 object */

		CharSet* to_cs = INTL_charset_lookup(tdbb, dest_type);

		cs_obj = to_cs->getConvFromUnicode();
		fb_assert(cs_obj != NULL);
		len2 = cs_obj.convert(len, tmp_buffer, dest_len, dest_ptr,
							&err_code, &err_position);

		/* The intermediate string is UTF16, so that is the character set
		   used when checking whether only spaces were cut off */
		if (err_code &&
			!((err_code == CS_TRUNCATION_ERROR) &&
			  all_spaces(tdbb, CS_UTF16, tmp_buffer, len, err_position)))
		{
			if (err_code == CS_TRUNCATION_ERROR)
				(*err) (isc_arith_except, 0);
			else
				(*err) (isc_arith_except, isc_arg_gds, isc_transliteration_failed, 0);
		}

		return (len2);
	}
	return (0);					/* to remove compiler errors.  This should never be executed */
}


CsConvert INTL_convert_lookup(thread_db* tdbb,
								CHARSET_ID to_cs,
								CHARSET_ID from_cs)
{
/**************************************
 *
 *      I N T L _ c o n v e r t _ l o o k u p
 *
 **************************************
 *
 * Functional description
 *      Return the converter from character set from_cs to
 *      character set to_cs.  CS_dynamic on either side is
 *      replaced by the attachment's character set before
 *      the source character set's container is asked for
 *      the converter.
 *
 **************************************/

	SET_TDBB(tdbb);
	Database* dbb = tdbb->tdbb_database;
	CHECK_DBB(dbb);

	from_cs = (from_cs == CS_dynamic) ? tdbb->tdbb_attachment->att_charset : from_cs;
	to_cs = (to_cs == CS_dynamic) ? tdbb->tdbb_attachment->att_charset : to_cs;

/* Should from_cs == to_cs? be handled better? YYY */

	fb_assert(from_cs != CS_dynamic);
	fb_assert(to_cs != CS_dynamic);

	return CharSetContainer::lookupCharset(tdbb, from_cs)->lookupConverter(tdbb, to_cs);
}


int INTL_convert_string(dsc* to, const dsc* from, FPTR_ERROR err)
{
/**************************************
 *
 *      I N T L _ c o n v e r t _ s t r i n g
 *
 **************************************
 *
 * Functional description
 *      Convert a string from one type to another.
 *
 *      The destination may be dtype_text, dtype_cstring or
 *      dtype_varying.  When both character sets differ and
 *      neither side is CS_NONE (and the destination is not
 *      CS_BINARY) the bytes go through INTL_convert_bytes;
 *      otherwise they are byte-copied after a wellFormed()
 *      check against the destination character set.
 *
 *      dtype_text is padded with the destination charset's
 *      space, dtype_cstring is NUL terminated and
 *      dtype_varying gets its vary_length set.
 *
 *      (err) is invoked with:
 *        isc_malformed_string  byte-copied data is not well
 *                              formed in the destination charset
 *        isc_arith_except      the result holds more characters
 *                              than the destination allows, or
 *                              source bytes that did not fit are
 *                              not all spaces
 *
 * RETURNS:
 *      0 if no error in conversion
 *      non-zero otherwise (no thread context, or a destination
 *      dtype that is not one of the three text dtypes).
 *      CVC: Unfortunately, this function puts the source in the 2nd param,
 *      as opposed to the CVT routines, so const help mitigating coding mistakes.
 *
 **************************************/

/* Note: This function is called from outside the engine as
   well as inside - we likely can't get rid of JRD_get_thread_data here */
	thread_db* tdbb = JRD_get_thread_data();
	if (tdbb == NULL)			/* are we in the Engine? */
		return (1);				/* no, then can't access intl gah */

	fb_assert(to != NULL);
	fb_assert(from != NULL);
	fb_assert(IS_TEXT(to) && IS_TEXT(from));

	CHARSET_ID from_cs = INTL_charset(tdbb, INTL_TTYPE(from));
	CHARSET_ID to_cs = INTL_charset(tdbb, INTL_TTYPE(to));

	UCHAR* start = to->dsc_address;
	UCHAR* p = start;

/* Must convert dtype(cstring,text,vary) and ttype(ascii,binary,..intl..) */

	UCHAR* from_ptr;
	USHORT from_type;
	const USHORT from_len =
		CVT_get_string_ptr(from, &from_type, &from_ptr, NULL, 0, err);

	const ULONG to_size = TEXT_LEN(to);
	ULONG to_len = to_size;

/* These used to be assigned only inside the switch below, so a
   destination dtype outside the three text dtypes read them
   uninitialized.  Give them defined values up front. */
	ULONG to_fill = 0;
	ULONG from_fill = 0;		/* stays 0 when INTL_convert_bytes handles source truncation */
	ULONG toLength = 0;

	const UCHAR* q = from_ptr;
	CharSet* toCharSet = INTL_charset_lookup(tdbb, to_cs);

/* A real transliteration is needed only between two distinct,
   typed character sets; everything else is a byte copy */
	const bool transliterate =
		(from_cs != to_cs) && (to_cs != CS_BINARY) && (to_cs != CS_NONE) && (from_cs != CS_NONE);

	switch (to->dsc_dtype) {
	case dtype_text:
		if (transliterate) {
			to_len = INTL_convert_bytes(tdbb, to_cs, to->dsc_address, to_size,
										from_cs, from_ptr, from_len, err);
			p += to_len;
		}
		else {
			/* binary string can always be converted TO by byte-copy */

			to_len = MIN(from_len, to_size);
			if (!toCharSet->wellFormed(to_len, q))
				(*err)(isc_malformed_string, 0);
			from_fill = from_len - to_len;
			for (ULONG n = to_len; n; --n)
				*p++ = *q++;
		}
		toLength = to_len;

		/* Fixed length text: fill what is left with spaces */
		to_fill = to_size - to_len;
		if (to_fill > 0)
			pad_spaces(tdbb, to_cs, p, to_fill);
		break;

	case dtype_cstring:
		if (transliterate) {
			to_len = INTL_convert_bytes(tdbb, to_cs, to->dsc_address, to_size,
										from_cs, from_ptr, from_len, err);
			to->dsc_address[to_len] = 0;
		}
		else {
			/* binary string can always be converted TO by byte-copy */

			to_len = MIN(from_len, to_size);
			if (!toCharSet->wellFormed(to_len, q))
				(*err)(isc_malformed_string, 0);
			from_fill = from_len - to_len;
			for (ULONG n = to_len; n; --n)
				*p++ = *q++;
			*p = 0;
		}
		toLength = to_len;
		break;

	case dtype_varying:
		if (transliterate) {
			/* The text starts after the length word of the vary */
			start = reinterpret_cast<UCHAR*>(((vary*) to->dsc_address)->vary_string);
			to_len = INTL_convert_bytes(tdbb, to_cs, start, to_size,
										from_cs, from_ptr, from_len, err);
			((vary*) to->dsc_address)->vary_length = to_len;
		}
		else {
			/* binary string can always be converted TO by byte-copy */
			to_len = MIN(from_len, to_size);
			if (!toCharSet->wellFormed(to_len, q))
				(*err)(isc_malformed_string, 0);
			from_fill = from_len - to_len;
			((vary*) p)->vary_length = to_len;
			start = p = reinterpret_cast<UCHAR*>(((vary*) p)->vary_string);
			for (ULONG n = to_len; n; --n)
				*p++ = *q++;
		}
		toLength = to_len;
		break;

	default:
		/* Not a text destination (already rejected by the fb_assert
		   above when assertions are active): nothing was written,
		   report failure as documented */
		return (1);
	}

/* (start) addresses the first data byte of the result and (toLength)
   is its byte length: make sure the character count fits */

	if (toCharSet->isMultiByte() &&
		!(toCharSet->getFlags() & CHARSET_LEGACY_SEMANTICS) &&
		toLength != 31 &&	/* allow non CHARSET_LEGACY_SEMANTICS to be used as connection charset */
		toCharSet->length(tdbb, toLength, start, false) > to_size / toCharSet->maxBytesPerChar())
	{
		(*err)(isc_arith_except, 0);
	}

	if (from_fill)
		/* Make sure remaining characters on From string are spaces */
		if (!all_spaces(tdbb, from_cs, q, from_fill, 0))
			(*err) (isc_arith_except, 0);

	return 0;
}


int INTL_data(const dsc* pText)
{
/**************************************
 *
 *      I N T L _ d a t a
 *
 **************************************
 *
 * Functional description
 *      Tell whether a descriptor holds international text,
 *      i.e. text subject to user defined or non-binary
 *      collation or comparison.
 *
 * Return:
 *      TRUE    the descriptor is text and its text type is
 *              not one of the internal ones
 *      FALSE   otherwise
 *
 **************************************/

	fb_assert(pText != NULL);

	return (IS_TEXT(pText) && !INTERNAL_TTYPE(pText)) ? TRUE : FALSE;
}

int INTL_data_or_binary(const dsc* pText)
{
/**************************************
 *
 *      I N T L _ d a t a _ o r _ b i n a r y
 *
 **************************************
 *
 * Functional description
 *      Return non-zero when the descriptor is international
 *      text (see INTL_data) or when its text type is
 *      ttype_binary; zero otherwise.
 *
 **************************************/

	if (INTL_data(pText))
		return 1;

	return (pText->dsc_ttype() == ttype_binary) ? 1 : 0;
}


bool INTL_defined_type(thread_db* tdbb, SSHORT t_type)
{
/**************************************
 *
 *      I N T L _ d e f i n e d _ t y p e
 *
 **************************************
 *
 * Functional description
 *      Is (t_type) a known text type?
 *      The answer is obtained by attempting a texttype
 *      lookup and seeing whether it throws.
 *
 * Return:
 *      false   type is not defined.
 *      true    type is defined
 *
 * Note:
 *      Due to cleanup that must happen in DFW, this routine
 *      must return, and not call ERR directly.  The lookup
 *      therefore runs against a scratch status vector and the
 *      caller's vector is put back before returning.
 *
 **************************************/
	SET_TDBB(tdbb);

	ISC_STATUS* const saved_status = tdbb->tdbb_status_vector;
	ISC_STATUS_ARRAY scratch_status;
	tdbb->tdbb_status_vector = scratch_status;

	bool known;
	try
	{
		INTL_texttype_lookup(tdbb, t_type);
		known = true;
	}
	catch (...)
	{
		/* Any failure of the lookup means "not defined" */
		known = false;
	}

	tdbb->tdbb_status_vector = saved_status;

	return known;
}


void INTL_init(thread_db* tdbb)
{
/**************************************
 *
 *      I N T L _ i n i t
 *
 **************************************
 *
 * Functional description
 *      Currently a no-op: the body is empty and (tdbb)
 *      is not used.
 *
 **************************************/
}


USHORT INTL_key_length(thread_db* tdbb, USHORT idxType, USHORT iLength)
{
/**************************************
 *
 *      I N T L _ k e y _ l e n g t h
 *
 **************************************
 *
 * Functional description
 *      Given an index type, and a maximum length (iLength)
 *      return the length of the byte string key descriptor to
 *      use when collating text of this type.
 *
 *      Internal text types use (iLength) as is; any other
 *      text type is asked for its key length.
 *
 **************************************/
	SET_TDBB(tdbb);

	fb_assert(idxType >= idx_first_intl_string);

	const SSHORT ttype = INTL_INDEX_TO_TEXT(idxType);

	USHORT result = iLength;
	if (ttype < 0 || ttype > ttype_last_internal)
		result = INTL_texttype_lookup(tdbb, ttype)->key_length(iLength);

/* Validity checks on the computed length: cap it at MAX_KEY first,
   then never report less than the caller's (iLength).  The second
   check runs last, so it wins when (iLength) itself exceeds MAX_KEY. */

	if (result > MAX_KEY)
		result = MAX_KEY;

	if (result < iLength)
		result = iLength;

	return result;
}


CharSet* INTL_charset_lookup(thread_db* tdbb, SSHORT parm1)
{
/**************************************
 *
 *      I N T L _ c h a r s e t _ l o o k u p
 *
 **************************************
 *
 * Functional description
 *
 *      Lookup a character set descriptor.
 *
 *      The work is delegated to
 *      CharSetContainer::lookupCharset(); this routine only
 *      hands back the CharSet held by the container found
 *      for (parm1).
 *
 * Returns:
 *      *charset        - if no errors;
 *      <never>         - if error
 *
 **************************************/
	return CharSetContainer::lookupCharset(tdbb, parm1)->getCharSet();
}


TextType* INTL_texttype_lookup(thread_db* tdbb,
								SSHORT parm1)
{
/**************************************
 *
 *      I N T L _ t e x t t y p e _ l o o k u p
 *
 **************************************
 *
 * Functional description
 *
 *      Lookup a texttype (collation) descriptor object.
 *
 *      ttype_dynamic is first mapped to the text type of
 *      the current attachment's character set.  The
 *      container for the character set is then located
 *      with CharSetContainer::lookupCharset() and asked
 *      for the collation matching (parm1).
 *
 * Returns:
 *      *object         - if no errors;
 *      <never>         - if error
 *
 **************************************/
	SET_TDBB(tdbb);

	if (parm1 == ttype_dynamic)
		parm1 = MAP_CHARSET_TO_TTYPE(tdbb->tdbb_attachment->att_charset);

	return CharSetContainer::lookupCharset(tdbb, parm1)->lookupCollation(tdbb, parm1);
}


bool INTL_texttype_validate(Jrd::thread_db* tdbb, const SubtypeInfo* info)
{
/**************************************
 *
 *      I N T L _ t e x t t y p e _ v a l i d a t e
 *
 **************************************
 *
 * Functional description
 *      Check that a texttype can be looked up for (info).
 *      The lookup fills a zeroed scratch texttype, which is
 *      destroyed again (when it supplies a destroy routine)
 *      before returning.
 *
 * Return:
 *      true    lookup_texttype() succeeded
 *      false   lookup_texttype() failed
 *
 **************************************/
	SET_TDBB(tdbb);

	texttype scratch;
	memset(&scratch, 0, sizeof(scratch));

	if (!lookup_texttype(&scratch, info))
		return false;

	/* Only a successful lookup leaves something to release */
	if (scratch.texttype_fn_destroy)
		scratch.texttype_fn_destroy(&scratch);

	return true;
}


void INTL_pad_spaces(thread_db* tdbb, DSC * type, UCHAR * string, ULONG length)
{
/**************************************
 *
 *      I N T L _ p a d _ s p a c e s
 *
 **************************************
 *
 * Functional description
 *      Fill (length) bytes at (string) with the space
 *      character defined by the character set of the
 *      text descriptor (type).
 *
 **************************************/
	SET_TDBB(tdbb);

	fb_assert(string != NULL);
	fb_assert(type != NULL);
	fb_assert(IS_TEXT(type));

	/* Resolve the descriptor's text type to its character set */
	const USHORT cs_id = INTL_charset(tdbb, type->dsc_ttype());

	pad_spaces(tdbb, cs_id, string, length);
}


USHORT INTL_string_to_key(thread_db* tdbb,
						USHORT idxType,
						const dsc* pString,
						DSC* pByte,
						USHORT key_type)
{
/**************************************
 *
 *      I N T L _ s t r i n g _ t o _ k e y
 *
 **************************************
 *
 * Functional description
 *      Given an input string, convert it to a byte string
 *      that will collate naturally (byte order).
 *
 *      For the metadata, binary, ascii and none text types
 *      the key is the source bytes, cut to the size of
 *      (pByte), with trailing pad characters stripped.
 *      Every other text type delegates to the string_to_key
 *      method of its TextType object.
 *
 *      Return the length of the resulting byte string.
 *
 **************************************/
	SET_TDBB(tdbb);

	fb_assert(idxType >= idx_first_intl_string || idxType == idx_string
		   || idxType == idx_byte_array || idxType == idx_metadata);
	fb_assert(pString != NULL);
	fb_assert(pByte != NULL);
	fb_assert(pString->dsc_address != NULL);
	fb_assert(pByte->dsc_address != NULL);
	fb_assert(pByte->dsc_dtype == dtype_text);

/* The index type selects the text type of the key and the
   character stripped from the end of byte-copied keys */

	UCHAR pad_char;
	SSHORT ttype;

	if (idxType == idx_string) {
		pad_char = ' ';
		ttype = ttype_none;
	}
	else if (idxType == idx_byte_array) {
		pad_char = 0;
		ttype = ttype_binary;
	}
	else if (idxType == idx_metadata) {
		pad_char = ' ';
		ttype = ttype_metadata;
	}
	else {
		pad_char = 0;
		ttype = INTL_INDEX_TO_TEXT(idxType);
	}

/* Make a string into the proper type of text */

	MoveBuffer temp;
	UCHAR* src;
	USHORT len =
		MOV_make_string2(pString, ttype, &src, temp);

	char* const key_start = reinterpret_cast<char*>(pByte->dsc_address);

	const bool byte_copy =
		(ttype == ttype_metadata) || (ttype == ttype_binary) ||
		(ttype == ttype_ascii) || (ttype == ttype_none);

	if (!byte_copy) {
		/* Collation driven key: let the texttype build it */
		TextType* obj = INTL_texttype_lookup(tdbb, ttype);
		return obj->string_to_key(len,
								  reinterpret_cast<const unsigned char*>(src),
								  pByte->dsc_length,
								  reinterpret_cast<unsigned char*>(key_start),
								  key_type);
	}

/* Copy as many bytes as both the source and the key buffer allow */

	char* dest = key_start;
	for (USHORT room = pByte->dsc_length; len && room; --len, --room)
		*dest++ = *src++;

/* strip off ending pad characters */

	while (dest > key_start && *(dest - 1) == pad_char)
		dest--;

	return (dest - key_start);
}


int INTL_str_to_upper(thread_db* tdbb, DSC * pString)
{
/**************************************
 *
 *      I N T L _ s t r _ t o _ u p p e r
 *
 **************************************
 *
 * Functional description
 *      Given an input string, convert it to uppercase.
 *      The conversion is done in place on the bytes that
 *      CVT_get_string_ptr() hands back for (pString).
 *
 *      ttype_binary    left untouched
 *      none / ascii    each byte goes through UPPER7
 *      anything else   the TextType's str_to_upper is used
 *
 * Return:
 *      Always 0.
 *
 **************************************/
	SET_TDBB(tdbb);

	fb_assert(pString != NULL);
	fb_assert(pString->dsc_address != NULL);

	UCHAR buffer[MAX_KEY];
	UCHAR* text;
	USHORT ttype;
	USHORT len =
		CVT_get_string_ptr(pString, &ttype, &text,
						   reinterpret_cast<vary*>(buffer),
						   sizeof(buffer), ERR_post);

	if (ttype == ttype_binary) {
		/* cannot uppercase binary strings */
	}
	else if (ttype == ttype_none || ttype == ttype_ascii) {
		for (UCHAR* const stop = text + len; text < stop; ++text)
			*text = UPPER7(*text);
	}
	else {
		TextType* obj = INTL_texttype_lookup(tdbb, ttype);
		obj->str_to_upper(len, text, len, text);	// ASF: this works for all cases? (src and dst buffers are the same)
	}

/*
 * A value is returned only to keep compilers quiet. Callers are
 * not checking the return code from this function 4/5/95.
*/
	return (0);
}


int INTL_str_to_lower(thread_db* tdbb, DSC * pString)
{
/**************************************
 *
 *      I N T L _ s t r _ t o _ l o w e r
 *
 **************************************
 *
 * Functional description
 *      Given an input string, convert it to lowercase.
 *      The conversion is done in place on the bytes that
 *      CVT_get_string_ptr() hands back for (pString).
 *
 *      ttype_binary    left untouched
 *      none / ascii    each byte goes through LOWWER7
 *      anything else   the TextType's str_to_lower is used
 *
 * Return:
 *      Always 0.
 *
 **************************************/
	SET_TDBB(tdbb);

	fb_assert(pString != NULL);
	fb_assert(pString->dsc_address != NULL);

	UCHAR buffer[MAX_KEY];
	UCHAR* text;
	USHORT ttype;
	USHORT len =
		CVT_get_string_ptr(pString, &ttype, &text,
						   reinterpret_cast<vary*>(buffer),
						   sizeof(buffer), ERR_post);

	if (ttype == ttype_binary) {
		/* cannot lowercase binary strings */
	}
	else if (ttype == ttype_none || ttype == ttype_ascii) {
		for (UCHAR* const stop = text + len; text < stop; ++text)
			*text = LOWWER7(*text);
	}
	else {
		TextType* obj = INTL_texttype_lookup(tdbb, ttype);
		obj->str_to_lower(len, text, len, text);	// ASF: this works for all cases? (src and dst buffers are the same)
	}

/*
 * A value is returned only to keep compilers quiet. Callers are
 * not checking the return code from this function 4/5/95.
*/
	return (0);
}


static bool all_spaces(
						  thread_db* tdbb,
						  CHARSET_ID charset,
						  const BYTE* ptr, ULONG len, ULONG offset)
{
/**************************************
 *
 *      a l l _ s p a c e s
 *
 **************************************
 *
 * Functional description
 *      determine if the bytes ptr[offset] ... ptr[len - 1] are entirely
 *      spaces, as per the space definition of (charset).
 *      The binary representation of a Space is character-set dependent.
 *      (0x20 for Ascii, 0x0020 for Unicode, 0x20 for SJIS, but must watch for
 *      0x??20, which is NOT a space.
 *
 *      An empty range (offset >= len) counts as all spaces.
 **************************************/
	SET_TDBB(tdbb);

	fb_assert(ptr != NULL);

	CharSet* obj = INTL_charset_lookup(tdbb, charset);

/*
 * We are assuming offset points to the first byte which was not
 * consumed in a conversion.  And that offset is pointing
 * to a character boundary
 */

	const BYTE* cursor = ptr + offset;
	const BYTE* const limit = ptr + len;

// Single-octet character sets are optimized here

	if (obj->getSpaceLength() == 1) {
		for (; cursor < limit; ++cursor) {
			if (*cursor != *obj->getSpace())
				return false;
		}
		return true;
	}

// Multi-octet space: match the space's byte sequence over and over

	const unsigned char* blank = obj->getSpace();
	const unsigned char* const blank_end = &blank[obj->getSpaceLength()];

	while (cursor < limit) {
		for (blank = obj->getSpace(); cursor < limit && blank < blank_end; ++cursor, ++blank) {
			if (*cursor != *blank)
				return false;
		}
	}

	return true;
}


static void pad_spaces(thread_db* tdbb, CHARSET_ID charset, BYTE* ptr, ULONG len)
{								/* byte count */
/**************************************
 *
 *      p a d  _ s p a c e s
 *
 **************************************
 *
 * Functional description
 *      Fill the (len) bytes starting at (ptr) with the space
 *      character defined by (charset).  For a multi-octet
 *      space the byte sequence is repeated; (len) is expected
 *      to be a whole number of space characters.
 *
 **************************************/
	SET_TDBB(tdbb);

	fb_assert(ptr != NULL);

	CharSet* obj = INTL_charset_lookup(tdbb, charset);

	const BYTE* const limit = &ptr[len];

/* Single-octet character sets are optimized here */
	if (obj->getSpaceLength() == 1) {
		for (; ptr < limit; ++ptr)
			*ptr = *obj->getSpace();
		return;
	}

	const UCHAR* blank = obj->getSpace();
	const UCHAR* const blank_end = &blank[obj->getSpaceLength()];

	while (ptr < limit) {
		/* Lay down one space character, byte by byte */
		for (blank = obj->getSpace(); ptr < limit && blank < blank_end; ++ptr, ++blank)
			*ptr = *blank;

		/* This fb_assert is checking that we didn't have a buffer-end
		 * in the middle of a space character
		 */
		fb_assert(!(ptr == limit) || (blank == blank_end));
	}
}