/*************  history ************
*
*       COMPONENT: JRD  MODULE: INTL.CPP
*       generated by Marion V2.5     2/6/90
*       from dev              db        on 4-JAN-1995
*****************************************************************
*
*       PR	2002-06-02 Added ugly c hack in
*       intl_back_compat_alloc_func_lookup.
*       When someone has time we need to change the references to
*       return (void*) function to something more C++ like
*
*       42 4711 3 11 17  tamlin   2001
*       Added silly numbers before my name, and converted it to C++.
*
*       18850   daves   4-JAN-1995
*       Fix gds__alloc usage
*
*       18837   deej    31-DEC-1994
*       fixing up HARBOR_MERGE
*
*       18821   deej    27-DEC-1994
*       HARBOR MERGE
*
*       18789   jdavid  19-DEC-1994
*       Cast some functions
*
*       17508   jdavid  15-JUL-1994
*       Bring it up to date
*
*       17500   daves   13-JUL-1994
*       Bug 6645: Different calculation of partial keys
*
*       17202   katz    24-MAY-1994
*       PC_PLATFORM requires the .dll extension
*
*       17191   katz    23-MAY-1994
*       OS/2 requires the .dll extension
*
*       17180   katz    23-MAY-1994
*       Define location of DLL on OS/2
*
*       17149   katz    20-MAY-1994
*       In JRD, gds_arg_number arguments are SLONG's not int's
*
*       16633   daves   19-APR-1994
*       Bug 6202: International licensing uses INTERNATIONAL product code
*
*       16555   katz    17-APR-1994
*       The last argument of calls to ERR_post should be 0
*
*       16521   katz    14-APR-1994
*       Borland C needs a decorated symbol to lookup
*
*       16403   daves   8-APR-1994 
*       Bug 6441: Emit an error whenever transliteration from ttype_binary attempted
*
*       16141   katz    28-MAR-1994
*       Don't declare return value from ISC_lookup_entrypoint as API_ROUTINE
*
 * The contents of this file are subject to the Interbase Public
 * License Version 1.0 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy
 * of the License at http://www.Inprise.com/IPL.html
 *
 * Software distributed under the License is distributed on an
 * "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express
 * or implied. See the License for the specific language governing
 * rights and limitations under the License.
 *
 * The Original Code was created by Inprise Corporation
 * and its predecessors. Portions created by Inprise Corporation are
 * Copyright (C) Inprise Corporation.
 *
 * All Rights Reserved.
 * Contributor(s): ______________________________________.
 *
 * 2002.10.29 Sean Leyne - Removed obsolete "Netware" port
 *
 * 2002.10.30 Sean Leyne - Removed support for obsolete "PC_PLATFORM" define
 *
*/


/*
 *      PROGRAM:        JRD Intl
 *      MODULE:         intl.c
 *      DESCRIPTION:    International text support routines
 *
 * copyright (c) 1992, 1993 by Borland International
 */

#include "firebird.h"
#include <string.h>
#include "../jrd/ib_stdio.h"
#include "../jrd/jrd.h"
#include "../jrd/req.h"
#include "../jrd/val.h"
#include "gen/codes.h"
#include "../jrd/intl.h"
#include "../jrd/intl_classes.h"
#include "../jrd/ods.h"
#include "../jrd/btr.h"
#include "../intl/charsets.h"
#include "../intl/country_codes.h"
#include "../jrd/gdsassert.h"
#include "../jrd/license.h"
#ifdef INTL_BUILTIN
#include "../intl/ld_proto.h"
#endif
#include "../jrd/all_proto.h"
#include "../jrd/cvt_proto.h"
#include "../jrd/err_proto.h"
#include "../jrd/evl_proto.h"
#include "../jrd/flu_proto.h"
#include "../jrd/fun_proto.h"
#include "../jrd/gds_proto.h"
#include "../jrd/iberr_proto.h"
#include "../jrd/intl_proto.h"
#include "../jrd/isc_proto.h"
#include "../jrd/thd_proto.h"

#include "../jrd/plugin_manager.h"

#ifdef DEV_BUILD

/* True when (x), with its top bit masked off, is not below the space
 * character.  Only defined in DEV_BUILD builds. */
#define isprintable(x)  ((((unsigned char)(x)) & 0x7F) >= ' ')

/* INTL_TRACE takes a parenthesised argument list, e.g.
 *      INTL_TRACE(("fmt %d\n", value));
 * It expands to a gds__log call only when both DEV_BUILD and DEBUG_INTL
 * are defined; in every other configuration it expands to nothing. */
#ifdef DEBUG_INTL
#define INTL_TRACE(args)     gds__log args
#else
#define INTL_TRACE(args)
#endif

#else
#define INTL_TRACE(args)
#endif
/* True when descriptor pointer (x) has one of the three string dtypes
 * (text, varying, cstring). */
#define IS_TEXT(x)      (((x)->dsc_dtype == dtype_text)   ||\
			 ((x)->dsc_dtype == dtype_varying)||\
			 ((x)->dsc_dtype == dtype_cstring))

/* A text type value is split here into a character set id (low byte)
 * and a collation id (the remaining high bits). */
#define TTYPE_TO_CHARSET(tt)    ((SSHORT)((tt) & 0x00FF))
#define TTYPE_TO_COLLATION(tt)  ((SSHORT)((tt) >> 8))


typedef unsigned char FILECHAR;

/* Forward declarations of helpers private to this file. */

/* all_spaces / pad_spaces are defined outside the visible part of this
 * file; judging by their call sites they test for / write the space
 * character of a given character set -- confirm against the definitions. */
static bool all_spaces(TDBB, CHARSET_ID, const BYTE*, USHORT, USHORT);
static void pad_spaces(TDBB, CHARSET_ID, BYTE *, USHORT);
/* Locates the init function for a charset, text type or converter. */
static void* lookup_init_function(USHORT, SSHORT, SSHORT);
/* Fills in the engine-supplied function pointers of a text type. */
static void finish_texttype_init(TEXTTYPE);
/* Default "to wide character" routines installed by finish_texttype_init
 * for one-byte (nc) and two-byte (wc) text types. */
static USHORT nc_to_wc(CSCONVERT, WCHAR *, USHORT, UCHAR *, USHORT, SSHORT *,
					   USHORT *);
static USHORT wc_to_wc(CSCONVERT, WCHAR *, USHORT, WCHAR *, USHORT, SSHORT *,
					   USHORT *);

/* Name of module that implements text-type (n) */
/* INTL_MODULE1 / INTL_MODULE2 are the names handed to the plugin manager
 * by lookup_init_function; the spelling depends on the platform. */

#ifdef VMS
/* Note: MUST be only the file name.  The VMS lib$find_shared_image
 *       call insists on file name only, not any "path" components.
 */
#define INTL_MODULE1    "FBINTL"
#define INTL_MODULE2    "FBINTL2"
#endif

#ifdef WIN_NT
/* prefixed with $INTERBASE */
#define INTL_MODULE1 "fbintl.dll"
#define INTL_MODULE2 "fbintl2.dll"
#endif

/* Default for every platform that did not define the names above. */
#ifndef INTL_MODULE1
/* prefixed with $INTERBASE */
#define INTL_MODULE1 "fbintl"
#define INTL_MODULE2 "fbintl2"
#endif

/* Entry points resolved inside INTL_MODULE1 and INTL_MODULE2 respectively,
 * and the printf-style pattern for the name of a user-defined text type
 * function (the number is the text type id). */
#define INTL_LOOKUP_ENTRY1      "LD_lookup"
#define INTL_LOOKUP_ENTRY2      "LD2_lookup"
#define INTL_USER_ENTRY         "USER_TEXTTYPE_%03d"

// We need all the structure definitions from the old interface
#define INTL_ENGINE_INTERNAL
#include "../jrd/intlobj.h"

// storage for the loadable modules
static PluginManager intlBCPlugins;
// set once INTL_PLUGIN_DIR has been added to intlBCPlugins' search path
static bool bcLoaded = false;

// directory name registered as the plugin search path on first lookup
static const char *INTL_PLUGIN_DIR = "intl";

// Key extractor used by the sorted array of converters: a converter is
// keyed by the id of the character set it converts to.
class CharsetIDGetter
{
public:
	static CHARSET_ID generate(void *sender, const CsConvert& Item)
	{
		// sender is not needed to derive the key
		const CHARSET_ID target_id = Item.getToCS();
		return target_id;
	}
};

// Classes and structures used internally to this file and intl implementation
// Classes and structures used internally to this file and intl implementation
//
// CharSetContainer bundles one character set with the collations and
// converters that have already been looked up for it, so later lookups
// can be answered from the cached arrays.
class CharSetContainer
{
public:
	// Looks up and initialises the character set cs_id; the arrays are
	// created in pool p.  When initialisation fails getCharSet() compares
	// equal to NULL.
	CharSetContainer(MemoryPool *p, USHORT cs_id);
	
	// The wrapped character set (by value).
	CharSet getCharSet() { return cs; }
	
	// Returns the text type for tt_id, creating and caching it on first
	// use; NULL when it cannot be created.
	TextType lookupCollation(TDBB tdbb, USHORT tt_id);
	
	// Returns a converter from this character set to to_cs, or NULL when
	// none is available.
	CsConvert lookupConverter(TDBB tdbb, CHARSET_ID to_cs);
	
	// Finds (or creates and registers in the database's charset vector)
	// the container for the character set part of ttype.
	static CharSetContainer* lookupCharset(TDBB tdbb, SSHORT ttype, ISC_STATUS *status);
	
private:
	// NOTE: the constructor's initialiser list follows this declaration order.

	// converters already created, sorted by destination character set id
	Firebird::SortedArray<CsConvert, CHARSET_ID, CharsetIDGetter> charset_converters;
	// text types already created, indexed by collation id
	Firebird::Array<TextType> charset_collations;
	// destination ids for which converter creation has already failed
	Firebird::SortedArray<CHARSET_ID> impossible_conversions;
	// the character set itself
	CharSet cs;
};

CharSetContainer* CharSetContainer::lookupCharset(TDBB tdbb, SSHORT ttype, ISC_STATUS *status)
{
/**************************************
 *
 *      l o o k u p C h a r s e t
 *
 **************************************
 *
 * Functional description
 *
 *      Return the container for the character set encoded in
 *      the low byte of (ttype).  CS_dynamic is replaced by the
 *      attachment's character set.
 *
 *      The vector of containers hanging off the dbb is consulted
 *      first (and enlarged when the id lies beyond its end).  On
 *      a miss a new container is built in the permanent pool and
 *      stored in the vector.
 *
 * Returns:
 *      container       - found or successfully created
 *      NULL            - the character set could not be initialised
 *
 *      (status) is not written by this routine.
 *
 **************************************/
	SET_TDBB(tdbb);
	DBB database = tdbb->tdbb_database;

	USHORT charset_id = TTYPE_TO_CHARSET(ttype);
	if (charset_id == CS_dynamic)
		charset_id = tdbb->tdbb_attachment->att_charset;

	CharSetContainer *container = NULL;
	if (charset_id < database->dbb_charsets.size())
		container = database->dbb_charsets[charset_id];
	else
		database->dbb_charsets.resize(charset_id + 10);

	if (container) {
		assert(container != NULL);
		return container;
	}

	// Nothing cached yet: build a fresh container for this character set.
	container =
		FB_NEW(*database->dbb_permanent) CharSetContainer(database->dbb_permanent, charset_id);
	if (container->getCharSet() == NULL) {
		// the character set is unknown or failed to initialise
		delete container;
		return NULL;
	}
	database->dbb_charsets[charset_id] = container;

	assert(container != NULL);
	return container;
}

// Build the container for character set cs_id.
//
// The init function for the character set is obtained through
// lookup_init_function and run on a zero-filled charset structure
// allocated from pool p.  When there is no init function, or it reports
// failure (non-zero), the member cs keeps its NULL initial value, which
// is how lookupCharset detects an unusable container.
CharSetContainer::CharSetContainer(MemoryPool *p, USHORT cs_id) :
	charset_converters(p),
	charset_collations(p),
	impossible_conversions(p),
	cs(NULL)
{
	typedef USHORT (*CSInitFunc)(CHARSET, SSHORT, SSHORT);
	CSInitFunc csInitFunc =
		reinterpret_cast<CSInitFunc>(lookup_init_function(type_charset, cs_id, 0));

	// No implementation available: nothing to allocate, leave cs NULL.
	if (!csInitFunc)
		return;

	// Named differently from the member so the two cannot be confused.
	CHARSET new_charset = FB_NEW(*p) charset;
	memset(new_charset, 0, sizeof(charset));

	if ((*csInitFunc)(new_charset, cs_id, 0) != 0)
	{
		delete new_charset;
		return;
	}

	this->cs = new_charset;
}

// Return a converter from this container's character set to to_cs.
//
// Lookup order: converters already cached, destinations already known to
// be impossible, the character set's own to/from-Unicode converters when
// either side is CS_UNICODE_UCS2, and finally a converter obtained through
// lookup_init_function.  A newly built converter is cached; a failed
// attempt is remembered in impossible_conversions.  Returns NULL when no
// converter is available.
CsConvert CharSetContainer::lookupConverter(TDBB tdbb, CHARSET_ID to_cs)
{
	int index;

	if (charset_converters.find(to_cs, index)) {
		return charset_converters[index];
	}

	if (impossible_conversions.find(to_cs, index)) {
		return NULL;
	}

	// Conversions touching UCS2 are served by the character sets themselves.
	if (to_cs == CS_UNICODE_UCS2)
		return cs.getConvToUnicode();

	if (cs.getId() == CS_UNICODE_UCS2) {
		CharSet target_charset = INTL_charset_lookup(tdbb, to_cs, NULL);
		if (target_charset == NULL) {
			return NULL;
		}
		return target_charset.getConvFromUnicode();
	}

	typedef USHORT (*CVTInitFunc)(CSCONVERT, SSHORT, SSHORT);
	CVTInitFunc init_converter =
		reinterpret_cast<CVTInitFunc>(lookup_init_function(type_csconvert, to_cs, cs.getId()));
	if (!init_converter) {
		impossible_conversions.add(to_cs);
		return NULL;
	}

	CSCONVERT raw_converter = FB_NEW(*tdbb->tdbb_database->dbb_permanent) csconvert;
	memset(raw_converter, 0, sizeof(csconvert));

	if ((*init_converter)(raw_converter, to_cs, cs.getId()) == 0) {
		CsConvert converter = raw_converter;
		assert(converter.getFromCS() == cs.getId());
		assert(converter.getToCS() == to_cs);
		charset_converters.add(converter);
		return raw_converter;
	}

	// Initialisation failed: remember that, then discard the structure.
	impossible_conversions.add(to_cs);
	delete raw_converter;
	return NULL;
}

// Return the text type (collation) object for tt_id.
//
// The collation id is taken from the high bits of tt_id and indexes the
// cache array.  On a miss the init function is obtained through
// lookup_init_function, run on a zero-filled texttype allocated from the
// database's permanent pool, completed by finish_texttype_init and cached.
// Returns NULL when no init function exists or it reports failure.
TextType CharSetContainer::lookupCollation(TDBB tdbb, USHORT tt_id)
{
	const USHORT collation_id = TTYPE_TO_COLLATION(tt_id);

	if (collation_id < charset_collations.getCount()) {
		if (charset_collations[collation_id] != NULL) {
			return charset_collations[collation_id];
		}
	}

	typedef USHORT (*TTInitFunc)(TEXTTYPE, SSHORT, SSHORT);
	TTInitFunc init_texttype =
		reinterpret_cast<TTInitFunc>(lookup_init_function(type_texttype, tt_id, 0));
	if (!init_texttype) {
		return NULL;
	}

	TEXTTYPE new_texttype = FB_NEW(*tdbb->tdbb_database->dbb_permanent) texttype;
	memset(new_texttype, 0, sizeof(texttype));
	if ((*init_texttype)(new_texttype, tt_id, 0) != 0)
	{
		delete new_texttype;
		return NULL;
	}

	finish_texttype_init(new_texttype);

	// Make room for the slot before storing the new object in it.
	if (charset_collations.getCount() <= collation_id) {
		charset_collations.grow(collation_id + 1);
	}
	charset_collations[collation_id] = new_texttype;

	return new_texttype;
}

static void finish_texttype_init(TEXTTYPE txtobj)
{
/**************************************
 *
 *      f i n i s h _ t e x t t y p e _ i n i t
 *
 **************************************
 *
 * Functional description
 *
 *      Complete a text type object by plugging in the engine's
 *      own matching routines (contains, matches, like, sleuth).
 *      Which family is used depends on the object:
 *        - it already has a to_wc routine: multibyte (mb) family;
 *        - no to_wc routine, 1 byte per char: narrow (nc) family;
 *        - no to_wc routine, 2 bytes per char: wide (wc) family.
 *      For the nc and wc families the to_wc routine is installed
 *      as well.  The mbtowc routine is only filled in when the
 *      object did not bring its own.
 *
 * Returns:
 *      Nothing.  The TEXTTYPE_init bit is set in texttype_flags
 *      on exit.
 *
 **************************************/

	if (txtobj->texttype_fn_to_wc != NULL) {
		/* Multibyte character object: keep its own to_wc routine */

		txtobj->texttype_fn_contains = (FPTR_SHORT) EVL_mb_contains;
		txtobj->texttype_fn_matches = (FPTR_SHORT) EVL_mb_matches;
		txtobj->texttype_fn_like = (FPTR_SHORT) EVL_mb_like;
		txtobj->texttype_fn_sleuth_merge = (FPTR_SHORT) EVL_mb_sleuth_merge;
		txtobj->texttype_fn_sleuth_check = (FPTR_SHORT) EVL_mb_sleuth_check;

		if (!txtobj->texttype_fn_mbtowc)
			txtobj->texttype_fn_mbtowc = (FPTR_short) INTL_builtin_mb_mbtowc;
	}
	else if (txtobj->texttype_bytes_per_char == 1) {
		/* Narrow character object */

		txtobj->texttype_fn_to_wc = (FPTR_SHORT) nc_to_wc;
		txtobj->texttype_fn_contains = (FPTR_SHORT) EVL_nc_contains;
		txtobj->texttype_fn_matches = (FPTR_SHORT) EVL_nc_matches;
		txtobj->texttype_fn_like = (FPTR_SHORT) EVL_nc_like;
		txtobj->texttype_fn_sleuth_merge = (FPTR_SHORT) EVL_nc_sleuth_merge;
		txtobj->texttype_fn_sleuth_check = (FPTR_SHORT) EVL_nc_sleuth_check;

		if (!txtobj->texttype_fn_mbtowc)
			txtobj->texttype_fn_mbtowc = (FPTR_short) INTL_builtin_nc_mbtowc;
	}
	else if (txtobj->texttype_bytes_per_char == 2) {
		/* Wide character object */

		txtobj->texttype_fn_to_wc = (FPTR_SHORT) wc_to_wc;
		txtobj->texttype_fn_contains = (FPTR_SHORT) EVL_wc_contains;
		txtobj->texttype_fn_matches = (FPTR_SHORT) EVL_wc_matches;
		txtobj->texttype_fn_like = (FPTR_SHORT) EVL_wc_like;
		txtobj->texttype_fn_sleuth_merge = (FPTR_SHORT) EVL_wc_sleuth_merge;
		txtobj->texttype_fn_sleuth_check = (FPTR_SHORT) EVL_wc_sleuth_check;

		if (!txtobj->texttype_fn_mbtowc)
			txtobj->texttype_fn_mbtowc = (FPTR_short) INTL_builtin_wc_mbtowc;
	}
	else {
		/* No to_wc routine and an unexpected character width */
		assert(0);
	}

	txtobj->texttype_flags |= TEXTTYPE_init;
}

static void* lookup_init_function(
						USHORT type,
						SSHORT parm1,
						SSHORT parm2)
{
/**************************************
 *
 *      lookup_init_function
 *
 **************************************
 *
 * Functional description
 *      Find the initialisation function for the requested
 *      international object using the c/IB/FB 6.0 interface.
 *
 *      (type) is one of type_texttype, type_charset or
 *      type_csconvert; (parm1) and (parm2) identify the object
 *      (for a converter callers pass the destination and source
 *      character set ids, otherwise parm2 is 0).
 *
 *      Search order:
 *              INTL_builtin_lookup
 *              LD_lookup   (linked in when INTL_BUILTIN is defined,
 *                           otherwise resolved in INTL_MODULE1)
 *              LD2_lookup  (likewise, INTL_MODULE2)
 *              a UDF named USER_TEXTTYPE_nnn, USER_CHARSET_nnn or
 *              USER_TRANSLATE_nnn_nnn whose argument layout matches
 *
 * Returns:
 *      the function found, as a void* the caller casts back to
 *      the proper function pointer type;
 *      NULL when no source provides the object.
 *
 ***************************************/
	USHORT (*function)();

	/* Register the plugin directory the first time through */
	if (!bcLoaded)
	{
		intlBCPlugins.addSearchPath(INTL_PLUGIN_DIR);
		bcLoaded = true;
	}

	PluginManager::Plugin intlMod1 = intlBCPlugins.findPlugin(INTL_MODULE1);
	PluginManager::Plugin intlMod2 = intlBCPlugins.findPlugin(INTL_MODULE2);

	USHORT(*lookup_fn) (USHORT, FPTR_SHORT *, SSHORT, SSHORT);

	/* The trace used to name a variable that does not exist (objtype),
	   which broke DEBUG_INTL builds; the parameter is called type. */
	INTL_TRACE(("INTL: looking for obj %d ttype %d\n", type, parm1));

	function = INTL_builtin_lookup(type, parm1, parm2); 
	if (function) return function;

#ifdef INTL_BUILTIN
	if (LD_lookup(type, &function, parm1, parm2) != 0)
		function = NULL;
	else
		return function;
#else
	/* Look for an InterBase supplied object to implement the text type */
	/* The flu.c uses searchpath which expects a file name not a path */
	INTL_TRACE(("INTL: trying %s %s\n", INTL_MODULE1, INTL_LOOKUP_ENTRY1));
	Firebird::string tempStr(INTL_LOOKUP_ENTRY1);
	if ( intlMod1 && (lookup_fn = (USHORT(*)(USHORT, USHORT(**)(), short, short))
		(intlMod1.lookupSymbol(tempStr))) ) {
		INTL_TRACE(("INTL: calling lookup %s %s\n", INTL_MODULE1,
					INTL_LOOKUP_ENTRY1));
		if ((*lookup_fn) (type, &function, parm1, parm2) != 0) {
			function = NULL;
		}
		else
		{
			return function;
		}
	}
#endif

/* Still not found, check the set of supplemental international objects */
#ifdef INTL_BUILTIN
	if (LD2_lookup(type, &function, parm1, parm2) != 0)
		function = NULL;
	else
		return function;
#else
	/* Look for an InterBase supplied object to implement the text type */
	/* The flu.c uses searchpath which expects a file name not a path */
	INTL_TRACE(("INTL: trying %s %s\n", INTL_MODULE2, INTL_LOOKUP_ENTRY2));
	tempStr = INTL_LOOKUP_ENTRY2;
	if ( intlMod2 && (lookup_fn = (USHORT(*)(USHORT, USHORT(**)(), short, short))
		(intlMod2.lookupSymbol(tempStr))) ) {
		INTL_TRACE(("INTL: calling lookup %s %s\n", INTL_MODULE2,
					INTL_LOOKUP_ENTRY2));
		if ((*lookup_fn) (type, &function, parm1, parm2) != 0) {
			function = NULL;
		}
		else
		{
			return function;
		}
	}
#endif

/* Still not found, check if there is a UDF in the database defined the right way */
	FUN function_block;
	USHORT argcount;
	char entry[48];

	switch (type) {
		case type_texttype:
			sprintf(entry, INTL_USER_ENTRY, parm1);
			argcount = 2;
			break;
		case type_charset:
			sprintf(entry, "USER_CHARSET_%03d", parm1);
			argcount = 2;
			break;
		case type_csconvert:
			sprintf(entry, "USER_TRANSLATE_%03d_%03d", parm1,
					parm2);
			argcount = 3;
			break;
		default:
			BUGCHECK(1);
			/* Should BUGCHECK ever return, do not go on with an
			   uninitialised entry name and argument count. */
			return NULL;
	}
	INTL_TRACE(("INTL: trying user fn %s\n", entry));
	if ( (function_block = FUN_lookup_function((TEXT*)entry, false)) ) {
		INTL_TRACE(("INTL: found a user fn, validating\n"));
		/* Accept the UDF only when its declared arguments have the
		   expected count, mechanisms and data types. */
		if ((function_block->fun_count == argcount) &&
			(function_block->fun_args == argcount) &&
			(function_block->fun_return_arg == 0) &&
			(function_block->fun_entrypoint != NULL) &&
			(function_block->fun_rpt[0].fun_mechanism == FUN_value) &&
			(function_block->fun_rpt[0].fun_desc.dsc_dtype == dtype_short)
			&& (function_block->fun_rpt[1].fun_desc.dsc_dtype ==
				dtype_short)
			&& (function_block->fun_rpt[argcount - 1].
				fun_desc.dsc_dtype == dtype_short)
			&& (function_block->fun_rpt[argcount].fun_mechanism ==
				FUN_reference)
			&& (function_block->fun_rpt[argcount].fun_desc.dsc_dtype ==
				dtype_text))
		{
			return function_block->fun_entrypoint;
		}
	}
	return NULL;
}


CHARSET_ID INTL_charset(TDBB tdbb, USHORT ttype, FPTR_STATUS err)
{
/**************************************
 *
 *      I N T L _ c h a r s e t
 *
 **************************************
 *
 * Functional description
 *      Map a text type to the id of its character set.
 *      The well-known text types map to fixed ids, the
 *      dynamic text type maps to the attachment's character
 *      set, and anything else yields the character set part
 *      of the text type value.  (err) is not used.
 *
 **************************************/

	if (ttype == ttype_none)
		return (CS_NONE);

	if (ttype == ttype_ascii)
		return (CS_ASCII);

	if (ttype == ttype_unicode_fss)
		return (CS_UNICODE_FSS);

	if (ttype == ttype_binary)
		return (CS_BINARY);

	if (ttype == ttype_dynamic) {
		/* only this case needs the thread context */
		SET_TDBB(tdbb);
		return (tdbb->tdbb_attachment->att_charset);
	}

	return (TTYPE_TO_CHARSET(ttype));
}


int INTL_compare(TDBB tdbb,
				const dsc* pText1,
				const dsc* pText2,
				FPTR_STATUS err)
{
/**************************************
 *
 *      I N T L _ c o m p a r e
 *
 **************************************
 *
 * Functional description
 *      Compare two text descriptors and return the result of
 *      the compare routine of the text type chosen for the
 *      comparison.
 *
 *      The comparison text type is the larger of the two text
 *      types.  When the two strings are in different character
 *      sets, the string that does not have the comparison text
 *      type is first converted, through a MAX_KEY sized local
 *      buffer, to the other string's character set.
 *
 **************************************/
	SET_TDBB(tdbb);

	assert(pText1 != NULL);
	assert(pText2 != NULL);
	assert(IS_TEXT(pText1) && IS_TEXT(pText2));
	assert(INTL_data_or_binary(pText1) || INTL_data_or_binary(pText2));
	assert(err);

	/* Fetch address, length and text type of both strings,
	   the same way CVT_compare does */

	UCHAR *text1, *text2;
	USHORT ttype1, ttype2;
	USHORT length1 = CVT_get_string_ptr(pText1, &ttype1, &text1, NULL, 0, err);
	USHORT length2 = CVT_get_string_ptr(pText2, &ttype2, &text2, NULL, 0, err);

	/* By SQL II the comparison type should be explicit in the
	   statement whenever there is doubt; here the larger one wins */

	SSHORT compare_type = MAX(ttype1, ttype2);

	UCHAR converted[MAX_KEY];

	if (ttype1 != ttype2) {
		CHARSET_ID charset1 = INTL_charset(tdbb, ttype1, err);
		CHARSET_ID charset2 = INTL_charset(tdbb, ttype2, err);

		if (charset1 != charset2) {
			if (compare_type == ttype2) {
				/* second string sets the type: bring the first one
				   into the second one's character set */
				length1 = INTL_convert_bytes(tdbb, charset2,
											 converted, sizeof(converted),
											 charset1, text1, length1, err);
				text1 = converted;
			}
			else {
				/* first string sets the type: bring the second one
				   into the first one's character set.  Whether a
				   failed conversion should really be an error here
				   is an open question. */
				length2 = INTL_convert_bytes(tdbb, charset1,
											 converted, sizeof(converted),
											 charset2, text2, length2, err);
				text2 = converted;
			}
		}
	}

	TextType collation = INTL_texttype_lookup(tdbb, compare_type, err, NULL);

	return collation.compare(length1, text1, length2, text2);
}


USHORT INTL_convert_bytes(TDBB tdbb,
						CHARSET_ID dest_type,
						BYTE * dest_ptr,
						USHORT dest_len,
						CHARSET_ID src_type,
						BYTE * src_ptr,
						USHORT src_len,
						FPTR_STATUS err)
{
/**************************************
 *
 *      I N T L _ c o n v e r t _ b y t e s
 *
 **************************************
 *
 * Functional description
 *      Given a string of bytes in one character set, convert it to another 
 *      character set.
 *
 *      If (dest_ptr) is NULL, return the count of bytes needed to convert
 *      the string.  This does not guarantee the string can be converted,
 *      the purpose of this is to allocate a large enough buffer.
 *
 *      Strategy:
 *        - destination CS_BINARY or CS_NONE: plain byte copy;
 *        - empty source: nothing to do;
 *        - source CS_BINARY: always an error;
 *        - otherwise use a direct converter if INTL_convert_lookup
 *          finds one, else go through UCS2 with a temporary buffer.
 *
 *      NOTE(review): the error paths are written on the assumption
 *      that (err) does not return to its caller -- confirm for every
 *      FPTR_STATUS routine passed in.
 *
 * RETURNS:
 *      Length of resulting string, in bytes.
 *      calls (err) if conversion error occurs.
 *
 **************************************/
	UCHAR *start_dest_ptr;
	USHORT len;
	USHORT len2;
	SSHORT err_code = 0;
	USHORT err_position;
	BYTE *tmp_buffer;

	SET_TDBB(tdbb);


	assert(src_ptr != NULL);
	assert(src_type != dest_type);
	assert(err != NULL);

	start_dest_ptr = dest_ptr;

	if ((dest_type == CS_BINARY) || (dest_type == CS_NONE)) {

		/* See if we just need a length estimate */
		if (dest_ptr == NULL)
			return (src_len);

		/* Copy as many bytes as fit in the destination */
		len = MIN(dest_len, src_len);
		if (len)
			do
				*dest_ptr++ = *src_ptr++;
			while (--len);

		/* See if only space characters are remaining */
		/* (src_ptr now points just past the copied bytes) */
		len = src_len - MIN(dest_len, src_len);
		if (!len || all_spaces(tdbb, src_type, src_ptr, len, 0))
			return (dest_ptr - start_dest_ptr);
		else
			(*err) (gds_arith_except, 0);
	}
	else if (src_len == 0)
		return (0);
	else if (src_type == CS_BINARY)
		(*err)(gds_arith_except, gds_arg_gds, gds_transliteration_failed, 0);
	else
		/* character sets are known to be different */
	{
		/* Do we know an object from cs1 to cs2? */

		CsConvert cs_obj = INTL_convert_lookup(tdbb, dest_type, src_type);
		if (cs_obj != NULL) {
			len = cs_obj.convert(dest_ptr, dest_len, src_ptr,
									src_len, &err_code, &err_position);
			/* A truncation is tolerated when everything from the
			   reported error position on is spaces */
			if (!err_code || ((err_code == CS_TRUNCATION_ERROR)
							  && all_spaces(tdbb, src_type, src_ptr, src_len,
											err_position))) return (len);
			else if (err_code == CS_TRUNCATION_ERROR)
				(*err) (gds_arith_except, 0);
			else
				(*err) (gds_arith_except, gds_arg_gds, gds_transliteration_failed, 0);
		}

		/* No direct converter: convert in two steps through UCS2 */

		/* Find a CS1 to UNICODE object */

		CharSet from_cs = INTL_charset_lookup(tdbb, src_type, NULL);
		if (from_cs == NULL)
			(*err)(gds_arith_except, gds_arg_gds, gds_text_subtype, gds_arg_number, 
				(ISC_STATUS) src_type, 0);

		/* 
		   ** allocate a temporary buffer that is large enough.
		   ** (one UCS2_CHAR per source byte)
		 */
		tmp_buffer = (BYTE *) FB_NEW(*tdbb->tdbb_default) char[(SLONG) src_len * sizeof(UCS2_CHAR)];

		cs_obj = from_cs.getConvToUnicode();
		assert(cs_obj != NULL);
		len = cs_obj.convert(tmp_buffer, src_len * 2, src_ptr,
								src_len, &err_code, &err_position);
		if (err_code && !((err_code == CS_TRUNCATION_ERROR)
						  && all_spaces(tdbb, src_type, src_ptr, src_len,
										err_position))) {
			/* release the buffer before (err) is called */
			delete [] tmp_buffer;
			if (err_code == CS_TRUNCATION_ERROR)
				(*err) (gds_arith_except, 0);
			else
				(*err) (gds_arith_except, gds_arg_gds, gds_transliteration_failed, 0);
		}

		/* Find a UNICODE to CS2 object */

		CharSet to_cs = INTL_charset_lookup(tdbb, dest_type, NULL);
		if (to_cs == NULL) {
			delete [] tmp_buffer;
			(*err) (gds_arith_except, gds_arg_gds, gds_text_subtype, gds_arg_number, 
				   (ISC_STATUS) dest_type, 0);
		}
		cs_obj = to_cs.getConvFromUnicode();
		assert(cs_obj != NULL);
		len2 = cs_obj.convert(dest_ptr, dest_len, tmp_buffer,
							len, &err_code, &err_position);

		/* The intermediate text is UCS2, so the trailing-space test
		   runs on tmp_buffer with CS_UNICODE_UCS2 */
		if (err_code &&
			!((err_code == CS_TRUNCATION_ERROR) &&
			  all_spaces(tdbb, CS_UNICODE_UCS2, tmp_buffer, len, err_position))) 
		{
			delete [] tmp_buffer;
			if (err_code == CS_TRUNCATION_ERROR)
				(*err) (gds_arith_except, 0);
			else
				(*err) (gds_arith_except, gds_arg_gds, gds_transliteration_failed, 0);
		}

		delete [] tmp_buffer;
		return (len2);
	}
	return (0);					/* to remove compiler errors.  This should never be executed */
}


CsConvert INTL_convert_lookup(TDBB tdbb,
								CHARSET_ID to_cs,
								CHARSET_ID from_cs)
{
/**************************************
 *
 *      I N T L _ c o n v e r t _ l o o k u p
 *
 **************************************
 *
 * Functional description
 *      Return a converter from character set (from_cs) to
 *      character set (to_cs).  CS_dynamic on either side is
 *      replaced by the attachment's character set.
 *
 * Returns:
 *      the converter, or NULL when the source character set
 *      or the converter itself is not available.
 *
 **************************************/

	SET_TDBB(tdbb);
	DBB database = tdbb->tdbb_database;
	CHECK_DBB(database);

	/* Resolve the dynamic character set on both sides */

	if (from_cs == CS_dynamic) {
		from_cs = tdbb->tdbb_attachment->att_charset;
	}
	if (to_cs == CS_dynamic) {
		to_cs = tdbb->tdbb_attachment->att_charset;
	}

	/* The case from_cs == to_cs gets no special treatment here */

	assert(from_cs != CS_dynamic);
	assert(to_cs != CS_dynamic);

	CharSetContainer *source = CharSetContainer::lookupCharset(tdbb, from_cs, NULL);
	if (source != NULL) {
		return source->lookupConverter(tdbb, to_cs);
	}

	return NULL;
}


int INTL_convert_string(dsc* to, const dsc* from, FPTR_STATUS err)
{
/**************************************
 *
 *      I N T L _ c o n v e r t _ s t r i n g
 *
 **************************************
 *
 * Functional description
 *      Convert a string from one type to another: both the
 *      storage form (text, cstring, varying) and the character
 *      set are taken from the descriptors.
 *
 *      When the character sets differ and the destination is
 *      neither CS_BINARY nor CS_NONE the bytes go through
 *      INTL_convert_bytes; otherwise they are copied as is.
 *      A dtype_text destination is padded with spaces, a
 *      cstring is zero terminated, a varying gets its length
 *      set.  Source bytes that do not fit must all be spaces,
 *      otherwise (err) is called with gds_arith_except.
 *
 * RETURNS:
 *      0 when the conversion was carried out;
 *      1 when there is no thread context (called from outside
 *        the engine), in which case nothing is converted.
 *      CVC: Unfortunately, this function puts the source in the 2nd param,
 *      as opposed to the CVT routines, so const help mitigating coding mistakes.
 *
 **************************************/

/* Note: This function is called from outside the engine as
   well as inside - we likely can't get rid of GET_THREAD_DATA here */
	TDBB tdbb = GET_THREAD_DATA;
	if (tdbb == NULL)			/* are we in the Engine? */
		return (1);				/* no, then can't access intl gah */

	assert(to != NULL);
	assert(from != NULL);
	assert(IS_TEXT(to) && IS_TEXT(from));

	CHARSET_ID from_cs = INTL_charset(tdbb, INTL_TTYPE(from), err);
	CHARSET_ID to_cs = INTL_charset(tdbb, INTL_TTYPE(to), err);

	UCHAR* p = to->dsc_address;

/* Must convert dtype(cstring,text,vary) and ttype(ascii,binary,..intl..) */

	UCHAR* from_ptr;
	USHORT from_type;
	const USHORT from_len =
		CVT_get_string_ptr(from, &from_type, &from_ptr, NULL, 0, err);

	USHORT to_size, to_len;
	to_size = to_len = TEXT_LEN(to);

	/* Both start at zero so that nothing below reads an indeterminate
	   value, even if the destination dtype matches none of the cases
	   (the assert above is compiled out of release builds). */
	USHORT to_fill = 0;
	USHORT from_fill = 0;

	/* q walks the source; after a byte copy it points at the first
	   source byte that did not fit into the destination */
	const UCHAR* q = from_ptr;
	switch (to->dsc_dtype) {
	case dtype_text:
		if ((from_cs != to_cs) && (to_cs != CS_BINARY) && (to_cs != CS_NONE)) {

			to_len = INTL_convert_bytes(tdbb, to_cs, to->dsc_address, to_size,
										from_cs, from_ptr, from_len, err);
			to_fill = to_size - to_len;
			from_fill = 0;		/* Convert_bytes handles source truncation */
			p += to_len;
		}
		else {
			/* binary string can always be converted TO by byte-copy */

			to_len = MIN(from_len, to_size);
			from_fill = from_len - to_len;
			to_fill = to_size - to_len;
			if (to_len)
				do
					*p++ = *q++;
				while (--to_len);
		}

		/* fixed length text: fill the rest of the field with spaces */
		if (to_fill > 0)
			pad_spaces(tdbb, to_cs, p, to_fill);
		break;

	case dtype_cstring:
		if ((from_cs != to_cs) && (to_cs != CS_BINARY) && (to_cs != CS_NONE)) {
			to_len = INTL_convert_bytes(tdbb, to_cs, to->dsc_address, to_size,
										from_cs, from_ptr, from_len, err);
			to->dsc_address[to_len] = 0;
			from_fill = 0;		/* Convert_bytes handles source truncation */
		}
		else {
			/* binary string can always be converted TO by byte-copy */

			to_len = MIN(from_len, to_size);
			from_fill = from_len - to_len;
			if (to_len)
				do
					*p++ = *q++;
				while (--to_len);
			*p = 0;
		}
		break;

	case dtype_varying:
		if ((from_cs != to_cs) && (to_cs != CS_BINARY) && (to_cs != CS_NONE)) {

			to_len =
				INTL_convert_bytes(tdbb, to_cs,
								   reinterpret_cast<UCHAR*>(((VARY *) to->dsc_address)->vary_string),
								   to_size, from_cs, from_ptr, from_len, err);
			((VARY *) to->dsc_address)->vary_length = to_len;
			from_fill = 0;		/* Convert_bytes handles source truncation */
		}
		else {
			/* binary string can always be converted TO by byte-copy */
			to_len = MIN(from_len, to_size);
			from_fill = from_len - to_len;
			((VARY *) p)->vary_length = to_len;
			p = reinterpret_cast<UCHAR*>(((VARY *) p)->vary_string);
			if (to_len)
				do
					*p++ = *q++;
				while (--to_len);
		}
		break;

	default:
		/* not a text destination: nothing is copied */
		break;
	}

	if (from_fill)
		/* Make sure remaining characters on From string are spaces */
		if (!all_spaces(tdbb, from_cs, q, from_fill, 0))
			(*err) (gds_arith_except, 0);

	return 0;
}


int INTL_data(const dsc* pText)
{
/**************************************
 *
 *      I N T L _ d a t a
 *
 **************************************
 *
 * Functional description
 *      Tell whether a descriptor holds international text:
 *      TRUE when it is a text descriptor whose text type is
 *      not one of the internal ones (per INTERNAL_TTYPE),
 *      FALSE otherwise.
 *
 **************************************/

	assert(pText != NULL);

	if (IS_TEXT(pText) && !INTERNAL_TTYPE(pText))
		return TRUE;

	return FALSE;
}

int INTL_data_or_binary(const dsc* pText)
{
/**************************************
 *
 *      I N T L _ d a t a _ o r _ b i n a r y
 *
 **************************************
 *
 * Functional description
 *      Non-zero when the descriptor holds international
 *      text (see INTL_data) or its text type is ttype_binary.
 *
 **************************************/

	if (INTL_data(pText))
		return 1;

	return (pText->dsc_ttype == ttype_binary) ? 1 : 0;
}


int INTL_defined_type(TDBB tdbb, ISC_STATUS * status, SSHORT t_type)
{
/**************************************
 *
 *      I N T L _ d e f i n e d _ t y p e
 *
 **************************************
 *
 * Functional description
 *      Report whether (t_type) is a text type that can be
 *      looked up.
 * Return:
 *      TRUE    a text type object was found;
 *      FALSE   the lookup returned NULL.
 *      status  when supplied, its first element is reset to
 *              gds_arg_end before the lookup, and the vector
 *              is handed on to the lookup routine.
 *
 * Note:
 *      Due to cleanup that must happen in DFW, this routine
 *      must return, and not call ERR directly; that is why
 *      no error routine is passed to the lookup.
 *
 **************************************/
	SET_TDBB(tdbb);

	if (status) {
		status[0] = gds_arg_end;
	}

	TextType found = INTL_texttype_lookup(tdbb, t_type, NULL, status);

	return (found == NULL) ? FALSE : TRUE;
}


UCS2_CHAR INTL_getch(TDBB tdbb,
							TextType* obj,
							SSHORT t_type, UCHAR ** ptr, USHORT * count)
{
/**************************************
 *
 *      I N T L _ g e t c h
 *
 **************************************
 *
 * Functional description
 *      Fetch the next character from a buffer as a wide
 *      character.  (*obj) caches the text type object: when
 *      it is still NULL it is looked up from (t_type) first.
 *      On success (*ptr) and (*count) are advanced past the
 *      bytes consumed.  Returns 0, leaving both untouched,
 *      when the text type's mbtowc routine reports -1.
 *
 **************************************/
	SET_TDBB(tdbb);

	assert(obj);
	assert(ptr);

	if (*obj == NULL) {
		*obj = INTL_texttype_lookup(tdbb, t_type, ERR_post, NULL);
		assert(*obj != NULL);
	}

	UCS2_CHAR wide_char;
	const SSHORT consumed = obj->mbtowc(&wide_char, *ptr, *count);

	if (consumed != -1) {
		*ptr += consumed;
		*count -= consumed;
		return wide_char;
	}

	return 0;
}


void INTL_init(TDBB tdbb)
{
/**************************************
 *
 *      I N T L _ i n i t
 *
 **************************************
 *
 * Functional description
 *      Currently does nothing: the body is empty and (tdbb)
 *      is not used.
 *
 **************************************/
}


USHORT INTL_key_length(TDBB tdbb, USHORT idxType, USHORT iLength)
{
/**************************************
 *
 *      I N T L _ k e y _ l e n g t h
 *
 **************************************
 *
 * Functional description
 *      Compute the length of the byte string key to use when
 *      collating text of the given index type with maximum
 *      length (iLength).
 *
 *      Internal text types use (iLength) unchanged; for the
 *      others the text type object is asked.  The answer is
 *      then capped at MAX_KEY and finally raised to at least
 *      (iLength), in that order.
 *
 **************************************/
	SET_TDBB(tdbb);

	assert(idxType >= idx_first_intl_string);

	const SSHORT ttype = INTL_INDEX_TO_TEXT(idxType);

	USHORT key_length = iLength;
	if (ttype < 0 || ttype > ttype_last_internal) {
		/* not an internal text type: let its collation decide */
		TextType collation = INTL_texttype_lookup(tdbb, ttype, ERR_post, NULL);
		key_length = collation.key_length(iLength);
	}

	/* Validity checks on the computed key_length */

	if (key_length > MAX_KEY) {
		key_length = MAX_KEY;
	}
	if (key_length < iLength) {
		key_length = iLength;
	}

	return (key_length);
}

CharSet INTL_charset_lookup(TDBB tdbb, SSHORT parm1, ISC_STATUS *status)
{
/**************************************
 *
 *      I N T L _ c h a r s e t _ l o o k u p
 *
 **************************************
 *
 * Functional description
 *
 *      Return the character set descriptor for (parm1).
 *
 *      The work is done by CharSetContainer::lookupCharset,
 *      which finds or creates the container for the character
 *      set; this routine unwraps the character set from it.
 *
 * Returns:
 *      the character set   - when a container was obtained;
 *      NULL                - when the character set is unknown.
 *
 **************************************/
	CharSetContainer *container = CharSetContainer::lookupCharset(tdbb, parm1, status);

	if (container) {
		return container->getCharSet();
	}

	return NULL;
}


TextType INTL_texttype_lookup(TDBB tdbb,
								SSHORT parm1,
								FPTR_STATUS err,
								ISC_STATUS * status)
{
/**************************************
 *
 *      I N T L _ t e x t t y p e _ l o o k u p
 *
 **************************************
 *
 * Functional description
 *
 *      Lookup the texttype (collation) object for text
 *      type (parm1).  ttype_dynamic is first replaced by the
 *      text type of the attachment's character set.
 *
 *      The container of the character set is located first
 *      (cached off the dbb, or created on demand); the
 *      collation is then fetched from that container, which
 *      in turn looks it up in the libraries when needed.
 *
 *      When nothing is found and (err) is supplied, (err) is
 *      called with gds_text_subtype and the text type number.
 *      Callers that pass an error routine use the returned
 *      object without checking it, so the failure has to be
 *      reported here.
 *
 * Returns:
 *      *object         - if no errors;
 *      <never>         - if error & err non NULL
 *      NULL            - if error & err NULL
 *
 **************************************/
	SET_TDBB(tdbb);

	if (parm1 == ttype_dynamic)
		parm1 = MAP_CHARSET_TO_TTYPE(tdbb->tdbb_attachment->att_charset);

	CharSetContainer *csc = CharSetContainer::lookupCharset(tdbb, parm1, status);
	if (csc) {
		TextType obj = csc->lookupCollation(tdbb, parm1);
		if (obj != NULL)
			return obj;
	}

	/* Unknown character set or collation */
	if (err)
		(*err) (gds_text_subtype, gds_arg_number, (ISC_STATUS) parm1, 0);

	return NULL;
}

void INTL_pad_spaces(TDBB tdbb, DSC * type, UCHAR * string, USHORT length)
{
/**************************************
 *
 *      I N T L _ p a d _ s p a c e s
 *
 **************************************
 *
 * Functional description
 *      Fill the first `length' bytes of `string' with the
 *      space character of the character set derived from
 *      the text descriptor `type'.
 *
 *      The descriptor must be non-NULL and of a text type,
 *      and `string' must be non-NULL (asserted).
 *
 **************************************/
	SET_TDBB(tdbb);

	assert(type != NULL);
	assert(IS_TEXT(type));
	assert(string != NULL);

	/* Translate the descriptor's text type into a charset id,
	   then let the file-local helper do the actual filling */
	const USHORT charset_id = INTL_charset(tdbb, type->dsc_ttype, NULL);

	pad_spaces(tdbb, charset_id, string, length);
}


USHORT INTL_string_to_key(TDBB tdbb,
						USHORT idxType,
						DSC * pString,
						DSC * pByte,
						USHORT partial)
{
/**************************************
 *
 *      I N T L _ s t r i n g _ t o _ k e y
 *
 **************************************
 *
 * Functional description
 *      Given an input string, convert it to a byte string
 *      that will collate naturally (byte order).
 *
 * Parameters:
 *      idxType - index type; selects the text type the
 *                string is converted to and the pad
 *                character that is stripped from the end;
 *      pString - descriptor of the source string;
 *      pByte   - dtype_text descriptor of the key buffer;
 *                the key is written at dsc_address;
 *      partial - forwarded unchanged to the texttype's
 *                string_to_key routine.
 *
 *      For the NONE / ASCII / binary / metadata text types
 *      the bytes are copied verbatim and trailing pad
 *      characters are removed.  At most pByte->dsc_length
 *      bytes are copied so the key buffer cannot be overrun.
 *      All other text types are delegated to the texttype
 *      object.
 *
 *      Return the length of the resulting byte string.
 *
 **************************************/
	USHORT outlen;
	UCHAR buffer[MAX_KEY];
	UCHAR pad_char;
	SSHORT ttype;

	SET_TDBB(tdbb);


	assert(idxType >= idx_first_intl_string || idxType == idx_string
		   || idxType == idx_byte_array || idxType == idx_metadata);
	assert(pString != NULL);
	assert(pByte != NULL);
	assert(pString->dsc_address != NULL);
	assert(pByte->dsc_address != NULL);
	assert(pByte->dsc_dtype == dtype_text);

	switch (idxType) {
	case idx_string:
		pad_char = ' ';
		ttype = ttype_none;
		break;
	case idx_byte_array:
		pad_char = 0;
		ttype = ttype_binary;
		break;
	case idx_metadata:
		pad_char = ' ';
		ttype = ttype_metadata;
		break;
	default:
		pad_char = 0;
		ttype = INTL_INDEX_TO_TEXT(idxType);
		break;
	}

/* Make a string into the proper type of text */

	const char* src;
	USHORT len =
		CVT_make_string(pString, ttype, &src,
						reinterpret_cast<vary*>(buffer), sizeof(buffer),
						ERR_post);

	char* dest = reinterpret_cast<char*>(pByte->dsc_address);
	switch (ttype) {
	case ttype_metadata:
	case ttype_binary:
	case ttype_ascii:
	case ttype_none:
		{
			/* Never copy more than the key buffer can hold;
			   src may reference a string longer than the key */
			USHORT count = len;
			if (count > pByte->dsc_length)
				count = pByte->dsc_length;

			for (; count; --count)
				*dest++ = *src++;

			/* strip off ending pad characters */
			while (dest > (const char*)pByte->dsc_address)
				if (*(dest - 1) == pad_char)
					dest--;
				else
					break;
			outlen = (dest - (const char*)pByte->dsc_address);
		}
		break;
	default:
		{
			/* Locale-aware key generation is done by the texttype */
			TextType obj = INTL_texttype_lookup(tdbb, ttype, ERR_post, NULL);
			outlen = obj.string_to_key(len,
										reinterpret_cast<unsigned char*>(const_cast<char*>(src)),
										pByte->dsc_length,
										reinterpret_cast<unsigned char*>(dest),
										partial);
		}
		break;
	}

	return (outlen);
}


int INTL_str_to_upper(TDBB tdbb, DSC * pString)
{
/**************************************
 *
 *      I N T L _ s t r _ t o _ u p p e r
 *
 **************************************
 *
 * Functional description
 *      Uppercase, in place, the text obtained from the
 *      descriptor pString via CVT_get_string_ptr.
 *
 *      Binary text is left untouched; NONE, ASCII and
 *      UNICODE_FSS text is folded with UPPER7; every other
 *      text type is handed to its texttype object.
 *
 *      Always returns 0.
 *
 **************************************/
	UCHAR scratch[MAX_KEY];
	UCHAR *text;
	USHORT text_type;

	SET_TDBB(tdbb);

	assert(pString != NULL);
	assert(pString->dsc_address != NULL);

	const USHORT text_len =
		CVT_get_string_ptr(pString, &text_type, &text,
						   reinterpret_cast<vary*>(scratch),
						   sizeof(scratch), ERR_post);

	switch (text_type) {
	case ttype_binary:
		/* cannot uppercase binary strings */
		break;

	case ttype_none:
	case ttype_ascii:
	case ttype_unicode_fss:
		{
			/* UPPER7 is a macro: keep its argument free of side effects */
			UCHAR* const stop = text + text_len;
			for (UCHAR* cur = text; cur < stop; ++cur)
				*cur = UPPER7(*cur);
		}
		break;

	default:
		{
			TextType collation =
				INTL_texttype_lookup(tdbb, text_type, ERR_post, NULL);
			collation.str_to_upper(text_len, text, text_len, text);
		}
		break;
	}

/*
 * The result exists only to satisfy the declared return type;
 * the original authors noted (4/5/95) that callers do not
 * check it.
 */
	return 0;
}


UCHAR INTL_upper(TDBB tdbb, USHORT ttype, UCHAR ch)
{
/**************************************
 *
 *      I N T L _ u p p e r
 *
 **************************************
 *
 * Functional description
 *      Return the uppercase form of the single character ch
 *      as defined by the text type ttype.
 *
 *      Binary characters are returned unchanged; NONE, ASCII
 *      and UNICODE_FSS use UPPER7; any other text type is
 *      resolved through its texttype object.
 *
 **************************************/
	SET_TDBB(tdbb);

	/* cannot uppercase binary strings */
	if (ttype == ttype_binary)
		return ch;

	const bool seven_bit_rules =
		(ttype == ttype_none) ||
		(ttype == ttype_ascii) ||
		(ttype == ttype_unicode_fss);

	if (seven_bit_rules)
		return (UPPER7(ch));

	TextType collation = INTL_texttype_lookup(tdbb, ttype, ERR_post, NULL);
	return collation.to_upper(ch);
}


static bool all_spaces(
						  TDBB tdbb,
						  CHARSET_ID charset,
						  const BYTE* ptr, USHORT len, USHORT offset)
{
/**************************************
 *
 *      a l l _ s p a c e s
 *
 **************************************
 *
 * Functional description
 *      Report whether the bytes ptr[offset] up to (but not
 *      including) ptr[len] consist solely of the space
 *      character of the given character set.
 *
 *      The byte sequence that represents a space depends on
 *      the character set, so it is taken from the charset
 *      object instead of assuming 0x20.
 *
 *      offset is assumed to be the first byte not consumed
 *      by a conversion and to lie on a character boundary.
 *
 **************************************/
	SET_TDBB(tdbb);

	assert(ptr != NULL);

	CharSet obj = INTL_charset_lookup(tdbb, charset, NULL);

	assert(obj != NULL);

	const BYTE* cursor = ptr + offset;
	const BYTE* const limit = ptr + len;

	/* Fast path: the space character occupies one octet */
	if (obj.getSpaceLength() == 1) {
		for (; cursor < limit; ++cursor) {
			if (*cursor != *obj.getSpace())
				return false;
		}
		return true;
	}

	/* General path: match the space pattern repeatedly */
	const unsigned char* pattern = obj.getSpace();
	const unsigned char* const pattern_end = pattern + obj.getSpaceLength();

	while (cursor < limit) {
		for (pattern = obj.getSpace();
			 cursor < limit && pattern < pattern_end;
			 ++cursor, ++pattern)
		{
			if (*cursor != *pattern)
				return false;
		}
	}

	return true;
}

static USHORT nc_to_wc(CSCONVERT obj, UCS2_CHAR * pWide, USHORT nWide,	/* byte count */
					   UCHAR * pNarrow, USHORT nNarrow,	/* byte count */
					   SSHORT * err_code, USHORT * err_position)
{
/**************************************
 *
 *      n c _ t o _ w c
 *
 **************************************
 *
 * Functional description
 *   Widen each narrow character of pNarrow into one
 *   UCS2_CHAR of pWide (used for charset NONE).
 *
 *   When pWide is NULL nothing is copied and the number of
 *   bytes the output would need is returned.
 *
 *   Otherwise returns the number of bytes written to pWide,
 *   stores the number of source bytes consumed in
 *   *err_position, and sets *err_code to
 *   CS_TRUNCATION_ERROR when source bytes are left over
 *   (0 otherwise).
 *
 **************************************/
	assert(obj != NULL);
	assert((pNarrow != NULL) || (pWide == NULL));
	assert(err_code != NULL);
	assert(err_position != NULL);

	*err_code = 0;

	/* length estimate only */
	if (pWide == NULL)
		return (sizeof(UCS2_CHAR) * nNarrow);	/* all cases */

	USHORT converted = 0;		/* characters copied so far */

	/* Each output character consumes two bytes of the wide buffer */
	while (nWide > 1 && nNarrow) {
		/* YYY - Byte order issues here */
		pWide[converted] = (UCS2_CHAR) pNarrow[converted];
		++converted;
		nWide -= 2;
		--nNarrow;
	}

	if (nNarrow && !*err_code) {
		*err_code = CS_TRUNCATION_ERROR;
	}
	*err_position = converted * sizeof(*pNarrow);

	return (converted * sizeof(*pWide));
}

static USHORT wc_to_wc(CSCONVERT obj, WCHAR * pDest, USHORT nDest,	/* byte count */
					   WCHAR * pSrc, USHORT nSrc,	/* byte count */
					   SSHORT * err_code, USHORT * err_position)
{
/**************************************
 *
 *      w c _ t o _ w c 
 *
 **************************************
 *
 * Functional description
 *   Copy wide characters from pSrc to pDest without any
 *   conversion.
 *
 *   When pDest is NULL nothing is copied and nSrc is
 *   returned as the length estimate.
 *
 *   Otherwise returns the number of bytes written to pDest,
 *   stores the number of source bytes consumed in
 *   *err_position, and sets *err_code to
 *   CS_TRUNCATION_ERROR when source bytes are left over
 *   (0 otherwise).
 *
 *************************************/
	assert(obj != NULL);
	assert((pSrc != NULL) || (pDest == NULL));
	assert(err_code != NULL);
	assert(err_position != NULL);

	*err_code = 0;

	/* length estimate needed? */
	if (pDest == NULL)
		return (nSrc);

	USHORT moved = 0;			/* wide characters copied so far */

	/* Both byte counts shrink by two for every character moved */
	for (; nDest > 1 && nSrc > 1; nDest -= 2, nSrc -= 2) {
		pDest[moved] = pSrc[moved];
		++moved;
	}

	if (nSrc && !*err_code) {
		*err_code = CS_TRUNCATION_ERROR;
	}
	*err_position = moved * sizeof(*pSrc);

	return (moved * sizeof(*pDest));
}

static void pad_spaces(TDBB tdbb, CHARSET_ID charset, BYTE * ptr, USHORT len)
{								/* byte count */
/**************************************
 *
 *      p a d  _ s p a c e s
 *
 **************************************
 *
 * Functional description
 *      Fill the len bytes starting at ptr with the space
 *      character defined by the given character set.
 *
 *      For multi-octet spaces the pattern is repeated; the
 *      buffer is expected to end on a whole space character
 *      (asserted).
 *
 **************************************/
	SET_TDBB(tdbb);

	assert(ptr != NULL);

	CharSet obj = INTL_charset_lookup(tdbb, charset, NULL);

	assert(obj != NULL);

	BYTE* const limit = ptr + len;

	/* Fast path: the space character occupies one octet */
	if (obj.getSpaceLength() == 1) {
		for (; ptr < limit; ++ptr)
			*ptr = *obj.getSpace();
		return;
	}

	/* General path: lay down the space pattern repeatedly */
	const unsigned char* pattern = obj.getSpace();
	const unsigned char* const pattern_end = pattern + obj.getSpaceLength();

	while (ptr < limit) {
		pattern = obj.getSpace();
		while (ptr < limit && pattern < pattern_end) {
			*ptr++ = *pattern++;
		}
		/* Reaching the end of the buffer is only legal on a
		 * complete space character
		 */
		assert((ptr != limit) || (pattern == pattern_end));
	}
}