mirror of
https://github.com/FirebirdSQL/firebird.git
synced 2025-01-27 20:03:03 +01:00
2393 lines
59 KiB
C++
2393 lines
59 KiB
C++
/************* history ************
|
|
*
|
|
* COMPONENT: JRD MODULE: INTL.C
|
|
* generated by Marion V2.5 2/6/90
|
|
* from dev db on 4-JAN-1995
|
|
*****************************************************************
|
|
*
|
|
* PR 2002-06-02 Added ugly c hack in
|
|
* intl_back_compat_alloc_func_lookup.
|
|
* When someone has time we need to change the references to
|
|
* return (void*) function to something more C++ like
|
|
*
|
|
* 42 4711 3 11 17 tamlin 2001
|
|
* Added silly numbers before my name, and converted it to C++.
|
|
*
|
|
* 18850 daves 4-JAN-1995
|
|
* Fix gds__alloc usage
|
|
*
|
|
* 18837 deej 31-DEC-1994
|
|
* fixing up HARBOR_MERGE
|
|
*
|
|
* 18821 deej 27-DEC-1994
|
|
* HARBOR MERGE
|
|
*
|
|
* 18789 jdavid 19-DEC-1994
|
|
* Cast some functions
|
|
*
|
|
* 17508 jdavid 15-JUL-1994
|
|
* Bring it up to date
|
|
*
|
|
* 17500 daves 13-JUL-1994
|
|
* Bug 6645: Different calculation of partial keys
|
|
*
|
|
* 17202 katz 24-MAY-1994
|
|
* PC_PLATFORM requires the .dll extension
|
|
*
|
|
* 17191 katz 23-MAY-1994
|
|
* OS/2 requires the .dll extension
|
|
*
|
|
* 17180 katz 23-MAY-1994
|
|
* Define location of DLL on OS/2
|
|
*
|
|
* 17149 katz 20-MAY-1994
|
|
* In JRD, gds_arg_number arguments are SLONG's not int's
|
|
*
|
|
* 16633 daves 19-APR-1994
|
|
* Bug 6202: International licensing uses INTERNATIONAL product code
|
|
*
|
|
* 16555 katz 17-APR-1994
|
|
* The last argument of calls to ERR_post should be 0
|
|
*
|
|
* 16521 katz 14-APR-1994
|
|
* Borland C needs a decorated symbol to lookup
|
|
*
|
|
* 16403 daves 8-APR-1994
|
|
* Bug 6441: Emit an error whenever transliteration from ttype_binary attempted
|
|
*
|
|
* 16141 katz 28-MAR-1994
|
|
* Don't declare return value from ISC_lookup_entrypoint as API_ROUTINE
|
|
*
|
|
* The contents of this file are subject to the Interbase Public
|
|
* License Version 1.0 (the "License"); you may not use this file
|
|
* except in compliance with the License. You may obtain a copy
|
|
* of the License at http://www.Inprise.com/IPL.html
|
|
*
|
|
* Software distributed under the License is distributed on an
|
|
* "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express
|
|
* or implied. See the License for the specific language governing
|
|
* rights and limitations under the License.
|
|
*
|
|
* The Original Code was created by Inprise Corporation
|
|
* and its predecessors. Portions created by Inprise Corporation are
|
|
* Copyright (C) Inprise Corporation.
|
|
*
|
|
* All Rights Reserved.
|
|
* Contributor(s): ______________________________________.
|
|
*
|
|
* 2002.10.29 Sean Leyne - Removed obsolete "Netware" port
|
|
*
|
|
* 2002.10.30 Sean Leyne - Removed support for obsolete "PC_PLATFORM" define
|
|
*
|
|
*/
|
|
|
|
|
|
/*
|
|
* PROGRAM: JRD Intl
|
|
* MODULE: intl.c
|
|
* DESCRIPTION: International text support routines
|
|
*
|
|
* copyright (c) 1992, 1993 by Borland International
|
|
*/
|
|
|
|
#include "firebird.h"
|
|
#include <string.h>
|
|
#include "../jrd/ib_stdio.h"
|
|
#include "../jrd/jrd.h"
|
|
#include "../jrd/req.h"
|
|
#include "../jrd/val.h"
|
|
#include "gen/codes.h"
|
|
#include "../jrd/intl.h"
|
|
#include "../jrd/intl_classes.h"
|
|
#include "../jrd/ods.h"
|
|
#include "../jrd/btr.h"
|
|
#include "../intl/charsets.h"
|
|
#include "../intl/country_codes.h"
|
|
#include "../jrd/gdsassert.h"
|
|
#include "../jrd/license.h"
|
|
#ifdef INTL_BUILTIN
|
|
#include "../intl/ld_proto.h"
|
|
#endif
|
|
#include "../jrd/all_proto.h"
|
|
#include "../jrd/cvt_proto.h"
|
|
#include "../jrd/err_proto.h"
|
|
#include "../jrd/evl_proto.h"
|
|
#include "../jrd/flu_proto.h"
|
|
#include "../jrd/fun_proto.h"
|
|
#include "../jrd/gds_proto.h"
|
|
#include "../jrd/iberr_proto.h"
|
|
#include "../jrd/intl_proto.h"
|
|
#include "../jrd/isc_proto.h"
|
|
#include "../jrd/thd_proto.h"
|
|
|
|
#include "../jrd/plugin_manager.h"
|
|
|
|
#ifdef DEV_BUILD
|
|
|
|
#define isprintable(x) ((((unsigned char)(x)) & 0x7F) >= ' ')
|
|
|
|
#ifdef DEBUG_INTL
|
|
#define INTL_TRACE(args) gds__log args
|
|
#else
|
|
#define INTL_TRACE(args)
|
|
#endif
|
|
|
|
#else
|
|
#define INTL_TRACE(args)
|
|
#endif
|
|
/* 11 Sept 2002, Nickolay Samofatov. It is used only in asserts,
|
|
move it out DEV_BUILD section and let optimizer optimize it out */
|
|
#define IS_TEXT(x) (((x)->dsc_dtype == dtype_text) ||\
|
|
((x)->dsc_dtype == dtype_varying)||\
|
|
((x)->dsc_dtype == dtype_cstring))
|
|
|
|
#define TTYPE_TO_CHARSET(tt) ((SSHORT)((tt) & 0x00FF))
|
|
#define TTYPE_TO_COLLATION(tt) ((SSHORT)((tt) >> 8))
|
|
|
|
|
|
typedef unsigned char FILECHAR;
|
|
|
|
// extern declarations for the allocator functions for builtin charsets
|
|
extern CharSetAllocFunc INTL_charset_alloc_func(short);
|
|
extern TextTypeAllocFunc INTL_texttype_alloc_func(short);
|
|
extern CsConvertAllocFunc INTL_csconvert_alloc_func(short, short);
|
|
|
|
static BOOLEAN all_spaces(TDBB, CHARSET_ID, BYTE *, USHORT, USHORT);
|
|
static void dump_hex(UCHAR *, USHORT);
|
|
static void dump_latin(UCHAR *, USHORT);
|
|
static void finish_texttype_init(TextType*, FPTR_VOID, STATUS *);
|
|
static SSHORT internal_str_to_upper(TextType*, USHORT, UCHAR *, USHORT,
|
|
UCHAR *);
|
|
static USHORT internal_string_to_key(TextType*, USHORT, UCHAR *, USHORT,
|
|
UCHAR *, USHORT);
|
|
static USHORT mb_to_wc(CsConvert*, UCS2_CHAR *, USHORT, MBCHAR *, USHORT, SSHORT *,
|
|
USHORT *);
|
|
static USHORT nc_to_wc(CsConvert*, UCS2_CHAR *, USHORT, UCHAR *, USHORT, SSHORT *,
|
|
USHORT *);
|
|
static void pad_spaces(TDBB, CHARSET_ID, BYTE *, USHORT);
|
|
static USHORT wc_to_mb(CsConvert*, MBCHAR *, USHORT, UCS2_CHAR *, USHORT, SSHORT *,
|
|
USHORT *);
|
|
static USHORT wc_to_nc(CsConvert*, NCHAR *, USHORT, UCS2_CHAR *, USHORT, SSHORT *,
|
|
USHORT *);
|
|
static USHORT wc_to_wc(CsConvert*, UCS2_CHAR *, USHORT, UCS2_CHAR *, USHORT, SSHORT *,
|
|
USHORT *);
|
|
|
|
static CharSetContainer *internal_charset_container_lookup(TDBB, SSHORT, STATUS *);
|
|
static void* search_out_alloc_func(const char *, CHARSET_ID, CHARSET_ID);
|
|
static void* intl_back_compat_alloc_func_lookup(USHORT, CHARSET_ID, CHARSET_ID);
|
|
static void* intl_back_compat_obj_init_lookup(USHORT, SSHORT, SSHORT);
|
|
|
|
|
|
/* Name of module that implements text-type (n) */
|
|
|
|
#ifdef VMS
|
|
/* Note: MUST be only the file name. The VMS lib$find_shared_image
|
|
* call insists on file name only, not any "path" components.
|
|
*/
|
|
#define INTL_MODULE1 "FBINTL"
|
|
#define INTL_MODULE2 "FBINTL2"
|
|
#endif
|
|
|
|
#ifdef WIN_NT
|
|
/* prefixed with $INTERBASE */
|
|
#define INTL_MODULE1 "fbintl.dll"
|
|
#define INTL_MODULE2 "fbintl2.dll"
|
|
#endif
|
|
|
|
#ifndef INTL_MODULE1
|
|
/* prefixed with $INTERBASE */
|
|
#define INTL_MODULE1 "fbintl"
|
|
#define INTL_MODULE2 "fbintl2"
|
|
#endif
|
|
|
|
#define INTL_LOOKUP_ENTRY1 "LD_lookup"
|
|
#define INTL_LOOKUP_ENTRY2 "LD2_lookup"
|
|
#define INTL_USER_ENTRY "USER_TEXTTYPE_%03d"
|
|
|
|
|
|
// Classes and structures used internally to this file and intl implementation
|
|
class CharSetContainer
|
|
{
|
|
public:
|
|
CharSetContainer(MemoryPool &p, CharSet *cs = 0) :
|
|
charset_converters(p),
|
|
charset_collations(p),
|
|
impossible_conversions(p),
|
|
cs(cs)
|
|
{}
|
|
|
|
CharSet *getCharSet() { return cs; }
|
|
|
|
void setCollation(TextType *cs, short id)
|
|
{
|
|
if (id >= charset_collations.size())
|
|
charset_collations.resize(id + 10);
|
|
charset_collations[id] = cs;
|
|
}
|
|
|
|
TextType *collation(short id)
|
|
{
|
|
if (id >= charset_collations.size())
|
|
return NULL;
|
|
return charset_collations[id];
|
|
}
|
|
|
|
bool findConverter(CHARSET_ID id, CsConvert **cvt)
|
|
{
|
|
*cvt = NULL;
|
|
for(Firebird::vector<CsConvert*>::iterator itr1 = charset_converters.begin();
|
|
itr1 != charset_converters.end(); ++itr1)
|
|
if ((*itr1)->getToCS() == id)
|
|
{
|
|
*cvt = *itr1;
|
|
return true;
|
|
}
|
|
|
|
for(Firebird::vector<CHARSET_ID>::iterator itr2 = impossible_conversions.begin();
|
|
itr2 != impossible_conversions.end(); ++itr2)
|
|
if (*itr2 == id)
|
|
return true;
|
|
return false;
|
|
}
|
|
|
|
void addConverter(CsConvert *conv)
|
|
{
|
|
charset_converters.push_back(conv);
|
|
}
|
|
|
|
void addNullConverter(CHARSET_ID nullId)
|
|
{
|
|
impossible_conversions.push_back(nullId);
|
|
}
|
|
|
|
private:
|
|
Firebird::vector<CsConvert*> charset_converters;
|
|
Firebird::vector<TextType*> charset_collations;
|
|
Firebird::vector<CHARSET_ID> impossible_conversions;
|
|
CharSet *cs;
|
|
};
|
|
|
|
CHARSET_ID DLL_EXPORT INTL_charset(TDBB tdbb, USHORT ttype, FPTR_VOID err)
{
/**************************************
 *
 *	I N T L _ c h a r s e t
 *
 **************************************
 *
 * Functional description
 *	Map a text type to the id of its character set.
 *
 *	The built-in text types map to fixed character sets,
 *	ttype_dynamic maps to the character set of the current
 *	attachment, and for every other text type the character
 *	set id is the low byte of (ttype).
 *
 *	(err) is not used by this routine.
 *
 **************************************/

	if (ttype == ttype_none)
		return (CS_NONE);

	if (ttype == ttype_ascii)
		return (CS_ASCII);

	if (ttype == ttype_unicode_fss)
		return (CS_UNICODE_FSS);

	if (ttype == ttype_binary)
		return (CS_BINARY);

	if (ttype == ttype_dynamic) {
		/* only this case needs the thread context */
		SET_TDBB(tdbb);
		return (tdbb->tdbb_attachment->att_charset);
	}

	return (TTYPE_TO_CHARSET(ttype));
}
|
|
|
|
|
|
int DLL_EXPORT INTL_compare(
							TDBB tdbb,
							DSC * pText1, DSC * pText2, FPTR_VOID err)
{
/**************************************
 *
 *	I N T L _ c o m p a r e
 *
 **************************************
 *
 * Functional description
 *	Compare two pieces of international text and return the
 *	result of the compare method of the text type selected
 *	for the comparison.
 *
 *	The text type used is the larger of the two operand text
 *	types.  When the operands have different text types AND
 *	different character sets, one operand is first converted
 *	(through a MAX_KEY sized stack buffer) into the character
 *	set of the other one.
 *
 **************************************/
	UCHAR converted[MAX_KEY];
	UCHAR *str1, *str2;
	USHORT ttype1, ttype2;
	USHORT len1, len2;
	SSHORT compare_type;

	SET_TDBB(tdbb);

	assert(pText1 != NULL);
	assert(pText2 != NULL);
	assert(IS_TEXT(pText1) && IS_TEXT(pText2));
	assert(INTL_data_or_binary(pText1) || INTL_data_or_binary(pText2));
	assert(err);

	/* Same start as the normal compare routine in CVT_compare:
	   fetch pointer, length and text type of both operands. */

	len1 = CVT_get_string_ptr(pText1, &ttype1, &str1, NULL, 0, err);
	len2 = CVT_get_string_ptr(pText2, &ttype2, &str2, NULL, 0, err);

	/* YYY - by SQL II compare_type must be explicit in the
	   SQL statement if there is any doubt */

	compare_type = MAX(ttype1, ttype2);	/* YYY */

	if (ttype1 != ttype2) {
		const CHARSET_ID charset1 = INTL_charset(tdbb, ttype1, err);
		const CHARSET_ID charset2 = INTL_charset(tdbb, ttype2, err);

		if (charset1 != charset2) {
			/* YYY - should failure to convert really return an error
			   here?  Suppose a 437 and a Latin1 column are joined and
			   437 is picked as compare_type: only the equal values are
			   wanted.  But then, what about < operations, which make
			   no sense if the string cannot be expressed... */

			if (compare_type != ttype2) {
				/* bring the second operand into the first one's character set */
				len2 = INTL_convert_bytes(tdbb, charset1,
										  converted, sizeof(converted),
										  charset2, str2, len2, err);
				str2 = converted;
			}
			else {
				/* bring the first operand into the second one's character set */
				len1 = INTL_convert_bytes(tdbb, charset2,
										  converted, sizeof(converted),
										  charset1, str1, len1, err);
				str1 = converted;
			}
		}
	}

	TextType* const texttype = INTL_texttype_lookup(tdbb, compare_type, err, NULL);

	return texttype->compare(len1, str1, len2, str2);
}
|
|
|
|
|
|
USHORT DLL_EXPORT INTL_convert_bytes(
									 TDBB tdbb,
									 CHARSET_ID dest_type,
									 BYTE * dest_ptr,
									 USHORT dest_len,
									 CHARSET_ID src_type, BYTE * src_ptr, USHORT src_len, FPTR_VOID err)
{
/**************************************
 *
 *	I N T L _ c o n v e r t _ b y t e s
 *
 **************************************
 *
 * Functional description
 *	Convert (src_len) bytes at (src_ptr), expressed in character
 *	set (src_type), into character set (dest_type), storing at
 *	most (dest_len) bytes at (dest_ptr).
 *
 *	If (dest_ptr) is NULL, return the count of bytes needed to
 *	convert the string.  This does not guarantee the string can
 *	be converted; the purpose is to let the caller allocate a
 *	large enough buffer.
 *
 *	Strategy:
 *	  - destination CS_BINARY / CS_NONE: plain byte copy;
 *	  - otherwise use a direct converter if one can be found;
 *	  - otherwise go through a temporary buffer, converting
 *	    source -> Unicode -> destination.
 *	Truncation is tolerated only when all_spaces() accepts the
 *	part of the source that did not fit.
 *
 * RETURNS:
 *	Length of the resulting string, in bytes.
 *	Calls (err) if a conversion error occurs.
 *
 **************************************/
	SSHORT err_code = 0;
	USHORT err_position;
	bool failed;

	SET_TDBB(tdbb);

	assert(src_ptr != NULL);
	assert(src_type != dest_type);
	assert(err != NULL);

	/* (err) is typed as a plain function pointer, but it is always
	   called with a zero terminated variable argument list. */
	void (*const post_error)(...) = reinterpret_cast < void (*)(...) > (err);

	/* ---- byte copy into a binary / untyped destination ---- */

	if ((dest_type == CS_BINARY) || (dest_type == CS_NONE)) {

		/* See if we just need a length estimate */
		if (dest_ptr == NULL)
			return (src_len);

		BYTE *const dest_start = dest_ptr;
		const USHORT copy_len = MIN(dest_len, src_len);

		for (USHORT n = copy_len; n; --n)
			*dest_ptr++ = *src_ptr++;

		/* Whatever did not fit must consist of space characters only */
		const USHORT leftover = src_len - copy_len;
		if (!leftover || all_spaces(tdbb, src_type, src_ptr, leftover, 0))
			return (dest_ptr - dest_start);

		post_error(gds_arith_except, 0);
		return (0);				/* only reached if (err) returns */
	}

	if (src_len == 0)
		return (0);

	if (src_type == CS_BINARY) {
		post_error(gds_arith_except,
				   gds_arg_gds, gds_transliteration_failed, 0);
		return (0);				/* only reached if (err) returns */
	}

	/* ---- character sets are known to be different ---- */

	/* Do we know an object from cs1 to cs2? */

	CsConvert* converter = INTL_convert_lookup(tdbb, dest_type, src_type);
	if (converter != NULL) {
		const USHORT direct_len =
			converter->convert(dest_ptr, dest_len, src_ptr,
							   src_len, &err_code, &err_position);

		failed = false;
		if (err_code)
			failed = (err_code != CS_TRUNCATION_ERROR) ||
				!all_spaces(tdbb, src_type, src_ptr, src_len, err_position);

		if (!failed)
			return (direct_len);

		if (err_code == CS_TRUNCATION_ERROR)
			post_error(gds_arith_except, 0);
		else
			post_error(gds_arith_except,
					   gds_arg_gds, gds_transliteration_failed, 0);
	}

	/* Find a CS1 to UNICODE object */

	CharSet* const from_cs = INTL_charset_lookup(tdbb, src_type, NULL);
	if (from_cs == NULL)
		post_error(gds_arith_except,
				   gds_arg_gds, gds_text_subtype,
				   gds_arg_number, (SLONG) src_type, 0);

	/* Intermediate buffer: sizeof(UCS2_CHAR) bytes for every source byte */

	BYTE *const tmp_buffer =
		(BYTE *) FB_NEW(*getDefaultMemoryPool()) char[(SLONG) src_len * sizeof(UCS2_CHAR)];

	converter = from_cs->getConvToUnicode();
	assert(converter != NULL);
	const USHORT unicode_len =
		converter->convert(tmp_buffer, src_len * 2, src_ptr,
						   src_len, &err_code, &err_position);

	failed = false;
	if (err_code)
		failed = (err_code != CS_TRUNCATION_ERROR) ||
			!all_spaces(tdbb, src_type, src_ptr, src_len, err_position);

	if (failed) {
		/* release the buffer first: (err) is not expected to return */
		delete [] tmp_buffer;
		if (err_code == CS_TRUNCATION_ERROR)
			post_error(gds_arith_except, 0);
		else
			post_error(gds_arith_except,
					   gds_arg_gds, gds_transliteration_failed, 0);
	}

	/* Find a UNICODE to CS2 object */

	CharSet* const to_cs = INTL_charset_lookup(tdbb, dest_type, NULL);
	if (to_cs == NULL) {
		delete [] tmp_buffer;
		post_error(gds_arith_except,
				   gds_arg_gds, gds_text_subtype,
				   gds_arg_number, (SLONG) dest_type, 0);
	}

	converter = to_cs->getConvFromUnicode();
	assert(converter != NULL);
	const USHORT result_len =
		converter->convert(dest_ptr, dest_len, tmp_buffer,
						   unicode_len, &err_code, &err_position);

	failed = false;
	if (err_code)
		failed = (err_code != CS_TRUNCATION_ERROR) ||
			!all_spaces(tdbb, CS_UNICODE_UCS2, tmp_buffer, unicode_len,
						err_position);

	if (failed) {
		delete [] tmp_buffer;
		if (err_code == CS_TRUNCATION_ERROR)
			post_error(gds_arith_except, 0);
		else
			post_error(gds_arith_except,
					   gds_arg_gds, gds_transliteration_failed, 0);
	}

	delete [] tmp_buffer;
	return (result_len);
}
|
|
|
|
|
|
CsConvert* DLL_EXPORT INTL_convert_lookup(TDBB tdbb,
										  CHARSET_ID to_cs, CHARSET_ID from_cs)
{
/**************************************
 *
 *	I N T L _ c o n v e r t _ l o o k u p
 *
 **************************************
 *
 * Functional description
 *	Return the object converting text from character set
 *	(from_cs) to character set (to_cs), or NULL when there is
 *	none.  CS_dynamic is replaced by the character set of the
 *	current attachment on either side.
 *
 *	Answers are remembered in the container of (from_cs):
 *	converters that were found, and target character sets for
 *	which the search failed.
 *
 **************************************/

	SET_TDBB(tdbb);
	DBB dbb = tdbb->tdbb_database;
	CHECK_DBB(dbb);

	if (from_cs == CS_dynamic)
		from_cs = tdbb->tdbb_attachment->att_charset;

	if (to_cs == CS_dynamic)
		to_cs = tdbb->tdbb_attachment->att_charset;

	/* Should from_cs == to_cs? be handled better? YYY */

	assert(from_cs != CS_dynamic);
	assert(to_cs != CS_dynamic);

	CharSetContainer *const source =
		internal_charset_container_lookup(tdbb, from_cs, NULL);
	if (source == NULL)
		return (NULL);

	/* Already answered?  Note that the answer may be NULL. */
	CsConvert* result;
	if (source->findConverter(to_cs, &result))
		return result;

	if (to_cs == CS_UNICODE_UCS2) {
		/* every character set carries its own "to Unicode" converter */
		result = source->getCharSet()->getConvToUnicode();
	}
	else if (from_cs == CS_UNICODE_UCS2) {
		/* ... and the target carries the "from Unicode" one */
		CharSet* const target = INTL_charset_lookup(tdbb, to_cs, NULL);
		if (target == NULL)
			return (NULL);
		result = target->getConvFromUnicode();
	}
	else {
		/* Try the builtin table, then the loadable modules, then the
		   backward compatible lookup. */
		CsConvertAllocFunc factory = INTL_csconvert_alloc_func(from_cs, to_cs);

		if (!factory)
			factory = (CsConvertAllocFunc)
				search_out_alloc_func("FB_CsConvert_lookup", from_cs, to_cs);

		if (!factory)
			factory = (CsConvertAllocFunc)
				/* argument order is (to_cs, from_cs) here; the earlier
				   call used (from_cs, to_cs) */
				intl_back_compat_alloc_func_lookup(type_csconvert, to_cs, from_cs);

		result = factory ? (*factory)(*dbb->dbb_permanent, from_cs, to_cs) : NULL;

		if (!result)
		{
			/* remember the failure so the search is not repeated */
			source->addNullConverter(to_cs);
			return NULL;
		}
	}

	source->addConverter(result);

	assert(result->getFromCS() == from_cs);
	assert(result->getToCS() == to_cs);

	return (result);
}
|
|
|
|
|
|
int DLL_EXPORT INTL_convert_string(DSC * to, DSC * from, FPTR_VOID err)
{
/**************************************
 *
 *	I N T L _ c o n v e r t _ s t r i n g
 *
 **************************************
 *
 * Functional description
 *	Convert the string described by (from) into the data type
 *	(text, cstring or varying) and character set described by
 *	(to), writing the result at to->dsc_address.
 *
 *	When the character sets differ and the destination is
 *	neither CS_BINARY nor CS_NONE the bytes are transliterated
 *	by INTL_convert_bytes; otherwise they are copied as is.
 *	Source bytes that do not fit into the destination must all
 *	be spaces, otherwise (err) is called with gds_arith_except.
 *
 * RETURNS:
 *	0 if no error in conversion
 *	non-zero otherwise (no thread context available, or the
 *	destination is not a text data type).
 *
 **************************************/
	UCHAR *p, *q;
	UCHAR *from_ptr;
	CHARSET_ID to_cs, from_cs;
	USHORT from_type;
	TDBB tdbb;
	USHORT from_len;
	USHORT to_size, to_len;

	/* Both counters start at zero so that no path can read them
	   uninitialised (formerly possible for a non-text destination
	   when asserts are compiled out). */
	USHORT from_fill = 0;		/* source bytes left over after the copy */
	USHORT to_fill = 0;			/* destination bytes still to be padded */

/* Note: This function is called from outside the engine as
   well as inside - we likely can't get rid of GET_THREAD_DATA here */
	tdbb = GET_THREAD_DATA;
	if (tdbb == NULL)			/* are we in the Engine? */
		return (1);				/* no, then can't access intl gah */

	assert(to != NULL);
	assert(from != NULL);
	assert(IS_TEXT(to) && IS_TEXT(from));

	from_cs = INTL_charset(tdbb, INTL_TTYPE(from), err);
	to_cs = INTL_charset(tdbb, INTL_TTYPE(to), err);

	p = to->dsc_address;

/* Must convert dtype(cstring,text,vary) and ttype(ascii,binary,..intl..) */

	from_len = CVT_get_string_ptr(from, &from_type, &from_ptr, NULL, 0, err);

	to_size = to_len = TEXT_LEN(to);

	/* A byte copy is sufficient when the character sets agree or the
	   destination is binary / untyped. */
	const bool transliterate =
		(from_cs != to_cs) && (to_cs != CS_BINARY) && (to_cs != CS_NONE);

	q = from_ptr;
	switch (to->dsc_dtype) {
	case dtype_text:
		if (transliterate) {
			to_len = INTL_convert_bytes(tdbb, to_cs, to->dsc_address, to_size,
										from_cs, from_ptr, from_len, err);
			to_fill = to_size - to_len;
			from_fill = 0;		/* Convert_bytes handles source truncation */
			p += to_len;
		}
		else {
			/* binary string can always be converted TO by byte-copy */

			to_len = MIN(from_len, to_size);
			from_fill = from_len - to_len;
			to_fill = to_size - to_len;
			if (to_len)
				do
					*p++ = *q++;
				while (--to_len);
		}

		/* fixed length text is padded with the character set's space */
		if (to_fill > 0)
			pad_spaces(tdbb, to_cs, p, to_fill);
		break;

	case dtype_cstring:
		if (transliterate) {
			to_len = INTL_convert_bytes(tdbb, to_cs, to->dsc_address, to_size,
										from_cs, from_ptr, from_len, err);
			to->dsc_address[to_len] = 0;
			from_fill = 0;		/* Convert_bytes handles source truncation */
		}
		else {
			/* binary string can always be converted TO by byte-copy */

			to_len = MIN(from_len, to_size);
			from_fill = from_len - to_len;
			if (to_len)
				do
					*p++ = *q++;
				while (--to_len);
			*p = 0;
		}
		break;

	case dtype_varying:
		if (transliterate) {
			to_len =
				INTL_convert_bytes(tdbb, to_cs,
								   reinterpret_cast<UCHAR*>(((VARY *) to->dsc_address)->vary_string),
								   to_size, from_cs, from_ptr, from_len, err);
			((VARY *) to->dsc_address)->vary_length = to_len;
			from_fill = 0;		/* Convert_bytes handles source truncation */
		}
		else {
			/* binary string can always be converted TO by byte-copy */
			to_len = MIN(from_len, to_size);
			from_fill = from_len - to_len;
			((VARY *) p)->vary_length = to_len;
			p = reinterpret_cast<UCHAR*>(((VARY *) p)->vary_string);
			if (to_len)
				do
					*p++ = *q++;
				while (--to_len);
		}
		break;

	default:
		/* Not a text destination: nothing was written, so report
		   failure instead of claiming a successful conversion. */
		return (1);
	}

	if (from_fill)
		/* Make sure remaining characters on From string are spaces */
		if (!all_spaces(tdbb, from_cs, q, from_fill, 0))
			reinterpret_cast < void (*) (...) > (*err) (gds_arith_except, 0);

	return 0;
}
|
|
|
|
|
|
#ifdef DEV_BUILD
|
|
int DLL_EXPORT INTL_data(DSC * pText)
{
/**************************************
 *
 *	I N T L _ d a t a
 *
 **************************************
 *
 * Functional description
 *	Tell whether the descriptor refers to international
 *	text: returns TRUE when (pText) has one of the text data
 *	types and INTERNAL_TTYPE() is false for it, FALSE in
 *	every other case.
 *
 **************************************/

	assert(pText != NULL);

	return (IS_TEXT(pText) && !INTERNAL_TTYPE(pText)) ? TRUE : FALSE;
}
|
|
#endif
|
|
|
|
#ifdef DEV_BUILD
|
|
int DLL_EXPORT INTL_data_or_binary(DSC * pText)
{
/**************************************
 *
 *	I N T L _ d a t a _ o r _ b i n a r y
 *
 **************************************
 *
 * Functional description
 *	Non-zero when INTL_data() accepts the descriptor or
 *	when its text type is ttype_binary; zero otherwise.
 *
 **************************************/

	if (INTL_data(pText))
		return true;

	return (pText->dsc_ttype == ttype_binary);
}
|
|
#else
|
|
// 11 Sept 2002, Nickolay Samofatov
|
|
// Used only in asserts, but let optimizer wipe it out
|
|
int DLL_EXPORT INTL_data_or_binary(DSC * pText)
{
/**************************************
 *
 *	I N T L _ d a t a _ o r _ b i n a r y
 *
 **************************************
 *
 * Functional description
 *	Variant compiled when DEV_BUILD is not defined: the
 *	descriptor is not inspected and the answer is always
 *	TRUE.
 *
 **************************************/

	return TRUE;
}
|
|
#endif
|
|
|
|
|
|
int DLL_EXPORT INTL_defined_type(TDBB tdbb, STATUS * status, SSHORT t_type)
{
/**************************************
 *
 *	I N T L _ d e f i n e d _ t y p e
 *
 **************************************
 *
 * Functional description
 *	Is (t_type) a known text type?
 *
 *	The status vector, when supplied, is reset to "empty"
 *	and then handed to INTL_texttype_lookup.
 *
 * Return:
 *	FALSE	type is not defined (the lookup returned NULL).
 *	TRUE	type is defined.
 *
 * Note:
 *	Due to cleanup that must happen in DFW, this routine
 *	must return, and not call ERR directly.  That is why
 *	no error routine is passed to the lookup.
 *
 **************************************/

	SET_TDBB(tdbb);

	if (status)
		status[0] = gds_arg_end;

	TextType* const found = INTL_texttype_lookup(tdbb, t_type, NULL, status);

	return (found != NULL) ? TRUE : FALSE;
}
|
|
|
|
|
|
UCS2_CHAR DLL_EXPORT INTL_getch(TDBB tdbb,
								TextType* * obj,
								SSHORT t_type, UCHAR ** ptr, USHORT * count)
{
/**************************************
 *
 *	I N T L _ g e t c h
 *
 **************************************
 *
 * Functional description
 *	Fetch the next character from the buffer at (*ptr), which
 *	holds (*count) bytes, and return it as a wide character.
 *	(*ptr) and (*count) are moved past the bytes consumed.
 *
 *	(*obj) caches the text type object between calls: when it
 *	is NULL it is looked up from (t_type) and stored.
 *
 *	Returns 0, leaving (*ptr) and (*count) untouched, when the
 *	text type's mbtowc reports -1.
 *
 **************************************/
	UCS2_CHAR wide_char;

	SET_TDBB(tdbb);

	assert(obj);
	assert(ptr);

	if (!*obj) {
		*obj = INTL_texttype_lookup(tdbb, t_type, (FPTR_VOID) ERR_post, NULL);
		assert(*obj);
	}

	const SSHORT consumed = (*obj)->mbtowc(&wide_char, *ptr, *count);
	if (consumed != -1) {
		*ptr += consumed;
		*count -= consumed;
		return wide_char;
	}

	return 0;
}
|
|
|
|
|
|
void DLL_EXPORT INTL_init(TDBB tdbb)
{
/**************************************
 *
 *	I N T L _ i n i t
 *
 **************************************
 *
 * Functional description
 *	Currently does nothing.
 *
 *	The former body, which created 25 element vectors for
 *	dbb_text_objects and dbb_charsets when they did not
 *	exist yet, is disabled.
 *
 **************************************/
}
|
|
|
|
|
|
USHORT DLL_EXPORT INTL_key_length(TDBB tdbb, USHORT idxType, USHORT iLength)
{
/**************************************
 *
 *	I N T L _ k e y _ l e n g t h
 *
 **************************************
 *
 * Functional description
 *	Given an index type and a maximum string length (iLength),
 *	return the length of the byte string key to use when
 *	collating text of this type.
 *
 *	Internal text types use (iLength) itself; all others ask
 *	their text type object.  The answer is then capped at
 *	MAX_KEY and, after that, raised to at least (iLength).
 *
 **************************************/

	SET_TDBB(tdbb);

	assert(idxType >= idx_first_intl_string);

	const SSHORT ttype = INTL_INDEX_TO_TEXT(idxType);
	const bool internal_type = (ttype >= 0 && ttype <= ttype_last_internal);

	USHORT result = iLength;
	if (!internal_type) {
		TextType* const texttype =
			INTL_texttype_lookup(tdbb, ttype, (FPTR_VOID) ERR_post, NULL);
		result = texttype->key_length(iLength);
	}

	/* Validity checks on the computed length; the order matters
	   when (iLength) itself exceeds MAX_KEY. */

	if (result > MAX_KEY)
		result = MAX_KEY;

	if (result < iLength)
		result = iLength;

	return (result);
}
|
|
|
|
static CharSetContainer *internal_charset_container_lookup(TDBB tdbb, SSHORT parm1, STATUS * status)
{
/**************************************
 *
 *	i n t e r n a l _ c h a r s e t _ c o n t a i n e r _ l o o k u p
 *
 **************************************
 *
 * Functional description
 *
 *	Find, or create, the container of the character set that
 *	(parm1) refers to.  Only the low byte of (parm1) is used;
 *	CS_dynamic stands for the attachment's character set.
 *
 *	The containers hang off the dbb in dbb_charsets, indexed
 *	by character set id.  When the slot is empty a character
 *	set allocator is searched for (builtin table, loadable
 *	modules, backward compatible lookup, in that order) and
 *	the new container is stored in the slot.
 *
 * Returns:
 *	the container - if no errors;
 *	NULL          - if no allocator is known for the character
 *	                set or the allocation failed.
 *
 *	(status) is not written by this routine.
 *
 **************************************/

	SET_TDBB(tdbb);
	DBB dbb = tdbb->tdbb_database;

	USHORT id = TTYPE_TO_CHARSET(parm1);
	if (id == CS_dynamic)
		id = tdbb->tdbb_attachment->att_charset;

	if (id < dbb->dbb_charsets.size()) {
		/* fast path: the container was built by an earlier call */
		CharSetContainer *const known = dbb->dbb_charsets[id];
		if (known)
			return known;
	}
	else {
		/* make room for this id, plus some slack */
		dbb->dbb_charsets.resize(id + 10);
	}

	/* Nothing stored yet: locate a function able to allocate the
	   character set object. */

	CharSetAllocFunc factory = INTL_charset_alloc_func(id);

	if (!factory)
		factory = (CharSetAllocFunc)
			search_out_alloc_func("FB_CharSet_lookup", id, 0);

	if (!factory)
		factory = (CharSetAllocFunc)
			intl_back_compat_alloc_func_lookup(type_charset, id, 0);

	if (!factory)
		return NULL;

	CharSet *const charset = (*factory)(*dbb->dbb_permanent, id, 0);
	if (!charset)
		return NULL;

	CharSetContainer *const container =
		FB_NEW(*dbb->dbb_permanent) CharSetContainer(*dbb->dbb_permanent, charset);
	if (!container)
	{
		/* do not leave the character set object behind */
		delete charset;
		return NULL;
	}

	dbb->dbb_charsets[id] = container;

	return container;
}
|
|
|
|
CharSet *DLL_EXPORT INTL_charset_lookup(TDBB tdbb, SSHORT parm1, STATUS * status)
{
/**************************************
 *
 *	I N T L _ c h a r s e t _ l o o k u p
 *
 **************************************
 *
 * Functional description
 *
 *	Lookup a character set descriptor.
 *
 *	The work is done by internal_charset_container_lookup;
 *	this routine only unwraps the character set object from
 *	the container it returns.
 *
 * Returns:
 *	the character set - if its container was found or created;
 *	NULL              - otherwise.
 *
 **************************************/

	CharSetContainer *const container =
		internal_charset_container_lookup(tdbb, parm1, status);

	return container ? container->getCharSet() : NULL;
}
|
|
|
|
|
|
TextType *DLL_EXPORT INTL_texttype_lookup(
										  TDBB tdbb,
										  SSHORT parm1, FPTR_VOID err, STATUS * status)
{
/**************************************
 *
 *	I N T L _ t e x t t y p e _ l o o k u p
 *
 **************************************
 *
 * Functional description
 *
 *	Lookup the text type (collation) object for text type
 *	(parm1).  ttype_dynamic is first replaced by the text
 *	type of the attachment's character set.
 *
 *	First, search the collation table of the character set
 *	container that hangs off the dbb.  If not found, look
 *	for an allocator (builtin table, loadable modules,
 *	backward compatible lookup, in that order), create the
 *	object and remember it in the container.
 *
 *	On failure the error is now actually reported, as the
 *	contract below always promised: (status), when supplied,
 *	receives gds_text_subtype with (parm1) as its argument,
 *	and (err), when supplied, is called with the same error.
 *
 * Returns:
 *	*object	- if no errors;
 *	<never>	- if error & err non NULL
 *	          (provided (err) does not return)
 *	NULL	- if error & err NULL
 *
 **************************************/
	DBB dbb;
	TextType *cs_object = NULL;
	CharSetContainer *csc;

	SET_TDBB(tdbb);
	dbb = tdbb->tdbb_database;

	if (parm1 == ttype_dynamic)
		parm1 = MAP_CHARSET_TO_TTYPE(tdbb->tdbb_attachment->att_charset);

	csc = internal_charset_container_lookup(tdbb, parm1, status);
	if (csc)
	{
		const USHORT id = TTYPE_TO_COLLATION(parm1);

		cs_object = csc->collation(id);

		// allocate a new TextType object if needed
		if (!cs_object)
		{
			TextTypeAllocFunc allocFunc;

			allocFunc = INTL_texttype_alloc_func(parm1);
			if (!allocFunc)
				allocFunc = (TextTypeAllocFunc)
					search_out_alloc_func("FB_texttype_lookup", parm1, 0);
			if (!allocFunc)
				allocFunc = (TextTypeAllocFunc)
					intl_back_compat_alloc_func_lookup(type_texttype, parm1, 0);

			if (allocFunc)
				cs_object = (*allocFunc)(*dbb->dbb_permanent, parm1, 0);

			if (cs_object)
				csc->setCollation(cs_object, id);
		}
	}

	if (cs_object)
		return (cs_object);

	/* The character set or the collation is unknown.  Callers that
	   supply (err) use the result without checking it, so the error
	   must be raised here rather than returned as NULL. */

	if (status)
	{
		status[0] = gds_arg_gds;
		status[1] = gds_text_subtype;
		status[2] = gds_arg_number;
		status[3] = (SLONG) parm1;
		status[4] = gds_arg_end;
	}

	if (err)
		reinterpret_cast < void (*) (...) > (*err) (gds_text_subtype,
													gds_arg_number,
													(SLONG) parm1, 0);

	return NULL;
}
|
|
|
|
void DLL_EXPORT INTL_pad_spaces(TDBB tdbb, DSC * type, UCHAR * string, USHORT length)
{
/**************************************
 *
 *	I N T L _ p a d _ s p a c e s
 *
 **************************************
 *
 * Functional description
 *	Fill <length> bytes at <string> with the space
 *	character of the character set that INTL_charset
 *	reports for the text descriptor <type>.
 *
 **************************************/
	SET_TDBB(tdbb);

	assert(type != NULL);
	assert(IS_TEXT(type));
	assert(string != NULL);

	// Resolve the descriptor's text type to a character set id first.
	const USHORT charset_id = INTL_charset(tdbb, type->dsc_ttype, NULL);

	pad_spaces(tdbb, charset_id, string, length);
}
|
|
|
|
|
|
USHORT DLL_EXPORT INTL_string_to_key(
									 TDBB tdbb,
									 USHORT idxType,
									 DSC * pString,
									 DSC * pByte, USHORT partial)
{
/**************************************
 *
 *	I N T L _ s t r i n g _ t o _ k e y
 *
 **************************************
 *
 * Functional description
 *	Given an input string, convert it to a byte string
 *	that will collate naturally (byte order).
 *
 *	idxType selects the text type and pad character:
 *	  idx_string     -> ttype_none,     pad ' '
 *	  idx_byte_array -> ttype_binary,   pad 0
 *	  idx_metadata   -> ttype_metadata, pad ' '
 *	  anything else  -> INTL_INDEX_TO_TEXT(idxType), pad 0
 *
 *	For metadata/binary/ascii/none the bytes are copied
 *	as-is (never more than pByte->dsc_length of them) and
 *	trailing pad characters are stripped.  For every other
 *	text type the key is produced by the text type's
 *	string_to_key method, bounded by pByte->dsc_length.
 *
 *	Return the length of the resulting byte string.
 *
 **************************************/
	USHORT outlen;
	UCHAR buffer[MAX_KEY];
	UCHAR pad_char;
	TextType* obj;
	SSHORT ttype;

	SET_TDBB(tdbb);

	assert(idxType >= idx_first_intl_string || idxType == idx_string
		   || idxType == idx_byte_array || idxType == idx_metadata);
	assert(pString != NULL);
	assert(pByte != NULL);
	assert(pString->dsc_address != NULL);
	assert(pByte->dsc_address != NULL);
	assert(pByte->dsc_dtype == dtype_text);

	switch (idxType) {
	case idx_string:
		pad_char = ' ';
		ttype = ttype_none;
		break;
	case idx_byte_array:
		pad_char = 0;
		ttype = ttype_binary;
		break;
	case idx_metadata:
		pad_char = ' ';
		ttype = ttype_metadata;
		break;
	default:
		pad_char = 0;
		ttype = INTL_INDEX_TO_TEXT(idxType);
		break;
	}

	/* Make a string into the proper type of text */

	const char* src;
	USHORT len =
		CVT_make_string(pString, ttype, &src,
						reinterpret_cast<vary*>(buffer), sizeof(buffer),
						(FPTR_VOID) ERR_post);

	char* dest = reinterpret_cast<char*>(pByte->dsc_address);

	/* Space available in the caller's key buffer.  The collation branch
	 * already hands this bound to string_to_key; the raw copy below must
	 * honour it too, otherwise a long source string overruns pByte. */
	USHORT room = pByte->dsc_length;

	switch (ttype) {
	case ttype_metadata:
	case ttype_binary:
	case ttype_ascii:
	case ttype_none:
		while (len && room) {
			*dest++ = *src++;
			--len;
			--room;
		}
		/* strip off ending pad characters */
		while (dest > (const char*)pByte->dsc_address)
			if (*(dest - 1) == pad_char)
				dest--;
			else
				break;
		outlen = (dest - (const char*)pByte->dsc_address);
		break;
	default:
		obj = INTL_texttype_lookup(tdbb, ttype, (FPTR_VOID) ERR_post, NULL);
		/* The lookup can return NULL; catch that in development builds
		 * before the object is dereferenced. */
		assert(obj != NULL);
		outlen = obj->string_to_key(len,
									reinterpret_cast<unsigned char*>(const_cast<char*>(src)),
									pByte->dsc_length,
									reinterpret_cast<unsigned char*>(dest),
									partial);
		break;
	}

	return (outlen);
}
|
|
|
|
|
|
int DLL_EXPORT INTL_str_to_upper(TDBB tdbb, DSC * pString)
{
/**************************************
 *
 *	I N T L _ s t r _ t o _ u p p e r
 *
 **************************************
 *
 * Functional description
 *	Uppercase, in place, the text that
 *	CVT_get_string_ptr returns for <pString>.
 *
 *	Binary text is left untouched; none / ascii /
 *	unicode_fss use the UPPER7 macro byte by byte; every
 *	other text type delegates to the text type object's
 *	str_to_upper method.
 *
 *	Always returns 0.
 *
 **************************************/
	UCHAR scratch[MAX_KEY];
	USHORT ttype;
	UCHAR *text;
	UCHAR *stop;

	SET_TDBB(tdbb);

	assert(pString != NULL);
	assert(pString->dsc_address != NULL);

	USHORT length =
		CVT_get_string_ptr(pString, &ttype, &text,
						   reinterpret_cast < vary * >(scratch),
						   sizeof(scratch), (FPTR_VOID) ERR_post);

	switch (ttype) {
	case ttype_binary:
		/* cannot uppercase binary strings */
		break;

	case ttype_none:
	case ttype_ascii:
	case ttype_unicode_fss:
		stop = text + length;
		for (UCHAR *p = text; p < stop; ++p)
			*p = UPPER7(*p);
		break;

	default:
		{
			TextType* text_type =
				INTL_texttype_lookup(tdbb, ttype, (FPTR_VOID) ERR_post, NULL);
			/* source and destination are the same buffer */
			text_type->str_to_upper(length, text, length, text);
		}
		break;
	}

	/*
	 * The return value exists only to satisfy the declared return type
	 * (original note dated 4/5/95: callers do not check it).
	 */
	return (0);
}
|
|
|
|
|
|
UCHAR DLL_EXPORT INTL_upper(TDBB tdbb, USHORT ttype, UCHAR ch)
{
/**************************************
 *
 *	I N T L _ u p p e r
 *
 **************************************
 *
 * Functional description
 *	Return the uppercase form of the single character
 *	<ch> under text type <ttype>.
 *
 *	Binary: returned unchanged.
 *	none / ascii / unicode_fss: UPPER7(ch).
 *	Anything else: the text type object's to_upper.
 *
 **************************************/
	SET_TDBB(tdbb);

	switch (ttype) {
	case ttype_binary:
		/* cannot uppercase binary strings */
		return (ch);

	case ttype_none:
	case ttype_ascii:
	case ttype_unicode_fss:
		return (UPPER7(ch));

	default:
		break;
	}

	TextType* text_type =
		INTL_texttype_lookup(tdbb, ttype, (FPTR_VOID) ERR_post, NULL);
	return text_type->to_upper(ch);
}
|
|
|
|
|
|
static BOOLEAN all_spaces(
						  TDBB tdbb,
						  CHARSET_ID charset,
						  BYTE * ptr, USHORT len, USHORT offset)
{
/**************************************
 *
 *	a l l _ s p a c e s
 *
 **************************************
 *
 * Functional description
 *	Report whether the bytes from ptr[offset] up to (not
 *	including) ptr[len] consist solely of repetitions of
 *	the space character defined by <charset>.
 *
 *	The binary form of a space depends on the character
 *	set (0x20 for Ascii, 0x0020 for Unicode, 0x20 for
 *	SJIS -- where 0x??20 is NOT a space), so the
 *	comparison walks the charset's own space sequence.
 *
 *	<offset> is assumed to be the first byte not consumed
 *	by a conversion, and to sit on a character boundary.
 *
 *	Returns TRUE when the range is empty or every byte
 *	matches; a final, incomplete space sequence whose
 *	bytes match so far is accepted as well.
 *
 **************************************/
	SET_TDBB(tdbb);

	assert(ptr != NULL);

	CharSet* cs = INTL_charset_lookup(tdbb, charset, NULL);

	assert(cs != NULL);

	BYTE *cursor = &ptr[offset];
	BYTE *limit = &ptr[len];

	/* Single-octet character sets are optimized here */
	if (cs->getSpaceLength() == 1) {
		for (; cursor < limit; ++cursor) {
			if (*cursor != *cs->getSpace())
				return (FALSE);
		}
		return (TRUE);
	}

	/* Multi-octet space: match the whole sequence repeatedly */
	const unsigned char *space = cs->getSpace();
	const unsigned char *space_end = &space[cs->getSpaceLength()];

	while (cursor < limit) {
		space = cs->getSpace();
		while (cursor < limit && space < space_end) {
			if (*cursor++ != *space++)
				return (FALSE);
		}
	}
	return (TRUE);
}
|
|
|
|
static USHORT internal_keylength(TextType* obj, USHORT iLength)
{
/**************************************
 *
 *	i n t e r n a l _ k e y l e n g t h
 *
 **************************************
 *
 * Functional description
 *	Key length for a string of <iLength> bytes: the
 *	input length is returned unchanged.  <obj> is not
 *	consulted.
 *
 **************************************/
	return iLength;
}
|
|
|
|
static USHORT nc_to_wc(CsConvert* obj, UCS2_CHAR * pWide, USHORT nWide,	/* byte count */
					   UCHAR * pNarrow, USHORT nNarrow,	/* byte count */
					   SSHORT * err_code, USHORT * err_position)
{
/**************************************
 *
 *	n c _ t o _ w c
 *
 **************************************
 *
 * Functional description
 *	Copies narrow chars buffer into wide chars buffer
 *	for charset NONE, widening each byte to a UCS2_CHAR.
 *
 *	With pWide == NULL nothing is copied and the number
 *	of output bytes needed for nNarrow input bytes is
 *	returned.  Otherwise returns the number of bytes
 *	written; *err_position receives the number of input
 *	bytes consumed and *err_code is CS_TRUNCATION_ERROR
 *	when input remained unconverted, else 0.
 *
 **************************************/
	assert(obj != NULL);
	assert((pNarrow != NULL) || (pWide == NULL));
	assert(err_code != NULL);
	assert(err_position != NULL);

	*err_code = 0;
	if (pWide == NULL)
		return (sizeof(UCS2_CHAR) * nNarrow);	/* all cases */

	UCS2_CHAR *out = pWide;
	UCHAR *in = pNarrow;

	/* Each converted character uses two bytes of output space */
	for (; nWide > 1 && nNarrow; nWide -= 2, --nNarrow) {
		/* YYY - Byte order issues here */
		*out++ = (UCS2_CHAR) *in++;
	}

	if (!*err_code && nNarrow)
		*err_code = CS_TRUNCATION_ERROR;

	*err_position = (in - pNarrow) * sizeof(*pNarrow);

	return ((out - pWide) * sizeof(*pWide));
}
|
|
|
|
|
|
static void pad_spaces(TDBB tdbb, CHARSET_ID charset, BYTE * ptr, USHORT len)
{								/* len is a byte count */
/**************************************
 *
 *	p a d _ s p a c e s
 *
 **************************************
 *
 * Functional description
 *	Fill <len> bytes starting at <ptr> with the space
 *	character defined by character set <charset>.
 *
 **************************************/
	SET_TDBB(tdbb);

	assert(ptr != NULL);

	CharSet* cs = INTL_charset_lookup(tdbb, charset, NULL);

	assert(cs != NULL);

	BYTE *limit = &ptr[len];

	/* Single-octet character sets are optimized here */
	if (cs->getSpaceLength() == 1) {
		for (; ptr < limit; ++ptr)
			*ptr = *cs->getSpace();
		return;
	}

	/* Multi-octet space: lay the sequence down repeatedly */
	const unsigned char *space = cs->getSpace();
	const unsigned char *space_end = &space[cs->getSpaceLength()];

	while (ptr < limit) {
		space = cs->getSpace();
		while (ptr < limit && space < space_end)
			*ptr++ = *space++;

		/* The buffer must not end in the middle of a space character */
		assert(!(ptr == limit) || (space == space_end));
	}
}
|
|
|
|
#ifdef DEV_BUILD
|
|
|
|
/*
|
|
* Utility routines designed to be called from the debugger to
|
|
* print buffers, pointers, etc. which may contain text that
|
|
* the debugger doesn't consider visible.
|
|
*/
|
|
static void dump_hex(UCHAR * p, USHORT len)
{
/**************************************
 *
 *	d u m p _ h e x
 *
 **************************************
 *
 * Functional description
 *	Debugger aid: print <len> bytes starting at <p> as
 *	two-digit uppercase hex values separated by spaces,
 *	followed by a newline.
 *
 *************************************/
	UCHAR *stop = p + len;

	for (; p < stop; ++p)
		ib_printf("%02X ", *p);

	ib_printf("\n");
}
|
|
|
|
|
|
static void dump_latin(UCHAR * p, USHORT len)
{
/**************************************
 *
 *	d u m p _ l a t i n
 *
 **************************************
 *
 * Functional description
 *	Debugger aid: print <len> bytes starting at <p>.
 *	Bytes accepted by isprintable() are written as
 *	characters; all others are written as a backslash
 *	followed by 0xNN.  A newline ends the output.
 *
 *	NOTE(review): isprintable() is not defined in the
 *	visible part of this file -- confirm it is available
 *	in DEV_BUILD configurations.
 *
 *************************************/

	while (len--) {
		if (isprintable(*p))
			ib_printf("%c", *p++);
		else {
			/* The backslash must be escaped: "\0x..." starts with the
			 * octal escape \0 (NUL), which terminates the format string
			 * immediately and prints nothing. */
			ib_printf("\\0x%02X", *p++);
		}
	}
	ib_printf("\n");
}
|
|
#endif
|
|
|
|
unsigned short TextTypeNC::to_wc(UCS2_CHAR *pWideUC,
								 unsigned short nWide,
								 unsigned char *pNarrow,
								 unsigned short nNarrow,
								 short *err_code,
								 unsigned short *err_position)
/**************************************
 *
 *	TextTypeNC::to_wc
 *
 **************************************
 *
 * Functional description
 *	Widen a narrow (one byte per character) string into
 *	UCS2_CHAR units.  nWide and nNarrow are byte counts.
 *
 *	With pWideUC == NULL nothing is copied and the number
 *	of output bytes needed is returned.  Otherwise returns
 *	the number of bytes written; *err_position receives
 *	the number of input bytes consumed and *err_code is
 *	CS_TRUNCATION_ERROR when input was left over, else 0.
 *
 **************************************/
{
	UCS2_CHAR *out = pWideUC;

	assert((pNarrow != NULL) || (out == NULL));
	assert(err_code != NULL);
	assert(err_position != NULL);

	*err_code = 0;
	if (out == NULL)
		return (sizeof(UCS2_CHAR) * nNarrow);	/* all cases */

	UCS2_CHAR *out_begin = out;
	UCHAR *in = pNarrow;

	/* Each converted character uses two bytes of output space */
	for (; nWide > 1 && nNarrow; nWide -= 2, --nNarrow) {
		/* YYY - Byte order issues here */
		*out++ = (UCS2_CHAR) *in++;
	}

	if (!*err_code && nNarrow)
		*err_code = CS_TRUNCATION_ERROR;

	*err_position = (in - pNarrow) * sizeof(*in);

	return ((out - out_begin) * sizeof(*out));
}
|
|
|
|
unsigned short TextTypeNC::contains(TDBB a, unsigned char *b, unsigned short c,
									unsigned char *d, unsigned short e)
{
	// Forwards to EVL_nc_contains, inserting this text type as the
	// second argument; the remaining arguments pass through unchanged.
	return EVL_nc_contains(a, this, b, c, d, e);
}
|
|
|
|
unsigned short TextTypeNC::like(TDBB a, unsigned char *b, short c,
								unsigned char *d, short e, short f)
{
	// Forwards to EVL_nc_like, inserting this text type as the second
	// argument; the remaining arguments pass through unchanged.
	return EVL_nc_like(a, this, b, c, d, e, f);
}
|
|
|
|
unsigned short TextTypeNC::matches(TDBB a,
								   unsigned char *b, short c,
								   unsigned char *d, short e)
{
	// Forwards to EVL_nc_matches, inserting this text type as the second
	// argument; the remaining arguments pass through unchanged.
	return EVL_nc_matches(a, this, b, c, d, e);
}
|
|
|
|
unsigned short TextTypeNC::sleuth_check(TDBB a, unsigned short b,
										unsigned char *c, unsigned short d,
										unsigned char *e, unsigned short f)
{
	// Forwards to EVL_nc_sleuth_check, inserting this text type as the
	// second argument; the remaining arguments pass through unchanged.
	return EVL_nc_sleuth_check(a, this, b, c, d, e, f);
}
|
|
|
|
unsigned short TextTypeNC::sleuth_merge(TDBB a,
										unsigned char *b, unsigned short c,
										unsigned char *d, unsigned short e,
										unsigned char *f, unsigned short g)
{
	// Forwards to EVL_nc_sleuth_merge, inserting this text type as the
	// second argument; the remaining arguments pass through unchanged.
	return EVL_nc_sleuth_merge(a, this, b, c, d, e, f, g);
}
|
|
|
|
unsigned short TextTypeNC::mbtowc(UCS2_CHAR *wc, unsigned char *ptr, unsigned short count)
/**************************************
 *
 *	TextTypeNC::mbtowc
 *
 **************************************
 *
 * Functional description
 *	Get the next character from the input stream.
 *	Narrow character version: one byte per character.
 *
 *	When <wc> is non-NULL it receives the character, or
 *	0 if no input is left.
 *
 * Returns:
 *	1 (bytes consumed) when count >= 1,
 *	(unsigned short) -1 when no more characters remain.
 *
 **************************************/
{
	assert(ptr);

	const bool have_char = (count >= 1);

	if (wc)
		*wc = have_char ? *ptr : 0;

	if (have_char)
		return 1;

	return (unsigned short)-1;	/* No more characters */
}
|
|
|
|
unsigned short TextTypeMB::contains(TDBB a, unsigned char *b, unsigned short c,
									unsigned char *d, unsigned short e)
{
	// Forwards to EVL_mb_contains, inserting this text type as the
	// second argument; the remaining arguments pass through unchanged.
	return EVL_mb_contains(a, this, b, c, d, e);
}
|
|
|
|
unsigned short TextTypeMB::like(TDBB a, unsigned char *b, short c,
								unsigned char *d, short e, short f)
{
	// Forwards to EVL_mb_like, inserting this text type as the second
	// argument; the remaining arguments pass through unchanged.
	return EVL_mb_like(a, this, b, c, d, e, f);
}
|
|
|
|
unsigned short TextTypeMB::matches(TDBB a,
								   unsigned char *b, short c,
								   unsigned char *d, short e)
{
	// Forwards to EVL_mb_matches, inserting this text type as the second
	// argument; the remaining arguments pass through unchanged.
	return EVL_mb_matches(a, this, b, c, d, e);
}
|
|
|
|
unsigned short TextTypeMB::sleuth_check(TDBB a, unsigned short b,
										unsigned char *c, unsigned short d,
										unsigned char *e, unsigned short f)
{
	// Forwards to EVL_mb_sleuth_check, inserting this text type as the
	// second argument; the remaining arguments pass through unchanged.
	return EVL_mb_sleuth_check(a, this, b, c, d, e, f);
}
|
|
|
|
unsigned short TextTypeMB::sleuth_merge(TDBB a,
										unsigned char *b, unsigned short c,
										unsigned char *d, unsigned short e,
										unsigned char *f, unsigned short g)
{
	// Forwards to EVL_mb_sleuth_merge, inserting this text type as the
	// second argument; the remaining arguments pass through unchanged.
	return EVL_mb_sleuth_merge(a, this, b, c, d, e, f, g);
}
|
|
|
|
unsigned short TextTypeMB::mbtowc(UCS2_CHAR *wc, unsigned char *ptr, unsigned short count)
{
/**************************************
 *
 *	TextTypeMB::mbtowc
 *
 **************************************
 *
 * Functional description
 *	Get the next character from the multibyte
 *	input stream.
 *	Multibyte character version: the next character is
 *	taken to be one UCS2_CHAR stored at <ptr> in native
 *	byte order.
 *
 *	When <wc> is non-NULL it receives the character, or
 *	0 if fewer than two bytes are left.
 *
 * Returns:
 *	2 (bytes consumed) when count >= 2,
 *	(unsigned short) -1 when no more characters remain.
 *
 **************************************/

	assert(ptr);

	if (count >= 2) {
		if (wc) {
			/* <ptr> is a plain byte pointer and need not be aligned for
			 * UCS2_CHAR, so copy the bytes instead of dereferencing a
			 * cast pointer.  The bytes read are the same. */
			memcpy(wc, ptr, sizeof(UCS2_CHAR));
		}
		return 2;
	}
	if (wc)
		*wc = 0;
	return (unsigned short)-1;	/* No more characters */
}
|
|
|
|
unsigned short TextTypeWC::to_wc(UCS2_CHAR *pDestUC,
								 unsigned short nDest,
								 unsigned char *pSrcUC,
								 unsigned short nSrc,
								 short *err_code,
								 unsigned short *err_position)
{
/**************************************
 *
 *	TextTypeWC::to_wc
 *
 **************************************
 *
 * Functional description
 *	Copy a wide-character string, one UCS2_CHAR at a
 *	time.  nDest and nSrc are byte counts.
 *
 *	With pDestUC == NULL nothing is copied and nSrc is
 *	returned as the length estimate.  Otherwise returns
 *	the number of bytes written; *err_position receives
 *	the number of source bytes consumed and *err_code is
 *	CS_TRUNCATION_ERROR when source bytes were left over,
 *	else 0.
 *
 *************************************/
	UCS2_CHAR *out = pDestUC;
	UCS2_CHAR *in = (UCS2_CHAR*) pSrcUC;

	assert((in != NULL) || (out == NULL));
	assert(err_code != NULL);
	assert(err_position != NULL);

	*err_code = 0;
	if (out == NULL)			/* length estimate needed? */
		return (nSrc);

	UCS2_CHAR *out_begin = out;
	UCS2_CHAR *in_begin = in;

	/* Move whole two-byte units while both buffers have room */
	for (; nDest > 1 && nSrc > 1; nDest -= 2, nSrc -= 2)
		*out++ = *in++;

	if (!*err_code && nSrc)
		*err_code = CS_TRUNCATION_ERROR;

	*err_position = (in - in_begin) * sizeof(*in);

	return ((out - out_begin) * sizeof(*out));
}
|
|
|
|
unsigned short TextTypeWC::contains(TDBB a, unsigned char *b, unsigned short c,
									unsigned char *d, unsigned short e)
{
	// Forwards to EVL_wc_contains with this text type as the second
	// argument; both byte pointers are passed as UCS2_CHAR pointers.
	return EVL_wc_contains(a, this, (UCS2_CHAR*) b, c, (UCS2_CHAR*) d, e);
}
|
|
|
|
unsigned short TextTypeWC::like(TDBB a, unsigned char *b, short c,
								unsigned char *d, short e, short f)
{
	// Forwards to EVL_wc_like with this text type as the second
	// argument; both byte pointers are passed as UCS2_CHAR pointers.
	return EVL_wc_like(a, this, (UCS2_CHAR*) b, c, (UCS2_CHAR*) d, e, f);
}
|
|
|
|
unsigned short TextTypeWC::matches(TDBB a,
								   unsigned char *b, short c,
								   unsigned char *d, short e)
{
	// Forwards to EVL_wc_matches with this text type as the second
	// argument; both byte pointers are passed as UCS2_CHAR pointers.
	return EVL_wc_matches(a, this, (UCS2_CHAR*) b, c, (UCS2_CHAR*) d, e);
}
|
|
|
|
unsigned short TextTypeWC::sleuth_check(TDBB a, unsigned short b,
										unsigned char *c, unsigned short d,
										unsigned char *e, unsigned short f)
{
	// Forwards to EVL_wc_sleuth_check with this text type as the second
	// argument; both byte pointers are passed as UCS2_CHAR pointers.
	return EVL_wc_sleuth_check(a, this, b, (UCS2_CHAR*) c, d, (UCS2_CHAR*) e, f);
}
|
|
|
|
unsigned short TextTypeWC::sleuth_merge(TDBB a,
										unsigned char *b, unsigned short c,
										unsigned char *d, unsigned short e,
										unsigned char *f, unsigned short g)
{
	// Forwards to EVL_wc_sleuth_merge with this text type as the second
	// argument; all three byte pointers are passed as UCS2_CHAR pointers.
	return EVL_wc_sleuth_merge(a, this,
							   (UCS2_CHAR*) b, c,
							   (UCS2_CHAR*) d, e,
							   (UCS2_CHAR*) f, g);
}
|
|
|
|
unsigned short TextTypeWC::mbtowc(UCS2_CHAR *wc, unsigned char *ptr, unsigned short count)
{
/**************************************
 *
 *	TextTypeWC::mbtowc
 *
 **************************************
 *
 * Functional description
 *	Get the next character from the input stream.
 *	Wide character version: each character is one
 *	UCS2_CHAR read from <ptr>.
 *
 *	When <wc> is non-NULL it receives the character, or
 *	0 if fewer than two bytes are left.
 *
 * Returns:
 *	2 (bytes consumed) when count >= 2,
 *	(unsigned short) -1 when no more characters remain.
 *
 **************************************/

	assert(ptr);

	if (count < 2) {
		if (wc)
			*wc = 0;
		return (unsigned short)-1;	/* No more characters */
	}

	if (wc)
		*wc = *(UCS2_CHAR *) ptr;
	return 2;
}
|
|
|
|
|
|
//===============================================================================
|
|
//===============================================================================
|
|
//===============================================================================
|
|
// Code to handle loading international charset plugins
|
|
// in the new c++ OO format.
|
|
|
|
static const char *INTL_PLUGIN_DIR = "intl";
|
|
static PluginManager intlPlugins;
|
|
static bool loaded = false;
|
|
|
|
static void* search_out_alloc_func(const char *sym, CHARSET_ID p1, CHARSET_ID p2)
|
|
{
|
|
typedef void* (*lookupFuncType)(CHARSET_ID,CHARSET_ID);
|
|
|
|
void* result = 0;
|
|
lookupFuncType lookupFunc;
|
|
Firebird::string entryPoint(sym);
|
|
|
|
if (!loaded)
|
|
{
|
|
intlPlugins.addSearchPath(INTL_PLUGIN_DIR);
|
|
intlPlugins.addIgnoreModule(INTL_MODULE1);
|
|
intlPlugins.addIgnoreModule(INTL_MODULE2);
|
|
intlPlugins.loadAllPlugins();
|
|
loaded = true;
|
|
}
|
|
|
|
for(PluginManager::iterator itr = intlPlugins.begin();
|
|
result == 0 && itr != intlPlugins.end(); ++itr)
|
|
{
|
|
lookupFunc = (lookupFuncType)(*itr).lookupSymbol(entryPoint);
|
|
if (!lookupFunc)
|
|
continue;
|
|
result = (*lookupFunc)(p1,p2);
|
|
}
|
|
return result;
|
|
}
|
|
|
|
//===============================================================================
|
|
//===============================================================================
|
|
//===============================================================================
|
|
// This code handles backwards compatibility with the old international
|
|
// character set plugin format.
|
|
|
|
// We need all the structure definitions from the old interface
|
|
#define INTL_ENGINE_INTERNAL
|
|
#include "../jrd/intlobj.h"
|
|
|
|
// storage for the loadable modules
|
|
// Plugin manager used to locate the old-format modules
// (INTL_MODULE1 / INTL_MODULE2).
static PluginManager intlBCPlugins;

// Set once intlBCPlugins has been given its search path.
static bool bcLoaded = false;
|
|
|
|
class CsConvert_BC : public CsConvert
|
|
{
|
|
public:
|
|
CsConvert_BC(struct csconvert *csv, bool deleteMemory) :
|
|
CsConvert(
|
|
csv->csconvert_id,
|
|
(const char*)csv->csconvert_name,
|
|
csv->csconvert_from,
|
|
csv->csconvert_to),
|
|
cnvt(csv),
|
|
deleteOnDestruct(deleteMemory)
|
|
{}
|
|
virtual ~CsConvert_BC() { if (deleteOnDestruct) delete cnvt; }
|
|
|
|
unsigned short convert(unsigned char *a,
|
|
unsigned short b,
|
|
unsigned char *c,
|
|
unsigned short d,
|
|
short *e,
|
|
unsigned short *f)
|
|
{
|
|
assert(cnvt != NULL);
|
|
return (*(reinterpret_cast<USHORT (*)(struct csconvert*, UCHAR*,USHORT,
|
|
UCHAR*,USHORT,short*,USHORT*)>(cnvt->csconvert_convert)))
|
|
(cnvt,a,b,c,d,e,f);
|
|
}
|
|
|
|
private:
|
|
struct csconvert *cnvt;
|
|
bool deleteOnDestruct;
|
|
};
|
|
|
|
class CharSet_BC : public CharSet
|
|
{
|
|
public:
|
|
CharSet_BC(MemoryPool &p, struct charset *csStruct) :
|
|
CharSet(
|
|
csStruct->charset_id,
|
|
(const char*)csStruct->charset_name,
|
|
csStruct->charset_min_bytes_per_char,
|
|
csStruct->charset_max_bytes_per_char,
|
|
csStruct->charset_space_length,
|
|
(char*)csStruct->charset_space_character),
|
|
cs(csStruct)
|
|
{
|
|
charset_to_unicode = FB_NEW(p) CsConvert_BC(&cs->charset_to_unicode, false);
|
|
charset_from_unicode = FB_NEW(p) CsConvert_BC(&cs->charset_from_unicode, false);
|
|
}
|
|
|
|
~CharSet_BC() { delete cs; }
|
|
private:
|
|
struct charset *cs;
|
|
};
|
|
|
|
template <class T>
|
|
class TextType_BC : public T
|
|
{
|
|
public:
|
|
TextType_BC(struct texttype *textt) :
|
|
T(
|
|
textt->texttype_type,
|
|
(char*)textt->texttype_name,
|
|
textt->texttype_character_set,
|
|
textt->texttype_country,
|
|
textt->texttype_bytes_per_char),
|
|
tt(textt)
|
|
{}
|
|
|
|
unsigned short key_length(unsigned short a)
|
|
{
|
|
assert(tt);
|
|
assert(tt->texttype_fn_key_length);
|
|
return (*(reinterpret_cast<USHORT (*)(TEXTTYPE,USHORT)>
|
|
(tt->texttype_fn_key_length)))(tt,a);
|
|
}
|
|
|
|
unsigned short string_to_key(unsigned short a,
|
|
unsigned char *b,
|
|
unsigned short c,
|
|
unsigned char *d,
|
|
unsigned short e)
|
|
{
|
|
assert(tt);
|
|
assert(tt->texttype_fn_string_to_key);
|
|
return (*(reinterpret_cast
|
|
<USHORT(*)(TEXTTYPE,USHORT,UCHAR*,USHORT,UCHAR*,USHORT)>
|
|
(tt->texttype_fn_string_to_key)))
|
|
(tt,a,b,c,d,e);
|
|
}
|
|
|
|
short compare(unsigned short a,
|
|
unsigned char *b,
|
|
unsigned short c,
|
|
unsigned char *d)
|
|
{
|
|
assert(tt);
|
|
assert(tt->texttype_fn_compare);
|
|
return (*(reinterpret_cast
|
|
<short (*)(TEXTTYPE,USHORT,UCHAR*,USHORT,UCHAR*)>
|
|
(tt->texttype_fn_compare)))(tt,a,b,c,d);
|
|
}
|
|
|
|
unsigned short to_upper(unsigned short a)
|
|
{
|
|
assert(tt);
|
|
assert(tt->texttype_fn_to_upper);
|
|
return (*(reinterpret_cast
|
|
<short (*)(TEXTTYPE,USHORT)>
|
|
(tt->texttype_fn_to_upper)))(tt,a);
|
|
}
|
|
|
|
unsigned short to_lower(unsigned short a)
|
|
{
|
|
assert(tt);
|
|
assert(tt->texttype_fn_to_lower);
|
|
return (*(reinterpret_cast
|
|
<USHORT (*)(TEXTTYPE,USHORT)>
|
|
(tt->texttype_fn_to_lower)))(tt,a);
|
|
}
|
|
|
|
short str_to_upper(unsigned short a,
|
|
unsigned char *b,
|
|
unsigned short c,
|
|
unsigned char *d)
|
|
{
|
|
assert(tt);
|
|
assert(tt->texttype_fn_str_to_upper);
|
|
return (*(reinterpret_cast
|
|
<short (*)(TEXTTYPE,USHORT,UCHAR*,USHORT,UCHAR*)>
|
|
(tt->texttype_fn_str_to_upper)))
|
|
(tt,a,b,c,d);
|
|
}
|
|
|
|
unsigned short to_wc(UCS2_CHAR *a,
|
|
unsigned short b,
|
|
unsigned char *c,
|
|
unsigned short d,
|
|
short *e,
|
|
unsigned short *f)
|
|
{
|
|
assert(tt);
|
|
assert(tt->texttype_fn_to_wc);
|
|
return (*(reinterpret_cast
|
|
<USHORT (*)(TEXTTYPE,UCS2_CHAR*,USHORT,UCHAR*,USHORT,short*,USHORT*)>
|
|
(tt->texttype_fn_to_wc)))
|
|
(tt,a,b,c,d,e,f);
|
|
}
|
|
|
|
unsigned short mbtowc(UCS2_CHAR *a, unsigned char *b, unsigned short c)
|
|
{
|
|
assert(tt);
|
|
if (!tt->texttype_fn_mbtowc)
|
|
return T::mbtowc(a,b,c);
|
|
return (*(reinterpret_cast<
|
|
USHORT (*)(TEXTTYPE, UCS2_CHAR*, UCHAR*, USHORT)>
|
|
(tt->texttype_fn_mbtowc)))(tt,a,b,c);
|
|
}
|
|
|
|
unsigned short contains(TDBB a, unsigned char *b,
|
|
unsigned short c,
|
|
unsigned char *d,
|
|
unsigned short e)
|
|
{
|
|
assert(tt);
|
|
if (!tt->texttype_fn_contains)
|
|
return T::contains(a,b,c,d,e);
|
|
return (*(reinterpret_cast<
|
|
USHORT (*)(TDBB,TEXTTYPE,UCHAR*,USHORT,UCHAR*,USHORT)>
|
|
(tt->texttype_fn_contains)))
|
|
(a,tt,b,c,d,e);
|
|
}
|
|
|
|
unsigned short like(TDBB tdbb, unsigned char *a,
|
|
short b,
|
|
unsigned char *c,
|
|
short d,
|
|
short e)
|
|
{
|
|
assert(tt);
|
|
if (!tt->texttype_fn_like)
|
|
return T::like(tdbb,a,b,c,d,e);
|
|
else
|
|
return (*(reinterpret_cast<
|
|
USHORT(*)(TDBB,TEXTTYPE,UCHAR*,short,UCHAR*,short,short)>
|
|
(tt->texttype_fn_like)))(tdbb,tt,a,b,c,d,e);
|
|
}
|
|
|
|
unsigned short matches(TDBB tdbb, unsigned char *a, short b,
|
|
unsigned char *c, short d)
|
|
{
|
|
assert(tt);
|
|
if (!tt->texttype_fn_matches)
|
|
return T::matches(tdbb,a,b,c,d);
|
|
return (*(reinterpret_cast<
|
|
USHORT (*)(TDBB,TEXTTYPE,UCHAR*,short,UCHAR*,short)>
|
|
(tt->texttype_fn_matches)))
|
|
(tdbb,tt,a,b,c,d);
|
|
}
|
|
|
|
unsigned short sleuth_check(TDBB tdbb, unsigned short a,
|
|
unsigned char *b,
|
|
unsigned short c,
|
|
unsigned char *d,
|
|
unsigned short e)
|
|
{
|
|
assert(tt);
|
|
if (!tt->texttype_fn_sleuth_check)
|
|
return T::sleuth_check(tdbb,a,b,c,d,e);
|
|
return (*(reinterpret_cast<
|
|
USHORT(*)(TDBB,TEXTTYPE,USHORT,UCHAR*,USHORT,UCHAR*,USHORT)>
|
|
(tt->texttype_fn_sleuth_check)))
|
|
(tdbb,tt,a,b,c,d,e);
|
|
}
|
|
|
|
unsigned short sleuth_merge(TDBB tdbb, unsigned char *a,
|
|
unsigned short b,
|
|
unsigned char *c,
|
|
unsigned short d,
|
|
unsigned char *e,
|
|
unsigned short f)
|
|
{
|
|
assert(tt);
|
|
if (!tt->texttype_fn_sleuth_merge)
|
|
return T::sleuth_merge(tdbb,a,b,c,d,e,f);
|
|
return (*(reinterpret_cast<
|
|
USHORT(*)(TDBB,TEXTTYPE,UCHAR*,USHORT,UCHAR*,USHORT,UCHAR*,USHORT)>
|
|
(tt->texttype_fn_sleuth_merge)))
|
|
(tdbb,tt,a,b,c,d,e,f);
|
|
}
|
|
|
|
private:
|
|
struct texttype *tt;
|
|
};
|
|
|
|
static void* intl_back_compat_obj_init_lookup(
	USHORT	type,
	SSHORT	parm1,
	SSHORT	parm2)
{
/**************************************
 *
 *      intl_back_compat_obj_init_lookup
 *
 **************************************
 *
 * Functional description
 *	Find the initialisation function for the requested
 *	international object (text type, character set or
 *	converter, selected by <type>) using the obsolete
 *	c/IB/FB 6.0 interface.
 *	Search algorithm is:
 *		Look in intllib  (INTL_MODULE1, or LD_lookup when
 *		                  INTL_BUILTIN is defined)
 *		Look in intllib2 (INTL_MODULE2, or LD2_lookup)
 *		Look for a normal UDF entry with the expected
 *		name and argument layout
 *
 * Returns:
 *	the function, as void*  - when one of the sources
 *	                          supplies it
 *	NULL                    - when none does
 *
 ***************************************/
	USHORT (*function)();

	if (!bcLoaded)
	{
		intlBCPlugins.addSearchPath(INTL_PLUGIN_DIR);
		bcLoaded = true;
	}

	PluginManager::Plugin intlMod1 = intlBCPlugins.findPlugin(INTL_MODULE1);
	PluginManager::Plugin intlMod2 = intlBCPlugins.findPlugin(INTL_MODULE2);

	USHORT(*lookup_fn) (USHORT, FPTR_SHORT *, SSHORT, SSHORT);

	/* The parameter is called <type>; the trace used to name an
	 * undeclared identifier, which broke builds with tracing enabled. */
	INTL_TRACE(("INTL: looking for obj %d ttype %d\n", type, parm1));

	function = NULL;

#ifdef INTL_BUILTIN
	/* As in the loadable-module path below, a zero return means the
	 * lookup succeeded, so hand the function back at once instead of
	 * letting the next lookup overwrite it. */
	if (LD_lookup(type, &function, parm1, parm2) == 0)
		return (void*) function;
	function = NULL;
#else
	/* Look for an InterBase supplied object to implement the text type */
	/* The flu.c uses searchpath which expects a file name not a path */
	INTL_TRACE(("INTL: trying %s %s\n", INTL_MODULE1, INTL_LOOKUP_ENTRY1));
	Firebird::string tempStr(INTL_LOOKUP_ENTRY1);
	if ( intlMod1 && (lookup_fn = (USHORT(*)(USHORT, USHORT(**)(), short, short))
			(intlMod1.lookupSymbol(tempStr))) ) {
		INTL_TRACE(("INTL: calling lookup %s %s\n", INTL_MODULE1,
					INTL_LOOKUP_ENTRY1));
		if ((*lookup_fn) (type, &function, parm1, parm2) != 0) {
			function = NULL;
		}
		else
		{
			return (void*) function;
		}
	}
#endif

	/* Still not found, check the set of supplemental international objects */
#ifdef INTL_BUILTIN
	if (LD2_lookup(type, &function, parm1, parm2) == 0)
		return (void*) function;
	function = NULL;
#else
	/* Look for an InterBase supplied object to implement the text type */
	/* The flu.c uses searchpath which expects a file name not a path */
	INTL_TRACE(("INTL: trying %s %s\n", INTL_MODULE2, INTL_LOOKUP_ENTRY2));
	tempStr = INTL_LOOKUP_ENTRY2;
	if ( intlMod2 && (lookup_fn = (USHORT(*)(USHORT, USHORT(**)(), short, short))
			(intlMod2.lookupSymbol(tempStr))) ) {
		INTL_TRACE(("INTL: calling lookup %s %s\n", INTL_MODULE2,
					INTL_LOOKUP_ENTRY2));
		if ((*lookup_fn) (type, &function, parm1, parm2) != 0) {
			function = NULL;
		}
		else
		{
			return (void*) function;
		}
	}
#endif

	/* Still not found, check if there is a UDF in the database defined the right way */
	FUN function_block;
	USHORT argcount;
	char entry[48];

	/* Build the UDF name to look for and note how many arguments it
	 * must declare. */
	/* EKU: need a replacement for snprintf for systems like SINIX-Z!!! */
	switch (type) {
	case type_texttype:
#ifdef HAVE_SNPRINTF
		snprintf(entry, sizeof(entry), INTL_USER_ENTRY, parm1);
#else
		sprintf(entry, INTL_USER_ENTRY, parm1);
#endif
		argcount = 2;
		break;
	case type_charset:
#ifdef HAVE_SNPRINTF
		snprintf(entry, sizeof(entry), "USER_CHARSET_%03d", parm1);
#else
		sprintf(entry, "USER_CHARSET_%03d", parm1);
#endif
		argcount = 2;
		break;
	case type_csconvert:
#ifdef HAVE_SNPRINTF
		snprintf(entry, sizeof(entry), "USER_TRANSLATE_%03d_%03d", parm1,
				parm2);
#else
		sprintf(entry, "USER_TRANSLATE_%03d_%03d", parm1,
				parm2);
#endif
		argcount = 3;
		break;
	default:
		BUGCHECK(1);
		break;
	}
	INTL_TRACE(("INTL: trying user fn %s\n", entry));
	if ( (function_block = FUN_lookup_function((TEXT*)entry)) ) {
		INTL_TRACE(("INTL: found a user fn, validating\n"));
		/* Accept the UDF only when its declared argument layout is the
		 * one the old interface calls it with. */
		if ((function_block->fun_count == argcount) &&
			(function_block->fun_args == argcount) &&
			(function_block->fun_return_arg == 0) &&
			(function_block->fun_entrypoint != NULL) &&
			(function_block->fun_rpt[0].fun_mechanism == FUN_value) &&
			(function_block->fun_rpt[0].fun_desc.dsc_dtype == dtype_short)
			&& (function_block->fun_rpt[1].fun_desc.dsc_dtype ==
				dtype_short)
			&& (function_block->fun_rpt[argcount - 1].
				fun_desc.dsc_dtype == dtype_short)
			&& (function_block->fun_rpt[argcount].fun_mechanism ==
				FUN_reference)
			&& (function_block->fun_rpt[argcount].fun_desc.dsc_dtype ==
				dtype_text))
		{
			function = (FPTR_SHORT) function_block->fun_entrypoint;
			return (void*) function;
		}
	}
	return NULL;
}
|
|
|
|
static CharSet *BC_CharSetAllocFunc(MemoryPool &p, SSHORT cs_id, SSHORT unused)
{
	// Build a CharSet for <cs_id> through the old plugin interface:
	// look up the init function, let it fill a zeroed charset record
	// allocated from <p>, then wrap the record in a CharSet_BC.
	//
	// Returns 0 when no init function is found or when the init
	// function reports failure (non-zero).  If creating the wrapper
	// raises std::exception, the record is released and the exception
	// is rethrown.
	typedef USHORT (*CSInitFunc)(CHARSET, SSHORT, SSHORT);
	CSInitFunc csInitFunc;

	csInitFunc = (CSInitFunc) intl_back_compat_obj_init_lookup(type_charset, cs_id, unused);
	assert(csInitFunc != 0);
	// assert() is compiled out of release builds; never call through a
	// null pointer there, report failure the same way a failed init does.
	if (!csInitFunc)
		return 0;

	CHARSET cs = FB_NEW(p) charset;
	memset(cs, 0, sizeof(charset));

	if (0 != (*csInitFunc)(cs, cs_id, unused))
	{
		delete cs;
		return 0;
	}

	CharSet *result = 0;
	try
	{
		result = FB_NEW(p) CharSet_BC(p, cs);
	}
	catch(std::exception&)
	{
		delete cs;
		throw;
	}

	return result;
}
|
|
|
|
static CsConvert *BC_CsConvertAllocFunc(MemoryPool &p, SSHORT from_id, SSHORT to_id)
{
	// Build a CsConvert through the old plugin interface: look up the
	// init function, let it fill a zeroed csconvert record allocated
	// from <p>, then wrap the record in a CsConvert_BC that owns it.
	//
	// Both the lookup and the init function receive the ids as
	// (to_id, from_id); the (from_id, to_id) order was tried and
	// deliberately replaced in the original code.
	//
	// Returns 0 when no init function is found or when the init
	// function reports failure (non-zero).  If creating the wrapper
	// raises std::exception, the record is released and the exception
	// is rethrown.
	typedef USHORT (*CVTInitFunc)(CSCONVERT, SSHORT, SSHORT);
	CVTInitFunc cvtInitFunc;

	cvtInitFunc = (CVTInitFunc) intl_back_compat_obj_init_lookup(type_csconvert, to_id, from_id);
	assert(cvtInitFunc != 0);
	// assert() is compiled out of release builds; never call through a
	// null pointer there, report failure the same way a failed init does.
	if (!cvtInitFunc)
		return 0;

	CSCONVERT cvt = FB_NEW(p) csconvert;
	memset(cvt, 0, sizeof(csconvert));

	if (0 != (*cvtInitFunc)(cvt, to_id, from_id))
	{
		delete cvt;
		return 0;
	}

	CsConvert *result = 0;
	try
	{
		result = FB_NEW(p) CsConvert_BC(cvt, true);
	}
	catch(std::exception&)
	{
		delete cvt;
		throw;
	}

	return result;
}
|
|
|
|
static TextType *BC_TextTypeAllocFunc(MemoryPool &p, SSHORT tt_id, SSHORT unused)
{
	// Build a TextType for <tt_id> through the old plugin interface:
	// look up the init function, let it fill a zeroed texttype record
	// allocated from <p>, then wrap the record.  The wrapper's base
	// class is chosen from the record:
	//   1 byte per char, no to_wc entry  -> TextTypeNC
	//   2 bytes per char, no to_wc entry -> TextTypeWC
	//   to_wc entry present              -> TextTypeMB
	//   anything else                    -> BUGCHECK(1)
	//
	// Returns 0 when no init function is found or when the init
	// function reports failure (non-zero).  If creating the wrapper
	// raises std::exception, the record is released and the exception
	// is rethrown.
	typedef USHORT (*TTInitFunc)(TEXTTYPE, SSHORT, SSHORT);
	TTInitFunc ttInitFunc;

	ttInitFunc = (TTInitFunc) intl_back_compat_obj_init_lookup(type_texttype, tt_id, unused);
	assert(ttInitFunc != 0);
	// assert() is compiled out of release builds; never call through a
	// null pointer there, report failure the same way a failed init does.
	if (!ttInitFunc)
		return 0;

	TEXTTYPE tt = FB_NEW(p) texttype;
	memset(tt, 0, sizeof(texttype));

	if (0 != (*ttInitFunc)(tt, tt_id, unused))
	{
		delete tt;
		return 0;
	}

	TextType *result = 0;
	try
	{
		if (tt->texttype_bytes_per_char == 1 && tt->texttype_fn_to_wc == NULL)
			result = FB_NEW(p) TextType_BC<TextTypeNC>(tt);
		else if (tt->texttype_bytes_per_char == 2 && tt->texttype_fn_to_wc == NULL)
			result = FB_NEW(p) TextType_BC<TextTypeWC>(tt);
		else if (tt->texttype_fn_to_wc != NULL)
			result = FB_NEW(p) TextType_BC<TextTypeMB>(tt);
		else
			BUGCHECK(1);
	}
	catch(std::exception&)
	{
		delete tt;
		throw;
	}

	return result;
}
|
|
|
|
static void* intl_back_compat_alloc_func_lookup(
	USHORT type,
	CHARSET_ID parm1,
	CHARSET_ID parm2)
{
	// Return, as void*, the backward compatible allocator matching
	// <type>, but only when the old interface can actually supply an
	// init function for (type, parm1, parm2).  Returns NULL otherwise.
	if (NULL == intl_back_compat_obj_init_lookup(type, parm1, parm2))
		return NULL;

	switch (type)
	{
	case type_charset:
		return (void*) BC_CharSetAllocFunc;

	case type_texttype:
		return (void*) BC_TextTypeAllocFunc;

	case type_csconvert:
		return (void*) BC_CsConvertAllocFunc;

	default:
		BUGCHECK(1);
	}

	return NULL;
}
|