/************* history ************ * * COMPONENT: JRD MODULE: INTL.C * generated by Marion V2.5 2/6/90 * from dev db on 4-JAN-1995 ***************************************************************** * * PR 2002-06-02 Added ugly c hack in * intl_back_compat_alloc_func_lookup. * When someone has time we need to change the references to * return (void*) function to something more C++ like * * 42 4711 3 11 17 tamlin 2001 * Added silly numbers before my name, and converted it to C++. * * 18850 daves 4-JAN-1995 * Fix gds__alloc usage * * 18837 deej 31-DEC-1994 * fixing up HARBOR_MERGE * * 18821 deej 27-DEC-1994 * HARBOR MERGE * * 18789 jdavid 19-DEC-1994 * Cast some functions * * 17508 jdavid 15-JUL-1994 * Bring it up to date * * 17500 daves 13-JUL-1994 * Bug 6645: Different calculation of partial keys * * 17202 katz 24-MAY-1994 * PC_PLATFORM requires the .dll extension * * 17191 katz 23-MAY-1994 * OS/2 requires the .dll extension * * 17180 katz 23-MAY-1994 * Define location of DLL on OS/2 * * 17149 katz 20-MAY-1994 * In JRD, gds_arg_number arguments are SLONG's not int's * * 16633 daves 19-APR-1994 * Bug 6202: International licensing uses INTERNATIONAL product code * * 16555 katz 17-APR-1994 * The last argument of calls to ERR_post should be 0 * * 16521 katz 14-APR-1994 * Borland C needs a decorated symbol to lookup * * 16403 daves 8-APR-1994 * Bug 6441: Emit an error whenever transliteration from ttype_binary attempted * * 16141 katz 28-MAR-1994 * Don't declare return value from ISC_lookup_entrypoint as API_ROUTINE * * The contents of this file are subject to the Interbase Public * License Version 1.0 (the "License"); you may not use this file * except in compliance with the License. You may obtain a copy * of the License at http://www.Inprise.com/IPL.html * * Software distributed under the License is distributed on an * "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express * or implied. 
See the License for the specific language governing * rights and limitations under the License. * * The Original Code was created by Inprise Corporation * and its predecessors. Portions created by Inprise Corporation are * Copyright (C) Inprise Corporation. * * All Rights Reserved. * Contributor(s): ______________________________________. * * 2002.10.29 Sean Leyne - Removed obsolete "Netware" port * * 2002.10.30 Sean Leyne - Removed support for obsolete "PC_PLATFORM" define * */ /* * PROGRAM: JRD Intl * MODULE: intl.c * DESCRIPTION: International text support routines * * copyright (c) 1992, 1993 by Borland International */ #if defined(_MSC_VER) && _MSC_VER < 1300 // Any Microsoft compiler before MSVC7 #pragma warning(disable: 4786) #endif #include "firebird.h" #include #include "../jrd/ib_stdio.h" #include "../jrd/jrd.h" #include "../jrd/req.h" #include "../jrd/val.h" #include "gen/codes.h" #include "../jrd/intl.h" #include "../jrd/intl_classes.h" #include "../jrd/ods.h" #include "../jrd/btr.h" #include "../intl/charsets.h" #include "../intl/country_codes.h" #include "../jrd/gdsassert.h" #include "../jrd/license.h" #ifdef INTL_BACKEND #include "../intl/ld_proto.h" #endif #include "../jrd/all_proto.h" #include "../jrd/cvt_proto.h" #include "../jrd/err_proto.h" #include "../jrd/evl_proto.h" #include "../jrd/flu_proto.h" #include "../jrd/fun_proto.h" #include "../jrd/gds_proto.h" #include "../jrd/iberr_proto.h" #include "../jrd/intl_proto.h" #include "../jrd/isc_proto.h" #include "../jrd/thd_proto.h" #include "../jrd/plugin_manager.h" #ifdef DEV_BUILD #define isprintable(x) ((((unsigned char)(x)) & 0x7F) >= ' ') #ifdef DEBUG_INTL #define INTL_TRACE(args) gds__log args #else #define INTL_TRACE(args) #endif #else #define INTL_TRACE(args) #endif /* 11 Sept 2002, Nickolay Samofatov. 
It is used only in asserts, move it out DEV_BUILD section and let optimizer optimize it out */ #define IS_TEXT(x) (((x)->dsc_dtype == dtype_text) ||\ ((x)->dsc_dtype == dtype_varying)||\ ((x)->dsc_dtype == dtype_cstring)) #define TTYPE_TO_CHARSET(tt) ((SSHORT)((tt) & 0x00FF)) #define TTYPE_TO_COLLATION(tt) ((SSHORT)((tt) >> 8)) typedef unsigned char FILECHAR; typedef USHORT UNICODE; typedef USHORT fss_wchar_t; typedef int fss_size_t; // extern declarations for the allocator functions for builtin charsets extern CharSetAllocFunc INTL_charset_alloc_func(short); extern TextTypeAllocFunc INTL_texttype_alloc_func(short); extern CsConvertAllocFunc INTL_csconvert_alloc_func(short, short); static BOOLEAN all_spaces(TDBB, CHARSET_ID, BYTE *, USHORT, USHORT); static void dump_hex(UCHAR *, USHORT); static void dump_latin(UCHAR *, USHORT); static void finish_texttype_init(TextType*, FPTR_VOID, STATUS *); static SSHORT internal_str_to_upper(TextType*, USHORT, UCHAR *, USHORT, UCHAR *); static USHORT internal_string_to_key(TextType*, USHORT, UCHAR *, USHORT, UCHAR *, USHORT); static USHORT mb_to_wc(CsConvert*, WCHAR *, USHORT, MBCHAR *, USHORT, SSHORT *, USHORT *); static USHORT nc_to_wc(CsConvert*, WCHAR *, USHORT, UCHAR *, USHORT, SSHORT *, USHORT *); static void pad_spaces(TDBB, CHARSET_ID, BYTE *, USHORT); static USHORT wc_to_mb(CsConvert*, MBCHAR *, USHORT, WCHAR *, USHORT, SSHORT *, USHORT *); static USHORT wc_to_nc(CsConvert*, NCHAR *, USHORT, WCHAR *, USHORT, SSHORT *, USHORT *); static USHORT wc_to_wc(CsConvert*, WCHAR *, USHORT, WCHAR *, USHORT, SSHORT *, USHORT *); static CharSetContainer *internal_charset_container_lookup(TDBB, SSHORT, STATUS *); static void* search_out_alloc_func(const char *, CHARSET_ID, CHARSET_ID); static void* intl_back_compat_alloc_func_lookup(USHORT, CHARSET_ID, CHARSET_ID); static void* intl_back_compat_obj_init_lookup(USHORT, SSHORT, SSHORT); /* Name of module that implements text-type (n) */ #ifdef VMS /* Note: MUST be only the file name. 
The VMS lib$find_shared_image * call insists on file name only, not any "path" components. */ #define INTL_MODULE1 "FBINTL" #define INTL_MODULE2 "FBINTL2" #endif #ifdef WIN_NT /* prefixed with $INTERBASE */ #define INTL_MODULE1 "fbintl.dll" #define INTL_MODULE2 "fbintl2.dll" #endif #ifndef INTL_MODULE1 /* prefixed with $INTERBASE */ #define INTL_MODULE1 "fbintl" #define INTL_MODULE2 "fbintl2" #endif #ifndef __BORLANDC__ #define INTL_LOOKUP_ENTRY1 "LD_lookup" #define INTL_LOOKUP_ENTRY2 "LD2_lookup" #define INTL_USER_ENTRY "USER_TEXTTYPE_%03d" #else #define INTL_LOOKUP_ENTRY1 "_LD_lookup" #define INTL_LOOKUP_ENTRY2 "_LD2_lookup" #define INTL_USER_ENTRY "_USER_TEXTTYPE_%03d" #endif // Classes and structures used internally to this file and intl implementation class CharSetContainer { public: CharSetContainer(MemoryPool &p, CharSet *cs = 0) : charset_converters(p), charset_collations(p), impossible_conversions(p), cs(cs) {} CharSet *getCharSet() { return cs; } void setCollation(TextType *cs, short id) { if (id >= charset_collations.size()) charset_collations.resize(id + 10); charset_collations[id] = cs; } TextType *collation(short id) { if (id >= charset_collations.size()) return NULL; return charset_collations[id]; } bool findConverter(CHARSET_ID id, CsConvert **cvt) { *cvt = NULL; for(Firebird::vector::iterator itr1 = charset_converters.begin(); itr1 != charset_converters.end(); ++itr1) if ((*itr1)->getToCS() == id) { *cvt = *itr1; return true; } for(Firebird::vector::iterator itr2 = impossible_conversions.begin(); itr2 != impossible_conversions.end(); ++itr2) if (*itr2 == id) return true; return false; } void addConverter(CsConvert *conv) { charset_converters.push_back(conv); } void addNullConverter(CHARSET_ID nullId) { impossible_conversions.push_back(nullId); } private: Firebird::vector charset_converters; Firebird::vector charset_collations; Firebird::vector impossible_conversions; CharSet *cs; }; CHARSET_ID DLL_EXPORT INTL_charset(TDBB tdbb, USHORT ttype, 
FPTR_VOID err) { /************************************** * * I N T L _ c h a r s e t * ************************************** * * Functional description * Return the character set ID for a piece of text. * **************************************/ switch (ttype) { case ttype_none: return (CS_NONE); case ttype_ascii: return (CS_ASCII); case ttype_unicode_fss: return (CS_UNICODE_FSS); case ttype_binary: return (CS_BINARY); case ttype_dynamic: SET_TDBB(tdbb); return (tdbb->tdbb_attachment->att_charset); default: return (TTYPE_TO_CHARSET(ttype)); } } int DLL_EXPORT INTL_compare( TDBB tdbb, DSC * pText1, DSC * pText2, FPTR_VOID err) { /************************************** * * I N T L _ c o m p a r e * ************************************** * * Functional description * Compare two pieces of international text. * **************************************/ UCHAR *p1, *p2; USHORT length1, length2; UCHAR buffer[MAX_KEY]; SSHORT compare_type; TextType* obj; USHORT t1, t2; SET_TDBB(tdbb); assert(pText1 != NULL); assert(pText2 != NULL); assert(IS_TEXT(pText1) && IS_TEXT(pText2)); assert(INTL_data_or_binary(pText1) || INTL_data_or_binary(pText2)); assert(err); /* normal compare routine from CVT_compare */ /* trailing spaces in strings are ignored for comparision */ length1 = CVT_get_string_ptr(pText1, &t1, &p1, NULL, 0, err); length2 = CVT_get_string_ptr(pText2, &t2, &p2, NULL, 0, err); /* YYY - by SQL II compare_type must be explicit in the SQL statement if there is any doubt */ compare_type = MAX(t1, t2); /* YYY */ if (t1 != t2) { CHARSET_ID cs1, cs2; cs1 = INTL_charset(tdbb, t1, err); cs2 = INTL_charset(tdbb, t2, err); if (cs1 != cs2) { if (compare_type != t2) { /* convert pText2 to pText1's type, if possible */ /* YYY - should failure to convert really return an error here? Support joining a 437 & Latin1 Column, and we pick the compare_type as 437, still only want the equal values.... But then, what about < operations, which make no sense if the string cannot be expressed... 
*/
				length2 = INTL_convert_bytes(tdbb, cs1, buffer, sizeof(buffer),
											 cs2, p2, length2, err);
				p2 = buffer;
			}
			else {
				/* convert pText1 to pText2's type, if possible */
				length1 = INTL_convert_bytes(tdbb, cs2, buffer, sizeof(buffer),
											 cs1, p1, length1, err);
				p1 = buffer;
			}
		}
	}

	/* NOTE(review): the lookup result is dereferenced without a NULL
	   check - confirm the lookup cannot return NULL when err is set. */
	obj = INTL_texttype_lookup(tdbb, compare_type, err, NULL);

	return obj->compare(length1, p1, length2, p2);
}


USHORT DLL_EXPORT INTL_convert_bytes(
	TDBB tdbb,
	CHARSET_ID dest_type, BYTE * dest_ptr, USHORT dest_len,
	CHARSET_ID src_type, BYTE * src_ptr, USHORT src_len,
	FPTR_VOID err)
{
/**************************************
 *
 *      I N T L _ c o n v e r t _ b y t e s
 *
 **************************************
 *
 * Functional description
 *      Given a string of bytes in one character set, convert it to another
 *      character set.
 *
 *      If (dest_ptr) is NULL, return the count of bytes needed to convert
 *      the string.  This does not guarantee the string can be converted,
 *      the purpose of this is to allocate a large enough buffer.
 *
 *      Three strategies are used, in this order:
 *        1. destination is CS_BINARY / CS_NONE: plain byte copy;
 *        2. a direct converter found by INTL_convert_lookup;
 *        3. a two step conversion through a temporary Unicode buffer.
 *
 * RETURNS:
 *      Length of resulting string, in bytes.
 *      calls (err) if conversion error occurs.
 *
 **************************************/
	UCHAR *start_dest_ptr;
	USHORT len;
	USHORT len2;
	CsConvert* cs_obj;
	CharSet* from_cs, *to_cs;
	SSHORT err_code = 0;
	USHORT err_position;
	BYTE *tmp_buffer;

	SET_TDBB(tdbb);

	assert(src_ptr != NULL);
	assert(src_type != dest_type);
	assert(err != NULL);

	start_dest_ptr = dest_ptr;
	if ((dest_type == CS_BINARY) || (dest_type == CS_NONE)) {
		/* See if we just need a length estimate */
		if (dest_ptr == NULL)
			return (src_len);
		/* Byte copy of as much as fits in the destination */
		len = MIN(dest_len, src_len);
		if (len)
			do
				*dest_ptr++ = *src_ptr++;
			while (--len);

		/* See if only space characters are remaining */
		/* (src_ptr now points at the first byte that was not copied) */
		len = src_len - MIN(dest_len, src_len);
		if (!len || all_spaces(tdbb, src_type, src_ptr, len, 0))
			return (dest_ptr - start_dest_ptr);
		else
			reinterpret_cast < void (*) (...) > (*err) (gds_arith_except, 0);
	}
	else if (src_len == 0)
		return (0);
	else if (src_type == CS_BINARY)
		/* binary data cannot be transliterated to a real charset */
		reinterpret_cast < void (*) (...) > (*err) (gds_arith_except,
													gds_arg_gds,
													gds_transliteration_failed,
													0);
	else
		/* character sets are known to be different */
	{
		/* Do we know an object from cs1 to cs2? */
		cs_obj = INTL_convert_lookup(tdbb, dest_type, src_type);
		if (cs_obj != NULL) {
			len = cs_obj->convert(dest_ptr, dest_len, src_ptr, src_len,
								  &err_code, &err_position);
			/* Truncation is tolerated when only spaces were cut off */
			if (!err_code ||
				((err_code == CS_TRUNCATION_ERROR) &&
				 all_spaces(tdbb, src_type, src_ptr, src_len, err_position)))
				return (len);
			else if (err_code == CS_TRUNCATION_ERROR)
				reinterpret_cast < void (*) (...) > (*err) (gds_arith_except, 0);
			else
				reinterpret_cast < void (*) (...) > (*err) (gds_arith_except,
															gds_arg_gds,
															gds_transliteration_failed,
															0);
		}

		/* Find a CS1 to UNICODE object */
		from_cs = INTL_charset_lookup(tdbb, src_type, NULL);
		if (from_cs == NULL)
			reinterpret_cast < void (*) (...) > (*err) (gds_arith_except,
														gds_arg_gds,
														gds_text_subtype,
														gds_arg_number,
														(SLONG) src_type, 0);

		/*
		   ** allocate a temporary buffer that is large enough. 2 = sizeof WCHAR
		 */
		tmp_buffer = (BYTE *) FB_NEW(*getDefaultMemoryPool()) char[(SLONG) src_len * 2];

		cs_obj = from_cs->getConvToUnicode();
		assert(cs_obj != NULL);
		/* NOTE(review): src_len * 2 is passed as the buffer length; the
		   converters defined in this file take that length as USHORT, so
		   presumably sources over 32767 bytes are not representable
		   here - confirm against the CsConvert::convert signature. */
		len = cs_obj->convert(tmp_buffer, src_len * 2, src_ptr, src_len,
							  &err_code, &err_position);
		if (err_code &&
			!((err_code == CS_TRUNCATION_ERROR) &&
			  all_spaces(tdbb, src_type, src_ptr, src_len, err_position)))
		{
			/* release the buffer before reporting the error */
			delete [] tmp_buffer;
			if (err_code == CS_TRUNCATION_ERROR)
				reinterpret_cast < void (*) (...) > (*err) (gds_arith_except, 0);
			else
				reinterpret_cast < void (*) (...) > (*err) (gds_arith_except,
															gds_arg_gds,
															gds_transliteration_failed,
															0);
		}

		/* Find a UNICODE to CS2 object */
		to_cs = INTL_charset_lookup(tdbb, dest_type, NULL);
		if (to_cs == NULL) {
			delete [] tmp_buffer;
			reinterpret_cast < void (*) (...) > (*err) (gds_arith_except,
														gds_arg_gds,
														gds_text_subtype,
														gds_arg_number,
														(SLONG) dest_type, 0);
		}

		cs_obj = to_cs->getConvFromUnicode();
		assert(cs_obj != NULL);
		len2 = cs_obj->convert(dest_ptr, dest_len, tmp_buffer, len,
							   &err_code, &err_position);
		/* the intermediate text is Unicode, so the trailing-space test
		   is made with CS_UNICODE101 */
		if (err_code &&
			!((err_code == CS_TRUNCATION_ERROR) &&
			  all_spaces(tdbb, CS_UNICODE101, tmp_buffer, len, err_position)))
		{
			delete [] tmp_buffer;
			if (err_code == CS_TRUNCATION_ERROR)
				reinterpret_cast < void (*) (...) > (*err) (gds_arith_except, 0);
			else
				reinterpret_cast < void (*) (...) > (*err) (gds_arith_except,
															gds_arg_gds,
															gds_transliteration_failed,
															0);
		}

		delete [] tmp_buffer;
		return (len2);
	}
	return (0);	/* to remove compiler errors. This should never be executed */
}


CsConvert* DLL_EXPORT INTL_convert_lookup(TDBB tdbb,
										  CHARSET_ID to_cs, CHARSET_ID from_cs)
{
/**************************************
 *
 *      I N T L _ c o n v e r t _ l o o k u p
 *
 **************************************
 *
 * Functional description
 *      Find (or create and cache) the converter from charset
 *      (from_cs) to charset (to_cs).  Returns NULL when no such
 *      converter is available.
 *
 **************************************/
	CharSetContainer *charset;
	CsConvert* converter;
	DBB dbb;

	SET_TDBB(tdbb);
	dbb = tdbb->tdbb_database;
	CHECK_DBB(dbb);

	/* CS_dynamic stands for the attachment's character set */
	if (from_cs == CS_dynamic)
		from_cs = tdbb->tdbb_attachment->att_charset;

	if (to_cs == CS_dynamic)
		to_cs = tdbb->tdbb_attachment->att_charset;

	/* Should from_cs == to_cs? be handled better?
YYY */ assert(from_cs != CS_dynamic); assert(to_cs != CS_dynamic); charset = internal_charset_container_lookup(tdbb, from_cs, NULL); if (charset == NULL) return (NULL); if (charset->findConverter(to_cs, &converter)) return converter; if (to_cs == CS_UNICODE101) { converter = charset->getCharSet()->getConvToUnicode(); } else if (from_cs == CS_UNICODE101) { CharSet* charset2; charset2 = INTL_charset_lookup(tdbb, to_cs, NULL); if (charset2 == NULL) return (NULL); converter = charset2->getConvFromUnicode(); } else { CsConvertAllocFunc allocFunc; allocFunc = INTL_csconvert_alloc_func(from_cs, to_cs); if (!allocFunc) allocFunc = (CsConvertAllocFunc) search_out_alloc_func("FB_CsConvert_lookup", from_cs, to_cs); if (!allocFunc) allocFunc = (CsConvertAllocFunc) // intl_back_compat_alloc_func_lookup(type_csconvert, from_cs, to_cs); intl_back_compat_alloc_func_lookup(type_csconvert, to_cs, from_cs); if (!allocFunc) { charset->addNullConverter(to_cs); return NULL; } converter = (*allocFunc)(*dbb->dbb_permanent, from_cs, to_cs); if (!converter) { charset->addNullConverter(to_cs); return NULL; } } charset->addConverter(converter); assert(converter->getFromCS() == from_cs); assert(converter->getToCS() == to_cs); return (converter); } int DLL_EXPORT INTL_convert_string(DSC * to, DSC * from, FPTR_VOID err) { /************************************** * * I N T L _ c o n v e r t _ s t r i n g * ************************************** * * Functional description * Convert a string from one type to another * * RETURNS: * 0 if no error in conversion * non-zero otherwise. * **************************************/ UCHAR *p, *q; UCHAR *from_ptr; CHARSET_ID to_cs, from_cs; USHORT from_type; TDBB tdbb; USHORT from_len, from_fill; USHORT to_size, to_len, to_fill; /* Note: This function is called from outside the engine as well as inside - we likely can't get rid of GET_THREAD_DATA here */ tdbb = GET_THREAD_DATA; if (tdbb == NULL) /* are we in the Engine? 
*/ return (1); /* no, then can't access intl gah */ assert(to != NULL); assert(from != NULL); assert(IS_TEXT(to) && IS_TEXT(from)); from_cs = INTL_charset(tdbb, INTL_TTYPE(from), err); to_cs = INTL_charset(tdbb, INTL_TTYPE(to), err); p = to->dsc_address; /* Must convert dtype(cstring,text,vary) and ttype(ascii,binary,..intl..) */ from_len = CVT_get_string_ptr(from, &from_type, &from_ptr, NULL, 0, err); to_size = to_len = TEXT_LEN(to); q = from_ptr; switch (to->dsc_dtype) { case dtype_text: if ((from_cs != to_cs) && (to_cs != CS_BINARY) && (to_cs != CS_NONE)) { to_len = INTL_convert_bytes(tdbb, to_cs, to->dsc_address, to_size, from_cs, from_ptr, from_len, err); to_fill = to_size - to_len; from_fill = 0; /* Convert_bytes handles source truncation */ p += to_len; } else { /* binary string can always be converted TO by byte-copy */ to_len = MIN(from_len, to_size); from_fill = from_len - to_len; to_fill = to_size - to_len; if (to_len) do *p++ = *q++; while (--to_len); } if (to_fill > 0) pad_spaces(tdbb, to_cs, p, to_fill); break; case dtype_cstring: if ((from_cs != to_cs) && (to_cs != CS_BINARY) && (to_cs != CS_NONE)) { to_len = INTL_convert_bytes(tdbb, to_cs, to->dsc_address, to_size, from_cs, from_ptr, from_len, err); to->dsc_address[to_len] = 0; from_fill = 0; /* Convert_bytes handles source truncation */ } else { /* binary string can always be converted TO by byte-copy */ to_len = MIN(from_len, to_size); from_fill = from_len - to_len; if (to_len) do *p++ = *q++; while (--to_len); *p = 0; } break; case dtype_varying: if ((from_cs != to_cs) && (to_cs != CS_BINARY) && (to_cs != CS_NONE)) { to_len = INTL_convert_bytes(tdbb, to_cs, reinterpret_cast(((VARY *) to->dsc_address)->vary_string), to_size, from_cs, from_ptr, from_len, err); ((VARY *) to->dsc_address)->vary_length = to_len; from_fill = 0; /* Convert_bytes handles source truncation */ } else { /* binary string can always be converted TO by byte-copy */ to_len = MIN(from_len, to_size); from_fill = from_len - 
to_len; ((VARY *) p)->vary_length = to_len; p = reinterpret_cast(((VARY *) p)->vary_string); if (to_len) do *p++ = *q++; while (--to_len); } break; } if (from_fill) /* Make sure remaining characters on From string are spaces */ if (!all_spaces(tdbb, from_cs, q, from_fill, 0)) reinterpret_cast < void (*) (...) > (*err) (gds_arith_except, 0); return 0; } #ifdef DEV_BUILD int DLL_EXPORT INTL_data(DSC * pText) { /************************************** * * I N T L _ d a t a * ************************************** * * Functional description * Given an input text descriptor, * return TRUE if the data pointed to respresents * international text (subject to user defined or non-binary * collation or comparison). * **************************************/ assert(pText != NULL); if (!IS_TEXT(pText)) return FALSE; if (!INTERNAL_TTYPE(pText)) return TRUE; return FALSE; } #endif #ifdef DEV_BUILD int DLL_EXPORT INTL_data_or_binary(DSC * pText) { /************************************** * * I N T L _ d a t a _ o r _ b i n a r y * ************************************** * * Functional description * **************************************/ return (INTL_data(pText) || (pText->dsc_ttype == ttype_binary)); } #else // 11 Sent 2002, Nickolay Samofatov // Used only in asserts, but let optimizer wipe it out int DLL_EXPORT INTL_data_or_binary(DSC * pText) { return TRUE; } #endif int DLL_EXPORT INTL_defined_type(TDBB tdbb, STATUS * status, SSHORT t_type) { /************************************** * * I N T L _ d e f i n e d _ t y p e * ************************************** * * Functional description * Is (t_type) a known text type? * Return: * FALSE type is not defined. * TRUE type is defined * status set to gds_status codes to describe any error. * * Note: * Due to cleanup that must happen in DFW, this routine * must return, and not call ERR directly. 
* **************************************/ TextType* obj; SET_TDBB(tdbb); if (status) status[0] = gds_arg_end; obj = INTL_texttype_lookup(tdbb, t_type, NULL, status); if (obj == NULL) return FALSE; return TRUE; } WCHAR DLL_EXPORT INTL_getch(TDBB tdbb, TextType* * obj, SSHORT t_type, UCHAR ** ptr, USHORT * count) { /************************************** * * I N T L _ g e t c h * ************************************** * * Functional description * Get next character from a buffer. * **************************************/ SSHORT used; USHORT wc; SET_TDBB(tdbb); assert(obj); assert(ptr); if (*obj == NULL) { *obj = INTL_texttype_lookup(tdbb, t_type, (FPTR_VOID) ERR_post, NULL); assert(*obj); } used = (*obj)->mbtowc(&wc, *ptr, *count); if (used == -1) return 0; *ptr += used; *count -= used; return wc; } void DLL_EXPORT INTL_init(TDBB tdbb) { /************************************** * * I N T L _ i n i t * ************************************** * * Functional description * **************************************/ /*DBB dbb; VEC vector; SET_TDBB(tdbb); dbb = tdbb->tdbb_database; CHECK_DBB(dbb); if (!(vector = dbb->dbb_text_objects)) { vector = dbb->dbb_text_objects = vec::newVector(*dbb->dbb_permanent, 25); } if (!(vector = dbb->dbb_charsets)) { vector = dbb->dbb_charsets = vec::newVector(*dbb->dbb_permanent, 25); }*/ } USHORT DLL_EXPORT INTL_key_length(TDBB tdbb, USHORT idxType, USHORT iLength) { /************************************** * * I N T L _ k e y _ l e n g t h * ************************************** * * Functional description * Given an index type, and a maximum length (iLength) * return the length of the byte string key descriptor to * use when collating text of this type. 
* **************************************/ USHORT key_length; TextType* obj; SSHORT ttype; SET_TDBB(tdbb); assert(idxType >= idx_first_intl_string); ttype = INTL_INDEX_TO_TEXT(idxType); if (ttype >= 0 && ttype <= ttype_last_internal) key_length = iLength; else { obj = INTL_texttype_lookup(tdbb, ttype, (FPTR_VOID) ERR_post, NULL); key_length = obj->key_length(iLength); } /* Validity checks on the computed key_length */ if (key_length > MAX_KEY) key_length = MAX_KEY; if (key_length < iLength) key_length = iLength; return (key_length); } static CharSetContainer *internal_charset_container_lookup(TDBB tdbb, SSHORT parm1, STATUS * status) { /************************************** * * I N T L _ c h a r s e t _ l o o k u p * ************************************** * * Functional description * * Lookup a character set descriptor. * * First, search the appropriate vector that hangs * off the dbb. If not found, then call the lower * level lookup routine to allocate it, or return * null if we don't know about the charset. * * Returns: * *charset - if no errors; * - if error & err non NULL * NULL - if error & err NULL * **************************************/ DBB dbb; CharSetContainer *cs = 0; USHORT id; SET_TDBB(tdbb); dbb = tdbb->tdbb_database; id = TTYPE_TO_CHARSET(parm1); if (id == CS_dynamic) id = tdbb->tdbb_attachment->att_charset; if (id >= dbb->dbb_charsets.size()) dbb->dbb_charsets.resize(id + 10); else cs = dbb->dbb_charsets[id]; // allocate a new character set object if we couldn't find one. 
if (!cs) { CharSet *newCs = NULL; CharSetAllocFunc allocFunc; allocFunc = INTL_charset_alloc_func(id); if (!allocFunc) allocFunc = (CharSetAllocFunc)search_out_alloc_func("FB_CharSet_lookup",id,0); if (!allocFunc) allocFunc = (CharSetAllocFunc) intl_back_compat_alloc_func_lookup(type_charset,id,0); if (!allocFunc) return NULL; newCs = (*allocFunc)(*dbb->dbb_permanent, id, 0); if (!newCs) return NULL; cs = FB_NEW(*dbb->dbb_permanent) CharSetContainer(*dbb->dbb_permanent, newCs); if (!cs) { delete newCs; return NULL; } dbb->dbb_charsets[id] = cs; } assert(cs != NULL); return cs; } CharSet *DLL_EXPORT INTL_charset_lookup(TDBB tdbb, SSHORT parm1, STATUS * status) { /************************************** * * I N T L _ c h a r s e t _ l o o k u p * ************************************** * * Functional description * * Lookup a character set descriptor. * * First, search the appropriate vector that hangs * off the dbb. If not found, then call the lower * level lookup routine to allocate it, or return * null if we don't know about the charset. * * Returns: * *charset - if no errors; * - if error & err non NULL * NULL - if error & err NULL * **************************************/ CharSetContainer *cs; cs = internal_charset_container_lookup(tdbb, parm1, status); if (!cs) return NULL; return cs->getCharSet(); } TextType *DLL_EXPORT INTL_texttype_lookup( TDBB tdbb, SSHORT parm1, FPTR_VOID err, STATUS * status) { /************************************** * * I N T L _ t e x t t y p e _ l o o k u p * ************************************** * * Functional description * * Lookup either a character set descriptor or * texttype descriptor object. * * First, search the appropriate vector that hangs * off the dbb. If not found, then call the lower * level lookup routine to find it in the libraries. 
* * Returns: * *object - if no errors; * - if error & err non NULL * NULL - if error & err NULL * **************************************/ DBB dbb; TextType *cs_object; CharSetContainer *csc; USHORT id; SET_TDBB(tdbb); dbb = tdbb->tdbb_database; if (parm1 == ttype_dynamic) parm1 = MAP_CHARSET_TO_TTYPE(tdbb->tdbb_attachment->att_charset); csc = internal_charset_container_lookup(tdbb, parm1, status); if (!csc) return NULL; id = TTYPE_TO_COLLATION(parm1); cs_object = csc->collation(id); // allocate a new TextType object if needed if (!cs_object) { TextTypeAllocFunc allocFunc; allocFunc = INTL_texttype_alloc_func(parm1); if (!allocFunc) allocFunc = (TextTypeAllocFunc)search_out_alloc_func("FB_texttype_lookup",parm1,0); if (!allocFunc) allocFunc = (TextTypeAllocFunc) intl_back_compat_alloc_func_lookup(type_texttype,parm1,0); if (!allocFunc) return NULL; cs_object = (*allocFunc)(*dbb->dbb_permanent,parm1,0); if (!cs_object) return NULL; csc->setCollation(cs_object, id); } assert(cs_object != NULL); return (cs_object); } void DLL_EXPORT INTL_pad_spaces(TDBB tdbb, DSC * type, UCHAR * string, USHORT length) { /************************************** * * I N T L _ p a d _ s p a c e s * ************************************** * * Functional description * Pad a buffer with spaces, using the character * set's defined space character. * **************************************/ USHORT charset; SET_TDBB(tdbb); assert(type != NULL); assert(IS_TEXT(type)); assert(string != NULL); charset = INTL_charset(tdbb, type->dsc_ttype, NULL); pad_spaces(tdbb, charset, string, length); } USHORT DLL_EXPORT INTL_string_to_key( TDBB tdbb, USHORT idxType, DSC * pString, DSC * pByte, USHORT partial) { /************************************** * * I N T L _ s t r i n g _ t o _ k e y * ************************************** * * Functional description * Given an input string, convert it to a byte string * that will collate naturally (byte order). * * Return the length of the resulting byte string. 
* **************************************/ USHORT outlen; UCHAR buffer[MAX_KEY]; UCHAR pad_char; TextType* obj; SSHORT ttype; SET_TDBB(tdbb); assert(idxType >= idx_first_intl_string || idxType == idx_string || idxType == idx_byte_array || idxType == idx_metadata); assert(pString != NULL); assert(pByte != NULL); assert(pString->dsc_address != NULL); assert(pByte->dsc_address != NULL); assert(pByte->dsc_dtype == dtype_text); switch (idxType) { case idx_string: pad_char = ' '; ttype = ttype_none; break; case idx_byte_array: pad_char = 0; ttype = ttype_binary; break; case idx_metadata: pad_char = ' '; ttype = ttype_metadata; break; default: pad_char = 0; ttype = INTL_INDEX_TO_TEXT(idxType); break; } /* Make a string into the proper type of text */ const char* src; USHORT len = CVT_make_string(pString, ttype, &src, reinterpret_cast(buffer), sizeof(buffer), (FPTR_VOID) ERR_post); char* dest = reinterpret_cast(pByte->dsc_address); switch (ttype) { case ttype_metadata: case ttype_binary: case ttype_ascii: case ttype_none: while (len--) *dest++ = *src++; /* strip off ending pad characters */ while (dest > (const char*)pByte->dsc_address) if (*(dest - 1) == pad_char) dest--; else break; outlen = (dest - (const char*)pByte->dsc_address); break; default: obj = INTL_texttype_lookup(tdbb, ttype, (FPTR_VOID) ERR_post, NULL); outlen = obj->string_to_key(len, reinterpret_cast(const_cast(src)), pByte->dsc_length, reinterpret_cast(dest), partial); break; } return (outlen); } int DLL_EXPORT INTL_str_to_upper(TDBB tdbb, DSC * pString) { /************************************** * * I N T L _ s t r _ t o _ u p p e r * ************************************** * * Functional description * Given an input string, convert it to uppercase * **************************************/ USHORT len; UCHAR *src, *dest; UCHAR buffer[MAX_KEY]; USHORT ttype; TextType* obj; SET_TDBB(tdbb); assert(pString != NULL); assert(pString->dsc_address != NULL); len = CVT_get_string_ptr(pString, &ttype, &src, 
reinterpret_cast < vary * >(buffer), sizeof(buffer), (FPTR_VOID) ERR_post); switch (ttype) { case ttype_binary: /* cannot uppercase binary strings */ break; case ttype_none: case ttype_ascii: case ttype_unicode_fss: dest = src; while (len--) { *dest++ = UPPER7(*src); src++; } break; default: obj = INTL_texttype_lookup(tdbb, ttype, (FPTR_VOID) ERR_post, NULL); obj->str_to_upper(len, src, len, src); break; } /* * Added to remove compiler errors. Callers are not checking * the return code from this function 4/5/95. */ return (0); } UCHAR DLL_EXPORT INTL_upper(TDBB tdbb, USHORT ttype, UCHAR ch) { /************************************** * * I N T L _ u p p e r * ************************************** * * Functional description * Given an input character, convert it to uppercase * **************************************/ TextType* obj; SET_TDBB(tdbb); switch (ttype) { case ttype_binary: /* cannot uppercase binary strings */ return (ch); case ttype_none: case ttype_ascii: case ttype_unicode_fss: return (UPPER7(ch)); default: obj = INTL_texttype_lookup(tdbb, ttype, (FPTR_VOID) ERR_post, NULL); return obj->to_upper(ch); } } static BOOLEAN all_spaces( TDBB tdbb, CHARSET_ID charset, BYTE * ptr, USHORT len, USHORT offset) { /************************************** * * a l l _ s p a c e s * ************************************** * * Functional description * determine if the string at ptr[offset] ... ptr[len] is entirely * spaces, as per the space definition of (charset). * The binary representation of a Space is character-set dependent. * (0x20 for Ascii, 0x0020 for Unicode, 0x20 for SJIS, but must watch for * 0x??20, which is NOT a space. **************************************/ CharSet* obj; BYTE *p; BYTE *end; const unsigned char *space, *end_space; SET_TDBB(tdbb); assert(ptr != NULL); obj = INTL_charset_lookup(tdbb, charset, NULL); assert(obj != NULL); /* * We are assuming offset points to the first byte which was not * consumed in a conversion. 
And that offset is pointing * to a character boundary */ /* Single-octet character sets are optimized here */ if (obj->getSpaceLength() == 1) { p = &ptr[offset]; end = &ptr[len]; while (p < end) { if (*p++ != *obj->getSpace()) return (FALSE); } return (TRUE); } else { p = &ptr[offset]; end = &ptr[len]; space = obj->getSpace(); end_space = &space[obj->getSpaceLength()]; while (p < end) { space = obj->getSpace(); while (p < end && space < end_space) { if (*p++ != *space++) return (FALSE); } } return (TRUE); } } static USHORT internal_keylength(TextType* obj, USHORT iLength) { /************************************** * * i n t e r n a l _ k e y l e n g t h * ************************************** * * Functional description * **************************************/ return (iLength); } static USHORT nc_to_wc(CsConvert* obj, WCHAR * pWide, USHORT nWide, /* byte count */ UCHAR * pNarrow, USHORT nNarrow, /* byte count */ SSHORT * err_code, USHORT * err_position) { /************************************** * * n c _ t o _ w c * ************************************** * * Functional description * **************************************/ WCHAR *pStart; UCHAR *pNarrowStart; assert(obj != NULL); assert((pNarrow != NULL) || (pWide == NULL)); assert(err_code != NULL); assert(err_position != NULL); *err_code = 0; if (pWide == NULL) return (2 * nNarrow); /* all cases */ pStart = pWide; pNarrowStart = pNarrow; while (nWide-- > 1 && nNarrow) { /* YYY - Byte order issues here */ *pWide++ = (WCHAR) * pNarrow++; nWide--; nNarrow--; } if (!*err_code && nNarrow) { *err_code = CS_TRUNCATION_ERROR; } *err_position = (pNarrow - pNarrowStart) * sizeof(*pNarrow); return ((pWide - pStart) * sizeof(*pWide)); } static void pad_spaces(TDBB tdbb, CHARSET_ID charset, BYTE * ptr, USHORT len) { /* byte count */ /************************************** * * p a d _ s p a c e s * ************************************** * * Functional description * Pad a buffer with the character set defined space character. 
* **************************************/ CharSet* obj; BYTE *end; const unsigned char *space, *end_space; SET_TDBB(tdbb); assert(ptr != NULL); obj = INTL_charset_lookup(tdbb, charset, NULL); assert(obj != NULL); /* Single-octet character sets are optimized here */ if (obj->getSpaceLength() == 1) { end = &ptr[len]; while (ptr < end) *ptr++ = *obj->getSpace(); } else { end = &ptr[len]; space = obj->getSpace(); end_space = &space[obj->getSpaceLength()]; while (ptr < end) { space = obj->getSpace(); while (ptr < end && space < end_space) { *ptr++ = *space++; } /* This assert is checking that we didn't have a buffer-end * in the middle of a space character */ assert(!(ptr == end) || (space == end_space)); } } } #ifdef DEV_BUILD /* * Utility routines designed to be called from the debugger to * print buffers, pointers, etc. which may contain text that * the debugger doesn't consider visible. */ static void dump_hex(UCHAR * p, USHORT len) { /************************************** * * d u m p _ h e x * ************************************** * * Functional description * *************************************/ while (len--) ib_printf("%02X ", *p++); ib_printf("\n"); } static void dump_latin(UCHAR * p, USHORT len) { /************************************** * * d u m p _ l a t i n * ************************************** * * Functional description * *************************************/ while (len--) if (isprintable(*p)) ib_printf("%c", *p++); else ib_printf("\0x%02X", *p++); ib_printf("\n"); } #endif unsigned short TextTypeNC::to_wc(unsigned char *pWideUC, unsigned short nWide, unsigned char *pNarrow, unsigned short nNarrow, short *err_code, unsigned short *err_position) /************************************** * * TextTypeNC::to_wc * ************************************** * * Functional description * **************************************/ { WCHAR *pStart, *pWide = (WCHAR*)pWideUC; UCHAR *pNarrowStart; assert((pNarrow != NULL) || (pWide == NULL)); assert(err_code != NULL); 
assert(err_position != NULL); *err_code = 0; if (pWide == NULL) return (2 * nNarrow); /* all cases */ pStart = pWide; pNarrowStart = pNarrow; while (nWide-- > 1 && nNarrow) { /* YYY - Byte order issues here */ *pWide++ = (WCHAR) * pNarrow++; nWide--; nNarrow--; } if (!*err_code && nNarrow) { *err_code = CS_TRUNCATION_ERROR; } *err_position = (pNarrow - pNarrowStart) * sizeof(*pNarrow); return ((pWide - pStart) * sizeof(*pWide)); } unsigned short TextTypeNC::contains(TDBB a, unsigned char *b, unsigned short c, unsigned char *d, unsigned short e) { return EVL_nc_contains(a,this,b,c,d,e); } unsigned short TextTypeNC::like(TDBB a, unsigned char *b, short c, unsigned char *d, short e, short f) { return EVL_nc_like(a,this,b,c,d,e,f); } unsigned short TextTypeNC::matches(TDBB a, unsigned char *b, short c, unsigned char *d, short e) { return EVL_nc_matches(a,this,b,c,d,e); } unsigned short TextTypeNC::sleuth_check(TDBB a, unsigned short b, unsigned char *c, unsigned short d, unsigned char *e, unsigned short f) { return EVL_nc_sleuth_check(a,this,b,c,d,e,f); } unsigned short TextTypeNC::sleuth_merge(TDBB a, unsigned char *b, unsigned short c, unsigned char *d, unsigned short e, unsigned char *f, unsigned short g) { return EVL_nc_sleuth_merge(a,this,b,c,d,e,f,g); } unsigned short TextTypeNC::mbtowc(WCHAR *wc, unsigned char *ptr, unsigned short count) /************************************** * * i n t e r n a l _ n c _ m b t o w c * ************************************** * * Functional description * Get the next character from the multibyte * input stream. * Narrow character version. * Returns: * Count of bytes consumed from the input stream. 
* **************************************/ { assert(ptr); if (count >= 1) { if (wc) *wc = *ptr; return 1; } if (wc) *wc = 0; return (unsigned short)-1; /* No more characters */ } unsigned short TextTypeMB::contains(TDBB a, unsigned char *b, unsigned short c, unsigned char *d, unsigned short e) { return EVL_mb_contains(a,this,b,c,d,e); } unsigned short TextTypeMB::like(TDBB a, unsigned char *b, short c, unsigned char *d, short e, short f) { return EVL_mb_like(a,this,b,c,d,e,f); } unsigned short TextTypeMB::matches(TDBB a, unsigned char *b, short c, unsigned char *d, short e) { return EVL_mb_matches(a,this,b,c,d,e); } unsigned short TextTypeMB::sleuth_check(TDBB a, unsigned short b, unsigned char *c, unsigned short d, unsigned char *e, unsigned short f) { return EVL_mb_sleuth_check(a,this,b,c,d,e,f); } unsigned short TextTypeMB::sleuth_merge(TDBB a, unsigned char *b, unsigned short c, unsigned char *d, unsigned short e, unsigned char *f, unsigned short g) { return EVL_mb_sleuth_merge(a,this,b,c,d,e,f,g); } unsigned short TextTypeMB::mbtowc(WCHAR *wc, unsigned char *ptr, unsigned short count) { /************************************** * * TextTypeMB::mbtowc * ************************************** * * Functional description * Get the next character from the multibyte * input stream. * Multibyte version character version. * Returns: * Count of bytes consumed from the input stream. 
* **************************************/ assert(ptr); if (count >= 2) { if (wc) *wc = *(WCHAR *) ptr; return 2; } if (wc) *wc = 0; return (unsigned short)-1; /* No more characters */ } unsigned short TextTypeWC::to_wc(unsigned char *pDestUC, unsigned short nDest, unsigned char *pSrcUC, unsigned short nSrc, short *err_code, unsigned short *err_position) { /************************************** * * TextTypeWC::to_wc * ************************************** * * Functional description * *************************************/ WCHAR *pStart, *pDest = (WCHAR*)pDestUC; WCHAR *pStart_src, *pSrc = (WCHAR*)pSrcUC; assert((pSrc != NULL) || (pDest == NULL)); assert(err_code != NULL); assert(err_position != NULL); *err_code = 0; if (pDest == NULL) /* length estimate needed? */ return (nSrc); pStart = pDest; pStart_src = pSrc; while (nDest > 1 && nSrc > 1) { *pDest++ = *pSrc++; nDest -= 2; nSrc -= 2; } if (!*err_code && nSrc) { *err_code = CS_TRUNCATION_ERROR; } *err_position = (pSrc - pStart_src) * sizeof(*pSrc); return ((pDest - pStart) * sizeof(*pDest)); } unsigned short TextTypeWC::contains(TDBB a, unsigned char *b, unsigned short c, unsigned char *d, unsigned short e) { return EVL_wc_contains(a,this,(WCHAR*)b,c,(WCHAR*)d,e); } unsigned short TextTypeWC::like(TDBB a, unsigned char *b, short c, unsigned char *d, short e, short f) { return EVL_wc_like(a,this,(WCHAR*)b,c,(WCHAR*)d,e,f); } unsigned short TextTypeWC::matches(TDBB a, unsigned char *b, short c, unsigned char *d, short e) { return EVL_wc_matches(a,this,(WCHAR*)b,c,(WCHAR*)d,e); } unsigned short TextTypeWC::sleuth_check(TDBB a, unsigned short b, unsigned char *c, unsigned short d, unsigned char *e, unsigned short f) { return EVL_wc_sleuth_check(a,this,b,(WCHAR*)c,d,(WCHAR*)e,f); } unsigned short TextTypeWC::sleuth_merge(TDBB a, unsigned char *b, unsigned short c, unsigned char *d, unsigned short e, unsigned char *f, unsigned short g) { return EVL_wc_sleuth_merge(a,this,(WCHAR*)b,c,(WCHAR*)d,e,(WCHAR*)f,g); } 
unsigned short TextTypeWC::mbtowc(WCHAR *wc, unsigned char *ptr, unsigned short count) { /************************************** * * TextTypeWC::mbtowc * ************************************** * * Functional description * Get the next character from the multibyte * input stream. * Wide character version. * Returns: * Count of bytes consumed from the input stream. * **************************************/ assert(ptr); if (count >= 2) { if (wc) *wc = *(WCHAR *) ptr; return 2; } if (wc) *wc = 0; return (unsigned short)-1; /* No more characters */ } //=============================================================================== //=============================================================================== //=============================================================================== // Code to handle loading international charset plugins // in the new c++ OO format. static const char *INTL_PLUGIN_DIR = "intl"; static PluginManager intlPlugins; static bool loaded = false; static void* search_out_alloc_func(const char *sym, CHARSET_ID p1, CHARSET_ID p2) { typedef void* (*lookupFuncType)(CHARSET_ID,CHARSET_ID); void* result = 0; lookupFuncType lookupFunc; Firebird::string entryPoint(sym); if (!loaded) { intlPlugins.addSearchPath(INTL_PLUGIN_DIR); intlPlugins.addIgnoreModule(INTL_MODULE1); intlPlugins.addIgnoreModule(INTL_MODULE2); intlPlugins.loadAllPlugins(); loaded = true; } for(PluginManager::iterator itr = intlPlugins.begin(); result == 0 && itr != intlPlugins.end(); ++itr) { lookupFunc = (lookupFuncType)(*itr).lookupSymbol(entryPoint); if (!lookupFunc) continue; result = (*lookupFunc)(p1,p2); } return result; } //=============================================================================== //=============================================================================== //=============================================================================== // This code handles backwards compatibility with the old internation // character set plugin format. 
// We need all the structure definitions from the old interface #define INTL_ENGINE_INTERNAL #include "../jrd/intlobj.h" // storage for the loadable modules static PluginManager intlBCPlugins; static bool bcLoaded = false; class CsConvert_BC : public CsConvert { public: CsConvert_BC(struct csconvert *csv, bool deleteMemory) : CsConvert( csv->csconvert_id, (const char*)csv->csconvert_name, csv->csconvert_from, csv->csconvert_to), cnvt(csv), deleteOnDestruct(deleteMemory) {} virtual ~CsConvert_BC() { if (deleteOnDestruct) delete cnvt; } unsigned short convert(unsigned char *a, unsigned short b, unsigned char *c, unsigned short d, short *e, unsigned short *f) { assert(cnvt != NULL); return (*(reinterpret_cast(cnvt->csconvert_convert))) (cnvt,a,b,c,d,e,f); } private: struct csconvert *cnvt; bool deleteOnDestruct; }; class CharSet_BC : public CharSet { public: CharSet_BC(MemoryPool &p, struct charset *csStruct) : CharSet( csStruct->charset_id, (const char*)csStruct->charset_name, csStruct->charset_min_bytes_per_char, csStruct->charset_max_bytes_per_char, csStruct->charset_space_length, (char*)csStruct->charset_space_character), cs(csStruct) { charset_to_unicode = FB_NEW(p) CsConvert_BC(&cs->charset_to_unicode, false); charset_from_unicode = FB_NEW(p) CsConvert_BC(&cs->charset_from_unicode, false); } ~CharSet_BC() { delete cs; } private: struct charset *cs; }; template class TextType_BC : public T { public: TextType_BC(struct texttype *textt) : T( textt->texttype_type, (char*)textt->texttype_name, textt->texttype_character_set, textt->texttype_country, textt->texttype_bytes_per_char), tt(textt) {} unsigned short key_length(unsigned short a) { assert(tt); assert(tt->texttype_fn_key_length); return (*(reinterpret_cast (tt->texttype_fn_key_length)))(tt,a); } unsigned short string_to_key(unsigned short a, unsigned char *b, unsigned short c, unsigned char *d, unsigned short e) { assert(tt); assert(tt->texttype_fn_string_to_key); return (*(reinterpret_cast 
(tt->texttype_fn_string_to_key))) (tt,a,b,c,d,e); } short compare(unsigned short a, unsigned char *b, unsigned short c, unsigned char *d) { assert(tt); assert(tt->texttype_fn_compare); return (*(reinterpret_cast (tt->texttype_fn_compare)))(tt,a,b,c,d); } unsigned short to_upper(unsigned short a) { assert(tt); assert(tt->texttype_fn_to_upper); return (*(reinterpret_cast (tt->texttype_fn_to_upper)))(tt,a); } unsigned short to_lower(unsigned short a) { assert(tt); assert(tt->texttype_fn_to_lower); return (*(reinterpret_cast (tt->texttype_fn_to_lower)))(tt,a); } short str_to_upper(unsigned short a, unsigned char *b, unsigned short c, unsigned char *d) { assert(tt); assert(tt->texttype_fn_str_to_upper); return (*(reinterpret_cast (tt->texttype_fn_str_to_upper))) (tt,a,b,c,d); } unsigned short to_wc(unsigned char *a, unsigned short b, unsigned char *c, unsigned short d, short *e, unsigned short *f) { assert(tt); assert(tt->texttype_fn_to_wc); return (*(reinterpret_cast (tt->texttype_fn_to_wc))) (tt,a,b,c,d,e,f); } unsigned short mbtowc(WCHAR *a, unsigned char *b, unsigned short c) { assert(tt); if (!tt->texttype_fn_mbtowc) return T::mbtowc(a,b,c); return (*(reinterpret_cast< USHORT (*)(TEXTTYPE, WCHAR*, UCHAR*, USHORT)> (tt->texttype_fn_mbtowc)))(tt,a,b,c); } unsigned short contains(TDBB a, unsigned char *b, unsigned short c, unsigned char *d, unsigned short e) { assert(tt); if (!tt->texttype_fn_contains) return T::contains(a,b,c,d,e); return (*(reinterpret_cast< USHORT (*)(TDBB,TEXTTYPE,UCHAR*,USHORT,UCHAR*,USHORT)> (tt->texttype_fn_contains))) (a,tt,b,c,d,e); } unsigned short like(TDBB tdbb, unsigned char *a, short b, unsigned char *c, short d, short e) { assert(tt); if (!tt->texttype_fn_like) return T::like(tdbb,a,b,c,d,e); else return (*(reinterpret_cast< USHORT(*)(TDBB,TEXTTYPE,UCHAR*,short,UCHAR*,short,short)> (tt->texttype_fn_like)))(tdbb,tt,a,b,c,d,e); } unsigned short matches(TDBB tdbb, unsigned char *a, short b, unsigned char *c, short d) { assert(tt); if 
(!tt->texttype_fn_matches) return T::matches(tdbb,a,b,c,d); return (*(reinterpret_cast< USHORT (*)(TDBB,TEXTTYPE,UCHAR*,short,UCHAR*,short)> (tt->texttype_fn_matches))) (tdbb,tt,a,b,c,d); } unsigned short sleuth_check(TDBB tdbb, unsigned short a, unsigned char *b, unsigned short c, unsigned char *d, unsigned short e) { assert(tt); if (!tt->texttype_fn_sleuth_check) return T::sleuth_check(tdbb,a,b,c,d,e); return (*(reinterpret_cast< USHORT(*)(TDBB,TEXTTYPE,USHORT,UCHAR*,USHORT,UCHAR*,USHORT)> (tt->texttype_fn_sleuth_check))) (tdbb,tt,a,b,c,d,e); } unsigned short sleuth_merge(TDBB tdbb, unsigned char *a, unsigned short b, unsigned char *c, unsigned short d, unsigned char *e, unsigned short f) { assert(tt); if (!tt->texttype_fn_sleuth_merge) return T::sleuth_merge(tdbb,a,b,c,d,e,f); return (*(reinterpret_cast< USHORT(*)(TDBB,TEXTTYPE,UCHAR*,USHORT,UCHAR*,USHORT,UCHAR*,USHORT)> (tt->texttype_fn_sleuth_merge))) (tdbb,tt,a,b,c,d,e,f); } private: struct texttype *tt; }; static void* intl_back_compat_obj_init_lookup( USHORT type, SSHORT parm1, SSHORT parm2) { /************************************** * * intl_back_compat_alloc_func_lookup * ************************************** * * Functional description * Find the allocator function for the requested international * character set using the obsolete c/IB/FB 6.0 interface. * Search algorithm is: * Look in intllib * Look in intllib2 * Look for a normal UDF entry * Abort with an error. 
* * Returns: * FALSE - no errors * TRUE - error occurred, and parameter was NULL; * - error occurred, and parameter non-NULL; * * ***************************************/ USHORT (*function)(); if (!bcLoaded) { intlBCPlugins.addSearchPath(INTL_PLUGIN_DIR); bcLoaded = true; } PluginManager::Plugin intlMod1 = intlBCPlugins.findPlugin(INTL_MODULE1); PluginManager::Plugin intlMod2 = intlBCPlugins.findPlugin(INTL_MODULE2); USHORT(*lookup_fn) (USHORT, FPTR_SHORT *, SSHORT, SSHORT); INTL_TRACE(("INTL: looking for obj %d ttype %d\n", objtype, parm1)); function = NULL; #ifdef INTL_BACKEND if (LD_lookup(type, &function, parm1, parm2) != 0) function = NULL; #else /* Look for an InterBase supplied object to implement the text type */ /* The flu.c uses searchpath which expects a file name not a path */ INTL_TRACE(("INTL: trying %s %s\n", INTL_MODULE1, INTL_LOOKUP_ENTRY1)); Firebird::string tempStr(INTL_LOOKUP_ENTRY1); if ( intlMod1 && (lookup_fn = (USHORT(*)(USHORT, USHORT(**)(), short, short)) (intlMod1.lookupSymbol(tempStr))) ) { INTL_TRACE(("INTL: calling lookup %s %s\n", INTL_MODULE1, INTL_LOOKUP_ENTRY1)); if ((*lookup_fn) (type, &function, parm1, parm2) != 0) { function = NULL; } else { return (void*) function; } } #endif /* Still not found, check the set of supplimental international objects */ #ifdef INTL_BACKEND if (LD2_lookup(type, &function, parm1, parm2) != 0) function = NULL; #else /* Look for an InterBase supplied object to implement the text type */ /* The flu.c uses searchpath which expects a file name not a path */ INTL_TRACE(("INTL: trying %s %s\n", INTL_MODULE2, INTL_LOOKUP_ENTRY2)); tempStr = INTL_LOOKUP_ENTRY2; if ( intlMod2 && (lookup_fn = (USHORT(*)(USHORT, USHORT(**)(), short, short)) (intlMod2.lookupSymbol(tempStr))) ) { INTL_TRACE(("INTL: calling lookup %s %s\n", INTL_MODULE2, INTL_LOOKUP_ENTRY2)); if ((*lookup_fn) (type, &function, parm1, parm2) != 0) { function = NULL; } else { return (void*) function; } } #endif /* Still not found, check if there is a 
UDF in the database defined the right way */ FUN function_block; USHORT argcount; char entry[48]; #if defined(_MSC_VER) #define snprintf _snprintf #endif /* EKU: need a replacement for snprintf for systems like SINIX-Z!!! */ switch (type) { case type_texttype: #ifdef HAVE_SNPRINTF snprintf(entry, sizeof(entry), INTL_USER_ENTRY, parm1); #else sprintf(entry, INTL_USER_ENTRY, parm1); #endif argcount = 2; break; case type_charset: #ifdef HAVE_SNPRINTF snprintf(entry, sizeof(entry), "USER_CHARSET_%03d", parm1); #else sprintf(entry, "USER_CHARSET_%03d", parm1); #endif argcount = 2; break; case type_csconvert: #ifdef HAVE_SNPRINTF snprintf(entry, sizeof(entry), "USER_TRANSLATE_%03d_%03d", parm1, parm2); #else sprintf(entry, "USER_TRANSLATE_%03d_%03d", parm1, parm2); #endif argcount = 3; break; default: BUGCHECK(1); break; } INTL_TRACE(("INTL: trying user fn %s\n", entry)); if ( (function_block = FUN_lookup_function((TEXT*)entry)) ) { INTL_TRACE(("INTL: found a user fn, validating\n")); if ((function_block->fun_count == argcount) && (function_block->fun_args == argcount) && (function_block->fun_return_arg == 0) && (function_block->fun_entrypoint != NULL) && (function_block->fun_rpt[0].fun_mechanism == FUN_value) && (function_block->fun_rpt[0].fun_desc.dsc_dtype == dtype_short) && (function_block->fun_rpt[1].fun_desc.dsc_dtype == dtype_short) && (function_block->fun_rpt[argcount - 1]. 
fun_desc.dsc_dtype == dtype_short) && (function_block->fun_rpt[argcount].fun_mechanism == FUN_reference) && (function_block->fun_rpt[argcount].fun_desc.dsc_dtype == dtype_text)) { function = (FPTR_SHORT) function_block->fun_entrypoint; return (void*) function; } } return NULL; } static CharSet *BC_CharSetAllocFunc(MemoryPool &p, SSHORT cs_id, SSHORT unused) { typedef USHORT (*CSInitFunc)(CHARSET, SSHORT, SSHORT); CSInitFunc csInitFunc; csInitFunc = (CSInitFunc) intl_back_compat_obj_init_lookup(type_charset, cs_id, unused); assert(csInitFunc != 0); CHARSET cs = FB_NEW(p) charset; memset(cs, 0, sizeof(charset)); if (0 != (*csInitFunc)(cs, cs_id, unused)) { delete cs; return 0; } CharSet *result = 0; try { result = FB_NEW(p) CharSet_BC(p, cs); } catch(std::exception&) { delete cs; throw; } return result; } static CsConvert *BC_CsConvertAllocFunc(MemoryPool &p, SSHORT from_id, SSHORT to_id) { typedef USHORT (*CVTInitFunc)(CSCONVERT, SSHORT, SSHORT); CVTInitFunc cvtInitFunc; //cvtInitFunc = (CVTInitFunc) intl_back_compat_obj_init_lookup(type_csconvert, from_id, to_id); cvtInitFunc = (CVTInitFunc) intl_back_compat_obj_init_lookup(type_csconvert, to_id, from_id); assert(cvtInitFunc != 0); CSCONVERT cvt = FB_NEW(p) csconvert; memset(cvt, 0, sizeof(csconvert)); //if (0 != (*cvtInitFunc)(cvt, from_id, to_id)) if (0 != (*cvtInitFunc)(cvt, to_id, from_id)) { delete cvt; return 0; } CsConvert *result = 0; try { result = FB_NEW(p) CsConvert_BC(cvt, true); } catch(std::exception&) { delete cvt; throw; } return result; } static TextType *BC_TextTypeAllocFunc(MemoryPool &p, SSHORT tt_id, SSHORT unused) { typedef USHORT (*TTInitFunc)(TEXTTYPE, SSHORT, SSHORT); TTInitFunc ttInitFunc; ttInitFunc = (TTInitFunc) intl_back_compat_obj_init_lookup(type_texttype, tt_id, unused); assert(ttInitFunc != 0); TEXTTYPE tt = FB_NEW(p) texttype; memset(tt, 0, sizeof(texttype)); if (0 != (*ttInitFunc)(tt, tt_id, unused)) { delete tt; return 0; } TextType *result = 0; try { if 
(tt->texttype_bytes_per_char == 1 && tt->texttype_fn_to_wc == NULL) result = FB_NEW(p) TextType_BC(tt); else if (tt->texttype_bytes_per_char == 2 && tt->texttype_fn_to_wc == NULL) result = FB_NEW(p) TextType_BC(tt); else if (tt->texttype_fn_to_wc != NULL) result = FB_NEW(p) TextType_BC(tt); else BUGCHECK(1); } catch(std::exception&) { delete tt; throw; } return result; } static void* intl_back_compat_alloc_func_lookup( USHORT type, CHARSET_ID parm1, CHARSET_ID parm2) { if (NULL != intl_back_compat_obj_init_lookup(type,parm1,parm2)) { switch(type) { case type_charset: return (void*)BC_CharSetAllocFunc; case type_texttype: return (void*)BC_TextTypeAllocFunc; case type_csconvert: return (void*)BC_CsConvertAllocFunc; default: BUGCHECK(1); } } return NULL; }