/************* history ************ * * COMPONENT: JRD MODULE: INTL.C * generated by Marion V2.5 2/6/90 * from dev db on 4-JAN-1995 ***************************************************************** * * 42 4711 3 11 17 tamlin 2001 * Added silly numbers before my name, and converted it to C++. * * 18850 daves 4-JAN-1995 * Fix gds__alloc usage * * 18837 deej 31-DEC-1994 * fixing up HARBOR_MERGE * * 18821 deej 27-DEC-1994 * HARBOR MERGE * * 18789 jdavid 19-DEC-1994 * Cast some functions * * 17508 jdavid 15-JUL-1994 * Bring it up to date * * 17500 daves 13-JUL-1994 * Bug 6645: Different calculation of partial keys * * 17202 katz 24-MAY-1994 * PC_PLATFORM requires the .dll extension * * 17191 katz 23-MAY-1994 * OS/2 requires the .dll extension * * 17180 katz 23-MAY-1994 * Define location of DLL on OS/2 * * 17149 katz 20-MAY-1994 * In JRD, gds_arg_number arguments are SLONG's not int's * * 16633 daves 19-APR-1994 * Bug 6202: International licensing uses INTERNATIONAL product code * * 16555 katz 17-APR-1994 * The last argument of calls to ERR_post should be 0 * * 16521 katz 14-APR-1994 * Borland C needs a decorated symbol to lookup * * 16403 daves 8-APR-1994 * Bug 6441: Emit an error whenever transliteration from ttype_binary attempted * * 16141 katz 28-MAR-1994 * Don't declare return value from ISC_lookup_entrypoint as API_ROUTINE * * The contents of this file are subject to the Interbase Public * License Version 1.0 (the "License"); you may not use this file * except in compliance with the License. You may obtain a copy * of the License at http://www.Inprise.com/IPL.html * * Software distributed under the License is distributed on an * "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express * or implied. See the License for the specific language governing * rights and limitations under the License. * * The Original Code was created by Inprise Corporation * and its predecessors. Portions created by Inprise Corporation are * Copyright (C) Inprise Corporation. 
* * All Rights Reserved. * Contributor(s): ______________________________________. */ /* * PROGRAM: JRD Intl * MODULE: intl.c * DESCRIPTION: International text support routines * * copyright (c) 1992, 1993 by Borland International */ #include "firebird.h" #include #include "../jrd/ib_stdio.h" #include "../jrd/jrd.h" #include "../jrd/req.h" #include "../jrd/val.h" #include "gen/codes.h" #include "../jrd/intl.h" #include "../jrd/intlobj.h" #include "../jrd/ods.h" #include "../jrd/btr.h" #include "../intl/charsets.h" #include "../intl/country_codes.h" #include "../jrd/gdsassert.h" #include "../jrd/license.h" #ifdef INTL_BACKEND #include "../intl/ld_proto.h" #endif #include "../jrd/all_proto.h" #include "../jrd/cvt_proto.h" #include "../jrd/err_proto.h" #include "../jrd/evl_proto.h" #include "../jrd/flu_proto.h" #include "../jrd/fun_proto.h" #include "../jrd/gds_proto.h" #include "../jrd/iberr_proto.h" #include "../jrd/intl_proto.h" #include "../jrd/isc_proto.h" #include "../jrd/thd_proto.h" extern "C" { #ifdef DEV_BUILD #define isprintable(x) ((((unsigned char)(x)) & 0x7F) >= ' ') #ifdef DEBUG_INTL #define INTL_TRACE(args) gds__log args #else #define INTL_TRACE(args) #endif #define IS_TEXT(x) (((x)->dsc_dtype == dtype_text) ||\ ((x)->dsc_dtype == dtype_varying)||\ ((x)->dsc_dtype == dtype_cstring)) #else #define INTL_TRACE(args) #endif #define TTYPE_TO_CHARSET(tt) ((SSHORT)((tt) & 0x00FF)) #define TTYPE_TO_COLLATION(tt) ((SSHORT)((tt) >> 8)) typedef unsigned char FILECHAR; typedef USHORT UNICODE; typedef USHORT fss_wchar_t; typedef int fss_size_t; static BOOLEAN all_spaces(TDBB, CHARSET_ID, BYTE *, USHORT, USHORT); static void common_8bit_init(CHARSET, USHORT, ASCII *, BYTE *, BYTE *, BYTE *); static void common_convert_init(CSCONVERT, USHORT, USHORT, FPTR_SHORT, BYTE *, BYTE *); static USHORT cs_ascii_init(CHARSET, USHORT, USHORT); static USHORT cs_none_init(CHARSET, USHORT, USHORT); static USHORT cs_binary_init(CHARSET, USHORT, USHORT); static USHORT 
cs_unicode_fss_init(CHARSET, USHORT, USHORT); static USHORT cs_unicode_init(CHARSET, USHORT, USHORT); static USHORT cvt_ascii_utf_init(CSCONVERT, USHORT, USHORT); static USHORT cvt_none_to_unicode(CSCONVERT, WCHAR *, USHORT, UCHAR *, USHORT, SSHORT *, USHORT *); static USHORT cvt_ascii_to_unicode(CSCONVERT, WCHAR *, USHORT, UCHAR *, USHORT, SSHORT *, USHORT *); static USHORT cvt_unicode_to_ascii(CSCONVERT, NCHAR *, USHORT, WCHAR *, USHORT, SSHORT *, USHORT *); static void dump_hex(UCHAR *, USHORT); static void dump_latin(UCHAR *, USHORT); static void finish_texttype_init(TEXTTYPE, FPTR_VOID, STATUS *); static fss_size_t fss_mbtowc(fss_wchar_t *, UCHAR *, fss_size_t); static fss_size_t fss_wctomb(UCHAR *, fss_wchar_t); static USHORT internal_ch_copy(TEXTTYPE, UCHAR); static USHORT internal_ch_to_lower(TEXTTYPE, UCHAR); static USHORT internal_ch_to_upper(TEXTTYPE, UCHAR); static SSHORT internal_compare(TEXTTYPE, USHORT, UCHAR *, USHORT, UCHAR *); static USHORT internal_keylength(TEXTTYPE, USHORT); static SSHORT internal_mb_mbtowc(TEXTTYPE, WCHAR *, UCHAR *, USHORT); static SSHORT internal_nc_mbtowc(TEXTTYPE, WCHAR *, UCHAR *, USHORT); static SSHORT internal_str_copy(TEXTTYPE, USHORT, UCHAR *, USHORT, UCHAR *); static USHORT cvt_utffss_to_ascii(CSCONVERT, UCHAR *, USHORT, UCHAR *, USHORT, SSHORT *, USHORT *); static SSHORT internal_str_to_upper(TEXTTYPE, USHORT, UCHAR *, USHORT, UCHAR *); static USHORT internal_string_to_key(TEXTTYPE, USHORT, UCHAR *, USHORT, UCHAR *, USHORT); static SSHORT internal_wc_mbtowc(TEXTTYPE, WCHAR *, UCHAR *, USHORT); static USHORT mb_to_wc(CSCONVERT, WCHAR *, USHORT, MBCHAR *, USHORT, SSHORT *, USHORT *); static USHORT nc_to_wc(CSCONVERT, WCHAR *, USHORT, UCHAR *, USHORT, SSHORT *, USHORT *); static BOOLEAN obj_init(USHORT, SSHORT, SSHORT, void *, FPTR_VOID, STATUS *); static void pad_spaces(TDBB, CHARSET_ID, BYTE *, USHORT); static USHORT ttype_ascii_init(TEXTTYPE, USHORT, USHORT); static USHORT ttype_none_init(TEXTTYPE, USHORT, USHORT); 
static USHORT ttype_binary_init(TEXTTYPE, USHORT, USHORT); static USHORT ttype_unicode_fss_init(TEXTTYPE, USHORT, USHORT); static USHORT wc_to_mb(CSCONVERT, MBCHAR *, USHORT, WCHAR *, USHORT, SSHORT *, USHORT *); static USHORT wc_to_nc(CSCONVERT, NCHAR *, USHORT, WCHAR *, USHORT, SSHORT *, USHORT *); static USHORT wc_to_wc(CSCONVERT, WCHAR *, USHORT, WCHAR *, USHORT, SSHORT *, USHORT *); #define INTL_TEXTTYPE(p,err) ((TEXTTYPE) INTL_obj_lookup (tdbb, type_texttype, (p), (err), NULL)) #define INTL_CHARSETTYPE(p,err) ((CHARSET) INTL_obj_lookup (tdbb, type_charset, (p), (err), NULL)) #define NUM_ELEMENTS(x) (sizeof ((x))/sizeof ((x)[0])) /* Name of module that implements text-type (n) */ #ifdef VMS /* Note: MUST be only the file name. The VMS lib$find_shared_image * call insists on file name only, not any "path" components. */ #define INTL_MODULE1 "GDSINTL" #define INTL_MODULE2 "GDSINTL2" #endif #if (defined PC_PLATFORM && !defined NETWARE_386) #define INTL_MODULE1 "intl.dll" #define INTL_MODULE2 "intl2.dll" #endif #ifdef WIN_NT /* prefixed with $INTERBASE */ #define INTL_MODULE1 "gdsintl.dll" #define INTL_MODULE2 "gdsintl2.dll" #endif #ifndef INTL_MODULE1 /* prefixed with $INTERBASE */ #define INTL_MODULE1 "gdsintl" #define INTL_MODULE2 "gdsintl2" #endif #ifndef __BORLANDC__ #define INTL_LOOKUP_ENTRY1 "LD_lookup" #define INTL_LOOKUP_ENTRY2 "LD2_lookup" #define INTL_USER_ENTRY "USER_TEXTTYPE_%03d" #else #define INTL_LOOKUP_ENTRY1 "_LD_lookup" #define INTL_LOOKUP_ENTRY2 "_LD2_lookup" #define INTL_USER_ENTRY "_USER_TEXTTYPE_%03d" #endif CHARSET_ID DLL_EXPORT INTL_charset(TDBB tdbb, USHORT ttype, FPTR_VOID err) { /************************************** * * I N T L _ c h a r s e t * ************************************** * * Functional description * Return the character set ID for a piece of text. 
* **************************************/ switch (ttype) { case ttype_none: return (CS_NONE); case ttype_ascii: return (CS_ASCII); case ttype_unicode_fss: return (CS_UNICODE_FSS); case ttype_binary: return (CS_BINARY); case ttype_dynamic: SET_TDBB(tdbb); return (tdbb->tdbb_attachment->att_charset); default: return (TTYPE_TO_CHARSET(ttype)); } } int DLL_EXPORT INTL_compare( TDBB tdbb, DSC * pText1, DSC * pText2, FPTR_VOID err) { /************************************** * * I N T L _ c o m p a r e * ************************************** * * Functional description * Compare two pieces of international text. * **************************************/ UCHAR *p1, *p2; USHORT length1, length2; UCHAR buffer[MAX_KEY]; SSHORT compare_type; TEXTTYPE obj; USHORT t1, t2; SET_TDBB(tdbb); assert(pText1 != NULL); assert(pText2 != NULL); assert(IS_TEXT(pText1) && IS_TEXT(pText2)); assert(INTL_data_or_binary(pText1) || INTL_data_or_binary(pText2)); assert(err); /* normal compare routine from CVT_compare */ /* trailing spaces in strings are ignored for comparision */ length1 = CVT_get_string_ptr(pText1, &t1, &p1, NULL, 0, err); length2 = CVT_get_string_ptr(pText2, &t2, &p2, NULL, 0, err); /* YYY - by SQL II compare_type must be explicit in the SQL statement if there is any doubt */ compare_type = MAX(t1, t2); /* YYY */ if (t1 != t2) { CHARSET_ID cs1, cs2; cs1 = INTL_charset(tdbb, t1, err); cs2 = INTL_charset(tdbb, t2, err); if (cs1 != cs2) { if (compare_type != t2) { /* convert pText2 to pText1's type, if possible */ /* YYY - should failure to convert really return an error here? Support joining a 437 & Latin1 Column, and we pick the compare_type as 437, still only want the equal values.... But then, what about < operations, which make no sense if the string cannot be expressed... 
*/ length2 = INTL_convert_bytes(tdbb, cs1, buffer, sizeof(buffer), cs2, p2, length2, err); p2 = buffer; } else { /* convert pText1 to pText2's type, if possible */ length1 = INTL_convert_bytes(tdbb, cs2, buffer, sizeof(buffer), cs1, p1, length1, err); p1 = buffer; } } } obj = INTL_TEXTTYPE(compare_type, err); return reinterpret_cast < short (*) (...) > (obj->texttype_fn_compare) (obj, length1, p1, length2, p2); } USHORT DLL_EXPORT INTL_convert_bytes( TDBB tdbb, CHARSET_ID dest_type, BYTE * dest_ptr, USHORT dest_len, CHARSET_ID src_type, BYTE * src_ptr, USHORT src_len, FPTR_VOID err) { /************************************** * * I N T L _ c o n v e r t _ b y t e s * ************************************** * * Functional description * Given a string of bytes in one character set, convert it to another * character set. * * If (dest_ptr) is NULL, return the count of bytes needed to convert * the string. This does not guarantee the string can be converted, * the purpose of this is to allocate a large enough buffer. * * RETURNS: * Length of resulting string, in bytes. * calls (err) if conversion error occurs. * **************************************/ UCHAR *start_dest_ptr; USHORT len; USHORT len2; CSCONVERT cs_obj; CHARSET from_cs, to_cs; SSHORT err_code = 0; USHORT err_position; BYTE *tmp_buffer; SET_TDBB(tdbb); assert(src_ptr != NULL); assert(src_type != dest_type); assert(err != NULL); start_dest_ptr = dest_ptr; if ((dest_type == CS_BINARY) || (dest_type == CS_NONE)) { /* See if we just need a length estimate */ if (dest_ptr == NULL) return (src_len); len = MIN(dest_len, src_len); if (len) do *dest_ptr++ = *src_ptr++; while (--len); /* See if only space characters are remaining */ len = src_len - MIN(dest_len, src_len); if (!len || all_spaces(tdbb, src_type, src_ptr, len, 0)) return (dest_ptr - start_dest_ptr); else reinterpret_cast < void (*) (...) 
> (*err) (gds_arith_except, 0); } else if (src_len == 0) return (0); else if (src_type == CS_BINARY) reinterpret_cast < void (*) (...) > (*err) (gds_arith_except, gds_arg_gds, gds_transliteration_failed, 0); else /* character sets are known to be different */ { /* Do we know an object from cs1 to cs2? */ cs_obj = INTL_convert_lookup(tdbb, dest_type, src_type); if (cs_obj != NULL) { len = reinterpret_cast < USHORT(*)(...) > (*cs_obj->csconvert_convert) (cs_obj, dest_ptr, dest_len, src_ptr, src_len, &err_code, &err_position); if (!err_code || ((err_code == CS_TRUNCATION_ERROR) && all_spaces(tdbb, src_type, src_ptr, src_len, err_position))) return (len); else if (err_code == CS_TRUNCATION_ERROR) reinterpret_cast < void (*) (...) > (*err) (gds_arith_except, 0); else reinterpret_cast < void (*) (...) > (*err) (gds_arith_except, gds_arg_gds, gds_transliteration_failed, 0); } /* Find a CS1 to UNICODE object */ from_cs = INTL_CHARSETTYPE(src_type, (FPTR_VOID) NULL); if (from_cs == NULL) reinterpret_cast < void (*) (...) > (*err) (gds_arith_except, gds_arg_gds, gds_text_subtype, gds_arg_number, (SLONG) src_type, 0); /* ** allocate a temporary buffer that is large enough. 2 = sizeof WCHAR */ tmp_buffer = (BYTE *) gds__alloc((SLONG) src_len * 2); cs_obj = &from_cs->charset_to_unicode; assert(cs_obj != NULL); len = reinterpret_cast < USHORT(*)(...) > (*cs_obj->csconvert_convert) (cs_obj, tmp_buffer, src_len * 2, src_ptr, src_len, &err_code, &err_position); if (err_code && !((err_code == CS_TRUNCATION_ERROR) && all_spaces(tdbb, src_type, src_ptr, src_len, err_position))) { gds__free((SLONG *) tmp_buffer); if (err_code == CS_TRUNCATION_ERROR) reinterpret_cast < void (*) (...) > (*err) (gds_arith_except, 0); else reinterpret_cast < void (*) (...) 
> (*err) (gds_arith_except, gds_arg_gds, gds_transliteration_failed, 0); } /* Find a UNICODE to CS2 object */ to_cs = INTL_CHARSETTYPE(dest_type, (FPTR_VOID) NULL); if (to_cs == NULL) { gds__free((SLONG *) tmp_buffer); reinterpret_cast < void (*) (...) > (*err) (gds_arith_except, gds_arg_gds, gds_text_subtype, gds_arg_number, (SLONG) dest_type, 0); } cs_obj = &to_cs->charset_from_unicode; assert(cs_obj != NULL); len2 = reinterpret_cast < USHORT(*)(...) > (*cs_obj->csconvert_convert) (cs_obj, dest_ptr, dest_len, tmp_buffer, len, &err_code, &err_position); if (err_code && !((err_code == CS_TRUNCATION_ERROR) && all_spaces(tdbb, CS_UNICODE101, tmp_buffer, len, err_position))) { gds__free((SLONG *) tmp_buffer); if (err_code == CS_TRUNCATION_ERROR) reinterpret_cast < void (*) (...) > (*err) (gds_arith_except, 0); else reinterpret_cast < void (*) (...) > (*err) (gds_arith_except, gds_arg_gds, gds_transliteration_failed, 0); } gds__free((SLONG *) tmp_buffer); return (len2); } return (0); /* to remove compiler errors. This should never be executed */ } CSCONVERT DLL_EXPORT INTL_convert_lookup(TDBB tdbb, CHARSET_ID to_cs, CHARSET_ID from_cs) { /************************************** * * I N T L _ c o n v e r t _ l o o k u p * ************************************** * * Functional description * **************************************/ VEC vector; CHARSET charset; CSCONVERT converter; USHORT i; DBB dbb; SET_TDBB(tdbb); dbb = tdbb->tdbb_database; CHECK_DBB(dbb); if (from_cs == CS_dynamic) from_cs = tdbb->tdbb_attachment->att_charset; if (to_cs == CS_dynamic) to_cs = tdbb->tdbb_attachment->att_charset; /* Should from_cs == to_cs? be handled better? 
YYY */ assert(from_cs != CS_dynamic); assert(to_cs != CS_dynamic); charset = INTL_CHARSETTYPE(from_cs, (FPTR_VOID) NULL); if (charset == NULL) return (NULL); vector = charset->charset_converters; if (!(vector)) { vector = charset->charset_converters = (VEC) ALLOCPV(type_vec, 10); assert(vector != NULL); vector->vec_count = 10; } for (i = 0; i < vector->vec_count; i++) { converter = (CSCONVERT) (vector->vec_object[i]); if (converter == NULL) break; if (converter->csconvert_to == to_cs) { if (converter->csconvert_flags & CONVERTTYPE_init) return (converter); else return NULL; } } if (i >= vector->vec_count) { vector = (VEC) ALL_extend(reinterpret_cast < BLK * >(&charset->charset_converters), i + 10); converter = NULL; } if (to_cs == CS_UNICODE101) { converter = &charset->charset_to_unicode; } else if (from_cs == CS_UNICODE101) { CHARSET charset2; charset2 = INTL_CHARSETTYPE(to_cs, (FPTR_VOID) NULL); if (charset2 == NULL) return (NULL); converter = &charset2->charset_from_unicode; } else { if (converter == NULL) converter = (CSCONVERT) ALLOCP(type_csconvert); if (obj_init(type_csconvert, to_cs, from_cs, converter, NULL, NULL)) { /* Can't find a conversion object - cache that info in the * list of converters. */ vector->vec_object[i] = (BLK) converter; converter->csconvert_flags = 0; converter->csconvert_from = from_cs; converter->csconvert_to = to_cs; return NULL; } } vector->vec_object[i] = (BLK) converter; converter->csconvert_flags |= CONVERTTYPE_init; assert(converter->csconvert_from == from_cs); assert(converter->csconvert_to == to_cs); return (converter); } int DLL_EXPORT INTL_convert_string(DSC * to, DSC * from, FPTR_VOID err) { /************************************** * * I N T L _ c o n v e r t _ s t r i n g * ************************************** * * Functional description * Convert a string from one type to another * * RETURNS: * 0 if no error in conversion * non-zero otherwise. 
* **************************************/ UCHAR *p, *q; UCHAR *from_ptr; CHARSET_ID to_cs, from_cs; USHORT from_type; TDBB tdbb; USHORT from_len, from_fill; USHORT to_size, to_len, to_fill; /* Note: This function is called from outside the engine as well as inside - we likely can't get rid of GET_THREAD_DATA here */ tdbb = GET_THREAD_DATA; if (tdbb == NULL) /* are we in the Engine? */ return (1); /* no, then can't access intl gah */ assert(to != NULL); assert(from != NULL); assert(IS_TEXT(to) && IS_TEXT(from)); from_cs = INTL_charset(tdbb, INTL_TTYPE(from), err); to_cs = INTL_charset(tdbb, INTL_TTYPE(to), err); p = to->dsc_address; /* Must convert dtype(cstring,text,vary) and ttype(ascii,binary,..intl..) */ from_len = CVT_get_string_ptr(from, &from_type, &from_ptr, NULL, 0, err); to_size = to_len = TEXT_LEN(to); q = from_ptr; switch (to->dsc_dtype) { case dtype_text: if ((from_cs != to_cs) && (to_cs != CS_BINARY) && (to_cs != CS_NONE)) { to_len = INTL_convert_bytes(tdbb, to_cs, to->dsc_address, to_size, from_cs, from_ptr, from_len, err); to_fill = to_size - to_len; from_fill = 0; /* Convert_bytes handles source truncation */ p += to_len; } else { /* binary string can always be converted TO by byte-copy */ to_len = MIN(from_len, to_size); from_fill = from_len - to_len; to_fill = to_size - to_len; if (to_len) do *p++ = *q++; while (--to_len); } if (to_fill > 0) pad_spaces(tdbb, to_cs, p, to_fill); break; case dtype_cstring: if ((from_cs != to_cs) && (to_cs != CS_BINARY) && (to_cs != CS_NONE)) { to_len = INTL_convert_bytes(tdbb, to_cs, to->dsc_address, to_size, from_cs, from_ptr, from_len, err); to->dsc_address[to_len] = 0; from_fill = 0; /* Convert_bytes handles source truncation */ } else { /* binary string can always be converted TO by byte-copy */ to_len = MIN(from_len, to_size); from_fill = from_len - to_len; if (to_len) do *p++ = *q++; while (--to_len); *p = 0; } break; case dtype_varying: if ((from_cs != to_cs) && (to_cs != CS_BINARY) && (to_cs != CS_NONE)) { 
to_len = INTL_convert_bytes(tdbb, to_cs, ((VARY *) to->dsc_address)->vary_string, to_size, from_cs, from_ptr, from_len, err); ((VARY *) to->dsc_address)->vary_length = to_len; from_fill = 0; /* Convert_bytes handles source truncation */ } else { /* binary string can always be converted TO by byte-copy */ to_len = MIN(from_len, to_size); from_fill = from_len - to_len; ((VARY *) p)->vary_length = to_len; p = ((VARY *) p)->vary_string; if (to_len) do *p++ = *q++; while (--to_len); } break; } if (from_fill) /* Make sure remaining characters on From string are spaces */ if (!all_spaces(tdbb, from_cs, q, from_fill, 0)) reinterpret_cast < void (*) (...) > (*err) (gds_arith_except, 0); return 0; } #ifdef DEV_BUILD int DLL_EXPORT INTL_data(DSC * pText) { /************************************** * * I N T L _ d a t a * ************************************** * * Functional description * Given an input text descriptor, * return TRUE if the data pointed to respresents * international text (subject to user defined or non-binary * collation or comparison). * **************************************/ assert(pText != NULL); if (!IS_TEXT(pText)) return FALSE; if (!INTERNAL_TTYPE(pText)) return TRUE; return FALSE; } #endif #ifdef DEV_BUILD int DLL_EXPORT INTL_data_or_binary(DSC * pText) { /************************************** * * I N T L _ d a t a _ o r _ b i n a r y * ************************************** * * Functional description * **************************************/ return (INTL_data(pText) || (pText->dsc_ttype == ttype_binary)); } #endif int DLL_EXPORT INTL_defined_type(TDBB tdbb, STATUS * status, SSHORT t_type) { /************************************** * * I N T L _ d e f i n e d _ t y p e * ************************************** * * Functional description * Is (t_type) a known text type? * Return: * FALSE type is not defined. * TRUE type is defined * status set to gds_status codes to describe any error. 
* * Note: * Due to cleanup that must happen in DFW, this routine * must return, and not call ERR directly. * **************************************/ TEXTTYPE obj; SET_TDBB(tdbb); if (status) status[0] = gds_arg_end; obj = reinterpret_cast < TEXTTYPE > (INTL_obj_lookup (tdbb, type_texttype, t_type, (FPTR_VOID) NULL, status)); if (obj == NULL) return FALSE; return ((obj->texttype_flags & TEXTTYPE_init) == TEXTTYPE_init); } SSHORT DLL_EXPORT INTL_fss_mbtowc(TEXTTYPE * obj, WCHAR * wc, NCHAR * p, USHORT n) { /************************************** * * I N T L _ f s s _ m b t o w c * ************************************** * * Functional description * InterBase interface to mbtowc function for Unicode * text in FSS bytestream format. * * Return: (common to all mbtowc routines) * -1 Error in parsing next character * Count of characters consumed. * *wc Next character from byte steam (if wc <> NULL) * * Note: This routine has a cousin in intl/cv_utffss.c * **************************************/ assert(obj); assert(wc); assert(p); return fss_mbtowc(wc, p, n); } /* * The following was provided by Ken Thompson of AT&T Bell Laboratories, * , on Tue, 8 Sep 92 03:22:07 EDT, to the X/Open * Joint Internationalization Group. Some minor formatting changes have * been made by Glenn Adams, . * * ------------------------------------------------------------------------- * File System Safe Universal Character Set Transformation Format (FSS-UTF) * ------------------------------------------------------------------------- * * With the approval of ISO/IEC 10646 (Unicode) as an international * standard and the anticipated wide spread use of this universal coded * character set (UCS), it is necessary for historically ASCII based * operating systems to devise ways to cope with representation and * handling of the large number of characters that are possible to be * encoded by this new standard. 
* * There are several challenges presented by UCS which must be dealt with * by historical operating systems and the C-language programming * environment. The most significant of these challenges is the encoding * scheme used by UCS. More precisely, the challenge is the marrying of * the UCS standard with existing programming languages and existing * operating systems and utilities. * * The challenges of the programming languages and the UCS standard are * being dealt with by other activities in the industry. However, we are * still faced with the handling of UCS by historical operating systems * and utilities. Prominent among the operating system UCS handling * concerns is the representation of the data within the file system. An * underlying assumption is that there is an absolute requirement to * maintain the existing operating system software investment while at * the same time taking advantage of the use the large number of * characters provided by the UCS. * * UCS provides the capability to encode multi-lingual text within a * single coded character set. However, UCS and its UTF variant do not * protect null bytes and/or the ASCII slash ("/") making these character * encodings incompatible with existing Unix implementations. The * following proposal provides a Unix compatible transformation format of * UCS such that Unix systems can support multi-lingual text in a single * encoding. This transformation format encoding is intended to be used * as a file code. This transformation format encoding of UCS is * intended as an intermediate step towards full UCS support. However, * since nearly all Unix implementations face the same obstacles in * supporting UCS, this proposal is intended to provide a common and * compatible encoding during this transition stage. 
* * Goal/Objective * -------------- * * With the assumption that most, if not all, of the issues surrounding * the handling and storing of UCS in historical operating system file * systems are understood, the objective is to define a UCS * transformation format which also meets the requirement of being usable * on a historical operating system file system in a non-disruptive * manner. The intent is that UCS will be the process code for the * transformation format, which is usable as a file code. * * Criteria for the Transformation Format * -------------------------------------- * * Below are the guidelines that were used in defining the UCS * transformation format: * * 1) Compatibility with historical file systems: * * Historical file systems disallow the null byte and the ASCII * slash character as a part of the file name. * * 2) Compatibility with existing programs: * * The existing model for multibyte processing is that ASCII does * not occur anywhere in a multibyte encoding. There should be * no ASCII code values for any part of a transformation format * representation of a character that was not in the ASCII * character set in the UCS representation of the character. * * 3) Ease of conversion from/to UCS. * * 4) The first byte should indicate the number of bytes to * follow in a multibyte sequence. * * 5) The transformation format should not be extravagant in * terms of number of bytes used for encoding. * * 6) It should be possible to find the start of a character * efficiently starting from an arbitrary location in a byte * stream. * * Proposed FSS-UTF * ---------------- * * The proposed UCS transformation format encodes UCS values in the range * [0,0x7fffffff] using multibyte characters of lengths 1, 2, 3, 4, 5, * and 6 bytes. For all encodings of more than one byte, the initial * byte determines the number of bytes used and the high-order bit in * each byte is set. Every byte that does not start 10xxxxxx is the * start of a UCS character sequence. 
* * An easy way to remember this transformation format is to note that the * number of high-order 1's in the first byte signifies the number of * bytes in the multibyte character: * * Bits Hex Min Hex Max Byte Sequence in Binary * 7 00000000 0000007f 0vvvvvvv * 11 00000080 000007FF 110vvvvv 10vvvvvv * 16 00000800 0000FFFF 1110vvvv 10vvvvvv 10vvvvvv * 21 00010000 001FFFFF 11110vvv 10vvvvvv 10vvvvvv 10vvvvvv * 26 00200000 03FFFFFF 111110vv 10vvvvvv 10vvvvvv 10vvvvvv 10vvvvvv * 31 04000000 7FFFFFFF 1111110v 10vvvvvv 10vvvvvv 10vvvvvv 10vvvvvv 10vvvvvv * * The UCS value is just the concatenation of the v bits in the multibyte * encoding. When there are multiple ways to encode a value, for example * UCS 0, only the SSHORTest encoding is legal. * * Below are sample implementations of the C standard wctomb() and * mbtowc() functions which demonstrate the algorithms for converting * from UCS to the transformation format and converting from the * transformation format to UCS. The sample implementations include error * checks, some of which may not be necessary for conformance: * */ typedef struct { int cmask; int cval; int shift; SLONG lmask; SLONG lval; } Tab; static CONST Tab tab[] = { 0x80, 0x00, 0 * 6, 0x7F, 0, /* 1 byte sequence */ 0xE0, 0xC0, 1 * 6, 0x7FF, 0x80, /* 2 byte sequence */ 0xF0, 0xE0, 2 * 6, 0xFFFF, 0x800, /* 3 byte sequence */ 0xF8, 0xF0, 3 * 6, 0x1FFFFF, 0x10000, /* 4 byte sequence */ 0xFC, 0xF8, 4 * 6, 0x3FFFFFF, 0x200000, /* 5 byte sequence */ 0xFE, 0xFC, 5 * 6, 0x7FFFFFFF, 0x4000000, /* 6 byte sequence */ 0, /* end of table */ }; static fss_size_t fss_mbtowc(fss_wchar_t * p, UCHAR * s, fss_size_t n) { SLONG l; int c0, c, nc; Tab *t; if (s == 0) return 0; nc = 0; if (n <= nc) return -1; c0 = *s & 0xff; l = c0; for (t = const_cast < Tab * >(tab); t->cmask; t++) { nc++; if ((c0 & t->cmask) == t->cval) { l &= t->lmask; if (l < t->lval) return -1; *p = l; return nc; } if (n <= nc) return -1; s++; c = (*s ^ 0x80) & 0xFF; if (c & 0xC0) return -1; l = (l << 6) 
| c; } return -1; } static fss_size_t fss_wctomb(UCHAR * s, fss_wchar_t wc) { SLONG l; int c, nc; Tab *t; if (s == 0) return 0; l = wc; nc = 0; for (t = const_cast < Tab * >(tab); t->cmask; t++) { nc++; if (l <= t->lmask) { c = t->shift; *s = t->cval | (l >> c); while (c > 0) { c -= 6; s++; *s = 0x80 | ((l >> c) & 0x3F); } return nc; } } return -1; } USHORT DLL_EXPORT INTL_fss_to_unicode(CSCONVERT obj, UNICODE * dest_ptr, USHORT dest_len, /* BYTE count */ NCHAR * src_ptr, USHORT src_len, SSHORT * err_code, USHORT * err_position) { UNICODE *start; USHORT src_start = src_len; fss_size_t res; assert(src_ptr != NULL || dest_ptr == NULL); assert(err_code != NULL); assert(err_position != NULL); assert(obj != NULL); *err_code = 0; /* See if we're only after a length estimate */ if (dest_ptr == NULL) return (src_len * 2); /* All single byte narrow characters */ start = dest_ptr; src_start = src_len; while ((src_len) && (dest_len >= sizeof(*dest_ptr))) { res = fss_mbtowc(dest_ptr, src_ptr, src_len); if (res == -1) { *err_code = CS_BAD_INPUT; break; } assert(res <= src_len); dest_ptr++; dest_len -= sizeof(*dest_ptr); src_ptr += res; src_len -= res; } if (src_len && !*err_code) { *err_code = CS_TRUNCATION_ERROR; } *err_position = src_start - src_len; return ((dest_ptr - start) * sizeof(*dest_ptr)); } USHORT DLL_EXPORT INTL_unicode_to_fss(CSCONVERT obj, MBCHAR * fss_str, USHORT fss_len, UNICODE * unicode_str, USHORT unicode_len, /* BYTE count */ SSHORT * err_code, USHORT * err_position) { UCHAR *start; USHORT src_start = unicode_len; UCHAR tmp_buffer[6]; UCHAR *p; fss_size_t res; assert(unicode_str != NULL || fss_str == NULL); assert(err_code != NULL); assert(err_position != NULL); assert(obj != NULL); assert(obj->csconvert_convert == (FPTR_SHORT) INTL_unicode_to_fss); *err_code = 0; /* See if we're only after a length estimate */ if (fss_str == NULL) return ((unicode_len + 1) / 2 * 3); /* worst case - all han character input */ start = fss_str; while ((fss_len) && 
(unicode_len >= sizeof(*unicode_str))) { /* Convert the wide character into temp buffer */ res = fss_wctomb(tmp_buffer, *unicode_str); if (res == -1) { *err_code = CS_BAD_INPUT; break; } /* will the mb sequence fit into space left? */ if (res > fss_len) { *err_code = CS_TRUNCATION_ERROR; break; } /* copy the converted bytes into the destination */ p = tmp_buffer; for (; res; res--, fss_len--) *fss_str++ = *p++; unicode_len -= sizeof(*unicode_str); unicode_str++; } if (unicode_len && !*err_code) { *err_code = CS_TRUNCATION_ERROR; } *err_position = src_start - unicode_len; return ((fss_str - start) * sizeof(*fss_str)); } WCHAR DLL_EXPORT INTL_getch(TDBB tdbb, TEXTTYPE * obj, SSHORT t_type, UCHAR ** ptr, USHORT * count) { /************************************** * * I N T L _ g e t c h * ************************************** * * Functional description * Get next character from a buffer. * **************************************/ SSHORT used; USHORT wc; SET_TDBB(tdbb); assert(obj); assert(ptr); if (*obj == NULL) { *obj = INTL_TEXTTYPE(t_type, (FPTR_VOID) ERR_post); assert(*obj); assert((*obj)->texttype_fn_mbtowc); } used = reinterpret_cast < short (*) (...) 
> (*(*obj)->texttype_fn_mbtowc) (*obj, &wc, *ptr, *count); if (used == -1) return 0; *ptr += used; *count -= used; return wc; } void DLL_EXPORT INTL_init(TDBB tdbb) { /************************************** * * I N T L _ i n i t * ************************************** * * Functional description * **************************************/ DBB dbb; VEC vector; SET_TDBB(tdbb); dbb = tdbb->tdbb_database; CHECK_DBB(dbb); if (!(vector = dbb->dbb_text_objects)) { vector = dbb->dbb_text_objects = (VEC) ALLOCPV(type_vec, 25); vector->vec_count = 25; } if (!(vector = dbb->dbb_charsets)) { vector = dbb->dbb_charsets = (VEC) ALLOCPV(type_vec, 25); vector->vec_count = 25; } } USHORT DLL_EXPORT INTL_key_length(TDBB tdbb, USHORT idxType, USHORT iLength) { /************************************** * * I N T L _ k e y _ l e n g t h * ************************************** * * Functional description * Given an index type, and a maximum length (iLength) * return the length of the byte string key descriptor to * use when collating text of this type. * **************************************/ USHORT key_length; TEXTTYPE obj; SSHORT ttype; SET_TDBB(tdbb); assert(idxType >= idx_first_intl_string); ttype = INTL_INDEX_TO_TEXT(idxType); if (ttype >= 0 && ttype <= ttype_last_internal) key_length = iLength; else { obj = INTL_TEXTTYPE(ttype, (FPTR_VOID) ERR_post); key_length = reinterpret_cast < USHORT(*)(...) > (*obj->texttype_fn_key_length) (obj, iLength); } /* Validity checks on the computed key_length */ if (key_length > MAX_KEY) key_length = MAX_KEY; if (key_length < iLength) key_length = iLength; return (key_length); } void *DLL_EXPORT INTL_obj_lookup( TDBB tdbb, USHORT objtype, SSHORT parm1, FPTR_VOID err, STATUS * status) { /************************************** * * I N T L _ o b j _ l o o k u p * ************************************** * * Functional description * * Lookup either a character set descriptor or * texttype descriptor object. 
* * First, search the appropriate vector that hangs * off the dbb. If not found, then call the lower * level lookup routine to find it in the libraries. * * Returns: * *object - if no errors; * - if error & err non NULL * NULL - if error & err NULL * **************************************/ DBB dbb; VEC vector, *pVector; BLK cs_object; CHARSET cs; USHORT id; SET_TDBB(tdbb); dbb = tdbb->tdbb_database; assert((objtype == type_charset) || (objtype == type_texttype)); if (objtype == type_charset) { pVector = &dbb->dbb_charsets; id = TTYPE_TO_CHARSET(parm1); if (id == CS_dynamic) id = tdbb->tdbb_attachment->att_charset; } else { if (parm1 == ttype_dynamic) parm1 = MAP_CHARSET_TO_TTYPE(tdbb->tdbb_attachment->att_charset); cs = (CHARSET) INTL_obj_lookup(tdbb, type_charset, parm1, err, status); if (!cs) return NULL; pVector = &cs->charset_collations; id = TTYPE_TO_COLLATION(parm1); } assert(pVector != NULL); vector = *pVector; assert(vector != NULL); if (id >= vector->vec_count) vector = (VEC) ALL_extend(reinterpret_cast < BLK * >(pVector), id + 10); if (!(cs_object = vector->vec_object[id])) { cs_object = ALLOCP(objtype); vector->vec_object[id] = (BLK) cs_object; } assert(cs_object != NULL); if (objtype == type_charset) { if (((CHARSET) cs_object)->charset_flags & CHARSET_init) return (cs_object); if (obj_init (objtype, TTYPE_TO_CHARSET(parm1), 0, cs_object, err, (STATUS *) status)) return (NULL); ((CHARSET) cs_object)->charset_collations = (VEC) ALLOCPV(type_vec, 10); ((CHARSET) cs_object)->charset_collations->vec_count = 10; ((CHARSET) cs_object)->charset_flags |= CHARSET_init; return (cs_object); } assert(objtype == type_texttype); if (((TEXTTYPE) cs_object)->texttype_flags & TEXTTYPE_init) return (cs_object); if (obj_init(objtype, parm1, 0, cs_object, err, (STATUS *) status)) return (NULL); finish_texttype_init((TEXTTYPE) cs_object, err, status); if (((TEXTTYPE) cs_object)->texttype_flags & TEXTTYPE_init) return (cs_object); return NULL; } void DLL_EXPORT 
INTL_pad_spaces(TDBB tdbb, DSC * type, UCHAR * string, USHORT length)
{
/**************************************
 *
 *	I N T L _ p a d _ s p a c e s
 *
 **************************************
 *
 * Functional description
 *	Pad a buffer with spaces, using the character
 *	set's defined space character.  The character set is
 *	derived from the descriptor's dsc_ttype; length is handed
 *	unchanged to pad_spaces.
 *
 **************************************/
	USHORT charset;

	SET_TDBB(tdbb);

	assert(type != NULL);
	assert(IS_TEXT(type));
	assert(string != NULL);

	charset = INTL_charset(tdbb, type->dsc_ttype, NULL);
	pad_spaces(tdbb, charset, string, length);
}


USHORT DLL_EXPORT INTL_string_to_key(TDBB tdbb,
									 USHORT idxType,
									 DSC * pString,
									 DSC * pByte, USHORT partial)
{
/**************************************
 *
 *	I N T L _ s t r i n g _ t o _ k e y
 *
 **************************************
 *
 * Functional description
 *	Given an input string, convert it to a byte string
 *	that will collate naturally (byte order).
 *
 *	pString is first converted to the text type implied by
 *	idxType.  For the simple text types (none, ascii, binary,
 *	metadata) the bytes are copied to pByte->dsc_address and
 *	trailing pad characters are stripped; for every other type
 *	the text type object's string_to_key routine builds the key.
 *
 *	Return the length of the resulting byte string.
 *
 **************************************/
	USHORT len, outlen;
	UCHAR *src, *dest;
	UCHAR buffer[MAX_KEY];
	UCHAR pad_char;
	TEXTTYPE obj;
	SSHORT ttype;

	SET_TDBB(tdbb);

	assert(idxType >= idx_first_intl_string || idxType == idx_string
		   || idxType == idx_byte_array || idxType == idx_metadata);
	assert(pString != NULL);
	assert(pByte != NULL);
	assert(pString->dsc_address != NULL);
	assert(pByte->dsc_address != NULL);
	assert(pByte->dsc_dtype == dtype_text);

	switch (idxType) {
	case idx_string:
		pad_char = ' ';
		ttype = ttype_none;
		break;
	case idx_byte_array:
		pad_char = 0;
		ttype = ttype_binary;
		break;
	case idx_metadata:
		pad_char = ' ';
		ttype = ttype_metadata;
		break;
	default:
		pad_char = 0;
		ttype = INTL_INDEX_TO_TEXT(idxType);
		break;
	}

	/* Make a string into the proper type of text */

	len = CVT_make_string(pString, ttype, &src,
						  reinterpret_cast < vary * >(buffer),
						  sizeof(buffer), (FPTR_VOID) ERR_post);

	dest = pByte->dsc_address;

	switch (ttype) {
	case ttype_metadata:
	case ttype_binary:
	case ttype_ascii:
	case ttype_none:
		/* Never write past the output descriptor: the collation
		   branch below treats dsc_length as the capacity of
		   dsc_address, so apply the same limit here. */
		if (len > pByte->dsc_length)
			len = pByte->dsc_length;

		while (len--)
			*dest++ = *src++;

		/* strip off ending pad characters */
		while (dest > pByte->dsc_address && *(dest - 1) == pad_char)
			dest--;

		outlen = (dest - pByte->dsc_address);
		break;

	default:
		obj = INTL_TEXTTYPE(ttype, (FPTR_VOID) ERR_post);
		outlen =
			reinterpret_cast < USHORT(*)(...) >
			(*obj->texttype_fn_string_to_key) (obj, len, src,
											   pByte->dsc_length, dest,
											   partial);
		break;
	}

	return (outlen);
}


int DLL_EXPORT INTL_str_to_upper(TDBB tdbb, DSC * pString)
{
/**************************************
 *
 *	I N T L _ s t r _ t o _ u p p e r
 *
 **************************************
 *
 * Functional description
 *	Given an input string, convert it to uppercase.
 *	The conversion is done in place on the bytes that
 *	CVT_get_string_ptr hands back.  Binary strings are left
 *	alone; none / ascii / unicode_fss use UPPER7; any other
 *	text type uses its own str_to_upper routine.
 *
 * Returns:
 *	Always 0.
 *
 **************************************/
	USHORT len;
	UCHAR *src;
	UCHAR buffer[MAX_KEY];
	USHORT ttype;
	TEXTTYPE obj;

	SET_TDBB(tdbb);

	assert(pString != NULL);
	assert(pString->dsc_address != NULL);

	len = CVT_get_string_ptr(pString, &ttype, &src,
							 reinterpret_cast < vary * >(buffer),
							 sizeof(buffer), (FPTR_VOID) ERR_post);

	switch (ttype) {
	case ttype_binary:
		/* cannot uppercase binary strings */
		break;

	case ttype_none:
	case ttype_ascii:
	case ttype_unicode_fss:
		for (; len; len--, src++)
			*src = UPPER7(*src);
		break;

	default:
		obj = INTL_TEXTTYPE(ttype, (FPTR_VOID) ERR_post);
		(void) reinterpret_cast < short (*) (...) >
			(*obj->texttype_fn_str_to_upper) (obj, len, src, len, src);
		break;
	}

/*
 * Added to remove compiler errors.  Callers are not checking
 * the return code from this function 4/5/95.
*/ return (0); } UCHAR DLL_EXPORT INTL_upper(TDBB tdbb, USHORT ttype, UCHAR ch) { /************************************** * * I N T L _ u p p e r * ************************************** * * Functional description * Given an input character, convert it to uppercase * **************************************/ TEXTTYPE obj; SET_TDBB(tdbb); switch (ttype) { case ttype_binary: /* cannot uppercase binary strings */ return (ch); case ttype_none: case ttype_ascii: case ttype_unicode_fss: return (UPPER7(ch)); default: obj = INTL_TEXTTYPE(ttype, (FPTR_VOID) ERR_post); return ((UCHAR) reinterpret_cast < USHORT(*)(...) > (*obj->texttype_fn_to_upper) (obj, ch)); } } static BOOLEAN all_spaces( TDBB tdbb, CHARSET_ID charset, BYTE * ptr, USHORT len, USHORT offset) { /************************************** * * a l l _ s p a c e s * ************************************** * * Functional description * determine if the string at ptr[offset] ... ptr[len] is entirely * spaces, as per the space definition of (charset). * The binary representation of a Space is character-set dependent. * (0x20 for Ascii, 0x0020 for Unicode, 0x20 for SJIS, but must watch for * 0x??20, which is NOT a space. **************************************/ CHARSET obj; BYTE *p; BYTE *end, *space, *end_space; SET_TDBB(tdbb); assert(ptr != NULL); obj = INTL_CHARSETTYPE(charset, (FPTR_VOID) ERR_post); assert(obj != NULL); /* * We are assuming offset points to the first byte which was not * consumed in a conversion. 
And that offset is pointing * to a character boundary */ /* Single-octet character sets are optimized here */ if (obj->charset_space_length == 1) { p = &ptr[offset]; end = &ptr[len]; while (p < end) { if (*p++ != *obj->charset_space_character) return (FALSE); } return (TRUE); } else { p = &ptr[offset]; end = &ptr[len]; end_space = &obj->charset_space_character[obj->charset_space_length]; while (p < end) { space = obj->charset_space_character; while (p < end && space < end_space) { if (*p++ != *space++) return (FALSE); } } return (TRUE); } } static void common_convert_init( CSCONVERT csptr, USHORT to_cs, USHORT from_cs, FPTR_SHORT cvt_fn, BYTE * datatable, BYTE * datatable2) { /************************************** * * c o m m o n _ c o n v e r t _ i n i t * ************************************** * * Functional description * **************************************/ csptr->csconvert_version = 40; csptr->csconvert_name = (ASCII *) "DIRECT"; csptr->csconvert_from = from_cs; csptr->csconvert_to = to_cs; csptr->csconvert_convert = cvt_fn; csptr->csconvert_datatable = datatable; csptr->csconvert_misc = datatable2; } static void finish_texttype_init( TEXTTYPE txtobj, FPTR_VOID err, STATUS * status) { /************************************** * * f i n i s h _ t e x t t y p e _ i n i t * ************************************** * * Functional description * * Finish initializing a text object with pointers to * internal routines. * This is also a handy place to check the licensing bits * for the text object. * * Returns: * The TEXTTYPE_init bit in texttype_flags is set if the * object is sucessfully initialized. 
* **************************************/ if ((txtobj->texttype_fn_to_wc == NULL) && (txtobj->texttype_bytes_per_char == 1)) { /* Finish initialization of a narrow character object */ txtobj->texttype_fn_to_wc = (FPTR_SHORT) nc_to_wc; txtobj->texttype_fn_contains = (FPTR_SHORT) EVL_nc_contains; txtobj->texttype_fn_matches = (FPTR_SHORT) EVL_nc_matches; txtobj->texttype_fn_like = (FPTR_SHORT) EVL_nc_like; txtobj->texttype_fn_sleuth_merge = (FPTR_SHORT) EVL_nc_sleuth_merge; txtobj->texttype_fn_sleuth_check = (FPTR_SHORT) EVL_nc_sleuth_check; if (!txtobj->texttype_fn_mbtowc) txtobj->texttype_fn_mbtowc = (FPTR_short) internal_nc_mbtowc; } else if ((txtobj->texttype_fn_to_wc == NULL) && (txtobj->texttype_bytes_per_char == 2)) { /* Finish initialization of a wide character object */ txtobj->texttype_fn_to_wc = (FPTR_SHORT) wc_to_wc; txtobj->texttype_fn_contains = (FPTR_SHORT) EVL_wc_contains; txtobj->texttype_fn_matches = (FPTR_SHORT) EVL_wc_matches; txtobj->texttype_fn_like = (FPTR_SHORT) EVL_wc_like; txtobj->texttype_fn_sleuth_merge = (FPTR_SHORT) EVL_wc_sleuth_merge; txtobj->texttype_fn_sleuth_check = (FPTR_SHORT) EVL_wc_sleuth_check; if (!txtobj->texttype_fn_mbtowc) txtobj->texttype_fn_mbtowc = (FPTR_short) internal_wc_mbtowc; } else if (txtobj->texttype_fn_to_wc != NULL) { /* Finish initialization of a multibyte character object */ txtobj->texttype_fn_contains = (FPTR_SHORT) EVL_mb_contains; txtobj->texttype_fn_matches = (FPTR_SHORT) EVL_mb_matches; txtobj->texttype_fn_like = (FPTR_SHORT) EVL_mb_like; txtobj->texttype_fn_sleuth_merge = (FPTR_SHORT) EVL_mb_sleuth_merge; txtobj->texttype_fn_sleuth_check = (FPTR_SHORT) EVL_mb_sleuth_check; if (!txtobj->texttype_fn_mbtowc) txtobj->texttype_fn_mbtowc = (FPTR_short) internal_mb_mbtowc; } else assert(0); txtobj->texttype_flags |= TEXTTYPE_init; } static USHORT internal_ch_copy(TEXTTYPE obj, UCHAR ch) { /************************************** * * i n t e r n a l _ c h _ c o p y * ************************************** * * 
Functional description * **************************************/ return (ch); } static USHORT internal_ch_to_upper(TEXTTYPE obj, UCHAR ch) { /************************************** * * i n t e r n a l _ c h _ t o _ u p p e r * ************************************** * * Functional description * **************************************/ return (UPPER7(ch)); } static USHORT internal_ch_to_lower(TEXTTYPE obj, UCHAR ch) { /************************************** * * i n t e r n a l _ c h _ t o _ l o w e r * ************************************** * * Functional description * **************************************/ return ((((ch) >= 'A') && ((ch) < 'Z')) ? ((ch) - 'A' + 'a') : (ch)); } static SSHORT internal_compare( TEXTTYPE obj, USHORT length1, UCHAR * p1, USHORT length2, UCHAR * p2) { /************************************** * * i n t e r n a l _ c o m p a r e * ************************************** * * Functional description * **************************************/ SSHORT fill; UCHAR pad; pad = (obj->texttype_type == ttype_binary) ? 
0 : ' '; fill = length1 - length2; if (length1 >= length2) { if (length2) do if (*p1++ != *p2++) if (p1[-1] > p2[-1]) return 1; else return -1; while (--length2); if (fill > 0) do if (*p1++ != pad) if (p1[-1] > pad) return 1; else return -1; while (--fill); return 0; } if (length1) do if (*p1++ != *p2++) if (p1[-1] > p2[-1]) return 1; else return -1; while (--length1); do if (*p2++ != pad) if (pad > p2[-1]) return 1; else return -1; while (++fill); return 0; } static USHORT internal_keylength(TEXTTYPE obj, USHORT iLength) { /************************************** * * i n t e r n a l _ k e y l e n g t h * ************************************** * * Functional description * **************************************/ return (iLength); } static SSHORT internal_nc_mbtowc( TEXTTYPE obj, WCHAR * wc, UCHAR * ptr, USHORT count) { /************************************** * * i n t e r n a l _ n c _ m b t o w c * ************************************** * * Functional description * Get the next character from the multibyte * input stream. * Narrow character version. * Returns: * Count of bytes consumed from the input stream. * **************************************/ assert(obj); assert(ptr); if (count >= 1) { if (wc) *wc = *ptr; return 1; } if (wc) *wc = 0; return -1; /* No more characters */ } static SSHORT internal_mb_mbtowc( TEXTTYPE obj, WCHAR * wc, UCHAR * ptr, USHORT count) { /************************************** * * i n t e r n a l _ m b _ m b t o w c * ************************************** * * Functional description * Get the next character from the multibyte * input stream. * Multibyte version character version. * Returns: * Count of bytes consumed from the input stream. 
* **************************************/ assert(obj); assert(ptr); if (count >= 2) { if (wc) *wc = *(WCHAR *) ptr; return 2; } if (wc) *wc = 0; return -1; /* No more characters */ } static SSHORT internal_wc_mbtowc( TEXTTYPE obj, WCHAR * wc, UCHAR * ptr, USHORT count) { /************************************** * * i n t e r n a l _ w c _ m b t o w c * ************************************** * * Functional description * Get the next character from the multibyte * input stream. * Wide character version. * Returns: * Count of bytes consumed from the input stream. * **************************************/ assert(obj); assert(ptr); if (count >= 2) { if (wc) *wc = *(WCHAR *) ptr; return 2; } if (wc) *wc = 0; return -1; /* No more characters */ } static SSHORT internal_str_copy( TEXTTYPE obj, USHORT inLen, UCHAR * src, USHORT outLen, UCHAR * dest) { /************************************** * * i n t e r n a l _ s t r _ c o p y * ************************************** * * Functional description * Note: dest may equal src. * **************************************/ UCHAR *pStart; pStart = dest; while (inLen-- && outLen--) { *dest++ = *src++; } return (dest - pStart); } static USHORT cvt_utffss_to_ascii(CSCONVERT obj, UCHAR * pDest, USHORT nDest, /* byte count */ UCHAR * pSrc, USHORT nSrc, /* byte count */ SSHORT * err_code, USHORT * err_position) { /************************************** * * c v t _ u t f f s s _ t o _ a s c i i * also * c v t _ a s c i i _ t o _ u t f f s s * also * c v t _ n o n e _ t o _ u t f f s s * ************************************** * * Functional description * Perform a pass-through transformation of ASCII to Unicode * in FSS format. Note that any byte values greater than 127 * cannot be converted in either direction, so the same * routine does double duty. 
* *************************************/ UCHAR *pStart; UCHAR *pStart_src; assert(obj != NULL); assert((pSrc != NULL) || (pDest == NULL)); assert(err_code != NULL); *err_code = 0; if (pDest == NULL) /* length estimate needed? */ return (nSrc); pStart = pDest; pStart_src = pSrc; while (nDest >= sizeof(*pDest) && nSrc >= sizeof(*pSrc)) { if (*pSrc > 127) { /* In the cvt_ascii_to_utffss case this should be CS_BAD_INPUT */ /* but not in cvt_none_to_utffss or cvt_utffss_to_ascii */ *err_code = CS_CONVERT_ERROR; break; } *pDest++ = *pSrc++; nDest -= sizeof(*pDest); nSrc -= sizeof(*pSrc); } if (!*err_code && nSrc) { *err_code = CS_TRUNCATION_ERROR; } *err_position = (pSrc - pStart_src) * sizeof(*pSrc); return ((pDest - pStart) * sizeof(*pDest)); } static USHORT cvt_none_to_unicode(CSCONVERT obj, WCHAR * pDest, USHORT nDest, /* byte count */ UCHAR * pSrc, USHORT nSrc, /* byte count */ SSHORT * err_code, USHORT * err_position) { /************************************** * * c v t _ n o n e _ t o _ u n i c o d e * ************************************** * * Functional description * Convert CHARACTER SET NONE to UNICODE (wide char). * Byte values below 128 treated as ASCII. * Byte values >= 128 create CONVERT ERROR * *************************************/ WCHAR *pStart; UCHAR *pStart_src; assert(obj != NULL); assert((pSrc != NULL) || (pDest == NULL)); assert(err_code != NULL); *err_code = 0; if (pDest == NULL) /* length estimate needed? 
*/ return (2 * nSrc); pStart = pDest; pStart_src = pSrc; while (nDest >= sizeof(*pDest) && nSrc >= sizeof(*pSrc)) { if (*pSrc > 127) { *err_code = CS_CONVERT_ERROR; break; } *pDest++ = *pSrc++; nDest -= sizeof(*pDest); nSrc -= sizeof(*pSrc); } if (!*err_code && nSrc) { *err_code = CS_TRUNCATION_ERROR; } *err_position = (pSrc - pStart_src) * sizeof(*pSrc); return ((pDest - pStart) * sizeof(*pDest)); } static USHORT cvt_ascii_to_unicode(CSCONVERT obj, WCHAR * pDest, USHORT nDest, /* byte count */ UCHAR * pSrc, USHORT nSrc, /* byte count */ SSHORT * err_code, USHORT * err_position) { /************************************** * * c v t _ a s c i i _ t o _ u n i c o d e * ************************************** * * Functional description * Convert CHARACTER SET NONE to UNICODE (wide char). * Byte values below 128 treated as ASCII. * Byte values >= 128 create BAD_INPUT * *************************************/ WCHAR *pStart; UCHAR *pStart_src; assert(obj != NULL); assert((pSrc != NULL) || (pDest == NULL)); assert(err_code != NULL); *err_code = 0; if (pDest == NULL) /* length estimate needed? */ return (2 * nSrc); pStart = pDest; pStart_src = pSrc; while (nDest >= sizeof(*pDest) && nSrc >= sizeof(*pSrc)) { if (*pSrc > 127) { *err_code = CS_BAD_INPUT; break; } *pDest++ = *pSrc++; nDest -= sizeof(*pDest); nSrc -= sizeof(*pSrc); } if (!*err_code && nSrc) { *err_code = CS_TRUNCATION_ERROR; } *err_position = (pSrc - pStart_src) * sizeof(*pSrc); return ((pDest - pStart) * sizeof(*pDest)); } static USHORT cvt_unicode_to_ascii(CSCONVERT obj, NCHAR * pDest, USHORT nDest, /* byte count */ WCHAR * pSrc, USHORT nSrc, /* byte count */ SSHORT * err_code, USHORT * err_position) { /************************************** * * c v t _ u n i c o d e _ t o _ a s c i i * ************************************** * * Functional description * Convert UNICODE to CHARACTER SET ASCII (wide char). * Byte values below 128 treated as ASCII. 
* Byte values >= 128 create CONVERT_ERROR * *************************************/ NCHAR *pStart; WCHAR *pStart_src; assert(obj != NULL); assert((pSrc != NULL) || (pDest == NULL)); assert(err_code != NULL); *err_code = 0; if (pDest == NULL) /* length estimate needed? */ return (nSrc / 2); pStart = pDest; pStart_src = pSrc; while (nDest >= sizeof(*pDest) && nSrc >= sizeof(*pSrc)) { if (*pSrc > 127) { *err_code = CS_CONVERT_ERROR; break; } *pDest++ = *pSrc++; nDest -= sizeof(*pDest); nSrc -= sizeof(*pSrc); } if (!*err_code && nSrc) { *err_code = CS_TRUNCATION_ERROR; } *err_position = (pSrc - pStart_src) * sizeof(*pSrc); return ((pDest - pStart) * sizeof(*pDest)); } static SSHORT internal_str_to_upper( TEXTTYPE obj, USHORT inLen, UCHAR * src, USHORT outLen, UCHAR * dest) { /************************************** * * i n t e r n a l _ s t r _ t o _ u p p e r * ************************************** * * Functional description * Note: dest may equal src. * **************************************/ UCHAR *pStart; pStart = dest; while (inLen-- && outLen--) { *dest++ = UPPER7(*src); src++; } return (dest - pStart); } static USHORT internal_string_to_key( TEXTTYPE obj, USHORT inLen, UCHAR * src, USHORT outLen, UCHAR * dest, USHORT partial) { /************************************** * * i n t e r n a l _ s t r i n g _ t o _ k e y * ************************************** * * Functional description * **************************************/ UCHAR *pStart; UCHAR pad_char; pStart = dest; pad_char = *obj->texttype_collation_table; while (inLen-- && outLen--) *dest++ = *src++; /* strip off ending pad characters */ while (dest > pStart) if (*(dest - 1) == pad_char) dest--; else break; return (dest - pStart); } static BOOLEAN obj_init( USHORT objtype, SSHORT parm1, SSHORT parm2, void *text_object, FPTR_VOID err, STATUS * status) { /************************************** * * o b j _ i n i t * ************************************** * * Functional description * Find the module that 
implements a given text type, and initialize * the text type. * Search algorithm is: * Check list of internal implementations. * Look for general entrypoint in intllib * Look for general entrypoint in intllib2 * Look for a normal UDF entry * Abort with an error. * * Returns: * FALSE - no errors * TRUE - error occurred, and parameter was NULL; * - error occurred, and parameter non-NULL; * * **************************************/ FILECHAR path[MAX_PATH_LENGTH]; ASCII entry[32]; USHORT(*lookup_fn) (USHORT, FPTR_SHORT *, SSHORT, SSHORT); FPTR_SHORT function; INTL_TRACE(("INTL: looking for obj %d ttype %d\n", objtype, parm1)); function = NULL; switch (objtype) { case type_texttype: if (parm1 == ttype_none) function = (FPTR_SHORT) ttype_none_init; else if (parm1 == ttype_ascii) function = (FPTR_SHORT) ttype_ascii_init; else if (parm1 == ttype_unicode_fss) function = (FPTR_SHORT) ttype_unicode_fss_init; else if (parm1 == ttype_binary) function = (FPTR_SHORT) ttype_binary_init; break; case type_charset: if (parm1 == CS_NONE) function = (FPTR_SHORT) cs_none_init; else if (parm1 == CS_ASCII) function = (FPTR_SHORT) cs_ascii_init; else if (parm1 == CS_UNICODE_FSS) function = (FPTR_SHORT) cs_unicode_fss_init; else if (parm1 == CS_UNICODE101) function = (FPTR_SHORT) cs_unicode_init; else if (parm1 == CS_BINARY) function = (FPTR_SHORT) cs_binary_init; break; case type_csconvert: if (((parm1 == CS_ASCII) && (parm2 == CS_UNICODE_FSS)) || ((parm2 == CS_ASCII) && (parm1 == CS_UNICODE_FSS))) function = (FPTR_SHORT) cvt_ascii_utf_init; /* converting FROM NONE to UNICODE has a short cut * - it's treated like ASCII */ else if ((parm2 == CS_NONE) && (parm1 == CS_UNICODE_FSS)) function = (FPTR_SHORT) cvt_ascii_utf_init; #ifdef DEV_BUILD /* Converting TO character set NONE should have been handled at * a higher level */ assert(parm1 != CS_NONE); #endif break; default: BUGCHECK(1); break; } if (function == NULL) { #ifdef INTL_BACKEND if (LD_lookup(objtype, &function, parm1, parm2) != 0) 
function = NULL; #else /* Look for an InterBase supplied object to implement the text type */ /* The flu.c uses searchpath which expects a file name not a path */ strcpy(reinterpret_cast < char *>(path), INTL_MODULE1); INTL_TRACE(("INTL: trying %s %s\n", path, INTL_LOOKUP_ENTRY1)); if (lookup_fn = reinterpret_cast (ISC_lookup_entrypoint(reinterpret_cast < char *>(path), INTL_LOOKUP_ENTRY1, NULL))) { INTL_TRACE(("INTL: calling lookup %s %s\n", path, INTL_LOOKUP_ENTRY1)); if ((*lookup_fn) (objtype, &function, parm1, parm2) != 0) { function = NULL; } } #endif } /* Still not found, check the set of supplimental international objects */ if (function == NULL) { #ifdef INTL_BACKEND if (LD2_lookup(objtype, &function, parm1, parm2) != 0) function = NULL; #else gds__prefix(reinterpret_cast < char *>(path), INTL_MODULE2); INTL_TRACE(("INTL: trying %s %s\n", path, INTL_LOOKUP_ENTRY2)); if (lookup_fn = reinterpret_cast < USHORT(*)(USHORT, USHORT(**)(), short, short) >(ISC_lookup_entrypoint (reinterpret_cast < char *>(path), INTL_LOOKUP_ENTRY2, NULL))) { INTL_TRACE( ("INTL: calling lookup %s %s\n", path, INTL_LOOKUP_ENTRY2)); if ((*lookup_fn) (objtype, &function, parm1, parm2) != 0) { function = NULL; } } #endif } /* Still not found, check if there is a UDF in the database defined the right way */ if (function == NULL) { FUN function_block; USHORT argcount; switch (objtype) { case type_texttype: sprintf((SCHAR *) entry, INTL_USER_ENTRY, parm1); argcount = 2; break; case type_charset: sprintf((SCHAR *) entry, "USER_CHARSET_%03d", parm1); argcount = 2; break; case type_csconvert: sprintf((SCHAR *) entry, "USER_TRANSLATE_%03d_%03d", parm1, parm2); argcount = 3; break; default: BUGCHECK(1); break; } INTL_TRACE(("INTL: trying user fn %s\n", entry)); if (function_block = FUN_lookup_function(entry)) { INTL_TRACE(("INTL: found a user fn, validating\n")); if ((function_block->fun_count == argcount) && (function_block->fun_args == argcount) && (function_block->fun_return_arg == 0) && 
(function_block->fun_entrypoint != NULL) && (function_block->fun_rpt[0].fun_mechanism == FUN_value) && (function_block->fun_rpt[0].fun_desc.dsc_dtype == dtype_short) && (function_block->fun_rpt[1].fun_desc.dsc_dtype == dtype_short) && (function_block->fun_rpt[argcount - 1]. fun_desc.dsc_dtype == dtype_short) && (function_block->fun_rpt[argcount].fun_mechanism == FUN_reference) && (function_block->fun_rpt[argcount].fun_desc.dsc_dtype == dtype_text)) function = (FPTR_SHORT) function_block->fun_entrypoint; } } /* If we found an entry point, call it, if it returns OK flag the * text object as initialized * Otherwise, report error condition "Can't resolve text type" */ if (function != NULL) { if ((USHORT) reinterpret_cast < USHORT(*)(...) > (*function) (text_object, parm1, parm2) == 0) { INTL_TRACE( ("INTL: object %d, ttype %d init ok\n", objtype, parm1)); return FALSE; } } if (err != NULL) reinterpret_cast < void (*) (...) > (*err) (gds_text_subtype, gds_arg_number, (SLONG) parm1, 0); else if (status) IBERR_build_status(status, gds_text_subtype, gds_arg_number, (SLONG) parm1, 0); return TRUE; } static USHORT nc_to_wc(CSCONVERT obj, WCHAR * pWide, USHORT nWide, /* byte count */ UCHAR * pNarrow, USHORT nNarrow, /* byte count */ SSHORT * err_code, USHORT * err_position) { /************************************** * * n c _ t o _ w c * ************************************** * * Functional description * **************************************/ WCHAR *pStart; UCHAR *pNarrowStart; assert(obj != NULL); assert((pNarrow != NULL) || (pWide == NULL)); assert(err_code != NULL); assert(err_position != NULL); *err_code = 0; if (pWide == NULL) return (2 * nNarrow); /* all cases */ pStart = pWide; pNarrowStart = pNarrow; while (nWide-- > 1 && nNarrow) { /* YYY - Byte order issues here */ *pWide++ = (WCHAR) * pNarrow++; nWide--; nNarrow--; } if (!*err_code && nNarrow) { *err_code = CS_TRUNCATION_ERROR; } *err_position = (pNarrow - pNarrowStart) * sizeof(*pNarrow); return ((pWide - pStart) 
* sizeof(*pWide)); } static void pad_spaces(TDBB tdbb, CHARSET_ID charset, BYTE * ptr, USHORT len) { /* byte count */ /************************************** * * p a d _ s p a c e s * ************************************** * * Functional description * Pad a buffer with the character set defined space character. * **************************************/ CHARSET obj; BYTE *end, *space, *end_space; SET_TDBB(tdbb); assert(ptr != NULL); obj = INTL_CHARSETTYPE(charset, (FPTR_VOID) ERR_post); assert(obj != NULL); /* Single-octet character sets are optimized here */ if (obj->charset_space_length == 1) { end = &ptr[len]; while (ptr < end) *ptr++ = *obj->charset_space_character; } else { end = &ptr[len]; end_space = &obj->charset_space_character[obj->charset_space_length]; while (ptr < end) { space = obj->charset_space_character; while (ptr < end && space < end_space) { *ptr++ = *space++; } /* This assert is checking that we didn't have a buffer-end * in the middle of a space character */ assert(!(ptr == end) || (space == end_space)); } } } static USHORT wc_to_nc(CSCONVERT obj, NCHAR * pDest, USHORT nDest, /* byte count */ WCHAR * pSrc, USHORT nSrc, /* byte count */ SSHORT * err_code, USHORT * err_position) { /************************************** * * w c _ t o _ n c * ************************************** * * Functional description * **************************************/ NCHAR *pStart; WCHAR *pStart_src; assert(obj != NULL); assert((pSrc != NULL) || (pDest == NULL)); assert(err_code != NULL); assert(err_position != NULL); *err_code = 0; if (pDest == NULL) /* length estimate needed? 
*/ return ((nSrc + 1) / 2); pStart = pDest; pStart_src = pSrc; while (nDest && nSrc >= sizeof(*pSrc)) { if (*pSrc >= 256) { *err_code = CS_CONVERT_ERROR; break; } *pDest++ = *pSrc++; nDest -= sizeof(*pDest); nSrc -= sizeof(*pSrc); } if (!*err_code && nSrc) { *err_code = CS_TRUNCATION_ERROR; } *err_position = (pSrc - pStart_src) * sizeof(*pSrc); return ((pDest - pStart) * sizeof(*pDest)); } static USHORT mb_to_wc(CSCONVERT obj, WCHAR * pDest, USHORT nDest, /* byte count */ MBCHAR * pSrc, USHORT nSrc, /* byte count */ SSHORT * err_code, USHORT * err_position) { /************************************** * * m b _ t o _ w c * ************************************** * * Functional description * Convert a wc string from network form - high-endian * byte stream. * *************************************/ WCHAR *pStart; MBCHAR *pStart_src; assert(obj != NULL); assert((pSrc != NULL) || (pDest == NULL)); assert(err_code != NULL); assert(err_position != NULL); *err_code = 0; if (pDest == NULL) /* length estimate needed? */ return (nSrc); pStart = pDest; pStart_src = pSrc; while (nDest > 1 && nSrc > 1) { *pDest++ = *pSrc * 256 + *(pSrc + 1); pSrc += 2; nDest -= 2; nSrc -= 2; } if (!*err_code && nSrc) { *err_code = CS_TRUNCATION_ERROR; } *err_position = (pSrc - pStart_src) * sizeof(*pSrc); return ((pDest - pStart) * sizeof(*pDest)); } static USHORT wc_to_mb(CSCONVERT obj, MBCHAR * pDest, USHORT nDest, /* byte count */ WCHAR * pSrc, USHORT nSrc, /* byte count */ SSHORT * err_code, USHORT * err_position) { /************************************** * * w c _ t o _ m b * ************************************** * * Functional description * Convert a wc string to network form - high-endian * byte stream. * *************************************/ MBCHAR *pStart; WCHAR *pStart_src; assert(obj != NULL); assert((pSrc != NULL) || (pDest == NULL)); assert(err_code != NULL); assert(err_position != NULL); *err_code = 0; if (pDest == NULL) /* length estimate needed? 
*/ return (nSrc); pStart = pDest; pStart_src = pSrc; while (nDest > 1 && nSrc > 1) { *pDest++ = *pSrc / 256; *pDest++ = *pSrc++ % 256; nDest -= 2; nSrc -= 2; } if (!*err_code && nSrc) { *err_code = CS_TRUNCATION_ERROR; } *err_position = (pSrc - pStart_src) * sizeof(*pSrc); return ((pDest - pStart) * sizeof(*pDest)); } static USHORT wc_to_wc(CSCONVERT obj, WCHAR * pDest, USHORT nDest, /* byte count */ WCHAR * pSrc, USHORT nSrc, /* byte count */ SSHORT * err_code, USHORT * err_position) { /************************************** * * w c _ t o _ w c * ************************************** * * Functional description * *************************************/ WCHAR *pStart; WCHAR *pStart_src; assert(obj != NULL); assert((pSrc != NULL) || (pDest == NULL)); assert(err_code != NULL); assert(err_position != NULL); *err_code = 0; if (pDest == NULL) /* length estimate needed? */ return (nSrc); pStart = pDest; pStart_src = pSrc; while (nDest > 1 && nSrc > 1) { *pDest++ = *pSrc++; nDest -= 2; nSrc -= 2; } if (!*err_code && nSrc) { *err_code = CS_TRUNCATION_ERROR; } *err_position = (pSrc - pStart_src) * sizeof(*pSrc); return ((pDest - pStart) * sizeof(*pDest)); } #define TEXTTYPE_RETURN return (0) #define FAMILY_INTERNAL(id_number, name, charset, country) \ cache->texttype_version = 40; \ cache->texttype_type = (id_number); \ cache->texttype_character_set = (charset); \ cache->texttype_country = (country); \ cache->texttype_bytes_per_char = 1; \ cache->texttype_fn_init = (FPTR_SHORT) (name); \ cache->texttype_fn_key_length = (FPTR_SHORT) internal_keylength; \ cache->texttype_fn_string_to_key = (FPTR_SHORT) internal_string_to_key; \ cache->texttype_fn_compare = (FPTR_short) internal_compare; \ cache->texttype_fn_to_upper = (FPTR_SHORT) internal_ch_to_upper; \ cache->texttype_fn_to_lower = (FPTR_SHORT) internal_ch_to_lower; \ cache->texttype_fn_str_to_upper = (FPTR_short) internal_str_to_upper; \ cache->texttype_fn_mbtowc = (FPTR_short) internal_nc_mbtowc; \ 
cache->texttype_collation_table = (BYTE *) " "; \ cache->texttype_toupper_table = (BYTE *) NULL; \ cache->texttype_tolower_table = (BYTE *) NULL; \ cache->texttype_compress_table = (BYTE *) NULL; \ cache->texttype_expand_table = (BYTE *) NULL; \ cache->texttype_name = const_cast(POSIX); \ static USHORT ttype_ascii_init(TEXTTYPE cache, USHORT parm1, USHORT dummy) { /************************************** * * t t y p e _ a s c i i _ i n i t * ************************************** * * Functional description * *************************************/ static CONST ASCII POSIX[] = "C.ASCII"; FAMILY_INTERNAL(ttype_ascii, ttype_ascii_init, CS_ASCII, CC_C); TEXTTYPE_RETURN; } static USHORT ttype_none_init(TEXTTYPE cache, USHORT parm1, USHORT dummy) { /************************************** * * t t y p e _ n o n e _ i n i t * ************************************** * * Functional description * *************************************/ static CONST ASCII POSIX[] = "C"; FAMILY_INTERNAL(ttype_none, ttype_none_init, CS_NONE, CC_C); TEXTTYPE_RETURN; } static USHORT ttype_unicode_fss_init( TEXTTYPE cache, USHORT parm1, USHORT dummy) { /************************************** * * t t y p e _ u n i c o d e _ f s s _ i n i t * ************************************** * * Functional description * *************************************/ static CONST ASCII POSIX[] = "C.UNICODE_FSS"; FAMILY_INTERNAL(ttype_unicode_fss, ttype_unicode_fss_init, CS_UNICODE_FSS, CC_C); cache->texttype_bytes_per_char = 3; cache->texttype_fn_to_wc = (FPTR_SHORT) INTL_fss_to_unicode; cache->texttype_fn_mbtowc = (FPTR_short) INTL_fss_mbtowc; TEXTTYPE_RETURN; } static USHORT ttype_binary_init(TEXTTYPE cache, USHORT parm1, USHORT dummy) { /************************************** * * t t y p e _ b i n a r y _ i n i t * ************************************** * * Functional description * *************************************/ static CONST ASCII POSIX[] = "C.OCTETS"; FAMILY_INTERNAL(ttype_binary, ttype_binary_init, CS_BINARY, 
CC_C); cache->texttype_fn_to_upper = (FPTR_SHORT) internal_ch_copy; cache->texttype_fn_to_lower = (FPTR_SHORT) internal_ch_copy; cache->texttype_fn_str_to_upper = (FPTR_short) internal_str_copy; cache->texttype_collation_table = (BYTE *) "\0"; /* pad character */ TEXTTYPE_RETURN; } /* * Start of Character set definitions */ #define CHARSET_RETURN return (0) static void common_8bit_init( CHARSET csptr, USHORT id, ASCII * name, BYTE * to_unicode_tbl, BYTE * from_unicode_tbl1, BYTE * from_unicode_tbl2) { /************************************** * * c o m m o n _ 8 b i t _ i n i t * ************************************** * * Functional description * *************************************/ csptr->charset_version = 40; csptr->charset_id = id; csptr->charset_name = name; csptr->charset_flags = 0; csptr->charset_min_bytes_per_char = 1; csptr->charset_max_bytes_per_char = 1; csptr->charset_space_length = 1; csptr->charset_space_character = (BYTE *) " "; csptr->charset_well_formed = (FPTR_SHORT) NULL; } static USHORT cs_ascii_init(CHARSET csptr, USHORT cs_id, USHORT dummy) { /************************************** * * c s _ a s c i i _ i n i t * ************************************** * * Functional description * *************************************/ common_8bit_init(csptr, CS_ASCII, (ASCII *) "ASCII", NULL, NULL, NULL); common_convert_init(&csptr->charset_to_unicode, CS_UNICODE101, CS_ASCII, (FPTR_SHORT) cvt_ascii_to_unicode, NULL, NULL); common_convert_init(&csptr->charset_from_unicode, CS_ASCII, CS_UNICODE101, (FPTR_SHORT) cvt_unicode_to_ascii, NULL, NULL); CHARSET_RETURN; } static USHORT cs_none_init(CHARSET csptr, USHORT cs_id, USHORT dummy) { /************************************** * * c s _ n o n e _ i n i t * ************************************** * * Functional description * *************************************/ common_8bit_init(csptr, CS_NONE, (ASCII *) "NONE", NULL, NULL, NULL); /* common_convert_init (&csptr->charset_to_unicode, CS_UNICODE101, id, nc_to_wc, 
to_unicode_tbl, NULL); */ common_convert_init(&csptr->charset_to_unicode, CS_UNICODE101, CS_NONE, (FPTR_SHORT) cvt_none_to_unicode, NULL, NULL); common_convert_init(&csptr->charset_from_unicode, CS_NONE, CS_UNICODE101, (FPTR_SHORT) wc_to_nc, NULL, NULL); CHARSET_RETURN; } static USHORT cs_unicode_fss_init(CHARSET csptr, USHORT cs_id, USHORT dummy) { /************************************** * * c s _ u n i c o d e _ f s s _ i n i t * ************************************** * * Functional description * *************************************/ common_8bit_init(csptr, CS_UNICODE_FSS, (ASCII *) "UNICODE_FSS", NULL, NULL, NULL); common_convert_init(&csptr->charset_to_unicode, CS_UNICODE101, CS_UNICODE_FSS, (FPTR_SHORT) INTL_fss_to_unicode, NULL, NULL); common_convert_init(&csptr->charset_from_unicode, CS_UNICODE_FSS, CS_UNICODE101, (FPTR_SHORT) INTL_unicode_to_fss, NULL, NULL); CHARSET_RETURN; } static USHORT cs_unicode_init(CHARSET csptr, USHORT cs_id, USHORT dummy) { /************************************** * * c s _ u n i c o d e _ i n i t * ************************************** * * Functional description * *************************************/ static CONST WCHAR space = 0x0020; csptr->charset_version = 40; csptr->charset_id = CS_UNICODE101; csptr->charset_name = ( /*CONST*/ ASCII *) "UNICODE101"; csptr->charset_flags = 0; csptr->charset_min_bytes_per_char = 2; csptr->charset_max_bytes_per_char = 2; csptr->charset_space_length = 2; csptr->charset_space_character = (BYTE *) & space; /* 0x0020 */ csptr->charset_well_formed = (FPTR_SHORT) NULL; CHARSET_RETURN; } static USHORT cs_binary_init(CHARSET csptr, USHORT cs_id, USHORT dummy) { /************************************** * * c s _ b i n a r y _ i n i t * ************************************** * * Functional description * *************************************/ common_8bit_init(csptr, CS_BINARY, (ASCII *) "BINARY", NULL, NULL, NULL); csptr->charset_space_character = (BYTE *) "\0"; 
common_convert_init(&csptr->charset_to_unicode, CS_UNICODE101, CS_BINARY, (FPTR_SHORT) mb_to_wc, NULL, NULL); common_convert_init(&csptr->charset_from_unicode, CS_BINARY, CS_UNICODE101, (FPTR_SHORT) wc_to_mb, NULL, NULL); CHARSET_RETURN; } /* * Start of Conversion entries */ #define CONVERT_RETURN return (0) static USHORT cvt_ascii_utf_init( CSCONVERT csptr, USHORT dest_cs, USHORT source_cs) { /************************************** * * c v t _ a s c i i _ u t f _ i n i t * ************************************** * * Functional description * *************************************/ common_convert_init(csptr, dest_cs, source_cs, (FPTR_SHORT) cvt_utffss_to_ascii, NULL, NULL); CONVERT_RETURN; } #ifdef DEV_BUILD /* * Utility routines designed to be called from the debugger to * print buffers, pointers, etc. which may contain text that * the debugger doesn't consider visible. */ static void dump_hex(UCHAR * p, USHORT len) { /************************************** * * d u m p _ h e x * ************************************** * * Functional description * *************************************/ while (len--) ib_printf("%02X ", *p++); ib_printf("\n"); } static void dump_latin(UCHAR * p, USHORT len) { /************************************** * * d u m p _ l a t i n * ************************************** * * Functional description * *************************************/ while (len--) if (isprintable(*p)) ib_printf("%c", *p++); else ib_printf("\0x%02X", *p++); ib_printf("\n"); } #endif } // extern "C"