mirror of
https://github.com/FirebirdSQL/firebird.git
synced 2025-01-26 08:03:03 +01:00
1607 lines
44 KiB
C++
1607 lines
44 KiB
C++
#include "intl_classes.h"
|
|
#ifdef HAVE_UNISTD_H
|
|
#include <unistd.h>
|
|
#endif
|
|
#include "../common/classes/alloc.h"
|
|
#include "../jrd/intl.h"
|
|
#include "../intl/country_codes.h"
|
|
#include "../jrd/gdsassert.h"
|
|
|
|
|
|
// types used internally by this file
|
|
// Wide character type handled by the FSS-UTF helpers below.
typedef USHORT fss_wchar_t;

// Length / status type of the FSS-UTF helpers (they return -1 on failure).
typedef int fss_size_t;

// Plain byte type.
typedef unsigned char FILECHAR;
|
|
|
|
// internal functions
|
|
static fss_size_t fss_wctomb(UCHAR *, fss_wchar_t);
|
|
static fss_size_t fss_mbtowc(fss_wchar_t *, UCHAR *, fss_size_t);
|
|
static unsigned short cvt_utffss_to_ascii(UCHAR *, USHORT, UCHAR *, USHORT,
|
|
short*, USHORT *);
|
|
static USHORT internal_string_to_key(USHORT, UCHAR *, USHORT,
|
|
UCHAR *, USHORT, UCHAR);
|
|
static SSHORT internal_compare(USHORT, UCHAR *, USHORT, UCHAR *, UCHAR);
|
|
static SSHORT internal_str_to_upper(USHORT, UCHAR *, USHORT,
|
|
UCHAR *);
|
|
static unsigned short internal_unicode_to_fss(
|
|
unsigned char *,
|
|
unsigned short,
|
|
unsigned char *,
|
|
unsigned short ,
|
|
short *,
|
|
unsigned short *);
|
|
static USHORT internal_fss_to_unicode (
|
|
UCS2_CHAR*,
|
|
USHORT,
|
|
NCHAR*,
|
|
USHORT,
|
|
SSHORT*,
|
|
USHORT*);
|
|
/********************************************************************************/
|
|
// This module contains the code to handle the default international character
|
|
// sets. As such it defines its own subclasses of the international interface.
|
|
// The classes are defined in this section and implemented towards the end of the
|
|
// file.
|
|
|
|
|
|
class TextType_Binary : public TextTypeNC
|
|
{
|
|
public:
|
|
static TextType *object_factory(MemoryPool &p, CHARSET_ID u1, CHARSET_ID u2)
|
|
{ return FB_NEW(p) TextType_Binary; }
|
|
TextType_Binary() : TextTypeNC(ttype_binary, "C.OCTETS", CS_BINARY, CC_C, 1)
|
|
{}
|
|
|
|
unsigned short key_length(unsigned short len)
|
|
{ return len; }
|
|
unsigned short string_to_key(unsigned short a,
|
|
unsigned char *b,
|
|
unsigned short c,
|
|
unsigned char *d,
|
|
unsigned short e)
|
|
{ return internal_string_to_key(a,b,c,d,e,'\0'); }
|
|
|
|
short compare(unsigned short a,
|
|
unsigned char *b,
|
|
unsigned short c,
|
|
unsigned char *d)
|
|
{ return internal_compare(a,b,c,d,'\0'); }
|
|
|
|
unsigned short to_upper(unsigned short ch)
|
|
{ return (ch); }
|
|
|
|
unsigned short to_lower(unsigned short ch)
|
|
{ return (ch); }
|
|
|
|
short str_to_upper(unsigned short,
|
|
unsigned char *,
|
|
unsigned short,
|
|
unsigned char *);
|
|
};
|
|
|
|
class TextType_UFSS : public TextTypeMB
|
|
{
|
|
public:
|
|
static TextType *object_factory(MemoryPool &p, CHARSET_ID u1, CHARSET_ID u2)
|
|
{ return FB_NEW(p) TextType_UFSS; }
|
|
TextType_UFSS() : TextTypeMB(ttype_unicode_fss, "C.UNICODE_FSS", CS_UNICODE_FSS, CC_C, 3)
|
|
{}
|
|
|
|
unsigned short key_length(unsigned short len)
|
|
{ return len; }
|
|
unsigned short string_to_key(unsigned short a,
|
|
unsigned char *b,
|
|
unsigned short c,
|
|
unsigned char *d,
|
|
unsigned short e)
|
|
{ return internal_string_to_key(a,b,c,d,e,' '); }
|
|
|
|
short compare(unsigned short a,
|
|
unsigned char *b,
|
|
unsigned short c,
|
|
unsigned char *d)
|
|
{ return internal_compare(a,b,c,d,' '); }
|
|
|
|
unsigned short to_upper(unsigned short ch)
|
|
{ return (UPPER7(ch)); }
|
|
|
|
unsigned short to_lower(unsigned short ch)
|
|
{ return ((((ch) >= 'A') && ((ch) < 'Z')) ? ((ch) - 'A' + 'a') : (ch)); }
|
|
|
|
short str_to_upper(unsigned short a,
|
|
unsigned char *b,
|
|
unsigned short c,
|
|
unsigned char *d)
|
|
{ return internal_str_to_upper(a,b,c,d); }
|
|
|
|
unsigned short to_wc(UCS2_CHAR *a,
|
|
unsigned short b,
|
|
unsigned char *c,
|
|
unsigned short d,
|
|
short *e,
|
|
unsigned short *f)
|
|
{ return internal_fss_to_unicode(a,b,c,d,e,f); }
|
|
|
|
unsigned short mbtowc(UCS2_CHAR *wc, unsigned char *p, unsigned short n)
|
|
{ return fss_mbtowc(wc, p, n); }
|
|
};
|
|
|
|
class TextType_ASCII : public TextTypeNC
|
|
{
|
|
public:
|
|
static TextType *object_factory(MemoryPool &p, CHARSET_ID u1, CHARSET_ID u2)
|
|
{ return FB_NEW(p) TextType_ASCII; }
|
|
TextType_ASCII() : TextTypeNC(ttype_ascii, "C.ASCII", CS_ASCII, CC_C, 1)
|
|
{}
|
|
|
|
unsigned short key_length(unsigned short len)
|
|
{ return len; }
|
|
unsigned short string_to_key(unsigned short a,
|
|
unsigned char *b,
|
|
unsigned short c,
|
|
unsigned char *d,
|
|
unsigned short e)
|
|
{ return internal_string_to_key(a,b,c,d,e,' '); }
|
|
|
|
short compare(unsigned short a,
|
|
unsigned char *b,
|
|
unsigned short c,
|
|
unsigned char *d)
|
|
{ return internal_compare(a,b,c,d,' '); }
|
|
|
|
unsigned short to_upper(unsigned short ch)
|
|
{ return (UPPER7(ch)); }
|
|
|
|
unsigned short to_lower(unsigned short ch)
|
|
{ return ((((ch) >= 'A') && ((ch) < 'Z')) ? ((ch) - 'A' + 'a') : (ch)); }
|
|
|
|
short str_to_upper(unsigned short a,
|
|
unsigned char *b,
|
|
unsigned short c,
|
|
unsigned char *d)
|
|
{ return internal_str_to_upper(a,b,c,d); }
|
|
};
|
|
|
|
class TextType_None : public TextTypeNC
|
|
{
|
|
public:
|
|
static TextType *object_factory(MemoryPool &p, CHARSET_ID u1, CHARSET_ID u2)
|
|
{ return FB_NEW(p) TextType_None; }
|
|
TextType_None() : TextTypeNC(ttype_none, "C", CS_NONE, CC_C, 1)
|
|
{}
|
|
|
|
unsigned short key_length(unsigned short len)
|
|
{ return len; }
|
|
unsigned short string_to_key(unsigned short a,
|
|
unsigned char *b,
|
|
unsigned short c,
|
|
unsigned char *d,
|
|
unsigned short e)
|
|
{ return internal_string_to_key(a,b,c,d,e,' '); }
|
|
|
|
short compare(unsigned short a,
|
|
unsigned char *b,
|
|
unsigned short c,
|
|
unsigned char *d)
|
|
{ return internal_compare(a,b,c,d,' '); }
|
|
|
|
unsigned short to_upper(unsigned short ch)
|
|
{ return (UPPER7(ch)); }
|
|
|
|
unsigned short to_lower(unsigned short ch)
|
|
{ return ((((ch) >= 'A') && ((ch) < 'Z')) ? ((ch) - 'A' + 'a') : (ch)); }
|
|
|
|
short str_to_upper(unsigned short a,
|
|
unsigned char *b,
|
|
unsigned short c,
|
|
unsigned char *d)
|
|
{ return internal_str_to_upper(a,b,c,d); }
|
|
};
|
|
|
|
class CsConvert_ASCII_UFSS : public CsConvert
|
|
{
|
|
public:
|
|
static CsConvert *object_factory(MemoryPool& p, CHARSET_ID u1, CHARSET_ID u2)
|
|
{ return FB_NEW(p) CsConvert_ASCII_UFSS; }
|
|
CsConvert_ASCII_UFSS() : CsConvert(0, "DIRECT", CS_ASCII, CS_UNICODE_FSS)
|
|
{}
|
|
|
|
unsigned short convert(unsigned char *a,
|
|
unsigned short b,
|
|
unsigned char *c,
|
|
unsigned short d,
|
|
short *e,
|
|
unsigned short *f)
|
|
{ return cvt_utffss_to_ascii(a,b,c,d,e,f); }
|
|
};
|
|
|
|
class CsConvert_UFSS_ASCII : public CsConvert
|
|
{
|
|
public:
|
|
static CsConvert *object_factory(MemoryPool& p, CHARSET_ID u1, CHARSET_ID u2)
|
|
{ return FB_NEW(p) CsConvert_UFSS_ASCII; }
|
|
CsConvert_UFSS_ASCII() : CsConvert(0, "DIRECT", CS_UNICODE_FSS, CS_ASCII)
|
|
{}
|
|
unsigned short convert(unsigned char *a,
|
|
unsigned short b,
|
|
unsigned char *c,
|
|
unsigned short d,
|
|
short *e,
|
|
unsigned short *f)
|
|
{ return cvt_utffss_to_ascii(a,b,c,d,e,f); }
|
|
};
|
|
|
|
class CsConvert_UFSS_None : public CsConvert
|
|
{
|
|
public:
|
|
static CsConvert *object_factory(MemoryPool& p, CHARSET_ID u1, CHARSET_ID u2)
|
|
{ return FB_NEW(p) CsConvert_UFSS_None; }
|
|
CsConvert_UFSS_None() : CsConvert(0, "DIRECT", CS_UNICODE_FSS, CS_NONE)
|
|
{}
|
|
unsigned short convert(unsigned char *a,
|
|
unsigned short b,
|
|
unsigned char *c,
|
|
unsigned short d,
|
|
short *e,
|
|
unsigned short *f)
|
|
{ return cvt_utffss_to_ascii(a,b,c,d,e,f); }
|
|
};
|
|
|
|
class CsConvert_None_Unicode : public CsConvert
|
|
{
|
|
public:
|
|
static CsConvert *object_factory(MemoryPool& p, CHARSET_ID u1, CHARSET_ID u2)
|
|
{ return FB_NEW(p) CsConvert_None_Unicode; }
|
|
CsConvert_None_Unicode() : CsConvert(0, "DIRECT", CS_NONE, CS_UNICODE_UCS2)
|
|
{}
|
|
unsigned short convert(unsigned char*,
|
|
unsigned short,
|
|
unsigned char*,
|
|
unsigned short,
|
|
short*,
|
|
unsigned short*);
|
|
};
|
|
|
|
class CsConvert_Unicode_None : public CsConvert
|
|
{
|
|
public:
|
|
static CsConvert *object_factory(MemoryPool& p, CHARSET_ID u1, CHARSET_ID u2)
|
|
{ return FB_NEW(p) CsConvert_Unicode_None; }
|
|
CsConvert_Unicode_None() : CsConvert(0, "DIRECT", CS_UNICODE_UCS2, CS_NONE)
|
|
{}
|
|
unsigned short convert(unsigned char*,
|
|
unsigned short,
|
|
unsigned char*,
|
|
unsigned short,
|
|
short*,
|
|
unsigned short*);
|
|
};
|
|
|
|
class CsConvert_ASCII_Unicode : public CsConvert
|
|
{
|
|
public:
|
|
static CsConvert *object_factory(MemoryPool& p, CHARSET_ID u1, CHARSET_ID u2)
|
|
{ return FB_NEW(p) CsConvert_ASCII_Unicode; }
|
|
CsConvert_ASCII_Unicode() : CsConvert(0, "DIRECT", CS_ASCII, CS_UNICODE_UCS2)
|
|
{}
|
|
unsigned short convert(unsigned char*,
|
|
unsigned short,
|
|
unsigned char*,
|
|
unsigned short,
|
|
short*,
|
|
unsigned short*);
|
|
};
|
|
|
|
class CsConvert_Unicode_ASCII : public CsConvert
|
|
{
|
|
public:
|
|
static CsConvert *object_factory(MemoryPool& p, CHARSET_ID u1, CHARSET_ID u2)
|
|
{ return FB_NEW(p) CsConvert_Unicode_ASCII; }
|
|
CsConvert_Unicode_ASCII() : CsConvert(0, "DIRECT", CS_UNICODE_UCS2, CS_ASCII)
|
|
{}
|
|
unsigned short convert(unsigned char*,
|
|
unsigned short,
|
|
unsigned char*,
|
|
unsigned short,
|
|
short*,
|
|
unsigned short*);
|
|
};
|
|
|
|
class CsConvert_UFSS_Unicode : public CsConvert
|
|
{
|
|
public:
|
|
static CsConvert *object_factory(MemoryPool& p, CHARSET_ID u1, CHARSET_ID u2)
|
|
{ return FB_NEW(p) CsConvert_UFSS_Unicode; }
|
|
CsConvert_UFSS_Unicode() : CsConvert(0, "DIRECT", CS_UNICODE_FSS, CS_UNICODE_UCS2)
|
|
{}
|
|
unsigned short convert(unsigned char*,
|
|
unsigned short,
|
|
unsigned char*,
|
|
unsigned short,
|
|
short*,
|
|
unsigned short*);
|
|
};
|
|
|
|
class CsConvert_Unicode_UFSS : public CsConvert
|
|
{
|
|
public:
|
|
static CsConvert *object_factory(MemoryPool& p, CHARSET_ID u1, CHARSET_ID u2)
|
|
{ return FB_NEW(p) CsConvert_Unicode_UFSS; }
|
|
CsConvert_Unicode_UFSS() : CsConvert(0, "DIRECT", CS_UNICODE_UCS2, CS_UNICODE_FSS)
|
|
{}
|
|
unsigned short convert(unsigned char *a,
|
|
unsigned short b,
|
|
unsigned char *c,
|
|
unsigned short d,
|
|
short *e,
|
|
unsigned short *f)
|
|
{ return internal_unicode_to_fss(a,b,c,d,e,f); }
|
|
};
|
|
|
|
class CsConvert_Binary_Unicode : public CsConvert
|
|
{
|
|
public:
|
|
static CsConvert *object_factory(MemoryPool& p, CHARSET_ID u1, CHARSET_ID u2)
|
|
{ return FB_NEW(p) CsConvert_Binary_Unicode; }
|
|
CsConvert_Binary_Unicode() : CsConvert(0, "DIRECT", CS_BINARY, CS_UNICODE_UCS2)
|
|
{}
|
|
unsigned short convert(unsigned char*,
|
|
unsigned short,
|
|
unsigned char*,
|
|
unsigned short,
|
|
short*,
|
|
unsigned short*);
|
|
};
|
|
|
|
class CsConvert_Unicode_Binary : public CsConvert
|
|
{
|
|
public:
|
|
static CsConvert *object_factory(MemoryPool& p, CHARSET_ID u1, CHARSET_ID u2)
|
|
{ return FB_NEW(p) CsConvert_Unicode_Binary; }
|
|
CsConvert_Unicode_Binary() : CsConvert(0, "DIRECT", CS_UNICODE_UCS2, CS_BINARY)
|
|
{}
|
|
unsigned short convert(unsigned char*,
|
|
unsigned short,
|
|
unsigned char*,
|
|
unsigned short,
|
|
short*,
|
|
unsigned short*);
|
|
};
|
|
|
|
class CharSet_None : public CharSet
|
|
{
|
|
public:
|
|
static CharSet *object_factory(MemoryPool &p, CHARSET_ID u1, CHARSET_ID u2)
|
|
{ return FB_NEW(p) CharSet_None(p); }
|
|
CharSet_None(MemoryPool &p) : CharSet(CS_NONE, "NONE", 1, 1, 1, " ")
|
|
{
|
|
charset_to_unicode = CsConvert_None_Unicode::object_factory(p,0,0);
|
|
charset_from_unicode = CsConvert_Unicode_None::object_factory(p,0,0);
|
|
}
|
|
};
|
|
|
|
class CharSet_ASCII : public CharSet
|
|
{
|
|
public:
|
|
static CharSet *object_factory(MemoryPool &p, CHARSET_ID u1, CHARSET_ID u2)
|
|
{ return FB_NEW(p) CharSet_ASCII(p); }
|
|
CharSet_ASCII(MemoryPool &p) : CharSet(CS_ASCII, "ASCII", 1, 1, 1, " ")
|
|
{
|
|
charset_to_unicode = CsConvert_ASCII_Unicode::object_factory(p,0,0);
|
|
charset_from_unicode = CsConvert_Unicode_ASCII::object_factory(p,0,0);
|
|
}
|
|
};
|
|
|
|
class CharSet_Unicode_FSS : public CharSet
|
|
{
|
|
public:
|
|
static CharSet *object_factory(MemoryPool &p, CHARSET_ID u1, CHARSET_ID u2)
|
|
{ return FB_NEW(p) CharSet_Unicode_FSS(p); }
|
|
CharSet_Unicode_FSS(MemoryPool &p) : CharSet(CS_UNICODE_FSS, "UNICODE_FSS", 1, 1, 1, " ")
|
|
{
|
|
charset_to_unicode = CsConvert_UFSS_Unicode::object_factory(p,0,0);
|
|
charset_from_unicode = CsConvert_Unicode_UFSS::object_factory(p,0,0);
|
|
}
|
|
};
|
|
|
|
class CharSet_Unicode : public CharSet
|
|
{
|
|
public:
|
|
static CharSet *object_factory(MemoryPool &p, CHARSET_ID u1, CHARSET_ID u2)
|
|
{ return FB_NEW(p) CharSet_Unicode(p); }
|
|
CharSet_Unicode(MemoryPool &p) : CharSet(CS_UNICODE_UCS2, "UNICODE_UCS2", 2, 2, 2, 0)
|
|
{
|
|
static const UCS2_CHAR space = 0x0020;
|
|
charset_space_character = (const char *) & space; /* 0x0020 */
|
|
}
|
|
};
|
|
|
|
class CharSet_Binary : public CharSet
|
|
{
|
|
public:
|
|
static CharSet *object_factory(MemoryPool &p, CHARSET_ID u1, CHARSET_ID u2)
|
|
{ return FB_NEW(p) CharSet_Binary(p); }
|
|
CharSet_Binary(MemoryPool &p) : CharSet(CS_BINARY, "BINARY", 1, 1, 1, "\0")
|
|
{
|
|
charset_to_unicode = CsConvert_Binary_Unicode::object_factory(p,0,0);
|
|
charset_from_unicode = CsConvert_Unicode_Binary::object_factory(p,0,0);
|
|
}
|
|
};
|
|
|
|
/********************************************************************************/
|
|
|
|
#ifndef HAVE_SWAB
|
|
#ifdef HAVE__SWAB
|
|
#define swab _swab
|
|
#else // use generic swab(). Slow (but faster than previous implementation) and buggy
|
|
/*
 * Fallback swab(): copy n bytes from a to b, exchanging every pair of
 * adjacent bytes. When n is odd the last byte is neither read nor written;
 * n <= 1 (including negative values) copies nothing.
 *
 * Both bytes of a pair are read before either is written, so the call also
 * works in place (a == b). Other partially overlapping buffers are still
 * not supported.
 */
void swab(char * a, char * b, int n)
{
	for (; n > 1; n -= 2)
	{
		const char first = a[0];
		const char second = a[1];

		b[0] = second;
		b[1] = first;

		a += 2;
		b += 2;
	}
}
|
|
#endif
|
|
#endif
|
|
|
|
unsigned short CsConvert_Unicode_Binary::convert(
|
|
unsigned char *pDest,
|
|
unsigned short nDest,
|
|
unsigned char *pSrcUC,
|
|
unsigned short nSrc,
|
|
short *err_code,
|
|
unsigned short *err_position)
|
|
{
|
|
/**************************************
|
|
*
|
|
* CsConvert_Unicode_Binary::convert
|
|
*
|
|
**************************************
|
|
*
|
|
* Functional description
|
|
* Convert a wc string to network form - high-endian
|
|
* byte stream.
|
|
*
|
|
*************************************/
|
|
unsigned short res;
|
|
|
|
assert((pSrcUC != NULL) || (pDest == NULL));
|
|
assert(err_code != NULL);
|
|
assert(err_position != NULL);
|
|
|
|
*err_code = 0;
|
|
if (pDest == NULL) /* length estimate needed? */
|
|
return (nSrc);
|
|
|
|
assert(nSrc&1 == 0); // check for even length
|
|
|
|
if (nSrc>nDest) {
|
|
*err_code = CS_TRUNCATION_ERROR;
|
|
}
|
|
res = nSrc>nDest?nDest:nSrc;
|
|
#ifdef WORDS_BIGENDIAN
|
|
MOVE_FAST(pSrcUC,pDest,res);
|
|
#else
|
|
swab((char *)pSrcUC,(char *)pDest,res);
|
|
#endif
|
|
*err_position = res;
|
|
|
|
return res;
|
|
}
|
|
|
|
|
|
unsigned short CsConvert_Binary_Unicode::convert(
|
|
unsigned char *pDestUC,
|
|
unsigned short nDest,
|
|
unsigned char *pSrc,
|
|
unsigned short nSrc,
|
|
short *err_code,
|
|
unsigned short *err_position)
|
|
{
|
|
/**************************************
|
|
*
|
|
* CsConvert_Binary_Unicode::convert
|
|
*
|
|
**************************************
|
|
*
|
|
* Functional description
|
|
* Convert a wc string from network form - high-endian
|
|
* byte stream.
|
|
* Symmetrical with CsConvert_Unicode_Binary::convert
|
|
* but really the same
|
|
*
|
|
*************************************/
|
|
unsigned short res;
|
|
|
|
assert((pSrc != NULL) || (pDestUC == NULL));
|
|
assert(err_code != NULL);
|
|
assert(err_position != NULL);
|
|
|
|
*err_code = 0;
|
|
if (pDestUC == NULL) /* length estimate needed? */
|
|
return (nSrc);
|
|
|
|
assert(nSrc&1 == 0);
|
|
|
|
if (nSrc>nDest) {
|
|
*err_code = CS_TRUNCATION_ERROR;
|
|
}
|
|
res = nSrc>nDest?nDest:nSrc;
|
|
#ifdef WORDS_BIGENDIAN
|
|
MOVE_FAST(pSrc,pDestUC,res);
|
|
#else
|
|
swab((char *)pSrc,(char *)pDestUC,res);
|
|
#endif
|
|
*err_position = res;
|
|
|
|
return res;
|
|
}
|
|
|
|
unsigned short CsConvert_UFSS_Unicode::convert(
|
|
unsigned char *dest_ptrUC,
|
|
unsigned short dest_len,
|
|
unsigned char *src_ptr,
|
|
unsigned short src_len,
|
|
short *err_code,
|
|
unsigned short *err_position)
|
|
{
|
|
UCS2_CHAR *start, *dest_ptr = (UCS2_CHAR*)dest_ptrUC;
|
|
USHORT src_start = src_len;
|
|
fss_size_t res;
|
|
|
|
assert(src_ptr != NULL || dest_ptr == NULL);
|
|
assert(err_code != NULL);
|
|
assert(err_position != NULL);
|
|
|
|
*err_code = 0;
|
|
|
|
/* See if we're only after a length estimate */
|
|
if (dest_ptr == NULL)
|
|
return (src_len * 2); /* All single byte narrow characters */
|
|
|
|
start = dest_ptr;
|
|
src_start = src_len;
|
|
while ((src_len) && (dest_len >= sizeof(*dest_ptr))) {
|
|
res = fss_mbtowc(dest_ptr, src_ptr, src_len);
|
|
if (res == -1) {
|
|
*err_code = CS_BAD_INPUT;
|
|
break;
|
|
}
|
|
assert(res <= src_len);
|
|
dest_ptr++;
|
|
dest_len -= sizeof(*dest_ptr);
|
|
src_ptr += res;
|
|
src_len -= res;
|
|
}
|
|
if (src_len && !*err_code) {
|
|
*err_code = CS_TRUNCATION_ERROR;
|
|
}
|
|
*err_position = src_start - src_len;
|
|
return ((dest_ptr - start) * sizeof(*dest_ptr));
|
|
}
|
|
|
|
unsigned short CsConvert_Unicode_ASCII::convert(
|
|
unsigned char *pDest,
|
|
unsigned short nDest,
|
|
unsigned char *pSrcUC,
|
|
unsigned short nSrc,
|
|
short *err_code,
|
|
unsigned short *err_position)
|
|
{
|
|
/**************************************
|
|
*
|
|
* CsConvert_Unicode_ASCII::convert
|
|
*
|
|
**************************************
|
|
*
|
|
* Functional description
|
|
* Convert UNICODE to CHARACTER SET ASCII (wide char).
|
|
* Byte values below 128 treated as ASCII.
|
|
* Byte values >= 128 create CONVERT_ERROR
|
|
*
|
|
*************************************/
|
|
NCHAR *pStart;
|
|
UCS2_CHAR *pStart_src, *pSrc = (UCS2_CHAR*)pSrcUC;
|
|
|
|
assert((pSrc != NULL) || (pDest == NULL));
|
|
assert(err_code != NULL);
|
|
|
|
*err_code = 0;
|
|
if (pDest == NULL) /* length estimate needed? */
|
|
return (nSrc / 2);
|
|
pStart = pDest;
|
|
pStart_src = pSrc;
|
|
while (nDest >= sizeof(*pDest) && nSrc >= sizeof(*pSrc)) {
|
|
if (*pSrc > 127) {
|
|
*err_code = CS_CONVERT_ERROR;
|
|
break;
|
|
}
|
|
*pDest++ = *pSrc++;
|
|
nDest -= sizeof(*pDest);
|
|
nSrc -= sizeof(*pSrc);
|
|
}
|
|
if (!*err_code && nSrc) {
|
|
*err_code = CS_TRUNCATION_ERROR;
|
|
}
|
|
*err_position = (pSrc - pStart_src) * sizeof(*pSrc);
|
|
|
|
return ((pDest - pStart) * sizeof(*pDest));
|
|
}
|
|
|
|
unsigned short CsConvert_ASCII_Unicode::convert(
|
|
unsigned char *pDestUC,
|
|
unsigned short nDest,
|
|
unsigned char *pSrc,
|
|
unsigned short nSrc,
|
|
short *err_code,
|
|
unsigned short *err_position)
|
|
{
|
|
/**************************************
|
|
*
|
|
* CsConvert_ASCII_Unicode::convert
|
|
*
|
|
**************************************
|
|
*
|
|
* Functional description
|
|
* Convert CHARACTER SET NONE to UNICODE (wide char).
|
|
* Byte values below 128 treated as ASCII.
|
|
* Byte values >= 128 create BAD_INPUT
|
|
*
|
|
*************************************/
|
|
UCS2_CHAR *pStart, *pDest = (UCS2_CHAR*)pDestUC;
|
|
UCHAR *pStart_src;
|
|
|
|
assert((pSrc != NULL) || (pDest == NULL));
|
|
assert(err_code != NULL);
|
|
|
|
*err_code = 0;
|
|
if (pDest == NULL) /* length estimate needed? */
|
|
return (2 * nSrc);
|
|
pStart = pDest;
|
|
pStart_src = pSrc;
|
|
while (nDest >= sizeof(*pDest) && nSrc >= sizeof(*pSrc)) {
|
|
if (*pSrc > 127) {
|
|
*err_code = CS_BAD_INPUT;
|
|
break;
|
|
}
|
|
*pDest++ = *pSrc++;
|
|
nDest -= sizeof(*pDest);
|
|
nSrc -= sizeof(*pSrc);
|
|
}
|
|
if (!*err_code && nSrc) {
|
|
*err_code = CS_TRUNCATION_ERROR;
|
|
}
|
|
*err_position = (pSrc - pStart_src) * sizeof(*pSrc);
|
|
|
|
return ((pDest - pStart) * sizeof(*pDest));
|
|
}
|
|
|
|
unsigned short CsConvert_Unicode_None::convert(
|
|
unsigned char *pDest,
|
|
unsigned short nDest,
|
|
unsigned char *pSrcUC,
|
|
unsigned short nSrc,
|
|
short *err_code,
|
|
unsigned short *err_position)
|
|
{
|
|
/**************************************
|
|
*
|
|
* w c _ t o _ n c
|
|
*
|
|
**************************************
|
|
*
|
|
* Functional description
|
|
*
|
|
**************************************/
|
|
NCHAR *pStart;
|
|
UCS2_CHAR *pStart_src, *pSrc = (UCS2_CHAR*)pSrcUC;
|
|
|
|
assert((pSrc != NULL) || (pDest == NULL));
|
|
assert(err_code != NULL);
|
|
assert(err_position != NULL);
|
|
|
|
*err_code = 0;
|
|
if (pDest == NULL) /* length estimate needed? */
|
|
return ((nSrc + 1) / 2);
|
|
pStart = pDest;
|
|
pStart_src = pSrc;
|
|
while (nDest && nSrc >= sizeof(*pSrc)) {
|
|
if (*pSrc >= 256) {
|
|
*err_code = CS_CONVERT_ERROR;
|
|
break;
|
|
}
|
|
*pDest++ = *pSrc++;
|
|
nDest -= sizeof(*pDest);
|
|
nSrc -= sizeof(*pSrc);
|
|
}
|
|
if (!*err_code && nSrc) {
|
|
*err_code = CS_TRUNCATION_ERROR;
|
|
}
|
|
*err_position = (pSrc - pStart_src) * sizeof(*pSrc);
|
|
|
|
return ((pDest - pStart) * sizeof(*pDest));
|
|
}
|
|
|
|
unsigned short CsConvert_None_Unicode::convert(
|
|
unsigned char *pDestUC,
|
|
unsigned short nDest,
|
|
unsigned char *pSrc,
|
|
unsigned short nSrc,
|
|
short *err_code,
|
|
unsigned short *err_position)
|
|
{
|
|
/**************************************
|
|
*
|
|
* CsConvert_None_Unicode::convert
|
|
*
|
|
**************************************
|
|
*
|
|
* Functional description
|
|
* Convert CHARACTER SET NONE to UNICODE (wide char).
|
|
* Byte values below 128 treated as ASCII.
|
|
* Byte values >= 128 create CONVERT ERROR
|
|
*
|
|
*************************************/
|
|
UCS2_CHAR *pStart, *pDest = (UCS2_CHAR*)pDestUC;
|
|
UCHAR *pStart_src;
|
|
|
|
assert((pSrc != NULL) || (pDest == NULL));
|
|
assert(err_code != NULL);
|
|
|
|
*err_code = 0;
|
|
if (pDest == NULL) /* length estimate needed? */
|
|
return (2 * nSrc);
|
|
pStart = pDest;
|
|
pStart_src = pSrc;
|
|
while (nDest >= sizeof(*pDest) && nSrc >= sizeof(*pSrc)) {
|
|
if (*pSrc > 127) {
|
|
*err_code = CS_CONVERT_ERROR;
|
|
break;
|
|
}
|
|
*pDest++ = *pSrc++;
|
|
nDest -= sizeof(*pDest);
|
|
nSrc -= sizeof(*pSrc);
|
|
}
|
|
if (!*err_code && nSrc) {
|
|
*err_code = CS_TRUNCATION_ERROR;
|
|
}
|
|
*err_position = (pSrc - pStart_src) * sizeof(*pSrc);
|
|
|
|
return ((pDest - pStart) * sizeof(*pDest));
|
|
}
|
|
|
|
static unsigned short cvt_utffss_to_ascii(UCHAR * pDest, USHORT nDest,	/* byte count */
										  UCHAR * pSrc, USHORT nSrc,	/* byte count */
										  short *err_code, unsigned short *err_position)
{
/**************************************
 *
 *      c v t _ u t f f s s _ t o _ a s c i i
 *
 **************************************
 *
 * Functional description
 *      Perform a pass-through transformation between ASCII and
 *      Unicode in FSS format. Byte values greater than 127
 *      cannot be converted in either direction, so the same
 *      routine does double duty.
 *
 *      err_code is set to CS_CONVERT_ERROR at the first byte
 *      above 127, to CS_TRUNCATION_ERROR when source bytes are
 *      left over, and to 0 otherwise. err_position receives the
 *      number of source bytes consumed. Returns the number of
 *      bytes written, or nSrc as the length estimate when pDest
 *      is NULL.
 *
 *************************************/
	assert((pSrc != NULL) || (pDest == NULL));
	assert(err_code != NULL);

	*err_code = 0;
	if (pDest == NULL)			/* length estimate needed? */
		return (nSrc);

	// err_position is written unconditionally below, so check it the same
	// way the converters in this file do.
	assert(err_position != NULL);

	UCHAR* const dest_begin = pDest;
	UCHAR* const src_begin = pSrc;

	while (nDest >= sizeof(*pDest) && nSrc >= sizeof(*pSrc)) {
		if (*pSrc > 127) {
			/* In the cvt_ascii_to_utffss case this should be CS_BAD_INPUT */
			/* but not in cvt_none_to_utffss or cvt_utffss_to_ascii */
			*err_code = CS_CONVERT_ERROR;
			break;
		}
		*pDest++ = *pSrc++;
		nDest -= sizeof(*pDest);
		nSrc -= sizeof(*pSrc);
	}
	if (!*err_code && nSrc) {
		*err_code = CS_TRUNCATION_ERROR;
	}
	*err_position = (pSrc - src_begin) * sizeof(*pSrc);

	return ((pDest - dest_begin) * sizeof(*pDest));
}
|
|
|
|
/*
|
|
* The following was provided by Ken Thompson of AT&T Bell Laboratories,
|
|
* <ken@research.att.com>, on Tue, 8 Sep 92 03:22:07 EDT, to the X/Open
|
|
* Joint Internationalization Group. Some minor formatting changes have
|
|
* been made by Glenn Adams, <glenn@metis.com>.
|
|
*
|
|
* -------------------------------------------------------------------------
|
|
* File System Safe Universal Character Set Transformation Format (FSS-UTF)
|
|
* -------------------------------------------------------------------------
|
|
*
|
|
* With the approval of ISO/IEC 10646 (Unicode) as an international
|
|
* standard and the anticipated wide spread use of this universal coded
|
|
* character set (UCS), it is necessary for historically ASCII based
|
|
* operating systems to devise ways to cope with representation and
|
|
* handling of the large number of characters that are possible to be
|
|
* encoded by this new standard.
|
|
*
|
|
* There are several challenges presented by UCS which must be dealt with
|
|
* by historical operating systems and the C-language programming
|
|
* environment. The most significant of these challenges is the encoding
|
|
* scheme used by UCS. More precisely, the challenge is the marrying of
|
|
* the UCS standard with existing programming languages and existing
|
|
* operating systems and utilities.
|
|
*
|
|
* The challenges of the programming languages and the UCS standard are
|
|
* being dealt with by other activities in the industry. However, we are
|
|
* still faced with the handling of UCS by historical operating systems
|
|
* and utilities. Prominent among the operating system UCS handling
|
|
* concerns is the representation of the data within the file system. An
|
|
* underlying assumption is that there is an absolute requirement to
|
|
* maintain the existing operating system software investment while at
|
|
* the same time taking advantage of the use the large number of
|
|
* characters provided by the UCS.
|
|
*
|
|
* UCS provides the capability to encode multi-lingual text within a
|
|
* single coded character set. However, UCS and its UTF variant do not
|
|
* protect null bytes and/or the ASCII slash ("/") making these character
|
|
* encodings incompatible with existing Unix implementations. The
|
|
* following proposal provides a Unix compatible transformation format of
|
|
* UCS such that Unix systems can support multi-lingual text in a single
|
|
* encoding. This transformation format encoding is intended to be used
|
|
* as a file code. This transformation format encoding of UCS is
|
|
* intended as an intermediate step towards full UCS support. However,
|
|
* since nearly all Unix implementations face the same obstacles in
|
|
* supporting UCS, this proposal is intended to provide a common and
|
|
* compatible encoding during this transition stage.
|
|
*
|
|
* Goal/Objective
|
|
* --------------
|
|
*
|
|
* With the assumption that most, if not all, of the issues surrounding
|
|
* the handling and storing of UCS in historical operating system file
|
|
* systems are understood, the objective is to define a UCS
|
|
* transformation format which also meets the requirement of being usable
|
|
* on a historical operating system file system in a non-disruptive
|
|
* manner. The intent is that UCS will be the process code for the
|
|
* transformation format, which is usable as a file code.
|
|
*
|
|
* Criteria for the Transformation Format
|
|
* --------------------------------------
|
|
*
|
|
* Below are the guidelines that were used in defining the UCS
|
|
* transformation format:
|
|
*
|
|
* 1) Compatibility with historical file systems:
|
|
*
|
|
* Historical file systems disallow the null byte and the ASCII
|
|
* slash character as a part of the file name.
|
|
*
|
|
* 2) Compatibility with existing programs:
|
|
*
|
|
* The existing model for multibyte processing is that ASCII does
|
|
* not occur anywhere in a multibyte encoding. There should be
|
|
* no ASCII code values for any part of a transformation format
|
|
* representation of a character that was not in the ASCII
|
|
* character set in the UCS representation of the character.
|
|
*
|
|
* 3) Ease of conversion from/to UCS.
|
|
*
|
|
* 4) The first byte should indicate the number of bytes to
|
|
* follow in a multibyte sequence.
|
|
*
|
|
* 5) The transformation format should not be extravagant in
|
|
* terms of number of bytes used for encoding.
|
|
*
|
|
* 6) It should be possible to find the start of a character
|
|
* efficiently starting from an arbitrary location in a byte
|
|
* stream.
|
|
*
|
|
* Proposed FSS-UTF
|
|
* ----------------
|
|
*
|
|
* The proposed UCS transformation format encodes UCS values in the range
|
|
* [0,0x7fffffff] using multibyte characters of lengths 1, 2, 3, 4, 5,
|
|
* and 6 bytes. For all encodings of more than one byte, the initial
|
|
* byte determines the number of bytes used and the high-order bit in
|
|
* each byte is set. Every byte that does not start 10xxxxxx is the
|
|
* start of a UCS character sequence.
|
|
*
|
|
* An easy way to remember this transformation format is to note that the
|
|
* number of high-order 1's in the first byte signifies the number of
|
|
* bytes in the multibyte character:
|
|
*
|
|
* Bits Hex Min Hex Max Byte Sequence in Binary
|
|
* 7 00000000 0000007f 0vvvvvvv
|
|
* 11 00000080 000007FF 110vvvvv 10vvvvvv
|
|
* 16 00000800 0000FFFF 1110vvvv 10vvvvvv 10vvvvvv
|
|
* 21 00010000 001FFFFF 11110vvv 10vvvvvv 10vvvvvv 10vvvvvv
|
|
* 26 00200000 03FFFFFF 111110vv 10vvvvvv 10vvvvvv 10vvvvvv 10vvvvvv
|
|
* 31 04000000 7FFFFFFF 1111110v 10vvvvvv 10vvvvvv 10vvvvvv 10vvvvvv 10vvvvvv
|
|
*
|
|
* The UCS value is just the concatenation of the v bits in the multibyte
|
|
* encoding. When there are multiple ways to encode a value, for example
|
|
 * UCS 0, only the shortest encoding is legal.
|
|
*
|
|
* Below are sample implementations of the C standard wctomb() and
|
|
* mbtowc() functions which demonstrate the algorithms for converting
|
|
* from UCS to the transformation format and converting from the
|
|
* transformation format to UCS. The sample implementations include error
|
|
* checks, some of which may not be necessary for conformance:
|
|
*
|
|
*/
|
|
|
|
typedef struct {
|
|
int cmask;
|
|
int cval;
|
|
int shift;
|
|
SLONG lmask;
|
|
SLONG lval;
|
|
} Tab;
|
|
|
|
static const Tab tab[] = {
|
|
{ 0x80, 0x00, 0 * 6, 0x7F, 0 }, /* 1 byte sequence */
|
|
{ 0xE0, 0xC0, 1 * 6, 0x7FF, 0x80 }, /* 2 byte sequence */
|
|
{ 0xF0, 0xE0, 2 * 6, 0xFFFF, 0x800 }, /* 3 byte sequence */
|
|
{ 0xF8, 0xF0, 3 * 6, 0x1FFFFF, 0x10000 }, /* 4 byte sequence */
|
|
{ 0xFC, 0xF8, 4 * 6, 0x3FFFFFF, 0x200000 }, /* 5 byte sequence */
|
|
{ 0xFE, 0xFC, 5 * 6, 0x7FFFFFFF, 0x4000000 }, /* 6 byte sequence */
|
|
{ 0, 0, 0, 0, 0 } /* end of table */
|
|
};
|
|
|
|
|
|
/*
 * Decode the FSS-UTF sequence starting at s, reading at most n bytes, and
 * store the character in *p.
 *
 * Returns the number of bytes consumed, 0 when s is a null pointer, or -1
 * when the input is too short, malformed, or not the shortest encoding of
 * its value.
 *
 * NOTE(review): the decoded value is stored into a fss_wchar_t without a
 * range check, so longer sequences appear to be narrowed silently - confirm.
 */
static fss_size_t fss_mbtowc(fss_wchar_t * p, UCHAR * s, fss_size_t n)
{
	if (s == 0)
		return 0;

	if (n <= 0)
		return -1;

	const int lead = *s & 0xff;
	SLONG value = lead;
	int length = 0;

	for (const Tab* row = tab; row->cmask; ++row)
	{
		++length;

		if ((lead & row->cmask) == row->cval)
		{
			/* The lead byte announces exactly 'length' bytes. */
			value &= row->lmask;
			if (value < row->lval)
				return -1;		/* would have fitted a shorter form */
			*p = value;
			return length;
		}

		if (n <= length)
			return -1;			/* input ends inside the sequence */

		++s;
		/* A continuation byte is 10xxxxxx; after the XOR its two top bits
		   must both be clear. */
		const int cont = (*s ^ 0x80) & 0xFF;
		if (cont & 0xC0)
			return -1;
		value = (value << 6) | cont;
	}

	return -1;
}
|
|
|
|
/*
 * Encode the character wc as an FSS-UTF sequence written to s.
 *
 * Returns the number of bytes written, 0 when s is a null pointer, or -1
 * when no table row can hold the value.
 */
static fss_size_t fss_wctomb(UCHAR * s, fss_wchar_t wc)
{
	if (s == 0)
		return 0;

	const SLONG value = wc;
	int length = 0;

	for (const Tab* row = tab; row->cmask; ++row)
	{
		++length;
		if (value > row->lmask)
			continue;			/* needs a longer sequence */

		/* First byte: length marker plus the topmost bits. */
		int shift = row->shift;
		*s = row->cval | (value >> shift);

		/* Remaining bytes: six bits each, most significant first. */
		while (shift > 0)
		{
			shift -= 6;
			*++s = 0x80 | ((value >> shift) & 0x3F);
		}
		return length;
	}

	return -1;
}
|
|
|
|
|
|
|
|
/************************************************************************************/
|
|
static unsigned short internal_string_to_key(
	USHORT inLen,
	UCHAR * src,
	USHORT outLen,
	UCHAR * dest, USHORT partial, UCHAR pad_char)
{
/**************************************
 *
 *	i n t e r n a l _ s t r i n g _ t o _ k e y
 *
 **************************************
 *
 * Functional description
 *	Copy at most outLen bytes of the inLen-byte string src into dest,
 *	then discard trailing pad_char bytes.  Returns the resulting key
 *	length.  The partial argument is not used.
 *
 **************************************/
	UCHAR *const keyStart = dest;

	/* copy as many bytes as both the source and the key buffer allow */
	for (; inLen && outLen; inLen--, outLen--)
		*dest++ = *src++;

	/* strip off ending pad characters */
	while (dest > keyStart && dest[-1] == pad_char)
		dest--;

	return (dest - keyStart);
}
|
|
|
|
static SSHORT internal_compare(USHORT length1,
	UCHAR * p1, USHORT length2, UCHAR * p2, UCHAR pad)
{
/**************************************
 *
 *	i n t e r n a l _ c o m p a r e
 *
 **************************************
 *
 * Functional description
 *	Byte-wise comparison of two strings.  The shorter string is
 *	treated as if it were extended with pad bytes to the length of
 *	the longer one.
 *	Returns 1 when string 1 sorts after string 2, -1 when it sorts
 *	before, and 0 when they are equal under that padding rule.
 *
 *	The length difference is never stored in a 16-bit signed value,
 *	so lengths that differ by more than 32767 compare correctly.
 *
 **************************************/
	const unsigned int len1 = length1;
	const unsigned int len2 = length2;
	const unsigned int common = (len1 < len2) ? len1 : len2;
	unsigned int i;

	/* compare the part both strings have */
	for (i = 0; i < common; i++) {
		if (p1[i] != p2[i])
			return (p1[i] > p2[i]) ? 1 : -1;
	}

	/* string 1 is longer: its tail is compared against the pad byte */
	for (i = common; i < len1; i++) {
		if (p1[i] != pad)
			return (p1[i] > pad) ? 1 : -1;
	}

	/* string 2 is longer: the pad byte is compared against its tail */
	for (i = common; i < len2; i++) {
		if (p2[i] != pad)
			return (pad > p2[i]) ? 1 : -1;
	}

	return 0;
}
|
|
|
|
short TextType_Binary::str_to_upper(USHORT inLen,
	UCHAR * src, USHORT outLen, UCHAR * dest)
{
/**************************************
 *
 *	TextType_Binary::str_to_upper
 *
 **************************************
 *
 * Functional description
 *	Copies the bytes unchanged: at most outLen of the inLen source
 *	bytes are copied, front to back.  Returns the number of bytes
 *	copied.
 *	Note: dest may equal src.
 *
 **************************************/
	const USHORT count = (inLen < outLen) ? inLen : outLen;

	for (USHORT i = 0; i < count; i++)
		dest[i] = src[i];

	return count;
}
|
|
|
|
static SSHORT internal_str_to_upper(USHORT inLen,
	UCHAR * src, USHORT outLen, UCHAR * dest)
{
/**************************************
 *
 *	i n t e r n a l _ s t r _ t o _ u p p e r
 *
 **************************************
 *
 * Functional description
 *	Writes UPPER7() of each source byte to dest, processing at most
 *	outLen of the inLen source bytes front to back.  Returns the
 *	number of bytes written.
 *	Note: dest may equal src.
 *
 **************************************/
	UCHAR *out = dest;

	for (; inLen && outLen; inLen--, outLen--, src++) {
		/* UPPER7 may evaluate its argument more than once, so the
		   pointer increment is kept outside of it */
		*out++ = UPPER7(*src);
	}

	return (out - dest);
}
|
|
|
|
unsigned short internal_unicode_to_fss(
|
|
unsigned char *fss_strUC,
|
|
unsigned short fss_len,
|
|
unsigned char *unicode_strUC,
|
|
unsigned short unicode_len,
|
|
short *err_code,
|
|
unsigned short *err_position)
|
|
{
|
|
MBCHAR *fss_str = (MBCHAR*)fss_strUC;
|
|
UCS2_CHAR *unicode_str = (UCS2_CHAR*)unicode_strUC;
|
|
UCHAR *start;
|
|
USHORT src_start = unicode_len;
|
|
UCHAR tmp_buffer[6];
|
|
UCHAR *p;
|
|
fss_size_t res;
|
|
|
|
assert(unicode_str != NULL || fss_str == NULL);
|
|
assert(err_code != NULL);
|
|
assert(err_position != NULL);
|
|
|
|
*err_code = 0;
|
|
|
|
/* See if we're only after a length estimate */
|
|
if (fss_str == NULL)
|
|
return ((unicode_len + 1) / 2 * 3); /* worst case - all han character input */
|
|
|
|
start = fss_str;
|
|
while ((fss_len) && (unicode_len >= sizeof(*unicode_str))) {
|
|
/* Convert the wide character into temp buffer */
|
|
res = fss_wctomb(tmp_buffer, *unicode_str);
|
|
if (res == -1) {
|
|
*err_code = CS_BAD_INPUT;
|
|
break;
|
|
}
|
|
/* will the mb sequence fit into space left? */
|
|
if (res > fss_len) {
|
|
*err_code = CS_TRUNCATION_ERROR;
|
|
break;
|
|
}
|
|
/* copy the converted bytes into the destination */
|
|
p = tmp_buffer;
|
|
for (; res; res--, fss_len--)
|
|
*fss_str++ = *p++;
|
|
unicode_len -= sizeof(*unicode_str);
|
|
unicode_str++;
|
|
}
|
|
if (unicode_len && !*err_code) {
|
|
*err_code = CS_TRUNCATION_ERROR;
|
|
}
|
|
*err_position = src_start - unicode_len;
|
|
return ((fss_str - start) * sizeof(*fss_str));
|
|
}
|
|
|
|
/*
 * Convert an FSS-UTF string to UCS2.
 *
 *	dest_ptr		destination buffer, or NULL to request a length estimate
 *	dest_len		size of the destination buffer in BYTES
 *	src_ptr			source string
 *	src_len			source length in bytes
 *	err_code		set to 0, CS_BAD_INPUT or CS_TRUNCATION_ERROR
 *	err_position	set to the number of source bytes consumed
 *					(not written on the length-estimate path)
 *
 * Returns the number of bytes written to the destination, or, when the
 * destination is NULL, an estimate of two bytes per source byte.
 */
static USHORT internal_fss_to_unicode (
	UCS2_CHAR *dest_ptr,
	USHORT dest_len,	/* BYTE count */
	NCHAR *src_ptr,
	USHORT src_len,
	SSHORT *err_code,
	USHORT *err_position)
{
	assert (src_ptr != NULL || dest_ptr == NULL);
	assert (err_code != NULL);
	assert (err_position != NULL);

	*err_code = 0;

	/* See if we're only after a length estimate */
	if (dest_ptr == NULL)
		return (src_len*2);	/* All single byte narrow characters */

	UCS2_CHAR *const out_start = dest_ptr;
	const USHORT total_in = src_len;

	while (src_len && dest_len >= sizeof (*dest_ptr))
	{
		const fss_size_t used = fss_mbtowc (dest_ptr, src_ptr, src_len);
		if (used == -1)
		{
			*err_code = CS_BAD_INPUT;
			break;
		}
		assert (used <= src_len);

		src_ptr += used;
		src_len -= used;

		dest_ptr++;
		dest_len -= sizeof (*dest_ptr);
	}

	/* Source left over without an explicit error: the output filled up */
	if (src_len && !*err_code)
		*err_code = CS_TRUNCATION_ERROR;

	*err_position = total_in - src_len;
	return ((dest_ptr - out_start)*sizeof (*dest_ptr));
}
|
|
|
|
/*
 * Map a built-in character set id to the factory function of the class
 * implementing it.  Returns NULL for ids this module does not handle.
 */
CharSetAllocFunc INTL_charset_alloc_func(short charset)
{
	CharSetAllocFunc factory = NULL;

	switch (charset)
	{
	case CS_NONE:
		factory = CharSet_None::object_factory;
		break;
	case CS_ASCII:
		factory = CharSet_ASCII::object_factory;
		break;
	case CS_UNICODE_FSS:
		factory = CharSet_Unicode_FSS::object_factory;
		break;
	case CS_UNICODE_UCS2:
		factory = CharSet_Unicode::object_factory;
		break;
	case CS_BINARY:
		factory = CharSet_Binary::object_factory;
		break;
	default:
		break;
	}

	return factory;
}
|
|
|
|
/*
 * Map a built-in text type id to the factory function of the class
 * implementing it.  Returns NULL for ids this module does not handle.
 */
TextTypeAllocFunc INTL_texttype_alloc_func(short ttype)
{
	TextTypeAllocFunc factory = NULL;

	switch (ttype)
	{
	case ttype_none:
		factory = TextType_None::object_factory;
		break;
	case ttype_ascii:
		factory = TextType_ASCII::object_factory;
		break;
	case ttype_unicode_fss:
		factory = TextType_UFSS::object_factory;
		break;
	case ttype_binary:
		factory = TextType_Binary::object_factory;
		break;
	default:
		break;
	}

	return factory;
}
|
|
|
|
/*
 * Return the factory function of the built-in converter from character
 * set `from` to character set `to`.  Only ASCII -> UNICODE_FSS,
 * UNICODE_FSS -> ASCII and UNICODE_FSS -> NONE are provided; every other
 * pair yields NULL.
 */
CsConvertAllocFunc INTL_csconvert_alloc_func(short from, short to)
{
	switch (from)
	{
	case CS_ASCII:
		if (to == CS_UNICODE_FSS)
			return CsConvert_ASCII_UFSS::object_factory;
		break;

	case CS_UNICODE_FSS:
		if (to == CS_ASCII)
			return CsConvert_UFSS_ASCII::object_factory;
		if (to == CS_NONE)
			return CsConvert_UFSS_None::object_factory;
		break;

	default:
		break;
	}

	return NULL;
}
|
|
|
|
#if NOT_USED_OR_REPLACED
|
|
//========================================================================================
|
|
//========================================================================================
|
|
//========================================================================================
|
|
//========================================================================================
|
|
//
|
|
// JMB: obsolete code - to be removed later
|
|
#define TEXTTYPE_RETURN return (0)
|
|
|
|
/*
 * Fill in a text type description for one of the built-in types.
 * The expansion assigns through a variable named `cache` and reads a
 * variable named `POSIX`; both must exist at the point of use.
 */
#define FAMILY_INTERNAL(id_number, name, charset, country) \
	cache->texttype_version =			40; \
	cache->texttype_type =				(id_number); \
	cache->texttype_character_set =		(charset); \
	cache->texttype_country =			(country); \
	cache->texttype_bytes_per_char =	1; \
	cache->texttype_fn_init =			(FPTR_SHORT) (name); \
	cache->texttype_fn_key_length =		(FPTR_SHORT) internal_keylength; \
	cache->texttype_fn_string_to_key =	(FPTR_SHORT) internal_string_to_key; \
	cache->texttype_fn_compare =		(FPTR_short) internal_compare; \
	cache->texttype_fn_to_upper =		(FPTR_SHORT) internal_ch_to_upper; \
	cache->texttype_fn_to_lower =		(FPTR_SHORT) internal_ch_to_lower; \
	cache->texttype_fn_str_to_upper =	(FPTR_short) internal_str_to_upper; \
	cache->texttype_fn_mbtowc =			(FPTR_short) internal_nc_mbtowc; \
	cache->texttype_collation_table =	(BYTE *) " "; \
	cache->texttype_toupper_table =		(BYTE *) NULL; \
	cache->texttype_tolower_table =		(BYTE *) NULL; \
	cache->texttype_compress_table =	(BYTE *) NULL; \
	cache->texttype_expand_table =		(BYTE *) NULL; \
	cache->texttype_name =				const_cast<char*>(POSIX);
|
|
|
|
|
|
|
|
static USHORT ttype_ascii_init(TEXTTYPE cache, USHORT parm1, USHORT dummy)
{
/**************************************
 *
 *	t t y p e _ a s c i i _ i n i t
 *
 **************************************
 *
 * Functional description
 *	Describe the ttype_ascii text type (character set CS_ASCII,
 *	country CC_C, name "C.ASCII") in *cache.  Returns 0.
 *	parm1 and dummy are not used.
 *
 *************************************/
	static const ASCII POSIX[] = "C.ASCII";

	FAMILY_INTERNAL(ttype_ascii, ttype_ascii_init, CS_ASCII, CC_C);

	TEXTTYPE_RETURN;
}
|
|
|
|
|
|
static USHORT ttype_none_init(TEXTTYPE cache, USHORT parm1, USHORT dummy)
{
/**************************************
 *
 *	t t y p e _ n o n e _ i n i t
 *
 **************************************
 *
 * Functional description
 *	Describe the ttype_none text type (character set CS_NONE,
 *	country CC_C, name "C") in *cache.  Returns 0.
 *	parm1 and dummy are not used.
 *
 *************************************/
	static const ASCII POSIX[] = "C";

	FAMILY_INTERNAL(ttype_none, ttype_none_init, CS_NONE, CC_C);

	TEXTTYPE_RETURN;
}
|
|
|
|
|
|
static USHORT ttype_unicode_fss_init(
	TEXTTYPE cache,
	USHORT parm1, USHORT dummy)
{
/**************************************
 *
 *	t t y p e _ u n i c o d e _ f s s _ i n i t
 *
 **************************************
 *
 * Functional description
 *	Describe the ttype_unicode_fss text type (character set
 *	CS_UNICODE_FSS, country CC_C, name "C.UNICODE_FSS") in *cache,
 *	then replace the single-byte defaults: three bytes per
 *	character and the FSS conversion routines.  Returns 0.
 *	parm1 and dummy are not used.
 *
 *************************************/
	static const ASCII POSIX[] = "C.UNICODE_FSS";

	FAMILY_INTERNAL(ttype_unicode_fss, ttype_unicode_fss_init, CS_UNICODE_FSS,
					CC_C);

	/* overrides of the defaults set by FAMILY_INTERNAL */
	cache->texttype_bytes_per_char = 3;
	cache->texttype_fn_to_wc = (FPTR_SHORT) INTL_fss_to_unicode;
	cache->texttype_fn_mbtowc = (FPTR_short) INTL_fss_mbtowc;

	TEXTTYPE_RETURN;
}
|
|
|
|
|
|
static USHORT ttype_binary_init(TEXTTYPE cache, USHORT parm1, USHORT dummy)
{
/**************************************
 *
 *	t t y p e _ b i n a r y _ i n i t
 *
 **************************************
 *
 * Functional description
 *	Describe the ttype_binary text type (character set CS_BINARY,
 *	country CC_C, name "C.OCTETS") in *cache, then replace the
 *	case-conversion entries with the copy routines and the pad
 *	character with a zero byte.  Returns 0.
 *	parm1 and dummy are not used.
 *
 *************************************/
	static const ASCII POSIX[] = "C.OCTETS";

	FAMILY_INTERNAL(ttype_binary, ttype_binary_init, CS_BINARY, CC_C);

	/* overrides of the defaults set by FAMILY_INTERNAL */
	cache->texttype_fn_to_upper = (FPTR_SHORT) internal_ch_copy;
	cache->texttype_fn_to_lower = (FPTR_SHORT) internal_ch_copy;
	cache->texttype_fn_str_to_upper = (FPTR_short) internal_str_copy;
	cache->texttype_collation_table = (BYTE *) "\0";	/* pad character */

	TEXTTYPE_RETURN;
}
|
|
|
|
|
|
/*
|
|
* Start of Character set definitions
|
|
*/
|
|
|
|
#define CHARSET_RETURN return (0)
|
|
|
|
static void common_8bit_init(
	CHARSET csptr,
	USHORT id,
	ASCII * name,
	BYTE * to_unicode_tbl,
	BYTE * from_unicode_tbl1,
	BYTE * from_unicode_tbl2)
{
/**************************************
 *
 *	c o m m o n _ 8 b i t _ i n i t
 *
 **************************************
 *
 * Functional description
 *	Set the fields of *csptr that the single-byte character sets
 *	share: version 40, the given id and name, no flags, exactly one
 *	byte per character, a one-byte " " space character and no
 *	well-formedness routine.
 *	The three table arguments are not used.
 *
 *************************************/

	/* identification */
	csptr->charset_version = 40;
	csptr->charset_id = id;
	csptr->charset_name = name;
	csptr->charset_flags = 0;

	/* character geometry */
	csptr->charset_min_bytes_per_char = 1;
	csptr->charset_max_bytes_per_char = 1;
	csptr->charset_space_length = 1;
	csptr->charset_space_character = (BYTE *) " ";

	csptr->charset_well_formed = (FPTR_SHORT) NULL;
}
|
|
|
|
|
|
static USHORT cs_ascii_init(CHARSET csptr, USHORT cs_id, USHORT dummy)
{
/**************************************
 *
 *	c s _ a s c i i _ i n i t
 *
 **************************************
 *
 * Functional description
 *	Describe the ASCII character set in *csptr and register
 *	cvt_ascii_to_unicode / cvt_unicode_to_ascii as its converters
 *	to and from UCS2.  Returns 0.
 *	cs_id and dummy are not used.
 *
 *************************************/

	common_8bit_init(csptr, CS_ASCII, (ASCII *) "ASCII", NULL, NULL, NULL);

	/* ASCII -> UCS2 */
	common_convert_init(&csptr->charset_to_unicode, CS_UNICODE_UCS2, CS_ASCII,
						(FPTR_SHORT) cvt_ascii_to_unicode, NULL, NULL);

	/* UCS2 -> ASCII */
	common_convert_init(&csptr->charset_from_unicode, CS_ASCII, CS_UNICODE_UCS2,
						(FPTR_SHORT) cvt_unicode_to_ascii, NULL, NULL);

	CHARSET_RETURN;
}
|
|
|
|
|
|
static USHORT cs_none_init(CHARSET csptr, USHORT cs_id, USHORT dummy)
{
/**************************************
 *
 *	c s _ n o n e _ i n i t
 *
 **************************************
 *
 * Functional description
 *	Describe the NONE character set in *csptr and register
 *	cvt_none_to_unicode / wc_to_nc as its converters to and from
 *	UCS2.  Returns 0.
 *	cs_id and dummy are not used.
 *
 *************************************/

	common_8bit_init(csptr, CS_NONE, (ASCII *) "NONE", NULL, NULL, NULL);

	/* Earlier, disabled form of the to-unicode registration:
	 *
	 *	common_convert_init (&csptr->charset_to_unicode, CS_UNICODE_UCS2, id,
	 *						 nc_to_wc, to_unicode_tbl, NULL);
	 */

	/* NONE -> UCS2 */
	common_convert_init(&csptr->charset_to_unicode, CS_UNICODE_UCS2, CS_NONE,
						(FPTR_SHORT) cvt_none_to_unicode, NULL, NULL);

	/* UCS2 -> NONE */
	common_convert_init(&csptr->charset_from_unicode, CS_NONE, CS_UNICODE_UCS2,
						(FPTR_SHORT) wc_to_nc, NULL, NULL);

	CHARSET_RETURN;
}
|
|
|
|
|
|
static USHORT cs_unicode_fss_init(CHARSET csptr, USHORT cs_id, USHORT dummy)
{
/**************************************
 *
 *	c s _ u n i c o d e _ f s s _ i n i t
 *
 **************************************
 *
 * Functional description
 *	Describe the UNICODE_FSS character set in *csptr and register
 *	INTL_fss_to_unicode / INTL_unicode_to_fss as its converters to
 *	and from UCS2.  Returns 0.
 *	cs_id and dummy are not used.
 *
 *	NOTE(review): the description is filled in by common_8bit_init,
 *	which sets one byte per character - confirm that is intended
 *	for a multi-byte encoding.
 *
 *************************************/

	common_8bit_init(csptr, CS_UNICODE_FSS, (ASCII *) "UNICODE_FSS", NULL,
					 NULL, NULL);

	/* UNICODE_FSS -> UCS2 */
	common_convert_init(&csptr->charset_to_unicode, CS_UNICODE_UCS2,
						CS_UNICODE_FSS, (FPTR_SHORT) INTL_fss_to_unicode,
						NULL, NULL);

	/* UCS2 -> UNICODE_FSS */
	common_convert_init(&csptr->charset_from_unicode, CS_UNICODE_FSS,
						CS_UNICODE_UCS2, (FPTR_SHORT) INTL_unicode_to_fss, NULL,
						NULL);

	CHARSET_RETURN;
}
|
|
|
|
|
|
static USHORT cs_unicode_ucs2_init(CHARSET csptr, USHORT cs_id, USHORT dummy)
{
/**************************************
 *
 *	c s _ u n i c o d e _ u c s 2 _ i n i t
 *
 **************************************
 *
 * Functional description
 *	Describe the UNICODE_UCS2 character set in *csptr: version 40,
 *	no flags, exactly two bytes per character, a two-byte 0x0020
 *	space character and no well-formedness routine.  No converters
 *	are registered.  Returns 0.
 *	cs_id and dummy are not used.
 *
 *************************************/
	static const UCS2_CHAR space = 0x0020;

	csptr->charset_version = 40;
	/* NOTE(review): the rest of this file spells the id CS_UNICODE_UCS2;
	   confirm CS_UNICODE names the same character set */
	csptr->charset_id = CS_UNICODE;
	/* same cast form as the other cs_*_init routines use for the name */
	csptr->charset_name = (ASCII *) "UNICODE_UCS2";
	csptr->charset_flags = 0;
	csptr->charset_min_bytes_per_char = 2;
	csptr->charset_max_bytes_per_char = 2;
	csptr->charset_space_length = 2;
	csptr->charset_space_character = (BYTE *) & space;	/* 0x0020 */
	csptr->charset_well_formed = (FPTR_SHORT) NULL;

	CHARSET_RETURN;
}
|
|
|
|
|
|
static USHORT cs_binary_init(CHARSET csptr, USHORT cs_id, USHORT dummy)
{
/**************************************
 *
 *	c s _ b i n a r y _ i n i t
 *
 **************************************
 *
 * Functional description
 *	Describe the BINARY character set in *csptr, using a zero byte
 *	instead of " " as the space character, and register mb_to_wc /
 *	wc_to_mb as its converters to and from UCS2.  Returns 0.
 *	cs_id and dummy are not used.
 *
 *************************************/

	common_8bit_init(csptr, CS_BINARY, (ASCII *) "BINARY", NULL, NULL, NULL);

	/* binary data is padded with zero bytes, not blanks */
	csptr->charset_space_character = (BYTE *) "\0";

	/* BINARY -> UCS2 */
	common_convert_init(&csptr->charset_to_unicode, CS_UNICODE_UCS2, CS_BINARY,
						(FPTR_SHORT) mb_to_wc, NULL, NULL);

	/* UCS2 -> BINARY */
	common_convert_init(&csptr->charset_from_unicode, CS_BINARY,
						CS_UNICODE_UCS2, (FPTR_SHORT) wc_to_mb, NULL, NULL);

	CHARSET_RETURN;
}
|
|
|
|
|
|
/*
|
|
* Start of Conversion entries
|
|
*/
|
|
|
|
#define CONVERT_RETURN return (0)
|
|
|
|
|
|
static USHORT cvt_ascii_utf_init(
	CSCONVERT csptr,
	USHORT dest_cs, USHORT source_cs)
{
/**************************************
 *
 *	c v t _ a s c i i _ u t f _ i n i t
 *
 **************************************
 *
 * Functional description
 *	Initialise the converter *csptr for the given destination and
 *	source character sets, with cvt_utffss_to_ascii as the
 *	conversion routine.  Returns 0.
 *
 *	NOTE(review): the function name reads "ascii to utf" while the
 *	routine installed is cvt_utffss_to_ascii - confirm the
 *	direction is intended.
 *
 *************************************/

	common_convert_init(csptr, dest_cs, source_cs,
						(FPTR_SHORT) cvt_utffss_to_ascii, NULL, NULL);

	CONVERT_RETURN;
}
|
|
#endif
|