8
0
mirror of https://github.com/FirebirdSQL/firebird.git synced 2025-01-30 19:23:03 +01:00
firebird-mirror/src/intl/cv_big5.c

317 lines
7.2 KiB
C
Raw Normal View History

2001-05-23 15:26:42 +02:00
/*
* PROGRAM: InterBase International support
* MODULE: cv_big5.c
* DESCRIPTION: Codeset conversion for BIG5 family codesets
*
* The contents of this file are subject to the Interbase Public
* License Version 1.0 (the "License"); you may not use this file
* except in compliance with the License. You may obtain a copy
* of the License at http://www.Inprise.com/IPL.html
*
* Software distributed under the License is distributed on an
* "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express
* or implied. See the License for the specific language governing
* rights and limitations under the License.
*
* The Original Code was created by Inprise Corporation
* and its predecessors. Portions created by Inprise Corporation are
* Copyright (C) Inprise Corporation.
*
* All Rights Reserved.
* Contributor(s): ______________________________________.
*/
#include "../intl/ldcommon.h"
/*
 * BIG-5 byte classification.  These macros have a duplicate in lc_big5.c;
 * keep the two definitions equivalent.
 *
 * Only the low eight bits of the argument are examined.  Each range test
 * is a single unsigned subtraction: a byte below the lower bound wraps
 * around to a very large value and therefore fails the comparison.  This
 * evaluates the argument exactly once, so an argument with side effects
 * (e.g. *p++) is safe.
 */
#define BIG51(uc) (((unsigned) (UCHAR) ((uc) & 0xff) - 0xa1u) <= (0xfeu - 0xa1u)) /* BIG-5 1st-byte: 0xa1..0xfe */
#define BIG52(uc) (((unsigned) (UCHAR) ((uc) & 0xff) - 0x40u) <= (0xfeu - 0x40u)) /* BIG-5 2nd-byte: 0x40..0xfe */
USHORT CVBIG5_big5_to_unicode(obj, dest_ptr, dest_len, src_ptr, src_len,
err_code, err_position)
CSCONVERT obj;
USHORT *dest_ptr;
USHORT dest_len;
UCHAR *src_ptr;
USHORT src_len;
SSHORT *err_code;
USHORT *err_position;
{
USHORT *start;
WCHAR ch;
WCHAR wide;
USHORT src_start = src_len;
USHORT this_len;
UCHAR c1, c2;
assert(src_ptr != NULL || dest_ptr == NULL);
assert(err_code != NULL);
assert(err_position != NULL);
assert(obj != NULL);
assert(obj->csconvert_convert == CVBIG5_big5_to_unicode);
assert(obj->csconvert_datatable != NULL);
assert(obj->csconvert_misc != NULL);
*err_code = 0;
/* See if we're only after a length estimate */
if (dest_ptr == NULL)
return (src_len * 2);
start = dest_ptr;
src_start = src_len;
while ((src_len) && (dest_len > 1)) {
if (*src_ptr & 0x80) {
c1 = *src_ptr++;
if (BIG51(c1)) { /* first byte is Big5 */
if (src_len == 1) {
*err_code = CS_BAD_INPUT;
break;
}
c2 = *src_ptr++;
if (!(BIG52(c2))) { /* Bad second byte */
*err_code = CS_BAD_INPUT;
break;
}
wide = (c1 << 8) + c2;
this_len = 2;
}
else {
*err_code = CS_BAD_INPUT;
break;
}
}
else { /* it is ASCII */
wide = *src_ptr++;
this_len = 1;
}
/* Convert from BIG5 to UNICODE */
ch = ((USHORT *) obj->csconvert_datatable)
[((USHORT *) obj->csconvert_misc)[(USHORT) wide / 256]
+ (wide % 256)];
if ((ch == CS_CANT_MAP) && !(wide == CS_CANT_MAP)) {
*err_code = CS_CONVERT_ERROR;
break;
}
*dest_ptr++ = ch;
dest_len -= 2;
src_len -= this_len;
};
if (src_len && !*err_code) {
*err_code = CS_TRUNCATION_ERROR;
};
*err_position = src_start - src_len;
return ((dest_ptr - start) * sizeof(*dest_ptr));
}
USHORT CVBIG5_unicode_to_big5(obj, big5_str, big5_len, unicode_str,
unicode_len, err_code, err_position)
CSCONVERT obj;
UCHAR *big5_str;
USHORT big5_len;
USHORT *unicode_str;
USHORT unicode_len;
SSHORT *err_code;
USHORT *err_position;
{
UCHAR *start;
WCHAR big5_ch;
WCHAR wide;
int tmp1, tmp2;
USHORT src_start = unicode_len;
assert(unicode_str != NULL || big5_str == NULL);
assert(err_code != NULL);
assert(err_position != NULL);
assert(obj != NULL);
assert(obj->csconvert_convert == CVBIG5_unicode_to_big5);
assert(obj->csconvert_datatable != NULL);
assert(obj->csconvert_misc != NULL);
*err_code = 0;
/* See if we're only after a length estimate */
if (big5_str == NULL)
return (unicode_len); /* worst case - all han character input */
start = big5_str;
while ((big5_len) && (unicode_len > 1)) {
/* Convert from UNICODE to BIG5 code */
wide = *unicode_str++;
big5_ch = ((USHORT *) obj->csconvert_datatable)[
((USHORT *) obj->
csconvert_misc)[
(USHORT)
wide
/
256]
+ (wide % 256)];
if ((big5_ch == CS_CANT_MAP) && !(wide == CS_CANT_MAP)) {
*err_code = CS_CONVERT_ERROR;
break;
};
tmp1 = big5_ch / 256;
tmp2 = big5_ch % 256;
if (tmp1 == 0) { /* ASCII character */
*big5_str++ = tmp2;
big5_len--;
unicode_len -= sizeof(*unicode_str);
continue;
};
if (big5_len < 2) {
*err_code = CS_TRUNCATION_ERROR;
break;
}
else {
assert(BIG51(tmp1));
assert(BIG52(tmp2));
*big5_str++ = tmp1;
*big5_str++ = tmp2;
unicode_len -= sizeof(*unicode_str);
big5_len -= 2;
};
}
if (unicode_len && !*err_code) {
*err_code = CS_TRUNCATION_ERROR;
}
*err_position = src_start - unicode_len;
return ((big5_str - start) * sizeof(*big5_str));
}
USHORT CVBIG5_check_big5(big5_str, big5_len)
UCHAR *big5_str;
USHORT big5_len;
{
/**************************************
 * Functional description
 *	Scan a BIG5 string and report whether it ends in the middle of
 *	a 2-byte character.
 *
 *	A byte accepted by BIG51 starts a 2-byte character; its second
 *	byte is skipped without being inspected.  Any other byte counts
 *	as a 1-byte character.
 *
 *	Returns 1 if the last character is a first byte with nothing
 *	after it, 0 otherwise.
 *
 **************************************/
    UCHAR lead;

    while (big5_len != 0) {
        lead = *big5_str;

        if (!BIG51(lead)) {
            /* single-byte character */
            big5_str += 1;
            big5_len -= 1;
            continue;
        }

        /* first byte of a pair with no second byte left: truncated */
        if (big5_len == 1)
            return (1);

        big5_str += 2;
        big5_len -= 2;
    }

    return (0);
}
USHORT CVBIG5_big5_byte2short(obj, dst, dst_len, src, src_len, err_code,
                              err_position)
CSCONVERT obj;
UCHAR *dst;
USHORT dst_len;
UCHAR *src;
USHORT src_len;
SSHORT *err_code;
USHORT *err_position;
{
/**************************************
 * Functional description
 *	Repack a BIG5 byte string (src, src_len bytes) into a buffer of
 *	shorts (dst, dst_len bytes):
 *	    a 1-byte character becomes one short holding that byte,
 *	    a 2-byte character becomes one short holding
 *	    (first byte << 8) + second byte.
 *
 *	When dst is NULL only a length estimate (2 * src_len) is
 *	returned.
 *
 *	On return *err_code is 0, CS_BAD_INPUT (first byte with no
 *	second byte) or CS_TRUNCATION_ERROR (source bytes left over).
 *	*err_position is the number of source bytes consumed.  The
 *	return value is the number of bytes stored into dst.
 *
 **************************************/
    UCHAR *out;
    USHORT remaining;
    USHORT packed;

    assert(src != NULL || dst == NULL);
    assert(err_code != NULL);
    assert(err_position != NULL);
    assert(obj != NULL);

    *err_code = 0;

    /* Caller only wants a size: worst case is one short per input byte */
    if (dst == NULL)
        return (2 * src_len);

    out = dst;
    remaining = src_len;

    /* Keep going while there is input and room for one more short */
    while (remaining != 0 && dst_len >= sizeof(USHORT)) {
        if (!BIG51(*src)) {
            /* single-byte character */
            packed = src[0];
            src += 1;
            remaining -= 1;
        }
        else if (remaining < 2) {
            /* first byte of a pair with nothing after it */
            *err_code = CS_BAD_INPUT;
            break;
        }
        else {
            packed = (src[0] << 8) + src[1];
            src += 2;
            remaining -= 2;
        }

        /* Stored as a native short; dst is assumed to be suitably aligned */
        *(USHORT *) out = packed;
        out += sizeof(USHORT);
        dst_len -= sizeof(USHORT);
    }

    /* Input left over without a more specific error: output ran out */
    if (remaining != 0 && *err_code == 0)
        *err_code = CS_TRUNCATION_ERROR;

    *err_position = src_len - remaining;
    return (out - dst) * sizeof(*out);
}
SSHORT CVBIG5_big5_mbtowc(obj, wc, src, src_len)
CSCONVERT obj;
WCHAR *wc;
UCHAR *src;
USHORT src_len;
{
/**************************************
 * Functional description
 *	Decode the single character at the front of a BIG5 byte
 *	stream.
 *
 *	Returns the number of bytes the character occupies (1 or 2),
 *	or -1 if src_len is 0 or a first byte has no second byte.
 *	If wc is not NULL the character is stored there: the byte
 *	itself for a 1-byte character, (first << 8) + second for a
 *	2-byte character.  The second byte is not range-checked.
 *
 **************************************/
    assert(src != NULL);
    assert(obj != NULL);

    /* Nothing to decode */
    if (src_len == 0)
        return -1;

    /* Single-byte character */
    if (!BIG51(*src)) {
        if (wc != NULL)
            *wc = src[0];
        return 1;
    }

    /* First byte of a pair: the second byte must be present */
    if (src_len < 2)
        return -1;

    if (wc != NULL)
        *wc = (src[0] << 8) + src[1];
    return 2;
}