mirror of
https://github.com/FirebirdSQL/firebird.git
synced 2025-01-27 20:03:03 +01:00
316 lines
7.1 KiB
C
316 lines
7.1 KiB
C
/*
 *	PROGRAM:	InterBase International support
 *	MODULE:		cv_gb2312.c
 *	DESCRIPTION:	Codeset conversion for GB2312 family codesets
 *
 * The contents of this file are subject to the Interbase Public
 * License Version 1.0 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy
 * of the License at http://www.Inprise.com/IPL.html
 *
 * Software distributed under the License is distributed on an
 * "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express
 * or implied. See the License for the specific language governing
 * rights and limitations under the License.
 *
 * The Original Code was created by Inprise Corporation
 * and its predecessors. Portions created by Inprise Corporation are
 * Copyright (C) Inprise Corporation.
 *
 * All Rights Reserved.
 * Contributor(s): ______________________________________.
 */
|
|
|
|
#include "../intl/ldcommon.h"
|
|
|
|
/* These macros have a duplicate in lc_gb2312.c */

/*
 * GB1(uc) - true when the low 8 bits of uc lie in 0xa1..0xfe, the range
 *           accepted here for the first byte of a two-byte GB2312 character.
 * GB2(uc) - the same range test, applied to the second byte.
 *
 * Both macros expand their argument twice, so do not pass an expression
 * with side effects (e.g. *p++).
 */
#define GB1(uc)	(((UCHAR) ((uc) & 0xff)) >= 0xa1 && \
				 ((UCHAR) ((uc) & 0xff)) <= 0xfe)	/* GB2312 1st-byte */
#define GB2(uc)	(((UCHAR) ((uc) & 0xff)) >= 0xa1 && \
				 ((UCHAR) ((uc) & 0xff)) <= 0xfe)	/* GB2312 2nd-byte */
|
|
|
|
|
|
|
|
USHORT CVGB_gb2312_to_unicode(obj, dest_ptr, dest_len, src_ptr, src_len,
|
|
err_code, err_position)
|
|
CSCONVERT obj;
|
|
USHORT *dest_ptr;
|
|
USHORT dest_len;
|
|
UCHAR *src_ptr;
|
|
USHORT src_len;
|
|
SSHORT *err_code;
|
|
USHORT *err_position;
|
|
{
|
|
USHORT *start;
|
|
WCHAR ch;
|
|
WCHAR wide;
|
|
USHORT src_start = src_len;
|
|
USHORT this_len;
|
|
UCHAR c1, c2;
|
|
|
|
assert(src_ptr != NULL || dest_ptr == NULL);
|
|
assert(err_code != NULL);
|
|
assert(err_position != NULL);
|
|
assert(obj != NULL);
|
|
assert(obj->csconvert_convert == CVGB_gb2312_to_unicode);
|
|
assert(obj->csconvert_datatable != NULL);
|
|
assert(obj->csconvert_misc != NULL);
|
|
|
|
*err_code = 0;
|
|
|
|
/* See if we're only after a length estimate */
|
|
if (dest_ptr == NULL)
|
|
return (src_len * 2);
|
|
|
|
start = dest_ptr;
|
|
src_start = src_len;
|
|
while ((src_len) && (dest_len > 1)) {
|
|
if (*src_ptr & 0x80) {
|
|
c1 = *src_ptr++;
|
|
|
|
if (GB1(c1)) { /* first byte is GB2312 */
|
|
if (src_len == 1) {
|
|
*err_code = CS_BAD_INPUT;
|
|
break;
|
|
}
|
|
c2 = *src_ptr++;
|
|
if (!(GB2(c2))) { /* Bad second byte */
|
|
*err_code = CS_BAD_INPUT;
|
|
break;
|
|
}
|
|
wide = (c1 << 8) + c2;
|
|
this_len = 2;
|
|
}
|
|
else {
|
|
*err_code = CS_BAD_INPUT;
|
|
break;
|
|
}
|
|
}
|
|
else { /* it is ASCII */
|
|
|
|
wide = *src_ptr++;
|
|
this_len = 1;
|
|
}
|
|
|
|
/* Convert from GB2312 to UNICODE */
|
|
ch = ((USHORT *) obj->csconvert_datatable)
|
|
[((USHORT *) obj->csconvert_misc)[(USHORT) wide / 256]
|
|
+ (wide % 256)];
|
|
|
|
if ((ch == CS_CANT_MAP) && !(wide == CS_CANT_MAP)) {
|
|
*err_code = CS_CONVERT_ERROR;
|
|
break;
|
|
}
|
|
|
|
*dest_ptr++ = ch;
|
|
dest_len -= 2;
|
|
src_len -= this_len;
|
|
};
|
|
if (src_len && !*err_code) {
|
|
*err_code = CS_TRUNCATION_ERROR;
|
|
};
|
|
*err_position = src_start - src_len;
|
|
return ((dest_ptr - start) * sizeof(*dest_ptr));
|
|
}
|
|
|
|
|
|
USHORT CVGB_unicode_to_gb2312(obj, gb_str, gb_len, unicode_str, unicode_len,
|
|
err_code, err_position)
|
|
CSCONVERT obj;
|
|
UCHAR *gb_str;
|
|
USHORT gb_len;
|
|
USHORT *unicode_str;
|
|
USHORT unicode_len;
|
|
SSHORT *err_code;
|
|
USHORT *err_position;
|
|
{
|
|
UCHAR *start;
|
|
WCHAR gb_ch;
|
|
WCHAR wide;
|
|
int tmp1, tmp2;
|
|
USHORT src_start = unicode_len;
|
|
|
|
assert(unicode_str != NULL || gb_str == NULL);
|
|
assert(err_code != NULL);
|
|
assert(err_position != NULL);
|
|
assert(obj != NULL);
|
|
assert(obj->csconvert_convert == CVGB_unicode_to_gb2312);
|
|
assert(obj->csconvert_datatable != NULL);
|
|
assert(obj->csconvert_misc != NULL);
|
|
|
|
*err_code = 0;
|
|
|
|
/* See if we're only after a length estimate */
|
|
if (gb_str == NULL)
|
|
return (unicode_len); /* worst case - all han character input */
|
|
|
|
start = gb_str;
|
|
while ((gb_len) && (unicode_len > 1)) {
|
|
/* Convert from UNICODE to GB2312 code */
|
|
wide = *unicode_str++;
|
|
|
|
gb_ch = ((USHORT *) obj->csconvert_datatable)[
|
|
((USHORT *) obj->
|
|
csconvert_misc)[
|
|
(USHORT)
|
|
wide /
|
|
256] +
|
|
(wide % 256)];
|
|
if ((gb_ch == CS_CANT_MAP) && !(wide == CS_CANT_MAP)) {
|
|
*err_code = CS_CONVERT_ERROR;
|
|
break;
|
|
};
|
|
|
|
tmp1 = gb_ch / 256;
|
|
tmp2 = gb_ch % 256;
|
|
if (tmp1 == 0) { /* ASCII character */
|
|
*gb_str++ = tmp2;
|
|
gb_len--;
|
|
unicode_len -= sizeof(*unicode_str);
|
|
continue;
|
|
};
|
|
if (gb_len < 2) {
|
|
*err_code = CS_TRUNCATION_ERROR;
|
|
break;
|
|
}
|
|
else {
|
|
assert(GB1(tmp1));
|
|
assert(GB2(tmp2));
|
|
*gb_str++ = tmp1;
|
|
*gb_str++ = tmp2;
|
|
unicode_len -= sizeof(*unicode_str);
|
|
gb_len -= 2;
|
|
};
|
|
}
|
|
if (unicode_len && !*err_code) {
|
|
*err_code = CS_TRUNCATION_ERROR;
|
|
}
|
|
*err_position = src_start - unicode_len;
|
|
return ((gb_str - start) * sizeof(*gb_str));
|
|
}
|
|
|
|
|
|
USHORT CVGB_check_gb2312(gb_str, gb_len)
|
|
UCHAR *gb_str;
|
|
USHORT gb_len;
|
|
{
|
|
/**************************************
|
|
* Functional description
|
|
* Make sure that the GB2312 string does not have any truncated 2 byte
|
|
* character at the end.
|
|
* If we have a truncated character then,
|
|
* return 1.
|
|
* else return(0);
|
|
**************************************/
|
|
UCHAR c1;
|
|
|
|
while (gb_len--) {
|
|
c1 = *gb_str;
|
|
if (GB1(c1)) { /* Is it GB2312 */
|
|
if (gb_len == 0) /* truncated GB2312 */
|
|
return (1);
|
|
else {
|
|
gb_str += 2;
|
|
gb_len -= 1;
|
|
}
|
|
}
|
|
else { /* it is a ASCII */
|
|
|
|
gb_str++;
|
|
}
|
|
}
|
|
return (0);
|
|
}
|
|
|
|
|
|
USHORT CVGB_gb2312_byte2short(obj, dst, dst_len, src, src_len, err_code,
|
|
err_position)
|
|
CSCONVERT obj;
|
|
UCHAR *dst;
|
|
USHORT dst_len;
|
|
UCHAR *src;
|
|
USHORT src_len;
|
|
SSHORT *err_code;
|
|
USHORT *err_position;
|
|
{
|
|
/**************************************
|
|
* Functional description
|
|
* Convert len number of bytes of GB2312 string in
|
|
* src (char-based buffer) into dst (short-based buffer).
|
|
* This routine merges:
|
|
* 1-byte ASCII into 1 short, and
|
|
* 2-byte GB2312 character into 1 short.
|
|
*
|
|
**************************************/
|
|
USHORT x;
|
|
UCHAR *dst_start;
|
|
USHORT src_start = src_len;
|
|
|
|
assert(src != NULL || dst == NULL);
|
|
assert(err_code != NULL);
|
|
assert(err_position != NULL);
|
|
assert(obj != NULL);
|
|
|
|
*err_code = 0;
|
|
/* Length estimate needed? */
|
|
if (dst == NULL)
|
|
return (2 * src_len); /* worst case */
|
|
|
|
dst_start = dst;
|
|
while (src_len && (dst_len > (sizeof(USHORT) - 1))) {
|
|
if (GB1(*src)) {
|
|
if (src_len < 2) {
|
|
*err_code = CS_BAD_INPUT;
|
|
break;
|
|
};
|
|
x = (*src << 8) + (*(src + 1));
|
|
src += 2;
|
|
src_len -= 2;
|
|
}
|
|
else {
|
|
x = *src++;
|
|
src_len--;
|
|
};
|
|
*(USHORT *) dst = x; /* Assumes alignment */
|
|
dst += sizeof(USHORT);
|
|
dst_len -= sizeof(USHORT);
|
|
}
|
|
if (src_len && !*err_code)
|
|
*err_code = CS_TRUNCATION_ERROR;
|
|
*err_position = src_start - src_len;
|
|
return (dst - dst_start) * sizeof(*dst);
|
|
}
|
|
|
|
|
|
SSHORT CVGB_gb2312_mbtowc(obj, wc, src, src_len)
|
|
CSCONVERT obj;
|
|
WCHAR *wc;
|
|
UCHAR *src;
|
|
USHORT src_len;
|
|
{
|
|
/**************************************
|
|
* Functional description
|
|
* Grab a single character from a mb stream.
|
|
*
|
|
**************************************/
|
|
assert(src != NULL);
|
|
assert(obj != NULL);
|
|
|
|
if (!src_len)
|
|
return -1;
|
|
|
|
if (GB1(*src)) {
|
|
if (src_len < 2) {
|
|
return -1;
|
|
};
|
|
if (wc)
|
|
*wc = (*src << 8) + (*(src + 1));
|
|
return 2;
|
|
}
|
|
else {
|
|
if (wc)
|
|
*wc = *src++;
|
|
return 1;
|
|
};
|
|
}
|