8
0
mirror of https://github.com/FirebirdSQL/firebird.git synced 2025-01-26 06:43:04 +01:00
firebird-mirror/src/jrd/intlobj_new.h

364 lines
12 KiB
C
Raw Normal View History

2004-08-12 07:17:49 +02:00
/*
* PROGRAM: JRD International support
* MODULE: intlobj_new.h
2005-05-28 00:45:31 +02:00
* DESCRIPTION: New international text handling definitions
2004-08-12 07:17:49 +02:00
*
* The contents of this file are subject to the Initial
* Developer's Public License Version 1.0 (the "License");
* you may not use this file except in compliance with the
* License. You may obtain a copy of the License at
* http://www.ibphoenix.com/main.nfs?a=ibphoenix&page=ibp_idpl.
*
* Software distributed under the License is distributed AS IS,
* WITHOUT WARRANTY OF ANY KIND, either express or implied.
* See the License for the specific language governing rights
* and limitations under the License.
*
* The Original Code was created by Nickolay Samofatov
* for the Firebird Open Source RDBMS project.
*
* Copyright (c) 2004 Nickolay Samofatov <nickolay@broadviewsoftware.com>
* and all contributors signed below.
*
* All Rights Reserved.
* Contributor(s): ______________________________________.
*
*/
#ifndef JRD_INTLOBJ_NEW_H
#define JRD_INTLOBJ_NEW_H
#ifndef INCLUDE_FB_TYPES_H
/* Fallback definitions of the basic integer types, used when
   INCLUDE_FB_TYPES_H is not defined (presumably the include guard of the
   project's own type header). */
typedef unsigned short USHORT;
typedef short SSHORT;
typedef unsigned char UCHAR;
typedef char CHAR;
/* SCHAR is required by the ASCII typedef below. Plain char allows string
   literals to be assigned to const ASCII* without a cast.
   NOTE(review): confirm this matches the SCHAR definition in the project's
   type header. */
typedef char SCHAR;
typedef unsigned char BYTE;
typedef unsigned int ULONG;
typedef int LONG;
typedef signed int SLONG;
#endif

/* Character type used for the name strings of the interface objects */
typedef SCHAR ASCII;
/* Boolean type used in the interface signatures */
typedef USHORT INTL_BOOL;
#ifdef __cplusplus
namespace {
#endif

/* Forward declarations of opaque types to be implemented in the collation
   driver. This header only ever uses pointers to them. */
struct TextTypeImpl;
struct CharSetImpl;
struct CsConvertImpl;

#ifdef __cplusplus
}
#endif
2004-08-12 07:17:49 +02:00
/* Forward declarations of the interface structures, so that the function
   pointer signatures below can refer to them before they are defined. */
struct texttype; /* forward decl for the fn signatures before the struct itself. */
struct csconvert;
struct charset;
2004-08-12 07:17:49 +02:00
/* Sentinel return values: the all-ones value of the respective unsigned
   type, returned by the routines below to report a failure. */
#define INTL_BAD_KEY_LENGTH ((USHORT)(-1))
#define INTL_BAD_STR_LENGTH ((ULONG)(-1))
/* Returns the key length for a string of length len in collation tt.
   A returned value of INTL_BAD_KEY_LENGTH means that the proposed key is too long. */
typedef USHORT (*pfn_INTL_keylength) (
    struct texttype* tt,
    USHORT len
);
/* Types of the keys which may be returned by str2key routine
   (values for its key_type argument) */
#define INTL_KEY_SORT 0 /* Full sort key */
#define INTL_KEY_PARTIAL 1 /* Starting portion of sort key for equality class */
#define INTL_KEY_UNIQUE 2 /* Full key for the equality class of the string */
2004-08-17 02:04:52 +02:00
/* Returned value of INTL_BAD_KEY_LENGTH means that key error happened during
key construction. When partial key is requested returned string should
complement collated comparison.
*/
2004-08-12 07:17:49 +02:00
typedef USHORT (*pfn_INTL_str2key) (
2004-08-12 21:21:03 +02:00
texttype* tt,
2004-08-12 07:17:49 +02:00
USHORT srcLen,
const UCHAR* src,
USHORT dstLen,
UCHAR* dst,
USHORT key_type
2004-08-12 07:17:49 +02:00
);
2004-08-17 02:04:52 +02:00
/* Collate two potentially long strings. According to SQL 2003 standard
collation is a process by which two strings are determined to be in exactly
one of the relationships of less than, greater than, or equal to one another.
2004-08-12 21:44:31 +02:00
*/
2004-08-17 02:04:52 +02:00
typedef SSHORT (*pfn_INTL_compare) (
2004-08-12 21:21:03 +02:00
texttype* tt,
2004-08-12 07:17:49 +02:00
ULONG len1,
const UCHAR* str1,
ULONG len2,
const UCHAR* str2,
INTL_BOOL* error_flag
);
/* Case conversion routine (used for both the to-upper and to-lower slots
   of texttype).
   Returns resulting string length in bytes or INTL_BAD_STR_LENGTH in case of error. */
typedef ULONG (*pfn_INTL_str2case) (
    struct texttype* tt,
    ULONG srcLen,
    const UCHAR* src,
    ULONG dstLen,
    UCHAR* dst
);
/*
   Places exactly texttype_canonical_width number of bytes into dst for each
   character from src.
   Returns INTL_BAD_STR_LENGTH in case of error or the number of characters
   processed if successful.
*/
typedef ULONG (*pfn_INTL_canonical) (
    struct texttype* tt,
    ULONG srcLen,
    const UCHAR* src,
    ULONG dstLen,
    UCHAR* dst
);
/* Releases resources associated with collation */
typedef void (*pfn_INTL_tt_destroy) (
    struct texttype* tt
);
2005-05-28 00:45:31 +02:00
/* texttype version (presumably the value stored in texttype_version) */
#define TEXTTYPE_VERSION_1 1
/* texttype flag values (presumably bits combined in texttype_flags) */
#define TEXTTYPE_DIRECT_MATCH 1 /* Pattern-matching may be performed directly on
                                   string without going to canonical form */
#define TEXTTYPE_SEPARATE_UNIQUE 2 /* Full key does not define equality class.
                                      To be used with multi-level collations which are
                                      case- or accent- insensitive */
#define TEXTTYPE_UNSORTED_UNIQUE 4 /* Unique keys may not be used for ordered access,
                                      such as for multi-level collation having weights
                                      (char, case, accent) which is case-insensitive,
                                      but accent-sensitive */
2004-08-12 07:17:49 +02:00
typedef struct texttype {
2005-05-28 00:45:31 +02:00
/* Data which needs to be initialized by collation driver */
2004-08-12 07:17:49 +02:00
USHORT texttype_version; /* version ID of object */
2004-08-18 05:09:47 +02:00
TextTypeImpl* texttype_impl; /* collation object implemented in driver */
/* Used only for debugging purposes. Should contain string in form
<charset>.<collation>. For example "WIN1251.PXW_CYRL"
*/
2004-08-12 07:17:49 +02:00
const ASCII* texttype_name;
2004-08-12 07:17:49 +02:00
SSHORT texttype_country; /* ID of base country values */
BYTE texttype_canonical_width; /* number bytes in canonical character representation */
2004-08-17 02:04:52 +02:00
USHORT texttype_flags; /* Misc texttype flags filled by driver */
2004-08-17 02:04:52 +02:00
/* do we logically pad string with spaces for comparison purposes.
this is the job of string_to_key and compare routines to care or not to
care about trailing spaces */
INTL_BOOL texttype_pad_option;
2004-08-12 07:17:49 +02:00
2005-05-28 00:45:31 +02:00
/* If not set for fixed width charset key length is assumed to be equal to string length.
If not set for MBCS key length is assumed to be equal to length of string converted to BOCU-1. */
2004-08-12 07:17:49 +02:00
pfn_INTL_keylength texttype_fn_key_length; /* Return key length for given string */
2005-05-28 00:45:31 +02:00
/* If not set for fixed width charset string itself is used as a key with binary lexical ordering.
If not set for MBCS string converted to BOCU-1 is used as a key with UCS_BASIC ordering. */
2004-08-12 07:17:49 +02:00
pfn_INTL_str2key texttype_fn_string_to_key;
2005-05-28 00:45:31 +02:00
/* If not set for fixed width charset string itself is assumed to be binary-comparable both for sorting
and equality purposes. If not set for MBCS string converted to UTF-16 is compared. */
2004-08-12 21:44:31 +02:00
pfn_INTL_compare texttype_fn_compare;
2004-08-12 07:17:49 +02:00
2005-05-28 00:45:31 +02:00
/* If not set string is converted to Unicode and then uppercased via default case folding table.
NOTE: Source buffer may be used by engine as a target for conversion.
Driver must handle this situation appropriately. */
2004-08-12 07:17:49 +02:00
pfn_INTL_str2case texttype_fn_str_to_upper; /* Convert string to uppercase */
2005-05-28 00:45:31 +02:00
/* If not set string is converted to Unicode and then lowercased via default case folding table.
NOTE: Source buffer may be used by engine as a target for conversion.
Driver must handle this situation appropriately. */
2004-08-12 07:17:49 +02:00
pfn_INTL_str2case texttype_fn_str_to_lower; /* Convert string to lowercase */
2004-08-18 05:09:47 +02:00
/* If not set for fixed width charset string itself is used as canonical
representation. If not set for MBCS charset string converted to UTF-32
2005-05-28 00:45:31 +02:00
is used as canonical representation */
2004-08-12 07:17:49 +02:00
pfn_INTL_canonical texttype_fn_canonical; /* convert string to canonical representation for equality */
/* May be omitted if not needed */
pfn_INTL_tt_destroy texttype_fn_destroy; /* release resources associated with collation */
2004-08-12 21:21:03 +02:00
/* Some space for future extension of collation interface */
void* reserved_for_interface[5];
/* Some space which may be freely used by collation driver */
void* reserved_for_driver[10];
2004-08-12 07:17:49 +02:00
} *TEXTTYPE;
2005-05-28 00:45:31 +02:00
/* Returns resulting string length or INTL_BAD_STR_LENGTH in case of error */
2004-08-12 07:17:49 +02:00
typedef ULONG (*pfn_INTL_convert) (
csconvert* cv,
2004-08-12 07:17:49 +02:00
ULONG srcLen,
const UCHAR* src,
ULONG dstLen,
UCHAR* dst,
USHORT* error_code,
ULONG* offending_source_character
);
/* Releases resources associated with conversion */
typedef void (*pfn_INTL_cv_destroy) (
    struct csconvert* cv
);
2005-05-28 00:45:31 +02:00
/* csconvert version */
#define CSCONVERT_VERSION_1 1
2004-08-12 07:17:49 +02:00
struct csconvert {
USHORT csconvert_version;
2004-08-18 05:09:47 +02:00
CsConvertImpl* csconvert_impl;
2004-08-17 02:04:52 +02:00
/* Used only for debugging purposes. Should contain string in form
<source_charset>-><destination_charset>. For example "WIN1251->DOS866"
*/
const ASCII* csconvert_name;
2004-08-12 07:17:49 +02:00
2004-08-12 21:21:03 +02:00
/* Conversion routine. Must be present. */
pfn_INTL_convert csconvert_fn_convert;
2005-05-28 00:45:31 +02:00
/* May be omitted if not needed. Is not called for converters embedded into charset interface */
pfn_INTL_cv_destroy csconvert_fn_destroy;
2004-08-12 21:21:03 +02:00
/* Some space for future extension of conversion interface */
void* reserved_for_interface[2];
/* Some space which may be freely used by conversion driver */
void* reserved_for_driver[10];
2004-08-12 07:17:49 +02:00
};
/* Conversion error codes
   (presumably the values reported through the error_code argument of the
   conversion routine - confirm against the converters) */
#define CS_TRUNCATION_ERROR 1 /* output buffer too small */
#define CS_CONVERT_ERROR 2 /* can't remap a character */
#define CS_BAD_INPUT 3 /* input string detected as bad */
#define CS_CANT_MAP 0 /* Flag table entries that don't map */
/* Returns whether string is well-formed or not.
   NOTE(review): offending_position presumably receives the position of the
   first malformed sequence - confirm against the drivers. */
typedef INTL_BOOL (*pfn_INTL_well_formed) (
    struct charset* cs,
    ULONG len,
    const UCHAR* str,
    ULONG* offending_position
);
2004-08-12 21:21:03 +02:00
/* Extracts a portion from a string: the part of src/srcLen selected by
   startPos and length is placed into dst/dstLen.
   Returns INTL_BAD_STR_LENGTH in case of problems. */
typedef ULONG (*pfn_INTL_substring) (
    struct charset* cs,
    ULONG srcLen,
    const UCHAR* src,
    ULONG dstLen,
    UCHAR* dst,
    ULONG startPos,
    ULONG length
);
/* Measures the length of string in characters.
   Returns INTL_BAD_STR_LENGTH in case of problems. */
typedef ULONG (*pfn_INTL_length) (
    struct charset* cs,
    ULONG srcLen,
    const UCHAR* src
);
/* Releases resources associated with charset */
typedef void (*pfn_INTL_cs_destroy) (
    struct charset* cs
);
2005-05-28 00:45:31 +02:00
/* charset version */
#define CHARSET_VERSION_1 1

/* charset flag values */
#define CHARSET_LEGACY_SEMANTICS 1 /* MBCS strings may overflow declared lengths
                                      in characters (but not in bytes) */
#define CHARSET_ASCII_BASED 2 /* Value of ASCII characters is equal to the
                                 ASCII character set */
2004-08-12 07:17:49 +02:00
struct charset
{
USHORT charset_version;
2004-08-18 05:09:47 +02:00
CharSetImpl* charset_impl;
2004-08-12 07:17:49 +02:00
const ASCII* charset_name;
BYTE charset_min_bytes_per_char;
BYTE charset_max_bytes_per_char;
2004-08-17 02:04:52 +02:00
BYTE charset_space_length; /* Length of space character in bytes */
const BYTE* charset_space_character; /* Space character, may be used for string padding */
USHORT charset_flags; /* Misc charset flags filled by driver */
2004-08-12 07:17:49 +02:00
/* Conversions to and from UTF-16 intermediate encodings. BOM marker should not be used.
Endianness of transient encoding is the native endianness for the platform */
2004-08-18 05:09:47 +02:00
csconvert charset_to_unicode; /* Result of this conversion should be in Normalization Form C */
2004-08-12 07:17:49 +02:00
csconvert charset_from_unicode;
2004-08-12 21:21:03 +02:00
/* If omitted any string is considered well-formed */
pfn_INTL_well_formed charset_fn_well_formed;
2004-08-12 21:21:03 +02:00
2004-08-12 07:17:49 +02:00
/* If not set Unicode representation is used to measure string length. */
pfn_INTL_length charset_fn_length; /* get length of string in characters */
2004-08-12 07:17:49 +02:00
/* May be omitted for fixed-width character sets.
If not present for MBCS charset string operation is performed by the engine
via intermediate translation of string to Unicode */
pfn_INTL_substring charset_fn_substring; /* get a portion of string */
2004-08-12 21:21:03 +02:00
2005-05-28 00:45:31 +02:00
/* May be omitted if not needed */
pfn_INTL_cs_destroy charset_fn_destroy;
2004-08-12 21:21:03 +02:00
/* Some space for future extension of charset interface */
void* reserved_for_interface[5];
/* Some space which may be freely used by charset driver */
void* reserved_for_driver[10];
2004-08-12 07:17:49 +02:00
};
2005-05-28 00:45:31 +02:00
/* attributes passed by the engine to texttype entry-point
   (distinct bit values, so they can be combined in one USHORT) */
#define TEXTTYPE_ATTR_PAD_SPACE 1
#define TEXTTYPE_ATTR_CASE_INSENSITIVE 2
#define TEXTTYPE_ATTR_ACCENT_INSENSITIVE 4
/* typedef for texttype lookup entry-point.
   tt is the texttype object handed to the entry-point; attributes carries
   the TEXTTYPE_ATTR_* values passed by the engine.
   NOTE(review): the meaning of the INTL_BOOL result and the format of
   specific_attributes are not stated in this header - confirm against the
   drivers. */
typedef INTL_BOOL (*pfn_INTL_lookup_texttype) (
    struct texttype* tt,
    const ASCII* texttype_name,
    const ASCII* charset_name,
    USHORT attributes,
    const UCHAR* specific_attributes,
    ULONG specific_attributes_length,
    INTL_BOOL ignore_attributes
);
/* typedef for charset lookup entry-point.
   cs is the charset object handed to the entry-point, name the requested
   character set name.
   NOTE(review): the meaning of the INTL_BOOL result is not stated in this
   header - confirm against the drivers. */
typedef INTL_BOOL (*pfn_INTL_lookup_charset) (
    struct charset* cs,
    const ASCII* name
);
/* Identifiers of the lookup entry-points
   (presumably the functions a driver provides, with the signatures
   pfn_INTL_lookup_texttype and pfn_INTL_lookup_charset respectively) */
#define TEXTTYPE_ENTRYPOINT LD_lookup_texttype
#define CHARSET_ENTRYPOINT LD_lookup_charset
2004-08-12 07:17:49 +02:00
#endif /* JRD_INTLOBJ_NEW_H */