2004-08-12 07:17:49 +02:00
|
|
|
/*
|
|
|
|
* PROGRAM: JRD International support
|
|
|
|
* MODULE: intlobj_new.h
|
|
|
|
* DESCRIPTION: New international text handling definitions (DRAFT)
|
|
|
|
*
|
|
|
|
* The contents of this file are subject to the Initial
|
|
|
|
* Developer's Public License Version 1.0 (the "License");
|
|
|
|
* you may not use this file except in compliance with the
|
|
|
|
* License. You may obtain a copy of the License at
|
|
|
|
* http://www.ibphoenix.com/main.nfs?a=ibphoenix&page=ibp_idpl.
|
|
|
|
*
|
|
|
|
* Software distributed under the License is distributed AS IS,
|
|
|
|
* WITHOUT WARRANTY OF ANY KIND, either express or implied.
|
|
|
|
* See the License for the specific language governing rights
|
|
|
|
* and limitations under the License.
|
|
|
|
*
|
|
|
|
* The Original Code was created by Nickolay Samofatov
|
|
|
|
* for the Firebird Open Source RDBMS project.
|
|
|
|
*
|
|
|
|
* Copyright (c) 2004 Nickolay Samofatov <nickolay@broadviewsoftware.com>
|
|
|
|
* and all contributors signed below.
|
|
|
|
*
|
|
|
|
* All Rights Reserved.
|
|
|
|
* Contributor(s): ______________________________________.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef JRD_INTLOBJ_NEW_H
|
|
|
|
#define JRD_INTLOBJ_NEW_H
|
|
|
|
|
|
|
|
/*
 * Basic scalar types used throughout this interface.
 *
 * The fallback definitions below are supplied only when INCLUDE_FB_TYPES_H
 * is not defined (presumably that macro is the include guard of the engine's
 * own type header - confirm).
 */
#ifndef INCLUDE_FB_TYPES_H
typedef unsigned short USHORT;
typedef short SSHORT;
typedef unsigned char UCHAR;
typedef char CHAR;
/*
 * SCHAR is required by the ASCII typedef below but was missing from this
 * fallback list, so the header could not be compiled on its own.
 * Plain char is used so that string literals can be assigned to
 * "const ASCII*" name fields.
 * NOTE(review): confirm this matches the engine's definition of SCHAR.
 */
typedef char SCHAR;
typedef unsigned char BYTE;

typedef unsigned int ULONG;
typedef int LONG;
typedef signed int SLONG;
#endif

/* Character type used for the debugging name strings of interface objects */
typedef SCHAR ASCII;

/* Boolean type used across the driver interface */
typedef USHORT INTL_BOOL;
|
|
|
|
|
|
|
|
/*
 * Opaque implementation types. They are only declared here; the collation
 * driver is expected to provide the actual definitions.
 */
struct CharSetImpl;
struct CsConvertImpl;
struct TextTypeImpl;

/*
 * Interface structures. They are declared ahead of their definitions because
 * the callback signatures below take pointers to them.
 */
struct charset;
struct csconvert;
struct texttype;
|
2004-08-12 07:17:49 +02:00
|
|
|
|
|
|
|
/*
 * Error sentinels returned by driver callbacks in place of a length:
 * -1 converted to the corresponding length type.
 */
#define INTL_BAD_KEY_LENGTH ((USHORT) -1)	/* key length error */
#define INTL_BAD_STR_LENGTH ((ULONG) -1)	/* string length error */
|
|
|
|
|
|
|
|
/* Returned value of INTL_BAD_KEY_LENGTH means that proposed key is too long */
|
|
|
|
typedef USHORT (*pfn_INTL_keylength) (
|
2004-08-12 21:21:03 +02:00
|
|
|
texttype* tt,
|
2004-08-12 07:17:49 +02:00
|
|
|
USHORT len
|
|
|
|
);
|
|
|
|
|
2004-09-15 03:36:13 +02:00
|
|
|
/*
 * Key types which may be requested from (and returned by) the str2key
 * routine.
 */

/* Full sort key */
#define INTL_KEY_SORT		0
/* Starting portion of sort key for equality class */
#define INTL_KEY_PARTIAL	1
/* Full key for the equality class of the string */
#define INTL_KEY_UNIQUE		2
|
|
|
|
|
2004-08-17 02:04:52 +02:00
|
|
|
/* Returned value of INTL_BAD_KEY_LENGTH means that key error happened during
|
|
|
|
key construction. When partial key is requested returned string should
|
|
|
|
complement collated comparison.
|
|
|
|
*/
|
2004-08-12 07:17:49 +02:00
|
|
|
typedef USHORT (*pfn_INTL_str2key) (
|
2004-08-12 21:21:03 +02:00
|
|
|
texttype* tt,
|
2004-08-12 07:17:49 +02:00
|
|
|
USHORT srcLen,
|
|
|
|
const UCHAR* src,
|
|
|
|
USHORT dstLen,
|
|
|
|
UCHAR* dst,
|
2004-09-15 03:36:13 +02:00
|
|
|
USHORT key_type
|
2004-08-12 07:17:49 +02:00
|
|
|
);
|
|
|
|
|
2004-08-17 02:04:52 +02:00
|
|
|
/* Collate two potentially long strings. According to SQL 2003 standard
|
|
|
|
collation is a process by which two strings are determined to be in exactly
|
|
|
|
one of the relationships of less than, greater than, or equal to one another.
|
2004-08-12 21:44:31 +02:00
|
|
|
*/
|
2004-08-17 02:04:52 +02:00
|
|
|
typedef SSHORT (*pfn_INTL_compare) (
|
2004-08-12 21:21:03 +02:00
|
|
|
texttype* tt,
|
2004-08-12 07:17:49 +02:00
|
|
|
ULONG len1,
|
|
|
|
const UCHAR* str1,
|
|
|
|
ULONG len2,
|
|
|
|
const UCHAR* str2,
|
|
|
|
INTL_BOOL* error_flag
|
|
|
|
);
|
|
|
|
|
|
|
|
/* Returns resulting string length in bytes or INTL_BAD_STR_LENGTH in case of error */
|
|
|
|
typedef ULONG (*pfn_INTL_str2case) (
|
2004-08-12 21:21:03 +02:00
|
|
|
texttype* tt,
|
2004-08-12 07:17:49 +02:00
|
|
|
ULONG srcLen,
|
|
|
|
const UCHAR* src,
|
|
|
|
ULONG dstLen,
|
|
|
|
UCHAR* dst
|
|
|
|
);
|
|
|
|
|
2004-09-09 21:24:36 +02:00
|
|
|
/*
|
|
|
|
Places exactly texttype_canonical_width number of bytes into dst for each character from src.
|
|
|
|
Returns INTL_BAD_STR_LENGTH in case of error or number of characters processed if successful.
|
2004-08-17 02:04:52 +02:00
|
|
|
*/
|
2004-09-09 21:24:36 +02:00
|
|
|
typedef ULONG (*pfn_INTL_canonical) (
|
2004-08-12 21:21:03 +02:00
|
|
|
texttype* t,
|
2004-08-12 07:17:49 +02:00
|
|
|
ULONG srcLen,
|
|
|
|
const UCHAR* src,
|
|
|
|
ULONG dstLen,
|
2004-08-12 21:21:03 +02:00
|
|
|
UCHAR* dst
|
2004-08-12 07:17:49 +02:00
|
|
|
);
|
|
|
|
|
|
|
|
/*
 * Driver callback: releases resources associated with a collation.
 *
 *   tt - collation object being destroyed
 *
 * The "struct" keyword is spelled out so the declaration is valid in C as
 * well as in C++.
 */
typedef void (*pfn_INTL_tt_destroy)(struct texttype* tt);
|
|
|
|
|
2004-09-14 03:06:31 +02:00
|
|
|
/* Bit values for the texttype_flags field */

/* Pattern-matching may be performed directly on the string without going
   to canonical form */
#define TEXTTYPE_DIRECT_MATCH		0x01

/* Full key does not define equality class. To be used with multi-level
   collations which are case- or accent-insensitive */
#define TEXTTYPE_SEPARATE_UNIQUE	0x02

/* Unique keys may not be used for ordered access, such as for a multi-level
   collation having weights (char, case, accent) which is case-insensitive,
   but accent-sensitive */
#define TEXTTYPE_UNSORTED_UNIQUE	0x04
|
|
|
|
|
2004-08-12 07:17:49 +02:00
|
|
|
|
|
|
|
typedef struct texttype {
|
|
|
|
// Data which needs to be initialized by collation driver
|
|
|
|
USHORT texttype_version; /* version ID of object */
|
2004-08-18 05:09:47 +02:00
|
|
|
TextTypeImpl* texttype_impl; /* collation object implemented in driver */
|
2004-09-14 03:06:31 +02:00
|
|
|
|
|
|
|
/* Used only for debugging purposes. Should contain string in form
|
|
|
|
<charset>.<collation>. For example "WIN1251.PXW_CYRL"
|
|
|
|
*/
|
2004-08-12 07:17:49 +02:00
|
|
|
const ASCII* texttype_name;
|
2004-09-14 03:06:31 +02:00
|
|
|
|
2004-08-12 07:17:49 +02:00
|
|
|
SSHORT texttype_country; /* ID of base country values */
|
|
|
|
BYTE texttype_canonical_width; /* number bytes in canonical character representation */
|
2004-08-17 02:04:52 +02:00
|
|
|
|
2004-09-14 03:06:31 +02:00
|
|
|
USHORT texttype_flags; /* Misc texttype flags filled by driver */
|
|
|
|
|
2004-08-17 02:04:52 +02:00
|
|
|
/* do we logically pad string with spaces for comparison purposes.
|
|
|
|
this is the job of string_to_key and compare routines to care or not to
|
|
|
|
care about trailing spaces */
|
|
|
|
INTL_BOOL texttype_pad_option;
|
2004-08-12 07:17:49 +02:00
|
|
|
|
|
|
|
/* If not set key length is assumed to be equal to string length */
|
|
|
|
pfn_INTL_keylength texttype_fn_key_length; /* Return key length for given string */
|
|
|
|
|
|
|
|
/* If not set string itself is used as a key */
|
|
|
|
pfn_INTL_str2key texttype_fn_string_to_key;
|
|
|
|
|
2004-08-12 21:44:31 +02:00
|
|
|
/* If not set string is assumed to be binary-comparable both for sorting and equality purposes */
|
|
|
|
pfn_INTL_compare texttype_fn_compare;
|
2004-08-12 07:17:49 +02:00
|
|
|
|
|
|
|
/* If not set string is converted to Unicode and then uppercased via default case folding table */
|
|
|
|
pfn_INTL_str2case texttype_fn_str_to_upper; /* Convert string to uppercase */
|
|
|
|
|
|
|
|
/* If not set string is converted to Unicode and then lowercased via default case folding table */
|
|
|
|
pfn_INTL_str2case texttype_fn_str_to_lower; /* Convert string to lowercase */
|
|
|
|
|
2004-08-18 05:09:47 +02:00
|
|
|
/* If not set for fixed width charset string itself is used as canonical
|
|
|
|
representation. If not set for MBCS charset string converted to UTF-32
|
|
|
|
Normalization Form C is used as canonical representation */
|
2004-08-12 07:17:49 +02:00
|
|
|
pfn_INTL_canonical texttype_fn_canonical; /* convert string to canonical representation for equality */
|
|
|
|
|
|
|
|
/* May be omitted if not needed */
|
|
|
|
pfn_INTL_tt_destroy texttype_fn_destroy; /* release resources associated with collation */
|
2004-08-12 21:21:03 +02:00
|
|
|
|
|
|
|
/* Some space for future extension of collation interface */
|
|
|
|
void* reserved_for_interface[5];
|
|
|
|
|
|
|
|
/* Some space which may be freely used by collation driver */
|
|
|
|
void* reserved_for_driver[10];
|
2004-08-12 07:17:49 +02:00
|
|
|
} *TEXTTYPE;
|
|
|
|
|
|
|
|
// Returns resulting string length or INTL_BAD_STR_LENGTH in case of error
|
|
|
|
typedef ULONG (*pfn_INTL_convert) (
|
2004-08-12 21:51:26 +02:00
|
|
|
csconvert* cv,
|
2004-08-12 07:17:49 +02:00
|
|
|
ULONG srcLen,
|
|
|
|
const UCHAR* src,
|
|
|
|
ULONG dstLen,
|
|
|
|
UCHAR* dst,
|
|
|
|
USHORT* error_code,
|
|
|
|
ULONG* offending_source_character
|
|
|
|
);
|
|
|
|
|
2004-08-12 21:51:26 +02:00
|
|
|
/*
 * Driver callback: releases resources associated with a conversion.
 *
 *   cv - conversion object being destroyed
 *
 * The "struct" keyword is spelled out so the declaration is valid in C as
 * well as in C++.
 */
typedef void (*pfn_INTL_cv_destroy)(struct csconvert* cv);
|
|
|
|
|
2004-08-12 07:17:49 +02:00
|
|
|
struct csconvert {
|
|
|
|
USHORT csconvert_version;
|
2004-08-18 05:09:47 +02:00
|
|
|
CsConvertImpl* csconvert_impl;
|
2004-08-17 02:04:52 +02:00
|
|
|
|
|
|
|
/* Used only for debugging purposes. Should contain string in form
|
|
|
|
<source_charset>-><destination_charset>. For example "WIN1251->DOS866"
|
|
|
|
*/
|
|
|
|
const ASCII* csconvert_name;
|
2004-08-12 07:17:49 +02:00
|
|
|
|
2004-08-12 21:21:03 +02:00
|
|
|
/* Conversion routine. Must be present. */
|
2004-08-12 21:51:26 +02:00
|
|
|
pfn_INTL_convert csconvert_fn_convert;
|
|
|
|
|
|
|
|
/* May be omitted if not needed. Is not called for collations embedded into charset interface */
|
|
|
|
pfn_INTL_cv_destroy csconvert_fn_destroy;
|
2004-08-12 21:21:03 +02:00
|
|
|
|
|
|
|
/* Some space for future extension of conversion interface */
|
|
|
|
void* reserved_for_interface[2];
|
|
|
|
|
|
|
|
/* Some space which may be freely used by conversion driver */
|
|
|
|
void* reserved_for_driver[10];
|
2004-08-12 07:17:49 +02:00
|
|
|
};
|
|
|
|
|
|
|
|
/* Error codes reported by conversion routines */

/* output buffer too small */
#define CS_TRUNCATION_ERROR	1
/* can't remap a character */
#define CS_CONVERT_ERROR	2
/* input string detected as bad */
#define CS_BAD_INPUT		3

/* Flag table entries that don't map */
#define CS_CANT_MAP		0
|
|
|
|
|
|
|
|
|
|
|
|
/* Returns whether string is well-formed or not */
|
|
|
|
typedef INTL_BOOL (*pfn_INTL_well_formed) (
|
2004-08-12 21:21:03 +02:00
|
|
|
charset* cs,
|
|
|
|
ULONG len,
|
2004-08-13 21:27:45 +02:00
|
|
|
const UCHAR* str
|
2004-08-12 07:17:49 +02:00
|
|
|
);
|
|
|
|
|
2004-08-12 21:21:03 +02:00
|
|
|
/* Extracts a portion from a string. Returns INTL_BAD_STR_LENGTH in case of problems. */
|
|
|
|
typedef ULONG (*pfn_INTL_substring) (
|
|
|
|
charset* cs,
|
|
|
|
ULONG srcLen,
|
|
|
|
const UCHAR* src,
|
|
|
|
ULONG dstLen,
|
|
|
|
UCHAR* dst,
|
|
|
|
ULONG startPos,
|
|
|
|
ULONG length
|
|
|
|
);
|
|
|
|
|
|
|
|
/* Measures the length of string in characters. Returns INTL_BAD_STR_LENGTH in case of problems. */
|
|
|
|
typedef ULONG (*pfn_INTL_length) (
|
|
|
|
charset* cs,
|
|
|
|
ULONG srcLen,
|
|
|
|
const UCHAR* src
|
|
|
|
);
|
|
|
|
|
2004-08-12 21:51:26 +02:00
|
|
|
/*
 * Driver callback: releases resources associated with a charset.
 *
 *   cs - character set object being destroyed
 *
 * The "struct" keyword is spelled out so the declaration is valid in C as
 * well as in C++.
 */
typedef void (*pfn_INTL_cs_destroy)(struct charset* cs);
|
|
|
|
|
2004-09-14 03:06:31 +02:00
|
|
|
/* Bit values for the charset_flags field */

/* MBCS strings may overflow declared lengths in characters
   (but not in bytes) */
#define CHARSET_LEGACY_SEMANTICS	0x01
|
|
|
|
|
2004-08-12 07:17:49 +02:00
|
|
|
struct charset
|
|
|
|
{
|
|
|
|
USHORT charset_version;
|
2004-08-18 05:09:47 +02:00
|
|
|
CharSetImpl* charset_impl;
|
2004-08-12 07:17:49 +02:00
|
|
|
const ASCII* charset_name;
|
|
|
|
BYTE charset_min_bytes_per_char;
|
|
|
|
BYTE charset_max_bytes_per_char;
|
2004-08-17 02:04:52 +02:00
|
|
|
BYTE charset_space_length; /* Length of space character in bytes */
|
|
|
|
const BYTE* charset_space_character; /* Space character, may be used for string padding */
|
2004-09-14 03:06:31 +02:00
|
|
|
USHORT charset_flags; /* Misc charset flags filled by driver */
|
2004-08-12 07:17:49 +02:00
|
|
|
|
2004-08-18 00:21:19 +02:00
|
|
|
/* Conversions to and from UTF-16 intermediate encodings. BOM marker should not be used.
|
2004-08-17 19:30:39 +02:00
|
|
|
Endianness of transient encoding is the native endianness for the platform */
|
2004-08-18 05:09:47 +02:00
|
|
|
csconvert charset_to_unicode; /* Result of this conversion should be in Normalization Form C */
|
2004-08-12 07:17:49 +02:00
|
|
|
csconvert charset_from_unicode;
|
|
|
|
|
2004-08-12 21:21:03 +02:00
|
|
|
/* If omitted any string is considered well-formed */
|
2004-08-12 21:51:26 +02:00
|
|
|
pfn_INTL_well_formed charset_fn_well_formed;
|
2004-08-12 21:21:03 +02:00
|
|
|
|
2004-08-12 07:17:49 +02:00
|
|
|
/* If not set Unicode representation is used to measure string length. */
|
2004-08-12 21:51:26 +02:00
|
|
|
pfn_INTL_length charset_fn_length; /* get length of string in characters */
|
2004-08-12 07:17:49 +02:00
|
|
|
|
|
|
|
/* May be omitted for fixed-width character sets.
|
|
|
|
If not present for MBCS charset string operation is performed by the engine
|
|
|
|
via intermediate translation of string to Unicode */
|
|
|
|
pfn_INTL_substring charset_fn_substring; /* get a portion of string */
|
2004-08-12 21:21:03 +02:00
|
|
|
|
2004-08-12 21:51:26 +02:00
|
|
|
/* May be omitted if not needed. Is not called for collations embedded into charset interface */
|
|
|
|
pfn_INTL_cs_destroy charset_fn_destroy;
|
|
|
|
|
2004-08-12 21:21:03 +02:00
|
|
|
/* Some space for future extension of charset interface */
|
|
|
|
void* reserved_for_interface[5];
|
|
|
|
|
|
|
|
/* Some space which may be freely used by charset driver */
|
|
|
|
void* reserved_for_driver[10];
|
2004-08-12 07:17:49 +02:00
|
|
|
};
|
|
|
|
|
|
|
|
#endif /* JRD_INTLOBJ_NEW_H */
|
|
|
|
|