firebird-mirror/src/jrd/intlobj_new.h

/*
 *	PROGRAM:	JRD International support
 *	MODULE:		intlobj_new.h
 *	DESCRIPTION:	New international text handling definitions (DRAFT)
 *
 *  The contents of this file are subject to the Initial
 *  Developer's Public License Version 1.0 (the "License");
 *  you may not use this file except in compliance with the
 *  License. You may obtain a copy of the License at
 *  http://www.ibphoenix.com/main.nfs?a=ibphoenix&page=ibp_idpl.
 *
 *  Software distributed under the License is distributed AS IS,
 *  WITHOUT WARRANTY OF ANY KIND, either express or implied.
 *  See the License for the specific language governing rights
 *  and limitations under the License.
 *
 *  The Original Code was created by Nickolay Samofatov
 *  for the Firebird Open Source RDBMS project.
 *
 *  Copyright (c) 2004 Nickolay Samofatov <nickolay@broadviewsoftware.com>
 *  and all contributors signed below.
 *
 *  All Rights Reserved.
 *  Contributor(s): ______________________________________.
 *
 */

#ifndef JRD_INTLOBJ_NEW_H
#define JRD_INTLOBJ_NEW_H

/* Fallback definitions of the basic scalar types, used only when
   INCLUDE_FB_TYPES_H is not defined (presumably the include guard of the
   engine's own type header - confirm). */
#ifndef INCLUDE_FB_TYPES_H
/* SCHAR is required by the ASCII typedef below; without this fallback the
   header does not compile stand-alone. Plain char is used so that string
   literals can be assigned to the const ASCII* name members.
   NOTE(review): must agree with SCHAR as defined by the engine's type header. */
typedef char SCHAR;
typedef unsigned short USHORT;
typedef short SSHORT;
typedef unsigned char UCHAR;
typedef char CHAR;
typedef unsigned char BYTE;

typedef unsigned int ULONG;
typedef int LONG;
typedef signed int SLONG;
#endif

/* Character type used for the *_name strings of the interface structures */
typedef SCHAR ASCII;

/* Boolean type used by the INTL interface */
typedef USHORT INTL_BOOL;

/* Interface structures. They are declared up front because the callback
   signatures below refer to them before the structures themselves are
   defined. */
struct texttype;
struct charset;
struct csconvert;

/* Opaque implementation objects; these are to be implemented in the
   collation driver. */
struct TextTypeImpl;
struct CsConvertImpl;
struct CharSetImpl;

/* Sentinel return values: -1 converted to the (unsigned) return type of the
   routine is reserved to report a failure instead of a length. */
#define INTL_BAD_KEY_LENGTH ((USHORT)(-1))
#define INTL_BAD_STR_LENGTH ((ULONG)(-1))

/* Returns the key length for a string of the given length.
   A result of INTL_BAD_KEY_LENGTH means that the proposed key is too long. */
typedef USHORT (*pfn_INTL_keylength)(texttype* tt, USHORT len);

/* Types of the keys which may be returned by the str2key routine
   (see the key_type argument of pfn_INTL_str2key) */

#define INTL_KEY_SORT    0 /* Full sort key */
#define INTL_KEY_PARTIAL 1 /* Starting portion of sort key for equality class */
#define INTL_KEY_UNIQUE  2 /* Full key for the equality class of the string */

/* Builds a key of the requested type (key_type is one of the INTL_KEY_*
   values) from the source string.
   A returned value of INTL_BAD_KEY_LENGTH means that an error happened
   during key construction. When a partial key is requested, the returned
   string should complement collated comparison. */
typedef USHORT (*pfn_INTL_str2key)(
	texttype* tt,
	USHORT srcLen, const UCHAR* src,
	USHORT dstLen, UCHAR* dst,
	USHORT key_type);

/* Collates two potentially long strings. According to the SQL 2003 standard,
   collation is a process by which two strings are determined to be in
   exactly one of the relationships of less than, greater than, or equal to
   one another. */
typedef SSHORT (*pfn_INTL_compare)(
	texttype* tt,
	ULONG len1, const UCHAR* str1,
	ULONG len2, const UCHAR* str2,
	INTL_BOOL* error_flag);

/* Case conversion routine.
   Returns the resulting string length in bytes, or INTL_BAD_STR_LENGTH in
   case of error. */
typedef ULONG (*pfn_INTL_str2case)(
	texttype* tt,
	ULONG srcLen, const UCHAR* src,
	ULONG dstLen, UCHAR* dst);

/*
  Converts a string to its canonical representation.
  Places exactly texttype_canonical_width number of bytes into dst for each
  character from src.
  Returns the number of characters processed if successful, or
  INTL_BAD_STR_LENGTH in case of error.
 */
typedef ULONG (*pfn_INTL_canonical) (
	texttype* tt,	/* named tt like in every other texttype callback */
	ULONG srcLen,
	const UCHAR* src,
	ULONG dstLen,
	UCHAR* dst
);

/* Destructor callback: releases the resources associated with a collation */
typedef void (*pfn_INTL_tt_destroy)(texttype* tt);

/* texttype flag values (stored in texttype_flags, filled by the driver) */

#define TEXTTYPE_DIRECT_MATCH 1 /* Pattern-matching may be performed directly on
                                   the string without going to canonical form */

#define TEXTTYPE_SEPARATE_UNIQUE 2 /* Full key does not define equality class.
                                      To be used with multi-level collations which are
                                      case- or accent-insensitive */

#define TEXTTYPE_UNSORTED_UNIQUE 4 /* Unique keys may not be used for ordered access,
                                      such as for multi-level collation having weights
                                      (char, case, accent) which is case-insensitive,
                                      but accent-sensitive */


/* Collation descriptor. The members are filled in by the collation driver;
   for every optional routine the accompanying comment states what is assumed
   when it is left unset. TEXTTYPE is a pointer to this structure. */
typedef struct texttype {
	/* Data which needs to be initialized by collation driver */
	USHORT texttype_version;	/* version ID of object */
	TextTypeImpl* texttype_impl;   /* collation object implemented in driver */

	/* Used only for debugging purposes. Should contain a string of the form
	   <charset>.<collation>, for example "WIN1251.PXW_CYRL". */
	const ASCII* texttype_name;

	SSHORT texttype_country;	    /* ID of base country values */
	BYTE texttype_canonical_width;  /* number of bytes in canonical character representation */

	USHORT texttype_flags; /* Misc texttype flags (TEXTTYPE_* values) filled by driver */

	/* Whether strings are logically padded with spaces for comparison
	   purposes. It is the job of the string_to_key and compare routines to
	   care or not to care about trailing spaces. */
	INTL_BOOL texttype_pad_option;

	/* If not set, key length is assumed to be equal to string length */
	pfn_INTL_keylength	texttype_fn_key_length; /* Return key length for given string */

	/* If not set, the string itself is used as a key */
	pfn_INTL_str2key	texttype_fn_string_to_key;

	/* If not set, strings are assumed to be binary-comparable both for
	   sorting and equality purposes */
	pfn_INTL_compare	texttype_fn_compare;

	/* If not set, the string is converted to Unicode and then uppercased via
	   the default case folding table */
	pfn_INTL_str2case	texttype_fn_str_to_upper;	/* Convert string to uppercase */

	/* If not set, the string is converted to Unicode and then lowercased via
	   the default case folding table */
	pfn_INTL_str2case	texttype_fn_str_to_lower;	/* Convert string to lowercase */

	/* If not set for a fixed width charset, the string itself is used as
	   canonical representation. If not set for an MBCS charset, the string
	   converted to UTF-32 Normalization Form C is used as canonical
	   representation. */
	pfn_INTL_canonical	texttype_fn_canonical;	/* convert string to canonical representation for equality */

	/* May be omitted if not needed */
	pfn_INTL_tt_destroy	texttype_fn_destroy;	/* release resources associated with collation */

	/* Some space for future extension of collation interface */
	void* reserved_for_interface[5];

	/* Some space which may be freely used by collation driver */
	void* reserved_for_driver[10];
} *TEXTTYPE;

/* Conversion routine.
   Returns the resulting string length, or INTL_BAD_STR_LENGTH in case of
   error. */
typedef ULONG (*pfn_INTL_convert)(
	csconvert* cv,
	ULONG srcLen, const UCHAR* src,
	ULONG dstLen, UCHAR* dst,
	USHORT* error_code,
	ULONG* offending_source_character);

/* Destructor callback: releases the resources associated with a conversion */
typedef void (*pfn_INTL_cv_destroy)(csconvert* cv);

/* Character set conversion descriptor: pairs a conversion routine with
   optional cleanup and driver-private storage. */
struct csconvert {
	USHORT csconvert_version;	/* version ID of object */
	CsConvertImpl* csconvert_impl;	/* conversion object implemented in driver */

	/* Used only for debugging purposes. Should contain a string of the form
	   <source_charset>-><destination_charset>, for example "WIN1251->DOS866". */
	const ASCII* csconvert_name; 

	/* Conversion routine. Must be present. */
	pfn_INTL_convert csconvert_fn_convert;

	/* May be omitted if not needed. Is not called for collations embedded into charset interface.
	   NOTE(review): "collations" presumably refers to the csconvert objects
	   embedded by value in struct charset - confirm. */
	pfn_INTL_cv_destroy	csconvert_fn_destroy;

	/* Some space for future extension of conversion interface */
	void* reserved_for_interface[2];

	/* Some space which may be freely used by conversion driver */
	void* reserved_for_driver[10];
};

/* Conversion error codes (presumably the values reported through the
   error_code argument of pfn_INTL_convert - confirm) */

#define	CS_TRUNCATION_ERROR	1	/* output buffer too small  */
#define	CS_CONVERT_ERROR	2	/* can't remap a character      */
#define	CS_BAD_INPUT		3	/* input string detected as bad */

#define	CS_CANT_MAP		0		/* Flag table entries that don't map */


/* Validation callback: tells whether the given string is well-formed */
typedef INTL_BOOL (*pfn_INTL_well_formed)(charset* cs, ULONG len, const UCHAR* str);

/* Extracts a portion of a string.
   Returns INTL_BAD_STR_LENGTH in case of problems. */
typedef ULONG (*pfn_INTL_substring)(
	charset* cs,
	ULONG srcLen, const UCHAR* src,
	ULONG dstLen, UCHAR* dst,
	ULONG startPos, ULONG length);

/* Measures the length of a string in characters.
   Returns INTL_BAD_STR_LENGTH in case of problems. */
typedef ULONG (*pfn_INTL_length)(charset* cs, ULONG srcLen, const UCHAR* src);

/* Releases resources associated with charset */
typedef void (*pfn_INTL_cs_destroy) (
	charset* cv
);

/* charset flag values (stored in charset_flags, filled by the driver) */
#define CHARSET_LEGACY_SEMANTICS 1 /* MBCS strings may overflow declared lengths
                                      in characters (but not in bytes) */

/* Character set descriptor: basic properties of the encoding, the embedded
   conversions to and from Unicode, and optional string routines. For every
   optional routine the accompanying comment states what happens when it is
   left unset. */
struct charset
{
	USHORT charset_version;	/* version ID of object */
	CharSetImpl* charset_impl;	/* charset object implemented in driver */
	const ASCII* charset_name;
	BYTE charset_min_bytes_per_char;
	BYTE charset_max_bytes_per_char;
	BYTE charset_space_length;       /* Length of space character in bytes */
	const BYTE* charset_space_character; /* Space character, may be used for string padding */
	USHORT charset_flags; /* Misc charset flags (CHARSET_* values) filled by driver */

	/* Conversions to and from the UTF-16 intermediate encoding. BOM marker
	   should not be used. Endianness of the transient encoding is the native
	   endianness of the platform. */
	csconvert		charset_to_unicode; /* Result of this conversion should be in Normalization Form C */
	csconvert		charset_from_unicode;

	/* If omitted, any string is considered well-formed */
	pfn_INTL_well_formed	charset_fn_well_formed;

	/* If not set, the Unicode representation is used to measure string length. */
	pfn_INTL_length		charset_fn_length;	/* get length of string in characters */

	/* May be omitted for fixed-width character sets.
	   If not present for an MBCS charset, the string operation is performed
	   by the engine via intermediate translation of the string to Unicode. */
	pfn_INTL_substring	charset_fn_substring;	/* get a portion of string */

	/* May be omitted if not needed. Is not called for collations embedded into charset interface.
	   NOTE(review): the second sentence looks copied from the conversion
	   interface; what it means for a charset should be confirmed. */
	pfn_INTL_cs_destroy	charset_fn_destroy;

	/* Some space for future extension of charset interface */
	void* reserved_for_interface[5];

	/* Some space which may be freely used by charset driver */
	void* reserved_for_driver[10];
};

#endif /* JRD_INTLOBJ_NEW_H */
Draft of new INTL interface 2004-08-12 07:17:49 +02:00			`/*`
			`* PROGRAM: JRD International support`
			`* MODULE: intlobj_new.h`
			`* DESCRIPTION: New international text handling definitions (DRAFT)`
			`*`
			`* The contents of this file are subject to the Initial`
			`* Developer's Public License Version 1.0 (the "License");`
			`* you may not use this file except in compliance with the`
			`* License. You may obtain a copy of the License at`
			`* http://www.ibphoenix.com/main.nfs?a=ibphoenix&page=ibp_idpl.`
			`*`
			`* Software distributed under the License is distributed AS IS,`
			`* WITHOUT WARRANTY OF ANY KIND, either express or implied.`
			`* See the License for the specific language governing rights`
			`* and limitations under the License.`
			`*`
			`* The Original Code was created by Nickolay Samofatov`
			`* for the Firebird Open Source RDBMS project.`
			`*`
			`* Copyright (c) 2004 Nickolay Samofatov <nickolay@broadviewsoftware.com>`
			`* and all contributors signed below.`
			`*`
			`* All Rights Reserved.`
			`* Contributor(s): ______________________________________.`
			`*`
			`*/`

			`#ifndef JRD_INTLOBJ_NEW_H`
			`#define JRD_INTLOBJ_NEW_H`

			`#ifndef INCLUDE_FB_TYPES_H`
			`typedef unsigned short USHORT;`
			`typedef short SSHORT;`
			`typedef unsigned char UCHAR;`
			`typedef char CHAR;`
			`typedef unsigned char BYTE;`

			`typedef unsigned int ULONG;`
			`typedef int LONG;`
			`typedef signed int SLONG;`
			`#endif`

			`typedef SCHAR ASCII;`

			`typedef USHORT INTL_BOOL;`

			`/* Forward declarations to be implemented in collation driver */`
A little bit more clarification 2004-08-18 05:09:47 +02:00			`struct TextTypeImpl;`
Draft of new INTL interface 2004-08-12 07:17:49 +02:00			`struct CharSetImpl;`
			`struct CsConvertImpl;`

			`struct texttype; /* forward decl for the fc signatures before the struct itself. */`
			`struct csconvert;`
Apply changes by Adriano to make compilable 2004-08-13 21:27:45 +02:00			`struct charset;`
Draft of new INTL interface 2004-08-12 07:17:49 +02:00
			`#define INTL_BAD_KEY_LENGTH ((USHORT)(-1))`
			`#define INTL_BAD_STR_LENGTH ((ULONG)(-1))`

			`/* Returned value of INTL_BAD_KEY_LENGTH means that proposed key is too long */`
			`typedef USHORT (*pfn_INTL_keylength) (`
Some small changes in INTL interface 2004-08-12 21:21:03 +02:00			`texttype* tt,`
Draft of new INTL interface 2004-08-12 07:17:49 +02:00			`USHORT len`
			`);`

Adjust INTL interface according to new talks with Adriano 2004-09-15 03:36:13 +02:00			`/* Types of the keys which may be returned by str2key routine */`

			`#define INTL_KEY_SORT 0 /* Full sort key */`
			`#define INTL_KEY_PARTIAL 1 /* Starting portion of sort key for equality class */`
			`#define INTL_KEY_UNIQUE 2 /* Full key for the equality class of the string */`

New INTL interface amendments 2004-08-17 02:04:52 +02:00			`/* Returned value of INTL_BAD_KEY_LENGTH means that key error happened during`
			`key construction. When partial key is requested returned string should`
			`complement collated comparison.`
			`*/`
Draft of new INTL interface 2004-08-12 07:17:49 +02:00			`typedef USHORT (*pfn_INTL_str2key) (`
Some small changes in INTL interface 2004-08-12 21:21:03 +02:00			`texttype* tt,`
Draft of new INTL interface 2004-08-12 07:17:49 +02:00			`USHORT srcLen,`
			`const UCHAR* src,`
			`USHORT dstLen,`
			`UCHAR* dst,`
Adjust INTL interface according to new talks with Adriano 2004-09-15 03:36:13 +02:00			`USHORT key_type`
Draft of new INTL interface 2004-08-12 07:17:49 +02:00			`);`

New INTL interface amendments 2004-08-17 02:04:52 +02:00			`/* Collate two potentially long strings. According to SQL 2003 standard`
			`collation is a process by which two strings are determined to be in exactly`
			`one of the relationships of less than, greater than, or equal to one another.`
Clarify string comparison interface 2004-08-12 21:44:31 +02:00			`*/`
New INTL interface amendments 2004-08-17 02:04:52 +02:00			`typedef SSHORT (*pfn_INTL_compare) (`
Some small changes in INTL interface 2004-08-12 21:21:03 +02:00			`texttype* tt,`
Draft of new INTL interface 2004-08-12 07:17:49 +02:00			`ULONG len1,`
			`const UCHAR* str1,`
			`ULONG len2,`
			`const UCHAR* str2,`
			`INTL_BOOL* error_flag`
			`);`

			`/* Returns resulting string length in bytes or INTL_BAD_STR_LENGTH in case of error */`
			`typedef ULONG (*pfn_INTL_str2case) (`
Some small changes in INTL interface 2004-08-12 21:21:03 +02:00			`texttype* tt,`
Draft of new INTL interface 2004-08-12 07:17:49 +02:00			`ULONG srcLen,`
			`const UCHAR* src,`
			`ULONG dstLen,`
			`UCHAR* dst`
			`);`

Fix bug in canonical representation routine 2004-09-09 21:24:36 +02:00			`/*`
			`Places exactly texttype_canonical_width number of bytes into dst for each character from src.`
			`Returns INTL_BAD_STR_LENGTH in case of error or number of characters processed if successful.`
New INTL interface amendments 2004-08-17 02:04:52 +02:00			`*/`
Fix bug in canonical representation routine 2004-09-09 21:24:36 +02:00			`typedef ULONG (*pfn_INTL_canonical) (`
Some small changes in INTL interface 2004-08-12 21:21:03 +02:00			`texttype* t,`
Draft of new INTL interface 2004-08-12 07:17:49 +02:00			`ULONG srcLen,`
			`const UCHAR* src,`
			`ULONG dstLen,`
Some small changes in INTL interface 2004-08-12 21:21:03 +02:00			`UCHAR* dst`
Draft of new INTL interface 2004-08-12 07:17:49 +02:00			`);`

			`/* Releases resources associated with collation */`
			`typedef void (*pfn_INTL_tt_destroy) (`
Some small changes in INTL interface 2004-08-12 21:21:03 +02:00			`texttype* tt`
Draft of new INTL interface 2004-08-12 07:17:49 +02:00			`);`

Little INTL interface changes as discussed with Adriano 2004-09-14 03:06:31 +02:00			`/* texttype flag values */`
Adjust INTL interface according to new talks with Adriano 2004-09-15 03:36:13 +02:00
Little INTL interface changes as discussed with Adriano 2004-09-14 03:06:31 +02:00			`#define TEXTTYPE_DIRECT_MATCH 1 /* Pattern-matching may be performed directly on`
			`string without going to canonical form */`

Adjust INTL interface according to new talks with Adriano 2004-09-15 03:36:13 +02:00			`#define TEXTTYPE_SEPARATE_UNIQUE 2 /* Full key does not define equality class.`
			`To be used with multi-level collations which are`
			`case- or accent- insensitive */`

			`#define TEXTTYPE_UNSORTED_UNIQUE 4 /* Unique keys may not be used for ordered access,`
			`such as for multi-level collation having weights`
			`(char, case, accent) which is case-insensitive,`
			`but accent-sensitive */`

Draft of new INTL interface 2004-08-12 07:17:49 +02:00
			`typedef struct texttype {`
			`// Data which needs to be initialized by collation driver`
			`USHORT texttype_version; /* version ID of object */`
A little bit more clarification 2004-08-18 05:09:47 +02:00			`TextTypeImpl* texttype_impl; /* collation object implemented in driver */`
Little INTL interface changes as discussed with Adriano 2004-09-14 03:06:31 +02:00
			`/* Used only for debugging purposes. Should contain string in form`
			`<charset>.<collation>. For example "WIN1251.PXW_CYRL"`
			`*/`
Draft of new INTL interface 2004-08-12 07:17:49 +02:00			`const ASCII* texttype_name;`
Little INTL interface changes as discussed with Adriano 2004-09-14 03:06:31 +02:00
Draft of new INTL interface 2004-08-12 07:17:49 +02:00			`SSHORT texttype_country; /* ID of base country values */`
			`BYTE texttype_canonical_width; /* number bytes in canonical character representation */`
New INTL interface amendments 2004-08-17 02:04:52 +02:00
Little INTL interface changes as discussed with Adriano 2004-09-14 03:06:31 +02:00			`USHORT texttype_flags; /* Misc texttype flags filled by driver */`

New INTL interface amendments 2004-08-17 02:04:52 +02:00			`/* do we logically pad string with spaces for comparison purposes.`
			`this is the job of string_to_key and compare routines to care or not to`
			`care about trailing spaces */`
			`INTL_BOOL texttype_pad_option;`
Draft of new INTL interface 2004-08-12 07:17:49 +02:00
			`/* If not set key length is assumed to be equal to string length */`
			`pfn_INTL_keylength texttype_fn_key_length; /* Return key length for given string */`

			`/* If not set string itself is used as a key */`
			`pfn_INTL_str2key texttype_fn_string_to_key;`

Clarify string comparison interface 2004-08-12 21:44:31 +02:00			`/* If not set string is assumed to be binary-comparable both for sorting and equality purposes */`
			`pfn_INTL_compare texttype_fn_compare;`
Draft of new INTL interface 2004-08-12 07:17:49 +02:00
			`/* If not set string is converted to Unicode and then uppercased via default case folding table */`
			`pfn_INTL_str2case texttype_fn_str_to_upper; /* Convert string to uppercase */`

			`/* If not set string is converted to Unicode and then lowercased via default case folding table */`
			`pfn_INTL_str2case texttype_fn_str_to_lower; /* Convert string to lowercase */`

A little bit more clarification 2004-08-18 05:09:47 +02:00			`/* If not set for fixed width charset string itself is used as canonical`
			`representation. If not set for MBCS charset string converted to UTF-32`
			`Normalization Form C is used as canonical representation */`
Draft of new INTL interface 2004-08-12 07:17:49 +02:00			`pfn_INTL_canonical texttype_fn_canonical; /* convert string to canonical representation for equality */`

			`/* May be omitted if not needed */`
			`pfn_INTL_tt_destroy texttype_fn_destroy; /* release resources associated with collation */`
Some small changes in INTL interface 2004-08-12 21:21:03 +02:00
			`/* Some space for future extension of collation interface */`
			`void* reserved_for_interface[5];`

			`/* Some space which may be freely used by collation driver */`
			`void* reserved_for_driver[10];`
Draft of new INTL interface 2004-08-12 07:17:49 +02:00			`} *TEXTTYPE;`

			`// Returns resulting string length or INTL_BAD_STR_LENGTH in case of error`
			`typedef ULONG (*pfn_INTL_convert) (`
Further clarification of new INTL interface 2004-08-12 21:51:26 +02:00			`csconvert* cv,`
Draft of new INTL interface 2004-08-12 07:17:49 +02:00			`ULONG srcLen,`
			`const UCHAR* src,`
			`ULONG dstLen,`
			`UCHAR* dst,`
			`USHORT* error_code,`
			`ULONG* offending_source_character`
			`);`

Further clarification of new INTL interface 2004-08-12 21:51:26 +02:00			`/* Releases resources associated with conversion */`
			`typedef void (*pfn_INTL_cv_destroy) (`
			`csconvert* cv`
			`);`

Draft of new INTL interface 2004-08-12 07:17:49 +02:00			`struct csconvert {`
			`USHORT csconvert_version;`
A little bit more clarification 2004-08-18 05:09:47 +02:00			`CsConvertImpl* csconvert_impl;`
New INTL interface amendments 2004-08-17 02:04:52 +02:00
			`/* Used only for debugging purposes. Should contain string in form`
			`<source_charset>-><destination_charset>. For example "WIN1251->DOS866"`
			`*/`
			`const ASCII* csconvert_name;`
Draft of new INTL interface 2004-08-12 07:17:49 +02:00
Some small changes in INTL interface 2004-08-12 21:21:03 +02:00			`/* Conversion routine. Must be present. */`
Further clarification of new INTL interface 2004-08-12 21:51:26 +02:00			`pfn_INTL_convert csconvert_fn_convert;`

			`/* May be omitted if not needed. Is not called for collations embedded into charset interface */`
			`pfn_INTL_cv_destroy csconvert_fn_destroy;`
Some small changes in INTL interface 2004-08-12 21:21:03 +02:00
			`/* Some space for future extension of conversion interface */`
			`void* reserved_for_interface[2];`

			`/* Some space which may be freely used by conversion driver */`
			`void* reserved_for_driver[10];`
Draft of new INTL interface 2004-08-12 07:17:49 +02:00			`};`

			`/* Conversion error codes */`

			`#define CS_TRUNCATION_ERROR 1 /* output buffer too small */`
			`#define CS_CONVERT_ERROR 2 /* can't remap a character */`
			`#define CS_BAD_INPUT 3 /* input string detected as bad */`

			`#define CS_CANT_MAP 0 /* Flag table entries that don't map */`


			`/* Returns whether string is well-formed or not */`
			`typedef INTL_BOOL (*pfn_INTL_well_formed) (`
Some small changes in INTL interface 2004-08-12 21:21:03 +02:00			`charset* cs,`
			`ULONG len,`
Apply changes by Adriano to make compilable 2004-08-13 21:27:45 +02:00			`const UCHAR* str`
Draft of new INTL interface 2004-08-12 07:17:49 +02:00			`);`

Some small changes in INTL interface 2004-08-12 21:21:03 +02:00			`/* Extracts a portion from a string. Returns INTL_BAD_STR_LENGTH in case of problems. */`
			`typedef ULONG (*pfn_INTL_substring) (`
			`charset* cs,`
			`ULONG srcLen,`
			`const UCHAR* src,`
			`ULONG dstLen,`
			`UCHAR* dst,`
			`ULONG startPos,`
			`ULONG length`
			`);`

			`/* Measures the length of string in characters. Returns INTL_BAD_STR_LENGTH in case of problems. */`
			`typedef ULONG (*pfn_INTL_length) (`
			`charset* cs,`
			`ULONG srcLen,`
			`const UCHAR* src`
			`);`

Further clarification of new INTL interface 2004-08-12 21:51:26 +02:00			`/* Releases resources associated with charset */`
			`typedef void (*pfn_INTL_cs_destroy) (`
			`charset* cv`
			`);`

Little INTL interface changes as discussed with Adriano 2004-09-14 03:06:31 +02:00			`/* charset flag values */`
			`#define CHARSET_LEGACY_SEMANTICS 1 /* MBCS strings may overflow declared lengths`
			`in characters (but not in bytes) */`

Draft of new INTL interface 2004-08-12 07:17:49 +02:00			`struct charset`
			`{`
			`USHORT charset_version;`
A little bit more clarification 2004-08-18 05:09:47 +02:00			`CharSetImpl* charset_impl;`
Draft of new INTL interface 2004-08-12 07:17:49 +02:00			`const ASCII* charset_name;`
			`BYTE charset_min_bytes_per_char;`
			`BYTE charset_max_bytes_per_char;`
New INTL interface amendments 2004-08-17 02:04:52 +02:00			`BYTE charset_space_length; /* Length of space character in bytes */`
			`const BYTE* charset_space_character; /* Space character, may be used for string padding */`
Little INTL interface changes as discussed with Adriano 2004-09-14 03:06:31 +02:00			`USHORT charset_flags; /* Misc charset flags filled by driver */`
Draft of new INTL interface 2004-08-12 07:17:49 +02:00
Let's use UTF-16 as intermediate encoding. It is going to cause us more grief, but it seems to be supported better in third-party libraries 2004-08-18 00:21:19 +02:00			`/* Conversions to and from UTF-16 intermediate encodings. BOM marker should not be used.`
Add comment about Unicode encoding used for intermediate representation 2004-08-17 19:30:39 +02:00			`Endianness of transient encoding is the native endianness for the platform */`
A little bit more clarification 2004-08-18 05:09:47 +02:00			`csconvert charset_to_unicode; /* Result of this conversion should be in Normalization Form C */`
Draft of new INTL interface 2004-08-12 07:17:49 +02:00			`csconvert charset_from_unicode;`

Some small changes in INTL interface 2004-08-12 21:21:03 +02:00			`/* If omitted any string is considered well-formed */`
Further clarification of new INTL interface 2004-08-12 21:51:26 +02:00			`pfn_INTL_well_formed charset_fn_well_formed;`
Some small changes in INTL interface 2004-08-12 21:21:03 +02:00
Draft of new INTL interface 2004-08-12 07:17:49 +02:00			`/* If not set Unicode representation is used to measure string length. */`
Further clarification of new INTL interface 2004-08-12 21:51:26 +02:00			`pfn_INTL_length charset_fn_length; /* get length of string in characters */`
Draft of new INTL interface 2004-08-12 07:17:49 +02:00
			`/* May be omitted for fixed-width character sets.`
			`If not present for MBCS charset string operation is performed by the engine`
			`via intermediate translation of string to Unicode */`
			`pfn_INTL_substring charset_fn_substring; /* get a portion of string */`
Some small changes in INTL interface 2004-08-12 21:21:03 +02:00
Further clarification of new INTL interface 2004-08-12 21:51:26 +02:00			`/* May be omitted if not needed. Is not called for collations embedded into charset interface */`
			`pfn_INTL_cs_destroy charset_fn_destroy;`

Some small changes in INTL interface 2004-08-12 21:21:03 +02:00			`/* Some space for future extension of charset interface */`
			`void* reserved_for_interface[5];`

			`/* Some space which may be freely used by charset driver */`
			`void* reserved_for_driver[10];`
Draft of new INTL interface 2004-08-12 07:17:49 +02:00			`};`

			`#endif /* JRD_INTLOBJ_NEW_H */`