mirror of
https://github.com/FirebirdSQL/firebird.git
synced 2025-01-23 00:03:02 +01:00
258 lines
9.7 KiB
ObjectPascal
258 lines
9.7 KiB
ObjectPascal
{
|
|
* PROGRAM: UDR samples.
|
|
* MODULE: FbCharsets.pas
|
|
* DESCRIPTION: Charset helpers.
|
|
*
|
|
* The contents of this file are subject to the Initial
|
|
* Developer's Public License Version 1.0 (the "License");
|
|
* you may not use this file except in compliance with the
|
|
* License. You may obtain a copy of the License at
|
|
* https://www.ibphoenix.com/about/firebird/idpl.
|
|
*
|
|
* Software distributed under the License is distributed AS IS,
|
|
* WITHOUT WARRANTY OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing rights
|
|
* and limitations under the License.
|
|
*
|
|
* The Original Code was created by Simonov Denis
|
|
* for the book Writing UDR Firebird in Pascal.
|
|
*
|
|
* Copyright (c) 2018 Simonov Denis <sim-mail@list.ru>
|
|
* and all contributors signed below.
|
|
*
|
|
* All Rights Reserved.
|
|
* Contributor(s): ______________________________________. }
|
|
|
|
unit FbCharsets;
|
|
|
|
{$IFDEF MSWINDOWS}
|
|
{$DEFINE WINDOWS}
|
|
{$ENDIF}
|
|
{$IFDEF FPC}
|
|
{$mode delphi}
|
|
{$ENDIF}
|
|
|
|
|
|
interface
|
|
|
|
uses
|
|
Classes, SysUtils {$IFDEF WINDOWS}, windows {$ENDIF};
|
|
|
|
type
|
|
|
|
// Firebird character sets
|
|
TFBCharSet = (
|
|
CS_NONE = 0, // No Character Set
|
|
CS_BINARY = 1, // BINARY BYTES
|
|
CS_ASCII = 2, // ASCII
|
|
CS_UNICODE_FSS = 3, // UNICODE in FSS format
|
|
CS_UTF8 = 4, // UTF-8
|
|
CS_SJIS = 5, // SJIS
|
|
CS_EUCJ = 6, // EUC-J
|
|
|
|
CS_JIS_0208 = 7 , // JIS 0208; 1990
|
|
CS_UNICODE_UCS2 = 8 , // UNICODE v 1.10
|
|
|
|
CS_DOS_737 = 9,
|
|
CS_DOS_437 = 10 , // DOS CP 437
|
|
CS_DOS_850 = 11 , // DOS CP 850
|
|
CS_DOS_865 = 12 , // DOS CP 865
|
|
CS_DOS_860 = 13 , // DOS CP 860
|
|
CS_DOS_863 = 14 , // DOS CP 863
|
|
|
|
CS_DOS_775 = 15,
|
|
CS_DOS_858 = 16,
|
|
CS_DOS_862 = 17,
|
|
CS_DOS_864 = 18,
|
|
|
|
CS_NEXT = 19, // NeXTSTEP OS native charset
|
|
|
|
CS_ISO8859_1 = 21, // ISO-8859.1
|
|
CS_ISO8859_2 = 22, // ISO-8859.2
|
|
CS_ISO8859_3 = 23, // ISO-8859.3
|
|
CS_ISO8859_4 = 34, // ISO-8859.4
|
|
CS_ISO8859_5 = 35, // ISO-8859.5
|
|
CS_ISO8859_6 = 36, // ISO-8859.6
|
|
CS_ISO8859_7 = 37, // ISO-8859.7
|
|
CS_ISO8859_8 = 38, // ISO-8859.8
|
|
CS_ISO8859_9 = 39, // ISO-8859.9
|
|
CS_ISO8859_13 = 40, // ISO-8859.13
|
|
|
|
CS_KSC5601 = 44, // KOREAN STANDARD 5601
|
|
|
|
CS_DOS_852 = 45 , // DOS CP 852
|
|
CS_DOS_857 = 46 , // DOS CP 857
|
|
CS_DOS_861 = 47 , // DOS CP 861
|
|
|
|
CS_DOS_866 = 48,
|
|
CS_DOS_869 = 49,
|
|
|
|
CS_CYRL = 50 ,
|
|
CS_WIN1250 = 51, // Windows cp 1250
|
|
CS_WIN1251 = 52, // Windows cp 1251
|
|
CS_WIN1252 = 53, // Windows cp 1252
|
|
CS_WIN1253 = 54, // Windows cp 1253
|
|
CS_WIN1254 = 55, // Windows cp 1254
|
|
|
|
CS_BIG5 = 56, // Big Five unicode cs
|
|
CS_GB2312 = 57, // GB 2312-80 cs
|
|
|
|
CS_WIN1255 = 58, // Windows cp 1255
|
|
CS_WIN1256 = 59, // Windows cp 1256
|
|
CS_WIN1257 = 60, // Windows cp 1257
|
|
|
|
CS_UTF16 = 61, // UTF-16
|
|
CS_UTF32 = 62, // UTF-32
|
|
|
|
CS_KOI8R = 63, // Russian KOI8R
|
|
CS_KOI8U = 64, // Ukrainian KOI8U
|
|
|
|
CS_WIN1258 = 65, // Windows cp 1258
|
|
|
|
CS_TIS620 = 66 , // TIS620
|
|
CS_GBK = 67, // GBK
|
|
CS_CP943C = 68, // CP943C
|
|
|
|
CS_GB18030 = 69 // GB18030
|
|
);
|
|
|
|
// Firebird character set mappig to code pages
|
|
TCharsetMap = record
|
|
CharsetID: Integer;
|
|
CharSetName: AnsiString;
|
|
CharSetWidth: Word;
|
|
CodePage: Integer;
|
|
end;
|
|
|
|
{ TFbCharsetHelper }
|
|
|
|
TFbCharsetHelper = record helper for TFBCharSet
|
|
function GetCharset : TCharsetMap;
|
|
function GetCodePage: Integer;
|
|
function GetCharWidth: Word;
|
|
function GetCharSetName: string;
|
|
function GetEncoding : TEncoding;
|
|
function GetString(const Bytes: TBytes; ByteIndex, ByteCount: Integer): UnicodeString;
|
|
end;
|
|
|
|
|
|
implementation
|
|
|
|
const
|
|
CharSetMap: array [0 .. 69] of TCharsetMap = (
|
|
(CharsetID: 0; CharSetName: 'NONE'; CharSetWidth: 1; CodePage: CP_ACP),
|
|
(CharsetID: 1; CharSetName: 'OCTETS'; CharSetWidth: 1; CodePage: CP_NONE),
|
|
(CharsetID: 2; CharSetName: 'ASCII'; CharSetWidth: 1; CodePage: {CP_ASCII} CP_ACP),
|
|
(CharsetID: 3; CharSetName: 'UNICODE_FSS'; CharSetWidth: 3; CodePage: CP_UTF8),
|
|
(CharsetID: 4; CharSetName: 'UTF8'; CharSetWidth: 4; CodePage: CP_UTF8),
|
|
(CharsetID: 5; CharSetName: 'SJIS_0208'; CharSetWidth: 2; CodePage: 20932),
|
|
(CharsetID: 6; CharSetName: 'EUCJ_0208'; CharSetWidth: 2; CodePage: 20932),
|
|
(CharsetID: 7; CharSetName: 'Unknown'; CharSetWidth: 0; CodePage: CP_NONE),
|
|
(CharsetID: 8; CharSetName: 'Unknown'; CharSetWidth: 0; CodePage: CP_NONE),
|
|
(CharsetID: 9; CharSetName: 'DOS737'; CharSetWidth: 1; CodePage: 737),
|
|
(CharsetID: 10; CharSetName: 'DOS437'; CharSetWidth: 1; CodePage: 437),
|
|
(CharsetID: 11; CharSetName: 'DOS850'; CharSetWidth: 1; CodePage: 850),
|
|
(CharsetID: 12; CharSetName: 'DOS865'; CharSetWidth: 1; CodePage: 865),
|
|
(CharsetID: 13; CharSetName: 'DOS860'; CharSetWidth: 1; CodePage: 860),
|
|
(CharsetID: 14; CharSetName: 'DOS863'; CharSetWidth: 1; CodePage: 863),
|
|
(CharsetID: 15; CharSetName: 'DOS775'; CharSetWidth: 1; CodePage: 775),
|
|
(CharsetID: 16; CharSetName: 'DOS858'; CharSetWidth: 1; CodePage: 858),
|
|
(CharsetID: 17; CharSetName: 'DOS862'; CharSetWidth: 1; CodePage: 862),
|
|
(CharsetID: 18; CharSetName: 'DOS864'; CharSetWidth: 1; CodePage: 864),
|
|
(CharsetID: 19; CharSetName: 'NEXT'; CharSetWidth: 1; CodePage: CP_NONE),
|
|
(CharsetID: 20; CharSetName: 'Unknown'; CharSetWidth: 0; CodePage: CP_NONE),
|
|
(CharsetID: 21; CharSetName: 'ISO8859_1'; CharSetWidth: 1; CodePage: 28591),
|
|
(CharsetID: 22; CharSetName: 'ISO8859_2'; CharSetWidth: 1; CodePage: 28592),
|
|
(CharsetID: 23; CharSetName: 'ISO8859_3'; CharSetWidth: 1; CodePage: 28593),
|
|
(CharsetID: 24; CharSetName: 'Unknown'; CharSetWidth: 0; CodePage: CP_NONE),
|
|
(CharsetID: 25; CharSetName: 'Unknown'; CharSetWidth: 0; CodePage: CP_NONE),
|
|
(CharsetID: 26; CharSetName: 'Unknown'; CharSetWidth: 0; CodePage: CP_NONE),
|
|
(CharsetID: 27; CharSetName: 'Unknown'; CharSetWidth: 0; CodePage: CP_NONE),
|
|
(CharsetID: 28; CharSetName: 'Unknown'; CharSetWidth: 0; CodePage: CP_NONE),
|
|
(CharsetID: 29; CharSetName: 'Unknown'; CharSetWidth: 0; CodePage: CP_NONE),
|
|
(CharsetID: 30; CharSetName: 'Unknown'; CharSetWidth: 0; CodePage: CP_NONE),
|
|
(CharsetID: 31; CharSetName: 'Unknown'; CharSetWidth: 0; CodePage: CP_NONE),
|
|
(CharsetID: 32; CharSetName: 'Unknown'; CharSetWidth: 0; CodePage: CP_NONE),
|
|
(CharsetID: 33; CharSetName: 'Unknown'; CharSetWidth: 0; CodePage: CP_NONE),
|
|
(CharsetID: 34; CharSetName: 'ISO8859_4'; CharSetWidth: 1; CodePage: 28594),
|
|
(CharsetID: 35; CharSetName: 'ISO8859_5'; CharSetWidth: 1; CodePage: 28595),
|
|
(CharsetID: 36; CharSetName: 'ISO8859_6'; CharSetWidth: 1; CodePage: 28596),
|
|
(CharsetID: 37; CharSetName: 'ISO8859_7'; CharSetWidth: 1; CodePage: 28597),
|
|
(CharsetID: 38; CharSetName: 'ISO8859_8'; CharSetWidth: 1; CodePage: 28598),
|
|
(CharsetID: 39; CharSetName: 'ISO8859_9'; CharSetWidth: 1; CodePage: 28599),
|
|
(CharsetID: 40; CharSetName: 'ISO8859_13'; CharSetWidth: 1; CodePage: 28603),
|
|
(CharsetID: 41; CharSetName: 'Unknown'; CharSetWidth: 0; CodePage: CP_NONE),
|
|
(CharsetID: 42; CharSetName: 'Unknown'; CharSetWidth: 0; CodePage: CP_NONE),
|
|
(CharsetID: 43; CharSetName: 'Unknown'; CharSetWidth: 0; CodePage: CP_NONE),
|
|
(CharsetID: 44; CharSetName: 'KSC_5601'; CharSetWidth: 2; CodePage: 949),
|
|
(CharsetID: 45; CharSetName: 'DOS852'; CharSetWidth: 1; CodePage: 852),
|
|
(CharsetID: 46; CharSetName: 'DOS857'; CharSetWidth: 1; CodePage: 857),
|
|
(CharsetID: 47; CharSetName: 'DOS861'; CharSetWidth: 1; CodePage: 861),
|
|
(CharsetID: 48; CharSetName: 'DOS866'; CharSetWidth: 1; CodePage: 866),
|
|
(CharsetID: 49; CharSetName: 'DOS869'; CharSetWidth: 1; CodePage: 869),
|
|
(CharsetID: 50; CharSetName: 'CYRL'; CharSetWidth: 1; CodePage: 1251),
|
|
(CharsetID: 51; CharSetName: 'WIN1250'; CharSetWidth: 1; CodePage: 1250),
|
|
(CharsetID: 52; CharSetName: 'WIN1251'; CharSetWidth: 1; CodePage: 1251),
|
|
(CharsetID: 53; CharSetName: 'WIN1252'; CharSetWidth: 1; CodePage: 1252),
|
|
(CharsetID: 54; CharSetName: 'WIN1253'; CharSetWidth: 1; CodePage: 1253),
|
|
(CharsetID: 55; CharSetName: 'WIN1254'; CharSetWidth: 1; CodePage: 1254),
|
|
(CharsetID: 56; CharSetName: 'BIG_5'; CharSetWidth: 2; CodePage: 950),
|
|
(CharsetID: 57; CharSetName: 'GB_2312'; CharSetWidth: 2; CodePage: 936),
|
|
(CharsetID: 58; CharSetName: 'WIN1255'; CharSetWidth: 1; CodePage: 1255),
|
|
(CharsetID: 59; CharSetName: 'WIN1256'; CharSetWidth: 1; CodePage: 1256),
|
|
(CharsetID: 60; CharSetName: 'WIN1257'; CharSetWidth: 1; CodePage: 1257),
|
|
(CharsetID: 61; CharSetName: 'Unknown'; CharSetWidth: 0; CodePage: CP_NONE),
|
|
(CharsetID: 62; CharSetName: 'Unknown'; CharSetWidth: 0; CodePage: CP_NONE),
|
|
(CharsetID: 63; CharSetName: 'KOI8R'; CharSetWidth: 1; CodePage: 20866),
|
|
(CharsetID: 64; CharSetName: 'KOI8U'; CharSetWidth: 1; CodePage: 21866),
|
|
(CharsetID: 65; CharSetName: 'WIN1258'; CharSetWidth: 1; CodePage: 1258),
|
|
(CharsetID: 66; CharSetName: 'TIS620'; CharSetWidth: 1; CodePage: 874),
|
|
(CharsetID: 67; CharSetName: 'GBK'; CharSetWidth: 2; CodePage: 936),
|
|
(CharsetID: 68; CharSetName: 'CP943C'; CharSetWidth: 2; CodePage: 943),
|
|
(CharsetID: 69; CharSetName: 'GB18030'; CharSetWidth: 4; CodePage: 54936));
|
|
|
|
{ TFbCharset }
|
|
|
|
function TFbCharsetHelper.GetCharset(): TCharsetMap;
|
|
begin
|
|
Result := CharSetMap[Integer(Self)];
|
|
end;
|
|
|
|
function TFbCharsetHelper.GetCodePage(): Integer;
|
|
begin
|
|
Result := CharSetMap[Integer(Self)].CodePage;
|
|
end;
|
|
|
|
function TFbCharsetHelper.GetCharWidth(): Word;
|
|
begin
|
|
Result := CharSetMap[Integer(Self)].CharSetWidth;
|
|
end;
|
|
|
|
function TFbCharsetHelper.GetCharSetName(): string;
|
|
begin
|
|
Result := CharSetMap[Integer(Self)].CharSetName;
|
|
end;
|
|
|
|
function TFbCharsetHelper.GetEncoding (): TEncoding;
|
|
begin
|
|
Result := TEncoding.GetEncoding(CharSetMap[Integer(Self)].CodePage);
|
|
end;
|
|
|
|
function TFbCharsetHelper.GetString(
|
|
const Bytes: TBytes; ByteIndex, ByteCount: Integer): UnicodeString;
|
|
var
|
|
xEncoding: TEncoding;
|
|
begin
|
|
xEncoding := GetEncoding();
|
|
try
|
|
Result := xEncoding.GetString(Bytes, ByteIndex, ByteCount);
|
|
finally
|
|
xEncoding.Free;
|
|
end;
|
|
end;
|
|
|
|
|
|
end.
|
|
|
|
|