8
0
mirror of https://github.com/FirebirdSQL/firebird.git synced 2025-02-02 10:00:38 +01:00

Ability to use:

1) ICU charsets through fbintl
2) UNICODE collation (charset_UNICODE) for all fbintl charsets
This commit is contained in:
asfernandes 2006-06-13 02:38:39 +00:00
parent 7a1d5d0fa5
commit 93b5358e58
16 changed files with 919 additions and 53 deletions

View File

@ -27,7 +27,7 @@
# Contributor(s):
#
#
# $Id: Makefile.in.intl,v 1.23 2005-09-24 22:50:21 asfernandes Exp $
# $Id: Makefile.in.intl,v 1.24 2006-06-13 02:35:39 asfernandes Exp $
#
ROOT=..
ObjModuleType=superclient
@ -50,7 +50,8 @@ INTL_Files1= ld.cpp cv_narrow.cpp \
cs_big5.cpp cv_big5.cpp lc_big5.cpp \
cs_gb2312.cpp cv_gb2312.cpp lc_gb2312.cpp \
cs_jis.cpp cv_jis.cpp lc_jis.cpp \
cs_ksc.cpp cv_ksc.cpp lc_ksc.cpp
cs_ksc.cpp cv_ksc.cpp lc_ksc.cpp \
cs_icu.cpp cv_icu.cpp lc_icu.cpp
# INTL_Sources = $(INTL_Files1)
@ -74,7 +75,7 @@ all : libfbintl
libfbintl : $(LIBFBINTL_SO)
$(LIBFBINTL_SO): $(INTL_Objects)
$(LIBFBINTL_SO): $(INTL_Objects) $(FBCOMMON_Objects) $(FBCLASSES_Objects)
$(LIB_LINK) $(LINK_FBINTL_SYMBOLS) $(LIB_LINK_OPTIONS) $(LIB_LINK_SONAME)libintl.$(SHRLIB_EXT).1 \
$(LIB_LINK_RPATH)$(FirebirdInstallPrefix)/lib -o $@ $^ $(SO_LINK_LIBS) @PTHREAD_CFLAGS@ @PTHREAD_LIBS@

View File

@ -43,7 +43,7 @@ RSC=rc.exe
# PROP Ignore_Export_Lib 0
# PROP Target_Dir ""
# ADD BASE CPP /nologo /MTd /W3 /Gm /GR /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "INTL_EXPORTS" /YX /FD /GZ /c
# ADD CPP /nologo /MDd /W3 /Gm /GR- /GX /Zi /Od /I "../../../src/include" /I "../../../src/include/gen" /D "_DEBUG" /D "_WINDOWS" /D "_USRDLL" /D "WINDOWS_ONLY" /D "SUPERCLIENT" /D "WIN32" /D "_MBCS" /D "_X86_" /D "DEV_BUILD" /FR /FD /GZ /c
# ADD CPP /nologo /MDd /W3 /Gm /GX /Zi /Od /I "../../../src/include" /I "../../../src/include/gen" /I "../../../extern/icu/include" /D "_DEBUG" /D "_WINDOWS" /D "_USRDLL" /D "WINDOWS_ONLY" /D "SUPERCLIENT" /D "WIN32" /D "_MBCS" /D "_X86_" /D "DEV_BUILD" /FR /FD /GZ /c
# ADD BASE MTL /nologo /D "_DEBUG" /mktyplib203 /win32
# ADD MTL /nologo /D "_DEBUG" /mktyplib203 /win32
# ADD BASE RSC /l 0x41d /d "_DEBUG"
@ -53,7 +53,7 @@ BSC32=bscmake.exe
# ADD BSC32 /nologo
LINK32=link.exe
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /dll /debug /machine:I386 /pdbtype:sept
# ADD LINK32 kernel32.lib user32.lib gdi32.lib shell32.lib comctl32.lib advapi32.lib ws2_32.lib mpr.lib version.lib /nologo /dll /incremental:no /debug /machine:I386 /out:"..\..\..\temp\debug\firebird/intl/fbintl.dll" /pdbtype:sept
# ADD LINK32 kernel32.lib user32.lib gdi32.lib shell32.lib comctl32.lib advapi32.lib ws2_32.lib mpr.lib version.lib icuuc.lib icuin.lib /nologo /dll /incremental:no /debug /machine:I386 /out:"..\..\..\temp\debug\firebird/intl/fbintl.dll" /pdbtype:sept /libpath:../../../extern/icu/lib
!ELSEIF "$(CFG)" == "intl - Win32 Release"
@ -69,7 +69,7 @@ LINK32=link.exe
# PROP Ignore_Export_Lib 0
# PROP Target_Dir ""
# ADD BASE CPP /nologo /MDd /W3 /Gm /GR /GX /ZI /Od /I "../../../src/include" /I "../../../src/include/gen" /D "_DEBUG" /D "_WINDOWS" /D "_USRDLL" /D "INTL_EXPORTS" /D "WINDOWS_ONLY" /D "SUPERCLIENT" /D "WIN32" /D "_MBCS" /D "_X86_" /FR /FD /GZ /c
# ADD CPP /nologo /MD /W3 /GR- /GX /Ot /Og /Oi /Op /Oy /Ob1 /I "../../../src/include" /I "../../../src/include/gen" /D "NDEBUG" /D "_WINDOWS" /D "_USRDLL" /D "INTL_EXPORTS" /D "WINDOWS_ONLY" /D "SUPERCLIENT" /D "WIN32" /D "_MBCS" /D "_X86_" /FR /FD /EHc- /c
# ADD CPP /nologo /MD /W3 /GX /Ot /Og /Oi /Op /Oy /Ob1 /I "../../../src/include" /I "../../../src/include/gen" /I "../../../extern/icu/include" /D "NDEBUG" /D "_WINDOWS" /D "_USRDLL" /D "INTL_EXPORTS" /D "WINDOWS_ONLY" /D "SUPERCLIENT" /D "WIN32" /D "_MBCS" /D "_X86_" /FR /FD /EHc- /c
# ADD BASE MTL /nologo /D "_DEBUG" /mktyplib203 /win32
# ADD MTL /nologo /D "_DEBUG" /mktyplib203 /win32
# ADD BASE RSC /l 0x41d /d "_DEBUG"
@ -79,7 +79,7 @@ BSC32=bscmake.exe
# ADD BSC32 /nologo
LINK32=link.exe
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /dll /debug /machine:I386 /pdbtype:sept
# ADD LINK32 kernel32.lib user32.lib gdi32.lib shell32.lib comctl32.lib advapi32.lib ws2_32.lib mpr.lib version.lib /nologo /dll /incremental:no /machine:I386 /out:"..\..\..\temp\release\firebird/intl/fbintl.dll" /pdbtype:sept
# ADD LINK32 kernel32.lib user32.lib gdi32.lib shell32.lib comctl32.lib advapi32.lib ws2_32.lib mpr.lib version.lib icuuc.lib icuin.lib /nologo /dll /incremental:no /machine:I386 /out:"..\..\..\temp\release\firebird/intl/fbintl.dll" /pdbtype:sept /libpath:../../../extern/icu/lib
# SUBTRACT LINK32 /debug
!ENDIF
@ -101,6 +101,10 @@ SOURCE=..\..\..\src\intl\cs_gb2312.cpp
# End Source File
# Begin Source File
SOURCE=..\..\..\src\intl\cs_icu.cpp
# End Source File
# Begin Source File
SOURCE=..\..\..\src\intl\cs_jis.cpp
# End Source File
# Begin Source File
@ -129,6 +133,10 @@ SOURCE=..\..\..\src\intl\cv_gb2312.cpp
# End Source File
# Begin Source File
SOURCE=..\..\..\src\intl\cv_icu.cpp
# End Source File
# Begin Source File
SOURCE=..\..\..\src\intl\cv_jis.cpp
# End Source File
# Begin Source File
@ -161,6 +169,10 @@ SOURCE=..\..\..\src\intl\lc_gb2312.cpp
# End Source File
# Begin Source File
SOURCE=..\..\..\src\intl\lc_icu.cpp
# End Source File
# Begin Source File
SOURCE=..\..\..\src\intl\lc_iso8859_1.cpp
# End Source File
# Begin Source File
@ -341,6 +353,10 @@ SOURCE=..\..\..\src\intl\charsets\cs_gb2312.h
# End Source File
# Begin Source File
SOURCE=..\..\..\src\intl\cs_icu.h
# End Source File
# Begin Source File
SOURCE=..\..\..\src\intl\charsets\cs_iso8859_1.h
# End Source File
# Begin Source File
@ -393,6 +409,10 @@ SOURCE=..\..\..\src\intl\charsets\cs_w1254.h
# End Source File
# Begin Source File
SOURCE=..\..\..\src\intl\cv_icu.h
# End Source File
# Begin Source File
SOURCE=..\..\..\src\intl\collations\db437de0.h
# End Source File
# Begin Source File
@ -529,6 +549,10 @@ SOURCE=..\..\..\src\intl\langdrv.h
# End Source File
# Begin Source File
SOURCE=..\..\..\src\intl\lc_icu.h
# End Source File
# Begin Source File
SOURCE=..\..\..\src\intl\ld.h
# End Source File
# Begin Source File
@ -700,6 +724,18 @@ SOURCE=..\..\..\src\intl\collations\xx88592plk.h
SOURCE=..\..\..\src\jrd\version.rc
# End Source File
# End Group
# Begin Group "OTHER files"
# PROP Default_Filter ""
# Begin Source File
SOURCE=..\..\..\src\common\classes\alloc.cpp
# End Source File
# Begin Source File
SOURCE=..\..\..\src\common\fb_exception.cpp
# End Source File
# End Group
# Begin Source File
SOURCE=..\defs\intl.def

View File

@ -28,7 +28,7 @@
ImproveFloatingPointConsistency="TRUE"
FavorSizeOrSpeed="1"
OmitFramePointers="TRUE"
AdditionalIncludeDirectories="../../../src/include,../../../src/include/gen"
AdditionalIncludeDirectories="../../../src/include,../../../src/include/gen;../../../extern/icu/include"
PreprocessorDefinitions="NDEBUG;_WINDOWS;_USRDLL;INTL_EXPORTS;WINDOWS_ONLY;SUPERCLIENT;WIN32;_X86_"
ExceptionHandling="FALSE"
RuntimeLibrary="2"
@ -47,10 +47,11 @@
<Tool
Name="VCLinkerTool"
AdditionalOptions="/MACHINE:I386"
AdditionalDependencies="comctl32.lib ws2_32.lib mpr.lib version.lib"
AdditionalDependencies="comctl32.lib ws2_32.lib mpr.lib version.lib icuuc.lib icuin.lib"
OutputFile="..\..\..\temp\release\firebird\intl\fbintl.dll"
LinkIncremental="1"
SuppressStartupBanner="TRUE"
AdditionalLibraryDirectories="../../../extern/icu/lib"
ProgramDatabaseFile="..\..\..\temp\release\intl\fbintl.pdb"
ImportLibrary="..\..\..\temp\release\intl\fbintl.lib"/>
<Tool
@ -93,7 +94,7 @@
Name="VCCLCompilerTool"
AdditionalOptions="/EHsc-"
Optimization="0"
AdditionalIncludeDirectories="../../../src/include,../../../src/include/gen"
AdditionalIncludeDirectories="../../../src/include,../../../src/include/gen;../../../extern/icu/include"
PreprocessorDefinitions="_DEBUG;_WINDOWS;_USRDLL;WINDOWS_ONLY;SUPERCLIENT;WIN32;_X86_;DEV_BUILD"
ExceptionHandling="FALSE"
BasicRuntimeChecks="3"
@ -113,10 +114,11 @@
<Tool
Name="VCLinkerTool"
AdditionalOptions="/MACHINE:I386"
AdditionalDependencies="comctl32.lib ws2_32.lib mpr.lib version.lib"
AdditionalDependencies="comctl32.lib ws2_32.lib mpr.lib version.lib icuuc.lib icuin.lib"
OutputFile="..\..\..\temp\debug\firebird\intl\fbintl.dll"
LinkIncremental="1"
SuppressStartupBanner="TRUE"
AdditionalLibraryDirectories="../../../extern/icu/lib"
GenerateDebugInformation="TRUE"
ProgramDatabaseFile="..\..\..\temp\debug\intl\fbintl.pdb"
ImportLibrary="..\..\..\temp\debug\intl\fbintl.lib"/>
@ -161,6 +163,9 @@
<File
RelativePath="..\..\..\src\intl\cs_gb2312.cpp">
</File>
<File
RelativePath="..\..\..\src\intl\cs_icu.cpp">
</File>
<File
RelativePath="..\..\..\src\intl\cs_jis.cpp">
</File>
@ -182,6 +187,9 @@
<File
RelativePath="..\..\..\src\intl\cv_gb2312.cpp">
</File>
<File
RelativePath="..\..\..\src\intl\cv_icu.cpp">
</File>
<File
RelativePath="..\..\..\src\intl\cv_jis.cpp">
</File>
@ -206,6 +214,9 @@
<File
RelativePath="..\..\..\src\intl\lc_gb2312.cpp">
</File>
<File
RelativePath="..\..\..\src\intl\lc_icu.cpp">
</File>
<File
RelativePath="..\..\..\src\intl\lc_iso8859_1.cpp">
</File>
@ -339,6 +350,9 @@
<File
RelativePath="..\..\..\src\intl\charsets\cs_gb2312.h">
</File>
<File
RelativePath="..\..\..\src\intl\cs_icu.h">
</File>
<File
RelativePath="..\..\..\src\intl\charsets\cs_iso8859_1.h">
</File>
@ -378,6 +392,9 @@
<File
RelativePath="..\..\..\src\intl\charsets\cs_w1254.h">
</File>
<File
RelativePath="..\..\..\src\intl\cv_icu.h">
</File>
<File
RelativePath="..\..\..\src\intl\collations\db437de0.h">
</File>
@ -480,6 +497,9 @@
<File
RelativePath="..\..\..\src\intl\langdrv.h">
</File>
<File
RelativePath="..\..\..\src\intl\lc_icu.h">
</File>
<File
RelativePath="..\..\..\src\intl\ld.h">
</File>
@ -623,6 +643,16 @@
</FileConfiguration>
</File>
</Filter>
<Filter
Name="COMMON files"
Filter="">
<File
RelativePath="..\..\..\src\common\classes\alloc.cpp">
</File>
<File
RelativePath="..\..\..\src\common\fb_exception.cpp">
</File>
</Filter>
<File
RelativePath="..\defs\intl.def">
</File>

74
src/intl/cs_icu.cpp Normal file
View File

@ -0,0 +1,74 @@
/*
* PROGRAM: Firebird International support
* MODULE: cs_icu.cpp
* DESCRIPTION: Character set definitions for ICU character sets.
*
* The contents of this file are subject to the Initial
* Developer's Public License Version 1.0 (the "License");
* you may not use this file except in compliance with the
* License. You may obtain a copy of the License at
* http://www.ibphoenix.com/main.nfs?a=ibphoenix&page=ibp_idpl.
*
* Software distributed under the License is distributed AS IS,
* WITHOUT WARRANTY OF ANY KIND, either express or implied.
* See the License for the specific language governing rights
* and limitations under the License.
*
* The Original Code was created by Adriano dos Santos Fernandes
* for the Firebird Open Source RDBMS project.
*
* Copyright (c) 2004 Adriano dos Santos Fernandes <adrianosf@uol.com.br>
* and all contributors signed below.
*
* All Rights Reserved.
* Contributor(s): ______________________________________.
*/
#include "firebird.h"
#include "../intl/ldcommon.h"
#include "cs_icu.h"
#include "cv_icu.h"
#include "unicode/ucnv.h"
// Destroy callback installed by CSICU_charset_init: frees the two heap
// arrays (name copy and encoded space character) owned by the charset.
static void charset_destroy(charset* cs)
{
	ASCII* const ownedName = const_cast<ASCII*>(cs->charset_name);
	BYTE* const ownedSpace = const_cast<BYTE*>(cs->charset_space_character);

	delete [] ownedName;
	delete [] ownedSpace;
}
// Fills *cs with a description of the ICU converter called charSetName.
//
// cs           - charset structure to initialize; its flags are OR-ed into,
//                so the caller is expected to have zeroed it beforehand.
// charSetName  - ICU converter name; copied, the caller keeps ownership.
//
// Returns true when the converter exists and the charset was fully set up
// (including the to/from Unicode converters installed by CVICU_convert_init).
// Returns false otherwise; in that case no memory stays owned by *cs and
// charset_fn_destroy is left NULL, so the caller has nothing to release.
bool CSICU_charset_init(charset* cs,
						const ASCII* charSetName)
{
	UErrorCode status = U_ZERO_ERROR;
	UConverter* conv = ucnv_open(charSetName, &status);

	if (U_FAILURE(status))
		return false;

	// charSetName comes from stack. Copy it.
	cs->charset_name = new ASCII[strlen(charSetName) + 1];
	strcpy(const_cast<ASCII*>(cs->charset_name), charSetName);

	cs->charset_version = CHARSET_VERSION_1;
	cs->charset_flags |= CHARSET_ASCII_BASED;
	cs->charset_min_bytes_per_char = ucnv_getMinCharSize(conv);
	cs->charset_max_bytes_per_char = ucnv_getMaxCharSize(conv);
	cs->charset_fn_destroy = charset_destroy;
	cs->charset_fn_well_formed = NULL;

	// Encode U+0020 in the target charset to obtain its space character.
	const UChar unicodeSpace = 32;

	cs->charset_space_character = new BYTE[cs->charset_max_bytes_per_char];
	cs->charset_space_length = ucnv_fromUChars(conv,
		const_cast<char*>(reinterpret_cast<const char*>(cs->charset_space_character)),
		cs->charset_max_bytes_per_char, &unicodeSpace, 1, &status);

	ucnv_close(conv);

	fb_assert(U_SUCCESS(status));

	if (U_FAILURE(status))
	{
		// Undo the partial initialization: callers only destroy a charset
		// after a successful init, so anything left here would leak.
		charset_destroy(cs);
		cs->charset_name = NULL;
		cs->charset_space_character = NULL;
		cs->charset_space_length = 0;
		cs->charset_fn_destroy = NULL;
		return false;
	}

	CVICU_convert_init(cs);

	return true;
}

32
src/intl/cs_icu.h Normal file
View File

@ -0,0 +1,32 @@
/*
* PROGRAM: Firebird International support
* MODULE: cs_icu.h
* DESCRIPTION: Character set definitions for ICU character sets.
*
* The contents of this file are subject to the Initial
* Developer's Public License Version 1.0 (the "License");
* you may not use this file except in compliance with the
* License. You may obtain a copy of the License at
* http://www.ibphoenix.com/main.nfs?a=ibphoenix&page=ibp_idpl.
*
* Software distributed under the License is distributed AS IS,
* WITHOUT WARRANTY OF ANY KIND, either express or implied.
* See the License for the specific language governing rights
* and limitations under the License.
*
* The Original Code was created by Adriano dos Santos Fernandes
* for the Firebird Open Source RDBMS project.
*
* Copyright (c) 2004 Adriano dos Santos Fernandes <adrianosf@uol.com.br>
* and all contributors signed below.
*
* All Rights Reserved.
* Contributor(s): ______________________________________.
*/
#ifndef INTL_CS_ICU_H
#define INTL_CS_ICU_H
// Initializes *cs for the ICU converter called `name`.
// Returns true when ICU can open that converter, false otherwise.
bool CSICU_charset_init(charset* cs, const ASCII* name);
#endif // INTL_CS_ICU_H

177
src/intl/cv_icu.cpp Normal file
View File

@ -0,0 +1,177 @@
/*
* PROGRAM: Firebird International support
* MODULE: cv_icu.cpp
* DESCRIPTION: Codeset conversion for ICU character sets.
*
* The contents of this file are subject to the Initial
* Developer's Public License Version 1.0 (the "License");
* you may not use this file except in compliance with the
* License. You may obtain a copy of the License at
* http://www.ibphoenix.com/main.nfs?a=ibphoenix&page=ibp_idpl.
*
* Software distributed under the License is distributed AS IS,
* WITHOUT WARRANTY OF ANY KIND, either express or implied.
* See the License for the specific language governing rights
* and limitations under the License.
*
* The Original Code was created by Adriano dos Santos Fernandes
* for the Firebird Open Source RDBMS project.
*
* Copyright (c) 2004 Adriano dos Santos Fernandes <adrianosf@uol.com.br>
* and all contributors signed below.
*
* All Rights Reserved.
* Contributor(s): ______________________________________.
*/
#include "firebird.h"
#include "../intl/ldcommon.h"
#include "ld_proto.h"
#include "cv_icu.h"
#include "unicode/ucnv.h"
// File-local private state attached to each ICU csconvert.
namespace
{
struct CsConvertImpl
{
// Charset the converter belongs to; its charset_name is used to open the
// ICU converter and its min/max bytes-per-char to estimate buffer sizes.
charset* cs;
};
}
// Opens an ICU converter for the charset attached to cv and switches both
// conversion directions to the "stop" callbacks, so that a problematic
// character ends the conversion with an error status.
// The outcome is reported through *status; the caller closes the converter.
static UConverter* create_converter(csconvert* cv, UErrorCode* status)
{
	const ASCII* const converterName = cv->csconvert_impl->cs->charset_name;
	UConverter* const converter = ucnv_open(converterName, status);

	// Previous callbacks/contexts are returned by ICU but not needed here.
	UConverterFromUCallback previousFromCallback;
	const void* previousFromContext;
	ucnv_setFromUCallBack(converter, UCNV_FROM_U_CALLBACK_STOP, NULL,
		&previousFromCallback, &previousFromContext, status);

	UConverterToUCallback previousToCallback;
	const void* previousToContext;
	ucnv_setToUCallBack(converter, UCNV_TO_U_CALLBACK_STOP, NULL,
		&previousToCallback, &previousToContext, status);

	fb_assert(U_SUCCESS(*status));

	return converter;
}
// Destroy callback of the ICU converters: releases the private
// CsConvertImpl allocated by CVICU_convert_init.
static void convert_destroy(csconvert* cv)
{
	CsConvertImpl* const impl = cv->csconvert_impl;
	delete impl;
}
// Converts a UTF-16 string to the ICU charset attached to cv.
//
// srcLen/src   - source string and its length in bytes (UTF-16 code units).
// dstLen/dst   - destination buffer and its size in bytes. When dst is NULL
//                no conversion is done and an upper bound for the needed
//                destination size is returned instead.
// errCode      - set to 0 on success, otherwise to a CS_* error code.
// errPosition  - always set to 0 (the failing position is not tracked).
//
// Returns the number of bytes written, or INTL_BAD_STR_LENGTH on error.
static ULONG unicode_to_icu(csconvert* cv,
							ULONG srcLen,
							const BYTE* src,
							ULONG dstLen,
							BYTE* dst,
							USHORT* errCode,
							ULONG* errPosition)
{
	*errCode = 0;
	*errPosition = 0;

	if (dst == NULL)
		return srcLen / sizeof(UChar) * cv->csconvert_impl->cs->charset_max_bytes_per_char;

	UErrorCode status = U_ZERO_ERROR;
	UConverter* conv = create_converter(cv, &status);

	ULONG len = ucnv_fromUChars(conv, reinterpret_cast<char*>(dst), dstLen,
		reinterpret_cast<const UChar*>(src), srcLen / sizeof(UChar), &status);

	if (!U_SUCCESS(status))
	{
		len = INTL_BAD_STR_LENGTH;

		switch (status)
		{
			case U_INVALID_CHAR_FOUND:
			case U_ILLEGAL_CHAR_FOUND:	// malformed input is data, not a bug
				*errCode = CS_CONVERT_ERROR;
				break;

			case U_TRUNCATED_CHAR_FOUND:
			case U_BUFFER_OVERFLOW_ERROR:	// dst too small for the result
				*errCode = CS_TRUNCATION_ERROR;
				break;

			default:
				// Unexpected ICU status: flag it in debug builds.
				fb_assert(false);
				*errCode = CS_CONVERT_ERROR;
				break;
		}
	}

	ucnv_close(conv);

	return len;
}
// Converts a string in the ICU charset attached to cv to UTF-16.
//
// srcLen/src   - source string and its length in bytes.
// dstLen/dst   - destination buffer and its size in bytes. When dst is NULL
//                no conversion is done and an estimate of the needed
//                destination size (in bytes) is returned instead.
// errCode      - set to 0 on success, otherwise to a CS_* error code.
// errPosition  - always set to 0 (the failing position is not tracked).
//
// Returns the number of bytes written, or INTL_BAD_STR_LENGTH on error.
static ULONG icu_to_unicode(csconvert* cv,
							ULONG srcLen,
							const BYTE* src,
							ULONG dstLen,
							BYTE* dst,
							USHORT* errCode,
							ULONG* errPosition)
{
	*errCode = 0;
	*errPosition = 0;

	if (dst == NULL)
		return srcLen / cv->csconvert_impl->cs->charset_min_bytes_per_char * sizeof(UChar);

	UErrorCode status = U_ZERO_ERROR;
	UConverter* conv = create_converter(cv, &status);

	ULONG len = ucnv_toUChars(conv, reinterpret_cast<UChar*>(dst), dstLen / sizeof(UChar),
		reinterpret_cast<const char*>(src), srcLen, &status);

	if (!U_SUCCESS(status))
	{
		len = INTL_BAD_STR_LENGTH;

		switch (status)
		{
			case U_INVALID_CHAR_FOUND:
			case U_ILLEGAL_CHAR_FOUND:	// malformed input is data, not a bug
				*errCode = CS_BAD_INPUT;
				break;

			case U_TRUNCATED_CHAR_FOUND:
			case U_BUFFER_OVERFLOW_ERROR:	// dst too small for the result
				*errCode = CS_TRUNCATION_ERROR;
				break;

			default:
				// Unexpected ICU status: flag it in debug builds.
				fb_assert(false);
				*errCode = CS_BAD_INPUT;
				break;
		}
	}
	else
		len *= sizeof(UChar);	// ICU reports UChars, callers expect bytes

	ucnv_close(conv);

	return len;
}
// Installs the ICU based converters (charset -> UTF-16 and UTF-16 -> charset)
// into cs. Each converter gets its own CsConvertImpl pointing back to cs,
// released later through convert_destroy.
void CVICU_convert_init(charset* cs)
{
	csconvert& toUnicode = cs->charset_to_unicode;
	toUnicode.csconvert_version = CSCONVERT_VERSION_1;
	toUnicode.csconvert_name = "ICU->UNICODE";
	toUnicode.csconvert_fn_convert = icu_to_unicode;
	toUnicode.csconvert_fn_destroy = convert_destroy;
	toUnicode.csconvert_impl = new CsConvertImpl();
	toUnicode.csconvert_impl->cs = cs;

	csconvert& fromUnicode = cs->charset_from_unicode;
	fromUnicode.csconvert_version = CSCONVERT_VERSION_1;
	fromUnicode.csconvert_name = "UNICODE->ICU";
	fromUnicode.csconvert_fn_convert = unicode_to_icu;
	fromUnicode.csconvert_fn_destroy = convert_destroy;
	fromUnicode.csconvert_impl = new CsConvertImpl();
	fromUnicode.csconvert_impl->cs = cs;
}

32
src/intl/cv_icu.h Normal file
View File

@ -0,0 +1,32 @@
/*
* PROGRAM: Firebird International support
* MODULE: cv_icu.h
* DESCRIPTION: Codeset conversion for ICU character sets.
*
* The contents of this file are subject to the Initial
* Developer's Public License Version 1.0 (the "License");
* you may not use this file except in compliance with the
* License. You may obtain a copy of the License at
* http://www.ibphoenix.com/main.nfs?a=ibphoenix&page=ibp_idpl.
*
* Software distributed under the License is distributed AS IS,
* WITHOUT WARRANTY OF ANY KIND, either express or implied.
* See the License for the specific language governing rights
* and limitations under the License.
*
* The Original Code was created by Adriano dos Santos Fernandes
* for the Firebird Open Source RDBMS project.
*
* Copyright (c) 2004 Adriano dos Santos Fernandes <adrianosf@uol.com.br>
* and all contributors signed below.
*
* All Rights Reserved.
* Contributor(s): ______________________________________.
*/
#ifndef INTL_CV_ICU_H
#define INTL_CV_ICU_H
// Installs the ICU based to-Unicode and from-Unicode converters into cs.
void CVICU_convert_init(charset* cs);
#endif // INTL_CV_ICU_H

View File

@ -21,11 +21,14 @@
* Contributor(s): ______________________________________.
*/
struct CsConvertImpl
namespace
{
const BYTE* csconvert_datatable;
const BYTE* csconvert_misc;
};
struct CsConvertImpl
{
const BYTE* csconvert_datatable;
const BYTE* csconvert_misc;
};
}
void CV_convert_init(csconvert* csptr,
pfn_INTL_convert cvt_fn, const void *datatable, const void *datatable2);

348
src/intl/lc_icu.cpp Normal file
View File

@ -0,0 +1,348 @@
/*
* PROGRAM: Firebird International support
* MODULE: lc_icu.cpp
* DESCRIPTION: Collations for ICU character sets
*
* The contents of this file are subject to the Initial
* Developer's Public License Version 1.0 (the "License");
* you may not use this file except in compliance with the
* License. You may obtain a copy of the License at
* http://www.ibphoenix.com/main.nfs?a=ibphoenix&page=ibp_idpl.
*
* Software distributed under the License is distributed AS IS,
* WITHOUT WARRANTY OF ANY KIND, either express or implied.
* See the License for the specific language governing rights
* and limitations under the License.
*
* The Original Code was created by Adriano dos Santos Fernandes
* for the Firebird Open Source RDBMS project.
*
* Copyright (c) 2004 Adriano dos Santos Fernandes <adrianosf@uol.com.br>
* and all contributors signed below.
*
* All Rights Reserved.
* Contributor(s): ______________________________________.
*/
#include "firebird.h"
#include "ldcommon.h"
#include "ld_proto.h"
#include "lc_icu.h"
#include "cs_icu.h"
#include "../../common/classes/array.h"
#include "../../include/fb_exception.h"
#include "unicode/ustring.h"
#include "unicode/ucol.h"
namespace
{
	// Private state of a UNICODE collation: the charset used to convert
	// strings to UTF-16 plus the two ICU collators used to build sort keys.
	struct TextTypeImpl
	{
		charset cs;
		UCollator* collator;			// used for regular keys
		UCollator* partialCollator;		// used for INTL_KEY_PARTIAL keys

		TextTypeImpl()
			: collator(NULL),
			  partialCollator(NULL)
		{
			// A zeroed charset has no destroy callback, so the destructor
			// is safe even if the charset is never initialized.
			memset(&cs, 0, sizeof(cs));
		}

		// NOTE(review): only charset_fn_destroy is invoked here; whether the
		// converters inside cs need their own destroy call should be confirmed.
		~TextTypeImpl()
		{
			if (cs.charset_fn_destroy != NULL)
			{
				cs.charset_fn_destroy(&cs);
			}

			if (collator != NULL)
			{
				ucol_close(collator);
			}

			if (partialCollator != NULL)
			{
				ucol_close(partialCollator);
			}
		}
	};
}
// Key length estimate for a string of len bytes: four key bytes for every
// character the string can hold (len divided by the minimum character size).
static USHORT unicode_keylength(
	texttype* tt,
	USHORT len)
{
	const charset& cs = tt->texttype_impl->cs;

	return len / cs.charset_min_bytes_per_char * 4;
}
// Builds an ICU sort key for src.
//
// srcLen/src  - string in the collation's charset and its length in bytes.
// dstLen/dst  - key buffer and its size in bytes.
// key_type    - INTL_KEY_PARTIAL selects the partial collator, any other
//               value the regular collator.
//
// Returns the value of ucol_getSortKey, or INTL_BAD_KEY_LENGTH when the
// string cannot be converted to UTF-16 or memory is exhausted.
static USHORT unicode_str2key(
	texttype* tt,
	USHORT srcLen,
	const UCHAR* src,
	USHORT dstLen,
	UCHAR* dst,
	USHORT key_type)
{
	try
	{
		charset* cs = &tt->texttype_impl->cs;

		Firebird::HalfStaticArray<UCHAR, BUFFER_SMALL> utf16Str;
		USHORT errorCode = 0;
		ULONG offendingPos = 0;

		// First call (NULL destination) only sizes the UTF-16 buffer.
		utf16Str.getBuffer(
			cs->charset_to_unicode.csconvert_fn_convert(
				&cs->charset_to_unicode,
				srcLen,
				src,
				0,
				NULL,
				&errorCode,
				&offendingPos));

		if (utf16Str.getCapacity() < unicode_keylength(tt, srcLen))
		{
			fb_assert(false);
			return INTL_BAD_KEY_LENGTH;
		}

		errorCode = 0;

		const ULONG utf16Len = cs->charset_to_unicode.csconvert_fn_convert(
			&cs->charset_to_unicode,
			srcLen,
			src,
			utf16Str.getCapacity(),
			utf16Str.begin(),
			&errorCode,
			&offendingPos);

		// A failed conversion yields no usable length; passing it on would
		// make ICU read far beyond the buffer.
		if (errorCode != 0 || utf16Len == INTL_BAD_STR_LENGTH)
			return INTL_BAD_KEY_LENGTH;

		return ucol_getSortKey(
			(key_type == INTL_KEY_PARTIAL ? tt->texttype_impl->partialCollator : tt->texttype_impl->collator),
			reinterpret_cast<const UChar*>(utf16Str.begin()), utf16Len / sizeof(UChar), dst, dstLen);
	}
	catch (const Firebird::BadAlloc&)
	{
		fb_assert(false);
		return INTL_BAD_KEY_LENGTH;
	}
}
static SSHORT unicode_compare(
texttype* tt,
ULONG len1,
const UCHAR* str1,
ULONG len2,
const UCHAR* str2,
INTL_BOOL* error_flag)
{
try
{
*error_flag = false;
charset* cs = &tt->texttype_impl->cs;
Firebird::HalfStaticArray<UCHAR, BUFFER_SMALL> utf16Str1;
Firebird::HalfStaticArray<UCHAR, BUFFER_SMALL> utf16Str2;
USHORT errorCode;
ULONG offendingPos;
utf16Str1.getBuffer(
cs->charset_to_unicode.csconvert_fn_convert(
&cs->charset_to_unicode,
len1,
str1,
0,
NULL,
&errorCode,
&offendingPos));
ULONG utf16Len1 = cs->charset_to_unicode.csconvert_fn_convert(
&cs->charset_to_unicode,
len1,
str1,
utf16Str1.getCapacity(),
utf16Str1.begin(),
&errorCode,
&offendingPos);
utf16Str2.getBuffer(
cs->charset_to_unicode.csconvert_fn_convert(
&cs->charset_to_unicode,
len2,
str2,
0,
NULL,
&errorCode,
&offendingPos));
ULONG utf16Len2 = cs->charset_to_unicode.csconvert_fn_convert(
&cs->charset_to_unicode,
len2,
str2,
utf16Str2.getCapacity(),
utf16Str2.begin(),
&errorCode,
&offendingPos);
if (tt->texttype_pad_option)
{
const UCHAR* pad;
for (pad = utf16Str1.begin() + utf16Len1 - sizeof(USHORT); pad >= utf16Str1.begin(); pad -= sizeof(USHORT))
{
if (*reinterpret_cast<const USHORT*>(pad) != 32)
break;
}
utf16Len1 = pad - utf16Str1.begin() + sizeof(USHORT);
for (pad = utf16Str2.begin() + utf16Len2 - sizeof(USHORT); pad >= utf16Str2.begin(); pad -= sizeof(USHORT))
{
if (*reinterpret_cast<const USHORT*>(pad) != 32)
break;
}
utf16Len2 = pad - utf16Str2.begin() + sizeof(USHORT);
}
int32_t cmp = u_strCompare(
reinterpret_cast<const UChar*>(utf16Str1.begin()), utf16Len1 / sizeof(UChar),
reinterpret_cast<const UChar*>(utf16Str2.begin()), utf16Len2 / sizeof(UChar), true);
return (cmp < 0 ? -1 : (cmp > 0 ? 1 : 0));
}
catch (Firebird::BadAlloc)
{
fb_assert(false);
return 0;
}
}
// Destroy callback of the ICU text types: frees the copied collation name
// and the private TextTypeImpl.
static void texttype_destroy(texttype* tt)
{
	ASCII* const ownedName = const_cast<ASCII*>(tt->texttype_name);
	delete [] ownedName;

	TextTypeImpl* const impl = tt->texttype_impl;
	delete impl;
}
// Initializes tt as the default collation of an ICU charset. No compare or
// key callbacks are installed by this function.
//
// name                      - collation name; copied.
// charSetName               - ICU charset name; must be known to ICU.
// attributes                - must be exactly TEXTTYPE_ATTR_PAD_SPACE.
// specificAttributes/Length - must be empty (length 0).
//
// Returns true on success, false when the charset is unknown or the
// attributes are not supported.
static bool texttype_default_init(texttype* tt,
								  const ASCII* name,
								  const ASCII* charSetName,
								  USHORT attributes,
								  const UCHAR* specificAttributes,
								  ULONG specificAttributesLength)
{
	charset cs;
	memset(&cs, 0, sizeof(cs));

	// test if that ICU charset exists
	if (!CSICU_charset_init(&cs, charSetName))
		return false;

	// The charset was only needed as an existence probe. Release everything
	// the init allocated: the converters own private state that the charset
	// destroy callback does not free.
	if (cs.charset_to_unicode.csconvert_fn_destroy)
		cs.charset_to_unicode.csconvert_fn_destroy(&cs.charset_to_unicode);

	if (cs.charset_from_unicode.csconvert_fn_destroy)
		cs.charset_from_unicode.csconvert_fn_destroy(&cs.charset_from_unicode);

	if (cs.charset_fn_destroy)
		cs.charset_fn_destroy(&cs);

	if ((attributes & ~TEXTTYPE_ATTR_PAD_SPACE) ||
		// disabled TEXTTYPE_ATTR_PAD_SPACE isn't allowed
		// for our ICU collations yet
		!(attributes & TEXTTYPE_ATTR_PAD_SPACE) ||
		specificAttributesLength)
	{
		return false;
	}

	// name comes from stack. Copy it.
	tt->texttype_name = new ASCII[strlen(name) + 1];
	strcpy(const_cast<ASCII*>(tt->texttype_name), name);

	tt->texttype_version = TEXTTYPE_VERSION_1;
	tt->texttype_country = CC_INTL;
	tt->texttype_pad_option = (attributes & TEXTTYPE_ATTR_PAD_SPACE) ? true : false;
	tt->texttype_fn_destroy = texttype_destroy;

	return true;
}
// Initializes tt as a UNICODE collation: strings are converted to UTF-16
// through the charset found by LD_lookup_charset, and keys are produced by
// ICU collators for the root locale.
//
// name                      - collation name; copied.
// charSetName               - name of the charset to look up.
// attributes                - must be exactly TEXTTYPE_ATTR_PAD_SPACE.
// specificAttributes/Length - must be empty (length 0).
//
// Returns true on success. On failure returns false, releases everything
// that was allocated and leaves tt untouched.
static bool texttype_unicode_init(texttype* tt,
								  const ASCII* name,
								  const ASCII* charSetName,
								  USHORT attributes,
								  const UCHAR* specificAttributes,
								  ULONG specificAttributesLength)
{
	// Cheap validation first, before anything is allocated.
	if ((attributes & ~TEXTTYPE_ATTR_PAD_SPACE) ||
		// disabled TEXTTYPE_ATTR_PAD_SPACE isn't allowed
		// for our ICU collations yet
		!(attributes & TEXTTYPE_ATTR_PAD_SPACE) ||
		specificAttributesLength)
	{
		return false;
	}

	TextTypeImpl* impl = new TextTypeImpl();
	ASCII* nameCopy = NULL;
	bool ok = false;

	try
	{
		// test if that charset exists
		if (LD_lookup_charset(&impl->cs, charSetName))
		{
			const char* locale = "";
			UErrorCode status = U_ZERO_ERROR;

			impl->collator = ucol_open(locale, &status);
			impl->partialCollator = ucol_open(locale, &status);

			if (U_SUCCESS(status) && impl->collator && impl->partialCollator)
			{
				ucol_setAttribute(impl->partialCollator, UCOL_STRENGTH, UCOL_PRIMARY, &status);
				ok = U_SUCCESS(status);
			}
		}

		if (ok)
		{
			// name comes from stack. Copy it.
			nameCopy = new ASCII[strlen(name) + 1];
			strcpy(nameCopy, name);
		}
	}
	catch (...)
	{
		// The destructor closes the collators and destroys the charset.
		delete impl;
		throw;
	}

	if (!ok)
	{
		delete impl;
		return false;
	}

	tt->texttype_impl = impl;
	tt->texttype_name = nameCopy;

	tt->texttype_version = TEXTTYPE_VERSION_1;
	tt->texttype_country = CC_INTL;
	tt->texttype_pad_option = (attributes & TEXTTYPE_ATTR_PAD_SPACE) ? true : false;
	tt->texttype_fn_destroy = texttype_destroy;
	tt->texttype_fn_compare = unicode_compare;
	tt->texttype_fn_key_length = unicode_keylength;
	tt->texttype_fn_string_to_key = unicode_str2key;

	return true;
}
// Entry point for the ICU collations.
//
// A collation named exactly like its charset is the charset's default
// collation; a collation whose name ends in "_UNICODE" is the UNICODE
// collation of that charset. Any other name is rejected.
//
// Returns the result of the selected initializer, or false for an
// unrecognized collation name.
bool LCICU_texttype_init(texttype* tt,
						 const ASCII* name,
						 const ASCII* charSetName,
						 USHORT attributes,
						 const UCHAR* specificAttributes,
						 ULONG specificAttributesLength)
{
	static const char UNICODE_SUFFIX[] = "_UNICODE";
	const size_t suffixLen = sizeof(UNICODE_SUFFIX) - 1;
	const size_t len = strlen(name);

	if (strcmp(name, charSetName) == 0)
	{
		// Previously both branches called texttype_unicode_init, which left
		// texttype_default_init unused.
		return texttype_default_init(
			tt, name, charSetName, attributes,
			specificAttributes, specificAttributesLength);
	}
	else if (len > suffixLen && strcmp(name + len - suffixLen, UNICODE_SUFFIX) == 0)
	{
		return texttype_unicode_init(
			tt, name, charSetName, attributes,
			specificAttributes, specificAttributesLength);
	}
	else
		return false;
}

37
src/intl/lc_icu.h Normal file
View File

@ -0,0 +1,37 @@
/*
* PROGRAM: Firebird International support
* MODULE: lc_icu.h
* DESCRIPTION: Collations of ICU character sets
*
* The contents of this file are subject to the Initial
* Developer's Public License Version 1.0 (the "License");
* you may not use this file except in compliance with the
* License. You may obtain a copy of the License at
* http://www.ibphoenix.com/main.nfs?a=ibphoenix&page=ibp_idpl.
*
* Software distributed under the License is distributed AS IS,
* WITHOUT WARRANTY OF ANY KIND, either express or implied.
* See the License for the specific language governing rights
* and limitations under the License.
*
* The Original Code was created by Adriano dos Santos Fernandes
* for the Firebird Open Source RDBMS project.
*
* Copyright (c) 2004 Adriano dos Santos Fernandes <adrianosf@uol.com.br>
* and all contributors signed below.
*
* All Rights Reserved.
* Contributor(s): ______________________________________.
*/
#ifndef INTL_LC_ICU_H
#define INTL_LC_ICU_H
// Initializes tt as an ICU collation called `name` for charset `charSetName`.
// Accepts a collation named like its charset or one ending in "_UNICODE";
// returns false for any other name or when initialization fails.
bool LCICU_texttype_init(texttype* tt,
const ASCII* name,
const ASCII* charSetName,
USHORT attributes,
const UCHAR* specificAttributes,
ULONG specificAttributesLength);
#endif // INTL_LC_ICU_H

View File

@ -29,27 +29,30 @@
#define TEXTTYPE_tertiary_insensitive 0x10 /* Don't use tertiary level for comparisions */
#define TEXTTYPE_non_multi_level 0x20 /* Sortkey isn't more precise than equivalence class */
struct TextTypeImpl
namespace
{
TextTypeImpl()
: texttype_flags(0),
texttype_bytes_per_key(0),
texttype_collation_table(NULL),
texttype_expand_table(NULL),
texttype_compress_table(NULL),
texttype_toupper_table(NULL),
texttype_tolower_table(NULL)
struct TextTypeImpl
{
}
TextTypeImpl()
: texttype_flags(0),
texttype_bytes_per_key(0),
texttype_collation_table(NULL),
texttype_expand_table(NULL),
texttype_compress_table(NULL),
texttype_toupper_table(NULL),
texttype_tolower_table(NULL)
{
}
USHORT texttype_flags;
BYTE texttype_bytes_per_key;
const BYTE* texttype_collation_table;
const BYTE* texttype_expand_table;
const BYTE* texttype_compress_table;
const BYTE* texttype_toupper_table;
const BYTE* texttype_tolower_table;
};
USHORT texttype_flags;
BYTE texttype_bytes_per_key;
const BYTE* texttype_collation_table;
const BYTE* texttype_expand_table;
const BYTE* texttype_compress_table;
const BYTE* texttype_toupper_table;
const BYTE* texttype_tolower_table;
};
}
USHORT LC_NARROW_key_length(TEXTTYPE obj, USHORT inLen);
USHORT LC_NARROW_string_to_key(TEXTTYPE obj, USHORT iInLen, const BYTE* pInChar,

View File

@ -24,6 +24,9 @@
#include "firebird.h"
#include "../intl/ldcommon.h"
#include "../intl/ld_proto.h"
#include "../intl/cs_icu.h"
#include "../intl/lc_icu.h"
#include "../../include/fb_exception.h"
#ifdef HAVE_SYS_PARAM_H
#include <sys/param.h> /* for MAXPATHLEN */
@ -291,9 +294,15 @@ EXTERN_convert(CVJIS_sjis_x_eucj);
INTL_BOOL FB_DLL_EXPORT LD_lookup_charset(charset* cs, const ASCII* name)
{
#define CHARSET(cs_name, cs_id, coll_id, bytes, num, cs_symbol, cp_symbol, attr) \
if (strcmp(name, cs_name) == 0) \
return cs_symbol(cs, name);
try
{
#define CHARSET(cs_name, cs_id, coll_id, bytes, num, cs_symbol, cp_symbol, attr) \
{ \
EXTERN_charset((*lookup_symbol)) = cs_symbol; \
\
if (lookup_symbol && strcmp(name, cs_name) == 0) \
return lookup_symbol(cs, name); \
}
#define CSALIAS(name, cs_id)
#define COLLATION(name, cc_id, cs_id, coll_id, symbol, attr)
#define COLLATE_ALIAS(name, coll_id)
@ -309,26 +318,50 @@ INTL_BOOL FB_DLL_EXPORT LD_lookup_charset(charset* cs, const ASCII* name)
#undef COLLATE_ALIAS
#undef END_CHARSET
return (false);
return CSICU_charset_init(cs, name);
}
catch (Firebird::BadAlloc)
{
fb_assert(false);
return false;
}
}
INTL_BOOL FB_DLL_EXPORT LD_lookup_texttype(texttype* tt, const ASCII* texttype_name, const ASCII* charset_name,
USHORT attributes, const UCHAR* specific_attributes,
ULONG specific_attributes_length, INTL_BOOL ignore_attributes)
{
#define CHARSET(cs_name, cs_id, coll_id, bytes, num, cs_symbol, cp_symbol, coll_attr) \
if (strcmp(charset_name, cs_name) == 0) { \
if (strcmp(texttype_name, cs_name) == 0) \
return cp_symbol(tt, texttype_name, charset_name, (ignore_attributes ? coll_attr : attributes), \
(ignore_attributes ? NULL : specific_attributes), \
(ignore_attributes ? 0 : specific_attributes_length));
try
{
#define CHARSET(cs_name, cs_id, coll_id, bytes, num, cs_symbol, cp_symbol, coll_attr) \
if (strcmp(charset_name, cs_name) == 0) { \
{ \
EXTERN_texttype((*lookup_symbol)) = cp_symbol; \
\
if (lookup_symbol != NULL && strcmp(texttype_name, cs_name) == 0) \
{ \
return lookup_symbol( \
tt, texttype_name, charset_name, \
(ignore_attributes ? coll_attr : attributes), \
(ignore_attributes ? NULL : specific_attributes), \
(ignore_attributes ? 0 : specific_attributes_length)); \
} \
}
#define CSALIAS(name, cs_id)
#define END_CHARSET }
#define COLLATION(tt_name, cc_id, cs_id, coll_id, symbol, coll_attr) \
if (strcmp(texttype_name, tt_name) == 0) \
return symbol(tt, texttype_name, charset_name, (ignore_attributes ? coll_attr : attributes), \
(ignore_attributes ? NULL : specific_attributes), \
(ignore_attributes ? 0 : specific_attributes_length));
#define COLLATION(tt_name, cc_id, cs_id, coll_id, symbol, coll_attr) \
{ \
EXTERN_texttype((*lookup_symbol)) = symbol; \
\
if (lookup_symbol && strcmp(texttype_name, tt_name) == 0) \
{ \
return lookup_symbol( \
tt, texttype_name, charset_name, \
(ignore_attributes ? coll_attr : attributes), \
(ignore_attributes ? NULL : specific_attributes), \
(ignore_attributes ? 0 : specific_attributes_length)); \
} \
}
#define COLLATE_ALIAS(name, coll_id)
#define INTL_COMPONENT_FB
@ -341,7 +374,16 @@ INTL_BOOL FB_DLL_EXPORT LD_lookup_texttype(texttype* tt, const ASCII* texttype_n
#undef COLLATE_ALIAS
#undef END_CHARSET
return (false);
return LCICU_texttype_init(
tt, texttype_name, charset_name, (ignore_attributes ? TEXTTYPE_ATTR_PAD_SPACE : attributes),
(ignore_attributes ? NULL : specific_attributes),
(ignore_attributes ? 0 : specific_attributes_length));
}
catch (Firebird::BadAlloc)
{
fb_assert(false);
return false;
}
}
#undef DRIVER

View File

@ -207,7 +207,9 @@ bool IntlManager::lookupCharSet(const Firebird::string& charSetName, charset* cs
(pfn_INTL_lookup_charset)module->findSymbol(STRINGIZE(CHARSET_ENTRYPOINT));
if (lookupFunction && (*lookupFunction)(cs, externalInfo.name.c_str()))
return true;
{
return validateCharSet(charSetName, cs);
}
}
#endif
}
@ -281,4 +283,40 @@ bool IntlManager::registerCharSetCollation(const Firebird::string& name, const F
}
// Checks that an externally provided charset stays within what is supported:
// ASCII based, one byte minimum per character and a single ASCII space as
// space character. Every violated rule is written to the log; the result is
// true only when all rules hold.
bool IntlManager::validateCharSet(const Firebird::string& charSetName, charset* cs)
{
	string prefix;
	prefix.printf("Unsupported character set %s.", charSetName.c_str());

	string logLine;
	bool accepted = true;

	const bool asciiBased = (cs->charset_flags & CHARSET_ASCII_BASED) != 0;
	if (!asciiBased)
	{
		logLine.printf("%s. Only ASCII-based character sets are supported yet.",
			prefix.c_str());
		gds__log(logLine.c_str());
		accepted = false;
	}

	const bool narrow = (cs->charset_min_bytes_per_char == 1);
	if (!narrow)
	{
		logLine.printf("%s. Wide character sets are not supported yet.",
			prefix.c_str());
		gds__log(logLine.c_str());
		accepted = false;
	}

	// The space byte is only inspected when the length is exactly one.
	const bool asciiSpace =
		(cs->charset_space_length == 1 && *cs->charset_space_character == ' ');
	if (!asciiSpace)
	{
		logLine.printf("%s. Only ASCII space is supported in charset_space_character yet.",
			prefix.c_str());
		gds__log(logLine.c_str());
		accepted = false;
	}

	return accepted;
}
} // namespace Jrd

View File

@ -54,6 +54,8 @@ private:
static bool registerCharSetCollation(const Firebird::string& name,
const Firebird::PathName& filename,
const Firebird::string& externalName);
static bool validateCharSet(const Firebird::string& charSetName, charset* cs);
};
} // namespace Jrd

View File

@ -23,10 +23,13 @@ static void internal_destroy(TEXTTYPE);
static void unicode_destroy(TEXTTYPE obj);
struct TextTypeImpl
namespace
{
BYTE texttype_pad_char;
};
struct TextTypeImpl
{
BYTE texttype_pad_char;
};
}
static inline bool FAMILY_INTERNAL(TEXTTYPE cache,

View File

@ -44,11 +44,19 @@ typedef SCHAR ASCII;
typedef USHORT INTL_BOOL;
#ifdef __cplusplus
namespace {
#endif
/* Forward declarations to be implemented in collation driver */
struct TextTypeImpl;
struct CharSetImpl;
struct CsConvertImpl;
#ifdef __cplusplus
}
#endif
struct texttype; /* forward decl for the fc signatures before the struct itself. */
struct csconvert;
struct charset;