mirror of
https://github.com/FirebirdSQL/firebird.git
synced 2025-01-22 20:43:02 +01:00
Doc.
This commit is contained in:
parent
7d6faaa52d
commit
3bb7c2ee92
255
doc/README.intl
Normal file
255
doc/README.intl
Normal file
@ -0,0 +1,255 @@
|
||||
Firebird INTL
|
||||
=============
|
||||
|
||||
Author: Adriano dos Santos Fernandes <adrianosf at uol.com.br>
|
||||
|
||||
|
||||
Architecture
|
||||
------------
|
||||
|
||||
Firebird allows you to specify character sets and collations in every field/variable declaration.
|
||||
You can also specify the default character set at database creation time, and every CHAR/VARCHAR declaration that omits the character set will use it.
|
||||
|
||||
At attachment time you can specify the character set in which the client wants to read all strings.
|
||||
If you don't specify one, NONE is assumed.
|
||||
|
||||
There are two special character sets: NONE and OCTETS.
|
||||
Both can be used in declarations, but OCTETS can't be used as the attachment character set.
|
||||
They are very similar, with the exception that the space character of NONE is ASCII 0x20 and the space character of OCTETS is 0x00.
|
||||
They are special because they don't follow the rules of other character sets regarding conversions.
|
||||
With other character sets, conversion is performed as CHARSET1->UNICODE->CHARSET2. With NONE/OCTETS the bytes are just copied: NONE/OCTETS->CHARSET2 and CHARSET1->NONE/OCTETS.
|
||||
|
||||
|
||||
Enhancements
|
||||
------------
|
||||
|
||||
|
||||
Well-formedness checks
|
||||
----------------------
|
||||
|
||||
Some character sets (especially multi-byte ones) don't accept every byte sequence.
|
||||
Now the engine verifies that strings are well-formed when assigning from NONE/OCTETS and when receiving strings sent by the client (the statement string and parameters).
|
||||
|
||||
|
||||
Uppercase
|
||||
---------
|
||||
|
||||
In FB 1.5.X only ASCII characters are uppercased in a character set's default collation order (i.e. when no collation is specified). Example:
|
||||
|
||||
isql -q -ch dos850
|
||||
SQL> create database 'test.fdb';
|
||||
SQL> create table t (c char(1) character set dos850);
|
||||
SQL> insert into t values ('a');
|
||||
SQL> insert into t values ('e');
|
||||
SQL> insert into t values ('á');
|
||||
SQL> insert into t values ('é');
|
||||
SQL>
|
||||
SQL> select c, upper(c) from t;
|
||||
|
||||
C UPPER
|
||||
====== ======
|
||||
a A
|
||||
e E
|
||||
á á
|
||||
é é
|
||||
|
||||
In FB 2.0 the result is:
|
||||
|
||||
C UPPER
|
||||
====== ======
|
||||
a A
|
||||
e E
|
||||
á Á
|
||||
é É
|
||||
|
||||
|
||||
Maximum string length
|
||||
---------------------
|
||||
|
||||
In FB 1.5.X the engine doesn't verify the logical length of MBCS strings.
|
||||
Hence a UNICODE_FSS field can accept three (maximum length of one UNICODE_FSS character) times more characters than what's declared in the field size.
|
||||
For compatibility purposes this behavior was maintained for legacy character sets, but new character sets (UTF8, for example) don't suffer from this problem.
|
||||
|
||||
|
||||
NONE as attachment character set
|
||||
--------------------------------
|
||||
|
||||
When NONE is used as the attachment character set, the sqlsubtype member of XSQLVAR contains the character set number of the field being read, instead of always 0 as in previous versions.
|
||||
|
||||
|
||||
BLOBs and collations
|
||||
--------------------
|
||||
|
||||
The DML COLLATE clause can now be used with BLOBs. Example:
|
||||
select blob_column from table where blob_column collate unicode = 'foo';
|
||||
|
||||
|
||||
New character sets and collations
|
||||
---------------------------------
|
||||
|
||||
|
||||
UTF8 character set
|
||||
------------------
|
||||
|
||||
The UNICODE_FSS character set has a number of problems: it's an old version of UTF8, it accepts malformed strings and it doesn't enforce the correct maximum string length. In FB 1.5.X UTF8 is an alias for UNICODE_FSS.
|
||||
Now UTF8 is a new character set, without these problems of UNICODE_FSS.
|
||||
|
||||
|
||||
UNICODE collations (for UTF8)
|
||||
-----------------------------
|
||||
|
||||
UCS_BASIC works identically to UTF8 with no collation specified (sorts in UNICODE code-point order).
|
||||
UNICODE sorts using UCA (Unicode Collation Algorithm).
|
||||
Sort order sample:
|
||||
|
||||
isql -q -ch dos850
|
||||
SQL> create database 'test.fdb';
|
||||
SQL> create table t (c char(1) character set utf8);
|
||||
SQL> insert into t values ('a');
|
||||
SQL> insert into t values ('A');
|
||||
SQL> insert into t values ('á');
|
||||
SQL> insert into t values ('b');
|
||||
SQL> insert into t values ('B');
|
||||
SQL> select * from t order by c collate ucs_basic;
|
||||
|
||||
C
|
||||
======
|
||||
A
|
||||
B
|
||||
a
|
||||
b
|
||||
á
|
||||
|
||||
SQL> select * from t order by c collate unicode;
|
||||
|
||||
C
|
||||
======
|
||||
a
|
||||
A
|
||||
á
|
||||
b
|
||||
B
|
||||
|
||||
|
||||
Brazilian collations
|
||||
--------------------
|
||||
|
||||
Two case-insensitive/accent-insensitive collations were created for Brazil: PT_BR/WIN_PTBR (for WIN1252) and PT_BR (for ISO8859_1).
|
||||
Sort order and equality sample:
|
||||
|
||||
isql -q -ch dos850
|
||||
SQL> create database 'test.fdb';
|
||||
SQL> create table t (c char(1) character set iso8859_1 collate pt_br);
|
||||
SQL> insert into t values ('a');
|
||||
SQL> insert into t values ('A');
|
||||
SQL> insert into t values ('á');
|
||||
SQL> insert into t values ('b');
|
||||
SQL> select * from t order by c;
|
||||
|
||||
C
|
||||
======
|
||||
A
|
||||
a
|
||||
á
|
||||
b
|
||||
|
||||
SQL> select * from t where c = 'â';
|
||||
|
||||
C
|
||||
======
|
||||
a
|
||||
A
|
||||
á
|
||||
|
||||
|
||||
Drivers
|
||||
-------
|
||||
|
||||
New character sets and collations are implemented through dynamic libraries and installed in the server with a manifest file in the intl subdirectory. For an example, see fbintl.conf.
|
||||
Not all implemented character sets and collations need to be listed in the manifest file. Only those listed are available, and duplicates are not loaded.
|
||||
|
||||
After being installed in the server, they should be registered in the database's system tables (rdb$character_sets and rdb$collations). The following stored procedures do the job:
|
||||
|
||||
set term !;
|
||||
|
||||
/*
 * sp_register_character_set
 *
 * Registers a character set in rdb$character_sets and adds a collation
 * with the same name (collation id 0) for it in rdb$collations.
 * Both rows are written with rdb$system_flag = 0.
 *
 * Parameters:
 *   name                    - character set name, upper-cased before use
 *   max_bytes_per_character - value stored in rdb$bytes_per_character
 */
create or alter procedure sp_register_character_set
(
    name char(31) character set unicode_fss,
    max_bytes_per_character smallint
)
as
    declare variable new_id smallint;
    declare variable used_id smallint;
begin
    /* Candidate id starts at 255 and is stepped down below. */
    new_id = 255;
    name = upper(name);

    /*
     * Walk the existing ids from the highest downwards. Each row that
     * equals the current candidate pushes the candidate down by one,
     * and the first row that differs ends the scan.
     */
    for
        select rdb$character_set_id
        from rdb$character_sets
        order by rdb$character_set_id desc
        into :used_id
    do
    begin
        if (used_id = new_id) then
        begin
            new_id = new_id - 1;
        end
        else
        begin
            break;
        end
    end

    insert into rdb$character_sets
    (
        rdb$character_set_name,
        rdb$character_set_id,
        rdb$system_flag,
        rdb$bytes_per_character
    )
    values
    (
        :name,
        :new_id,
        0,
        :max_bytes_per_character
    );

    /* Collation id 0 of the new character set carries the same name. */
    insert into rdb$collations
    (
        rdb$collation_name,
        rdb$collation_id,
        rdb$character_set_id,
        rdb$system_flag
    )
    values
    (
        :name,
        0,
        :new_id,
        0
    );
end!
|
||||
|
||||
/*
 * sp_register_collation
 *
 * Registers a collation for an already registered character set by
 * inserting a row into rdb$collations with rdb$system_flag = 0.
 *
 * Parameters:
 *   character_set       - name of the owning character set, upper-cased
 *   name                - collation name, upper-cased
 *   base_name           - stored in rdb$base_collation_name, upper-cased.
 *                         When null (the default) the upper-cased
 *                         collation name is used instead
 *   attributes          - stored in rdb$collation_attributes (default null)
 *   specific_attributes - stored in rdb$specific_attributes (default null)
 *
 * NOTE(review): nothing here checks that the character set lookup found
 * a row. Presumably charset_id stays null in that case and the insert
 * still runs - confirm before relying on this with unknown names.
 */
create or alter procedure sp_register_collation
(
    character_set char(31) character set unicode_fss,
    name char(31) character set unicode_fss,
    base_name char(31) character set unicode_fss = null,
    attributes smallint = null,
    specific_attributes blob sub_type text character set unicode_fss = null
)
as
    declare variable new_id smallint;
    declare variable used_id smallint;
    declare variable charset_id smallint;
begin
    /* Candidate collation id starts at 126 and is stepped down below. */
    new_id = 126;

    character_set = upper(character_set);
    name = upper(name);
    /* name is already upper-cased here, so the fallback is as well. */
    base_name = coalesce(upper(base_name), name);

    select rdb$character_set_id
    from rdb$character_sets
    where rdb$character_set_name = :character_set
    into :charset_id;

    /*
     * Walk the collation ids of this character set from the highest
     * downwards. Each row that equals the current candidate pushes the
     * candidate down by one, and the first row that differs ends the scan.
     */
    for
        select rdb$collation_id
        from rdb$collations
        where rdb$character_set_id = :charset_id
        order by rdb$collation_id desc
        into :used_id
    do
    begin
        if (used_id = new_id) then
        begin
            new_id = new_id - 1;
        end
        else
        begin
            break;
        end
    end

    insert into rdb$collations
    (
        rdb$collation_name,
        rdb$collation_id,
        rdb$character_set_id,
        rdb$system_flag,
        rdb$base_collation_name,
        rdb$collation_attributes,
        rdb$specific_attributes
    )
    values
    (
        :name,
        :new_id,
        :charset_id,
        0,
        :base_name,
        :attributes,
        :specific_attributes
    );
end!
|
||||
|
||||
set term ;!
|
||||
commit;
|
||||
|
||||
Usage example:
|
||||
execute procedure sp_register_character_set ('CHARSET_NAME', 1);
|
||||
commit;
|
||||
|
||||
execute procedure sp_register_collation ('ISO8859_1', 'COLLATION_NAME');
|
||||
commit;
|
Loading…
Reference in New Issue
Block a user