6
0
mirror of https://github.com/FirebirdSQL/firebird-qa.git synced 2025-01-22 13:33:07 +01:00
firebird-qa/tests/bugs/core_3601_test.py

327 lines
14 KiB
Python
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#coding:utf-8
"""
ID: issue-3955
ISSUE: 3955
TITLE: Incorrect TEXT BLOB charset transliteration on VIEW with trigger
DESCRIPTION:
Test verifies that everything works correctly when the connection charset is UTF8.
JIRA: CORE-3601
FBTEST: bugs.core_3601
"""
import pytest
from firebird.qa import *
# SQL executed once to prepare the test database schema:
#   * drops leftovers (view/table are recreated as stubs first so that the
#     domains and the sequence can be dropped without dependency errors);
#   * creates the sequence gen_test and two text-blob domains, memo_utf8
#     (character set utf8) and memo_win1250 (character set win1250);
#   * creates table t_test with one column of each domain;
#   * creates view v_t_test over t_test plus the trigger v_t_test_bd, which
#     forwards every insert/update/delete issued against the view to t_test.
init_script = """
-- This part of test (for 3.0) should be encoded in UTF8 as for running under ISQL and under fbt-run.
recreate view v_t_test as select 1 id from rdb$database;
commit;
recreate table t_test (id int);
commit;
set term ^;
execute block as
begin
begin execute statement 'drop domain MEMO_UTF8'; when any do begin end end
begin execute statement 'drop domain MEMO_WIN1250'; when any do begin end end
begin execute statement 'drop sequence gen_test'; when any do begin end end
end
^ set term ;^
commit;
create sequence gen_test;
create domain memo_utf8 as
blob sub_type 1 segment size 100 character set utf8;
create domain memo_win1250 as
blob sub_type 1 segment size 100 character set win1250;
commit;
recreate table t_test (
id bigint not null,
action varchar(80),
memo_utf8 memo_utf8,
memo_win1250 memo_win1250,
constraint pk_t_test primary key (id)
);
commit;
create or alter view v_t_test as
select
t.id,
t.action,
t.memo_utf8,
t.memo_win1250
from t_test t
;
set term ^ ;
create or alter trigger v_t_test_bd for v_t_test
active before insert or update or delete position 0
as
begin
if (inserting) then
insert into t_test(
id,
action,
memo_utf8,
memo_win1250)
values(
coalesce( new.id, gen_id(gen_test,1) ),
new.action,
new.memo_utf8,
new.memo_win1250);
else if (updating) then
update t_test set
id = new.id,
action = new.action,
memo_utf8 = new.memo_utf8,
memo_win1250 = new.memo_win1250
where id = old.id;
else
delete from t_test
where id = old.id;
end
^
set term ; ^
commit;
"""
# Database fixture for this test; init_script is passed as its `init` script
# (presumably executed when the test database is created - see firebird.qa).
db = db_factory(init=init_script)
test_script = """
set term ^;
execute block as
declare v_text blob sub_type 1 segment size 100 character set utf8; -- ###### B L O B C H A R S E T = U T F 8 #####
begin
-- http://www.columbia.edu/kermit/cp1250.html
v_text =
''
|| '' -- 128 08/00 200 80 EURO SYMBOL
|| '' -- 130 08/02 202 82 LOW 9 SINGLE QUOTE
|| '' -- 132 08/04 204 84 LOW 9 DOUBLE QUOTE
|| '' -- 133 08/05 205 85 ELLIPSIS
|| '' -- 134 08/06 206 86 DAGGER
|| '' -- 135 08/07 207 87 DOUBLE DAGGER
|| '' -- 137 08/09 211 89 PER MIL SIGN
|| 'Š' -- 138 08/10 212 8A CAPITAL LETTER S WITH CARON
|| '' -- 139 08/11 213 8B LEFT SINGLE QUOTE BRACKET
|| 'Ś' -- 140 08/12 214 8C CAPITAL LETTER S WITH ACUTE ACCENT
|| 'Ť' -- 141 08/13 215 8D CAPITAL LETTER T WITH CARON
|| 'Ž' -- 142 08/14 216 8E CAPITAL LETTER Z WITH CARON
|| 'Ź' -- 143 08/15 217 8F CAPITAL LETTER Z WITH ACUTE ACCENT
|| '' -- 145 09/01 221 91 HIGH 6 SINGLE QUOTE
|| '' -- 146 09/02 222 92 HIGH 9 SINGLE QUOTE
|| '' -- 147 09/03 223 93 HIGH 6 DOUBLE QUOTE
|| '' -- 148 09/04 224 94 HIGH 9 DOUBLE QUOTE
|| '' -- 149 09/05 225 95 LARGE CENTERED DOT
|| '' -- 150 09/06 226 96 EN DASH
|| '' -- 151 09/07 227 97 EM DASH
|| '' -- 153 09/09 231 99 TRADEMARK SIGN
|| 'š' -- 154 09/10 232 9A SMALL LETTER S WITH CARON
|| '' -- 155 09/11 233 9B RIGHT SINGLE QUOTE BRACKET
|| 'ś' -- 156 09/12 234 9C SMALL LETTER S WITH ACUTE ACCENT
|| 'ť' -- 157 09/13 235 9D SMALL LETTER T WITH CARON
|| 'ž' -- 158 09/14 236 9E SMALL LETTER Z WITH CARON
|| 'ź' -- 159 09/15 237 9F SMALL LETTER Z WITH ACUTE ACCENT
|| ' ' -- 160 10/00 240 A0 NO-BREAK SPACE
|| 'ˇ' -- 161 10/01 241 A1 CARON
|| '˘' -- 162 10/02 242 A2 BREVE
|| 'Ł' -- 163 10/03 243 A3 CAPITAL LETTER L WITH STROKE
|| '¤' -- 164 10/04 244 A4 CURRENCY SIGN
|| 'Ą' -- 165 10/05 245 A5 CAPITAL LETTER A WITH OGONEK
|| '¦' -- 166 10/06 246 A6 BROKEN BAR
|| '§' -- 167 10/07 247 A7 PARAGRAPH SIGN
|| '¨' -- 168 10/08 250 A8 DIAERESIS
|| '©' -- 169 10/09 251 A9 COPYRIGHT SIGN
|| 'Ş' -- 170 10/10 252 AA CAPITAL LETTER S WITH CEDILLA
|| '«' -- 171 10/11 253 AB LEFT ANGLE QUOTATION MARK
|| '¬' -- 172 10/12 254 AC NOT SIGN
|| '­' -- 173 10/13 255 AD SOFT HYPHEN
|| '®' -- 174 10/14 256 AE REGISTERED TRADE MARK SIGN
|| 'Ż' -- 175 10/15 257 AF CAPITAL LETTER Z WITH DOT ABOVE
|| '°' -- 176 11/00 260 B0 DEGREE SIGN, RING ABOVE
|| '±' -- 177 11/01 261 B1 PLUS-MINUS SIGN
|| '˛' -- 178 11/02 262 B2 OGONEK
|| 'ł' -- 179 11/03 263 B3 SMALL LETTER L WITH STROKE
|| '´' -- 180 11/04 264 B4 ACUTE ACCENT
|| 'µ' -- 181 11/05 265 B5 MICRO SIGN
|| '' -- 182 11/06 266 B6 PILCROW SIGN
|| '·' -- 183 11/07 267 B7 MIDDLE DOT
|| '¸' -- 184 11/08 270 B8 CEDILLA
|| 'ą' -- 185 11/09 271 B9 SMALL LETTER A WITH OGONEK
|| 'ş' -- 186 11/10 272 BA SMALL LETTER S WITH CEDILLA
|| '»' -- 187 11/11 273 BB RIGHT ANGLE QUOTATION MARK
|| 'Ľ' -- 188 11/12 274 BC CAPITAL LETTER L WITH CARON
|| '˝' -- 189 11/13 275 BD DOUBLE ACUTE ACCENT
|| 'ľ' -- 190 11/14 276 BE CAPITAL LETTER I WITH CARON
|| 'ż' -- 191 11/15 277 BF SMALL LETTER Z WITH DOT ABOVE
|| 'Ŕ' -- 192 12/00 300 C0 CAPITAL LETTER R WITH ACUTE ACCENT
|| 'Á' -- 193 12/01 301 C1 CAPITAL LETTER A WITH ACUTE ACCENT
|| 'Â' -- 194 12/02 302 C2 CAPITAL LETTER A WITH CIRCUMFLEX
|| 'Ă' -- 195 12/03 303 C3 CAPITAL LETTER A WITH BREVE
|| 'Ä' -- 196 12/04 304 C4 CAPITAL LETTER A WITH DIAERESIS
|| 'Ĺ' -- 197 12/05 305 C5 CAPITAL LETTER L WITH ACUTE ACCENT
|| 'Ć' -- 198 12/06 306 C6 CAPITAL LETTER C WITH ACUTE ACCENT
|| 'Ç' -- 199 12/07 307 C7 CAPITAL LETTER C WITH CEDILLA
|| 'Č' -- 200 12/08 310 C8 CAPITAL LETTER C WITH CARON
|| 'É' -- 201 12/09 311 C9 CAPITAL LETTER E WITH ACUTE ACCENT
|| 'Ę' -- 202 12/10 312 CA CAPITAL LETTER E WITH OGONEK
|| 'Ë' -- 203 12/11 313 CB CAPITAL LETTER E WITH DIAERESIS
|| 'Ě' -- 204 12/12 314 CC CAPITAL LETTER E WITH CARON
|| 'Í' -- 205 12/13 315 CD CAPITAL LETTER I WITH ACUTE ACCENT
|| 'Î' -- 206 12/14 316 CE CAPITAL LETTER I WITH CIRCUMFLEX ACCENT
|| 'Ď' -- 207 12/15 317 CF CAPITAL LETTER D WITH CARON
|| 'Đ' -- 208 13/00 320 D0 CAPITAL LETTER D WITH STROKE
|| 'Ń' -- 209 13/01 321 D1 CAPITAL LETTER N WITH ACUTE ACCENT
|| 'Ň' -- 210 13/02 322 D2 CAPITAL LETTER N WITH CARON
|| 'Ó' -- 211 13/03 323 D3 CAPITAL LETTER O WITH ACUTE ACCENT
|| 'Ô' -- 212 13/04 324 D4 CAPITAL LETTER O WITH CIRCUMFLEX
|| 'Ő' -- 213 13/05 325 D5 CAPITAL LETTER O WITH DOUBLE ACUTE ACCENT
|| 'Ö' -- 214 13/06 326 D6 CAPITAL LETTER O WITH DIAERESIS
|| '×' -- 215 13/07 327 D7 MULTIPLICATION SIGN
|| 'Ř' -- 216 13/08 330 D8 CAPITAL LETTER R WITH CARON
|| 'Ů' -- 217 13/09 331 D9 CAPITAL LETTER U WITH RING ABOVE
|| 'Ú' -- 218 13/10 332 DA CAPITAL LETTER U WITH ACUTE ACCENT
|| 'Ű' -- 219 13/11 333 DB CAPITAL LETTER U WITH DOUBLE ACUTE ACCENT
|| 'Ü' -- 220 13/12 334 DC CAPITAL LETTER U WITH DIAERESIS
|| 'Ý' -- 221 13/13 335 DD CAPITAL LETTER Y WITH ACUTE ACCENT
|| 'Ţ' -- 222 13/14 336 DE CAPITAL LETTER T WITH CEDILLA
|| 'ß' -- 223 13/15 337 DF SMALL GERMAN LETTER SHARP s
|| 'ŕ' -- 224 14/00 340 E0 SMALL LETTER R WITH ACUTE ACCENT
|| 'á' -- 225 14/01 341 E1 SMALL LETTER A WITH ACUTE ACCENT
|| 'â' -- 226 14/02 342 E2 SMALL LETTER A WITH CIRCUMFLEX
|| 'ă' -- 227 14/03 343 E3 SMALL LETTER A WITH BREVE
|| 'ä' -- 228 14/04 344 E4 SMALL LETTER A WITH DIAERESIS
|| 'ĺ' -- 229 14/05 345 E5 SMALL LETTER L WITH ACUTE ACCENT
|| 'ć' -- 230 14/06 346 E6 SMALL LETTER C WITH ACUTE ACCENT
|| 'ç' -- 231 14/07 347 E7 SMALL LETTER C WITH CEDILLA
|| 'č' -- 232 14/08 350 E8 SMALL LETTER C WITH CARON
|| 'é' -- 233 14/09 351 E9 SMALL LETTER E WITH ACUTE ACCENT
|| 'ę' -- 234 14/10 352 EA SMALL LETTER E WITH OGONEK
|| 'ë' -- 235 14/11 353 EB SMALL LETTER E WITH DIAERESIS
|| 'ě' -- 236 14/12 354 EC SMALL LETTER E WITH CARON
|| 'í' -- 237 14/13 355 ED SMALL LETTER I WITH ACUTE ACCENT
|| 'î' -- 238 14/14 356 EE SMALL LETTER I WITH CIRCUMFLEX ACCENT
|| 'ď' -- 239 14/15 357 EF SMALL LETTER D WITH CARON
|| 'đ' -- 240 15/00 360 F0 SMALL LETTER D WITH STROKE
|| 'ń' -- 241 15/01 361 F1 SMALL LETTER N WITH ACUTE ACCENT
|| 'ň' -- 242 15/02 362 F2 SMALL LETTER N WITH CARON
|| 'ó' -- 243 15/03 363 F3 SMALL LETTER O WITH ACUTE ACCENT
|| 'ô' -- 244 15/04 364 F4 SMALL LETTER O WITH CIRCUMFLEX
|| 'ő' -- 245 15/05 365 F5 SMALL LETTER O WITH DOUBLE ACUTE ACCENT
|| 'ö' -- 246 15/06 366 F6 SMALL LETTER O WITH DIAERESIS
|| '÷' -- 247 15/07 367 F7 DIVISION SIGN
|| 'ř' -- 248 15/08 370 F8 SMALL LETTER R WITH CARON
|| 'ů' -- 249 15/09 371 F9 SMALL LETTER U WITH RING ABOVE
|| 'ú' -- 250 15/10 372 FA SMALL LETTER U WITH ACUTE ACCENT
|| 'ű' -- 251 15/11 373 FB SMALL LETTER U WITH DOUBLE ACUTE ACCENT
|| 'ü' -- 252 15/12 374 FC SMALL LETTER U WITH DIAERESIS
|| 'ý' -- 253 15/13 375 FD SMALL LETTER Y WITH ACUTE ACCENT
|| 'ţ' -- 254 15/14 376 FE SMALL LETTER T WITH CEDILLA
|| '˙' -- 255 15/15 377 FF DOT ABOVE
;
insert into t_test(
id,
action,
memo_utf8,
memo_win1250)
values(
gen_id(gen_test,1),
'insert directly in the table, connect charset = utf8, blob charset = utf8',
:v_text,
:v_text);
insert into v_t_test(
action,
memo_utf8, -- isc_put_segment on V_T_TEST.MEMO_UTF8 with buffer as utf-8 is OK
memo_win1250 -- isc_put_segment on V_T_TEST.MEMO_WIN1250 with buffer as utf-8 not transliterate character set and stores in table incorrect as utf-8
)
values(
'insert through the view, connect charset = utf8, blob charset = utf8',
:v_text, -- to be written in memo_utf8
:v_text -- to be written in memo_win1250
);
insert into v_t_test(
action,
memo_utf8, -- isc_put_segment on V_T_TEST.MEMO_UTF8 with buffer as utf-8 is OK
memo_win1250 -- isc_put_segment on V_T_TEST.MEMO_WIN1250 with buffer as utf-8 not transliterate character set and stores in table incorrect as utf-8
)
values(
'insert through the view, connect charset = utf8, blob charset = win1250',
cast( :v_text as blob sub_type 1 character set win1250), -- to be written in memo_utf8
cast( :v_text as blob sub_type 1 character set win1250) -- to be written in memo_win1250
);
end
^
set term ;^
-- Confirmed on WI-V2.5.1.26351: insert into TABLE works fine, but insert into view produces:
-- Statement failed, SQLSTATE = 22018
-- arithmetic exception, numeric overflow, or string truncation
-- -Cannot transliterate character between character sets
-- On Win1250 connection:
set blob all;
set list on;
select
id,
action,
memo_utf8,
octet_length(memo_utf8) oct_utf8,
char_length(memo_utf8) chr_utf8,
memo_win1250,
octet_length(memo_win1250) oct_w1250,
char_length(memo_win1250) chr_w1250
from v_t_test;
"""
# ISQL action that runs test_script against the 'db' fixture. The substitution
# rewrites everything from 'MEMO_UTF8' to the end of the match as plain
# 'MEMO_UTF8' (presumably to hide the blob id printed after the column name,
# which is not a stable value - TODO confirm against firebird.qa docs).
# NOTE(review): the pattern does not cover the 'MEMO_WIN1250 <blob id>' lines,
# so their blob ids are compared literally - confirm that this is intended.
act = isql_act('db', test_script, substitutions=[('MEMO_UTF8.*', 'MEMO_UTF8')])
# Expected ISQL output (SET LIST ON format) for the final select in
# test_script: three rows (direct insert, insert via view, insert via view with
# a win1250 blob). Every row must show the same 123-character text in both
# blob columns: 262 octets / 123 chars for the UTF8 column and 123 octets /
# 123 chars for the WIN1250 column.
expected_stdout = """
ID 1
ACTION insert directly in the table, connect charset = utf8, blob charset = utf8
MEMO_UTF8 82:0
€‚„…†‡‰Š‹ŚŤŽŹ‘’“”•–—™š›śťžź ˇ˘Ł¤Ą¦§¨©Ş«¬­®Ż°±˛ł´µ¶·¸ąş»Ľ˝ľżŔÁÂĂÄĹĆÇČÉĘËĚÍÎĎĐŃŇÓÔŐÖ×ŘŮÚŰÜÝŢßŕáâăäĺćçčéęëěíîďđńňóôőö÷řůúűüýţ˙
OCT_UTF8 262
CHR_UTF8 123
MEMO_WIN1250 0:17
€‚„…†‡‰Š‹ŚŤŽŹ‘’“”•–—™š›śťžź ˇ˘Ł¤Ą¦§¨©Ş«¬­®Ż°±˛ł´µ¶·¸ąş»Ľ˝ľżŔÁÂĂÄĹĆÇČÉĘËĚÍÎĎĐŃŇÓÔŐÖ×ŘŮÚŰÜÝŢßŕáâăäĺćçčéęëěíîďđńňóôőö÷řůúűüýţ˙
OCT_W1250 123
CHR_W1250 123
ID 2
ACTION insert through the view, connect charset = utf8, blob charset = utf8
MEMO_UTF8 82:3
€‚„…†‡‰Š‹ŚŤŽŹ‘’“”•–—™š›śťžź ˇ˘Ł¤Ą¦§¨©Ş«¬­®Ż°±˛ł´µ¶·¸ąş»Ľ˝ľżŔÁÂĂÄĹĆÇČÉĘËĚÍÎĎĐŃŇÓÔŐÖ×ŘŮÚŰÜÝŢßŕáâăäĺćçčéęëěíîďđńňóôőö÷řůúűüýţ˙
OCT_UTF8 262
CHR_UTF8 123
MEMO_WIN1250 0:1e
€‚„…†‡‰Š‹ŚŤŽŹ‘’“”•–—™š›śťžź ˇ˘Ł¤Ą¦§¨©Ş«¬­®Ż°±˛ł´µ¶·¸ąş»Ľ˝ľżŔÁÂĂÄĹĆÇČÉĘËĚÍÎĎĐŃŇÓÔŐÖ×ŘŮÚŰÜÝŢßŕáâăäĺćçčéęëěíîďđńňóôőö÷řůúűüýţ˙
OCT_W1250 123
CHR_W1250 123
ID 3
ACTION insert through the view, connect charset = utf8, blob charset = win1250
MEMO_UTF8 82:6
€‚„…†‡‰Š‹ŚŤŽŹ‘’“”•–—™š›śťžź ˇ˘Ł¤Ą¦§¨©Ş«¬­®Ż°±˛ł´µ¶·¸ąş»Ľ˝ľżŔÁÂĂÄĹĆÇČÉĘËĚÍÎĎĐŃŇÓÔŐÖ×ŘŮÚŰÜÝŢßŕáâăäĺćçčéęëěíîďđńňóôőö÷řůúűüýţ˙
OCT_UTF8 262
CHR_UTF8 123
MEMO_WIN1250 0:25
€‚„…†‡‰Š‹ŚŤŽŹ‘’“”•–—™š›śťžź ˇ˘Ł¤Ą¦§¨©Ş«¬­®Ż°±˛ł´µ¶·¸ąş»Ľ˝ľżŔÁÂĂÄĹĆÇČÉĘËĚÍÎĎĐŃŇÓÔŐÖ×ŘŮÚŰÜÝŢßŕáâăäĺćçčéęëěíîďđńňóôőö÷řůúűüýţ˙
OCT_W1250 123
CHR_W1250 123
"""
@pytest.mark.version('>=3.0')
def test_1(act: Action):
    """Check text-blob transliteration through a view with a trigger.

    Runs ``test_script`` over a connection whose charset is UTF8 and compares
    the cleaned ISQL output with ``expected_stdout``. The test is marked for
    server versions 3.0 and later.

    Args:
        act: the ISQL action fixture defined at module level.
    """
    act.expected_stdout = expected_stdout
    # The connection charset is set explicitly: the scenario under test is
    # "connect charset = utf8" (see the ACTION texts written by the script).
    act.execute(charset='utf8')
    assert act.clean_stdout == act.clean_expected_stdout