#coding:utf-8

"""
ID:          issue-8015
ISSUE:       https://github.com/FirebirdSQL/firebird/issues/8015
TITLE:       Add multi-character TRIM function
DESCRIPTION:
    Stores one sample phrase per character set / collation in a VARCHAR table and in a
    BLOB table, applies BTRIM(<text>, <characters>) to every column and compares the
    trimmed values with the expected ones.
NOTES:
    [26.03.2024] pzotov
    Test verifies only basic features of the BTRIM() function:
        * proper work when source text is specified in different character sets
          (utf8 and several single-byte charsets are checked);
        * proper work when text contains diacritics and/or accents and the characters
          to be trimmed are specified in a different case and/or with different accents;
        * proper work for both varchar and blob datatypes.
    Other features (and also functions LTRIM, RTRIM) will be verified in other tests.

    Each column is trimmed under the alias that carries its own charset name: the
    win1251 (cyrillic) result is labelled BTRIM_1251 and the win1250 column has its
    own BTRIM_1250 check.

    NOTE(review): column 'txt_estonian_ci_ai' uses collation ES_ES_CI_AI, which looks
    like a Spanish rather than an Estonian collation -- confirm. The column name is kept
    because it is part of the expected output.

    Checked on Windows.
"""

import pytest
from firebird.qa import *
import locale

# Test database is created with UTF8 as its default character set.
db = db_factory(charset='utf8')

# SQL script that is passed to isql.
# NB: line breaks inside this literal are significant: every '--' comment must end
# at the end of its own line, otherwise it would swallow the following SQL text.
test_script = """
    set bail on;
    create collation nm_utf8_ci_ai for utf8 from unicode no pad case insensitive accent insensitive;
    create domain dm_txt_utf8_ci_ai varchar(50) character set utf8 collate nm_utf8_ci_ai;
    create domain dm_blb_utf8_ci_ai blob character set utf8 collate nm_utf8_ci_ai;

    create table test_vchr(
        id int generated by default as identity primary key
        ,txt_utf8 varchar(50) character set utf8
        ,txt_1250 varchar(50) character set win1250 -- central europe
        ,txt_1251 varchar(50) character set win1251 -- cyrillic
        ,txt_1252 varchar(50) character set win1252 -- ~ ISO-8859-1; except for the code points 128-159 (0x80-0x9F).
        ,txt_1253 varchar(50) character set win1253 -- greek
        ,txt_1254 varchar(50) character set win1254 -- turkish
        ,txt_1257 varchar(50) character set win1257 -- baltic
        ,txt_utf8_ci_ai dm_txt_utf8_ci_ai
        ,txt_estonian_ci_ai varchar(50) character set iso8859_1 collate ES_ES_CI_AI
        ,txt_czech_ci_ai varchar(50) character set win1250 collate WIN_CZ_CI_AI
    );
    -------------------------------------------------
    insert into test_vchr (
        txt_utf8
        ,txt_1250
        ,txt_1251
        ,txt_1252
        ,txt_1253
        ,txt_1254
        ,txt_1257
        ,txt_utf8_ci_ai
        ,txt_estonian_ci_ai
        ,txt_czech_ci_ai
    ) values (
        'შობას გილოცავთ' -- georgian
        ,'boldog Karácsonyt' -- hungarian
        ,'з Різдвом' -- ukrainian
        ,'Joyeux noël' -- french
        ,'Καλό απόγευμα' -- greek
        ,'Teşekkür ederim' -- turkish
        ,'Priecīgus Ziemassvētkus' -- latvian
        ,'Täze ýyl gutly bolsun' -- turkmenian; will be used to check ability to use characters with diff case and accents
        ,'häid jõule' -- estonian; will be used to check ability to use characters with diff case and accents
        ,'veselé Vánoce' -- czech; will be used to check ability to use characters with diff case and accents
    );

    set list on;

    select
        btrim(txt_utf8, 'ოშათვ') as btrim_utf8
        ,btrim(txt_1250, 'bolt') as btrim_1250
        ,btrim(txt_1251, 'з м') as btrim_1251
        ,btrim(txt_1252, 'oëlJ') as btrim_1252
        ,btrim(txt_1253, 'αμΚ') as btrim_1253
        ,btrim(txt_1254, 'eiTmşr') as btrim_1254
        ,btrim(txt_1257, 'ktPrciīeēuvs') as btrim_1257
        ,btrim(txt_utf8_ci_ai, 'ÜYETAÑZ ') as btrim_txt_utf8_ci_ai
        ,btrim(txt_estonian_ci_ai, 'AH') as btrim_txt_estonian_ci_ai
        ,btrim(txt_czech_ci_ai, 'ELVS ') as btrim_txt_czech_ci_ai
    from test_vchr
    order by id
    ;
    commit;

    recreate table test_blob(
        id int generated by default as identity primary key
        ,txt_utf8 blob character set utf8
        ,txt_1250 blob character set win1250 -- central europe
        ,txt_1251 blob character set win1251 -- cyrillic
        ,txt_1252 blob character set win1252 -- ~ ISO-8859-1; except for the code points 128-159 (0x80-0x9F).
        ,txt_1253 blob character set win1253 -- greek
        ,txt_1254 blob character set win1254 -- turkish
        ,txt_1257 blob character set win1257 -- baltic
        ,txt_utf8_ci_ai dm_blb_utf8_ci_ai
        ,txt_estonian_ci_ai blob character set iso8859_1 collate ES_ES_CI_AI
        ,txt_czech_ci_ai blob character set win1250 collate WIN_CZ_CI_AI
    );
    -------------------------------------------------
    insert into test_blob (
        txt_utf8
        ,txt_1250
        ,txt_1251
        ,txt_1252
        ,txt_1253
        ,txt_1254
        ,txt_1257
        ,txt_utf8_ci_ai
        ,txt_estonian_ci_ai
        ,txt_czech_ci_ai
    ) values (
        'შობას გილოცავთ'
        ,'boldog Karácsonyt'
        ,'з Різдвом'
        ,'Joyeux noël'
        ,'Καλό απόγευμα'
        ,'Teşekkür ederim'
        ,'Priecīgus Ziemassvētkus'
        ,'Täze ýyl gutly bolsun'
        ,'häid jõule'
        ,'veselé Vánoce'
    );

    select
        btrim(txt_utf8, 'ოშათვ') as blob_id_btrim_utf8
        ,btrim(txt_1250, 'bolt') as blob_id_btrim_1250
        ,btrim(txt_1251, 'з м') as blob_id_btrim_1251
        ,btrim(txt_1252, 'oëlJ') as blob_id_btrim_1252
        ,btrim(txt_1253, 'αμΚ') as blob_id_btrim_1253
        ,btrim(txt_1254, 'eiTmşr') as blob_id_btrim_1254
        ,btrim(txt_1257, 'ktPrciīeēuvs') as blob_id_btrim_1257
        ,btrim(txt_utf8_ci_ai, 'ÜYETAÑZ ') as blob_id_btrim_txt_utf8_ci_ai
        ,btrim(txt_estonian_ci_ai, 'AH') as blob_id_btrim_txt_estonian_ci_ai
        ,btrim(txt_czech_ci_ai, 'ELVS ') as blob_id_btrim_txt_czech_ci_ai
    from test_blob
    order by id
    ;
"""

# Substitutions are (pattern, replacement) pairs:
#   * '[ \t]+'     -> ' ' : runs of spaces/tabs become one space, so column padding is irrelevant;
#   * 'BLOB_ID_.*' -> ''  : drops the text of every 'BLOB_ID_...' column line, so that only
#                           the blob contents remain to be compared.
act = isql_act('db', test_script, substitutions=[('[ \t]+', ' '), ('BLOB_ID_.*','')])

# First group: VARCHAR results as '<ALIAS> <trimmed text>'.
# Second group: contents of the trimmed BLOBs, in the same column order.
expected_stdout = """
    BTRIM_UTF8 ბას გილოც
    BTRIM_1250 dog Karácsony
    BTRIM_1251 Різдво
    BTRIM_1252 yeux n
    BTRIM_1253 λό απόγευ
    BTRIM_1254 kkür ed
    BTRIM_1257 gus Ziema
    BTRIM_TXT_UTF8_CI_AI l gutly bols
    BTRIM_TXT_ESTONIAN_CI_AI id jõule
    BTRIM_TXT_CZECH_CI_AI ánoc

    ბას გილოც
    dog Karácsony
    Різдво
    yeux n
    λό απόγευ
    kkür ed
    gus Ziema
    l gutly bols
    id jõule
    ánoc
"""

@pytest.mark.intl
@pytest.mark.version('>=6.0.0')
def test_1(act: Action):
    """Run the BTRIM script through isql and compare its cleaned output with the expected one."""
    act.expected_stdout = expected_stdout
    # NB: io_enc must be 'utf8' because 'act.execute' essentially calls isql using PIPE
    # and sends the text of test_script as its input.
    # We must NOT specify locale.getpreferredencoding() here, otherwise a charmap error
    # will be raised if the system has a non-ascii locale ('cp1251' etc).
    act.execute(combine_output = True, charset = 'utf8', io_enc = 'utf8')
    assert act.clean_stdout == act.clean_expected_stdout