2021-04-26 20:07:00 +02:00
#coding:utf-8
#
# id: bugs.core_6158
# title: substring similar - extra characters in the result for non latin characters
2021-12-14 20:56:34 +01:00
# decription:
# Confirmed regression on build 4.0.0.1629, 4.0.0.1631
2021-04-26 20:07:00 +02:00
# Worked as expected on 4.0.0.1535 (build 24.06.2019, before replacement old regexp library with 're2')
# Works fine on: 4.0.0.1632 (build 19.10.2019)
2021-12-14 20:56:34 +01:00
#
2021-04-26 20:07:00 +02:00
# 05-mar-2021. Re-implemented in order to have ability to run this test on Linux.
# Test encodes to UTF8 all needed statements (SET NAMES; CONNECT; DDL and DML) and stores this text in .sql file.
# NOTE: 'SET NAMES' contain character set that must be used for reproducing problem (WIN1251 in this test).
# Then ISQL is launched in separate (child) process which performs all necessary actions (using required charset).
# Result will be redirected to log(s) which will be opened further via codecs.open(...encoding='cp1251').
# Finally, its content will be converted to UTF8 for showing in expected_stdout.
# Checked on:
# * Windows: 4.0.0.2377
# * Linux: 4.0.0.2379
2021-12-14 20:56:34 +01:00
#
#
2021-04-26 20:07:00 +02:00
# tracker_id: CORE-6158
# min_versions: ['4.0']
# versions: 4.0
# qmid: None
import pytest
2021-12-14 20:56:34 +01:00
from pathlib import Path
from firebird . qa import db_factory , python_act , Action , temp_file
2021-04-26 20:07:00 +02:00
# version: 4.0
# resources: None
substitutions_1 = [ ( ' RESULT_3_BLOB_ID.* ' , ' ' ) ]
init_script_1 = """ """
db_1 = db_factory ( charset = ' WIN1251 ' , sql_dialect = 3 , init = init_script_1 )
# test_script_1
#---
2021-12-14 20:56:34 +01:00
#
2021-04-26 20:07:00 +02:00
# import os
# import codecs
# import subprocess
# import time
# engine = db_conn.engine_version
# db_conn.close()
2021-12-14 20:56:34 +01:00
#
2021-04-26 20:07:00 +02:00
# #--------------------------------------------
2021-12-14 20:56:34 +01:00
#
2021-04-26 20:07:00 +02:00
# def flush_and_close( file_handle ):
# # https://docs.python.org/2/library/os.html#os.fsync
# # If you're starting with a Python file object f,
# # first do f.flush(), and
# # then do os.fsync(f.fileno()), to ensure that all internal buffers associated with f are written to disk.
# global os
2021-12-14 20:56:34 +01:00
#
2021-04-26 20:07:00 +02:00
# file_handle.flush()
# if file_handle.mode not in ('r', 'rb') and file_handle.name != os.devnull:
# # otherwise: "OSError: [Errno 9] Bad file descriptor"!
# os.fsync(file_handle.fileno())
# file_handle.close()
2021-12-14 20:56:34 +01:00
#
2021-04-26 20:07:00 +02:00
# #--------------------------------------------
2021-12-14 20:56:34 +01:00
#
2021-04-26 20:07:00 +02:00
# def cleanup( f_names_list ):
# global os
# for i in range(len( f_names_list )):
# if type(f_names_list[i]) == file:
# del_name = f_names_list[i].name
# elif type(f_names_list[i]) == str:
# del_name = f_names_list[i]
# else:
# print('Unrecognized type of element:', f_names_list[i], ' - can not be treated as file.')
# del_name = None
2021-12-14 20:56:34 +01:00
#
2021-04-26 20:07:00 +02:00
# if del_name and os.path.isfile( del_name ):
# os.remove( del_name )
2021-12-14 20:56:34 +01:00
#
2021-04-26 20:07:00 +02:00
# #--------------------------------------------
2021-12-14 20:56:34 +01:00
#
2021-04-26 20:07:00 +02:00
# # Put patterns outside of sql_txt in order to avoid replacement percent sign
# # with its duplicate ('%' --> '%%') because of Python substitution requirements:
# pattern_01 = '%/#*(=){3,}#"%#"(=){3,}#*/%'
# pattern_02 = '%/#*(#-){3,}#"%#"(#-){3,}#*/%'
2021-12-14 20:56:34 +01:00
#
2021-04-26 20:07:00 +02:00
# sql_txt=''' set bail on;
# set names win1251;
# connect '%(dsn)s' user '%(user_name)s' password '%(user_password)s';
2021-12-14 20:56:34 +01:00
#
#
2021-04-26 20:07:00 +02:00
# -- This is needed to get "cannot transliterate character between character sets"
# -- on build 4.0.0.1631, see comment in the tracker 18/Oct/19 02:57 PM:
# create domain T_A64 as varchar (64) character set WIN1251 collate WIN1251;
# create table VALUT_LIST (NAME T_A64 not null);
# commit;
# insert into VALUT_LIST (NAME) values ('Российский рубль');
# insert into VALUT_LIST (NAME) values ('Турецкая лира');
# insert into VALUT_LIST (NAME) values ('Доллар США');
# commit;
2021-12-14 20:56:34 +01:00
#
2021-04-26 20:07:00 +02:00
# set list on;
# set blob all;
2021-12-14 20:56:34 +01:00
#
2021-04-26 20:07:00 +02:00
# select substring('
# aaa
# /*==== Комментарий между символами "равно" ====*/
# bbb
# ccc
# ddd
# eee
# fff
# jjj
# ' similar '%(pattern_01)s' escape '#') as result1
# from rdb$database;
2021-12-14 20:56:34 +01:00
#
2021-04-26 20:07:00 +02:00
# -- additional check for special character '-' as delimiter:
# select substring('здесь написан /*---- Комментарий между символами "дефис" ----*/ - и больше ничего!' similar '%(pattern_02)s' escape '#') as result2
# from rdb$database;
2021-12-14 20:56:34 +01:00
#
2021-04-26 20:07:00 +02:00
# -- Confirmed fail on 4.0.0.1631 with "cannot transliterate character between character sets":
# select substring(list(T.NAME, '; ') from 1 for 250) as result_3_blob_id from VALUT_LIST T;
2021-12-14 20:56:34 +01:00
#
#
2021-04-26 20:07:00 +02:00
# ''' % dict(globals(), **locals())
2021-12-14 20:56:34 +01:00
#
2021-04-26 20:07:00 +02:00
# f_run_sql = open( os.path.join(context['temp_directory'], 'tmp_6158_win1251.sql'), 'w' )
# f_run_sql.write( sql_txt.decode('utf8').encode('cp1251') )
# flush_and_close( f_run_sql )
2021-12-14 20:56:34 +01:00
#
2021-04-26 20:07:00 +02:00
# # result: file tmp_6158_win1251.sql is encoded in win1251
2021-12-14 20:56:34 +01:00
#
2021-04-26 20:07:00 +02:00
# f_run_log = open( os.path.splitext(f_run_sql.name)[0]+'.log', 'w')
# subprocess.call( [ context['isql_path'], '-q', '-i', f_run_sql.name ],
# stdout = f_run_log,
# stderr = subprocess.STDOUT
# )
# flush_and_close( f_run_log ) # result: output will be encoded in win1251
2021-12-14 20:56:34 +01:00
#
2021-04-26 20:07:00 +02:00
# with codecs.open(f_run_log.name, 'r', encoding='cp1251' ) as f:
# result_in_win1251 = f.readlines()
2021-12-14 20:56:34 +01:00
#
2021-04-26 20:07:00 +02:00
# for i in result_in_win1251:
# print( i.encode('utf8') )
2021-12-14 20:56:34 +01:00
#
2021-04-26 20:07:00 +02:00
# # cleanup:
# ###########
# cleanup( (f_run_sql, f_run_log) )
2021-12-14 20:56:34 +01:00
#
#
2021-04-26 20:07:00 +02:00
#---
2021-12-14 20:56:34 +01:00
act_1 = python_act ( ' db_1 ' , substitutions = substitutions_1 )
2021-04-26 20:07:00 +02:00
expected_stdout_1 = """
RESULT1 = Комментарий между символами " равно " =
RESULT2 - Комментарий между символами " дефис " -
Российский рубль ; Турецкая лира ; Доллар США
2021-12-14 20:56:34 +01:00
"""
test_script = temp_file ( ' test-script.sql ' )
2021-04-26 20:07:00 +02:00
@pytest.mark.version ( ' >=4.0 ' )
2021-12-14 20:56:34 +01:00
def test_1 ( act_1 : Action , test_script : Path ) :
pattern_01 = ' % /#*(=) { 3,}# " % # " (=) { 3,}#*/ % '
pattern_02 = ' % /#*(#-) { 3,}# " % # " (#-) { 3,}#*/ % '
test_script . write_text ( f """
- - This is needed to get " cannot transliterate character between character sets "
- - on build 4.0 .0 .1631 , see comment in the tracker 18 / Oct / 19 02 : 57 PM :
create domain T_A64 as varchar ( 64 ) character set WIN1251 collate WIN1251 ;
create table VALUT_LIST ( NAME T_A64 not null ) ;
commit ;
insert into VALUT_LIST ( NAME ) values ( ' Российский рубль ' ) ;
insert into VALUT_LIST ( NAME ) values ( ' Турецкая лира ' ) ;
insert into VALUT_LIST ( NAME ) values ( ' Доллар США ' ) ;
commit ;
set list on ;
set blob all ;
select substring ( '
aaa
/ * == == Комментарий между символами " равно " == == * /
bbb
ccc
ddd
eee
fff
jjj
' similar ' { pattern_01 } ' escape ' #') as result1
from rdb $ database ;
- - additional check for special character ' - ' as delimiter :
select substring ( ' здесь написан /*---- Комментарий между символами " дефис " ----*/ - и больше ничего! ' similar ' {pattern_02} ' escape ' # ' ) as result2
from rdb $ database ;
- - Confirmed fail on 4.0 .0 .1631 with " cannot transliterate character between character sets " :
select substring ( list ( T . NAME , ' ; ' ) from 1 for 250 ) as result_3_blob_id from VALUT_LIST T ;
""" , encoding= ' cp1251 ' )
act_1 . expected_stdout = expected_stdout_1
act_1 . isql ( switches = [ ' -q ' ] , input_file = test_script , charset = ' win1251 ' )
assert act_1 . clean_stdout == act_1 . clean_expected_stdout
2021-04-26 20:07:00 +02:00