#coding:utf-8

"""
ID:          issue-1535
ISSUE:       1535
TITLE:       gfix -sweep makes "reconnect" when it is removed from mon$attachments
             by delete command (issued in another window)
DESCRIPTION:
    We create table with several very wide char fields and fill it with uuid data.

    Then we create indexes for these fields and finally - delete all rows.
    Such table will require valuable time to be swept, about 4..5 seconds
    (we set DB forced_writes = ON and small buffers number in the DB header).

    After this we launch trace and 'gfix -sweep' (asynchronously, via subprocess.Popen()).
    Then we immediately open cursor and start LOOP with query to mon$attachments.
    Loop will work until connection created by gfix will be seen there (usually this must occur instantly).

    When gfix connection is known, we execute 'DELETE FROM MON$ATTACHMENTS' command which should kill its attachment.
    Process of GFIX should raise error 'connection shutdown' - and we check this by saving its output to log.
    Further, we have to REPEAT check of mon$attachments after this and connection by 'gfix' must NOT be present there.
    Then we stop trace and check its log. It must contain line with text 'SWEEP_FAILED' and after this line
    we should NOT discover any lines with 'ATTACH_DATABASE' event - this is especially verified.

    Finally, we compare content of firebird.log: new lines in it should contain messages about
    interrupted sweep ('error during sweep' and 'connection shutdown').
JIRA:        CORE-4337
FBTEST:      bugs.core_4337
NOTES:
    [26.09.2017]
    adjusted expected_stdout section to current FB 3.0+ versions: firebird.log now does contain info
    about DB name which was swept (see CORE-5610, "Provide info about database (or alias) which was in use...")
    [19.11.2021] pcisar
    Small difference in reimplementation of sweep killer script.
    On v4.0.0.2496 COMMIT after delete from mon$attachments fails with:
    STATEMENT FAILED, SQLSTATE = HY008, OPERATION WAS CANCELLED
    Without commit this test PASSES, i.e. sweep is terminated with all outputs as expected.
    [14.09.2022] pzotov
    Re-implemented: avoid usage of ISQL with fragile timeout setting (gfix can complete FASTER than isql will see it!).
    Appearance of 'gfix' process is checked in loop by querying mon$attachments, see also: MAX_WAIT_FOR_SWEEP_START_MS.
    More precise pattern for line with message about failed sweep (see 'p_sweep_failed').
    Checked on Windows and Linux: 3.0.8.33535 (SS/CS), 4.0.1.2692 (SS/CS), 5.0.0.730
"""
import datetime as py_dt
from datetime import timedelta
2021-11-19 20:16:48 +01:00
import time
2022-09-14 15:25:26 +02:00
2021-11-19 20:16:48 +01:00
import subprocess
from difflib import unified_diff
2022-09-14 15:25:26 +02:00
import re
2021-11-19 20:16:48 +01:00
from pathlib import Path
2022-09-14 15:25:26 +02:00
2022-01-23 20:41:55 +01:00
from firebird . qa import *
2021-11-19 20:16:48 +01:00
from firebird . driver import DbWriteMode
2022-09-14 15:25:26 +02:00
import pytest
2021-04-26 20:07:00 +02:00
2022-09-14 15:25:26 +02:00
substitutions = [
( ' [ ]+ ' , ' ' ) ,
( ' TRACE_LOG:.* SWEEP_START ' , ' TRACE_LOG: SWEEP_START ' ) ,
( ' TRACE_LOG:.* SWEEP_FAILED ' , ' TRACE_LOG: SWEEP_FAILED ' ) ,
( ' TRACE_LOG:.* ERROR AT JPROVIDER::ATTACHDATABASE ' , ' TRACE_LOG: ERROR AT JPROVIDER::ATTACHDATABASE ' ) ,
( ' .*KILLED BY DATABASE ADMINISTRATOR.* ' , ' ' ) ,
( ' TRACE_LOG:.*GFIX.EXE.* ' , ' TRACE_LOG: GFIX.EXE ' ) ,
( ' OIT [0-9]+ ' , ' OIT ' ) , ( ' OAT [0-9]+ ' , ' OAT ' ) , ( ' OST [0-9]+ ' , ' OST ' ) ,
( ' NEXT [0-9]+ ' , ' NEXT ' ) ,
( ' FIREBIRD.LOG:.* ERROR DURING SWEEP OF .*TEST.FDB.* ' , ' FIREBIRD.LOG: + ERROR DURING SWEEP OF TEST.FDB ' )
]
2021-04-26 20:07:00 +02:00
2022-09-25 15:02:24 +02:00
init_sql = """
2021-11-19 20:16:48 +01:00
set list on ;
2022-09-14 15:25:26 +02:00
recreate table t (
s01 varchar ( 4000 )
, s02 varchar ( 4000 )
, s03 varchar ( 4000 )
, s04 varchar ( 4000 )
) ;
2021-11-19 20:16:48 +01:00
commit ;
set term ^ ;
execute block as
declare n int = 20000 ;
declare w int ;
begin
select f . rdb $ field_length
from rdb $ relation_fields rf
join rdb $ fields f on rf . rdb $ field_source = f . rdb $ field_name
2022-09-14 15:25:26 +02:00
where rf . rdb $ relation_name = upper ( ' t ' ) and rf . rdb $ field_name = upper ( ' s01 ' )
2021-11-19 20:16:48 +01:00
into w ;
while ( n > 0 ) do
2022-09-14 15:25:26 +02:00
insert into t ( s01 , s02 , s03 , s04 )
values (
rpad ( ' ' , : w , uuid_to_char ( gen_uuid ( ) ) )
, rpad ( ' ' , : w , uuid_to_char ( gen_uuid ( ) ) )
, rpad ( ' ' , : w , uuid_to_char ( gen_uuid ( ) ) )
, rpad ( ' ' , : w , uuid_to_char ( gen_uuid ( ) ) )
)
returning : n - 1 into n
;
2021-11-19 20:16:48 +01:00
end ^
set term ; ^
commit ;
create index t_s01 on t ( s01 ) ;
2022-09-14 15:25:26 +02:00
create index t_s02 on t ( s02 ) ;
create index t_s03 on t ( s03 ) ;
create index t_s04 on t ( s04 ) ;
2021-11-19 20:16:48 +01:00
commit ;
delete from t ;
commit ;
quit ;
2021-12-22 20:23:11 +01:00
"""
2021-04-26 20:07:00 +02:00
2022-09-14 15:25:26 +02:00
# 16K page is neccessary ! Smaller size will cause failure with:
# key size exceeds implementation restriction for index "T_S01"
#
2022-09-25 15:02:24 +02:00
db = db_factory ( page_size = 16384 , init = init_sql )
2021-04-26 20:07:00 +02:00
2022-09-14 15:25:26 +02:00
act = python_act ( ' db ' , substitutions = substitutions )
2022-01-23 20:41:55 +01:00
2022-09-14 15:25:26 +02:00
sweep_log = temp_file ( ' tmp_4337_sweep.log ' )
2022-01-23 20:41:55 +01:00
trace = [ ' time_threshold = 0 ' ,
' log_errors = true ' ,
' log_sweep = true ' ,
' log_connections = true ' ,
]
@pytest.mark.version ( ' >=3.0 ' )
2022-09-14 15:25:26 +02:00
def test_1 ( act : Action , sweep_log : Path , capsys ) :
2022-01-23 20:41:55 +01:00
with act . connect_server ( ) as srv :
2022-09-14 15:25:26 +02:00
2021-11-19 20:16:48 +01:00
# REDUCE number of cache buffers in DB header in order to sweep make its work as long as possible
2022-09-14 15:25:26 +02:00
srv . database . set_default_cache_size ( database = act . db . db_path , size = 75 )
2021-11-19 20:16:48 +01:00
# Change FW to ON (in order to make sweep life harder :))
2022-01-23 20:41:55 +01:00
srv . database . set_write_mode ( database = act . db . db_path , mode = DbWriteMode . SYNC )
2022-09-14 15:25:26 +02:00
srv . info . get_log ( )
log_before = srv . readlines ( )
#---------------------------------------------------------------
2022-09-25 15:02:24 +02:00
# How long can we wait (milliseconds) for the FIRST appearance of 'gfix' process in mon$attachments:
2022-09-14 15:25:26 +02:00
#
MAX_WAIT_FOR_SWEEP_START_MS = 3000
2022-09-25 15:02:24 +02:00
# How long we wait (milliseconds) for SECOND appearance of 'gfix' process in mon$attachments, after it was killed.
# NOTE. If it appears then we have a BUG
MAX_WAIT_FOR_GFIX_RESTART_MS = 3000
2022-09-14 15:25:26 +02:00
sweep_attach_id = None
sweep_reconnect = None
2021-11-19 20:16:48 +01:00
# Start trace
2022-01-23 20:41:55 +01:00
with act . trace ( db_events = trace ) :
2022-09-14 15:25:26 +02:00
with act . db . connect ( ) as con :
f_sweep_log = open ( sweep_log , ' w ' )
stm = r " select first 1 a.mon$attachment_id from mon$attachments a where a.mon$system_flag <> 1 and lower(a.mon$remote_process) similar to ' ( % [ \\ /](gfix|fbsvcmgr)(.exe)?) ' "
t1 = py_dt . datetime . now ( )
with con . cursor ( ) as cur :
ps = cur . prepare ( stm )
2022-09-25 15:02:24 +02:00
p_sweep = subprocess . Popen ( [ act . vars [ ' gfix ' ] , ' -sweep ' , ' -user ' , act . db . user , ' -password ' , act . db . password , act . db . dsn ] ,
stdout = f_sweep_log ,
stderr = subprocess . STDOUT
)
##########################################################################
# LOOP-1: WAIT FOR FIRST APPEARANCE OF GFIX PROCESS IN THE MON$ATTACHMENTS
##########################################################################
2022-09-14 15:25:26 +02:00
while True :
t2 = py_dt . datetime . now ( )
d1 = t2 - t1
dd = d1 . seconds * 1000 + d1 . microseconds / / 1000
if dd > MAX_WAIT_FOR_SWEEP_START_MS :
print ( f ' TIMEOUT EXPIRATION: waiting for SWEEP process took { dd } ms which exceeds limit = { MAX_WAIT_FOR_SWEEP_START_MS } ms. ' )
break
cur . execute ( ps )
for r in cur :
sweep_attach_id = r [ 0 ]
con . commit ( )
if sweep_attach_id :
break
else :
time . sleep ( 0.1 )
#<while True (loop for search gfix process in mon$attachments)
2022-09-25 15:02:24 +02:00
2022-09-14 15:25:26 +02:00
#< with con.cursor() as cur
#assert sweep_attach_id is None, f'attacment_id of SWEEP process is {sweep_attach_id}'
assert sweep_attach_id , ' attacment_id of SWEEP process was not found. '
2022-09-25 15:02:24 +02:00
# Let sweep to have some 'useful work':
#
time . sleep ( 1 )
2022-09-14 15:25:26 +02:00
# Now we can KILL attachment that belongs to SWEEP process, <sweep_attach_id>:
con . execute_immediate ( f ' delete from mon$attachments where mon$attachment_id = { sweep_attach_id } ' )
p_sweep . wait ( )
f_sweep_log . close ( )
2022-09-25 15:02:24 +02:00
assert p_sweep . returncode == 1 , ' p_sweep.returncode: {p_sweep.returncode} '
##################################################################################################
# LOOP-2: WAIT FOR POSSIBLE SECOND APPEARENCE (RECONNECT) OF GFIX. IF IT OCCURS THEN WE HAVE A BUG
##################################################################################################
ps = cur . prepare ( stm . replace ( ' select ' , ' select /* search re-connect that could be made */ ' ) )
t1 = py_dt . datetime . now ( )
2022-09-14 15:25:26 +02:00
with con . cursor ( ) as cur :
2022-09-25 15:02:24 +02:00
while True :
t2 = py_dt . datetime . now ( )
d1 = t2 - t1
dd = d1 . seconds * 1000 + d1 . microseconds / / 1000
if dd > MAX_WAIT_FOR_GFIX_RESTART_MS :
# Expected: gfix reconnect was not detected for last {MAX_WAIT_FOR_GFIX_RESTART_MS} ms.
break
con . commit ( )
cur . execute ( ps )
# Resultset now must be EMPTY. we must not find any record!
for r in cur :
sweep_reconnect = r [ 0 ]
#con.commit()
if sweep_reconnect :
# UNEXPECTED: gfix reconnect found, with attach_id={sweep_reconnect}
break
else :
time . sleep ( 0.1 )
2022-09-14 15:25:26 +02:00
assert sweep_reconnect is None , f ' Found re-connect of SWEEP process, attachment: { sweep_reconnect } '
#< with db.connect as con
#---------------------------------------------------------------
2022-09-25 15:02:24 +02:00
# DISABLED 25.09.2022. SWEEP log can remain empty (4.0.1.2692 Classic)
## Check-1: log of sweep must contain text: 'connection shutdown':
#for line in sweep_log.read_text().splitlines():
# if line:
# print('SWEEP LOG:', line.upper())
#
#act.expected_stdout = 'sweep log: connection shutdown'.upper()
#act.stdout = capsys.readouterr().out
#assert act.clean_stdout == act.clean_expected_stdout
#act.reset()
2022-09-14 15:25:26 +02:00
#---------------------------------------------------------------
2021-11-19 20:16:48 +01:00
# Trace log
found_sweep_failed = 0
2022-09-14 15:25:26 +02:00
p_sweep_failed = re . compile ( r ' [.* \ s+]*20 \ d {2} (- \ d {2} ) {2} T \ d {2} (: \ d {2} ) {2} . \ d { 3,4} \ s+ \ (.+ \ ) \ s+SWEEP_FAILED$ ' , re . IGNORECASE )
2022-09-25 15:02:24 +02:00
p_att_success = re . compile ( r ' [.* \ s+]*20 \ d {2} (- \ d {2} ) {2} T \ d {2} (: \ d {2} ) {2} . \ d { 3,4} \ s+ \ (.+ \ ) \ s+ATTACH_DATABASE$ ' , re . IGNORECASE )
2022-09-14 15:25:26 +02:00
trace_expected = ' FOUND SWEEP_FAILED MESSAGE. '
2022-09-25 15:02:24 +02:00
for i , line in enumerate ( act . trace_log ) :
2022-09-14 15:25:26 +02:00
if line . strip ( ) :
if p_sweep_failed . search ( line . strip ( ) ) :
print ( trace_expected )
found_sweep_failed = 1
2022-09-25 15:02:24 +02:00
if found_sweep_failed == 1 and p_att_success . search ( line ) and i < len ( act . trace_log ) - 2 and ' gfix ' in act . trace_log [ i + 2 ] . lower ( ) :
# NB: we have to ignore "FAILED ATTACH_DATABASE".
# It can occur in some cases.
print ( ' TRACE: UNEXPECTED ATTACH FOUND AFTER KILL SWEEP! CHECK LINE N {i} : ' )
2022-09-14 15:25:26 +02:00
print ( ' TRACE_LOG: ' + line )
act . expected_stdout = trace_expected
act . stdout = capsys . readouterr ( ) . out
assert act . clean_stdout == act . clean_expected_stdout
act . reset ( )
#----------------------------------------------------------------
with act . connect_server ( ) as srv :
srv . info . get_log ( )
log_after = srv . readlines ( )
'''
Example of diff :
< COMPUTERNAME > Wed Sep 14 15 : 58 : 37 2022
Sweep is started by SYSDBA
Database " C: \ TEMP \ <PYTEST_PATH> \ TEST.FDB "
OIT 20 , OAT 21 , OST 21 , Next 21
< COMPUTERNAME > Wed Sep 14 15 : 58 : 37 2022
Error during sweep of C : \< PYTEST_PATH > \TEST . FDB :
connection shutdown
'''
p_tx_counter = re . compile ( " \\ +[ \\ s]+OIT[ ]+ \\ d+,[ \\ s]*OAT[ \\ s]+ \\ d+,[ \\ s]*OST[ \\ s]+ \\ d+,[ \\ s]*NEXT[ \\ s]+ \\ d+ " )
2021-11-19 20:16:48 +01:00
for line in unified_diff ( log_before , log_after ) :
if line . startswith ( ' + ' ) :
line = line . upper ( )
2022-09-14 15:25:26 +02:00
if ' SWEEP ' in line or ' CONNECTION ' in line or p_tx_counter . match ( line ) :
2021-11-19 20:16:48 +01:00
print ( ' FIREBIRD.LOG: ' + ( ' ' . join ( line . split ( ) ) ) )
2022-09-14 15:25:26 +02:00
fb_log_expected = """
FIREBIRD . LOG : + SWEEP IS STARTED BY SYSDBA
FIREBIRD . LOG : + OIT , OAT , OST , NEXT
FIREBIRD . LOG : + ERROR DURING SWEEP OF TEST . FDB
FIREBIRD . LOG : + CONNECTION SHUTDOWN
"""
act . expected_stdout = fb_log_expected
2022-01-23 20:41:55 +01:00
act . stdout = capsys . readouterr ( ) . out
assert act . clean_stdout == act . clean_expected_stdout