mirror of
https://github.com/FirebirdSQL/firebird-qa.git
synced 2025-01-22 21:43:06 +01:00
309 lines
13 KiB
Python
309 lines
13 KiB
Python
#coding:utf-8
|
|
|
|
"""
|
|
ID: issue-1535
|
|
ISSUE: 1535
|
|
TITLE: gfix -sweep makes "reconnect" when it is removed from mon$attachments by delete command (issued in another window)
|
|
DESCRIPTION:
|
|
We create table with several very wide char fields and fill it with uuid data.
|
|
|
|
Then we create indexes for these fields and finally - delete all rows.
|
|
Such table will require valuable time to be swept, about 4..5 seconds
|
|
(we set DB forced_writes = ON and small buffers number in the DB header).
|
|
|
|
After this we launch trace and 'gfix -sweep' (asynchronousl, via subprocess.Popen()).
|
|
Then we immediately open cursor and start LOOP with query to mon$attachments.
|
|
Loop will work until connection created by gfix will be seen there (usually this must occurs instantly).
|
|
|
|
When gfix connection is known, we execute 'DELETE FROM MON$ATTACHMENT' command which should kill its attachment.
|
|
Process of GFIX should raise error 'connection shutdown' - and we check this by saving its output to log.
|
|
|
|
Further, we have to REPEAT check of mon$attachments after this and connection by 'gfix' must NOT present there.
|
|
|
|
Then we stop trace and check its log. It must contain line with text 'SWEEP_FAILED' and after this line
|
|
we should NOT discover any lines with 'ATTACH_DATABASE' event - this is especially verified.
|
|
|
|
Finally, we compare content of firebird.log: new lines in it should contain messages about
|
|
interrupted sweep ('error during sweep' and 'connection shutdown').
|
|
JIRA: CORE-4337
|
|
FBTEST: bugs.core_4337
|
|
NOTES:
|
|
[26.09.2017]
|
|
adjusted expected_stdout section to current FB 3.0+ versions: firebird.log now does contain info
|
|
about DB name which was swept (see CORE-5610, "Provide info about database (or alias) which was in use...")
|
|
[19.11.2021] pcisar
|
|
Small difference in reimplementation of sweep killer script
|
|
On v4.0.0.2496 COMMIT after delete from mon#attachments fails with:
|
|
STATEMENT FAILED, SQLSTATE = HY008, OPERATION WAS CANCELLED
|
|
Without commit this test PASSES, i.e. sweep is terminated with all outputs as expected
|
|
[14.09.2022] pzotov
|
|
Re-implemented: avoid usage of ISQL with fragile timeout setting (gfix can complete FASTER than isql will see it!).
|
|
Appearance of 'gfix' process is checked in loop by querying mon$attachments, see also: MAX_WAIT_FOR_SWEEP_START_MS.
|
|
More precise pattern for line with message about failed sweep (see 'p_sweep_failed').
|
|
Checked on Windows and Linux: 3.0.8.33535 (SS/CS), 4.0.1.2692 (SS/CS), 5.0.0.730
|
|
"""
|
|
|
|
import datetime as py_dt
|
|
from datetime import timedelta
|
|
import time
|
|
|
|
import subprocess
|
|
from difflib import unified_diff
|
|
import re
|
|
from pathlib import Path
|
|
|
|
from firebird.qa import *
|
|
from firebird.driver import DbWriteMode
|
|
import pytest
|
|
|
|
substitutions = [
|
|
('[ ]+', ' '),
|
|
('TRACE_LOG:.* SWEEP_START', 'TRACE_LOG: SWEEP_START'),
|
|
('TRACE_LOG:.* SWEEP_FAILED', 'TRACE_LOG: SWEEP_FAILED'),
|
|
('TRACE_LOG:.* ERROR AT JPROVIDER::ATTACHDATABASE', 'TRACE_LOG: ERROR AT JPROVIDER::ATTACHDATABASE'),
|
|
('.*KILLED BY DATABASE ADMINISTRATOR.*', ''),
|
|
('TRACE_LOG:.*GFIX.EXE.*', 'TRACE_LOG: GFIX.EXE'),
|
|
('OIT [0-9]+', 'OIT'), ('OAT [0-9]+', 'OAT'), ('OST [0-9]+', 'OST'),
|
|
('NEXT [0-9]+', 'NEXT'),
|
|
('FIREBIRD.LOG:.* ERROR DURING SWEEP OF .*TEST.FDB.*', 'FIREBIRD.LOG: + ERROR DURING SWEEP OF TEST.FDB')
|
|
]
|
|
|
|
init_sql = """
|
|
set list on;
|
|
recreate table t(
|
|
s01 varchar(4000)
|
|
,s02 varchar(4000)
|
|
,s03 varchar(4000)
|
|
,s04 varchar(4000)
|
|
);
|
|
commit;
|
|
set term ^;
|
|
execute block as
|
|
declare n int = 20000;
|
|
declare w int;
|
|
begin
|
|
select f.rdb$field_length
|
|
from rdb$relation_fields rf
|
|
join rdb$fields f on rf.rdb$field_source=f.rdb$field_name
|
|
where rf.rdb$relation_name=upper('t') and rf.rdb$field_name=upper('s01')
|
|
into w;
|
|
|
|
while (n>0) do
|
|
insert into t(s01, s02, s03, s04)
|
|
values(
|
|
rpad('', :w, uuid_to_char(gen_uuid()))
|
|
,rpad('', :w, uuid_to_char(gen_uuid()))
|
|
,rpad('', :w, uuid_to_char(gen_uuid()))
|
|
,rpad('', :w, uuid_to_char(gen_uuid()))
|
|
)
|
|
returning :n-1 into n
|
|
;
|
|
end^
|
|
set term ;^
|
|
commit;
|
|
create index t_s01 on t(s01);
|
|
create index t_s02 on t(s02);
|
|
create index t_s03 on t(s03);
|
|
create index t_s04 on t(s04);
|
|
commit;
|
|
|
|
delete from t;
|
|
commit;
|
|
quit;
|
|
"""
|
|
|
|
# 16K page is neccessary ! Smaller size will cause failure with:
|
|
# key size exceeds implementation restriction for index "T_S01"
|
|
#
|
|
db = db_factory(page_size=16384, init = init_sql)
|
|
|
|
act = python_act('db', substitutions = substitutions)
|
|
|
|
sweep_log = temp_file('tmp_4337_sweep.log')
|
|
|
|
trace = ['time_threshold = 0',
|
|
'log_errors = true',
|
|
'log_sweep = true',
|
|
'log_connections = true',
|
|
]
|
|
|
|
@pytest.mark.version('>=3.0')
|
|
def test_1(act: Action, sweep_log: Path, capsys):
|
|
|
|
with act.connect_server() as srv:
|
|
|
|
# REDUCE number of cache buffers in DB header in order to sweep make its work as long as possible
|
|
srv.database.set_default_cache_size(database=act.db.db_path, size=75)
|
|
|
|
# Change FW to ON (in order to make sweep life harder :))
|
|
srv.database.set_write_mode(database=act.db.db_path, mode=DbWriteMode.SYNC)
|
|
|
|
srv.info.get_log()
|
|
log_before = srv.readlines()
|
|
|
|
#---------------------------------------------------------------
|
|
|
|
# How long can we wait (milliseconds) for the FIRST appearance of 'gfix' process in mon$attachments:
|
|
#
|
|
MAX_WAIT_FOR_SWEEP_START_MS = 3000
|
|
|
|
# How long we wait (milliseconds) for SECOND appearance of 'gfix' process in mon$attachments, after it was killed.
|
|
# NOTE. If it appears then we have a BUG
|
|
MAX_WAIT_FOR_GFIX_RESTART_MS = 3000
|
|
|
|
sweep_attach_id = None
|
|
sweep_reconnect = None
|
|
# Start trace
|
|
with act.trace(db_events=trace):
|
|
with act.db.connect() as con:
|
|
f_sweep_log = open(sweep_log,'w')
|
|
stm = r"select first 1 a.mon$attachment_id from mon$attachments a where a.mon$system_flag <> 1 and lower(a.mon$remote_process) similar to '(%[\\/](gfix|fbsvcmgr)(.exe)?)'"
|
|
t1=py_dt.datetime.now()
|
|
with con.cursor() as cur:
|
|
ps = cur.prepare(stm)
|
|
p_sweep = subprocess.Popen( [act.vars['gfix'], '-sweep', '-user', act.db.user, '-password', act.db.password, act.db.dsn],
|
|
stdout = f_sweep_log,
|
|
stderr = subprocess.STDOUT
|
|
)
|
|
|
|
##########################################################################
|
|
# LOOP-1: WAIT FOR FIRST APPEARANCE OF GFIX PROCESS IN THE MON$ATTACHMENTS
|
|
##########################################################################
|
|
while True:
|
|
t2=py_dt.datetime.now()
|
|
d1=t2-t1
|
|
dd = d1.seconds*1000 + d1.microseconds//1000
|
|
if dd > MAX_WAIT_FOR_SWEEP_START_MS:
|
|
print(f'TIMEOUT EXPIRATION: waiting for SWEEP process took {dd} ms which exceeds limit = {MAX_WAIT_FOR_SWEEP_START_MS} ms.')
|
|
break
|
|
|
|
cur.execute(ps)
|
|
for r in cur:
|
|
sweep_attach_id = r[0]
|
|
|
|
con.commit()
|
|
if sweep_attach_id:
|
|
break
|
|
else:
|
|
time.sleep(0.1)
|
|
#<while True (loop for search gfix process in mon$attachments)
|
|
|
|
#< with con.cursor() as cur
|
|
|
|
#assert sweep_attach_id is None, f'attacment_id of SWEEP process is {sweep_attach_id}'
|
|
assert sweep_attach_id, 'attacment_id of SWEEP process was not found.'
|
|
|
|
# Let sweep to have some 'useful work':
|
|
#
|
|
time.sleep(1)
|
|
|
|
# Now we can KILL attachment that belongs to SWEEP process, <sweep_attach_id>:
|
|
con.execute_immediate(f'delete from mon$attachments where mon$attachment_id = {sweep_attach_id}')
|
|
|
|
p_sweep.wait()
|
|
f_sweep_log.close()
|
|
|
|
assert p_sweep.returncode == 1, 'p_sweep.returncode: {p_sweep.returncode}'
|
|
|
|
|
|
##################################################################################################
|
|
# LOOP-2: WAIT FOR POSSIBLE SECOND APPEARENCE (RECONNECT) OF GFIX. IF IT OCCURS THEN WE HAVE A BUG
|
|
##################################################################################################
|
|
ps = cur.prepare( stm.replace('select ', 'select /* search re-connect that could be made */ ') )
|
|
t1=py_dt.datetime.now()
|
|
with con.cursor() as cur:
|
|
while True:
|
|
t2=py_dt.datetime.now()
|
|
d1=t2-t1
|
|
dd = d1.seconds*1000 + d1.microseconds//1000
|
|
if dd > MAX_WAIT_FOR_GFIX_RESTART_MS:
|
|
# Expected: gfix reconnect was not detected for last {MAX_WAIT_FOR_GFIX_RESTART_MS} ms.
|
|
break
|
|
con.commit()
|
|
cur.execute(ps)
|
|
# Resultset now must be EMPTY. we must not find any record!
|
|
for r in cur:
|
|
sweep_reconnect = r[0]
|
|
|
|
#con.commit()
|
|
if sweep_reconnect:
|
|
# UNEXPECTED: gfix reconnect found, with attach_id={sweep_reconnect}
|
|
break
|
|
else:
|
|
time.sleep(0.1)
|
|
|
|
assert sweep_reconnect is None, f'Found re-connect of SWEEP process, attachment: {sweep_reconnect}'
|
|
|
|
#< with db.connect as con
|
|
|
|
#---------------------------------------------------------------
|
|
# DISABLED 25.09.2022. SWEEP log can remain empty (4.0.1.2692 Classic)
|
|
## Check-1: log of sweep must contain text: 'connection shutdown':
|
|
#for line in sweep_log.read_text().splitlines():
|
|
# if line:
|
|
# print('SWEEP LOG:', line.upper())
|
|
#
|
|
#act.expected_stdout = 'sweep log: connection shutdown'.upper()
|
|
#act.stdout = capsys.readouterr().out
|
|
#assert act.clean_stdout == act.clean_expected_stdout
|
|
#act.reset()
|
|
#---------------------------------------------------------------
|
|
|
|
# Trace log
|
|
found_sweep_failed = 0
|
|
p_sweep_failed = re.compile( r'[.*\s+]*20\d{2}(-\d{2}){2}T\d{2}(:\d{2}){2}.\d{3,4}\s+\(.+\)\s+SWEEP_FAILED$', re.IGNORECASE)
|
|
p_att_success = re.compile( r'[.*\s+]*20\d{2}(-\d{2}){2}T\d{2}(:\d{2}){2}.\d{3,4}\s+\(.+\)\s+ATTACH_DATABASE$', re.IGNORECASE)
|
|
|
|
trace_expected = 'FOUND SWEEP_FAILED MESSAGE.'
|
|
for i,line in enumerate(act.trace_log):
|
|
if line.strip():
|
|
if p_sweep_failed.search(line.strip()):
|
|
print(trace_expected)
|
|
found_sweep_failed = 1
|
|
if found_sweep_failed == 1 and p_att_success.search(line) and i < len(act.trace_log)-2 and 'gfix' in act.trace_log[i+2].lower():
|
|
# NB: we have to ignore "FAILED ATTACH_DATABASE".
|
|
# It can occur in some cases.
|
|
print('TRACE: UNEXPECTED ATTACH FOUND AFTER KILL SWEEP! CHECK LINE N {i}:')
|
|
print('TRACE_LOG: ' + line)
|
|
|
|
act.expected_stdout = trace_expected
|
|
act.stdout = capsys.readouterr().out
|
|
assert act.clean_stdout == act.clean_expected_stdout
|
|
act.reset()
|
|
|
|
#----------------------------------------------------------------
|
|
|
|
with act.connect_server() as srv:
|
|
srv.info.get_log()
|
|
log_after = srv.readlines()
|
|
|
|
'''
|
|
Example of diff:
|
|
<COMPUTERNAME> Wed Sep 14 15:58:37 2022
|
|
Sweep is started by SYSDBA
|
|
Database "C:\TEMP\<PYTEST_PATH>\TEST.FDB"
|
|
OIT 20, OAT 21, OST 21, Next 21
|
|
|
|
<COMPUTERNAME> Wed Sep 14 15:58:37 2022
|
|
Error during sweep of C:\<PYTEST_PATH>\TEST.FDB:
|
|
connection shutdown
|
|
'''
|
|
|
|
p_tx_counter = re.compile("\\+[\\s]+OIT[ ]+\\d+,[\\s]*OAT[\\s]+\\d+,[\\s]*OST[\\s]+\\d+,[\\s]*NEXT[\\s]+\\d+")
|
|
for line in unified_diff(log_before, log_after):
|
|
if line.startswith('+'):
|
|
line = line.upper()
|
|
if 'SWEEP' in line or 'CONNECTION' in line or p_tx_counter.match(line):
|
|
print( 'FIREBIRD.LOG: ' + (' '.join(line.split())) )
|
|
|
|
fb_log_expected = """
|
|
FIREBIRD.LOG: + SWEEP IS STARTED BY SYSDBA
|
|
FIREBIRD.LOG: + OIT, OAT, OST, NEXT
|
|
FIREBIRD.LOG: + ERROR DURING SWEEP OF TEST.FDB
|
|
FIREBIRD.LOG: + CONNECTION SHUTDOWN
|
|
"""
|
|
|
|
act.expected_stdout = fb_log_expected
|
|
act.stdout = capsys.readouterr().out
|
|
assert act.clean_stdout == act.clean_expected_stdout
|