Added/Updated tests\functional\replication\test_disallow_rdb_backup_history_replication.py: Checked on 4.0.3.2943 - both SS and CS.

2025-02-02 02:40:42 +01:00 · 2023-05-29 23:08:14 +03:00 · 2023-05-29 23:08:14 +03:00 · 8eb64cae2c
commit 8eb64cae2c
parent 735578968e
1 changed files with 417 additions and 0 deletions
--- a/tests/functional/replication/test_disallow_rdb_backup_history_replication.py
+++ b/tests/functional/replication/test_disallow_rdb_backup_history_replication.py
@ -0,0 +1,417 @@
+#coding:utf-8
+
+"""
+ID:          replication.disallow_rdb$backup_history_replication
+ISSUE:       https://github.com/FirebirdSQL/firebird/issues/7605
+TITLE:       Disallow replication of RDB$BACKUP_HISTORY
+DESCRIPTION:
+
+    We query table RDB$BACKUP_HISTORY on *replica* database and store RDB$GUID values in the list 'rdb_backup_history_init'.
+    Then we run NBACKUP of *master* two times, using levels 0 and 1.
+    After this we create table with name 't_completed'. It serves as 'flag' to be checked that all actions on master finished.
+
+    Then we wait until the replica catches up with the master; this wait lasts no longer than the threshold
+    defined by the MAX_TIME_FOR_WAIT_DATA_IN_REPLICA variable (measured in seconds), see QA_ROOT/files/test_config.ini
+
+    When table 't_completed' appears in the replica, we query the table RDB$BACKUP_HISTORY on the *replica* AGAIN and check
+    that it contains the same data that was stored initially in 'rdb_backup_history_init' -- see usage of Counter class.
+
+    Further, we invoke ISQL to execute an auxiliary script that drops all DB objects on master (with the '-nod' command switch).
+    After all objects have been dropped, we have to wait again until the replica catches up with the master.
+    A check that both DBs have no custom objects is then performed (see UNION-ed query to rdb$ tables + filtering on rdb$system_flag).
+
+    Finally, we extract metadata for master and replica and make comparison.
+    The only difference in metadata must be 'CREATE DATABASE' statement with different DB names - we suppress it,
+    thus metadata difference must not be issued.
+NOTES:
+    [29.05.2023] pzotov
+    NOTE-1.
+        We use 'assert' only at the final point of the test, printing details about the encountered problem(s).
+        During all previous steps, we only store unexpected output in variables, e.g.: out_main = capsys.readouterr().out etc.
+    NOTE-2.
+        Temporarily DISABLED execution on Linux when ServerMode = Classic. Replication can unexpectedly stop, and the message
+        'Engine is shutdown' appears in replication.log. Sent report to dimitr, waiting for fix.
+    
+    Confirmed problem on 4.0.3.2942: records from rdb$backup_history table on master are transferred to replica DB.
+    Checked on 4.0.3.2943 - both SS and CS.
+"""
+
+import os
+import shutil
+from difflib import unified_diff
+from pathlib import Path
+from collections import Counter
+import time
+
+import pytest
+from firebird.qa import *
+from firebird.driver import connect, create_database, DbWriteMode, ReplicaMode, ShutdownMode, ShutdownMethod, OnlineMode, DatabaseError
+
+# QA_GLOBALS -- dict, is defined in qa/plugin.py, obtain settings
+# from act.files_dir/'test_config.ini':
+repl_settings = QA_GLOBALS['replication']
+
+# Upper bound (in seconds) for every wait on replica, see calls of watch_replica():
+MAX_TIME_FOR_WAIT_DATA_IN_REPLICA = int(repl_settings['max_time_for_wait_data_in_replica'])
+# Aliases of master and replica databases, taken from the same section of settings:
+MAIN_DB_ALIAS = repl_settings['main_db_alias']
+REPL_DB_ALIAS = repl_settings['repl_db_alias']
+
+# Both databases are referenced by alias ('#' + alias name).
+# NOTE(review): judging by the flag names, these fixtures neither create nor drop the databases,
+# i.e. master and replica are expected to exist beforehand -- confirm against db_factory docs.
+db_main = db_factory( filename = '#' + MAIN_DB_ALIAS, do_not_create = True, do_not_drop = True)
+db_repl = db_factory( filename = '#' + REPL_DB_ALIAS, do_not_create = True, do_not_drop = True)
+
+# (pattern, replacement) pairs passed to both python_act() calls below.
+# NOTE(review): presumably they are applied when clean_stdout / clean_expected_stdout are built,
+# which hides the varying tails of the drop-all-db-objects.sql messages and any
+# 'CREATE DATABASE' line -- verify against the plugin.
+substitutions = [('Start removing objects in:.*', 'Start removing objects'),
+                 ('Finish. Total objects removed:  [1-9]\\d*', 'Finish. Total objects removed'),
+                 ('.* CREATE DATABASE .*', ''),
+                ]
+
+# Actions for master and replica; they share the same substitutions list:
+act_db_main = python_act('db_main', substitutions=substitutions)
+act_db_repl = python_act('db_repl', substitutions=substitutions)
+
+# Target files for nbackup of master with level 0 and level 1 (see test_1):
+db_main_nbk0 = temp_file(filename = 'tmp_db_main.nbk0')
+db_main_nbk1 = temp_file(filename = 'tmp_db_main.nbk1')
+
+#--------------------------------------------
+
+def cleanup_folder(p):
+    """Remove all files and sub-directories inside folder <p>; the folder itself is kept.
+
+    Used for cleanup of <repl_journal> and <repl_archive> when replication must be reset
+    because some error occurred during test execution.
+
+    :param p: folder to be emptied (str or path-like object).
+    :returns: number of entries that still remain in <p>; 0 means that the folder is empty.
+    :raises AssertionError: if <p> resolves to the file system root directory.
+    """
+    target = Path(p).resolve()
+    # Compare Path with Path: os.path.dirname() returns str, and a str is never equal to a Path
+    # object, so a str-vs-Path check silently passes for the root directory when <p> is a Path.
+    assert target.parent != target, "@@@ ABEND @@@ CAN NOT operate in the file system root directory. Check your code!"
+    for entry in target.iterdir():
+        # Only top-level entries are visited: rmtree() takes care of everything nested.
+        # A symlink is unlinked rather than followed, because shutil.rmtree() refuses symlinks.
+        if entry.is_dir() and not entry.is_symlink():
+            shutil.rmtree(entry)
+        else:
+            entry.unlink()
+    return len(os.listdir(target))
+
+#--------------------------------------------
+
+def reset_replication(act_db_main, act_db_repl, db_main_file, db_repl_file):
+    """Recreate master and replica databases from scratch after a failed run.
+
+    Both databases are forced into full shutdown and their files are deleted, then the
+    replication journal and archive folders are emptied. Only if all of that produced no
+    messages, both databases are created anew: async write mode and sweep interval 0 for each,
+    read-only replica mode for the replica, publication enabled for all tables on the master.
+
+    :param db_main_file: full path + filename of the master database.
+    :param db_repl_file: full path + filename of the replica database.
+    :returns: accumulated text of all problems met on the way; it must remain EMPTY.
+    """
+    problems = ''
+
+    with act_db_main.connect_server() as srv:
+
+        # !! IT IS ASSUMED THAT REPLICATION FOLDERS ARE IN THE SAME DIR AS <DB_MAIN> !!
+        # DO NOT use 'a.db.db_path' for ALIASED database!
+        # It will return '.' rather than full path+filename.
+
+        repl_root_path = Path(db_main_file).parent
+        sub_dir_names = (repl_settings['journal_sub_dir'], repl_settings['archive_sub_dir'])
+
+        for db_file in (db_main_file, db_repl_file):
+            # Method db.drop() changes LINGER to 0, issues 'delete from mon$att' with suppressing exceptions
+            # and calls 'db.drop_database()' (also with suppressing exceptions).
+            # We change DB state to FULL SHUTDOWN instead of call action.db.drop() because
+            # this is more reliable (it kills all attachments in all known cases and does not use mon$ table)
+            #
+            try:
+                srv.database.shutdown(
+                    database = db_file,
+                    mode = ShutdownMode.FULL,
+                    method = ShutdownMethod.FORCED,
+                    timeout = 0,
+                )
+            except DatabaseError as exc:
+                problems += str(exc)
+
+            # REMOVE db file from disk:
+            ###########################
+            os.unlink(db_file)
+
+        # Clean folders repl_journal and repl_archive: remove all files from there.
+        for sub_dir in sub_dir_names:
+            if cleanup_folder(repl_root_path / sub_dir) > 0:
+                problems += f"Directory {str(sub_dir)} remains non-empty.\n"
+
+    if problems:
+        # Something went wrong already: do not even try to recreate databases.
+        return problems
+
+    for act in (act_db_main, act_db_repl):
+        db_ref = act.db.db_path
+
+        try:
+            create_database(str(db_ref), user = act.db.user).close()
+            with act.connect_server() as srv:
+                srv.database.set_write_mode(database = db_ref, mode = DbWriteMode.ASYNC)
+                srv.database.set_sweep_interval(database = db_ref, interval = 0)
+                if act == act_db_repl:
+                    srv.database.set_replica_mode(database = db_ref, mode = ReplicaMode.READ_ONLY)
+                else:
+                    with act.db.connect() as con:
+                        for ddl in ('alter database enable publication',
+                                    'alter database include all to publication'):
+                            con.execute_immediate(ddl)
+                        con.commit()
+        except DatabaseError as exc:
+            problems += str(exc)
+
+    # Must remain EMPTY:
+    return problems
+
+#--------------------------------------------
+
+def watch_replica( a: Action, max_allowed_time_for_wait, ddl_ready_query = '', isql_check_script = '', replica_expected_out = ''):
+    """Wait until the database of action <a> reaches the expected state.
+
+    Stage 1 (only if <ddl_ready_query> is given): the query is executed once per second until
+    it returns at least one row. Stage 2 (only if <isql_check_script> is given): the script is
+    run by ISQL once per second until its output matches <replica_expected_out>.
+    Each stage makes no more than <max_allowed_time_for_wait> attempts.
+
+    :param a: action for the database to be polled.
+    :param max_allowed_time_for_wait: max number of attempts (i.e. seconds) for each stage.
+    :param ddl_ready_query: query that returns a row when the database is ready to be checked.
+    :param isql_check_script: script for the final check of the database content.
+    :param replica_expected_out: expected output of <isql_check_script>.
+    :returns: None. Every problem is reported via print(), so nothing is printed on success.
+    """
+    ready_to_check = not ddl_ready_query
+    if ddl_ready_query:
+        with a.db.connect(no_db_triggers = True) as con:
+            with con.cursor() as cur:
+                for attempt in range(max_allowed_time_for_wait):
+                    cur.execute(ddl_ready_query)
+                    if cur.fetchone():
+                        ready_to_check = True
+                        break
+                    # NOTE(review): rollback presumably makes the next attempt run in a new
+                    # transaction, which is able to see changes applied meanwhile.
+                    con.rollback()
+                    # No need to sleep after the very last attempt: nothing will be queried again.
+                    if attempt < max_allowed_time_for_wait - 1:
+                        time.sleep(1)
+
+    if not ready_to_check:
+        print( f'UNEXPECTED. Initial check query did not return any rows for {max_allowed_time_for_wait} seconds.' )
+        print('Initial check query:')
+        print(ddl_ready_query)
+        return
+
+    if not isql_check_script:
+        return
+
+    final_check_pass = False
+    # Defined in advance: the loop below makes no iterations when max_allowed_time_for_wait is 0,
+    # but the value is needed for the 'Waited for ...' message.
+    attempt = 0
+    for attempt in range(max_allowed_time_for_wait):
+        a.reset()
+        a.expected_stdout = replica_expected_out
+        a.isql(switches=['-q', '-nod'], input = isql_check_script, combine_output = True)
+
+        if a.return_code:
+            # "Token unknown", "Name longer than database column size" etc: we have to
+            # immediately break from this loop because isql_check_script is incorrect!
+            break
+
+        if a.clean_stdout == a.clean_expected_stdout:
+            final_check_pass = True
+            break
+        if attempt < max_allowed_time_for_wait - 1:
+            time.sleep(1)
+
+    if not final_check_pass:
+        print(f'UNEXPECTED. Final check query did not return expected dataset for {max_allowed_time_for_wait} seconds.')
+        print('Final check query:')
+        print(isql_check_script)
+        print('Expected output:')
+        print(a.clean_expected_stdout)
+        print('Actual output:')
+        print(a.clean_stdout)
+        print(f'ISQL return_code={a.return_code}')
+        print(f'Waited for {attempt} seconds')
+
+    a.reset()
+
+    return
+
+#--------------------------------------------
+
+def drop_db_objects(act_db_main: Action,  act_db_repl: Action, capsys):
+    """Return master DB to its initial state and check that replica follows it.
+
+    All DB objects (tables, views, ...) are dropped on master by the script 'drop-all-db-objects.sql'.
+    Then we wait until no custom objects remain in replica, extract metadata of both databases,
+    run sweep on each of them and print the difference between the two metadata texts.
+
+    :param act_db_main: action for master database.
+    :param act_db_repl: action for replica database.
+    :param capsys: pytest fixture, used to re-emit messages printed by watch_replica().
+    :returns: None. Every problem is reported via print(), so captured output must be EMPTY.
+    """
+    db_main_meta, db_repl_meta = '', ''
+    for a in (act_db_main,act_db_repl):
+        if a == act_db_main:
+            sql_clean = (a.files_dir / 'drop-all-db-objects.sql').read_text()
+            a.expected_stdout = """
+                Start removing objects
+                Finish. Total objects removed
+            """
+            a.isql(switches=['-q', '-nod'], input = sql_clean, combine_output = True)
+
+            if a.clean_stdout != a.clean_expected_stdout:
+                # Show what ISQL has actually produced: the expected text alone
+                # tells nothing about the reason of the failure.
+                print('UNEXPECTED output of drop-all-db-objects.sql.')
+                print('Expected output:')
+                print(a.clean_expected_stdout)
+                print('Actual output:')
+                print(a.clean_stdout)
+                a.reset()
+                break
+            a.reset()
+
+            # NB: one need to remember that rdb$system_flag can be NOT ONLY 1 for system used objects!
+            # For example, it has value =3 for triggers that are created to provide CHECK-constraints,
+            # Custom DB objects always have rdb$system_flag = 0 (or null for some very old databases).
+            # We can be sure that there are no custom DB objects if following query result is NON empty:
+            #
+            ddl_ready_query = """
+                select 1
+                from rdb$database
+                where NOT exists (
+                    select custom_db_object_flag
+                    from (
+                        select rt.rdb$system_flag as custom_db_object_flag from rdb$triggers rt
+                        UNION ALL
+                        select rt.rdb$system_flag from rdb$relations rt
+                        UNION ALL
+                        select rt.rdb$system_flag from rdb$functions rt
+                        UNION ALL
+                        select rt.rdb$system_flag from rdb$procedures rt
+                        UNION ALL
+                        select rt.rdb$system_flag from rdb$exceptions rt
+                        UNION ALL
+                        select rt.rdb$system_flag from rdb$fields rt
+                        UNION ALL
+                        select rt.rdb$system_flag from rdb$collations rt
+                        UNION ALL
+                        select rt.rdb$system_flag from rdb$generators rt
+                        UNION ALL
+                        select rt.rdb$system_flag from rdb$roles rt
+                        UNION ALL
+                        select rt.rdb$system_flag from rdb$auth_mapping rt
+                        UNION ALL
+                        select 1 from sec$users s
+                        where upper(s.sec$user_name) <> 'SYSDBA'
+                    ) t
+                    where coalesce(t.custom_db_object_flag,0) = 0
+                )
+            """
+
+
+            ##############################################################################
+            ###  W A I T   U N T I L    R E P L I C A    B E C O M E S   A C T U A L   ###
+            ##############################################################################
+            watch_replica( act_db_repl, MAX_TIME_FOR_WAIT_DATA_IN_REPLICA, ddl_ready_query)
+
+            # Must be EMPTY (whatever watch_replica has printed is read and printed again):
+            print(capsys.readouterr().out)
+
+            db_main_meta = a.extract_meta(charset = 'utf8', io_enc = 'utf8')
+        else:
+            db_repl_meta = a.extract_meta(charset = 'utf8', io_enc = 'utf8')
+
+        ######################
+        ### A C H T U N G  ###
+        ######################
+        # MANDATORY, OTHERWISE REPLICATION GETS STUCK ON SECOND RUN OF THIS TEST
+        # WITH 'ERROR: Record format with length NN is not found for table TEST':
+        a.gfix(switches=['-sweep', a.db.dsn])
+
+
+    # Final point: metadata must become equal.
+    # The only allowed difference is 'CREATE DATABASE' statement, so it is filtered out.
+    #
+    main_lines = [x for x in db_main_meta.splitlines() if 'CREATE DATABASE' not in x]
+    repl_lines = [x for x in db_repl_meta.splitlines() if 'CREATE DATABASE' not in x]
+    # Lines have no trailing newline after splitlines(), so control lines must not have it
+    # either (lineterm = ''), and the result is joined with newline to stay readable.
+    diff_meta = '\n'.join(unified_diff(main_lines, repl_lines, lineterm = ''))
+    # Must be EMPTY:
+    print(diff_meta)
+
+#--------------------------------------------
+
+def _fetch_first_column(con, sql):
+    # Run <sql> via connection <con> and return values of the first column of its result set.
+    # Cursor is closed on exit from this function.
+    with con.cursor() as cur:
+        cur.execute(sql)
+        return [r[0] for r in cur]
+
+
+@pytest.mark.version('>=4.0.3')
+def test_1(act_db_main: Action,  act_db_repl: Action, db_main_nbk0: Path, db_main_nbk1: Path, capsys):
+    """Check that records of RDB$BACKUP_HISTORY are not transferred from master to replica.
+
+    Content of RDB$BACKUP_HISTORY on replica is stored, then nbackup of master is made twice
+    (levels 0 and 1) and the 'signal' table t_completed is created. When this table appears in
+    replica, RDB$BACKUP_HISTORY is queried there again and must contain the same records.
+    Finally all DB objects are dropped; if any problem was detected on any step, both databases
+    are recreated and the test fails with all collected messages shown.
+    """
+    out_prep, out_main, out_drop = '', '', ''
+
+    rdb_backup_history_init = []
+    rdb_bkp_sttm = 'select rb.rdb$guid as db_guid from rdb$backup_history rb'
+
+    # Obtain full path + filename for DB_MAIN and DB_REPL aliases.
+    # NOTE: we must NOT use 'a.db.db_path' for ALIASED databases!
+    # It will return '.' rather than full path+filename.
+    # Use only con.info.name for that!
+    #
+    db_info = {}
+    for a in (act_db_main, act_db_repl):
+        with a.db.connect(no_db_triggers = True) as con:
+            if a == act_db_main and a.vars['server-arch'] == 'Classic' and os.name != 'nt':
+                pytest.skip("Waiting for FIX: 'Engine is shutdown' in replication log for CS. Linux only.")
+            db_info[a,  'db_full_path'] = con.info.name
+
+
+    # Must be EMPTY:
+    out_prep = capsys.readouterr().out
+    if not out_prep:
+        # No problems raised during establishing connections to master/replica DB.
+
+        with act_db_main.connect_server() as srv, \
+             act_db_main.db.connect() as con_main, \
+             act_db_repl.db.connect() as con_repl:
+
+            rdb_backup_history_init = _fetch_first_column(con_repl, rdb_bkp_sttm)
+
+            try:
+
+                srv.database.nbackup(database = act_db_main.db.db_path, backup = db_main_nbk0, level=0)
+                srv.database.nbackup(database = act_db_main.db.db_path, backup = db_main_nbk1, level=1)
+
+                # Create 'signal' table to be checked on replica:
+                con_main.execute_immediate('recreate table t_completed(id int primary key)')
+                con_main.commit()
+
+            except DatabaseError as e:
+                print('UNEXPECTED STDERR during nbackup or running DDL:', e.__str__())
+
+        # Must be EMPTY:
+        out_prep = capsys.readouterr().out
+
+    if not out_prep:
+        # No problems raised during execution of initial SQL.
+        # Query to be used for check that all DB objects present in replica (after last DML statement completed on master DB):
+        ddl_ready_query = "select 1 from rdb$relations where rdb$relation_name = upper('t_completed')"
+
+        ##############################################################################
+        ###  W A I T   U N T I L    R E P L I C A    B E C O M E S   A C T U A L   ###
+        ##############################################################################
+        watch_replica( act_db_repl, MAX_TIME_FOR_WAIT_DATA_IN_REPLICA, ddl_ready_query )
+
+        # Must be EMPTY:
+        out_main = capsys.readouterr().out
+
+        if not out_main:
+            # Main check: we have to query RDB$BACKUP_HISTORY on replica and compare it with initial content of that table
+            # that was queried at the start of test, see 'rdb_backup_history_init' above.
+            # It makes sense only when neither preparation nor watch_replica reported a problem.
+            with act_db_repl.db.connect() as con_repl:
+                rdb_backup_history_curr = _fetch_first_column(con_repl, rdb_bkp_sttm)
+
+            # Counter subtraction keeps only positive counts, so every kind of difference
+            # (new, duplicated or vanished record) gets into one of these two results.
+            # Both of them are empty when no changes occurred in replica RDB$BACKUP_HISTORY table.
+            appeared = Counter(rdb_backup_history_curr) - Counter(rdb_backup_history_init)
+            vanished = Counter(rdb_backup_history_init) - Counter(rdb_backup_history_curr)
+            if appeared:
+                print('UNEXPECTED record(s) appeared in replica RDB$BACKUP_HISTORY table:')
+                print( '\n'.join( str(x) for x in appeared.elements() ) )
+            if vanished:
+                print('UNEXPECTED record(s) disappeared from replica RDB$BACKUP_HISTORY table:')
+                print( '\n'.join( str(x) for x in vanished.elements() ) )
+
+            # Must be EMPTY:
+            out_main = capsys.readouterr().out
+
+
+    drop_db_objects(act_db_main, act_db_repl, capsys)
+
+    # Must be EMPTY:
+    out_drop = capsys.readouterr().out
+
+    if any( x.strip() for x in (out_prep, out_main, out_drop) ):
+        # We have a problem either with DDL/DML or with dropping DB objects.
+        # First, we have to RECREATE both master and slave databases
+        # (otherwise further execution of this test or other replication-related tests most likely will fail):
+        out_reset = reset_replication(act_db_main, act_db_repl, db_info[act_db_main,'db_full_path'], db_info[act_db_repl,'db_full_path'])
+
+        # Next, we display out_prep, out_main, out_drop and out_reset:
+        #
+        print('Problem(s) detected:')
+        if out_prep.strip():
+            print('out_prep:\n', out_prep)
+        if out_main.strip():
+            print('out_main:\n', out_main)
+        if out_drop.strip():
+            print('out_drop:\n', out_drop)
+        if out_reset.strip():
+            print('out_reset:\n', out_reset)
+
+        # Something was printed just above, so this assert always fails here and shows the report:
+        assert '' == capsys.readouterr().out