From 872fd5ed13bf488a93e9aca09578b37823bb6b44 Mon Sep 17 00:00:00 2001
From: pavel-zotov <p519446@yandex.ru>
Date: Sat, 21 Dec 2024 11:27:42 +0300
Subject: [PATCH] Added/Updated tests\bugs\gh_8057_test.py: Checked on
 6.0.0.295-ef66a9b (22-mar-2024 13:48): all OK,every of checked system tables
 has non-zero statistics for its indices.

---
 tests/bugs/gh_8057_test.py | 470 +++++++++++++++++++++++++++++++++++++
 1 file changed, 470 insertions(+)
 create mode 100644 tests/bugs/gh_8057_test.py
diff --git a/tests/bugs/gh_8057_test.py b/tests/bugs/gh_8057_test.py
new file mode 100644
index 00000000..c1a797e9
--- /dev/null
+++ b/tests/bugs/gh_8057_test.py
@@ -0,0 +1,470 @@
+#coding:utf-8
+
+"""
+ID:          issue-8057
+ISSUE:       https://github.com/FirebirdSQL/firebird/issues/8057
+TITLE:       Let optimizer automatically update empty index statistics for relatively small system tables
+DESCRIPTION:
+    Only tables from <CHECKED_SYS_TABLES> list are checked (with additional filtering only those which have indices).
+    For each index of selected RDB tables we get name of its first field (i.e. started part of index key).
+    Then we construct query with INNER JOIN that involves such index to be included in execution plan.
+    For example, for table RDB$DEPENDENCIES with two indices:
+        CREATE INDEX RDB$INDEX_27 ON RDB$DEPENDENCIES (RDB$DEPENDENT_NAME, RDB$DEPENDENT_TYPE);
+        CREATE INDEX RDB$INDEX_28 ON RDB$DEPENDENCIES (RDB$DEPENDED_ON_NAME, RDB$DEPENDED_ON_TYPE, RDB$FIELD_NAME);
+    - following queries will be generated:
+    -------------------------------------
+        1) Index for columns (RDB$DEPENDENT_NAME, RDB$DEPENDENT_TYPE) will be involved here:
+           select a.rdb$dependent_name, count(*)
+           from rdb$dependencies a
+           join rdb$dependencies b on a.rdb$dependent_name = b.rdb$dependent_name
+           where a.rdb$dependent_name = ? and a.rdb$dependent_name < b.rdb$dependent_name
+           group by 1
+
+        2) Index for column (RDB$DEPENDED_ON_NAME, RDB$DEPENDED_ON_TYPE, RDB$FIELD_NAME) will be involved here: 
+            select a.rdb$depended_on_name, count(*)
+            from rdb$dependencies a
+            join rdb$dependencies b on a.rdb$depended_on_name = b.rdb$depended_on_name
+            where a.rdb$depended_on_name = ? and a.rdb$depended_on_name < b.rdb$depended_on_name
+            group by 1
+    -------------------------------------
+
+    When every such query is prepared, optimizer must automatically update appropriate index statistics.
+    KEY NOTE: this will be done only when system table is not empty and has no more than 100 data pages,
+    see FB (see in Optimizer.cpp: "... (relPages->rel_data_pages > 0) && (relPages->rel_data_pages < 100)").
+
+    Because of need to have some data in RDB tables, we must run DDL which force these tables to be fulfilled.
+    For this purpose test creates <INIT_DB_OBJECTS_CNT> DB objects for each object type (mapping, sequences etc).
+    Also, "NBACKUP -B ..." is called <NBACKUP_RUNS_CNT> times in order to fulfill table RDB$BACKUP_HISTORY table.
+
+    Then, we get IndexRoot page number for each of selected RDB tables and start to parse this page content.
+    During this parsing (see func 'parse_index_root_page') we get values of <K>th statistics for each of <N> starting
+    parts of compound index (or for single segment for usual index). We accumulate this data in rel_sel_map{}.
+    Finally, we check that all indices has NON-zero statistics: rel_sel_map{} must have NO items with zero values.
+NOTES:
+    [21.12.2024] pzotov
+    Currently only FB 6.x has this feature (since 22-mar-2024 11:46).
+    Commit:  https://github.com/FirebirdSQL/firebird/commit/ef66a9b4d803d5129a10350c54f00bc637c09b48
+
+    ::: ACHTUNG ::: Index statistics must be searched in the Index Root page rather than in RDB$INDICES!
+    Internals of misc FB page types can be found here:
+    https://firebirdsql.org/file/documentation/html/en/firebirddocs/firebirdinternals/firebird-internals.html
+    
+    It is supposed that there are no expression-based indices for selected system tables (this case was not investigated).
+
+    Confirmed ticket issue on 6.0.0.294-c353de4 (21-mar-2024 16:45): some of system tables remain with non-updated index statistics.
+    Checked on 6.0.0.295-ef66a9b (22-mar-2024 13:48): all OK,every of checked system tables has non-zero statistics for its indices.
+    Test executiuon time: ~8...10 seconds.
+
+    Thanks to Vlad for explanations.
+"""
+import sys
+import binascii
+import struct
+from typing import List
+from pathlib import Path
+
+import pytest
+from firebird.qa import *
+from firebird.driver import DatabaseError, DbInfoCode
+
+import locale
+import time
+
+EXPECTED_MSG = 'Expected: every checked system table has non-zero index statistics.'
+
+IRT_PAGE_TYPE = 6
+CHECKED_SYS_TABLES = """
+   (
+        'RDB$AUTH_MAPPING'
+       ,'RDB$BACKUP_HISTORY'
+       ,'RDB$CHARACTER_SETS'
+       ,'RDB$CHECK_CONSTRAINTS'
+       ,'RDB$COLLATIONS'
+       ,'RDB$DEPENDENCIES'
+       ,'RDB$EXCEPTIONS'
+       ,'RDB$FIELDS'
+       ,'RDB$FIELD_DIMENSIONS'
+       ,'RDB$FILTERS'
+       ,'RDB$FORMATS'
+       ,'RDB$FUNCTIONS'
+       ,'RDB$FUNCTION_ARGUMENTS'
+       ,'RDB$GENERATORS'
+       ,'RDB$INDEX_SEGMENTS'
+       ,'RDB$INDICES'
+       ,'RDB$PACKAGES'
+       ,'RDB$PROCEDURES'
+       ,'RDB$PROCEDURE_PARAMETERS'
+       ,'RDB$PUBLICATIONS'
+       ,'RDB$PUBLICATION_TABLES'
+       ,'RDB$REF_CONSTRAINTS'
+       ,'RDB$RELATIONS'
+       ,'RDB$RELATION_CONSTRAINTS'
+       ,'RDB$RELATION_FIELDS'
+       ,'RDB$ROLES'
+       ,'RDB$SECURITY_CLASSES'
+       ,'RDB$TRIGGERS'
+       ,'RDB$TYPES'
+       ,'RDB$USER_PRIVILEGES'
+       ,'RDB$VIEW_RELATIONS'
+   )
+"""
+
+NBACKUP_RUNS_CNT = 10
+INIT_DB_OBJECTS_CNT = 30
+
+# SQL script which will cause filling of RDB tables with some data:
+INIT_DB_OBJECTS_SQL = f"""
+    alter database enable publication;
+    set term ^;
+    execute block as
+        declare n_obj_cnt smallint = {INIT_DB_OBJECTS_CNT};
+        declare i int;
+        declare v_sttm varchar(8190);
+    begin
+        i = 0;
+        while (i < n_obj_cnt) do
+        begin
+            v_sttm = 'recreate sequence g_' || i;
+            execute statement v_sttm;
+            v_sttm = 'create or alter mapping local_map_' || i || ' using any plugin from group musicians to role guitarist';
+            execute statement v_sttm;
+
+            begin
+                v_sttm = 'drop collation name_coll_' || i;
+                execute statement v_sttm;
+                when any do
+                begin
+                end
+            end
+
+            begin
+                v_sttm = 'drop role r_manager_' || i;
+                execute statement v_sttm;
+                when any do
+                begin
+                end
+            end
+
+            begin
+                v_sttm = 'drop table tbl_' || i;
+                execute statement v_sttm;
+                when any do
+                begin
+                end
+            end
+
+            begin
+                v_sttm = 'drop domain dm_' || i;
+                execute statement v_sttm;
+                when any do
+                begin
+                end
+            end
+
+            begin
+                v_sttm = 'drop filter jpg_' || i;
+                execute statement v_sttm;
+                when any do
+                begin
+                end
+            end
+
+            -- examples\api\api9f.sql
+            -- declare filter desc_filter_01 -- ==> will be saved in indexed column rdb$filters.rdb$function_name
+            -- input_type 1
+            -- output_type -4
+            -- entry_point 'desc_filter'
+            -- module_name 'api9f'
+            -- ;
+            
+            v_sttm = 'declare filter jpg_' || i || ' input_type ' || i || ' output_type -4 entry_point ''desc_filter'' module_name ''api9f''';
+            execute statement v_sttm;
+
+
+            v_sttm = 'create collation name_coll_' || i || ' for utf8 from unicode case insensitive';
+            execute statement v_sttm;
+
+            v_sttm = 'recreate exception exc_' || i || ' ''missing element with index @1''';
+            execute statement v_sttm;
+
+            v_sttm = 'create role r_manager_' || i;
+            execute statement v_sttm;
+
+            v_sttm = 'create domain dm_' || i || ' as int not null check(value > 0)';
+            execute statement v_sttm;
+
+            v_sttm = 'recreate table tbl_' || i
+                     || '( id int generated by default as identity constraint pk_tbl_'|| i  || ' primary key'
+                     || ', pid int'
+                     || ', f_0 dm_' || i
+                     || ', f_01 int'
+                     || ', f_02 int[3,4]'
+                     || ', constraint fktbl_' || i || ' foreign key(pid) references tbl_' || i || '(id)'
+                     || ', constraint chk_tbl_' || i || ' check (f_01 > 0)'
+                     || ')'
+            ;
+            execute statement v_sttm;
+
+            v_sttm = 'recreate trigger trg_' || i || '_bi for tbl_' || i
+                     || ' active before insert as'
+                     || ' begin'
+                     || ' end'
+            ;
+            execute statement v_sttm;
+
+            execute statement 'alter database include table tbl_' || i || ' to publication';
+
+
+            v_sttm = 'recreate view vew_' || i || ' as select 1 x from rdb$database';
+            execute statement v_sttm;
+
+            v_sttm = 'create or alter procedure sp_' || i || '(a_0 int, a_1 varchar(10)) as begin end';
+            execute statement v_sttm;
+
+            v_sttm = 'create or alter function fn_' || i || '(a_0 int, a_1 int) returns int as begin return a_0 + a_1; end';
+            execute statement v_sttm;
+
+            v_sttm = 'create or alter package pg_' || i || ' as begin function pg_fn() returns int; end';
+            execute statement v_sttm;
+
+            v_sttm = 'recreate package body pg_' || i || ' as begin function pg_fn() returns int as begin return 1; end end';
+            execute statement v_sttm;
+
+            i = i + 1;
+        end
+    end
+    ^
+    ------------------------------------
+    set term ;^
+    commit;
+"""
+
+db = db_factory(init = INIT_DB_OBJECTS_SQL)
+act = python_act('db', substitutions = [('[ \t]+', ' ')])
+tmp_nbk_lst = temp_files( [ f'tmp_8057.{i}.nbk' for i in range(NBACKUP_RUNS_CNT) ] )
+
+#-----------------------------------------------------------------------
+
+def parse_index_root_page(db_file, pg_size, rel_name, irt_page_number, rel_sel_map, verbose = False):
+    
+    # rel_sel_map -- byref
+    min_irtd_selec = sys.float_info.max
+
+    with open( db_file, "rb") as db_handle:
+        db_handle.seek( irt_page_number * pg_size )
+        page_content = db_handle.read( pg_size )
+        page_as_hex=binascii.hexlify( page_content )
+        if verbose:
+            print(f'{pg_size=}, page_as_hex:')
+            print(page_as_hex.decode("utf-8"))
+
+        # https://firebirdsql.org/file/documentation/html/en/firebirddocs/firebirdinternals/firebird-internals.html#fbint-page-6
+        # https://docs.python.org/3/library/struct.html#format-characters
+
+        # Two bytes, UNsigned. Offset 0x10 on the page. The relation id. This is the value of RDB$RELATIONS.RDB$RELATION_ID.
+        irt_relation = struct.unpack_from('@H', page_content[0x10:0x12])[0] # (128,) --> 128
+
+        # Two bytes, UNsigned. Offset 0x12 on the page. The number of indices defined for this table. 
+        irt_count = struct.unpack_from('@H', page_content[0x12:0x14])[0]
+
+        if verbose:
+            print(f'{irt_relation=}, {rel_name.strip()=}, {irt_count=}')
+
+        for i in range(irt_count):
+            # 4+4+2+1+1 = 12
+
+            irt_root_offset_i = i * 12 + int(0x14)
+            irt_tran_offset_i = i * 12 + int(0x18)
+            irt_desc_offset_i = i * 12 + int(0x1c)
+            irt_keys_offset_i = i * 12 + int(0x1e)
+            irt_flags_offset_i = i * 12 + int(0x1f)
+
+            # Four bytes, SIGNED. Offset 0x00 in each descriptor array entry.
+            # This field is the page number where the root page for the individual index (page type 0x07) is located.
+            irt_root_i = struct.unpack_from('@i', page_content[irt_root_offset_i : irt_root_offset_i + 4])[0]
+
+            # Four bytes, SIGNED. Offset 0x04 in each descriptor array entry.
+            # Normally this field will be zero but if an index is in the process of being created, the transaction id will be found here.
+            irt_tran_i = struct.unpack_from('@i', page_content[irt_tran_offset_i : irt_tran_offset_i + 4])[0]
+
+            # Two bytes, UNsigned. Offset 0x08 in each descriptor array entry. This field holds the offset, from the start of the page,
+            # to the index field descriptors which are located at the bottom end (ie, highest addresses) of the page.
+            # To calculate the starting address, add the value in this field to the address of the start of the page.
+            irt_desc_i = struct.unpack_from('@H', page_content[irt_desc_offset_i : irt_desc_offset_i + 2])[0]
+
+            # One byte, UNsigned. This defines the number of keys (columns) in this index.
+            irt_keys_i = struct.unpack_from('@B', page_content[ irt_keys_offset_i : irt_keys_offset_i + 1])[0]
+
+            # One byte, UNsigned. The flags define various attributes for this index, these are encoded into various bits in the field, as follows:
+            # See src/jrd/btr.h
+            #   Bit 0 : Index is unique (set) or not (unset).
+            #   Bit 1 : Index is descending (set) or ascending (unset).
+            #   Bit 2 : Index [creation?] is in progress (set) or not (unset).
+            #   Bit 3 : Index is a foreign key index (set) or not (unset).
+            #   Bit 4 : Index is a primary key index (set) or not (unset).
+            #   Bit 5 : Index is expression based (set) or not (unset).
+            #   Bit 6 : Index is conditional
+            irt_flags_i = struct.unpack_from('@B', page_content[ irt_flags_offset_i : irt_flags_offset_i + 1])[0]
+
+            if verbose:
+                print(f'  {i=} ::: {irt_root_i=}, {irt_tran_i=}, {irt_desc_i=}, {irt_keys_i=}, {irt_flags_i=}, bin(irt_flags_i)=','{0:08b}'.format(irt_flags_i))
+
+            for j in range(irt_keys_i):
+                # 2+2+4 = 8
+                # Two bytes, UNsigned. Offset 0x00 in each field descriptor. This field defines the field number of the table that makes up 'this' field in the index.
+                # This number is equivalent to RDB$RELATION_FIELDS.RDB$FIELD_ID.
+                irtd_field = struct.unpack_from('@H', page_content[ j*8 + irt_desc_i : j*8 + irt_desc_i + 2])[0]
+                #print(f'       column: {j} ::: {irtd_field=}')
+
+                # Two bytes, UNsigned. Offset 0x02 in each field descriptor. This determines the data type of the appropriate field in the index.
+                irtd_itype = struct.unpack_from('@H', page_content[ j*8 + irt_desc_i + 2 : j*8 + irt_desc_i + 4])[0]
+                #print(f'                       {irtd_itype=}')
+
+                # Four bytes, floating point format. Offset 0x04 in each field descriptor. This field holds the selectivity of this particular column in the index. 
+                irtd_selec = struct.unpack_from('@f', page_content[ j*8 + irt_desc_i + 4 : j*8 + irt_desc_i + 8])[0]
+                min_irtd_selec = min(min_irtd_selec, irtd_selec)
+                #print(f'                       {irtd_selec=}')
+
+            # Input, byRef:
+            rel_sel_map[rel_name.strip()] = min_irtd_selec
+
+#-----------------------------------------------------------------------------
+
+def replace_leading(source, char="."):
+    stripped = source.lstrip()
+    return char * (len(source) - len(stripped)) + stripped
+
+#-----------------------------------------------------------------------
+
+@pytest.mark.version('>=6.0')
+def test_1(act: Action, tmp_nbk_lst: List[Path], capsys):
+
+    try:
+        for i,tmp_nbk_i in enumerate(tmp_nbk_lst):
+            act.expected_stderr = ''
+            tmp_nbk_i.unlink(missing_ok = True)
+            act.nbackup(switches=['-b', str(i), act.db.dsn, tmp_nbk_i], io_enc = locale.getpreferredencoding())
+            assert act.clean_stderr == act.clean_expected_stderr
+            act.reset()
+    except DatabaseError as e:
+        print(e.__str__())
+    
+    #-----------------------------------------------------------
+    sql_get_sys_tables_index_info = f"""
+        with recursive r
+        as (
+                select
+                     rr.rdb$relation_id as rel_id
+                    ,rr.rdb$relation_name rel_name
+                    ,ri.rdb$index_id-1 as idx_id
+                    ,ri.rdb$index_name idx_name
+                    ,ri.rdb$segment_count seg_cnt
+                    ,rs.rdb$field_position fpos
+                    ,rs.rdb$field_name fname
+                    ,',' || cast( trim(rs.rdb$field_name) as varchar(8190) ) as idx_key
+                    ,sign(octet_length(ri.rdb$expression_blr)) as idx_on_expr
+                from rdb$relations rr
+                join rdb$indices ri on rr.rdb$relation_name = ri.rdb$relation_name
+                join rdb$index_segments rs on ri.rdb$index_name = rs.rdb$index_name
+                where
+                    ri.rdb$system_flag = 1
+                    and ri.rdb$relation_name in {CHECKED_SYS_TABLES}
+                    and ri.rdb$index_inactive is distinct from 1
+                    and (rs.rdb$field_position = 0 or ri.rdb$expression_blr is not null)
+
+                union all
+
+                select
+                     r.rel_id
+                    ,r.rel_name
+                    ,ri.rdb$index_id-1
+                    ,ri.rdb$index_name
+                    ,r.seg_cnt
+                    ,rs.rdb$field_position
+                    ,rs.rdb$field_name fname
+                    ,r.idx_key || ',' || trim(rs.rdb$field_name )
+                    ,r.idx_on_expr
+                from rdb$indices ri
+                join rdb$index_segments rs on ri.rdb$index_name = rs.rdb$index_name
+                join r on ri.rdb$relation_name = r.rel_name
+                and ri.rdb$index_name = r.idx_name
+                and (rs.rdb$field_position = r.fpos+1 or r.idx_on_expr = 1)
+        )
+        --select * from r
+
+        ,m as (
+            select
+                 rel_id
+                ,rel_name
+                ,idx_id
+                ,idx_name
+                ,iif(idx_on_expr = 1, '<expr>', substring(idx_key from 2)) as idx_key
+            from r
+            where fpos = seg_cnt-1 or idx_on_expr = 1
+        )
+        select m.rel_id, m.rel_name, m.idx_id, m.idx_name, m.idx_key, p.rdb$page_number as irt_page
+        from m
+        join rdb$pages p on m.rel_id = p.rdb$relation_id and p.rdb$page_type = {IRT_PAGE_TYPE}
+        order by rel_id, idx_id
+    """
+
+    with act.db.connect() as con:
+        cur = con.cursor()
+        cur.execute(sql_get_sys_tables_index_info)
+        rel_irt_map = {}
+        for r in cur:
+            rel_id, rel_name, idx_id, idx_name, idx_key, irt_page = r[:6]
+            idx_starting_fld = idx_key.split(",")[0]
+
+            #print(f'{rel_id=}, {rel_name.strip()=}, {irt_page=}')
+            #print(f'{idx_id=}, {idx_name.strip()=}')
+            #print(f'{idx_key=}, starting field: {idx_starting_fld}')
+
+            sql_to_make_recalc_idx_stat = f"""
+                select a.{idx_starting_fld}, count(*)
+                from {rel_name.strip()} a
+                join {rel_name.strip()} b on a.{idx_starting_fld} = b.{idx_starting_fld}
+                where a.{idx_starting_fld} = ? and a.{idx_starting_fld} < b.{idx_starting_fld}
+                group by 1
+            """
+
+            #print(f'{sql_to_make_recalc_idx_stat=}')
+            ps = None
+            try:
+                # This must cause update index statistics of <idx_name>:
+                ps = cur.prepare(sql_to_make_recalc_idx_stat)
+
+                # Print explained plan with padding eash line by dots in order to see indentations:
+                #print( '\n'.join([replace_leading(s) for s in ps.detailed_plan.split('\n')]) )
+                #print('')
+            except DatabaseError as e:
+                print(e.__str__())
+                print(e.gds_codes)
+            finally:
+                if ps:
+                    ps.free()
+
+            rel_irt_map[ rel_id ] = (rel_name, irt_page)
+
+    rel_sel_map = {} # K = rel_name, V = minimal selectivity (i.e. min irtd_selec for all indices)
+
+    # NB: we have to re-connect in order to see updated indices statistics!
+    with act.db.connect() as con:
+        cur = con.cursor()
+
+        for rel_id, (rel_name, irt_no) in rel_irt_map.items():
+            parse_index_root_page(act.db.db_path, con.info.page_size, rel_name, irt_no, rel_sel_map, verbose = False)
+    
+    if min(rel_sel_map.values()) > 0:
+        print(EXPECTED_MSG)
+    else:
+        print('UNEXPECTED: AT LEAST ONE OF SYSTEM TABLES HAS ZERO INDEX STATISTICS')
+        for rel_name, min_idx_selectivity in rel_sel_map.items():
+            print(f'{rel_name=}, {min_idx_selectivity=}')
+
+    act.expected_stdout = f"""
+        {EXPECTED_MSG}
+    """
+
+    act.stdout = capsys.readouterr().out
+    assert act.clean_stdout == act.clean_expected_stdout