From 872fd5ed13bf488a93e9aca09578b37823bb6b44 Mon Sep 17 00:00:00 2001 From: pavel-zotov Date: Sat, 21 Dec 2024 11:27:42 +0300 Subject: [PATCH] Added/Updated tests\bugs\gh_8057_test.py: Checked on 6.0.0.295-ef66a9b (22-mar-2024 13:48): all OK,every of checked system tables has non-zero statistics for its indices. --- tests/bugs/gh_8057_test.py | 470 +++++++++++++++++++++++++++++++++++++ 1 file changed, 470 insertions(+) create mode 100644 tests/bugs/gh_8057_test.py diff --git a/tests/bugs/gh_8057_test.py b/tests/bugs/gh_8057_test.py new file mode 100644 index 00000000..c1a797e9 --- /dev/null +++ b/tests/bugs/gh_8057_test.py @@ -0,0 +1,470 @@ +#coding:utf-8 + +""" +ID: issue-8057 +ISSUE: https://github.com/FirebirdSQL/firebird/issues/8057 +TITLE: Let optimizer automatically update empty index statistics for relatively small system tables +DESCRIPTION: + Only tables from list are checked (with additional filtering only those which have indices). + For each index of selected RDB tables we get name of its first field (i.e. started part of index key). + Then we construct query with INNER JOIN that involves such index to be included in execution plan. + For example, for table RDB$DEPENDENCIES with two indices: + CREATE INDEX RDB$INDEX_27 ON RDB$DEPENDENCIES (RDB$DEPENDENT_NAME, RDB$DEPENDENT_TYPE); + CREATE INDEX RDB$INDEX_28 ON RDB$DEPENDENCIES (RDB$DEPENDED_ON_NAME, RDB$DEPENDED_ON_TYPE, RDB$FIELD_NAME); + - following queries will be generated: + ------------------------------------- + 1) Index for columns (RDB$DEPENDENT_NAME, RDB$DEPENDENT_TYPE) will be involved here: + select a.rdb$dependent_name, count(*) + from rdb$dependencies a + join rdb$dependencies b on a.rdb$dependent_name = b.rdb$dependent_name + where a.rdb$dependent_name = ? and a.rdb$dependent_name < b.rdb$dependent_name + group by 1 + + 2) Index for column (RDB$DEPENDED_ON_NAME, RDB$DEPENDED_ON_TYPE, RDB$FIELD_NAME) will be involved here: + select a.rdb$depended_on_name, count(*) + from rdb$dependencies a + join rdb$dependencies b on a.rdb$depended_on_name = b.rdb$depended_on_name + where a.rdb$depended_on_name = ? and a.rdb$depended_on_name < b.rdb$depended_on_name + group by 1 + ------------------------------------- + + When every such query is prepared, optimizer must automatically update appropriate index statistics. + KEY NOTE: this will be done only when system table is not empty and has no more than 100 data pages, + see FB (see in Optimizer.cpp: "... (relPages->rel_data_pages > 0) && (relPages->rel_data_pages < 100)"). + + Because of need to have some data in RDB tables, we must run DDL which force these tables to be fulfilled. + For this purpose test creates DB objects for each object type (mapping, sequences etc). + Also, "NBACKUP -B ..." is called times in order to fulfill table RDB$BACKUP_HISTORY table. + + Then, we get IndexRoot page number for each of selected RDB tables and start to parse this page content. + During this parsing (see func 'parse_index_root_page') we get values of th statistics for each of starting + parts of compound index (or for single segment for usual index). We accumulate this data in rel_sel_map{}. + Finally, we check that all indices has NON-zero statistics: rel_sel_map{} must have NO items with zero values. +NOTES: + [21.12.2024] pzotov + Currently only FB 6.x has this feature (since 22-mar-2024 11:46). + Commit: https://github.com/FirebirdSQL/firebird/commit/ef66a9b4d803d5129a10350c54f00bc637c09b48 + + ::: ACHTUNG ::: Index statistics must be searched in the Index Root page rather than in RDB$INDICES! + Internals of misc FB page types can be found here: + https://firebirdsql.org/file/documentation/html/en/firebirddocs/firebirdinternals/firebird-internals.html + + It is supposed that there are no expression-based indices for selected system tables (this case was not investigated). + + Confirmed ticket issue on 6.0.0.294-c353de4 (21-mar-2024 16:45): some of system tables remain with non-updated index statistics. + Checked on 6.0.0.295-ef66a9b (22-mar-2024 13:48): all OK,every of checked system tables has non-zero statistics for its indices. + Test executiuon time: ~8...10 seconds. + + Thanks to Vlad for explanations. +""" +import sys +import binascii +import struct +from typing import List +from pathlib import Path + +import pytest +from firebird.qa import * +from firebird.driver import DatabaseError, DbInfoCode + +import locale +import time + +EXPECTED_MSG = 'Expected: every checked system table has non-zero index statistics.' + +IRT_PAGE_TYPE = 6 +CHECKED_SYS_TABLES = """ + ( + 'RDB$AUTH_MAPPING' + ,'RDB$BACKUP_HISTORY' + ,'RDB$CHARACTER_SETS' + ,'RDB$CHECK_CONSTRAINTS' + ,'RDB$COLLATIONS' + ,'RDB$DEPENDENCIES' + ,'RDB$EXCEPTIONS' + ,'RDB$FIELDS' + ,'RDB$FIELD_DIMENSIONS' + ,'RDB$FILTERS' + ,'RDB$FORMATS' + ,'RDB$FUNCTIONS' + ,'RDB$FUNCTION_ARGUMENTS' + ,'RDB$GENERATORS' + ,'RDB$INDEX_SEGMENTS' + ,'RDB$INDICES' + ,'RDB$PACKAGES' + ,'RDB$PROCEDURES' + ,'RDB$PROCEDURE_PARAMETERS' + ,'RDB$PUBLICATIONS' + ,'RDB$PUBLICATION_TABLES' + ,'RDB$REF_CONSTRAINTS' + ,'RDB$RELATIONS' + ,'RDB$RELATION_CONSTRAINTS' + ,'RDB$RELATION_FIELDS' + ,'RDB$ROLES' + ,'RDB$SECURITY_CLASSES' + ,'RDB$TRIGGERS' + ,'RDB$TYPES' + ,'RDB$USER_PRIVILEGES' + ,'RDB$VIEW_RELATIONS' + ) +""" + +NBACKUP_RUNS_CNT = 10 +INIT_DB_OBJECTS_CNT = 30 + +# SQL script which will cause filling of RDB tables with some data: +INIT_DB_OBJECTS_SQL = f""" + alter database enable publication; + set term ^; + execute block as + declare n_obj_cnt smallint = {INIT_DB_OBJECTS_CNT}; + declare i int; + declare v_sttm varchar(8190); + begin + i = 0; + while (i < n_obj_cnt) do + begin + v_sttm = 'recreate sequence g_' || i; + execute statement v_sttm; + v_sttm = 'create or alter mapping local_map_' || i || ' using any plugin from group musicians to role guitarist'; + execute statement v_sttm; + + begin + v_sttm = 'drop collation name_coll_' || i; + execute statement v_sttm; + when any do + begin + end + end + + begin + v_sttm = 'drop role r_manager_' || i; + execute statement v_sttm; + when any do + begin + end + end + + begin + v_sttm = 'drop table tbl_' || i; + execute statement v_sttm; + when any do + begin + end + end + + begin + v_sttm = 'drop domain dm_' || i; + execute statement v_sttm; + when any do + begin + end + end + + begin + v_sttm = 'drop filter jpg_' || i; + execute statement v_sttm; + when any do + begin + end + end + + -- examples\api\api9f.sql + -- declare filter desc_filter_01 -- ==> will be saved in indexed column rdb$filters.rdb$function_name + -- input_type 1 + -- output_type -4 + -- entry_point 'desc_filter' + -- module_name 'api9f' + -- ; + + v_sttm = 'declare filter jpg_' || i || ' input_type ' || i || ' output_type -4 entry_point ''desc_filter'' module_name ''api9f'''; + execute statement v_sttm; + + + v_sttm = 'create collation name_coll_' || i || ' for utf8 from unicode case insensitive'; + execute statement v_sttm; + + v_sttm = 'recreate exception exc_' || i || ' ''missing element with index @1'''; + execute statement v_sttm; + + v_sttm = 'create role r_manager_' || i; + execute statement v_sttm; + + v_sttm = 'create domain dm_' || i || ' as int not null check(value > 0)'; + execute statement v_sttm; + + v_sttm = 'recreate table tbl_' || i + || '( id int generated by default as identity constraint pk_tbl_'|| i || ' primary key' + || ', pid int' + || ', f_0 dm_' || i + || ', f_01 int' + || ', f_02 int[3,4]' + || ', constraint fktbl_' || i || ' foreign key(pid) references tbl_' || i || '(id)' + || ', constraint chk_tbl_' || i || ' check (f_01 > 0)' + || ')' + ; + execute statement v_sttm; + + v_sttm = 'recreate trigger trg_' || i || '_bi for tbl_' || i + || ' active before insert as' + || ' begin' + || ' end' + ; + execute statement v_sttm; + + execute statement 'alter database include table tbl_' || i || ' to publication'; + + + v_sttm = 'recreate view vew_' || i || ' as select 1 x from rdb$database'; + execute statement v_sttm; + + v_sttm = 'create or alter procedure sp_' || i || '(a_0 int, a_1 varchar(10)) as begin end'; + execute statement v_sttm; + + v_sttm = 'create or alter function fn_' || i || '(a_0 int, a_1 int) returns int as begin return a_0 + a_1; end'; + execute statement v_sttm; + + v_sttm = 'create or alter package pg_' || i || ' as begin function pg_fn() returns int; end'; + execute statement v_sttm; + + v_sttm = 'recreate package body pg_' || i || ' as begin function pg_fn() returns int as begin return 1; end end'; + execute statement v_sttm; + + i = i + 1; + end + end + ^ + ------------------------------------ + set term ;^ + commit; +""" + +db = db_factory(init = INIT_DB_OBJECTS_SQL) +act = python_act('db', substitutions = [('[ \t]+', ' ')]) +tmp_nbk_lst = temp_files( [ f'tmp_8057.{i}.nbk' for i in range(NBACKUP_RUNS_CNT) ] ) + +#----------------------------------------------------------------------- + +def parse_index_root_page(db_file, pg_size, rel_name, irt_page_number, rel_sel_map, verbose = False): + + # rel_sel_map -- byref + min_irtd_selec = sys.float_info.max + + with open( db_file, "rb") as db_handle: + db_handle.seek( irt_page_number * pg_size ) + page_content = db_handle.read( pg_size ) + page_as_hex=binascii.hexlify( page_content ) + if verbose: + print(f'{pg_size=}, page_as_hex:') + print(page_as_hex.decode("utf-8")) + + # https://firebirdsql.org/file/documentation/html/en/firebirddocs/firebirdinternals/firebird-internals.html#fbint-page-6 + # https://docs.python.org/3/library/struct.html#format-characters + + # Two bytes, UNsigned. Offset 0x10 on the page. The relation id. This is the value of RDB$RELATIONS.RDB$RELATION_ID. + irt_relation = struct.unpack_from('@H', page_content[0x10:0x12])[0] # (128,) --> 128 + + # Two bytes, UNsigned. Offset 0x12 on the page. The number of indices defined for this table. + irt_count = struct.unpack_from('@H', page_content[0x12:0x14])[0] + + if verbose: + print(f'{irt_relation=}, {rel_name.strip()=}, {irt_count=}') + + for i in range(irt_count): + # 4+4+2+1+1 = 12 + + irt_root_offset_i = i * 12 + int(0x14) + irt_tran_offset_i = i * 12 + int(0x18) + irt_desc_offset_i = i * 12 + int(0x1c) + irt_keys_offset_i = i * 12 + int(0x1e) + irt_flags_offset_i = i * 12 + int(0x1f) + + # Four bytes, SIGNED. Offset 0x00 in each descriptor array entry. + # This field is the page number where the root page for the individual index (page type 0x07) is located. + irt_root_i = struct.unpack_from('@i', page_content[irt_root_offset_i : irt_root_offset_i + 4])[0] + + # Four bytes, SIGNED. Offset 0x04 in each descriptor array entry. + # Normally this field will be zero but if an index is in the process of being created, the transaction id will be found here. + irt_tran_i = struct.unpack_from('@i', page_content[irt_tran_offset_i : irt_tran_offset_i + 4])[0] + + # Two bytes, UNsigned. Offset 0x08 in each descriptor array entry. This field holds the offset, from the start of the page, + # to the index field descriptors which are located at the bottom end (ie, highest addresses) of the page. + # To calculate the starting address, add the value in this field to the address of the start of the page. + irt_desc_i = struct.unpack_from('@H', page_content[irt_desc_offset_i : irt_desc_offset_i + 2])[0] + + # One byte, UNsigned. This defines the number of keys (columns) in this index. + irt_keys_i = struct.unpack_from('@B', page_content[ irt_keys_offset_i : irt_keys_offset_i + 1])[0] + + # One byte, UNsigned. The flags define various attributes for this index, these are encoded into various bits in the field, as follows: + # See src/jrd/btr.h + # Bit 0 : Index is unique (set) or not (unset). + # Bit 1 : Index is descending (set) or ascending (unset). + # Bit 2 : Index [creation?] is in progress (set) or not (unset). + # Bit 3 : Index is a foreign key index (set) or not (unset). + # Bit 4 : Index is a primary key index (set) or not (unset). + # Bit 5 : Index is expression based (set) or not (unset). + # Bit 6 : Index is conditional + irt_flags_i = struct.unpack_from('@B', page_content[ irt_flags_offset_i : irt_flags_offset_i + 1])[0] + + if verbose: + print(f' {i=} ::: {irt_root_i=}, {irt_tran_i=}, {irt_desc_i=}, {irt_keys_i=}, {irt_flags_i=}, bin(irt_flags_i)=','{0:08b}'.format(irt_flags_i)) + + for j in range(irt_keys_i): + # 2+2+4 = 8 + # Two bytes, UNsigned. Offset 0x00 in each field descriptor. This field defines the field number of the table that makes up 'this' field in the index. + # This number is equivalent to RDB$RELATION_FIELDS.RDB$FIELD_ID. + irtd_field = struct.unpack_from('@H', page_content[ j*8 + irt_desc_i : j*8 + irt_desc_i + 2])[0] + #print(f' column: {j} ::: {irtd_field=}') + + # Two bytes, UNsigned. Offset 0x02 in each field descriptor. This determines the data type of the appropriate field in the index. + irtd_itype = struct.unpack_from('@H', page_content[ j*8 + irt_desc_i + 2 : j*8 + irt_desc_i + 4])[0] + #print(f' {irtd_itype=}') + + # Four bytes, floating point format. Offset 0x04 in each field descriptor. This field holds the selectivity of this particular column in the index. + irtd_selec = struct.unpack_from('@f', page_content[ j*8 + irt_desc_i + 4 : j*8 + irt_desc_i + 8])[0] + min_irtd_selec = min(min_irtd_selec, irtd_selec) + #print(f' {irtd_selec=}') + + # Input, byRef: + rel_sel_map[rel_name.strip()] = min_irtd_selec + +#----------------------------------------------------------------------------- + +def replace_leading(source, char="."): + stripped = source.lstrip() + return char * (len(source) - len(stripped)) + stripped + +#----------------------------------------------------------------------- + +@pytest.mark.version('>=6.0') +def test_1(act: Action, tmp_nbk_lst: List[Path], capsys): + + try: + for i,tmp_nbk_i in enumerate(tmp_nbk_lst): + act.expected_stderr = '' + tmp_nbk_i.unlink(missing_ok = True) + act.nbackup(switches=['-b', str(i), act.db.dsn, tmp_nbk_i], io_enc = locale.getpreferredencoding()) + assert act.clean_stderr == act.clean_expected_stderr + act.reset() + except DatabaseError as e: + print(e.__str__()) + + #----------------------------------------------------------- + sql_get_sys_tables_index_info = f""" + with recursive r + as ( + select + rr.rdb$relation_id as rel_id + ,rr.rdb$relation_name rel_name + ,ri.rdb$index_id-1 as idx_id + ,ri.rdb$index_name idx_name + ,ri.rdb$segment_count seg_cnt + ,rs.rdb$field_position fpos + ,rs.rdb$field_name fname + ,',' || cast( trim(rs.rdb$field_name) as varchar(8190) ) as idx_key + ,sign(octet_length(ri.rdb$expression_blr)) as idx_on_expr + from rdb$relations rr + join rdb$indices ri on rr.rdb$relation_name = ri.rdb$relation_name + join rdb$index_segments rs on ri.rdb$index_name = rs.rdb$index_name + where + ri.rdb$system_flag = 1 + and ri.rdb$relation_name in {CHECKED_SYS_TABLES} + and ri.rdb$index_inactive is distinct from 1 + and (rs.rdb$field_position = 0 or ri.rdb$expression_blr is not null) + + union all + + select + r.rel_id + ,r.rel_name + ,ri.rdb$index_id-1 + ,ri.rdb$index_name + ,r.seg_cnt + ,rs.rdb$field_position + ,rs.rdb$field_name fname + ,r.idx_key || ',' || trim(rs.rdb$field_name ) + ,r.idx_on_expr + from rdb$indices ri + join rdb$index_segments rs on ri.rdb$index_name = rs.rdb$index_name + join r on ri.rdb$relation_name = r.rel_name + and ri.rdb$index_name = r.idx_name + and (rs.rdb$field_position = r.fpos+1 or r.idx_on_expr = 1) + ) + --select * from r + + ,m as ( + select + rel_id + ,rel_name + ,idx_id + ,idx_name + ,iif(idx_on_expr = 1, '', substring(idx_key from 2)) as idx_key + from r + where fpos = seg_cnt-1 or idx_on_expr = 1 + ) + select m.rel_id, m.rel_name, m.idx_id, m.idx_name, m.idx_key, p.rdb$page_number as irt_page + from m + join rdb$pages p on m.rel_id = p.rdb$relation_id and p.rdb$page_type = {IRT_PAGE_TYPE} + order by rel_id, idx_id + """ + + with act.db.connect() as con: + cur = con.cursor() + cur.execute(sql_get_sys_tables_index_info) + rel_irt_map = {} + for r in cur: + rel_id, rel_name, idx_id, idx_name, idx_key, irt_page = r[:6] + idx_starting_fld = idx_key.split(",")[0] + + #print(f'{rel_id=}, {rel_name.strip()=}, {irt_page=}') + #print(f'{idx_id=}, {idx_name.strip()=}') + #print(f'{idx_key=}, starting field: {idx_starting_fld}') + + sql_to_make_recalc_idx_stat = f""" + select a.{idx_starting_fld}, count(*) + from {rel_name.strip()} a + join {rel_name.strip()} b on a.{idx_starting_fld} = b.{idx_starting_fld} + where a.{idx_starting_fld} = ? and a.{idx_starting_fld} < b.{idx_starting_fld} + group by 1 + """ + + #print(f'{sql_to_make_recalc_idx_stat=}') + ps = None + try: + # This must cause update index statistics of : + ps = cur.prepare(sql_to_make_recalc_idx_stat) + + # Print explained plan with padding eash line by dots in order to see indentations: + #print( '\n'.join([replace_leading(s) for s in ps.detailed_plan.split('\n')]) ) + #print('') + except DatabaseError as e: + print(e.__str__()) + print(e.gds_codes) + finally: + if ps: + ps.free() + + rel_irt_map[ rel_id ] = (rel_name, irt_page) + + rel_sel_map = {} # K = rel_name, V = minimal selectivity (i.e. min irtd_selec for all indices) + + # NB: we have to re-connect in order to see updated indices statistics! + with act.db.connect() as con: + cur = con.cursor() + + for rel_id, (rel_name, irt_no) in rel_irt_map.items(): + parse_index_root_page(act.db.db_path, con.info.page_size, rel_name, irt_no, rel_sel_map, verbose = False) + + if min(rel_sel_map.values()) > 0: + print(EXPECTED_MSG) + else: + print('UNEXPECTED: AT LEAST ONE OF SYSTEM TABLES HAS ZERO INDEX STATISTICS') + for rel_name, min_idx_selectivity in rel_sel_map.items(): + print(f'{rel_name=}, {min_idx_selectivity=}') + + act.expected_stdout = f""" + {EXPECTED_MSG} + """ + + act.stdout = capsys.readouterr().out + assert act.clean_stdout == act.clean_expected_stdout