mirror of
https://github.com/FirebirdSQL/firebird-qa.git
synced 2025-01-22 13:33:07 +01:00
471 lines
20 KiB
Python
471 lines
20 KiB
Python
#coding:utf-8
|
|
|
|
"""
|
|
ID: issue-8057
|
|
ISSUE: https://github.com/FirebirdSQL/firebird/issues/8057
|
|
TITLE: Let optimizer automatically update empty index statistics for relatively small system tables
|
|
DESCRIPTION:
|
|
Only tables from <CHECKED_SYS_TABLES> list are checked (with additional filtering only those which have indices).
|
|
For each index of selected RDB tables we get name of its first field (i.e. started part of index key).
|
|
Then we construct query with INNER JOIN that involves such index to be included in execution plan.
|
|
For example, for table RDB$DEPENDENCIES with two indices:
|
|
CREATE INDEX RDB$INDEX_27 ON RDB$DEPENDENCIES (RDB$DEPENDENT_NAME, RDB$DEPENDENT_TYPE);
|
|
CREATE INDEX RDB$INDEX_28 ON RDB$DEPENDENCIES (RDB$DEPENDED_ON_NAME, RDB$DEPENDED_ON_TYPE, RDB$FIELD_NAME);
|
|
- following queries will be generated:
|
|
-------------------------------------
|
|
1) Index for columns (RDB$DEPENDENT_NAME, RDB$DEPENDENT_TYPE) will be involved here:
|
|
select a.rdb$dependent_name, count(*)
|
|
from rdb$dependencies a
|
|
join rdb$dependencies b on a.rdb$dependent_name = b.rdb$dependent_name
|
|
where a.rdb$dependent_name = ? and a.rdb$dependent_name < b.rdb$dependent_name
|
|
group by 1
|
|
|
|
2) Index for column (RDB$DEPENDED_ON_NAME, RDB$DEPENDED_ON_TYPE, RDB$FIELD_NAME) will be involved here:
|
|
select a.rdb$depended_on_name, count(*)
|
|
from rdb$dependencies a
|
|
join rdb$dependencies b on a.rdb$depended_on_name = b.rdb$depended_on_name
|
|
where a.rdb$depended_on_name = ? and a.rdb$depended_on_name < b.rdb$depended_on_name
|
|
group by 1
|
|
-------------------------------------
|
|
|
|
When every such query is prepared, optimizer must automatically update appropriate index statistics.
|
|
KEY NOTE: this will be done only when system table is not empty and has no more than 100 data pages,
|
|
see FB (see in Optimizer.cpp: "... (relPages->rel_data_pages > 0) && (relPages->rel_data_pages < 100)").
|
|
|
|
Because of need to have some data in RDB tables, we must run DDL which force these tables to be fulfilled.
|
|
For this purpose test creates <INIT_DB_OBJECTS_CNT> DB objects for each object type (mapping, sequences etc).
|
|
Also, "NBACKUP -B ..." is called <NBACKUP_RUNS_CNT> times in order to fulfill table RDB$BACKUP_HISTORY table.
|
|
|
|
Then, we get IndexRoot page number for each of selected RDB tables and start to parse this page content.
|
|
During this parsing (see func 'parse_index_root_page') we get values of <K>th statistics for each of <N> starting
|
|
parts of compound index (or for single segment for usual index). We accumulate this data in rel_sel_map{}.
|
|
Finally, we check that all indices has NON-zero statistics: rel_sel_map{} must have NO items with zero values.
|
|
NOTES:
|
|
[21.12.2024] pzotov
|
|
Currently only FB 6.x has this feature (since 22-mar-2024 11:46).
|
|
Commit: https://github.com/FirebirdSQL/firebird/commit/ef66a9b4d803d5129a10350c54f00bc637c09b48
|
|
|
|
::: ACHTUNG ::: Index statistics must be searched in the Index Root page rather than in RDB$INDICES!
|
|
Internals of misc FB page types can be found here:
|
|
https://firebirdsql.org/file/documentation/html/en/firebirddocs/firebirdinternals/firebird-internals.html
|
|
|
|
It is supposed that there are no expression-based indices for selected system tables (this case was not investigated).
|
|
|
|
Confirmed ticket issue on 6.0.0.294-c353de4 (21-mar-2024 16:45): some of system tables remain with non-updated index statistics.
|
|
Checked on 6.0.0.295-ef66a9b (22-mar-2024 13:48): all OK,every of checked system tables has non-zero statistics for its indices.
|
|
Test executiuon time: ~8...10 seconds.
|
|
|
|
Thanks to Vlad for explanations.
|
|
"""
|
|
import sys
|
|
import binascii
|
|
import struct
|
|
from typing import List
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
from firebird.qa import *
|
|
from firebird.driver import DatabaseError, DbInfoCode
|
|
|
|
import locale
|
|
import time
|
|
|
|
EXPECTED_MSG = 'Expected: every checked system table has non-zero index statistics.'
|
|
|
|
IRT_PAGE_TYPE = 6
|
|
CHECKED_SYS_TABLES = """
|
|
(
|
|
'RDB$AUTH_MAPPING'
|
|
,'RDB$BACKUP_HISTORY'
|
|
,'RDB$CHARACTER_SETS'
|
|
,'RDB$CHECK_CONSTRAINTS'
|
|
,'RDB$COLLATIONS'
|
|
,'RDB$DEPENDENCIES'
|
|
,'RDB$EXCEPTIONS'
|
|
,'RDB$FIELDS'
|
|
,'RDB$FIELD_DIMENSIONS'
|
|
,'RDB$FILTERS'
|
|
,'RDB$FORMATS'
|
|
,'RDB$FUNCTIONS'
|
|
,'RDB$FUNCTION_ARGUMENTS'
|
|
,'RDB$GENERATORS'
|
|
,'RDB$INDEX_SEGMENTS'
|
|
,'RDB$INDICES'
|
|
,'RDB$PACKAGES'
|
|
,'RDB$PROCEDURES'
|
|
,'RDB$PROCEDURE_PARAMETERS'
|
|
,'RDB$PUBLICATIONS'
|
|
,'RDB$PUBLICATION_TABLES'
|
|
,'RDB$REF_CONSTRAINTS'
|
|
,'RDB$RELATIONS'
|
|
,'RDB$RELATION_CONSTRAINTS'
|
|
,'RDB$RELATION_FIELDS'
|
|
,'RDB$ROLES'
|
|
,'RDB$SECURITY_CLASSES'
|
|
,'RDB$TRIGGERS'
|
|
,'RDB$TYPES'
|
|
,'RDB$USER_PRIVILEGES'
|
|
,'RDB$VIEW_RELATIONS'
|
|
)
|
|
"""
|
|
|
|
NBACKUP_RUNS_CNT = 10
|
|
INIT_DB_OBJECTS_CNT = 30
|
|
|
|
# SQL script which will cause filling of RDB tables with some data:
|
|
INIT_DB_OBJECTS_SQL = f"""
|
|
alter database enable publication;
|
|
set term ^;
|
|
execute block as
|
|
declare n_obj_cnt smallint = {INIT_DB_OBJECTS_CNT};
|
|
declare i int;
|
|
declare v_sttm varchar(8190);
|
|
begin
|
|
i = 0;
|
|
while (i < n_obj_cnt) do
|
|
begin
|
|
v_sttm = 'recreate sequence g_' || i;
|
|
execute statement v_sttm;
|
|
v_sttm = 'create or alter mapping local_map_' || i || ' using any plugin from group musicians to role guitarist';
|
|
execute statement v_sttm;
|
|
|
|
begin
|
|
v_sttm = 'drop collation name_coll_' || i;
|
|
execute statement v_sttm;
|
|
when any do
|
|
begin
|
|
end
|
|
end
|
|
|
|
begin
|
|
v_sttm = 'drop role r_manager_' || i;
|
|
execute statement v_sttm;
|
|
when any do
|
|
begin
|
|
end
|
|
end
|
|
|
|
begin
|
|
v_sttm = 'drop table tbl_' || i;
|
|
execute statement v_sttm;
|
|
when any do
|
|
begin
|
|
end
|
|
end
|
|
|
|
begin
|
|
v_sttm = 'drop domain dm_' || i;
|
|
execute statement v_sttm;
|
|
when any do
|
|
begin
|
|
end
|
|
end
|
|
|
|
begin
|
|
v_sttm = 'drop filter jpg_' || i;
|
|
execute statement v_sttm;
|
|
when any do
|
|
begin
|
|
end
|
|
end
|
|
|
|
-- examples\api\api9f.sql
|
|
-- declare filter desc_filter_01 -- ==> will be saved in indexed column rdb$filters.rdb$function_name
|
|
-- input_type 1
|
|
-- output_type -4
|
|
-- entry_point 'desc_filter'
|
|
-- module_name 'api9f'
|
|
-- ;
|
|
|
|
v_sttm = 'declare filter jpg_' || i || ' input_type ' || i || ' output_type -4 entry_point ''desc_filter'' module_name ''api9f''';
|
|
execute statement v_sttm;
|
|
|
|
|
|
v_sttm = 'create collation name_coll_' || i || ' for utf8 from unicode case insensitive';
|
|
execute statement v_sttm;
|
|
|
|
v_sttm = 'recreate exception exc_' || i || ' ''missing element with index @1''';
|
|
execute statement v_sttm;
|
|
|
|
v_sttm = 'create role r_manager_' || i;
|
|
execute statement v_sttm;
|
|
|
|
v_sttm = 'create domain dm_' || i || ' as int not null check(value > 0)';
|
|
execute statement v_sttm;
|
|
|
|
v_sttm = 'recreate table tbl_' || i
|
|
|| '( id int generated by default as identity constraint pk_tbl_'|| i || ' primary key'
|
|
|| ', pid int'
|
|
|| ', f_0 dm_' || i
|
|
|| ', f_01 int'
|
|
|| ', f_02 int[3,4]'
|
|
|| ', constraint fktbl_' || i || ' foreign key(pid) references tbl_' || i || '(id)'
|
|
|| ', constraint chk_tbl_' || i || ' check (f_01 > 0)'
|
|
|| ')'
|
|
;
|
|
execute statement v_sttm;
|
|
|
|
v_sttm = 'recreate trigger trg_' || i || '_bi for tbl_' || i
|
|
|| ' active before insert as'
|
|
|| ' begin'
|
|
|| ' end'
|
|
;
|
|
execute statement v_sttm;
|
|
|
|
execute statement 'alter database include table tbl_' || i || ' to publication';
|
|
|
|
|
|
v_sttm = 'recreate view vew_' || i || ' as select 1 x from rdb$database';
|
|
execute statement v_sttm;
|
|
|
|
v_sttm = 'create or alter procedure sp_' || i || '(a_0 int, a_1 varchar(10)) as begin end';
|
|
execute statement v_sttm;
|
|
|
|
v_sttm = 'create or alter function fn_' || i || '(a_0 int, a_1 int) returns int as begin return a_0 + a_1; end';
|
|
execute statement v_sttm;
|
|
|
|
v_sttm = 'create or alter package pg_' || i || ' as begin function pg_fn() returns int; end';
|
|
execute statement v_sttm;
|
|
|
|
v_sttm = 'recreate package body pg_' || i || ' as begin function pg_fn() returns int as begin return 1; end end';
|
|
execute statement v_sttm;
|
|
|
|
i = i + 1;
|
|
end
|
|
end
|
|
^
|
|
------------------------------------
|
|
set term ;^
|
|
commit;
|
|
"""
|
|
|
|
db = db_factory(init = INIT_DB_OBJECTS_SQL)
|
|
act = python_act('db', substitutions = [('[ \t]+', ' ')])
|
|
tmp_nbk_lst = temp_files( [ f'tmp_8057.{i}.nbk' for i in range(NBACKUP_RUNS_CNT) ] )
|
|
|
|
#-----------------------------------------------------------------------
|
|
|
|
def parse_index_root_page(db_file, pg_size, rel_name, irt_page_number, rel_sel_map, verbose = False):
|
|
|
|
# rel_sel_map -- byref
|
|
min_irtd_selec = sys.float_info.max
|
|
|
|
with open( db_file, "rb") as db_handle:
|
|
db_handle.seek( irt_page_number * pg_size )
|
|
page_content = db_handle.read( pg_size )
|
|
page_as_hex=binascii.hexlify( page_content )
|
|
if verbose:
|
|
print(f'{pg_size=}, page_as_hex:')
|
|
print(page_as_hex.decode("utf-8"))
|
|
|
|
# https://firebirdsql.org/file/documentation/html/en/firebirddocs/firebirdinternals/firebird-internals.html#fbint-page-6
|
|
# https://docs.python.org/3/library/struct.html#format-characters
|
|
|
|
# Two bytes, UNsigned. Offset 0x10 on the page. The relation id. This is the value of RDB$RELATIONS.RDB$RELATION_ID.
|
|
irt_relation = struct.unpack_from('@H', page_content[0x10:0x12])[0] # (128,) --> 128
|
|
|
|
# Two bytes, UNsigned. Offset 0x12 on the page. The number of indices defined for this table.
|
|
irt_count = struct.unpack_from('@H', page_content[0x12:0x14])[0]
|
|
|
|
if verbose:
|
|
print(f'{irt_relation=}, {rel_name.strip()=}, {irt_count=}')
|
|
|
|
for i in range(irt_count):
|
|
# 4+4+2+1+1 = 12
|
|
|
|
irt_root_offset_i = i * 12 + int(0x14)
|
|
irt_tran_offset_i = i * 12 + int(0x18)
|
|
irt_desc_offset_i = i * 12 + int(0x1c)
|
|
irt_keys_offset_i = i * 12 + int(0x1e)
|
|
irt_flags_offset_i = i * 12 + int(0x1f)
|
|
|
|
# Four bytes, SIGNED. Offset 0x00 in each descriptor array entry.
|
|
# This field is the page number where the root page for the individual index (page type 0x07) is located.
|
|
irt_root_i = struct.unpack_from('@i', page_content[irt_root_offset_i : irt_root_offset_i + 4])[0]
|
|
|
|
# Four bytes, SIGNED. Offset 0x04 in each descriptor array entry.
|
|
# Normally this field will be zero but if an index is in the process of being created, the transaction id will be found here.
|
|
irt_tran_i = struct.unpack_from('@i', page_content[irt_tran_offset_i : irt_tran_offset_i + 4])[0]
|
|
|
|
# Two bytes, UNsigned. Offset 0x08 in each descriptor array entry. This field holds the offset, from the start of the page,
|
|
# to the index field descriptors which are located at the bottom end (ie, highest addresses) of the page.
|
|
# To calculate the starting address, add the value in this field to the address of the start of the page.
|
|
irt_desc_i = struct.unpack_from('@H', page_content[irt_desc_offset_i : irt_desc_offset_i + 2])[0]
|
|
|
|
# One byte, UNsigned. This defines the number of keys (columns) in this index.
|
|
irt_keys_i = struct.unpack_from('@B', page_content[ irt_keys_offset_i : irt_keys_offset_i + 1])[0]
|
|
|
|
# One byte, UNsigned. The flags define various attributes for this index, these are encoded into various bits in the field, as follows:
|
|
# See src/jrd/btr.h
|
|
# Bit 0 : Index is unique (set) or not (unset).
|
|
# Bit 1 : Index is descending (set) or ascending (unset).
|
|
# Bit 2 : Index [creation?] is in progress (set) or not (unset).
|
|
# Bit 3 : Index is a foreign key index (set) or not (unset).
|
|
# Bit 4 : Index is a primary key index (set) or not (unset).
|
|
# Bit 5 : Index is expression based (set) or not (unset).
|
|
# Bit 6 : Index is conditional
|
|
irt_flags_i = struct.unpack_from('@B', page_content[ irt_flags_offset_i : irt_flags_offset_i + 1])[0]
|
|
|
|
if verbose:
|
|
print(f' {i=} ::: {irt_root_i=}, {irt_tran_i=}, {irt_desc_i=}, {irt_keys_i=}, {irt_flags_i=}, bin(irt_flags_i)=','{0:08b}'.format(irt_flags_i))
|
|
|
|
for j in range(irt_keys_i):
|
|
# 2+2+4 = 8
|
|
# Two bytes, UNsigned. Offset 0x00 in each field descriptor. This field defines the field number of the table that makes up 'this' field in the index.
|
|
# This number is equivalent to RDB$RELATION_FIELDS.RDB$FIELD_ID.
|
|
irtd_field = struct.unpack_from('@H', page_content[ j*8 + irt_desc_i : j*8 + irt_desc_i + 2])[0]
|
|
#print(f' column: {j} ::: {irtd_field=}')
|
|
|
|
# Two bytes, UNsigned. Offset 0x02 in each field descriptor. This determines the data type of the appropriate field in the index.
|
|
irtd_itype = struct.unpack_from('@H', page_content[ j*8 + irt_desc_i + 2 : j*8 + irt_desc_i + 4])[0]
|
|
#print(f' {irtd_itype=}')
|
|
|
|
# Four bytes, floating point format. Offset 0x04 in each field descriptor. This field holds the selectivity of this particular column in the index.
|
|
irtd_selec = struct.unpack_from('@f', page_content[ j*8 + irt_desc_i + 4 : j*8 + irt_desc_i + 8])[0]
|
|
min_irtd_selec = min(min_irtd_selec, irtd_selec)
|
|
#print(f' {irtd_selec=}')
|
|
|
|
# Input, byRef:
|
|
rel_sel_map[rel_name.strip()] = min_irtd_selec
|
|
|
|
#-----------------------------------------------------------------------------
|
|
|
|
def replace_leading(source, char="."):
|
|
stripped = source.lstrip()
|
|
return char * (len(source) - len(stripped)) + stripped
|
|
|
|
#-----------------------------------------------------------------------
|
|
|
|
@pytest.mark.version('>=6.0')
|
|
def test_1(act: Action, tmp_nbk_lst: List[Path], capsys):
|
|
|
|
try:
|
|
for i,tmp_nbk_i in enumerate(tmp_nbk_lst):
|
|
act.expected_stderr = ''
|
|
tmp_nbk_i.unlink(missing_ok = True)
|
|
act.nbackup(switches=['-b', str(i), act.db.dsn, tmp_nbk_i], io_enc = locale.getpreferredencoding())
|
|
assert act.clean_stderr == act.clean_expected_stderr
|
|
act.reset()
|
|
except DatabaseError as e:
|
|
print(e.__str__())
|
|
|
|
#-----------------------------------------------------------
|
|
sql_get_sys_tables_index_info = f"""
|
|
with recursive r
|
|
as (
|
|
select
|
|
rr.rdb$relation_id as rel_id
|
|
,rr.rdb$relation_name rel_name
|
|
,ri.rdb$index_id-1 as idx_id
|
|
,ri.rdb$index_name idx_name
|
|
,ri.rdb$segment_count seg_cnt
|
|
,rs.rdb$field_position fpos
|
|
,rs.rdb$field_name fname
|
|
,',' || cast( trim(rs.rdb$field_name) as varchar(8190) ) as idx_key
|
|
,sign(octet_length(ri.rdb$expression_blr)) as idx_on_expr
|
|
from rdb$relations rr
|
|
join rdb$indices ri on rr.rdb$relation_name = ri.rdb$relation_name
|
|
join rdb$index_segments rs on ri.rdb$index_name = rs.rdb$index_name
|
|
where
|
|
ri.rdb$system_flag = 1
|
|
and ri.rdb$relation_name in {CHECKED_SYS_TABLES}
|
|
and ri.rdb$index_inactive is distinct from 1
|
|
and (rs.rdb$field_position = 0 or ri.rdb$expression_blr is not null)
|
|
|
|
union all
|
|
|
|
select
|
|
r.rel_id
|
|
,r.rel_name
|
|
,ri.rdb$index_id-1
|
|
,ri.rdb$index_name
|
|
,r.seg_cnt
|
|
,rs.rdb$field_position
|
|
,rs.rdb$field_name fname
|
|
,r.idx_key || ',' || trim(rs.rdb$field_name )
|
|
,r.idx_on_expr
|
|
from rdb$indices ri
|
|
join rdb$index_segments rs on ri.rdb$index_name = rs.rdb$index_name
|
|
join r on ri.rdb$relation_name = r.rel_name
|
|
and ri.rdb$index_name = r.idx_name
|
|
and (rs.rdb$field_position = r.fpos+1 or r.idx_on_expr = 1)
|
|
)
|
|
--select * from r
|
|
|
|
,m as (
|
|
select
|
|
rel_id
|
|
,rel_name
|
|
,idx_id
|
|
,idx_name
|
|
,iif(idx_on_expr = 1, '<expr>', substring(idx_key from 2)) as idx_key
|
|
from r
|
|
where fpos = seg_cnt-1 or idx_on_expr = 1
|
|
)
|
|
select m.rel_id, m.rel_name, m.idx_id, m.idx_name, m.idx_key, p.rdb$page_number as irt_page
|
|
from m
|
|
join rdb$pages p on m.rel_id = p.rdb$relation_id and p.rdb$page_type = {IRT_PAGE_TYPE}
|
|
order by rel_id, idx_id
|
|
"""
|
|
|
|
with act.db.connect() as con:
|
|
cur = con.cursor()
|
|
cur.execute(sql_get_sys_tables_index_info)
|
|
rel_irt_map = {}
|
|
for r in cur:
|
|
rel_id, rel_name, idx_id, idx_name, idx_key, irt_page = r[:6]
|
|
idx_starting_fld = idx_key.split(",")[0]
|
|
|
|
#print(f'{rel_id=}, {rel_name.strip()=}, {irt_page=}')
|
|
#print(f'{idx_id=}, {idx_name.strip()=}')
|
|
#print(f'{idx_key=}, starting field: {idx_starting_fld}')
|
|
|
|
sql_to_make_recalc_idx_stat = f"""
|
|
select a.{idx_starting_fld}, count(*)
|
|
from {rel_name.strip()} a
|
|
join {rel_name.strip()} b on a.{idx_starting_fld} = b.{idx_starting_fld}
|
|
where a.{idx_starting_fld} = ? and a.{idx_starting_fld} < b.{idx_starting_fld}
|
|
group by 1
|
|
"""
|
|
|
|
#print(f'{sql_to_make_recalc_idx_stat=}')
|
|
ps = None
|
|
try:
|
|
# This must cause update index statistics of <idx_name>:
|
|
ps = cur.prepare(sql_to_make_recalc_idx_stat)
|
|
|
|
# Print explained plan with padding eash line by dots in order to see indentations:
|
|
#print( '\n'.join([replace_leading(s) for s in ps.detailed_plan.split('\n')]) )
|
|
#print('')
|
|
except DatabaseError as e:
|
|
print(e.__str__())
|
|
print(e.gds_codes)
|
|
finally:
|
|
if ps:
|
|
ps.free()
|
|
|
|
rel_irt_map[ rel_id ] = (rel_name, irt_page)
|
|
|
|
rel_sel_map = {} # K = rel_name, V = minimal selectivity (i.e. min irtd_selec for all indices)
|
|
|
|
# NB: we have to re-connect in order to see updated indices statistics!
|
|
with act.db.connect() as con:
|
|
cur = con.cursor()
|
|
|
|
for rel_id, (rel_name, irt_no) in rel_irt_map.items():
|
|
parse_index_root_page(act.db.db_path, con.info.page_size, rel_name, irt_no, rel_sel_map, verbose = False)
|
|
|
|
if min(rel_sel_map.values()) > 0:
|
|
print(EXPECTED_MSG)
|
|
else:
|
|
print('UNEXPECTED: AT LEAST ONE OF SYSTEM TABLES HAS ZERO INDEX STATISTICS')
|
|
for rel_name, min_idx_selectivity in rel_sel_map.items():
|
|
print(f'{rel_name=}, {min_idx_selectivity=}')
|
|
|
|
act.expected_stdout = f"""
|
|
{EXPECTED_MSG}
|
|
"""
|
|
|
|
act.stdout = capsys.readouterr().out
|
|
assert act.clean_stdout == act.clean_expected_stdout
|