#coding:utf-8 """ ID: 8250 ISSUE: https://github.com/FirebirdSQL/firebird/issues/8250 TITLE: Bad performance on simple two joins query on tables with composed index DESCRIPTION: NOTES: [25.09.2024] pzotov 0. Tables in explained plans are specified in the same order for snapshots before and after fix. Difference can be seen only in added "keys: " clause. This clause currently exists only in FB 6.x, commit: https://github.com/FirebirdSQL/firebird/commit/c50b0aa652014ce3610a1890017c9dd436388c43 Because of this, test min_version = 6.0. 1. Improvement can be checked by parsing of explained plan if its top-level hash join looks like "Hash Join (inner) (keys: N, total key length: ...)" and value of is NOT LESS THAN some minimal threshold defined by variable MIN_REQUIRED_KEYS_IN_TOP_LEVEL_HJ. As explained by dimitr, for this test such value must be 5 (letter 24.09.2024 07:22). 2. Tables must have indices (defined by PK and FK constraints) for explained plan to show 'keys: N' with N > 1. Otherwise top-level HJ line will be 'keys: 1' and query will run as slow as it was specified in the ticket. The reason is that in case of missed indices optimizer uses other criteria for estimation of cost and plan will remain ineffective (at least for current FB 6.x). Explained by dimitr, letter 24.09.2024 12:29. Confirmed problem on 6.0.0.461. Checked on 6.0.0.467. """ import re import time import pytest from firebird.qa import * MIN_REQUIRED_KEYS_IN_TOP_LEVEL_HJ = 5 ROWS_LIMIT_FOR_CHILD_TABLES = 5000 init_sql = f""" create table test1(id1 int not null, id2 int not null, name varchar(30) not null); create table test2(id1 int not null, id2 int not null, code30 varchar(36) not null, descr varchar(36)); create table test3(id1 int not null, id2 int not null, code30 varchar(36) not null, code15 varchar(15) not null, price double precision); set term ^; execute block as declare n_cnt int = {ROWS_LIMIT_FOR_CHILD_TABLES}; declare i int; declare v_code30 type of column test2.code30; begin insert into test1(id1, id2, name) select 1, i, left(uuid_to_char(gen_uuid()), 30) from (select row_number()over()-1 i from rdb$types rows 16); i = 0; while (i < n_cnt) do begin v_code30 = lpad('', 36, uuid_to_char(gen_uuid())); insert into test2(id1, id2, code30, descr) values( 1, mod(:i, 4), :v_code30, :v_code30); insert into test3(id1, id2, code30, code15, price) values( 1, mod(:i, 4) , :v_code30, left(uuid_to_char(gen_uuid()), 15), round(1000)); i = i + 1; end end ^ set term ;^ commit; set echo on; alter table test1 add constraint t1_pk primary key(id1,id2); alter table test2 add constraint t2_pk primary key(id1,id2,code30); alter table test3 add constraint t3_pk primary key(id1,id2,code30,code15); """ db = db_factory(init = init_sql, page_size = 8192) substitutions = [ ('[ \t]+', ' ') ,('keys: \\d+, total key length: \\d+', 'keys, total key length') ,('record length: \\d+', 'record length') ] act = python_act('db', substitutions = substitutions) #----------------------------------------------------------- def replace_leading(source, char="."): stripped = source.lstrip() return char * (len(source) - len(stripped)) + stripped #----------------------------------------------------------- @pytest.mark.version('>=6.0') def test_1(act: Action, capsys): test_sql = """ select 1 x from test2 t2 join test3 t3 on t3.id1 = t2.id1 and t3.id2 = t2.id2 and t3.code30 = t2.code30 join test1 t1 on t1.id1 = t2.id1 and t1.id2 = t2.id2 """ # 'Hash Join (inner) (keys: 3, total key length: 38)' p_hj_keys = re.compile(r'Hash Join $inner$ \(keys: \d+', re.IGNORECASE) top_level_keys_found = -1 with act.db.connect() as con: cur = con.cursor() ps = cur.prepare(test_sql) print( '\n'.join([replace_leading(s) for s in ps.detailed_plan.split('\n')]) ) for s in ps.detailed_plan.split('\n'): if (pm := p_hj_keys.search(s)): top_level_keys_found = max(top_level_keys_found, int(pm.group().split()[-1])) break max_keys_msg = 'Top-level hash join keys: ' expected_keys = '' if top_level_keys_found >= MIN_REQUIRED_KEYS_IN_TOP_LEVEL_HJ: max_keys_msg += 'EXPECTED' expected_keys = max_keys_msg else: max_keys_msg += f'UNEXPECTED, {" too low: "+str(top_level_keys_found)+" - less than "+str(MIN_REQUIRED_KEYS_IN_TOP_LEVEL_HJ) if top_level_keys_found > 0 else "NOT FOUND"}' print(max_keys_msg) act.expected_stdout = f""" Select Expression ....-> Filter ........-> Hash Join (inner) (keys, total key length) ............-> Hash Join (inner) (keys, total key length) ................-> Table "TEST3" as "T3" Full Scan ................-> Record Buffer (record length) ....................-> Table "TEST1" as "T1" Full Scan ............-> Record Buffer (record length) ................-> Table "TEST2" as "T2" Full Scan {expected_keys} """ act.stdout = capsys.readouterr().out assert act.clean_stdout == act.clean_expected_stdout act.reset()