2021-04-26 20:07:00 +02:00
|
|
|
#coding:utf-8
|
|
|
|
|
2022-01-21 18:49:26 +01:00
|
|
|
"""
|
|
|
|
ID: issue-2513
|
|
|
|
ISSUE: 2513
|
|
|
|
TITLE: Suboptimal join plan if there are selective non-indexed predicates involved
|
|
|
|
DESCRIPTION:
|
|
|
|
This test operates with three tables: "small", "medium" and "big" - which are INNER-join'ed.
|
|
|
|
It was found that there is some threshold ratio between number of rows in "small" vs "medium"
|
|
|
|
tables which affects on generated PLAN after reaching this ratio.
|
|
|
|
In particular, when tables have following values of rows: 26, 300 and 3000 - than optimizer
|
|
|
|
still DOES take in account "WHERE" condition with non-indexed field in SMALL table ("where s.sf = 0"),
|
|
|
|
and this lead to GOOD (fast) performance because SMALL table will be FIRST in the join order.
|
|
|
|
However, if number of rows in SMALL table will change from 26 to 27 (yes, just one row) than
|
|
|
|
optimizer do NOT consider additional condition (WHERE-filter) on that table and begin choose
|
|
|
|
SLOW (ineffective) plan where MEDIUM table is first in join order.
|
|
|
|
After discussion with dimitr, it was decided to:
|
|
|
|
1) put here TWO cases of query: with "fast" and "slow" plan;
|
|
|
|
2) replace too narrow threshold-pair (26 vs 27) with more wider (now: 15 and 45) because otherwise
|
|
|
|
even minor changes in optimizer can breake this test expected output.
|
|
|
|
Test make TWO PAIRS (i.e. total FOUR) runs:
|
|
|
|
1. When number of rows in SMALL table is less than threshold:
|
|
|
|
1.1. Without 'where'-filter on small table - to ensure that plan will contain MEDIUM table as first in join;
|
|
|
|
1.2. WITH 'where'-filter on small table - to ensure that plan will be CHANGED and SMALL table will be first;
|
|
|
|
2. When number of rows in SMALL table is beyond the threshold:
|
|
|
|
2.1. Without 'where'-filter on small table - plan will contain MEDIUM table as first in join;
|
|
|
|
2.2. WITH 'where'-filter on small table - plan will NOT changed, MEDIUM table will remain first in join.
|
|
|
|
Beside output of PLAN itself, test also:
|
|
|
|
1) displays index statistics
|
|
|
|
2) compares fetches with some 'upper-limit' constants in order to alert us in case when fetches become too high.
|
|
|
|
These constants have been obtained after sereval experiments with page_size = 4k, and their values are base on
|
|
|
|
following typical results (which are the same on 2.5 and 3.0):
|
|
|
|
FETCHES_1_1 19636
|
|
|
|
FETCHES_1_2 9094
|
|
|
|
FETCHES_2_1 19548
|
|
|
|
FETCHES_2_2 19548
|
|
|
|
NOTES:
|
|
|
|
[18.08.2020]
|
|
|
|
Test uses pre-created database which has several procedures for analyzing performance by with the help of MON$ tables.
|
|
|
|
Performance results are gathered in the table STAT_LOG, each odd run will save mon$ counters with "-" sign and next
|
|
|
|
(even) run will save them with "+" -- see SP_GATHER_STAT.
|
|
|
|
Aggegation of results is done in the view V_AGG_STAT (negative values relate to start, positive to the end of measure,
|
|
|
|
difference between them means performance expenses which we want to evaluate).
|
|
|
|
NOTE. Before each new measure we have to set generator G_GATHER_STAT to zero in order to make it produce proper values
|
|
|
|
starting with 1 (odd --> NEGATIVE sign for counters). This is done in SP_TRUNCATE_STAT.
|
|
|
|
[18.08.2020]
|
|
|
|
FB 4.x has incompatible behaviour with all previous versions since build 4.0.0.2131 (06-aug-2020):
|
|
|
|
statement 'alter sequence <seq_name> restart with 0' changes rdb$generators.rdb$initial_value to -1 thus next call
|
|
|
|
gen_id(<seq_name>,1) will return 0 (ZERO!) rather than 1.
|
|
|
|
See also CORE-6084 and its fix: https://github.com/FirebirdSQL/firebird/commit/23dc0c6297825b2e9006f4d5a2c488702091033d
|
|
|
|
|
|
|
|
This is considered as *expected* and is noted in doc/README.incompatibilities.3to4.txt
|
|
|
|
|
|
|
|
Because of this, it was decided to change code of SP_TRUNCATE_STAT: instead of 'alter sequence restart...' we do
|
|
|
|
reset like this: c = gen_id(g_gather_stat, -gen_id(g_gather_stat, 0));
|
|
|
|
JIRA: CORE-2078
|
|
|
|
"""
|
2021-04-26 20:07:00 +02:00
|
|
|
|
2022-01-21 18:49:26 +01:00
|
|
|
import pytest
|
|
|
|
from firebird.qa import *
|
2021-04-26 20:07:00 +02:00
|
|
|
|
2022-01-21 18:49:26 +01:00
|
|
|
db = db_factory(from_backup='mon-stat-gathering-3_0.fbk')
|
2021-04-26 20:07:00 +02:00
|
|
|
|
2022-01-21 18:49:26 +01:00
|
|
|
test_script = """
|
2021-04-26 20:07:00 +02:00
|
|
|
create or alter procedure sp_fill_data(a_sml_rows int, a_med_rows int, a_big_rows int) as begin end;
|
|
|
|
recreate table tbig(id int, sid int, mid int);
|
|
|
|
commit;
|
|
|
|
recreate table tsml(id int not null, sf int);
|
|
|
|
commit;
|
|
|
|
recreate table tmed(id int not null);
|
|
|
|
commit;
|
2021-11-09 11:01:26 +01:00
|
|
|
|
2021-04-26 20:07:00 +02:00
|
|
|
set term ^;
|
|
|
|
create or alter procedure sp_fill_data(a_sml_rows int, a_med_rows int, a_big_rows int)
|
|
|
|
as
|
|
|
|
declare i int;
|
|
|
|
declare i_mod2 smallint;
|
|
|
|
begin
|
2021-11-09 11:01:26 +01:00
|
|
|
|
2021-04-26 20:07:00 +02:00
|
|
|
-- gather old record versions if they are from previous run:
|
2021-11-09 11:01:26 +01:00
|
|
|
select count(*) from tsml into i;
|
2021-04-26 20:07:00 +02:00
|
|
|
select count(*) from tmed into i;
|
|
|
|
select count(*) from tbig into i;
|
2021-11-09 11:01:26 +01:00
|
|
|
|
2021-04-26 20:07:00 +02:00
|
|
|
i=0;
|
|
|
|
while (i < a_sml_rows) do
|
|
|
|
begin
|
|
|
|
insert into tsml(id, sf) values( :i, :i - (:i/2)*2 );
|
|
|
|
i = i+1;
|
|
|
|
end
|
2021-11-09 11:01:26 +01:00
|
|
|
|
2021-04-26 20:07:00 +02:00
|
|
|
i=0;
|
|
|
|
while (i < a_med_rows) do
|
|
|
|
begin
|
|
|
|
insert into tmed(id) values( :i );
|
|
|
|
i = i+1;
|
|
|
|
end
|
2021-11-09 11:01:26 +01:00
|
|
|
|
2021-04-26 20:07:00 +02:00
|
|
|
i=0;
|
|
|
|
while (i < a_big_rows) do
|
|
|
|
begin
|
|
|
|
insert into tbig(id, sid, mid) values( :i, :i - (:i/2)*2, :i - (:i/:a_med_rows)*:a_med_rows );
|
|
|
|
i = i+1;
|
|
|
|
end
|
2021-11-09 11:01:26 +01:00
|
|
|
|
2021-04-26 20:07:00 +02:00
|
|
|
end
|
|
|
|
^
|
2021-11-09 11:01:26 +01:00
|
|
|
|
2021-04-26 20:07:00 +02:00
|
|
|
create or alter procedure srv_recalc_idx_stat
|
|
|
|
returns (
|
|
|
|
tab_name varchar(31),
|
|
|
|
idx_name varchar(31),
|
|
|
|
idx_stat_afte numeric(12, 10) -- double precision
|
|
|
|
)
|
|
|
|
as
|
|
|
|
begin
|
|
|
|
-- Refresh index statistics all user (non-system) tables.
|
2021-11-09 11:01:26 +01:00
|
|
|
-- Needs to be run in regular basis (`cron` on linux, `at` on windows)
|
2021-04-26 20:07:00 +02:00
|
|
|
-- otherwise ineffective plans can be generated when doing inner joins!
|
|
|
|
-- Example to run: select * from srv_recalc_idx_stat;
|
|
|
|
for
|
|
|
|
select ri.rdb$relation_name, ri.rdb$index_name
|
|
|
|
from rdb$indices ri
|
|
|
|
join rdb$relations rr on ri.rdb$relation_name = rr.rdb$relation_name
|
|
|
|
where
|
|
|
|
coalesce(ri.rdb$system_flag,0)=0
|
|
|
|
and rr.rdb$relation_type = 0 -- exclude GTTs!
|
|
|
|
order by ri.rdb$relation_name, ri.rdb$index_name
|
|
|
|
into
|
|
|
|
tab_name, idx_name
|
|
|
|
do begin
|
|
|
|
execute statement( 'set statistics index '||idx_name )
|
|
|
|
with autonomous transaction
|
|
|
|
;
|
2021-11-09 11:01:26 +01:00
|
|
|
|
2021-04-26 20:07:00 +02:00
|
|
|
select ri.rdb$statistics
|
|
|
|
from rdb$indices ri
|
|
|
|
where ri.rdb$relation_name = :tab_name and ri.rdb$index_name = :idx_name
|
|
|
|
into idx_stat_afte;
|
2021-11-09 11:01:26 +01:00
|
|
|
|
2021-04-26 20:07:00 +02:00
|
|
|
suspend;
|
|
|
|
end
|
|
|
|
end
|
|
|
|
^
|
2021-11-09 11:01:26 +01:00
|
|
|
|
2021-04-26 20:07:00 +02:00
|
|
|
set term ;^
|
|
|
|
commit;
|
2021-11-09 11:01:26 +01:00
|
|
|
|
2021-04-26 20:07:00 +02:00
|
|
|
alter table tsml add constraint tsml_pk primary key(id) using index tsml_pk;
|
|
|
|
alter table tmed add constraint tmed_pk primary key(id) using index tmed_pk;
|
2021-11-09 11:01:26 +01:00
|
|
|
|
2021-04-26 20:07:00 +02:00
|
|
|
alter table tbig add constraint tbig_fk_sml foreign key(sid) references tsml using index tbig_idx1_fk_sml;
|
|
|
|
alter table tbig add constraint tbig_fk_med foreign key(mid) references tmed using index tbig_idx2_fk_med;
|
|
|
|
commit;
|
2021-11-09 11:01:26 +01:00
|
|
|
|
2021-04-26 20:07:00 +02:00
|
|
|
set width tab_name 31;
|
|
|
|
set width idx_name 31;
|
|
|
|
set list on;
|
2021-11-09 11:01:26 +01:00
|
|
|
|
2021-04-26 20:07:00 +02:00
|
|
|
-------------------- prepare-1 ------------------
|
|
|
|
--execute procedure sp_fill_data(26, 300, 3000);
|
|
|
|
execute procedure sp_fill_data(15, 300, 3000);
|
|
|
|
commit;
|
2021-11-09 11:01:26 +01:00
|
|
|
|
2021-04-26 20:07:00 +02:00
|
|
|
set transaction read committed;
|
2021-11-09 11:01:26 +01:00
|
|
|
select
|
|
|
|
tab_name as run1_tab_name
|
2021-04-26 20:07:00 +02:00
|
|
|
,idx_name as run1_idx_name
|
|
|
|
,idx_stat_afte run1_idx_stat
|
|
|
|
from srv_recalc_idx_stat where tab_name in ( upper('tsml'), upper('tmed'), upper('tbig') );
|
|
|
|
commit;
|
2021-11-09 11:01:26 +01:00
|
|
|
|
2021-04-26 20:07:00 +02:00
|
|
|
--------------------- run-1.1 -------------------
|
|
|
|
-----alter sequence g_gather_stat restart with 0;
|
|
|
|
execute procedure sp_truncate_stat;
|
|
|
|
commit;
|
2021-11-09 11:01:26 +01:00
|
|
|
|
2021-04-26 20:07:00 +02:00
|
|
|
execute procedure sp_gather_stat; ------- catch statistics BEFORE measured statement(s)
|
|
|
|
commit;
|
2021-11-09 11:01:26 +01:00
|
|
|
|
2021-04-26 20:07:00 +02:00
|
|
|
set plan on;
|
|
|
|
select count(*) cnt_1_1
|
|
|
|
from tsml s
|
|
|
|
join tbig b on b.sid = s.id
|
|
|
|
join tmed m on b.mid = m.id
|
|
|
|
;
|
|
|
|
set plan off;
|
2021-11-09 11:01:26 +01:00
|
|
|
|
2021-04-26 20:07:00 +02:00
|
|
|
execute procedure sp_gather_stat; ------- catch statistics AFTER measured statement(s)
|
|
|
|
commit;
|
2021-11-09 11:01:26 +01:00
|
|
|
|
2021-04-26 20:07:00 +02:00
|
|
|
--------------------- run-1.2 -------------------
|
|
|
|
execute procedure sp_gather_stat; ------- catch statistics BEFORE measured statement(s)
|
|
|
|
commit;
|
2021-11-09 11:01:26 +01:00
|
|
|
|
2021-04-26 20:07:00 +02:00
|
|
|
set plan on;
|
|
|
|
select count(*) cnt_1_2
|
|
|
|
from tsml s
|
|
|
|
join tbig b on b.sid = s.id
|
|
|
|
join tmed m on b.mid = m.id
|
|
|
|
where s.sf = 0 -- selective non-indexed boolean
|
|
|
|
;
|
|
|
|
set plan off;
|
2021-11-09 11:01:26 +01:00
|
|
|
|
2021-04-26 20:07:00 +02:00
|
|
|
execute procedure sp_gather_stat; ------- catch statistics AFTER measured statement(s)
|
|
|
|
commit;
|
2021-11-09 11:01:26 +01:00
|
|
|
|
2021-04-26 20:07:00 +02:00
|
|
|
-------------------- prepare-2 ------------------
|
|
|
|
delete from tbig;
|
|
|
|
delete from tmed;
|
|
|
|
delete from tsml;
|
|
|
|
commit;
|
2021-11-09 11:01:26 +01:00
|
|
|
|
2021-04-26 20:07:00 +02:00
|
|
|
--execute procedure sp_fill_data(27, 300, 3000);
|
|
|
|
execute procedure sp_fill_data(45, 300, 3000);
|
|
|
|
commit;
|
2021-11-09 11:01:26 +01:00
|
|
|
|
2021-04-26 20:07:00 +02:00
|
|
|
set transaction read committed;
|
2021-11-09 11:01:26 +01:00
|
|
|
select
|
|
|
|
tab_name as run2_tab_name
|
2021-04-26 20:07:00 +02:00
|
|
|
,idx_name as run2_idx_name
|
|
|
|
,idx_stat_afte run2_idx_stat
|
|
|
|
from srv_recalc_idx_stat where tab_name in ( upper('tsml'), upper('tmed'), upper('tbig') );
|
|
|
|
commit;
|
2021-11-09 11:01:26 +01:00
|
|
|
|
2021-04-26 20:07:00 +02:00
|
|
|
--------------------- run-2.1 -------------------
|
|
|
|
execute procedure sp_gather_stat; ------- catch statistics BEFORE measured statement(s)
|
|
|
|
commit;
|
2021-11-09 11:01:26 +01:00
|
|
|
|
2021-04-26 20:07:00 +02:00
|
|
|
set plan on;
|
|
|
|
select count(*) cnt_2_1
|
|
|
|
from tsml s
|
|
|
|
join tbig b on b.sid = s.id
|
|
|
|
join tmed m on b.mid = m.id
|
|
|
|
;
|
|
|
|
set plan off;
|
2021-11-09 11:01:26 +01:00
|
|
|
|
2021-04-26 20:07:00 +02:00
|
|
|
execute procedure sp_gather_stat; ------- catch statistics AFTER measured statement(s)
|
|
|
|
commit;
|
|
|
|
--------------------- run-2.2 -------------------
|
2021-11-09 11:01:26 +01:00
|
|
|
|
2021-04-26 20:07:00 +02:00
|
|
|
execute procedure sp_gather_stat; ------- catch statistics BEFORE measured statement(s)
|
|
|
|
commit;
|
|
|
|
set plan on;
|
|
|
|
select count(*) cnt_2_2
|
|
|
|
from tsml s
|
|
|
|
join tbig b on b.sid = s.id
|
|
|
|
join tmed m on b.mid = m.id
|
|
|
|
where s.sf = 0 -- selective non-indexed boolean
|
|
|
|
;
|
|
|
|
set plan off;
|
|
|
|
execute procedure sp_gather_stat; ------- catch statistics AFTER measured statement(s)
|
|
|
|
commit;
|
2021-11-09 11:01:26 +01:00
|
|
|
|
|
|
|
|
2021-04-26 20:07:00 +02:00
|
|
|
-- Here we define constants that serve as *upper* limit for fetches:
|
|
|
|
set term ^;
|
|
|
|
execute block as
|
|
|
|
begin
|
|
|
|
rdb$set_context('USER_SESSION','MAX_FETCHES_FOR_SLOW', 22000);
|
|
|
|
rdb$set_context('USER_SESSION','MAX_FETCHES_FOR_FAST', 11000);
|
|
|
|
end
|
|
|
|
^
|
|
|
|
set term ;^
|
2021-11-09 11:01:26 +01:00
|
|
|
|
2021-04-26 20:07:00 +02:00
|
|
|
-- Typical values for page_size 4K on 2.5 and 3.0:
|
|
|
|
-- FETCHES_1_1 19636
|
|
|
|
-- FETCHES_1_2 9094
|
|
|
|
-- FETCHES_2_1 19548
|
|
|
|
-- FETCHES_2_2 19548
|
|
|
|
|
|
|
|
-- Show results:
|
|
|
|
-- =============
|
|
|
|
select
|
|
|
|
iif( fetches_1_1 <= max_fetches_for_slow, 'acceptable', 'regression: '|| fetches_1_1 || ' > ' || max_fetches_for_slow) as fetches_1_1
|
|
|
|
,iif( fetches_1_2 <= max_fetches_for_fast, 'acceptable', 'regression: '|| fetches_1_2 || ' > ' || max_fetches_for_fast ) as fetches_1_2
|
|
|
|
,iif( fetches_2_1 <= max_fetches_for_slow, 'acceptable', 'regression: '|| fetches_2_1 || ' > ' || max_fetches_for_slow ) as fetches_2_1
|
|
|
|
,iif( fetches_2_2 <= max_fetches_for_slow, 'acceptable', 'regression: '|| fetches_2_2 || ' > ' || max_fetches_for_slow ) as fetches_2_2
|
|
|
|
from (
|
|
|
|
select
|
|
|
|
max( iif(rowset = 1, page_fetches, null) ) fetches_1_1
|
|
|
|
,max( iif(rowset = 2, page_fetches, null) ) fetches_1_2
|
|
|
|
,max( iif(rowset = 3, page_fetches, null) ) fetches_2_1
|
|
|
|
,max( iif(rowset = 4, page_fetches, null) ) fetches_2_2
|
|
|
|
,cast(rdb$get_context('USER_SESSION','MAX_FETCHES_FOR_SLOW') as int) as max_fetches_for_slow
|
|
|
|
,cast(rdb$get_context('USER_SESSION','MAX_FETCHES_FOR_FAST') as int) as max_fetches_for_fast
|
|
|
|
from v_agg_stat
|
|
|
|
)
|
|
|
|
;
|
2021-12-22 20:23:11 +01:00
|
|
|
"""
|
2021-04-26 20:07:00 +02:00
|
|
|
|
2022-01-21 18:49:26 +01:00
|
|
|
act = isql_act('db', test_script)
|
2021-04-26 20:07:00 +02:00
|
|
|
|
2022-01-21 18:49:26 +01:00
|
|
|
expected_stdout = """
|
2021-11-09 11:01:26 +01:00
|
|
|
RUN1_TAB_NAME TBIG
|
|
|
|
RUN1_IDX_NAME TBIG_IDX1_FK_SML
|
2021-04-26 20:07:00 +02:00
|
|
|
RUN1_IDX_STAT 0.5000000000
|
2021-11-09 11:01:26 +01:00
|
|
|
|
|
|
|
RUN1_TAB_NAME TBIG
|
|
|
|
RUN1_IDX_NAME TBIG_IDX2_FK_MED
|
2021-04-26 20:07:00 +02:00
|
|
|
RUN1_IDX_STAT 0.0033333334
|
2021-11-09 11:01:26 +01:00
|
|
|
|
|
|
|
RUN1_TAB_NAME TMED
|
|
|
|
RUN1_IDX_NAME TMED_PK
|
2021-04-26 20:07:00 +02:00
|
|
|
RUN1_IDX_STAT 0.0033333334
|
2021-11-09 11:01:26 +01:00
|
|
|
|
|
|
|
RUN1_TAB_NAME TSML
|
|
|
|
RUN1_IDX_NAME TSML_PK
|
2021-04-26 20:07:00 +02:00
|
|
|
RUN1_IDX_STAT 0.0666666701
|
|
|
|
|
|
|
|
PLAN JOIN (M NATURAL, B INDEX (TBIG_IDX2_FK_MED), S INDEX (TSML_PK))
|
|
|
|
CNT_1_1 3000
|
|
|
|
|
|
|
|
PLAN JOIN (S NATURAL, B INDEX (TBIG_IDX1_FK_SML), M INDEX (TMED_PK))
|
|
|
|
CNT_1_2 1500
|
|
|
|
|
2021-11-09 11:01:26 +01:00
|
|
|
RUN2_TAB_NAME TBIG
|
|
|
|
RUN2_IDX_NAME TBIG_IDX1_FK_SML
|
2021-04-26 20:07:00 +02:00
|
|
|
RUN2_IDX_STAT 0.5000000000
|
2021-11-09 11:01:26 +01:00
|
|
|
|
|
|
|
RUN2_TAB_NAME TBIG
|
|
|
|
RUN2_IDX_NAME TBIG_IDX2_FK_MED
|
2021-04-26 20:07:00 +02:00
|
|
|
RUN2_IDX_STAT 0.0033333334
|
2021-11-09 11:01:26 +01:00
|
|
|
|
|
|
|
RUN2_TAB_NAME TMED
|
|
|
|
RUN2_IDX_NAME TMED_PK
|
2021-04-26 20:07:00 +02:00
|
|
|
RUN2_IDX_STAT 0.0033333334
|
2021-11-09 11:01:26 +01:00
|
|
|
|
|
|
|
RUN2_TAB_NAME TSML
|
|
|
|
RUN2_IDX_NAME TSML_PK
|
2021-04-26 20:07:00 +02:00
|
|
|
RUN2_IDX_STAT 0.0222222228
|
|
|
|
|
|
|
|
PLAN JOIN (M NATURAL, B INDEX (TBIG_IDX2_FK_MED), S INDEX (TSML_PK))
|
|
|
|
CNT_2_1 3000
|
|
|
|
|
|
|
|
PLAN JOIN (M NATURAL, B INDEX (TBIG_IDX2_FK_MED), S INDEX (TSML_PK))
|
|
|
|
CNT_2_2 1500
|
|
|
|
|
|
|
|
FETCHES_1_1 acceptable
|
|
|
|
FETCHES_1_2 acceptable
|
|
|
|
FETCHES_2_1 acceptable
|
|
|
|
FETCHES_2_2 acceptable
|
2021-12-22 20:23:11 +01:00
|
|
|
"""
|
2021-04-26 20:07:00 +02:00
|
|
|
|
|
|
|
@pytest.mark.version('>=3.0')
|
2022-01-21 18:49:26 +01:00
|
|
|
def test_1(act: Action):
|
|
|
|
act.expected_stdout = expected_stdout
|
|
|
|
act.execute()
|
|
|
|
assert act.clean_stdout == act.clean_expected_stdout
|