6
0
mirror of https://github.com/FirebirdSQL/firebird-qa.git synced 2025-01-22 13:33:07 +01:00
firebird-qa/tests/bugs/core_5302_test.py

202 lines
7.6 KiB
Python

#coding:utf-8
"""
ID: issue-2047
ISSUE: 2047
TITLE: Performance regression when bulk inserting into table with indices
DESCRIPTION:
21.11.2021. Totally re-implemented, package 'psutil' must be installed.
We make two calls of psutil.Process(fb_pid).cpu_times() (before and after SQL code) and obtain CPU User Time
values from each result.
Difference between them can be considered as much more accurate performance estimation.
Test creates two tables: 'test_non_idx' and 'test_indexed' (second of them with several indices).
Then we create two stored procedures ('sp_non_idx' and 'sp_indexed') which perform INSERTs into these tables.
Number of INSERTs it set by variable N_COUNT_PER_MEASURE.
Each of these procedures is called <N_MEASURES> times, with saving CPU 'user_time' that OS dedicated to FB process
to complete its job (we use psutil package to obtain this value).
After each call we drop procedures, recreate tables again and create procedures in order to repeat DML.
Each result (difference between cpu_times().user values when apropriate procedure finishes) is added to the list.
Finally, we evaluate MEDIAN of ratios between cpu user time which was received for INDEXED and NON_INDEXED tables.
Obviously, time for indexed table must be greater than for non-indexed one.
If ratios median is less then threshold (see var. INSERTS_TIME_MAX_RATIO) then result can be considered as ACCEPTABLE.
Otherwise we show ratios median and list of their values for each measurement.
See also:
https://psutil.readthedocs.io/en/latest/#psutil.cpu_times
Result for FB _before_ fix:
3.0.0.32483 (built 15-apr-2016):
38 27 49 26 27 30 26 27 46 28 50 26 46 26 27 26 26 26 48 48 // median = 26.875; max = 50
Result for FB _after_ fix:
3.0.1.32609 (built 27-sep-2016):
10 11 10 10 11 10 9 10 11 9 14 11 10 10 13 10 10 11 10 10 // median = 10; max = 13
All builds of nov-2021 (3.0, 4.0 and 5.0; checked SS/CS) have similar result to this:
build mode median max
3.0.8.33446 CS 14 17
3.0.8.33527 SS 14 16
4.0.0.2422 CS 13 14
4.0.1.2660 SS 12 15
5.0.0.311 SS 14 17
5.0.0.309 CS 12 16
Reasonable value of threshold can be equal to MAXIMAL of these values plus 2..3.
Checked on: 5.0.0.311, 5.0.0.309; 4.0.1.2660, 4.0.0.2422; 3.0.8.33527, 3.0.8.33446 (all: SS/CS)
############################################################################################
Old comments (before 16.11.2021):
_BEFORE_ this ticket was fixed ratio was following:
Ratio for 4.0.0.258: ~32...34 -- poor
Ratio for 3.0.1.32566: ~23...24 -- poor
_AFTER_ ticket was fixed ratio is:
Ratio for 4.0.0.313: ~11...13 -- OK
Ratio for 3.0.1.32568: ~10...11 -- OK
21.11.2021. Checked on Linux (after installing pakage psutil):
5.0.0.313 SS: 94.000s
4.0.1.2668 SS: 87.167s
3.0.8.33540 SS: 86.931s
Fix for 4.0 was 07-jul-2016, see here:
https://github.com/FirebirdSQL/firebird/commit/a75e0af175ea6e803101b5fd62ec91cdf039b951
Fix for 3.0 was 27-jul-2016, see here:
https://github.com/FirebirdSQL/firebird/commit/96a24228b61003e72c68596faf3c4c4ed0b95ea1
JIRA: CORE-5302
FBTEST: bugs.core_5302
[25.07.2022] pzotov
Checked on 3.0.8.33535, 4.0.1.2692, 5.0.0.591
"""
import psutil
import pytest
from firebird.qa import *
#--------------------------------------------------------------------
def median(lst):
n = len(lst)
s = sorted(lst)
return (sum(s[n//2-1:n//2+1])/2.0, s[n//2])[n % 2] if n else None
#--------------------------------------------------------------------
###########################
### S E T T I N G S ###
###########################
# Number of PSQL calls:
#
N_MEASURES = 21
# How many iterations must be done in each of stored procedures when they work.
# DO NOT set this value less then 20'000, otherwise ratios will be unreal.
#
N_COUNT_PER_MEASURE = 50000
# Maximal value for MEDIAN of ratios between CPU user time when comparison was made.
# Results of 25.07.2022: 3.0.8.33535: ~12; 4.0.1.2692: ~9.5; 5.0.0.591: ~13
#
INSERTS_TIME_MAX_RATIO = 20
#############################
db = db_factory()
act = python_act('db')
MSG_PREFIX = 'CPU time ratio for Indexed vs NON-indexed inserts: '
expected_stdout = f"""
{MSG_PREFIX}acceptable.
"""
@pytest.mark.version('>=3.0')
def test_1(act: Action, capsys):
sel_tables_ddl='''
recreate table test_non_idx(x int, y int, z int)^
recreate table test_indexed(x int, y int, z int)^
create index test_indexed_x on test_indexed(x)^
create index test_indexed_y on test_indexed(y)^
create index test_indexed_z on test_indexed(z)^
create index test_indexed_xy on test_indexed(x, y)^
create index test_indexed_xz on test_indexed(x, z)^
create index test_indexed_xyz on test_indexed(x, y, z)^
'''
sp_make_proc_ddl='''
create or alter procedure sp_%(x_name)s ( n_count int = 50000) as
begin
while (n_count > 0) do
begin
insert into test_%(x_name)s (x, y, z) values(:n_count, :n_count, :n_count);
n_count = n_count - 1;
end
end
'''
sp_drop_proc_ddl='''
create or alter procedure sp_%(x_name)s ( n_count int = 50000) as begin end
'''
name_suffixes = ( 'non_idx', 'indexed' )
# --------------------------------------
sp_time = {}
with act.db.connect() as con:
cur = con.cursor()
cur.execute('select mon$server_pid as p from mon$attachments where mon$attachment_id = current_connection')
fb_pid = int(cur.fetchone()[0])
for i in range(0, N_MEASURES):
for x_name in name_suffixes:
#--- recreate procs with empty bodies ---
cur.execute( sp_drop_proc_ddl % locals() )
for x in sel_tables_ddl.split('^'):
#--- recreate tables ---
if x.strip():
cur.execute( x.strip() )
for x_name in name_suffixes:
#--- recreate procs with normal bodies ---
cur.execute( sp_make_proc_ddl % locals() )
con.commit()
for x_name in name_suffixes:
fb_info_init = psutil.Process(fb_pid).cpu_times()
cur.callproc( 'sp_%(x_name)s' % locals(), (N_COUNT_PER_MEASURE,) )
fb_info_curr = psutil.Process(fb_pid).cpu_times()
sp_time[ x_name, i ] = max(fb_info_curr.user - fb_info_init.user, 0.000001)
con.commit()
# --------------------------------------
ratio_lst = []
for i in range(0, N_MEASURES):
ratio_lst.append( sp_time['indexed',i] / sp_time['non_idx',i] )
median_ratio = median(ratio_lst)
print( MSG_PREFIX + ('acceptable.' if median_ratio < INSERTS_TIME_MAX_RATIO else '/* perf_issue_tag */ POOR: %s, more than threshold: %s' % ( '{:9g}'.format(median_ratio), '{:9g}'.format(INSERTS_TIME_MAX_RATIO) ) ) )
if median_ratio >= INSERTS_TIME_MAX_RATIO:
print('Ratio statistics for %d measurements' % N_MEASURES)
for p in ratio_lst:
print( '%12.2f' % p )
act.expected_stdout = expected_stdout
act.stdout = capsys.readouterr().out
assert act.clean_stdout == act.clean_expected_stdout