6
0
mirror of https://github.com/FirebirdSQL/firebird-qa.git synced 2025-01-22 21:43:06 +01:00
firebird-qa/tests/bugs/core_6542_test.py

157 lines
5.9 KiB
Python

#coding:utf-8
"""
ID: issue-6769
ISSUE: 6769
TITLE: More efficient implementation of SUBSTRING for UTF8 character set
DESCRIPTION:
21.11.2021. Totally re-implemented, package 'psutil' must be installed.
We make two calls of psutil.Process(fb_pid).cpu_times() (before and after SQL code) and obtain CPU User Time
values from each result.
Difference between them can be considered as much more accurate performance estimation.
We make <N_MEASURES> calls of two stored procedures with names: 'sp_ufss_test' and 'sp_utf8_test'.
Each procedure has input argument which is required number of iterations with evaluation result of SUBSTRING.
Number of iterations is set in variable N_COUNT_PER_MEASURE.
Each result (difference between cpu_times().user values when the appropriate procedure finishes) is added to the list.
Finally, we evaluate MEDIAN of ratios between cpu user time which was received for UTF8 and UNICODE_FSS charsets.
If this median is less than the threshold (see var. UTF8_TO_UFSS_MAX_RATIO) then result can be considered as ACCEPTABLE.
See also: https://psutil.readthedocs.io/en/latest/#psutil.cpu_times
Confirmed poor performance on 3.0.8.33446 (16.04.2021) and 4.0.0.2422 (15.04.2021):
median ratios are: 15.57 for FB-3 and 13.57 for FB-4.
Checked on Windows, builds: 3.0.8.33527, 4.0.1.2660, 5.0.0.310.
Result: median ratios are 1.0 in all cases.
When a heavy workload (created by other applications) exists, the threshold must be increased up to ~1.3.
21.11.2021. Checked on Linux (after installing package psutil):
5.0.0.313 SS: 4.159s
4.0.1.2668 SS: 3.786s
3.0.8.33540 SS: 3.274s
Old comment about other string functions (data obtained using datediff() values): // DO NOT DELETE!
=================
SUBSTRING remains most problematic from performance POV.
Ratio TIME_UTF8 / TIME_UFSS for other string functions (checked on 4.0.0.2448):
right(..., 10): 0.4; right(..., 8000): 0.4
left(...,10): 1.14; left(..., 8000): 0.3
reverse(): 0.7
upper() / lower(): 0.576 ... 0.635
position(): 0.5
trim(): 0.35
octet_length(): 1.00
char_length(): 0.632
bit_length(): 1.00
ascii_val(): 1.00
hash(): 1.00
overlay(): 0.31
replace(): 0.5
lpad()/rpad(): 0.73
=================
JIRA: CORE-6542
FBTEST: bugs.core_6542
NOTES:
[20.06.2022] pzotov
Checked on 4.0.1.2692, 3.0.8.33535 -- both Windows and Linux.
"""
import os
import psutil
import time
import pytest
from firebird.qa import *
#--------------------------------------------------------------------
def median(lst):
    """Return the median of the values in *lst*, or None when it is empty.

    *lst* may be any iterable of mutually orderable items (a list, a tuple,
    a generator, ...). For an odd number of items the middle element of the
    sorted sequence is returned unchanged; for an even number the arithmetic
    mean of the two middle elements is returned as a float.
    """
    # Sort first and take the length of the sorted copy, so that iterables
    # without len() (e.g. generators) are accepted as well.
    s = sorted(lst)
    n = len(s)
    if not n:
        return None
    mid = n // 2
    if n % 2:
        # Odd count: no arithmetic is performed, so items only need to be
        # orderable, not summable.
        return s[mid]
    return (s[mid - 1] + s[mid]) / 2.0
#--------------------------------------------------------------------
# Database fixture for this module; db_factory() is called without arguments.
# NOTE(review): db_factory / python_act presumably come from the
# 'from firebird.qa import *' star import -- confirm.
db = db_factory()
# Test action fixture, given the name of the database fixture ('db').
act = python_act('db')
# Text that test_1 must print when the median ratio is below the threshold;
# it is compared with the captured stdout at the end of test_1.
expected_stdout = """
Duration ratio: acceptable
"""
#@pytest.mark.version('>=3.0.8')
@pytest.mark.version('>=3.0')
def test_1(act: Action, capsys):
    """Compare CPU user time of SUBSTRING on UTF8 vs UNICODE_FSS strings.

    Two stored procedures ('sp_ufss_test' and 'sp_utf8_test') are created;
    each evaluates SUBSTRING in a loop on a varchar(8000) of its own
    character set. Both are called N_MEASURES times, and for every call the
    growth of cpu_times().user of the server process is recorded. The median
    of the per-measurement ratios (utf8 / unicode_fss) must be less than
    UTF8_TO_UFSS_MAX_RATIO, otherwise diagnostic output is printed and the
    comparison with the expected stdout fails.
    """

    ###########################
    ###   S E T T I N G S   ###
    ###########################

    # How many times we call PSQL code (two stored procedures:
    # one works with UNICODE_FSS strings, second with UTF8 strings):
    N_MEASURES = 31

    # How many iterations must be done in each of stored procedures when they work:
    N_COUNT_PER_MEASURE = 100000

    # Maximal value for MEDIAN of ratios between CPU user time spent by the
    # UTF8 procedure and by the UNICODE_FSS procedure.
    UTF8_TO_UFSS_MAX_RATIO = 3.00
    #############################

    # Procedure name prefix -> character set of its string variables.
    cset_map = {'sp_ufss': 'unicode_fss', 'sp_utf8': 'utf8'}

    with act.db.connect() as con:
        for cset_brief, cset_name in cset_map.items():
            # The literal below is kept flush-left on purpose: the text sent
            # to the server stays exactly as it was.
            sp_cset_ddl = f''' create or alter procedure {cset_brief}_test (
n_count int = 100000
) as
declare str1 varchar(8000) character set {cset_name}; -- unicode_fss / utf8
declare str2 varchar(10) character set {cset_name}; -- unicode_fss / utf8
begin
str1 = lpad('abcd', 8000, '--');
while (n_count > 0) do
begin
str2 = substring(str1 from 1 for 10);
n_count = n_count - 1;
end
end
'''
            con.execute_immediate(sp_cset_ddl)
            con.commit()

    # (procedure prefix, measurement number) -> CPU user time delta, seconds.
    sp_time = {}
    with act.db.connect() as con:
        cur = con.cursor()
        # PID of the server process that serves this attachment.
        cur.execute('select mon$server_pid as p from mon$attachments where mon$attachment_id = current_connection')
        fb_pid = int(cur.fetchone()[0])

        # One process handle is enough: cpu_times() is queried anew on every
        # call, so there is no need to rebuild the handle for each sample.
        fb_process = psutil.Process(fb_pid)

        for i in range(N_MEASURES):
            for x_charset in cset_map:
                fb_info_init = fb_process.cpu_times()
                cur.callproc(x_charset + '_test', (N_COUNT_PER_MEASURE,))
                fb_info_curr = fb_process.cpu_times()
                # Lower bound keeps the value positive, so the division
                # below can never hit zero.
                sp_time[x_charset, i] = max(fb_info_curr.user - fb_info_init.user, 0.000001)

    ratio_lst = [sp_time['sp_utf8', i] / sp_time['sp_ufss', i] for i in range(N_MEASURES)]
    median_ratio = median(ratio_lst)

    if median_ratio < UTF8_TO_UFSS_MAX_RATIO:
        print('Duration ratio: ' + 'acceptable')
    else:
        print('Duration ratio: ' + '/* perf_issue_tag */ POOR: %12.2f, more than threshold: %12.2f' % (median_ratio, UTF8_TO_UFSS_MAX_RATIO))
        # Dump every ratio to simplify analysis of the failure.
        print('Ratio statistics for %d measurements' % N_MEASURES)
        for p in ratio_lst:
            print('%12.2f' % p)

    act.expected_stdout = expected_stdout
    act.stdout = capsys.readouterr().out
    assert act.clean_stdout == act.clean_expected_stdout