2021-12-30 19:43:52 +01:00
#coding:utf-8
2022-01-27 20:08:36 +01:00
"""
ID : issue - 7038
ISSUE : 7038
TITLE : Improve performance of STARTING WITH with insensitive collations
DESCRIPTION :
21.11 .2021 . Totally re - implemented , package ' psutil ' must be installed .
We make two calls of psutil . Process ( fb_pid ) . cpu_times ( ) ( before and after SQL code ) and obtain CPU User Time
values from each result .
Difference between them can be considered as much more accurate performance estimation .
We make < N_MEASURES > calls of two stored procedures with names : ' sp_ptbr_test ' and ' sp_utf8_test ' .
Each procedure has input argument which is required number of iterations with evaluation result of STARTING WITH .
Number of iterations it set in variable N_COUNT_PER_MEASURE .
Each result ( difference between cpu_times ( ) . user values when apropriate procedure finishes ) is added to the list .
Finally , we evaluate MEDIAN of ration between cpu user time which was received for SIMILAR_TO and LIKE statements .
If this median is less then threshold ( see var . UTF8_TO_PTBR_MAX_RATIO ) then result can be considered as ACCEPTABLE .
See also : https : / / psutil . readthedocs . io / en / latest / #psutil.cpu_times
Confirmed poor perfromance on 5.0 .0 .279 : ratio median is about 1.8 .
Checked on 5.0 .0 .298 ( intermediate build , date : 05.11 .2021 13 : 10 ) - performance is better , ratio is about 1.35 .
Example of data ( for 15 calls ) :
[ 1.36 , 1.36 , 1.33 , 1.35 , 1.28 , 1.36 , 1.36 , 1.37 , 1.36 , 1.33 , 1.30 , 1.79 , 1.31 , 1.30 , 1.30 ]
21.11 .2021 . Checked on Linux ( after installing pakage psutil ) :
5.0 .0 .313 SS : 17.076 s
5.0 .0 .313 CS : median = 1.51163 , data : 1.56 , 1.40 , 1.52 , 1.50 , 1.63 , 1.53 , 1.52 , 1.28 , 1.40 , 1.75 , 1.45 , 1.43 , 1.51 , 1.48 , 1.52 .
2022-02-02 15:46:19 +01:00
FBTEST : bugs . gh_7038
2022-07-21 22:34:34 +02:00
NOTES :
[ 21.07 .2022 ] pzotov
Checked on 5.0 .0 .591
2022-01-27 20:08:36 +01:00
"""
2022-07-21 22:34:34 +02:00
import os
import psutil
2021-12-30 19:43:52 +01:00
import pytest
2022-01-27 20:08:36 +01:00
from firebird . qa import *
2021-12-30 19:43:52 +01:00
2022-07-21 22:34:34 +02:00
#--------------------------------------------------------------------
def median ( lst ) :
n = len ( lst )
s = sorted ( lst )
return ( sum ( s [ n / / 2 - 1 : n / / 2 + 1 ] ) / 2.0 , s [ n / / 2 ] ) [ n % 2 ] if n else None
#--------------------------------------------------------------------
###########################
### S E T T I N G S ###
###########################
# How many times we call PSQL code (two stored procedures:
# one for performing comparisons based on LIKE, second based on SIMILAR TO statements):
N_MEASURES = 21
# How many iterations must be done in each of stored procedures when they work:
N_COUNT_PER_MEASURE = 1000000
# Maximal value for MEDIAN of ratios between CPU user time when comparison was made.
#
2021-12-30 19:43:52 +01:00
2022-07-21 22:34:34 +02:00
UTF8_TO_PTBR_MAX_RATIO = 1.45 if os . name == ' nt ' else 1.85
#############################
init_script = \
'''
set term ^ ;
create or alter procedure sp_ptbr_test (
n_count int
) as
declare p varchar ( 1 ) character set win1252 collate win_ptbr = ' x ' ;
declare s varchar ( 60 ) character set win1252 collate win_ptbr = ' x12345678901234567890123456789012345678901234567890123456789 ' ;
declare b boolean ;
begin
while ( n_count > 0 )
do
begin
b = s starting with p ;
n_count = n_count - 1 ;
end
end
^
create or alter procedure sp_utf8_test (
n_count int
) as
declare p varchar ( 1 ) character set utf8 collate unicode_ci = ' x ' ;
declare s varchar ( 60 ) character set utf8 collate unicode_ci = ' x12345678901234567890123456789012345678901234567890123456789 ' ;
declare b boolean ;
begin
while ( n_count > 0 )
do
begin
b = s starting with p ;
n_count = n_count - 1 ;
end
end
^
commit
^
'''
db = db_factory ( init = init_script )
2022-01-27 20:08:36 +01:00
act = python_act ( ' db ' )
2021-12-30 19:43:52 +01:00
2022-01-27 20:08:36 +01:00
expected_stdout = """
Duration ratio : acceptable
"""
2021-12-30 19:43:52 +01:00
2022-01-27 20:08:36 +01:00
@pytest.mark.version ( ' >=5.0 ' )
2022-07-21 22:34:34 +02:00
def test_1 ( act : Action , capsys ) :
with act . db . connect ( ) as con :
cur = con . cursor ( )
cur . execute ( ' select mon$server_pid as p from mon$attachments where mon$attachment_id = current_connection ' )
2021-12-30 19:43:52 +01:00
2022-07-21 22:34:34 +02:00
fb_pid = int ( cur . fetchone ( ) [ 0 ] )
sp_time = { }
for i in range ( 0 , N_MEASURES ) :
for x_charset in ( ' ptbr ' , ' utf8 ' ) :
fb_info_init = psutil . Process ( fb_pid ) . cpu_times ( )
cur . callproc ( ' sp_ ' + x_charset + ' _test ' , ( N_COUNT_PER_MEASURE , ) )
fb_info_curr = psutil . Process ( fb_pid ) . cpu_times ( )
sp_time [ x_charset , i ] = max ( fb_info_curr . user - fb_info_init . user , 0.000001 )
#print( 'String form: "%s", median ratio: %s' % ( x_charset, 'acceptable' if median(ratio_list) <= UTF8_TO_PTBR_MAX_RATIO else 'TOO BIG: ' + str(median(ratio_list)) ) )
ratio_lst = [ ]
for i in range ( 0 , N_MEASURES ) :
ratio_lst . append ( sp_time [ ' utf8 ' , i ] / sp_time [ ' ptbr ' , i ] )
median_ratio = median ( ratio_lst )
print ( ' Duration ratio: ' + ( ' acceptable ' if median_ratio < UTF8_TO_PTBR_MAX_RATIO else ' POOR: %s , more than threshold: %s ' % ( ' {:9g} ' . format ( median_ratio ) , ' {:9g} ' . format ( UTF8_TO_PTBR_MAX_RATIO ) ) ) )
if median_ratio > = UTF8_TO_PTBR_MAX_RATIO :
print ( ' Ratio statistics for %d measurements ' % N_MEASURES )
for p in ratio_lst :
print ( ' %12.2f ' % p )
act . expected_stdout = expected_stdout
act . stdout = capsys . readouterr ( ) . out
assert act . clean_stdout == act . clean_expected_stdout