mirror of
https://github.com/FirebirdSQL/firebird-qa.git
synced 2025-01-22 21:43:06 +01:00
197 lines
7.8 KiB
Python
197 lines
7.8 KiB
Python
#coding:utf-8
|
|
|
|
"""
|
|
ID: issue-5930
|
|
ISSUE: 5930
|
|
TITLE: SIMILAR TO is substantially (500-700x) slower than LIKE on trivial pattern matches with VARCHAR data.
|
|
DESCRIPTION:
|
|
21.11.2021. Totally re-implemented, package 'psutil' must be installed.
|
|
|
|
We make two calls of psutil.Process(fb_pid).cpu_times() (before and after SQL code) and obtain CPU User Time
|
|
values from each result.
|
|
Difference between them can be considered as much more accurate performance estimation.
|
|
|
|
On each calls of procedural code (see variable N_MEASURES) dozen execution of LIKE <pattern%> and
|
|
SIMILAR_TO statements are performed (see variable N_COUNT_PER_MEASURE). Name of procedures which do work:
|
|
'sp_like_test' and 'sp_sim2_test' (first of them uses 'LIKE' statement, second uses 'SIMILAR TO'). Both procedures
|
|
uses the same data for handling.
|
|
|
|
Each result (difference between cpu_times().user values when PSQL code finished) is added to the list.
|
|
Finally, we evaluate MEDIAN of ratio values between cpu user time which was received for SIMILAR_TO and LIKE statements.
|
|
If this median is less then threshold (see var. SIM2_LIKE_MAX_RATIO) then result can be considered as ACCEPTABLE.
|
|
|
|
See also:
|
|
https://psutil.readthedocs.io/en/latest/#psutil.cpu_times
|
|
|
|
Confirmed bug on WI-T4.0.0.1575
|
|
Checked on Windows:
|
|
5.0.0.311 : 12.620s ; 4.0.1.2660 : 13.388s.
|
|
|
|
21.11.2021. Checked on Linux (after installing pakage psutil):
|
|
5.0.0.313: 10.107s ; 4.0.1.2668: 8.519s ;
|
|
JIRA: CORE-5664
|
|
FBTEST: bugs.core_5664
|
|
"""
|
|
|
|
import pytest
|
|
from firebird.qa import *
|
|
|
|
db = db_factory()
|
|
|
|
act = python_act('db')
|
|
|
|
expected_stdout = """
|
|
String STARTS WITH pattern, result: acceptable.
|
|
String ENDS WITH pattern, result: acceptable.
|
|
"""
|
|
|
|
@pytest.mark.skip('FIXME: Not IMPLEMENTED')
|
|
@pytest.mark.version('>=4.0')
|
|
def test_1(act: Action):
|
|
pytest.fail("Not IMPLEMENTED")
|
|
|
|
# test_script_1
|
|
#---
|
|
#
|
|
#
|
|
# import os
|
|
# import psutil
|
|
#
|
|
# os.environ["ISC_USER"] = user_name
|
|
# os.environ["ISC_PASSWORD"] = user_password
|
|
#
|
|
# #------------------
|
|
# def median(lst):
|
|
# n = len(lst)
|
|
# s = sorted(lst)
|
|
# return (sum(s[n//2-1:n//2+1])/2.0, s[n//2])[n % 2] if n else None
|
|
# #------------------
|
|
#
|
|
# ###########################
|
|
# ### S E T T I N G S ###
|
|
# ###########################
|
|
# # How many times we call PSQL code (two stored procedures:
|
|
# # one for performing comparisons based on LIKE, second based on SIMILAR TO statements):
|
|
# N_MEASURES = 30
|
|
#
|
|
# # How many iterations must be done in each of stored procedures when they work.
|
|
# # DO NOT set this value less then 10'000 otherwise lot of measures will last ~0 ms
|
|
# # and we will not able to evaluate ratio properly:
|
|
# #
|
|
# N_COUNT_PER_MEASURE = 1000
|
|
#
|
|
# # Maximal value for MEDIAN of ratios between CPU user time when comparison was made
|
|
# # using SIMILAR TO vs LIKE. Lot of measurements show that SIMILAR_TO is FASTER then LIKE
|
|
# # when handling long string and pattern that are used in this test, ratio is ~0.9 ... 1.1
|
|
# #
|
|
# SIM2_LIKE_MAX_RATIO = 5.0
|
|
# ###########################
|
|
#
|
|
#
|
|
# # Patternized code for creating TWO procedures:
|
|
# # 1) for applying LIKE
|
|
# # 2) for applying SIMILAR TO
|
|
# ############################
|
|
# sp_ddl = ''' create or alter procedure sp_%(sp_prefix)s_test (
|
|
# n_count int
|
|
# ,long_string_form varchar(20) -- 'starts_with' | 'ends_with'
|
|
# ) as
|
|
# declare i int = 0;
|
|
# declare long_text varchar(32761);
|
|
# declare word_to_search varchar(16384);
|
|
# declare pattern_to_chk varchar(32761);
|
|
# declare v_guid varchar(32755);
|
|
# begin
|
|
# v_guid = lpad( '', 16384, uuid_to_char(gen_uuid()) );
|
|
# v_guid = replace(v_guid,'-','');
|
|
#
|
|
# -- ::: NB :::
|
|
# -- Text that we want to search (word_to_search)
|
|
# -- must either be in the beginning or at the end of string.
|
|
# -- No sense to search text if it differs with string at first characters
|
|
# -- because evaluating of result for STARTS_WITH will be instant in that case
|
|
# -- (rather thanresult for ENDS_WITH).
|
|
# -- Also, we have to remove '-' from both text and pattern because this is special
|
|
# -- character in SIMILAR TO operator (we do this just for simplification of test).
|
|
#
|
|
# word_to_search = replace(left(v_guid, 4096),'-','');
|
|
#
|
|
# if ( long_string_form = 'starts_with' ) then
|
|
# -- ............ "starts with" ..........
|
|
# begin
|
|
# long_text = word_to_search || v_guid;
|
|
# pattern_to_chk = word_to_search || '%%'; ------------ 'ABCDEF' LIKE/SIMILAR_TO 'ABC<wildcard_char>'
|
|
# end
|
|
# else -- ............ "ends with" ..........
|
|
# begin
|
|
# long_text = v_guid || word_to_search;
|
|
# pattern_to_chk = '%%' || word_to_search; ------------ 'ABCDEF' LIKE/SIMILAR_TO '<wildcard_char>DEF'
|
|
# end
|
|
#
|
|
# while (i < n_count) do
|
|
# begin
|
|
# -- ##############################################
|
|
# -- evaluating result: applying LIKE or SIMILAR_TO
|
|
# -- ##############################################
|
|
# i = i + iif( long_text %(sp_statement)s pattern_to_chk, 1, 1);
|
|
# end
|
|
# end
|
|
# '''
|
|
#
|
|
# op_map = {'like' : 'LIKE', 'sim2' : 'SIMILAR TO'}
|
|
# for sp_prefix,sp_statement in op_map.items():
|
|
# db_conn.execute_immediate( sp_ddl % locals() )
|
|
# db_conn.commit()
|
|
#
|
|
# # Result: procedures with names: 'sp_like_test' and 'sp_sim2_test' have been created.
|
|
# # Both of them use input parameter, n_count -- number of iterations.
|
|
#
|
|
# cur=db_conn.cursor()
|
|
# cur.execute('select mon$server_pid as p from mon$attachments where mon$attachment_id = current_connection')
|
|
#
|
|
# fb_pid = int(cur.fetchone()[0])
|
|
#
|
|
# # test_1: string ENDS with pattern (s like '%QWERTY' == vs== s similar to '%QWERTY' )
|
|
# # test_2: string STARTS with pattern (s like 'QWERTY%' == vs == s similar to 'QWERTY%' )
|
|
#
|
|
# sp_call_data = {}
|
|
# for long_text_form in ('starts_with', 'ends_with'):
|
|
# ratio_list = []
|
|
#
|
|
# for i in range(0, N_MEASURES):
|
|
# sp_time = {}
|
|
# for sp_name in op_map.keys():
|
|
# fb_info_init = psutil.Process(fb_pid).cpu_times()
|
|
# cur.callproc('sp_' + sp_name + '_test', (N_COUNT_PER_MEASURE, long_text_form) )
|
|
# fb_info_curr = psutil.Process(fb_pid).cpu_times()
|
|
#
|
|
# sp_time[ sp_name ] = max(fb_info_curr.user - fb_info_init.user, 0.000001)
|
|
# try:
|
|
# # print('measure: %5d' % i, 'ratio:', "{:9.2f}".format( sp_time['sim2'] / sp_time['like'] ), sp_time )
|
|
# ratio_list.append( sp_time['sim2'] / sp_time['like'] )
|
|
# sp_call_data[long_text_form, i] = (sp_time['sim2'], sp_time['like'])
|
|
# except ZeroDivisionError as e:
|
|
# print(e)
|
|
# # print('sim2:', sp_time['sim2'], '; like:', sp_time['like'])
|
|
#
|
|
#
|
|
# # print( 'String form: "%s", median ratio: %s' % ( long_text_form, 'acceptable' if median(ratio_list) <= SIM2_LIKE_MAX_RATIO else 'TOO BIG: ' + str(median(ratio_list)) ) )
|
|
#
|
|
# if median(ratio_list) <= SIM2_LIKE_MAX_RATIO:
|
|
# print('String %s pattern, result: acceptable.' % long_text_form.upper().replace('_',' '))
|
|
# else:
|
|
# print('THE SEARCH WAS TOO SLOW when string %s pattern' % long_text_form.upper().replace('_',' '))
|
|
# print("\\nCheck sp_call_data values (k=[long_text_form, i], v = (sp_time['sim2'], sp_time['like'])):" )
|
|
# for k,v in sorted(sp_call_data.items()):
|
|
# print(k,':',v, '; ratio:', v[0]/v[1])
|
|
#
|
|
# print('\\nCheck ratio values:')
|
|
# for i,p in enumerate(ratio_list):
|
|
# print( "%d : %12.2f" % (i,p) )
|
|
# print('\\nMedian value: %12.2f' % median(ratio_list))
|
|
#
|
|
# cur.close()
|
|
#
|
|
#
|
|
#---
|