firebird-qa/tests/bugs/core_5664_test.py

#coding:utf-8

"""
ID:          issue-5930
ISSUE:       5930
TITLE:       SIMILAR TO is substantially (500-700x) slower than LIKE on trivial pattern matches with VARCHAR data.
DESCRIPTION:
    21.11.2021. Totally re-implemented, package 'psutil' must be installed.

    We make two calls of psutil.Process(fb_pid).cpu_times() (before and after SQL code) and obtain CPU User Time
    values from each result.
    Difference between them can be considered as much more accurate performance estimation.

    On each calls of procedural code (see variable N_MEASURES) dozen execution of LIKE <pattern%> and
    SIMILAR_TO statements are performed (see variable N_COUNT_PER_MEASURE). Name of procedures which do work:
    'sp_like_test' and 'sp_sim2_test' (first of them uses 'LIKE' statement, second uses 'SIMILAR TO'). Both procedures
    uses the same data for handling.

    Each result (difference between cpu_times().user values when PSQL code finished) is added to the list.
    Finally, we evaluate MEDIAN of ratio values between cpu user time which was received for SIMILAR_TO and LIKE statements.
    If this median is less then threshold (see var. SIM2_LIKE_MAX_RATIO) then result can be considered as ACCEPTABLE.

    See also:
    https://psutil.readthedocs.io/en/latest/#psutil.cpu_times

    Confirmed bug on WI-T4.0.0.1575
    Checked on Windows:
        5.0.0.311 : 12.620s  ; 4.0.1.2660 : 13.388s.

    21.11.2021. Checked on Linux (after installing pakage psutil):
        5.0.0.313: 10.107s ; 4.0.1.2668: 8.519s ;
JIRA:        CORE-5664
FBTEST:      bugs.core_5664
"""

import pytest
from firebird.qa import *

db = db_factory()

act = python_act('db')

expected_stdout = """
    String STARTS WITH pattern, result: acceptable.
    String ENDS WITH pattern, result: acceptable.
"""

@pytest.mark.skip('FIXME: Not IMPLEMENTED')
@pytest.mark.version('>=4.0')
def test_1(act: Action):
    pytest.fail("Not IMPLEMENTED")

# test_script_1
#---
#
#
#  import os
#  import psutil
#
#  os.environ["ISC_USER"] = user_name
#  os.environ["ISC_PASSWORD"] = user_password
#
#  #------------------
#  def median(lst):
#      n = len(lst)
#      s = sorted(lst)
#      return (sum(s[n//2-1:n//2+1])/2.0, s[n//2])[n % 2] if n else None
#  #------------------
#
#  ###########################
#  ###   S E T T I N G S   ###
#  ###########################
#  # How many times we call PSQL code (two stored procedures:
#  # one for performing comparisons based on LIKE, second based on SIMILAR TO statements):
#  N_MEASURES = 30
#
#  # How many iterations must be done in each of stored procedures when they work.
#  # DO NOT set this value less then 10'000 otherwise lot of measures will last ~0 ms
#  # and we will not able to evaluate ratio properly:
#  #
#  N_COUNT_PER_MEASURE = 1000
#
#  # Maximal value for MEDIAN of ratios between CPU user time when comparison was made
#  # using SIMILAR TO vs LIKE. Lot of measurements show that SIMILAR_TO is FASTER then LIKE
#  # when handling long string and pattern that are used in this test, ratio is ~0.9 ... 1.1
#  #
#  SIM2_LIKE_MAX_RATIO = 5.0
#  ###########################
#
#
#  # Patternized code for creating TWO procedures:
#  # 1) for applying LIKE
#  # 2) for applying SIMILAR TO
#  ############################
#  sp_ddl = '''    create or alter procedure sp_%(sp_prefix)s_test (
#          n_count int
#         ,long_string_form  varchar(20) -- 'starts_with' | 'ends_with'
#      ) as
#          declare i int = 0;
#          declare long_text varchar(32761);
#          declare word_to_search varchar(16384);
#          declare pattern_to_chk varchar(32761);
#          declare v_guid varchar(32755);
#      begin
#          v_guid = lpad( '', 16384, uuid_to_char(gen_uuid()) );
#          v_guid = replace(v_guid,'-','');
#
#          -- ::: NB :::
#          -- Text that we want to search (word_to_search)
#          -- must either be in the beginning or at the end of string.
#          -- No sense to search text if it differs with string at first characters
#          -- because evaluating of result for STARTS_WITH will be instant in that case
#          -- (rather thanresult for ENDS_WITH).
#          -- Also, we have to remove '-' from both text and pattern because this is special
#          -- character in SIMILAR TO operator (we do this just for simplification of test).
#
#          word_to_search = replace(left(v_guid, 4096),'-','');
#
#          if ( long_string_form = 'starts_with' ) then
#              -- ............ "starts with" ..........
#              begin
#                  long_text = word_to_search || v_guid;
#                  pattern_to_chk = word_to_search || '%%'; ------------ 'ABCDEF' LIKE/SIMILAR_TO 'ABC<wildcard_char>'
#              end
#          else -- ............ "ends with" ..........
#              begin
#                  long_text = v_guid || word_to_search;
#                  pattern_to_chk = '%%' || word_to_search; ------------ 'ABCDEF' LIKE/SIMILAR_TO '<wildcard_char>DEF'
#              end
#
#          while (i < n_count) do
#          begin
#              -- ##############################################
#              -- evaluating result: applying LIKE or SIMILAR_TO
#              -- ##############################################
#              i = i + iif( long_text %(sp_statement)s pattern_to_chk, 1, 1);
#          end
#      end
#  '''
#
#  op_map = {'like' : 'LIKE', 'sim2' : 'SIMILAR TO'}
#  for sp_prefix,sp_statement in op_map.items():
#      db_conn.execute_immediate( sp_ddl % locals() )
#  db_conn.commit()
#
#  # Result: procedures with names: 'sp_like_test' and 'sp_sim2_test' have been created.
#  # Both of them use input parameter, n_count -- number of iterations.
#
#  cur=db_conn.cursor()
#  cur.execute('select mon$server_pid as p from mon$attachments where mon$attachment_id = current_connection')
#
#  fb_pid = int(cur.fetchone()[0])
#
#  # test_1: string ENDS with pattern (s like '%QWERTY' == vs== s similar to '%QWERTY' )
#  # test_2: string STARTS with pattern (s like 'QWERTY%' == vs == s similar to 'QWERTY%' )
#
#  sp_call_data = {}
#  for long_text_form in ('starts_with', 'ends_with'):
#      ratio_list = []
#
#      for i in range(0, N_MEASURES):
#          sp_time = {}
#          for sp_name in op_map.keys():
#              fb_info_init = psutil.Process(fb_pid).cpu_times()
#              cur.callproc('sp_' + sp_name + '_test', (N_COUNT_PER_MEASURE, long_text_form) )
#              fb_info_curr = psutil.Process(fb_pid).cpu_times()
#
#              sp_time[ sp_name ]  = max(fb_info_curr.user - fb_info_init.user, 0.000001)
#          try:
#              # print('measure: %5d' % i, 'ratio:', "{:9.2f}".format( sp_time['sim2'] / sp_time['like'] ), sp_time )
#              ratio_list.append( sp_time['sim2'] / sp_time['like'] )
#              sp_call_data[long_text_form, i] = (sp_time['sim2'], sp_time['like'])
#          except ZeroDivisionError as e:
#              print(e)
#              # print('sim2:', sp_time['sim2'], ';  like:', sp_time['like'])
#
#
#      # print( 'String form: "%s", median ratio: %s' % ( long_text_form, 'acceptable' if median(ratio_list) <= SIM2_LIKE_MAX_RATIO else 'TOO BIG: ' + str(median(ratio_list))  ) )
#
#      if median(ratio_list) <= SIM2_LIKE_MAX_RATIO:
#          print('String %s pattern, result: acceptable.' % long_text_form.upper().replace('_',' '))
#      else:
#          print('THE SEARCH WAS TOO SLOW when string %s pattern' % long_text_form.upper().replace('_',' '))
#          print("\\nCheck sp_call_data values (k=[long_text_form, i], v = (sp_time['sim2'], sp_time['like'])):" )
#          for k,v in sorted(sp_call_data.items()):
#              print(k,':',v, '; ratio:', v[0]/v[1])
#
#          print('\\nCheck ratio values:')
#          for i,p in enumerate(ratio_list):
#              print( "%d : %12.2f" % (i,p) )
#          print('\\nMedian value: %12.2f' % median(ratio_list))
#
#  cur.close()
#
#
#---