#coding:utf-8
# Provenance: added as tests/bugs/gh_4723_all_nulls_test.py by commit
# 69bc95847aee340178ac866b19159de7d68054e1 (pavel-zotov, 2024-05-20, "Renamed file").

"""
ID: issue-4723
ISSUE: https://github.com/FirebirdSQL/firebird/issues/4723
TITLE: Optimize the record-level RLE algorithm for a denser compression of shorter-than-declared strings and sets of subsequent NULLs [CORE4401]
DESCRIPTION:
    Test creates a table with a nullable varchar column and adds a lot of rows with NULL value.
    Then we run gstat in order to parse statistics related to data pages and avg fill ratio.
    gstat reports the following values for data pages and avg fill ratio:
        4.0.5.3099:
            Pointer pages: 2, data page slots: 2144
            Data pages: 2144, average fill: 91%
        5.0.1.1399, 6.0.0.351:
            Pointer pages: 1, data page slots: 208
            Data pages: 208, average fill: 46%
    Test assumes that the values returned for 5.x will not change in too wide a range
    (in either direction) for several upcoming years - see MIN_* and MAX_* thresholds.
NOTES:
    [20.05.2024] pzotov
    Improvement URL (27-sep-2022 15:16):
    https://github.com/FirebirdSQL/firebird/commit/54f1990b98d3e510a10d06fe9ceb76456804da52
    Improved record compression (denser encoding of repeating bytes and less blocks) (#7302)

    NB: snapshots that were just before and after this commit CAN NOT be verified:
        5.0.0.745: raised BUGCHECK ("decompression overran buffer (179), file: sqz.cpp line: 293")
        5.0.0.756: crashed
    Checked on 5.0.1.1399, 6.0.0.351 for DB with page_size = 8192.
"""
import platform
import re

import pytest
from firebird.qa import *

# Number of all-NULL rows inserted and the declared width of the varchar column.
N_ROWS = 30000
N_WIDT = 32760

# Accepted (inclusive) ranges for the two values parsed from gstat output.
MIN_DP_COUNT_THRESHOLD = 190
MAX_DP_COUNT_THRESHOLD = 230
MIN_AVG_FILL_THRESHOLD = 30
MAX_AVG_FILL_THRESHOLD = 60

init_ddl = f"""
    recreate table test (f01 varchar({N_WIDT}));
    commit;

    set term ^;
    execute block as
        declare n int = {N_ROWS};
    begin
        while (n > 0) do
        begin
            insert into test(f01) values(null);
            n = n - 1;
        end
    end
    ^
    set term ;^
    commit;
"""

db = db_factory(page_size = 8192, init = init_ddl)
act = python_act('db')

# 4.x: Pointer pages: 2, data page slots: 2144
# 5.x: Pointer pages: 1, data page slots: 208
# The number of interest is captured directly, so it does not have to be
# the last whitespace-separated token of the line.
_P_DATA_PAGE_SLOTS = re.compile(
    r'Pointer\s+pages:?\s+\d+,?\s+data\s+page\s+slots:?\s+(\d+)'
)

# Data pages: 208, average fill: 46%
_P_AVERAGE_FILL = re.compile(
    r'Data\s+pages:?\s+\d+,?\s+average\s+fill:?\s+(\d+)%'
)


def _parse_gstat_output(gstat_lines):
    """Extract the data page slots count and the average fill percentage.

    Args:
        gstat_lines: iterable of text lines produced by gstat.

    Returns:
        Tuple ``(data_pages_cnt, avg_fill_ratio)`` of ints. A value stays -1
        when no line matched the corresponding pattern. If several lines
        match the same pattern, the last one wins.
    """
    data_pages_cnt = avg_fill_ratio = -1
    for line in gstat_lines:
        slots_match = _P_DATA_PAGE_SLOTS.search(line)
        if slots_match:
            # NOTE(review): the count is taken from 'data page slots', not from
            # 'Data pages'; both are equal in the samples quoted in the module
            # docstring - confirm this is intended.
            data_pages_cnt = int(slots_match.group(1))
        fill_match = _P_AVERAGE_FILL.search(line)
        if fill_match:
            avg_fill_ratio = int(fill_match.group(1))
    return data_pages_cnt, avg_fill_ratio


@pytest.mark.version('>=5.0')
def test_1(act: Action, capsys):
    """Check that the gstat figures for table TEST fall within the MIN_*/MAX_* thresholds.

    One line is printed per checked value: the "expected" message when the
    value is inside its range, a diagnostic message otherwise. The captured
    output is then compared with the two "expected" messages.
    """
    act.gstat(switches=['-d', '-t', 'TEST', '-user', act.db.user, '-pass', act.db.password])

    gstat_lines = act.stdout.splitlines()
    # Local names are kept as-is: they are part of the '{name=}' messages below.
    data_pages_cnt, avg_fill_ratio = _parse_gstat_output(gstat_lines)

    data_pages_cnt_expected_msg = f'data_pages_cnt: expected, within {MIN_DP_COUNT_THRESHOLD=} ... {MAX_DP_COUNT_THRESHOLD=}'
    avg_fill_ratio_expected_msg = f'avg_fill_ratio: expected, within {MIN_AVG_FILL_THRESHOLD=} ... {MAX_AVG_FILL_THRESHOLD=}'

    if data_pages_cnt > 0 and avg_fill_ratio > 0:
        if MIN_DP_COUNT_THRESHOLD <= data_pages_cnt <= MAX_DP_COUNT_THRESHOLD:
            print(data_pages_cnt_expected_msg)
        else:
            print(f'data_pages_cnt UNEXPECTED: {data_pages_cnt=} -- out of scope: {MIN_DP_COUNT_THRESHOLD=} ... {MAX_DP_COUNT_THRESHOLD=}')

        if MIN_AVG_FILL_THRESHOLD <= avg_fill_ratio <= MAX_AVG_FILL_THRESHOLD:
            print(avg_fill_ratio_expected_msg)
        else:
            print(f'avg_fill_ratio UNEXPECTED: {avg_fill_ratio=} -- out of scope: {MIN_AVG_FILL_THRESHOLD=} ... {MAX_AVG_FILL_THRESHOLD=}')
    else:
        # Either value missing (still -1) or zero: dump the raw gstat output
        # so the failure can be diagnosed from the test log.
        print(f'ERROR: at least one of: {data_pages_cnt=}, {avg_fill_ratio=} is INVALID.')
        print('Could not properly parse gstat output:')
        for p in gstat_lines:
            print(p)

    act.expected_stdout = f"""
        {data_pages_cnt_expected_msg}
        {avg_fill_ratio_expected_msg}
    """
    # Replace gstat output with what this test printed, then compare.
    act.stdout = capsys.readouterr().out
    assert act.clean_stdout == act.clean_expected_stdout