6
0
mirror of https://github.com/FirebirdSQL/firebird-qa.git synced 2025-01-23 14:03:06 +01:00
firebird-qa/tests/bugs/core_5501_test.py

353 lines
15 KiB
Python
Raw Normal View History

2021-04-26 20:07:00 +02:00
#coding:utf-8
2022-01-25 22:55:48 +01:00
"""
ID: issue-5770
ISSUE: 5770
TITLE: Unclear gstat's diagnostic when damaged page in DB file appears encrypted
DESCRIPTION:
Test creates table 'TEST' with varchar and blob fields, + index on varchar, and add some data to it.
The blob field is filled with long values so that its content cannot be accommodated within data pages.
As result, this table should have pages of three different types: DataPage, BTreePage and BlobPage.
Then we find number of first PP of this table by scrolling RDB$PAGES join RDB$RELATIONS result set.
After this we:
* define type of every page starting from first PP for 'TEST' table and up to total pages of DB,
and doing this for each subsequent page, until ALL THREE different page types will be detected:
1) data page, 2) index B-Tree and 3) blob page.
These page numbers are stored in variables: (brk_datapage, brk_indxpage, brk_blobpage).
When all three page numbers are found, loop is terminated;
* close connection and open the DB as a binary file for reading and writing;
* store previous content of .fdb in variable 'raw_db_content' (for further restore);
* move file seek pointer at the beginning of every page from list: (brk_datapage, brk_indxpage, brk_blobpage);
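* BREAK page content by writing invalid binary data into the header of each page;
Originally this invalid data was: bytes 0...7 ==> 0xFFAACCEEBB0000CC; bytes 8...15 ==> 0xDDEEAADDCC00DDEE;
currently 16 random bytes obtained from os.urandom() are written instead (see NOTES, [18.09.2022]);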
* Close DB file handle and:
** 1) run 'gstat -e';
** 2) run online validation;
* open DB file again as binary and restore its content from variable 'raw_db_content' so that
the test framework can finish this test (by connecting to and dropping this database);
KEY POINTS:
* report of 'gstat -e' should contain line with text 'ENCRYPTED 3 (DB problem!)'
(number '3' should be present because we damaged pages of THREE different types: DP, BTree and Blob).
* report of online validation should contain lines with info about three diff. page types which have problems.
JIRA: CORE-5501
FBTEST: bugs.core_5501
NOTES:
[08.12.2021] pcisar
Reimplementation does not work as expected on Linux FB 4.0 and 3.0.8
gstat output:
Data pages: total 97, encrypted 0, non-crypted 97
Index pages: total 85, encrypted 0, non-crypted 85
Blob pages: total 199, encrypted 0, non-crypted 199
Generator pages: total 1, encrypted 0, non-crypted 1
Validation does not report BLOB page errors, only data and index corruptions.
[18.09.2022] pzotov
Probably the old-style bytearray was the reason why the damaged pages were not considered by gstat as being of unknown type.
Decided to replace it with 'really random' content, see 'os.urandom(<length>)'.
This is the only change, and after it was done the test works fine.
2022-01-25 22:55:48 +01:00
Checked on 3.0.8.33535 (SS/CS), 4.0.1.2692 (SS/CS), 5.0.0.730 (SS/CS) - both Linux and Windows.
"""
#from __future__ import annotations
import os
import time
from typing import Dict
2021-04-26 20:07:00 +02:00
import pytest
import re
from struct import unpack_from
2022-01-25 22:55:48 +01:00
from firebird.qa import *
from firebird.driver import Connection
2021-04-26 20:07:00 +02:00
2022-01-25 22:55:48 +01:00
# SQL script handed to db_factory(init=...) below. It creates table TEST with a
# uniquely indexed varchar column and a blob column, then inserts 100 rows.
# The blob values are 10000 characters long so that (per the in-script note)
# they are not stored within a data page.
# The statement lines are kept flush-left; only stray non-SQL timestamp lines
# that had ended up inside the literal were removed.
init_script = """
alter database drop linger;
commit;
create table test(s varchar(1000) unique using index test_s_unq, b blob);
commit;
set count on;
insert into test(s, b)
select
rpad( '',1000, uuid_to_char(gen_uuid()) ),
rpad( '',
10000, -- NB: blob should have a big size! It should NOT be stored withih a data page.
'qwertyuioplkjhgfdsazxcvbnm0987654321')
from rdb$types
rows 100;
commit;
"""
2021-04-26 20:07:00 +02:00
2022-01-25 22:55:48 +01:00
# Test database, initialised with the SQL in init_script.
db = db_factory(init=init_script)

# (pattern, replacement) pairs handed to python_act.
# NOTE(review): presumably the framework applies them as regex substitutions to
# both actual and expected output before comparing, which would hide the
# volatile page counters and the whole 'Other pages' line -- confirm against
# the firebird-qa plugin.
substitutions = [('total \\d+,', 'total'), ('non-crypted \\d+', 'non-crypted'),
                 ('crypted \\d+', 'crypted'), ('Other pages.*', '')]

act = python_act('db', substitutions=substitutions)
2022-01-25 22:55:48 +01:00
# Output the test is expected to print: the per-page-type lines of the
# 'gstat -e' report followed by the summary line produced from the validation log.
# NOTE(review): the concrete numbers below look like samples only -- the
# 'substitutions' patterns appear to strip the counters and the 'Other pages'
# line before comparison; confirm against the framework.
expected_stdout = """
Data pages: total 63, encrypted 0, non-crypted 63
Index pages: total 88, encrypted 0, non-crypted 88
Blob pages: total 199, encrypted 0, non-crypted 199
Other pages: total 115, ENCRYPTED 3 (DB problem!), non-crypted 112
Detected all THREE page types with problem => YES
"""
# Labels for the page-type code stored in the first byte of a page header;
# the position in the tuple is the numeric page-type code.
PAGE_TYPES = dict(enumerate((
    "undef/free",  # 0
    "DB header",   # 1
    "PIP",         # 2
    "TIP",         # 3
    "Pntr Page",   # 4
    "Data Page",   # 5
    "Indx Root",   # 6
    "Indx Data",   # 7
    "Blob Page",   # 8
    "Gens Page",   # 9
    "SCN",         # 10 -- only for ODS>=12
)))
def fill_dbo(con: Connection, map_dbo: Dict):
    """Populate *map_dbo* with the names of non-system tables and their indices.

    The query returns one row per relation (with index id -1 and an empty index
    name) plus one row per index of that relation (with ``rdb$index_id - 1`` as
    the index id). Rows whose relation type is not 0 or whose system flag is 1
    are filtered out.

    Args:
        con: Open connection used to run the metadata query.
        map_dbo: Dictionary updated in place; each key is
            ``(relation_id, index_id)`` and each value is
            ``(relation_name, index_name)`` with surrounding blanks stripped.
    """
    # The statement text is kept flush-left so that it is sent unchanged.
    query = """
select rel_id, rel_name, idx_id, idx_name
from (
select
rr.rdb$relation_id rel_id, -- 0
rr.rdb$relation_name rel_name, -- 1
-1 idx_id, -- 2
'' idx_name, -- 3
rr.rdb$relation_type rel_type,
rr.rdb$system_flag sys_flag
from rdb$relations rr
union all
select
rr.rdb$relation_id rel_id, -- 0
rr.rdb$relation_name rel_name, -- 1
coalesce(ri.rdb$index_id-1,-1) idx_id, -- 2
coalesce(ri.rdb$index_name,'') idx_name, -- 3
rr.rdb$relation_type rel_type,
rr.rdb$system_flag sys_flag
from rdb$relations rr
join rdb$indices ri on
rr.rdb$relation_name = ri.rdb$relation_name
) r
where
coalesce(r.rel_type,0) = 0 -- exclude views, GTT and external tables
and r.sys_flag is distinct from 1
"""
    cursor = con.cursor()
    cursor.execute(query)
    for rel_id, rel_name, idx_id, idx_name in cursor:
        map_dbo[rel_id, idx_id] = (rel_name.strip(), idx_name.strip())
def parse_page_header(con: Connection, page_number: int, map_dbo: Dict):
    """Fetch one database page and decode the identifying fields of its header.

    Args:
        con: Open connection; the raw page is obtained through
            ``con.info.get_page_content(page_number)``.
        page_number: Number of the page to read.
        map_dbo: Mapping ``(relation_id, index_id) -> (relation_name, index_name)``
            where ``index_id`` is -1 for the entry describing the table itself.

    Returns:
        Tuple ``(page_type, relation_id, page_info)``. ``relation_id`` stays -1
        for page types for which no relation id is decoded here; ``page_info``
        is a human-readable description of the page. A page-type byte that is
        not listed in ``PAGE_TYPES`` (e.g. on a damaged page) is reported as
        ``unknown type <n>`` instead of raising ``KeyError``.
    """
    page_buffer = con.info.get_page_content(page_number)

    # Field sizes (dimitr, 20.01.2017): *CHAR = 1 byte, *SHORT = 2 bytes,
    # *LONG = 4 bytes. Every field below is decoded as little-endian ('<');
    # struct codes used: 'b' signed char, 'B' unsigned char, 'H' unsigned short.
    # The common page header ('pag') occupies the first 16 bytes of every page.
    page_type = unpack_from('<b', page_buffer)[0]

    relation_id = -1
    index_id = -1
    segment_cnt = -1  # for Data page: number of record segments on page
    ix_level = -1
    btr_len = -1

    if page_type == 4:
        # POINTER page:
        # struct pointer_page
        # {
        #  16  pag ppg_header;
        #   4  SLONG ppg_sequence;     // Sequence number in relation
        #   4  SLONG ppg_next;         // Next pointer page in relation
        #   2  USHORT ppg_count;       // Number of slots active
        #   2  USHORT ppg_relation;    // Relation id  ==> offset 16+4+4+2 = 26
        #      USHORT ppg_min_space;   // Lowest slot with space available
        #      USHORT ppg_max_space;   // Highest slot with space available
        #      SLONG ppg_page[1];      // Data page vector
        # };
        relation_id = unpack_from('<H', page_buffer, 26)[0]
    elif page_type == 5:
        # DATA page:
        # struct data_page
        # {
        #  16  pag dpg_header;
        #   4  SLONG dpg_sequence;     // Sequence number in relation
        #   2  USHORT dpg_relation;    // Relation id  ==> offset 16+4 = 20
        #   2  USHORT dpg_count;       // Number of record segments on page
        #      struct dpg_repeat
        #      {
        #          USHORT dpg_offset;  // Offset of record fragment
        #          USHORT dpg_length;  // Length of record fragment
        #      } dpg_rpt[1];
        # };
        relation_id = unpack_from('<H', page_buffer, 20)[0]
        segment_cnt = unpack_from('<H', page_buffer, 22)[0]
    elif page_type == 6:
        # Index root page:
        # struct index_root_page
        # {
        #  16  pag irt_header;
        #   2  USHORT irt_relation;    // relation id (for consistency)
        relation_id = unpack_from('<H', page_buffer, 16)[0]
    elif page_type == 7:
        # B-tree page ("bucket"):
        # struct btree_page
        # {
        #  16  pag btr_header;
        #   4  SLONG btr_sibling;       // right sibling page
        #   4  SLONG btr_left_sibling;  // left sibling page
        #   4  SLONG btr_prefix_total;  // sum of all prefixes on page
        #   2  USHORT btr_relation;     // relation id for consistency
        #   2  USHORT btr_length;       // length of data in bucket
        #   1  UCHAR btr_id;            // index id for consistency
        #   1  UCHAR btr_level;         // index level (0 = leaf)
        #      btree_nod btr_nodes[1];
        # };
        relation_id = unpack_from('<H', page_buffer, 28)[0]
        btr_len = unpack_from('<H', page_buffer, 30)[0]
        index_id = unpack_from('<B', page_buffer, 32)[0]
        ix_level = unpack_from('<B', page_buffer, 33)[0]

    # Tolerate type bytes that are not valid page types: a damaged page must
    # still yield a description rather than a KeyError.
    type_name = PAGE_TYPES.get(page_type, f'unknown type {page_type}').ljust(9)

    if index_id >= 0 and (relation_id, index_id) in map_dbo:
        # Index page of a known index: report index name, data length and level.
        u = map_dbo[relation_id, index_id]
        page_info = f'{type_name}, {u[1].strip()}, data_len={btr_len}, lev={ix_level}'
    elif (relation_id, -1) in map_dbo:
        # Page that belongs to a known table.
        u = map_dbo[relation_id, -1]
        if page_type == 5:
            page_info = f'{type_name}, {u[0].strip()}, segments on page: {segment_cnt}'
        else:
            page_info = f'{type_name}, {u[0].strip()}'
    elif relation_id == -1:
        # No relation id was decoded for this page type.
        page_info = type_name
    else:
        # A relation id was decoded but it is absent from map_dbo.
        page_info = f'UNKNOWN; {type_name}; relation_id {relation_id}; index_id {index_id}'
    return (page_type, relation_id, page_info)
#@pytest.mark.skip("FIXME: see notes")
2022-01-25 22:55:48 +01:00
@pytest.mark.version('>=3.0.2')
def test_1(act: Action, capsys):
    """Damage one data, one index and one blob page, then check gstat and validation.

    Steps:
      1. Find the first pointer page of table TEST and scan pages from there
         until a data page and a B-tree page of TEST and a blob page are seen.
      2. With the connection closed, overwrite the first 16 bytes of those
         three pages in the database file with random bytes.
      3. Run online validation and ``gstat -e`` against the damaged file.
      4. Restore the original file content (always, even if step 3 raises) so
         that the framework can still connect to and drop the database.
      5. Print the matching gstat lines and a summary of the validation log and
         compare the captured output with ``expected_stdout``.
    """
    map_dbo = {}
    # The statement text is kept flush-left so that it is sent unchanged.
    sql = """
select p.rdb$relation_id, p.rdb$page_number
from rdb$pages p
join rdb$relations r on p.rdb$relation_id = r.rdb$relation_id
where r.rdb$relation_name=upper('TEST') and p.rdb$page_type = 4
order by p.rdb$page_number
rows 1
"""
    with act.db.connect() as con:
        fill_dbo(con, map_dbo)
        c = con.cursor()
        rel_id, pp1st = c.execute(sql).fetchone()
        # Read the page size while the connection is still open; it is needed
        # later to compute file offsets.
        page_size = con.info.page_size
        # Look for pages of each of three types: Data, Index and Blob
        # (loop starts from first PointerPage of table 'TEST').
        brk_datapage = brk_indxpage = brk_blobpage = -1
        for i in range(pp1st, con.info.pages_allocated):
            page_type, relation_id, page_info = parse_page_header(con, i, map_dbo)
            # Compare with the relation id actually fetched for TEST rather
            # than assuming a fixed value.
            if relation_id == rel_id and page_type == 5:
                brk_datapage = i
            elif relation_id == rel_id and page_type == 7:
                brk_indxpage = i
            elif page_type == 8:
                brk_blobpage = i
            if brk_datapage > 0 and brk_indxpage > 0 and brk_blobpage > 0:
                break

    # Fail early with a clear message instead of seeking to a negative offset.
    assert brk_datapage > 0 and brk_indxpage > 0 and brk_blobpage > 0, (
        'Could not find pages of all three types; brk_datapage, brk_indxpage, '
        f'brk_blobpage: {brk_datapage} {brk_indxpage} {brk_blobpage}'
    )

    # Random 16 bytes to put at the start of every page that we found.
    # The first byte is the page type: re-draw it while it matches a known
    # type, otherwise a damaged page could still look like a page of a valid
    # type and the outcome of the test would depend on chance.
    bw = bytearray(os.urandom(16))
    while bw[0] in PAGE_TYPES:
        bw[0] = os.urandom(1)[0]

    # Store binary content of .fdb for further restore.
    raw_db_content = act.db.db_path.read_bytes()
    try:
        # Make pages damaged.
        with open(act.db.db_path, 'r+b') as w:
            for brk_page in (brk_datapage, brk_indxpage, brk_blobpage):
                w.seek(brk_page * page_size)
                w.write(bw)

        # Validate DB - ensure that there are errors in pages.
        # RESULT: validation log should contain lines with problems about
        # three different page types:
        #   expected data encountered unknown
        #   expected index B-tree encountered unknown
        #   expected blob encountered unknown
        with act.connect_server() as srv:
            srv.database.validate(database=act.db.db_path, lock_timeout=1)
            validation_log = srv.readlines()

        # gstat
        act.gstat(switches=['-e'])
    finally:
        # Restore DB content whatever happened above.
        act.db.db_path.write_bytes(raw_db_content)

    pattern = re.compile('(data|index|blob|other)\\s+pages[:]{0,1}\\s+total[:]{0,1}\\s+\\d+[,]{0,1}\\s+encrypted[:]{0,1}\\s+\\d+.*[,]{0,1}non-crypted[:]{0,1}\\s+\\d+.*', re.IGNORECASE)
    for line in act.stdout.splitlines():
        if pattern.match(line.strip()):
            print(line.strip())

    # Process validation log.
    data_page_problem = indx_page_problem = blob_page_problem = False
    for line in validation_log:
        if 'expected data' in line:
            data_page_problem = True
        elif 'expected index B-tree' in line:
            indx_page_problem = True
        elif 'expected blob' in line:
            blob_page_problem = True

    all_detected = data_page_problem and indx_page_problem and blob_page_problem
    print('Detected all THREE page types with problem => ' + ('YES' if all_detected else 'NO'))
    if not all_detected:
        print('Check: brk_datapage, brk_indxpage, brk_blobpage: ', brk_datapage, brk_indxpage, brk_blobpage)

    act.reset()
    act.expected_stdout = expected_stdout
    act.stdout = capsys.readouterr().out
    assert act.clean_stdout == act.clean_expected_stdout