8
0
mirror of https://github.com/FirebirdSQL/firebird.git synced 2025-01-22 20:43:02 +01:00

Improvement CORE-4935 : Increase performance of backout of a dead record version (or undo of a just-created primary record version) when the backversion chain is too long

Improvement CORE-4936 : Reduce internal contention in background garbage collector
Other small improvements: 
- garbage collector/sweep now skips the record header if its transaction is below OIT, which reduces the number of page fetches
- added missing points for gathering RECORD_FRAGMENT_READS and RECORD_BACKVERSION_READS statistics
This commit is contained in:
hvlad 2015-09-16 21:16:13 +00:00
parent 4bdfdeeb2a
commit f7e248ca63
4 changed files with 233 additions and 138 deletions

View File

@ -32,120 +32,57 @@ namespace Jrd {
// Frees the bitmap held by every m_tranData entry, then empties m_tranData and m_pages.
// NOTE(review): this listing is a rendered diff whose +/- markers were stripped; the
// m_tranData lines and the m_pages line presumably belong to different revisions of
// this function - confirm against the repository before relying on the combined text.
void GarbageCollector::RelationData::clear()
{
TranData::ConstAccessor accessor(&m_tranData);
if (accessor.getFirst())
{
do
{
// the map stores raw PageBitmap pointers, so each one is deleted explicitly
delete accessor.current()->second;
} while (accessor.getNext());
}
m_tranData.clear();
m_pages.clear();
}
// Looks up pageno in m_pages.
// Returns MAX_TRA_NUMBER when the page is not present. Otherwise lowers the
// stored transaction number to tranid when tranid is smaller, and returns the
// stored transaction number.
TraNumber GarbageCollector::RelationData::findPage(const ULONG pageno, const TraNumber tranid)
{
	PageTranMap::Accessor accessor(&m_pages);

	if (accessor.locate(pageno))
	{
		// hvlad: this routine may run under a shared sync only, so the
		// compare-and-store below would have to be atomic to be exact.
		// Exact precision is not required here.
		if (tranid < accessor.current().tranid)
			accessor.current().tranid = tranid;

		return accessor.current().tranid;
	}

	return MAX_TRA_NUMBER;
}
// Registers pageno for transaction tranid and returns a transaction number to the caller.
// NOTE(review): this listing is a rendered diff whose +/- markers were stripped, so two
// revisions of the body are interleaved. The statements working on m_tranData / PageBitmap
// (ending at "return minTraID;") presumably form the removed implementation, while the
// findPage() lookup and the trailing m_pages.add() / "return tranid;" presumably form the
// replacement - confirm against the repository. As written here, the last two statements
// are unreachable.
TraNumber GarbageCollector::RelationData::addPage(const ULONG pageno, const TraNumber tranid)
{
TraNumber minTraID = MAX_TRA_NUMBER;
TranData::ConstAccessor accessor(&m_tranData);
if (accessor.getFirst())
minTraID = accessor.current()->first;
// page already known: findPage() has lowered its transaction number if needed
TraNumber findTran = findPage(pageno, tranid);
if (findTran != MAX_TRA_NUMBER)
return findTran;
// look if given page number is already set at given tx bitmap
PageBitmap* bm = NULL;
const bool bmExists = m_tranData.get(tranid, bm);
if (bm && bm->test(pageno))
return minTraID;
// search for given page at other transactions bitmaps
// if found at older tx - we are done, just return
// if found at younger tx - clear it as page should be set at oldest tx (our)
if (minTraID != MAX_TRA_NUMBER)
{
do
{
const TranBitMap* item = accessor.current();
if (item->first <= tranid)
{
if (item->second->test(pageno))
return minTraID;
}
else
{
if (item->second->clear(pageno))
break;
}
} while(accessor.getNext());
}
// add page to our tx bitmap
PBM_SET(&m_pool, &bm, pageno);
if (!bmExists)
{
// a new map entry is stored only when tranid had none before
m_tranData.put(tranid, bm);
if (minTraID > tranid)
minTraID = tranid;
}
return minTraID;
m_pages.add(PageTran(pageno, tranid));
return tranid;
}
void GarbageCollector::RelationData::getPageBitmap(const TraNumber oldest_snapshot, PageBitmap** sbm)
void GarbageCollector::RelationData::swept(const TraNumber oldest_snapshot, PageBitmap** bm)
{
TranData::Accessor accessor(&m_tranData);
while (accessor.getFirst())
PageTranMap::Accessor pages(&m_pages);
bool next = pages.getFirst();
while (next)
{
TranBitMap* item = accessor.current();
if (item->first >= oldest_snapshot)
break;
PageBitmap* bm_tran = item->second;
PageBitmap** bm_or = PageBitmap::bit_or(sbm, &bm_tran);
if (*bm_or == item->second)
if (pages.current().tranid < oldest_snapshot)
{
bm_tran = *sbm;
*sbm = item->second;
item->second = bm_tran;
if (bm)
{
PBM_SET(&m_pool, bm, pages.current().pageno);
}
next = pages.fastRemove();
}
delete item->second;
m_tranData.remove(item->first);
else
next = pages.getNext();
}
}
void GarbageCollector::RelationData::swept(const TraNumber oldest_snapshot)
{
TranData::Accessor accessor(&m_tranData);
while (accessor.getFirst())
{
TranBitMap* item = accessor.current();
if (item->first >= oldest_snapshot)
break;
delete item->second;
m_tranData.remove(item->first);
}
}
// Returns the key of the first m_tranData entry, or MAX_TRA_NUMBER when the map is empty.
TraNumber GarbageCollector::RelationData::minTranID() const
{
	TranData::ConstAccessor cursor(&m_tranData);

	if (!cursor.getFirst())
		return MAX_TRA_NUMBER;

	return cursor.current()->first;
}
GarbageCollector::~GarbageCollector()
{
SyncLockGuard exGuard(&m_sync, SYNC_EXCLUSIVE, "GarbageCollector::~GarbageCollector");
@ -171,22 +108,27 @@ TraNumber GarbageCollector::addPage(const USHORT relID, const ULONG pageno, cons
Sync syncGC(&m_sync, "GarbageCollector::addPage");
RelationData* relData = getRelData(syncGC, relID, true);
SyncLockGuard syncData(&relData->m_sync, SYNC_EXCLUSIVE, "GarbageCollector::addPage");
SyncLockGuard syncData(&relData->m_sync, SYNC_SHARED, "GarbageCollector::addPage");
TraNumber minTraID = relData->findPage(pageno, tranid);
if (minTraID != MAX_TRA_NUMBER)
return minTraID;
syncData.unlock();
syncData.lock(SYNC_EXCLUSIVE, "GarbageCollector::addPage");
syncGC.unlock();
return relData->addPage(pageno, tranid);
}
bool GarbageCollector::getPageBitmap(const TraNumber oldest_snapshot, USHORT &relID, PageBitmap **sbm)
PageBitmap* GarbageCollector::getPages(const TraNumber oldest_snapshot, USHORT &relID)
{
*sbm = NULL;
SyncLockGuard shGuard(&m_sync, SYNC_EXCLUSIVE, "GarbageCollector::getPageBitmap");
SyncLockGuard shGuard(&m_sync, SYNC_SHARED, "GarbageCollector::getPages");
if (m_relations.isEmpty())
{
m_nextRelID = 0;
return false;
return NULL;
}
FB_SIZE_T pos;
@ -196,20 +138,21 @@ bool GarbageCollector::getPageBitmap(const TraNumber oldest_snapshot, USHORT &re
for (; pos < m_relations.getCount(); pos++)
{
RelationData* relData = m_relations[pos];
SyncLockGuard syncData(&relData->m_sync, SYNC_EXCLUSIVE, "GarbageCollector::getPageBitmap");
SyncLockGuard syncData(&relData->m_sync, SYNC_EXCLUSIVE, "GarbageCollector::getPages");
relData->getPageBitmap(oldest_snapshot, sbm);
PageBitmap* bm = NULL;
relData->swept(oldest_snapshot, &bm);
if (*sbm)
if (bm)
{
relID = relData->getRelID();
m_nextRelID = relID + 1;
return true;
return bm;
}
}
m_nextRelID = 0;
return false;
return NULL;
}
@ -249,23 +192,6 @@ void GarbageCollector::sweptRelation(const TraNumber oldest_snapshot, const USHO
}
// Returns minTranID() of the data kept for relation relID, or MAX_TRA_NUMBER
// when getRelData() yields no data for that relation (lookup is done without creation).
TraNumber GarbageCollector::minTranID(const USHORT relID)
{
	Sync syncGC(&m_sync, "GarbageCollector::minTranID");

	RelationData* const relData = getRelData(syncGC, relID, false);
	if (!relData)
		return MAX_TRA_NUMBER;

	// Take the per-relation sync before letting go of the collector-wide one.
	SyncLockGuard syncData(&relData->m_sync, SYNC_SHARED, "GarbageCollector::minTranID");
	syncGC.unlock();

	return relData->minTranID();
}
GarbageCollector::RelationData* GarbageCollector::getRelData(Sync &sync, const USHORT relID,
bool allowCreate)
{
@ -283,6 +209,7 @@ GarbageCollector::RelationData* GarbageCollector::getRelData(Sync &sync, const U
{
m_relations.insert(pos, FB_NEW(m_pool) RelationData(m_pool, relID));
}
sync.downgrade(SYNC_SHARED);
}
return m_relations[pos];

View File

@ -45,21 +45,39 @@ public:
~GarbageCollector();
TraNumber addPage(const USHORT relID, const ULONG pageno, const TraNumber tranid);
bool getPageBitmap(const TraNumber oldest_snapshot, USHORT &relID, PageBitmap** sbm);
PageBitmap* getPages(const TraNumber oldest_snapshot, USHORT &relID);
void removeRelation(const USHORT relID);
void sweptRelation(const TraNumber oldest_snapshot, const USHORT relID);
TraNumber minTranID(const USHORT relID);
private:
typedef Firebird::Pair<Firebird::NonPooled<TraNumber, PageBitmap*> > TranBitMap;
typedef Firebird::GenericMap<TranBitMap> TranData;
// A (page number, transaction number) pair.
// generate() exposes pageno as the key of an item.
struct PageTran
{
	ULONG pageno;
	TraNumber tranid;

	// Default item: both fields zero.
	PageTran()
		: pageno(0), tranid(0)
	{
	}

	PageTran(const ULONG _pageno, const TraNumber _tranid)
		: pageno(_pageno), tranid(_tranid)
	{
	}

	// Key extractor: the first argument is unused.
	static const ULONG& generate(const void*, const PageTran& item)
	{
		return item.pageno;
	}
};
typedef Firebird::BePlusTree<PageTran, ULONG, MemoryPool, PageTran> PageTranMap;
class RelationData
{
public:
explicit RelationData(MemoryPool& p, USHORT relID)
: m_pool(p), m_tranData(p), m_relID(relID)
: m_pool(p), m_pages(p), m_relID(relID)
{}
~RelationData()
@ -68,9 +86,8 @@ private:
}
TraNumber addPage(const ULONG pageno, const TraNumber tranid);
void getPageBitmap(const TraNumber oldest_snapshot, PageBitmap** sbm);
void swept(const TraNumber oldest_snapshot);
TraNumber minTranID() const;
TraNumber findPage(const ULONG pageno, const TraNumber tranid);
void swept(const TraNumber oldest_snapshot, PageBitmap** bm = NULL);
USHORT getRelID() const
{
@ -86,7 +103,7 @@ private:
Firebird::MemoryPool& m_pool;
Firebird::SyncObject m_sync;
TranData m_tranData;
PageTranMap m_pages;
USHORT m_relID;
};

View File

@ -197,6 +197,9 @@ void DPM_backout_mark(thread_db* tdbb, record_param* rpb, const jrd_tra* transac
header->rhd_flags |= rhd_gc_active;
header->rhd_transaction = transaction->tra_number;
rpb->rpb_flags |= rpb_gc_active;
rpb->rpb_transaction_nr = transaction->tra_number;
CCH_RELEASE(tdbb, window);
}
@ -1569,6 +1572,8 @@ bool DPM_next(thread_db* tdbb, record_param* rpb, USHORT lock_type, bool onepage
// check processed pages if they were swept.
const bool sweeper = (rpb->rpb_stream_flags & RPB_s_sweeper);
jrd_tra* transaction = tdbb->getTransaction();
const TraNumber oldest = transaction ? transaction->tra_oldest : 0;
if (sweeper && (pp_sequence || slot) && !line)
{
@ -1629,6 +1634,9 @@ bool DPM_next(thread_db* tdbb, record_param* rpb, USHORT lock_type, bool onepage
if (get_header(window, line, rpb) &&
!(rpb->rpb_flags & (rpb_blob | rpb_chained | rpb_fragment)))
{
if (sweeper && !rpb->rpb_b_page && rpb->rpb_transaction_nr <= oldest)
continue;
rpb->rpb_number.compose(dbb->dbb_max_records, dbb->dbb_dp_per_pp,
line, slot, pp_sequence);
return true;

View File

@ -147,6 +147,7 @@ static UndoDataRet get_undo_data(thread_db* tdbb, jrd_tra* transaction,
static void invalidate_cursor_records(jrd_tra*, record_param*);
static void list_staying(thread_db*, record_param*, RecordStack&);
static void list_staying_fast(thread_db*, record_param*, RecordStack&, record_param* = NULL);
static void notify_garbage_collector(thread_db* tdbb, record_param* rpb,
TraNumber tranid = MAX_TRA_NUMBER);
@ -571,7 +572,7 @@ void VIO_backout(thread_db* tdbb, record_param* rpb, const jrd_tra* transaction)
DPM_backout_mark(tdbb, rpb, transaction);
rpb->rpb_prior = NULL;
list_staying(tdbb, rpb, staying);
list_staying_fast(tdbb, rpb, staying, &temp);
IDX_garbage_collect(tdbb, rpb, going, staying);
BLB_garbage_collect(tdbb, going, staying, rpb->rpb_page, relation);
@ -1429,6 +1430,8 @@ void VIO_erase(thread_db* tdbb, record_param* rpb, jrd_tra* transaction)
if (transaction->tra_flags & TRA_system)
{
// hvlad: what if record was created\modified by user tx also,
// i.e. if there is backversion ???
VIO_backout(tdbb, rpb, transaction);
return;
}
@ -4436,6 +4439,9 @@ static UCHAR* delete_tail(thread_db* tdbb,
rpb->rpb_f_page, rpb->rpb_f_line);
#endif
RuntimeStatistics::Accumulator fragments(tdbb, rpb->rpb_relation,
RuntimeStatistics::RECORD_FRAGMENT_READS);
while (rpb->rpb_flags & rpb_incomplete)
{
rpb->rpb_page = rpb->rpb_f_page;
@ -4451,6 +4457,8 @@ static UCHAR* delete_tail(thread_db* tdbb,
DPM_delete(tdbb, rpb, prior_page);
prior_page = rpb->rpb_page;
++fragments;
}
return tail;
@ -4604,6 +4612,9 @@ static void garbage_collect(thread_db* tdbb, record_param* rpb, ULONG prior_page
rpb->rpb_f_page, rpb->rpb_f_line);
#endif
RuntimeStatistics::Accumulator backversions(tdbb, rpb->rpb_relation,
RuntimeStatistics::RECORD_BACKVERSION_READS);
// Delete old versions fetching data for garbage collection.
RecordStack going;
@ -4623,6 +4634,8 @@ static void garbage_collect(thread_db* tdbb, record_param* rpb, ULONG prior_page
if (rpb->rpb_record)
going.push(rpb->rpb_record);
++backversions;
// Don't monopolize the server while chasing long back version chains.
if (--tdbb->tdbb_quantum < 0)
JRD_reschedule(tdbb, 0, true);
@ -4776,7 +4789,7 @@ static THREAD_ENTRY_DECLARE garbage_collector(THREAD_ENTRY_PARAM arg)
PageBitmap* gc_bitmap = NULL;
if ((dbb->dbb_flags & DBB_gc_pending) &&
gc->getPageBitmap(dbb->dbb_oldest_snapshot, relID, &gc_bitmap))
(gc_bitmap = gc->getPages(dbb->dbb_oldest_snapshot, relID)))
{
relation = MET_lookup_relation_id(tdbb, relID, false);
if (!relation || (relation->rel_flags & (REL_deleted | REL_deleting)))
@ -5076,6 +5089,119 @@ static void invalidate_cursor_records(jrd_tra* transaction, record_param* mod_rp
}
static void list_staying_fast(thread_db* tdbb, record_param* rpb, RecordStack& staying, record_param* back_rpb)
{
/**************************************
*
* l i s t _ s t a y i n g _ f a s t
*
**************************************
*
* Functional description
* Get all the data that's staying so we can clean up indexes etc.
* without losing anything. This is the fast version of the old list_staying.
* It is used when the current transaction owns the record, which guarantees
* that the version chain does not change while it is being walked.
*
* Parameters
* tdbb     - thread context.
* rpb      - record whose version chain is walked; only a local copy is modified.
* staying  - stack receiving the record versions that stay.
* back_rpb - optional (may be NULL); if it refers to the version whose chain
*            gets truncated below, its back pointer is cleared too.
*
**************************************/
// Work on a private copy so the caller's rpb is left untouched.
record_param temp = *rpb;
if (!DPM_fetch(tdbb, &temp, LCK_read))
{
// It is impossible as our transaction owns the record
BUGCHECK(186); // msg 186 record disappeared
return;
}
// The freshly fetched header is expected to match what the caller already holds.
fb_assert(temp.rpb_b_page == rpb->rpb_b_page);
fb_assert(temp.rpb_b_line == rpb->rpb_b_line);
fb_assert(temp.rpb_flags == rpb->rpb_flags);
// Primary version being backed out, if any; it is not pushed to staying and is deleted at the end.
Record* backout_rec = NULL;
RuntimeStatistics::Accumulator backversions(tdbb, rpb->rpb_relation,
RuntimeStatistics::RECORD_BACKVERSION_READS);
if (temp.rpb_flags & rpb_deleted)
{
// No record data is read for a version flagged as deleted; just release the window.
CCH_RELEASE(tdbb, &temp.getWindow(tdbb));
}
else
{
temp.rpb_record = NULL;
// VIO_data below could change the flags
const bool backout = (temp.rpb_flags & rpb_gc_active);
VIO_data(tdbb, &temp, tdbb->getDefaultPool());
if (!backout)
staying.push(temp.rpb_record);
else
{
fb_assert(!backout_rec);
backout_rec = temp.rpb_record;
}
}
const TraNumber oldest_active = tdbb->getTransaction()->tra_oldest_active;
// Walk the back version chain; every back version read here stays.
while (temp.rpb_b_page)
{
// Remember this back version's location: it is needed below to re-fetch the
// version for write and to compare against back_rpb.
ULONG page = temp.rpb_page = temp.rpb_b_page;
USHORT line = temp.rpb_line = temp.rpb_b_line;
temp.rpb_record = NULL;
if (temp.rpb_flags & rpb_delta)
fb_assert(temp.rpb_prior != NULL);
else
fb_assert(temp.rpb_prior == NULL);
// NOTE(review): the fetch result is only checked by fb_assert - confirm that a failed
// fetch cannot happen in builds where fb_assert is compiled out.
bool ok = DPM_fetch(tdbb, &temp, LCK_read);
fb_assert(ok);
fb_assert(temp.rpb_flags & rpb_chained);
fb_assert(!(temp.rpb_flags & (rpb_blob | rpb_fragment)));
VIO_data(tdbb, &temp, tdbb->getDefaultPool());
staying.push(temp.rpb_record);
++backversions;
// This version is older than the oldest active transaction and still has a
// further back pointer: cut the chain here and garbage collect the tail.
if (temp.rpb_transaction_nr < oldest_active && temp.rpb_b_page)
{
temp.rpb_page = page;
temp.rpb_line = line;
// temp2 keeps the back pointer of this version; it is handed to garbage_collect
// after temp's own back pointer has been cleared.
record_param temp2 = temp;
if (DPM_fetch(tdbb, &temp, LCK_write))
{
temp.rpb_b_page = 0;
temp.rpb_b_line = 0;
temp.rpb_flags &= ~(rpb_delta | rpb_gc_active);
CCH_MARK(tdbb, &temp.getWindow(tdbb));
DPM_rewrite_header(tdbb, &temp);
CCH_RELEASE(tdbb, &temp.getWindow(tdbb));
garbage_collect(tdbb, &temp2, temp.rpb_page, staying);
tdbb->bumpRelStats(RuntimeStatistics::RECORD_PURGES, temp.rpb_relation->rel_id);
// Keep the caller's copy consistent when it describes the version just truncated.
if (back_rpb && back_rpb->rpb_page == page && back_rpb->rpb_line == line)
{
back_rpb->rpb_b_page = 0;
back_rpb->rpb_b_line = 0;
}
// The walk ends once the chain has been cut.
break;
}
}
// Don't monopolize the server while chasing long back version chains.
if (--tdbb->tdbb_quantum < 0)
JRD_reschedule(tdbb, 0, true);
}
delete backout_rec;
}
static void list_staying(thread_db* tdbb, record_param* rpb, RecordStack& staying)
{
/**************************************
@ -5096,6 +5222,17 @@ static void list_staying(thread_db* tdbb, record_param* rpb, RecordStack& stayin
**************************************/
SET_TDBB(tdbb);
// Use fast way if possible
if (rpb->rpb_transaction_nr)
{
jrd_tra* transaction = tdbb->getTransaction();
if (transaction && transaction->tra_number == rpb->rpb_transaction_nr)
{
list_staying_fast(tdbb, rpb, staying);
return;
}
}
Record* data = rpb->rpb_prior;
Record* backout_rec = NULL;
ULONG next_page = rpb->rpb_page;
@ -5565,7 +5702,10 @@ static int prepare_update( thread_db* tdbb,
continue;
}
stack.push(PageNumber(DB_PAGE_SPACE, temp->rpb_page));
{
const USHORT pageSpaceID = temp->getWindow(tdbb).win_page.getPageSpaceID();
stack.push(PageNumber(pageSpaceID, temp->rpb_page));
}
return PREPARE_OK;
case tra_active:
@ -6035,7 +6175,10 @@ static void update_in_place(thread_db* tdbb,
temp2.rpb_number = org_rpb->rpb_number;
DPM_store(tdbb, &temp2, stack, DPM_secondary);
stack.push(PageNumber(DB_PAGE_SPACE, temp2.rpb_page));
{
const USHORT pageSpaceID = temp2.getWindow(tdbb).win_page.getPageSpaceID();
stack.push(PageNumber(pageSpaceID, temp2.rpb_page));
}
}
if (!DPM_get(tdbb, org_rpb, LCK_write))