8
0
mirror of https://github.com/FirebirdSQL/firebird.git synced 2025-01-23 22:03:03 +01:00

Split the undo log into two parts: an in-memory one for the initial undo items (which are now much more compact) and an on-disk one (cached up to TempSpaceLimit) for full record versions (used in update-in-place).

This dramatically reduces memory usage for bulk updates, thus solving CORE-1575 (serious memory bug on multiple update a table in a single transaction) and CORE-1477 (Avoiding excessive memory consumption by undo-log after series of updates), and preventing out-of-memory conditions that could often lead to record-level corruptions.
This commit is contained in:
dimitr 2008-10-14 10:32:54 +00:00
parent 1a943d1990
commit e0db5f3ecd
3 changed files with 110 additions and 54 deletions

View File

@ -3460,9 +3460,12 @@ static jrd_tra* transaction_start(thread_db* tdbb, jrd_tra* temp)
jrd_tra::~jrd_tra()
{
delete tra_undo_record;
if (!tra_outer)
{
delete tra_temp_space;
delete tra_blob_space;
delete tra_undo_space;
}
DFW_delete_deferred(this, -1);

View File

@ -90,7 +90,8 @@ typedef Firebird::BePlusTree<BlobIndex, ULONG, MemoryPool, BlobIndex> BlobIndexT
/* Transaction block */
// Initial value of tra_lock_timeout for a newly constructed transaction
const int DEFAULT_LOCK_TIMEOUT = -1; // infinite
// Names handed to the TempSpace constructor for the per-transaction storages
// (presumably used as temporary file name prefixes -- confirm against TempSpace)
const char* const TRA_TEMP_SPACE = "fb_trans_";
const char* const TRA_BLOB_SPACE = "fb_blob_"; // temporary blob storage
const char* const TRA_UNDO_SPACE = "fb_undo_"; // undo log storage
class jrd_tra : public pool_alloc<type_tra>
{
@ -108,14 +109,15 @@ public:
tra_memory_stats(parent_stats),
tra_blobs_tree(p),
tra_blobs(&tra_blobs_tree),
tra_deferred_work(0),
tra_deferred_work(NULL),
tra_resources(*p),
tra_context_vars(*p),
tra_lock_timeout(DEFAULT_LOCK_TIMEOUT),
tra_timestamp(Firebird::TimeStamp::getCurrentTimeStamp()),
tra_open_cursors(*p),
tra_outer(outer),
tra_transactions(*p)
tra_transactions(*p),
tra_undo_record(NULL)
{
if (outer)
{
@ -203,7 +205,10 @@ public:
//Transaction *tra_ext_two_phase;
private:
TempSpace* tra_temp_space; // temp space storage
TempSpace* tra_blob_space; // temp blob storage
TempSpace* tra_undo_space; // undo log storage
Record* tra_undo_record; // temporary record used for the undo purposes
public:
SSHORT getLockWait() const
@ -211,15 +216,39 @@ public:
return -tra_lock_timeout;
}
// Returns the temporary space that holds this transaction's blobs.
//
// A transaction that has an outer transaction (tra_outer) keeps no storage
// of its own and forwards the request to the outer one. Otherwise the space
// is allocated from tra_pool on first use and reused afterwards.
TempSpace* getBlobSpace()
{
	if (tra_outer)
		return tra_outer->getBlobSpace();

	if (!tra_blob_space)
		tra_blob_space = FB_NEW(*tra_pool) TempSpace(*tra_pool, TRA_BLOB_SPACE);

	return tra_blob_space;
}
// Gives access to the temporary space backing the undo log.
//
// When an outer transaction exists, its undo space is used; otherwise this
// transaction's own space is returned, being allocated from tra_pool the
// first time it is requested.
TempSpace* getUndoSpace()
{
	if (tra_outer)
	{
		return tra_outer->getUndoSpace();
	}

	if (tra_undo_space == NULL)
	{
		tra_undo_space = FB_NEW(*tra_pool) TempSpace(*tra_pool, TRA_UNDO_SPACE);
	}

	return tra_undo_space;
}
// Returns the transaction-wide scratch record used to materialize undo items.
//
// The record is (re)allocated from tra_pool when there is none yet or when
// the length stored in the current one is smaller than the requested one.
// On return its plain fields and the first `length` data bytes are zeroed.
//
// length - number of data bytes the caller is going to place into rec_data
//
// NOTE(review): the reuse test looks at rec_length, which callers overwrite
// with the length of the last item, not at the allocated capacity; a small
// item followed by a bigger one therefore reallocates even if the buffer
// would have been large enough. Harmless, but worth a capacity field.
Record* getUndoRecord(USHORT length)
{
	if (!tra_undo_record || tra_undo_record->rec_length < length)
	{
		delete tra_undo_record;
		// Do not keep a dangling pointer around: if the allocation below
		// fails with an exception, the destructor would delete it again.
		tra_undo_record = NULL;
		tra_undo_record = FB_NEW_RPT(*tra_pool, length) Record(*tra_pool);
	}

	// Clear the record starting at rec_format instead of at the object start,
	// so that the constructed rec_pool / rec_precedence members are not
	// overwritten with zero bytes. realloc_record() in vio.cpp copies records
	// from rec_format onwards for the same reason.
	// NOTE(review): assumes every member from rec_format to the end of Record
	// is plain data -- confirm against the Record declaration.
	char* const clearFrom = reinterpret_cast<char*>(&tra_undo_record->rec_format);
	char* const clearTo = reinterpret_cast<char*>(tra_undo_record) + sizeof(Record) + length;
	memset(clearFrom, 0, clearTo - clearFrom);

	return tra_undo_record;
}
};
@ -396,18 +425,66 @@ public:
class UndoItem
{
public:
SINT64 rec_number;
Record* rec_data;
static const SINT64& generate(const void *sender, const UndoItem& item)
{
return item.rec_number;
return item.number;
}
UndoItem() {}
UndoItem(SINT64 rec_numberL, Record* rec_dataL)
UndoItem(RecordNumber recordNumber, UCHAR recordFlags)
: number(recordNumber.getValue()),
length(0), format(NULL), offset(0),
flags(recordFlags)
{}
UndoItem(jrd_tra* transaction, RecordNumber recordNumber, const Record* record, UCHAR recordFlags)
: number(recordNumber.getValue()),
length(record->rec_length),
format(record->rec_format),
flags(recordFlags)
{
this->rec_number = rec_numberL;
this->rec_data = rec_dataL;
if (length)
{
offset = transaction->getUndoSpace()->allocateSpace(length);
transaction->getUndoSpace()->write(offset, record->rec_data, length);
}
}
Record* setupRecord(jrd_tra* transaction, UCHAR newFlags = 0)
{
flags |= newFlags;
Record* const record = transaction->getUndoRecord(length);
record->rec_number.setValue(number);
record->rec_flags = flags;
record->rec_length = length;
record->rec_format = format;
if (length)
{
transaction->getUndoSpace()->read(offset, record->rec_data, length);
}
return record;
}
void release(jrd_tra* transaction)
{
if (length)
{
transaction->getUndoSpace()->releaseSpace(offset, length);
length = 0;
format = NULL;
}
}
private:
SINT64 number;
UCHAR flags;
USHORT length;
offset_t offset;
const Format* format;
};
typedef Firebird::BePlusTree<UndoItem, SINT64, MemoryPool, UndoItem> UndoItemTree;
@ -424,4 +501,3 @@ public:
} //namespace Jrd
#endif // JRD_TRA_H

View File

@ -3030,7 +3030,7 @@ void VIO_verb_cleanup(thread_db* tdbb, jrd_tra* transaction)
verb_count is not zero. */
RecordBitmap::Accessor accessor(action->vct_records);
if (accessor.getFirst())
if (accessor.getFirst())
do {
rpb.rpb_number.setValue(accessor.current());
if (!DPM_get(tdbb, &rpb, LCK_write)) {
@ -3051,7 +3051,7 @@ void VIO_verb_cleanup(thread_db* tdbb, jrd_tra* transaction)
VIO_backout(tdbb, &rpb, transaction);
}
else {
Record* record = action->vct_undo->current().rec_data;
Record* const record = action->vct_undo->current().setupRecord(transaction);
const bool same_tx = (record->rec_flags & REC_same_tx) != 0;
/* Have we done BOTH an update and delete to this record
@ -3122,7 +3122,7 @@ void VIO_verb_cleanup(thread_db* tdbb, jrd_tra* transaction)
BUGCHECK(186); /* msg 186 record disappeared */
}
CCH_RELEASE(tdbb, &rpb.getWindow(tdbb));
Record* record = action->vct_undo->current().rec_data;
Record* const record = action->vct_undo->current().setupRecord(transaction);
const bool same_tx = (record->rec_flags & REC_same_tx) != 0;
const bool new_ver = (record->rec_flags & REC_new_version) != 0;
if (record->rec_length != 0) {
@ -3130,8 +3130,7 @@ void VIO_verb_cleanup(thread_db* tdbb, jrd_tra* transaction)
new_rpb.rpb_record = record;
new_rpb.rpb_address = record->rec_data;
new_rpb.rpb_length = record->rec_length;
verb_post(tdbb, transaction, &rpb, record,
&new_rpb, same_tx, new_ver);
verb_post(tdbb, transaction, &rpb, record, &new_rpb, same_tx, new_ver);
}
else if (same_tx) {
verb_post(tdbb, transaction, &rpb, 0, 0, true, new_ver);
@ -3148,7 +3147,7 @@ void VIO_verb_cleanup(thread_db* tdbb, jrd_tra* transaction)
if (action->vct_undo) {
if (action->vct_undo->getFirst()) {
do {
delete action->vct_undo->current().rec_data;
action->vct_undo->current().release(transaction);
} while (action->vct_undo->getNext());
}
delete action->vct_undo;
@ -4332,8 +4331,7 @@ static Record* realloc_record(Record*& record, USHORT fmt_length)
* Realloc a record to accomodate longer length format.
*
**************************************/
Record* new_record = FB_NEW_RPT(record->rec_pool, fmt_length)
Record(record->rec_pool);
Record* new_record = FB_NEW_RPT(record->rec_pool, fmt_length) Record(record->rec_pool);
new_record->rec_precedence.takeOwnership(record->rec_precedence);
// start copying at rec_format, to not mangle source->rec_precedence
@ -5083,7 +5081,6 @@ static void verb_post(
* false in all other cases.
*
**************************************/
#pragma FB_COMPILER_MESSAGE("Out-of-memory condition in this function corrupts database. And it is likely due to huge amounts of allocations")
SET_TDBB(tdbb);
Jrd::ContextPoolHolder context(tdbb, transaction->tra_pool);
@ -5116,37 +5113,21 @@ static void verb_post(
/* An update-in-place is being posted to this savepoint, and this
savepoint hasn't seen this record before. */
Record* data = FB_NEW_RPT(*tdbb->getDefaultPool(), old_data->rec_length) Record(*tdbb->getDefaultPool());
data->rec_number = rpb->rpb_number;
data->rec_length = old_data->rec_length;
data->rec_format = old_data->rec_format;
if (same_tx) {
data->rec_flags |= REC_same_tx;
}
memcpy(data->rec_data, old_data->rec_data, old_data->rec_length);
if (!action->vct_undo) {
action->vct_undo = new UndoItemTree(tdbb->getDefaultPool());
}
action->vct_undo->add(UndoItem(rpb->rpb_number.getValue(), data));
const UCHAR flags = same_tx ? REC_same_tx : 0;
action->vct_undo->add(UndoItem(transaction, rpb->rpb_number, old_data, flags));
}
else if (same_tx) {
/* An insert/update followed by a delete is posted to this savepoint,
and this savepoint hasn't seen this record before. */
Record* data = FB_NEW_RPT(*tdbb->getDefaultPool(), 1) Record(*tdbb->getDefaultPool());
data->rec_number = rpb->rpb_number;
data->rec_length = 0;
if (new_ver) {
data->rec_flags |= (REC_same_tx | REC_new_version);
}
else {
data->rec_flags |= REC_same_tx;
}
if (!action->vct_undo) {
action->vct_undo = new UndoItemTree(tdbb->getDefaultPool());
}
action->vct_undo->add(UndoItem(rpb->rpb_number.getValue(), data));
const UCHAR flags = REC_same_tx | (new_ver ? REC_new_version : 0);
action->vct_undo->add(UndoItem(rpb->rpb_number, flags));
}
}
else if (same_tx) {
@ -5154,22 +5135,18 @@ static void verb_post(
if (action->vct_undo && action->vct_undo->locate(rpb->rpb_number.getValue())) {
/* An insert/update followed by a delete is posted to this savepoint,
and this savepoint has already undo for this record. */
undo = action->vct_undo->current().rec_data;
undo->rec_flags |= REC_same_tx;
undo = action->vct_undo->current().setupRecord(transaction, REC_same_tx);
}
else {
/* An insert/update followed by a delete is posted to this savepoint,
and this savepoint has seen this record before but it doesn't have
undo data. */
Record* data = FB_NEW_RPT(*tdbb->getDefaultPool(), 1) Record(*tdbb->getDefaultPool());
data->rec_number = rpb->rpb_number;
data->rec_length = 0;
data->rec_flags |= (REC_same_tx | REC_new_version);
if (!action->vct_undo) {
action->vct_undo = new UndoItemTree(tdbb->getDefaultPool());
}
action->vct_undo->add(UndoItem(rpb->rpb_number.getValue(), data));
const UCHAR flags = REC_same_tx | REC_new_version;
action->vct_undo->add(UndoItem(rpb->rpb_number, flags));
}
if (old_data) {
/* The passed old_data will not be used. Thus, garbage collect. */
@ -5185,7 +5162,7 @@ static void verb_post(
Record* undo = NULL;
if (action->vct_undo && action->vct_undo->locate(rpb->rpb_number.getValue())) {
undo = action->vct_undo->current().rec_data;
undo = action->vct_undo->current().setupRecord(transaction);
}
garbage_collect_idx(tdbb, rpb, new_rpb, old_data, undo);