From b2b911fe6cc9b79c6c8c46de4b2a5d9085106aed Mon Sep 17 00:00:00 2001 From: hvlad Date: Wed, 2 Feb 2011 10:27:48 +0000 Subject: [PATCH] Improvement CORE-3323 : Ability to cancel waiting in lock manager --- src/jrd/jrd.cpp | 161 +++++++++++++++++++++++------------------- src/jrd/jrd.h | 4 ++ src/jrd/lck.cpp | 80 +++++++++++++++++++++ src/jrd/lck_proto.h | 1 + src/jrd/pag.cpp | 1 + src/jrd/shut.cpp | 1 + src/jrd/tra.cpp | 1 + src/lock/lock.cpp | 39 +++++++++- src/lock/lock_proto.h | 1 + 9 files changed, 215 insertions(+), 74 deletions(-) diff --git a/src/jrd/jrd.cpp b/src/jrd/jrd.cpp index a34f9f3f9e..94ed707d5f 100644 --- a/src/jrd/jrd.cpp +++ b/src/jrd/jrd.cpp @@ -1583,6 +1583,7 @@ ISC_STATUS FB_CANCEL_OPERATION(ISC_STATUS* user_status, Attachment** handle, USH { attachment->att_flags |= ATT_cancel_raise; attachment->cancelExternalConnection(tdbb); + LCK_cancel_wait(attachment); } break; @@ -4033,84 +4034,24 @@ bool JRD_reschedule(thread_db* tdbb, SLONG quantum, bool punt) THREAD_YIELD(); } - // Test various flags and unwind/throw if required. - // But do that only if we're neither in the verb cleanup state - // nor currently detaching, as these actions should never be interrupted. 
- - if (!(tdbb->tdbb_flags & (TDBB_verb_cleanup | TDBB_detaching))) + try { + tdbb->checkCancelState(punt); + } + catch (const status_exception& ex) { - // If database has been shutdown then get out + tdbb->tdbb_flags |= TDBB_sys_error; - Attachment* const attachment = tdbb->getAttachment(); - jrd_tra* const transaction = tdbb->getTransaction(); - jrd_req* const request = tdbb->getRequest(); + const Arg::StatusVector status(ex.value()); - try + if (punt) { - if (attachment) - { - if (attachment->att_flags & ATT_shutdown) - { - if (dbb->dbb_ast_flags & DBB_shutdown) - { - status_exception::raise(Arg::Gds(isc_shutdown) << - Arg::Str(attachment->att_filename)); - } - else if (!(tdbb->tdbb_flags & TDBB_shutdown_manager)) - { - status_exception::raise(Arg::Gds(isc_att_shutdown)); - } - } - - // If a cancel has been raised, defer its acknowledgement - // when executing in the context of an internal request or - // the system transaction. - - if ((attachment->att_flags & ATT_cancel_raise) && - !(attachment->att_flags & ATT_cancel_disable)) - { - if ((!request || - !(request->req_flags & (req_internal | req_sys_trigger))) && - (!transaction || !(transaction->tra_flags & TRA_system))) - { - attachment->att_flags &= ~ATT_cancel_raise; - status_exception::raise(Arg::Gds(isc_cancelled)); - } - } - } - - // Handle request cancellation - - if (transaction && (transaction->tra_flags & TRA_cancel_request)) - { - transaction->tra_flags &= ~TRA_cancel_request; - status_exception::raise(Arg::Gds(isc_cancelled)); - } - - // Check the thread state for already posted system errors. If any still persists, - // then someone tries to ignore our attempts to interrupt him. Let's insist. 
- - if (tdbb->tdbb_flags & TDBB_sys_error) - { - status_exception::raise(Arg::Gds(isc_cancelled)); - } + CCH_unwind(tdbb, false); + ERR_post(status); } - catch (const status_exception& ex) + else { - tdbb->tdbb_flags |= TDBB_sys_error; - - const Arg::StatusVector status(ex.value()); - - if (punt) - { - CCH_unwind(tdbb, false); - ERR_post(status); - } - else - { - ERR_build_status(tdbb->tdbb_status_vector, status); - return true; - } + ERR_build_status(tdbb->tdbb_status_vector, status); + return true; } } @@ -5785,6 +5726,7 @@ static unsigned int purge_transactions(thread_db* tdbb, if ((transaction->tra_flags & TRA_prepared) || (dbb->dbb_ast_flags & DBB_shutdown) || (att_flags & ATT_shutdown)) { + EDS::Transaction::jrdTransactionEnd(tdbb, transaction, false, false, true); TRA_release_transaction(tdbb, transaction); } else if (force_flag) @@ -6235,6 +6177,81 @@ static THREAD_ENTRY_DECLARE shutdown_thread(THREAD_ENTRY_PARAM arg) return 0; } +bool thread_db::checkCancelState(bool punt) +{ + // Test various flags and unwind/throw if required. + // But do that only if we're neither in the verb cleanup state + // nor currently detaching, as these actions should never be interrupted. + // Also don't break wait in LM if it is not safe. + + if (tdbb_flags & (TDBB_verb_cleanup | TDBB_detaching | TDBB_wait_cancel_disable)) + return false; + + if (attachment) + { + if (attachment->att_flags & ATT_shutdown) + { + if (database->dbb_ast_flags & DBB_shutdown) + { + if (!punt) + return true; + + status_exception::raise(Arg::Gds(isc_shutdown) << + Arg::Str(attachment->att_filename)); + } + else if (!(tdbb_flags & TDBB_shutdown_manager)) + { + if (!punt) + return true; + + status_exception::raise(Arg::Gds(isc_att_shutdown)); + } + } + + // If a cancel has been raised, defer its acknowledgement + // when executing in the context of an internal request or + // the system transaction. 
+ + if ((attachment->att_flags & ATT_cancel_raise) && + !(attachment->att_flags & ATT_cancel_disable)) + { + if ((!request || + !(request->req_flags & (req_internal | req_sys_trigger))) && + (!transaction || !(transaction->tra_flags & TRA_system))) + { + if (!punt) + return true; + + attachment->att_flags &= ~ATT_cancel_raise; + status_exception::raise(Arg::Gds(isc_cancelled)); + } + } + } + + // Handle request cancellation + + if (transaction && (transaction->tra_flags & TRA_cancel_request)) + { + if (!punt) + return true; + + transaction->tra_flags &= ~TRA_cancel_request; + status_exception::raise(Arg::Gds(isc_cancelled)); + } + + // Check the thread state for already posted system errors. If any still persists, + // then someone tries to ignore our attempts to interrupt him. Let's insist. + + if (tdbb_flags & TDBB_sys_error) + { + if (!punt) + return true; + + status_exception::raise(Arg::Gds(isc_cancelled)); + } + + return false; +} void thread_db::setTransaction(jrd_tra* val) { diff --git a/src/jrd/jrd.h b/src/jrd/jrd.h index 12144355fc..028f487e19 100644 --- a/src/jrd/jrd.h +++ b/src/jrd/jrd.h @@ -313,6 +313,7 @@ public: ULONG att_flags; // Flags describing the state of the attachment SSHORT att_charset; // user's charset specified in dpb Lock* att_long_locks; // outstanding two phased locks + Lock* att_wait_lock; // lock at which attachment waits currently vec* att_compatibility_table; // hash table of compatible locks vcl* att_val_errors; Firebird::PathName att_working_directory; // Current working directory is cached @@ -778,6 +779,8 @@ public: //attStat->bumpValue(index, relation_id); //dbbStat->bumpValue(index, relation_id); } + + bool checkCancelState(bool punt); }; // tdbb_flags @@ -793,6 +796,7 @@ const USHORT TDBB_sys_error = 128; // error shouldn't be handled by the loope const USHORT TDBB_verb_cleanup = 256; // verb cleanup is in progress const USHORT TDBB_use_db_page_space = 512; // use database (not temporary) page space in GTT operations const 
USHORT TDBB_detaching = 1024; // detach is in progress +const USHORT TDBB_wait_cancel_disable = 2048; // don't cancel current waiting operation class ThreadContextHolder diff --git a/src/jrd/lck.cpp b/src/jrd/lck.cpp index fc6d2eb34d..a3a422968b 100644 --- a/src/jrd/lck.cpp +++ b/src/jrd/lck.cpp @@ -211,6 +211,61 @@ inline bool checkLock(const Lock* l) #define LCK_CHECK_LOCK(x) true // nothing #endif +namespace { +// This class is used as a guard around a long waiting call into the LM and has +// two purposes: +// - set and restore att_wait_lock while waiting inside the LM +// - set or clear and restore the TDBB_wait_cancel_disable flag depending +// on whether cancelling the lock wait is safe. Currently we can safely cancel +// only LCK_tra locks + +class WaitCancelGuard +{ +public: + WaitCancelGuard(thread_db* tdbb, Lock* lock, int wait) : + m_tdbb(tdbb), + m_save_lock(NULL) + { + Attachment* att = m_tdbb->getAttachment(); + m_save_lock = att->att_wait_lock; + + m_cancel_disabled = (m_tdbb->tdbb_flags & TDBB_wait_cancel_disable); + m_tdbb->tdbb_flags |= TDBB_wait_cancel_disable; + + if (!wait) + return; + + switch (lock->lck_type) + { + case LCK_tra: + m_tdbb->tdbb_flags &= ~TDBB_wait_cancel_disable; + att->att_wait_lock = lock; + break; + + default: + ; + } + } + + ~WaitCancelGuard() + { + Attachment* att = m_tdbb->getAttachment(); + att->att_wait_lock = m_save_lock; + + if (m_cancel_disabled) + m_tdbb->tdbb_flags |= TDBB_wait_cancel_disable; + else + m_tdbb->tdbb_flags &= ~TDBB_wait_cancel_disable; + } + +private: + thread_db* m_tdbb; + Lock* m_save_lock; + bool m_cancel_disabled; +}; + +} // namespace + void LCK_assert(thread_db* tdbb, Lock* lock) { @@ -259,6 +314,7 @@ bool LCK_convert(thread_db* tdbb, Lock* lock, USHORT level, SSHORT wait) Attachment* const old_attachment = lock->lck_attachment; set_lock_attachment(lock, tdbb->getAttachment()); + WaitCancelGuard guard(tdbb, lock, wait); const bool result = CONVERT(tdbb, lock, level, wait); if (!result) @@ -270,6 +326,7 
@@ bool LCK_convert(thread_db* tdbb, Lock* lock, USHORT level, SSHORT wait) case isc_deadlock: case isc_lock_conflict: case isc_lock_timeout: + tdbb->checkCancelState(true); return false; case isc_lockmanerr: dbb->dbb_flags |= DBB_bugcheck; @@ -316,6 +373,27 @@ bool LCK_convert_opt(thread_db* tdbb, Lock* lock, USHORT level) } +bool LCK_cancel_wait(Attachment* attachment) +{ +/************************************** + * + * L C K _ c a n c e l _ w a i t + * + ************************************** + * + * Functional description + * Try to cancel waiting of attachment inside the LM + * + **************************************/ + Database *dbb = attachment->att_database; + + if (attachment->att_wait_lock) + return dbb->dbb_lock_mgr->cancelWait(attachment->att_wait_lock->lck_owner_handle); + + return false; +} + + void LCK_downgrade(thread_db* tdbb, Lock* lock) { /************************************** @@ -535,6 +613,7 @@ bool LCK_lock(thread_db* tdbb, Lock* lock, USHORT level, SSHORT wait) Database* dbb = lock->lck_dbb; set_lock_attachment(lock, tdbb->getAttachment()); + WaitCancelGuard guard(tdbb, lock, wait); ENQUEUE(tdbb, lock, level, wait); fb_assert(LCK_CHECK_LOCK(lock)); if (!lock->lck_id) @@ -548,6 +627,7 @@ bool LCK_lock(thread_db* tdbb, Lock* lock, USHORT level, SSHORT wait) case isc_deadlock: case isc_lock_conflict: case isc_lock_timeout: + tdbb->checkCancelState(true); return false; case isc_lockmanerr: dbb->dbb_flags |= DBB_bugcheck; diff --git a/src/jrd/lck_proto.h b/src/jrd/lck_proto.h index d27fa48cbd..92a7341085 100644 --- a/src/jrd/lck_proto.h +++ b/src/jrd/lck_proto.h @@ -32,6 +32,7 @@ namespace Jrd { } void LCK_assert(Jrd::thread_db*, Jrd::Lock*); +bool LCK_cancel_wait(Jrd::Attachment*); bool LCK_convert(Jrd::thread_db*, Jrd::Lock*, USHORT, SSHORT); bool LCK_convert_opt(Jrd::thread_db*, Jrd::Lock*, USHORT); void LCK_downgrade(Jrd::thread_db*, Jrd::Lock*); diff --git a/src/jrd/pag.cpp b/src/jrd/pag.cpp index e0f4400e3b..d8ce617911 100644 --- 
a/src/jrd/pag.cpp +++ b/src/jrd/pag.cpp @@ -2044,6 +2044,7 @@ static int blocking_ast_attachment(void* ast_object) attachment->att_flags |= ATT_shutdown; attachment->cancelExternalConnection(tdbb); + LCK_cancel_wait(attachment); JRD_shutdown_attachments(dbb); diff --git a/src/jrd/shut.cpp b/src/jrd/shut.cpp index 70a20b8211..51d3aa0dd8 100644 --- a/src/jrd/shut.cpp +++ b/src/jrd/shut.cpp @@ -563,6 +563,7 @@ static bool shutdown_locks(thread_db* tdbb, SSHORT flag) { attachment->att_flags |= ATT_shutdown; attachment->cancelExternalConnection(tdbb); + LCK_cancel_wait(attachment); } } diff --git a/src/jrd/tra.cpp b/src/jrd/tra.cpp index ad9ed0632e..78991b2cdb 100644 --- a/src/jrd/tra.cpp +++ b/src/jrd/tra.cpp @@ -1989,6 +1989,7 @@ static int blocking_ast_transaction(void* ast_object) transaction->tra_flags |= TRA_cancel_request; att->cancelExternalConnection(tdbb); + LCK_cancel_wait(att); } catch (const Firebird::Exception&) {} // no-op diff --git a/src/lock/lock.cpp b/src/lock/lock.cpp index 68d01015a8..f78e1ad1b7 100644 --- a/src/lock/lock.cpp +++ b/src/lock/lock.cpp @@ -829,6 +829,38 @@ void LockManager::repost(thread_db* tdbb, lock_ast_t ast, void* arg, SRQ_PTR own } +bool LockManager::cancelWait(SRQ_PTR owner_offset) +{ +/************************************** + * + * c a n c e l W a i t + * + ************************************** + * + * Functional description + * Wakeup waiting owner to make it check if wait should be cancelled. + * As this routine could be called asynchronously, take extra care and + * don't trust the input params blindly. 
+ * + **************************************/ + LOCK_TRACE(("cancelWait (%ld)\n", owner_offset)); + + if (!owner_offset) + return false; + + Firebird::MutexLockGuard guard(m_localMutex); + + acquire_shmem(DUMMY_OWNER); + + own* owner = (own*) SRQ_ABS_PTR(owner_offset); + if (owner->own_type == type_own) + post_wakeup(owner); + + release_shmem(DUMMY_OWNER); + return true; +} + + SLONG LockManager::queryData(SRQ_PTR parent_request, const USHORT series, const USHORT aggregate) { /************************************** @@ -1196,7 +1228,7 @@ void LockManager::acquire_shmem(SRQ_PTR owner_offset) // the lock mutex. In that event, lets see if there is any unfinished work // left around that we need to finish up. - if (prior_active) + if (prior_active > 0) { post_history(his_active, owner_offset, prior_active, (SRQ_PTR) 0, false); shb* const recover = (shb*) SRQ_ABS_PTR(m_header->lhb_secondary); @@ -4033,7 +4065,9 @@ USHORT LockManager::wait_for_request(thread_db* tdbb, lrq* request, SSHORT lck_w // See if we've waited beyond the lock timeout - // if so we mark our own request as rejected - if (lck_wait < 0 && lock_timeout <= current_time) + const bool cancelled = tdbb->checkCancelState(false); + + if (cancelled || lck_wait < 0 && lock_timeout <= current_time) { // We're going to reject our lock - it's the callers responsibility // to do cleanup and make sure post_pending() is called to wakeup @@ -4041,6 +4075,7 @@ USHORT LockManager::wait_for_request(thread_db* tdbb, lrq* request, SSHORT lck_w request->lrq_flags |= LRQ_rejected; request->lrq_flags &= ~LRQ_pending; lock->lbl_pending_lrq_count--; + // and test - may be timeout due to missing process to deliver request probe_processes(); release_shmem(owner_offset); diff --git a/src/lock/lock_proto.h b/src/lock/lock_proto.h index 267f87ac0d..a715d3f812 100644 --- a/src/lock/lock_proto.h +++ b/src/lock/lock_proto.h @@ -329,6 +329,7 @@ public: bool dequeue(const SRQ_PTR); void repost(thread_db*, lock_ast_t, void*, SRQ_PTR); + 
bool cancelWait(SRQ_PTR); SLONG queryData(SRQ_PTR, const USHORT, const USHORT); SLONG readData(SRQ_PTR);