diff --git a/src/common/isc_s_proto.h b/src/common/isc_s_proto.h index 7ffb31f7b1..e19a91be9a 100644 --- a/src/common/isc_s_proto.h +++ b/src/common/isc_s_proto.h @@ -47,7 +47,9 @@ #ifdef LINUX // This hack fixes CORE-2896 - embedded connections fail on linux. // Looks like a lot of linux kernels are buggy when working with PRIO_INHERIT mutexes. -//#undef HAVE_PTHREAD_MUTEXATTR_SETPROTOCOL +// dimitr (10-11-2016): PRIO_INHERIT also causes undesired short-time sleeps (CPU idle 30-35%) +// during context switches under concurrent load. Proved on linux kernels up to 4.8. +#undef HAVE_PTHREAD_MUTEXATTR_SETPROTOCOL #endif diff --git a/src/jrd/Relation.cpp b/src/jrd/Relation.cpp index b32c8ba704..21ecdc3213 100644 --- a/src/jrd/Relation.cpp +++ b/src/jrd/Relation.cpp @@ -73,16 +73,8 @@ RelationPages* jrd_rel::getPagesInternal(thread_db* tdbb, TraNumber tran, bool a return 0; RelationPages* newPages = rel_pages_free; - if (!newPages) - { - const size_t BULK_ALLOC = 8; - - RelationPages* allocatedPages = newPages = - FB_NEW_POOL(*rel_pool) RelationPages[BULK_ALLOC]; - - rel_pages_free = ++allocatedPages; - for (size_t i = 1; i < BULK_ALLOC - 1; i++, allocatedPages++) - allocatedPages->rel_next_free = allocatedPages + 1; + if (!newPages) { + newPages = FB_NEW_POOL(*rel_pool) RelationPages(*rel_pool); } else { @@ -527,5 +519,9 @@ void RelationPages::free(RelationPages*& nextFree) rel_index_root = rel_data_pages = 0; rel_slot_space = rel_pri_data_space = rel_sec_data_space = 0; + rel_last_free_pri_dp = 0; rel_instance_id = 0; + + dpMap.clear(); + dpMapMark = 0; } diff --git a/src/jrd/Relation.h b/src/jrd/Relation.h index 3947cb6638..4af55907e8 100644 --- a/src/jrd/Relation.h +++ b/src/jrd/Relation.h @@ -70,25 +70,31 @@ class RelationPages { public: typedef SINT64 RP_INSTANCE_ID; + vcl* rel_pages; // vector of pointer page numbers RP_INSTANCE_ID rel_instance_id; // 0 or att_attachment_id or tra_number + // Vlad asked for this compile-time check to make sure we can contain a txn number here typedef int RangeCheck1[sizeof(RP_INSTANCE_ID) >= sizeof(TraNumber)]; typedef int RangeCheck2[sizeof(RP_INSTANCE_ID) >= sizeof(AttNumber)]; - SLONG rel_index_root; // index root page number - SLONG rel_data_pages; // count of relation data pages + ULONG rel_index_root; // index root page number + ULONG rel_data_pages; // count of relation data pages ULONG rel_slot_space; // lowest pointer page with slot space ULONG rel_pri_data_space; // lowest pointer page with primary data page space ULONG rel_sec_data_space; // lowest pointer page with secondary data page space + ULONG rel_last_free_pri_dp; // last primary data page found with space USHORT rel_pg_space_id; - RelationPages() + RelationPages(Firebird::MemoryPool& pool) : rel_pages(NULL), rel_instance_id(0), rel_index_root(0), rel_data_pages(0), rel_slot_space(0), rel_pri_data_space(0), rel_sec_data_space(0), + rel_last_free_pri_dp(0), rel_pg_space_id(DB_PAGE_SPACE), rel_next_free(NULL), - useCount(0) + useCount(0), + dpMap(pool), + dpMapMark(0) {} inline SLONG addRef() @@ -103,10 +109,83 @@ public: return item->rel_instance_id; } + ULONG getDPNumber(ULONG dpSequence) + { + FB_SIZE_T pos; + if (dpMap.find(dpSequence, pos)) + { + if (dpMap[pos].mark != dpMapMark) + dpMap[pos].mark = ++dpMapMark; + return dpMap[pos].physNum; + } + + return 0; + } + + void setDPNumber(ULONG dpSequence, ULONG dpNumber) + { + FB_SIZE_T pos; + if (dpMap.find(dpSequence, pos)) + { + if (dpNumber) + { + dpMap[pos].physNum = dpNumber; + dpMap[pos].mark = ++dpMapMark; + } + else + dpMap.remove(pos); + } + else if (dpNumber) + { + dpMap.insert(pos, {dpSequence, dpNumber, ++dpMapMark}); + + if (dpMap.getCount() == MAX_DPMAP_ITEMS) + freeOldestMapItems(); + } + } + + void freeOldestMapItems() + { + ULONG minMark = MAX_ULONG; + FB_SIZE_T i; + for (i = 0; i < dpMap.getCount(); i++) + if (minMark > dpMap[i].mark) + minMark = dpMap[i].mark; + + minMark = (minMark + dpMapMark) / 2; + + i = 0; + while (i < dpMap.getCount()) + { + if (dpMap[i].mark > minMark) + dpMap[i++].mark -= minMark; + else + dpMap.remove(i); + } + dpMapMark -= minMark; + } + private: RelationPages* rel_next_free; SLONG useCount; + static const ULONG MAX_DPMAP_ITEMS = 64; + + struct DPItem + { + ULONG seqNum; + ULONG physNum; + ULONG mark; + + static ULONG generate(const DPItem& item) + { + return item.seqNum; + } + }; + + Firebird::SortedArray, ULONG, DPItem> dpMap; + ULONG dpMapMark; + friend class jrd_rel; }; @@ -194,12 +273,12 @@ public: return &rel_pages_base; } - bool delPages(thread_db* tdbb, TraNumber tran = MAX_TRA_NUMBER, RelationPages* aPages = NULL); + bool delPages(thread_db* tdbb, TraNumber tran = MAX_TRA_NUMBER, RelationPages* aPages = NULL); - void getRelLockKey(thread_db* tdbb, UCHAR* key); - USHORT getRelLockKeyLength() const; + void getRelLockKey(thread_db* tdbb, UCHAR* key); + USHORT getRelLockKeyLength() const; - void cleanUp(); + void cleanUp(); class RelPagesSnapshot : public Firebird::Array { @@ -319,7 +398,7 @@ const ULONG REL_gc_lockneed = 0x80000; // gc lock should be acquired inline jrd_rel::jrd_rel(MemoryPool& p) : rel_pool(&p), rel_flags(REL_gc_lockneed), rel_name(p), rel_owner_name(p), rel_security_name(p), - rel_view_contexts(p), rel_gc_records(p) + rel_view_contexts(p), rel_gc_records(p), rel_pages_base(p) { } diff --git a/src/jrd/dpm.epp b/src/jrd/dpm.epp index d83263cb33..9368900a78 100644 --- a/src/jrd/dpm.epp +++ b/src/jrd/dpm.epp @@ -1390,9 +1390,37 @@ bool DPM_get(thread_db* tdbb, record_param* rpb, SSHORT lock_type) if (rpb->rpb_number.getValue() < 0) return false; - // Find the next pointer page, data page, and record + RelationPages* relPages = rpb->rpb_relation->getPages(tdbb); + + const ULONG dpSequence = rpb->rpb_number.getValue() / dbb->dbb_max_records; + ULONG page_number = relPages->getDPNumber(dpSequence); + + if (page_number) + { + window->win_page = page_number; + data_page* dpage = (data_page*) CCH_FETCH(tdbb, window, lock_type, pag_undefined); + const bool pageOk = + dpage->dpg_header.pag_type == pag_data && + !(dpage->dpg_header.pag_flags & (dpg_secondary | dpg_large | dpg_orphan)) && + dpage->dpg_relation == rpb->rpb_relation->rel_id && + dpage->dpg_sequence == dpSequence && + (dpage->dpg_count > 0); + + if (pageOk && get_header(window, line, rpb) && + !(rpb->rpb_flags & (rpb_blob | rpb_chained | rpb_fragment))) + { + return true; + } + + CCH_RELEASE(tdbb, window); + + if (pageOk) + return false; + } + + // Find the pointer page, data page, and record pointer_page* page = get_pointer_page(tdbb, rpb->rpb_relation, - rpb->rpb_relation->getPages(tdbb), window, pp_sequence, LCK_read); + relPages, window, pp_sequence, LCK_read); if (!page) return false; @@ -1401,7 +1429,8 @@ bool DPM_get(thread_db* tdbb, record_param* rpb, SSHORT lock_type) " record %" ULONGFORMAT":%d\n", page->ppg_page[slot], line); #endif - const ULONG page_number = page->ppg_page[slot]; + page_number = page->ppg_page[slot]; + relPages->setDPNumber(dpSequence, page_number); if (page_number) { CCH_HANDOFF(tdbb, window, page_number, lock_type, pag_data); @@ -1614,6 +1643,51 @@ bool DPM_next(thread_db* tdbb, record_param* rpb, USHORT lock_type, bool onepage rpb->rpb_number = saveRecNo; } + ULONG dpSequence = rpb->rpb_number.getValue() / dbb->dbb_max_records; + ULONG page_number = relPages->getDPNumber(dpSequence); + + if (page_number) + { + fb_assert(window->win_page.getPageSpaceID() == relPages->rel_pg_space_id); + + window->win_page = page_number; + const data_page* dpage = (data_page*) CCH_FETCH(tdbb, window, lock_type, pag_undefined); + const bool pageOk = + dpage->dpg_header.pag_type == pag_data && + !(dpage->dpg_header.pag_flags & (dpg_secondary | dpg_large | dpg_orphan)) && + dpage->dpg_relation == rpb->rpb_relation->rel_id && + dpage->dpg_sequence == dpSequence && + (dpage->dpg_count > 0); + + if (pageOk) + { + for (; line < dpage->dpg_count; ++line) + { + if (get_header(window, line, rpb) && + !(rpb->rpb_flags & (rpb_blob | rpb_chained | rpb_fragment))) + { + if (sweeper && !rpb->rpb_b_page && rpb->rpb_transaction_nr <= oldest) + continue; + + rpb->rpb_number.compose(dbb->dbb_max_records, dbb->dbb_dp_per_pp, + line, slot, pp_sequence); + return true; + } + } + } + + if (window->win_flags & WIN_large_scan) + CCH_RELEASE_TAIL(tdbb, window); + else if (window->win_flags & WIN_garbage_collector && + window->win_flags & WIN_garbage_collect) + { + CCH_RELEASE_TAIL(tdbb, window); + window->win_flags &= ~WIN_garbage_collect; + } + else + CCH_RELEASE(tdbb, window); + } + // Find the next pointer page, data page, and record while (true) @@ -1654,6 +1728,8 @@ bool DPM_next(thread_db* tdbb, record_param* rpb, USHORT lock_type, bool onepage } } #endif + dpSequence = ppage->ppg_sequence * dbb->dbb_dp_per_pp + slot; + relPages->setDPNumber(dpSequence, page_number); const data_page* dpage = (data_page*) CCH_HANDOFF(tdbb, window, page_number, lock_type, pag_data); @@ -3133,6 +3209,29 @@ static rhd* locate_space(thread_db* tdbb, } } + if (type == DPM_primary && relPages->rel_last_free_pri_dp) + { + window->win_page = relPages->rel_last_free_pri_dp; + data_page* dpage = (data_page*) CCH_FETCH(tdbb, window, LCK_write, pag_undefined); + const bool pageOk = + dpage->dpg_header.pag_type == pag_data && + !(dpage->dpg_header.pag_flags & (dpg_secondary | dpg_large | dpg_orphan)) && + dpage->dpg_relation == rpb->rpb_relation->rel_id && + //dpage->dpg_sequence == dpSequence && + (dpage->dpg_count > 0); + + if (pageOk) + { + UCHAR* space = find_space(tdbb, rpb, size, stack, record, type); + if (space) + return (rhd*)space; + } + else + CCH_RELEASE(tdbb, window); + + relPages->rel_last_free_pri_dp = 0; + } + // Look for space anywhere // Make few tries to lock consecutive data pages without waiting. In highly @@ -3230,7 +3329,12 @@ static rhd* locate_space(thread_db* tdbb, { UCHAR* space = find_space(tdbb, rpb, size, stack, record, type); if (space) - return (rhd*) space; + { + if (type == DPM_primary) + relPages->rel_last_free_pri_dp = dp_number; + + return (rhd*)space; + } } ppLock = LCK_read;