diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 8b6baff991..6d45f78fcb 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -15,7 +15,7 @@ jobs: - name: Prepare run: | - sudo apt-get install libtool-bin libtomcrypt1 libtomcrypt-dev libtommath1 libtommath-dev libicu-dev zlib1g-dev + sudo apt-get install libtool-bin libtomcrypt1 libtomcrypt-dev libtommath1 libtommath-dev libicu-dev zlib1g-dev cmake - name: Build run: | @@ -135,7 +135,7 @@ jobs: fetch-depth: 10 - name: Prepare - run: apk update && apk --no-cache --update add build-base libtool git autoconf automake zlib-dev icu-dev ncurses-dev libedit-dev linux-headers tar + run: apk update && apk --no-cache --update add build-base libtool git autoconf automake cmake zlib-dev icu-dev ncurses-dev libedit-dev linux-headers tar - name: Build run: | @@ -323,7 +323,7 @@ jobs: - name: Prepare - Install tools run: | - brew install automake libtool ninja + brew install automake cmake libtool ninja - name: Cache - libc++ install id: cache-libcxx-install-macos diff --git a/.gitignore b/.gitignore index 323f0838c1..23563ca04e 100644 --- a/.gitignore +++ b/.gitignore @@ -27,3 +27,4 @@ extern/ttmath/release/ /src/include/gen/parse.h /src/include/gen/autoconfig.auto /src/include/gen/autoconfig.h +extern/libcds/lib/ diff --git a/appveyor.yml b/appveyor.yml index 64ef3fc1ac..d229ecf49c 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -28,7 +28,7 @@ install: - cmd: cd builds\win32 - cmd: run_all.bat JUSTBUILD - cmd: set ARTIFACTS_PATH=output_%FB_OUTPUT_SUFFIX% - - sh: export APT_PACKAGES="libtool-bin" + - sh: export APT_PACKAGES="libtool-bin cmake" - sh: if [ $PLATFORM = "x64" ]; then export APT_PACKAGES="$APT_PACKAGES libtommath1 libtommath-dev libicu-dev zlib1g-dev"; fi - sh: if [ $PLATFORM = "x86" ]; then export APT_PACKAGES="$APT_PACKAGES gcc-multilib g++-multilib libncurses5-dev:i386 libtommath-dev:i386 libicu-dev:i386 zlib1g-dev:i386"; fi - sh: if [ $PLATFORM = "x64" ]; then export CC="gcc" CXX="g++"; fi diff --git a/builds/posix/Makefile.in b/builds/posix/Makefile.in index a60c72a64a..b3ae8c4b1e 100644 --- a/builds/posix/Makefile.in +++ b/builds/posix/Makefile.in @@ -83,6 +83,13 @@ LTC_LDFLAGS='-L$(LIB) $(subst $,$$$$,$(call LIB_LINK_RPATH,lib))' endif endif +# correct build type for cmake builds +FB_CMAKE_BUILD_TYPE=$(TARGET) +ifeq ($(FB_CMAKE_BUILD_TYPE),Native) + FB_CMAKE_BUILD_TYPE=Release +endif + + .PHONY: master_process cross_process firebird Debug Release external all: firebird @@ -190,6 +197,8 @@ ifeq ($(RE2_BUILD_FLG),Y) ln -sf $(ROOT)/extern/re2/obj/libre2.a $(LIB) endif + $(MAKE) libcds + ifeq ($(TOMMATH_BUILD_FLG),Y) CC="$(CC)" CFLAGS="$(CFLAGS)" AR="$(AR)" $(MAKE) -C $(ROOT)/extern/libtommath -f makefile.shared GCC="$(CC)" @@ -279,6 +288,22 @@ $(RE2_LIB): $(RE2_Objs) -$(RM) $@ $(STATICLIB_LINK) $@ $^ +#___________________________________________________________________________ +# libcds +# + +.PHONY: libcds +libcds: + mkdir -p $(LIBCDS)/lib/$(TARGET) + cmake -DCMAKE_BUILD_TYPE=$(FB_CMAKE_BUILD_TYPE) -DCMAKE_CXX_FLAGS=-fPIC -S $(LIBCDS) -B $(LIBCDS)/lib/$(TARGET) + AR="$(AR)" $(MAKE) -C $(LIBCDS)/lib/$(TARGET) + +ifeq ($(TARGET),Debug) + ln -sf $(LIBCDS)/lib/$(TARGET)/bin/libcds-s_d.a $(LIB)/libcds.a +else + ln -sf $(LIBCDS)/lib/$(TARGET)/bin/libcds-s.a $(LIB)/libcds.a +endif + #___________________________________________________________________________ # main build target for both debug and release builds # @@ -343,6 +368,7 @@ cross2: ln -sf $(ROOT)/extern/decNumber/libdecFloat$(CROSS).a 
$(LIB) CXX="$(CXX)" CXXFLAGS="$(CXXFLAGS)" $(MAKE) -C $(ROOT)/extern/int128/absl/numeric ln -sf $(ROOT)/extern/int128/absl/numeric/libi128$(CROSS).a $(LIB) + $(MAKE) libcds $(MAKE) yvalve $(MAKE) engine $(MAKE) fbintl diff --git a/builds/posix/make.android.arm64 b/builds/posix/make.android.arm64 index 0e25b3e300..f64947c531 100644 --- a/builds/posix/make.android.arm64 +++ b/builds/posix/make.android.arm64 @@ -41,7 +41,7 @@ DEV_FLAGS=$(COMMON_FLAGS) $(WARN_FLAGS) CROSS_CONFIG=android.arm64 LDFLAGS += -static-libstdc++ -DroidLibs := -lm -ldl $(DECLIB) $(RE2LIB) $(I128LIB) +DroidLibs := -lm -ldl $(DECLIB) $(RE2LIB) $(I128LIB) $(LIBCDSLIB) UDR_SUPPORT_LIBS := LINK_LIBS = $(DroidLibs) diff --git a/builds/posix/make.android.arme b/builds/posix/make.android.arme index cbf56253f6..b4b597d8e3 100644 --- a/builds/posix/make.android.arme +++ b/builds/posix/make.android.arme @@ -41,7 +41,7 @@ DEV_FLAGS=$(COMMON_FLAGS) $(WARN_FLAGS) CROSS_CONFIG=android.arme LDFLAGS += -static-libstdc++ -DroidLibs := -lm -ldl $(DECLIB) $(RE2LIB) $(I128LIB) +DroidLibs := -lm -ldl $(DECLIB) $(RE2LIB) $(I128LIB) $(LIBCDSLIB) UDR_SUPPORT_LIBS := LINK_LIBS = $(DroidLibs) diff --git a/builds/posix/make.android.x86 b/builds/posix/make.android.x86 index 1344fe0dc1..a1dcd58368 100644 --- a/builds/posix/make.android.x86 +++ b/builds/posix/make.android.x86 @@ -41,7 +41,7 @@ DEV_FLAGS=$(COMMON_FLAGS) $(WARN_FLAGS) CROSS_CONFIG=android.x86 LDFLAGS += -static-libstdc++ -DroidLibs := -lm -ldl $(DECLIB) $(RE2LIB) $(I128LIB) +DroidLibs := -lm -ldl $(DECLIB) $(RE2LIB) $(I128LIB) $(LIBCDSLIB) UDR_SUPPORT_LIBS := LINK_LIBS = $(DroidLibs) diff --git a/builds/posix/make.android.x86_64 b/builds/posix/make.android.x86_64 index 34ae36925d..15df6dac61 100644 --- a/builds/posix/make.android.x86_64 +++ b/builds/posix/make.android.x86_64 @@ -41,7 +41,7 @@ DEV_FLAGS=$(COMMON_FLAGS) $(WARN_FLAGS) CROSS_CONFIG=android.x86_64 LDFLAGS += -static-libstdc++ -DroidLibs := -lm -ldl $(DECLIB) $(RE2LIB) $(I128LIB) +DroidLibs := -lm -ldl $(DECLIB) $(RE2LIB) $(I128LIB) $(LIBCDSLIB) UDR_SUPPORT_LIBS := LINK_LIBS = $(DroidLibs) diff --git a/builds/posix/make.defaults b/builds/posix/make.defaults index 44290a5451..fab49ceac9 100755 --- a/builds/posix/make.defaults +++ b/builds/posix/make.defaults @@ -148,6 +148,8 @@ else I128LIB= endif +LIBCDSLIB=-lcds + # crypt library CRYPTLIB=@CRYPTLIB@ @@ -204,8 +206,8 @@ endif STATICLIB_LINK = $(AR) crus -LINK_LIBS = @LIBS@ $(DECLIB) $(RE2LIB) $(I128LIB) -SO_LINK_LIBS = @LIBS@ $(DECLIB) $(RE2LIB) $(I128LIB) +LINK_LIBS = @LIBS@ $(DECLIB) $(RE2LIB) $(I128LIB) $(LIBCDSLIB) +SO_LINK_LIBS = @LIBS@ $(DECLIB) $(RE2LIB) $(I128LIB) $(LIBCDSLIB) # Default extensions @@ -294,6 +296,11 @@ TOMCRYPT_INC=$(TOMCRYPT)/src/headers TOMCRYPT_SO=$(TOMCRYPT)/.libs/libtomcrypt.so TOMCRYPT_VER=1 +# Own libcds support +LIBCDS=$(ROOT)/extern/libcds +LIBCDS_INC=$(LIBCDS) +LIBCDS_DEF=CDS_BUILD_STATIC_LIB + # LINKER OPTIONS # diff --git a/builds/posix/make.rules b/builds/posix/make.rules index 997981cdb5..69b40e6411 100644 --- a/builds/posix/make.rules +++ b/builds/posix/make.rules @@ -38,6 +38,8 @@ ifneq ($(SYSTEM_BOOST_FLG),Y) WFLAGS += -I$(ROOT)/extern/boost endif +WFLAGS += -I$(LIBCDS_INC) -D$(LIBCDS_DEF) + ifeq ($(TOMMATH_BUILD_FLG),Y) WFLAGS += -I$(TOMMATH_INC) endif diff --git a/builds/win32/clean_all.bat b/builds/win32/clean_all.bat index 3cf5d940eb..0c0c2e111e 100644 --- a/builds/win32/clean_all.bat +++ b/builds/win32/clean_all.bat @@ -22,6 +22,16 @@ for %%v in ( %* ) do ( @echo Cleaning icu... 
@rmdir /S /Q "%FB_ROOT_PATH%\extern\icu\%FB_TARGET_PLATFORM%\%FBBUILD_BUILDTYPE%" 2>nul +@echo Cleaning cds... +@for /D %%d in ("%FB_ROOT_PATH%\extern\libcds\obj\*") do ( + rmdir /S /Q "%%d\%FB_TARGET_PLATFORM%\cds\%FB_CONFIG%-static" 2>nul +) + +@for /D %%d in ("%FB_ROOT_PATH%\extern\libcds\bin\*") do ( + rmdir /S /Q "%%d\%FB_TARGET_PLATFORM%-%FB_CONFIG%-static" 2>nul +) + + @echo Cleaning decNumber... @rmdir /S /Q "%FB_ROOT_PATH%\extern\decNumber\lib\%FB_TARGET_PLATFORM%" 2>nul @rmdir /S /Q "%FB_ROOT_PATH%\extern\decNumber\temp\%FB_TARGET_PLATFORM%" 2>nul diff --git a/builds/win32/compile.bat b/builds/win32/compile.bat index 07ae6415e0..fe3811137c 100644 --- a/builds/win32/compile.bat +++ b/builds/win32/compile.bat @@ -18,6 +18,11 @@ set projects= set config=debug ) +:: Special case for CDS, set in make_boot only +@if "%FB_LIBCDS%"=="1" ( + set config=%config%-static +) + shift shift diff --git a/builds/win32/make_boot.bat b/builds/win32/make_boot.bat index 54eaa3036e..6fae14afe1 100644 --- a/builds/win32/make_boot.bat +++ b/builds/win32/make_boot.bat @@ -34,6 +34,9 @@ if "%ERRLEV%"=="1" goto :END call :btyacc if "%ERRLEV%"=="1" goto :END +call :libcds +if "%ERRLEV%"=="1" goto :END + call :LibTom if "%ERRLEV%"=="1" goto :END @@ -145,6 +148,17 @@ goto :EOF if errorlevel 1 call :boot2 decNumber_%FB_OBJ_DIR% goto :EOF +::=================== +:: Build libcds +:libcds +@echo. +set FB_LIBCDS=1 +@echo Building libcds (%FB_OBJ_DIR%)... +@call compile.bat extern\libcds\projects\Win\vc141\cds libcds_%FB_CONFIG%_%FB_TARGET_PLATFORM%.log cds +if errorlevel 1 call :boot2 libcds%FB_OBJ_DIR% +set FB_LIBCDS= +goto :EOF + ::=================== :: BUILD ttmath :ttmath diff --git a/builds/win32/msvc15/engine.vcxproj b/builds/win32/msvc15/engine.vcxproj index 029028aef2..49d3df52cf 100644 --- a/builds/win32/msvc15/engine.vcxproj +++ b/builds/win32/msvc15/engine.vcxproj @@ -113,24 +113,28 @@ + + + + diff --git a/builds/win32/msvc15/engine_static.vcxproj b/builds/win32/msvc15/engine_static.vcxproj index 3981c1776e..72aa1a3029 100644 --- a/builds/win32/msvc15/engine_static.vcxproj +++ b/builds/win32/msvc15/engine_static.vcxproj @@ -89,6 +89,7 @@ + @@ -276,6 +277,7 @@ + @@ -429,24 +431,28 @@ + + + + diff --git a/builds/win32/msvc15/engine_static.vcxproj.filters b/builds/win32/msvc15/engine_static.vcxproj.filters index f068282124..e0f9907b7b 100644 --- a/builds/win32/msvc15/engine_static.vcxproj.filters +++ b/builds/win32/msvc15/engine_static.vcxproj.filters @@ -513,6 +513,9 @@ JRD files + + JRD files + Optimizer @@ -1070,6 +1073,9 @@ Header files + + Header files + Header files diff --git a/builds/win32/msvc15/engine_test.vcxproj b/builds/win32/msvc15/engine_test.vcxproj index 312f7dd1e3..3a36737af0 100644 --- a/builds/win32/msvc15/engine_test.vcxproj +++ b/builds/win32/msvc15/engine_test.vcxproj @@ -66,24 +66,28 @@ + + + + diff --git a/builds/win32/msvc15/fbserver.vcxproj b/builds/win32/msvc15/fbserver.vcxproj index 374e17da36..8236adbbbe 100644 --- a/builds/win32/msvc15/fbserver.vcxproj +++ b/builds/win32/msvc15/fbserver.vcxproj @@ -239,4 +239,4 @@ - + \ No newline at end of file diff --git a/builds/win32/msvc15/libcds.props b/builds/win32/msvc15/libcds.props new file mode 100644 index 0000000000..dd481dee9f --- /dev/null +++ b/builds/win32/msvc15/libcds.props @@ -0,0 +1,23 @@ + + + + + ..\..\..\extern\libcds + + + + + $(LIBCDS_ROOT);%(AdditionalIncludeDirectories) + CDS_BUILD_STATIC_LIB;%(PreprocessorDefinitions) + + + 
$(LIBCDS_ROOT)\bin\vc.$(PlatformToolset)\$(Platform)-$(Configuration)-static;%(AdditionalLibraryDirectories)
+ libcds-$(PlatformTarget).lib;%(AdditionalDependencies)
+
+
+
+
+ $(LIBCDS_ROOT)
+
+
+
diff --git a/src/jrd/InitCDSLib.cpp b/src/jrd/InitCDSLib.cpp
new file mode 100644
index 0000000000..ab8d30e277
--- /dev/null
+++ b/src/jrd/InitCDSLib.cpp
@@ -0,0 +1,134 @@
+/*
+ * PROGRAM: JRD Access Method
+ * MODULE: InitCDSLib.cpp
+ * DESCRIPTION: support for correct usage of the CDS library by the engine
+ *
+ * The contents of this file are subject to the Initial
+ * Developer's Public License Version 1.0 (the "License");
+ * you may not use this file except in compliance with the
+ * License. You may obtain a copy of the License at
+ * http://www.ibphoenix.com/main.nfs?a=ibphoenix&page=ibp_idpl.
+ *
+ * Software distributed under the License is distributed AS IS,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied.
+ * See the License for the specific language governing rights
+ * and limitations under the License.
+ *
+ * The Original Code was created by Vladyslav Khorsun for the
+ * Firebird Open Source RDBMS project.
+ *
+ * Copyright (c) 2022 Vladyslav Khorsun
+ * and all contributors signed below.
+ *
+ * All Rights Reserved.
+ * Contributor(s): ______________________________________.
+ *
+ */
+
+
+#include "firebird.h"
+#include "../common/classes/array.h"
+#include "../common/classes/init.h"
+#include "../common/classes/locks.h"
+#include "../jrd/InitCDSLib.h"
+#include "../yvalve/gds_proto.h"
+
+#include <cds/init.h> //cds::Initialize, cds::Terminate
+#include <cds/gc/dhp.h> //cds::gc::DHP (Hazard Pointer)
+
+using namespace Firebird;
+
+namespace Jrd
+{
+
+Array<MemoryPool*>* InitCDS::m_pools = nullptr;
+MemoryPool* InitCDS::m_pool = nullptr;
+MemoryStats InitCDS::m_stats;
+
+static GlobalPtr<InitCDS> initCDS;
+
+InitCDS::InitCDS(MemoryPool&)
+{
+ m_pool = MemoryPool::createPool(nullptr, m_stats);
+ m_pools = FB_NEW_POOL(*m_pool) Array<MemoryPool*>(*m_pool);
+
+ cds::Initialize();
+ cds::gc::dhp::smr::set_memory_allocator(alloc, free);
+ cds::gc::dhp::smr::construct();
+}
+
+InitCDS::~InitCDS()
+{
+ cds::gc::dhp::smr::destruct(true);
+
+ char str[512];
+
+// CDS_ENABLE_HPSTAT is not defined by default.
+// Rebuilding libcds is required after changing it.
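+//
+// (Hedged illustration, not part of this patch: assuming the cmake line from
+// builds/posix/Makefile.in, the macro could be defined by extending
+// CMAKE_CXX_FLAGS when configuring libcds, e.g.
+//   cmake -DCMAKE_BUILD_TYPE=Release \
+//         -DCMAKE_CXX_FLAGS="-fPIC -DCDS_ENABLE_HPSTAT" \
+//         -S extern/libcds -B extern/libcds/lib/Release
+// followed by a full rebuild of the engine.)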
+
+#ifdef CDS_ENABLE_HPSTAT
+ cds::gc::DHP::stat const& st = cds::gc::DHP::postmortem_statistics();
+
+ sprintf(str, "DHP statistics:\n"
+ " thread count = %llu\n"
+ " guard allocated = %llu\n"
+ " guard freed = %llu\n"
+ " retired data count = %llu\n"
+ " free data count = %llu\n"
+ " HP blocks allocated = %llu\n"
+ " retired blocks allocated = %llu\n"
+ " hp array extend() calls = %llu\n"
+ " retired array extend() = %llu\n"
+ " scan() call count = %llu\n"
+ " help_scan() call count = %llu\n"
+ "\n",
+ st.thread_rec_count,
+ st.guard_allocated, st.guard_freed,
+ st.retired_count, st.free_count,
+ st.hp_block_count, st.retired_block_count,
+ st.hp_extend_count, st.retired_extend_count,
+ st.scan_count, st.help_scan_count
+ );
+ gds__log(str);
+#endif
+ cds::Terminate();
+
+ // no need to protect m_pools at this point
+ while (m_pools->hasData())
+ {
+ MemoryPool* pool = m_pools->pop();
+ MemoryPool::deletePool(pool);
+ }
+
+ delete m_pools;
+ MemoryPool::deletePool(m_pool);
+
+#ifdef DEBUG_CDS_MEMORY
+ sprintf(str, "DHP pool stats:\n"
+ " usage = %llu\n"
+ " mapping = %llu\n"
+ " max usage = %llu\n"
+ " max mapping = %llu\n"
+ "\n",
+ m_stats.getCurrentUsage(),
+ m_stats.getCurrentMapping(),
+ m_stats.getMaximumUsage(),
+ m_stats.getMaximumMapping()
+ );
+
+ gds__log(str);
+#endif
+}
+
+static InitInstance<Mutex> mutex; // guard InitCDS::m_pools
+
+MemoryPool* InitCDS::createPool()
+{
+ MemoryPool* pool = MemoryPool::createPool(nullptr, m_stats);
+
+ MutexLockGuard guard(mutex(), FB_FUNCTION);
+ m_pools->push(pool);
+ return pool;
+}
+
+} // namespace Jrd
diff --git a/src/jrd/InitCDSLib.h b/src/jrd/InitCDSLib.h
new file mode 100644
index 0000000000..07f7e8fce3
--- /dev/null
+++ b/src/jrd/InitCDSLib.h
@@ -0,0 +1,70 @@
+/*
+ * PROGRAM: JRD Access Method
+ * MODULE: InitCDSLib.h
+ * DESCRIPTION: support for correct usage of the CDS library by the engine
+ *
+ * The contents of this file are subject to the Initial
+ * Developer's Public License Version 1.0 (the "License");
+ * you may not use this file except in compliance with the
+ * License. You may obtain a copy of the License at
+ * http://www.ibphoenix.com/main.nfs?a=ibphoenix&page=ibp_idpl.
+ *
+ * Software distributed under the License is distributed AS IS,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied.
+ * See the License for the specific language governing rights
+ * and limitations under the License.
+ *
+ * The Original Code was created by Vladyslav Khorsun for the
+ * Firebird Open Source RDBMS project.
+ *
+ * Copyright (c) 2022 Vladyslav Khorsun
+ * and all contributors signed below.
+ *
+ * All Rights Reserved.
+ * Contributor(s): ______________________________________.
+ *
+ */
+
+#ifndef FB_INIT_CDSLIB_H
+#define FB_INIT_CDSLIB_H
+
+#include "../common/classes/alloc.h"
+
+// Uncomment to write final memory usage stats into firebird.log.
+// See ~InitCDS() and ~InitPool() in CCH
+
+//#define DEBUG_CDS_MEMORY
+
+namespace Jrd
+{
+
+class InitCDS
+{
+public:
+
+ explicit InitCDS(MemoryPool&);
+ ~InitCDS();
+
+ // Creates a memory pool that will not be deleted until cds finishes its work.
+ // It should be used to allocate structures used by cds classes.
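+ //
+ // Hedged usage sketch (illustration only; MyChain is a hypothetical
+ // cds-based structure, not a name from this patch):
+ //   Firebird::MemoryPool* pool = InitCDS::createPool();
+ //   MyChain* chain = FB_NEW_POOL(*pool) MyChain();
+ //   // the pool outlives regular attachment pools; it is released in
+ //   // ~InitCDS(), after cds::Terminate() has finished deferred cleanup.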
+ static Firebird::MemoryPool* createPool();
+
+private:
+ static void* alloc(size_t size)
+ {
+ return m_pool->allocate(size ALLOC_ARGS);
+ }
+
+ static void free(void* p)
+ {
+ m_pool->deallocate(p);
+ }
+
+ static Firebird::Array<Firebird::MemoryPool*>* m_pools;
+ static Firebird::MemoryPool* m_pool;
+ static Firebird::MemoryStats m_stats;
+};
+
+} // namespace Jrd
+
+#endif // FB_INIT_CDSLIB_H
diff --git a/src/jrd/btr.h b/src/jrd/btr.h
index 6c5dd4728d..0ba093de9c 100644
--- a/src/jrd/btr.h
+++ b/src/jrd/btr.h
@@ -45,7 +45,7 @@ class jrd_tra;
template <typename T> class vec;
class Statement;
struct temporary_key;
-class jrd_tra;
+class thread_db;
class BtrPageGCLock;
class Sort;
class PartitionedSort;
diff --git a/src/jrd/cch.cpp b/src/jrd/cch.cpp
index 0dcf27c930..adde814f66 100644
--- a/src/jrd/cch.cpp
+++ b/src/jrd/cch.cpp
@@ -63,6 +63,16 @@
#include "../jrd/CryptoManager.h"
#include "../common/utils_proto.h"
+// Use lock-free lists in hash table implementation
+#define HASH_USE_CDS_LIST
+
+
+#ifdef HASH_USE_CDS_LIST
+#include <cds/container/michael_kvlist_dhp.h>
+#include "../jrd/InitCDSLib.h"
+#endif
+
+
using namespace Jrd;
using namespace Ods;
using namespace Firebird;
@@ -106,8 +116,6 @@ static inline void PAGE_LOCK_RE_POST(thread_db* tdbb, BufferControl* bcb, Lock*
}
}
-#define PAGE_OVERHEAD (sizeof(bcb_repeat) + sizeof(BufferDesc) + sizeof(Lock) + (int) bcb->bcb_page_size)
-
enum LatchState
{
lsOk,
@@ -116,8 +124,6 @@ enum LatchState
};
static void adjust_scan_count(WIN* window, bool mustRead);
-static BufferDesc* alloc_bdb(thread_db*, BufferControl*, UCHAR **);
-static Lock* alloc_page_lock(Jrd::thread_db*, BufferDesc*);
static int blocking_ast_bdb(void*);
#ifdef CACHE_READER
static void prefetch_epilogue(Prefetch*, FbStatusVector *);
@@ -127,16 +133,13 @@ static void prefetch_prologue(Prefetch*, SLONG *);
#endif
static void check_precedence(thread_db*, WIN*, PageNumber);
static void clear_precedence(thread_db*, BufferDesc*);
-static BufferDesc* dealloc_bdb(BufferDesc*);
static void down_grade(thread_db*, BufferDesc*, int high = 0);
static bool expand_buffers(thread_db*, ULONG);
-static BufferDesc* find_buffer(BufferControl* bcb, const PageNumber page, bool findPending);
static BufferDesc* get_buffer(thread_db*, const PageNumber, SyncType, int);
static int get_related(BufferDesc*, PagesArray&, int, const ULONG);
static ULONG get_prec_walk_mark(BufferControl*);
-static LatchState latch_buffer(thread_db*, Sync&, BufferDesc*, const PageNumber, SyncType, int);
static LockState lock_buffer(thread_db*, BufferDesc*, const SSHORT, const SCHAR);
-static ULONG memory_init(thread_db*, BufferControl*, SLONG);
+static ULONG memory_init(thread_db*, BufferControl*, ULONG);
static void page_validation_error(thread_db*, win*, SSHORT);
static void purgePrecedence(BufferControl*, BufferDesc*);
static SSHORT related(BufferDesc*, const BufferDesc*, SSHORT, const ULONG);
@@ -148,6 +151,7 @@ static bool write_page(thread_db*, BufferDesc*, FbStatusVector* const, const boo
static bool set_diff_page(thread_db*, BufferDesc*);
static void clear_dirty_flag_and_nbak_state(thread_db*, BufferDesc*);
+static BufferDesc* get_dirty_buffer(thread_db*);
static inline void insertDirty(BufferControl* bcb, BufferDesc* bdb)
@@ -197,13 +201,89 @@ const ULONG MIN_BUFFER_SEGMENT = 65536;
#define BLOCK(fld_ptr, type, fld) (type*)((SCHAR*) fld_ptr - offsetof(type, fld))
-
-const PageNumber FREE_PAGE(DB_PAGE_SPACE, -1);
-
const int PRE_SEARCH_LIMIT = 256;
const int PRE_EXISTS = -1;
const int PRE_UNKNOWN = -2;
+namespace Jrd
+{
+
+#ifdef HASH_USE_CDS_LIST
+
+template <typename T>
+class ListNodeAllocator
+{
+public:
+ typedef T value_type;
+
+ ListNodeAllocator() {};
+
+ template <class U>
+ constexpr ListNodeAllocator(const ListNodeAllocator<U>&) noexcept {}
+
+ T* allocate(std::size_t n);
+ void deallocate(T* p, std::size_t n);
+
+private:
+};
+
+struct BdbTraits : public cds::container::michael_list::traits
+{
+ typedef ListNodeAllocator<int> allocator;
+ //typedef std::less<PageNumber> compare;
+};
+
+typedef cds::container::MichaelKVList<cds::gc::DHP, PageNumber, BufferDesc*, BdbTraits> BdbList;
+
+#endif // HASH_USE_CDS_LIST
+
+
+class BCBHashTable
+{
+#ifdef HASH_USE_CDS_LIST
+ using chain_type = BdbList;
+#else
+ using chain_type = que;
+#endif
+
+public:
+ BCBHashTable(MemoryPool& pool, ULONG count) :
+ m_pool(pool),
+ m_count(0),
+ m_chains(nullptr)
+ {
+ resize(count);
+ }
+
+ ~BCBHashTable()
+ {
+ clear();
+ }
+
+ void resize(ULONG count);
+ void clear();
+
+ BufferDesc* find(const PageNumber& page) const;
+
+ // tries to put bdb into the hash slot for page;
+ // if it succeeds, removes bdb from its old slot, if necessary, and returns NULL,
+ // else returns the BufferDesc that currently occupies the target slot
+ BufferDesc* emplace(BufferDesc* bdb, const PageNumber& page, bool remove);
+
+ void remove(BufferDesc* bdb);
+private:
+ ULONG hash(const PageNumber& pageno) const
+ {
+ return pageno.getPageNum() % m_count;
+ }
+
+ MemoryPool& m_pool;
+ ULONG m_count;
+ chain_type* m_chains;
+};
+
+}
+
void CCH_clean_page(thread_db* tdbb, PageNumber page)
{
@@ -228,10 +308,12 @@ void CCH_clean_page(thread_db* tdbb, PageNumber page)
BufferControl* bcb = dbb->dbb_bcb;
BufferDesc* bdb = NULL;
{
+#ifndef HASH_USE_CDS_LIST
Sync bcbSync(&bcb->bcb_syncObject, "CCH_clean_page");
bcbSync.lock(SYNC_SHARED);
+#endif
- bdb = find_buffer(bcb, page, false);
+ bdb = bcb->bcb_hashTable->find(page);
if (!bdb)
return;
@@ -309,7 +391,7 @@ int CCH_down_grade_dbb(void* ast_object)
if (SHUT_blocking_ast(tdbb, true))
return 0;
- SyncLockGuard dsGuard(&dbb->dbb_sync, SYNC_EXCLUSIVE, "CCH_down_grade_dbb");
+ SyncLockGuard dsGuard(&dbb->dbb_sync, SYNC_EXCLUSIVE, FB_FUNCTION);
// If we are already shared, there is nothing more we can do.
// In any case, the other guy probably wants exclusive access,
@@ -342,21 +424,13 @@
BufferControl* bcb = dbb->dbb_bcb;
if (bcb)
{
- SyncLockGuard bcbSync(&bcb->bcb_syncObject, SYNC_EXCLUSIVE, "CCH_down_grade_dbb");
+ SyncLockGuard bcbSync(&bcb->bcb_syncObject, SYNC_EXCLUSIVE, FB_FUNCTION);
bcb->bcb_flags &= ~BCB_exclusive;
- bool done = (bcb->bcb_count == 0);
- while (!done)
+ for (auto blk : bcb->bcb_bdbBlocks)
{
- done = true;
- const bcb_repeat* const head = bcb->bcb_rpt;
- const bcb_repeat* tail = bcb->bcb_rpt;
- fb_assert(tail); // once I've got here with NULL. AP.
-
- for (const bcb_repeat* const end = tail + bcb->bcb_count; tail < end; ++tail)
+ for (BufferDesc* bdb = blk.m_bdbs; bdb < blk.m_bdbs + blk.m_count; bdb++)
{
- BufferDesc* bdb = tail->bcb_bdb;
-
// Acquire EX latch to avoid races with LCK_release (called by CCH_release)
// or LCK_lock (by lock_buffer) in main thread. Take extra care to avoid
// deadlock with CCH_handoff. See CORE-5436.
@@ -369,13 +443,6 @@
Thread::sleep(1);
}
- if (head != bcb->bcb_rpt)
- {
- // expand_buffers or CCH_fini was called, consider to start all over again
- done = (bcb->bcb_count == 0);
- break;
- }
-
PAGE_LOCK_ASSERT(tdbb, bcb, bdb->bdb_lock);
}
}
@@ -628,7 +695,7 @@ pag* CCH_fake(thread_db* tdbb, WIN* window, int wait)
* 0 => If the latch can't be acquired immediately,
* or an IO would be necessary, then give
* up and return 0.
- * => Latch timeout interval in seconds. + * => Latch timeout interval in seconds. * * return * pag pointer if successful. @@ -653,6 +720,8 @@ pag* CCH_fake(thread_db* tdbb, WIN* window, int wait) if (!bdb) return NULL; // latch timeout occurred + fb_assert(bdb->bdb_page == window->win_page); + // If a dirty orphaned page is being reused - better write it first // to clear current precedences and checkpoint state. This would also // update the bcb_free_pages field appropriately. @@ -683,6 +752,7 @@ pag* CCH_fake(thread_db* tdbb, WIN* window, int wait) // Here the page must not be dirty and have no backup lock owner fb_assert((bdb->bdb_flags & (BDB_dirty | BDB_db_dirty)) == 0); + fb_assert(bdb->bdb_page == window->win_page); bdb->bdb_flags &= BDB_lru_chained; // yes, clear all except BDB_lru_chained bdb->bdb_flags |= (BDB_writer | BDB_faked); @@ -692,6 +762,7 @@ pag* CCH_fake(thread_db* tdbb, WIN* window, int wait) lock_buffer(tdbb, bdb, LCK_WAIT, pag_undefined); MOVE_CLEAR(bdb->bdb_buffer, (SLONG) dbb->dbb_page_size); + bdb->bdb_buffer->pag_pageno = window->win_page.getPageNum(); window->win_buffer = bdb->bdb_buffer; window->win_bdb = bdb; window->win_flags = 0; @@ -805,6 +876,12 @@ LockState CCH_fetch_lock(thread_db* tdbb, WIN* window, int lock_type, int wait, if (wait != 1 && bdb == 0) return lsLatchTimeout; // latch timeout + fb_assert(bdb->bdb_page == window->win_page); + if (!(bdb->bdb_flags & BDB_read_pending)) + fb_assert(bdb->bdb_buffer->pag_pageno == window->win_page.getPageNum()); + else + fb_assert(bdb->ourExclusiveLock() || bdb->bdb_lock && bdb->bdb_lock->lck_logical == LCK_none); + if (lock_type >= LCK_write) bdb->bdb_flags |= BDB_writer; @@ -933,6 +1010,8 @@ void CCH_fetch_page(thread_db* tdbb, WIN* window, const bool read_shadow) // then we maintain actual page in difference file. Always read it from there. 
if (isTempPage || bak_state == Ods::hdr_nbak_normal || !diff_page) { + fb_assert(bdb->bdb_page == window->win_page); + NBAK_TRACE(("Reading page %d:%06d, state=%d, diff page=%d from DISK", bdb->bdb_page.getPageSpaceID(), bdb->bdb_page.getPageNum(), bak_state, diff_page)); @@ -946,6 +1025,8 @@ void CCH_fetch_page(thread_db* tdbb, WIN* window, const bool read_shadow) CCH_unwind(tdbb, true); } } + fb_assert(bdb->bdb_page == window->win_page); + fb_assert(bdb->bdb_buffer->pag_pageno == window->win_page.getPageNum()); } else { @@ -1024,11 +1105,22 @@ void CCH_forget_page(thread_db* tdbb, WIN* window) } // remove from hash table and put into empty list +#ifndef HASH_USE_CDS_LIST { SyncLockGuard bcbSync(&bcb->bcb_syncObject, SYNC_EXCLUSIVE, FB_FUNCTION); - QUE_DELETE(bdb->bdb_que); + bcb->bcb_hashTable->remove(bdb); QUE_INSERT(bcb->bcb_empty, bdb->bdb_que); + bcb->bcb_inuse--; } +#else + bcb->bcb_hashTable->remove(bdb); + + { + SyncLockGuard syncEmpty(&bcb->bcb_syncEmpty, SYNC_EXCLUSIVE, FB_FUNCTION); + QUE_INSERT(bcb->bcb_empty, bdb->bdb_que); + bcb->bcb_inuse--; + } +#endif bdb->bdb_flags = 0; @@ -1051,25 +1143,29 @@ void CCH_fini(thread_db* tdbb) **************************************/ SET_TDBB(tdbb); Database* const dbb = tdbb->getDatabase(); + + SyncLockGuard dsGuard(&dbb->dbb_sync, SYNC_EXCLUSIVE, FB_FUNCTION); + BufferControl* const bcb = dbb->dbb_bcb; if (!bcb) return; - bcb_repeat* tail = bcb->bcb_rpt; - const bcb_repeat* const end = tail + bcb->bcb_count; + delete bcb->bcb_hashTable; - for (; tail < end; tail++) + for (auto blk : bcb->bcb_bdbBlocks) { - if (tail->bcb_bdb) + for (ULONG i = 0; i < blk.m_count; i++) { - delete tail->bcb_bdb; - tail->bcb_bdb = NULL; + BufferDesc& bdb = blk.m_bdbs[i]; + + if (bdb.bdb_lock) + bdb.bdb_lock->~Lock(); + bdb.~BufferDesc(); } } - delete[] bcb->bcb_rpt; - bcb->bcb_rpt = NULL; + bcb->bcb_bdbBlocks.clear(); bcb->bcb_count = 0; while (bcb->bcb_memory.hasData()) @@ -1148,7 +1244,7 @@ void CCH_flush(thread_db* tdbb, USHORT flush_flag, TraNumber tra_number) { const time_t now = time(0); - SyncLockGuard guard(&dbb->dbb_flush_count_mutex, SYNC_EXCLUSIVE, "CCH_flush"); + SyncLockGuard guard(&dbb->dbb_flush_count_mutex, SYNC_EXCLUSIVE, FB_FUNCTION); // If this is the first commit set last_flushed_write to now if (!dbb->last_flushed_write) @@ -1218,17 +1314,21 @@ void CCH_flush_ast(thread_db* tdbb) CCH_flush(tdbb, FLUSH_ALL, 0); else { + SyncLockGuard bcbSync(&bcb->bcb_syncObject, SYNC_SHARED, FB_FUNCTION); + // Do some fancy footwork to make sure that pages are // not removed from the btc tree at AST level. Then // restore the flag to whatever it was before. 
const bool keep_pages = bcb->bcb_flags & BCB_keep_pages; bcb->bcb_flags |= BCB_keep_pages; - for (ULONG i = 0; (bcb = dbb->dbb_bcb) && i < bcb->bcb_count; i++) + for (auto blk : bcb->bcb_bdbBlocks) { - BufferDesc* bdb = bcb->bcb_rpt[i].bcb_bdb; - if (bdb->bdb_flags & (BDB_dirty | BDB_db_dirty)) - down_grade(tdbb, bdb, 1); + for (BufferDesc* bdb = blk.m_bdbs; bdb < blk.m_bdbs + blk.m_count; bdb++) + { + if (bdb->bdb_flags & (BDB_dirty | BDB_db_dirty)) + down_grade(tdbb, bdb, 1); + } } if (!keep_pages) @@ -1261,8 +1361,7 @@ bool CCH_free_page(thread_db* tdbb) BufferDesc* bdb; - if ((bcb->bcb_flags & BCB_free_pending) && - (bdb = get_buffer(tdbb, FREE_PAGE, SYNC_NONE, 1))) + if ((bcb->bcb_flags & BCB_free_pending) && (bdb = get_dirty_buffer(tdbb))) { if (write_buffer(tdbb, bdb, bdb->bdb_page, true, tdbb->tdbb_status_vector, true)) return true; @@ -1308,11 +1407,15 @@ void CCH_get_related(thread_db* tdbb, PageNumber page, PagesArray &lowPages) Database* dbb = tdbb->getDatabase(); BufferControl* bcb = dbb->dbb_bcb; +#ifndef HASH_USE_CDS_LIST Sync bcbSync(&bcb->bcb_syncObject, "CCH_get_related"); bcbSync.lock(SYNC_SHARED); +#endif - BufferDesc* bdb = find_buffer(bcb, page, false); + BufferDesc* bdb = bcb->bcb_hashTable->find(page); +#ifndef HASH_USE_CDS_LIST bcbSync.unlock(); +#endif if (bdb) { @@ -1385,17 +1488,22 @@ pag* CCH_handoff(thread_db* tdbb, WIN* window, ULONG page, int lock, SCHAR page_ WIN temp = *window; window->win_page = PageNumber(window->win_page.getPageSpaceID(), page); - // This prevents a deadlock with the precedence queue, as shown by - // mwrite mwrite1 2 mwrite2 2 test.fdb - - const int wait2 = bdb->ourExclusiveLock() ? LCK_NO_WAIT : wait; - LockState must_read = CCH_fetch_lock(tdbb, window, lock, wait2, page_type); - - if ((must_read == lsLatchTimeout || must_read == lsLockTimeout) && wait2 == LCK_NO_WAIT) + LockState must_read; + if (bdb->bdb_bcb->bcb_flags & BCB_exclusive) { - temp.win_bdb->downgrade(SYNC_SHARED); - must_read = CCH_fetch_lock(tdbb, window, lock, wait, page_type); + // This prevents a deadlock with the precedence queue, as shown by + // mwrite mwrite1 2 mwrite2 2 test.fdb + + const int wait2 = bdb->ourExclusiveLock() ? LCK_NO_WAIT : wait; + must_read = CCH_fetch_lock(tdbb, window, lock, wait2, page_type); + if (must_read == lsLatchTimeout && wait2 == LCK_NO_WAIT) + { + bdb->downgrade(SYNC_SHARED); + must_read = CCH_fetch_lock(tdbb, window, lock, wait, page_type); + } } + else + must_read = CCH_fetch_lock(tdbb, window, lock, wait, page_type); // Latch or lock timeout, return failure. @@ -1465,7 +1573,7 @@ void CCH_init(thread_db* tdbb, ULONG number) if (number > MAX_PAGE_BUFFERS) number = MAX_PAGE_BUFFERS; - const SLONG count = number; + const ULONG count = number; // Allocate and initialize buffers control block BufferControl* bcb = BufferControl::create(dbb); @@ -1473,17 +1581,14 @@ void CCH_init(thread_db* tdbb, ULONG number) { try { - bcb->bcb_rpt = FB_NEW_POOL(*bcb->bcb_bufferpool) bcb_repeat[number]; + bcb->bcb_hashTable = FB_NEW_POOL(*bcb->bcb_bufferpool) + BCBHashTable(*bcb->bcb_bufferpool, number); break; } catch (const Firebird::Exception& ex) { ex.stuffException(tdbb->tdbb_status_vector); - // If the buffer control block can't be allocated, memory is - // very low. Recalculate the number of buffers to account for - // page buffer overhead and reduce that number by a 25% fudge factor. 
- number = (sizeof(bcb_repeat) * number) / PAGE_OVERHEAD; number -= number >> 2; if (number < MIN_PAGE_BUFFERS) @@ -1504,7 +1609,7 @@ void CCH_init(thread_db* tdbb, ULONG number) // initialization of memory is system-specific - bcb->bcb_count = memory_init(tdbb, bcb, static_cast(number)); + bcb->bcb_count = memory_init(tdbb, bcb, number); bcb->bcb_free_minimum = (SSHORT) MIN(bcb->bcb_count / 4, 128); if (bcb->bcb_count < MIN_PAGE_BUFFERS) @@ -1512,7 +1617,7 @@ void CCH_init(thread_db* tdbb, ULONG number) // Log if requested number of page buffers could not be allocated. - if (count != (SLONG) bcb->bcb_count) + if (count != bcb->bcb_count) { gds__log("Database: %s\n\tAllocated %ld page buffers of %ld requested", tdbb->getAttachment()->att_filename.c_str(), bcb->bcb_count, count); @@ -1977,6 +2082,7 @@ void CCH_release(thread_db* tdbb, WIN* window, const bool release_tail) // Reassert blocking AST after write failure with dummy lock convert // to same level. This will re-enable blocking AST notification. + if (!(bcb->bcb_flags & BCB_exclusive)) { // scope ThreadStatusGuard temp_status(tdbb); LCK_convert_opt(tdbb, bdb->bdb_lock, bdb->bdb_lock->lck_logical); @@ -2137,14 +2243,11 @@ void CCH_shutdown(thread_db* tdbb) bcb->bcb_writer_fini.waitForCompletion(); } - SyncLockGuard bcbSync(&bcb->bcb_syncObject, SYNC_EXCLUSIVE, "CCH_shutdown"); + SyncLockGuard bcbSync(&bcb->bcb_syncObject, SYNC_EXCLUSIVE, FB_FUNCTION); // Flush and release page buffers - bcb_repeat* tail = bcb->bcb_rpt; - const bcb_repeat* const end = tail + bcb->bcb_count; - - if (tail && tail->bcb_bdb) + if (bcb->bcb_count) { try { @@ -2155,17 +2258,21 @@ void CCH_shutdown(thread_db* tdbb) } catch (const Exception&) { - for (; tail < end; tail++) + for (auto blk : bcb->bcb_bdbBlocks) { - BufferDesc* const bdb = tail->bcb_bdb; - - if (dbb->dbb_flags & DBB_bugcheck) + BufferDesc* bdb = blk.m_bdbs; + const BufferDesc* const end = blk.m_bdbs + blk.m_count; + for (; bdb < end; bdb++) { - bdb->bdb_flags &= ~BDB_db_dirty; - clear_dirty_flag_and_nbak_state(tdbb, bdb); - } - PAGE_LOCK_RELEASE(tdbb, bcb, bdb->bdb_lock); + if (dbb->dbb_flags & DBB_bugcheck) + { + bdb->bdb_flags &= ~BDB_db_dirty; + clear_dirty_flag_and_nbak_state(tdbb, bdb); + } + + PAGE_LOCK_RELEASE(tdbb, bcb, bdb->bdb_lock); + } } } } @@ -2475,63 +2582,6 @@ static void adjust_scan_count(WIN* window, bool mustRead) } -static BufferDesc* alloc_bdb(thread_db* tdbb, BufferControl* bcb, UCHAR** memory) -{ -/************************************** - * - * a l l o c _ b d b - * - ************************************** - * - * Functional description - * Allocate buffer descriptor block. - * - **************************************/ - SET_TDBB(tdbb); - - BufferDesc* bdb = FB_NEW_POOL(*bcb->bcb_bufferpool) BufferDesc(bcb); - - try { - bdb->bdb_lock = alloc_page_lock(tdbb, bdb); - } - catch (const Firebird::Exception&) - { - delete bdb; - throw; - } - - bdb->bdb_buffer = (pag*) *memory; - *memory += bcb->bcb_page_size; - - QUE_INSERT(bcb->bcb_empty, bdb->bdb_que); - - return bdb; -} - - -static Lock* alloc_page_lock(thread_db* tdbb, BufferDesc* bdb) -{ -/************************************** - * - * a l l o c _ p a g e _ l o c k - * - ************************************** - * - * Functional description - * Allocate a page-type lock. 
- * - **************************************/ - SET_TDBB(tdbb); - Database* const dbb = tdbb->getDatabase(); - BufferControl* const bcb = bdb->bdb_bcb; - - const USHORT lockLen = PageNumber::getLockLen(); - - return FB_NEW_RPT(*bcb->bcb_bufferpool, lockLen) - Lock(tdbb, lockLen, LCK_bdb, bdb, blocking_ast_bdb); -} - - static int blocking_ast_bdb(void* ast_object) { /************************************** @@ -2666,35 +2716,42 @@ static void flushAll(thread_db* tdbb, USHORT flush_flag) const bool release_flag = (flush_flag & FLUSH_RLSE) != 0; const bool write_thru = release_flag; - for (ULONG i = 0; i < bcb->bcb_count; i++) { - BufferDesc* bdb = bcb->bcb_rpt[i].bcb_bdb; - if (!bdb) // first non-initialized BDB, abandon following checks - break; + Sync bcbSync(&bcb->bcb_syncObject, FB_FUNCTION); + if (!bcb->bcb_syncObject.ourExclusiveLock()) + bcbSync.lock(SYNC_SHARED); - if (bdb->bdb_flags & (BDB_db_dirty | BDB_dirty)) + for (auto blk : bcb->bcb_bdbBlocks) { - if (bdb->bdb_flags & BDB_dirty) - flush.add(bdb); - else if (bdb->bdb_flags & BDB_db_dirty) + for (ULONG i = 0; i < blk.m_count; i++) { - // pages modified by sweep\garbage collector are not in dirty list - const bool dirty_list = (bdb->bdb_dirty.que_forward != &bdb->bdb_dirty); + BufferDesc* bdb = &blk.m_bdbs[i]; - if (all_flag || (sweep_flag && !dirty_list)) - flush.add(bdb); + if (bdb->bdb_flags & (BDB_db_dirty | BDB_dirty)) + { + if (bdb->bdb_flags & BDB_dirty) + flush.add(bdb); + else if (bdb->bdb_flags & BDB_db_dirty) + { + // pages modified by sweep\garbage collector are not in dirty list + const bool dirty_list = (bdb->bdb_dirty.que_forward != &bdb->bdb_dirty); + + if (all_flag || (sweep_flag && !dirty_list)) + flush.add(bdb); + } + } + else if (release_flag) + { + bdb->addRef(tdbb, SYNC_EXCLUSIVE); + + if (bdb->bdb_use_count > 1) + BUGCHECK(210); // msg 210 page in use during flush + + PAGE_LOCK_RELEASE(tdbb, bcb, bdb->bdb_lock); + bdb->release(tdbb, false); + } } } - else if (release_flag) - { - bdb->addRef(tdbb, SYNC_EXCLUSIVE); - - if (bdb->bdb_use_count > 1) - BUGCHECK(210); // msg 210 page in use during flush - - PAGE_LOCK_RELEASE(tdbb, bcb, bdb->bdb_lock); - bdb->release(tdbb, false); - } } flushPages(tdbb, flush_flag, flush.begin(), flush.getCount()); @@ -2919,8 +2976,7 @@ void BufferControl::cache_reader(BufferControl* bcb) BufferDesc* bdb; if (found) JRD_reschedule(tdbb, true); - else if (bcb->bcb_flags & BCB_free_pending && - (bdb = get_buffer(tdbb, FREE_PAGE, LATCH_none, 1))) + else if (bcb->bcb_flags & BCB_free_pending && (bdb = get_dirty_buffer(tdbb))) { // In our spare time, help writer clean the cache. 
@@ -3024,7 +3080,7 @@ void BufferControl::cache_writer(BufferControl* bcb) if (bcb->bcb_flags & BCB_free_pending) { - BufferDesc* const bdb = get_buffer(tdbb, FREE_PAGE, SYNC_NONE, 1); + BufferDesc* const bdb = get_dirty_buffer(tdbb); if (bdb) write_buffer(tdbb, bdb, bdb->bdb_page, true, &status_vector, true); } @@ -3151,11 +3207,15 @@ static void check_precedence(thread_db* tdbb, WIN* window, PageNumber page) // Start by finding the buffer containing the high priority page - Sync bcbSync(&bcb->bcb_syncObject, "check_precedence"); +#ifndef HASH_USE_CDS_LIST + Sync bcbSync(&bcb->bcb_syncObject, FB_FUNCTION); bcbSync.lock(SYNC_SHARED); +#endif - BufferDesc* high = find_buffer(bcb, page, false); + BufferDesc* high = bcb->bcb_hashTable->find(page); +#ifndef HASH_USE_CDS_LIST bcbSync.unlock(); +#endif if (!high) return; @@ -3173,7 +3233,7 @@ static void check_precedence(thread_db* tdbb, WIN* window, PageNumber page) // If already related, there's nothing more to do. If the precedence // search was too complex to complete, just write the high page and - // forget about about establishing the relationship. + // forget about establishing the relationship. Sync precSync(&bcb->bcb_syncPrecedence, "check_precedence"); precSync.lock(SYNC_EXCLUSIVE); @@ -3284,30 +3344,6 @@ static void clear_precedence(thread_db* tdbb, BufferDesc* bdb) } -static BufferDesc* dealloc_bdb(BufferDesc* bdb) -{ -/************************************** - * - * d e a l l o c _ b d b - * - ************************************** - * - * Functional description - * Deallocate buffer descriptor block. - * - **************************************/ - if (bdb) - { - delete bdb->bdb_lock; - QUE_DELETE(bdb->bdb_que); - - delete bdb; - } - - return NULL; -} - - static void down_grade(thread_db* tdbb, BufferDesc* bdb, int high) { /************************************** @@ -3527,10 +3563,6 @@ static bool expand_buffers(thread_db* tdbb, ULONG number) * Expand the cache to at least a given number of buffers. * If it's already that big, don't do anything. * - * Nickolay Samofatov, 08-Mar-2004. - * This function does not handle exceptions correctly, - * it looks like good handling requires rewrite. - * **************************************/ SET_TDBB(tdbb); Database* const dbb = tdbb->getDatabase(); @@ -3539,160 +3571,190 @@ static bool expand_buffers(thread_db* tdbb, ULONG number) if (number <= bcb->bcb_count || number > MAX_PAGE_BUFFERS) return false; - Sync syncBcb(&bcb->bcb_syncObject, "expand_buffers"); - syncBcb.lock(SYNC_EXCLUSIVE); + SyncLockGuard syncBcb(&bcb->bcb_syncObject, SYNC_EXCLUSIVE, FB_FUNCTION); - // for Win16 platform, we want to ensure that no cache buffer ever ends on a segment boundary - // CVC: Is this code obsolete or only the comment? 
+ if (number <= bcb->bcb_count) + return false; - ULONG num_per_seg = number - bcb->bcb_count; - ULONG left_to_do = num_per_seg; + // Expand hash table only if there is no concurrent attachments + if ((tdbb->getAttachment()->att_flags & ATT_exclusive) || !(bcb->bcb_flags & BCB_exclusive)) + bcb->bcb_hashTable->resize(number); - // Allocate and initialize buffers control block - Jrd::ContextPoolHolder context(tdbb, bcb->bcb_bufferpool); + SyncLockGuard syncEmpty(&bcb->bcb_syncEmpty, SYNC_EXCLUSIVE, FB_FUNCTION); + ULONG allocated = memory_init(tdbb, bcb, number - bcb->bcb_count); - const bcb_repeat* const old_end = bcb->bcb_rpt + bcb->bcb_count; - - bcb_repeat* const new_rpt = FB_NEW_POOL(*bcb->bcb_bufferpool) bcb_repeat[number]; - bcb_repeat* const old_rpt = bcb->bcb_rpt; - - bcb->bcb_rpt = new_rpt; - bcb->bcb_count = number; - bcb->bcb_free_minimum = (SSHORT) MIN(number / 4, 128); /* 25% clean page reserve */ - - const bcb_repeat* const new_end = bcb->bcb_rpt + number; - - // Initialize tail of new buffer control block - bcb_repeat* new_tail; - for (new_tail = bcb->bcb_rpt; new_tail < new_end; new_tail++) - { - QUE_INIT(new_tail->bcb_page_mod); - new_tail->bcb_bdb = nullptr; - } - - // Move any active buffers from old block to new - - new_tail = bcb->bcb_rpt; - - for (bcb_repeat* old_tail = old_rpt; old_tail < old_end; old_tail++, new_tail++) - { - new_tail->bcb_bdb = old_tail->bcb_bdb; - while (QUE_NOT_EMPTY(old_tail->bcb_page_mod)) - { - QUE que_inst = old_tail->bcb_page_mod.que_forward; - BufferDesc* bdb = BLOCK(que_inst, BufferDesc, bdb_que); - QUE_DELETE(*que_inst); - QUE mod_que = &bcb->bcb_rpt[bdb->bdb_page.getPageNum() % bcb->bcb_count].bcb_page_mod; - QUE_INSERT(*mod_que, *que_inst); - } - } - - // Allocate new buffer descriptor blocks - - ULONG num_in_seg = 0; - UCHAR* memory = NULL; - for (; new_tail < new_end; new_tail++) - { - // if current segment is exhausted, allocate another - - if (!num_in_seg) - { - const size_t alloc_size = ((size_t) dbb->dbb_page_size) * (num_per_seg + 1); - memory = (UCHAR*) bcb->bcb_bufferpool->allocate(alloc_size ALLOC_ARGS); - bcb->bcb_memory.push(memory); - memory = FB_ALIGN(memory, dbb->dbb_page_size); - - num_in_seg = num_per_seg; - left_to_do -= num_per_seg; - if (num_per_seg > left_to_do) - num_per_seg = left_to_do; - } - new_tail->bcb_bdb = alloc_bdb(tdbb, bcb, &memory); - num_in_seg--; - } - - // Set up new buffer control, release old buffer control, and clean up - - delete[] old_rpt; + bcb->bcb_count += allocated; + bcb->bcb_free_minimum = (SSHORT)MIN(bcb->bcb_count / 4, 128); /* 25% clean page reserve */ return true; } -static BufferDesc* find_buffer(BufferControl* bcb, const PageNumber page, bool findPending) + +static BufferDesc* get_dirty_buffer(thread_db* tdbb) { - QUE mod_que = &bcb->bcb_rpt[page.getPageNum() % bcb->bcb_count].bcb_page_mod; - QUE que_inst = mod_que->que_forward; - for (; que_inst != mod_que; que_inst = que_inst->que_forward) + // This code is only used by the background I/O threads: + // cache writer, cache reader and garbage collector. 
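+ //
+ // The walk below starts at the cold end of the LRU queue and skips
+ // buffers that are latched, LRU-chained or already marked free-pending.
+ // The first buffer found with BDB_db_dirty set is returned for writing;
+ // if only chained entries were seen, the LRU chain is requeued instead,
+ // and NULL means there is currently nothing for the caller to write.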
+ + SET_TDBB(tdbb); + Database* dbb = tdbb->getDatabase(); + BufferControl* bcb = dbb->dbb_bcb; + int walk = bcb->bcb_free_minimum; + int chained = walk; + + Sync lruSync(&bcb->bcb_syncLRU, FB_FUNCTION); + lruSync.lock(SYNC_SHARED); + + for (QUE que_inst = bcb->bcb_in_use.que_backward; + que_inst != &bcb->bcb_in_use; que_inst = que_inst->que_backward) { - BufferDesc* bdb = BLOCK(que_inst, BufferDesc, bdb_que); - if (bdb->bdb_page == page) + BufferDesc* bdb = BLOCK(que_inst, BufferDesc, bdb_in_use); + + if (bdb->bdb_flags & BDB_lru_chained) + { + if (!--chained) + break; + continue; + } + + if (bdb->bdb_use_count || (bdb->bdb_flags & BDB_free_pending)) + continue; + + if (bdb->bdb_flags & BDB_db_dirty) + { + //tdbb->bumpStats(RuntimeStatistics::PAGE_FETCHES); shouldn't it be here? return bdb; + } + + if (!--walk) + break; } - if (findPending) + if (!chained) { - que_inst = bcb->bcb_pending.que_forward; - for (; que_inst != &bcb->bcb_pending; que_inst = que_inst->que_forward) - { - BufferDesc* bdb = BLOCK(que_inst, BufferDesc, bdb_que); - if (bdb->bdb_page == page || bdb->bdb_pending_page == page) - return bdb; - } + lruSync.unlock(); + lruSync.lock(SYNC_EXCLUSIVE); + requeueRecentlyUsed(bcb); } + else + bcb->bcb_flags &= ~BCB_free_pending; return NULL; } -static LatchState latch_buffer(thread_db* tdbb, Sync &bcbSync, BufferDesc *bdb, - const PageNumber page, SyncType syncType, int wait) +static BufferDesc* get_oldest_buffer(thread_db* tdbb, BufferControl* bcb) { - //++bdb->bdb_use_count; +/************************************** + * Function description: + * Get candidate for preemption + * Found page buffer must have SYNC_EXCLUSIVE lock. + **************************************/ - if (!(bdb->bdb_flags & BDB_free_pending) -#ifdef SUPERSERVER_V2 - && (page != HEADER_PAGE_NUMBER) -#endif - ) + int walk = bcb->bcb_free_minimum; + BufferDesc* bdb = nullptr; + + Sync lruSync(&bcb->bcb_syncLRU, FB_FUNCTION); + if (bcb->bcb_lru_chain.load() != NULL) { - recentlyUsed(bdb); - } - - // If buffer is currently replacing by another page but still writting - // to disk we should wait until this write finished, else we could - // allocate another buffer and read old page image (or even zero's) - // from disk into new buffer - - const bool waitPending = ((bdb->bdb_flags & BDB_free_pending) && bdb->bdb_page == page); - - bcbSync.unlock(); - - if (waitPending) - { - //--bdb->bdb_use_count; - if (wait == 0) - return lsTimeout; // go out - - Thread::yield(); + lruSync.lock(SYNC_EXCLUSIVE); + requeueRecentlyUsed(bcb); + lruSync.downgrade(SYNC_SHARED); } else + lruSync.lock(SYNC_SHARED); + + for (QUE que_inst = bcb->bcb_in_use.que_backward; + que_inst != &bcb->bcb_in_use; + que_inst = que_inst->que_backward) { - const bool latchOk = bdb->addRef(tdbb, syncType, wait); + bdb = nullptr; - //--bdb->bdb_use_count; + // get the oldest buffer as the least recently used -- note + // that since there are no empty buffers this queue cannot be empty - if (!latchOk) - return lsTimeout; // go out + if (bcb->bcb_in_use.que_forward == &bcb->bcb_in_use) + BUGCHECK(213); // msg 213 insufficient cache size - if (bdb->bdb_page == page) + BufferDesc* oldest = BLOCK(que_inst, BufferDesc, bdb_in_use); + + if (oldest->bdb_flags & BDB_lru_chained) + continue; + + if (oldest->bdb_use_count || !oldest->addRefConditional(tdbb, SYNC_EXCLUSIVE)) + continue; + + /*if (!writeable(oldest)) { - //bdb->bdb_flags &= ~(BDB_faked | BDB_prefetch); - return lsOk; + oldest->release(tdbb, true); + continue; + }*/ + + bdb = oldest; + if (!(bdb->bdb_flags 
& (BDB_dirty | BDB_db_dirty)) || !walk) + break; + + if (!(bcb->bcb_flags & BCB_cache_writer)) + break; + + bcb->bcb_flags |= BCB_free_pending; + if (!(bcb->bcb_flags & BCB_writer_active)) + bcb->bcb_writer_sem.release(); + + if (walk) + { + bdb->release(tdbb, true); +// if (!--walk) +// return nullptr; + bdb = nullptr; + --walk; + } + } + + lruSync.unlock(); + + if (!bdb) + return nullptr; + + // If the buffer selected is dirty, arrange to have it written. + + if (bdb->bdb_flags & (BDB_dirty | BDB_db_dirty)) + { + const bool write_thru = (bcb->bcb_flags & BCB_exclusive); + if (!write_buffer(tdbb, bdb, bdb->bdb_page, write_thru, tdbb->tdbb_status_vector, true)) + { + bdb->release(tdbb, true); + CCH_unwind(tdbb, true); + } + } + + // If the buffer is still in the dirty tree, remove it. + // In any case, release any lock it may have. + + removeDirty(bcb, bdb); + + // Cleanup any residual precedence blocks. Unless something is + // screwed up, the only precedence blocks that can still be hanging + // around are ones cleared at AST level. + + if (QUE_NOT_EMPTY(bdb->bdb_higher) || QUE_NOT_EMPTY(bdb->bdb_lower)) + { + Sync precSync(&bcb->bcb_syncPrecedence, "get_buffer"); + precSync.lock(SYNC_EXCLUSIVE); + + while (QUE_NOT_EMPTY(bdb->bdb_higher)) + { + QUE que2 = bdb->bdb_higher.que_forward; + Precedence* precedence = BLOCK(que2, Precedence, pre_higher); + QUE_DELETE(precedence->pre_higher); + QUE_DELETE(precedence->pre_lower); + precedence->pre_hi = (BufferDesc*)bcb->bcb_free; + bcb->bcb_free = precedence; } - bdb->release(tdbb, true); + clear_precedence(tdbb, bdb); } - return lsPageChanged; // try again + + return bdb; } @@ -3708,9 +3770,6 @@ static BufferDesc* get_buffer(thread_db* tdbb, const PageNumber page, SyncType s * Get a buffer. If possible, get a buffer already assigned * to the page. Otherwise get one from the free list or pick * the least recently used buffer to be reused. - * Note the following special page numbers: - * -1 indicates that a buffer is required for journaling => obsolete - * -2 indicates a special scratch buffer for shadowing * * input * page: page to get @@ -3719,327 +3778,167 @@ static BufferDesc* get_buffer(thread_db* tdbb, const PageNumber page, SyncType s * This can cause deadlocks of course. * 0 => If the lock can't be acquired immediately, * give up and return 0; - * => Latch timeout interval in seconds. + * => Latch timeout interval in seconds. * * return * BufferDesc pointer if successful. * NULL pointer if timeout occurred (only possible is wait <> 1). - * if cache manager doesn't have any pages to write anymore. + * if cache manager doesn't have any pages to write anymore. 
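+ *
+ * With HASH_USE_CDS_LIST defined the lookup below goes straight to the
+ * lock-free hash table, and the retry loop replaces the old
+ * find_buffer/latch_buffer pair: find the page, latch it, re-check that
+ * the buffer still holds the same page, otherwise fall back to an empty
+ * buffer (or preempt the LRU-oldest one) and try to emplace it.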
* **************************************/ SET_TDBB(tdbb); Database* dbb = tdbb->getDatabase(); BufferControl* bcb = dbb->dbb_bcb; - Sync bcbSync(&bcb->bcb_syncObject, "get_buffer"); - if (page != FREE_PAGE) - { - bcbSync.lock(SYNC_SHARED); - BufferDesc* bdb = find_buffer(bcb, page, true); - while (bdb) - { - const LatchState ret = latch_buffer(tdbb, bcbSync, bdb, page, syncType, wait); - if (ret == lsOk) - { - tdbb->bumpStats(RuntimeStatistics::PAGE_FETCHES); - return bdb; - } - - if (ret == lsTimeout) - return NULL; - - bcbSync.lock(SYNC_SHARED); - bdb = find_buffer(bcb, page, true); - } - bcbSync.unlock(); - } - - bcbSync.lock(SYNC_EXCLUSIVE); - - QUE que_inst; - int walk = bcb->bcb_free_minimum; while (true) { - if (page != FREE_PAGE) + BufferDesc* bdb = nullptr; + bool is_empty = false; + while (!bdb) { - // Check to see if buffer has already been assigned to page - BufferDesc* bdb = find_buffer(bcb, page, true); - while (bdb) + // try to get already existing buffer { - const LatchState ret = latch_buffer(tdbb, bcbSync, bdb, page, syncType, wait); - if (ret == lsOk) +#ifndef HASH_USE_CDS_LIST + SyncLockGuard bcbSync(&bcb->bcb_syncObject, SYNC_SHARED, FB_FUNCTION); +#endif + bdb = bcb->bcb_hashTable->find(page); + } + + if (bdb) + { + // latch page buffer if it's been found + if (!bdb->addRef(tdbb, syncType, wait)) { + fb_assert(wait <= 0); + return nullptr; + } + + // ensure the found page buffer is still for the same page after latch + if (bdb->bdb_page == page) + { + recentlyUsed(bdb); tdbb->bumpStats(RuntimeStatistics::PAGE_FETCHES); return bdb; } - if (ret == lsTimeout) - return NULL; - - bcbSync.lock(SYNC_EXCLUSIVE); - bdb = find_buffer(bcb, page, true); + // leave the found page buffer and try another one + bdb->release(tdbb, true); + bdb = nullptr; + continue; } - } - else // page == FREE_PAGE - { - // This code is only used by the background I/O threads: - // cache writer, cache reader and garbage collector. - //Database::Checkout dcoHolder(dbb); - - Sync lruSync(&bcb->bcb_syncLRU, "get_buffer"); - lruSync.lock(SYNC_EXCLUSIVE); - - for (que_inst = bcb->bcb_in_use.que_backward; - que_inst != &bcb->bcb_in_use; que_inst = que_inst->que_backward) + // try empty list + if (QUE_NOT_EMPTY(bcb->bcb_empty)) { - BufferDesc* bdb = BLOCK(que_inst, BufferDesc, bdb_in_use); - - if (bdb->bdb_use_count || (bdb->bdb_flags & BDB_free_pending)) - continue; - - if (bdb->bdb_flags & BDB_db_dirty) + SyncLockGuard bcbSync(&bcb->bcb_syncEmpty, SYNC_EXCLUSIVE, FB_FUNCTION); + if (QUE_NOT_EMPTY(bcb->bcb_empty)) { - //tdbb->bumpStats(RuntimeStatistics::PAGE_FETCHES); shouldn't it be here? 
+ QUE que_inst = bcb->bcb_empty.que_forward; + QUE_DELETE(*que_inst); + QUE_INIT(*que_inst); + bdb = BLOCK(que_inst, BufferDesc, bdb_que); + + bcb->bcb_inuse++; + is_empty = true; + } + } + + if (bdb) + bdb->addRef(tdbb, SYNC_EXCLUSIVE); + else + { + bdb = get_oldest_buffer(tdbb, bcb); + if (!bdb) + { + Thread::yield(); + } + else if (bdb->bdb_page == page) + { + bdb->downgrade(syncType); + recentlyUsed(bdb); + tdbb->bumpStats(RuntimeStatistics::PAGE_FETCHES); return bdb; } + } + } - if (!--walk) + fb_assert(bdb->ourExclusiveLock()); + + // we have either empty buffer or candidate for preemption + // try to put it into target hash chain + while (true) + { + BufferDesc* bdb2 = nullptr; + + { +#ifndef HASH_USE_CDS_LIST + SyncLockGuard bcbSync(&bcb->bcb_syncObject, SYNC_EXCLUSIVE, FB_FUNCTION); +#endif + bdb2 = bcb->bcb_hashTable->emplace(bdb, page, !is_empty); + if (!bdb2) { - bcb->bcb_flags &= ~BCB_free_pending; - break; + bdb->bdb_page = page; + bdb->bdb_flags &= BDB_lru_chained; // yes, clear all except BDB_lru_chained + bdb->bdb_flags |= BDB_read_pending; + bdb->bdb_scan_count = 0; + if (bdb->bdb_lock) + bdb->bdb_lock->lck_logical = LCK_none; + +#ifndef HASH_USE_CDS_LIST + bcbSync.unlock(); +#endif + + if (!(bdb->bdb_flags & BDB_lru_chained)) + { + Sync syncLRU(&bcb->bcb_syncLRU, FB_FUNCTION); + if (syncLRU.lockConditional(SYNC_EXCLUSIVE)) + { + QUE_DELETE(bdb->bdb_in_use); + QUE_INSERT(bcb->bcb_in_use, bdb->bdb_in_use); + } + else + recentlyUsed(bdb); + } + tdbb->bumpStats(RuntimeStatistics::PAGE_FETCHES); + return bdb; } } - // hvlad: removed in Vulcan - bcb->bcb_flags &= ~BCB_free_pending; - return NULL; - } + // here we hold lock on bdb and ask for lock on bdb2 + // to avoid deadlock, don't wait for bdb2 unless bdb was empty - // If there is an empty buffer sitting around, allocate it - - if (QUE_NOT_EMPTY(bcb->bcb_empty)) - { - que_inst = bcb->bcb_empty.que_forward; - QUE_DELETE(*que_inst); - BufferDesc* bdb = BLOCK(que_inst, BufferDesc, bdb_que); - - bcb->bcb_inuse++; - bdb->addRef(tdbb, SYNC_EXCLUSIVE); - - if (page != FREE_PAGE) + const int wait2 = is_empty ? wait : 0; + if (bdb2->addRef(tdbb, syncType, wait2)) { - QUE mod_que = &bcb->bcb_rpt[page.getPageNum() % bcb->bcb_count].bcb_page_mod; - QUE_INSERT(*mod_que, *que_inst); -#ifdef SUPERSERVER_V2 - // Reserve a buffer for header page with deferred header - // page write mechanism. Otherwise, a deadlock will occur - // if all dirty pages in the cache must force header page - // to disk before they can be written but there is no free - // buffer to read the header page into. - - if (page != HEADER_PAGE_NUMBER) -#endif + if (bdb2->bdb_page != page) { - Sync lruSync(&bcb->bcb_syncLRU, "get_buffer"); - lruSync.lock(SYNC_EXCLUSIVE); - - QUE_INSERT(bcb->bcb_in_use, bdb->bdb_in_use); - } - } - - // This correction for bdb_use_count below is needed to - // avoid a deadlock situation in latching code. 
It's not - // clear though how the bdb_use_count can get < 0 for a bdb - // in bcb_empty queue - - if (bdb->bdb_use_count < 0) - BUGCHECK(301); // msg 301 Non-zero use_count of a buffer in the empty que_inst - - bdb->bdb_page = page; - bdb->bdb_flags = BDB_read_pending; // we have buffer exclusively, this is safe - bdb->bdb_scan_count = 0; - - if (page != FREE_PAGE) - { - CCH_TRACE(("bdb->bdb_lock->lck_logical = LCK_none; page=%i", bdb->bdb_page.getPageNum())); - bdb->bdb_lock->lck_logical = LCK_none; - } - else - PAGE_LOCK_RELEASE(tdbb, bcb, bdb->bdb_lock); - - tdbb->bumpStats(RuntimeStatistics::PAGE_FETCHES); - return bdb; - } - - Sync lruSync(&bcb->bcb_syncLRU, "get_buffer"); - lruSync.lock(SYNC_EXCLUSIVE); - - if (bcb->bcb_lru_chain.load() != NULL) - requeueRecentlyUsed(bcb); - - for (que_inst = bcb->bcb_in_use.que_backward; - que_inst != &bcb->bcb_in_use; - que_inst = que_inst->que_backward) - { - // get the oldest buffer as the least recently used -- note - // that since there are no empty buffers this queue cannot be empty - - if (bcb->bcb_in_use.que_forward == &bcb->bcb_in_use) - BUGCHECK(213); // msg 213 insufficient cache size - - BufferDesc* oldest = BLOCK(que_inst, BufferDesc, bdb_in_use); - - if (oldest->bdb_flags & BDB_lru_chained) - continue; - - if (oldest->bdb_use_count || !oldest->addRefConditional(tdbb, SYNC_EXCLUSIVE)) - continue; - - if ((oldest->bdb_flags & BDB_free_pending) || !writeable(oldest)) - { - oldest->release(tdbb, true); - continue; - } - -#ifdef SUPERSERVER_V2 - // If page has been prefetched but not yet fetched, let - // it cycle once more thru LRU queue before re-using it. - - if (oldest->bdb_flags & BDB_prefetch) - { - oldest->bdb_flags &= ~BDB_prefetch; - que_inst = que_inst->que_forward; - QUE_MOST_RECENTLY_USED(oldest->bdb_in_use); - //LATCH_MUTEX_RELEASE; - continue; - } -#endif - - if ((bcb->bcb_flags & BCB_cache_writer) && - (oldest->bdb_flags & (BDB_dirty | BDB_db_dirty)) ) - { - bcb->bcb_flags |= BCB_free_pending; - - if (!(bcb->bcb_flags & BCB_writer_active)) - bcb->bcb_writer_sem.release(); - - if (walk) - { - oldest->release(tdbb, true); - if (!--walk) - break; - + bdb2->release(tdbb, true); continue; } + recentlyUsed(bdb2); + tdbb->bumpStats(RuntimeStatistics::PAGE_FETCHES); } - - BufferDesc* bdb = oldest; - - // hvlad: we already have bcb_lruSync here - //recentlyUsed(bdb); - fb_assert(!(bdb->bdb_flags & BDB_lru_chained)); - QUE_DELETE(bdb->bdb_in_use); - QUE_INSERT(bcb->bcb_in_use, bdb->bdb_in_use); - - lruSync.unlock(); - - bdb->bdb_flags |= BDB_free_pending; - bdb->bdb_pending_page = page; - - QUE_DELETE(bdb->bdb_que); - QUE_INSERT(bcb->bcb_pending, bdb->bdb_que); - - const bool needCleanup = (bdb->bdb_flags & (BDB_dirty | BDB_db_dirty)) || - QUE_NOT_EMPTY(bdb->bdb_higher) || QUE_NOT_EMPTY(bdb->bdb_lower); - - if (needCleanup) - { - bcbSync.unlock(); - - // If the buffer selected is dirty, arrange to have it written. - - if (bdb->bdb_flags & (BDB_dirty | BDB_db_dirty)) - { - const bool write_thru = (bcb->bcb_flags & BCB_exclusive); - if (!write_buffer(tdbb, bdb, bdb->bdb_page, write_thru, tdbb->tdbb_status_vector, true)) - { - lruSync.lock(SYNC_EXCLUSIVE); - bdb->bdb_flags &= ~BDB_free_pending; - QUE_DELETE(bdb->bdb_in_use); - QUE_APPEND(bcb->bcb_in_use, bdb->bdb_in_use); - lruSync.unlock(); - - bdb->release(tdbb, true); - CCH_unwind(tdbb, true); - } - } - - // If the buffer is still in the dirty tree, remove it. - // In any case, release any lock it may have. - - removeDirty(bcb, bdb); - - // Cleanup any residual precedence blocks. 
Unless something is - // screwed up, the only precedence blocks that can still be hanging - // around are ones cleared at AST level. - - if (QUE_NOT_EMPTY(bdb->bdb_higher) || QUE_NOT_EMPTY(bdb->bdb_lower)) - { - Sync precSync(&bcb->bcb_syncPrecedence, "get_buffer"); - precSync.lock(SYNC_EXCLUSIVE); - - while (QUE_NOT_EMPTY(bdb->bdb_higher)) - { - QUE que2 = bdb->bdb_higher.que_forward; - Precedence* precedence = BLOCK(que2, Precedence, pre_higher); - QUE_DELETE(precedence->pre_higher); - QUE_DELETE(precedence->pre_lower); - precedence->pre_hi = (BufferDesc*) bcb->bcb_free; - bcb->bcb_free = precedence; - } - - clear_precedence(tdbb, bdb); - } - - bcbSync.lock(SYNC_EXCLUSIVE); - } - - QUE_DELETE(bdb->bdb_que); // bcb_pending - - QUE mod_que = &bcb->bcb_rpt[page.getPageNum() % bcb->bcb_count].bcb_page_mod; - QUE_INSERT((*mod_que), bdb->bdb_que); - bdb->bdb_flags &= ~BDB_free_pending; - - // This correction for bdb_use_count below is needed to - // avoid a deadlock situation in latching code. It's not - // clear though how the bdb_use_count can get < 0 for a bdb - // in bcb_empty queue - - if (bdb->bdb_use_count < 0) - BUGCHECK(301); /* msg 301 Non-zero use_count of a buffer in the empty Que */ - - bdb->bdb_page = page; - bdb->bdb_flags &= BDB_lru_chained; // yes, clear all except BDB_lru_chained - bdb->bdb_flags |= BDB_read_pending; - bdb->bdb_scan_count = 0; - - bcbSync.unlock(); - - if (page != FREE_PAGE) - bdb->bdb_lock->lck_logical = LCK_none; else - PAGE_LOCK_RELEASE(tdbb, bcb, bdb->bdb_lock); + bdb2 = nullptr; - tdbb->bumpStats(RuntimeStatistics::PAGE_FETCHES); - return bdb; + bdb->release(tdbb, true); + if (is_empty) + { + SyncLockGuard syncEmpty(&bcb->bcb_syncEmpty, SYNC_EXCLUSIVE, FB_FUNCTION); + QUE_INSERT(bcb->bcb_empty, bdb->bdb_que); + bcb->bcb_inuse--; + } + + if (!bdb2 && wait > 0) + break; + + return bdb2; } - - if (que_inst == &bcb->bcb_in_use) - expand_buffers(tdbb, bcb->bcb_count + 75); } -} + // never get here + fb_assert(false); +} static ULONG get_prec_walk_mark(BufferControl* bcb) { @@ -4057,8 +3956,13 @@ static ULONG get_prec_walk_mark(BufferControl* bcb) if (++bcb->bcb_prec_walk_mark == 0) { - for (ULONG i = 0; i < bcb->bcb_count; i++) - bcb->bcb_rpt[i].bcb_bdb->bdb_prec_walk_mark = 0; + SyncLockGuard bcbSync(&bcb->bcb_syncObject, SYNC_SHARED, FB_FUNCTION); + + for (auto blk : bcb->bcb_bdbBlocks) + { + for (ULONG i = 0; i < blk.m_count; i++) + blk.m_bdbs[i].bdb_prec_walk_mark = 0; + } bcb->bcb_prec_walk_mark = 1; } @@ -4268,7 +4172,7 @@ static LockState lock_buffer(thread_db* tdbb, BufferDesc* bdb, const SSHORT wait } -static ULONG memory_init(thread_db* tdbb, BufferControl* bcb, SLONG number) +static ULONG memory_init(thread_db* tdbb, BufferControl* bcb, ULONG number) { /************************************** * @@ -4284,30 +4188,42 @@ static ULONG memory_init(thread_db* tdbb, BufferControl* bcb, SLONG number) SET_TDBB(tdbb); Database* const dbb = tdbb->getDatabase(); - UCHAR* memory = NULL; - SLONG buffers = 0; + ULONG buffers = 0; const size_t page_size = dbb->dbb_page_size; - size_t memory_size = page_size * (number + 1); - fb_assert(memory_size > 0); + UCHAR* memory = nullptr; + UCHAR* lock_memory = nullptr; + const UCHAR* memory_end = nullptr; + BufferDesc* tail = nullptr; - SLONG old_buffers = 0; - bcb_repeat* old_tail = NULL; - const UCHAR* memory_end = NULL; - bcb_repeat* tail = bcb->bcb_rpt; - // "end" is changed inside the loop - for (const bcb_repeat* end = tail + number; tail < end; tail++) + const size_t lock_key_extra = PageNumber::getLockLen() > 
Lock::KEY_STATIC_SIZE ?
+ PageNumber::getLockLen() - Lock::KEY_STATIC_SIZE : 0;
+
+ const size_t lock_size = (bcb->bcb_flags & BCB_exclusive) ? 0 :
+ FB_ALIGN(sizeof(Lock) + lock_key_extra, alignof(Lock));
+
+ while (number)
 {
 if (!memory)
 {
- // Allocate only what is required for remaining buffers.
+ // Allocate a memory block big enough to accommodate BufferDesc's, Lock's and page buffers.
 
- if (memory_size > (page_size * (number + 1)))
- memory_size = page_size * (number + 1);
+ ULONG to_alloc = number;
 
 while (true)
 {
- try {
+ const size_t memory_size = (sizeof(BufferDesc) + lock_size + page_size) * (to_alloc + 1);
+
+ fb_assert(memory_size > 0);
+ if (memory_size < MIN_BUFFER_SEGMENT)
+ {
+ // Diminishing returns
+ return buffers;
+ }
+
+ try
+ {
 memory = (UCHAR*) bcb->bcb_bufferpool->allocate(memory_size ALLOC_ARGS);
+ memory_end = memory + memory_size;
 break;
 }
 catch (Firebird::BadAlloc&)
@@ -4316,52 +4232,47 @@
 // but it's not virtually contiguous. Let's find out by
 // cutting the size in half to see if the buffers can be
 // scattered over the remaining virtual address space.
- memory_size >>= 1;
- if (memory_size < MIN_BUFFER_SEGMENT)
- {
- // Diminishing returns
- return buffers;
- }
+ to_alloc >>= 1;
 }
 }
- bcb->bcb_memory.push(memory);
- memory_end = memory + memory_size;
+
+ tail = (BufferDesc*) FB_ALIGN(memory, alignof(BufferDesc));
+
+ BufferControl::BDBBlock blk;
+ blk.m_bdbs = tail;
+ blk.m_count = to_alloc;
+ bcb->bcb_bdbBlocks.push(blk);
 
 // Allocate buffers on an address that is an even multiple
 // of the page size (rather the physical sector size.) This
 // is a necessary condition to support raw I/O interfaces.
+ memory = (UCHAR*) (blk.m_bdbs + to_alloc);
+ if (!(bcb->bcb_flags & BCB_exclusive))
+ {
+ lock_memory = FB_ALIGN(memory, lock_size);
+ memory = (UCHAR*) (lock_memory + lock_size * to_alloc);
+ }
 memory = FB_ALIGN(memory, page_size);
- old_tail = tail;
- old_buffers = buffers;
+
+ fb_assert(memory_end >= memory + page_size * to_alloc);
 }
 
- QUE_INIT(tail->bcb_page_mod);
+ tail = ::new(tail) BufferDesc(bcb);
 
- try
+ if (!(bcb->bcb_flags & BCB_exclusive))
 {
- tail->bcb_bdb = alloc_bdb(tdbb, bcb, &memory);
+ tail->bdb_lock = ::new(lock_memory)
+ Lock(tdbb, PageNumber::getLockLen(), LCK_bdb, tail, blocking_ast_bdb);
+
+ lock_memory += lock_size;
 }
- catch (Firebird::BadAlloc&)
- {
- // Whoops! Time to reset our expectations. Release the buffer memory
- // but use that memory size to calculate a new number that takes into account
- // the page buffer overhead. Reduce this number by a 25% fudge factor to
- // leave some memory for useful work.
- bcb->bcb_bufferpool->deallocate(bcb->bcb_memory.pop());
- memory = NULL;
+ tail->bdb_buffer = (pag*) memory;
+ memory += bcb->bcb_page_size;
 
- for (bcb_repeat* tail2 = old_tail; tail2 < tail; tail2++)
- tail2->bcb_bdb = dealloc_bdb(tail2->bcb_bdb);
-
- number = static_cast<SLONG>(memory_size / PAGE_OVERHEAD);
- number -= number >> 2;
- end = old_tail + number;
- tail = --old_tail; // For loop continue pops tail above
- buffers = old_buffers;
- continue;
- }
+ QUE_INSERT(bcb->bcb_empty, tail->bdb_que);
+ tail++;
 buffers++; // Allocated buffers
 number--; // Remaining buffers
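A note on the allocation scheme in the hunk above: each segment that memory_init() obtains is carved into three consecutive regions, the BufferDesc array, the page-lock array (shared-cache mode only), and the page buffers themselves, aligned to the database page size as raw I/O requires. Below is a minimal standalone sketch of the same carving arithmetic; align_up() stands in for Firebird's FB_ALIGN, the constant 8 stands in for alignof(BufferDesc), and all sizes are made up for illustration:

    #include <cstdint>
    #include <cstdio>

    // Round p up to the next multiple of a (any positive a); this is the
    // role FB_ALIGN plays in the hunk above.
    static std::uint64_t align_up(std::uint64_t p, std::uint64_t a)
    {
        return p + (a - p % a) % a;
    }

    int main()
    {
        const std::uint64_t base      = 0x10008; // segment start (hypothetical)
        const std::uint64_t count     = 100;     // buffers carved from this segment
        const std::uint64_t bdb_size  = 256;     // stand-in for sizeof(BufferDesc)
        const std::uint64_t lock_size = 128;     // stand-in for the FB_ALIGNed Lock size
        const std::uint64_t page_size = 8192;    // database page size

        // BufferDesc array first, then the lock array, then page buffers on
        // a page boundary, in the same order the patch lays them out.
        const std::uint64_t bdbs  = align_up(base, 8);
        const std::uint64_t locks = align_up(bdbs + count * bdb_size, lock_size);
        const std::uint64_t pages = align_up(locks + count * lock_size, page_size);

        std::printf("bdbs=%llx locks=%llx pages=%llx\n",
            (unsigned long long) bdbs, (unsigned long long) locks,
            (unsigned long long) pages);
        return 0;
    }

The (to_alloc + 1) over-allocation in the hunk gives the carving enough slack that these alignment round-ups cannot run past memory_end, which is what the fb_assert after the carving and the exhaustion check in the next hunk verify.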
@@ -4369,7 +4280,26 @@
 // Check if memory segment has been exhausted.
 if (memory + page_size > memory_end)
- memory = 0;
+ {
+ const auto blk = bcb->bcb_bdbBlocks.end() - 1;
+ const BufferDesc* bdb = blk->m_bdbs;
+
+ if (!(bcb->bcb_flags & BCB_exclusive))
+ {
+ // first lock block is after last BufferDesc
+ fb_assert((char*)bdb->bdb_lock >= (char*)tail);
+
+ // first page buffer is after last lock block
+ fb_assert((char*)bdb->bdb_buffer >= (char*)tail[-1].bdb_lock + lock_size);
+ }
+ else
+ {
+ // first page buffer is after last BufferDesc
+ fb_assert((char*)bdb->bdb_buffer >= (char*)tail);
+ }
+
+ memory = nullptr;
+ }
 }
 
 return buffers;
@@ -5242,12 +5172,17 @@ bool BufferDesc::addRefConditional(thread_db* tdbb, SyncType syncType)
 
 void BufferDesc::downgrade(SyncType syncType)
 {
+ // SH -> SH is a no-op
 if (syncType == SYNC_SHARED && !bdb_writers)
 return;
 
 if (bdb_writers != 1)
 BUGCHECK(296); // inconsistent latch downgrade call
 
+ // EX -> EX is a no-op
+ if (syncType == SYNC_EXCLUSIVE)
+ return;
+
 --bdb_writers;
 
 bdb_exclusive = NULL;
@@ -5312,3 +5247,269 @@ void BufferDesc::unLockIO(thread_db* tdbb)
 
 bdb_syncIO.unlock(NULL, SYNC_EXCLUSIVE);
 }
+
+
+namespace Jrd {
+
+/// class BCBHashTable
+
+void BCBHashTable::resize(ULONG count)
+{
+ const ULONG old_count = m_count;
+ chain_type* const old_chains = m_chains;
+
+ chain_type* new_chains = FB_NEW_POOL(m_pool) chain_type[count];
+ m_count = count;
+ m_chains = new_chains;
+
+#ifndef HASH_USE_CDS_LIST
+ // Initialize all new chains
+ for (chain_type* que = new_chains; que < new_chains + count; que++)
+ QUE_INIT(*que);
+#endif
+
+ if (!old_chains)
+ return;
+
+ const chain_type* const old_end = old_chains + old_count;
+
+ // Move any active buffers from the old hash table to the new one
+ for (chain_type* old_tail = old_chains; old_tail < old_end; old_tail++)
+ {
+#ifndef HASH_USE_CDS_LIST
+ while (QUE_NOT_EMPTY(*old_tail))
+ {
+ QUE que_inst = old_tail->que_forward;
+ BufferDesc* bdb = BLOCK(que_inst, BufferDesc, bdb_que);
+ QUE_DELETE(*que_inst);
+ QUE mod_que = &new_chains[hash(bdb->bdb_page)];
+ QUE_INSERT(*mod_que, *que_inst);
+ }
+#else
+ while (!old_tail->empty())
+ {
+ auto n = old_tail->begin();
+ old_tail->erase(n->first); // bdb_page
+
+ chain_type* new_chain = &m_chains[hash(n->first)];
+ new_chain->insert(n->first, n->second); // bdb_page, bdb
+ }
+#endif
+ }
+
+ delete[] old_chains;
+}
+
+void BCBHashTable::clear()
+{
+ if (!m_chains)
+ return;
+
+#ifdef HASH_USE_CDS_LIST
+ const chain_type* const end = m_chains + m_count;
+ for (chain_type* tail = m_chains; tail < end; tail++)
+ tail->clear();
+#endif
+
+ delete[] m_chains;
+ m_chains = nullptr;
+ m_count = 0;
+}
+
+inline BufferDesc* BCBHashTable::find(const PageNumber& page) const
+{
+ auto& list = m_chains[hash(page)];
+
+#ifndef HASH_USE_CDS_LIST
+ QUE que_inst = list.que_forward;
+ for (; que_inst != &list; que_inst = que_inst->que_forward)
+ {
+ BufferDesc* bdb = BLOCK(que_inst, BufferDesc, bdb_que);
+ if (bdb->bdb_page == page)
+ return bdb;
+ }
+
+#else // HASH_USE_CDS_LIST
+ auto ptr = list.get(page);
+ if (!ptr.empty())
+ {
+ fb_assert(ptr->second != nullptr);
+#ifdef DEV_BUILD
+ // Original libcds has no update(key, value); use this code with it,
+ // see also the comment in get_buffer()
+ while (ptr->second == nullptr)
+ cds::backoff::pause();
+#endif
+ if (ptr->second->bdb_page == page)
+ return ptr->second;
+ }
+#endif
+
+ return nullptr;
+}
+
+inline BufferDesc* BCBHashTable::emplace(BufferDesc* bdb, const PageNumber& page, bool remove)
+{
+#ifndef HASH_USE_CDS_LIST
+ // bcb_syncObject should be locked in EX mode
+
+ BufferDesc* bdb2 = find(page);
+
+ if (!bdb2)
+ {
+ if (remove)
+ QUE_DELETE(bdb->bdb_que);
+
+ que& mod_que = m_chains[hash(page)];
+ QUE_INSERT(mod_que, bdb->bdb_que);
+ }
+ return bdb2;
+#else // HASH_USE_CDS_LIST
+
+ BufferDesc* bdb2 = nullptr;
+ BdbList& list = m_chains[hash(page)];
+
+/*
+ // Original libcds has no update(key, value); use this code with it
+
+ auto ret = list.update(page, [bdb, &bdb2](bool bNew, BdbList::value_type& val)
+ {
+ if (bNew)
+ val.second = bdb;
+ else
+ while (!(bdb2 = val.second))
+ cds::backoff::pause();
+ },
+ true);
+*/
+
+ auto ret = list.update(page, bdb, [&bdb2](bool bNew, BdbList::value_type& val)
+ {
+ // someone might have put a page buffer into the chain concurrently, so
+ // we store it for further investigation
+ if (!bNew)
+ bdb2 = val.second;
+ },
+ true);
+ fb_assert(ret.first);
+
+ // if we have inserted the page buffer that we found (empty or oldest)
+ if (bdb2 == nullptr)
+ {
+ fb_assert(ret.second);
+#ifdef DEV_BUILD
+ auto p1 = list.get(page);
+ fb_assert(!p1.empty() && p1->first == page && p1->second == bdb);
+#endif
+
+ if (remove)
+ {
+ // remove the page buffer from the old hash slot
+ const PageNumber oldPage = bdb->bdb_page;
+ BdbList& oldList = m_chains[hash(oldPage)];
+
+#ifdef DEV_BUILD
+ p1 = oldList.get(oldPage);
+ fb_assert(!p1.empty() && p1->first == oldPage && p1->second == bdb);
+#endif
+
+ const bool ok = oldList.erase(oldPage);
+ fb_assert(ok);
+
+#ifdef DEV_BUILD
+ p1 = oldList.get(oldPage);
+ fb_assert(p1.empty() || p1->second != bdb);
+#endif
+ }
+
+#ifdef DEV_BUILD
+ p1 = list.get(page);
+ fb_assert(!p1.empty() && p1->first == page && p1->second == bdb);
+#endif
+ }
+ return bdb2;
+#endif
+}
+
+
+void BCBHashTable::remove(BufferDesc* bdb)
+{
+#ifndef HASH_USE_CDS_LIST
+ QUE_DELETE(bdb->bdb_que);
+#else
+ BdbList& list = m_chains[hash(bdb->bdb_page)];
+
+#ifdef DEV_BUILD
+ auto p = list.get(bdb->bdb_page);
+ fb_assert(!p.empty() && p->first == bdb->bdb_page && p->second == bdb);
+#endif
+
+ list.erase(bdb->bdb_page);
+#endif
+}
+
+
+} // namespace Jrd
+
+
+#ifdef HASH_USE_CDS_LIST
+
+/// class ListNodeAllocator
+
+class InitPool
+{
+public:
+ explicit InitPool(MemoryPool&)
+ {
+ m_pool = InitCDS::createPool();
+ m_pool->setStatsGroup(m_stats);
+ }
+
+ ~InitPool()
+ {
+ // m_pool will be deleted by the InitCDS dtor after cds termination;
+ // some memory could still be unfreed until that moment
+
+#ifdef DEBUG_CDS_MEMORY
+ char str[256];
+ sprintf(str, "CCH list's common pool stats:\n"
+ " usage = %llu\n"
+ " mapping = %llu\n"
+ " max usage = %llu\n"
+ " max mapping = %llu\n"
+ "\n",
+ m_stats.getCurrentUsage(),
+ m_stats.getCurrentMapping(),
+ m_stats.getMaximumUsage(),
+ m_stats.getMaximumMapping()
+ );
+ gds__log(str);
+#endif
+ }
+
+ void* alloc(size_t size)
+ {
+ return m_pool->allocate(size ALLOC_ARGS);
+ }
+
+private:
+ MemoryPool* m_pool;
+ MemoryStats m_stats;
+};
+
+static InitInstance<InitPool> initPool;
+
+
+template <typename T>
+T* ListNodeAllocator<T>::allocate(std::size_t n)
+{
+ return static_cast<T*>(initPool().alloc(n * sizeof(T)));
+}
+
+template <typename T>
+void ListNodeAllocator<T>::deallocate(T* p, std::size_t /* n */)
+{
+ // It uses the correct pool stored within the memory block itself
+ MemoryPool::globalFree(p);
+}
+
+#endif // HASH_USE_CDS_LIST
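For context on the two specializations above: libcds containers expect an STL-style allocator for their list nodes, and ListNodeAllocator satisfies that contract with just a type-templated allocate/deallocate pair routed to one shared pool. A self-contained sketch of the same shape, with the Firebird pool replaced by plain operator new/delete (PoolAllocator is a hypothetical name, not part of the patch):

    #include <cstddef>
    #include <new>

    // Minimal STL-compatible allocator: allocate/deallocate route to a single
    // shared arena. In the patch the arena is a MemoryPool created by InitCDS
    // and kept alive until cds termination; here it is plain new/delete.
    template <typename T>
    struct PoolAllocator
    {
        typedef T value_type;

        PoolAllocator() noexcept {}
        template <typename U> PoolAllocator(const PoolAllocator<U>&) noexcept {}

        T* allocate(std::size_t n)
        {
            return static_cast<T*>(::operator new(n * sizeof(T)));
        }

        void deallocate(T* p, std::size_t) noexcept
        {
            ::operator delete(p);
        }
    };

    template <typename T, typename U>
    bool operator==(const PoolAllocator<T>&, const PoolAllocator<U>&) noexcept { return true; }
    template <typename T, typename U>
    bool operator!=(const PoolAllocator<T>&, const PoolAllocator<U>&) noexcept { return false; }

The asymmetry in the real code, where allocation goes through the dedicated pool while deallocate() calls MemoryPool::globalFree(), works because Firebird stores the owning pool within each allocated block, so a block can be freed without naming the pool that produced it.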
diff --git a/src/jrd/cch.h b/src/jrd/cch.h
index f1a48d338d..efd0f2d523 100644
--- a/src/jrd/cch.h
+++ b/src/jrd/cch.h
@@ -62,6 +62,7 @@ class thread_db;
 struct que;
 class BufferDesc;
 class Database;
+class BCBHashTable;
 
 // Page buffer cache size constraints.
@@ -72,22 +73,16 @@ const ULONG MAX_PAGE_BUFFERS = 131072;
 const ULONG MAX_PAGE_BUFFERS = MAX_SLONG - 1;
 #endif
 
-
 // BufferControl -- Buffer control block -- one per system
 
-struct bcb_repeat
-{
- BufferDesc* bcb_bdb; // Buffer descriptor block
- que bcb_page_mod; // Que of buffers with page mod n
-};
-
 class BufferControl : public pool_alloc<type_bcb>
 {
 BufferControl(MemoryPool& p, Firebird::MemoryStats& parentStats)
 : bcb_bufferpool(&p),
 bcb_memory_stats(&parentStats),
 bcb_memory(p),
- bcb_writer_fini(p, cache_writer, THREAD_medium)
+ bcb_writer_fini(p, cache_writer, THREAD_medium),
+ bcb_bdbBlocks(p)
 {
 bcb_database = NULL;
 QUE_INIT(bcb_in_use);
@@ -103,6 +98,7 @@ class BufferControl : public pool_alloc<type_bcb>
 bcb_prec_walk_mark = 0;
 bcb_page_size = 0;
 bcb_page_incarnation = 0;
+ bcb_hashTable = nullptr;
 #ifdef SUPERSERVER_V2
 bcb_prefetch = NULL;
 #endif
@@ -142,9 +138,9 @@ public:
 Firebird::SyncObject bcb_syncObject;
 Firebird::SyncObject bcb_syncDirtyBdbs;
+ Firebird::SyncObject bcb_syncEmpty;
 Firebird::SyncObject bcb_syncPrecedence;
 Firebird::SyncObject bcb_syncLRU;
- //Firebird::SyncObject bcb_syncPageWrite;
 
 typedef ThreadFinishSync<BufferControl*> BcbThreadSync;
 
@@ -164,7 +160,15 @@ public:
 void exceptionHandler(const Firebird::Exception& ex, BcbThreadSync::ThreadRoutine* routine);
 
- bcb_repeat* bcb_rpt;
+ BCBHashTable* bcb_hashTable;
+
+ // block of allocated BufferDesc's
+ struct BDBBlock
+ {
+ BufferDesc* m_bdbs;
+ ULONG m_count;
+ };
+ Firebird::Array<BDBBlock> bcb_bdbBlocks; // all allocated BufferDesc's
 };
 
 const int BCB_keep_pages = 1; // set during btc_flush(), pages not removed from dirty binary tree
@@ -186,13 +190,13 @@ class BufferDesc : public pool_alloc<type_bdb>
 public:
 explicit BufferDesc(BufferControl* bcb)
 : bdb_bcb(bcb),
- bdb_page(0, 0),
- bdb_pending_page(0, 0)
+ bdb_page(0, 0)
 {
 bdb_lock = NULL;
 QUE_INIT(bdb_que);
 QUE_INIT(bdb_in_use);
 QUE_INIT(bdb_dirty);
+ bdb_lru_chain = NULL;
 bdb_buffer = NULL;
 bdb_incarnation = 0;
 bdb_transactions = 0;
@@ -202,6 +206,7 @@ public:
 bdb_exclusive = NULL;
 bdb_io = NULL;
 bdb_writers = 0;
+ bdb_io_locks = 0;
 bdb_scan_count = 0;
 bdb_difference_page = 0;
 bdb_prec_walk_mark = 0;
@@ -233,13 +238,12 @@ public:
 BufferControl* bdb_bcb;
 Firebird::SyncObject bdb_syncPage;
 Lock* bdb_lock; // Lock block for buffer
- que bdb_que; // Either mod que in hash table or bcb_pending que if BDB_free_pending flag is set
+ que bdb_que; // Either mod que in hash table or bcb_empty que if never used
 que bdb_in_use; // queue of buffers in use
 que bdb_dirty; // dirty pages LRU queue
 BufferDesc* bdb_lru_chain; // pending LRU chain
 Ods::pag* bdb_buffer; // Actual buffer
 PageNumber bdb_page; // Database page number in buffer
- PageNumber bdb_pending_page; // Database page number to be
 ULONG bdb_incarnation;
 ULONG bdb_transactions; // vector of dirty flags to reduce commit overhead
 TraNumber bdb_mark_transaction; // hi-water mark transaction to defer header page I/O
diff --git a/src/jrd/jrd.cpp b/src/jrd/jrd.cpp
index 8ff98bc34f..0005317a8b 100644
--- a/src/jrd/jrd.cpp
+++ b/src/jrd/jrd.cpp
@@ -384,6 +384,9 @@ static void threadDetach()
 {
 ThreadSync* thd = ThreadSync::findThread();
 delete thd;
+
+ if (cds::threading::Manager::isThreadAttached())
+ cds::threading::Manager::detachThread();
 }
 
 static void shutdownBeforeUnload()
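The jrd.cpp hunk above and the jrd.h hunks below are two halves of one libcds rule: every thread that touches a lock-free container must be attached to cds::threading::Manager, and should detach before it exits. A minimal sketch of that lifecycle in a standalone program, assuming the stock libcds entry points from <cds/init.h> (in Firebird the process-wide initialization lives in the InitCDS code rather than in main):

    #include <cds/init.h>            // cds::Initialize, cds::Terminate
    #include <cds/threading/model.h> // cds::threading::Manager
    #include <thread>

    static void worker()
    {
        // attach once per thread before using any libcds container
        if (!cds::threading::Manager::isThreadAttached())
            cds::threading::Manager::attachThread();

        // ... work with lock-free structures here ...

        // detach when the thread is finished with them
        cds::threading::Manager::detachThread();
    }

    int main()
    {
        cds::Initialize();           // once per process, before any attach
        {
            std::thread t(worker);
            t.join();
        }
        cds::Terminate();            // after all threads have detached
        return 0;
    }

Because the engine has no single thread entry point to hook, ThreadContextHolder attaches lazily (the isThreadAttached() check keeps the call idempotent), while threadDetach() covers the exit path.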
diff --git a/src/jrd/jrd.h b/src/jrd/jrd.h
index 7a2307d834..ef8ec60d6b 100644
--- a/src/jrd/jrd.h
+++ b/src/jrd/jrd.h
@@ -60,6 +60,7 @@
 #include "../jrd/Attachment.h"
 #include "firebird/Interface.h"
 
+#include <cds/threading/model.h> // cds::threading::Manager
 
 #define BUGCHECK(number) ERR_bugcheck(number, __FILE__, __LINE__)
 #define SOFT_BUGCHECK(number) ERR_soft_bugcheck(number, __FILE__, __LINE__)
@@ -772,6 +773,9 @@ public:
 {
 context.putSpecific();
 currentStatus->init();
+
+ if (!cds::threading::Manager::isThreadAttached())
+ cds::threading::Manager::attachThread();
 }
 
 ThreadContextHolder(Database* dbb, Jrd::Attachment* att, FbStatusVector* status = NULL)
@@ -781,6 +785,9 @@ public:
 context.setDatabase(dbb);
 context.setAttachment(att);
 currentStatus->init();
+
+ if (!cds::threading::Manager::isThreadAttached())
+ cds::threading::Manager::attachThread();
 }
 
 ~ThreadContextHolder()
diff --git a/src/jrd/lck.h b/src/jrd/lck.h
index 7a6336a53c..dc8f927498 100644
--- a/src/jrd/lck.h
+++ b/src/jrd/lck.h
@@ -144,16 +144,17 @@ public:
 UCHAR lck_physical; // Physical lock level
 LOCK_DATA_T lck_data; // Data associated with a lock
 
+ static constexpr size_t KEY_STATIC_SIZE = sizeof(SINT64);
+
 private:
-
- static const size_t KEY_STATIC_SIZE = sizeof(SINT64);
-
 union
 {
 UCHAR key_string[KEY_STATIC_SIZE];
 SINT64 key_long;
 } lck_key; // Lock key string
 
+ static_assert(KEY_STATIC_SIZE >= sizeof(lck_key), "Wrong KEY_STATIC_SIZE");
+
 public:
 
 UCHAR* getKeyPtr()
diff --git a/src/jrd/pag.cpp b/src/jrd/pag.cpp
index 771713c7fc..a64ca651ae 100644
--- a/src/jrd/pag.cpp
+++ b/src/jrd/pag.cpp
@@ -2622,14 +2622,19 @@ ULONG PAG_page_count(thread_db* tdbb)
 if (!PIO_read(tdbb, pageSpace->file, &temp_bdb, temp_bdb.bdb_buffer, &status))
 status_exception::raise(&status);
 
- fb_assert(pip->pip_header.pag_type == pag_pages);
+ // After PIO_extend, the tail of the file might contain thousands of zero pages.
+ // The most recent PIP might already be marked as fully used while the next PIP is not yet initialized.
+ // If the nbackup state becomes nbak_stalled at this moment, we'll find a zeroed PIP at the tail of the file.
+ // Fortunately, it must be the last meaningful page, so we can rely on its number.
+ fb_assert(pip->pip_header.pag_type == pag_pages ||
+ (!pip->pip_header.pag_type && !pip->pip_used) );
 
 if (pip->pip_used == pagesPerPip)
 {
 // this is not last page, continue search
 continue;
 }
 
- return pip->pip_used + pageNo + (sequence ? 1 : -1);
+ return pip->pip_used + pageNo - (sequence ? 0 : pageSpace->pipFirst) + 1;
 }
 
 // compiler warnings silencer
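One arithmetic note on the PAG_page_count() change at the end: the page count derived from the last, partially used PIP is now pip_used plus that PIP's own page number plus one, minus the page number of the first PIP (pageSpace->pipFirst) when sequence is zero. A tiny sketch of the corrected expression with made-up inputs:

    #include <cstdint>
    #include <cstdio>

    typedef std::uint32_t ULONG;

    // Mirrors the corrected return expression; all inputs are hypothetical.
    static ULONG counted_pages(ULONG pip_used, ULONG pageNo, ULONG sequence, ULONG pipFirst)
    {
        return pip_used + pageNo - (sequence ? 0 : pipFirst) + 1;
    }

    int main()
    {
        // Second PIP (sequence 1) on page 13056 with 4000 bits used:
        std::printf("%u\n", counted_pages(4000, 13056, 1, 1)); // 17057
        // First and only PIP (sequence 0) on page 1 with 4000 bits used:
        std::printf("%u\n", counted_pages(4000, 1, 0, 1));     // 4001
        return 0;
    }

For the sequence-zero inputs, the old expression (pip_used + pageNo - 1) evaluates to 4000, one less than the corrected value.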