From 932ca5157412040736f08d34090f4b31abed44ce Mon Sep 17 00:00:00 2001 From: Dmitry Yemanov Date: Fri, 25 Jan 2019 12:09:01 +0300 Subject: [PATCH] Replication support (#182) * Initial implementation of replication * Postfixes for configuration and Windows builds * Misc Windows adjustments * Forgotten changes from prior commits * Last round (fingers crossed) of fixing the Windows build * Improve self-documentation * Misc improvements * Updates the docs * More configuration * Frontported some remaining pieces * Installation post-fixes * Special permission for replication --- CHANGELOG.md | 4 + .../arch-specific/freebsd/install.sh.in | 2 + .../linux/makeInstallImage.sh.in | 1 + .../arch-specific/win32/FirebirdInstall.iss | 14 + .../FirebirdInstallEnvironmentChecks.inc | 3 +- .../win32/ba/Instalacija_ProcitajMe.txt | 3 +- .../win32/cz/instalace_ctime.txt | 3 +- .../win32/fr/installation_lisezmoi.txt | 3 +- .../win32/installation_readme.txt | 3 +- .../win32/installation_scripted.txt | 1 + .../win32/ru/installation_readme.txt | 3 +- builds/install/misc/replication.conf | 169 +++ .../install/posix-common/posixLibrary.sh.in | 2 + builds/posix/make.shared.variables | 5 +- builds/win32/make_all.bat | 1 + builds/win32/msvc12/engine.vcxproj | 7 + builds/win32/msvc12/fbserver.vcxproj | 3 + builds/win32/msvc14/engine.vcxproj | 7 + builds/win32/msvc14/fbserver.vcxproj | 3 + builds/win32/msvc15/engine.vcxproj | 7 + builds/win32/msvc15/fbserver.vcxproj | 3 + configure.ac | 1 + doc/README.replication.md | 117 ++ doc/sql.extensions/README.ddl.txt | 1 + lang_helpers/gds_codes.ftn | 2 + lang_helpers/gds_codes.pas | 2 + src/alice/alice.cpp | 16 + src/alice/alice.h | 7 + src/alice/aliceswi.h | 7 +- src/alice/exe.cpp | 3 + src/common/classes/GenericMap.h | 5 + src/common/classes/locks.h | 12 +- src/common/config/config_file.cpp | 9 +- src/common/config/config_file.h | 1 + src/common/isc_s_proto.h | 1 + src/common/os/path_utils.h | 73 +- src/common/os/posix/os_utils.cpp | 5 +- 
src/common/os/posix/path_utils.cpp | 128 +- src/common/os/win32/path_utils.cpp | 145 ++- src/dsql/DdlNodes.epp | 4 +- src/dsql/DdlNodes.h | 25 + src/dsql/Nodes.h | 5 + src/dsql/StmtNodes.cpp | 6 + src/dsql/dsql.cpp | 6 + src/include/consts_pub.h | 1 + src/include/firebird/FirebirdInterface.idl | 70 +- src/include/firebird/IdlFbInterfaces.h | 858 ++++++++++++ src/include/gen/codetext.h | 1 + src/include/gen/iberror.h | 6 +- src/include/gen/msgs.h | 1 + src/include/gen/sql_code.h | 1 + src/include/gen/sql_state.h | 1 + src/jrd/Attachment.cpp | 1 + src/jrd/Attachment.h | 65 + src/jrd/Database.cpp | 31 + src/jrd/Database.h | 23 +- src/jrd/EngineInterface.h | 29 + src/jrd/JrdStatement.cpp | 13 +- src/jrd/Savepoint.cpp | 5 +- src/jrd/Savepoint.h | 11 + src/jrd/SysFunction.cpp | 3 + src/jrd/SystemPrivileges.h | 1 + src/jrd/blb.h | 2 +- src/jrd/constants.h | 6 + src/jrd/dfw.epp | 7 +- src/jrd/dpm.epp | 49 +- src/jrd/exe.cpp | 3 +- src/jrd/jrd.cpp | 217 +++- src/jrd/jrd.h | 35 +- src/jrd/ods.h | 13 +- src/jrd/pag.cpp | 125 +- src/jrd/pag_proto.h | 9 +- src/jrd/replication/Applier.cpp | 1149 +++++++++++++++++ src/jrd/replication/Applier.h | 193 +++ src/jrd/replication/ChangeLog.cpp | 990 ++++++++++++++ src/jrd/replication/ChangeLog.h | 249 ++++ src/jrd/replication/Config.cpp | 312 +++++ src/jrd/replication/Config.h | 62 + src/jrd/replication/Manager.cpp | 492 +++++++ src/jrd/replication/Manager.h | 149 +++ src/jrd/replication/Protocol.h | 68 + src/jrd/replication/Publisher.cpp | 635 +++++++++ src/jrd/replication/Publisher.h | 49 + src/jrd/replication/Replicator.cpp | 541 ++++++++ src/jrd/replication/Replicator.h | 290 +++++ src/jrd/replication/Utils.cpp | 205 +++ src/jrd/replication/Utils.h | 87 ++ src/jrd/tra.cpp | 37 +- src/jrd/tra.h | 6 + src/jrd/tra_proto.h | 2 +- src/jrd/trig.h | 13 +- src/lock/lock.cpp | 38 +- src/msgs/facilities2.sql | 4 +- src/msgs/messages2.sql | 3 + src/msgs/system_errors2.sql | 1 + src/remote/client/interface.cpp | 162 ++- src/remote/protocol.cpp 
| 10 + src/remote/protocol.h | 13 + src/remote/remote.h | 5 +- src/remote/server/ReplServer.cpp | 983 ++++++++++++++ src/remote/server/ReplServer.h | 28 + src/remote/server/os/posix/inet_server.cpp | 12 + src/remote/server/os/win32/srvr_w32.cpp | 8 + src/remote/server/server.cpp | 35 + src/utilities/gstat/ppg.cpp | 35 + src/yvalve/YObjects.h | 20 + src/yvalve/why.cpp | 69 + 107 files changed, 9077 insertions(+), 293 deletions(-) create mode 100644 builds/install/misc/replication.conf create mode 100644 doc/README.replication.md create mode 100644 src/jrd/replication/Applier.cpp create mode 100644 src/jrd/replication/Applier.h create mode 100644 src/jrd/replication/ChangeLog.cpp create mode 100644 src/jrd/replication/ChangeLog.h create mode 100644 src/jrd/replication/Config.cpp create mode 100644 src/jrd/replication/Config.h create mode 100644 src/jrd/replication/Manager.cpp create mode 100644 src/jrd/replication/Manager.h create mode 100644 src/jrd/replication/Protocol.h create mode 100644 src/jrd/replication/Publisher.cpp create mode 100644 src/jrd/replication/Publisher.h create mode 100644 src/jrd/replication/Replicator.cpp create mode 100644 src/jrd/replication/Replicator.h create mode 100644 src/jrd/replication/Utils.cpp create mode 100644 src/jrd/replication/Utils.h create mode 100644 src/remote/server/ReplServer.cpp create mode 100644 src/remote/server/ReplServer.h diff --git a/CHANGELOG.md b/CHANGELOG.md index 4a3641dd98..55f7f23a1c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,6 +28,10 @@ Reference(s): [/doc/README.monitoring_tables](https://github.com/FirebirdSQL/firebird/raw/master/doc/README.monitoring_tables) Contributor(s): Alex Peshkoff +* [CORE-2022](http://tracker.firebirdsql.org/browse/CORE-2022): Built-in logical replication + Reference(s): [/doc/README.replication.md](https://github.com/FirebirdSQL/firebird/raw/master/doc/README.replication.md) + Contributor(s): Dmitry Yemanov + * [CORE-909](http://tracker.firebirdsql.org/browse/CORE-909): 
Ability to retrieve current UTC/GMT timestamp Reference(s): [/doc/sql.extentions/README.time_zone.md](https://github.com/FirebirdSQL/firebird/raw/master/doc/sql.extensions/README.time_zone.md) Contributor(s): Adriano dos Santos Fernandes diff --git a/builds/install/arch-specific/freebsd/install.sh.in b/builds/install/arch-specific/freebsd/install.sh.in index d566c7ee00..c049e029cc 100755 --- a/builds/install/arch-specific/freebsd/install.sh.in +++ b/builds/install/arch-specific/freebsd/install.sh.in @@ -291,6 +291,8 @@ copyFiles() { cp $BuiltFBDir/firebird.conf ${TargetDir}@FB_CONFDIR@ cp $BuiltFBDir/databases.conf ${TargetDir}@FB_CONFDIR@ cp $BuiltFBDir/fbtrace.conf ${TargetDir}@FB_CONFDIR@ + cp $BuiltFBDir/plugins.conf ${TargetDir}@FB_CONFDIR@ + cp $BuiltFBDir/replication.conf ${TargetDir}@FB_CONFDIR@ chown root:wheel ${TargetDir}@FB_CONFDIR@/*.conf chmod 0644 ${TargetDir}@FB_CONFDIR@/*.conf diff --git a/builds/install/arch-specific/linux/makeInstallImage.sh.in b/builds/install/arch-specific/linux/makeInstallImage.sh.in index aede06a984..f642150d64 100644 --- a/builds/install/arch-specific/linux/makeInstallImage.sh.in +++ b/builds/install/arch-specific/linux/makeInstallImage.sh.in @@ -293,6 +293,7 @@ copyFiles() { cp $BuiltFBDir/databases.conf ${TargetDir}@FB_CONFDIR@ cp $BuiltFBDir/fbtrace.conf ${TargetDir}@FB_CONFDIR@ cp $BuiltFBDir/plugins.conf ${TargetDir}@FB_CONFDIR@ + cp $BuiltFBDir/replication.conf ${TargetDir}@FB_CONFDIR@ chmod 0644 ${TargetDir}@FB_CONFDIR@/*.conf diff --git a/builds/install/arch-specific/win32/FirebirdInstall.iss b/builds/install/arch-specific/win32/FirebirdInstall.iss index 25dbc2da84..bf0003946a 100644 --- a/builds/install/arch-specific/win32/FirebirdInstall.iss +++ b/builds/install/arch-specific/win32/FirebirdInstall.iss @@ -478,6 +478,8 @@ Source: {#FilesDir}\firebird.conf; DestDir: {app}; DestName: firebird.conf; Comp Source: {#FilesDir}\fbtrace.conf; DestDir: {app}; DestName: fbtrace.conf.default; Components: ServerComponent; 
Source: {#FilesDir}\fbtrace.conf; DestDir: {app}; DestName: fbtrace.conf; Components: ServerComponent; Flags: uninsneveruninstall onlyifdoesntexist; check: NofbtraceConfExists; Source: {#FilesDir}\databases.conf; DestDir: {app}; Components: ClientComponent; Flags: uninsneveruninstall onlyifdoesntexist +Source: {#FilesDir}\replication.conf; DestDir: {app}; DestName: replication.conf.default; Components: ServerComponent; +Source: {#FilesDir}\replication.conf; DestDir: {app}; Components: ServerComponent; Flags: uninsneveruninstall onlyifdoesntexist; check: NoReplicationConfExists; Source: {#FilesDir}\security4.fdb; DestDir: {app}; Destname: security4.fdb.empty; Components: ServerComponent; Source: {#FilesDir}\security4.fdb; DestDir: {app}; Components: ServerComponent; Flags: uninsneveruninstall onlyifdoesntexist Source: {#FilesDir}\firebird.msg; DestDir: {app}; Components: ClientComponent; Flags: sharedfile ignoreversion @@ -1054,6 +1056,7 @@ begin IncrementSharedCount(Is64BitInstallMode, GetAppPath+'\databases.conf', false); IncrementSharedCount(Is64BitInstallMode, GetAppPath+'\fbtrace.conf', false); IncrementSharedCount(Is64BitInstallMode, GetAppPath+'\security4.fdb', false); + IncrementSharedCount(Is64BitInstallMode, GetAppPath+'\replication.conf', false); InitSecurityDB; @@ -1131,6 +1134,16 @@ begin Result := not fileexists(GetAppPath+'\fbtrace.conf'); end; +function ReplicationConfExists: boolean; +begin + Result := fileexists(GetAppPath+'\replication.conf'); +end; + +function NoReplicationConfExists: boolean; +begin + Result := not fileexists(GetAppPath+'\replication.conf'); +end; + function InitializeUninstall: Boolean; var CommandLine: String; @@ -1170,6 +1183,7 @@ begin aStringList.add(appPath+'\fbtrace.conf'); aStringList.add(appPath+'\security4.fdb'); aStringList.add(appPath+'\security4.fdb.old'); + aStringList.add(appPath+'\replication.conf'); for count := 0 to aStringList.count - 1 do begin // We are manually handling the share count of these files, so we 
must diff --git a/builds/install/arch-specific/win32/FirebirdInstallEnvironmentChecks.inc b/builds/install/arch-specific/win32/FirebirdInstallEnvironmentChecks.inc index ee33deb13f..4c462fe68d 100644 --- a/builds/install/arch-specific/win32/FirebirdInstallEnvironmentChecks.inc +++ b/builds/install/arch-specific/win32/FirebirdInstallEnvironmentChecks.inc @@ -753,7 +753,7 @@ procedure SetupSharedFilesArray; // - Libraries // - License files begin -SetArrayLength(SharedFileArray,36); +SetArrayLength(SharedFileArray,37); SharedFileArray[0].Filename := ExpandConstant('{app}')+'IPLicense.txt'; SharedFileArray[1].Filename := ExpandConstant('{app}')+'IDPLicense.txt'; @@ -802,6 +802,7 @@ SharedFileArray[32].Filename := ExpandConstant('{app}')+'fbsvcmgr.exe'; SharedFileArray[33].Filename := ExpandConstant('{app}')+'fbrmclib.dll'; SharedFileArray[34].Filename := ExpandConstant('{app}')+'fbtracemgr.exe'; SharedFileArray[35].Filename := ExpandConstant('{app}')+'zlib1.dll'; +SharedFileArray[36].Filename := ExpandConstant('{app}')+'replication.conf'; // FIXME // Add in WOW64 here. diff --git a/builds/install/arch-specific/win32/ba/Instalacija_ProcitajMe.txt b/builds/install/arch-specific/win32/ba/Instalacija_ProcitajMe.txt index f37c90dec1..bd0c4519db 100644 --- a/builds/install/arch-specific/win32/ba/Instalacija_ProcitajMe.txt +++ b/builds/install/arch-specific/win32/ba/Instalacija_ProcitajMe.txt @@ -120,12 +120,13 @@ o Ako je Firebird pokrenut kao aplikacija (umjesto kao deinstalacija ce zavrsiti s greskom. Moracete izbrisati ostatke rucno. 
-o Deinstalacija ostavlja 5 datoteka u install +o Deinstalacija ostavlja 6 datoteka u install direktoriju: - databases.conf - firebird.conf - fbtrace.conf + - replication.conf - firebird.log - security4.fdb diff --git a/builds/install/arch-specific/win32/cz/instalace_ctime.txt b/builds/install/arch-specific/win32/cz/instalace_ctime.txt index c3dc0b6315..c456017287 100644 --- a/builds/install/arch-specific/win32/cz/instalace_ctime.txt +++ b/builds/install/arch-specific/win32/cz/instalace_ctime.txt @@ -116,11 +116,12 @@ o Pokud Firebird běží jako aplikace (místo jako musíte vymazat ručně. o Odinstalace v instalační složce zanechá - čtyri soubory: + šest souborů: - databases.conf - firebird.conf - fbtrace.conf + - replication.conf - firebird.log - security3.fdb diff --git a/builds/install/arch-specific/win32/fr/installation_lisezmoi.txt b/builds/install/arch-specific/win32/fr/installation_lisezmoi.txt index 367f85501b..4a7ad0e9e3 100644 --- a/builds/install/arch-specific/win32/fr/installation_lisezmoi.txt +++ b/builds/install/arch-specific/win32/fr/installation_lisezmoi.txt @@ -124,12 +124,13 @@ o Si Firebird est exécuté comme application (et non Vous devrez alors supprimer les fichiers restant manuellement. -o La désinstallation laisse cinq fichiers dans le +o La désinstallation laisse six fichiers dans le répertoire d'installation: - aliases.conf - firebird.conf - fbtrace.conf + - replication.conf - firebird.log - security4.fdb diff --git a/builds/install/arch-specific/win32/installation_readme.txt b/builds/install/arch-specific/win32/installation_readme.txt index 479a3b73e8..43ce69c486 100644 --- a/builds/install/arch-specific/win32/installation_readme.txt +++ b/builds/install/arch-specific/win32/installation_readme.txt @@ -119,12 +119,13 @@ o If Firebird is running as an application (instead of uninstall the uninstall will complete with errors. You will have to delete the remnants by hand. 
-o Uninstallation leaves five files in the install +o Uninstallation leaves six files in the install directory: - databases.conf - firebird.conf - fbtrace.conf + - replication.conf - firebird.log - security4.fdb diff --git a/builds/install/arch-specific/win32/installation_scripted.txt b/builds/install/arch-specific/win32/installation_scripted.txt index 15d159b60a..ed791d00ed 100644 --- a/builds/install/arch-specific/win32/installation_scripted.txt +++ b/builds/install/arch-specific/win32/installation_scripted.txt @@ -150,6 +150,7 @@ Parameters specific to Firebird uninstalls firebird.log security3.fdb fbtrace.conf + replication.conf The assumption is that these files will be useful to users re-installing or upgrading Firebird. Starting with Firebird 2.1 diff --git a/builds/install/arch-specific/win32/ru/installation_readme.txt b/builds/install/arch-specific/win32/ru/installation_readme.txt index a309f0ce84..6c07302288 100644 --- a/builds/install/arch-specific/win32/ru/installation_readme.txt +++ b/builds/install/arch-specific/win32/ru/installation_readme.txt @@ -116,12 +116,13 @@ o Если Firebird запущен в качестве приложения (а завершится с ошибкой. Вы должны будете удалить оставшиеся файлы вручную. -o Деинсталлятор оставляет четыре файла в каталоге +o Деинсталлятор оставляет следующие файлы в каталоге установки: - databases.conf - firebird.conf - fbtrace.conf + - replication.conf - firebird.log - security4.fdb diff --git a/builds/install/misc/replication.conf b/builds/install/misc/replication.conf new file mode 100644 index 0000000000..c8764fadb3 --- /dev/null +++ b/builds/install/misc/replication.conf @@ -0,0 +1,169 @@ +# +# Replication configuration +# + +database +{ + ### ORIGIN SIDE SETTINGS + + # Size of the local buffer used to accumulate changes that can be + # deferred until the transaction commit/rollback. The bigger this value + # the less disk access concurrency (related to log IOPS) happens. 
+ # + # For synchronous replication, it also affects the number of network round-trips + # between primary and replica hosts. + # However, a larger buffer means longer replication "checkpoints" + # (delay to synchronize the original database with its replica at commit). + # + # buffer_size = 1048576 # 1MB + + # Pattern (regular expression) that defines what tables must be included into + # replication. By default, all tables are replicated. + # + # include_filter = + + # Pattern (regular expression) that defines what tables must be excluded from + # replication. By default, all tables are replicated. + # + # exclude_filter = + + # Directory to store replication log files. + # + # log_directory = + + # Prefix for replication log file names. It will be automatically suffixed + # with an ordinal sequential number. If not specified, database filename + # (without path) is used as a prefix. + # + # log_file_prefix = + + # Maximum allowed size for a single replication segment. + # + # log_segment_size = 16777216 # 16MB + + # Maximum allowed number of full replication segments. Once this limit is reached, + # the replication process is temporarily delayed to allow the archiving to catch up. + # If any of the full segments is not archived during one minute, + # the replication fails with an error. + # + # Zero means an unlimited number of segments pending archiving. + # + # log_segment_count = 8 + + # Delay, in milliseconds, to wait before the changes are synchronously flushed + # to the log (usually at commit time). This allows multiple concurrently committing + # transactions to amortise I/O costs by sharing a single flush operation. + # + # Zero means no delay, i.e. "group flushing" is disabled. + # + # log_group_flush_delay = 0 + + # Directory for the archived log files. + # + # Directory to store archived replication segments. + # It also defines the $(archpathname) substitution macro (see below). 
+ # + # log_archive_directory = + + # Program (complete command line with arguments) that is executed when some + # replication segment gets full and needs archiving. + # + # This program MUST return zero ONLY if archiving has been performed successfully. + # In particular, it MUST return non-zero if the target archive already exists. + # + # Special predefined macros are available: + # $(logfilename) - file name (without path) of the log segment being archived + # $(logpathname) - full path name of the log segment being archived + # same as log_directory + $(logfilename) + # $(archpathname) - suggested full path name for the archived segment + # same as log_archive_directory + $(logfilename) + # + # Simplest configuration is to use standard OS commands for archiving, e.g.: + # + # Linux: "test ! -f $(archpathname) && cp $(logpathname) $(archpathname)" + # or + # Windows: "copy $(logpathname) $(archpathname)" + # + # log_archive_command = + + # Timeout, in seconds, to wait until an incomplete segment is scheduled for archiving. + # It allows minimizing the replication gap if the database is modified rarely. + # + # Zero means no intermediate archiving, i.e. segments are archived only after + # reaching their maximum size (defined by log_segment_size). + # + # log_archive_timeout = 60 + + # Connection string to the replica database (used for synchronous replication only). + # Expected format: + # + # [<login>:<password>@]<database connection string> + # + # Examples: + # + # server2:replica + # john:smith@server2:replica + # server2:/my/replica/database.fdb + # john:smith@server2:/my/replica/database.fdb + # + # Multiple entries are allowed (for different synchronous replicas). + # + # sync_replica = + + + ### REPLICA SIDE SETTINGS + + # Directory to search for the log files to be replicated. + # + # log_source_directory = + + # Filter to limit replication to the particular source database (based on its GUID). 
+ # Expected format: {XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX} + # + # source_guid = + + # If enabled, replication.log contains the detailed log of operations performed + # by the replication server. Otherwise (by default), only errors and warnings are logged. + # + # verbose_logging = false + + # Timeout (in seconds) to wait before scanning for the new replication segments. + # + # It's used to pause the replication server when all existing segments are already applied + # to the replica database and there are no new segments in the specified directory. + # + # apply_idle_timeout = 10 + + # Timeout (in seconds) to wait before retrying to apply the queued segments after error. + # + # It's used to pause the replication server after some critical error happened during replication. + # In this case, the server disconnects from the replica database, sleeps for the specified timeout, + # then reconnects back and tries to re-apply the latest segments from the point of failure. + # + # apply_error_timeout = 60 +} + +# +# Database-specific settings belong here, e.g. 
+# +# (for the origin side) +# +# database = /your/db.fdb +# { + # (for synchronous replication) + # + # sync_replica = sysdba:masterkey@otherhost:/your/replica.fdb + # + # (for asynchronous replication) + # + # log_directory = /your/db/chlog + # log_archive_directory = /your/db/archlog + # log_archive_timeout = 10 +# } +# +# (for the replica side) +# +# database = /your/db.fdb +# { + # log_source_directory = /your/db/incominglog +# } diff --git a/builds/install/posix-common/posixLibrary.sh.in b/builds/install/posix-common/posixLibrary.sh.in index af24e1451a..03bf57fcf7 100644 --- a/builds/install/posix-common/posixLibrary.sh.in +++ b/builds/install/posix-common/posixLibrary.sh.in @@ -931,6 +931,8 @@ fixFilePermissions() { cd @FB_LOGDIR@ touch firebird.log MakeFileFirebirdWritable firebird.log + touch replication.log + MakeFileFirebirdWritable replication.log # Security database cd @FB_SECDBDIR@ diff --git a/builds/posix/make.shared.variables b/builds/posix/make.shared.variables index e04c53f2bf..2bfb559e56 100644 --- a/builds/posix/make.shared.variables +++ b/builds/posix/make.shared.variables @@ -54,7 +54,8 @@ AllObjects += $(YValve_Objects) # Remote Remote_Common:= $(call dirObjects,remote) $(call dirObjects,auth/SecureRemotePassword) -Remote_Server:= $(call dirObjects,remote/server) $(call dirObjects,auth/SecureRemotePassword/server) +Remote_Server:= $(call dirObjects,remote/server) $(call dirObjects,auth/SecureRemotePassword/server) \ + $(call makeObjects,jrd/replication,Config.cpp Utils.cpp) Remote_Client:= $(call dirObjects,remote/client) $(call dirObjects,auth/SecureRemotePassword/client) \ $(call makeObjects,auth/SecurityDatabase,LegacyClient.cpp) \ $(call dirObjects,plugins/crypt/arc4) @@ -66,7 +67,7 @@ AllObjects += $(Remote_Common) $(Remote_Server) $(Remote_Client) # Engine Engine_Objects:= $(call dirObjects,jrd) $(call dirObjects,dsql) $(call dirObjects,jrd/extds) \ - $(call dirObjects,jrd/recsrc) $(call dirObjects,jrd/trace) \ + $(call 
dirObjects,jrd/recsrc) $(call dirObjects,jrd/replication) $(call dirObjects,jrd/trace) \ $(call makeObjects,lock,lock.cpp) AllObjects += $(Engine_Objects) diff --git a/builds/win32/make_all.bat b/builds/win32/make_all.bat index 6a04f9c938..9e420c63a5 100644 --- a/builds/win32/make_all.bat +++ b/builds/win32/make_all.bat @@ -67,6 +67,7 @@ findstr /V "@UDF_COMMENT@" %FB_ROOT_PATH%\builds\install\misc\firebird.conf.in > @copy %FB_ROOT_PATH%\builds\install\misc\databases.conf.in %FB_OUTPUT_DIR%\databases.conf >nul @copy %FB_ROOT_PATH%\builds\install\misc\fbintl.conf %FB_OUTPUT_DIR%\intl >nul @copy %FB_ROOT_PATH%\builds\install\misc\plugins.conf %FB_OUTPUT_DIR% >nul +@copy %FB_ROOT_PATH%\builds\install\misc\replication.conf %FB_OUTPUT_DIR% >nul @copy %FB_ROOT_PATH%\src\utilities\ntrace\fbtrace.conf %FB_OUTPUT_DIR% >nul @copy %FB_ROOT_PATH%\src\plugins\udr_engine\udr_engine.conf %FB_OUTPUT_DIR%\plugins\udr_engine.conf >nul @copy %FB_ROOT_PATH%\builds\install\misc\IPLicense.txt %FB_OUTPUT_DIR% >nul diff --git a/builds/win32/msvc12/engine.vcxproj b/builds/win32/msvc12/engine.vcxproj index 13d6dcb80e..3e539364ff 100644 --- a/builds/win32/msvc12/engine.vcxproj +++ b/builds/win32/msvc12/engine.vcxproj @@ -129,6 +129,13 @@ + + + + + + + diff --git a/builds/win32/msvc12/fbserver.vcxproj b/builds/win32/msvc12/fbserver.vcxproj index a878b08f19..db5b0e57ac 100644 --- a/builds/win32/msvc12/fbserver.vcxproj +++ b/builds/win32/msvc12/fbserver.vcxproj @@ -218,6 +218,9 @@ + + + diff --git a/builds/win32/msvc14/engine.vcxproj b/builds/win32/msvc14/engine.vcxproj index 563203bd36..191be3e254 100644 --- a/builds/win32/msvc14/engine.vcxproj +++ b/builds/win32/msvc14/engine.vcxproj @@ -129,6 +129,13 @@ + + + + + + + diff --git a/builds/win32/msvc14/fbserver.vcxproj b/builds/win32/msvc14/fbserver.vcxproj index 648c6773af..0a9b234dd7 100644 --- a/builds/win32/msvc14/fbserver.vcxproj +++ b/builds/win32/msvc14/fbserver.vcxproj @@ -218,6 +218,9 @@ + + + diff --git 
a/builds/win32/msvc15/engine.vcxproj b/builds/win32/msvc15/engine.vcxproj index 7accd34bea..89c7a4f6e7 100644 --- a/builds/win32/msvc15/engine.vcxproj +++ b/builds/win32/msvc15/engine.vcxproj @@ -129,6 +129,13 @@ + + + + + + + diff --git a/builds/win32/msvc15/fbserver.vcxproj b/builds/win32/msvc15/fbserver.vcxproj index 7a684abaf8..8f28551d9a 100644 --- a/builds/win32/msvc15/fbserver.vcxproj +++ b/builds/win32/msvc15/fbserver.vcxproj @@ -219,6 +219,9 @@ + + + diff --git a/configure.ac b/configure.ac index 863647682f..bcd5c95da2 100644 --- a/configure.ac +++ b/configure.ac @@ -1257,6 +1257,7 @@ gen/$fb_tgt/firebird/databases.conf:builds/install/misc/databases.conf.in gen/$fb_tgt/firebird/fbtrace.conf:src/utilities/ntrace/fbtrace.conf gen/$fb_tgt/firebird/intl/fbintl.conf:builds/install/misc/fbintl.conf gen/$fb_tgt/firebird/plugins.conf:builds/install/misc/plugins.conf +gen/$fb_tgt/firebird/replication.conf:builds/install/misc/replication.conf ]) done diff --git a/doc/README.replication.md b/doc/README.replication.md new file mode 100644 index 0000000000..2645004d7b --- /dev/null +++ b/doc/README.replication.md @@ -0,0 +1,117 @@ +# Firebird replication + +## Concept + +Firebird 4 offers built-in support for uni-directional \(aka master-slave\) logical replication. Logical here means record-level replication, as opposed to physical \(page-level\) replication. Implementation is primarily focused on HA \(high-availability\) solutions, but it can be used for other tasks as well. + +Events that are tracked include: inserted/updated/deleted records, sequence changes, DDL statements. Replication is transactional, commit order is preserved. Replication can track changes in either all tables, or in some customized subset of tables. Tables to be replicated are required to have either a primary key or at least a unique key. + +There are two replication modes available: synchronous and asynchronous. 
+ +In synchronous replication, the primary database is permanently connected to the replica database\(s\) and changes are being replicated immediately \(in fact, some recent uncommitted changes may be buffered, but they are transmitted at the commit time\). This effectively means that databases are in sync after every commit. However, this may impact performance due to additional network traffic and round-trips. Multiple synchronous replicas can be configured, if necessary. + +In asynchronous replication, changes are being written into the local journal files that are transferred over the wire and applied to the replica database. This impacts the performance much less, but introduces the delay \(known as replication lag\) when changes are not yet applied to the replica database, i.e. the replica database is always "catching up" the master database. + +There are two access modes for replica databases: read-only and read-write. Read-only replica allows to execute any query that does not modify data \(global temporary tables can be modified as they are not replicated\), modifications are limited to the replication process only. Read-write replica allows to execute any query, possible conflicts must be resolved by users. + +## Journalling + +Asynchronous replication is based on journalling. Replicated changes are written into the journal which consists of multiple files \(known as replication segments\). Firebird server writes segments continuously, one after one. Every segment has a unique number which is generated sequentially. This number \(known as segment sequence\), combined with the database UUID, provide globally unique identification of journal segments. The global sequence counter is stored inside the replicated database and it's never reset \(until the database is restored from backup\). + +Segments are regularly rotated, this process is controlled by either maximum segment size or timeout, both thresholds are configurable. 
Once the active segment reaches the threshold, it's marked as "full" and writing switches to the next available segment. Full segments are archived and then reused for subsequent writes. Archiving basically means copying the segment with a purpose of transferring it to the replica host and applying there. Copying can be done by Firebird server itself or, alternatively, by custom \(user-specified\) command. + +On the replica side, journal segments are applied in the replication sequence order. Firebird server periodically scans for the new segments appearing in the configured directory. Once the next segment is found, it gets replicated. Replication state is stored in the local file named {UUID} \(per every replication source\) and contains the following markers: latest segment sequence \(LSS\), oldest segment sequence \(OSS\) and list of active transactions started between OSS and LSS. LSS means the last replicated segment. OSS means the segment that started the earliest transaction that wasn't finished at the time LSS was processed. These markers control two things: \(1\) what segment must be replicated next and \(2\) when segment files can be safely deleted. Segments with numbers between OSS and LSS are preserved for replaying the journal after the replicator disconnects from the replica database \(e.g. due to replication error or idle timeout\). If there are no active transactions pending and LSS was processed without errors, all segments up to \(and including\) LSS are deleted. In the case of any critical error, replication is temporarily suspended and re-attempted after timeout. + +## Error reporting + +All replication errors and warnings \(e.g. detected conflicts\) are written into the replication.log file stored in the Firebird log directory \(by default this is the root directory of the Firebird installation\). This file may also include the detailed description of the operations performed by the replicator. 
+ +## Setting up the master side + +Replication is configured using a single configuration file: replication.conf. It allows defining global settings as well as per-database settings. All the possible options are listed inside replication.conf, descriptions are provided as comments there. For per-database configuration, full database name must be specified \(aliases or wildcards are not allowed\) inside the {database} section. + +Tables to be replicated can be customized using two settings: include\_filter and exclude\_filter. They are regular expressions that are applied to table names and define rules for including table\(s\) in the replication set or excluding them from the replication set. + +Synchronous replication can be turned on using the sync\_replica setting \(multiple entries are allowed\). It must specify a connection string to the replica database, prefixed with username/password. In SuperServer and SuperClassic architectures, replica database is being internally attached when the first user gets connected to the master database and detached when the last user disconnects from the master database. In Classic Server architecture, every server process keeps an active connection to the replica database. + +Asynchronous replication requires setting up the journalling mechanism. The primary parameter is log\_directory which defines location of the replication journal. Once this location is specified, asynchronous replication is turned on and Firebird server starts producing the journal segments. + +Minimal configuration looks like this: + +database = /data/mydb.fdb +{ + log\_directory = /dblogs/mydb/ + log\_archive\_directory = /shiplogs/mydb/ +} + +Archiving is performed by copying the segments from /dblogs/mydb/ to /shiplogs/mydb/, Firebird server copies the segments itself. 
+ +The same with user-defined archiving: + +database = /data/mydb.fdb +{ + log\_directory = /dblogs/mydb/ + log\_archive\_directory = /shiplogs/mydb/ + log\_archive\_command = "test ! -f $\(archpathname\) && cp $\(logpathname\) $\(archpathname\)" +} + +Where $\(logpathname\) and $\(archpathname\) are built-in macros that provide the custom shell command with real file names. + +Custom archiving \(log\_archive\_command setting\) allows to use any system shell command \(including scripts / batch files\) to deliver segments to the replica side. It could use compression, FTP, or whatever else available on the server. Actual transport implementation is up to DBA, Firebird just produces segments on the master side and expects them to appear at the replica side. If the replica storage can be remotely attached to the master host, it becomes just a matter of copying the segment files. In other cases, some transport solution is required. + +The same with archiving performed every 10 seconds: + +database = /data/mydb.fdb +{ + log\_directory = /dblogs/mydb/ + log\_archive\_directory = /shiplogs/mydb/ + log\_archive\_command = "test ! -f $\(archpathname\) && cp $\(logpathname\) $\(archpathname\)" + log\_archive\_timeout = 10 +} + +Read replication.conf for other possible settings. + +To apply the changed master-side settings, all users must be reconnected. + +## Setting up the replica side + +The same replication.conf file is used. Setting log\_source\_directory specifies the location that Firebird server scans for the transmitted segments. Additionally, DBA may explicitly specify what source database is accepted for replication. Setting source\_guid is used for that purpose. + +Sample configuration looks like this: + +database = /data/mydb.fdb +{ + log\_source\_directory = /incominglogs/ + source\_guid = {6F9619FF-8B86-D011-B42D-00CF4FC964FF} +} + +Read replication.conf for other possible settings. 
+ +To apply the changed replica-side settings, Firebird server must be restarted. + +## Creating the replica database + +In the Beta 1 release, any physical copying method can be used: + +* File-level copy when Firebird server is shut down +* ALTER DATABASE BEGIN BACKUP + file-level copy + ALTER DATABASE END BACKUP +* nbackup -l + file-level copy + nbackup -n +* nbackup -b 0 + +Then the replica mode must be activated for the database copy. Two options are possible: + +* gfix -replica read\_only <database> -- set up database as read-only replica +* gfix -replica read\_write <database> -- set up database as read-write replica + +Read-only replica means that only the replicator connection can modify the database. This is mostly intended for high availability solutions as the replica database is guaranteed to match the master one and can be used for fast recovery. Regular user connections may perform any operations allowed for read-only transactions: select from tables, execute read-only procedures, write into global temporary tables, etc. Database maintenance such as sweeping, shutdown, monitoring is also allowed. This can be used for moving read-only load \(analytics, etc\) to the replica database. However, read-only connections may potentially conflict with the replication if some DDL statements \(those requiring an exclusive metadata lock\) are performed on the master database. + +Read-write replicas allow both the replicator connection and regular user connections to modify the database concurrently. This does not guarantee the replica database to be in sync with the master one, so it's not recommended to use this mode for high availability, unless replica-side user connections modify only tables excluded from replication. + +## Converting the replica to a regular database + +As simple as this: + +* gfix -replica none <database> + +This isn't strictly required for read-write replicas, but recommended to avoid unexpected replication flow. 
+ diff --git a/doc/sql.extensions/README.ddl.txt b/doc/sql.extensions/README.ddl.txt index 3a0726eef9..3b71255952 100644 --- a/doc/sql.extensions/README.ddl.txt +++ b/doc/sql.extensions/README.ddl.txt @@ -556,6 +556,7 @@ GRANT_REVOKE_ON_ANY_OBJECT GRANT and REVOKE rights on any object in database GRANT_REVOKE_ANY_DDL_RIGHT GRANT and REVOKE any DDL rights CREATE_PRIVILEGED_ROLES Use SET SYSTEM PRIVILEGES in roles MODIFY_EXT_CONN_POOL Manage properties of pool of external connections +REPLICATE_INTO_DATABASE Use replication API to load changesets into database 22) New grantee type in GRANT and REVOKE operators - SYSTEM PRIVILEGE. diff --git a/lang_helpers/gds_codes.ftn b/lang_helpers/gds_codes.ftn index 05bb8c60cf..2a78368303 100644 --- a/lang_helpers/gds_codes.ftn +++ b/lang_helpers/gds_codes.ftn @@ -1912,6 +1912,8 @@ C -- PARAMETER (GDS__tom_rsa_verify = 335545249) INTEGER*4 GDS__tom_chacha_key PARAMETER (GDS__tom_chacha_key = 335545250) + INTEGER*4 GDS__bad_repl_handle + PARAMETER (GDS__bad_repl_handle = 335545251) INTEGER*4 GDS__gfix_db_name PARAMETER (GDS__gfix_db_name = 335740929) INTEGER*4 GDS__gfix_invalid_sw diff --git a/lang_helpers/gds_codes.pas b/lang_helpers/gds_codes.pas index 6a98731bb3..1e37c2c011 100644 --- a/lang_helpers/gds_codes.pas +++ b/lang_helpers/gds_codes.pas @@ -1907,6 +1907,8 @@ const gds_tom_rsa_verify = 335545249; isc_tom_chacha_key = 335545250; gds_tom_chacha_key = 335545250; + isc_bad_repl_handle = 335545251; + gds_bad_repl_handle = 335545251; isc_gfix_db_name = 335740929; gds_gfix_db_name = 335740929; isc_gfix_invalid_sw = 335740930; diff --git a/src/alice/alice.cpp b/src/alice/alice.cpp index 23a2f2770f..561b04ee29 100644 --- a/src/alice/alice.cpp +++ b/src/alice/alice.cpp @@ -448,6 +448,22 @@ int alice(Firebird::UtilSvc* uSvc) } } + if (table->in_sw_value & sw_replica) + { + if (--argc <= 0) + ALICE_error(135); // msg 135: replica mode (none / read_only / read_write) required + + ALICE_upper_case(*argv++, string, sizeof(string)); + 
+ if (!strcmp(string, "NONE")) + tdgbl->ALICE_data.ua_replica_mode = REPL_NONE; + else if (!strcmp(string, ALICE_SW_MODE_RO)) + tdgbl->ALICE_data.ua_replica_mode = REPL_READ_ONLY; + else if (!strcmp(string, ALICE_SW_MODE_RW)) + tdgbl->ALICE_data.ua_replica_mode = REPL_READ_WRITE; + else + ALICE_error(135); // msg 135: replica mode (none / read_only / read_write) required + } } // put this here since to put it above overly complicates the parsing. diff --git a/src/alice/alice.h b/src/alice/alice.h index 7a337762a4..6c73796438 100644 --- a/src/alice/alice.h +++ b/src/alice/alice.h @@ -65,6 +65,12 @@ enum alice_shut_mode { SHUT_FULL = 4 }; +enum alice_repl_mode { + REPL_NONE = 0, + REPL_READ_ONLY = 1, + REPL_READ_WRITE = 2 +}; + struct user_action { ULONG ua_switches; @@ -86,6 +92,7 @@ struct user_action //TEXT ua_log_file[MAXPATHLEN]; USHORT ua_db_SQL_dialect; alice_shut_mode ua_shutdown_mode; + alice_repl_mode ua_replica_mode; }; diff --git a/src/alice/aliceswi.h b/src/alice/aliceswi.h index ae5d818300..ef8b28bed4 100644 --- a/src/alice/aliceswi.h +++ b/src/alice/aliceswi.h @@ -64,6 +64,7 @@ const SINT64 sw_fetch_password = QUADCONST(0x0000000800000000); const SINT64 sw_nolinger = QUADCONST(0x0000001000000000); const SINT64 sw_icu = QUADCONST(0x0000002000000000); const SINT64 sw_role = QUADCONST(0x0000004000000000); +const SINT64 sw_replica = QUADCONST(0x0000008000000000); enum alice_switches @@ -119,7 +120,8 @@ enum alice_switches IN_SW_ALICE_FETCH_PASSWORD = 46, IN_SW_ALICE_NOLINGER = 47, IN_SW_ALICE_ICU = 48, - IN_SW_ALICE_ROLE = 49 + IN_SW_ALICE_ROLE = 49, + IN_SW_ALICE_REPLICA = 50 }; static const char* const ALICE_SW_ASYNC = "ASYNC"; @@ -212,6 +214,9 @@ static const Switches::in_sw_tab_t alice_in_sw_table[] = // msg 43: \t-quit_log\tquit logging for replay utility */ #endif + {IN_SW_ALICE_REPLICA, 0, "REPLICA", sw_replica, + 0, ~(sw_replica | sw_user | sw_password | sw_nolinger | sw_role), false, false, 134, 2, NULL}, + // msg 134: -replica access mode 
{IN_SW_ALICE_ROLE, 0, "ROLE", sw_role, 0, 0, false, false, 132, 4, NULL}, // msg 132: -role set SQL role name diff --git a/src/alice/exe.cpp b/src/alice/exe.cpp index c346f3319c..1118dae18f 100644 --- a/src/alice/exe.cpp +++ b/src/alice/exe.cpp @@ -321,6 +321,9 @@ static void buildDpb(Firebird::ClumpletWriter& dpb, const SINT64 switches) else if (switches & sw_set_db_dialect) { dpb.insertInt(isc_dpb_set_db_sql_dialect, tdgbl->ALICE_data.ua_db_SQL_dialect); } + else if (switches & sw_replica) { + dpb.insertByte(isc_dpb_set_db_replica, tdgbl->ALICE_data.ua_replica_mode); + } if (switches & sw_nolinger) dpb.insertTag(isc_dpb_nolinger); diff --git a/src/common/classes/GenericMap.h b/src/common/classes/GenericMap.h index 35494d467c..230f8e1979 100644 --- a/src/common/classes/GenericMap.h +++ b/src/common/classes/GenericMap.h @@ -71,6 +71,9 @@ public: bool getFirst() { return m_Accessor.getFirst(); } bool getNext() { return m_Accessor.getNext(); } + bool locate(const KeyType& key) { return m_Accessor.locate(key); } + bool fastRemove() { return m_Accessor.fastRemove(); } + private: Accessor(const Accessor&); Accessor& operator=(const Accessor&); @@ -88,6 +91,8 @@ public: bool getFirst() { return m_Accessor.getFirst(); } bool getNext() { return m_Accessor.getNext(); } + ValueType* locate(const KeyType& key) { return m_Accessor.locate(key); } + private: ConstAccessor(const ConstAccessor&); ConstAccessor& operator=(const ConstAccessor&); diff --git a/src/common/classes/locks.h b/src/common/classes/locks.h index 68a9be9d4b..9202d485a3 100644 --- a/src/common/classes/locks.h +++ b/src/common/classes/locks.h @@ -334,7 +334,8 @@ public: { try { - lock->leave(); + if (lock) + lock->leave(); } catch (const Exception&) { @@ -342,6 +343,15 @@ public: } } + void release() + { + if (lock) + { + lock->leave(); + lock = NULL; + } + } + private: // Forbid copying MutexLockGuard(const MutexLockGuard&); diff --git a/src/common/config/config_file.cpp b/src/common/config/config_file.cpp 
index 922d58b96f..f61e665e77 100644 --- a/src/common/config/config_file.cpp +++ b/src/common/config/config_file.cpp @@ -415,6 +415,9 @@ ConfigFile::LineType ConfigFile::parseLine(const char* fileName, const String& i bool ConfigFile::macroParse(String& value, const char* fileName) const { + if (flags & CUSTOM_MACROS) + return true; + String::size_type subFrom; while ((subFrom = value.find("$(")) != String::npos) @@ -430,9 +433,9 @@ bool ConfigFile::macroParse(String& value, const char* fileName) const } ++subTo; - // Avoid double slashes in pathnames - PathUtils::setDirIterator(value.begin()); - PathUtils::setDirIterator(macro.begin()); + // Avoid incorrect slashes in pathnames + PathUtils::fixupSeparators(value.begin()); + PathUtils::fixupSeparators(macro.begin()); if (subFrom > 0 && value[subFrom - 1] == PathUtils::dir_sep && macro.length() > 0 && macro[0] == PathUtils::dir_sep) diff --git a/src/common/config/config_file.h b/src/common/config/config_file.h index 584b6d09d6..4699ff7a5a 100644 --- a/src/common/config/config_file.h +++ b/src/common/config/config_file.h @@ -56,6 +56,7 @@ public: static const USHORT ERROR_WHEN_MISS = 0x02; static const USHORT NATIVE_ORDER = 0x04; static const USHORT NO_COMMENTS = 0x08; + static const USHORT CUSTOM_MACROS = 0x10; // enum to distinguish ctors enum UseText {USE_TEXT}; diff --git a/src/common/isc_s_proto.h b/src/common/isc_s_proto.h index 30c8919f91..eca91dd885 100644 --- a/src/common/isc_s_proto.h +++ b/src/common/isc_s_proto.h @@ -358,6 +358,7 @@ public: SRAM_TPC_HEADER = 0xF9, SRAM_TPC_BLOCK = 0xF8, SRAM_TPC_SNAPSHOTS = 0xF7, + SRAM_CHANGELOG_STATE = 0xF6 }; protected: diff --git a/src/common/os/path_utils.h b/src/common/os/path_utils.h index ccc65ce9b4..5485002d44 100644 --- a/src/common/os/path_utils.h +++ b/src/common/os/path_utils.h @@ -57,55 +57,53 @@ public: static const char dir_list_sep; /** An abstract base class for iterating through the contents of a directory. 
- Instances of this class are created using the newDirItr method of + Instances of this class are created using the newDirIterator method of the PathUtils class. Each platform implementation is expected to - subclass dir_iterator to create dir_iterator objects that function + subclass DirIterator to create DirIterator objects that function correctly on the platform. **/ - class dir_iterator : protected Firebird::AutoStorage + class DirIterator : protected Firebird::AutoStorage { public: - /// The constructor requires a string that is the path of the - /// directory being iterater. - /// dir_iterator may be located on stack, therefore use AutoStorage - dir_iterator(MemoryPool& p, const Firebird::PathName& dir) + // The constructor requires a string that is the path of the directory being iterated. + // DirIterator may be located on stack, therefore use AutoStorage. + DirIterator(MemoryPool& p, const Firebird::PathName& dir) : AutoStorage(p), dirPrefix(getPool(), dir) {} - dir_iterator(const Firebird::PathName& dir) + DirIterator(const Firebird::PathName& dir) : AutoStorage(), dirPrefix(getPool(), dir) {} - /// destructor provided for memory cleanup. - virtual ~dir_iterator() {} + // Destructor provided for memory cleanup + virtual ~DirIterator() {} - /// The prefix increment operator (++itr) advances the iteration by - /// one and returns a reference to itself to allow cascading operations. - virtual const dir_iterator& operator++() = 0; + // The prefix increment operator (++itr) advances the iteration by + // one and returns a reference to itself to allow cascading operations + virtual const DirIterator& operator++() = 0; - /// The dereference operator returns a reference to the current - /// item in the iteration. This path is prefixed with the path of - /// the directory. If the last element of the path is wanted use - /// PathUtils::splitLastComponent on the result of this function. 
+ // The dereference operator returns a reference to the current + // item in the iteration. This path is prefixed with the path of + // the directory. If the last element of the path is wanted use + // PathUtils::splitLastComponent on the result of this function. virtual const Firebird::PathName& operator*() = 0; - /// Tests if the iterator has reached the end of the iteration. - /// It is implemented in such a way to make the following for loop - /// work correctly: for (dir_iterator *itr = PathUtils::newDirItr(); *itr; ++(*itr)) + // Tests if the iterator has reached the end of the iteration. + // It is implemented in such a way to make the following loop work correctly: + // for (DirIterator *itr = PathUtils::newDirIterator(); *itr; ++(*itr)) virtual operator bool() = 0; protected: - /// Stores the path to the directory as given in the constructor. + // Stores the path to the directory as given in the constructor const Firebird::PathName dirPrefix; private: - /// default constructor not allowed. - dir_iterator(); // no impl - /// copy constructor not allowed - dir_iterator(const dir_iterator&); // no impl - /// assignment operator not allowed - const dir_iterator& operator=(const dir_iterator&); // no impl - + // Default constructor is not allowed + DirIterator(); + // Copy constructor is not allowed + DirIterator(const DirIterator&); + // Assignment operator is not allowed + const DirIterator& operator=(const DirIterator&); }; /** isRelative returns true if the given path is relative, and false if not. @@ -142,6 +140,14 @@ public: // We don't work correctly with MBCS. 
static void ensureSeparator(Firebird::PathName& in_out); + // Ensure the path separators are correct for the current platform + static void fixupSeparators(char* path); + + static void fixupSeparators(Firebird::PathName& path) + { + fixupSeparators(path.begin()); + } + /** splitLastComponent takes a path as the third argument and removes the last component in that path (usually a file or directory name). The removed component is returned in the second parameter, and the path left @@ -159,19 +165,14 @@ public: **/ static void splitPrefix(Firebird::PathName& path, Firebird::PathName& prefix); - /** This is the factory method for allocating dir_iterator objects. + /** This is the factory method for allocating DirIterator objects. It takes a reference to a memory pool to use for all heap allocations, and the path of the directory to iterate (in that order). It is the responsibility of the caller to delete the object when they are done with it. All errors result in either exceptions being thrown, or a valid empty - dir_iterator being returned. + DirIterator being returned. **/ - static dir_iterator* newDirItr(MemoryPool&, const Firebird::PathName&); - - /** setDirIterator converts all dir iterators to one required on current - platform. - **/ - static void setDirIterator(char* path); + static DirIterator* newDirIterator(MemoryPool&, const Firebird::PathName&); /** makeDir creates directory passed as parameter. return value is 0 on success or error code on error. diff --git a/src/common/os/posix/os_utils.cpp b/src/common/os/posix/os_utils.cpp index bbd56517b4..f5f011aa4a 100644 --- a/src/common/os/posix/os_utils.cpp +++ b/src/common/os/posix/os_utils.cpp @@ -205,7 +205,7 @@ int openCreateSharedFile(const char* pathname, int flags) { int fd = os_utils::open(pathname, flags | O_RDWR | O_CREAT, S_IREAD | S_IWRITE); if (fd < 0) - raiseError(fd, pathname); + raiseError(ERRNO, pathname); // Security check - avoid symbolic links in /tmp. 
// Malicious user can create a symlink with this name pointing to say @@ -218,9 +218,8 @@ int openCreateSharedFile(const char* pathname, int flags) if (rc != 0) { - int e = errno; close(fd); - raiseError(e, pathname); + raiseError(ERRNO, pathname); } if (S_ISLNK(st.st_mode)) diff --git a/src/common/os/posix/path_utils.cpp b/src/common/os/posix/path_utils.cpp index 1b10e929cd..88dad97a89 100644 --- a/src/common/os/posix/path_utils.cpp +++ b/src/common/os/posix/path_utils.cpp @@ -33,6 +33,8 @@ #include #include +using namespace Firebird; + /// The POSIX implementation of the path_utils abstraction. const char PathUtils::dir_sep = '/'; @@ -42,74 +44,90 @@ const char PathUtils::dir_list_sep = ':'; const size_t PathUtils::curr_dir_link_len = strlen(curr_dir_link); const size_t PathUtils::up_dir_link_len = strlen(up_dir_link); -class PosixDirItr : public PathUtils::dir_iterator +class PosixDirIterator : public PathUtils::DirIterator { public: - PosixDirItr(MemoryPool& p, const Firebird::PathName& path) - : dir_iterator(p, path), dir(0), file(p), done(false) + PosixDirIterator(MemoryPool& p, const PathName& path) + : DirIterator(p, path), dir(nullptr), ent(nullptr), file(p), done(false) { init(); } - PosixDirItr(const Firebird::PathName& path) - : dir_iterator(path), dir(0), done(false) + + PosixDirIterator(const PathName& path) + : DirIterator(path), dir(0), done(false) { init(); } - ~PosixDirItr(); - const PosixDirItr& operator++(); - const Firebird::PathName& operator*() { return file; } + + ~PosixDirIterator(); + + const PosixDirIterator& operator++(); + const PathName& operator*() { return file; } operator bool() { return !done; } private: - DIR *dir; - Firebird::PathName file; + DIR* dir; + dirent* ent; + PathName file; bool done; + void init(); }; -void PosixDirItr::init() +void PosixDirIterator::init() { dir = opendir(dirPrefix.c_str()); + if (!dir) - done = true; - else - ++(*this); + system_call_failed::raise("opendir"); + + ++(*this); } 
-PosixDirItr::~PosixDirItr() +PosixDirIterator::~PosixDirIterator() { if (dir) + { closedir(dir); - dir = 0; + dir = nullptr; + } + done = true; } -const PosixDirItr& PosixDirItr::operator++() +const PosixDirIterator& PosixDirIterator::operator++() { - if (done) - return *this; - struct dirent *ent = os_utils::readdir(dir); - if (ent == NULL) + if (!done) { - done = true; - } - else - { - PathUtils::concatPath(file, dirPrefix, ent->d_name); + while ( (ent = os_utils::readdir(dir)) ) + { + PathName entryname; + PathUtils::concatPath(entryname, dirPrefix, ent->d_name); + + struct stat stats; + if (!stat(entryname.c_str(), &stats) && S_ISREG(stats.st_mode)) + break; + } + + if (ent) + PathUtils::concatPath(file, dirPrefix, ent->d_name); + else + done = true; } + return *this; } -PathUtils::dir_iterator *PathUtils::newDirItr(MemoryPool& p, const Firebird::PathName& path) +PathUtils::DirIterator* PathUtils::newDirIterator(MemoryPool& p, const PathName& path) { - return FB_NEW_POOL(p) PosixDirItr(p, path); + return FB_NEW_POOL(p) PosixDirIterator(p, path); } -void PathUtils::splitLastComponent(Firebird::PathName& path, Firebird::PathName& file, - const Firebird::PathName& orgPath) +void PathUtils::splitLastComponent(PathName& path, PathName& file, + const PathName& orgPath) { - Firebird::PathName::size_type pos = orgPath.rfind(dir_sep); - if (pos == Firebird::PathName::npos) + PathName::size_type pos = orgPath.rfind(dir_sep); + if (pos == PathName::npos) { path = ""; file = orgPath; @@ -122,7 +140,7 @@ void PathUtils::splitLastComponent(Firebird::PathName& path, Firebird::PathName& file.append(orgPath, pos + 1, orgPath.length() - pos - 1); } -void PathUtils::splitPrefix(Firebird::PathName& path, Firebird::PathName& prefix) +void PathUtils::splitPrefix(PathName& path, PathName& prefix) { prefix.erase(); while (path.hasData() && path[0] == dir_sep) @@ -132,9 +150,9 @@ void PathUtils::splitPrefix(Firebird::PathName& path, Firebird::PathName& prefix } } -void 
PathUtils::concatPath(Firebird::PathName& result, - const Firebird::PathName& first, - const Firebird::PathName& second) +void PathUtils::concatPath(PathName& result, + const PathName& first, + const PathName& second) { if (first.length() == 0) { @@ -153,13 +171,13 @@ void PathUtils::concatPath(Firebird::PathName& result, ensureSeparator(result); - Firebird::PathName::size_type cur_pos = 0; + PathName::size_type cur_pos = 0; - for (Firebird::PathName::size_type pos = 0; cur_pos < second.length(); cur_pos = pos + 1) + for (PathName::size_type pos = 0; cur_pos < second.length(); cur_pos = pos + 1) { pos = second.find(dir_sep, cur_pos); - if (pos == Firebird::PathName::npos) // simple name, simple handling + if (pos == PathName::npos) // simple name, simple handling pos = second.length(); if (pos == cur_pos) // Empty piece, ignore @@ -180,8 +198,8 @@ void PathUtils::concatPath(Firebird::PathName& result, continue; } - const Firebird::PathName::size_type up_dir = result.rfind(dir_sep, result.length() - 2); - if (up_dir == Firebird::PathName::npos) + const PathName::size_type up_dir = result.rfind(dir_sep, result.length() - 2); + if (up_dir == PathName::npos) continue; result.erase(up_dir + 1); @@ -193,7 +211,7 @@ void PathUtils::concatPath(Firebird::PathName& result, } // We don't work correctly with MBCS. 
-void PathUtils::ensureSeparator(Firebird::PathName& in_out) +void PathUtils::ensureSeparator(PathName& in_out) { if (in_out.length() == 0) in_out = PathUtils::dir_sep; @@ -202,14 +220,23 @@ void PathUtils::ensureSeparator(Firebird::PathName& in_out) in_out += PathUtils::dir_sep; } -bool PathUtils::isRelative(const Firebird::PathName& path) +void PathUtils::fixupSeparators(char* path) +{ + for (; *path; ++path) + { + if (*path == '\\') + *path = '/'; + } +} + +bool PathUtils::isRelative(const PathName& path) { if (path.length() > 0) return path[0] != dir_sep; return false; } -bool PathUtils::isSymLink(const Firebird::PathName& path) +bool PathUtils::isSymLink(const PathName& path) { struct STAT st, lst; @@ -222,21 +249,12 @@ bool PathUtils::isSymLink(const Firebird::PathName& path) return st.st_ino != lst.st_ino; } -bool PathUtils::canAccess(const Firebird::PathName& path, int mode) +bool PathUtils::canAccess(const PathName& path, int mode) { return access(path.c_str(), mode) == 0; } -void PathUtils::setDirIterator(char* path) -{ - for (; *path; ++path) - { - if (*path == '\\') - *path = '/'; - } -} - -int PathUtils::makeDir(const Firebird::PathName& path) +int PathUtils::makeDir(const PathName& path) { int rc = mkdir(path.c_str(), 0770) ? errno : 0; if (rc == 0) diff --git a/src/common/os/win32/path_utils.cpp b/src/common/os/win32/path_utils.cpp index 30d965e05d..4a5d40211e 100644 --- a/src/common/os/win32/path_utils.cpp +++ b/src/common/os/win32/path_utils.cpp @@ -4,6 +4,8 @@ #include // _access #include // _mkdir +using namespace Firebird; + /// The Win32 implementation of the path_utils abstraction. 
const char PathUtils::dir_sep = '\\'; @@ -13,84 +15,105 @@ const char PathUtils::dir_list_sep = ';'; const size_t PathUtils::curr_dir_link_len = strlen(curr_dir_link); const size_t PathUtils::up_dir_link_len = strlen(up_dir_link); -class Win32DirItr : public PathUtils::dir_iterator +class Win32DirIterator : public PathUtils::DirIterator { public: - Win32DirItr(MemoryPool& p, const Firebird::PathName& path) - : dir_iterator(p, path), dir(0), file(getPool()), done(false) + Win32DirIterator(MemoryPool& p, const PathName& path) + : DirIterator(p, path), dir(0), file(getPool()), done(false) { - Win32DirInit(); + init(); } - Win32DirItr(const Firebird::PathName& path) - : dir_iterator(path), dir(0), file(getPool()), done(false) + + Win32DirIterator(const PathName& path) + : DirIterator(path), dir(0), file(getPool()), done(false) { - Win32DirInit(); + init(); } - ~Win32DirItr(); - const PathUtils::dir_iterator& operator++(); - const Firebird::PathName& operator*() { return file; } + + ~Win32DirIterator(); + + const PathUtils::DirIterator& operator++(); + const PathName& operator*() { return file; } operator bool() { return !done; } private: HANDLE dir; WIN32_FIND_DATA fd; - Firebird::PathName file; + PathName file; bool done; - void Win32DirInit(); + void init(); }; -void Win32DirItr::Win32DirInit() +void Win32DirIterator::init() { - Firebird::PathName dirPrefix2 = dirPrefix; + PathName mask(dirPrefix); + PathUtils::ensureSeparator(mask); + mask += "*.*"; - if (dirPrefix.length() && dirPrefix[dirPrefix.length() - 1] != PathUtils::dir_sep) - dirPrefix2 = dirPrefix2 + PathUtils::dir_sep; - dirPrefix2 += "*.*"; + dir = FindFirstFile(mask.c_str(), &fd); + + if (dir == INVALID_HANDLE_VALUE) + { + if (GetLastError() != ERROR_FILE_NOT_FOUND) + system_call_failed::raise("FindFirstFile"); - dir = FindFirstFile(dirPrefix2.c_str(), &fd); - if (dir == INVALID_HANDLE_VALUE) { dir = 0; done = true; } + else if (fd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) + ++(*this); + else + 
PathUtils::concatPath(file, dirPrefix, fd.cFileName); } -Win32DirItr::~Win32DirItr() +Win32DirIterator::~Win32DirIterator() { if (dir) + { FindClose(dir); + dir = 0; + } - dir = 0; done = true; } -const PathUtils::dir_iterator& Win32DirItr::operator++() +const PathUtils::DirIterator& Win32DirIterator::operator++() { - if (done) - return *this; + if (!done) + { + while (true) + { + if (!FindNextFile(dir, &fd)) + { + done = true; + break; + } + else if (!(fd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) + break; + } - if (!FindNextFile(dir, &fd)) - done = true; - else - PathUtils::concatPath(file, dirPrefix, fd.cFileName); + if (!done) + PathUtils::concatPath(file, dirPrefix, fd.cFileName); + } return *this; } -PathUtils::dir_iterator *PathUtils::newDirItr(MemoryPool& p, const Firebird::PathName& path) +PathUtils::DirIterator* PathUtils::newDirIterator(MemoryPool& p, const PathName& path) { - return FB_NEW_POOL(p) Win32DirItr(p, path); + return FB_NEW_POOL(p) Win32DirIterator(p, path); } -void PathUtils::splitLastComponent(Firebird::PathName& path, Firebird::PathName& file, - const Firebird::PathName& orgPath) +void PathUtils::splitLastComponent(PathName& path, PathName& file, + const PathName& orgPath) { - Firebird::PathName::size_type pos = orgPath.rfind(PathUtils::dir_sep); - if (pos == Firebird::PathName::npos) + PathName::size_type pos = orgPath.rfind(PathUtils::dir_sep); + if (pos == PathName::npos) { pos = orgPath.rfind('/'); // temp hack to make it work with paths, // not expanded by ISC_expand_filename - if (pos == Firebird::PathName::npos) + if (pos == PathName::npos) { path = ""; file = orgPath; @@ -104,9 +127,9 @@ void PathUtils::splitLastComponent(Firebird::PathName& path, Firebird::PathName& file.append(orgPath, pos + 1, orgPath.length() - pos - 1); } -void PathUtils::concatPath(Firebird::PathName& result, - const Firebird::PathName& first, - const Firebird::PathName& second) +void PathUtils::concatPath(PathName& result, + const PathName& first, + const 
PathName& second) { if (first.length() == 0) { @@ -125,16 +148,16 @@ void PathUtils::concatPath(Firebird::PathName& result, ensureSeparator(result); - Firebird::PathName::size_type cur_pos = 0; + PathName::size_type cur_pos = 0; - for (Firebird::PathName::size_type pos = 0; cur_pos < second.length(); cur_pos = pos + 1) + for (PathName::size_type pos = 0; cur_pos < second.length(); cur_pos = pos + 1) { static const char separators[] = "/\\"; - static const Firebird::PathName::size_type separatorsLen = - static_cast(strlen(separators)); + static const PathName::size_type separatorsLen = + static_cast(strlen(separators)); pos = second.find_first_of(separators, cur_pos, separatorsLen); - if (pos == Firebird::PathName::npos) // simple name, simple handling + if (pos == PathName::npos) // simple name, simple handling pos = second.length(); if (pos == cur_pos) // Empty piece, ignore @@ -155,10 +178,10 @@ void PathUtils::concatPath(Firebird::PathName& result, continue; } - const Firebird::PathName::size_type up_dir = result.find_last_of( + const PathName::size_type up_dir = result.find_last_of( separators, result.length() - 2, separatorsLen); - if (up_dir == Firebird::PathName::npos) + if (up_dir == PathName::npos) continue; result.erase(up_dir + 1); @@ -170,7 +193,7 @@ void PathUtils::concatPath(Firebird::PathName& result, } // We don't work correctly with MBCS. 
-void PathUtils::ensureSeparator(Firebird::PathName& in_out) +void PathUtils::ensureSeparator(PathName& in_out) { if (in_out.length() == 0) in_out = PathUtils::dir_sep; @@ -179,14 +202,23 @@ void PathUtils::ensureSeparator(Firebird::PathName& in_out) in_out += PathUtils::dir_sep; } -static bool hasDriveLetter(const Firebird::PathName& path) +void PathUtils::fixupSeparators(char* path) +{ + for (; *path; ++path) + { + if (*path == '/') + *path = '\\'; + } +} + +static bool hasDriveLetter(const PathName& path) { return path.length() > 2 && path[1] == ':' && (('A' <= path[0] && path[0] <= 'Z') || ('a' <= path[0] && path[0] <= 'z')); } -bool PathUtils::isRelative(const Firebird::PathName& path) +bool PathUtils::isRelative(const PathName& path) { if (path.length() > 0) { @@ -196,7 +228,7 @@ bool PathUtils::isRelative(const Firebird::PathName& path) return true; } -void PathUtils::splitPrefix(Firebird::PathName& path, Firebird::PathName& prefix) +void PathUtils::splitPrefix(PathName& path, PathName& prefix) { prefix.erase(); if (hasDriveLetter(path)) @@ -213,26 +245,17 @@ void PathUtils::splitPrefix(Firebird::PathName& path, Firebird::PathName& prefix // This function can be made to return something util if we consider junctions (since w2k) // and NTFS symbolic links (since WinVista). -bool PathUtils::isSymLink(const Firebird::PathName&) +bool PathUtils::isSymLink(const PathName&) { return false; } -bool PathUtils::canAccess(const Firebird::PathName& path, int mode) +bool PathUtils::canAccess(const PathName& path, int mode) { return _access(path.c_str(), mode) == 0; } -void PathUtils::setDirIterator(char* path) -{ - for (; *path; ++path) - { - if (*path == '/') - *path = '\\'; - } -} - -int PathUtils::makeDir(const Firebird::PathName& path) +int PathUtils::makeDir(const PathName& path) { return _mkdir(path.c_str()) ? 
errno : 0; } diff --git a/src/dsql/DdlNodes.epp b/src/dsql/DdlNodes.epp index e585fb3656..512109e15a 100644 --- a/src/dsql/DdlNodes.epp +++ b/src/dsql/DdlNodes.epp @@ -10255,7 +10255,7 @@ void DropShadowNode::execute(thread_db* tdbb, DsqlCompilerScratch* /*dsqlScratch { if (nodelete) { - AutoSetRestoreFlag noDfw(&tdbb->tdbb_flags, TDBB_dont_post_dfw, true); + AutoSetRestoreFlag noDfw(&tdbb->tdbb_flags, TDBB_dont_post_dfw, true); MODIFY FIL USING FIL.RDB$FILE_FLAGS |= FILE_nodelete; END_MODIFY @@ -11734,7 +11734,7 @@ void GrantRevokeNode::grantRevoke(thread_db* tdbb, jrd_tra* transaction, const G char priv[2]; priv[1] = '\0'; - AutoSetRestoreFlag trDdl(&tdbb->tdbb_flags, TDBB_trusted_ddl, true); + AutoSetRestoreFlag trustedDdlFlag(&tdbb->tdbb_flags, TDBB_trusted_ddl, true); if (isGrant) { diff --git a/src/dsql/DdlNodes.h b/src/dsql/DdlNodes.h index 8732641867..67da62c6bc 100644 --- a/src/dsql/DdlNodes.h +++ b/src/dsql/DdlNodes.h @@ -1947,6 +1947,11 @@ public: virtual bool checkPermission(thread_db* tdbb, jrd_tra* transaction); virtual void execute(thread_db* tdbb, DsqlCompilerScratch* dsqlScratch, jrd_tra* transaction); + virtual bool mustBeReplicated() const + { + return false; + } + protected: virtual void putErrorPrefix(Firebird::Arg::StatusVector& statusVector) { @@ -1977,6 +1982,11 @@ public: virtual bool checkPermission(thread_db* tdbb, jrd_tra* transaction); virtual void execute(thread_db* tdbb, DsqlCompilerScratch* dsqlScratch, jrd_tra* transaction); + virtual bool mustBeReplicated() const + { + return false; + } + protected: virtual void putErrorPrefix(Firebird::Arg::StatusVector& statusVector) { @@ -2159,6 +2169,11 @@ public: virtual bool checkPermission(thread_db* tdbb, jrd_tra* transaction); virtual void execute(thread_db* tdbb, DsqlCompilerScratch* dsqlScratch, jrd_tra* transaction); + virtual bool mustBeReplicated() const + { + return false; + } + protected: virtual void putErrorPrefix(Firebird::Arg::StatusVector& statusVector) { @@ -2221,6 +2236,11 @@ 
public: virtual bool checkPermission(thread_db* tdbb, jrd_tra* transaction); virtual void execute(thread_db* tdbb, DsqlCompilerScratch* dsqlScratch, jrd_tra* transaction); + virtual bool mustBeReplicated() const + { + return false; + } + protected: virtual void putErrorPrefix(Firebird::Arg::StatusVector& statusVector) { @@ -2376,6 +2396,11 @@ public: virtual bool checkPermission(thread_db* tdbb, jrd_tra* transaction); virtual void execute(thread_db* tdbb, DsqlCompilerScratch* dsqlScratch, jrd_tra* transaction); + virtual bool mustBeReplicated() const + { + return false; + } + protected: virtual void putErrorPrefix(Firebird::Arg::StatusVector& statusVector) { diff --git a/src/dsql/Nodes.h b/src/dsql/Nodes.h index b0a671ac8b..e06c92a598 100644 --- a/src/dsql/Nodes.h +++ b/src/dsql/Nodes.h @@ -264,6 +264,11 @@ public: virtual void putErrorPrefix(Firebird::Arg::StatusVector& statusVector) = 0; virtual void execute(thread_db* tdbb, DsqlCompilerScratch* dsqlScratch, jrd_tra* transaction) = 0; + + virtual bool mustBeReplicated() const + { + return true; + } }; diff --git a/src/dsql/StmtNodes.cpp b/src/dsql/StmtNodes.cpp index 4b2eaa3af5..b054681b76 100644 --- a/src/dsql/StmtNodes.cpp +++ b/src/dsql/StmtNodes.cpp @@ -36,6 +36,7 @@ #include "../jrd/extds/ExtDS.h" #include "../jrd/recsrc/RecordSource.h" #include "../jrd/recsrc/Cursor.h" +#include "../jrd/replication/Publisher.h" #include "../jrd/trace/TraceManager.h" #include "../jrd/trace/TraceJrdHelpers.h" #include "../jrd/cmp_proto.h" @@ -2628,7 +2629,10 @@ const StmtNode* EraseNode::erase(thread_db* tdbb, jrd_req* request, WhichTrigger else if (relation->isVirtual()) VirtualTable::erase(tdbb, rpb); else if (!relation->rel_view_rse) + { VIO_erase(tdbb, rpb, transaction); + REPL_erase(tdbb, rpb, transaction); + } // Handle post operation trigger. 
if (relation->rel_post_erase && whichTrig != PRE_TRIG) @@ -6408,6 +6412,7 @@ const StmtNode* ModifyNode::modify(thread_db* tdbb, jrd_req* request, WhichTrigg { VIO_modify(tdbb, orgRpb, newRpb, transaction); IDX_modify(tdbb, orgRpb, newRpb, transaction); + REPL_modify(tdbb, orgRpb, newRpb, transaction); } newRpb->rpb_number = orgRpb->rpb_number; @@ -7291,6 +7296,7 @@ const StmtNode* StoreNode::store(thread_db* tdbb, jrd_req* request, WhichTrigger { VIO_store(tdbb, rpb, transaction); IDX_store(tdbb, rpb, transaction); + REPL_store(tdbb, rpb, transaction); } rpb->rpb_number.setValid(true); diff --git a/src/dsql/dsql.cpp b/src/dsql/dsql.cpp index ca62a3eb0a..8623f22e33 100644 --- a/src/dsql/dsql.cpp +++ b/src/dsql/dsql.cpp @@ -64,6 +64,7 @@ #include "../jrd/opt_proto.h" #include "../jrd/tra_proto.h" #include "../jrd/recsrc/RecordSource.h" +#include "../jrd/replication/Publisher.h" #include "../jrd/trace/TraceManager.h" #include "../jrd/trace/TraceDSQLHelpers.h" #include "../common/classes/init.h" @@ -878,7 +879,12 @@ void DsqlDdlRequest::execute(thread_db* tdbb, jrd_tra** traHandle, try { + AutoSetRestoreFlag execDdl(&tdbb->tdbb_flags, TDBB_repl_sql, true); + node->executeDdl(tdbb, internalScratch, req_transaction); + + if (node->mustBeReplicated()) + REPL_exec_sql(tdbb, req_transaction, *getStatement()->getSqlText()); } catch (status_exception& ex) { diff --git a/src/include/consts_pub.h b/src/include/consts_pub.h index 13e05830f0..2caa31045f 100644 --- a/src/include/consts_pub.h +++ b/src/include/consts_pub.h @@ -124,6 +124,7 @@ #define isc_dpb_reset_icu 89 #define isc_dpb_map_attach 90 #define isc_dpb_session_time_zone 91 +#define isc_dpb_set_db_replica 92 /**************************************************/ /* clumplet tags used inside isc_dpb_address_path */ diff --git a/src/include/firebird/FirebirdInterface.idl b/src/include/firebird/FirebirdInterface.idl index 30a9940be7..46fbd6bc75 100644 --- a/src/include/firebird/FirebirdInterface.idl +++ 
b/src/include/firebird/FirebirdInterface.idl @@ -29,6 +29,7 @@ typedef ISC_DATE; typedef ISC_QUAD; typedef ISC_TIME; +typedef ISC_TIMESTAMP; typedef ISC_TIME_TZ; typedef ISC_TIMESTAMP_TZ; typedef FB_DEC16; @@ -517,6 +518,23 @@ interface Pipe : ReferenceCounted void close(Status status); } */ +/* +interface ReplicationBatch : Versioned +{ + void process(Status status, ReplicationSession replicator); + const string getDatabaseID(); + uint64 getTransactionID(); + ISC_TIMESTAMP getTimestamp(); +} +*/ +interface Replicator : ReferenceCounted +{ +/* + void process(Status status, ReplicationBatch batch); +*/ + void process(Status status, uint length, const uchar* data); + void close(Status status); +} interface Request : ReferenceCounted { @@ -602,6 +620,8 @@ version: // 3.0 => 4.0 Transaction transaction, MessageMetadata inMetadata, void* inBuffer, MessageMetadata outMetadata, uint parLength, const uchar* par); */ + + Replicator createReplicator(Status status); } interface Service : ReferenceCounted @@ -643,7 +663,7 @@ interface Dtc : Versioned } -//Interfaces, used by authentication plugins +// Interfaces, used by authentication plugins interface Auth : PluginBase { @@ -1487,3 +1507,51 @@ interface DecFloat34 : Versioned void fromBcd(int sign, const uchar* bcd, int exp, FB_DEC34* to); void fromString(Status status, const string from, FB_DEC34* to); } + +// Replication interfaces + +interface ReplicatedRecord : Versioned +{ + uint getRawLength(); + const uchar* getRawData(); +} + +interface ReplicatedBlob : Versioned +{ + uint getLength(); + boolean isEof(); + uint getSegment(uint length, uchar* buffer); +} + +interface ReplicatedTransaction : Disposable +{ + boolean prepare(); + boolean commit(); + boolean rollback(); + + boolean startSavepoint(); + boolean releaseSavepoint(); + boolean rollbackSavepoint(); + + boolean insertRecord(const string name, + ReplicatedRecord record); + boolean updateRecord(const string name, + ReplicatedRecord orgRecord, + ReplicatedRecord 
newRecord); + boolean deleteRecord(const string name, + ReplicatedRecord record); + + boolean storeBlob(ISC_QUAD blobId, ReplicatedBlob blob); + + boolean executeSql(const string sql); +} + +interface ReplicatedSession : Disposable +{ + Status getStatus(); + + ReplicatedTransaction startTransaction(int64 number); + boolean cleanupTransaction(int64 number); + + boolean setSequence(const string name, int64 value); +} diff --git a/src/include/firebird/IdlFbInterfaces.h b/src/include/firebird/IdlFbInterfaces.h index 1a6884c19e..d976c4f7cc 100644 --- a/src/include/firebird/IdlFbInterfaces.h +++ b/src/include/firebird/IdlFbInterfaces.h @@ -51,6 +51,7 @@ namespace Firebird class IStatement; class IBatch; class IBatchCompletionState; + class IReplicator; class IRequest; class IEvents; class IAttachment; @@ -115,6 +116,10 @@ namespace Firebird class IUdrPlugin; class IDecFloat16; class IDecFloat34; + class IReplicatedRecord; + class IReplicatedBlob; + class IReplicatedTransaction; + class IReplicatedSession; // Interfaces declarations @@ -1935,6 +1940,43 @@ namespace Firebird } }; + class IReplicator : public IReferenceCounted + { + public: + struct VTable : public IReferenceCounted::VTable + { + void (CLOOP_CARG *process)(IReplicator* self, IStatus* status, unsigned length, const unsigned char* data) throw(); + void (CLOOP_CARG *close)(IReplicator* self, IStatus* status) throw(); + }; + + protected: + IReplicator(DoNotInherit) + : IReferenceCounted(DoNotInherit()) + { + } + + ~IReplicator() + { + } + + public: + static const unsigned VERSION = 3; + + template void process(StatusType* status, unsigned length, const unsigned char* data) + { + StatusType::clearException(status); + static_cast(this->cloopVTable)->process(this, status, length, data); + StatusType::checkException(status); + } + + template void close(StatusType* status) + { + StatusType::clearException(status); + static_cast(this->cloopVTable)->close(this, status); + StatusType::checkException(status); + } + }; + 
class IRequest : public IReferenceCounted { public: @@ -2069,6 +2111,7 @@ namespace Firebird unsigned (CLOOP_CARG *getStatementTimeout)(IAttachment* self, IStatus* status) throw(); void (CLOOP_CARG *setStatementTimeout)(IAttachment* self, IStatus* status, unsigned timeOut) throw(); IBatch* (CLOOP_CARG *createBatch)(IAttachment* self, IStatus* status, ITransaction* transaction, unsigned stmtLength, const char* sqlStmt, unsigned dialect, IMessageMetadata* inMetadata, unsigned parLength, const unsigned char* par) throw(); + IReplicator* (CLOOP_CARG *createReplicator)(IAttachment* self, IStatus* status) throw(); }; protected: @@ -2287,6 +2330,20 @@ namespace Firebird StatusType::checkException(status); return ret; } + + template IReplicator* createReplicator(StatusType* status) + { + if (cloopVTable->version < 4) + { + StatusType::setVersionError(status, "IAttachment", cloopVTable->version, 4); + StatusType::checkException(status); + return 0; + } + StatusType::clearException(status); + IReplicator* ret = static_cast(this->cloopVTable)->createReplicator(this, status); + StatusType::checkException(status); + return ret; + } }; class IService : public IReferenceCounted @@ -5769,6 +5826,230 @@ namespace Firebird } }; + class IReplicatedRecord : public IVersioned + { + public: + struct VTable : public IVersioned::VTable + { + unsigned (CLOOP_CARG *getRawLength)(IReplicatedRecord* self) throw(); + const unsigned char* (CLOOP_CARG *getRawData)(IReplicatedRecord* self) throw(); + }; + + protected: + IReplicatedRecord(DoNotInherit) + : IVersioned(DoNotInherit()) + { + } + + ~IReplicatedRecord() + { + } + + public: + static const unsigned VERSION = 2; + + unsigned getRawLength() + { + unsigned ret = static_cast(this->cloopVTable)->getRawLength(this); + return ret; + } + + const unsigned char* getRawData() + { + const unsigned char* ret = static_cast(this->cloopVTable)->getRawData(this); + return ret; + } + }; + + class IReplicatedBlob : public IVersioned + { + public: + struct 
VTable : public IVersioned::VTable + { + unsigned (CLOOP_CARG *getLength)(IReplicatedBlob* self) throw(); + FB_BOOLEAN (CLOOP_CARG *isEof)(IReplicatedBlob* self) throw(); + unsigned (CLOOP_CARG *getSegment)(IReplicatedBlob* self, unsigned length, unsigned char* buffer) throw(); + }; + + protected: + IReplicatedBlob(DoNotInherit) + : IVersioned(DoNotInherit()) + { + } + + ~IReplicatedBlob() + { + } + + public: + static const unsigned VERSION = 2; + + unsigned getLength() + { + unsigned ret = static_cast(this->cloopVTable)->getLength(this); + return ret; + } + + FB_BOOLEAN isEof() + { + FB_BOOLEAN ret = static_cast(this->cloopVTable)->isEof(this); + return ret; + } + + unsigned getSegment(unsigned length, unsigned char* buffer) + { + unsigned ret = static_cast(this->cloopVTable)->getSegment(this, length, buffer); + return ret; + } + }; + + class IReplicatedTransaction : public IDisposable + { + public: + struct VTable : public IDisposable::VTable + { + FB_BOOLEAN (CLOOP_CARG *prepare)(IReplicatedTransaction* self) throw(); + FB_BOOLEAN (CLOOP_CARG *commit)(IReplicatedTransaction* self) throw(); + FB_BOOLEAN (CLOOP_CARG *rollback)(IReplicatedTransaction* self) throw(); + FB_BOOLEAN (CLOOP_CARG *startSavepoint)(IReplicatedTransaction* self) throw(); + FB_BOOLEAN (CLOOP_CARG *releaseSavepoint)(IReplicatedTransaction* self) throw(); + FB_BOOLEAN (CLOOP_CARG *rollbackSavepoint)(IReplicatedTransaction* self) throw(); + FB_BOOLEAN (CLOOP_CARG *insertRecord)(IReplicatedTransaction* self, const char* name, IReplicatedRecord* record) throw(); + FB_BOOLEAN (CLOOP_CARG *updateRecord)(IReplicatedTransaction* self, const char* name, IReplicatedRecord* orgRecord, IReplicatedRecord* newRecord) throw(); + FB_BOOLEAN (CLOOP_CARG *deleteRecord)(IReplicatedTransaction* self, const char* name, IReplicatedRecord* record) throw(); + FB_BOOLEAN (CLOOP_CARG *storeBlob)(IReplicatedTransaction* self, ISC_QUAD blobId, IReplicatedBlob* blob) throw(); + FB_BOOLEAN (CLOOP_CARG 
*executeSql)(IReplicatedTransaction* self, const char* sql) throw(); + }; + + protected: + IReplicatedTransaction(DoNotInherit) + : IDisposable(DoNotInherit()) + { + } + + ~IReplicatedTransaction() + { + } + + public: + static const unsigned VERSION = 3; + + FB_BOOLEAN prepare() + { + FB_BOOLEAN ret = static_cast(this->cloopVTable)->prepare(this); + return ret; + } + + FB_BOOLEAN commit() + { + FB_BOOLEAN ret = static_cast(this->cloopVTable)->commit(this); + return ret; + } + + FB_BOOLEAN rollback() + { + FB_BOOLEAN ret = static_cast(this->cloopVTable)->rollback(this); + return ret; + } + + FB_BOOLEAN startSavepoint() + { + FB_BOOLEAN ret = static_cast(this->cloopVTable)->startSavepoint(this); + return ret; + } + + FB_BOOLEAN releaseSavepoint() + { + FB_BOOLEAN ret = static_cast(this->cloopVTable)->releaseSavepoint(this); + return ret; + } + + FB_BOOLEAN rollbackSavepoint() + { + FB_BOOLEAN ret = static_cast(this->cloopVTable)->rollbackSavepoint(this); + return ret; + } + + FB_BOOLEAN insertRecord(const char* name, IReplicatedRecord* record) + { + FB_BOOLEAN ret = static_cast(this->cloopVTable)->insertRecord(this, name, record); + return ret; + } + + FB_BOOLEAN updateRecord(const char* name, IReplicatedRecord* orgRecord, IReplicatedRecord* newRecord) + { + FB_BOOLEAN ret = static_cast(this->cloopVTable)->updateRecord(this, name, orgRecord, newRecord); + return ret; + } + + FB_BOOLEAN deleteRecord(const char* name, IReplicatedRecord* record) + { + FB_BOOLEAN ret = static_cast(this->cloopVTable)->deleteRecord(this, name, record); + return ret; + } + + FB_BOOLEAN storeBlob(ISC_QUAD blobId, IReplicatedBlob* blob) + { + FB_BOOLEAN ret = static_cast(this->cloopVTable)->storeBlob(this, blobId, blob); + return ret; + } + + FB_BOOLEAN executeSql(const char* sql) + { + FB_BOOLEAN ret = static_cast(this->cloopVTable)->executeSql(this, sql); + return ret; + } + }; + + class IReplicatedSession : public IDisposable + { + public: + struct VTable : public IDisposable::VTable + { + 
IStatus* (CLOOP_CARG *getStatus)(IReplicatedSession* self) throw(); + IReplicatedTransaction* (CLOOP_CARG *startTransaction)(IReplicatedSession* self, ISC_INT64 number) throw(); + FB_BOOLEAN (CLOOP_CARG *cleanupTransaction)(IReplicatedSession* self, ISC_INT64 number) throw(); + FB_BOOLEAN (CLOOP_CARG *setSequence)(IReplicatedSession* self, const char* name, ISC_INT64 value) throw(); + }; + + protected: + IReplicatedSession(DoNotInherit) + : IDisposable(DoNotInherit()) + { + } + + ~IReplicatedSession() + { + } + + public: + static const unsigned VERSION = 3; + + IStatus* getStatus() + { + IStatus* ret = static_cast(this->cloopVTable)->getStatus(this); + return ret; + } + + IReplicatedTransaction* startTransaction(ISC_INT64 number) + { + IReplicatedTransaction* ret = static_cast(this->cloopVTable)->startTransaction(this, number); + return ret; + } + + FB_BOOLEAN cleanupTransaction(ISC_INT64 number) + { + FB_BOOLEAN ret = static_cast(this->cloopVTable)->cleanupTransaction(this, number); + return ret; + } + + FB_BOOLEAN setSequence(const char* name, ISC_INT64 value) + { + FB_BOOLEAN ret = static_cast(this->cloopVTable)->setSequence(this, name, value); + return ret; + } + }; + // Interfaces implementations template @@ -9428,6 +9709,100 @@ namespace Firebird virtual void getStatus(StatusType* status, IStatus* to, unsigned pos) = 0; }; + template + class IReplicatorBaseImpl : public Base + { + public: + typedef IReplicator Declaration; + + IReplicatorBaseImpl(DoNotInherit = DoNotInherit()) + { + static struct VTableImpl : Base::VTable + { + VTableImpl() + { + this->version = Base::VERSION; + this->addRef = &Name::cloopaddRefDispatcher; + this->release = &Name::cloopreleaseDispatcher; + this->process = &Name::cloopprocessDispatcher; + this->close = &Name::cloopcloseDispatcher; + } + } vTable; + + this->cloopVTable = &vTable; + } + + static void CLOOP_CARG cloopprocessDispatcher(IReplicator* self, IStatus* status, unsigned length, const unsigned char* data) throw() + { + 
StatusType status2(status); + + try + { + static_cast(self)->Name::process(&status2, length, data); + } + catch (...) + { + StatusType::catchException(&status2); + } + } + + static void CLOOP_CARG cloopcloseDispatcher(IReplicator* self, IStatus* status) throw() + { + StatusType status2(status); + + try + { + static_cast(self)->Name::close(&status2); + } + catch (...) + { + StatusType::catchException(&status2); + } + } + + static void CLOOP_CARG cloopaddRefDispatcher(IReferenceCounted* self) throw() + { + try + { + static_cast(self)->Name::addRef(); + } + catch (...) + { + StatusType::catchException(0); + } + } + + static int CLOOP_CARG cloopreleaseDispatcher(IReferenceCounted* self) throw() + { + try + { + return static_cast(self)->Name::release(); + } + catch (...) + { + StatusType::catchException(0); + return static_cast(0); + } + } + }; + + template > > > > + class IReplicatorImpl : public IReplicatorBaseImpl + { + protected: + IReplicatorImpl(DoNotInherit = DoNotInherit()) + { + } + + public: + virtual ~IReplicatorImpl() + { + } + + virtual void process(StatusType* status, unsigned length, const unsigned char* data) = 0; + virtual void close(StatusType* status) = 0; + }; + template class IRequestBaseImpl : public Base { @@ -9718,6 +10093,7 @@ namespace Firebird this->getStatementTimeout = &Name::cloopgetStatementTimeoutDispatcher; this->setStatementTimeout = &Name::cloopsetStatementTimeoutDispatcher; this->createBatch = &Name::cloopcreateBatchDispatcher; + this->createReplicator = &Name::cloopcreateReplicatorDispatcher; } } vTable; @@ -10059,6 +10435,21 @@ namespace Firebird } } + static IReplicator* CLOOP_CARG cloopcreateReplicatorDispatcher(IAttachment* self, IStatus* status) throw() + { + StatusType status2(status); + + try + { + return static_cast(self)->Name::createReplicator(&status2); + } + catch (...) 
+ { + StatusType::catchException(&status2); + return static_cast(0); + } + } + static void CLOOP_CARG cloopaddRefDispatcher(IReferenceCounted* self) throw() { try @@ -10121,6 +10512,7 @@ namespace Firebird virtual unsigned getStatementTimeout(StatusType* status) = 0; virtual void setStatementTimeout(StatusType* status, unsigned timeOut) = 0; virtual IBatch* createBatch(StatusType* status, ITransaction* transaction, unsigned stmtLength, const char* sqlStmt, unsigned dialect, IMessageMetadata* inMetadata, unsigned parLength, const unsigned char* par) = 0; + virtual IReplicator* createReplicator(StatusType* status) = 0; }; template @@ -17624,6 +18016,472 @@ namespace Firebird virtual void fromBcd(int sign, const unsigned char* bcd, int exp, FB_DEC34* to) = 0; virtual void fromString(StatusType* status, const char* from, FB_DEC34* to) = 0; }; + + template + class IReplicatedRecordBaseImpl : public Base + { + public: + typedef IReplicatedRecord Declaration; + + IReplicatedRecordBaseImpl(DoNotInherit = DoNotInherit()) + { + static struct VTableImpl : Base::VTable + { + VTableImpl() + { + this->version = Base::VERSION; + this->getRawLength = &Name::cloopgetRawLengthDispatcher; + this->getRawData = &Name::cloopgetRawDataDispatcher; + } + } vTable; + + this->cloopVTable = &vTable; + } + + static unsigned CLOOP_CARG cloopgetRawLengthDispatcher(IReplicatedRecord* self) throw() + { + try + { + return static_cast(self)->Name::getRawLength(); + } + catch (...) + { + StatusType::catchException(0); + return static_cast(0); + } + } + + static const unsigned char* CLOOP_CARG cloopgetRawDataDispatcher(IReplicatedRecord* self) throw() + { + try + { + return static_cast(self)->Name::getRawData(); + } + catch (...) 
+ { + StatusType::catchException(0); + return static_cast(0); + } + } + }; + + template > > + class IReplicatedRecordImpl : public IReplicatedRecordBaseImpl + { + protected: + IReplicatedRecordImpl(DoNotInherit = DoNotInherit()) + { + } + + public: + virtual ~IReplicatedRecordImpl() + { + } + + virtual unsigned getRawLength() = 0; + virtual const unsigned char* getRawData() = 0; + }; + + template + class IReplicatedBlobBaseImpl : public Base + { + public: + typedef IReplicatedBlob Declaration; + + IReplicatedBlobBaseImpl(DoNotInherit = DoNotInherit()) + { + static struct VTableImpl : Base::VTable + { + VTableImpl() + { + this->version = Base::VERSION; + this->getLength = &Name::cloopgetLengthDispatcher; + this->isEof = &Name::cloopisEofDispatcher; + this->getSegment = &Name::cloopgetSegmentDispatcher; + } + } vTable; + + this->cloopVTable = &vTable; + } + + static unsigned CLOOP_CARG cloopgetLengthDispatcher(IReplicatedBlob* self) throw() + { + try + { + return static_cast(self)->Name::getLength(); + } + catch (...) + { + StatusType::catchException(0); + return static_cast(0); + } + } + + static FB_BOOLEAN CLOOP_CARG cloopisEofDispatcher(IReplicatedBlob* self) throw() + { + try + { + return static_cast(self)->Name::isEof(); + } + catch (...) + { + StatusType::catchException(0); + return static_cast(0); + } + } + + static unsigned CLOOP_CARG cloopgetSegmentDispatcher(IReplicatedBlob* self, unsigned length, unsigned char* buffer) throw() + { + try + { + return static_cast(self)->Name::getSegment(length, buffer); + } + catch (...) 
+ { + StatusType::catchException(0); + return static_cast(0); + } + } + }; + + template > > + class IReplicatedBlobImpl : public IReplicatedBlobBaseImpl + { + protected: + IReplicatedBlobImpl(DoNotInherit = DoNotInherit()) + { + } + + public: + virtual ~IReplicatedBlobImpl() + { + } + + virtual unsigned getLength() = 0; + virtual FB_BOOLEAN isEof() = 0; + virtual unsigned getSegment(unsigned length, unsigned char* buffer) = 0; + }; + + template + class IReplicatedTransactionBaseImpl : public Base + { + public: + typedef IReplicatedTransaction Declaration; + + IReplicatedTransactionBaseImpl(DoNotInherit = DoNotInherit()) + { + static struct VTableImpl : Base::VTable + { + VTableImpl() + { + this->version = Base::VERSION; + this->dispose = &Name::cloopdisposeDispatcher; + this->prepare = &Name::cloopprepareDispatcher; + this->commit = &Name::cloopcommitDispatcher; + this->rollback = &Name::clooprollbackDispatcher; + this->startSavepoint = &Name::cloopstartSavepointDispatcher; + this->releaseSavepoint = &Name::cloopreleaseSavepointDispatcher; + this->rollbackSavepoint = &Name::clooprollbackSavepointDispatcher; + this->insertRecord = &Name::cloopinsertRecordDispatcher; + this->updateRecord = &Name::cloopupdateRecordDispatcher; + this->deleteRecord = &Name::cloopdeleteRecordDispatcher; + this->storeBlob = &Name::cloopstoreBlobDispatcher; + this->executeSql = &Name::cloopexecuteSqlDispatcher; + } + } vTable; + + this->cloopVTable = &vTable; + } + + static FB_BOOLEAN CLOOP_CARG cloopprepareDispatcher(IReplicatedTransaction* self) throw() + { + try + { + return static_cast(self)->Name::prepare(); + } + catch (...) + { + StatusType::catchException(0); + return static_cast(0); + } + } + + static FB_BOOLEAN CLOOP_CARG cloopcommitDispatcher(IReplicatedTransaction* self) throw() + { + try + { + return static_cast(self)->Name::commit(); + } + catch (...) 
+ { + StatusType::catchException(0); + return static_cast(0); + } + } + + static FB_BOOLEAN CLOOP_CARG clooprollbackDispatcher(IReplicatedTransaction* self) throw() + { + try + { + return static_cast(self)->Name::rollback(); + } + catch (...) + { + StatusType::catchException(0); + return static_cast(0); + } + } + + static FB_BOOLEAN CLOOP_CARG cloopstartSavepointDispatcher(IReplicatedTransaction* self) throw() + { + try + { + return static_cast(self)->Name::startSavepoint(); + } + catch (...) + { + StatusType::catchException(0); + return static_cast(0); + } + } + + static FB_BOOLEAN CLOOP_CARG cloopreleaseSavepointDispatcher(IReplicatedTransaction* self) throw() + { + try + { + return static_cast(self)->Name::releaseSavepoint(); + } + catch (...) + { + StatusType::catchException(0); + return static_cast(0); + } + } + + static FB_BOOLEAN CLOOP_CARG clooprollbackSavepointDispatcher(IReplicatedTransaction* self) throw() + { + try + { + return static_cast(self)->Name::rollbackSavepoint(); + } + catch (...) + { + StatusType::catchException(0); + return static_cast(0); + } + } + + static FB_BOOLEAN CLOOP_CARG cloopinsertRecordDispatcher(IReplicatedTransaction* self, const char* name, IReplicatedRecord* record) throw() + { + try + { + return static_cast(self)->Name::insertRecord(name, record); + } + catch (...) + { + StatusType::catchException(0); + return static_cast(0); + } + } + + static FB_BOOLEAN CLOOP_CARG cloopupdateRecordDispatcher(IReplicatedTransaction* self, const char* name, IReplicatedRecord* orgRecord, IReplicatedRecord* newRecord) throw() + { + try + { + return static_cast(self)->Name::updateRecord(name, orgRecord, newRecord); + } + catch (...) + { + StatusType::catchException(0); + return static_cast(0); + } + } + + static FB_BOOLEAN CLOOP_CARG cloopdeleteRecordDispatcher(IReplicatedTransaction* self, const char* name, IReplicatedRecord* record) throw() + { + try + { + return static_cast(self)->Name::deleteRecord(name, record); + } + catch (...) 
+ { + StatusType::catchException(0); + return static_cast(0); + } + } + + static FB_BOOLEAN CLOOP_CARG cloopstoreBlobDispatcher(IReplicatedTransaction* self, ISC_QUAD blobId, IReplicatedBlob* blob) throw() + { + try + { + return static_cast(self)->Name::storeBlob(blobId, blob); + } + catch (...) + { + StatusType::catchException(0); + return static_cast(0); + } + } + + static FB_BOOLEAN CLOOP_CARG cloopexecuteSqlDispatcher(IReplicatedTransaction* self, const char* sql) throw() + { + try + { + return static_cast(self)->Name::executeSql(sql); + } + catch (...) + { + StatusType::catchException(0); + return static_cast(0); + } + } + + static void CLOOP_CARG cloopdisposeDispatcher(IDisposable* self) throw() + { + try + { + static_cast(self)->Name::dispose(); + } + catch (...) + { + StatusType::catchException(0); + } + } + }; + + template > > > > + class IReplicatedTransactionImpl : public IReplicatedTransactionBaseImpl + { + protected: + IReplicatedTransactionImpl(DoNotInherit = DoNotInherit()) + { + } + + public: + virtual ~IReplicatedTransactionImpl() + { + } + + virtual FB_BOOLEAN prepare() = 0; + virtual FB_BOOLEAN commit() = 0; + virtual FB_BOOLEAN rollback() = 0; + virtual FB_BOOLEAN startSavepoint() = 0; + virtual FB_BOOLEAN releaseSavepoint() = 0; + virtual FB_BOOLEAN rollbackSavepoint() = 0; + virtual FB_BOOLEAN insertRecord(const char* name, IReplicatedRecord* record) = 0; + virtual FB_BOOLEAN updateRecord(const char* name, IReplicatedRecord* orgRecord, IReplicatedRecord* newRecord) = 0; + virtual FB_BOOLEAN deleteRecord(const char* name, IReplicatedRecord* record) = 0; + virtual FB_BOOLEAN storeBlob(ISC_QUAD blobId, IReplicatedBlob* blob) = 0; + virtual FB_BOOLEAN executeSql(const char* sql) = 0; + }; + + template + class IReplicatedSessionBaseImpl : public Base + { + public: + typedef IReplicatedSession Declaration; + + IReplicatedSessionBaseImpl(DoNotInherit = DoNotInherit()) + { + static struct VTableImpl : Base::VTable + { + VTableImpl() + { + 
this->version = Base::VERSION; + this->dispose = &Name::cloopdisposeDispatcher; + this->getStatus = &Name::cloopgetStatusDispatcher; + this->startTransaction = &Name::cloopstartTransactionDispatcher; + this->cleanupTransaction = &Name::cloopcleanupTransactionDispatcher; + this->setSequence = &Name::cloopsetSequenceDispatcher; + } + } vTable; + + this->cloopVTable = &vTable; + } + + static IStatus* CLOOP_CARG cloopgetStatusDispatcher(IReplicatedSession* self) throw() + { + try + { + return static_cast(self)->Name::getStatus(); + } + catch (...) + { + StatusType::catchException(0); + return static_cast(0); + } + } + + static IReplicatedTransaction* CLOOP_CARG cloopstartTransactionDispatcher(IReplicatedSession* self, ISC_INT64 number) throw() + { + try + { + return static_cast(self)->Name::startTransaction(number); + } + catch (...) + { + StatusType::catchException(0); + return static_cast(0); + } + } + + static FB_BOOLEAN CLOOP_CARG cloopcleanupTransactionDispatcher(IReplicatedSession* self, ISC_INT64 number) throw() + { + try + { + return static_cast(self)->Name::cleanupTransaction(number); + } + catch (...) + { + StatusType::catchException(0); + return static_cast(0); + } + } + + static FB_BOOLEAN CLOOP_CARG cloopsetSequenceDispatcher(IReplicatedSession* self, const char* name, ISC_INT64 value) throw() + { + try + { + return static_cast(self)->Name::setSequence(name, value); + } + catch (...) + { + StatusType::catchException(0); + return static_cast(0); + } + } + + static void CLOOP_CARG cloopdisposeDispatcher(IDisposable* self) throw() + { + try + { + static_cast(self)->Name::dispose(); + } + catch (...) 
+ { + StatusType::catchException(0); + } + } + }; + + template > > > > + class IReplicatedSessionImpl : public IReplicatedSessionBaseImpl + { + protected: + IReplicatedSessionImpl(DoNotInherit = DoNotInherit()) + { + } + + public: + virtual ~IReplicatedSessionImpl() + { + } + + virtual IStatus* getStatus() = 0; + virtual IReplicatedTransaction* startTransaction(ISC_INT64 number) = 0; + virtual FB_BOOLEAN cleanupTransaction(ISC_INT64 number) = 0; + virtual FB_BOOLEAN setSequence(const char* name, ISC_INT64 value) = 0; + }; }; diff --git a/src/include/gen/codetext.h b/src/include/gen/codetext.h index 6bddf29b5f..453aba8661 100644 --- a/src/include/gen/codetext.h +++ b/src/include/gen/codetext.h @@ -952,6 +952,7 @@ static const struct { {"tom_rsa_sign", 335545248}, {"tom_rsa_verify", 335545249}, {"tom_chacha_key", 335545250}, + {"bad_repl_handle", 335545251}, {"gfix_db_name", 335740929}, {"gfix_invalid_sw", 335740930}, {"gfix_incmp_sw", 335740932}, diff --git a/src/include/gen/iberror.h b/src/include/gen/iberror.h index 81cb8ef5ea..51c547e4b1 100644 --- a/src/include/gen/iberror.h +++ b/src/include/gen/iberror.h @@ -986,6 +986,7 @@ const ISC_STATUS isc_tom_rsa_export = 335545247L; const ISC_STATUS isc_tom_rsa_sign = 335545248L; const ISC_STATUS isc_tom_rsa_verify = 335545249L; const ISC_STATUS isc_tom_chacha_key = 335545250L; +const ISC_STATUS isc_bad_repl_handle = 335545251L; const ISC_STATUS isc_gfix_db_name = 335740929L; const ISC_STATUS isc_gfix_invalid_sw = 335740930L; const ISC_STATUS isc_gfix_incmp_sw = 335740932L; @@ -1460,7 +1461,7 @@ const ISC_STATUS isc_trace_switch_user_only = 337182757L; const ISC_STATUS isc_trace_switch_param_miss = 337182758L; const ISC_STATUS isc_trace_param_act_notcompat = 337182759L; const ISC_STATUS isc_trace_mandatory_switch_miss = 337182760L; -const ISC_STATUS isc_err_max = 1404; +const ISC_STATUS isc_err_max = 1405; #else /* c definitions */ @@ -2416,6 +2417,7 @@ const ISC_STATUS isc_err_max = 1404; #define isc_tom_rsa_sign 
335545248L #define isc_tom_rsa_verify 335545249L #define isc_tom_chacha_key 335545250L +#define isc_bad_repl_handle 335545251L #define isc_gfix_db_name 335740929L #define isc_gfix_invalid_sw 335740930L #define isc_gfix_incmp_sw 335740932L @@ -2890,7 +2892,7 @@ const ISC_STATUS isc_err_max = 1404; #define isc_trace_switch_param_miss 337182758L #define isc_trace_param_act_notcompat 337182759L #define isc_trace_mandatory_switch_miss 337182760L -#define isc_err_max 1404 +#define isc_err_max 1405 #endif diff --git a/src/include/gen/msgs.h b/src/include/gen/msgs.h index 1755878f8f..066c292b48 100644 --- a/src/include/gen/msgs.h +++ b/src/include/gen/msgs.h @@ -955,6 +955,7 @@ Data source : @4"}, /* eds_statement */ {335545248, "RSA-signing data"}, /* tom_rsa_sign */ {335545249, "Verifying RSA-signed data"}, /* tom_rsa_verify */ {335545250, "Invalid key length @1, need 16 or 32"}, /* tom_chacha_key */ + {335545251, "invalid replicator handle"}, /* bad_repl_handle */ {335740929, "data base file name (@1) already given"}, /* gfix_db_name */ {335740930, "invalid switch @1"}, /* gfix_invalid_sw */ {335740932, "incompatible switch combination"}, /* gfix_incmp_sw */ diff --git a/src/include/gen/sql_code.h b/src/include/gen/sql_code.h index a28647ecf0..855e9aa2bc 100644 --- a/src/include/gen/sql_code.h +++ b/src/include/gen/sql_code.h @@ -951,6 +951,7 @@ static const struct { {335545248, -901}, /* 928 tom_rsa_sign */ {335545249, -901}, /* 929 tom_rsa_verify */ {335545250, -901}, /* 930 tom_chacha_key */ + {335545251, -901}, /* 931 bad_repl_handle */ {335740929, -901}, /* 1 gfix_db_name */ {335740930, -901}, /* 2 gfix_invalid_sw */ {335740932, -901}, /* 4 gfix_incmp_sw */ diff --git a/src/include/gen/sql_state.h b/src/include/gen/sql_state.h index ef314ce5e1..ea42e53306 100644 --- a/src/include/gen/sql_state.h +++ b/src/include/gen/sql_state.h @@ -951,6 +951,7 @@ static const struct { {335545248, "22023"}, // 928 tom_rsa_sign {335545249, "22023"}, // 929 tom_rsa_verify 
{335545250, "22023"}, // 930 tom_chacha_key + {335545251, "08003"}, // 931 bad_repl_handle {335740929, "00000"}, // 1 gfix_db_name {335740930, "00000"}, // 2 gfix_invalid_sw {335740932, "00000"}, // 4 gfix_incmp_sw diff --git a/src/jrd/Attachment.cpp b/src/jrd/Attachment.cpp index 7e1a121f6f..13780c9341 100644 --- a/src/jrd/Attachment.cpp +++ b/src/jrd/Attachment.cpp @@ -234,6 +234,7 @@ Jrd::Attachment::Attachment(MemoryPool* pool, Database* dbb) att_utility(UTIL_NONE), att_procedures(*pool), att_functions(*pool), + att_generators(*pool), att_internal(*pool), att_dyn_req(*pool), att_dec_status(DecimalStatus::DEFAULT), diff --git a/src/jrd/Attachment.h b/src/jrd/Attachment.h index 093b5124a8..fac5644baa 100644 --- a/src/jrd/Attachment.h +++ b/src/jrd/Attachment.h @@ -49,6 +49,11 @@ namespace EDS { class Connection; } +namespace Replication +{ + class TableMatcher; +} + class CharSetContainer; namespace Jrd @@ -84,6 +89,7 @@ namespace Jrd class Function; class JrdStatement; class Validation; + class Applier; struct DSqlCacheItem { @@ -294,6 +300,55 @@ public: Firebird::RefPtr jStable; }; + class GeneratorFinder + { + public: + explicit GeneratorFinder(MemoryPool& pool) + : m_objects(pool) + {} + + void store(SLONG id, const Firebird::MetaName& name) + { + fb_assert(id >= 0); + fb_assert(name.hasData()); + + if (id < (int) m_objects.getCount()) + { + fb_assert(m_objects[id].isEmpty()); + m_objects[id] = name; + } + else + { + m_objects.resize(id + 1); + m_objects[id] = name; + } + } + + bool lookup(SLONG id, Firebird::MetaName& name) + { + if (id < (int) m_objects.getCount()) + { + name = m_objects[id]; + return true; + } + + return false; + } + + SLONG lookup(const Firebird::MetaName& name) + { + FB_SIZE_T pos; + + if (m_objects.find(name, pos)) + return (SLONG) pos; + + return false; + } + + private: + Firebird::Array m_objects; + }; + public: static Attachment* create(Database* dbb); static void destroy(Attachment* const attachment); @@ -366,6 +421,10 @@ public: 
USHORT att_original_timezone; USHORT att_current_timezone; + Firebird::IReplicatedSession* att_replicator; + Firebird::AutoPtr att_repl_matcher; + Firebird::AutoPtr att_repl_applier; + enum UtilType { UTIL_NONE, UTIL_GBAK, UTIL_GFIX, UTIL_GSTAT }; UtilType att_utility; @@ -377,6 +436,7 @@ public: TrigVector* att_triggers[DB_TRIGGER_MAX]; TrigVector* att_ddl_triggers; Firebird::Array att_functions; // User defined functions + GeneratorFinder att_generators; Firebird::Array att_internal; // internal statements Firebird::Array att_dyn_req; // internal dyn statements @@ -414,6 +474,11 @@ public: jrd_tra* getSysTransaction(); void setSysTransaction(jrd_tra* trans); // used only by TRA_init + bool isSystem() const + { + return (att_flags & ATT_system); + } + bool isGbak() const; bool isRWGbak() const; bool isUtility() const; // gbak, gfix and gstat. diff --git a/src/jrd/Database.cpp b/src/jrd/Database.cpp index dcc34f6f82..be2492be20 100644 --- a/src/jrd/Database.cpp +++ b/src/jrd/Database.cpp @@ -33,6 +33,7 @@ #include "../jrd/Database.h" #include "../jrd/nbak.h" #include "../jrd/tra.h" +#include "../jrd/pag_proto.h" #include "../jrd/tpc_proto.h" #include "../jrd/lck_proto.h" #include "../jrd/CryptoManager.h" @@ -320,4 +321,34 @@ namespace Jrd reset(); } + void Database::ensureGuid(thread_db* tdbb) + { + if (readOnly()) + return; + + if (!dbb_guid.alignment) // hackery way to check whether it was loaded + { + GenerateGuid(&dbb_guid); + PAG_set_db_guid(tdbb, dbb_guid); + } + } + + FB_UINT64 Database::getReplSequence(thread_db* tdbb) + { + USHORT length = sizeof(FB_UINT64); + if (!PAG_get_clump(tdbb, Ods::HDR_repl_seq, &length, (UCHAR*) &dbb_repl_sequence)) + return 0; + + return dbb_repl_sequence; + } + + void Database::setReplSequence(thread_db* tdbb, FB_UINT64 sequence) + { + if (dbb_repl_sequence != sequence) + { + PAG_set_repl_sequence(tdbb, sequence); + dbb_repl_sequence = sequence; + } + } + } // namespace diff --git a/src/jrd/Database.h b/src/jrd/Database.h index 
383be794e6..eacbe3857f 100644 --- a/src/jrd/Database.h +++ b/src/jrd/Database.h @@ -328,6 +328,8 @@ public: return fb_utils::genUniqueId(); } + Firebird::Guid dbb_guid; // database GUID + Firebird::SyncObject dbb_sync; Firebird::SyncObject dbb_sys_attach; // synchronize operations with dbb_sys_attachments @@ -435,6 +437,9 @@ public: time_t dbb_linger_end; Firebird::RefPtr dbb_plugin_config; + FB_UINT64 dbb_repl_sequence; // replication sequence + ReplicaMode dbb_replica_mode; // replica access mode + // returns true if primary file is located on raw device bool onRawDevice() const; @@ -462,6 +467,16 @@ public: void registerModule(Module&); + bool isReplica() const + { + return (dbb_replica_mode != REPLICA_NONE); + } + + bool isReplica(ReplicaMode mode) const + { + return (dbb_replica_mode == mode); + } + private: Database(MemoryPool* p, Firebird::IPluginConfig* pConf, bool shared) : dbb_permanent(p), @@ -486,7 +501,9 @@ private: dbb_init_fini(FB_NEW_POOL(*getDefaultMemoryPool()) ExistenceRefMutex()), dbb_linger_seconds(0), dbb_linger_end(0), - dbb_plugin_config(pConf) + dbb_plugin_config(pConf), + dbb_repl_sequence(0), + dbb_replica_mode(REPLICA_NONE) { dbb_pools.add(p); } @@ -520,6 +537,10 @@ public: static void garbage_collector(Database* dbb); void exceptionHandler(const Firebird::Exception& ex, ThreadFinishSync::ThreadRoutine* routine); + void ensureGuid(thread_db* tdbb); + FB_UINT64 getReplSequence(thread_db* tdbb); + void setReplSequence(thread_db* tdbb, FB_UINT64 sequence); + private: //static int blockingAstSharedCounter(void*); static int blocking_ast_sweep(void* ast_object); diff --git a/src/jrd/EngineInterface.h b/src/jrd/EngineInterface.h index 79da92d1f0..9bb926d134 100644 --- a/src/jrd/EngineInterface.h +++ b/src/jrd/EngineInterface.h @@ -223,6 +223,34 @@ private: void freeEngineData(Firebird::CheckStatusWrapper* status); }; +class JReplicator FB_FINAL : + public Firebird::RefCntIface > +{ +public: + // IReplicator implementation + int release(); + 
void process(Firebird::CheckStatusWrapper* status, unsigned length, const unsigned char* data); + void close(Firebird::CheckStatusWrapper* status); + +public: + JReplicator(StableAttachmentPart* sa); + + StableAttachmentPart* getAttachment() + { + return sAtt; + } + + JReplicator* getHandle() throw() + { + return this; + } + +private: + Firebird::RefPtr sAtt; + + void freeEngineData(Firebird::CheckStatusWrapper* status); +}; + class JStatement FB_FINAL : public Firebird::RefCntIface > { @@ -399,6 +427,7 @@ public: Firebird::IBatch* createBatch(Firebird::CheckStatusWrapper* status, Firebird::ITransaction* transaction, unsigned stmtLength, const char* sqlStmt, unsigned dialect, Firebird::IMessageMetadata* inMetadata, unsigned parLength, const unsigned char* par); + Firebird::IReplicator* createReplicator(Firebird::CheckStatusWrapper* status); public: explicit JAttachment(StableAttachmentPart* js); diff --git a/src/jrd/JrdStatement.cpp b/src/jrd/JrdStatement.cpp index 4a22db61d3..9850695efb 100644 --- a/src/jrd/JrdStatement.cpp +++ b/src/jrd/JrdStatement.cpp @@ -68,7 +68,7 @@ JrdStatement::JrdStatement(thread_db* tdbb, MemoryPool* p, CompilerScratch* csb) makeSubRoutines(tdbb, this, csb, csb->subProcedures); makeSubRoutines(tdbb, this, csb, csb->subFunctions); - topNode = (csb->csb_node->getKind() == DmlNode::KIND_STATEMENT) ? + topNode = (csb->csb_node && csb->csb_node->getKind() == DmlNode::KIND_STATEMENT) ? 
static_cast(csb->csb_node) : NULL; accessList = csb->csb_access; @@ -234,10 +234,13 @@ JrdStatement* JrdStatement::makeStatement(thread_db* tdbb, CompilerScratch* csb, DmlNode::doPass1(tdbb, csb, fieldInfo.validationExpr.getAddress()); } - if (csb->csb_node->getKind() == DmlNode::KIND_STATEMENT) - StmtNode::doPass2(tdbb, csb, reinterpret_cast(&csb->csb_node), NULL); - else - ExprNode::doPass2(tdbb, csb, &csb->csb_node); + if (csb->csb_node) + { + if (csb->csb_node->getKind() == DmlNode::KIND_STATEMENT) + StmtNode::doPass2(tdbb, csb, reinterpret_cast(&csb->csb_node), NULL); + else + ExprNode::doPass2(tdbb, csb, &csb->csb_node); + } // Compile (pass2) domains DEFAULT and constraints for (bool found = accessor.getFirst(); found; found = accessor.getNext()) diff --git a/src/jrd/Savepoint.cpp b/src/jrd/Savepoint.cpp index 4f5ffc8f58..3cd31a5f51 100644 --- a/src/jrd/Savepoint.cpp +++ b/src/jrd/Savepoint.cpp @@ -391,7 +391,8 @@ Savepoint* Savepoint::rollback(thread_db* tdbb, Savepoint* prior) DFW_delete_deferred(m_transaction, m_number); m_flags &= ~SAV_force_dfw; - tdbb->tdbb_flags |= TDBB_verb_cleanup; + AutoSetRestoreFlag verbCleanupFlag(&tdbb->tdbb_flags, TDBB_verb_cleanup, true); + tdbb->setTransaction(m_transaction); while (m_actions) @@ -406,13 +407,11 @@ Savepoint* Savepoint::rollback(thread_db* tdbb, Savepoint* prior) } tdbb->setTransaction(old_tran); - tdbb->tdbb_flags &= ~TDBB_verb_cleanup; } catch (const Exception& ex) { Arg::StatusVector error(ex); tdbb->setTransaction(old_tran); - tdbb->tdbb_flags &= ~TDBB_verb_cleanup; m_transaction->tra_flags |= TRA_invalidated; error.prepend(Arg::Gds(isc_savepoint_backout_err)); error.raise(); diff --git a/src/jrd/Savepoint.h b/src/jrd/Savepoint.h index 8bd74c77e2..b70632701c 100644 --- a/src/jrd/Savepoint.h +++ b/src/jrd/Savepoint.h @@ -114,6 +114,7 @@ namespace Jrd // Savepoint flags static const USHORT SAV_root = 1; // transaction-level savepoint static const USHORT SAV_force_dfw = 2; // DFW is present even if 
savepoint is empty + static const USHORT SAV_replicated = 4; // savepoint has already been replicated public: explicit Savepoint(jrd_tra* transaction) @@ -189,6 +190,11 @@ namespace Jrd return (m_flags & SAV_root); } + bool isReplicated() const + { + return (m_flags & SAV_replicated); + } + bool isChanging() const { return (m_count != 0); @@ -204,6 +210,11 @@ namespace Jrd m_flags |= SAV_force_dfw; } + void markAsReplicated() + { + m_flags |= SAV_replicated; + } + Savepoint* moveToStack(Savepoint*& target) { // Relink savepoint to the top of the provided savepoint stack. diff --git a/src/jrd/SysFunction.cpp b/src/jrd/SysFunction.cpp index 0132ddecc7..4d7fcd4887 100644 --- a/src/jrd/SysFunction.cpp +++ b/src/jrd/SysFunction.cpp @@ -321,6 +321,7 @@ const char EXT_CONN_POOL_IDLE[] = "EXT_CONN_POOL_IDLE_COUNT", EXT_CONN_POOL_ACTIVE[] = "EXT_CONN_POOL_ACTIVE_COUNT", EXT_CONN_POOL_LIFETIME[] = "EXT_CONN_POOL_LIFETIME", + REPLICATION_SEQ_NAME[] = "REPLICATION_SEQUENCE", // SYSTEM namespace: connection wise items SESSION_ID_NAME[] = "SESSION_ID", NETWORK_PROTOCOL_NAME[] = "NETWORK_PROTOCOL", @@ -3973,6 +3974,8 @@ dsc* evlGetContext(thread_db* tdbb, const SysFunction*, const NestValueArray& ar } else if (nameStr == EXT_CONN_POOL_LIFETIME) resultStr.printf("%d", EDS::Manager::getConnPool()->getLifeTime()); + else if (nameStr == REPLICATION_SEQ_NAME) + resultStr.printf("%" UQUADFORMAT, dbb->getReplSequence(tdbb)); else { // "Context variable %s is not found in namespace %s" diff --git a/src/jrd/SystemPrivileges.h b/src/jrd/SystemPrivileges.h index 3e01473542..97a066c8c2 100644 --- a/src/jrd/SystemPrivileges.h +++ b/src/jrd/SystemPrivileges.h @@ -64,6 +64,7 @@ SYSTEM_PRIVILEGE(GRANT_REVOKE_ANY_DDL_RIGHT) SYSTEM_PRIVILEGE(CREATE_PRIVILEGED_ROLES) SYSTEM_PRIVILEGE(GET_DBCRYPT_INFO) SYSTEM_PRIVILEGE(MODIFY_EXT_CONN_POOL) +SYSTEM_PRIVILEGE(REPLICATE_INTO_DATABASE) #ifdef FB_JRD_SYSTEM_PRIVILEGES_TMP maxSystemPrivilege diff --git a/src/jrd/blb.h b/src/jrd/blb.h index 
5243a112b6..1bb8f1925f 100644 --- a/src/jrd/blb.h +++ b/src/jrd/blb.h @@ -73,7 +73,7 @@ public: { } - jrd_rel* blb_relation; // Relation, if known + jrd_rel* blb_relation; // Relation, if known JBlob* blb_interface; ULONG blb_length; // Total length of data sans segments diff --git a/src/jrd/constants.h b/src/jrd/constants.h index 7b0e60b4bf..a67b0eedf3 100644 --- a/src/jrd/constants.h +++ b/src/jrd/constants.h @@ -296,6 +296,12 @@ enum InfoType MAX_INFO_TYPE }; +enum ReplicaMode { + REPLICA_NONE = 0, + REPLICA_READ_ONLY = 1, + REPLICA_READ_WRITE = 2 +}; + enum TriggerType { PRE_STORE_TRIGGER = 1, POST_STORE_TRIGGER = 2, diff --git a/src/jrd/dfw.epp b/src/jrd/dfw.epp index 064f288192..6fdb06818c 100644 --- a/src/jrd/dfw.epp +++ b/src/jrd/dfw.epp @@ -1497,8 +1497,9 @@ void DFW_perform_work(thread_db* tdbb, jrd_tra* transaction) { more = false; try { - tdbb->tdbb_flags |= (TDBB_dont_post_dfw | TDBB_use_db_page_space | + const auto flags = (TDBB_dont_post_dfw | TDBB_use_db_page_space | (phase == 0 ? 
TDBB_dfw_cleanup : 0)); + AutoSetRestoreFlag dfwFlags(&tdbb->tdbb_flags, flags, true); for (const deferred_task* task = task_table; task->task_type != dfw_null; ++task) { @@ -1519,8 +1520,6 @@ void DFW_perform_work(thread_db* tdbb, jrd_tra* transaction) } } - tdbb->tdbb_flags &= ~(TDBB_dont_post_dfw | TDBB_use_db_page_space | TDBB_dfw_cleanup); - if (!phase) { fb_utils::copyStatus(tdbb->tdbb_status_vector, &err_status); @@ -1531,8 +1530,6 @@ void DFW_perform_work(thread_db* tdbb, jrd_tra* transaction) } catch (const Firebird::Exception& ex) { - tdbb->tdbb_flags &= ~(TDBB_dont_post_dfw | TDBB_use_db_page_space | TDBB_dfw_cleanup); - // Do any necessary cleanup if (!phase) { diff --git a/src/jrd/dpm.epp b/src/jrd/dpm.epp index 2135cc04c2..253d38ba4c 100644 --- a/src/jrd/dpm.epp +++ b/src/jrd/dpm.epp @@ -61,6 +61,7 @@ #include "../jrd/mov_proto.h" #include "../jrd/ods_proto.h" #include "../jrd/pag_proto.h" +#include "../jrd/replication/Publisher.h" #include "../common/StatusArg.h" DATABASE DB = FILENAME "ODS.RDB"; @@ -1365,7 +1366,10 @@ SINT64 DPM_gen_id(thread_db* tdbb, SLONG generator, bool initialize, SINT64 val) window.win_page = (*vector)[sequence]; window.win_flags = 0; - const SSHORT lock_mode = dbb->readOnly() ? LCK_read : LCK_write; + const bool isReadOnly = dbb->readOnly() || + (dbb->isReplica(REPLICA_READ_ONLY) && !(tdbb->tdbb_flags & TDBB_replicator)); + + const SSHORT lock_mode = isReadOnly ? 
LCK_read : LCK_write; generator_page* const page = (generator_page*) CCH_FETCH(tdbb, &window, lock_mode, pag_ids); /* If we are in ODS >= 10, then we have a pointer to an int64 value in the @@ -1377,29 +1381,40 @@ SINT64 DPM_gen_id(thread_db* tdbb, SLONG generator, bool initialize, SINT64 val) */ SINT64* const ptr = ((SINT64*) (page->gpg_values)) + offset; - if (val || initialize) + if (!val && !initialize) // read-only case: zero increment { - if (dbb->readOnly()) - { - CCH_RELEASE(tdbb, &window); - ERR_post(Arg::Gds(isc_read_only_database)); - } - - CCH_MARK_SYSTEM(tdbb, &window); - - if (initialize) - *ptr = val; - else - *ptr += val; - - if (transaction) - transaction->tra_flags |= TRA_write; + const SINT64 value = *ptr; + CCH_RELEASE(tdbb, &window); + return value; } + if (dbb->readOnly()) + { + CCH_RELEASE(tdbb, &window); + ERR_post(Arg::Gds(isc_read_only_database)); + } + else if (isReadOnly) + { + CCH_RELEASE(tdbb, &window); + ERR_post(Arg::Gds(isc_read_only_trans)); + } + + CCH_MARK_SYSTEM(tdbb, &window); + + if (initialize) + *ptr = val; + else + *ptr += val; + const SINT64 value = *ptr; CCH_RELEASE(tdbb, &window); + if (transaction) + transaction->tra_flags |= TRA_write; + + REPL_gen_id(tdbb, generator, *ptr); + return value; } diff --git a/src/jrd/exe.cpp b/src/jrd/exe.cpp index 69eea04022..4628533423 100644 --- a/src/jrd/exe.cpp +++ b/src/jrd/exe.cpp @@ -639,9 +639,8 @@ void EXE_receive(thread_db* tdbb, jrd_tra* transaction = request->req_transaction; - if (!(request->req_flags & req_active)) { + if (!(request->req_flags & req_active)) ERR_post(Arg::Gds(isc_req_sync)); - } const SavNumber mergeSavNumber = transaction->tra_save_point ? 
transaction->tra_save_point->getNumber() : 0; diff --git a/src/jrd/jrd.cpp b/src/jrd/jrd.cpp index 735891e8e9..1ab87ce4fe 100644 --- a/src/jrd/jrd.cpp +++ b/src/jrd/jrd.cpp @@ -118,6 +118,8 @@ #include "../common/config/config.h" #include "../common/config/dir_list.h" #include "../common/db_alias.h" +#include "../jrd/replication/Publisher.h" +#include "../jrd/replication/Applier.h" #include "../jrd/trace/TraceManager.h" #include "../jrd/trace/TraceObjects.h" #include "../jrd/trace/TraceJrdHelpers.h" @@ -677,6 +679,14 @@ namespace validateHandle(tdbb, batch->getAttachment()); } + inline void validateHandle(thread_db* tdbb, JReplicator* const replicator) + { + if (!replicator) + status_exception::raise(Arg::Gds(isc_bad_repl_handle)); + + validateHandle(tdbb, replicator->getAttachment()->getHandle()); + } + class AttachmentHolder { public: @@ -980,6 +990,8 @@ public: bool dpb_reset_icu; bool dpb_map_attach; ULONG dpb_remote_flags; + ReplicaMode dpb_replica_mode; + bool dpb_set_db_replica; // here begin compound objects // for constructor to work properly dpb_user_name @@ -1094,9 +1106,9 @@ static JAttachment* initAttachment(thread_db*, const PathName&, const PathName&, const DatabaseOptions&, RefMutexUnlock&, IPluginConfig*, JProvider*); static JAttachment* create_attachment(const PathName&, Database*, const DatabaseOptions&, bool newDb); static void prepare_tra(thread_db*, jrd_tra*, USHORT, const UCHAR*); +static void release_attachment(thread_db*, Attachment*); static void start_transaction(thread_db* tdbb, bool transliterate, jrd_tra** tra_handle, Jrd::Attachment* attachment, unsigned int tpb_length, const UCHAR* tpb); -static void release_attachment(thread_db*, Jrd::Attachment*); static void rollback(thread_db*, jrd_tra*, const bool); static void purge_attachment(thread_db* tdbb, StableAttachmentPart* sAtt, unsigned flags = 0); static void getUserInfo(UserId&, const DatabaseOptions&, const char*, @@ -1662,8 +1674,10 @@ JAttachment* 
JProvider::internalAttach(CheckStatusWrapper* user_status, const ch PAG_attachment_id(tdbb); + bool cleanupTransactions = false; + if (!options.dpb_verify && CCH_exclusive(tdbb, LCK_PW, LCK_NO_WAIT, NULL)) - TRA_cleanup(tdbb); + cleanupTransactions = TRA_cleanup(tdbb); if (invalid_client_SQL_dialect) { @@ -1876,7 +1890,7 @@ JAttachment* JProvider::internalAttach(CheckStatusWrapper* user_status, const ch if (options.dpb_sweep_interval > -1) { validateAccess(tdbb, attachment, CHANGE_HEADER_SETTINGS); - PAG_sweep_interval(tdbb, options.dpb_sweep_interval); + PAG_set_sweep_interval(tdbb, options.dpb_sweep_interval); dbb->dbb_sweep_interval = options.dpb_sweep_interval; } @@ -1918,11 +1932,25 @@ JAttachment* JProvider::internalAttach(CheckStatusWrapper* user_status, const ch dbb->dbb_linger_seconds = 0; } + if (options.dpb_set_db_replica) + { + validateAccess(tdbb, attachment, CHANGE_HEADER_SETTINGS); + if (!CCH_exclusive(tdbb, LCK_EX, WAIT_PERIOD, NULL)) + { + ERR_post(Arg::Gds(isc_lock_timeout) << + Arg::Gds(isc_obj_in_use) << Arg::Str(org_filename)); + } + PAG_set_db_replica(tdbb, options.dpb_replica_mode); + dbb->dbb_linger_seconds = 0; + } + CCH_init2(tdbb); VIO_init(tdbb); CCH_release_exclusive(tdbb); + REPL_attach(tdbb, cleanupTransactions); + attachment->att_trace_manager->activate(); if (attachment->att_trace_manager->needs(ITraceFactory::TRACE_EVENT_ATTACH)) { @@ -1934,9 +1962,8 @@ JAttachment* JProvider::internalAttach(CheckStatusWrapper* user_status, const ch // Recover database after crash during backup difference file merge dbb->dbb_backup_manager->endBackup(tdbb, true); // true = do recovery - if (options.dpb_sweep & isc_dpb_records) { + if (options.dpb_sweep & isc_dpb_records) TRA_sweep(tdbb); - } dbb->dbb_crypto_manager->startCryptThread(tdbb); @@ -2811,7 +2838,7 @@ JAttachment* JProvider::createDatabase(CheckStatusWrapper* user_status, const ch if (options.dpb_sweep_interval > -1) { - PAG_sweep_interval(tdbb, options.dpb_sweep_interval); + 
PAG_set_sweep_interval(tdbb, options.dpb_sweep_interval); dbb->dbb_sweep_interval = options.dpb_sweep_interval; } @@ -2837,6 +2864,17 @@ JAttachment* JProvider::createDatabase(CheckStatusWrapper* user_status, const ch PAG_set_db_readonly(tdbb, options.dpb_db_readonly); } + if (options.dpb_set_db_replica) + { + if (!CCH_exclusive(tdbb, LCK_EX, WAIT_PERIOD, &dbbGuard)) + { + ERR_post(Arg::Gds(isc_lock_timeout) << + Arg::Gds(isc_obj_in_use) << Arg::Str(org_filename)); + } + + PAG_set_db_replica(tdbb, options.dpb_replica_mode); + } + PAG_attachment_id(tdbb); Monitoring::publishAttachment(tdbb); @@ -4878,6 +4916,40 @@ IBatch* JAttachment::createBatch(CheckStatusWrapper* status, ITransaction* trans } +IReplicator* JAttachment::createReplicator(CheckStatusWrapper* user_status) +{ + JReplicator* jr = NULL; + + try + { + EngineContextHolder tdbb(user_status, this, FB_FUNCTION); + check_database(tdbb); + + try + { + const auto att = tdbb->getAttachment(); + + if (!att->att_repl_applier) + att->att_repl_applier = Applier::create(tdbb); + + jr = FB_NEW JReplicator(getStable()); + jr->addRef(); + } + catch (const Exception& ex) + { + transliterateException(tdbb, ex, user_status, "JResultSet::fetchNext"); + } + } + catch (const Exception& ex) + { + ex.stuffException(user_status); + } + + successful_completion(user_status); + return jr; +} + + int JResultSet::fetchNext(CheckStatusWrapper* user_status, void* buffer) { try @@ -5956,6 +6028,116 @@ void JBatch::cancel(CheckStatusWrapper* status) } +JReplicator::JReplicator(StableAttachmentPart* sa) + : sAtt(sa) +{ } + + +int JReplicator::release() +{ + if (--refCounter != 0) + return 1; + + LocalStatus status; + CheckStatusWrapper statusWrapper(&status); + + freeEngineData(&statusWrapper); + + delete this; + return 0; +} + + +void JReplicator::freeEngineData(Firebird::CheckStatusWrapper* user_status) +{ + try + { + EngineContextHolder tdbb(user_status, this, FB_FUNCTION); + check_database(tdbb); + + try + { + const auto att = 
sAtt->getHandle(); + if (att) + att->att_repl_applier.reset(); + } + catch (const Exception& ex) + { + transliterateException(tdbb, ex, user_status, FB_FUNCTION); + return; + } + } + catch (const Exception& ex) + { + ex.stuffException(user_status); + return; + } + + successful_completion(user_status); +} + + +void JReplicator::process(CheckStatusWrapper* status, unsigned length, const UCHAR* data) +{ + try + { + EngineContextHolder tdbb(status, this, FB_FUNCTION); + check_database(tdbb); + + try + { + const auto att = sAtt->getHandle(); + att->att_repl_applier->process(tdbb, length, data); + } + catch (const Exception& ex) + { + transliterateException(tdbb, ex, status, "JReplicator::process"); + return; + } + + trace_warning(tdbb, status, "JBatch::add"); + } + catch (const Exception& ex) + { + ex.stuffException(status); + return; + } + + successful_completion(status); +} + + +void JReplicator::close(CheckStatusWrapper* status) +{ + try + { + EngineContextHolder tdbb(status, this, FB_FUNCTION); + check_database(tdbb); + + try + { + const auto att = sAtt->getHandle(); + att->att_repl_applier->shutdown(tdbb); + att->att_repl_applier.reset(); + } + catch (const Exception& ex) + { + transliterateException(tdbb, ex, status, "JReplicator::close"); + return; + } + + trace_warning(tdbb, status, "JBatch::add"); + } + catch (const Exception& ex) + { + ex.stuffException(status); + return; + } + + successful_completion(status); +} + + void JAttachment::ping(CheckStatusWrapper* user_status) { /************************************** @@ -6652,6 +6834,11 @@ void DatabaseOptions::get(const UCHAR* dpb, USHORT dpb_length, bool& invalid_cli rdr.getString(dpb_session_tz); break; + case isc_dpb_set_db_replica: + dpb_set_db_replica = true; + dpb_replica_mode = (ReplicaMode) rdr.getInt(); + break; + default: break; } @@ -7002,7 +7189,7 @@ static void prepare_tra(thread_db* tdbb, jrd_tra* transaction, USHORT length, co } -static void release_attachment(thread_db* tdbb, Jrd::Attachment* 
attachment) +void release_attachment(thread_db* tdbb, Jrd::Attachment* attachment) { /************************************** * @@ -7022,6 +7209,12 @@ static void release_attachment(thread_db* tdbb, Jrd::Attachment* attachment) if (!attachment) return; + if (attachment->att_replicator) + attachment->att_replicator->dispose(); + + if (attachment->att_repl_applier) + attachment->att_repl_applier->shutdown(tdbb); + if (dbb->dbb_crypto_manager) dbb->dbb_crypto_manager->detach(tdbb, attachment); @@ -8350,13 +8543,11 @@ ISC_STATUS thread_db::checkCancelState(ISC_STATUS* secondary) { if (database->dbb_ast_flags & DBB_shutdown) return isc_shutdown; - else if (!(tdbb_flags & TDBB_shutdown_manager)) - { - if (secondary) - *secondary = attachment->getStable() ? attachment->getStable()->getShutError() : 0; - return isc_att_shutdown; - } + if (secondary) + *secondary = attachment->getStable() ? attachment->getStable()->getShutError() : 0; + + return isc_att_shutdown; } // If a cancel has been raised, defer its acknowledgement diff --git a/src/jrd/jrd.h b/src/jrd/jrd.h index 7021bcf5e0..c86f7af37c 100644 --- a/src/jrd/jrd.h +++ b/src/jrd/jrd.h @@ -458,21 +458,22 @@ private: // tdbb_flags -const USHORT TDBB_sweeper = 1; // Thread sweeper or garbage collector -const USHORT TDBB_no_cache_unwind = 2; // Don't unwind page buffer cache -const USHORT TDBB_backup_write_locked = 4; // BackupManager has write lock on LCK_backup_database -const USHORT TDBB_stack_trace_done = 8; // PSQL stack trace is added into status-vector -const USHORT TDBB_shutdown_manager = 16; // Server shutdown thread -const USHORT TDBB_dont_post_dfw = 32; // dont post DFW tasks as deferred work is performed now -const USHORT TDBB_sys_error = 64; // error shouldn't be handled by the looper -const USHORT TDBB_verb_cleanup = 128; // verb cleanup is in progress -const USHORT TDBB_use_db_page_space = 256; // use database (not temporary) page space in GTT operations -const USHORT TDBB_detaching = 512; // detach is in 
progress -const USHORT TDBB_wait_cancel_disable = 1024; // don't cancel current waiting operation -const USHORT TDBB_cache_unwound = 2048; // page cache was unwound -const USHORT TDBB_trusted_ddl = 4096; // skip DDL permission checks. Set after DDL permission check and clear after DDL execution -const USHORT TDBB_reset_stack = 8192; // stack should be reset after stack overflow exception -const USHORT TDBB_dfw_cleanup = 16384; // DFW cleanup phase is active +const ULONG TDBB_sweeper = 1; // Thread sweeper or garbage collector +const ULONG TDBB_no_cache_unwind = 2; // Don't unwind page buffer cache +const ULONG TDBB_backup_write_locked = 4; // BackupManager has write lock on LCK_backup_database +const ULONG TDBB_stack_trace_done = 8; // PSQL stack trace is added into status-vector +const ULONG TDBB_dont_post_dfw = 16; // dont post DFW tasks as deferred work is performed now +const ULONG TDBB_sys_error = 32; // error shouldn't be handled by the looper +const ULONG TDBB_verb_cleanup = 64; // verb cleanup is in progress +const ULONG TDBB_use_db_page_space = 128; // use database (not temporary) page space in GTT operations +const ULONG TDBB_detaching = 256; // detach is in progress +const ULONG TDBB_wait_cancel_disable = 512; // don't cancel current waiting operation +const ULONG TDBB_cache_unwound = 1024; // page cache was unwound +const ULONG TDBB_trusted_ddl = 2048; // skip DDL permission checks. 
Set after DDL permission check and clear after DDL execution +const ULONG TDBB_reset_stack = 4096; // stack should be reset after stack overflow exception +const ULONG TDBB_dfw_cleanup = 8192; // DFW cleanup phase is active +const ULONG TDBB_repl_sql = 16384; // SQL statement is being replicated +const ULONG TDBB_replicator = 32768; // Replicator class thread_db : public Firebird::ThreadData { @@ -521,8 +522,8 @@ public: } FbStatusVector* tdbb_status_vector; - SSHORT tdbb_quantum; // Cycles remaining until voluntary schedule - USHORT tdbb_flags; + SLONG tdbb_quantum; // Cycles remaining until voluntary schedule + ULONG tdbb_flags; TraNumber tdbb_temp_traid; // current temporary table scope diff --git a/src/jrd/ods.h b/src/jrd/ods.h index 78e98e7606..9de23b4c95 100644 --- a/src/jrd/ods.h +++ b/src/jrd/ods.h @@ -442,10 +442,12 @@ const UCHAR HDR_last_page = 3; // Last logical page number of file const UCHAR HDR_sweep_interval = 4; // Transactions between sweeps const UCHAR HDR_crypt_checksum = 5; // Checksum of critical crypt parameters const UCHAR HDR_difference_file = 6; // Delta file that is used during backup lock -const UCHAR HDR_backup_guid = 7; // UID generated on each switch into backup mode +const UCHAR HDR_backup_guid = 7; // GUID generated on each switch into backup mode const UCHAR HDR_crypt_key = 8; // Name of a key used to crypt database const UCHAR HDR_crypt_hash = 9; // Validator of key correctness -const UCHAR HDR_max = 10; // Maximum HDR_clump value +const UCHAR HDR_db_guid = 10; // Database GUID +const UCHAR HDR_repl_seq = 11; // Replication changelog sequence +const UCHAR HDR_max = 11; // Maximum HDR_clump value // Header page flags @@ -456,8 +458,10 @@ const USHORT hdr_no_reserve = 0x8; // 8 don't reserve space for versions const USHORT hdr_SQL_dialect_3 = 0x10; // 16 database SQL dialect 3 const USHORT hdr_read_only = 0x20; // 32 Database is ReadOnly. 
If not set, DB is RW const USHORT hdr_encrypted = 0x40; // 64 Database is encrypted + const USHORT hdr_backup_mask = 0xC00; const USHORT hdr_shutdown_mask = 0x1080; +const USHORT hdr_replica_mask = 0x6000; // Values for backup mask const USHORT hdr_nbak_normal = 0x000; // Normal mode. Changes are simply written to main files @@ -471,6 +475,11 @@ const USHORT hdr_shutdown_multi = 0x80; const USHORT hdr_shutdown_full = 0x1000; const USHORT hdr_shutdown_single = 0x1080; +// Values for replica mask +const USHORT hdr_replica_none = 0x0000; +const USHORT hdr_replica_read_only = 0x2000; +const USHORT hdr_replica_read_write = 0x4000; + // Page Inventory Page diff --git a/src/jrd/pag.cpp b/src/jrd/pag.cpp index 507abbacdd..c2c8979e90 100644 --- a/src/jrd/pag.cpp +++ b/src/jrd/pag.cpp @@ -1030,8 +1030,7 @@ void PAG_format_pip(thread_db* tdbb, PageSpace& pageSpace) } -#ifdef NOT_USED_OR_REPLACED -bool PAG_get_clump(thread_db* tdbb, SLONG page_num, USHORT type, USHORT* inout_len, UCHAR* entry) +bool PAG_get_clump(thread_db* tdbb, USHORT type, USHORT* inout_len, UCHAR* entry) { /*********************************************** * @@ -1040,7 +1039,7 @@ bool PAG_get_clump(thread_db* tdbb, SLONG page_num, USHORT type, USHORT* inout_l *********************************************** * * Functional description - * Find 'type' clump in page_num + * Find 'type' clump * true - Found it * false - Not present * RETURNS @@ -1050,11 +1049,7 @@ bool PAG_get_clump(thread_db* tdbb, SLONG page_num, USHORT type, USHORT* inout_l **************************************/ SET_TDBB(tdbb); - WIN window(DB_PAGE_SPACE, page_num); - - if (page_num != HEADER_PAGE) - ERR_post(Arg::Gds(isc_page_type_err)); - + WIN window(DB_PAGE_SPACE, HEADER_PAGE); pag* page = CCH_FETCH(tdbb, &window, LCK_read, pag_header); UCHAR* entry_p; @@ -1082,7 +1077,6 @@ bool PAG_get_clump(thread_db* tdbb, SLONG page_num, USHORT type, USHORT* inout_l return true; } -#endif void PAG_header(thread_db* tdbb, bool info) @@ -1207,6 +1201,17 
@@ void PAG_header(thread_db* tdbb, bool info) dbb->dbb_ast_flags |= DBB_shutdown_single; } + const USHORT replica_mode = header->hdr_flags & hdr_replica_mask; + if (replica_mode) + { + if (replica_mode == hdr_replica_read_only) + dbb->dbb_replica_mode = REPLICA_READ_ONLY; + else if (replica_mode == hdr_replica_read_write) + dbb->dbb_replica_mode = REPLICA_READ_WRITE; + else + fb_assert(false); + } + } // try catch (const Exception&) { @@ -1437,9 +1442,15 @@ void PAG_init2(thread_db* tdbb, USHORT shadow_number) break; case HDR_sweep_interval: - // CVC: Let's copy it always. - //if (!dbb->readOnly()) - memcpy(&dbb->dbb_sweep_interval, p + 2, sizeof(SLONG)); + memcpy(&dbb->dbb_sweep_interval, p + 2, sizeof(SLONG)); + break; + + case HDR_db_guid: + memcpy(&dbb->dbb_guid, p + 2, sizeof(Guid)); + break; + + case HDR_repl_seq: + memcpy(&dbb->dbb_repl_sequence, p + 2, sizeof(FB_UINT64)); break; } } @@ -1614,6 +1625,24 @@ void PAG_release_pages(thread_db* tdbb, USHORT pageSpaceID, int cntRelease, } +void PAG_set_db_guid(thread_db* tdbb, const Guid& guid) +{ +/************************************** + * + * P A G _ s e t _ d b _ g u i d + * + ************************************** + * + * Functional description + * Set database GUID. 
+ * + **************************************/ + + SET_TDBB(tdbb); + add_clump(tdbb, HDR_db_guid, sizeof(Guid), (UCHAR*) &guid, CLUMP_REPLACE); +} + + void PAG_set_force_write(thread_db* tdbb, bool flag) { /************************************** @@ -1753,6 +1782,54 @@ void PAG_set_db_readonly(thread_db* tdbb, bool flag) } +void PAG_set_db_replica(thread_db* tdbb, ReplicaMode mode) +{ +/********************************************* + * + * P A G _ s e t _ d b _ r e p l i c a + * + ********************************************* + * + * Functional description + * Set replica mode (none, read-only, read-write) + * + *********************************************/ + SET_TDBB(tdbb); + const auto dbb = tdbb->getDatabase(); + + err_post_if_database_is_readonly(dbb); + + WIN window(HEADER_PAGE_NUMBER); + const auto header = (header_page*) CCH_FETCH(tdbb, &window, LCK_write, pag_header); + + CCH_MARK_MUST_WRITE(tdbb, &window); + + header->hdr_flags &= ~(hdr_replica_read_only | hdr_replica_read_write); + fb_assert((header->hdr_flags & hdr_replica_mask) == hdr_replica_none); + + switch (mode) + { + case REPLICA_NONE: + break; + + case REPLICA_READ_ONLY: + header->hdr_flags |= hdr_replica_read_only; + break; + + case REPLICA_READ_WRITE: + header->hdr_flags |= hdr_replica_read_write; + break; + + default: + fb_assert(false); + } + + CCH_RELEASE(tdbb, &window); + + dbb->dbb_replica_mode = mode; +} + + void PAG_set_db_SQL_dialect(thread_db* tdbb, SSHORT flag) { /********************************************* @@ -1768,6 +1845,8 @@ void PAG_set_db_SQL_dialect(thread_db* tdbb, SSHORT flag) SET_TDBB(tdbb); Database* dbb = tdbb->getDatabase(); + err_post_if_database_is_readonly(dbb); + WIN window(HEADER_PAGE_NUMBER); header_page* header = (header_page*) CCH_FETCH(tdbb, &window, LCK_write, pag_header); @@ -1833,11 +1912,29 @@ void PAG_set_page_buffers(thread_db* tdbb, ULONG buffers) } -void PAG_sweep_interval(thread_db* tdbb, SLONG interval) +void PAG_set_repl_sequence(thread_db* tdbb, 
FB_UINT64 sequence) { /************************************** * - * P A G _ s w e e p _ i n t e r v a l + * P A G _ s e t _ r e p l _ s e q u e n c e + * + ************************************** + * + * Functional description + * Set replication sequence. + * + **************************************/ + + SET_TDBB(tdbb); + add_clump(tdbb, HDR_repl_seq, sizeof(FB_UINT64), (UCHAR*) &sequence, CLUMP_REPLACE); +} + + +void PAG_set_sweep_interval(thread_db* tdbb, SLONG interval) +{ +/************************************** + * + * P A G _ s e t _ s w e e p _ i n t e r v a l * ************************************** * diff --git a/src/jrd/pag_proto.h b/src/jrd/pag_proto.h index 49048c0aa7..29060a1dc8 100644 --- a/src/jrd/pag_proto.h +++ b/src/jrd/pag_proto.h @@ -49,9 +49,7 @@ AttNumber PAG_attachment_id(Jrd::thread_db*); bool PAG_delete_clump_entry(Jrd::thread_db* tdbb, USHORT); void PAG_format_header(Jrd::thread_db*); void PAG_format_pip(Jrd::thread_db*, Jrd::PageSpace& pageSpace); -#ifdef NOT_USED_OR_REPLACED -bool PAG_get_clump(Jrd::thread_db* tdbb, SLONG, USHORT, USHORT*, UCHAR*); -#endif +bool PAG_get_clump(Jrd::thread_db*, USHORT, USHORT*, UCHAR*); void PAG_header(Jrd::thread_db*, bool); void PAG_header_init(Jrd::thread_db*); void PAG_init(Jrd::thread_db*); @@ -60,13 +58,16 @@ SLONG PAG_last_page(Jrd::thread_db* tdbb); void PAG_release_page(Jrd::thread_db* tdbb, const Jrd::PageNumber&, const Jrd::PageNumber&); void PAG_release_pages(Jrd::thread_db* tdbb, USHORT pageSpaceID, int cntRelease, const ULONG* pgNums, const ULONG prior_page); +void PAG_set_db_guid(Jrd::thread_db* tdbb, const Firebird::Guid&); void PAG_set_force_write(Jrd::thread_db* tdbb, bool); void PAG_set_no_reserve(Jrd::thread_db* tdbb, bool); void PAG_set_db_readonly(Jrd::thread_db* tdbb, bool); +void PAG_set_db_replica(Jrd::thread_db* tdbb, ReplicaMode); void PAG_set_db_SQL_dialect(Jrd::thread_db* tdbb, SSHORT); void PAG_set_page_buffers(Jrd::thread_db* tdbb, ULONG); void 
PAG_set_page_scn(Jrd::thread_db* tdbb, Jrd::win* window); -void PAG_sweep_interval(Jrd::thread_db* tdbb, SLONG); +void PAG_set_repl_sequence(Jrd::thread_db* tdbb, FB_UINT64); +void PAG_set_sweep_interval(Jrd::thread_db* tdbb, SLONG); ULONG PAG_page_count(Jrd::thread_db*); #endif // JRD_PAG_PROTO_H diff --git a/src/jrd/replication/Applier.cpp b/src/jrd/replication/Applier.cpp new file mode 100644 index 0000000000..de9c096a3f --- /dev/null +++ b/src/jrd/replication/Applier.cpp @@ -0,0 +1,1149 @@ +/* + * The contents of this file are subject to the Initial + * Developer's Public License Version 1.0 (the "License"); + * you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * http://www.ibphoenix.com/main.nfs?a=ibphoenix&page=ibp_idpl. + * + * Software distributed under the License is distributed AS IS, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. + * See the License for the specific language governing rights + * and limitations under the License. + * + * The Original Code was created by Dmitry Yemanov + * for the Firebird Open Source RDBMS project. + * + * Copyright (c) 2013 Dmitry Yemanov + * and all contributors signed below. + * + * All Rights Reserved. + * Contributor(s): ______________________________________. 
+ */ + +#include "firebird.h" +#include "ids.h" +#include "../jrd/jrd.h" +#include "../jrd/blb.h" +#include "../jrd/req.h" +#include "../jrd/ini.h" +#include "../jrd/ibase.h" +#include "../jrd/btr_proto.h" +#include "../jrd/cch_proto.h" +#include "../jrd/cmp_proto.h" +#include "../jrd/dpm_proto.h" +#include "../jrd/idx_proto.h" +#include "../jrd/jrd_proto.h" +#include "../jrd/lck_proto.h" +#include "../jrd/met_proto.h" +#include "../jrd/mov_proto.h" +#include "../jrd/rlck_proto.h" +#include "../jrd/tra_proto.h" +#include "../jrd/vio_proto.h" +#include "../dsql/dsql_proto.h" +#include "../dsql/sqlda_pub.h" + +#include "Applier.h" +#include "Protocol.h" +#include "Publisher.h" +#include "Utils.h" + +// Log conflicts as warnings +#define LOG_WARNINGS + +// Detect and resolve record-level conflicts (in favor of master copy) +#define RESOLVE_CONFLICTS + +using namespace Firebird; +using namespace Ods; +using namespace Jrd; +using namespace Replication; + +namespace +{ + struct NoKeyTable + { + USHORT rel_id; + USHORT rel_fields[8]; + }; + + const auto UNDEF = MAX_USHORT; + + NoKeyTable NO_KEY_TABLES[] = { + { rel_segments, { f_seg_name, f_seg_field, UNDEF, UNDEF, UNDEF, UNDEF, UNDEF, UNDEF } }, + { rel_args, { f_arg_fun_name, f_arg_pos, UNDEF, UNDEF, UNDEF, UNDEF, UNDEF, UNDEF } }, + { rel_ccon, { f_ccon_cname, f_ccon_tname, UNDEF, UNDEF, UNDEF, UNDEF, UNDEF, UNDEF } }, + { rel_vrel, { f_vrl_vname, f_vrl_context, UNDEF, UNDEF, UNDEF, UNDEF, UNDEF, UNDEF } }, + { rel_msgs, { f_msg_trigger, f_msg_number, UNDEF, UNDEF, UNDEF, UNDEF, UNDEF, UNDEF } }, + { rel_dims, { f_dims_fname, f_dims_dim, UNDEF, UNDEF, UNDEF, UNDEF, UNDEF, UNDEF } }, + { rel_files, { f_file_name, UNDEF, UNDEF, UNDEF, UNDEF, UNDEF, UNDEF, UNDEF } }, + { rel_priv, { f_prv_user, f_prv_u_type, f_prv_o_type, f_prv_priv, f_prv_grant, f_prv_grantor, f_prv_rname, f_prv_fname } }, + { rel_db_creators, { f_crt_user, f_crt_u_type, UNDEF, UNDEF, UNDEF, UNDEF, UNDEF, UNDEF } } + }; + + class BlockReader + { + 
public: + BlockReader(ULONG length, const UCHAR* data) + : m_header((Block*) data), + m_data(data + sizeof(Block)), + m_metadata(data + sizeof(Block) + m_header->dataLength) + { + fb_assert(m_metadata + m_header->metaLength == data + length); + } + + bool isEof() const + { + return (m_data >= m_metadata); + } + + UCHAR getTag() + { + return *m_data++; + } + + SLONG getInt() + { + m_data = FB_ALIGN(m_data, FB_ALIGNMENT); + const auto ptr = (const SLONG*) m_data; + m_data += sizeof(SLONG); + return *ptr; + } + + SINT64 getBigInt() + { + m_data = FB_ALIGN(m_data, FB_ALIGNMENT); + const auto ptr = (const SINT64*) m_data; + m_data += sizeof(SINT64); + return *ptr; + } + + const MetaName& getMetaName() + { + const auto offset = getInt() * sizeof(MetaName); + const auto metaPtr = (const MetaName*) (m_metadata + offset); + return *metaPtr; + } + + string getString() + { + const auto length = getInt(); + const string str((const char*) m_data, length); + m_data += length; + return str; + } + + ULONG getBinary(const UCHAR*& ptr) + { + const auto len = getInt(); + ptr = m_data; + m_data += len; + return len; + } + + TraNumber getTransactionId() const + { + return m_header->traNumber; + } + + private: + const Block* const m_header; + const UCHAR* m_data; + const UCHAR* const m_metadata; + }; + + class LocalThreadContext + { + public: + LocalThreadContext(thread_db* tdbb, jrd_tra* tra, jrd_req* req = NULL) + : m_tdbb(tdbb) + { + tdbb->setTransaction(tra); + tdbb->setRequest(req); + } + + ~LocalThreadContext() + { + m_tdbb->setTransaction(NULL); + m_tdbb->setRequest(NULL); + } + + private: + thread_db* m_tdbb; + }; + +} // namespace + + +Applier* Applier::create(thread_db* tdbb) +{ + const auto dbb = tdbb->getDatabase(); + + if (!dbb->isReplica()) + raiseError("Database is not in the replica mode"); + + const auto attachment = tdbb->getAttachment(); + + if (!attachment->locksmith(tdbb, REPLICATE_INTO_DATABASE)) + status_exception::raise(Arg::Gds(isc_miss_prvlg) << 
"REPLICATE_INTO_DATABASE"); + + const auto req_pool = attachment->createPool(); + Jrd::ContextPoolHolder context(tdbb, req_pool); + AutoPtr<CompilerScratch> csb(FB_NEW_POOL(*req_pool) CompilerScratch(*req_pool)); + + const auto request = JrdStatement::makeRequest(tdbb, csb, true); + TimeZoneUtil::validateGmtTimeStamp(request->req_gmt_timestamp); + request->req_attachment = attachment; + + auto& att_pool = *attachment->att_pool; + return FB_NEW_POOL(att_pool) Applier(att_pool, dbb->dbb_filename, request); +} + +void Applier::shutdown(thread_db* tdbb) +{ + TransactionMap::Accessor txnAccessor(&m_txnMap); + if (txnAccessor.getFirst()) + { + do { + const auto transaction = txnAccessor.current()->second; + TRA_rollback(tdbb, transaction, false, true); + } while (txnAccessor.getNext()); + } + + CMP_release(tdbb, m_request); + m_request = NULL; + m_record = NULL; + + m_bitmap->clear(); + m_txnMap.clear(); +} + +void Applier::process(thread_db* tdbb, ULONG length, const UCHAR* data) +{ + Database* const dbb = tdbb->getDatabase(); + + if (dbb->readOnly()) + raiseError("Replication is impossible for read-only database"); + + try + { + tdbb->tdbb_flags |= TDBB_replicator; + + BlockReader reader(length, data); + + const auto traNum = reader.getTransactionId(); + + while (!reader.isEof()) + { + const auto op = reader.getTag(); + + switch (op) + { + case opStartTransaction: + startTransaction(tdbb, traNum); + break; + + case opPrepareTransaction: + prepareTransaction(tdbb, traNum); + break; + + case opCommitTransaction: + commitTransaction(tdbb, traNum); + break; + + case opRollbackTransaction: + rollbackTransaction(tdbb, traNum, false); + break; + + case opCleanupTransaction: + rollbackTransaction(tdbb, traNum, true); + break; + + case opStartSavepoint: + startSavepoint(tdbb, traNum); + break; + + case opReleaseSavepoint: + cleanupSavepoint(tdbb, traNum, false); + break; + + case opRollbackSavepoint: + cleanupSavepoint(tdbb, traNum, true); + break; + + case opInsertRecord: + { + const MetaName 
relName = reader.getMetaName(); + const UCHAR* record = NULL; + const ULONG length = reader.getBinary(record); + insertRecord(tdbb, traNum, relName, length, record); + } + break; + + case opUpdateRecord: + { + const MetaName relName = reader.getMetaName(); + const UCHAR* orgRecord = NULL; + const ULONG orgLength = reader.getBinary(orgRecord); + const UCHAR* newRecord = NULL; + const ULONG newLength = reader.getBinary(newRecord); + updateRecord(tdbb, traNum, relName, + orgLength, orgRecord, + newLength, newRecord); + } + break; + + case opDeleteRecord: + { + const MetaName relName = reader.getMetaName(); + const UCHAR* record = NULL; + const ULONG length = reader.getBinary(record); + deleteRecord(tdbb, traNum, relName, length, record); + } + break; + + case opStoreBlob: + { + bid blob_id; + blob_id.bid_quad.bid_quad_high = reader.getInt(); + blob_id.bid_quad.bid_quad_low = reader.getInt(); + const UCHAR* blob = NULL; + const ULONG length = reader.getBinary(blob); + storeBlob(tdbb, traNum, &blob_id, length, blob); + } + break; + + case opExecuteSql: + { + const string sql = reader.getString(); + const MetaName ownerName = reader.getMetaName(); + executeSql(tdbb, traNum, sql, ownerName); + } + break; + + case opSetSequence: + { + const MetaName genName = reader.getMetaName(); + const SINT64 value = reader.getBigInt(); + setSequence(tdbb, genName, value); + } + break; + + default: + fb_assert(false); + } + + // Check cancellation flags and reset monitoring state if necessary + tdbb->checkCancelState(true); + Monitoring::checkState(tdbb); + } + + } // try + catch (const Exception& ex) + { + postError(tdbb->tdbb_status_vector, ex); + throw; + } +} + +void Applier::startTransaction(thread_db* tdbb, TraNumber traNum) +{ + const auto attachment = tdbb->getAttachment(); + + if (m_txnMap.exist(traNum)) + raiseError("Transaction %" SQUADFORMAT" already exists", traNum); + + const auto transaction = + TRA_start(tdbb, TRA_read_committed | TRA_rec_version | TRA_no_auto_undo, 1); 
+ + m_txnMap.put(traNum, transaction); +} + +void Applier::prepareTransaction(thread_db* tdbb, TraNumber traNum) +{ + jrd_tra* transaction = NULL; + if (!m_txnMap.get(traNum, transaction)) + raiseError("Transaction %" SQUADFORMAT" is not found", traNum); + + LocalThreadContext context(tdbb, transaction); + + TRA_prepare(tdbb, transaction, 0, NULL); +} + +void Applier::commitTransaction(thread_db* tdbb, TraNumber traNum) +{ + jrd_tra* transaction = NULL; + if (!m_txnMap.get(traNum, transaction)) + raiseError("Transaction %" SQUADFORMAT" is not found", traNum); + + LocalThreadContext context(tdbb, transaction); + + TRA_commit(tdbb, transaction, false); + + m_txnMap.remove(traNum); +} + +void Applier::rollbackTransaction(thread_db* tdbb, TraNumber traNum, bool cleanup) +{ + jrd_tra* transaction = NULL; + if (!m_txnMap.get(traNum, transaction)) + { + if (cleanup) + return; + + raiseError("Transaction %" SQUADFORMAT" is not found", traNum); + } + + LocalThreadContext context(tdbb, transaction); + + TRA_rollback(tdbb, transaction, false, true); + + m_txnMap.remove(traNum); +} + +void Applier::startSavepoint(thread_db* tdbb, TraNumber traNum) +{ + jrd_tra* transaction = NULL; + if (!m_txnMap.get(traNum, transaction)) + raiseError("Transaction %" SQUADFORMAT" is not found", traNum); + + LocalThreadContext context(tdbb, transaction); + + transaction->startSavepoint(); +} + +void Applier::cleanupSavepoint(thread_db* tdbb, TraNumber traNum, bool undo) +{ + jrd_tra* transaction = NULL; + if (!m_txnMap.get(traNum, transaction)) + raiseError("Transaction %" SQUADFORMAT" is not found", traNum); + + LocalThreadContext context(tdbb, transaction); + + if (!transaction->tra_save_point) + raiseError("Transaction %" SQUADFORMAT" has no savepoints to cleanup", traNum); + + if (undo) + transaction->rollbackSavepoint(tdbb); + else + transaction->rollforwardSavepoint(tdbb); +} + +void Applier::insertRecord(thread_db* tdbb, TraNumber traNum, + const MetaName& relName, + ULONG length, const 
UCHAR* data) +{ + jrd_tra* transaction = NULL; + if (!m_txnMap.get(traNum, transaction)) + raiseError("Transaction %" SQUADFORMAT" is not found", traNum); + + LocalThreadContext context(tdbb, transaction, m_request); + + TRA_attach_request(transaction, m_request); + + const auto relation = MET_lookup_relation(tdbb, relName); + if (!relation) + raiseError("Table %s is not found", relName.c_str()); + + if (!(relation->rel_flags & REL_scanned)) + MET_scan_relation(tdbb, relation); + + const auto format = findFormat(tdbb, relation, length); + + record_param rpb; + rpb.rpb_relation = relation; + + rpb.rpb_record = m_record; + const auto record = m_record = + VIO_record(tdbb, &rpb, format, m_request->req_pool); + + rpb.rpb_format_number = format->fmt_version; + rpb.rpb_address = record->getData(); + rpb.rpb_length = length; + record->copyDataFrom(data); + + try + { + doInsert(tdbb, &rpb, transaction); + return; + } + catch (const status_exception& ex) + { + // Uniqueness violation is handled below, other exceptions are re-thrown + if (ex.value()[1] != isc_unique_key_violation && + ex.value()[1] != isc_no_dup) + { + throw; + } + + fb_utils::init_status(tdbb->tdbb_status_vector); + } + + bool found = false; + +#ifdef RESOLVE_CONFLICTS + index_desc idx; + const auto indexed = lookupRecord(tdbb, relation, record, m_bitmap, idx); + + AutoPtr<Record> cleanup; + + if (m_bitmap->getFirst()) + { + record_param tempRpb = rpb; + tempRpb.rpb_record = NULL; + + do { + tempRpb.rpb_number.setValue(m_bitmap->current()); + + if (VIO_get(tdbb, &tempRpb, transaction, m_request->req_pool) && + (!indexed || compareKey(tdbb, relation, idx, record, tempRpb.rpb_record))) + { + if (found) + raiseError("Record in table %s is ambiguously identified using the primary/unique key", relName.c_str()); + + rpb = tempRpb; + found = true; + } + } while (m_bitmap->getNext()); + + cleanup = tempRpb.rpb_record; + } +#endif + + if (found) + { + logWarning("Record being inserted into table %s already exists, updating 
instead", relName.c_str()); + + record_param newRpb; + newRpb.rpb_relation = relation; + + newRpb.rpb_record = NULL; + AutoPtr<Record> newRecord(VIO_record(tdbb, &newRpb, format, m_request->req_pool)); + + newRpb.rpb_format_number = format->fmt_version; + newRpb.rpb_address = newRecord->getData(); + newRpb.rpb_length = length; + newRecord->copyDataFrom(data); + + doUpdate(tdbb, &rpb, &newRpb, transaction, NULL); + } + else + { + doInsert(tdbb, &rpb, transaction); // second (paranoid) attempt + } +} + +void Applier::updateRecord(thread_db* tdbb, TraNumber traNum, + const MetaName& relName, + ULONG orgLength, const UCHAR* orgData, + ULONG newLength, const UCHAR* newData) +{ + jrd_tra* transaction = NULL; + if (!m_txnMap.get(traNum, transaction)) + raiseError("Transaction %" SQUADFORMAT" is not found", traNum); + + LocalThreadContext context(tdbb, transaction, m_request); + + TRA_attach_request(transaction, m_request); + + const auto relation = MET_lookup_relation(tdbb, relName); + if (!relation) + raiseError("Table %s is not found", relName.c_str()); + + if (!(relation->rel_flags & REL_scanned)) + MET_scan_relation(tdbb, relation); + + const auto orgFormat = findFormat(tdbb, relation, orgLength); + + record_param orgRpb; + orgRpb.rpb_relation = relation; + + orgRpb.rpb_record = m_record; + const auto orgRecord = m_record = + VIO_record(tdbb, &orgRpb, orgFormat, m_request->req_pool); + + orgRpb.rpb_format_number = orgFormat->fmt_version; + orgRpb.rpb_address = orgRecord->getData(); + orgRpb.rpb_length = orgLength; + orgRecord->copyDataFrom(orgData); + + BlobList sourceBlobs(getPool()); + sourceBlobs.resize(orgFormat->fmt_count); + for (USHORT id = 0; id < orgFormat->fmt_count; id++) + { + dsc desc; + if (DTYPE_IS_BLOB(orgFormat->fmt_desc[id].dsc_dtype) && + EVL_field(NULL, orgRecord, id, &desc)) + { + const auto source = (bid*) desc.dsc_address; + + if (!source->isEmpty()) + sourceBlobs[id] = *source; + } + } + + index_desc idx; + const auto indexed = lookupRecord(tdbb, 
relation, orgRecord, m_bitmap, idx); + + bool found = false; + AutoPtr<Record> cleanup; + + if (m_bitmap->getFirst()) + { + record_param tempRpb = orgRpb; + tempRpb.rpb_record = NULL; + + do { + tempRpb.rpb_number.setValue(m_bitmap->current()); + + if (VIO_get(tdbb, &tempRpb, transaction, m_request->req_pool) && + (!indexed || compareKey(tdbb, relation, idx, orgRecord, tempRpb.rpb_record))) + { + if (found) + raiseError("Record in table %s is ambiguously identified using the primary/unique key", relName.c_str()); + + orgRpb = tempRpb; + found = true; + } + } while (m_bitmap->getNext()); + + cleanup = tempRpb.rpb_record; + } + + const auto newFormat = findFormat(tdbb, relation, newLength); + + record_param newRpb; + newRpb.rpb_relation = relation; + + newRpb.rpb_record = NULL; + AutoPtr<Record> newRecord(VIO_record(tdbb, &newRpb, newFormat, m_request->req_pool)); + + newRpb.rpb_format_number = newFormat->fmt_version; + newRpb.rpb_address = newRecord->getData(); + newRpb.rpb_length = newLength; + newRecord->copyDataFrom(newData); + + if (found) + { + doUpdate(tdbb, &orgRpb, &newRpb, transaction, &sourceBlobs); + } + else + { +#ifdef RESOLVE_CONFLICTS + logWarning("Record being updated in table %s does not exist, inserting instead", relName.c_str()); + doInsert(tdbb, &newRpb, transaction); +#else + raiseError("Record in table %s cannot be located via the primary/unique key", relName.c_str()); +#endif + } +} + +void Applier::deleteRecord(thread_db* tdbb, TraNumber traNum, + const MetaName& relName, + ULONG length, const UCHAR* data) +{ + jrd_tra* transaction = NULL; + if (!m_txnMap.get(traNum, transaction)) + raiseError("Transaction %" SQUADFORMAT" is not found", traNum); + + LocalThreadContext context(tdbb, transaction, m_request); + + TRA_attach_request(transaction, m_request); + + const auto relation = MET_lookup_relation(tdbb, relName); + if (!relation) + raiseError("Table %s is not found", relName.c_str()); + + if (!(relation->rel_flags & REL_scanned)) + MET_scan_relation(tdbb, 
relation); + + const auto format = findFormat(tdbb, relation, length); + + record_param rpb; + rpb.rpb_relation = relation; + + rpb.rpb_record = m_record; + const auto record = m_record = + VIO_record(tdbb, &rpb, format, m_request->req_pool); + + rpb.rpb_format_number = format->fmt_version; + rpb.rpb_address = record->getData(); + rpb.rpb_length = length; + record->copyDataFrom(data); + + index_desc idx; + const bool indexed = lookupRecord(tdbb, relation, record, m_bitmap, idx); + + bool found = false; + AutoPtr<Record> cleanup; + + if (m_bitmap->getFirst()) + { + record_param tempRpb = rpb; + tempRpb.rpb_record = NULL; + + do { + tempRpb.rpb_number.setValue(m_bitmap->current()); + + if (VIO_get(tdbb, &tempRpb, transaction, m_request->req_pool) && + (!indexed || compareKey(tdbb, relation, idx, record, tempRpb.rpb_record))) + { + if (found) + raiseError("Record in table %s is ambiguously identified using the primary/unique key", relName.c_str()); + + rpb = tempRpb; + found = true; + } + } while (m_bitmap->getNext()); + + cleanup = tempRpb.rpb_record; + } + + if (found) + { + doDelete(tdbb, &rpb, transaction); + } + else + { +#ifdef RESOLVE_CONFLICTS + logWarning("Record being deleted from table %s does not exist, ignoring", relName.c_str()); +#else + raiseError("Record in table %s cannot be located via the primary/unique key", relName.c_str()); +#endif + } +} + +void Applier::setSequence(thread_db* tdbb, const MetaName& genName, SINT64 value) +{ + const auto attachment = tdbb->getAttachment(); + + auto gen_id = attachment->att_generators.lookup(genName); + + if (gen_id < 0) + { + gen_id = MET_lookup_generator(tdbb, genName); + + if (gen_id < 0) + raiseError("Generator %s is not found", genName.c_str()); + + attachment->att_generators.store(gen_id, genName); + } + + if (DPM_gen_id(tdbb, gen_id, false, 0) < value) + DPM_gen_id(tdbb, gen_id, true, value); +} + +void Applier::storeBlob(thread_db* tdbb, TraNumber traNum, bid* blobId, + ULONG length, const UCHAR* data) +{ + 
jrd_tra* transaction = NULL; + if (!m_txnMap.get(traNum, transaction)) + raiseError("Transaction %" SQUADFORMAT" is not found", traNum); + + LocalThreadContext context(tdbb, transaction); + + const auto orgBlobId = blobId->get_permanent_number().getValue(); + + const auto blob = blb::create(tdbb, transaction, blobId); + blob->BLB_put_data(tdbb, data, length); + blob->BLB_close(tdbb); + + transaction->tra_repl_blobs.put(orgBlobId, blobId->bid_temp_id()); +} + +void Applier::executeSql(thread_db* tdbb, + TraNumber traNum, + const string& sql, + const MetaName& owner) +{ + jrd_tra* transaction = NULL; + if (!m_txnMap.get(traNum, transaction)) + raiseError("Transaction %" SQUADFORMAT" is not found", traNum); + + const auto dbb = tdbb->getDatabase(); + const auto attachment = transaction->tra_attachment; + + LocalThreadContext context(tdbb, transaction); + + const auto dialect = + (dbb->dbb_flags & DBB_DB_SQL_dialect_3) ? SQL_DIALECT_V6 : SQL_DIALECT_V5; + + UserId user(*attachment->att_user); + user.setUserName(owner); + + AutoSetRestore<UserId*> autoOwner(&attachment->att_user, &user); + + DSQL_execute_immediate(tdbb, attachment, &transaction, + 0, sql.c_str(), dialect, + NULL, NULL, NULL, NULL, false); +} + +bool Applier::lookupKey(thread_db* tdbb, jrd_rel* relation, index_desc& key) +{ + RelationPages* const relPages = relation->getPages(tdbb); + auto page = relPages->rel_index_root; + if (!page) + { + DPM_scan_pages(tdbb); + page = relPages->rel_index_root; + } + + const PageNumber root_page(relPages->rel_pg_space_id, page); + win window(root_page); + const auto root = (index_root_page*) CCH_FETCH(tdbb, &window, LCK_read, pag_root); + + index_desc idx; + idx.idx_id = key.idx_id = idx_invalid; + + for (USHORT i = 0; i < root->irt_count; i++) + { + if (BTR_description(tdbb, relation, root, &idx, i)) + { + if (idx.idx_flags & idx_primary) + { + key = idx; + break; + } + + if (idx.idx_flags & idx_unique) + { + if ((key.idx_id == idx_invalid) || (idx.idx_count < key.idx_count)) 
+ { + key = idx; + } + } + } + } + + CCH_RELEASE(tdbb, &window); + + return (key.idx_id != idx_invalid); +} + +bool Applier::compareKey(thread_db* tdbb, jrd_rel* relation, const index_desc& idx, + Record* record1, Record* record2) +{ + bool equal = true; + + for (USHORT i = 0; i < idx.idx_count; i++) + { + const auto field_id = idx.idx_rpt[i].idx_field; + + dsc desc1, desc2; + + const bool null1 = !EVL_field(relation, record1, field_id, &desc1); + const bool null2 = !EVL_field(relation, record2, field_id, &desc2); + + if (null1 != null2 || (!null1 && MOV_compare(tdbb, &desc1, &desc2))) + { + equal = false; + break; + } + } + + return equal; +} + +bool Applier::lookupRecord(thread_db* tdbb, + jrd_rel* relation, Record* record, + RecordBitmap* bitmap, + index_desc& idx) +{ + RecordBitmap::reset(bitmap); + + // Special case: RDB$DATABASE has no keys but it's guaranteed to have only one record + if (relation->rel_id == rel_database) + { + bitmap->set(0); + return false; + } + + if (lookupKey(tdbb, relation, idx)) + { + temporary_key key; + const auto result = BTR_key(tdbb, relation, record, &idx, &key, NULL, false); + if (result != idx_e_ok) + { + IndexErrorContext context(relation, &idx); + context.raise(tdbb, result, record); + } + + IndexRetrieval retrieval(relation, &idx, idx.idx_count, &key); + retrieval.irb_generic = irb_equality | (idx.idx_flags & idx_descending ? 
irb_descending : 0); + + BTR_evaluate(tdbb, &retrieval, &bitmap, NULL); + return true; + } + + NoKeyTable* table = NULL; + + for (size_t i = 0; i < FB_NELEM(NO_KEY_TABLES); i++) + { + const auto tab = &NO_KEY_TABLES[i]; + + if (tab->rel_id == relation->rel_id) + { + table = tab; + break; + } + } + + if (!table) + raiseError("Table %s has no unique key", relation->rel_name.c_str()); + + const auto transaction = tdbb->getTransaction(); + + RLCK_reserve_relation(tdbb, transaction, relation, false); + + record_param rpb; + rpb.rpb_relation = relation; + rpb.rpb_number.setValue(BOF_NUMBER); + + while (VIO_next_record(tdbb, &rpb, transaction, m_request->req_pool, false)) + { + const auto seq_record = rpb.rpb_record; + fb_assert(seq_record); + + bool matched = true; + + for (size_t i = 0; i < FB_NELEM(table->rel_fields); i++) + { + const USHORT field_id = table->rel_fields[i]; + + if (field_id == MAX_USHORT) + break; + + dsc desc1, desc2; + + const bool null1 = !EVL_field(relation, record, field_id, &desc1); + const bool null2 = !EVL_field(relation, seq_record, field_id, &desc2); + + if (null1 != null2 || !null1 && MOV_compare(tdbb, &desc1, &desc2)) + { + matched = false; + break; + } + } + + if (matched) + bitmap->set(rpb.rpb_number.getValue()); + } + + delete rpb.rpb_record; + return false; +} + +const Format* Applier::findFormat(thread_db* tdbb, jrd_rel* relation, ULONG length) +{ + auto format = MET_current(tdbb, relation); + + while (format->fmt_length != length && format->fmt_version) + format = MET_format(tdbb, relation, format->fmt_version - 1); + + if (format->fmt_length != length) + { + raiseError("Record format with length %u is not found for table %s", + length, relation->rel_name.c_str()); + } + + return format; +} + +void Applier::doInsert(thread_db* tdbb, record_param* rpb, jrd_tra* transaction) +{ + fb_assert(!(transaction->tra_flags & TRA_system)); + + const auto record = rpb->rpb_record; + const auto format = record->getFormat(); + const auto relation = 
rpb->rpb_relation; + + RLCK_reserve_relation(tdbb, transaction, relation, true); + + for (USHORT id = 0; id < format->fmt_count; id++) + { + dsc desc; + if (DTYPE_IS_BLOB(format->fmt_desc[id].dsc_dtype) && + EVL_field(NULL, record, id, &desc)) + { + const auto blobId = (bid*) desc.dsc_address; + + if (!blobId->isEmpty()) + { + bool found = false; + + const auto numericId = blobId->get_permanent_number().getValue(); + + ReplBlobMap::Accessor accessor(&transaction->tra_repl_blobs); + if (accessor.locate(numericId) && + transaction->tra_blobs->locate(accessor.current()->second)) + { + const auto current = &transaction->tra_blobs->current(); + + if (!current->bli_materialized) + { + const auto blob = current->bli_blob_object; + fb_assert(blob); + blob->blb_relation = relation; + blob->blb_sub_type = desc.getBlobSubType(); + blob->blb_charset = desc.getCharSet(); + blobId->set_permanent(relation->rel_id, DPM_store_blob(tdbb, blob, record)); + current->bli_materialized = true; + current->bli_blob_id = *blobId; + transaction->tra_blobs->fastRemove(); + accessor.fastRemove(); + found = true; + } + } + + if (!found) + { + const ULONG num1 = blobId->bid_quad.bid_quad_high; + const ULONG num2 = blobId->bid_quad.bid_quad_low; + raiseError("Blob %u.%u is not found for table %s", + num1, num2, relation->rel_name.c_str()); + } + } + } + } + + Savepoint::ChangeMarker marker(transaction->tra_save_point); + + VIO_store(tdbb, rpb, transaction); + IDX_store(tdbb, rpb, transaction); + REPL_store(tdbb, rpb, transaction); +} + +void Applier::doUpdate(thread_db* tdbb, record_param* orgRpb, record_param* newRpb, + jrd_tra* transaction, BlobList* blobs) +{ + fb_assert(!(transaction->tra_flags & TRA_system)); + + const auto orgRecord = orgRpb->rpb_record; + const auto newRecord = newRpb->rpb_record; + const auto format = newRecord->getFormat(); + const auto relation = newRpb->rpb_relation; + + RLCK_reserve_relation(tdbb, transaction, relation, true); + + for (USHORT id = 0; id < 
format->fmt_count; id++) + { + dsc desc; + if (DTYPE_IS_BLOB(format->fmt_desc[id].dsc_dtype) && + EVL_field(NULL, newRecord, id, &desc)) + { + const auto dstBlobId = (bid*) desc.dsc_address; + const auto srcBlobId = (blobs && id < blobs->getCount()) ? (bid*) &(*blobs)[id] : NULL; + + if (!dstBlobId->isEmpty()) + { + const bool same_blobs = (srcBlobId && *srcBlobId == *dstBlobId); + + if (same_blobs) + { + if (EVL_field(NULL, orgRecord, id, &desc)) + *dstBlobId = *(bid*) desc.dsc_address; + else + dstBlobId->clear(); + } + else + { + bool found = false; + + const auto numericId = dstBlobId->get_permanent_number().getValue(); + + ReplBlobMap::Accessor accessor(&transaction->tra_repl_blobs); + if (accessor.locate(numericId) && + transaction->tra_blobs->locate(accessor.current()->second)) + { + const auto current = &transaction->tra_blobs->current(); + + if (!current->bli_materialized) + { + const auto blob = current->bli_blob_object; + fb_assert(blob); + blob->blb_relation = relation; + blob->blb_sub_type = desc.getBlobSubType(); + blob->blb_charset = desc.getCharSet(); + dstBlobId->set_permanent(relation->rel_id, DPM_store_blob(tdbb, blob, newRecord)); + current->bli_materialized = true; + current->bli_blob_id = *dstBlobId; + transaction->tra_blobs->fastRemove(); + accessor.fastRemove(); + found = true; + } + } + + if (!found) + { + const ULONG num1 = dstBlobId->bid_quad.bid_quad_high; + const ULONG num2 = dstBlobId->bid_quad.bid_quad_low; + raiseError("Blob %u.%u is not found for table %s", + num1, num2, relation->rel_name.c_str()); + } + } + } + } + } + + Savepoint::ChangeMarker marker(transaction->tra_save_point); + + VIO_modify(tdbb, orgRpb, newRpb, transaction); + IDX_modify(tdbb, orgRpb, newRpb, transaction); + REPL_modify(tdbb, orgRpb, newRpb, transaction); +} + +void Applier::doDelete(thread_db* tdbb, record_param* rpb, jrd_tra* transaction) +{ + fb_assert(!(transaction->tra_flags & TRA_system)); + + RLCK_reserve_relation(tdbb, transaction, rpb->rpb_relation, 
true);
+
+	Savepoint::ChangeMarker marker(transaction->tra_save_point);
+
+	VIO_erase(tdbb, rpb, transaction);
+	REPL_erase(tdbb, rpb, transaction);
+}
+
+void Applier::logMessage(const string& message, LogMsgType type)	// forwards the message to logReplicaMessage() together with m_database
+{
+	logReplicaMessage(m_database, message, type);
+}
+
+void Applier::logWarning(const char* msg, ...)	// printf-style warning; the body is compiled only when LOG_WARNINGS is defined
+{
+#ifdef LOG_WARNINGS
+	char buffer[BUFFER_LARGE] = {0};	// zero-filled so the text stays terminated even when it gets truncated
+
+	va_list ptr;
+	va_start(ptr, msg);
+	vsnprintf(buffer, sizeof(buffer) - 1, msg, ptr);	// bounded: vsprintf() could write past the end of buffer
+	va_end(ptr);
+
+	logMessage(buffer, WARNING_MSG);
+#endif
+}
+
+void Applier::postError(FbStatusVector* status, const Exception& ex)	// logs the exception text, then stores it in status behind a "Replication error" entry
+{
+	FbLocalStatus temp_status;
+	ex.stuffException(&temp_status);
+
+	string message;
+
+	char temp[BUFFER_LARGE];
+	const ISC_STATUS* temp_status_ptr = temp_status->getErrors();
+	while (fb_interpret(temp, sizeof(temp), &temp_status_ptr))
+	{
+		if (!message.isEmpty())
+			message += "\n\t";
+
+		message += temp;
+	}
+
+	logMessage(message, ERROR_MSG);
+
+	Arg::StatusVector org_error(&temp_status);
+	Arg::StatusVector new_error;
+	new_error << Arg::Gds(isc_random) << Arg::Str("Replication error");
+	new_error.append(org_error);
+	new_error.copyTo(status);
+}
diff --git a/src/jrd/replication/Applier.h b/src/jrd/replication/Applier.h
new file mode 100644
index 0000000000..28c4b5235f
--- /dev/null
+++ b/src/jrd/replication/Applier.h
@@ -0,0 +1,193 @@
+/*
+ * The contents of this file are subject to the Initial
+ * Developer's Public License Version 1.0 (the "License");
+ * you may not use this file except in compliance with the
+ * License. You may obtain a copy of the License at
+ * http://www.ibphoenix.com/main.nfs?a=ibphoenix&page=ibp_idpl.
+ *
+ * Software distributed under the License is distributed AS IS,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied.
+ * See the License for the specific language governing rights
+ * and limitations under the License.
+ *
+ * The Original Code was created by Dmitry Yemanov
+ * for the Firebird Open Source RDBMS project.
+ *
+ * Copyright (c) 2013 Dmitry Yemanov
+ * and all contributors signed below.
+ *
+ * All Rights Reserved.
+ * Contributor(s): ______________________________________.
+ */
+
+
+#ifndef JRD_REPLICATION_APPLIER_H
+#define JRD_REPLICATION_APPLIER_H
+
+#include "../common/classes/array.h"
+#include "../common/classes/GenericMap.h"
+#include "../jrd/jrd.h"
+#include "../jrd/tra.h"
+
+#include "Utils.h"
+
+namespace Jrd
+{
+	class Applier : private Firebird::PermanentStorage
+	{
+		typedef Firebird::GenericMap > > TransactionMap;
+		typedef Firebird::HalfStaticArray BlobList;
+/*
+		class ReplicatedTransaction : public Firebird::IReplicatedTransaction
+		{
+		public:
+			// IDispose methods
+
+			void dispose();
+
+			// IReplicatedTransaction methods
+
+			bool prepare()
+			{
+				return m_applier->prepareTransaction(this);
+			}
+
+			bool commit()
+			{
+				return m_applier->commitTransaction(this);
+			}
+
+			bool rollback()
+			{
+				return m_applier->rollbackTransaction(this);
+			}
+
+			bool startSavepoint()
+			{
+				return m_applier->startSavepoint(this);
+			}
+
+			bool releaseSavepoint()
+			{
+				return m_applier->releaseSavepoint(this);
+			}
+
+			bool rollbackSavepoint()
+			{
+				return m_applier->rollbackSavepoint(this);
+			}
+
+			bool insertRecord(const char* name,
+							  Firebird::IReplicatedRecord* record)
+			{
+				return m_applier->insertRecord(this, name, record);
+			}
+
+			bool updateRecord(const char* name,
+							  Firebird::IReplicatedRecord* orgRecord,
+							  Firebird::IReplicatedRecord* newRecord)
+			{
+				return m_applier->updateRecord(this, name, orgRecord, newRecord);
+			}
+
+			bool deleteRecord(const char* name,
+							  Firebird::IReplicatedRecord* record)
+			{
+				return m_applier->deleteRecord(this, name, record);
+			}
+
+			bool storeBlob(ISC_QUAD blobId, Firebird::IReplicatedBlob* blob)
+			{
+				return m_applier->storeBlob(this, blobId, blob);
+			}
+
+			bool executeSql(const char* sql)
+			{
+				return m_applier->executeSql(this, sql);
+			}
+
+			// Constructor
+
+			ReplicatedTransaction(Applier* applier, jrd_tra* transaction)
+				:
m_applier(applier), m_transaction(transaction) + {} + + private: + Applier* const m_applier; + jrd_tra* const m_transaction; + }; +*/ + public: + Applier(Firebird::MemoryPool& pool, + const Firebird::PathName& database, + Jrd::jrd_req* request) + : PermanentStorage(pool), + m_txnMap(pool), m_database(pool, database), + m_request(request), m_bitmap(FB_NEW_POOL(pool) RecordBitmap(pool)), m_record(NULL) + {} + + static Applier* create(thread_db* tdbb); + + void process(thread_db* tdbb, ULONG length, const UCHAR* data); + + void shutdown(thread_db* tdbb); + + private: + TransactionMap m_txnMap; + const Firebird::PathName m_database; + jrd_req* m_request; + Firebird::AutoPtr m_bitmap; + Record* m_record; + + void startTransaction(thread_db* tdbb, TraNumber traNum); + void prepareTransaction(thread_db* tdbb, TraNumber traNum); + void commitTransaction(thread_db* tdbb, TraNumber traNum); + void rollbackTransaction(thread_db* tdbb, TraNumber traNum, bool cleanup); + + void startSavepoint(thread_db* tdbb, TraNumber traNum); + void cleanupSavepoint(thread_db* tdbb, TraNumber traNum, bool undo); + + void insertRecord(thread_db* tdbb, TraNumber traNum, + const Firebird::MetaName& relName, + ULONG length, const UCHAR* data); + void updateRecord(thread_db* tdbb, TraNumber traNum, + const Firebird::MetaName& relName, + ULONG orgLength, const UCHAR* orgData, + ULONG newLength, const UCHAR* newData); + void deleteRecord(thread_db* tdbb, TraNumber traNum, + const Firebird::MetaName& relName, + ULONG length, const UCHAR* data); + + void setSequence(thread_db* tdbb, const Firebird::MetaName& genName, SINT64 value); + + void storeBlob(thread_db* tdbb, TraNumber traNum, bid* blob_id, + ULONG length, const UCHAR* data); + + void executeSql(thread_db* tdbb, TraNumber traNum, + const Firebird::string& sql, + const Firebird::MetaName& owner); + + bool lookupKey(thread_db* tdbb, jrd_rel* relation, index_desc& idx); + bool compareKey(thread_db* tdbb, jrd_rel* relation, + const index_desc& 
idx, + Record* record1, Record* record2); + bool lookupRecord(thread_db* tdbb, jrd_rel* relation, + Record* record, RecordBitmap* bitmap, + index_desc& idx); + + const Format* findFormat(thread_db* tdbb, jrd_rel* relation, ULONG length); + + void doInsert(thread_db* tdbb, record_param* rpb, + jrd_tra* transaction); + void doUpdate(thread_db* tdbb, record_param* org_rpb, record_param* new_rpb, + jrd_tra* transaction, BlobList* blobs); + void doDelete(thread_db* tdbb, record_param* rpb, + jrd_tra* transaction); + + void logMessage(const Firebird::string& message, Replication::LogMsgType type); + void logWarning(const char* msg, ...); + void postError(FbStatusVector* status, const Firebird::Exception& ex); + }; +} + +#endif // JRD_REPLICATION_APPLIER_H diff --git a/src/jrd/replication/ChangeLog.cpp b/src/jrd/replication/ChangeLog.cpp new file mode 100644 index 0000000000..b6052dc207 --- /dev/null +++ b/src/jrd/replication/ChangeLog.cpp @@ -0,0 +1,990 @@ +/* + * The contents of this file are subject to the Initial + * Developer's Public License Version 1.0 (the "License"); + * you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * http://www.ibphoenix.com/main.nfs?a=ibphoenix&page=ibp_idpl. + * + * Software distributed under the License is distributed AS IS, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. + * See the License for the specific language governing rights + * and limitations under the License. + * + * The Original Code was created by Dmitry Yemanov + * for the Firebird Open Source RDBMS project. + * + * Copyright (c) 2014 Dmitry Yemanov + * and all contributors signed below. + * + * All Rights Reserved. + * Contributor(s): ______________________________________. 
+ */ + +#include "firebird.h" +#include "../common/classes/alloc.h" +#include "../common/classes/auto.h" +#include "../common/isc_proto.h" +#include "../common/isc_s_proto.h" +#include "../common/os/os_utils.h" +#include "../common/os/path_utils.h" +#include "../jrd/jrd.h" + +#include "Config.h" +#include "ChangeLog.h" +#include "Replicator.h" +#include "Utils.h" + +#include +#include + +#ifdef HAVE_MMAP +#include +#include +#endif + +#ifdef HAVE_UNISTD_H +#include +#endif + +#ifdef WIN_NT +#include +#include +#include +#endif + +#ifndef O_BINARY +#define O_BINARY 0 +#endif + +using namespace Firebird; +using namespace Jrd; +using namespace Replication; + +#if !defined(WIN_NT) && !defined(LINUX) +#pragma FB_COMPILER_MESSAGE("Add support for your platform!") +#endif + +namespace +{ + const char* SHMEM_FILE = "fb_repl_%s"; + const ULONG SHMEM_VERSION = 1; + + const unsigned FLUSH_WAIT_INTERVAL = 1; // milliseconds + + const unsigned NO_SPACE_TIMEOUT = 10; // seconds + const unsigned NO_SPACE_RETRIES = 6; // up to one minute + + const unsigned COPY_BLOCK_SIZE = 64 * 1024; // 64 KB + + const char* LOGFILE_PATTERN = "%s.chlog-%09" UQUADFORMAT; + + const char* LOGFILENAME_WILDCARD = "$(logfilename)"; + const char* LOGPATHNAME_WILDCARD = "$(logpathname)"; + const char* ARCHPATHNAME_WILDCARD = "$(archpathname)"; + + SegmentHeader g_dummyHeader; + + static THREAD_ENTRY_DECLARE archiver_thread(THREAD_ENTRY_PARAM arg) + { + ChangeLog* const log = static_cast(arg); + log->bgArchiver(); + return 0; + } + + void raiseIOError(const char* syscall, const char* filename) + { + Arg::Gds temp(isc_io_error); + temp << Arg::Str(syscall); + temp << Arg::Str(filename); + temp << SYS_ERR(ERRNO); + temp.raise(); + } +} + + +// ChangeLog::Segment class implementation + +ChangeLog::Segment::Segment(MemoryPool& pool, const PathName& filename, int handle) + : m_filename(pool, filename), m_handle(handle) +{ + struct stat stats; + if (fstat(m_handle, &stats) < 0 || stats.st_size < (int) 
sizeof(SegmentHeader))
+	{
+		m_header = &g_dummyHeader;
+		return;
+	}
+
+	mapHeader();
+}
+
+ChangeLog::Segment::~Segment()	// unmaps the header (unless it is the shared dummy one) and closes the file
+{
+	if (m_header != &g_dummyHeader)
+		unmapHeader();
+
+	::close(m_handle);
+}
+
+void ChangeLog::Segment::init(FB_UINT64 sequence, const Guid& guid)	// stamps a fresh header (USED state, length = header only) and flushes it
+{
+	fb_assert(sizeof(LOG_SIGNATURE) == sizeof(m_header->hdr_signature));
+	strcpy(m_header->hdr_signature, LOG_SIGNATURE);
+	m_header->hdr_version = LOG_CURRENT_VERSION;
+	m_header->hdr_state = SEGMENT_STATE_USED;
+	memcpy(&m_header->hdr_guid, &guid, sizeof(Guid));
+	m_header->hdr_protocol = PROTOCOL_CURRENT_VERSION;
+	m_header->hdr_sequence = sequence;
+	m_header->hdr_length = sizeof(SegmentHeader);
+
+	flush(false);
+}
+
+bool ChangeLog::Segment::validate(const Guid& guid) const	// true only if signature, version, state, GUID and protocol all match
+{
+	if (strcmp(m_header->hdr_signature, LOG_SIGNATURE))
+		return false;
+
+	if (m_header->hdr_version != LOG_CURRENT_VERSION)
+		return false;
+
+	if (m_header->hdr_state != SEGMENT_STATE_FREE &&
+		m_header->hdr_state != SEGMENT_STATE_USED &&
+		m_header->hdr_state != SEGMENT_STATE_FULL &&
+		m_header->hdr_state != SEGMENT_STATE_ARCH)
+	{
+		return false;
+	}
+
+	if (memcmp(&m_header->hdr_guid, &guid, sizeof(Guid)))
+		return false;
+
+	if (m_header->hdr_protocol != PROTOCOL_CURRENT_VERSION)	// must be the same constant init() writes, else fresh segments fail validation after a protocol bump
+		return false;
+
+	return true;
+}
+
+void ChangeLog::Segment::copyTo(const PathName& filename) const	// copies the first hdr_length bytes of this segment into filename, COPY_BLOCK_SIZE at a time
+{
+	if (::lseek(m_handle, 0, SEEK_SET) != 0)
+		raiseIOError("seek", m_filename.c_str());
+
+	const auto totalLength = m_header->hdr_length;
+	fb_assert(totalLength > sizeof(SegmentHeader));
+
+	const auto dstHandle = os_utils::openCreateSharedFile(filename.c_str(), O_TRUNC | O_BINARY);
+
+	AutoFile dstFile(dstHandle);
+
+	Vector buffer;
+	const auto data = buffer.begin();
+
+	for (ULONG offset = 0; offset < totalLength; offset += COPY_BLOCK_SIZE)
+	{
+		const auto remaining = totalLength - offset;
+		const auto length = MIN(remaining, COPY_BLOCK_SIZE);
+
+		if (::read(m_handle, data, length) != length)
+		{
+			dstFile.release();
+
unlink(filename.c_str());
+			raiseIOError("read", m_filename.c_str());
+		}
+
+		if (::write(dstFile, data, length) != length)
+		{
+			dstFile.release();
+			unlink(filename.c_str());
+			raiseIOError("write", filename.c_str());
+		}
+	}
+}
+
+void ChangeLog::Segment::append(ULONG length, const UCHAR* data)	// writes data at offset hdr_length, then advances hdr_length
+{
+	fb_assert(m_header->hdr_state == SEGMENT_STATE_USED);
+	fb_assert(length);
+
+	const auto currentLength = m_header->hdr_length;
+
+	if (::lseek(m_handle, currentLength, SEEK_SET) != currentLength)
+		raiseError("Log file %s seek failed (error %d)", m_filename.c_str(), ERRNO);
+
+	if (::write(m_handle, data, length) != length)
+		raiseError("Log file %s write failed (error %d)", m_filename.c_str(), ERRNO);
+
+	m_header->hdr_length += length;
+}
+
+void ChangeLog::Segment::setState(SegmentState state)	// stores the state in the header; file data is flushed too when the segment becomes FULL
+{
+	const auto full = (state == SEGMENT_STATE_FULL);
+	m_header->hdr_state = state;
+	flush(full);
+}
+
+void ChangeLog::Segment::truncate()	// resizes the file to hdr_length, unmapping the header before and remapping it after
+{
+	const auto length = m_header->hdr_length;
+
+	unmapHeader();
+
+#ifdef WIN_NT
+	chsize(m_handle, length);
+#else
+	ftruncate(m_handle, length);
+#endif
+
+	mapHeader();
+}
+
+void ChangeLog::Segment::flush(bool data)	// data == true also syncs the file contents; the mapped header is always synced
+{
+	if (data)
+	{
+#ifdef WIN_NT
+		FlushFileBuffers((HANDLE) _get_osfhandle(m_handle));
+#else
+		fsync(m_handle);
+#endif
+	}
+
+#ifdef WIN_NT
+	FlushViewOfFile(m_header, 0);
+#else
+	msync(m_header, sizeof(SegmentHeader), MS_SYNC);
+#endif
+}
+
+void ChangeLog::Segment::mapHeader()	// maps the first sizeof(SegmentHeader) bytes of the file for reading and writing
+{
+#ifdef WIN_NT
+	m_mapping = CreateFileMapping((HANDLE) _get_osfhandle(m_handle), NULL, PAGE_READWRITE,
+								  0, sizeof(SegmentHeader), NULL);
+
+	if (!m_mapping)	// CreateFileMapping() reports failure with NULL, not INVALID_HANDLE_VALUE
+		raiseError("Log file %s mapping failed (error %d)", m_filename.c_str(), ERRNO);
+
+	auto address = MapViewOfFile(m_mapping, FILE_MAP_READ | FILE_MAP_WRITE,
+								 0, 0, sizeof(SegmentHeader));
+
+	if (!address)
+		raiseError("Log file %s mapping failed (error %d)", m_filename.c_str(), ERRNO);
+#else
+	auto address = mmap(NULL, sizeof(SegmentHeader), PROT_READ
| PROT_WRITE, MAP_SHARED, m_handle, 0); + + if (address == MAP_FAILED) + raiseError("Log file %s mapping failed (error %d)", m_filename.c_str(), ERRNO); +#endif + + m_header = (SegmentHeader*) address; +} + +void ChangeLog::Segment::unmapHeader() +{ +#ifdef WIN_NT + UnmapViewOfFile(m_header); + CloseHandle(m_mapping); + m_mapping = INVALID_HANDLE_VALUE; +#else + munmap(m_header, sizeof(SegmentHeader)); +#endif + + m_header = NULL; +} + +PathName ChangeLog::Segment::getFileName() const +{ + PathName directory, filename; + PathUtils::splitLastComponent(directory, filename, m_filename); + + return filename; +} + + +// ChangeLog class implementation + +ChangeLog::ChangeLog(MemoryPool& pool, + const string& dbId, + const PathName& database, + const Guid& guid, + const FB_UINT64 sequence, + const Replication::Config* config) + : PermanentStorage(pool), + m_database(pool, database), m_config(config), + m_segments(pool), m_sequence(sequence), m_shutdown(false) +{ + memcpy(&m_guid, &guid, sizeof(Guid)); + + PathName filename; + filename.printf(SHMEM_FILE, dbId.c_str()); + + FB_NEW_POOL(pool) SharedMemory(filename.c_str(), STATE_MAPPING_SIZE, this); + + { // scope + LockGuard guard(this); + + // If the server crashes while archiving, segments may remain in the ARCH state forever. + // This code allows to recover their state and retry archiving them. 
+ + const auto state = m_state->getHeader(); + + if (!state->pidUpper) + { + fb_assert(!state->pidLower); + + for (const auto segment : m_segments) + { + if (segment->getState() == SEGMENT_STATE_ARCH) + segment->setState(SEGMENT_STATE_FULL); + } + } + + linkSelf(); + } + + Thread::start(archiver_thread, this, THREAD_medium, 0); + m_startupSemaphore.enter(); + m_workingSemaphore.release(); +} + +ChangeLog::~ChangeLog() +{ + m_shutdown = true; + + m_workingSemaphore.release(); + m_cleanupSemaphore.enter(); + + try + { + LockGuard guard(this); + + if (unlinkSelf()) + { + switchActiveSegment(); + + for (const auto segment : m_segments) + { + if (segment->getState() == SEGMENT_STATE_FULL) + archiveSegment(segment); + } + } + } + catch (const Exception&) + {} // no-op + + clearSegments(); + + if (m_state.hasData() && m_state->getHeader()) + delete m_state.release(); +} + +void ChangeLog::lockState() +{ + auto blockage = false; + + if (!m_state->mutexLockCond()) + { + blockage = true; + m_state->mutexLock(); + } + + try + { + const auto state = m_state->getHeader(); + + state->lockAcquires++; + if (blockage) + state->lockBlocks++; + + if (m_segments.isEmpty() || state->segmentCount > m_segments.getCount()) + initSegments(); + } + catch (Exception&) + { + unlockState(); + throw; + } +} + +void ChangeLog::unlockState() +{ + m_state->mutexUnlock(); +} + +void ChangeLog::linkSelf() +{ + static const auto process_id = getpid(); + + const auto state = m_state->getHeader(); + + fb_assert(state->pidLower <= PID_CAPACITY); + fb_assert(state->pidUpper <= PID_CAPACITY); + + fb_assert(state->pidLower <= state->pidUpper); + + if (state->pidLower == state->pidUpper) + { + if (state->pidUpper == PID_CAPACITY) + { + for (ULONG i = 0; i < state->pidUpper; i++) + { + fb_assert(state->pids[i]); + + if (!state->pids[i] || // being a bit paranoid doesn't hurt + state->pids[i] == process_id || + !ISC_check_process_existence(state->pids[i])) + { + state->pids[i] = process_id; + return; + } + } 
+ + status_exception::raise(Arg::Gds(isc_imp_exc)); + } + + state->pids[state->pidUpper++] = process_id; + state->pidLower = state->pidUpper; + } + else + { + if (state->pidLower == PID_CAPACITY) // safety check + status_exception::raise(Arg::Gds(isc_imp_exc)); + + fb_assert(!state->pids[state->pidLower]); + state->pids[state->pidLower] = process_id; + + while (++state->pidLower < state->pidUpper) + { + if (!state->pids[state->pidLower]) + break; + } + } +} + +bool ChangeLog::unlinkSelf() +{ + static const auto process_id = getpid(); + + const auto state = m_state->getHeader(); + + fb_assert(state->pidLower <= PID_CAPACITY); + fb_assert(state->pidUpper <= PID_CAPACITY); + + fb_assert(state->pidLower <= state->pidUpper); + fb_assert(state->pidUpper > 0); + + for (ULONG i = 0; i < state->pidUpper; i++) + { + if (state->pids[i]) + { + if (state->pids[i] == process_id) + { + state->pids[i] = 0; + state->pidLower = MIN(state->pidLower, i); + + if (i == state->pidUpper - 1) + { + while (state->pidUpper && !state->pids[state->pidUpper - 1]) + state->pidUpper--; + } + + break; + } + } + else if (i < state->pidLower) + { + state->pidLower = i; + } + } + + return (state->pidUpper == 0); +} + +bool ChangeLog::initialize(SharedMemoryBase* shmem, bool init) +{ + m_state.reset(reinterpret_cast*>(shmem)); + + if (init) + { + const auto state = m_state->getHeader(); + memset(state, 0, sizeof(State)); + + state->init(SharedMemoryBase::SRAM_CHANGELOG_STATE, SHMEM_VERSION); + + state->timestamp = time(NULL); + state->sequence = m_sequence; + } + + return true; +} + +void ChangeLog::mutexBug(int osErrorCode, const char* /*text*/) +{ + raiseError("Shared memory locking failed (error %d)", osErrorCode); +} + +void ChangeLog::forceSwitch() +{ + LockGuard guard(this); + + switchActiveSegment(); +} + +FB_UINT64 ChangeLog::write(ULONG length, const UCHAR* data, bool sync) +{ + LockGuard guard(this); + + auto segment = getSegment(length); + + for (unsigned i = 0; i < NO_SPACE_RETRIES && 
!segment; i++)
+	{
+		if (i == 0) // log the warning just once
+		{
+			const string warningMsg =
+				"Out of available space in changelog segments, waiting for archiving...";
+
+			logOriginMessage(m_database, warningMsg, WARNING_MSG);
+		}
+
+		{ // scope
+			LockCheckout checkout(this);
+			Thread::sleep(NO_SPACE_TIMEOUT * 1000);	// NO_SPACE_TIMEOUT is in seconds; Thread::sleep() is fed milliseconds (cf. FLUSH_WAIT_INTERVAL below)
+		}
+
+		segment = getSegment(length);
+	}
+
+	if (!segment)
+		raiseError("Out of available space in changelog segments");
+
+	const auto state = m_state->getHeader();
+	if (segment->getLength() == sizeof(SegmentHeader))
+		state->timestamp = time(NULL);
+
+	segment->append(length, data);
+
+	if (sync)
+	{
+		if (m_config->logGroupFlushDelay)
+		{
+			const auto flushMark = state->flushMark;
+
+			segment->addRef();
+
+			for (ULONG delay = 0; delay < m_config->logGroupFlushDelay;
+				delay += FLUSH_WAIT_INTERVAL)
+			{
+				if (state->flushMark != flushMark)
+					break;
+
+				LockCheckout checkout(this);
+				Thread::sleep(FLUSH_WAIT_INTERVAL);
+			}
+
+			if (state->flushMark == flushMark)
+			{
+				segment->flush(true);
+				state->flushMark++;
+			}
+
+			segment->release();
+		}
+		else
+		{
+			segment->flush(true);
+			state->flushMark++;
+		}
+	}
+
+	return state->sequence;
+}
+
+bool ChangeLog::archiveExecute(Segment* segment)	// archives one segment via the configured command, or else by copying it into the archive directory
+{
+	if (m_config->logArchiveCommand.hasData())
+	{
+		segment->truncate();
+
+		auto archiveCommand = m_config->logArchiveCommand;
+
+		const auto logfilename = segment->getFileName();
+		const auto logpathname = m_config->logDirectory + logfilename;
+
+		const auto archpathname = m_config->logArchiveDirectory.hasData() ?
+ m_config->logArchiveDirectory + logfilename : ""; + + size_t pos; + + while ( (pos = archiveCommand.find(LOGFILENAME_WILDCARD)) != string::npos) + archiveCommand.replace(pos, strlen(LOGFILENAME_WILDCARD), logfilename); + + while ( (pos = archiveCommand.find(LOGPATHNAME_WILDCARD)) != string::npos) + archiveCommand.replace(pos, strlen(LOGPATHNAME_WILDCARD), logpathname); + + while ( (pos = archiveCommand.find(ARCHPATHNAME_WILDCARD)) != string::npos) + archiveCommand.replace(pos, strlen(ARCHPATHNAME_WILDCARD), archpathname); + + LockCheckout checkout(this); + + fb_assert(archiveCommand.hasData()); + const auto res = executeShell(archiveCommand); + + if (res) + { + string errorMsg; + + if (res < 0) + { + errorMsg.printf("Cannot execute log archive command (error %d): %s", + ERRNO, archiveCommand.c_str()); + } + else + { + errorMsg.printf("Unexpected result (%d) while executing log archive command: %s", + res, archiveCommand.c_str()); + } + + logOriginMessage(m_database, errorMsg, ERROR_MSG); + return false; + } + } + else if (m_config->logArchiveDirectory.hasData()) + { + const auto logfilename = segment->getFileName(); + const auto archpathname = m_config->logArchiveDirectory + logfilename; + + struct stat statistics; + if (os_utils::stat(archpathname.c_str(), &statistics) == 0) + { + if (statistics.st_size > (int) sizeof(SegmentHeader)) + { + string warningMsg; + warningMsg.printf("Destination log file %s exists, it will be overwritten", + archpathname.c_str()); + + logOriginMessage(m_database, warningMsg, WARNING_MSG); + } + } + + try + { + LockCheckout checkout(this); + + segment->copyTo(archpathname); + } + catch (const status_exception& ex) + { + string errorMsg = "Cannot copy log segment"; + const ISC_STATUS* status = ex.value(); + + TEXT temp[BUFFER_LARGE]; + while (fb_interpret(temp, sizeof(temp), &status)) + { + errorMsg += "\n\t"; + errorMsg += temp; + } + + logOriginMessage(m_database, errorMsg, ERROR_MSG); + return false; + } + catch (...) 
+ { + const string errorMsg = "Cannot copy log segment (reason unknown)"; + logOriginMessage(m_database, errorMsg, ERROR_MSG); + return false; + } + } + + return true; +} + +bool ChangeLog::archiveSegment(Segment* segment) +{ +// if (m_config->logArchiveCommand.hasData() || m_config->logArchiveDirectory.hasData()) + { + segment->setState(SEGMENT_STATE_ARCH); + segment->addRef(); + + const auto success = archiveExecute(segment); + + fb_assert(segment->getState() == SEGMENT_STATE_ARCH); + segment->setState(success ? SEGMENT_STATE_FREE : SEGMENT_STATE_FULL); + segment->release(); + + return success; + } + + return false; +} + +void ChangeLog::switchActiveSegment() +{ + Segment* activeSegment = NULL; + + for (const auto segment : m_segments) + { + const auto segmentState = segment->getState(); + + if (segmentState == SEGMENT_STATE_USED) + { + activeSegment = segment; + break; + } + } + + if (activeSegment) + { + const auto state = m_state->getHeader(); + + activeSegment->setState(SEGMENT_STATE_FULL); + state->flushMark++; + } +} + +void ChangeLog::bgArchiver() +{ + try + { + // Signal about our startup + m_startupSemaphore.release(); + + while (!m_shutdown) + { + LockGuard guard(this); + + const auto state = m_state->getHeader(); + + for (const auto segment : m_segments) + { + if (segment->getState() == SEGMENT_STATE_USED) + { + if (segment->getLength() > sizeof(SegmentHeader) && m_config->logArchiveTimeout) + { + const auto delta_timestamp = time(NULL) - state->timestamp; + + if (delta_timestamp > m_config->logArchiveTimeout) + { + segment->setState(SEGMENT_STATE_FULL); + state->flushMark++; + } + } + + break; + } + } + + while (!m_shutdown) + { + bool restart = false; + + for (const auto segment : m_segments) + { + if (segment->getState() == SEGMENT_STATE_FULL) + { + if (archiveSegment(segment)) + { + restart = true; + break; + } + } + } + + if (!restart) + break; + } + + guard.release(); + + m_workingSemaphore.tryEnter(1); + } + } + catch (const Firebird::Exception& 
ex) + { + iscLogException("Error in changelog thread", ex); + } + + // Signal about our exit + + try + { + m_cleanupSemaphore.release(); + } + catch (const Firebird::Exception& ex) + { + iscLogException("Error while exiting changelog thread", ex); + } +} + +void ChangeLog::initSegments() +{ + clearSegments(); + + const auto state = m_state->getHeader(); + + for (auto iter = PathUtils::newDirIterator(getPool(), m_config->logDirectory); + *iter; ++(*iter)) + { + const auto filename = **iter; + + const auto fd = os_utils::openCreateSharedFile(filename.c_str(), O_BINARY); + + AutoPtr segment(FB_NEW_POOL(getPool()) Segment(getPool(), filename, fd)); + + if (!validateSegment(segment)) + continue; + + if (segment->getSequence() > state->sequence) + segment->setState(SEGMENT_STATE_FREE); + + segment->addRef(); + m_segments.add(segment.release()); + } + + state->segmentCount = (ULONG) m_segments.getCount(); +} + +void ChangeLog::clearSegments() +{ + while (m_segments.hasData()) + m_segments.pop()->release(); +} + +ChangeLog::Segment* ChangeLog::createSegment() +{ + const auto state = m_state->getHeader(); + const auto sequence = ++state->sequence; + + PathName filename; + filename.printf(LOGFILE_PATTERN, m_config->logFilePrefix.c_str(), sequence); + filename = m_config->logDirectory + filename; + + const auto fd = os_utils::openCreateSharedFile(filename.c_str(), O_EXCL | O_BINARY); + + if (::write(fd, &g_dummyHeader, sizeof(SegmentHeader)) != sizeof(SegmentHeader)) + { + ::close(fd); + raiseError("Log file %s write failed (error %d)", filename.c_str(), ERRNO); + } + + const auto segment = FB_NEW_POOL(getPool()) Segment(getPool(), filename, fd); + + segment->init(sequence, m_guid); + segment->addRef(); + + m_segments.add(segment); + state->segmentCount++; + + return segment; +} + +ChangeLog::Segment* ChangeLog::reuseSegment(ChangeLog::Segment* segment) +{ + // Remove segment from the list + + FB_SIZE_T pos = 0; + if (m_segments.find(segment, pos)) + m_segments.remove(pos); + 
else + fb_assert(false); + + // Save its original filename + + const PathName orgname = segment->getPathName(); + + // Release the reference (thus destroying the segment) + + segment->release(); + + // Rename the backing file + + const auto state = m_state->getHeader(); + const auto sequence = ++state->sequence; + + PathName newname; + newname.printf(LOGFILE_PATTERN, m_config->logFilePrefix.c_str(), sequence); + newname = m_config->logDirectory + newname; + + if (::rename(orgname.c_str(), newname.c_str()) < 0) + raiseError("Log file %s rename failed (error: %d)", orgname.c_str(), ERRNO); + + // Re-open the segment using a new name and initialize it + + const auto fd = os_utils::openCreateSharedFile(newname.c_str(), O_BINARY); + + segment = FB_NEW_POOL(getPool()) Segment(getPool(), newname, fd); + + segment->init(sequence, m_guid); + segment->addRef(); + + m_segments.add(segment); + + return segment; +} + +ChangeLog::Segment* ChangeLog::getSegment(ULONG length) +{ + Segment* activeSegment = NULL; + Segment* freeSegment = NULL; + + FB_UINT64 minSequence = MAX_UINT64; + + for (const auto segment : m_segments) + { + const auto segmentState = segment->getState(); + const auto segmentSequence = segment->getSequence(); + + if (segmentState == SEGMENT_STATE_USED) + { + if (activeSegment) + raiseError("Multiple active changelog segments found"); + + activeSegment = segment; + } + else if (segmentState == SEGMENT_STATE_FREE) + { + if (!freeSegment || segmentSequence < minSequence) + { + freeSegment = segment; + minSequence = segmentSequence; + } + } + } + + const auto state = m_state->getHeader(); + + if (activeSegment) + { + if (activeSegment->getLength() + length > m_config->logSegmentSize) + { + activeSegment->setState(SEGMENT_STATE_FULL); + state->flushMark++; + activeSegment = NULL; + m_workingSemaphore.release(); + } + else if (activeSegment->getLength() > sizeof(SegmentHeader) && m_config->logArchiveTimeout) + { + const size_t deltaTimestamp = time(NULL) - 
state->timestamp; + + if (deltaTimestamp > m_config->logArchiveTimeout) + { + activeSegment->setState(SEGMENT_STATE_FULL); + activeSegment = NULL; + m_workingSemaphore.release(); + } + } + } + + if (activeSegment) + return activeSegment; + + if (freeSegment) + return reuseSegment(freeSegment); + + // Allocate one more segment if configuration allows that + + if (!m_config->logSegmentCount || m_segments.getCount() < m_config->logSegmentCount) + return createSegment(); + + return NULL; +} diff --git a/src/jrd/replication/ChangeLog.h b/src/jrd/replication/ChangeLog.h new file mode 100644 index 0000000000..44fafe6ce5 --- /dev/null +++ b/src/jrd/replication/ChangeLog.h @@ -0,0 +1,249 @@ +/* + * The contents of this file are subject to the Initial + * Developer's Public License Version 1.0 (the "License"); + * you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * http://www.ibphoenix.com/main.nfs?a=ibphoenix&page=ibp_idpl. + * + * Software distributed under the License is distributed AS IS, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. + * See the License for the specific language governing rights + * and limitations under the License. + * + * The Original Code was created by Dmitry Yemanov + * for the Firebird Open Source RDBMS project. + * + * Copyright (c) 2014 Dmitry Yemanov + * and all contributors signed below. + * + * All Rights Reserved. + * Contributor(s): ______________________________________. 
+ */
+
+
+#ifndef JRD_REPLICATION_CHANGELOG_H
+#define JRD_REPLICATION_CHANGELOG_H
+
+#include "../common/classes/array.h"
+#include "../common/classes/semaphore.h"
+#include "../common/os/guid.h"
+#include "../common/isc_s_proto.h"
+
+#include "Utils.h"
+
+namespace Replication
+{
+	// Value stored in SegmentHeader::hdr_state
+	enum SegmentState
+	{
+		SEGMENT_STATE_FREE = 0,
+		SEGMENT_STATE_USED = 1,
+		SEGMENT_STATE_FULL = 2,
+		SEGMENT_STATE_ARCH = 3
+	};
+
+	// Header of a changelog segment; ChangeLog::Segment reads it through m_header
+	struct SegmentHeader
+	{
+		char hdr_signature[12];
+		USHORT hdr_version;
+		USHORT hdr_protocol;
+		Firebird::Guid hdr_guid;
+		FB_UINT64 hdr_sequence;
+		ISC_TIMESTAMP hdr_timestamp;
+		SegmentState hdr_state;
+		ULONG hdr_length;
+	};
+
+	// 11 characters plus the terminating NUL, i.e. exactly sizeof(hdr_signature)
+	const char LOG_SIGNATURE[] = "FBCHANGELOG";
+
+	const USHORT LOG_VERSION_1 = 1;
+	const USHORT LOG_CURRENT_VERSION = LOG_VERSION_1;
+
+	// Changelog of a database: a set of segments plus a state block that is
+	// reached through Firebird::SharedMemoryBase (see initialize())
+	class ChangeLog : protected Firebird::PermanentStorage, public Firebird::IpcObject
+	{
+		// Shared state of the changelog
+
+		struct State : public Firebird::MemoryHeader
+		{
+			ULONG version;				// changelog version
+			time_t timestamp;			// timestamp of last write
+			ULONG segmentCount;			// number of segments in use
+			ULONG flushMark;			// last flush mark
+			FB_UINT64 sequence;			// sequence number of the last segment
+			FB_UINT64 lockAcquires;		// number of state acquires
+			FB_UINT64 lockBlocks;		// number of blocked state acquires
+			ULONG pidLower;				// Lower boundary mark in the PID array
+			ULONG pidUpper;				// Upper boundary mark in the PID array
+			int pids[1];				// PIDs attached to the state
+		};
+
+		// RAII helper to lock the shared state
+
+		class LockGuard
+		{
+		public:
+			// Locks the state of the given changelog
+			LockGuard(ChangeLog* log)
+				: m_log(log)
+			{
+				m_log->lockState();
+			}
+
+			// Unlocks the state unless release() has already done so
+			~LockGuard()
+			{
+				if (m_log)
+					m_log->unlockState();
+			}
+
+			// Unlocks early; the destructor then does nothing
+			void release()
+			{
+				if (m_log)
+				{
+					m_log->unlockState();
+					m_log = NULL;
+				}
+			}
+
+		private:
+			ChangeLog* m_log;
+		};
+
+		// RAII helper to unlock the shared state
+
+		class LockCheckout
+		{
+		public:
+			// Unlocks the state for the lifetime of this object
+			LockCheckout(ChangeLog* log)
+				: m_log(log)
+			{
+				m_log->unlockState();
+			}
+
+			// Locks the state again
+			~LockCheckout()
+			{
+				m_log->lockState();
+			}
+
+		private:
+			ChangeLog* m_log;
+		};
+
+		// Changelog segment (physical file on disk)
+
+		class Segment : public Firebird::RefCounted
+		{
+		public:
+			Segment(MemoryPool& pool, const Firebird::PathName& filename, int handle);
+			virtual ~Segment();
+
+			void init(FB_UINT64 sequence, const Firebird::Guid& guid);
+			bool validate(const Firebird::Guid& guid) const;
+			void append(ULONG length, const UCHAR* data);
+			void copyTo(const Firebird::PathName& filename) const;
+
+			// True when hdr_length exceeds the size of the header itself
+			bool hasData() const
+			{
+				return (m_header->hdr_length > sizeof(SegmentHeader));
+			}
+
+			// Returns hdr_length as stored in the header
+			ULONG getLength() const
+			{
+				return m_header->hdr_length;
+			}
+
+			// Returns hdr_sequence as stored in the header
+			FB_UINT64 getSequence() const
+			{
+				return m_header->hdr_sequence;
+			}
+
+			// Returns hdr_state as stored in the header
+			SegmentState getState() const
+			{
+				return m_header->hdr_state;
+			}
+
+			void setState(SegmentState state);
+
+			void truncate();
+			void flush(bool data);
+
+			Firebird::PathName getFileName() const;
+
+			// Returns the name this segment was constructed with
+			const Firebird::PathName& getPathName() const
+			{
+				return m_filename;
+			}
+
+		private:
+			void mapHeader();
+			void unmapHeader();
+
+			Firebird::PathName m_filename;
+			int m_handle;
+			SegmentHeader* m_header;
+
+		#ifdef WIN_NT
+			HANDLE m_mapping;
+		#endif
+		};
+
+		// Mapping size (not extendable for the time being)
+		static const ULONG STATE_MAPPING_SIZE = 64 * 1024;	// 64 KB
+		// Max number of processes accessing the shared state
+		static const ULONG PID_CAPACITY = (STATE_MAPPING_SIZE - offsetof(State, pids)) / sizeof(int); // ~16K
+
+	public:
+		ChangeLog(Firebird::MemoryPool& pool,
+				  const Firebird::string& dbId,
+				  const Firebird::PathName& database,
+				  const Firebird::Guid& guid,
+				  const FB_UINT64 sequence,
+				  const Config* config);
+		virtual ~ChangeLog();
+
+		void forceSwitch();
+		// NOTE(review): callers in Manager.cpp treat the result as the sequence
+		// number to store in the database - confirm against ChangeLog.cpp
+		FB_UINT64 write(ULONG length, const UCHAR* data, bool sync);
+
+		void bgArchiver();
+
+	private:
+		void lockState();
+		void unlockState();
+
+		void linkSelf();
+		bool unlinkSelf();
+
+		bool initialize(Firebird::SharedMemoryBase* shmem, bool init);
+		void mutexBug(int osErrorCode, const char* text);
+
+		// Validates the segment against the GUID this changelog was given
+		bool validateSegment(const Segment* segment)
+		{
+			return segment->validate(m_guid);
+		}
+
+		void initSegments();
+		void clearSegments();
+
+		Segment* createSegment();
+		Segment* reuseSegment(Segment* segment);
+		Segment* getSegment(ULONG segment);
+
+		bool archiveExecute(Segment*);
+		bool archiveSegment(Segment*);
+
+		void switchActiveSegment();
+
+		const Firebird::PathName m_database;
+		const Config* const m_config;
+		// NOTE(review): the template arguments of the next two members are
+		// missing from this copy of the patch - restore them from the original
+		Firebird::Array m_segments;
+		Firebird::AutoPtr > m_state;
+		Firebird::Guid m_guid;
+		const FB_UINT64 m_sequence;
+
+		Firebird::Semaphore m_startupSemaphore;
+		Firebird::Semaphore m_cleanupSemaphore;
+		Firebird::Semaphore m_workingSemaphore;
+
+		volatile bool m_shutdown;
+	};
+
+};
+
+#endif // JRD_REPLICATION_CHANGELOG_H
diff --git a/src/jrd/replication/Config.cpp b/src/jrd/replication/Config.cpp
new file mode 100644
index 0000000000..9fa44e9d68
--- /dev/null
+++ b/src/jrd/replication/Config.cpp
@@ -0,0 +1,312 @@
+/*
+ * The contents of this file are subject to the Initial
+ * Developer's Public License Version 1.0 (the "License");
+ * you may not use this file except in compliance with the
+ * License. You may obtain a copy of the License at
+ * http://www.ibphoenix.com/main.nfs?a=ibphoenix&page=ibp_idpl.
+ *
+ * Software distributed under the License is distributed AS IS,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied.
+ * See the License for the specific language governing rights
+ * and limitations under the License.
+ *
+ * The Original Code was created by Dmitry Yemanov
+ * for the Firebird Open Source RDBMS project.
+ *
+ * Copyright (c) 2014 Dmitry Yemanov
+ * and all contributors signed below.
+ *
+ * All Rights Reserved.
+ * Contributor(s): ______________________________________.
+ */ + +#include "firebird.h" +#include "../common/config/config_file.h" +#include "../common/os/path_utils.h" +#include "../common/isc_f_proto.h" +#include "../common/StatusArg.h" +#include "../jrd/constants.h" + +#include "Config.h" + +#ifdef HAVE_UNISTD_H +#include +#endif + +#include +#include +#ifdef HAVE_SYS_FILE_H +#include +#endif + +#include +#include + +using namespace Firebird; +using namespace Replication; + +namespace +{ + const char* REPLICATION_CFGFILE = "replication.conf"; + + const ULONG DEFAULT_BUFFER_SIZE = 1024 * 1024; // 1 MB + const ULONG DEFAULT_LOG_SEGMENT_SIZE = 16 * 1024 * 1024; // 16 MB + const ULONG DEFAULT_LOG_SEGMENT_COUNT = 8; + const ULONG DEFAULT_LOG_ARCHIVE_TIMEOUT = 60; // seconds + const ULONG DEFAULT_LOG_GROUP_FLUSH_DELAY = 0; + const ULONG DEFAULT_APPLY_IDLE_TIMEOUT = 10; // seconds + const ULONG DEFAULT_APPLY_ERROR_TIMEOUT = 60; // seconds + + void parseLong(const string& input, ULONG& output) + { + char* tail = NULL; + auto number = strtol(input.c_str(), &tail, 10); + if (tail && *tail == 0 && number > 0) + output = (ULONG) number; + } + + void parseBoolean(const string& input, bool& output) + { + if (input == "true" || input == "yes" || input == "on" || input == "1") + output = true; + else if (input == "false" || input == "no" || input == "off" || input == "0") + output = false; + } + + void raiseError(const char* msg) + { + (Arg::Gds(isc_random) << Arg::Str(msg)).raise(); + } +} + + +// Replication::Config class + +Config::Config() + : dbName(getPool()), + bufferSize(DEFAULT_BUFFER_SIZE), + includeFilter(getPool()), + excludeFilter(getPool()), + logSegmentSize(DEFAULT_LOG_SEGMENT_SIZE), + logSegmentCount(DEFAULT_LOG_SEGMENT_COUNT), + logDirectory(getPool()), + logFilePrefix(getPool()), + logGroupFlushDelay(DEFAULT_LOG_GROUP_FLUSH_DELAY), + logArchiveDirectory(getPool()), + logArchiveCommand(getPool()), + logArchiveTimeout(DEFAULT_LOG_ARCHIVE_TIMEOUT), + syncReplicas(getPool()), + logSourceDirectory(getPool()), + 
verboseLogging(false), + applyIdleTimeout(DEFAULT_APPLY_IDLE_TIMEOUT), + applyErrorTimeout(DEFAULT_APPLY_ERROR_TIMEOUT) +{ + sourceGuid.alignment = 0; +} + +// This routine is used to match the database on the master side. +// Therefore it checks only the necessary settings. + +Config* Config::get(const PathName& lookupName) +{ + fb_assert(lookupName.hasData()); + + const PathName filename = + fb_utils::getPrefix(IConfigManager::DIR_CONF, REPLICATION_CFGFILE); + + MemoryPool& pool = *getDefaultMemoryPool(); + + ConfigFile cfgFile(filename, ConfigFile::HAS_SUB_CONF | ConfigFile::NATIVE_ORDER | ConfigFile::CUSTOM_MACROS); + + AutoPtr config(FB_NEW Config); + + bool defaultFound = false, exactMatch = false; + const ConfigFile::Parameters& params = cfgFile.getParameters(); + for (const auto& section : params) + { + if (section.name != "database") + raiseError("Unknown section found in the configuration file"); + + PathName dbName(section.value.c_str()); + + if (dbName.empty()) + { + if (defaultFound) + raiseError("Only one default DATABASE section is allowed"); + + defaultFound = true; + } + else + { + PathUtils::fixupSeparators(dbName); + ISC_expand_filename(dbName, true); + + if (dbName != lookupName) + continue; + + exactMatch = true; + } + + const ConfigFile::Parameters& elements = section.sub->getParameters(); + for (const auto& el : elements) + { + const string key(el.name.c_str()); + string value(el.value); + + if (value.isEmpty()) + continue; + + if (key == "sync_replica") + { + config->syncReplicas.add(value); + } + else if (key == "buffer_size") + { + parseLong(value, config->bufferSize); + } + else if (key == "include_filter") + { + ISC_systemToUtf8(value); + config->includeFilter = value; + } + else if (key == "exclude_filter") + { + ISC_systemToUtf8(value); + config->excludeFilter = value; + } + else if (key == "log_segment_size") + { + parseLong(value, config->logSegmentSize); + } + else if (key == "log_segment_count") + { + parseLong(value, 
config->logSegmentCount); + } + else if (key == "log_directory") + { + config->logDirectory = value.c_str(); + PathUtils::ensureSeparator(config->logDirectory); + } + else if (key == "log_file_prefix") + { + config->logFilePrefix = value.c_str(); + } + else if (key == "log_group_flush_delay") + { + parseLong(value, config->logGroupFlushDelay); + } + else if (key == "log_archive_directory") + { + config->logArchiveDirectory = value.c_str(); + PathUtils::ensureSeparator(config->logArchiveDirectory); + } + else if (key == "log_archive_command") + { + config->logArchiveCommand = value.c_str(); + } + else if (key == "log_archive_timeout") + { + parseLong(value, config->logArchiveTimeout); + } + } + + if (!exactMatch) + continue; + + if (config->logDirectory.hasData() || config->syncReplicas.hasData()) + { + // If log_directory is specified, then replication is enabled + + if (config->logFilePrefix.isEmpty()) + { + PathName db_directory, db_filename; + PathUtils::splitLastComponent(db_directory, db_filename, dbName); + config->logFilePrefix = db_filename; + } + + config->dbName = dbName; + + return config.release(); + } + } + + return NULL; +} + +// This routine is used to retrieve the list of replica databases. +// Therefore it checks only the necessary settings. 
+ +void Config::enumerate(Firebird::Array& replicas) +{ + const PathName filename = + fb_utils::getPrefix(IConfigManager::DIR_CONF, REPLICATION_CFGFILE); + + MemoryPool& pool = *getDefaultMemoryPool(); + + ConfigFile cfgFile(filename, ConfigFile::HAS_SUB_CONF | ConfigFile::NATIVE_ORDER | ConfigFile::CUSTOM_MACROS); + + AutoPtr config(FB_NEW Config); + + bool defaultFound = false, exactMatch = false; + const ConfigFile::Parameters& params = cfgFile.getParameters(); + for (const auto& section : params) + { + if (section.name != "database") + raiseError("Unknown section found in the configuration file"); + + PathName dbName(section.value.c_str()); + + const ConfigFile::Parameters& elements = section.sub->getParameters(); + for (const auto& el : elements) + { + const string key(el.name.c_str()); + string value(el.value); + + if (value.isEmpty()) + continue; + + if (key == "log_source_directory") + { + config->logSourceDirectory = value.c_str(); + PathUtils::ensureSeparator(config->logSourceDirectory); + } + else if (key == "source_guid") + { + StringToGuid(&config->sourceGuid, value.c_str()); + } + else if (key == "verbose_logging") + { + if (value == "true" || value == "yes" || value == "on" || value == "1") + config->verboseLogging = true; + } + else if (key == "apply_idle_timeout") + { + parseLong(value, config->applyIdleTimeout); + } + else if (key == "apply_error_timeout") + { + parseLong(value, config->applyErrorTimeout); + } + } + + if (dbName.empty()) + { + if (defaultFound) + raiseError("Only one default DATABASE section is allowed"); + + defaultFound = true; + continue; + } + + if (config->logSourceDirectory.hasData()) + { + // If source_directory is specified, then replication is enabled + + PathUtils::fixupSeparators(dbName); + ISC_expand_filename(dbName, true); + + config->dbName = dbName; + replicas.add(config.release()); + } + } +} diff --git a/src/jrd/replication/Config.h b/src/jrd/replication/Config.h new file mode 100644 index 0000000000..b0d626823a 
--- /dev/null
+++ b/src/jrd/replication/Config.h
@@ -0,0 +1,62 @@
+/*
+ * The contents of this file are subject to the Initial
+ * Developer's Public License Version 1.0 (the "License");
+ * you may not use this file except in compliance with the
+ * License. You may obtain a copy of the License at
+ * http://www.ibphoenix.com/main.nfs?a=ibphoenix&page=ibp_idpl.
+ *
+ * Software distributed under the License is distributed AS IS,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied.
+ * See the License for the specific language governing rights
+ * and limitations under the License.
+ *
+ * The Original Code was created by Dmitry Yemanov
+ * for the Firebird Open Source RDBMS project.
+ *
+ * Copyright (c) 2014 Dmitry Yemanov
+ * and all contributors signed below.
+ *
+ * All Rights Reserved.
+ * Contributor(s): ______________________________________.
+ */
+
+
+#ifndef JRD_REPLICATION_CONFIG_H
+#define JRD_REPLICATION_CONFIG_H
+
+#include "../common/classes/array.h"
+#include "../common/classes/objects_array.h"
+#include "../common/classes/fb_string.h"
+#include "../common/os/guid.h"
+
+namespace Replication
+{
+	// Settings of one database as read from replication.conf (see Config.cpp)
+	struct Config : public Firebird::GlobalStorage
+	{
+		Config();
+
+		// Settings of the given database for the master side, or NULL
+		static Config* get(const Firebird::PathName& dbName);
+		// Collects the databases configured as replicas
+		// NOTE(review): the template argument of the array is missing from
+		// this copy of the patch - restore it from the original commit
+		static void enumerate(Firebird::Array& replicas);
+
+		Firebird::PathName dbName;				// expanded database name of the section
+		ULONG bufferSize;						// buffer_size
+		Firebird::string includeFilter;			// include_filter, converted to UTF-8
+		Firebird::string excludeFilter;			// exclude_filter, converted to UTF-8
+		ULONG logSegmentSize;					// log_segment_size
+		ULONG logSegmentCount;					// log_segment_count
+		Firebird::PathName logDirectory;		// log_directory, with a trailing separator
+		Firebird::PathName logFilePrefix;		// log_file_prefix (database file name if omitted)
+		ULONG logGroupFlushDelay;				// log_group_flush_delay
+		Firebird::PathName logArchiveDirectory;	// log_archive_directory, with a trailing separator
+		Firebird::string logArchiveCommand;		// log_archive_command
+		ULONG logArchiveTimeout;				// log_archive_timeout
+		// NOTE(review): template argument missing here as well
+		Firebird::ObjectsArray syncReplicas;	// one entry per sync_replica
+		Firebird::PathName logSourceDirectory;	// log_source_directory, with a trailing separator
+		Firebird::Guid sourceGuid;				// source_guid
+		bool verboseLogging;					// verbose_logging
+		ULONG applyIdleTimeout;					// apply_idle_timeout
+		ULONG applyErrorTimeout;				// apply_error_timeout
+	};
+};
+
+#endif // JRD_REPLICATION_CONFIG_H
diff --git a/src/jrd/replication/Manager.cpp b/src/jrd/replication/Manager.cpp
new
file mode 100644 index 0000000000..fe2687b00f --- /dev/null +++ b/src/jrd/replication/Manager.cpp @@ -0,0 +1,492 @@ +/* + * The contents of this file are subject to the Initial + * Developer's Public License Version 1.0 (the "License"); + * you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * http://www.ibphoenix.com/main.nfs?a=ibphoenix&page=ibp_idpl. + * + * Software distributed under the License is distributed AS IS, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. + * See the License for the specific language governing rights + * and limitations under the License. + * + * The Original Code was created by Dmitry Yemanov + * for the Firebird Open Source RDBMS project. + * + * Copyright (c) 2014 Dmitry Yemanov + * and all contributors signed below. + * + * All Rights Reserved. + * Contributor(s): ______________________________________. + */ + +#include "firebird.h" +#include "../common/classes/ClumpletWriter.h" +#include "../common/isc_proto.h" +#include "../common/isc_s_proto.h" +#include "../jrd/jrd.h" + +#include "Manager.h" +#include "Protocol.h" +#include "Utils.h" + +using namespace Firebird; +using namespace Jrd; +using namespace Replication; + +namespace Replication +{ + const size_t MAX_BG_WRITER_LAG = 10 * 1024 * 1024; // 10 MB + + GlobalPtr Manager::g_rmMap; + GlobalPtr Manager::g_mapMutex; +} + + +// Table matcher + +TableMatcher::TableMatcher(MemoryPool& pool, + const string& includeFilter, + const string& excludeFilter) + : m_tables(pool) +{ + m_cs = FB_NEW_POOL(pool) charset; + m_tt = FB_NEW_POOL(pool) texttype; + + IntlUtil::initUtf8Charset(m_cs); + + string collAttributes("ICU-VERSION="); + collAttributes += Jrd::UnicodeUtil::getDefaultIcuVersion(); + IntlUtil::setupIcuAttributes(m_cs, collAttributes, "", collAttributes); + + UCharBuffer collAttributesBuffer; + collAttributesBuffer.push(reinterpret_cast(collAttributes.c_str()), + collAttributes.length()); + + if 
(!IntlUtil::initUnicodeCollation(m_tt, m_cs, "UNICODE", 0, collAttributesBuffer, "")) + raiseError("Cannot initialize UNICODE collation"); + + m_charSet = CharSet::createInstance(pool, 0, m_cs); + m_textType = FB_NEW_POOL(pool) TextType(0, m_tt, m_charSet); + + if (includeFilter.hasData()) + { + m_includeMatcher.reset(FB_NEW_POOL(pool) SimilarMatcher( + pool, m_textType, + (const UCHAR*) includeFilter.c_str(), + includeFilter.length(), + '\\', true)); + } + + if (excludeFilter.hasData()) + { + m_excludeMatcher.reset(FB_NEW_POOL(pool) SimilarMatcher( + pool, m_textType, + (const UCHAR*) excludeFilter.c_str(), + excludeFilter.length(), + '\\', true)); + } +} + +TableMatcher::~TableMatcher() +{ + if (m_tt && m_tt->texttype_fn_destroy) + m_tt->texttype_fn_destroy(m_tt); +} + +bool TableMatcher::matchTable(const MetaName& tableName) +{ + try + { + bool enabled = false; + if (!m_tables.get(tableName, enabled)) + { + enabled = true; + + if (m_includeMatcher) + { + m_includeMatcher->reset(); + m_includeMatcher->process((const UCHAR*) tableName.c_str(), tableName.length()); + enabled = m_includeMatcher->result(); + } + + if (enabled && m_excludeMatcher) + { + m_excludeMatcher->reset(); + m_excludeMatcher->process((const UCHAR*) tableName.c_str(), tableName.length()); + enabled = !m_excludeMatcher->result(); + } + + m_tables.put(tableName, enabled); + } + + return enabled; + } + catch (const Exception&) + { + // If we failed matching the table name due to some internal error, then + // let's allow the table to be replicated. This is not a critical failure. 
+ return true; + } +} + + +// Replication manager + +Manager* Manager::create(const string& dbId, + const PathName& database, + const Guid& guid) +{ + MutexLockGuard guard(g_mapMutex, FB_FUNCTION); + + Manager* mgr = NULL; + if (!g_rmMap->get(dbId, mgr)) + { + const auto config = Replication::Config::get(database); + + if (config) + { + mgr = FB_NEW Manager(dbId, database, guid, config); + + if (g_rmMap->put(dbId, mgr)) + fb_assert(false); + + guard.release(); + + mgr->init(); + } + } + + if (mgr) + { + mgr->addRef(); + return mgr; + } + + return NULL; +} + +void Manager::destroy(Manager* mgr) +{ + if (mgr) + { + const string dbId = mgr->m_dbId; + + MutexLockGuard guard(g_mapMutex, FB_FUNCTION); + + if (!mgr->release()) + { + if (!g_rmMap->remove(dbId)) + fb_assert(false); + } + } +} + +TableMatcher* Manager::createMatcher(MemoryPool& pool, const string& dbId) +{ + MutexLockGuard guard(g_mapMutex, FB_FUNCTION); + + Manager* mgr = NULL; + if (g_rmMap->get(dbId, mgr)) + { + const auto config = mgr->getConfig(); + return FB_NEW_POOL(pool) TableMatcher(pool, config->includeFilter, config->excludeFilter); + } + + return NULL; +} + + +Manager::Manager(const string& dbId, + const PathName& database, + const Guid& guid, + const Replication::Config* config) + : m_dbId(getPool(), dbId), + m_database(getPool(), database), + m_config(config), + m_replicas(getPool()), + m_buffers(getPool()), + m_queue(getPool()), + m_queueSize(0), + m_shutdown(false), + m_signalled(false) +{ + // Startup the journalling + + const auto tdbb = JRD_get_thread_data(); + const auto dbb = tdbb->getDatabase(); + + m_sequence = dbb->dbb_repl_sequence; + + if (config->logDirectory.hasData()) + { + m_changeLog = FB_NEW_POOL(getPool()) + ChangeLog(getPool(), dbId, database, guid, m_sequence, config); + } + + Thread::start(writer_thread, this, THREAD_medium, 0); + m_startupSemaphore.enter(); +} + +Manager::~Manager() +{ + m_shutdown = true; + + m_workingSemaphore.release(); + m_cleanupSemaphore.enter(); + 
+ MutexLockGuard guard(m_queueMutex, FB_FUNCTION); + + // Detach from synchronous replicas + + FbLocalStatus localStatus; + + for (auto& iter : m_replicas) + { + iter->replicator->close(&localStatus); + iter->attachment->detach(&localStatus); + } + + while (m_buffers.hasData()) + delete m_buffers.pop(); +} + +void Manager::init() +{ + MutexLockGuard guard(m_queueMutex, FB_FUNCTION); + + // Attach to synchronous replicas (if any) + + FbLocalStatus localStatus; + DispatcherPtr provider; + + for (const auto iter : m_config->syncReplicas) + { + string database = iter; + string login, password; + + auto pos = database.find('@'); + if (pos != string::npos) + { + const string temp = database.substr(0, pos); + database = database.substr(pos + 1); + + pos = temp.find(':'); + if (pos != string::npos) + { + login = temp.substr(0, pos); + password = temp.substr(pos + 1); + } + else + { + login = temp; + } + } + + ClumpletWriter dpb(ClumpletReader::dpbList, MAX_DPB_SIZE); + + if (login.hasData()) + { + dpb.insertString(isc_dpb_user_name, login); + + if (password.hasData()) + dpb.insertString(isc_dpb_password, password); + } + + const auto attachment = provider->attachDatabase(&localStatus, database.c_str(), + dpb.getBufferLength(), dpb.getBuffer()); + if (!localStatus.isSuccess()) + { + logError(&localStatus); + continue; + } + + const auto replicator = attachment->createReplicator(&localStatus); + if (!localStatus.isSuccess()) + { + logError(&localStatus); + attachment->detach(&localStatus); + continue; + } + + m_replicas.add(FB_NEW_POOL(getPool()) SyncReplica(getPool(), attachment, replicator)); + } + +} + +UCharBuffer* Manager::getBuffer() +{ + MutexLockGuard guard(m_buffersMutex, FB_FUNCTION); + + const auto buffer = m_buffers.hasData() ? 
+ m_buffers.pop() : FB_NEW_POOL(getPool()) UCharBuffer(getPool()); + + fb_assert(buffer->isEmpty()); + buffer->resize(sizeof(Block)); + return buffer; +} + +void Manager::releaseBuffer(UCharBuffer* buffer) +{ + fb_assert(buffer); + buffer->clear(); + + MutexLockGuard guard(m_buffersMutex, FB_FUNCTION); + + fb_assert(!m_buffers.exist(buffer)); + m_buffers.add(buffer); +} + +void Manager::logError(const IStatus* status) +{ + string message; + + auto statusPtr = status->getErrors(); + + char temp[BUFFER_LARGE]; + while (fb_interpret(temp, sizeof(temp), &statusPtr)) + { + if (!message.isEmpty()) + message += "\n\t"; + + message += temp; + } + + logOriginMessage(m_database, message, ERROR_MSG); +} + +void Manager::flush(UCharBuffer* buffer, bool sync) +{ + fb_assert(buffer && buffer->hasData()); + + MutexLockGuard guard(m_queueMutex, FB_FUNCTION); + + if (!sync) + { + // If the background thread is lagging too far behind, + // replicate packets synchronously rather than relying + // on the background thread to catch up any time soon + if (m_queueSize > MAX_BG_WRITER_LAG) + sync = true; + // Otherwise, just add the current chunk to the queue + // and signal the background thread to process it + else + { + m_queue.add(buffer); + m_queueSize += buffer->getCount(); + } + } + + if (sync) + { + m_queue.add(buffer); + m_queueSize += buffer->getCount(); + + const auto tdbb = JRD_get_thread_data(); + const auto dbb = tdbb->getDatabase(); + + for (auto& buffer : m_queue) + { + if (buffer) + { + const auto length = (ULONG) buffer->getCount(); + + if (m_changeLog) + { + const auto sequence = m_changeLog->write(length, buffer->begin(), true); + + if (sequence != m_sequence) + { + dbb->setReplSequence(tdbb, sequence); + m_sequence = sequence; + } + } + + for (auto& iter : m_replicas) + { + iter->status.check(); + iter->replicator->process(&iter->status, length, buffer->begin()); + iter->status.check(); + } + + m_queueSize -= length; + releaseBuffer(buffer); + buffer = NULL; + } + } + 
+ m_queue.clear(); + m_queueSize = 0; + } + else if (!m_signalled) + { + m_signalled = true; + m_workingSemaphore.release(); + } +} + +void Manager::bgWriter() +{ + try + { + // Signal about our startup + + m_startupSemaphore.release(); + + // Loop to replicate queued changes + + while (!m_shutdown) + { + MutexLockGuard guard(m_queueMutex, FB_FUNCTION); + + for (auto& buffer : m_queue) + { + if (buffer) + { + const auto length = (ULONG) buffer->getCount(); + fb_assert(length); + + if (m_changeLog) + { + m_changeLog->write(length, buffer->begin(), false); + } + + for (auto& iter : m_replicas) + { + if (iter->status.isSuccess()) + { + iter->replicator->process(&iter->status, length, buffer->begin()); + } + } + + m_queueSize -= length; + releaseBuffer(buffer); + buffer = NULL; + } + } + + guard.release(); + + if (m_shutdown) + break; + + m_signalled = false; + m_workingSemaphore.tryEnter(1); + } + } + catch (const Exception& ex) + { + iscLogException("Error in replicator thread", ex); + } + + // Signal about our exit + + try + { + m_cleanupSemaphore.release(); + } + catch (const Firebird::Exception& ex) + { + iscLogException("Error while exiting replicator thread", ex); + } +} diff --git a/src/jrd/replication/Manager.h b/src/jrd/replication/Manager.h new file mode 100644 index 0000000000..d56f1b1cef --- /dev/null +++ b/src/jrd/replication/Manager.h @@ -0,0 +1,149 @@ +/* + * The contents of this file are subject to the Initial + * Developer's Public License Version 1.0 (the "License"); + * you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * http://www.ibphoenix.com/main.nfs?a=ibphoenix&page=ibp_idpl. + * + * Software distributed under the License is distributed AS IS, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. + * See the License for the specific language governing rights + * and limitations under the License. 
+ * + * The Original Code was created by Dmitry Yemanov + * for the Firebird Open Source RDBMS project. + * + * Copyright (c) 2014 Dmitry Yemanov + * and all contributors signed below. + * + * All Rights Reserved. + * Contributor(s): ______________________________________. + */ + + +#ifndef JRD_REPLICATION_MANAGER_H +#define JRD_REPLICATION_MANAGER_H + +#include "../common/classes/array.h" +#include "../common/classes/semaphore.h" +#include "../common/os/guid.h" +#include "../common/isc_s_proto.h" +#include "../../jrd/SimilarToMatcher.h" +#include "../../jrd/intl_classes.h" + +#include "Config.h" +#include "ChangeLog.h" + +namespace Replication +{ + class TableMatcher + { + typedef Jrd::UpcaseConverter SimilarConverter; + typedef Firebird::SimilarToMatcher SimilarMatcher; + typedef Firebird::GenericMap > > TablePermissionMap; + + public: + TableMatcher(MemoryPool& pool, + const Firebird::string& includeFilter, + const Firebird::string& excludeFilter); + ~TableMatcher(); + + bool matchTable(const Firebird::MetaName& tableName); + + private: + charset* m_cs; + Firebird::AutoPtr m_tt; + Firebird::AutoPtr m_charSet; + Firebird::AutoPtr m_textType; + Firebird::AutoPtr m_includeMatcher; + Firebird::AutoPtr m_excludeMatcher; + TablePermissionMap m_tables; + }; + + class Manager : public Firebird::RefCounted, protected Firebird::GlobalStorage + { + public: + struct SyncReplica + { + SyncReplica(Firebird::MemoryPool& pool, Firebird::IAttachment* att, Firebird::IReplicator* repl) + : status(pool), attachment(att), replicator(repl) + {} + + Firebird::FbLocalStatus status; + Firebird::IAttachment* attachment; + Firebird::IReplicator* replicator; + }; + + typedef Firebird::GenericMap > > DbReplMgrMap; + + public: + ~Manager(); + + static Manager* create(const Firebird::string& dbId, + const Firebird::PathName& database, + const Firebird::Guid& guid); + static void destroy(Manager* mgr); + + static TableMatcher* createMatcher(MemoryPool& pool, const Firebird::string& dbId); + + 
Firebird::UCharBuffer* getBuffer(); + + void flush(Firebird::UCharBuffer* buffer, bool sync); + + void forceLogSwitch() + { + m_changeLog->forceSwitch(); + } + + const Replication::Config* getConfig() const + { + return m_config; + } + + private: + Manager(const Firebird::string& dbId, + const Firebird::PathName& database, + const Firebird::Guid& guid, + const Replication::Config* config); + + void init(); + void logError(const Firebird::IStatus* status); + void releaseBuffer(Firebird::UCharBuffer* buffer); + + void bgWriter(); + + static THREAD_ENTRY_DECLARE writer_thread(THREAD_ENTRY_PARAM arg) + { + Manager* const mgr = static_cast(arg); + mgr->bgWriter(); + return 0; + } + + Firebird::Semaphore m_startupSemaphore; + Firebird::Semaphore m_cleanupSemaphore; + Firebird::Semaphore m_workingSemaphore; + + const Firebird::string m_dbId; + const Firebird::PathName m_database; + const Firebird::AutoPtr m_config; + Firebird::Array m_replicas; + Firebird::Array m_buffers; + Firebird::Mutex m_buffersMutex; + Firebird::Array m_queue; + Firebird::Mutex m_queueMutex; + ULONG m_queueSize; + FB_UINT64 m_sequence; + + volatile bool m_shutdown; + volatile bool m_signalled; + Firebird::AtomicCounter m_waiters; + + Firebird::AutoPtr m_changeLog; + Firebird::RWLock m_lock; + + static Firebird::GlobalPtr g_rmMap; + static Firebird::GlobalPtr g_mapMutex; + }; +} + +#endif // JRD_REPLICATION_MANAGER_H diff --git a/src/jrd/replication/Protocol.h b/src/jrd/replication/Protocol.h new file mode 100644 index 0000000000..324ba91e1c --- /dev/null +++ b/src/jrd/replication/Protocol.h @@ -0,0 +1,68 @@ +/* + * The contents of this file are subject to the Initial + * Developer's Public License Version 1.0 (the "License"); + * you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * http://www.ibphoenix.com/main.nfs?a=ibphoenix&page=ibp_idpl. 
+ *
+ * Software distributed under the License is distributed AS IS,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied.
+ * See the License for the specific language governing rights
+ * and limitations under the License.
+ *
+ * The Original Code was created by Dmitry Yemanov
+ * for the Firebird Open Source RDBMS project.
+ *
+ * Copyright (c) 2013 Dmitry Yemanov
+ * and all contributors signed below.
+ *
+ * All Rights Reserved.
+ * Contributor(s): ______________________________________.
+ */
+
+
+#ifndef JRD_REPLICATION_PROTOCOL_H
+#define JRD_REPLICATION_PROTOCOL_H
+
+namespace Replication
+{
+	// Supported protocol versions
+	const ULONG PROTOCOL_VERSION1 = 1;
+	const ULONG PROTOCOL_CURRENT_VERSION = PROTOCOL_VERSION1;
+
+	// Header of a replication buffer: Manager::getBuffer() reserves
+	// sizeof(Block) bytes at the start of every buffer it hands out
+	struct Block
+	{
+		SINT64 traNumber;
+		ULONG dataLength;
+		ULONG metaLength;
+		ULONG flags;				// presumably a combination of BLOCK_* - confirm against Replicator.cpp
+		ISC_TIMESTAMP timestamp;
+	};
+
+	const ULONG BLOCK_BEGIN_TRANS	= 1;
+	const ULONG BLOCK_END_TRANS		= 2;
+
+	// Operation codes of the replication protocol
+	enum Operation
+	{
+		opStartTransaction = 1,
+		opPrepareTransaction = 2,
+		opCommitTransaction = 3,
+		opRollbackTransaction = 4,
+		opCleanupTransaction = 5,
+
+		opStartSavepoint = 6,
+		opReleaseSavepoint = 7,
+		opRollbackSavepoint = 8,
+
+		opInsertRecord = 9,
+		opUpdateRecord = 10,
+		opDeleteRecord = 11,
+		opStoreBlob = 12,
+		opExecuteSql = 13,
+		opSetSequence = 14
+	};
+
+} // namespace
+
+#endif // JRD_REPLICATION_PROTOCOL_H
+
diff --git a/src/jrd/replication/Publisher.cpp b/src/jrd/replication/Publisher.cpp
new file mode 100644
index 0000000000..1392ddee4f
--- /dev/null
+++ b/src/jrd/replication/Publisher.cpp
@@ -0,0 +1,635 @@
+/*
+ * The contents of this file are subject to the Initial
+ * Developer's Public License Version 1.0 (the "License");
+ * you may not use this file except in compliance with the
+ * License. You may obtain a copy of the License at
+ * http://www.ibphoenix.com/main.nfs?a=ibphoenix&page=ibp_idpl.
+ *
+ * Software distributed under the License is distributed AS IS,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied.
+ * See the License for the specific language governing rights
+ * and limitations under the License.
+ *
+ * The Original Code was created by Dmitry Yemanov
+ * for the Firebird Open Source RDBMS project.
+ *
+ * Copyright (c) 2013 Dmitry Yemanov
+ * and all contributors signed below.
+ *
+ * All Rights Reserved.
+ * Contributor(s): ______________________________________.
+ */
+
+#include "firebird.h"
+#include "../jrd/jrd.h"
+#include "../jrd/ods.h"
+#include "../jrd/req.h"
+#include "../jrd/tra.h"
+#include "../jrd/blr.h"
+#include "../jrd/trig.h"
+#include "../jrd/Database.h"
+#include "../jrd/blb_proto.h"
+#include "../jrd/cch_proto.h"
+#include "../jrd/evl_proto.h"
+#include "../jrd/met_proto.h"
+#include "../jrd/mov_proto.h"
+#include "../common/isc_proto.h"
+
+#include "Publisher.h"
+#include "Replicator.h"
+
+using namespace Firebird;
+using namespace Jrd;
+using namespace Replication;
+
+namespace
+{
+	// Generator RDB$BACKUP_HISTORY, although defined as system,
+	// should be replicated similar to user-defined ones
+	const int BACKUP_HISTORY_GENERATOR = 9;
+
+	const char* LOG_ERROR_MSG = "Replication is stopped due to critical error(s)";
+
+	// Stops replication for the current attachment: disposes the replicator of
+	// the transaction (if one is given) and of the attachment, logging the
+	// status of the latter beforehand if it contains errors
+	void handleError(thread_db* tdbb, jrd_tra* transaction = NULL)
+	{
+		const auto dbb = tdbb->getDatabase();
+		fb_assert(dbb);
+		const auto attachment = tdbb->getAttachment();
+		fb_assert(attachment);
+
+		if (transaction && transaction->tra_replicator)
+		{
+			transaction->tra_replicator->dispose();
+			transaction->tra_replicator = NULL;
+		}
+
+		if (attachment->att_replicator)
+		{
+			const auto status = attachment->att_replicator->getStatus();
+
+			if (status->getState() & IStatus::STATE_ERRORS)
+			{
+				Firebird::string msg;
+				msg.printf("Database: %s\n\t%s", dbb->dbb_filename.c_str(), LOG_ERROR_MSG);
+				iscLogStatus(msg.c_str(), status);
+			}
+
+			attachment->att_replicator->dispose();
+			attachment->att_replicator = NULL;
+		}
+	}
+
+	// Returns the record itself if it already has the current format of the
+	// relation. Otherwise returns a new record of the current format (allocated
+	// from the default pool of tdbb) with the fields moved over from the
+	// original; a field that EVL_field() cannot get from the original is set to NULL.
+	Record* upgradeRecord(thread_db* tdbb, jrd_rel* relation, Record* record)
+	{
+		const auto format = MET_current(tdbb, relation);
+
+		if (record->getFormat()->fmt_version == format->fmt_version)
+			return record;
+
+		auto& pool = *tdbb->getDefaultPool();
+		const auto newRecord = FB_NEW_POOL(pool) Record(pool, format);
+
+		dsc orgDesc, newDesc;
+
+		for (auto i = 0; i < newRecord->getFormat()->fmt_count; i++)
+		{
+			newRecord->clearNull(i);
+
+			if (EVL_field(relation, newRecord, i, &newDesc))
+			{
+				if (EVL_field(relation, record, i, &orgDesc))
+					MOV_move(tdbb, &orgDesc, &newDesc);
+				else
+					newRecord->setNull(i);
+			}
+		}
+
+		return newRecord;
+	}
+
+	// Returns the replicated counterpart of the transaction, starting it on
+	// first use. Returns NULL if the attachment has no replicator or if the
+	// transaction could not be started (handleError() is called in that case).
+	IReplicatedTransaction* ensureTransaction(thread_db* tdbb, jrd_tra* transaction)
+	{
+		// Create a transaction object, unless it already exists
+
+		if (!transaction->tra_replicator)
+		{
+			const auto attachment = tdbb->getAttachment();
+			fb_assert(attachment);
+
+			const auto replicator = attachment->att_replicator;
+
+			if (!replicator)
+				return NULL;
+
+			transaction->tra_replicator = replicator->startTransaction(transaction->tra_number);
+
+			if (!transaction->tra_replicator)
+				handleError(tdbb);
+		}
+
+		return transaction->tra_replicator;
+	}
+
+	// Starts, on the replicated transaction, every savepoint that has not been
+	// replicated yet. Returns false (after handleError()) on the first failure.
+	bool ensureSavepoints(thread_db* tdbb, jrd_tra* transaction)
+	{
+		const auto dbb = tdbb->getDatabase();
+		fb_assert(dbb);
+
+		// NOTE(review): used below without a NULL check - presumably the caller
+		// has called ensureTransaction() first; confirm against the call sites
+		const auto replicator = transaction->tra_replicator;
+
+		// Replicate the entire stack of active savepoints (excluding priorly replicated),
+		// starting with the oldest ones
+
+		// NOTE(review): the template arguments of the array are missing from
+		// this copy of the patch
+		HalfStaticArray stack;
+
+		// Collect the savepoints up to the first one already replicated
+		for (Savepoint::Iterator iter(transaction->tra_save_point); *iter; ++iter)
+		{
+			const auto savepoint = *iter;
+
+			if (savepoint->isReplicated())
+				break;
+
+			stack.push(savepoint);
+		}
+
+		// Popping reverses the order in which they were collected
+		while (stack.hasData())
+		{
+			const auto savepoint = stack.pop();
+
+			if (!replicator->startSavepoint())
+			{
+				handleError(tdbb);
+				return false;
+			}
+
+			savepoint->markAsReplicated();
+		}
+
+		return true;
+	}
+
+	// Exposes the raw data of a Record. The record is referenced, not copied.
+	// NOTE(review): the base class template arguments are missing from this
+	// copy of the patch (here and in ReplicatedBlobImpl)
+	class ReplicatedRecordImpl :
+		public Firebird::AutoIface >
+	{
+	public:
+		ReplicatedRecordImpl(thread_db* tdbb, const Record* record)
+			: m_tdbb(tdbb), m_record(record)
+		{
+		}
+
+		~ReplicatedRecordImpl()
+		{
+		}
+
+		// Length of the record in bytes
+		unsigned getRawLength()
+		{
+			return m_record->getLength();
+		}
+
+		// Pointer to the data of the record
+		const unsigned char* getRawData()
+		{
+			return m_record->getData();
+		}
+
+	private:
+		thread_db* const m_tdbb;
+		const Record* const m_record;
+	};
+
+	// Gives access to the contents of a blob, which is opened by the
+	// constructor and closed by the destructor
+	class ReplicatedBlobImpl :
+		public Firebird::AutoIface >
+	{
+	public:
+		ReplicatedBlobImpl(thread_db* tdbb, jrd_tra* transaction, const bid* blobId) :
+			m_tdbb(tdbb), m_blob(blb::open(tdbb, transaction, blobId))
+		{
+		}
+
+		~ReplicatedBlobImpl()
+		{
+			m_blob->BLB_close(m_tdbb);
+		}
+
+		// Returns blb_length of the blob
+		unsigned getLength()
+		{
+			return m_blob->blb_length;
+		}
+
+		// Non-zero once the blob has the BLB_eof flag set
+		FB_BOOLEAN isEof()
+		{
+			return (m_blob->blb_flags & BLB_eof);
+		}
+
+		// Reads up to length bytes into the buffer, at most MAX_SSHORT bytes per
+		// call to BLB_get_segment(), stopping early at the end of the blob.
+		// Returns the number of bytes stored.
+		unsigned getSegment(unsigned length, unsigned char* buffer)
+		{
+			auto p = buffer;
+
+			while (length)
+			{
+				auto n = (USHORT) MIN(length, MAX_SSHORT);
+
+				n = m_blob->BLB_get_segment(m_tdbb, p, n);
+
+				p += n;
+				length -= n;
+
+				if (m_blob->blb_flags & BLB_eof)
+					break;
+			}
+
+			return (unsigned) (p - buffer);
+		}
+
+	private:
+		thread_db* const m_tdbb;
+		blb* const m_blob;
+	};
+}
+
+
+void REPL_attach(thread_db* tdbb, bool cleanupTransactions)
+{
+	const auto attachment = tdbb->getAttachment();
+
+	if (attachment->isSystem())
+		return;
+
+	fb_assert(!attachment->att_replicator);
+
+	const auto dbb = tdbb->getDatabase();
+	fb_assert(dbb);
+
+	dbb->ensureGuid(tdbb);
+
+	const string dbId = dbb->getUniqueFileId();
+	const PathName& dbName = dbb->dbb_filename;
+	const Guid& dbGuid = dbb->dbb_guid;
+	const MetaName& currentUser = attachment->att_user->getUserName();
+
+	MemoryPool& pool = *attachment->att_pool;
+
+	attachment->att_replicator = (IReplicatedSession*)
+		Replicator::create(pool, dbId, dbName, dbGuid, currentUser,
+						   cleanupTransactions);
+
+	if (attachment->att_replicator)
+		attachment->att_repl_matcher =
Manager::createMatcher(pool, dbId); +} + +void REPL_trans_prepare(thread_db* tdbb, jrd_tra* transaction) +{ + const auto replicator = transaction->tra_replicator; + + if (!replicator) + return; + + if (transaction->tra_flags & (TRA_system | TRA_readonly)) + return; + + if (!replicator->prepare()) + handleError(tdbb, transaction); +} + +void REPL_trans_commit(thread_db* tdbb, jrd_tra* transaction) +{ + const auto replicator = transaction->tra_replicator; + + if (!replicator) + return; + + if (transaction->tra_flags & (TRA_system | TRA_readonly)) + return; + + if (!replicator->commit()) + handleError(tdbb, transaction); + + transaction->tra_replicator = NULL; +} + +void REPL_trans_rollback(thread_db* tdbb, jrd_tra* transaction) +{ + const auto replicator = transaction->tra_replicator; + + if (!replicator) + return; + + if (transaction->tra_flags & (TRA_system | TRA_readonly)) + return; + + if (!replicator->rollback()) + handleError(tdbb, transaction); + + transaction->tra_replicator = NULL; +} + +void REPL_trans_cleanup(Jrd::thread_db* tdbb, TraNumber number) +{ + const auto attachment = tdbb->getAttachment(); + fb_assert(attachment); + + const auto replicator = attachment->att_replicator; + + if (!replicator) + return; + + if (!replicator->cleanupTransaction(number)) + handleError(tdbb); +} + +void REPL_save_cleanup(thread_db* tdbb, jrd_tra* transaction, + const Savepoint* savepoint, bool undo) +{ + const auto attachment = tdbb->getAttachment(); + fb_assert(attachment); + + const auto replicator = transaction->tra_replicator; + + if (!replicator) + return; + + if (tdbb->tdbb_flags & (TDBB_dont_post_dfw | TDBB_repl_sql)) + return; + + if (transaction->tra_flags & (TRA_system | TRA_readonly)) + return; + + if (!transaction->tra_save_point->isReplicated()) + return; + + if (undo) + { + if (!replicator->rollbackSavepoint()) + handleError(tdbb, transaction); + } + else + { + if (!replicator->releaseSavepoint()) + handleError(tdbb, transaction); + } +} + +void 
REPL_store(thread_db* tdbb, const record_param* rpb, jrd_tra* transaction) +{ + const auto attachment = tdbb->getAttachment(); + fb_assert(attachment); + + if (!attachment->att_replicator) + return; + + if (tdbb->tdbb_flags & (TDBB_dont_post_dfw | TDBB_repl_sql)) + return; + + if (transaction->tra_flags & (TRA_system | TRA_readonly)) + return; + + const auto relation = rpb->rpb_relation; + fb_assert(relation); + + if (relation->isTemporary()) + return; + + const auto matcher = attachment->att_repl_matcher.get(); + + if (!relation->isSystem() && matcher && !matcher->matchTable(relation->rel_name)) + return; + + const auto record = upgradeRecord(tdbb, relation, rpb->rpb_record); + fb_assert(record); + + // This temporary auto-pointer is just to delete a temporary record + AutoPtr cleanupRecord(record != rpb->rpb_record ? record : NULL); + + const auto replicator = ensureTransaction(tdbb, transaction); + if (!replicator) + return; + + const auto format = record->getFormat(); + + UCharBuffer buffer; + for (auto id = 0; id < format->fmt_count; id++) + { + dsc desc; + if (DTYPE_IS_BLOB(format->fmt_desc[id].dsc_dtype) && + EVL_field(NULL, record, id, &desc)) + { + const auto destination = (bid*) desc.dsc_address; + + if (!destination->isEmpty()) + { + const auto blobId = *(ISC_QUAD*) desc.dsc_address; + + ReplicatedBlobImpl replBlob(tdbb, transaction, destination); + + if (!replicator->storeBlob(blobId, &replBlob)) + { + handleError(tdbb); + return; + } + } + } + } + + if (!ensureSavepoints(tdbb, transaction)) + return; + + ReplicatedRecordImpl replRecord(tdbb, record); + + if (!replicator->insertRecord(relation->rel_name.c_str(), &replRecord)) + handleError(tdbb, transaction); +} + +void REPL_modify(thread_db* tdbb, const record_param* orgRpb, + const record_param* newRpb, jrd_tra* transaction) +{ + const auto attachment = tdbb->getAttachment(); + fb_assert(attachment); + + if (!attachment->att_replicator) + return; + + if (tdbb->tdbb_flags & (TDBB_dont_post_dfw | 
TDBB_repl_sql)) + return; + + if (transaction->tra_flags & (TRA_system | TRA_readonly)) + return; + + const auto relation = newRpb->rpb_relation; + fb_assert(relation); + + if (relation->isTemporary()) + return; + + const auto matcher = attachment->att_repl_matcher.get(); + + if (!relation->isSystem() && matcher && !matcher->matchTable(relation->rel_name)) + return; + + const auto newRecord = upgradeRecord(tdbb, relation, newRpb->rpb_record); + fb_assert(newRecord); + + const auto orgRecord = upgradeRecord(tdbb, relation, orgRpb->rpb_record); + fb_assert(orgRecord); + + // These temporary auto-pointers are just to delete temporary records + AutoPtr cleanupOrgRecord(orgRecord != orgRpb->rpb_record ? orgRecord : NULL); + AutoPtr cleanupNewRecord(newRecord != newRpb->rpb_record ? newRecord : NULL); + + const auto orgLength = orgRecord->getLength(); + const auto newLength = newRecord->getLength(); + + // Ignore dummy updates + if (orgLength == newLength && + !memcmp(orgRecord->getData(), newRecord->getData(), orgLength)) + { + return; + } + + const auto replicator = ensureTransaction(tdbb, transaction); + if (!replicator) + return; + + const auto format = newRecord->getFormat(); + + UCharBuffer buffer; + for (auto id = 0; id < format->fmt_count; id++) + { + dsc desc; + if (DTYPE_IS_BLOB(format->fmt_desc[id].dsc_dtype) && + EVL_field(NULL, newRecord, id, &desc)) + { + const auto destination = (bid*) desc.dsc_address; + + if (!destination->isEmpty()) + { + const auto blobId = *(ISC_QUAD*) desc.dsc_address; + + ReplicatedBlobImpl replBlob(tdbb, transaction, destination); + + if (!replicator->storeBlob(blobId, &replBlob)) + { + handleError(tdbb); + return; + } + } + } + } + + if (!ensureSavepoints(tdbb, transaction)) + return; + + ReplicatedRecordImpl replOrgRecord(tdbb, orgRecord); + ReplicatedRecordImpl replNewRecord(tdbb, newRecord); + + if (!replicator->updateRecord(relation->rel_name.c_str(), &replOrgRecord, &replNewRecord)) + handleError(tdbb, transaction); +} + + 
+void REPL_erase(thread_db* tdbb, const record_param* rpb, jrd_tra* transaction) +{ + const auto attachment = tdbb->getAttachment(); + fb_assert(attachment); + + if (!attachment->att_replicator) + return; + + if (tdbb->tdbb_flags & (TDBB_dont_post_dfw | TDBB_repl_sql)) + return; + + if (transaction->tra_flags & (TRA_system | TRA_readonly)) + return; + + const auto relation = rpb->rpb_relation; + fb_assert(relation); + + if (relation->isTemporary()) + return; + + const auto matcher = attachment->att_repl_matcher.get(); + + if (!relation->isSystem() && matcher && !matcher->matchTable(relation->rel_name)) + return; + + const auto record = upgradeRecord(tdbb, relation, rpb->rpb_record); + fb_assert(record); + + // This temporary auto-pointer is just to delete a temporary record + AutoPtr cleanupRecord(record != rpb->rpb_record ? record : NULL); + + const auto replicator = ensureTransaction(tdbb, transaction); + if (!replicator) + return; + + if (!ensureSavepoints(tdbb, transaction)) + return; + + ReplicatedRecordImpl replRecord(tdbb, record); + + if (!replicator->deleteRecord(relation->rel_name.c_str(), &replRecord)) + handleError(tdbb, transaction); +} + +void REPL_gen_id(thread_db* tdbb, SLONG genId, SINT64 value) +{ + const auto attachment = tdbb->getAttachment(); + fb_assert(attachment); + + const auto replicator = attachment->att_replicator; + + if (!replicator) + return; + + if (tdbb->tdbb_flags & (TDBB_dont_post_dfw | TDBB_repl_sql)) + return; + + if (genId == 0) // special case: ignore RDB$GENERATORS + return; + + // Ignore other system generators, except RDB$BACKUP_HISTORY + if (genId != BACKUP_HISTORY_GENERATOR) + { + for (auto generator = generators; generator->gen_name; generator++) + { + if (generator->gen_id == genId) + return; + } + } + + MetaName genName; + + if (!attachment->att_generators.lookup(genId, genName)) + { + MET_lookup_generator_id(tdbb, genId, genName, NULL); + attachment->att_generators.store(genId, genName); + } + + if 
(!replicator->setSequence(genName.c_str(), value)) + handleError(tdbb); +} + +void REPL_exec_sql(thread_db* tdbb, jrd_tra* transaction, const string& sql) +{ + const auto attachment = tdbb->getAttachment(); + fb_assert(attachment); + + if (!attachment->att_replicator) + return; + + fb_assert(tdbb->tdbb_flags & TDBB_repl_sql); + + if (tdbb->tdbb_flags & TDBB_dont_post_dfw) + return; + + if (transaction->tra_flags & (TRA_system | TRA_readonly)) + return; + + const auto replicator = ensureTransaction(tdbb, transaction); + if (!replicator) + return; + + if (!ensureSavepoints(tdbb, transaction)) + return; + + if (!replicator->executeSql(sql.c_str())) + handleError(tdbb, transaction); +} diff --git a/src/jrd/replication/Publisher.h b/src/jrd/replication/Publisher.h new file mode 100644 index 0000000000..a289f34f31 --- /dev/null +++ b/src/jrd/replication/Publisher.h @@ -0,0 +1,49 @@ +/* + * The contents of this file are subject to the Initial + * Developer's Public License Version 1.0 (the "License"); + * you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * http://www.ibphoenix.com/main.nfs?a=ibphoenix&page=ibp_idpl. + * + * Software distributed under the License is distributed AS IS, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. + * See the License for the specific language governing rights + * and limitations under the License. + * + * The Original Code was created by Dmitry Yemanov + * for the Firebird Open Source RDBMS project. + * + * Copyright (c) 2013 Dmitry Yemanov + * and all contributors signed below. + * + * All Rights Reserved. + * Contributor(s): ______________________________________. 
+ */ + +#ifndef JRD_REPLICATION_PUBLISHER_H +#define JRD_REPLICATION_PUBLISHER_H + +namespace Jrd +{ + class thread_db; + class jrd_tra; + class Savepoint; + struct record_param; +} + +void REPL_attach(Jrd::thread_db* tdbb, bool cleanupTransactions); +void REPL_trans_prepare(Jrd::thread_db* tdbb, Jrd::jrd_tra* transaction); +void REPL_trans_commit(Jrd::thread_db* tdbb, Jrd::jrd_tra* transaction); +void REPL_trans_rollback(Jrd::thread_db* tdbb, Jrd::jrd_tra* transaction); +void REPL_trans_cleanup(Jrd::thread_db* tdbb, TraNumber number); +void REPL_save_cleanup(Jrd::thread_db* tdbb, Jrd::jrd_tra* transaction, + const Jrd::Savepoint* savepoint, bool undo); +void REPL_store(Jrd::thread_db* tdbb, const Jrd::record_param* rpb, + Jrd::jrd_tra* transaction); +void REPL_modify(Jrd::thread_db* tdbb, const Jrd::record_param* orgRpb, + const Jrd::record_param* newRpb, Jrd::jrd_tra* transaction); +void REPL_erase(Jrd::thread_db* tdbb, const Jrd::record_param* rpb, Jrd::jrd_tra* transaction); +void REPL_gen_id(Jrd::thread_db* tdbb, SLONG genId, SINT64 value); +void REPL_exec_sql(Jrd::thread_db* tdbb, Jrd::jrd_tra* transaction, const Firebird::string& sql); + +#endif // JRD_REPLICATION_PUBLISHER_H diff --git a/src/jrd/replication/Replicator.cpp b/src/jrd/replication/Replicator.cpp new file mode 100644 index 0000000000..1120bb90c2 --- /dev/null +++ b/src/jrd/replication/Replicator.cpp @@ -0,0 +1,541 @@ +/* + * The contents of this file are subject to the Initial + * Developer's Public License Version 1.0 (the "License"); + * you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * http://www.ibphoenix.com/main.nfs?a=ibphoenix&page=ibp_idpl. + * + * Software distributed under the License is distributed AS IS, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. + * See the License for the specific language governing rights + * and limitations under the License. 
+ * + * The Original Code was created by Dmitry Yemanov + * for the Firebird Open Source RDBMS project. + * + * Copyright (c) 2013 Dmitry Yemanov + * and all contributors signed below. + * + * All Rights Reserved. + * Contributor(s): ______________________________________. + */ + +#include "firebird.h" +#include "../jrd/jrd.h" + +#include "Config.h" +#include "Replicator.h" +#include "Utils.h" + +using namespace Firebird; +using namespace Jrd; +using namespace Replication; + + +Replicator* Replicator::create(MemoryPool& pool, + const string& dbId, + const PathName& database, + const Guid& guid, + const MetaName& user, + bool cleanupTransactions) +{ + const auto manager = Manager::create(dbId, database, guid); + + return manager ? FB_NEW_POOL(pool) + Replicator(pool, manager, database, guid, user, cleanupTransactions) : NULL; +} + +Replicator::Replicator(MemoryPool& pool, + Manager* manager, + const PathName& database, + const Guid& guid, + const MetaName& user, + bool cleanupTransactions) + : PermanentStorage(pool), + m_manager(manager), + m_config(manager->getConfig()), + m_database(pool, database), + m_user(user), + m_transactions(pool), + m_generators(pool), + m_status(pool) +{ + memcpy(&m_guid, &guid, sizeof(Guid)); + + if (cleanupTransactions) + cleanupTransaction(0); +} + +Replicator::~Replicator() +{ + Manager::destroy(m_manager); +} + +void Replicator::flush(BatchBlock& block, FlushReason reason, ULONG flags) +{ + const auto traNumber = block.header.traNumber; + + const auto orgLength = (ULONG) block.buffer->getCount(); + fb_assert(orgLength > sizeof(Block)); + block.header.dataLength = orgLength - sizeof(Block); + block.header.metaLength = (ULONG) (block.metadata.getCount() * sizeof(MetaName)); + block.header.timestamp = TimeStamp::getCurrentTimeStamp().value(); + block.header.flags |= flags; + + // Add metadata (if any) to the buffer + + if (block.header.metaLength) + { + block.buffer->resize(orgLength + block.header.metaLength); + 
memcpy(block.buffer->begin() + orgLength, block.metadata.begin(), block.header.metaLength); + } + + // Re-write the updated header + + memcpy(block.buffer->begin(), &block.header, sizeof(Block)); + + // Pass the buffer to the replication manager and setup the new one + + const auto sync = (reason == FLUSH_SYNC); + m_manager->flush(block.buffer, sync); + + memset(&block.header, 0, sizeof(Block)); + block.header.traNumber = traNumber; + + block.metadata.clear(); + block.lastMetaId = MAX_ULONG; + block.buffer = m_manager->getBuffer(); + block.flushes++; +} + +void Replicator::logError(const IStatus* status) +{ + string message; + + auto statusPtr = status->getErrors(); + + char temp[BUFFER_LARGE]; + while (fb_interpret(temp, sizeof(temp), &statusPtr)) + { + if (!message.isEmpty()) + message += "\n\t"; + + message += temp; + } + + logOriginMessage(m_database, message, ERROR_MSG); +} + +void Replicator::postError(const Exception& ex) +{ + FbLocalStatus tempStatus; + ex.stuffException(&tempStatus); + + logError(&tempStatus); + + Arg::StatusVector newErrors; + newErrors << Arg::Gds(isc_random) << Arg::Str("Replication error"); + newErrors << Arg::StatusVector(tempStatus->getErrors()); + newErrors.copyTo(&m_status); +} + +// IDisposable implementation + +void Replicator::dispose() +{ + try + { + delete this; + } + catch (const Exception& ex) + { + postError(ex); + } +} + +// IReplicatedSession implementation + +IReplicatedTransaction* Replicator::startTransaction(SINT64 number) +{ + AutoPtr transaction; + + try + { + MutexLockGuard guard(m_mutex, FB_FUNCTION); + + MemoryPool& pool = getPool(); + transaction = FB_NEW_POOL(pool) Transaction(this); + m_transactions.add(transaction); + + auto& txnData = transaction->getData(); + + fb_assert(!txnData.header.traNumber); + txnData.header.traNumber = number; + txnData.header.flags = BLOCK_BEGIN_TRANS; + + txnData.buffer = m_manager->getBuffer(); + + txnData.putTag(opStartTransaction); + } + catch (const Exception& ex) + { + 
postError(ex); + } + + return transaction.release(); +} + +bool Replicator::prepareTransaction(Transaction* transaction) +{ + try + { + MutexLockGuard guard(m_mutex, FB_FUNCTION); + + auto& txnData = transaction->getData(); + + txnData.putTag(opPrepareTransaction); + + flush(txnData, FLUSH_PREPARE); + } + catch (const Exception& ex) + { + postError(ex); + return false; + } + + return true; +} + +bool Replicator::commitTransaction(Transaction* transaction) +{ + try + { + MutexLockGuard guard(m_mutex, FB_FUNCTION); + + auto& txnData = transaction->getData(); + + for (const auto generator : m_generators) + { + fb_assert(generator.name.hasData()); + + txnData.putTag(opSetSequence); + txnData.putMetaName(generator.name.c_str()); + txnData.putBigInt(generator.value); + } + + m_generators.clear(); + + txnData.putTag(opCommitTransaction); + flush(txnData, FLUSH_SYNC, BLOCK_END_TRANS); + + FB_SIZE_T pos; + if (m_transactions.find(transaction, pos)) + m_transactions.remove(pos); + + transaction->dispose(); + } + catch (const Exception& ex) + { + postError(ex); + return false; + } + + return true; +} + +bool Replicator::rollbackTransaction(Transaction* transaction) +{ + try + { + MutexLockGuard guard(m_mutex, FB_FUNCTION); + + auto& txnData = transaction->getData(); + + if (txnData.flushes) + { + txnData.putTag(opRollbackTransaction); + flush(txnData, FLUSH_SYNC, BLOCK_END_TRANS); + } + + FB_SIZE_T pos; + if (m_transactions.find(transaction, pos)) + m_transactions.remove(pos); + + transaction->dispose(); + } + catch (const Exception& ex) + { + postError(ex); + return false; + } + + return true; +} + +bool Replicator::startSavepoint(Transaction* transaction) +{ + try + { + MutexLockGuard guard(m_mutex, FB_FUNCTION); + + auto& txnData = transaction->getData(); + + txnData.putTag(opStartSavepoint); + + if (txnData.getSize() > m_config->bufferSize) + flush(txnData, FLUSH_OVERFLOW); + } + catch (const Exception& ex) + { + postError(ex); + return false; + } + + return true; +} + 
+bool Replicator::releaseSavepoint(Transaction* transaction) +{ + try + { + MutexLockGuard guard(m_mutex, FB_FUNCTION); + + auto& txnData = transaction->getData(); + + txnData.putTag(opReleaseSavepoint); + + if (txnData.getSize() > m_config->bufferSize) + flush(txnData, FLUSH_OVERFLOW); + } + catch (const Exception& ex) + { + postError(ex); + return false; + } + + return true; +} + +bool Replicator::rollbackSavepoint(Transaction* transaction) +{ + try + { + MutexLockGuard guard(m_mutex, FB_FUNCTION); + + auto& txnData = transaction->getData(); + + txnData.putTag(opRollbackSavepoint); + + flush(txnData, FLUSH_SYNC); + } + catch (const Exception& ex) + { + postError(ex); + return false; + } + + return true; +} + +bool Replicator::insertRecord(Transaction* transaction, + const char* relName, + IReplicatedRecord* record) +{ + try + { + MutexLockGuard guard(m_mutex, FB_FUNCTION); + + const auto length = record->getRawLength(); + const auto data = record->getRawData(); + + auto& txnData = transaction->getData(); + + txnData.putTag(opInsertRecord); + txnData.putMetaName(relName); + txnData.putBinary(length, data); + + if (txnData.getSize() > m_config->bufferSize) + flush(txnData, FLUSH_OVERFLOW); + } + catch (const Exception& ex) + { + postError(ex); + return false; + } + + return true; +} + +bool Replicator::updateRecord(Transaction* transaction, + const char* relName, + IReplicatedRecord* orgRecord, + IReplicatedRecord* newRecord) +{ + try + { + MutexLockGuard guard(m_mutex, FB_FUNCTION); + + const auto orgLength = orgRecord->getRawLength(); + const auto orgData = orgRecord->getRawData(); + + const auto newLength = newRecord->getRawLength(); + const auto newData = newRecord->getRawData(); + + auto& txnData = transaction->getData(); + + txnData.putTag(opUpdateRecord); + txnData.putMetaName(relName); + txnData.putBinary(orgLength, orgData); + txnData.putBinary(newLength, newData); + + if (txnData.getSize() > m_config->bufferSize) + flush(txnData, FLUSH_OVERFLOW); + } + 
catch (const Exception& ex) + { + postError(ex); + return false; + } + + return true; +} + +bool Replicator::deleteRecord(Transaction* transaction, + const char* relName, + IReplicatedRecord* record) +{ + try + { + MutexLockGuard guard(m_mutex, FB_FUNCTION); + + const auto length = record->getRawLength(); + const auto data = record->getRawData(); + + auto& txnData = transaction->getData(); + + txnData.putTag(opDeleteRecord); + txnData.putMetaName(relName); + txnData.putBinary(length, data); + + if (txnData.getSize() > m_config->bufferSize) + flush(txnData, FLUSH_OVERFLOW); + } + catch (const Exception& ex) + { + postError(ex); + return false; + } + + return true; +} + +bool Replicator::storeBlob(Transaction* transaction, + ISC_QUAD blobId, + IReplicatedBlob* blob) +{ + try + { + MutexLockGuard guard(m_mutex, FB_FUNCTION); + + UCharBuffer buffer; + + const auto length = blob->getLength(); + const auto data = buffer.getBuffer(length); + blob->getSegment(length, data); + + auto& txnData = transaction->getData(); + + txnData.putTag(opStoreBlob); + txnData.putInt(blobId.gds_quad_high); + txnData.putInt(blobId.gds_quad_low); + txnData.putBinary(length, data); + + if (txnData.getSize() > m_config->bufferSize) + flush(txnData, FLUSH_OVERFLOW); + } + catch (const Exception& ex) + { + postError(ex); + return false; + } + + return true; +} + +bool Replicator::executeSql(Transaction* transaction, + const char* sql) +{ + try + { + MutexLockGuard guard(m_mutex, FB_FUNCTION); + + auto& txnData = transaction->getData(); + + txnData.putTag(opExecuteSql); + txnData.putString(sql); + txnData.putMetaName(m_user); + + if (txnData.getSize() > m_config->bufferSize) + flush(txnData, FLUSH_OVERFLOW); + } + catch (const Exception& ex) + { + postError(ex); + return false; + } + + return true; +} + +FB_BOOLEAN Replicator::cleanupTransaction(SINT64 number) +{ + try + { + MutexLockGuard guard(m_mutex, FB_FUNCTION); + + BatchBlock block(getPool()); + block.header.traNumber = number; + 
block.buffer = m_manager->getBuffer(); + block.putTag(opCleanupTransaction); + + flush(block, FLUSH_SYNC, BLOCK_END_TRANS); + } + catch (const Exception& ex) + { + postError(ex); + return FB_FALSE; + } + + return FB_TRUE; +} + +FB_BOOLEAN Replicator::setSequence(const char* genName, + SINT64 value) +{ + try + { + MutexLockGuard guard(m_mutex, FB_FUNCTION); + + for (auto& generator : m_generators) + { + if (generator.name == genName) + { + generator.value = value; + return true; + } + } + + GeneratorValue generator; + generator.name = genName; + generator.value = value; + + m_generators.add(generator); + } + catch (const Exception& ex) + { + postError(ex); + return FB_FALSE; + } + + return FB_TRUE; +} diff --git a/src/jrd/replication/Replicator.h b/src/jrd/replication/Replicator.h new file mode 100644 index 0000000000..d411ef3f55 --- /dev/null +++ b/src/jrd/replication/Replicator.h @@ -0,0 +1,290 @@ +/* + * The contents of this file are subject to the Initial + * Developer's Public License Version 1.0 (the "License"); + * you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * http://www.ibphoenix.com/main.nfs?a=ibphoenix&page=ibp_idpl. + * + * Software distributed under the License is distributed AS IS, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. + * See the License for the specific language governing rights + * and limitations under the License. + * + * The Original Code was created by Dmitry Yemanov + * for the Firebird Open Source RDBMS project. + * + * Copyright (c) 2013 Dmitry Yemanov + * and all contributors signed below. + * + * All Rights Reserved. + * Contributor(s): ______________________________________. 
+ */ + + +#ifndef JRD_REPLICATION_REPLICATOR_H +#define JRD_REPLICATION_REPLICATOR_H + +#include "../../common/classes/timestamp.h" +#include "../../common/os/guid.h" +#include "../../jrd/status.h" + +#include "Protocol.h" +#include "Manager.h" + +namespace Replication +{ + class Replicator : + public Firebird::AutoIface >, + private Firebird::PermanentStorage + { + typedef Firebird::Array MetadataCache; + typedef Firebird::HalfStaticArray SavepointStack; + + struct BatchBlock + { + Block header; + Firebird::UCharBuffer* buffer; + MetadataCache metadata; + ULONG lastMetaId; + ULONG flushes; + + explicit BatchBlock(MemoryPool& pool) + : buffer(NULL), metadata(pool), + lastMetaId(MAX_ULONG), flushes(0) + { + memset(&header, 0, sizeof(Block)); + } + + ULONG getSize() const + { + return (ULONG) buffer->getCount(); + } + + void putTag(UCHAR tag) + { + buffer->add(tag); + } + + void putInt(SLONG value) + { + const auto newSize = FB_ALIGN(getSize(), FB_ALIGNMENT); + buffer->resize(newSize); + const auto ptr = (const UCHAR*) &value; + buffer->add(ptr, sizeof(SLONG)); + } + + void putBigInt(SINT64 value) + { + const auto newSize = FB_ALIGN(getSize(), FB_ALIGNMENT); + buffer->resize(newSize); + const auto ptr = (const UCHAR*) &value; + buffer->add(ptr, sizeof(SINT64)); + } + + void putMetaName(const Firebird::MetaName& name) + { + if (lastMetaId < metadata.getCount() && metadata[lastMetaId] == name) + { + putInt(lastMetaId); + return; + } + + FB_SIZE_T pos; + if (!metadata.find(name, pos)) + { + pos = metadata.getCount(); + metadata.add(name); + } + + putInt(pos); + lastMetaId = (ULONG) pos; + } + + void putString(const Firebird::string& str) + { + const auto length = str.length(); + putInt(length); + buffer->add((const UCHAR*) str.c_str(), length); + } + + void putBinary(ULONG length, const UCHAR* data) + { + putInt(length); + buffer->add(data, length); + } + }; + + class Transaction : + public Firebird::AutoIface > + { + public: + explicit Transaction(Replicator* 
replicator) + : m_replicator(replicator), m_data(replicator->getPool()) + {} + + BatchBlock& getData() + { + return m_data; + } + + // IDisposable methods + + void dispose() + { + delete this; + } + + // IReplicatedTransaction methods + + FB_BOOLEAN prepare() + { + return m_replicator->prepareTransaction(this) ? FB_TRUE : FB_FALSE; + } + + FB_BOOLEAN commit() + { + return m_replicator->commitTransaction(this) ? FB_TRUE : FB_FALSE; + } + + FB_BOOLEAN rollback() + { + return m_replicator->rollbackTransaction(this) ? FB_TRUE : FB_FALSE; + } + + FB_BOOLEAN startSavepoint() + { + return m_replicator->startSavepoint(this) ? FB_TRUE : FB_FALSE; + } + + FB_BOOLEAN releaseSavepoint() + { + return m_replicator->releaseSavepoint(this) ? FB_TRUE : FB_FALSE; + } + + FB_BOOLEAN rollbackSavepoint() + { + return m_replicator->rollbackSavepoint(this) ? FB_TRUE : FB_FALSE; + } + + FB_BOOLEAN insertRecord(const char* name, Firebird::IReplicatedRecord* record) + { + return m_replicator->insertRecord(this, name, record) ? FB_TRUE : FB_FALSE; + } + + FB_BOOLEAN updateRecord(const char* name, Firebird::IReplicatedRecord* orgRecord, Firebird::IReplicatedRecord* newRecord) + { + return m_replicator->updateRecord(this, name, orgRecord, newRecord) ? FB_TRUE : FB_FALSE; + } + + FB_BOOLEAN deleteRecord(const char* name, Firebird::IReplicatedRecord* record) + { + return m_replicator->deleteRecord(this, name, record) ? FB_TRUE : FB_FALSE; + } + + FB_BOOLEAN storeBlob(ISC_QUAD blobId, Firebird::IReplicatedBlob* blob) + { + return m_replicator->storeBlob(this, blobId, blob) ? FB_TRUE : FB_FALSE; + } + + FB_BOOLEAN executeSql(const char* sql) + { + return m_replicator->executeSql(this, sql) ? 
FB_TRUE : FB_FALSE; + } + + private: + Replicator* const m_replicator; + BatchBlock m_data; + }; + + struct GeneratorValue + { + Firebird::MetaName name; + SINT64 value; + }; + + typedef Firebird::Array GeneratorCache; + + enum FlushReason + { + FLUSH_OVERFLOW, + FLUSH_PREPARE, + FLUSH_SYNC + }; + + public: + virtual ~Replicator(); + + static Replicator* create(Firebird::MemoryPool& pool, + const Firebird::string& dbId, + const Firebird::PathName& database, + const Firebird::Guid& guid, + const Firebird::MetaName& user, + bool cleanupTransactions); + + // IDisposable methods + void dispose(); + + // IReplicatedSession methods + + Firebird::IStatus* getStatus() + { + return &m_status; + } + + Firebird::IReplicatedTransaction* startTransaction(SINT64 number); + FB_BOOLEAN cleanupTransaction(SINT64 number); + FB_BOOLEAN setSequence(const char* name, SINT64 value); + + private: + Manager* const m_manager; + const Config* const m_config; + const Firebird::PathName m_database; + Firebird::Guid m_guid; + const Firebird::MetaName m_user; + Firebird::Array m_transactions; + GeneratorCache m_generators; + Firebird::Mutex m_mutex; + Firebird::FbLocalStatus m_status; + + Replicator(Firebird::MemoryPool& pool, + Manager* manager, + const Firebird::PathName& dbName, + const Firebird::Guid& dbGuid, + const Firebird::MetaName& userName, + bool cleanupTransactions); + + void initialize(); + void flush(BatchBlock& txnData, FlushReason reason, ULONG flags = 0); + void logError(const Firebird::IStatus* status); + void postError(const Firebird::Exception& ex); + + bool prepareTransaction(Transaction* transaction); + bool commitTransaction(Transaction* transaction); + bool rollbackTransaction(Transaction* transaction); + + bool startSavepoint(Transaction* transaction); + bool releaseSavepoint(Transaction* transaction); + bool rollbackSavepoint(Transaction* transaction); + + bool insertRecord(Transaction* transaction, + const char* name, + Firebird::IReplicatedRecord* record); + bool 
updateRecord(Transaction* transaction, + const char* name, + Firebird::IReplicatedRecord* orgRecord, + Firebird::IReplicatedRecord* newRecord); + bool deleteRecord(Transaction* transaction, + const char* name, + Firebird::IReplicatedRecord* record); + + bool storeBlob(Transaction* transaction, + ISC_QUAD blobId, + Firebird::IReplicatedBlob* blob); + + bool executeSql(Transaction* transaction, + const char* sql); +}; + +} // namespace + +#endif // JRD_REPLICATION_REPLICATOR_H diff --git a/src/jrd/replication/Utils.cpp b/src/jrd/replication/Utils.cpp new file mode 100644 index 0000000000..f4f1dad7e5 --- /dev/null +++ b/src/jrd/replication/Utils.cpp @@ -0,0 +1,205 @@ +/* + * The contents of this file are subject to the Initial + * Developer's Public License Version 1.0 (the "License"); + * you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * http://www.ibphoenix.com/main.nfs?a=ibphoenix&page=ibp_idpl. + * + * Software distributed under the License is distributed AS IS, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. + * See the License for the specific language governing rights + * and limitations under the License. + * + * The Original Code was created by Dmitry Yemanov + * for the Firebird Open Source RDBMS project. + * + * Copyright (c) 2014 Dmitry Yemanov + * and all contributors signed below. + * + * All Rights Reserved. + * Contributor(s): ______________________________________. 
+ */ + +#include "firebird.h" +#include "../common/classes/GenericMap.h" +#include "../common/config/config_file.h" +#include "../common/isc_proto.h" +#include "../common/isc_f_proto.h" +#include "../common/utils_proto.h" +#include "../common/ScanDir.h" +#include "../common/os/mod_loader.h" +#include "../common/os/path_utils.h" +#include "../jrd/constants.h" + +#include "Utils.h" + +#ifdef HAVE_UNISTD_H +#include +#endif + +#include +#include +#ifdef HAVE_SYS_FILE_H +#include +#endif + +#ifdef WIN_NT +#include +#endif + +#include +#include + +using namespace Firebird; +using namespace Replication; + +namespace +{ + // Must match items inside enum LogMsgType + const char* LOG_MSG_TYPES[] = { + "ERROR", // LogMsgType::ERROR_MSG + "WARNING", // LogMsgType::WARNING_MSG + "VERBOSE", // LogMsgType::VERBOSE_MSG + "DEBUG" // LogMsgType::DEBUG_MSG + }; + + const char* REPLICATION_LOGFILE = "replication.log"; + + class LogWriter : private GlobalStorage + { + public: + LogWriter() + : m_hostname(getPool()), + m_filename(getPool(), fb_utils::getPrefix(IConfigManager::DIR_LOG, REPLICATION_LOGFILE)) + { + char host[BUFFER_LARGE]; + ISC_get_host(host, sizeof(host)); + m_hostname = host; +#ifdef WIN_NT + m_mutex = CreateMutex(NULL, FALSE, "firebird_repl_mutex"); +#endif + } + + ~LogWriter() + { +#ifdef WIN_NT + CloseHandle(m_mutex); +#endif + } + + void logMessage(const string& source, const PathName& database, + LogMsgType type, const string& message) + { + const time_t now = time(NULL); + + const auto file = fopen(m_filename.c_str(), "a"); + if (file && lock(file)) + { + fseek(file, 0, SEEK_END); + fprintf(file, "\n%s (%s) %s\tDatabase: %s\n\t%s: %s\n", + m_hostname.c_str(), source.c_str(), ctime(&now), + database.c_str(), LOG_MSG_TYPES[type], message.c_str()); + fclose(file); + unlock(); + } + } + + private: + bool lock(FILE* file) + { +#ifdef WIN_NT + return (WaitForSingleObject(m_mutex, INFINITE) == WAIT_OBJECT_0); +#else +#ifdef HAVE_FLOCK + if (flock(fileno(file), LOCK_EX)) 
+#else + if (lockf(fileno(file), F_LOCK, 0)) +#endif + { + return false; + } + + return true; +#endif + } + + void unlock() + { +#ifdef WIN_NT + ReleaseMutex(m_mutex); +#endif + } + + string m_hostname; + const PathName m_filename; +#ifdef WIN_NT + HANDLE m_mutex; +#endif + }; + + void logMessage(const string& source, const PathName& database, + const string& message, LogMsgType type) + { + static LogWriter g_writer; + + g_writer.logMessage(source, database, type, message); + } + +} // namespace + +namespace Replication +{ + void raiseError(const char* msg, ...) + { + char buffer[BUFFER_LARGE]; + + va_list ptr; + va_start(ptr, msg); + vsprintf(buffer, msg, ptr); + va_end(ptr); + + Arg::StatusVector error; + error << Arg::Gds(isc_random) << Arg::Str(buffer); + error.raise(); + } + + int executeShell(const string& command) + { +#ifdef WIN_NT + string params; + params.printf("/c %s", command.c_str()); + SHELLEXECUTEINFO seInfo = {0}; + seInfo.cbSize = sizeof(SHELLEXECUTEINFO); + seInfo.fMask = SEE_MASK_NOCLOSEPROCESS; + seInfo.hwnd = NULL; + seInfo.lpVerb = NULL; + seInfo.lpFile = "cmd.exe"; + seInfo.lpParameters = params.c_str(); + seInfo.lpDirectory = NULL; + seInfo.nShow = SW_HIDE; + seInfo.hInstApp = NULL; + ShellExecuteEx(&seInfo); + WaitForSingleObject(seInfo.hProcess, INFINITE); + DWORD exitCode = 0; + GetExitCodeProcess(seInfo.hProcess, &exitCode); + return (int) exitCode; +#else + return system(command.c_str()); +#endif + } + + void logOriginMessage(const PathName& database, + const string& message, + LogMsgType type) + { + logMessage("origin", database, message, type); + } + + void logReplicaMessage(const PathName& database, + const string& message, + LogMsgType type) + { + logMessage("replica", database, message, type); + } + +} // namespace diff --git a/src/jrd/replication/Utils.h b/src/jrd/replication/Utils.h new file mode 100644 index 0000000000..9b8cdf8b72 --- /dev/null +++ b/src/jrd/replication/Utils.h @@ -0,0 +1,87 @@ +/* + * The contents of this file 
are subject to the Initial + * Developer's Public License Version 1.0 (the "License"); + * you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * http://www.ibphoenix.com/main.nfs?a=ibphoenix&page=ibp_idpl. + * + * Software distributed under the License is distributed AS IS, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. + * See the License for the specific language governing rights + * and limitations under the License. + * + * The Original Code was created by Dmitry Yemanov + * for the Firebird Open Source RDBMS project. + * + * Copyright (c) 2014 Dmitry Yemanov + * and all contributors signed below. + * + * All Rights Reserved. + * Contributor(s): ______________________________________. + */ + + +#ifndef JRD_REPLICATION_UTILS_H +#define JRD_REPLICATION_UTILS_H + +#include "../common/classes/fb_string.h" + +#ifdef WIN_NT +#include +#endif + +#include + +namespace Replication +{ + enum LogMsgType + { + ERROR_MSG = 0, + WARNING_MSG, + VERBOSE_MSG, + DEBUG_MSG + }; + + void raiseError(const char* msg, ...); + int executeShell(const Firebird::string& command); + + void logOriginMessage(const Firebird::PathName& database, + const Firebird::string& message, + LogMsgType type); + + void logReplicaMessage(const Firebird::PathName& database, + const Firebird::string& message, + LogMsgType type); + + class AutoFile + { + public: + explicit AutoFile(int fd) + : m_handle(fd) + {} + + ~AutoFile() + { + release(); + } + + operator int() const + { + return m_handle; + } + + void release() + { + if (m_handle) + { + ::close(m_handle); + m_handle = 0; + } + } + + protected: + int m_handle; + }; +} + +#endif // JRD_REPLICATION_UTILS_H diff --git a/src/jrd/tra.cpp b/src/jrd/tra.cpp index a18a275b09..79fb0c2db9 100644 --- a/src/jrd/tra.cpp +++ b/src/jrd/tra.cpp @@ -70,6 +70,7 @@ #include "../dsql/dsql.h" #include "../dsql/dsql_proto.h" #include "../common/StatusArg.h" +#include "../jrd/replication/Publisher.h" 
#include "../jrd/trace/TraceManager.h" #include "../jrd/trace/TraceJrdHelpers.h" #include "../jrd/Function.h" @@ -279,7 +280,7 @@ bool TRA_active_transactions(thread_db* tdbb, Database* dbb) return LCK_query_data(tdbb, LCK_tra, LCK_ANY) ? true : false; } -void TRA_cleanup(thread_db* tdbb) +bool TRA_cleanup(thread_db* tdbb) { /************************************** * @@ -301,7 +302,7 @@ void TRA_cleanup(thread_db* tdbb) // Return without cleaning up the TIP's for a ReadOnly database if (dbb->readOnly()) - return; + return false; // First, make damn sure there are no outstanding transactions @@ -309,7 +310,7 @@ void TRA_cleanup(thread_db* tdbb) attachment = attachment->att_next) { if (attachment->att_transactions) - return; + return false; } const ULONG trans_per_tip = dbb->dbb_page_manager.transPerTIP; @@ -325,7 +326,7 @@ void TRA_cleanup(thread_db* tdbb) CCH_RELEASE(tdbb, &window); if (ceiling == 0) - return; + return false; // Zip thru transactions from the "oldest active" to the next looking for // active transactions. When one is found, declare it dead. 
@@ -333,6 +334,7 @@ void TRA_cleanup(thread_db* tdbb) const ULONG last = ceiling / trans_per_tip; ULONG number = active % trans_per_tip; TraNumber limbo = 0; + bool found = false; for (ULONG sequence = active / trans_per_tip; sequence <= last; sequence++, number = 0) { @@ -351,6 +353,7 @@ void TRA_cleanup(thread_db* tdbb) limbo = (TraNumber) sequence * trans_per_tip + number; else if (state == tra_active) { + found = true; CCH_MARK(tdbb, &window); *byte &= ~(TRA_MASK << shift); @@ -411,6 +414,8 @@ void TRA_cleanup(thread_db* tdbb) CCH_RELEASE(tdbb, &window); #endif + + return found; } @@ -491,9 +496,7 @@ void TRA_commit(thread_db* tdbb, jrd_tra* transaction, const bool retaining_flag // Get rid of user savepoints to allow intermediate garbage collection // in indices and BLOBs after in-place updates while (transaction->tra_save_point) - { transaction->rollforwardSavepoint(tdbb); - } transaction_flush(tdbb, FLUSH_TRAN, transaction->tra_number); } @@ -517,6 +520,7 @@ void TRA_commit(thread_db* tdbb, jrd_tra* transaction, const bool retaining_flag // Set the state on the inventory page to be committed TRA_set_state(tdbb, transaction, transaction->tra_number, tra_committed); + REPL_trans_commit(tdbb, transaction); // Perform any post commit work @@ -1294,6 +1298,11 @@ void TRA_release_transaction(thread_db* tdbb, jrd_tra* transaction, Jrd::TraceTr transaction->unlinkFromAttachment(); + // Destroy the replicated transaction reference + + if (transaction->tra_replicator) + transaction->tra_replicator->dispose(); + // Release transaction's under-modification-rpb list delete transaction->tra_rpblist; @@ -1429,6 +1438,7 @@ void TRA_rollback(thread_db* tdbb, jrd_tra* transaction, const bool retaining_fl } TRA_set_state(tdbb, transaction, transaction->tra_number, state); + REPL_trans_rollback(tdbb, transaction); TRA_release_transaction(tdbb, transaction, &trace); } @@ -1964,6 +1974,7 @@ int TRA_wait(thread_db* tdbb, jrd_tra* trans, TraNumber number, jrd_tra::wait_t { state = 
tra_dead; TRA_set_state(tdbb, 0, number, tra_dead); + REPL_trans_cleanup(tdbb, number); } // If the transaction disappeared into limbo, died, for constructively @@ -2537,6 +2548,11 @@ static void retain_context(thread_db* tdbb, jrd_tra* transaction, bool commit, i { // Set the state on the inventory page TRA_set_state(tdbb, transaction, old_number, state); + + if (commit) + REPL_trans_commit(tdbb, transaction); + else + REPL_trans_rollback(tdbb, transaction); } transaction->tra_number = new_number; @@ -3205,6 +3221,11 @@ static void transaction_start(thread_db* tdbb, jrd_tra* trans) Jrd::Attachment* const attachment = tdbb->getAttachment(); WIN window(DB_PAGE_SPACE, -1); + // Inside the replica, only replicator sessions are allowed to modify data. + // Fake other transactions as read-only to disallow any modifications. + if (dbb->isReplica(REPLICA_READ_ONLY) && !(tdbb->tdbb_flags & TDBB_replicator)) + trans->tra_flags |= TRA_readonly; + Lock* lock = FB_NEW_RPT(*tdbb->getDefaultPool(), 0) Lock(tdbb, sizeof(TraNumber), LCK_tra); // Read header page and allocate transaction number. 
Since @@ -3705,6 +3726,8 @@ void jrd_tra::rollbackSavepoint(thread_db* tdbb) { if (tra_save_point && !(tra_flags & TRA_system)) { + REPL_save_cleanup(tdbb, this, tra_save_point, true); + Jrd::ContextPoolHolder context(tdbb, tra_pool); tra_save_point = tra_save_point->rollback(tdbb); } @@ -3758,6 +3781,8 @@ void jrd_tra::rollforwardSavepoint(thread_db* tdbb) { if (tra_save_point && !(tra_flags & TRA_system)) { + REPL_save_cleanup(tdbb, this, tra_save_point, false); + Jrd::ContextPoolHolder context(tdbb, tra_pool); tra_save_point = tra_save_point->rollforward(tdbb); } diff --git a/src/jrd/tra.h b/src/jrd/tra.h index e447bda6fe..b12841a56a 100644 --- a/src/jrd/tra.h +++ b/src/jrd/tra.h @@ -144,6 +144,8 @@ struct CallerName Firebird::MetaName userName; }; +typedef Firebird::GenericMap > > ReplBlobMap; + const int DEFAULT_LOCK_TIMEOUT = -1; // infinite const char* const TRA_BLOB_SPACE = "fb_blob_"; const char* const TRA_UNDO_SPACE = "fb_undo_"; @@ -170,6 +172,7 @@ public: tra_blobs_tree(p), tra_blobs(outer ? 
outer->tra_blobs : &tra_blobs_tree), tra_fetched_blobs(p), + tra_repl_blobs(*p), tra_arrays(NULL), tra_deferred_job(NULL), tra_resources(*p), @@ -184,6 +187,7 @@ public: tra_sorts(*p), tra_public_interface(NULL), tra_gen_ids(NULL), + tra_replicator(NULL), tra_interface(NULL), tra_blob_space(NULL), tra_undo_space(NULL), @@ -263,6 +267,7 @@ public: BlobIndexTree tra_blobs_tree; // list of active blobs BlobIndexTree* tra_blobs; // pointer to actual list of active blobs FetchedBlobIdTree tra_fetched_blobs; // list of fetched blobs + ReplBlobMap tra_repl_blobs; // map of blob IDs replicated in this transaction ArrayField* tra_arrays; // Linked list of active arrays Lock* tra_lock; // lock for transaction Lock* tra_alter_db_lock; // lock for ALTER DATABASE statement(s) @@ -296,6 +301,7 @@ public: //Transaction *tra_ext_two_phase; Firebird::ITransaction* tra_public_interface; GenIdCache* tra_gen_ids; + Firebird::IReplicatedTransaction* tra_replicator; private: JTransaction* tra_interface; diff --git a/src/jrd/tra_proto.h b/src/jrd/tra_proto.h index 09a8de7f12..1ac8f9bf73 100644 --- a/src/jrd/tra_proto.h +++ b/src/jrd/tra_proto.h @@ -33,7 +33,7 @@ namespace Jrd { } bool TRA_active_transactions(Jrd::thread_db* tdbb, Jrd::Database*); -void TRA_cleanup(Jrd::thread_db*); +bool TRA_cleanup(Jrd::thread_db*); void TRA_commit(Jrd::thread_db* tdbb, Jrd::jrd_tra*, const bool); void TRA_extend_tip(Jrd::thread_db* tdbb, ULONG /*, struct Jrd::win* */); int TRA_fetch_state(Jrd::thread_db* tdbb, TraNumber number); diff --git a/src/jrd/trig.h b/src/jrd/trig.h index b239e49259..569a0e51c0 100644 --- a/src/jrd/trig.h +++ b/src/jrd/trig.h @@ -20,6 +20,7 @@ * All Rights Reserved. * Contributor(s): ______________________________________. 
*/ + #ifndef JRD_TRIG_H #define JRD_TRIG_H @@ -35,7 +36,7 @@ namespace Jrd { struct jrd_trg { - const SCHAR* trg_name; + const char* trg_name; UCHAR trg_relation; UCHAR trg_type; USHORT trg_length; @@ -48,9 +49,9 @@ struct jrd_trg struct trigger_msg { - const SCHAR* trigmsg_name; + const char* trigmsg_name; USHORT trigmsg_number; - const SCHAR* trigmsg_text; + const char* trigmsg_text; USHORT trg_ods_version; }; @@ -58,9 +59,9 @@ struct trigger_msg struct gen { - const SCHAR* gen_name; + const char* gen_name; USHORT gen_id; - const char* gen_description; + const char* gen_description; }; } //namespace Jrd @@ -80,7 +81,7 @@ static const Jrd::gen generators[] = { "RDB$BACKUP_HISTORY", 9, "Nbackup technology" }, { FUNCTIONS_GENERATOR, 10, "Function ID" }, { "RDB$GENERATOR_NAME", 11, "Implicit generator name" }, - { 0, 0, NULL } + { nullptr, 0, nullptr } }; diff --git a/src/lock/lock.cpp b/src/lock/lock.cpp index d98bb7af65..c5f78644e5 100644 --- a/src/lock/lock.cpp +++ b/src/lock/lock.cpp @@ -163,13 +163,13 @@ static const bool compatibility[LCK_max][LCK_max] = namespace Jrd { -Firebird::GlobalPtr LockManager::g_lmMap; -Firebird::GlobalPtr LockManager::g_mapMutex; +GlobalPtr LockManager::g_lmMap; +GlobalPtr LockManager::g_mapMutex; -LockManager* LockManager::create(const Firebird::string& id, RefPtr conf) +LockManager* LockManager::create(const string& id, RefPtr conf) { - Firebird::MutexLockGuard guard(g_mapMutex, FB_FUNCTION); + MutexLockGuard guard(g_mapMutex, FB_FUNCTION); LockManager* lockMgr = NULL; if (!g_lmMap->get(id, lockMgr)) @@ -193,9 +193,9 @@ void LockManager::destroy(LockManager* lockMgr) { if (lockMgr) { - const Firebird::string id = lockMgr->m_dbId; + const string id = lockMgr->m_dbId; - Firebird::MutexLockGuard guard(g_mapMutex, FB_FUNCTION); + MutexLockGuard guard(g_mapMutex, FB_FUNCTION); if (!lockMgr->release()) { @@ -208,7 +208,7 @@ void LockManager::destroy(LockManager* lockMgr) } -LockManager::LockManager(const Firebird::string& id, RefPtr 
conf) +LockManager::LockManager(const string& id, RefPtr conf) : PID(getpid()), m_bugcheck(false), m_sharedFileCreated(false), @@ -281,7 +281,7 @@ LockManager::~LockManager() if (m_sharedMemory->getHeader() && SRQ_EMPTY(m_sharedMemory->getHeader()->lhb_processes)) { - Firebird::PathName name; + PathName name; get_shared_file_name(name); m_sharedMemory->removeMapFile(); #ifdef USE_SHMEM_EXT @@ -338,7 +338,7 @@ void* LockManager::ABS_PTR(SRQ_PTR item) bool LockManager::attach_shared_file(CheckStatusWrapper* statusVector) { - Firebird::PathName name; + PathName name; get_shared_file_name(name); try @@ -381,12 +381,12 @@ void LockManager::detach_shared_file(CheckStatusWrapper* statusVector) } -void LockManager::get_shared_file_name(Firebird::PathName& name, ULONG extent) const +void LockManager::get_shared_file_name(PathName& name, ULONG extent) const { name.printf(LOCK_FILE, m_dbId.c_str()); if (extent) { - Firebird::PathName ename; + PathName ename; ename.printf("%s.ext%d", name.c_str(), extent); name = ename; } @@ -1198,7 +1198,7 @@ void LockManager::acquire_shmem(SRQ_PTR owner_offset) #ifdef HAVE_OBJECT_MAP const ULONG new_length = m_sharedMemory->getHeader()->lhb_length; - Firebird::WriteLockGuard guard(m_remapSync, FB_FUNCTION); + WriteLockGuard guard(m_remapSync, FB_FUNCTION); // Post remapping notifications remap_local_owners(); // Remap the shared memory region @@ -1253,7 +1253,7 @@ void LockManager::Extent::mutexBug(int, const char*) bool LockManager::createExtent(CheckStatusWrapper* statusVector) { - Firebird::PathName name; + PathName name; get_shared_file_name(name, (ULONG) m_extents.getCount()); Extent& extent = m_extents.add(); @@ -1305,7 +1305,7 @@ UCHAR* LockManager::alloc(USHORT size, CheckStatusWrapper* statusVector) } else #elif (defined HAVE_OBJECT_MAP) - Firebird::WriteLockGuard guard(m_remapSync, FB_FUNCTION); + WriteLockGuard guard(m_remapSync, FB_FUNCTION); // Post remapping notifications remap_local_owners(); // Remap the shared memory region 
@@ -1544,14 +1544,14 @@ void LockManager::blocking_action_thread() m_sharedMemory->eventWait(&m_process->prc_blocking, value, 0); } } - catch (const Firebird::Exception& x) + catch (const Exception& x) { iscLogException("Error in blocking action thread\n", x); } } -void LockManager::exceptionHandler(const Firebird::Exception& ex, +void LockManager::exceptionHandler(const Exception& ex, ThreadFinishSync::ThreadRoutine* /*routine*/) { /************************************** @@ -2680,7 +2680,7 @@ void LockManager::post_blockage(thread_db* tdbb, lrq* request, lbl* lock) ASSERT_ACQUIRED; CHECK(request->lrq_flags & LRQ_pending); - Firebird::HalfStaticArray blocking_owners; + HalfStaticArray blocking_owners; SRQ lock_srq; SRQ_LOOP(lock->lbl_requests, lock_srq) @@ -2721,7 +2721,7 @@ void LockManager::post_blockage(thread_db* tdbb, lrq* request, lbl* lock) break; } - Firebird::HalfStaticArray dead_processes; + HalfStaticArray dead_processes; for (SRQ_PTR* iter = blocking_owners.begin(); iter != blocking_owners.end(); ++iter) { @@ -3874,7 +3874,7 @@ void LockManager::wait_for_request(thread_db* tdbb, lrq* request, SSHORT lck_wai LockTableCheckout checkout(this, FB_FUNCTION); { // scope - Firebird::ReadLockGuard guard(m_remapSync, FB_FUNCTION); + ReadLockGuard guard(m_remapSync, FB_FUNCTION); owner = (own*) SRQ_ABS_PTR(owner_offset); ++m_waitingOwners; } diff --git a/src/msgs/facilities2.sql b/src/msgs/facilities2.sql index 5c602a631d..fb2806e8e1 100644 --- a/src/msgs/facilities2.sql +++ b/src/msgs/facilities2.sql @@ -1,9 +1,9 @@ /* MAX_NUMBER is the next number to be used, always one more than the highest message number. 
*/ set bulk_insert INSERT INTO FACILITIES (LAST_CHANGE, FACILITY, FAC_CODE, MAX_NUMBER) VALUES (?, ?, ?, ?); -- -('2019-01-14 20:23:00', 'JRD', 0, 931) +('2019-01-14 20:23:00', 'JRD', 0, 932) ('2015-03-17 18:33:00', 'QLI', 1, 533) -('2015-01-07 18:01:51', 'GFIX', 3, 134) +('2018-03-17 12:00:00', 'GFIX', 3, 136) ('1996-11-07 13:39:40', 'GPRE', 4, 1) ('2017-02-05 20:37:00', 'DSQL', 7, 41) ('2018-06-22 11:46:00', 'DYN', 8, 309) diff --git a/src/msgs/messages2.sql b/src/msgs/messages2.sql index 829f3b346b..ac1cb6032c 100644 --- a/src/msgs/messages2.sql +++ b/src/msgs/messages2.sql @@ -1038,6 +1038,7 @@ Data source : @4', NULL, NULL) ('tom_rsa_sign', NULL, 'SysFunction.cpp', NULL, 0, 928, NULL, 'RSA-signing data', NULL, NULL); ('tom_rsa_verify', NULL, 'SysFunction.cpp', NULL, 0, 929, NULL, 'Verifying RSA-signed data', NULL, NULL); ('tom_chacha_key', NULL, 'SysFunction.cpp', NULL, 0, 930, NULL, 'Invalid key length @1, need 16 or 32', NULL, NULL); +('bad_repl_handle', NULL, 'jrd.cpp', NULL, 0, 931, NULL, 'invalid replicator handle', NULL, NULL); -- QLI (NULL, NULL, NULL, NULL, 1, 0, NULL, 'expected type', NULL, NULL); (NULL, NULL, NULL, NULL, 1, 1, NULL, 'bad block type', NULL, NULL); @@ -1784,6 +1785,8 @@ COMMIT WORK; ('gfix_opt_icu', 'ALICE_gfix', 'alice.c', NULL, 3, 131, NULL, ' -icu fix database to be usable with present ICU version', NULL, NULL); ('gfix_opt_role', 'ALICE_gfix', 'alice.c', NULL, 3, 132, NULL, ' -role set SQL role name', NULL, NULL); ('gfix_role_req', 'ALICE_gfix', 'alice.c', NULL, 3, 133, NULL, 'SQL role name required', NULL, NULL); +('gfix_opt_repl', 'ALICE_gfix', 'alice.c', NULL, 3, 134, NULL, ' -repl(ica) replica mode ', NULL, NULL); +('gfix_repl_mode_req', 'ALICE_gfix', 'alice.c', NULL, 3, 135, NULL, 'replica mode (none / read_only / read_write) required', NULL, NULL); -- DSQL ('dsql_dbkey_from_non_table', 'MAKE_desc', 'make.c', NULL, 7, 2, NULL, 'Cannot SELECT RDB$DB_KEY from a stored procedure.', NULL, NULL); ('dsql_transitional_numeric', 
'dsql_yyparse', 'parse.y', NULL, 7, 3, NULL, 'Precision 10 to 18 changed from DOUBLE PRECISION in SQL dialect 1 to 64-bit scaled integer in SQL dialect 3', NULL, NULL); diff --git a/src/msgs/system_errors2.sql b/src/msgs/system_errors2.sql index 93e365cd8b..14ab8246d4 100644 --- a/src/msgs/system_errors2.sql +++ b/src/msgs/system_errors2.sql @@ -937,6 +937,7 @@ set bulk_insert INSERT INTO SYSTEM_ERRORS (SQL_CODE, SQL_CLASS, SQL_SUBCLASS, FA (-901, '22', '023', 0, 928, 'tom_rsa_sign', NULL, NULL) (-901, '22', '023', 0, 929, 'tom_rsa_verify', NULL, NULL) (-901, '22', '023', 0, 930, 'tom_chacha_key', NULL, NULL) +(-901, '08', '003', 0, 931, 'bad_repl_handle', NULL, NULL) -- GFIX (-901, '00', '000', 3, 1, 'gfix_db_name', NULL, NULL) (-901, '00', '000', 3, 2, 'gfix_invalid_sw', NULL, NULL) diff --git a/src/remote/client/interface.cpp b/src/remote/client/interface.cpp index 866bc66830..ff0785beff 100644 --- a/src/remote/client/interface.cpp +++ b/src/remote/client/interface.cpp @@ -522,6 +522,39 @@ int Batch::release() return 0; } +class Replicator FB_FINAL : public RefCntIface > +{ +public: + // IReplicator implementation + int release(); + void process(CheckStatusWrapper* status, unsigned length, const unsigned char* data); + void close(CheckStatusWrapper* status); + + explicit Replicator(Attachment* att) : attachment(att) + {} + +private: + void freeClientData(CheckStatusWrapper* status, bool force = false); + + Attachment* attachment; +}; + +int Replicator::release() +{ + if (--refCounter != 0) + return 1; + + if (attachment) + { + LocalStatus ls; + CheckStatusWrapper status(&ls); + freeClientData(&status, true); + } + delete this; + + return 0; +} + class Statement FB_FINAL : public RefCntIface > { public: @@ -771,9 +804,11 @@ public: unsigned stmtLength, const char* sqlStmt, unsigned dialect, IMessageMetadata* inMetadata, unsigned parLength, const unsigned char* par); + Replicator* createReplicator(Firebird::CheckStatusWrapper* status); + public: Attachment(Rdb* 
handle, const PathName& path) - : rdb(handle), dbPath(getPool(), path) + : rdb(handle), dbPath(getPool(), path), replicator(nullptr) { } Rdb* getRdb() @@ -790,6 +825,8 @@ public: Transaction* remoteTransactionInterface(ITransaction* apiTra); Statement* createStatement(CheckStatusWrapper* status, unsigned dialect); + Replicator* replicator; + private: void execWithCheck(CheckStatusWrapper* status, const string& stmt); void freeClientData(CheckStatusWrapper* status, bool force = false); @@ -2770,6 +2807,129 @@ void Batch::releaseStatement() } +Replicator* Attachment::createReplicator(CheckStatusWrapper* status) +{ +/************************************** + * + * c r e a t e R e p l i c a t o r + * + ************************************** + * + * Functional description + * Create data replication interface. + * + **************************************/ + + try + { + reset(status); + + // Check and validate handles, etc. + CHECK_HANDLE(rdb, isc_bad_db_handle); + rem_port* port = rdb->rdb_port; + + if (port->port_protocol < PROTOCOL_VERSION16) + unsupported(); + + if (!replicator) + replicator = FB_NEW Replicator(this); + + replicator->addRef(); + return replicator; + } + catch (const Exception& ex) + { + ex.stuffException(status); + } + + return NULL; +} + + +void Replicator::process(CheckStatusWrapper* status, unsigned length, const unsigned char* data) +{ + try + { + reset(status); + + Rdb* rdb = attachment->getRdb(); + CHECK_HANDLE(rdb, isc_bad_db_handle); + rem_port* port = rdb->rdb_port; + + if (port->port_protocol < PROTOCOL_VERSION16) + unsupported(); + + // Validate data length + CHECK_LENGTH(port, length); + + PACKET* packet = &rdb->rdb_packet; + packet->p_operation = op_repl_data; + P_REPLICATE* repl = &packet->p_replicate; + repl->p_repl_database = rdb->rdb_id; + repl->p_repl_data.cstr_length = length; + repl->p_repl_data.cstr_address = data; + + RefMutexGuard portGuard(*port->port_sync, FB_FUNCTION); + + send_and_receive(status, rdb, packet); + } + catch 
(const Exception& ex) + { + ex.stuffException(status); + } +} + + +void Replicator::close(CheckStatusWrapper* status) +{ + reset(status); + freeClientData(status); +} + + +void Replicator::freeClientData(CheckStatusWrapper* status, bool force) +{ + try + { + reset(status); + + if (attachment && attachment->replicator) + { + Rdb* rdb = attachment->getRdb(); + CHECK_HANDLE(rdb, isc_bad_db_handle); + rem_port* port = rdb->rdb_port; + + if (port->port_protocol < PROTOCOL_VERSION16) + unsupported(); + + PACKET* packet = &rdb->rdb_packet; + packet->p_operation = op_repl_data; + P_REPLICATE* repl = &packet->p_replicate; + repl->p_repl_database = rdb->rdb_id; + repl->p_repl_data.cstr_length = 0; + + RefMutexGuard portGuard(*port->port_sync, FB_FUNCTION); + + try + { + send_and_receive(status, rdb, packet); + } + catch (const Exception&) + { + if (!force) + throw; + } + + attachment->replicator = NULL; + } + } + catch (const Exception& ex) + { + ex.stuffException(status); + } +} + + ITransaction* Statement::execute(CheckStatusWrapper* status, ITransaction* apiTra, IMessageMetadata* inMetadata, void* inBuffer, IMessageMetadata* outMetadata, void* outBuffer) { diff --git a/src/remote/protocol.cpp b/src/remote/protocol.cpp index 75fa9e8e73..aba92b6ef4 100644 --- a/src/remote/protocol.cpp +++ b/src/remote/protocol.cpp @@ -1101,6 +1101,16 @@ bool_t xdr_protocol(XDR* xdrs, PACKET* p) return P_TRUE(xdrs, p); } + case op_repl_data: + { + P_REPLICATE* repl = &p->p_replicate; + MAP(xdr_short, reinterpret_cast(repl->p_repl_database)); + MAP(xdr_cstring_const, repl->p_repl_data); + DEBUG_PRINTSIZE(xdrs, p->p_operation); + + return P_TRUE(xdrs, p); + } + ///case op_insert: default: #ifdef DEV_BUILD diff --git a/src/remote/protocol.h b/src/remote/protocol.h index 78fe694a8c..cd2b2987b1 100644 --- a/src/remote/protocol.h +++ b/src/remote/protocol.h @@ -285,6 +285,9 @@ enum P_OP op_batch_blob_stream = 105, op_batch_set_bpb = 106, + op_repl_data = 107, + op_repl_req = 108, + op_max }; @@ 
-711,6 +714,15 @@ typedef struct p_batch_setbpb } P_BATCH_SETBPB; +// Replication support + +typedef struct p_replicate +{ + OBJCT p_repl_database; // database object id + CSTRING_CONST p_repl_data; // replication data +} P_REPLICATE; + + // Generalize packet (sic!) typedef struct packet @@ -762,6 +774,7 @@ typedef struct packet P_BATCH_BLOB p_batch_blob; // BLOB stream portion in batch P_BATCH_REGBLOB p_batch_regblob; // Register already existing BLOB in batch P_BATCH_SETBPB p_batch_setbpb; // Set default BPB for batch + P_REPLICATE p_replicate; // replicate public: packet() diff --git a/src/remote/remote.h b/src/remote/remote.h index 9e14ad0a1f..e6f11ddec5 100644 --- a/src/remote/remote.h +++ b/src/remote/remote.h @@ -1042,6 +1042,8 @@ struct rem_port : public Firebird::GlobalStorage, public Firebird::RefCounted Firebird::ICryptKeyCallback* port_client_crypt_callback; // client callback to transfer database crypt key ServerCallbackBase* port_server_crypt_callback; // server callback to transfer database crypt key + Firebird::RefPtr port_replicator; + UCharArrayAutoPtr port_buffer; FB_UINT64 port_snd_packets; @@ -1084,7 +1086,7 @@ public: port_srv_auth(NULL), port_srv_auth_block(NULL), port_crypt_keys(getPool()), port_crypt_complete(false), port_crypt_level(WIRECRYPT_REQUIRED), port_known_server_keys(getPool()), port_crypt_plugin(NULL), - port_client_crypt_callback(NULL), port_server_crypt_callback(NULL), + port_client_crypt_callback(NULL), port_server_crypt_callback(NULL), port_replicator(NULL), port_buffer(FB_NEW_POOL(getPool()) UCHAR[rpt]), port_snd_packets(0), port_rcv_packets(0), port_snd_bytes(0), port_rcv_bytes(0) { @@ -1286,6 +1288,7 @@ public: void batch_exec(P_BATCH_EXEC*, PACKET*); void batch_rls(P_BATCH_FREE*, PACKET*); void batch_bpb(P_BATCH_SETBPB*, PACKET*); + void replicate(P_REPLICATE*, PACKET*); Firebird::string getRemoteId() const; void auxAcceptError(PACKET* packet); diff --git a/src/remote/server/ReplServer.cpp 
b/src/remote/server/ReplServer.cpp new file mode 100644 index 0000000000..ec3f142c3b --- /dev/null +++ b/src/remote/server/ReplServer.cpp @@ -0,0 +1,983 @@ +/* + * The contents of this file are subject to the Initial + * Developer's Public License Version 1.0 (the "License"); + * you may not use this file except in compliance with the + * License. You may obtain a copy of the License at + * http://www.ibphoenix.com/main.nfs?a=ibphoenix&page=ibp_idpl. + * + * Software distributed under the License is distributed AS IS, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. + * See the License for the specific language governing rights + * and limitations under the License. + * + * The Original Code was created by Dmitry Yemanov + * for the Firebird Open Source RDBMS project. + * + * Copyright (c) 2014 Dmitry Yemanov + * and all contributors signed below. + * + * All Rights Reserved. + * Contributor(s): ______________________________________. + */ + +#include "firebird.h" +#include "firebird/Message.h" +#include "../common/common.h" +#include "../jrd/constants.h" +#include "../jrd/ibase.h" +#include "../jrd/license.h" +#include "../jrd/ods.h" +#include "../common/os/guid.h" +#include "../common/os/os_utils.h" +#include "../common/os/path_utils.h" +#include "../common/isc_proto.h" +#include "../common/classes/ClumpletWriter.h" +#include "../common/classes/MetaName.h" +#include "../common/ThreadStart.h" +#include "../common/utils_proto.h" +#include "../common/utils_proto.h" /* NOTE(review): duplicate of the previous include */ + +#include "../jrd/replication/Applier.h" +#include "../jrd/replication/ChangeLog.h" +#include "../jrd/replication/Config.h" +#include "../jrd/replication/Protocol.h" +#include "../jrd/replication/Utils.h" + +#include +#include +#include +#include + +#ifdef HAVE_SYS_FILE_H +#include +#endif + +#ifdef WIN_NT +#include +#endif + +#include "ReplServer.h" + +#if defined(O_DSYNC) /* SYNC: first available synchronous-write open flag, 0 when none is defined */ +#define SYNC O_DSYNC +#elif defined(O_SYNC) +#define SYNC O_SYNC +#elif defined(O_FSYNC) +#define SYNC O_FSYNC +#else
+#define SYNC 0 +#endif + +#ifndef O_BINARY +#define O_BINARY 0 +#endif + +// Debugging facilities +//#define NO_DATABASE +//#define PRESERVE_LOG + +using namespace Firebird; +using namespace Replication; + +namespace +{ + const char CTL_SIGNATURE[] = "FBREPLCTL"; + + const USHORT CTL_VERSION1 = 1; + const USHORT CTL_CURRENT_VERSION = CTL_VERSION1; + + volatile bool* shutdownPtr = NULL; + AtomicCounter activeThreads; + + /* A transaction still in flight, with the sequence value supplied when it was registered; keyed by tra_id through generate(). */ struct ActiveTransaction + { + ActiveTransaction() + : tra_id(0), sequence(0) + {} + + ActiveTransaction(TraNumber id, FB_UINT64 seq) + : tra_id(id), sequence(seq) + {} + + static const TraNumber& generate(const ActiveTransaction& item) + { + return item.tra_id; + } + + TraNumber tra_id; + FB_UINT64 sequence; + }; + + typedef SortedArray, TraNumber, ActiveTransaction> TransactionList; + + /* Returns the lowest sequence stored in the list, or 0 when the list is empty. */ FB_UINT64 getOldestSequence(const TransactionList& transactions) + { + if (transactions.isEmpty()) + return 0; + + FB_UINT64 sequence = MAX_UINT64; + + for (const ActiveTransaction* iter = transactions.begin(); iter != transactions.end(); ++iter) + sequence = MIN(sequence, iter->sequence); + + fb_assert(sequence > 0 && sequence < MAX_UINT64); + + return sequence; + } + + /* Per-source state file, named after the source GUID inside the given directory: remembers the last processed segment sequence and offset, the database sequence and the active transaction list. The constructor opens it via init(), takes an exclusive lock (named mutex on Windows, flock/lockf elsewhere) and either seeds an empty file or loads and validates an existing one. */ class ControlFile : public AutoFile + { + struct DataV1 + { + char signature[10]; + USHORT version; + ULONG txn_count; + FB_UINT64 sequence; + ULONG offset; + FB_UINT64 db_sequence; + }; + + typedef DataV1 Data; + + public: + ControlFile(const PathName& directory, + const Guid& guid, FB_UINT64 sequence, + TransactionList& transactions) + : AutoFile(init(directory, guid)) + { + char guidStr[GUID_BUFF_SIZE]; + GuidToString(guidStr, &guid); + + const PathName filename = directory + guidStr; + +#ifdef WIN_NT + string name; + name.printf("firebird_replctl_%s", guidStr); + m_mutex = CreateMutex(NULL, FALSE, name.c_str()); + if (WaitForSingleObject(m_mutex, INFINITE) != WAIT_OBJECT_0) +#else // POSIX +#ifdef HAVE_FLOCK + if (flock(m_handle, LOCK_EX)) +#else + if (lockf(m_handle, F_LOCK, 0)) +#endif
+#endif + { + raiseError("Control file %s lock failed (error: %d)", filename.c_str(), ERRNO); + } + + memset(&m_data, 0, sizeof(Data)); + strcpy(m_data.signature, CTL_SIGNATURE); + m_data.version = CTL_CURRENT_VERSION; + + const size_t length = (size_t) lseek(m_handle, 0, SEEK_END); + + /* zero-length file: seed it from the supplied sequence */ if (!length) + { + m_data.sequence = sequence ? sequence - 1 : 0; + m_data.offset = 0; + m_data.db_sequence = 0; + + lseek(m_handle, 0, SEEK_SET); + if (write(m_handle, &m_data, sizeof(Data)) != sizeof(Data)) + raiseError("Control file %s cannot be written", filename.c_str()); + } + else if (length >= sizeof(DataV1)) + { + lseek(m_handle, 0, SEEK_SET); + if (read(m_handle, &m_data, sizeof(DataV1)) != sizeof(DataV1)) + raiseError("Control file %s appears corrupted", filename.c_str()); + + if (strcmp(m_data.signature, CTL_SIGNATURE) || + (m_data.version != CTL_VERSION1)) + { + raiseError("Control file %s appears corrupted", filename.c_str()); + } + + ActiveTransaction* const ptr = + m_data.txn_count ? transactions.getBuffer(m_data.txn_count) : NULL; + const ULONG txn_size = m_data.txn_count * sizeof(ActiveTransaction); + + if (txn_size) + { + if (read(m_handle, ptr, txn_size) != txn_size) + raiseError("Control file %s appears corrupted", filename.c_str()); + } + } + else + raiseError("Control file %s appears corrupted", filename.c_str()); + } + + ~ControlFile() + { +#ifdef WIN_NT + ReleaseMutex(m_mutex); + CloseHandle(m_mutex); +#endif + } + + FB_UINT64 getSequence() const + { + return m_data.sequence; + } + + ULONG getOffset() const + { + return m_data.offset; + } + + FB_UINT64 getDbSequence() const + { + return m_data.db_sequence; + } + + /* Stores the database sequence and rewrites the fixed-size header. NOTE(review): the write() result is not checked. */ void saveDbSequence(FB_UINT64 db_sequence) + { + m_data.db_sequence = db_sequence; + + lseek(m_handle, 0, SEEK_SET); + write(m_handle, &m_data, sizeof(Data)); + } + + /* Records progress inside a segment; the file is rewritten only when the (sequence, offset) pair moves forward. NOTE(review): write() results are not checked. */ void savePartial(FB_UINT64 sequence, ULONG offset, const TransactionList& transactions) + { + bool update = false; + + if (sequence > m_data.sequence) + { + m_data.sequence =
sequence; + fb_assert(!m_data.offset); + m_data.offset = offset; + update = true; + } + else if (sequence == m_data.sequence && offset > m_data.offset) + { + m_data.offset = offset; + update = true; + } + + if (update) + { + m_data.txn_count = (ULONG) transactions.getCount(); + + const ULONG txn_size = m_data.txn_count * sizeof(ActiveTransaction); + + lseek(m_handle, 0, SEEK_SET); + write(m_handle, &m_data, sizeof(Data)); + write(m_handle, transactions.begin(), txn_size); + } + } + + /* Marks the given segment as fully processed (offset reset to 0) and rewrites the header followed by the transaction list. NOTE(review): write() results are not checked. */ void saveComplete(FB_UINT64 sequence, const TransactionList& transactions) + { + if (sequence >= m_data.sequence) + { + m_data.sequence = sequence; + m_data.offset = 0; + + m_data.txn_count = (ULONG) transactions.getCount(); + + const ULONG txn_size = m_data.txn_count * sizeof(ActiveTransaction); + + lseek(m_handle, 0, SEEK_SET); + write(m_handle, &m_data, sizeof(Data)); + write(m_handle, transactions.begin(), txn_size); + } + } + + private: + /* Opens the control file, creating it if missing, and returns the descriptor; raises an error if the open fails. */ static int init(const PathName& directory, const Guid& guid) + { +#ifdef WIN_NT + const mode_t ACCESS_MODE = DEFAULT_OPEN_MODE; +#else + const mode_t ACCESS_MODE = 0664; +#endif + char guidStr[GUID_BUFF_SIZE]; + GuidToString(guidStr, &guid); + + const PathName filename = directory + guidStr; + + const int fd = os_utils::open(filename.c_str(), + O_CREAT | O_RDWR | O_BINARY | SYNC, ACCESS_MODE); + + if (fd < 0) + raiseError("Control file %s open failed (error: %d)", filename.c_str(), ERRNO); + + return fd; + } + + Data m_data; + +#ifdef WIN_NT + HANDLE m_mutex; +#endif + }; + + /* One configured replica: holds its configuration, attaches to the database on demand and feeds replicated blocks to the replicator of that attachment. */ class Target : public GlobalStorage + { + public: + explicit Target(const Replication::Config* config) + : m_config(config), + m_lastError(getPool()), + m_attachment(nullptr), m_replicator(nullptr), + m_sequence(0), m_connected(false) + { + } + + ~Target() + { + shutdown(); + } + + const Replication::Config* getConfig() const + { + return m_config; + } + + /* True when sourceGuid.alignment is zero (presumably meaning no source GUID is configured - confirm) or when the GUID equals the configured one. */ bool checkGuid(const Guid& guid) + { + if (!m_config->sourceGuid.alignment) + return true; + + if (!memcmp(&guid,
&m_config->sourceGuid, sizeof(Guid))) + return true; + + return false; + } + + /* Attaches to the database on first use, creates the replicator and reads REPLICATION_SEQUENCE from the SYSTEM context; later calls return the cached value. */ FB_UINT64 initReplica() + { + if (m_connected) + return m_sequence; + + verbose("Connecting to database (%s)", m_config->dbName.c_str()); + + ClumpletWriter dpb(ClumpletReader::Tagged, MAX_DPB_SIZE, isc_dpb_version1); + + dpb.insertString(isc_dpb_user_name, DBA_USER_NAME); + dpb.insertString(isc_dpb_config, EMBEDDED_PROVIDERS, fb_strlen(EMBEDDED_PROVIDERS)); + +#ifndef NO_DATABASE + DispatcherPtr provider; + FbLocalStatus localStatus; + + m_attachment = + provider->attachDatabase(&localStatus, m_config->dbName.c_str(), + dpb.getBufferLength(), dpb.getBuffer()); + localStatus.check(); + + m_replicator = m_attachment->createReplicator(&localStatus); + localStatus.check(); + + fb_assert(!m_sequence); + + const auto transaction = m_attachment->startTransaction(&localStatus, 0, NULL); + localStatus.check(); + + const char* sql = + "select rdb$get_context('SYSTEM', 'REPLICATION_SEQUENCE') from rdb$database"; + + FB_MESSAGE(Result, CheckStatusWrapper, + (FB_BIGINT, sequence) + ) result(&localStatus, fb_get_master_interface()); + + m_attachment->execute(&localStatus, transaction, 0, sql, SQL_DIALECT_V6, + NULL, NULL, result.getMetadata(), result.getData()); + localStatus.check(); + + transaction->commit(&localStatus); + localStatus.check(); + + m_sequence = result->sequence; +#endif + m_connected = true; + + return m_sequence; + } + + /* Closes the replicator and detaches, if attached; the local status filled by those calls is not inspected. */ void shutdown() + { + if (m_attachment) + { + verbose("Disconnecting from database (%s)", m_config->dbName.c_str()); + +#ifndef NO_DATABASE + FbLocalStatus localStatus; + m_replicator->close(&localStatus); + m_attachment->detach(&localStatus); +#endif + m_replicator = NULL; + m_attachment = NULL; + m_sequence = 0; + } + + m_connected = false; + } + + /* Passes one block to the replicator and reports whether the status is clean. */ bool replicate(FbLocalStatus& status, ULONG length, const UCHAR* data) + { +#ifdef NO_DATABASE + return true; +#else + m_replicator->process(&status, length, data); + return status.isSuccess(); +#endif + } + + bool
isShutdown() const + { + return (m_attachment == NULL); + } + + const PathName& getDirectory() const + { + return m_config->logSourceDirectory; + } + + void logMessage(const string& message, LogMsgType type) const + { + logReplicaMessage(m_config->dbName, message, type); + } + + /* Logs the message unless it equals the previously logged error. */ void logError(const string& message) + { + if (message != m_lastError) + { + logMessage(message, ERROR_MSG); + m_lastError = message; + } + } + + /* printf-style logging, active only when verboseLogging is set in the configuration. */ void verbose(const char* msg, ...) const + { + if (m_config->verboseLogging) + { + char buffer[BUFFER_LARGE]; + + va_list ptr; + va_start(ptr, msg); + VSNPRINTF(buffer, sizeof(buffer), msg, ptr); + va_end(ptr); + + logMessage(buffer, VERBOSE_MSG); + } + } + + private: + AutoPtr m_config; + string m_lastError; + IAttachment* m_attachment; + IReplicator* m_replicator; + FB_UINT64 m_sequence; + bool m_connected; + }; + + typedef Array TargetList; + + /* A log file found in the source directory together with a copy of its header; keyed by hdr_sequence through generate(). */ struct LogSegment + { + explicit LogSegment(MemoryPool& pool, const PathName& fname, const SegmentHeader& hdr) + : filename(pool, fname) + { + memcpy(&header, &hdr, sizeof(SegmentHeader)); + } + + /* Unlinks the file, or renames it with a '~' prefix when PRESERVE_LOG is defined; raises an error on failure. */ void remove() + { +#ifdef PRESERVE_LOG + PathName path, name, newname; + PathUtils::splitLastComponent(path, name, filename); + PathUtils::concatPath(newname, path, "~" + name); + + if (rename(filename.c_str(), newname.c_str()) < 0) + raiseError("Log file %s rename failed (error: %d)", filename.c_str(), ERRNO); +#else + if (unlink(filename.c_str()) < 0) + raiseError("Log file %s unlink failed (error: %d)", filename.c_str(), ERRNO); +#endif + } + + static const FB_UINT64& generate(const LogSegment* item) + { + return item->header.hdr_sequence; + } + + const PathName filename; + SegmentHeader header; + }; + + typedef SortedArray, FB_UINT64, LogSegment> ProcessQueue; + + /* Creates one Target per replica configuration returned by Config::enumerate(). */ void readConfig(TargetList& targets) + { + Array replicas; + Replication::Config::enumerate(replicas); + + for (auto replica : replicas) + targets.add(FB_NEW Target(replica)); + } + + /* Checks signature, version, state and protocol of a segment header. */ bool validateHeader(const SegmentHeader* header) + { + if
(strcmp(header->hdr_signature, LOG_SIGNATURE)) + return false; + + if (header->hdr_version != LOG_CURRENT_VERSION) + return false; + + if (header->hdr_state != SEGMENT_STATE_FREE && + header->hdr_state != SEGMENT_STATE_USED && + header->hdr_state != SEGMENT_STATE_FULL && + header->hdr_state != SEGMENT_STATE_ARCH) + { + return false; + } + + if (header->hdr_protocol != PROTOCOL_VERSION1) + return false; + + return true; + } + + /* Applies one block and maintains the active transaction list. With rewind set, only blocks without a transaction number or belonging to a listed transaction are applied, and no new transactions are registered. Returns false if the target reports an error. */ bool replicate(FbLocalStatus& status, FB_UINT64 sequence, + Target* target, TransactionList& transactions, + ULONG offset, ULONG length, const UCHAR* data, + bool rewind) + { + const Block* const header = (Block*) data; + + const auto traNumber = header->traNumber; + + if (!rewind || !traNumber || transactions.exist(traNumber)) + { + if (!target->replicate(status, length, data)) + return false; + } + + if (header->flags & BLOCK_END_TRANS) + { + if (traNumber) + { + FB_SIZE_T pos; + if (transactions.find(traNumber, pos)) + transactions.remove(pos); + } + else if (!rewind) + { + transactions.clear(); + } + } + else if (header->flags & BLOCK_BEGIN_TRANS) + { + fb_assert(traNumber); + + if (!rewind && !transactions.exist(traNumber)) + transactions.add(ActiveTransaction(traNumber, sequence)); + } + + return true; + } + + enum ProcessStatus { PROCESS_SUSPEND, PROCESS_CONTINUE, PROCESS_ERROR }; + + /* Scans the log directory, queues valid segments ordered by sequence and replicates the contiguous chain that follows the state kept in the control file. Returns PROCESS_CONTINUE once at least one segment was replicated, PROCESS_SUSPEND when nothing was replicated, PROCESS_ERROR when an exception was caught. */ ProcessStatus process_archive(MemoryPool& pool, Target* target) + { + FbLocalStatus localStatus; + + ProcessQueue queue(pool); + + ProcessStatus ret = PROCESS_SUSPEND; + + try + { + target->verbose("Scanning directory (%s)", target->getDirectory().c_str()); + + // First pass: create the processing queue + + for (auto iter = PathUtils::newDirIterator(pool, target->getConfig()->logSourceDirectory); + *iter; ++(*iter)) + { + const auto filename = **iter; + +#ifdef PRESERVE_LOG + PathName path, name; + PathUtils::splitLastComponent(path, name, filename); + + if (name.find('~') == 0) + continue; +#endif + + /* names containing '{', '}' and '-' are skipped (presumably the GUID-named control files - confirm) */ if (filename.find('{') != PathName::npos && +
filename.find('}') != PathName::npos && + filename.find('-') != PathName::npos) + { + continue; + } + + const int fd = os_utils::open(filename.c_str(), O_RDONLY | O_BINARY); + if (fd < 0) + { + if (errno == EACCES || errno == EAGAIN) + { + target->verbose("Skipping file (%s) due to sharing violation", filename.c_str()); + continue; + } + + raiseError("Log file %s open failed (error: %d)", filename.c_str(), ERRNO); + } + + AutoFile file(fd); + + struct stat stats; + if (fstat(file, &stats) < 0) + raiseError("Log file %s fstat failed (error: %d)", filename.c_str(), ERRNO); + + const size_t fileSize = stats.st_size; + + if (fileSize < sizeof(SegmentHeader)) + { + /* NOTE(review): %u is paired with size_t arguments in this call and in the similar one below */ target->verbose("Skipping file (%s) as being too small (at least %u bytes expected, %u bytes detected)", + filename.c_str(), sizeof(SegmentHeader), fileSize); + continue; + } + + if (lseek(file, 0, SEEK_SET) != 0) + raiseError("Log file %s seek failed (error: %d)", filename.c_str(), ERRNO); + + SegmentHeader header; + + if (read(file, &header, sizeof(SegmentHeader)) != sizeof(SegmentHeader)) + raiseError("Log file %s read failed (error: %d)", filename.c_str(), ERRNO); + + if (!validateHeader(&header)) + { + target->verbose("Skipping file (%s) due to unknown format", filename.c_str()); + continue; + } + + if (fileSize < header.hdr_length) + { + target->verbose("Skipping file (%s) as being too small (at least %u bytes expected, %u bytes detected)", + filename.c_str(), header.hdr_length, fileSize); + continue; + } + + if (header.hdr_state == SEGMENT_STATE_FREE) + { + target->verbose("Deleting file (%s) due to incorrect state (expected either FULL or ARCH, found FREE)", + filename.c_str()); + file.release(); + unlink(filename.c_str()); + continue; + } + + if (!target->checkGuid(header.hdr_guid)) + { + char buff[GUID_BUFF_SIZE]; + GuidToString(buff, &header.hdr_guid); + const string guidStr(buff); + target->verbose("Skipping file (%s) due to GUID mismatch (found %s)", + filename.c_str(), guidStr.c_str()); +
continue; + } +/* + if (header.hdr_state != SEGMENT_STATE_ARCH) + continue; +*/ + queue.add(FB_NEW_POOL(pool) LogSegment(pool, filename, header)); + } + + if (queue.isEmpty()) + { + target->verbose("No suitable files found"); + return ret; + } + + target->verbose("Added %u segments to the processing queue", (ULONG) queue.getCount()); + + // Second pass: replicate the chain of contiguous segments + + Array buffer(pool); + TransactionList transactions(pool); + + FB_UINT64 next_sequence = 0; + const bool restart = target->isShutdown(); + + for (LogSegment** iter = queue.begin(); iter != queue.end(); ++iter) + { + LogSegment* const segment = *iter; + const FB_UINT64 sequence = segment->header.hdr_sequence; + const Guid& guid = segment->header.hdr_guid; + + /* opens and locks the control file of this segment's source; it stays locked for the rest of this iteration */ ControlFile control(target->getDirectory(), guid, sequence, transactions); + + FB_UINT64 last_sequence = control.getSequence(); + ULONG last_offset = control.getOffset(); + + const FB_UINT64 db_sequence = target->initReplica(); + const FB_UINT64 last_db_sequence = control.getDbSequence(); + + if (sequence <= db_sequence) + { + target->verbose("Deleting file (%s) due to fast forward", segment->filename.c_str()); + segment->remove(); + continue; + } + + if (db_sequence != last_db_sequence) + { + target->verbose("Resetting replication to continue from segment %" UQUADFORMAT, db_sequence + 1); + control.saveDbSequence(db_sequence); + transactions.clear(); + control.saveComplete(db_sequence, transactions); + last_sequence = db_sequence; + last_offset = 0; + } + + FB_UINT64 oldest_sequence = getOldestSequence(transactions); + + const FB_UINT64 threshold = oldest_sequence ? oldest_sequence : + (last_offset ? last_sequence : last_sequence + 1); + + if (sequence < threshold) + { + target->verbose("Deleting file (%s) as priorly replicated", segment->filename.c_str()); + segment->remove(); + continue; + } + + if (!next_sequence) + next_sequence = restart ?
threshold : last_sequence + 1; + + if (sequence > next_sequence) + raiseError("Required segment %" UQUADFORMAT " is missing", next_sequence); + + if (sequence < next_sequence) + continue; + + target->verbose("Replicating file (%s), segment %" UQUADFORMAT, + segment->filename.c_str(), sequence); + + const FB_UINT64 org_oldest_sequence = oldest_sequence; + + const int fd = os_utils::open(segment->filename.c_str(), O_RDONLY | O_BINARY); + if (fd < 0) + { + if (errno == EACCES || errno == EAGAIN) + { + target->verbose("Stopping to process the queue, sharing violation for file (%s)", + segment->filename.c_str()); + break; + } + + raiseError("Log file %s open failed (error: %d)", segment->filename.c_str(), ERRNO); + } + + AutoFile file(fd); + + SegmentHeader header; + + if (read(file, &header, sizeof(SegmentHeader)) != sizeof(SegmentHeader)) + raiseError("Log file %s read failed (error: %d)", segment->filename.c_str(), ERRNO); + + if (memcmp(&header, &segment->header, sizeof(SegmentHeader))) + raiseError("Log file %s was unexpectedly changed", segment->filename.c_str()); + + ULONG totalLength = sizeof(SegmentHeader); + while (totalLength < segment->header.hdr_length) + { + Block header; + if (read(file, &header, sizeof(Block)) != sizeof(Block)) + raiseError("Log file %s read failed (error %d)", segment->filename.c_str(), ERRNO); + + const auto blockLength = header.dataLength + header.metaLength; + const auto length = sizeof(Block) + blockLength; + + if (blockLength) + { + /* rewind: the block precedes the position recorded in the control file, or lies in the recorded segment when no offset is saved for it */ const bool rewind = (sequence < last_sequence || + (sequence == last_sequence && (!last_offset || totalLength < last_offset))); + + UCHAR* const data = buffer.getBuffer(length); + memcpy(data, &header, sizeof(Block)); + + if (read(file, data + sizeof(Block), blockLength) != blockLength) + raiseError("Log file %s read failed (error %d)", segment->filename.c_str(), ERRNO); + + const bool success = + replicate(localStatus, sequence, + target, transactions, + totalLength, length, data, + rewind); + + if
(!success) + { + oldest_sequence = getOldestSequence(transactions); + + target->verbose("Last segment:offset %" UQUADFORMAT ":%u, oldest segment %" UQUADFORMAT, + control.getSequence(), control.getOffset(), oldest_sequence); + + localStatus.raise(); + } + } + + totalLength += length; + + control.savePartial(sequence, totalLength, transactions); + } + + control.saveComplete(sequence, transactions); + + file.release(); + + target->verbose("Successfully replicated %u bytes in segment %" UQUADFORMAT, + totalLength, sequence); + + oldest_sequence = getOldestSequence(transactions); + next_sequence = sequence + 1; + + target->verbose("Last segment:offset %" UQUADFORMAT ":%u, oldest segment %" UQUADFORMAT, + control.getSequence(), control.getOffset(), oldest_sequence); + + /* the oldest active sequence changed: starting at the previous oldest segment, remove queued segments below the new threshold */ if (org_oldest_sequence && oldest_sequence != org_oldest_sequence) + { + const FB_UINT64 threshold = + oldest_sequence ? MIN(oldest_sequence, sequence) : sequence; + + FB_SIZE_T pos; + if (queue.find(org_oldest_sequence, pos)) + { + do + { + LogSegment* const segment = queue[pos++]; + + if (segment->header.hdr_sequence >= threshold) + break; + + target->verbose("Deleting file (%s) as already replicated", + segment->filename.c_str()); + + segment->remove(); + } + while (pos < queue.getCount()); + } + } + + if (oldest_sequence) + { + target->verbose("Preserving file (%s) due to uncommitted transactions", + segment->filename.c_str()); + } + else + { + target->verbose("Deleting file (%s) as already replicated", + segment->filename.c_str()); + } + + if (!oldest_sequence) + segment->remove(); + + ret = PROCESS_CONTINUE; + } + } + catch (const Exception& ex) + { + LocalStatus localStatus; + CheckStatusWrapper statusWrapper(&localStatus); + ex.stuffException(&statusWrapper); + + string message; + + char temp[BUFFER_LARGE]; + const ISC_STATUS* status_ptr = localStatus.getErrors(); + while (fb_interpret(temp, sizeof(temp), &status_ptr)) + { + if (!message.isEmpty()) + message += "\n\t"; + + message += temp; + } +
+ if (message.find("Replication") == string::npos) + target->logError(message); + + ret = PROCESS_ERROR; + } + + while (queue.hasData()) + delete queue.pop(); + + return ret; + } + + /* Thread body for one target, which it holds in an AutoPtr: repeats process_archive() until *shutdownPtr is set, shutting the target down and sleeping for the idle or error timeout whenever the pass did not return PROCESS_CONTINUE; decrements activeThreads on exit. */ THREAD_ENTRY_DECLARE process_thread(THREAD_ENTRY_PARAM arg) + { + fb_assert(shutdownPtr); + + AutoPtr target(static_cast(arg)); + const auto config = target->getConfig(); + + target->verbose("Started replication thread"); + + while (!*shutdownPtr) + { + AutoMemoryPool workingPool(MemoryPool::createPool()); + ContextPoolHolder threadContext(workingPool); + + const ProcessStatus ret = process_archive(*workingPool, target); + + if (ret == PROCESS_CONTINUE) + continue; + + target->shutdown(); + + if (!*shutdownPtr) + { + const ULONG timeout = + (ret == PROCESS_SUSPEND) ? config->applyIdleTimeout : config->applyErrorTimeout; + + target->verbose("Going to sleep for %u seconds", timeout); + + Thread::sleep(timeout * 1000); + } + } + + target->verbose("Finished replication thread"); + + --activeThreads; + + return 0; + } +} + +/* Starts one replication thread per configured target; when wait is true, polls until activeThreads drops to zero. Returns false, with the error stored in status, if an exception is caught. */ bool REPL_server(CheckStatusWrapper* status, bool wait, bool* aShutdownPtr) +{ + try + { + shutdownPtr = aShutdownPtr; + + TargetList targets; + readConfig(targets); + + for (auto target : targets) + { + ++activeThreads; + Thread::start((ThreadEntryPoint*) process_thread, target, THREAD_medium, NULL); + } + + if (wait) + { + do { + Thread::sleep(100); + } while (activeThreads.value()); + } + } + catch (const Exception& ex) + { + ex.stuffException(status); + return false; + } + + return true; +} diff --git a/src/remote/server/ReplServer.h b/src/remote/server/ReplServer.h new file mode 100644 index 0000000000..85007b9b97 --- /dev/null +++ b/src/remote/server/ReplServer.h @@ -0,0 +1,28 @@ +/* + * The contents of this file are subject to the Initial + * Developer's Public License Version 1.0 (the "License"); + * you may not use this file except in compliance with the + * License.
You may obtain a copy of the License at + * http://www.ibphoenix.com/main.nfs?a=ibphoenix&page=ibp_idpl. + * + * Software distributed under the License is distributed AS IS, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. + * See the License for the specific language governing rights + * and limitations under the License. + * + * The Original Code was created by Dmitry Yemanov + * for the Firebird Open Source RDBMS project. + * + * Copyright (c) 2014 Dmitry Yemanov + * and all contributors signed below. + * + * All Rights Reserved. + * Contributor(s): ______________________________________. + */ + +#ifndef UTIL_REPL_SERVER_H +#define UTIL_REPL_SERVER_H + +/* Entry point of the replication server; the arguments are the status wrapper receiving errors, the wait flag and a pointer to the shutdown flag. Returns false on failure. */ bool REPL_server(Firebird::CheckStatusWrapper*, bool, bool*); + +#endif // UTIL_REPL_SERVER_H diff --git a/src/remote/server/os/posix/inet_server.cpp b/src/remote/server/os/posix/inet_server.cpp index 63208eb5a6..f9fca41df5 100644 --- a/src/remote/server/os/posix/inet_server.cpp +++ b/src/remote/server/os/posix/inet_server.cpp @@ -82,6 +82,7 @@ #include "../common/file_params.h" #include "../remote/inet_proto.h" #include "../remote/server/serve_proto.h" +#include "../remote/server/ReplServer.h" #include "../yvalve/gds_proto.h" #include "../common/utils_proto.h" #include "../common/classes/fb_string.h" @@ -467,6 +468,17 @@ int CLIB_ROUTINE main( int argc, char** argv) fb_shutdown_callback(NULL, closePort, fb_shut_exit, port); + Firebird::LocalStatus localStatus; + Firebird::CheckStatusWrapper statusWrapper(&localStatus); + + /* start the replication server without waiting for it; a failure is logged and aborts startup */ if (!REPL_server(&statusWrapper, false, &serverClosing)) + { + const char* errorMsg = "Replication server initialization error"; + gds__log_status(errorMsg, localStatus.getErrors()); + Firebird::Syslog::Record(Firebird::Syslog::Error, errorMsg); + exit(STARTUP_ERROR); + } + SRVR_multi_thread(port, INET_SERVER_flag); // perform atexit shutdown here when all globals in embedded library are active diff --git a/src/remote/server/os/win32/srvr_w32.cpp
b/src/remote/server/os/win32/srvr_w32.cpp index 25c02adb0b..edb8c02017 100644 --- a/src/remote/server/os/win32/srvr_w32.cpp +++ b/src/remote/server/os/win32/srvr_w32.cpp @@ -101,6 +101,7 @@ #include "../remote/server/os/win32/cntl_proto.h" #include "../remote/inet_proto.h" #include "../remote/server/serve_proto.h" +#include "../remote/server/ReplServer.h" #include "../remote/server/os/win32/window_proto.h" #include "../remote/os/win32/wnet_proto.h" #include "../remote/server/os/win32/window.rh" @@ -110,6 +111,7 @@ #include "firebird/Interface.h" #include "../common/classes/ImplementHelper.h" #include "../common/os/os_utils.h" +#include "../common/status.h" #include "../auth/trusted/AuthSspi.h" #include "../auth/SecurityDatabase/LegacyServer.h" #include "../auth/SecureRemotePassword/server/SrpServer.h" @@ -555,6 +557,7 @@ static THREAD_ENTRY_DECLARE start_connections_thread(THREAD_ENTRY_PARAM) * **************************************/ ThreadCounter counter; + FbLocalStatus localStatus; if (server_flag & SRVR_inet) { @@ -567,6 +570,7 @@ static THREAD_ENTRY_DECLARE start_connections_thread(THREAD_ENTRY_PARAM) iscLogException("INET: can't start listener thread", ex); } } + if (server_flag & SRVR_wnet) { try @@ -578,6 +582,7 @@ static THREAD_ENTRY_DECLARE start_connections_thread(THREAD_ENTRY_PARAM) iscLogException("WNET: can't start listener thread", ex); } } + if (server_flag & SRVR_xnet) { try @@ -589,6 +594,9 @@ static THREAD_ENTRY_DECLARE start_connections_thread(THREAD_ENTRY_PARAM) iscLogException("XNET: can't start listener thread", ex); } } + + /* NOTE(review): neither the return value nor localStatus is checked after this call */ REPL_server(&localStatus, false, &server_shutdown); + return 0; } diff --git a/src/remote/server/server.cpp b/src/remote/server/server.cpp index 1429f97ce6..ce365256b1 100644 --- a/src/remote/server/server.cpp +++ b/src/remote/server/server.cpp @@ -3621,6 +3621,36 @@ void rem_port::batch_rls(P_BATCH_FREE* batch, PACKET* sendL) } +/* Handles a replication packet: creates the port's replicator on first use, passes non-empty data to process(), and treats an empty packet as a request to close and drop the replicator; the resulting status is sent back in every case. */ void rem_port::replicate(P_REPLICATE* repl, PACKET* sendL) +{ + LocalStatus ls; +
CheckStatusWrapper status_vector(&ls); + + Rdb* rdb = this->port_context; + if (bad_db(&status_vector, rdb)) + { + this->send_response(sendL, 0, 0, &status_vector, false); + return; + } + + /* NOTE(review): status_vector is not checked after createReplicator() before port_replicator is used below */ if (!this->port_replicator) + this->port_replicator = rdb->rdb_iface->createReplicator(&status_vector); + + if (repl->p_repl_data.cstr_length) + { + this->port_replicator->process(&status_vector, + repl->p_repl_data.cstr_length, repl->p_repl_data.cstr_address); + } + else + { + this->port_replicator->close(&status_vector); + this->port_replicator = NULL; + } + + this->send_response(sendL, 0, 0, &status_vector, false); +} + + ISC_STATUS rem_port::execute_statement(P_OP op, P_SQLDATA* sqldata, PACKET* sendL) { /***************************************** @@ -4862,6 +4892,11 @@ static bool process_packet(rem_port* port, PACKET* sendL, PACKET* receive, rem_p case op_batch_set_bpb: port->batch_bpb(&receive->p_batch_setbpb, sendL); + break; + + case op_repl_data: + port->replicate(&receive->p_replicate, sendL); + break; ///case op_insert: default: diff --git a/src/utilities/gstat/ppg.cpp b/src/utilities/gstat/ppg.cpp index 720012a6ec..7289d1e7e5 100644 --- a/src/utilities/gstat/ppg.cpp +++ b/src/utilities/gstat/ppg.cpp @@ -188,6 +188,7 @@ void PPG_print_header(const header_page* header, ULONG page, uSvc->printf(false, ", "); uSvc->printf(false, "read only"); } + if (flags & hdr_backup_mask) { if (flag_count++) @@ -204,6 +205,24 @@ void PPG_print_header(const header_page* header, ULONG page, uSvc->printf(false, "wrong backup state %d", flags & hdr_backup_mask); } } + + /* print the replica mode selected by the hdr_replica_mask bits of the header flags */ if (flags & hdr_replica_mask) + { + if (flag_count++) + uSvc->printf(false, ", "); + switch (flags & hdr_replica_mask) + { + case Ods::hdr_replica_read_only: + uSvc->printf(false, "read-only replica"); + break; + case Ods::hdr_replica_read_write: + uSvc->printf(false, "read-write replica"); + break; + default: + uSvc->printf(false, "wrong replica state %d", flags & hdr_replica_mask); + } + } + uSvc->printf(false, "\n"); }
@@ -266,6 +285,22 @@ void PPG_print_header(const header_page* header, ULONG page, uSvc->printf(false, "\tCrypt checksum:\t%*.*s\n", p[1], p[1], p + 2); break; + /* the bytes starting at p + 2 are formatted as a GUID string */ case HDR_db_guid: + { + char buff[Firebird::GUID_BUFF_SIZE]; + Firebird::GuidToString(buff, reinterpret_cast(p + 2)); + uSvc->printf(false, "\tDatabase GUID:\t%s\n", buff); + break; + } + + /* the 64-bit sequence is copied out of the bytes starting at p + 2 before printing */ case HDR_repl_seq: + { + FB_UINT64 sequence; + memcpy(&sequence, p + 2, sizeof(sequence)); + uSvc->printf(false, "\tReplication sequence:\t%" UQUADFORMAT"\n", sequence); + break; + } + default: if (*p > HDR_max) uSvc->printf(false, "\tUnrecognized option %d, length %d\n", p[0], p[1]); diff --git a/src/yvalve/YObjects.h b/src/yvalve/YObjects.h index b88f082672..ac3d18c165 100644 --- a/src/yvalve/YObjects.h +++ b/src/yvalve/YObjects.h @@ -394,6 +394,25 @@ public: }; +/* Wrapper around another IReplicator: process() and close() are forwarded to the wrapped object, and the owning YAttachment is remembered. */ class YReplicator FB_FINAL : + public YHelper > +{ +public: + static const ISC_STATUS ERROR_CODE = isc_bad_repl_handle; + + YReplicator(YAttachment* anAttachment, Firebird::IReplicator* aNext); + + void destroy(unsigned dstrFlags); + + // IReplicator implementation + void process(Firebird::CheckStatusWrapper* status, unsigned length, const unsigned char* data); + void close(Firebird::CheckStatusWrapper* status); + +public: + AtomicAttPtr attachment; +}; + + class YMetadata { public: @@ -541,6 +560,7 @@ public: YBatch* createBatch(Firebird::CheckStatusWrapper* status, Firebird::ITransaction* transaction, unsigned stmtLength, const char* sqlStmt, unsigned dialect, Firebird::IMessageMetadata* inMetadata, unsigned parLength, const unsigned char* par); + YReplicator* createReplicator(Firebird::CheckStatusWrapper* status); public: Firebird::IProvider* provider; diff --git a/src/yvalve/why.cpp b/src/yvalve/why.cpp index b0eb5f1c07..9f13f9da0a 100644 --- a/src/yvalve/why.cpp +++ b/src/yvalve/why.cpp @@ -5018,6 +5018,49 @@ void YBatch::cancel(CheckStatusWrapper* status) //------------------------------------- +YReplicator::YReplicator(YAttachment* anAttachment,
IReplicator* aNext) + : YHelper(aNext), + attachment(anAttachment) +{ } + + +void YReplicator::destroy(unsigned dstrFlags) +{ + destroy2(dstrFlags); +} + + +/* Forwards the data to the wrapped replicator; a caught exception is stored in status. */ void YReplicator::process(CheckStatusWrapper* status, unsigned length, const unsigned char* data) +{ + try + { + YEntry entry(status, this); + entry.next()->process(status, length, data); + } + catch (const Exception& e) + { + e.stuffException(status); + } +} + + +/* Forwards the close request to the wrapped replicator; a caught exception is stored in status. */ void YReplicator::close(CheckStatusWrapper* status) +{ + try + { + YEntry entry(status, this); + entry.next()->close(status); + } + catch (const Exception& e) + { + e.stuffException(status); + } +} + + +//------------------------------------- + + YTransaction::YTransaction(YAttachment* aAttachment, ITransaction* aNext) : YHelper(aNext), attachment(aAttachment), @@ -5944,6 +5987,32 @@ YBatch* YAttachment::createBatch(CheckStatusWrapper* status, ITransaction* trans } +/* Asks the underlying attachment for a replicator and wraps it in a YReplicator with one reference added; returns NULL when the status carries errors or an exception is caught. */ YReplicator* YAttachment::createReplicator(CheckStatusWrapper* status) +{ + try + { + YEntry entry(status, this); + + IReplicator* replicator = entry.next()->createReplicator(status); + + if (status->getState() & Firebird::IStatus::STATE_ERRORS) + { + return NULL; + } + + YReplicator* newReplicator = FB_NEW YReplicator(this, replicator); + newReplicator->addRef(); + return newReplicator; + } + catch (const Exception& e) + { + e.stuffException(status); + } + + return NULL; +} + + //-------------------------------------