diff --git a/jstests/noPassthrough/shutdown_while_fsync_locked.js b/jstests/noPassthrough/shutdown_while_fsync_locked.js new file mode 100644 index 00000000000..5d611741ce2 --- /dev/null +++ b/jstests/noPassthrough/shutdown_while_fsync_locked.js @@ -0,0 +1,15 @@ +/** + * Ensure that we allow mongod to shutdown cleanly while being fsync locked. + */ +(function() { + "use strict"; + + let conn = MongoRunner.runMongod(); + let db = conn.getDB("test"); + + for (let i = 0; i < 10; i++) { + assert.commandWorked(db.adminCommand({fsync: 1, lock: 1})); + } + + MongoRunner.stopMongod(conn, MongoRunner.EXIT_CLEAN, {skipValidation: true}); +}()); diff --git a/src/mongo/db/commands/fsync.cpp b/src/mongo/db/commands/fsync.cpp index abc04dfd2e3..37b0bae2cf8 100644 --- a/src/mongo/db/commands/fsync.cpp +++ b/src/mongo/db/commands/fsync.cpp @@ -52,6 +52,7 @@ #include "mongo/stdx/condition_variable.h" #include "mongo/util/assert_util.h" #include "mongo/util/background.h" +#include "mongo/util/exit.h" #include "mongo/util/log.h" namespace mongo { @@ -80,6 +81,7 @@ public: private: bool _allowFsyncFailure; + static bool _shutdownTaskRegistered; }; class FSyncCommand : public ErrmsgCommandDeprecated { @@ -214,6 +216,10 @@ public: void releaseLock() { stdx::unique_lock lk(lockStateMutex); + releaseLock_inLock(lk); + } + + void releaseLock_inLock(stdx::unique_lock& lk) { invariant(_lockCount >= 1); _lockCount--; @@ -296,31 +302,31 @@ public: Lock::ExclusiveLock lk(opCtx->lockState(), commandMutex); - if (unlockFsync()) { - const auto lockCount = fsyncCmd.getLockCount(); - result.append("info", str::stream() << "fsyncUnlock completed"); - result.append("lockCount", lockCount); - if (lockCount == 0) { - log() << "fsyncUnlock completed. mongod is now unlocked and free to accept writes"; - } else { - log() << "fsyncUnlock completed. Lock count is now " << lockCount; - } - return true; - } else { + stdx::unique_lock stateLock(fsyncCmd.lockStateMutex); + + auto lockCount = fsyncCmd.getLockCount_inLock(); + if (lockCount == 0) { errmsg = "fsyncUnlock called when not locked"; return false; } - } -private: - // Returns true if lock count is decremented. - bool unlockFsync() { - if (fsyncCmd.getLockCount() == 0) { - error() << "fsyncUnlock called when not locked"; - return false; + fsyncCmd.releaseLock_inLock(stateLock); + + // Relies on the lock to be released in 'releaseLock_inLock()' when the release brings + // the lock count to 0. + if (stateLock) { + // If we're still locked then lock count is not zero. + invariant(lockCount > 0); + lockCount = fsyncCmd.getLockCount_inLock(); + log() << "fsyncUnlock completed. Lock count is now " << lockCount; + } else { + invariant(fsyncCmd.getLockCount() == 0); + lockCount = 0; + log() << "fsyncUnlock completed. mongod is now unlocked and free to accept writes"; } - fsyncCmd.releaseLock(); + result.append("info", str::stream() << "fsyncUnlock completed"); + result.append("lockCount", lockCount); return true; } @@ -329,6 +335,8 @@ private: // Exposed publically via extern in fsync.h. SimpleMutex filesLockedFsync; +bool FSyncLockThread::_shutdownTaskRegistered = false; + void FSyncLockThread::run() { ThreadClient tc("fsyncLockWorker", getGlobalServiceContext()); stdx::lock_guard lkf(filesLockedFsync); @@ -341,7 +349,26 @@ void FSyncLockThread::run() { OperationContext& opCtx = *opCtxPtr; Lock::GlobalRead global(&opCtx); // Block any writes in order to flush the files. - StorageEngine* storageEngine = getGlobalServiceContext()->getStorageEngine(); + ServiceContext* serviceContext = opCtx.getServiceContext(); + StorageEngine* storageEngine = serviceContext->getStorageEngine(); + + // The fsync shutdown task has to be registered once the server is running otherwise it + // conflicts with the servers shutdown task. + if (!_shutdownTaskRegistered) { + _shutdownTaskRegistered = true; + registerShutdownTask([&] { + stdx::unique_lock stateLock(fsyncCmd.lockStateMutex); + if (fsyncCmd.getLockCount_inLock() > 0) { + warning() << "Interrupting fsync because the server is shutting down."; + while (fsyncCmd.getLockCount_inLock()) { + // Relies on the lock to be released in 'releaseLock_inLock()' when the + // release brings the lock count to 0. + invariant(stateLock); + fsyncCmd.releaseLock_inLock(stateLock); + } + } + }); + } try { storageEngine->flushAllFiles(&opCtx, true); @@ -353,7 +380,7 @@ void FSyncLockThread::run() { } bool successfulFsyncLock = false; - auto backupCursorHooks = BackupCursorHooks::get(opCtx.getServiceContext()); + auto backupCursorHooks = BackupCursorHooks::get(serviceContext); try { writeConflictRetry(&opCtx, "beginBackup",