diff --git a/jstests/noPassthrough/index_stepdown_conflict.js b/jstests/noPassthrough/index_stepdown_conflict.js new file mode 100644 index 00000000000..f90c3210586 --- /dev/null +++ b/jstests/noPassthrough/index_stepdown_conflict.js @@ -0,0 +1,66 @@ +/** + * Tests that when an index build from the old primary conflicts with one started independently by + * the new primary after a stepdown, and the old primary's existing index build then fails, the new + * primary's index build is not missed on the old primary (now a secondary). The builds conflict + * because the new primary creates an index with the same name as the old primary's. + * + * @tags: [ + * requires_replication, + * ] + */ + +import {configureFailPoint} from "jstests/libs/fail_point_util.js"; +import {IndexBuildTest} from "jstests/noPassthrough/libs/index_build.js"; + +// The index build conflict is only an issue when oplogApplicationEnforcesSteadyStateConstraints is +// false. This is false by default outside of our testing. +const rst = new ReplSetTest({ + nodes: 3, + nodeOptions: {setParameter: {oplogApplicationEnforcesSteadyStateConstraints: false}} }); +rst.startSet(); +rst.initiate(); + +const dbName = 'test'; +const collName = 'coll'; +const primary = rst.getPrimary(); +const primaryDB = primary.getDB(dbName); +const primaryColl = primaryDB.getCollection(collName); + +assert.commandWorked(primaryColl.insert({a: 1})); + +rst.awaitReplication(); + +const secondary = rst.getSecondary(); +const secondaryDB = secondary.getDB(dbName); +const secondaryColl = secondaryDB.getCollection(collName); + +const hangFpOnSetup = configureFailPoint(primary, 'hangIndexBuildOnSetupBeforeTakingLocks'); +const hangFpOnConflict = configureFailPoint(primary, 'hangAfterIndexBuildConflict'); + +jsTestLog("Starting index build"); +let awaitIndexBuild = IndexBuildTest.startIndexBuild( + primary, primaryColl.getFullName(), {a: 1}, null, [ErrorCodes.InterruptedDueToReplStateChange]); + +jsTestLog("Waiting for primary to register the index build"); +hangFpOnSetup.wait(); + 
+jsTestLog("Stepping up the secondary"); +assert.commandWorked(secondary.adminCommand({replSetStepUp: 1})); +rst.waitForState(secondary, ReplSetTest.State.PRIMARY); + +jsTestLog("Waiting for new primary to start index build with the same name"); +let awaitSecondaryIndexBuild = + IndexBuildTest.startIndexBuild(secondary, secondaryColl.getFullName(), {a: 1}, null, null, 2); +IndexBuildTest.waitForIndexBuildToStart(primaryDB, collName, "a_1"); + +// Wait for the index builds to conflict on the old primary. +hangFpOnConflict.wait(); + +// Allow first index build to be cleaned up and index build should no longer have conflict. +hangFpOnSetup.off(); +awaitIndexBuild(); +hangFpOnConflict.off(); +awaitSecondaryIndexBuild(); + +rst.stopSet(); diff --git a/src/mongo/db/active_index_builds.cpp b/src/mongo/db/active_index_builds.cpp index 9882885036e..731c6d1b0a6 100644 --- a/src/mongo/db/active_index_builds.cpp +++ b/src/mongo/db/active_index_builds.cpp @@ -98,14 +98,14 @@ void ActiveIndexBuilds::assertNoIndexBuildInProgress() const { } } -void ActiveIndexBuilds::waitUntilAnIndexBuildFinishes(OperationContext* opCtx) { +void ActiveIndexBuilds::waitUntilAnIndexBuildFinishes(OperationContext* opCtx, Date_t deadline) { stdx::unique_lock lk(_mutex); if (_allIndexBuilds.empty()) { return; } const auto generation = _indexBuildsCompletedGen; - opCtx->waitForConditionOrInterrupt( - _indexBuildsCondVar, lk, [&] { return _indexBuildsCompletedGen != generation; }); + opCtx->waitForConditionOrInterruptUntil( + _indexBuildsCondVar, lk, deadline, [&] { return _indexBuildsCompletedGen != generation; }); } void ActiveIndexBuilds::sleepIndexBuilds_forTestOnly(bool sleep) { diff --git a/src/mongo/db/active_index_builds.h b/src/mongo/db/active_index_builds.h index 8c1c38ba88b..5a06b7a9446 100644 --- a/src/mongo/db/active_index_builds.h +++ b/src/mongo/db/active_index_builds.h @@ -85,7 +85,7 @@ public: void assertNoIndexBuildInProgress() const; - void 
waitUntilAnIndexBuildFinishes(OperationContext* opCtx); + void waitUntilAnIndexBuildFinishes(OperationContext* opCtx, Date_t deadline); void sleepIndexBuilds_forTestOnly(bool sleep); diff --git a/src/mongo/db/index_builds_coordinator.cpp b/src/mongo/db/index_builds_coordinator.cpp index 207dd311a51..32266434ef8 100644 --- a/src/mongo/db/index_builds_coordinator.cpp +++ b/src/mongo/db/index_builds_coordinator.cpp @@ -2126,8 +2126,9 @@ void IndexBuildsCoordinator::awaitNoBgOpInProgForDb(OperationContext* opCtx, activeIndexBuilds.awaitNoBgOpInProgForDb(opCtx, dbName); } -void IndexBuildsCoordinator::waitUntilAnIndexBuildFinishes(OperationContext* opCtx) { - activeIndexBuilds.waitUntilAnIndexBuildFinishes(opCtx); +void IndexBuildsCoordinator::waitUntilAnIndexBuildFinishes(OperationContext* opCtx, + Date_t deadline) { + activeIndexBuilds.waitUntilAnIndexBuildFinishes(opCtx, deadline); } void IndexBuildsCoordinator::appendBuildInfo(const UUID& buildUUID, BSONObjBuilder* builder) const { diff --git a/src/mongo/db/index_builds_coordinator.h b/src/mongo/db/index_builds_coordinator.h index 52e9ec5fb4d..f4170e2b7d9 100644 --- a/src/mongo/db/index_builds_coordinator.h +++ b/src/mongo/db/index_builds_coordinator.h @@ -486,10 +486,10 @@ public: void awaitNoBgOpInProgForDb(OperationContext* opCtx, const DatabaseName& dbName); /** - * Waits until an index build completes. If there are no index builds in progress, returns - * immediately. + * Waits until an index build completes or the deadline expires. If there are no index builds in + * progress, returns immediately. 
*/ - void waitUntilAnIndexBuildFinishes(OperationContext* opCtx); + void waitUntilAnIndexBuildFinishes(OperationContext* opCtx, Date_t deadline = Date_t::max()); /** diff --git a/src/mongo/db/repl/oplog.cpp b/src/mongo/db/repl/oplog.cpp index c0b50bb2cad..85782926ae8 100644 --- a/src/mongo/db/repl/oplog.cpp +++ b/src/mongo/db/repl/oplog.cpp @@ -175,6 +175,8 @@ namespace { // Failpoint to block after a write and its oplog entry have been written to the storage engine and // are visible, but before we have advanced 'lastApplied' for the write. MONGO_FAIL_POINT_DEFINE(hangBeforeLogOpAdvancesLastApplied); +// Failpoint to block oplog application after receiving an IndexBuildAlreadyInProgress error. +MONGO_FAIL_POINT_DEFINE(hangAfterIndexBuildConflict); void abortIndexBuilds(OperationContext* opCtx, const OplogEntry::CommandType& commandType, @@ -2499,6 +2501,23 @@ Status applyCommand_inlock(OperationContext* opCtx, "command", opObj, status); + + // Even though steady state constraints are not enforced, we need to retry index + // builds that conflict in case the existing index build fails. + if (status.code() == ErrorCodes::IndexBuildAlreadyInProgress) { + if (MONGO_unlikely(hangAfterIndexBuildConflict.shouldFail())) { + LOGV2(8539001, "Hanging due to hangAfterIndexBuildConflict failpoint"); + hangAfterIndexBuildConflict.pauseWhileSet(); + } + + auto deadline = Date_t::now() + Milliseconds(1000); + LOGV2(8539000, + "Waiting for existing index build to complete before retrying the " + "conflicting operation"); + IndexBuildsCoordinator::get(opCtx)->waitUntilAnIndexBuildFinishes(opCtx, + deadline); + break; + } } else { LOGV2_DEBUG(51776, 1,