Mirror of https://github.com/mongodb/mongo.git (synced 2024-11-24 16:46:00 +01:00).
SERVER-85390 Retry conflicting index build during oplog application (#25891)
GitOrigin-RevId: b82a13bfca57c2c06c5508770603306554d7d3cc
This commit is contained in:
parent
db7a8e109c
commit
db80281e66
66
jstests/noPassthrough/index_stepdown_conflict.js
Normal file
66
jstests/noPassthrough/index_stepdown_conflict.js
Normal file
@ -0,0 +1,66 @@
|
||||
/**
 * Tests if there is an index build conflict after a node steps down and the existing index build
 * fails, that the index build from the new primary is not missing index build on the old
 * primary/secondary. The index build conflicts when the new primary independently creates an index
 * with the same name as the old primary.
 *
 * @tags: [
 *   requires_replication,
 * ]
 */

import {configureFailPoint} from "jstests/libs/fail_point_util.js";
import {IndexBuildTest} from "jstests/noPassthrough/libs/index_build.js";

// The index build conflict is only an issue when oplogApplicationEnforcesSteadyStateConstraints is
// false. This is false by default outside of our testing.
const rst = new ReplSetTest({
    nodes: 3,
    nodeOptions: {setParameter: {oplogApplicationEnforcesSteadyStateConstraints: false}}
});
rst.startSet();
rst.initiate();

const dbName = 'test';
const collName = 'coll';
const primary = rst.getPrimary();
const primaryDB = primary.getDB(dbName);
const primaryColl = primaryDB.getCollection(collName);

assert.commandWorked(primaryColl.insert({a: 1}));

rst.awaitReplication();

const secondary = rst.getSecondary();
const secondaryDB = secondary.getDB(dbName);
const secondaryColl = secondaryDB.getCollection(collName);

// Hold the old primary's build before it takes locks so the stepped-up node can race it, and
// pause oplog application on the old primary once it observes IndexBuildAlreadyInProgress.
const hangFpOnSetup = configureFailPoint(primary, 'hangIndexBuildOnSetupBeforeTakingLocks');
const hangFpOnConflict = configureFailPoint(primary, 'hangAfterIndexBuildConflict');

jsTestLog("Starting index build");
// The build on the old primary is expected to be interrupted by the step-down.
let awaitIndexBuild = IndexBuildTest.startIndexBuild(
    primary, primaryColl.getFullName(), {a: 1}, null, [ErrorCodes.InterruptedDueToReplStateChange]);

jsTestLog("Waiting for primary to register the index build");
hangFpOnSetup.wait();

jsTestLog("Stepping up the secondary");
assert.commandWorked(secondary.adminCommand({replSetStepUp: 1}));
rst.waitForState(secondary, ReplSetTest.State.PRIMARY);

jsTestLog("Waiting for new primary to start index build with the same name");
let awaitSecondaryIndexBuild =
    IndexBuildTest.startIndexBuild(secondary, secondaryColl.getFullName(), {a: 1}, null, null, 2);
IndexBuildTest.waitForIndexBuildToStart(primaryDB, collName, "a_1");

// Wait for the index builds to conflict on the old primary.
hangFpOnConflict.wait();

// Allow first index build to be cleaned up and index build should no longer have conflict.
hangFpOnSetup.off();
awaitIndexBuild();
hangFpOnConflict.off();
awaitSecondaryIndexBuild();

rst.stopSet();
|
@ -98,14 +98,14 @@ void ActiveIndexBuilds::assertNoIndexBuildInProgress() const {
|
||||
}
|
||||
}
|
||||
|
||||
void ActiveIndexBuilds::waitUntilAnIndexBuildFinishes(OperationContext* opCtx) {
|
||||
void ActiveIndexBuilds::waitUntilAnIndexBuildFinishes(OperationContext* opCtx, Date_t deadline) {
|
||||
stdx::unique_lock<Latch> lk(_mutex);
|
||||
if (_allIndexBuilds.empty()) {
|
||||
return;
|
||||
}
|
||||
const auto generation = _indexBuildsCompletedGen;
|
||||
opCtx->waitForConditionOrInterrupt(
|
||||
_indexBuildsCondVar, lk, [&] { return _indexBuildsCompletedGen != generation; });
|
||||
opCtx->waitForConditionOrInterruptUntil(
|
||||
_indexBuildsCondVar, lk, deadline, [&] { return _indexBuildsCompletedGen != generation; });
|
||||
}
|
||||
|
||||
void ActiveIndexBuilds::sleepIndexBuilds_forTestOnly(bool sleep) {
|
||||
|
@ -85,7 +85,7 @@ public:
|
||||
|
||||
void assertNoIndexBuildInProgress() const;
|
||||
|
||||
void waitUntilAnIndexBuildFinishes(OperationContext* opCtx);
|
||||
void waitUntilAnIndexBuildFinishes(OperationContext* opCtx, Date_t deadline);
|
||||
|
||||
void sleepIndexBuilds_forTestOnly(bool sleep);
|
||||
|
||||
|
@ -2126,8 +2126,9 @@ void IndexBuildsCoordinator::awaitNoBgOpInProgForDb(OperationContext* opCtx,
|
||||
activeIndexBuilds.awaitNoBgOpInProgForDb(opCtx, dbName);
|
||||
}
|
||||
|
||||
void IndexBuildsCoordinator::waitUntilAnIndexBuildFinishes(OperationContext* opCtx) {
|
||||
activeIndexBuilds.waitUntilAnIndexBuildFinishes(opCtx);
|
||||
void IndexBuildsCoordinator::waitUntilAnIndexBuildFinishes(OperationContext* opCtx,
|
||||
Date_t deadline) {
|
||||
activeIndexBuilds.waitUntilAnIndexBuildFinishes(opCtx, deadline);
|
||||
}
|
||||
|
||||
void IndexBuildsCoordinator::appendBuildInfo(const UUID& buildUUID, BSONObjBuilder* builder) const {
|
||||
|
@ -486,10 +486,10 @@ public:
|
||||
void awaitNoBgOpInProgForDb(OperationContext* opCtx, const DatabaseName& dbName);
|
||||
|
||||
/**
|
||||
* Waits until an index build completes. If there are no index builds in progress, returns
|
||||
* immediately.
|
||||
* Waits until an index build completes or the deadline expires. If there are no index builds in
|
||||
* progress, returns immediately.
|
||||
*/
|
||||
void waitUntilAnIndexBuildFinishes(OperationContext* opCtx);
|
||||
void waitUntilAnIndexBuildFinishes(OperationContext* opCtx, Date_t deadline = Date_t::max());
|
||||
|
||||
|
||||
/**
|
||||
|
@ -175,6 +175,8 @@ namespace {
|
||||
// Failpoint to block after a write and its oplog entry have been written to the storage engine and
|
||||
// are visible, but before we have advanced 'lastApplied' for the write.
|
||||
MONGO_FAIL_POINT_DEFINE(hangBeforeLogOpAdvancesLastApplied);
|
||||
// Failpoint to block oplog application after receiving an IndexBuildAlreadyInProgress error.
|
||||
MONGO_FAIL_POINT_DEFINE(hangAfterIndexBuildConflict);
|
||||
|
||||
void abortIndexBuilds(OperationContext* opCtx,
|
||||
const OplogEntry::CommandType& commandType,
|
||||
@ -2499,6 +2501,23 @@ Status applyCommand_inlock(OperationContext* opCtx,
|
||||
"command",
|
||||
opObj,
|
||||
status);
|
||||
|
||||
// Even though steady state constraints are not enforced, we need to retry index
|
||||
// builds that conflict in case the existing index build fails.
|
||||
if (status.code() == ErrorCodes::IndexBuildAlreadyInProgress) {
|
||||
if (MONGO_unlikely(hangAfterIndexBuildConflict.shouldFail())) {
|
||||
LOGV2(8539001, "Hanging due to hangAfterIndexBuildConflict failpoint");
|
||||
hangAfterIndexBuildConflict.pauseWhileSet();
|
||||
}
|
||||
|
||||
auto deadline = Date_t::now() + Milliseconds(1000);
|
||||
LOGV2(8539000,
|
||||
"Waiting for existing index build to complete before retrying the "
|
||||
"conflicting operation");
|
||||
IndexBuildsCoordinator::get(opCtx)->waitUntilAnIndexBuildFinishes(opCtx,
|
||||
deadline);
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
LOGV2_DEBUG(51776,
|
||||
1,
|
||||
|
Loading…
Reference in New Issue
Block a user