0
0
mirror of https://github.com/mongodb/mongo.git synced 2024-11-30 09:06:21 +01:00

SERVER-45995: Test that nodes in initial sync maintain and gossip the commit point

create mode 100644 jstests/replsets/initial_sync_nodes_maintain_and_gossip_commit_point.js
This commit is contained in:
Xuerui Fa 2020-02-10 15:17:04 -05:00 committed by Evergreen Agent
parent 6a17da7e7f
commit 9539c8f8be
2 changed files with 147 additions and 0 deletions

View File

@ -0,0 +1,139 @@
/*
* Tests that nodes in initial sync update their commit point and gossip their commit point to
* other nodes. This is done starting with a 3 node replica set with one non-voting secondary. We
* disconnect the non-voting secondary from the other nodes and then add a new node to the replica
* set. Thus, the non-voting secondary can only communicate with the initial syncing node. We then
* hang the initial syncing node at various stages, perform multiple majority writes to advance the
* commit point, and verify that the commit point on the initial syncing node is updated. Finally,
* we ensure that the disconnected secondary is able to update its commit point from the initial
* syncing node via heartbeats.
* @tags: [requires_fcv_44]
*/
(function() {
"use strict";

load("jstests/libs/fail_point_util.js");

const name = jsTestName();

// Three-node set: a primary candidate, a voting priority-0 secondary, and a non-voting priority-0
// secondary. 'useBridge' lets the test sever connections between individual nodes.
const rst = new ReplSetTest({
    name,
    nodes: [{}, {rsConfig: {priority: 0}}, {rsConfig: {priority: 0, votes: 0}}],
    useBridge: true
});
rst.startSet();
rst.initiateWithHighElectionTimeout();

const primary = rst.getPrimary();
const primaryDb = primary.getDB("test");
const secondary = rst.getSecondaries()[0];
const nonVotingSecondary = rst.getSecondaries()[1];

// Insert initial data to ensure that the repl set is initialized correctly.
assert.commandWorked(primaryDb.test.insert({a: 1}));
rst.awaitReplication();

/**
 * Fetches the 'lastCommittedOpTime' field of the given node.
 *
 * @param conn - connection to the node to query; 'replSetGetStatus' is run against its admin
 *     database and must succeed.
 * @returns the 'optimes.lastCommittedOpTime' value from the 'replSetGetStatus' response.
 */
function getLastCommittedOpTime(conn) {
    const replSetStatus = assert.commandWorked(conn.adminCommand({replSetGetStatus: 1}));
    return replSetStatus.optimes.lastCommittedOpTime;
}

const firstCommitPoint = getLastCommittedOpTime(primary);

// Disconnect the non-voting secondary from the other nodes so that it won't update its commit point
// from the other nodes' heartbeats.
nonVotingSecondary.disconnect(secondary);
nonVotingSecondary.disconnect(primary);

jsTestLog("Adding a new node to the replica set");
const initialSyncNode = rst.add({
    setParameter: {
        // Make sure our initial sync node does not sync from the node with votes 0.
        'failpoint.forceSyncSourceCandidate':
            tojson({mode: 'alwaysOn', data: {"hostAndPort": primary.host}}),
    }
});

// Arm all three hang points up front; each is waited on and released in turn below.
const hangAfterGettingBeginFetchingTimestamp =
    configureFailPoint(initialSyncNode, "initialSyncHangAfterGettingBeginFetchingTimestamp");
const hangBeforeCompletingOplogFetching =
    configureFailPoint(initialSyncNode, "initialSyncHangBeforeCompletingOplogFetching");
const hangBeforeFinish = configureFailPoint(initialSyncNode, "initialSyncHangBeforeFinish");

jsTestLog("Waiting for initial sync node to reach initial sync state");
rst.reInitiate();
rst.waitForState(initialSyncNode, ReplSetTest.State.STARTUP_2);

// Hang the initial sync node after it sets 'beginFetchingTimestamp' to ensure that the node will
// not set 'stopTimestamp' until after we perform the next write.
hangAfterGettingBeginFetchingTimestamp.wait();

// Do a write to be applied by the initial sync node. This is necessary because we cannot update the
// commit point if the term of `lastAppliedOpTime` is not equal to the current term.
assert.commandWorked(primaryDb.test.insert({b: 2}));

// Wait for knowledge of the last commit point to advance to the last write on the primary and
// secondary.
rst.awaitLastOpCommitted(undefined, [primary, secondary]);

const secondCommitPointPrimary = getLastCommittedOpTime(primary);
const secondCommitPointSecondary = getLastCommittedOpTime(secondary);

// Verify that the commit point has advanced on the primary and secondary.
assert.eq(1, rs.compareOpTimes(secondCommitPointPrimary, firstCommitPoint));
assert.eq(1, rs.compareOpTimes(secondCommitPointSecondary, firstCommitPoint));

// Verify that the commit point has *NOT* advanced on the non-voting secondary.
let commitPointNonVotingSecondary = getLastCommittedOpTime(nonVotingSecondary);
assert.eq(-1, rs.compareOpTimes(commitPointNonVotingSecondary, secondCommitPointPrimary));

// Allow the node to proceed to the oplog applying phase of initial sync and ensure that the oplog
// fetcher thread is still running.
hangAfterGettingBeginFetchingTimestamp.off();
hangBeforeCompletingOplogFetching.wait();

// The initial sync node will be able to update its commit point after fetching this write, since it
// set its `lastAppliedOpTime` from the previous write.
assert.commandWorked(primaryDb.test.insert({c: 3}));

// Wait for knowledge of the last commit point to advance to the last write on the primary and
// secondary.
rst.awaitLastOpCommitted(undefined, [primary, secondary]);

const thirdCommitPointPrimary = getLastCommittedOpTime(primary);
const thirdCommitPointSecondary = getLastCommittedOpTime(secondary);

// Verify that the commit point has advanced on the primary and secondary.
assert.eq(1, rs.compareOpTimes(thirdCommitPointPrimary, secondCommitPointPrimary));
assert.eq(1, rs.compareOpTimes(thirdCommitPointSecondary, secondCommitPointSecondary));

// Verify that the commit point has *NOT* advanced on the non-voting secondary.
commitPointNonVotingSecondary = getLastCommittedOpTime(nonVotingSecondary);
assert.eq(-1, rs.compareOpTimes(commitPointNonVotingSecondary, thirdCommitPointPrimary));

// Allow the initial sync node to complete oplog fetching but hang it before it completes initial
// sync.
hangBeforeCompletingOplogFetching.off();
hangBeforeFinish.wait();

// Verify that the initial sync node receives the commit point from the primary via oplog fetching.
// We only assert that it is greater than or equal to the second commit point because it is possible
// for the commit point to not yet be advanced by the primary when we fetch the oplog entry.
assert.gte(rs.compareOpTimes(getLastCommittedOpTime(initialSyncNode), secondCommitPointPrimary), 0);

// Verify that the non-voting secondary has received the updated commit point via heartbeats from
// the initial sync node.
assert.soon(
    () => rs.compareOpTimes(getLastCommittedOpTime(nonVotingSecondary),
                            getLastCommittedOpTime(initialSyncNode)) >= 0,
    "The nonVotingSecondary was unable to update its commit point from the initial sync node");

// Since the primary sends a shut down command to all secondaries in `rst.stopSet()`, we reconnect
// the disconnected secondary to the primary to allow it to be shut down.
nonVotingSecondary.reconnect(primary);

hangBeforeFinish.off();
// Use the ReplSetTest method, as earlier in this test; this file loads no library that would
// provide a free-standing 'waitForState' helper.
rst.waitForState(initialSyncNode, ReplSetTest.State.SECONDARY);

rst.stopSet();
})();

View File

@ -114,6 +114,9 @@ MONGO_FAIL_POINT_DEFINE(failInitialSyncBeforeApplyingBatch);
// Failpoint which fasserts if applying a batch fails.
MONGO_FAIL_POINT_DEFINE(initialSyncFassertIfApplyingBatchFails);
// Failpoint which causes the initial sync function to hang before stopping the oplog fetcher.
MONGO_FAIL_POINT_DEFINE(initialSyncHangBeforeCompletingOplogFetching);
// Failpoints for synchronization, shared with cloners.
extern FailPoint initialSyncFuzzerSynchronizationPoint1;
extern FailPoint initialSyncFuzzerSynchronizationPoint2;
@ -1545,6 +1548,11 @@ void InitialSyncer::_rollbackCheckerCheckForRollbackCallback(
return;
}
if (MONGO_unlikely(initialSyncHangBeforeCompletingOplogFetching.shouldFail())) {
LOGV2(4599500, "initialSyncHangBeforeCompletingOplogFetching fail point enabled.");
initialSyncHangBeforeCompletingOplogFetching.pauseWhileSet();
}
// Success!
onCompletionGuard->setResultAndCancelRemainingWork_inlock(lock, _lastApplied);
}