mirror of
https://github.com/mongodb/mongo.git
synced 2024-12-01 09:32:32 +01:00
SERVER-57262 Relax vote constraint to allow voting for candidates with newer config.
This commit is contained in:
parent
a33a04b618
commit
e793725604
@ -214,6 +214,8 @@ last-lts:
|
||||
test_file: jstests/core/timeseries/timeseries_bucket_drop.js
|
||||
- ticket: SERVER-56800
|
||||
test_file: jstests/sharding/cwwc_conflict_add_shard.js
|
||||
- ticket: SERVER-57262
|
||||
test_file: jstests/replsets/catchup_takeover_with_higher_config.js
|
||||
|
||||
# Tests that should only be excluded from particular suites should be listed under that suite.
|
||||
suites:
|
||||
|
125
jstests/replsets/catchup_takeover_with_higher_config.js
Normal file
125
jstests/replsets/catchup_takeover_with_higher_config.js
Normal file
@ -0,0 +1,125 @@
|
||||
/**
|
||||
* Test that when a primary is blocked in drain mode, catchup takeover can work even
|
||||
* if the primary has a lower config than the takeover node. The test starts a 3-node
|
||||
* replica set and then steps up node1 but blocks it in drain mode before it can bump
|
||||
* the config term. Next it steps up node2 and also blocks it in drain mode and later
|
||||
* unblocks node1 to let it finish config term bump so that it has higher config than
|
||||
* node2. Eventually after catchUpTakeoverDelayMillis has passed, node1 should be able
|
||||
* get the vote from node2 which has a lower config, and finish the catchup takeover.
|
||||
*/
|
||||
|
||||
(function() {
|
||||
'use strict';
|
||||
|
||||
load("jstests/libs/write_concern_util.js");
|
||||
load('jstests/replsets/libs/election_metrics.js');
|
||||
|
||||
// Get the current config from the node and compare it with the provided config.
|
||||
const getNodeConfigAndCompare = function(node, config, cmp) {
|
||||
const currentConfig = assert.commandWorked(node.adminCommand({replSetGetConfig: 1})).config;
|
||||
if (cmp === '=') {
|
||||
return currentConfig.term === config.term && currentConfig.version === config.version;
|
||||
} else if (cmp === '>') {
|
||||
return currentConfig.term > config.term ||
|
||||
(currentConfig.term === config.term && currentConfig.version > config.version);
|
||||
} else if (cmp === '<') {
|
||||
return currentConfig.term < config.term ||
|
||||
(currentConfig.term === config.term && currentConfig.version < config.version);
|
||||
} else {
|
||||
assert(false);
|
||||
}
|
||||
};
|
||||
|
||||
// Wait for all nodes to acknowledge that the node at nodeIndex is in PRIMARY state.
|
||||
// A node may see multiple nodes claiming primary, but only checks the provided one.
|
||||
const waitForPrimaryState = function(nodes, nodeIndex, timeout) {
|
||||
assert.soon(() => {
|
||||
for (const node of nodes) {
|
||||
const status = assert.commandWorked(node.adminCommand({replSetGetStatus: 1}));
|
||||
if (status.members[nodeIndex].state !== ReplSetTest.State.PRIMARY) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}, `Failed to wait for primary state for node ${nodes[nodeIndex].host}`, timeout);
|
||||
};
|
||||
|
||||
const replSet = new ReplSetTest({name: jsTestName(), nodes: 3});
|
||||
const nodes = replSet.startSet();
|
||||
let config = replSet.getReplSetConfig();
|
||||
// Prevent nodes from syncing from other secondaries.
|
||||
config.settings = {
|
||||
chainingAllowed: false,
|
||||
};
|
||||
replSet.initiateWithHighElectionTimeout(config);
|
||||
replSet.awaitReplication();
|
||||
assert.eq(replSet.getPrimary(), nodes[0]);
|
||||
|
||||
// Failpoint to hang node1 before the automatic reconfig on stepup bumps the config term.
|
||||
const hangBeforeTermBumpFpNode1 = configureFailPoint(nodes[1], "hangBeforeReconfigOnDrainComplete");
|
||||
const initialConfig = assert.commandWorked(nodes[0].adminCommand({replSetGetConfig: 1})).config;
|
||||
|
||||
// Stepup node1 and wait to hang before bumping the config term.
|
||||
assert.commandWorked(nodes[1].adminCommand({replSetStepUp: 1}));
|
||||
hangBeforeTermBumpFpNode1.wait();
|
||||
|
||||
// Wait for all nodes to acknowledge that node1 has become primary.
|
||||
jsTestLog(`Waiting for all nodes to agree on ${nodes[1].host} being primary`);
|
||||
replSet.awaitNodesAgreeOnPrimary(replSet.kDefaultTimeoutMS, nodes, nodes[1]);
|
||||
|
||||
// Check that the failpoint worked and the config has not changed.
|
||||
assert(getNodeConfigAndCompare(nodes[1], initialConfig, '='));
|
||||
|
||||
// Stepup node2 and wait to hang before bumping the config term as well.
|
||||
const hangBeforeTermBumpFpNode2 = configureFailPoint(nodes[2], "hangBeforeReconfigOnDrainComplete");
|
||||
assert.commandWorked(nodes[2].adminCommand({replSetStepUp: 1}));
|
||||
hangBeforeTermBumpFpNode2.wait();
|
||||
|
||||
// Wait for all nodes to acknowledge that node2 has become primary. Cannot use
|
||||
// awaitNodesAgreeOnPrimary() or getPrimary() here which do not allow a node to
|
||||
// see multiple primaries.
|
||||
jsTestLog(`Waiting for all nodes to agree on ${nodes[2].host} being primary`);
|
||||
waitForPrimaryState(nodes, 2, 30 * 1000);
|
||||
|
||||
// Wait for node0 to change its sync source to node2. Later when the failpoint on node 1
|
||||
// is lifted, it will do a no-op write and finish the stepup process, so its lastApplied
|
||||
// opTime will be greater than the other two nodes. By waiting for sync source change we
|
||||
// make sure node0 will not pull new entries from node1, making node1 the only eligible
|
||||
// candidate to catchup takeover node2.
|
||||
assert.soon(() => {
|
||||
const status = assert.commandWorked(nodes[0].adminCommand({replSetGetStatus: 1}));
|
||||
return status.syncSourceHost === nodes[2].host;
|
||||
});
|
||||
|
||||
const statusBeforeTakeover = assert.commandWorked(nodes[1].adminCommand({serverStatus: 1}));
|
||||
|
||||
// Lift the failpoint on node1 to let it finish reconfig and bump the config term.
|
||||
hangBeforeTermBumpFpNode1.off();
|
||||
|
||||
jsTestLog(
|
||||
`Waiting for ${nodes[1].host} to finish config term bump and propagate to ${nodes[0].host}`);
|
||||
assert.soon(() => getNodeConfigAndCompare(nodes[0], initialConfig, '>'));
|
||||
assert.soon(() => getNodeConfigAndCompare(nodes[1], initialConfig, '>'));
|
||||
// Check that node2 is still in catchup mode, so it cannot install a new config.
|
||||
assert(getNodeConfigAndCompare(nodes[2], initialConfig, '='));
|
||||
|
||||
// Wait for node1 to catchup takeover node2 after the default catchup takeover delay.
|
||||
jsTestLog(`Waiting for ${nodes[1].host} to catchup takeover ${nodes[2].host}`);
|
||||
waitForPrimaryState(nodes, 1, 60 * 1000);
|
||||
|
||||
// Check again that node2 is still in catchup mode and has not installed a new config.
|
||||
assert(getNodeConfigAndCompare(nodes[2], initialConfig, '='));
|
||||
|
||||
// Lift the failpoint on node2 and wait for all nodes to see node1 as the only primary.
|
||||
hangBeforeTermBumpFpNode2.off();
|
||||
replSet.awaitNodesAgreeOnPrimary(replSet.kDefaultTimeoutMS, nodes, nodes[1]);
|
||||
|
||||
// Check that election metrics has been updated with the new reason counter.
|
||||
const statusAfterTakeover = assert.commandWorked(nodes[1].adminCommand({serverStatus: 1}));
|
||||
verifyServerStatusElectionReasonCounterChange(statusBeforeTakeover.electionMetrics,
|
||||
statusAfterTakeover.electionMetrics,
|
||||
"catchUpTakeover",
|
||||
1);
|
||||
|
||||
replSet.stopSet();
|
||||
})();
|
@ -132,7 +132,9 @@ MONGO_FAIL_POINT_DEFINE(skipDurableTimestampUpdates);
|
||||
MONGO_FAIL_POINT_DEFINE(omitConfigQuorumCheck);
|
||||
// Will cause signal drain complete to hang right before acquiring the RSTL.
|
||||
MONGO_FAIL_POINT_DEFINE(hangBeforeRSTLOnDrainComplete);
|
||||
// Will cause signal drain complete to hang after reconfig
|
||||
// Will cause signal drain complete to hang before reconfig.
|
||||
MONGO_FAIL_POINT_DEFINE(hangBeforeReconfigOnDrainComplete);
|
||||
// Will cause signal drain complete to hang after reconfig.
|
||||
MONGO_FAIL_POINT_DEFINE(hangAfterReconfigOnDrainComplete);
|
||||
MONGO_FAIL_POINT_DEFINE(doNotRemoveNewlyAddedOnHeartbeats);
|
||||
// Will hang right after setting the currentOp info associated with an automatic reconfig.
|
||||
@ -1160,6 +1162,10 @@ void ReplicationCoordinatorImpl::signalDrainComplete(OperationContext* opCtx,
|
||||
lk.unlock();
|
||||
|
||||
if (needBumpConfigTerm) {
|
||||
if (MONGO_unlikely(hangBeforeReconfigOnDrainComplete.shouldFail())) {
|
||||
LOGV2(5726200, "Hanging due to hangBeforeReconfigOnDrainComplete failpoint");
|
||||
hangBeforeReconfigOnDrainComplete.pauseWhileSet(opCtx);
|
||||
}
|
||||
// We re-write the term but keep version the same. This conceptually a no-op
|
||||
// in the config consensus group, analogous to writing a new oplog entry
|
||||
// in Raft log state machine on step up.
|
||||
|
@ -3268,9 +3268,9 @@ void TopologyCoordinator::processReplSetRequestVotes(const ReplSetRequestVotesAr
|
||||
return;
|
||||
}
|
||||
|
||||
if (args.getConfigVersionAndTerm() != _rsConfig.getConfigVersionAndTerm()) {
|
||||
if (args.getConfigVersionAndTerm() < _rsConfig.getConfigVersionAndTerm()) {
|
||||
response->setVoteGranted(false);
|
||||
response->setReason("candidate's config with {} differs from mine with {}"_format(
|
||||
response->setReason("candidate's config with {} is older than mine with {}"_format(
|
||||
args.getConfigVersionAndTerm(), _rsConfig.getConfigVersionAndTerm()));
|
||||
} else if (args.getTerm() < _term) {
|
||||
response->setVoteGranted(false);
|
||||
@ -3292,8 +3292,15 @@ void TopologyCoordinator::processReplSetRequestVotes(const ReplSetRequestVotesAr
|
||||
_rsConfig.getMemberAt(_lastVote.getCandidateIndex()).getHostAndPort(),
|
||||
_lastVote.getTerm()));
|
||||
} else {
|
||||
bool isSameConfig = args.getConfigVersionAndTerm() == _rsConfig.getConfigVersionAndTerm();
|
||||
int betterPrimary = _findHealthyPrimaryOfEqualOrGreaterPriority(args.getCandidateIndex());
|
||||
if (_selfConfig().isArbiter() && betterPrimary >= 0) {
|
||||
// Do not grant vote if we are arbiter and can see a healthy primary of greater or equal
|
||||
// priority, to prevent primary flapping when there are two nodes that can't talk to each
|
||||
// other but we that can talk to both as arbiter. We only do this if the voter's config
|
||||
// is same as ours, otherwise the primary information might be stale and we might not be
|
||||
// arbiter in the candidate's newer config. We might also hit an invariant described in
|
||||
// SERVER-46387 without the check for same config.
|
||||
if (isSameConfig && _selfConfig().isArbiter() && betterPrimary >= 0) {
|
||||
response->setVoteGranted(false);
|
||||
response->setReason(
|
||||
"can see a healthy primary ({}) of equal or greater priority"_format(
|
||||
|
@ -3261,7 +3261,7 @@ public:
|
||||
TEST_F(ConfigTermAndVersionVoteTest, DataNodeDoesNotGrantVoteWhenConfigVersionIsLower) {
|
||||
auto response = testWithArbiter(false, 1, 2);
|
||||
ASSERT_EQUALS(
|
||||
"candidate's config with {version: 1, term: 2} differs from mine with"
|
||||
"candidate's config with {version: 1, term: 2} is older than mine with"
|
||||
" {version: 2, term: 2}",
|
||||
response.getReason());
|
||||
}
|
||||
@ -3269,7 +3269,7 @@ TEST_F(ConfigTermAndVersionVoteTest, DataNodeDoesNotGrantVoteWhenConfigVersionIs
|
||||
TEST_F(ConfigTermAndVersionVoteTest, ArbiterDoesNotGrantVoteWhenConfigVersionIsLower) {
|
||||
auto response = testWithArbiter(true, 1, 2);
|
||||
ASSERT_EQUALS(
|
||||
"candidate's config with {version: 1, term: 2} differs from mine with"
|
||||
"candidate's config with {version: 1, term: 2} is older than mine with"
|
||||
" {version: 2, term: 2}",
|
||||
response.getReason());
|
||||
}
|
||||
@ -3277,7 +3277,7 @@ TEST_F(ConfigTermAndVersionVoteTest, ArbiterDoesNotGrantVoteWhenConfigVersionIsL
|
||||
TEST_F(ConfigTermAndVersionVoteTest, DataNodeDoesNotGrantVoteWhenConfigTermIsLower) {
|
||||
auto response = testWithArbiter(false, 2, 1);
|
||||
ASSERT_EQUALS(
|
||||
"candidate's config with {version: 2, term: 1} differs from mine with"
|
||||
"candidate's config with {version: 2, term: 1} is older than mine with"
|
||||
" {version: 2, term: 2}",
|
||||
response.getReason());
|
||||
}
|
||||
@ -3285,7 +3285,7 @@ TEST_F(ConfigTermAndVersionVoteTest, DataNodeDoesNotGrantVoteWhenConfigTermIsLow
|
||||
TEST_F(ConfigTermAndVersionVoteTest, ArbiterDoesNotGrantVoteWhenConfigTermIsLower) {
|
||||
auto response = testWithArbiter(true, 2, 1);
|
||||
ASSERT_EQUALS(
|
||||
"candidate's config with {version: 2, term: 1} differs from mine with"
|
||||
"candidate's config with {version: 2, term: 1} is older than mine with"
|
||||
" {version: 2, term: 2}",
|
||||
response.getReason());
|
||||
}
|
||||
@ -3453,7 +3453,7 @@ TEST_F(TopoCoordTest, NodeDoesNotGrantDryRunVoteWhenConfigVersionIsLower) {
|
||||
|
||||
getTopoCoord().processReplSetRequestVotes(args, &response);
|
||||
ASSERT_EQUALS(
|
||||
"candidate's config with {version: 0, term: 1} differs from mine with {version: 1, term: "
|
||||
"candidate's config with {version: 0, term: 1} is older than mine with {version: 1, term: "
|
||||
"1}",
|
||||
response.getReason());
|
||||
ASSERT_EQUALS(1, response.getTerm());
|
||||
@ -3508,7 +3508,7 @@ TEST_F(TopoCoordTest, NodeDoesNotGrantDryRunVoteWhenTermIsStale) {
|
||||
ASSERT_FALSE(response.getVoteGranted());
|
||||
}
|
||||
|
||||
TEST_F(TopoCoordTest, NodeDoesNotGrantVoteWhenTermIsHigherButConfigVersionIsLower) {
|
||||
TEST_F(TopoCoordTest, NodeGrantsVoteWhenTermIsHigherButConfigVersionIsLower) {
|
||||
updateConfig(BSON("_id"
|
||||
<< "rs0"
|
||||
<< "version" << 2 << "term" << 1LL << "members"
|
||||
@ -3539,11 +3539,7 @@ TEST_F(TopoCoordTest, NodeDoesNotGrantVoteWhenTermIsHigherButConfigVersionIsLowe
|
||||
getTopoCoord().processReplSetRequestVotes(args, &response);
|
||||
// Candidates config(t, v) is (2, 1) and our config is (1, 2). Even though the candidate's
|
||||
// config version is lower, we grant our vote because the candidate's config term is higher.
|
||||
ASSERT_FALSE(response.getVoteGranted());
|
||||
ASSERT_EQ(
|
||||
"candidate's config with {version: 1, term: 2} differs from mine with {version: 2, term: "
|
||||
"1}",
|
||||
response.getReason());
|
||||
ASSERT_TRUE(response.getVoteGranted());
|
||||
}
|
||||
|
||||
TEST_F(TopoCoordTest, GrantDryRunVoteEvenWhenTermHasBeenSeen) {
|
||||
|
Loading…
Reference in New Issue
Block a user