0
0
mirror of https://github.com/mongodb/mongo.git synced 2024-12-01 09:32:32 +01:00

SERVER-31268 Make updates_in_heterogeneous_repl_set.js more robust.

The updates_in_heterogeneous_repl_set.js test wants to call stepUp()
on each of its replica set members to ensure that each one gets an
opportunity to handle inserts and updates. However, we found that the
stepUp()-triggered election was occasionally failing, because even
though a call to awaitReplication() ensured that all members were
caught up on their oplogs, some members were not aware that their
peers were caught up.

The new awaitNodesAgreeOnAppliedOpTime() function uses an assert.soon() to
wait until all the members are caught up on their peers' status, so
that the election always succeeds.
This commit is contained in:
Justin Seyster 2017-10-04 19:37:10 -04:00
parent 232f7986c6
commit edc3dfce51
2 changed files with 78 additions and 1 deletions

View File

@ -29,7 +29,6 @@ const testName = "updates_in_heterogeneous_repl_set";
// Give each member a chance to be primary while updating documents.
let collIndex = 0;
replTest.nodes.forEach(function(node) {
replTest.awaitReplication();
replTest.stepUp(node);
let coll = node.getDB("test")["coll" + (collIndex++)];

View File

@ -548,6 +548,82 @@ var ReplSetTest = function(opts) {
timeout);
};
/**
 * Polls replSetGetStatus on every node until each node reports the same applied optime for
 * itself and for every non-arbiter member it lists.
 *
 * 'timeout' is forwarded to assert.soon(); a falsy value selects self.kDefaultTimeoutMS.
 * 'nodes' is the list of connections to poll; a falsy value selects self.nodes.
 */
this.awaitNodesAgreeOnAppliedOpTime = function(timeout, nodes) {
    const waitLimit = timeout || self.kDefaultTimeoutMS;
    const targets = nodes || self.nodes;

    // Chooses the optime that every later observation in a polling pass is compared against.
    const pickReferenceOpTime = function(status) {
        let reference;
        if (status.optimes) {
            reference = status.optimes.appliedOpTime;
        } else {
            // Older versions of mongod do not include an 'optimes' field in the
            // replSetGetStatus response, so fall back to the optime of the first listed
            // member that has one. Any starting value works, because every other optime is
            // compared against it afterwards.
            const membersWithOptime = status.members.filter(member => member.optime);
            assert(membersWithOptime.length > 0,
                   "AwaitNodesAgreeOnAppliedOpTime: replSetGetStatus did not " +
                       "include optimes for any members: " + tojson(status));
            reference = membersWithOptime[0].optime;
        }
        assert(reference,
               "AwaitNodesAgreeOnAppliedOpTime: missing appliedOpTime in " +
                   "replSetGetStatus: " + tojson(status));
        return reference;
    };

    // One polling pass: returns true only when no node reports a differing optime.
    const allNodesAgree = function() {
        let agreedOpTime;

        for (const node of targets) {
            let status;
            try {
                status = node.adminCommand({replSetGetStatus: 1});
            } catch (err) {
                print("AwaitNodesAgreeOnAppliedOpTime: Retrying because node " + node.name +
                      " failed to execute replSetGetStatus: " + tojson(err));
                return false;
            }
            assert.commandWorked(status);

            // The first node polled in this pass supplies the optime to compare against.
            if (agreedOpTime === undefined) {
                agreedOpTime = pickReferenceOpTime(status);
            }

            // The node's own applied optime, when reported, must match the reference.
            if (status.optimes) {
                const ownOpTime = status.optimes.appliedOpTime;
                if (!friendlyEqual(ownOpTime, agreedOpTime)) {
                    print("AwaitNodesAgreeOnAppliedOpTime: Retrying because node " + node.name +
                          " has appliedOpTime " + tojson(ownOpTime) +
                          " that does not match the previously observed appliedOpTime " +
                          tojson(agreedOpTime));
                    return false;
                }
            }

            // ARBITER nodes do not apply oplog entries and do not have an 'optime' field, so
            // they are excluded from the comparison.
            const disagreeing = status.members.find(
                member => member.state != ReplSetTest.State.ARBITER &&
                    !friendlyEqual(member.optime, agreedOpTime));
            if (disagreeing !== undefined) {
                print("AwaitNodesAgreeOnAppliedOpTime: Retrying because node " + node.name +
                      " sees optime " + tojson(disagreeing.optime) + " on node " +
                      disagreeing.name + " but expects to see optime " + tojson(agreedOpTime));
                return false;
            }
        }

        print("AwaitNodesAgreeOnAppliedOpTime: All nodes agree that all ops are applied up to " +
              tojson(agreedOpTime));
        return true;
    };

    assert.soon(allNodesAgree,
                "Awaiting nodes to agree that all ops are applied across replica set",
                waitLimit);
};
/**
* Blocks until all nodes agree on who the primary is.
* If 'expectedPrimaryNodeId' is provided, ensure that every node is seeing this node as the
@ -896,6 +972,7 @@ var ReplSetTest = function(opts) {
*/
this.stepUp = function(node) {
this.awaitReplication();
this.awaitNodesAgreeOnAppliedOpTime();
this.awaitNodesAgreeOnPrimary();
if (this.getPrimary() === node) {
return;
@ -917,6 +994,7 @@ var ReplSetTest = function(opts) {
"': " + tojson(ex));
}
this.awaitReplication();
this.awaitNodesAgreeOnAppliedOpTime();
this.awaitNodesAgreeOnPrimary();
}