mirror of
https://github.com/mongodb/mongo.git
synced 2024-12-01 09:32:32 +01:00
177 lines
6.6 KiB
JavaScript
177 lines
6.6 KiB
JavaScript
/**
 * This test ensures that a secondary that has gone "too stale" (i.e. cannot find another node with
 * a common oplog point) will transition to RECOVERING state, stay in RECOVERING after restart, and
 * transition back to SECONDARY once it finds a sync source with a common oplog point.
 *
 * Note: This test requires persistence in order for a restarted node with a stale oplog to stay in
 * the RECOVERING state. A restarted node with an ephemeral storage engine will not have an oplog
 * upon restart, so will immediately resync.
 *
 * @tags: [
 *   requires_persistence,
 * ]
 *
 * Replica Set Setup:
 *
 * Node 0 (PRIMARY)   : Small Oplog
 * Node 1 (SECONDARY) : Large Oplog
 * Node 2 (SECONDARY) : Small Oplog
 *
 * 1: Insert one document on the primary (Node 0) and ensure it is replicated.
 * 2: Stop Node 2.
 * 3: Wait until Node 2 is down.
 * 4: Overflow the primary's oplog.
 * 5: Stop Node 1 and restart Node 2.
 * 6: Wait for Node 2 to transition to RECOVERING (it should be too stale).
 * 7: Stop and restart Node 2.
 * 8: Wait for Node 2 to transition to RECOVERING (its oplog should remain stale after restart).
 * 9: Restart Node 1, which should have the full oplog history.
 * 10: Wait for Node 2 to leave RECOVERING and transition to SECONDARY.
 *
 */
|
|
|
|
(function() {
|
|
"use strict";
|
|
|
|
load('jstests/replsets/rslib.js');
|
|
|
|
/**
 * Returns the first document of the 'local.oplog.rs' collection in natural order, i.e. the
 * entry at the front of the oplog of the node behind 'conn'.
 *
 * @param conn - connection to the node whose oplog is inspected.
 * @returns the first oplog document, or undefined if the query yields no documents.
 */
function getFirstOplogEntry(conn) {
    // Ascending $natural order walks the collection front to back, so limit(1) picks the
    // entry at the head of the oplog.
    return conn.getDB('local').oplog.rs.find().sort({$natural: 1}).limit(1)[0];
}
|
|
|
|
/**
 * Overflows the oplog of a given node.
 *
 * To detect oplog overflow, we continuously insert large documents until we
 * detect that the first entry of the oplog is no longer the same as when we started. This
 * implies that the oplog attempted to grow beyond its maximum size i.e. it
 * has overflowed/rolled over.
 *
 * @param conn - connection to the node whose oplog is watched for rollover.
 * @param db - database handle used for the inserts (into its "overflow" collection).
 * @param writeConcern - value used as 'w' in the write concern of every insert.
 */
function overflowOplog(conn, db, writeConcern) {
    // Snapshot the head of the oplog of the node we were asked about. This must use 'conn'
    // rather than any outer variable, so the function works for whichever node is passed in.
    const firstOplogEntry = getFirstOplogEntry(conn);
    const collName = "overflow";

    // Keep inserting large documents until the oplog rolls over.
    // Same length as joining 32 * 1024 empty slots with the 8-character filler.
    const largeStr = 'aaaaaaaa'.repeat(32 * 1024 - 1);
    while (bsonWoCompare(getFirstOplogEntry(conn), firstOplogEntry) === 0) {
        assert.commandWorked(
            db[collName].insert({data: largeStr}, {writeConcern: {w: writeConcern}}));
    }
}
|
|
|
|
/**
 * Returns the top-level 'tooStale' field of the node's own "replSetGetStatus" response.
 *
 * The command is run directly against 'conn', so this is the node's own view of whether it is
 * too stale. Throws (via assert.commandWorked) if the command fails, which is why callers that
 * poll a restarting node wrap this in assert.soonNoExcept.
 *
 * @param conn - connection to the node being queried.
 * @returns the 'tooStale' field as reported (undefined if the response omits it).
 */
function tooStale(conn) {
    return assert.commandWorked(conn.adminCommand("replSetGetStatus")).tooStale;
}
|
|
|
|
/**
 * Returns a node's current replica state, as reported by the node itself in the 'myState'
 * field of its "replSetGetStatus" response.
 *
 * Throws (via assert.commandWorked) if the command fails.
 *
 * @param conn - connection to the node being queried.
 * @returns the 'myState' value, comparable against the ReplSetTest.State constants.
 */
function myState(conn) {
    return assert.commandWorked(conn.adminCommand("replSetGetStatus")).myState;
}
|
|
|
|
// Test body: builds a three-node replica set, makes Node 2 too stale by overflowing the
// primary's oplog while Node 2 is down, and checks the RECOVERING -> SECONDARY transitions.
const testName = "too_stale_secondary";

const smallOplogSizeMB = 1;
const bigOplogSizeMB = 1000;

// Node 0 is given a small oplog so we can overflow it. Node 1's large oplog allows it to
// store all entries comfortably without overflowing, so that Node 2 can eventually use it as
// a sync source after it goes too stale. Because this test overflows the oplog, a small
// syncdelay is chosen to frequently take checkpoints, allowing oplog truncation to proceed.
const replTest = new ReplSetTest({
    name: testName,
    nodes:
        [{oplogSize: smallOplogSizeMB}, {oplogSize: bigOplogSizeMB}, {oplogSize: smallOplogSizeMB}],
    nodeOptions: {syncdelay: 1},
});

const nodes = replTest.startSet();
// Nodes 1 and 2 get priority 0 so that only Node 0 can become primary.
replTest.initiate({
    _id: testName,
    members: [
        {_id: 0, host: nodes[0].host},
        {_id: 1, host: nodes[1].host, priority: 0},
        {_id: 2, host: nodes[2].host, priority: 0}
    ]
});

const dbName = testName;
const collName = "test";

jsTestLog("Wait for Node 0 to become the primary.");
replTest.waitForState(replTest.nodes[0], ReplSetTest.State.PRIMARY);

const primary = replTest.getPrimary();
const primaryTestDB = primary.getDB(dbName);

jsTestLog("1: Insert one document on the primary (Node 0) and ensure it is replicated.");
assert.commandWorked(primaryTestDB[collName].insert({a: 1}, {writeConcern: {w: 3}}));
assert(!tooStale(replTest.nodes[2]));

jsTestLog("2: Stop Node 2.");
replTest.stop(2);

jsTestLog("3: Wait until Node 2 is down.");
replTest.waitForState(replTest.nodes[2], ReplSetTest.State.DOWN);

// Remember the head of Node 1's oplog so we can verify below that it did not roll over.
const firstOplogEntryNode1 = getFirstOplogEntry(replTest.nodes[1]);

jsTestLog("4: Overflow the primary's oplog.");
// w: 2 because Node 2 is down; every insert must still reach Node 1.
overflowOplog(primary, primaryTestDB, 2);

// Make sure that Node 1's oplog didn't overflow.
assert.eq(firstOplogEntryNode1,
          getFirstOplogEntry(replTest.nodes[1]),
          "Node 1's oplog overflowed unexpectedly.");

jsTestLog("5: Stop Node 1 and restart Node 2.");
replTest.stop(1);
replTest.restart(2);

jsTestLog("6: Wait for Node 2 to transition to RECOVERING (it should be too stale).");
assert.soonNoExcept(() => tooStale(replTest.nodes[2]), "Waiting for Node 2 to become too stale");
// This checks the state as reported by the node itself.
assert.soon(() => myState(replTest.nodes[2]) === ReplSetTest.State.RECOVERING,
            "Waiting for Node 2 to transition to RECOVERING");
// This waits for the state as indicated by the primary node.
replTest.waitForState(replTest.nodes[2], ReplSetTest.State.RECOVERING);

jsTestLog("7: Stop and restart Node 2.");
replTest.stop(2);
replTest.restart(2, {
    // Set the failpoint to fail the transition to maintenance mode once. Make sure transitioning to
    // maintenance mode is resilient to errors (e.g. race with a concurrent election) and will
    // eventually succeed.
    setParameter: {'failpoint.setMaintenanceModeFailsWithNotSecondary': tojson({mode: {times: 1}})}
});

jsTestLog(
    "8: Wait for Node 2 to transition to RECOVERING (its oplog should remain stale after restart)");
assert.soonNoExcept(() => tooStale(replTest.nodes[2]), "Waiting for Node 2 to become too stale");
// This checks the state as reported by the node itself.
assert.soon(() => myState(replTest.nodes[2]) === ReplSetTest.State.RECOVERING,
            "Waiting for Node 2 to transition to RECOVERING");
// This waits for the state as indicated by the primary node.
replTest.waitForState(replTest.nodes[2], ReplSetTest.State.RECOVERING);

jsTestLog("9: Restart Node 1, which should have the full oplog history.");
replTest.restart(1);

jsTestLog("10: Wait for Node 2 to leave RECOVERING and transition to SECONDARY.");
assert.soonNoExcept(() => !tooStale(replTest.nodes[2]), "Waiting for Node 2 to exit too stale");
// This checks the state as reported by the node itself.
assert.soon(() => myState(replTest.nodes[2]) === ReplSetTest.State.SECONDARY,
            "Waiting for Node 2 to transition to SECONDARY");
// This waits for the state as indicated by the primary node.
replTest.waitForState(replTest.nodes[2], ReplSetTest.State.SECONDARY);

replTest.stopSet();
|
|
}());
|