2019-01-15 12:41:23 +01:00
|
|
|
|
/*
|
|
|
|
|
* Test that the read operations are not killed and their connections are also not
|
|
|
|
|
* closed during step down.
|
|
|
|
|
*/
|
|
|
|
|
load('jstests/libs/parallelTester.js');
|
2019-01-31 08:10:28 +01:00
|
|
|
|
load("jstests/libs/curop_helpers.js"); // for waitForCurOpByFailPoint().
|
2019-10-16 20:05:39 +02:00
|
|
|
|
load("jstests/replsets/rslib.js");
|
2019-01-15 12:41:23 +01:00
|
|
|
|
|
|
|
|
|
(function() {
|
|
|
|
|
|
2019-07-27 00:20:35 +02:00
|
|
|
|
"use strict";
|
2019-01-15 12:41:23 +01:00
|
|
|
|
|
2019-07-27 00:20:35 +02:00
|
|
|
|
// Identifiers used throughout the test. dbName/collName are also published on TestData
// below so that the parallel shells can read them.
const testName = "readOpsDuringStepDown";
const dbName = "test";
const collName = "coll";

// Two-node replica set; the second node is configured with priority 0.
const rst = new ReplSetTest({
    name: testName,
    nodes: [{}, {rsConfig: {priority: 0}}]
});
rst.startSet();
rst.initiate();

// Handles onto the current primary that the rest of the test reuses.
const primary = rst.getPrimary();
const primaryDB = primary.getDB(dbName);
const primaryAdmin = primary.getDB("admin");
const primaryColl = primaryDB[collName];
const collNss = primaryColl.getFullName();

TestData.dbName = dbName;
TestData.collName = collName;

jsTestLog("1. Do a document write");
const majorityWriteOptions = {"writeConcern": {"w": "majority"}};
const insertRes = primaryColl.insert({_id: 0}, majorityWriteOptions);
assert.commandWorked(insertRes);
rst.awaitReplication();

// Open a cursor on the primary with batchSize 0, leaving the inserted document to be
// fetched by a getMore that is issued after the step down.
const initialFindRes = primaryDB.runCommand({"find": collName, batchSize: 0});
const cursorIdToBeReadAfterStepDown = assert.commandWorked(initialFindRes).cursor.id;
|
|
|
|
|
|
|
|
|
|
jsTestLog("2. Start blocking getMore cmd before step down");
// Runs in a separate shell connected to the primary. It opens its own cursor, turns on the
// getMore fail point, and then issues a getMore whose result is checked once it returns.
const joinGetMoreThread = startParallelShell(() => {
    // Open another cursor on primary before step down. Locals are declared with 'const'
    // so that the parallel shell does not create implicit globals.
    const primaryDB = db.getSiblingDB(TestData.dbName);
    const cursorIdToBeReadDuringStepDown =
        assert.commandWorked(primaryDB.runCommand({"find": TestData.collName, batchSize: 0}))
            .cursor.id;

    // Enable the fail point for get more cmd.
    assert.commandWorked(db.adminCommand(
        {configureFailPoint: "waitAfterPinningCursorBeforeGetMoreBatch", mode: "alwaysOn"}));

    const getMoreRes = assert.commandWorked(primaryDB.runCommand(
        {"getMore": cursorIdToBeReadDuringStepDown, collection: TestData.collName}));
    assert.docEq([{_id: 0}], getMoreRes.cursor.nextBatch);
}, primary.port);

// Wait for getmore cmd to reach the fail point.
waitForCurOpByFailPoint(primaryAdmin, collNss, "waitAfterPinningCursorBeforeGetMoreBatch");
|
2019-02-07 18:30:37 +01:00
|
|
|
|
|
2019-07-27 00:20:35 +02:00
|
|
|
|
jsTestLog("2. Start blocking find cmd before step down");
// Runs in a separate shell connected to the primary: turns on the find fail point, then
// issues a find and checks that it returns the inserted document.
const joinFindThread = startParallelShell(() => {
    // Turn on the fail point for the find command.
    const enableFindFailPoint = {configureFailPoint: "waitInFindBeforeMakingBatch", mode: "alwaysOn"};
    assert.commandWorked(db.adminCommand(enableFindFailPoint));

    const testDB = db.getSiblingDB(TestData.dbName);
    const findRes = testDB.runCommand({"find": TestData.collName});
    assert.commandWorked(findRes);
    assert.docEq([{_id: 0}], findRes.cursor.firstBatch);
}, primary.port);

// Block until the find command is seen at its fail point.
waitForCurOpByFailPoint(primaryAdmin, collNss, "waitInFindBeforeMakingBatch");
|
|
|
|
|
|
|
|
|
|
jsTestLog("3. Make primary step down");
// Issue the forced step down from a separate shell so the main shell can keep driving
// the fail points.
const joinStepDownThread = startParallelShell(() => {
    const stepDownCmd = {"replSetStepDown": 100, "force": true};
    assert.commandWorked(db.adminCommand(stepDownCmd));
}, primary.port);

// Proceed only once the primary's log shows that the step down has begun killing
// user operations.
checkLog.contains(primary, "Starting to kill user operations");
|
|
|
|
|
|
2019-09-24 18:50:09 +02:00
|
|
|
|
// Turn on "waitAfterCommandFinishesExecution" for find and getMore on the test collection
// so that those commands do not complete before the step down does.
const postExecutionFailPoint = "waitAfterCommandFinishesExecution";
const postExecutionFailPointData = {ns: collNss, commands: ["find", "getMore"]};
setFailPoint(primaryAdmin, postExecutionFailPoint, postExecutionFailPointData);

jsTestLog("4. Disable fail points");
// Release the find first and the getMore second.
const blockingFailPoints = ["waitInFindBeforeMakingBatch", "waitAfterPinningCursorBeforeGetMoreBatch"];
for (const failPointName of blockingFailPoints) {
    clearFailPoint(primaryAdmin, failPointName);
}

// Wait until the primary transitioned to SECONDARY state.
joinStepDownThread();
rst.waitForState(primary, ReplSetTest.State.SECONDARY);

// There is no need to check that the commands reached "waitAfterCommandFinishesExecution":
// the primary is already known to have stepped down, which implies that the find and
// getMore commands were still running after the node stepped down.
clearFailPoint(primaryAdmin, postExecutionFailPoint);

// Wait for the getMore and find shells to finish.
joinGetMoreThread();
joinFindThread();
|
|
|
|
|
|
|
|
|
|
jsTestLog("5. Start get more cmd after step down");
// The cursor opened before the step down must still be readable on the stepped-down node.
const getMoreAfterStepDown =
    primaryDB.runCommand({"getMore": cursorIdToBeReadAfterStepDown, collection: collName});
assert.commandWorked(getMoreAfterStepDown);
assert.docEq([{_id: 0}], getMoreAfterStepDown.cursor.nextBatch);

// Validate that no operations got killed on step down and no network disconnection happened
// due to failed unacknowledged operations.
const serverStatusRes = assert.commandWorked(primaryAdmin.adminCommand({serverStatus: 1}));
const {stateTransition, network} = serverStatusRes.metrics.repl;
assert.eq(stateTransition.lastStateTransition, "stepDown");
assert.eq(stateTransition.userOperationsKilled, 0);
// Should account for the find and getMore commands issued before the step down.
assert.gte(stateTransition.userOperationsRunning, 2);
assert.eq(network.notPrimaryUnacknowledgedWrites, 0);

rst.stopSet();
|
2019-01-15 12:41:23 +01:00
|
|
|
|
})();
|