mirror of
https://github.com/mongodb/mongo.git
synced 2024-12-01 09:32:32 +01:00
SERVER-48527 Aborting in-progress transactions on step-up should clear session state before returning
This commit is contained in:
parent
a210979713
commit
c7c78598d5
@ -0,0 +1,95 @@
|
||||
//
|
||||
// Tests that refineCollectionShardKey can be safely aborted on step-up after a failover leads to
|
||||
// only half of the internal transaction getting replicated. This is a regression test for
|
||||
// SERVER-48527.
|
||||
//
|
||||
// Tag this test as 'requires_find_command' to prevent it from running in the legacy passthrough.
|
||||
// @tags: [
|
||||
// requires_find_command,
|
||||
// ]
|
||||
//
|
||||
|
||||
(function() {
'use strict';

load("jstests/libs/fail_point_util.js");
load('jstests/libs/parallel_shell_helpers.js');
load("jstests/replsets/rslib.js");

// Config server parameters: limiting each oplog entry to a single transaction operation makes
// transactions span multiple oplog entries, and the fail point used further down relies on the
// oplog fetcher batch size being 1.
const configServerParameters = {
    maxNumberOfTransactionOperationsInSingleOplogEntry: 1,
    bgSyncOplogFetcherBatchSize: 1,
};

const cluster = new ShardingTest({
    shards: 1,
    mongos: 1,
    other: {configOptions: {setParameter: configServerParameters}},
});

jsTestLog("Reconfig CSRS to have stable primary");
const configRS = cluster.configRS;
const replConfig = configRS.getReplSetConfigFromNode(0);
Object.assign(replConfig.settings, {
    electionTimeoutMillis: configRS.kDefaultTimeoutMS,
    catchUpTimeoutMillis: 0,
    chainingAllowed: false,
});
reconfig(configRS, replConfig, true);
waitForConfigReplication(configRS.getPrimary());
configRS.awaitReplication();

// Create a sharded collection together with the index that backs the refined shard key.
const dbName = jsTestName();
const collName = 'foo';
const nss = `${dbName}.${collName}`;
const refinedKeyPattern = {_id: 1, aKey: 1};

assert.commandWorked(cluster.s.adminCommand({enableSharding: dbName}));
assert.commandWorked(cluster.s.adminCommand({shardCollection: nss, key: {_id: 1}}));
assert.commandWorked(cluster.s.getCollection(nss).createIndex(refinedKeyPattern));

// Pick the roles of the CSRS members: the first secondary is the node that will be stepped up,
// the second one is the "extra" secondary.
const oldPrimary = configRS.getPrimary();
const [newPrimary, extraSecondary] = configRS.getSecondaries();

jsTest.log("Stop secondary oplog replication on the extra secondary so it will vote for anyone");
const extraSecondaryProducerFp = configureFailPoint(extraSecondary, 'stopReplProducer');

jsTest.log("Stop secondary oplog replication before the last operation in the transaction");
// With a fetcher batch size of 1, pausing on the first entry that touches 'config.chunks' makes
// the future primary stop replicating before it applies the last operation of the transaction.
// This relies on that last operation being the one that modifies 'config.chunks'.
const lastTxnOperationFilter = {document: {"applyOps.ns": "config.chunks"}};
const newPrimaryProducerFp =
    configureFailPoint(newPrimary, "stopReplProducerOnDocument", lastTxnOperationFilter);

jsTestLog("Refining collection shard key in a parallel shell");
// Runs inside the parallel shell, where 'db' is that shell's own connection to the mongos.
const refineShardKeyInShell = function(ns) {
    assert.commandWorked(db.adminCommand({refineCollectionShardKey: ns, key: {_id: 1, aKey: 1}}));
};
const joinRefineShell =
    startParallelShell(funWithArgs(refineShardKeyInShell, nss), cluster.s.port);

jsTestLog("Wait for the new primary to block on fail point");
newPrimaryProducerFp.wait();

jsTestLog(`Triggering CSRS failover from ${oldPrimary.host} to ${newPrimary.host}`);
assert.commandWorked(newPrimary.adminCommand({replSetStepUp: 1}));

jsTestLog(`Waiting for set to agree on the new primary, ${newPrimary.host}`);
configRS.awaitNodesAgreeOnPrimary();

jsTestLog("Wait for parallel shell to complete");
joinRefineShell();

// Guard against the whole transaction getting applied on the new primary.
jsTestLog("Wait for the new primary to stop replication after primary catch-up");
checkLog.contains(newPrimary, "Stopping replication producer");

jsTestLog("Enable replication on the new primary so that it can finish state transition");
newPrimaryProducerFp.off();
assert.eq(configRS.getPrimary(), newPrimary);

jsTestLog("Re-enable replication on the extra secondary so it can catch up");
extraSecondaryProducerFp.off();
configRS.awaitReplication();

cluster.stop();
})();
|
@ -406,6 +406,19 @@ public:
|
||||
_inMultiDocumentTransaction = true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Clears metadata associated with a multi-document transaction.
|
||||
*/
|
||||
void resetMultiDocumentTransactionState() {
|
||||
invariant(_inMultiDocumentTransaction);
|
||||
invariant(!_writeUnitOfWork);
|
||||
invariant(_ruState == WriteUnitOfWork::RecoveryUnitState::kNotInUnitOfWork);
|
||||
_inMultiDocumentTransaction = false;
|
||||
_isStartingMultiDocumentTransaction = false;
|
||||
_lsid = boost::none;
|
||||
_txnNumber = boost::none;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns whether this operation is starting a multi-document transaction.
|
||||
*/
|
||||
|
@ -212,6 +212,7 @@ void abortInProgressTransactions(OperationContext* opCtx) {
|
||||
"sessionId"_attr = txnRecord.getSessionId().toBSON(),
|
||||
"txnNumber"_attr = txnRecord.getTxnNum());
|
||||
txnParticipant.abortTransaction(opCtx);
|
||||
opCtx->resetMultiDocumentTransactionState();
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
|
Loading…
Reference in New Issue
Block a user