0
0
mirror of https://github.com/mongodb/mongo.git synced 2024-12-01 01:21:03 +01:00

SERVER-41881 Unstashing the transaction lock resources should ignore the saved value of maxLockTimeout and explicitly set the maxLockTimeout based on node's state.

SERVER-41883 Replication state transition reacquires locks and tickets of a prepared transaction with no lock timeout.
    SERVER-41556 Handles failure to reacquire locks and ticket when unstashing transactions.
This commit is contained in:
Suganthi Mani 2019-07-24 14:11:58 -04:00
parent f5f62f877b
commit 2ff54098b1
4 changed files with 138 additions and 16 deletions

View File

@ -86,8 +86,9 @@
}
// Transaction should already be aborted.
assert.commandFailedWithCode(session.abortTransaction_forTesting(),
ErrorCodes.NoSuchTransaction);
let res = assert.commandFailedWithCode(session.abortTransaction_forTesting(),
ErrorCodes.NoSuchTransaction);
assert(res.errmsg.match(/Transaction .* has been aborted/), res.errmsg);
session.endSession();
rst.stopSet();

View File

@ -83,6 +83,8 @@ MONGO_FAIL_POINT_DEFINE(hangBeforeReleasingTransactionOplogHole);
MONGO_FAIL_POINT_DEFINE(skipCommitTxnCheckPrepareMajorityCommitted);
MONGO_FAIL_POINT_DEFINE(restoreLocksFail);
const auto getTransactionParticipant = Session::declareDecoration<TransactionParticipant>();
// The command names that are allowed in a prepared transaction.
@ -701,7 +703,7 @@ TransactionParticipant::TxnResources::TxnResources(WithLock wl,
}
// On secondaries, max lock timeout must not be set.
invariant(stashStyle != StashStyle::kSecondary || !opCtx->lockState()->hasMaxLockTimeout());
invariant(!(stashStyle == StashStyle::kSecondary && opCtx->lockState()->hasMaxLockTimeout()));
_recoveryUnit = opCtx->releaseRecoveryUnit();
opCtx->setRecoveryUnit(std::unique_ptr<RecoveryUnit>(
@ -727,19 +729,42 @@ TransactionParticipant::TxnResources::~TxnResources() {
void TransactionParticipant::TxnResources::release(OperationContext* opCtx) {
// Perform operations that can fail the release before marking the TxnResources as released.
auto onError = makeGuard([&] {
// Release any locks acquired as part of lock restoration.
if (_lockSnapshot) {
// WUOW should be released before unlocking.
Locker::WUOWLockSnapshot dummyWUOWLockInfo;
_locker->releaseWriteUnitOfWork(&dummyWUOWLockInfo);
Locker::LockSnapshot dummyLockInfo;
_locker->saveLockStateAndUnlock(&dummyLockInfo);
}
// Release the ticket if acquired.
// restoreWriteUnitOfWorkAndLock() can reacquire the ticket as well.
if (_locker->getClientState() != Locker::ClientState::kInactive) {
_locker->releaseTicket();
}
});
// Restore locks if they are yielded.
if (_lockSnapshot) {
invariant(!_locker->isLocked());
// opCtx is passed in to enable the restoration to be interrupted.
_locker->restoreWriteUnitOfWorkAndLock(opCtx, *_lockSnapshot);
_lockSnapshot.reset(nullptr);
}
_locker->reacquireTicket(opCtx);
if (MONGO_FAIL_POINT(restoreLocksFail)) {
uasserted(ErrorCodes::LockTimeout, str::stream() << "Lock restore failed due to failpoint");
}
invariant(!_released);
_released = true;
// Successfully reacquired the locks and tickets.
onError.dismiss();
_lockSnapshot.reset(nullptr);
// It is necessary to lock the client to change the Locker on the OperationContext.
stdx::lock_guard<Client> lk(*opCtx->getClient());
invariant(opCtx->lockState()->getClientState() == Locker::ClientState::kInactive);
@ -844,7 +869,7 @@ void TransactionParticipant::Participant::resetRetryableWriteState(OperationCont
}
void TransactionParticipant::Participant::_releaseTransactionResourcesToOpCtx(
OperationContext* opCtx) {
OperationContext* opCtx, MaxLockTimeout maxLockTimeout) {
// Transaction resources already exist for this transaction. Transfer them from the
// stash to the operation context.
//
@ -852,14 +877,41 @@ void TransactionParticipant::Participant::_releaseTransactionResourcesToOpCtx(
// must hold the Client clock to mutate txnResourceStash, we jump through some hoops here to
// move the TxnResources in txnResourceStash into a local variable that can be manipulated
// without holding the Client lock.
[&]() noexcept {
auto tempTxnResourceStash = [&]() noexcept {
using std::swap;
boost::optional<TxnResources> trs;
stdx::lock_guard<Client> lk(*opCtx->getClient());
swap(trs, o(lk).txnResourceStash);
return std::move(*trs);
return trs;
}
().release(opCtx);
();
auto releaseOnError = makeGuard([&] {
// Restore the lock resources back to transaction participant.
using std::swap;
stdx::lock_guard<Client> lk(*opCtx->getClient());
swap(o(lk).txnResourceStash, tempTxnResourceStash);
});
invariant(tempTxnResourceStash);
auto stashLocker = tempTxnResourceStash->locker();
invariant(stashLocker);
if (maxLockTimeout == MaxLockTimeout::kNotAllowed) {
stashLocker->unsetMaxLockTimeout();
} else {
// If maxTransactionLockRequestTimeoutMillis is set, then we will ensure no
// future lock request waits longer than maxTransactionLockRequestTimeoutMillis
// to acquire a lock. This is to avoid deadlocks and minimize non-transaction
// operation performance degradations.
auto maxTransactionLockMillis = gMaxTransactionLockRequestTimeoutMillis.load();
if (maxTransactionLockMillis >= 0) {
stashLocker->setMaxLockTimeout(Milliseconds(maxTransactionLockMillis));
}
}
tempTxnResourceStash->release(opCtx);
releaseOnError.dismiss();
}
void TransactionParticipant::Participant::unstashTransactionResources(OperationContext* opCtx,
@ -875,7 +927,14 @@ void TransactionParticipant::Participant::unstashTransactionResources(OperationC
_checkIsCommandValidWithTxnState(*opCtx->getTxnNumber(), cmdName);
if (o().txnResourceStash) {
_releaseTransactionResourcesToOpCtx(opCtx);
MaxLockTimeout maxLockTimeout;
// Max lock timeout must not be set on secondaries, since secondary oplog application cannot
// fail. And, primaries should respect the transaction lock timeout, since it can prevent
// the transaction from making progress.
maxLockTimeout =
opCtx->writesAreReplicated() ? MaxLockTimeout::kAllowed : MaxLockTimeout::kNotAllowed;
_releaseTransactionResourcesToOpCtx(opCtx, maxLockTimeout);
stdx::lock_guard<Client> lg(*opCtx->getClient());
o(lg).transactionMetricsObserver.onUnstash(ServerTransactionsMetrics::get(opCtx),
opCtx->getServiceContext()->getTickSource());
@ -948,12 +1007,13 @@ void TransactionParticipant::Participant::refreshLocksForPreparedTransaction(
invariant(!opCtx->lockState()->isRSTLLocked());
invariant(!opCtx->lockState()->isLocked());
// The node must have txn resource.
invariant(o().txnResourceStash);
invariant(o().txnState.isPrepared());
_releaseTransactionResourcesToOpCtx(opCtx);
// Lock and Ticket reacquisition of a prepared transaction should not fail for
// state transitions (step up/step down).
_releaseTransactionResourcesToOpCtx(opCtx, MaxLockTimeout::kNotAllowed);
// Snapshot transactions don't conflict with PBWM lock on both primary and secondary.
invariant(!opCtx->lockState()->shouldConflictWithSecondaryBatchApplication());

View File

@ -202,9 +202,9 @@ public:
TxnResources& operator=(TxnResources&&) = default;
/**
* Returns a const pointer to the stashed lock state, or nullptr if no stashed locks exist.
* Returns a pointer to the stashed lock state, or nullptr if no stashed locks exist.
*/
const Locker* locker() const {
Locker* locker() const {
return _locker.get();
}
@ -357,6 +357,9 @@ public:
*/
class Participant : public Observer {
public:
// Indicates whether the future lock requests should have timeouts.
enum class MaxLockTimeout { kNotAllowed, kAllowed };
explicit Participant(OperationContext* opCtx);
explicit Participant(const SessionToKill& session);
@ -770,9 +773,22 @@ public:
// invalidating a transaction, or starting a new transaction.
void _resetTransactionState(WithLock wl, TransactionState::StateFlag state);
// Releases the resources held in *o().txnResources to the operation context.
// o().txnResources must be engaged prior to calling this.
void _releaseTransactionResourcesToOpCtx(OperationContext* opCtx);
/* Releases the resources held in *o().txnResources to the operation context.
* o().txnResources must be engaged prior to calling this.
*
* maxLockTimeout will determine whether future lock requests should have lock timeouts.
* - MaxLockTimeout::kNotAllowed will clear the lock timeout.
* - MaxLockTimeout::kAllowed will set the timeout as
* MaxTransactionLockRequestTimeoutMillis.
*
* ------------------------------------------------------------------
* | | PRIMARY | SECONDARY | STATE TRANSITION |
* |----------------|------------|---------------|------------------|
* |maxLockTimeout | kAllowed | kNotAllowed | kNotAllowed |
* ------------------------------------------------------------------
*/
void _releaseTransactionResourcesToOpCtx(OperationContext* opCtx,
MaxLockTimeout maxLockTimeout);
TransactionParticipant::PrivateState& p() {
return _tp->_p;

View File

@ -769,6 +769,51 @@ TEST_F(TxnParticipantTest, ThrowDuringOnTransactionPrepareAbortsTransaction) {
ASSERT(txnParticipant.transactionIsAborted());
}
TEST_F(TxnParticipantTest, UnstashFailsShouldLeaveTxnResourceStashUnchanged) {
auto sessionCheckout = checkOutSession();
auto txnParticipant = TransactionParticipant::get(opCtx());
txnParticipant.unstashTransactionResources(opCtx(), "prepareTransaction");
ASSERT_TRUE(opCtx()->lockState()->isLocked());
// Simulate the locking of an insert.
{
Lock::DBLock dbLock(opCtx(), "test", MODE_IX);
Lock::CollectionLock collLock(opCtx(), NamespaceString("test.foo"), MODE_IX);
}
auto prepareTimestamp = txnParticipant.prepareTransaction(opCtx(), {});
// Simulate a secondary style lock stashing such that the locks are yielded.
{
repl::UnreplicatedWritesBlock uwb(opCtx());
opCtx()->lockState()->unsetMaxLockTimeout();
txnParticipant.stashTransactionResources(opCtx());
}
ASSERT_FALSE(txnParticipant.getTxnResourceStashLockerForTest()->isLocked());
// Enable fail point.
getGlobalFailPointRegistry()->getFailPoint("restoreLocksFail")->setMode(FailPoint::alwaysOn);
ASSERT_THROWS_CODE(txnParticipant.unstashTransactionResources(opCtx(), "commitTransaction"),
AssertionException,
ErrorCodes::LockTimeout);
// Above unstash attempt fail should leave the txnResourceStash unchanged.
ASSERT_FALSE(txnParticipant.getTxnResourceStashLockerForTest()->isLocked());
// Disable fail point.
getGlobalFailPointRegistry()->getFailPoint("restoreLocksFail")->setMode(FailPoint::off);
// Should be successfully able to perform lock restore.
txnParticipant.unstashTransactionResources(opCtx(), "commitTransaction");
ASSERT_TRUE(opCtx()->lockState()->isLocked());
// Commit the transaction to release the locks.
txnParticipant.commitPreparedTransaction(opCtx(), prepareTimestamp, boost::none);
ASSERT_TRUE(txnParticipant.transactionIsCommitted());
}
TEST_F(TxnParticipantTest, StepDownAfterPrepareDoesNotBlock) {
auto sessionCheckout = checkOutSession();
auto txnParticipant = TransactionParticipant::get(opCtx());