From d3b36587bc2222fdefa1c755bb253f78c894496c Mon Sep 17 00:00:00 2001 From: Max Hirschhorn Date: Mon, 18 Oct 2021 22:07:04 +0000 Subject: [PATCH] SERVER-60774 Check every op in ReshardingDonorOplogIterator for final. Changes the ReshardingDonorOplogIterator to not assume the reshardFinalOp entry will always be the last document in resharding's oplog buffer collection. The ReshardingOplogFetcher may end up inserting no-op reshardProgressMark entries after the reshardFinalOp entry upon resuming from a primary failover. --- .../resharding_donor_oplog_iterator.cpp | 10 ++++++ .../resharding_donor_oplog_iterator_test.cpp | 35 +++++++++++++++++++ 2 files changed, 45 insertions(+) diff --git a/src/mongo/db/s/resharding/resharding_donor_oplog_iterator.cpp b/src/mongo/db/s/resharding/resharding_donor_oplog_iterator.cpp index 17a5d8fb966..3989cc1beb8 100644 --- a/src/mongo/db/s/resharding/resharding_donor_oplog_iterator.cpp +++ b/src/mongo/db/s/resharding/resharding_donor_oplog_iterator.cpp @@ -188,6 +188,16 @@ std::vector ReshardingDonorOplogIterator::_fillBatch(Pipeline& } numBytes += obj.objsize(); + + // The ReshardingOplogFetcher may end up inserting no-op reshardProgressMark entries *after* + // the reshardFinalOp entry. This can happen when the ReshardingOplogFetcher resumes after + // a primary failover and it had already written the reshardFinalOp entry. We check each + // oplog entry for being the reshardFinalOp and halt filling the batch so that an empty + // batch is returned to ReshardingOplogApplier to signal all oplog entries from the donor + // shard were applied. + if (isFinalOplog(entry)) { + break; + } } while (numBytes < resharding::gReshardingOplogBatchLimitBytes.load() && batch.size() < std::size_t(resharding::gReshardingOplogBatchLimitOperations.load())); diff --git a/src/mongo/db/s/resharding/resharding_donor_oplog_iterator_test.cpp b/src/mongo/db/s/resharding/resharding_donor_oplog_iterator_test.cpp index e8bfafde493..b5e1b2e3212 100644 --- a/src/mongo/db/s/resharding/resharding_donor_oplog_iterator_test.cpp +++ b/src/mongo/db/s/resharding/resharding_donor_oplog_iterator_test.cpp @@ -503,5 +503,40 @@ TEST_F(ReshardingDonorOplogIterTest, BatchIncludesProgressMarkEntries) { ASSERT_TRUE(next.empty()); } +TEST_F(ReshardingDonorOplogIterTest, IgnoresProgressMarkEntriesAfterFinalOp) { + RAIIServerParameterControllerForTest controller{"reshardingOplogBatchLimitOperations", 100}; + + const auto oplog1 = makeInsertOplog(Timestamp(2, 4), BSON("x" << 1)); + const auto progressMarkOplog1 = makeProgressMarkOplogEntry(Timestamp(15, 3)); + const auto finalOplog = makeFinalOplog(Timestamp(43, 24)); + // reshardProgressMark entries inserted after the reshardFinalOp entry should be ignored. + const auto progressMarkOplog2 = makeProgressMarkOplogEntry(Timestamp(65, 2)); + const auto progressMarkOplog3 = makeProgressMarkOplogEntry(Timestamp(65, 3)); + const auto progressMarkOplog4 = makeProgressMarkOplogEntry(Timestamp(65, 4)); + + DBDirectClient client(operationContext()); + const auto ns = oplogNss().ns(); + client.insert(ns, oplog1.toBSON()); + client.insert(ns, progressMarkOplog1.toBSON()); + client.insert(ns, finalOplog.toBSON()); + client.insert(ns, progressMarkOplog2.toBSON()); + client.insert(ns, progressMarkOplog3.toBSON()); + client.insert(ns, progressMarkOplog4.toBSON()); + + ReshardingDonorOplogIterator iter(oplogNss(), kResumeFromBeginning, &onInsertAlwaysReady); + auto executor = makeTaskExecutorForIterator(); + auto factory = makeCancelableOpCtx(); + auto altClient = makeKillableClient(); + AlternativeClientRegion acr(altClient); + + auto next = getNextBatch(&iter, executor, factory); + ASSERT_EQ(next.size(), 2U); + ASSERT_BSONOBJ_EQ(getId(oplog1), getId(next[0])); + ASSERT_BSONOBJ_EQ(getId(progressMarkOplog1), getId(next[1])); + + next = getNextBatch(&iter, executor, factory); + ASSERT_TRUE(next.empty()); +} + } // anonymous namespace } // namespace mongo