From 82f5da4e8eb7b6832121ef0cbe2b83ed7fd3b5a2 Mon Sep 17 00:00:00 2001 From: sl-mongodb Date: Mon, 18 Nov 2024 11:19:37 -0500 Subject: [PATCH] SERVER-95422: restructured totalOplogSlotDurationMicros calculation (#29229) GitOrigin-RevId: abff1fd21fa13a3231edc71f8572810384b91540 --- src/mongo/db/BUILD.bazel | 1 + src/mongo/db/catalog/BUILD.bazel | 1 - src/mongo/db/catalog/local_oplog_info.cpp | 36 +++++++++++------------ src/mongo/db/catalog/local_oplog_info.h | 9 ++++++ src/mongo/db/curop.cpp | 16 ++++++---- src/mongo/db/curop.h | 5 ---- src/mongo/db/storage/BUILD.bazel | 1 - 7 files changed, 39 insertions(+), 30 deletions(-) diff --git a/src/mongo/db/BUILD.bazel b/src/mongo/db/BUILD.bazel index 55e85343a80..bd6c74a11d5 100644 --- a/src/mongo/db/BUILD.bazel +++ b/src/mongo/db/BUILD.bazel @@ -913,6 +913,7 @@ mongo_cc_library( "//src/mongo/db/auth:authprivilege", # TODO(SERVER-93876): Remove. "//src/mongo/db/auth:user_acquisition_stats", "//src/mongo/db/catalog:collection_options", # TODO(SERVER-93876): Remove. + "//src/mongo/db/catalog:local_oplog_info", "//src/mongo/db/commands:create_command", # TODO(SERVER-93876): Remove. "//src/mongo/db/commands:server_status_core", # TODO(SERVER-93876): Remove. "//src/mongo/db/commands:test_commands_enabled", # TODO(SERVER-93876): Remove. diff --git a/src/mongo/db/catalog/BUILD.bazel b/src/mongo/db/catalog/BUILD.bazel index 5b963c65d63..93ad06be2db 100644 --- a/src/mongo/db/catalog/BUILD.bazel +++ b/src/mongo/db/catalog/BUILD.bazel @@ -86,7 +86,6 @@ mongo_cc_library( ], deps = [ "//src/mongo/db:server_base", # TODO(SERVER-93876): Remove. - "//src/mongo/db:shard_role_api", # TODO(SERVER-93876): Remove. "//src/mongo/db:vector_clock_mutable", "//src/mongo/db/repl:optime", "//src/mongo/db/repl:repl_coordinator_interface", # TODO(SERVER-93876): Remove. diff --git a/src/mongo/db/catalog/local_oplog_info.cpp b/src/mongo/db/catalog/local_oplog_info.cpp index f3f116b474b..28bd1e9a24c 100644 --- a/src/mongo/db/catalog/local_oplog_info.cpp +++ b/src/mongo/db/catalog/local_oplog_info.cpp @@ -36,7 +36,6 @@ #include #include -#include "mongo/db/curop.h" #include "mongo/db/logical_time.h" #include "mongo/db/repl/oplog.h" #include "mongo/db/repl/optime.h" @@ -154,7 +153,8 @@ std::vector LocalOplogInfo::getNextOpTimes(OperationContext* opCtx, s // If we abort a transaction that has reserved an optime, we should make sure to update the // stable timestamp if necessary, since this oplog hole may have been holding back the stable // timestamp. - shard_role_details::getRecoveryUnit(opCtx)->onRollback([replCoord, + shard_role_details::getRecoveryUnit(opCtx)->onRollback([this, + replCoord, oplogSlotDurationTimer, isFirstOpTime, prevAssertOnLockAttempt, @@ -162,8 +162,7 @@ std::vector LocalOplogInfo::getNextOpTimes(OperationContext* opCtx, s OperationContext* opCtx) { replCoord->attemptToAdvanceStableTimestamp(); // Sum the oplog slot durations. An operation may participate in multiple transactions. - CurOp::get(opCtx)->debug().totalOplogSlotDurationMicros += - Microseconds(oplogSlotDurationTimer.elapsed()); + _totalOplogSlotDurationMicros += Microseconds(oplogSlotDurationTimer.elapsed()); // Only reset these properties when the first slot is released. if (isFirstOpTime) { @@ -172,21 +171,22 @@ std::vector LocalOplogInfo::getNextOpTimes(OperationContext* opCtx, s } }); - shard_role_details::getRecoveryUnit(opCtx)->onCommit( - [oplogSlotDurationTimer, isFirstOpTime, prevAssertOnLockAttempt, prevRuBlockingAllowed]( - OperationContext* opCtx, boost::optional) { - // Sum the oplog slot durations. An operation may participate in multiple transactions. - CurOp::get(opCtx)->debug().totalOplogSlotDurationMicros += - Microseconds(oplogSlotDurationTimer.elapsed()); + shard_role_details::getRecoveryUnit(opCtx)->onCommit([this, + oplogSlotDurationTimer, + isFirstOpTime, + prevAssertOnLockAttempt, + prevRuBlockingAllowed]( + OperationContext* opCtx, + boost::optional) { + // Sum the oplog slot durations. An operation may participate in multiple transactions. + _totalOplogSlotDurationMicros += Microseconds(oplogSlotDurationTimer.elapsed()); - // Only reset these properties when the first slot is released. - if (isFirstOpTime) { - shard_role_details::getLocker(opCtx)->setAssertOnLockAttempt( - prevAssertOnLockAttempt); - shard_role_details::getRecoveryUnit(opCtx)->setBlockingAllowed( - prevRuBlockingAllowed); - } - }); + // Only reset these properties when the first slot is released. + if (isFirstOpTime) { + shard_role_details::getLocker(opCtx)->setAssertOnLockAttempt(prevAssertOnLockAttempt); + shard_role_details::getRecoveryUnit(opCtx)->setBlockingAllowed(prevRuBlockingAllowed); + } + }); return oplogSlots; } diff --git a/src/mongo/db/catalog/local_oplog_info.h b/src/mongo/db/catalog/local_oplog_info.h index 3b53fc1a64b..94ebd05fb64 100644 --- a/src/mongo/db/catalog/local_oplog_info.h +++ b/src/mongo/db/catalog/local_oplog_info.h @@ -58,6 +58,9 @@ public: RecordStore* getRecordStore() const; void setRecordStore(RecordStore* rs); void resetRecordStore(); + auto getTotalOplogSlotDurationMicros() const { + return _totalOplogSlotDurationMicros; + } /** * Sets the global Timestamp to be 'newTime'. @@ -76,6 +79,12 @@ private: // destroyed. See "oplogCheckCloseDatabase". RecordStore* _rs = nullptr; + // Stores the total time an operation spends with an uncommitted oplog slot held open. Indicator + // that an operation is holding back replication by causing oplog holes to remain open for + // unusual amounts of time. + Microseconds _totalOplogSlotDurationMicros{0}; + + // Synchronizes the section where a new Timestamp is generated and when it is registered in the // storage engine. mutable stdx::mutex _newOpMutex; diff --git a/src/mongo/db/curop.cpp b/src/mongo/db/curop.cpp index f120995cf40..b6dd32368a4 100644 --- a/src/mongo/db/curop.cpp +++ b/src/mongo/db/curop.cpp @@ -53,6 +53,7 @@ #include "mongo/db/admission/execution_admission_context.h" #include "mongo/db/admission/ingress_admission_context.h" #include "mongo/db/auth/user_name.h" +#include "mongo/db/catalog/local_oplog_info.h" #include "mongo/db/client.h" #include "mongo/db/commands.h" #include "mongo/db/commands/server_status_metric.h" @@ -1165,7 +1166,9 @@ void OpDebug::report(OperationContext* opCtx, pAttrs->add("placementVersionRefreshDuration", placementVersionRefreshMillis); } - if (totalOplogSlotDurationMicros > Microseconds::zero()) { + if (const auto& totalOplogSlotDurationMicros = + LocalOplogInfo::get(opCtx)->getTotalOplogSlotDurationMicros(); + totalOplogSlotDurationMicros > Microseconds::zero()) { pAttrs->add("totalOplogSlotDuration", totalOplogSlotDurationMicros); } @@ -1550,7 +1553,9 @@ void OpDebug::append(OperationContext* opCtx, OPDEBUG_APPEND_OPTIONAL(b, "estimatedCardinality", estimatedCardinality); - if (totalOplogSlotDurationMicros > Microseconds::zero()) { + if (const auto& totalOplogSlotDurationMicros = + LocalOplogInfo::get(opCtx)->getTotalOplogSlotDurationMicros(); + totalOplogSlotDurationMicros > Microseconds::zero()) { b.appendNumber("totalOplogSlotDurationMicros", durationCount(totalOplogSlotDurationMicros)); } @@ -1900,9 +1905,10 @@ std::function OpDebug::appendStaged(StringSet requ }); addIfNeeded("totalOplogSlotDurationMicros", [](auto field, auto args, auto& b) { - if (args.op.totalOplogSlotDurationMicros > Nanoseconds::zero()) { - b.appendNumber(field, - durationCount(args.op.totalOplogSlotDurationMicros)); + if (const auto& totalOplogSlotDurationMicros = + LocalOplogInfo::get(args.opCtx)->getTotalOplogSlotDurationMicros(); + totalOplogSlotDurationMicros > Nanoseconds::zero()) { + b.appendNumber(field, durationCount(totalOplogSlotDurationMicros)); } }); diff --git a/src/mongo/db/curop.h b/src/mongo/db/curop.h index bf31d25bdff..638b5873094 100644 --- a/src/mongo/db/curop.h +++ b/src/mongo/db/curop.h @@ -506,11 +506,6 @@ public: // Stores the duration of execution after removing time spent blocked. Milliseconds workingTimeMillis{0}; - // Stores the total time an operation spends with an uncommitted oplog slot held open. Indicator - // that an operation is holding back replication by causing oplog holes to remain open for - // unusual amounts of time. - Microseconds totalOplogSlotDurationMicros{0}; - // Stores the amount of the data processed by the throttle cursors in MB/sec. boost::optional dataThroughputLastSecond; boost::optional dataThroughputAverage; diff --git a/src/mongo/db/storage/BUILD.bazel b/src/mongo/db/storage/BUILD.bazel index f4d3f6e642a..c7d57545a9c 100644 --- a/src/mongo/db/storage/BUILD.bazel +++ b/src/mongo/db/storage/BUILD.bazel @@ -204,7 +204,6 @@ mongo_cc_library( ":flow_control_parameters", "//src/mongo/db:server_base", "//src/mongo/db:service_context", # TODO(SERVER-93876): Remove. - "//src/mongo/db:shard_role_api", "//src/mongo/db/commands:server_status_core", "//src/mongo/db/concurrency:flow_control_ticketholder", "//src/mongo/util:background_job",