mirror of
https://github.com/mongodb/mongo.git
synced 2024-11-30 09:06:21 +01:00
SERVER-44710: Add metrics.repl.network.getmores.numEmptyBatches
This commit is contained in:
parent
31426dd86f
commit
f26fc48b09
@ -255,6 +255,12 @@ jsTestLog(`Secondary ${secondary.host} metrics before restarting replication: ${
|
|||||||
// Enable periodic noops to aid sync source selection.
|
// Enable periodic noops to aid sync source selection.
|
||||||
assert.commandWorked(primary.adminCommand({setParameter: 1, writePeriodicNoops: true}));
|
assert.commandWorked(primary.adminCommand({setParameter: 1, writePeriodicNoops: true}));
|
||||||
|
|
||||||
|
// Enable the setSmallOplogGetMoreMaxTimeMS failpoint on secondary so that it will start using
|
||||||
|
// a small awaitData timeout for oplog fetching after re-choosing the sync source. This is needed to
|
||||||
|
// make sync source return empty batches more frequently in order to test the metric
|
||||||
|
// numEmptyBatches.
|
||||||
|
configureFailPoint(secondary, 'setSmallOplogGetMoreMaxTimeMS');
|
||||||
|
|
||||||
// Repeatedly restart replication and wait for the sync source to be rechosen. If the sync source
|
// Repeatedly restart replication and wait for the sync source to be rechosen. If the sync source
|
||||||
// gets set to empty between stopping and restarting replication, then the secondary won't
|
// gets set to empty between stopping and restarting replication, then the secondary won't
|
||||||
// increment numTimesChoseSame, so we do this in a loop.
|
// increment numTimesChoseSame, so we do this in a loop.
|
||||||
@ -279,6 +285,17 @@ assert.soon(
|
|||||||
assert.gt(ssNew.numSelections, ssOld.numSelections, "num selections not incremented");
|
assert.gt(ssNew.numSelections, ssOld.numSelections, "num selections not incremented");
|
||||||
assert.gt(ssNew.numTimesChoseSame, ssOld.numTimesChoseSame, "same sync source not chosen");
|
assert.gt(ssNew.numTimesChoseSame, ssOld.numTimesChoseSame, "same sync source not chosen");
|
||||||
|
|
||||||
|
// Get the base number of empty batches after the secondary is up to date. Assert that the secondary
// eventually gets an empty batch due to awaitData timeout.
rt.awaitLastOpCommitted();
// Reads the secondary's current numEmptyBatches counter from serverStatus.
const getNumEmptyBatches = () =>
    secondary.getDB("test").serverStatus().metrics.repl.network.getmores.numEmptyBatches;
const targetNumEmptyBatches = getNumEmptyBatches() + 1;
assert.soon(
    () => getNumEmptyBatches() >= targetNumEmptyBatches,
    // The message is passed as a function so that the "current" value is read when the assertion
    // fails, instead of being captured once before polling begins.
    () => `Timed out waiting for numEmptyBatches reach ${targetNumEmptyBatches}, current ${
        getNumEmptyBatches()}`);
|
||||||
|
|
||||||
// Stop the primary so the secondary cannot choose a sync source.
|
// Stop the primary so the secondary cannot choose a sync source.
|
||||||
ssOld = ssNew;
|
ssOld = ssNew;
|
||||||
rt.stop(primary);
|
rt.stop(primary);
|
||||||
|
@ -61,11 +61,36 @@ MONGO_FAIL_POINT_DEFINE(hangBeforeOplogFetcherRetries);
|
|||||||
MONGO_FAIL_POINT_DEFINE(hangBeforeProcessingSuccessfulBatch);
|
MONGO_FAIL_POINT_DEFINE(hangBeforeProcessingSuccessfulBatch);
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
class OplogBatchStats {
|
||||||
|
public:
|
||||||
|
void recordMillis(int millis, bool isEmptyBatch);
|
||||||
|
BSONObj getReport() const;
|
||||||
|
operator BSONObj() const {
|
||||||
|
return getReport();
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
TimerStats _getMores;
|
||||||
|
Counter64 _numEmptyBatches;
|
||||||
|
};
|
||||||
|
|
||||||
|
void OplogBatchStats::recordMillis(int millis, bool isEmptyBatch) {
|
||||||
|
_getMores.recordMillis(millis);
|
||||||
|
if (isEmptyBatch) {
|
||||||
|
_numEmptyBatches.increment();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Builds the report object: starts from the TimerStats report and appends the current
 * "numEmptyBatches" count.
 */
BSONObj OplogBatchStats::getReport() const {
    // Seed the builder with the timing report so its fields come first.
    BSONObjBuilder reportBuilder(_getMores.getReport());
    reportBuilder.append("numEmptyBatches", _numEmptyBatches.get());
    return reportBuilder.obj();
}
|
||||||
|
|
||||||
// The number and time spent reading batches off the network
|
// The number and time spent reading batches off the network
|
||||||
TimerStats getmoreReplStats;
|
OplogBatchStats oplogBatchStats;
|
||||||
ServerStatusMetricField<TimerStats> displayBatchesRecieved("repl.network.getmores",
|
ServerStatusMetricField<OplogBatchStats> displayBatchesRecieved("repl.network.getmores",
|
||||||
&getmoreReplStats);
|
&oplogBatchStats);
|
||||||
// The oplog entries read via the oplog reader
|
// The oplog entries read via the oplog reader
|
||||||
Counter64 opsReadStats;
|
Counter64 opsReadStats;
|
||||||
ServerStatusMetricField<Counter64> displayOpsRead("repl.network.ops", &opsReadStats);
|
ServerStatusMetricField<Counter64> displayOpsRead("repl.network.ops", &opsReadStats);
|
||||||
@ -876,7 +901,7 @@ Status OplogFetcher::_onSuccessfulBatch(const Documents& documents) {
|
|||||||
opsReadStats.increment(info.networkDocumentCount);
|
opsReadStats.increment(info.networkDocumentCount);
|
||||||
networkByteStats.increment(info.networkDocumentBytes);
|
networkByteStats.increment(info.networkDocumentBytes);
|
||||||
|
|
||||||
getmoreReplStats.recordMillis(_lastBatchElapsedMS);
|
oplogBatchStats.recordMillis(_lastBatchElapsedMS, documents.empty());
|
||||||
|
|
||||||
auto status = _enqueueDocumentsFn(firstDocToApply, documents.cend(), info);
|
auto status = _enqueueDocumentsFn(firstDocToApply, documents.cend(), info);
|
||||||
if (!status.isOK()) {
|
if (!status.isOK()) {
|
||||||
|
Loading…
Reference in New Issue
Block a user