0
0
mirror of https://github.com/mongodb/mongo.git synced 2024-11-30 00:56:44 +01:00

SERVER-44710: Add metrics.repl.network.getmores.numEmptyBatches

This commit is contained in:
Lingzhi Deng 2020-02-19 18:12:23 -05:00 committed by Evergreen Agent
parent 31426dd86f
commit f26fc48b09
2 changed files with 46 additions and 4 deletions

View File

@ -255,6 +255,12 @@ jsTestLog(`Secondary ${secondary.host} metrics before restarting replication: ${
// Enable periodic noops on the primary to aid sync source selection.
assert.commandWorked(primary.adminCommand({setParameter: 1, writePeriodicNoops: true}));
// Enable the setSmallOplogGetMoreMaxTimeMS failpoint on the secondary so that it starts using a
// small awaitData timeout for oplog fetching after re-choosing the sync source. This makes the
// sync source return empty batches more frequently, which is needed to test the
// numEmptyBatches metric.
configureFailPoint(secondary, 'setSmallOplogGetMoreMaxTimeMS');
// Repeatedly restart replication and wait for the sync source to be rechosen. If the sync source
// gets set to empty between stopping and restarting replication, then the secondary won't
// increment numTimesChoseSame, so we do this in a loop.
@ -279,6 +285,17 @@ assert.soon(
assert.gt(ssNew.numSelections, ssOld.numSelections, "num selections not incremented");
assert.gt(ssNew.numTimesChoseSame, ssOld.numTimesChoseSame, "same sync source not chosen");

// Get the base number of empty batches after the secondary is up to date. Assert that the secondary
// eventually gets an empty batch due to awaitData timeout.
rt.awaitLastOpCommitted();
// Reads the secondary's current metrics.repl.network.getmores.numEmptyBatches from serverStatus.
const getNumEmptyBatches = () =>
    secondary.getDB("test").serverStatus().metrics.repl.network.getmores.numEmptyBatches;
const targetNumEmptyBatches = getNumEmptyBatches() + 1;
// The failure message is passed as a function so that the "current" value is read when the
// assertion gives up, instead of being captured once before the wait begins.
assert.soon(
    () => getNumEmptyBatches() >= targetNumEmptyBatches,
    () => `Timed out waiting for numEmptyBatches reach ${targetNumEmptyBatches}, current ${
        getNumEmptyBatches()}`);

// Stop the primary so the secondary cannot choose a sync source.
ssOld = ssNew;
rt.stop(primary);

View File

@ -61,11 +61,36 @@ MONGO_FAIL_POINT_DEFINE(hangBeforeOplogFetcherRetries);
MONGO_FAIL_POINT_DEFINE(hangBeforeProcessingSuccessfulBatch);
namespace {
/**
 * Statistics about oplog getMore batches, exposed through serverStatus: timing data kept in a
 * TimerStats plus a running count of the batches that were reported as empty.
 */
class OplogBatchStats {
public:
    /**
     * Returns the TimerStats report with a "numEmptyBatches" field appended.
     */
    BSONObj getReport() const;

    /**
     * Implicit conversion yielding the same object as getReport().
     */
    operator BSONObj() const {
        return getReport();
    }

    /**
     * Records the elapsed time 'millis' of one batch. When 'isEmptyBatch' is true the empty-batch
     * counter is incremented as well.
     */
    void recordMillis(int millis, bool isEmptyBatch);

private:
    TimerStats _getMores;
    Counter64 _numEmptyBatches;
};
// Records the elapsed time of a batch, then counts the batch as empty if the caller says so.
void OplogBatchStats::recordMillis(int millis, bool isEmptyBatch) {
    _getMores.recordMillis(millis);

    // Nothing more to track for batches that carried documents.
    if (!isEmptyBatch) {
        return;
    }
    _numEmptyBatches.increment();
}
// Builds the report: starts from the TimerStats report and appends the empty-batch count.
BSONObj OplogBatchStats::getReport() const {
    BSONObjBuilder report(_getMores.getReport());
    const auto emptyBatchCount = _numEmptyBatches.get();
    report.append("numEmptyBatches", emptyBatchCount);
    return report.obj();
}
// The number of batches read off the network and the time spent reading them.
// NOTE(review): two registrations named displayBatchesRecieved for "repl.network.getmores" appear
// below, one backed by TimerStats and one by OplogBatchStats. This looks like the removed and added
// sides of a diff whose +/- markers were lost -- confirm against the commit before treating both
// as live code.
TimerStats getmoreReplStats;
ServerStatusMetricField<TimerStats> displayBatchesRecieved("repl.network.getmores",
&getmoreReplStats);
OplogBatchStats oplogBatchStats;
ServerStatusMetricField<OplogBatchStats> displayBatchesRecieved("repl.network.getmores",
&oplogBatchStats);
// The number of oplog entries read via the oplog reader, reported as "repl.network.ops".
Counter64 opsReadStats;
ServerStatusMetricField<Counter64> displayOpsRead("repl.network.ops", &opsReadStats);
@ -876,7 +901,7 @@ Status OplogFetcher::_onSuccessfulBatch(const Documents& documents) {
opsReadStats.increment(info.networkDocumentCount);
networkByteStats.increment(info.networkDocumentBytes);
getmoreReplStats.recordMillis(_lastBatchElapsedMS);
oplogBatchStats.recordMillis(_lastBatchElapsedMS, documents.empty());
auto status = _enqueueDocumentsFn(firstDocToApply, documents.cend(), info);
if (!status.isOK()) {