0
0
mirror of https://github.com/mongodb/mongo.git synced 2024-12-01 09:32:32 +01:00

SERVER-36099 Trim FTDC connection pool stats

This commit is contained in:
A. Jesse Jiryu Davis 2019-05-16 14:48:19 -04:00
parent db55694f28
commit f30e70b5d1
13 changed files with 147 additions and 35 deletions

View File

@ -35,6 +35,8 @@ function verifyGetDiagnosticData(adminDb) {
assert(foundGoodDocument,
"getDiagnosticData failed to return a non-empty command, is FTDC running?");
return data;
}
/**

View File

@ -0,0 +1,36 @@
/**
* The FTDC connection pool stats from mongos are a different structure than the connPoolStats
* command, verify its contents.
*
* @tags: [requires_sharding]
*/
load('jstests/libs/ftdc.js');
(function() {
    'use strict';

    // Start a two-shard cluster whose mongos (s0) receives the resolved 'ftdc_dir' path as its
    // diagnosticDataCollectionDirectoryPath server parameter.
    const ftdcDir = MongoRunner.toRealPath('ftdc_dir');
    const mongosOptions = {setParameter: {diagnosticDataCollectionDirectoryPath: ftdcDir}};
    const cluster = new ShardingTest({shards: 2, mongos: {s0: mongosOptions}});

    // Fetch the diagnostic data through the mongos and pick out its connPoolStats section.
    const adminDb = cluster.s0.getDB('admin');
    const diagnosticData = verifyGetDiagnosticData(adminDb);
    const poolStats = diagnosticData.connPoolStats;
    jsTestLog(`Diagnostic connection pool stats: ${tojson(poolStats)}`);

    // Every aggregate counter must be present.
    for (const counter of ['totalInUse', 'totalAvailable', 'totalCreated', 'totalRefreshing']) {
        assert(poolStats.hasOwnProperty(counter));
    }

    // The connPoolStats command reply has "hosts", but FTDC's stats do not.
    assert(!poolStats.hasOwnProperty('hosts'));

    // Spot-check a couple of nested properties, without attempting to be thorough.
    const inUsePerPool = poolStats.connectionsInUsePerPool;
    assert(inUsePerPool.hasOwnProperty('NetworkInterfaceTL-ShardRegistry'));
    const pingTimesBySet = poolStats.replicaSetPingTimesMillis;
    assert(pingTimesBySet.hasOwnProperty(cluster.configRS.name));

    cluster.stop();
})();

View File

@ -157,6 +157,15 @@ struct HostNotIn {
const std::set<HostAndPort>& _hosts;
};
/**
 * Converts a node's latency from microseconds to whole milliseconds, saturating at the largest
 * value an int32_t can hold.
 */
int32_t pingTimeMillis(const Node& node) {
    const auto millis = node.latencyMicros / 1000;
    const auto ceiling = numeric_limits<int32_t>::max();

    // In particular, Node::unknownLatency does not fit in an int32, so clamp instead of
    // truncating.
    return millis > ceiling ? ceiling : static_cast<int32_t>(millis);
}
/**
* Replica set refresh period on the task executor.
*/
@ -397,12 +406,21 @@ ReplicaSetChangeNotifier& ReplicaSetMonitor::getNotifier() {
}
// TODO move to correct order with non-statics before pushing
void ReplicaSetMonitor::appendInfo(BSONObjBuilder& bsonObjBuilder) const {
void ReplicaSetMonitor::appendInfo(BSONObjBuilder& bsonObjBuilder, bool forFTDC) const {
stdx::lock_guard<stdx::mutex> lk(_state->mutex);
BSONObjBuilder monitorInfo(bsonObjBuilder.subobjStart(getName()));
if (forFTDC) {
for (size_t i = 0; i < _state->nodes.size(); i++) {
const Node& node = _state->nodes[i];
monitorInfo.appendNumber(node.host.toString(), pingTimeMillis(node));
}
return;
}
// NOTE: the format here must be consistent for backwards compatibility
BSONArrayBuilder hosts(bsonObjBuilder.subarrayStart("hosts"));
for (unsigned i = 0; i < _state->nodes.size(); i++) {
BSONArrayBuilder hosts(monitorInfo.subarrayStart("hosts"));
for (size_t i = 0; i < _state->nodes.size(); i++) {
const Node& node = _state->nodes[i];
BSONObjBuilder builder;
@ -411,15 +429,7 @@ void ReplicaSetMonitor::appendInfo(BSONObjBuilder& bsonObjBuilder) const {
builder.append("ismaster", node.isMaster); // intentionally not camelCase
builder.append("hidden", false); // we don't keep hidden nodes in the set
builder.append("secondary", node.isUp && !node.isMaster);
int32_t pingTimeMillis = 0;
if (node.latencyMicros / 1000 > numeric_limits<int32_t>::max()) {
// In particular, Node::unknownLatency does not fit in an int32.
pingTimeMillis = numeric_limits<int32_t>::max();
} else {
pingTimeMillis = node.latencyMicros / 1000;
}
builder.append("pingTimeMillis", pingTimeMillis);
builder.append("pingTimeMillis", pingTimeMillis(node));
if (!node.tags.isEmpty()) {
builder.append("tags", node.tags);
@ -427,7 +437,6 @@ void ReplicaSetMonitor::appendInfo(BSONObjBuilder& bsonObjBuilder) const {
hosts.append(builder.obj());
}
hosts.done();
}
void ReplicaSetMonitor::shutdown() {

View File

@ -155,9 +155,10 @@ public:
bool contains(const HostAndPort& server) const;
/**
* Writes information about our cached view of the set to a BSONObjBuilder.
* Writes information about our cached view of the set to a BSONObjBuilder. If
* forFTDC, trim to minimize its size for full-time diagnostic data capture.
*/
void appendInfo(BSONObjBuilder& b) const;
void appendInfo(BSONObjBuilder& b, bool forFTDC = false) const;
/**
* Returns true if the monitor knows a usable primary from its internal view.

View File

@ -203,19 +203,22 @@ void ReplicaSetMonitorManager::removeAllMonitors() {
}
}
void ReplicaSetMonitorManager::report(BSONObjBuilder* builder) {
void ReplicaSetMonitorManager::report(BSONObjBuilder* builder, bool forFTDC) {
// Don't hold _mutex the whole time to avoid ever taking a monitor's mutex while holding the
// manager's mutex. Otherwise we could get a deadlock between the manager's, monitor's, and
// ShardRegistry's mutex due to the ReplicaSetMonitor's AsynchronousConfigChangeHook potentially
// calling ShardRegistry::updateConfigServerConnectionString.
auto setNames = getAllSetNames();
BSONObjBuilder setStats(
builder->subobjStart(forFTDC ? "replicaSetPingTimesMillis" : "replicaSets"));
for (const auto& setName : setNames) {
auto monitor = getMonitor(setName);
if (!monitor) {
continue;
}
BSONObjBuilder monitorInfo(builder->subobjStart(setName));
monitor->appendInfo(monitorInfo);
monitor->appendInfo(setStats, forFTDC);
}
}

View File

@ -86,9 +86,10 @@ public:
void shutdown();
/**
* Reports information about the replica sets tracked by us, for diagnostic purposes.
* Reports information about the replica sets tracked by us, for diagnostic purposes. If
* forFTDC, trim to minimize its size for full-time diagnostic data capture.
*/
void report(BSONObjBuilder* builder);
void report(BSONObjBuilder* builder, bool forFTDC = false);
/**
* Returns an executor for running RSM tasks.

View File

@ -107,9 +107,7 @@ public:
stats.appendToBSON(result);
// Always report all replica sets being tracked.
BSONObjBuilder setStats(result.subobjStart("replicaSets"));
globalRSMonitorManager.report(&setStats);
setStats.doneFast();
globalRSMonitorManager.report(&result);
return true;
}

View File

@ -83,8 +83,11 @@ env.Library(
'ftdc_mongos.cpp',
],
LIBDEPS_PRIVATE=[
'$BUILD_DIR/mongo/client/clientdriver_minimal',
'$BUILD_DIR/mongo/executor/task_executor_pool',
'$BUILD_DIR/mongo/idl/server_parameter',
'ftdc_server'
'$BUILD_DIR/mongo/s/grid',
'ftdc_server',
],
)

View File

@ -34,18 +34,59 @@
#include <boost/filesystem.hpp>
#include "mongo/client/connpool.h"
#include "mongo/client/dbclient_connection.h"
#include "mongo/client/global_conn_pool.h"
#include "mongo/db/ftdc/controller.h"
#include "mongo/db/ftdc/ftdc_server.h"
#include "mongo/db/repl/replication_coordinator.h"
#include "mongo/executor/connection_pool_stats.h"
#include "mongo/executor/task_executor_pool.h"
#include "mongo/s/grid.h"
#include "mongo/stdx/thread.h"
#include "mongo/util/log.h"
#include "mongo/util/synchronized_value.h"
namespace mongo {
class ConnPoolStatsCollector : public FTDCCollectorInterface {
public:
void collect(OperationContext* opCtx, BSONObjBuilder& builder) override {
executor::ConnectionPoolStats stats{};
// Global connection pool connections.
globalConnPool.appendConnectionStats(&stats);
// Sharding connections.
{
auto const grid = Grid::get(opCtx);
if (grid->getExecutorPool()) {
grid->getExecutorPool()->appendConnectionStats(&stats);
}
auto const customConnPoolStatsFn = grid->getCustomConnectionPoolStatsFn();
if (customConnPoolStatsFn) {
customConnPoolStatsFn(&stats);
}
}
// Output to a BSON object.
builder.appendNumber("numClientConnections", DBClientConnection::getNumConnections());
builder.appendNumber("numAScopedConnections", AScopedConnection::getNumConnections());
stats.appendToBSON(builder, true /* forFTDC */);
// All replica sets being tracked.
globalRSMonitorManager.report(&builder, true /* forFTDC */);
}
std::string name() const override {
return "connPoolStats";
}
};
/**
 * Adds the mongos periodic FTDC collectors to 'controller'.
 *
 * NOTE(review): two "connPoolStats" registrations are visible here. Presumably the
 * FTDCSimpleInternalCommandCollector one is the pre-change line that the ConnPoolStatsCollector
 * registration replaces -- confirm against the actual file.
 */
void registerMongoSCollectors(FTDCController* controller) {
// PoolStats
// Collector that runs the {connPoolStats: 1} command.
controller->addPeriodicCollector(stdx::make_unique<FTDCSimpleInternalCommandCollector>(
"connPoolStats", "connPoolStats", "", BSON("connPoolStats" << 1)));
// Dedicated collector class; it also reports under the name "connPoolStats".
controller->addPeriodicCollector(stdx::make_unique<ConnPoolStatsCollector>());
}
void startMongoSFTDC() {

View File

@ -170,7 +170,8 @@ TEST(ReplicaSetMonitorTest, PrimaryRemovedFromSetStress) {
replMonitor->appendInfo(monitorStateBuilder);
BSONObj monitorState = monitorStateBuilder.done();
BSONElement hostsElem = monitorState["hosts"];
// Stats are under the replica set name, "test".
BSONElement hostsElem = monitorState["test"]["hosts"];
BSONElement addrElem = hostsElem[mongo::str::stream() << idxToRemove]["addr"];
hostToRemove = addrElem.String();
}

View File

@ -69,24 +69,39 @@ void ConnectionPoolStats::updateStatsForHost(std::string pool,
totalRefreshing += newStats.refreshing;
}
void ConnectionPoolStats::appendToBSON(mongo::BSONObjBuilder& result) {
void ConnectionPoolStats::appendToBSON(mongo::BSONObjBuilder& result, bool forFTDC) {
result.appendNumber("totalInUse", totalInUse);
result.appendNumber("totalAvailable", totalAvailable);
result.appendNumber("totalCreated", totalCreated);
result.appendNumber("totalRefreshing", totalRefreshing);
if (forFTDC) {
BSONObjBuilder poolBuilder(result.subobjStart("connectionsInUsePerPool"));
for (const auto& pool : statsByPool) {
BSONObjBuilder poolInfo(poolBuilder.subobjStart(pool.first));
auto& poolStats = pool.second;
poolInfo.appendNumber("poolInUse", poolStats.inUse);
for (const auto& host : statsByPoolHost[pool.first]) {
auto hostStats = host.second;
poolInfo.appendNumber(host.first.toString(), hostStats.inUse);
}
}
return;
}
{
BSONObjBuilder poolBuilder(result.subobjStart("pools"));
for (auto&& pool : statsByPool) {
for (const auto& pool : statsByPool) {
BSONObjBuilder poolInfo(poolBuilder.subobjStart(pool.first));
auto poolStats = pool.second;
auto& poolStats = pool.second;
poolInfo.appendNumber("poolInUse", poolStats.inUse);
poolInfo.appendNumber("poolAvailable", poolStats.available);
poolInfo.appendNumber("poolCreated", poolStats.created);
poolInfo.appendNumber("poolRefreshing", poolStats.refreshing);
for (auto&& host : statsByPoolHost[pool.first]) {
for (const auto& host : statsByPoolHost[pool.first]) {
BSONObjBuilder hostInfo(poolInfo.subobjStart(host.first.toString()));
auto hostStats = host.second;
auto& hostStats = host.second;
hostInfo.appendNumber("inUse", hostStats.inUse);
hostInfo.appendNumber("available", hostStats.available);
hostInfo.appendNumber("created", hostStats.created);

View File

@ -60,7 +60,7 @@ struct ConnectionStatsPer {
struct ConnectionPoolStats {
void updateStatsForHost(std::string pool, HostAndPort host, ConnectionStatsPer newStats);
void appendToBSON(mongo::BSONObjBuilder& result);
void appendToBSON(mongo::BSONObjBuilder& result, bool forFTDC = false);
size_t totalInUse = 0u;
size_t totalAvailable = 0u;

View File

@ -310,7 +310,8 @@ BSONObj replMonitorStats(const BSONObj& a, void* data) {
"replMonitorStats requires a single string argument (the ReplSet name)",
a.nFields() == 1 && a.firstElement().type() == String);
ReplicaSetMonitorPtr rsm = ReplicaSetMonitor::get(a.firstElement().valuestrsafe());
auto name = a.firstElement().valuestrsafe();
ReplicaSetMonitorPtr rsm = ReplicaSetMonitor::get(name);
if (!rsm) {
return BSON(""
<< "no ReplSetMonitor exists by that name");
@ -318,7 +319,8 @@ BSONObj replMonitorStats(const BSONObj& a, void* data) {
BSONObjBuilder result;
rsm->appendInfo(result);
return result.obj();
// Stats are like {replSetName: {hosts: [{ ... }, { ... }]}}.
return result.obj()[name].Obj().getOwned();
}
BSONObj useWriteCommandsDefault(const BSONObj& a, void* data) {