0
0
mirror of https://github.com/mongodb/mongo.git synced 2024-12-01 09:32:32 +01:00

SERVER-43664 Speedup WiredTiger storage engine startup for many tables by optimizing WiredTigerUtil::setTableLogging()

This commit is contained in:
Gregory Wlodarek 2020-08-31 22:59:54 -04:00 committed by Evergreen Agent
parent 2756fa6789
commit 8e64b07e3b
16 changed files with 392 additions and 13 deletions

View File

@ -0,0 +1,97 @@
/**
* Tests that MongoDB sets the WiredTiger table logging settings correctly under different
* circumstances.
*
* @tags: [requires_wiredtiger, requires_journaling]
*/
(function() {
load('jstests/disk/libs/wt_file_helper.js');
// Create a bunch of collections under various database names.
let conn = MongoRunner.runMongod({});
const dbpath = conn.dbpath;
for (let i = 0; i < 10; i++) {
assert.commandWorked(conn.getDB(i.toString()).createCollection(i.toString()));
}
MongoRunner.stopMongod(conn);
/**
* Test 1. The regular case, where no table logging setting modifications are needed.
*/
jsTest.log("Test 1.");
conn = startMongodOnExistingPath(dbpath, {});
checkLog.containsJson(conn, 4366408, {loggingEnabled: true});
MongoRunner.stopMongod(conn);
/**
* Test 2. Repair checks all of the table logging settings.
*/
jsTest.log("Test 2.");
assertRepairSucceeds(dbpath, conn.port, {});
// Cannot use checkLog here as the server is no longer running.
let logContents = rawMongoProgramOutput();
assert(logContents.indexOf(
"Modifying the table logging settings for all existing WiredTiger tables") > 0);
/**
* Test 3. Explicitly create the '_wt_table_checks' file to force all of the table logging setting
* modifications to be made.
*/
jsTest.log("Test 3.");
let files = listFiles(dbpath);
for (f in files) {
assert.neq(files[f].name, dbpath + "/_wt_table_checks");
}
writeFile(dbpath + "/_wt_table_checks", "");
conn = startMongodOnExistingPath(dbpath, {});
checkLog.containsJson(
conn, 4366405, {loggingEnabled: true, repair: false, hasPreviouslyIncompleteTableChecks: true});
MongoRunner.stopMongod(conn);
/**
* Test 4. Change into a single replica set, which requires all of the table logging settings to be
* updated. But simulate an interruption/crash while starting up during the table logging check
* phase.
*
* The next start up will detect an unclean shutdown causing all of the table logging settings to be
* updated.
*/
jsTest.log("Test 4.");
conn = startMongodOnExistingPath(dbpath, {
replSet: "mySet",
setParameter:
"failpoint.crashAfterUpdatingFirstTableLoggingSettings=" + tojson({"mode": "alwaysOn"})
});
assert(!conn);
// Cannot use checkLog here as the server is no longer running.
logContents = rawMongoProgramOutput();
assert(logContents.indexOf(
"Crashing due to 'crashAfterUpdatingFirstTableLoggingSettings' fail point") > 0);
// The '_wt_table_checks' still exists, so all table logging settings should be modified.
conn = startMongodOnExistingPath(dbpath, {});
checkLog.containsJson(
conn, 4366405, {loggingEnabled: true, repair: false, hasPreviouslyIncompleteTableChecks: true});
MongoRunner.stopMongod(conn);
/**
* Test 5. Change into a single node replica set, which requires all of the table logging settings
* to be updated as the node was successfully started up as a standalone the last time.
*/
jsTest.log("Test 5.");
conn = startMongodOnExistingPath(dbpath, {replSet: "mySet"});
checkLog.containsJson(conn, 4366406, {loggingEnabled: false});
MongoRunner.stopMongod(conn);
}());

View File

@ -0,0 +1,51 @@
/**
* Tests that the table logging settings are not changed during read only mode.
*
* @tags: [requires_wiredtiger]
*/
(function() {
load('jstests/disk/libs/wt_file_helper.js');
// Create a bunch of collections under various database names.
let conn = MongoRunner.runMongod({});
const dbpath = conn.dbpath;
for (let i = 0; i < 10; i++) {
assert.commandWorked(conn.getDB(i.toString()).createCollection(i.toString()));
}
MongoRunner.stopMongod(conn);
// Option for read only mode.
let options = {queryableBackupMode: ""};
// Verifies that setTableLogging() does not get called in read only mode, otherwise the invariant
// would fire.
conn = startMongodOnExistingPath(dbpath, options);
assert(conn);
MongoRunner.stopMongod(conn);
// Create the '_wt_table_checks' file in the dbpath and ensure it doesn't get removed while in read
// only mode.
let files = listFiles(dbpath);
for (f in files) {
assert.neq(files[f].name, dbpath + "/_wt_table_checks");
}
writeFile(dbpath + "/_wt_table_checks", "");
conn = startMongodOnExistingPath(dbpath, options);
assert(conn);
MongoRunner.stopMongod(conn);
let hasWTTableChecksFile = false;
files = listFiles(dbpath);
for (f in files) {
if (files[f].name == dbpath + "/_wt_table_checks") {
hasWTTableChecksFile = true;
}
}
assert(hasWTTableChecksFile);
}());

View File

@ -479,6 +479,10 @@ ExitCode _initAndListen(ServiceContext* serviceContext, int listenPort) {
LOGV2(20536, "Flow Control is enabled on this deployment");
}
// Notify the storage engine that startup is completed before repair exits below, as repair sets
// the upgrade flag to true.
serviceContext->getStorageEngine()->notifyStartupComplete();
if (storageGlobalParams.upgrade) {
LOGV2(20537, "Finished checking dbs");
exitCleanly(EXIT_CLEAN);

View File

@ -106,6 +106,7 @@ ServiceContextMongoDTest::ServiceContextMongoDTest(std::string engine, RepairAct
CollectionShardingStateFactory::set(
getServiceContext(),
std::make_unique<CollectionShardingStateFactoryShard>(getServiceContext()));
getServiceContext()->getStorageEngine()->notifyStartupComplete();
}
ServiceContextMongoDTest::~ServiceContextMongoDTest() {

View File

@ -64,6 +64,20 @@ public:
*/
virtual void startAsyncThreads() {}
/**
* During the startup process, the storage engine is one of the first components to be started
* up and fully initialized. But that fully initialized storage engine may not be recognized as
* the end for the remaining storage startup tasks that still need to be performed.
*
* For example, after the storage engine has been fully initialized, we need to access it in
* order to set up all of the collections and indexes based on the metadata, or perform some
* corrective measures on the data files, etc.
*
* When all of the storage startup tasks are completed as a whole, then this function is called
* by the external force managing the startup process.
*/
virtual void notifyStartupComplete() {}
virtual RecoveryUnit* newRecoveryUnit() = 0;
/**

View File

@ -172,6 +172,20 @@ public:
*/
virtual void finishInit() = 0;
/**
* During the startup process, the storage engine is one of the first components to be started
* up and fully initialized. But that fully initialized storage engine may not be recognized as
* the end for the remaining storage startup tasks that still need to be performed.
*
* For example, after the storage engine has been fully initialized, we need to access it in
* order to set up all of the collections and indexes based on the metadata, or perform some
* corrective measures on the data files, etc.
*
* When all of the storage startup tasks are completed as a whole, then this function is called
* by the external force managing the startup process.
*/
virtual void notifyStartupComplete() {}
/**
* Returns a new interface to the storage engine's recovery unit. The recovery
* unit is the durability interface. For details, see recovery_unit.h

View File

@ -680,6 +680,10 @@ void StorageEngineImpl::finishInit() {
}
}
void StorageEngineImpl::notifyStartupComplete() {
_engine->notifyStartupComplete();
}
RecoveryUnit* StorageEngineImpl::newRecoveryUnit() {
if (!_engine) {
// shutdown

View File

@ -70,6 +70,8 @@ public:
virtual void finishInit() override;
virtual void notifyStartupComplete() override;
virtual RecoveryUnit* newRecoveryUnit() override;
virtual std::vector<std::string> listDatabases() const override;

View File

@ -769,6 +769,10 @@ void WiredTigerKVEngine::startAsyncThreads() {
}
}
void WiredTigerKVEngine::notifyStartupComplete() {
WiredTigerUtil::notifyStartupComplete();
}
void WiredTigerKVEngine::appendGlobalStats(BSONObjBuilder& b) {
BSONObjBuilder bb(b.subobjStart("concurrentTransactions"));
{
@ -880,6 +884,8 @@ void WiredTigerKVEngine::_openWiredTiger(const std::string& path, const std::str
void WiredTigerKVEngine::cleanShutdown() {
LOGV2(22317, "WiredTigerKVEngine shutting down");
WiredTigerUtil::resetTableLoggingInfo();
if (!_readOnly)
syncSizeInfo(true);
if (!_conn) {

View File

@ -105,6 +105,8 @@ public:
void startAsyncThreads() override;
void notifyStartupComplete() override;
void setRecordStoreExtraOptions(const std::string& options);
void setSortedDataInterfaceExtraOptions(const std::string& options);

View File

@ -63,6 +63,7 @@ public:
repl::ReplicationCoordinator::set(
context,
std::make_unique<repl::ReplicationCoordinatorMock>(context, repl::ReplSettings()));
_engine->notifyStartupComplete();
}
virtual KVEngine* restartEngine() override {

View File

@ -86,6 +86,7 @@ public:
getGlobalServiceContext(),
std::unique_ptr<repl::ReplicationCoordinator>(new repl::ReplicationCoordinatorMock(
getGlobalServiceContext(), repl::ReplSettings())));
_engine->notifyStartupComplete();
}
PrefixedWiredTigerHarnessHelper(StringData extraStrings) : _dbpath("wt_test") {}

View File

@ -86,6 +86,7 @@ public:
repl::ReplicationCoordinator::set(serviceContext(),
std::make_unique<repl::ReplicationCoordinatorMock>(
serviceContext(), repl::ReplSettings()));
_engine.notifyStartupComplete();
}
~WiredTigerHarnessHelper() {}

View File

@ -35,23 +35,97 @@
#include <limits>
#include <boost/filesystem.hpp>
#include <boost/filesystem/path.hpp>
#include "mongo/base/simple_string_data_comparator.h"
#include "mongo/bson/bsonobjbuilder.h"
#include "mongo/db/concurrency/write_conflict_exception.h"
#include "mongo/db/snapshot_window_options_gen.h"
#include "mongo/db/storage/storage_file_util.h"
#include "mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h"
#include "mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.h"
#include "mongo/db/storage/wiredtiger/wiredtiger_session_cache.h"
#include "mongo/logv2/log.h"
#include "mongo/util/assert_util.h"
#include "mongo/util/fail_point.h"
#include "mongo/util/processinfo.h"
#include "mongo/util/scopeguard.h"
#include "mongo/util/str.h"
namespace mongo {
MONGO_FAIL_POINT_DEFINE(crashAfterUpdatingFirstTableLoggingSettings);
namespace {
const std::string kTableChecksFileName = "_wt_table_checks";
/**
* Returns true if the 'kTableChecksFileName' file exists in the dbpath.
*
* Must be called before createTableChecksFile() or removeTableChecksFile() to get accurate results.
*/
bool hasPreviouslyIncompleteTableChecks() {
auto path = boost::filesystem::path(storageGlobalParams.dbpath) /
boost::filesystem::path(kTableChecksFileName);
return boost::filesystem::exists(path);
}
/**
* Creates the 'kTableChecksFileName' file in the dbpath.
*/
void createTableChecksFile() {
auto path = boost::filesystem::path(storageGlobalParams.dbpath) /
boost::filesystem::path(kTableChecksFileName);
boost::filesystem::ofstream fileStream(path);
fileStream << "This file indicates that a WiredTiger table check operation is in progress or "
"incomplete."
<< std::endl;
if (fileStream.fail()) {
LOGV2_FATAL_NOTRACE(4366400,
"Failed to write to file",
"file"_attr = path.generic_string(),
"error"_attr = errnoWithDescription());
}
fileStream.close();
fassertNoTrace(4366401, fsyncFile(path));
fassertNoTrace(4366402, fsyncParentDirectory(path));
}
/**
* Removes the 'kTableChecksFileName' file in the dbpath, if it exists.
*/
void removeTableChecksFile() {
auto path = boost::filesystem::path(storageGlobalParams.dbpath) /
boost::filesystem::path(kTableChecksFileName);
if (!boost::filesystem::exists(path)) {
return;
}
boost::system::error_code errorCode;
boost::filesystem::remove(path, errorCode);
if (errorCode) {
LOGV2_FATAL_NOTRACE(4366403,
"Failed to remove file",
"file"_attr = path.generic_string(),
"error"_attr = errorCode.message());
}
}
} // namespace
using std::string;
Mutex WiredTigerUtil::_tableLoggingInfoMutex =
MONGO_MAKE_LATCH("WiredTigerUtil::_tableLoggingInfoMutex");
WiredTigerUtil::TableLoggingInfo WiredTigerUtil::_tableLoggingInfo;
Status wtRCToStatus_slow(int retCode, const char* prefix) {
if (retCode == 0)
return Status::OK();
@ -576,6 +650,23 @@ int WiredTigerUtil::verifyTable(OperationContext* opCtx,
return (session->verify)(session, uri.c_str(), nullptr);
}
void WiredTigerUtil::notifyStartupComplete() {
{
stdx::lock_guard<Latch> lk(_tableLoggingInfoMutex);
invariant(_tableLoggingInfo.isInitializing);
_tableLoggingInfo.isInitializing = false;
}
if (!storageGlobalParams.readOnly) {
removeTableChecksFile();
}
}
void WiredTigerUtil::resetTableLoggingInfo() {
stdx::lock_guard<Latch> lk(_tableLoggingInfoMutex);
_tableLoggingInfo = TableLoggingInfo();
}
bool WiredTigerUtil::useTableLogging(NamespaceString ns, bool replEnabled) {
if (!replEnabled) {
// All tables on standalones are logged.
@ -610,13 +701,88 @@ Status WiredTigerUtil::setTableLogging(OperationContext* opCtx, const std::strin
}
Status WiredTigerUtil::setTableLogging(WT_SESSION* session, const std::string& uri, bool on) {
std::string setting;
if (on) {
setting = "log=(enabled=true)";
} else {
setting = "log=(enabled=false)";
invariant(!storageGlobalParams.readOnly);
stdx::lock_guard<Latch> lk(_tableLoggingInfoMutex);
// Update the table logging settings regardless if we're no longer starting up the process.
if (!_tableLoggingInfo.isInitializing) {
return _setTableLogging(session, uri, on);
}
// During the start up process, the table logging settings are checked for each table to verify
// that they are set appropriately. We can speed this process up by assuming that the logging
// setting is identical for each table.
// We cross reference the logging settings for the first table and if it isn't correctly set, we
// change the logging settings for all tables during start up.
// In the event that the server wasn't shutdown cleanly, the logging settings will be modified
// for all tables as a safety precaution, or if repair mode is running.
if (_tableLoggingInfo.isFirstTable && hasPreviouslyIncompleteTableChecks()) {
_tableLoggingInfo.hasPreviouslyIncompleteTableChecks = true;
}
if (storageGlobalParams.repair || _tableLoggingInfo.hasPreviouslyIncompleteTableChecks) {
if (_tableLoggingInfo.isFirstTable) {
_tableLoggingInfo.isFirstTable = false;
if (!_tableLoggingInfo.hasPreviouslyIncompleteTableChecks) {
createTableChecksFile();
}
LOGV2(4366405,
"Modifying the table logging settings for all existing WiredTiger tables",
"loggingEnabled"_attr = on,
"repair"_attr = storageGlobalParams.repair,
"hasPreviouslyIncompleteTableChecks"_attr =
_tableLoggingInfo.hasPreviouslyIncompleteTableChecks);
}
return _setTableLogging(session, uri, on);
}
if (!_tableLoggingInfo.isFirstTable) {
if (_tableLoggingInfo.changeTableLogging) {
return _setTableLogging(session, uri, on);
}
// The table logging settings do not need to be modified.
return Status::OK();
}
invariant(_tableLoggingInfo.isFirstTable);
invariant(!_tableLoggingInfo.hasPreviouslyIncompleteTableChecks);
_tableLoggingInfo.isFirstTable = false;
// Check if the first tables logging settings need to be modified.
const std::string setting = on ? "log=(enabled=true)" : "log=(enabled=false)";
const std::string existingMetadata = getMetadataCreate(session, uri).getValue();
if (existingMetadata.find(setting) != std::string::npos) {
// The table is running with the expected logging settings.
LOGV2(4366408,
"No table logging settings modifications are required for existing WiredTiger tables",
"loggingEnabled"_attr = on);
return Status::OK();
}
// The first table is running with the incorrect logging settings. All tables will need to have
// their logging settings modified.
_tableLoggingInfo.changeTableLogging = true;
createTableChecksFile();
LOGV2(4366406,
"Modifying the table logging settings for all existing WiredTiger tables",
"loggingEnabled"_attr = on);
Status status = _setTableLogging(session, uri, on);
if (MONGO_unlikely(crashAfterUpdatingFirstTableLoggingSettings.shouldFail())) {
LOGV2_FATAL_NOTRACE(
4366407, "Crashing due to 'crashAfterUpdatingFirstTableLoggingSettings' fail point");
}
return status;
}
Status WiredTigerUtil::_setTableLogging(WT_SESSION* session, const std::string& uri, bool on) {
const std::string setting = on ? "log=(enabled=true)" : "log=(enabled=false)";
// This method does some "weak" parsing to see if the table is in the expected logging
// state. Only attempt to alter the table when a change is needed. This avoids grabbing heavy
// locks in WT when creating new tables for collections and indexes. Those tables are created
@ -638,19 +804,14 @@ Status WiredTigerUtil::setTableLogging(WT_SESSION* session, const std::string& u
return Status::OK();
}
LOGV2_DEBUG(22432,
1,
"Changing table logging settings. Uri: {uri} Enable? {on}",
"uri"_attr = uri,
"on"_attr = on);
LOGV2_DEBUG(
22432, 1, "Changing table logging settings", "uri"_attr = uri, "loggingEnabled"_attr = on);
int ret = session->alter(session, uri.c_str(), setting.c_str());
if (ret) {
LOGV2_FATAL(50756,
"Failed to update log setting. Uri: {uri} Enable? {enable} Ret: {error} MD: "
"{metadata} Msg: {message}",
"Failed to update log setting",
"uri"_attr = uri,
"enable"_attr = on,
"loggingEnabled"_attr = on,
"error"_attr = ret,
"metadata"_attr = redact(existingMetadata),
"message"_attr = session->strerror(session, ret));

View File

@ -253,6 +253,10 @@ public:
const std::string& uri,
std::vector<std::string>* errors = nullptr);
static void notifyStartupComplete();
static void resetTableLoggingInfo();
static bool useTableLogging(NamespaceString ns, bool replEnabled);
static Status setTableLogging(OperationContext* opCtx, const std::string& uri, bool on);
@ -273,6 +277,18 @@ private:
*/
template <typename T>
static T _castStatisticsValue(uint64_t statisticsValue, T maximumResultType);
static Status _setTableLogging(WT_SESSION* session, const std::string& uri, bool on);
// Used to keep track of the table logging setting modifications during start up. The mutex must
// be held prior to accessing any of the member variables in the struct.
static Mutex _tableLoggingInfoMutex;
static struct TableLoggingInfo {
bool isInitializing = true;
bool isFirstTable = true;
bool changeTableLogging = false;
bool hasPreviouslyIncompleteTableChecks = false;
} _tableLoggingInfo;
};
class WiredTigerConfigParser {

View File

@ -305,6 +305,10 @@ ServiceContext* initialize(const char* yaml_config) {
// error.
FeatureCompatibilityVersion::fassertInitializedAfterStartup(startupOpCtx.get());
// Notify the storage engine that startup is completed before repair exits below, as repair sets
// the upgrade flag to true.
serviceContext->getStorageEngine()->notifyStartupComplete();
if (storageGlobalParams.upgrade) {
LOGV2(22553, "finished checking dbs");
exitCleanly(EXIT_CLEAN);