0
0
mirror of https://github.com/mongodb/mongo.git synced 2024-12-01 09:32:32 +01:00

SERVER-56766 External sorter can reuse temp files from previous startups

This commit is contained in:
Yuhong Zhang 2021-05-14 05:53:36 +00:00 committed by Evergreen Agent
parent 79b4b66a43
commit f62706daa8
2 changed files with 100 additions and 3 deletions

View File

@ -0,0 +1,61 @@
/**
* Tests that startup recovery for a resumable index build clears the "_tmp" directory of every file
* except those that belong to the index build being resumed.
*
* @tags: [
* requires_majority_read_concern,
* requires_persistence,
* requires_replication,
* ]
*/
(function() {
"use strict";
// Provides ResumableIndexBuildTest, which is used below.
load("jstests/noPassthrough/libs/index_build.js");
const dbName = "test";
// Number of documents inserted into the collection; each one is at least 1 MB (see the loop below).
const numDocuments = 100;
// Passed to the maxIndexBuildMemoryUsageMegabytes server parameter, and reused further down as an
// argument of ResumableIndexBuildTest.run() and in the expected post-resume scan count.
const maxIndexBuildMemoryUsageMB = 50;
const rst = new ReplSetTest({
nodes: 1,
nodeOptions: {setParameter: {maxIndexBuildMemoryUsageMegabytes: maxIndexBuildMemoryUsageMB}}
});
rst.startSet();
rst.initiate();
// Insert enough data so that the collection scan spills to disk.
const primary = rst.getPrimary();
const coll = primary.getDB(dbName).getCollection(jsTestName());
const bulk = coll.initializeUnorderedBulkOp();
for (let i = 0; i < numDocuments; i++) {
// Each document is at least 1 MB: the decimal string of 'i' (one or more characters) is repeated
// 1024 * 1024 times.
bulk.insert({a: i.toString().repeat(1024 * 1024)});
}
assert.commandWorked(bulk.execute());
// Manually write an empty file named "garbage" into the "_tmp" directory under the primary's
// dbpath. No index build owns this file, so it stands in for a leftover temp file.
const tmpDir = primary.dbpath + "/_tmp/";
mkdir(tmpDir);
writeFile(tmpDir + "garbage", "");
// Run a resumable index build on {a: 1} to completion to make sure the spilled files in the
// "_tmp" directory are not deleted along with the garbage file.
ResumableIndexBuildTest.run(
rst,
dbName,
coll.getName(),
[[{a: 1}]],
[{name: "hangIndexBuildDuringCollectionScanPhaseBeforeInsertion", logIdWithBuildUUID: 20386}],
// Each document is at least 1 MB, so the index build must have spilled to disk by this point.
maxIndexBuildMemoryUsageMB,
["collection scan"],
// NOTE(review): the key is spelled "numScannedAferResume" (missing a 't'). It is presumably read
// under that exact spelling by the helper in index_build.js -- confirm before renaming it.
[{numScannedAferResume: numDocuments - maxIndexBuildMemoryUsageMB}]);
// Assert that the "_tmp" directory is now empty, i.e. the garbage file written above is gone. The
// directory listing is passed as the assertion message to aid debugging on failure.
const files = listFiles(tmpDir);
assert.eq(files.length, 0, files);
rst.stopSet();
})();

View File

@ -337,6 +337,36 @@ void assertCappedOplog(OperationContext* opCtx, Database* db) {
}
}
/**
 * Removes every entry directly inside 'tempDir' whose file name is not one of the file names
 * recorded by the index builds in 'indexBuildsToResume'.
 *
 * A removal that fails is logged and does not stop the remaining entries from being processed.
 */
void clearTempFilesExceptForResumableBuilds(const std::vector<ResumeIndexInfo>& indexBuildsToResume,
                                            const boost::filesystem::path& tempDir) {
    // Collect the names of the files that the resumable index builds refer to; these must be kept.
    StringSet filesToKeep;
    for (const auto& buildInfo : indexBuildsToResume) {
        for (const auto& indexInfo : buildInfo.getIndexes()) {
            // An index without a recorded file name contributes nothing to the keep-set.
            if (boost::optional<StringData> fileName = indexInfo.getFileName()) {
                filesToKeep.insert(fileName->toString());
            }
        }
    }

    // Walk the top level of the temp directory and remove everything that is not in the keep-set.
    const boost::filesystem::directory_iterator endOfDir;
    for (boost::filesystem::directory_iterator entry(tempDir); entry != endOfDir; ++entry) {
        auto entryName = entry->path().filename().string();
        if (filesToKeep.contains(entryName)) {
            continue;
        }

        // Use the error_code overload so that a failed removal is reported through the log below.
        boost::system::error_code ec;
        boost::filesystem::remove(entry->path(), ec);
        if (ec) {
            LOGV2(5676601,
                  "Failed to clear temp directory file",
                  "filename"_attr = entryName,
                  "error"_attr = ec.message());
        }
    }
}
void reconcileCatalogAndRebuildUnfinishedIndexes(
OperationContext* opCtx,
StorageEngine* storageEngine,
@ -344,18 +374,24 @@ void reconcileCatalogAndRebuildUnfinishedIndexes(
auto reconcileResult =
fassert(40593, storageEngine->reconcileCatalogAndIdents(opCtx, lastShutdownState));
// If we did not find any index builds to resume or we are starting up after an unclean
// shutdown, nothing in the temp directory will be used. Thus, we can clear it.
auto tempDir = boost::filesystem::path(storageGlobalParams.dbpath).append("_tmp");
if (reconcileResult.indexBuildsToResume.empty() ||
lastShutdownState == StorageEngine::LastShutdownState::kUnclean) {
// If we did not find any index builds to resume or we are starting up after an unclean
// shutdown, nothing in the temp directory will be used. Thus, we can clear it completely.
LOGV2(5071100, "Clearing temp directory");
boost::system::error_code ec;
boost::filesystem::remove_all(storageGlobalParams.dbpath + "/_tmp/", ec);
boost::filesystem::remove_all(tempDir, ec);
if (ec) {
LOGV2(5071101, "Failed to clear temp directory", "error"_attr = ec.message());
}
} else if (boost::filesystem::exists(tempDir)) {
// Clears the contents of the temp directory except for files for resumable builds.
LOGV2(5676600, "Clearing temp directory except for files for resumable builds");
clearTempFilesExceptForResumableBuilds(reconcileResult.indexBuildsToResume, tempDir);
}
// Determine which indexes need to be rebuilt. rebuildIndexesOnCollection() requires that all