0
0
mirror of https://github.com/mongodb/mongo.git synced 2024-12-01 09:32:32 +01:00
mongodb/db/dur_recover.cpp
2010-12-08 16:23:21 -05:00

389 lines
14 KiB
C++

// @file dur_recover.cpp crash recovery via the journal
/**
* Copyright (C) 2009 10gen Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License, version 3,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "pch.h"
#if defined(_DURABLE)
#include "dur.h"
#include "dur_journal.h"
#include "dur_journalformat.h"
#include "durop.h"
#include "namespace.h"
#include "../util/mongoutils/str.h"
#include "bufreader.h"
#include "pdfile.h"
#include "database.h"
#include "db.h"
#include "../util/unittest.h"
#include "cmdline.h"
using namespace mongoutils;
namespace mongo {
namespace dur {
/** todo clean up : this is messy from refactoring. */
struct FullyQualifiedJournalEntry {
bool isBasicWrite() const { return dbName != 0; }
const char *dbName;
JEntry e;
const char *srcData;
shared_ptr<DurOp> op;
};
void removeJournalFiles();
path getJournalDir();
/** get journal filenames, in order. throws if unexpected content found */
/** get journal filenames, in order. throws if unexpected content found.
    Collects journal/j._<n> files keyed by sequence number n, verifies the
    sequence is contiguous and unique, and appends them to 'files' sorted
    by n (map iteration order).
    @param dir   the journal directory to scan
    @param files out-param; journal file paths appended in replay order
*/
static void getFiles(path dir, vector<path>& files) {
    // sequence number -> path; the map both sorts and detects duplicates
    map<unsigned,path> m;
    for ( filesystem::directory_iterator i( dir );
            i != filesystem::directory_iterator();
            ++i ) {
        filesystem::path filepath = *i;
        string fileName = filesystem::path(*i).leaf();
        if( str::startsWith(fileName, "j._") ) {
            unsigned u = str::toUnsigned( str::after(fileName, '_') );
            if( m.count(u) ) {
                // two files claim the same sequence number
                uasserted(13531, str::stream() << "unexpected files in journal directory " << dir.string() << " : " << fileName);
            }
            if( !m.empty() && !m.count(u-1) ) {
                // a gap in the sequence means a journal file is missing; refuse to recover
                uasserted(13532,
                          str::stream() << "unexpected file in journal directory " << dir.string()
                          << " : " << fileName << " : can't find its preceding file");
            }
            m.insert( pair<unsigned,path>(u,filepath) );
        }
    }
    for( map<unsigned,path>::iterator i = m.begin(); i != m.end(); ++i )
        files.push_back(i->second);
}
/** read through the memory mapped data of a journal file (journal/j._<n> file)
throws
*/
/** read through the memory mapped data of a journal file (journal/j._<n> file)
    throws
*/
class JournalIterator : boost::noncopyable {
public:
    /** @param p   start of the mapped journal file
        @param len length of the mapped region in bytes
        throws (uassert) if the file header version is wrong or invalid */
    JournalIterator(void *p, unsigned len) : _br(p, len) {
        _needHeader = true;
        *_lastDbName = 0;
        JHeader h;
        _br.read(h); // read/skip file header
        uassert(13536, str::stream() << "journal version number mismatch " << h.version, h.versionOk());
        uassert(13537, "journal header invalid", h.valid());
    }

    bool atEof() const { return _br.atEof(); }

    /** get the next entry from the log. this function parses and combines JDbContext and JEntry's.
     * @return true if got an entry. false at successful end of section (and no entry returned).
     * throws on premature end of section.
     */
    bool next(FullyQualifiedJournalEntry& e) {
        if( _needHeader ) {
            // starting a new section (group commit): consume its JSectHeader first
            JSectHeader h;
            _br.read(h);
            _needHeader = false;
        }

        unsigned lenOrOpCode;
        _br.read(lenOrOpCode);

        if( lenOrOpCode >= JEntry::OpCode_Min ) {
            if( lenOrOpCode == JEntry::OpCode_Footer ) {
                // end of section: the opcode we just read was the footer's first
                // 4 bytes, so skip the remainder and realign for the next section
                _needHeader = true;
                _br.skip(sizeof(JSectFooter) - 4);
                _br.align(Alignment);
                return false;
            }

            if( lenOrOpCode != JEntry::OpCode_DbContext ) {
                // a durable op entry (not a basic write); no db name applies
                e.dbName = 0;
                e.op = DurOp::read(lenOrOpCode, _br);
                return true;
            }

            // JDbContext
            {
                // read the NUL-terminated db name; it applies to all subsequent
                // JEntry's until the next JDbContext record
                char c;
                char *p = _lastDbName;
                char *end = p + Namespace::MaxNsLen;
                while( 1 ) {
                    _br.read(c);
                    *p++ = c;
                    if( c == 0 )
                        break;
                    if( p >= end ) {
                        /* this shouldn't happen, even if log is truncated. */
                        log() << _br.offset() << endl;
                        uasserted(13533, "problem processing journal file during recovery");
                    }
                }
                // the context record is immediately followed by the next len/opcode
                _br.read(lenOrOpCode);
            }
        }

        // now do JEntry
        assert( lenOrOpCode && lenOrOpCode < JEntry::OpCode_Min );
        e.dbName = _lastDbName;
        e.e.len = lenOrOpCode;
        _br.read(e.e.ofs);
        _br.read(e.e.fileNo);
        // payload is referenced in place within the mapped file, not copied
        e.srcData = (const char *) _br.skip(lenOrOpCode);
        return true;
    }

private:
    bool _needHeader;                       // true => next() must first consume a JSectHeader
    BufReader _br;
    char _lastDbName[Namespace::MaxNsLen];  // most recent JDbContext db name seen
};
/** call go() to execute a recovery from existing journal files.
*/
/** call go() to execute a recovery from existing journal files.
*/
class RecoveryJob : boost::noncopyable {
public:
    // replay the given journal files in order; throws on unrecoverable problems
    void go(vector<path>& files);
    ~RecoveryJob();
private:
    // apply one section's worth (one group commit) of parsed entries
    void applyEntries(const vector<FullyQualifiedJournalEntry> &entries);
    // parse and apply one whole mapped journal file; returns true on abrupt end
    bool processBuffer(void *, unsigned len);
    bool processFile(path journalfile);
    // flush applied writes to disk and unmap everything
    void close();

    /** retrieve the mmap pointer for the specified dbName plus file number.
        open if not yet open.
        @param fileNo a value of -1 indicates ".ns"
        @param ofs offset to add to the pointer before returning
    */
    void* ptr(const char *dbName, int fileNo, unsigned ofs);

    // (fileNo, dbName) -> base pointer of that mapped database file
    map< pair<int,string>, void* > _fileToPtr;
    // owns the mappings so they stay alive until close()/destruction
    list< shared_ptr<MemoryMappedFile> > _files;
};
/** retrieve the mmap pointer for the specified dbName plus file number.
open if not yet open.
*/
void* RecoveryJob::ptr(const char *dbName, int fileNo, unsigned ofs) {
void *&p = _fileToPtr[ pair<int,string>(fileNo,dbName) ];
if( p == 0 ) {
MemoryMappedFile *f = new MemoryMappedFile();
_files.push_back( shared_ptr<MemoryMappedFile>(f) );
string fn;
{
stringstream ss;
ss << dbName << '.';
if( fileNo < 0 )
ss << "ns";
else
ss << fileNo;
/* todo: do we need to create file here if DNE?
we need to know what its length should be for that though.
however does this happen? FileCreatedOp should have been in the journal and
already applied if the file is new.
*/
fn = ss.str();
}
p = f->map(fn.c_str());
uassert(13534, str::stream() << "recovery error couldn't open " << fn, p);
if( cmdLine.durTrace & CmdLine::DurDumpJournal )
log() << " opened " << fn << ' ' << f->length()/1024.0/1024.0 << endl;
uassert(13543, str::stream() << "recovery error file has length zero " << fn, f->length());
assert( ofs < f->length() );
}
return ((char*) p) + ofs;
}
// Safety net: if go() never ran (or threw before its close()), still flush/unmap.
RecoveryJob::~RecoveryJob() {
    if( _files.empty() )
        return;
    close();
}
void RecoveryJob::close() {
    // order matters: flush all applied writes to disk BEFORE unmapping the files
    log() << "recover flush" << endl;
    MongoFile::flushAll(true);
    log() << "recover close" << endl;
    _files.clear(); // closes files
    _fileToPtr.clear();
}
/** render up to the first 4 bytes of 'data' as zero-padded hex pairs,
    each followed by a space (e.g. "61 62 63 64 "), for journal tracing.
    @param data bytes to preview
    @param len  number of valid bytes at 'data' (only min(len,4) are shown)
*/
string hexdump(const char *data, unsigned len) {
    const unsigned char *bytes = (const unsigned char *) data;
    const unsigned count = (len < 4) ? len : 4;  // preview at most 4 bytes
    stringstream out;
    for( unsigned j = 0; j < count; j++ ) {
        unsigned v = bytes[j];
        out << std::hex << setw(2) << setfill('0') << v << ' ';
    }
    return out.str();
}
void RecoveryJob::applyEntries(const vector<FullyQualifiedJournalEntry> &entries) {
bool apply = (cmdLine.durTrace & CmdLine::DurScanOnly) == 0;
bool dump = cmdLine.durTrace & CmdLine::DurDumpJournal;
if( dump )
log() << "BEGIN section" << endl;
for( vector<FullyQualifiedJournalEntry>::const_iterator i = entries.begin(); i != entries.end(); ++i ) {
const FullyQualifiedJournalEntry& fqe = *i;
if( fqe.isBasicWrite() ) {
if( dump ) {
stringstream ss;
ss << " BASICWRITE " << setw(20) << fqe.dbName << '.'
<< setw(2) << fqe.e.fileNo << ' ' << setw(6) << fqe.e.len
<< ' ' << hex << setw(8) << (size_t) fqe.srcData << dec << " " << hexdump(fqe.srcData, fqe.e.len);
log() << ss.str() << endl;
}
if( apply ) {
void *p = ptr(fqe.dbName, fqe.e.fileNo, fqe.e.ofs);
memcpy(p, fqe.srcData, fqe.e.len);
}
} else {
if( dump ) {
log() << " OP " << fqe.op->toString() << endl;
}
if( apply ) {
fqe.op->replay();
}
}
}
if( dump )
log() << "END section" << endl;
}
/** @param p start of the memory mapped file
@return true if this is detected to be the last file (ends abruptly)
*/
bool RecoveryJob::processBuffer(void *p, unsigned len) {
JournalIterator i(p, len);
vector<FullyQualifiedJournalEntry> entries;
try {
while( 1 ) {
entries.clear();
FullyQualifiedJournalEntry e;
while( i.next(e) )
entries.push_back(e);
// got all the entries for one group commit. apply them:
applyEntries(entries);
// now do the next section (i.e. group commit)
if( i.atEof() )
break;
}
}
catch( BufReader::eof& ) {
if( cmdLine.durTrace & CmdLine::DurDumpJournal )
log() << "ABRUPT END" << endl;
return true; // abrupt end
}
return false; // non-abrupt end
}
/** map one journal file read-only and replay its contents.
    @return true if the file ended abruptly (mid-section). */
bool RecoveryJob::processFile(path journalfile) {
    const string fname = journalfile.string();
    log() << "recover " << fname << endl;
    MemoryMappedFile mmf;
    void *data = mmf.mapWithOptions(fname.c_str(), MongoFile::READONLY | MongoFile::SEQUENTIAL);
    massert(13544, str::stream() << "recover error couldn't open " << fname, data);
    return processBuffer(data, (unsigned) mmf.length());
}
/** replay the journal files in order, then flush, clean up, and mark
    recovery complete.  throws (uasserted) on errors. */
void RecoveryJob::go(vector<path>& files) {
    log() << "recover begin" << endl;

    for( unsigned idx = 0; idx != files.size(); ++idx ) {
        const bool abruptEnd = processFile(files[idx]);
        // an abrupt end is tolerable only in the final journal file
        if( abruptEnd && idx+1 < files.size() ) {
            log() << "recover error: abrupt end to file " << files[idx].string() << ", yet it isn't the last journal file" << endl;
            close();
            uasserted(13535, "recover abrupt journal file end");
        }
    }

    close();

    if( cmdLine.durTrace & CmdLine::DurScanOnly ) {
        // scan-only mode: deliberately terminate before deleting journal files
        uasserted(13545, str::stream() << "--durTrace " << (int) CmdLine::DurScanOnly << " specified, terminating");
    }

    log() << "recover cleaning up" << endl;
    removeJournalFiles();
    log() << "recover done" << endl;
    okToCleanUp = true;
}
/** recover from a crash
throws on error
*/
void recover() {
assert( cmdLine.dur );
filesystem::path p = getJournalDir();
if( !exists(p) ) {
log() << "directory " << p.string() << " does not exist, there will be no recovery startup step" << endl;
okToCleanUp = true;
return;
}
vector<path> journalFiles;
getFiles(p, journalFiles);
if( journalFiles.empty() ) {
log() << "recover : no journal files present, no recovery needed" << endl;
okToCleanUp = true;
return;
}
RecoveryJob j;
j.go(journalFiles);
}
struct BufReaderY { int a,b; }; // 8-byte scratch struct used to test BufReader struct reads
// startup sanity check of BufReader's sequential read behavior
class BufReaderUnitTest : public UnitTest {
public:
    void run() {
        BufReader r((void*) "abcdabcdabcd", 12);
        char x;
        BufReaderY y;
        r.read(x); cout << x; // a
        assert( x == 'a' );
        r.read(y);            // consumes sizeof(BufReaderY) == 8 bytes
        r.read(x);            // next byte after the struct (offset 9) is 'b'
        assert( x == 'b' );
    }
} brunittest;
} // namespace dur
} // namespace mongo
#endif