0
0
mirror of https://github.com/mongodb/mongo.git synced 2024-12-01 09:32:32 +01:00
mongodb/db/repl.h

339 lines
12 KiB
C
Raw Normal View History

// repl.h - replication
2008-07-28 00:36:47 +02:00
/**
*    Copyright (C) 2008 10gen Inc.
*
*    This program is free software: you can redistribute it and/or  modify
*    it under the terms of the GNU Affero General Public License, version 3,
*    as published by the Free Software Foundation.
*
*    This program is distributed in the hope that it will be useful,
*    but WITHOUT ANY WARRANTY; without even the implied warranty of
*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
*    GNU Affero General Public License for more details.
*
*    You should have received a copy of the GNU Affero General Public License
*    along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/
/* replication data overview

   at the slave:
     local.sources { host: ..., source: ..., only: ..., syncedTo: ..., localLogTs: ..., dbsNextPass: { ... }, incompleteCloneDbs: { ... } }

   at the master:
     local.oplog.$<source>
*/
#pragma once
2009-04-24 00:01:24 +02:00
#include "pdfile.h"
#include "db.h"
#include "dbhelpers.h"
#include "query.h"
2010-03-09 22:54:19 +01:00
#include "queryoptimizer.h"
2009-04-24 00:01:24 +02:00
2008-12-01 03:00:54 +01:00
#include "../client/dbclient.h"
2009-04-24 00:01:24 +02:00
#include "../util/optime.h"
2010-04-13 19:50:09 +02:00
#include "oplog.h"
2009-01-14 23:09:51 +01:00
2010-04-13 19:50:09 +02:00
namespace mongo {
2009-08-18 03:46:18 +02:00
/* Is this process a replication slave, and if so of which kind?
   (possibly with slave or repl pair nonmaster)

     --slave cmd line setting -> SimpleSlave
*/
enum SlaveTypes {
    NotSlave = 0,
    SimpleSlave = 1,
    ReplPairSlave = 2
};
2009-08-18 03:46:18 +02:00
class ReplSettings {
public:
SlaveTypes slave;
/* true means we are master and doing replication. if we are not writing to oplog (no --master or repl pairing),
this won't be true.
*/
bool master;
int opIdMem;
bool fastsync;
bool autoresync;
2010-03-01 22:55:31 +01:00
int slavedelay;
ReplSettings()
2010-03-01 22:55:31 +01:00
: slave(NotSlave) , master(false) , opIdMem(100000000) , fastsync() , autoresync(false), slavedelay() {
}
};
2009-08-18 03:46:18 +02:00
// The shared ReplSettings instance.  Only declared here; the definition lives elsewhere.
extern ReplSettings replSettings;
2009-02-02 17:15:24 +01:00
2009-01-24 00:24:15 +01:00
/* Declaration only -- the implementation is not in this header.
   NOTE(review): the following is inferred from the parameter names; confirm against the definition.
     masterHost / fromdb    - presumably the host to copy from and the database to copy
     errmsg                 - presumably filled with a description when the call fails
     logForReplication, slaveOk, useReplAuth, snapshot - option flags
   Returns a bool, presumably true on success.
*/
bool cloneFrom(const char *masterHost, string& errmsg, const string& fromdb, bool logForReplication,
bool slaveOk, bool useReplAuth, bool snapshot);
/* A replication exception */
2009-02-06 22:21:49 +01:00
class SyncException : public DBException {
public:
virtual const char* what() const throw() { return "sync exception"; }
2010-05-11 21:18:07 +02:00
virtual int getCode() const { return 10001; }
};
/* A Source is a source from which we can pull (replicate) data.
stored in collection local.sources.
Can be a group of things to replicate for several databases.
2008-07-29 23:54:54 +02:00
{ host: ..., source: ..., only: ..., syncedTo: ..., localLogTs: ..., dbsNextPass: { ... }, incompleteCloneDbs: { ... } }
2008-10-10 22:55:21 +02:00
'source' defaults to 'main'; support for multiple source names is
not done (always use main for now).
2008-12-29 02:28:49 +01:00
*/
class ReplSource {
bool resync(string db);
/* pull some operations from the master's oplog, and apply them. */
int sync_pullOpLog(int& nApplied);
void sync_pullOpLog_applyOperation(BSONObj& op, OpTime *localLogTail);
auto_ptr<DBClientConnection> conn;
auto_ptr<DBClientCursor> cursor;
/* we only clone one database per pass, even if a lot need done. This helps us
avoid overflowing the master's transaction log by doing too much work before going
back to read more transactions. (Imagine a scenario of slave startup where we try to
clone 100 databases in one pass.)
*/
set<string> addDbNextPass;
2009-04-16 17:36:06 +02:00
set<string> incompleteCloneDbs;
ReplSource();
// returns the dummy ns used to do the drop
string resyncDrop( const char *db, const char *requester );
// returns true if connected on return
bool connect();
// returns possibly unowned id spec for the operation.
static BSONObj idForOp( const BSONObj &op, bool &mod );
static void updateSetsWithOp( const BSONObj &op, bool mayUpdateStorage );
2009-04-22 23:44:23 +02:00
// call without the db mutex
void syncToTailOfRemoteLog();
// call with the db mutex
OpTime nextLastSavedLocalTs() const;
void setLastSavedLocalTs( const OpTime &nextLocalTs );
2009-04-22 23:44:23 +02:00
// call without the db mutex
void resetSlave();
// call with the db mutex
// returns false if the slave has been reset
bool updateSetsWithLocalOps( OpTime &localLogTail, bool mayUnlock );
2009-04-22 23:44:23 +02:00
string ns() const { return string( "local.oplog.$" ) + sourceName(); }
unsigned _sleepAdviceTime;
2009-04-22 23:44:23 +02:00
public:
2009-03-04 21:57:35 +01:00
static void applyOperation(const BSONObj& op);
bool replacing; // in "replace mode" -- see CmdReplacePeer
bool paired; // --pair in use
string hostName; // ip addr or hostname plus optionally, ":<port>"
string _sourceName; // a logical source name.
string sourceName() const {
return _sourceName.empty() ? "main" : _sourceName;
}
string only; // only a certain db. note that in the sources collection, this may not be changed once you start replicating.
2010-01-04 23:41:12 +01:00
/* the last time point we have already synced up to (in the remote/master's oplog). */
OpTime syncedTo;
2010-01-04 23:41:12 +01:00
/* This is for repl pairs.
2010-02-10 00:30:32 +01:00
_lastSavedLocalTs is the most recent point in the local log that we know is consistent
with the remote log ( ie say the local op log has entries ABCDE and the remote op log
has ABCXY, then _lastSavedLocalTs won't be greater than C until we have reconciled
the DE-XY difference.)
*/
2010-01-03 22:37:38 +01:00
OpTime _lastSavedLocalTs;
int nClonedThisPass;
2009-04-01 22:00:56 +02:00
typedef vector< shared_ptr< ReplSource > > SourceVector;
static void loadAll(SourceVector&);
2009-03-04 00:09:03 +01:00
explicit ReplSource(BSONObj);
/* -1 = error */
int sync(int& nApplied);
void save(); // write ourself to local.sources
void resetConnection() {
cursor = auto_ptr<DBClientCursor>(0);
2009-01-23 20:25:16 +01:00
conn = auto_ptr<DBClientConnection>(0);
}
// make a jsobj from our member fields of the form
// { host: ..., source: ..., syncedTo: ... }
BSONObj jsobj();
bool operator==(const ReplSource&r) const {
return hostName == r.hostName && sourceName() == r.sourceName();
}
2009-04-16 17:36:06 +02:00
operator string() const { return sourceName() + "@" + hostName; }
bool haveMoreDbsToSync() const { return !addDbNextPass.empty(); }
2010-03-01 22:55:31 +01:00
int sleepAdvice() const {
if ( !_sleepAdviceTime )
return 0;
int wait = _sleepAdviceTime - unsigned( time( 0 ) );
2010-03-01 22:55:31 +01:00
return wait > 0 ? wait : 0;
}
2009-02-02 17:15:24 +01:00
static bool throttledForceResyncDead( const char *requester );
static void forceResyncDead( const char *requester );
void forceResync( const char *requester );
};
2009-05-28 21:22:24 +02:00
// class for managing a set of ids in memory
2009-04-24 00:01:24 +02:00
class MemIds {
public:
MemIds() : size_() {}
friend class IdTracker;
void reset() {
imp_.clear();
size_ = 0;
}
2009-04-24 00:01:24 +02:00
bool get( const char *ns, const BSONObj &id ) { return imp_[ ns ].count( id ); }
void set( const char *ns, const BSONObj &id, bool val ) {
if ( val ) {
if ( imp_[ ns ].insert( id.getOwned() ).second ) {
size_ += id.objsize() + sizeof( BSONObj );
}
} else {
if ( imp_[ ns ].erase( id ) == 1 ) {
size_ -= id.objsize() + sizeof( BSONObj );
}
}
2009-04-24 00:01:24 +02:00
}
2009-04-24 00:26:42 +02:00
long long roughSize() const {
return size_;
2009-04-24 00:26:42 +02:00
}
2009-04-24 00:01:24 +02:00
private:
typedef map< string, BSONObjSetDefaultOrder > IdSets;
IdSets imp_;
long long size_;
2009-04-24 00:01:24 +02:00
};
2009-05-28 21:22:24 +02:00
// class for managing a set of ids in a db collection
// All functions must be called with db mutex held
2009-04-24 00:01:24 +02:00
class DbIds {
public:
2009-05-07 02:24:01 +02:00
DbIds( const string & name ) : impl_( name, BSON( "ns" << 1 << "id" << 1 ) ) {}
2009-04-24 00:01:24 +02:00
void reset() {
2009-05-07 02:24:01 +02:00
impl_.reset();
2009-04-24 00:01:24 +02:00
}
bool get( const char *ns, const BSONObj &id ) {
2009-05-07 02:24:01 +02:00
return impl_.get( key( ns, id ) );
2009-04-24 00:01:24 +02:00
}
void set( const char *ns, const BSONObj &id, bool val ) {
2009-05-07 02:24:01 +02:00
impl_.set( key( ns, id ), val );
2009-04-24 00:01:24 +02:00
}
private:
static BSONObj key( const char *ns, const BSONObj &id ) {
BSONObjBuilder b;
b << "ns" << ns;
// rename _id to id since there may be duplicates
2009-04-24 00:01:24 +02:00
b.appendAs( id.firstElement(), "id" );
return b.obj();
}
2009-05-07 02:24:01 +02:00
DbSet impl_;
2009-04-24 00:01:24 +02:00
};
2009-05-28 21:22:24 +02:00
// class for tracking ids and mod ids, in memory or on disk
// All functions must be called with db mutex held
// Kind of sloppy class structure, for now just want to keep the in mem
// version speedy.
// see http://www.mongodb.org/display/DOCS/Pairing+Internals
2009-04-24 00:01:24 +02:00
class IdTracker {
public:
IdTracker() :
dbIds_( "local.temp.replIds" ),
dbModIds_( "local.temp.replModIds" ),
inMem_( true ),
maxMem_( replSettings.opIdMem ) {
2009-04-24 00:01:24 +02:00
}
void reset( int maxMem = replSettings.opIdMem ) {
memIds_.reset();
memModIds_.reset();
dbIds_.reset();
dbModIds_.reset();
maxMem_ = maxMem;
inMem_ = true;
2009-04-24 00:01:24 +02:00
}
bool haveId( const char *ns, const BSONObj &id ) {
if ( inMem_ )
return get( memIds_, ns, id );
else
return get( dbIds_, ns, id );
2009-04-24 00:01:24 +02:00
}
bool haveModId( const char *ns, const BSONObj &id ) {
if ( inMem_ )
return get( memModIds_, ns, id );
else
return get( dbModIds_, ns, id );
2009-04-24 00:01:24 +02:00
}
void haveId( const char *ns, const BSONObj &id, bool val ) {
if ( inMem_ )
set( memIds_, ns, id, val );
else
set( dbIds_, ns, id, val );
2009-04-24 00:01:24 +02:00
}
void haveModId( const char *ns, const BSONObj &id, bool val ) {
if ( inMem_ )
set( memModIds_, ns, id, val );
else
set( dbModIds_, ns, id, val );
2009-04-24 00:01:24 +02:00
}
2009-05-07 01:02:48 +02:00
// will release the db mutex
void mayUpgradeStorage() {
if ( !inMem_ || memIds_.roughSize() + memModIds_.roughSize() <= maxMem_ )
return;
log() << "saving master modified id information to collection" << endl;
upgrade( memIds_, dbIds_ );
upgrade( memModIds_, dbModIds_ );
memIds_.reset();
memModIds_.reset();
inMem_ = false;
}
bool inMem() const { return inMem_; }
2009-04-24 00:01:24 +02:00
private:
template< class T >
bool get( T &ids, const char *ns, const BSONObj &id ) {
return ids.get( ns, id );
}
template< class T >
void set( T &ids, const char *ns, const BSONObj &id, bool val ) {
ids.set( ns, id, val );
}
void upgrade( MemIds &a, DbIds &b ) {
for( MemIds::IdSets::const_iterator i = a.imp_.begin(); i != a.imp_.end(); ++i ) {
for( BSONObjSetDefaultOrder::const_iterator j = i->second.begin(); j != i->second.end(); ++j ) {
set( b, i->first.c_str(), *j, true );
RARELY {
dbtemprelease t;
}
}
}
}
MemIds memIds_;
MemIds memModIds_;
DbIds dbIds_;
DbIds dbModIds_;
bool inMem_;
int maxMem_;
2009-04-24 00:01:24 +02:00
};
// Declaration only; defined elsewhere.
// NOTE(review): going by the name, reports whether any form of replication is enabled -- confirm against the definition.
bool anyReplEnabled();
2010-02-10 20:18:57 +01:00
// Declaration only; defined elsewhere.  level defaults to 0.
// NOTE(review): going by the name, adds replication information to result; how authed and level affect the output is not visible here -- confirm against the definition.
void appendReplicationInfo( BSONObjBuilder& result , bool authed , int level = 0 );
2009-01-14 23:09:51 +01:00
} // namespace mongo