0
0
mirror of https://github.com/mongodb/mongo.git synced 2024-11-30 00:56:44 +01:00

Better logging when there is a sync problem between master and slave during replication.

Also halt replication when this happens.
This commit is contained in:
Dwight 2010-03-05 13:50:46 -05:00
parent 7969923944
commit 6fb243a614

View File

@ -1,10 +1,8 @@
// repl.cpp // repl.cpp
/* TODO /* TODO
PAIRING PAIRING
_ on a syncexception, don't allow going back to master state? _ on a syncexception, don't allow going back to master state?
*/ */
/** /**
@ -49,6 +47,7 @@
namespace mongo { namespace mongo {
// our config from command line etc.
ReplSettings replSettings; ReplSettings replSettings;
void ensureHaveIdIndex(const char *ns); void ensureHaveIdIndex(const char *ns);
@ -202,7 +201,7 @@ namespace mongo {
virtual LockType locktype(){ return WRITE; } virtual LockType locktype(){ return WRITE; }
CmdForceDead() : Command("forcedead") { } CmdForceDead() : Command("forcedead") { }
virtual bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { virtual bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
replAllDead = "forced by command"; replAllDead = "replication forced to stop by 'forcedead' command";
return true; return true;
} }
} cmdForceDead; } cmdForceDead;
@ -1236,8 +1235,8 @@ namespace mongo {
if ( replPair && replPair->state == ReplPair::State_Master ) { if ( replPair && replPair->state == ReplPair::State_Master ) {
OpTime nextOpTime( ts.date() ); OpTime next( ts.date() );
if ( !tailing && !initial && nextOpTime != syncedTo ) { if ( !tailing && !initial && next != syncedTo ) {
log() << "remote slave log filled, forcing slave resync" << endl; log() << "remote slave log filled, forcing slave resync" << endl;
resetSlave(); resetSlave();
return true; return true;
@ -1330,13 +1329,22 @@ namespace mongo {
BSONObj op = c->next(); BSONObj op = c->next();
BSONElement ts = op.getField("ts"); BSONElement ts = op.getField("ts");
assert( ts.type() == Date || ts.type() == Timestamp ); if( !( ts.type() == Date || ts.type() == Timestamp ) ) {
log() << "sync error: problem querying remote oplog record\n";
log() << "op: " << op.toString() << '\n';
log() << "halting replication" << endl;
replInfo = replAllDead = "sync error: no ts found querying remote oplog record";
throw SyncException();
}
OpTime last = nextOpTime; OpTime last = nextOpTime;
OpTime tmp( ts.date() ); nextOpTime = OpTime( ts.date() );
nextOpTime = tmp;
if ( !( last < nextOpTime ) ) { if ( !( last < nextOpTime ) ) {
problem() << "sync error: last " << last.toString() << " >= nextOpTime " << nextOpTime.toString() << endl; log() << "sync error: last >= nextOpTime from master" << endl;
uassert( 10123 , "bad 'ts' value in sources", false); log() << " last: " << last.toStringLong() << '\n';
log() << " nextOpTime: " << nextOpTime.toStringLong() << '\n';
log() << " halting replication" << endl;
replInfo = replAllDead = "sync error last >= nextOpTime";
uassert( 10123 , "bad 'ts' value in sources or at master? (last>=nextOpTime)", false);
} }
if ( replSettings.slavedelay && ( unsigned( time( 0 ) ) < nextOpTime.getSecs() + replSettings.slavedelay ) ) { if ( replSettings.slavedelay && ( unsigned( time( 0 ) ) < nextOpTime.getSecs() + replSettings.slavedelay ) ) {
c->putBack( op ); c->putBack( op );