0
0
mirror of https://github.com/mongodb/mongo.git synced 2024-12-01 09:32:32 +01:00
This commit is contained in:
dwight 2010-06-22 20:05:11 -04:00
parent 68785434eb
commit 37bf316844
7 changed files with 117 additions and 72 deletions

View File

@ -37,7 +37,7 @@ namespace mongo {
}
string who = cmdObj["who"].String();
int cfgver = cmdObj["cfgver"].Int();
unsigned long long ord = (unsigned long long) cmdObj["opTime"].Long();
unsigned long long ord = (unsigned long long) cmdObj["ord"].Long();
bool weAreFresher = false;
if( theReplSet->config().version > cfgver ) {
@ -48,6 +48,7 @@ namespace mongo {
else if( ord > rsOpTime.ord ) {
weAreFresher = true;
}
result.append("ord", (long long) ord);
result.append("fresher", weAreFresher);
return true;
}
@ -86,9 +87,6 @@ namespace mongo {
static const int VETO = -10000;
class VoteException : public std::exception {
};
const time_t LeaseTime = 30;
unsigned Consensus::yea(unsigned memberId) /* throws VoteException */ {
@ -165,9 +163,11 @@ namespace mongo {
@param allUp - set to true if all members are up. Only set if true returned.
@return true if we are freshest. Note we may tie.
*/
bool Consensus::weAreFreshest(bool& allUp) {
bool Consensus::weAreFreshest(bool& allUp, int& nTies) {
const unsigned long long ord = rsOpTime.ord;
assert( ord > 0 );
nTies = 0;
cout << "TEMP COMMENTED OUT LINE IN consensus.cpp" << endl;
//assert( ord > 0 );
BSONObj cmd = BSON(
"replSetFresh" << 1 <<
"set" << rs.name() <<
@ -185,13 +185,17 @@ namespace mongo {
nok++;
if( i->result["fresher"].trueValue() )
return false;
unsigned long long remoteOrd = (unsigned long long) i->result["ord"].Long();
if( remoteOrd == ord )
nTies++;
assert( remoteOrd <= ord );
}
else {
DEV log() << "replSet freshest returns " << i->result.toString() << rsLog;
allUp = false;
}
}
DEV log() << "replSet dev we are freshest of up nodes, nok:" << nok << rsLog;
DEV log() << "replSet dev we are freshest of up nodes, nok:" << nok << " nTies:" << nTies << rsLog;
assert( ord == rsOpTime.ord );
return true;
}
@ -205,19 +209,37 @@ namespace mongo {
void Consensus::_electSelf() {
bool allUp;
if( !weAreFreshest(allUp) ) {
int nTies;
if( !weAreFreshest(allUp, nTies) ) {
log() << "replSet info not electing self, we are not freshest" << rsLog;
return;
}
if( !allUp && time(0) - started < 60 * 5 ) {
/* the idea here is that if a bunch of nodes bounce all at once, we don't want to drop data
if we don't have to -- we'd rather be offline and wait a little longer instead */
if we don't have to -- we'd rather be offline and wait a little longer instead
todo: make this configurable.
*/
log() << "replSet info not electing self, not all members up and we have been up less than 5 minutes" << rsLog;
return;
}
time_t start = time(0);
Member& me = *rs._self;
if( nTies ) {
    /* tie? we then randomly sleep to try to not collide on our voting.
       The sleep is bounded to 50..1049 ms: the remainder keeps the random
       component under one second.  (Multiplying rand() by 1000 instead
       yields an effectively unbounded, wrap-around sleep time.) */
    /* todo: smarter. */
    DEV log() << "replSet tie " << nTies << " sleeping a little" << rsLog;
    if( me.id() == 0 ) {
        // would be fine for one node not to sleep
        // todo: biggest / highest priority nodes should be the ones that get to not sleep
    } else {
        assert( !rs.lockedByMe() ); // would be bad to go to sleep locked
        sleepmillis( ((unsigned) rand()) % 1000 + 50 );
        // tell the caller to re-evaluate state and try the election again
        throw RetryAfterSleepException();
    }
}
time_t start = time(0);
int tally = yea( me.id() );
log() << "replSet info electSelf" << rsLog;

View File

@ -44,7 +44,7 @@ namespace mongo {
}
Manager::Manager(ReplSetImpl *_rs) :
task::Server("Manager"), rs(_rs), busy(false), _primary(NOPRIMARY)
task::Server("Manager"), rs(_rs), busyWithElectSelf(false), _primary(NOPRIMARY)
{
}
@ -56,56 +56,66 @@ namespace mongo {
/** called as the health threads get new results */
void Manager::msgCheckNewState() {
{
RSBase::lock lk(rs);
bool again = false;
do {
{
RSBase::lock lk(rs);
if( busy ) return;
if( busyWithElectSelf ) return;
const Member *p = rs->currentPrimary();
const Member *p2;
try { p2 = findOtherPrimary(); }
catch(string s) {
/* two other nodes think they are primary (asynchronously polled) -- wait for things to settle down. */
log() << "replSet warning DIAG TODO 2primary" << s << rsLog;
return;
}
if( p == p2 && p ) return;
if( p2 ) {
/* someone else thinks they are primary. */
if( p == p2 ) // already match
const Member *p = rs->currentPrimary();
const Member *p2;
try { p2 = findOtherPrimary(); }
catch(string s) {
/* two other nodes think they are primary (asynchronously polled) -- wait for things to settle down. */
log() << "replSet warning DIAG TODO 2primary" << s << rsLog;
return;
if( p == 0 )
noteARemoteIsPrimary(p2); return;
if( p != rs->_self )
noteARemoteIsPrimary(p2); return;
/* we thought we were primary, yet now someone else thinks they are. */
if( !rs->elect.aMajoritySeemsToBeUp() )
noteARemoteIsPrimary(p2); return;
/* ignore for now, keep thinking we are master */
return;
}
}
if( p ) {
/* we are already primary, and nothing significant out there has changed. */
/* todo: if !aMajoritySeemsToBeUp, relinquish */
assert( p == rs->_self );
return;
}
if( p == p2 && p ) return;
/* no one seems to be primary. shall we try to elect ourself? */
if( !rs->elect.aMajoritySeemsToBeUp() ) {
rs->_self->lhb() = "can't see a majority, won't consider electing self";
return;
}
if( p2 ) {
    /* someone else thinks they are primary. */
    if( p == p2 ) // already match
        return;
    /* NOTE: each branch below needs braces -- without them the `return`
       is unconditional and the later checks are never reached. */
    if( p == 0 ) {
        /* we had no primary recorded: accept the remote's claim. */
        noteARemoteIsPrimary(p2);
        return;
    }
    if( p != rs->_self ) {
        /* we thought a different remote was primary: switch to the one now claiming it. */
        noteARemoteIsPrimary(p2);
        return;
    }
    /* we thought we were primary, yet now someone else thinks they are. */
    if( !rs->elect.aMajoritySeemsToBeUp() ) {
        noteARemoteIsPrimary(p2);
        return;
    }
    /* ignore for now, keep thinking we are master */
    return;
}
rs->_self->lhb() = "";
busy = true; // don't try to do further elections & such while we are already working on one.
}
try { rs->elect.electSelf(); }
catch(...) { log() << "replSet error unexpected assertion in rs manager" << rsLog; }
busy = false;
if( p ) {
/* we are already primary, and nothing significant out there has changed. */
/* todo: if !aMajoritySeemsToBeUp, relinquish */
assert( p == rs->_self );
return;
}
/* no one seems to be primary. shall we try to elect ourself? */
if( !rs->elect.aMajoritySeemsToBeUp() ) {
rs->_self->lhb() = "can't see a majority, won't consider electing self";
return;
}
rs->_self->lhb() = "";
busyWithElectSelf = true; // don't try to do further elections & such while we are already working on one.
}
try {
rs->elect.electSelf();
}
catch(RetryAfterSleepException&) {
again = true;
}
catch(...) {
log() << "replSet error unexpected assertion in rs manager" << rsLog;
}
busyWithElectSelf = false;
} while( again );
}
}

View File

@ -23,6 +23,7 @@
#include "../../util/concurrency/msg.h"
#include "../../util/hostandport.h"
#include "../commands.h"
#include "rs_exception.h"
#include "rs_optime.h"
#include "rsmember.h"
#include "rs_config.h"
@ -59,7 +60,7 @@ namespace mongo {
class Manager : public task::Server {
bool got(const any&);
ReplSetImpl *rs;
bool busy;
bool busyWithElectSelf;
int _primary;
const Member* findOtherPrimary();
void noteARemoteIsPrimary(const Member *);
@ -80,7 +81,7 @@ namespace mongo {
Atomic<LastYea> ly;
unsigned yea(unsigned memberId); // throws VoteException
void _electSelf();
bool weAreFreshest(bool& allUp);
bool weAreFreshest(bool& allUp, int& nTies);
public:
Consensus(ReplSetImpl *t) : rs(*t) { }
int totalVotes() const;

14
db/repl/rs_exception.h Executable file
View File

@ -0,0 +1,14 @@
// @file rs_exception.h
// Exception types used by the replica set election code.
#pragma once

#include <exception>

namespace mongo {

    /** Thrown while casting a vote; see Consensus::yea(), which is declared as throwing it. */
    class VoteException : public std::exception { };

    /** Thrown by the election code after it has slept because of a tie, to ask the
        caller to re-check state and retry (caught in Manager::msgCheckNewState()). */
    class RetryAfterSleepException : public std::exception { };

}

View File

@ -3,20 +3,18 @@
load("../../jstests/rs/test_framework.js");
function go() {
try {
var z = connect("localhost:27000/test");
print("error: mongod already running?");
zz = z;
return;
}
catch (e) {
;
}
assert(__nextPort == 27000, "_nextPort==27000");
a = rs_mongod();
a = null;
try {
a = new Mongo("localhost:27000");
print("using already open mongod on port 27000 -- presume you are debugging or something. should start empty.");
__nextPort++;
}
catch (e) {
a = rs_mongod();
}
b = rs_mongod();
x = a.getDB("admin");
@ -25,7 +23,6 @@ function go() {
memb[0] = x;
memb[1] = y;
print("rs_basic.js go(): started 2 servers");
cfg = { _id: 'asdf', members: [] };

View File

@ -42,6 +42,7 @@ namespace mongo {
void call(const lam&);
private:
virtual bool initClient() { return true; }
virtual string name() { return _name; }
void doWork();
deque<lam> d;

View File

@ -30,8 +30,8 @@ namespace mongo {
*/
class Task : private BackgroundJob {
protected:
virtual void doWork() = 0; // implement the task here.
virtual string name() = 0; // name the thread
virtual void doWork() = 0; // implement the task here.
virtual string name() = 0; // name the thread
public:
Task();