0
0
mirror of https://github.com/mongodb/mongo.git synced 2024-12-01 09:32:32 +01:00
mongodb/client/distlock.cpp

238 lines
7.8 KiB
C++

// @file distlock.cpp
/* Copyright 2009 10gen Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "pch.h"
#include "dbclient.h"
#include "distlock.h"
namespace mongo {
// Namespace (database.collection) that holds one "ping" document per process;
// written by distLockPingThread() and read by DistributedLock::lock_try().
string lockPingNS( "config.lockpings" );

// Per-thread cache for the id built by getDistLockId(); the empty string
// means "not generated yet" for the calling thread.
ThreadLocalValue<string> distLockIds( "" );
/**
 * Returns the identifier this process uses for distributed locking.
 *
 * The identifier is assembled on the first call from getHostNameCached(),
 * the current time(0) and a rand() value, joined with ':'; it is stored in
 * a function-local static and the same string is handed back afterwards.
 */
string getDistLockProcess(){
    static string processId;

    // already built on an earlier call
    if ( ! processId.empty() )
        return processId;

    stringstream buf;
    buf << getHostNameCached() << ":" << time(0) << ":" << rand();
    processId = buf.str();
    return processId;
}
/**
 * Returns the distributed-lock identifier of the calling thread.
 *
 * The value is cached in the thread-local distLockIds; when the cache is
 * still empty it is built as
 * "<getDistLockProcess()>:<getThreadName()>:<rand()>" and stored before
 * being returned.
 */
string getDistLockId(){
    string cached = distLockIds.get();
    if ( ! cached.empty() )
        return cached;

    // first call on this thread: build the id and remember it
    stringstream buf;
    buf << getDistLockProcess() << ":" << getThreadName() << ":" << rand();
    cached = buf.str();
    distLockIds.set( cached );
    return cached;
}
/**
 * Body of the background "pinger" thread started by DistributedLockPinger
 * for one connection string.
 *
 * Until inShutdown() becomes true it repeats, every 30 seconds:
 *   - update this process' document in lockPingNS with the current date,
 *   - remove ping documents older than 4 days,
 *   - make sure (once per thread) that an index on "ping" exists.
 * Any std::exception raised by a round is logged as a warning and the loop
 * carries on with the next round.
 *
 * @param addr  server(s) holding the lockPingNS collection
 */
void distLockPingThread( ConnectionString addr ){
    // Kept per thread on purpose. A function-level static would be shared
    // (unsynchronised) by the pinger threads of every address, so only the
    // first thread would ever create the index. The flag is raised only
    // after ensureIndex() returned, so a failed attempt is retried on the
    // next round.
    bool indexEnsured = false;

    while( ! inShutdown() ){
        try {
            ScopedDbConnection conn( addr );

            // do ping (last argument: upsert, so the document is created on first use)
            conn->update( lockPingNS ,
                          BSON( "_id" << getDistLockProcess() ) ,
                          BSON( "$set" << BSON( "ping" << DATENOW ) ) ,
                          true );

            // remove really old entries (4 days, expressed in milliseconds)
            BSONObjBuilder f;
            f.appendDate( "$lt" , jsTime() - ( 4 * 86400 * 1000 ) );
            BSONObj r = BSON( "ping" << f.obj() );
            conn->remove( lockPingNS , r );

            // create index so remove is fast even with a lot of servers
            if ( ! indexEnsured ){
                conn->ensureIndex( lockPingNS , BSON( "ping" << 1 ) );
                indexEnsured = true;
            }

            conn.done();
        }
        catch ( std::exception& e ){
            log( LL_WARNING ) << "couldn't ping: " << e.what() << endl;
        }

        sleepsecs(30);
    }
}
class DistributedLockPinger {
public:
DistributedLockPinger()
: _mutex( "DistributedLockPinger" ){
}
void got( const ConnectionString& conn ){
string s = conn.toString();
scoped_lock lk( _mutex );
if ( _seen.count( s ) > 0 )
return;
boost::thread t( boost::bind( &distLockPingThread , conn ) );
_seen.insert( s );
}
set<string> _seen;
mongo::mutex _mutex;
} distLockPinger;
/**
 * Prepares a handle for the distributed lock called `name`; nothing is
 * acquired here.
 *
 * @param conn             server(s) that store the lock document
 * @param name             lock name, used as the "_id" of the lock document
 * @param takeoverMinutes  stored in _takeoverMinutes; lock_try() compares the
 *                         age of the holder's last ping against it
 *
 * Also registers `conn` with distLockPinger so that a ping thread exists
 * for it.
 */
DistributedLock::DistributedLock( const ConnectionString& conn , const string& name , unsigned takeoverMinutes )
    : _conn( conn ) ,
      _name( name ) ,
      _takeoverMinutes( takeoverMinutes ){

    _ns = "config.locks";

    // selector matching this lock's document
    BSONObjBuilder idBuilder;
    idBuilder.append( "_id" , name );
    _id = idBuilder.obj();

    distLockPinger.got( conn );
}
/**
 * Makes a single, non-blocking attempt to take the lock.
 *
 * Steps:
 *   1. Read the lock document. If it does not exist, insert it in the
 *      unlocked state (state 0). If it is held (state > 0), look up the
 *      holder's last ping in lockPingNS: give up when there is no ping or
 *      when the ping is at most _takeoverMinutes old, otherwise reset the
 *      state to 0 (forced takeover). If it is free and carries a "ts", add
 *      that "ts" to the update selector.
 *   2. Issue an update selecting { _id, state: 0 [, ts] } that sets state 1
 *      together with who/process/when/why and a fresh "ts"; the "n" field
 *      of getLastErrorDetailed() tells whether a document was modified.
 *   3. If the update raised UpdateNotTheSame, query every server listed in
 *      the exception, keep the document with the greatest "ts", and treat
 *      the lock as ours only if that "ts" is the one generated here.
 *
 * @param why    free-text reason, stored in the lock document
 * @param other  optional out-parameter; when the update in step 2 modified
 *               no document it receives the current lock document
 * @return true if the lock was acquired, false otherwise
 */
bool DistributedLock::lock_try( string why , BSONObj * other ){
    ScopedDbConnection conn( _conn );

    BSONObjBuilder queryBuilder;
    queryBuilder.appendElements( _id );
    queryBuilder.append( "state" , 0 );

    { // make sure its there so we can use simple update logic below
        BSONObj o = conn->findOne( _ns , _id );
        if ( o.isEmpty() ){
            try {
                conn->insert( _ns , BSON( "_id" << _name << "state" << 0 << "who" << "" ) );
            }
            catch ( UserException& ){
                // deliberately ignored -- presumably another client created the
                // document first; the conditional update below decides who wins
            }
        }
        else if ( o["state"].numberInt() > 0 ){
            BSONObj lastPing = conn->findOne( lockPingNS , o["process"].wrap( "_id" ) );
            if ( lastPing.isEmpty() ){
                // TODO: maybe this should clear, not sure yet
                log() << "lastPing is empty! this could be bad: " << o << endl;
                conn.done();
                return false;
            }

            // Both values are unsigned milliseconds. A ping stamped ahead of our
            // clock would make a plain subtraction wrap around to a huge number
            // and trigger a bogus takeover of a live lock, so treat it as 0.
            const unsigned long long nowMillis = jsTime();
            const unsigned long long pingMillis = lastPing["ping"].Date();
            unsigned long long elapsed = nowMillis > pingMillis ? nowMillis - pingMillis : 0; // in ms
            elapsed = elapsed / ( 1000 * 60 ); // convert to minutes

            if ( elapsed <= _takeoverMinutes ){
                log(1) << "dist_lock lock failed because taken by: " << o << endl;
                conn.done();
                return false;
            }

            log() << "dist_lock forcefully taking over from: " << o << " elapsed minutes: " << elapsed << endl;
            conn->update( _ns , _id , BSON( "$set" << BSON( "state" << 0 ) ) );
        }
        else if ( o["ts"].type() ){
            // free and stamped: only update the exact version we just read
            queryBuilder.append( o["ts"] );
        }
    }

    // fresh stamp identifying this particular acquisition attempt
    OID ts;
    ts.init();

    bool gotLock = false;
    BSONObj now;

    BSONObj whatIWant = BSON( "$set" << BSON( "state" << 1 <<
                                              "who" << getDistLockId() << "process" << getDistLockProcess() <<
                                              "when" << DATENOW << "why" << why << "ts" << ts ) );
    try {
        conn->update( _ns , queryBuilder.obj() , whatIWant );

        BSONObj o = conn->getLastErrorDetailed();
        now = conn->findOne( _ns , _id );

        if ( o["n"].numberInt() == 0 ){
            // selector matched nothing: the lock is not free (any more)
            if ( other )
                *other = now;
            gotLock = false;
        }
        else {
            gotLock = true;
        }
    }
    catch ( UpdateNotTheSame& up ){
        // this means our update got through on some, but not others
        for ( unsigned i=0; i<up.size(); i++ ){
            ScopedDbConnection temp( up[i].first );
            BSONObj temp2 = temp->findOne( _ns , _id );

            // keep the document carrying the greatest "ts"
            if ( now.isEmpty() || now["ts"] < temp2["ts"] ){
                now = temp2.getOwned();
            }

            temp.done();
        }

        if ( now["ts"].OID() == ts ){
            gotLock = true;
            // our stamp is the greatest one: re-apply the update through conn
            conn->update( _ns , _id , whatIWant );
        }
        else {
            gotLock = false;
        }
    }

    conn.done();

    log(1) << "dist_lock lock gotLock: " << gotLock << " now: " << now << endl;

    return gotLock;
}
/**
 * Releases the lock by setting "state" to 0 in the lock document.
 *
 * Up to 3 attempts are made. A std::exception raised by an attempt is
 * logged as a warning and followed by a back-off of (1 << attempt) seconds
 * before the next attempt. If every attempt fails, a final warning is
 * logged and the function returns normally; the message points out that
 * the lock can then only be taken over after _takeoverMinutes.
 */
void DistributedLock::unlock(){
    const int maxAttempts = 3;
    int attempted = 0;
    while ( ++attempted <= maxAttempts ) {

        try {
            ScopedDbConnection conn( _conn );
            conn->update( _ns , _id, BSON( "$set" << BSON( "state" << 0 ) ) );
            log(1) << "dist_lock unlock: " << conn->findOne( _ns , _id ) << endl;
            conn.done();

            return;

        } catch ( std::exception& e) {
            log( LL_WARNING ) << "dist_lock  " << _name << " failed to contact config server in unlock attempt "
                              << attempted << ": " << e.what() << endl;

            // back off only when another attempt follows; sleeping after the
            // last failure would just delay the caller for nothing
            if ( attempted < maxAttempts )
                sleepsecs(1 << attempted);
        }
    }

    log( LL_WARNING ) << "dist_lock couldn't consumate unlock request. " << "Lock " << _name
                      << " will be taken over after " << _takeoverMinutes << " minutes timeout" << endl;
}
}