From e7497e9c47de17ffe863cfb573e928f510530b4b Mon Sep 17 00:00:00 2001 From: Randolph Tan Date: Tue, 17 Mar 2015 15:54:57 -0400 Subject: [PATCH] SERVER-17592 DistributedLock Refactor Change dist_lock_try usage to ScopedDistributedLock. --- src/mongo/s/balance.cpp | 10 ++- src/mongo/s/chunk_manager.cpp | 17 ++-- .../s/commands/cluster_move_primary_cmd.cpp | 22 ++--- src/mongo/s/commands_public.cpp | 52 ++---------- src/mongo/s/distlock.cpp | 2 +- src/mongo/s/distlock.h | 82 +------------------ 6 files changed, 31 insertions(+), 154 deletions(-) diff --git a/src/mongo/s/balance.cpp b/src/mongo/s/balance.cpp index 7b34b2269bc..cf010a7c8ac 100644 --- a/src/mongo/s/balance.cpp +++ b/src/mongo/s/balance.cpp @@ -564,7 +564,6 @@ namespace mongo { // getConnectioString and dist lock constructor does not throw, which is what we expect on while // on the balancer thread ConnectionString config = configServer.getConnectionString(); - DistributedLock balanceLock( config , "balancer" ); while ( ! inShutdown() ) { @@ -616,9 +615,12 @@ namespace mongo { uassert( 13258 , "oids broken after resetting!" , _checkOIDs() ); { - dist_lock_try lk( &balanceLock , "doing balance round" ); - if ( ! lk.got() ) { - LOG(1) << "skipping balancing round because another balancer is active" << endl; + ScopedDistributedLock balancerLock(config, "balancer"); + balancerLock.setLockMessage("doing balance round"); + + Status lockStatus = balancerLock.tryAcquire(); + if (!lockStatus.isOK()) { + LOG(1) << "skipping balancing round" << causedBy(lockStatus); // Ping again so scripts can determine if we're active without waiting _ping( true ); diff --git a/src/mongo/s/chunk_manager.cpp b/src/mongo/s/chunk_manager.cpp index 9e075c4724e..3ce85616ee3 100644 --- a/src/mongo/s/chunk_manager.cpp +++ b/src/mongo/s/chunk_manager.cpp @@ -673,19 +673,14 @@ namespace { configServer.logChange( "dropCollection.start" , _ns , BSONObj() ); - DistributedLock nsLock( ConnectionString( configServer.modelServer(), - ConnectionString::SYNC ), - _ns ); + ScopedDistributedLock nsLock(configServer.getConnectionString(), _ns); + nsLock.setLockMessage("drop"); - dist_lock_try dlk; - try{ - dlk = dist_lock_try( &nsLock , "drop" ); + Status lockStatus = nsLock.tryAcquire(); + if (!lockStatus.isOK()) { + uasserted(14022, str::stream() << "Error locking distributed lock for chunk drop" + << causedBy(lockStatus)); } - catch( LockException& e ){ - uassert( 14022, str::stream() << "Error locking distributed lock for chunk drop." << causedBy( e ), false); - } - - uassert( 13331 , "collection's metadata is undergoing changes. Please try again." , dlk.got() ); uassert(10174, "config servers not all up", configServer.allUp(false)); diff --git a/src/mongo/s/commands/cluster_move_primary_cmd.cpp b/src/mongo/s/commands/cluster_move_primary_cmd.cpp index cac9f82f722..5a0c8b91de3 100644 --- a/src/mongo/s/commands/cluster_move_primary_cmd.cpp +++ b/src/mongo/s/commands/cluster_move_primary_cmd.cpp @@ -147,23 +147,13 @@ namespace { log() << "Moving " << dbname << " primary from: " << config->getPrimary().toString() << " to: " << s.toString(); - // Locking enabled now... 
- DistributedLock lockSetup(configServer.getConnectionString(), dbname + "-movePrimary"); + ScopedDistributedLock distLock(configServer.getConnectionString(), + dbname + "-movePrimary"); + distLock.setLockMessage(str::stream() << "Moving primary shard of " << dbname); - // Distributed locking added - dist_lock_try dlk; - try { - dlk = dist_lock_try(&lockSetup, string("Moving primary shard of ") + dbname); - } - catch (LockException& e) { - errmsg = str::stream() << "error locking distributed lock to move primary shard of " << dbname << causedBy(e); - warning() << errmsg; - return false; - } - - if (!dlk.got()) { - errmsg = (string)"metadata lock is already taken for moving " + dbname; - return false; + Status lockStatus = distLock.tryAcquire(); + if (!lockStatus.isOK()) { + return appendCommandStatus(result, lockStatus); } set shardedColls; diff --git a/src/mongo/s/commands_public.cpp b/src/mongo/s/commands_public.cpp index 913a96f9266..f826fec3ad6 100644 --- a/src/mongo/s/commands_public.cpp +++ b/src/mongo/s/commands_public.cpp @@ -1801,31 +1801,7 @@ namespace mongo { bool ok = true; { - // take distributed lock to prevent split / migration - /* - ConnectionString config = configServer.getConnectionString(); - DistributedLock lockSetup( config , fullns ); - dist_lock_try dlk; - - - if (shardedInput) { - try{ - int tryc = 0; - while ( !dlk.got() ) { - dlk = dist_lock_try( &lockSetup , (string)"mr-parallel" ); - if ( ! dlk.got() ) { - if ( ++tryc % 100 == 0 ) - warning() << "the collection metadata could not be locked for mapreduce, already locked by " << dlk.other() << endl; - sleepmillis(100); - } - } - } - catch( LockException& e ){ - errmsg = str::stream() << "error locking distributed lock for mapreduce " << causedBy( e ); - return false; - } - } - */ + // TODO: take distributed lock to prevent split / migration? try { STRATEGY->commandOp( dbName, shardedCommand, 0, fullns, q, &results ); @@ -1955,25 +1931,15 @@ namespace mongo { map chunkSizes; { - // take distributed lock to prevent split / migration - ConnectionString config = configServer.getConnectionString(); - DistributedLock lockSetup( config , finalColLong ); - dist_lock_try dlk; + // take distributed lock to prevent split / migration. + ScopedDistributedLock nsLock(configServer.getConnectionString(), + finalColLong); + nsLock.setLockMessage("mr-post-process"); + nsLock.setLockTryIntervalMillis(100); - try{ - int tryc = 0; - while ( !dlk.got() ) { - dlk = dist_lock_try( &lockSetup , (string)"mr-post-process" ); - if ( ! 
dlk.got() ) { - if ( ++tryc % 100 == 0 ) - warning() << "the collection metadata could not be locked for mapreduce, already locked by " << dlk.other() << endl; - sleepmillis(100); - } - } - } - catch( LockException& e ){ - errmsg = str::stream() << "error locking distributed lock for mapreduce " << causedBy( e ); - return false; + Status lockStatus = nsLock.acquire(-1 /* retry indefinitely */); + if (!lockStatus.isOK()) { + return appendCommandStatus(result, lockStatus); } BSONObj finalCmdObj = finalCmd.obj(); diff --git a/src/mongo/s/distlock.cpp b/src/mongo/s/distlock.cpp index 7ae4749c092..1329afcda4f 100644 --- a/src/mongo/s/distlock.cpp +++ b/src/mongo/s/distlock.cpp @@ -1213,7 +1213,7 @@ namespace mongo { if (waitForMillis == 0) break; - if (lastStatus.code() == ErrorCodes::DistributedClockSkewed) { + if (lastStatus != ErrorCodes::LockBusy) { return lastStatus; } diff --git a/src/mongo/s/distlock.h b/src/mongo/s/distlock.h index 1b52039129c..ec90cda85c2 100644 --- a/src/mongo/s/distlock.h +++ b/src/mongo/s/distlock.h @@ -235,83 +235,6 @@ namespace mongo { bool MONGO_CLIENT_API isLockPingerEnabled(); void MONGO_CLIENT_API setLockPingerEnabled(bool enabled); - - class MONGO_CLIENT_API dist_lock_try { - public: - - dist_lock_try() : _lock(NULL), _got(false) {} - - dist_lock_try( const dist_lock_try& that ) : - _lock(that._lock), _got(that._got), _other(that._other) { - - _other.getOwned(); - - // Make sure the lock ownership passes to this object, - // so we only unlock once. - ((dist_lock_try&) that)._got = false; - ((dist_lock_try&) that)._lock = NULL; - ((dist_lock_try&) that)._other = BSONObj(); - } - - // Needed so we can handle lock exceptions in context of lock try. - dist_lock_try& operator=( const dist_lock_try& that ){ - - if( this == &that ) return *this; - - _lock = that._lock; - _got = that._got; - _other = that._other; - _other.getOwned(); - _why = that._why; - - // Make sure the lock ownership passes to this object, - // so we only unlock once. - ((dist_lock_try&) that)._got = false; - ((dist_lock_try&) that)._lock = NULL; - ((dist_lock_try&) that)._other = BSONObj(); - - return *this; - } - - dist_lock_try( DistributedLock * lock , const std::string& why, double timeout = 0.0 ) - : _lock(lock), _why(why) { - _got = _lock->lock_try( why , false , &_other, timeout ); - } - - ~dist_lock_try() { - if ( _got ) { - verify( ! _other.isEmpty() ); - _lock->unlock( &_other ); - } - } - - /** - * Returns not OK if the lock is known _not_ to be held. - */ - Status checkStatus(double timeout) { - if ( !_lock ) { - return Status(ErrorCodes::LockFailed, "Lock is not currently set up"); - } - - if ( !_got ) { - return Status(ErrorCodes::LockFailed, - str::stream() << "Lock " << _lock->_name << " is currently held by " - << _other); - } - - return _lock->checkStatus(timeout); - } - - bool got() const { return _got; } - BSONObj other() const { return _other; } - - private: - DistributedLock * _lock; - bool _got; - BSONObj _other; - std::string _why; - }; - /** * Scoped wrapper for a distributed lock acquisition attempt. One or more attempts to acquire * the distributed lock are managed by this class, and the distributed lock is unlocked if @@ -343,8 +266,9 @@ namespace mongo { void unlock(); /** - * Tries multiple times to unlock the lock, using the specified lock try interval, until - * a certain amount of time has passed. 
+ * Tries multiple times to acquire the lock, using the specified lock try interval,
+ * until a certain amount of time has passed or an error other than LockBusy
+ * occurs.
*
* waitForMillis = 0 indicates there should only be one attempt to acquire the lock, and
* no waiting.
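
For reviewers: a minimal sketch of the two acquisition patterns the converted call sites above share, assembled only from calls visible in this patch (ScopedDistributedLock, setLockMessage, tryAcquire, setLockTryIntervalMillis, acquire). The wrapping function, its name, and the include line are illustrative assumptions, not part of the change.

    #include "mongo/s/distlock.h"  // assumed location of ScopedDistributedLock

    namespace mongo {

        // Hypothetical helper, for illustration only.
        Status exampleDistLockUsage(const ConnectionString& configLoc, const std::string& name) {
            ScopedDistributedLock distLock(configLoc, name);
            distLock.setLockMessage("doing example work");

            // Pattern 1: single attempt, fail fast if the lock is held elsewhere
            // (balance.cpp, chunk_manager.cpp, cluster_move_primary_cmd.cpp).
            Status lockStatus = distLock.tryAcquire();
            if (!lockStatus.isOK()) {
                return lockStatus;
            }

            // Pattern 2 (alternative): retry every 100 ms until acquired, or until an
            // error other than LockBusy occurs (mr-post-process in commands_public.cpp):
            //     distLock.setLockTryIntervalMillis(100);
            //     lockStatus = distLock.acquire(-1 /* retry indefinitely */);

            // Per the class comment above, the scoped wrapper unlocks the distributed
            // lock when 'distLock' goes out of scope.
            return Status::OK();
        }

    }  // namespace mongo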