// Tool.cpp
#include "tool.h"
#include <iostream>
#include <boost/filesystem/operations.hpp>
#include <pcrecpp.h>
#include "util/file_allocator.h"
#include "util/password.h"
using namespace std;
using namespace mongo;
namespace po = boost::program_options;
namespace mongo {
// Namespace-scope CmdLine settings object used by the tools. Tool::main()
// sets its prealloc and dur fields to false before parsing the arguments.
CmdLine cmdLine;
// Builds the option tables shared by every command-line tool.
//
// name              - stored in _name; also used to title the hidden options group
// access            - bitmask; REMOTE_SERVER, LOCAL_SERVER and SPECIFY_DBCOL are
//                     each tested below before a group of options
// defaultDB         - initial value of _db
// defaultCollection - initial value of _coll
// usesstdout        - stored in _usesstdout; main() uses it to choose between
//                     cout and cerr for the "connected to" message
//
// Connection pointers start out null and the _noconnection, _autoreconnect and
// _paired flags start out false.
Tool::Tool( string name , DBAccess access , string defaultDB ,
string defaultCollection , bool usesstdout ) :
_name( name ) , _db( defaultDB ) , _coll( defaultCollection ) ,
_usesstdout(usesstdout), _noconnection(false), _autoreconnect(false), _conn(0), _slaveConn(0), _paired(false) {
// Visible options group; released again in ~Tool().
_options = new po::options_description( "options" );
// NOTE(review): the option entries in this constructor are not preceded by a
// visible add_options() call in this excerpt - confirm against the full file.
("help","produce help message")
("verbose,v", "be more verbose (include multiple times for more verbosity e.g. -vvvvv)")
// Options that only make sense when talking to a server over the network.
if ( access & REMOTE_SERVER )
("host,h",po::value<string>(), "mongo host to connect to (\"left,right\" for pairs)" )
("port",po::value<string>(), "server port. Can also use --host hostname:port" )
("ipv6", "enable IPv6 support (disabled by default)")
("username,u",po::value<string>(), "username" )
// The password option is bound to the _password member through PasswordValue.
("password,p", new PasswordValue( &_password ), "password" )
// Options for opening the data files directly instead of connecting.
if ( access & LOCAL_SERVER )
("dbpath",po::value<string>(), "directly access mongod database "
"files in the given path, instead of connecting to a mongod "
"server - needs to lock the data directory, so cannot be "
"used if a mongod is currently accessing the same path" )
("directoryperdb", "if dbpath specified, each db is in a separate directory" )
// Options for selecting the target database and collection.
if ( access & SPECIFY_DBCOL )
("db,d",po::value<string>(), "database to use" )
("collection,c",po::value<string>(), "collection to use (some commands)" )
// Second options group, kept separate from _options; released in ~Tool().
_hidden_options = new po::options_description( name + " hidden options" );
/* support for -vv -vvvv etc.: registers "vv" up to ten v's as hidden flags */
for (string s = "vv"; s.length() <= 10; s.append("v")) {
_hidden_options->add_options()(s.c_str(), "verbose");
// Releases the two option tables allocated in the constructor and, if one
// was created, the connection object held in _conn.
// NOTE(review): _slaveConn is not deleted here; conn() obtains it as the
// address of a reference returned by the replica-set client, so it is
// presumably not owned by this object - confirm.
Tool::~Tool() {
delete( _options );
delete( _hidden_options );
// _conn stays null when no connection was ever established.
if ( _conn )
delete _conn;
// Presumably writes the tool's usage text to `out`; the body is not visible
// in this excerpt, so nothing further is documented here - TODO confirm.
void Tool::printHelp(ostream &out) {
// Common entry point for a tool: parses the command line into _params,
// overwrites a password given on the command line, sets the log level,
// establishes _conn (network connection, or a DBDirectClient when "dbpath"
// is given), reads db / collection / username / password, and finally
// calls run().
//
// argc, argv - the process arguments; argv is modified in place when a
//              password argument is found.
// Returns 0 when "help" was requested, -1 on an invalid host, a failed
// connection, the DBException handler in the dbpath branch or a DBException
// from run(); otherwise whatever run() returned.
// NOTE(review): several lines of this function (closing braces, the tail of
// the command_line_style expression, try bodies) are not visible in this
// excerpt - the comments below describe only what is shown.
int Tool::main( int argc , char ** argv ) {
static StaticObserver staticObserver;
cmdLine.prealloc = false;
// The default value may vary depending on compile options, but for tools
// we want durability to be disabled.
cmdLine.dur = false;
boost::filesystem::path::default_name_check( boost::filesystem::no_check );
_name = argv[0];
/* using the same style as db.cpp */
int command_line_style = (((po::command_line_style::unix_style ^
po::command_line_style::allow_guessing) |
po::command_line_style::allow_long_disguise) ^
// Parse argv into _params; parse errors are reported on cerr.
try {
po::options_description all_options("all options");
po::store( po::command_line_parser( argc , argv ).
positional( _positonalOptions ).
style(command_line_style).run() , _params );
po::notify( _params );
catch (po::error &e) {
cerr << "ERROR: " << e.what() << endl << endl;
// hide password from ps output
// NOTE(review): only the separate-argument forms "-p <pw>" and
// "--password <pw>" match these strcmp calls; an attached form such as
// "--password=<pw>" would not be overwritten - confirm whether that matters.
for (int i=0; i < (argc-1); ++i) {
if (!strcmp(argv[i], "-p") || !strcmp(argv[i], "--password")) {
char* arg = argv[i+1];
while (*arg) {
*arg++ = 'x';
if ( _params.count( "help" ) ) {
return 0;
// A single "verbose" sets level 1; the hidden "vv".."vvvvvvvvvv" flags set
// the level to the number of v's.
if ( _params.count( "verbose" ) ) {
logLevel = 1;
for (string s = "vv"; s.length() <= 10; s.append("v")) {
if (_params.count(s)) {
logLevel = s.length();
// "dbpath" switches from a network connection to direct data-file access.
bool useDirectClient = hasParam( "dbpath" );
if ( ! useDirectClient ) {
_host = "";
if ( _params.count( "host" ) )
_host = _params["host"].as<string>();
// NOTE(review): with "port" but no "host", _host becomes ":<port>".
if ( _params.count( "port" ) )
_host += ':' + _params["port"].as<string>();
if ( _noconnection ) {
// do nothing
else {
string errmsg;
ConnectionString cs = ConnectionString::parse( _host , errmsg );
if ( ! cs.isValid() ) {
cerr << "invalid hostname [" << _host << "] " << errmsg << endl;
return -1;
_conn = cs.connect( errmsg );
if ( ! _conn ) {
cerr << "couldn't connect to [" << _host << "] " << errmsg << endl;
return -1;
// Status line goes to stdout only for tools constructed with usesstdout.
(_usesstdout ? cout : cerr ) << "connected to: " << _host << endl;
else {
if ( _params.count( "directoryperdb" ) ) {
directoryperdb = true;
assert( lastError.get( true ) );
_conn = new DBDirectClient();
_host = "DIRECT";
// Function-local static keeps the string alive so that the c_str() pointer
// stored in dbpath remains valid after this function returns.
static string myDbpath = getParam( "dbpath" );
dbpath = myDbpath.c_str();
try {
catch ( DBException& ) {
cerr << endl << "If you are running a mongod on the same "
"path you should connect to that instead of direct data "
"file access" << endl << endl;
dbexit( EXIT_CLEAN );
return -1;
// Command-line values override the defaults passed to the constructor.
if ( _params.count( "db" ) )
_db = _params["db"].as<string>();
if ( _params.count( "collection" ) )
_coll = _params["collection"].as<string>();
if ( _params.count( "username" ) )
_username = _params["username"].as<string>();
// "password" given without a value: prompt for it interactively.
if ( _params.count( "password" )
&& ( _password.empty() ) ) {
_password = askPassword();
if (_params.count("ipv6"))
// Run the tool-specific work; a DBException is reported and mapped to -1.
int ret = -1;
try {
ret = run();
catch ( DBException& e ) {
cerr << "assertion: " << e.toString() << endl;
ret = -1;
if ( currentClient.get() )
if ( useDirectClient )
dbexit( EXIT_CLEAN );
return ret;
// Returns the connection to use for requests.
//
// slaveIfPaired - when true and the connection type is ConnectionString::SET,
//                 the replica-set client's slaveConn() is returned instead;
//                 its address is cached in _slaveConn on first use.
// Otherwise returns *_conn.
// NOTE(review): _conn is dereferenced without a null check; it is still 0
// when main() took the _noconnection branch - confirm callers avoid that.
DBClientBase& Tool::conn( bool slaveIfPaired ) {
if ( slaveIfPaired && _conn->type() == ConnectionString::SET ) {
if (!_slaveConn)
_slaveConn = &((DBClientReplicaSet*)_conn)->slaveConn();
return *_slaveConn;
return *_conn;
// Reports whether writes can go to the current target.
//
// Returns true immediately when "dbpath" was given (direct file access).
// Otherwise asks the server through conn().isMaster(); when that call
// succeeds and reports non-master, an error plus the returned info object
// is printed to cerr and false is returned.
// NOTE(review): when the isMaster call itself fails (ok == false) the
// function still returns true - confirm this is intended.
bool Tool::isMaster() {
if ( hasParam("dbpath") ) {
return true;
BSONObj info;
// Local flag filled in by the call below; it shadows the method name.
bool isMaster;
bool ok = conn().isMaster(isMaster, &info);
if (ok && !isMaster) {
cerr << "ERROR: trying to write to non-master " << conn().toString() << endl;
cerr << "isMaster info: " << info << endl;
return false;
return true;
// Declares the two field-selection options, "fields,f" and "fieldFile",
// which needFields() later reads.
// NOTE(review): the add_options() call these entries belong to is not
// visible in this excerpt - confirm against the full file.
void Tool::addFieldOptions() {
("fields,f" , po::value<string>() , "comma separated list of field names e.g. -f name,age" )
("fieldFile" , po::value<string>() , "file with fields names - 1 per line" )
// Fills _fields (list of names) and _fieldsObj (object mapping each name
// to 1) from either the "fields" or the "fieldFile" parameter.
//
// Throws UserException 9999 when the named field file does not exist and
// UserException 9998 with "you need to specify fields".
// NOTE(review): the lines that end each branch are not visible in this
// excerpt; presumably each branch returns before the final throw - confirm.
void Tool::needFields() {
if ( hasParam( "fields" ) ) {
BSONObjBuilder b;
string fields_arg = getParam("fields");
pcrecpp::StringPiece input(fields_arg);
string f;
// Each match is a run of word characters, '#', '.', whitespace or '-',
// optionally followed by a comma; the run itself is captured into f.
pcrecpp::RE re("([#\\w\\.\\s\\-]+),?" );
while ( re.Consume( &input, &f ) ) {
_fields.push_back( f );
b.append( f , 1 );
_fieldsObj = b.obj();
if ( hasParam( "fieldFile" ) ) {
string fn = getParam( "fieldFile" );
if ( ! exists( fn ) )
throw UserException( 9999 , ((string)"file: " + fn ) + " doesn't exist" );
// getline is limited to BUF_SIZE characters; the buffer is declared 128
// bytes larger than that limit.
const int BUF_SIZE = 1024;
char line[ 1024 + 128];
ifstream file( fn.c_str() );
BSONObjBuilder b;
// Read while the stream has no error or EOF flag set.
while ( file.rdstate() == ios_base::goodbit ) {
file.getline( line , BUF_SIZE );
const char * cur = line;
// Skip leading whitespace on the line.
while ( isspace( cur[0] ) ) cur++;
// NOTE(review): the statement controlled by this empty-line test is not
// visible in this excerpt; presumably blank lines are skipped - confirm.
if ( cur[0] == '\0' )
_fields.push_back( cur );
b.append( cur , 1 );
_fieldsObj = b.obj();
throw UserException( 9998 , "you need to specify fields" );
// Authenticates the current connection with _username / _password.
//
// dbname - database to authenticate against; an empty value is replaced by _db.
// The visible code tries `dbname` first and then the "admin" database, and
// throws UserException 9997 ("auth failed: " plus the error text).
// NOTE(review): the statements following each `if` (presumably early
// returns) are not visible in this excerpt - confirm against the full file.
void Tool::auth( string dbname ) {
if ( ! dbname.size() )
dbname = _db;
// Tested when neither a username nor a password was supplied.
if ( ! ( _username.size() || _password.size() ) )
string errmsg;
if ( _conn->auth( dbname , _username , _password , errmsg ) )
// try against the admin db
// NOTE(review): err2 is declared but the call below passes errmsg, so both
// attempts share one message variable and err2 is unused in the visible code.
string err2;
if ( _conn->auth( "admin" , _username , _password , errmsg ) )
throw UserException( 9997 , (string)"auth failed: " + errmsg );
// Constructs a tool that consumes BSON files. The base Tool is created with
// empty default database and collection names, and two extra options,
// "objcheck" and "filter", are declared.
//
// name     - forwarded to Tool
// access   - forwarded to Tool
// objcheck - initial value of _objcheck (run() overwrites it from the
//            "objcheck" parameter)
// NOTE(review): the add_options() call for the entries below is not visible
// in this excerpt - confirm against the full file.
BSONTool::BSONTool( const char * name, DBAccess access , bool objcheck )
: Tool( name , access , "" , "" ) , _objcheck( objcheck ) {
("objcheck" , "validate object before inserting" )
("filter" , po::value<string>() , "filter to apply before inserting" )
// Applies the BSON-specific parameters and then delegates to doRun().
//
// _objcheck is set from the presence of the "objcheck" parameter, replacing
// the constructor value. When "filter" is present its JSON text is parsed
// with fromjson() and wrapped in a Matcher stored in _matcher.
// Returns whatever doRun() returns.
int BSONTool::run() {
_objcheck = hasParam( "objcheck" );
if ( hasParam( "filter" ) )
_matcher.reset( new Matcher( fromjson( getParam( "filter" ) ) ) );
return doRun();
// Reads the file at `root` as a sequence of BSON objects and hands each one
// that passes the optional filter to gotObject().
//
// root - path of the file to read; its string form is stored in _fileName.
// Returns 0 when the file is empty or cannot be opened, otherwise the value
// of `processed`.
// Raises through uassert 10264 when an object size is not below BUF_SIZE and
// 10265 when the progress meter's total differs from the file length.
// NOTE(review): no update of `processed` or `num` is visible in this excerpt,
// and several structural lines (closing braces, #endif, a try body) are
// missing from view - confirm against the full file.
long long BSONTool::processFile( const path& root ) {
_fileName = root.string();
unsigned long long fileLength = file_size( root );
if ( fileLength == 0 ) {
out() << "file " << _fileName << " empty, skipping" << endl;
return 0;
FILE* file = fopen( _fileName.c_str() , "rb" );
if ( ! file ) {
log() << "error opening file: " << _fileName << endl;
return 0;
// Advise the OS of sequential access where posix_fadvise is available.
#if !defined(__sunos__) && defined(POSIX_FADV_SEQUENTIAL)
posix_fadvise(fileno(file), 0, fileLength, POSIX_FADV_SEQUENTIAL);
log(1) << "\t file size: " << fileLength << endl;
unsigned long long read = 0;
unsigned long long num = 0;
unsigned long long processed = 0;
// One 5 MiB heap buffer is reused for every object; scoped_array frees it.
const int BUF_SIZE = 1024 * 1024 * 5;
boost::scoped_array<char> buf_holder(new char[BUF_SIZE]);
char * buf = buf_holder.get();
ProgressMeter m( fileLength );
while ( read < fileLength ) {
// The first 4 bytes of each object are read and reinterpreted in place as
// an int giving the total object size.
// NOTE(review): the fread results stored in readlen are not checked in the
// visible code, so a short read would go unnoticed.
int readlen = fread(buf, 4, 1, file);
int size = ((int*)buf)[0];
if ( size >= BUF_SIZE ) {
cerr << "got an object of size: " << size << " terminating..." << endl;
uassert( 10264 , "invalid object size" , size < BUF_SIZE );
// NOTE(review): only the upper bound is validated; a size below 4 makes
// size-4 negative before it is converted for fread - consider a lower bound.
readlen = fread(buf+4, size-4, 1, file);
// Construct the object over the shared buffer.
BSONObj o( buf );
if ( _objcheck && ! o.valid() ) {
// Diagnostic dump of an invalid object, element by element.
cerr << "INVALID OBJECT - going try and pring out " << endl;
cerr << "size: " << size << endl;
BSONObjIterator i(o);
while ( i.more() ) {
BSONElement e = i.next();
try {
catch ( ... ) {
cerr << "\t\t NEXT ONE IS INVALID" << endl;
cerr << "\t name : " << e.fieldName() << " " << e.type() << endl;
cerr << "\t " << e << endl;
// With no filter every object is forwarded; otherwise only matches are.
if ( _matcher.get() == 0 || _matcher->matches( o ) ) {
gotObject( o );
read += o.objsize();
m.hit( o.objsize() );
// NOTE(review): fclose is reached only on this path; if uassert leaves the
// function early (presumably by throwing) the FILE* is not closed - confirm.
fclose( file );
uassert( 10265 , "counts don't match" , m.done() == fileLength );
out() << "\t " << m.hits() << " objects found" << endl;
// The processed count is only reported when a filter was in effect.
if ( _matcher.get() )
out() << "\t " << processed << " objects processed" << endl;
return processed;
// Signal-setup hook. This definition is a no-op: the inFork argument is
// accepted and ignored, and nothing is installed.
void setupSignals( bool inFork ) {
    // nothing to do
}