0
0
mirror of https://github.com/mongodb/mongo.git synced 2024-11-30 09:06:21 +01:00
mongodb/tools/tool.cpp
2011-07-26 11:42:51 -04:00

509 lines
16 KiB
C++

/*
* Copyright (C) 2010 10gen Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License, version 3,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// Tool.cpp
#include "tool.h"
#include <iostream>
#include <boost/filesystem/operations.hpp>
#include <pcrecpp.h>
#include "util/file_allocator.h"
#include "util/password.h"
#include "util/version.h"
using namespace std;
using namespace mongo;
namespace po = boost::program_options;
namespace mongo {
// Namespace-scope command-line settings object. Tool::main() overrides its
// `prealloc` and `dur` fields before a tool runs.
CmdLine cmdLine;
/**
 * Sets up the option tables for a command-line tool.
 *
 * The visible option set always contains help/verbose/version. Further
 * groups are appended depending on which bits are set in `access`:
 *   REMOTE_SERVER -> host/port/ipv6/username/password
 *   LOCAL_SERVER  -> dbpath/directoryperdb/journal
 *   SPECIFY_DBCOL -> db/collection
 * A separate hidden option set accepts the stacked verbosity flags
 * "vv" through "vvvvvvvvvv" (2 to 10 characters).
 *
 * @param name               stored in _name and used to title the hidden options
 * @param access             bitmask selecting which option groups are offered
 * @param defaultDB          initial value of _db
 * @param defaultCollection  initial value of _coll
 * @param usesstdout         stored in _usesstdout
 */
Tool::Tool( string name , DBAccess access , string defaultDB ,
            string defaultCollection , bool usesstdout ) :
    _name( name ) , _db( defaultDB ) , _coll( defaultCollection ) ,
    _usesstdout(usesstdout), _noconnection(false), _autoreconnect(false), _conn(0), _slaveConn(0), _paired(false) {

    // Options every tool understands.
    _options = new po::options_description( "options" );
    _options->add_options()
        ("help","produce help message")
        ("verbose,v", "be more verbose (include multiple times for more verbosity e.g. -vvvvv)")
        ("version", "print the program's version and exit" )
        ;

    // Options for talking to a running server.
    if ( access & REMOTE_SERVER ) {
        _options->add_options()
            ("host,h",po::value<string>(), "mongo host to connect to ( <set name>/s1,s2 for sets)" )
            ("port",po::value<string>(), "server port. Can also use --host hostname:port" )
            ("ipv6", "enable IPv6 support (disabled by default)")
            ("username,u",po::value<string>(), "username" )
            ("password,p", new PasswordValue( &_password ), "password" )
            ;
    }

    // Options for opening the data files directly.
    if ( access & LOCAL_SERVER ) {
        _options->add_options()
            ("dbpath",po::value<string>(), "directly access mongod database "
             "files in the given path, instead of connecting to a mongod "
             "server - needs to lock the data directory, so cannot be "
             "used if a mongod is currently accessing the same path" )
            ("directoryperdb", "if dbpath specified, each db is in a separate directory" )
            ("journal", "enable journaling" )
            ;
    }

    // Options for choosing a namespace.
    if ( access & SPECIFY_DBCOL ) {
        _options->add_options()
            ("db,d",po::value<string>(), "database to use" )
            ("collection,c",po::value<string>(), "collection to use (some commands)" )
            ;
    }

    _hidden_options = new po::options_description( name + " hidden options" );

    /* support for -vv -vvvv etc. */
    string flag = "vv";
    while ( flag.length() <= 10 ) {
        _hidden_options->add_options()( flag.c_str(), "verbose" );
        flag.append( "v" );
    }
}
/**
 * Frees the two option tables allocated by the constructor and the
 * connection object, if one was created.
 */
Tool::~Tool() {
    delete _options;
    delete _hidden_options;
    // deleting a null pointer is a no-op, so no explicit check is required
    delete _conn;
}
/**
 * Writes the usage text to `out`: the output of printExtraHelp(), then the
 * visible option table, then the output of printExtraHelpAfter().
 * Hidden options are not listed.
 */
void Tool::printHelp(ostream &out) {
    printExtraHelp( out );
    _options->print( out );
    printExtraHelpAfter( out );
}
/**
 * Writes "<name> version <versionString>" followed by a newline to `out`.
 * When the version string ends in '-', " (commit <gitVersion>)" is appended
 * before the newline.
 */
void Tool::printVersion(ostream &out) {
    out << _name << " version " << mongo::versionString;

    // Guard the last-character lookup: with an empty version string,
    // strlen() - 1 would wrap around and index far outside the array.
    const size_t len = strlen( mongo::versionString );
    if ( len > 0 && mongo::versionString[len - 1] == '-' )
        out << " (commit " << mongo::gitVersion() << ")";

    out << endl;
}
int Tool::main( int argc , char ** argv ) {
static StaticObserver staticObserver;
cmdLine.prealloc = false;
// The default value may vary depending on compile options, but for tools
// we want durability to be disabled.
cmdLine.dur = false;
#if( BOOST_VERSION >= 104500 )
boost::filesystem::path::default_name_check( boost::filesystem2::no_check );
#else
boost::filesystem::path::default_name_check( boost::filesystem::no_check );
#endif
_name = argv[0];
/* using the same style as db.cpp */
int command_line_style = (((po::command_line_style::unix_style ^
po::command_line_style::allow_guessing) |
po::command_line_style::allow_long_disguise) ^
po::command_line_style::allow_sticky);
try {
po::options_description all_options("all options");
all_options.add(*_options).add(*_hidden_options);
po::store( po::command_line_parser( argc , argv ).
options(all_options).
positional( _positonalOptions ).
style(command_line_style).run() , _params );
po::notify( _params );
}
catch (po::error &e) {
cerr << "ERROR: " << e.what() << endl << endl;
printHelp(cerr);
return EXIT_BADOPTIONS;
}
// hide password from ps output
for (int i=0; i < (argc-1); ++i) {
if (!strcmp(argv[i], "-p") || !strcmp(argv[i], "--password")) {
char* arg = argv[i+1];
while (*arg) {
*arg++ = 'x';
}
}
}
if ( _params.count( "help" ) ) {
printHelp(cout);
return 0;
}
if ( _params.count( "version" ) ) {
printVersion(cout);
return 0;
}
if ( _params.count( "verbose" ) ) {
logLevel = 1;
}
for (string s = "vv"; s.length() <= 10; s.append("v")) {
if (_params.count(s)) {
logLevel = s.length();
}
}
preSetup();
bool useDirectClient = hasParam( "dbpath" );
if ( ! useDirectClient ) {
_host = "127.0.0.1";
if ( _params.count( "host" ) )
_host = _params["host"].as<string>();
if ( _params.count( "port" ) )
_host += ':' + _params["port"].as<string>();
if ( _noconnection ) {
// do nothing
}
else {
string errmsg;
ConnectionString cs = ConnectionString::parse( _host , errmsg );
if ( ! cs.isValid() ) {
cerr << "invalid hostname [" << _host << "] " << errmsg << endl;
return -1;
}
_conn = cs.connect( errmsg );
if ( ! _conn ) {
cerr << "couldn't connect to [" << _host << "] " << errmsg << endl;
return -1;
}
(_usesstdout ? cout : cerr ) << "connected to: " << _host << endl;
}
}
else {
if ( _params.count( "directoryperdb" ) ) {
directoryperdb = true;
}
assert( lastError.get( true ) );
if (_params.count("journal")){
cmdLine.dur = true;
}
Client::initThread("tools");
_conn = new DBDirectClient();
_host = "DIRECT";
static string myDbpath = getParam( "dbpath" );
dbpath = myDbpath.c_str();
try {
acquirePathLock();
}
catch ( DBException& ) {
cerr << endl << "If you are running a mongod on the same "
"path you should connect to that instead of direct data "
"file access" << endl << endl;
dbexit( EXIT_CLEAN );
return -1;
}
FileAllocator::get()->start();
dur::startup();
}
if ( _params.count( "db" ) )
_db = _params["db"].as<string>();
if ( _params.count( "collection" ) )
_coll = _params["collection"].as<string>();
if ( _params.count( "username" ) )
_username = _params["username"].as<string>();
if ( _params.count( "password" )
&& ( _password.empty() ) ) {
_password = askPassword();
}
if (_params.count("ipv6"))
enableIPv6();
int ret = -1;
try {
ret = run();
}
catch ( DBException& e ) {
cerr << "assertion: " << e.toString() << endl;
ret = -1;
}
catch(const boost::filesystem::filesystem_error &fse) {
/*
https://jira.mongodb.org/browse/SERVER-2904
Simple tools that don't access the database, such as
bsondump, aren't throwing DBExceptions, but are throwing
boost exceptions.
The currently available set of error codes don't seem to match
boost documentation. boost::filesystem::not_found_error
(from http://www.boost.org/doc/libs/1_31_0/libs/filesystem/doc/exception.htm)
doesn't seem to exist in our headers. Also, fse.code() isn't
boost::system::errc::no_such_file_or_directory when this
happens, as you would expect. And, determined from
experimentation that the command-line argument gets turned into
"\\?" instead of "/?" !!!
*/
#if defined(_WIN32)
if (/*(fse.code() == boost::system::errc::no_such_file_or_directory) &&*/
(fse.path1() == "\\?"))
printHelp(cerr);
else
#endif // _WIN32
cerr << "error: " << fse.what() << endl;
ret = -1;
}
if ( currentClient.get() )
currentClient->shutdown();
if ( useDirectClient )
dbexit( EXIT_CLEAN );
return ret;
}
/**
 * Returns the connection the tool should use.
 *
 * @param slaveIfPaired  when true and the connection type is
 *                       ConnectionString::SET, the replica-set client's
 *                       slaveConn() is returned (looked up once and cached
 *                       in _slaveConn); otherwise the primary connection
 *                       object is returned.
 */
DBClientBase& Tool::conn( bool slaveIfPaired ) {
    const bool wantSlave = slaveIfPaired && _conn->type() == ConnectionString::SET;
    if ( ! wantSlave )
        return *_conn;

    if ( _slaveConn == 0 )
        _slaveConn = &((DBClientReplicaSet*)_conn)->slaveConn();
    return *_slaveConn;
}
/**
 * Reports whether writes may proceed on the current connection.
 *
 * @return true when --dbpath is in use, when the isMaster call itself does
 *         not succeed, or when the server says it is master; false (after
 *         printing a diagnostic to stderr) only when the call succeeds and
 *         the server reports it is not master.
 */
bool Tool::isMaster() {
    if ( hasParam("dbpath") )
        return true;

    BSONObj info;
    bool master;
    const bool ok = conn().isMaster( master, &info );

    // `master` is only inspected when the call succeeded
    if ( ! ok || master )
        return true;

    cerr << "ERROR: trying to write to non-master " << conn().toString() << endl;
    cerr << "isMaster info: " << info << endl;
    return false;
}
/**
 * Registers the two options consumed by needFields():
 * --fields / -f (inline comma separated list) and --fieldFile (path to a
 * file listing one field name per line).
 */
void Tool::addFieldOptions() {
    add_options()
        ( "fields,f" , po::value<string>() , "comma separated list of field names e.g. -f name,age" )
        ( "fieldFile" , po::value<string>() , "file with fields names - 1 per line" )
        ;
}
void Tool::needFields() {
if ( hasParam( "fields" ) ) {
BSONObjBuilder b;
string fields_arg = getParam("fields");
pcrecpp::StringPiece input(fields_arg);
string f;
pcrecpp::RE re("([#\\w\\.\\s\\-]+),?" );
while ( re.Consume( &input, &f ) ) {
_fields.push_back( f );
b.append( f , 1 );
}
_fieldsObj = b.obj();
return;
}
if ( hasParam( "fieldFile" ) ) {
string fn = getParam( "fieldFile" );
if ( ! exists( fn ) )
throw UserException( 9999 , ((string)"file: " + fn ) + " doesn't exist" );
const int BUF_SIZE = 1024;
char line[ 1024 + 128];
ifstream file( fn.c_str() );
BSONObjBuilder b;
while ( file.rdstate() == ios_base::goodbit ) {
file.getline( line , BUF_SIZE );
const char * cur = line;
while ( isspace( cur[0] ) ) cur++;
if ( cur[0] == '\0' )
continue;
_fields.push_back( cur );
b.append( cur , 1 );
}
_fieldsObj = b.obj();
return;
}
throw UserException( 9998 , "you need to specify fields" );
}
/**
 * Authenticates the connection, or probes that no authentication is needed.
 *
 * With neither username nor password set, a findOne on the tool's namespace
 * is issued (only when a collection is configured) so that an
 * unauthenticated connection fails early. Otherwise the credentials are
 * tried against `dbname` and, failing that, against the "admin" database.
 *
 * @param dbname  database to authenticate against; an empty string means _db
 * @throws UserException 9997 if both attempts fail; the message carries the
 *         error text of the last (admin) attempt
 */
void Tool::auth( string dbname ) {
    if ( ! dbname.size() )
        dbname = _db;

    if ( ! ( _username.size() || _password.size() ) ) {
        // Make sure that we don't need authentication to connect to this db
        // findOne throws an AssertionException if it's not authenticated.
        if (_coll.size() > 0) {
            // BSONTools don't have a collection
            conn().findOne(getNS(), Query("{}"));
        }
        return;
    }

    string errmsg;
    if ( _conn->auth( dbname , _username , _password , errmsg ) )
        return;

    // try against the admin db
    if ( _conn->auth( "admin" , _username , _password , errmsg ) )
        return;

    throw UserException( 9997 , (string)"auth failed: " + errmsg );
}
/**
 * Constructs a tool that works on BSON files: no default database or
 * collection, `usesstdout` false, plus the --objcheck and --filter options.
 *
 * @param name      tool name forwarded to Tool
 * @param access    option-group bitmask forwarded to Tool
 * @param objcheck  initial value of _objcheck
 */
BSONTool::BSONTool( const char * name, DBAccess access , bool objcheck )
    : Tool( name , access , "" , "" , false ) ,
      _objcheck( objcheck ) {

    add_options()
        ( "objcheck" , "validate object before inserting" )
        ( "filter" , po::value<string>() , "filter to apply before inserting" )
        ;
}
int BSONTool::run() {
_objcheck = hasParam( "objcheck" );
if ( hasParam( "filter" ) )
_matcher.reset( new Matcher( fromjson( getParam( "filter" ) ) ) );
return doRun();
}
long long BSONTool::processFile( const path& root ) {
_fileName = root.string();
unsigned long long fileLength = file_size( root );
if ( fileLength == 0 ) {
out() << "file " << _fileName << " empty, skipping" << endl;
return 0;
}
FILE* file = fopen( _fileName.c_str() , "rb" );
if ( ! file ) {
log() << "error opening file: " << _fileName << endl;
return 0;
}
#if !defined(__sunos__) && defined(POSIX_FADV_SEQUENTIAL)
posix_fadvise(fileno(file), 0, fileLength, POSIX_FADV_SEQUENTIAL);
#endif
log(1) << "\t file size: " << fileLength << endl;
unsigned long long read = 0;
unsigned long long num = 0;
unsigned long long processed = 0;
const int BUF_SIZE = BSONObjMaxUserSize + ( 1024 * 1024 );
boost::scoped_array<char> buf_holder(new char[BUF_SIZE]);
char * buf = buf_holder.get();
ProgressMeter m( fileLength );
while ( read < fileLength ) {
size_t amt = fread(buf, 1, 4, file);
assert( amt == 4 );
int size = ((int*)buf)[0];
uassert( 10264 , str::stream() << "invalid object size: " << size , size < BUF_SIZE );
amt = fread(buf+4, 1, size-4, file);
assert( amt == (size_t)( size - 4 ) );
BSONObj o( buf );
if ( _objcheck && ! o.valid() ) {
cerr << "INVALID OBJECT - going try and pring out " << endl;
cerr << "size: " << size << endl;
BSONObjIterator i(o);
while ( i.more() ) {
BSONElement e = i.next();
try {
e.validate();
}
catch ( ... ) {
cerr << "\t\t NEXT ONE IS INVALID" << endl;
}
cerr << "\t name : " << e.fieldName() << " " << e.type() << endl;
cerr << "\t " << e << endl;
}
}
if ( _matcher.get() == 0 || _matcher->matches( o ) ) {
gotObject( o );
processed++;
}
read += o.objsize();
num++;
m.hit( o.objsize() );
}
fclose( file );
uassert( 10265 , "counts don't match" , m.done() == fileLength );
(_usesstdout ? cout : cerr ) << m.hits() << " objects found" << endl;
if ( _matcher.get() )
(_usesstdout ? cout : cerr ) << processed << " objects processed" << endl;
return processed;
}
/** Empty definition of setupSignals for the tools; the argument is ignored. */
void setupSignals( bool /* inFork */ ) {
}
}