0
0
mirror of https://github.com/mongodb/mongo.git synced 2024-11-30 17:10:48 +01:00
mongodb/db/cloner.cpp
Eliot Horowitz 206b92fce3 merge conflict checkpoint
some cloneCollection fixes
some moveShard work
2009-02-20 12:47:59 -05:00

349 lines
13 KiB
C++

// cloner.cpp - copy a database (export/import basically)
/**
* Copyright (C) 2008 10gen Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License, version 3,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "stdafx.h"
#include "pdfile.h"
#include "../client/dbclient.h"
#include "../util/builder.h"
#include "jsobj.h"
#include "query.h"
#include "commands.h"
#include "db.h"
#include "instance.h"
#include "repl.h"
namespace mongo {
/* Defined elsewhere. Called below on freshly created "._chunks" collections. */
void ensureHaveIdIndex(const char *ns);
/* Defined elsewhere. Used below to build "localhost:<port>" / "127.0.0.1:<port>", the
   addresses that are treated as "this same process" -- presumably the port this server
   listens on; confirm against its definition.
*/
extern int port;
/* Defined elsewhere. Called below on every newly connected DBClientConnection before any
   data is read from it; a false return aborts the clone.
*/
bool replAuthenticate(DBClientConnection *);
class Cloner: boost::noncopyable {
auto_ptr< DBClientInterface > conn;
void copy(const char *from_ns, const char *to_ns, bool isindex, bool logForRepl,
bool masterSameProcess, bool slaveOk, BSONObj query = emptyObj);
public:
Cloner() { }
/* slaveOk - if true it is ok if the source of the data is !ismaster.
useReplAuth - use the credentials we normally use as a replication slave for the cloning
*/
bool go(const char *masterHost, string& errmsg, const string& fromdb, bool logForRepl, bool slaveOk, bool useReplAuth);
bool cloneCollection( const char *fromhost, const char *ns, BSONObj query, string& errmsg, bool logForRepl, bool copyIndexes );
};
/* Re-home a system.indexes entry onto the current database.

   An index info object looks like:
     { "name" : "name_1" , "ns" : "foo.index3" , "key" : { "name" : 1.0 } }
   When such an object is copied into a database with a different name, the database
   prefix of the "ns" value (everything before the first '.') has to be replaced by
   database->name. Every other field is carried over untouched.

   uasserts if "ns" is not a string or contains no '.'.
*/
BSONObj fixindex(BSONObj o) {
    BSONObjBuilder rebuilt;
    for ( BSONObjIterator it(o); it.more(); ) {
        BSONElement field = it.next();
        if ( field.eoo() )
            break;
        if ( strcmp(field.fieldName(), "ns") != 0 ) {
            // not the namespace field: copy verbatim
            rebuilt.append(field);
            continue;
        }
        uassert("bad ns field for index during dbcopy", field.type() == String);
        const char *dot = strchr(field.valuestr(), '.');
        uassert("bad ns field for index during dbcopy [2]", dot);
        // keep ".collection" and swap in the name of the current database
        string renamed = database->name + dot;
        rebuilt.append("ns", renamed);
    }
    BSONObj fixed = rebuilt.obj();
    return fixed;
}
/* copy the specified collection
isindex - if true, this is system.indexes collection, in which we do some transformation when copying.
*/
void Cloner::copy(const char *from_collection, const char *to_collection, bool isindex, bool logForRepl, bool masterSameProcess, bool slaveOk, BSONObj query) {
auto_ptr<DBClientCursor> c;
{
dbtemprelease r;
c = conn->query( from_collection, query, 0, 0, 0, slaveOk ? Option_SlaveOk : 0 );
}
assert( c.get() );
while ( 1 ) {
{
dbtemprelease r;
if ( !c->more() )
break;
}
BSONObj tmp = c->next();
/* assure object is valid. note this will slow us down a good bit. */
if ( !tmp.valid() ) {
out() << "skipping corrupt object from " << from_collection << '\n';
continue;
}
BSONObj js = tmp;
if ( isindex ) {
assert( strstr(from_collection, "system.indexes") );
js = fixindex(tmp);
}
try {
theDataFileMgr.insert(to_collection, js);
if ( logForRepl )
logOp("i", to_collection, js);
}
catch( UserException& e ) {
log() << "warning: exception cloning object in " << from_collection << ' ' << e.what() << " obj:" << js.toString() << '\n';
}
}
}
bool Cloner::go(const char *masterHost, string& errmsg, const string& fromdb, bool logForRepl, bool slaveOk, bool useReplAuth) {
massert( "useReplAuth is not written to replication log", !useReplAuth || !logForRepl );
string todb = database->name;
stringstream a,b;
a << "localhost:" << port;
b << "127.0.0.1:" << port;
bool masterSameProcess = ( a.str() == masterHost || b.str() == masterHost );
if ( masterSameProcess ) {
if ( fromdb == todb && database->path == dbpath ) {
// guard against an "infinite" loop
/* if you are replicating, the local.sources config may be wrong if you get this */
errmsg = "can't clone from self (localhost).";
return false;
}
}
/* todo: we can put thesee releases inside dbclient or a dbclient specialization.
or just wait until we get rid of global lock anyway.
*/
string ns = fromdb + ".system.namespaces";
auto_ptr<DBClientCursor> c;
{
dbtemprelease r;
if ( !masterSameProcess ) {
auto_ptr< DBClientConnection > c( new DBClientConnection() );
if ( !c->connect( masterHost, errmsg ) )
return false;
if( !replAuthenticate(c.get()) )
return false;
conn = c;
} else {
conn = auto_ptr< DBClientInterface >( new DBDirectClient() );
}
c = conn->query( ns.c_str(), emptyObj, 0, 0, 0, slaveOk ? Option_SlaveOk : 0 );
}
if ( c.get() == 0 ) {
errmsg = "query failed " + ns;
return false;
}
while ( 1 ) {
{
dbtemprelease r;
if ( !c->more() )
break;
}
BSONObj collection = c->next();
BSONElement e = collection.findElement("name");
if ( e.eoo() ) {
string s = "bad system.namespaces object " + collection.toString();
/* temp
out() << masterHost << endl;
out() << ns << endl;
out() << e.toString() << endl;
exit(1);*/
massert(s.c_str(), false);
}
assert( !e.eoo() );
assert( e.type() == String );
const char *from_name = e.valuestr();
if( strstr(from_name, ".system.") ) {
/* system.users is cloned -- but nothing else from system. */
if( strstr(from_name, ".system.users") == 0 )
continue;
}
else if( strchr(from_name, '$') ) {
// don't clone index namespaces -- we take care of those separately below.
continue;
}
BSONObj options = collection.getObjectField("options");
/* change name "<fromdb>.collection" -> <todb>.collection */
const char *p = strchr(from_name, '.');
assert(p);
string to_name = todb + p;
//if( !options.isEmpty() )
{
string err;
const char *toname = to_name.c_str();
userCreateNS(toname, options, err, logForRepl);
/* chunks are big enough that we should create the _id index up front, that should
be faster. perhaps we should do that for everything? Not doing that yet -- not sure
how we want to handle _id-less collections, and we might not want to create the index
there.
*/
if ( strstr(toname, "._chunks") )
ensureHaveIdIndex(toname);
}
copy(from_name, to_name.c_str(), false, logForRepl, masterSameProcess, slaveOk);
}
// now build the indexes
string system_indexes_from = fromdb + ".system.indexes";
string system_indexes_to = todb + ".system.indexes";
copy(system_indexes_from.c_str(), system_indexes_to.c_str(), true, logForRepl, masterSameProcess, slaveOk);
return true;
}
bool Cloner::cloneCollection( const char *fromhost, const char *ns, BSONObj query, string &errmsg, bool logForRepl, bool copyIndexes ) {
{
dbtemprelease r;
auto_ptr< DBClientConnection > c( new DBClientConnection() );
if ( !c->connect( fromhost, errmsg ) )
return false;
if( !replAuthenticate(c.get()) )
return false;
conn = c;
}
copy( ns, ns, false, logForRepl, false, false, query );
if ( !copyIndexes )
return true;
char db[256];
nsToClient( ns, db );
string indexNs = string( db ) + ".system.indexes";
copy( indexNs.c_str(), indexNs.c_str(), true, logForRepl, false, false, BSON( "ns" << ns ) );
return true;
}
bool cloneFrom(const char *masterHost, string& errmsg, const string& fromdb, bool logForReplication,
bool slaveOk, bool useReplAuth)
{
Cloner c;
return c.go(masterHost, errmsg, fromdb, logForReplication, slaveOk, useReplAuth);
}
/* Usage:
mydb.$cmd.findOne( { clone: "fromhost" } );
*/
class CmdClone : public Command {
public:
virtual bool slaveOk() {
return false;
}
CmdClone() : Command("clone") { }
virtual bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
string from = cmdObj.getStringField("clone");
if ( from.empty() )
return false;
/* replication note: we must logOp() not the command, but the cloned data -- if the slave
were to clone it would get a different point-in-time and not match.
*/
return cloneFrom(from.c_str(), errmsg, database->name, /*logForReplication=*/!fromRepl, /*slaveok*/false, /*usereplauth*/false);
}
} cmdclone;
class CmdCloneCollection : public Command {
public:
virtual bool slaveOk() {
return false;
}
CmdCloneCollection() : Command("cloneCollection") { }
virtual void help( stringstream &help ) const {
help << " example: { cloneCollection: <collection ns>, from: <hostname>, query: <query> }";
}
virtual bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
string fromhost = cmdObj.getStringField("from");
if ( fromhost.empty() )
return false;
string collection = cmdObj.getStringField("cloneCollection");
if ( collection.empty() )
return false;
BSONObj query = cmdObj.getObjectField("query");
if ( query.isEmpty() )
query = emptyObj;
BSONElement copyIndexesSpec = cmdObj.getField("copyindexes");
bool copyIndexes = copyIndexesSpec.isBoolean() ? copyIndexesSpec.boolean() : true;
/* replication note: we must logOp() not the command, but the cloned data -- if the slave
were to clone it would get a different point-in-time and not match.
*/
setClient( collection.c_str() );
log() << "cloneCollection. db:" << ns << " collection:" << collection << " from: " << fromhost << " query: " << query << endl;
Cloner c;
return c.cloneCollection( fromhost.c_str(), collection.c_str(), query, errmsg, !fromRepl, copyIndexes );
}
} cmdclonecollection;
/* Usage:
admindb.$cmd.findOne( { copydb: 1, fromhost: <hostname>, fromdb: <db>, todb: <db> } );
*/
class CmdCopyDb : public Command {
public:
CmdCopyDb() : Command("copydb") { }
virtual bool adminOnly() {
return true;
}
virtual bool slaveOk() {
return false;
}
virtual bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
string fromhost = cmdObj.getStringField("fromhost");
if ( fromhost.empty() ) {
/* copy from self */
stringstream ss;
ss << "localhost:" << port;
fromhost = ss.str();
}
string fromdb = cmdObj.getStringField("fromdb");
string todb = cmdObj.getStringField("todb");
if ( fromhost.empty() || todb.empty() || fromdb.empty() ) {
errmsg = "parms missing - {copydb: 1, fromhost: <hostname>, fromdb: <db>, todb: <db>}";
return false;
}
setClient(todb.c_str());
bool res = cloneFrom(fromhost.c_str(), errmsg, fromdb, /*logForReplication=*/!fromRepl, /*slaveok*/false, /*replauth*/false);
database = 0;
return res;
}
} cmdcopydb;
} // namespace mongo