From bcf0b72b4afcf2cc54feb14300309351f6c3ee69 Mon Sep 17 00:00:00 2001
From: Spencer T Brody
Date: Wed, 6 Jul 2011 17:44:53 -0400
Subject: [PATCH] Part 2 of SERVER-1097: Make mongoexport export valid CSV

---
 jstests/tool/csvexport1.js | 45 +++++++++++++++++++++++
 jstests/tool/csvexport2.js | 31 ++++++++++++++++
 tools/export.cpp           | 74 ++++++++++++++++++++++++++++++++++++-
 util/time_support.h        | 15 ++++++++
 4 files changed, 164 insertions(+), 1 deletion(-)
 create mode 100644 jstests/tool/csvexport1.js
 create mode 100644 jstests/tool/csvexport2.js

diff --git a/jstests/tool/csvexport1.js b/jstests/tool/csvexport1.js
new file mode 100644
index 00000000000..1146f3aa471
--- /dev/null
+++ b/jstests/tool/csvexport1.js
@@ -0,0 +1,45 @@
+// csvexport1.js
+
+t = new ToolTest( "csvexport1" )
+
+c = t.startDB( "foo" );
+
+assert.eq( 0 , c.count() , "setup1" );
+
+objId = ObjectId()
+
+c.insert({ a : new NumberInt(1) , b : objId , c: [1, 2, 3], d : {a : "hello", b : "world"} , e: '-'})
+c.insert({ a : -2.0, c : MinKey, d : "Then he said, \"Hello World!\"", e : new NumberLong(3)})
+c.insert({ a : new BinData(0, "1234"), b : ISODate("2009-08-27"), c : new Timestamp(1234, 9876), d : /foo*\"bar\"/i, e : function foo() { print("Hello World!"); }})
+
+assert.eq( 3 , c.count() , "setup2" );
+
+t.runTool( "export" , "--out" , t.extFile , "-d" , t.baseName , "-c" , "foo" , "--csv", "-f", "a,b,c,d,e")
+
+
+c.drop()
+
+assert.eq( 0 , c.count() , "after drop" )
+
+t.runTool("import", "--file", t.extFile, "-d", t.baseName, "-c", "foo", "--type", "csv", "--headerline");
+
+assert.soon ( 3 + " == c.count()", "after import");
+
+// Note: Exporting and Importing to/from CSV is not designed to be round-trippable
+expected = []
+expected.push({ a : 1, b : "ObjectID(" + objId.toString() + ")", c : "[ 1, 2, 3 ]", d : "{ \"a\" : \"hello\", \"b\" : \"world\" }", e : "-"})
+expected.push({ a : -2.0, b : "", c : "$MinKey", d : "Then he said, \"Hello World!\"", e : 3})
+expected.push({ a : "D76DF8", b : "2009-08-27T00:00:00Z", c : "{ \"t\" : 1000 , \"i\" : 9876 }", d : "/foo*\\\"bar\\\"/i", e : "function foo() {\n print(\"Hello World!\");\n}"})
+
+actual = []
+actual.push(c.find({a : 1}).toArray()[0]);
+actual.push(c.find({a : -2.0}).toArray()[0]);
+actual.push(c.find({a : "D76DF8"}).toArray()[0]);
+
+for (i = 0; i < expected.length; i++) {
+    delete actual[i]._id
+    assert.eq( expected[i], actual[i], "CSV export " + i);
+}
+
+
+t.stop()
\ No newline at end of file
diff --git a/jstests/tool/csvexport2.js b/jstests/tool/csvexport2.js
new file mode 100644
index 00000000000..3e0dd2c6829
--- /dev/null
+++ b/jstests/tool/csvexport2.js
@@ -0,0 +1,31 @@
+// csvexport2.js
+
+t = new ToolTest( "csvexport2" )
+
+c = t.startDB( "foo" );
+
+// This test is designed to test exporting of a CodeWithScope object.
+// However, due to SERVER-3391, it is not possible to create a CodeWithScope object in the mongo shell,
+// therefore this test does not work. Once SERVER-3391 is resolved, this test should be uncommented.
+
+//assert.eq( 0 , c.count() , "setup1" );
+
+//c.insert({ a : 1 , b : Code("print(\"Hello \" + x);", {"x" : "World!"})})
+//assert.eq( 1 , c.count() , "setup2" );
+//t.runTool( "export" , "--out" , t.extFile , "-d" , t.baseName , "-c" , "foo" , "--csv", "-f", "a,b")
+
+
+//c.drop()
+
+//assert.eq( 0 , c.count() , "after drop" )
+//t.runTool("import", "--file", t.extFile, "-d", t.baseName, "-c", "foo", "--type", "csv", "--headerline");
+//assert.soon ( 1 + " == c.count()", "after import");
+
+//expected = { a : 1, b : "\"{ \"$code\" : print(\"Hello \" + x); , \"$scope\" : { \"x\" : \"World!\" } }"};
+//actual = c.findOne()
+
+//delete actual._id;
+//assert.eq( expected, actual );
+
+
+t.stop()
\ No newline at end of file
diff --git a/tools/export.cpp b/tools/export.cpp
index fb32a9e58ff..ac1d7289a94 100644
--- a/tools/export.cpp
+++ b/tools/export.cpp
@@ -45,6 +45,78 @@ public:
         _usesstdout = false;
     }
 
+    // Turn every double quote character into two double quote characters.
+    // If hasSurroundingQuotes is true, the first and last characters of the
+    // string are taken to be the enclosing quotes and are not escaped; if it
+    // is false, a double quote character is added around the whole string.
+    string csvEscape(string str, bool hasSurroundingQuotes = false) {
+        // string::find returns size_t and string::npos does not fit in an int,
+        // so keep the position unsigned instead of round-tripping through int.
+        size_t index = hasSurroundingQuotes ? 1 : 0;
+        while ((index = str.find('"', index)) != string::npos) {
+            // Stop at the closing quote; it must stay a single character.
+            if (hasSurroundingQuotes && index + 1 >= str.size())
+                break;
+            str.replace(index, 1, "\"\"");
+            index += 2; // skip over both characters of the escaped quote
+        }
+        return hasSurroundingQuotes ? str : "\"" + str + "\"";
+    }
+
+    // Gets the string representation of a BSON element that can be correctly
+    // written to a CSV file. Types with no CSV representation yield "".
+    string csvString (const BSONElement& object) {
+        const char* binData; // Only used with BinData type
+
+        switch (object.type()) {
+        case MinKey:
+            return "$MinKey";
+        case MaxKey:
+            return "$MaxKey";
+        case NumberInt:
+        case NumberDouble:
+        case NumberLong:
+        case Bool:
+            return object.toString(false);
+        case String:
+        case Symbol:
+            return csvEscape(object.toString(false), true);
+        case Object:
+            return csvEscape(object.jsonString(Strict, false));
+        case Array:
+            return csvEscape(object.jsonString(Strict, false));
+        case BinData:
+            int len;
+            binData = object.binDataClean(len);
+            return toHex(binData, len);
+        case jstOID:
+            return "ObjectID(" + object.OID().toString() + ")"; // OIDs are always 24 bytes
+        case Date:
+            return timeToISOString(object.Date() / 1000);
+        case Timestamp:
+            return csvEscape(object.jsonString(Strict, false));
+        case RegEx:
+            return csvEscape("/" + string(object.regex()) + "/" + string(object.regexFlags()));
+        case Code:
+            return csvEscape(object.toString(false));
+        case CodeWScope:
+            if (string(object.codeWScopeScopeData()) == "") {
+                return csvEscape(object.toString(false));
+            } else {
+                return csvEscape(object.jsonString(Strict, false));
+            }
+        case EOO:
+        case Undefined:
+        case DBRef:
+        case jstNULL:
+            cerr << "Invalid BSON object type for CSV output: " << object.type() << endl;
+            return "";
+        }
+        // Can never get here
+        assert(false);
+        return "";
+    }
+
     int run() {
         string ns;
         const bool csv = hasParam( "csv" );
@@ -137,7 +209,7 @@ public:
                     out << ",";
                 const BSONElement & e = obj.getFieldDotted(i->c_str());
                 if ( ! e.eoo() ) {
-                    out << e.jsonString( Strict , false );
+                    out << csvString(e);
                 }
             }
             out << endl;
diff --git a/util/time_support.h b/util/time_support.h
index ce2cdbc0e15..ca17807ec96 100644
--- a/util/time_support.h
+++ b/util/time_support.h
@@ -52,6 +52,21 @@ namespace mongo {
         return buf;
     }
 
+    // Formats a time_t as an ISO 8601 date-time string, e.g. "2009-08-27T00:00:00Z".
+    // NOTE(review): the trailing 'Z' denotes UTC; confirm time_t_to_Struct converts to UTC.
+    inline string timeToISOString(time_t time) {
+        struct tm t;
+        time_t_to_Struct( time, &t );
+
+        const char* fmt = "%Y-%m-%dT%H:%M:%SZ";
+        char buf[32];
+        // Call strftime outside assert(): an assert that is compiled out must
+        // not take the formatting call with it and leave buf uninitialized.
+        const size_t len = strftime(buf, sizeof(buf), fmt, &t);
+        assert(len == 20);
+        return string(buf, len);
+    }
+
     inline boost::gregorian::date currentDate() {
         boost::posix_time::ptime now = boost::posix_time::second_clock::local_time();
         return now.date();