From 29f0d366407c2e24230be2dc7fd99a0ec5d43410 Mon Sep 17 00:00:00 2001
From: Dwight
Date: Thu, 24 Sep 2009 17:10:45 -0400
Subject: [PATCH] more buildindex

---
 db/btree.cpp   | 14 ++++++--
 db/database.h  |  2 +-
 db/db.sln      |  1 -
 db/extsort.cpp |  4 +--
 db/extsort.h   |  2 +-
 db/pdfile.cpp  | 93 +++++++++++++++++++++++++++++++++++++++-----------
 db/storage.h   |  3 +-
 7 files changed, 91 insertions(+), 28 deletions(-)

diff --git a/db/btree.cpp b/db/btree.cpp
index c97f4246bbe..9cf16b98815 100644
--- a/db/btree.cpp
+++ b/db/btree.cpp
@@ -913,7 +913,7 @@ namespace mongo {
 
     void BtreeBuilder::addKey(BSONObj& key, DiskLoc loc) {
         if( n > 0 ) {
-            int cmp = keyLast.woCompare(key);
+            int cmp = keyLast.woCompare(key, order);
             massert( "bad key order in BtreeBuilder - server internal error", cmp <= 0 );
             if( cmp == 0 && !dupsAllowed )
                 uasserted( BtreeBucket::dupKeyError( idx , keyLast ) );
@@ -988,7 +988,17 @@ namespace mongo {
     }
 
     BtreeBuilder::~BtreeBuilder() {
-        /* TODO: ROLLBACK CODE */
+        if( !committed ) {
+            log() << "TEMP ROLLING back partially built index space" << endl;
+            DiskLoc x = first;
+            while( !x.isNull() ) {
+                DiskLoc next = x.btree()->tempNext();
+                btreeStore->deleteRecord(idx.indexNamespace().c_str(), x);
+                x = next;
+            }
+            assert( idx.head.isNull() );
+            log() << "TEMP done rollback" << endl;
+        }
     }
 
 }
diff --git a/db/database.h b/db/database.h
index 0b802eb05c7..972220a544a 100644
--- a/db/database.h
+++ b/db/database.h
@@ -65,7 +65,7 @@ namespace mongo {
                 if( n >= RecCache::Base && n <= RecCache::Base+1000 )
                     massert("getFile(): bad file number - using recstore db w/nonrecstore db build?", false);
 #endif
-                massert("getFile(): bad file number value (corrupt db?)", false);
+                massert("getFile(): bad file number value (corrupt db?): run repair", false);
             }
             DEV {
                 if ( n > 100 )
diff --git a/db/db.sln b/db/db.sln
index e1e28c7efa7..cd9aa061d47 100644
--- a/db/db.sln
+++ b/db/db.sln
@@ -21,7 +21,6 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "shell", "shell", "{2CABB3B8
 EndProject
 Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "tools", "tools", "{2B262D59-9DC7-4BF1-A431-1BD4966899A5}"
 	ProjectSection(SolutionItems) = preProject
-		..\tools\dump.cpp = ..\tools\dump.cpp
 		..\tools\importJSON.cpp = ..\tools\importJSON.cpp
 	EndProjectSection
 EndProject
diff --git a/db/extsort.cpp b/db/extsort.cpp
index 781a87a9bb5..db5317ca5fc 100644
--- a/db/extsort.cpp
+++ b/db/extsort.cpp
@@ -28,7 +28,7 @@ namespace mongo {
 
     BSONObjExternalSorter::BSONObjExternalSorter( const BSONObj & order , long maxFileSize )
-        : _order( order ) , _maxFilesize( maxFileSize ) ,
+        : _order( order.getOwned() ) , _maxFilesize( maxFileSize ) ,
           _map(0), _mapSizeSoFar(0), _largestObject(0), _sorted(0){
 
         stringstream rootpath;
@@ -77,7 +77,7 @@ namespace mongo {
             _map = new InMemory( _order );
         }
 
-        _map->insert( pair<BSONObj,DiskLoc>( o , loc ) );
+        _map->insert( pair<BSONObj,DiskLoc>( o.getOwned() , loc ) );
 
         long size = o.objsize();
         _mapSizeSoFar += size + sizeof( DiskLoc );
diff --git a/db/extsort.h b/db/extsort.h
index 11fe830f279..7d1df2e2ebb 100644
--- a/db/extsort.h
+++ b/db/extsort.h
@@ -65,7 +65,7 @@ namespace mongo {
 
         typedef set<Data,MyCmp> InMemory;
 
-        class Iterator {
+        class Iterator : boost::noncopyable {
         public:
 
             Iterator( BSONObjExternalSorter * sorter );
diff --git a/db/pdfile.cpp b/db/pdfile.cpp
index ffe8f62c808..03e0bd08e40 100644
--- a/db/pdfile.cpp
+++ b/db/pdfile.cpp
@@ -1029,13 +1029,45 @@ assert( !eloc.isNull() );
         }
     }
 
+    void testSorting()
+    {
+        BSONObjBuilder b;
+        b.appendNull("");
+        BSONObj x = b.obj();
+
+        BSONObjExternalSorter sorter;
+
+        sorter.add(x, DiskLoc(3,7));
+        sorter.add(x, DiskLoc(4,7));
+        sorter.add(x, DiskLoc(2,7));
+        sorter.add(x, DiskLoc(1,7));
+        sorter.add(x, DiskLoc(3,77));
+
+        sorter.sort();
+
+        BSONObjExternalSorter::Iterator i = sorter.iterator();
+        while( i.more() ) {
+            BSONObjExternalSorter::Data d = i.next();
+            cout << d.second.toString() << endl;
+            cout << d.first.objsize() << endl;
+            cout<<"SORTER next:" << d.first.toString() << endl;
+        }
+    }
+
     /* _ TODO dropDups
      */
     unsigned long long fastBuildIndex(const char *ns, NamespaceDetails *d, IndexDetails& idx, int idxNo) {
+        testSorting();
+
+        cout << "\n\nBuildindex " << ns << " idxNo:" << idxNo;
+        cout << ' ' << idx.info.obj().toString() << endl;
+
         bool dupsAllowed = !idx.unique();
         bool dropDups = idx.dropDups();
         BSONObj order = idx.keyPattern();
 
+        idx.head.Null();
+
         /* get and sort all the keys ----- */
         unsigned long long n = 0;
         auto_ptr<Cursor> c = theDataFileMgr.findAll(ns);
@@ -1051,6 +1083,7 @@ assert( !eloc.isNull() );
             for ( BSONObjSetDefaultOrder::iterator i=keys.begin(); i != keys.end(); i++ ) {
                 if( ++k == 2 )
                     d->setIndexIsMultikey(idxNo);
+cout<<"SORTER ADD " << i->toString() << ' ' << loc.toString() << endl;
                 sorter.add(*i, loc);
                 nkeys++;
             }
@@ -1060,36 +1093,56 @@ assert( !eloc.isNull() );
         };
         sorter.sort();
 
+        if( idxNo == 1 ) {
+            // TEMP!
+
+            BSONObjExternalSorter::Iterator i = sorter.iterator();
+            while( i.more() ) {
+                BSONObjExternalSorter::Data d = i.next();
+                cout << d.second.toString() << endl;
+                cout << d.first.objsize() << endl;
+                cout<<"SORTER next:" << d.first.toString() << endl;
+            }
+
+            cout << "stop here" << endl;
+            cout << "stop here" << endl;
+        }
+
         list<DiskLoc> dupsToDrop;
 
         /* build index --- */
-        BtreeBuilder btBuilder(dupsAllowed, idx);
-        BSONObj keyLast;
-        BSONObjExternalSorter::Iterator i = sorter.iterator();
-        while( i.more() ) {
-            BSONObjExternalSorter::Data d = i.next();
-            try {
-                btBuilder.addKey(d.first, d.second);
-            }
-            catch( AssertionException& ) {
-                if( !dupsAllowed ) {
-                    if( dropDups ) {
-                        /* we could queue these on disk, but normally there are very few dups, so instead we
-                        keep in ram and have a limit.
-                        */
-                        dupsToDrop.push_back(d.second);
-                        uassert("too may dups on index build with dropDups=true", dupsToDrop.size() < 1000000 );
-                    }
-                    else
-                        throw;
-                }
-            }
-        }
+        {
+            BtreeBuilder btBuilder(dupsAllowed, idx);
+            BSONObj keyLast;
+            BSONObjExternalSorter::Iterator i = sorter.iterator();
+            while( i.more() ) {
+                BSONObjExternalSorter::Data d = i.next();
+                cout<<"TEMP SORTER next " << d.first.toString() << endl;
+//zzz
+                try {
+                    btBuilder.addKey(d.first, d.second);
+                }
+                catch( AssertionException& ) {
+                    if( !dupsAllowed ) {
+                        if( dropDups ) {
+                            /* we could queue these on disk, but normally there are very few dups, so instead we
+                            keep in ram and have a limit.
+                            */
+                            dupsToDrop.push_back(d.second);
+                            uassert("too many dups on index build with dropDups=true", dupsToDrop.size() < 1000000 );
+                        }
+                        else
+                            throw;
+                    }
+                }
+            }
+            btBuilder.commit();
+            wassert( btBuilder.getn() == nkeys || dropDups );
+        }
+
 
         for( list<DiskLoc>::iterator i = dupsToDrop.begin(); i != dupsToDrop.end(); i++ )
             theDataFileMgr.deleteRecord( ns, i->rec(), *i, false, true );
-        btBuilder.commit();
-        wassert( btBuilder.getn() == nkeys || dropDups );
 
         return n;
     }
@@ -1415,7 +1468,7 @@ assert( !eloc.isNull() );
             int idxNo = tableToIndex->nIndexes;
             IndexDetails& idx = tableToIndex->indexes[idxNo];
             idx.info = loc;
-            tableToIndex->addingIndex(tabletoidxns.c_str(), idx); // clear transient info caches so they refresh
+            tableToIndex->addingIndex(tabletoidxns.c_str(), idx); // clear transient info caches so they refresh; increments nIndexes
             try {
                 buildIndex(tabletoidxns, tableToIndex, idx, idxNo);
             } catch( DBException& ) {
diff --git a/db/storage.h b/db/storage.h
index c8b7f7a3c88..30d2742cec8 100644
--- a/db/storage.h
+++ b/db/storage.h
@@ -70,7 +70,8 @@ namespace mongo {
             assert(!isNull());
         }
        void setInvalid() {
-            fileNo = -2;
+            fileNo = -2;
+            ofs = 0;
        }
        bool isValid() const {
            return fileNo != -2;
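
The heart of this patch is the sort-then-append build in fastBuildIndex: rather than inserting keys into the btree in collection-scan order, it collects every (key, location) pair, sorts them once by the index ordering, and hands them to a builder that only ever appends at the right edge of the tree. The sketch below is a minimal, self-contained STL analogue of that flow, not MongoDB's actual API; Key, RecordId, DuplicateKey, SequentialBuilder, and buildIndexFast are names invented for the example (stand-ins for BSONObj keys, DiskLoc, AssertionException, BtreeBuilder, and the real build loop).

    // Minimal STL sketch of the sort-then-append index build pattern.
    #include <algorithm>
    #include <stdexcept>
    #include <string>
    #include <utility>
    #include <vector>

    typedef std::string Key;      // stand-in for a BSON index key
    typedef long long   RecordId; // stand-in for DiskLoc

    struct DuplicateKey : std::runtime_error {
        DuplicateKey() : std::runtime_error("duplicate key") {}
    };

    // Accepts keys in non-decreasing order only, like BtreeBuilder::addKey:
    // since input is pre-sorted, any duplicate is adjacent to its twin, so
    // uniqueness checks reduce to comparing against the previous key.
    class SequentialBuilder {
        bool haveLast;
        Key  keyLast;
        bool dupsAllowed;
    public:
        explicit SequentialBuilder(bool dupsAllowed_)
            : haveLast(false), dupsAllowed(dupsAllowed_) {}
        void addKey(const Key& k, RecordId /*loc*/) {
            if( haveLast && keyLast > k )
                throw std::logic_error("bad key order");  // massert analogue
            if( haveLast && keyLast == k && !dupsAllowed )
                throw DuplicateKey();                     // uasserted analogue
            keyLast = k;
            haveLast = true;
            // a real builder appends to the rightmost btree bucket here
        }
    };

    unsigned long long buildIndexFast(std::vector< std::pair<Key,RecordId> > entries,
                                      bool unique, bool dropDups) {
        std::sort(entries.begin(), entries.end()); // external-sorter stand-in
        std::vector<RecordId> dupsToDrop;
        SequentialBuilder builder(!unique);
        for( size_t i = 0; i < entries.size(); i++ ) {
            try {
                builder.addKey(entries[i].first, entries[i].second);
            }
            catch( DuplicateKey& ) {
                if( !dropDups )
                    throw;
                dupsToDrop.push_back(entries[i].second); // delete records later
            }
        }
        // a real caller would now delete every record queued in dupsToDrop
        return entries.size() - dupsToDrop.size();
    }

Appending in sorted order means each addKey touches only the rightmost bucket, which is what makes the bulk build fast; it is also why addKey in btree.cpp now passes `order` to woCompare: the builder's "is input sorted" check must use the same comparison the sorter used, or valid input would be rejected.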
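The other technique worth calling out is the commit-or-rollback idiom the patch adds to ~BtreeBuilder: the destructor reclaims everything it allocated unless commit() ran first. Below is a hedged sketch of that idiom; IndexBuilder and its page list are invented stand-ins for BtreeBuilder and the bucket chain it walks via tempNext()/deleteRecord(), not the real classes.

    // Sketch of destructor-based rollback (RAII), as used by ~BtreeBuilder.
    #include <iostream>
    #include <list>
    #include <stdexcept>

    class IndexBuilder {
        std::list<int> pages;  // stand-in for the allocated bucket chain
        bool committed;
    public:
        IndexBuilder() : committed(false) {}
        void addPage(int p) { pages.push_back(p); }
        void commit()       { committed = true; }
        ~IndexBuilder() {
            if( !committed ) {
                // mirrors ~BtreeBuilder: walk the chain, free every bucket
                for( std::list<int>::iterator i = pages.begin(); i != pages.end(); ++i )
                    std::cout << "rolling back page " << *i << std::endl;
            }
        }
    };

    void buildSomething(bool fail) {
        IndexBuilder b;
        b.addPage(1);
        b.addPage(2);
        if( fail )
            throw std::runtime_error("duplicate key"); // like uasserted()
        b.commit(); // success: the destructor now leaves the pages alone
    }   // on failure, b destructs uncommitted and rolls its pages back

This is also why the patch wraps btBuilder in its own scope in pdfile.cpp and moves btBuilder.commit() inside it: if addKey throws (for example on a duplicate key with dropDups off), the builder goes out of scope uncommitted and its destructor frees the partially built index space before the exception propagates out of fastBuildIndex.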