0
0
mirror of https://github.com/mongodb/mongo.git synced 2024-12-01 09:32:32 +01:00

do not pick a split key if it is not different from the chunks' min key (on behalf of Matt Taylor)

This commit is contained in:
Alberto Lerner 2010-10-19 13:12:28 -04:00
parent 4c4349c696
commit 1e450f7b00
2 changed files with 58 additions and 6 deletions

View File

@ -120,4 +120,29 @@ res = db.runCommand( { splitVector: "test.jstests_splitvector" , keyPattern: {x:
assert.eq( true , res.ok , "6a" );
assert.eq( 19 , res.splitKeys.length , "6b" );
// -------------------------
// Case 7: enough occurrences of min key documents to pass the chunk limit
// [1111111111111111,2,3)
// The first split key must not be the chunk's min key: a key equal to the
// min key cannot be used as a split point, so the next distinct key is expected.
// Start from an empty collection with an index on the split key field.
f.drop();
f.ensureIndex( { x: 1 } );
// Fill collection and get split vector for 1MB maxChunkSize
numDocs = 2100;
// The loop starts at 1, so this inserts numDocs-1 documents that all share x == 1 (the min key).
for( i=1; i<numDocs; i++ ){
f.save( { x: 1, y: filler } );
}
// Add 9 documents with the next key value, x == 2.
for( i=1; i<10; i++ ){
f.save( { x: 2, y: filler } );
}
// NOTE(review): presumably called so the saves above are applied before splitVector runs - confirm.
db.getLastError();
res = db.runCommand( { splitVector: "test.jstests_splitvector" , keyPattern: {x:1} , maxChunkSize: 1 , min: 1} );
// 7a: the command itself must succeed.
assert.eq( true , res.ok , "7a" );
// 7b: the first split key must be x == 2, not the repeated min key x == 1.
assert.eq( 2 , res.splitKeys[0].x, "7b");
print("PASSED");

View File

@ -230,23 +230,38 @@ namespace mongo {
Timer timer;
long long currCount = 0;
long long numChunks = 0;
vector<BSONObj> splitKeys;
BSONObj currKey;
BtreeCursor * bc = new BtreeCursor( d , d->idxNo(*idx) , *idx , min , max , false , 1 );
shared_ptr<Cursor> c( bc );
scoped_ptr<ClientCursor> cc( new ClientCursor( QueryOption_NoCursorTimeout , c , ns ) );
if ( ! cc->ok() ){
errmsg = "can't open a cursor for splitting";
return false;
}
// Use every 'keyCount'-th key as a split point. We add the initial key as a sentinel, to be removed
// at the end. If a key appears more times than entries allowed on a chunk, we issue a warning and
// split on the following key.
vector<BSONObj> splitKeys;
set<BSONObj> tooFrequentKeys;
splitKeys.push_back( c->currKey() );
while ( cc->ok() ){
currCount++;
if ( currCount > keyCount ){
if ( currKey.isEmpty() || currKey.woCompare( c->currKey() ) ){
currKey = c->currKey();
splitKeys.push_back( bc->prettyKey( currKey ) );
BSONObj currKey = c->currKey();
// Do not use this split key if it is the same as the one used for the previous split point.
if ( currKey.woCompare( splitKeys.back() ) == 0 ){
tooFrequentKeys.insert( currKey );
} else {
splitKeys.push_back( currKey );
currCount = 0;
numChunks++;
log(4) << "picked a split key: " << currKey << endl;
log(4) << "picked a split key: " << bc->prettyKey( currKey ) << endl;
}
}
cc->advance();
@ -266,6 +281,18 @@ namespace mongo {
}
}
// Warn for keys that are more numerous than maxChunkSize allows.
for ( set<BSONObj>::const_iterator it = tooFrequentKeys.begin(); it != tooFrequentKeys.end(); ++it ){
log( LL_WARNING ) << "chunk is larger than " << maxChunkSize
<< " bytes because of key " << bc->prettyKey( *it ) << endl;
}
// Remove the sentinel at the beginning before returning and add fieldnames.
splitKeys.erase( splitKeys.begin() );
for ( vector<BSONObj>::iterator it = splitKeys.begin(); it != splitKeys.end() ; ++it ){
*it = bc->prettyKey( *it );
}
ostringstream os;
os << "Finding the split vector for " << ns << " over "<< keyPattern
<< " keyCount: " << keyCount << " numSplits: " << splitKeys.size();