Mirror of https://github.com/mongodb/mongo.git (synced 2024-12-01 09:32:32 +01:00)
do not pick a split key if it is not different from the chunk's min key (on behalf of Matt Taylor)
commit 1e450f7b00
parent 4c4349c696
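For context, the behavior this commit targets can be modeled outside the server. The sketch below is a self-contained approximation, not MongoDB code: the names pickSplitKeys and keyCount are illustrative, std::string keys stand in for BSONObj index keys, and a plain vector stands in for the BtreeCursor scan. It shows the rule the diff introduces: push the chunk's min key as a sentinel, skip (and remember as "too frequent") any candidate split key that equals the previous split point, and drop the sentinel before returning, so a key that fills more than one chunk's worth of documents is never handed back as a split key equal to the chunk's min key.

#include <iostream>
#include <set>
#include <string>
#include <vector>

// Illustrative model of the split-key selection this commit introduces.
// keys     : index keys in order, as a Btree scan would yield them
// keyCount : how many keys fit in one chunk (derived from maxChunkSize in the server)
std::vector<std::string> pickSplitKeys( const std::vector<std::string>& keys, long long keyCount ) {
    std::vector<std::string> splitKeys;
    std::set<std::string> tooFrequentKeys;
    if ( keys.empty() )
        return splitKeys;

    // Sentinel: the chunk's min key. It is removed before returning.
    splitKeys.push_back( keys.front() );

    long long currCount = 0;
    for ( size_t i = 0; i < keys.size(); ++i ) {
        currCount++;
        if ( currCount > keyCount ) {
            const std::string& currKey = keys[i];

            // Do not use this split key if it is the same as the previous split point.
            if ( currKey == splitKeys.back() ) {
                tooFrequentKeys.insert( currKey );
            }
            else {
                splitKeys.push_back( currKey );
                currCount = 0;
            }
        }
    }

    // The server logs a warning for keys that were too frequent to split on.
    for ( std::set<std::string>::const_iterator it = tooFrequentKeys.begin();
          it != tooFrequentKeys.end(); ++it ) {
        std::cerr << "warning: chunk cannot be kept under the size limit because of key "
                  << *it << std::endl;
    }

    // Remove the sentinel before returning.
    splitKeys.erase( splitKeys.begin() );
    return splitKeys;
}

int main() {
    // Mirror Case 7 of the test: 2100 keys equal to "1" (more than one chunk's worth)
    // followed by a handful of keys equal to "2".
    std::vector<std::string> keys( 2100, "1" );
    for ( int i = 0; i < 9; ++i )
        keys.push_back( "2" );

    std::vector<std::string> splits = pickSplitKeys( keys, /*keyCount=*/1000 );
    for ( size_t i = 0; i < splits.size(); ++i )
        std::cout << "split key: " << splits[i] << std::endl;   // prints "2", never the min key "1"
    return 0;
}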
@@ -120,4 +120,29 @@ res = db.runCommand( { splitVector: "test.jstests_splitvector" , keyPattern: {x:
 assert.eq( true , res.ok , "6a" );
 assert.eq( 19 , res.splitKeys.length , "6b" );
 
+
+// -------------------------
+// Case 7: enough occurrences of min key documents to pass the chunk limit
+// [1111111111111111,2,3)
+
+f.drop();
+f.ensureIndex( { x: 1 } );
+
+// Fill collection and get split vector for 1MB maxChunkSize
+numDocs = 2100;
+for( i=1; i<numDocs; i++ ){
+    f.save( { x: 1, y: filler } );
+}
+
+for( i=1; i<10; i++ ){
+    f.save( { x: 2, y: filler } );
+}
+db.getLastError();
+res = db.runCommand( { splitVector: "test.jstests_splitvector" , keyPattern: {x:1} , maxChunkSize: 1 , min: 1} );
+
+assert.eq( true , res.ok , "7a" );
+assert.eq( 2 , res.splitKeys[0].x, "7b");
+
+
 print("PASSED");
@@ -230,23 +230,38 @@ namespace mongo {
             Timer timer;
             long long currCount = 0;
             long long numChunks = 0;
-            vector<BSONObj> splitKeys;
-            BSONObj currKey;
 
             BtreeCursor * bc = new BtreeCursor( d , d->idxNo(*idx) , *idx , min , max , false , 1 );
             shared_ptr<Cursor> c( bc );
             scoped_ptr<ClientCursor> cc( new ClientCursor( QueryOption_NoCursorTimeout , c , ns ) );
             if ( ! cc->ok() ){
                 errmsg = "can't open a cursor for splitting";
                 return false;
             }
 
+            // Use every 'keyCount'-th key as a split point. We add the initial key as a sentinel, to be removed
+            // at the end. If a key appears more times than entries allowed on a chunk, we issue a warning and
+            // split on the following key.
+            vector<BSONObj> splitKeys;
+            set<BSONObj> tooFrequentKeys;
+            splitKeys.push_back( c->currKey() );
             while ( cc->ok() ){
                 currCount++;
                 if ( currCount > keyCount ){
-                    if ( currKey.isEmpty() || currKey.woCompare( c->currKey() ) ){
-                        currKey = c->currKey();
-                        splitKeys.push_back( bc->prettyKey( currKey ) );
+                    BSONObj currKey = c->currKey();
+
+                    // Do not use this split key if it is the same used in the previous split point.
+                    if ( currKey.woCompare( splitKeys.back() ) == 0 ){
+                        tooFrequentKeys.insert( currKey );
+
+                    } else {
+                        splitKeys.push_back( currKey );
                         currCount = 0;
                         numChunks++;
-                        log(4) << "picked a split key: " << currKey << endl;
+
+                        log(4) << "picked a split key: " << bc->prettyKey( currKey ) << endl;
                     }
+
                 }
                 cc->advance();
@@ -266,6 +281,18 @@ namespace mongo {
                 }
             }
 
+            // Warn for keys that are more numerous than maxChunkSize allows.
+            for ( set<BSONObj>::const_iterator it = tooFrequentKeys.begin(); it != tooFrequentKeys.end(); ++it ){
+                log( LL_WARNING ) << "chunk is larger than " << maxChunkSize
+                                  << " bytes because of key " << bc->prettyKey( *it ) << endl;
+            }
+
+            // Remove the sentinel at the beginning before returning and add fieldnames.
+            splitKeys.erase( splitKeys.begin() );
+            for ( vector<BSONObj>::iterator it = splitKeys.begin(); it != splitKeys.end() ; ++it ){
+                *it = bc->prettyKey( *it );
+            }
+
             ostringstream os;
             os << "Finding the split vector for " << ns << " over "<< keyPattern
                << " keyCount: " << keyCount << " numSplits: " << splitKeys.size();
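The post-processing in the second hunk relies on bc->prettyKey(...) to "add fieldnames" to raw index keys before they are returned or printed in the warning. That helper is not part of this diff, so the following is only a guess at the idea it implements (an assumption, not a quote of the server code): a raw btree key carries values but empty field names, and the index key pattern supplies the names. The toy model below uses ordered (name, value) string pairs in place of BSONObj.

#include <iostream>
#include <string>
#include <utility>
#include <vector>

// Hypothetical model of what "add fieldnames" means above: pair the i-th field
// name from the key pattern with the i-th value from the raw key,
// e.g. { "" : 2 } combined with { x : 1 } yields { x : 2 }.
typedef std::vector< std::pair<std::string, std::string> > Doc;  // ordered (name, value) pairs

Doc prettyKeyModel( const Doc& rawKey, const Doc& keyPattern ) {
    Doc named;
    for ( size_t i = 0; i < rawKey.size() && i < keyPattern.size(); ++i ) {
        named.push_back( std::make_pair( keyPattern[i].first, rawKey[i].second ) );
    }
    return named;
}

int main() {
    Doc keyPattern;  // { x : 1 }
    keyPattern.push_back( std::make_pair( "x", "1" ) );

    Doc rawKey;      // { "" : 2 }, a raw index key without field names
    rawKey.push_back( std::make_pair( "", "2" ) );

    Doc pretty = prettyKeyModel( rawKey, keyPattern );
    for ( size_t i = 0; i < pretty.size(); ++i )
        std::cout << "{ " << pretty[i].first << " : " << pretty[i].second << " }" << std::endl;
    return 0;
}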