0
0
mirror of https://github.com/mongodb/mongo.git synced 2024-11-29 08:42:18 +01:00
mongodb/dbtests/perf/btreeperf.cpp
2011-01-04 00:40:41 -05:00

443 lines
16 KiB
C++

// btreeperf.cpp
/* Copyright 2010 10gen Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Performance timing and space utilization testing for btree indexes.
*/
#include <iostream>
#include <string>
#include <boost/random/bernoulli_distribution.hpp>
#include <boost/random/geometric_distribution.hpp>
#include <boost/random/mersenne_twister.hpp>
#include <boost/random/variate_generator.hpp>
#include <boost/random/uniform_int.hpp>
#include "client/dbclient.h"
#include "../../util/timer.h"
using namespace std;
using namespace mongo;
using namespace boost;
/** Full "<database>.<collection>" name that every insert, remove, count and drop in this file targets. */
char const *ns = "test.btreeperf";
/** Database against which the statistics command is run. */
char const *db = "test";
/** Value sent with the collstats command; by its name, the _id index of the test collection. */
char const *index_collection = "btreeperf.$_id_";
// Single source of randomness shared by every distribution in this file.
// This random number generator has a much larger period than the default
// generator and is half as fast as the default. Given that we intend to
// generate large numbers of documents and will utilize more than one random
// sample per document, choosing this generator seems like a worthwhile tradeoff.
// NOTE(review): the generator is default constructed and nothing visible here
// seeds it, so each run presumably replays the same sequence -- confirm that
// this repeatability is intended.
mt19937 randomNumberGenerator;
/**
 * Abstract source of test operations: implementations decide which document
 * is inserted next and which spec the next remove request carries.
 */
class InsertAndRemoveStrategy {
public:
    virtual ~InsertAndRemoveStrategy() {}
    /** @return the next document to insert. */
    virtual BSONObj insertObj() = 0;
    /** @return the spec for the next remove request. */
    virtual BSONObj removeObj() = 0;
protected:
    /**
     * @param val sample value to use as the key.
     * @return a document of the form { _id: val }, to be inserted.
     */
    template< class T >
    BSONObj insertObjWithVal( const T &val ) {
        return idOnlyObj( val );
    }
    /**
     * @param val sample value identifying the document to remove.
     * @return a remove spec of the form { _id: val }.
     */
    template< class T >
    BSONObj removeObjWithVal( const T &val ) {
        return idOnlyObj( val );
    }
private:
    /** Insert documents and exact-match remove specs have the same shape, so both are built here. */
    template< class T >
    static BSONObj idOnlyObj( const T &val ) {
        BSONObjBuilder builder;
        builder.append( "_id", val );
        return builder.obj();
    }
};
/**
* Manages a set of elements of type T. Supports inserting unique elements and
* sampling a random element without replacement.
*
* TODO In the contexts where this class is currently used, duplicate keys are
* either impossible or highly unlikely. And an occasional duplicate value will
* not much affect the procedure by wich a random element is chosen. We could
* stop checking for duplicates in push(), eliminate _set from the implementaiton,
* and potentially improve performance and memory requirements somewhat.
*/
template< class T >
class SetSampler {
public:
/** @param val Insert this value in the set if not already present. */
void push( const T& val ) {
if ( _set.insert( val ).second ) {
_vector.push_back( val );
}
}
/** @return a random element removed from the set */
T pull() {
if ( _vector.size() == 0 ) {
return T();
}
uniform_int< size_t > sizeRange( 0, _vector.size() - 1 );
variate_generator< mt19937&, uniform_int< size_t > > sizeGenerator( randomNumberGenerator, sizeRange );
size_t toRemove = sizeGenerator();
T val = _vector[ toRemove ];
// Replace the random element with the last element, then remove the
// last element.
_vector[ toRemove ] = _vector.back();
_vector.pop_back();
_set.erase( val );
return val;
}
private:
vector< T > _vector;
set< T > _set;
};
/**
 * Remembers every value the derived class's insertVal() has produced and, for
 * each removal, picks uniformly among the values that were inserted but not
 * yet removed.
 *
 * The implementation is probabilistically sound, but may be resource intensive
 * and slow because every live value is held in a SetSampler.
 */
template< class T >
class InsertAndUniformRemoveStrategy : public InsertAndRemoveStrategy {
public:
    /** Obtains a new value, records it as live, and wraps it as a document to insert. */
    virtual BSONObj insertObj() {
        const T fresh = insertVal();
        _sampler.push( fresh );
        return insertObjWithVal( fresh );
    }
    /** Takes a random live value out of the sampler and wraps it as a remove spec. */
    virtual BSONObj removeObj() {
        const T victim = _sampler.pull();
        return removeObjWithVal( victim );
    }
protected:
    /** @return value to insert. This is the only function a derived class need implement. */
    virtual T insertVal() = 0;
private:
    SetSampler< T > _sampler;
};
/**
 * The derived class supplies both the keys to insert and the keys used for
 * removal. Removal follows the approach for selecting a random element
 * described in the MongoDB cookbook: the first key in the collection greater
 * than or equal to the supplied removal key is removed. An existing key can
 * thus be chosen for removal without the overhead of a SetSampler.
 *
 * Ranged selection can work well for picking a random element, but it has
 * theoretical and empirically observed shortcomings when used to remove nodes
 * for btree performance measurement:
 * 1 The likelihood that a given key is removed is proportional to the
 *   difference in value between it and the previous key. Because deleting a
 *   key widens the gap between its neighbors, neighboring keys become more
 *   likely to be deleted than they would be under a true uniform distribution.
 * 2 MongoDB 1.6 uses 'unused' nodes in the btree implementation. With a ranged
 *   removal strategy, those nodes must be traversed to find a node available
 *   for removal.
 * 3 Ranged removal was observed to be biased against the balancing policy of
 *   MongoDB 1.7 in some cases, in terms of storage size. This may be a
 *   consequence of point 1 above.
 * 4 Ranged removal was observed to be significantly biased against the btree
 *   implementation in MongoDB 1.6 in terms of performance. This is likely a
 *   consequence of point 2 above.
 * 5 In some cases the biases described above were not evident in tests lasting
 *   several minutes, but were evident in tests lasting several hours.
 */
template< class T >
class InsertAndRangedRemoveStrategy : public InsertAndRemoveStrategy {
public:
    /** @return { _id: <next insert key> }. */
    virtual BSONObj insertObj() {
        const T key = insertVal();
        return insertObjWithVal( key );
    }
    /** @return { _id: { $gte: <next removal key> } }. */
    virtual BSONObj removeObj() {
        const T lowerBound = removeVal();
        return rangedRemoveObjWithVal( lowerBound );
    }
protected:
    /**
     * Builds the ranged remove spec { _id: { $gte: val } }.
     * Small likelihood that this removal spec will not match any document.
     */
    template< class U >
    BSONObj rangedRemoveObjWithVal( const U &val ) {
        BSONObjBuilder spec;
        // The nested builder writes the { $gte: val } sub-object straight
        // into the outer builder's buffer.
        BSONObjBuilder range( spec.subobjStart( "_id" ) );
        range.append( "$gte", val );
        range.done();
        return spec.obj();
    }
    /** @return the next key to insert. */
    virtual T insertVal() = 0;
    /** @return the lower bound for the next ranged removal. */
    virtual T removeVal() = 0;
};
/**
 * Integer Keys
 * Uniform Inserts
 * Uniform Removes (ranged, starting at a uniformly drawn key)
 */
class UniformInsertRangedUniformRemoveInteger : public InsertAndRangedRemoveStrategy< long long > {
    typedef uniform_int< unsigned long long > KeyRange;
    typedef variate_generator< mt19937&, KeyRange > KeyGenerator;
public:
    UniformInsertRangedUniformRemoveInteger() :
        // Every unsigned 64 bit value is a candidate.
        _keyRange( 0ULL, ~0ULL ),
        _drawKey( randomNumberGenerator, _keyRange ) {
    }
    /** Small likelihood of duplicates */
    virtual long long insertVal() { return nextKey(); }
    virtual long long removeVal() { return nextKey(); }
private:
    /** Draws an unsigned sample; it is converted to long long on return. */
    long long nextKey() { return _drawKey(); }
    KeyRange _keyRange;
    KeyGenerator _drawKey;
};
/**
 * Integer Keys
 * Uniform Inserts (keys come from UniformInsertRangedUniformRemoveInteger)
 * Uniform Removes (chosen among live keys by the base class)
 */
class UniformInsertUniformRemoveInteger : public InsertAndUniformRemoveStrategy< long long > {
public:
    /** @return the next key, taken from the wrapped ranged strategy's insert sequence. */
    virtual long long insertVal() {
        const long long key = _keySource.insertVal();
        return key;
    }
private:
    UniformInsertRangedUniformRemoveInteger _keySource;
};
/**
 * String Keys
 * Uniform Inserts
 * Uniform Removes (ranged, starting at a randomly generated string)
 */
class UniformInsertRangedUniformRemoveString : public InsertAndRangedRemoveStrategy< string > {
    typedef geometric_distribution<> LengthDistribution;
    typedef uniform_int< char > CharRange;
public:
    UniformInsertRangedUniformRemoveString() :
        _lengthDistribution( 0.9 ),
        _drawLength( randomNumberGenerator, _lengthDistribution ),
        _charRange( 'a', 'z' ),
        _drawChar( randomNumberGenerator, _charRange ) {
    }
    /** Small likelihood of duplicates */
    virtual string insertVal() { return nextString(); }
    virtual string removeVal() { return nextString(); }
private:
    /**
     * @return a string of characters drawn from 'a'..'z'. Its length is a
     * geometric sample plus 5, capped at 100.
     */
    string nextString() {
        // The longer the minimum string length, the lower the likelihood of duplicates
        int length = _drawLength() + 5;
        if ( length > 100 ) {
            length = 100;
        }
        string key( length, 'x' );
        for( string::iterator c = key.begin(); c != key.end(); ++c ) {
            *c = _drawChar();
        }
        return key;
    }
    LengthDistribution _lengthDistribution;
    variate_generator< mt19937&, LengthDistribution > _drawLength;
    CharRange _charRange;
    variate_generator< mt19937&, CharRange > _drawChar;
};
/**
 * String Keys
 * Uniform Inserts (keys come from UniformInsertRangedUniformRemoveString)
 * Uniform Removes (chosen among live keys by the base class)
 */
class UniformInsertUniformRemoveString : public InsertAndUniformRemoveStrategy< string > {
public:
    /** @return the next key, taken from the wrapped ranged strategy's insert sequence. */
    virtual string insertVal() {
        const string key = _keySource.insertVal();
        return key;
    }
private:
    UniformInsertRangedUniformRemoveString _keySource;
};
/**
 * OID Keys
 * Increasing Inserts
 * Uniform Removes
 *
 * Keys are OIDs built from a counter: the first insert uses 0 and each later
 * insert uses the previous value plus one. Removal bounds are drawn uniformly
 * from the counter values handed out so far.
 */
class IncreasingInsertRangedUniformRemoveOID : public InsertAndRangedRemoveStrategy< OID > {
public:
    IncreasingInsertRangedUniformRemoveOID() :
        // Start below zero so the pre-increment in insertVal() yields 0 first.
        _max( -1 ) {
    }
    /** @return the OID for the next counter value. */
    virtual OID insertVal() { return oidFromULL( ++_max ); }
    /**
     * @return the OID for a counter value drawn uniformly from [0, _max].
     * Before anything has been inserted the range collapses to [0, 0].
     */
    virtual OID removeVal() {
        uniform_int< unsigned long long > distribution( 0, _max > 0 ? _max : 0 );
        variate_generator< mt19937&, uniform_int< unsigned long long > > generator( randomNumberGenerator, distribution );
        return oidFromULL( generator() );
    }
private:
    /**
     * Stores the low 64 bits of val, most significant byte first, in the
     * last 8 bytes of a cleared OID (the first 4 bytes are left as clear()
     * set them).
     *
     * The bytes are written one at a time so that the result does not depend
     * on the byte order of the host or on a compiler specific byte swap
     * builtin.
     * NOTE(review): presumably OIDs are ordered bytewise, which is what makes
     * most-significant-byte-first storage keep the keys increasing -- confirm.
     */
    static OID oidFromULL( unsigned long long val ) {
        OID oid;
        oid.clear();
        unsigned char *counterBytes = reinterpret_cast< unsigned char * >( &oid ) + 4;
        for( int i = 7; i >= 0; --i ) {
            counterBytes[ i ] = static_cast< unsigned char >( val & 0xff );
            val >>= 8;
        }
        return oid;
    }
    /** Largest counter value handed out so far, or -1 if none. */
    long long _max;
};
/**
 * OID Keys
 * Increasing Inserts (keys come from IncreasingInsertRangedUniformRemoveOID)
 * Uniform Removes (chosen among live keys by the base class)
 */
class IncreasingInsertUniformRemoveOID : public InsertAndUniformRemoveStrategy< OID > {
public:
    /** @return the next key, taken from the wrapped ranged strategy's insert sequence. */
    virtual OID insertVal() {
        const OID key = _keySource.insertVal();
        return key;
    }
private:
    IncreasingInsertRangedUniformRemoveOID _keySource;
};
/**
 * Integer Keys
 * Increasing Inserts
 * Increasing Removes (on remove, the lowest key is always removed)
 *
 * Keys (_min, _max] are the ones inserted and not yet removed.
 */
class IncreasingInsertIncreasingRemoveInteger : public InsertAndRemoveStrategy {
public:
    IncreasingInsertIncreasingRemoveInteger() :
        _min( startingKey() ),
        _max( startingKey() ) {
    }
    /** @return { _id: k } where k is one more than the previously inserted key. */
    virtual BSONObj insertObj() {
        ++_max;
        return insertObjWithVal( _max );
    }
    /**
     * @return { _id: k } for the lowest key not yet removed. When no such key
     * exists, _min is left alone and the spec names _min itself, a key that
     * was never inserted or has already been removed.
     */
    virtual BSONObj removeObj() {
        if ( _min < _max ) {
            ++_min;
        }
        return removeObjWithVal( _min );
    }
private:
    /**
     * Start with a large value so data type will be preserved if we round
     * trip through json.
     */
    static long long startingKey() { return 1LL << 32; }
    long long _min;
    long long _max;
};
/** Produces random boolean values with a configurable bias toward false. */
class BernoulliGenerator {
public:
    /**
     * @param excessFalsePercent how much more likely false should be than
     * true, as a percentage. Passing 5 makes false 5% more likely than true;
     * passing 0 makes both outcomes equally likely.
     */
    BernoulliGenerator( int excessFalsePercent ) :
        _bernoulli_distribution( probabilityOfTrue( excessFalsePercent ) ),
        _generator( randomNumberGenerator, _bernoulli_distribution ) {
    }
    /** @return the next random boolean. */
    bool operator()() { return _generator(); }
private:
    /**
     * With p = 1 / ( 2 + e ), where e = excessFalsePercent / 100, the odds
     * of false to true are ( 1 - p ) / p = 1 + e.
     */
    static double probabilityOfTrue( int excessFalsePercent ) {
        const double excessFraction = excessFalsePercent / 100.0;
        return 1.0 / ( 2.0 + excessFraction );
    }
    bernoulli_distribution<> _bernoulli_distribution;
    variate_generator< mt19937&, bernoulli_distribution<> > _generator;
};
/**
 * Sends the operations produced by a strategy to a server over a connection,
 * choosing randomly between insert and remove for each operation.
 */
class InsertAndRemoveRunner {
public:
    /**
     * @param conn connection the operations are sent over; held by reference.
     * @param strategy supplier of insert documents and remove specs; held by reference.
     * @param excessInsertPercent how much more likely an insert should be
     * than a remove, as a percentage.
     */
    InsertAndRemoveRunner( DBClientConnection &conn, InsertAndRemoveStrategy &strategy, int excessInsertPercent ) :
        _conn( conn ),
        _strategy( strategy ),
        // The generator's 'false' outcome means insert, so the insert bias
        // is passed as its bias toward false.
        _nextOpTypeRemove( excessInsertPercent ) {
    }
    /** Performs one randomly chosen operation against the test namespace. */
    void writeOne() {
        const bool doRemove = _nextOpTypeRemove();
        if ( !doRemove ) {
            _conn.insert( ns, _strategy.insertObj() );
            return;
        }
        // NOTE(review): the trailing 'true' is presumably the driver's
        // "remove just one document" flag -- confirm against the client API.
        _conn.remove( ns, _strategy.removeObj(), true );
    }
private:
    DBClientConnection &_conn;
    InsertAndRemoveStrategy &_strategy;
    BernoulliGenerator _nextOpTypeRemove;
};
/**
* Writes a test script to cout based on a strategy and specified mix of inserts
* and removes. The script can be subsequently executed by InsertAndRemoveRunner.
* Script generation is intended for strategies that are memory or cpu intensive
* and might either divert resources from a mongod instance being analyzed on the
* same machine or fail to generate requests as quickly as the mongod might
* accept them.
* The script contains one line per operation. Each line begins
* with a letter indicating the operation type, followed by a space. Next
* follows the json representation of a document for the specified operation
* type.
*/
class InsertAndRemoveScriptGenerator {
public:
InsertAndRemoveScriptGenerator( InsertAndRemoveStrategy &strategy, int excessInsertPercent ) :
_strategy( strategy ),
_nextOpTypeRemove( excessInsertPercent ) {
}
void writeOne() {
if ( _nextOpTypeRemove() ) {
cout << "r " << _strategy.removeObj().jsonString() << endl;
}
else {
cout << "i " << _strategy.insertObj().jsonString() << endl;
}
}
private:
InsertAndRemoveStrategy &_strategy;
BernoulliGenerator _nextOpTypeRemove;
};
/**
* Run a test script from cin that was generated by
* InsertAndRemoveScriptGenerator. Running the script is intended to be
* lightweight in terms of memory and cpu usage, and fast.
*/
class InsertAndRemoveScriptRunner {
public:
InsertAndRemoveScriptRunner( DBClientConnection &conn ) :
_conn( conn ) {
}
void writeOne() {
cin.getline( _buf, 1024 );
BSONObj val = fromjson( _buf + 2 );
if ( _buf[ 0 ] == 'r' ) {
_conn.remove( ns, val, true );
}
else {
_conn.insert( ns, val );
}
}
private:
DBClientConnection &_conn;
char _buf[ 1024 ];
};
int main( int argc, const char **argv ) {
DBClientConnection conn;
conn.connect( "127.0.0.1:27017" );
conn.dropCollection( ns );
// UniformInsertRangedUniformRemoveInteger strategy;
// UniformInsertUniformRemoveInteger strategy;
// UniformInsertRangedUniformRemoveString strategy;
// UniformInsertUniformRemoveString strategy;
// IncreasingInsertRangedUniformRemoveOID strategy;
// IncreasingInsertUniformRemoveOID strategy;
// IncreasingInsertIncreasingRemoveInteger strategy;
// InsertAndRemoveScriptGenerator runner( strategy, 5 );
InsertAndRemoveScriptRunner runner( conn );
Timer t;
BSONObj statsCmd = BSON( "collstats" << index_collection );
// Print header, unless we are generating a script (in that case, comment this out).
cout << "ops,milliseconds,docs,totalBucketSize" << endl;
long long i = 0;
long long n = 10000000000;
while( i < n ) {
runner.writeOne();
// Print statistics, unless we are generating a script (in that case, comment this out).
// The stats collection requests below provide regular read operations,
// ensuring we are caught up with the progress being made by the mongod
// under analysis.
if ( ++i % 50000 == 0 ) {
// The total number of documents present.
long long docs = conn.count( ns );
BSONObj result;
conn.runCommand( db, statsCmd, result );
// The total number of bytes used for all allocated 8K buckets of the
// btree.
long long totalBucketSize = result.getField( "count" ).numberLong() * 8192;
cout << i << ',' << t.millis() << ',' << docs << ',' << totalBucketSize << endl;
}
}
}