0
0
mirror of https://github.com/mongodb/mongo.git synced 2024-12-01 09:32:32 +01:00
mongodb/db/queryoptimizer.h
2010-06-21 13:41:34 -04:00

405 lines
17 KiB
C++

/* queryoptimizer.h */
/**
* Copyright (C) 2008 10gen Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License, version 3,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include "cursor.h"
#include "jsobj.h"
#include "queryutil.h"
#include "matcher.h"
namespace mongo {
class IndexDetails;
class IndexType;
// One candidate plan for running a query against a namespace: the index it
// would use (if any) plus a handful of flags describing how well that index
// suits the query and the requested sort order.
class QueryPlan : boost::noncopyable {
public:
    // _idxNo selects the index to use; -1 means "no index".
    QueryPlan( NamespaceDetails *_d,
               int _idxNo,
               const FieldRangeSet &fbs,
               const BSONObj &originalQuery,
               const BSONObj &order,
               const BSONObj &startKey = BSONObj(),
               const BSONObj &endKey = BSONObj(),
               string special = "" );

    // True when no other index can do better than this plan.
    bool optimal() const {
        return optimal_;
    }

    // True when ScanAndOrder processing will be required.
    bool scanAndOrderRequired() const {
        return scanAndOrderRequired_;
    }

    // True when the keys of the index in use can completely resolve the
    // query expression on their own, without ever checking the main object.
    bool exactKeyMatch() const {
        return exactKeyMatch_;
    }

    // True when the startKey and endKey are unhelpful and the index order
    // does not match the requested sort order.
    bool unhelpful() const {
        return unhelpful_;
    }

    int direction() const {
        return direction_;
    }

    shared_ptr<Cursor> newCursor( const DiskLoc &startLoc = DiskLoc() , int numWanted=0 ) const;
    shared_ptr<Cursor> newReverseCursor() const;
    BSONObj indexKey() const;

    // Namespace name, as reported by the field range set.
    const char *ns() const {
        return fbs_.ns();
    }

    NamespaceDetails *nsd() const {
        return d;
    }

    // Returns a copy of the query this plan was built for.
    BSONObj originalQuery() const {
        return _originalQuery;
    }

    // Forwards to FieldRangeSet::simplifiedQuery() on this plan's range set.
    BSONObj simplifiedQuery( const BSONObj& fields = BSONObj(), bool expandIn = false ) const {
        return fbs_.simplifiedQuery( fields, expandIn );
    }

    // Forwards to FieldRangeSet::range() on this plan's range set.
    const FieldRange &range( const char *fieldName ) const {
        return fbs_.range( fieldName );
    }

    // NOTE(review): presumably records this plan (with its scan count) so it
    // can be reused for similar queries -- confirm against the .cpp.
    void registerSelf( long long nScanned ) const;

    // Exposed for tests only.
    BoundList indexBounds() const {
        return indexBounds_;
    }

private:
    NamespaceDetails *d;
    int idxNo;
    // The three members below are references, not copies.
    // NOTE(review): presumably bound to the constructor arguments, in which
    // case those objects must outlive the plan -- confirm against the .cpp.
    const FieldRangeSet &fbs_;
    const BSONObj &_originalQuery;
    const BSONObj &order_;
    const IndexDetails *index_;
    bool optimal_;
    bool scanAndOrderRequired_;
    bool exactKeyMatch_;
    int direction_;
    BoundList indexBounds_;
    bool endKeyInclusive_;
    bool unhelpful_;
    string _special;
    IndexType * _type;
};
// Base interface for a query operation.  Derive from it to implement a new
// operation; the query optimizer clones the QueryOp it is given so that every
// clone runs against its own query plan.
class QueryOp {
public:
    QueryOp()
        : _complete(), _stopRequested(), _qp(), _error(), _haveOrConstraint() {}

    // Used when handing off from one QueryOp type to another.  Only the
    // matcher and the pending $or constraint are carried over; completion,
    // stop and error state and the plan pointer start out cleared.
    QueryOp( const QueryOp &other )
        : _complete(), _stopRequested(), _qp(), _error(),
          _matcher( other._matcher ),
          _haveOrConstraint( other._haveOrConstraint ),
          _orConstraint( other._orConstraint ) {}

    virtual ~QueryOp() {}

    /** Called after a query plan has been set.  Builds the matcher for this
        op and then runs the subclass hook _init().
     */
    void init() {
        if ( !_oldMatcher.get() ) {
            // No earlier clause: build a matcher straight from the plan.
            _matcher.reset( new CoveredIndexMatcher( qp().originalQuery(), qp().indexKey(), alwaysUseRecord() ) );
        }
        else {
            // Created via createChild(): derive the matcher from the parent's.
            _matcher.reset( _oldMatcher->nextClauseMatcher( qp().indexKey() ) );
        }
        _init();
    }

    virtual void next() = 0;
    virtual bool mayRecordPlan() const = 0;

    /** @return a copy of the inheriting class, which will be run with its own
                query plan.  If multiple plan sets are required for an $or
                query, the QueryOp of the winning plan from a given set will
                be cloned to generate QueryOps for the subsequent plan set.
                This function should only be called after the query op has
                completed executing.
     */
    QueryOp *createChild() {
        if ( _haveOrConstraint ) {
            // Fold the constraint saved by setComplete() into the matcher
            // once, before it is handed on to the child.
            _matcher->addOrConstraint( _orConstraint );
            _haveOrConstraint = false;
        }
        QueryOp *child = _createChild();
        child->_oldMatcher = _matcher;
        return child;
    }

    bool complete() const {
        return _complete;
    }
    bool error() const {
        return _error;
    }
    bool stopRequested() const {
        return _stopRequested;
    }
    ExceptionInfo exception() const {
        return _exception;
    }

    // Dereferences the plan pointer; setQueryPlan() must have been called.
    const QueryPlan &qp() const {
        return *_qp;
    }

    // To be called by QueryPlanSet::Runner only.
    void setQueryPlan( const QueryPlan *qp ) {
        _qp = qp;
    }

    // Marks the op as failed and stores the exception's info.
    void setException( const DBException &e ) {
        _error = true;
        _exception = e.getInfo();
    }

    shared_ptr< CoveredIndexMatcher > matcher() const {
        return _matcher;
    }

protected:
    // Marks the op complete and saves the plan's simplified query (restricted
    // to the index key, with expandIn set) as a pending $or constraint that
    // createChild() will later apply.
    void setComplete() {
        _haveOrConstraint = true;
        _orConstraint = qp().simplifiedQuery( qp().indexKey(), true );
        _complete = true;
    }

    // Completes the op and additionally raises the stop-requested flag.
    void setStop() {
        setComplete();
        _stopRequested = true;
    }

    virtual void _init() = 0;
    virtual QueryOp *_createChild() const = 0;
    virtual bool alwaysUseRecord() const {
        return false;
    }

private:
    bool _complete;
    bool _stopRequested;
    ExceptionInfo _exception;
    const QueryPlan *_qp;
    bool _error;
    shared_ptr< CoveredIndexMatcher > _matcher;
    shared_ptr< CoveredIndexMatcher > _oldMatcher;
    bool _haveOrConstraint;
    BSONObj _orConstraint;
};
// Set of candidate query plans for a particular query.  Used for running
// a QueryOp on these plans.
class QueryPlanSet {
public:
    typedef boost::shared_ptr< QueryPlan > PlanPtr;
    typedef vector< PlanPtr > PlanSet;

    // 'frs' is passed as an auto_ptr by value, so the caller gives up
    // ownership of the field range set.
    QueryPlanSet( const char *ns,
                  auto_ptr< FieldRangeSet > frs,
                  const BSONObj &originalQuery,
                  const BSONObj &order,
                  const BSONElement *hint = 0,
                  bool honorRecordedPlan = true,
                  const BSONObj &min = BSONObj(),
                  const BSONObj &max = BSONObj() );

    // Number of plans currently in the set.
    int nPlans() const { return static_cast< int >( plans_.size() ); }

    shared_ptr< QueryOp > runOp( QueryOp &op );

    // Typed convenience wrapper: runs 'op' through the QueryOp overload and
    // casts the result back to T with dynamic_pointer_cast.
    template< class T >
    shared_ptr< T > runOp( T &op ) {
        return dynamic_pointer_cast< T >( runOp( static_cast< QueryOp& >( op ) ) );
    }

    BSONObj explain() const;
    bool usingPrerecordedPlan() const { return usingPrerecordedPlan_; }
    PlanPtr getBestGuess() const;
    void setBestGuessOnly() { _bestGuessOnly = true; }

    // for testing
    const FieldRangeSet &fbs() const { return *fbs_; }

private:
    void addOtherPlans( bool checkFirst );

    // Appends 'plan' to the set.  When 'checkFirst' is true the plan is
    // dropped if its index key equals that of the first plan already in the
    // set.  An empty set has no first plan to compare against, so the plan is
    // simply added (indexing plans_[ 0 ] there would be undefined behaviour).
    void addPlan( PlanPtr plan, bool checkFirst ) {
        if ( checkFirst && !plans_.empty() &&
             plan->indexKey().woCompare( plans_[ 0 ]->indexKey() ) == 0 )
            return;
        plans_.push_back( plan );
    }

    void init();
    void addHint( IndexDetails &id );

    // Helper that runs one QueryOp across the plans of a QueryPlanSet.
    struct Runner {
        Runner( QueryPlanSet &plans, QueryOp &op );
        shared_ptr< QueryOp > run();
        QueryOp &op_;
        QueryPlanSet &plans_;
        static void initOp( QueryOp &op );
        static void nextOp( QueryOp &op );
    };

    const char *ns;
    BSONObj _originalQuery;
    auto_ptr< FieldRangeSet > fbs_;
    PlanSet plans_;
    bool mayRecordPlan_;
    bool usingPrerecordedPlan_;
    BSONObj hint_;
    BSONObj order_;
    long long oldNScanned_;
    bool honorRecordedPlan_;
    BSONObj min_;
    BSONObj max_;
    string _special;
    bool _bestGuessOnly;
};
// Handles $or type queries by generating a QueryPlanSet for each $or clause
// NOTE on our $or implementation: In our current query optimizer implementation we don't
// keep statistics on our data, but we can conceptualize the problem of
// selecting an index when statistics exist for all index ranges. The
// d-hitting set problem on k sets and n elements can be reduced to the
// problem of index selection on k $or clauses and n index ranges (where
// d is the max number of indexes, and the number of ranges n is unbounded).
// In light of the fact that d-hitting set is NP-complete, and we don't even
// track statistics (so cost calculations are expensive) our first
// implementation uses the following greedy approach: We take one $or clause
// at a time and treat each as a separate query for index selection purposes.
// But if an index range is scanned for a particular $or clause, we eliminate
// that range from all subsequent clauses. One could imagine an opposite
// implementation where we select indexes based on the union of index ranges
// for all $or clauses, but this can have much poorer worst case behavior.
// (An index range that suits one $or clause may not suit another, and this
// is worse than the typical case of index range choice staleness because
// with $or the clauses may likely be logically distinct.) The greedy
// implementation won't do any worse than all the $or clauses individually,
// and it can often do better. In the first cut we are intentionally using
// QueryPattern tracking to record successful plans on $or queries for use by
// subsequent $or queries, even though there may be a significant aggregate
// $nor component that would not be represented in QueryPattern.
class MultiPlanScanner {
public:
    MultiPlanScanner( const char *ns,
                      const BSONObj &query,
                      const BSONObj &order,
                      const BSONElement *hint = 0,
                      bool honorRecordedPlan = true,
                      const BSONObj &min = BSONObj(),
                      const BSONObj &max = BSONObj() );

    shared_ptr< QueryOp > runOp( QueryOp &op );

    // Typed wrapper around runOp( QueryOp& ); the result is cast back to T.
    template< class T >
    shared_ptr< T > runOp( T &op ) {
        return dynamic_pointer_cast< T >( runOp( static_cast< QueryOp& >( op ) ) );
    }

    shared_ptr< QueryOp > runOpOnce( QueryOp &op );

    // Typed wrapper around runOpOnce( QueryOp& ); the result is cast back to T.
    template< class T >
    shared_ptr< T > runOpOnce( T &op ) {
        return dynamic_pointer_cast< T >( runOpOnce( static_cast< QueryOp& >( op ) ) );
    }

    // For an $or query there is more to run until the or-set reports it is
    // finished; otherwise there is more to run only while _i is still 0.
    bool mayRunMore() const {
        if ( _or )
            return !_fros.orFinished();
        return _i == 0;
    }

    // Explain output of the current plan set.  Not implemented for $or
    // queries (asserts with code 13266).
    BSONObj oldExplain() const {
        assertNotOr();
        return _currentQps->explain();
    }

    // Only reported when there is a single query op; always false for $or.
    bool usingPrerecordedPlan() const {
        if ( _or )
            return false;
        return _currentQps->usingPrerecordedPlan();
    }

    void setBestGuessOnly() {
        _bestGuessOnly = true;
    }

private:
    void assertNotOr() const {
        massert( 13266, "not implemented for $or query", !_or );
    }

    bool uselessOr( const BSONElement &hint ) const;

    const char * _ns;
    bool _or;
    BSONObj _query;
    FieldRangeOrSet _fros;
    auto_ptr< QueryPlanSet > _currentQps;
    int _i;
    bool _honorRecordedPlan;
    bool _bestGuessOnly;
    BSONObj _hint;
};
// Cursor that presents the results of a MultiPlanScanner as one stream.  All
// Cursor calls are forwarded to the cursor of the current clause (_c); when
// that cursor runs out and the scanner may run more, the next clause is run
// and its cursor takes over.
class MultiCursor : public Cursor {
public:
    // A QueryOp that can also hand back a cursor for the plan it ran on.
    class CursorOp : public QueryOp {
    public:
        CursorOp() {}
        CursorOp( const QueryOp &other ) : QueryOp( other ) {}
        virtual shared_ptr< Cursor > newCursor() const = 0;
    };

    // Shares ownership of 'op'.  When no op is supplied a NoOp is used and
    // the scanner is restricted to its best-guess plan.
    MultiCursor( const char *ns, const BSONObj &pattern, const BSONObj &order, shared_ptr< CursorOp > op = shared_ptr< CursorOp >() )
        : _mps( new MultiPlanScanner( ns, pattern, order ) ) {
        if ( op.get() ) {
            _op = op;
        } else {
            _op.reset( new NoOp() );
            _mps->setBestGuessOnly();
        }
        if ( _mps->mayRunMore() ) {
            nextClause();
            if ( !ok() ) {
                // First clause produced nothing; move on to later clauses.
                advance();
            }
        } else {
            // Nothing to run: fall back to a BasicCursor on a default DiskLoc.
            _c.reset( new BasicCursor( DiskLoc() ) );
        }
    }

    // used to handoff a query to a getMore()
    // Takes over the scanner (auto_ptr passed by value) and continues from
    // the supplied cursor and matcher.
    MultiCursor( auto_ptr< MultiPlanScanner > mps, const shared_ptr< Cursor > &c, const shared_ptr< CoveredIndexMatcher > &matcher, const QueryOp &op )
        : _op( new NoOp( op ) ), _c( c ), _mps( mps ), _matcher( matcher ) {
        _mps->setBestGuessOnly();
        if ( !ok() ) {
            // would have been advanced by UserQueryOp if possible
            advance();
        }
    }

    virtual bool ok() { return _c->ok(); }
    virtual Record* _current() { return _c->_current(); }
    virtual BSONObj current() { return _c->current(); }
    virtual DiskLoc currLoc() { return _c->currLoc(); }

    // Advances the current cursor; if it is exhausted, keeps starting new
    // clauses until one yields a document or the scanner has nothing left.
    virtual bool advance() {
        _c->advance();
        while( !ok() && _mps->mayRunMore() ) {
            nextClause();
        }
        return ok();
    }

    virtual BSONObj currKey() const { return _c->currKey(); }
    virtual DiskLoc refLoc() { return _c->refLoc(); }
    virtual void noteLocation() {
        _c->noteLocation();
    }
    virtual void checkLocation() {
        _c->checkLocation();
    }
    virtual bool supportGetMore() { return true; }

    // with update we could potentially get the same document on multiple
    // indexes, but update appears to already handle this with seenObjects
    // so we don't have to do anything special here.
    virtual bool getsetdup(DiskLoc loc) {
        return _c->getsetdup( loc );
    }

    virtual CoveredIndexMatcher *matcher() const { return _matcher.get(); }

private:
    // CursorOp that does no work of its own: it completes as soon as it is
    // initialised and just exposes the cursor of whatever plan it was given.
    // It holds no state beyond what QueryOp already tracks.
    class NoOp : public CursorOp {
    public:
        NoOp() {}
        NoOp( const QueryOp &other ) : CursorOp( other ) {}
        virtual void _init() { setComplete(); }
        virtual void next() {}
        virtual bool mayRecordPlan() const { return false; }
        virtual QueryOp *_createChild() const { return new NoOp(); }
        virtual shared_ptr< Cursor > newCursor() const { return qp().newCursor(); }
    };

    // Runs the current op once through the scanner, then adopts the winning
    // op together with its cursor and matcher.  Throws MsgAssertionException
    // carrying the op's exception info if the winner did not complete.
    void nextClause() {
        shared_ptr< CursorOp > best = _mps->runOpOnce( *_op );
        if ( ! best->complete() )
            throw MsgAssertionException( best->exception() );
        _c = best->newCursor();
        _matcher = best->matcher();
        _op = best;
    }

    shared_ptr< CursorOp > _op;
    shared_ptr< Cursor > _c;
    auto_ptr< MultiPlanScanner > _mps;
    shared_ptr< CoveredIndexMatcher > _matcher;
};
// Declaration only; the definition lives outside this header.
// NOTE min, max, and keyPattern will be updated to be consistent with the selected index.
// They are taken by non-const reference for that reason.
// NOTE(review): 'errmsg' is presumably filled in when no suitable index is
// found -- confirm against the definition.
IndexDetails *indexDetailsForRange( const char *ns, string &errmsg, BSONObj &min, BSONObj &max, BSONObj &keyPattern );
// True when 'query' has exactly one field, that field is named "_id", and its
// element reports isSimpleType().
inline bool isSimpleIdQuery( const BSONObj& query ){
    if ( strcmp( query.firstElement().fieldName() , "_id" ) != 0 )
        return false;
    if ( query.nFields() != 1 )
        return false;
    return query.firstElement().isSimpleType();
}
// Returns a cursor for 'query' / 'sort' on 'ns' built from a best-guess plan.
// A query containing a "$or" field gets a MultiCursor; any other query gets
// the cursor of QueryPlanSet::getBestGuess(), with a CoveredIndexMatcher
// attached when the query is non-empty.
// matcher() will always work on the returned cursor
inline shared_ptr< Cursor > bestGuessCursor( const char *ns, const BSONObj &query, const BSONObj &sort ) {
    const bool hasOr = !query.getField( "$or" ).eoo();
    if ( hasOr ) {
        return shared_ptr< Cursor >( new MultiCursor( ns, query, sort ) );
    }

    auto_ptr< FieldRangeSet > ranges( new FieldRangeSet( ns, query ) );
    // The plan set is a temporary on purpose: only the cursor is kept.
    shared_ptr< Cursor > cursor = QueryPlanSet( ns, ranges, query, sort ).getBestGuess()->newCursor();
    if ( query.isEmpty() ) {
        return cursor;
    }

    shared_ptr< CoveredIndexMatcher > matcher( new CoveredIndexMatcher( query, cursor->indexKeyPattern() ) );
    cursor->setMatcher( matcher );
    return cursor;
}
} // namespace mongo