mirror of
https://github.com/mongodb/mongo.git
synced 2024-12-01 09:32:32 +01:00
567 lines
22 KiB
C++
567 lines
22 KiB
C++
// queryutil.h
|
|
|
|
/* Copyright 2009 10gen Inc.
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
#pragma once
|
|
|
|
#include <limits>

#include "jsobj.h"
|
|
|
|
namespace mongo {
|
|
|
|
struct FieldBound {
|
|
BSONElement _bound;
|
|
bool _inclusive;
|
|
bool operator==( const FieldBound &other ) const {
|
|
return _bound.woCompare( other._bound ) == 0 &&
|
|
_inclusive == other._inclusive;
|
|
}
|
|
void flipInclusive() { _inclusive = !_inclusive; }
|
|
};
|
|
|
|
struct FieldInterval {
|
|
FieldInterval() : _cachedEquality( -1 ) {}
|
|
FieldInterval( const BSONElement& e ) : _cachedEquality( -1 ) {
|
|
_lower._bound = _upper._bound = e;
|
|
_lower._inclusive = _upper._inclusive = true;
|
|
}
|
|
FieldBound _lower;
|
|
FieldBound _upper;
|
|
bool strictValid() const {
|
|
int cmp = _lower._bound.woCompare( _upper._bound, false );
|
|
return ( cmp < 0 || ( cmp == 0 && _lower._inclusive && _upper._inclusive ) );
|
|
}
|
|
bool equality() const {
|
|
if ( _cachedEquality == -1 ) {
|
|
_cachedEquality = ( _lower._inclusive && _upper._inclusive && _lower._bound.woCompare( _upper._bound, false ) == 0 );
|
|
}
|
|
return _cachedEquality;
|
|
}
|
|
mutable int _cachedEquality;
|
|
};
|
|
|
|
// range of a field's value that may be determined from query -- used to
|
|
// determine index limits
|
|
class FieldRange {
|
|
public:
|
|
FieldRange( const BSONElement &e = BSONObj().firstElement() , bool isNot=false , bool optimize=true );
|
|
const FieldRange &operator&=( const FieldRange &other );
|
|
const FieldRange &operator|=( const FieldRange &other );
|
|
// does not remove fully contained ranges (eg [1,3] - [2,2] doesn't remove anything)
|
|
// in future we can change so that an or on $in:[3] combined with $gt:2 doesn't scan 3 a second time
|
|
const FieldRange &operator-=( const FieldRange &other );
|
|
// true iff other includes this
|
|
bool operator<=( const FieldRange &other );
|
|
BSONElement min() const { assert( !empty() ); return _intervals[ 0 ]._lower._bound; }
|
|
BSONElement max() const { assert( !empty() ); return _intervals[ _intervals.size() - 1 ]._upper._bound; }
|
|
bool minInclusive() const { assert( !empty() ); return _intervals[ 0 ]._lower._inclusive; }
|
|
bool maxInclusive() const { assert( !empty() ); return _intervals[ _intervals.size() - 1 ]._upper._inclusive; }
|
|
bool equality() const {
|
|
return
|
|
!empty() &&
|
|
min().woCompare( max(), false ) == 0 &&
|
|
maxInclusive() &&
|
|
minInclusive();
|
|
}
|
|
bool inQuery() const {
|
|
if ( equality() ) {
|
|
return true;
|
|
}
|
|
for( vector< FieldInterval >::const_iterator i = _intervals.begin(); i != _intervals.end(); ++i ) {
|
|
if ( !i->equality() ) {
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
bool nontrivial() const {
|
|
return
|
|
! empty() &&
|
|
( minKey.firstElement().woCompare( min(), false ) != 0 ||
|
|
maxKey.firstElement().woCompare( max(), false ) != 0 );
|
|
}
|
|
bool empty() const { return _intervals.empty(); }
|
|
void makeEmpty() { _intervals.clear(); }
|
|
const vector< FieldInterval > &intervals() const { return _intervals; }
|
|
string getSpecial() const { return _special; }
|
|
void setExclusiveBounds() {
|
|
for( vector< FieldInterval >::iterator i = _intervals.begin(); i != _intervals.end(); ++i ) {
|
|
i->_lower._inclusive = false;
|
|
i->_upper._inclusive = false;
|
|
}
|
|
}
|
|
// constructs a range which is the reverse of the current one
|
|
// note - the resulting intervals may not be strictValid()
|
|
void reverse( FieldRange &ret ) const {
|
|
assert( _special.empty() );
|
|
ret._intervals.clear();
|
|
ret._objData = _objData;
|
|
for( vector< FieldInterval >::const_reverse_iterator i = _intervals.rbegin(); i != _intervals.rend(); ++i ) {
|
|
FieldInterval fi;
|
|
fi._lower = i->_upper;
|
|
fi._upper = i->_lower;
|
|
ret._intervals.push_back( fi );
|
|
}
|
|
}
|
|
private:
|
|
BSONObj addObj( const BSONObj &o );
|
|
void finishOperation( const vector< FieldInterval > &newIntervals, const FieldRange &other );
|
|
vector< FieldInterval > _intervals;
|
|
vector< BSONObj > _objData;
|
|
string _special;
|
|
};
|
|
|
|
// implements query pattern matching, used to determine if a query is
|
|
// similar to an earlier query and should use the same plan
|
|
class QueryPattern {
|
|
public:
|
|
friend class FieldRangeSet;
|
|
enum Type {
|
|
Equality,
|
|
LowerBound,
|
|
UpperBound,
|
|
UpperAndLowerBound
|
|
};
|
|
// for testing only, speed unimportant
|
|
bool operator==( const QueryPattern &other ) const {
|
|
bool less = operator<( other );
|
|
bool more = other.operator<( *this );
|
|
assert( !( less && more ) );
|
|
return !( less || more );
|
|
}
|
|
bool operator!=( const QueryPattern &other ) const {
|
|
return !operator==( other );
|
|
}
|
|
bool operator<( const QueryPattern &other ) const {
|
|
map< string, Type >::const_iterator i = _fieldTypes.begin();
|
|
map< string, Type >::const_iterator j = other._fieldTypes.begin();
|
|
while( i != _fieldTypes.end() ) {
|
|
if ( j == other._fieldTypes.end() )
|
|
return false;
|
|
if ( i->first < j->first )
|
|
return true;
|
|
else if ( i->first > j->first )
|
|
return false;
|
|
if ( i->second < j->second )
|
|
return true;
|
|
else if ( i->second > j->second )
|
|
return false;
|
|
++i;
|
|
++j;
|
|
}
|
|
if ( j != other._fieldTypes.end() )
|
|
return true;
|
|
return _sort.woCompare( other._sort ) < 0;
|
|
}
|
|
private:
|
|
QueryPattern() {}
|
|
void setSort( const BSONObj sort ) {
|
|
_sort = normalizeSort( sort );
|
|
}
|
|
BSONObj static normalizeSort( const BSONObj &spec ) {
|
|
if ( spec.isEmpty() )
|
|
return spec;
|
|
int direction = ( spec.firstElement().number() >= 0 ) ? 1 : -1;
|
|
BSONObjIterator i( spec );
|
|
BSONObjBuilder b;
|
|
while( i.moreWithEOO() ) {
|
|
BSONElement e = i.next();
|
|
if ( e.eoo() )
|
|
break;
|
|
b.append( e.fieldName(), direction * ( ( e.number() >= 0 ) ? -1 : 1 ) );
|
|
}
|
|
return b.obj();
|
|
}
|
|
map< string, Type > _fieldTypes;
|
|
BSONObj _sort;
|
|
};
|
|
|
|
// a BoundList contains intervals specified by inclusive start
|
|
// and end bounds. The intervals should be nonoverlapping and occur in
|
|
// the specified direction of traversal. For example, given a simple index {i:1}
|
|
// and direction +1, one valid BoundList is: (1, 2); (4, 6). The same BoundList
|
|
// would be valid for index {i:-1} with direction -1.
|
|
// NOTE(review): presumably pair.first is an interval's start bound and
// pair.second its end bound -- confirm against indexBounds().
typedef vector< pair< BSONObj, BSONObj > > BoundList;
|
|
|
|
// ranges of fields' value that may be determined from query -- used to
|
|
// determine index limits
|
|
class FieldRangeSet {
|
|
public:
|
|
friend class FieldRangeOrSet;
|
|
friend class FieldRangeVector;
|
|
FieldRangeSet( const char *ns, const BSONObj &query , bool optimize=true );
|
|
bool hasRange( const char *fieldName ) const {
|
|
map< string, FieldRange >::const_iterator f = _ranges.find( fieldName );
|
|
return f != _ranges.end();
|
|
}
|
|
const FieldRange &range( const char *fieldName ) const {
|
|
map< string, FieldRange >::const_iterator f = _ranges.find( fieldName );
|
|
if ( f == _ranges.end() )
|
|
return trivialRange();
|
|
return f->second;
|
|
}
|
|
FieldRange &range( const char *fieldName ) {
|
|
map< string, FieldRange >::iterator f = _ranges.find( fieldName );
|
|
if ( f == _ranges.end() )
|
|
return trivialRange();
|
|
return f->second;
|
|
}
|
|
int nNontrivialRanges() const {
|
|
int count = 0;
|
|
for( map< string, FieldRange >::const_iterator i = _ranges.begin(); i != _ranges.end(); ++i )
|
|
if ( i->second.nontrivial() )
|
|
++count;
|
|
return count;
|
|
}
|
|
const char *ns() const { return _ns; }
|
|
// if fields is specified, order fields of returned object to match those of 'fields'
|
|
BSONObj simplifiedQuery( const BSONObj &fields = BSONObj() ) const;
|
|
bool matchPossible() const {
|
|
for( map< string, FieldRange >::const_iterator i = _ranges.begin(); i != _ranges.end(); ++i )
|
|
if ( i->second.empty() )
|
|
return false;
|
|
return true;
|
|
}
|
|
QueryPattern pattern( const BSONObj &sort = BSONObj() ) const;
|
|
string getSpecial() const;
|
|
const FieldRangeSet &operator-=( const FieldRangeSet &other ) {
|
|
int nUnincluded = 0;
|
|
string unincludedKey;
|
|
map< string, FieldRange >::iterator i = _ranges.begin();
|
|
map< string, FieldRange >::const_iterator j = other._ranges.begin();
|
|
while( nUnincluded < 2 && i != _ranges.end() && j != other._ranges.end() ) {
|
|
int cmp = i->first.compare( j->first );
|
|
if ( cmp == 0 ) {
|
|
if ( i->second <= j->second ) {
|
|
// nothing
|
|
} else {
|
|
++nUnincluded;
|
|
unincludedKey = i->first;
|
|
}
|
|
++i;
|
|
++j;
|
|
} else if ( cmp < 0 ) {
|
|
++i;
|
|
} else {
|
|
// other has a bound we don't, nothing can be done
|
|
return *this;
|
|
}
|
|
}
|
|
if ( j != other._ranges.end() ) {
|
|
// other has a bound we don't, nothing can be done
|
|
return *this;
|
|
}
|
|
if ( nUnincluded > 1 ) {
|
|
return *this;
|
|
}
|
|
if ( nUnincluded == 0 ) {
|
|
makeEmpty();
|
|
return *this;
|
|
}
|
|
// nUnincluded == 1
|
|
_ranges[ unincludedKey ] -= other._ranges[ unincludedKey ];
|
|
appendQueries( other );
|
|
return *this;
|
|
}
|
|
const FieldRangeSet &operator&=( const FieldRangeSet &other ) {
|
|
map< string, FieldRange >::iterator i = _ranges.begin();
|
|
map< string, FieldRange >::const_iterator j = other._ranges.begin();
|
|
while( i != _ranges.end() && j != other._ranges.end() ) {
|
|
int cmp = i->first.compare( j->first );
|
|
if ( cmp == 0 ) {
|
|
i->second &= j->second;
|
|
++i;
|
|
++j;
|
|
} else if ( cmp < 0 ) {
|
|
++i;
|
|
} else {
|
|
_ranges[ j->first ] = j->second;
|
|
++j;
|
|
}
|
|
}
|
|
while( j != other._ranges.end() ) {
|
|
_ranges[ j->first ] = j->second;
|
|
++j;
|
|
}
|
|
appendQueries( other );
|
|
return *this;
|
|
}
|
|
// TODO get rid of this
|
|
BoundList indexBounds( const BSONObj &keyPattern, int direction ) const;
|
|
private:
|
|
void appendQueries( const FieldRangeSet &other ) {
|
|
for( vector< BSONObj >::const_iterator i = other._queries.begin(); i != other._queries.end(); ++i ) {
|
|
_queries.push_back( *i );
|
|
}
|
|
}
|
|
void makeEmpty() {
|
|
for( map< string, FieldRange >::iterator i = _ranges.begin(); i != _ranges.end(); ++i ) {
|
|
i->second.makeEmpty();
|
|
}
|
|
}
|
|
void processQueryField( const BSONElement &e, bool optimize );
|
|
void processOpElement( const char *fieldName, const BSONElement &f, bool isNot, bool optimize );
|
|
static FieldRange *trivialRange_;
|
|
static FieldRange &trivialRange();
|
|
mutable map< string, FieldRange > _ranges;
|
|
const char *_ns;
|
|
// make sure memory for FieldRange BSONElements is owned
|
|
vector< BSONObj > _queries;
|
|
};
|
|
|
|
class FieldRangeVector {
|
|
public:
|
|
FieldRangeVector( const FieldRangeSet &frs, const BSONObj &keyPattern, int direction )
|
|
:_keyPattern( keyPattern ), _direction( direction >= 0 ? 1 : -1 )
|
|
{
|
|
_queries = frs._queries;
|
|
BSONObjIterator i( _keyPattern );
|
|
while( i.more() ) {
|
|
BSONElement e = i.next();
|
|
int number = (int) e.number(); // returns 0.0 if not numeric
|
|
bool forward = ( ( number >= 0 ? 1 : -1 ) * ( direction >= 0 ? 1 : -1 ) > 0 );
|
|
if ( forward ) {
|
|
_ranges.push_back( frs.range( e.fieldName() ) );
|
|
} else {
|
|
_ranges.push_back( FieldRange() );
|
|
frs.range( e.fieldName() ).reverse( _ranges.back() );
|
|
}
|
|
assert( !_ranges.back().empty() );
|
|
}
|
|
uassert( 13385, "combinatorial limit of $in partitioning of result set exceeded", size() < 1000000 );
|
|
}
|
|
long long size() {
|
|
long long ret = 1;
|
|
for( vector< FieldRange >::const_iterator i = _ranges.begin(); i != _ranges.end(); ++i ) {
|
|
ret *= i->intervals().size();
|
|
}
|
|
return ret;
|
|
}
|
|
BSONObj startKey() const {
|
|
BSONObjBuilder b;
|
|
for( vector< FieldRange >::const_iterator i = _ranges.begin(); i != _ranges.end(); ++i ) {
|
|
const FieldInterval &fi = i->intervals().front();
|
|
b.appendAs( fi._lower._bound, "" );
|
|
}
|
|
return b.obj();
|
|
}
|
|
BSONObj endKey() const {
|
|
BSONObjBuilder b;
|
|
for( vector< FieldRange >::const_iterator i = _ranges.begin(); i != _ranges.end(); ++i ) {
|
|
const FieldInterval &fi = i->intervals().back();
|
|
b.appendAs( fi._upper._bound, "" );
|
|
}
|
|
return b.obj();
|
|
}
|
|
BSONObj obj() const {
|
|
BSONObjBuilder b;
|
|
BSONObjIterator k( _keyPattern );
|
|
for( int i = 0; i < (int)_ranges.size(); ++i ) {
|
|
BSONArrayBuilder a( b.subarrayStart( k.next().fieldName() ) );
|
|
for( vector< FieldInterval >::const_iterator j = _ranges[ i ].intervals().begin();
|
|
j != _ranges[ i ].intervals().end(); ++j ) {
|
|
a << BSONArray( BSON_ARRAY( j->_lower._bound << j->_upper._bound ).clientReadable() );
|
|
}
|
|
a.done();
|
|
}
|
|
return b.obj();
|
|
}
|
|
bool matches( const BSONObj &obj ) const;
|
|
class Iterator {
|
|
public:
|
|
Iterator( const FieldRangeVector &v ) : _v( v ), _i( _v._ranges.size(), -1 ), _cmp( _v._ranges.size(), 0 ), _inc( _v._ranges.size(), false ), _after() {
|
|
}
|
|
static BSONObj minObject() {
|
|
BSONObjBuilder b;
|
|
b.appendMinKey( "" );
|
|
return b.obj();
|
|
}
|
|
static BSONObj maxObject() {
|
|
BSONObjBuilder b;
|
|
b.appendMaxKey( "" );
|
|
return b.obj();
|
|
}
|
|
bool advance() {
|
|
int i = _i.size() - 1;
|
|
while( i >= 0 && _i[ i ] >= ( (int)_v._ranges[ i ].intervals().size() - 1 ) ) {
|
|
--i;
|
|
}
|
|
if( i >= 0 ) {
|
|
_i[ i ]++;
|
|
for( unsigned j = i + 1; j < _i.size(); ++j ) {
|
|
_i[ j ] = 0;
|
|
}
|
|
} else {
|
|
_i[ 0 ] = _v._ranges[ 0 ].intervals().size();
|
|
}
|
|
return ok();
|
|
}
|
|
// return value
|
|
// -2 end of iteration
|
|
// -1 no skipping
|
|
// >= 0 skip parameter
|
|
int advance( const BSONObj &curr );
|
|
const vector< const BSONElement * > &cmp() const { return _cmp; }
|
|
const vector< bool > &inc() const { return _inc; }
|
|
bool after() const { return _after; }
|
|
void prepDive();
|
|
void setZero( int i ) {
|
|
for( int j = i; j < (int)_i.size(); ++j ) {
|
|
_i[ j ] = 0;
|
|
}
|
|
}
|
|
void setMinus( int i ) {
|
|
for( int j = i; j < (int)_i.size(); ++j ) {
|
|
_i[ j ] = -1;
|
|
}
|
|
}
|
|
bool ok() {
|
|
return _i[ 0 ] < (int)_v._ranges[ 0 ].intervals().size();
|
|
}
|
|
BSONObj startKey() {
|
|
BSONObjBuilder b;
|
|
for( int unsigned i = 0; i < _i.size(); ++i ) {
|
|
const FieldInterval &fi = _v._ranges[ i ].intervals()[ _i[ i ] ];
|
|
b.appendAs( fi._lower._bound, "" );
|
|
}
|
|
return b.obj();
|
|
}
|
|
// temp
|
|
BSONObj endKey() {
|
|
BSONObjBuilder b;
|
|
for( int unsigned i = 0; i < _i.size(); ++i ) {
|
|
const FieldInterval &fi = _v._ranges[ i ].intervals()[ _i[ i ] ];
|
|
b.appendAs( fi._upper._bound, "" );
|
|
}
|
|
return b.obj();
|
|
}
|
|
// check
|
|
private:
|
|
const FieldRangeVector &_v;
|
|
vector< int > _i;
|
|
vector< const BSONElement* > _cmp;
|
|
vector< bool > _inc;
|
|
bool _after;
|
|
};
|
|
private:
|
|
int matchingLowElement( const BSONElement &e, int i, bool direction, bool &lowEquality ) const;
|
|
bool matchesElement( const BSONElement &e, int i, bool direction ) const;
|
|
vector< FieldRange > _ranges;
|
|
BSONObj _keyPattern;
|
|
int _direction;
|
|
vector< BSONObj > _queries; // make sure mem owned
|
|
};
|
|
|
|
// generages FieldRangeSet objects, accounting for or clauses
|
|
class FieldRangeOrSet {
|
|
public:
|
|
FieldRangeOrSet( const char *ns, const BSONObj &query , bool optimize=true );
|
|
// if there's a useless or clause, we won't use or ranges to help with scanning
|
|
bool orFinished() const { return _orFound && _orSets.empty(); }
|
|
// removes first or clause, and removes the field ranges it covers from all subsequent or clauses
|
|
// this could invalidate the result of the last topFrs()
|
|
void popOrClause() {
|
|
massert( 13274, "no or clause to pop", !orFinished() );
|
|
const FieldRangeSet &toPop = _orSets.front();
|
|
list< FieldRangeSet >::iterator i = _orSets.begin();
|
|
++i;
|
|
while( i != _orSets.end() ) {
|
|
*i -= toPop;
|
|
if( !i->matchPossible() ) {
|
|
i = _orSets.erase( i );
|
|
} else {
|
|
++i;
|
|
}
|
|
}
|
|
_oldOrSets.push_front( toPop );
|
|
_orSets.pop_front();
|
|
}
|
|
FieldRangeSet *topFrs() const {
|
|
FieldRangeSet *ret = new FieldRangeSet( _baseSet );
|
|
if (_orSets.size()){
|
|
*ret &= _orSets.front();
|
|
}
|
|
return ret;
|
|
}
|
|
void allClausesSimplified( vector< BSONObj > &ret ) const {
|
|
for( list< FieldRangeSet >::const_iterator i = _orSets.begin(); i != _orSets.end(); ++i ) {
|
|
if ( i->matchPossible() ) {
|
|
ret.push_back( i->simplifiedQuery() );
|
|
}
|
|
}
|
|
}
|
|
string getSpecial() const { return _baseSet.getSpecial(); }
|
|
|
|
bool moreOrClauses() const { return !_orSets.empty(); }
|
|
private:
|
|
FieldRangeSet _baseSet;
|
|
list< FieldRangeSet > _orSets;
|
|
list< FieldRangeSet > _oldOrSets; // make sure memory is owned
|
|
bool _orFound;
|
|
};
|
|
|
|
/**
|
|
used for doing field limiting
|
|
*/
|
|
class FieldMatcher {
|
|
public:
|
|
FieldMatcher()
|
|
: _include(true)
|
|
, _special(false)
|
|
, _includeID(true)
|
|
, _skip(0)
|
|
, _limit(-1)
|
|
{}
|
|
|
|
void add( const BSONObj& o );
|
|
|
|
void append( BSONObjBuilder& b , const BSONElement& e ) const;
|
|
|
|
BSONObj getSpec() const;
|
|
bool includeID() { return _includeID; }
|
|
private:
|
|
|
|
void add( const string& field, bool include );
|
|
void add( const string& field, int skip, int limit );
|
|
void appendArray( BSONObjBuilder& b , const BSONObj& a , bool nested=false) const;
|
|
|
|
bool _include; // true if default at this level is to include
|
|
bool _special; // true if this level can't be skipped or included without recursing
|
|
//TODO: benchmark vector<pair> vs map
|
|
typedef map<string, boost::shared_ptr<FieldMatcher> > FieldMap;
|
|
FieldMap _fields;
|
|
BSONObj _source;
|
|
bool _includeID;
|
|
|
|
// used for $slice operator
|
|
int _skip;
|
|
int _limit;
|
|
};
|
|
|
|
/** returns a string that, when used as a matcher, would match a superset of regex()
|
|
returns "" for complex regular expressions
|
|
used to optimize queries in some simple regex cases that start with '^'
|
|
|
|
if purePrefix != NULL, sets it to whether the regex can be converted to a range query
|
|
*/
|
|
string simpleRegex(const char* regex, const char* flags, bool* purePrefix=NULL);
|
|
|
|
/** returns the upper bound of a query that matches prefix */
|
|
string simpleRegexEnd( string prefix );
|
|
|
|
// NOTE(review): defined elsewhere. Presumably adjusts the count `num` using
// skip/limit values carried in `cmd` -- confirm against the definition.
long long applySkipLimit( long long num , const BSONObj& cmd );
|
|
|
|
} // namespace mongo
|