0
0
mirror of https://github.com/mongodb/mongo.git synced 2024-12-01 09:32:32 +01:00
mongodb/db/queryutil.cpp
2011-01-04 00:40:41 -05:00

1209 lines
47 KiB
C++

// queryutil.cpp
/* Copyright 2009 10gen Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "pch.h"
#include "btree.h"
#include "matcher.h"
#include "pdfile.h"
#include "queryoptimizer.h"
#include "../util/unittest.h"
#include "dbmessage.h"
#include "indexkey.h"
namespace mongo {
extern BSONObj staticNull;
/** returns a string that when used as a matcher, would match a super set of regex()
returns "" for complex regular expressions
used to optimize queries in some simple regex cases that start with '^'
if purePrefix != NULL, sets it to whether the regex can be converted to a range query
*/
string simpleRegex(const char* regex, const char* flags, bool* purePrefix) {
string r = "";
if (purePrefix) *purePrefix = false;
bool multilineOK;
if ( regex[0] == '\\' && regex[1] == 'A') {
multilineOK = true;
regex += 2;
}
else if (regex[0] == '^') {
multilineOK = false;
regex += 1;
}
else {
return r;
}
bool extended = false;
while (*flags) {
switch (*(flags++)) {
case 'm': // multiline
if (multilineOK)
continue;
else
return r;
case 'x': // extended
extended = true;
break;
default:
return r; // cant use index
}
}
stringstream ss;
while(*regex) {
char c = *(regex++);
if ( c == '*' || c == '?' ) {
// These are the only two symbols that make the last char optional
r = ss.str();
r = r.substr( 0 , r.size() - 1 );
return r; //breaking here fails with /^a?/
}
else if (c == '\\') {
// slash followed by non-alphanumeric represents the following char
c = *(regex++);
if ((c >= 'A' && c <= 'Z') ||
(c >= 'a' && c <= 'z') ||
(c >= '0' && c <= '0') ||
(c == '\0')) {
r = ss.str();
break;
}
else {
ss << c;
}
}
else if (strchr("^$.[|()+{", c)) {
// list of "metacharacters" from man pcrepattern
r = ss.str();
break;
}
else if (extended && c == '#') {
// comment
r = ss.str();
break;
}
else if (extended && isspace(c)) {
continue;
}
else {
// self-matching char
ss << c;
}
}
if ( r.empty() && *regex == 0 ) {
r = ss.str();
if (purePrefix) *purePrefix = !r.empty();
}
return r;
}
inline string simpleRegex(const BSONElement& e) {
switch(e.type()) {
case RegEx:
return simpleRegex(e.regex(), e.regexFlags());
case Object: {
BSONObj o = e.embeddedObject();
return simpleRegex(o["$regex"].valuestrsafe(), o["$options"].valuestrsafe());
}
default: assert(false); return ""; //return squashes compiler warning
}
}
string simpleRegexEnd( string regex ) {
++regex[ regex.length() - 1 ];
return regex;
}
FieldRange::FieldRange( const BSONElement &e, bool isNot, bool optimize ) {
// NOTE with $not, we could potentially form a complementary set of intervals.
if ( !isNot && !e.eoo() && e.type() != RegEx && e.getGtLtOp() == BSONObj::opIN ) {
set< BSONElement, element_lt > vals;
vector< FieldRange > regexes;
uassert( 12580 , "invalid query" , e.isABSONObj() );
BSONObjIterator i( e.embeddedObject() );
while( i.more() ) {
BSONElement ie = i.next();
if ( ie.type() == RegEx ) {
regexes.push_back( FieldRange( ie, false, optimize ) );
}
else {
vals.insert( ie );
}
}
for( set< BSONElement, element_lt >::const_iterator i = vals.begin(); i != vals.end(); ++i )
_intervals.push_back( FieldInterval(*i) );
for( vector< FieldRange >::const_iterator i = regexes.begin(); i != regexes.end(); ++i )
*this |= *i;
return;
}
if ( e.type() == Array && e.getGtLtOp() == BSONObj::Equality ) {
_intervals.push_back( FieldInterval(e) );
const BSONElement& temp = e.embeddedObject().firstElement();
if ( ! temp.eoo() ) {
if ( temp < e )
_intervals.insert( _intervals.begin() , temp );
else
_intervals.push_back( FieldInterval(temp) );
}
return;
}
_intervals.push_back( FieldInterval() );
FieldInterval &initial = _intervals[ 0 ];
BSONElement &lower = initial._lower._bound;
bool &lowerInclusive = initial._lower._inclusive;
BSONElement &upper = initial._upper._bound;
bool &upperInclusive = initial._upper._inclusive;
lower = minKey.firstElement();
lowerInclusive = true;
upper = maxKey.firstElement();
upperInclusive = true;
if ( e.eoo() )
return;
int op = e.getGtLtOp();
if ( e.type() == RegEx
|| (e.type() == Object && !e.embeddedObject()["$regex"].eoo())
) {
uassert( 13454, "invalid regular expression operator", op == BSONObj::Equality || op == BSONObj::opREGEX );
if ( !isNot ) { // no optimization for negated regex - we could consider creating 2 intervals comprising all nonmatching prefixes
const string r = simpleRegex(e);
if ( r.size() ) {
lower = addObj( BSON( "" << r ) ).firstElement();
upper = addObj( BSON( "" << simpleRegexEnd( r ) ) ).firstElement();
upperInclusive = false;
}
else {
BSONObjBuilder b1(32), b2(32);
b1.appendMinForType( "" , String );
lower = addObj( b1.obj() ).firstElement();
b2.appendMaxForType( "" , String );
upper = addObj( b2.obj() ).firstElement();
upperInclusive = false; //MaxForType String is an empty Object
}
// regex matches self - regex type > string type
if (e.type() == RegEx) {
BSONElement re = addObj( BSON( "" << e ) ).firstElement();
_intervals.push_back( FieldInterval(re) );
}
else {
BSONObj orig = e.embeddedObject();
BSONObjBuilder b;
b.appendRegex("", orig["$regex"].valuestrsafe(), orig["$options"].valuestrsafe());
BSONElement re = addObj( b.obj() ).firstElement();
_intervals.push_back( FieldInterval(re) );
}
}
return;
}
if ( isNot ) {
switch( op ) {
case BSONObj::Equality:
return;
// op = BSONObj::NE;
// break;
case BSONObj::opALL:
case BSONObj::opMOD: // NOTE for mod and type, we could consider having 1-2 intervals comprising the complementary types (multiple intervals already possible with $in)
case BSONObj::opTYPE:
// no bound calculation
return;
case BSONObj::NE:
op = BSONObj::Equality;
break;
case BSONObj::LT:
op = BSONObj::GTE;
break;
case BSONObj::LTE:
op = BSONObj::GT;
break;
case BSONObj::GT:
op = BSONObj::LTE;
break;
case BSONObj::GTE:
op = BSONObj::LT;
break;
default: // otherwise doesn't matter
break;
}
}
switch( op ) {
case BSONObj::Equality:
lower = upper = e;
break;
case BSONObj::NE: {
// this will invalidate the upper/lower references above
_intervals.push_back( FieldInterval() );
// optimize doesn't make sense for negative ranges
_intervals[ 0 ]._upper._bound = e;
_intervals[ 0 ]._upper._inclusive = false;
_intervals[ 1 ]._lower._bound = e;
_intervals[ 1 ]._lower._inclusive = false;
_intervals[ 1 ]._upper._bound = maxKey.firstElement();
_intervals[ 1 ]._upper._inclusive = true;
optimize = false; // don't run optimize code below
break;
}
case BSONObj::LT:
upperInclusive = false;
case BSONObj::LTE:
upper = e;
break;
case BSONObj::GT:
lowerInclusive = false;
case BSONObj::GTE:
lower = e;
break;
case BSONObj::opALL: {
massert( 10370 , "$all requires array", e.type() == Array );
BSONObjIterator i( e.embeddedObject() );
bool bound = false;
while ( i.more() ) {
BSONElement x = i.next();
if ( x.type() == Object && x.embeddedObject().firstElement().getGtLtOp() == BSONObj::opELEM_MATCH ) {
// taken care of elsewhere
}
else if ( x.type() != RegEx ) {
lower = upper = x;
bound = true;
break;
}
}
if ( !bound ) { // if no good non regex bound found, try regex bounds
BSONObjIterator i( e.embeddedObject() );
while( i.more() ) {
BSONElement x = i.next();
if ( x.type() != RegEx )
continue;
string simple = simpleRegex( x.regex(), x.regexFlags() );
if ( !simple.empty() ) {
lower = addObj( BSON( "" << simple ) ).firstElement();
upper = addObj( BSON( "" << simpleRegexEnd( simple ) ) ).firstElement();
break;
}
}
}
break;
}
case BSONObj::opMOD: {
{
BSONObjBuilder b;
b.appendMinForType( "" , NumberDouble );
lower = addObj( b.obj() ).firstElement();
}
{
BSONObjBuilder b;
b.appendMaxForType( "" , NumberDouble );
upper = addObj( b.obj() ).firstElement();
}
break;
}
case BSONObj::opTYPE: {
BSONType t = (BSONType)e.numberInt();
{
BSONObjBuilder b;
b.appendMinForType( "" , t );
lower = addObj( b.obj() ).firstElement();
}
{
BSONObjBuilder b;
b.appendMaxForType( "" , t );
upper = addObj( b.obj() ).firstElement();
}
break;
}
case BSONObj::opREGEX:
case BSONObj::opOPTIONS:
// do nothing
break;
case BSONObj::opELEM_MATCH: {
log() << "warning: shouldn't get here?" << endl;
break;
}
case BSONObj::opNEAR:
case BSONObj::opWITHIN:
_special = "2d";
break;
default:
break;
}
if ( optimize ) {
if ( lower.type() != MinKey && upper.type() == MaxKey && lower.isSimpleType() ) { // TODO: get rid of isSimpleType
BSONObjBuilder b;
b.appendMaxForType( lower.fieldName() , lower.type() );
upper = addObj( b.obj() ).firstElement();
}
else if ( lower.type() == MinKey && upper.type() != MaxKey && upper.isSimpleType() ) { // TODO: get rid of isSimpleType
BSONObjBuilder b;
b.appendMinForType( upper.fieldName() , upper.type() );
lower = addObj( b.obj() ).firstElement();
}
}
}
void FieldRange::finishOperation( const vector< FieldInterval > &newIntervals, const FieldRange &other ) {
_intervals = newIntervals;
for( vector< BSONObj >::const_iterator i = other._objData.begin(); i != other._objData.end(); ++i )
_objData.push_back( *i );
if ( _special.size() == 0 && other._special.size() )
_special = other._special;
}
// as called, these functions find the max/min of a bound in the
// opposite direction, so inclusive bounds are considered less
// superlative
FieldBound maxFieldBound( const FieldBound &a, const FieldBound &b ) {
int cmp = a._bound.woCompare( b._bound, false );
if ( ( cmp == 0 && !b._inclusive ) || cmp < 0 )
return b;
return a;
}
FieldBound minFieldBound( const FieldBound &a, const FieldBound &b ) {
int cmp = a._bound.woCompare( b._bound, false );
if ( ( cmp == 0 && !b._inclusive ) || cmp > 0 )
return b;
return a;
}
bool fieldIntervalOverlap( const FieldInterval &one, const FieldInterval &two, FieldInterval &result ) {
result._lower = maxFieldBound( one._lower, two._lower );
result._upper = minFieldBound( one._upper, two._upper );
return result.strictValid();
}
const FieldRange &FieldRange::operator&=( const FieldRange &other ) {
vector< FieldInterval > newIntervals;
vector< FieldInterval >::const_iterator i = _intervals.begin();
vector< FieldInterval >::const_iterator j = other._intervals.begin();
while( i != _intervals.end() && j != other._intervals.end() ) {
FieldInterval overlap;
if ( fieldIntervalOverlap( *i, *j, overlap ) ) {
newIntervals.push_back( overlap );
}
if ( i->_upper == minFieldBound( i->_upper, j->_upper ) ) {
++i;
}
else {
++j;
}
}
finishOperation( newIntervals, other );
return *this;
}
void handleInterval( const FieldInterval &lower, FieldBound &low, FieldBound &high, vector< FieldInterval > &newIntervals ) {
if ( low._bound.eoo() ) {
low = lower._lower; high = lower._upper;
}
else {
int cmp = high._bound.woCompare( lower._lower._bound, false );
if ( ( cmp < 0 ) || ( cmp == 0 && !high._inclusive && !lower._lower._inclusive ) ) {
FieldInterval tmp;
tmp._lower = low;
tmp._upper = high;
newIntervals.push_back( tmp );
low = lower._lower; high = lower._upper;
}
else {
high = lower._upper;
}
}
}
const FieldRange &FieldRange::operator|=( const FieldRange &other ) {
vector< FieldInterval > newIntervals;
FieldBound low;
FieldBound high;
vector< FieldInterval >::const_iterator i = _intervals.begin();
vector< FieldInterval >::const_iterator j = other._intervals.begin();
while( i != _intervals.end() && j != other._intervals.end() ) {
int cmp = i->_lower._bound.woCompare( j->_lower._bound, false );
if ( ( cmp == 0 && i->_lower._inclusive ) || cmp < 0 ) {
handleInterval( *i, low, high, newIntervals );
++i;
}
else {
handleInterval( *j, low, high, newIntervals );
++j;
}
}
while( i != _intervals.end() ) {
handleInterval( *i, low, high, newIntervals );
++i;
}
while( j != other._intervals.end() ) {
handleInterval( *j, low, high, newIntervals );
++j;
}
FieldInterval tmp;
tmp._lower = low;
tmp._upper = high;
newIntervals.push_back( tmp );
finishOperation( newIntervals, other );
return *this;
}
const FieldRange &FieldRange::operator-=( const FieldRange &other ) {
vector< FieldInterval > newIntervals;
vector< FieldInterval >::iterator i = _intervals.begin();
vector< FieldInterval >::const_iterator j = other._intervals.begin();
while( i != _intervals.end() && j != other._intervals.end() ) {
int cmp = i->_lower._bound.woCompare( j->_lower._bound, false );
if ( cmp < 0 ||
( cmp == 0 && i->_lower._inclusive && !j->_lower._inclusive ) ) {
int cmp2 = i->_upper._bound.woCompare( j->_lower._bound, false );
if ( cmp2 < 0 ) {
newIntervals.push_back( *i );
++i;
}
else if ( cmp2 == 0 ) {
newIntervals.push_back( *i );
if ( newIntervals.back()._upper._inclusive && j->_lower._inclusive ) {
newIntervals.back()._upper._inclusive = false;
}
++i;
}
else {
newIntervals.push_back( *i );
newIntervals.back()._upper = j->_lower;
newIntervals.back()._upper.flipInclusive();
int cmp3 = i->_upper._bound.woCompare( j->_upper._bound, false );
if ( cmp3 < 0 ||
( cmp3 == 0 && ( !i->_upper._inclusive || j->_upper._inclusive ) ) ) {
++i;
}
else {
i->_lower = j->_upper;
i->_lower.flipInclusive();
++j;
}
}
}
else {
int cmp2 = i->_lower._bound.woCompare( j->_upper._bound, false );
if ( cmp2 > 0 ||
( cmp2 == 0 && ( !i->_lower._inclusive || !j->_upper._inclusive ) ) ) {
++j;
}
else {
int cmp3 = i->_upper._bound.woCompare( j->_upper._bound, false );
if ( cmp3 < 0 ||
( cmp3 == 0 && ( !i->_upper._inclusive || j->_upper._inclusive ) ) ) {
++i;
}
else {
i->_lower = j->_upper;
i->_lower.flipInclusive();
++j;
}
}
}
}
while( i != _intervals.end() ) {
newIntervals.push_back( *i );
++i;
}
finishOperation( newIntervals, other );
return *this;
}
// TODO write a proper implementation that doesn't do a full copy
bool FieldRange::operator<=( const FieldRange &other ) {
FieldRange temp = *this;
temp -= other;
return temp.empty();
}
BSONObj FieldRange::addObj( const BSONObj &o ) {
_objData.push_back( o );
return o;
}
string FieldRangeSet::getSpecial() const {
string s = "";
for ( map<string,FieldRange>::iterator i=_ranges.begin(); i!=_ranges.end(); i++ ) {
if ( i->second.getSpecial().size() == 0 )
continue;
uassert( 13033 , "can't have 2 special fields" , s.size() == 0 );
s = i->second.getSpecial();
}
return s;
}
void FieldRangeSet::processOpElement( const char *fieldName, const BSONElement &f, bool isNot, bool optimize ) {
BSONElement g = f;
int op2 = g.getGtLtOp();
if ( op2 == BSONObj::opALL ) {
BSONElement h = g;
massert( 13050 , "$all requires array", h.type() == Array );
BSONObjIterator i( h.embeddedObject() );
if( i.more() ) {
BSONElement x = i.next();
if ( x.type() == Object && x.embeddedObject().firstElement().getGtLtOp() == BSONObj::opELEM_MATCH ) {
g = x.embeddedObject().firstElement();
op2 = g.getGtLtOp();
}
}
}
if ( op2 == BSONObj::opELEM_MATCH ) {
BSONObjIterator k( g.embeddedObjectUserCheck() );
while ( k.more() ) {
BSONElement h = k.next();
StringBuilder buf(32);
buf << fieldName << "." << h.fieldName();
string fullname = buf.str();
int op3 = getGtLtOp( h );
if ( op3 == BSONObj::Equality ) {
_ranges[ fullname ] &= FieldRange( h , isNot , optimize );
}
else {
BSONObjIterator l( h.embeddedObject() );
while ( l.more() ) {
_ranges[ fullname ] &= FieldRange( l.next() , isNot , optimize );
}
}
}
}
else {
_ranges[ fieldName ] &= FieldRange( f , isNot , optimize );
}
}
void FieldRangeSet::processQueryField( const BSONElement &e, bool optimize ) {
bool equality = ( getGtLtOp( e ) == BSONObj::Equality );
if ( equality && e.type() == Object ) {
equality = ( strcmp( e.embeddedObject().firstElement().fieldName(), "$not" ) != 0 );
}
if ( equality || ( e.type() == Object && !e.embeddedObject()[ "$regex" ].eoo() ) ) {
_ranges[ e.fieldName() ] &= FieldRange( e , false , optimize );
}
if ( !equality ) {
BSONObjIterator j( e.embeddedObject() );
while( j.more() ) {
BSONElement f = j.next();
if ( strcmp( f.fieldName(), "$not" ) == 0 ) {
switch( f.type() ) {
case Object: {
BSONObjIterator k( f.embeddedObject() );
while( k.more() ) {
BSONElement g = k.next();
uassert( 13034, "invalid use of $not", g.getGtLtOp() != BSONObj::Equality );
processOpElement( e.fieldName(), g, true, optimize );
}
break;
}
case RegEx:
processOpElement( e.fieldName(), f, true, optimize );
break;
default:
uassert( 13041, "invalid use of $not", false );
}
}
else {
processOpElement( e.fieldName(), f, false, optimize );
}
}
}
}
FieldRangeSet::FieldRangeSet( const char *ns, const BSONObj &query , bool optimize )
: _ns( ns ), _queries( 1, query.getOwned() ) {
BSONObjIterator i( _queries[ 0 ] );
while( i.more() ) {
BSONElement e = i.next();
// e could be x:1 or x:{$gt:1}
if ( strcmp( e.fieldName(), "$where" ) == 0 ) {
continue;
}
if ( strcmp( e.fieldName(), "$or" ) == 0 ) {
continue;
}
if ( strcmp( e.fieldName(), "$nor" ) == 0 ) {
continue;
}
processQueryField( e, optimize );
}
}
FieldRangeOrSet::FieldRangeOrSet( const char *ns, const BSONObj &query , bool optimize )
: _baseSet( ns, query, optimize ), _orFound() {
BSONObjIterator i( _baseSet._queries[ 0 ] );
while( i.more() ) {
BSONElement e = i.next();
if ( strcmp( e.fieldName(), "$or" ) == 0 ) {
massert( 13262, "$or requires nonempty array", e.type() == Array && e.embeddedObject().nFields() > 0 );
BSONObjIterator j( e.embeddedObject() );
while( j.more() ) {
BSONElement f = j.next();
massert( 13263, "$or array must contain objects", f.type() == Object );
_orSets.push_back( FieldRangeSet( ns, f.embeddedObject(), optimize ) );
massert( 13291, "$or may not contain 'special' query", _orSets.back().getSpecial().empty() );
_originalOrSets.push_back( _orSets.back() );
}
_orFound = true;
continue;
}
}
}
void FieldRangeOrSet::popOrClause( const BSONObj &indexSpec ) {
massert( 13274, "no or clause to pop", !orFinished() );
auto_ptr< FieldRangeSet > holder;
FieldRangeSet *toDiff = &_originalOrSets.front();
if ( toDiff->matchPossible() && !indexSpec.isEmpty() ) {
holder.reset( toDiff->subset( indexSpec ) );
toDiff = holder.get();
}
list< FieldRangeSet >::iterator i = _orSets.begin();
list< FieldRangeSet >::iterator j = _originalOrSets.begin();
++i;
++j;
while( i != _orSets.end() ) {
*i -= *toDiff;
if( !i->matchPossible() ) {
i = _orSets.erase( i );
j = _originalOrSets.erase( j );
}
else {
++i;
++j;
}
}
_oldOrSets.push_front( _orSets.front() );
_orSets.pop_front();
_originalOrSets.pop_front();
}
FieldRange *FieldRangeSet::trivialRange_ = 0;
FieldRange &FieldRangeSet::trivialRange() {
if ( trivialRange_ == 0 )
trivialRange_ = new FieldRange();
return *trivialRange_;
}
BSONObj FieldRangeSet::simplifiedQuery( const BSONObj &_fields ) const {
BSONObj fields = _fields;
if ( fields.isEmpty() ) {
BSONObjBuilder b;
for( map< string, FieldRange >::const_iterator i = _ranges.begin(); i != _ranges.end(); ++i ) {
b.append( i->first, 1 );
}
fields = b.obj();
}
BSONObjBuilder b;
BSONObjIterator i( fields );
while( i.more() ) {
BSONElement e = i.next();
const char *name = e.fieldName();
const FieldRange &range = _ranges[ name ];
assert( !range.empty() );
if ( range.equality() )
b.appendAs( range.min(), name );
else if ( range.nontrivial() ) {
BSONObj o;
BSONObjBuilder c;
if ( range.min().type() != MinKey )
c.appendAs( range.min(), range.minInclusive() ? "$gte" : "$gt" );
if ( range.max().type() != MaxKey )
c.appendAs( range.max(), range.maxInclusive() ? "$lte" : "$lt" );
o = c.obj();
b.append( name, o );
}
}
return b.obj();
}
QueryPattern FieldRangeSet::pattern( const BSONObj &sort ) const {
QueryPattern qp;
for( map< string, FieldRange >::const_iterator i = _ranges.begin(); i != _ranges.end(); ++i ) {
assert( !i->second.empty() );
if ( i->second.equality() ) {
qp._fieldTypes[ i->first ] = QueryPattern::Equality;
}
else if ( i->second.nontrivial() ) {
bool upper = i->second.max().type() != MaxKey;
bool lower = i->second.min().type() != MinKey;
if ( upper && lower )
qp._fieldTypes[ i->first ] = QueryPattern::UpperAndLowerBound;
else if ( upper )
qp._fieldTypes[ i->first ] = QueryPattern::UpperBound;
else if ( lower )
qp._fieldTypes[ i->first ] = QueryPattern::LowerBound;
}
}
qp.setSort( sort );
return qp;
}
// TODO get rid of this
BoundList FieldRangeSet::indexBounds( const BSONObj &keyPattern, int direction ) const {
typedef vector< pair< shared_ptr< BSONObjBuilder >, shared_ptr< BSONObjBuilder > > > BoundBuilders;
BoundBuilders builders;
builders.push_back( make_pair( shared_ptr< BSONObjBuilder >( new BSONObjBuilder() ), shared_ptr< BSONObjBuilder >( new BSONObjBuilder() ) ) );
BSONObjIterator i( keyPattern );
bool ineq = false; // until ineq is true, we are just dealing with equality and $in bounds
while( i.more() ) {
BSONElement e = i.next();
const FieldRange &fr = range( e.fieldName() );
int number = (int) e.number(); // returns 0.0 if not numeric
bool forward = ( ( number >= 0 ? 1 : -1 ) * ( direction >= 0 ? 1 : -1 ) > 0 );
if ( !ineq ) {
if ( fr.equality() ) {
for( BoundBuilders::const_iterator j = builders.begin(); j != builders.end(); ++j ) {
j->first->appendAs( fr.min(), "" );
j->second->appendAs( fr.min(), "" );
}
}
else {
if ( !fr.inQuery() ) {
ineq = true;
}
BoundBuilders newBuilders;
const vector< FieldInterval > &intervals = fr.intervals();
for( BoundBuilders::const_iterator i = builders.begin(); i != builders.end(); ++i ) {
BSONObj first = i->first->obj();
BSONObj second = i->second->obj();
const unsigned maxCombinations = 4000000;
if ( forward ) {
for( vector< FieldInterval >::const_iterator j = intervals.begin(); j != intervals.end(); ++j ) {
uassert( 13303, "combinatorial limit of $in partitioning of result set exceeded", newBuilders.size() < maxCombinations );
newBuilders.push_back( make_pair( shared_ptr< BSONObjBuilder >( new BSONObjBuilder() ), shared_ptr< BSONObjBuilder >( new BSONObjBuilder() ) ) );
newBuilders.back().first->appendElements( first );
newBuilders.back().second->appendElements( second );
newBuilders.back().first->appendAs( j->_lower._bound, "" );
newBuilders.back().second->appendAs( j->_upper._bound, "" );
}
}
else {
for( vector< FieldInterval >::const_reverse_iterator j = intervals.rbegin(); j != intervals.rend(); ++j ) {
uassert( 13304, "combinatorial limit of $in partitioning of result set exceeded", newBuilders.size() < maxCombinations );
newBuilders.push_back( make_pair( shared_ptr< BSONObjBuilder >( new BSONObjBuilder() ), shared_ptr< BSONObjBuilder >( new BSONObjBuilder() ) ) );
newBuilders.back().first->appendElements( first );
newBuilders.back().second->appendElements( second );
newBuilders.back().first->appendAs( j->_upper._bound, "" );
newBuilders.back().second->appendAs( j->_lower._bound, "" );
}
}
}
builders = newBuilders;
}
}
else {
for( BoundBuilders::const_iterator j = builders.begin(); j != builders.end(); ++j ) {
j->first->appendAs( forward ? fr.min() : fr.max(), "" );
j->second->appendAs( forward ? fr.max() : fr.min(), "" );
}
}
}
BoundList ret;
for( BoundBuilders::const_iterator i = builders.begin(); i != builders.end(); ++i )
ret.push_back( make_pair( i->first->obj(), i->second->obj() ) );
return ret;
}
FieldRangeSet *FieldRangeSet::subset( const BSONObj &fields ) const {
FieldRangeSet *ret = new FieldRangeSet( _ns, BSONObj() );
BSONObjIterator i( fields );
while( i.more() ) {
BSONElement e = i.next();
if ( _ranges[ e.fieldName() ].nontrivial() ) {
ret->_ranges[ e.fieldName() ] = _ranges[ e.fieldName() ];
}
}
ret->_queries = _queries;
return ret;
}
bool FieldRangeVector::matchesElement( const BSONElement &e, int i, bool forward ) const {
bool eq;
int l = matchingLowElement( e, i, forward, eq );
return ( l % 2 == 0 ); // if we're inside an interval
}
// binary search for interval containing the specified element
// an even return value indicates that the element is contained within a valid interval
int FieldRangeVector::matchingLowElement( const BSONElement &e, int i, bool forward, bool &lowEquality ) const {
lowEquality = false;
int l = -1;
int h = _ranges[ i ].intervals().size() * 2;
while( l + 1 < h ) {
int m = ( l + h ) / 2;
BSONElement toCmp;
bool toCmpInclusive;
const FieldInterval &interval = _ranges[ i ].intervals()[ m / 2 ];
if ( m % 2 == 0 ) {
toCmp = interval._lower._bound;
toCmpInclusive = interval._lower._inclusive;
}
else {
toCmp = interval._upper._bound;
toCmpInclusive = interval._upper._inclusive;
}
int cmp = toCmp.woCompare( e, false );
if ( !forward ) {
cmp = -cmp;
}
if ( cmp < 0 ) {
l = m;
}
else if ( cmp > 0 ) {
h = m;
}
else {
if ( m % 2 == 0 ) {
lowEquality = true;
}
int ret = m;
// if left match and inclusive, all good
// if left match and not inclusive, return right before left bound
// if right match and inclusive, return left bound
// if right match and not inclusive, return right bound
if ( ( m % 2 == 0 && !toCmpInclusive ) || ( m % 2 == 1 && toCmpInclusive ) ) {
--ret;
}
return ret;
}
}
assert( l + 1 == h );
return l;
}
bool FieldRangeVector::matches( const BSONObj &obj ) const {
if ( !_indexSpec.get() ) {
_indexSpec.reset( new IndexSpec( _keyPattern ) );
}
// TODO The representation of matching keys could potentially be optimized
// more for the case at hand. (For example, we can potentially consider
// fields individually instead of constructing several bson objects using
// multikey arrays.) But getKeys() canonically defines the key set for a
// given object and for now we are using it as is.
BSONObjSetDefaultOrder keys;
_indexSpec->getKeys( obj, keys );
for( BSONObjSetDefaultOrder::const_iterator i = keys.begin(); i != keys.end(); ++i ) {
BSONObjIterator j( *i );
BSONObjIterator k( _keyPattern );
bool match = true;
for( int l = 0; l < (int)_ranges.size(); ++l ) {
int number = (int) k.next().number();
bool forward = ( number >= 0 ? 1 : -1 ) * ( _direction >= 0 ? 1 : -1 ) > 0;
if ( !matchesElement( j.next(), l, forward ) ) {
match = false;
break;
}
}
if ( match ) {
// The *i key matched a valid range for every element.
return true;
}
}
return false;
}
// TODO optimize more
int FieldRangeVector::Iterator::advance( const BSONObj &curr ) {
BSONObjIterator j( curr );
BSONObjIterator o( _v._keyPattern );
// track first field for which we are not at the end of the valid values,
// since we may need to advance from the key prefix ending with this field
int latestNonEndpoint = -1;
// iterate over fields to determine appropriate advance method
for( int i = 0; i < (int)_i.size(); ++i ) {
if ( i > 0 && !_v._ranges[ i - 1 ].intervals()[ _i[ i - 1 ] ].equality() ) {
// if last bound was inequality, we don't know anything about where we are for this field
// TODO if possible avoid this certain cases when value in previous field of the previous
// key is the same as value of previous field in current key
setMinus( i );
}
bool eq = false;
BSONElement oo = o.next();
bool reverse = ( ( oo.number() < 0 ) ^ ( _v._direction < 0 ) );
BSONElement jj = j.next();
if ( _i[ i ] == -1 ) { // unknown position for this field, do binary search
bool lowEquality;
int l = _v.matchingLowElement( jj, i, !reverse, lowEquality );
if ( l % 2 == 0 ) { // we are in a valid range for this field
_i[ i ] = l / 2;
int diff = (int)_v._ranges[ i ].intervals().size() - _i[ i ];
if ( diff > 1 ) {
latestNonEndpoint = i;
}
else if ( diff == 1 ) {
int x = _v._ranges[ i ].intervals()[ _i[ i ] ]._upper._bound.woCompare( jj, false );
if ( x != 0 ) {
latestNonEndpoint = i;
}
}
continue;
}
else { // not in a valid range for this field - determine if and how to advance
// check if we're after the last interval for this field
if ( l == (int)_v._ranges[ i ].intervals().size() * 2 - 1 ) {
if ( latestNonEndpoint == -1 ) {
return -2;
}
setZero( latestNonEndpoint + 1 );
// skip to curr / latestNonEndpoint + 1 / superlative
_after = true;
return latestNonEndpoint + 1;
}
_i[ i ] = ( l + 1 ) / 2;
if ( lowEquality ) {
// skip to curr / i + 1 / superlative
_after = true;
return i + 1;
}
// skip to curr / i / nextbounds
_cmp[ i ] = &_v._ranges[ i ].intervals()[ _i[ i ] ]._lower._bound;
_inc[ i ] = _v._ranges[ i ].intervals()[ _i[ i ] ]._lower._inclusive;
for( int j = i + 1; j < (int)_i.size(); ++j ) {
_cmp[ j ] = &_v._ranges[ j ].intervals().front()._lower._bound;
_inc[ j ] = _v._ranges[ j ].intervals().front()._lower._inclusive;
}
_after = false;
return i;
}
}
bool first = true;
// _i[ i ] != -1, so we have a starting interval for this field
// which serves as a lower/equal bound on the first iteration -
// we advance from this interval to find a matching interval
while( _i[ i ] < (int)_v._ranges[ i ].intervals().size() ) {
// compare to current interval's upper bound
int x = _v._ranges[ i ].intervals()[ _i[ i ] ]._upper._bound.woCompare( jj, false );
if ( reverse ) {
x = -x;
}
if ( x == 0 && _v._ranges[ i ].intervals()[ _i[ i ] ]._upper._inclusive ) {
eq = true;
break;
}
// see if we're less than the upper bound
if ( x > 0 ) {
if ( i == 0 && first ) {
// the value of 1st field won't go backward, so don't check lower bound
// TODO maybe we can check first only?
break;
}
// if it's an equality interval, don't need to compare separately to lower bound
if ( !_v._ranges[ i ].intervals()[ _i[ i ] ].equality() ) {
// compare to current interval's lower bound
x = _v._ranges[ i ].intervals()[ _i[ i ] ]._lower._bound.woCompare( jj, false );
if ( reverse ) {
x = -x;
}
}
// if we're equal to and not inclusive the lower bound, advance
if ( ( x == 0 && !_v._ranges[ i ].intervals()[ _i[ i ] ]._lower._inclusive ) ) {
setZero( i + 1 );
// skip to curr / i + 1 / superlative
_after = true;
return i + 1;
}
// if we're less than the lower bound, advance
if ( x > 0 ) {
setZero( i + 1 );
// skip to curr / i / nextbounds
_cmp[ i ] = &_v._ranges[ i ].intervals()[ _i[ i ] ]._lower._bound;
_inc[ i ] = _v._ranges[ i ].intervals()[ _i[ i ] ]._lower._inclusive;
for( int j = i + 1; j < (int)_i.size(); ++j ) {
_cmp[ j ] = &_v._ranges[ j ].intervals().front()._lower._bound;
_inc[ j ] = _v._ranges[ j ].intervals().front()._lower._inclusive;
}
_after = false;
return i;
}
else {
break;
}
}
// we're above the upper bound, so try next interval and reset remaining fields
++_i[ i ];
setZero( i + 1 );
first = false;
}
int diff = (int)_v._ranges[ i ].intervals().size() - _i[ i ];
if ( diff > 1 || ( !eq && diff == 1 ) ) {
// check if we're not at the end of valid values for this field
latestNonEndpoint = i;
}
else if ( diff == 0 ) { // check if we're past the last interval for this field
if ( latestNonEndpoint == -1 ) {
return -2;
}
// more values possible, skip...
setZero( latestNonEndpoint + 1 );
// skip to curr / latestNonEndpoint + 1 / superlative
_after = true;
return latestNonEndpoint + 1;
}
}
return -1;
}
void FieldRangeVector::Iterator::prepDive() {
for( int j = 0; j < (int)_i.size(); ++j ) {
_cmp[ j ] = &_v._ranges[ j ].intervals().front()._lower._bound;
_inc[ j ] = _v._ranges[ j ].intervals().front()._lower._inclusive;
}
}
struct SimpleRegexUnitTest : UnitTest {
void run() {
{
BSONObjBuilder b;
b.appendRegex("r", "^foo");
BSONObj o = b.done();
assert( simpleRegex(o.firstElement()) == "foo" );
}
{
BSONObjBuilder b;
b.appendRegex("r", "^f?oo");
BSONObj o = b.done();
assert( simpleRegex(o.firstElement()) == "" );
}
{
BSONObjBuilder b;
b.appendRegex("r", "^fz?oo");
BSONObj o = b.done();
assert( simpleRegex(o.firstElement()) == "f" );
}
{
BSONObjBuilder b;
b.appendRegex("r", "^f", "");
BSONObj o = b.done();
assert( simpleRegex(o.firstElement()) == "f" );
}
{
BSONObjBuilder b;
b.appendRegex("r", "\\Af", "");
BSONObj o = b.done();
assert( simpleRegex(o.firstElement()) == "f" );
}
{
BSONObjBuilder b;
b.appendRegex("r", "^f", "m");
BSONObj o = b.done();
assert( simpleRegex(o.firstElement()) == "" );
}
{
BSONObjBuilder b;
b.appendRegex("r", "\\Af", "m");
BSONObj o = b.done();
assert( simpleRegex(o.firstElement()) == "f" );
}
{
BSONObjBuilder b;
b.appendRegex("r", "\\Af", "mi");
BSONObj o = b.done();
assert( simpleRegex(o.firstElement()) == "" );
}
{
BSONObjBuilder b;
b.appendRegex("r", "\\Af \t\vo\n\ro \\ \\# #comment", "mx");
BSONObj o = b.done();
assert( simpleRegex(o.firstElement()) == "foo #" );
}
}
} simple_regex_unittest;
long long applySkipLimit( long long num , const BSONObj& cmd ) {
BSONElement s = cmd["skip"];
BSONElement l = cmd["limit"];
if ( s.isNumber() ) {
num = num - s.numberLong();
if ( num < 0 ) {
num = 0;
}
}
if ( l.isNumber() ) {
long long limit = l.numberLong();
if ( limit < num ) {
num = limit;
}
}
return num;
}
string debugString( Message& m ) {
stringstream ss;
ss << "op: " << opToString( m.operation() ) << " len: " << m.size();
if ( m.operation() >= 2000 && m.operation() < 2100 ) {
DbMessage d(m);
ss << " ns: " << d.getns();
switch ( m.operation() ) {
case dbUpdate: {
int flags = d.pullInt();
BSONObj q = d.nextJsObj();
BSONObj o = d.nextJsObj();
ss << " flags: " << flags << " query: " << q << " update: " << o;
break;
}
case dbInsert:
ss << d.nextJsObj();
break;
case dbDelete: {
int flags = d.pullInt();
BSONObj q = d.nextJsObj();
ss << " flags: " << flags << " query: " << q;
break;
}
default:
ss << " CANNOT HANDLE YET";
}
}
return ss.str();
}
} // namespace mongo