2009-02-24 20:14:45 +01:00
// queryutil.cpp
2009-10-27 20:58:27 +01:00
/* Copyright 2009 10gen Inc.
*
* Licensed under the Apache License , Version 2.0 ( the " License " ) ;
* you may not use this file except in compliance with the License .
* You may obtain a copy of the License at
*
* http : //www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing , software
* distributed under the License is distributed on an " AS IS " BASIS ,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND , either express or implied .
* See the License for the specific language governing permissions and
* limitations under the License .
*/
2009-02-24 20:14:45 +01:00
2010-04-27 21:27:52 +02:00
# include "pch.h"
2009-02-24 20:14:45 +01:00
# include "btree.h"
2009-08-18 17:59:40 +02:00
# include "matcher.h"
2009-02-24 20:14:45 +01:00
# include "pdfile.h"
# include "queryoptimizer.h"
2010-01-15 02:57:50 +01:00
# include "../util/unittest.h"
2009-02-24 20:14:45 +01:00
namespace mongo {
2010-02-26 03:42:46 +01:00
/** Extracts a literal prefix from an anchored regular expression.
 *
 *  The returned string, when used as a prefix matcher, matches a superset of
 *  what regex matches. It is used to optimize queries for simple expressions
 *  that start with '^' or "\A".
 *
 *  @param regex       NUL-terminated pattern.
 *  @param flags       NUL-terminated option letters. 'x' (extended) is accepted,
 *                     'm' (multiline) is accepted only for "\A"-anchored patterns,
 *                     and any other flag makes the function return "".
 *  @param purePrefix  if not NULL, set to whether the regex can be converted to a
 *                     range query (the whole pattern was a non-empty literal prefix).
 *  @return the literal prefix, or "" for unanchored or otherwise complex expressions.
 */
string simpleRegex(const char* regex, const char* flags, bool* purePrefix) {
    string r = "";
    if (purePrefix)
        *purePrefix = false;

    // Only anchored patterns have a fixed prefix. "\A" tolerates the multiline
    // flag below, whereas '^' does not.
    bool multilineOK;
    if (regex[0] == '\\' && regex[1] == 'A') {
        multilineOK = true;
        regex += 2;
    } else if (regex[0] == '^') {
        multilineOK = false;
        regex += 1;
    } else {
        return r;
    }

    bool extended = false;
    while (*flags) {
        switch (*(flags++)) {
        case 'm': // multiline
            if (multilineOK)
                continue;
            else
                return r;
        case 'x': // extended
            extended = true;
            break;
        default:
            return r; // can't use index
        }
    }

    stringstream ss;

    while (*regex) {
        char c = *(regex++);
        if (c == '*' || c == '?') {
            // These are the only two symbols that make the last char optional,
            // so the prefix ends just before that char.
            r = ss.str();
            if (!r.empty())
                r.erase(r.size() - 1);
            return r; // breaking here fails with /^a?/
        } else if (c == '\\') {
            // A backslash followed by a non-alphanumeric represents that char.
            // Alphanumeric escapes (classes, assertions, back references such as
            // \1 .. \9) and a trailing backslash end the literal prefix.
            c = *regex;
            if ((c >= 'A' && c <= 'Z') ||
                (c >= 'a' && c <= 'z') ||
                (c >= '0' && c <= '9') ||
                (c == '\0')) {
                // Return without advancing so the terminator is never passed.
                return ss.str();
            }
            ++regex;
            ss << c;
        } else if (strchr("^$.[|()+{", c)) {
            // list of "metacharacters" from man pcrepattern
            return ss.str();
        } else if (extended && c == '#') {
            // comment
            return ss.str();
        } else if (extended && isspace(static_cast<unsigned char>(c))) {
            continue;
        } else {
            // self-matching char
            ss << c;
        }
    }

    // The whole pattern was consumed as literal text.
    r = ss.str();
    if (purePrefix)
        *purePrefix = !r.empty();
    return r;
}
/** Overload taking a BSON element: either a RegEx element or an object
 *  carrying "$regex" / "$options" fields. Any other element type asserts.
 */
inline string simpleRegex(const BSONElement& e) {
    if (e.type() == RegEx)
        return simpleRegex(e.regex(), e.regexFlags());

    if (e.type() == Object) {
        BSONObj spec = e.embeddedObject();
        const char* pattern = spec["$regex"].valuestrsafe();
        const char* options = spec["$options"].valuestrsafe();
        return simpleRegex(pattern, options);
    }

    assert(false);
    return ""; // return squashes compiler warning
}
2010-02-26 03:42:46 +01:00
/** Returns a copy of regex with its last character incremented by one, which
 *  the callers in this file use as the exclusive upper bound of a prefix range.
 *  An empty string has no last character and is returned unchanged.
 */
string simpleRegexEnd(string regex) {
    if (regex.empty())
        return regex;
    ++regex[regex.length() - 1];
    return regex;
}
2009-08-20 22:18:50 +02:00
2010-02-23 23:20:28 +01:00
/** Builds the intervals of values described by a single query element.
 *
 *  @param e         the query element (plain value, operator element, regex, ...)
 *  @param isNot     true when the element appears under $not; bounds are then
 *                   computed for the negated operator where one exists
 *  @param optimize  when true, a bound left open at MinKey/MaxKey is narrowed to
 *                   the min/max for the type of the opposite bound
 */
FieldRange::FieldRange(const BSONElement &e, bool isNot, bool optimize) {
    // NOTE with $not, we could potentially form a complementary set of intervals.
    if (!isNot && !e.eoo() && e.type() != RegEx && e.getGtLtOp() == BSONObj::opIN) {
        // $in: one point interval per distinct value, unioned with the range of
        // every regex member.
        set<BSONElement, element_lt> values;
        vector<FieldRange> regexRanges;
        uassert(12580, "invalid query", e.isABSONObj());
        for (BSONObjIterator members(e.embeddedObject()); members.more();) {
            BSONElement member = members.next();
            if (member.type() == RegEx)
                regexRanges.push_back(FieldRange(member, false, optimize));
            else
                values.insert(member);
        }

        for (set<BSONElement, element_lt>::const_iterator v = values.begin(); v != values.end(); ++v)
            intervals_.push_back(FieldInterval(*v));

        for (vector<FieldRange>::const_iterator r = regexRanges.begin(); r != regexRanges.end(); ++r)
            *this |= *r;

        return;
    }

    if (e.type() == Array && e.getGtLtOp() == BSONObj::Equality) {
        // Equality on an array: one interval for the array itself and one for its
        // first element, kept in ascending order.
        intervals_.push_back(FieldInterval(e));
        const BSONElement first = e.embeddedObject().firstElement();
        if (!first.eoo()) {
            if (first < e)
                intervals_.insert(intervals_.begin(), FieldInterval(first));
            else
                intervals_.push_back(FieldInterval(first));
        }
        return;
    }

    // Start from the trivial range [MinKey, MaxKey] and narrow it below.
    intervals_.push_back(FieldInterval());
    FieldInterval &initial = intervals_[0];
    BSONElement &lower = initial.lower_.bound_;
    bool &lowerInclusive = initial.lower_.inclusive_;
    BSONElement &upper = initial.upper_.bound_;
    bool &upperInclusive = initial.upper_.inclusive_;
    lower = minKey.firstElement();
    lowerInclusive = true;
    upper = maxKey.firstElement();
    upperInclusive = true;

    if (e.eoo())
        return;

    const bool isRegex = e.type() == RegEx ||
                         (e.type() == Object && !e.embeddedObject()["$regex"].eoo());
    if (isRegex) {
        // no optimization for negated regex - we could consider creating 2 intervals
        // comprising all nonmatching prefixes
        if (isNot)
            return;

        const string prefix = simpleRegex(e);
        if (prefix.size()) {
            lower = addObj(BSON("" << prefix)).firstElement();
            upper = addObj(BSON("" << simpleRegexEnd(prefix))).firstElement();
            upperInclusive = false;
        } else {
            BSONObjBuilder minBuilder(32), maxBuilder(32);
            minBuilder.appendMinForType("", String);
            lower = addObj(minBuilder.obj()).firstElement();
            maxBuilder.appendMaxForType("", String);
            upper = addObj(maxBuilder.obj()).firstElement();
            upperInclusive = false; // MaxForType String is an empty Object
        }

        // regex matches self - regex type > string type
        BSONElement self;
        if (e.type() == RegEx) {
            self = addObj(BSON("" << e)).firstElement();
        } else {
            BSONObj orig = e.embeddedObject();
            BSONObjBuilder b;
            b.appendRegex("", orig["$regex"].valuestrsafe(), orig["$options"].valuestrsafe());
            self = addObj(b.obj()).firstElement();
        }
        // lower/upper refer into intervals_ and are not used after this push_back.
        intervals_.push_back(FieldInterval(self));
        return;
    }

    int op = e.getGtLtOp();
    if (isNot) {
        // Replace the operator by its negation where one exists.
        switch (op) {
        case BSONObj::Equality:
        case BSONObj::opALL:
        case BSONObj::opMOD: // NOTE for mod and type, we could consider having 1-2 intervals comprising the complementary types (multiple intervals already possible with $in)
        case BSONObj::opTYPE:
            op = BSONObj::NE; // no bound calculation
            break;
        case BSONObj::NE:
            op = BSONObj::Equality;
            break;
        case BSONObj::LT:
            op = BSONObj::GTE;
            break;
        case BSONObj::LTE:
            op = BSONObj::GT;
            break;
        case BSONObj::GT:
            op = BSONObj::LTE;
            break;
        case BSONObj::GTE:
            op = BSONObj::LT;
            break;
        default: // otherwise doesn't matter
            break;
        }
    }

    switch (op) {
    case BSONObj::Equality:
        lower = upper = e;
        break;
    case BSONObj::LT:
        upperInclusive = false;
        // falls through: LT shares the upper bound assignment with LTE
    case BSONObj::LTE:
        upper = e;
        break;
    case BSONObj::GT:
        lowerInclusive = false;
        // falls through: GT shares the lower bound assignment with GTE
    case BSONObj::GTE:
        lower = e;
        break;
    case BSONObj::opALL: {
        massert(10370, "$all requires array", e.type() == Array);
        // Use the first member that is neither an $elemMatch object nor a regex
        // as an equality bound.
        bool bound = false;
        for (BSONObjIterator members(e.embeddedObject()); members.more();) {
            BSONElement x = members.next();
            if (x.type() == Object && x.embeddedObject().firstElement().getGtLtOp() == BSONObj::opELEM_MATCH) {
                // taken care of elsewhere
            } else if (x.type() != RegEx) {
                lower = upper = x;
                bound = true;
                break;
            }
        }
        if (!bound) { // if no good non regex bound found, try regex bounds
            for (BSONObjIterator members(e.embeddedObject()); members.more();) {
                BSONElement x = members.next();
                if (x.type() != RegEx)
                    continue;
                string simple = simpleRegex(x.regex(), x.regexFlags());
                if (!simple.empty()) {
                    lower = addObj(BSON("" << simple)).firstElement();
                    upper = addObj(BSON("" << simpleRegexEnd(simple))).firstElement();
                    break;
                }
            }
        }
        break;
    }
    case BSONObj::opMOD: {
        {
            BSONObjBuilder b;
            b.appendMinForType("", NumberDouble);
            lower = addObj(b.obj()).firstElement();
        }
        {
            BSONObjBuilder b;
            b.appendMaxForType("", NumberDouble);
            upper = addObj(b.obj()).firstElement();
        }
        break;
    }
    case BSONObj::opTYPE: {
        BSONType t = (BSONType)e.numberInt();
        {
            BSONObjBuilder b;
            b.appendMinForType("", t);
            lower = addObj(b.obj()).firstElement();
        }
        {
            BSONObjBuilder b;
            b.appendMaxForType("", t);
            upper = addObj(b.obj()).firstElement();
        }
        break;
    }
    case BSONObj::opREGEX:
    case BSONObj::opOPTIONS:
        // do nothing
        break;
    case BSONObj::opELEM_MATCH: {
        log() << "warning: shouldn't get here?" << endl;
        break;
    }
    case BSONObj::opNEAR:
    case BSONObj::opWITHIN:
        _special = "2d";
        break;
    default:
        break;
    }

    if (optimize) {
        // Close a half-open range with the min/max value for the type of the
        // bound that is set.
        if (lower.type() != MinKey && upper.type() == MaxKey && lower.isSimpleType()) { // TODO: get rid of isSimpleType
            BSONObjBuilder b;
            b.appendMaxForType(lower.fieldName(), lower.type());
            upper = addObj(b.obj()).firstElement();
        } else if (lower.type() == MinKey && upper.type() != MaxKey && upper.isSimpleType()) { // TODO: get rid of isSimpleType
            BSONObjBuilder b;
            b.appendMinForType(upper.fieldName(), upper.type());
            lower = addObj(b.obj()).firstElement();
        }
    }
}
2009-08-18 16:19:27 +02:00
// as called, these functions find the max/min of a bound in the
2009-09-07 19:39:32 +02:00
// opposite direction, so inclusive bounds are considered less
2009-08-18 16:19:27 +02:00
// superlative
FieldBound maxFieldBound ( const FieldBound & a , const FieldBound & b ) {
int cmp = a . bound_ . woCompare ( b . bound_ , false ) ;
if ( ( cmp = = 0 & & ! b . inclusive_ ) | | cmp < 0 )
return b ;
return a ;
}
/** Returns b when it compares less than a, or compares equal and is exclusive;
 *  otherwise returns a. Field names are ignored in the comparison.
 */
FieldBound minFieldBound(const FieldBound &a, const FieldBound &b) {
    const int cmp = a.bound_.woCompare(b.bound_, false);
    const bool preferB = (cmp > 0) || (cmp == 0 && !b.inclusive_);
    return preferB ? b : a;
}
bool fieldIntervalOverlap ( const FieldInterval & one , const FieldInterval & two , FieldInterval & result ) {
result . lower_ = maxFieldBound ( one . lower_ , two . lower_ ) ;
result . upper_ = minFieldBound ( one . upper_ , two . upper_ ) ;
return result . valid ( ) ;
}
2009-02-24 20:14:45 +01:00
2009-09-07 19:39:32 +02:00
// NOTE Not yet tested for complex $or bounds, just for simple bounds generated by $in
2009-08-11 22:43:30 +02:00
const FieldRange & FieldRange : : operator & = ( const FieldRange & other ) {
2009-08-18 16:19:27 +02:00
vector < FieldInterval > newIntervals ;
vector < FieldInterval > : : const_iterator i = intervals_ . begin ( ) ;
vector < FieldInterval > : : const_iterator j = other . intervals_ . begin ( ) ;
while ( i ! = intervals_ . end ( ) & & j ! = other . intervals_ . end ( ) ) {
FieldInterval overlap ;
if ( fieldIntervalOverlap ( * i , * j , overlap ) )
newIntervals . push_back ( overlap ) ;
if ( i - > upper_ = = minFieldBound ( i - > upper_ , j - > upper_ ) )
+ + i ;
else
+ + j ;
2009-02-27 17:22:12 +01:00
}
2009-08-18 16:19:27 +02:00
intervals_ = newIntervals ;
2009-02-24 20:14:45 +01:00
for ( vector < BSONObj > : : const_iterator i = other . objData_ . begin ( ) ; i ! = other . objData_ . end ( ) ; + + i )
objData_ . push_back ( * i ) ;
2010-02-25 22:24:34 +01:00
if ( _special . size ( ) = = 0 & & other . _special . size ( ) )
_special = other . _special ;
2009-02-24 20:14:45 +01:00
return * this ;
}
2010-03-09 01:53:45 +01:00
/** Feeds one interval into the running union kept in [low, high].
 *
 *  The caller (FieldRange::operator|=) passes intervals in ascending order of
 *  lower bound. low/high hold the interval currently being accumulated; an EOO
 *  low bound means nothing has been accumulated yet.
 *
 *  @param lower         the next interval to merge
 *  @param low           in/out lower bound of the accumulated interval
 *  @param high          in/out upper bound of the accumulated interval
 *  @param newIntervals  receives the accumulated interval once the incoming
 *                       one starts beyond it
 */
void handleInterval(const FieldInterval &lower, FieldBound &low, FieldBound &high, vector<FieldInterval> &newIntervals) {
    if (low.bound_.eoo()) {
        low = lower.lower_;
        high = lower.upper_;
        return;
    }

    // when equal but neither inclusive, just assume they overlap, since current
    // btree scanning code just as efficient either way
    if (high.bound_.woCompare(lower.lower_.bound_, false) < 0) {
        FieldInterval tmp;
        tmp.lower_ = low;
        tmp.upper_ = high;
        newIntervals.push_back(tmp);
        low = lower.lower_;
        high = lower.upper_;
        return;
    }

    // Overlap: extend the accumulated interval only when the incoming one reaches
    // further. An interval nested inside the accumulated one must not shrink it.
    const int cmp = high.bound_.woCompare(lower.upper_.bound_, false);
    if (cmp < 0 || (cmp == 0 && lower.upper_.inclusive_))
        high = lower.upper_;
}
/** Unions this range with other. Both interval lists are walked in ascending
 *  order of lower bound and passed to handleInterval, which coalesces them.
 *  Also appends other's backing objects to objData_ and adopts other's special
 *  value when this range has none.
 */
const FieldRange &FieldRange::operator|=(const FieldRange &other) {
    vector<FieldInterval> merged;
    FieldBound low;
    FieldBound high;
    vector<FieldInterval>::const_iterator mine = intervals_.begin();
    vector<FieldInterval>::const_iterator theirs = other.intervals_.begin();

    while (mine != intervals_.end() && theirs != other.intervals_.end()) {
        const int cmp = mine->lower_.bound_.woCompare(theirs->lower_.bound_, false);
        const bool mineFirst = (cmp < 0) || (cmp == 0 && mine->lower_.inclusive_);
        if (mineFirst) {
            handleInterval(*mine, low, high, merged);
            ++mine;
        } else {
            handleInterval(*theirs, low, high, merged);
            ++theirs;
        }
    }
    // Drain whichever list still has intervals.
    for (; mine != intervals_.end(); ++mine)
        handleInterval(*mine, low, high, merged);
    for (; theirs != other.intervals_.end(); ++theirs)
        handleInterval(*theirs, low, high, merged);

    // Flush the interval still being accumulated.
    FieldInterval last;
    last.lower_ = low;
    last.upper_ = high;
    merged.push_back(last);
    intervals_ = merged;

    for (vector<BSONObj>::const_iterator obj = other.objData_.begin(); obj != other.objData_.end(); ++obj)
        objData_.push_back(*obj);

    if (_special.size() == 0 && other._special.size())
        _special = other._special;
    return *this;
}
2009-08-11 22:43:30 +02:00
/** Stores o in objData_ and returns it, so that elements taken from the
 *  returned object can be used as interval bounds.
 */
BSONObj FieldRange::addObj(const BSONObj &o) {
    objData_.push_back(o);
    return objData_.back();
}
2010-02-25 22:24:34 +01:00
/** Returns the special value of the one field range that has one, or "" when
 *  none does. Raises user assertion 13033 if more than one range is special.
 */
string FieldRangeSet::getSpecial() const {
    string s = "";
    for (map<string, FieldRange>::iterator i = ranges_.begin(); i != ranges_.end(); i++) {
        const string special = i->second.getSpecial();
        if (special.size() == 0)
            continue;
        uassert(13033, "can't have 2 special fields", s.size() == 0);
        s = special;
    }
    return s;
}
2010-02-23 23:20:28 +01:00
void FieldRangeSet : : processOpElement ( const char * fieldName , const BSONElement & f , bool isNot , bool optimize ) {
2010-03-09 20:53:40 +01:00
BSONElement g = f ;
int op2 = g . getGtLtOp ( ) ;
if ( op2 = = BSONObj : : opALL ) {
BSONElement h = g ;
massert ( 13050 , " $all requires array " , h . type ( ) = = Array ) ;
BSONObjIterator i ( h . embeddedObject ( ) ) ;
if ( i . more ( ) ) {
BSONElement x = i . next ( ) ;
if ( x . type ( ) = = Object & & x . embeddedObject ( ) . firstElement ( ) . getGtLtOp ( ) = = BSONObj : : opELEM_MATCH ) {
g = x . embeddedObject ( ) . firstElement ( ) ;
op2 = g . getGtLtOp ( ) ;
}
}
}
2010-02-23 23:20:28 +01:00
if ( op2 = = BSONObj : : opELEM_MATCH ) {
2010-03-09 20:53:40 +01:00
BSONObjIterator k ( g . embeddedObjectUserCheck ( ) ) ;
2010-02-23 23:20:28 +01:00
while ( k . more ( ) ) {
2010-03-09 20:53:40 +01:00
BSONElement h = k . next ( ) ;
2010-02-23 23:20:28 +01:00
StringBuilder buf ( 32 ) ;
2010-03-09 20:53:40 +01:00
buf < < fieldName < < " . " < < h . fieldName ( ) ;
2010-02-23 23:20:28 +01:00
string fullname = buf . str ( ) ;
2010-03-09 20:53:40 +01:00
int op3 = getGtLtOp ( h ) ;
2010-02-23 23:20:28 +01:00
if ( op3 = = BSONObj : : Equality ) {
2010-03-09 20:53:40 +01:00
ranges_ [ fullname ] & = FieldRange ( h , isNot , optimize ) ;
2010-02-23 23:20:28 +01:00
}
else {
2010-03-09 20:53:40 +01:00
BSONObjIterator l ( h . embeddedObject ( ) ) ;
2010-02-23 23:20:28 +01:00
while ( l . more ( ) ) {
ranges_ [ fullname ] & = FieldRange ( l . next ( ) , isNot , optimize ) ;
}
}
}
} else {
ranges_ [ fieldName ] & = FieldRange ( f , isNot , optimize ) ;
}
}
2009-12-31 21:18:43 +01:00
/** Computes a field range for every field constrained by query.
 *
 *  @param ns        namespace the query targets; stored in ns_
 *  @param query     the query object; an owned copy is kept in query_
 *  @param optimize  forwarded to the FieldRange constructor
 */
FieldRangeSet::FieldRangeSet(const char *ns, const BSONObj &query, bool optimize)
    : ns_(ns), query_(query.getOwned()) {
    for (BSONObjIterator top(query_); top.more();) {
        BSONElement e = top.next();
        // e could be x:1 or x:{$gt:1}

        if (strcmp(e.fieldName(), "$where") == 0)
            continue;

        bool equality = (getGtLtOp(e) == BSONObj::Equality);
        if (equality && e.type() == Object)
            equality = (strcmp(e.embeddedObject().firstElement().fieldName(), "$not") != 0);

        const bool hasRegexField = e.type() == Object && !e.embeddedObject()["$regex"].eoo();
        if (equality || hasRegexField)
            ranges_[e.fieldName()] &= FieldRange(e, false, optimize);

        if (equality)
            continue;

        // Operator object: handle each operator, descending into $not.
        for (BSONObjIterator ops(e.embeddedObject()); ops.more();) {
            BSONElement f = ops.next();
            if (strcmp(f.fieldName(), "$not") != 0) {
                processOpElement(e.fieldName(), f, false, optimize);
                continue;
            }

            if (f.type() == Object) {
                for (BSONObjIterator negated(f.embeddedObject()); negated.more();) {
                    BSONElement g = negated.next();
                    uassert(13034, "invalid use of $not", g.getGtLtOp() != BSONObj::Equality);
                    processOpElement(e.fieldName(), g, true, optimize);
                }
            } else if (f.type() == RegEx) {
                processOpElement(e.fieldName(), f, true, optimize);
            } else {
                uassert(13041, "invalid use of $not", false);
            }
        }
    }
}
2009-12-31 21:18:43 +01:00
2009-08-11 22:43:30 +02:00
FieldRange *FieldRangeSet::trivialRange_ = 0;

/** Returns the shared default-constructed FieldRange, allocating it on first
 *  use. The object is never deleted here.
 */
FieldRange &FieldRangeSet::trivialRange() {
    if (!trivialRange_)
        trivialRange_ = new FieldRange();
    return *trivialRange_;
}
2009-08-11 22:43:30 +02:00
/** Rebuilds a query from the computed ranges.
 *
 *  @param _fields  the fields to include; when empty, every field that has a
 *                  range is used
 *  @return an object holding, per field, either the equality value or a
 *          sub-object with the $gt/$gte and $lt/$lte bounds that are not
 *          MinKey/MaxKey. Fields whose range is neither an equality nor
 *          nontrivial are omitted.
 */
BSONObj FieldRangeSet::simplifiedQuery(const BSONObj &_fields) const {
    BSONObj fields = _fields;
    if (fields.isEmpty()) {
        BSONObjBuilder all;
        for (map<string, FieldRange>::const_iterator r = ranges_.begin(); r != ranges_.end(); ++r)
            all.append(r->first.c_str(), 1);
        fields = all.obj();
    }

    BSONObjBuilder result;
    for (BSONObjIterator it(fields); it.more();) {
        BSONElement e = it.next();
        const char *name = e.fieldName();
        const FieldRange &range = ranges_[name];
        assert(!range.empty());

        if (range.equality()) {
            result.appendAs(range.min(), name);
            continue;
        }
        if (!range.nontrivial())
            continue;

        BSONObjBuilder bounds;
        if (range.min().type() != MinKey)
            bounds.appendAs(range.min(), range.minInclusive() ? "$gte" : "$gt");
        if (range.max().type() != MaxKey)
            bounds.appendAs(range.max(), range.maxInclusive() ? "$lte" : "$lt");
        result.append(name, bounds.done());
    }
    return result.obj();
}
2009-02-24 21:20:29 +01:00
2009-08-11 22:43:30 +02:00
/** Summarizes the shape of the query: for each constrained field, whether it
 *  is an equality or bounded from above, below or both, plus the given sort.
 */
QueryPattern FieldRangeSet::pattern(const BSONObj &sort) const {
    QueryPattern qp;
    for (map<string, FieldRange>::const_iterator r = ranges_.begin(); r != ranges_.end(); ++r) {
        const FieldRange &range = r->second;
        assert(!range.empty());

        if (range.equality()) {
            qp.fieldTypes_[r->first] = QueryPattern::Equality;
            continue;
        }
        if (!range.nontrivial())
            continue;

        const bool hasUpper = range.max().type() != MaxKey;
        const bool hasLower = range.min().type() != MinKey;
        if (hasUpper && hasLower)
            qp.fieldTypes_[r->first] = QueryPattern::UpperAndLowerBound;
        else if (hasUpper)
            qp.fieldTypes_[r->first] = QueryPattern::UpperBound;
        else if (hasLower)
            qp.fieldTypes_[r->first] = QueryPattern::LowerBound;
    }
    qp.setSort(sort);
    return qp;
}
2009-07-02 20:05:36 +02:00
2009-09-07 20:44:41 +02:00
/** Computes the (start key, end key) pairs to scan in an index.
 *
 *  Leading equality fields are shared by every pair. The first non-equality
 *  field produces one pair per interval of its range; each later field appends
 *  its overall min/max to every pair.
 *
 *  @param keyPattern  the index key pattern; a negative value marks a
 *                     descending field
 *  @param direction   scan direction; negative reverses each field's order
 */
BoundList FieldRangeSet::indexBounds(const BSONObj &keyPattern, int direction) const {
    typedef vector< pair< shared_ptr<BSONObjBuilder>, shared_ptr<BSONObjBuilder> > > BoundBuilders;
    BSONObjBuilder equalityBuilder;
    BoundBuilders builders;

    for (BSONObjIterator keys(keyPattern); keys.more();) {
        BSONElement e = keys.next();
        const FieldRange &fr = range(e.fieldName());
        int number = (int)e.number(); // returns 0.0 if not numeric
        bool forward = ((number >= 0 ? 1 : -1) * (direction >= 0 ? 1 : -1) > 0);

        if (!builders.empty()) {
            // Past the first ranged field: every pair gets this field's overall bounds.
            for (BoundBuilders::const_iterator j = builders.begin(); j != builders.end(); ++j) {
                j->first->appendAs(forward ? fr.min() : fr.max(), "");
                j->second->appendAs(forward ? fr.max() : fr.min(), "");
            }
            continue;
        }

        if (fr.equality()) {
            equalityBuilder.appendAs(fr.min(), "");
            continue;
        }

        // First non-equality field: one pair per interval, visited in scan order.
        BSONObj equalityObj = equalityBuilder.done();
        const vector<FieldInterval> &intervals = fr.intervals();
        const size_t count = intervals.size();
        for (size_t k = 0; k < count; ++k) {
            const FieldInterval &interval = forward ? intervals[k] : intervals[count - 1 - k];
            builders.push_back(make_pair(shared_ptr<BSONObjBuilder>(new BSONObjBuilder()), shared_ptr<BSONObjBuilder>(new BSONObjBuilder())));
            builders.back().first->appendElements(equalityObj);
            builders.back().second->appendElements(equalityObj);
            builders.back().first->appendAs(forward ? interval.lower_.bound_ : interval.upper_.bound_, "");
            builders.back().second->appendAs(forward ? interval.upper_.bound_ : interval.lower_.bound_, "");
        }
    }

    if (builders.empty()) {
        // Every key field was an equality: a single point bound.
        BSONObj equalityObj = equalityBuilder.done();
        assert(!equalityObj.isEmpty());
        builders.push_back(make_pair(shared_ptr<BSONObjBuilder>(new BSONObjBuilder()), shared_ptr<BSONObjBuilder>(new BSONObjBuilder())));
        builders.back().first->appendElements(equalityObj);
        builders.back().second->appendElements(equalityObj);
    }

    BoundList ret;
    for (BoundBuilders::const_iterator b = builders.begin(); b != builders.end(); ++b)
        ret.push_back(make_pair(b->first->obj(), b->second->obj()));
    return ret;
}
2009-10-26 15:24:30 +01:00
///////////////////
2009-10-28 16:52:23 +01:00
// FieldMatcher //
2009-10-26 15:24:30 +01:00
///////////////////
2009-09-07 20:44:41 +02:00
2009-07-01 20:55:52 +02:00
void FieldMatcher : : add ( const BSONObj & o ) {
2010-02-16 05:17:42 +01:00
massert ( 10371 , " can only add to FieldMatcher once " , _source . isEmpty ( ) ) ;
_source = o ;
2009-10-28 16:52:23 +01:00
2009-07-01 21:00:09 +02:00
BSONObjIterator i ( o ) ;
2009-10-28 16:52:23 +01:00
int true_false = - 1 ;
2009-07-01 21:00:09 +02:00
while ( i . more ( ) ) {
2009-10-28 16:52:23 +01:00
BSONElement e = i . next ( ) ;
2010-04-16 22:52:54 +02:00
if ( e . type ( ) = = Object ) {
BSONObj obj = e . embeddedObject ( ) ;
BSONElement e2 = obj . firstElement ( ) ;
if ( strcmp ( e2 . fieldName ( ) , " $slice " ) = = 0 ) {
if ( e2 . isNumber ( ) ) {
int i = e2 . numberInt ( ) ;
if ( i < 0 )
add ( e . fieldName ( ) , i , - i ) ; // limit is now positive
else
add ( e . fieldName ( ) , 0 , i ) ;
} else if ( e2 . type ( ) = = Array ) {
BSONObj arr = e2 . embeddedObject ( ) ;
uassert ( 13099 , " $slice array wrong size " , arr . nFields ( ) = = 2 ) ;
BSONObjIterator it ( arr ) ;
int skip = it . next ( ) . numberInt ( ) ;
int limit = it . next ( ) . numberInt ( ) ;
uassert ( 13100 , " $slice limit must be positive " , limit > 0 ) ;
add ( e . fieldName ( ) , skip , limit ) ;
} else {
uassert ( 13098 , " $slice only supports numbers and [skip, limit] arrays " , false ) ;
}
} else {
uassert ( 13097 , string ( " Unsupported projection option: " ) + obj . firstElement ( ) . fieldName ( ) , false ) ;
}
2010-05-01 03:20:24 +02:00
} else if ( ! strcmp ( e . fieldName ( ) , " _id " ) & & ! e . trueValue ( ) ) {
_includeID = false ;
2010-04-16 22:52:54 +02:00
2010-05-01 03:20:24 +02:00
} else {
2009-10-28 16:52:23 +01:00
2010-05-01 03:20:24 +02:00
add ( e . fieldName ( ) , e . trueValue ( ) ) ;
// validate input
if ( true_false = = - 1 ) {
true_false = e . trueValue ( ) ;
_include = ! e . trueValue ( ) ;
}
else {
uassert ( 10053 , " You cannot currently mix including and excluding fields. Contact us if this is an issue. " ,
( bool ) true_false = = e . trueValue ( ) ) ;
}
2009-07-01 21:00:09 +02:00
}
}
2009-07-01 20:55:52 +02:00
}
2009-10-28 16:52:23 +01:00
void FieldMatcher : : add ( const string & field , bool include ) {
if ( field . empty ( ) ) { // this is the field the user referred to
2010-02-16 05:17:42 +01:00
_include = include ;
2009-10-28 16:52:23 +01:00
} else {
2010-04-16 22:52:54 +02:00
_include = ! include ;
2009-10-28 16:52:23 +01:00
const size_t dot = field . find ( ' . ' ) ;
const string subfield = field . substr ( 0 , dot ) ;
const string rest = ( dot = = string : : npos ? " " : field . substr ( dot + 1 , string : : npos ) ) ;
2010-02-16 05:17:42 +01:00
boost : : shared_ptr < FieldMatcher > & fm = _fields [ subfield ] ;
2009-10-28 16:52:23 +01:00
if ( ! fm )
2010-04-16 22:52:54 +02:00
fm . reset ( new FieldMatcher ( ) ) ;
2009-10-28 16:52:23 +01:00
fm - > add ( rest , include ) ;
}
2009-07-01 20:55:52 +02:00
}
2009-10-28 16:52:23 +01:00
2010-04-16 22:52:54 +02:00
void FieldMatcher : : add ( const string & field , int skip , int limit ) {
_special = true ; // can't include or exclude whole object
if ( field . empty ( ) ) { // this is the field the user referred to
_skip = skip ;
_limit = limit ;
} else {
const size_t dot = field . find ( ' . ' ) ;
const string subfield = field . substr ( 0 , dot ) ;
const string rest = ( dot = = string : : npos ? " " : field . substr ( dot + 1 , string : : npos ) ) ;
boost : : shared_ptr < FieldMatcher > & fm = _fields [ subfield ] ;
if ( ! fm )
fm . reset ( new FieldMatcher ( ) ) ;
fm - > add ( rest , skip , limit ) ;
}
}
2009-07-01 21:33:03 +02:00
/** Returns the specification object stored in _source by add(const BSONObj&). */
BSONObj FieldMatcher::getSpec() const {
    const BSONObj &spec = _source;
    return spec;
}
2009-07-01 20:55:52 +02:00
2009-10-28 16:52:23 +01:00
//b will be the value part of an array-typed BSONElement
2010-05-05 21:51:46 +02:00
void FieldMatcher : : appendArray ( BSONObjBuilder & b , const BSONObj & a , bool nested ) const {
int skip = nested ? 0 : _skip ;
int limit = nested ? - 1 : _limit ;
2010-04-16 22:52:54 +02:00
if ( skip < 0 ) {
skip = max ( 0 , skip + a . nFields ( ) ) ;
}
2009-10-28 16:52:23 +01:00
int i = 0 ;
BSONObjIterator it ( a ) ;
while ( it . more ( ) ) {
BSONElement e = it . next ( ) ;
2009-07-02 20:05:36 +02:00
2010-04-16 22:52:54 +02:00
if ( skip ) {
skip - - ;
continue ;
}
if ( limit ! = - 1 & & ( limit - - = = 0 ) ) {
break ;
}
2009-10-28 16:52:23 +01:00
switch ( e . type ( ) ) {
case Array : {
BSONObjBuilder subb ;
2010-05-05 21:51:46 +02:00
appendArray ( subb , e . embeddedObject ( ) , true ) ;
2009-10-28 16:52:23 +01:00
b . appendArray ( b . numStr ( i + + ) . c_str ( ) , subb . obj ( ) ) ;
break ;
}
case Object : {
BSONObjBuilder subb ;
BSONObjIterator jt ( e . embeddedObject ( ) ) ;
while ( jt . more ( ) ) {
append ( subb , jt . next ( ) ) ;
}
b . append ( b . numStr ( i + + ) , subb . obj ( ) ) ;
break ;
}
default :
2010-02-16 05:17:42 +01:00
if ( _include )
2009-10-28 16:52:23 +01:00
b . appendAs ( e , b . numStr ( i + + ) . c_str ( ) ) ;
}
}
2009-07-02 20:05:36 +02:00
}
2009-10-28 16:52:23 +01:00
2009-07-02 20:05:36 +02:00
void FieldMatcher : : append ( BSONObjBuilder & b , const BSONElement & e ) const {
2010-02-16 05:17:42 +01:00
FieldMap : : const_iterator field = _fields . find ( e . fieldName ( ) ) ;
2009-10-28 16:52:23 +01:00
2010-02-16 05:17:42 +01:00
if ( field = = _fields . end ( ) ) {
if ( _include )
2009-10-28 16:52:23 +01:00
b . append ( e ) ;
2010-02-16 05:17:42 +01:00
}
else {
2009-10-28 16:52:23 +01:00
FieldMatcher & subfm = * field - > second ;
2010-02-16 05:17:42 +01:00
2010-04-16 22:52:54 +02:00
if ( ( subfm . _fields . empty ( ) & & ! subfm . _special ) | | ! ( e . type ( ) = = Object | | e . type ( ) = = Array ) ) {
2010-02-16 05:17:42 +01:00
if ( subfm . _include )
2009-10-28 16:52:23 +01:00
b . append ( e ) ;
2010-02-16 05:17:42 +01:00
}
else if ( e . type ( ) = = Object ) {
2009-10-28 16:52:23 +01:00
BSONObjBuilder subb ;
BSONObjIterator it ( e . embeddedObject ( ) ) ;
while ( it . more ( ) ) {
subfm . append ( subb , it . next ( ) ) ;
}
b . append ( e . fieldName ( ) , subb . obj ( ) ) ;
2009-08-20 16:06:47 +02:00
2010-02-16 05:17:42 +01:00
}
else { //Array
2009-10-28 16:52:23 +01:00
BSONObjBuilder subb ;
subfm . appendArray ( subb , e . embeddedObject ( ) ) ;
b . appendArray ( e . fieldName ( ) , subb . obj ( ) ) ;
2009-08-14 20:19:23 +02:00
}
2009-07-02 20:05:36 +02:00
}
}
2009-02-24 23:48:06 +01:00
2010-01-15 02:57:50 +01:00
struct SimpleRegexUnitTest : UnitTest {
void run ( ) {
{
BSONObjBuilder b ;
b . appendRegex ( " r " , " ^foo " ) ;
BSONObj o = b . done ( ) ;
assert ( simpleRegex ( o . firstElement ( ) ) = = " foo " ) ;
}
{
BSONObjBuilder b ;
b . appendRegex ( " r " , " ^f?oo " ) ;
BSONObj o = b . done ( ) ;
assert ( simpleRegex ( o . firstElement ( ) ) = = " " ) ;
}
{
BSONObjBuilder b ;
b . appendRegex ( " r " , " ^fz?oo " ) ;
BSONObj o = b . done ( ) ;
assert ( simpleRegex ( o . firstElement ( ) ) = = " f " ) ;
}
{
BSONObjBuilder b ;
b . appendRegex ( " r " , " ^f " , " " ) ;
BSONObj o = b . done ( ) ;
assert ( simpleRegex ( o . firstElement ( ) ) = = " f " ) ;
}
2010-03-30 02:17:10 +02:00
{
BSONObjBuilder b ;
b . appendRegex ( " r " , " \\ Af " , " " ) ;
BSONObj o = b . done ( ) ;
assert ( simpleRegex ( o . firstElement ( ) ) = = " f " ) ;
}
2010-01-15 02:57:50 +01:00
{
BSONObjBuilder b ;
b . appendRegex ( " r " , " ^f " , " m " ) ;
BSONObj o = b . done ( ) ;
2010-03-30 02:17:10 +02:00
assert ( simpleRegex ( o . firstElement ( ) ) = = " " ) ;
}
{
BSONObjBuilder b ;
b . appendRegex ( " r " , " \\ Af " , " m " ) ;
BSONObj o = b . done ( ) ;
2010-01-15 02:57:50 +01:00
assert ( simpleRegex ( o . firstElement ( ) ) = = " f " ) ;
}
{
BSONObjBuilder b ;
2010-03-30 02:17:10 +02:00
b . appendRegex ( " r " , " \\ Af " , " mi " ) ;
2010-01-15 02:57:50 +01:00
BSONObj o = b . done ( ) ;
assert ( simpleRegex ( o . firstElement ( ) ) = = " " ) ;
}
2010-01-15 04:47:34 +01:00
{
BSONObjBuilder b ;
2010-03-30 02:17:10 +02:00
b . appendRegex ( " r " , " \\ Af \t \v o \n \r o \\ \\ # #comment " , " mx " ) ;
2010-01-15 04:47:34 +01:00
BSONObj o = b . done ( ) ;
assert ( simpleRegex ( o . firstElement ( ) ) = = " foo # " ) ;
}
2010-01-15 02:57:50 +01:00
}
} simple_regex_unittest ;
2009-02-24 20:14:45 +01:00
} // namespace mongo