2009-02-24 20:14:45 +01:00
// queryutil.cpp
2009-10-27 20:58:27 +01:00
/* Copyright 2009 10gen Inc.
*
* Licensed under the Apache License , Version 2.0 ( the " License " ) ;
* you may not use this file except in compliance with the License .
* You may obtain a copy of the License at
*
* http : //www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing , software
* distributed under the License is distributed on an " AS IS " BASIS ,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND , either express or implied .
* See the License for the specific language governing permissions and
* limitations under the License .
*/
2009-02-24 20:14:45 +01:00
2010-04-27 21:27:52 +02:00
# include "pch.h"
2009-02-24 20:14:45 +01:00
# include "btree.h"
2009-08-18 17:59:40 +02:00
# include "matcher.h"
2009-02-24 20:14:45 +01:00
# include "pdfile.h"
# include "queryoptimizer.h"
2010-01-15 02:57:50 +01:00
# include "../util/unittest.h"
2010-07-23 22:06:06 +02:00
# include "dbmessage.h"
2009-02-24 20:14:45 +01:00
namespace mongo {
2010-07-16 12:47:26 +02:00
extern BSONObj staticNull ;
2010-02-26 03:42:46 +01:00
/**
 * Returns a literal prefix that every string matched by the regex must start
 * with, i.e. a string that, used as a range bound, matches a super set of
 * regex. Used to turn simple anchored regexes into index range scans.
 *
 * Only patterns anchored with '^' or "\A" are handled. Any other pattern, and
 * any pattern for which no required prefix can be determined, yields "".
 *
 * @param regex       NUL-terminated pattern.
 * @param flags       NUL-terminated option letters. 'x' (extended) is
 *                    supported; 'm' (multiline) only with a "\A" anchor; any
 *                    other flag disables the optimization.
 * @param purePrefix  if non-NULL, set to true only when the whole pattern is a
 *                    non-empty literal prefix (the regex can be converted to a
 *                    range query), and to false in every other case.
 * @return the required literal prefix, or "" if none can be used.
 */
std::string simpleRegex(const char* regex, const char* flags, bool* purePrefix) {
    if (purePrefix)
        *purePrefix = false;

    // "\A" anchors at the start of the subject even in multiline mode, whereas
    // '^' would also match after every newline.
    bool multilineOK;
    if (regex[0] == '\\' && regex[1] == 'A') {
        multilineOK = true;
        regex += 2;
    } else if (regex[0] == '^') {
        multilineOK = false;
        regex += 1;
    } else {
        return std::string();
    }

    bool extended = false;
    for (; *flags; ++flags) {
        switch (*flags) {
        case 'm':  // multiline
            if (!multilineOK)
                return std::string();
            break;
        case 'x':  // extended
            extended = true;
            break;
        default:
            return std::string();  // can't use index
        }
    }

    std::string prefix;
    bool stoppedEarly = false;  // true when the pattern continues past the literal prefix
    while (*regex && !stoppedEarly) {
        const char c = *regex++;
        if (c == '*' || c == '?' || c == '{') {
            // These quantifiers can make the preceding char optional (a '{'
            // may introduce {0,n}), so that char cannot be part of the prefix.
            if (!prefix.empty())
                prefix.erase(prefix.size() - 1);
            return prefix;
        } else if (c == '|') {
            // A top-level alternation makes everything seen so far optional.
            return std::string();
        } else if (c == '\\') {
            // A backslash followed by a non-alphanumeric represents that char
            // literally; followed by an alphanumeric it is a class, assertion
            // or back reference. Peek instead of consuming so that a trailing
            // backslash never moves past the terminating NUL.
            const char escaped = *regex;
            const bool alnum = (escaped >= 'A' && escaped <= 'Z') ||
                               (escaped >= 'a' && escaped <= 'z') ||
                               (escaped >= '0' && escaped <= '9');
            if (escaped == '\0' || alnum) {
                stoppedEarly = true;
            } else {
                prefix += escaped;
                ++regex;
            }
        } else if (strchr("^$.[()+", c)) {
            // remaining "metacharacters" from man pcrepattern
            stoppedEarly = true;
        } else if (extended && c == '#') {
            // comment
            stoppedEarly = true;
        } else if (extended && isspace(static_cast<unsigned char>(c))) {
            continue;
        } else {
            // self-matching char
            prefix += c;
        }
    }

    // Only a pattern consumed completely as literals is a pure prefix.
    if (!stoppedEarly && purePrefix)
        *purePrefix = !prefix.empty();
    return prefix;
}
/**
 * Convenience overload: extracts pattern and flags from a BSON element and
 * forwards to the C-string overload.
 *
 * Accepts either a RegEx element or an Object carrying "$regex" / "$options"
 * fields; any other element type trips an assertion.
 */
inline string simpleRegex(const BSONElement& e) {
    if (e.type() == RegEx)
        return simpleRegex(e.regex(), e.regexFlags());

    if (e.type() == Object) {
        BSONObj spec = e.embeddedObject();
        return simpleRegex(spec["$regex"].valuestrsafe(), spec["$options"].valuestrsafe());
    }

    assert(false);
    return "";  // return squashes compiler warning
}
2010-02-26 03:42:46 +01:00
/**
 * Returns the exclusive upper bound of the range of strings that start with
 * the given prefix: a copy of the prefix with its last character incremented.
 *
 * @param regex  literal prefix as produced by simpleRegex().
 * @return the incremented prefix; an empty prefix is returned unchanged
 *         (there is no last character to increment).
 */
std::string simpleRegexEnd(std::string regex) {
    if (regex.empty())
        return regex;
    ++regex[regex.length() - 1];
    return regex;
}
2009-08-20 22:18:50 +02:00
2010-02-23 23:20:28 +01:00
// Builds the interval list (_intervals) describing the values a single query
// element e can match.
//   e        - the query element (plain value, regex, or operator such as $gt / $in)
//   isNot    - true when the element appears under $not; operators are mapped to
//              their complement where a bound can still be computed
//   optimize - when true, a range bounded on one side only is closed on the other
//              side with the min / max value of the bounded side's type
FieldRange : : FieldRange ( const BSONElement & e , bool isNot , bool optimize ) {
2010-02-23 23:41:05 +01:00
// NOTE with $not, we could potentially form a complementary set of intervals.
// $in: non-regex members are collected in a set ordered by element_lt and each
// becomes its own FieldInterval; regex members become FieldRanges that are
// or-ed into this range afterwards.
if ( ! isNot & & ! e . eoo ( ) & & e . type ( ) ! = RegEx & & e . getGtLtOp ( ) = = BSONObj : : opIN ) {
2009-08-18 17:59:40 +02:00
set < BSONElement , element_lt > vals ;
2010-03-09 01:53:45 +01:00
vector < FieldRange > regexes ;
2010-01-28 23:30:58 +01:00
uassert ( 12580 , " invalid query " , e . isABSONObj ( ) ) ;
2009-08-18 17:59:40 +02:00
BSONObjIterator i ( e . embeddedObject ( ) ) ;
2010-03-09 01:53:45 +01:00
while ( i . more ( ) ) {
BSONElement ie = i . next ( ) ;
if ( ie . type ( ) = = RegEx ) {
regexes . push_back ( FieldRange ( ie , false , optimize ) ) ;
} else {
vals . insert ( ie ) ;
}
}
2009-10-09 16:12:19 +02:00
for ( set < BSONElement , element_lt > : : const_iterator i = vals . begin ( ) ; i ! = vals . end ( ) ; + + i )
2010-05-10 19:06:20 +02:00
_intervals . push_back ( FieldInterval ( * i ) ) ;
2009-10-09 16:12:19 +02:00
2010-03-09 01:53:45 +01:00
for ( vector < FieldRange > : : const_iterator i = regexes . begin ( ) ; i ! = regexes . end ( ) ; + + i )
* this | = * i ;
2009-10-09 16:12:19 +02:00
return ;
}
// Equality against an array: one interval for the array itself and one for
// its first element (if any), inserted so the two stay ordered.
if ( e . type ( ) = = Array & & e . getGtLtOp ( ) = = BSONObj : : Equality ) {
2010-05-10 19:06:20 +02:00
_intervals . push_back ( FieldInterval ( e ) ) ;
2009-10-09 16:12:19 +02:00
const BSONElement & temp = e . embeddedObject ( ) . firstElement ( ) ;
if ( ! temp . eoo ( ) ) {
if ( temp < e )
2010-05-10 19:06:20 +02:00
_intervals . insert ( _intervals . begin ( ) , temp ) ;
2009-10-09 16:12:19 +02:00
else
2010-05-10 19:06:20 +02:00
_intervals . push_back ( FieldInterval ( temp ) ) ;
2009-08-18 17:59:40 +02:00
}
2009-10-09 16:12:19 +02:00
2009-08-18 17:59:40 +02:00
return ;
}
2010-05-10 19:06:20 +02:00
// All remaining cases start from a single interval. lower / upper and the
// inclusive flags are references into _intervals [ 0 ], so the assignments
// below edit that interval in place.
_intervals . push_back ( FieldInterval ( ) ) ;
FieldInterval & initial = _intervals [ 0 ] ;
BSONElement & lower = initial . _lower . _bound ;
bool & lowerInclusive = initial . _lower . _inclusive ;
BSONElement & upper = initial . _upper . _bound ;
bool & upperInclusive = initial . _upper . _inclusive ;
2009-08-18 17:59:40 +02:00
// Default: the unrestricted range [ MinKey , MaxKey ].
lower = minKey . firstElement ( ) ;
lowerInclusive = true ;
upper = maxKey . firstElement ( ) ;
upperInclusive = true ;
2009-02-24 20:14:45 +01:00
if ( e . eoo ( ) )
return ;
2010-01-15 02:24:01 +01:00
// Regex given either as a RegEx element or as an object with a $regex field.
if ( e . type ( ) = = RegEx
| | ( e . type ( ) = = Object & & ! e . embeddedObject ( ) [ " $regex " ] . eoo ( ) )
)
{
2010-02-23 23:20:28 +01:00
if ( ! isNot ) { // no optimization for negated regex - we could consider creating 2 intervals comprising all nonmatching prefixes
const string r = simpleRegex ( e ) ;
if ( r . size ( ) ) {
// usable literal prefix: range is [ r , simpleRegexEnd ( r ) )
lower = addObj ( BSON ( " " < < r ) ) . firstElement ( ) ;
upper = addObj ( BSON ( " " < < simpleRegexEnd ( r ) ) ) . firstElement ( ) ;
upperInclusive = false ;
2010-02-25 22:16:31 +01:00
} else {
// no usable prefix: fall back to the min / max bounds of type String
BSONObjBuilder b1 ( 32 ) , b2 ( 32 ) ;
b1 . appendMinForType ( " " , String ) ;
lower = addObj ( b1 . obj ( ) ) . firstElement ( ) ;
b2 . appendMaxForType ( " " , String ) ;
upper = addObj ( b2 . obj ( ) ) . firstElement ( ) ;
upperInclusive = false ; //MaxForType String is an empty Object
2010-02-23 23:20:28 +01:00
}
2010-03-01 23:59:20 +01:00
2010-03-09 01:53:45 +01:00
// regex matches self - regex type > string type
2010-03-01 23:59:20 +01:00
// A second interval holding the regex element itself is appended. lower / upper
// are not used again after these push_back calls (the function returns).
if ( e . type ( ) = = RegEx ) {
BSONElement re = addObj ( BSON ( " " < < e ) ) . firstElement ( ) ;
2010-05-10 19:06:20 +02:00
_intervals . push_back ( FieldInterval ( re ) ) ;
2010-03-02 02:24:08 +01:00
} else {
BSONObj orig = e . embeddedObject ( ) ;
BSONObjBuilder b ;
b . appendRegex ( " " , orig [ " $regex " ] . valuestrsafe ( ) , orig [ " $options " ] . valuestrsafe ( ) ) ;
BSONElement re = addObj ( b . obj ( ) ) . firstElement ( ) ;
2010-05-10 19:06:20 +02:00
_intervals . push_back ( FieldInterval ( re ) ) ;
2010-03-01 23:59:20 +01:00
}
2010-02-23 23:20:28 +01:00
}
2009-02-24 20:14:45 +01:00
return ;
}
2010-02-23 23:20:28 +01:00
int op = e . getGtLtOp ( ) ;
// Under $not, replace the operator by its complement; operators without a
// computable complement are mapped to NE, which leaves the default bounds.
if ( isNot ) {
switch ( op ) {
case BSONObj : : Equality :
case BSONObj : : opALL :
case BSONObj : : opMOD : // NOTE for mod and type, we could consider having 1-2 intervals comprising the complementary types (multiple intervals already possible with $in)
case BSONObj : : opTYPE :
op = BSONObj : : NE ; // no bound calculation
break ;
case BSONObj : : NE :
op = BSONObj : : Equality ;
break ;
case BSONObj : : LT :
op = BSONObj : : GTE ;
break ;
case BSONObj : : LTE :
op = BSONObj : : GT ;
break ;
case BSONObj : : GT :
op = BSONObj : : LTE ;
break ;
case BSONObj : : GTE :
op = BSONObj : : LT ;
break ;
default : // otherwise doesn't matter
break ;
}
}
// Translate the (possibly complemented) operator into bounds.
switch ( op ) {
2009-08-20 22:18:50 +02:00
case BSONObj : : Equality :
2009-08-18 17:59:40 +02:00
lower = upper = e ;
2009-08-20 22:18:50 +02:00
break ;
case BSONObj : : LT :
2009-08-18 17:59:40 +02:00
upperInclusive = false ;
2009-08-20 22:18:50 +02:00
// no break above: LT falls through to LTE to set the upper bound
case BSONObj : : LTE :
2009-08-18 17:59:40 +02:00
upper = e ;
2009-08-20 22:18:50 +02:00
break ;
case BSONObj : : GT :
2009-08-18 17:59:40 +02:00
lowerInclusive = false ;
2009-08-20 22:18:50 +02:00
// no break above: GT falls through to GTE to set the lower bound
case BSONObj : : GTE :
2009-08-18 17:59:40 +02:00
lower = e ;
2009-08-20 22:18:50 +02:00
break ;
2009-08-18 17:59:40 +02:00
case BSONObj : : opALL : {
2009-12-28 22:43:43 +01:00
massert ( 10370 , " $all requires array " , e . type ( ) = = Array ) ;
2009-08-20 22:18:50 +02:00
BSONObjIterator i ( e . embeddedObject ( ) ) ;
2010-03-09 02:33:47 +01:00
bool bound = false ;
// first pass: use the first member that is neither an $elemMatch object
// nor a regex as an equality bound
while ( i . more ( ) ) {
2010-02-19 22:11:55 +01:00
BSONElement x = i . next ( ) ;
if ( x . type ( ) = = Object & & x . embeddedObject ( ) . firstElement ( ) . getGtLtOp ( ) = = BSONObj : : opELEM_MATCH ) {
2010-03-09 20:53:40 +01:00
// taken care of elsewhere
2010-02-19 22:11:55 +01:00
}
2010-03-09 02:33:47 +01:00
else if ( x . type ( ) ! = RegEx ) {
2010-02-19 22:11:55 +01:00
lower = upper = x ;
2010-03-09 02:33:47 +01:00
bound = true ;
break ;
}
}
if ( ! bound ) { // if no good non regex bound found, try regex bounds
BSONObjIterator i ( e . embeddedObject ( ) ) ;
while ( i . more ( ) ) {
BSONElement x = i . next ( ) ;
if ( x . type ( ) ! = RegEx )
continue ;
string simple = simpleRegex ( x . regex ( ) , x . regexFlags ( ) ) ;
if ( ! simple . empty ( ) ) {
lower = addObj ( BSON ( " " < < simple ) ) . firstElement ( ) ;
upper = addObj ( BSON ( " " < < simpleRegexEnd ( simple ) ) ) . firstElement ( ) ;
break ;
}
2010-02-19 22:11:55 +01:00
}
}
2009-08-20 22:18:50 +02:00
break ;
2009-08-18 17:59:40 +02:00
}
2009-08-20 22:50:58 +02:00
// $mod: bounds are the min / max values for type NumberDouble
case BSONObj : : opMOD : {
2009-10-12 18:00:25 +02:00
{
BSONObjBuilder b ;
b . appendMinForType ( " " , NumberDouble ) ;
lower = addObj ( b . obj ( ) ) . firstElement ( ) ;
}
{
BSONObjBuilder b ;
b . appendMaxForType ( " " , NumberDouble ) ;
upper = addObj ( b . obj ( ) ) . firstElement ( ) ;
}
2009-08-20 22:50:58 +02:00
break ;
2009-08-20 22:18:50 +02:00
}
2009-10-12 17:58:14 +02:00
// $type: bounds are the min / max values for the requested type
case BSONObj : : opTYPE : {
BSONType t = ( BSONType ) e . numberInt ( ) ;
{
BSONObjBuilder b ;
b . appendMinForType ( " " , t ) ;
lower = addObj ( b . obj ( ) ) . firstElement ( ) ;
}
{
BSONObjBuilder b ;
b . appendMaxForType ( " " , t ) ;
upper = addObj ( b . obj ( ) ) . firstElement ( ) ;
}
break ;
}
2010-02-23 21:45:12 +01:00
case BSONObj : : opREGEX :
case BSONObj : : opOPTIONS :
// do nothing
break ;
2009-12-31 20:30:04 +01:00
case BSONObj : : opELEM_MATCH : {
log ( ) < < " warning: shouldn't get here? " < < endl ;
break ;
}
2010-02-25 22:24:34 +01:00
// geo operators do not restrict the bounds; they only tag the range as special
case BSONObj : : opNEAR :
2010-03-16 04:50:14 +01:00
case BSONObj : : opWITHIN :
2010-02-25 22:24:34 +01:00
_special = " 2d " ;
break ;
2009-08-20 22:18:50 +02:00
default :
break ;
2009-02-24 20:14:45 +01:00
}
2009-08-11 10:58:54 +02:00
// With optimize set, a range bounded on one side only gets its open side
// replaced by the max / min value for the type of the bounded side.
if ( optimize ) {
2009-08-18 17:59:40 +02:00
if ( lower . type ( ) ! = MinKey & & upper . type ( ) = = MaxKey & & lower . isSimpleType ( ) ) { // TODO: get rid of isSimpleType
2009-08-11 10:58:54 +02:00
BSONObjBuilder b ;
2009-08-18 17:59:40 +02:00
b . appendMaxForType ( lower . fieldName ( ) , lower . type ( ) ) ;
upper = addObj ( b . obj ( ) ) . firstElement ( ) ;
2009-08-11 10:58:54 +02:00
}
2009-08-18 17:59:40 +02:00
else if ( lower . type ( ) = = MinKey & & upper . type ( ) ! = MaxKey & & upper . isSimpleType ( ) ) { // TODO: get rid of isSimpleType
2009-08-11 10:58:54 +02:00
BSONObjBuilder b ;
2009-08-18 17:59:40 +02:00
b . appendMinForType ( upper . fieldName ( ) , upper . type ( ) ) ;
lower = addObj ( b . obj ( ) ) . firstElement ( ) ;
2009-08-11 10:58:54 +02:00
}
2009-08-10 23:08:43 +02:00
}
2009-07-06 16:42:22 +02:00
2009-02-24 20:14:45 +01:00
}
2009-08-18 16:19:27 +02:00
2010-05-12 19:38:58 +02:00
void FieldRange : : finishOperation ( const vector < FieldInterval > & newIntervals , const FieldRange & other ) {
_intervals = newIntervals ;
for ( vector < BSONObj > : : const_iterator i = other . _objData . begin ( ) ; i ! = other . _objData . end ( ) ; + + i )
_objData . push_back ( * i ) ;
if ( _special . size ( ) = = 0 & & other . _special . size ( ) )
_special = other . _special ;
}
2009-08-18 16:19:27 +02:00
// as called, these functions find the max/min of a bound in the
2009-09-07 19:39:32 +02:00
// opposite direction, so inclusive bounds are considered less
2009-08-18 16:19:27 +02:00
// superlative
/**
 * Returns whichever bound compares greater. On equal bound values b is chosen
 * when it is exclusive; otherwise a is returned.
 */
FieldBound maxFieldBound(const FieldBound& a, const FieldBound& b) {
    const int order = a._bound.woCompare(b._bound, false);
    const bool preferB = (order < 0) || (order == 0 && !b._inclusive);
    return preferB ? b : a;
}
/**
 * Returns whichever bound compares smaller. On equal bound values b is chosen
 * when it is exclusive; otherwise a is returned.
 */
FieldBound minFieldBound(const FieldBound& a, const FieldBound& b) {
    const int order = a._bound.woCompare(b._bound, false);
    const bool preferB = (order > 0) || (order == 0 && !b._inclusive);
    return preferB ? b : a;
}
bool fieldIntervalOverlap ( const FieldInterval & one , const FieldInterval & two , FieldInterval & result ) {
2010-05-10 19:06:20 +02:00
result . _lower = maxFieldBound ( one . _lower , two . _lower ) ;
result . _upper = minFieldBound ( one . _upper , two . _upper ) ;
2010-07-16 10:49:15 +02:00
return result . strictValid ( ) ;
2009-08-18 16:19:27 +02:00
}
2009-02-24 20:14:45 +01:00
2009-09-07 19:39:32 +02:00
// NOTE Not yet tested for complex $or bounds, just for simple bounds generated by $in
2009-08-11 22:43:30 +02:00
/**
 * Intersects this range with other, in place.
 *
 * Walks both interval lists in parallel, keeps every pairwise overlap, and
 * after each step advances the list whose current interval ends first.
 *
 * @return *this
 */
const FieldRange& FieldRange::operator&=(const FieldRange& other) {
    vector<FieldInterval> intersection;
    vector<FieldInterval>::const_iterator mine = _intervals.begin();
    vector<FieldInterval>::const_iterator theirs = other._intervals.begin();

    for (;;) {
        if (mine == _intervals.end() || theirs == other._intervals.end())
            break;

        FieldInterval common;
        if (fieldIntervalOverlap(*mine, *theirs, common))
            intersection.push_back(common);

        const bool mineEndsFirst = (mine->_upper == minFieldBound(mine->_upper, theirs->_upper));
        if (mineEndsFirst)
            ++mine;
        else
            ++theirs;
    }

    finishOperation(intersection, other);
    return *this;
}
2010-03-09 01:53:45 +01:00
void handleInterval ( const FieldInterval & lower , FieldBound & low , FieldBound & high , vector < FieldInterval > & newIntervals ) {
2010-05-10 19:06:20 +02:00
if ( low . _bound . eoo ( ) ) {
low = lower . _lower ; high = lower . _upper ;
2010-03-09 01:53:45 +01:00
} else {
2010-05-10 19:06:20 +02:00
if ( high . _bound . woCompare ( lower . _lower . _bound , false ) < 0 ) { // when equal but neither inclusive, just assume they overlap, since current btree scanning code just as efficient either way
2010-03-09 01:53:45 +01:00
FieldInterval tmp ;
2010-05-10 19:06:20 +02:00
tmp . _lower = low ;
tmp . _upper = high ;
2010-03-09 01:53:45 +01:00
newIntervals . push_back ( tmp ) ;
2010-05-10 19:06:20 +02:00
low = lower . _lower ; high = lower . _upper ;
2010-03-09 01:53:45 +01:00
} else {
2010-05-10 19:06:20 +02:00
high = lower . _upper ;
2010-03-09 01:53:45 +01:00
}
}
}
/**
 * Replaces this range with the union of this range and other.
 *
 * Both interval lists are merged in order of lower bound (on a tie, an
 * inclusive lower bound of ours goes first) and fed to handleInterval(), which
 * coalesces overlapping intervals.
 *
 * @return *this
 */
const FieldRange& FieldRange::operator|=(const FieldRange& other) {
    vector<FieldInterval> newIntervals;
    FieldBound low;
    FieldBound high;
    vector<FieldInterval>::const_iterator i = _intervals.begin();
    vector<FieldInterval>::const_iterator j = other._intervals.begin();

    while (i != _intervals.end() && j != other._intervals.end()) {
        const int cmp = i->_lower._bound.woCompare(j->_lower._bound, false);
        if ((cmp == 0 && i->_lower._inclusive) || cmp < 0) {
            handleInterval(*i, low, high, newIntervals);
            ++i;
        } else {
            handleInterval(*j, low, high, newIntervals);
            ++j;
        }
    }
    for (; i != _intervals.end(); ++i)
        handleInterval(*i, low, high, newIntervals);
    for (; j != other._intervals.end(); ++j)
        handleInterval(*j, low, high, newIntervals);

    // Flush the interval still being accumulated. When both operands were
    // empty nothing was accumulated (low is still eoo), and pushing it would
    // turn an empty union into a bogus one-interval range.
    if (!low._bound.eoo()) {
        FieldInterval tmp;
        tmp._lower = low;
        tmp._upper = high;
        newIntervals.push_back(tmp);
    }

    finishOperation(newIntervals, other);
    return *this;
}
/**
 * Removes from this range the parts covered by other, in place.
 *
 * i walks our intervals (which are edited in place), j walks other's. When an
 * interval of other lies strictly inside one of ours, our interval is left
 * whole rather than split, so the result can be a superset of the exact
 * difference.
 *
 * @return *this
 */
const FieldRange& FieldRange::operator-=(const FieldRange& other) {
    vector<FieldInterval>::iterator i = _intervals.begin();
    vector<FieldInterval>::const_iterator j = other._intervals.begin();
    while (i != _intervals.end() && j != other._intervals.end()) {
        const int cmp = i->_lower._bound.woCompare(j->_lower._bound, false);
        if (cmp < 0 ||
            (cmp == 0 && i->_lower._inclusive && !j->_lower._inclusive)) {
            // our interval starts before other's
            const int cmp2 = i->_upper._bound.woCompare(j->_lower._bound, false);
            if (cmp2 < 0) {
                // ours ends before other's begins: nothing to remove
                ++i;
            } else if (cmp2 == 0) {
                // they touch in a single point: drop it if both contain it
                if (i->_upper._inclusive && j->_lower._inclusive) {
                    i->_upper._inclusive = false;
                }
                ++i;
            } else {
                const int cmp3 = i->_upper._bound.woCompare(j->_upper._bound, false);
                if (cmp3 < 0 ||
                    (cmp3 == 0 && (!i->_upper._inclusive || j->_upper._inclusive))) {
                    // other covers our tail: cut ours off where other begins
                    i->_upper = j->_lower;
                    i->_upper.flipInclusive();
                    ++i;
                } else {
                    // other lies strictly inside ours: left unsplit
                    ++j;
                }
            }
        } else {
            // our interval starts at or after other's start
            const int cmp2 = i->_lower._bound.woCompare(j->_upper._bound, false);
            // The touching-point test compares our lower bound with other's
            // UPPER bound, so it is other's upper inclusiveness that decides
            // whether the point is shared (the original tested j->_lower).
            if (cmp2 > 0 ||
                (cmp2 == 0 && (!i->_lower._inclusive || !j->_upper._inclusive))) {
                // other ends before ours begins
                ++j;
            } else {
                const int cmp3 = i->_upper._bound.woCompare(j->_upper._bound, false);
                if (cmp3 < 0 ||
                    (cmp3 == 0 && (!i->_upper._inclusive || j->_upper._inclusive))) {
                    // ours is completely covered by other
                    i = _intervals.erase(i);
                } else {
                    // other covers our head: start ours where other ends
                    i->_lower = j->_upper;
                    i->_lower.flipInclusive();
                    ++j;
                }
            }
        }
    }
    finishOperation(_intervals, other);
    return *this;
}
2010-07-19 09:39:16 +02:00
// TODO write a proper implementation that doesn't do a full copy
bool FieldRange : : operator < = ( const FieldRange & other ) {
FieldRange temp = * this ;
temp - = other ;
return temp . empty ( ) ;
}
2009-08-11 22:43:30 +02:00
/**
 * Stores a copy of o in _objData and returns the object.
 */
BSONObj FieldRange::addObj(const BSONObj& o) {
    _objData.push_back(o);
    return _objData.back();
}
2010-07-16 12:47:26 +02:00
2010-02-25 22:24:34 +01:00
/**
 * Returns the "special" marker carried by one of the field ranges, or "" if
 * no range has one. Raises user assertion 13033 when more than one range
 * carries a marker.
 */
string FieldRangeSet::getSpecial() const {
    string found;
    for (map<string, FieldRange>::const_iterator it = _ranges.begin(); it != _ranges.end(); ++it) {
        if (it->second.getSpecial().size() != 0) {
            uassert(13033, "can't have 2 special fields", found.size() == 0);
            found = it->second.getSpecial();
        }
    }
    return found;
}
2010-02-23 23:20:28 +01:00
void FieldRangeSet : : processOpElement ( const char * fieldName , const BSONElement & f , bool isNot , bool optimize ) {
2010-03-09 20:53:40 +01:00
BSONElement g = f ;
int op2 = g . getGtLtOp ( ) ;
if ( op2 = = BSONObj : : opALL ) {
BSONElement h = g ;
massert ( 13050 , " $all requires array " , h . type ( ) = = Array ) ;
BSONObjIterator i ( h . embeddedObject ( ) ) ;
if ( i . more ( ) ) {
BSONElement x = i . next ( ) ;
if ( x . type ( ) = = Object & & x . embeddedObject ( ) . firstElement ( ) . getGtLtOp ( ) = = BSONObj : : opELEM_MATCH ) {
g = x . embeddedObject ( ) . firstElement ( ) ;
op2 = g . getGtLtOp ( ) ;
}
}
}
2010-02-23 23:20:28 +01:00
if ( op2 = = BSONObj : : opELEM_MATCH ) {
2010-03-09 20:53:40 +01:00
BSONObjIterator k ( g . embeddedObjectUserCheck ( ) ) ;
2010-02-23 23:20:28 +01:00
while ( k . more ( ) ) {
2010-03-09 20:53:40 +01:00
BSONElement h = k . next ( ) ;
2010-02-23 23:20:28 +01:00
StringBuilder buf ( 32 ) ;
2010-03-09 20:53:40 +01:00
buf < < fieldName < < " . " < < h . fieldName ( ) ;
2010-02-23 23:20:28 +01:00
string fullname = buf . str ( ) ;
2010-03-09 20:53:40 +01:00
int op3 = getGtLtOp ( h ) ;
2010-02-23 23:20:28 +01:00
if ( op3 = = BSONObj : : Equality ) {
2010-05-10 19:06:20 +02:00
_ranges [ fullname ] & = FieldRange ( h , isNot , optimize ) ;
2010-02-23 23:20:28 +01:00
}
else {
2010-03-09 20:53:40 +01:00
BSONObjIterator l ( h . embeddedObject ( ) ) ;
2010-02-23 23:20:28 +01:00
while ( l . more ( ) ) {
2010-05-10 19:06:20 +02:00
_ranges [ fullname ] & = FieldRange ( l . next ( ) , isNot , optimize ) ;
2010-02-23 23:20:28 +01:00
}
}
}
} else {
2010-05-10 19:06:20 +02:00
_ranges [ fieldName ] & = FieldRange ( f , isNot , optimize ) ;
2010-02-23 23:20:28 +01:00
}
}
2010-05-12 19:38:58 +02:00
void FieldRangeSet : : processQueryField ( const BSONElement & e , bool optimize ) {
bool equality = ( getGtLtOp ( e ) = = BSONObj : : Equality ) ;
if ( equality & & e . type ( ) = = Object ) {
equality = ( strcmp ( e . embeddedObject ( ) . firstElement ( ) . fieldName ( ) , " $not " ) ! = 0 ) ;
}
2009-12-31 21:18:43 +01:00
2010-05-12 19:38:58 +02:00
if ( equality | | ( e . type ( ) = = Object & & ! e . embeddedObject ( ) [ " $regex " ] . eoo ( ) ) ) {
_ranges [ e . fieldName ( ) ] & = FieldRange ( e , false , optimize ) ;
}
if ( ! equality ) {
BSONObjIterator j ( e . embeddedObject ( ) ) ;
while ( j . more ( ) ) {
BSONElement f = j . next ( ) ;
if ( strcmp ( f . fieldName ( ) , " $not " ) = = 0 ) {
switch ( f . type ( ) ) {
case Object : {
BSONObjIterator k ( f . embeddedObject ( ) ) ;
while ( k . more ( ) ) {
BSONElement g = k . next ( ) ;
uassert ( 13034 , " invalid use of $not " , g . getGtLtOp ( ) ! = BSONObj : : Equality ) ;
processOpElement ( e . fieldName ( ) , g , true , optimize ) ;
}
break ;
}
case RegEx :
processOpElement ( e . fieldName ( ) , f , true , optimize ) ;
break ;
default :
uassert ( 13041 , " invalid use of $not " , false ) ;
}
} else {
processOpElement ( e . fieldName ( ) , f , false , optimize ) ;
2010-05-10 18:51:49 +02:00
}
2010-05-12 19:38:58 +02:00
}
}
}
/**
 * Builds the per-field ranges for a query.
 *
 * An owned copy of the query is stored in _queries[0]. Its top-level fields
 * are processed one by one; $where, $or and $nor are skipped here.
 */
FieldRangeSet::FieldRangeSet(const char* ns, const BSONObj& query, bool optimize)
    : _ns(ns), _queries(1, query.getOwned()) {
    for (BSONObjIterator fields(_queries[0]); fields.more();) {
        BSONElement e = fields.next();
        // e could be x:1 or x:{$gt:1}
        const char* name = e.fieldName();
        const bool skipped = strcmp(name, "$where") == 0 ||
                             strcmp(name, "$or") == 0 ||
                             strcmp(name, "$nor") == 0;
        if (!skipped)
            processQueryField(e, optimize);
    }
}
2010-05-12 19:38:58 +02:00
2010-06-02 01:21:21 +02:00
/**
 * Builds the base range set for the query plus one FieldRangeSet per clause
 * of every top-level $or.
 *
 * Asserts that each $or is a non-empty array of objects and that no clause
 * contains a 'special' query.
 */
FieldRangeOrSet::FieldRangeOrSet(const char* ns, const BSONObj& query, bool optimize)
    : _baseSet(ns, query, optimize), _orFound() {
    for (BSONObjIterator fields(_baseSet._queries[0]); fields.more();) {
        BSONElement e = fields.next();
        if (strcmp(e.fieldName(), "$or") != 0)
            continue;

        massert(13262, "$or requires nonempty array", e.type() == Array && e.embeddedObject().nFields() > 0);
        for (BSONObjIterator clauses(e.embeddedObject()); clauses.more();) {
            BSONElement clause = clauses.next();
            massert(13263, "$or array must contain objects", clause.type() == Object);
            _orSets.push_back(FieldRangeSet(ns, clause.embeddedObject(), optimize));
            massert(13291, "$or may not contain 'special' query", _orSets.back().getSpecial().empty());
        }
        _orFound = true;
    }
}
2009-12-31 21:18:43 +01:00
2009-08-11 22:43:30 +02:00
// Shared default-constructed range, created on first use by trivialRange().
FieldRange* FieldRangeSet::trivialRange_ = 0;

/** Returns the shared default-constructed FieldRange, allocating it on first call. */
FieldRange& FieldRangeSet::trivialRange() {
    if (!trivialRange_) {
        trivialRange_ = new FieldRange();
    }
    return *trivialRange_;
}
2010-07-16 12:47:26 +02:00
/**
 * Rebuilds a query object from the computed ranges.
 *
 * @param _fields  fields to include; when empty, every field in _ranges is used.
 * @return an object with, per field, either the equality value or a
 *         {$gt/$gte, $lt/$lte} sub-object for the bounded sides of the range.
 *         Fields whose range is neither an equality nor nontrivial are omitted.
 */
BSONObj FieldRangeSet::simplifiedQuery(const BSONObj& _fields) const {
    BSONObj fields = _fields;
    if (fields.isEmpty()) {
        BSONObjBuilder allFields;
        for (map<string, FieldRange>::const_iterator it = _ranges.begin(); it != _ranges.end(); ++it)
            allFields.append(it->first, 1);
        fields = allFields.obj();
    }

    BSONObjBuilder query;
    for (BSONObjIterator fieldIt(fields); fieldIt.more();) {
        BSONElement e = fieldIt.next();
        const char* name = e.fieldName();
        const FieldRange& range = _ranges[name];
        assert(!range.empty());

        if (range.equality()) {
            query.appendAs(range.min(), name);
            continue;
        }
        if (!range.nontrivial())
            continue;

        BSONObjBuilder bounds;
        if (range.min().type() != MinKey)
            bounds.appendAs(range.min(), range.minInclusive() ? "$gte" : "$gt");
        if (range.max().type() != MaxKey)
            bounds.appendAs(range.max(), range.maxInclusive() ? "$lte" : "$lt");
        BSONObj boundsObj = bounds.obj();
        query.append(name, boundsObj);
    }
    return query.obj();
}
2009-02-24 21:20:29 +01:00
2009-08-11 22:43:30 +02:00
/**
 * Summarises the shape of the query: for every field, whether it is matched
 * by equality or bounded above, below, or on both sides. Fields with a
 * trivial range get no entry. The sort spec is recorded as well.
 */
QueryPattern FieldRangeSet::pattern(const BSONObj& sort) const {
    QueryPattern qp;
    for (map<string, FieldRange>::const_iterator it = _ranges.begin(); it != _ranges.end(); ++it) {
        const FieldRange& fr = it->second;
        assert(!fr.empty());

        if (fr.equality()) {
            qp._fieldTypes[it->first] = QueryPattern::Equality;
            continue;
        }
        if (!fr.nontrivial())
            continue;

        const bool hasUpper = fr.max().type() != MaxKey;
        const bool hasLower = fr.min().type() != MinKey;
        if (hasUpper && hasLower) {
            qp._fieldTypes[it->first] = QueryPattern::UpperAndLowerBound;
        } else if (hasUpper) {
            qp._fieldTypes[it->first] = QueryPattern::UpperBound;
        } else if (hasLower) {
            qp._fieldTypes[it->first] = QueryPattern::LowerBound;
        }
    }
    qp.setSort(sort);
    return qp;
}
2009-07-02 20:05:36 +02:00
2010-07-16 10:49:15 +02:00
// TODO get rid of this
2009-09-07 20:44:41 +02:00
/**
 * Computes the list of (start key, end key) pairs to scan in an index.
 *
 * Key fields are processed in index order. While only equality and $in ranges
 * have been seen, every interval of a multi-interval field multiplies the
 * number of bound pairs. From the first field whose range is neither an
 * equality nor an $in, each later field only contributes its overall
 * min / max to every existing pair.
 *
 * @param keyPattern  index key pattern; a negative value marks a descending field.
 * @param direction   scan direction; negative means reverse.
 */
BoundList FieldRangeSet::indexBounds(const BSONObj& keyPattern, int direction) const {
    typedef shared_ptr<BSONObjBuilder> BuilderPtr;
    typedef pair<BuilderPtr, BuilderPtr> BuilderPair;  // first: start key, second: end key
    typedef vector<BuilderPair> BoundBuilders;

    BoundBuilders builders;
    builders.push_back(make_pair(BuilderPtr(new BSONObjBuilder()), BuilderPtr(new BSONObjBuilder())));

    bool sawInequality = false;  // until true, we are just dealing with equality and $in bounds
    for (BSONObjIterator keyIt(keyPattern); keyIt.more();) {
        BSONElement keyField = keyIt.next();
        const FieldRange& fr = range(keyField.fieldName());
        const int keyDirection = static_cast<int>(keyField.number());  // returns 0.0 if not numeric
        const bool forward = ((keyDirection >= 0) == (direction >= 0));

        if (sawInequality) {
            // past the first inequality: just append the field's overall bounds
            for (BoundBuilders::const_iterator b = builders.begin(); b != builders.end(); ++b) {
                b->first->appendAs(forward ? fr.min() : fr.max(), "");
                b->second->appendAs(forward ? fr.max() : fr.min(), "");
            }
            continue;
        }

        if (fr.equality()) {
            for (BoundBuilders::const_iterator b = builders.begin(); b != builders.end(); ++b) {
                b->first->appendAs(fr.min(), "");
                b->second->appendAs(fr.min(), "");
            }
            continue;
        }

        if (!fr.inQuery())
            sawInequality = true;

        // every existing pair is replaced by one pair per interval of this field
        BoundBuilders expanded;
        const vector<FieldInterval>& intervals = fr.intervals();
        for (BoundBuilders::const_iterator b = builders.begin(); b != builders.end(); ++b) {
            BSONObj startSoFar = b->first->obj();
            BSONObj endSoFar = b->second->obj();
            if (forward) {
                for (vector<FieldInterval>::const_iterator iv = intervals.begin(); iv != intervals.end(); ++iv) {
                    uassert(13303, "combinatorial limit of $in partitioning of result set exceeded", expanded.size() < 1000000);
                    expanded.push_back(make_pair(BuilderPtr(new BSONObjBuilder()), BuilderPtr(new BSONObjBuilder())));
                    expanded.back().first->appendElements(startSoFar);
                    expanded.back().second->appendElements(endSoFar);
                    expanded.back().first->appendAs(iv->_lower._bound, "");
                    expanded.back().second->appendAs(iv->_upper._bound, "");
                }
            } else {
                for (vector<FieldInterval>::const_reverse_iterator iv = intervals.rbegin(); iv != intervals.rend(); ++iv) {
                    uassert(13304, "combinatorial limit of $in partitioning of result set exceeded", expanded.size() < 1000000);
                    expanded.push_back(make_pair(BuilderPtr(new BSONObjBuilder()), BuilderPtr(new BSONObjBuilder())));
                    expanded.back().first->appendElements(startSoFar);
                    expanded.back().second->appendElements(endSoFar);
                    expanded.back().first->appendAs(iv->_upper._bound, "");
                    expanded.back().second->appendAs(iv->_lower._bound, "");
                }
            }
        }
        builders = expanded;
    }

    BoundList ret;
    for (BoundBuilders::const_iterator b = builders.begin(); b != builders.end(); ++b)
        ret.push_back(make_pair(b->first->obj(), b->second->obj()));
    return ret;
}
2009-10-26 15:24:30 +01:00
///////////////////
2009-10-28 16:52:23 +01:00
// FieldMatcher //
2009-10-26 15:24:30 +01:00
///////////////////
2009-09-07 20:44:41 +02:00
2009-07-01 20:55:52 +02:00
void FieldMatcher : : add ( const BSONObj & o ) {
2010-02-16 05:17:42 +01:00
massert ( 10371 , " can only add to FieldMatcher once " , _source . isEmpty ( ) ) ;
_source = o ;
2009-10-28 16:52:23 +01:00
2009-07-01 21:00:09 +02:00
BSONObjIterator i ( o ) ;
2009-10-28 16:52:23 +01:00
int true_false = - 1 ;
2009-07-01 21:00:09 +02:00
while ( i . more ( ) ) {
2009-10-28 16:52:23 +01:00
BSONElement e = i . next ( ) ;
2010-04-16 22:52:54 +02:00
if ( e . type ( ) = = Object ) {
BSONObj obj = e . embeddedObject ( ) ;
BSONElement e2 = obj . firstElement ( ) ;
if ( strcmp ( e2 . fieldName ( ) , " $slice " ) = = 0 ) {
if ( e2 . isNumber ( ) ) {
int i = e2 . numberInt ( ) ;
if ( i < 0 )
add ( e . fieldName ( ) , i , - i ) ; // limit is now positive
else
add ( e . fieldName ( ) , 0 , i ) ;
} else if ( e2 . type ( ) = = Array ) {
BSONObj arr = e2 . embeddedObject ( ) ;
uassert ( 13099 , " $slice array wrong size " , arr . nFields ( ) = = 2 ) ;
BSONObjIterator it ( arr ) ;
int skip = it . next ( ) . numberInt ( ) ;
int limit = it . next ( ) . numberInt ( ) ;
uassert ( 13100 , " $slice limit must be positive " , limit > 0 ) ;
add ( e . fieldName ( ) , skip , limit ) ;
} else {
uassert ( 13098 , " $slice only supports numbers and [skip, limit] arrays " , false ) ;
}
} else {
uassert ( 13097 , string ( " Unsupported projection option: " ) + obj . firstElement ( ) . fieldName ( ) , false ) ;
}
2010-05-01 03:20:24 +02:00
} else if ( ! strcmp ( e . fieldName ( ) , " _id " ) & & ! e . trueValue ( ) ) {
_includeID = false ;
2010-04-16 22:52:54 +02:00
2010-05-01 03:20:24 +02:00
} else {
2009-10-28 16:52:23 +01:00
2010-05-01 03:20:24 +02:00
add ( e . fieldName ( ) , e . trueValue ( ) ) ;
// validate input
if ( true_false = = - 1 ) {
true_false = e . trueValue ( ) ;
_include = ! e . trueValue ( ) ;
}
else {
uassert ( 10053 , " You cannot currently mix including and excluding fields. Contact us if this is an issue. " ,
( bool ) true_false = = e . trueValue ( ) ) ;
}
2009-07-01 21:00:09 +02:00
}
}
2009-07-01 20:55:52 +02:00
}
2009-10-28 16:52:23 +01:00
void FieldMatcher : : add ( const string & field , bool include ) {
if ( field . empty ( ) ) { // this is the field the user referred to
2010-02-16 05:17:42 +01:00
_include = include ;
2009-10-28 16:52:23 +01:00
} else {
2010-04-16 22:52:54 +02:00
_include = ! include ;
2009-10-28 16:52:23 +01:00
const size_t dot = field . find ( ' . ' ) ;
const string subfield = field . substr ( 0 , dot ) ;
const string rest = ( dot = = string : : npos ? " " : field . substr ( dot + 1 , string : : npos ) ) ;
2010-02-16 05:17:42 +01:00
boost : : shared_ptr < FieldMatcher > & fm = _fields [ subfield ] ;
2009-10-28 16:52:23 +01:00
if ( ! fm )
2010-04-16 22:52:54 +02:00
fm . reset ( new FieldMatcher ( ) ) ;
2009-10-28 16:52:23 +01:00
fm - > add ( rest , include ) ;
}
2009-07-01 20:55:52 +02:00
}
2009-10-28 16:52:23 +01:00
2010-04-16 22:52:54 +02:00
void FieldMatcher : : add ( const string & field , int skip , int limit ) {
_special = true ; // can't include or exclude whole object
if ( field . empty ( ) ) { // this is the field the user referred to
_skip = skip ;
_limit = limit ;
} else {
const size_t dot = field . find ( ' . ' ) ;
const string subfield = field . substr ( 0 , dot ) ;
const string rest = ( dot = = string : : npos ? " " : field . substr ( dot + 1 , string : : npos ) ) ;
boost : : shared_ptr < FieldMatcher > & fm = _fields [ subfield ] ;
if ( ! fm )
fm . reset ( new FieldMatcher ( ) ) ;
fm - > add ( rest , skip , limit ) ;
}
}
2009-07-01 21:33:03 +02:00
/** Returns the specification object stored by add( const BSONObj & ). */
BSONObj FieldMatcher::getSpec() const {
    return _source;
}
2009-07-01 20:55:52 +02:00
2009-10-28 16:52:23 +01:00
//b will be the value part of an array-typed BSONElement
2010-05-05 21:51:46 +02:00
void FieldMatcher : : appendArray ( BSONObjBuilder & b , const BSONObj & a , bool nested ) const {
int skip = nested ? 0 : _skip ;
int limit = nested ? - 1 : _limit ;
2010-04-16 22:52:54 +02:00
if ( skip < 0 ) {
skip = max ( 0 , skip + a . nFields ( ) ) ;
}
2009-10-28 16:52:23 +01:00
int i = 0 ;
BSONObjIterator it ( a ) ;
while ( it . more ( ) ) {
BSONElement e = it . next ( ) ;
2009-07-02 20:05:36 +02:00
2010-04-16 22:52:54 +02:00
if ( skip ) {
skip - - ;
continue ;
}
if ( limit ! = - 1 & & ( limit - - = = 0 ) ) {
break ;
}
2009-10-28 16:52:23 +01:00
switch ( e . type ( ) ) {
case Array : {
BSONObjBuilder subb ;
2010-05-05 21:51:46 +02:00
appendArray ( subb , e . embeddedObject ( ) , true ) ;
2010-07-20 18:39:35 +02:00
b . appendArray ( b . numStr ( i + + ) , subb . obj ( ) ) ;
2009-10-28 16:52:23 +01:00
break ;
}
case Object : {
BSONObjBuilder subb ;
BSONObjIterator jt ( e . embeddedObject ( ) ) ;
while ( jt . more ( ) ) {
append ( subb , jt . next ( ) ) ;
}
b . append ( b . numStr ( i + + ) , subb . obj ( ) ) ;
break ;
}
default :
2010-02-16 05:17:42 +01:00
if ( _include )
2010-07-20 18:39:35 +02:00
b . appendAs ( e , b . numStr ( i + + ) ) ;
2009-10-28 16:52:23 +01:00
}
}
2009-07-02 20:05:36 +02:00
}
2009-10-28 16:52:23 +01:00
2009-07-02 20:05:36 +02:00
void FieldMatcher : : append ( BSONObjBuilder & b , const BSONElement & e ) const {
2010-02-16 05:17:42 +01:00
FieldMap : : const_iterator field = _fields . find ( e . fieldName ( ) ) ;
2009-10-28 16:52:23 +01:00
2010-02-16 05:17:42 +01:00
if ( field = = _fields . end ( ) ) {
if ( _include )
2009-10-28 16:52:23 +01:00
b . append ( e ) ;
2010-02-16 05:17:42 +01:00
}
else {
2009-10-28 16:52:23 +01:00
FieldMatcher & subfm = * field - > second ;
2010-02-16 05:17:42 +01:00
2010-04-16 22:52:54 +02:00
if ( ( subfm . _fields . empty ( ) & & ! subfm . _special ) | | ! ( e . type ( ) = = Object | | e . type ( ) = = Array ) ) {
2010-02-16 05:17:42 +01:00
if ( subfm . _include )
2009-10-28 16:52:23 +01:00
b . append ( e ) ;
2010-02-16 05:17:42 +01:00
}
else if ( e . type ( ) = = Object ) {
2009-10-28 16:52:23 +01:00
BSONObjBuilder subb ;
BSONObjIterator it ( e . embeddedObject ( ) ) ;
while ( it . more ( ) ) {
subfm . append ( subb , it . next ( ) ) ;
}
b . append ( e . fieldName ( ) , subb . obj ( ) ) ;
2009-08-20 16:06:47 +02:00
2010-02-16 05:17:42 +01:00
}
else { //Array
2009-10-28 16:52:23 +01:00
BSONObjBuilder subb ;
subfm . appendArray ( subb , e . embeddedObject ( ) ) ;
b . appendArray ( e . fieldName ( ) , subb . obj ( ) ) ;
2009-08-14 20:19:23 +02:00
}
2009-07-02 20:05:36 +02:00
}
}
2009-02-24 23:48:06 +01:00
2010-07-16 12:47:26 +02:00
bool FieldRangeVector : : matchesElement ( const BSONElement & e , int i , bool forward ) const {
2010-08-18 20:42:58 +02:00
bool eq ;
int l = matchingLowElement ( e , i , forward , eq ) ;
2010-07-16 18:45:59 +02:00
return ( l % 2 = = 0 ) ; // if we're inside an interval
}
2010-08-17 02:39:01 +02:00
// binary search for interval containing the specified element
// an even return value indicates that the element is contained within a valid interval
2010-08-18 20:42:58 +02:00
int FieldRangeVector : : matchingLowElement ( const BSONElement & e , int i , bool forward , bool & lowEquality ) const {
lowEquality = false ;
2010-07-16 12:47:26 +02:00
int l = - 1 ;
int h = _ranges [ i ] . intervals ( ) . size ( ) * 2 ;
while ( l + 1 < h ) {
int m = ( l + h ) / 2 ;
BSONElement toCmp ;
2010-08-18 20:42:58 +02:00
bool toCmpInclusive ;
const FieldInterval & interval = _ranges [ i ] . intervals ( ) [ m / 2 ] ;
2010-07-16 12:47:26 +02:00
if ( m % 2 = = 0 ) {
2010-08-18 20:42:58 +02:00
toCmp = interval . _lower . _bound ;
toCmpInclusive = interval . _lower . _inclusive ;
2010-07-16 12:47:26 +02:00
} else {
2010-08-18 20:42:58 +02:00
toCmp = interval . _upper . _bound ;
toCmpInclusive = interval . _upper . _inclusive ;
2010-07-16 12:47:26 +02:00
}
int cmp = toCmp . woCompare ( e , false ) ;
if ( ! forward ) {
cmp = - cmp ;
}
if ( cmp < 0 ) {
l = m ;
} else if ( cmp > 0 ) {
h = m ;
} else {
2010-08-18 20:42:58 +02:00
if ( m % 2 = = 0 ) {
lowEquality = true ;
}
int ret = m ;
// if left match and inclusive, all good
// if left match and not inclusive, return right before left bound
// if right match and inclusive, return left bound
// if right match and not inclusive, return right bound
if ( ( m % 2 = = 0 & & ! toCmpInclusive ) | | ( m % 2 = = 1 & & toCmpInclusive ) ) {
- - ret ;
}
return ret ;
2010-07-16 12:47:26 +02:00
}
}
assert ( l + 1 = = h ) ;
2010-07-16 18:45:59 +02:00
return l ;
2010-07-16 12:47:26 +02:00
}
bool FieldRangeVector : : matches ( const BSONObj & obj ) const {
BSONObjIterator k ( _keyPattern ) ;
for ( int i = 0 ; i < ( int ) _ranges . size ( ) ; + + i ) {
if ( _ranges [ i ] . empty ( ) ) {
return false ;
}
BSONElement kk = k . next ( ) ;
int number = ( int ) kk . number ( ) ;
2010-07-21 00:39:49 +02:00
bool forward = ( number > = 0 ? 1 : - 1 ) * ( _direction > = 0 ? 1 : - 1 ) > 0 ;
2010-07-16 12:47:26 +02:00
BSONElement e = obj . getField ( kk . fieldName ( ) ) ;
if ( e . eoo ( ) ) {
e = staticNull . firstElement ( ) ;
}
if ( e . type ( ) = = Array ) {
BSONObjIterator j ( e . embeddedObject ( ) ) ;
bool match = false ;
while ( j . more ( ) ) {
if ( matchesElement ( j . next ( ) , i , forward ) ) {
match = true ;
break ;
}
}
if ( ! match ) {
return false ;
}
} else if ( ! matchesElement ( e , i , forward ) ) {
return false ;
}
}
return true ;
}
2010-07-16 18:45:59 +02:00
// TODO optimize more
2010-07-16 10:49:15 +02:00
// Decides how the scan should proceed given the current key `curr`.
//
// Walks the elements of `curr` alongside the fields of _v._keyPattern. For each
// field i, _i[ i ] is the index of the interval currently being considered; a
// value of -1 is treated as "position unknown" and resolved with
// _v.matchingLowElement().
//
// Return value, as produced by the code below:
//   -1   the loop finished for every field without requesting a skip
//   -2   a field is past its last interval and no earlier field was recorded
//        in latestNonEndpoint, i.e. nothing is left to advance to
//   >= 0 a field index r to skip from. When _after is set to false, _cmp[ r.. ]
//        and _inc[ r.. ] have been filled with the lower bounds (and their
//        inclusiveness) to seek to. When _after is set to true the skip is the
//        one the inline comments call "superlative".
// NOTE(review): how the caller consumes r, _cmp, _inc and _after, and what
// setMinus() / setZero() reset, is not visible in this block - confirm there.
int FieldRangeVector : : Iterator : : advance ( const BSONObj & curr ) {
BSONObjIterator j ( curr ) ;
BSONObjIterator o ( _v . _keyPattern ) ;
2010-08-17 02:39:01 +02:00
// track the most recent field for which we are not at the end of the valid values,
// since we may need to advance from the key prefix ending with this field
2010-07-16 10:49:15 +02:00
int latestNonEndpoint = - 1 ;
2010-08-17 02:39:01 +02:00
// iterate over fields to determine appropriate advance method
2010-07-16 10:49:15 +02:00
for ( int i = 0 ; i < ( int ) _i . size ( ) ; + + i ) {
2010-07-16 18:45:59 +02:00
if ( i > 0 & & ! _v . _ranges [ i - 1 ] . intervals ( ) [ _i [ i - 1 ] ] . equality ( ) ) {
2010-08-17 02:39:01 +02:00
// if last bound was inequality, we don't know anything about where we are for this field
2010-08-18 20:42:58 +02:00
// TODO if possible avoid this in certain cases when the value in the previous field of the
// previous key is the same as the value of the previous field in the current key
2010-07-16 18:45:59 +02:00
setMinus ( i ) ;
}
2010-07-16 10:49:15 +02:00
bool eq = false ;
BSONElement oo = o . next ( ) ;
2010-07-16 18:45:59 +02:00
bool reverse = ( ( oo . number ( ) < 0 ) ^ ( _v . _direction < 0 ) ) ;
BSONElement jj = j . next ( ) ;
2010-08-17 02:39:01 +02:00
if ( _i [ i ] = = - 1 ) { // unknown position for this field, do binary search
2010-08-18 20:42:58 +02:00
bool lowEquality ;
int l = _v . matchingLowElement ( jj , i , ! reverse , lowEquality ) ;
2010-08-17 02:39:01 +02:00
if ( l % 2 = = 0 ) { // we are in a valid range for this field
2010-07-16 18:45:59 +02:00
_i [ i ] = l / 2 ;
int diff = ( int ) _v . _ranges [ i ] . intervals ( ) . size ( ) - _i [ i ] ;
if ( diff > 1 ) {
latestNonEndpoint = i ;
} else if ( diff = = 1 ) {
int x = _v . _ranges [ i ] . intervals ( ) [ _i [ i ] ] . _upper . _bound . woCompare ( jj , false ) ;
if ( x ! = 0 ) {
latestNonEndpoint = i ;
}
}
continue ;
2010-08-17 02:39:01 +02:00
} else { // not in a valid range for this field - determine if and how to advance
// check if we're after the last interval for this field
2010-07-16 18:45:59 +02:00
if ( l = = ( int ) _v . _ranges [ i ] . intervals ( ) . size ( ) * 2 - 1 ) {
if ( latestNonEndpoint = = - 1 ) {
return - 2 ;
}
setZero ( latestNonEndpoint + 1 ) ;
// skip to curr / latestNonEndpoint + 1 / superlative
2010-08-18 20:42:58 +02:00
_after = true ;
2010-07-16 18:45:59 +02:00
return latestNonEndpoint + 1 ;
}
_i [ i ] = ( l + 1 ) / 2 ;
2010-08-18 20:42:58 +02:00
if ( lowEquality ) {
// skip to curr / i + 1 / superlative
_after = true ;
return i + 1 ;
}
2010-07-16 10:49:15 +02:00
// skip to curr / i / nextbounds
_cmp [ i ] = & _v . _ranges [ i ] . intervals ( ) [ _i [ i ] ] . _lower . _bound ;
2010-08-18 20:42:58 +02:00
_inc [ i ] = _v . _ranges [ i ] . intervals ( ) [ _i [ i ] ] . _lower . _inclusive ;
2010-07-16 10:49:15 +02:00
for ( int j = i + 1 ; j < ( int ) _i . size ( ) ; + + j ) {
_cmp [ j ] = & _v . _ranges [ j ] . intervals ( ) . front ( ) . _lower . _bound ;
2010-08-18 20:42:58 +02:00
_inc [ j ] = _v . _ranges [ j ] . intervals ( ) . front ( ) . _lower . _inclusive ;
2010-07-16 10:49:15 +02:00
}
2010-08-18 20:42:58 +02:00
_after = false ;
2010-07-16 18:45:59 +02:00
return i ;
2010-07-16 10:49:15 +02:00
}
2010-07-16 18:45:59 +02:00
}
2010-07-21 09:10:10 +02:00
bool first = true ;
2010-08-17 02:39:01 +02:00
// _i[ i ] != -1, so we have a starting interval for this field
// which serves as a lower/equal bound on the first iteration -
// we advance from this interval to find a matching interval
2010-07-16 18:45:59 +02:00
while ( _i [ i ] < ( int ) _v . _ranges [ i ] . intervals ( ) . size ( ) ) {
2010-08-17 02:39:01 +02:00
// compare to current interval's upper bound
2010-07-16 18:45:59 +02:00
int x = _v . _ranges [ i ] . intervals ( ) [ _i [ i ] ] . _upper . _bound . woCompare ( jj , false ) ;
if ( reverse ) {
2010-07-16 10:49:15 +02:00
x = - x ;
}
2010-08-18 20:42:58 +02:00
if ( x = = 0 & & _v . _ranges [ i ] . intervals ( ) [ _i [ i ] ] . _upper . _inclusive ) {
2010-07-16 18:45:59 +02:00
eq = true ;
2010-07-16 10:49:15 +02:00
break ;
}
2010-08-17 02:39:01 +02:00
// see if we're less than the upper bound
2010-07-16 18:45:59 +02:00
if ( x > 0 ) {
2010-07-21 09:10:10 +02:00
if ( i = = 0 & & first ) {
2010-08-17 02:39:01 +02:00
// the value of 1st field won't go backward, so don't check lower bound
// TODO maybe we can check first only?
break ;
2010-07-21 09:10:10 +02:00
}
2010-08-17 02:39:01 +02:00
// if it's an equality interval, don't need to compare separately to lower bound
2010-07-16 18:45:59 +02:00
if ( ! _v . _ranges [ i ] . intervals ( ) [ _i [ i ] ] . equality ( ) ) {
2010-08-17 02:39:01 +02:00
// compare to current interval's lower bound
2010-07-16 18:45:59 +02:00
x = _v . _ranges [ i ] . intervals ( ) [ _i [ i ] ] . _lower . _bound . woCompare ( jj , false ) ;
if ( reverse ) {
x = - x ;
}
}
2010-08-18 20:42:58 +02:00
// if we're equal to and not inclusive the lower bound, advance
if ( ( x = = 0 & & ! _v . _ranges [ i ] . intervals ( ) [ _i [ i ] ] . _lower . _inclusive ) ) {
setZero ( i + 1 ) ;
// skip to curr / i + 1 / superlative
_after = true ;
return i + 1 ;
}
2010-08-17 02:39:01 +02:00
// if we're less than the lower bound, advance
2010-07-16 18:45:59 +02:00
if ( x > 0 ) {
setZero ( i + 1 ) ;
// skip to curr / i / nextbounds
_cmp [ i ] = & _v . _ranges [ i ] . intervals ( ) [ _i [ i ] ] . _lower . _bound ;
2010-08-18 20:42:58 +02:00
_inc [ i ] = _v . _ranges [ i ] . intervals ( ) [ _i [ i ] ] . _lower . _inclusive ;
2010-07-16 18:45:59 +02:00
for ( int j = i + 1 ; j < ( int ) _i . size ( ) ; + + j ) {
_cmp [ j ] = & _v . _ranges [ j ] . intervals ( ) . front ( ) . _lower . _bound ;
2010-08-18 20:42:58 +02:00
_inc [ j ] = _v . _ranges [ j ] . intervals ( ) . front ( ) . _lower . _inclusive ;
2010-07-16 18:45:59 +02:00
}
2010-08-18 20:42:58 +02:00
_after = false ;
2010-07-16 18:45:59 +02:00
return i ;
} else {
break ;
}
}
2010-08-17 02:39:01 +02:00
// we're above the upper bound, so try next interval and reset remaining fields
2010-07-16 10:49:15 +02:00
+ + _i [ i ] ;
setZero ( i + 1 ) ;
2010-07-21 09:10:10 +02:00
first = false ;
2010-07-16 10:49:15 +02:00
}
int diff = ( int ) _v . _ranges [ i ] . intervals ( ) . size ( ) - _i [ i ] ;
if ( diff > 1 | | ( ! eq & & diff = = 1 ) ) {
2010-08-18 20:42:58 +02:00
// check if we're not at the end of valid values for this field
2010-07-16 10:49:15 +02:00
latestNonEndpoint = i ;
2010-08-17 02:39:01 +02:00
} else if ( diff = = 0 ) { // check if we're past the last interval for this field
2010-07-16 10:49:15 +02:00
if ( latestNonEndpoint = = - 1 ) {
return - 2 ;
}
2010-08-17 02:39:01 +02:00
// more values possible, skip...
2010-07-16 10:49:15 +02:00
setZero ( latestNonEndpoint + 1 ) ;
// skip to curr / latestNonEndpoint + 1 / superlative
2010-08-18 20:42:58 +02:00
_after = true ;
2010-07-16 10:49:15 +02:00
return latestNonEndpoint + 1 ;
}
}
return - 1 ;
}
2010-08-18 20:42:58 +02:00
void FieldRangeVector : : Iterator : : prepDive ( ) {
for ( int j = 0 ; j < ( int ) _i . size ( ) ; + + j ) {
_cmp [ j ] = & _v . _ranges [ j ] . intervals ( ) . front ( ) . _lower . _bound ;
_inc [ j ] = _v . _ranges [ j ] . intervals ( ) . front ( ) . _lower . _inclusive ;
}
}
2010-01-15 02:57:50 +01:00
struct SimpleRegexUnitTest : UnitTest {
void run ( ) {
{
BSONObjBuilder b ;
b . appendRegex ( " r " , " ^foo " ) ;
BSONObj o = b . done ( ) ;
assert ( simpleRegex ( o . firstElement ( ) ) = = " foo " ) ;
}
{
BSONObjBuilder b ;
b . appendRegex ( " r " , " ^f?oo " ) ;
BSONObj o = b . done ( ) ;
assert ( simpleRegex ( o . firstElement ( ) ) = = " " ) ;
}
{
BSONObjBuilder b ;
b . appendRegex ( " r " , " ^fz?oo " ) ;
BSONObj o = b . done ( ) ;
assert ( simpleRegex ( o . firstElement ( ) ) = = " f " ) ;
}
{
BSONObjBuilder b ;
b . appendRegex ( " r " , " ^f " , " " ) ;
BSONObj o = b . done ( ) ;
assert ( simpleRegex ( o . firstElement ( ) ) = = " f " ) ;
}
2010-03-30 02:17:10 +02:00
{
BSONObjBuilder b ;
b . appendRegex ( " r " , " \\ Af " , " " ) ;
BSONObj o = b . done ( ) ;
assert ( simpleRegex ( o . firstElement ( ) ) = = " f " ) ;
}
2010-01-15 02:57:50 +01:00
{
BSONObjBuilder b ;
b . appendRegex ( " r " , " ^f " , " m " ) ;
BSONObj o = b . done ( ) ;
2010-03-30 02:17:10 +02:00
assert ( simpleRegex ( o . firstElement ( ) ) = = " " ) ;
}
{
BSONObjBuilder b ;
b . appendRegex ( " r " , " \\ Af " , " m " ) ;
BSONObj o = b . done ( ) ;
2010-01-15 02:57:50 +01:00
assert ( simpleRegex ( o . firstElement ( ) ) = = " f " ) ;
}
{
BSONObjBuilder b ;
2010-03-30 02:17:10 +02:00
b . appendRegex ( " r " , " \\ Af " , " mi " ) ;
2010-01-15 02:57:50 +01:00
BSONObj o = b . done ( ) ;
assert ( simpleRegex ( o . firstElement ( ) ) = = " " ) ;
}
2010-01-15 04:47:34 +01:00
{
BSONObjBuilder b ;
2010-03-30 02:17:10 +02:00
b . appendRegex ( " r " , " \\ Af \t \v o \n \r o \\ \\ # #comment " , " mx " ) ;
2010-01-15 04:47:34 +01:00
BSONObj o = b . done ( ) ;
assert ( simpleRegex ( o . firstElement ( ) ) = = " foo # " ) ;
}
2010-01-15 02:57:50 +01:00
}
} simple_regex_unittest ;
2010-07-22 15:51:22 +02:00
/**
 * Adjusts a result count for the optional skip and limit fields of a command.
 *
 * @param num  number of results before skip / limit are applied
 * @param cmd  command object; its skip and limit fields are honoured only
 *             when they are numeric
 * @return     the count after skipping, capped by the limit; never lowered
 *             below 0 by this function
 *
 * Limit handling follows the usual query semantics: a limit of 0 means
 * "no limit", and a negative limit caps the result at its magnitude.
 * Previously a limit of 0 forced the count to 0 and a negative limit produced
 * a negative count.
 */
long long applySkipLimit(long long num, const BSONObj& cmd) {
    BSONElement s = cmd[" skip "];
    BSONElement l = cmd[" limit "];

    if (s.isNumber()) {
        num = num - s.numberLong();
        if (num < 0) {
            num = 0;
        }
    }

    if (l.isNumber()) {
        const long long limit = l.numberLong();
        if (limit > 0) {
            if (limit < num) {
                num = limit;
            }
        }
        else if (limit < 0) {
            // Cap at |limit|.  Comparing against -num (instead of negating
            // limit first) keeps the most negative long long from overflowing;
            // once limit > -num holds, -limit is known to be representable.
            if (num > 0 && limit > -num) {
                num = -limit;
            }
        }
        // limit == 0: no limit requested, leave num untouched
    }

    return num;
}
2010-07-23 22:06:06 +02:00
/**
 * Builds a short human readable description of a wire message: always the
 * operation name and message size, and for operation codes in [2000, 2100)
 * also the namespace plus, for update / delete, the flags and query, and for
 * insert, the first object.  Other operations in that range are reported as
 * not handled yet.
 */
string debugString(Message& m) {
    stringstream out;
    out << " op: " << opToString(m.operation()) << " len: " << m.size();

    const bool isDbOperation = m.operation() >= 2000 && m.operation() < 2100;
    if (!isDbOperation) {
        return out.str();
    }

    DbMessage d(m);
    out << " ns: " << d.getns();

    switch (m.operation()) {
    case dbUpdate:
    case dbDelete: {
        // both start with an int of flags followed by the query object
        int flags = d.pullInt();
        BSONObj query = d.nextJsObj();
        out << " flags: " << flags << " query: " << query;
        break;
    }
    case dbInsert:
        out << d.nextJsObj();
        break;
    default:
        out << " CANNOT HANDLE YET ";
    }

    return out.str();
}
2010-07-22 15:51:22 +02:00
2009-02-24 20:14:45 +01:00
} // namespace mongo