0
0
mirror of https://github.com/mongodb/mongo.git synced 2024-12-01 01:21:03 +01:00
mongodb/db/key.cpp
2011-07-05 11:17:05 -04:00

651 lines
20 KiB
C++

// @file key.cpp
/**
* Copyright (C) 2011 10gen Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License, version 3,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "pch.h"
#include "key.h"
#include "../util/unittest.h"
namespace mongo {
extern const Ordering nullOrdering = Ordering::make(BSONObj());
// KeyBson is for V0 (version #0) indexes
int oldCompare(const BSONObj& l,const BSONObj& r, const Ordering &o);
// "old" = pre signed dates & such; i.e. btree V0
/* must be same canon type when called */
int oldCompareElementValues(const BSONElement& l, const BSONElement& r) {
dassert( l.canonicalType() == r.canonicalType() );
int f;
double x;
switch ( l.type() ) {
case EOO:
case Undefined: // EOO and Undefined are same canonicalType
case jstNULL:
case MaxKey:
case MinKey:
return 0;
case Bool:
return *l.value() - *r.value();
case Timestamp:
case Date:
// unsigned dates for old version
if ( l.date() < r.date() )
return -1;
return l.date() == r.date() ? 0 : 1;
case NumberLong:
if( r.type() == NumberLong ) {
long long L = l._numberLong();
long long R = r._numberLong();
if( L < R ) return -1;
if( L == R ) return 0;
return 1;
}
// else fall through
case NumberInt:
case NumberDouble: {
double left = l.number();
double right = r.number();
bool lNan = !( left <= numeric_limits< double >::max() &&
left >= -numeric_limits< double >::max() );
bool rNan = !( right <= numeric_limits< double >::max() &&
right >= -numeric_limits< double >::max() );
if ( lNan ) {
if ( rNan ) {
return 0;
}
else {
return -1;
}
}
else if ( rNan ) {
return 1;
}
x = left - right;
if ( x < 0 ) return -1;
return x == 0 ? 0 : 1;
}
case jstOID:
return memcmp(l.value(), r.value(), 12);
case Code:
case Symbol:
case String:
// nulls not allowed in the middle of strings in the old version
return strcmp(l.valuestr(), r.valuestr());
case Object:
case Array:
return oldCompare(l.embeddedObject(), r.embeddedObject(), nullOrdering);
case DBRef: {
int lsz = l.valuesize();
int rsz = r.valuesize();
if ( lsz - rsz != 0 ) return lsz - rsz;
return memcmp(l.value(), r.value(), lsz);
}
case BinData: {
int lsz = l.objsize(); // our bin data size in bytes, not including the subtype byte
int rsz = r.objsize();
if ( lsz - rsz != 0 ) return lsz - rsz;
return memcmp(l.value()+4, r.value()+4, lsz+1);
}
case RegEx: {
int c = strcmp(l.regex(), r.regex());
if ( c )
return c;
return strcmp(l.regexFlags(), r.regexFlags());
}
case CodeWScope : {
f = l.canonicalType() - r.canonicalType();
if ( f )
return f;
f = strcmp( l.codeWScopeCode() , r.codeWScopeCode() );
if ( f )
return f;
f = strcmp( l.codeWScopeScopeData() , r.codeWScopeScopeData() );
if ( f )
return f;
return 0;
}
default:
out() << "oldCompareElementValues: bad type " << (int) l.type() << endl;
assert(false);
}
return -1;
}
int oldElemCompare(const BSONElement&l , const BSONElement& r) {
int lt = (int) l.canonicalType();
int rt = (int) r.canonicalType();
int x = lt - rt;
if( x )
return x;
return oldCompareElementValues(l, r);
}
// pre signed dates & such
int oldCompare(const BSONObj& l,const BSONObj& r, const Ordering &o) {
BSONObjIterator i(l);
BSONObjIterator j(r);
unsigned mask = 1;
while ( 1 ) {
// so far, equal...
BSONElement l = i.next();
BSONElement r = j.next();
if ( l.eoo() )
return r.eoo() ? 0 : -1;
if ( r.eoo() )
return 1;
int x;
{
x = oldElemCompare(l, r);
if( o.descending(mask) )
x = -x;
}
if ( x != 0 )
return x;
mask <<= 1;
}
return -1;
}
/* old style compares:
- dates are unsigned
- strings no nulls
*/
int KeyBson::woCompare(const KeyBson& r, const Ordering &o) const {
return oldCompare(_o, r._o, o);
}
// woEqual could be made faster than woCompare but this is for backward compatibility so not worth a big effort
bool KeyBson::woEqual(const KeyBson& r) const {
return oldCompare(_o, r._o, nullOrdering) == 0;
}
// [ ][HASMORE][x][y][canontype_4bits]
enum CanonicalsEtc {
cminkey=1,
cnull=2,
cdouble=4,
cstring=6,
cbindata=7,
coid=8,
cfalse=10,
ctrue=11,
cdate=12,
cmaxkey=14,
cCANONTYPEMASK = 0xf,
cY = 0x10,
cint = cY | cdouble,
cX = 0x20,
clong = cX | cdouble,
cHASMORE = 0x40,
cNOTUSED = 0x80 // but see IsBSON sentinel - this bit not usable without great care
};
// bindata bson type
const unsigned BinDataLenMask = 0xf0; // lengths are powers of 2 of this value
const unsigned BinDataTypeMask = 0x0f; // 0-7 as you would expect, 8-15 are 128+value. see BinDataType.
const int BinDataLenMax = 32;
const int BinDataLengthToCode[] = {
0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
0x80, -1/*9*/, 0x90/*10*/, -1/*11*/, 0xa0/*12*/, -1/*13*/, 0xb0/*14*/, -1/*15*/,
0xc0/*16*/, -1, -1, -1, 0xd0/*20*/, -1, -1, -1,
0xe0/*24*/, -1, -1, -1, -1, -1, -1, -1,
0xf0/*32*/
};
const int BinDataCodeToLength[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 32
};
int binDataCodeToLength(int codeByte) {
return BinDataCodeToLength[codeByte >> 4];
}
/** object cannot be represented in compact format. so store in traditional bson format
with a leading sentinel byte IsBSON to indicate it's in that format.
Given that the KeyV1Owned constructor already grabbed a bufbuilder, we reuse it here
so that we don't have to do an extra malloc.
*/
void KeyV1Owned::traditional(const BSONObj& obj) {
b.reset();
b.appendUChar(IsBSON);
b.appendBuf(obj.objdata(), obj.objsize());
_keyData = (const unsigned char *) b.buf();
}
// fromBSON to Key format
KeyV1Owned::KeyV1Owned(const BSONObj& obj) {
BSONObj::iterator i(obj);
unsigned char bits = 0;
while( 1 ) {
BSONElement e = i.next();
if( i.more() )
bits |= cHASMORE;
switch( e.type() ) {
case MinKey:
b.appendUChar(cminkey|bits);
break;
case jstNULL:
b.appendUChar(cnull|bits);
break;
case MaxKey:
b.appendUChar(cmaxkey|bits);
break;
case Bool:
b.appendUChar( (e.boolean()?ctrue:cfalse) | bits );
break;
case jstOID:
b.appendUChar(coid|bits);
b.appendBuf(&e.__oid(), sizeof(OID));
break;
case BinData:
{
int t = e.binDataType();
// 0-7 and 0x80 to 0x87 are supported by Key
if( (t & 0x78) == 0 && t != ByteArrayDeprecated ) {
int len;
const char * d = e.binData(len);
int code = BinDataLengthToCode[len];
if( code >= 0 ) {
if( t >= 128 )
t = (t-128) | 0x08;
dassert( (code&t) == 0 );
b.appendUChar( cbindata|bits );
b.appendUChar( code | t );
b.appendBuf(d, len);
break;
}
}
traditional(obj);
return;
}
case Date:
b.appendUChar(cdate|bits);
b.appendStruct(e.date());
break;
case String:
{
b.appendUChar(cstring|bits);
// note we do not store the terminating null, to save space.
unsigned x = (unsigned) e.valuestrsize() - 1;
if( x > 255 ) {
traditional(obj);
return;
}
b.appendUChar(x);
b.appendBuf(e.valuestr(), x);
break;
}
case NumberInt:
b.appendUChar(cint|bits);
b.appendNum((double) e._numberInt());
break;
case NumberLong:
{
long long n = e._numberLong();
double d = (double) n;
if( d != n ) {
traditional(obj);
return;
}
b.appendUChar(clong|bits);
b.appendNum(d);
break;
}
case NumberDouble:
{
double d = e._numberDouble();
if( isNaN(d) ) {
traditional(obj);
return;
}
b.appendUChar(cdouble|bits);
b.appendNum(d);
break;
}
default:
// if other types involved, store as traditional BSON
traditional(obj);
return;
}
if( !i.more() )
break;
bits = 0;
}
_keyData = (const unsigned char *) b.buf();
dassert( b.len() == dataSize() ); // check datasize method is correct
dassert( (*_keyData & cNOTUSED) == 0 );
}
BSONObj KeyV1::toBson() const {
if( !isCompactFormat() )
return bson();
BSONObjBuilder b(512);
const unsigned char *p = _keyData;
while( 1 ) {
unsigned bits = *p++;
switch( bits & 0x3f ) {
case cminkey: b.appendMinKey(""); break;
case cnull: b.appendNull(""); break;
case cfalse: b.appendBool("", false); break;
case ctrue: b.appendBool("", true); break;
case cmaxkey:
b.appendMaxKey("");
break;
case cstring:
{
unsigned sz = *p++;
// we build the element ourself as we have to null terminate it
BufBuilder &bb = b.bb();
bb.appendNum((char) String);
bb.appendUChar(0); // fieldname ""
bb.appendNum(sz+1);
bb.appendBuf(p, sz);
bb.appendUChar(0); // null char at end of string
p += sz;
break;
}
case coid:
b.appendOID("", (OID *) p);
p += sizeof(OID);
break;
case cbindata:
{
int len = binDataCodeToLength(*p);
int subtype = (*p) & BinDataTypeMask;
if( subtype & 0x8 ) {
subtype = (subtype & 0x7) | 0x80;
}
b.appendBinData("", len, (BinDataType) subtype, ++p);
p += len;
break;
}
case cdate:
b.appendDate("", (Date_t&) *p);
p += 8;
break;
case cdouble:
b.append("", (double&) *p);
p += sizeof(double);
break;
case cint:
b.append("", (int) ((double&) *p));
p += sizeof(double);
break;
case clong:
b.append("", (long long) ((double&) *p));
p += sizeof(double);
break;
default:
assert(false);
}
if( (bits & cHASMORE) == 0 )
break;
}
return b.obj();
}
static int compare(const unsigned char *&l, const unsigned char *&r) {
int lt = (*l & cCANONTYPEMASK);
int rt = (*r & cCANONTYPEMASK);
int x = lt - rt;
if( x )
return x;
l++; r++;
// same type
switch( lt ) {
case cdouble:
{
double L = *((double *) l);
double R = *((double *) r);
if( L < R )
return -1;
if( L != R )
return 1;
l += 8; r += 8;
break;
}
case cstring:
{
int lsz = *l;
int rsz = *r;
int common = min(lsz, rsz);
l++; r++; // skip the size byte
// use memcmp as we (will) allow zeros in UTF8 strings
int res = memcmp(l, r, common);
if( res )
return res;
// longer string is the greater one
int diff = lsz-rsz;
if( diff )
return diff;
l += lsz; r += lsz;
break;
}
case cbindata:
{
int L = *l;
int R = *r;
int diff = L-R; // checks length and subtype simultaneously
if( diff )
return diff;
// same length, same type
l++; r++;
int len = binDataCodeToLength(L);
int res = memcmp(l, r, len);
if( res )
return res;
l += len; r += len;
break;
}
case cdate:
{
long long L = *((long long *) l);
long long R = *((long long *) r);
if( L < R )
return -1;
if( L > R )
return 1;
l += 8; r += 8;
break;
}
case coid:
{
int res = memcmp(l, r, sizeof(OID));
if( res )
return res;
l += 12; r += 12;
break;
}
default:
// all the others are a match -- e.g. null == null
;
}
return 0;
}
// at least one of this and right are traditional BSON format
int NOINLINE_DECL KeyV1::compareHybrid(const KeyV1& right, const Ordering& order) const {
BSONObj L = toBson();
BSONObj R = right.toBson();
return L.woCompare(R, order, /*considerfieldname*/false);
}
int KeyV1::woCompare(const KeyV1& right, const Ordering &order) const {
const unsigned char *l = _keyData;
const unsigned char *r = right._keyData;
if( (*l|*r) == IsBSON ) // only can do this if cNOTUSED maintained
return compareHybrid(right, order);
unsigned mask = 1;
while( 1 ) {
char lval = *l;
char rval = *r;
{
int x = compare(l, r); // updates l and r pointers
if( x ) {
if( order.descending(mask) )
x = -x;
return x;
}
}
{
int x = ((int)(lval & cHASMORE)) - ((int)(rval & cHASMORE));
if( x )
return x;
if( (lval & cHASMORE) == 0 )
break;
}
mask <<= 1;
}
return 0;
}
static unsigned sizes[] = {
0,
1, //cminkey=1,
1, //cnull=2,
0,
9, //cdouble=4,
0,
0, //cstring=6,
0,
13, //coid=8,
0,
1, //cfalse=10,
1, //ctrue=11,
9, //cdate=12,
0,
1, //cmaxkey=14,
0
};
inline unsigned sizeOfElement(const unsigned char *p) {
unsigned type = *p & cCANONTYPEMASK;
unsigned sz = sizes[type];
if( sz == 0 ) {
if( type == cstring ) {
sz = ((unsigned) p[1]) + 2;
}
else {
assert( type == cbindata );
sz = binDataCodeToLength(p[1]) + 2;
}
}
return sz;
}
int KeyV1::dataSize() const {
const unsigned char *p = _keyData;
if( !isCompactFormat() ) {
return bson().objsize() + 1;
}
bool more;
do {
unsigned z = sizeOfElement(p);
more = (*p & cHASMORE) != 0;
p += z;
} while( more );
return p - _keyData;
}
bool KeyV1::woEqual(const KeyV1& right) const {
const unsigned char *l = _keyData;
const unsigned char *r = right._keyData;
if( (*l|*r) == IsBSON ) {
return toBson().equal(right.toBson());
}
while( 1 ) {
char lval = *l;
char rval = *r;
if( (lval&(cCANONTYPEMASK|cHASMORE)) != (rval&(cCANONTYPEMASK|cHASMORE)) )
return false;
l++; r++;
switch( lval&cCANONTYPEMASK ) {
case coid:
if( *((unsigned*) l) != *((unsigned*) r) )
return false;
l += 4; r += 4;
case cdate:
case cdouble:
if( *((unsigned long long *) l) != *((unsigned long long *) r) )
return false;
l += 8; r += 8;
break;
case cstring:
{
unsigned sz = ((unsigned) *l) + 1;
if( memcmp(l, r, sz) ) // first byte checked is the length byte
return false;
l += sz; r += sz;
break;
}
case cbindata:
{
int len = binDataCodeToLength(*l) + 1;
if( memcmp(l, r, len) )
return false;
l += len; r += len;
break;
}
case cminkey:
case cnull:
case cfalse:
case ctrue:
case cmaxkey:
break;
default:
assert(false);
}
if( (lval&cHASMORE) == 0 )
break;
}
return true;
}
struct CmpUnitTest : public UnitTest {
void run() {
char a[2];
char b[2];
a[0] = -3;
a[1] = 0;
b[0] = 3;
b[1] = 0;
assert( strcmp(a,b)>0 && memcmp(a,b,2)>0 );
}
} cunittest;
}