// @file key.cpp /** * Copyright (C) 2011 10gen Inc. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License, version 3, * as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . */ #include "pch.h" #include "key.h" #include "../util/unittest.h" namespace mongo { // [ ][HASMORE][x][y][canontype_4bits] enum CanonicalsEtc { cminkey=1, cnull=2, cdouble=4, cstring=6, cbindata=7, coid=8, cfalse=10, ctrue=11, cdate=12, cmaxkey=14, cCANONTYPEMASK = 0xf, cY = 0x10, cint = cY | cdouble, cX = 0x20, clong = cX | cdouble, cHASMORE = 0x40, cNOTUSED = 0x80 // but see IsBSON sentinel - this bit not usable without great care }; // bindata bson type const unsigned BinDataLenMask = 0xf0; // lengths are powers of 2 of this value const unsigned BinDataTypeMask = 0x0f; // 0-7 as you would expect, 8-15 are 128+value. see BinDataType. const int BinDataLenMax = 32; const int BinDataLengthToCode[] = { 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80, -1/*9*/, 0x90/*10*/, -1/*11*/, 0xa0/*12*/, -1/*13*/, 0xb0/*14*/, -1/*15*/, 0xc0/*16*/, -1, -1, -1, 0xd0/*20*/, -1, -1, -1, 0xe0/*24*/, -1, -1, -1, -1, -1, -1, -1, 0xf0/*32*/ }; const int BinDataCodeToLength[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 32 }; int binDataCodeToLength(int codeByte) { return BinDataCodeToLength[codeByte >> 4]; } /** object cannot be represented in compact format. so store in traditional bson format with a leading sentinel byte IsBSON to indicate it's in that format. Given that the KeyV1Owned constructor already grabbed a bufbuilder, we reuse it here so that we don't have to do an extra malloc. */ void KeyV1Owned::traditional(const BSONObj& obj) { b.reset(); b.appendUChar(IsBSON); b.appendBuf(obj.objdata(), obj.objsize()); _keyData = (const unsigned char *) b.buf(); } // fromBSON to Key format KeyV1Owned::KeyV1Owned(const BSONObj& obj) { BSONObj::iterator i(obj); assert( i.more() ); unsigned char bits = 0; while( 1 ) { BSONElement e = i.next(); if( i.more() ) bits |= cHASMORE; switch( e.type() ) { case MinKey: b.appendUChar(cminkey|bits); break; case jstNULL: b.appendUChar(cnull|bits); break; case MaxKey: b.appendUChar(cmaxkey|bits); break; case Bool: b.appendUChar( (e.boolean()?ctrue:cfalse) | bits ); break; case jstOID: b.appendUChar(coid|bits); b.appendBuf(&e.__oid(), sizeof(OID)); break; case BinData: { int t = e.binDataType(); // 0-7 and 0x80 to 0x87 are supported by Key if( (t & 0x78) == 0 && t != ByteArrayDeprecated ) { int len; const char * d = e.binData(len); int code = BinDataLengthToCode[len]; if( code >= 0 ) { if( t >= 128 ) t = (t-128) | 0x08; dassert( (code&t) == 0 ); b.appendUChar( cbindata|bits ); b.appendUChar( code | t ); b.appendBuf(d, len); break; } } traditional(obj); return; } case Date: b.appendUChar(cdate|bits); b.appendStruct(e.date()); break; case String: { b.appendUChar(cstring|bits); // note we do not store the terminating null, to save space. unsigned x = (unsigned) e.valuestrsize() - 1; if( x > 255 ) { traditional(obj); return; } b.appendUChar(x); b.appendBuf(e.valuestr(), x); break; } case NumberInt: b.appendUChar(cint|bits); b.appendNum((double) e._numberInt()); break; case NumberLong: { long long n = e._numberLong(); double d = (double) n; if( d != n ) { traditional(obj); return; } b.appendUChar(clong|bits); b.appendNum(d); break; } case NumberDouble: { double d = e._numberDouble(); bool nan = !( d <= numeric_limits< double >::max() && d >= -numeric_limits< double >::max() ); if( !nan ) { b.appendUChar(cdouble|bits); b.appendNum(d); break; } // else fall through and return a traditional BSON obj so our compressed keys need not check for nan } default: // if other types involved, store as traditional BSON traditional(obj); return; } if( !i.more() ) break; bits = 0; } _keyData = (const unsigned char *) b.buf(); dassert( b.len() == dataSize() ); // check datasize method is correct dassert( (*_keyData & cNOTUSED) == 0 ); } BSONObj KeyV1::toBson() const { if( !isCompactFormat() ) return bson(); BSONObjBuilder b(512); const unsigned char *p = _keyData; while( 1 ) { unsigned bits = *p++; switch( bits & 0x3f ) { case cminkey: b.appendMinKey(""); break; case cnull: b.appendNull(""); break; case cfalse: b.appendBool("", false); break; case ctrue: b.appendBool("", true); break; case cmaxkey: b.appendMaxKey(""); break; case cstring: { unsigned sz = *p++; // we build the element ourself as we have to null terminate it BufBuilder &bb = b.bb(); bb.appendNum((char) String); bb.appendUChar(0); // fieldname "" bb.appendNum(sz+1); bb.appendBuf(p, sz); bb.appendUChar(0); // null char at end of string p += sz; break; } case coid: b.appendOID("", (OID *) p); p += sizeof(OID); break; case cbindata: { int len = binDataCodeToLength(*p); int subtype = (*p) & BinDataTypeMask; if( subtype & 0x8 ) { subtype = (subtype & 0x7) | 0x80; } b.appendBinData("", len, (BinDataType) subtype, ++p); p += len; break; } case cdate: b.appendDate("", (Date_t&) *p); p += 8; break; case cdouble: b.append("", (double&) *p); p += sizeof(double); break; case cint: b.append("", (int) ((double&) *p)); p += sizeof(double); break; case clong: b.append("", (long long) ((double&) *p)); p += sizeof(double); break; default: assert(false); } if( (bits & cHASMORE) == 0 ) break; } return b.obj(); } static int compare(const unsigned char *&l, const unsigned char *&r) { int lt = (*l & cCANONTYPEMASK); int rt = (*r & cCANONTYPEMASK); int x = lt - rt; if( x ) return x; l++; r++; // same type switch( lt ) { case cdouble: { double L = *((double *) l); double R = *((double *) r); if( L < R ) return -1; if( L > R ) return 1; l += 8; r += 8; break; } case cstring: { int lsz = *l; int rsz = *r; int common = min(lsz, rsz); l++; r++; // skip the size byte // use memcmp as we (will) allow zeros in UTF8 strings int res = memcmp(l, r, common); if( res ) return res; // longer string is the greater one int diff = lsz-rsz; if( diff ) return diff; l += lsz; r += lsz; break; } case coid: { int res = memcmp(l, r, sizeof(OID)); if( res ) return res; l += 12; r += 12; break; } case cbindata: { int L = *l; int R = *r; int diff = L-R; // checks length and subtype simultaneously if( diff ) return diff; // same length, same type l++; r++; int len = binDataCodeToLength(L); int res = memcmp(l, r, len); if( res ) return res; l += len; r += len; break; } case cdate: { long long L = *((long long *) l); long long R = *((long long *) r); if( L < R ) return -1; if( L > R ) return 1; l += 8; r += 8; break; } default: // all the others are a match -- e.g. null == null ; } return 0; } // at least one of this and right are traditional BSON format int NOINLINE_DECL KeyV1::compareHybrid(const KeyV1& right, const Ordering& order) const { BSONObj L = toBson(); BSONObj R = right.toBson(); return L.woCompare(R, order, /*considerfieldname*/false); } int KeyV1::woCompare(const KeyV1& right, const Ordering &order) const { const unsigned char *l = _keyData; const unsigned char *r = right._keyData; if( (*l|*r) == IsBSON ) // only can do this if cNOTUSED maintained return compareHybrid(right, order); unsigned mask = 1; while( 1 ) { char lval = *l; char rval = *r; { int x = compare(l, r); // updates l and r pointers if( x ) { if( order.descending(mask) ) x = -x; return x; } } { int x = ((int)(lval & cHASMORE)) - ((int)(rval & cHASMORE)); if( x ) return x; if( (lval & cHASMORE) == 0 ) break; } mask <<= 1; } return 0; } static unsigned sizes[] = { 0, 1, //cminkey=1, 1, //cnull=2, 0, 9, //cdouble=4, 0, 0, //cstring=6, 0, 13, //coid=8, 0, 1, //cfalse=10, 1, //ctrue=11, 9, //cdate=12, 0, 1, //cmaxkey=14, 0 }; inline unsigned sizeOfElement(const unsigned char *p) { unsigned type = *p & cCANONTYPEMASK; unsigned sz = sizes[type]; if( sz == 0 ) { if( type == cstring ) { sz = ((unsigned) p[1]) + 2; } else { assert( type == cbindata ); sz = binDataCodeToLength(p[1]) + 2; } } return sz; } int KeyV1::dataSize() const { const unsigned char *p = _keyData; if( !isCompactFormat() ) { return bson().objsize() + 1; } bool more; do { unsigned z = sizeOfElement(p); more = (*p & cHASMORE) != 0; p += z; } while( more ); return p - _keyData; } bool KeyV1::woEqual(const KeyV1& right) const { const unsigned char *l = _keyData; const unsigned char *r = right._keyData; if( (*l|*r) == IsBSON ) { return toBson().woEqual(right.toBson()); } while( 1 ) { char lval = *l; char rval = *r; if( (lval&(cCANONTYPEMASK|cHASMORE)) != (rval&(cCANONTYPEMASK|cHASMORE)) ) return false; l++; r++; switch( lval&cCANONTYPEMASK ) { case coid: if( *((unsigned*) l) != *((unsigned*) r) ) return false; l += 4; r += 4; case cdate: case cdouble: if( *((unsigned long long *) l) != *((unsigned long long *) r) ) return false; l += 8; r += 8; break; case cstring: { unsigned sz = ((unsigned) *l) + 1; if( memcmp(l, r, sz) ) // first byte checked is the length byte return false; l += sz; r += sz; break; } case cbindata: { int len = binDataCodeToLength(*l) + 1; if( memcmp(l, r, len) ) return false; l += len; r += len; break; } case cminkey: case cnull: case cfalse: case ctrue: case cmaxkey: break; default: assert(false); } if( (lval&cHASMORE) == 0 ) break; } return true; } struct CmpUnitTest : public UnitTest { void run() { char a[2]; char b[2]; a[0] = -3; a[1] = 0; b[0] = 3; b[1] = 0; assert( strcmp(a,b)>0 && memcmp(a,b,2)>0 ); } } cunittest; }