/** @file jsobj.h */
/**
   BSONObj and its helpers

   "BSON" stands for "binary JSON" -- i.e. a binary way to represent objects that would be
   represented in JSON (plus a few extensions useful for databases & other languages).
*/
/* Copyright (C) 2008 10gen Inc.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License, version 3,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
#pragma once
#include "../stdafx.h"
#include "../util/builder.h"
#include "boost/utility.hpp"
namespace mongo {
class BSONObj;
class Record;
class BSONObjBuilder;
class BSONObjBuilderValueStream;
#pragma pack(1)
/** The complete list of valid BSON types.

    The numeric values are the element type bytes stored in the BSON data
    (BSONElement::type() reads the byte directly), so they must not be changed.

    NOTE(review): only NumberInt and Timestamp had their enumerators intact in this
    copy of the header.  The other names/values were restored from the type names used
    elsewhere in this file and from the BSON specification; Undefined, Date, DBRef,
    Code, Symbol, MinKey and JSTypeMax are not referenced in the visible code --
    confirm against callers.
*/
enum BSONType {
    /** smaller than all other types */
    MinKey = -1,
    /** end of object */
    EOO = 0,
    /** double precision floating point value */
    NumberDouble = 1,
    /** character string, stored in utf8 */
    String = 2,
    /** an embedded object */
    Object = 3,
    /** an embedded array */
    Array = 4,
    /** binary data */
    BinData = 5,
    /** Undefined type */
    Undefined = 6,
    /** ObjectId */
    jstOID = 7,
    /** boolean type */
    Bool = 8,
    /** date type */
    Date = 9,
    /** null type */
    jstNULL = 10,
    /** regular expression, a pattern with options */
    RegEx = 11,
    /** deprecated / will be redesigned */
    DBRef = 12,
    /** deprecated / use CodeWScope */
    Code = 13,
    /** a programming language (e.g., Python) symbol */
    Symbol = 14,
    /** javascript code that can execute on the database server, with context */
    CodeWScope = 15,
    /** 32 bit signed integer */
    NumberInt = 16,
    /** Updated to a Date with value next OpTime on insert */
    Timestamp = 17,
    /** max type that is not MaxKey
        NOTE(review): assumed equal to the largest regular type above (Timestamp) -- confirm. */
    JSTypeMax = 17,
    /** larger than all other types */
    MaxKey = 127
};
/* Subtypes of BinData.  The subtype is the single byte read by
   BSONElement::binDataType().
   bdtCustom and above are ones that the JS compiler understands, but are
   opaque to the database.
*/
enum BinDataType { Function=1, ByteArray=2, bdtUUID = 3, MD5Type=5, bdtCustom=128 };
/** Object ID type.
    BSON objects typically have an _id field for the object id.  This field should be the first
    member of the object when present.  class OID is a special type that is a 12 byte id which
    is likely to be unique to the system.  You may also use other types for _id's.
    When _id field is missing from a BSON object, on an insert the database may insert one
    automatically in certain circumstances.
*/
class OID {
    // The id is stored as 8 bytes (viewable either as one integer `a` or as raw
    // bytes `data`) followed by the 4 byte integer `b`.
    union {
        long long a;
        unsigned char data[8];
    };
    unsigned b;
public:
    /** @return pointer to the first of the 8 bytes held in the union. */
    const unsigned char *getData() const { return data; }

    /** Two ids are equal when both the 8 byte and the 4 byte parts match. */
    bool operator==(const OID& r) const {
        return a==r.a&&b==r.b;
    }
    bool operator!=(const OID& r) const {
        return a!=r.a||b!=r.b;
    }

    /** The object ID output as 24 hex digits.
        The 8 bytes of `data` are written first, then the 4 bytes of `b` in the order
        they sit in memory.  Nothing else is appended: the older width(16)/width(8)
        output of `a` and `b` is intentionally not emitted, since it would make the
        result longer than 24 digits.
    */
    std::string str() const {
        std::stringstream s;
        s << std::hex;
        // Stream fill()/width() was not working here, so every byte is
        // zero-padded to two digits by hand (see appendHexByte).
        for ( int i = 0; i < 8; i++ )
            appendHexByte( s, data[i] );
        const unsigned char *raw = reinterpret_cast< const unsigned char* >( &b );
        for ( int i = 0; i < 4; i++ )
            appendHexByte( s, raw[i] );
        return s.str();
    }

    /** sets the contents to a new oid / randomized value */
    void init();

    /** Set to the hex string value specified. */
    void init( std::string s );

private:
    /** Write one byte value (0..255) as exactly two hex digits; `s` must already be in hex mode. */
    static void appendHexByte( std::stringstream &s, unsigned u ) {
        if ( u < 16 ) s << '0';
        s << u;
    }
};
ostream& operator<<( ostream &s, const OID &o );
/** Formatting mode for generating JSON from BSON.
    See the Mongo Extended JSON documentation for details.

    NOTE(review): the enumerator names were missing from this copy of the header and
    have been restored from the per-value comments; confirm the names against the
    callers of BSONElement::jsonString().
*/
enum JsonStringFormat {
    /** strict RFC format */
    Strict,
    /** 10gen format, which is close to JS format.  This form is understandable by
        javascript running inside the Mongo server via eval() */
    TenGen,
    /** Javascript JSON compatible */
    JS
};
#pragma pack()
/* internals
   <type><fieldName    ><value>
   -------- size() ------------
         -fieldNameSize-
                        value()
   type()
*/
/** BSONElement represents an "element" in a BSONObj. So for the object { a : 3, b : "abc" },
'a : 3' is the first element (key+value).
The BSONElement object points into the BSONObj's data. Thus the BSONObj must stay in scope
for the life of the BSONElement.
class BSONElement {
friend class BSONObjIterator;
friend class BSONObj;
string toString() const;
operator string() const { return toString(); }
string jsonString( JsonStringFormat format, bool includeFieldNames = true ) const;
/** Returns the type of the element */
BSONType type() const {
return (BSONType) *data;
/** Indicates if it is the end-of-object element, which is present at the end of
every BSON object.
bool eoo() const {
return type() == EOO;
/** Size of the element.
@param maxLen If maxLen is specified, don't scan more than maxLen bytes to calculate size.
int size( int maxLen = -1 ) const;
/** Wrap this element up as a singleton object. */
BSONObj wrap() const;
/** field name of the element. e.g., for
name : "Joe"
"name" is the fieldname
const char * fieldName() const {
if ( eoo() ) return ""; // no fieldname for it.
return data + 1;
/** raw data of the element's value (so be careful). */
const char * value() const {
return (data + fieldNameSize() + 1);
/** size in bytes of the element's value (when applicable). */
int valuesize() const {
return size() - fieldNameSize() - 1;
bool isBoolean() const {
return type() == Bool;
/** @return value of a boolean element.
You must assure element is a boolean before
calling. */
bool boolean() const {
return *value() ? true : false;
/** Retrieve a java style date value from the element.
Ensure element is of type Date before calling.
unsigned long long date() const {
return *reinterpret_cast< const unsigned long long* >( value() );
/** True if element is of a numeric type. */
bool isNumber() const {
return type() == NumberDouble || type() == NumberInt;
/** Retrieve the numeric value of the element. If not of a numeric type, returns 0. */
double number() const {
switch( type() ) {
case NumberDouble:
return *reinterpret_cast< const double* >( value() );
case NumberInt:
return *reinterpret_cast< const int* >( value() );
return 0;
/** Retrieve the object ID stored in the object.
You must ensure the element is of type jstOID first. */
const OID &__oid() const {
return *reinterpret_cast< const OID* >( value() );
/** True if element is null. */
bool isNull() const {
return type() == jstNULL;
/** Size (length) of a string element.
You must assure of type String first. */
int valuestrsize() const {
return *reinterpret_cast< const int* >( value() );
// for objects the size *includes* the size of the size field
int objsize() const {
return *reinterpret_cast< const int* >( value() );
/** Get a string's value. Also gives you start of the real data for an embedded object.
You must assure data is of an appropriate type first -- see also valuestrsafe().
const char * valuestr() const {
return value() + 4;
/** Get the string value of the element. If not a string returns "". */
const char *valuestrsafe() const {
return type() == String ? valuestr() : "";
/** Get the string value of the element. If not a string returns "". */
string str() const { return valuestrsafe(); }
/** Get javascript code of a CodeWScope data element. */
const char * codeWScopeCode() const {
return value() + 8;
/** Get the scope context of a CodeWScope data element. */
const char * codeWScopeScopeData() const {
// TODO fix
return codeWScopeCode() + strlen( codeWScopeCode() ) + 1;
/** Get the embedded object this element holds. */
BSONObj embeddedObject() const;
/* uasserts if not an object */
BSONObj embeddedObjectUserCheck();
BSONObj codeWScopeObject() const;
/** Get binary data. Element must be of type BinData */
const char *binData(int& len) const {
// BinData:
assert( type() == BinData );
len = valuestrsize();
return value() + 5;
BinDataType binDataType() const {
// BinData:
assert( type() == BinData );
char c = (value() + 4)[0];
return (BinDataType)c;
/** Retrieve the regex string for a Regex element */
const char *regex() const {
assert(type() == RegEx);
return value();
const char *simpleRegex() const;
/** Retrieve the regex flags (options) for a Regex element */
const char *regexFlags() const {
const char *p = regex();
return p + strlen(p) + 1;
/** like operator== but doesn't check the fieldname,
just the value.
bool valuesEqual(const BSONElement& r) const {
if ( isNumber() )
return number() == r.number() && r.isNumber();
bool match= valuesize() == r.valuesize() &&
memcmp(value(),r.value(),valuesize()) == 0;
return match && type() == r.type();
// todo: make "0" == 0.0
/** Returns true if elements are equal. */
bool operator==(const BSONElement& r) const {
if ( strcmp(fieldName(), r.fieldName()) != 0 )
return false;
return valuesEqual(r);
/** Well ordered comparison.
@return <0: l0:l>r
order by type, field name, and field value.
If considerFieldName is true, pay attention to the field name.
int woCompare( const BSONElement &e, bool considerFieldName = true ) const;
const char * rawdata() const {
return data;
int getGtLtOp() const;
/** Constructs an empty element */
/** Check that data is internally consistent. */
void validate() const;
/** True if this element may contain subobjects. */
bool mayEncapsulate() const {
return type() == Object ||
type() == Array ||
type() == CodeWScope;
unsigned long long timestampTime() const{
unsigned long long t = ((unsigned int*)(value() + 4 ))[0];
return t * 1000;
unsigned int timestampInc() const{
return ((unsigned int*)(value() ))[0];
// If maxLen is specified, don't scan more than maxLen bytes.
BSONElement(const char *d, int maxLen = -1) : data(d) {
fieldNameSize_ = -1;
if ( eoo() )
fieldNameSize_ = 0;
else {
if ( maxLen != -1 ) {
int size = strnlen( fieldName(), maxLen - 1 );
massert( "Invalid field name", size != -1 );
fieldNameSize_ = size + 1;
totalSize = -1;
const char *data;
mutable int fieldNameSize_; // cached value
int fieldNameSize() const {
if ( fieldNameSize_ == -1 )
fieldNameSize_ = strlen( fieldName() ) + 1;
return fieldNameSize_;
mutable int totalSize; /* caches the computed size */
/* l and r MUST have same type when called: check that first. */
int compareElementValues(const BSONElement& l, const BSONElement& r);
int getGtLtOp(const BSONElement& e);
/* compare values with type check.
note: as is now, not smart about int/double comingling. TODO
inline int compareValues(const BSONElement& l, const BSONElement& r)
int x = (int) l.type() - (int) r.type();
if( x ) return x;
return compareElementValues(l,r);
/**
   C++ representation of a "BSON" object -- that is, an extended JSON-style
   object in a binary representation.

   Note that BSONObj's have a smart pointer capability built in -- so you can
   pass them around by value.  The reference counts used to implement this
   do not use locking, so copying and destroying BSONObj's are not thread-safe
   operations.
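
   BSON object format:

   <unsigned totalSize> {<byte BSONType><cstring FieldName><Data>}* EOO

   totalSize includes itself.

   Data:
   Bool:      <byte>
   EOO:       nothing follows
   Undefined: nothing follows
   OID:       an OID object
   NumberDouble: <double>
   NumberInt: <int32>
   String:    <unsigned32 strsizewithnull><cstring>
   Date:      <8bytes>
   Regex:     <cstring regex><cstring options>
   Object:    a nested object, leading with its entire size, which terminates with EOO.
   Array:     same as object
   DBRef:     a database reference: basically a collection name plus an Object ID
   BinData:   <int len> <byte subtype> <byte[len] data>
   Code:      a function (not a closure): same format as String.
   Symbol:    a language symbol (say a python symbol).  same format as String.
   Code With Scope: <total size><String><Object>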