2008-06-06 15:43:15 +02:00
|
|
|
// btree.h
|
|
|
|
|
2008-07-20 23:37:33 +02:00
|
|
|
/**
|
|
|
|
* Copyright (C) 2008 10gen Inc.
|
2008-12-29 02:28:49 +01:00
|
|
|
*
|
2008-07-20 23:37:33 +02:00
|
|
|
* This program is free software: you can redistribute it and/or modify
|
|
|
|
* it under the terms of the GNU Affero General Public License, version 3,
|
|
|
|
* as published by the Free Software Foundation.
|
2008-12-29 02:28:49 +01:00
|
|
|
*
|
2008-07-20 23:37:33 +02:00
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU Affero General Public License for more details.
|
2008-12-29 02:28:49 +01:00
|
|
|
*
|
2008-07-20 23:37:33 +02:00
|
|
|
* You should have received a copy of the GNU Affero General Public License
|
|
|
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
*/
|
|
|
|
|
2008-06-06 15:43:15 +02:00
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include "../stdafx.h"
|
|
|
|
#include "jsobj.h"
|
|
|
|
#include "storage.h"
|
|
|
|
#include "pdfile.h"
|
|
|
|
|
2009-01-14 23:09:51 +01:00
|
|
|
namespace mongo {
|
|
|
|
|
2008-08-11 20:53:08 +02:00
|
|
|
#pragma pack(push,1)
|
2008-06-06 15:43:15 +02:00
|
|
|
|
2009-01-15 16:17:11 +01:00
|
|
|
struct _KeyNode {
|
|
|
|
DiskLoc prevChildBucket;
|
|
|
|
DiskLoc recordLoc;
|
|
|
|
short keyDataOfs() {
|
|
|
|
return (short) _kdo;
|
|
|
|
}
|
|
|
|
unsigned short _kdo;
|
|
|
|
void setKeyDataOfs(short s) {
|
|
|
|
_kdo = s;
|
|
|
|
assert(s>=0);
|
|
|
|
}
|
|
|
|
void setKeyDataOfsSavingUse(short s) {
|
|
|
|
_kdo = s;
|
|
|
|
assert(s>=0);
|
|
|
|
}
|
|
|
|
void setUnused() {
|
|
|
|
/* Setting ofs to odd is the sentinel for unused, as real recordLoc's are always
|
|
|
|
even numbers.
|
|
|
|
Note we need to keep its value basically the same as we use the recordLoc
|
|
|
|
as part of the key in the index (to handle duplicate keys efficiently).
|
|
|
|
*/
|
|
|
|
recordLoc.GETOFS() |= 1;
|
|
|
|
}
|
|
|
|
int isUnused() {
|
|
|
|
return recordLoc.getOfs() & 1;
|
|
|
|
}
|
|
|
|
int isUsed() {
|
|
|
|
return !isUnused();
|
|
|
|
}
|
|
|
|
};
|
2008-06-06 15:43:15 +02:00
|
|
|
|
|
|
|
#pragma pack(pop)
|
|
|
|
|
2009-01-15 16:17:11 +01:00
|
|
|
class BucketBasics;
|
2008-06-06 15:43:15 +02:00
|
|
|
|
2009-01-15 16:17:11 +01:00
|
|
|
/* wrapper - this is our in memory representation of the key. _KeyNode is the disk representation. */
|
|
|
|
class KeyNode {
|
|
|
|
public:
|
|
|
|
KeyNode(BucketBasics& bb, _KeyNode &k);
|
|
|
|
DiskLoc& prevChildBucket;
|
|
|
|
DiskLoc& recordLoc;
|
|
|
|
BSONObj key;
|
|
|
|
};
|
2008-06-06 15:43:15 +02:00
|
|
|
|
2008-08-11 20:53:08 +02:00
|
|
|
#pragma pack(push,1)
|
2008-06-06 15:43:15 +02:00
|
|
|
|
2009-01-15 16:17:11 +01:00
|
|
|
/* this class is all about the storage management */
|
|
|
|
class BucketBasics {
|
|
|
|
friend class KeyNode;
|
|
|
|
public:
|
|
|
|
void dumpTree(DiskLoc thisLoc, const BSONObj &order);
|
|
|
|
bool isHead() {
|
|
|
|
return parent.isNull();
|
|
|
|
}
|
|
|
|
void assertValid(const BSONObj &order, bool force = false);
|
|
|
|
int fullValidate(const DiskLoc& thisLoc, const BSONObj &order); /* traverses everything */
|
|
|
|
protected:
|
|
|
|
DiskLoc& getChild(int pos) {
|
|
|
|
assert( pos >= 0 && pos <= n );
|
|
|
|
return pos == n ? nextChild : k(pos).prevChildBucket;
|
|
|
|
}
|
|
|
|
KeyNode keyNode(int i) {
|
|
|
|
assert( i < n );
|
|
|
|
return KeyNode(*this, k(i));
|
|
|
|
}
|
|
|
|
|
|
|
|
char * dataAt(short ofs) {
|
|
|
|
return data + ofs;
|
|
|
|
}
|
|
|
|
|
|
|
|
void init(); // initialize a new node
|
|
|
|
|
|
|
|
/* returns false if node is full and must be split
|
|
|
|
keypos is where to insert -- inserted after that key #. so keypos=0 is the leftmost one.
|
|
|
|
*/
|
|
|
|
bool basicInsert(int keypos, const DiskLoc& recordLoc, BSONObj& key, const BSONObj &order);
|
|
|
|
void pushBack(const DiskLoc& recordLoc, BSONObj& key, const BSONObj &order, DiskLoc prevChild);
|
|
|
|
void _delKeyAtPos(int keypos); // low level version that doesn't deal with child ptrs.
|
|
|
|
|
|
|
|
/* !Packed means there is deleted fragment space within the bucket.
|
|
|
|
We "repack" when we run out of space before considering the node
|
|
|
|
to be full.
|
|
|
|
*/
|
|
|
|
enum Flags { Packed=1 };
|
|
|
|
|
|
|
|
DiskLoc childForPos(int p) {
|
|
|
|
return p == n ? nextChild : k(p).prevChildBucket;
|
|
|
|
}
|
|
|
|
|
|
|
|
int totalDataSize() const;
|
|
|
|
void pack( const BSONObj &order );
|
|
|
|
void setNotPacked();
|
|
|
|
void setPacked();
|
|
|
|
int _alloc(int bytes);
|
|
|
|
void truncateTo(int N, const BSONObj &order);
|
|
|
|
void markUnused(int keypos);
|
|
|
|
public:
|
|
|
|
DiskLoc parent;
|
|
|
|
|
|
|
|
string bucketSummary() const {
|
|
|
|
stringstream ss;
|
|
|
|
ss << " Bucket info:" << endl;
|
|
|
|
ss << " n: " << n << endl;
|
|
|
|
ss << " parent: " << parent.toString() << endl;
|
|
|
|
ss << " nextChild: " << parent.toString() << endl;
|
|
|
|
ss << " Size: " << _Size << " flags:" << flags << endl;
|
|
|
|
ss << " emptySize: " << emptySize << " topSize: " << topSize << endl;
|
|
|
|
return ss.str();
|
|
|
|
}
|
|
|
|
|
|
|
|
protected:
|
|
|
|
void _shape(int level, stringstream&);
|
|
|
|
DiskLoc nextChild; // child bucket off and to the right of the highest key.
|
|
|
|
int _Size; // total size of this btree node in bytes. constant.
|
|
|
|
int Size() const;
|
|
|
|
int flags;
|
|
|
|
int emptySize; // size of the empty region
|
|
|
|
int topSize; // size of the data at the top of the bucket (keys are at the beginning or 'bottom')
|
|
|
|
int n; // # of keys so far.
|
|
|
|
int reserved;
|
|
|
|
_KeyNode& k(int i) {
|
|
|
|
return ((_KeyNode*)data)[i];
|
|
|
|
}
|
|
|
|
char data[4];
|
|
|
|
};
|
|
|
|
|
|
|
|
class BtreeBucket : public BucketBasics {
|
|
|
|
friend class BtreeCursor;
|
|
|
|
public:
|
|
|
|
void dump();
|
|
|
|
|
|
|
|
static DiskLoc addHead(IndexDetails&); /* start a new index off, empty */
|
2009-01-26 23:23:45 +01:00
|
|
|
|
|
|
|
int bt_insert(DiskLoc thisLoc, DiskLoc recordLoc,
|
2009-01-15 16:17:11 +01:00
|
|
|
BSONObj& key, const BSONObj &order, bool dupsAllowed,
|
2009-01-26 23:23:45 +01:00
|
|
|
IndexDetails& idx, bool toplevel = true);
|
2009-01-15 16:17:11 +01:00
|
|
|
|
|
|
|
bool unindex(const DiskLoc& thisLoc, IndexDetails& id, BSONObj& key, const DiskLoc& recordLoc);
|
|
|
|
|
|
|
|
/* locate may return an "unused" key that is just a marker. so be careful.
|
|
|
|
looks for a key:recordloc pair.
|
|
|
|
*/
|
|
|
|
DiskLoc locate(const DiskLoc& thisLoc, BSONObj& key, const BSONObj &order, int& pos, bool& found, DiskLoc recordLoc, int direction=1);
|
|
|
|
|
|
|
|
/* advance one key position in the index: */
|
|
|
|
DiskLoc advance(const DiskLoc& thisLoc, int& keyOfs, int direction, const char *caller);
|
|
|
|
DiskLoc getHead(const DiskLoc& thisLoc);
|
|
|
|
|
|
|
|
/* get tree shape */
|
|
|
|
void shape(stringstream&);
|
|
|
|
private:
|
|
|
|
void fixParentPtrs(const DiskLoc& thisLoc);
|
|
|
|
void delBucket(const DiskLoc& thisLoc, IndexDetails&);
|
|
|
|
void delKeyAtPos(const DiskLoc& thisLoc, IndexDetails& id, int p);
|
|
|
|
BSONObj keyAt(int keyOfs) {
|
|
|
|
return keyOfs >= n ? BSONObj() : keyNode(keyOfs).key;
|
|
|
|
}
|
|
|
|
static BtreeBucket* allocTemp(); /* caller must release with free() */
|
|
|
|
void insertHere(DiskLoc thisLoc, int keypos,
|
|
|
|
DiskLoc recordLoc, BSONObj& key, const BSONObj &order,
|
|
|
|
DiskLoc lchild, DiskLoc rchild, IndexDetails&);
|
|
|
|
int _insert(DiskLoc thisLoc, DiskLoc recordLoc,
|
|
|
|
BSONObj& key, const BSONObj &order, bool dupsAllowed,
|
|
|
|
DiskLoc lChild, DiskLoc rChild, IndexDetails&);
|
2009-01-26 23:23:45 +01:00
|
|
|
bool find(BSONObj& key, DiskLoc recordLoc, const BSONObj &order, int& pos, bool assertIfDup);
|
2009-01-15 16:17:11 +01:00
|
|
|
static void findLargestKey(const DiskLoc& thisLoc, DiskLoc& largestLoc, int& largestKey);
|
|
|
|
};
|
|
|
|
|
|
|
|
class BtreeCursor : public Cursor {
|
|
|
|
friend class BtreeBucket;
|
|
|
|
BSONObj startKey;
|
|
|
|
BSONObj endKey;
|
2008-11-18 21:47:37 +01:00
|
|
|
// BSONObj query; // the query we are working on in association with the cursor -- see noMoreMatches()
|
2009-01-15 16:17:11 +01:00
|
|
|
public:
|
2009-01-20 22:54:57 +01:00
|
|
|
BtreeCursor(IndexDetails&, const BSONObj& startKey, int direction, const BSONObj& query);
|
2009-01-15 16:17:11 +01:00
|
|
|
virtual bool ok() {
|
|
|
|
return !bucket.isNull();
|
|
|
|
}
|
|
|
|
bool eof() {
|
|
|
|
return !ok();
|
|
|
|
}
|
|
|
|
virtual bool advance();
|
|
|
|
|
|
|
|
virtual void noteLocation(); // updates keyAtKeyOfs...
|
|
|
|
virtual void checkLocation();
|
|
|
|
|
|
|
|
_KeyNode& _currKeyNode() {
|
|
|
|
assert( !bucket.isNull() );
|
|
|
|
_KeyNode& kn = bucket.btree()->k(keyOfs);
|
|
|
|
assert( kn.isUsed() );
|
|
|
|
return kn;
|
|
|
|
}
|
|
|
|
KeyNode currKeyNode() {
|
|
|
|
assert( !bucket.isNull() );
|
|
|
|
return bucket.btree()->keyNode(keyOfs);
|
|
|
|
}
|
|
|
|
BSONObj currKey() {
|
|
|
|
return currKeyNode().key;
|
|
|
|
}
|
|
|
|
|
|
|
|
virtual BSONObj indexKeyPattern() {
|
|
|
|
return indexDetails.keyPattern();
|
|
|
|
}
|
|
|
|
|
|
|
|
virtual void aboutToDeleteBucket(const DiskLoc& b) {
|
|
|
|
if ( bucket == b )
|
|
|
|
keyOfs = -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
virtual DiskLoc currLoc() {
|
|
|
|
return !bucket.isNull() ? _currKeyNode().recordLoc : DiskLoc();
|
|
|
|
}
|
|
|
|
virtual Record* _current() {
|
|
|
|
return currLoc().rec();
|
|
|
|
}
|
|
|
|
virtual BSONObj current() {
|
|
|
|
return BSONObj(_current());
|
|
|
|
}
|
|
|
|
virtual string toString() {
|
|
|
|
string s = string("BtreeCursor ") + indexDetails.indexName();
|
|
|
|
if ( direction < 0 ) s += " reverse";
|
|
|
|
return s;
|
|
|
|
}
|
|
|
|
|
2009-01-15 19:04:25 +01:00
|
|
|
virtual BSONObj prettyStartKey() const {
|
|
|
|
vector< string > fieldNames;
|
|
|
|
getIndexFields( fieldNames );
|
|
|
|
return startKey.replaceFieldNames( fieldNames ).clientReadable();
|
|
|
|
}
|
|
|
|
virtual BSONObj prettyEndKey() const {
|
|
|
|
vector< string > fieldNames;
|
|
|
|
getIndexFields( fieldNames );
|
|
|
|
return endKey.replaceFieldNames( fieldNames ).clientReadable();
|
|
|
|
}
|
|
|
|
|
2009-01-15 16:17:11 +01:00
|
|
|
private:
|
2009-01-27 21:35:28 +01:00
|
|
|
/* set startKey and endKey -- the bounding keys for the query range. */
|
2009-01-15 16:17:11 +01:00
|
|
|
void findExtremeKeys( const BSONObj &query );
|
2009-01-27 21:35:28 +01:00
|
|
|
|
2009-01-15 16:17:11 +01:00
|
|
|
void findExtremeInequalityValues( const BSONElement &e,
|
|
|
|
BSONElement &lowest,
|
|
|
|
BSONElement &highest );
|
2009-01-15 19:04:25 +01:00
|
|
|
void getIndexFields( vector< string > &fields ) const {
|
|
|
|
return getFields( indexDetails.keyPattern(), fields );
|
|
|
|
}
|
|
|
|
static void getFields( const BSONObj &key, vector< string > &fields );
|
2009-01-27 21:35:28 +01:00
|
|
|
|
|
|
|
/* Our btrees may (rarely) have "unused" keys when items are deleted.
|
|
|
|
Skip past them.
|
|
|
|
*/
|
|
|
|
void skipUnusedKeys();
|
|
|
|
|
|
|
|
/* Check if the current key is beyond endKey. */
|
2009-01-15 16:17:11 +01:00
|
|
|
void checkEnd();
|
2009-01-27 21:35:28 +01:00
|
|
|
|
2009-01-15 16:17:11 +01:00
|
|
|
IndexDetails& indexDetails;
|
|
|
|
BSONObj order;
|
|
|
|
DiskLoc bucket;
|
|
|
|
int keyOfs;
|
|
|
|
int direction; // 1=fwd,-1=reverse
|
|
|
|
BSONObj keyAtKeyOfs; // so we can tell if things moved around on us between the query and the getMore call
|
|
|
|
DiskLoc locAtKeyOfs;
|
|
|
|
};
|
2008-06-06 15:43:15 +02:00
|
|
|
|
|
|
|
#pragma pack(pop)
|
2009-01-14 23:09:51 +01:00
|
|
|
|
|
|
|
} // namespace mongo;
|