mirror of
https://github.com/mongodb/mongo.git
synced 2024-12-01 09:32:32 +01:00
56 bit disklocs in btree v1
This commit is contained in:
parent
e95cab5429
commit
8e1b5c26ab
87
db/btree.cpp
87
db/btree.cpp
@ -28,6 +28,7 @@
|
||||
#include "stats/counters.h"
|
||||
#include "dur_commitjob.h"
|
||||
#include "btreebuilder.h"
|
||||
#include "../util/unittest.h"
|
||||
|
||||
namespace mongo {
|
||||
|
||||
@ -36,16 +37,11 @@ namespace mongo {
|
||||
|
||||
#define VERIFYTHISLOC dassert( thisLoc.btree<V>() == this );
|
||||
|
||||
_KeyNode& _KeyNode::writing() const {
|
||||
return *getDur().writing( const_cast< _KeyNode* >( this ) );
|
||||
template< class Loc >
|
||||
__KeyNode<Loc> & __KeyNode<Loc>::writing() const {
|
||||
return *getDur().writing( const_cast< __KeyNode<Loc> * >( this ) );
|
||||
}
|
||||
|
||||
template< class V >
|
||||
BucketBasics<V>::KeyNode::KeyNode(const BucketBasics<V>& bb, const _KeyNode &k) :
|
||||
prevChildBucket(k.prevChildBucket),
|
||||
recordLoc(k.recordLoc), key(bb.data+k.keyDataOfs())
|
||||
{ }
|
||||
|
||||
// BucketBasics::lowWaterMark()
|
||||
//
|
||||
// We define this value as the maximum number of bytes such that, if we have
|
||||
@ -227,9 +223,9 @@ namespace mongo {
|
||||
}
|
||||
else if ( z == 0 ) {
|
||||
if ( !(k(i).recordLoc < k(i+1).recordLoc) ) {
|
||||
out() << "ERROR: btree key order corrupt (recordloc's wrong). Keys:" << endl;
|
||||
out() << " k(" << i << "):" << keyNode(i).key.toString() << " RL:" << k(i).recordLoc.toString() << endl;
|
||||
out() << " k(" << i+1 << "):" << keyNode(i+1).key.toString() << " RL:" << k(i+1).recordLoc.toString() << endl;
|
||||
out() << "ERROR: btree key order corrupt (recordloc's wrong):" << endl;
|
||||
out() << " k(" << i << ")" << keyNode(i).key.toString() << " RL:" << k(i).recordLoc.toString() << endl;
|
||||
out() << " k(" << i+1 << ")" << keyNode(i+1).key.toString() << " RL:" << k(i+1).recordLoc.toString() << endl;
|
||||
wassert( k(i).recordLoc < k(i+1).recordLoc );
|
||||
}
|
||||
}
|
||||
@ -734,8 +730,10 @@ namespace mongo {
|
||||
* note result might be an Unused location!
|
||||
*/
|
||||
template< class V >
|
||||
bool BtreeBucket<V>::find(const IndexDetails& idx, const Key& key, const DiskLoc &recordLoc,
|
||||
bool BtreeBucket<V>::find(const IndexDetails& idx, const Key& key, const DiskLoc &rl,
|
||||
const Ordering &order, int& pos, bool assertIfDup) const {
|
||||
Loc recordLoc;
|
||||
recordLoc = rl;
|
||||
globalIndexCounters.btree( (char*)this );
|
||||
|
||||
// binary search for this key
|
||||
@ -744,7 +742,7 @@ namespace mongo {
|
||||
int h=this->n-1;
|
||||
while ( l <= h ) {
|
||||
int m = (l+h)/2;
|
||||
KeyNode M = this->keyNode(m);
|
||||
KeyNode M = this->keyNode(m);
|
||||
int x = key.woCompare(M.key, order);
|
||||
if ( x == 0 ) {
|
||||
if( assertIfDup ) {
|
||||
@ -770,7 +768,7 @@ namespace mongo {
|
||||
}
|
||||
|
||||
// dup keys allowed. use recordLoc as if it is part of the key
|
||||
DiskLoc unusedRL = M.recordLoc;
|
||||
Loc unusedRL = M.recordLoc;
|
||||
unusedRL.GETOFS() &= ~1; // so we can test equality without the used bit messing us up
|
||||
x = recordLoc.compare(unusedRL);
|
||||
}
|
||||
@ -914,8 +912,10 @@ namespace mongo {
|
||||
advanceLoc.btreemod<V>()->delKeyAtPos( advanceLoc, id, advanceKeyOfs, order );
|
||||
}
|
||||
|
||||
#define BTREE(loc) (static_cast<DiskLoc>(loc).btree<V>())
|
||||
#define BTREEMOD(loc) (static_cast<DiskLoc>(loc).btreemod<V>())
|
||||
//#define BTREE(loc) (static_cast<DiskLoc>(loc).btree<V>())
|
||||
#define BTREE(loc) (loc.btree<V>())
|
||||
//#define BTREEMOD(loc) (static_cast<DiskLoc>(loc).btreemod<V>())
|
||||
#define BTREEMOD(loc) (loc.btreemod<V>())
|
||||
|
||||
template< class V >
|
||||
void BtreeBucket<V>::replaceWithNextChild( const DiskLoc thisLoc, IndexDetails &id ) {
|
||||
@ -929,7 +929,9 @@ namespace mongo {
|
||||
ll.btree<V>()->childForPos( indexInParent( thisLoc ) ).writing() = this->nextChild;
|
||||
}
|
||||
BTREE(this->nextChild)->parent.writing() = this->parent;
|
||||
(static_cast<DiskLoc>(this->nextChild).btree<V>())->parent.writing() = this->parent;
|
||||
|
||||
(this->nextChild.btree<V>())->parent.writing() = this->parent;
|
||||
//(static_cast<DiskLoc>(this->nextChild).btree<V>())->parent.writing() = this->parent;
|
||||
ClientCursor::informAboutToDeleteBucket( thisLoc );
|
||||
deallocBucket( thisLoc, id );
|
||||
}
|
||||
@ -1214,15 +1216,13 @@ namespace mongo {
|
||||
bool BtreeBucket<V>::unindex(const DiskLoc thisLoc, IndexDetails& id, const BSONObj& key, const DiskLoc recordLoc ) const {
|
||||
int pos;
|
||||
bool found;
|
||||
DiskLoc loc = locate(id, thisLoc, key, Ordering::make(id.keyPattern()), pos, found, recordLoc, 1);
|
||||
const Ordering ord = Ordering::make(id.keyPattern());
|
||||
DiskLoc loc = locate(id, thisLoc, key, ord, pos, found, recordLoc, 1);
|
||||
if ( found ) {
|
||||
|
||||
if ( key.objsize() > KeyMax ) {
|
||||
OCCASIONALLY problem() << "unindex: key too large to index but was found for " << id.indexNamespace() << " reIndex suggested" << endl;
|
||||
}
|
||||
|
||||
loc.btreemod<V>()->delKeyAtPos(loc, id, pos, Ordering::make(id.keyPattern()));
|
||||
|
||||
}
|
||||
loc.btreemod<V>()->delKeyAtPos(loc, id, pos, ord);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
@ -1239,7 +1239,7 @@ namespace mongo {
|
||||
inline void BtreeBucket<V>::fix(const DiskLoc thisLoc, const DiskLoc child) {
|
||||
if ( !child.isNull() ) {
|
||||
if ( insert_debug )
|
||||
out() << " " << child.toString() << ".parent=" << thisLoc.toString() << endl;
|
||||
out() << " fix " << child.toString() << ".parent=" << thisLoc.toString() << endl;
|
||||
child.btree<V>()->parent.writing() = thisLoc;
|
||||
}
|
||||
}
|
||||
@ -1335,8 +1335,8 @@ namespace mongo {
|
||||
dump();
|
||||
assert(false);
|
||||
}
|
||||
const DiskLoc *pc = &k(keypos+1).prevChildBucket;
|
||||
*getDur().alreadyDeclared((DiskLoc*) pc) = rchild; // declared in basicInsert()
|
||||
const Loc *pc = &k(keypos+1).prevChildBucket;
|
||||
*getDur().alreadyDeclared( const_cast<Loc*>(pc) ) = rchild; // declared in basicInsert()
|
||||
if ( !rchild.isNull() )
|
||||
rchild.btree<V>()->parent.writing() = thisLoc;
|
||||
}
|
||||
@ -1513,7 +1513,7 @@ namespace mongo {
|
||||
DiskLoc child = this->childForPos(p);
|
||||
|
||||
if ( !child.isNull() ) {
|
||||
DiskLoc l = BTREE(child)->locate(idx, child, key, order, pos, found, recordLoc, direction);
|
||||
DiskLoc l = BTREE(child)->locate(idx, child, key, order, pos, found, recordLoc, direction);
|
||||
if ( !l.isNull() )
|
||||
return l;
|
||||
}
|
||||
@ -1716,7 +1716,8 @@ namespace mongo {
|
||||
}
|
||||
|
||||
DEBUGGING out() << "TEMP: key: " << key.toString() << endl;
|
||||
DiskLoc child = this->childForPos(pos);
|
||||
Loc ch = this->childForPos(pos);
|
||||
DiskLoc child = ch;
|
||||
if ( insert_debug )
|
||||
out() << " getChild(" << pos << "): " << child.toString() << endl;
|
||||
// In current usage, rChild isNull() for a new key and false when we are
|
||||
@ -1841,20 +1842,17 @@ namespace mongo {
|
||||
|
||||
b->dumpTree(id.head, orderObj);
|
||||
|
||||
/* b->bt_insert(id.head, B, key, order, false, id);
|
||||
/* b->bt_insert(id.head, B, key, order, false, id);
|
||||
b->k(1).setUnused();
|
||||
|
||||
b->dumpTree(id.head, order);
|
||||
|
||||
b->bt_insert(id.head, A, key, order, false, id);
|
||||
|
||||
b->dumpTree(id.head, order);
|
||||
*/
|
||||
|
||||
// this should assert. does it? (it might "accidentally" though, not asserting proves a problem, asserting proves nothing)
|
||||
b->bt_insert(id.head, C, key, order, false, id);
|
||||
|
||||
// b->dumpTree(id.head, order);
|
||||
// b->dumpTree(id.head, order);
|
||||
}
|
||||
|
||||
template class BucketBasics<V0>;
|
||||
@ -1862,4 +1860,27 @@ namespace mongo {
|
||||
template class BtreeBucket<V0>;
|
||||
template class BtreeBucket<V1>;
|
||||
|
||||
struct BTUnitTest : public UnitTest {
|
||||
void run() {
|
||||
DiskLoc big(0xf12312, 0x70001234);
|
||||
DiskLoc56Bit bigl;
|
||||
{
|
||||
bigl = big;
|
||||
assert( big == bigl );
|
||||
DiskLoc e = bigl;
|
||||
assert( big == e );
|
||||
}
|
||||
{
|
||||
DiskLoc d;
|
||||
assert( d.isNull() );
|
||||
DiskLoc56Bit l;
|
||||
l = d;
|
||||
assert( l.isNull() );
|
||||
d = l;
|
||||
assert( d.isNull() );
|
||||
assert( l < bigl );
|
||||
}
|
||||
}
|
||||
} btunittest;
|
||||
|
||||
}
|
||||
|
137
db/btree.h
137
db/btree.h
@ -66,7 +66,7 @@ namespace mongo {
|
||||
const int OldBucketSize = 8192;
|
||||
|
||||
// largest key size we allow. note we very much need to support bigger keys (somehow) in the future.
|
||||
const int KeyMax = OldBucketSize / 10;\
|
||||
const int KeyMax = OldBucketSize / 10;
|
||||
|
||||
#pragma pack(1)
|
||||
template< class Version > class BucketBasics;
|
||||
@ -76,19 +76,19 @@ namespace mongo {
|
||||
* bucket. It contains an offset pointer to the variable width bson
|
||||
* data component. A _KeyNode may be 'unused', please see below.
|
||||
*/
|
||||
struct _KeyNode {
|
||||
template< class Loc >
|
||||
struct __KeyNode {
|
||||
/** Signals that we are writing this _KeyNode and casts away const */
|
||||
_KeyNode& writing() const;
|
||||
__KeyNode<Loc> & writing() const;
|
||||
/**
|
||||
* The 'left' child bucket of this key. If this is the i-th key, it
|
||||
* points to the i index child bucket.
|
||||
*/
|
||||
DiskLoc prevChildBucket;
|
||||
Loc prevChildBucket;
|
||||
/** The location of the record associated with this key. */
|
||||
DiskLoc recordLoc;
|
||||
short keyDataOfs() const {
|
||||
return (short) _kdo;
|
||||
}
|
||||
Loc recordLoc;
|
||||
short keyDataOfs() const { return (short) _kdo; }
|
||||
|
||||
/** Offset within current bucket of the variable width bson key for this _KeyNode. */
|
||||
unsigned short _kdo;
|
||||
void setKeyDataOfs(short s) {
|
||||
@ -175,17 +175,96 @@ namespace mongo {
|
||||
char data[4];
|
||||
|
||||
public:
|
||||
typedef __KeyNode<DiskLoc> _KeyNode;
|
||||
typedef DiskLoc Loc;
|
||||
typedef KeyBson Key;
|
||||
typedef KeyBson KeyOwned;
|
||||
enum { BucketSize = 8192 };
|
||||
};
|
||||
|
||||
// a a a ofs ofs ofs ofs
|
||||
class DiskLoc56Bit {
|
||||
int ofs;
|
||||
unsigned char _a[3];
|
||||
unsigned long long Z() const {
|
||||
// endian
|
||||
return *((unsigned long long*)this) & 0x00ffffffffffffffULL;
|
||||
}
|
||||
enum {
|
||||
// first bit of offsets used in _KeyNode we don't use -1 here.
|
||||
OurNullOfs = -2
|
||||
};
|
||||
public:
|
||||
template< class V >
|
||||
const BtreeBucket<V> * btree() const {
|
||||
return DiskLoc(*this).btree<V>();
|
||||
}
|
||||
template< class V >
|
||||
BtreeBucket<V> * btreemod() const {
|
||||
return DiskLoc(*this).btreemod<V>();
|
||||
}
|
||||
operator DiskLoc() const {
|
||||
// endian
|
||||
if( isNull() ) return DiskLoc();
|
||||
unsigned a = *((unsigned *) (_a-1));
|
||||
return DiskLoc(a >> 8, ofs);
|
||||
}
|
||||
int& GETOFS() { return ofs; }
|
||||
int getOfs() const { return ofs; }
|
||||
bool operator<(const DiskLoc56Bit& rhs) const {
|
||||
// the orderering of dup keys in btrees isn't too critical, but we'd like to put items that are
|
||||
// close together on disk close together in the tree, so we do want the file # to be the most significant
|
||||
// bytes
|
||||
return Z() < rhs.Z();
|
||||
}
|
||||
int compare(const DiskLoc56Bit& rhs) const {
|
||||
unsigned long long a = Z();
|
||||
unsigned long long b = rhs.Z();
|
||||
if( a < b ) return -1;
|
||||
return a == b ? 0 : 1;
|
||||
}
|
||||
bool operator==(const DiskLoc56Bit& rhs) const { return Z() == rhs.Z(); }
|
||||
bool operator!=(const DiskLoc56Bit& rhs) const { return Z() != rhs.Z(); }
|
||||
bool operator==(const DiskLoc& rhs) const {
|
||||
return DiskLoc(*this) == rhs;
|
||||
}
|
||||
bool operator!=(const DiskLoc& rhs) const { return !(*this==rhs); }
|
||||
bool isNull() const { return ofs < 0; }
|
||||
void Null() {
|
||||
ofs = OurNullOfs;
|
||||
_a[0] = _a[1] = _a[2] = 0;
|
||||
}
|
||||
string toString() const { return DiskLoc(*this).toString(); }
|
||||
void operator=(const DiskLoc& loc) {
|
||||
ofs = loc.getOfs();
|
||||
int la = loc.a();
|
||||
assert( la <= 0xffffff ); // must fit in 3 bytes
|
||||
if( la < 0 ) {
|
||||
assert( la == -1 );
|
||||
la = 0;
|
||||
ofs = OurNullOfs;
|
||||
}
|
||||
memcpy(_a, &la, 3); // endian
|
||||
dassert( ofs != 0 );
|
||||
}
|
||||
DiskLoc56Bit& writing() const {
|
||||
return *((DiskLoc56Bit*) getDur().writingPtr((void*)this, 7));
|
||||
}
|
||||
};
|
||||
|
||||
class BtreeData_V1 {
|
||||
public:
|
||||
typedef DiskLoc56Bit Loc;
|
||||
//typedef DiskLoc Loc;
|
||||
typedef __KeyNode<Loc> _KeyNode;
|
||||
typedef KeyV1 Key;
|
||||
typedef KeyV1Owned KeyOwned;
|
||||
enum { BucketSize = 8192-16 }; // leave room for Record header
|
||||
protected:
|
||||
/** Parent bucket of this bucket, which isNull() for the root bucket. */
|
||||
DiskLoc parent;
|
||||
Loc parent;
|
||||
/** Given that there are n keys, this is the n index child. */
|
||||
DiskLoc nextChild;
|
||||
Loc nextChild;
|
||||
|
||||
unsigned short flags;
|
||||
|
||||
@ -202,11 +281,6 @@ namespace mongo {
|
||||
char data[4];
|
||||
|
||||
void _init() { }
|
||||
|
||||
public:
|
||||
typedef KeyV1 Key;
|
||||
typedef KeyV1Owned KeyOwned;
|
||||
enum { BucketSize = 8192-16 }; // leave room for Record header
|
||||
};
|
||||
|
||||
typedef BtreeData_V0 V0;
|
||||
@ -244,6 +318,8 @@ namespace mongo {
|
||||
public:
|
||||
template <class U> friend class BtreeBuilder;
|
||||
typedef typename Version::Key Key;
|
||||
typedef typename Version::_KeyNode _KeyNode;
|
||||
typedef typename Version::Loc Loc;
|
||||
|
||||
int getN() const { return this->n; }
|
||||
|
||||
@ -256,8 +332,8 @@ namespace mongo {
|
||||
class KeyNode {
|
||||
public:
|
||||
KeyNode(const BucketBasics<Version>& bb, const _KeyNode &k);
|
||||
const DiskLoc& prevChildBucket;
|
||||
const DiskLoc& recordLoc;
|
||||
const Loc& prevChildBucket;
|
||||
const Loc& recordLoc;
|
||||
/* Points to the bson key storage for a _KeyNode */
|
||||
Key key;
|
||||
};
|
||||
@ -375,8 +451,8 @@ namespace mongo {
|
||||
enum Flags { Packed=1 };
|
||||
|
||||
/** n == 0 is ok */
|
||||
const DiskLoc& childForPos(int p) const { return p == this->n ? this->nextChild : k(p).prevChildBucket; }
|
||||
DiskLoc& childForPos(int p) { return p == this->n ? this->nextChild : k(p).prevChildBucket; }
|
||||
const Loc& childForPos(int p) const { return p == this->n ? this->nextChild : k(p).prevChildBucket; }
|
||||
Loc& childForPos(int p) { return p == this->n ? this->nextChild : k(p).prevChildBucket; }
|
||||
|
||||
/** Same as bodySize(). */
|
||||
int totalDataSize() const;
|
||||
@ -450,8 +526,8 @@ namespace mongo {
|
||||
* BtreeBuilder uses the parent var as a temp place to maintain a linked list chain.
|
||||
* we use tempNext() when we do that to be less confusing. (one might have written a union in C)
|
||||
*/
|
||||
const DiskLoc& tempNext() const { return this->parent; }
|
||||
DiskLoc& tempNext() { return this->parent; }
|
||||
DiskLoc tempNext() const { return this->parent; }
|
||||
void setTempNext(DiskLoc l) { this->parent = l; }
|
||||
|
||||
void _shape(int level, stringstream&) const;
|
||||
int Size() const;
|
||||
@ -915,7 +991,7 @@ namespace mongo {
|
||||
virtual bool ok() { return !bucket.isNull(); }
|
||||
virtual bool advance();
|
||||
virtual void noteLocation(); // updates keyAtKeyOfs...
|
||||
virtual void checkLocation();
|
||||
virtual void checkLocation() = 0;
|
||||
virtual bool supportGetMore() { return true; }
|
||||
virtual bool supportYields() { return true; }
|
||||
|
||||
@ -937,12 +1013,12 @@ namespace mongo {
|
||||
virtual bool modifiedKeys() const { return _multikey; }
|
||||
virtual bool isMultiKey() const { return _multikey; }
|
||||
|
||||
const _KeyNode& _currKeyNode() const {
|
||||
/*const _KeyNode& _currKeyNode() const {
|
||||
assert( !bucket.isNull() );
|
||||
const _KeyNode& kn = keyNode(keyOfs);
|
||||
assert( kn.isUsed() );
|
||||
return kn;
|
||||
}
|
||||
}*/
|
||||
|
||||
/** returns BSONObj() if ofs is out of range */
|
||||
virtual BSONObj keyAt(int ofs) const = 0;
|
||||
@ -979,12 +1055,16 @@ namespace mongo {
|
||||
/** for debugging only */
|
||||
const DiskLoc getBucket() const { return bucket; }
|
||||
|
||||
// just for unit tests
|
||||
virtual bool curKeyHasChild() = 0;
|
||||
|
||||
protected:
|
||||
/**
|
||||
* Our btrees may (rarely) have "unused" keys when items are deleted.
|
||||
* Skip past them.
|
||||
*/
|
||||
bool skipUnusedKeys( bool mayJump );
|
||||
virtual bool skipUnusedKeys( bool mayJump ) = 0;
|
||||
|
||||
bool skipOutOfRangeKeysAndCheckEnd();
|
||||
void skipAndCheck();
|
||||
void checkEnd();
|
||||
@ -994,7 +1074,6 @@ namespace mongo {
|
||||
|
||||
virtual void _audit() = 0;
|
||||
virtual DiskLoc _locate(const BSONObj& key, const DiskLoc& loc) = 0;
|
||||
virtual const _KeyNode& keyNode(int keyOfs) const = 0;
|
||||
virtual DiskLoc _advance(const DiskLoc& thisLoc, int& keyOfs, int direction, const char *caller) = 0;
|
||||
virtual void _advanceTo(DiskLoc &thisLoc, int &keyOfs, const BSONObj &keyBegin, int keyBeginLen, bool afterKey, const vector< const BSONElement * > &keyEnd, const vector< bool > &keyEndInclusive, const Ordering &order, int direction ) = 0;
|
||||
|
||||
@ -1041,4 +1120,10 @@ namespace mongo {
|
||||
return static_cast< BtreeBucket<V>* >( getDur().writingPtr( b, V::BucketSize ) );
|
||||
}
|
||||
|
||||
template< class V >
|
||||
BucketBasics<V>::KeyNode::KeyNode(const BucketBasics<V>& bb, const _KeyNode &k) :
|
||||
prevChildBucket(k.prevChildBucket),
|
||||
recordLoc(k.recordLoc), key(bb.data+k.keyDataOfs())
|
||||
{ }
|
||||
|
||||
} // namespace mongo;
|
||||
|
@ -48,7 +48,7 @@ namespace mongo {
|
||||
template<class V>
|
||||
void BtreeBuilder<V>::newBucket() {
|
||||
DiskLoc L = BtreeBucket<V>::addBucket(idx);
|
||||
b->tempNext() = L;
|
||||
b->setTempNext(L);
|
||||
cur = L;
|
||||
b = cur.btreemod<V>();
|
||||
}
|
||||
@ -123,7 +123,7 @@ namespace mongo {
|
||||
if ( ! up->_pushBack(r, k, ordering, keepLoc) ) {
|
||||
// current bucket full
|
||||
DiskLoc n = BtreeBucket<V>::addBucket(idx);
|
||||
up->tempNext() = n;
|
||||
up->setTempNext(n);
|
||||
upLoc = n;
|
||||
up = upLoc.btreemod<V>();
|
||||
up->pushBack(r, k, ordering, keepLoc);
|
||||
|
@ -32,6 +32,7 @@ namespace mongo {
|
||||
public:
|
||||
typedef typename BucketBasics<V>::KeyNode KeyNode;
|
||||
typedef typename V::Key Key;
|
||||
typedef typename V::_KeyNode _KeyNode;
|
||||
|
||||
BtreeCursorImpl(NamespaceDetails *a, int b, const IndexDetails& c, const BSONObj &d, const BSONObj &e, bool f, int g) :
|
||||
BtreeCursor(a,b,c,d,e,f,g) { }
|
||||
@ -60,6 +61,87 @@ namespace mongo {
|
||||
return bucket.btree<V>()->keyNode(keyOfs).key.toBson();
|
||||
}
|
||||
|
||||
virtual bool curKeyHasChild() {
|
||||
return !currKeyNode().prevChildBucket.isNull();
|
||||
}
|
||||
|
||||
bool skipUnusedKeys( bool mayJump ) {
|
||||
int u = 0;
|
||||
while ( 1 ) {
|
||||
if ( !ok() )
|
||||
break;
|
||||
const _KeyNode& kn = keyNode(keyOfs);
|
||||
if ( kn.isUsed() )
|
||||
break;
|
||||
bucket = _advance(bucket, keyOfs, _direction, "skipUnusedKeys");
|
||||
u++;
|
||||
//don't include unused keys in nscanned
|
||||
//++_nscanned;
|
||||
if ( mayJump && ( u % 10 == 0 ) ) {
|
||||
skipOutOfRangeKeysAndCheckEnd();
|
||||
}
|
||||
}
|
||||
if ( u > 10 )
|
||||
OCCASIONALLY log() << "btree unused skipped:" << u << '\n';
|
||||
return u;
|
||||
}
|
||||
|
||||
/* Since the last noteLocation(), our key may have moved around, and that old cached
|
||||
information may thus be stale and wrong (although often it is right). We check
|
||||
that here; if we have moved, we have to search back for where we were at.
|
||||
|
||||
i.e., after operations on the index, the BtreeCursor's cached location info may
|
||||
be invalid. This function ensures validity, so you should call it before using
|
||||
the cursor if other writers have used the database since the last noteLocation
|
||||
call.
|
||||
*/
|
||||
void checkLocation() {
|
||||
if ( eof() )
|
||||
return;
|
||||
|
||||
_multikey = d->isMultikey(idxNo);
|
||||
|
||||
if ( keyOfs >= 0 ) {
|
||||
assert( !keyAtKeyOfs.isEmpty() );
|
||||
|
||||
// Note keyAt() returns an empty BSONObj if keyOfs is now out of range,
|
||||
// which is possible as keys may have been deleted.
|
||||
int x = 0;
|
||||
while( 1 ) {
|
||||
// if ( b->keyAt(keyOfs).woEqual(keyAtKeyOfs) &&
|
||||
// b->k(keyOfs).recordLoc == locAtKeyOfs ) {
|
||||
if ( keyAt(keyOfs).shallowEqual(keyAtKeyOfs) ) {
|
||||
const _KeyNode& kn = keyNode(keyOfs);
|
||||
if( kn.recordLoc == locAtKeyOfs ) {
|
||||
if ( !kn.isUsed() ) {
|
||||
// we were deleted but still exist as an unused
|
||||
// marker key. advance.
|
||||
skipUnusedKeys( false );
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// we check one key earlier too, in case a key was just deleted. this is
|
||||
// important so that multi updates are reasonably fast.
|
||||
if( keyOfs == 0 || x++ )
|
||||
break;
|
||||
keyOfs--;
|
||||
}
|
||||
}
|
||||
|
||||
/* normally we don't get to here. when we do, old position is no longer
|
||||
valid and we must refind where we left off (which is expensive)
|
||||
*/
|
||||
|
||||
/* TODO: Switch to keep indexdetails and do idx.head! */
|
||||
bucket = _locate(keyAtKeyOfs, locAtKeyOfs);
|
||||
RARELY log() << "key seems to have moved in the index, refinding. " << bucket.toString() << endl;
|
||||
if ( ! bucket.isNull() )
|
||||
skipUnusedKeys( false );
|
||||
|
||||
}
|
||||
|
||||
protected:
|
||||
virtual void _advanceTo(DiskLoc &thisLoc, int &keyOfs, const BSONObj &keyBegin, int keyBeginLen, bool afterKey, const vector< const BSONElement * > &keyEnd, const vector< bool > &keyEndInclusive, const Ordering &order, int direction ) {
|
||||
thisLoc.btree<V>()->advanceTo(thisLoc, keyOfs, keyBegin, keyBeginLen, afterKey, keyEnd, keyEndInclusive, order, direction);
|
||||
@ -185,6 +267,10 @@ namespace mongo {
|
||||
if( v == 0 )
|
||||
return new BtreeCursorImpl<V0>(_d,_idxNo,_id,_bounds,_direction);
|
||||
uasserted(14801, str::stream() << "unsupported index version " << v);
|
||||
|
||||
// just check we are in sync with this method
|
||||
dassert( IndexDetails::isASupportedIndexVersionNumber(v) );
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -282,28 +368,6 @@ namespace mongo {
|
||||
return true;
|
||||
}
|
||||
|
||||
/* skip unused keys. */
|
||||
bool BtreeCursor::skipUnusedKeys( bool mayJump ) {
|
||||
int u = 0;
|
||||
while ( 1 ) {
|
||||
if ( !ok() )
|
||||
break;
|
||||
const _KeyNode& kn = keyNode(keyOfs);
|
||||
if ( kn.isUsed() )
|
||||
break;
|
||||
bucket = _advance(bucket, keyOfs, _direction, "skipUnusedKeys");
|
||||
u++;
|
||||
//don't include unused keys in nscanned
|
||||
//++_nscanned;
|
||||
if ( mayJump && ( u % 10 == 0 ) ) {
|
||||
skipOutOfRangeKeysAndCheckEnd();
|
||||
}
|
||||
}
|
||||
if ( u > 10 )
|
||||
OCCASIONALLY log() << "btree unused skipped:" << u << '\n';
|
||||
return u;
|
||||
}
|
||||
|
||||
// Return a value in the set {-1, 0, 1} to represent the sign of parameter i.
|
||||
int sgn( int i ) {
|
||||
if ( i == 0 )
|
||||
@ -355,62 +419,6 @@ namespace mongo {
|
||||
}
|
||||
}
|
||||
|
||||
/* Since the last noteLocation(), our key may have moved around, and that old cached
|
||||
information may thus be stale and wrong (although often it is right). We check
|
||||
that here; if we have moved, we have to search back for where we were at.
|
||||
|
||||
i.e., after operations on the index, the BtreeCursor's cached location info may
|
||||
be invalid. This function ensures validity, so you should call it before using
|
||||
the cursor if other writers have used the database since the last noteLocation
|
||||
call.
|
||||
*/
|
||||
void BtreeCursor::checkLocation() {
|
||||
if ( eof() )
|
||||
return;
|
||||
|
||||
_multikey = d->isMultikey(idxNo);
|
||||
|
||||
if ( keyOfs >= 0 ) {
|
||||
assert( !keyAtKeyOfs.isEmpty() );
|
||||
|
||||
// Note keyAt() returns an empty BSONObj if keyOfs is now out of range,
|
||||
// which is possible as keys may have been deleted.
|
||||
int x = 0;
|
||||
while( 1 ) {
|
||||
// if ( b->keyAt(keyOfs).woEqual(keyAtKeyOfs) &&
|
||||
// b->k(keyOfs).recordLoc == locAtKeyOfs ) {
|
||||
if ( keyAt(keyOfs).shallowEqual(keyAtKeyOfs) ) {
|
||||
const _KeyNode& kn = keyNode(keyOfs);
|
||||
if( kn.recordLoc == locAtKeyOfs ) {
|
||||
if ( !kn.isUsed() ) {
|
||||
// we were deleted but still exist as an unused
|
||||
// marker key. advance.
|
||||
skipUnusedKeys( false );
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// we check one key earlier too, in case a key was just deleted. this is
|
||||
// important so that multi updates are reasonably fast.
|
||||
if( keyOfs == 0 || x++ )
|
||||
break;
|
||||
keyOfs--;
|
||||
}
|
||||
}
|
||||
|
||||
/* normally we don't get to here. when we do, old position is no longer
|
||||
valid and we must refind where we left off (which is expensive)
|
||||
*/
|
||||
|
||||
/* TODO: Switch to keep indexdetails and do idx.head! */
|
||||
bucket = _locate(keyAtKeyOfs, locAtKeyOfs);
|
||||
RARELY log() << "key seems to have moved in the index, refinding. " << bucket.toString() << endl;
|
||||
if ( ! bucket.isNull() )
|
||||
skipUnusedKeys( false );
|
||||
|
||||
}
|
||||
|
||||
string BtreeCursor::toString() {
|
||||
string s = string("BtreeCursor ") + indexDetails.indexName();
|
||||
if ( _direction < 0 ) s += " reverse";
|
||||
|
@ -39,17 +39,18 @@ namespace mongo {
|
||||
(such as adding a virtual function)
|
||||
*/
|
||||
class DiskLoc {
|
||||
int _a; // this will be volume, file #, etc. but is a logical value could be anything depending on storage engine
|
||||
int _a; // this will be volume, file #, etc., but it is a logical value and could be anything depending on the storage engine
|
||||
int ofs;
|
||||
|
||||
public:
|
||||
|
||||
enum SentinelValues {
|
||||
/* note NullOfs is different. todo clean up. see refs to NullOfs in code - use is valid but outside DiskLoc context so confusing as-is. */
|
||||
NullOfs = -1,
|
||||
MaxFiles=16000 // thus a limit of about 32TB of data per db
|
||||
};
|
||||
|
||||
DiskLoc(int a, int b) : _a(a), ofs(b) { }
|
||||
DiskLoc(int a, int Ofs) : _a(a), ofs(Ofs) { }
|
||||
DiskLoc() { Null(); }
|
||||
DiskLoc(const DiskLoc& l) {
|
||||
_a=l._a;
|
||||
|
12
db/index.h
12
db/index.h
@ -152,6 +152,7 @@ namespace mongo {
|
||||
return v;
|
||||
}
|
||||
|
||||
/** @return true if index has unique constraint */
|
||||
bool unique() const {
|
||||
BSONObj io = info.obj();
|
||||
return io["unique"].trueValue() ||
|
||||
@ -159,13 +160,13 @@ namespace mongo {
|
||||
isIdIndex();
|
||||
}
|
||||
|
||||
/* if set, when building index, if any duplicates, drop the duplicating object */
|
||||
/** return true if dropDups was set when building index (if any duplicates, dropdups drops the duplicating objects) */
|
||||
bool dropDups() const {
|
||||
return info.obj().getBoolField( "dropDups" );
|
||||
}
|
||||
|
||||
/* delete this index. does NOT clean up the system catalog
|
||||
(system.indexes or system.namespaces) -- only NamespaceIndex.
|
||||
/** delete this index. does NOT clean up the system catalog
|
||||
(system.indexes or system.namespaces) -- only NamespaceIndex.
|
||||
*/
|
||||
void kill_idx();
|
||||
|
||||
@ -179,8 +180,11 @@ namespace mongo {
|
||||
it may not mean we can build the index version in question: we may not maintain building
|
||||
of indexes in old formats in the future.
|
||||
*/
|
||||
static bool isASupportedIndexVersionNumber(int v) {
    // only index versions 0 and 1 are recognized
    switch ( v ) {
    case 0:
    case 1:
        return true;
    default:
        return false;
    }
}
|
||||
static bool isASupportedIndexVersionNumber(int v) {
    // true exactly for v == 0 || v == 1: a negative v becomes a huge
    // unsigned value and so fails the single comparison
    return static_cast<unsigned>( v ) <= 1u;
}
|
||||
|
||||
/** @return the interface for this index, which varies with the index version.
|
||||
used for backward compatibility of index versions/formats.
|
||||
*/
|
||||
IndexInterface& idxInterface() {
|
||||
int v = version();
|
||||
dassert( isASupportedIndexVersionNumber(v) );
|
||||
|
@ -166,28 +166,21 @@ namespace mongo {
|
||||
void _getKeys( vector<const char*> fieldNames , vector<BSONElement> fixed , const BSONObj &obj, BSONObjSet &keys ) const;
|
||||
|
||||
BSONSizeTracker _sizeTracker;
|
||||
|
||||
vector<const char*> _fieldNames;
|
||||
vector<BSONElement> _fixed;
|
||||
|
||||
BSONObj _nullKey; // a full key with all fields null
|
||||
|
||||
BSONObj _nullObj; // only used for _nullElt
|
||||
BSONElement _nullElt; // jstNull
|
||||
|
||||
int _nFields; // number of fields in the index
|
||||
bool _sparse; // if the index is sparse
|
||||
|
||||
shared_ptr<IndexType> _indexType;
|
||||
|
||||
const IndexDetails * _details;
|
||||
|
||||
void _init();
|
||||
|
||||
friend class IndexType;
|
||||
public:
|
||||
bool _finishedInit;
|
||||
|
||||
friend class IndexType;
|
||||
};
|
||||
|
||||
|
||||
|
@ -1,3 +1,5 @@
|
||||
typedef BtreeBucket::_KeyNode _KeyNode;
|
||||
|
||||
/** @return the namespace string used by these tests */
const char* ns() {
    static const char kTestNamespace[] = "unittests.btreetests";
    return kTestNamespace;
}
|
||||
@ -104,7 +106,7 @@
|
||||
d = b->getNextChild();
|
||||
}
|
||||
else {
|
||||
d = const_cast< DiskLoc& >( b->keyNode( i ).prevChildBucket );
|
||||
d = b->keyNode( i ).prevChildBucket;
|
||||
}
|
||||
assert( !d.isNull() );
|
||||
return d.btree();
|
||||
@ -323,7 +325,7 @@
|
||||
end.appendMaxKey( "a" );
|
||||
auto_ptr< BtreeCursor > c( BtreeCursor::make( nsdetails( ns() ), 1, id(), start.done(), end.done(), false, 1 ) );
|
||||
while( c->ok() ) {
|
||||
if ( !c->_currKeyNode().prevChildBucket.isNull() ) {
|
||||
if ( c->curKeyHasChild() ) {
|
||||
toDel.push_back( c->currKey().firstElement().valuestr() );
|
||||
}
|
||||
else {
|
||||
@ -388,8 +390,9 @@
|
||||
}
|
||||
// too much work to try to make this happen through inserts and deletes
|
||||
// we are intentionally manipulating the btree bucket directly here
|
||||
getDur().writingDiskLoc( const_cast< DiskLoc& >( bt()->keyNode( 1 ).prevChildBucket ) ) = DiskLoc();
|
||||
getDur().writingInt( const_cast< DiskLoc& >( bt()->keyNode( 1 ).recordLoc ).GETOFS() ) |= 1; // make unused
|
||||
BtreeBucket::Loc* L = const_cast< BtreeBucket::Loc* >( &bt()->keyNode( 1 ).prevChildBucket );
|
||||
getDur().writing(L)->Null();
|
||||
getDur().writingInt( const_cast< BtreeBucket::Loc& >( bt()->keyNode( 1 ).recordLoc ).GETOFS() ) |= 1; // make unused
|
||||
BSONObj k = BSON( "a" << toInsert );
|
||||
Base::insert( k );
|
||||
}
|
||||
@ -820,13 +823,23 @@
|
||||
ArtificialTree::setTree( "{h:{e:{b:{a:null},c:null,d:null},_:{f:null}},_:{i:null}}", id() );
|
||||
// dump();
|
||||
string ns = id().indexNamespace();
|
||||
const BtreeBucket* b = bt();
|
||||
ASSERT_EQUALS( 8, bt()->fullValidate( dl(), order(), 0, true ) );
|
||||
ASSERT_EQUALS( 6, nsdetails( ns.c_str() )->stats.nrecords );
|
||||
|
||||
// cout << "---------------------" << endl;
|
||||
|
||||
// dump();
|
||||
|
||||
// cout << "---------------------" << endl;
|
||||
|
||||
BSONObj k = BSON( "" << "c" );
|
||||
assert( unindex( k ) );
|
||||
|
||||
// dump();
|
||||
ASSERT_EQUALS( 7, bt()->fullValidate( dl(), order(), 0, true ) );
|
||||
|
||||
long long keyCount = bt()->fullValidate( dl(), order(), 0, true );
|
||||
ASSERT_EQUALS( 7, keyCount );
|
||||
ASSERT_EQUALS( 4, nsdetails( ns.c_str() )->stats.nrecords );
|
||||
// no recursion currently in this case
|
||||
ArtificialTree::checkStructure( "{h:{b:{a:null},d:null,e:null,f:null},_:{i:null}}", id() );
|
||||
@ -1435,7 +1448,7 @@
|
||||
void run() {
|
||||
string ns = id().indexNamespace();
|
||||
ArtificialTree::setTree( "{a:null,c:{b:null},d:null}", id() );
|
||||
getDur().writingInt( const_cast< DiskLoc& >( bt()->keyNode( 1 ).prevChildBucket.btree()->keyNode( 0 ).recordLoc ).GETOFS() ) |= 1; // make unused
|
||||
getDur().writingInt( const_cast< BtreeBucket::Loc& >( bt()->keyNode( 1 ).prevChildBucket.btree()->keyNode( 0 ).recordLoc ).GETOFS() ) |= 1; // make unused
|
||||
long long unused = 0;
|
||||
ASSERT_EQUALS( 3, bt()->fullValidate( dl(), order(), &unused, true ) );
|
||||
ASSERT_EQUALS( 1, unused );
|
||||
|
@ -19,9 +19,11 @@
|
||||
|
||||
#include "pch.h"
|
||||
#include "dbtests.h"
|
||||
#include "../util/unittest.h"
|
||||
|
||||
int main( int argc, char** argv ) {
|
||||
static StaticObserver StaticObserver;
|
||||
doPreServerStatupInits();
|
||||
UnitTest::runTests();
|
||||
return Suite::run(argc, argv, "/tmp/unittest");
|
||||
}
|
||||
|
@ -357,8 +357,6 @@ namespace mongo {
|
||||
|
||||
Logstream::get().flush();
|
||||
|
||||
cout << "**************************************************" << endl;
|
||||
cout << "**************************************************" << endl;
|
||||
cout << "**************************************************" << endl;
|
||||
|
||||
int rc = 0;
|
||||
|
Loading…
Reference in New Issue
Block a user