
duplicate key indexing fix

Dwight 2008-02-24 20:16:36 -05:00
parent 13e6a66d08
commit 9cb73f240f
4 changed files with 73 additions and 14 deletions


@@ -343,8 +343,6 @@ bool BtreeBucket::unindex(const DiskLoc& thisLoc, const char *ns, JSObj& key, co
BtreeCursor c(thisLoc, key, 1, true);
// dump();
while( c.ok() ) {
KeyNode kn = c.currKeyNode();
if( !kn.key.woEqual(key) )
@@ -447,9 +445,30 @@ void BtreeBucket::insertHere(DiskLoc thisLoc, const char *ns, int keypos,
// split
if( split_debug )
cout << " " << thisLoc.toString() << ".split" << endl;
int mid = n / 2;
/* on duplicate key, we need to ensure that they all end up on the RHS */
if( 0 ) {
assert(mid>0);
while( 1 ) {
KeyNode mn = keyNode(mid);
KeyNode left = keyNode(mid-1);
if( left.key < mn.key )
break;
mid--;
if( mid < 3 ) {
cout << "Assertion failure - mid<3: duplicate key bug not fixed yet" << endl;
cout << " ns:" << ns << endl;
cout << " key:" << mn.key.toString() << endl;
break;
}
}
}
BtreeBucket *r = allocTemp();
DiskLoc rLoc;
int mid = n / 2;
if( split_debug )
cout << " mid:" << mid << ' ' << keyNode(mid).key.toString() << " n:" << n << endl;
for( int i = mid+1; i < n; i++ ) {
@@ -600,6 +619,21 @@ DiskLoc BtreeBucket::locate(const DiskLoc& thisLoc, JSObj& key, int& pos, bool&
int p;
found = find(key, p);
if( found ) {
/* todo: slow? this can be avoided for indexes that don't allow dup keys. add support for that. */
{
// todo: fix for direction==-1!
// dmtodo
DiskLoc lchild = k(p).prevChildBucket;
if( !lchild.isNull() ) {
// if dup keys, might be dups to the left.
DiskLoc largestLoc;
int largestKey;
lchild.btree()->findLargestKey(lchild, largestLoc, largestKey);
if( !largestLoc.isNull() && largestLoc.btree()->keyAt(largestKey).woEqual(key) )
return lchild.btree()->locate(lchild, key, pos, found, direction);
}
}
pos = p;
return thisLoc;
}
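
The new block in locate() handles the case where the key is found in an interior node but duplicates of it may also live in the subtree under the left child: it peeks at that subtree's largest key and, if it equals the search key, descends so the search lands on the leftmost duplicate. A minimal sketch of the same idea on a simplified node type follows; SimpleNode, largestKey, and locate here are hypothetical names, not the BtreeBucket interface.

#include <vector>

struct SimpleNode {
    std::vector<int> keys;              // sorted, may contain duplicates
    std::vector<SimpleNode*> children;  // keys.size() + 1 entries, empty for a leaf
};

// Largest key stored anywhere in the subtree rooted at n.
int largestKey(const SimpleNode* n) {
    while (!n->children.empty())
        n = n->children.back();
    return n->keys.back();
}

// Return the node holding the leftmost occurrence of key, or nullptr.
const SimpleNode* locate(const SimpleNode* n, int key) {
    if (n == nullptr)
        return nullptr;
    for (size_t p = 0; p < n->keys.size(); ++p) {
        if (n->keys[p] == key) {
            // Found, but equal keys might also sit in the child to the left.
            const SimpleNode* lchild = n->children.empty() ? nullptr : n->children[p];
            if (lchild != nullptr && largestKey(lchild) == key)
                return locate(lchild, key);
            return n;
        }
        if (key < n->keys[p])
            return locate(n->children.empty() ? nullptr : n->children[p], key);
    }
    return locate(n->children.empty() ? nullptr : n->children.back(), key);
}
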


@@ -390,6 +390,7 @@ void _unindexRecord(const char *ns, IndexDetails& id, JSObj& obj, const DiskLoc&
id.getKeysFromObject(obj, keys);
for( set<JSObj>::iterator i=keys.begin(); i != keys.end(); i++ ) {
JSObj j = *i;
// cout << "TEMP: j:" << j.toString() << endl;
if( otherTraceLevel >= 5 ) {
cout << "_unindexRecord() " << obj.toString();
cout << "\n unindex:" << j.toString() << endl;
@@ -398,14 +399,13 @@ void _unindexRecord(const char *ns, IndexDetails& id, JSObj& obj, const DiskLoc&
bool ok = id.head.btree()->unindex(id.head, ns, j, dl);
#if defined(_WIN32)
// TEMP TEMP TEMP TEMP TEMP
id.head.btree()->fullValidate(id.head);
#endif
if( !ok ) {
cout << "Warning: _unindex failed" << endl;
cout << " " << obj.toString() << endl;
cout << " " << j.toString() << endl;
cout << " if you added dup keys this can happen until we support that" << endl;
cout << "Assertion failure: _unindex failed" << '\n';
cout << " obj:" << obj.toString() << '\n';
cout << " key:" << j.toString() << '\n';
cout << " dl:" << dl.toString() << endl;
}
}
}


@@ -130,7 +130,7 @@ fail:
}
void deleteObjects(const char *ns, JSObj pattern, bool justOne) {
// cout << "delete ns:" << ns << " queryobjsize:" <<
// cout << "TEMP delete ns:" << ns << " queryobjsize:" <<
// pattern.objsize() << endl;
if( strstr(ns, ".system.") ) {
@@ -162,6 +162,7 @@ DiskLoc _tempDelLoc;
DiskLoc rloc = c->currLoc();
c->advance(); // must advance before deleting as the next ptr will die
JSObj js(r);
//cout << "TEMP: " << js.toString() << endl;
bool deep;
if( !matcher.matches(js, &deep) ) {
if( c->tempStopOnMiss() )


@@ -104,12 +104,36 @@ function testarrayindexing() {
function testdups() {
print("testdups");
for( pass=0;pass<2;pass++ ) {
t.td.remove({});
for( var x=0;x<2000;x++ )
t.td.save({ggg:"asdfasdf bbb a a jdssjsjdjds dsdsdsdsds d", z: x, str: "a long string dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd"});
assert( t.td.find({ggg:"asdfasdf bbb a a jdssjsjdjds dsdsdsdsds d"}).toArray().length == 2000 );
t.td.ensureIndex({ggg:true});
print(" pass:" + pass);
if( pass < 2 )
t.td.remove({});
for( var x=0;x<2000;x++ )
t.td.save({ggg:"asdfasdf bbb a a jdssjsjdjds dsdsdsdsds d", z: x,
str: "a long string dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd"});
assert( t.td.find({ggg:"asdfasdf bbb a a jdssjsjdjds dsdsdsdsds d"}).toArray().length == 2000 );
// t.td.ensureIndex({ggg:true});
if( pass == 0 )
t.td.ensureIndex({ggg:1});
else if( pass == 1 )
t.td.ensureIndex({ggg:-1});
}
print(" end testdups");
print(" try t.td.remove({});");
}
function testdups2() {
print("testdups");
for( pass=0;pass<1;pass++ ) {
print(" pass:" + pass);
t.td.remove({});
for( var x=0;x<250;x++ )
t.td.save({ggg:"asdfasdf bbb a a jdssjsjdjds dsdsdsdsds d", z: x,
str: "a long string dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd"});
assert( t.td.find({ggg:"asdfasdf bbb a a jdssjsjdjds dsdsdsdsds d"}).toArray().length == 250 );
t.td.ensureIndex({ggg:true});
}
t.td.remove({});
print(" end testdups");
}
function runquick() {