0
0
mirror of https://github.com/mongodb/mongo.git synced 2024-12-01 01:21:03 +01:00

SERVER-38952 Prune redundant index bound

This commit is contained in:
Arun Banala 2019-02-13 18:44:17 +00:00
parent 798812c292
commit 8120e440ab
3 changed files with 101 additions and 39 deletions

View File

@ -239,4 +239,29 @@
// Finally, confirm that a collection scan produces the same results.
assertArrayEq(coll.find(trimTestQuery).toArray(),
coll.find(trimTestQuery).hint({$natural: 1}).toArray());
// Verify that no overlapping bounds are generated and all the expected documents are returned
// for fieldname-or-array-index queries.
const existenceQuery = {"a.0.1": {$exists: true}};
assert.commandWorked(coll.insert({a: [{1: "exists"}, 1]}));
assert.commandWorked(coll.insert({a: {0: {1: "exists"}}}));
assert.commandWorked(coll.insert({a: {0: [2, "exists"]}}));
assert.commandWorked(coll.insert({a: {0: [2, {"object_exists": 1}]}}));
assert.commandWorked(coll.insert({a: {0: [2, ["array_exists"]]}}));
assert.commandWorked(coll.insert({a: {0: [{1: "exists"}]}}));
assert.commandWorked(coll.insert({a: {0: [{1: []}]}}));
assert.commandWorked(coll.insert({a: {0: [{1: {}}]}}));
assert.commandWorked(coll.insert({a: [{0: [{1: ["exists"]}]}]}));
assert.commandWorked(coll.insert({a: [{}, {0: [{1: ["exists"]}]}]}));
assert.commandWorked(coll.insert({a: [{}, {0: [[], {}, {1: ["exists"]}]}]}));
assert.commandWorked(coll.insert({a: {0: ["not_exist"]}}));
assert.commandWorked(coll.insert({a: {"01": ["not_exist"]}}));
assert.commandWorked(coll.insert({a: [{11: "not_exist"}]}));
assertWildcardQuery(existenceQuery, 'a.0.1', {'executionStats.nReturned': 11});
// Finally, confirm that a collection scan produces the same results.
assertArrayEq(coll.find(existenceQuery).toArray(),
coll.find(existenceQuery).hint({$natural: 1}).toArray());
})();

View File

@ -118,7 +118,8 @@ std::vector<size_t> findArrayIndexPathComponents(const std::set<std::size_t>& mu
}
/**
* Returns an std::string of the full dotted field, minus the parts listed in 'skipParts'.
* Returns a FieldRef of the full dotted field, minus the parts at indices listed in
* 'skipComponents'.
*/
FieldRef pathWithoutSpecifiedComponents(const FieldRef& path,
const std::set<size_t>& skipComponents) {
@ -126,18 +127,13 @@ FieldRef pathWithoutSpecifiedComponents(const FieldRef& path,
if (skipComponents.empty()) {
return path;
}
StringBuilder ss;
size_t startPart = 0;
for (const auto& skipPart : skipComponents) {
ss << (ss.len() && !ss.stringData().endsWith(".") ? "." : "")
<< path.dottedSubstring(startPart, skipPart);
startPart = skipPart + 1;
FieldRef result;
for (size_t index = 0; index < path.numParts(); ++index) {
if (!skipComponents.count(index)) {
result.appendPart(path.getPart(index));
}
}
if (startPart < path.numParts()) {
ss << (ss.len() && !ss.stringData().endsWith(".") ? "." : "")
<< path.dottedSubstring(startPart, path.numParts());
}
return FieldRef{ss.str()};
return result;
}
/**
@ -158,7 +154,8 @@ MultikeyPaths buildMultiKeyPathsForExpandedWildcardIndexEntry(
}
std::set<FieldRef> generateFieldNameOrArrayIndexPathSet(const std::set<std::size_t>& multikeyPaths,
const FieldRef& queryPath) {
const FieldRef& queryPath,
bool requiresSubpathBounds) {
// We iterate over the power set of array index positions to generate all necessary paths.
// The algorithm is unavoidably O(n * 2^n), but we enforce that 'n' is never more than single
// digits during the planner's index selection phase.
@ -181,7 +178,30 @@ std::set<FieldRef> generateFieldNameOrArrayIndexPathSet(const std::set<std::size
arrayIndicesToSkip.insert(potentialArrayIndices[i]);
}
}
paths.insert(pathWithoutSpecifiedComponents(queryPath, arrayIndicesToSkip));
// Add the path to the FieldRef set, and obtain an iterator pointing to the new entry.
const auto result =
paths.emplace(pathWithoutSpecifiedComponents(queryPath, arrayIndicesToSkip));
// If any path in the set prefixes another, then the bounds generated will overlap (and
// thus, be invalid). So, we must make sure that the new path does not prefix and is not
// prefixed by any existing entries in the set. If any such prefixes do exist, we must
// remove the subpath(s) and retain only the shortest prefix path, since the bounds it
// generates will be a superset of all the bounds generated by the removed entries.
if (requiresSubpathBounds && result.second) {
const auto currentPathItr = result.first;
// If the new path is a subpath of an existing entry, remove the new path.
if (currentPathItr != paths.begin() &&
std::prev(currentPathItr)->isPrefixOf(*currentPathItr)) {
paths.erase(currentPathItr);
continue;
}
// If existing paths are subpaths of the new entry, remove the old paths.
while (std::next(currentPathItr) != paths.end() &&
currentPathItr->isPrefixOf(*std::next(currentPathItr))) {
paths.erase(std::next(currentPathItr));
}
}
}
return paths;
}
@ -425,27 +445,12 @@ void finalizeWildcardIndexScanConfiguration(IndexScanNode* scan) {
// specifically the interval ["path.","path/") on "$_path".
const bool requiresSubpathBounds = boundsOverlapObjectTypeBracket(bounds->fields.back());
// Helper function to check whether the final path component in 'queryPath' is an array index.
const auto lastFieldIsArrayIndex = [&multikeyPaths](const auto& queryPath) {
return (queryPath.numParts() > 1u && multikeyPaths.count(queryPath.numParts() - 2u) &&
queryPath.isNumericPathComponentStrict(queryPath.numParts() - 1u));
};
// If subpath bounds are needed, we build a range interval on all subpaths of the query path(s).
// We must therefore trim any trailing array indices from the query path before generating the
// fieldname-or-array power set, in order to avoid overlapping the final set of bounds. For
// instance, the untrimmed query path 'a.0' will produce paths 'a' and 'a.0' if 'a' is multikey,
// and so we would end up with bounds [['a','a'], ['a.','a/'], ['a.0','a.0'], ['a.0.','a.0/']].
// The latter two are subsets of the ['a.', 'a/'] interval.
while (requiresSubpathBounds && lastFieldIsArrayIndex(queryPath)) {
queryPath.removeLastPart();
}
// Account for fieldname-or-array-index semantics. $** indexes do not explicitly encode array
// indices in their keys, so if this query traverses one or more multikey fields via an array
// index (e.g. query 'a.0.b' where 'a' is an array), then we must generate bounds on all array-
// and non-array permutations of the path in order to produce INEXACT_FETCH bounds.
auto paths = generateFieldNameOrArrayIndexPathSet(multikeyPaths, queryPath);
auto paths =
generateFieldNameOrArrayIndexPathSet(multikeyPaths, queryPath, requiresSubpathBounds);
// Add a $_path point-interval for each path that needs to be traversed in the index. If subpath
// bounds are required, then we must add a further range interval on ["path.","path/").

View File

@ -1519,7 +1519,8 @@ TEST_F(QueryPlannerWildcardTest, InitialNumericPathComponentIsAlwaysFieldName) {
}
TEST_F(QueryPlannerWildcardTest, ShouldGenerateSpecialBoundsForNullAndExistenceQueries) {
addWildcardIndex(BSON("a.$**" << 1), {"a", "a.b", "a.b.2", "a.b.2.3", "a.b.2.3.4"});
addWildcardIndex(BSON("a.$**" << 1),
{"a", "a.b", "a.b.2", "a.b.2.3", "a.b.2.3.4", "a.c.b", "a.c.b.1"});
runQuery(fromjson("{'a.0.b': {$exists: true}}"));
assertNumSolutions(1U);
@ -1540,15 +1541,46 @@ TEST_F(QueryPlannerWildcardTest, ShouldGenerateSpecialBoundsForNullAndExistenceQ
"['a.b.c','a.b.c',true,true], ['a.b.c.','a.b.c/',true,false]], 'a.0.b.1.c': [[{$minKey: "
"1},{$maxKey: 1},true,true]]}}}}}");
// Confirm that any trailing array index fields are trimmed before the fieldname-or-array-index
// pathset is generated, such that the subpath bounds do not overlap.
runQuery(fromjson("{'a.0.b.1': {$exists: true, $eq: null}}"));
// When an array index field exists in the query pattern and one of the resulting
// fieldname-or-array-index paths is a prefix of another, then the subpath bounds generated by
// the prefix path will contain all the bounds generated by its subpaths. Test that we avoid
// overlap by removing the redundant paths.
//
// In the below example, 'a' is multikey and the query is on 'a.0.0.0'. We generate paths
// 'a.0.0' and 'a.0.0.0' because the first '0' is an array index. But the subpath bounds
// generated by 'a.0.0' -> ['a.0.0.','a.0.0/'] would contain all the bounds generated by
// 'a.0.0.0'. Therefore we must remove path 'a.0.0.0' before generating the subpath bounds.
runQuery(fromjson("{'a.0.0.0': {$exists: true}}"));
assertNumSolutions(1U);
assertSolutionExists(
"{fetch: {filter: {'a.0.b.1': {$exists: true, $eq: null}}, node: {ixscan: {filter:null, "
"pattern:{'$_path': 1, 'a.0.b.1': 1}, bounds: {'$_path': [['a.0.b','a.0.b',true,true], "
"['a.0.b.','a.0.b/',true,false], ['a.b','a.b',true,true], ['a.b.','a.b/',true,false]], "
"'a.0.b.1': [[{$minKey: 1},{$maxKey: 1},true,true]]}}}}}");
"{fetch: {filter: {'a.0.0.0': {$exists: true}}, node: {ixscan: {filter:null, "
"pattern:{'$_path': 1, 'a.0.0.0': 1}, bounds: {'$_path': [['a.0.0','a.0.0',true,true], "
"['a.0.0.','a.0.0/',true,false]], 'a.0.0.0': [[{$minKey: 1},{$maxKey: 1},true,true]]}}}}}");
// When a single path is a prefix of multiple other paths, all of those longer paths need to be
// removed. In the below example, 'a.c.b', 'a.c.b.1', 'a.c.b.2' are possible paths, but since
// 'a.c.b' is a prefix of the rest, we need to ensure that 'a.c.b' is the only path that we
// consider.
runQuery(fromjson("{'a.c.b.1.2': {$exists: true}}"));
assertNumSolutions(1U);
assertSolutionExists(
"{fetch: {filter: {'a.c.b.1.2': {$exists: true}}, node: {ixscan: {filter:null, "
"pattern:{'$_path': 1, 'a.c.b.1.2': 1}, bounds: {'$_path': [['a.c.b','a.c.b',true,true], "
"['a.c.b.','a.c.b/',true,false]], "
"'a.c.b.1.2': [[{$minKey: 1},{$maxKey: 1},true,true]]}}}}}");
// Similar to the previous case, except that one of the subpaths is a 'string prefix' of another
// (when compared as strings) but not a prefix from the FieldRef point of view. In this case
// bounds for both the subpaths should be generated since they don't overlap.
runQuery(fromjson("{'a.c.b.11.1': {$exists: true}}"));
assertNumSolutions(1U);
assertSolutionExists(
"{fetch: {filter: {'a.c.b.11.1': {$exists: true}}, node: {ixscan: {filter:null, "
"pattern:{'$_path': 1, 'a.c.b.11.1': 1}, bounds: {'$_path': "
"[['a.c.b.1','a.c.b.1',true,true], ['a.c.b.1.','a.c.b.1/',true,false], "
"['a.c.b.11.1','a.c.b.11.1',true,true], ['a.c.b.11.1.','a.c.b.11.1/',true,false]], "
"'a.c.b.11.1': [[{$minKey: 1},{$maxKey: 1},true,true]]}}}}}");
runQuery(fromjson("{'a.0.b.2.3.4': {$exists: true, $eq: null}}"));
assertNumSolutions(1U);