mirror of
https://github.com/mongodb/mongo.git
synced 2024-12-01 01:21:03 +01:00
SERVER-38952 Prune redundant index bound
This commit is contained in:
parent
798812c292
commit
8120e440ab
@ -239,4 +239,29 @@
|
||||
// Finally, confirm that a collection scan produces the same results.
|
||||
assertArrayEq(coll.find(trimTestQuery).toArray(),
|
||||
coll.find(trimTestQuery).hint({$natural: 1}).toArray());
|
||||
|
||||
// Verify that no overlapping bounds are generated and all the expected documents are returned
|
||||
// for fieldname-or-array-index queries.
|
||||
const existenceQuery = {"a.0.1": {$exists: true}};
|
||||
assert.commandWorked(coll.insert({a: [{1: "exists"}, 1]}));
|
||||
assert.commandWorked(coll.insert({a: {0: {1: "exists"}}}));
|
||||
assert.commandWorked(coll.insert({a: {0: [2, "exists"]}}));
|
||||
assert.commandWorked(coll.insert({a: {0: [2, {"object_exists": 1}]}}));
|
||||
assert.commandWorked(coll.insert({a: {0: [2, ["array_exists"]]}}));
|
||||
assert.commandWorked(coll.insert({a: {0: [{1: "exists"}]}}));
|
||||
assert.commandWorked(coll.insert({a: {0: [{1: []}]}}));
|
||||
assert.commandWorked(coll.insert({a: {0: [{1: {}}]}}));
|
||||
assert.commandWorked(coll.insert({a: [{0: [{1: ["exists"]}]}]}));
|
||||
assert.commandWorked(coll.insert({a: [{}, {0: [{1: ["exists"]}]}]}));
|
||||
assert.commandWorked(coll.insert({a: [{}, {0: [[], {}, {1: ["exists"]}]}]}));
|
||||
|
||||
assert.commandWorked(coll.insert({a: {0: ["not_exist"]}}));
|
||||
assert.commandWorked(coll.insert({a: {"01": ["not_exist"]}}));
|
||||
assert.commandWorked(coll.insert({a: [{11: "not_exist"}]}));
|
||||
|
||||
assertWildcardQuery(existenceQuery, 'a.0.1', {'executionStats.nReturned': 11});
|
||||
// Finally, confirm that a collection scan produces the same results.
|
||||
assertArrayEq(coll.find(existenceQuery).toArray(),
|
||||
coll.find(existenceQuery).hint({$natural: 1}).toArray());
|
||||
|
||||
})();
|
||||
|
@ -118,7 +118,8 @@ std::vector<size_t> findArrayIndexPathComponents(const std::set<std::size_t>& mu
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an std::string of the full dotted field, minus the parts listed in 'skipParts'.
|
||||
* Returns a FieldRef of the full dotted field, minus the parts at indices listed in
|
||||
* 'skipComponents'.
|
||||
*/
|
||||
FieldRef pathWithoutSpecifiedComponents(const FieldRef& path,
|
||||
const std::set<size_t>& skipComponents) {
|
||||
@ -126,18 +127,13 @@ FieldRef pathWithoutSpecifiedComponents(const FieldRef& path,
|
||||
if (skipComponents.empty()) {
|
||||
return path;
|
||||
}
|
||||
StringBuilder ss;
|
||||
size_t startPart = 0;
|
||||
for (const auto& skipPart : skipComponents) {
|
||||
ss << (ss.len() && !ss.stringData().endsWith(".") ? "." : "")
|
||||
<< path.dottedSubstring(startPart, skipPart);
|
||||
startPart = skipPart + 1;
|
||||
FieldRef result;
|
||||
for (size_t index = 0; index < path.numParts(); ++index) {
|
||||
if (!skipComponents.count(index)) {
|
||||
result.appendPart(path.getPart(index));
|
||||
}
|
||||
}
|
||||
if (startPart < path.numParts()) {
|
||||
ss << (ss.len() && !ss.stringData().endsWith(".") ? "." : "")
|
||||
<< path.dottedSubstring(startPart, path.numParts());
|
||||
}
|
||||
return FieldRef{ss.str()};
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -158,7 +154,8 @@ MultikeyPaths buildMultiKeyPathsForExpandedWildcardIndexEntry(
|
||||
}
|
||||
|
||||
std::set<FieldRef> generateFieldNameOrArrayIndexPathSet(const std::set<std::size_t>& multikeyPaths,
|
||||
const FieldRef& queryPath) {
|
||||
const FieldRef& queryPath,
|
||||
bool requiresSubpathBounds) {
|
||||
// We iterate over the power set of array index positions to generate all necessary paths.
|
||||
// The algorithm is unavoidably O(n2^n), but we enforce that 'n' is never more than single
|
||||
// digits during the planner's index selection phase.
|
||||
@ -181,7 +178,30 @@ std::set<FieldRef> generateFieldNameOrArrayIndexPathSet(const std::set<std::size
|
||||
arrayIndicesToSkip.insert(potentialArrayIndices[i]);
|
||||
}
|
||||
}
|
||||
paths.insert(pathWithoutSpecifiedComponents(queryPath, arrayIndicesToSkip));
|
||||
|
||||
// Add the path to the FieldRef set, and obtain an iterator pointing to the new entry.
|
||||
const auto result =
|
||||
paths.emplace(pathWithoutSpecifiedComponents(queryPath, arrayIndicesToSkip));
|
||||
|
||||
// If any path in the set prefixes another, then the bounds generated will overlap (and
|
||||
// thus, be invalid). So, we must make sure that the new path does not prefix and is not
|
||||
// prefixed by any existing entries in the set. If any such prefixes do exist, we must
|
||||
// remove the subpath(s) and retain only the shortest prefix path, since the bounds it
|
||||
// generates will be a superset of all the paths generated by the removed entries.
|
||||
if (requiresSubpathBounds && result.second) {
|
||||
const auto currentPathItr = result.first;
|
||||
// If the new path is a subpath of an existing entry, remove the new path.
|
||||
if (currentPathItr != paths.begin() &&
|
||||
std::prev(currentPathItr)->isPrefixOf(*currentPathItr)) {
|
||||
paths.erase(currentPathItr);
|
||||
continue;
|
||||
}
|
||||
// If existing paths are subpaths of the new entry, remove the old paths.
|
||||
while (std::next(currentPathItr) != paths.end() &&
|
||||
currentPathItr->isPrefixOf(*std::next(currentPathItr))) {
|
||||
paths.erase(std::next(currentPathItr));
|
||||
}
|
||||
}
|
||||
}
|
||||
return paths;
|
||||
}
|
||||
@ -425,27 +445,12 @@ void finalizeWildcardIndexScanConfiguration(IndexScanNode* scan) {
|
||||
// specifically the interval ["path.","path/") on "$_path".
|
||||
const bool requiresSubpathBounds = boundsOverlapObjectTypeBracket(bounds->fields.back());
|
||||
|
||||
// Helper function to check whether the final path component in 'queryPath' is an array index.
|
||||
const auto lastFieldIsArrayIndex = [&multikeyPaths](const auto& queryPath) {
|
||||
return (queryPath.numParts() > 1u && multikeyPaths.count(queryPath.numParts() - 2u) &&
|
||||
queryPath.isNumericPathComponentStrict(queryPath.numParts() - 1u));
|
||||
};
|
||||
|
||||
// If subpath bounds are needed, we build a range interval on all subpaths of the query path(s).
|
||||
// We must therefore trim any trailing array indices from the query path before generating the
|
||||
// fieldname-or-array power set, in order to avoid overlapping the final set of bounds. For
|
||||
// instance, the untrimmed query path 'a.0' will produce paths 'a' and 'a.0' if 'a' is multikey,
|
||||
// and so we would end up with bounds [['a','a'], ['a.','a/'], ['a.0','a.0'], ['a.0.','a.0/']].
|
||||
// The latter two are subsets of the ['a.', 'a/'] interval.
|
||||
while (requiresSubpathBounds && lastFieldIsArrayIndex(queryPath)) {
|
||||
queryPath.removeLastPart();
|
||||
}
|
||||
|
||||
// Account for fieldname-or-array-index semantics. $** indexes do not explicitly encode array
|
||||
// indices in their keys, so if this query traverses one or more multikey fields via an array
|
||||
// index (e.g. query 'a.0.b' where 'a' is an array), then we must generate bounds on all array-
|
||||
// and non-array permutations of the path in order to produce INEXACT_FETCH bounds.
|
||||
auto paths = generateFieldNameOrArrayIndexPathSet(multikeyPaths, queryPath);
|
||||
auto paths =
|
||||
generateFieldNameOrArrayIndexPathSet(multikeyPaths, queryPath, requiresSubpathBounds);
|
||||
|
||||
// Add a $_path point-interval for each path that needs to be traversed in the index. If subpath
|
||||
// bounds are required, then we must add a further range interval on ["path.","path/").
|
||||
|
@ -1519,7 +1519,8 @@ TEST_F(QueryPlannerWildcardTest, InitialNumericPathComponentIsAlwaysFieldName) {
|
||||
}
|
||||
|
||||
TEST_F(QueryPlannerWildcardTest, ShouldGenerateSpecialBoundsForNullAndExistenceQueries) {
|
||||
addWildcardIndex(BSON("a.$**" << 1), {"a", "a.b", "a.b.2", "a.b.2.3", "a.b.2.3.4"});
|
||||
addWildcardIndex(BSON("a.$**" << 1),
|
||||
{"a", "a.b", "a.b.2", "a.b.2.3", "a.b.2.3.4", "a.c.b", "a.c.b.1"});
|
||||
|
||||
runQuery(fromjson("{'a.0.b': {$exists: true}}"));
|
||||
assertNumSolutions(1U);
|
||||
@ -1540,15 +1541,46 @@ TEST_F(QueryPlannerWildcardTest, ShouldGenerateSpecialBoundsForNullAndExistenceQ
|
||||
"['a.b.c','a.b.c',true,true], ['a.b.c.','a.b.c/',true,false]], 'a.0.b.1.c': [[{$minKey: "
|
||||
"1},{$maxKey: 1},true,true]]}}}}}");
|
||||
|
||||
// Confirm that any trailing array index fields are trimmed before the fieldname-or-array-index
|
||||
// pathset is generated, such that the subpath bounds do not overlap.
|
||||
runQuery(fromjson("{'a.0.b.1': {$exists: true, $eq: null}}"));
|
||||
// When an array index field exists in the query pattern and one of the resulting
|
||||
// fieldname-or-array-index paths is a prefix of another, then the subpath bounds generated by
|
||||
// the prefix path will contain all the bounds generated by its subpaths. Test that we avoid
|
||||
// overlap by removing the redundant paths.
|
||||
//
|
||||
// In the below example, 'a' is multikey and the query is on 'a.0.0.0'. We generate paths
|
||||
// 'a.0.0' and 'a.0.0.0' because the first '0' is an array index. But the subpaths bound
|
||||
// generated by 'a.0.0' -> ['a.0.0.','a.0.0/'] would contain all the bounds generated by
|
||||
// 'a.0.0.0'. Therefore we must remove path 'a.0.0.0' before generating the subpath bounds.
|
||||
runQuery(fromjson("{'a.0.0.0': {$exists: true}}"));
|
||||
assertNumSolutions(1U);
|
||||
assertSolutionExists(
|
||||
"{fetch: {filter: {'a.0.b.1': {$exists: true, $eq: null}}, node: {ixscan: {filter:null, "
|
||||
"pattern:{'$_path': 1, 'a.0.b.1': 1}, bounds: {'$_path': [['a.0.b','a.0.b',true,true], "
|
||||
"['a.0.b.','a.0.b/',true,false], ['a.b','a.b',true,true], ['a.b.','a.b/',true,false]], "
|
||||
"'a.0.b.1': [[{$minKey: 1},{$maxKey: 1},true,true]]}}}}}");
|
||||
"{fetch: {filter: {'a.0.0.0': {$exists: true}}, node: {ixscan: {filter:null, "
|
||||
"pattern:{'$_path': 1, 'a.0.0.0': 1}, bounds: {'$_path': [['a.0.0','a.0.0',true,true], "
|
||||
"['a.0.0.','a.0.0/',true,false]], 'a.0.0.0': [[{$minKey: 1},{$maxKey: 1},true,true]]}}}}}");
|
||||
|
||||
// When there are multiple subpaths that are prefixes of a particular subpath, all the other
|
||||
// subpaths need to be removed. In the below example, 'a.c.b', 'a.c.b.1', 'a.c.b.2' are possible
|
||||
// subpaths, but since 'a.c.b' is a prefix of the rest, we need to ensure that 'a.c.b' is the
|
||||
// only path that we consider.
|
||||
runQuery(fromjson("{'a.c.b.1.2': {$exists: true}}"));
|
||||
assertNumSolutions(1U);
|
||||
assertSolutionExists(
|
||||
"{fetch: {filter: {'a.c.b.1.2': {$exists: true}}, node: {ixscan: {filter:null, "
|
||||
"pattern:{'$_path': 1, 'a.c.b.1.2': 1}, bounds: {'$_path': [['a.c.b','a.c.b',true,true], "
|
||||
"['a.c.b.','a.c.b/',true,false]], "
|
||||
"'a.c.b.1.2': [[{$minKey: 1},{$maxKey: 1},true,true]]}}}}}");
|
||||
|
||||
// Similar to the previous case except one of the subpath is a 'string prefix' (when compared as
|
||||
// strings) and not a prefix from the FieldRef point of view. In this case bounds for both the
|
||||
// subpaths should be generated since they don't overlap.
|
||||
runQuery(fromjson("{'a.c.b.11.1': {$exists: true}}"));
|
||||
assertNumSolutions(1U);
|
||||
assertSolutionExists(
|
||||
"{fetch: {filter: {'a.c.b.11.1': {$exists: true}}, node: {ixscan: {filter:null, "
|
||||
"pattern:{'$_path': 1, 'a.c.b.11.1': 1}, bounds: {'$_path': "
|
||||
"[['a.c.b.1','a.c.b.1',true,true], ['a.c.b.1.','a.c.b.1/',true,false], "
|
||||
"['a.c.b.11.1','a.c.b.11.1',true,true], ['a.c.b.11.1.','a.c.b.11.1/',true,false]], "
|
||||
"'a.c.b.11.1': [[{$minKey: 1},{$maxKey: 1},true,true]]}}}}}");
|
||||
|
||||
|
||||
runQuery(fromjson("{'a.0.b.2.3.4': {$exists: true, $eq: null}}"));
|
||||
assertNumSolutions(1U);
|
||||
|
Loading…
Reference in New Issue
Block a user