From 8120e440ab45ab4bd8b1372aee23cae7fad6b01e Mon Sep 17 00:00:00 2001 From: Arun Banala Date: Wed, 13 Feb 2019 18:44:17 +0000 Subject: [PATCH] SERVER-38952 Prune redundant index bound --- jstests/core/wildcard_index_multikey.js | 25 +++++++ .../db/query/planner_wildcard_helpers.cpp | 67 ++++++++++--------- .../query_planner_wildcard_index_test.cpp | 48 ++++++++++--- 3 files changed, 101 insertions(+), 39 deletions(-) diff --git a/jstests/core/wildcard_index_multikey.js b/jstests/core/wildcard_index_multikey.js index d9704963720..039c5176ff4 100644 --- a/jstests/core/wildcard_index_multikey.js +++ b/jstests/core/wildcard_index_multikey.js @@ -239,4 +239,29 @@ // Finally, confirm that a collection scan produces the same results. assertArrayEq(coll.find(trimTestQuery).toArray(), coll.find(trimTestQuery).hint({$natural: 1}).toArray()); + + // Verify that no overlapping bounds are generated and all the expected documents are returned + // for fieldname-or-array-index queries. + const existenceQuery = {"a.0.1": {$exists: true}}; + assert.commandWorked(coll.insert({a: [{1: "exists"}, 1]})); + assert.commandWorked(coll.insert({a: {0: {1: "exists"}}})); + assert.commandWorked(coll.insert({a: {0: [2, "exists"]}})); + assert.commandWorked(coll.insert({a: {0: [2, {"object_exists": 1}]}})); + assert.commandWorked(coll.insert({a: {0: [2, ["array_exists"]]}})); + assert.commandWorked(coll.insert({a: {0: [{1: "exists"}]}})); + assert.commandWorked(coll.insert({a: {0: [{1: []}]}})); + assert.commandWorked(coll.insert({a: {0: [{1: {}}]}})); + assert.commandWorked(coll.insert({a: [{0: [{1: ["exists"]}]}]})); + assert.commandWorked(coll.insert({a: [{}, {0: [{1: ["exists"]}]}]})); + assert.commandWorked(coll.insert({a: [{}, {0: [[], {}, {1: ["exists"]}]}]})); + + assert.commandWorked(coll.insert({a: {0: ["not_exist"]}})); + assert.commandWorked(coll.insert({a: {"01": ["not_exist"]}})); + assert.commandWorked(coll.insert({a: [{11: "not_exist"}]})); + + assertWildcardQuery(existenceQuery, 'a.0.1', {'executionStats.nReturned': 11}); + // Finally, confirm that a collection scan produces the same results. + assertArrayEq(coll.find(existenceQuery).toArray(), + coll.find(existenceQuery).hint({$natural: 1}).toArray()); + })(); diff --git a/src/mongo/db/query/planner_wildcard_helpers.cpp b/src/mongo/db/query/planner_wildcard_helpers.cpp index efe021227ac..d15f1cfef04 100644 --- a/src/mongo/db/query/planner_wildcard_helpers.cpp +++ b/src/mongo/db/query/planner_wildcard_helpers.cpp @@ -118,7 +118,8 @@ std::vector findArrayIndexPathComponents(const std::set& mu } /** - * Returns an std::string of the full dotted field, minus the parts listed in 'skipParts'. + * Returns a FieldRef of the full dotted field, minus the parts at indices listed in + * 'skipComponents'. */ FieldRef pathWithoutSpecifiedComponents(const FieldRef& path, const std::set& skipComponents) { @@ -126,18 +127,13 @@ FieldRef pathWithoutSpecifiedComponents(const FieldRef& path, if (skipComponents.empty()) { return path; } - StringBuilder ss; - size_t startPart = 0; - for (const auto& skipPart : skipComponents) { - ss << (ss.len() && !ss.stringData().endsWith(".") ? "." : "") - << path.dottedSubstring(startPart, skipPart); - startPart = skipPart + 1; + FieldRef result; + for (size_t index = 0; index < path.numParts(); ++index) { + if (!skipComponents.count(index)) { + result.appendPart(path.getPart(index)); + } } - if (startPart < path.numParts()) { - ss << (ss.len() && !ss.stringData().endsWith(".") ? "." : "") - << path.dottedSubstring(startPart, path.numParts()); - } - return FieldRef{ss.str()}; + return result; } /** @@ -158,7 +154,8 @@ MultikeyPaths buildMultiKeyPathsForExpandedWildcardIndexEntry( } std::set generateFieldNameOrArrayIndexPathSet(const std::set& multikeyPaths, - const FieldRef& queryPath) { + const FieldRef& queryPath, + bool requiresSubpathBounds) { // We iterate over the power set of array index positions to generate all necessary paths. // The algorithm is unavoidably O(n2^n), but we enforce that 'n' is never more than single // digits during the planner's index selection phase. @@ -181,7 +178,30 @@ std::set generateFieldNameOrArrayIndexPathSet(const std::setisPrefixOf(*currentPathItr)) { + paths.erase(currentPathItr); + continue; + } + // If existing paths are subpaths of the new entry, remove the old paths. + while (std::next(currentPathItr) != paths.end() && + currentPathItr->isPrefixOf(*std::next(currentPathItr))) { + paths.erase(std::next(currentPathItr)); + } + } } return paths; } @@ -425,27 +445,12 @@ void finalizeWildcardIndexScanConfiguration(IndexScanNode* scan) { // specifically the interval ["path.","path/") on "$_path". const bool requiresSubpathBounds = boundsOverlapObjectTypeBracket(bounds->fields.back()); - // Helper function to check whether the final path component in 'queryPath' is an array index. - const auto lastFieldIsArrayIndex = [&multikeyPaths](const auto& queryPath) { - return (queryPath.numParts() > 1u && multikeyPaths.count(queryPath.numParts() - 2u) && - queryPath.isNumericPathComponentStrict(queryPath.numParts() - 1u)); - }; - - // If subpath bounds are needed, we build a range interval on all subpaths of the query path(s). - // We must therefore trim any trailing array indices from the query path before generating the - // fieldname-or-array power set, in order to avoid overlapping the final set of bounds. For - // instance, the untrimmed query path 'a.0' will produce paths 'a' and 'a.0' if 'a' is multikey, - // and so we would end up with bounds [['a','a'], ['a.','a/'], ['a.0','a.0'], ['a.0.','a.0/']]. - // The latter two are subsets of the ['a.', 'a/'] interval. - while (requiresSubpathBounds && lastFieldIsArrayIndex(queryPath)) { - queryPath.removeLastPart(); - } - // Account for fieldname-or-array-index semantics. $** indexes do not explicitly encode array // indices in their keys, so if this query traverses one or more multikey fields via an array // index (e.g. query 'a.0.b' where 'a' is an array), then we must generate bounds on all array- // and non-array permutations of the path in order to produce INEXACT_FETCH bounds. - auto paths = generateFieldNameOrArrayIndexPathSet(multikeyPaths, queryPath); + auto paths = + generateFieldNameOrArrayIndexPathSet(multikeyPaths, queryPath, requiresSubpathBounds); // Add a $_path point-interval for each path that needs to be traversed in the index. If subpath // bounds are required, then we must add a further range interval on ["path.","path/"). diff --git a/src/mongo/db/query/query_planner_wildcard_index_test.cpp b/src/mongo/db/query/query_planner_wildcard_index_test.cpp index c3cdc174b29..b045dd27ad5 100644 --- a/src/mongo/db/query/query_planner_wildcard_index_test.cpp +++ b/src/mongo/db/query/query_planner_wildcard_index_test.cpp @@ -1519,7 +1519,8 @@ TEST_F(QueryPlannerWildcardTest, InitialNumericPathComponentIsAlwaysFieldName) { } TEST_F(QueryPlannerWildcardTest, ShouldGenerateSpecialBoundsForNullAndExistenceQueries) { - addWildcardIndex(BSON("a.$**" << 1), {"a", "a.b", "a.b.2", "a.b.2.3", "a.b.2.3.4"}); + addWildcardIndex(BSON("a.$**" << 1), + {"a", "a.b", "a.b.2", "a.b.2.3", "a.b.2.3.4", "a.c.b", "a.c.b.1"}); runQuery(fromjson("{'a.0.b': {$exists: true}}")); assertNumSolutions(1U); @@ -1540,15 +1541,46 @@ TEST_F(QueryPlannerWildcardTest, ShouldGenerateSpecialBoundsForNullAndExistenceQ "['a.b.c','a.b.c',true,true], ['a.b.c.','a.b.c/',true,false]], 'a.0.b.1.c': [[{$minKey: " "1},{$maxKey: 1},true,true]]}}}}}"); - // Confirm that any trailing array index fields are trimmed before the fieldname-or-array-index - // pathset is generated, such that the subpath bounds do not overlap. - runQuery(fromjson("{'a.0.b.1': {$exists: true, $eq: null}}")); + // When an array index field exists in the query pattern and one of the resulting + // fieldname-or-array-index paths is a prefix of another, then the subpath bounds generated by + // the prefix path will contain all the bounds generated by its subpaths. Test that we avoid + // overlap by removing the redundant paths. + // + // In the below example, 'a' is multikey and the query is on 'a.0.0.0'. We generate paths + // 'a.0.0' and 'a.0.0.0' because the first '0' is an array index. But the subpaths bound + // generated by 'a.0.0' -> ['a.0.0.','a.0.0/'] would contain all the bounds generated by + // 'a.0.0.0'. Therefore we must remove path 'a.0.0.0' before generating the subpath bounds. + runQuery(fromjson("{'a.0.0.0': {$exists: true}}")); assertNumSolutions(1U); assertSolutionExists( - "{fetch: {filter: {'a.0.b.1': {$exists: true, $eq: null}}, node: {ixscan: {filter:null, " - "pattern:{'$_path': 1, 'a.0.b.1': 1}, bounds: {'$_path': [['a.0.b','a.0.b',true,true], " - "['a.0.b.','a.0.b/',true,false], ['a.b','a.b',true,true], ['a.b.','a.b/',true,false]], " - "'a.0.b.1': [[{$minKey: 1},{$maxKey: 1},true,true]]}}}}}"); + "{fetch: {filter: {'a.0.0.0': {$exists: true}}, node: {ixscan: {filter:null, " + "pattern:{'$_path': 1, 'a.0.0.0': 1}, bounds: {'$_path': [['a.0.0','a.0.0',true,true], " + "['a.0.0.','a.0.0/',true,false]], 'a.0.0.0': [[{$minKey: 1},{$maxKey: 1},true,true]]}}}}}"); + + // When there are multiple subpaths that are prefixes of a particular subpath, all the other + // subpaths need to be removed. In the below example, 'a.c.b', 'a.c.b.1', 'a.c.b.2' are possible + // subpaths, but since 'a.c.b' is a prefix of the rest, we need to ensure that 'a.c.b' is the + // only path that we consider. + runQuery(fromjson("{'a.c.b.1.2': {$exists: true}}")); + assertNumSolutions(1U); + assertSolutionExists( + "{fetch: {filter: {'a.c.b.1.2': {$exists: true}}, node: {ixscan: {filter:null, " + "pattern:{'$_path': 1, 'a.c.b.1.2': 1}, bounds: {'$_path': [['a.c.b','a.c.b',true,true], " + "['a.c.b.','a.c.b/',true,false]], " + "'a.c.b.1.2': [[{$minKey: 1},{$maxKey: 1},true,true]]}}}}}"); + + // Similar to the previous case except one of the subpath is a 'string prefix' (when compared as + // strings) and not a prefix from the FieldRef point of view. In this case bounds for both the + // subpaths should be generated since they don't overlap. + runQuery(fromjson("{'a.c.b.11.1': {$exists: true}}")); + assertNumSolutions(1U); + assertSolutionExists( + "{fetch: {filter: {'a.c.b.11.1': {$exists: true}}, node: {ixscan: {filter:null, " + "pattern:{'$_path': 1, 'a.c.b.11.1': 1}, bounds: {'$_path': " + "[['a.c.b.1','a.c.b.1',true,true], ['a.c.b.1.','a.c.b.1/',true,false], " + "['a.c.b.11.1','a.c.b.11.1',true,true], ['a.c.b.11.1.','a.c.b.11.1/',true,false]], " + "'a.c.b.11.1': [[{$minKey: 1},{$maxKey: 1},true,true]]}}}}}"); + runQuery(fromjson("{'a.0.b.2.3.4': {$exists: true, $eq: null}}")); assertNumSolutions(1U);