mirror of
https://github.com/mongodb/mongo.git
synced 2024-12-01 09:32:32 +01:00
SERVER-39938: Pushdown $match on $expr before $lookup
This commit is contained in:
parent
c87f63d468
commit
7142cc2a3d
@ -37,8 +37,6 @@ assert.commandWorked(
|
||||
}
|
||||
|
||||
{
|
||||
// Note that a $expr cannot be moved. However, the optimizer can split the non-$expr part of the
|
||||
// $match and move that to be in front of the $sort.
|
||||
const pipeline =
|
||||
[{$sort: {b: 1}}, {$match: {$and: [{a: {$ne: 2}}, {$expr: {$ne: ["$a", "$b"]}}]}}];
|
||||
|
||||
@ -47,7 +45,9 @@ assert.commandWorked(
|
||||
const pipelineExplained = coll.explain().aggregate(pipeline);
|
||||
const collScanStage = getAggPlanStage(pipelineExplained, "COLLSCAN");
|
||||
assert.neq(null, collScanStage, pipelineExplained);
|
||||
assert.eq({a: {"$not": {"$eq": 2}}}, collScanStage.filter, collScanStage);
|
||||
assert.eq({$and: [{a: {"$not": {"$eq": 2}}}, {$expr: {$ne: ["$a", "$b"]}}]},
|
||||
collScanStage.filter,
|
||||
collScanStage);
|
||||
}
|
||||
|
||||
{
|
||||
@ -62,6 +62,8 @@ assert.commandWorked(
|
||||
const pipelineExplained = coll.explain().aggregate(pipeline);
|
||||
const collScanStage = getAggPlanStage(pipelineExplained, "COLLSCAN");
|
||||
assert.neq(null, collScanStage, pipelineExplained);
|
||||
assert.eq({a: {"$not": {"$eq": 2}}}, collScanStage.filter, collScanStage);
|
||||
assert.eq({$and: [{a: {"$not": {"$eq": 2}}}, {$expr: {$ne: ["$a", "$b"]}}]},
|
||||
collScanStage.filter,
|
||||
collScanStage);
|
||||
}
|
||||
}());
|
78
jstests/noPassthroughWithMongod/lookup_match_pushdown.js
Normal file
78
jstests/noPassthroughWithMongod/lookup_match_pushdown.js
Normal file
@ -0,0 +1,78 @@
|
||||
/**
|
||||
* Tests that the $match stage is pushed before $lookup stage.
|
||||
*/
|
||||
(function() {
|
||||
"use strict";
|
||||
|
||||
load("jstests/aggregation/extras/utils.js"); // For assertArrayEq.
|
||||
|
||||
const coll = db.lookup_match_pushdown;
|
||||
coll.drop();
|
||||
const other = db.lookup_match_pushdown_other;
|
||||
other.drop();
|
||||
|
||||
assert.commandWorked(
|
||||
db.adminCommand({"configureFailPoint": 'disablePipelineOptimization', "mode": 'off'}));
|
||||
|
||||
assert.commandWorked(coll.insertMany([{_id: 1, x: 5}, {_id: 2, x: 6}]));
|
||||
assert.commandWorked(
|
||||
other.insertMany([{_id: 2, y: 5, z: 10}, {_id: 3, y: 5, z: 12}, {_id: 4, y: 6, z: 10}]));
|
||||
|
||||
// Explains 'pipeline' against 'coll' and asserts that the optimized plan has the expected shape,
// then runs the aggregation and asserts that it returns 'expectedResults'.
//
// 'expectedPipeline' is an array of names. Element 0 is compared against the winning plan's stage
// name reported inside the leading $cursor stage; every later element is compared against the name
// of the aggregation stage at the same position in the explain output.
function checkPipelineAndResults(pipeline, expectedPipeline, expectedResults) {
    // Check pipeline is as expected.
    const explain = assert.commandWorked(coll.explain().aggregate(pipeline));

    // Compare the stage counts before indexing into 'explain.stages', so that a mismatch is
    // reported as an assertion failure carrying the explain output rather than as a TypeError.
    assert.eq(explain.stages.length, expectedPipeline.length, explain);

    if (expectedPipeline.length > 0) {
        assert.eq(
            explain.stages[0].$cursor.queryPlanner.winningPlan.stage, expectedPipeline[0], explain);
    }

    for (let i = 1; i < expectedPipeline.length; i++) {
        // Object.keys() returns an array, so compare it against a one-element array instead of
        // relying on array-to-string coercion to match a bare stage name.
        assert.eq(Object.keys(explain.stages[i]), [expectedPipeline[i]], explain);
    }

    // Check results are as expected.
    const res = coll.aggregate(pipeline).toArray();
    assertArrayEq({actual: res, expected: expectedResults});
}
|
||||
|
||||
const expectedPipeline = ["COLLSCAN", "$lookup"];
|
||||
|
||||
// For $eq and $expr:$eq, we should see the same results (in this particular case).
|
||||
const expectedResultsEq = [{_id: 1, x: 5, a: {_id: 2, y: 5, z: 10}}];
|
||||
|
||||
// Ensure $match gets pushed down into $lookup when $eq is used.
|
||||
const pipelineEq = [
|
||||
{$lookup: {as: "a", from: other.getName(), localField: "x", foreignField: "y"}},
|
||||
{$unwind: "$a"},
|
||||
{$match: {"a.z": 10, x: {$eq: 5}}}
|
||||
];
|
||||
checkPipelineAndResults(pipelineEq, expectedPipeline, expectedResultsEq);
|
||||
|
||||
// Ensure $match gets pushed down into $lookup when $expr:$eq is used.
|
||||
const pipelineExprEq = [
|
||||
{$lookup: {as: "a", from: other.getName(), localField: "x", foreignField: "y"}},
|
||||
{$unwind: "$a"},
|
||||
{$match: {"a.z": 10, $expr: {$eq: ["$x", 5]}}}
|
||||
];
|
||||
checkPipelineAndResults(pipelineExprEq, expectedPipeline, expectedResultsEq);
|
||||
|
||||
// For $gt and $expr:$gt, we should see the same results (in this particular case).
|
||||
const expectedResultsGt = [{_id: 2, x: 6, a: {_id: 4, y: 6, z: 10}}];
|
||||
|
||||
// Ensure $match gets pushed down into $lookup when $gt is used.
|
||||
const pipelineGt = [
|
||||
{$lookup: {as: "a", from: other.getName(), localField: "x", foreignField: "y"}},
|
||||
{$unwind: "$a"},
|
||||
{$match: {"a.z": 10, x: {$gt: 5}}}
|
||||
];
|
||||
checkPipelineAndResults(pipelineGt, expectedPipeline, expectedResultsGt);
|
||||
|
||||
// Ensure $match gets pushed down into $lookup when $expr:$gt is used.
|
||||
const pipelineExprGt = [
|
||||
{$lookup: {as: "a", from: other.getName(), localField: "x", foreignField: "y"}},
|
||||
{$unwind: "$a"},
|
||||
{$match: {"a.z": 10, $expr: {$gt: ["$x", 5]}}}
|
||||
];
|
||||
checkPipelineAndResults(pipelineExprGt, expectedPipeline, expectedResultsGt);
|
||||
}());
|
@ -223,22 +223,6 @@ bool _isSubsetOf(const MatchExpression* lhs, const ExistsMatchExpression* rhs) {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns whether the leaf at 'path' is independent of 'fields'.
|
||||
*/
|
||||
bool isLeafIndependentOf(const StringData& path, const std::set<std::string>& fields) {
|
||||
// For each field in 'fields', we need to check if that field is a prefix of 'path' or if 'path'
|
||||
// is a prefix of that field. For example, the expression {a.b: {c: 1}} is not independent of
|
||||
// 'a.b.c', and and the expression {a.b.c.d: 1} is not independent of 'a.b.c'.
|
||||
for (StringData field : fields) {
|
||||
if (path == field || expression::isPathPrefixOf(path, field) ||
|
||||
expression::isPathPrefixOf(field, path)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a MatchExpression that is equivalent to {$and: [children[0], children[1]...]}.
|
||||
*/
|
||||
@ -410,32 +394,49 @@ bool isSubsetOf(const MatchExpression* lhs, const MatchExpression* rhs) {
|
||||
return false;
|
||||
}
|
||||
|
||||
bool isIndependentOf(const MatchExpression& expr, const std::set<std::string>& pathSet) {
|
||||
switch (expr.getCategory()) {
|
||||
case MatchExpression::MatchCategory::kLogical: {
|
||||
// Any logical expression is independent of 'pathSet' if all its children are
|
||||
// independent of 'pathSet'.
|
||||
for (size_t i = 0; i < expr.numChildren(); i++) {
|
||||
if (!isIndependentOf(*expr.getChild(i), pathSet)) {
|
||||
return false;
|
||||
}
|
||||
// Checks if 'expr' has any children which do not have renaming implemented.
|
||||
bool hasOnlyRenameableMatchExpressionChildren(const MatchExpression& expr) {
|
||||
if (expr.matchType() == MatchExpression::MatchType::EXPRESSION) {
|
||||
return true;
|
||||
} else if (expr.getCategory() == MatchExpression::MatchCategory::kArrayMatching ||
|
||||
expr.getCategory() == MatchExpression::MatchCategory::kOther) {
|
||||
return false;
|
||||
} else if (expr.getCategory() == MatchExpression::MatchCategory::kLogical) {
|
||||
for (size_t i = 0; i < expr.numChildren(); i++) {
|
||||
if (!hasOnlyRenameableMatchExpressionChildren(*expr.getChild(i))) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
case MatchExpression::MatchCategory::kLeaf: {
|
||||
return isLeafIndependentOf(expr.path(), pathSet);
|
||||
}
|
||||
// All other match expressions are never considered independent.
|
||||
case MatchExpression::MatchCategory::kArrayMatching:
|
||||
case MatchExpression::MatchCategory::kOther: {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
MONGO_UNREACHABLE;
|
||||
bool isIndependentOf(const MatchExpression& expr, const std::set<std::string>& pathSet) {
|
||||
// Any expression types that do not have renaming implemented cannot have their independence
|
||||
// evaluated here. See applyRenamesToExpression().
|
||||
if (!hasOnlyRenameableMatchExpressionChildren(expr)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto depsTracker = DepsTracker{};
|
||||
expr.addDependencies(&depsTracker);
|
||||
return std::none_of(
|
||||
depsTracker.fields.begin(), depsTracker.fields.end(), [&pathSet](auto&& field) {
|
||||
return pathSet.find(field) != pathSet.end() ||
|
||||
std::any_of(pathSet.begin(), pathSet.end(), [&field](auto&& path) {
|
||||
return expression::isPathPrefixOf(field, path) ||
|
||||
expression::isPathPrefixOf(path, field);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
bool isOnlyDependentOn(const MatchExpression& expr, const std::set<std::string>& roots) {
|
||||
// Any expression types that do not have renaming implemented cannot have their independence
|
||||
// evaluated here. See applyRenamesToExpression().
|
||||
if (!hasOnlyRenameableMatchExpressionChildren(expr)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto depsTracker = DepsTracker{};
|
||||
expr.addDependencies(&depsTracker);
|
||||
return std::all_of(
|
||||
|
@ -811,6 +811,32 @@ TEST(IsIndependent, BallIsIndependentOfBalloon) {
|
||||
ASSERT_FALSE(expression::isIndependentOf(*expr.get(), {"a.ball.c"}));
|
||||
}
|
||||
|
||||
// This is a descriptive test to ensure that until renames are implemented for these expressions,
|
||||
// matches on these expressions cannot be swapped with other stages.
|
||||
TEST(IsIndependent, NonRenameableExpressionIsNotIndependent) {
|
||||
std::vector<std::string> stringExpressions = {
|
||||
// Category: kOther.
|
||||
"{$or: [{a: {$size: 3}}, {b: {$size: 4}}]}",
|
||||
// Category: kArrayMatching.
|
||||
"{$or: [{a: {$_internalSchemaMaxItems: 3}}, {b: {$_internalSchemaMaxItems: 4}}]}",
|
||||
"{$or: [{a: {$_internalSchemaMinItems: 3}}, {b: {$_internalSchemaMinItems: 4}}]}",
|
||||
"{$or: [{a: {$_internalSchemaObjectMatch: {b: 1}}},"
|
||||
" {a: {$_internalSchemaObjectMatch: {b: 2}}}]}",
|
||||
"{$or: [{a: {$elemMatch: {b: 3}}}, {a: {$elemMatch: {b: 4}}}]}"};
|
||||
|
||||
for (auto str : stringExpressions) {
|
||||
BSONObj matchPredicate = fromjson(str);
|
||||
boost::intrusive_ptr<ExpressionContextForTest> expCtx(new ExpressionContextForTest());
|
||||
auto swMatchExpression = MatchExpressionParser::parse(matchPredicate, std::move(expCtx));
|
||||
ASSERT_OK(swMatchExpression.getStatus());
|
||||
auto matchExpression = std::move(swMatchExpression.getValue());
|
||||
|
||||
// Both of these should be true once renames are implemented.
|
||||
ASSERT_FALSE(expression::isIndependentOf(*matchExpression.get(), {"c"}));
|
||||
ASSERT_FALSE(expression::isOnlyDependentOn(*matchExpression.get(), {"a", "b"}));
|
||||
}
|
||||
}
|
||||
|
||||
TEST(SplitMatchExpression, AndWithSplittableChildrenIsSplittable) {
|
||||
BSONObj matchPredicate = fromjson("{$and: [{a: 1}, {b: 1}]}");
|
||||
boost::intrusive_ptr<ExpressionContextForTest> expCtx(new ExpressionContextForTest());
|
||||
@ -1149,6 +1175,50 @@ TEST(SplitMatchExpression, ShouldNotMoveMaxItemsAcrossRename) {
|
||||
ASSERT_BSONOBJ_EQ(secondBob.obj(), fromjson("{a: {$_internalSchemaMaxItems: 3}}"));
|
||||
}
|
||||
|
||||
// Splitting an $or of $_internalSchemaMaxItems predicates on 'a' with the rename {a -> c} in effect
// must leave the entire predicate, unchanged, in the second half of the split.
TEST(SplitMatchExpression, ShouldNotMoveMaxItemsInLogicalExpressionAcrossRename) {
    StringMap<std::string> renames{{"a", "c"}};

    BSONObj matchPredicate = fromjson(
        "{$or: [{a: {$_internalSchemaMaxItems: 3}},"
        " {a: {$_internalSchemaMaxItems: 4}}]}");
    auto expCtx = boost::intrusive_ptr<ExpressionContextForTest>(new ExpressionContextForTest());
    auto swParsed = MatchExpressionParser::parse(matchPredicate, std::move(expCtx));
    ASSERT_OK(swParsed.getStatus());

    std::pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> halves =
        expression::splitMatchExpressionBy(std::move(swParsed.getValue()), {}, renames);

    // Nothing may end up in the first half.
    ASSERT_FALSE(halves.first.get());

    // The second half must serialize back to the original predicate.
    ASSERT_TRUE(halves.second.get());
    BSONObjBuilder remainderBob;
    halves.second->serialize(&remainderBob, true);
    ASSERT_BSONOBJ_EQ(remainderBob.obj(),
                      fromjson("{$or: [{a: {$_internalSchemaMaxItems: 3}},"
                               " {a: {$_internalSchemaMaxItems: 4}}]}"));
}
|
||||
|
||||
// Splitting an $or of $_internalSchemaObjectMatch predicates on 'a' with the rename {a -> c} in
// effect must leave the entire predicate in the second half of the split.
TEST(SplitMatchExpression, ShouldNotMoveInternalSchemaObjectMatchInLogicalExpressionAcrossRename) {
    StringMap<std::string> renames{{"a", "c"}};

    BSONObj matchPredicate = fromjson(
        "{$or: [{a: {$_internalSchemaObjectMatch: {b: 1}}},"
        " {a: {$_internalSchemaObjectMatch: {b: 1}}}]}");
    auto expCtx = boost::intrusive_ptr<ExpressionContextForTest>(new ExpressionContextForTest());
    auto swParsed = MatchExpressionParser::parse(matchPredicate, std::move(expCtx));
    ASSERT_OK(swParsed.getStatus());

    std::pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> halves =
        expression::splitMatchExpressionBy(std::move(swParsed.getValue()), {}, renames);

    // Nothing may end up in the first half.
    ASSERT_FALSE(halves.first.get());

    // The second half holds the whole predicate; the expected serialization spells the inner
    // equality out as an explicit $eq.
    ASSERT_TRUE(halves.second.get());
    BSONObjBuilder remainderBob;
    halves.second->serialize(&remainderBob, true);
    ASSERT_BSONOBJ_EQ(remainderBob.obj(),
                      fromjson("{$or: [{a: {$_internalSchemaObjectMatch: {b: {$eq: 1}}}},"
                               " {a: {$_internalSchemaObjectMatch: {b: {$eq: 1}}}}]}"));
}
|
||||
|
||||
TEST(SplitMatchExpression, ShouldMoveMinLengthAcrossRename) {
|
||||
BSONObj matchPredicate = fromjson("{a: {$_internalSchemaMinLength: 3}}");
|
||||
boost::intrusive_ptr<ExpressionContextForTest> expCtx(new ExpressionContextForTest());
|
||||
|
@ -1009,12 +1009,18 @@ TEST_F(DocumentSourceLookUpTest,
|
||||
auto subPipeline = lookupStage->getSubPipeline_forTest(DOC("_id" << 5));
|
||||
ASSERT(subPipeline);
|
||||
|
||||
auto expectedPipe = fromjson(
|
||||
str::stream() << "[{mock: {}}, {$match: {x:{$eq: 1}}}, {$sort: {sortKey: {x: 1}}}, "
|
||||
<< sequentialCacheStageObj()
|
||||
<< ", {$facet: {facetPipe: [{$teeConsumer: {}},{$group: {_id: '$_id'}}, "
|
||||
"{$match: {$and: [{_id: {$_internalExprEq: 5}}, {$expr: {$eq: "
|
||||
"['$_id', {$const: 5}]}}]}}]}}]");
|
||||
// Note that the second $match stage should be moved up to before the $group stage, since $group
|
||||
// should swap with $match when filtering on $_id.
|
||||
auto expectedPipe =
|
||||
fromjson(str::stream() << "[{mock: {}},"
|
||||
" {$match: {x:{$eq: 1}}},"
|
||||
" {$sort: {sortKey: {x: 1}}},"
|
||||
<< sequentialCacheStageObj()
|
||||
<< ",{$facet: {facetPipe: ["
|
||||
" {$teeConsumer: {}},"
|
||||
" {$match: {$and: [{_id: {$_internalExprEq: 5}},"
|
||||
" {$expr: {$eq: ['$_id', {$const: 5}]}}]}},"
|
||||
" {$group: {_id: '$_id'}}]}}]");
|
||||
|
||||
ASSERT_VALUE_EQ(Value(subPipeline->writeExplainOps(kExplain)), Value(BSONArray(expectedPipe)));
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user