0
0
mirror of https://github.com/mongodb/mongo.git synced 2024-12-01 09:32:32 +01:00

SERVER-39938: Pushdown $match on $expr before $lookup

This commit is contained in:
Alya Berciu 2021-03-18 15:42:16 +00:00 committed by Evergreen Agent
parent c87f63d468
commit 7142cc2a3d
5 changed files with 202 additions and 45 deletions

View File

@ -37,8 +37,6 @@ assert.commandWorked(
}
{
// Note that a $expr cannot be moved. However, the optimizer can split the non-$expr part of the
// $match and move that to be in front of the $sort.
const pipeline =
[{$sort: {b: 1}}, {$match: {$and: [{a: {$ne: 2}}, {$expr: {$ne: ["$a", "$b"]}}]}}];
@ -47,7 +45,9 @@ assert.commandWorked(
const pipelineExplained = coll.explain().aggregate(pipeline);
const collScanStage = getAggPlanStage(pipelineExplained, "COLLSCAN");
assert.neq(null, collScanStage, pipelineExplained);
assert.eq({a: {"$not": {"$eq": 2}}}, collScanStage.filter, collScanStage);
assert.eq({$and: [{a: {"$not": {"$eq": 2}}}, {$expr: {$ne: ["$a", "$b"]}}]},
collScanStage.filter,
collScanStage);
}
{
@ -62,6 +62,8 @@ assert.commandWorked(
const pipelineExplained = coll.explain().aggregate(pipeline);
const collScanStage = getAggPlanStage(pipelineExplained, "COLLSCAN");
assert.neq(null, collScanStage, pipelineExplained);
assert.eq({a: {"$not": {"$eq": 2}}}, collScanStage.filter, collScanStage);
assert.eq({$and: [{a: {"$not": {"$eq": 2}}}, {$expr: {$ne: ["$a", "$b"]}}]},
collScanStage.filter,
collScanStage);
}
}());

View File

@ -0,0 +1,78 @@
/**
* Tests that the $match stage is pushed before $lookup stage.
*/
(function() {
"use strict";
load("jstests/aggregation/extras/utils.js"); // For assertArrayEq.
// Collection the aggregations in this test run against; dropped so each run starts empty.
const coll = db.lookup_match_pushdown;
coll.drop();
// Foreign collection referenced by the 'from' field of the $lookup stages below.
const other = db.lookup_match_pushdown_other;
other.drop();
// Set the 'disablePipelineOptimization' failpoint to 'off'. The assertions below inspect the
// optimized explain output, so presumably this guards against a suite having enabled the
// failpoint -- NOTE(review): confirm against the suites that run this test.
assert.commandWorked(
db.adminCommand({"configureFailPoint": 'disablePipelineOptimization', "mode": 'off'}));
// Two local documents, distinguished by 'x'.
assert.commandWorked(coll.insertMany([{_id: 1, x: 5}, {_id: 2, x: 6}]));
// Three foreign documents; 'y' is the field joined against the local 'x', and 'z' is the field
// filtered on after the $unwind.
assert.commandWorked(
other.insertMany([{_id: 2, y: 5, z: 10}, {_id: 3, y: 5, z: 12}, {_id: 4, y: 6, z: 10}]));
/**
 * Explains 'pipeline' against 'coll' and asserts that the resulting plan has the shape described
 * by 'expectedPipeline', then runs 'pipeline' and asserts (via assertArrayEq) that it produces
 * 'expectedResults'.
 *
 * 'expectedPipeline[0]' is the expected winning-plan stage name reported under the leading
 * $cursor stage (for example "COLLSCAN"). Every later entry is the expected name of the
 * aggregation stage at that position in the explain output.
 */
function checkPipelineAndResults(pipeline, expectedPipeline, expectedResults) {
    // Check pipeline is as expected.
    const explain = assert.commandWorked(coll.explain().aggregate(pipeline));
    if (expectedPipeline.length > 0) {
        assert.eq(
            explain.stages[0].$cursor.queryPlanner.winningPlan.stage, expectedPipeline[0], explain);
    }
    assert.eq(explain.stages.length, expectedPipeline.length, explain);
    for (let i = 1; i < expectedPipeline.length; i++) {
        // Object.keys() returns an array, so compare it against a one-element array instead of
        // relying on loose array-to-string coercion. This also makes the intent explicit: the
        // stage document must contain exactly the expected stage name and nothing else.
        assert.eq(Object.keys(explain.stages[i]), [expectedPipeline[i]], explain);
    }

    // Check results are as expected.
    const res = coll.aggregate(pipeline).toArray();
    assertArrayEq({actual: res, expected: expectedResults});
}
const expectedPipeline = ["COLLSCAN", "$lookup"];
// For $eq and $expr:$eq, we should see the same results (in this particular case).
const expectedResultsEq = [{_id: 1, x: 5, a: {_id: 2, y: 5, z: 10}}];
// Ensure $match gets pushed down into $lookup when $eq is used.
const pipelineEq = [
{$lookup: {as: "a", from: other.getName(), localField: "x", foreignField: "y"}},
{$unwind: "$a"},
{$match: {"a.z": 10, x: {$eq: 5}}}
];
checkPipelineAndResults(pipelineEq, expectedPipeline, expectedResultsEq);
// Ensure $match gets pushed down into $lookup when $expr:$eq is used.
const pipelineExprEq = [
{$lookup: {as: "a", from: other.getName(), localField: "x", foreignField: "y"}},
{$unwind: "$a"},
{$match: {"a.z": 10, $expr: {$eq: ["$x", 5]}}}
];
checkPipelineAndResults(pipelineExprEq, expectedPipeline, expectedResultsEq);
// For $gt and $expr:$gt, we should see the same results (in this particular case).
const expectedResultsGt = [{_id: 2, x: 6, a: {_id: 4, y: 6, z: 10}}];
// Ensure $match gets pushed down into $lookup when $gt is used.
const pipelineGt = [
{$lookup: {as: "a", from: other.getName(), localField: "x", foreignField: "y"}},
{$unwind: "$a"},
{$match: {"a.z": 10, x: {$gt: 5}}}
];
checkPipelineAndResults(pipelineGt, expectedPipeline, expectedResultsGt);
// Ensure $match gets pushed down into $lookup when $expr:$gt is used.
const pipelineExprGt = [
{$lookup: {as: "a", from: other.getName(), localField: "x", foreignField: "y"}},
{$unwind: "$a"},
{$match: {"a.z": 10, $expr: {$gt: ["$x", 5]}}}
];
checkPipelineAndResults(pipelineExprGt, expectedPipeline, expectedResultsGt);
}());

View File

@ -223,22 +223,6 @@ bool _isSubsetOf(const MatchExpression* lhs, const ExistsMatchExpression* rhs) {
}
}
/**
 * Reports whether the leaf located at 'path' is independent of every entry in 'fields'.
 */
bool isLeafIndependentOf(const StringData& path, const std::set<std::string>& fields) {
    // The leaf depends on an entry of 'fields' when the two paths are identical or when either
    // one is a path prefix of the other. For example, the expression {a.b: {c: 1}} is not
    // independent of 'a.b.c', and the expression {a.b.c.d: 1} is not independent of 'a.b.c'.
    for (StringData candidate : fields) {
        if (path == candidate) {
            return false;
        }
        if (expression::isPathPrefixOf(path, candidate)) {
            return false;
        }
        if (expression::isPathPrefixOf(candidate, path)) {
            return false;
        }
    }

    // No entry overlapped with 'path'.
    return true;
}
/**
* Creates a MatchExpression that is equivalent to {$and: [children[0], children[1]...]}.
*/
@ -410,32 +394,49 @@ bool isSubsetOf(const MatchExpression* lhs, const MatchExpression* rhs) {
return false;
}
bool isIndependentOf(const MatchExpression& expr, const std::set<std::string>& pathSet) {
switch (expr.getCategory()) {
case MatchExpression::MatchCategory::kLogical: {
// Any logical expression is independent of 'pathSet' if all its children are
// independent of 'pathSet'.
for (size_t i = 0; i < expr.numChildren(); i++) {
if (!isIndependentOf(*expr.getChild(i), pathSet)) {
return false;
}
// Checks if 'expr' has any children which do not have renaming implemented.
bool hasOnlyRenameableMatchExpressionChildren(const MatchExpression& expr) {
if (expr.matchType() == MatchExpression::MatchType::EXPRESSION) {
return true;
} else if (expr.getCategory() == MatchExpression::MatchCategory::kArrayMatching ||
expr.getCategory() == MatchExpression::MatchCategory::kOther) {
return false;
} else if (expr.getCategory() == MatchExpression::MatchCategory::kLogical) {
for (size_t i = 0; i < expr.numChildren(); i++) {
if (!hasOnlyRenameableMatchExpressionChildren(*expr.getChild(i))) {
return false;
}
return true;
}
case MatchExpression::MatchCategory::kLeaf: {
return isLeafIndependentOf(expr.path(), pathSet);
}
// All other match expressions are never considered independent.
case MatchExpression::MatchCategory::kArrayMatching:
case MatchExpression::MatchCategory::kOther: {
return false;
}
}
return true;
}
MONGO_UNREACHABLE;
bool isIndependentOf(const MatchExpression& expr, const std::set<std::string>& pathSet) {
// Any expression types that do not have renaming implemented cannot have their independence
// evaluated here. See applyRenamesToExpression().
if (!hasOnlyRenameableMatchExpressionChildren(expr)) {
return false;
}
auto depsTracker = DepsTracker{};
expr.addDependencies(&depsTracker);
return std::none_of(
depsTracker.fields.begin(), depsTracker.fields.end(), [&pathSet](auto&& field) {
return pathSet.find(field) != pathSet.end() ||
std::any_of(pathSet.begin(), pathSet.end(), [&field](auto&& path) {
return expression::isPathPrefixOf(field, path) ||
expression::isPathPrefixOf(path, field);
});
});
}
bool isOnlyDependentOn(const MatchExpression& expr, const std::set<std::string>& roots) {
// Any expression types that do not have renaming implemented cannot have their independence
// evaluated here. See applyRenamesToExpression().
if (!hasOnlyRenameableMatchExpressionChildren(expr)) {
return false;
}
auto depsTracker = DepsTracker{};
expr.addDependencies(&depsTracker);
return std::all_of(

View File

@ -811,6 +811,32 @@ TEST(IsIndependent, BallIsIndependentOfBalloon) {
ASSERT_FALSE(expression::isIndependentOf(*expr.get(), {"a.ball.c"}));
}
// This is a descriptive test to ensure that until renames are implemented for these expressions,
// matches on these expressions cannot be swapped with other stages.
//
// Each predicate wraps a non-renameable expression inside a logical $or and is checked against
// both isIndependentOf() and isOnlyDependentOn().
TEST(IsIndependent, NonRenameableExpressionIsNotIndependent) {
    const std::vector<std::string> stringExpressions = {
        // Category: kOther.
        "{$or: [{a: {$size: 3}}, {b: {$size: 4}}]}",
        // Category: kArrayMatching.
        "{$or: [{a: {$_internalSchemaMaxItems: 3}}, {b: {$_internalSchemaMaxItems: 4}}]}",
        "{$or: [{a: {$_internalSchemaMinItems: 3}}, {b: {$_internalSchemaMinItems: 4}}]}",
        "{$or: [{a: {$_internalSchemaObjectMatch: {b: 1}}},"
        "       {a: {$_internalSchemaObjectMatch: {b: 2}}}]}",
        "{$or: [{a: {$elemMatch: {b: 3}}}, {a: {$elemMatch: {b: 4}}}]}"};

    // Iterate by const reference so each predicate string is not copied per iteration.
    for (const auto& str : stringExpressions) {
        BSONObj matchPredicate = fromjson(str);
        boost::intrusive_ptr<ExpressionContextForTest> expCtx(new ExpressionContextForTest());
        auto swMatchExpression = MatchExpressionParser::parse(matchPredicate, std::move(expCtx));
        ASSERT_OK(swMatchExpression.getStatus());
        auto matchExpression = std::move(swMatchExpression.getValue());

        // Both of these should be true once renames are implemented.
        ASSERT_FALSE(expression::isIndependentOf(*matchExpression, {"c"}));
        ASSERT_FALSE(expression::isOnlyDependentOn(*matchExpression, {"a", "b"}));
    }
}
TEST(SplitMatchExpression, AndWithSplittableChildrenIsSplittable) {
BSONObj matchPredicate = fromjson("{$and: [{a: 1}, {b: 1}]}");
boost::intrusive_ptr<ExpressionContextForTest> expCtx(new ExpressionContextForTest());
@ -1149,6 +1175,50 @@ TEST(SplitMatchExpression, ShouldNotMoveMaxItemsAcrossRename) {
ASSERT_BSONOBJ_EQ(secondBob.obj(), fromjson("{a: {$_internalSchemaMaxItems: 3}}"));
}
// A $or whose children are $_internalSchemaMaxItems predicates on a renamed field must stay
// entirely in the second (non-movable) half of the split, serialized unchanged.
TEST(SplitMatchExpression, ShouldNotMoveMaxItemsInLogicalExpressionAcrossRename) {
    BSONObj matchPredicate = fromjson(
        "{$or: [{a: {$_internalSchemaMaxItems: 3}},"
        "       {a: {$_internalSchemaMaxItems: 4}}]}");
    boost::intrusive_ptr<ExpressionContextForTest> expCtx(new ExpressionContextForTest());
    auto parsed = MatchExpressionParser::parse(matchPredicate, std::move(expCtx));
    ASSERT_OK(parsed.getStatus());

    // Split with no excluded fields and a single rename applied to 'a'.
    StringMap<std::string> renames{{"a", "c"}};
    auto halves = expression::splitMatchExpressionBy(std::move(parsed.getValue()), {}, renames);

    // Nothing may be moved ahead; the whole predicate remains in the second half.
    ASSERT_FALSE(halves.first.get());
    ASSERT_TRUE(halves.second.get());

    BSONObjBuilder remainingBob;
    halves.second->serialize(&remainingBob, true);
    ASSERT_BSONOBJ_EQ(remainingBob.obj(),
                      fromjson("{$or: [{a: {$_internalSchemaMaxItems: 3}},"
                               "       {a: {$_internalSchemaMaxItems: 4}}]}"));
}
// A $or whose children are $_internalSchemaObjectMatch predicates on a renamed field must stay
// entirely in the second (non-movable) half of the split.
TEST(SplitMatchExpression, ShouldNotMoveInternalSchemaObjectMatchInLogicalExpressionAcrossRename) {
    BSONObj matchPredicate = fromjson(
        "{$or: [{a: {$_internalSchemaObjectMatch: {b: 1}}},"
        "       {a: {$_internalSchemaObjectMatch: {b: 1}}}]}");
    boost::intrusive_ptr<ExpressionContextForTest> expCtx(new ExpressionContextForTest());
    auto parsed = MatchExpressionParser::parse(matchPredicate, std::move(expCtx));
    ASSERT_OK(parsed.getStatus());

    // Split with no excluded fields and a single rename applied to 'a'.
    StringMap<std::string> renames{{"a", "c"}};
    auto halves = expression::splitMatchExpressionBy(std::move(parsed.getValue()), {}, renames);

    // Nothing may be moved ahead; the whole predicate remains in the second half.
    ASSERT_FALSE(halves.first.get());
    ASSERT_TRUE(halves.second.get());

    // The serialized form spells out the implicit equality as an explicit $eq.
    BSONObjBuilder remainingBob;
    halves.second->serialize(&remainingBob, true);
    ASSERT_BSONOBJ_EQ(remainingBob.obj(),
                      fromjson("{$or: [{a: {$_internalSchemaObjectMatch: {b: {$eq: 1}}}},"
                               "       {a: {$_internalSchemaObjectMatch: {b: {$eq: 1}}}}]}"));
}
TEST(SplitMatchExpression, ShouldMoveMinLengthAcrossRename) {
BSONObj matchPredicate = fromjson("{a: {$_internalSchemaMinLength: 3}}");
boost::intrusive_ptr<ExpressionContextForTest> expCtx(new ExpressionContextForTest());

View File

@ -1009,12 +1009,18 @@ TEST_F(DocumentSourceLookUpTest,
auto subPipeline = lookupStage->getSubPipeline_forTest(DOC("_id" << 5));
ASSERT(subPipeline);
auto expectedPipe = fromjson(
str::stream() << "[{mock: {}}, {$match: {x:{$eq: 1}}}, {$sort: {sortKey: {x: 1}}}, "
<< sequentialCacheStageObj()
<< ", {$facet: {facetPipe: [{$teeConsumer: {}},{$group: {_id: '$_id'}}, "
"{$match: {$and: [{_id: {$_internalExprEq: 5}}, {$expr: {$eq: "
"['$_id', {$const: 5}]}}]}}]}}]");
// Note that the second $match stage should be moved up to before the $group stage, since $group
// should swap with $match when filtering on $_id.
auto expectedPipe =
fromjson(str::stream() << "[{mock: {}},"
" {$match: {x:{$eq: 1}}},"
" {$sort: {sortKey: {x: 1}}},"
<< sequentialCacheStageObj()
<< ",{$facet: {facetPipe: ["
" {$teeConsumer: {}},"
" {$match: {$and: [{_id: {$_internalExprEq: 5}},"
" {$expr: {$eq: ['$_id', {$const: 5}]}}]}},"
" {$group: {_id: '$_id'}}]}}]");
ASSERT_VALUE_EQ(Value(subPipeline->writeExplainOps(kExplain)), Value(BSONArray(expectedPipe)));
}