0
0
mirror of https://github.com/mongodb/mongo.git synced 2024-12-01 09:32:32 +01:00

SERVER-54480 Fix index key rehydration in SBE

This commit is contained in:
Ian Boros 2021-02-12 11:38:33 -05:00 committed by Evergreen Agent
parent abb82fc8a4
commit 7634943d1a
7 changed files with 233 additions and 88 deletions

View File

@ -3,7 +3,6 @@
// @tags: [
// do_not_wrap_aggregations_in_facets,
// requires_pipeline_optimization,
// sbe_incompatible,
// ]
(function() {
"use strict";

View File

@ -6,7 +6,6 @@
* @tags: [
* assumes_unsharded_collection,
* requires_find_command,
* sbe_incompatible,
* ]
*/
(function() {

View File

@ -2,7 +2,6 @@
// shard key since the document needs to be fetched in order to apply the SHARDING_FILTER stage.
// @tags: [
// assumes_unsharded_collection,
// sbe_incompatible,
// ]
/**
@ -14,6 +13,11 @@
load("jstests/libs/analyze_plan.js");
const isSBEEnabled = (() => {
const getParam = db.adminCommand({getParameter: 1, featureFlagSBE: 1});
return getParam.hasOwnProperty("featureFlagSBE") && getParam.featureFlagSBE.value;
})();
let coll = db["projection_dotted_paths"];
coll.drop();
assert.commandWorked(coll.createIndex({a: 1, "b.c": 1, "b.d": 1, c: 1}));
@ -24,8 +28,8 @@ assert.commandWorked(coll.insert({_id: 1, a: 1, b: {c: 1, d: 1, e: 1}, c: 1, e:
let resultDoc = coll.findOne({a: 1}, {_id: 0, a: 1, "b.c": 1, "b.d": 1, c: 1});
assert.eq(resultDoc, {a: 1, b: {c: 1, d: 1}, c: 1});
let explain = coll.find({a: 1}, {_id: 0, a: 1, "b.c": 1, "b.d": 1, c: 1}).explain("queryPlanner");
assert(isIxscan(db, explain.queryPlanner.winningPlan));
assert(isIndexOnly(db, explain.queryPlanner.winningPlan));
assert(isIxscan(db, explain.queryPlanner.winningPlan), explain);
assert(isIndexOnly(db, explain.queryPlanner.winningPlan), explain);
// Project a subset of the indexed fields. Verify that the projection is computed correctly and
// that the plan is covered.
@ -63,7 +67,11 @@ assert(!isIndexOnly(db, explain.queryPlanner.winningPlan));
resultDoc = coll.findOne({_id: 1}, {_id: 0, "b.c": 1, "b.e": 1, c: 1});
assert.eq(resultDoc, {b: {c: 1, e: 1}, c: 1});
explain = coll.find({_id: 1}, {_id: 0, "b.c": 1, "b.e": 1, c: 1}).explain("queryPlanner");
assert(isIdhack(db, explain.queryPlanner.winningPlan));
if (isSBEEnabled) {
assert(isIxscan(db, explain.queryPlanner.winningPlan), explain);
} else {
assert(isIdhack(db, explain.queryPlanner.winningPlan), explain);
}
// If we make a dotted path multikey, projections using that path cannot be covered. But
// projections which do not include the multikey path can still be covered.
@ -95,4 +103,27 @@ assert(isIndexOnly(db, explain.queryPlanner.winningPlan));
// with nulls. This is a bug tracked by SERVER-23229.
resultDoc = coll.findOne({a: 1}, {_id: 0, "x.y.y": 1, "x.y.z": 1, "x.z": 1});
assert.eq(resultDoc, {x: {y: {y: null, z: null}, z: null}});
// Test covered plans where an index contains overlapping dotted paths.
{
assert.commandWorked(coll.createIndex({"a.b.c": 1, "a.b": 1}));
assert.commandWorked(coll.insert({a: {b: {c: 1, d: 1}}}));
explain = coll.find({"a.b.c": 1}, {_id: 0, "a.b": 1}).explain();
assert(isIxscan(db, explain.queryPlanner.winningPlan));
assert(isIndexOnly(db, explain.queryPlanner.winningPlan));
assert.eq(coll.findOne({"a.b.c": 1}, {_id: 0, "a.b": 1}), {a: {b: {c: 1, d: 1}}});
}
{
assert.commandWorked(coll.dropIndexes());
assert.commandWorked(coll.createIndex({"a.b": 1, "a.b.c": 1}));
assert.commandWorked(coll.insert({a: {b: {c: 1, d: 1}}}));
const filter = {"a.b": {c: 1, d: 1}};
explain = coll.find(filter, {_id: 0, "a.b": 1}).explain();
assert(isIxscan(db, explain.queryPlanner.winningPlan));
assert(isIndexOnly(db, explain.queryPlanner.winningPlan));
assert.eq(coll.findOne(filter, {_id: 0, "a.b": 1}), {a: {b: {c: 1, d: 1}}});
}
}());

View File

@ -51,6 +51,11 @@ function getPlanStages(root, stage) {
results = results.concat(getPlanStages(getWinningPlan(root.queryPlanner), stage));
}
// This field is used in SBE explain output.
if ("queryPlan" in root) {
results = results.concat(getPlanStages(root.queryPlan, stage));
}
if ("thenStage" in root) {
results = results.concat(getPlanStages(root.thenStage, stage));
}

View File

@ -61,6 +61,128 @@
#include "mongo/db/s/collection_sharding_state.h"
namespace mongo::stage_builder {
namespace {
/**
 * Tree representation of an index key pattern.
 *
 * For example, the key pattern {a.b: 1, x: 1, a.c: 1} would look like:
 *
 *         <root>
 *         /   |
 *        a    x
 *       / \
 *      b   c
 *
 * This tree is used for building SBE subtrees to re-hydrate index keys.
 */
struct IndexKeyPatternTreeNode {
    /**
     * Returns the child node registered under 'fieldComponent', creating it if it does not exist
     * yet. The returned pointer is owned by this node's 'children' map.
     *
     * A newly created child is also appended to 'childrenOrder', so that children can later be
     * visited in the order in which they were first added.
     */
    IndexKeyPatternTreeNode* emplace(StringData fieldComponent) {
        // Inserting a key which is already present would leave the map untouched, destroy the
        // freshly allocated node (making the returned pointer dangle) and record the component
        // twice in 'childrenOrder'. Hand back the existing child instead.
        if (auto it = children.find(fieldComponent); it != children.end()) {
            return it->second.get();
        }

        auto newNode = std::make_unique<IndexKeyPatternTreeNode>();
        const auto newNodeRaw = newNode.get();
        children.emplace(fieldComponent, std::move(newNode));
        childrenOrder.push_back(fieldComponent.toString());
        return newNodeRaw;
    }

    // Child nodes, keyed by path component.
    StringMap<std::unique_ptr<IndexKeyPatternTreeNode>> children;

    // Path components of the children, in the order in which they were added via emplace().
    std::vector<std::string> childrenOrder;

    // Which slot the index key for this component is stored in. May be boost::none for non-leaf
    // nodes.
    boost::optional<sbe::value::SlotId> indexKeySlot;
};
/**
 * Builds an IndexKeyPatternTreeNode tree which maps each component of 'keyPattern' to the entry of
 * 'slots' found at the same position. The caller is expected to pass one slot per key pattern
 * component.
 *
 * When the key pattern contains a component whose path extends an earlier component that already
 * has a slot (for example "a.b" in {a: 1, a.b: 1}), the later component is left out of the tree.
 * For the purpose of rehydrating index keys, this is fine (and actually preferable).
 */
std::unique_ptr<IndexKeyPatternTreeNode> buildKeyPatternTree(const BSONObj& keyPattern,
                                                             const sbe::value::SlotVector& slots) {
    auto root = std::make_unique<IndexKeyPatternTreeNode>();

    size_t slotIdx = 0;
    for (auto&& elem : keyPattern) {
        FieldRef path(elem.fieldNameStringData());
        IndexKeyPatternTreeNode* cursor = root.get();
        bool coveredByIndexedPrefix = false;

        // Walk down the tree one path component at a time, creating missing nodes on the way.
        for (FieldIndex depth = 0; depth < path.numParts() && !coveredByIndexedPrefix; ++depth) {
            const auto component = path.getPart(depth);
            const auto childIt = cursor->children.find(component);
            if (childIt == cursor->children.end()) {
                cursor = cursor->emplace(component);
                continue;
            }

            cursor = childIt->second.get();
            // If the node we just stepped onto already has a slot, then we are processing a
            // sub-path of a path that is already indexed. Stop descending: the sub-path is not
            // used when reconstructing the object.
            coveredByIndexedPrefix = static_cast<bool>(cursor->indexKeySlot);
        }

        if (!coveredByIndexedPrefix) {
            cursor->indexKeySlot = slots[slotIdx];
        }

        // The slot position advances for every key pattern component, including skipped ones.
        ++slotIdx;
    }

    return root;
}
/**
* Given a root IndexKeyPatternTreeNode, this function will construct an SBE expression for
* producing a partial object from an index key.
*
* For example, given the index key pattern {a.b: 1, x: 1, a.c: 1} and the index key
* {"": 1, "": 2, "": 3}, the SBE expression would produce the object {a: {b:1, c: 3}, x: 2}.
*/
std::unique_ptr<sbe::EExpression> buildNewObjExpr(const IndexKeyPatternTreeNode* kpTree) {
std::vector<std::unique_ptr<sbe::EExpression>> args;
for (auto&& fieldName : kpTree->childrenOrder) {
auto it = kpTree->children.find(fieldName);
args.emplace_back(makeConstant(fieldName));
if (it->second->indexKeySlot) {
args.emplace_back(makeVariable(*it->second->indexKeySlot));
} else {
// The reason this is in an else branch is that in the case where we have an index key
// like {a.b: ..., a: ...}, we've already made the logic for reconstructing the 'a'
// portion, so the 'a.b' subtree can be skipped.
args.push_back(buildNewObjExpr(it->second.get()));
}
}
return sbe::makeE<sbe::EFunction>("newObj", std::move(args));
}
/**
 * Given a stage, an index key pattern and a corresponding array of slot IDs, this function adds a
 * ProjectStage on top of 'stage' which rehydrates the index key and stores the result in
 * 'resultSlot'.
 */
std::unique_ptr<sbe::PlanStage> rehydrateIndexKey(std::unique_ptr<sbe::PlanStage> stage,
                                                  const BSONObj& indexKeyPattern,
                                                  PlanNodeId nodeId,
                                                  const sbe::value::SlotVector& indexKeySlots,
                                                  sbe::value::SlotId resultSlot) {
    // The tree is only needed while the expression is being generated.
    const auto keyPatternTree = buildKeyPatternTree(indexKeyPattern, indexKeySlots);

    return sbe::makeProjectStage(
        std::move(stage), nodeId, resultSlot, buildNewObjExpr(keyPatternTree.get()));
}
} // namespace
std::unique_ptr<sbe::RuntimeEnvironment> makeRuntimeEnvironment(
const CanonicalQuery& cq,
OperationContext* opCtx,
@ -310,16 +432,70 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder
// Index scans cannot produce an oplogTsSlot, so assert that the caller doesn't need it.
invariant(!reqs.has(kOplogTs));
return generateIndexScan(_opCtx,
_collection,
ixn,
reqs,
&_slotIdGenerator,
&_frameIdGenerator,
&_spoolIdGenerator,
_yieldPolicy,
_data.env,
_lockAcquisitionCallback);
sbe::IndexKeysInclusionSet indexKeyBitset;
if (reqs.has(PlanStageSlots::kReturnKey) || reqs.has(PlanStageSlots::kResult)) {
// If either 'reqs.result' or 'reqs.returnKey' is true, we need to get all parts of the
// index key (regardless of what was requested by 'reqs.indexKeyBitset') so that we can
// create the inflated index key (keyExpr).
for (int i = 0; i < ixn->index.keyPattern.nFields(); ++i) {
indexKeyBitset.set(i);
}
} else if (reqs.getIndexKeyBitset()) {
indexKeyBitset = *reqs.getIndexKeyBitset();
}
auto [stage, outputs] = generateIndexScan(_opCtx,
_collection,
ixn,
indexKeyBitset,
&_slotIdGenerator,
&_frameIdGenerator,
&_spoolIdGenerator,
_yieldPolicy,
_data.env,
_lockAcquisitionCallback);
if (reqs.has(PlanStageSlots::kReturnKey)) {
std::vector<std::unique_ptr<sbe::EExpression>> mkObjArgs;
size_t i = 0;
for (auto&& elem : ixn->index.keyPattern) {
auto fieldName = elem.fieldNameStringData();
mkObjArgs.emplace_back(sbe::makeE<sbe::EConstant>(
std::string_view{fieldName.rawData(), fieldName.size()}));
mkObjArgs.emplace_back(sbe::makeE<sbe::EVariable>((*outputs.getIndexKeySlots())[i++]));
}
auto rawKeyExpr = sbe::makeE<sbe::EFunction>("newObj", std::move(mkObjArgs));
outputs.set(PlanStageSlots::kReturnKey, _slotIdGenerator.generate());
stage = sbe::makeProjectStage(std::move(stage),
ixn->nodeId(),
outputs.get(PlanStageSlots::kReturnKey),
std::move(rawKeyExpr));
}
if (reqs.has(PlanStageSlots::kResult)) {
outputs.set(PlanStageSlots::kResult, _slotIdGenerator.generate());
stage = rehydrateIndexKey(std::move(stage),
ixn->index.keyPattern,
ixn->nodeId(),
*outputs.getIndexKeySlots(),
outputs.get(PlanStageSlots::kResult));
}
if (reqs.getIndexKeyBitset()) {
outputs.setIndexKeySlots(
makeIndexKeyOutputSlotsMatchingParentReqs(ixn->index.keyPattern,
*reqs.getIndexKeyBitset(),
indexKeyBitset,
*outputs.getIndexKeySlots()));
} else {
outputs.setIndexKeySlots(boost::none);
}
return {std::move(stage), std::move(outputs)};
}
std::tuple<sbe::value::SlotId, sbe::value::SlotId, std::unique_ptr<sbe::PlanStage>>

View File

@ -56,6 +56,7 @@
#include "mongo/db/query/util/make_data_structure.h"
#include "mongo/logv2/log.h"
#include "mongo/util/str.h"
#include "mongo/util/visit_helper.h"
namespace mongo::stage_builder {
namespace {
@ -679,7 +680,7 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> generateIndexScan(
OperationContext* opCtx,
const CollectionPtr& collection,
const IndexScanNode* ixn,
PlanStageReqs reqs,
const sbe::IndexKeysInclusionSet& originalIndexKeyBitset,
sbe::value::SlotIdGenerator* slotIdGenerator,
sbe::value::SlotIdGenerator* frameIdGenerator,
sbe::value::SpoolIdGenerator* spoolIdGenerator,
@ -697,18 +698,8 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> generateIndexScan(
accessMethod->getSortedDataInterface()->getOrdering());
std::unique_ptr<sbe::PlanStage> stage;
sbe::value::SlotVector indexKeySlots;
sbe::IndexKeysInclusionSet indexKeyBitset;
std::unique_ptr<sbe::EExpression> keyExpr;
PlanStageSlots outputs;
// Save the bit vector describing the fields from the index that our caller requires. If an
// index filter is defined, we may require additional index fields which are not needed by the
// parent stage. We will need the parent's reqs later on so that we can hand the correct slot
// vector for these fields back to our parent.
auto parentIndexKeyBitset = reqs.getIndexKeyBitset();
// Determine the set of fields from the index required to apply the filter and union those with
// the set of fields from the index required by the parent stage.
auto [indexFilterKeyBitset, indexFilterKeyFields] = [&]() {
@ -719,35 +710,8 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> generateIndexScan(
}
return std::make_pair(sbe::IndexKeysInclusionSet{}, std::vector<std::string>{});
}();
reqs.getIndexKeyBitset() =
parentIndexKeyBitset.value_or(sbe::IndexKeysInclusionSet{}) | indexFilterKeyBitset;
if (reqs.has(PlanStageSlots::kResult) || reqs.has(PlanStageSlots::kReturnKey)) {
// If either 'reqs.result' or 'reqs.returnKey' is true, we need to get all parts of the
// index key (regardless of what was requested by 'reqs.indexKeyBitset') so that we can
// create the inflated index key (keyExpr).
std::vector<std::unique_ptr<sbe::EExpression>> mkObjArgs;
size_t keyIndex = 0;
for (auto&& elem : ixn->index.keyPattern) {
auto fieldName = elem.fieldNameStringData();
auto slot = slotIdGenerator->generate();
mkObjArgs.emplace_back(sbe::makeE<sbe::EConstant>(
std::string_view{fieldName.rawData(), fieldName.size()}));
mkObjArgs.emplace_back(sbe::makeE<sbe::EVariable>(slot));
indexKeySlots.emplace_back(slot);
indexKeyBitset.set(keyIndex++);
}
keyExpr = sbe::makeE<sbe::EFunction>("newObj", std::move(mkObjArgs));
} else if (reqs.getIndexKeyBitset()) {
// If both 'reqs.result' and 'reqs.returnKey' are false, we should only get the
// parts of the index key that were requested by 'reqs.indexKeyBitset'.
indexKeySlots = slotIdGenerator->generateMultiple(reqs.getIndexKeyBitset()->count());
indexKeyBitset = *reqs.getIndexKeyBitset();
}
auto indexKeyBitset = originalIndexKeyBitset | indexFilterKeyBitset;
auto indexKeySlots = slotIdGenerator->generateMultiple(indexKeyBitset.count());
if (intervals.size() == 1) {
// If we have just a single interval, we can construct a simplified sub-tree.
@ -826,37 +790,8 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> generateIndexScan(
ixn->nodeId());
}
if (reqs.has(PlanStageSlots::kResult) || reqs.has(PlanStageSlots::kReturnKey)) {
if (reqs.has(PlanStageSlots::kResult)) {
outputs.set(PlanStageSlots::kResult, slotIdGenerator->generate());
stage = sbe::makeProjectStage(std::move(stage),
ixn->nodeId(),
outputs.get(PlanStageSlots::kResult),
std::move(keyExpr));
if (reqs.has(PlanStageSlots::kReturnKey)) {
outputs.set(PlanStageSlots::kReturnKey, slotIdGenerator->generate());
stage = sbe::makeProjectStage(
std::move(stage),
ixn->nodeId(),
outputs.get(PlanStageSlots::kReturnKey),
sbe::makeE<sbe::EVariable>(outputs.get(PlanStageSlots::kResult)));
}
} else {
outputs.set(PlanStageSlots::kReturnKey, slotIdGenerator->generate());
stage = sbe::makeProjectStage(std::move(stage),
ixn->nodeId(),
outputs.get(PlanStageSlots::kReturnKey),
std::move(keyExpr));
}
}
// We only need to return the slots which were explicitly requested by our parent stage.
outputs.setIndexKeySlots(
!parentIndexKeyBitset
? boost::none
: boost::optional<sbe::value::SlotVector>{makeIndexKeyOutputSlotsMatchingParentReqs(
ixn->index.keyPattern, *parentIndexKeyBitset, indexKeyBitset, indexKeySlots)});
outputs.setIndexKeySlots(makeIndexKeyOutputSlotsMatchingParentReqs(
ixn->index.keyPattern, originalIndexKeyBitset, indexKeyBitset, indexKeySlots));
return {std::move(stage), std::move(outputs)};
}

View File

@ -54,7 +54,7 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> generateIndexScan(
OperationContext* opCtx,
const CollectionPtr& collection,
const IndexScanNode* ixn,
PlanStageReqs reqs,
const sbe::IndexKeysInclusionSet& indexKeyBitset,
sbe::value::SlotIdGenerator* slotIdGenerator,
sbe::value::SlotIdGenerator* frameIdGenerator,
sbe::value::SpoolIdGenerator* spoolIdGenerator,