0
0
mirror of https://github.com/mongodb/mongo.git synced 2024-11-24 00:17:37 +01:00

SERVER-96583 Give CBR access to histograms generated via analyze() (#28767)

GitOrigin-RevId: b0d74e60c0e17d255a3b7a05faa9dc071e9bfe55
This commit is contained in:
Ben Shteinfeld 2024-11-12 16:42:12 -05:00 committed by MongoDB Bot
parent 2d2716587f
commit 20af414ee6
23 changed files with 159 additions and 37 deletions

View File

@ -0,0 +1,48 @@
/**
* Ensure that the analyze command produces histograms which cost-based ranking is able to use.
*/
import {
canonicalizePlan,
getRejectedPlans,
getWinningPlanFromExplain,
isCollscan
} from "jstests/libs/query/analyze_plan.js";
import {checkSbeFullyEnabled} from "jstests/libs/query/sbe_util.js";
// TODO SERVER-92589: Remove this exemption
// Bail out of the whole test early when SBE is fully enabled, logging the reason for the skip.
const sbeFullyEnabled = checkSbeFullyEnabled(db);
if (sbeFullyEnabled) {
    jsTestLog(`Skipping ${jsTestName()} as SBE executor is not supported yet`);
    quit();
}
// Work on a collection named after this test and start from a clean slate.
const collName = jsTestName();
const coll = db[collName];
coll.drop();

// Generate docs with a decreasing distribution: for every i in [0, 100) one document {a: j} is
// inserted per j in [0, i), so the value j occurs (99 - j) times and small values dominate.
let docs = [];
for (let i = 0; i < 100; i++) {
    for (let j = 0; j < i; j++) {
        docs.push({a: j});
    }
}
assert.commandWorked(coll.insertMany(docs));

// Check the result of the index build like every other command in this test, so that a failed
// build is reported here rather than surfacing later as a confusing plan assertion failure.
assert.commandWorked(coll.createIndex({a: 1}));

// Generate histogram for field 'a'
assert.commandWorked(coll.runCommand({analyze: collName, key: "a", numberBuckets: 10}));
// Run the point query under histogram-based CE and verify that the winning plan reports a
// histogram as the source of its cardinality estimate.
try {
    // Use histogram CE
    const enableHistogramCE = {setParameter: 1, planRankerMode: "histogramCE"};
    assert.commandWorked(db.adminCommand(enableHistogramCE));

    const explainOutput = coll.find({a: 5}).explain();

    // No plan is expected to be rejected for this query.
    const rejectedPlans = getRejectedPlans(explainOutput);
    assert.eq(0, rejectedPlans.length);

    const chosenPlan = getWinningPlanFromExplain(explainOutput);
    assert.eq(chosenPlan.estimatesMetadata.ceSource, "Histogram", chosenPlan);
} finally {
    // Ensure that query knob doesn't leak into other testcases in the suite.
    const restoreMultiPlanning = {setParameter: 1, planRankerMode: "multiPlanning"};
    assert.commandWorked(db.adminCommand(restoreMultiPlanning));
}

View File

@ -8,3 +8,20 @@ exports_files(
"*.cpp",
]),
)
# Library target "histogram_estimator": builds the histogram estimation sources listed in `srcs`
# and exposes the headers listed in `hdrs`. It depends on the cost-based ranker "estimates"
# target and on the "stats_histograms" target from the stats package.
mongo_cc_library(
name = "histogram_estimator",
srcs = [
"histogram_estimation_impl.cpp",
"histogram_estimator.cpp",
],
hdrs = [
"histogram_common.h",
"histogram_estimation_impl.h",
"histogram_estimator.h",
],
deps = [
"//src/mongo/db/query/cost_based_ranker:estimates",
"//src/mongo/db/query/stats:stats_histograms",
],
)

View File

@ -4,29 +4,17 @@ Import("env")
env = env.Clone()
env.Library(
target="histogram_estimation_impl",
source=[
"histogram_estimation_impl.cpp",
],
LIBDEPS=[
"$BUILD_DIR/mongo/db/query/query_index_bounds",
"$BUILD_DIR/mongo/db/query/stats/stats_histograms",
],
)
env.Library(
target="ce_test_utils",
source=[
"test_utils.cpp",
"histogram_accuracy_test_utils.cpp",
"histogram_estimator.cpp",
],
LIBDEPS=[
"$BUILD_DIR/mongo/db/query/ce/histogram_estimator",
"$BUILD_DIR/mongo/db/query/stats/stats_test_utils",
"$BUILD_DIR/mongo/db/sbe_values",
"$BUILD_DIR/mongo/db/server_base",
"histogram_estimation_impl",
],
)

View File

@ -27,6 +27,7 @@
* it in the license file.
*/
#include "mongo/db/query/ce/histogram_estimation_impl.h"
#include "mongo/db/query/ce/test_utils.h"
namespace mongo::ce {

View File

@ -30,6 +30,7 @@
#include <sstream>
#include "mongo/db/query/ce/histogram_accuracy_test_utils.h"
#include "mongo/db/query/ce/histogram_estimation_impl.h"
namespace mongo::ce {

View File

@ -29,7 +29,7 @@
#pragma once
#include "mongo/db/query/stats/ce_histogram.h"
#include "mongo/db/exec/sbe/values/value.h"
#include "mongo/db/query/cost_based_ranker/estimates.h"

View File

@ -31,6 +31,7 @@
#include "mongo/db/exec/sbe/values/bson.h"
#include "mongo/db/query/ce/histogram_common.h"
#include "mongo/db/query/stats/ce_histogram.h"
#include "mongo/db/query/stats/value_utils.h"
namespace mongo::ce {

View File

@ -27,6 +27,7 @@
* it in the license file.
*/
#include "mongo/db/query/ce/histogram_estimation_impl.h"
#include "mongo/db/query/ce/test_utils.h"
#include "mongo/unittest/death_test.h"

View File

@ -28,6 +28,7 @@
*/
#include "mongo/db/query/ce/histogram_estimator.h"
#include "mongo/db/query/ce/histogram_estimation_impl.h"
namespace mongo::ce {

View File

@ -29,7 +29,8 @@
#pragma once
#include "mongo/db/query/ce/histogram_estimation_impl.h"
#include "mongo/db/query/ce/histogram_common.h"
#include "mongo/db/query/stats/ce_histogram.h"
namespace mongo::ce {

View File

@ -28,8 +28,8 @@
*/
#include "mongo/bson/json.h"
#include "mongo/db/query/ce/histogram_accuracy_test_utils.h"
#include "mongo/db/query/ce/histogram_common.h"
#include "mongo/db/query/ce/histogram_estimation_impl.h"
#include "mongo/db/query/ce/histogram_estimator.h"
#include "mongo/db/query/ce/test_utils.h"
#include "mongo/unittest/death_test.h"

View File

@ -27,6 +27,7 @@
* it in the license file.
*/
#include "mongo/db/query/ce/histogram_estimation_impl.h"
#include "mongo/db/query/ce/test_utils.h"
#include "mongo/db/query/stats/maxdiff_test_utils.h"
#include "mongo/db/query/stats/rand_utils.h"

View File

@ -28,6 +28,7 @@
*/
#include "mongo/db/query/ce/test_utils.h"
#include "mongo/db/query/ce/histogram_estimation_impl.h"
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kQuery

View File

@ -21,6 +21,8 @@ mongo_cc_library(
"ce_utils",
"estimates",
"heuristic_estimator",
"//src/mongo/db/query/ce:histogram_estimator",
"//src/mongo/db/query/stats:collection_statistics_interface",
],
)

View File

@ -29,6 +29,7 @@
#include "mongo/db/query/cost_based_ranker/cardinality_estimator.h"
#include "mongo/db/query/ce/histogram_estimator.h"
#include "mongo/db/query/cost_based_ranker/heuristic_estimator.h"
#include "mongo/db/query/stage_types.h"
@ -330,7 +331,22 @@ CardinalityEstimate CardinalityEstimator::estimate(const OrderedIntervalList* no
// the sum of cardinalities of the intervals. Therefore interval selectivities are summed.
CardinalityEstimate resultCard = minCE;
for (const auto& interval : node->intervals) {
SelectivityEstimate sel = estimateInterval(interval, _inputCard);
SelectivityEstimate sel = [&] {
if (_rankerMode == QueryPlanRankerModeEnum::kHistogramCE ||
_rankerMode == QueryPlanRankerModeEnum::kAutomaticCE) {
auto histogram = _collStats.getHistogram(node->name);
if (histogram) {
bool canEstimate =
ce::HistogramEstimator::canEstimateInterval(*histogram, interval, true);
if (canEstimate) {
return ce::HistogramEstimator::estimateCardinality(
*histogram, _inputCard, interval, true) /
_inputCard;
}
}
}
return estimateInterval(interval, _inputCard);
}();
resultCard += sel * _inputCard;
}
resultCard = std::min(resultCard, _inputCard);

View File

@ -35,6 +35,7 @@
#include "mongo/db/query/cost_based_ranker/estimates_storage.h"
#include "mongo/db/query/index_bounds.h"
#include "mongo/db/query/query_solution.h"
#include "mongo/db/query/stats/collection_statistics.h"
namespace mongo::cost_based_ranker {
@ -56,8 +57,14 @@ concept UnionType = std::same_as<T, OrNode> || std::same_as<T, MergeSortNode>;
*/
class CardinalityEstimator {
public:
CardinalityEstimator(CardinalityEstimate collCard, EstimateMap& qsnEstimates)
: _inputCard{collCard}, _qsnEstimates{qsnEstimates} {};
CardinalityEstimator(const stats::CollectionStatistics& collStats,
EstimateMap& qsnEstimates,
QueryPlanRankerModeEnum rankerMode)
: _inputCard{CardinalityEstimate{CardinalityType{collStats.getCardinality()},
EstimationSource::Metadata}},
_collStats(collStats),
_qsnEstimates{qsnEstimates},
_rankerMode(rankerMode) {}
// Delete the copy and move constructors and assignment operator
CardinalityEstimator(const CardinalityEstimator&) = delete;
@ -132,10 +139,16 @@ private:
// A subsequent conjunction will push again onto this stack.
std::vector<SelectivityEstimate> _conjSels;
// Collection statistics contains cached histograms.
const stats::CollectionStatistics& _collStats;
// A map from QSN to QSNEstimate that stores the final CE result for each QSN node.
// Not owned by this class - it is passed by the user of this class, and is filled in with
// entries during the estimation process.
EstimateMap& _qsnEstimates;
// The cardinality estimate mode we are using for estimates.
QueryPlanRankerModeEnum _rankerMode;
};
} // namespace mongo::cost_based_ranker

View File

@ -62,9 +62,10 @@ void estimateQsnCost(const QuerySolutionNode* node, EstimateMap& estimateMap) {
} // namespace
void estimatePlanCost(const QuerySolution& plan,
CardinalityEstimate collectionCard,
QueryPlanRankerModeEnum mode,
const stats::CollectionStatistics& collStats,
EstimateMap& estimateMap) {
CardinalityEstimator cardEstimator(collectionCard, estimateMap);
CardinalityEstimator cardEstimator(collStats, estimateMap, mode);
cardEstimator.estimatePlan(plan);
estimateQsnCost(plan.root(), estimateMap);
}

View File

@ -32,6 +32,7 @@
#include "mongo/db/query/cost_based_ranker/estimates.h"
#include "mongo/db/query/cost_based_ranker/estimates_storage.h"
#include "mongo/db/query/query_solution.h"
#include "mongo/db/query/stats/collection_statistics.h"
namespace mongo::cost_based_ranker {
@ -40,7 +41,8 @@ namespace mongo::cost_based_ranker {
* insert an entry into the EstimateMap out-param.
*/
void estimatePlanCost(const QuerySolution& plan,
CardinalityEstimate collectionCard,
QueryPlanRankerModeEnum mode,
const stats::CollectionStatistics& collStats,
EstimateMap& estimateMap);
} // namespace mongo::cost_based_ranker

View File

@ -1628,12 +1628,12 @@ StatusWith<QueryPlanner::CostBasedRankerResult> QueryPlanner::planWithCostBasedR
// This is a temporary stub implementation of CBR which arbitrarily picks the last of the
// enumerated plans.
CardinalityEstimate collCard{
CardinalityType{static_cast<double>(params.mainCollectionInfo.stats.noOfRecords)},
EstimationSource::Metadata};
EstimateMap estimates;
for (auto&& soln : statusWithMultiPlanSolns.getValue()) {
estimatePlanCost(*soln, collCard, estimates);
estimatePlanCost(*soln,
query.getExpCtx()->getQueryKnobConfiguration().getPlanRankerMode(),
*params.mainCollectionInfo.collStats,
estimates);
}
std::vector<std::unique_ptr<QuerySolution>> acceptedSoln;

View File

@ -41,6 +41,7 @@
#include "mongo/db/query/query_settings/query_settings_manager.h"
#include "mongo/db/query/query_settings_decoration.h"
#include "mongo/db/query/query_utils.h"
#include "mongo/db/query/stats/collection_statistics_impl.h"
#include "mongo/db/query/wildcard_multikey_paths.h"
#include "mongo/db/storage/storage_options.h"
#include "mongo/db/timeseries/timeseries_index_schema_conversion_functions.h"
@ -446,7 +447,8 @@ void QueryPlannerParams::fillOutSecondaryCollectionsPlannerParams(
}
auto fillOutSecondaryInfo = [&](const NamespaceString& nss,
const CollectionPtr& secondaryColl) {
auto secondaryInfo = CollectionInfo{.options = providedOptions};
CollectionInfo secondaryInfo;
secondaryInfo.options = providedOptions;
if (secondaryColl) {
fillOutIndexEntries(opCtx, canonicalQuery, secondaryColl, secondaryInfo.indexes);
fillOutPlannerCollectionInfo(
@ -531,12 +533,13 @@ void QueryPlannerParams::fillOutMainCollectionPlannerParams(
applyQuerySettingsOrIndexFiltersForMainCollection(canonicalQuery, collections);
fillOutPlannerCollectionInfo(
opCtx,
mainColl,
&mainCollectionInfo.stats,
// Include collection statistics if cost-based ranker is enabled
canonicalQuery.getExpCtx()->getQueryKnobConfiguration().getPlanRankerMode() !=
QueryPlanRankerModeEnum::kMultiPlanning /* includeSizeStats */);
opCtx, mainColl, &mainCollectionInfo.stats, false /* includeSizeStats */);
if (canonicalQuery.getExpCtx()->getQueryKnobConfiguration().getPlanRankerMode() !=
QueryPlanRankerModeEnum::kMultiPlanning) {
mainCollectionInfo.collStats = std::make_unique<stats::CollectionStatisticsImpl>(
static_cast<double>(mainColl->getRecordStore()->numRecords()), canonicalQuery.nss());
}
}
void QueryPlannerParams::setTargetSbeStageBuilder(OperationContext* opCtx,

View File

@ -42,6 +42,7 @@
#include "mongo/db/query/index_hint.h"
#include "mongo/db/query/multiple_collection_accessor.h"
#include "mongo/db/query/query_knobs_gen.h"
#include "mongo/db/query/stats/collection_statistics.h"
#include "mongo/s/shard_key_pattern_query_util.h"
#include "mongo/s/shard_targeting_helpers.h"
@ -69,6 +70,12 @@ struct PlannerCollectionInfo {
* $lookup) useful for query planning.
*/
struct CollectionInfo {
CollectionInfo() = default;
CollectionInfo(const CollectionInfo&) = delete;
CollectionInfo& operator=(const CollectionInfo&) = delete;
CollectionInfo(CollectionInfo&& other) noexcept = default;
CollectionInfo& operator=(CollectionInfo&&) noexcept = default;
// See QueryPlannerParams::Options.
// For secondary collections, this is currently unused (but may still be populated).
size_t options{0 /* DEFAULT */};
@ -86,6 +93,9 @@ struct CollectionInfo {
// hints, this does not force the planner to prefer collection scans over other candidate
// solutions. This is currently used for applying query settings '$natural' hints.
boost::optional<NaturalOrderHint::Direction> collscanDirection = boost::none;
// Histogram-based statistics for fields in the collection.
std::unique_ptr<stats::CollectionStatistics> collStats{nullptr};
};

View File

@ -57,6 +57,10 @@ using namespace mongo;
class QueryPlannerPipelinePushdownTest : public QueryPlannerTest {
protected:
QueryPlannerPipelinePushdownTest() : QueryPlannerTest() {
secondaryCollMap.emplace(kSecondaryNamespace, CollectionInfo());
}
std::vector<boost::intrusive_ptr<DocumentSource>> makeInnerPipelineStages(
const Pipeline& pipeline) {
std::vector<boost::intrusive_ptr<DocumentSource>> stages;
@ -77,8 +81,7 @@ protected:
const NamespaceString kSecondaryNamespace =
NamespaceString::createNamespaceString_forTest("test.other");
const std::map<NamespaceString, CollectionInfo> secondaryCollMap{
{kSecondaryNamespace, CollectionInfo()}};
std::map<NamespaceString, CollectionInfo> secondaryCollMap;
};
TEST_F(QueryPlannerPipelinePushdownTest, PushdownOfASingleGroup) {

View File

@ -49,6 +49,17 @@ mongo_cc_library(
],
)
# Header-only target (empty `srcs`) that exposes just "collection_statistics.h" and depends on
# ":stats_histograms".
# NOTE(review): presumably split out so that other targets can depend on the interface header
# without pulling in the rest of the stats sources - confirm against the dependents.
mongo_cc_library(
name = "collection_statistics_interface",
srcs = [],
hdrs = [
"collection_statistics.h",
],
deps = [
":stats_histograms",
],
)
mongo_cc_library(
name = "stats",
srcs = [
@ -58,7 +69,6 @@ mongo_cc_library(
"stats_catalog.cpp",
],
hdrs = [
"collection_statistics.h",
"collection_statistics_impl.h",
"stats_cache.h",
"stats_cache_loader.h",
@ -66,6 +76,7 @@ mongo_cc_library(
"stats_catalog.h",
],
deps = [
":collection_statistics_interface",
":stats_histograms",
"//src/mongo/db:dbdirectclient",
"//src/mongo/util:caching", # TODO(SERVER-93876): Remove.