mirror of
https://github.com/mongodb/mongo.git
synced 2024-11-24 00:17:37 +01:00
SERVER-96583 Give CBR access to histograms generated via analyze() (#28767)
GitOrigin-RevId: b0d74e60c0e17d255a3b7a05faa9dc071e9bfe55
This commit is contained in:
parent
2d2716587f
commit
20af414ee6
48
jstests/noPassthroughWithMongod/cbr_histograms.js
Normal file
48
jstests/noPassthroughWithMongod/cbr_histograms.js
Normal file
@ -0,0 +1,48 @@
|
||||
/**
|
||||
* Ensure that the analyze command produces histograms which cost-based ranking is able to use.
|
||||
*/
|
||||
|
||||
import {
|
||||
canonicalizePlan,
|
||||
getRejectedPlans,
|
||||
getWinningPlanFromExplain,
|
||||
isCollscan
|
||||
} from "jstests/libs/query/analyze_plan.js";
|
||||
|
||||
import {checkSbeFullyEnabled} from "jstests/libs/query/sbe_util.js";
|
||||
|
||||
// TODO SERVER-92589: Remove this exemption
|
||||
if (checkSbeFullyEnabled(db)) {
|
||||
jsTestLog(`Skipping ${jsTestName()} as SBE executor is not supported yet`);
|
||||
quit();
|
||||
}
|
||||
|
||||
const collName = jsTestName();
|
||||
const coll = db[collName];
|
||||
coll.drop();
|
||||
|
||||
// Generate docs with a decreasing value distribution: value j of field 'a' appears (99 - j) times.
|
||||
let docs = [];
|
||||
for (let i = 0; i < 100; i++) {
|
||||
for (let j = 0; j < i; j++) {
|
||||
docs.push({a: j});
|
||||
}
|
||||
}
|
||||
assert.commandWorked(coll.insertMany(docs));
|
||||
|
||||
// Build the index on 'a' and fail fast if the index build does not succeed.
assert.commandWorked(coll.createIndex({a: 1}));
|
||||
|
||||
// Generate histogram for field 'a'
|
||||
assert.commandWorked(coll.runCommand({analyze: collName, key: "a", numberBuckets: 10}));
|
||||
|
||||
try {
|
||||
// Use histogram CE
|
||||
assert.commandWorked(db.adminCommand({setParameter: 1, planRankerMode: "histogramCE"}));
|
||||
const explain = coll.find({a: 5}).explain();
|
||||
assert.eq(0, getRejectedPlans(explain).length);
|
||||
const winningPlan = getWinningPlanFromExplain(explain);
|
||||
assert.eq(winningPlan.estimatesMetadata.ceSource, "Histogram", winningPlan);
|
||||
} finally {
|
||||
// Ensure that query knob doesn't leak into other testcases in the suite.
|
||||
assert.commandWorked(db.adminCommand({setParameter: 1, planRankerMode: "multiPlanning"}));
|
||||
}
|
@ -8,3 +8,20 @@ exports_files(
|
||||
"*.cpp",
|
||||
]),
|
||||
)
|
||||
|
||||
mongo_cc_library(
|
||||
name = "histogram_estimator",
|
||||
srcs = [
|
||||
"histogram_estimation_impl.cpp",
|
||||
"histogram_estimator.cpp",
|
||||
],
|
||||
hdrs = [
|
||||
"histogram_common.h",
|
||||
"histogram_estimation_impl.h",
|
||||
"histogram_estimator.h",
|
||||
],
|
||||
deps = [
|
||||
"//src/mongo/db/query/cost_based_ranker:estimates",
|
||||
"//src/mongo/db/query/stats:stats_histograms",
|
||||
],
|
||||
)
|
||||
|
@ -4,29 +4,17 @@ Import("env")
|
||||
|
||||
env = env.Clone()
|
||||
|
||||
env.Library(
|
||||
target="histogram_estimation_impl",
|
||||
source=[
|
||||
"histogram_estimation_impl.cpp",
|
||||
],
|
||||
LIBDEPS=[
|
||||
"$BUILD_DIR/mongo/db/query/query_index_bounds",
|
||||
"$BUILD_DIR/mongo/db/query/stats/stats_histograms",
|
||||
],
|
||||
)
|
||||
|
||||
env.Library(
|
||||
target="ce_test_utils",
|
||||
source=[
|
||||
"test_utils.cpp",
|
||||
"histogram_accuracy_test_utils.cpp",
|
||||
"histogram_estimator.cpp",
|
||||
],
|
||||
LIBDEPS=[
|
||||
"$BUILD_DIR/mongo/db/query/ce/histogram_estimator",
|
||||
"$BUILD_DIR/mongo/db/query/stats/stats_test_utils",
|
||||
"$BUILD_DIR/mongo/db/sbe_values",
|
||||
"$BUILD_DIR/mongo/db/server_base",
|
||||
"histogram_estimation_impl",
|
||||
],
|
||||
)
|
||||
|
||||
|
@ -27,6 +27,7 @@
|
||||
* it in the license file.
|
||||
*/
|
||||
|
||||
#include "mongo/db/query/ce/histogram_estimation_impl.h"
|
||||
#include "mongo/db/query/ce/test_utils.h"
|
||||
|
||||
namespace mongo::ce {
|
||||
|
@ -30,6 +30,7 @@
|
||||
#include <sstream>
|
||||
|
||||
#include "mongo/db/query/ce/histogram_accuracy_test_utils.h"
|
||||
#include "mongo/db/query/ce/histogram_estimation_impl.h"
|
||||
|
||||
namespace mongo::ce {
|
||||
|
||||
|
@ -29,7 +29,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "mongo/db/query/stats/ce_histogram.h"
|
||||
#include "mongo/db/exec/sbe/values/value.h"
|
||||
|
||||
#include "mongo/db/query/cost_based_ranker/estimates.h"
|
||||
|
||||
|
@ -31,6 +31,7 @@
|
||||
|
||||
#include "mongo/db/exec/sbe/values/bson.h"
|
||||
#include "mongo/db/query/ce/histogram_common.h"
|
||||
#include "mongo/db/query/stats/ce_histogram.h"
|
||||
#include "mongo/db/query/stats/value_utils.h"
|
||||
|
||||
namespace mongo::ce {
|
||||
|
@ -27,6 +27,7 @@
|
||||
* it in the license file.
|
||||
*/
|
||||
|
||||
#include "mongo/db/query/ce/histogram_estimation_impl.h"
|
||||
#include "mongo/db/query/ce/test_utils.h"
|
||||
#include "mongo/unittest/death_test.h"
|
||||
|
||||
|
@ -28,6 +28,7 @@
|
||||
*/
|
||||
|
||||
#include "mongo/db/query/ce/histogram_estimator.h"
|
||||
#include "mongo/db/query/ce/histogram_estimation_impl.h"
|
||||
|
||||
namespace mongo::ce {
|
||||
|
||||
|
@ -29,7 +29,8 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "mongo/db/query/ce/histogram_estimation_impl.h"
|
||||
#include "mongo/db/query/ce/histogram_common.h"
|
||||
#include "mongo/db/query/stats/ce_histogram.h"
|
||||
|
||||
namespace mongo::ce {
|
||||
|
||||
|
@ -28,8 +28,8 @@
|
||||
*/
|
||||
|
||||
#include "mongo/bson/json.h"
|
||||
#include "mongo/db/query/ce/histogram_accuracy_test_utils.h"
|
||||
#include "mongo/db/query/ce/histogram_common.h"
|
||||
#include "mongo/db/query/ce/histogram_estimation_impl.h"
|
||||
#include "mongo/db/query/ce/histogram_estimator.h"
|
||||
#include "mongo/db/query/ce/test_utils.h"
|
||||
#include "mongo/unittest/death_test.h"
|
||||
|
||||
|
@ -27,6 +27,7 @@
|
||||
* it in the license file.
|
||||
*/
|
||||
|
||||
#include "mongo/db/query/ce/histogram_estimation_impl.h"
|
||||
#include "mongo/db/query/ce/test_utils.h"
|
||||
#include "mongo/db/query/stats/maxdiff_test_utils.h"
|
||||
#include "mongo/db/query/stats/rand_utils.h"
|
||||
|
@ -28,6 +28,7 @@
|
||||
*/
|
||||
|
||||
#include "mongo/db/query/ce/test_utils.h"
|
||||
#include "mongo/db/query/ce/histogram_estimation_impl.h"
|
||||
|
||||
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kQuery
|
||||
|
||||
|
@ -21,6 +21,8 @@ mongo_cc_library(
|
||||
"ce_utils",
|
||||
"estimates",
|
||||
"heuristic_estimator",
|
||||
"//src/mongo/db/query/ce:histogram_estimator",
|
||||
"//src/mongo/db/query/stats:collection_statistics_interface",
|
||||
],
|
||||
)
|
||||
|
||||
|
@ -29,6 +29,7 @@
|
||||
|
||||
#include "mongo/db/query/cost_based_ranker/cardinality_estimator.h"
|
||||
|
||||
#include "mongo/db/query/ce/histogram_estimator.h"
|
||||
#include "mongo/db/query/cost_based_ranker/heuristic_estimator.h"
|
||||
#include "mongo/db/query/stage_types.h"
|
||||
|
||||
@ -330,7 +331,22 @@ CardinalityEstimate CardinalityEstimator::estimate(const OrderedIntervalList* no
|
||||
// the sum of cardinalities of the intervals. Therefore interval selectivities are summed.
|
||||
CardinalityEstimate resultCard = minCE;
|
||||
for (const auto& interval : node->intervals) {
|
||||
SelectivityEstimate sel = estimateInterval(interval, _inputCard);
|
||||
SelectivityEstimate sel = [&] {
|
||||
if (_rankerMode == QueryPlanRankerModeEnum::kHistogramCE ||
|
||||
_rankerMode == QueryPlanRankerModeEnum::kAutomaticCE) {
|
||||
auto histogram = _collStats.getHistogram(node->name);
|
||||
if (histogram) {
|
||||
bool canEstimate =
|
||||
ce::HistogramEstimator::canEstimateInterval(*histogram, interval, true);
|
||||
if (canEstimate) {
|
||||
return ce::HistogramEstimator::estimateCardinality(
|
||||
*histogram, _inputCard, interval, true) /
|
||||
_inputCard;
|
||||
}
|
||||
}
|
||||
}
|
||||
return estimateInterval(interval, _inputCard);
|
||||
}();
|
||||
resultCard += sel * _inputCard;
|
||||
}
|
||||
resultCard = std::min(resultCard, _inputCard);
|
||||
|
@ -35,6 +35,7 @@
|
||||
#include "mongo/db/query/cost_based_ranker/estimates_storage.h"
|
||||
#include "mongo/db/query/index_bounds.h"
|
||||
#include "mongo/db/query/query_solution.h"
|
||||
#include "mongo/db/query/stats/collection_statistics.h"
|
||||
|
||||
namespace mongo::cost_based_ranker {
|
||||
|
||||
@ -56,8 +57,14 @@ concept UnionType = std::same_as<T, OrNode> || std::same_as<T, MergeSortNode>;
|
||||
*/
|
||||
class CardinalityEstimator {
|
||||
public:
|
||||
CardinalityEstimator(CardinalityEstimate collCard, EstimateMap& qsnEstimates)
|
||||
: _inputCard{collCard}, _qsnEstimates{qsnEstimates} {};
|
||||
CardinalityEstimator(const stats::CollectionStatistics& collStats,
|
||||
EstimateMap& qsnEstimates,
|
||||
QueryPlanRankerModeEnum rankerMode)
|
||||
: _inputCard{CardinalityEstimate{CardinalityType{collStats.getCardinality()},
|
||||
EstimationSource::Metadata}},
|
||||
_collStats(collStats),
|
||||
_qsnEstimates{qsnEstimates},
|
||||
_rankerMode(rankerMode) {}
|
||||
|
||||
// Delete the copy and move constructors and assignment operator
|
||||
CardinalityEstimator(const CardinalityEstimator&) = delete;
|
||||
@ -132,10 +139,16 @@ private:
|
||||
// A subsequent conjunction will push again onto this stack.
|
||||
std::vector<SelectivityEstimate> _conjSels;
|
||||
|
||||
// Collection statistics contains cached histograms.
|
||||
const stats::CollectionStatistics& _collStats;
|
||||
|
||||
// A map from QSN to QSNEstimate that stores the final CE result for each QSN node.
|
||||
// Not owned by this class - it is passed by the user of this class, and is filled in with
|
||||
// entries during the estimation process.
|
||||
EstimateMap& _qsnEstimates;
|
||||
|
||||
// The cardinality estimate mode we are using for estimates.
|
||||
QueryPlanRankerModeEnum _rankerMode;
|
||||
};
|
||||
|
||||
} // namespace mongo::cost_based_ranker
|
||||
|
@ -62,9 +62,10 @@ void estimateQsnCost(const QuerySolutionNode* node, EstimateMap& estimateMap) {
|
||||
} // namespace
|
||||
|
||||
void estimatePlanCost(const QuerySolution& plan,
|
||||
CardinalityEstimate collectionCard,
|
||||
QueryPlanRankerModeEnum mode,
|
||||
const stats::CollectionStatistics& collStats,
|
||||
EstimateMap& estimateMap) {
|
||||
CardinalityEstimator cardEstimator(collectionCard, estimateMap);
|
||||
CardinalityEstimator cardEstimator(collStats, estimateMap, mode);
|
||||
cardEstimator.estimatePlan(plan);
|
||||
estimateQsnCost(plan.root(), estimateMap);
|
||||
}
|
||||
|
@ -32,6 +32,7 @@
|
||||
#include "mongo/db/query/cost_based_ranker/estimates.h"
|
||||
#include "mongo/db/query/cost_based_ranker/estimates_storage.h"
|
||||
#include "mongo/db/query/query_solution.h"
|
||||
#include "mongo/db/query/stats/collection_statistics.h"
|
||||
|
||||
namespace mongo::cost_based_ranker {
|
||||
|
||||
@ -40,7 +41,8 @@ namespace mongo::cost_based_ranker {
|
||||
* insert an entry into the EstimateMap out-param.
|
||||
*/
|
||||
void estimatePlanCost(const QuerySolution& plan,
|
||||
CardinalityEstimate collectionCard,
|
||||
QueryPlanRankerModeEnum mode,
|
||||
const stats::CollectionStatistics& collStats,
|
||||
EstimateMap& estimateMap);
|
||||
|
||||
} // namespace mongo::cost_based_ranker
|
||||
|
@ -1628,12 +1628,12 @@ StatusWith<QueryPlanner::CostBasedRankerResult> QueryPlanner::planWithCostBasedR
|
||||
// This is a temporary stub implementation of CBR which arbitrarily picks the last of the
|
||||
// enumerated plans.
|
||||
|
||||
CardinalityEstimate collCard{
|
||||
CardinalityType{static_cast<double>(params.mainCollectionInfo.stats.noOfRecords)},
|
||||
EstimationSource::Metadata};
|
||||
EstimateMap estimates;
|
||||
for (auto&& soln : statusWithMultiPlanSolns.getValue()) {
|
||||
estimatePlanCost(*soln, collCard, estimates);
|
||||
estimatePlanCost(*soln,
|
||||
query.getExpCtx()->getQueryKnobConfiguration().getPlanRankerMode(),
|
||||
*params.mainCollectionInfo.collStats,
|
||||
estimates);
|
||||
}
|
||||
|
||||
std::vector<std::unique_ptr<QuerySolution>> acceptedSoln;
|
||||
|
@ -41,6 +41,7 @@
|
||||
#include "mongo/db/query/query_settings/query_settings_manager.h"
|
||||
#include "mongo/db/query/query_settings_decoration.h"
|
||||
#include "mongo/db/query/query_utils.h"
|
||||
#include "mongo/db/query/stats/collection_statistics_impl.h"
|
||||
#include "mongo/db/query/wildcard_multikey_paths.h"
|
||||
#include "mongo/db/storage/storage_options.h"
|
||||
#include "mongo/db/timeseries/timeseries_index_schema_conversion_functions.h"
|
||||
@ -446,7 +447,8 @@ void QueryPlannerParams::fillOutSecondaryCollectionsPlannerParams(
|
||||
}
|
||||
auto fillOutSecondaryInfo = [&](const NamespaceString& nss,
|
||||
const CollectionPtr& secondaryColl) {
|
||||
auto secondaryInfo = CollectionInfo{.options = providedOptions};
|
||||
CollectionInfo secondaryInfo;
|
||||
secondaryInfo.options = providedOptions;
|
||||
if (secondaryColl) {
|
||||
fillOutIndexEntries(opCtx, canonicalQuery, secondaryColl, secondaryInfo.indexes);
|
||||
fillOutPlannerCollectionInfo(
|
||||
@ -531,12 +533,13 @@ void QueryPlannerParams::fillOutMainCollectionPlannerParams(
|
||||
applyQuerySettingsOrIndexFiltersForMainCollection(canonicalQuery, collections);
|
||||
|
||||
fillOutPlannerCollectionInfo(
|
||||
opCtx,
|
||||
mainColl,
|
||||
&mainCollectionInfo.stats,
|
||||
// Include collection statistics if cost-based ranker is enabled
|
||||
canonicalQuery.getExpCtx()->getQueryKnobConfiguration().getPlanRankerMode() !=
|
||||
QueryPlanRankerModeEnum::kMultiPlanning /* includeSizeStats */);
|
||||
opCtx, mainColl, &mainCollectionInfo.stats, false /* includeSizeStats */);
|
||||
|
||||
if (canonicalQuery.getExpCtx()->getQueryKnobConfiguration().getPlanRankerMode() !=
|
||||
QueryPlanRankerModeEnum::kMultiPlanning) {
|
||||
mainCollectionInfo.collStats = std::make_unique<stats::CollectionStatisticsImpl>(
|
||||
static_cast<double>(mainColl->getRecordStore()->numRecords()), canonicalQuery.nss());
|
||||
}
|
||||
}
|
||||
|
||||
void QueryPlannerParams::setTargetSbeStageBuilder(OperationContext* opCtx,
|
||||
|
@ -42,6 +42,7 @@
|
||||
#include "mongo/db/query/index_hint.h"
|
||||
#include "mongo/db/query/multiple_collection_accessor.h"
|
||||
#include "mongo/db/query/query_knobs_gen.h"
|
||||
#include "mongo/db/query/stats/collection_statistics.h"
|
||||
#include "mongo/s/shard_key_pattern_query_util.h"
|
||||
#include "mongo/s/shard_targeting_helpers.h"
|
||||
|
||||
@ -69,6 +70,12 @@ struct PlannerCollectionInfo {
|
||||
* $lookup) useful for query planning.
|
||||
*/
|
||||
struct CollectionInfo {
|
||||
CollectionInfo() = default;
|
||||
CollectionInfo(const CollectionInfo&) = delete;
|
||||
CollectionInfo& operator=(const CollectionInfo&) = delete;
|
||||
CollectionInfo(CollectionInfo&& other) noexcept = default;
|
||||
CollectionInfo& operator=(CollectionInfo&&) noexcept = default;
|
||||
|
||||
// See QueryPlannerParams::Options.
|
||||
// For secondary collections, this is currently unused (but may still be populated).
|
||||
size_t options{0 /* DEFAULT */};
|
||||
@ -86,6 +93,9 @@ struct CollectionInfo {
|
||||
// hints, this does not force the planner to prefer collection scans over other candidate
|
||||
// solutions. This is currently used for applying query settings '$natural' hints.
|
||||
boost::optional<NaturalOrderHint::Direction> collscanDirection = boost::none;
|
||||
|
||||
// Histogram-based statistics for fields in the collection.
|
||||
std::unique_ptr<stats::CollectionStatistics> collStats{nullptr};
|
||||
};
|
||||
|
||||
|
||||
|
@ -57,6 +57,10 @@ using namespace mongo;
|
||||
|
||||
class QueryPlannerPipelinePushdownTest : public QueryPlannerTest {
|
||||
protected:
|
||||
QueryPlannerPipelinePushdownTest() : QueryPlannerTest() {
|
||||
secondaryCollMap.emplace(kSecondaryNamespace, CollectionInfo());
|
||||
}
|
||||
|
||||
std::vector<boost::intrusive_ptr<DocumentSource>> makeInnerPipelineStages(
|
||||
const Pipeline& pipeline) {
|
||||
std::vector<boost::intrusive_ptr<DocumentSource>> stages;
|
||||
@ -77,8 +81,7 @@ protected:
|
||||
|
||||
const NamespaceString kSecondaryNamespace =
|
||||
NamespaceString::createNamespaceString_forTest("test.other");
|
||||
const std::map<NamespaceString, CollectionInfo> secondaryCollMap{
|
||||
{kSecondaryNamespace, CollectionInfo()}};
|
||||
std::map<NamespaceString, CollectionInfo> secondaryCollMap;
|
||||
};
|
||||
|
||||
TEST_F(QueryPlannerPipelinePushdownTest, PushdownOfASingleGroup) {
|
||||
|
@ -49,6 +49,17 @@ mongo_cc_library(
|
||||
],
|
||||
)
|
||||
|
||||
mongo_cc_library(
|
||||
name = "collection_statistics_interface",
|
||||
srcs = [],
|
||||
hdrs = [
|
||||
"collection_statistics.h",
|
||||
],
|
||||
deps = [
|
||||
":stats_histograms",
|
||||
],
|
||||
)
|
||||
|
||||
mongo_cc_library(
|
||||
name = "stats",
|
||||
srcs = [
|
||||
@ -58,7 +69,6 @@ mongo_cc_library(
|
||||
"stats_catalog.cpp",
|
||||
],
|
||||
hdrs = [
|
||||
"collection_statistics.h",
|
||||
"collection_statistics_impl.h",
|
||||
"stats_cache.h",
|
||||
"stats_cache_loader.h",
|
||||
@ -66,6 +76,7 @@ mongo_cc_library(
|
||||
"stats_catalog.h",
|
||||
],
|
||||
deps = [
|
||||
":collection_statistics_interface",
|
||||
":stats_histograms",
|
||||
"//src/mongo/db:dbdirectclient",
|
||||
"//src/mongo/util:caching", # TODO(SERVER-93876): Remove.
|
||||
|
Loading…
Reference in New Issue
Block a user