diff --git a/jstests/noPassthroughWithMongod/cbr_histograms.js b/jstests/noPassthroughWithMongod/cbr_histograms.js new file mode 100644 index 00000000000..46f32a1685a --- /dev/null +++ b/jstests/noPassthroughWithMongod/cbr_histograms.js @@ -0,0 +1,48 @@ +/** + * Ensure that the analyze command produces histograms which cost-based ranking is able to use. + */ + +import { + canonicalizePlan, + getRejectedPlans, + getWinningPlanFromExplain, + isCollscan +} from "jstests/libs/query/analyze_plan.js"; + +import {checkSbeFullyEnabled} from "jstests/libs/query/sbe_util.js"; + +// TODO SERVER-92589: Remove this exemption +if (checkSbeFullyEnabled(db)) { + jsTestLog(`Skipping ${jsTestName()} as SBE executor is not supported yet`); + quit(); +} + +const collName = jsTestName(); +const coll = db[collName]; +coll.drop(); + +// Generate docs with decreasing distribution +let docs = []; +for (let i = 0; i < 100; i++) { + for (let j = 0; j < i; j++) { + docs.push({a: j}); + } +} +assert.commandWorked(coll.insertMany(docs)); + +coll.createIndex({a: 1}); + +// Generate histogram for field 'a' +assert.commandWorked(coll.runCommand({analyze: collName, key: "a", numberBuckets: 10})); + +try { + // Use histogram CE + assert.commandWorked(db.adminCommand({setParameter: 1, planRankerMode: "histogramCE"})); + const explain = coll.find({a: 5}).explain(); + assert.eq(0, getRejectedPlans(explain).length); + const winningPlan = getWinningPlanFromExplain(explain); + assert.eq(winningPlan.estimatesMetadata.ceSource, "Histogram", winningPlan); +} finally { + // Ensure that query knob doesn't leak into other testcases in the suite. + assert.commandWorked(db.adminCommand({setParameter: 1, planRankerMode: "multiPlanning"})); +} diff --git a/src/mongo/db/query/ce/BUILD.bazel b/src/mongo/db/query/ce/BUILD.bazel index 5ffe3e8fc2a..6cd63725c96 100644 --- a/src/mongo/db/query/ce/BUILD.bazel +++ b/src/mongo/db/query/ce/BUILD.bazel @@ -8,3 +8,20 @@ exports_files( "*.cpp", ]), ) + +mongo_cc_library( + name = "histogram_estimator", + srcs = [ + "histogram_estimation_impl.cpp", + "histogram_estimator.cpp", + ], + hdrs = [ + "histogram_common.h", + "histogram_estimation_impl.h", + "histogram_estimator.h", + ], + deps = [ + "//src/mongo/db/query/cost_based_ranker:estimates", + "//src/mongo/db/query/stats:stats_histograms", + ], +) diff --git a/src/mongo/db/query/ce/SConscript b/src/mongo/db/query/ce/SConscript index 0063cdab9b2..f50505aecf1 100644 --- a/src/mongo/db/query/ce/SConscript +++ b/src/mongo/db/query/ce/SConscript @@ -4,29 +4,17 @@ Import("env") env = env.Clone() -env.Library( - target="histogram_estimation_impl", - source=[ - "histogram_estimation_impl.cpp", - ], - LIBDEPS=[ - "$BUILD_DIR/mongo/db/query/query_index_bounds", - "$BUILD_DIR/mongo/db/query/stats/stats_histograms", - ], -) - env.Library( target="ce_test_utils", source=[ "test_utils.cpp", "histogram_accuracy_test_utils.cpp", - "histogram_estimator.cpp", ], LIBDEPS=[ + "$BUILD_DIR/mongo/db/query/ce/histogram_estimator", "$BUILD_DIR/mongo/db/query/stats/stats_test_utils", "$BUILD_DIR/mongo/db/sbe_values", "$BUILD_DIR/mongo/db/server_base", - "histogram_estimation_impl", ], ) diff --git a/src/mongo/db/query/ce/generated_histograms_test.cpp b/src/mongo/db/query/ce/generated_histograms_test.cpp index 74a9b6e80b8..e40180c67e7 100644 --- a/src/mongo/db/query/ce/generated_histograms_test.cpp +++ b/src/mongo/db/query/ce/generated_histograms_test.cpp @@ -27,6 +27,7 @@ * it in the license file. */ +#include "mongo/db/query/ce/histogram_estimation_impl.h" #include "mongo/db/query/ce/test_utils.h" namespace mongo::ce { diff --git a/src/mongo/db/query/ce/histogram_accuracy_test_utils.cpp b/src/mongo/db/query/ce/histogram_accuracy_test_utils.cpp index 1e10b2ac11b..b302d717a5b 100644 --- a/src/mongo/db/query/ce/histogram_accuracy_test_utils.cpp +++ b/src/mongo/db/query/ce/histogram_accuracy_test_utils.cpp @@ -30,6 +30,7 @@ #include #include "mongo/db/query/ce/histogram_accuracy_test_utils.h" +#include "mongo/db/query/ce/histogram_estimation_impl.h" namespace mongo::ce { diff --git a/src/mongo/db/query/ce/histogram_common.h b/src/mongo/db/query/ce/histogram_common.h index 7dfb894fb3d..0aaa4bf71b0 100644 --- a/src/mongo/db/query/ce/histogram_common.h +++ b/src/mongo/db/query/ce/histogram_common.h @@ -29,7 +29,7 @@ #pragma once -#include "mongo/db/query/stats/ce_histogram.h" +#include "mongo/db/exec/sbe/values/value.h" #include "mongo/db/query/cost_based_ranker/estimates.h" diff --git a/src/mongo/db/query/ce/histogram_estimation_impl.h b/src/mongo/db/query/ce/histogram_estimation_impl.h index c3463804430..ba140561c20 100644 --- a/src/mongo/db/query/ce/histogram_estimation_impl.h +++ b/src/mongo/db/query/ce/histogram_estimation_impl.h @@ -31,6 +31,7 @@ #include "mongo/db/exec/sbe/values/bson.h" #include "mongo/db/query/ce/histogram_common.h" +#include "mongo/db/query/stats/ce_histogram.h" #include "mongo/db/query/stats/value_utils.h" namespace mongo::ce { diff --git a/src/mongo/db/query/ce/histogram_estimation_impl_test.cpp b/src/mongo/db/query/ce/histogram_estimation_impl_test.cpp index ca88a909afd..9bfc64e03f9 100644 --- a/src/mongo/db/query/ce/histogram_estimation_impl_test.cpp +++ b/src/mongo/db/query/ce/histogram_estimation_impl_test.cpp @@ -27,6 +27,7 @@ * it in the license file. */ +#include "mongo/db/query/ce/histogram_estimation_impl.h" #include "mongo/db/query/ce/test_utils.h" #include "mongo/unittest/death_test.h" diff --git a/src/mongo/db/query/ce/histogram_estimator.cpp b/src/mongo/db/query/ce/histogram_estimator.cpp index 5e4784cc33b..54dd4cf2254 100644 --- a/src/mongo/db/query/ce/histogram_estimator.cpp +++ b/src/mongo/db/query/ce/histogram_estimator.cpp @@ -28,6 +28,7 @@ */ #include "mongo/db/query/ce/histogram_estimator.h" +#include "mongo/db/query/ce/histogram_estimation_impl.h" namespace mongo::ce { diff --git a/src/mongo/db/query/ce/histogram_estimator.h b/src/mongo/db/query/ce/histogram_estimator.h index 8ab6eb7e59b..ad3952a45dc 100644 --- a/src/mongo/db/query/ce/histogram_estimator.h +++ b/src/mongo/db/query/ce/histogram_estimator.h @@ -29,7 +29,8 @@ #pragma once -#include "mongo/db/query/ce/histogram_estimation_impl.h" +#include "mongo/db/query/ce/histogram_common.h" +#include "mongo/db/query/stats/ce_histogram.h" namespace mongo::ce { diff --git a/src/mongo/db/query/ce/histogram_estimator_test.cpp b/src/mongo/db/query/ce/histogram_estimator_test.cpp index da77117dc71..3c83ebcfc2c 100644 --- a/src/mongo/db/query/ce/histogram_estimator_test.cpp +++ b/src/mongo/db/query/ce/histogram_estimator_test.cpp @@ -28,8 +28,8 @@ */ #include "mongo/bson/json.h" -#include "mongo/db/query/ce/histogram_accuracy_test_utils.h" -#include "mongo/db/query/ce/histogram_common.h" +#include "mongo/db/query/ce/histogram_estimation_impl.h" +#include "mongo/db/query/ce/histogram_estimator.h" #include "mongo/db/query/ce/test_utils.h" #include "mongo/unittest/death_test.h" diff --git a/src/mongo/db/query/ce/maxdiff_histogram_test.cpp b/src/mongo/db/query/ce/maxdiff_histogram_test.cpp index 62d8a2977bf..fb90569b51e 100644 --- a/src/mongo/db/query/ce/maxdiff_histogram_test.cpp +++ b/src/mongo/db/query/ce/maxdiff_histogram_test.cpp @@ -27,6 +27,7 @@ * it in the license file. */ +#include "mongo/db/query/ce/histogram_estimation_impl.h" #include "mongo/db/query/ce/test_utils.h" #include "mongo/db/query/stats/maxdiff_test_utils.h" #include "mongo/db/query/stats/rand_utils.h" diff --git a/src/mongo/db/query/ce/test_utils.cpp b/src/mongo/db/query/ce/test_utils.cpp index 5c07c126037..7fb08517389 100644 --- a/src/mongo/db/query/ce/test_utils.cpp +++ b/src/mongo/db/query/ce/test_utils.cpp @@ -28,6 +28,7 @@ */ #include "mongo/db/query/ce/test_utils.h" +#include "mongo/db/query/ce/histogram_estimation_impl.h" #define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kQuery diff --git a/src/mongo/db/query/cost_based_ranker/BUILD.bazel b/src/mongo/db/query/cost_based_ranker/BUILD.bazel index 5e6170295e8..59830b21721 100644 --- a/src/mongo/db/query/cost_based_ranker/BUILD.bazel +++ b/src/mongo/db/query/cost_based_ranker/BUILD.bazel @@ -21,6 +21,8 @@ mongo_cc_library( "ce_utils", "estimates", "heuristic_estimator", + "//src/mongo/db/query/ce:histogram_estimator", + "//src/mongo/db/query/stats:collection_statistics_interface", ], ) diff --git a/src/mongo/db/query/cost_based_ranker/cardinality_estimator.cpp b/src/mongo/db/query/cost_based_ranker/cardinality_estimator.cpp index 771cade80ce..2b1c0b8e505 100644 --- a/src/mongo/db/query/cost_based_ranker/cardinality_estimator.cpp +++ b/src/mongo/db/query/cost_based_ranker/cardinality_estimator.cpp @@ -29,6 +29,7 @@ #include "mongo/db/query/cost_based_ranker/cardinality_estimator.h" +#include "mongo/db/query/ce/histogram_estimator.h" #include "mongo/db/query/cost_based_ranker/heuristic_estimator.h" #include "mongo/db/query/stage_types.h" @@ -330,7 +331,22 @@ CardinalityEstimate CardinalityEstimator::estimate(const OrderedIntervalList* no // the sum of cardinalities of the intervals. Therefore interval selectivities are summed. CardinalityEstimate resultCard = minCE; for (const auto& interval : node->intervals) { - SelectivityEstimate sel = estimateInterval(interval, _inputCard); + SelectivityEstimate sel = [&] { + if (_rankerMode == QueryPlanRankerModeEnum::kHistogramCE || + _rankerMode == QueryPlanRankerModeEnum::kAutomaticCE) { + auto histogram = _collStats.getHistogram(node->name); + if (histogram) { + bool canEstimate = + ce::HistogramEstimator::canEstimateInterval(*histogram, interval, true); + if (canEstimate) { + return ce::HistogramEstimator::estimateCardinality( + *histogram, _inputCard, interval, true) / + _inputCard; + } + } + } + return estimateInterval(interval, _inputCard); + }(); resultCard += sel * _inputCard; } resultCard = std::min(resultCard, _inputCard); diff --git a/src/mongo/db/query/cost_based_ranker/cardinality_estimator.h b/src/mongo/db/query/cost_based_ranker/cardinality_estimator.h index f3889c36119..cbd60c14c8e 100644 --- a/src/mongo/db/query/cost_based_ranker/cardinality_estimator.h +++ b/src/mongo/db/query/cost_based_ranker/cardinality_estimator.h @@ -35,6 +35,7 @@ #include "mongo/db/query/cost_based_ranker/estimates_storage.h" #include "mongo/db/query/index_bounds.h" #include "mongo/db/query/query_solution.h" +#include "mongo/db/query/stats/collection_statistics.h" namespace mongo::cost_based_ranker { @@ -56,8 +57,14 @@ concept UnionType = std::same_as || std::same_as; */ class CardinalityEstimator { public: - CardinalityEstimator(CardinalityEstimate collCard, EstimateMap& qsnEstimates) - : _inputCard{collCard}, _qsnEstimates{qsnEstimates} {}; + CardinalityEstimator(const stats::CollectionStatistics& collStats, + EstimateMap& qsnEstimates, + QueryPlanRankerModeEnum rankerMode) + : _inputCard{CardinalityEstimate{CardinalityType{collStats.getCardinality()}, + EstimationSource::Metadata}}, + _collStats(collStats), + _qsnEstimates{qsnEstimates}, + _rankerMode(rankerMode) {} // Delete the copy and move constructors and assignment operator CardinalityEstimator(const CardinalityEstimator&) = delete; @@ -132,10 +139,16 @@ private: // A subsequent conjunction will push again onto this stack. std::vector _conjSels; + // Collection statistics contains cached histograms. + const stats::CollectionStatistics& _collStats; + // A map from QSN to QSNEstimate that stores the final CE result for each QSN node. // Not owned by this class - it is passed by the user of this class, and is filled in with // entries during the estimation process. EstimateMap& _qsnEstimates; + + // The cardinality estimate mode we are using for estimates. + QueryPlanRankerModeEnum _rankerMode; }; } // namespace mongo::cost_based_ranker diff --git a/src/mongo/db/query/cost_based_ranker/qsn_estimator.cpp b/src/mongo/db/query/cost_based_ranker/qsn_estimator.cpp index b0bd0416557..0de353d9e78 100644 --- a/src/mongo/db/query/cost_based_ranker/qsn_estimator.cpp +++ b/src/mongo/db/query/cost_based_ranker/qsn_estimator.cpp @@ -62,9 +62,10 @@ void estimateQsnCost(const QuerySolutionNode* node, EstimateMap& estimateMap) { } // namespace void estimatePlanCost(const QuerySolution& plan, - CardinalityEstimate collectionCard, + QueryPlanRankerModeEnum mode, + const stats::CollectionStatistics& collStats, EstimateMap& estimateMap) { - CardinalityEstimator cardEstimator(collectionCard, estimateMap); + CardinalityEstimator cardEstimator(collStats, estimateMap, mode); cardEstimator.estimatePlan(plan); estimateQsnCost(plan.root(), estimateMap); } diff --git a/src/mongo/db/query/cost_based_ranker/qsn_estimator.h b/src/mongo/db/query/cost_based_ranker/qsn_estimator.h index 5a9ae03a56e..9390917d14c 100644 --- a/src/mongo/db/query/cost_based_ranker/qsn_estimator.h +++ b/src/mongo/db/query/cost_based_ranker/qsn_estimator.h @@ -32,6 +32,7 @@ #include "mongo/db/query/cost_based_ranker/estimates.h" #include "mongo/db/query/cost_based_ranker/estimates_storage.h" #include "mongo/db/query/query_solution.h" +#include "mongo/db/query/stats/collection_statistics.h" namespace mongo::cost_based_ranker { @@ -40,7 +41,8 @@ namespace mongo::cost_based_ranker { * insert an entry into the EstimateMap out-param. */ void estimatePlanCost(const QuerySolution& plan, - CardinalityEstimate collectionCard, + QueryPlanRankerModeEnum mode, + const stats::CollectionStatistics& collStats, EstimateMap& estimateMap); } // namespace mongo::cost_based_ranker diff --git a/src/mongo/db/query/query_planner.cpp b/src/mongo/db/query/query_planner.cpp index 1e0542c7ac8..3dc300f25cf 100644 --- a/src/mongo/db/query/query_planner.cpp +++ b/src/mongo/db/query/query_planner.cpp @@ -1628,12 +1628,12 @@ StatusWith QueryPlanner::planWithCostBasedR // This is a temporary stub implementation of CBR which arbitrarily picks the last of the // enumerated plans. - CardinalityEstimate collCard{ - CardinalityType{static_cast(params.mainCollectionInfo.stats.noOfRecords)}, - EstimationSource::Metadata}; EstimateMap estimates; for (auto&& soln : statusWithMultiPlanSolns.getValue()) { - estimatePlanCost(*soln, collCard, estimates); + estimatePlanCost(*soln, + query.getExpCtx()->getQueryKnobConfiguration().getPlanRankerMode(), + *params.mainCollectionInfo.collStats, + estimates); } std::vector> acceptedSoln; diff --git a/src/mongo/db/query/query_planner_params.cpp b/src/mongo/db/query/query_planner_params.cpp index a7a035622bc..eabde2ae194 100644 --- a/src/mongo/db/query/query_planner_params.cpp +++ b/src/mongo/db/query/query_planner_params.cpp @@ -41,6 +41,7 @@ #include "mongo/db/query/query_settings/query_settings_manager.h" #include "mongo/db/query/query_settings_decoration.h" #include "mongo/db/query/query_utils.h" +#include "mongo/db/query/stats/collection_statistics_impl.h" #include "mongo/db/query/wildcard_multikey_paths.h" #include "mongo/db/storage/storage_options.h" #include "mongo/db/timeseries/timeseries_index_schema_conversion_functions.h" @@ -446,7 +447,8 @@ void QueryPlannerParams::fillOutSecondaryCollectionsPlannerParams( } auto fillOutSecondaryInfo = [&](const NamespaceString& nss, const CollectionPtr& secondaryColl) { - auto secondaryInfo = CollectionInfo{.options = providedOptions}; + CollectionInfo secondaryInfo; + secondaryInfo.options = providedOptions; if (secondaryColl) { fillOutIndexEntries(opCtx, canonicalQuery, secondaryColl, secondaryInfo.indexes); fillOutPlannerCollectionInfo( @@ -531,12 +533,13 @@ void QueryPlannerParams::fillOutMainCollectionPlannerParams( applyQuerySettingsOrIndexFiltersForMainCollection(canonicalQuery, collections); fillOutPlannerCollectionInfo( - opCtx, - mainColl, - &mainCollectionInfo.stats, - // Include collection statistics if cost-based ranker is enabled - canonicalQuery.getExpCtx()->getQueryKnobConfiguration().getPlanRankerMode() != - QueryPlanRankerModeEnum::kMultiPlanning /* includeSizeStats */); + opCtx, mainColl, &mainCollectionInfo.stats, false /* includeSizeStats */); + + if (canonicalQuery.getExpCtx()->getQueryKnobConfiguration().getPlanRankerMode() != + QueryPlanRankerModeEnum::kMultiPlanning) { + mainCollectionInfo.collStats = std::make_unique( + static_cast(mainColl->getRecordStore()->numRecords()), canonicalQuery.nss()); + } } void QueryPlannerParams::setTargetSbeStageBuilder(OperationContext* opCtx, diff --git a/src/mongo/db/query/query_planner_params.h b/src/mongo/db/query/query_planner_params.h index e4f4c3bed2c..ee67c380588 100644 --- a/src/mongo/db/query/query_planner_params.h +++ b/src/mongo/db/query/query_planner_params.h @@ -42,6 +42,7 @@ #include "mongo/db/query/index_hint.h" #include "mongo/db/query/multiple_collection_accessor.h" #include "mongo/db/query/query_knobs_gen.h" +#include "mongo/db/query/stats/collection_statistics.h" #include "mongo/s/shard_key_pattern_query_util.h" #include "mongo/s/shard_targeting_helpers.h" @@ -69,6 +70,12 @@ struct PlannerCollectionInfo { * $lookup) useful for query planning. */ struct CollectionInfo { + CollectionInfo() = default; + CollectionInfo(const CollectionInfo&) = delete; + CollectionInfo& operator=(const CollectionInfo&) = delete; + CollectionInfo(CollectionInfo&& other) noexcept = default; + CollectionInfo& operator=(CollectionInfo&&) noexcept = default; + // See QueryPlannerParams::Options. // For secondary collections, this is currently unused (but may still be populated). size_t options{0 /* DEFAULT */}; @@ -86,6 +93,9 @@ struct CollectionInfo { // hints, this does not force the planner to prefer collection scans over other candidate // solutions. This is currently used for applying query settings '$natural' hints. boost::optional collscanDirection = boost::none; + + // Histogram-based statistics for fields in the collection. + std::unique_ptr collStats{nullptr}; }; diff --git a/src/mongo/db/query/query_planner_pipeline_pushdown_test.cpp b/src/mongo/db/query/query_planner_pipeline_pushdown_test.cpp index 4527088ae50..f13f29175d8 100644 --- a/src/mongo/db/query/query_planner_pipeline_pushdown_test.cpp +++ b/src/mongo/db/query/query_planner_pipeline_pushdown_test.cpp @@ -57,6 +57,10 @@ using namespace mongo; class QueryPlannerPipelinePushdownTest : public QueryPlannerTest { protected: + QueryPlannerPipelinePushdownTest() : QueryPlannerTest() { + secondaryCollMap.emplace(kSecondaryNamespace, CollectionInfo()); + } + std::vector> makeInnerPipelineStages( const Pipeline& pipeline) { std::vector> stages; @@ -77,8 +81,7 @@ protected: const NamespaceString kSecondaryNamespace = NamespaceString::createNamespaceString_forTest("test.other"); - const std::map secondaryCollMap{ - {kSecondaryNamespace, CollectionInfo()}}; + std::map secondaryCollMap; }; TEST_F(QueryPlannerPipelinePushdownTest, PushdownOfASingleGroup) { diff --git a/src/mongo/db/query/stats/BUILD.bazel b/src/mongo/db/query/stats/BUILD.bazel index 35e90e41261..010a2937451 100644 --- a/src/mongo/db/query/stats/BUILD.bazel +++ b/src/mongo/db/query/stats/BUILD.bazel @@ -49,6 +49,17 @@ mongo_cc_library( ], ) +mongo_cc_library( + name = "collection_statistics_interface", + srcs = [], + hdrs = [ + "collection_statistics.h", + ], + deps = [ + ":stats_histograms", + ], +) + mongo_cc_library( name = "stats", srcs = [ @@ -58,7 +69,6 @@ mongo_cc_library( "stats_catalog.cpp", ], hdrs = [ - "collection_statistics.h", "collection_statistics_impl.h", "stats_cache.h", "stats_cache_loader.h", @@ -66,6 +76,7 @@ mongo_cc_library( "stats_catalog.h", ], deps = [ + ":collection_statistics_interface", ":stats_histograms", "//src/mongo/db:dbdirectclient", "//src/mongo/util:caching", # TODO(SERVER-93876): Remove.