diff --git a/src/mongo/db/query/ce/histogram_estimation_impl.cpp b/src/mongo/db/query/ce/histogram_estimation_impl.cpp index cfdb06956ae..898aad53db8 100644 --- a/src/mongo/db/query/ce/histogram_estimation_impl.cpp +++ b/src/mongo/db/query/ce/histogram_estimation_impl.cpp @@ -185,7 +185,7 @@ EstimationResult interpolateEstimateInBucket(const ScalarHistogram& h, // If the value is minimal for its type, and the operation is $lt or $lte return cardinality up // to the previous bucket. - auto&& [min, inclusive] = stats::getMinMaxBoundForSBEType(tag, true /*isMin*/); + auto&& [min, inclusive] = stats::getMinBound(tag); if (compareValues(min.getTag(), min.getValue(), tag, val) == 0) { return {resultCard, resultNDV}; } diff --git a/src/mongo/db/query/ce/histogram_estimation_impl_test.cpp b/src/mongo/db/query/ce/histogram_estimation_impl_test.cpp index 8ff5bb81e7f..b0fd5b76349 100644 --- a/src/mongo/db/query/ce/histogram_estimation_impl_test.cpp +++ b/src/mongo/db/query/ce/histogram_estimation_impl_test.cpp @@ -39,7 +39,8 @@ namespace { namespace value = sbe::value; using stats::CEHistogram; -using stats::getMinMaxBoundForSBEType; +using stats::getMaxBound; +using stats::getMinBound; using stats::ScalarHistogram; using stats::TypeCounts; @@ -741,19 +742,19 @@ TEST(ScalarHistogramEstimatorEdgeCasesTest, MinValueMixedHistogramFromData) { ASSERT_BSONOBJ_EQ(expected, hist.serialize()); // Minimum ObjectId. - auto&& [minOid, inclOid] = getMinMaxBoundForSBEType(value::TypeTags::ObjectId, true /*isMin*/); + auto&& [minOid, inclOid] = getMinBound(value::TypeTags::ObjectId); auto minOidTag = minOid.getTag(); auto minOidVal = minOid.getValue(); ASSERT_EQ(1.0, estimateCardinality(hist, minOidTag, minOidVal, kEqual).card); // Minimum date. 
- const auto&& [minDate, inclDate] = getMinMaxBoundForSBEType(Date, true /*isMin*/); + const auto&& [minDate, inclDate] = getMinBound(Date); const auto minDateTag = minDate.getTag(); const auto minDateVal = minDate.getValue(); ASSERT_EQ(1.0, estimateCardinality(hist, minDateTag, minDateVal, kEqual).card); // Minimum timestamp. - auto&& [minTs, inclTs] = getMinMaxBoundForSBEType(value::TypeTags::Timestamp, true /*isMin*/); + auto&& [minTs, inclTs] = getMinBound(value::TypeTags::Timestamp); auto minTsTag = minTs.getTag(); auto minTsVal = minTs.getValue(); ASSERT_EQ(1.0, estimateCardinality(hist, minTsTag, minTsVal, kEqual).card); @@ -823,8 +824,7 @@ TEST(ScalarHistogramEstimatorEdgeCasesTest, MinValueMixedHistogramFromData) { ASSERT_EQ(3.0, expectedCard.card); // [minTs, maxTs], estimated by the entire timestamp bucket. - auto&& [maxTs, inclMaxTs] = - getMinMaxBoundForSBEType(value::TypeTags::Timestamp, false /*isMin*/); + auto&& [maxTs, inclMaxTs] = getMaxBound(value::TypeTags::Timestamp); const auto maxTsTag = maxTs.getTag(); const auto maxTsVal = maxTs.getValue(); expectedCard = @@ -849,20 +849,20 @@ TEST(ScalarHistogramEstimatorEdgeCasesTest, MinValueMixedHistogramFromBuckets) { ASSERT_EQ(500.0, getTotals(hist).card); // Minimum ObjectId. - auto&& [minOid, inclOid] = getMinMaxBoundForSBEType(value::TypeTags::ObjectId, true /*isMin*/); + auto&& [minOid, inclOid] = getMinBound(value::TypeTags::ObjectId); auto minOidTag = minOid.getTag(); auto minOidVal = minOid.getValue(); ASSERT_APPROX_EQUAL( 1.9, estimateCardinality(hist, minOidTag, minOidVal, kEqual).card, kErrorBound); // Minimum date. - const auto&& [minDate, inclDate] = getMinMaxBoundForSBEType(Date, true /*isMin*/); + const auto&& [minDate, inclDate] = getMinBound(Date); const auto minDateTag = minDate.getTag(); const auto minDateVal = minDate.getValue(); ASSERT_EQ(4.0, estimateCardinality(hist, minDateTag, minDateVal, kEqual).card); // Minimum timestamp. 
- auto&& [minTs, inclTs] = getMinMaxBoundForSBEType(value::TypeTags::Timestamp, true /*isMin*/); + auto&& [minTs, inclTs] = getMinBound(value::TypeTags::Timestamp); auto minTsTag = minTs.getTag(); auto minTsVal = minTs.getValue(); ASSERT_APPROX_EQUAL( diff --git a/src/mongo/db/query/stats/BUILD.bazel b/src/mongo/db/query/stats/BUILD.bazel index 010a2937451..e707c767e05 100644 --- a/src/mongo/db/query/stats/BUILD.bazel +++ b/src/mongo/db/query/stats/BUILD.bazel @@ -83,3 +83,16 @@ mongo_cc_library( "//src/mongo/util/concurrency:thread_pool", # TODO(SERVER-93876): Remove. ], ) + +mongo_cc_library( + name = "test_utils", + srcs = [ + "test_utils.cpp", + ], + hdrs = [ + "test_utils.h", + ], + deps = [ + "//src/mongo/db:sbe_values", + ], +) diff --git a/src/mongo/db/query/stats/SConscript b/src/mongo/db/query/stats/SConscript index 7cfc5dd241d..0c69a981000 100644 --- a/src/mongo/db/query/stats/SConscript +++ b/src/mongo/db/query/stats/SConscript @@ -123,5 +123,17 @@ env.CppUnitTest( ], LIBDEPS=[ "stats_test_utils", + "test_utils", + ], +) + +env.Benchmark( + target="value_utils_bm", + source=[ + "value_utils_bm.cpp", + ], + LIBDEPS=[ + "stats_histograms", + "test_utils", ], ) diff --git a/src/mongo/db/query/stats/test_utils.cpp b/src/mongo/db/query/stats/test_utils.cpp new file mode 100644 index 00000000000..4a1fcf81bba --- /dev/null +++ b/src/mongo/db/query/stats/test_utils.cpp @@ -0,0 +1,89 @@ +/** + * Copyright (C) 2024-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. 
If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/db/query/stats/test_utils.h" + +#include "mongo/logv2/log.h" + +namespace mongo::stats { + +bool sameTypeClassByComparingMin(sbe::value::TypeTags tag1, sbe::value::TypeTags tag2) { + if (tag1 == tag2) { + return true; + } + + static constexpr const char* kTempFieldName = "temp"; + + // As the type representations in SBEValues differ from those in BSONObj and they are not a + // one-to-one match, the implementation ensures correctness by converting to BSONType with + // tagToType() and then comparing with the values from BSONObjBuilder::appendMinForType. 
+ BSONObjBuilder minb1; + minb1.appendMinForType(kTempFieldName, sbe::value::tagToType(tag1)); + const BSONObj min1 = minb1.obj(); + + BSONObjBuilder minb2; + minb2.appendMinForType(kTempFieldName, sbe::value::tagToType(tag2)); + const BSONObj min2 = minb2.obj(); + + return min1.woCompare(min2) == 0; +}; + +TypeTagPairs generateTypeTagPairs(size_t start, size_t end) { + invariant(end > start); + // Reserve only: size-constructing would put (end - start)^2 default pairs before real ones. + TypeTagPairs allTypeTagPairs; + allTypeTagPairs.reserve((end - start) * (end - start)); + // Collects every ordered pair in [start, end), skipping tags that tagToType() maps to EOO. + for (size_t first = start; first < end; ++first) { + for (size_t second = start; second < end; ++second) { + auto firstTag = static_cast<sbe::value::TypeTags>(first); + auto secondTag = static_cast<sbe::value::TypeTags>(second); + if (tagToType(firstTag) == BSONType::EOO || tagToType(secondTag) == BSONType::EOO) { + continue; + } + allTypeTagPairs.emplace_back(firstTag, secondTag); + } + } + return allTypeTagPairs; +} + +TypeTagPairs generateAllTypeTagPairs() { + return generateTypeTagPairs(0, size_t(sbe::value::TypeTags::TypeTagsMax)); +} + +TypeTagPairs generateShallowTypeTagPairs() { + return generateTypeTagPairs(0, size_t(sbe::value::TypeTags::EndOfShallowTypeTags) + 1); +} + +TypeTagPairs generateHeapTypeTagPairs() { + return generateTypeTagPairs(size_t(sbe::value::TypeTags::EndOfShallowTypeTags) + 1, + size_t(sbe::value::TypeTags::TypeTagsMax)); +} + +} // namespace mongo::stats diff --git a/src/mongo/db/query/stats/test_utils.h b/src/mongo/db/query/stats/test_utils.h new file mode 100644 index 00000000000..cfeaa3ae658 --- /dev/null +++ b/src/mongo/db/query/stats/test_utils.h @@ -0,0 +1,59 @@ +/** + * Copyright (C) 2024-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include "mongo/db/exec/sbe/values/value.h" + +namespace mongo::stats { + +using TypeTagPairs = std::vector>; + +/** + * An alternative implementation of sameTypeClass by comparing the minimum values of two TypeTags + * with BSONObjBuilder::appendMinForType. + */ +bool sameTypeClassByComparingMin(sbe::value::TypeTags tag1, sbe::value::TypeTags tag2); + +/** + * Generates all possible pairs of TypeTags for testing. + */ +TypeTagPairs generateAllTypeTagPairs(); + +/** + * Generates all possible pairs of shallow TypeTags for testing. + */ +TypeTagPairs generateShallowTypeTagPairs(); + +/** + * Generates all possible pairs of heap-based TypeTags for testing. 
+ */ +TypeTagPairs generateHeapTypeTagPairs(); + +} // namespace mongo::stats diff --git a/src/mongo/db/query/stats/value_utils.cpp b/src/mongo/db/query/stats/value_utils.cpp index 9faad306a83..6c1dee1d8b9 100644 --- a/src/mongo/db/query/stats/value_utils.cpp +++ b/src/mongo/db/query/stats/value_utils.cpp @@ -46,6 +46,37 @@ #include "mongo/util/str.h" namespace mongo::stats { + +/** + * Generates and returns a sorted vector of all type tags. + * + * This function enumerates all possible type tags defined in the sbe::value::TypeTags enum. It then + * sorts these type tags using the compareStrictOrder() function. The compareStrictOrder() function + * sorts according to their sort order. If two type tags share the same sort order, it breaks the + * tie using the TypeTags enum values. + */ +std::vector sortTypeTags(); + +/** + * Generates and returns a map of type tags to their respective subsequent type tags. + * + * This function creates a mapping from each type tag to its subsequent type tag, determined by the + * order of type tags in the 'kTypeTagsSorted' vector. If two type tags share the same canonical + * order, they will map to the same subsequent type tag. + * + * The function skips the last type tag in the vector, as there is no subsequent type tag for it. 
+ */ +absl::flat_hash_map nextTypeTagsMap(); + +namespace { + +const static std::vector kTypeTagsSorted = sortTypeTags(); + +const static absl::flat_hash_map kNextTypeTagsMap = + nextTypeTagsMap(); + +} // namespace + namespace value = sbe::value; SBEValue::SBEValue(value::TypeTags tag, value::Value val) : _tag(tag), _val(val) {} @@ -139,21 +170,7 @@ std::pair makeNaNValue() { }; bool sameTypeClass(value::TypeTags tag1, value::TypeTags tag2) { - if (tag1 == tag2) { - return true; - } - - static constexpr const char* kTempFieldName = "temp"; - - BSONObjBuilder minb1; - minb1.appendMinForType(kTempFieldName, value::tagToType(tag1)); - const BSONObj min1 = minb1.obj(); - - BSONObjBuilder minb2; - minb2.appendMinForType(kTempFieldName, value::tagToType(tag2)); - const BSONObj min2 = minb2.obj(); - - return min1.woCompare(min2) == 0; + return compareTypeTags(tag1, tag2) == 0; } bool sameTypeBracket(value::TypeTags tag1, value::TypeTags tag2) { @@ -453,49 +470,104 @@ value::TypeTags deserialize(const std::string& name) { str::stream() << "String " << name << " is not convertable to SBE type tag."); } -std::pair getMinMaxBoundForSBEType(sbe::value::TypeTags tag, bool isMin) { +int compareTypeTags(sbe::value::TypeTags a, sbe::value::TypeTags b) { + auto orderOfA = canonicalizeBSONTypeUnsafeLookup(tagToType(a)); + auto orderOfB = canonicalizeBSONTypeUnsafeLookup(tagToType(b)); + if (orderOfA < orderOfB) { + return -1; + } else if (orderOfA > orderOfB) { + return 1; + } + return 0; +} + + +std::vector sortTypeTags() { + // Sorts type tags according to the sort order. Breaks tie with the TypeTags enum values if + // two tags share the same sort order. + auto compareStrictOrder = [](sbe::value::TypeTags a, sbe::value::TypeTags b) -> bool { + auto result = compareTypeTags(a, b); + return result != 0 ? result < 0 : (a < b); + }; + + static constexpr size_t numTypeTags = size_t(sbe::value::TypeTags::TypeTagsMax); + std::vector typeTagsOrder; + + // Enumerates all the type tags. 
+ for (uint8_t tagValue = 0; tagValue < numTypeTags; ++tagValue) { + auto tag = static_cast(tagValue); + + // Skips unsupported types. + if (sbe::value::tagToType(tag) == BSONType::EOO) { + continue; + } + typeTagsOrder.push_back(tag); + } + + std::sort(typeTagsOrder.begin(), typeTagsOrder.end(), compareStrictOrder); + + return typeTagsOrder; +} + +absl::flat_hash_map<sbe::value::TypeTags, sbe::value::TypeTags> nextTypeTagsMap() { + absl::flat_hash_map<sbe::value::TypeTags, sbe::value::TypeTags> nextTypeTagsMap; + + // Checks non-emptiness before indexing; starts from the last tag, which has no next type. + invariant(!kTypeTagsSorted.empty()); + sbe::value::TypeTags nextTag = kTypeTagsSorted.back(); + for (int32_t index = static_cast<int32_t>(kTypeTagsSorted.size()) - 2; index >= 0; --index) { + auto tag = kTypeTagsSorted[index]; + nextTypeTagsMap[tag] = nextTag; + + // If 'tag' and 'kTypeTagsSorted[index - 1]' are at the same canonical order, reuse + // 'nextTag'. For example, as compareTypeTags(NumberInt32, NumberDouble) == 0, their next + // type tags are both 'StringSmall'. + if (!(index > 0 && compareTypeTags(tag, kTypeTagsSorted[index - 1]) == 0)) { + nextTag = tag; + } + } + + return nextTypeTagsMap; +} + +sbe::value::TypeTags getNextType(sbe::value::TypeTags tag) { + auto it = kNextTypeTagsMap.find(tag); + tassert(9619600, + fmt::format("Type {} does not have a next type", tag), + it != kNextTypeTagsMap.end()); + return it->second; +} + +bool isVariableWidthType(sbe::value::TypeTags tag) { + return isVariableWidthType(tagToType(tag)); +} + +std::pair getMinBound(sbe::value::TypeTags tag) { switch (tag) { case sbe::value::TypeTags::MinKey: - return {{tag, 0}, false}; + return {{tag, 0}, true}; case sbe::value::TypeTags::MaxKey: - return {{tag, 0}, false}; + return {{tag, 0}, true}; case sbe::value::TypeTags::NumberInt32: case sbe::value::TypeTags::NumberInt64: case sbe::value::TypeTags::NumberDouble: case sbe::value::TypeTags::NumberDecimal: - if (isMin) { - return {{tag, - sbe::value::bitcastFrom(std::numeric_limits::quiet_NaN())}, - false}; - } else { - return
{sbe::value::makeNewString(""), false}; - } + return {{sbe::value::TypeTags::NumberDouble, + sbe::value::bitcastFrom(std::numeric_limits::quiet_NaN())}, + true}; case sbe::value::TypeTags::StringSmall: case sbe::value::TypeTags::StringBig: case sbe::value::TypeTags::bsonString: case sbe::value::TypeTags::bsonSymbol: - if (isMin) { - return {sbe::value::makeNewString(""), true}; - } else { - return {sbe::value::makeNewObject(), false}; - } + return {sbe::value::makeNewString(""), true}; case sbe::value::TypeTags::Date: - if (isMin) { - return {{tag, sbe::value::bitcastFrom(Date_t::min().toMillisSinceEpoch())}, - true}; - } else { - return {{tag, sbe::value::bitcastFrom(Date_t::max().toMillisSinceEpoch())}, - true}; - } + return {{tag, sbe::value::bitcastFrom(Date_t::min().toMillisSinceEpoch())}, + true}; case sbe::value::TypeTags::Timestamp: - if (isMin) { - return {{tag, sbe::value::bitcastFrom(Timestamp::min().asULL())}, true}; - } else { - return {{tag, sbe::value::bitcastFrom(Timestamp::max().asULL())}, true}; - } + return {{tag, sbe::value::bitcastFrom(Timestamp::min().asULL())}, true}; case sbe::value::TypeTags::Null: return {{tag, 0}, true}; @@ -506,76 +578,86 @@ std::pair getMinMaxBoundForSBEType(sbe::value::TypeTags t case sbe::value::TypeTags::Object: case sbe::value::TypeTags::bsonObject: - if (isMin) { - return {sbe::value::makeNewObject(), true}; - } else { - return {sbe::value::makeNewArray(), false}; - } + return {sbe::value::makeNewObject(), true}; case sbe::value::TypeTags::Array: case sbe::value::TypeTags::ArraySet: case sbe::value::TypeTags::ArrayMultiSet: case sbe::value::TypeTags::bsonArray: - if (isMin) { - return {sbe::value::makeNewArray(), true}; - } else { - return {sbe::value::makeValue(Value(BSONBinData())), false}; - } + return {sbe::value::makeNewArray(), true}; case sbe::value::TypeTags::bsonBinData: - if (isMin) { - return {sbe::value::makeValue(Value(BSONBinData())), true}; - } else { - return {sbe::value::makeValue(Value(OID())), 
false}; - } + return {sbe::value::makeValue(Value(BSONBinData())), true}; case sbe::value::TypeTags::Boolean: - if (isMin) { - return {{tag, sbe::value::bitcastFrom(false)}, true}; - } else { - return {{tag, sbe::value::bitcastFrom(true)}, true}; - } + return {{tag, sbe::value::bitcastFrom(false)}, true}; case sbe::value::TypeTags::ObjectId: case sbe::value::TypeTags::bsonObjectId: - if (isMin) { - return {sbe::value::makeValue(Value(OID())), true}; - } else { - return {sbe::value::makeValue(Value(OID::max())), true}; - } + return {sbe::value::makeValue(Value(OID())), true}; case sbe::value::TypeTags::bsonRegex: - if (isMin) { - return {sbe::value::makeValue(Value(BSONRegEx("", ""))), true}; - } else { - return {sbe::value::makeValue(Value(BSONDBRef())), false}; - } + return {sbe::value::makeValue(Value(BSONRegEx("", ""))), true}; case sbe::value::TypeTags::bsonDBPointer: - if (isMin) { - return {sbe::value::makeValue(Value(BSONDBRef())), true}; - } else { - return {sbe::value::makeCopyBsonJavascript(StringData("")), false}; - } + return {sbe::value::makeValue(Value(BSONDBRef())), true}; case sbe::value::TypeTags::bsonJavascript: - if (isMin) { - return {sbe::value::makeCopyBsonJavascript(StringData("")), true}; - } else { - return {sbe::value::makeValue(Value(BSONCodeWScope())), false}; - } + return {sbe::value::makeCopyBsonJavascript(StringData("")), true}; case sbe::value::TypeTags::bsonCodeWScope: - if (isMin) { - return {sbe::value::makeValue(Value(BSONCodeWScope())), true}; - } else { - return {{sbe::value::TypeTags::MaxKey, 0}, false}; - } - + return {sbe::value::makeValue(Value(BSONCodeWScope())), true}; default: - return {{sbe::value::TypeTags::Nothing, 0}, false}; + tasserted(9619601, str::stream() << "Type not supported for getMinBound: " << tag); } - MONGO_UNREACHABLE; + + MONGO_UNREACHABLE_TASSERT(9619602); +} + +std::pair getMaxBound(sbe::value::TypeTags tag) { + // If the type is a variable width type, the maximum value cannot be represented with the 
same + // type. Therefore, we use the minimum value of the next type to represent the maximum bound. + // The inclusive flag is set to false to indicate that the bound is excluded. + if (isVariableWidthType(tag)) { + auto bound = getMinBound(getNextType(tag)); + bound.second = false; + return bound; + } + + switch (tag) { + case sbe::value::TypeTags::MinKey: + return {{tag, 0}, true}; + case sbe::value::TypeTags::MaxKey: + return {{tag, 0}, true}; + case sbe::value::TypeTags::NumberInt32: + case sbe::value::TypeTags::NumberInt64: + case sbe::value::TypeTags::NumberDouble: + case sbe::value::TypeTags::NumberDecimal: + return {{sbe::value::TypeTags::NumberDouble, + sbe::value::bitcastFrom(std::numeric_limits::infinity())}, + true}; + case sbe::value::TypeTags::Date: + return {{tag, sbe::value::bitcastFrom(Date_t::max().toMillisSinceEpoch())}, + true}; + case sbe::value::TypeTags::Timestamp: + return {{tag, sbe::value::bitcastFrom(Timestamp::max().asULL())}, true}; + + case sbe::value::TypeTags::Null: + return {{tag, 0}, true}; + + case sbe::value::TypeTags::bsonUndefined: + return {sbe::value::makeValue(Value(BSONUndefined)), true}; + + case sbe::value::TypeTags::Boolean: + return {{tag, sbe::value::bitcastFrom(true)}, true}; + case sbe::value::TypeTags::ObjectId: + case sbe::value::TypeTags::bsonObjectId: + return {sbe::value::makeValue(Value(OID::max())), true}; + default: + tasserted(9619603, str::stream() << "Type not supported for getMaxBound: " << tag); + } + + MONGO_UNREACHABLE_TASSERT(9619604); } bool sameTypeBracketInterval(sbe::value::TypeTags startTag, @@ -590,7 +672,7 @@ bool sameTypeBracketInterval(sbe::value::TypeTags startTag, return false; } - auto [max, maxInclusive] = getMinMaxBoundForSBEType(startTag, false /*isMin*/); + auto [max, maxInclusive] = getMinBound(getNextType(startTag)); return stats::compareValues(endTag, endVal, max.getTag(), max.getValue()) == 0; } @@ -600,9 +682,42 @@ bool isFullBracketInterval(sbe::value::TypeTags startTag, 
sbe::value::TypeTags endTag, sbe::value::Value endVal, bool endInclusive) { + // 'startInclusive' must be true because a full bracket interval includes the minimum value of + // the type. + if (!startInclusive) { + return false; + } - auto [expectedMin, minInclusive] = getMinMaxBoundForSBEType(startTag, true /*isMin*/); - auto [expectedMax, maxInclusive] = getMinMaxBoundForSBEType(startTag, false /*isMin*/); + // Short-circuits by first evaluating 'endInclusive'. This approach prevents unnecessary memory + // allocation by avoiding calls to getMinBound() and getMaxBound() if the conditions are not + // met. + // + // The logic checks if the start and end tags are of the same type. If they are, both + // isVariableWidthType(startTag) and (!endInclusive) must be false for the interval to be + // considered a full bracket interval. + // + // Example scenarios: + // - If 'startTag' is Object, the logic returns false because isVariableWidthType() is true. + // We expect the end bound of a full bracket interval of object to be an empty Array. + // - If 'startTag' is NumberInt32, the logic returns false if 'endInclusive' is false, as we + // expect the end bound to be infinity and inclusive. + bool sameType = sameTypeClass(startTag, endTag); + if (sameType && (isVariableWidthType(startTag) || !endInclusive)) { + return false; + } else if (!sameType && (endInclusive || !sameTypeClass(endTag, getNextType(startTag)))) { + // If the start and end bounds are of different type classes, 'endInclusive' must be false + // and 'endTag' must be the next type of 'startTag' for the interval to be considered a full + // bracket interval. + // + // Example scenario: + // - If 'startTag' is Object, 'endTag' must be Array (or equivalent) and 'endInclusive' + // must be false for the interval to be valid. + return false; + } + + auto [expectedMin, minInclusive] = getMinBound(startTag); + auto [expectedMax, maxInclusive] = + sameType ?
getMaxBound(startTag) : getMinBound(getNextType(startTag)); bool compareValuesMin = (stats::compareValues(startTag, startVal, expectedMin.getTag(), expectedMin.getValue()) == @@ -611,7 +726,7 @@ bool isFullBracketInterval(sbe::value::TypeTags startTag, bool compareValuesMax = (stats::compareValues(endTag, endVal, expectedMax.getTag(), expectedMax.getValue()) == 0); - return (compareValuesMin && (compareValuesMax && !endInclusive)); + return compareValuesMin && compareValuesMax; } } // namespace mongo::stats diff --git a/src/mongo/db/query/stats/value_utils.h b/src/mongo/db/query/stats/value_utils.h index dae56325506..65f4ade5323 100644 --- a/src/mongo/db/query/stats/value_utils.h +++ b/src/mongo/db/query/stats/value_utils.h @@ -195,10 +195,40 @@ std::string serialize(sbe::value::TypeTags tag); sbe::value::TypeTags deserialize(const std::string& name); /** - * Returns the minimum or maximum bound of the type 'tag'. If 'isMin' is false, returns the maximum - * bound. + * Three-way TypeTags comparison (aka spaceship operator) according to the BSON type sort order. */ -std::pair getMinMaxBoundForSBEType(sbe::value::TypeTags tag, bool isMin); +int compareTypeTags(sbe::value::TypeTags a, sbe::value::TypeTags b); + +/** + * Given a type, returns whether or not that type has a variable width. Returns true if the type has + * variable width, indicating the maximum value cannot be represented by the same type + * (e.g., objects, arrays). Returns false for types with fixed width (e.g., numbers, booleans). + */ +bool isVariableWidthType(sbe::value::TypeTags tag); + +/** + * Returns the minimum bound of the type 'tag'. + * The result is a pair where the first element is the minimum bound value (stats::SBEValue), and + * the second element is a boolean indicating if the bound is inclusive.
+ * + * Examples: + * - getMinBound(NumberInt32): (nan, true) + * - getMinBound(StringSmall): ("", true) + * - getMinBound(Object): ({}, true) + */ +std::pair getMinBound(sbe::value::TypeTags tag); + +/** + * Returns the maximum bound of the type 'tag'. + * The result is a pair where the first element is the maximum bound value (stats::SBEValue), and + * the second element is a boolean indicating if the bound is inclusive. + * + * Examples: + * - getMaxBound(NumberInt32): (infinity, true) + * - getMaxBound(StringSmall): ({}, false) + * - getMaxBound(Object): ([], false) + */ +std::pair getMaxBound(sbe::value::TypeTags tag); /** * Returns true if the interval is of the same type. This takes type-bracketing into consideration. @@ -219,9 +249,10 @@ bool sameTypeBracketInterval(sbe::value::TypeTags startTag, /** * Returns true if the interval covers a full type. This helps to determine if the interval is * estimable by a type count. The design of this method follows the definition of a full interval as - * inclusive the minimum value of the current type and exclusive the minimume value of the next - * type e.g., for NumberDecimal [quite_NaN, ""). Thus, it assumes that the start type tag and end - * type tag differ. + * inclusive the minimum value of the current type and either: + * a) inclusive the maximum value of the current type (if representable, e.g., [nan.0, inf.0]) + * or + * b) exclusive the minimum value of the next type (e.g., for Object, [{}, [])). */ bool isFullBracketInterval(sbe::value::TypeTags startTag, sbe::value::Value startVal, diff --git a/src/mongo/db/query/stats/value_utils_bm.cpp b/src/mongo/db/query/stats/value_utils_bm.cpp new file mode 100644 index 00000000000..957181e72f3 --- /dev/null +++ b/src/mongo/db/query/stats/value_utils_bm.cpp @@ -0,0 +1,107 @@ +/** + * Copyright (C) 2024-present MongoDB, Inc.
+ * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. 
+ */
+
+
+#include <benchmark/benchmark.h>
+#include <cstddef>  // NOTE(review): header name lost in extraction; <cstddef> assumed - confirm.
+
+#include "mongo/db/exec/sbe/values/value.h"
+#include "mongo/db/query/stats/test_utils.h"
+#include "mongo/db/query/stats/value_utils.h"
+
+#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kDefault
+
+
+namespace mongo::stats {
+
+enum class TypeTagsPair {  // Selects the tag-pair generator; values match BENCHMARK ->Arg().
+    All = 0,               // generateAllTypeTagPairs()
+    Shallow = 1,           // generateShallowTypeTagPairs()
+    Heap = 2,              // generateHeapTypeTagPairs()
+};
+
+auto genAllPairs(TypeTagsPair option) {  // Unrecognized values fall back to all pairs.
+    switch (option) {
+        case TypeTagsPair::Shallow:
+            return generateShallowTypeTagPairs();
+        case TypeTagsPair::Heap:
+            return generateHeapTypeTagPairs();
+        case TypeTagsPair::All:
+        default:
+            return generateAllTypeTagPairs();
+    }
+}
+
+void BM_sameTypeClass(benchmark::State& state) {  // Times sameTypeClass() over the chosen pairs.
+    auto allTypeTagPairs = genAllPairs(static_cast<TypeTagsPair>(state.range(0)));
+    size_t i = 0;
+    for (auto _ : state) {
+        const auto& typeTagPair = allTypeTagPairs[i];
+        benchmark::DoNotOptimize(sameTypeClass(typeTagPair.first, typeTagPair.second));
+        i = (i + 1) % allTypeTagPairs.size();  // Wrap around to reuse the pairs.
+    }
+}
+
+void BM_sameTypeBracket(benchmark::State& state) {  // Times sameTypeBracket() over the pairs.
+    auto allTypeTagPairs = genAllPairs(static_cast<TypeTagsPair>(state.range(0)));
+    size_t i = 0;
+    for (auto _ : state) {
+        const auto& typeTagPair = allTypeTagPairs[i];
+        benchmark::DoNotOptimize(sameTypeBracket(typeTagPair.first, typeTagPair.second));
+        i = (i + 1) % allTypeTagPairs.size();  // Wrap around to reuse the pairs.
+    }
+}
+
+void BM_sameTypeClassByComparingMin(benchmark::State& state) {  // Times the min-comparing form.
+    auto allTypeTagPairs = genAllPairs(static_cast<TypeTagsPair>(state.range(0)));
+    size_t i = 0;
+    for (auto _ : state) {
+        const auto& typeTagPair = allTypeTagPairs[i];
+        benchmark::DoNotOptimize(
+            sameTypeClassByComparingMin(typeTagPair.first, typeTagPair.second));
+        i = (i + 1) % allTypeTagPairs.size();  // Wrap around to reuse the pairs.
+    }
+}
+
+void BM_isFullBracketInterval(benchmark::State& state) {  // Times an [Object, Array) interval.
+    auto start = sbe::value::makeNewObject();
+    auto end = sbe::value::makeNewArray();
+    sbe::value::ValueGuard startGuard{start}, endGuard{end};  // Guard right after creation.
+    for (auto _ : state) {
+        benchmark::DoNotOptimize(
+            isFullBracketInterval(start.first, start.second, true, end.first, end.second, false));
+    }
+}
+
+BENCHMARK(BM_sameTypeClass)->Arg(0)->Arg(1)->Arg(2);
+BENCHMARK(BM_sameTypeBracket)->Arg(0)->Arg(1)->Arg(2);
+BENCHMARK(BM_sameTypeClassByComparingMin)->Arg(0)->Arg(1)->Arg(2);
+BENCHMARK(BM_isFullBracketInterval);
+
+}  // namespace mongo::stats
diff --git a/src/mongo/db/query/stats/value_utils_test.cpp b/src/mongo/db/query/stats/value_utils_test.cpp
index 26eab4a6c9b..71776d3e0d1 100644
--- a/src/mongo/db/query/stats/value_utils_test.cpp
+++ b/src/mongo/db/query/stats/value_utils_test.cpp
@@ -27,13 +27,18 @@
  * it in the license file.
  */
 
+#include "mongo/bson/bsonmisc.h"
+#include "mongo/bson/bsonobjbuilder.h"
+#include "mongo/bson/bsontypes.h"
 #include "mongo/bson/json.h"
 #include "mongo/db/exec/sbe/values/value.h"
 #include "mongo/db/query/index_bounds.h"
 #include "mongo/db/query/index_bounds_builder.h"
 #include "mongo/db/query/index_bounds_builder_test.h"
+#include "mongo/db/query/stats/test_utils.h"
 
 #include "mongo/db/query/interval.h"
+#include "mongo/db/query/stats/value_utils.h"
 #include "mongo/unittest/assert.h"
 #include "mongo/unittest/framework.h"
 
@@ -221,4 +226,173 @@ TEST_F(ValueUtilsTest, SameTypeBracketedIntervalGteNan) {
 }
 
 
+TEST_F(ValueUtilsTest, SameTypeClassAlignsWithMinComparison) {
+    auto allTypeTagPairs = generateAllTypeTagPairs();
+
+    // Asserts that sameTypeClass() behaves the same as the alternative implementation
+    // sameTypeClassByComparingMin() which compares the minimum values in BSON.
+    for (const auto& typeTagPair : allTypeTagPairs) {
+        const bool byMin = sameTypeClassByComparingMin(typeTagPair.first, typeTagPair.second);
+        const bool byClass = sameTypeClass(typeTagPair.first, typeTagPair.second);
+        ASSERT_EQ(byMin, byClass)
+            << "first tag: " << typeTagPair.first << " second tag: " << typeTagPair.second
+            << " sameTypeClassByComparingMin: " << byMin << " sameTypeClass: " << byClass;
+    }
+}
+
+TEST_F(ValueUtilsTest, GetMinBoundAlignsWithAppendMinForType) {
+    // Asserts that getMinBound() agrees with the BSON implementation appendMinForType(), and that
+    // every minimum bound is reported as inclusive.
+    for (size_t t = 0; t < size_t(sbe::value::TypeTags::TypeTagsMax); ++t) {
+        auto tag = static_cast<sbe::value::TypeTags>(t);
+
+        // Skips the extended (unsupported) tags, i.e. those tagToType() maps to BSONType::EOO.
+        auto bsonType = sbe::value::tagToType(tag);
+        if (bsonType == BSONType::EOO) {
+            continue;
+        }
+        auto [actual, inclusive] = getMinBound(tag);
+
+        BSONObjBuilder builder;
+        builder.appendMinForType("", bsonType);
+        auto obj = builder.obj();
+        auto elem = obj.firstElement();
+        auto expected = sbe::bson::convertFrom<false>(elem);  // NOTE(review): '<false>' re-added.
+        sbe::value::ValueGuard guard{expected};
+
+        auto res = stats::compareValues(
+            actual.getTag(), actual.getValue(), expected.first, expected.second);
+        ASSERT_EQ(res, 0) << "tag: " << tag << ", getMinBound() returns: " << actual.get()
+                          << ", expected returning: " << expected;
+        ASSERT_TRUE(inclusive);
+    }
+}
+
+TEST_F(ValueUtilsTest, GetMaxBoundAlignsWithAppendMaxForType) {
+    // Asserts that getMaxBound() agrees with the BSON implementation appendMaxForType(), and that
+    // the bound is inclusive exactly for the types that are not variable-width.
+    for (size_t t = 0; t < size_t(sbe::value::TypeTags::TypeTagsMax); ++t) {
+        auto tag = static_cast<sbe::value::TypeTags>(t);
+
+        // Skips the extended (unsupported) tags, i.e. those tagToType() maps to BSONType::EOO.
+        auto bsonType = sbe::value::tagToType(tag);
+        if (bsonType == BSONType::EOO) {
+            continue;
+        }
+        auto [actual, inclusive] = getMaxBound(tag);
+
+        BSONObjBuilder builder;
+        builder.appendMaxForType("", bsonType);
+        auto obj = builder.obj();
+        auto elem = obj.firstElement();
+        auto expected = sbe::bson::convertFrom<false>(elem);  // NOTE(review): '<false>' re-added.
+        sbe::value::ValueGuard guard{expected};
+
+        auto res = stats::compareValues(
+            actual.getTag(), actual.getValue(), expected.first, expected.second);
+        ASSERT_EQ(res, 0) << "tag: " << tag << ", getMaxBound() returns: " << actual.get()
+                          << ", expected returning: " << expected;
+        ASSERT_EQ(inclusive, !isVariableWidthType(tag));
+    }
+}
+
+TEST_F(ValueUtilsTest, ReturnsTrueForFullBracketIntervals) {
+    // Asserts that isFullBracketInterval() returns true for the {$type: t} interval of each valid
+    // BSON type visited below; EOO and Array are skipped and the loop stops before MaxKey.
+    for (int t = -1; t < BSONType::MaxKey; ++t) {
+        if (!isValidBSONType(t) || t == BSONType::EOO || t == BSONType::Array) {
+            continue;
+        }
+        BSONObj obj = BSON("a" << BSON("$type" << t));
+        auto interval = buildInterval(obj);
+
+        // Converts both bounds to SBE values (NOTE(review): '<false>' re-added below - confirm).
+        bool startInclusive = interval.startInclusive;
+        bool endInclusive = interval.endInclusive;
+        auto [startTag, startVal] = sbe::bson::convertFrom<false>(interval.start);
+        auto [endTag, endVal] = sbe::bson::convertFrom<false>(interval.end);
+        sbe::value::ValueGuard startGuard{startTag, startVal};
+        sbe::value::ValueGuard endGuard{endTag, endVal};
+
+        ASSERT_TRUE(
+            isFullBracketInterval(startTag, startVal, startInclusive, endTag, endVal, endInclusive))
+            << "type: " << typeName(BSONType(t));
+    }
+}
+
+TEST_F(ValueUtilsTest, ReturnsFalseForNonFullBracketIntervalsWithStaticWidthType) {
+    auto [start, startInclusive] = getMinBound(sbe::value::TypeTags::NumberInt32);
+    auto [end, endInclusive] = getMaxBound(sbe::value::TypeTags::NumberInt32);
+
+    // Asserts that the interval from the min bound to the max bound is a full bracket interval
+    // when each bound keeps the inclusiveness reported by getMinBound()/getMaxBound().
+    ASSERT_TRUE(isFullBracketInterval(start.getTag(),
+                                      start.getValue(),
+                                      startInclusive,
+                                      end.getTag(),
+                                      end.getValue(),
+                                      endInclusive));
+
+    // Asserts that the function returns false if either bound (or both) is exclusive.
+    ASSERT_FALSE(isFullBracketInterval(start.getTag(),
+                                       start.getValue(),
+                                       false /*startInclusive*/,
+                                       end.getTag(),
+                                       end.getValue(),
+                                       false /*endInclusive*/));
+
+    ASSERT_FALSE(isFullBracketInterval(start.getTag(),
+                                       start.getValue(),
+                                       true /*startInclusive*/,
+                                       end.getTag(),
+                                       end.getValue(),
+                                       false /*endInclusive*/));
+
+    ASSERT_FALSE(isFullBracketInterval(start.getTag(),
+                                       start.getValue(),
+                                       false /*startInclusive*/,
+                                       end.getTag(),
+                                       end.getValue(),
+                                       true /*endInclusive*/));
+}
+
+TEST_F(ValueUtilsTest, ReturnsFalseForNonFullBracketIntervalsWithVariableWidthType) {
+    auto [start, startInclusive] = getMinBound(sbe::value::TypeTags::StringSmall);
+    auto [end, endInclusive] = getMaxBound(sbe::value::TypeTags::StringSmall);
+
+    // Asserts that the interval from the min bound to the max bound is a full bracket interval
+    // when each bound keeps the inclusiveness reported by getMinBound()/getMaxBound().
+    ASSERT_TRUE(isFullBracketInterval(start.getTag(),
+                                      start.getValue(),
+                                      startInclusive,
+                                      end.getTag(),
+                                      end.getValue(),
+                                      endInclusive));
+
+    // Asserts that the function returns false if 'endInclusive' is true.
+    ASSERT_FALSE(isFullBracketInterval(start.getTag(),
+                                       start.getValue(),
+                                       startInclusive,
+                                       end.getTag(),
+                                       end.getValue(),
+                                       true /*endInclusive*/));
+
+    // Asserts that the function returns false when the end bound is incorrect (the min bound).
+    ASSERT_FALSE(isFullBracketInterval(start.getTag(),
+                                       start.getValue(),
+                                       true /*startInclusive*/,
+                                       start.getTag(),
+                                       start.getValue(),
+                                       endInclusive));
+
+    // Asserts that the short-circuit returns false when the function detects the end tag is not the
+    // next type of the start tag.
+    ASSERT_FALSE(isFullBracketInterval(start.getTag(),
+                                       start.getValue(),
+                                       startInclusive,
+                                       sbe::value::TypeTags::Array,
+                                       end.getValue(),
+                                       endInclusive));
+}
+
 }  // namespace mongo::stats