0
0
mirror of https://github.com/mongodb/mongo.git synced 2024-11-24 00:17:37 +01:00

SERVER-96196 Refactor getMinMaxBoundForSBEType (#29327)

GitOrigin-RevId: 90c57d41022640a09a3b12545cc8dfc5791e6c52
This commit is contained in:
Chi-I Huang 2024-11-19 08:38:45 -08:00 committed by MongoDB Bot
parent 6982155e42
commit bf4a36d145
10 changed files with 710 additions and 110 deletions

View File

@ -185,7 +185,7 @@ EstimationResult interpolateEstimateInBucket(const ScalarHistogram& h,
// If the value is minimal for its type, and the operation is $lt or $lte return cardinality up
// to the previous bucket.
auto&& [min, inclusive] = stats::getMinMaxBoundForSBEType(tag, true /*isMin*/);
auto&& [min, inclusive] = stats::getMinBound(tag);
if (compareValues(min.getTag(), min.getValue(), tag, val) == 0) {
return {resultCard, resultNDV};
}

View File

@ -39,7 +39,8 @@ namespace {
namespace value = sbe::value;
using stats::CEHistogram;
using stats::getMinMaxBoundForSBEType;
using stats::getMaxBound;
using stats::getMinBound;
using stats::ScalarHistogram;
using stats::TypeCounts;
@ -741,19 +742,19 @@ TEST(ScalarHistogramEstimatorEdgeCasesTest, MinValueMixedHistogramFromData) {
ASSERT_BSONOBJ_EQ(expected, hist.serialize());
// Minimum ObjectId.
auto&& [minOid, inclOid] = getMinMaxBoundForSBEType(value::TypeTags::ObjectId, true /*isMin*/);
auto&& [minOid, inclOid] = getMinBound(value::TypeTags::ObjectId);
auto minOidTag = minOid.getTag();
auto minOidVal = minOid.getValue();
ASSERT_EQ(1.0, estimateCardinality(hist, minOidTag, minOidVal, kEqual).card);
// Minimum date.
const auto&& [minDate, inclDate] = getMinMaxBoundForSBEType(Date, true /*isMin*/);
const auto&& [minDate, inclDate] = getMinBound(Date);
const auto minDateTag = minDate.getTag();
const auto minDateVal = minDate.getValue();
ASSERT_EQ(1.0, estimateCardinality(hist, minDateTag, minDateVal, kEqual).card);
// Minimum timestamp.
auto&& [minTs, inclTs] = getMinMaxBoundForSBEType(value::TypeTags::Timestamp, true /*isMin*/);
auto&& [minTs, inclTs] = getMinBound(value::TypeTags::Timestamp);
auto minTsTag = minTs.getTag();
auto minTsVal = minTs.getValue();
ASSERT_EQ(1.0, estimateCardinality(hist, minTsTag, minTsVal, kEqual).card);
@ -823,8 +824,7 @@ TEST(ScalarHistogramEstimatorEdgeCasesTest, MinValueMixedHistogramFromData) {
ASSERT_EQ(3.0, expectedCard.card);
// [minTs, maxTs], estimated by the entire timestamp bucket.
auto&& [maxTs, inclMaxTs] =
getMinMaxBoundForSBEType(value::TypeTags::Timestamp, false /*isMin*/);
auto&& [maxTs, inclMaxTs] = getMaxBound(value::TypeTags::Timestamp);
const auto maxTsTag = maxTs.getTag();
const auto maxTsVal = maxTs.getValue();
expectedCard =
@ -849,20 +849,20 @@ TEST(ScalarHistogramEstimatorEdgeCasesTest, MinValueMixedHistogramFromBuckets) {
ASSERT_EQ(500.0, getTotals(hist).card);
// Minimum ObjectId.
auto&& [minOid, inclOid] = getMinMaxBoundForSBEType(value::TypeTags::ObjectId, true /*isMin*/);
auto&& [minOid, inclOid] = getMinBound(value::TypeTags::ObjectId);
auto minOidTag = minOid.getTag();
auto minOidVal = minOid.getValue();
ASSERT_APPROX_EQUAL(
1.9, estimateCardinality(hist, minOidTag, minOidVal, kEqual).card, kErrorBound);
// Minimum date.
const auto&& [minDate, inclDate] = getMinMaxBoundForSBEType(Date, true /*isMin*/);
const auto&& [minDate, inclDate] = getMinBound(Date);
const auto minDateTag = minDate.getTag();
const auto minDateVal = minDate.getValue();
ASSERT_EQ(4.0, estimateCardinality(hist, minDateTag, minDateVal, kEqual).card);
// Minimum timestamp.
auto&& [minTs, inclTs] = getMinMaxBoundForSBEType(value::TypeTags::Timestamp, true /*isMin*/);
auto&& [minTs, inclTs] = getMinBound(value::TypeTags::Timestamp);
auto minTsTag = minTs.getTag();
auto minTsVal = minTs.getValue();
ASSERT_APPROX_EQUAL(

View File

@ -83,3 +83,16 @@ mongo_cc_library(
"//src/mongo/util/concurrency:thread_pool", # TODO(SERVER-93876): Remove.
],
)
# Helper library built from test_utils.cpp / test_utils.h. Its only declared dependency is the
# SBE values library.
mongo_cc_library(
name = "test_utils",
srcs = [
"test_utils.cpp",
],
hdrs = [
"test_utils.h",
],
deps = [
"//src/mongo/db:sbe_values",
],
)

View File

@ -123,5 +123,17 @@ env.CppUnitTest(
],
LIBDEPS=[
"stats_test_utils",
"test_utils",
],
)
# Benchmark binary built from value_utils_bm.cpp. It links the stats histogram library and the
# test_utils helper library.
env.Benchmark(
target="value_utils_bm",
source=[
"value_utils_bm.cpp",
],
LIBDEPS=[
"stats_histograms",
"test_utils",
],
)

View File

@ -0,0 +1,89 @@
/**
* Copyright (C) 2024-present MongoDB, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the Server Side Public License, version 1,
* as published by MongoDB, Inc.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* Server Side Public License for more details.
*
* You should have received a copy of the Server Side Public License
* along with this program. If not, see
* <http://www.mongodb.com/licensing/server-side-public-license>.
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the Server Side Public License in all respects for
* all of the code used other than as permitted herein. If you modify file(s)
* with this exception, you may extend this exception to your version of the
* file(s), but you are not obligated to do so. If you do not wish to do so,
* delete this exception statement from your version. If you delete this
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
#include "mongo/db/query/stats/test_utils.h"
#include "mongo/logv2/log.h"
namespace mongo::stats {
/**
 * Returns true when 'tag1' and 'tag2' are equal, or when the minimum BSON values of their
 * corresponding BSON types compare equal.
 */
bool sameTypeClassByComparingMin(sbe::value::TypeTags tag1, sbe::value::TypeTags tag2) {
    // Equal tags need no BSON round-trip.
    if (tag1 == tag2) {
        return true;
    }

    static constexpr const char* kTempFieldName = "temp";

    // SBE type tags and BSON types are not a one-to-one match. Each tag is therefore mapped to
    // its BSONType with tagToType() and represented by the value that
    // BSONObjBuilder::appendMinForType() produces for that type.
    const auto minObjectFor = [](sbe::value::TypeTags tag) -> BSONObj {
        BSONObjBuilder builder;
        builder.appendMinForType(kTempFieldName, sbe::value::tagToType(tag));
        return builder.obj();
    };

    const BSONObj firstMin = minObjectFor(tag1);
    const BSONObj secondMin = minObjectFor(tag2);
    return firstMin.woCompare(secondMin) == 0;
}
/**
 * Builds every ordered pair (first, second) of type tags whose numeric values lie in
 * [start, end). Pairs in which either tag maps to BSONType::EOO are left out.
 *
 * 'end' must be strictly greater than 'start'.
 */
TypeTagPairs generateTypeTagPairs(size_t start, size_t end) {
    invariant(end > start);

    // Reserve capacity only. Constructing the vector with a size would fill it with
    // value-initialized pairs, and the emplace_back() calls below would then append the real
    // pairs after those placeholder entries.
    TypeTagPairs allTypeTagPairs;
    allTypeTagPairs.reserve((end - start) * (end - start));

    for (size_t first = start; first < end; ++first) {
        const auto firstTag = static_cast<sbe::value::TypeTags>(first);
        // No pair containing this tag is wanted, so skip the whole inner loop.
        if (tagToType(firstTag) == BSONType::EOO) {
            continue;
        }
        for (size_t second = start; second < end; ++second) {
            const auto secondTag = static_cast<sbe::value::TypeTags>(second);
            if (tagToType(secondTag) == BSONType::EOO) {
                continue;
            }
            allTypeTagPairs.emplace_back(firstTag, secondTag);
        }
    }
    return allTypeTagPairs;
}
/**
 * Generates the pairs for tags with numeric values in [0, TypeTagsMax).
 */
TypeTagPairs generateAllTypeTagPairs() {
    const auto endExclusive = static_cast<size_t>(sbe::value::TypeTags::TypeTagsMax);
    return generateTypeTagPairs(0, endExclusive);
}
/**
 * Generates the pairs for tags with numeric values in [0, EndOfShallowTypeTags].
 */
TypeTagPairs generateShallowTypeTagPairs() {
    const auto lastShallow = static_cast<size_t>(sbe::value::TypeTags::EndOfShallowTypeTags);
    return generateTypeTagPairs(0, lastShallow + 1);
}
/**
 * Generates the pairs for tags with numeric values in (EndOfShallowTypeTags, TypeTagsMax).
 */
TypeTagPairs generateHeapTypeTagPairs() {
    const auto firstHeap = static_cast<size_t>(sbe::value::TypeTags::EndOfShallowTypeTags) + 1;
    const auto endExclusive = static_cast<size_t>(sbe::value::TypeTags::TypeTagsMax);
    return generateTypeTagPairs(firstHeap, endExclusive);
}
} // namespace mongo::stats

View File

@ -0,0 +1,59 @@
/**
* Copyright (C) 2024-present MongoDB, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the Server Side Public License, version 1,
* as published by MongoDB, Inc.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* Server Side Public License for more details.
*
* You should have received a copy of the Server Side Public License
* along with this program. If not, see
* <http://www.mongodb.com/licensing/server-side-public-license>.
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the Server Side Public License in all respects for
* all of the code used other than as permitted herein. If you modify file(s)
* with this exception, you may extend this exception to your version of the
* file(s), but you are not obligated to do so. If you do not wish to do so,
* delete this exception statement from your version. If you delete this
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
#pragma once
#include "mongo/db/exec/sbe/values/value.h"
namespace mongo::stats {
// A list of ordered (first, second) pairs of SBE type tags.
using TypeTagPairs = std::vector<std::pair<sbe::value::TypeTags, sbe::value::TypeTags>>;
/**
 * An alternative implementation of sameTypeClass(), intended for tests and benchmarks. Returns
 * true if the two tags are equal, or if the minimum values that
 * BSONObjBuilder::appendMinForType() produces for their BSON types compare equal.
 */
bool sameTypeClassByComparingMin(sbe::value::TypeTags tag1, sbe::value::TypeTags tag2);
/**
 * Generates pairs of TypeTags for testing, drawn from the whole enum range [0, TypeTagsMax).
 */
TypeTagPairs generateAllTypeTagPairs();
/**
 * Generates pairs of shallow TypeTags for testing, i.e. tags with numeric values in
 * [0, EndOfShallowTypeTags].
 */
TypeTagPairs generateShallowTypeTagPairs();
/**
 * Generates pairs of heap-based TypeTags for testing, i.e. tags with numeric values in
 * (EndOfShallowTypeTags, TypeTagsMax).
 */
TypeTagPairs generateHeapTypeTagPairs();
} // namespace mongo::stats

View File

@ -46,6 +46,37 @@
#include "mongo/util/str.h"
namespace mongo::stats {
/**
 * Generates and returns a sorted vector of type tags.
 *
 * This function enumerates the type tags defined in the sbe::value::TypeTags enum, leaving out
 * tags that map to BSONType::EOO. It then sorts the remaining tags by the order that
 * compareTypeTags() defines. If two type tags compare equal, the tie is broken by their TypeTags
 * enum values.
 */
std::vector<sbe::value::TypeTags> sortTypeTags();
/**
 * Generates and returns a map of type tags to their respective subsequent type tags.
 *
 * This function creates a mapping from each type tag to its subsequent type tag, determined by the
 * order of type tags in the 'kTypeTagsSorted' vector. If two type tags share the same canonical
 * order, they will map to the same subsequent type tag.
 *
 * The function skips the last type tag in the vector, as there is no subsequent type tag for it.
 */
absl::flat_hash_map<sbe::value::TypeTags, sbe::value::TypeTags> nextTypeTagsMap();
namespace {
// NOTE: the definition order below matters. nextTypeTagsMap() reads 'kTypeTagsSorted', so
// 'kTypeTagsSorted' must be defined (and therefore initialized) before 'kNextTypeTagsMap'.
const static std::vector<sbe::value::TypeTags> kTypeTagsSorted = sortTypeTags();
const static absl::flat_hash_map<sbe::value::TypeTags, sbe::value::TypeTags> kNextTypeTagsMap =
nextTypeTagsMap();
} // namespace
namespace value = sbe::value;
SBEValue::SBEValue(value::TypeTags tag, value::Value val) : _tag(tag), _val(val) {}
@ -139,21 +170,7 @@ std::pair<value::TypeTags, value::Value> makeNaNValue() {
};
bool sameTypeClass(value::TypeTags tag1, value::TypeTags tag2) {
if (tag1 == tag2) {
return true;
}
static constexpr const char* kTempFieldName = "temp";
BSONObjBuilder minb1;
minb1.appendMinForType(kTempFieldName, value::tagToType(tag1));
const BSONObj min1 = minb1.obj();
BSONObjBuilder minb2;
minb2.appendMinForType(kTempFieldName, value::tagToType(tag2));
const BSONObj min2 = minb2.obj();
return min1.woCompare(min2) == 0;
return compareTypeTags(tag1, tag2) == 0;
}
bool sameTypeBracket(value::TypeTags tag1, value::TypeTags tag2) {
@ -453,49 +470,104 @@ value::TypeTags deserialize(const std::string& name) {
str::stream() << "String " << name << " is not convertable to SBE type tag.");
}
std::pair<stats::SBEValue, bool> getMinMaxBoundForSBEType(sbe::value::TypeTags tag, bool isMin) {
/**
 * Three-way comparison of two type tags by the canonical order of their BSON types.
 * Returns -1 if 'a' sorts before 'b', 1 if it sorts after, and 0 if both share the same order.
 */
int compareTypeTags(sbe::value::TypeTags a, sbe::value::TypeTags b) {
    const auto rankA = canonicalizeBSONTypeUnsafeLookup(tagToType(a));
    const auto rankB = canonicalizeBSONTypeUnsafeLookup(tagToType(b));
    if (rankA == rankB) {
        return 0;
    }
    return rankA < rankB ? -1 : 1;
}
std::vector<sbe::value::TypeTags> sortTypeTags() {
// Sorts type tags according to the sort order. Breaks tie with the TypeTags enum values if
// two tags share the same sort order.
auto compareStrictOrder = [](sbe::value::TypeTags a, sbe::value::TypeTags b) -> bool {
auto result = compareTypeTags(a, b);
return result != 0 ? result < 0 : (a < b);
};
static constexpr size_t numTypeTags = size_t(sbe::value::TypeTags::TypeTagsMax);
std::vector<sbe::value::TypeTags> typeTagsOrder;
// Enumerates all the type tags.
for (uint8_t tagValue = 0; tagValue < numTypeTags; ++tagValue) {
auto tag = static_cast<sbe::value::TypeTags>(tagValue);
// Skips unsupported types.
if (sbe::value::tagToType(tag) == BSONType::EOO) {
continue;
}
typeTagsOrder.push_back(tag);
}
std::sort(typeTagsOrder.begin(), typeTagsOrder.end(), compareStrictOrder);
return typeTagsOrder;
}
/**
 * Builds the map from each type tag to the first tag of the next type class, following the order
 * of 'kTypeTagsSorted'. Tags that share a canonical order map to the same next tag. The last tag
 * in 'kTypeTagsSorted' gets no entry because nothing follows it.
 */
absl::flat_hash_map<sbe::value::TypeTags, sbe::value::TypeTags> nextTypeTagsMap() {
    // Check the precondition before touching the vector; indexing an empty vector is undefined
    // behavior, so the check is only useful if it runs first.
    invariant(!kTypeTagsSorted.empty());

    absl::flat_hash_map<sbe::value::TypeTags, sbe::value::TypeTags> nextByTag;

    // Walk from the highest tag downwards, carrying the tag that follows the current type class.
    // The last tag is skipped as there is no next type for it.
    // NOTE(review): this assumes the highest type class holds a single tag; verify if new tags
    // are added to that class.
    sbe::value::TypeTags nextTag = kTypeTagsSorted.back();
    // Convert the size to a signed value before subtracting so that a one-element vector yields
    // -1 instead of relying on unsigned wrap-around.
    for (int32_t index = static_cast<int32_t>(kTypeTagsSorted.size()) - 2; index >= 0; --index) {
        const auto tag = kTypeTagsSorted[index];
        nextByTag[tag] = nextTag;

        // If 'tag' and 'kTypeTagsSorted[index - 1]' are at the same canonical order, reuse
        // 'nextTag'. For example, as compareTypeTags(NumberInt32, NumberDouble) == 0, their next
        // type tags are both 'StringSmall'.
        const bool previousSharesOrder =
            index > 0 && compareTypeTags(tag, kTypeTagsSorted[index - 1]) == 0;
        if (!previousSharesOrder) {
            nextTag = tag;
        }
    }
    return nextByTag;
}
/**
 * Looks up the tag that follows 'tag' in 'kNextTypeTagsMap'. Raises tassert 9619600 if 'tag' has
 * no entry in the map.
 */
sbe::value::TypeTags getNextType(sbe::value::TypeTags tag) {
    const auto entry = kNextTypeTagsMap.find(tag);
    tassert(9619600,
            fmt::format("Type {} does not have a next type", tag),
            entry != kNextTypeTagsMap.end());
    return entry->second;
}
/**
 * SBE-tag overload: maps 'tag' to its BSON type and defers to the BSONType overload.
 */
bool isVariableWidthType(sbe::value::TypeTags tag) {
    const auto bsonType = tagToType(tag);
    return isVariableWidthType(bsonType);
}
std::pair<stats::SBEValue, bool> getMinBound(sbe::value::TypeTags tag) {
switch (tag) {
case sbe::value::TypeTags::MinKey:
return {{tag, 0}, false};
return {{tag, 0}, true};
case sbe::value::TypeTags::MaxKey:
return {{tag, 0}, false};
return {{tag, 0}, true};
case sbe::value::TypeTags::NumberInt32:
case sbe::value::TypeTags::NumberInt64:
case sbe::value::TypeTags::NumberDouble:
case sbe::value::TypeTags::NumberDecimal:
if (isMin) {
return {{tag,
sbe::value::bitcastFrom<double>(std::numeric_limits<double>::quiet_NaN())},
false};
} else {
return {sbe::value::makeNewString(""), false};
}
return {{sbe::value::TypeTags::NumberDouble,
sbe::value::bitcastFrom<double>(std::numeric_limits<double>::quiet_NaN())},
true};
case sbe::value::TypeTags::StringSmall:
case sbe::value::TypeTags::StringBig:
case sbe::value::TypeTags::bsonString:
case sbe::value::TypeTags::bsonSymbol:
if (isMin) {
return {sbe::value::makeNewString(""), true};
} else {
return {sbe::value::makeNewObject(), false};
}
return {sbe::value::makeNewString(""), true};
case sbe::value::TypeTags::Date:
if (isMin) {
return {{tag, sbe::value::bitcastFrom<int64_t>(Date_t::min().toMillisSinceEpoch())},
true};
} else {
return {{tag, sbe::value::bitcastFrom<int64_t>(Date_t::max().toMillisSinceEpoch())},
true};
}
return {{tag, sbe::value::bitcastFrom<int64_t>(Date_t::min().toMillisSinceEpoch())},
true};
case sbe::value::TypeTags::Timestamp:
if (isMin) {
return {{tag, sbe::value::bitcastFrom<uint64_t>(Timestamp::min().asULL())}, true};
} else {
return {{tag, sbe::value::bitcastFrom<uint64_t>(Timestamp::max().asULL())}, true};
}
return {{tag, sbe::value::bitcastFrom<uint64_t>(Timestamp::min().asULL())}, true};
case sbe::value::TypeTags::Null:
return {{tag, 0}, true};
@ -506,76 +578,86 @@ std::pair<stats::SBEValue, bool> getMinMaxBoundForSBEType(sbe::value::TypeTags t
case sbe::value::TypeTags::Object:
case sbe::value::TypeTags::bsonObject:
if (isMin) {
return {sbe::value::makeNewObject(), true};
} else {
return {sbe::value::makeNewArray(), false};
}
return {sbe::value::makeNewObject(), true};
case sbe::value::TypeTags::Array:
case sbe::value::TypeTags::ArraySet:
case sbe::value::TypeTags::ArrayMultiSet:
case sbe::value::TypeTags::bsonArray:
if (isMin) {
return {sbe::value::makeNewArray(), true};
} else {
return {sbe::value::makeValue(Value(BSONBinData())), false};
}
return {sbe::value::makeNewArray(), true};
case sbe::value::TypeTags::bsonBinData:
if (isMin) {
return {sbe::value::makeValue(Value(BSONBinData())), true};
} else {
return {sbe::value::makeValue(Value(OID())), false};
}
return {sbe::value::makeValue(Value(BSONBinData())), true};
case sbe::value::TypeTags::Boolean:
if (isMin) {
return {{tag, sbe::value::bitcastFrom<bool>(false)}, true};
} else {
return {{tag, sbe::value::bitcastFrom<bool>(true)}, true};
}
return {{tag, sbe::value::bitcastFrom<bool>(false)}, true};
case sbe::value::TypeTags::ObjectId:
case sbe::value::TypeTags::bsonObjectId:
if (isMin) {
return {sbe::value::makeValue(Value(OID())), true};
} else {
return {sbe::value::makeValue(Value(OID::max())), true};
}
return {sbe::value::makeValue(Value(OID())), true};
case sbe::value::TypeTags::bsonRegex:
if (isMin) {
return {sbe::value::makeValue(Value(BSONRegEx("", ""))), true};
} else {
return {sbe::value::makeValue(Value(BSONDBRef())), false};
}
return {sbe::value::makeValue(Value(BSONRegEx("", ""))), true};
case sbe::value::TypeTags::bsonDBPointer:
if (isMin) {
return {sbe::value::makeValue(Value(BSONDBRef())), true};
} else {
return {sbe::value::makeCopyBsonJavascript(StringData("")), false};
}
return {sbe::value::makeValue(Value(BSONDBRef())), true};
case sbe::value::TypeTags::bsonJavascript:
if (isMin) {
return {sbe::value::makeCopyBsonJavascript(StringData("")), true};
} else {
return {sbe::value::makeValue(Value(BSONCodeWScope())), false};
}
return {sbe::value::makeCopyBsonJavascript(StringData("")), true};
case sbe::value::TypeTags::bsonCodeWScope:
if (isMin) {
return {sbe::value::makeValue(Value(BSONCodeWScope())), true};
} else {
return {{sbe::value::TypeTags::MaxKey, 0}, false};
}
return {sbe::value::makeValue(Value(BSONCodeWScope())), true};
default:
return {{sbe::value::TypeTags::Nothing, 0}, false};
tasserted(9619601, str::stream() << "Type not supported for getMinBound: " << tag);
}
MONGO_UNREACHABLE;
MONGO_UNREACHABLE_TASSERT(9619602);
}
/**
 * Returns the maximum bound of the type 'tag' as a (value, inclusive) pair.
 *
 * Variable width types have no representable maximum, so the minimum bound of the next type is
 * returned with the inclusive flag cleared. Raises tassert 9619603 for unsupported tags.
 */
std::pair<stats::SBEValue, bool> getMaxBound(sbe::value::TypeTags tag) {
    using TypeTags = sbe::value::TypeTags;

    // The largest value of a variable width type cannot be expressed with that type. The bound is
    // therefore the smallest value of the following type, marked as excluded.
    if (isVariableWidthType(tag)) {
        auto nextTypeMin = getMinBound(getNextType(tag));
        nextTypeMin.second = false;
        return nextTypeMin;
    }

    switch (tag) {
        // Types whose bound carries no payload: the tag itself with a zero value.
        case TypeTags::MinKey:
        case TypeTags::MaxKey:
        case TypeTags::Null:
            return {{tag, 0}, true};

        // All numeric tags share one bound: positive infinity as a double.
        case TypeTags::NumberInt32:
        case TypeTags::NumberInt64:
        case TypeTags::NumberDouble:
        case TypeTags::NumberDecimal:
            return {{TypeTags::NumberDouble,
                     sbe::value::bitcastFrom<double>(std::numeric_limits<double>::infinity())},
                    true};

        case TypeTags::Boolean:
            return {{tag, sbe::value::bitcastFrom<bool>(true)}, true};

        case TypeTags::Date:
            return {{tag, sbe::value::bitcastFrom<int64_t>(Date_t::max().toMillisSinceEpoch())},
                    true};

        case TypeTags::Timestamp:
            return {{tag, sbe::value::bitcastFrom<uint64_t>(Timestamp::max().asULL())}, true};

        case TypeTags::bsonUndefined:
            return {sbe::value::makeValue(Value(BSONUndefined)), true};

        case TypeTags::ObjectId:
        case TypeTags::bsonObjectId:
            return {sbe::value::makeValue(Value(OID::max())), true};

        default:
            tasserted(9619603, str::stream() << "Type not supported for getMaxBound: " << tag);
    }
    MONGO_UNREACHABLE_TASSERT(9619604);
}
bool sameTypeBracketInterval(sbe::value::TypeTags startTag,
@ -590,7 +672,7 @@ bool sameTypeBracketInterval(sbe::value::TypeTags startTag,
return false;
}
auto [max, maxInclusive] = getMinMaxBoundForSBEType(startTag, false /*isMin*/);
auto [max, maxInclusive] = getMinBound(getNextType(startTag));
return stats::compareValues(endTag, endVal, max.getTag(), max.getValue()) == 0;
}
@ -600,9 +682,42 @@ bool isFullBracketInterval(sbe::value::TypeTags startTag,
sbe::value::TypeTags endTag,
sbe::value::Value endVal,
bool endInclusive) {
// 'startInclusive' must be true because a full bracket interval includes the minimum value of
// the type.
if (!startInclusive) {
return false;
}
auto [expectedMin, minInclusive] = getMinMaxBoundForSBEType(startTag, true /*isMin*/);
auto [expectedMax, maxInclusive] = getMinMaxBoundForSBEType(startTag, false /*isMin*/);
// Short-circuits by first evaluating 'endInclusive'. This approach prevents unnecessary memory
// allocation by avoiding calls to getMinBound() and getMaxBound() if the conditions are not
// met.
//
// The logic checks if the start and end tags are of the same type class. If they are, both
// isVariableWidthType(startTag) and (!endInclusive) must be false for the interval to be
// considered a full bracket interval.
//
// Example scenarios:
// - If 'startTag' is Object, the logic returns false because isVariableWidthType() is true.
// We expect the end bound of a full bracket interval of an object to be an empty Array.
// - If 'startTag' is NumberInt32, the logic returns false if 'endInclusive' is false, as we
// expect the end bound to be infinity and inclusive.
bool sameType = sameTypeClass(startTag, endTag);
if (sameType && (isVariableWidthType(startTag) || !endInclusive)) {
return false;
} else if (!sameType && (endInclusive || !sameTypeClass(endTag, getNextType(startTag)))) {
// If the start and end bounds are of different type classes, 'endInclusive' must be false
// and 'endTag' must be the next type of 'startTag' for the interval to be considered a full
// bracket interval.
//
// Example scenario:
// - If 'startTag' is Object, 'endTag' must be Array (or equivalent) and 'endInclusive'
// must be false for the interval to be valid.
return false;
}
auto [expectedMin, minInclusive] = getMinBound(startTag);
auto [expectedMax, maxInclusive] =
sameType ? getMaxBound(startTag) : getMinBound(getNextType(startTag));
bool compareValuesMin =
(stats::compareValues(startTag, startVal, expectedMin.getTag(), expectedMin.getValue()) ==
@ -611,7 +726,7 @@ bool isFullBracketInterval(sbe::value::TypeTags startTag,
bool compareValuesMax =
(stats::compareValues(endTag, endVal, expectedMax.getTag(), expectedMax.getValue()) == 0);
return (compareValuesMin && (compareValuesMax && !endInclusive));
return compareValuesMin && compareValuesMax;
}
} // namespace mongo::stats

View File

@ -195,10 +195,40 @@ std::string serialize(sbe::value::TypeTags tag);
sbe::value::TypeTags deserialize(const std::string& name);
/**
* Returns the minimum or maximum bound of the type 'tag'. If 'isMin' is false, returns the maximum
* bound.
* Three-way comparison of TypeTags (like the spaceship operator) according to the BSON type sort
* order. Returns -1, 0, or 1.
*/
std::pair<stats::SBEValue, bool> getMinMaxBoundForSBEType(sbe::value::TypeTags tag, bool isMin);
int compareTypeTags(sbe::value::TypeTags a, sbe::value::TypeTags b);
/**
* Given a type, returns whether or not that type has a variable width. Returns true if the type has
* variable width, indicating the maximum value cannot be represented by the same type
* (e.g., objects, arrays). Returns false for types with fixed width (e.g., numbers, booleans).
*/
bool isVariableWidthType(sbe::value::TypeTags tag);
/**
* Returns the minimum bound of the type 'tag'.
* The result is a pair where the first element is the minimum bound value (stats::SBEValue), and
* the second element is a boolean indicating if the bound is inclusive.
*
* Examples:
* - getMinBound(NumberInt32): (nan, true)
* - getMinBound(StringSmall): ("", true)
* - getMinBound(Object): ({}, true)
*/
std::pair<stats::SBEValue, bool> getMinBound(sbe::value::TypeTags tag);
/**
* Returns the maximum bound of the type 'tag'.
* The result is a pair where the first element is the maximum bound value (stats::SBEValue), and
* the second element is a boolean indicating if the bound is inclusive.
*
* Examples:
* - getMaxBound(NumberInt32): (infinity, true)
* - getMaxBound(StringSmall): ({}, false)
* - getMaxBound(Object): ([], false)
*/
std::pair<stats::SBEValue, bool> getMaxBound(sbe::value::TypeTags tag);
/**
* Returns true if the interval is of the same type. This takes type-bracketing into consideration.
@ -219,9 +249,10 @@ bool sameTypeBracketInterval(sbe::value::TypeTags startTag,
/**
* Returns true if the interval covers a full type. This helps to determine if the interval is
* estimable by a type count. The design of this method follows the definition of a full interval as
* inclusive the minimum value of the current type and exclusive the minimume value of the next
* type e.g., for NumberDecimal [quite_NaN, ""). Thus, it assumes that the start type tag and end
* type tag differ.
* inclusive the minimum value of the current type and either:
* a) inclusive the maximum value of the current type (if representable, e.g., [nan.0, inf.0])
* or
* b) exclusive the minimum value of the next type (e.g., for Object, [{}, [])).
*/
bool isFullBracketInterval(sbe::value::TypeTags startTag,
sbe::value::Value startVal,

View File

@ -0,0 +1,107 @@
/**
* Copyright (C) 2024-present MongoDB, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the Server Side Public License, version 1,
* as published by MongoDB, Inc.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* Server Side Public License for more details.
*
* You should have received a copy of the Server Side Public License
* along with this program. If not, see
* <http://www.mongodb.com/licensing/server-side-public-license>.
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the Server Side Public License in all respects for
* all of the code used other than as permitted herein. If you modify file(s)
* with this exception, you may extend this exception to your version of the
* file(s), but you are not obligated to do so. If you do not wish to do so,
* delete this exception statement from your version. If you delete this
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
#include <benchmark/benchmark.h>
#include <fmt/format.h>
#include "mongo/db/exec/sbe/values/value.h"
#include "mongo/db/query/stats/test_utils.h"
#include "mongo/db/query/stats/value_utils.h"
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kDefault
namespace mongo::stats {
// Selects which set of TypeTags pairs a benchmark iterates over. The numeric values matter: the
// benchmarks receive them through BENCHMARK(...)->Arg(...) and cast state.range(0) back to this
// enum.
enum class TypeTagsPair {
All = 0,
Shallow = 1,
Heap = 2,
};
// Returns the TypeTags pairs selected by 'option'. Any value other than Shallow or Heap falls
// back to the full set.
auto genAllPairs(TypeTagsPair option) {
    if (option == TypeTagsPair::Shallow) {
        return generateShallowTypeTagPairs();
    }
    if (option == TypeTagsPair::Heap) {
        return generateHeapTypeTagPairs();
    }
    return generateAllTypeTagPairs();
}
// Measures sameTypeClass() while cycling through the tag pairs selected by state.range(0).
void BM_sameTypeClass(benchmark::State& state) {
    auto tagPairs = genAllPairs(static_cast<TypeTagsPair>(state.range(0)));
    size_t cursor = 0;
    for (auto _ : state) {
        const auto& [firstTag, secondTag] = tagPairs[cursor];
        benchmark::DoNotOptimize(sameTypeClass(firstTag, secondTag));
        // Wrap around so the benchmark can run for any number of iterations.
        cursor = (cursor + 1) % tagPairs.size();
    }
}
// Measures sameTypeBracket() while cycling through the tag pairs selected by state.range(0).
void BM_sameTypeBracket(benchmark::State& state) {
    auto tagPairs = genAllPairs(static_cast<TypeTagsPair>(state.range(0)));
    size_t cursor = 0;
    for (auto _ : state) {
        const auto& [firstTag, secondTag] = tagPairs[cursor];
        benchmark::DoNotOptimize(sameTypeBracket(firstTag, secondTag));
        // Wrap around so the benchmark can run for any number of iterations.
        cursor = (cursor + 1) % tagPairs.size();
    }
}
// Measures sameTypeClassByComparingMin() while cycling through the tag pairs selected by
// state.range(0).
void BM_sameTypeClassByComparingMin(benchmark::State& state) {
    auto tagPairs = genAllPairs(static_cast<TypeTagsPair>(state.range(0)));
    size_t cursor = 0;
    for (auto _ : state) {
        const auto& [firstTag, secondTag] = tagPairs[cursor];
        benchmark::DoNotOptimize(sameTypeClassByComparingMin(firstTag, secondTag));
        // Wrap around so the benchmark can run for any number of iterations.
        cursor = (cursor + 1) % tagPairs.size();
    }
}
// Measures isFullBracketInterval() on the interval from an empty object (inclusive) to an empty
// array (exclusive).
void BM_isFullBracketInterval(benchmark::State& state) {
    // Attach each guard right after its allocation so the value is released on every exit path,
    // including an exception thrown while the second value is created or inside the loop.
    auto start = sbe::value::makeNewObject();
    sbe::value::ValueGuard startGuard{start};
    auto end = sbe::value::makeNewArray();
    sbe::value::ValueGuard endGuard{end};

    for (auto _ : state) {
        benchmark::DoNotOptimize(
            isFullBracketInterval(start.first, start.second, true, end.first, end.second, false));
    }
}
// Arg(0), Arg(1) and Arg(2) are cast to TypeTagsPair (All, Shallow, Heap) inside each benchmark.
BENCHMARK(BM_sameTypeClass)->Arg(0)->Arg(1)->Arg(2);
BENCHMARK(BM_sameTypeBracket)->Arg(0)->Arg(1)->Arg(2);
BENCHMARK(BM_sameTypeClassByComparingMin)->Arg(0)->Arg(1)->Arg(2);
// Takes no argument: it always measures the fixed interval built in the benchmark body.
BENCHMARK(BM_isFullBracketInterval);
} // namespace mongo::stats

View File

@ -27,13 +27,18 @@
* it in the license file.
*/
#include "mongo/bson/bsonmisc.h"
#include "mongo/bson/bsonobjbuilder.h"
#include "mongo/bson/bsontypes.h"
#include "mongo/bson/json.h"
#include "mongo/db/exec/sbe/values/value.h"
#include "mongo/db/query/index_bounds.h"
#include "mongo/db/query/index_bounds_builder.h"
#include "mongo/db/query/index_bounds_builder_test.h"
#include "mongo/db/query/stats/test_utils.h"
#include "mongo/db/query/interval.h"
#include "mongo/db/query/stats/value_utils.h"
#include "mongo/unittest/assert.h"
#include "mongo/unittest/framework.h"
@ -221,4 +226,173 @@ TEST_F(ValueUtilsTest, SameTypeBracketedIntervalGteNan) {
}
// Asserts that sameTypeClass() behaves the same as the alternative implementation
// sameTypeClassByComparingMin(), which compares the minimum values in BSON.
TEST_F(ValueUtilsTest, SameTypeClassAlignsWithMinComparison) {
    const auto allTypeTagPairs = generateAllTypeTagPairs();
    for (const auto& typeTagPair : allTypeTagPairs) {
        const bool byComparingMin =
            sameTypeClassByComparingMin(typeTagPair.first, typeTagPair.second);
        const bool byTypeClass = sameTypeClass(typeTagPair.first, typeTagPair.second);
        // Label each value with the function that produced it so a failure is unambiguous.
        ASSERT_EQ(byComparingMin, byTypeClass)
            << "first tag: " << typeTagPair.first << " second tag: " << typeTagPair.second
            << " sameTypeClassByComparingMin: " << byComparingMin
            << " sameTypeClass: " << byTypeClass;
    }
}
// Checks, for every tag that maps to a BSON type other than EOO, that getMinBound() yields a
// value equal to the one BSONObjBuilder::appendMinForType() writes, and that the bound is
// inclusive.
TEST_F(ValueUtilsTest, GetMinBoundAlignsWithAppendMinForType) {
    const auto tagCount = size_t(sbe::value::TypeTags::TypeTagsMax);
    for (size_t rawTag = 0; rawTag < tagCount; ++rawTag) {
        const auto tag = static_cast<sbe::value::TypeTags>(rawTag);
        const auto bsonType = sbe::value::tagToType(tag);
        // Extended types without a BSON counterpart are unsupported and skipped.
        if (bsonType == BSONType::EOO) {
            continue;
        }

        auto [actual, inclusive] = getMinBound(tag);

        // Build the reference value on the BSON side and convert it to an owned SBE value.
        BSONObjBuilder minBuilder;
        minBuilder.appendMinForType("", bsonType);
        auto minObj = minBuilder.obj();
        auto minElem = minObj.firstElement();
        auto expected = sbe::bson::convertFrom<false>(minElem);
        sbe::value::ValueGuard expectedGuard{expected};

        auto comparison = stats::compareValues(
            actual.getTag(), actual.getValue(), expected.first, expected.second);
        ASSERT_EQ(comparison, 0) << "tag: " << tag << ", getMinBound() returns: " << actual.get()
                                 << ", expected returning: " << expected;
        ASSERT_TRUE(inclusive);
    }
}
// Checks, for every tag that maps to a BSON type other than EOO, that getMaxBound() yields a
// value equal to the one BSONObjBuilder::appendMaxForType() writes, and that the bound is
// inclusive exactly when the type is not a variable width type.
TEST_F(ValueUtilsTest, GetMaxBoundAlignsWithAppendMaxForType) {
    const auto tagCount = size_t(sbe::value::TypeTags::TypeTagsMax);
    for (size_t rawTag = 0; rawTag < tagCount; ++rawTag) {
        const auto tag = static_cast<sbe::value::TypeTags>(rawTag);
        const auto bsonType = sbe::value::tagToType(tag);
        // Extended types without a BSON counterpart are unsupported and skipped.
        if (bsonType == BSONType::EOO) {
            continue;
        }

        auto [actual, inclusive] = getMaxBound(tag);

        // Build the reference value on the BSON side and convert it to an owned SBE value.
        BSONObjBuilder maxBuilder;
        maxBuilder.appendMaxForType("", bsonType);
        auto maxObj = maxBuilder.obj();
        auto maxElem = maxObj.firstElement();
        auto expected = sbe::bson::convertFrom<false>(maxElem);
        sbe::value::ValueGuard expectedGuard{expected};

        auto comparison = stats::compareValues(
            actual.getTag(), actual.getValue(), expected.first, expected.second);
        ASSERT_EQ(comparison, 0) << "tag: " << tag << ", getMaxBound() returns: " << actual.get()
                                 << ", expected returning: " << expected;
        ASSERT_EQ(inclusive, !isVariableWidthType(tag));
    }
}
// Builds the interval for a {$type: t} predicate for each BSON type in the loop range and checks
// that isFullBracketInterval() accepts it.
TEST_F(ValueUtilsTest, ReturnsTrueForFullBracketIntervals) {
    for (int t = -1; t < BSONType::MaxKey; ++t) {
        // Invalid type codes, EOO and Array are not covered by this test.
        const bool skipType = !isValidBSONType(t) || t == BSONType::EOO || t == BSONType::Array;
        if (skipType) {
            continue;
        }

        BSONObj typeQuery = BSON("a" << BSON("$type" << t));
        auto interval = buildInterval(typeQuery);

        // Convert both interval ends into owned SBE values.
        auto [startTag, startVal] = sbe::bson::convertFrom<false>(interval.start);
        auto [endTag, endVal] = sbe::bson::convertFrom<false>(interval.end);
        sbe::value::ValueGuard startGuard{startTag, startVal};
        sbe::value::ValueGuard endGuard{endTag, endVal};

        ASSERT_TRUE(isFullBracketInterval(
            startTag, startVal, interval.startInclusive, endTag, endVal, interval.endInclusive))
            << "type: " << typeName(BSONType(t));
    }
}
// For a fixed width type (NumberInt32) the interval [min, max] is a full bracket interval only
// when both ends are inclusive.
TEST_F(ValueUtilsTest, ReturnsFalseForNonFullBracketIntervalsWithStaticWidthType) {
    auto [minBound, minInclusive] = getMinBound(sbe::value::TypeTags::NumberInt32);
    auto [maxBound, maxInclusive] = getMaxBound(sbe::value::TypeTags::NumberInt32);

    const auto startTag = minBound.getTag();
    const auto startVal = minBound.getValue();
    const auto endTag = maxBound.getTag();
    const auto endVal = maxBound.getValue();

    // With the inclusiveness reported by the bounds themselves the interval is a full bracket.
    ASSERT_TRUE(
        isFullBracketInterval(startTag, startVal, minInclusive, endTag, endVal, maxInclusive));

    // Making either end (or both) exclusive must be rejected.
    ASSERT_FALSE(isFullBracketInterval(
        startTag, startVal, false /*startInclusive*/, endTag, endVal, false /*endInclusive*/));
    ASSERT_FALSE(isFullBracketInterval(
        startTag, startVal, true /*startInclusive*/, endTag, endVal, false /*endInclusive*/));
    ASSERT_FALSE(isFullBracketInterval(
        startTag, startVal, false /*startInclusive*/, endTag, endVal, true /*endInclusive*/));
}
// For a variable width type (StringSmall) the full bracket interval runs from the type's minimum
// to the bound returned by getMaxBound(), using the inclusiveness flags those functions report.
TEST_F(ValueUtilsTest, ReturnsFalseForNonFullBracketIntervalsWithVariableWidthType) {
    auto [minBound, minInclusive] = getMinBound(sbe::value::TypeTags::StringSmall);
    auto [maxBound, maxInclusive] = getMaxBound(sbe::value::TypeTags::StringSmall);

    const auto startTag = minBound.getTag();
    const auto startVal = minBound.getValue();
    const auto endTag = maxBound.getTag();
    const auto endVal = maxBound.getValue();

    // The bounds with their own inclusiveness flags form a full bracket interval.
    ASSERT_TRUE(
        isFullBracketInterval(startTag, startVal, minInclusive, endTag, endVal, maxInclusive));

    // An inclusive end bound must be rejected.
    ASSERT_FALSE(isFullBracketInterval(
        startTag, startVal, minInclusive, endTag, endVal, true /*endInclusive*/));

    // An end bound that is not the expected one (here: the start bound itself) must be rejected.
    ASSERT_FALSE(isFullBracketInterval(
        startTag, startVal, true /*startInclusive*/, startTag, startVal, maxInclusive));

    // An end tag that is not the next type of the start tag must be rejected by the
    // short-circuit check.
    ASSERT_FALSE(isFullBracketInterval(
        startTag, startVal, minInclusive, sbe::value::TypeTags::Array, endVal, maxInclusive));
}
} // namespace mongo::stats