SERVER-47416 Eliminate copies of KeyStringSet when possible

Also reusing memory for temporary data structure in KeyString generation for arrays.
2024-12-01 09:32:32 +01:00 · 2020-04-10 10:19:09 -04:00 · 2020-04-10 10:19:09 -04:00 · 0aab1bdb1f
commit 0aab1bdb1f
parent 1740d32001
13 changed files with 133 additions and 123 deletions
--- a/src/mongo/db/catalog/index_catalog_impl.cpp
+++ b/src/mongo/db/catalog/index_catalog_impl.cpp
@ -1349,7 +1349,7 @@ const IndexDescriptor* IndexCatalogImpl::refreshEntry(OperationContext* opCtx,

 Status IndexCatalogImpl::_indexKeys(OperationContext* opCtx,
                                    IndexCatalogEntry* index,
-                                    const std::vector<KeyString::Value>& keys,
+                                    const KeyStringSet& keys,
                                    const KeyStringSet& multikeyMetadataKeys,
                                    const MultikeyPaths& multikeyPaths,
                                    const BSONObj& obj,
@ -1431,7 +1431,7 @@ Status IndexCatalogImpl::_indexFilteredRecords(OperationContext* opCtx,

        Status status = _indexKeys(opCtx,
                                   index,
-                                   {keys->begin(), keys->end()},
+                                   *keys,
                                   *multikeyMetadataKeys,
                                   *multikeyPaths,
                                   *bsonRecord.docPtr,
@ -1515,7 +1515,7 @@ Status IndexCatalogImpl::_updateRecord(OperationContext* const opCtx,

 void IndexCatalogImpl::_unindexKeys(OperationContext* opCtx,
                                    IndexCatalogEntry* index,
-                                    const std::vector<KeyString::Value>& keys,
+                                    const KeyStringSet& keys,
                                    const BSONObj& obj,
                                    RecordId loc,
                                    bool logIfError,
@ -1602,7 +1602,7 @@ void IndexCatalogImpl::_unindexRecord(OperationContext* opCtx,
            return;
        }
    }
-    _unindexKeys(opCtx, entry, {keys->begin(), keys->end()}, obj, loc, logIfError, keysDeletedOut);
+    _unindexKeys(opCtx, entry, *keys, obj, loc, logIfError, keysDeletedOut);
 }

 Status IndexCatalogImpl::indexRecords(OperationContext* opCtx,
--- a/src/mongo/db/catalog/index_catalog_impl.h
+++ b/src/mongo/db/catalog/index_catalog_impl.h
@ -308,7 +308,7 @@ private:

    Status _indexKeys(OperationContext* opCtx,
                      IndexCatalogEntry* index,
-                      const std::vector<KeyString::Value>& keys,
+                      const KeyStringSet& keys,
                      const KeyStringSet& multikeyMetadataKeys,
                      const MultikeyPaths& multikeyPaths,
                      const BSONObj& obj,
@ -336,7 +336,7 @@ private:

    void _unindexKeys(OperationContext* opCtx,
                      IndexCatalogEntry* index,
-                      const std::vector<KeyString::Value>& keys,
+                      const KeyStringSet& keys,
                      const BSONObj& obj,
                      RecordId loc,
                      bool logIfError,
--- a/src/mongo/db/index/btree_key_generator.cpp
+++ b/src/mongo/db/index/btree_key_generator.cpp
@ -159,8 +159,10 @@ BSONElement BtreeKeyGenerator::_extractNextElement(const BSONObj& obj,
    return BSONElement();
 }

-void BtreeKeyGenerator::_getKeysArrEltFixed(std::vector<const char*>* fieldNames,
-                                            std::vector<BSONElement>* fixed,
+void BtreeKeyGenerator::_getKeysArrEltFixed(const std::vector<const char*>& fieldNames,
+                                            const std::vector<BSONElement>& fixed,
+                                            std::vector<const char*>* fieldNamesTemp,
+                                            std::vector<BSONElement>* fixedTemp,
                                            SharedBufferFragmentBuilder& pooledBufferBuilder,
                                            const BSONElement& arrEntry,
                                            KeyStringSet::sequence_type* keys,
@ -171,16 +173,26 @@ void BtreeKeyGenerator::_getKeysArrEltFixed(std::vector<const char*>* fieldNames
                                            const std::vector<PositionalPathInfo>& positionalInfo,
                                            MultikeyPaths* multikeyPaths,
                                            boost::optional<RecordId> id) const {
+    // fieldNamesTemp and fixedTemp are passed in by the caller to be used as temporary data
+    // structures as we need them to be mutable in the recursion. When they are stored outside we
+    // can reuse their memory.
+    fieldNamesTemp->clear();
+    fixedTemp->clear();
+    fieldNamesTemp->reserve(fieldNames.size());
+    fixedTemp->reserve(fixed.size());
+    std::copy(fieldNames.begin(), fieldNames.end(), std::back_inserter(*fieldNamesTemp));
+    std::copy(fixed.begin(), fixed.end(), std::back_inserter(*fixedTemp));
+
    // Set up any terminal array values.
    for (const auto idx : arrIdxs) {
-        if (*(*fieldNames)[idx] == '\0') {
-            (*fixed)[idx] = mayExpandArrayUnembedded ? arrEntry : arrObjElt;
+        if (*(*fieldNamesTemp)[idx] == '\0') {
+            (*fixedTemp)[idx] = mayExpandArrayUnembedded ? arrEntry : arrObjElt;
        }
    }

    // Recurse.
-    _getKeysWithArray(*fieldNames,
-                      *fixed,
+    _getKeysWithArray(fieldNamesTemp,
+                      fixedTemp,
                      pooledBufferBuilder,
                      arrEntry.type() == Object ? arrEntry.embeddedObject() : BSONObj(),
                      keys,
@ -240,10 +252,11 @@ void BtreeKeyGenerator::getKeys(SharedBufferFragmentBuilder& pooledBufferBuilder
        // Extract the underlying sequence and insert elements unsorted to avoid O(N^2) when
        // inserting element by element if array
        auto seq = keys->extract_sequence();
-        // '_fieldNames' and '_fixed' are passed by value so that their copies can be mutated as
-        // part of the _getKeysWithArray method.
-        _getKeysWithArray(_fieldNames,
-                          _fixed,
+        // '_fieldNames' and '_fixed' are mutated by _getKeysWithArray so pass in copies
+        auto fieldNamesCopy = _fieldNames;
+        auto fixedCopy = _fixed;
+        _getKeysWithArray(&fieldNamesCopy,
+                          &fixedCopy,
                          pooledBufferBuilder,
                          obj,
                          &seq,
@ -294,8 +307,8 @@ void BtreeKeyGenerator::_getKeysWithoutArray(SharedBufferFragmentBuilder& pooled
    keys->insert(keyString.release());
 }

-void BtreeKeyGenerator::_getKeysWithArray(std::vector<const char*> fieldNames,
-                                          std::vector<BSONElement> fixed,
+void BtreeKeyGenerator::_getKeysWithArray(std::vector<const char*>* fieldNames,
+                                          std::vector<BSONElement>* fixed,
                                          SharedBufferFragmentBuilder& pooledBufferBuilder,
                                          const BSONObj& obj,
                                          KeyStringSet::sequence_type* keys,
@ -329,24 +342,24 @@ void BtreeKeyGenerator::_getKeysWithArray(std::vector<const char*> fieldNames,
    // path "a.b" causes the index to be multikey, but the key pattern "a.b.0" only indexes the
    // first element of the array, so we'd have a
    // std::vector<boost::optional<size_t>>{{1U}, boost::none}.
-    std::vector<boost::optional<size_t>> arrComponents(fieldNames.size());
+    std::vector<boost::optional<size_t>> arrComponents(fieldNames->size());

    bool mayExpandArrayUnembedded = true;
-    for (size_t i = 0; i < fieldNames.size(); ++i) {
-        if (*fieldNames[i] == '\0') {
+    for (size_t i = 0; i < fieldNames->size(); ++i) {
+        if (*(*fieldNames)[i] == '\0') {
            continue;
        }

        bool arrayNestedArray;
        // Extract element matching fieldName[ i ] from object xor array.
        BSONElement e =
-            _extractNextElement(obj, positionalInfo[i], &fieldNames[i], &arrayNestedArray);
+            _extractNextElement(obj, positionalInfo[i], &(*fieldNames)[i], &arrayNestedArray);

        if (e.eoo()) {
            // if field not present, set to null
-            fixed[i] = nullElt;
+            (*fixed)[i] = nullElt;
            // done expanding this field name
-            fieldNames[i] = "";
+            (*fieldNames)[i] = "";
            numNotFound++;
        } else if (e.type() == Array) {
            arrIdxs.insert(i);
@ -362,17 +375,17 @@ void BtreeKeyGenerator::_getKeysWithArray(std::vector<const char*> fieldNames,
            }
        } else {
            // not an array - no need for further expansion
-            fixed[i] = e;
+            (*fixed)[i] = e;
        }
    }

    if (arrElt.eoo()) {
        // No array, so generate a single key.
-        if (_isSparse && numNotFound == fieldNames.size()) {
+        if (_isSparse && numNotFound == fieldNames->size()) {
            return;
        }
        KeyString::PooledBuilder keyString(pooledBufferBuilder, _keyStringVersion, _ordering);
-        for (const auto& elem : fixed) {
+        for (const auto& elem : *fixed) {
            if (_collator) {
                keyString.appendBSONElement(elem, [&](StringData stringData) {
                    return _collator->getComparisonString(stringData);
@ -396,15 +409,19 @@ void BtreeKeyGenerator::_getKeysWithArray(std::vector<const char*> fieldNames,
                // multikey and may occur mid-path. For instance, the indexed path "a.b.c" has
                // multikey components {0, 1} given the document {a: [{b: []}, {b: 1}]}.
                size_t fullPathLength = _pathLengths[i];
-                size_t suffixPathLength = FieldRef{fieldNames[i]}.numParts();
+                size_t suffixPathLength = FieldRef{(*fieldNames)[i]}.numParts();
                invariant(suffixPathLength < fullPathLength);
                arrComponents[i] = fullPathLength - suffixPathLength - 1;
            }
        }

        // For an empty array, set matching fields to undefined.
-        _getKeysArrEltFixed(&fieldNames,
-                            &fixed,
+        std::vector<const char*> fieldNamesTemp;
+        std::vector<BSONElement> fixedTemp;
+        _getKeysArrEltFixed(*fieldNames,
+                            *fixed,
+                            &fieldNamesTemp,
+                            &fixedTemp,
                            pooledBufferBuilder,
                            undefinedElt,
                            keys,
@ -422,11 +439,11 @@ void BtreeKeyGenerator::_getKeysWithArray(std::vector<const char*> fieldNames,
        // and then traverse the remainder of the field path up front. This prevents us from
        // having to look up the indexed element again on each recursive call (i.e. once per
        // array element).
-        std::vector<PositionalPathInfo> subPositionalInfo(fixed.size());
-        for (size_t i = 0; i < fieldNames.size(); ++i) {
+        std::vector<PositionalPathInfo> subPositionalInfo(fixed->size());
+        for (size_t i = 0; i < fieldNames->size(); ++i) {
            const bool fieldIsArray = arrIdxs.find(i) != arrIdxs.end();

-            if (*fieldNames[i] == '\0') {
+            if (*(*fieldNames)[i] == '\0') {
                // We've reached the end of the path.
                if (multikeyPaths && fieldIsArray && mayExpandArrayUnembedded) {
                    // The 'arrElt' array value isn't expanded into multiple elements when the last
@ -444,7 +461,7 @@ void BtreeKeyGenerator::_getKeysWithArray(std::vector<const char*> fieldNames,
            // we must have traversed through 'arrElt'.
            invariant(fieldIsArray);

-            StringData part = fieldNames[i];
+            StringData part = (*fieldNames)[i];
            part = part.substr(0, part.find('.'));
            subPositionalInfo[i].positionallyIndexedElt = arrObj[part];
            if (subPositionalInfo[i].positionallyIndexedElt.eoo()) {
@ -466,7 +483,7 @@ void BtreeKeyGenerator::_getKeysWithArray(std::vector<const char*> fieldNames,
                    // latter from the former yields the number of components in the prefix "a.b",
                    // i.e. 2.
                    size_t fullPathLength = _pathLengths[i];
-                    size_t suffixPathLength = FieldRef{fieldNames[i]}.numParts();
+                    size_t suffixPathLength = FieldRef{(*fieldNames)[i]}.numParts();
                    invariant(suffixPathLength < fullPathLength);
                    arrComponents[i] = fullPathLength - suffixPathLength - 1;
                }
@ -480,15 +497,19 @@ void BtreeKeyGenerator::_getKeysWithArray(std::vector<const char*> fieldNames,
            // 'arrElt' array value when generating keys. It therefore cannot cause the index to be
            // multikey.
            subPositionalInfo[i].arrayObj = arrObj;
-            subPositionalInfo[i].remainingPath = fieldNames[i];
+            subPositionalInfo[i].remainingPath = (*fieldNames)[i];
            subPositionalInfo[i].dottedElt = dps::extractElementAtPathOrArrayAlongPath(
                arrObj, subPositionalInfo[i].remainingPath);
        }

        // Generate a key for each element of the indexed array.
+        std::vector<const char*> fieldNamesTemp;
+        std::vector<BSONElement> fixedTemp;
        for (const auto arrObjElem : arrObj) {
-            _getKeysArrEltFixed(&fieldNames,
-                                &fixed,
+            _getKeysArrEltFixed(*fieldNames,
+                                *fixed,
+                                &fieldNamesTemp,
+                                &fixedTemp,
                                pooledBufferBuilder,
                                arrObjElem,
                                keys,
--- a/src/mongo/db/index/btree_key_generator.h
+++ b/src/mongo/db/index/btree_key_generator.h
@ -147,9 +147,10 @@ private:

    /**
     * This recursive method does the heavy-lifting for getKeys().
+     * It will modify 'fieldNames' and 'fixed'.
     */
-    void _getKeysWithArray(std::vector<const char*> fieldNames,
-                           std::vector<BSONElement> fixed,
+    void _getKeysWithArray(std::vector<const char*>* fieldNames,
+                           std::vector<BSONElement>* fixed,
                           SharedBufferFragmentBuilder& pooledBufferBuilder,
                           const BSONObj& obj,
                           KeyStringSet::sequence_type* keys,
@ -208,10 +209,14 @@ private:
    /**
     * Sets extracted elements in 'fixed' for field paths that we have traversed to the end.
     *
+     * fieldNamesTemp and fixedTemp are temporary vectors that will be modified by this method
+     *
     * Then calls _getKeysWithArray() recursively.
     */
-    void _getKeysArrEltFixed(std::vector<const char*>* fieldNames,
-                             std::vector<BSONElement>* fixed,
+    void _getKeysArrEltFixed(const std::vector<const char*>& fieldNames,
+                             const std::vector<BSONElement>& fixed,
+                             std::vector<const char*>* fieldNamesTemp,
+                             std::vector<BSONElement>* fixedTemp,
                             SharedBufferFragmentBuilder& pooledBufferBuilder,
                             const BSONElement& arrEntry,
                             KeyStringSet::sequence_type* keys,
--- a/src/mongo/db/index/index_access_method.cpp
+++ b/src/mongo/db/index/index_access_method.cpp
@ -60,10 +60,8 @@

 namespace mongo {

-using std::endl;
 using std::pair;
 using std::set;
-using std::vector;

 using IndexVersion = IndexDescriptor::IndexVersion;

@ -84,9 +82,6 @@ bool isMultikeyFromPaths(const MultikeyPaths& multikeyPaths) {
                       [](const MultikeyComponents& components) { return !components.empty(); });
 }

-std::vector<KeyString::Value> asVector(const KeyStringSet& keySet) {
-    return {keySet.begin(), keySet.end()};
-}
 }  // namespace

 struct BtreeExternalSortComparison {
@ -148,18 +143,12 @@ Status AbstractIndexAccessMethod::insert(OperationContext* opCtx,
            loc,
            kNoopOnSuppressedErrorFn);

-    return insertKeys(opCtx,
-                      {keys->begin(), keys->end()},
-                      {multikeyMetadataKeys->begin(), multikeyMetadataKeys->end()},
-                      *multikeyPaths,
-                      loc,
-                      options,
-                      result);
+    return insertKeys(opCtx, *keys, *multikeyMetadataKeys, *multikeyPaths, loc, options, result);
 }

 Status AbstractIndexAccessMethod::insertKeys(OperationContext* opCtx,
-                                             const vector<KeyString::Value>& keys,
-                                             const vector<KeyString::Value>& multikeyMetadataKeys,
+                                             const KeyStringSet& keys,
+                                             const KeyStringSet& multikeyMetadataKeys,
                                             const MultikeyPaths& multikeyPaths,
                                             const RecordId& loc,
                                             const InsertDeleteOptions& options,
@ -233,7 +222,7 @@ std::unique_ptr<SortedDataInterface::Cursor> AbstractIndexAccessMethod::newCurso
 }

 Status AbstractIndexAccessMethod::removeKeys(OperationContext* opCtx,
-                                             const std::vector<KeyString::Value>& keys,
+                                             const KeyStringSet& keys,
                                             const RecordId& loc,
                                             const InsertDeleteOptions& options,
                                             int64_t* numDeleted) {
@ -318,32 +307,21 @@ long long AbstractIndexAccessMethod::getSpaceUsedBytes(OperationContext* opCtx)
    return _newInterface->getSpaceUsedBytes(opCtx);
 }

-pair<vector<KeyString::Value>, vector<KeyString::Value>> AbstractIndexAccessMethod::setDifference(
-    const KeyStringSet& left, const KeyStringSet& right, Ordering ordering) {
+pair<KeyStringSet, KeyStringSet> AbstractIndexAccessMethod::setDifference(
+    const KeyStringSet& left, const KeyStringSet& right) {
    // Two iterators to traverse the two sets in sorted order.
    auto leftIt = left.begin();
    auto rightIt = right.begin();
-    vector<KeyString::Value> onlyLeft;
-    vector<KeyString::Value> onlyRight;
+    KeyStringSet::sequence_type onlyLeft;
+    KeyStringSet::sequence_type onlyRight;

    while (leftIt != left.end() && rightIt != right.end()) {
-        const int cmp = leftIt->compare(*rightIt);
+        // Use compareWithTypeBits instead of the regular compare as we want just a difference in
+        // typeinfo to also result in an index change.
+        const int cmp = leftIt->compareWithTypeBits(*rightIt);
        if (cmp == 0) {
-            /*
-             * 'leftIt' and 'rightIt' compare equal using compare(), but may not be identical, which
-             * should result in an index change.
-             */
-            auto leftKey = KeyString::toBson(
-                leftIt->getBuffer(), leftIt->getSize(), ordering, leftIt->getTypeBits());
-            auto rightKey = KeyString::toBson(
-                rightIt->getBuffer(), rightIt->getSize(), ordering, rightIt->getTypeBits());
-            if (!leftKey.binaryEqual(rightKey)) {
-                onlyLeft.push_back(*leftIt);
-                onlyRight.push_back(*rightIt);
-            }
            ++leftIt;
            ++rightIt;
-            continue;
        } else if (cmp > 0) {
            onlyRight.push_back(*rightIt);
            ++rightIt;
@ -357,7 +335,15 @@ pair<vector<KeyString::Value>, vector<KeyString::Value>> AbstractIndexAccessMeth
    onlyLeft.insert(onlyLeft.end(), leftIt, left.end());
    onlyRight.insert(onlyRight.end(), rightIt, right.end());

-    return {std::move(onlyLeft), std::move(onlyRight)};
+    KeyStringSet outLeft;
+    KeyStringSet outRight;
+
+    // The above algorithm guarantees that the elements are sorted and unique, so we can let the
+    // container know so we get O(1) complexity adopting it.
+    outLeft.adopt_sequence(boost::container::ordered_unique_range_t(), std::move(onlyLeft));
+    outRight.adopt_sequence(boost::container::ordered_unique_range_t(), std::move(onlyRight));
+
+    return {{std::move(outLeft)}, {std::move(outRight)}};
 }

 void AbstractIndexAccessMethod::prepareUpdate(OperationContext* opCtx,
@ -405,8 +391,7 @@ void AbstractIndexAccessMethod::prepareUpdate(OperationContext* opCtx,
    ticket->loc = record;
    ticket->dupsAllowed = options.dupsAllowed;

-    std::tie(ticket->removed, ticket->added) =
-        setDifference(ticket->oldKeys, ticket->newKeys, getSortedDataInterface()->getOrdering());
+    std::tie(ticket->removed, ticket->added) = setDifference(ticket->oldKeys, ticket->newKeys);

    ticket->_isValid = true;
 }
@ -435,8 +420,7 @@ Status AbstractIndexAccessMethod::update(OperationContext* opCtx,
    // Add all new data keys, and all new multikey metadata keys, into the index. When iterating
    // over the data keys, each of them should point to the doc's RecordId. When iterating over
    // the multikey metadata keys, they should point to the reserved 'kMultikeyMetadataKeyId'.
-    const auto newMultikeyMetadataKeys = asVector(ticket.newMultikeyMetadataKeys);
-    for (const auto keySet : {&ticket.added, &newMultikeyMetadataKeys}) {
+    for (const auto keySet : {&ticket.added, &ticket.newMultikeyMetadataKeys}) {
        for (const auto& keyString : *keySet) {
            Status status = _newInterface->insert(opCtx, keyString, ticket.dupsAllowed);
            if (isFatalError(opCtx, status, keyString)) {
@ -446,9 +430,7 @@ Status AbstractIndexAccessMethod::update(OperationContext* opCtx,
    }

    if (shouldMarkIndexAsMultikey(
-            ticket.newKeys.size(),
-            {ticket.newMultikeyMetadataKeys.begin(), ticket.newMultikeyMetadataKeys.end()},
-            ticket.newMultikeyPaths)) {
+            ticket.newKeys.size(), ticket.newMultikeyMetadataKeys, ticket.newMultikeyPaths)) {
        _indexCatalogEntry->setMultikey(opCtx, ticket.newMultikeyPaths);
    }

@ -566,7 +548,8 @@ Status AbstractIndexAccessMethod::BulkBuilderImpl::insert(OperationContext* opCt
        } else {
            invariant(_indexMultikeyPaths.size() == multikeyPaths->size());
            for (size_t i = 0; i < multikeyPaths->size(); ++i) {
-                _indexMultikeyPaths[i].insert((*multikeyPaths)[i].begin(),
+                _indexMultikeyPaths[i].insert(boost::container::ordered_unique_range_t(),
+                                              (*multikeyPaths)[i].begin(),
                                              (*multikeyPaths)[i].end());
            }
        }
@ -579,9 +562,7 @@ Status AbstractIndexAccessMethod::BulkBuilderImpl::insert(OperationContext* opCt

    _isMultiKey = _isMultiKey ||
        _indexCatalogEntry->accessMethod()->shouldMarkIndexAsMultikey(
-            keys->size(),
-            {_multikeyMetadataKeys.begin(), _multikeyMetadataKeys.end()},
-            *multikeyPaths);
+            keys->size(), _multikeyMetadataKeys, *multikeyPaths);

    return Status::OK();
 }
@ -790,7 +771,7 @@ void AbstractIndexAccessMethod::getKeys(SharedBufferFragmentBuilder& pooledBuffe

 bool AbstractIndexAccessMethod::shouldMarkIndexAsMultikey(
    size_t numberOfKeys,
-    const vector<KeyString::Value>& multikeyMetadataKeys,
+    const KeyStringSet& multikeyMetadataKeys,
    const MultikeyPaths& multikeyPaths) const {
    return numberOfKeys > 1 || isMultikeyFromPaths(multikeyPaths);
 }
--- a/src/mongo/db/index/index_access_method.h
+++ b/src/mongo/db/index/index_access_method.h
@ -91,8 +91,8 @@ public:
                          InsertResult* result) = 0;

    virtual Status insertKeys(OperationContext* opCtx,
-                              const std::vector<KeyString::Value>& keys,
-                              const std::vector<KeyString::Value>& multikeyMetadataKeys,
+                              const KeyStringSet& keys,
+                              const KeyStringSet& multikeyMetadataKeys,
                              const MultikeyPaths& multikeyPaths,
                              const RecordId& loc,
                              const InsertDeleteOptions& options,
@ -103,7 +103,7 @@ public:
     * 'numDeleted' will be set to the number of keys removed from the index for the provided keys.
     */
    virtual Status removeKeys(OperationContext* opCtx,
-                              const std::vector<KeyString::Value>& keys,
+                              const KeyStringSet& keys,
                              const RecordId& loc,
                              const InsertDeleteOptions& options,
                              int64_t* numDeleted) = 0;
@ -319,10 +319,9 @@ public:
     * Given the set of keys, multikeyMetadataKeys and multikeyPaths generated by a particular
     * document, return 'true' if the index should be marked as multikey and 'false' otherwise.
     */
-    virtual bool shouldMarkIndexAsMultikey(
-        size_t numberOfKeys,
-        const std::vector<KeyString::Value>& multikeyMetadataKeys,
-        const MultikeyPaths& multikeyPaths) const = 0;
+    virtual bool shouldMarkIndexAsMultikey(size_t numberOfKeys,
+                                           const KeyStringSet& multikeyMetadataKeys,
+                                           const MultikeyPaths& multikeyPaths) const = 0;

    /**
     * Returns the intersection of 'fields' and the set of multikey metadata paths stored in the
@ -392,8 +391,8 @@ struct UpdateTicket {

    KeyStringSet newMultikeyMetadataKeys;

-    std::vector<KeyString::Value> removed;
-    std::vector<KeyString::Value> added;
+    KeyStringSet removed;
+    KeyStringSet added;

    RecordId loc;
    bool dupsAllowed;
@ -437,15 +436,15 @@ class AbstractIndexAccessMethod : public IndexAccessMethod {

 public:
    /**
-     * Splits the sets 'left' and 'right' into two vectors, the first containing the elements that
+     * Splits the sets 'left' and 'right' into two sets, the first containing the elements that
     * only appeared in 'left', and the second containing only elements that appeared in 'right'.
     *
     * Note this considers objects which are not identical as distinct objects. For example,
     * setDifference({BSON("a" << 0.0)}, {BSON("a" << 0LL)}) would result in the pair
     * ( {BSON("a" << 0.0)}, {BSON("a" << 0LL)} ).
     */
-    static std::pair<std::vector<KeyString::Value>, std::vector<KeyString::Value>> setDifference(
-        const KeyStringSet& left, const KeyStringSet& right, Ordering ordering);
+    static std::pair<KeyStringSet, KeyStringSet> setDifference(const KeyStringSet& left,
+                                                               const KeyStringSet& right);

    AbstractIndexAccessMethod(IndexCatalogEntry* btreeState,
                              std::unique_ptr<SortedDataInterface> btree);
@ -457,15 +456,15 @@ public:
                  InsertResult* result) final;

    Status insertKeys(OperationContext* opCtx,
-                      const std::vector<KeyString::Value>& keys,
-                      const std::vector<KeyString::Value>& multikeyMetadataKeys,
+                      const KeyStringSet& keys,
+                      const KeyStringSet& multikeyMetadataKeys,
                      const MultikeyPaths& multikeyPaths,
                      const RecordId& loc,
                      const InsertDeleteOptions& options,
                      InsertResult* result) final;

    Status removeKeys(OperationContext* opCtx,
-                      const std::vector<KeyString::Value>& keys,
+                      const KeyStringSet& keys,
                      const RecordId& loc,
                      const InsertDeleteOptions& options,
                      int64_t* numDeleted) final;
@ -524,7 +523,7 @@ public:
                 OnSuppressedErrorFn onSuppressedError) const final;

    bool shouldMarkIndexAsMultikey(size_t numberOfKeys,
-                                   const std::vector<KeyString::Value>& multikeyMetadataKeys,
+                                   const KeyStringSet& multikeyMetadataKeys,
                                   const MultikeyPaths& multikeyPaths) const override;

    SortedDataInterface* getSortedDataInterface() const override final;
--- a/src/mongo/db/index/index_build_interceptor.cpp
+++ b/src/mongo/db/index/index_build_interceptor.cpp
@ -383,7 +383,7 @@ boost::optional<MultikeyPaths> IndexBuildInterceptor::getMultikeyPaths() const {
 }

 Status IndexBuildInterceptor::sideWrite(OperationContext* opCtx,
-                                        const std::vector<KeyString::Value>& keys,
+                                        const KeyStringSet& keys,
                                        const KeyStringSet& multikeyMetadataKeys,
                                        const MultikeyPaths& multikeyPaths,
                                        RecordId loc,
--- a/src/mongo/db/index/index_build_interceptor.h
+++ b/src/mongo/db/index/index_build_interceptor.h
@ -94,7 +94,7 @@ public:
     * On success, `numKeysOut` if non-null will contain the number of keys added or removed.
     */
    Status sideWrite(OperationContext* opCtx,
-                     const std::vector<KeyString::Value>& keys,
+                     const KeyStringSet& keys,
                     const KeyStringSet& multikeyMetadataKeys,
                     const MultikeyPaths& multikeyPaths,
                     RecordId loc,
--- a/src/mongo/db/index/wildcard_access_method.cpp
+++ b/src/mongo/db/index/wildcard_access_method.cpp
@ -46,10 +46,9 @@ WildcardAccessMethod::WildcardAccessMethod(IndexCatalogEntry* wildcardState,
              getSortedDataInterface()->getKeyStringVersion(),
              getSortedDataInterface()->getOrdering()) {}

-bool WildcardAccessMethod::shouldMarkIndexAsMultikey(
-    size_t numberOfKeys,
-    const std::vector<KeyString::Value>& multikeyMetadataKeys,
-    const MultikeyPaths& multikeyPaths) const {
+bool WildcardAccessMethod::shouldMarkIndexAsMultikey(size_t numberOfKeys,
+                                                     const KeyStringSet& multikeyMetadataKeys,
+                                                     const MultikeyPaths& multikeyPaths) const {
    return !multikeyMetadataKeys.empty();
 }

--- a/src/mongo/db/index/wildcard_access_method.h
+++ b/src/mongo/db/index/wildcard_access_method.h
@ -67,7 +67,7 @@ public:
     * vector is non-empty.
     */
    bool shouldMarkIndexAsMultikey(size_t numberOfKeys,
-                                   const std::vector<KeyString::Value>& multikeyMetadataKeys,
+                                   const KeyStringSet& multikeyMetadataKeys,
                                   const MultikeyPaths& multikeyPaths) const final;

    /**
--- a/src/mongo/db/storage/key_string.cpp
+++ b/src/mongo/db/storage/key_string.cpp
@ -2564,6 +2564,10 @@ int compare(const char* leftBuf, const char* rightBuf, size_t leftSize, size_t r
    return leftSize < rightSize ? -1 : 1;
 }

+int Value::compareWithTypeBits(const Value& other) const {
+    return KeyString::compare(getBuffer(), other.getBuffer(), _buffer.size(), other._buffer.size());
+}
+
 template class BuilderBase<Builder>;
 template class BuilderBase<HeapBuilder>;
 template class BuilderBase<PooledBuilder>;
--- a/src/mongo/db/storage/key_string.h
+++ b/src/mongo/db/storage/key_string.h
@ -328,6 +328,8 @@ public:
    template <class T>
    int compare(const T& other) const;

+    int compareWithTypeBits(const Value& other) const;
+
    template <class T>
    int compareWithoutRecordId(const T& other) const;

--- a/src/mongo/dbtests/index_access_method_test.cpp
+++ b/src/mongo/dbtests/index_access_method_test.cpp
@ -53,7 +53,7 @@ KeyStringSet makeKeyStringSet(std::initializer_list<BSONObj> objs) {
 TEST(IndexAccessMethodSetDifference, EmptyInputsShouldHaveNoDifference) {
    KeyStringSet left;
    KeyStringSet right;
-    auto diff = AbstractIndexAccessMethod::setDifference(left, right, Ordering::make(BSONObj()));
+    auto diff = AbstractIndexAccessMethod::setDifference(left, right);
    ASSERT_EQ(0UL, diff.first.size());
    ASSERT_EQ(0UL, diff.second.size());
 }
@ -62,7 +62,7 @@ TEST(IndexAccessMethodSetDifference, EmptyLeftShouldHaveNoDifference) {
    KeyStringSet left;
    auto right = makeKeyStringSet({BSON("" << 0)});

-    auto diff = AbstractIndexAccessMethod::setDifference(left, right, Ordering::make(BSONObj()));
+    auto diff = AbstractIndexAccessMethod::setDifference(left, right);
    ASSERT_EQ(0UL, diff.first.size());
    ASSERT_EQ(1UL, diff.second.size());
 }
@ -71,7 +71,7 @@ TEST(IndexAccessMethodSetDifference, EmptyRightShouldReturnAllOfLeft) {
    auto left = makeKeyStringSet({BSON("" << 0), BSON("" << 1)});
    KeyStringSet right;

-    auto diff = AbstractIndexAccessMethod::setDifference(left, right, Ordering::make(BSONObj()));
+    auto diff = AbstractIndexAccessMethod::setDifference(left, right);
    ASSERT_EQ(2UL, diff.first.size());
    ASSERT_EQ(0UL, diff.second.size());
 }
@ -86,7 +86,7 @@ TEST(IndexAccessMethodSetDifference, IdenticalSetsShouldHaveNoDifference) {
                                        << "string"),
                                   BSON("" << BSONNULL)});

-    auto diff = AbstractIndexAccessMethod::setDifference(left, right, Ordering::make(BSONObj()));
+    auto diff = AbstractIndexAccessMethod::setDifference(left, right);
    ASSERT_EQ(0UL, diff.first.size());
    ASSERT_EQ(0UL, diff.second.size());
 }
@ -98,8 +98,7 @@ TEST(IndexAccessMethodSetDifference, IdenticalSetsShouldHaveNoDifference) {
 void assertDistinct(BSONObj left, BSONObj right) {
    auto leftSet = makeKeyStringSet({left});
    auto rightSet = makeKeyStringSet({right});
-    auto diff =
-        AbstractIndexAccessMethod::setDifference(leftSet, rightSet, Ordering::make(BSONObj()));
+    auto diff = AbstractIndexAccessMethod::setDifference(leftSet, rightSet);
    ASSERT_EQ(1UL, diff.first.size());
    ASSERT_EQ(1UL, diff.second.size());
 }
@ -152,7 +151,7 @@ TEST(IndexAccessMethodSetDifference, ShouldDetectOneDifferenceAmongManySimilarit
                                   BSON("" << BSON("sub"
                                                   << "document")),
                                   BSON("" << BSON_ARRAY(1 << "hi" << 42))});
-    auto diff = AbstractIndexAccessMethod::setDifference(left, right, Ordering::make(BSONObj()));
+    auto diff = AbstractIndexAccessMethod::setDifference(left, right);
    ASSERT_EQUALS(1UL, diff.first.size());
    ASSERT_EQUALS(1UL, diff.second.size());
 }
@ -160,7 +159,7 @@ TEST(IndexAccessMethodSetDifference, ShouldDetectOneDifferenceAmongManySimilarit
 TEST(IndexAccessMethodSetDifference, SingleObjInLeftShouldFindCorrespondingObjInRight) {
    auto left = makeKeyStringSet({BSON("" << 2)});
    auto right = makeKeyStringSet({BSON("" << 1), BSON("" << 2), BSON("" << 3)});
-    auto diff = AbstractIndexAccessMethod::setDifference(left, right, Ordering::make(BSONObj()));
+    auto diff = AbstractIndexAccessMethod::setDifference(left, right);
    ASSERT_EQUALS(0UL, diff.first.size());
    ASSERT_EQUALS(2UL, diff.second.size());
 }
@ -168,7 +167,7 @@ TEST(IndexAccessMethodSetDifference, SingleObjInLeftShouldFindCorrespondingObjIn
 TEST(IndexAccessMethodSetDifference, SingleObjInRightShouldFindCorrespondingObjInLeft) {
    auto left = makeKeyStringSet({BSON("" << 1), BSON("" << 2), BSON("" << 3)});
    auto right = makeKeyStringSet({BSON("" << 2)});
-    auto diff = AbstractIndexAccessMethod::setDifference(left, right, Ordering::make(BSONObj()));
+    auto diff = AbstractIndexAccessMethod::setDifference(left, right);
    ASSERT_EQUALS(2UL, diff.first.size());
    ASSERT_EQUALS(0UL, diff.second.size());
 }
@ -176,7 +175,7 @@ TEST(IndexAccessMethodSetDifference, SingleObjInRightShouldFindCorrespondingObjI
 TEST(IndexAccessMethodSetDifference, LeftSetAllSmallerThanRightShouldBeDisjoint) {
    auto left = makeKeyStringSet({BSON("" << 1), BSON("" << 2), BSON("" << 3)});
    auto right = makeKeyStringSet({BSON("" << 4), BSON("" << 5), BSON("" << 6)});
-    auto diff = AbstractIndexAccessMethod::setDifference(left, right, Ordering::make(BSONObj()));
+    auto diff = AbstractIndexAccessMethod::setDifference(left, right);
    ASSERT_EQUALS(3UL, diff.first.size());
    ASSERT_EQUALS(3UL, diff.second.size());
    for (auto&& obj : diff.first) {
@ -190,7 +189,7 @@ TEST(IndexAccessMethodSetDifference, LeftSetAllSmallerThanRightShouldBeDisjoint)
 TEST(IndexAccessMethodSetDifference, LeftSetAllLargerThanRightShouldBeDisjoint) {
    auto left = makeKeyStringSet({BSON("" << 4), BSON("" << 5), BSON("" << 6)});
    auto right = makeKeyStringSet({BSON("" << 1), BSON("" << 2), BSON("" << 3)});
-    auto diff = AbstractIndexAccessMethod::setDifference(left, right, Ordering::make(BSONObj()));
+    auto diff = AbstractIndexAccessMethod::setDifference(left, right);
    ASSERT_EQUALS(3UL, diff.first.size());
    ASSERT_EQUALS(3UL, diff.second.size());
    for (auto&& obj : diff.first) {
@ -205,7 +204,7 @@ TEST(IndexAccessMethodSetDifference, ShouldNotReportOverlapsFromNonDisjointSets)
    auto left = makeKeyStringSet({BSON("" << 0), BSON("" << 1), BSON("" << 4), BSON("" << 6)});
    auto right = makeKeyStringSet(
        {BSON("" << -1), BSON("" << 1), BSON("" << 3), BSON("" << 4), BSON("" << 7)});
-    auto diff = AbstractIndexAccessMethod::setDifference(left, right, Ordering::make(BSONObj()));
+    auto diff = AbstractIndexAccessMethod::setDifference(left, right);
    ASSERT_EQUALS(2UL, diff.first.size());   // 0, 6.
    ASSERT_EQUALS(3UL, diff.second.size());  // -1, 3, 7.
    for (auto&& keyString : diff.first) {