From cfe3e7e4d18297b7781554e62c97409e68385812 Mon Sep 17 00:00:00 2001
From: Projjal Chanda
Date: Tue, 27 Aug 2024 16:10:55 +0530
Subject: [PATCH] SERVER-93678: Add a shell utility function to compare strings given a collation object (#26234)

GitOrigin-RevId: 6480e21ac48c379052ece62f0efb9e60771cb2ea
---
 .eslintrc.yml                                 |  1 +
 jstests/core/shell/comparisonWithCollation.js | 63 +++++++++++++++++++
 src/mongo/shell/SConscript                    |  1 +
 src/mongo/shell/shell_utils.cpp               | 42 +++++++++++++
 4 files changed, 107 insertions(+)
 create mode 100644 jstests/core/shell/comparisonWithCollation.js

diff --git a/.eslintrc.yml b/.eslintrc.yml
index de5260d22f4..f4f96c94442 100644
--- a/.eslintrc.yml
+++ b/.eslintrc.yml
@@ -150,6 +150,7 @@ globals:
   _fnvHashToHexString: true
   _resultSetsEqualUnordered: true
   getStringWidth: true
+  _compareStringsWithCollation: true
 
   # likely could be replaced with `path`
   _copyFileRange: true
diff --git a/jstests/core/shell/comparisonWithCollation.js b/jstests/core/shell/comparisonWithCollation.js
new file mode 100644
index 00000000000..2ebe6826891
--- /dev/null
+++ b/jstests/core/shell/comparisonWithCollation.js
@@ -0,0 +1,63 @@
+/**
+ * Tests the shell util '_compareStringsWithCollation'
+ */
+
+assert.eq(_compareStringsWithCollation("abc", "abc", {locale: "en_US"}), 0);
+assert.gt(_compareStringsWithCollation("bcd", "abc", {locale: "en_US"}), 0);
+assert.lt(_compareStringsWithCollation("abc", "ABC", {locale: "en_US"}), 0);
+
+// zero length strings and null bytes
+assert.eq(_compareStringsWithCollation("", "", {locale: "en_US"}), 0);
+assert.gt(_compareStringsWithCollation("abc", "", {locale: "en_US"}), 0);
+assert.gt(_compareStringsWithCollation("abc", "", {locale: "en_US", strength: 2}), 0);
+assert.eq(_compareStringsWithCollation("\0", "", {locale: "en_US"}), 0);
+assert.lt(_compareStringsWithCollation("\0", "ab", {locale: "en_US"}), 0);
+assert.gt(_compareStringsWithCollation("a\0c", "a\0b", {locale: "en_US"}), 0);
+assert.eq(_compareStringsWithCollation("a", "a\0", {locale: "en_US"}), 0);
+
+// case-level and diacritics
+assert.eq(_compareStringsWithCollation("abc", "ABC", {locale: "en_US", strength: 1}), 0);
+assert.eq(_compareStringsWithCollation("abc", "ABC", {locale: "en_US", strength: 2}), 0);
+assert.lt(_compareStringsWithCollation("abc", "ABC", {locale: "en_US", strength: 3}), 0);
+assert.lt(
+    _compareStringsWithCollation("abc", "ABC", {locale: "en_US", strength: 1, caseLevel: true}), 0);
+assert.lt(
+    _compareStringsWithCollation("abc", "ABC", {locale: "en_US", strength: 2, caseLevel: true}), 0);
+
+assert.eq(_compareStringsWithCollation("eaio", "éáïô", {locale: "en_US", strength: 1}), 0);
+assert.lt(_compareStringsWithCollation("eaio", "éáïô", {locale: "en_US", strength: 2}), 0);
+
+assert.gt(_compareStringsWithCollation("abc", "ABC", {locale: "en_US", caseFirst: "upper"}), 0);
+assert.lt(_compareStringsWithCollation("abc", "ABC", {locale: "en_US", caseFirst: "lower"}), 0);
+
+// numeric ordering
+assert.gt(_compareStringsWithCollation("10", "2", {locale: "en_US", numericOrdering: true}), 0);
+assert.lt(_compareStringsWithCollation("10", "2", {locale: "en_US", numericOrdering: false}), 0);
+
+// Ignore whitespace and punctuation
+assert.eq(_compareStringsWithCollation("a b, c", "abc", {locale: "en_US", alternate: "shifted"}),
+          0);
+assert.neq(_compareStringsWithCollation(
+               "a b, c", "abc", {locale: "en_US", strength: 4, alternate: "shifted"}),
+           0);
+assert.eq(_compareStringsWithCollation(
+              "a b, c", "abc", {locale: "en_US", alternate: "shifted", maxVariable: "punct"}),
+          0);
+assert.neq(_compareStringsWithCollation(
+               "a b, c", "abc", {locale: "en_US", alternate: "shifted", maxVariable: "space"}),
+           0);
+assert.eq(_compareStringsWithCollation(
+              "a b c", "abc", {locale: "en_US", alternate: "shifted", maxVariable: "space"}),
+          0);
+
+// "simple" locale: plain binary comparison (must not crash the shell)
+assert.eq(_compareStringsWithCollation("abc", "abc", {locale: "simple"}), 0);
+assert.lt(_compareStringsWithCollation("ABC", "abc", {locale: "simple"}), 0);
+
+// error cases
+assert.throwsWithCode(() => _compareStringsWithCollation("", ""), 9367804);
+assert.throwsWithCode(() => _compareStringsWithCollation(1, "", {locale: "en_US"}), 9367801);
+assert.throwsWithCode(() => _compareStringsWithCollation("", 1, {locale: "en_US"}), 9367803);
+assert.throwsWithCode(() => _compareStringsWithCollation({a: ""}, "", {locale: "en_US"}), 9367801);
+assert.throwsWithCode(() => _compareStringsWithCollation("", "", ""), 9367805);
+assert.throwsWithCode(() => _compareStringsWithCollation("", "", {}), 40414);
diff --git a/src/mongo/shell/SConscript b/src/mongo/shell/SConscript
index 7af19a17a50..814b3d19e8e 100644
--- a/src/mongo/shell/SConscript
+++ b/src/mongo/shell/SConscript
@@ -116,6 +116,7 @@ env.Library(
     LIBDEPS_PRIVATE=[
         "$BUILD_DIR/mongo/bson/util/bson_column",
         "$BUILD_DIR/mongo/db/auth/security_token_auth",
+        "$BUILD_DIR/mongo/db/query/collation/collator_icu",
         "$BUILD_DIR/mongo/db/storage/record_store_base",
         "$BUILD_DIR/mongo/util/icu",
         "program_runner",
diff --git a/src/mongo/shell/shell_utils.cpp b/src/mongo/shell/shell_utils.cpp
index 6d000dfce6d..8add6ddf782 100644
--- a/src/mongo/shell/shell_utils.cpp
+++ b/src/mongo/shell/shell_utils.cpp
@@ -75,6 +75,7 @@
 #include "mongo/db/auth/validated_tenancy_scope_factory.h"
 #include "mongo/db/database_name.h"
 #include "mongo/db/hasher.h"
+#include "mongo/db/query/collation/collator_factory_icu.h"
 #include "mongo/platform/decimal128.h"
 #include "mongo/platform/mutex.h"
 #include "mongo/platform/random.h"
@@ -993,6 +994,46 @@ BSONObj _resultSetsEqualUnordered(const BSONObj& input, void*) {
     return BSON("" << true);
 }
 
+/**
+ * Shell native: compares two strings under a collation.
+ *
+ * 'input' carries three positional elements: the left string, the right string and a collation
+ * document. Returns a single-element object holding a number < 0 if 'left' sorts before 'right',
+ * a number > 0 if 'left' sorts after 'right', and 0 if they are equal under the collation.
+ *
+ * Throws 9367800-9367805 when an argument is missing or has the wrong type, and propagates the
+ * error from CollatorFactoryICU::makeFromBSON() when the collation document is invalid.
+ *
+ * Refer to https://www.mongodb.com/docs/manual/reference/collation and
+ * https://unicode-org.github.io/icu/userguide/collation for the expected behaviour when collation
+ * is specified.
+ */
+BSONObj _compareStringsWithCollation(const BSONObj& input, void*) {
+    BSONObjIterator i(input);
+
+    uassert(9367800, "Expected left argument", i.more());
+    auto left = i.next();
+    uassert(9367801, "Left argument should be a string", left.type() == BSONType::String);
+
+    uassert(9367802, "Expected right argument", i.more());
+    auto right = i.next();
+    uassert(9367803, "Right argument should be string", right.type() == BSONType::String);
+
+    uassert(9367804, "Expected collation argument", i.more());
+    auto collatorSpec = i.next();
+    uassert(9367805, "Expected a collation object", collatorSpec.type() == BSONType::Object);
+
+    CollatorFactoryICU collationFactory;
+    auto collator = uassertStatusOK(collationFactory.makeFromBSON(collatorSpec.Obj()));
+
+    // makeFromBSON() yields a null collator for the "simple" locale, which means plain binary
+    // comparison, so guard the dereference instead of crashing the shell.
+    const auto lhs = left.valueStringData();
+    const auto rhs = right.valueStringData();
+    const int cmp = collator ? collator->compare(lhs, rhs) : lhs.compare(rhs);
+    return BSON("" << cmp);
+}
+
 void installShellUtils(Scope& scope) {
     scope.injectNative("getMemInfo", JSGetMemInfo);
     scope.injectNative("_createSecurityToken", _createSecurityToken);
@@ -1016,6 +1057,7 @@ void installShellUtils(Scope& scope) {
     scope.injectNative("_buildBsonObj", _buildBsonObj);
     scope.injectNative("_fnvHashToHexString", _fnvHashToHexString);
     scope.injectNative("_resultSetsEqualUnordered", _resultSetsEqualUnordered);
+    scope.injectNative("_compareStringsWithCollation", _compareStringsWithCollation);
 
     installShellUtilsLauncher(scope);
     installShellUtilsExtended(scope);