mirror of
https://github.com/mongodb/mongo.git
synced 2024-11-21 12:39:08 +01:00
SERVER-87392 Expose a getStringWidth helper to the test runner (#19531)
GitOrigin-RevId: 990d3a0f1b16a5e29566682913c0288378c89e4c
This commit is contained in:
parent
cdd13b9cb6
commit
968a0d3d29
@ -146,6 +146,7 @@ globals:
|
||||
indentStr: true
|
||||
_forgetReplSet: true
|
||||
_fnvHashToHexString: true
|
||||
getStringWidth: true
|
||||
|
||||
# likely could be replaced with `path`
|
||||
_copyFileRange: true
|
||||
|
32
jstests/noPassthrough/shell_i18n.js
Normal file
32
jstests/noPassthrough/shell_i18n.js
Normal file
@ -0,0 +1,32 @@
|
||||
'use strict';
|
||||
|
||||
// Ensure that our implementation of `icuGetStringWidth` (exposed as the `getStringWidth` global
|
||||
// function) produces correct results.
|
||||
/**
 * Exercises the `getStringWidth` shell global against a fixed table of inputs and the column
 * width expected for each one. The first mismatch aborts the test via `assert.eq`.
 */
function testGetStringWidth() {
    // Each entry is [input string, expected width]; entries are checked in order.
    const expectations = [
        ['a', 1],
        [String.fromCharCode(0x0061), 1],
        ['丁', 2],
        [String.fromCharCode(0x4E01), 2],
        ['\ud83d\udc78\ud83c\udfff', 4],
        ['👅', 2],
        ['\ud83d', 1],
        ['\udc78', 1],
        ['\u0000', 0],
        [String.fromCharCode(0x0007), 0],
        ['\n', 0],
        [String.fromCharCode(0x00AD), 1],
        ['\u200Ef\u200F', 1],
        [String.fromCharCode(0x10FFEF), 1],
        [String.fromCharCode(0x3FFEF), 1],
        [String.fromCharCode(0x0301), 0],
        [String.fromCharCode(0x1B44), 1],
        [String.fromCharCode(0x20DD), 0],
        ['👩👩👧👧', 8],
        ['❤️', 1],
        ['👩❤️👩', 5],
        ['❤', 1],
        ['\u01d4', 1],
        ['\u200E\n\u220A\u20D2', 1],
    ];

    for (const [input, expectedWidth] of expectations) {
        assert.eq(getStringWidth(input), expectedWidth);
    }
}
|
||||
|
||||
testGetStringWidth();
|
@ -176,6 +176,7 @@ env.Library(
|
||||
'$BUILD_DIR/mongo/bson/util/bson_column',
|
||||
'$BUILD_DIR/mongo/db/auth/security_token_auth',
|
||||
'$BUILD_DIR/mongo/db/storage/record_store_base',
|
||||
'$BUILD_DIR/mongo/util/icu',
|
||||
'program_runner',
|
||||
],
|
||||
)
|
||||
|
@ -27,7 +27,6 @@
|
||||
* it in the license file.
|
||||
*/
|
||||
|
||||
|
||||
#include <boost/filesystem/directory.hpp>
|
||||
#include <boost/filesystem/exception.hpp>
|
||||
#include <boost/filesystem/file_status.hpp>
|
||||
@ -52,7 +51,6 @@
|
||||
|
||||
#include "mongo/base/data_range_cursor.h"
|
||||
#include "mongo/base/error_codes.h"
|
||||
#include "mongo/base/status_with.h"
|
||||
#include "mongo/base/string_data.h"
|
||||
#include "mongo/bson/bson_bin_util.h"
|
||||
#include "mongo/bson/bson_validate.h"
|
||||
@ -67,6 +65,7 @@
|
||||
#include "mongo/shell/shell_utils.h"
|
||||
#include "mongo/util/assert_util.h"
|
||||
#include "mongo/util/errno_util.h"
|
||||
#include "mongo/util/icu.h"
|
||||
#include "mongo/util/md5.h"
|
||||
#include "mongo/util/md5.hpp"
|
||||
#include "mongo/util/net/socket_utils.h"
|
||||
@ -635,6 +634,14 @@ BSONObj shellGetEnv(const BSONObj& a, void*) {
|
||||
return BSON("" << result.c_str());
|
||||
}
|
||||
|
||||
/**
 * Shell native: returns the width of its single string argument as computed by
 * icuGetStringWidth, wrapped in a one-field BSON object with an empty field name.
 *
 * Throws (uassert 8730901) unless `a` holds exactly one field and that field is a string.
 * The `data` pointer is not used.
 */
BSONObj getStringWidth(const BSONObj& a, void* data) {
    const bool hasSingleStringArg = a.nFields() == 1 && a.firstElementType() == String;
    uassert(8730901, "getStringWidth takes a single string argument", hasSingleStringArg);

    // Flags forwarded to icuGetStringWidth: ambiguous-width characters are not treated as
    // full width, and emoji sequences are expanded.
    constexpr bool kAmbiguousAsFullWidth = false;
    constexpr bool kExpandEmojiSequence = true;

    const int width = icuGetStringWidth(
        a.firstElement().valueStringData(), kAmbiguousAsFullWidth, kExpandEmojiSequence);
    return BSON("" << width);
}
|
||||
|
||||
} // namespace
|
||||
|
||||
@ -661,6 +668,7 @@ void installShellUtilsExtended(Scope& scope) {
|
||||
scope.injectNative("_readDumpFile", readDumpFile);
|
||||
scope.injectNative("_getEnv", shellGetEnv);
|
||||
scope.injectNative("writeBsonArrayToFile", writeBsonArrayToFile);
|
||||
scope.injectNative("getStringWidth", getStringWidth);
|
||||
}
|
||||
|
||||
} // namespace shell_utils
|
||||
|
@ -806,9 +806,7 @@ icuEnv.Library(
|
||||
|
||||
icuEnv.Library(
|
||||
target='icu',
|
||||
source=[
|
||||
'icu.cpp',
|
||||
],
|
||||
source=['icu.cpp', '../../third_party/node/icu_get_string_width.cpp'],
|
||||
LIBDEPS_PRIVATE=[
|
||||
'$BUILD_DIR/mongo/base',
|
||||
'$BUILD_DIR/third_party/shim_icu',
|
||||
|
@ -30,13 +30,16 @@
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <unicode/uchar.h>
|
||||
#include <unicode/umachine.h>
|
||||
#include <unicode/usprep.h>
|
||||
#include <unicode/ustring.h>
|
||||
#include <unicode/utf8.h>
|
||||
#include <unicode/utypes.h>
|
||||
#include <vector>
|
||||
|
||||
#include <absl/base/attributes.h>
|
||||
#include <boost/move/utility_core.hpp>
|
||||
#include <unicode/umachine.h>
|
||||
|
||||
#include "mongo/base/error_codes.h"
|
||||
#include "mongo/base/init.h" // IWYU pragma: keep
|
||||
|
@ -59,4 +59,7 @@ StatusWith<std::string> icuSaslPrep(StringData str, UStringPrepOptions = kUStrin
|
||||
*/
|
||||
StatusWith<std::string> icuX509DNPrep(StringData str);
|
||||
|
||||
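// Similar to mk_wcswidth, but uses the larger Unicode database for character lookup.
// When 'ambiguousAsFullWidth' is true, East Asian "ambiguous" characters count as two columns.
// When 'expandEmojiSequence' is false, an emoji that directly follows a zero-width joiner
// (U+200D) does not add to the width.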
|
||||
int icuGetStringWidth(StringData str, bool ambiguousAsFullWidth, bool expandEmojiSequence);
|
||||
|
||||
} // namespace mongo
|
||||
|
24
src/third_party/node/LICENSE
vendored
Normal file
24
src/third_party/node/LICENSE
vendored
Normal file
@ -0,0 +1,24 @@
|
||||
Node.js is licensed for use as follows:
|
||||
|
||||
"""
|
||||
Copyright Node.js contributors. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal in the Software without restriction, including without limitation the
|
||||
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
sell copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
IN THE SOFTWARE.
|
||||
"""
|
||||
|
69
src/third_party/node/icu_get_string_width.cpp
vendored
Normal file
69
src/third_party/node/icu_get_string_width.cpp
vendored
Normal file
@ -0,0 +1,69 @@
|
||||
#include "mongo/util/icu.h"
|
||||
|
||||
#include <unicode/uchar.h>
|
||||
#include <unicode/utf8.h>
|
||||
|
||||
namespace mongo {
|
||||
namespace {
|
||||
|
||||
// Returns the number of columns (0, 1 or 2) attributed to a single code point.
//
// The decision is driven by the code point's East Asian Width property:
//   - fullwidth and wide code points take two columns;
//   - ambiguous code points take two columns when 'ambiguousAsFullWidth' is set;
//   - ambiguous (otherwise) and neutral code points take two columns when they have the
//     Emoji_Presentation property;
//   - everything else takes zero columns if it is a control, format, enclosing-mark or
//     nonspacing-mark character, or an emoji modifier, and one column otherwise.
int getColumnWidth(UChar32 codepoint, bool ambiguousAsFullWidth) {
    const int eastAsianWidth = u_getIntPropertyValue(codepoint, UCHAR_EAST_ASIAN_WIDTH);

    if (eastAsianWidth == U_EA_FULLWIDTH || eastAsianWidth == U_EA_WIDE) {
        return 2;
    }

    if (eastAsianWidth == U_EA_AMBIGUOUS && ambiguousAsFullWidth) {
        return 2;
    }

    const bool ambiguousOrNeutral =
        eastAsianWidth == U_EA_AMBIGUOUS || eastAsianWidth == U_EA_NEUTRAL;
    if (ambiguousOrNeutral && u_hasBinaryProperty(codepoint, UCHAR_EMOJI_PRESENTATION)) {
        return 2;
    }

    // SOFT HYPHEN is a format character (Cf) but is not zero-width.
    if (codepoint == 0x00AD) {
        return 1;
    }

    const auto zeroWidthCategories = U_GC_CC_MASK |  // C0/C1 control code
        U_GC_CF_MASK |                               // Format control character
        U_GC_ME_MASK |                               // Enclosing mark
        U_GC_MN_MASK;                                // Nonspacing mark
    const bool isZeroWidth = (U_MASK(u_charType(codepoint)) & zeroWidthCategories) ||
        u_hasBinaryProperty(codepoint, UCHAR_EMOJI_MODIFIER);
    return isZeroWidth ? 0 : 1;
}
|
||||
} // namespace
|
||||
|
||||
// This is a modified version of GetStringWidth from node. We don't currently support wide strings
|
||||
// in the test runner, so it has been converted to use the U8 cursor API. For more details on
|
||||
// string width calculation see: https://www.unicode.org/reports/tr11/
|
||||
int icuGetStringWidth(StringData value, bool ambiguousAsFullWidth, bool expandEmojiSequence) {
|
||||
const uint8_t* str = reinterpret_cast<const uint8_t*>(value.data());
|
||||
UChar32 output = 0;
|
||||
UChar32 previous;
|
||||
int offset = 0;
|
||||
int strLength = static_cast<int>(value.length());
|
||||
int width = 0;
|
||||
|
||||
while (offset < strLength) {
|
||||
previous = output;
|
||||
U8_NEXT(str, offset, strLength, output);
|
||||
|
||||
if (!expandEmojiSequence && offset > 0 && previous == 0x200d &&
|
||||
(u_hasBinaryProperty(output, UCHAR_EMOJI_PRESENTATION) ||
|
||||
u_hasBinaryProperty(output, UCHAR_EMOJI_MODIFIER))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
width += getColumnWidth(output, ambiguousAsFullWidth);
|
||||
}
|
||||
|
||||
return width;
|
||||
}
|
||||
|
||||
} // namespace mongo
|
Loading…
Reference in New Issue
Block a user