mirror of
https://github.com/nodejs/node.git
synced 2024-11-21 21:19:50 +01:00
3646872f60
This makes sure the DEL character (ASCII 127) is detected as a zero width character even if Node.js is not built with ICU. Signed-off-by: Ruben Bridgewater <ruben@bridgewater.de> PR-URL: https://github.com/nodejs/node/pull/33650 Reviewed-By: James M Snell <jasnell@gmail.com> Reviewed-By: Anna Henningsen <anna@addaleax.net>
96 lines
4.0 KiB
JavaScript
96 lines
4.0 KiB
JavaScript
// Flags: --expose-internals
|
|
'use strict';
|
|
const common = require('../common');
|
|
|
|
const assert = require('assert');
|
|
const { getStringWidth } = require('internal/util/inspect');
|
|
|
|
// Test column width.
//
// Every entry pairs an input string with the column width getStringWidth() is
// expected to report for it. The comment ahead of an entry names the Unicode
// general category the input is meant to exercise.
{
  const categoryCases = [
    // Ll (Lowercase Letter): LATIN SMALL LETTER A
    ['a', 1],
    [String.fromCharCode(0x0061), 1],

    // Lo (Other Letter)
    ['丁', 2],
    [String.fromCharCode(0x4E01), 2],

    // Surrogate pairs
    ['\ud83d\udc78\ud83c\udfff', 4],
    ['👅', 2],

    // Cs (Surrogate): High Surrogate
    ['\ud83d', 1],

    // Cs (Surrogate): Low Surrogate
    ['\udc78', 1],

    // Cc (Control): NULL
    ['\u0000', 0],

    // Cc (Control): BELL
    [String.fromCharCode(0x0007), 0],

    // Cc (Control): LINE FEED
    ['\n', 0],

    // Cf (Format): SOFT HYPHEN
    [String.fromCharCode(0x00AD), 1],

    // Cf (Format): LEFT-TO-RIGHT MARK
    // Cf (Format): RIGHT-TO-LEFT MARK
    ['\u200Ef\u200F', 1],

    // Cn (Unassigned): Not a character
    // NOTE(review): String.fromCharCode() keeps only the low 16 bits of its
    // argument, so this entry and the next one both produce U+FFEF instead of
    // the code points written here. String.fromCodePoint() would produce the
    // intended input - confirm the expected widths before switching.
    [String.fromCharCode(0x10FFEF), 1],

    // Cn (Unassigned): Not a character (but in a CJK range)
    [String.fromCharCode(0x3FFEF), 1],

    // Mn (Nonspacing Mark): COMBINING ACUTE ACCENT
    [String.fromCharCode(0x0301), 0],

    // Mc (Spacing Mark): BALINESE ADEG ADEG
    // Picked because its Canonical_Combining_Class is not 0 even though it is
    // not a 0-width character.
    [String.fromCharCode(0x1B44), 1],

    // Me (Enclosing Mark): COMBINING ENCLOSING CIRCLE
    [String.fromCharCode(0x20DD), 0],
  ];

  for (const [input, expectedWidth] of categoryCases) {
    assert.strictEqual(getStringWidth(input), expectedWidth);
  }
}
|
|
|
|
// The first entry below is an emoji sequence with ZWJ (zero-width-joiner).
// Some implementations draw such a sequence as a single glyph, others as a run
// of individual glyphs. Few systems support these fully, so by default every
// component is counted on its own.
// See https://www.unicode.org/reports/tr51/tr51-16.html#Emoji_ZWJ_Sequences
// NOTE(review): joiners are invisible in most editors - confirm that the
// literals below still contain them.
{
  const emojiCases = [
    ['👩👩👧👧', 8],

    // TODO(BridgeAR): The next two widths should be two and six. The heart
    // carries the \uFE0F variation selector, which asks for emoji rather than
    // text presentation, and emojis are all full width characters when they
    // are not rendered as text.
    // https://en.wikipedia.org/wiki/Variation_Selectors_(Unicode_block)
    ['❤️', 1],
    ['👩❤️👩', 5],

    // A width of one is correct here. It is an emoji treated as text.
    ['❤', 1],
  ];

  for (const [input, expectedWidth] of emojiCases) {
    assert.strictEqual(getStringWidth(input), expectedWidth);
  }
}
|
|
|
|
{
  // Unicode characters whose width is considered ambiguous are treated as
  // half-width by default, so getStringWidth() reports 1 for them. Some
  // contexts would be better served by treating them as full width, but half
  // width is what the algorithm assumes unless told otherwise.
  const ambiguousWidthChar = '\u01d4';
  assert.strictEqual(getStringWidth(ambiguousWidthChar), 1);

  // Control chars and combining chars are zero, which leaves a single column
  // for the whole string.
  const mostlyZeroWidth = '\u200E\n\u220A\u20D2';
  assert.strictEqual(getStringWidth(mostlyZeroWidth), 1);
}
|
|
|
|
// The fast path for ASCII characters has to yield results consistent with the
// 'slow' path. Every code unit from 0 through 255 is measured on its own and
// again with an emoji appended.
for (let code = 0; code < 256; code++) {
  const char = String.fromCharCode(code);

  // Appending the emoji must add exactly two columns to the width of the
  // character alone.
  assert.strictEqual(getStringWidth(`${char}🎉`), getStringWidth(char) + 2);

  // Codes below 32 and codes 127 through 159 are control characters and take
  // no columns. Every other code in the range takes one.
  const isControl = code < 32 || (code >= 127 && code < 160);
  assert.strictEqual(getStringWidth(char), isControl ? 0 : 1);
}
|
|
|
|
// Only runs when common.hasIntl is set.
// NOTE(review): presumably because String.prototype.normalize() needs Intl
// support to do real work - confirm.
if (common.hasIntl) {
  // The same Hangul text in decomposed (NFD) and composed (NFC) form. The two
  // forms differ in length but must occupy the same number of columns.
  const decomposed = '한글'.normalize('NFD'); // 한글
  const composed = '한글'.normalize('NFC'); // 한글

  assert.strictEqual(decomposed.length, 6);
  assert.strictEqual(composed.length, 2);

  assert.strictEqual(getStringWidth(decomposed), 4);
  assert.strictEqual(getStringWidth(composed), 4);
}
|