diff --git a/lib/internal/readline/utils.js b/lib/internal/readline/utils.js index 7c24a8e195a301..3257ee3a1e4a1f 100644 --- a/lib/internal/readline/utils.js +++ b/lib/internal/readline/utils.js @@ -63,6 +63,9 @@ if (internalBinding('config').hasIntl) { const icu = internalBinding('icu'); // icu.getStringWidth(string, ambiguousAsFullWidth, expandEmojiSequence) // Defaults: ambiguousAsFullWidth = false; expandEmojiSequence = true; + // TODO(BridgeAR): Expose the options to the user. That is probably the + // best thing possible at the moment, since it's difficult to know what + // the receiving end supports. getStringWidth = function getStringWidth(str) { let width = 0; str = stripVTControlCharacters(str); diff --git a/src/node_i18n.cc b/src/node_i18n.cc index 7cb7ab35d5060c..46c6ef39f861ec 100644 --- a/src/node_i18n.cc +++ b/src/node_i18n.cc @@ -767,18 +767,10 @@ static int GetColumnWidth(UChar32 codepoint, // Returns the column width for the given String. static void GetStringWidth(const FunctionCallbackInfo& args) { Environment* env = Environment::GetCurrent(args); - if (args.Length() < 1) - return; + CHECK(args[0]->IsString()); bool ambiguous_as_full_width = args[1]->IsTrue(); - bool expand_emoji_sequence = args[2]->IsTrue(); - - if (args[0]->IsNumber()) { - uint32_t val; - if (!args[0]->Uint32Value(env->context()).To(&val)) return; - args.GetReturnValue().Set(GetColumnWidth(val, ambiguous_as_full_width)); - return; - } + bool expand_emoji_sequence = !args[2]->IsBoolean() || args[2]->IsTrue(); TwoByteValue value(env->isolate(), args[0]); // reinterpret_cast is required by windows to compile @@ -803,6 +795,7 @@ static void GetStringWidth(const FunctionCallbackInfo& args) { // in advance if a particular sequence is going to be supported. // The expand_emoji_sequence option allows the caller to skip this // check and count each code within an emoji sequence separately. + // https://www.unicode.org/reports/tr51/tr51-16.html#Emoji_ZWJ_Sequences if (!expand_emoji_sequence && n > 0 && p == 0x200d && // 0x200d == ZWJ (zero width joiner) (u_hasBinaryProperty(c, UCHAR_EMOJI_PRESENTATION) || diff --git a/test/parallel/test-icu-stringwidth.js b/test/parallel/test-icu-stringwidth.js index 48384f916d9126..a427601803afd3 100644 --- a/test/parallel/test-icu-stringwidth.js +++ b/test/parallel/test-icu-stringwidth.js @@ -12,82 +12,78 @@ const readline = require('internal/readline/utils'); // Ll (Lowercase Letter): LATIN SMALL LETTER A assert.strictEqual(readline.getStringWidth('a'), 1); -assert.strictEqual(readline.getStringWidth(0x0061), 1); +assert.strictEqual(readline.getStringWidth(String.fromCharCode(0x0061)), 1); // Lo (Other Letter) assert.strictEqual(readline.getStringWidth('丁'), 2); -assert.strictEqual(readline.getStringWidth(0x4E01), 2); +assert.strictEqual(readline.getStringWidth(String.fromCharCode(0x4E01)), 2); // Surrogate pairs -assert.strictEqual(readline.getStringWidth('\ud83d\udc78\ud83c\udfff'), 2); +assert.strictEqual(readline.getStringWidth('\ud83d\udc78\ud83c\udfff'), 4); assert.strictEqual(readline.getStringWidth('πŸ‘…'), 2); // Cs (Surrogate): High Surrogate assert.strictEqual(readline.getStringWidth('\ud83d'), 1); // Cs (Surrogate): Low Surrogate assert.strictEqual(readline.getStringWidth('\udc78'), 1); // Cc (Control): NULL -assert.strictEqual(readline.getStringWidth(0), 0); +assert.strictEqual(readline.getStringWidth('\u0000'), 0); // Cc (Control): BELL -assert.strictEqual(readline.getStringWidth(0x0007), 0); +assert.strictEqual(readline.getStringWidth(String.fromCharCode(0x0007)), 0); // Cc (Control): LINE FEED assert.strictEqual(readline.getStringWidth('\n'), 0); // Cf (Format): SOFT HYPHEN -assert.strictEqual(readline.getStringWidth(0x00AD), 1); +assert.strictEqual(readline.getStringWidth(String.fromCharCode(0x00AD)), 1); // Cf (Format): LEFT-TO-RIGHT MARK // Cf (Format): RIGHT-TO-LEFT MARK assert.strictEqual(readline.getStringWidth('\u200Ef\u200F'), 1); // Cn (Unassigned): Not a character -assert.strictEqual(readline.getStringWidth(0x10FFEF), 1); +assert.strictEqual(readline.getStringWidth(String.fromCharCode(0x10FFEF)), 1); // Cn (Unassigned): Not a character (but in a CJK range) -assert.strictEqual(readline.getStringWidth(0x3FFEF), 2); +assert.strictEqual(readline.getStringWidth(String.fromCharCode(0x3FFEF)), 1); // Mn (Nonspacing Mark): COMBINING ACUTE ACCENT -assert.strictEqual(readline.getStringWidth(0x0301), 0); +assert.strictEqual(readline.getStringWidth(String.fromCharCode(0x0301)), 0); // Mc (Spacing Mark): BALINESE ADEG ADEG // Chosen as its Canonical_Combining_Class is not 0, but is not a 0-width // character. -assert.strictEqual(readline.getStringWidth(0x1B44), 1); +assert.strictEqual(readline.getStringWidth(String.fromCharCode(0x1B44)), 1); // Me (Enclosing Mark): COMBINING ENCLOSING CIRCLE -assert.strictEqual(readline.getStringWidth(0x20DD), 0); +assert.strictEqual(readline.getStringWidth(String.fromCharCode(0x20DD)), 0); -// The following is an emoji sequence. In some implementations, it is -// represented as a single glyph, in other implementations as a sequence -// of individual glyphs. By default, the algorithm will assume the single -// glyph interpretation and return a value of 2. By passing the -// expandEmojiSequence: true option, each component will be counted -// individually. -assert.strictEqual(readline.getStringWidth('πŸ‘©β€πŸ‘©β€πŸ‘§β€πŸ‘§'), 2); -assert.strictEqual( - readline.getStringWidth('πŸ‘©β€πŸ‘©β€πŸ‘§β€πŸ‘§', { expandEmojiSequence: true }), 8); +// The following is an emoji sequence with ZWJ (zero-width-joiner). In some +// implementations, it is represented as a single glyph, in other +// implementations as a sequence of individual glyphs. By default, each +// component will be counted individually, since not a lot of systems support +// these fully. +// See https://www.unicode.org/reports/tr51/tr51-16.html#Emoji_ZWJ_Sequences +assert.strictEqual(readline.getStringWidth('πŸ‘©β€πŸ‘©β€πŸ‘§β€πŸ‘§'), 8); +// TODO(BridgeAR): This should have a width of two and six. The heart contains +// the \uFE0F variation selector that indicates that it should be displayed as +// emoji instead of as text. Emojis are all full width characters when not being +// rendered as text. +// https://en.wikipedia.org/wiki/Variation_Selectors_(Unicode_block) +assert.strictEqual(readline.getStringWidth('❀️'), 1); +assert.strictEqual(readline.getStringWidth('πŸ‘©β€β€οΈβ€πŸ‘©'), 5); +// The length of one is correct. It is an emoji treated as text. +assert.strictEqual(readline.getStringWidth('❀'), 1); // By default, unicode characters whose width is considered ambiguous will // be considered half-width. For these characters, getStringWidth will return // 1. In some contexts, however, it is more appropriate to consider them full -// width. By default, the algorithm will assume half width. By passing -// the ambiguousAsFullWidth: true option, ambiguous characters will be counted -// as 2 columns. +// width. By default, the algorithm will assume half width. assert.strictEqual(readline.getStringWidth('\u01d4'), 1); -assert.strictEqual( - readline.getStringWidth('\u01d4', { ambiguousAsFullWidth: true }), 2); // Control chars and combining chars are zero assert.strictEqual(readline.getStringWidth('\u200E\n\u220A\u20D2'), 1); // Test that the fast path for ASCII characters yields results consistent // with the 'slow' path. -for (const ambiguousAsFullWidth of [ false, true ]) { - for (let i = 0; i < 256; i++) { - const char = String.fromCharCode(i); - assert.strictEqual( - readline.getStringWidth(i, { ambiguousAsFullWidth }), - readline.getStringWidth(char, { ambiguousAsFullWidth })); - assert.strictEqual( - readline.getStringWidth(char + 'πŸŽ‰', { ambiguousAsFullWidth }), - readline.getStringWidth(char, { ambiguousAsFullWidth }) + 2); +for (let i = 0; i < 256; i++) { + const char = String.fromCharCode(i); + assert.strictEqual( + readline.getStringWidth(char + 'πŸŽ‰'), + readline.getStringWidth(char) + 2); - if (i < 32 || (i >= 127 && i < 160)) { // Control character - assert.strictEqual( - readline.getStringWidth(i, { ambiguousAsFullWidth }), 0); - } else if (i < 127) { // Regular ASCII character - assert.strictEqual( - readline.getStringWidth(i, { ambiguousAsFullWidth }), 1); - } + if (i < 32 || (i >= 127 && i < 160)) { // Control character + assert.strictEqual(readline.getStringWidth(char), 0); + } else { // Regular ASCII character + assert.strictEqual(readline.getStringWidth(char), 1); } }