From 1b418298284ca48f92873ff0d3eebe179d8468ed Mon Sep 17 00:00:00 2001 From: Ruben Bridgewater Date: Sat, 11 Jan 2020 19:48:40 +0100 Subject: [PATCH] util: improve unicode support The array grouping function relies on the width of the characters. It was not calculated correctly so far, since it used the string length instead. This improves the unicode output by calculating the mono-spaced font width (other fonts might differ). PR-URL: https://github.com/nodejs/node/pull/31319 Reviewed-By: James M Snell Reviewed-By: Steven R Loomis Reviewed-By: Rich Trott Reviewed-By: Minwoo Jung --- lib/internal/cli_table.js | 2 +- lib/internal/readline/utils.js | 117 ---------------- lib/internal/repl/utils.js | 6 +- lib/internal/util/inspect.js | 131 ++++++++++++++++-- lib/readline.js | 8 +- test/parallel/test-icu-stringwidth.js | 58 ++++---- test/parallel/test-readline-interface.js | 55 ++++---- test/parallel/test-readline-tab-complete.js | 2 +- test/parallel/test-repl-history-navigation.js | 2 +- test/parallel/test-repl-top-level-await.js | 2 +- test/parallel/test-util-inspect.js | 20 +++ 11 files changed, 211 insertions(+), 192 deletions(-) diff --git a/lib/internal/cli_table.js b/lib/internal/cli_table.js index f0f09b4f8fe870..853f02797fce41 100644 --- a/lib/internal/cli_table.js +++ b/lib/internal/cli_table.js @@ -6,7 +6,7 @@ const { ObjectPrototypeHasOwnProperty, } = primordials; -const { getStringWidth } = require('internal/readline/utils'); +const { getStringWidth } = require('internal/util/inspect'); // The use of Unicode characters below is the only non-comment use of non-ASCII // Unicode characters in Node.js built-in modules. 
If they are ever removed or diff --git a/lib/internal/readline/utils.js b/lib/internal/readline/utils.js index ffe0cee9d4ba3d..0b9fe8cde4f5ce 100644 --- a/lib/internal/readline/utils.js +++ b/lib/internal/readline/utils.js @@ -1,25 +1,13 @@ 'use strict'; const { - RegExp, Symbol, } = primordials; -// Regex used for ansi escape code splitting -// Adopted from https://github.com/chalk/ansi-regex/blob/master/index.js -// License: MIT, authors: @sindresorhus, Qix-, arjunmehta and LitoMore -// Matches all ansi escape code sequences in a string -const ansiPattern = '[\\u001B\\u009B][[\\]()#;?]*' + - '(?:(?:(?:[a-zA-Z\\d]*(?:;[-a-zA-Z\\d\\/#&.:=?%@~_]*)*)?\\u0007)' + - '|(?:(?:\\d{1,4}(?:;\\d{0,4})*)?[\\dA-PR-TZcf-ntqry=><~]))'; -const ansi = new RegExp(ansiPattern, 'g'); - const kUTF16SurrogateThreshold = 0x10000; // 2 ** 16 const kEscape = '\x1b'; const kSubstringSearch = Symbol('kSubstringSearch'); -let getStringWidth; - function CSI(strings, ...args) { let ret = `${kEscape}[`; for (let n = 0; n < strings.length; n++) { @@ -59,109 +47,6 @@ function charLengthAt(str, i) { return str.codePointAt(i) >= kUTF16SurrogateThreshold ? 2 : 1; } -if (internalBinding('config').hasIntl) { - const icu = internalBinding('icu'); - // icu.getStringWidth(string, ambiguousAsFullWidth, expandEmojiSequence) - // Defaults: ambiguousAsFullWidth = false; expandEmojiSequence = true; - // TODO(BridgeAR): Expose the options to the user. That is probably the - // best thing possible at the moment, since it's difficult to know what - // the receiving end supports. - getStringWidth = function getStringWidth(str) { - let width = 0; - str = stripVTControlCharacters(str); - for (let i = 0; i < str.length; i++) { - // Try to avoid calling into C++ by first handling the ASCII portion of - // the string. If it is fully ASCII, we skip the C++ part. - const code = str.charCodeAt(i); - if (code >= 127) { - width += icu.getStringWidth(str.slice(i)); - break; - } - width += code >= 32 ? 
1 : 0; - } - return width; - }; -} else { - /** - * Returns the number of columns required to display the given string. - */ - getStringWidth = function getStringWidth(str) { - let width = 0; - - str = stripVTControlCharacters(str); - - for (const char of str) { - const code = char.codePointAt(0); - if (isFullWidthCodePoint(code)) { - width += 2; - } else if (!isZeroWidthCodePoint(code)) { - width++; - } - } - - return width; - }; - - /** - * Returns true if the character represented by a given - * Unicode code point is full-width. Otherwise returns false. - */ - const isFullWidthCodePoint = (code) => { - // Code points are partially derived from: - // http://www.unicode.org/Public/UNIDATA/EastAsianWidth.txt - return code >= 0x1100 && ( - code <= 0x115f || // Hangul Jamo - code === 0x2329 || // LEFT-POINTING ANGLE BRACKET - code === 0x232a || // RIGHT-POINTING ANGLE BRACKET - // CJK Radicals Supplement .. Enclosed CJK Letters and Months - (code >= 0x2e80 && code <= 0x3247 && code !== 0x303f) || - // Enclosed CJK Letters and Months .. CJK Unified Ideographs Extension A - (code >= 0x3250 && code <= 0x4dbf) || - // CJK Unified Ideographs .. Yi Radicals - (code >= 0x4e00 && code <= 0xa4c6) || - // Hangul Jamo Extended-A - (code >= 0xa960 && code <= 0xa97c) || - // Hangul Syllables - (code >= 0xac00 && code <= 0xd7a3) || - // CJK Compatibility Ideographs - (code >= 0xf900 && code <= 0xfaff) || - // Vertical Forms - (code >= 0xfe10 && code <= 0xfe19) || - // CJK Compatibility Forms .. Small Form Variants - (code >= 0xfe30 && code <= 0xfe6b) || - // Halfwidth and Fullwidth Forms - (code >= 0xff01 && code <= 0xff60) || - (code >= 0xffe0 && code <= 0xffe6) || - // Kana Supplement - (code >= 0x1b000 && code <= 0x1b001) || - // Enclosed Ideographic Supplement - (code >= 0x1f200 && code <= 0x1f251) || - // Miscellaneous Symbols and Pictographs .. Emoticons - (code >= 0x1f300 && code <= 0x1f64f) || - // CJK Unified Ideographs Extension B .. 
Tertiary Ideographic Plane - (code >= 0x20000 && code <= 0x3fffd) - ); - }; - - const isZeroWidthCodePoint = (code) => { - return code <= 0x1F || // C0 control codes - (code > 0x7F && code <= 0x9F) || // C1 control codes - (code >= 0x0300 && code <= 0x036F) || // Combining Diacritical Marks - (code >= 0x200B && code <= 0x200F) || // Modifying Invisible Characters - (code >= 0xFE00 && code <= 0xFE0F) || // Variation Selectors - (code >= 0xFE20 && code <= 0xFE2F) || // Combining Half Marks - (code >= 0xE0100 && code <= 0xE01EF); // Variation Selectors - }; -} - -/** - * Tries to remove all VT control characters. Use to estimate displayed - * string width. May be buggy due to not running a real state machine - */ -function stripVTControlCharacters(str) { - return str.replace(ansi, ''); -} - /* Some patterns seen in terminal key escape codes, derived from combos seen at http://www.midnight-commander.org/browser/lib/tty/key.c @@ -477,8 +362,6 @@ module.exports = { charLengthLeft, commonPrefix, emitKeys, - getStringWidth, kSubstringSearch, - stripVTControlCharacters, CSI }; diff --git a/lib/internal/repl/utils.js b/lib/internal/repl/utils.js index 3449eab785fe2c..49a40e3030a3e2 100644 --- a/lib/internal/repl/utils.js +++ b/lib/internal/repl/utils.js @@ -32,11 +32,13 @@ const { const { commonPrefix, - getStringWidth, kSubstringSearch, } = require('internal/readline/utils'); -const { inspect } = require('util'); +const { + getStringWidth, + inspect, +} = require('internal/util/inspect'); const debug = require('internal/util/debuglog').debuglog('repl'); diff --git a/lib/internal/util/inspect.js b/lib/internal/util/inspect.js index 1169e92fdec871..46eea7fc79711b 100644 --- a/lib/internal/util/inspect.js +++ b/lib/internal/util/inspect.js @@ -193,6 +193,17 @@ const meta = [ '', '', '', '', '', '', '', '\\\\' ]; +// Regex used for ansi escape code splitting +// Adopted from https://github.com/chalk/ansi-regex/blob/master/index.js +// License: MIT, authors: @sindresorhus, Qix-, 
arjunmehta and LitoMore +// Matches all ansi escape code sequences in a string +const ansiPattern = '[\\u001B\\u009B][[\\]()#;?]*' + + '(?:(?:(?:[a-zA-Z\\d]*(?:;[-a-zA-Z\\d\\/#&.:=?%@~_]*)*)?\\u0007)' + + '|(?:(?:\\d{1,4}(?:;\\d{0,4})*)?[\\dA-PR-TZcf-ntqry=><~]))'; +const ansi = new RegExp(ansiPattern, 'g'); + +let getStringWidth; + function getUserOptions(ctx) { return { stylize: ctx.stylize, @@ -1163,7 +1174,7 @@ function groupArrayElements(ctx, output, value) { // entries length of all output entries. We have to remove colors first, // otherwise the length would not be calculated properly. for (; i < outputLength; i++) { - const len = ctx.colors ? removeColors(output[i]).length : output[i].length; + const len = getStringWidth(output[i], ctx.colors); dataLen[i] = len; totalLength += len + separatorSpace; if (maxLength < len) @@ -1206,8 +1217,6 @@ function groupArrayElements(ctx, output, value) { if (columns <= 1) { return output; } - // TODO(BridgeAR): Add unicode support. Use the readline getStringWidth - // function. const tmp = []; const maxLineLength = []; for (let i = 0; i < columns; i++) { @@ -1582,11 +1591,8 @@ function formatProperty(ctx, value, recurseTimes, key, type, desc) { const diff = (ctx.compact !== true || type !== kObjectType) ? 2 : 3; ctx.indentationLvl += diff; str = formatValue(ctx, desc.value, recurseTimes); - if (diff === 3) { - const len = ctx.colors ? 
removeColors(str).length : str.length; - if (ctx.breakLength < len) { - extra = `\n${' '.repeat(ctx.indentationLvl)}`; - } + if (diff === 3 && ctx.breakLength < getStringWidth(str, ctx.colors)) { + extra = `\n${' '.repeat(ctx.indentationLvl)}`; } ctx.indentationLvl -= diff; } else if (desc.get !== undefined) { @@ -1898,9 +1904,116 @@ function formatWithOptions(inspectOptions, ...args) { return str; } +if (internalBinding('config').hasIntl) { + const icu = internalBinding('icu'); + // icu.getStringWidth(string, ambiguousAsFullWidth, expandEmojiSequence) + // Defaults: ambiguousAsFullWidth = false; expandEmojiSequence = true; + // TODO(BridgeAR): Expose the options to the user. That is probably the + // best thing possible at the moment, since it's difficult to know what + // the receiving end supports. + getStringWidth = function getStringWidth(str, removeControlChars = true) { + let width = 0; + if (removeControlChars) + str = stripVTControlCharacters(str); + for (let i = 0; i < str.length; i++) { + // Try to avoid calling into C++ by first handling the ASCII portion of + // the string. If it is fully ASCII, we skip the C++ part. + const code = str.charCodeAt(i); + if (code >= 127) { + width += icu.getStringWidth(str.slice(i)); + break; + } + width += code >= 32 ? 1 : 0; + } + return width; + }; +} else { + /** + * Returns the number of columns required to display the given string. + */ + getStringWidth = function getStringWidth(str, removeControlChars = true) { + let width = 0; + + if (removeControlChars) + str = stripVTControlCharacters(str); + + for (const char of str) { + const code = char.codePointAt(0); + if (isFullWidthCodePoint(code)) { + width += 2; + } else if (!isZeroWidthCodePoint(code)) { + width++; + } + } + + return width; + }; + + /** + * Returns true if the character represented by a given + * Unicode code point is full-width. Otherwise returns false. 
+ */ + const isFullWidthCodePoint = (code) => { + // Code points are partially derived from: + // http://www.unicode.org/Public/UNIDATA/EastAsianWidth.txt + return code >= 0x1100 && ( + code <= 0x115f || // Hangul Jamo + code === 0x2329 || // LEFT-POINTING ANGLE BRACKET + code === 0x232a || // RIGHT-POINTING ANGLE BRACKET + // CJK Radicals Supplement .. Enclosed CJK Letters and Months + (code >= 0x2e80 && code <= 0x3247 && code !== 0x303f) || + // Enclosed CJK Letters and Months .. CJK Unified Ideographs Extension A + (code >= 0x3250 && code <= 0x4dbf) || + // CJK Unified Ideographs .. Yi Radicals + (code >= 0x4e00 && code <= 0xa4c6) || + // Hangul Jamo Extended-A + (code >= 0xa960 && code <= 0xa97c) || + // Hangul Syllables + (code >= 0xac00 && code <= 0xd7a3) || + // CJK Compatibility Ideographs + (code >= 0xf900 && code <= 0xfaff) || + // Vertical Forms + (code >= 0xfe10 && code <= 0xfe19) || + // CJK Compatibility Forms .. Small Form Variants + (code >= 0xfe30 && code <= 0xfe6b) || + // Halfwidth and Fullwidth Forms + (code >= 0xff01 && code <= 0xff60) || + (code >= 0xffe0 && code <= 0xffe6) || + // Kana Supplement + (code >= 0x1b000 && code <= 0x1b001) || + // Enclosed Ideographic Supplement + (code >= 0x1f200 && code <= 0x1f251) || + // Miscellaneous Symbols and Pictographs 0x1f300 - 0x1f5ff + // Emoticons 0x1f600 - 0x1f64f + (code >= 0x1f300 && code <= 0x1f64f) || + // CJK Unified Ideographs Extension B .. 
Tertiary Ideographic Plane + (code >= 0x20000 && code <= 0x3fffd) + ); + }; + + const isZeroWidthCodePoint = (code) => { + return code <= 0x1F || // C0 control codes + (code > 0x7F && code <= 0x9F) || // C1 control codes + (code >= 0x300 && code <= 0x36F) || // Combining Diacritical Marks + (code >= 0x200B && code <= 0x200F) || // Modifying Invisible Characters + (code >= 0xFE00 && code <= 0xFE0F) || // Variation Selectors + (code >= 0xFE20 && code <= 0xFE2F) || // Combining Half Marks + (code >= 0xE0100 && code <= 0xE01EF); // Variation Selectors + }; +} + +/** + * Remove all VT control characters. Use to estimate displayed string width. + */ +function stripVTControlCharacters(str) { + return str.replace(ansi, ''); +} + module.exports = { inspect, format, formatWithOptions, - inspectDefaultOptions + getStringWidth, + inspectDefaultOptions, + stripVTControlCharacters }; diff --git a/lib/readline.js b/lib/readline.js index a87cd8f183c973..e86fb50b22cec4 100644 --- a/lib/readline.js +++ b/lib/readline.js @@ -46,7 +46,11 @@ const { ERR_INVALID_OPT_VALUE } = require('internal/errors').codes; const { validateString } = require('internal/validators'); -const { inspect } = require('internal/util/inspect'); +const { + inspect, + getStringWidth, + stripVTControlCharacters, +} = require('internal/util/inspect'); const EventEmitter = require('events'); const { charLengthAt, @@ -54,9 +58,7 @@ const { commonPrefix, CSI, emitKeys, - getStringWidth, kSubstringSearch, - stripVTControlCharacters } = require('internal/readline/utils'); const { clearTimeout, setTimeout } = require('timers'); diff --git a/test/parallel/test-icu-stringwidth.js b/test/parallel/test-icu-stringwidth.js index a427601803afd3..e2f22941158578 100644 --- a/test/parallel/test-icu-stringwidth.js +++ b/test/parallel/test-icu-stringwidth.js @@ -6,46 +6,46 @@ if (!common.hasIntl) common.skip('missing Intl'); const assert = require('assert'); -const readline = require('internal/readline/utils'); +const { 
getStringWidth } = require('internal/util/inspect'); // Test column width // Ll (Lowercase Letter): LATIN SMALL LETTER A -assert.strictEqual(readline.getStringWidth('a'), 1); -assert.strictEqual(readline.getStringWidth(String.fromCharCode(0x0061)), 1); +assert.strictEqual(getStringWidth('a'), 1); +assert.strictEqual(getStringWidth(String.fromCharCode(0x0061)), 1); // Lo (Other Letter) -assert.strictEqual(readline.getStringWidth('丁'), 2); -assert.strictEqual(readline.getStringWidth(String.fromCharCode(0x4E01)), 2); +assert.strictEqual(getStringWidth('丁'), 2); +assert.strictEqual(getStringWidth(String.fromCharCode(0x4E01)), 2); // Surrogate pairs -assert.strictEqual(readline.getStringWidth('\ud83d\udc78\ud83c\udfff'), 4); -assert.strictEqual(readline.getStringWidth('πŸ‘…'), 2); +assert.strictEqual(getStringWidth('\ud83d\udc78\ud83c\udfff'), 4); +assert.strictEqual(getStringWidth('πŸ‘…'), 2); // Cs (Surrogate): High Surrogate -assert.strictEqual(readline.getStringWidth('\ud83d'), 1); +assert.strictEqual(getStringWidth('\ud83d'), 1); // Cs (Surrogate): Low Surrogate -assert.strictEqual(readline.getStringWidth('\udc78'), 1); +assert.strictEqual(getStringWidth('\udc78'), 1); // Cc (Control): NULL -assert.strictEqual(readline.getStringWidth('\u0000'), 0); +assert.strictEqual(getStringWidth('\u0000'), 0); // Cc (Control): BELL -assert.strictEqual(readline.getStringWidth(String.fromCharCode(0x0007)), 0); +assert.strictEqual(getStringWidth(String.fromCharCode(0x0007)), 0); // Cc (Control): LINE FEED -assert.strictEqual(readline.getStringWidth('\n'), 0); +assert.strictEqual(getStringWidth('\n'), 0); // Cf (Format): SOFT HYPHEN -assert.strictEqual(readline.getStringWidth(String.fromCharCode(0x00AD)), 1); +assert.strictEqual(getStringWidth(String.fromCharCode(0x00AD)), 1); // Cf (Format): LEFT-TO-RIGHT MARK // Cf (Format): RIGHT-TO-LEFT MARK -assert.strictEqual(readline.getStringWidth('\u200Ef\u200F'), 1); +assert.strictEqual(getStringWidth('\u200Ef\u200F'), 1); // Cn 
(Unassigned): Not a character -assert.strictEqual(readline.getStringWidth(String.fromCharCode(0x10FFEF)), 1); +assert.strictEqual(getStringWidth(String.fromCharCode(0x10FFEF)), 1); // Cn (Unassigned): Not a character (but in a CJK range) -assert.strictEqual(readline.getStringWidth(String.fromCharCode(0x3FFEF)), 1); +assert.strictEqual(getStringWidth(String.fromCharCode(0x3FFEF)), 1); // Mn (Nonspacing Mark): COMBINING ACUTE ACCENT -assert.strictEqual(readline.getStringWidth(String.fromCharCode(0x0301)), 0); +assert.strictEqual(getStringWidth(String.fromCharCode(0x0301)), 0); // Mc (Spacing Mark): BALINESE ADEG ADEG // Chosen as its Canonical_Combining_Class is not 0, but is not a 0-width // character. -assert.strictEqual(readline.getStringWidth(String.fromCharCode(0x1B44)), 1); +assert.strictEqual(getStringWidth(String.fromCharCode(0x1B44)), 1); // Me (Enclosing Mark): COMBINING ENCLOSING CIRCLE -assert.strictEqual(readline.getStringWidth(String.fromCharCode(0x20DD)), 0); +assert.strictEqual(getStringWidth(String.fromCharCode(0x20DD)), 0); // The following is an emoji sequence with ZWJ (zero-width-joiner). In some // implementations, it is represented as a single glyph, in other @@ -53,37 +53,37 @@ assert.strictEqual(readline.getStringWidth(String.fromCharCode(0x20DD)), 0); // component will be counted individually, since not a lot of systems support // these fully. // See https://www.unicode.org/reports/tr51/tr51-16.html#Emoji_ZWJ_Sequences -assert.strictEqual(readline.getStringWidth('πŸ‘©β€πŸ‘©β€πŸ‘§β€πŸ‘§'), 8); +assert.strictEqual(getStringWidth('πŸ‘©β€πŸ‘©β€πŸ‘§β€πŸ‘§'), 8); // TODO(BridgeAR): This should have a width of two and six. The heart contains // the \uFE0F variation selector that indicates that it should be displayed as // emoji instead of as text. Emojis are all full width characters when not being // rendered as text. 
// https://en.wikipedia.org/wiki/Variation_Selectors_(Unicode_block) -assert.strictEqual(readline.getStringWidth('❀️'), 1); -assert.strictEqual(readline.getStringWidth('πŸ‘©β€β€οΈβ€πŸ‘©'), 5); +assert.strictEqual(getStringWidth('❀️'), 1); +assert.strictEqual(getStringWidth('πŸ‘©β€β€οΈβ€πŸ‘©'), 5); // The length of one is correct. It is an emoji treated as text. -assert.strictEqual(readline.getStringWidth('❀'), 1); +assert.strictEqual(getStringWidth('❀'), 1); // By default, unicode characters whose width is considered ambiguous will // be considered half-width. For these characters, getStringWidth will return // 1. In some contexts, however, it is more appropriate to consider them full // width. By default, the algorithm will assume half width. -assert.strictEqual(readline.getStringWidth('\u01d4'), 1); +assert.strictEqual(getStringWidth('\u01d4'), 1); // Control chars and combining chars are zero -assert.strictEqual(readline.getStringWidth('\u200E\n\u220A\u20D2'), 1); +assert.strictEqual(getStringWidth('\u200E\n\u220A\u20D2'), 1); // Test that the fast path for ASCII characters yields results consistent // with the 'slow' path. 
for (let i = 0; i < 256; i++) { const char = String.fromCharCode(i); assert.strictEqual( - readline.getStringWidth(char + 'πŸŽ‰'), - readline.getStringWidth(char) + 2); + getStringWidth(char + 'πŸŽ‰'), + getStringWidth(char) + 2); if (i < 32 || (i >= 127 && i < 160)) { // Control character - assert.strictEqual(readline.getStringWidth(char), 0); + assert.strictEqual(getStringWidth(char), 0); } else { // Regular ASCII character - assert.strictEqual(readline.getStringWidth(char), 1); + assert.strictEqual(getStringWidth(char), 1); } } diff --git a/test/parallel/test-readline-interface.js b/test/parallel/test-readline-interface.js index e0619c76d7511c..3295997a6db688 100644 --- a/test/parallel/test-readline-interface.js +++ b/test/parallel/test-readline-interface.js @@ -25,7 +25,10 @@ const common = require('../common'); const assert = require('assert'); const readline = require('readline'); -const internalReadline = require('internal/readline/utils'); +const { + getStringWidth, + stripVTControlCharacters +} = require('internal/util/inspect'); const EventEmitter = require('events').EventEmitter; const { Writable, Readable } = require('stream'); @@ -1120,48 +1123,44 @@ function isWarned(emitter) { } // Wide characters should be treated as two columns. 
- assert.strictEqual(internalReadline.getStringWidth('a'), 1); - assert.strictEqual(internalReadline.getStringWidth('あ'), 2); - assert.strictEqual(internalReadline.getStringWidth('θ°’'), 2); - assert.strictEqual(internalReadline.getStringWidth('κ³ '), 2); - assert.strictEqual( - internalReadline.getStringWidth(String.fromCodePoint(0x1f251)), 2); - assert.strictEqual(internalReadline.getStringWidth('abcde'), 5); - assert.strictEqual(internalReadline.getStringWidth('叀池や'), 6); - assert.strictEqual(internalReadline.getStringWidth('γƒŽγƒΌγƒ‰.js'), 9); - assert.strictEqual(internalReadline.getStringWidth('δ½ ε₯½'), 4); - assert.strictEqual(internalReadline.getStringWidth('μ•ˆλ…•ν•˜μ„Έμš”'), 10); - assert.strictEqual(internalReadline.getStringWidth('A\ud83c\ude00BC'), 5); - assert.strictEqual(internalReadline.getStringWidth('πŸ‘¨β€πŸ‘©β€πŸ‘¦β€πŸ‘¦'), 8); - assert.strictEqual(internalReadline.getStringWidth('πŸ•π·γ‚πŸ’»πŸ˜€'), 9); + assert.strictEqual(getStringWidth('a'), 1); + assert.strictEqual(getStringWidth('あ'), 2); + assert.strictEqual(getStringWidth('θ°’'), 2); + assert.strictEqual(getStringWidth('κ³ '), 2); + assert.strictEqual(getStringWidth(String.fromCodePoint(0x1f251)), 2); + assert.strictEqual(getStringWidth('abcde'), 5); + assert.strictEqual(getStringWidth('叀池や'), 6); + assert.strictEqual(getStringWidth('γƒŽγƒΌγƒ‰.js'), 9); + assert.strictEqual(getStringWidth('δ½ ε₯½'), 4); + assert.strictEqual(getStringWidth('μ•ˆλ…•ν•˜μ„Έμš”'), 10); + assert.strictEqual(getStringWidth('A\ud83c\ude00BC'), 5); + assert.strictEqual(getStringWidth('πŸ‘¨β€πŸ‘©β€πŸ‘¦β€πŸ‘¦'), 8); + assert.strictEqual(getStringWidth('πŸ•π·γ‚πŸ’»πŸ˜€'), 9); // TODO(BridgeAR): This should have a width of 4. 
- assert.strictEqual(internalReadline.getStringWidth('⓬β“ͺ'), 2); - assert.strictEqual(internalReadline.getStringWidth('\u0301\u200D\u200E'), 0); + assert.strictEqual(getStringWidth('⓬β“ͺ'), 2); + assert.strictEqual(getStringWidth('\u0301\u200D\u200E'), 0); // Check if vt control chars are stripped assert.strictEqual( - internalReadline.stripVTControlCharacters('\u001b[31m> \u001b[39m'), + stripVTControlCharacters('\u001b[31m> \u001b[39m'), '> ' ); assert.strictEqual( - internalReadline.stripVTControlCharacters('\u001b[31m> \u001b[39m> '), + stripVTControlCharacters('\u001b[31m> \u001b[39m> '), '> > ' ); assert.strictEqual( - internalReadline.stripVTControlCharacters('\u001b[31m\u001b[39m'), + stripVTControlCharacters('\u001b[31m\u001b[39m'), '' ); assert.strictEqual( - internalReadline.stripVTControlCharacters('> '), + stripVTControlCharacters('> '), '> ' ); - assert.strictEqual(internalReadline - .getStringWidth('\u001b[31m> \u001b[39m'), 2); - assert.strictEqual(internalReadline - .getStringWidth('\u001b[31m> \u001b[39m> '), 4); - assert.strictEqual(internalReadline - .getStringWidth('\u001b[31m\u001b[39m'), 0); - assert.strictEqual(internalReadline.getStringWidth('> '), 2); + assert.strictEqual(getStringWidth('\u001b[31m> \u001b[39m'), 2); + assert.strictEqual(getStringWidth('\u001b[31m> \u001b[39m> '), 4); + assert.strictEqual(getStringWidth('\u001b[31m\u001b[39m'), 0); + assert.strictEqual(getStringWidth('> '), 2); { const fi = new FakeInput(); diff --git a/test/parallel/test-readline-tab-complete.js b/test/parallel/test-readline-tab-complete.js index bbdb18bd8015de..5afa60e4dae9fd 100644 --- a/test/parallel/test-readline-tab-complete.js +++ b/test/parallel/test-readline-tab-complete.js @@ -6,7 +6,7 @@ const common = require('../common'); const readline = require('readline'); const assert = require('assert'); const EventEmitter = require('events').EventEmitter; -const { getStringWidth } = require('internal/readline/utils'); +const { getStringWidth } = 
require('internal/util/inspect'); // This test verifies that the tab completion supports unicode and the writes // are limited to the minimum. diff --git a/test/parallel/test-repl-history-navigation.js b/test/parallel/test-repl-history-navigation.js index 8a9f1a6dadf386..c4ca7f8147b021 100644 --- a/test/parallel/test-repl-history-navigation.js +++ b/test/parallel/test-repl-history-navigation.js @@ -157,7 +157,7 @@ const tests = [ env: { NODE_REPL_HISTORY: defaultHistoryPath }, skip: !process.features.inspector, test: [ - // あ is a fill width character with a length of one. + // あ is a full width character with a length of one. // πŸ• is a full width character with a length of two. // 𐐷 is a half width character with the length of two. // '\u0301', '0x200D', '\u200E' are zero width characters. diff --git a/test/parallel/test-repl-top-level-await.js b/test/parallel/test-repl-top-level-await.js index 59f0cb1617ad4a..48e9e37605a085 100644 --- a/test/parallel/test-repl-top-level-await.js +++ b/test/parallel/test-repl-top-level-await.js @@ -3,7 +3,7 @@ const common = require('../common'); const ArrayStream = require('../common/arraystream'); const assert = require('assert'); -const { stripVTControlCharacters } = require('internal/readline/utils'); +const { stripVTControlCharacters } = require('internal/util/inspect'); const repl = require('repl'); common.skipIfInspectorDisabled(); diff --git a/test/parallel/test-util-inspect.js b/test/parallel/test-util-inspect.js index 5df203e6d33be0..2f7e8a7cf93ca7 100644 --- a/test/parallel/test-util-inspect.js +++ b/test/parallel/test-util-inspect.js @@ -2441,6 +2441,26 @@ assert.strictEqual( assert.strictEqual(out, expected); + // Unicode support. あ has a length of one and a width of two. 
+ obj = [ + '123', '123', '123', '123', 'あああ', + '123', '123', '123', '123', 'あああ' + ]; + + out = util.inspect(obj, { compact: 3 }); + + expected = [ + '[', + " '123', '123',", + " '123', '123',", + " 'あああ', '123',", + " '123', '123',", + " '123', 'あああ'", + ']', + ].join('\n'); + + assert.strictEqual(out, expected); + // Verify that array grouping and line consolidation does not happen together. obj = { a: {