From 03efa716f07fdba7ad500d4a1c37240fb340e5df Mon Sep 17 00:00:00 2001 From: Ruben Bridgewater Date: Tue, 31 Dec 2019 15:07:37 +0100 Subject: [PATCH] readline: improve getStringWidth() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. Simplify the getStringWidth function used by Intl builds by removing dead code (the options were unused) and by refactoring the logic. 2. Improve the getStringWidth unicode handling used by non-Intl builds. The getStringWidth function returned the wrong width for multiple inputs. It's now improved by supporting various zero width characters and more full width characters. PR-URL: https://github.com/nodejs/node/pull/31112 Reviewed-By: MichaΓ«l Zasso Reviewed-By: James M Snell --- lib/internal/readline/utils.js | 74 +++++++++--------------- lib/readline.js | 16 ++--- test/parallel/test-readline-interface.js | 60 +++++-------------- 3 files changed, 49 insertions(+), 101 deletions(-) diff --git a/lib/internal/readline/utils.js b/lib/internal/readline/utils.js index 6a2d1553655071..e4d718d94f400b 100644 --- a/lib/internal/readline/utils.js +++ b/lib/internal/readline/utils.js @@ -1,8 +1,6 @@ 'use strict'; const { - Boolean, - NumberIsInteger, RegExp, Symbol, } = primordials; @@ -21,7 +19,6 @@ const kEscape = '\x1b'; const kSubstringSearch = Symbol('kSubstringSearch'); let getStringWidth; -let isFullWidthCodePoint; function CSI(strings, ...args) { let ret = `${kEscape}[`; @@ -41,63 +38,37 @@ CSI.kClearScreenDown = CSI`0J`; if (internalBinding('config').hasIntl) { const icu = internalBinding('icu'); - getStringWidth = function getStringWidth(str, options) { - options = options || {}; - if (NumberIsInteger(str)) { - // Provide information about the character with code point 'str'. - return icu.getStringWidth( - str, - Boolean(options.ambiguousAsFullWidth), - false - ); - } - str = stripVTControlCharacters(String(str)); + // icu.getStringWidth(string, ambiguousAsFullWidth, expandEmojiSequence) + // Defaults: ambiguousAsFullWidth = false; expandEmojiSequence = true; + getStringWidth = function getStringWidth(str) { let width = 0; + str = stripVTControlCharacters(str); for (let i = 0; i < str.length; i++) { // Try to avoid calling into C++ by first handling the ASCII portion of // the string. If it is fully ASCII, we skip the C++ part. const code = str.charCodeAt(i); - if (code < 127) { - width += code >= 32; - continue; + if (code >= 127) { + width += icu.getStringWidth(str.slice(i)); + break; } - width += icu.getStringWidth( - str.slice(i), - Boolean(options.ambiguousAsFullWidth), - Boolean(options.expandEmojiSequence) - ); - break; + width += code >= 32 ? 1 : 0; } return width; }; - isFullWidthCodePoint = - function isFullWidthCodePoint(code, options) { - if (typeof code !== 'number') - return false; - return icu.getStringWidth(code, options) === 2; - }; } else { /** * Returns the number of columns required to display the given string. */ getStringWidth = function getStringWidth(str) { - if (NumberIsInteger(str)) - return isFullWidthCodePoint(str) ? 2 : 1; - let width = 0; - str = stripVTControlCharacters(String(str)); - - for (let i = 0; i < str.length; i++) { - const code = str.codePointAt(i); - - if (code >= kUTF16SurrogateThreshold) { // Surrogates. - i++; - } + str = stripVTControlCharacters(str); + for (const char of str) { + const code = char.codePointAt(0); if (isFullWidthCodePoint(code)) { width += 2; - } else { + } else if (!isZeroWidthCodePoint(code)) { width++; } } @@ -109,10 +80,10 @@ if (internalBinding('config').hasIntl) { * Returns true if the character represented by a given * Unicode code point is full-width. Otherwise returns false. */ - isFullWidthCodePoint = function isFullWidthCodePoint(code) { - // Code points are derived from: + const isFullWidthCodePoint = (code) => { + // Code points are partially derived from: // http://www.unicode.org/Public/UNIDATA/EastAsianWidth.txt - return NumberIsInteger(code) && code >= 0x1100 && ( + return code >= 0x1100 && ( code <= 0x115f || // Hangul Jamo code === 0x2329 || // LEFT-POINTING ANGLE BRACKET code === 0x232a || // RIGHT-POINTING ANGLE BRACKET @@ -139,10 +110,23 @@ if (internalBinding('config').hasIntl) { (code >= 0x1b000 && code <= 0x1b001) || // Enclosed Ideographic Supplement (code >= 0x1f200 && code <= 0x1f251) || + // Miscellaneous Symbols and Pictographs 0x1f300 - 0x1f5ff + // Emoticons 0x1f600 - 0x1f64f + (code >= 0x1f300 && code <= 0x1f64f) || // CJK Unified Ideographs Extension B .. Tertiary Ideographic Plane (code >= 0x20000 && code <= 0x3fffd) ); }; + + const isZeroWidthCodePoint = (code) => { + return code <= 0x1F || // C0 control codes + (code > 0x7F && code <= 0x9F) || // C1 control codes + (code >= 0x0300 && code <= 0x036F) || // Combining Diacritical Marks + (code >= 0x200B && code <= 0x200F) || // Modifying Invisible Characters + (code >= 0xFE00 && code <= 0xFE0F) || // Variation Selectors + (code >= 0xFE20 && code <= 0xFE2F) || // Combining Half Marks + (code >= 0xE0100 && code <= 0xE01EF); // Variation Selectors + }; } /** @@ -471,9 +455,7 @@ module.exports = { commonPrefix, emitKeys, getStringWidth, - isFullWidthCodePoint, kSubstringSearch, - kUTF16SurrogateThreshold, stripVTControlCharacters, CSI }; diff --git a/lib/readline.js b/lib/readline.js index 62565b60701d6e..4533557690f455 100644 --- a/lib/readline.js +++ b/lib/readline.js @@ -53,9 +53,7 @@ const { CSI, emitKeys, getStringWidth, - isFullWidthCodePoint, kSubstringSearch, - kUTF16SurrogateThreshold, stripVTControlCharacters } = require('internal/readline/utils'); @@ -743,18 +741,14 @@ Interface.prototype._getDisplayPos = function(str) { const col = this.columns; let rows = 0; str = stripVTControlCharacters(str); - for (let i = 0, len = str.length; i < len; i++) { - const code = str.codePointAt(i); - if (code >= kUTF16SurrogateThreshold) { // Surrogates. - i++; - } - if (code === 0x0a) { // new line \n - // rows must be incremented by 1 even if offset = 0 or col = +Infinity + for (const char of str) { + if (char === '\n') { + // Rows must be incremented by 1 even if offset = 0 or col = +Infinity. rows += MathCeil(offset / col) || 1; offset = 0; continue; } - const width = getStringWidth(code); + const width = getStringWidth(char); if (width === 0 || width === 1) { offset += width; } else { // width === 2 @@ -781,7 +775,7 @@ Interface.prototype.getCursorPos = function() { // move the cursor to the beginning of the next line. if (cols + 1 === columns && this.cursor < this.line.length && - isFullWidthCodePoint(this.line.codePointAt(this.cursor))) { + getStringWidth(this.line[this.cursor]) > 1) { rows++; cols = 0; } diff --git a/test/parallel/test-readline-interface.js b/test/parallel/test-readline-interface.js index f141fe125d5224..e0619c76d7511c 100644 --- a/test/parallel/test-readline-interface.js +++ b/test/parallel/test-readline-interface.js @@ -716,11 +716,7 @@ function isWarned(emitter) { fi.emit('keypress', '.', { name: 'right' }); cursorPos = rli.getCursorPos(); assert.strictEqual(cursorPos.rows, 0); - if (common.hasIntl) { - assert.strictEqual(cursorPos.cols, 2); - } else { - assert.strictEqual(cursorPos.cols, 1); - } + assert.strictEqual(cursorPos.cols, 2); rli.on('line', common.mustCall((line) => { assert.strictEqual(line, 'πŸ’»'); @@ -749,14 +745,7 @@ function isWarned(emitter) { fi.emit('data', 'πŸ•'); cursorPos = rli.getCursorPos(); assert.strictEqual(cursorPos.rows, 0); - - if (common.hasIntl) { - assert.strictEqual(cursorPos.cols, 2); - } else { - assert.strictEqual(cursorPos.cols, 1); - // Fix cursor position without internationalization - fi.emit('keypress', '.', { name: 'left' }); - } + assert.strictEqual(cursorPos.cols, 2); rli.on('line', common.mustCall((line) => { assert.strictEqual(line, 'πŸ•πŸ’»'); @@ -780,22 +769,12 @@ function isWarned(emitter) { fi.emit('keypress', '.', { name: 'right' }); let cursorPos = rli.getCursorPos(); assert.strictEqual(cursorPos.rows, 0); - if (common.hasIntl) { - assert.strictEqual(cursorPos.cols, 2); - } else { - assert.strictEqual(cursorPos.cols, 1); - // Fix cursor position without internationalization - fi.emit('keypress', '.', { name: 'right' }); - } + assert.strictEqual(cursorPos.cols, 2); fi.emit('data', 'πŸ•'); cursorPos = rli.getCursorPos(); assert.strictEqual(cursorPos.rows, 0); - if (common.hasIntl) { - assert.strictEqual(cursorPos.cols, 4); - } else { - assert.strictEqual(cursorPos.cols, 2); - } + assert.strictEqual(cursorPos.cols, 4); rli.on('line', common.mustCall((line) => { assert.strictEqual(line, 'πŸ’»πŸ•'); @@ -957,11 +936,7 @@ function isWarned(emitter) { fi.emit('data', 'πŸ’»'); let cursorPos = rli.getCursorPos(); assert.strictEqual(cursorPos.rows, 0); - if (common.hasIntl) { - assert.strictEqual(cursorPos.cols, 2); - } else { - assert.strictEqual(cursorPos.cols, 1); - } + assert.strictEqual(cursorPos.cols, 2); // Delete left character fi.emit('keypress', '.', { ctrl: true, name: 'h' }); cursorPos = rli.getCursorPos(); @@ -1144,27 +1119,24 @@ function isWarned(emitter) { } } - // isFullWidthCodePoint() should return false for non-numeric values - [true, false, null, undefined, {}, [], 'あ'].forEach((v) => { - assert.strictEqual(internalReadline.isFullWidthCodePoint('あ'), false); - }); - // Wide characters should be treated as two columns. - assert.strictEqual(internalReadline.isFullWidthCodePoint('a'.charCodeAt(0)), - false); - assert.strictEqual(internalReadline.isFullWidthCodePoint('あ'.charCodeAt(0)), - true); - assert.strictEqual(internalReadline.isFullWidthCodePoint('θ°’'.charCodeAt(0)), - true); - assert.strictEqual(internalReadline.isFullWidthCodePoint('κ³ '.charCodeAt(0)), - true); - assert.strictEqual(internalReadline.isFullWidthCodePoint(0x1f251), true); + assert.strictEqual(internalReadline.getStringWidth('a'), 1); + assert.strictEqual(internalReadline.getStringWidth('あ'), 2); + assert.strictEqual(internalReadline.getStringWidth('θ°’'), 2); + assert.strictEqual(internalReadline.getStringWidth('κ³ '), 2); + assert.strictEqual( + internalReadline.getStringWidth(String.fromCodePoint(0x1f251)), 2); assert.strictEqual(internalReadline.getStringWidth('abcde'), 5); assert.strictEqual(internalReadline.getStringWidth('叀池や'), 6); assert.strictEqual(internalReadline.getStringWidth('γƒŽγƒΌγƒ‰.js'), 9); assert.strictEqual(internalReadline.getStringWidth('δ½ ε₯½'), 4); assert.strictEqual(internalReadline.getStringWidth('μ•ˆλ…•ν•˜μ„Έμš”'), 10); assert.strictEqual(internalReadline.getStringWidth('A\ud83c\ude00BC'), 5); + assert.strictEqual(internalReadline.getStringWidth('πŸ‘¨β€πŸ‘©β€πŸ‘¦β€πŸ‘¦'), 8); + assert.strictEqual(internalReadline.getStringWidth('πŸ•π·γ‚πŸ’»πŸ˜€'), 9); + // TODO(BridgeAR): This should have a width of 4. + assert.strictEqual(internalReadline.getStringWidth('⓬β“ͺ'), 2); + assert.strictEqual(internalReadline.getStringWidth('\u0301\u200D\u200E'), 0); // Check if vt control chars are stripped assert.strictEqual(