Skip to content

Commit

Permalink
util,readline: NFC-normalize strings before getStringWidth
Browse files Browse the repository at this point in the history
The assumption here is that decomposed characters render like their
composed character equivalents, and that working with the former
comes with a risk of over-estimating string widths given that
we compute them on a per-code-point basis. The regression test
added here (한글 in decomposed NFD form vs 한글 in composed NFC form) is an example of that happening.

PR-URL: #33052
Reviewed-By: Gus Caplan <me@gus.host>
Reviewed-By: Michaël Zasso <targos@protonmail.com>
Reviewed-By: Anto Aravinth <anto.aravinth.cse@gmail.com>
Reviewed-By: Colin Ihrig <cjihrig@gmail.com>
Reviewed-By: James M Snell <jasnell@gmail.com>
  • Loading branch information
addaleax authored and targos committed May 13, 2020
1 parent d217b79 commit e0e8a9a
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 5 deletions.
15 changes: 10 additions & 5 deletions lib/internal/util/inspect.js
Expand Up @@ -1914,6 +1914,13 @@ function formatWithOptions(inspectOptions, ...args) {
return str;
}

/**
 * Shared pre-processing step for both `getStringWidth` implementations.
 *
 * The input is first brought into Unicode NFC form. Widths are summed per
 * code point, so a decomposed sequence would otherwise be counted once for
 * every component instead of once for the composed character it is assumed
 * to render as.
 *
 * @param {string} str Text whose display width is about to be measured.
 * @param {boolean} removeControlChars When truthy, the normalized text is
 *   additionally passed through `stripVTControlCharacters` (defined
 *   elsewhere in this file).
 * @returns {string} The NFC-normalized, optionally stripped, string.
 */
function prepareStringForGetStringWidth(str, removeControlChars) {
  const composed = str.normalize('NFC');
  return removeControlChars ? stripVTControlCharacters(composed) : composed;
}

if (internalBinding('config').hasIntl) {
const icu = internalBinding('icu');
// icu.getStringWidth(string, ambiguousAsFullWidth, expandEmojiSequence)
Expand All @@ -1923,8 +1930,8 @@ if (internalBinding('config').hasIntl) {
// the receiving end supports.
getStringWidth = function getStringWidth(str, removeControlChars = true) {
let width = 0;
if (removeControlChars)
str = stripVTControlCharacters(str);

str = prepareStringForGetStringWidth(str, removeControlChars);
for (let i = 0; i < str.length; i++) {
// Try to avoid calling into C++ by first handling the ASCII portion of
// the string. If it is fully ASCII, we skip the C++ part.
Expand All @@ -1944,9 +1951,7 @@ if (internalBinding('config').hasIntl) {
getStringWidth = function getStringWidth(str, removeControlChars = true) {
let width = 0;

if (removeControlChars)
str = stripVTControlCharacters(str);

str = prepareStringForGetStringWidth(str, removeControlChars);
for (const char of str) {
const code = char.codePointAt(0);
if (isFullWidthCodePoint(code)) {
Expand Down
9 changes: 9 additions & 0 deletions test/parallel/test-icu-stringwidth.js
Expand Up @@ -87,3 +87,12 @@ for (let i = 0; i < 256; i++) {
assert.strictEqual(getStringWidth(char), 1);
}
}

{
  // Regression test: the same Hangul text in decomposed (NFD) and composed
  // (NFC) form must report the same display width, even though the two
  // strings differ in length (6 vs. 2 UTF-16 code units).
  const decomposed = '한글'.normalize('NFD');
  const composed = '한글'.normalize('NFC');

  assert.strictEqual(decomposed.length, 6);
  assert.strictEqual(composed.length, 2);

  // Both forms are expected to measure 4 columns.
  for (const text of [decomposed, composed]) {
    assert.strictEqual(getStringWidth(text), 4);
  }
}

0 comments on commit e0e8a9a

Please sign in to comment.