Skip to content

Commit

Permalink
util,readline: NFC-normalize strings before getStringWidth
Browse files Browse the repository at this point in the history
The assumption here is that decomposed characters render like their
composed character equivalents, and that working with the former
comes with a risk of over-estimating string widths given that
we compute them on a per-code-point basis. The regression test
added here (한글 vs 한글) is an example of that happening.

PR-URL: #33052
Reviewed-By: Gus Caplan <me@gus.host>
Reviewed-By: Michaël Zasso <targos@protonmail.com>
Reviewed-By: Anto Aravinth <anto.aravinth.cse@gmail.com>
Reviewed-By: Colin Ihrig <cjihrig@gmail.com>
Reviewed-By: James M Snell <jasnell@gmail.com>
  • Loading branch information
addaleax authored and BridgeAR committed Apr 28, 2020
1 parent 0f4d513 commit c239cc6
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 5 deletions.
15 changes: 10 additions & 5 deletions lib/internal/util/inspect.js
Expand Up @@ -1917,6 +1917,13 @@ function formatWithOptionsInternal(inspectOptions, ...args) {
return str;
}

function prepareStringForGetStringWidth(str, removeControlChars) {
str = str.normalize('NFC');
if (removeControlChars)
str = stripVTControlCharacters(str);
return str;
}

if (internalBinding('config').hasIntl) {
const icu = internalBinding('icu');
// icu.getStringWidth(string, ambiguousAsFullWidth, expandEmojiSequence)
Expand All @@ -1926,8 +1933,8 @@ if (internalBinding('config').hasIntl) {
// the receiving end supports.
getStringWidth = function getStringWidth(str, removeControlChars = true) {
let width = 0;
if (removeControlChars)
str = stripVTControlCharacters(str);

str = prepareStringForGetStringWidth(str, removeControlChars);
for (let i = 0; i < str.length; i++) {
// Try to avoid calling into C++ by first handling the ASCII portion of
// the string. If it is fully ASCII, we skip the C++ part.
Expand All @@ -1947,9 +1954,7 @@ if (internalBinding('config').hasIntl) {
getStringWidth = function getStringWidth(str, removeControlChars = true) {
let width = 0;

if (removeControlChars)
str = stripVTControlCharacters(str);

str = prepareStringForGetStringWidth(str, removeControlChars);
for (const char of str) {
const code = char.codePointAt(0);
if (isFullWidthCodePoint(code)) {
Expand Down
9 changes: 9 additions & 0 deletions test/parallel/test-icu-stringwidth.js
Expand Up @@ -87,3 +87,12 @@ for (let i = 0; i < 256; i++) {
assert.strictEqual(getStringWidth(char), 1);
}
}

{
const a = '한글'.normalize('NFD'); // 한글
const b = '한글'.normalize('NFC'); // 한글
assert.strictEqual(a.length, 6);
assert.strictEqual(b.length, 2);
assert.strictEqual(getStringWidth(a), 4);
assert.strictEqual(getStringWidth(b), 4);
}

0 comments on commit c239cc6

Please sign in to comment.