Skip to content

Commit

Permalink
util,readline: NFC-normalize strings before getStringWidth
Browse files Browse the repository at this point in the history
The assumption here is that decomposed characters render like their
composed character equivalents, and that working with the former
comes with a risk of over-estimating string widths given that
we compute them on a per-code-point basis. The regression test
added here (한글 in decomposed NFD form vs. 한글 in composed NFC form) is an example of that happening.

PR-URL: #33052
Reviewed-By: Gus Caplan <me@gus.host>
Reviewed-By: Michaël Zasso <targos@protonmail.com>
Reviewed-By: Anto Aravinth <anto.aravinth.cse@gmail.com>
Reviewed-By: Colin Ihrig <cjihrig@gmail.com>
Reviewed-By: James M Snell <jasnell@gmail.com>
  • Loading branch information
addaleax authored and BethGriggs committed Apr 28, 2020
1 parent 4abc45a commit c82c084
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 5 deletions.
15 changes: 10 additions & 5 deletions lib/internal/util/inspect.js
Expand Up @@ -1922,6 +1922,13 @@ function formatWithOptionsInternal(inspectOptions, ...args) {
return str;
}

/**
 * Prepares a string for display-width measurement: optionally strips VT
 * control sequences, then NFC-normalizes the result.
 *
 * @param {string} str The string to prepare.
 * @param {boolean} removeControlChars When truthy, `str` is passed through
 *   `stripVTControlCharacters()` before normalization.
 * @returns {string} The prepared string, always in NFC form.
 */
function prepareStringForGetStringWidth(str, removeControlChars) {
  // Strip before normalizing: an escape sequence located between a base
  // character and its combining marks (e.g. between Hangul jamo) prevents
  // composition, so normalizing first would leave decomposed code points
  // behind once the sequence is removed and their widths would be
  // over-counted on a per-code-point basis.
  if (removeControlChars)
    str = stripVTControlCharacters(str);
  return str.normalize('NFC');
}

if (internalBinding('config').hasIntl) {
const icu = internalBinding('icu');
// icu.getStringWidth(string, ambiguousAsFullWidth, expandEmojiSequence)
Expand All @@ -1931,8 +1938,8 @@ if (internalBinding('config').hasIntl) {
// the receiving end supports.
getStringWidth = function getStringWidth(str, removeControlChars = true) {
let width = 0;
if (removeControlChars)
str = stripVTControlCharacters(str);

str = prepareStringForGetStringWidth(str, removeControlChars);
for (let i = 0; i < str.length; i++) {
// Try to avoid calling into C++ by first handling the ASCII portion of
// the string. If it is fully ASCII, we skip the C++ part.
Expand All @@ -1952,9 +1959,7 @@ if (internalBinding('config').hasIntl) {
getStringWidth = function getStringWidth(str, removeControlChars = true) {
let width = 0;

if (removeControlChars)
str = stripVTControlCharacters(str);

str = prepareStringForGetStringWidth(str, removeControlChars);
for (const char of str) {
const code = char.codePointAt(0);
if (isFullWidthCodePoint(code)) {
Expand Down
9 changes: 9 additions & 0 deletions test/parallel/test-icu-stringwidth.js
Expand Up @@ -87,3 +87,12 @@ for (let i = 0; i < 256; i++) {
assert.strictEqual(getStringWidth(char), 1);
}
}

{
  // The same Hangul text must report the same display width whether it is
  // given in decomposed (NFD) or precomposed (NFC) form, even though the two
  // forms differ in UTF-16 length.
  const decomposed = '한글'.normalize('NFD');
  const composed = '한글'.normalize('NFC');

  assert.strictEqual(decomposed.length, 6);
  assert.strictEqual(composed.length, 2);

  const decomposedWidth = getStringWidth(decomposed);
  assert.strictEqual(decomposedWidth, 4);
  const composedWidth = getStringWidth(composed);
  assert.strictEqual(composedWidth, 4);
}

0 comments on commit c82c084

Please sign in to comment.