From 79e2721800ef899089b98250918b9a13011c041d Mon Sep 17 00:00:00 2001 From: Ika Date: Thu, 24 Jan 2019 21:13:02 +0800 Subject: [PATCH 1/4] test: add tests --- tests/html_whitespace/__snapshots__/jsfmt.spec.js.snap | 4 ++++ tests/html_whitespace/non-breaking-whitespace.html | 2 ++ 2 files changed, 6 insertions(+) diff --git a/tests/html_whitespace/__snapshots__/jsfmt.spec.js.snap b/tests/html_whitespace/__snapshots__/jsfmt.spec.js.snap index ac9e673c06e5..6cabd49ea65b 100644 --- a/tests/html_whitespace/__snapshots__/jsfmt.spec.js.snap +++ b/tests/html_whitespace/__snapshots__/jsfmt.spec.js.snap @@ -203,6 +203,8 @@ printWidth: 80 Nihil aut odit omnis. Quam maxime est molestiae. Maxime dolorem dolores voluptas quaerat ut qui sunt vitae error. Nihil aut odit omnis. Quam maxime est molestiae. Maxime dolorem dolores voluptas quaerat ut qui sunt vitae error. + +Prix : 32 € =====================================output===================================== @@ -214,6 +216,8 @@ printWidth: 80 Nihil aut odit omnis. Quam maxime est molestiae. Maxime dolorem dolores voluptas quaerat ut qui sunt vitae error. + +Prix : 32 € ================================================================================ `; diff --git a/tests/html_whitespace/non-breaking-whitespace.html b/tests/html_whitespace/non-breaking-whitespace.html index 371f0eeae4b8..55a8b9477611 100644 --- a/tests/html_whitespace/non-breaking-whitespace.html +++ b/tests/html_whitespace/non-breaking-whitespace.html @@ -2,3 +2,5 @@ Nihil aut odit omnis. Quam maxime est molestiae. Maxime dolorem dolores voluptas quaerat ut qui sunt vitae error. Nihil aut odit omnis. Quam maxime est molestiae. Maxime dolorem dolores voluptas quaerat ut qui sunt vitae error. + +Prix : 32 € From 96b684c5900c53445db8cd81116c59d035e4dfc2 Mon Sep 17 00:00:00 2001 From: Ika Date: Thu, 24 Jan 2019 21:28:59 +0800 Subject: [PATCH 2/4] fix(html): do not format non-normal whitespaces as normal whitespaces --- src/language-html/printer-html.js | 3 +-- tests/html_whitespace/__snapshots__/jsfmt.spec.js.snap | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/language-html/printer-html.js b/src/language-html/printer-html.js index 896b54a3715a..292ca8384d6f 100644 --- a/src/language-html/printer-html.js +++ b/src/language-html/printer-html.js @@ -900,8 +900,7 @@ function getTextValueParts(node, value = node.value) { dedentString(value.replace(/^\s*?\n|\n\s*?$/g, "")), hardline ) - : // non-breaking whitespace: 0xA0 - join(line, value.split(/[^\S\xA0]+/)).parts; + : join(line, value.split(/[ \t\n]+/)).parts; } function printEmbeddedAttributeValue(node, originalTextToDoc, options) { diff --git a/tests/html_whitespace/__snapshots__/jsfmt.spec.js.snap b/tests/html_whitespace/__snapshots__/jsfmt.spec.js.snap index 6cabd49ea65b..4e8a8a5339be 100644 --- a/tests/html_whitespace/__snapshots__/jsfmt.spec.js.snap +++ b/tests/html_whitespace/__snapshots__/jsfmt.spec.js.snap @@ -217,7 +217,7 @@ printWidth: 80 >Nihil aut odit omnis. Quam maxime est molestiae. Maxime dolorem dolores voluptas quaerat ut qui sunt vitae error. -Prix : 32 € +Prix : 32 € ================================================================================ `; From 581c000908f83b839103264475112e579c93ab09 Mon Sep 17 00:00:00 2001 From: Ika Date: Thu, 24 Jan 2019 21:37:21 +0800 Subject: [PATCH 3/4] docs: add changelog item --- CHANGELOG.unreleased.md | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/CHANGELOG.unreleased.md b/CHANGELOG.unreleased.md index 7b5a8f5be608..22e151508167 100644 --- a/CHANGELOG.unreleased.md +++ b/CHANGELOG.unreleased.md @@ -41,3 +41,30 @@ Examples: ``` --> + +- HTML: Do not format non-normal whitespaces as normal whitespaces ([#5797] by [@ikatyang]) + + Previously, only non-breaking whitespaces (U+00A0) are marked as non-normal whitespace, + which means other non-normal whitespaces such as non-breaking narrow whitespaces (U+202F) + could be formatted as normal whitespaces, which breaks the output. Instead of using blacklist, + we now use whitelist to mark every whitespace that is not + + - standard whitespace + - line break + - tab + + as non-normal whitespace. + + (`·` represents a non-breaking narrow whitespace) + + + ```html + + Prix·:·32·€ + + + Prix : 32 € + + + Prix·:·32·€ + ``` From 6a760820f7dd7859df5b2842b65dbbf0e184fc52 Mon Sep 17 00:00:00 2001 From: Ika Date: Mon, 28 Jan 2019 15:56:35 +0800 Subject: [PATCH 4/4] fix: ASCII whitespace --- CHANGELOG.unreleased.md | 12 +++--------- src/language-html/printer-html.js | 3 ++- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.unreleased.md b/CHANGELOG.unreleased.md index 22e151508167..a87822287d75 100644 --- a/CHANGELOG.unreleased.md +++ b/CHANGELOG.unreleased.md @@ -42,18 +42,12 @@ Examples: --> -- HTML: Do not format non-normal whitespaces as normal whitespaces ([#5797] by [@ikatyang]) +- HTML: Do not format non-normal whitespace as normal whitespace ([#5797] by [@ikatyang]) Previously, only non-breaking whitespaces (U+00A0) are marked as non-normal whitespace, which means other non-normal whitespaces such as non-breaking narrow whitespaces (U+202F) - could be formatted as normal whitespaces, which breaks the output. Instead of using blacklist, - we now use whitelist to mark every whitespace that is not - - - standard whitespace - - line break - - tab - - as non-normal whitespace. + could be formatted as normal whitespaces, which breaks the output. We now follow the spec to + exclude all non-[ASCII whitespace](https://infra.spec.whatwg.org/#ascii-whitespace) from whitespace normalization. (`·` represents a non-breaking narrow whitespace) diff --git a/src/language-html/printer-html.js b/src/language-html/printer-html.js index 292ca8384d6f..20f4bc1b1405 100644 --- a/src/language-html/printer-html.js +++ b/src/language-html/printer-html.js @@ -900,7 +900,8 @@ function getTextValueParts(node, value = node.value) { dedentString(value.replace(/^\s*?\n|\n\s*?$/g, "")), hardline ) - : join(line, value.split(/[ \t\n]+/)).parts; + : // https://infra.spec.whatwg.org/#ascii-whitespace + join(line, value.split(/[\t\n\f\r ]+/)).parts; } function printEmbeddedAttributeValue(node, originalTextToDoc, options) {