Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add "semantic line break" support (alternative to proseWrap: always) #16189

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/common/common-options.evaluate.js
Expand Up @@ -33,6 +33,10 @@ const options = {
value: "preserve",
description: "Wrap prose as-is.",
},
{
value: "sembr",
description: "Follow the 'Semantic Line Breaks' specification (https://sembr.org/).",
},
],
},
bracketSameLine: {
Expand Down
2 changes: 1 addition & 1 deletion src/index.d.ts
Expand Up @@ -384,7 +384,7 @@ export interface RequiredOptions extends doc.printer.Options {
* In some cases you may want to rely on editor/viewer soft wrapping instead, so this option allows you to opt out.
* @default "preserve"
*/
proseWrap: "always" | "never" | "preserve";
proseWrap: "always" | "never" | "preserve" | "sembr";
/**
* Include parentheses around a sole arrow function parameter.
* @default "always"
Expand Down
62 changes: 57 additions & 5 deletions src/language-markdown/print-whitespace.js
Expand Up @@ -11,7 +11,7 @@ import {
* @typedef {import("./utils.js").WhitespaceValue} WhitespaceValue
* @typedef {import("./utils.js").WordKind} WordKind
* @typedef {import("../common/ast-path.js").default} AstPath
* @typedef {"always" | "never" | "preserve"} ProseWrap
* @typedef {"always" | "never" | "preserve" | "sembr"} ProseWrap
* @typedef {{ next?: WordNode | null, previous?: WordNode | null }}
* AdjacentNodes Nodes adjacent to a `whitespace` node. Are always of type
* `word`.
Expand Down Expand Up @@ -58,6 +58,17 @@ const lineBreakBetweenTheseAndCJConvertsToSpace = new Set(
"!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
);

/**
 * Punctuation characters after which "sembr" mode inserts a line break.
 *
 * The Semantic Line Breaks specification (https://sembr.org/) says a break
 * MUST follow sentence-ending punctuation (period, exclamation mark, question
 * mark) and SHOULD follow clause-level punctuation (comma, semicolon, colon,
 * em dash).
 *
 * The specification also distinguishes independent from dependent clauses.
 * That distinction cannot be made here, so only the trailing punctuation is
 * considered, relying on the `MAY` wording in the spec.
 */
const sembrBreakAfter = new Set([".", "!", "?", ",", ";", ":", "—"]);

/**
* Determine the preferred style of spacing between Chinese or Japanese and non-CJK
* characters in the parent `sentence` node.
Expand Down Expand Up @@ -223,7 +234,7 @@ function isNonCJKOrKoreanLetter(kind) {
*/
function isBreakable(path, value, proseWrap, isLink, canBeSpace) {
if (
proseWrap !== "always" ||
(proseWrap !== "always" && proseWrap !== "sembr") ||
path.hasAncestor((node) => SINGLE_LINE_NODE_TYPES.has(node.type))
) {
return false;
Expand All @@ -233,8 +244,8 @@ function isBreakable(path, value, proseWrap, isLink, canBeSpace) {
return value !== "";
}

// Spaces are always breakable
if (value === " ") {
// Spaces are always breakable in 'always' mode
if (proseWrap === "always" && value === " ") {
return true;
}

Expand Down Expand Up @@ -265,6 +276,40 @@ function isBreakable(path, value, proseWrap, isLink, canBeSpace) {
return true;
}

/**
 * Decide whether a whitespace must be printed as a mandatory line break.
 *
 * Callers are expected to invoke this only after `isBreakable(...)` has
 * returned `true` for the same arguments; that check is not repeated here.
 *
 * A break is forced only in "sembr" mode, and only when the node before the
 * whitespace is a non-CJK word whose last character is one of the
 * `sembrBreakAfter` punctuation marks.
 *
 * @param {AstPath} path Path positioned on the whitespace; `path.previous` is read.
 * @param {WhitespaceValue} value Not consulted by this function.
 * @param {ProseWrap} proseWrap Active `proseWrap` option.
 * @param {boolean} isLink Not consulted by this function.
 * @param {boolean} canBeSpace Not consulted by this function.
 * @returns {boolean} `true` when the whitespace has to become a line break.
 */
function isForcedBreak(path, value, proseWrap, isLink, canBeSpace) {
  // Only semantic-line-break mode ever forces a break.
  if (proseWrap === "sembr") {
    // NOTE(review): presumably a `word` node, per the AdjacentNodes typedef — confirm.
    const precedingWord = path.previous;
    const followsNonCjkWord = precedingWord?.kind === KIND_NON_CJK;

    return (
      followsNonCjkWord && sembrBreakAfter.has(precedingWord.value.at(-1))
    );
  }

  return false;
}

/**
* @param {AstPath} path
* @param {WhitespaceValue} value
Expand All @@ -282,7 +327,14 @@ function printWhitespace(path, value, proseWrap, isLink) {
(value === "\n" && lineBreakCanBeConvertedToSpace(path, isLink));

if (isBreakable(path, value, proseWrap, isLink, canBeSpace)) {
return canBeSpace ? line : softline;
const forcedBreak = isForcedBreak(
path,
value,
proseWrap,
isLink,
canBeSpace,
);
return forcedBreak ? hardline : canBeSpace ? line : softline;
}

return canBeSpace ? " " : "";
Expand Down
2 changes: 1 addition & 1 deletion src/language-markdown/printer-markdown.js
Expand Up @@ -350,7 +350,7 @@ function genericPrint(path, options, print) {
];
}
case "definition": {
const lineOrSpace = options.proseWrap === "always" ? line : " ";
const lineOrSpace = options.proseWrap === "always" || options.proseWrap === "sembr" ? line : " ";
return group([
printLinkReference(node),
":",
Expand Down
1 change: 1 addition & 0 deletions src/language-yaml/print/mapping-item.js
Expand Up @@ -176,6 +176,7 @@ function isAbsolutelyPrintedAsSingleLineNode(node, options) {
switch (options.proseWrap) {
case "never":
return !node.value.includes("\n");
case "sembr":
case "always":
return !/[\n ]/.test(node.value);
default:
Expand Down