feat: support multi-token stops in 'until', macro delimiters

The behavior of the `until` argspec was fixed and, while doing so, support for multi-token stops was added. It also now properly supports macro delimiters (which was really just a by-product of applying uniform treatment to all logic related to finding braces). This fixes #46.
siefkenj · Jan 20, 2024 · 2067555 · 2067555
1 parent 191c53c
commit 2067555
Show file tree

Hide file tree

Showing 6 changed files with 153 additions and 49 deletions.
diff --git a/packages/unified-latex-util-argspec/libs/argspec-types.ts b/packages/unified-latex-util-argspec/libs/argspec-types.ts
@@ -3,7 +3,10 @@ export type Node = Optional | Mandatory | Verbatim | Body | Until;
 type Optional = OptionalArg | OptionalStar | OptionalToken | Embellishment;
 
 // Make several optional properties available in all `AstNode`s
-interface AstNode extends Partial<Arg>, Partial<LeadingWhitespace>, DefaultArgument {
+interface AstNode
+    extends Partial<Arg>,
+        Partial<LeadingWhitespace>,
+        DefaultArgument {
     type: string;
 }
 

diff --git a/packages/unified-latex-util-arguments/libs/gobble-single-argument.ts b/packages/unified-latex-util-arguments/libs/gobble-single-argument.ts
@@ -38,18 +38,18 @@ export function gobbleSingleArgument(
     // If `argSpec` specifies leading whitespace is not allowed,
     // this function does nothing.
     const gobbleWhitespace = argSpec.noLeadingWhitespace
-        ? () => { }
+        ? () => {}
         : () => {
-            while (currPos < nodes.length) {
-                if (!match.whitespace(nodes[currPos])) {
-                    break;
-                }
-                currPos++;
-            }
-        };
+              while (currPos < nodes.length) {
+                  if (!match.whitespace(nodes[currPos])) {
+                      break;
+                  }
+                  currPos++;
+              }
+          };
 
-    const openMark = parseBrace(argSpec.openBrace);
-    const closeMark = parseBrace(argSpec.closeBrace);
+    const openMark = parseToken(argSpec.openBrace);
+    const closeMark = parseToken(argSpec.closeBrace);
 
     // Only mandatory arguments can be wrapped in {...}.
     // Since we already parse such things as groups, we need to
@@ -148,13 +148,10 @@ export function gobbleSingleArgument(
                 break;
             }
             case "until": {
-                const stopTokens: (string | Ast.Whitespace)[] = argSpec.stopTokens.map(rawToken => {
-                    if (rawToken === " ") {
-                        return { type: "whitespace" };
-                    }
-                    return rawToken;
-                });
-
+                const stopTokens = argSpec.stopTokens.map(parseToken);
+                // TODO: in order to match xparse's behavior, multiple spaces at the start
+                // or in the middle should be collapsed to a single whitespace token,
+                // and spaces at the end should be ignored.
                 let nextStartPos = startPos;
                 let bracePos: [number, number] | undefined;
                 while (nextStartPos < nodes.length) {
@@ -164,7 +161,9 @@ export function gobbleSingleArgument(
                         undefined,
                         stopTokens[0]
                     );
-                    if (!bracePos) { break; }
+                    if (!bracePos) {
+                        break;
+                    }
                     let nextBracePos: [number, number] | undefined = bracePos;
                     let i = 1;
                     for (; i < stopTokens.length && nextBracePos; i++) {
@@ -176,7 +175,9 @@ export function gobbleSingleArgument(
                             /* endPos */ nextBracePos[1] + 1
                         );
                     }
-                    if (i >= stopTokens.length && nextBracePos) { break; }
+                    if (i >= stopTokens.length && nextBracePos) {
+                        break;
+                    }
                     nextStartPos = bracePos[0] + 1;
                 }
 
@@ -189,7 +190,9 @@ export function gobbleSingleArgument(
                     openMark: "",
                     closeMark: printRaw(argSpec.stopTokens),
                 });
-                currPos = bracePos[1];
+                // Since `stopTokens` may consist of more than one token,
+                // we need to advance `currPos` further
+                currPos = bracePos[1] + stopTokens.length - 1;
                 if (currPos < nodes.length) {
                     currPos++;
                 }
@@ -245,7 +248,7 @@ function cloneStringNode(node: Ast.String, content: string): Ast.String {
     return Object.assign({}, node, { content });
 }
 
-type Braces = string | Ast.Macro | Ast.Whitespace
+type Braces = string | Ast.Macro | Ast.Whitespace;
 /**
  * Find the position of the open brace and the closing brace.
  * Returns undefined if the brace isn't found.
@@ -263,7 +266,9 @@ function findBracePositions(
     let closeMarkPos: number | undefined = startPos;
     if (openMark) {
         openMarkPos = findDelimiter(nodes, openMark, openMarkPos, openMarkPos);
-        if (openMarkPos === undefined) { return; }
+        if (openMarkPos === undefined) {
+            return;
+        }
         closeMarkPos = openMarkPos + 1;
     }
     if (!closeMark) {
@@ -285,18 +290,27 @@ function findBracePositions(
         }
         return [openMarkPos, closeMarkPos];
     }
-    closeMarkPos = findDelimiter(nodes, closeMark, closeMarkPos, endPos)
-    if (closeMarkPos === undefined) { return; }
+    closeMarkPos = findDelimiter(nodes, closeMark, closeMarkPos, endPos);
+    if (closeMarkPos === undefined) {
+        return;
+    }
     return [openMarkPos, closeMarkPos];
 }
 
-function findDelimiter(nodes: Ast.Node[], token: Braces, startPos: number, endPos?: number): number | undefined {
+function findDelimiter(
+    nodes: Ast.Node[],
+    token: Braces,
+    startPos: number,
+    endPos?: number
+): number | undefined {
     let closeMarkPos = scan(nodes, token, {
         startIndex: startPos,
         allowSubstringMatches: true,
-        endIndex: endPos
+        endIndex: endPos,
     });
-    if (closeMarkPos === null) { return; }
+    if (closeMarkPos === null) {
+        return;
+    }
     const closingNode = nodes[closeMarkPos];
     if (match.anyString(closingNode) && typeof token === "string") {
         const closingNodeContent = closingNode.content;
@@ -308,7 +322,9 @@ function findDelimiter(nodes: Ast.Node[], token: Braces, startPos: number, endPo
                 // `closeMarkPos` need to be increased, so double-check that we are bounded by
                 // `endPos` before mutating `nodes`. `closeMarkPos` is already less than or equal to `endPos`,
                 // so we only need to check for an equality here.
-                if (closeMarkPos === endPos) { return; }
+                if (closeMarkPos === endPos) {
+                    return;
+                }
                 nodes.splice(
                     closeMarkPos,
                     0,
@@ -332,13 +348,17 @@ function findDelimiter(nodes: Ast.Node[], token: Braces, startPos: number, endPo
     return closeMarkPos;
 }
 
-function parseBrace(str: string | undefined): string | Ast.Macro {
-    if (!str) { return ""; }
+function parseToken(
+    str: string | undefined
+): string | Ast.Whitespace | Ast.Macro {
+    if (!str) {
+        return "";
+    }
+    if (!str.trim()) {
+        return { type: "whitespace" };
+    }
     if (str.startsWith("\\")) {
-        return {
-            type: "macro",
-            content: str.slice(1)
-        }
+        return { type: "macro", content: str.slice(1) };
     }
     return str;
-}
+}
diff --git a/packages/unified-latex-util-arguments/tests/gobble-single-argument.test.ts b/packages/unified-latex-util-arguments/tests/gobble-single-argument.test.ts
@@ -498,6 +498,7 @@ describe("unified-latex-util-arguments", () => {
             { type: "string", content: "]" },
             { type: "string", content: "y" },
         ];
+
         expect(
             gobbleSingleArgument([...ast], parseArgspec("!o")[0])
         ).toMatchObject({
@@ -599,6 +600,41 @@ describe("unified-latex-util-arguments", () => {
         });
         expect(nodes).toEqual([{ content: "yx", type: "string" }]);
     });
+    it("can gobble an 'until' argument with multiple stop tokens", () => {
+        let argspec = parseArgspec("u{a \\bcd}")[0];
+        value = "asdf asydfxya{x}sa \\bcd2df";
+        file = processLatexToAstViaUnified().processSync({ value });
+        let nodes = trimRenderInfo((file.result as any).content) as Ast.Node[];
+        expect(gobbleSingleArgument(nodes, argspec)).toEqual({
+            argument: {
+                type: "argument",
+                content: [
+                    // Due to the current implementation of gobbleSingleArgument,
+                    // extra string splits may be introduced during the search.
+                    { type: "string", content: "a" },
+                    { type: "string", content: "sdf" },
+                    { type: "whitespace" },
+                    { type: "string", content: "a" },
+                    { type: "string", content: "sydfxy" },
+                    { type: "string", content: "a" },
+                    {
+                        type: "group",
+                        content: [{ type: "string", content: "x" }],
+                    },
+                    { type: "string", content: "s" },
+                ],
+                openMark: "",
+                closeMark: "a \\bcd",
+            },
+            nodesRemoved: 11,
+        });
+        expect(nodes).toEqual([
+            {
+                type: "string",
+                content: "2df",
+            },
+        ]);
+    });
     it("gobbleSingleArgument gobbles non-punctuation delimited arguments", () => {
         let ast: Ast.Node[] = [
             { type: "whitespace" },
@@ -692,6 +728,46 @@ describe("unified-latex-util-arguments", () => {
             }
         );
     });
+    it("gobbleSingleArgument gobbles arguments delimited by tokens", () => {
+        let ast: Ast.Node[] = [
+            { type: "macro", content: "a" },
+            { type: "group", content: [{ type: "string", content: "123" }] },
+            { type: "string", content: "1" },
+        ];
+        expect(
+            gobbleSingleArgument(ast, parseArgspec("r\\a{ 1 }")[0])
+        ).toMatchObject({
+            argument: {
+                type: "argument",
+                content: [
+                    {
+                        type: "group",
+                        content: [{ type: "string", content: "123" }],
+                    },
+                ],
+                openMark: "\\a",
+                closeMark: "1",
+            },
+            nodesRemoved: 3,
+        });
+
+        ast = [
+            { type: "macro", content: "abc" },
+            { type: "string", content: "123" },
+            { type: "macro", content: "def" },
+        ];
+        expect(
+            gobbleSingleArgument(ast, parseArgspec("r\\abc\\def")[0])
+        ).toMatchObject({
+            argument: {
+                type: "argument",
+                content: [{ type: "string", content: "123" }],
+                openMark: "\\abc",
+                closeMark: "\\def",
+            },
+            nodesRemoved: 3,
+        });
+    });
     it("can gobble embellishments", () => {
         let ast: Ast.Node[] = [{ type: "string", content: "xxx" }];
         expect(gobbleSingleArgument(ast, parseArgspec("e{}")[0])).toMatchObject(

diff --git a/packages/unified-latex-util-pegjs/grammars/xparse-argspec.pegjs b/packages/unified-latex-util-pegjs/grammars/xparse-argspec.pegjs
@@ -88,9 +88,9 @@ required
         }
     / "r" braceSpec:brace_spec { return createNode("mandatory", braceSpec); }
 
-// An "until" argument gobbles tokens until the specified stop token(s)
+// An "until" argument gobbles tokens until the specified stop token(s). In the braced form, stop tokens may include whitespace.
 until
-    = "u" stopTokens:(x:until_token { return [x] } / '{' @(until_token+) '}') {
+    = "u" stopTokens:(x:token { return [x] } / '{' @(token_or_whitespace+) '}') {
             return createNode("until", { stopTokens });
         }
 
@@ -107,7 +107,7 @@ brace_spec
     / "{}" { return { openBrace: "{", closeBrace: "}"}}
 
 braced_group
-    = "{" content:(macro_name / non_brace / braced_group)* "}" {
+    = "{" content:( token_or_whitespace / braced_group)* "}" {
             return content;
         }
 
@@ -122,13 +122,12 @@ macro_name
 token
     = macro_name / non_brace
 
-// Until token allows whitespace
-until_token
-	= macro_name / ![{}] @.
+token_or_whitespace
+	= token / whitespace_token
 
-// No need to separate individual characters here
+// No need to separate individual characters here; we just need to trim the enclosing whitespace
 group
-    = x:braced_group { return x.map(arrayContent).join(''); }
+    = x:braced_group { return x.map(arrayContent).join('').trim(); }
 
 token_or_group
     = token / group

diff --git a/packages/unified-latex-util-scan/libs/scan.ts b/packages/unified-latex-util-scan/libs/scan.ts
@@ -20,7 +20,7 @@ export function scan(
          */
         endIndex?: number;
         /**
-         * If `true`, whitespace and comments will be skilled but any other
+         * If `true`, whitespace and comments will be skipped but any other
          * node that doesn't match `token` will cause the scan to terminate.
          */
         onlySkipWhitespaceAndComments?: boolean;
@@ -31,13 +31,19 @@ export function scan(
         allowSubstringMatches?: boolean;
     }
 ): number | null {
-    const { startIndex, endIndex, onlySkipWhitespaceAndComments, allowSubstringMatches } =
-        options || {};
+    const {
+        startIndex,
+        endIndex,
+        onlySkipWhitespaceAndComments,
+        allowSubstringMatches,
+    } = options || {};
     if (typeof token === "string") {
         token = { type: "string", content: token } as Ast.String;
     }
+    const start = typeof startIndex === "number" ? startIndex : 0;
+    const end = typeof endIndex === "number" ? endIndex : nodes.length - 1;
 
-    for (let i = startIndex || 0; i <= (endIndex || nodes.length - 1); i++) {
+    for (let i = start; i <= end; i++) {
         const node = nodes[i];
         if (node.type === token.type) {
             switch (node.type) {

diff --git a/tsconfig.build.json b/tsconfig.build.json
@@ -30,6 +30,6 @@
         "moduleResolution": "Bundler",
         "resolveJsonModule": true,
         "customConditions": ["_bundle"],
-        "forceConsistentCasingInFileNames": true,
+        "forceConsistentCasingInFileNames": true
     }
 }