Skip to content

Commit

Permalink
feat: support multi-stop token in 'until', macro delimiters
Browse files Browse the repository at this point in the history
The behavior of the 'until' argspec was fixed and, in the process, support
for multi-token stops was added. Macro delimiters are now properly supported
as well (this was really just a by-product of applying uniform treatment to
all logic related to finding braces). This fixes
#46.
  • Loading branch information
theseanl committed Jan 20, 2024
1 parent 191c53c commit e416cd8
Show file tree
Hide file tree
Showing 4 changed files with 91 additions and 25 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,8 @@ export function gobbleSingleArgument(
}
};

const openMark = parseBrace(argSpec.openBrace);
const closeMark = parseBrace(argSpec.closeBrace);
const openMark = parseToken(argSpec.openBrace);
const closeMark = parseToken(argSpec.closeBrace);

// Only mandatory arguments can be wrapped in {...}.
// Since we already parse such things as groups, we need to
Expand Down Expand Up @@ -148,13 +148,10 @@ export function gobbleSingleArgument(
break;
}
case "until": {
const stopTokens: (string | Ast.Whitespace)[] = argSpec.stopTokens.map(rawToken => {
if (rawToken === " ") {
return { type: "whitespace" };
}
return rawToken;
});

const stopTokens = argSpec.stopTokens.map(parseToken);
// TODO: in order to match xparse's behavior, multiple spaces at the start
// or in the middle should be collapsed to a single whitespace token,
// and spaces at the end should be ignored.
let nextStartPos = startPos;
let bracePos: [number, number] | undefined;
while (nextStartPos < nodes.length) {
Expand Down Expand Up @@ -189,7 +186,9 @@ export function gobbleSingleArgument(
openMark: "",
closeMark: printRaw(argSpec.stopTokens),
});
currPos = bracePos[1];
// Since `stopTokens` may consist of more than one token,
// we need to advance `currPos` further.
currPos = bracePos[1] + stopTokens.length - 1;
if (currPos < nodes.length) {
currPos++;
}
Expand Down Expand Up @@ -332,13 +331,11 @@ function findDelimiter(nodes: Ast.Node[], token: Braces, startPos: number, endPo
return closeMarkPos;
}

function parseBrace(str: string | undefined): string | Ast.Macro {
function parseToken(str: string | undefined): string | Ast.Whitespace | Ast.Macro {
if (!str) { return ""; }
if (!str.trim()) { return { type: "whitespace" }; }
if (str.startsWith("\\")) {
return {
type: "macro",
content: str.slice(1)
}
return { type: "macro", content: str.slice(1) };
}
return str;
}
Original file line number Diff line number Diff line change
Expand Up @@ -498,6 +498,7 @@ describe("unified-latex-util-arguments", () => {
{ type: "string", content: "]" },
{ type: "string", content: "y" },
];

expect(
gobbleSingleArgument([...ast], parseArgspec("!o")[0])
).toMatchObject({
Expand Down Expand Up @@ -599,6 +600,38 @@ describe("unified-latex-util-arguments", () => {
});
expect(nodes).toEqual([{ content: "yx", type: "string" }]);
});
// Checks that an 'until' argument whose stop sequence is made of several
// tokens is gobbled as a whole, and that only the text following the stop
// sequence is left in `nodes`.
it("can gobble an 'until' argument with multiple stop tokens", () => {
// The argspec source is `u{a \bcd}`: the stop sequence is written as the
// letter `a`, a space, and the macro `\bcd`.
let argspec = parseArgspec("u{a \\bcd}")[0];
// NOTE(review): `value` and `file` are assigned without a declaration here,
// so they are presumably declared in an enclosing scope of the test file.
value = "asdf asydfxya{x}sa \\bcd2df";
file = processLatexToAstViaUnified().processSync({ value });
let nodes = trimRenderInfo((file.result as any).content) as Ast.Node[];
expect(gobbleSingleArgument(nodes, argspec)).toEqual({
argument: {
type: "argument",
content: [
// Due to the current implementation of gobbleSingleArgument,
// extra string splits may be introduced during the search
// (e.g. "asdf" shows up below as "a" followed by "sdf").
{ "type": "string", "content": "a" },
{ "type": "string", "content": "sdf" },
{ "type": "whitespace" },
{ "type": "string", "content": "a" },
{ "type": "string", "content": "sydfxy" },
{ "type": "string", "content": "a" },
{ "type": "group", "content": [{ "type": "string", "content": "x" }] },
{ "type": "string", "content": "s" },
],
openMark: "",
// The close mark is the printed form of the whole stop sequence.
closeMark: "a \\bcd",
},
// 8 content nodes above; the remaining 3 are presumably the stop tokens
// (`a`, whitespace, `\bcd`) -- confirm against gobbleSingleArgument.
nodesRemoved: 11,
});
// Only the text after the stop sequence is expected to remain.
expect(nodes).toEqual([
{
"type": "string",
"content": "2df"
}
]);
});
it("gobbleSingleArgument gobbles non-punctuation delimited arguments", () => {
let ast: Ast.Node[] = [
{ type: "whitespace" },
Expand Down Expand Up @@ -692,6 +725,41 @@ describe("unified-latex-util-arguments", () => {
}
);
});
// Checks that a required delimited argument (`r<open><close>`) can use
// arbitrary tokens, including macros, as its opening and closing delimiters.
it("gobbleSingleArgument gobbles arguments delimited by tokens", () => {
// Case 1: the opening delimiter is the macro `\a`; the closing delimiter is
// written as the braced group `{ 1 }`.
let ast: Ast.Node[] = [
{ "type": "macro", "content": "a" },
{ "type": "group", "content": [{ "type": "string", "content": "123" }] },
{ "type": "string", "content": "1" }
];
expect(gobbleSingleArgument(ast, parseArgspec("r\\a{ 1 }")[0])).toMatchObject(
{
argument: {
type: "argument",
content: [{ type: "group", content: [{ type: "string", content: "123" }] }],
openMark: "\\a",
// The expected close mark is "1": the spaces surrounding it inside the
// braces of the argspec are not part of the delimiter.
closeMark: "1",
},
// All three input nodes (open delimiter, content, close delimiter) are consumed.
nodesRemoved: 3,
}
);

// Case 2: both delimiters are macros (`\abc` opens, `\def` closes).
ast = [
{ "type": "macro", "content": "abc" },
{ "type": "string", "content": "123" },
{ "type": "macro", "content": "def" }
];
expect(gobbleSingleArgument(ast, parseArgspec("r\\abc\\def")[0])).toMatchObject(
{
argument: {
type: "argument",
content: [{ type: "string", content: "123" }],
openMark: "\\abc",
closeMark: "\\def",
},
// Again the delimiters are counted together with the single content node.
nodesRemoved: 3,
}
);
})
it("can gobble embellishments", () => {
let ast: Ast.Node[] = [{ type: "string", content: "xxx" }];
expect(gobbleSingleArgument(ast, parseArgspec("e{}")[0])).toMatchObject(
Expand Down
15 changes: 7 additions & 8 deletions packages/unified-latex-util-pegjs/grammars/xparse-argspec.pegjs
Original file line number Diff line number Diff line change
Expand Up @@ -88,9 +88,9 @@ required
}
/ "r" braceSpec:brace_spec { return createNode("mandatory", braceSpec); }
// An "until" argument gobbles tokens until the specified stop token(s)
// An "until" argument gobbles tokens until the specified stop token(s). When the stop tokens are given in braces, they may include whitespace.
until
= "u" stopTokens:(x:until_token { return [x] } / '{' @(until_token+) '}') {
= "u" stopTokens:(x:token { return [x] } / '{' @(token_or_whitespace+) '}') {
return createNode("until", { stopTokens });
}
Expand All @@ -107,7 +107,7 @@ brace_spec
/ "{}" { return { openBrace: "{", closeBrace: "}"}}
braced_group
= "{" content:(macro_name / non_brace / braced_group)* "}" {
= "{" content:( token_or_whitespace / braced_group)* "}" {
return content;
}
Expand All @@ -122,13 +122,12 @@ macro_name
token
= macro_name / non_brace
// Until token allows whitespace
until_token
= macro_name / ![{}] @.
token_or_whitespace
= token / whitespace_token
// No need to separate individual characters here
// No need to separate individual characters here; we only need to trim the enclosing whitespace
group
= x:braced_group { return x.map(arrayContent).join(''); }
= x:braced_group { return x.map(arrayContent).join('').trim(); }
token_or_group
= token / group
Expand Down
6 changes: 4 additions & 2 deletions packages/unified-latex-util-scan/libs/scan.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ export function scan(
*/
endIndex?: number;
/**
* If `true`, whitespace and comments will be skilled but any other
* If `true`, whitespace and comments will be skipped but any other
* node that doesn't match `token` will cause the scan to terminate.
*/
onlySkipWhitespaceAndComments?: boolean;
Expand All @@ -36,8 +36,10 @@ export function scan(
if (typeof token === "string") {
token = { type: "string", content: token } as Ast.String;
}
const start = typeof startIndex === "number" ? startIndex : 0;
const end = typeof endIndex === "number" ? endIndex : nodes.length - 1;

for (let i = startIndex || 0; i <= (endIndex || nodes.length - 1); i++) {
for (let i = start; i <= end; i++) {
const node = nodes[i];
if (node.type === token.type) {
switch (node.type) {
Expand Down

0 comments on commit e416cd8

Please sign in to comment.