Skip to content

Commit

Permalink
feat: support multi-stop token in 'until', macro delimiters
Browse files Browse the repository at this point in the history
The behavior of the `until` argspec was fixed and, while doing so, support for
multi-token stops was added. It also now properly supports macro
delimiters (which was really just a by-product of applying uniform
treatment to all logic related to finding braces). This fixes
#46.
  • Loading branch information
theseanl committed Jan 20, 2024
1 parent 191c53c commit 2067555
Show file tree
Hide file tree
Showing 6 changed files with 153 additions and 49 deletions.
5 changes: 4 additions & 1 deletion packages/unified-latex-util-argspec/libs/argspec-types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@ export type Node = Optional | Mandatory | Verbatim | Body | Until;
type Optional = OptionalArg | OptionalStar | OptionalToken | Embellishment;

// Make several optional properties available in all `AstNode`s
interface AstNode extends Partial<Arg>, Partial<LeadingWhitespace>, DefaultArgument {
interface AstNode
extends Partial<Arg>,
Partial<LeadingWhitespace>,
DefaultArgument {
type: string;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,18 +38,18 @@ export function gobbleSingleArgument(
// If `argSpec` specifies leading whitespace is not allowed,
// this function does nothing.
const gobbleWhitespace = argSpec.noLeadingWhitespace
? () => { }
? () => {}
: () => {
while (currPos < nodes.length) {
if (!match.whitespace(nodes[currPos])) {
break;
}
currPos++;
}
};
while (currPos < nodes.length) {
if (!match.whitespace(nodes[currPos])) {
break;
}
currPos++;
}
};

const openMark = parseBrace(argSpec.openBrace);
const closeMark = parseBrace(argSpec.closeBrace);
const openMark = parseToken(argSpec.openBrace);
const closeMark = parseToken(argSpec.closeBrace);

// Only mandatory arguments can be wrapped in {...}.
// Since we already parse such things as groups, we need to
Expand Down Expand Up @@ -148,13 +148,10 @@ export function gobbleSingleArgument(
break;
}
case "until": {
const stopTokens: (string | Ast.Whitespace)[] = argSpec.stopTokens.map(rawToken => {
if (rawToken === " ") {
return { type: "whitespace" };
}
return rawToken;
});

const stopTokens = argSpec.stopTokens.map(parseToken);
// TODO: in order to match xparse's behavior, multiple spaces at the start
// or in the middle should be collapsed to a single whitespace token,
// and spaces at the end should be ignored.
let nextStartPos = startPos;
let bracePos: [number, number] | undefined;
while (nextStartPos < nodes.length) {
Expand All @@ -164,7 +161,9 @@ export function gobbleSingleArgument(
undefined,
stopTokens[0]
);
if (!bracePos) { break; }
if (!bracePos) {
break;
}
let nextBracePos: [number, number] | undefined = bracePos;
let i = 1;
for (; i < stopTokens.length && nextBracePos; i++) {
Expand All @@ -176,7 +175,9 @@ export function gobbleSingleArgument(
/* endPos */ nextBracePos[1] + 1
);
}
if (i >= stopTokens.length && nextBracePos) { break; }
if (i >= stopTokens.length && nextBracePos) {
break;
}
nextStartPos = bracePos[0] + 1;
}

Expand All @@ -189,7 +190,9 @@ export function gobbleSingleArgument(
openMark: "",
closeMark: printRaw(argSpec.stopTokens),
});
currPos = bracePos[1];
// Since `stopTokens` may consist of more than one token,
// we need to advance `currPos` further.
currPos = bracePos[1] + stopTokens.length - 1;
if (currPos < nodes.length) {
currPos++;
}
Expand Down Expand Up @@ -245,7 +248,7 @@ function cloneStringNode(node: Ast.String, content: string): Ast.String {
return Object.assign({}, node, { content });
}

type Braces = string | Ast.Macro | Ast.Whitespace
type Braces = string | Ast.Macro | Ast.Whitespace;
/**
* Find the position of the open brace and the closing brace.
* Returns undefined if the brace isn't found.
Expand All @@ -263,7 +266,9 @@ function findBracePositions(
let closeMarkPos: number | undefined = startPos;
if (openMark) {
openMarkPos = findDelimiter(nodes, openMark, openMarkPos, openMarkPos);
if (openMarkPos === undefined) { return; }
if (openMarkPos === undefined) {
return;
}
closeMarkPos = openMarkPos + 1;
}
if (!closeMark) {
Expand All @@ -285,18 +290,27 @@ function findBracePositions(
}
return [openMarkPos, closeMarkPos];
}
closeMarkPos = findDelimiter(nodes, closeMark, closeMarkPos, endPos)
if (closeMarkPos === undefined) { return; }
closeMarkPos = findDelimiter(nodes, closeMark, closeMarkPos, endPos);
if (closeMarkPos === undefined) {
return;
}
return [openMarkPos, closeMarkPos];
}

function findDelimiter(nodes: Ast.Node[], token: Braces, startPos: number, endPos?: number): number | undefined {
function findDelimiter(
nodes: Ast.Node[],
token: Braces,
startPos: number,
endPos?: number
): number | undefined {
let closeMarkPos = scan(nodes, token, {
startIndex: startPos,
allowSubstringMatches: true,
endIndex: endPos
endIndex: endPos,
});
if (closeMarkPos === null) { return; }
if (closeMarkPos === null) {
return;
}
const closingNode = nodes[closeMarkPos];
if (match.anyString(closingNode) && typeof token === "string") {
const closingNodeContent = closingNode.content;
Expand All @@ -308,7 +322,9 @@ function findDelimiter(nodes: Ast.Node[], token: Braces, startPos: number, endPo
// `closeMarkPos` needs to be increased, so double-check that we are bounded by
// `endPos` before mutating `nodes`. `closeMarkPos` is already less than or equal to `endPos`,
// so we only need to check for equality here.
if (closeMarkPos === endPos) { return; }
if (closeMarkPos === endPos) {
return;
}
nodes.splice(
closeMarkPos,
0,
Expand All @@ -332,13 +348,17 @@ function findDelimiter(nodes: Ast.Node[], token: Braces, startPos: number, endPo
return closeMarkPos;
}

function parseBrace(str: string | undefined): string | Ast.Macro {
if (!str) { return ""; }
function parseToken(
str: string | undefined
): string | Ast.Whitespace | Ast.Macro {
if (!str) {
return "";
}
if (!str.trim()) {
return { type: "whitespace" };
}
if (str.startsWith("\\")) {
return {
type: "macro",
content: str.slice(1)
}
return { type: "macro", content: str.slice(1) };
}
return str;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -498,6 +498,7 @@ describe("unified-latex-util-arguments", () => {
{ type: "string", content: "]" },
{ type: "string", content: "y" },
];

expect(
gobbleSingleArgument([...ast], parseArgspec("!o")[0])
).toMatchObject({
Expand Down Expand Up @@ -599,6 +600,41 @@ describe("unified-latex-util-arguments", () => {
});
expect(nodes).toEqual([{ content: "yx", type: "string" }]);
});
// Checks that an `until` argument whose stop sequence has several tokens is
// only terminated where the whole sequence appears.
it("can gobble an 'until' argument with multiple stop tokens", () => {
// The braced `u{...}` spec supplies the stop sequence; the expected
// `closeMark` below is `a \bcd`.
let argspec = parseArgspec("u{a \\bcd}")[0];
// The input contains several `a`s before the full stop sequence appears.
value = "asdf asydfxya{x}sa \\bcd2df";
file = processLatexToAstViaUnified().processSync({ value });
let nodes = trimRenderInfo((file.result as any).content) as Ast.Node[];
expect(gobbleSingleArgument(nodes, argspec)).toEqual({
argument: {
type: "argument",
content: [
// Due to the current implementation of gobbleSingleArgument,
// extra string splits may be introduced during the search
// (note the standalone "a" string nodes below).
{ type: "string", content: "a" },
{ type: "string", content: "sdf" },
{ type: "whitespace" },
{ type: "string", content: "a" },
{ type: "string", content: "sydfxy" },
{ type: "string", content: "a" },
{
type: "group",
content: [{ type: "string", content: "x" }],
},
{ type: "string", content: "s" },
],
openMark: "",
closeMark: "a \\bcd",
},
nodesRemoved: 11,
});
// `nodes` was passed in directly, so afterwards it is expected to hold only
// the text that follows the stop sequence.
expect(nodes).toEqual([
{
type: "string",
content: "2df",
},
]);
});
it("gobbleSingleArgument gobbles non-punctuation delimited arguments", () => {
let ast: Ast.Node[] = [
{ type: "whitespace" },
Expand Down Expand Up @@ -692,6 +728,46 @@ describe("unified-latex-util-arguments", () => {
}
);
});
// Checks `r`-type (delimited) arguments whose delimiters are macros or a
// braced token rather than single punctuation characters.
it("gobbleSingleArgument gobbles arguments delimited by tokens", () => {
let ast: Ast.Node[] = [
{ type: "macro", content: "a" },
{ type: "group", content: [{ type: "string", content: "123" }] },
{ type: "string", content: "1" },
];
// The opening delimiter is the macro `\a`; the closing delimiter is written
// as the braced group `{ 1 }`, and the expected `closeMark` is `1` without
// the surrounding spaces.
expect(
gobbleSingleArgument(ast, parseArgspec("r\\a{ 1 }")[0])
).toMatchObject({
argument: {
type: "argument",
content: [
{
type: "group",
content: [{ type: "string", content: "123" }],
},
],
openMark: "\\a",
closeMark: "1",
},
nodesRemoved: 3,
});

// Both delimiters are macros here: `\abc` opens and `\def` closes.
ast = [
{ type: "macro", content: "abc" },
{ type: "string", content: "123" },
{ type: "macro", content: "def" },
];
expect(
gobbleSingleArgument(ast, parseArgspec("r\\abc\\def")[0])
).toMatchObject({
argument: {
type: "argument",
content: [{ type: "string", content: "123" }],
openMark: "\\abc",
closeMark: "\\def",
},
nodesRemoved: 3,
});
});
it("can gobble embellishments", () => {
let ast: Ast.Node[] = [{ type: "string", content: "xxx" }];
expect(gobbleSingleArgument(ast, parseArgspec("e{}")[0])).toMatchObject(
Expand Down
15 changes: 7 additions & 8 deletions packages/unified-latex-util-pegjs/grammars/xparse-argspec.pegjs
Original file line number Diff line number Diff line change
Expand Up @@ -88,9 +88,9 @@ required
}
/ "r" braceSpec:brace_spec { return createNode("mandatory", braceSpec); }
// An "until" argument gobbles tokens until the specified stop token(s)
// An "until" argument gobbles tokens until the specified stop token(s). A braced list of stop tokens may contain whitespace.
until
= "u" stopTokens:(x:until_token { return [x] } / '{' @(until_token+) '}') {
= "u" stopTokens:(x:token { return [x] } / '{' @(token_or_whitespace+) '}') {
return createNode("until", { stopTokens });
}
Expand All @@ -107,7 +107,7 @@ brace_spec
/ "{}" { return { openBrace: "{", closeBrace: "}"}}
braced_group
= "{" content:(macro_name / non_brace / braced_group)* "}" {
= "{" content:( token_or_whitespace / braced_group)* "}" {
return content;
}
Expand All @@ -122,13 +122,12 @@ macro_name
token
= macro_name / non_brace
// Until token allows whitespace
until_token
= macro_name / ![{}] @.
token_or_whitespace
= token / whitespace_token
// No need to separate individual characters here
// No need to separate individual characters here, just need to trim enclosing whitespaces
group
= x:braced_group { return x.map(arrayContent).join(''); }
= x:braced_group { return x.map(arrayContent).join('').trim(); }
token_or_group
= token / group
Expand Down
14 changes: 10 additions & 4 deletions packages/unified-latex-util-scan/libs/scan.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ export function scan(
*/
endIndex?: number;
/**
* If `true`, whitespace and comments will be skilled but any other
* If `true`, whitespace and comments will be skipped but any other
* node that doesn't match `token` will cause the scan to terminate.
*/
onlySkipWhitespaceAndComments?: boolean;
Expand All @@ -31,13 +31,19 @@ export function scan(
allowSubstringMatches?: boolean;
}
): number | null {
const { startIndex, endIndex, onlySkipWhitespaceAndComments, allowSubstringMatches } =
options || {};
const {
startIndex,
endIndex,
onlySkipWhitespaceAndComments,
allowSubstringMatches,
} = options || {};
if (typeof token === "string") {
token = { type: "string", content: token } as Ast.String;
}
const start = typeof startIndex === "number" ? startIndex : 0;
const end = typeof endIndex === "number" ? endIndex : nodes.length - 1;

for (let i = startIndex || 0; i <= (endIndex || nodes.length - 1); i++) {
for (let i = start; i <= end; i++) {
const node = nodes[i];
if (node.type === token.type) {
switch (node.type) {
Expand Down
2 changes: 1 addition & 1 deletion tsconfig.build.json
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,6 @@
"moduleResolution": "Bundler",
"resolveJsonModule": true,
"customConditions": ["_bundle"],
"forceConsistentCasingInFileNames": true,
"forceConsistentCasingInFileNames": true
}
}

0 comments on commit 2067555

Please sign in to comment.