Skip to content

Commit

Permalink
fix: parsing error in EOF after 4 quotes (#182)
Browse files Browse the repository at this point in the history
* fix: parsing error in EOF after 4 quotes

* Create swift-cars-yawn.md
  • Loading branch information
ota-meshi committed Nov 11, 2023
1 parent c5c393c commit 4cab5da
Show file tree
Hide file tree
Showing 19 changed files with 815 additions and 105 deletions.
5 changes: 5 additions & 0 deletions .changeset/swift-cars-yawn.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"toml-eslint-parser": patch
---

fix: parsing error at EOF after 4 quotes
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
"eslint-fix": "npm run lint -- --fix",
"test": "env-cmd -e tz mocha --require ts-node/register \"tests/src/**/*.ts\" --reporter dot --timeout 60000",
"cover": "nyc --reporter=lcov npm run test",
"debug": "env-cmd -e tz mocha --require ts-node/register/transpile-only --inspect \"tests/src/**/*.ts\" --reporter dot",
"debug": "env-cmd -e tz mocha --require ts-node/register/transpile-only \"tests/src/**/*.ts\" --reporter dot",
"preversion": "npm run lint && npm test",
"update-fixtures": "env-cmd -e tz ts-node --transpile-only ./tools/update-fixtures.ts",
"benchmark": "ts-node --transpile-only benchmark/index.ts",
Expand Down
77 changes: 22 additions & 55 deletions src/tokenizer/code-point-iterator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,76 +35,43 @@ export class CodePointIterator {
return CodePoint.EOF;
}

this.start.offset = this.end.offset;
this.start.line = this.end.line;
this.start.column = this.end.column;
return (this.lastCodePoint = this.moveAt(this.end));
}

/**
 * Consume the upcoming code point only when it equals `cp`.
 *
 * The code point at the current end offset is compared with `cp`; on a
 * match the iterator is advanced via `next()`, otherwise nothing changes.
 *
 * @param cp The code point expected at the current end offset.
 * @returns `true` if the code point matched and was consumed, else `false`.
 */
public eat(cp: number): boolean {
  const upcoming = this.text.codePointAt(this.end.offset);
  if (upcoming !== cp) {
    return false;
  }
  this.next();
  return true;
}

public moveAt(pos: Position): number {
this.start.offset = this.end.offset = pos.offset;
this.start.line = this.end.line = pos.line;
this.start.column = this.end.column = pos.column;

const cp = this.text.codePointAt(this.start.offset) ?? CodePoint.EOF;
if (cp === CodePoint.EOF) {
this.end = this.start;
return (this.lastCodePoint = cp);
return cp;
}
const shift = cp >= 0x10000 ? 2 : 1;
this.end.offset = this.start.offset + shift;
this.end.offset += shift;
if (cp === CodePoint.LINE_FEED) {
this.end.line = this.start.line + 1;
this.end.line += 1;
this.end.column = 0;
} else if (cp === CodePoint.CARRIAGE_RETURN) {
if (this.text.codePointAt(this.end.offset) === CodePoint.LINE_FEED) {
this.end.offset++;
this.end.line = this.start.line + 1;
this.end.line += 1;
this.end.column = 0;
}
return (this.lastCodePoint = CodePoint.LINE_FEED);
return CodePoint.LINE_FEED;
} else {
this.end.column = this.start.column + shift;
}

return (this.lastCodePoint = cp);
}

public *iterateSubCodePoints(): IterableIterator<number> {
let index = this.end.offset;
while (true) {
let cp = this.text.codePointAt(index) ?? CodePoint.EOF;
if (cp === CodePoint.CARRIAGE_RETURN) {
if (this.text.codePointAt(index) === CodePoint.LINE_FEED) {
cp = this.text.codePointAt(++index) ?? CodePoint.EOF;
} else {
cp = CodePoint.LINE_FEED;
}
}
if (cp === CodePoint.EOF) {
return;
}
yield cp;
index += cp >= 0x10000 ? 2 : 1;
this.end.column += shift;
}
}

public subCodePoints(): {
next(): number;
count: number;
} {
const sub = this.iterateSubCodePoints();
let end = false;
let count = 0;
return {
next() {
if (end) {
return CodePoint.EOF;
}
const r = sub.next();
if (r.done) {
end = true;
return CodePoint.EOF;
}
count++;
return r.value;
},
get count() {
return count;
},
};
return cp;
}
}
127 changes: 83 additions & 44 deletions src/tokenizer/tokenizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,23 @@ export class Tokenizer {
return (this.lastCodePoint = this.codePointIterator.next());
}

/**
 * Consume the next code point only if it equals `cp`.
 *
 * - Once `lastCodePoint` is EOF nothing is consumed.
 * - While `backCode` is set, `lastCodePoint` itself is tested and the flag
 *   is cleared on a match; the iterator is not touched.
 * - Otherwise the check is delegated to the code point iterator's `eat`.
 *
 * @param cp The expected code point.
 * @returns `true` if the code point matched and was consumed, else `false`.
 */
private eatCode(cp: number): boolean {
  if (this.lastCodePoint === CodePoint.EOF) {
    return false;
  }
  if (!this.backCode) {
    return this.codePointIterator.eat(cp);
  }
  const matched = this.lastCodePoint === cp;
  if (matched) {
    this.backCode = false;
  }
  return matched;
}

/**
* Skip code point iterator.
*/
Expand All @@ -244,6 +261,16 @@ export class Tokenizer {
this.lastCodePoint = this.codePointIterator.next();
}

/**
 * Reposition the code point iterator at `loc`.
 *
 * Clears the `backCode` flag when it is set, then stores the code point
 * returned by the iterator's `moveAt` as `lastCodePoint`.
 *
 * @param loc The position to move the iterator to.
 */
private moveAt(loc: Position): void {
  if (this.backCode) {
    this.backCode = false;
  }
  const codePoint = this.codePointIterator.moveAt(loc);
  this.lastCodePoint = codePoint;
}

/**
* Back the current code point as the given state.
*/
Expand Down Expand Up @@ -564,21 +591,21 @@ export class Tokenizer {
return this.reportParseErrorControlChar();
}
if (cp === CodePoint.QUOTATION_MARK) {
const nextPoints = this.codePointIterator.subCodePoints();
const startPos = { ...this.codePointIterator.start };
if (
nextPoints.next() === CodePoint.QUOTATION_MARK &&
nextPoints.next() === CodePoint.QUOTATION_MARK
this.eatCode(CodePoint.QUOTATION_MARK) &&
this.eatCode(CodePoint.QUOTATION_MARK)
) {
if (nextPoints.next() === CodePoint.QUOTATION_MARK) {
if (this.eatCode(CodePoint.QUOTATION_MARK)) {
out.push(CodePoint.QUOTATION_MARK);
if (nextPoints.next() === CodePoint.QUOTATION_MARK) {
if (this.eatCode(CodePoint.QUOTATION_MARK)) {
out.push(CodePoint.QUOTATION_MARK);
if (nextPoints.next() === CodePoint.QUOTATION_MARK) {
if (this.eatCode(CodePoint.QUOTATION_MARK)) {
this.moveAt(startPos);
return this.reportParseError("invalid-three-quotes");
}
}
}
this.skip(nextPoints.count - 1);
// end
this.endToken(
"MultiLineBasicString",
Expand All @@ -587,6 +614,7 @@ export class Tokenizer {
);
return "DATA";
}
this.moveAt(startPos);
}
if (cp === CodePoint.BACKSLASH) {
cp = this.nextCode();
Expand Down Expand Up @@ -625,11 +653,14 @@ export class Tokenizer {
continue;
} else if (isWhitespace(cp)) {
let valid = true;
for (const nextCp of this.codePointIterator.iterateSubCodePoints()) {
const startPos = { ...this.codePointIterator.start };
let nextCp: number;
while ((nextCp = this.nextCode()) !== CodePoint.EOF) {
if (nextCp === CodePoint.LINE_FEED) {
break;
}
if (!isWhitespace(nextCp)) {
this.moveAt(startPos);
valid = false;
break;
}
Expand Down Expand Up @@ -690,21 +721,21 @@ export class Tokenizer {
return this.reportParseErrorControlChar();
}
if (cp === CodePoint.SINGLE_QUOTE) {
const nextPoints = this.codePointIterator.subCodePoints();
const startPos = { ...this.codePointIterator.start };
if (
nextPoints.next() === CodePoint.SINGLE_QUOTE &&
nextPoints.next() === CodePoint.SINGLE_QUOTE
this.eatCode(CodePoint.SINGLE_QUOTE) &&
this.eatCode(CodePoint.SINGLE_QUOTE)
) {
if (nextPoints.next() === CodePoint.SINGLE_QUOTE) {
if (this.eatCode(CodePoint.SINGLE_QUOTE)) {
out.push(CodePoint.SINGLE_QUOTE);
if (nextPoints.next() === CodePoint.SINGLE_QUOTE) {
if (this.eatCode(CodePoint.SINGLE_QUOTE)) {
out.push(CodePoint.SINGLE_QUOTE);
if (nextPoints.next() === CodePoint.SINGLE_QUOTE) {
if (this.eatCode(CodePoint.SINGLE_QUOTE)) {
this.moveAt(startPos);
return this.reportParseError("invalid-three-quotes");
}
}
}
this.skip(nextPoints.count - 1);
// end
this.endToken(
"MultiLineLiteralString",
Expand All @@ -713,6 +744,7 @@ export class Tokenizer {
);
return "DATA";
}
this.moveAt(startPos);
}
out.push(cp);
cp = this.nextCode();
Expand All @@ -732,29 +764,29 @@ export class Tokenizer {

private NAN_OR_INF(cp: number): TokenizerState {
if (cp === CodePoint.LATIN_SMALL_N) {
const codePoints = this.codePointIterator.subCodePoints();
const startPos = { ...this.codePointIterator.start };
if (
codePoints.next() === CodePoint.LATIN_SMALL_A &&
codePoints.next() === CodePoint.LATIN_SMALL_N
this.eatCode(CodePoint.LATIN_SMALL_A) &&
this.eatCode(CodePoint.LATIN_SMALL_N)
) {
this.skip(2);
this.endToken("Float", "end", NaN);
return "DATA";
}
this.moveAt(startPos);
} else if (cp === CodePoint.LATIN_SMALL_I) {
const codePoints = this.codePointIterator.subCodePoints();
const startPos = { ...this.codePointIterator.start };
if (
codePoints.next() === CodePoint.LATIN_SMALL_N &&
codePoints.next() === CodePoint.LATIN_SMALL_F
this.eatCode(CodePoint.LATIN_SMALL_N) &&
this.eatCode(CodePoint.LATIN_SMALL_F)
) {
this.skip(2);
this.endToken(
"Float",
"end",
this.text[this.tokenStart.offset] === "-" ? -Infinity : Infinity,
);
return "DATA";
}
this.moveAt(startPos);
}
return this.reportParseError("unexpected-char");
}
Expand All @@ -769,14 +801,14 @@ export class Tokenizer {
: CodePoint.NULL;
if (cp === CodePoint.DIGIT_0) {
if (sign === CodePoint.NULL) {
const subCodePoints = this.codePointIterator.subCodePoints();
const nextCp = subCodePoints.next();
const startPos = { ...this.codePointIterator.start };
const nextCp = this.nextCode();
if (isDigit(nextCp)) {
const nextNextCp = subCodePoints.next();
const nextNextCp = this.nextCode();
if (
(isDigit(nextNextCp) &&
isDigit(subCodePoints.next()) &&
subCodePoints.next() === CodePoint.DASH) ||
isDigit(this.nextCode()) &&
this.eatCode(CodePoint.DASH)) ||
nextNextCp === CodePoint.COLON
) {
const isDate = nextNextCp !== CodePoint.COLON;
Expand All @@ -790,10 +822,13 @@ export class Tokenizer {
second: 0,
};
this.data = data;
this.moveAt(startPos);
return this.back(isDate ? "DATE_YEAR" : "TIME_HOUR");
}
this.moveAt(startPos);
return this.reportParseError("invalid-leading-zero");
}
this.moveAt(startPos);
}

cp = this.nextCode();
Expand Down Expand Up @@ -974,30 +1009,30 @@ export class Tokenizer {

private BOOLEAN(cp: number): TokenizerState {
if (cp === CodePoint.LATIN_SMALL_T) {
const codePoints = this.codePointIterator.subCodePoints();
const startPos = { ...this.codePointIterator.start };
if (
codePoints.next() === CodePoint.LATIN_SMALL_R &&
codePoints.next() === CodePoint.LATIN_SMALL_U &&
codePoints.next() === CodePoint.LATIN_SMALL_E
this.eatCode(CodePoint.LATIN_SMALL_R) &&
this.eatCode(CodePoint.LATIN_SMALL_U) &&
this.eatCode(CodePoint.LATIN_SMALL_E)
) {
// true
this.skip(codePoints.count);
this.endToken("Boolean", "end", true);
return "DATA";
}
this.moveAt(startPos);
} else if (cp === CodePoint.LATIN_SMALL_F) {
const codePoints = this.codePointIterator.subCodePoints();
const startPos = { ...this.codePointIterator.start };
if (
codePoints.next() === CodePoint.LATIN_SMALL_A &&
codePoints.next() === CodePoint.LATIN_SMALL_L &&
codePoints.next() === CodePoint.LATIN_SMALL_S &&
codePoints.next() === CodePoint.LATIN_SMALL_E
this.eatCode(CodePoint.LATIN_SMALL_A) &&
this.eatCode(CodePoint.LATIN_SMALL_L) &&
this.eatCode(CodePoint.LATIN_SMALL_S) &&
this.eatCode(CodePoint.LATIN_SMALL_E)
) {
// false
this.skip(codePoints.count);
this.endToken("Boolean", "end", false);
return "DATA";
}
this.moveAt(startPos);
}
return this.reportParseError("unexpected-char");
}
Expand Down Expand Up @@ -1052,10 +1087,12 @@ export class Tokenizer {
return "TIME_HOUR";
}
if (cp === CodePoint.SPACE) {
const subCodePoints = this.codePointIterator.subCodePoints();
if (isDigit(subCodePoints.next()) && isDigit(subCodePoints.next())) {
const startPos = { ...this.codePointIterator.start };
if (isDigit(this.nextCode()) && isDigit(this.nextCode())) {
this.moveAt(startPos);
return "TIME_HOUR";
}
this.moveAt(startPos);
}
const dateValue = getDateFromDateTimeData(data, "");
this.endToken("LocalDate", "start", dateValue);
Expand Down Expand Up @@ -1247,18 +1284,20 @@ export class Tokenizer {
}

private parseUnicode(count: number): number {
const startLoc = { ...this.codePointIterator.start };
const start = this.codePointIterator.end.offset;
let charCount = 0;
for (const cp of this.codePointIterator.iterateSubCodePoints()) {
let cp: number;
while ((cp = this.nextCode()) !== CodePoint.EOF) {
if (!isHexDig(cp)) {
this.moveAt(startLoc);
return this.reportParseError("invalid-char-in-escape-sequence");
}
charCount++;
if (charCount >= count) {
break;
}
}
const start = this.codePointIterator.end.offset;
this.skip(charCount);
const end = this.codePointIterator.end.offset;
const code = this.text.slice(start, end);
const codePoint = parseInt(code, 16);
Expand Down

0 comments on commit 4cab5da

Please sign in to comment.