Skip to content

Commit

Permalink
Refactor Lexer
Browse files Browse the repository at this point in the history
The lexer needed some cleanup. I found myself doing this as part of a Unicode RFC, but I am factoring all of that out into this commit to make the Unicode RFC PR easier to follow.

* Always use hexadecimal form for code values.
* Remove use of `isNaN` for checking source over-reads.
* Define `isSourceCharacter`.
* Add more documentation and comments; also replace regex with lexical grammar.
* Simplify error messages.
* Add additional tests.
  • Loading branch information
leebyron committed May 19, 2021
1 parent 9a4a228 commit 2cd8510
Show file tree
Hide file tree
Showing 4 changed files with 552 additions and 459 deletions.
100 changes: 66 additions & 34 deletions src/language/__tests__/lexer-test.js
Expand Up @@ -29,12 +29,12 @@ function expectSyntaxError(text: string) {
describe('Lexer', () => {
it('disallows uncommon control characters', () => {
expectSyntaxError('\u0007').to.deep.equal({
message: 'Syntax Error: Cannot contain the invalid character "\\u0007".',
message: 'Syntax Error: Invalid character: U+0007.',
locations: [{ line: 1, column: 1 }],
});
});

it('accepts BOM header', () => {
it('ignores BOM header', () => {
expect(lexOne('\uFEFF foo')).to.contain({
kind: TokenKind.NAME,
start: 2,
Expand Down Expand Up @@ -138,6 +138,13 @@ describe('Lexer', () => {
value: 'foo',
});

expect(lexOne('\t\tfoo\t\t')).to.contain({
kind: TokenKind.NAME,
start: 2,
end: 5,
value: 'foo',
});

expect(
lexOne(`
#comment
Expand Down Expand Up @@ -166,7 +173,7 @@ describe('Lexer', () => {
caughtError = error;
}
expect(String(caughtError)).to.equal(dedent`
Syntax Error: Cannot parse the unexpected character "?".
Syntax Error: Unexpected character: "?".
GraphQL request:3:5
2 |
Expand All @@ -186,7 +193,7 @@ describe('Lexer', () => {
caughtError = error;
}
expect(String(caughtError)).to.equal(dedent`
Syntax Error: Cannot parse the unexpected character "?".
Syntax Error: Unexpected character: "?".
foo.js:13:6
12 |
Expand All @@ -205,7 +212,7 @@ describe('Lexer', () => {
caughtError = error;
}
expect(String(caughtError)).to.equal(dedent`
Syntax Error: Cannot parse the unexpected character "?".
Syntax Error: Unexpected character: "?".
foo.js:1:5
1 | ?
Expand Down Expand Up @@ -293,13 +300,13 @@ describe('Lexer', () => {

expectSyntaxError('"contains unescaped \u0007 control char"').to.deep.equal(
{
message: 'Syntax Error: Invalid character within String: "\\u0007".',
message: 'Syntax Error: Invalid character within String: U+0007.',
locations: [{ line: 1, column: 21 }],
},
);

expectSyntaxError('"null-byte is not \u0000 end of file"').to.deep.equal({
message: 'Syntax Error: Invalid character within String: "\\u0000".',
message: 'Syntax Error: Invalid character within String: U+0000.',
locations: [{ line: 1, column: 19 }],
});

Expand All @@ -314,38 +321,38 @@ describe('Lexer', () => {
});

expectSyntaxError('"bad \\z esc"').to.deep.equal({
message: 'Syntax Error: Invalid character escape sequence: \\z.',
locations: [{ line: 1, column: 7 }],
message: 'Syntax Error: Invalid character escape sequence: "\\z".',
locations: [{ line: 1, column: 6 }],
});

expectSyntaxError('"bad \\x esc"').to.deep.equal({
message: 'Syntax Error: Invalid character escape sequence: \\x.',
locations: [{ line: 1, column: 7 }],
message: 'Syntax Error: Invalid character escape sequence: "\\x".',
locations: [{ line: 1, column: 6 }],
});

expectSyntaxError('"bad \\u1 esc"').to.deep.equal({
message: 'Syntax Error: Invalid character escape sequence: \\u1 es.',
locations: [{ line: 1, column: 7 }],
message: 'Syntax Error: Invalid Unicode escape sequence: "\\u1 es".',
locations: [{ line: 1, column: 6 }],
});

expectSyntaxError('"bad \\u0XX1 esc"').to.deep.equal({
message: 'Syntax Error: Invalid character escape sequence: \\u0XX1.',
locations: [{ line: 1, column: 7 }],
message: 'Syntax Error: Invalid Unicode escape sequence: "\\u0XX1".',
locations: [{ line: 1, column: 6 }],
});

expectSyntaxError('"bad \\uXXXX esc"').to.deep.equal({
message: 'Syntax Error: Invalid character escape sequence: \\uXXXX.',
locations: [{ line: 1, column: 7 }],
message: 'Syntax Error: Invalid Unicode escape sequence: "\\uXXXX".',
locations: [{ line: 1, column: 6 }],
});

expectSyntaxError('"bad \\uFXXX esc"').to.deep.equal({
message: 'Syntax Error: Invalid character escape sequence: \\uFXXX.',
locations: [{ line: 1, column: 7 }],
message: 'Syntax Error: Invalid Unicode escape sequence: "\\uFXXX".',
locations: [{ line: 1, column: 6 }],
});

expectSyntaxError('"bad \\uXXXF esc"').to.deep.equal({
message: 'Syntax Error: Invalid character escape sequence: \\uXXXF.',
locations: [{ line: 1, column: 7 }],
message: 'Syntax Error: Invalid Unicode escape sequence: "\\uXXXF".',
locations: [{ line: 1, column: 6 }],
});
});

Expand Down Expand Up @@ -481,14 +488,14 @@ describe('Lexer', () => {
expectSyntaxError(
'"""contains unescaped \u0007 control char"""',
).to.deep.equal({
message: 'Syntax Error: Invalid character within String: "\\u0007".',
message: 'Syntax Error: Invalid character within String: U+0007.',
locations: [{ line: 1, column: 23 }],
});

expectSyntaxError(
'"""null-byte is not \u0000 end of file"""',
).to.deep.equal({
message: 'Syntax Error: Invalid character within String: "\\u0000".',
message: 'Syntax Error: Invalid character within String: U+0000.',
locations: [{ line: 1, column: 21 }],
});
});
Expand Down Expand Up @@ -624,7 +631,7 @@ describe('Lexer', () => {
});

expectSyntaxError('+1').to.deep.equal({
message: 'Syntax Error: Cannot parse the unexpected character "+".',
message: 'Syntax Error: Unexpected character: "+".',
locations: [{ line: 1, column: 1 }],
});

Expand All @@ -649,7 +656,7 @@ describe('Lexer', () => {
});

expectSyntaxError('.123').to.deep.equal({
message: 'Syntax Error: Cannot parse the unexpected character ".".',
message: 'Syntax Error: Unexpected character: ".".',
locations: [{ line: 1, column: 1 }],
});

Expand All @@ -673,6 +680,11 @@ describe('Lexer', () => {
locations: [{ line: 1, column: 5 }],
});

expectSyntaxError('1.0e"').to.deep.equal({
message: "Syntax Error: Invalid number, expected digit but got: '\"'.",
locations: [{ line: 1, column: 5 }],
});

expectSyntaxError('1.2e3e').to.deep.equal({
message: 'Syntax Error: Invalid number, expected digit but got: "e".',
locations: [{ line: 1, column: 6 }],
Expand Down Expand Up @@ -707,7 +719,7 @@ describe('Lexer', () => {
locations: [{ line: 1, column: 2 }],
});
expectSyntaxError('1\u00DF').to.deep.equal({
message: 'Syntax Error: Cannot parse the unexpected character "\\u00DF".',
message: 'Syntax Error: Unexpected character: U+00DF.',
locations: [{ line: 1, column: 2 }],
});
expectSyntaxError('1.23f').to.deep.equal({
Expand Down Expand Up @@ -815,22 +827,17 @@ describe('Lexer', () => {

it('lex reports useful unknown character error', () => {
expectSyntaxError('..').to.deep.equal({
message: 'Syntax Error: Cannot parse the unexpected character ".".',
message: 'Syntax Error: Unexpected character: ".".',
locations: [{ line: 1, column: 1 }],
});

expectSyntaxError('?').to.deep.equal({
message: 'Syntax Error: Cannot parse the unexpected character "?".',
message: 'Syntax Error: Unexpected character: "?".',
locations: [{ line: 1, column: 1 }],
});

expectSyntaxError('\u203B').to.deep.equal({
message: 'Syntax Error: Cannot parse the unexpected character "\\u203B".',
locations: [{ line: 1, column: 1 }],
});

expectSyntaxError('\u200b').to.deep.equal({
message: 'Syntax Error: Cannot parse the unexpected character "\\u200B".',
message: 'Syntax Error: Unexpected character: U+203B.',
locations: [{ line: 1, column: 1 }],
});
});
Expand Down Expand Up @@ -893,6 +900,31 @@ describe('Lexer', () => {
TokenKind.EOF,
]);
});

// Checks how `#` comments are tokenized. Each positive case inspects the
// `.prev` link of the token returned by `lexOne` and expects it to be a
// COMMENT token (presumably `lexOne` skips past comments — confirm against
// the helper defined earlier in this test file).
it('lexes comments', () => {
// Comment running to the end of the source: the expected value omits the
// leading `#`, and `end` is the source length (9).
expect(lexOne('# Comment').prev).to.contain({
kind: TokenKind.COMMENT,
start: 0,
end: 9,
value: ' Comment',
});
// Comment followed by `\n`: `end` is still expected to be 9, i.e. the line
// feed and the text after it are not part of the comment token.
expect(lexOne('# Comment\nAnother line').prev).to.contain({
kind: TokenKind.COMMENT,
start: 0,
end: 9,
value: ' Comment',
});
// Comment followed by `\r\n`: same expected span and value as the `\n` case.
expect(lexOne('# Comment\r\nAnother line').prev).to.contain({
kind: TokenKind.COMMENT,
start: 0,
end: 9,
value: ' Comment',
});
// A U+0007 control character inside a comment is expected to raise a syntax
// error located at that character (line 1, column 3).
expectSyntaxError('# \u0007').to.deep.equal({
message: 'Syntax Error: Invalid character: U+0007.',
locations: [{ line: 1, column: 3 }],
});
});
});

describe('isPunctuatorTokenKind', () => {
Expand Down
1 change: 0 additions & 1 deletion src/language/ast.d.ts
Expand Up @@ -85,7 +85,6 @@ export class Token {
end: number,
line: number,
column: number,
prev: Token | null,
value?: string,
);

Expand Down
3 changes: 1 addition & 2 deletions src/language/ast.js
Expand Up @@ -93,7 +93,6 @@ export class Token {
end: number,
line: number,
column: number,
prev: Token | null,
value?: string,
) {
this.kind = kind;
Expand All @@ -102,7 +101,7 @@ export class Token {
this.line = line;
this.column = column;
this.value = value;
this.prev = prev;
this.prev = null;
this.next = null;
}

Expand Down

0 comments on commit 2cd8510

Please sign in to comment.