Skip to content

Commit

Permalink
Source phase imports (#168)
Browse files Browse the repository at this point in the history
  • Loading branch information
guybedford committed Mar 25, 2024
1 parent f44438c commit eaa7e3c
Show file tree
Hide file tree
Showing 10 changed files with 245 additions and 126 deletions.
21 changes: 18 additions & 3 deletions README.md
Expand Up @@ -6,6 +6,8 @@ A JS module syntax lexer used in [es-module-shims](https://github.com/guybedford

Outputs the list of exports and locations of import specifiers, including dynamic import and import meta handling.

Supports new syntax features including import attributes and source phase imports.

A very small single JS file (4KiB gzipped) that includes inlined WebAssembly for very fast source analysis of ECMAScript module syntax only.

For an example of the performance, Angular 1 (720KiB) is fully parsed in 5ms, in comparison to the fastest JS parser, Acorn, which takes over 100ms.
Expand All @@ -20,6 +22,8 @@ _Comprehensively handles the JS language grammar while remaining small and fast.
npm install es-module-lexer
```

See [types/lexer.d.ts](types/lexer.d.ts) for the type definitions.

For use in CommonJS:

```js
Expand Down Expand Up @@ -60,6 +64,10 @@ import { init, parse } from 'es-module-lexer';
// Comments provided to demonstrate edge cases
import /*comment!*/ ( 'asdf', { assert: { type: 'json' }});
import /*comment!*/.meta.asdf;
// Source phase imports:
import source mod from './mod.wasm';
import.source('./mod.wasm');
`;

const [imports, exports] = parse(source, 'optional-sourcename');
Expand Down Expand Up @@ -98,10 +106,10 @@ import { init, parse } from 'es-module-lexer';
// Returns -1
exports[2].le;

// Dynamic imports are indicated by imports[2].d > -1
// In this case the "d" index is the start of the dynamic import bracket
// Import type is provided by `t` value
// (1 for static, 2 for dynamic)
// Returns true
imports[2].d > -1;
imports[2].t == 2;

// Returns "asdf" (only for string literal dynamic imports)
imports[2].n
Expand All @@ -128,6 +136,13 @@ import { init, parse } from 'es-module-lexer';
// Returns "import /*comment!*/.meta"
source.slice(imports[4].s, imports[4].e);
// ss and se are the same for import meta

// Returns "'./mod.wasm'"
source.slice(imports[5].s, imports[5].e);

// Import types 4 and 5 indicate static and dynamic source phase imports respectively
imports[5].t === 4;
imports[6].t === 5;
})();
```

Expand Down
4 changes: 2 additions & 2 deletions chompfile.toml
Expand Up @@ -96,7 +96,7 @@ deps = ['src/lexer.h', 'src/lexer.c']
run = """
${{ WASI_PATH }}/bin/clang src/lexer.c --sysroot=${{ WASI_PATH }}/share/wasi-sysroot -o lib/lexer.wasm -nostartfiles \
"-Wl,-z,stack-size=13312,--no-entry,--compress-relocations,--strip-all,\
--export=parse,--export=sa,--export=e,--export=ri,--export=re,--export=is,--export=ie,--export=ss,--export=ip,--export=se,--export=ai,--export=id,--export=es,--export=ee,--export=els,--export=ele,--export=f,--export=ms,--export=__heap_base" \
--export=parse,--export=sa,--export=e,--export=ri,--export=re,--export=is,--export=ie,--export=it,--export=ss,--export=ip,--export=se,--export=ai,--export=id,--export=es,--export=ee,--export=els,--export=ele,--export=f,--export=ms,--export=__heap_base" \
-Wno-logical-op-parentheses -Wno-parentheses \
-Oz
"""
Expand All @@ -110,7 +110,7 @@ run = """
${{ EMSDK_PATH }}/emsdk activate 1.40.1-fastcomp
${{ EMSDK_PATH }}/fastcomp/emscripten/emcc ./src/lexer.c -o lib/lexer.emcc.js -s WASM=0 -Oz --closure 1 \
-s EXPORTED_FUNCTIONS="['_parse','_sa','_e','_ri','_re','_is','_ie','_ss','_ip','_se','_ai','_id','_es','_ee','_els','_ele','_f','_ms','_setSource']" \
-s EXPORTED_FUNCTIONS="['_parse','_sa','_e','_ri','_re','_it','_is','_ie','_ss','_ip','_se','_ai','_id','_es','_ee','_els','_ele','_f','_ms','_setSource']" \
-s ERROR_ON_UNDEFINED_SYMBOLS=0 -s SINGLE_FILE=1 -s TOTAL_STACK=4997968 -s --separate-asm -Wno-logical-op-parentheses -Wno-parentheses
# rm lib/lexer.emcc.js
Expand Down
10 changes: 5 additions & 5 deletions lib/lexer.asm.js

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions lib/lexer.emcc.asm.js

Large diffs are not rendered by default.

Binary file modified lib/lexer.wasm
Binary file not shown.
6 changes: 3 additions & 3 deletions src/lexer.asm.js
Expand Up @@ -13,7 +13,7 @@ const copy = new Uint8Array(new Uint16Array([1]).buffer)[0] === 1 ? function (sr
outBuf16[i++] = (ch & 0xff) << 8 | ch >>> 8;
}
};
const words = 'xportmportlassetaromsyncunctionssertvoyiedelecontininstantybreareturdebuggeawaithrwhileforifcatcfinallels';
const words = 'xportmportlassetaourceromsyncunctionssertvoyiedelecontininstantybreareturdebuggeawaithrwhileforifcatcfinallels';

let source, name;
export function parse (_source, _name = '@') {
Expand Down Expand Up @@ -44,11 +44,11 @@ export function parse (_source, _name = '@') {

const imports = [], exports = [];
while (asm.ri()) {
const s = asm.is(), e = asm.ie(), a = asm.ai(), d = asm.id(), ss = asm.ss(), se = asm.se();
const s = asm.is(), e = asm.ie(), a = asm.ai(), d = asm.id(), ss = asm.ss(), se = asm.se(), t = asm.it();
let n;
if (asm.ip())
n = readString(d === -1 ? s : s + 1, source.charCodeAt(d === -1 ? s - 1 : s));
imports.push({ n, s, e, ss, se, d, a });
imports.push({ t, n, s, e, ss, se, d, a });
}
while (asm.re()) {
const s = asm.es(), e = asm.ee(), ls = asm.els(), le = asm.ele();
Expand Down
224 changes: 120 additions & 104 deletions src/lexer.c
Expand Up @@ -28,6 +28,7 @@ static const char16_t BREA[] = { 'b', 'r', 'e', 'a' };
static const char16_t CONTIN[] = { 'c', 'o', 'n', 't', 'i', 'n' };
static const char16_t SYNC[] = {'s', 'y', 'n', 'c'};
static const char16_t UNCTION[] = {'u', 'n', 'c', 't', 'i', 'o', 'n'};
static const char16_t OURCE[] = {'o', 'u', 'r', 'c', 'e'};

// Note: parsing is based on the _assumption_ that the source is already valid
bool parse () {
Expand Down Expand Up @@ -239,124 +240,136 @@ void tryParseImportStatement () {

char16_t ch = commentWhitespace(true);

switch (ch) {
// dynamic import
case '(':
openTokenStack[openTokenDepth].token = ImportParen;
openTokenStack[openTokenDepth++].pos = pos;
if (*lastTokenPos == '.')
return;
// dynamic import indicated by positive d
char16_t* dynamicPos = pos;
// try parse a string, to record a safe dynamic import string
pos++;
ch = commentWhitespace(true);
addImport(startPos, pos, 0, dynamicPos);
dynamicImportStack[dynamicImportStackDepth++] = import_write_head;
if (ch == '\'') {
stringLiteral(ch);
}
else if (ch == '"') {
stringLiteral(ch);
}
else {
pos--;
return;
}
pos++;
char16_t* endPos = pos;
bool source_keyword = false;

if (ch == '.') {
// import.meta
pos++;
ch = commentWhitespace(true);
// import.meta indicated by d == -2
if (ch == 'm' && memcmp(pos + 1, &ETA[0], 3 * 2) == 0 && (isSpread(lastTokenPos) || *lastTokenPos != '.')) {
addImport(startPos, startPos, pos + 4, IMPORT_META);
return;
}
else if (ch == 's' && memcmp(pos + 1, &OURCE[0], 5 * 2) == 0 && (isSpread(lastTokenPos) || *lastTokenPos != '.')) {
source_keyword = true;
pos += 6;
ch = commentWhitespace(true);
if (ch == ',') {
pos++;
ch = commentWhitespace(true);
import_write_head->end = endPos;
import_write_head->assert_index = pos;
import_write_head->safe = true;
pos--;
}
else if (ch == ')') {
openTokenDepth--;
import_write_head->end = endPos;
import_write_head->statement_end = pos + 1;
import_write_head->safe = true;
dynamicImportStackDepth--;
}
else {
pos--;
}
}
else {
return;
// import.meta
case '.':
}
}
else if (pos > startPos + 6 && ch == 's' && memcmp(pos + 1, &OURCE[0], 5 * 2) == 0 && isBrOrWs(*(pos + 6))) {
source_keyword = true;
pos += 6;
ch = commentWhitespace(true);
}

// dynamic import
if (ch == '(') {
openTokenStack[openTokenDepth].token = ImportParen;
openTokenStack[openTokenDepth++].pos = pos;
if (*lastTokenPos == '.')
return;
// dynamic import indicated by positive d
char16_t* dynamicPos = pos;
// try parse a string, to record a safe dynamic import string
pos++;
ch = commentWhitespace(true);
addImport(startPos, pos, 0, dynamicPos);
if (source_keyword)
import_write_head->import_ty = DynamicSourcePhase;
dynamicImportStack[dynamicImportStackDepth++] = import_write_head;
if (ch == '\'') {
stringLiteral(ch);
}
else if (ch == '"') {
stringLiteral(ch);
}
else {
pos--;
return;
}
pos++;
char16_t* endPos = pos;
ch = commentWhitespace(true);
if (ch == ',') {
pos++;
ch = commentWhitespace(true);
// import.meta indicated by d == -2
if (ch == 'm' && memcmp(pos + 1, &ETA[0], 3 * 2) == 0 && (isSpread(lastTokenPos) || *lastTokenPos != '.'))
addImport(startPos, startPos, pos + 4, IMPORT_META);
import_write_head->end = endPos;
import_write_head->assert_index = pos;
import_write_head->safe = true;
pos--;
}
else if (ch == ')') {
openTokenDepth--;
import_write_head->end = endPos;
import_write_head->statement_end = pos + 1;
import_write_head->safe = true;
dynamicImportStackDepth--;
}
else {
pos--;
}
return;
}

if (ch == '{' && !source_keyword) {
// import statement only permitted at base-level
if (openTokenDepth != 0) {
pos--;
return;
}

default:
// no space after "import" -> not an import keyword
if (pos == startPos + 6) {
pos--;
break;
}
case '"':
case '\'':
case '*': {
// import statement only permitted at base-level
if (openTokenDepth != 0) {
pos--;
return;
}
while (pos < end) {
ch = *pos;
if (isQuote(ch)) {
readImportString(startPos, ch);
return;
}
while (pos < end) {
ch = commentWhitespace(true);
if (isQuote(ch)) {
stringLiteral(ch);
} else if (ch == '}') {
pos++;
break;
}
syntaxError();
break;
pos++;
}

case '{': {
// import statement only permitted at base-level
if (openTokenDepth != 0) {
pos--;
return;
}

while (pos < end) {
ch = commentWhitespace(true);
ch = commentWhitespace(true);
if (ch == 'f' && memcmp(pos + 1, &ROM[0], 3 * 2) != 0) {
syntaxError();
return;
}

if (isQuote(ch)) {
stringLiteral(ch);
} else if (ch == '}') {
pos++;
break;
}
pos += 4;
ch = commentWhitespace(true);

pos++;
}
if (!isQuote(ch)) {
return syntaxError();
}

ch = commentWhitespace(true);
if (ch == 'f' && memcmp(pos + 1, &ROM[0], 3 * 2) != 0) {
syntaxError();
break;
readImportString(startPos, ch, false);
}
else {
if (source_keyword || !(ch == '"' || ch == '\'' || ch == '*')) {
// no space after "import" -> not an import keyword
if (pos == startPos + (source_keyword ? 12 : 6)) {
pos--;
return;
}

pos += 4;
ch = commentWhitespace(true);

if (!isQuote(ch)) {
return syntaxError();
}
// import statement only permitted at base-level
if (openTokenDepth != 0 ) {
pos--;
return;
}
while (pos < end) {
ch = *pos;
if (isQuote(ch)) {
readImportString(startPos, ch, source_keyword);
return;
}

readImportString(startPos, ch);

break;
pos++;
}
syntaxError();
}
}

Expand Down Expand Up @@ -572,7 +585,7 @@ void tryParseExportStatement () {
// from ...
if (ch == 'f' && memcmp(pos + 1, &ROM[0], 3 * 2) == 0) {
pos += 4;
readImportString(sStartPos, commentWhitespace(true));
readImportString(sStartPos, commentWhitespace(true), false);

// There were no local names.
for (Export* exprt = prev_export_write_head == NULL ? first_export : prev_export_write_head->next; exprt != NULL; exprt = exprt->next) {
Expand Down Expand Up @@ -619,7 +632,7 @@ char16_t readExportAs (char16_t* startPos, char16_t* endPos) {
return ch;
}

void readImportString (const char16_t* ss, char16_t ch) {
void readImportString (const char16_t* ss, char16_t ch, bool source_phase) {
const char16_t* startPos = pos + 1;
if (ch == '\'') {
stringLiteral(ch);
Expand All @@ -632,6 +645,9 @@ void readImportString (const char16_t* ss, char16_t ch) {
return;
}
addImport(ss, startPos, pos, STANDARD_IMPORT);
if (source_phase) {
import_write_head->import_ty = StaticSourcePhase;
}
pos++;
ch = commentWhitespace(false);
if (!(ch == 'a' && memcmp(pos + 1, &SSERT[0], 5 * 2) == 0) && !(ch == 'w' && *(pos + 1) == 'i' && *(pos + 2) == 't' && *(pos + 3) == 'h')) {
Expand Down

0 comments on commit eaa7e3c

Please sign in to comment.