Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP migration of es-module-lexer to Wasm components #143

Open
wants to merge 18 commits into
base: main
Choose a base branch
from
Open
33 changes: 17 additions & 16 deletions .github/workflows/build.yml
Expand Up @@ -7,9 +7,8 @@ on:
branches: main

env:
WASI_VERSION: 12
WASI_VERSION_FULL: "12.0"
EMCC_VERSION: "1.40.1-fastcomp"
WASI_VERSION: 17
WASI_VERSION_FULL: "17.0"

jobs:
build:
Expand All @@ -19,6 +18,19 @@ jobs:
- name: Check out Git repository
uses: actions/checkout@v2

- name: Install Rust
run: rustup update stable --no-self-update && rustup default stable
- name: Install wasm32-unknown-unknown target
run: rustup target add wasm32-unknown-unknown
- name: Install wasm32-wasi target
run: rustup target add wasm32-wasi

- name: Install WIT Bindgen
run: cargo install --git https://github.com/bytecodealliance/wit-bindgen wit-bindgen-cli

- name: Install Wasm Tools
run: cargo install wasm-tools

- name: Install wasi-sdk
shell: bash
run: |
Expand All @@ -40,25 +52,14 @@ jobs:
# print clang version
./wasi-sdk-${WASI_VERSION_FULL}/bin/clang --version

- name: Install Emscripten
run: |
git clone https://github.com/emscripten-core/emsdk.git
cd ./emsdk
./emsdk install ${{ env.EMCC_VERSION }}
./emsdk activate ${{ env.EMCC_VERSION }}
./fastcomp/emscripten/emcc -v

- name: npm Install
run: npm install

- name: Setup Chomp
uses: guybedford/chomp-action@v1
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

- name: Compile to Wasm & Test Wasm
env:
WASI_PATH: './wasi-sdk-${{ env.WASI_VERSION_FULL }}'
EMSDK_PATH: './emsdk'
EMSCRIPTEN_BIN: './emsdk/fastcomp/emscripten/emcc'
run: chomp test

- name: Footprint
Expand Down
10 changes: 3 additions & 7 deletions .gitignore
@@ -1,8 +1,4 @@
node_modules
lib/lexer.wat
dist
/node_modules
/obj
/dist
package-lock.json
yarn.lock
lib/lexer.emcc.js
src/lexer.js
types/lexer.d.ts
244 changes: 104 additions & 140 deletions chompfile.toml
@@ -1,179 +1,138 @@
version = 0.1
default-task = 'build'

extensions = ['chomp@0.1:npm', 'chomp@0.1:footprint', 'chomp@0.1:terser']

[env-default]
WASI_PATH = '../wasi-sdk-12.0'
EMSDK_PATH = '../emsdk'
WABT_PATH = '../wabt'
WASM_TOOLS = 'wasm-tools'
WASI_PATH = '/opt/wasi-sdk'
JSCT = 'jsct'
WIT_BINDGEN = 'wit-bindgen'

[[task]]
name = 'build'
deps = ['dist/lexer.js', 'dist/lexer.cjs', 'dist/lexer.asm.js', 'types/lexer.d.ts']

[[task]]
name = 'bench'
serial = true
deps = ['bench:js', 'bench:wasm']

[[task]]
name = 'bench:js'
deps = ['dist/lexer.asm.js']
env = { BENCH = 'js' }
run = 'node --expose-gc bench/index.js'

[[task]]
name = 'bench:wasm'
deps = ['dist/lexer.js']
env = { BENCH = 'wasm' }
run = 'node --expose-gc bench/index.js'

[[task]]
target = 'dist/lexer.asm.js'
dep = 'lib/lexer.asm.js'
template = 'terser'
[task.template-options]
module = true
compress = { ecma = 6, unsafe = true }
output = { preamble = '/* es-module-lexer #PJSON_VERSION */' }
deps = ['dist/lexer.js', 'dist/lexer.cjs', 'dist/lexer.asm.js', 'dist/lexer.d.ts']

[[task]]
target = 'dist/lexer.cjs'
deps = ['dist/lexer.js']
run = 'babel dist/lexer.js | terser -c -m -o dist/lexer.cjs'

[[task]]
name = 'build:swc'
target = 'src/lexer.js'
dep = 'src/lexer.ts'
# Note: ideally this task would use the chomp swc template, but because of
# https://github.com/swc-project/cli/issues/113 a sourcemap is always emitted,
# even when we set "source-maps = false". For now the template has been ejected
# to its raw "run" command, with an added "rm" step to delete the sourcemap.
targets = ['dist/lexer.js', 'dist/lexer.d.ts']
deps = ['obj/lexer-component.wasm', 'package.json', 'npm:install']
engine = 'node'
run = '''
node ./node_modules/@swc/cli/bin/swc.js $DEP -o $TARGET --no-swcrc -C jsc.parser.syntax=typescript -C jsc.parser.importAssertions=true -C jsc.parser.topLevelAwait=true -C jsc.parser.importMeta=true -C jsc.parser.privateMethod=true -C jsc.parser.dynamicImport=true -C jsc.target=es2016 -C jsc.experimental.keepImportAssertions=true
rm src/lexer.js.map
'''
import { transpile } from 'js-component-tools';
import { readFile, writeFile } from 'node:fs/promises';

const { files } = await transpile(await readFile('obj/lexer-component.wasm'), {
outDir: 'dist',
name: 'lexer',
validLiftingOptimization: true,
base64Cutoff: 9000,
optimize: true,
minify: true
});

[[task]]
# Note: swc does not support emitting typings
# (https://github.com/swc-project/swc/issues/657), so while swc is used to
# generate the .js file, tsc is still needed to generate the .d.ts file.
name = 'build:types'
target = 'types/lexer.d.ts'
dep = 'src/lexer.ts'
run = '''
tsc --strict --declaration --emitDeclarationOnly --outdir types src/lexer.ts
const { version } = JSON.parse(await readFile('package.json'));

await writeFile('dist/lexer.js', `/* es-module-lexer ${version} */\n${
files['dist/lexer.js']
.replace(/parse\((\w),(\w)\)\{/, `parse($1,$2){$1+='\\0';`)
.replace(/n:(\w+)/, 'n:$1?eval($1):$1')
.replace(/n:([^e])/g, `n:$1&&($1[0]==='"'||$1[0]==="'")?eval($1):$1`)
}`);
await writeFile('dist/lexer.d.ts', files['dist/lexer.d.ts']);
'''

[[task]]
target = 'dist/lexer.js'
deps = ['src/lexer.js', 'lib/lexer.wasm', 'package.json']
target = 'dist/lexer.asm.js'
deps = ['obj/lexer-component.wasm', 'package.json', 'npm:install', 'obj/readstring.min.js']
engine = 'node'
run = '''
import { readFileSync, writeFileSync } from 'fs';
import { minify } from 'terser';
import { transpile } from 'js-component-tools';
import { readFile, writeFile } from 'node:fs/promises';

const { files } = await transpile(await readFile('obj/lexer-component.wasm'), {
outDir: 'dist',
name: 'lexer.asm',
validLiftingOptimization: true,
optimize: true,
minify: true,
asm: true
});

const wasmBuffer = readFileSync('lib/lexer.wasm');
const jsSource = readFileSync('src/lexer.js', 'utf8');
const pjson = JSON.parse(readFileSync('package.json', 'utf8'));
const { version } = JSON.parse(await readFile('package.json'));
const readstring = await readFile('obj/readstring.min.js', 'utf8');

const jsSourceProcessed = jsSource.replace('WASM_BINARY', wasmBuffer.toString('base64'));
await writeFile('dist/lexer.asm.js', `/* es-module-lexer asm.js ${version} */\n${readstring}${
files['dist/lexer.asm.js']
.replace(/parse\((\w),(\w)\)\{/, `parse($1,$2){$1+='\\0';`)
.replace(/n:(\w)/, 'n:$1?readString($1):$1')
.replace(/n:([a-qs-zA-Z])/g, `n:$1&&($1[0]==='"'||$1[0]==="'")?readString($1):$1`)
}`);
'''

const { code: minified } = await minify(jsSourceProcessed, {
module: true,
output: {
preamble: `/* es-module-lexer ${pjson.version} */`
}
});
[[task]]
target = 'obj/readstring.min.js'
dep = 'src/readstring.js'
template = 'terser'

writeFileSync('dist/lexer.js', minified ? minified : jsSourceProcessed);
[[task]]
target = 'obj/lexer-component.wasm'
stdio = 'stderr-only'
deps = ['obj/lexer.wasm', 'src/lexer-world.wit']
# TODO: switch back to jsct after the next release that includes the encoding fix
run = '''
${{ WASM_TOOLS }} component new $DEP --encoding utf16 --wit src/lexer-world.wit -o $TARGET
'''

[[task]]
target = 'lib/lexer.wasm'
deps = ['src/lexer.h', 'src/lexer.c']
target = 'obj/lexer.wasm'
deps = ['obj/lexer_world.c', 'obj/lexer_world.h', 'src/lexer.c', 'src/lexer.h']
stdio = 'stderr-only'
run = """
${{ WASI_PATH }}/bin/clang src/lexer.c --sysroot=${{ WASI_PATH }}/share/wasi-sysroot -o lib/lexer.wasm -nostartfiles \
"-Wl,-z,stack-size=13312,--no-entry,--compress-relocations,--strip-all,\
--export=parse,--export=sa,--export=e,--export=ri,--export=re,--export=is,--export=ie,--export=ss,--export=ip,--export=se,--export=ai,--export=id,--export=es,--export=ee,--export=els,--export=ele,--export=f,--export=__heap_base" \
-Wno-logical-op-parentheses -Wno-parentheses \
-Oz
${{ WASI_PATH }}/bin/clang src/lexer.c obj/lexer_world.c --sysroot=${{ WASI_PATH }}/share/wasi-sysroot -o obj/lexer.wasm -nostartfiles \
"-Wl,-z,stack-size=1331200,--no-entry,--compress-relocations,--strip-all" \
-Wno-logical-op-parentheses -Wno-parentheses \
-Oz
"""

[[task]]
target = 'lib/lexer.emcc.asm.js'
deps = ['src/lexer.h', 'src/lexer.c']
env = { PYTHONHOME = '' }
run = """
${{ EMSDK_PATH }}/emsdk install 1.40.1-fastcomp
${{ EMSDK_PATH }}/emsdk activate 1.40.1-fastcomp

${{ EMSDK_PATH }}/fastcomp/emscripten/emcc ./src/lexer.c -o lib/lexer.emcc.js -s WASM=0 -Oz --closure 1 \
-s EXPORTED_FUNCTIONS="['_parse','_sa','_e','_ri','_re','_is','_ie','_ss','_ip','_se','_ai','_id','_es','_ee','_els','_ele','_f','_setSource']" \
-s ERROR_ON_UNDEFINED_SYMBOLS=0 -s SINGLE_FILE=1 -s TOTAL_STACK=4997968 -s --separate-asm -Wno-logical-op-parentheses -Wno-parentheses

# rm lib/lexer.emcc.js
"""
targets = ['obj/lexer.c', 'obj/lexer_world.h']
dep = 'src/lexer-world.wit'
# stdio = 'stderr-only'
run = '''
${{ WIT_BINDGEN }} guest c --string-encoding utf16 src/lexer-world.wit --no-helpers --out-dir obj
mv obj/lexer.h obj/lexer_world.h
'''

[[task]]
target = 'lib/lexer.asm.js'
deps = ['lib/lexer.emcc.asm.js', 'src/lexer.asm.js']
target = 'obj/lexer_world.c'
dep = 'obj/lexer.c'
engine = 'node'
run = '''
import { readFileSync, writeFileSync } from 'fs';

const wrapper_start = readFileSync('src/lexer.asm.js', 'utf8');
let source = readFileSync('lib/lexer.emcc.asm.js', 'utf8').trim();

const endFuncs = 'EMSCRIPTEN_END_FUNCS';
const removeFunc = name => [new RegExp(`function ${name}\\([^]+?}\\s*(function|return\\s?{[^{}]+};?\\s*}\\s*$)`), '$1'];

const replacements = [
[/Module\["asm"\]=\s?\(\/\*\* @suppress {uselessCode} \*\/ function\(/, 'function asmInit('],
[/\)$/, ''],
[/,\s?_(\w+):/g, ',$1:', null, endFuncs],
['setSource:', 'ses:', null, endFuncs],
['parse:', 'p:', null, endFuncs],
[/___errno_location:\s?(\w+),/, '', removeFunc, endFuncs],
[/_apply_relocations:\s?(\w+),/, '', removeFunc, endFuncs],
[/,\s?free:\s?(\w+)/, '', removeFunc, endFuncs],
[/,\s?malloc:\s?(\w+)/, '', removeFunc, endFuncs],
[/,\s?memcpy:\s?(\w+)/, '', removeFunc, endFuncs],
[/,\s?memset:\s?(\w+)/, '', removeFunc, endFuncs],
[/,\s?stackAlloc:\s?(\w+)/, '', removeFunc, endFuncs],
[/,\s?emscripten_get_sbrk_ptr:\s?(\w+)/, '', removeFunc, endFuncs],
[/,\s?stackRestore:\s?(\w+)/, '', removeFunc, endFuncs],
[/,\s?stackSave:\s?(\w+)/, '', removeFunc, endFuncs],
[/,\s*\w+\s?=\s?env\.\w+\|0,\s*\w+\s?=\s?env\.\w+\|0,\s*\w+\s?=\s?0,\s*\w+\s?=\s?0,\s*\w+\s?=\s?0,\s*\w+\s?=\s?0,\s*\w+\s?=\s?0,\s*\w+\s?=\s?0,\s*\w+\s?=\s?0,\s*\w+\s?=\s?0\.0,\s*\w+\s?=\s?env\.\w+,\s*\w+\s?=\s?env\.\w+,\s*\w+\s?=\s?env\.\w+,\s*\w+\s?=\s?env\.\w+,\s*\w+\s?=\s?env\.\w+,\s*\w+\s?=\s?env\.\w+/, ''],
[/,\s*\w+\s?=\s?\d+,\s*\w+\s?=\s?0.0;/, ';'],
[/function \w+\(\w+\)\s?{[^{}]+{[^{}s]+s\(\)[^{}]+}[^{}]+}/, ''],
[/\s*\/\/ EMSCRIPTEN_END_FUNCS\s*return\{/, ` function su(a) {
a = a | 0;
v = a + 992 + 15 & -16;
return 992;
}
return {
su,`],
[/\s*\/\/ EMSCRIPTEN_START_FUNCS\s*/, ''],
];

for (const [from, to, add, after] of replacements) {
const [matched, match] = source.match(from) || [];
if (!matched) {
console.log(source.slice(0, 1000));
throw new Error(`Match not found for ${from} -> ${to}${after ? `, after ${after}` : ''}`);
}
const afterIndex = after ? source.indexOf(after) : 0;
const replaced = source.slice(0, afterIndex) + source.slice(afterIndex).replace(from, to);
if (add) replacements.push(add(match));
source = replaced;
}

writeFileSync(process.env.TARGET, wrapper_start + source);
import { readFile, writeFile } from 'fs/promises';
await writeFile(process.env.TARGET, (await readFile(process.env.DEP, 'utf8')).replace('lexer.h', 'lexer_world.h'));
'''

[[task]]
name = 'bench'
serial = true
deps = ['bench:js', 'bench:wasm']

[[task]]
name = 'bench:js'
deps = ['dist/lexer.asm.js']
env = { BENCH = 'js' }
run = 'node --expose-gc bench/index.js'

[[task]]
name = 'bench:wasm'
deps = ['dist/lexer.js']
env = { BENCH = 'wasm' }
run = 'node --expose-gc bench/index.js'

[[task]]
name = 'test'
deps = ['test:wasm', 'test:asm']
Expand All @@ -195,9 +154,14 @@ env = { WASM = '1' }
run = 'mocha -b -u tdd test/*.cjs'

[[task]]
target = 'lib/lexer.wat'
dep = 'lib/lexer.wasm'
run = '${{ WABT_PATH }}/bin/wasm2wat lib/lexer.wasm -o lib/lexer.wat'
target = 'obj/lexer.wat'
dep = 'obj/lexer.wasm'
run = '${{ jsct }} parse -t $DEP -o $TARGET'

[[task]]
target = 'obj/lexer-component.core.wat'
dep = 'obj/lexer-component.core.wasm'
run = '${{ jsct }} parse -t $DEP -o $TARGET'

[[task]]
name = 'footprint'
Expand Down