Skip to content

Commit 2be5b2d

Browse files
authored Aug 30, 2024
feat: introduce experimental JavaScript RegExp Engine (#761)
1 parent 523f5fd commit 2be5b2d

37 files changed

+4880
-985
lines changed
 

‎.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,4 @@ packages/shiki/src/assets/themes
1515
packages/shiki/src/assets/*.json
1616
cache
1717
.eslintcache
18+
report-engine-js-compat.json

‎bench/engines.bench.ts

+39
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
import fs from 'node:fs/promises'
2+
import { bench, describe } from 'vitest'
3+
import type { BundledLanguage } from 'shiki'
4+
import { createHighlighter, createJavaScriptRegexEngine, createWasmOnigEngine } from 'shiki'
5+
import type { ReportItem } from '../scripts/report-engine-js-compat'
6+
7+
// Benchmarks tokenization throughput of the JavaScript RegExp engine against
// the WASM Oniguruma engine over the same set of languages and sample files.
describe('engines', async () => {
  const js = createJavaScriptRegexEngine()
  const wasm = await createWasmOnigEngine(() => import('shiki/wasm'))

  // Run `npx jiti scripts/report-engine-js-compat.ts` to generate the report first
  const report = JSON.parse(
    await fs.readFile('../scripts/report-engine-js-compat.json', 'utf-8'),
  ) as ReportItem[]
  // Only languages the report marks with `highlightMatch === true` are benchmarked
  const langs = report.filter(i => i.highlightMatch === true).map(i => i.lang) as BundledLanguage[]
  // `samples[i]` holds the sample source for `langs[i]`
  const samples = await Promise.all(langs.map(lang => fs.readFile(`../tm-grammars-themes/samples/${lang}.sample`, 'utf-8')))

  const shikiJs = await createHighlighter({
    langs,
    themes: ['vitesse-dark'],
    engine: js,
  })

  const shikiWasm = await createHighlighter({
    langs,
    themes: ['vitesse-dark'],
    engine: wasm,
  })

  // Iterate by index so the measured callback does not pay for a linear
  // `indexOf` lookup per language on top of the tokenization being measured.
  bench('js', () => {
    for (let i = 0; i < langs.length; i++) {
      shikiJs.codeToTokensBase(samples[i], { lang: langs[i], theme: 'vitesse-dark' })
    }
  }, { warmupIterations: 10, iterations: 30 })

  bench('wasm', () => {
    for (let i = 0; i < langs.length; i++) {
      shikiWasm.codeToTokensBase(samples[i], { lang: langs[i], theme: 'vitesse-dark' })
    }
  }, { warmupIterations: 10, iterations: 30 })
})

‎package.json

+2-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"type": "module",
33
"version": "1.14.1",
44
"private": true,
5-
"packageManager": "pnpm@9.8.0",
5+
"packageManager": "pnpm@9.9.0",
66
"scripts": {
77
"lint": "eslint . --cache",
88
"release": "bumpp && pnpm -r publish",
@@ -46,6 +46,7 @@
4646
"mdast-util-gfm": "catalog:",
4747
"mdast-util-to-hast": "catalog:",
4848
"ofetch": "catalog:",
49+
"picocolors": "catalog:",
4950
"pnpm": "catalog:",
5051
"prettier": "catalog:",
5152
"rimraf": "catalog:",

‎packages/core/package.json

+1
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@
7070
},
7171
"devDependencies": {
7272
"hast-util-to-html": "catalog:",
73+
"oniguruma-to-js": "catalog:",
7374
"vscode-oniguruma": "catalog:"
7475
}
7576
}
+133
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
import { onigurumaToRegexp } from 'oniguruma-to-js'
2+
import type { PatternScanner, RegexEngine, RegexEngineString } from '../textmate'
3+
import type { JavaScriptRegexEngineOptions } from '../types/engines'
4+
5+
const MAX = 4294967295
6+
7+
/**
 * Pattern scanner backed by native JavaScript `RegExp` objects.
 *
 * Each Oniguruma pattern is converted with `onigurumaToRegexp` (flags `dg`).
 * Converted expressions, and conversion errors, are memoized in `cache` keyed
 * by the original pattern string. When `forgiving` is set, patterns that fail
 * to convert are stored as `null` and skipped while scanning; otherwise the
 * conversion error is thrown from the constructor.
 */
export class JavaScriptScanner implements PatternScanner {
  regexps: (RegExp | null)[]

  constructor(
    public patterns: string[],
    public cache: Map<string, RegExp | Error>,
    public forgiving: boolean,
  ) {
    const compiled: (RegExp | null)[] = []

    for (const pattern of patterns) {
      const known = cache?.get(pattern)

      // Cache hit: either a ready-to-use RegExp or a previously recorded failure
      if (known) {
        if (known instanceof RegExp) {
          compiled.push(known)
          continue
        }
        if (!forgiving)
          throw known
        compiled.push(null)
        continue
      }

      try {
        // YAML specific handling; TODO: move to tm-grammars
        const source = pattern.replaceAll('[^\\s[-?:,\\[\\]{}#&*!|>\'"%@`]]', '[^\\s\\-?:,\\[\\]{}#&*!|>\'"%@`]')
        const converted = onigurumaToRegexp(source, { flags: 'dg' })
        cache?.set(pattern, converted)
        compiled.push(converted)
      }
      catch (error) {
        // Remember the failure so the same pattern is not converted again
        cache?.set(pattern, error as Error)
        if (!forgiving)
          throw error
        compiled.push(null)
      }
    }

    this.regexps = compiled
  }

  /**
   * Run every usable pattern against the input, starting at `startPosition`.
   *
   * A pattern matching exactly at `startPosition` wins immediately. Otherwise
   * the match with the smallest index is returned, and on equal indices the
   * pattern that appears first wins. Returns `null` when nothing matches.
   * Unmatched capture groups are reported with `start`/`end` set to `MAX`
   * and `length` 0.
   */
  findNextMatchSync(string: string | RegexEngineString, startPosition: number) {
    const text = typeof string === 'string' ? string : string.content

    const buildResult = (index: number, match: RegExpExecArray) => ({
      index,
      captureIndices: match.indices!.map(range =>
        range == null
          ? { end: MAX, start: MAX, length: 0 }
          : { start: range[0], length: range[1] - range[0], end: range[1] },
      ),
    })

    // Closest match seen so far that does not start at `startPosition`
    let closestPattern = -1
    let closestMatch: RegExpExecArray | null = null

    for (const [patternIndex, regexp] of this.regexps.entries()) {
      if (!regexp)
        continue
      try {
        // The `g` flag makes `exec` begin searching at `lastIndex`
        regexp.lastIndex = startPosition
        const found = regexp.exec(text)
        if (!found)
          continue
        if (found.index === startPosition)
          return buildResult(patternIndex, found)
        // Strict comparison keeps the earliest pattern on equal indices
        if (closestMatch === null || found.index < closestMatch.index) {
          closestPattern = patternIndex
          closestMatch = found
        }
      }
      catch (error) {
        if (!this.forgiving)
          throw error
      }
    }

    return closestMatch === null
      ? null
      : buildResult(closestPattern, closestMatch)
  }
}
107+
108+
/**
 * Build a `RegexEngine` that scans with the JavaScript RegExp engine
 * instead of Oniguruma.
 *
 * Oniguruma regex is more powerful than JavaScript regex, so some patterns may
 * not be supported. Unsupported patterns raise errors while TextMate grammars
 * are parsed; pass `forgiving: true` to ignore those errors and skip the
 * offending patterns.
 *
 * @param options - `forgiving` (defaults to `false`) and `cache`
 * (defaults to a fresh `Map` shared by every scanner this engine creates).
 * @returns An engine whose scanners are `JavaScriptScanner` instances and
 * whose strings are plain `{ content }` wrappers.
 *
 * @experimental
 */
export function createJavaScriptRegexEngine(options: JavaScriptRegexEngineOptions = {}): RegexEngine {
  const forgiving = options.forgiving === undefined ? false : options.forgiving
  const cache = options.cache === undefined
    ? new Map<string, RegExp | Error>()
    : options.cache

  const engine: RegexEngine = {
    createScanner: (patterns: string[]) => new JavaScriptScanner(patterns, cache, forgiving),
    createString: (s: string) => ({ content: s }),
  }
  return engine
}

‎packages/core/src/oniguruma/index.ts ‎packages/core/src/engines/oniguruma/index.ts

+43-48
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,15 @@
22
* Copyright (C) Microsoft Corporation. All rights reserved.
33
*-------------------------------------------------------- */
44

5-
import { ShikiError } from '../error'
6-
import type { IOnigBinding, IOnigCaptureIndex, IOnigMatch, OnigScanner as IOnigScanner, OnigString as IOnigString, Pointer } from './types'
5+
import { ShikiError } from '../../error'
6+
import type { LoadWasmOptions, WebAssemblyInstance, WebAssemblyInstantiator } from '../../types'
7+
import type { IOnigCaptureIndex, IOnigMatch, OnigScanner as IOnigScanner, OnigString as IOnigString } from '../../../vendor/vscode-textmate/src/main'
78
import createOnigasm from './onig'
89

10+
export type Instantiator = (importObject: Record<string, Record<string, WebAssembly.ImportValue>>) => Promise<WebAssembly.Exports>
11+
12+
export type Pointer = number
13+
914
export const enum FindOption {
1015
None = 0,
1116
/**
@@ -20,14 +25,25 @@ export const enum FindOption {
2025
* equivalent of ONIG_OPTION_NOT_BEGIN_POSITION: (start) isn't considered as start position of search (* fail \G)
2126
*/
2227
NotBeginPosition = 4,
23-
/**
24-
* used for debugging purposes.
25-
*/
26-
DebugCall = 8,
28+
}
29+
30+
export interface IOnigBinding {
31+
HEAPU8: Uint8Array
32+
HEAPU32: Uint32Array
33+
34+
UTF8ToString: (ptr: Pointer) => string
35+
36+
omalloc: (count: number) => Pointer
37+
ofree: (ptr: Pointer) => void
38+
getLastOnigError: () => Pointer
39+
createOnigScanner: (strPtrsPtr: Pointer, strLenPtr: Pointer, count: number) => Pointer
40+
freeOnigScanner: (ptr: Pointer) => void
41+
findNextOnigScannerMatch: (scanner: Pointer, strCacheId: number, strData: Pointer, strLength: number, position: number, options: number) => number
42+
// findNextOnigScannerMatchDbg: (scanner: Pointer, strCacheId: number, strData: Pointer, strLength: number, position: number, options: number) => number
2743
}
2844

2945
let onigBinding: IOnigBinding | null = null
30-
let defaultDebugCall = false
46+
// let defaultDebugCall = false
3147

3248
function throwLastOnigError(onigBinding: IOnigBinding): void {
3349
throw new ShikiError(onigBinding.UTF8ToString(onigBinding.getLastOnigError()))
@@ -294,34 +310,33 @@ export class OnigScanner implements IOnigScanner {
294310
public findNextMatchSync(string: string | OnigString, startPosition: number, debugCall: boolean): IOnigMatch | null
295311
public findNextMatchSync(string: string | OnigString, startPosition: number): IOnigMatch | null
296312
public findNextMatchSync(string: string | OnigString, startPosition: number, arg?: number | boolean): IOnigMatch | null {
297-
let debugCall = defaultDebugCall
313+
// let debugCall = defaultDebugCall
298314
let options = FindOption.None
299315
if (typeof arg === 'number') {
300-
if (arg & FindOption.DebugCall)
301-
debugCall = true
302-
316+
// if (arg & FindOption.DebugCall)
317+
// debugCall = true
303318
options = arg
304319
}
305320
else if (typeof arg === 'boolean') {
306-
debugCall = arg
321+
// debugCall = arg
307322
}
308323
if (typeof string === 'string') {
309324
string = new OnigString(string)
310-
const result = this._findNextMatchSync(string, startPosition, debugCall, options)
325+
const result = this._findNextMatchSync(string, startPosition, false, options)
311326
string.dispose()
312327
return result
313328
}
314-
return this._findNextMatchSync(string, startPosition, debugCall, options)
329+
return this._findNextMatchSync(string, startPosition, false, options)
315330
}
316331

317332
private _findNextMatchSync(string: OnigString, startPosition: number, debugCall: boolean, options: number): IOnigMatch | null {
318333
const onigBinding = this._onigBinding
319-
let resultPtr: Pointer
320-
if (debugCall)
321-
resultPtr = onigBinding.findNextOnigScannerMatchDbg(this._ptr, string.id, string.ptr, string.utf8Length, string.convertUtf16OffsetToUtf8(startPosition), options)
334+
// let resultPtr: Pointer
335+
// if (debugCall)
336+
// resultPtr = onigBinding.findNextOnigScannerMatchDbg(this._ptr, string.id, string.ptr, string.utf8Length, string.convertUtf16OffsetToUtf8(startPosition), options)
322337

323-
else
324-
resultPtr = onigBinding.findNextOnigScannerMatch(this._ptr, string.id, string.ptr, string.utf8Length, string.convertUtf16OffsetToUtf8(startPosition), options)
338+
// else
339+
const resultPtr = onigBinding.findNextOnigScannerMatch(this._ptr, string.id, string.ptr, string.utf8Length, string.convertUtf16OffsetToUtf8(startPosition), options)
325340

326341
if (resultPtr === 0) {
327342
// no match
@@ -348,17 +363,6 @@ export class OnigScanner implements IOnigScanner {
348363
}
349364
}
350365

351-
export interface WebAssemblyInstantiator {
352-
(importObject: Record<string, Record<string, WebAssembly.ImportValue>> | undefined): Promise<WebAssemblyInstance>
353-
}
354-
355-
export type WebAssemblyInstance = WebAssembly.WebAssemblyInstantiatedSource | WebAssembly.Instance | WebAssembly.Instance['exports']
356-
357-
export type OnigurumaLoadOptions =
358-
| { instantiator: WebAssemblyInstantiator }
359-
| { default: WebAssemblyInstantiator }
360-
| { data: ArrayBufferView | ArrayBuffer | Response }
361-
362366
function isInstantiatorOptionsObject(dataOrOptions: any): dataOrOptions is { instantiator: WebAssemblyInstantiator } {
363367
return (typeof dataOrOptions.instantiator === 'function')
364368
}
@@ -385,15 +389,6 @@ function isArrayBuffer(data: any): data is ArrayBuffer | ArrayBufferView {
385389

386390
let initPromise: Promise<void>
387391

388-
type Awaitable<T> = T | Promise<T>
389-
390-
export type LoadWasmOptionsPlain =
391-
| OnigurumaLoadOptions
392-
| WebAssemblyInstantiator
393-
| ArrayBufferView | ArrayBuffer | Response
394-
395-
export type LoadWasmOptions = Awaitable<LoadWasmOptionsPlain> | (() => Awaitable<LoadWasmOptionsPlain>)
396-
397392
export function loadWasm(options: LoadWasmOptions): Promise<void> {
398393
if (initPromise)
399394
return initPromise
@@ -461,14 +456,14 @@ function _makeResponseNonStreamingLoader(data: Response): WebAssemblyInstantiato
461456
}
462457
}
463458

464-
export function createOnigString(str: string) {
465-
return new OnigString(str)
466-
}
459+
// export function createOnigString(str: string) {
460+
// return new OnigString(str)
461+
// }
467462

468-
export function createOnigScanner(patterns: string[]) {
469-
return new OnigScanner(patterns)
470-
}
463+
// export function createOnigScanner(patterns: string[]) {
464+
// return new OnigScanner(patterns)
465+
// }
471466

472-
export function setDefaultDebugCall(_defaultDebugCall: boolean): void {
473-
defaultDebugCall = _defaultDebugCall
474-
}
467+
// export function setDefaultDebugCall(_defaultDebugCall: boolean): void {
468+
// defaultDebugCall = _defaultDebugCall
469+
// }

‎packages/core/src/oniguruma/onig.ts ‎packages/core/src/engines/oniguruma/onig.ts

+12-8
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,14 @@
1-
import type { IOnigBinding, Instantiator } from './types'
1+
import type { IOnigBinding, Instantiator } from '.'
2+
3+
/**
 * Upper bound for the heap size: 2^31 (2147483648).
 * NOTE(review): presumably measured in bytes — confirm against the caller.
 */
function getHeapMax() {
  const HEAP_MAX = 0x80000000
  return HEAP_MAX
}
6+
7+
/**
 * Current timestamp in milliseconds: `performance.now()` when the global
 * `performance` object exists, otherwise `Date.now()`.
 */
function _emscripten_get_now() {
  if (typeof performance === 'undefined')
    return Date.now()
  return performance.now()
}
10+
11+
/** Round `x` up to the nearest multiple of `multiple` (unchanged when already aligned). */
const alignUp = (x: number, multiple: number) => {
  const remainder = x % multiple
  const padding = (multiple - remainder) % multiple
  return x + padding
}
212

313
export default async function main(init: Instantiator): Promise<IOnigBinding> {
414
let wasmMemory: any
@@ -12,15 +22,10 @@ export default async function main(init: Instantiator): Promise<IOnigBinding> {
1222
binding.HEAPU32 = new Uint32Array(buf)
1323
}
1424

15-
function _emscripten_get_now() {
16-
return typeof performance !== 'undefined' ? performance.now() : Date.now()
17-
}
1825
function _emscripten_memcpy_big(dest: number, src: number, num: number) {
1926
binding.HEAPU8.copyWithin(dest, src, src + num)
2027
}
21-
function getHeapMax() {
22-
return 2147483648
23-
}
28+
2429
function emscripten_realloc_buffer(size: number) {
2530
try {
2631
wasmMemory.grow((size - buffer.byteLength + 65535) >>> 16)
@@ -36,7 +41,6 @@ export default async function main(init: Instantiator): Promise<IOnigBinding> {
3641
if (requestedSize > maxHeapSize)
3742
return false
3843

39-
const alignUp = (x: number, multiple: number) => x + ((multiple - (x % multiple)) % multiple)
4044
for (let cutDown = 1; cutDown <= 4; cutDown *= 2) {
4145
let overGrownHeapSize = oldSize * (1 + 0.2 / cutDown)
4246
overGrownHeapSize = Math.min(overGrownHeapSize, requestedSize + 100663296)

‎packages/core/src/engines/wasm.ts

+19
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
import type { RegexEngine } from '../textmate'
2+
import type { LoadWasmOptions } from '../types'
3+
import { OnigScanner, OnigString, loadWasm } from './oniguruma'
4+
5+
export { loadWasm }
6+
7+
/**
 * Create a `RegexEngine` backed by the WASM Oniguruma bindings.
 *
 * @param options - When truthy, passed to `loadWasm` and awaited before the
 * engine is returned. When omitted or `null`, no loading is triggered here.
 * @returns An engine producing `OnigScanner` and `OnigString` instances.
 */
export async function createWasmOnigEngine(options?: LoadWasmOptions | null): Promise<RegexEngine> {
  if (options)
    await loadWasm(options)

  const engine: RegexEngine = {
    createScanner: patterns => new OnigScanner(patterns),
    createString: s => new OnigString(s),
  }
  return engine
}

‎packages/core/src/index.ts

+3-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,9 @@ export * from './bundle-factory'
33
export * from './utils'
44
export * from './types'
55

6-
export { loadWasm } from './oniguruma'
6+
export { createWasmOnigEngine, loadWasm } from './engines/wasm'
7+
export { createJavaScriptRegexEngine } from './engines/javascript'
8+
79
export { createShikiInternal, getShikiInternal, setDefaultWasmLoader } from './internal'
810
export { codeToTokensBase, tokenizeWithTheme } from './code-to-tokens-base'
911
export { codeToTokens } from './code-to-tokens'

‎packages/core/src/internal.ts

+3-14
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,10 @@
1-
import type { HighlighterCoreOptions, LanguageInput, LanguageRegistration, MaybeGetter, ShikiInternal, SpecialLanguage, SpecialTheme, ThemeInput, ThemeRegistrationAny, ThemeRegistrationResolved } from './types'
2-
import type { LoadWasmOptions } from './oniguruma'
3-
import { createOnigScanner, createOnigString, loadWasm } from './oniguruma'
1+
import type { HighlighterCoreOptions, LanguageInput, LanguageRegistration, LoadWasmOptions, MaybeGetter, ShikiInternal, SpecialLanguage, SpecialTheme, ThemeInput, ThemeRegistrationAny, ThemeRegistrationResolved } from './types'
42
import { Registry } from './registry'
53
import { Resolver } from './resolver'
64
import { normalizeTheme } from './normalize'
75
import { isSpecialLang, isSpecialTheme } from './utils'
86
import { ShikiError } from './error'
7+
import { createWasmOnigEngine } from './engines/wasm'
98

109
let _defaultWasmLoader: LoadWasmOptions | undefined
1110
/**
@@ -40,26 +39,16 @@ export async function createShikiInternal(options: HighlighterCoreOptions = {}):
4039
)).flat()))
4140
}
4241

43-
const wasmLoader = options.loadWasm || _defaultWasmLoader
44-
4542
const [
4643
themes,
4744
langs,
4845
] = await Promise.all([
4946
Promise.all((options.themes || []).map(normalizeGetter)).then(r => r.map(normalizeTheme)),
5047
resolveLangs(options.langs || []),
51-
wasmLoader ? loadWasm(wasmLoader) : undefined,
5248
] as const)
5349

5450
const resolver = new Resolver(
55-
Promise.resolve({
56-
createOnigScanner(patterns) {
57-
return createOnigScanner(patterns)
58-
},
59-
createOnigString(s) {
60-
return createOnigString(s)
61-
},
62-
}),
51+
Promise.resolve(options.engine || createWasmOnigEngine(options.loadWasm || _defaultWasmLoader)),
6352
langs,
6453
)
6554

‎packages/core/src/oniguruma/types.ts

-61
This file was deleted.

‎packages/core/src/resolver.ts

+7-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
import type { IOnigLib, RegistryOptions } from './textmate'
1+
import type { IOnigLib } from '../vendor/vscode-textmate/src/main'
2+
import type { RegexEngine, RegistryOptions } from './textmate'
23
import type { LanguageRegistration } from './types'
34

45
export class Resolver implements RegistryOptions {
@@ -8,8 +9,12 @@ export class Resolver implements RegistryOptions {
89

910
private readonly _onigLibPromise: Promise<IOnigLib>
1011

11-
constructor(onigLibPromise: Promise<IOnigLib>, langs: LanguageRegistration[]) {
12+
constructor(onigLibPromise: Promise<RegexEngine>, langs: LanguageRegistration[]) {
1213
this._onigLibPromise = onigLibPromise
14+
.then(engine => ({
15+
createOnigScanner: patterns => engine.createScanner(patterns),
16+
createOnigString: s => engine.createString(s),
17+
}))
1318
langs.forEach(i => this.addLanguage(i))
1419
}
1520

‎packages/core/src/textmate.ts

+14-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
// We re-bundled vscode-textmate from source to have ESM support.
2+
import type { OnigScanner, OnigString } from '../vendor/vscode-textmate/src/main'
3+
24
// This file re-exports some runtime values we need.
35
export { Registry, INITIAL, StateStack } from '../vendor/vscode-textmate/src/main'
46
export { Theme } from '../vendor/vscode-textmate/src/theme'
@@ -7,8 +9,19 @@ export type {
79
IRawGrammar,
810
IGrammar,
911
IGrammarConfiguration,
10-
IOnigLib,
1112
RegistryOptions,
1213
} from '../vendor/vscode-textmate/src/main'
1314
export type { IRawThemeSetting } from '../vendor/vscode-textmate/src/theme'
1415
export * from './stack-element-metadata'
16+
17+
export interface PatternScanner extends OnigScanner {}
18+
19+
export interface RegexEngineString extends OnigString {}
20+
21+
/**
22+
* Engine for RegExp matching and scanning.
23+
*/
24+
export interface RegexEngine {
25+
createScanner: (patterns: string[]) => PatternScanner
26+
createString: (s: string) => RegexEngineString
27+
}

‎packages/core/src/transformer-decorations.ts

+31-31
Original file line numberDiff line numberDiff line change
@@ -62,29 +62,6 @@ export function transformerDecorations(): ShikiTransformer {
6262
return map.get(shiki.meta)!
6363
}
6464

65-
function verifyIntersections(items: ResolvedDecorationItem[]) {
66-
for (let i = 0; i < items.length; i++) {
67-
const foo = items[i]
68-
if (foo.start.offset > foo.end.offset)
69-
throw new ShikiError(`Invalid decoration range: ${JSON.stringify(foo.start)} - ${JSON.stringify(foo.end)}`)
70-
71-
for (let j = i + 1; j < items.length; j++) {
72-
const bar = items[j]
73-
const isFooHasBarStart = foo.start.offset < bar.start.offset && bar.start.offset < foo.end.offset
74-
const isFooHasBarEnd = foo.start.offset < bar.end.offset && bar.end.offset < foo.end.offset
75-
const isBarHasFooStart = bar.start.offset < foo.start.offset && foo.start.offset < bar.end.offset
76-
const isBarHasFooEnd = bar.start.offset < foo.end.offset && foo.end.offset < bar.end.offset
77-
if (isFooHasBarStart || isFooHasBarEnd || isBarHasFooStart || isBarHasFooEnd) {
78-
if (isFooHasBarEnd && isFooHasBarEnd)
79-
continue // nested
80-
if (isBarHasFooStart && isBarHasFooEnd)
81-
continue // nested
82-
throw new ShikiError(`Decorations ${JSON.stringify(foo.start)} and ${JSON.stringify(bar.start)} intersect.`)
83-
}
84-
}
85-
}
86-
}
87-
8865
return {
8966
name: 'shiki:decorations',
9067
tokens(tokens) {
@@ -111,14 +88,6 @@ export function transformerDecorations(): ShikiTransformer {
11188
let startIndex = -1
11289
let endIndex = -1
11390

114-
function stringify(el: ElementContent): string {
115-
if (el.type === 'text')
116-
return el.value
117-
if (el.type === 'element')
118-
return el.children.map(stringify).join('')
119-
return ''
120-
}
121-
12291
if (start === 0)
12392
startIndex = 0
12493
if (end === 0)
@@ -206,3 +175,34 @@ export function transformerDecorations(): ShikiTransformer {
206175
},
207176
}
208177
}
178+
179+
/**
 * Validate a list of resolved decorations.
 *
 * Every item must satisfy `start.offset <= end.offset`, and any two items
 * that overlap must be nested: one range fully contains the other, and shared
 * boundaries are allowed. Ranges that only partially overlap are rejected.
 *
 * @param items - Decorations with resolved `start` / `end` offsets.
 * @throws {ShikiError} When a range is inverted or two ranges partially overlap.
 */
function verifyIntersections(items: ResolvedDecorationItem[]) {
  for (let i = 0; i < items.length; i++) {
    const foo = items[i]
    if (foo.start.offset > foo.end.offset)
      throw new ShikiError(`Invalid decoration range: ${JSON.stringify(foo.start)} - ${JSON.stringify(foo.end)}`)

    for (let j = i + 1; j < items.length; j++) {
      const bar = items[j]
      // True when a boundary of one range lies strictly inside the other range
      const isFooHasBarStart = foo.start.offset < bar.start.offset && bar.start.offset < foo.end.offset
      const isFooHasBarEnd = foo.start.offset < bar.end.offset && bar.end.offset < foo.end.offset
      const isBarHasFooStart = bar.start.offset < foo.start.offset && foo.start.offset < bar.end.offset
      const isBarHasFooEnd = bar.start.offset < foo.end.offset && foo.end.offset < bar.end.offset
      if (isFooHasBarStart || isFooHasBarEnd || isBarHasFooStart || isBarHasFooEnd) {
        // Containment is inclusive so nested ranges may share a start or an end.
        // The previous check tested `isFooHasBarEnd` twice, which accepted
        // partial overlaps and rejected nested ranges that share an end.
        const isBarInsideFoo = foo.start.offset <= bar.start.offset && bar.end.offset <= foo.end.offset
        const isFooInsideBar = bar.start.offset <= foo.start.offset && foo.end.offset <= bar.end.offset
        if (isBarInsideFoo || isFooInsideBar)
          continue // nested
        throw new ShikiError(`Decorations ${JSON.stringify(foo.start)} and ${JSON.stringify(bar.start)} intersect.`)
      }
    }
  }
}
201+
202+
/**
 * Collect the text content of a hast node: the value of a text node, the
 * concatenated text of an element's children, or an empty string for any
 * other node type.
 */
function stringify(el: ElementContent): string {
  switch (el.type) {
    case 'text':
      return el.value
    case 'element':
      return el.children.map(child => stringify(child)).join('')
    default:
      return ''
  }
}

‎packages/core/src/types/engines.ts

+31
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
type Awaitable<T> = T | Promise<T>
2+
3+
export interface WebAssemblyInstantiator {
4+
(importObject: Record<string, Record<string, WebAssembly.ImportValue>> | undefined): Promise<WebAssemblyInstance>
5+
}
6+
7+
export type WebAssemblyInstance = WebAssembly.WebAssemblyInstantiatedSource | WebAssembly.Instance | WebAssembly.Instance['exports']
8+
9+
export type OnigurumaLoadOptions =
10+
| { instantiator: WebAssemblyInstantiator }
11+
| { default: WebAssemblyInstantiator }
12+
| { data: ArrayBufferView | ArrayBuffer | Response }
13+
14+
export type LoadWasmOptionsPlain =
15+
| OnigurumaLoadOptions
16+
| WebAssemblyInstantiator
17+
| ArrayBufferView | ArrayBuffer | Response
18+
19+
export type LoadWasmOptions = Awaitable<LoadWasmOptionsPlain> | (() => Awaitable<LoadWasmOptionsPlain>)
20+
21+
export interface JavaScriptRegexEngineOptions {
22+
/**
23+
* Whether to allow invalid regex patterns.
24+
*/
25+
forgiving?: boolean
26+
27+
/**
28+
* Cache for regex patterns.
29+
*/
30+
cache?: Map<string, RegExp | Error>
31+
}

‎packages/core/src/types/index.ts

+5-1
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,8 @@ export * from './transformers'
88
export * from './utils'
99
export * from './decorations'
1010

11-
export { WebAssemblyInstantiator } from '../oniguruma'
11+
export type {
12+
LoadWasmOptions,
13+
WebAssemblyInstantiator,
14+
WebAssemblyInstance,
15+
} from './engines'

‎packages/core/src/types/options.ts

+7-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
import type { LoadWasmOptions } from '../oniguruma'
1+
import type { LoadWasmOptions } from '../types'
2+
import type { RegexEngine } from '../textmate'
23
import type { StringLiteralUnion } from './utils'
34
import type { LanguageInput, SpecialLanguage } from './langs'
45
import type { SpecialTheme, ThemeInput, ThemeRegistrationAny } from './themes'
@@ -29,9 +30,13 @@ export interface HighlighterCoreOptions {
2930
* @default true
3031
*/
3132
warnings?: boolean
33+
/**
34+
* Custom RegExp engine.
35+
*/
36+
engine?: RegexEngine | Promise<RegexEngine>
3237
}
3338

34-
export interface BundledHighlighterOptions<L extends string, T extends string> {
39+
export interface BundledHighlighterOptions<L extends string, T extends string> extends Pick<HighlighterCoreOptions, 'warnings' | 'engine'> {
3540
/**
3641
* Theme registration
3742
*

‎packages/core/src/wasm-inlined.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
// @ts-expect-error this will be compiled to ArrayBuffer
22
import binary from 'vscode-oniguruma/release/onig.wasm'
3-
import type { WebAssemblyInstantiator } from './oniguruma'
3+
import type { WebAssemblyInstantiator } from './types'
44

55
export const wasmBinary = binary as ArrayBuffer
66

‎packages/rehype/test/core.test.ts

+1
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ it('run with rehype-raw', async () => {
4848
],
4949
})
5050

51+
// eslint-disable-next-line unicorn/consistent-function-scoping
5152
const rehypeMetaString = () => (tree: Root) => {
5253
visit(tree, 'element', (node) => {
5354
if (node.tagName === 'code' && node.data?.meta) {

‎packages/shiki/test/engine-js/__records__/html-basic.json

+1,383
Large diffs are not rendered by default.

‎packages/shiki/test/engine-js/__records__/json-basic.json

+836
Large diffs are not rendered by default.

‎packages/shiki/test/engine-js/__records__/ts-basic.json

+816
Large diffs are not rendered by default.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
import { describe, expect, it } from 'vitest'
2+
import { wasmBinary } from '@shikijs/core/wasm-inlined'
3+
import type { RegexEngine } from '@shikijs/core/textmate'
4+
import type { LanguageRegistration, ThemeRegistration } from '../../src/core'
5+
import { createHighlighterCore, createJavaScriptRegexEngine, loadWasm } from '../../src/core'
6+
7+
import { OnigScanner, OnigString } from '../../../core/src/engines/oniguruma'
8+
import type { Instance } from './types'
9+
10+
await loadWasm({ instantiator: obj => WebAssembly.instantiate(wasmBinary, obj) })
11+
12+
/**
 * Build a WASM-backed regex engine that records its own usage.
 *
 * Each created scanner adds an entry to `instances` holding the constructor
 * patterns, and every `findNextMatchSync` call on that scanner appends its
 * arguments and result to the entry's `executions`.
 */
function createWasmOnigLibWrapper(): RegexEngine & { instances: Instance[] } {
  const instances: Instance[] = []

  const createScanner: RegexEngine['createScanner'] = (patterns) => {
    const scanner = new OnigScanner(patterns)
    // `constractor` (sic) is the key used by the recorded JSON fixtures
    const record: Instance = { constractor: [patterns], executions: [] }
    instances.push(record)

    return {
      findNextMatchSync(string: string | OnigString, startPosition: number) {
        const result = scanner.findNextMatchSync(string, startPosition)
        const content = typeof string === 'string' ? string : string.content
        record.executions.push({ args: [content, startPosition], result })
        return result
      },
    }
  }

  const createString: RegexEngine['createString'] = s => new OnigString(s)

  return { instances, createScanner, createString }
}
37+
38+
export interface Cases {
39+
name: string
40+
theme: () => Promise<{ default: ThemeRegistration }>
41+
lang: () => Promise<{ default: LanguageRegistration[] }>
42+
cases: string[]
43+
}
44+
45+
const cases: Cases[] = [
46+
{
47+
name: 'json-basic',
48+
theme: () => import('../../src/assets/themes/nord'),
49+
lang: () => import('../../src/assets/langs/json'),
50+
cases: [
51+
'{"foo":{"bar":1}}',
52+
'[undefined, null, true, false, 0, 1, 1.1, "foo", [], {}]',
53+
],
54+
},
55+
{
56+
name: 'html-basic',
57+
theme: () => import('../../src/assets/themes/nord'),
58+
lang: () => import('../../src/assets/langs/html'),
59+
cases: [
60+
'<div class="foo">bar</div>',
61+
'<!DOCTYPE html><html><head><title>foo</title></head><body>bar</body></html>',
62+
],
63+
},
64+
{
65+
name: 'ts-basic',
66+
theme: () => import('../../src/assets/themes/nord'),
67+
lang: () => import('../../src/assets/langs/typescript'),
68+
cases: [
69+
'const foo: string = "bar"',
70+
],
71+
},
72+
]
73+
74+
// For every case, tokenize the same inputs with the recording WASM engine and
// the JavaScript engine, snapshot the recorded WASM scanner activity, and
// softly assert that both engines produce the same tokens.
describe('cases', async () => {
  const resolved = await Promise.all(cases.map(async (c) => {
    const theme = (await c.theme()).default
    const lang = (await c.lang()).default
    return { theme, lang, c }
  }))

  resolved.forEach((entry) => {
    it(entry.c.name, async () => {
      const wasm = createWasmOnigLibWrapper()
      const native = createJavaScriptRegexEngine()

      const shiki1 = await createHighlighterCore({
        langs: entry.lang,
        themes: [entry.theme],
        engine: wasm,
      })
      const shiki2 = await createHighlighterCore({
        langs: entry.lang,
        themes: [entry.theme],
        engine: native,
      })

      const lang = entry.lang[0].name
      const theme = entry.theme.name!

      // One [wasm tokens, js tokens] pair per input, tokenized in that order
      const compare: [any, any][] = entry.c.cases.map(code => [
        shiki1.codeToTokensBase(code, { lang, theme }),
        shiki2.codeToTokensBase(code, { lang, theme }),
      ])

      const recorded = JSON.stringify(wasm.instances, null, 2)
      await expect(recorded)
        .toMatchFileSnapshot(`./__records__/${entry.c.name}.json`)

      for (const [fromWasm, fromJs] of compare)
        expect.soft(fromWasm).toEqual(fromJs)
    })
  })
})
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
import { describe, expect, it } from 'vitest'
2+
import { createHighlighter, createJavaScriptRegexEngine } from '../../src'
3+
4+
describe('should', () => {
5+
it('works', async () => {
6+
const shiki = await createHighlighter({
7+
themes: ['vitesse-light'],
8+
langs: ['javascript'],
9+
engine: createJavaScriptRegexEngine(),
10+
})
11+
12+
expect(shiki.codeToHtml('console.log', { lang: 'js', theme: 'vitesse-light' }))
13+
.toMatchInlineSnapshot(`"<pre class="shiki vitesse-light" style="background-color:#ffffff;color:#393a34" tabindex="0"><code><span class="line"><span style="color:#B07D48">console</span><span style="color:#999999">.</span><span style="color:#B07D48">log</span></span></code></pre>"`)
14+
})
15+
16+
it('dynamic load theme and lang', async () => {
17+
const shiki = await createHighlighter({
18+
themes: ['vitesse-light'],
19+
langs: ['javascript', 'ts'],
20+
engine: createJavaScriptRegexEngine(),
21+
})
22+
23+
await shiki.loadLanguage('css')
24+
await shiki.loadTheme('min-dark')
25+
26+
expect(shiki.getLoadedLanguages())
27+
.toMatchInlineSnapshot(`
28+
[
29+
"javascript",
30+
"typescript",
31+
"css",
32+
"js",
33+
"ts",
34+
]
35+
`)
36+
expect(shiki.getLoadedThemes())
37+
.toMatchInlineSnapshot(`
38+
[
39+
"vitesse-light",
40+
"min-dark",
41+
]
42+
`)
43+
44+
expect(shiki.codeToHtml('@media foo { .bar { padding-left: 1px; }}', { lang: 'css', theme: 'min-dark' }))
45+
.toMatchInlineSnapshot(`"<pre class="shiki min-dark" style="background-color:#1f1f1f;color:#b392f0" tabindex="0"><code><span class="line"><span style="color:#B392F0">@media foo { .bar { </span><span style="color:#79B8FF">padding-left</span><span style="color:#F97583">:</span><span style="color:#F8F8F8"> 1px</span><span style="color:#B392F0">; }}</span></span></code></pre>"`)
46+
})
47+
})
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
<pre class="shiki monokai" style="background-color:#272822;color:#F8F8F2" tabindex="0"><code><span class="line"><span style="color:#66D9EF;font-style:italic">type</span><span> </span><span style="color:#A6E22E;text-decoration:underline">Foo</span><span style="color:#F92672"> =</span><span style="color:#F8F8F2"> { bar</span><span style="color:#F92672">:</span><span style="color:#66D9EF;font-style:italic"> string</span><span style="color:#F8F8F2"> }</span></span></code></pre>
+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
import type { IOnigMatch } from '../../../core/vendor/vscode-textmate/src/main'
2+
3+
export interface Instance {
4+
constractor: [string[]]
5+
executions: Executions[]
6+
}
7+
8+
export interface Executions {
9+
args: [str: string, start: number]
10+
result: IOnigMatch | null
11+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
import { fileURLToPath } from 'node:url'
2+
import { basename } from 'node:path'
3+
import { promises as fs } from 'node:fs'
4+
import { describe, expect, it, onTestFailed } from 'vitest'
5+
import fg from 'fast-glob'
6+
import { JavaScriptScanner } from '../../../core/src/engines/javascript'
7+
import type { Instance } from './types'
8+
9+
const files = await fg('*.json', {
10+
cwd: fileURLToPath(new URL('./__records__', import.meta.url)),
11+
absolute: true,
12+
onlyFiles: true,
13+
})
14+
15+
const cache = new Map<string, RegExp | Error>()
16+
17+
for (const file of files) {
18+
// Some token positions are off in this record
19+
const name = basename(file, '.json')
20+
if (name === 'ts-basic')
21+
continue
22+
23+
describe(`record: ${name}`, async () => {
24+
const instances = JSON.parse(await fs.readFile(file, 'utf-8')) as Instance[]
25+
let i = 0
26+
for (const instance of instances) {
27+
describe(`instances ${i++}`, () => {
28+
const scanner = new JavaScriptScanner(instance.constractor[0], cache, false)
29+
let j = 0
30+
for (const execution of instance.executions) {
31+
it(`case ${j++}`, () => {
32+
onTestFailed(() => {
33+
console.error({
34+
patterns: scanner.patterns,
35+
regexps: scanner.regexps,
36+
})
37+
})
38+
const result = scanner.findNextMatchSync(...execution.args)
39+
expect(result).toEqual(execution.result)
40+
})
41+
}
42+
})
43+
}
44+
})
45+
}

‎packages/shiki/test/out/injections-side-effects-angular-after.html

+1-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

‎packages/shiki/test/out/injections-side-effects-angular-ts-after.html

+1-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

‎packages/shiki/test/themes.test.ts

+4-4
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,11 @@ import type { ThemedToken } from '../src'
33
import { codeToHtml, codeToTokensBase, codeToTokensWithThemes } from '../src'
44
import { syncThemesTokenization } from '../../core/src/code-to-tokens-themes'
55

6-
describe('syncThemesTokenization', () => {
7-
function stringifyTokens(tokens: ThemedToken[][]) {
8-
return tokens.map(line => line.map(token => token.content).join(' ')).join('\n')
9-
}
6+
function stringifyTokens(tokens: ThemedToken[][]) {
7+
return tokens.map(line => line.map(token => token.content).join(' ')).join('\n')
8+
}
109

10+
describe('syncThemesTokenization', () => {
1111
it('two themes', async () => {
1212
const lines1 = await codeToTokensBase('console.log("hello")', { lang: 'js', theme: 'vitesse-dark', includeExplanation: true })
1313
const lines2 = await codeToTokensBase('console.log("hello")', { lang: 'js', theme: 'min-light', includeExplanation: true })

‎packages/shiki/test/wasm4.test.ts

+1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ it('wasm', async () => {
1111
const shiki = await createHighlighterCore({
1212
themes: [nord],
1313
langs: [js as any],
14+
// eslint-disable-next-line unicorn/consistent-function-scoping
1415
loadWasm: Promise.resolve().then(() => obj => WebAssembly.instantiate(wasmBinary, obj).then(r => r.instance)),
1516
})
1617

‎packages/vitepress-twoslash/src/renderer-floating-vue.ts

+23-23
Original file line numberDiff line numberDiff line change
@@ -47,29 +47,6 @@ export function rendererFloatingVue(options: TwoslashFloatingVueRendererOptions
4747
'theme': floatingVueTheme,
4848
}
4949

50-
function compose(parts: { token: Element | Text, popup: Element }): Element[] {
51-
return [
52-
{
53-
type: 'element',
54-
tagName: 'span',
55-
properties: {},
56-
children: [parts.token],
57-
},
58-
{
59-
type: 'element',
60-
tagName: 'template',
61-
properties: {
62-
'v-slot:popper': '{}',
63-
},
64-
content: {
65-
type: 'root',
66-
children: [vPre(parts.popup)],
67-
},
68-
children: [],
69-
},
70-
]
71-
}
72-
7350
const rich = rendererRich({
7451
classExtra: classCopyIgnore,
7552
...options,
@@ -194,3 +171,26 @@ function renderMarkdownInline(this: ShikiTransformerContextCommon, md: string, c
194171
return children[0].children
195172
return children
196173
}
174+
175+
function compose(parts: { token: Element | Text, popup: Element }): Element[] {
176+
return [
177+
{
178+
type: 'element',
179+
tagName: 'span',
180+
properties: {},
181+
children: [parts.token],
182+
},
183+
{
184+
type: 'element',
185+
tagName: 'template',
186+
properties: {
187+
'v-slot:popper': '{}',
188+
},
189+
content: {
190+
type: 'root',
191+
children: [vPre(parts.popup)],
192+
},
193+
children: [],
194+
},
195+
]
196+
}

‎pnpm-lock.yaml

+814-767
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

‎pnpm-workspace.yaml

+20-18
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@ packages:
66
- docs
77
- packages/@shikijs/core/vendor/*
88
catalog:
9-
'@antfu/eslint-config': ^2.27.0
10-
'@antfu/ni': ^0.22.4
9+
'@antfu/eslint-config': ^3.0.0
10+
'@antfu/ni': ^0.23.0
1111
'@antfu/utils': ^0.7.10
1212
'@iconify-json/carbon': ^1.1.37
1313
'@iconify-json/codicon': ^1.1.51
@@ -24,22 +24,22 @@ catalog:
2424
'@types/hast': ^3.0.4
2525
'@types/markdown-it': ^14.1.2
2626
'@types/minimist': ^1.2.5
27-
'@types/node': ^22.5.0
28-
'@unocss/reset': ^0.62.2
27+
'@types/node': ^22.5.1
28+
'@unocss/reset': ^0.62.3
2929
'@vitest/coverage-v8': ^2.0.5
30-
'@vueuse/core': ^11.0.1
30+
'@vueuse/core': ^11.0.3
3131
ansi-sequence-parser: ^1.1.1
32-
bumpp: ^9.5.1
32+
bumpp: ^9.5.2
3333
chalk: ^5.3.0
34-
eslint: ^9.9.0
34+
eslint: ^9.9.1
3535
eslint-plugin-format: ^0.1.2
3636
esno: ^4.7.0
3737
fast-glob: ^3.3.2
3838
floating-vue: ^5.2.2
3939
fs-extra: ^11.2.0
4040
fuse.js: ^7.0.0
4141
hast-util-from-html: ^2.0.2
42-
hast-util-to-html: ^9.0.1
42+
hast-util-to-html: ^9.0.2
4343
hast-util-to-string: ^3.0.0
4444
jsonc-parser: ^3.3.1
4545
lint-staged: ^15.2.9
@@ -48,40 +48,42 @@ catalog:
4848
mdast-util-gfm: ^3.0.0
4949
mdast-util-to-hast: ^13.2.0
5050
minimist: ^1.2.8
51-
monaco-editor-core: ^0.50.0
51+
monaco-editor-core: ^0.51.0
5252
ofetch: ^1.3.4
53+
oniguruma-to-js: ^0.2.3
54+
picocolors: ^1.0.1
5355
pinia: ^2.2.2
54-
pnpm: ^9.8.0
56+
pnpm: ^9.9.0
5557
prettier: ^3.3.3
5658
rehype-raw: ^7.0.0
5759
rehype-stringify: ^10.0.0
5860
remark-parse: ^11.0.0
5961
remark-rehype: ^11.1.0
6062
rimraf: ^6.0.1
61-
rollup: ^4.21.0
63+
rollup: ^4.21.2
6264
rollup-plugin-copy: ^3.5.0
6365
rollup-plugin-dts: ^6.1.1
6466
rollup-plugin-esbuild: ^6.1.1
6567
rollup-plugin-typescript2: ^0.36.0
6668
shiki-legacy: npm:shiki@^0.14.7
6769
simple-git-hooks: ^2.11.1
68-
taze: ^0.16.6
69-
tm-grammars: ^1.17.3
70-
tm-themes: ^1.8.0
70+
taze: ^0.16.7
71+
tm-grammars: ^1.17.9
72+
tm-themes: ^1.8.1
7173
twoslash: ^0.2.9
7274
twoslash-vue: ^0.2.9
7375
typescript: ^5.5.4
7476
unbuild: ^2.0.0
7577
unified: ^11.0.5
7678
unist-util-visit: ^5.0.0
77-
unocss: ^0.62.2
79+
unocss: ^0.62.3
7880
unplugin-vue-components: ^0.27.4
7981
vite: ^5.4.2
8082
vite-tsconfig-paths: ^5.0.1
81-
vitepress: ^1.3.3
83+
vitepress: ^1.3.4
8284
vitepress-plugin-mermaid: ^2.0.16
8385
vitest: ^2.0.5
8486
vscode-oniguruma: ^1.7.0
8587
vue: ^3.4.38
86-
vue-tsc: ^2.0.29
87-
wrangler: ^3.72.1
88+
vue-tsc: ^2.1.2
89+
wrangler: ^3.73.0

‎scripts/report-engine-js-compat.md

+230
Large diffs are not rendered by default.

‎scripts/report-engine-js-compat.ts

+173
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,173 @@
1+
import fs from 'node:fs/promises'
2+
import process from 'node:process'
3+
import { bundledLanguages, createHighlighter, createJavaScriptRegexEngine } from 'shiki'
4+
import c from 'picocolors'
5+
import { version } from '../package.json'
6+
7+
const engine = createJavaScriptRegexEngine()
8+
9+
export interface ReportItem {
10+
lang: string
11+
highlightMatch: boolean | 'error'
12+
patternsParsable: number
13+
patternsFailed: [string, unknown][]
14+
highlightA?: string
15+
highlightB?: string
16+
}
17+
18+
/**
 * Build the JavaScript RegExp engine compatibility report.
 *
 * For every bundled language that has a sample file, this:
 * 1. highlights the sample with the default highlighter (no `engine` option),
 * 2. tries to create a scanner for each grammar pattern with the JavaScript engine,
 * 3. highlights the same sample with the JavaScript engine and compares the HTML.
 *
 * The collected results are written next to this script as
 * `report-engine-js-compat.json` and `report-engine-js-compat.md`.
 *
 * Sample files are read from `../tm-grammars-themes/samples/`, resolved against
 * the current working directory; languages without a readable sample are skipped.
 */
async function run() {
  const report: ReportItem[] = []

  for (const lang of Object.keys(bundledLanguages)) {
    // A missing/unreadable sample resolves to '' and the language is skipped.
    const sample = await fs.readFile(`../tm-grammars-themes/samples/${lang}.sample`, 'utf-8')
      .catch(() => '')

    if (!sample) {
      console.log(c.dim(`[${lang}] Sample not found`))
      continue
    }

    let shiki: Awaited<ReturnType<typeof createHighlighter>> | undefined
    const parsablePatterns: string[] = []
    const unparsablePatterns: [string, unknown][] = []

    const shikiWasm = await createHighlighter({
      langs: [lang],
      themes: ['vitesse-dark'],
    })

    // Everything after the first highlighter exists runs under try/finally so
    // both highlighters are disposed even when an unexpected error escapes.
    try {
      const grammar = shikiWasm.getLanguage(lang) as any
      const patterns = getPatternsOfGrammar(grammar._grammar)
      let highlightMatch: boolean | 'error' = false

      // Check each pattern on its own so one failure does not hide the others.
      for (const pattern of patterns) {
        try {
          engine.createScanner([pattern])
          parsablePatterns.push(pattern)
        }
        catch (e: unknown) {
          // Record `cause` when the thrown value carries one, otherwise the value itself.
          const cause = (e as { cause?: unknown } | null | undefined)?.cause
          unparsablePatterns.push([pattern, String(cause || e)])
        }
      }

      const highlightA = shikiWasm.codeToHtml(sample, { lang, theme: 'vitesse-dark' })
      let highlightB: string | undefined

      try {
        shiki = await createHighlighter({
          langs: [lang],
          themes: ['vitesse-dark'],
          engine,
        })

        highlightB = shiki.codeToHtml(sample, { lang, theme: 'vitesse-dark' })

        highlightMatch = highlightA === highlightB

        if (!highlightMatch) {
          console.log(c.yellow(`[${lang}] Mismatch`))
        }
        else {
          console.log(c.green(`[${lang}] OK`))
        }
      }
      catch (e) {
        highlightMatch = 'error'
        console.log(c.red(`[${lang}] Error ${e}`))
      }

      report.push({
        lang,
        highlightMatch,
        patternsParsable: parsablePatterns.length,
        patternsFailed: unparsablePatterns,
        // Both HTML outputs are kept only when they are worth inspecting.
        ...highlightMatch === true
          ? {}
          : {
              highlightA,
              highlightB,
            },
      })
    }
    finally {
      shikiWasm.dispose()
      shiki?.dispose()
    }
  }

  // Matches first, then mismatches, then errors; ties are broken by the number
  // of failed patterns and finally by language name.
  const order: ReportItem['highlightMatch'][] = [true, false, 'error']

  report.sort((a, b) => {
    const aOrder = order.indexOf(a.highlightMatch)
    const bOrder = order.indexOf(b.highlightMatch)

    if (aOrder !== bOrder)
      return aOrder - bOrder

    return (a.patternsFailed.length - b.patternsFailed.length) || a.lang.localeCompare(b.lang)
  })

  await fs.writeFile(
    new URL('./report-engine-js-compat.json', import.meta.url),
    JSON.stringify(report, null, 2),
  )

  const table: readonly [string, string, string, string][] = [
    ['Language', 'Highlight Match', 'Patterns Parsable', 'Patterns Failed'],
    ['---', ':---', '---', '---'],
    ...report
      .map(item => [
        item.lang,
        item.highlightMatch === true ? '✅ OK' : item.highlightMatch === 'error' ? '❌ Error' : '⚠️ Mismatch',
        item.patternsParsable === 0 ? '-' : item.patternsParsable.toString(),
        item.patternsFailed.length === 0 ? '-' : item.patternsFailed.length.toString(),
      ] as [string, string, string, string]),
  ]

  const markdown = [
    '# Report: JavaScript RegExp Engine Compatibility',
    '',
    `> At ${new Date().toDateString()}`,
    '>',
    `> Version \`${version}\``,
    '>',
    `> Runtime: Node.js v${process.versions.node}`,
    '',
    '| Status | Number |',
    '| :--- | ---: |',
    `| Total Languages | ${report.length} |`,
    `| OK | ${report.filter(item => item.highlightMatch === true).length} |`,
    `| Mismatch | ${report.filter(item => item.highlightMatch === false).length} |`,
    `| Error | ${report.filter(item => item.highlightMatch === 'error').length} |`,
    '',
    table.map(row => `| ${row.join(' | ')} |`).join('\n'),
  ].join('\n')
  await fs.writeFile(
    new URL('./report-engine-js-compat.md', import.meta.url),
    markdown,
  )
}
150+
151+
/**
 * Collect every regular-expression source string declared by a TextMate grammar.
 *
 * The raw grammar object is walked recursively. For each rule the `match`,
 * `begin`, `end` and `while` strings are gathered, and the walk descends into
 * nested `patterns`, capture rules (`captures`, `beginCaptures`, `endCaptures`,
 * `whileCaptures`), `injections` and `repository` entries.
 *
 * @param grammar - Raw grammar object (or any rule inside one). Nullish and
 *   non-object values are ignored.
 * @returns The de-duplicated set of pattern sources, in first-seen order.
 */
function getPatternsOfGrammar(grammar: any) {
  const patterns = new Set<string>()

  // Rule properties whose value is itself a regex source.
  const regexKeys = ['match', 'begin', 'end', 'while'] as const
  // Rule properties holding a map of nested rules.
  const ruleMapKeys = ['captures', 'beginCaptures', 'endCaptures', 'whileCaptures', 'injections', 'repository'] as const

  const scan = (obj: any): void => {
    if (!obj || typeof obj !== 'object')
      return

    for (const key of regexKeys) {
      if (typeof obj[key] === 'string')
        patterns.add(obj[key])
    }

    // Guard against a malformed, non-array `patterns` value.
    if (Array.isArray(obj.patterns)) {
      for (const child of obj.patterns)
        scan(child)
    }

    for (const key of ruleMapKeys) {
      for (const child of Object.values(obj[key] || {}))
        scan(child)
    }
  }

  scan(grammar)

  return patterns
}
172+
173+
// Entry point: report a failed run explicitly and signal it through the exit
// code instead of leaving the promise rejection unhandled.
run().catch((error: unknown) => {
  console.error(error)
  process.exitCode = 1
})

0 commit comments

Comments
 (0)
Please sign in to comment.