Custom template tokenizers (#148)

* add support for custom template tokenizers * fix types * Make existing tests pass * merge tokens into existing tokenizer * add ast test * Write documentation for custom template tokenizers * forward tokenizer controls to custom tokenizer * document attributes used to control tokenizer * refactor template text tokenizing into separate method. * fix mock tokenizer token ranges * guard against empty text nodes * don't parse mustaches when template lang isn't html * test if tokenizer gets unprocessed mustaches * don't call the custom tokenizer on root text nodes if we are already processing a custom template lang * don't have empty tokens in custom tokenizer * add disclaimer for templateTokenizer option * prevent nested template parsing by checking if template is top level instead of maintaining a flag
vuejs · Apr 14, 2022 · 9448a78 · 9448a78
1 parent 62b6986
commit 9448a78
Show file tree

Hide file tree

Showing 12 changed files with 1,350 additions and 3 deletions.
diff --git a/README.md b/README.md
@@ -194,6 +194,29 @@ If set to `true`, to parse expressions in `v-bind` CSS functions inside `<style>
 
 See also [here](https://github.com/vuejs/rfcs/blob/master/active-rfcs/0043-sfc-style-variables.md).
 
+### parserOptions.templateTokenizer
+
+You can use the `parserOptions.templateTokenizer` property to specify custom tokenizers for parsing `<template lang="...">` tags.
+
+For example, to enable parsing of pug templates:
+
+```jsonc
+{
+    "parser": "vue-eslint-parser",
+    "parserOptions": {
+        "templateTokenizer": {
+             // template tokenizer for `<template lang="pug">`
+            "pug": "vue-eslint-parser-template-tokenizer-pug",
+        }
+    }
+}
+```
+
+This option is only intended for plugin developers. **Be careful** when using this option directly, as it may change the behaviour of rules you might have enabled.  
+If you just want **pug** support, use [eslint-plugin-vue-pug](https://github.com/rashfael/eslint-plugin-vue-pug) instead, which uses this option internally.
+
+See [implementing-custom-template-tokenizers.md](./docs/implementing-custom-template-tokenizers.md) for information on creating your own template tokenizer.
+
 ## 🎇 Usage for custom rules / plugins
 
 - This parser provides `parserServices` to traverse `<template>`.

diff --git a/docs/implementing-custom-template-tokenizers.md b/docs/implementing-custom-template-tokenizers.md
@@ -0,0 +1,70 @@
+# Implementing custom template tokenizers
+
+A custom template tokenizer needs to create two types of tokens from the text it is given:
+
+- Low level [tokens](https://github.com/vuejs/vue-eslint-parser/blob/master/src/ast/tokens.ts), which can be of an [existing HTML type](https://github.com/vuejs/vue-eslint-parser/blob/master/src/html/tokenizer.ts#L59) or even new types.
+- Intermediate tokens, which **must** be of type `StartTag`, `EndTag`, `Text` or `Mustache` (see [IntermediateTokenizer](https://github.com/vuejs/vue-eslint-parser/blob/master/src/html/intermediate-tokenizer.ts#L33)).
+
+Token ranges and locations must count from the start of the document. To help with this, custom tokenizers are initialized with a starting line and column.
+
+## Interface
+
+```ts
+class CustomTokenizer {
+    /**
+     * The tokenized low level tokens, excluding comments.
+     */
+    tokens: Token[]
+    /**
+     * The tokenized low level comment tokens.
+     */
+    comments: Token[]
+    errors: ParseError[]
+
+    /**
+     * Used to control tokenization of {{ expressions }}. If false, don't produce VExpressionStart/End tokens
+     */
+    expressionEnabled: boolean = true
+
+    /**
+     * The current namespace. Set and used by the parser. You probably can ignore this.
+     */
+    namespace: string = "http://www.w3.org/1999/xhtml"
+
+    /**
+     * The current tokenizer state. Set by the parser. You can probably ignore this.
+     */
+    state: string = "DATA"
+
+    /**
+     * The complete source code text. Used by the parser and set via the constructor.
+     */
+    text: string
+
+    /**
+     * Initialize this tokenizer.
+     * @param templateText The contents of the <template> tag.
+     * @param text The complete source code
+     * @param {startingLine, startingColumn} The starting location of the templateText. Your token positions need to include this offset.
+     */
+    constructor (templateText: string, text: string, { startingLine: number, startingColumn: number }) {
+        this.text = text
+    }
+
+    /**
+     * Get the next intermediate token.
+     * @returns The intermediate token or null.
+     */
+    nextToken (): IntermediateToken | null {
+
+    }
+}
+```
+
+## Behaviour
+
+When the HTML parser encounters a `<template lang="...">` tag that matches a configured custom tokenizer, it will initialize a new instance of this tokenizer with the contents of the template tag. It will then call the `nextToken` method of this tokenizer until it returns `null`. After having consumed all intermediate tokens, it will copy the low level tokens, comments and errors from the tokenizer instance.
+
+## Examples
+
+For a working example, see [vue-eslint-parser-template-tokenizer-pug](https://github.com/rashfael/vue-eslint-parser-template-tokenizer-pug/).
diff --git a/src/common/parser-options.ts b/src/common/parser-options.ts
@@ -40,6 +40,8 @@ export interface ParserOptions {
 
     // others
     // [key: string]: any
+
+    templateTokenizer?: { [key: string]: string }
 }
 
 export function isSFCFile(parserOptions: ParserOptions) {

diff --git a/src/html/parser.ts b/src/html/parser.ts
@@ -15,6 +15,7 @@ import type {
     VDocumentFragment,
     VElement,
     VExpressionContainer,
+    VLiteral,
 } from "../ast"
 import { NS, ParseError } from "../ast"
 import { debug } from "../common/debug"
@@ -51,6 +52,8 @@ import {
     getScriptParser,
     getParserLangFromSFC,
 } from "../common/parser-options"
+import sortedIndexBy from "lodash/sortedIndexBy"
+import sortedLastIndexBy from "lodash/sortedLastIndexBy"
 
 const DIRECTIVE_NAME = /^(?:v-|[.:@#]).*[^.:@#]$/u
 const DT_DD = /^d[dt]$/u
@@ -474,6 +477,48 @@ export class Parser {
         }
     }
 
+    /**
+     * Process the given template text token with the custom template tokenizer configured for `lang`: every intermediate token it yields is dispatched to this parser's handler of the same name, then its low level tokens, comments and errors are merged into the root tokenizer.
+     * @param token The template text token to process.
+     * @param lang The template language the text token should be parsed as. Used as the key into `parserOptions.templateTokenizer`, which is assumed to be set (non-null assertion below).
+     */
+    private processTemplateText(token: Text, lang: string): void {
+        // eslint-disable-next-line @typescript-eslint/no-require-imports
+        const TemplateTokenizer = require(this.baseParserOptions
+            .templateTokenizer![lang])
+        const templateTokenizer = new TemplateTokenizer(
+            token.value,
+            this.text,
+            {
+                startingLine: token.loc.start.line,
+                startingColumn: token.loc.start.column,
+            },
+        )
+
+        // Temporarily override this.tokenizer so that expressionEnabled and state changes made while handling the tokens below are forwarded to the template tokenizer.
+        const rootTokenizer = this.tokenizer
+        this.tokenizer = templateTokenizer
+
+        let templateToken: IntermediateToken | null = null
+        while ((templateToken = templateTokenizer.nextToken()) != null) {
+            ;(this as any)[templateToken.type](templateToken) // dispatch to the handler method named after the token type
+        }
+
+        this.tokenizer = rootTokenizer // restore the root tokenizer before merging the results into it
+
+        const index = sortedIndexBy(
+            this.tokenizer.tokens,
+            token,
+            (x) => x.range[0],
+        )
+        const count =
+            sortedLastIndexBy(this.tokenizer.tokens, token, (x) => x.range[1]) -
+            index
+        this.tokenizer.tokens.splice(index, count, ...templateTokenizer.tokens) // swap the root tokens located by the text token's range for the template tokenizer's tokens
+        this.tokenizer.comments.push(...templateTokenizer.comments)
+        this.tokenizer.errors.push(...templateTokenizer.errors)
+    }
+
     /**
      * Handle the start tag token.
      * @param token The token to handle.
@@ -575,11 +620,12 @@ export class Parser {
                 const lang = langAttr?.value?.value
 
                 if (elementName === "template") {
+                    this.expressionEnabled = true
                     if (lang && lang !== "html") {
                         // It is not an HTML template.
                         this.tokenizer.state = "RAWTEXT"
+                        this.expressionEnabled = false
                     }
-                    this.expressionEnabled = true
                 } else if (this.isSFC) {
                     // Element is Custom Block. e.g. <i18n>
                     // Referred to the Vue parser. See https://github.com/vuejs/vue-next/blob/cbaa3805064cb581fc2007cf63774c91d39844fe/packages/compiler-sfc/src/parse.ts#L127
@@ -639,8 +685,26 @@ export class Parser {
      */
     protected Text(token: Text): void {
         debug("[html] Text %j", token)
-
         const parent = this.currentNode
+        if (
+            token.value &&
+            parent.type === "VElement" &&
+            parent.name === "template" &&
+            parent.parent.type === "VDocumentFragment"
+        ) {
+            const langAttribute = parent.startTag.attributes.find(
+                (a) => a.key.name === "lang",
+            )
+            const lang = (langAttribute?.value as VLiteral)?.value
+            if (
+                lang &&
+                lang !== "html" &&
+                this.baseParserOptions.templateTokenizer?.[lang]
+            ) {
+                this.processTemplateText(token, lang)
+                return
+            }
+        }
         parent.children.push({
             type: "VText",
             range: token.range,

diff --git a/src/index.ts b/src/index.ts
@@ -108,13 +108,14 @@ function parseAsSFC(code: string, options: ParserOptions) {
     const scripts = rootAST.children.filter(isScriptElement)
     const template = rootAST.children.find(isTemplateElement)
     const templateLang = getLang(template) || "html"
+    const hasTemplateTokenizer = options?.templateTokenizer?.[templateLang]
     const concreteInfo: AST.HasConcreteInfo = {
         tokens: rootAST.tokens,
         comments: rootAST.comments,
         errors: rootAST.errors,
     }
     const templateBody =
-        template != null && templateLang === "html"
+        template != null && (templateLang === "html" || hasTemplateTokenizer)
             ? Object.assign(template, concreteInfo)
             : undefined
 

diff --git a/test/ast.js b/test/ast.js
@@ -186,6 +186,16 @@ describe("Template AST", () => {
         const services = fs.existsSync(servicesPath)
             ? JSON.parse(fs.readFileSync(servicesPath, "utf8"))
             : null
+        if (parserOptions.templateTokenizer) {
+            parserOptions.templateTokenizer = Object.fromEntries(
+                Object.entries(parserOptions.templateTokenizer).map(
+                    ([key, value]) => [
+                        key,
+                        path.resolve(__dirname, "../", value),
+                    ],
+                ),
+            )
+        }
         const options = Object.assign(
             { filePath: sourcePath },
             PARSER_OPTIONS,