Skip to content

Commit

Permalink
Add support for options.passThrough
Browse files Browse the repository at this point in the history
* Add support for options
* Add `passThrough` to pass through otherwise unknown nodes
  from the “malformed” tree to the well formed tree

Related to wooorm/xdm#17.
  • Loading branch information
wooorm committed Mar 5, 2021
1 parent bee5dd9 commit f64e56f
Show file tree
Hide file tree
Showing 6 changed files with 145 additions and 8 deletions.
59 changes: 53 additions & 6 deletions index.js
Expand Up @@ -2,6 +2,7 @@

var Parser = require('parse5/lib/parser')
var pos = require('unist-util-position')
var visit = require('unist-util-visit')
var fromParse5 = require('hast-util-from-parse5')
var toParse5 = require('hast-util-to-parse5')
var voids = require('html-void-elements')
Expand All @@ -19,12 +20,9 @@ var endTagToken = 'END_TAG_TOKEN'
var commentToken = 'COMMENT_TOKEN'
var doctypeToken = 'DOCTYPE_TOKEN'

var parseOptions = {
sourceCodeLocationInfo: true,
scriptingEnabled: false
}
var parseOptions = {sourceCodeLocationInfo: true, scriptingEnabled: false}

function wrap(tree, file) {
function wrap(tree, file, options) {
var parser = new Parser(parseOptions)
var one = zwitch('type', {
handlers: {
Expand All @@ -37,11 +35,32 @@ function wrap(tree, file) {
},
unknown: unknown
})
var stitches
var tokenizer
var preprocessor
var posTracker
var locationTracker
var result = fromParse5(documentMode(tree) ? document() : fragment(), file)
var result
var index

if (file && !('contents' in file)) {
options = file
file = undefined
}

if (options && options.passThrough) {
index = -1

while (++index < options.passThrough.length) {
one.handlers[options.passThrough[index]] = stitch
}
}

result = fromParse5(documentMode(tree) ? document() : fragment(), file)

if (stitches) {
visit(result, 'comment', mend)
}

// Unpack if possible and when not given a `root`.
if (tree.type !== 'root' && result.children.length === 1) {
Expand All @@ -50,6 +69,13 @@ function wrap(tree, file) {

return result

function mend(node, index, parent) {
if (node.value.stitch) {
parent.children[index] = node.value.stitch
return index
}
}

function fragment() {
var context = {
nodeName: 'template',
Expand Down Expand Up @@ -208,6 +234,27 @@ function wrap(tree, file) {
}
}

// Handler for node types listed in `options.passThrough`: keeps the node by
// smuggling a shallow copy of it through the parser inside a placeholder
// comment, which `mend` later swaps back for the copy.
function stitch(node) {
  var copy = Object.assign({}, node)

  // Signal that the result contains placeholders that need mending.
  stitches = true

  // Process the children on their own, to somewhat handle `[<x>]</x>` (where
  // `[]` denotes the passed through node).
  if (node.children) {
    copy.children = wrap(
      {type: 'root', children: node.children},
      file,
      options
    ).children
  }

  // Hack: `value` is supposed to be a string, but as none of the tools
  // (`parse5` or `hast-util-from-parse5`) looks at it, we can pass nodes
  // through.
  comment({value: {stitch: copy}})
}

function resetTokenizer() {
// Reset tokenizer:
// See: <https://github.com/inikulin/parse5/blob/9c683e1/packages/parse5/lib/tokenizer/index.js#L218-L234>.
Expand Down
1 change: 1 addition & 0 deletions package.json
Expand Up @@ -36,6 +36,7 @@
"html-void-elements": "^1.0.0",
"parse5": "^6.0.0",
"unist-util-position": "^3.0.0",
"unist-util-visit": "^2.0.0",
"vfile": "^4.0.0",
"web-namespaces": "^1.0.0",
"xtend": "^4.0.0",
Expand Down
9 changes: 8 additions & 1 deletion readme.md
Expand Up @@ -61,12 +61,19 @@ Yields:

## API

### `raw(tree[, file])`
### `raw(tree[, file][, options])`

Given a [**hast**][hast] [*tree*][tree] and an optional [vfile][] (for
[positional info][position-information]), return a new parsed-again
[**hast**][hast] [*tree*][tree].

###### `options.passThrough`

List of custom hast node types to pass through (keep) in hast
(`Array.<string>`, default: `[]`).
If the passed-through nodes have children, those children are expected to be
hast and are processed as well (for example, `raw` nodes inside them are parsed).

## Security

Use of `hast-util-raw` can open you up to a [cross-site scripting (XSS)][xss]
Expand Down
62 changes: 62 additions & 0 deletions test.js
Expand Up @@ -185,6 +185,68 @@ test('raw', function (t) {
'should not discard HTML broken over several raw nodes'
)

t.deepEqual(
raw(u('root', [u('custom', 'x')]), {passThrough: ['custom']}),
u('root', {data: {quirksMode: false}}, [u('custom', 'x')]),
'should support passing through nodes w/o children'
)

t.deepEqual(
raw(u('root', [u('custom', [u('raw', '<i>j</i>')])]), {
passThrough: ['custom']
}),
u('root', {data: {quirksMode: false}}, [u('custom', [h('i', 'j')])]),
'should support passing through nodes w/ `raw` children'
)

t.deepEqual(
raw(u('root', [u('custom', [u('comment', 'x')])]), {
passThrough: ['custom']
}),
u('root', {data: {quirksMode: false}}, [u('custom', [u('comment', 'x')])]),
'should support passing through nodes w/ `comment` children'
)

t.deepEqual(
raw(u('root', [u('custom', [])]), {
passThrough: ['custom']
}),
u('root', {data: {quirksMode: false}}, [u('custom', [])]),
'should support passing through nodes w/ `0` children'
)

t.deepEqual(
raw(u('root', [u('custom', [u('raw', '<x')])]), {
passThrough: ['custom']
}),
u('root', {data: {quirksMode: false}}, [u('custom', [])]),
'should support passing through nodes w/ broken raw children (1)'
)

t.deepEqual(
raw(u('root', [u('custom', [u('raw', '<x>')])]), {
passThrough: ['custom']
}),
u('root', {data: {quirksMode: false}}, [u('custom', [h('x')])]),
'should support passing through nodes w/ broken raw children (2)'
)

t.deepEqual(
raw(u('root', [u('custom', [u('raw', '</x>')])]), {
passThrough: ['custom']
}),
u('root', {data: {quirksMode: false}}, [u('custom', [])]),
'should support passing through nodes w/ broken raw children (3)'
)

t.deepEqual(
raw(u('root', [u('custom', [u('raw', '<x>')]), u('raw', '</x>')]), {
passThrough: ['custom']
}),
u('root', {data: {quirksMode: false}}, [u('custom', [h('x')])]),
'should support passing through nodes w/ broken raw children (4)'
)

t.deepEqual(
raw(u('root', [u('raw', '<script>alert(1)</script>')])),
u('root', {data: {quirksMode: false}}, [
Expand Down
7 changes: 7 additions & 0 deletions types/hast-util-raw-test.ts
Expand Up @@ -6,7 +6,14 @@ raw({type: 'element', tagName: 'div', properties: {}, children: []}) // $ExpectT
// prettier-ignore
raw({type: 'element', tagName: 'div', properties: {}, children: []}, vFile('test')) // $ExpectType Node

// Options can be passed on their own as the second argument, or after a file.
raw({type: 'raw'}, {}) // $ExpectType Node
raw({type: 'raw'}, {passThrough: []}) // $ExpectType Node
raw({type: 'raw'}, {passThrough: ['x']}) // $ExpectType Node
raw({type: 'raw'}, vFile(), {}) // $ExpectType Node

raw() // $ExpectError
raw({}) // $ExpectError
// prettier-ignore
raw({type: 'element', tagName: 'div', properties: {}, children: []}, 'not a vFile') // $ExpectError
// Unknown option fields are rejected, and options may not come before the file.
raw({type: 'raw'}, {x: 1}) // $ExpectError
raw({type: 'raw'}, {}, vFile()) // $ExpectError
15 changes: 14 additions & 1 deletion types/index.d.ts
Expand Up @@ -3,11 +3,24 @@
import {Node} from 'hast'
import {VFile} from 'vfile'

declare namespace raw {
  /**
   * Settings accepted by `raw`.
   */
  interface Options {
    /**
     * List of custom hast node types to pass through (keep) in hast.
     * If the passed through nodes have children, those children are expected to
     * be hast and will be handled.
     *
     * The list is only read, never modified, so readonly arrays (frozen lists,
     * `as const` tuples) are accepted as well as plain `string[]`.
     * Omitting it passes no node types through.
     */
    passThrough?: ReadonlyArray<string>
  }
}

/**
* Given a hast tree and an optional vfile (for positional info), return a new parsed-again hast tree.
* @param tree original hast tree
* @param file positional info
* @param options settings
*/
declare function raw(tree: Node, file?: VFile): Node
declare function raw(tree: Node, file?: VFile, options?: raw.Options): Node
declare function raw(tree: Node, options?: raw.Options): Node

export = raw

0 comments on commit f64e56f

Please sign in to comment.