Skip to content

Commit

Permalink
Move fork back into main project (inikulin#362)
Browse files Browse the repository at this point in the history
- Ported to TypeScript
- Switched to npm workspaces, replacing `lerna`
- Switched to Jest as the test runner
- Switched to the `entities` module for decoding entities
  - A version of parse5's decoding logic was adapted for `entities`. Adopting this dependency allows us to share the maintenance with the `entities` & `htmlparser2` modules.
- Moved the docs back to TSDoc comments
- Switched to the state machine pattern of `htmlparser2` for the tokenizer
- No more mixins: Merged location & error mixins into the main classes
- Introduced tag IDs, to avoid branching over large numbers of strings.
- Introduced Maps and Sets where appropriate
- Switched the order of the formatted elements list
- Introduced array helpers in a lot of places
- Updated tests to no longer build objects of tests (used `describe`/`it` constructs instead)

Co-authored-by: Titus <tituswormer@gmail.com>
Co-authored-by: 43081j <43081j@users.noreply.github.com>
  • Loading branch information
3 people authored and jmbpwtw committed Feb 16, 2023
1 parent 192f5e5 commit 0c35e95
Show file tree
Hide file tree
Showing 162 changed files with 21,064 additions and 33,262 deletions.
6 changes: 3 additions & 3 deletions .eslintignore
@@ -1,4 +1,4 @@
packages/parse5/lib/tokenizer/named-entity-data.js
test/benchmark/node_modules/**/*.js
test/memory_benchmark/node_modules/**/*.js
test/data/html5lib-tests
packages/*/dist/
test/dist/
node_modules
25 changes: 21 additions & 4 deletions .eslintrc.json
Expand Up @@ -2,7 +2,7 @@
"env": {
"es2020": true,
"node": true,
"mocha": true
"jest": true
},
"extends": ["eslint:recommended", "prettier", "plugin:unicorn/recommended"],
"rules": {
Expand All @@ -21,6 +21,7 @@
}
],
"prefer-template": "error",
"arrow-body-style": ["error", "as-needed"],

"unicorn/no-null": "off",
"unicorn/prevent-abbreviations": "off",
Expand All @@ -30,10 +31,26 @@
"unicorn/no-array-reduce": "off",
"unicorn/no-for-loop": "off",
"unicorn/consistent-destructuring": "off",
"unicorn/prefer-switch": ["error", { "emptyDefaultCase": "do-nothing-comment" }],
"unicorn/number-literal-case": "off"
"unicorn/prefer-switch": ["error", { "emptyDefaultCase": "do-nothing-comment" }]
},
"parserOptions": {
"sourceType": "module"
}
},
"overrides": [
{
"files": "*.ts",
"extends": [
"plugin:@typescript-eslint/eslint-recommended",
"plugin:@typescript-eslint/recommended",
"prettier"
],
"rules": {
"@typescript-eslint/no-non-null-assertion": "warn",
"@typescript-eslint/no-explicit-any": "warn",
"@typescript-eslint/explicit-function-return-type": "error",

"@typescript-eslint/no-unused-vars": ["error", { "argsIgnorePattern": "^_" }]
}
}
]
}
2 changes: 1 addition & 1 deletion .gitattributes
@@ -1,3 +1,3 @@
# Exclude the HTML files from GitHub's language statistics
# https://github.com/github/linguist#using-gitattributes
test/data/* linguist-vendored
packages/test-utils/data/* linguist-vendored
5 changes: 3 additions & 2 deletions .gitignore
Expand Up @@ -4,6 +4,7 @@
node_modules
docs/build
docs/05_api_reference.md
package-lock.json
bench/package-lock.json
packages/*/dist/
test/dist/
.DS_Store
tsconfig.tsbuildinfo
2 changes: 1 addition & 1 deletion .gitmodules
@@ -1,3 +1,3 @@
[submodule "test/data/html5lib-tests"]
[submodule "html5lib-tests"]
path = test/data/html5lib-tests
url = https://github.com/HTMLParseErrorWG/html5lib-tests
4 changes: 4 additions & 0 deletions .husky/pre-commit
@@ -0,0 +1,4 @@
#!/bin/sh
. "$(dirname "$0")/_/husky.sh"

npm run pre-commit
4 changes: 3 additions & 1 deletion .prettierignore
@@ -1,2 +1,4 @@
packages/parse5/lib/tokenizer/named-entity-data.js
packages/*/dist/
test/dist/
docs
test/data/html5lib-tests
2 changes: 1 addition & 1 deletion README.md
Expand Up @@ -9,7 +9,7 @@
</p>

<p align="center">
<a href="https://travis-ci.org/inikulin/parse5"><img alt="Build Status" src="https://api.travis-ci.org/inikulin/parse5.svg"></a>
<a href="https://github.com/inikulin/parse5/actions/workflows/nodejs-test.yml"><img alt="Build Status" src="https://img.shields.io/github/workflow/status/inikulin/parse5/Node.js%20CI?label=tests&style=flat"></a>
<a href="https://www.npmjs.com/package/parse5"><img alt="NPM Version" src="https://img.shields.io/npm/v/parse5.svg"></a>
<a href="https://npmjs.org/package/parse5"><img alt="Downloads" src="http://img.shields.io/npm/dm/parse5.svg"></a>
<a href="https://npmjs.org/package/parse5"><img alt="Downloads total" src="http://img.shields.io/npm/dt/parse5.svg"></a>
Expand Down
13 changes: 0 additions & 13 deletions bench/memory/named-entity-data.js

This file was deleted.

8 changes: 4 additions & 4 deletions bench/memory/sax-parser.js
@@ -1,8 +1,8 @@
import * as fs from 'node:fs';
import { readFile } from 'node:fs/promises';
import format from 'human-format';
import promisifyEvent from 'promisify-event';
import memwatch from '@airbnb/node-memwatch';
import SAXParser from '../../packages/parse5-sax-parser/lib/index.js';
import { SAXParser } from '../../packages/parse5-sax-parser/dist/index.js';

main();

Expand All @@ -15,7 +15,7 @@ async function main() {
let heapDiff = null;

memwatch.on('stats', (stats) => {
maxMemUsage = Math.max(maxMemUsage, stats['current_base']);
maxMemUsage = Math.max(maxMemUsage, stats.used_heap_size);
});

startDate = new Date();
Expand All @@ -35,7 +35,7 @@ async function main() {
}

async function parse() {
const data = fs.readFileSync('../test/data/huge-page/huge-page.html', 'utf8');
const data = await readFile(new URL('../../test/data/huge-page/huge-page.html', import.meta.url), 'utf8');
let parsedDataSize = 0;
const stream = new SAXParser();

Expand Down
5 changes: 3 additions & 2 deletions bench/package.json
@@ -1,15 +1,16 @@
{
"name": "parse5-benchmarks",
"private": "true",
"type": "module",
"version": "1.0.0",
"description": "parse5 regression benchmarks",
"author": "Ivan Nikulin <ifaaan@gmail.com>",
"license": "MIT",
"dependencies": {
"benchmark": "^2.1.4",
"human-format": "^0.7.0",
"human-format": "^0.11.0",
"@airbnb/node-memwatch": "^2.0.0",
"parse5": "*",
"parse5": "npm:parse5",
"promisify-event": "^1.0.0"
}
}
24 changes: 13 additions & 11 deletions bench/perf/index.js
@@ -1,10 +1,10 @@
import { readFileSync, createReadStream, readdirSync } from 'node:fs';
import Benchmark from 'benchmark';
import { loadTreeConstructionTestData } from '../../test/utils/generate-parsing-tests.js';
import { loadSAXParserTestData } from '../../test/utils/load-sax-parser-test-data.js';
import { treeAdapters, WritableStreamStub } from '../../test/utils/common.js';
import * as parse5 from '../../packages/parse5/lib/index.js';
import { ParserStream as parse5Stream } from '../../packages/parse5-parser-stream/lib/index.js';
import { loadTreeConstructionTestData } from 'parse5-test-utils/dist/generate-parsing-tests.js';
import { loadSAXParserTestData } from 'parse5-test-utils/dist/load-sax-parser-test-data.js';
import { treeAdapters, WritableStreamStub } from 'parse5-test-utils/dist/common.js';
import * as parse5 from '../../packages/parse5/dist/index.js';
import { ParserStream as parse5Stream } from '../../packages/parse5-parser-stream/dist/index.js';
import * as parse5Upstream from 'parse5';

const hugePagePath = new URL('../../test/data/huge-page/huge-page.html', import.meta.url);
Expand Down Expand Up @@ -46,19 +46,21 @@ global.runMicro = function (parser) {
const pages = loadSAXParserTestData().map((test) => test.src);

global.runPages = function (parser) {
for (let j = 0; j < pages.length; j++) {
parser.parse(pages[j]);
for (const page of pages) {
parser.parse(page);
}
};

// Stream data
global.files = readdirSync(saxPath).map((dirName) => {
return new URL(`${dirName}/src.html`, saxPath).pathname;
});
global.files = readdirSync(saxPath).map((dirName) => new URL(`${dirName}/src.html`, saxPath).pathname);

// Utils
function getHz(suite, testName) {
return suite.find((t) => t.name === testName).hz;
for (let i = 0; i < suite.length; i++) {
if (suite[i].name === testName) {
return suite[i].hz;
}
}
}

function runBench({ name, workingCopyFn, upstreamFn, defer = false }) {
Expand Down
5 changes: 0 additions & 5 deletions lerna.json

This file was deleted.

0 comments on commit 0c35e95

Please sign in to comment.