diff --git a/.gitignore b/.gitignore index d13b004..d561b7b 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ package-lock.json .nyc_output build/ coverage +.vscode diff --git a/README.md b/README.md index e0fa9b5..b567cb1 100644 --- a/README.md +++ b/README.md @@ -14,6 +14,7 @@ Behold my latest inator! The `linkinator` provides an API and CLI for crawling w - 🔥Scan any element that includes links, not just `<a href>` - 🔥Supports redirects, absolute links, relative links, all the things - 🔥Configure specific regex patterns to skip +- 🔥Scan markdown files without transpilation ## Installation @@ -58,6 +59,9 @@ $ linkinator LOCATION [ --arguments ] --timeout Request timeout in ms. Defaults to 0 (no timeout). + + --markdown + Automatically parse and scan markdown if scanning from a location on disk. --help Show this command. @@ -101,6 +105,12 @@ Maybe you're going to pipe the output to another program. Use the `--format` op $ linkinator ./docs --format CSV ``` +Let's make sure the `README.md` in our repo doesn't have any busted links: + +```sh +$ linkinator ./README.md --markdown +``` + ### Configuration file You can pass options directly to the `linkinator` CLI, or you can define a config file. By default, `linkinator` will look for a `linkinator.config.json` file in the current working directory. @@ -113,6 +123,7 @@ All options are optional. It should look like this: "silent": true, "concurrency": 100, "timeout": 0, + "markdown": true, "skip": "www.googleapis.com" } ``` @@ -132,6 +143,7 @@ Asynchronous method that runs a site wide scan. Options come in the form of an o - `port` (number) - When the `path` is provided as a local path on disk, the `port` on which to start the temporary web server. Defaults to a random high range order port. - `recurse` (boolean) - By default, all scans are shallow. Only the top level links on the requested page will be scanned.
By setting `recurse` to `true`, the crawler will follow all links on the page, and continue scanning links **on the same domain** for as long as it can go. Results are cached, so no worries about loops. - `timeout` (number) - By default, requests made by linkinator do not time out (or follow the settings of the OS). This option (in milliseconds) will fail requests after the configured amount of time. +- `markdown` (boolean) - Automatically parse and scan markdown if scanning from a location on disk. - `linksToSkip` (array | function) - An array of regular expression strings that should be skipped, OR an async function that's called for each link with the link URL as its only argument. Return a Promise that resolves to `true` to skip the link or `false` to check it. #### linkinator.LinkChecker() diff --git a/package.json b/package.json index 1deab20..8974fc4 100644 --- a/package.json +++ b/package.json @@ -21,23 +21,23 @@ "dependencies": { "chalk": "^4.0.0", "cheerio": "^1.0.0-rc.2", - "finalhandler": "^1.1.2", + "express": "^4.17.1", "gaxios": "^4.0.0", "jsonexport": "^3.0.0", + "marked": "^1.2.5", "meow": "^8.0.0", "p-queue": "^6.2.1", - "serve-static": "^1.14.1", "server-destroy": "^1.0.1", "update-notifier": "^5.0.0" }, "devDependencies": { "@types/chai": "^4.2.7", "@types/cheerio": "^0.22.10", - "@types/finalhandler": "^1.1.0", + "@types/express": "^4.17.9", + "@types/marked": "^1.2.0", "@types/meow": "^5.0.0", "@types/mocha": "^8.0.0", "@types/node": "^12.7.12", - "@types/serve-static": "^1.13.3", "@types/server-destroy": "^1.0.0", "@types/sinon": "^9.0.0", "@types/update-notifier": "^5.0.0", diff --git a/src/cli.ts b/src/cli.ts index 50e2fad..ff37ab8 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -22,7 +22,7 @@ const cli = meow( Positional arguments LOCATION - Required. Either the URL or the path on disk to check for broken links. + Required. Either the URLs or the paths on disk to check for broken links. 
Flags --config @@ -46,6 +46,9 @@ const cli = meow( --timeout Request timeout in ms. Defaults to 0 (no timeout). + --markdown + Automatically parse and scan markdown if scanning from a location on disk. + --help Show this command. @@ -65,6 +68,7 @@ const cli = meow( format: {type: 'string', alias: 'f'}, silent: {type: 'boolean'}, timeout: {type: 'number'}, + markdown: {type: 'boolean'}, }, booleanDefault: undefined, } @@ -115,6 +119,7 @@ async function main() { path: cli.input[0], recurse: flags.recurse, timeout: Number(flags.timeout), + markdown: flags.markdown, concurrency: Number(flags.concurrency), }; if (flags.skip) { diff --git a/src/config.ts b/src/config.ts index ad380d2..65972c6 100644 --- a/src/config.ts +++ b/src/config.ts @@ -11,6 +11,7 @@ export interface Flags { format?: string; silent?: boolean; timeout?: number; + markdown?: boolean; } export async function getConfig(flags: Flags) { diff --git a/src/index.ts b/src/index.ts index 3fafc1a..39f014b 100644 --- a/src/index.ts +++ b/src/index.ts @@ -2,14 +2,19 @@ import {EventEmitter} from 'events'; import * as gaxios from 'gaxios'; import * as http from 'http'; import enableDestroy = require('server-destroy'); +import * as express from 'express'; +import * as fs from 'fs'; +import * as util from 'util'; +import * as path from 'path'; +import * as marked from 'marked'; import PQueue, {DefaultAddOptions} from 'p-queue'; import {getLinks} from './links'; import {URL} from 'url'; import PriorityQueue from 'p-queue/dist/priority-queue'; -import finalhandler = require('finalhandler'); -import serveStatic = require('serve-static'); +const stat = util.promisify(fs.stat); +const readFile = util.promisify(fs.readFile); export interface CheckOptions { concurrency?: number; @@ -17,6 +22,7 @@ export interface CheckOptions { path: string; recurse?: boolean; timeout?: number; + markdown?: boolean; linksToSkip?: string[] | ((link: string) => Promise); } @@ -59,12 +65,27 @@ export class LinkChecker extends EventEmitter { 
*/ async check(options: CheckOptions) { options.linksToSkip = options.linksToSkip || []; + options.path = path.normalize(options.path); let server: http.Server | undefined; if (!options.path.startsWith('http')) { + let localDirectory = options.path; + let localFile = ''; + const s = await stat(options.path); + if (s.isFile()) { + const pathParts = options.path.split(path.sep); + localFile = path.sep + pathParts[pathParts.length - 1]; + localDirectory = pathParts + .slice(0, pathParts.length - 1) + .join(path.sep); + } const port = options.port || 5000 + Math.round(Math.random() * 1000); - server = await this.startWebServer(options.path, port); + server = await this.startWebServer( + localDirectory, + port, + options.markdown + ); enableDestroy(server); - options.path = `http://localhost:${port}`; + options.path = `http://localhost:${port}${localFile}`; } const queue = new PQueue({ @@ -101,19 +122,35 @@ export class LinkChecker extends EventEmitter { * Spin up a local HTTP server to serve static requests from disk * @param root The local path that should be mounted as a static web server * @param port The port on which to start the local web server + * @param markdown If markdown should be automatically compiled and served * @private * @returns Promise that resolves with the instance of the HTTP server */ - private startWebServer(root: string, port: number): Promise { - return new Promise((resolve, reject) => { - const serve = serveStatic(root); - const server = http - .createServer((req, res) => - serve(req, res, finalhandler(req, res) as () => void) - ) - .listen(port, () => resolve(server)) - .on('error', reject); + private async startWebServer(root: string, port: number, markdown?: boolean) { + const app = express() + .use(async (req, res, next) => { + if (!markdown) { + return next(); + } + const pathParts = req.path.split('/').filter(x => !!x); + if (pathParts.length === 0) { + return next(); + } + const ext = path.extname(pathParts[pathParts.length - 1]); + 
if (ext.toLowerCase() === '.md') { + const filePath = path.join(path.resolve(root), req.path); + const data = await readFile(filePath, {encoding: 'utf-8'}); + const result = marked(data, {gfm: true}); + res.send(result).end(); + return; + } + return next(); + }) + .use(express.static(path.resolve(root))); + const server = await new Promise(resolve => { + const s = app.listen(port, () => resolve(s)); }); + return server; } /** diff --git a/test/fixtures/markdown/LICENSE.md b/test/fixtures/markdown/LICENSE.md new file mode 100644 index 0000000..3336d55 --- /dev/null +++ b/test/fixtures/markdown/LICENSE.md @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) Justin Beckwith (jbeckwith.com) + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
diff --git a/test/fixtures/markdown/README.md b/test/fixtures/markdown/README.md new file mode 100644 index 0000000..0d0f573 --- /dev/null +++ b/test/fixtures/markdown/README.md @@ -0,0 +1,6 @@ +# Say hello to my README +This has [a link](LICENSE.md) to something. + +Also here is my cat. +![booboobadkitteh](boo.jpg) + diff --git a/test/fixtures/markdown/boo.jpg b/test/fixtures/markdown/boo.jpg new file mode 100644 index 0000000..50c1943 Binary files /dev/null and b/test/fixtures/markdown/boo.jpg differ diff --git a/test/test.ts b/test/test.ts index c17bff3..1a15539 100644 --- a/test/test.ts +++ b/test/test.ts @@ -280,4 +280,13 @@ describe('linkinator', () => { }); assert.ok(!results.passed); }); + + it('should handle markdown', async () => { + const results = await check({ + path: 'test/fixtures/markdown/README.md', + markdown: true, + }); + assert.strictEqual(results.links.length, 3); + assert.ok(results.passed); + }); }); diff --git a/tsconfig.json b/tsconfig.json index 38f57e2..e2c31a6 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -7,7 +7,6 @@ }, "include": [ "src/*.ts", - "test/*.ts", - "system-test/*.ts" + "test/*.ts" ] }