From bae9d3899f861b1db6af3f4d16fdda3d5e80cbcb Mon Sep 17 00:00:00 2001
From: Justin Beckwith
Date: Sun, 29 Nov 2020 11:06:24 -0800
Subject: [PATCH] feat: add --server-root flag (#191)

---
 README.md     | 43 ++++++++++++++++++++++----------------
 src/cli.ts    | 33 +++++++++++++++++------------
 src/config.ts |  1 +
 src/index.ts  | 58 ++++++++++++++++++++++++++++++++++++++++-----------
 test/test.ts  | 20 ++++++++++++++++++
 test/zcli.ts  | 11 ++++++++++
 6 files changed, 123 insertions(+), 43 deletions(-)

diff --git a/README.md b/README.md
index 391de9e..102feba 100644
--- a/README.md
+++ b/README.md
@@ -2,10 +2,11 @@
 > A super simple site crawler and broken link checker.
 
 [![npm version](https://img.shields.io/npm/v/linkinator.svg)](https://www.npmjs.org/package/linkinator)
-[![Build Status](https://api.cirrus-ci.com/github/JustinBeckwith/linkinator.svg)](https://cirrus-ci.com/github/JustinBeckwith/linkinator)
+[![Build Status](https://github.com/JustinBeckwith/linkinator/workflows/ci/badge.svg)](https://github.com/JustinBeckwith/linkinator/actions)
 [![codecov](https://codecov.io/gh/JustinBeckwith/linkinator/branch/master/graph/badge.svg)](https://codecov.io/gh/JustinBeckwith/linkinator)
 [![Dependency Status](https://img.shields.io/david/JustinBeckwith/linkinator.svg)](https://david-dm.org/JustinBeckwith/linkinator)
 [![Known Vulnerabilities](https://snyk.io/test/github/JustinBeckwith/linkinator/badge.svg)](https://snyk.io/test/github/JustinBeckwith/linkinator)
+[![Code Style: Google](https://img.shields.io/badge/code%20style-google-blueviolet.svg)](https://github.com/google/gts)
 [![semantic-release](https://img.shields.io/badge/%20%20%F0%9F%93%A6%F0%9F%9A%80-semantic--release-e10079.svg)](https://github.com/semantic-release/semantic-release)
 
@@ -26,7 +27,7 @@ $ npm install linkinator
 
 You can use this as a library, or as a CLI. Let's see the CLI!
 
-```sh
+```
 $ linkinator LOCATION [ --arguments ]
 
   Positional arguments
@@ -36,35 +37,39 @@ $ linkinator LOCATION [ --arguments ]
 
   Flags
 
-    --config
-      Path to the config file to use. Looks for `linkinator.config.json` by default.
-
     --concurrency
       The number of connections to make simultaneously. Defaults to 100.
 
-    --recurse, -r
-      Recursively follow links on the same root domain.
-
-    --skip, -s
-      List of urls in regexy form to not include in the check.
+    --config
+      Path to the config file to use. Looks for `linkinator.config.json` by default.
+
+    --format, -f
+      Return the data in CSV or JSON format.
+
+    --help
+      Show this command.
 
     --include, -i
      List of urls in regexy form to include. The opposite of --skip.
 
-    --format, -f
-      Return the data in CSV or JSON format.
+    --markdown
+      Automatically parse and scan markdown if scanning from a location on disk.
+
+    --recurse, -r
+      Recursively follow links on the same root domain.
+
+    --server-root
+      When scanning a local directory, customize the location on disk
+      where the server is started. Defaults to the path passed in [LOCATION].
 
     --silent
       Only output broken links.
 
+    --skip, -s
+      List of urls in regexy form to not include in the check.
+
     --timeout
       Request timeout in ms. Defaults to 0 (no timeout).
-
-    --markdown
-      Automatically parse and scan markdown if scanning from a location on disk.
-
-    --help
-      Show this command.
 ```
 
 ### Command Examples
@@ -142,6 +147,8 @@ Asynchronous method that runs a site wide scan. Options come in the form of an o
 - `concurrency` (number) - The number of connections to make simultaneously. Defaults to 100.
 - `port` (number) - When the `path` is provided as a local path on disk, the `port` on which to start the temporary web server. Defaults to a random high-range port.
 - `recurse` (boolean) - By default, all scans are shallow. Only the top level links on the requested page will be scanned. By setting `recurse` to `true`, the crawler will follow all links on the page, and continue scanning links **on the same domain** for as long as it can go. Results are cached, so no worries about loops.
+- `serverRoot` (string) - When scanning a local directory, customize the location on disk
+where the server is started. Defaults to the path passed in `path`.
 - `timeout` (number) - By default, requests made by linkinator do not time out (or follow the settings of the OS). This option (in milliseconds) will fail requests after the configured amount of time.
 - `markdown` (boolean) - Automatically parse and scan markdown if scanning from a location on disk.
 - `linksToSkip` (array | function) - An array of regular expression strings that should be skipped, OR an async function that's called for each link with the link URL as its only argument. Return a Promise that resolves to `true` to skip the link or `false` to check it.
diff --git a/src/cli.ts b/src/cli.ts
index ff37ab8..960ddf5 100644
--- a/src/cli.ts
+++ b/src/cli.ts
@@ -25,33 +25,38 @@ const cli = meow(
       Required. Either the URLs or the paths on disk to check for broken links.
 
   Flags
+
+    --concurrency
+      The number of connections to make simultaneously. Defaults to 100.
+
     --config
       Path to the config file to use. Looks for \`linkinator.config.json\` by default.
 
-    --concurrency
-      The number of connections to make simultaneously. Defaults to 100.
+    --format, -f
+      Return the data in CSV or JSON format.
+
+    --help
+      Show this command.
+
+    --markdown
+      Automatically parse and scan markdown if scanning from a location on disk.
 
     --recurse, -r
       Recursively follow links on the same root domain.
 
-    --skip, -s
-      List of urls in regexy form to not include in the check.
-
-    --format, -f
-      Return the data in CSV or JSON format.
+    --server-root
+      When scanning a local directory, customize the location on disk
+      where the server is started. Defaults to the path passed in [LOCATION].
 
     --silent
       Only output broken links
 
+    --skip, -s
+      List of urls in regexy form to not include in the check.
+
     --timeout
       Request timeout in ms. Defaults to 0 (no timeout).
-
-    --markdown
-      Automatically parse and scan markdown if scanning from a location on disk.
-
-    --help
-      Show this command.
-
   Examples
     $ linkinator docs/
     $ linkinator https://www.google.com
@@ -69,6 +74,7 @@ const cli = meow(
       silent: {type: 'boolean'},
       timeout: {type: 'number'},
       markdown: {type: 'boolean'},
+      serverRoot: {type: 'string'},
     },
     booleanDefault: undefined,
   }
@@ -121,6 +127,7 @@ async function main() {
     timeout: Number(flags.timeout),
     markdown: flags.markdown,
     concurrency: Number(flags.concurrency),
+    serverRoot: flags.serverRoot,
   };
   if (flags.skip) {
     if (typeof flags.skip === 'string') {
diff --git a/src/config.ts b/src/config.ts
index 65972c6..9de62c9 100644
--- a/src/config.ts
+++ b/src/config.ts
@@ -12,6 +12,7 @@ export interface Flags {
   silent?: boolean;
   timeout?: number;
   markdown?: boolean;
+  serverRoot?: string;
 }
 
 export async function getConfig(flags: Flags) {
diff --git a/src/index.ts b/src/index.ts
index 39f014b..11863a7 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -24,6 +24,7 @@ export interface CheckOptions {
   timeout?: number;
   markdown?: boolean;
   linksToSkip?: string[] | ((link: string) => Promise<boolean>);
+  serverRoot?: string;
 }
 
 export enum LinkState {
@@ -64,28 +65,20 @@ export class LinkChecker extends EventEmitter {
    * @param options Options to use while checking for 404s
    */
   async check(options: CheckOptions) {
+    this.validateOptions(options);
     options.linksToSkip = options.linksToSkip || [];
     options.path = path.normalize(options.path);
     let server: http.Server | undefined;
     if (!options.path.startsWith('http')) {
-      let localDirectory = options.path;
-      let localFile = '';
-      const s = await stat(options.path);
-      if (s.isFile()) {
-        const pathParts = options.path.split(path.sep);
-        localFile = path.sep + pathParts[pathParts.length - 1];
-        localDirectory = pathParts
-          .slice(0, pathParts.length - 1)
-          .join(path.sep);
-      }
+      const serverOptions = await this.getServerRoot(options);
       const port = options.port || 5000 + Math.round(Math.random() * 1000);
       server = await this.startWebServer(
-        localDirectory,
+        serverOptions.serverRoot,
         port,
         options.markdown
       );
       enableDestroy(server);
-      options.path = `http://localhost:${port}${localFile}`;
+      options.path = `http://localhost:${port}${serverOptions.path}`;
     }
 
     const queue = new PQueue({
@@ -118,6 +111,47 @@ export class LinkChecker extends EventEmitter {
     return result;
   }
 
+  /**
+   * Validate the provided flags all work with each other.
+   * @param options CheckOptions passed in from the CLI (or API)
+   */
+  private validateOptions(options: CheckOptions) {
+    if (options.serverRoot && options.path.startsWith('http')) {
+      throw new Error(
+        "'serverRoot' cannot be defined when the 'path' points to an HTTP endpoint."
+      );
+    }
+  }
+
+  /**
+   * Figure out which directory should be used as the root for the web server,
+   * and how that impacts the path to the file for the first request.
+   * @param options CheckOptions passed in from the CLI or API
+   */
+  private async getServerRoot(options: CheckOptions) {
+    if (options.serverRoot) {
+      const filePath = options.path.startsWith('/')
+        ? options.path
+        : '/' + options.path;
+      return {
+        serverRoot: options.serverRoot,
+        path: filePath,
+      };
+    }
+    let localDirectory = options.path;
+    let localFile = '';
+    const s = await stat(options.path);
+    if (s.isFile()) {
+      const pathParts = options.path.split(path.sep);
+      localFile = path.sep + pathParts[pathParts.length - 1];
+      localDirectory = pathParts.slice(0, pathParts.length - 1).join(path.sep);
+    }
+    return {
+      serverRoot: localDirectory,
+      path: localFile,
+    };
+  }
+
   /**
    * Spin up a local HTTP server to serve static requests from disk
    * @param root The local path that should be mounted as a static web server
diff --git a/test/test.ts b/test/test.ts
index 1a15539..e5df56d 100644
--- a/test/test.ts
+++ b/test/test.ts
@@ -289,4 +289,24 @@ describe('linkinator', () => {
     assert.strictEqual(results.links.length, 3);
     assert.ok(results.passed);
   });
+
+  it('should throw an error if you pass server-root and an http based path', async () => {
+    await assert.rejects(
+      check({
+        path: 'https://jbeckwith.com',
+        serverRoot: process.cwd(),
+      }),
+      /cannot be defined/
+    );
+  });
+
+  it('should allow overriding the server root', async () => {
+    const results = await check({
+      serverRoot: 'test/fixtures/markdown',
+      markdown: true,
+      path: 'README.md',
+    });
+    assert.strictEqual(results.links.length, 3);
+    assert.ok(results.passed);
+  });
 });
diff --git a/test/zcli.ts b/test/zcli.ts
index aeec0f3..24f3692 100644
--- a/test/zcli.ts
+++ b/test/zcli.ts
@@ -61,4 +61,15 @@ describe('cli', () => {
     ]);
     assert.strictEqual(res.stdout.indexOf('['), -1);
   });
+
+  it('should accept a server-root', async () => {
+    const res = await execa('npx', [
+      'linkinator',
+      '--markdown',
+      '--server-root',
+      'test/fixtures/markdown',
+      'README.md',
+    ]);
+    assert.ok(res.stdout.includes('Successfully scanned'));
+  });
 });
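
For reviewers who want to try the change, here is a minimal TypeScript sketch of the new option through the library API. It is not part of the patch; it assumes the patch is applied, and it mirrors the new tests above: `test/fixtures/markdown` is the fixture directory used by those tests, and `check` is the convenience export the tests import.

```ts
import {check} from 'linkinator';

async function main() {
  // Serve test/fixtures/markdown as the web root and request README.md
  // from within it, instead of mounting the server at README.md's parent.
  const results = await check({
    serverRoot: 'test/fixtures/markdown',
    path: 'README.md',
    markdown: true,
  });
  console.log(`passed: ${results.passed}, links checked: ${results.links.length}`);

  // serverRoot only makes sense for local scans; combining it with an
  // http(s) path is rejected by validateOptions, so the promise rejects.
  try {
    await check({serverRoot: process.cwd(), path: 'https://jbeckwith.com'});
  } catch (e) {
    console.error((e as Error).message);
  }
}

main().catch(console.error);
```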