Skip to content

Commit

Permalink
feat: add support for multiple paths (#194)
Browse files (browse the repository at this point in the history)
  • Loading branch information
JustinBeckwith committed Dec 1, 2020
1 parent ce649d4 commit e70dff6
Show file tree
Hide file tree
Showing 6 changed files with 206 additions and 57 deletions.
4 changes: 2 additions & 2 deletions src/cli.ts
Expand Up @@ -83,7 +83,7 @@ const cli = meow(
let flags: Flags;

async function main() {
if (cli.input.length !== 1) {
if (cli.input.length < 1) {
cli.showHelp();
return;
}
Expand Down Expand Up @@ -122,7 +122,7 @@ async function main() {
log(`${state} ${chalk.gray(link.url)}`);
});
const opts: CheckOptions = {
path: cli.input[0],
path: cli.input,
recurse: flags.recurse,
timeout: Number(flags.timeout),
markdown: flags.markdown,
Expand Down
145 changes: 91 additions & 54 deletions src/index.ts
Expand Up @@ -19,7 +19,7 @@ const readFile = util.promisify(fs.readFile);
export interface CheckOptions {
concurrency?: number;
port?: number;
path: string;
path: string | string[];
recurse?: boolean;
timeout?: number;
markdown?: boolean;
Expand Down Expand Up @@ -53,6 +53,7 @@ interface CrawlOptions {
cache: Set<string>;
checkOptions: CheckOptions;
queue: PQueue<PriorityQueue, DefaultAddOptions>;
rootPath: string;
}

/**
Expand All @@ -64,41 +65,56 @@ export class LinkChecker extends EventEmitter {
* status codes.
* @param options Options to use while checking for 404s
*/
async check(options: CheckOptions) {
this.validateOptions(options);
async check(opts: CheckOptions) {
const options = await this.processOptions(opts);
if (!Array.isArray(options.path)) {
options.path = [options.path];
}
options.linksToSkip = options.linksToSkip || [];
options.path = path.normalize(options.path);
let server: http.Server | undefined;
if (!options.path.startsWith('http')) {
const serverOptions = await this.getServerRoot(options);
const hasHttpPaths = options.path.find(x => x.startsWith('http'));
if (!hasHttpPaths) {
const port = options.port || 5000 + Math.round(Math.random() * 1000);
server = await this.startWebServer(
serverOptions.serverRoot,
options.serverRoot!,
port,
options.markdown
);
enableDestroy(server);
options.path = `http://localhost:${port}${serverOptions.path}`;
for (let i = 0; i < options.path.length; i++) {
if (options.path[i].startsWith('/')) {
options.path[i] = options.path[i].slice(1);
}
options.path[i] = `http://localhost:${port}/${options.path[i]}`;
}
}

if (process.env.LINKINATOR_DEBUG) {
console.log(options);
}

const queue = new PQueue({
concurrency: options.concurrency || 100,
});

const results = new Array<LinkResult>();
const url = new URL(options.path);
const initCache: Set<string> = new Set();
initCache.add(url.href);
queue.add(async () => {
await this.crawl({
url: new URL(options.path),
crawl: true,
checkOptions: options,
results,
cache: initCache,
queue,

for (const path of options.path) {
const url = new URL(path);
initCache.add(url.href);
queue.add(async () => {
await this.crawl({
url,
crawl: true,
checkOptions: options,
results,
cache: initCache,
queue,
rootPath: path,
});
});
});
}
await queue.onIdle();

const result = {
Expand All @@ -115,41 +131,65 @@ export class LinkChecker extends EventEmitter {
* Validate the provided flags all work with each other.
* @param options CheckOptions passed in from the CLI (or API)
*/
private validateOptions(options: CheckOptions) {
if (options.serverRoot && options.path.startsWith('http')) {
private async processOptions(opts: CheckOptions): Promise<CheckOptions> {
const options = Object.assign({}, opts);

// ensure at least one path is provided
if (options.path.length === 0) {
throw new Error('At least one path must be provided');
}

// normalize options.path to an array of strings
if (!Array.isArray(options.path)) {
options.path = [options.path];
}

// Ensure we do not mix http:// and file system paths. The paths passed in
// must all be filesystem paths, or HTTP paths.
let isUrlType: boolean | undefined = undefined;
for (const path of options.path) {
const innerIsUrlType = path.startsWith('http');
if (isUrlType === undefined) {
isUrlType = innerIsUrlType;
} else if (innerIsUrlType !== isUrlType) {
throw new Error(
'Paths cannot be mixed between HTTP and local filesystem paths.'
);
}
}

// if there is a server root, make sure there are no HTTP paths
if (options.serverRoot && isUrlType) {
throw new Error(
"'serverRoot' cannot be defined when the 'path' points to an HTTP endpoint."
);
}
}

/**
* Figure out which directory should be used as the root for the web server,
* and how that impacts the path to the file for the first request.
* @param options CheckOptions passed in from the CLI or API
*/
private async getServerRoot(options: CheckOptions) {
if (options.serverRoot) {
const filePath = options.path.startsWith('/')
? options.path
: '/' + options.path;
return {
serverRoot: options.serverRoot,
path: filePath,
};
}
let localDirectory = options.path;
let localFile = '';
const s = await stat(options.path);
if (s.isFile()) {
const pathParts = options.path.split(path.sep);
localFile = path.sep + pathParts[pathParts.length - 1];
localDirectory = pathParts.slice(0, pathParts.length - 1).join(path.sep);
// Figure out which directory should be used as the root for the web server,
// and how that impacts the path to the file for the first request.
if (!options.serverRoot && !isUrlType) {
// if the serverRoot wasn't defined, and there are multiple paths, just
// use process.cwd().
if (options.path.length > 1) {
options.serverRoot = process.cwd();
} else {
// if there's a single path, try to be smart and figure it out
const s = await stat(options.path[0]);
options.serverRoot = options.path[0];
if (s.isFile()) {
const pathParts = options.path[0].split(path.sep);
options.path = [path.sep + pathParts[pathParts.length - 1]];
options.serverRoot = pathParts
.slice(0, pathParts.length - 1)
.join(path.sep);
} else {
options.serverRoot = options.path[0];
options.path = '/';
}
}
}
return {
serverRoot: localDirectory,
path: localFile,
};

return options;
}

/**
Expand All @@ -167,9 +207,6 @@ export class LinkChecker extends EventEmitter {
return next();
}
const pathParts = req.path.split('/').filter(x => !!x);
if (pathParts.length === 0) {
return next();
}
const ext = path.extname(pathParts[pathParts.length - 1]);
if (ext.toLowerCase() === '.md') {
const filePath = path.join(path.resolve(root), req.path);
Expand Down Expand Up @@ -340,13 +377,12 @@ export class LinkChecker extends EventEmitter {
}

let crawl = (opts.checkOptions.recurse! &&
result.url &&
result.url.href.startsWith(opts.checkOptions.path)) as boolean;
result.url?.href.startsWith(opts.rootPath)) as boolean;

// only crawl links that start with the same host
if (crawl) {
try {
const pathUrl = new URL(opts.checkOptions.path);
const pathUrl = new URL(opts.rootPath);
crawl = result.url!.host === pathUrl.host;
} catch {
// ignore errors
Expand All @@ -366,6 +402,7 @@ export class LinkChecker extends EventEmitter {
checkOptions: opts.checkOptions,
queue: opts.queue,
parent: opts.url.href,
rootPath: opts.rootPath,
});
});
}
Expand Down
5 changes: 5 additions & 0 deletions test/fixtures/local/index.html
@@ -0,0 +1,5 @@
<html>
<body>
<a href="page2.html">just follow a link</a>
</body>
</html>
5 changes: 5 additions & 0 deletions test/fixtures/local/page2.html
@@ -0,0 +1,5 @@
<html>
<body>
nothing to see here
</body>
</html>
83 changes: 82 additions & 1 deletion test/test.ts
Expand Up @@ -5,13 +5,14 @@ import * as sinon from 'sinon';
import * as path from 'path';
import {describe, it, afterEach} from 'mocha';

import {check, LinkState, LinkChecker} from '../src';
import {check, LinkState, LinkChecker, CheckOptions} from '../src';

nock.disableNetConnect();
nock.enableNetConnect('localhost');

describe('linkinator', () => {
afterEach(() => {
sinon.restore();
nock.cleanAll();
});

Expand Down Expand Up @@ -309,4 +310,84 @@ describe('linkinator', () => {
assert.strictEqual(results.links.length, 3);
assert.ok(results.passed);
});

// Passes two local fixture directories in a single `path` array.
it('should accept multiple filesystem paths', async () => {
// Mock the one external HEAD request expected during the scan.
const scope = nock('http://fake.local').head('/').reply(200);
const results = await check({
path: ['test/fixtures/basic', 'test/fixtures/image'],
});
// The combined run is expected to fail overall and report six links in total.
// NOTE(review): presumably the failure comes from the image fixture; the
// fixture contents are not visible here — confirm.
assert.strictEqual(results.passed, false);
assert.strictEqual(results.links.length, 6);
// Throws if the mocked HEAD request was never made.
scope.done();
});

// Supplying an HTTP URL together with a filesystem path must reject.
it('should not allow mixed local and remote paths', async () => {
await assert.rejects(
check({
path: ['https://jbeckwith.com', 'test/fixtures/basic'],
}),
// Matched against the rejection's error message.
/cannot be mixed/
);
});

// An empty `path` array must reject rather than scan nothing.
it('should require at least one path', async () => {
await assert.rejects(
check({
path: [],
}),
// Matched against the rejection's error message.
/At least one/
);
});

// Verifies that `check` leaves the caller's options object untouched.
it('should not pollute the original options after merge', async () => {
// Freezing the object means any in-place write to it cannot take effect.
const options: CheckOptions = Object.freeze({path: 'test/fixtures/basic'});
// Mock the one external HEAD request expected during the scan.
const scope = nock('http://fake.local').head('/').reply(200);
const results = await check(options);
assert.ok(results.passed);
// Throws if the mocked HEAD request was never made.
scope.done();
// `serverRoot` must not have been written back onto the caller's object.
assert.strictEqual(options.serverRoot, undefined);
});

// Passes two HTTP roots in a single `path` array; both hosts are mocked.
it('should accept multiple http paths', async () => {
// Each host serves the same two fixtures: the index page at `/` and
// page2 at `/page2.html`. One GET per URL per host is expected.
const scopes = [
nock('http://fake.local')
.get('/')
.replyWithFile(200, 'test/fixtures/local/index.html', {
'Content-Type': 'text/html; charset=UTF-8',
}),
nock('http://fake.local')
.get('/page2.html')
.replyWithFile(200, 'test/fixtures/local/page2.html', {
'Content-Type': 'text/html; charset=UTF-8',
}),
nock('http://fake2.local')
.get('/')
.replyWithFile(200, 'test/fixtures/local/index.html', {
'Content-Type': 'text/html; charset=UTF-8',
}),
nock('http://fake2.local')
.get('/page2.html')
.replyWithFile(200, 'test/fixtures/local/page2.html', {
'Content-Type': 'text/html; charset=UTF-8',
}),
];
const results = await check({
path: ['http://fake.local', 'http://fake2.local'],
});
assert.ok(results.passed);
// Every mocked request must have been consumed, on both hosts.
scopes.forEach(x => x.done());
});

// With LINKINATOR_DEBUG set, a scan is expected to call console.log exactly once.
it('should print debug information when the env var is set', async () => {
// Replace process.env wholesale for this test; the suite's afterEach calls
// sinon.restore(), which undoes this stub and the console stub below.
sinon.stub(process, 'env').value({
LINKINATOR_DEBUG: true,
});
// Stubbing console.log both records the calls and suppresses the output.
const consoleSpy = sinon.stub(console, 'log');
const results = await check({
path: 'test/fixtures/markdown/README.md',
markdown: true,
});
assert.ok(results.passed);
assert.ok(consoleSpy.calledOnce);
});
});
21 changes: 21 additions & 0 deletions test/zcli.ts
Expand Up @@ -23,6 +23,16 @@ describe('cli', () => {
assert.include(res.stdout, 'Successfully scanned');
});

// Runs the CLI through npx with two positional paths and --markdown.
it('should allow multiple paths', async () => {
const res = await execa('npx', [
'linkinator',
'--markdown',
'README.md',
'test/fixtures/markdown/README.md',
]);
// Only stdout is inspected, for the 'Successfully scanned' text.
assert.include(res.stdout, 'Successfully scanned');
});

it('should show help if no params are provided', async () => {
const res = await execa('npx', ['linkinator'], {
reject: false,
Expand Down Expand Up @@ -52,6 +62,17 @@ describe('cli', () => {
assert.include(res.stdout, '/README.md,200,OK,');
});

// Runs the CLI with `--format json` against a single markdown file.
it('should provide JSON if asked nicely', async () => {
const res = await execa('npx', [
'linkinator',
'--markdown',
'--format',
'json',
'test/fixtures/markdown/README.md',
]);
// Loose check: stdout only needs to contain an opening brace; the output is
// not parsed or validated as JSON here.
assert.include(res.stdout, '{');
});

it('should not show links if --silent', async () => {
const res = await execa('npx', [
'linkinator',
Expand Down

0 comments on commit e70dff6

Please sign in to comment.