diff --git a/package.json b/package.json index 3ce66fd..857ea7f 100644 --- a/package.json +++ b/package.json @@ -31,7 +31,8 @@ }, "homepage": "https://github.com/npm/read-package-tree", "files": [ - "rpt.js" + "rpt.js", + "realpath.js" ], "tap": { "100": true diff --git a/realpath.js b/realpath.js new file mode 100644 index 0000000..814e560 --- /dev/null +++ b/realpath.js @@ -0,0 +1,92 @@ +// look up the realpath, but cache stats to minimize overhead +// If the parent folder is in the realpath cache, then we just +// lstat the child, since there's no need to do a full realpath +// This is not a separate module, and is much simpler than Node's +// built-in fs.realpath, because we only care about symbolic links, +// so we can handle many fewer edge cases. + +const fs = require('fs') +const { promisify } = require('util') +const readlink = promisify(fs.readlink) +const lstat = promisify(fs.lstat) +const { resolve, basename, dirname } = require('path') + +const realpathCached = (path, rpcache, stcache, depth) => { + // just a safety against extremely deep eloops + /* istanbul ignore next */ + if (depth > 2000) + throw eloop(path) + + if (rpcache.has(path)) + return Promise.resolve(rpcache.get(path)) + + const dir = dirname(path) + const base = basename(path) + + if (base && rpcache.has(dir)) + return realpathChild(dir, base, rpcache, stcache, depth) + + // if it's the root, then we know it's real + if (!base) { + rpcache.set(dir, dir) + return Promise.resolve(dir) + } + + // the parent, what is that? + // find out, and then come back. + return realpathCached(dir, rpcache, stcache, depth + 1).then(() => + realpathCached(path, rpcache, stcache, depth + 1)) +} + +const lstatCached = (path, stcache) => { + if (stcache.has(path)) + return Promise.resolve(stcache.get(path)) + + const p = lstat(path).then(st => { + stcache.set(path, st) + return st + }) + stcache.set(path, p) + return p +} + +// This is a slight fib, as it doesn't actually occur during a stat syscall. +// But file systems are giant piles of lies, so whatever. +const eloop = path => + Object.assign(new Error( + `ELOOP: too many symbolic links encountered, stat '${path}'`), { + errno: -62, + syscall: 'stat', + code: 'ELOOP', + path: path, + }) + +const realpathChild = (dir, base, rpcache, stcache, depth) => { + const realdir = rpcache.get(dir) + // that unpossible + /* istanbul ignore next */ + if (typeof realdir === 'undefined') + throw new Error('in realpathChild without parent being in realpath cache') + + const realish = resolve(realdir, base) + return lstatCached(realish, stcache).then(st => { + if (!st.isSymbolicLink()) { + rpcache.set(resolve(dir, base), realish) + return realish + } + + let res + return readlink(realish).then(target => { + const resolved = res = resolve(realdir, target) + if (realish === resolved) + throw eloop(realish) + + return realpathCached(resolved, rpcache, stcache, depth + 1) + }).then(real => { + rpcache.set(resolve(dir, base), real) + return real + }) + }) +} + +module.exports = realpathCached diff --git a/rpt.js b/rpt.js index 046ad54..8e7ae95 100644 --- a/rpt.js +++ b/rpt.js @@ -1,16 +1,17 @@ const fs = require('fs') const { promisify } = require('util') -const realpath = promisify(fs.realpath) -const { basename, dirname, join } = require('path') +const { resolve, basename, dirname, join } = require('path') const rpj = promisify(require('read-package-json')) const readdir = promisify(require('readdir-scoped-modules')) +const realpath = require('./realpath.js') let ID = 0 class Node { constructor (pkg, logical, physical, er, cache) { // should be impossible. + const cached = cache.get(physical) /* istanbul ignore next */ - if (cache.get(physical)) + if (cached && !cached.then) throw new Error('re-creating already instantiated node') cache.set(physical, this) @@ -34,37 +35,84 @@ class Node { class Link extends Node { constructor (pkg, logical, physical, realpath, er, cache) { super(pkg, logical, physical, er, cache) + + // if the target has started, but not completed, then + // a Promise will be in the cache to indicate this. const cachedTarget = cache.get(realpath) + if (cachedTarget && cachedTarget.then) + cachedTarget.then(node => this.target = node) + this.target = cachedTarget || new Node(pkg, logical, realpath, er, cache) this.realpath = realpath this.isLink = true - this.children = this.target.children this.error = er + // convenience method only + /* istanbul ignore next */ + Object.defineProperty(this, 'children', { + get () { + return this.target.children + }, + set (c) { + this.target.children = c + }, + enumerable: true + }) } } -const loadNode = (logical, physical, cache) => new Promise((res, rej) => { - res(cache.get(physical) || realpath(physical) - .then(real => - rpj(join(real, 'package.json')) - .then(pkg => [real, pkg, null], er => [real, null, er]) - .then(([real, pkg, er]) => - physical === real ? new Node(pkg, logical, physical, er, cache) - : new Link(pkg, logical, physical, real, er, cache) - ), - // if the realpath fails, don't bother with the rest - er => new Node(null, logical, physical, er, cache)) - ) -}) - -const loadChildren = (node, cache, filterWith) => { +// this is the way it is to expose a timing issue which is difficult to +// test otherwise. The creation of a Node may take slightly longer than +// the creation of a Link that targets it. If the Node has _begun_ its +// creation phase (and put a Promise in the cache) then the Link will +// get a Promise as its cachedTarget instead of an actual Node object. +// This is not a problem, because it gets resolved prior to returning +// the tree or attempting to load children. However, it IS remarkably +// difficult to get to happen in a test environment to verify reliably. +// Hence this kludge. +const newNode = (pkg, logical, physical, er, cache) => + process.env._TEST_RPT_SLOW_LINK_TARGET_ === '1' + ? new Promise(res => setTimeout(() => + res(new Node(pkg, logical, physical, er, cache)), 10)) + : new Node(pkg, logical, physical, er, cache) + +const loadNode = (logical, physical, cache, rpcache, stcache) => { + // cache temporarily holds a promise placeholder so we + // don't try to create the same node multiple times. + // this is very rare to encounter, given the aggressive + // caching on fs.realpath and fs.lstat calls, but + // it can happen in theory. + const cached = cache.get(physical) + /* istanbul ignore next */ + if (cached) + return Promise.resolve(cached) + + const p = realpath(physical, rpcache, stcache, 0).then(real => + rpj(join(real, 'package.json')) + .then(pkg => [pkg, null], er => [null, er]) + .then(([pkg, er]) => + physical === real ? newNode(pkg, logical, physical, er, cache) + : new Link(pkg, logical, physical, real, er, cache) + ), + // if the realpath fails, don't bother with the rest + er => new Node(null, logical, physical, er, cache)) + + cache.set(physical, p) + return p +} + +const loadChildren = (node, cache, filterWith, rpcache, stcache) => { + // if a Link target has started, but not completed, then + // a Promise will be in the cache to indicate this. + if (node.then) + return node.then(node => loadChildren(node, cache, filterWith, rpcache, stcache)) + const nm = join(node.path, 'node_modules') - return realpath(nm) + return realpath(nm, rpcache, stcache, 0) .then(rm => readdir(rm).then(kids => [rm, kids])) .then(([rm, kids]) => Promise.all( kids.filter(kid => kid.charAt(0) !== '.' && (!filterWith || filterWith(node, kid))) - .map(kid => loadNode(join(nm, kid), join(rm, kid), cache))) + .map(kid => loadNode(join(nm, kid), join(rm, kid), cache, rpcache, stcache))) ).then(kidNodes => { kidNodes.forEach(k => k.parent = node) node.children = kidNodes.sort((a, b) => @@ -77,7 +125,7 @@ const loadChildren = (node, cache, filterWith) => { .catch(() => node) } -const loadTree = (node, did, cache, filterWith) => { +const loadTree = (node, did, cache, filterWith, rpcache, stcache) => { // impossible except in pathological ELOOP cases /* istanbul ignore next */ if (did.has(node.realpath)) @@ -85,11 +133,12 @@ const loadTree = (node, did, cache, filterWith) => { did.add(node.realpath) - return loadChildren(node, cache, filterWith) + // load children on the target, not the link + return loadChildren(node.target || node, cache, filterWith, rpcache, stcache) .then(node => Promise.all( node.children .filter(kid => !did.has(kid.realpath)) - .map(kid => loadTree(kid, did, cache, filterWith)) + .map(kid => loadTree(kid, did, cache, filterWith, rpcache, stcache)) )).then(() => node) } @@ -100,10 +149,15 @@ const rpt = (root, filterWith, cb) => { filterWith = null } + root = resolve(root) const cache = new Map() - const p = realpath(root) - .then(realRoot => loadNode(root, realRoot, cache)) - .then(node => loadTree(node, new Set(), cache, filterWith)) + // we can assume that the cwd is real enough + const cwd = process.cwd() + const rpcache = new Map([[ cwd, cwd ]]) + const stcache = new Map() + const p = realpath(root, rpcache, stcache, 0) + .then(realRoot => loadNode(root, realRoot, cache, rpcache, stcache)) + .then(node => loadTree(node, new Set(), cache, filterWith, rpcache, stcache)) if (typeof cb === 'function') p.then(tree => cb(null, tree), cb) diff --git a/tap-snapshots/test-basic.js-TAP.test.js b/tap-snapshots/test-basic.js-TAP.test.js index 9100530..46b4dc6 100644 --- a/tap-snapshots/test-basic.js-TAP.test.js +++ b/tap-snapshots/test-basic.js-TAP.test.js @@ -40,6 +40,20 @@ root@1.2.3 test/fixtures/linkedroot └── foo@1.2.3 test/fixtures/linkedroot/node_modules/foo ` +exports[`test/basic.js TAP looking outside of cwd > must match snapshot 1`] = ` +root@1.2.3 test/fixtures/root +├─┬ @scope/x@1.2.3 test/fixtures/root/node_modules/@scope/x +│ └─┬ glob@4.0.5 test/fixtures/root/node_modules/@scope/x/node_modules/glob +│ ├── graceful-fs@3.0.2 test/fixtures/root/node_modules/@scope/x/node_modules/glob/node_modules/graceful-fs +│ ├── inherits@2.0.1 test/fixtures/root/node_modules/@scope/x/node_modules/glob/node_modules/inherits +│ ├─┬ minimatch@1.0.0 test/fixtures/root/node_modules/@scope/x/node_modules/glob/node_modules/minimatch +│ │ ├── lru-cache@2.5.0 test/fixtures/root/node_modules/@scope/x/node_modules/glob/node_modules/minimatch/node_modules/lru-cache +│ │ └── sigmund@1.0.0 test/fixtures/root/node_modules/@scope/x/node_modules/glob/node_modules/minimatch/node_modules/sigmund +│ └── once@1.3.0 test/fixtures/root/node_modules/@scope/x/node_modules/glob/node_modules/once +├── @scope/y@1.2.3 test/fixtures/root/node_modules/@scope/y +└── foo@1.2.3 test/fixtures/root/node_modules/foo +` + exports[`test/basic.js TAP noname > noname tree 1`] = ` test/fixtures/noname └── test/fixtures/noname/node_modules/foo @@ -79,3 +93,19 @@ selflink@1.2.3 test/fixtures/selflink │ └── once@1.3.0 test/fixtures/selflink/node_modules/foo/node_modules/glob/node_modules/once └── selflink@1.2.3 test/fixtures/selflink (symlink) ` + +exports[`test/basic.js TAP shake out Link target timing issue > must match snapshot 1`] = ` +selflink@1.2.3 test/fixtures/selflink +├── @scope/y@1.2.3 test/fixtures/selflink/node_modules/@scope/y +├─┬ @scope/z@1.2.3 test/fixtures/selflink/node_modules/@scope/z +│ └── glob@4.0.5 test/fixtures/selflink/node_modules/foo/node_modules/glob (symlink) +└─┬ foo@1.2.3 test/fixtures/selflink/node_modules/foo + ├─┬ glob@4.0.5 test/fixtures/selflink/node_modules/foo/node_modules/glob + │ ├── graceful-fs@3.0.2 test/fixtures/selflink/node_modules/foo/node_modules/glob/node_modules/graceful-fs + │ ├── inherits@2.0.1 test/fixtures/selflink/node_modules/foo/node_modules/glob/node_modules/inherits + │ ├─┬ minimatch@1.0.0 test/fixtures/selflink/node_modules/foo/node_modules/glob/node_modules/minimatch + │ │ ├── lru-cache@2.5.0 test/fixtures/selflink/node_modules/foo/node_modules/glob/node_modules/minimatch/node_modules/lru-cache + │ │ └── sigmund@1.0.0 test/fixtures/selflink/node_modules/foo/node_modules/glob/node_modules/minimatch/node_modules/sigmund + │ └── once@1.3.0 test/fixtures/selflink/node_modules/foo/node_modules/glob/node_modules/once + └── selflink@1.2.3 test/fixtures/selflink (symlink) +` diff --git a/test/basic.js b/test/basic.js index 093f315..128002a 100644 --- a/test/basic.js +++ b/test/basic.js @@ -85,6 +85,22 @@ test('filterWith', t => ).then(d => t.matchSnapshot(archy(archyize(d)).trim()), 'only 1 level deep') ) +test('looking outside of cwd', t => { + const cwd = process.cwd() + t.teardown(() => process.chdir(cwd)) + process.chdir('test/fixtures/selflink') + return rpt('../root').then(d => + t.matchSnapshot(archy(archyize(d)).trim())) +}) + +test('shake out Link target timing issue', t => { + process.env._TEST_RPT_SLOW_LINK_TARGET_ = '1' + const cwd = process.cwd() + t.teardown(() => process.env._TEST_RPT_SLOW_LINK_TARGET_ = '') + return rpt(path.resolve(fixtures, 'selflink')).then(d => + t.matchSnapshot(archy(archyize(d)).trim())) +}) + test('broken json', function (t) { rpt(path.resolve(fixtures, 'bad'), function (er, d) { t.ok(d.error, 'Got an error object') @@ -152,6 +168,23 @@ function archyize (d, seen) { } } +test('realpath gutchecks', t => { + const d = path.resolve(cwd, 'test/fixtures') + const realpath = require('../realpath.js') + const {realpathSync} = fs + Object.keys(symlinks).map(link => t.test(link, t => + realpath( + path.resolve(d, link), + new Map(), + new Map(), + 0 + ).then( + real => t.equal(real, realpathSync(path.resolve(d, link))), + er => t.throws(()=> realpathSync(path.resolve(d, link))) + ))) + t.end() +}) + test('cleanup', function (t) { cleanup() t.end()