Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

aa async resolver #1074

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
131 changes: 90 additions & 41 deletions packages/aa/src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -43,31 +43,52 @@ function createPerformantResolve() {
*/
const readPackageWithout = (filepath) => {
/**
* @param {(path: string) => StringOrToString} readFileSync - Sync file
* reader
* @param {(
* file: string,
* cb: (err: Error | null, file?: StringOrToString) => void
* ) => void} readFile
* - Async file reader
*
* @param {string} otherFilepath - Path to another `package.json`
* @returns {Record<string, unknown> | undefined}
* @param {(err: Error | null, result?: Record<string, unknown>) => void} cb
* - Callback
*
* @returns {void}
*/
return (readFileSync, otherFilepath) => {
return (readFile, otherFilepath, cb) => {
// avoid loading the package.json we're just trying to resolve
if (otherFilepath.endsWith(filepath)) {
return {}
return cb(null, {})
}
// original readPackageSync implementation from resolve internals:
const body = readFileSync(otherFilepath)
try {
return JSON.parse(`${body}`)
} catch (jsonErr) {}
readFile(otherFilepath, (err, body) => {
if (err) {
return cb(err)
}
try {
return cb(null, JSON.parse(`${body}`))
} catch (jsonErr) {}
})
}
}

return {
sync: (path, { basedir }) =>
nodeResolve.sync(path, {
basedir,
readPackageSync: readPackageWithout(path),
}),
}
// Typescript won't let me promisify without loosing track of argument types :(
// const promisifiedNodeResolve = promisify(nodeResolve)
return (path, { basedir }) =>
new Promise((resolve, reject) => {
nodeResolve(
path,
{
basedir,
readPackage: readPackageWithout(path),
},
(err, result) => {
if (err) {
return reject(err)
}
resolve(result)
}
)
})
}

/**
Expand All @@ -81,7 +102,7 @@ async function loadCanonicalNameMap({
}) {
const canonicalNameMap = /** @type {CanonicalNameMap} */ (new Map())
// walk tree
const logicalPathMap = walkDependencyTreeForBestLogicalPaths({
const logicalPathMap = await walkDependencyTreeForBestLogicalPaths({
packageDir: rootDir,
includeDevDeps,
resolve,
Expand All @@ -103,12 +124,12 @@ async function loadCanonicalNameMap({
* @param {Resolver} resolve - Resolver function
* @param {string} depName - Dependency name
* @param {string} basedir - Dir to resolve from
* @returns {string | undefined}
* @returns {Promise<string | undefined>}
*/
function wrappedResolveSync(resolve, depName, basedir) {
async function wrappedResolve(resolve, depName, basedir) {
const depRelativePackageJsonPath = path.join(depName, 'package.json')
try {
return resolve.sync(depRelativePackageJsonPath, {
return await resolve(depRelativePackageJsonPath, {
basedir,
})
} catch (e) {
Expand All @@ -129,6 +150,8 @@ function wrappedResolveSync(resolve, depName, basedir) {
* @param {string} packageDir
* @param {boolean} includeDevDeps
* @returns {string[]}
*
* WARNING: making this async makes things consistently slower _(ツ)_/
*/
function getDependencies(packageDir, includeDevDeps) {
const packageJsonPath = path.join(packageDir, 'package.json')
Expand All @@ -147,7 +170,7 @@ function getDependencies(packageDir, includeDevDeps) {
* @param {string} location
*/
function isSymlink(location) {
const info = lstatSync(location)
const info = lstatSync(location) // lstatSync is faster than lstat
return info.isSymbolicLink()
}

Expand All @@ -159,9 +182,9 @@ let nextLevelTodos

/**
* @param {WalkDepTreeOpts} options
* @returns {Map<string, string[]>}
* @returns {Promise<Map<string, string[]>>}
*/
function walkDependencyTreeForBestLogicalPaths({
async function walkDependencyTreeForBestLogicalPaths({
packageDir,
logicalPath = [],
includeDevDeps = false,
Expand All @@ -175,13 +198,17 @@ function walkDependencyTreeForBestLogicalPaths({
{ packageDir, logicalPath, includeDevDeps, visited, resolve },
]
nextLevelTodos = []
// drain work queue until empty, avoid going depth-first by prioritizing the current depth level
//avoid going depth-first by prioritizing the current depth level
do {
processOnePackageInLogicalTree(preferredPackageLogicalPathMap, resolve)
if (currentLevelTodos.length === 0) {
currentLevelTodos = nextLevelTodos
nextLevelTodos = []
}
await runParallelMax(10, currentLevelTodos, (currentTodo) =>
processOnePackageInLogicalTree(
currentTodo,
preferredPackageLogicalPathMap,
resolve
)
)
currentLevelTodos = nextLevelTodos
nextLevelTodos = []
} while (currentLevelTodos.length > 0)

for (const [
Expand All @@ -197,10 +224,12 @@ function walkDependencyTreeForBestLogicalPaths({
}

/**
* @param {WalkDepTreeOpts} currentTodo
* @param {Map<string, string[]>} preferredPackageLogicalPathMap
* @param {Resolver} resolve
*/
function processOnePackageInLogicalTree(
async function processOnePackageInLogicalTree(
currentTodo,
preferredPackageLogicalPathMap,
resolve
) {
Expand All @@ -209,23 +238,27 @@ function processOnePackageInLogicalTree(
logicalPath = [],
includeDevDeps = false,
visited = new Set(),
} = /** @type {WalkDepTreeOpts} */ (currentLevelTodos.pop())
} = currentTodo

const depsToWalk = getDependencies(packageDir, includeDevDeps)

// deps are already sorted by preference for paths
for (const depName of depsToWalk) {
let depPackageJsonPath = wrappedResolveSync(resolve, depName, packageDir)
await runParallelMax(10, depsToWalk, async (depName) => {
let depPackageJsonPath = await wrappedResolve(resolve, depName, packageDir)
// ignore unresolved deps
if (!depPackageJsonPath) {
continue
return
}
const childPackageDir = path.dirname(depPackageJsonPath)
// avoid cycles, but still visit the same package
// on disk multiple times through different logical paths
// so that the best logical path can be chosen
if (visited.has(childPackageDir)) {
continue
return
}
const childVisited = new Set([...visited, childPackageDir])
// should be slightly better than: const childVisited = new Set([...visited, childPackageDir])
const childVisited = new Set(visited)
childVisited.add(childPackageDir)
const childLogicalPath = [...logicalPath, depName]

// compare this path and current best path
Expand All @@ -248,9 +281,9 @@ function processOnePackageInLogicalTree(
// debug: log skipped path traversals
// console.log(`skipping "${childPackageDir}"\n preferred "${theCurrentBest}"\n current "${childLogicalPath}"`)
// abort this walk, can't do better
continue
return
}
}
})
}

/**
Expand Down Expand Up @@ -371,8 +404,24 @@ function comparePackageLogicalPaths(aPath, bPath) {
}

/**
* @typedef Resolver
* @property {(path: string, opts: { basedir: string }) => string} sync
* @template T
* @param {number} parallelMax
* @param {T[]} items
* @param {(item: T) => Promise<any>} mapper
* @returns {Promise<void>}
*/
async function runParallelMax(parallelMax, items, mapper) {
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

note: this function could be much more sophisticated so that it tracks which async jobs finished and starts new ones as they do. The jobs are similar enough to each other tho, that it might not bring a lot of improvement to the execution time.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

update - quick test of the concept:

async function runParallelMax(parallelMax, items, mapper) {
  items = items.reverse()
  return new Promise((resolve, reject) => {
    let promises = 0
    const boop = () => {
      if (items.length === 0) {
        if(promises === 0) resolve()
        return
      }
      const p = mapper(items.pop())
      promises++
      p.then(()=>{
        promises--
        boop()
      }, reject)
    }
    for (let i = 0; i < parallelMax; i++) {
      boop()
    }
  })
}

with this implementation it's a few percent slower, so there's not much to gain from smarter parallelization

for (let i = 0; i < items.length; i += parallelMax) {
const chunk = items.slice(i, i + parallelMax)
await Promise.all(chunk.map(mapper))
}
}

/**
* @callback Resolver
* @param {string} path
* @param {{ basedir: string }} opts
* @returns {Promise<string | undefined>}
*/

/**
Expand Down
27 changes: 11 additions & 16 deletions packages/aa/test/index.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -84,12 +84,10 @@ test('project 4 - workspace symlink', async (t) => {
test('project 1 - with custom resolver', async (t) => {
const rootDir = path.join(__dirname, 'projects', '1')
/** @type {import('../src/index.js').Resolver} */
const resolver = {
sync: (moduleId, { basedir }) => {
return Module.createRequire(path.join(basedir, 'dummy.js')).resolve(
moduleId
)
},
const resolver = async (moduleId, { basedir }) => {
return Module.createRequire(path.join(basedir, 'dummy.js')).resolve(
moduleId
)
}
const canonicalNameMap = await loadCanonicalNameMap({
rootDir,
Expand All @@ -109,11 +107,10 @@ test('project 1 - with custom resolver', async (t) => {
test('project 1 - resolution failure', async (t) => {
const rootDir = path.join(__dirname, 'projects', '1')
/** @type {import('../src/index.js').Resolver} */
const resolver = {
// eslint-disable-next-line @typescript-eslint/no-unused-vars
sync: (moduleId, { basedir }) => {
throw new Error('grumble')
},

// eslint-disable-next-line @typescript-eslint/no-unused-vars
const resolver = (moduleId, { basedir }) => {
throw new Error('grumble')
}
await t.throwsAsync(async () => {
await loadCanonicalNameMap({
Expand All @@ -131,11 +128,9 @@ test('project 1 - resolution missing silently', async (t) => {
{ code: 'MODULE_NOT_FOUND' },
]

const resolver = {
// eslint-disable-next-line @typescript-eslint/no-unused-vars
sync: (moduleId, { basedir }) => {
throw errors.pop()
},
// eslint-disable-next-line @typescript-eslint/no-unused-vars
const resolver = async (moduleId, { basedir }) => {
throw errors.pop()
}
let canonicalNameMap
await t.notThrowsAsync(async () => {
Expand Down