Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(resolution): Fix resolution of querystringed imports in ESM files #336

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Expand Up @@ -4,3 +4,4 @@ out
coverage
test/**/dist
test/**/actual.js
test/unit/cjs-querystring/who?what?idk!.js
98 changes: 77 additions & 21 deletions src/node-file-trace.ts
Expand Up @@ -7,6 +7,31 @@ import { sharedLibEmit } from './utils/sharedlib-emit';
import { join } from 'path';
import { CachedFileSystem } from './fs';

type ParseSpecifierResult = {
remainingSpecifier: string;
queryString: string | null
}

// Splits an ESM specifier into path and querystring (including the leading `?`). (If the specifier is CJS,
// it is passed through untouched.)
export function splitQueryStringFromSpecifier(specifier: string, cjsResolve: boolean = true): ParseSpecifierResult {
let remainingSpecifier = specifier;
let queryString = null;

if (!cjsResolve) {
const specifierUrl = new URL(specifier, "placeholder-protocol://");
queryString = specifierUrl.search;

if (specifierUrl.search) {
remainingSpecifier = specifier.replace(specifierUrl.search, '');
} else {
remainingSpecifier = specifier;
}
}

return {queryString, remainingSpecifier};
}

function inPath (path: string, parent: string) {
const pathWithSep = join(parent, sep);
return path.startsWith(pathWithSep) && path !== pathWithSep;
Expand Down Expand Up @@ -163,19 +188,21 @@ export class Job {
}

private maybeEmitDep = async (dep: string, path: string, cjsResolve: boolean) => {
// Only affects ESM dependencies
const { remainingSpecifier: strippedDep, queryString = '' } = splitQueryStringFromSpecifier(dep, cjsResolve)
let resolved: string | string[] = '';
let error: Error | undefined;
try {
resolved = await this.resolve(dep, path, this, cjsResolve);
} catch (e1: any) {
error = e1;
try {
if (this.ts && dep.endsWith('.js') && e1 instanceof NotFoundError) {
if (this.ts && strippedDep.endsWith('.js') && e1 instanceof NotFoundError) {
// TS with ESM relative import paths need full extensions
// (we have to write import "./foo.js" instead of import "./foo")
// See https://www.typescriptlang.org/docs/handbook/esm-node.html
const depTS = dep.slice(0, -3) + '.ts';
resolved = await this.resolve(depTS, path, this, cjsResolve);
const strippedDepTS = strippedDep.slice(0, -3) + '.ts';
resolved = await this.resolve(strippedDepTS + queryString, path, this, cjsResolve);
error = undefined;
}
} catch (e2: any) {
Expand All @@ -188,16 +215,19 @@ export class Job {
return;
}

if (Array.isArray(resolved)) {
for (const item of resolved) {
// ignore builtins
if (item.startsWith('node:')) return;
await this.emitDependency(item, path);
// For simplicity, force `resolved` to be an array
resolved = Array.isArray(resolved) ? resolved : [resolved];
for (let item of resolved) {
// ignore builtins for the purposes of both tracing and querystring handling (neither Node
// nor Webpack can handle querystrings on `node:xxx` imports).
if (item.startsWith('node:')) return;

// If querystring was stripped during resolution, restore it
if (queryString && !item.endsWith(queryString)) {
item += queryString;
}
} else {
// ignore builtins
if (resolved.startsWith('node:')) return;
await this.emitDependency(resolved, path);

await this.analyzeAndEmitDependency(item, path, cjsResolve);
}
}

Expand Down Expand Up @@ -274,13 +304,23 @@ export class Job {
}

async emitDependency (path: string, parent?: string) {
if (this.processed.has(path)) {
return this.analyzeAndEmitDependency(path, parent)
}

private async analyzeAndEmitDependency(rawPath: string, parent?: string, cjsResolve?: boolean) {

// Strip the querystring, if any. (Only affects ESM dependencies.)
const { remainingSpecifier: path } = splitQueryStringFromSpecifier(rawPath, cjsResolve)

// Since different querystrings may lead to different results, include the full path
// when noting whether or not we've already seen this path
if (this.processed.has(rawPath)) {
if (parent) {
await this.emitFile(path, 'dependency', parent)
}
return
};
this.processed.add(path);
this.processed.add(rawPath);

const emitted = await this.emitFile(path, 'dependency', parent);
if (!emitted) return;
Expand All @@ -299,18 +339,34 @@ export class Job {

let analyzeResult: AnalyzeResult;

const cachedAnalysis = this.analysisCache.get(path);
// Since different querystrings may lead to analyses, include the full path when caching
const cachedAnalysis = this.analysisCache.get(rawPath);
if (cachedAnalysis) {
analyzeResult = cachedAnalysis;
}
else {
const source = await this.readFile(path);
if (source === null) throw new Error('File ' + path + ' does not exist.');
// By default, `this.readFile` is the regular `fs.readFile`, but a different implementation
// can be specified via a user option in the main `nodeFileTrace` entrypoint. Depending on
// that implementation, having a querystring on the end of the path may either a) be necessary
// in order to specify the right module from which to read, or b) lead to an `ENOENT: no such
// file or directory` error because it thinks the querystring is part of the filename. We
// therefore try it with the querystring first, but have the non-querystringed version as a
// fallback. (If there's no `?` in the given path, `rawPath` will equal `path`, so order is a
// moot point.)
const source = await this.readFile(rawPath) || await this.readFile(path) ;

if (source === null) {
const errorMessage = path === rawPath
? 'File ' + path + ' does not exist.'
: 'Neither ' + path + ' nor ' + rawPath + ' exists.'
throw new Error(errorMessage);
}

// analyze should not have any side-effects e.g. calling `job.emitFile`
// directly as this will not be included in the cachedAnalysis and won't
// be emit for successive runs that leverage the cache
analyzeResult = await analyze(path, source.toString(), this);
this.analysisCache.set(path, analyzeResult);
this.analysisCache.set(rawPath, analyzeResult);
}

const { deps, imports, assets, isESM } = analyzeResult;
Expand All @@ -324,12 +380,12 @@ export class Job {
const ext = extname(asset);
if (ext === '.js' || ext === '.mjs' || ext === '.node' || ext === '' ||
this.ts && (ext === '.ts' || ext === '.tsx') && asset.startsWith(this.base) && asset.slice(this.base.length).indexOf(sep + 'node_modules' + sep) === -1)
await this.emitDependency(asset, path);
await this.analyzeAndEmitDependency(asset, path, !isESM);
else
await this.emitFile(asset, 'asset', path);
}),
...[...deps].map(async dep => this.maybeEmitDep(dep, path, !isESM)),
...[...imports].map(async dep => this.maybeEmitDep(dep, path, false)),
...[...deps].map(async dep => this.maybeEmitDep(dep, rawPath, !isESM)),
...[...imports].map(async dep => this.maybeEmitDep(dep, rawPath, false)),
]);
}
}
7 changes: 6 additions & 1 deletion src/resolve-dependency.ts
@@ -1,11 +1,16 @@
import { isAbsolute, resolve, sep } from 'path';
import { Job } from './node-file-trace';
import { Job, splitQueryStringFromSpecifier } from './node-file-trace';

// node resolver
// custom implementation to emit only needed package.json files for resolver
// (package.json files are emitted as they are hit)
export default async function resolveDependency (specifier: string, parent: string, job: Job, cjsResolve = true): Promise<string | string[]> {
let resolved: string | string[];

// ESM imports are allowed to have querystrings, but the native Node behavior is to ignore them when doing
// file resolution, so emulate that here by stripping any querystring off before continuing
specifier = splitQueryStringFromSpecifier(specifier, cjsResolve).remainingSpecifier

if (isAbsolute(specifier) || specifier === '.' || specifier === '..' || specifier.startsWith('./') || specifier.startsWith('../')) {
const trailingSlash = specifier.endsWith('/');
resolved = await resolvePath(resolve(parent, '..', specifier) + (trailingSlash ? '/' : ''), parent, job);
Expand Down
53 changes: 44 additions & 9 deletions test/unit.test.js
@@ -1,5 +1,5 @@
const fs = require('fs');
const { join, relative } = require('path');
const { join, relative, sep } = require('path');
const { nodeFileTrace } = require('../out/node-file-trace');
const gracefulFS = require('graceful-fs');
const analyze = require('../out/analyze.js').default;
Expand All @@ -10,11 +10,11 @@ const readFile = gracefulFS.promises.readFile

global._unit = true;

const skipOnWindows = ['yarn-workspaces', 'yarn-workspaces-base-root', 'yarn-workspace-esm', 'asset-symlink', 'require-symlink'];
const skipOnWindows = ['yarn-workspaces', 'yarn-workspaces-base-root', 'yarn-workspace-esm', 'asset-symlink', 'require-symlink', 'cjs-querystring'];
const unitTestDirs = fs.readdirSync(join(__dirname, 'unit'));
const unitTests = [
...unitTestDirs.map(testName => ({testName, isRoot: false})),
...unitTestDirs.map(testName => ({testName, isRoot: true})),
...unitTestDirs.map(testName => ({testName, isRoot: false})),
];

jest.mock('../out/analyze.js', () => {
Expand Down Expand Up @@ -51,12 +51,28 @@ afterEach(resetFileIOMocks)

for (const { testName, isRoot } of unitTests) {
const testSuffix = `${testName} from ${isRoot ? 'root' : 'cwd'}`;
if (process.platform === 'win32' && (isRoot || skipOnWindows.includes(testName))) {
console.log(`Skipping unit test on Windows: ${testSuffix}`);
continue;
};
const unitPath = join(__dirname, 'unit', testName);

if (process.platform === 'win32') {
if (isRoot || skipOnWindows.includes(testName)) {
console.log(`Skipping unit test on Windows: ${testSuffix}`);
continue;
}
} else {
if (testName === 'cjs-querystring') {
// Create (a git-ignored copy of) the file we need, since committing it
// breaks CI on Windows. See https://github.com/vercel/nft/pull/322.
const currentFilepath = join(unitPath, 'noPunctuation', 'whowhatidk.js');
const newFilepath = currentFilepath.replace(
'noPunctuation' + sep + 'whowhatidk.js',
'who?what?idk!.js'
);
if (!fs.existsSync(newFilepath)) {
fs.copyFileSync(currentFilepath, newFilepath);
}
}
}

it(`should correctly trace ${testSuffix}`, async () => {

// We mock readFile because when node-file-trace is integrated into @now/node
Expand All @@ -79,6 +95,25 @@ for (const { testName, isRoot } of unitTests) {
return null
}
}

// mock an in-memory module store (such as webpack's) where the same filename with
// two different querystrings can correspond to two different modules, one importing
// the other
if (testName === 'querystring-self-import') {
if (id.endsWith('input.js') || id.endsWith('base.js') || id.endsWith('dep.js')) {
return fs.readFileSync(id).toString()
}

if (id.endsWith('base.js?__withQuery')) {
return `
import * as origBase from './base';
export const dogs = origBase.dogs.concat('Cory', 'Bodhi');
export const cats = origBase.cats.concat('Teaberry', 'Sassafras', 'Persephone');
export const rats = origBase.rats;
`;
}
}

return this.constructor.prototype.readFile.apply(this, arguments);
});

Expand Down Expand Up @@ -106,7 +141,7 @@ for (const { testName, isRoot } of unitTests) {
inputFileNames.push('input-2.js', 'input-3.js', 'input-4.js');
}

const { fileList, reasons } = await nodeFileTrace(
const { fileList, reasons, warnings } = await nodeFileTrace(
inputFileNames.map(file => join(unitPath, file)),
{
base: isRoot ? '/' : `${__dirname}/../`,
Expand Down Expand Up @@ -231,7 +266,7 @@ for (const { testName, isRoot } of unitTests) {
expect(sortedFileList).toEqual(expected);
}
catch (e) {
console.warn(reasons);
console.warn({reasons, warnings});
fs.writeFileSync(join(unitPath, 'actual.js'), JSON.stringify(sortedFileList, null, 2));
throw e;
}
Expand Down
7 changes: 7 additions & 0 deletions test/unit/cjs-querystring/input.js
@@ -0,0 +1,7 @@
// Test that CJS files treat question marks in filenames as any other character,
// matching Node behavior

// https://www.youtube.com/watch?v=2ve20PVNZ18

const baseball = require('./who?what?idk!');
console.log(baseball.players);
12 changes: 12 additions & 0 deletions test/unit/cjs-querystring/noPunctuation/whowhatidk.js
@@ -0,0 +1,12 @@
module.exports = {
players: {
first: 'Who',
second: 'What',
third: "I Don't Know",
left: 'Why',
center: 'Because',
pitcher: 'Tomorrow',
catcher: 'Today',
shortstop: "I Don't Give a Damn!",
},
};
5 changes: 5 additions & 0 deletions test/unit/cjs-querystring/output.js
@@ -0,0 +1,5 @@
[
"package.json",
"test/unit/cjs-querystring/input.js",
"test/unit/cjs-querystring/who?what?idk!.js"
]
4 changes: 4 additions & 0 deletions test/unit/esm-querystring-mjs/animalFacts/aardvark.mjs
@@ -0,0 +1,4 @@
import { numSpecies } from "./bear.mjs?beaver?bison";
console.log(`There are ${numSpecies} species of bears.`);

export const food = "termites";
4 changes: 4 additions & 0 deletions test/unit/esm-querystring-mjs/animalFacts/bear.mjs
@@ -0,0 +1,4 @@
import * as cheetah from "./cheetah.mjs?cow=chipmunk";
console.log(`Cheetahs can run ${cheetah.topSpeed} mph.`);

export const numSpecies = 8;
1 change: 1 addition & 0 deletions test/unit/esm-querystring-mjs/animalFacts/cheetah.mjs
@@ -0,0 +1 @@
export const topSpeed = 65;
6 changes: 6 additions & 0 deletions test/unit/esm-querystring-mjs/input.js
@@ -0,0 +1,6 @@
// Test that querystrings of various forms get stripped from esm imports when those
// imports contain the `.mjs` file extension

import * as aardvark from "./animalFacts/aardvark.mjs?anteater";

console.log(`Aardvarks eat ${aardvark.food}.`);
7 changes: 7 additions & 0 deletions test/unit/esm-querystring-mjs/output.js
@@ -0,0 +1,7 @@
[
"test/unit/esm-querystring-mjs/animalFacts/aardvark.mjs",
"test/unit/esm-querystring-mjs/animalFacts/bear.mjs",
"test/unit/esm-querystring-mjs/animalFacts/cheetah.mjs",
"test/unit/esm-querystring-mjs/input.js",
"test/unit/esm-querystring-mjs/package.json"
]
4 changes: 4 additions & 0 deletions test/unit/esm-querystring-mjs/package.json
@@ -0,0 +1,4 @@
{
"private": true,
"type": "module"
}
4 changes: 4 additions & 0 deletions test/unit/esm-querystring/animalFacts/aardvark.js
@@ -0,0 +1,4 @@
import { numSpecies } from './bear?beaver?bison';
console.log(`There are ${numSpecies} species of bears.`);

export const food = 'termites';
4 changes: 4 additions & 0 deletions test/unit/esm-querystring/animalFacts/bear.js
@@ -0,0 +1,4 @@
import * as cheetah from './cheetah?cow=chipmunk';
console.log(`Cheetahs can run ${cheetah.topSpeed} mph.`);

export const numSpecies = 8;
1 change: 1 addition & 0 deletions test/unit/esm-querystring/animalFacts/cheetah.js
@@ -0,0 +1 @@
export const topSpeed = 65;
5 changes: 5 additions & 0 deletions test/unit/esm-querystring/input.js
@@ -0,0 +1,5 @@
// Test that querystrings of various forms get stripped from esm imports

import * as aardvark from './animalFacts/aardvark?anteater';

console.log(`Aardvarks eat ${aardvark.food}.`);
7 changes: 7 additions & 0 deletions test/unit/esm-querystring/output.js
@@ -0,0 +1,7 @@
[
"test/unit/esm-querystring/animalFacts/aardvark.js",
"test/unit/esm-querystring/animalFacts/bear.js",
"test/unit/esm-querystring/animalFacts/cheetah.js",
"test/unit/esm-querystring/input.js",
"test/unit/esm-querystring/package.json"
]
4 changes: 4 additions & 0 deletions test/unit/esm-querystring/package.json
@@ -0,0 +1,4 @@
{
"private": true,
"type": "module"
}
5 changes: 5 additions & 0 deletions test/unit/querystring-self-import/base.js
@@ -0,0 +1,5 @@
import * as dep from './dep';

export const dogs = ['Charlie', 'Maisey'];
export const cats = ['Piper'];
export const rats = dep.rats;
1 change: 1 addition & 0 deletions test/unit/querystring-self-import/dep.js
@@ -0,0 +1 @@
export const rats = ['Debra'];
10 changes: 10 additions & 0 deletions test/unit/querystring-self-import/input.js
@@ -0,0 +1,10 @@
// Test that if a file and the same file with a querystring correspond to different
// modules in memory, one can successfully import the other. The import chain
// goes `input` (this file) -> `base?__withQuery` -> `base` -> `dep`, which means
// that if `dep` shows up in `output`, we know that both `base?__withQuery` and
// `base` have been loaded successfully.

import * as baseWithQuery from './base?__withQuery';
console.log('Dogs:', baseWithQuery.dogs);
console.log('Cats:', baseWithQuery.cats);
console.log('Rats:', baseWithQuery.rats);