Skip to content

Commit

Permalink
Improve minChunkSize algorithm (#4723)
Browse files Browse the repository at this point in the history
* Slightly simplify graph analysis

Also generates dynamicImportsByEntry that will help us to track already loaded
modules more efficiently.

* Try to merge small side effect chunks first

* Use cached hasEffects to improve performance

* Add comment explaining chunk merge strategy

* Add test for cycle prevention

We probably need complete transitive dependency maps
to continue here

* Avoid cycles when merging chunks

* Avoid cycles when merging chunks

* Log cycles in generated chunks

* Improve cycle prevention mechanism

* Hopefully fix the algorithm for good

* Add logging

* Use a much more basic algorithm

* Remove logging

* Improve coverage
  • Loading branch information
lukastaegert committed Feb 3, 2023
1 parent 52ba95c commit 0a5ea57
Show file tree
Hide file tree
Showing 63 changed files with 621 additions and 95 deletions.
7 changes: 2 additions & 5 deletions src/Module.ts
Expand Up @@ -4,7 +4,7 @@ import { locate } from 'locate-character';
import MagicString from 'magic-string';
import ExternalModule from './ExternalModule';
import type Graph from './Graph';
import { createHasEffectsContext, createInclusionContext } from './ast/ExecutionContext';
import { createInclusionContext } from './ast/ExecutionContext';
import { nodeConstructors } from './ast/nodes';
import ExportAllDeclaration from './ast/nodes/ExportAllDeclaration';
import ExportDefaultDeclaration from './ast/nodes/ExportDefaultDeclaration';
Expand Down Expand Up @@ -662,10 +662,7 @@ export default class Module {
}

hasEffects(): boolean {
	// "no-treeshake" forces the module to count as having side effects.
	// Otherwise defer to the Program node, which caches the (expensive)
	// effect detection so repeated queries during chunk optimization are cheap.
	// NOTE(review): the diff residue contained both the old inline
	// createHasEffectsContext() call and this cached variant; only the cached
	// variant is reachable, so it is kept.
	return this.info.moduleSideEffects === 'no-treeshake' || this.ast!.hasCachedEffects();
}

include(): void {
Expand Down
11 changes: 8 additions & 3 deletions src/ast/nodes/Program.ts
@@ -1,5 +1,6 @@
import type MagicString from 'magic-string';
import { type RenderOptions, renderStatementList } from '../../utils/renderHelpers';
import { createHasEffectsContext } from '../ExecutionContext';
import type { HasEffectsContext, InclusionContext } from '../ExecutionContext';
import type * as NodeType from './NodeType';
import { type IncludeChildren, NodeBase, type StatementNode } from './shared/Node';
Expand All @@ -9,11 +10,15 @@ export default class Program extends NodeBase {
declare sourceType: 'module';
declare type: NodeType.tProgram;

// Memoized result of effect detection; null means "not computed yet".
// (The diff residue showed both the old `= false` and the new nullable
// declaration; only one member declaration is valid.)
private hasCachedEffect: boolean | null = null;

hasCachedEffects(): boolean {
	// Run effect detection at most once and memoize the boolean so that
	// later callers (e.g. chunk optimization) get a constant-time answer.
	if (this.hasCachedEffect === null) {
		this.hasCachedEffect = this.hasEffects(createHasEffectsContext());
	}
	return this.hasCachedEffect;
}

hasEffects(context: HasEffectsContext): boolean {
// We are caching here to later more efficiently identify side-effect-free modules
if (this.hasCachedEffect) return true;
for (const node of this.body) {
if (node.hasEffects(context)) {
return (this.hasCachedEffect = true);
Expand Down
287 changes: 212 additions & 75 deletions src/utils/chunkAssignment.ts
@@ -1,6 +1,5 @@
import ExternalModule from '../ExternalModule';
import Module from '../Module';
import { EMPTY_ARRAY } from './blank';
import { getNewSet, getOrCreate } from './getOrCreate';
import { concatLazy } from './iterators';
import { timeEnd, timeStart } from './timers';
Expand Down Expand Up @@ -202,19 +201,21 @@ function isModuleAlreadyLoaded(
return true;
}

EMPTY_ARRAY;

// Working representation of a chunk while the minChunkSize optimization runs.
// (Reconstructed from the diff: the residue mixed in the removed fields
// `alias: null` / `size: number | null` and the deleted
// MergeableChunkDescription interface.)
interface ChunkDescription {
	// Chunks this chunk depends on (derived from module dependencies)
	dependencies: Set<ChunkDescription>;
	// Reverse edges of `dependencies`, kept in sync during merging
	dependentChunks: Set<ChunkDescription>;
	modules: Module[];
	// True when no module in the chunk has side effects
	pure: boolean;
	// Entry-point dependence signature, one char per entry
	signature: string;
	// Sum of the original code sizes of all modules in the chunk
	size: number;
}

// The four buckets chunks are partitioned into for merging: small/big by
// size threshold, pure/sideEffect by purity.
type ChunkPartition = {
	[key in 'small' | 'big']: {
		[subKey in 'pure' | 'sideEffect']: Set<ChunkDescription>;
	};
};

function createChunks(
allEntries: Iterable<Module>,
assignedEntriesByModule: DependentModuleMap,
Expand All @@ -226,59 +227,12 @@ function createChunks(
alias: null,
modules
}))
: getOptimizedChunks(chunkModulesBySignature, minChunkSize);
}

function getOptimizedChunks(
chunkModulesBySignature: { [chunkSignature: string]: Module[] },
minChunkSize: number
) {
timeStart('optimize chunks', 3);
const { chunksToBeMerged, unmergeableChunks } = getMergeableChunks(
chunkModulesBySignature,
minChunkSize
);
for (const sourceChunk of chunksToBeMerged) {
chunksToBeMerged.delete(sourceChunk);
let closestChunk: ChunkDescription | null = null;
let closestChunkDistance = Infinity;
const { signature, size, modules } = sourceChunk;

for (const targetChunk of concatLazy(chunksToBeMerged, unmergeableChunks)) {
const distance = getSignatureDistance(
signature,
targetChunk.signature,
!chunksToBeMerged.has(targetChunk)
);
if (distance === 1) {
closestChunk = targetChunk;
break;
} else if (distance < closestChunkDistance) {
closestChunk = targetChunk;
closestChunkDistance = distance;
}
}
if (closestChunk) {
closestChunk.modules.push(...modules);
if (chunksToBeMerged.has(closestChunk)) {
closestChunk.signature = mergeSignatures(signature, closestChunk.signature);
if ((closestChunk.size += size) > minChunkSize) {
chunksToBeMerged.delete(closestChunk);
unmergeableChunks.push(closestChunk);
}
}
} else {
unmergeableChunks.push(sourceChunk);
}
}
timeEnd('optimize chunks', 3);
return unmergeableChunks;
: getOptimizedChunks(chunkModulesBySignature, minChunkSize).map(({ modules }) => ({
alias: null,
modules
}));
}

const CHAR_DEPENDENT = 'X';
const CHAR_INDEPENDENT = '_';
const CHAR_CODE_DEPENDENT = CHAR_DEPENDENT.charCodeAt(0);

function getChunkModulesBySignature(
assignedEntriesByModule: ReadonlyDependentModuleMap,
allEntries: Iterable<Module>
Expand All @@ -299,33 +253,216 @@ function getChunkModulesBySignature(
return chunkModules;
}

function getMergeableChunks(
/**
* This function tries to get rid of small chunks by merging them with other
* chunks. In order to merge chunks, one must obey the following rule:
* - When merging several chunks, at most one of the chunks can have side
* effects
* - When one of the chunks has side effects, the entry points depending on that
* chunk need to be a super set of the entry points depending on the other
* chunks
* - Pure chunks can always be merged
* - We use the entry point dependence signature to calculate "chunk distance",
* i.e. how likely it is that two chunks are loaded together
*/
function getOptimizedChunks(
chunkModulesBySignature: { [chunkSignature: string]: Module[] },
minChunkSize: number
) {
const chunksToBeMerged = new Set() as Set<MergeableChunkDescription> & {
has(chunk: unknown): chunk is MergeableChunkDescription;
};
const unmergeableChunks: ChunkDescription[] = [];
const alias = null;
timeStart('optimize chunks', 3);
const chunkPartition = getPartitionedChunks(chunkModulesBySignature, minChunkSize);
if (chunkPartition.small.sideEffect.size > 0) {
mergeChunks(
chunkPartition.small.sideEffect,
[chunkPartition.small.pure, chunkPartition.big.pure],
minChunkSize,
chunkPartition
);
}

if (chunkPartition.small.pure.size > 0) {
mergeChunks(
chunkPartition.small.pure,
[chunkPartition.small.pure, chunkPartition.big.sideEffect, chunkPartition.big.pure],
minChunkSize,
chunkPartition
);
}
timeEnd('optimize chunks', 3);
return [
...chunkPartition.small.sideEffect,
...chunkPartition.small.pure,
...chunkPartition.big.sideEffect,
...chunkPartition.big.pure
];
}

// Alphabet for entry-point dependence "signatures": each position marks
// whether the corresponding entry depends on the chunk (see
// getSignatureDistance / mergeSignatures).
const CHAR_DEPENDENT = 'X';
const CHAR_INDEPENDENT = '_';
// Pre-computed char code for fast per-character comparisons
const CHAR_CODE_DEPENDENT = CHAR_DEPENDENT.charCodeAt(0);

function getPartitionedChunks(
chunkModulesBySignature: { [chunkSignature: string]: Module[] },
minChunkSize: number
): ChunkPartition {
const smallPureChunks: ChunkDescription[] = [];
const bigPureChunks: ChunkDescription[] = [];
const smallSideEffectChunks: ChunkDescription[] = [];
const bigSideEffectChunks: ChunkDescription[] = [];
const chunkByModule = new Map<Module, ChunkDescription>();
for (const [signature, modules] of Object.entries(chunkModulesBySignature)) {
const chunkDescription: ChunkDescription = {
dependencies: new Set<ChunkDescription>(),
dependentChunks: new Set<ChunkDescription>(),
modules,
pure: true,
signature,
size: 0
};
let size = 0;
checkModules: {
let pure = true;
for (const module of modules) {
chunkByModule.set(module, chunkDescription);
pure &&= !module.hasEffects();
// Unfortunately, we cannot take tree-shaking into account here because
// rendering did not happen yet
size += module.originalCode.length;
}
chunkDescription.pure = pure;
chunkDescription.size = size;
(size < minChunkSize
? pure
? smallPureChunks
: smallSideEffectChunks
: pure
? bigPureChunks
: bigSideEffectChunks
).push(chunkDescription);
}
sortChunksAndAddDependencies(
[bigPureChunks, bigSideEffectChunks, smallPureChunks, smallSideEffectChunks],
chunkByModule
);
return {
big: { pure: new Set(bigPureChunks), sideEffect: new Set(bigSideEffectChunks) },
small: { pure: new Set(smallPureChunks), sideEffect: new Set(smallSideEffectChunks) }
};
}

function sortChunksAndAddDependencies(
chunkLists: ChunkDescription[][],
chunkByModule: Map<Module, ChunkDescription>
) {
for (const chunks of chunkLists) {
chunks.sort(compareChunks);
for (const chunk of chunks) {
const { dependencies, modules } = chunk;
for (const module of modules) {
if (module.hasEffects()) {
break checkModules;
for (const dependency of module.getDependenciesToBeIncluded()) {
const dependencyChunk = chunkByModule.get(dependency as Module);
if (dependencyChunk && dependencyChunk !== chunk) {
dependencies.add(dependencyChunk);
dependencyChunk.dependentChunks.add(chunk);
}
}
size += module.magicString.toString().length;
if (size > minChunkSize) {
break checkModules;
}
}
}
}

// Comparator for Array.prototype.sort: orders chunks by ascending size so
// the smallest chunks are considered first.
function compareChunks(chunkA: ChunkDescription, chunkB: ChunkDescription): number {
	return chunkA.size - chunkB.size;
}

function mergeChunks(
chunksToBeMerged: Set<ChunkDescription>,
targetChunks: Set<ChunkDescription>[],
minChunkSize: number,
chunkPartition: ChunkPartition
) {
for (const mergedChunk of chunksToBeMerged) {
let closestChunk: ChunkDescription | null = null;
let closestChunkDistance = Infinity;
const { signature, modules, pure, size } = mergedChunk;

for (const targetChunk of concatLazy(targetChunks)) {
if (mergedChunk === targetChunk) continue;
// Possible improvement:
// For dynamic entries depending on a pure chunk, it is safe to merge that
// chunk into the chunk doing the dynamic import (i.e. into an "already
// loaded chunk") even if it is not pure.
// One way of handling this could be to add all "already loaded entries"
// of the dynamic importers into the signature as well. That could also
// change the way we do code-splitting for already loaded entries.
const distance = pure
? getSignatureDistance(signature, targetChunk.signature, !targetChunk.pure)
: getSignatureDistance(targetChunk.signature, signature, true);
if (distance < closestChunkDistance && isValidMerge(mergedChunk, targetChunk)) {
if (distance === 1) {
closestChunk = targetChunk;
break;
}
closestChunk = targetChunk;
closestChunkDistance = distance;
}
chunksToBeMerged.add({ alias, modules, signature, size });
continue;
}
unmergeableChunks.push({ alias, modules, signature, size: null });
if (closestChunk) {
chunksToBeMerged.delete(mergedChunk);
getChunksInPartition(closestChunk, minChunkSize, chunkPartition).delete(closestChunk);
closestChunk.modules.push(...modules);
closestChunk.size += size;
closestChunk.pure &&= pure;
closestChunk.signature = mergeSignatures(signature, closestChunk.signature);
const { dependencies, dependentChunks } = closestChunk;
for (const dependency of mergedChunk.dependencies) {
dependencies.add(dependency);
}
for (const dependentChunk of mergedChunk.dependentChunks) {
dependentChunks.add(dependentChunk);
dependentChunk.dependencies.delete(mergedChunk);
dependentChunk.dependencies.add(closestChunk);
}
dependencies.delete(closestChunk);
getChunksInPartition(closestChunk, minChunkSize, chunkPartition).add(closestChunk);
}
}
return { chunksToBeMerged, unmergeableChunks };
}

// Merging will not produce cycles if none of the direct non-merged dependencies
// of a chunk have the other chunk as a transitive dependency
// Merging will not produce cycles if none of the direct non-merged
// dependencies of a chunk have the other chunk as a transitive dependency.
// Checked in both directions because merging makes the two chunks one node.
function isValidMerge(mergedChunk: ChunkDescription, targetChunk: ChunkDescription) {
	return (
		!hasTransitiveDependency(mergedChunk, targetChunk) &&
		!hasTransitiveDependency(targetChunk, mergedChunk)
	);
}

// Breadth-first walk over the dependency graph starting from the DIRECT
// dependencies of dependentChunk. Note that only dependencies OF visited
// chunks are compared against dependencyChunk, i.e. a direct dependency
// alone does not count as "transitive" (paths of length >= 2 only).
function hasTransitiveDependency(
	dependentChunk: ChunkDescription,
	dependencyChunk: ChunkDescription
) {
	const visited = new Set(dependentChunk.dependencies);
	const queue = [...dependentChunk.dependencies];
	while (queue.length > 0) {
		const current = queue.shift()!;
		for (const next of current.dependencies) {
			if (next === dependencyChunk) {
				return true;
			}
			// Guard against cycles in the (possibly cyclic) chunk graph
			if (!visited.has(next)) {
				visited.add(next);
				queue.push(next);
			}
		}
	}
	return false;
}

// Returns the partition bucket a chunk currently belongs to: small vs big is
// decided by the size threshold, pure vs sideEffect by the chunk's purity.
function getChunksInPartition(
	chunk: ChunkDescription,
	minChunkSize: number,
	chunkPartition: ChunkPartition
): Set<ChunkDescription> {
	if (chunk.size < minChunkSize) {
		return chunk.pure ? chunkPartition.small.pure : chunkPartition.small.sideEffect;
	}
	return chunk.pure ? chunkPartition.big.pure : chunkPartition.big.sideEffect;
}

function getSignatureDistance(
Expand Down
2 changes: 1 addition & 1 deletion src/utils/iterators.ts
Expand Up @@ -3,7 +3,7 @@
* their iterators. Useful when e.g. working with large sets or lists and when
* there is a chance that the iterators will not be fully exhausted.
*/
export function* concatLazy<T>(...iterables: Iterable<T>[]) {
export function* concatLazy<T>(iterables: Iterable<T>[]): Iterable<T> {
for (const iterable of iterables) {
yield* iterable;
}
Expand Down
@@ -0,0 +1,9 @@
// Fixture config: verifies that merging chunks under experimentalMinChunkSize
// does not introduce circular dependencies between the generated chunks.
module.exports = {
description: 'avoids circular dependencies when merging chunks',
options: {
// Multiple entries so code-splitting can produce shared chunks to merge
input: ['main1.js', 'main2.js', 'main3.js'],
output: {
// Small threshold so the generated chunks qualify for merging
experimentalMinChunkSize: 100
}
}
};

0 comments on commit 0a5ea57

Please sign in to comment.