Skip to content

Commit

Permalink
Improve minChunkSize algorithm (#4723)
Browse files Browse the repository at this point in the history
* Slightly simplify graph analysis

Also generates dynamicImportsByEntry that will help us to track already loaded
modules more efficiently.

* Try to merge small side effect chunks first

* Use cached hasEffects to improve performance

* Add comment explaining chunk merge strategy

* Add test for cycle prevention

We probably need complete transitive dependency maps
to continue here

* Avoid cycles when merging chunks

* Avoid cycles when merging chunks

* Log cycles in generated chunks

* Improve cycle prevention mechanism

* Hopefully fix the algorithm for good

* Add logging

* Use a much more basic algorithm

* Remove logging

* Improve coverage
  • Loading branch information
lukastaegert committed Feb 3, 2023
1 parent 52ba95c commit 0a5ea57
Show file tree
Hide file tree
Showing 63 changed files with 621 additions and 95 deletions.
7 changes: 2 additions & 5 deletions src/Module.ts
Expand Up @@ -4,7 +4,7 @@ import { locate } from 'locate-character';
import MagicString from 'magic-string';
import ExternalModule from './ExternalModule';
import type Graph from './Graph';
import { createHasEffectsContext, createInclusionContext } from './ast/ExecutionContext';
import { createInclusionContext } from './ast/ExecutionContext';
import { nodeConstructors } from './ast/nodes';
import ExportAllDeclaration from './ast/nodes/ExportAllDeclaration';
import ExportDefaultDeclaration from './ast/nodes/ExportDefaultDeclaration';
Expand Down Expand Up @@ -662,10 +662,7 @@ export default class Module {
}

hasEffects(): boolean {
	// "no-treeshake" forces the module to count as having side effects.
	// Otherwise defer to the Program node, which caches the (expensive)
	// effect detection so repeated queries during chunk optimization are cheap.
	// NOTE(review): the diff residue contained both the old inline
	// createHasEffectsContext() call and this cached variant; only the cached
	// variant is reachable, so it is kept.
	return this.info.moduleSideEffects === 'no-treeshake' || this.ast!.hasCachedEffects();
}

include(): void {
Expand Down
11 changes: 8 additions & 3 deletions src/ast/nodes/Program.ts
@@ -1,5 +1,6 @@
import type MagicString from 'magic-string';
import { type RenderOptions, renderStatementList } from '../../utils/renderHelpers';
import { createHasEffectsContext } from '../ExecutionContext';
import type { HasEffectsContext, InclusionContext } from '../ExecutionContext';
import type * as NodeType from './NodeType';
import { type IncludeChildren, NodeBase, type StatementNode } from './shared/Node';
Expand All @@ -9,11 +10,15 @@ export default class Program extends NodeBase {
declare sourceType: 'module';
declare type: NodeType.tProgram;

// Memoized result of effect detection; null means "not computed yet".
// (The diff residue showed both the old `= false` and the new nullable
// declaration; only one member declaration is valid.)
private hasCachedEffect: boolean | null = null;

hasCachedEffects(): boolean {
	// Run effect detection at most once and memoize the boolean so that
	// later callers (e.g. chunk optimization) get a constant-time answer.
	if (this.hasCachedEffect === null) {
		this.hasCachedEffect = this.hasEffects(createHasEffectsContext());
	}
	return this.hasCachedEffect;
}

hasEffects(context: HasEffectsContext): boolean {
// We are caching here to later more efficiently identify side-effect-free modules
if (this.hasCachedEffect) return true;
for (const node of this.body) {
if (node.hasEffects(context)) {
return (this.hasCachedEffect = true);
Expand Down
287 changes: 212 additions & 75 deletions src/utils/chunkAssignment.ts
@@ -1,6 +1,5 @@
import ExternalModule from '../ExternalModule';
import Module from '../Module';
import { EMPTY_ARRAY } from './blank';
import { getNewSet, getOrCreate } from './getOrCreate';
import { concatLazy } from './iterators';
import { timeEnd, timeStart } from './timers';
Expand Down Expand Up @@ -202,19 +201,21 @@ function isModuleAlreadyLoaded(
return true;
}

EMPTY_ARRAY;

// Working representation of a chunk while the minChunkSize optimization runs.
// (Reconstructed from the diff: the residue mixed in the removed fields
// `alias: null` / `size: number | null` and the deleted
// MergeableChunkDescription interface.)
interface ChunkDescription {
	// Chunks this chunk depends on (derived from module dependencies)
	dependencies: Set<ChunkDescription>;
	// Reverse edges of `dependencies`, kept in sync during merging
	dependentChunks: Set<ChunkDescription>;
	modules: Module[];
	// True when no module in the chunk has side effects
	pure: boolean;
	// Entry-point dependence signature, one char per entry
	signature: string;
	// Sum of the original code sizes of all modules in the chunk
	size: number;
}

// The four buckets chunks are partitioned into for merging: small/big by
// size threshold, pure/sideEffect by purity.
type ChunkPartition = {
	[key in 'small' | 'big']: {
		[subKey in 'pure' | 'sideEffect']: Set<ChunkDescription>;
	};
};

function createChunks(
allEntries: Iterable<Module>,
assignedEntriesByModule: DependentModuleMap,
Expand All @@ -226,59 +227,12 @@ function createChunks(
alias: null,
modules
}))
: getOptimizedChunks(chunkModulesBySignature, minChunkSize);
}

function getOptimizedChunks(
chunkModulesBySignature: { [chunkSignature: string]: Module[] },
minChunkSize: number
) {
timeStart('optimize chunks', 3);
const { chunksToBeMerged, unmergeableChunks } = getMergeableChunks(
chunkModulesBySignature,
minChunkSize
);
for (const sourceChunk of chunksToBeMerged) {
chunksToBeMerged.delete(sourceChunk);
let closestChunk: ChunkDescription | null = null;
let closestChunkDistance = Infinity;
const { signature, size, modules } = sourceChunk;

for (const targetChunk of concatLazy(chunksToBeMerged, unmergeableChunks)) {
const distance = getSignatureDistance(
signature,
targetChunk.signature,
!chunksToBeMerged.has(targetChunk)
);
if (distance === 1) {
closestChunk = targetChunk;
break;
} else if (distance < closestChunkDistance) {
closestChunk = targetChunk;
closestChunkDistance = distance;
}
}
if (closestChunk) {
closestChunk.modules.push(...modules);
if (chunksToBeMerged.has(closestChunk)) {
closestChunk.signature = mergeSignatures(signature, closestChunk.signature);
if ((closestChunk.size += size) > minChunkSize) {
chunksToBeMerged.delete(closestChunk);
unmergeableChunks.push(closestChunk);
}
}
} else {
unmergeableChunks.push(sourceChunk);
}
}
timeEnd('optimize chunks', 3);
return unmergeableChunks;
: getOptimizedChunks(chunkModulesBySignature, minChunkSize).map(({ modules }) => ({
alias: null,
modules
}));
}

const CHAR_DEPENDENT = 'X';
const CHAR_INDEPENDENT = '_';
const CHAR_CODE_DEPENDENT = CHAR_DEPENDENT.charCodeAt(0);

function getChunkModulesBySignature(
assignedEntriesByModule: ReadonlyDependentModuleMap,
allEntries: Iterable<Module>
Expand All @@ -299,33 +253,216 @@ function getChunkModulesBySignature(
return chunkModules;
}

function getMergeableChunks(
/**
* This function tries to get rid of small chunks by merging them with other
* chunks. In order to merge chunks, one must obey the following rule:
* - When merging several chunks, at most one of the chunks can have side
* effects
* - When one of the chunks has side effects, the entry points depending on that
* chunk need to be a super set of the entry points depending on the other
* chunks
* - Pure chunks can always be merged
* - We use the entry point dependence signature to calculate "chunk distance",
* i.e. how likely it is that two chunks are loaded together
*/
function getOptimizedChunks(
chunkModulesBySignature: { [chunkSignature: string]: Module[] },
minChunkSize: number
) {
const chunksToBeMerged = new Set() as Set<MergeableChunkDescription> & {
has(chunk: unknown): chunk is MergeableChunkDescription;
};
const unmergeableChunks: ChunkDescription[] = [];
const alias = null;
timeStart('optimize chunks', 3);
const chunkPartition = getPartitionedChunks(chunkModulesBySignature, minChunkSize);
if (chunkPartition.small.sideEffect.size > 0) {
mergeChunks(
chunkPartition.small.sideEffect,
[chunkPartition.small.pure, chunkPartition.big.pure],
minChunkSize,
chunkPartition
);
}

if (chunkPartition.small.pure.size > 0) {
mergeChunks(
chunkPartition.small.pure,
[chunkPartition.small.pure, chunkPartition.big.sideEffect, chunkPartition.big.pure],
minChunkSize,
chunkPartition
);
}
timeEnd('optimize chunks', 3);
return [
...chunkPartition.small.sideEffect,
...chunkPartition.small.pure,
...chunkPartition.big.sideEffect,
...chunkPartition.big.pure
];
}

// Alphabet for entry-point dependence "signatures": each position marks
// whether the corresponding entry depends on the chunk (see
// getSignatureDistance / mergeSignatures).
const CHAR_DEPENDENT = 'X';
const CHAR_INDEPENDENT = '_';
// Pre-computed char code for fast per-character comparisons
const CHAR_CODE_DEPENDENT = CHAR_DEPENDENT.charCodeAt(0);

function getPartitionedChunks(
chunkModulesBySignature: { [chunkSignature: string]: Module[] },
minChunkSize: number
): ChunkPartition {
const smallPureChunks: ChunkDescription[] = [];
const bigPureChunks: ChunkDescription[] = [];
const smallSideEffectChunks: ChunkDescription[] = [];
const bigSideEffectChunks: ChunkDescription[] = [];
const chunkByModule = new Map<Module, ChunkDescription>();
for (const [signature, modules] of Object.entries(chunkModulesBySignature)) {
const chunkDescription: ChunkDescription = {
dependencies: new Set<ChunkDescription>(),
dependentChunks: new Set<ChunkDescription>(),
modules,
pure: true,
signature,
size: 0
};
let size = 0;
checkModules: {
let pure = true;
for (const module of modules) {
chunkByModule.set(module, chunkDescription);
pure &&= !module.hasEffects();
// Unfortunately, we cannot take tree-shaking into account here because
// rendering did not happen yet
size += module.originalCode.length;
}
chunkDescription.pure = pure;
chunkDescription.size = size;
(size < minChunkSize
? pure
? smallPureChunks
: smallSideEffectChunks
: pure
? bigPureChunks
: bigSideEffectChunks
).push(chunkDescription);
}
sortChunksAndAddDependencies(
[bigPureChunks, bigSideEffectChunks, smallPureChunks, smallSideEffectChunks],
chunkByModule
);
return {
big: { pure: new Set(bigPureChunks), sideEffect: new Set(bigSideEffectChunks) },
small: { pure: new Set(smallPureChunks), sideEffect: new Set(smallSideEffectChunks) }
};
}

function sortChunksAndAddDependencies(
chunkLists: ChunkDescription[][],
chunkByModule: Map<Module, ChunkDescription>
) {
for (const chunks of chunkLists) {
chunks.sort(compareChunks);
for (const chunk of chunks) {
const { dependencies, modules } = chunk;
for (const module of modules) {
if (module.hasEffects()) {
break checkModules;
for (const dependency of module.getDependenciesToBeIncluded()) {
const dependencyChunk = chunkByModule.get(dependency as Module);
if (dependencyChunk && dependencyChunk !== chunk) {
dependencies.add(dependencyChunk);
dependencyChunk.dependentChunks.add(chunk);
}
}
size += module.magicString.toString().length;
if (size > minChunkSize) {
break checkModules;
}
}
}
}

// Comparator for Array.prototype.sort: orders chunks by ascending size so
// the smallest chunks are considered first.
function compareChunks(chunkA: ChunkDescription, chunkB: ChunkDescription): number {
	return chunkA.size - chunkB.size;
}

function mergeChunks(
chunksToBeMerged: Set<ChunkDescription>,
targetChunks: Set<ChunkDescription>[],
minChunkSize: number,
chunkPartition: ChunkPartition
) {
for (const mergedChunk of chunksToBeMerged) {
let closestChunk: ChunkDescription | null = null;
let closestChunkDistance = Infinity;
const { signature, modules, pure, size } = mergedChunk;

for (const targetChunk of concatLazy(targetChunks)) {
if (mergedChunk === targetChunk) continue;
// Possible improvement:
// For dynamic entries depending on a pure chunk, it is safe to merge that
// chunk into the chunk doing the dynamic import (i.e. into an "already
// loaded chunk") even if it is not pure.
// One way of handling this could be to add all "already loaded entries"
// of the dynamic importers into the signature as well. That could also
// change the way we do code-splitting for already loaded entries.
const distance = pure
? getSignatureDistance(signature, targetChunk.signature, !targetChunk.pure)
: getSignatureDistance(targetChunk.signature, signature, true);
if (distance < closestChunkDistance && isValidMerge(mergedChunk, targetChunk)) {
if (distance === 1) {
closestChunk = targetChunk;
break;
}
closestChunk = targetChunk;
closestChunkDistance = distance;
}
chunksToBeMerged.add({ alias, modules, signature, size });
continue;
}
unmergeableChunks.push({ alias, modules, signature, size: null });
if (closestChunk) {
chunksToBeMerged.delete(mergedChunk);
getChunksInPartition(closestChunk, minChunkSize, chunkPartition).delete(closestChunk);
closestChunk.modules.push(...modules);
closestChunk.size += size;
closestChunk.pure &&= pure;
closestChunk.signature = mergeSignatures(signature, closestChunk.signature);
const { dependencies, dependentChunks } = closestChunk;
for (const dependency of mergedChunk.dependencies) {
dependencies.add(dependency);
}
for (const dependentChunk of mergedChunk.dependentChunks) {
dependentChunks.add(dependentChunk);
dependentChunk.dependencies.delete(mergedChunk);
dependentChunk.dependencies.add(closestChunk);
}
dependencies.delete(closestChunk);
getChunksInPartition(closestChunk, minChunkSize, chunkPartition).add(closestChunk);
}
}
return { chunksToBeMerged, unmergeableChunks };
}

// Merging will not produce cycles if none of the direct non-merged dependencies
// of a chunk have the other chunk as a transitive dependency
// Merging will not produce cycles if none of the direct non-merged
// dependencies of a chunk have the other chunk as a transitive dependency.
// Checked in both directions because merging makes the two chunks one node.
function isValidMerge(mergedChunk: ChunkDescription, targetChunk: ChunkDescription) {
	return (
		!hasTransitiveDependency(mergedChunk, targetChunk) &&
		!hasTransitiveDependency(targetChunk, mergedChunk)
	);
}

// Breadth-first walk over the dependency graph starting from the DIRECT
// dependencies of dependentChunk. Note that only dependencies OF visited
// chunks are compared against dependencyChunk, i.e. a direct dependency
// alone does not count as "transitive" (paths of length >= 2 only).
function hasTransitiveDependency(
	dependentChunk: ChunkDescription,
	dependencyChunk: ChunkDescription
) {
	const visited = new Set(dependentChunk.dependencies);
	const queue = [...dependentChunk.dependencies];
	while (queue.length > 0) {
		const current = queue.shift()!;
		for (const next of current.dependencies) {
			if (next === dependencyChunk) {
				return true;
			}
			// Guard against cycles in the (possibly cyclic) chunk graph
			if (!visited.has(next)) {
				visited.add(next);
				queue.push(next);
			}
		}
	}
	return false;
}

// Returns the partition bucket a chunk currently belongs to: small vs big is
// decided by the size threshold, pure vs sideEffect by the chunk's purity.
function getChunksInPartition(
	chunk: ChunkDescription,
	minChunkSize: number,
	chunkPartition: ChunkPartition
): Set<ChunkDescription> {
	if (chunk.size < minChunkSize) {
		return chunk.pure ? chunkPartition.small.pure : chunkPartition.small.sideEffect;
	}
	return chunk.pure ? chunkPartition.big.pure : chunkPartition.big.sideEffect;
}

function getSignatureDistance(
Expand Down
2 changes: 1 addition & 1 deletion src/utils/iterators.ts
Expand Up @@ -3,7 +3,7 @@
* their iterators. Useful when e.g. working with large sets or lists and when
* there is a chance that the iterators will not be fully exhausted.
*/
export function* concatLazy<T>(...iterables: Iterable<T>[]) {
export function* concatLazy<T>(iterables: Iterable<T>[]): Iterable<T> {
for (const iterable of iterables) {
yield* iterable;
}
Expand Down
@@ -0,0 +1,9 @@
// Fixture config: verifies that merging chunks under experimentalMinChunkSize
// does not introduce circular dependencies between the generated chunks.
module.exports = {
description: 'avoids circular dependencies when merging chunks',
options: {
// Multiple entries so code-splitting can produce shared chunks to merge
input: ['main1.js', 'main2.js', 'main3.js'],
output: {
// Small threshold so the generated chunks qualify for merging
experimentalMinChunkSize: 100
}
}
};

0 comments on commit 0a5ea57

Please sign in to comment.