forked from bazelbuild/bazel
-
Notifications
You must be signed in to change notification settings - Fork 0
/
CriticalPathComputer.java
379 lines (345 loc) · 15.9 KB
/
CriticalPathComputer.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
// Copyright 2014 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.devtools.build.lib.runtime;
import com.google.common.base.Preconditions;
import com.google.common.collect.Comparators;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Maps;
import com.google.common.eventbus.AllowConcurrentEvents;
import com.google.common.eventbus.Subscribe;
import com.google.common.flogger.GoogleLogger;
import com.google.common.flogger.StackSize;
import com.google.devtools.build.lib.actions.Action;
import com.google.devtools.build.lib.actions.ActionAnalysisMetadata;
import com.google.devtools.build.lib.actions.ActionCompletionEvent;
import com.google.devtools.build.lib.actions.ActionKeyContext;
import com.google.devtools.build.lib.actions.ActionMiddlemanEvent;
import com.google.devtools.build.lib.actions.ActionStartedEvent;
import com.google.devtools.build.lib.actions.Actions;
import com.google.devtools.build.lib.actions.AggregatedSpawnMetrics;
import com.google.devtools.build.lib.actions.Artifact;
import com.google.devtools.build.lib.actions.CachedActionEvent;
import com.google.devtools.build.lib.actions.DiscoveredInputsEvent;
import com.google.devtools.build.lib.actions.SpawnExecutedEvent;
import com.google.devtools.build.lib.actions.SpawnMetrics;
import com.google.devtools.build.lib.actions.SpawnResult;
import com.google.devtools.build.lib.clock.Clock;
import com.google.devtools.build.lib.skyframe.rewinding.ActionRewoundEvent;
import java.time.Duration;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.BinaryOperator;
import java.util.stream.Stream;
import javax.annotation.concurrent.ThreadSafe;
/**
* Computes the critical path in the action graph based on events published to the event bus.
*
* <p>After instantiation, this object needs to be registered on the event bus to work.
*/
@ThreadSafe
public class CriticalPathComputer {
/** Logger used by this class, e.g. to warn about negative action durations. */
private static final GoogleLogger logger = GoogleLogger.forEnclosingClass();
/** Maximum number of components returned by {@link #getSlowestComponents()}. */
static final int SLOWEST_COMPONENTS_SIZE = 30;
/** Maximum number of components returned by {@link #getLargestMemoryComponents()}. */
private static final int LARGEST_MEMORY_COMPONENTS_SIZE = 20;
/** Maximum number of components returned by {@link #getLargestInputSizeComponents()}. */
private static final int LARGEST_INPUT_SIZE_COMPONENTS_SIZE = 20;
/** Maximum number of components returned by {@link #getLargestInputCountComponents()}. */
private static final int LARGEST_INPUT_COUNT_COMPONENTS_SIZE = 20;
/**
 * Picks whichever of two components has the larger aggregated elapsed time.
 *
 * <p>The first argument may be {@code null}, in which case the second one is returned. When both
 * have the same aggregated elapsed time the first one is kept.
 */
private static final BinaryOperator<CriticalPathComponent> SELECT_LONGER_COMPONENT =
    (current, candidate) -> {
      if (current == null) {
        return candidate;
      }
      boolean candidateIsLonger =
          current.getAggregatedElapsedTime().compareTo(candidate.getAggregatedElapsedTime()) < 0;
      return candidateIsLonger ? candidate : current;
    };
/** Source of the ids handed to components built by {@link #createComponent}. */
private final AtomicInteger idGenerator = new AtomicInteger();
/**
 * Maps each output artifact of a tracked action to the component of that action. It is accessed
 * from multiple event handlers, hence the concurrent map.
 */
private final ConcurrentMap<Artifact, CriticalPathComponent> outputArtifactToComponent =
Maps.newConcurrentMap();
/** Passed to {@code Actions.canBeShared} when two actions claim the same primary output. */
private final ActionKeyContext actionKeyContext;
/** Maximum critical path found; holds {@code null} until an action has been finalized. */
private final AtomicReference<CriticalPathComponent> maxCriticalPath = new AtomicReference<>();
/** Provides the nanosecond timestamps recorded as the finish time of actions. */
private final Clock clock;
/**
 * Creates a computer that has not recorded any component yet.
 *
 * @param actionKeyContext context used when checking whether two actions can be shared
 * @param clock clock read to obtain the finish time of actions
 */
public CriticalPathComputer(ActionKeyContext actionKeyContext, Clock clock) {
  this.clock = clock;
  this.actionKeyContext = actionKeyContext;
}
/**
 * Builds a new critical path component for {@code action}, tagged with the next generated id.
 *
 * @param action the action the component stands for
 * @param relativeStartNanos start time of the action in nanoseconds; only meaningful for
 *     computing time differences
 * @return the freshly created component (it is not registered anywhere by this method)
 */
private CriticalPathComponent createComponent(Action action, long relativeStartNanos) {
  int componentId = idGenerator.getAndIncrement();
  return new CriticalPathComponent(componentId, action, relativeStartNanos);
}
/**
 * Returns the critical path stats for the current command execution.
 *
 * <p>The aggregate is computed on demand by walking the child links of the current maximum
 * critical path, so no per-action aggregation cost is paid while actions execute.
 *
 * @return {@link AggregatedCriticalPath#EMPTY} when no critical path has been recorded yet,
 *     otherwise the total elapsed time in milliseconds, the summed spawn metrics and the list of
 *     components starting at the head of the path
 */
public AggregatedCriticalPath aggregate() {
  CriticalPathComponent head = getMaxCriticalPath();
  if (head == null) {
    return AggregatedCriticalPath.EMPTY;
  }
  AggregatedSpawnMetrics.Builder totals = new AggregatedSpawnMetrics.Builder();
  ImmutableList.Builder<CriticalPathComponent> path = ImmutableList.builder();
  for (CriticalPathComponent node = head; node != null; node = node.getChild()) {
    AggregatedSpawnMetrics nodeMetrics = node.getSpawnMetrics();
    // Components without metrics still belong to the path; they just add nothing to the totals.
    if (nodeMetrics != null) {
      totals.addDurations(nodeMetrics);
      totals.addNonDurations(nodeMetrics);
    }
    path.add(node);
  }
  int totalMillis = (int) head.getAggregatedElapsedTime().toMillis();
  return new AggregatedCriticalPath(totalMillis, totals.build(), path.build());
}
/**
 * Returns the map from output artifacts to their critical path components.
 *
 * <p>The returned object is the internal concurrent map itself, not a copy.
 */
public Map<Artifact, CriticalPathComponent> getCriticalPathComponentsMap() {
  return this.outputArtifactToComponent;
}
/**
 * Changes the phase of the component registered for the action's primary output.
 *
 * <p>The event is ignored when the action reports no primary output or when no component is
 * registered for that output.
 */
@Subscribe
@AllowConcurrentEvents
public void nextCriticalPathPhase(SpawnExecutedEvent.ChangePhase phase) {
  Artifact primaryOutput = phase.getAction().getPrimaryOutput();
  if (primaryOutput == null) {
    // The concurrent map rejects null keys, so a lookup would throw. spawnExecuted tolerates
    // actions without a primary output in the same way.
    return;
  }
  CriticalPathComponent stats = outputArtifactToComponent.get(primaryOutput);
  if (stats != null) {
    stats.changePhase();
  }
}
/**
 * Adds the metrics of an executed spawn to the stats of the action that owns it.
 *
 * <p>Events whose action reports no primary output are ignored.
 *
 * @throws NullPointerException if no component is registered for the action's primary output;
 *     the message names the action
 */
@Subscribe
@AllowConcurrentEvents
public void spawnExecuted(SpawnExecutedEvent event) {
  ActionAnalysisMetadata action = event.getActionMetadata();
  Artifact primaryOutput = action.getPrimaryOutput();
  if (primaryOutput == null) {
    // Despite the documentation to the contrary, the SpawnIncludeScanner creates an
    // ActionExecutionMetadata instance that returns a null primary output. That said, this
    // class is incorrect wrt. multiple Spawns in a single action. See b/111583707.
    return;
  }
  // Pass the action as the error message, as actionComplete does, so a failure names it.
  CriticalPathComponent stats =
      Preconditions.checkNotNull(outputArtifactToComponent.get(primaryOutput), action);
  SpawnResult spawnResult = event.getSpawnResult();
  stats.addSpawnResult(
      spawnResult.getMetrics(),
      spawnResult.getRunnerName(),
      spawnResult.getRunnerSubtype(),
      spawnResult.wasRemote());
}
/**
 * Returns the components using the most memory.
 *
 * <p>At most {@code LARGEST_MEMORY_COMPONENTS_SIZE} components are returned, ranked by the value
 * of {@code getMaxNonDuration(0, SpawnMetrics::memoryEstimate)} on their spawn metrics.
 */
public List<CriticalPathComponent> getLargestMemoryComponents() {
  Comparator<CriticalPathComponent> byMemoryEstimate =
      Comparator.comparingLong(
          component ->
              component.getSpawnMetrics().getMaxNonDuration(0, SpawnMetrics::memoryEstimate));
  return uniqueActions()
      .collect(Comparators.greatest(LARGEST_MEMORY_COMPONENTS_SIZE, byMemoryEstimate));
}
/**
 * Returns the components with the largest input sizes.
 *
 * <p>At most {@code LARGEST_INPUT_SIZE_COMPONENTS_SIZE} components are returned, ranked by the
 * value of {@code getMaxNonDuration(0, SpawnMetrics::inputBytes)} on their spawn metrics.
 */
public List<CriticalPathComponent> getLargestInputSizeComponents() {
  Comparator<CriticalPathComponent> byInputBytes =
      Comparator.comparingLong(
          component ->
              component.getSpawnMetrics().getMaxNonDuration(0, SpawnMetrics::inputBytes));
  return uniqueActions()
      .collect(Comparators.greatest(LARGEST_INPUT_SIZE_COMPONENTS_SIZE, byInputBytes));
}
/**
 * Returns the components with the largest input counts.
 *
 * <p>At most {@code LARGEST_INPUT_COUNT_COMPONENTS_SIZE} components are returned, ranked by the
 * value of {@code getMaxNonDuration(0, SpawnMetrics::inputFiles)} on their spawn metrics.
 */
public List<CriticalPathComponent> getLargestInputCountComponents() {
  Comparator<CriticalPathComponent> byInputFiles =
      Comparator.comparingLong(
          component ->
              component.getSpawnMetrics().getMaxNonDuration(0, SpawnMetrics::inputFiles));
  return uniqueActions()
      .collect(Comparators.greatest(LARGEST_INPUT_COUNT_COMPONENTS_SIZE, byInputFiles));
}
/**
 * Returns the slowest components.
 *
 * <p>At most {@code SLOWEST_COMPONENTS_SIZE} components are returned, ranked by
 * {@link CriticalPathComponent#getElapsedTimeNanos}.
 */
public List<CriticalPathComponent> getSlowestComponents() {
  Comparator<CriticalPathComponent> byElapsedNanos =
      Comparator.comparingLong(CriticalPathComponent::getElapsedTimeNanos);
  return uniqueActions().collect(Comparators.greatest(SLOWEST_COMPONENTS_SIZE, byElapsedNanos));
}
/**
 * Streams each registered component once.
 *
 * <p>A component is stored under every output of its action, so only the entry whose key is the
 * component's primary output is kept.
 */
private Stream<CriticalPathComponent> uniqueActions() {
  Stream<Map.Entry<Artifact, CriticalPathComponent>> entries =
      outputArtifactToComponent.entrySet().stream();
  return entries
      .filter(entry -> entry.getValue().isPrimaryOutput(entry.getKey()))
      .map(entry -> entry.getValue());
}
/**
 * Registers a component for the action of the event (or reuses the stored one), records the
 * metrics of input discovery on it and then changes its phase.
 *
 * @param event information about the finished input discovery
 */
@Subscribe
@AllowConcurrentEvents
public void discoverInputs(DiscoveredInputsEvent event) throws InterruptedException {
  CriticalPathComponent candidate = createComponent(event.getAction(), event.getStartTimeNanos());
  CriticalPathComponent component = tryAddComponent(candidate);
  component.addSpawnResult(event.getMetrics(), null, "", /* wasRemote= */ false);
  component.changePhase();
}
/**
 * Records that an action has started to run. A component is registered for the action first if
 * none is stored yet; the stored component is then marked as running.
 *
 * @param event information about the started action
 */
@Subscribe
@AllowConcurrentEvents
public void actionStarted(ActionStartedEvent event) throws InterruptedException {
  CriticalPathComponent candidate = createComponent(event.getAction(), event.getNanoTimeStart());
  CriticalPathComponent tracked = tryAddComponent(candidate);
  tracked.startRunning();
}
/**
 * Records the execution of a middleman action. Middlemen are almost instant, but they are still
 * recorded because they depend on other actions and are needed to construct the critical path.
 *
 * <p>For some rules with incorrect configuration transitions this handler may be notified several
 * times for the same middleman. This should only happen if the actions are shared.
 *
 * @param event information about the middleman action
 */
@Subscribe
@AllowConcurrentEvents
public void middlemanAction(ActionMiddlemanEvent event) throws InterruptedException {
  Action middleman = event.getAction();
  CriticalPathComponent candidate = createComponent(middleman, event.getNanoTimeStart());
  CriticalPathComponent tracked = tryAddComponent(candidate);
  finalizeActionStat(event.getNanoTimeStart(), middleman, tracked, "middleman action");
}
/**
 * Tries to add the component to the map of critical path components. If a component is already
 * stored for the primary output of its action, that stored component is used for the remaining
 * outputs instead of {@code newComponent}.
 *
 * @param newComponent candidate component for the action
 * @return the component to be used for updating the time stats: the previously stored one if it
 *     exists, otherwise {@code newComponent}
 * @throws IllegalStateException if a different action that cannot be shared with the new one is
 *     already registered for the primary output, or if another output is already mapped to a
 *     different component
 * @throws InterruptedException presumably propagated from the shareability check; confirm
 *     against {@code Actions.canBeShared}
 */
@SuppressWarnings("ReferenceEquality")
private CriticalPathComponent tryAddComponent(CriticalPathComponent newComponent)
    throws InterruptedException {
  Action newAction = newComponent.getAction();
  Artifact primaryOutput = newAction.getPrimaryOutput();
  CriticalPathComponent storedComponent =
      outputArtifactToComponent.putIfAbsent(primaryOutput, newComponent);
  if (storedComponent != null) {
    Action oldAction = storedComponent.getAction();
    // TODO(b/120663721) Replace this fragile reference equality check with something principled.
    if (oldAction != newAction && !Actions.canBeShared(actionKeyContext, newAction, oldAction)) {
      throw new IllegalStateException(
          "Duplicate output artifact found for unsharable actions. "
              + "This can happen if a previous event registered the action.\n"
              + "Old action: "
              + oldAction
              + "\n\nNew action: "
              + newAction
              + "\n\nArtifact: "
              + primaryOutput
              + "\n");
    }
  } else {
    // Nothing was stored before, so the new component is now the registered one.
    storedComponent = newComponent;
  }
  // Try to insert the existing component for the rest of the outputs even if we failed to be
  // the ones inserting the component so that at the end of this method we guarantee that all the
  // outputs have a component.
  for (Artifact output : newAction.getOutputs()) {
    if (output == primaryOutput) {
      continue;
    }
    CriticalPathComponent old = outputArtifactToComponent.putIfAbsent(output, storedComponent);
    // If two actions run concurrently maybe we find a component by primary output but we are
    // the first updating the rest of the outputs.
    Preconditions.checkState(
        old == null || old == storedComponent, "Inconsistent state for %s", newAction);
  }
  return storedComponent;
}
/**
 * Records an action that was not executed because it was found in the (disk) cache. Cached
 * actions must still be tracked so that the dependency tree is computed correctly when they sit
 * in the middle of the critical path.
 *
 * @param event information about the cached action
 */
@Subscribe
@AllowConcurrentEvents
public void actionCached(CachedActionEvent event) throws InterruptedException {
  Action cached = event.getAction();
  CriticalPathComponent candidate = createComponent(cached, event.getNanoTimeStart());
  CriticalPathComponent tracked = tryAddComponent(candidate);
  finalizeActionStat(event.getNanoTimeStart(), cached, tracked, "action cache hit");
}
/**
 * Records the elapsed time stats of a completed action and links it to the components of its
 * inputs.
 *
 * @param event information about the completed action
 * @throws NullPointerException if no component is registered for the action's primary output;
 *     the message names the action
 */
@Subscribe
@AllowConcurrentEvents
public void actionComplete(ActionCompletionEvent event) {
  Action completed = event.getAction();
  CriticalPathComponent tracked = outputArtifactToComponent.get(completed.getPrimaryOutput());
  Preconditions.checkNotNull(tracked, completed);
  finalizeActionStat(event.getRelativeActionStartTime(), completed, tracked, "");
}
/**
 * Records that the failed rewound action is no longer running. The action may or may not start
 * again later.
 *
 * @param event information about the rewound action
 * @throws NullPointerException if no component is registered for the action's primary output;
 *     the message names the action
 */
@Subscribe
@AllowConcurrentEvents
public void actionRewound(ActionRewoundEvent event) {
  Action action = event.getFailedRewoundAction();
  // Pass the action as the error message, as actionComplete does, so a failure names it.
  CriticalPathComponent component =
      Preconditions.checkNotNull(
          outputArtifactToComponent.get(action.getPrimaryOutput()), action);
  component.finishActionExecution(
      event.getRelativeActionStartTime(), clock.nanoTime(), "action rewound");
}
/**
 * Returns the maximum critical path component found during the build so far.
 *
 * @return the current maximum, or {@code null} if none has been recorded yet
 */
CriticalPathComponent getMaxCriticalPath() {
  return this.maxCriticalPath.get();
}
/**
 * Closes the stats of an action: links the component to the components of its inputs, records the
 * finish time and offers the component as the new maximum critical path.
 *
 * @param startTimeNanos start time of the action in nanoseconds
 * @param action the action being finalized
 * @param component the component tracking {@code action}
 * @param finalizeReason text forwarded to the component describing why it is being finished
 */
private void finalizeActionStat(
    long startTimeNanos, Action action, CriticalPathComponent component, String finalizeReason) {
  long finishTimeNanos = clock.nanoTime();
  action
      .getInputs()
      .toList()
      .forEach(input -> addArtifactDependency(component, input, finishTimeNanos));

  Duration elapsed = Duration.ofNanos(finishTimeNanos - startTimeNanos);
  Duration tolerance = Duration.ofMillis(-5);
  if (elapsed.compareTo(tolerance) < 0) {
    // See note in {@link Clock#nanoTime} about non increasing subsequent #nanoTime calls.
    logger.atWarning().withStackTrace(StackSize.MEDIUM).log(
        "Negative duration time for [%s] %s with start: %s, finish: %s.",
        action.getMnemonic(), action.getPrimaryOutput(), startTimeNanos, finishTimeNanos);
  }

  component.finishActionExecution(startTimeNanos, finishTimeNanos, finalizeReason);
  maxCriticalPath.accumulateAndGet(component, SELECT_LONGER_COMPONENT);
}
/**
 * Links the critical path of the component that produced {@code input}, if there is one, to the
 * component being finalized.
 *
 * @param actionStats component of the action that consumed {@code input}
 * @param input an input artifact of that action
 * @param componentFinishNanos finish time of the consuming action in nanoseconds
 */
private void addArtifactDependency(
    CriticalPathComponent actionStats, Artifact input, long componentFinishNanos) {
  CriticalPathComponent producer = outputArtifactToComponent.get(input);
  // Typically, the dep component should already be finished since its output was used as an input
  // for a just-completed action. However, we tolerate it still running for (a) action rewinding
  // and (b) the rare case that an action depending on a previously-cached shared action sees a
  // different shared action that is in the midst of being an action cache hit.
  if (producer == null || producer.isRunning()) {
    return;
  }
  actionStats.addDepInfo(producer, componentFinishNanos);
}
}