Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Include TimeSaved metric in Run Summaries #4539

Merged
merged 19 commits into from
Apr 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
5 changes: 4 additions & 1 deletion cli/integration_tests/cache_state.t
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,8 @@ Validate that local cache is true in dry run
$ cat dry.json | jq '.tasks | map(select(.taskId == "my-app#build")) | .[0].cache'
{
"local": true,
"remote": false
"remote": false,
"status": "HIT",
"source": "LOCAL",
"timeSaved": [0-9]+ (re)
}
8 changes: 6 additions & 2 deletions cli/integration_tests/dry_json/monorepo.t
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,9 @@ Setup
"hashOfExternalDependencies": "ccab0b28617f1f56",
"cache": {
"local": false,
"remote": false
"remote": false,
"status": "MISS",
"timeSaved": 0
},
"command": "echo 'building'",
"cliArguments": [],
Expand Down Expand Up @@ -143,7 +145,9 @@ Setup
"hashOfExternalDependencies": "ccab0b28617f1f56",
"cache": {
"local": false,
"remote": false
"remote": false,
"status": "MISS",
"timeSaved": 0
},
"command": "echo 'building'",
"cliArguments": [],
Expand Down
4 changes: 3 additions & 1 deletion cli/integration_tests/dry_json/single_package.t
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,9 @@ Setup
"hashOfExternalDependencies": "",
"cache": {
"local": false,
"remote": false
"remote": false,
"status": "MISS",
"timeSaved": 0
},
"command": "echo 'building' \u003e foo",
"cliArguments": [],
Expand Down
4 changes: 3 additions & 1 deletion cli/integration_tests/dry_json/single_package_no_config.t
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,9 @@ Setup
"hashOfExternalDependencies": "",
"cache": {
"local": false,
"remote": false
"remote": false,
"status": "MISS",
mehulkar marked this conversation as resolved.
Show resolved Hide resolved
"timeSaved": 0
},
"command": "echo 'building'",
"cliArguments": [],
Expand Down
8 changes: 6 additions & 2 deletions cli/integration_tests/dry_json/single_package_with_deps.t
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,9 @@ Setup
"hashOfExternalDependencies": "",
"cache": {
"local": false,
"remote": false
"remote": false,
"status": "MISS",
"timeSaved": 0
},
"command": "echo 'building' \u003e foo",
"cliArguments": [],
Expand Down Expand Up @@ -100,7 +102,9 @@ Setup
"hashOfExternalDependencies": "",
"cache": {
"local": false,
"remote": false
"remote": false,
"status": "MISS",
"timeSaved": 0
},
"command": "[[ ( -f foo ) \u0026\u0026 $(cat foo) == 'building' ]]",
"cliArguments": [],
Expand Down
4 changes: 3 additions & 1 deletion cli/integration_tests/run_summary/error.t
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,9 @@ Validate that we got a full task summary for the failed task with an error in .e
"hashOfExternalDependencies": "ccab0b28617f1f56",
"cache": {
"local": false,
"remote": false
"remote": false,
"status": "MISS",
"timeSaved": 0
},
"command": "exit 4",
"cliArguments": [],
Expand Down
9 changes: 7 additions & 2 deletions cli/integration_tests/run_summary/monorepo.t
Original file line number Diff line number Diff line change
Expand Up @@ -75,12 +75,17 @@ Setup
$ echo $FIRST_APP_BUILD | jq '.cache'
{
"local": false,
"remote": false
"remote": false,
"status": "MISS",
"timeSaved": 0
}
$ echo $SECOND_APP_BUILD | jq '.cache'
{
"local": true,
"remote": false
"remote": false,
"status": "HIT",
"source": "LOCAL",
"timeSaved": [0-9]+ (re)
}

# Some validation of util#build
Expand Down
4 changes: 3 additions & 1 deletion cli/integration_tests/run_summary/single-package.t
Original file line number Diff line number Diff line change
Expand Up @@ -82,5 +82,7 @@ Check
$ echo $TASK_SUMMARY | jq '.cache'
{
"local": false,
"remote": false
"remote": false,
"status": "MISS",
"timeSaved": 0
}
12 changes: 10 additions & 2 deletions cli/internal/cache/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,16 @@ type ItemStatus struct {
Remote bool `json:"remote"`
}

const cacheEventHit = "HIT"
const cacheEventMiss = "MISS"
const (
// CacheSourceFS is a constant to indicate local cache hit
CacheSourceFS = "LOCAL"
// CacheSourceRemote is a constant to indicate remote cache hit
CacheSourceRemote = "REMOTE"
// CacheEventHit is a constant to indicate a cache hit
CacheEventHit = "HIT"
// CacheEventMiss is a constant to indicate a cache miss
CacheEventMiss = "MISS"
)

type CacheEvent struct {
Source string `mapstructure:"source"`
Expand Down
6 changes: 3 additions & 3 deletions cli/internal/cache/cache_fs.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,12 +88,12 @@ func (f *fsCache) Exists(hash string) ItemStatus {
func (f *fsCache) logFetch(hit bool, hash string, duration int) {
var event string
if hit {
event = cacheEventHit
event = CacheEventHit
} else {
event = cacheEventMiss
event = CacheEventMiss
}
payload := &CacheEvent{
Source: "LOCAL",
Source: CacheSourceFS,
Event: event,
Hash: hash,
Duration: duration,
Expand Down
6 changes: 3 additions & 3 deletions cli/internal/cache/cache_http.go
Original file line number Diff line number Diff line change
Expand Up @@ -168,12 +168,12 @@ func (cache *httpCache) Exists(key string) ItemStatus {
func (cache *httpCache) logFetch(hit bool, hash string, duration int) {
var event string
if hit {
event = cacheEventHit
event = CacheEventHit
} else {
event = cacheEventMiss
event = CacheEventMiss
}
payload := &CacheEvent{
Source: "REMOTE",
Source: CacheSourceRemote,
Event: event,
Hash: hash,
Duration: duration,
Expand Down
2 changes: 1 addition & 1 deletion cli/internal/run/dry_run.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ func populateCacheState(turboCache cache.Cache, taskSummaries []*runsummary.Task
for index := range queue {
task := taskSummaries[index]
itemStatus := turboCache.Exists(task.Hash)
task.CacheState = itemStatus
task.CacheSummary = runsummary.NewTaskCacheSummary(itemStatus, nil)
}
}()
}
Expand Down
17 changes: 12 additions & 5 deletions cli/internal/run/real_run.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ func RealRun(
if taskExecutionSummary != nil {
taskSummary.ExpandedOutputs = taskHashTracker.GetExpandedOutputs(taskSummary.TaskID)
taskSummary.Execution = taskExecutionSummary
taskSummary.CacheState = taskHashTracker.GetCacheStatus(taskSummary.TaskID)
taskSummary.CacheSummary = taskHashTracker.GetCacheStatus(taskSummary.TaskID)

// lock since multiple things to be appending to this array at the same time
mu.Lock()
Expand Down Expand Up @@ -271,13 +271,20 @@ func (ec *execContext) exec(ctx gocontext.Context, packageTask *nodes.PackageTas
ErrorPrefix: prettyPrefix,
WarnPrefix: prettyPrefix,
}
cacheStatus, err := taskCache.RestoreOutputs(ctx, prefixedUI, progressLogger)
ec.taskHashTracker.SetCacheStatus(packageTask.TaskID, cacheStatus)

hit := cacheStatus.Local || cacheStatus.Remote
cacheStatus, timeSaved, err := taskCache.RestoreOutputs(ctx, prefixedUI, progressLogger)

// It's safe to set the CacheStatus even if there's an error, because if there's
// an error, the 0 values are actually what we want. We save cacheStatus and timeSaved
// for the task, so that even if there's an error, we have those values for the taskSummary.
ec.taskHashTracker.SetCacheStatus(
mehulkar marked this conversation as resolved.
Show resolved Hide resolved
packageTask.TaskID,
runsummary.NewTaskCacheSummary(cacheStatus, &timeSaved),
)

if err != nil {
prefixedUI.Error(fmt.Sprintf("error fetching from cache: %s", err))
} else if hit {
} else if cacheStatus.Local || cacheStatus.Remote { // If there was a cache hit
ec.taskHashTracker.SetExpandedOutputs(packageTask.TaskID, taskCache.ExpandedOutputs)
// We only cache successful executions, so we can assume this is a successCode exit.
tracer(runsummary.TargetCached, nil, &successCode)
Expand Down
20 changes: 12 additions & 8 deletions cli/internal/runcache/runcache.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,13 +108,14 @@ type TaskCache struct {
}

// RestoreOutputs attempts to restore output for the corresponding task from the cache.
// Returns true if successful.
func (tc *TaskCache) RestoreOutputs(ctx context.Context, prefixedUI *cli.PrefixedUi, progressLogger hclog.Logger) (cache.ItemStatus, error) {
// Returns the cacheStatus, the timeSaved, and error values, so the consumer can understand
// what happened in here.
func (tc *TaskCache) RestoreOutputs(ctx context.Context, prefixedUI *cli.PrefixedUi, progressLogger hclog.Logger) (cache.ItemStatus, int, error) {
if tc.cachingDisabled || tc.rc.readsDisabled {
if tc.taskOutputMode != util.NoTaskOutput && tc.taskOutputMode != util.ErrorTaskOutput {
prefixedUI.Output(fmt.Sprintf("cache bypass, force executing %s", ui.Dim(tc.hash)))
}
return cache.ItemStatus{Local: false, Remote: false}, nil
return cache.ItemStatus{Local: false, Remote: false}, 0, nil
}

changedOutputGlobs, err := tc.rc.outputWatcher.GetChangedOutputs(ctx, tc.hash, tc.repoRelativeGlobs.Inclusions)
Expand All @@ -126,25 +127,26 @@ func (tc *TaskCache) RestoreOutputs(ctx context.Context, prefixedUI *cli.Prefixe

hasChangedOutputs := len(changedOutputGlobs) > 0
var cacheStatus cache.ItemStatus

var timeSaved int
if hasChangedOutputs {
// Note that we currently don't use the output globs when restoring, but we could in the
// future to avoid doing unnecessary file I/O. We also need to pass along the exclusion
// globs as well.
itemStatus, restoredFiles, _, err := tc.rc.cache.Fetch(tc.rc.repoRoot, tc.hash, nil)
itemStatus, restoredFiles, duration, err := tc.rc.cache.Fetch(tc.rc.repoRoot, tc.hash, nil)
hit := itemStatus.Local || itemStatus.Remote
timeSaved = duration
tc.ExpandedOutputs = restoredFiles
// Assign to this variable outside this closure so we can return at the end of the function
cacheStatus = itemStatus
if err != nil {
// If there was an error fetching from cache, we'll say there was no cache hit
return cache.ItemStatus{Local: false, Remote: false}, err
return cache.ItemStatus{Local: false, Remote: false}, 0, err
} else if !hit {
if tc.taskOutputMode != util.NoTaskOutput && tc.taskOutputMode != util.ErrorTaskOutput {
prefixedUI.Output(fmt.Sprintf("cache miss, executing %s", ui.Dim(tc.hash)))
}
// If there was no hit, we can also say there was no hit
return cache.ItemStatus{Local: false, Remote: false}, nil
return cache.ItemStatus{Local: false, Remote: false}, 0, nil
}

if err := tc.rc.outputWatcher.NotifyOutputsWritten(ctx, tc.hash, tc.repoRelativeGlobs); err != nil {
Expand Down Expand Up @@ -172,7 +174,9 @@ func (tc *TaskCache) RestoreOutputs(ctx context.Context, prefixedUI *cli.Prefixe
default:
// NoLogs, do not output anything
}
return cacheStatus, nil
// TODO: timeSaved could be part of cacheStatus, so we don't have to make a new struct
// downstream, but this would be a more invasive change right now.
return cacheStatus, timeSaved, nil
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know how to fix this, but it's a bummer that we lose the time saved value in the most efficient case: no restoration necessary.

Maybe we put it on the daemon someday along with how long it took to generate those outputs? cc @arlyon

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, our timings could use a bit more sophistication in general. timeSaved doesn't account for the time it took to restore the cache at all, it's just how long the task took when it populated the cache.

}

// ReplayLogFile writes out the stored logfile to the terminal
Expand Down
4 changes: 2 additions & 2 deletions cli/internal/runsummary/format_text.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,8 @@ func (rsm Meta) FormatAndPrintText(workspaceInfos workspace.Catalog) error {
fmt.Fprintln(w, util.Sprintf(" ${GREY}Package\t=\t%s\t${RESET}", task.Package))
}
fmt.Fprintln(w, util.Sprintf(" ${GREY}Hash\t=\t%s\t${RESET}", task.Hash))
fmt.Fprintln(w, util.Sprintf(" ${GREY}Cached (Local)\t=\t%s\t${RESET}", strconv.FormatBool(task.CacheState.Local)))
fmt.Fprintln(w, util.Sprintf(" ${GREY}Cached (Remote)\t=\t%s\t${RESET}", strconv.FormatBool(task.CacheState.Remote)))
fmt.Fprintln(w, util.Sprintf(" ${GREY}Cached (Local)\t=\t%s\t${RESET}", strconv.FormatBool(task.CacheSummary.Local)))
fmt.Fprintln(w, util.Sprintf(" ${GREY}Cached (Remote)\t=\t%s\t${RESET}", strconv.FormatBool(task.CacheSummary.Remote)))

if !rsm.singlePackage {
fmt.Fprintln(w, util.Sprintf(" ${GREY}Directory\t=\t%s\t${RESET}", task.Dir))
Expand Down
39 changes: 15 additions & 24 deletions cli/internal/runsummary/spaces.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,15 @@ type spacesRunPayload struct {
// gitSha string
}

// spacesCacheStatus is the same as TaskCacheSummary so we can convert
// spacesCacheStatus(cacheSummary), but change the json tags, to omit local and remote fields
type spacesCacheStatus struct {
Status string `json:"status,omitempty"`
Source string `json:"source,omitempty"`
// omitted fields, but here so we can convert from TaskCacheSummary easily
Local bool `json:"-"`
Remote bool `json:"-"`
Status string `json:"status"` // should always be there
Source string `json:"source,omitempty"`
TimeSaved int `json:"timeSaved"`
}

type spacesTask struct {
Expand Down Expand Up @@ -84,32 +90,17 @@ func newSpacesDonePayload(runsummary *RunSummary) *spacesRunPayload {
}

func newSpacesTaskPayload(taskSummary *TaskSummary) *spacesTask {
// Set the cache source. Local and Remote shouldn't _both_ be true.
var source string
if taskSummary.CacheState.Local {
source = "LOCAL"
} else if taskSummary.CacheState.Remote {
source = "REMOTE"
}
status := "MISS"
if source != "" {
status = "HIT"
}

startTime := taskSummary.Execution.startAt.UnixMilli()
endTime := taskSummary.Execution.endTime().UnixMilli()

return &spacesTask{
Key: taskSummary.TaskID,
Name: taskSummary.Task,
Workspace: taskSummary.Package,
Hash: taskSummary.Hash,
StartTime: startTime,
EndTime: endTime,
Cache: spacesCacheStatus{
Status: status,
Source: source,
},
Key: taskSummary.TaskID,
Name: taskSummary.Task,
Workspace: taskSummary.Package,
Hash: taskSummary.Hash,
StartTime: startTime,
EndTime: endTime,
Cache: spacesCacheStatus(taskSummary.CacheSummary), // wrapped so we can remove fields
ExitCode: *taskSummary.Execution.exitCode,
Dependencies: taskSummary.Dependencies,
Dependents: taskSummary.Dependents,
Expand Down
43 changes: 42 additions & 1 deletion cli/internal/runsummary/task_summary.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,47 @@ import (
"github.com/vercel/turbo/cli/internal/util"
)

// TaskCacheSummary is an extended version of cache.ItemStatus
// that includes TimeSaved and some better data.
type TaskCacheSummary struct {
Local bool `json:"local"` // Deprecated, but keeping around for --dry=json
Remote bool `json:"remote"` // Deprecated, but keeping around for --dry=json
Status string `json:"status"` // should always be there
Source string `json:"source,omitempty"` // can be empty on status:miss
TimeSaved int `json:"timeSaved"` // always include, but can be 0
}

// NewTaskCacheSummary decorates a cache.ItemStatus into a TaskCacheSummary
// Importantly, it adds the derived keys of `source` and `status` based on
// the local/remote booleans. It would be nice if these were just included
// from upstream, but that is a more invasive change.
func NewTaskCacheSummary(itemStatus cache.ItemStatus, timeSaved *int) TaskCacheSummary {
status := cache.CacheEventMiss
if itemStatus.Local || itemStatus.Remote {
status = cache.CacheEventHit
}

var source string
if itemStatus.Local {
source = cache.CacheSourceFS
} else if itemStatus.Remote {
source = cache.CacheSourceRemote
}

cs := TaskCacheSummary{
// copy these over
Local: itemStatus.Local,
Remote: itemStatus.Remote,
Status: status,
Source: source,
}
// add in a dereferences timeSaved, should be 0 if nil
if timeSaved != nil {
cs.TimeSaved = *timeSaved
}
return cs
}
mehulkar marked this conversation as resolved.
Show resolved Hide resolved

// TaskSummary contains information about the task that was about to run
// TODO(mehulkar): `Outputs` and `ExcludedOutputs` are slightly redundant
// as the information is also available in ResolvedTaskDefinition. We could remove them
Expand All @@ -18,7 +59,7 @@ type TaskSummary struct {
Hash string `json:"hash"`
ExpandedInputs map[turbopath.AnchoredUnixPath]string `json:"inputs"`
ExternalDepsHash string `json:"hashOfExternalDependencies"`
CacheState cache.ItemStatus `json:"cache"`
CacheSummary TaskCacheSummary `json:"cache"`
Command string `json:"command"`
CommandArguments []string `json:"cliArguments"`
Outputs []string `json:"outputs"`
Expand Down