Skip to content

Commit

Permalink
Optional framework inference (#4788)
Browse files Browse the repository at this point in the history
This makes it possible to opt out of framework inference by specifying
`--framework-inference=false` on the command line. This is designed to
interact gracefully with `--env-mode=strict` as a separate-but-related
knob to turn for control over environment variables.
  • Loading branch information
nathanhammond committed May 8, 2023
1 parent 9ce9805 commit e942032
Show file tree
Hide file tree
Showing 28 changed files with 815 additions and 50 deletions.
4 changes: 3 additions & 1 deletion cli/internal/graph/graph.go
Expand Up @@ -51,6 +51,7 @@ type CompleteGraph struct {
func (g *CompleteGraph) GetPackageTaskVisitor(
ctx gocontext.Context,
taskGraph *dag.AcyclicGraph,
frameworkInference bool,
globalEnvMode util.EnvMode,
getArgs func(taskID string) []string,
logger hclog.Logger,
Expand Down Expand Up @@ -109,9 +110,10 @@ func (g *CompleteGraph) GetPackageTaskVisitor(

passThruArgs := getArgs(taskName)
hash, err := g.TaskHashTracker.CalculateTaskHash(
logger,
packageTask,
taskGraph.DownEdges(taskID),
logger,
frameworkInference,
passThruArgs,
useOldTaskHashable,
)
Expand Down
8 changes: 6 additions & 2 deletions cli/internal/run/dry_run.go
Expand Up @@ -44,7 +44,11 @@ func DryRun(
}

if taskSummary.Framework == "" {
taskSummary.Framework = runsummary.MissingFrameworkLabel
if rs.Opts.runOpts.FrameworkInference {
taskSummary.Framework = runsummary.NoFrameworkDetected
} else {
taskSummary.Framework = runsummary.FrameworkDetectionSkipped
}
}

// This mutex is not _really_ required, since we are using Concurrency: 1 as an execution
Expand All @@ -63,7 +67,7 @@ func DryRun(
return rs.ArgsForTask(taskID)
}

visitorFn := g.GetPackageTaskVisitor(ctx, engine.TaskGraph, globalEnvMode, getArgs, base.Logger, execFunc)
visitorFn := g.GetPackageTaskVisitor(ctx, engine.TaskGraph, rs.Opts.runOpts.FrameworkInference, globalEnvMode, getArgs, base.Logger, execFunc)
execOpts := core.EngineExecutionOptions{
Concurrency: 1,
Parallel: false,
Expand Down
10 changes: 10 additions & 0 deletions cli/internal/run/global_hash.go
Expand Up @@ -33,6 +33,7 @@ type GlobalHashableInputs struct {
pipeline fs.PristinePipeline
envVarPassthroughs []string
envMode util.EnvMode
frameworkInference bool
}

type newGlobalHashable struct {
Expand All @@ -43,6 +44,7 @@ type newGlobalHashable struct {
pipeline fs.PristinePipeline
envVarPassthroughs []string
envMode util.EnvMode
frameworkInference bool
}

// newGlobalHash is a transformation of GlobalHashableInputs.
Expand All @@ -57,6 +59,7 @@ func newGlobalHash(full GlobalHashableInputs) (string, error) {
pipeline: full.pipeline,
envVarPassthroughs: full.envVarPassthroughs,
envMode: full.envMode,
frameworkInference: full.frameworkInference,
})
}

Expand Down Expand Up @@ -94,6 +97,11 @@ func calculateGlobalHashFromHashableInputs(full GlobalHashableInputs) (string, e
return newGlobalHash(full)
}

// If you tell us not to infer framework you get the new hash.
if !full.frameworkInference {
return newGlobalHash(full)
}

// If we're in infer mode, and there is no global pass through config,
// we use the old struct layout. this will be true for everyone not using the strict env
// feature, and we don't want to break their cache.
Expand Down Expand Up @@ -123,6 +131,7 @@ func getGlobalHashInputs(
lockFile lockfile.Lockfile,
envVarPassthroughs []string,
envMode util.EnvMode,
frameworkInference bool,
logger hclog.Logger,
ui cli.Ui,
isStructuredOutput bool,
Expand Down Expand Up @@ -196,5 +205,6 @@ func getGlobalHashInputs(
pipeline: pipeline.Pristine(),
envVarPassthroughs: envVarPassthroughs,
envMode: envMode,
frameworkInference: frameworkInference,
}, nil
}
2 changes: 1 addition & 1 deletion cli/internal/run/real_run.go
Expand Up @@ -127,7 +127,7 @@ func RealRun(
return rs.ArgsForTask(taskID)
}

visitorFn := g.GetPackageTaskVisitor(ctx, engine.TaskGraph, globalEnvMode, getArgs, base.Logger, execFunc)
visitorFn := g.GetPackageTaskVisitor(ctx, engine.TaskGraph, rs.Opts.runOpts.FrameworkInference, globalEnvMode, getArgs, base.Logger, execFunc)
errs := engine.Execute(visitorFn, execOpts)

// Track if we saw any child with a non-zero exit code
Expand Down
2 changes: 2 additions & 0 deletions cli/internal/run/run.go
Expand Up @@ -72,6 +72,7 @@ func optsFromArgs(args *turbostate.ParsedArgsFromRust) (*Opts, error) {
opts.runOpts.Summarize = runPayload.Summarize
opts.runOpts.ExperimentalSpaceID = runPayload.ExperimentalSpaceID
opts.runOpts.EnvMode = runPayload.EnvMode
opts.runOpts.FrameworkInference = runPayload.FrameworkInference

// Runcache flags
opts.runcacheOpts.SkipReads = runPayload.Force
Expand Down Expand Up @@ -248,6 +249,7 @@ func (r *run) run(ctx gocontext.Context, targets []string, executionState *turbo
pkgDepGraph.Lockfile,
turboJSON.GlobalPassthroughEnv,
r.opts.runOpts.EnvMode,
r.opts.runOpts.FrameworkInference,
r.base.Logger,
r.base.UI,
isStructuredOutput,
Expand Down
19 changes: 10 additions & 9 deletions cli/internal/runsummary/format_json.go
Expand Up @@ -55,13 +55,14 @@ func (rsm *Meta) normalize() {
// This struct exists solely for the purpose of serializing to JSON and should not be
// used anywhere else.
type nonMonorepoRunSummary struct {
ID ksuid.KSUID `json:"id"`
Version string `json:"version"`
TurboVersion string `json:"turboVersion"`
GlobalHashSummary *GlobalHashSummary `json:"globalCacheInputs"`
Packages []string `json:"-"`
EnvMode util.EnvMode `json:"envMode"`
ExecutionSummary *executionSummary `json:"execution,omitempty"`
Tasks []*TaskSummary `json:"tasks"`
SCM *scmState `json:"scm"`
ID ksuid.KSUID `json:"id"`
Version string `json:"version"`
TurboVersion string `json:"turboVersion"`
GlobalHashSummary *GlobalHashSummary `json:"globalCacheInputs"`
Packages []string `json:"-"`
EnvMode util.EnvMode `json:"envMode"`
FrameworkInference bool `json:"frameworkInference"`
ExecutionSummary *executionSummary `json:"execution,omitempty"`
Tasks []*TaskSummary `json:"tasks"`
SCM *scmState `json:"scm"`
}
45 changes: 25 additions & 20 deletions cli/internal/runsummary/run_summary.go
Expand Up @@ -23,8 +23,11 @@ import (
// the RunSummary will print this, instead of the script (e.g. `next build`).
const MissingTaskLabel = "<NONEXISTENT>"

// MissingFrameworkLabel is a string to identify when a workspace doesn't detect a framework
const MissingFrameworkLabel = "<NO FRAMEWORK DETECTED>"
// NoFrameworkDetected is a string to identify when a workspace doesn't detect a framework
const NoFrameworkDetected = "<NO FRAMEWORK DETECTED>"

// FrameworkDetectionSkipped is a string to identify when framework detection was skipped
const FrameworkDetectionSkipped = "<FRAMEWORK DETECTION SKIPPED>"

const runSummarySchemaVersion = "0"
const runsEndpoint = "/v0/spaces/%s/runs"
Expand Down Expand Up @@ -56,15 +59,16 @@ type Meta struct {

// RunSummary contains a summary of what happens in the `turbo run` command and why.
type RunSummary struct {
ID ksuid.KSUID `json:"id"`
Version string `json:"version"`
TurboVersion string `json:"turboVersion"`
GlobalHashSummary *GlobalHashSummary `json:"globalCacheInputs"`
Packages []string `json:"packages"`
EnvMode util.EnvMode `json:"envMode"`
ExecutionSummary *executionSummary `json:"execution,omitempty"`
Tasks []*TaskSummary `json:"tasks"`
SCM *scmState `json:"scm"`
ID ksuid.KSUID `json:"id"`
Version string `json:"version"`
TurboVersion string `json:"turboVersion"`
GlobalHashSummary *GlobalHashSummary `json:"globalCacheInputs"`
Packages []string `json:"packages"`
EnvMode util.EnvMode `json:"envMode"`
FrameworkInference bool `json:"frameworkInference"`
ExecutionSummary *executionSummary `json:"execution,omitempty"`
Tasks []*TaskSummary `json:"tasks"`
SCM *scmState `json:"scm"`
}

// NewRunSummary returns a RunSummary instance
Expand Down Expand Up @@ -98,15 +102,16 @@ func NewRunSummary(

return Meta{
RunSummary: &RunSummary{
ID: ksuid.New(),
Version: runSummarySchemaVersion,
ExecutionSummary: executionSummary,
TurboVersion: turboVersion,
Packages: packages,
EnvMode: globalEnvMode,
Tasks: []*TaskSummary{},
GlobalHashSummary: globalHashSummary,
SCM: getSCMState(repoRoot),
ID: ksuid.New(),
Version: runSummarySchemaVersion,
ExecutionSummary: executionSummary,
TurboVersion: turboVersion,
Packages: packages,
EnvMode: globalEnvMode,
FrameworkInference: runOpts.FrameworkInference,
Tasks: []*TaskSummary{},
GlobalHashSummary: globalHashSummary,
SCM: getSCMState(repoRoot),
},
ui: ui,
runType: runType,
Expand Down
20 changes: 13 additions & 7 deletions cli/internal/taskhash/taskhash.go
Expand Up @@ -247,24 +247,30 @@ func (th *Tracker) calculateDependencyHashes(dependencySet dag.Set) ([]string, e
// CalculateTaskHash calculates the hash for package-task combination. It is threadsafe, provided
// that it has previously been called on its task-graph dependencies. File hashes must be calculated
// first.
func (th *Tracker) CalculateTaskHash(packageTask *nodes.PackageTask, dependencySet dag.Set, logger hclog.Logger, args []string, useOldTaskHashable bool) (string, error) {
func (th *Tracker) CalculateTaskHash(logger hclog.Logger, packageTask *nodes.PackageTask, dependencySet dag.Set, frameworkInference bool, args []string, useOldTaskHashable bool) (string, error) {
hashOfFiles, ok := th.packageInputsHashes[packageTask.TaskID]
if !ok {
return "", fmt.Errorf("cannot find package-file hash for %v", packageTask.TaskID)
}

var keyMatchers []string
framework := inference.InferFramework(packageTask.Pkg)
if framework != nil && framework.EnvMatcher != "" {
// log auto detected framework and env prefix
logger.Debug(fmt.Sprintf("auto detected framework for %s", packageTask.PackageName), "framework", framework.Slug, "env_prefix", framework.EnvMatcher)
keyMatchers = append(keyMatchers, framework.EnvMatcher)
var framework *inference.Framework
envVarContainingExcludePrefix := ""

if frameworkInference {
envVarContainingExcludePrefix = "TURBO_CI_VENDOR_ENV_KEY"
framework = inference.InferFramework(packageTask.Pkg)
if framework != nil && framework.EnvMatcher != "" {
// log auto detected framework and env prefix
logger.Debug(fmt.Sprintf("auto detected framework for %s", packageTask.PackageName), "framework", framework.Slug, "env_prefix", framework.EnvMatcher)
keyMatchers = append(keyMatchers, framework.EnvMatcher)
}
}

envVars, err := env.GetHashableEnvVars(
packageTask.TaskDefinition.EnvVarDependencies,
keyMatchers,
"TURBO_CI_VENDOR_ENV_KEY",
envVarContainingExcludePrefix,
)
if err != nil {
return "", err
Expand Down
19 changes: 10 additions & 9 deletions cli/internal/turbostate/turbostate.go
Expand Up @@ -23,15 +23,16 @@ type PrunePayload struct {

// RunPayload is the extra flags passed for the `run` subcommand
type RunPayload struct {
CacheDir string `json:"cache_dir"`
CacheWorkers int `json:"cache_workers"`
Concurrency string `json:"concurrency"`
ContinueExecution bool `json:"continue_execution"`
DryRun string `json:"dry_run"`
Filter []string `json:"filter"`
Force bool `json:"force"`
GlobalDeps []string `json:"global_deps"`
EnvMode util.EnvMode `json:"env_mode"`
CacheDir string `json:"cache_dir"`
CacheWorkers int `json:"cache_workers"`
Concurrency string `json:"concurrency"`
ContinueExecution bool `json:"continue_execution"`
DryRun string `json:"dry_run"`
Filter []string `json:"filter"`
Force bool `json:"force"`
FrameworkInference bool `json:"framework_inference"`
GlobalDeps []string `json:"global_deps"`
EnvMode util.EnvMode `json:"env_mode"`
// NOTE: Graph has three effective states that is modeled using a *string:
// nil -> no flag passed
// "" -> flag passed but no file name attached: print to stdout
Expand Down
2 changes: 2 additions & 0 deletions cli/internal/util/run_opts.go
Expand Up @@ -27,6 +27,8 @@ type RunOpts struct {
Parallel bool

EnvMode EnvMode
// Whether or not to infer the framework for each workspace.
FrameworkInference bool
// The filename to write a perf profile.
Profile string
// If true, continue task executions even if a task fails.
Expand Down
58 changes: 58 additions & 0 deletions crates/turborepo-lib/src/cli.rs
Expand Up @@ -330,6 +330,9 @@ pub struct RunArgs {
/// Ignore the existing cache (to force execution)
#[clap(long, env = "TURBO_FORCE", default_missing_value = "true")]
pub force: Option<Option<bool>>,
/// Specify whether or not to do framework inference for tasks
#[clap(long, value_name = "BOOL", action = ArgAction::Set, default_value = "true", default_missing_value = "true", num_args = 0..=1)]
pub framework_inference: bool,
/// Specify glob of global filesystem dependencies to be hashed. Useful
/// for .env and files
#[clap(long = "global-deps", action = ArgAction::Append)]
Expand Down Expand Up @@ -612,6 +615,7 @@ mod test {
RunArgs {
cache_workers: 10,
output_logs: None,
framework_inference: true,
..RunArgs::default()
}
}
Expand Down Expand Up @@ -665,6 +669,60 @@ mod test {
}
);

assert_eq!(
Args::try_parse_from(["turbo", "run", "build"]).unwrap(),
Args {
command: Some(Command::Run(Box::new(RunArgs {
tasks: vec!["build".to_string()],
framework_inference: true,
..get_default_run_args()
}))),
..Args::default()
},
"framework_inference: default to true"
);

assert_eq!(
Args::try_parse_from(["turbo", "run", "build", "--framework-inference"]).unwrap(),
Args {
command: Some(Command::Run(Box::new(RunArgs {
tasks: vec!["build".to_string()],
framework_inference: true,
..get_default_run_args()
}))),
..Args::default()
},
"framework_inference: flag only"
);

assert_eq!(
Args::try_parse_from(["turbo", "run", "build", "--framework-inference", "true"])
.unwrap(),
Args {
command: Some(Command::Run(Box::new(RunArgs {
tasks: vec!["build".to_string()],
framework_inference: true,
..get_default_run_args()
}))),
..Args::default()
},
"framework_inference: flag set to true"
);

assert_eq!(
Args::try_parse_from(["turbo", "run", "build", "--framework-inference", "false"])
.unwrap(),
Args {
command: Some(Command::Run(Box::new(RunArgs {
tasks: vec!["build".to_string()],
framework_inference: false,
..get_default_run_args()
}))),
..Args::default()
},
"framework_inference: flag set to false"
);

assert_eq!(
Args::try_parse_from(["turbo", "run", "build"]).unwrap(),
Args {
Expand Down
4 changes: 4 additions & 0 deletions docs/pages/repo/docs/core-concepts/caching.mdx
Expand Up @@ -261,6 +261,10 @@ To alter the cache for _all_ tasks, you can declare environment variables in the

### Automatic environment variable inclusion

<Callout type="info">
This feature can be disabled by passing [`--framework-inference=false`](../reference/command-line-reference#--framework-inference) to your `turbo` command.
</Callout>

To help ensure correct caching across environments, Turborepo automatically infers and includes public environment variables when calculating cache keys for apps built with detected frameworks. You can safely omit framework-specific public environment variables from `turbo.json`:

```diff filename="turbo.json"
Expand Down
10 changes: 10 additions & 0 deletions docs/pages/repo/docs/reference/command-line-reference.mdx
Expand Up @@ -293,6 +293,16 @@ turbo run build --global-deps=".env.*" --global-deps=".eslintrc" --global-deps="

You can also specify these in your `turbo` configuration as `globalDependencies` key.

#### `--framework-inference`

`type: bool`

Specify whether or not to do framework inference for tasks. Defaults to `true`, can be configured to be `false` which skips framework inference for tasks. This disables [automatic environment variable inclusion](../core-concepts/caching#automatic-environment-variable-inclusion).

```sh
turbo run build --framework-inference=false
```

#### `--ignore`

`type: string[]`
Expand Down
@@ -0,0 +1 @@
.turbo/
@@ -0,0 +1,9 @@
{
"name": "docs",
"scripts": {
"build": "next build"
},
"dependencies": {
"next": "latest"
}
}

1 comment on commit e942032

@vercel
Copy link

@vercel vercel bot commented on e942032 May 8, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.