Optional framework inference (#4788)

This makes it possible to opt out of framework inference by specifying `--framework-inference=false` on the command line. This is designed to interact gracefully with `--env-mode=strict` as a separate-but-related knob to turn for control over environment variables.
vercel · May 8, 2023 · e942032 · e942032 · vercel · May 8, 2023
1 parent 9ce9805
commit e942032
Show file tree

Hide file tree

Showing 28 changed files with 815 additions and 50 deletions.
diff --git a/cli/internal/graph/graph.go b/cli/internal/graph/graph.go
@@ -51,6 +51,7 @@ type CompleteGraph struct {
 func (g *CompleteGraph) GetPackageTaskVisitor(
 	ctx gocontext.Context,
 	taskGraph *dag.AcyclicGraph,
+	frameworkInference bool,
 	globalEnvMode util.EnvMode,
 	getArgs func(taskID string) []string,
 	logger hclog.Logger,
@@ -109,9 +110,10 @@ func (g *CompleteGraph) GetPackageTaskVisitor(
 
 		passThruArgs := getArgs(taskName)
 		hash, err := g.TaskHashTracker.CalculateTaskHash(
+			logger,
 			packageTask,
 			taskGraph.DownEdges(taskID),
-			logger,
+			frameworkInference,
 			passThruArgs,
 			useOldTaskHashable,
 		)

diff --git a/cli/internal/run/dry_run.go b/cli/internal/run/dry_run.go
@@ -44,7 +44,11 @@ func DryRun(
 		}
 
 		if taskSummary.Framework == "" {
-			taskSummary.Framework = runsummary.MissingFrameworkLabel
+			if rs.Opts.runOpts.FrameworkInference {
+				taskSummary.Framework = runsummary.NoFrameworkDetected
+			} else {
+				taskSummary.Framework = runsummary.FrameworkDetectionSkipped
+			}
 		}
 
 		// This mutex is not _really_ required, since we are using Concurrency: 1 as an execution
@@ -63,7 +67,7 @@ func DryRun(
 		return rs.ArgsForTask(taskID)
 	}
 
-	visitorFn := g.GetPackageTaskVisitor(ctx, engine.TaskGraph, globalEnvMode, getArgs, base.Logger, execFunc)
+	visitorFn := g.GetPackageTaskVisitor(ctx, engine.TaskGraph, rs.Opts.runOpts.FrameworkInference, globalEnvMode, getArgs, base.Logger, execFunc)
 	execOpts := core.EngineExecutionOptions{
 		Concurrency: 1,
 		Parallel:    false,

diff --git a/cli/internal/run/global_hash.go b/cli/internal/run/global_hash.go
@@ -33,6 +33,7 @@ type GlobalHashableInputs struct {
 	pipeline             fs.PristinePipeline
 	envVarPassthroughs   []string
 	envMode              util.EnvMode
+	frameworkInference   bool
 }
 
 type newGlobalHashable struct {
@@ -43,6 +44,7 @@ type newGlobalHashable struct {
 	pipeline             fs.PristinePipeline
 	envVarPassthroughs   []string
 	envMode              util.EnvMode
+	frameworkInference   bool
 }
 
 // newGlobalHash is a transformation of GlobalHashableInputs.
@@ -57,6 +59,7 @@ func newGlobalHash(full GlobalHashableInputs) (string, error) {
 		pipeline:             full.pipeline,
 		envVarPassthroughs:   full.envVarPassthroughs,
 		envMode:              full.envMode,
+		frameworkInference:   full.frameworkInference,
 	})
 }
 
@@ -94,6 +97,11 @@ func calculateGlobalHashFromHashableInputs(full GlobalHashableInputs) (string, e
 			return newGlobalHash(full)
 		}
 
+		// If framework inference has been disabled, always use the new hash layout.
+		if !full.frameworkInference {
+			return newGlobalHash(full)
+		}
+
 		// If we're in infer mode, and there is no global pass through config,
 		// we use the old struct layout. This will be true for everyone not using the strict env
 		// feature, and we don't want to break their cache.
@@ -123,6 +131,7 @@ func getGlobalHashInputs(
 	lockFile lockfile.Lockfile,
 	envVarPassthroughs []string,
 	envMode util.EnvMode,
+	frameworkInference bool,
 	logger hclog.Logger,
 	ui cli.Ui,
 	isStructuredOutput bool,
@@ -196,5 +205,6 @@ func getGlobalHashInputs(
 		pipeline:             pipeline.Pristine(),
 		envVarPassthroughs:   envVarPassthroughs,
 		envMode:              envMode,
+		frameworkInference:   frameworkInference,
 	}, nil
 }
diff --git a/cli/internal/run/real_run.go b/cli/internal/run/real_run.go
@@ -127,7 +127,7 @@ func RealRun(
 		return rs.ArgsForTask(taskID)
 	}
 
-	visitorFn := g.GetPackageTaskVisitor(ctx, engine.TaskGraph, globalEnvMode, getArgs, base.Logger, execFunc)
+	visitorFn := g.GetPackageTaskVisitor(ctx, engine.TaskGraph, rs.Opts.runOpts.FrameworkInference, globalEnvMode, getArgs, base.Logger, execFunc)
 	errs := engine.Execute(visitorFn, execOpts)
 
 	// Track if we saw any child with a non-zero exit code

diff --git a/cli/internal/run/run.go b/cli/internal/run/run.go
@@ -72,6 +72,7 @@ func optsFromArgs(args *turbostate.ParsedArgsFromRust) (*Opts, error) {
 	opts.runOpts.Summarize = runPayload.Summarize
 	opts.runOpts.ExperimentalSpaceID = runPayload.ExperimentalSpaceID
 	opts.runOpts.EnvMode = runPayload.EnvMode
+	opts.runOpts.FrameworkInference = runPayload.FrameworkInference
 
 	// Runcache flags
 	opts.runcacheOpts.SkipReads = runPayload.Force
@@ -248,6 +249,7 @@ func (r *run) run(ctx gocontext.Context, targets []string, executionState *turbo
 		pkgDepGraph.Lockfile,
 		turboJSON.GlobalPassthroughEnv,
 		r.opts.runOpts.EnvMode,
+		r.opts.runOpts.FrameworkInference,
 		r.base.Logger,
 		r.base.UI,
 		isStructuredOutput,

diff --git a/cli/internal/runsummary/format_json.go b/cli/internal/runsummary/format_json.go
@@ -55,13 +55,14 @@ func (rsm *Meta) normalize() {
 // This struct exists solely for the purpose of serializing to JSON and should not be
 // used anywhere else.
 type nonMonorepoRunSummary struct {
-	ID                ksuid.KSUID        `json:"id"`
-	Version           string             `json:"version"`
-	TurboVersion      string             `json:"turboVersion"`
-	GlobalHashSummary *GlobalHashSummary `json:"globalCacheInputs"`
-	Packages          []string           `json:"-"`
-	EnvMode           util.EnvMode       `json:"envMode"`
-	ExecutionSummary  *executionSummary  `json:"execution,omitempty"`
-	Tasks             []*TaskSummary     `json:"tasks"`
-	SCM               *scmState          `json:"scm"`
+	ID                 ksuid.KSUID        `json:"id"`
+	Version            string             `json:"version"`
+	TurboVersion       string             `json:"turboVersion"`
+	GlobalHashSummary  *GlobalHashSummary `json:"globalCacheInputs"`
+	Packages           []string           `json:"-"`
+	EnvMode            util.EnvMode       `json:"envMode"`
+	FrameworkInference bool               `json:"frameworkInference"`
+	ExecutionSummary   *executionSummary  `json:"execution,omitempty"`
+	Tasks              []*TaskSummary     `json:"tasks"`
+	SCM                *scmState          `json:"scm"`
 }
diff --git a/cli/internal/runsummary/run_summary.go b/cli/internal/runsummary/run_summary.go
@@ -23,8 +23,11 @@ import (
 // the RunSummary will print this, instead of the script (e.g. `next build`).
 const MissingTaskLabel = "<NONEXISTENT>"
 
-// MissingFrameworkLabel is a string to identify when a workspace doesn't detect a framework
-const MissingFrameworkLabel = "<NO FRAMEWORK DETECTED>"
+// NoFrameworkDetected is the label used when framework inference ran but no framework was detected for a workspace
+const NoFrameworkDetected = "<NO FRAMEWORK DETECTED>"
+
+// FrameworkDetectionSkipped is the label used when framework inference is disabled, so detection was skipped
+const FrameworkDetectionSkipped = "<FRAMEWORK DETECTION SKIPPED>"
 
 const runSummarySchemaVersion = "0"
 const runsEndpoint = "/v0/spaces/%s/runs"
@@ -56,15 +59,16 @@ type Meta struct {
 
 // RunSummary contains a summary of what happens in the `turbo run` command and why.
 type RunSummary struct {
-	ID                ksuid.KSUID        `json:"id"`
-	Version           string             `json:"version"`
-	TurboVersion      string             `json:"turboVersion"`
-	GlobalHashSummary *GlobalHashSummary `json:"globalCacheInputs"`
-	Packages          []string           `json:"packages"`
-	EnvMode           util.EnvMode       `json:"envMode"`
-	ExecutionSummary  *executionSummary  `json:"execution,omitempty"`
-	Tasks             []*TaskSummary     `json:"tasks"`
-	SCM               *scmState          `json:"scm"`
+	ID                 ksuid.KSUID        `json:"id"`
+	Version            string             `json:"version"`
+	TurboVersion       string             `json:"turboVersion"`
+	GlobalHashSummary  *GlobalHashSummary `json:"globalCacheInputs"`
+	Packages           []string           `json:"packages"`
+	EnvMode            util.EnvMode       `json:"envMode"`
+	FrameworkInference bool               `json:"frameworkInference"`
+	ExecutionSummary   *executionSummary  `json:"execution,omitempty"`
+	Tasks              []*TaskSummary     `json:"tasks"`
+	SCM                *scmState          `json:"scm"`
 }
 
 // NewRunSummary returns a RunSummary instance
@@ -98,15 +102,16 @@ func NewRunSummary(
 
 	return Meta{
 		RunSummary: &RunSummary{
-			ID:                ksuid.New(),
-			Version:           runSummarySchemaVersion,
-			ExecutionSummary:  executionSummary,
-			TurboVersion:      turboVersion,
-			Packages:          packages,
-			EnvMode:           globalEnvMode,
-			Tasks:             []*TaskSummary{},
-			GlobalHashSummary: globalHashSummary,
-			SCM:               getSCMState(repoRoot),
+			ID:                 ksuid.New(),
+			Version:            runSummarySchemaVersion,
+			ExecutionSummary:   executionSummary,
+			TurboVersion:       turboVersion,
+			Packages:           packages,
+			EnvMode:            globalEnvMode,
+			FrameworkInference: runOpts.FrameworkInference,
+			Tasks:              []*TaskSummary{},
+			GlobalHashSummary:  globalHashSummary,
+			SCM:                getSCMState(repoRoot),
 		},
 		ui:                 ui,
 		runType:            runType,

diff --git a/cli/internal/taskhash/taskhash.go b/cli/internal/taskhash/taskhash.go
@@ -247,24 +247,30 @@ func (th *Tracker) calculateDependencyHashes(dependencySet dag.Set) ([]string, e
 // CalculateTaskHash calculates the hash for package-task combination. It is threadsafe, provided
 // that it has previously been called on its task-graph dependencies. File hashes must be calculated
 // first.
-func (th *Tracker) CalculateTaskHash(packageTask *nodes.PackageTask, dependencySet dag.Set, logger hclog.Logger, args []string, useOldTaskHashable bool) (string, error) {
+func (th *Tracker) CalculateTaskHash(logger hclog.Logger, packageTask *nodes.PackageTask, dependencySet dag.Set, frameworkInference bool, args []string, useOldTaskHashable bool) (string, error) {
 	hashOfFiles, ok := th.packageInputsHashes[packageTask.TaskID]
 	if !ok {
 		return "", fmt.Errorf("cannot find package-file hash for %v", packageTask.TaskID)
 	}
 
 	var keyMatchers []string
-	framework := inference.InferFramework(packageTask.Pkg)
-	if framework != nil && framework.EnvMatcher != "" {
-		// log auto detected framework and env prefix
-		logger.Debug(fmt.Sprintf("auto detected framework for %s", packageTask.PackageName), "framework", framework.Slug, "env_prefix", framework.EnvMatcher)
-		keyMatchers = append(keyMatchers, framework.EnvMatcher)
+	var framework *inference.Framework
+	envVarContainingExcludePrefix := ""
+
+	if frameworkInference {
+		envVarContainingExcludePrefix = "TURBO_CI_VENDOR_ENV_KEY"
+		framework = inference.InferFramework(packageTask.Pkg)
+		if framework != nil && framework.EnvMatcher != "" {
+			// log auto detected framework and env prefix
+			logger.Debug(fmt.Sprintf("auto detected framework for %s", packageTask.PackageName), "framework", framework.Slug, "env_prefix", framework.EnvMatcher)
+			keyMatchers = append(keyMatchers, framework.EnvMatcher)
+		}
 	}
 
 	envVars, err := env.GetHashableEnvVars(
 		packageTask.TaskDefinition.EnvVarDependencies,
 		keyMatchers,
-		"TURBO_CI_VENDOR_ENV_KEY",
+		envVarContainingExcludePrefix,
 	)
 	if err != nil {
 		return "", err

diff --git a/cli/internal/turbostate/turbostate.go b/cli/internal/turbostate/turbostate.go
@@ -23,15 +23,16 @@ type PrunePayload struct {
 
 // RunPayload is the extra flags passed for the `run` subcommand
 type RunPayload struct {
-	CacheDir          string       `json:"cache_dir"`
-	CacheWorkers      int          `json:"cache_workers"`
-	Concurrency       string       `json:"concurrency"`
-	ContinueExecution bool         `json:"continue_execution"`
-	DryRun            string       `json:"dry_run"`
-	Filter            []string     `json:"filter"`
-	Force             bool         `json:"force"`
-	GlobalDeps        []string     `json:"global_deps"`
-	EnvMode           util.EnvMode `json:"env_mode"`
+	CacheDir           string       `json:"cache_dir"`
+	CacheWorkers       int          `json:"cache_workers"`
+	Concurrency        string       `json:"concurrency"`
+	ContinueExecution  bool         `json:"continue_execution"`
+	DryRun             string       `json:"dry_run"`
+	Filter             []string     `json:"filter"`
+	Force              bool         `json:"force"`
+	FrameworkInference bool         `json:"framework_inference"`
+	GlobalDeps         []string     `json:"global_deps"`
+	EnvMode            util.EnvMode `json:"env_mode"`
 	// NOTE: Graph has three effective states that is modeled using a *string:
 	//   nil -> no flag passed
 	//   ""  -> flag passed but no file name attached: print to stdout

diff --git a/cli/internal/util/run_opts.go b/cli/internal/util/run_opts.go
@@ -27,6 +27,8 @@ type RunOpts struct {
 	Parallel bool
 
 	EnvMode EnvMode
+	// Whether or not to infer the framework for each workspace.
+	FrameworkInference bool
 	// The filename to write a perf profile.
 	Profile string
 	// If true, continue task executions even if a task fails.

diff --git a/crates/turborepo-lib/src/cli.rs b/crates/turborepo-lib/src/cli.rs
@@ -330,6 +330,9 @@ pub struct RunArgs {
     /// Ignore the existing cache (to force execution)
     #[clap(long, env = "TURBO_FORCE", default_missing_value = "true")]
     pub force: Option<Option<bool>>,
+    /// Specify whether or not to do framework inference for tasks
+    #[clap(long, value_name = "BOOL", action = ArgAction::Set, default_value = "true", default_missing_value = "true", num_args = 0..=1)]
+    pub framework_inference: bool,
     /// Specify glob of global filesystem dependencies to be hashed. Useful
     /// for .env and files
     #[clap(long = "global-deps", action = ArgAction::Append)]
@@ -612,6 +615,7 @@ mod test {
         RunArgs {
             cache_workers: 10,
             output_logs: None,
+            framework_inference: true,
             ..RunArgs::default()
         }
     }
@@ -665,6 +669,60 @@ mod test {
             }
         );
 
+        assert_eq!(
+            Args::try_parse_from(["turbo", "run", "build"]).unwrap(),
+            Args {
+                command: Some(Command::Run(Box::new(RunArgs {
+                    tasks: vec!["build".to_string()],
+                    framework_inference: true,
+                    ..get_default_run_args()
+                }))),
+                ..Args::default()
+            },
+            "framework_inference: default to true"
+        );
+
+        assert_eq!(
+            Args::try_parse_from(["turbo", "run", "build", "--framework-inference"]).unwrap(),
+            Args {
+                command: Some(Command::Run(Box::new(RunArgs {
+                    tasks: vec!["build".to_string()],
+                    framework_inference: true,
+                    ..get_default_run_args()
+                }))),
+                ..Args::default()
+            },
+            "framework_inference: flag only"
+        );
+
+        assert_eq!(
+            Args::try_parse_from(["turbo", "run", "build", "--framework-inference", "true"])
+                .unwrap(),
+            Args {
+                command: Some(Command::Run(Box::new(RunArgs {
+                    tasks: vec!["build".to_string()],
+                    framework_inference: true,
+                    ..get_default_run_args()
+                }))),
+                ..Args::default()
+            },
+            "framework_inference: flag set to true"
+        );
+
+        assert_eq!(
+            Args::try_parse_from(["turbo", "run", "build", "--framework-inference", "false"])
+                .unwrap(),
+            Args {
+                command: Some(Command::Run(Box::new(RunArgs {
+                    tasks: vec!["build".to_string()],
+                    framework_inference: false,
+                    ..get_default_run_args()
+                }))),
+                ..Args::default()
+            },
+            "framework_inference: flag set to false"
+        );
+
         assert_eq!(
             Args::try_parse_from(["turbo", "run", "build"]).unwrap(),
             Args {

diff --git a/docs/pages/repo/docs/core-concepts/caching.mdx b/docs/pages/repo/docs/core-concepts/caching.mdx
@@ -261,6 +261,10 @@ To alter the cache for _all_ tasks, you can declare environment variables in the
 
 ### Automatic environment variable inclusion
 
+<Callout type="info">
+  This feature can be disabled by passing [`--framework-inference=false`](../reference/command-line-reference#--framework-inference) to your `turbo` command.
+</Callout>
+
 To help ensure correct caching across environments, Turborepo automatically infers and includes public environment variables when calculating cache keys for apps built with detected frameworks. You can safely omit framework-specific public environment variables from `turbo.json`:
 
 ```diff filename="turbo.json"

diff --git a/docs/pages/repo/docs/reference/command-line-reference.mdx b/docs/pages/repo/docs/reference/command-line-reference.mdx
@@ -293,6 +293,16 @@ turbo run build --global-deps=".env.*" --global-deps=".eslintrc" --global-deps="
 
 You can also specify these in your `turbo` configuration as `globalDependencies` key.
 
+#### `--framework-inference`
+
+`type: bool`
+
+Specify whether or not to do framework inference for tasks. Defaults to `true`. Setting it to `false` skips framework inference for tasks, which also disables [automatic environment variable inclusion](../core-concepts/caching#automatic-environment-variable-inclusion).
+
+```sh
+turbo run build --framework-inference=false
+```
+
 #### `--ignore`
 
 `type: string[]`

diff --git a/turborepo-tests/integration/tests/_fixtures/framework_inference/.gitignore b/turborepo-tests/integration/tests/_fixtures/framework_inference/.gitignore
@@ -0,0 +1 @@
+.turbo/
diff --git a/turborepo-tests/integration/tests/_fixtures/framework_inference/apps/docs/package.json b/turborepo-tests/integration/tests/_fixtures/framework_inference/apps/docs/package.json
@@ -0,0 +1,9 @@
+{
+  "name": "docs",
+  "scripts": {
+    "build": "next build"
+  },
+  "dependencies": {
+    "next": "latest"
+  }
+}