Existing and error logs behavior #4656

Merged
merged 6 commits into from Apr 25, 2023
Changes from 3 commits
98 changes: 73 additions & 25 deletions cli/internal/core/engine.go
@@ -6,6 +6,7 @@ import (
"os"
"sort"
"strings"
"sync"
"sync/atomic"

"github.com/vercel/turbo/cli/internal/fs"
@@ -72,40 +73,87 @@ type EngineExecutionOptions struct {
Concurrency int
}

// StopExecutionSentinel is used to return an error from a graph Walk that indicates that
// all further walking should stop.
type StopExecutionSentinel struct {
err error
}

// StopExecution wraps the given error in a sentinel error indicating that
// graph traversal should stop. Note that this will stop all tasks, not just
// downstream tasks.
func StopExecution(reason error) *StopExecutionSentinel {
return &StopExecutionSentinel{
err: reason,
}
}

// Error implements error.Error for StopExecutionSentinel
func (se *StopExecutionSentinel) Error() string {
return fmt.Sprintf("Execution stopped due to error: %v", se.err)
}

// Execute executes the pipeline, constructing an internal task graph and walking it accordingly.
func (e *Engine) Execute(visitor Visitor, opts EngineExecutionOptions) []error {
var sema = util.NewSemaphore(opts.Concurrency)
var errored int32
return e.TaskGraph.Walk(func(v dag.Vertex) error {
// If something has already errored, short-circuit.
// There is a race here between concurrent tasks. However, if there is not a
// dependency edge between them, we are not required to have a strict order
// between them, so a failed task can fail to short-circuit a concurrent
// task that happened to be starting at the same time.
if atomic.LoadInt32(&errored) != 0 {
return nil
}
// Each vertex in the graph is a taskID (package#task format)
taskID := dag.VertexName(v)

// Always return if it is the root node
if strings.Contains(taskID, ROOT_NODE_NAME) {
return nil
}
// Returning an error from the Walk callback cancels downstream tasks, but not unrelated tasks.
Contributor:
Is this the behavior before or after this change?

Contributor Author:
This is the behavior of the graph library we use, which is unchanged.
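For reference, a minimal sketch of the visitor contract this change introduces (runTask and continueOnError are hypothetical stand-ins, not names from this PR): wrapping a failure in core.StopExecution asks Execute to skip everything still pending, while returning a plain error only records it and lets other tasks keep running.

visitor := func(taskID string) error {
	if err := runTask(taskID); err != nil { // runTask: stand-in for real task execution
		if continueOnError {
			return err // recorded by Execute; other tasks keep running
		}
		return core.StopExecution(err) // sets the errored flag; remaining tasks are skipped
	}
	return nil
}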

// The behavior we want is to either cancel everything or nothing (--continue). So, we do our own
// error handling. Collect any errors that occur in "errors", and report them as the result of
// Execute. panic on any other error returned by Walk.
var errorMu sync.Mutex
var errors []error
recordErr := func(err error) {
errorMu.Lock()
defer errorMu.Unlock()
errors = append(errors, err)
}
unusedErrs := e.TaskGraph.Walk(func(v dag.Vertex) error {
Contributor:
What other errors could we have here? Is it something from a task execution or something from the dag library, essentially?

Contributor Author:
The anonymous function pattern should prevent us from leaking any of our execution errors out. It's possible that something in the dag library can produce an error, but from the dag library's point of view, every node visit should look successful.

Contributor (@mehulkar, Apr 25, 2023):
I was trying to understand what unusedErrs was about. Since we're recording all our errors with recordErr(), I did not understand what unusedErrs was or why it could occur.
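To make the unusedErrs discussion concrete, here is a compact, self-contained sketch of the pattern (walkAll is a hypothetical stand-in for the dag walker, not the real library API): the callback wraps its body in an immediately invoked func() so the walker always sees a nil return, while failures are collected under a mutex for the caller.

package main

import (
	"errors"
	"fmt"
	"sync"
)

// walkAll stands in for TaskGraph.Walk: it visits every task and returns
// whatever errors the callback itself returned.
func walkAll(tasks []string, cb func(string) error) []error {
	var walkErrs []error
	for _, t := range tasks {
		if err := cb(t); err != nil {
			walkErrs = append(walkErrs, err)
		}
	}
	return walkErrs
}

func main() {
	var (
		mu   sync.Mutex
		errs []error
	)
	record := func(err error) {
		mu.Lock()
		defer mu.Unlock()
		errs = append(errs, err)
	}

	unused := walkAll([]string{"a#build", "b#build"}, func(taskID string) error {
		// The extra func() mirrors the diff: the body can bail out with plain
		// returns, and nothing it produces ever reaches the walker.
		func() {
			if taskID == "b#build" {
				record(errors.New("b#build failed")) // collected for the caller
				return
			}
		}()
		return nil // the walker always sees a successful visit
	})

	fmt.Println(len(unused), "errors leaked to the walker") // 0
	fmt.Println(len(errs), "errors recorded")               // 1
}

Under this pattern, anything in unused would have to come from the walker itself, which is why Execute treats a non-empty slice as a programming error and panics.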

// Use an extra func() to ensure that we are not returning any errors to Walk
func() {
// If something has already errored, short-circuit.
// There is a race here between concurrent tasks. However, if there is not a
// dependency edge between them, we are not required to have a strict order
// between them, so a failed task can fail to short-circuit a concurrent
// task that happened to be starting at the same time.
if atomic.LoadInt32(&errored) != 0 {
return
}
// Each vertex in the graph is a taskID (package#task format)
taskID := dag.VertexName(v)

// Acquire the semaphore unless parallel
if !opts.Parallel {
sema.Acquire()
defer sema.Release()
}
// Always return if it is the root node
if strings.Contains(taskID, ROOT_NODE_NAME) {
return
}

if err := visitor(taskID); err != nil {
// We only ever flip from false to true, so we don't need to compare and swap the atomic
atomic.StoreInt32(&errored, 1)
return err
}
// Acquire the semaphore unless parallel
if !opts.Parallel {
sema.Acquire()
defer sema.Release()
}

if err := visitor(taskID); err != nil {
if se, ok := err.(*StopExecutionSentinel); ok {
// We only ever flip from false to true, so we don't need to compare and swap the atomic
atomic.StoreInt32(&errored, 1)
recordErr(se.err)
// Note: returning an error here would cancel execution of downstream tasks only, and show
// up in the errors returned from Walk. However, we are doing our own error collection
// and intentionally ignoring errors from walk, so fallthrough and use the "errored" mechanism
// to skip downstream tasks
} else {
recordErr(err)
}
}
}()
return nil
})
if len(unusedErrs) > 0 {
panic("we should be handling execution errors via our own errors + errored mechanism")
}
return errors
}

// MissingTaskError is a specialized Error thrown in the case that we can't find a task.
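A hedged usage sketch of Execute's new return value (reportRunErrors is hypothetical; only Engine, Visitor, Execute, and EngineExecutionOptions are taken from the file above): failures now come back as the returned slice rather than from Walk.

package run // hypothetical location inside the CLI module

import (
	"fmt"

	"github.com/vercel/turbo/cli/internal/core"
)

// reportRunErrors sketches how a caller could consume the []error that
// Execute now returns instead of relying on the walker's own error list.
func reportRunErrors(engine *core.Engine, visitor core.Visitor) error {
	errs := engine.Execute(visitor, core.EngineExecutionOptions{
		Concurrency: 10, // Parallel left false, so the concurrency semaphore applies
	})
	for _, err := range errs {
		fmt.Printf("task failed: %v\n", err)
	}
	if len(errs) > 0 {
		return fmt.Errorf("run failed: %d task(s) errored", len(errs))
	}
	return nil
}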
2 changes: 1 addition & 1 deletion cli/internal/core/engine_test.go
@@ -71,7 +71,7 @@ func TestShortCircuiting(t *testing.T) {
println(taskID)
executed[taskID] = true
if taskID == "b#build" {
return expectedErr
return StopExecution(expectedErr)
}
return nil
}
11 changes: 6 additions & 5 deletions cli/internal/run/real_run.go
@@ -322,7 +322,7 @@ func (ec *execContext) exec(ctx gocontext.Context, packageTask *nodes.PackageTas

ec.logError(prettyPrefix, err)
if !ec.rs.Opts.runOpts.ContinueOnError {
return nil, errors.Wrapf(err, "failed to capture outputs for \"%v\"", packageTask.TaskID)
return nil, core.StopExecution(errors.Wrapf(err, "failed to capture outputs for \"%v\"", packageTask.TaskID))
}
}

@@ -381,19 +381,20 @@ func (ec *execContext) exec(ctx gocontext.Context, packageTask *nodes.PackageTas
tracer(runsummary.TargetBuildFailed, err, nil)
}

// If there was an error, flush the buffered output
taskCache.OnError(prefixedUI, progressLogger)
progressLogger.Error(fmt.Sprintf("Error: command finished with error: %v", err))
if !ec.rs.Opts.runOpts.ContinueOnError {
prefixedUI.Error(fmt.Sprintf("ERROR: command finished with error: %s", err))
ec.processes.Close()
// We're not continuing, stop graph traversal
err = core.StopExecution(err)
} else {
prefixedUI.Warn("command finished with error, but continuing...")
// Set to nil so we don't short-circuit any other execution
err = nil
}

// If there was an error, flush the buffered output
taskCache.OnError(prefixedUI, progressLogger)

return taskExecutionSummary, err
}

1 change: 1 addition & 0 deletions cli/internal/runcache/runcache.go
@@ -190,6 +190,7 @@ func (tc TaskCache) ReplayLogFile(prefixedUI *cli.PrefixedUi, progressLogger hcl
// This is called if the task exited with an non-zero error code.
func (tc TaskCache) OnError(terminal *cli.PrefixedUi, logger hclog.Logger) {
if tc.taskOutputMode == util.ErrorTaskOutput {
terminal.Output(fmt.Sprintf("cache miss, executing %s", ui.Dim(tc.hash)))
tc.ReplayLogFile(terminal, logger)
}
}
Contributor:
Is it possible to expand either monorepo_one_script_error or basic_monorepo (or some other one that I missed) for this test instead of creating a new one? I would like to prevent fixtures being 1:1 with tests when possible.

Contributor Author:
Hmm. I need more than one error. I could add a new task to basic_monorepo, although I'm not sure a special-case task is preferable to a special-case fixture. That being said, I don't feel super strongly. What do you think?

@@ -0,0 +1,3 @@
node_modules/
.turbo
.npmrc
@@ -0,0 +1,9 @@
{
"name": "my-app",
"scripts": {
"build": "echo 'working'"
},
"dependencies": {
"some-lib": "*"
}
}
@@ -0,0 +1,9 @@
{
"name": "other-app",
"scripts": {
"build": "exit 3"
},
"dependencies": {
"some-lib": "*"
}
}
Contributor:
Not meaningful for the test, but it's slightly weird to see apps/some-lib instead of the packages/some-lib pattern.

@@ -0,0 +1,6 @@
{
"name": "some-lib",
"scripts": {
"build": "exit 2"
}
}
@@ -0,0 +1,6 @@
{
"name": "monorepo",
"workspaces": [
"apps/**"
]
}
@@ -0,0 +1,9 @@
{
"$schema": "https://turbo.build/schema.json",
"pipeline": {
"build": {
"dependsOn": ["^build"],
"outputs": []
}
}
}
85 changes: 85 additions & 0 deletions turborepo-tests/integration/tests/continue.t
@@ -0,0 +1,85 @@
Setup
$ . ${TESTDIR}/../../helpers/setup.sh
$ . ${TESTDIR}/_helpers/setup_monorepo.sh $(pwd) monorepo_dependency_error
Run without --continue
$ ${TURBO} build
\xe2\x80\xa2 Packages in scope: my-app, other-app, some-lib (esc)
\xe2\x80\xa2 Running build in 3 packages (esc)
\xe2\x80\xa2 Remote caching disabled (esc)
some-lib:build: cache miss, executing 3494007308f52ad6
some-lib:build:
some-lib:build: > build
some-lib:build: > exit 2
some-lib:build:
some-lib:build: npm ERR! Lifecycle script `build` failed with error:
some-lib:build: npm ERR! Error: command failed
some-lib:build: npm ERR! in workspace: some-lib
some-lib:build: npm ERR! at location: (.*)/apps/some-lib (re)
some-lib:build: ERROR: command finished with error: command \((.*)/apps/some-lib\) npm run build exited \(1\) (re)
command \((.*)/apps/some-lib\) npm run build exited \(1\) (re)

Tasks: 0 successful, 1 total
Cached: 0 cached, 1 total
Time:\s*[\.0-9]+m?s (re)

ERROR run failed: command exited (1)
[1]

Run without --continue, and with only errors.
$ ${TURBO} build --output-logs=errors-only
\xe2\x80\xa2 Packages in scope: my-app, other-app, some-lib (esc)
\xe2\x80\xa2 Running build in 3 packages (esc)
\xe2\x80\xa2 Remote caching disabled (esc)
some-lib:build: cache miss, executing 3494007308f52ad6
some-lib:build:
some-lib:build: > build
some-lib:build: > exit 2
some-lib:build:
some-lib:build: npm ERR! Lifecycle script `build` failed with error:
some-lib:build: npm ERR! Error: command failed
some-lib:build: npm ERR! in workspace: some-lib
some-lib:build: npm ERR! at location: (.*)/apps/some-lib (re)
some-lib:build: ERROR: command finished with error: command \((.*)/apps/some-lib\) npm run build exited \(1\) (re)
command \((.*)/apps/some-lib\) npm run build exited \(1\) (re)

Tasks: 0 successful, 1 total
Cached: 0 cached, 1 total
Time:\s*[\.0-9]+m?s (re)

ERROR run failed: command exited (1)
[1]

Run with --continue
$ ${TURBO} build --output-logs=errors-only --continue
\xe2\x80\xa2 Packages in scope: my-app, other-app, some-lib (esc)
\xe2\x80\xa2 Running build in 3 packages (esc)
\xe2\x80\xa2 Remote caching disabled (esc)
some-lib:build: cache miss, executing 3494007308f52ad6
some-lib:build:
some-lib:build: > build
some-lib:build: > exit 2
some-lib:build:
some-lib:build: npm ERR! Lifecycle script `build` failed with error:
some-lib:build: npm ERR! Error: command failed
some-lib:build: npm ERR! in workspace: some-lib
some-lib:build: npm ERR! at location: (.*)/apps/some-lib (re)
some-lib:build: command finished with error, but continuing...
other-app:build: cache miss, executing af6505fe5634a5f5
other-app:build:
other-app:build: > build
other-app:build: > exit 3
other-app:build:
other-app:build: npm ERR! Lifecycle script `build` failed with error:
other-app:build: npm ERR! Error: command failed
other-app:build: npm ERR! in workspace: other-app
other-app:build: npm ERR! at location: (.*)/apps/other-app (re)
other-app:build: command finished with error, but continuing...
command \((.*)/apps/some-lib\) npm run build exited \(1\) (re)
command \((.*)/apps/other-app\) npm run build exited \(1\) (re)

Tasks: 1 successful, 3 total
Cached: 0 cached, 3 total
Time:\s*[\.0-9]+m?s (re)

ERROR run failed: command exited (1)
[1]