Resume printing failed commands at the end of a run

vercel · Apr 20, 2023 · 8327560 · 8327560
1 parent 08db941
commit 8327560
Show file tree

Hide file tree

Showing 4 changed files with 80 additions and 28 deletions.
diff --git a/cli/internal/core/engine.go b/cli/internal/core/engine.go
@@ -6,6 +6,7 @@ import (
 	"os"
 	"sort"
 	"strings"
+	"sync"
 	"sync/atomic"
 
 	"github.com/vercel/turbo/cli/internal/fs"
@@ -72,40 +73,87 @@ type EngineExecutionOptions struct {
 	Concurrency int
 }
 
+// StopExecutionSentinel is used to return an error from a graph Walk that indicates that
+// all further walking should stop.
+type StopExecutionSentinel struct {
+	err error
+}
+
+// StopExecution wraps the given error in a sentinel error indicating that
+// graph traversal should stop. Note that this will stop all tasks, not just
+// downstream tasks.
+func StopExecution(reason error) *StopExecutionSentinel {
+	return &StopExecutionSentinel{
+		err: reason,
+	}
+}
+
+// Error implements error.Error for StopExecutionSentinel
+func (se *StopExecutionSentinel) Error() string {
+	return fmt.Sprintf("Execution stopped due to error: %v", se.err)
+}
+
 // Execute executes the pipeline, constructing an internal task graph and walking it accordingly.
 func (e *Engine) Execute(visitor Visitor, opts EngineExecutionOptions) []error {
 	var sema = util.NewSemaphore(opts.Concurrency)
 	var errored int32
-	return e.TaskGraph.Walk(func(v dag.Vertex) error {
-		// If something has already errored, short-circuit.
-		// There is a race here between concurrent tasks. However, if there is not a
-		// dependency edge between them, we are not required to have a strict order
-		// between them, so a failed task can fail to short-circuit a concurrent
-		// task that happened to be starting at the same time.
-		if atomic.LoadInt32(&errored) != 0 {
-			return nil
-		}
-		// Each vertex in the graph is a taskID (package#task format)
-		taskID := dag.VertexName(v)
 
-		// Always return if it is the root node
-		if strings.Contains(taskID, ROOT_NODE_NAME) {
-			return nil
-		}
+	// Returning an error from the Walk callback cancels downstream tasks, but not unrelated tasks.
+	// The behavior we want is to either cancel everything or nothing (--continue). So, we do our own
+	// error handling. Collect any errors that occur in "errors", and report them as the result of
+	// Execute. panic on any other error returned by Walk.
+	var errorMu sync.Mutex
+	var errors []error
+	recordErr := func(err error) {
+		errorMu.Lock()
+		defer errorMu.Unlock()
+		errors = append(errors, err)
+	}
+	unusedErrs := e.TaskGraph.Walk(func(v dag.Vertex) error {
+		// Use an extra func() to ensure that we are not returning any errors to Walk
+		func() {
+			// If something has already errored, short-circuit.
+			// There is a race here between concurrent tasks. However, if there is not a
+			// dependency edge between them, we are not required to have a strict order
+			// between them, so a failed task can fail to short-circuit a concurrent
+			// task that happened to be starting at the same time.
+			if atomic.LoadInt32(&errored) != 0 {
+				return
+			}
+			// Each vertex in the graph is a taskID (package#task format)
+			taskID := dag.VertexName(v)
 
-		// Acquire the semaphore unless parallel
-		if !opts.Parallel {
-			sema.Acquire()
-			defer sema.Release()
-		}
+			// Always return if it is the root node
+			if strings.Contains(taskID, ROOT_NODE_NAME) {
+				return
+			}
 
-		if err := visitor(taskID); err != nil {
-			// We only ever flip from false to true, so we don't need to compare and swap the atomic
-			atomic.StoreInt32(&errored, 1)
-			return err
-		}
+			// Acquire the semaphore unless parallel
+			if !opts.Parallel {
+				sema.Acquire()
+				defer sema.Release()
+			}
+
+			if err := visitor(taskID); err != nil {
+				if se, ok := err.(*StopExecutionSentinel); ok {
+					// We only ever flip from false to true, so we don't need to compare and swap the atomic
+					atomic.StoreInt32(&errored, 1)
+					recordErr(se.err)
+					// Note: returning an error here would cancel execution of downstream tasks only, and show
+					// up in the errors returned from Walk. However, we are doing our own error collection
+					// and intentionally ignoring errors from walk, so fallthrough and use the "errored" mechanism
+					// to skip downstream tasks
+				} else {
+					recordErr(err)
+				}
+			}
+		}()
 		return nil
 	})
+	if len(unusedErrs) > 0 {
+		panic("we should be handling execution errors via our own errors + errored mechanism")
+	}
+	return errors
 }
 
 // MissingTaskError is a specialized Error thrown in the case that we can't find a task.

diff --git a/cli/internal/core/engine_test.go b/cli/internal/core/engine_test.go
@@ -71,7 +71,7 @@ func TestShortCircuiting(t *testing.T) {
 		println(taskID)
 		executed[taskID] = true
 		if taskID == "b#build" {
-			return expectedErr
+			return StopExecution(expectedErr)
 		}
 		return nil
 	}

diff --git a/cli/internal/run/real_run.go b/cli/internal/run/real_run.go
@@ -322,7 +322,7 @@ func (ec *execContext) exec(ctx gocontext.Context, packageTask *nodes.PackageTas
 
 		ec.logError(prettyPrefix, err)
 		if !ec.rs.Opts.runOpts.ContinueOnError {
-			return nil, errors.Wrapf(err, "failed to capture outputs for \"%v\"", packageTask.TaskID)
+			return nil, core.StopExecution(errors.Wrapf(err, "failed to capture outputs for \"%v\"", packageTask.TaskID))
 		}
 	}
 
@@ -387,10 +387,12 @@ func (ec *execContext) exec(ctx gocontext.Context, packageTask *nodes.PackageTas
 		if !ec.rs.Opts.runOpts.ContinueOnError {
 			prefixedUI.Error(fmt.Sprintf("ERROR: command finished with error: %s", err))
 			ec.processes.Close()
+			// We're not continuing, stop graph traversal
+			err = core.StopExecution(err)
 		} else {
 			prefixedUI.Warn("command finished with error, but continuing...")
 			// Set to nil so we don't short-circuit any other execution
-			err = nil
+			//err = nil
 		}
 
 		return taskExecutionSummary, err

diff --git a/turborepo-tests/integration/tests/continue.t b/turborepo-tests/integration/tests/continue.t
@@ -74,6 +74,8 @@ Run with --continue
   other-app:build: npm ERR!   in workspace: other-app 
   other-app:build: npm ERR!   at location: (.*)/apps/other-app  (re)
   other-app:build: command finished with error, but continuing...
+  command \((.*)/apps/some-lib\) npm run build exited \(1\) (re)
+  command \((.*)/apps/other-app\) npm run build exited \(1\) (re)
 
    Tasks:    1 successful, 3 total
   Cached:    0 cached, 3 total