diff --git a/changelog/pending/20221014--engine--pending-deletes.yaml b/changelog/pending/20221014--engine--pending-deletes.yaml
new file mode 100644
index 000000000000..adde30fbe7c0
--- /dev/null
+++ b/changelog/pending/20221014--engine--pending-deletes.yaml
@@ -0,0 +1,4 @@
+changes:
+- type: fix
+  scope: engine
+  description: Pending deletes are no longer executed before everything else. This correctly handles dependencies for resource graphs that were partially deleted.
diff --git a/pkg/engine/lifecycletest/pulumi_test.go b/pkg/engine/lifecycletest/pulumi_test.go
index d5b57ee64ab6..b5c202b7958b 100644
--- a/pkg/engine/lifecycletest/pulumi_test.go
+++ b/pkg/engine/lifecycletest/pulumi_test.go
@@ -3956,3 +3956,163 @@ func TestDefaultParents(t *testing.T) {
     assert.Equal(t, snap.Resources[0].URN, snap.Resources[1].Parent)
     assert.Equal(t, snap.Resources[0].URN, snap.Resources[2].Parent)
 }
+
+func TestPendingDeleteOrder(t *testing.T) {
+    // Test for https://github.com/pulumi/pulumi/issues/2948. Ensure that if we have resources A and B, and we
+    // go to replace A but then fail to replace B, we correctly handle everything in the same order when
+    // we retry the update.
+    //
+    // That is, normally for this operation we would do the following:
+    // 1. Create new A
+    // 2. Create new B
+    // 3. Delete old B
+    // 4. Delete old A
+    // So if step 2 fails to create the new B, we want to see this on the failed update and then the retry:
+    // 1. Create new A
+    // 2. Create new B (fail)
+    // 1. Create new B
+    // 2. Delete old B
+    // 3. Delete old A
+    // Currently (and this is what #2948 tracks) the engine instead does the following across the two updates:
+    // 1. Create new A
+    // 2. Create new B (fail)
+    // 3. Delete old A
+    // 1. Create new B
+    // 2. Delete old B
+    // That delete of old A fails because the delete of old B needs to happen first.
+
+    t.Parallel()
+
+    cloudState := map[resource.ID]resource.PropertyMap{}
+
+    failCreationOfTypB := false
+
+    loaders := []*deploytest.ProviderLoader{
+        deploytest.NewProviderLoader("pkgA", semver.MustParse("1.0.0"), func() (plugin.Provider, error) {
+            return &deploytest.Provider{
+                CreateF: func(urn resource.URN, news resource.PropertyMap, timeout float64,
+                    preview bool) (resource.ID, resource.PropertyMap, resource.Status, error) {
+
+                    if strings.Contains(string(urn), "typB") && failCreationOfTypB {
+                        return "", nil, resource.StatusOK, fmt.Errorf("Could not create typB")
+                    }
+
+                    id := resource.ID(fmt.Sprintf("%d", len(cloudState)))
+                    if !preview {
+                        cloudState[id] = news
+                    }
+                    return id, news, resource.StatusOK, nil
+                },
+                DeleteF: func(urn resource.URN,
+                    id resource.ID, olds resource.PropertyMap, timeout float64) (resource.Status, error) {
+                    // Fail if anything in cloud state still points to us
+                    for _, res := range cloudState {
+                        for _, v := range res {
+                            if v.IsString() && v.StringValue() == string(id) {
+                                return resource.StatusOK, fmt.Errorf("Can not delete %s", id)
+                            }
+                        }
+                    }
+
+                    delete(cloudState, id)
+                    return resource.StatusOK, nil
+                },
+                DiffF: func(urn resource.URN,
+                    id resource.ID, olds, news resource.PropertyMap, ignoreChanges []string) (plugin.DiffResult, error) {
+                    if strings.Contains(string(urn), "typA") {
+                        if !olds["foo"].DeepEquals(news["foo"]) {
+                            return plugin.DiffResult{
+                                Changes:     plugin.DiffSome,
+                                ReplaceKeys: []resource.PropertyKey{"foo"},
+                                DetailedDiff: map[string]plugin.PropertyDiff{
+                                    "foo": {
+                                        Kind:      plugin.DiffUpdateReplace,
+                                        InputDiff: true,
+                                    },
+                                },
+                                DeleteBeforeReplace: false,
+                            }, nil
+                        }
+                    }
+                    if strings.Contains(string(urn), "typB") {
+                        if !olds["parent"].DeepEquals(news["parent"]) {
+                            return plugin.DiffResult{
+                                Changes:     plugin.DiffSome,
+                                ReplaceKeys: []resource.PropertyKey{"parent"},
+                                DetailedDiff: map[string]plugin.PropertyDiff{
+                                    "parent": {
+                                        Kind:      plugin.DiffUpdateReplace,
+                                        InputDiff: true,
+                                    },
+                                },
+                                DeleteBeforeReplace: false,
+                            }, nil
+                        }
+                    }
+
+                    return plugin.DiffResult{}, nil
+                },
+                UpdateF: func(urn resource.URN,
+                    id resource.ID, olds, news resource.PropertyMap, timeout float64,
+                    ignoreChanges []string, preview bool) (resource.PropertyMap, resource.Status, error) {
+                    assert.Fail(t, "Didn't expect update to be called")
+                    return nil, resource.StatusOK, nil
+                },
+            }, nil
+        }, deploytest.WithoutGrpc),
+    }
+
+    ins := resource.NewPropertyMapFromMap(map[string]interface{}{
+        "foo": "bar",
+    })
+    program := deploytest.NewLanguageRuntime(func(info plugin.RunInfo, monitor *deploytest.ResourceMonitor) error {
+        _, idA, _, err := monitor.RegisterResource("pkgA:m:typA", "resA", true, deploytest.ResourceOptions{
+            Inputs: ins,
+        })
+        assert.NoError(t, err)
+
+        _, _, _, err = monitor.RegisterResource("pkgA:m:typB", "resB", true, deploytest.ResourceOptions{
+            Inputs: resource.NewPropertyMapFromMap(map[string]interface{}{
+                "parent": idA,
+            }),
+        })
+        assert.NoError(t, err)
+
+        return nil
+    })
+    host := deploytest.NewPluginHost(nil, nil, program, loaders...)
+
+    p := &TestPlan{
+        Options: UpdateOptions{Host: host},
+    }
+
+    project := p.GetProject()
+
+    // Run an update to create the resources
+    snap, res := TestOp(Update).Run(project, p.GetTarget(t, nil), p.Options, false, p.BackendClient, nil)
+    assert.Nil(t, res)
+    assert.NotNil(t, snap)
+    assert.Len(t, snap.Resources, 3)
+
+    // Trigger a replacement of A but fail to create B
+    failCreationOfTypB = true
+    ins = resource.NewPropertyMapFromMap(map[string]interface{}{
+        "foo": "baz",
+    })
+    snap, res = TestOp(Update).Run(project, p.GetTarget(t, snap), p.Options, false, p.BackendClient, nil)
+    // Assert that this fails; we should now have two copies of A, one new and one old one pending delete
+    assert.NotNil(t, res)
+    assert.NotNil(t, snap)
+    assert.Len(t, snap.Resources, 4)
+    assert.Equal(t, snap.Resources[1].Type, tokens.Type("pkgA:m:typA"))
+    assert.False(t, snap.Resources[1].Delete)
+    assert.Equal(t, snap.Resources[2].Type, tokens.Type("pkgA:m:typA"))
+    assert.True(t, snap.Resources[2].Delete)
+
+    // Now allow B to create and try again
+    failCreationOfTypB = false
+    snap, res = TestOp(Update).Run(project, p.GetTarget(t, snap), p.Options, false, p.BackendClient, nil)
+    assert.Nil(t, res)
+    assert.NotNil(t, snap)
+    assert.Len(t, snap.Resources, 3)
+}
diff --git a/pkg/resource/deploy/deployment_executor.go b/pkg/resource/deploy/deployment_executor.go
index 6d5b81513700..4ef81b0c98cd 100644
--- a/pkg/resource/deploy/deployment_executor.go
+++ b/pkg/resource/deploy/deployment_executor.go
@@ -207,11 +207,6 @@ func (ex *deploymentExecutor) Execute(callerCtx context.Context, opts Options, p
     // Set up a step generator for this deployment.
     ex.stepGen = newStepGenerator(ex.deployment, opts, updateTargetsOpt, replaceTargetsOpt)
 
-    // Retire any pending deletes that are currently present in this deployment.
-    if res := ex.retirePendingDeletes(callerCtx, opts, preview); res != nil {
-        return nil, res
-    }
-
     // Derive a cancellable context for this deployment. We will only cancel this context if some piece of the
     // deployment's execution fails.
     ctx, cancel := context.WithCancel(callerCtx)
@@ -459,51 +454,6 @@ func (ex *deploymentExecutor) handleSingleEvent(event SourceEvent) result.Result
     return nil
 }
 
-// retirePendingDeletes deletes all resources that are pending deletion. Run before the start of a deployment, this pass
-// ensures that the engine never sees any resources that are pending deletion from a previous deployment.
-//
-// retirePendingDeletes re-uses the deployment executor's step generator but uses its own step executor.
-func (ex *deploymentExecutor) retirePendingDeletes(callerCtx context.Context, opts Options,
-    preview bool) result.Result {
-
-    contract.Require(ex.stepGen != nil, "ex.stepGen != nil")
-    steps := ex.stepGen.GeneratePendingDeletes()
-    if len(steps) == 0 {
-        logging.V(4).Infoln("deploymentExecutor.retirePendingDeletes(...): no pending deletions")
-        return nil
-    }
-
-    logging.V(4).Infof("deploymentExecutor.retirePendingDeletes(...): executing %d steps", len(steps))
-    ctx, cancel := context.WithCancel(callerCtx)
-
-    stepExec := newStepExecutor(ctx, cancel, ex.deployment, opts, preview, false)
-    antichains := ex.stepGen.ScheduleDeletes(steps)
-    // Submit the deletes for execution and wait for them all to retire.
-    for _, antichain := range antichains {
-        for _, step := range antichain {
-            ex.deployment.Ctx().StatusDiag.Infof(diag.RawMessage(step.URN(), "completing deletion from previous update"))
-        }
-
-        tok := stepExec.ExecuteParallel(antichain)
-        tok.Wait(ctx)
-    }
-
-    stepExec.SignalCompletion()
-    stepExec.WaitForCompletion()
-
-    // Like Refresh, we use the presence of an error in the caller's context to detect whether or not we have been
-    // cancelled.
-    canceled := callerCtx.Err() != nil
-    if stepExec.Errored() {
-        ex.reportExecResult("failed", preview)
-        return result.Bail()
-    } else if canceled {
-        ex.reportExecResult("canceled", preview)
-        return result.Bail()
-    }
-    return nil
-}
-
 // import imports a list of resources into a stack.
 func (ex *deploymentExecutor) importResources(
     callerCtx context.Context,
diff --git a/pkg/resource/deploy/step_generator.go b/pkg/resource/deploy/step_generator.go
index 746d071dc9f8..d3b1706b7123 100644
--- a/pkg/resource/deploy/step_generator.go
+++ b/pkg/resource/deploy/step_generator.go
@@ -1228,26 +1228,6 @@ func (sg *stepGenerator) determineAllowedResourcesToDeleteFromTargets(
     return resourcesToDelete, nil
 }
 
-// GeneratePendingDeletes generates delete steps for all resources that are pending deletion. This function should be
-// called at the start of a deployment in order to find all resources that are pending deletion from the previous
-// deployment.
-func (sg *stepGenerator) GeneratePendingDeletes() []Step {
-    var dels []Step
-    if prev := sg.deployment.prev; prev != nil {
-        logging.V(7).Infof("stepGenerator.GeneratePendingDeletes(): scanning previous snapshot for pending deletes")
-        for i := len(prev.Resources) - 1; i >= 0; i-- {
-            res := prev.Resources[i]
-            if res.Delete {
-                logging.V(7).Infof(
-                    "stepGenerator.GeneratePendingDeletes(): resource (%v, %v) is pending deletion", res.URN, res.ID)
-                sg.pendingDeletes[res] = true
-                dels = append(dels, NewDeleteStep(sg.deployment, res))
-            }
-        }
-    }
-    return dels
-}
-
 // ScheduleDeletes takes a list of steps that will delete resources and "schedules" them by producing a list of list of
 // steps, where each list can be executed in parallel but a previous list must be executed to completion before
 // advancing to the next list.
diff --git a/tests/integration/double_pending_delete/double_pending_delete_test.go b/tests/integration/double_pending_delete/double_pending_delete_test.go
index d555b12d39f8..a74e3e7c85cf 100644
--- a/tests/integration/double_pending_delete/double_pending_delete_test.go
+++ b/tests/integration/double_pending_delete/double_pending_delete_test.go
@@ -44,7 +44,6 @@ func TestDoublePendingDelete(t *testing.T) {
                 b := stackInfo.Deployment.Resources[4]
                 assert.Equal(t, "b", string(b.URN.Name()))
                 assert.False(t, b.Delete)
-
             },
         },
         {
@@ -52,10 +51,10 @@ func TestDoublePendingDelete(t *testing.T) {
             Additive:      true,
             ExpectFailure: true,
             ExtraRuntimeValidation: func(t *testing.T, stackInfo integration.RuntimeValidationStackInfo) {
-                // There is still only one pending delete resource in this snapshot.
+                // There are now two pending delete resources in this snapshot.
                 assert.NotNil(t, stackInfo.Deployment)
-                assert.Equal(t, 5, len(stackInfo.Deployment.Resources))
+                assert.Equal(t, 6, len(stackInfo.Deployment.Resources))
                 stackRes := stackInfo.Deployment.Resources[0]
                 assert.Equal(t, resource.RootStackType, stackRes.URN.Type())
                 providerRes := stackInfo.Deployment.Resources[1]
 
@@ -69,7 +68,11 @@ func TestDoublePendingDelete(t *testing.T) {
                 assert.Equal(t, "a", string(aCondemned.URN.Name()))
                 assert.True(t, aCondemned.Delete)
 
-                b := stackInfo.Deployment.Resources[4]
+                aSecondCondemned := stackInfo.Deployment.Resources[4]
+                assert.Equal(t, "a", string(aSecondCondemned.URN.Name()))
+                assert.True(t, aSecondCondemned.Delete)
+
+                b := stackInfo.Deployment.Resources[5]
                 assert.Equal(t, "b", string(b.URN.Name()))
                 assert.False(t, b.Delete)
             },
diff --git a/tests/integration/double_pending_delete/step3/index.ts b/tests/integration/double_pending_delete/step3/index.ts
index a565590a0803..f7e8d3a514b0 100644
--- a/tests/integration/double_pending_delete/step3/index.ts
+++ b/tests/integration/double_pending_delete/step3/index.ts
@@ -16,8 +16,7 @@ import { Resource } from "./resource";
 
 // The previous plan failed, but we're going to initiate *another* plan that
 // introduces new changes, while still keeping around the failed state
-// from the previous plan. The engine should delete all pending deletes before
-// attempting to start the next plan.
+// from the previous plan. The engine should handle all pending deletes.
 //
 // To do this, we're going to trigger another replacement of A:
 const a = new Resource("a", { fail: 3 });
@@ -27,9 +26,10 @@ const b = new Resource("b", { fail: 1 }, { dependsOn: a });
 // The snapshot now contains:
 // A: Created
 // A: Pending Delete
+// A: Pending Delete
 // B: Created
 
-// The A from the previous snapshot should have been deleted.
+// The A from the previous snapshot won't have been deleted yet, because the engine now leaves deletes until the end of the plan.
 
 // This plan is interesting because it shows that it is legal to delete the same URN multiple
 // times in the same plan. This previously triggered an assert in the engine that asserted
diff --git a/tests/integration/double_pending_delete/step4/index.ts b/tests/integration/double_pending_delete/step4/index.ts
index b29318c541d5..1e985c6c955c 100644
--- a/tests/integration/double_pending_delete/step4/index.ts
+++ b/tests/integration/double_pending_delete/step4/index.ts
@@ -25,6 +25,7 @@ const a = new Resource("a", { fail: 4 });
 
 // Create B
 const b = new Resource("b", { fail: 2 }, { dependsOn: a });
 
+// Delete A
 // Delete A
 // Delete B
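Reviewer note: the ScheduleDeletes doc comment kept above describes "antichain" scheduling, where deletes are grouped into waves that can run in parallel and a wave only starts once the previous wave has finished. The sketch below is a minimal, self-contained illustration of that idea and is not the engine's actual implementation; the scheduleDeletes name, the string-keyed dependency map, and the wave-building loop are all assumptions made for the example.

package main

import "fmt"

// scheduleDeletes (hypothetical) groups resources into deletion waves. A
// resource can only be deleted once nothing that is still around depends on
// it, so each wave contains exactly the resources with no remaining
// dependents, and the waves run oldest-dependent-first.
func scheduleDeletes(dependsOn map[string][]string) [][]string {
	// Count, for each resource, how many not-yet-deleted resources depend on it.
	dependents := map[string]int{}
	for res := range dependsOn {
		if _, ok := dependents[res]; !ok {
			dependents[res] = 0
		}
		for _, dep := range dependsOn[res] {
			dependents[dep]++
		}
	}

	deleted := map[string]bool{}
	var waves [][]string
	for len(deleted) < len(dependents) {
		var wave []string
		for res, count := range dependents {
			if !deleted[res] && count == 0 {
				wave = append(wave, res)
			}
		}
		if len(wave) == 0 {
			break // dependency cycle; stop rather than loop forever
		}
		for _, res := range wave {
			deleted[res] = true
			for _, dep := range dependsOn[res] {
				dependents[dep]--
			}
		}
		waves = append(waves, wave)
	}
	return waves
}

func main() {
	// Old B depends on old A, so old B must land in an earlier wave than old A.
	fmt.Println(scheduleDeletes(map[string][]string{
		"old-A": nil,
		"old-B": {"old-A"},
	}))
	// Prints: [[old-B] [old-A]]
}

With this change, pending deletes left over from an earlier failed update flow through the same scheduling as every other delete in the plan, so an old B that still references an old A is retired before the old A, instead of the old A being attempted up front and failing.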
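A second toy model, prompted by the step3 comment about deleting the same URN more than once in a plan: a snapshot may legally contain one live copy of a URN plus several condemned (pending-delete) copies left behind by failed updates. The resourceState type and liveAndCondemned helper below are hypothetical and exist only to show why condemned copies cannot be keyed by URN alone; they are not Pulumi's snapshot types.

package main

import "fmt"

// resourceState is a toy stand-in for a snapshot entry.
type resourceState struct {
	URN    string
	ID     string
	Delete bool // true means "condemned": created earlier, still awaiting deletion
}

// liveAndCondemned splits a snapshot into the (at most one per URN) live
// copies and the condemned copies. Condemned copies are kept in a slice
// because several of them can share the same URN.
func liveAndCondemned(snapshot []resourceState) (map[string]resourceState, []resourceState) {
	live := map[string]resourceState{}
	var condemned []resourceState
	for _, r := range snapshot {
		if r.Delete {
			condemned = append(condemned, r)
		} else {
			live[r.URN] = r
		}
	}
	return live, condemned
}

func main() {
	// Roughly mirrors the snapshot the updated integration test expects after
	// the failed step3 update: a live "a", two condemned copies of "a", and a
	// live "b" (the stack and provider resources are omitted here).
	snapshot := []resourceState{
		{URN: "a", ID: "2", Delete: false},
		{URN: "a", ID: "1", Delete: true},
		{URN: "a", ID: "0", Delete: true},
		{URN: "b", ID: "3", Delete: false},
	}
	live, condemned := liveAndCondemned(snapshot)
	fmt.Println(len(live), "live,", len(condemned), "pending delete")
	// Prints: 2 live, 2 pending delete
}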