Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix non-determinism in diffing algorithm #247

Merged
merged 1 commit into from Nov 24, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
48 changes: 27 additions & 21 deletions cmp/internal/diff/diff.go
Expand Up @@ -119,7 +119,7 @@ func (r Result) Similar() bool {
return r.NumSame+1 >= r.NumDiff
}

var randInt = rand.New(rand.NewSource(time.Now().Unix())).Intn(2)
var randBool = rand.New(rand.NewSource(time.Now().Unix())).Intn(2) == 0

// Difference reports whether two lists of lengths nx and ny are equal
// given the definition of equality provided as f.
Expand Down Expand Up @@ -168,17 +168,6 @@ func Difference(nx, ny int, f EqualFunc) (es EditScript) {
// A vertical edge is equivalent to inserting a symbol from list Y.
// A diagonal edge is equivalent to a matching symbol between both X and Y.

// To ensure flexibility in changing the algorithm in the future,
// introduce some degree of deliberate instability.
// This is achieved by fiddling the zigzag iterator to start searching
// the graph starting from the bottom-right versus than the top-left.
// The result may differ depending on the starting search location,
// but still produces a valid edit script.
zigzagInit := randInt // either 0 or 1
if flags.Deterministic {
zigzagInit = 0
}

// Invariants:
// • 0 ≤ fwdPath.X ≤ (fwdFrontier.X, revFrontier.X) ≤ revPath.X ≤ nx
// • 0 ≤ fwdPath.Y ≤ (fwdFrontier.Y, revFrontier.Y) ≤ revPath.Y ≤ ny
Expand All @@ -197,6 +186,11 @@ func Difference(nx, ny int, f EqualFunc) (es EditScript) {
// approximately the square-root of the search budget.
searchBudget := 4 * (nx + ny) // O(n)

// Running the tests with the "cmp_debug" build tag prints a visualization
// of the algorithm running in real-time. This is educational for
// understanding how the algorithm works. See debug_enable.go.
f = debug.Begin(nx, ny, f, &fwdPath.es, &revPath.es)

// The algorithm below is a greedy, meet-in-the-middle algorithm for
// computing sub-optimal edit-scripts between two lists.
//
Expand All @@ -214,22 +208,28 @@ func Difference(nx, ny int, f EqualFunc) (es EditScript) {
// frontier towards the opposite corner.
// • This algorithm terminates when either the X coordinates or the
// Y coordinates of the forward and reverse frontier points ever intersect.
//

// This algorithm is correct even if searching only in the forward direction
// or in the reverse direction. We do both because it is commonly observed
// that two lists commonly differ because elements were added to the front
// or end of the other list.
//
// Running the tests with the "cmp_debug" build tag prints a visualization
// of the algorithm running in real-time. This is educational for
// understanding how the algorithm works. See debug_enable.go.
f = debug.Begin(nx, ny, f, &fwdPath.es, &revPath.es)
for {
// Non-deterministically start with either the forward or reverse direction
// to introduce some deliberate instability so that we have the flexibility
// to change this algorithm in the future.
if flags.Deterministic || randBool {
goto forwardSearch
} else {
goto reverseSearch
}

forwardSearch:
{
// Forward search from the beginning.
if fwdFrontier.X >= revFrontier.X || fwdFrontier.Y >= revFrontier.Y || searchBudget == 0 {
break
goto finishSearch
}
for stop1, stop2, i := false, false, zigzagInit; !(stop1 && stop2) && searchBudget > 0; i++ {
for stop1, stop2, i := false, false, 0; !(stop1 && stop2) && searchBudget > 0; i++ {
// Search in a diagonal pattern for a match.
z := zigzag(i)
p := point{fwdFrontier.X + z, fwdFrontier.Y - z}
Expand Down Expand Up @@ -262,10 +262,14 @@ func Difference(nx, ny int, f EqualFunc) (es EditScript) {
} else {
fwdFrontier.Y++
}
goto reverseSearch
}

reverseSearch:
{
// Reverse search from the end.
if fwdFrontier.X >= revFrontier.X || fwdFrontier.Y >= revFrontier.Y || searchBudget == 0 {
break
goto finishSearch
}
for stop1, stop2, i := false, false, 0; !(stop1 && stop2) && searchBudget > 0; i++ {
// Search in a diagonal pattern for a match.
Expand Down Expand Up @@ -300,8 +304,10 @@ func Difference(nx, ny int, f EqualFunc) (es EditScript) {
} else {
revFrontier.Y--
}
goto forwardSearch
}

finishSearch:
// Join the forward and reverse paths and then append the reverse path.
fwdPath.connect(revPath.point, f)
for i := len(revPath.es) - 1; i >= 0; i-- {
Expand Down
27 changes: 13 additions & 14 deletions cmp/internal/diff/diff_test.go
Expand Up @@ -10,21 +10,15 @@ import (
"strings"
"testing"
"unicode"

"github.com/google/go-cmp/cmp/internal/flags"
)

func init() {
flags.Deterministic = true
}

func TestDifference(t *testing.T) {
tests := []struct {
// Before passing x and y to Difference, we strip all spaces so that
// they can be used by the test author to indicate a missing symbol
// in one of the lists.
x, y string
want string
want string // '|' separated list of possible outputs
}{{
x: "",
y: "",
Expand All @@ -36,7 +30,7 @@ func TestDifference(t *testing.T) {
}, {
x: "##",
y: "# ",
want: ".X",
want: ".X|X.",
}, {
x: "a#",
y: "A ",
Expand All @@ -48,7 +42,7 @@ func TestDifference(t *testing.T) {
}, {
x: "# ",
y: "##",
want: ".Y",
want: ".Y|Y.",
}, {
x: " #",
y: "@#",
Expand Down Expand Up @@ -148,7 +142,7 @@ func TestDifference(t *testing.T) {
}, {
x: "ABCAB BA ",
y: " C BABAC",
want: "XX.X.Y..Y",
want: "XX.X.Y..Y|XX.Y.X..Y",
}, {
x: "# #### ###",
y: "#y####yy###",
Expand All @@ -164,7 +158,7 @@ func TestDifference(t *testing.T) {
}, {
x: "0 12z3x 456789 x x 0",
y: "0y12Z3 y456789y y y0",
want: ".Y..M.XY......YXYXY.",
want: ".Y..M.XY......YXYXY.|.Y..M.XY......XYXYY.",
}, {
x: "0 2 4 6 8 ..................abXXcdEXF.ghXi",
y: " 1 3 5 7 9..................AB CDE F.GH I",
Expand Down Expand Up @@ -216,7 +210,7 @@ func TestDifference(t *testing.T) {
}, {
x: "0123456789 ",
y: " 5678901234",
want: "XXXXX.....YYYYY",
want: "XXXXX.....YYYYY|YYYYY.....XXXXX",
}, {
x: "0123456789 ",
y: " 4567890123",
Expand Down Expand Up @@ -252,9 +246,14 @@ func TestDifference(t *testing.T) {
x := strings.Replace(tt.x, " ", "", -1)
y := strings.Replace(tt.y, " ", "", -1)
es := testStrings(t, x, y)
if got := es.String(); got != tt.want {
t.Errorf("Difference(%s, %s):\ngot %s\nwant %s", x, y, got, tt.want)
var want string
got := es.String()
for _, want = range strings.Split(tt.want, "|") {
if got == want {
return
}
}
t.Errorf("Difference(%s, %s):\ngot %s\nwant %s", x, y, got, want)
})
}
}
Expand Down