Skip to content

Commit

Permalink
Adjust heuristic for line-based versus byte-based diffing (#299)
Browse files Browse the repository at this point in the history
If the string has many characters that require escape sequences to print,
then we need to take that into consideration and avoid byte-by-byte diffing.

Co-authored-by: Damien Neil <neild@users.noreply.github.com>
  • Loading branch information
dsnet and neild committed Sep 2, 2022
1 parent 377d283 commit a97318b
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 1 deletion.
17 changes: 17 additions & 0 deletions cmp/compare_test.go
Expand Up @@ -1403,6 +1403,23 @@ using the AllowUnexported option.`, "\n"),
[]byte("\xffoo"), []byte("foo"), []byte("barbaz"), []byte("added"), []byte("here"), []byte("hrmph\xff"),
},
reason: "should print text byte slices as strings except those with binary",
}, {
label: label + "/ManyEscapeCharacters",
x: `[
{"Base32": "NA======"},
{"Base32": "NBSQ===="},
{"Base32": "NBSWY==="},
{"Base32": "NBSWY3A="},
{"Base32": "NBSWY3DP"}
]`,
y: `[
{"Base32": "NB======"},
{"Base32": "NBSQ===="},
{"Base32": "NBSWY==="},
{"Base32": "NBSWY3A="},
{"Base32": "NBSWY3DP"}
]`,
reason: "should use line-based diffing since byte-based diffing is unreadable due to heavy amounts of escaping",
}}
}

Expand Down
5 changes: 4 additions & 1 deletion cmp/report_slices.go
Expand Up @@ -147,7 +147,10 @@ func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode {
})
efficiencyLines := float64(esLines.Dist()) / float64(len(esLines))
efficiencyBytes := float64(esBytes.Dist()) / float64(len(esBytes))
isPureLinedText = efficiencyLines < 4*efficiencyBytes
quotedLength := len(strconv.Quote(sx + sy))
unquotedLength := len(sx) + len(sy)
escapeExpansionRatio := float64(quotedLength) / float64(unquotedLength)
isPureLinedText = efficiencyLines < 4*efficiencyBytes || escapeExpansionRatio > 1.1
}
}

Expand Down
12 changes: 12 additions & 0 deletions cmp/testdata/diffs
Expand Up @@ -1182,6 +1182,18 @@
+ {0x68, 0x72, 0x6d, 0x70, 0x68, 0xff},
}
>>> TestDiff/Reporter/SliceOfBytesBinary
<<< TestDiff/Reporter/ManyEscapeCharacters
(
"""
[
- {"Base32": "NA======"},
+ {"Base32": "NB======"},
{"Base32": "NBSQ===="},
{"Base32": "NBSWY==="},
... // 3 identical lines
"""
)
>>> TestDiff/Reporter/ManyEscapeCharacters
<<< TestDiff/EmbeddedStruct/ParentStructA/Inequal
teststructs.ParentStructA{
privateStruct: teststructs.privateStruct{
Expand Down

0 comments on commit a97318b

Please sign in to comment.