Skip to content

Commit

Permalink
Improve fp ignore logic (#2351)
Browse files Browse the repository at this point in the history
* forgot field change

* use aho corasick for filter

* reduce wordlist sensitivity
  • Loading branch information
dustin-decker committed Jan 29, 2024
1 parent 303e191 commit 7befefd
Show file tree
Hide file tree
Showing 6 changed files with 61 additions and 52 deletions.
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ require (
github.com/go-sql-driver/mysql v1.7.1
github.com/gobwas/glob v0.2.3
github.com/golang-jwt/jwt v3.2.2+incompatible
github.com/golang-jwt/jwt/v4 v4.5.0
github.com/google/go-cmp v0.6.0
github.com/google/go-containerregistry v0.17.0
github.com/google/go-github/v42 v42.0.0
Expand Down Expand Up @@ -166,7 +167,6 @@ require (
github.com/go-ole/go-ole v1.2.6 // indirect
github.com/goccy/go-json v0.10.0 // indirect
github.com/godbus/dbus v0.0.0-20190726142602-4481cbc300e2 // indirect
github.com/golang-jwt/jwt/v4 v4.5.0 // indirect
github.com/golang-sql/civil v0.0.0-20190719163853-cb61b32ac6fe // indirect
github.com/golang-sql/sqlexp v0.1.0 // indirect
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
Expand Down
3 changes: 2 additions & 1 deletion pkg/detectors/azuresearchquerykey/azuresearchquerykey.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,8 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result

s1 := detectors.Result{
DetectorType: detectorspb.DetectorType_AzureSearchQueryKey,
Raw: []byte(resMatch + resUrlMatch),
Raw: []byte(resMatch),
RawV2: []byte(resMatch + resUrlMatch),
}
if verify {
client := s.client
Expand Down
25 changes: 1 addition & 24 deletions pkg/detectors/badlist.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,7 @@ array
uint
boolean
config
/>
</
\n
\r
parse
()
func
param
cancel
Expand All @@ -27,7 +22,6 @@ space
ident
block
type
\"
index
case
safe
Expand Down Expand Up @@ -87,8 +81,6 @@ keyword
trace
truncate
group
a-z
0-9
href
scale
model
Expand All @@ -106,26 +98,18 @@ close
defer
start
;var
":
storage
blob
cred
${
math
.xml
conflict
];
$(
-{{
hack
-v1
-v2
package
contract
schema
vec<
ed25519
(&
prefix
suffix
compress
Expand Down Expand Up @@ -177,7 +161,6 @@ error
revoke
encrypt
binary
md5
2018-
2019-
2020-
Expand All @@ -188,14 +171,12 @@ root
readon
test
2048
1<<
match
private
key_
aes256
aes128
state
...
alloc
proto
term
Expand Down Expand Up @@ -281,8 +262,6 @@ k8s.
role
application
explic
[[
]]
random
DES3
3DES
Expand All @@ -295,7 +274,6 @@ tag:
extend
split
option
t=0
fontsize
&quot;
keyboard
Expand All @@ -306,5 +284,4 @@ iphone
develop
master
slave
secondary
---
secondary
48 changes: 25 additions & 23 deletions pkg/detectors/falsepositives.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ import (
"strings"
"unicode"
"unicode/utf8"

ahocorasick "github.com/BobuSumisu/aho-corasick"
)

var DefaultFalsePositives = []FalsePositive{"example", "xxxxxx", "aaaaaa", "abcde", "00000", "sample", "www"}
Expand All @@ -21,16 +23,21 @@ var wordList []byte
//go:embed "programmingbooks.txt"
var programmingBookWords []byte

type Wordlists struct {
wordList map[string]struct{}
badList map[string]struct{}
programmingBookWords map[string]struct{}
}
var filter *ahocorasick.Trie

func init() {
builder := ahocorasick.NewTrieBuilder()

wordList := bytesToCleanWordList(wordList)
builder.AddStrings(wordList)

var FalsePositiveWordlists = Wordlists{
wordList: bytesToCleanWordList(wordList),
badList: bytesToCleanWordList(badList),
programmingBookWords: bytesToCleanWordList(programmingBookWords),
badList := bytesToCleanWordList(badList)
builder.AddStrings(badList)

programmingBookWords := bytesToCleanWordList(programmingBookWords)
builder.AddStrings(programmingBookWords)

filter = builder.Build()
}

// IsKnownFalsePositive reports whether the match should be discarded as a known false positive, i.e. whether any of the disqualifying conditions are met
Expand All @@ -48,21 +55,11 @@ func IsKnownFalsePositive(match string, falsePositives []FalsePositive, wordChec
}

if wordCheck {
// check against common substring badlist
if _, ok := FalsePositiveWordlists.badList[lower]; ok {
return true
}

// check for dictionary word substrings
if _, ok := FalsePositiveWordlists.wordList[lower]; ok {
return true
}

// check for programming book token substrings
if _, ok := FalsePositiveWordlists.programmingBookWords[lower]; ok {
if filter.MatchFirstString(lower) != nil {
return true
}
}

return false
}

Expand All @@ -76,14 +73,19 @@ func HasDigit(key string) bool {
return false
}

func bytesToCleanWordList(data []byte) map[string]struct{} {
func bytesToCleanWordList(data []byte) []string {
words := make(map[string]struct{})
for _, word := range strings.Split(string(data), "\n") {
if strings.TrimSpace(word) != "" {
words[strings.TrimSpace(strings.ToLower(word))] = struct{}{}
}
}
return words

wordList := make([]string, 0, len(words))
for word := range words {
wordList = append(wordList, word)
}
return wordList
}

func StringShannonEntropy(input string) float64 {
Expand Down
32 changes: 31 additions & 1 deletion pkg/detectors/falsepositives_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ func TestIsFalsePositive(t *testing.T) {
type args struct {
match string
falsePositives []FalsePositive
useWordlist bool
}
tests := []struct {
name string
Expand All @@ -23,21 +24,50 @@ func TestIsFalsePositive(t *testing.T) {
args: args{
match: "example",
falsePositives: DefaultFalsePositives,
useWordlist: false,
},
want: true,
},
{
name: "fp - in wordlist",
args: args{
match: "sdfdsfprivatesfsdfd",
falsePositives: DefaultFalsePositives,
useWordlist: true,
},
want: true,
},
{
name: "fp - not in wordlist",
args: args{
match: "sdfdsfsfsdfd",
falsePositives: DefaultFalsePositives,
useWordlist: true,
},
want: false,
},
{
name: "not fp",
args: args{
match: "notafp123",
falsePositives: DefaultFalsePositives,
useWordlist: false,
},
want: false,
},
{
name: "fp - in wordlist exact match",
args: args{
match: "private",
falsePositives: DefaultFalsePositives,
useWordlist: true,
},
want: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := IsKnownFalsePositive(tt.args.match, tt.args.falsePositives, false); got != tt.want {
if got := IsKnownFalsePositive(tt.args.match, tt.args.falsePositives, tt.args.useWordlist); got != tt.want {
t.Errorf("IsKnownFalsePositive() = %v, want %v", got, tt.want)
}
})
Expand Down
3 changes: 1 addition & 2 deletions pkg/detectors/programmingbooks.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,7 @@ ${ts.map(kv
${uname
$value
$x:expr
+3=err
a;
+3=err
a][appendix_a
abbreviated
abcabcabc
Expand Down

0 comments on commit 7befefd

Please sign in to comment.