Skip to content

Commit

Permalink
fix(uuid): UUID regexes to support all-or-none '-' separator
Browse files Browse the repository at this point in the history
This PR changes the UUID validation to support either
UUIDs with the expected number of separators or no separator at all.

Under the hood, UUID validation no longer relies on regular expressions
(exported regexp patterns are marked as deprecated) but on
github.com/google/uuid. This brings a significant performance
improvement on validation (~ 15-20 times faster).

Note that some non-standard UUID schemes, as well as UUID v6 and v7,
now pass "IsUUID".

* contributes go-swagger/go-swagger#2878

Signed-off-by: Frederic BIDON <fredbi@yahoo.com>
  • Loading branch information
fredbi committed Dec 28, 2023
1 parent 03a91f9 commit 740a7c6
Show file tree
Hide file tree
Showing 4 changed files with 163 additions and 25 deletions.
52 changes: 34 additions & 18 deletions default.go
Expand Up @@ -25,6 +25,7 @@ import (
"strings"

"github.com/asaskevich/govalidator"
"github.com/google/uuid"
"go.mongodb.org/mongo-driver/bson"
)

Expand Down Expand Up @@ -57,24 +58,35 @@ const (
// - long top-level domain names (e.g. example.london) are permitted
// - symbol unicode points are permitted (e.g. emoji) (not for top-level domain)
HostnamePattern = `^([a-zA-Z0-9\p{S}\p{L}]((-?[a-zA-Z0-9\p{S}\p{L}]{0,62})?)|([a-zA-Z0-9\p{S}\p{L}](([a-zA-Z0-9-\p{S}\p{L}]{0,61}[a-zA-Z0-9\p{S}\p{L}])?)(\.)){1,}([a-zA-Z\p{L}]){2,63})$`

// json null type
jsonNull = "null"
)

const (
// UUIDPattern Regex for UUID that allows uppercase
UUIDPattern = `(?i)^[0-9a-f]{8}-?[0-9a-f]{4}-?[0-9a-f]{4}-?[0-9a-f]{4}-?[0-9a-f]{12}$`
//
// Deprecated: strfmt no longer uses regular expressions to validate UUIDs.
UUIDPattern = `(?i)(^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$)|(^[0-9a-f]{32}$)`

// UUID3Pattern Regex for UUID3 that allows uppercase
UUID3Pattern = `(?i)^[0-9a-f]{8}-?[0-9a-f]{4}-?3[0-9a-f]{3}-?[0-9a-f]{4}-?[0-9a-f]{12}$`
//
// Deprecated: strfmt no longer uses regular expressions to validate UUIDs.
UUID3Pattern = `(?i)(^[0-9a-f]{8}-[0-9a-f]{4}-3[0-9a-f]{3}-[0-9a-f]{4}-[0-9a-f]{12}$)|(^[0-9a-f]{12}3[0-9a-f]{3}?[0-9a-f]{16}$)`

// UUID4Pattern Regex for UUID4 that allows uppercase
UUID4Pattern = `(?i)^[0-9a-f]{8}-?[0-9a-f]{4}-?4[0-9a-f]{3}-?[89ab][0-9a-f]{3}-?[0-9a-f]{12}$`
//
// Deprecated: strfmt no longer uses regular expressions to validate UUIDs.
UUID4Pattern = `(?i)(^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$)|(^[0-9a-f]{12}4[0-9a-f]{3}[89ab][0-9a-f]{15}$)`

// UUID5Pattern Regex for UUID5 that allows uppercase
UUID5Pattern = `(?i)^[0-9a-f]{8}-?[0-9a-f]{4}-?5[0-9a-f]{3}-?[89ab][0-9a-f]{3}-?[0-9a-f]{12}$`
// json null type
jsonNull = "null"
//
// Deprecated: strfmt no longer uses regular expressions to validate UUIDs.
UUID5Pattern = `(?i)(^[0-9a-f]{8}-[0-9a-f]{4}-5[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$)|(^[0-9a-f]{12}5[0-9a-f]{3}[89ab][0-9a-f]{15}$)`
)

var (
rxHostname = regexp.MustCompile(HostnamePattern)
rxUUID = regexp.MustCompile(UUIDPattern)
rxUUID3 = regexp.MustCompile(UUID3Pattern)
rxUUID4 = regexp.MustCompile(UUID4Pattern)
rxUUID5 = regexp.MustCompile(UUID5Pattern)
)

// IsHostname returns true when the string is a valid hostname
Expand All @@ -99,24 +111,28 @@ func IsHostname(str string) bool {
return valid
}

// IsUUID returns true is the string matches a UUID, upper case is allowed
// IsUUID returns true if the string matches a UUID (in any version, including v6 and v7); upper case is allowed.
func IsUUID(str string) bool {
return rxUUID.MatchString(str)
_, err := uuid.Parse(str)
return err == nil
}

// IsUUID3 returns true is the string matches a UUID, upper case is allowed
// IsUUID3 returns true if the string matches a UUID v3; upper case is allowed.
func IsUUID3(str string) bool {
return rxUUID3.MatchString(str)
id, err := uuid.Parse(str)
return err == nil && id.Version() == uuid.Version(3)
}

// IsUUID4 returns true is the string matches a UUID, upper case is allowed
// IsUUID4 returns true if the string matches a UUID v4; upper case is allowed.
func IsUUID4(str string) bool {
return rxUUID4.MatchString(str)
id, err := uuid.Parse(str)
return err == nil && id.Version() == uuid.Version(4)
}

// IsUUID5 returns true is the string matches a UUID, upper case is allowed
// IsUUID5 returns true if the string matches a UUID v5; upper case is allowed.
func IsUUID5(str string) bool {
return rxUUID5.MatchString(str)
id, err := uuid.Parse(str)
return err == nil && id.Version() == uuid.Version(5)
}

// IsEmail validates an email address.
Expand Down
130 changes: 126 additions & 4 deletions default_test.go
Expand Up @@ -21,7 +21,9 @@ import (
"encoding/base64"
"encoding/json"
"fmt"
"io"
"reflect"
"regexp"
"strings"
"testing"

Expand Down Expand Up @@ -175,9 +177,26 @@ func TestFormatMAC(t *testing.T) {
func TestFormatUUID3(t *testing.T) {
first3 := uuid.NewMD5(uuid.NameSpaceURL, []byte("somewhere.com"))
other3 := uuid.NewMD5(uuid.NameSpaceURL, []byte("somewhereelse.com"))
other4 := uuid.Must(uuid.NewRandom())
other5 := uuid.NewSHA1(uuid.NameSpaceURL, []byte("somewhereelse.com"))
uuid3 := UUID3(first3.String())
str := other3.String()
testStringFormat(t, &uuid3, "uuid3", str, []string{}, []string{"not-a-uuid"})
testStringFormat(t, &uuid3, "uuid3", str,
[]string{
other3.String(),
strings.ReplaceAll(other3.String(), "-", ""),
},
[]string{
"not-a-uuid",
other4.String(),
other5.String(),
strings.ReplaceAll(other4.String(), "-", ""),
strings.ReplaceAll(other5.String(), "-", ""),
strings.Replace(other3.String(), "-", "", 2),
strings.Replace(other4.String(), "-", "", 2),
strings.Replace(other5.String(), "-", "", 2),
},
)

// special case for zero UUID
var uuidZero UUID3
Expand All @@ -188,10 +207,27 @@ func TestFormatUUID3(t *testing.T) {

func TestFormatUUID4(t *testing.T) {
first4 := uuid.Must(uuid.NewRandom())
other3 := uuid.NewMD5(uuid.NameSpaceURL, []byte("somewhere.com"))
other4 := uuid.Must(uuid.NewRandom())
other5 := uuid.NewSHA1(uuid.NameSpaceURL, []byte("somewhereelse.com"))
uuid4 := UUID4(first4.String())
str := other4.String()
testStringFormat(t, &uuid4, "uuid4", str, []string{}, []string{"not-a-uuid"})
testStringFormat(t, &uuid4, "uuid4", str,
[]string{
other4.String(),
strings.ReplaceAll(other4.String(), "-", ""),
},
[]string{
"not-a-uuid",
other3.String(),
other5.String(),
strings.ReplaceAll(other3.String(), "-", ""),
strings.ReplaceAll(other5.String(), "-", ""),
strings.Replace(other3.String(), "-", "", 2),
strings.Replace(other4.String(), "-", "", 2),
strings.Replace(other5.String(), "-", "", 2),
},
)

// special case for zero UUID
var uuidZero UUID4
Expand All @@ -202,10 +238,27 @@ func TestFormatUUID4(t *testing.T) {

func TestFormatUUID5(t *testing.T) {
first5 := uuid.NewSHA1(uuid.NameSpaceURL, []byte("somewhere.com"))
other3 := uuid.NewMD5(uuid.NameSpaceURL, []byte("somewhere.com"))
other4 := uuid.Must(uuid.NewRandom())
other5 := uuid.NewSHA1(uuid.NameSpaceURL, []byte("somewhereelse.com"))
uuid5 := UUID5(first5.String())
str := other5.String()
testStringFormat(t, &uuid5, "uuid5", str, []string{}, []string{"not-a-uuid"})
testStringFormat(t, &uuid5, "uuid5", str,
[]string{
other5.String(),
strings.ReplaceAll(other5.String(), "-", ""),
},
[]string{
"not-a-uuid",
other3.String(),
other4.String(),
strings.ReplaceAll(other3.String(), "-", ""),
strings.ReplaceAll(other4.String(), "-", ""),
strings.Replace(other3.String(), "-", "", 2),
strings.Replace(other4.String(), "-", "", 2),
strings.Replace(other5.String(), "-", "", 2),
},
)

// special case for zero UUID
var uuidZero UUID5
Expand All @@ -216,10 +269,34 @@ func TestFormatUUID5(t *testing.T) {

func TestFormatUUID(t *testing.T) {
first5 := uuid.NewSHA1(uuid.NameSpaceURL, []byte("somewhere.com"))
other3 := uuid.NewSHA1(uuid.NameSpaceURL, []byte("somewhereelse.com"))
other4 := uuid.Must(uuid.NewRandom())
other5 := uuid.NewSHA1(uuid.NameSpaceURL, []byte("somewhereelse.com"))
other6 := uuid.Must(uuid.NewV6())
other7 := uuid.Must(uuid.NewV7())
microsoft := "0" + other4.String() + "f"

uuid := UUID(first5.String())
str := other5.String()
testStringFormat(t, &uuid, "uuid", str, []string{}, []string{"not-a-uuid"})
testStringFormat(t, &uuid, "uuid", str,
[]string{
other3.String(),
other4.String(),
other5.String(),
strings.ReplaceAll(other3.String(), "-", ""),
strings.ReplaceAll(other4.String(), "-", ""),
strings.ReplaceAll(other5.String(), "-", ""),
other6.String(),
other7.String(),
microsoft,
},
[]string{
"not-a-uuid",
strings.Replace(other3.String(), "-", "", 2),
strings.Replace(other4.String(), "-", "", 2),
strings.Replace(other5.String(), "-", "", 2),
},
)

// special case for zero UUID
var uuidZero UUID
Expand Down Expand Up @@ -775,3 +852,48 @@ func TestDeepCopyPassword(t *testing.T) {
out3 := inNil.DeepCopy()
assert.Nil(t, out3)
}

func BenchmarkIsUUID(b *testing.B) {
const sampleSize = 100
rxUUID := regexp.MustCompile(UUIDPattern)
rxUUID3 := regexp.MustCompile(UUID3Pattern)
rxUUID4 := regexp.MustCompile(UUID4Pattern)
rxUUID5 := regexp.MustCompile(UUID5Pattern)

uuids := make([]string, 0, sampleSize)
uuid3s := make([]string, 0, sampleSize)
uuid4s := make([]string, 0, sampleSize)
uuid5s := make([]string, 0, sampleSize)

for i := 0; i < sampleSize; i++ {
seed := []byte(uuid.Must(uuid.NewRandom()).String())
uuids = append(uuids, uuid.Must(uuid.NewRandom()).String())
uuid3s = append(uuid3s, uuid.NewMD5(uuid.NameSpaceURL, seed).String())
uuid4s = append(uuid4s, uuid.Must(uuid.NewRandom()).String())
uuid5s = append(uuid5s, uuid.NewSHA1(uuid.NameSpaceURL, seed).String())
}

b.Run("IsUUID - google.uuid", benchmarkIs(uuids, IsUUID))
b.Run("IsUUID - regexp", benchmarkIs(uuids, func(id string) bool { return rxUUID.MatchString(id) }))

b.Run("IsUUIDv3 - google.uuid", benchmarkIs(uuid3s, IsUUID3))
b.Run("IsUUIDv3 - regexp", benchmarkIs(uuid3s, func(id string) bool { return rxUUID3.MatchString(id) }))

b.Run("IsUUIDv4 - google.uuid", benchmarkIs(uuid4s, IsUUID4))
b.Run("IsUUIDv4 - regexp", benchmarkIs(uuid4s, func(id string) bool { return rxUUID4.MatchString(id) }))

b.Run("IsUUIDv5 - google.uuid", benchmarkIs(uuid5s, IsUUID5))
b.Run("IsUUIDv5 - regexp", benchmarkIs(uuid5s, func(id string) bool { return rxUUID5.MatchString(id) }))
}

// benchmarkIs builds a sub-benchmark body that calls fn b.N times, walking
// through input in order and wrapping around when the end is reached.
//
// Allocation reporting is enabled and the timer is reset before the loop.
// The result of the last call is printed to io.Discard once the loop is done.
func benchmarkIs(input []string, fn func(string) bool) func(*testing.B) {
	return func(b *testing.B) {
		var last bool

		b.ReportAllocs()
		b.ResetTimer()

		for n := 0; n < b.N; n++ {
			sample := input[n%len(input)]
			last = fn(sample)
		}

		fmt.Fprintln(io.Discard, last)
	}
}
2 changes: 1 addition & 1 deletion go.mod
Expand Up @@ -3,7 +3,7 @@ module github.com/go-openapi/strfmt
require (
github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2
github.com/go-openapi/errors v0.21.0
github.com/google/uuid v1.4.0
github.com/google/uuid v1.5.0
github.com/mitchellh/mapstructure v1.5.0
github.com/oklog/ulid v1.3.1
github.com/stretchr/testify v1.8.4
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Expand Up @@ -7,8 +7,8 @@ github.com/go-openapi/errors v0.21.0/go.mod h1:jxNTMUxRCKj65yb/okJGEtahVd7uvWnuW
github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/google/go-cmp v0.5.2 h1:X2ev0eStA3AbceY54o37/0PQ/UWqKEiiO2dKL5OPaFM=
github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/uuid v1.4.0 h1:MtMxsa51/r9yyhkyLsVeVt0B+BGQZzpQiTQ4eHZ8bc4=
github.com/google/uuid v1.4.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/google/uuid v1.5.0 h1:1p67kYwdtXjb0gL0BPiP1Av9wiZPo5A8z2cWkTZ+eyU=
github.com/google/uuid v1.5.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/klauspost/compress v1.13.6/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY=
Expand Down

0 comments on commit 740a7c6

Please sign in to comment.