Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make hash collisions in the registry much less likely #657

Merged
merged 3 commits into from Oct 15, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions go.mod
Expand Up @@ -2,6 +2,7 @@ module github.com/prometheus/client_golang

require (
github.com/beorn7/perks v1.0.1
github.com/cespare/xxhash/v2 v2.1.0
github.com/golang/protobuf v1.3.2
github.com/json-iterator/go v1.1.7
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Expand Up @@ -8,6 +8,8 @@ github.com/beorn7/perks v1.0.0 h1:HWo1m869IqiPhD389kmkxeTalrjNbbJTC8LXupb+sl0=
github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/cespare/xxhash/v2 v2.1.0 h1:yTUvW7Vhb89inJ+8irsUqiWjh8iT6sQPZiQzI6ReGkA=
github.com/cespare/xxhash/v2 v2.1.0/go.mod h1:dgIUBU3pDso/gPgZ1osOZ0iQf77oPR28Tjxl5dIMyVM=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
Expand Down
21 changes: 11 additions & 10 deletions prometheus/desc.go
Expand Up @@ -19,6 +19,7 @@ import (
"sort"
"strings"

"github.com/cespare/xxhash/v2"
"github.com/golang/protobuf/proto"
"github.com/prometheus/common/model"

Expand Down Expand Up @@ -126,24 +127,24 @@ func NewDesc(fqName, help string, variableLabels []string, constLabels Labels) *
return d
}

vh := hashNew()
xxh := xxhash.New()
for _, val := range labelValues {
vh = hashAdd(vh, val)
vh = hashAddByte(vh, separatorByte)
xxh.WriteString(val)
xxh.Write(separatorByteSlice)
}
d.id = vh
d.id = xxh.Sum64()
// Sort labelNames so that order doesn't matter for the hash.
sort.Strings(labelNames)
// Now hash together (in this order) the help string and the sorted
// label names.
lh := hashNew()
lh = hashAdd(lh, help)
lh = hashAddByte(lh, separatorByte)
xxh.Reset()
xxh.WriteString(help)
xxh.Write(separatorByteSlice)
for _, labelName := range labelNames {
lh = hashAdd(lh, labelName)
lh = hashAddByte(lh, separatorByte)
xxh.WriteString(labelName)
xxh.Write(separatorByteSlice)
}
d.dimHash = lh
d.dimHash = xxh.Sum64()

d.constLabelPairs = make([]*dto.LabelPair, 0, len(constLabels))
for n, v := range constLabels {
Expand Down
2 changes: 2 additions & 0 deletions prometheus/metric.go
Expand Up @@ -24,6 +24,8 @@ import (

const separatorByte byte = 255

var separatorByteSlice = []byte{255} // For convenient use with xxhash.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not that it's likely to change, but should this not be []byte{separatorByte}

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, that's better. (Originally, I wanted to remove separatorByte altogether, but then realized I want to keep the old hashing for vectors.) Will sent you a PR.


// A Metric models a single sample value with its meta data being exported to
// Prometheus. Implementations of Metric in this package are Gauge, Counter,
// Histogram, Summary, and Untyped.
Expand Down
26 changes: 14 additions & 12 deletions prometheus/registry.go
Expand Up @@ -25,6 +25,7 @@ import (
"sync"
"unicode/utf8"

"github.com/cespare/xxhash/v2"
"github.com/golang/protobuf/proto"
"github.com/prometheus/common/expfmt"

Expand Down Expand Up @@ -266,7 +267,7 @@ func (r *Registry) Register(c Collector) error {
descChan = make(chan *Desc, capDescChan)
newDescIDs = map[uint64]struct{}{}
newDimHashesByName = map[string]uint64{}
collectorID uint64 // Just a sum of all desc IDs.
collectorID uint64 // All desc IDs XOR'd together.
duplicateDescErr error
)
go func() {
Expand All @@ -293,12 +294,12 @@ func (r *Registry) Register(c Collector) error {
if _, exists := r.descIDs[desc.id]; exists {
duplicateDescErr = fmt.Errorf("descriptor %s already exists with the same fully-qualified name and const label values", desc)
}
// If it is not a duplicate desc in this collector, add it to
// If it is not a duplicate desc in this collector, XOR it to
// the collectorID. (We allow duplicate descs within the same
// collector, but their existence must be a no-op.)
if _, exists := newDescIDs[desc.id]; !exists {
newDescIDs[desc.id] = struct{}{}
collectorID += desc.id
collectorID ^= desc.id
}

// Are all the label names and the help string consistent with
Expand Down Expand Up @@ -875,9 +876,9 @@ func checkMetricConsistency(
}

// Is the metric unique (i.e. no other metric with the same name and the same labels)?
h := hashNew()
h = hashAdd(h, name)
h = hashAddByte(h, separatorByte)
h := xxhash.New()
h.WriteString(name)
h.Write(separatorByteSlice)
// Make sure label pairs are sorted. We depend on it for the consistency
// check.
if !sort.IsSorted(labelPairSorter(dtoMetric.Label)) {
Expand All @@ -888,18 +889,19 @@ func checkMetricConsistency(
dtoMetric.Label = copiedLabels
}
for _, lp := range dtoMetric.Label {
h = hashAdd(h, lp.GetName())
h = hashAddByte(h, separatorByte)
h = hashAdd(h, lp.GetValue())
h = hashAddByte(h, separatorByte)
h.WriteString(lp.GetName())
h.Write(separatorByteSlice)
h.WriteString(lp.GetValue())
h.Write(separatorByteSlice)
}
if _, exists := metricHashes[h]; exists {
hSum := h.Sum64()
if _, exists := metricHashes[hSum]; exists {
return fmt.Errorf(
"collected metric %q { %s} was collected before with the same name and label values",
name, dtoMetric,
)
}
metricHashes[h] = struct{}{}
metricHashes[hSum] = struct{}{}
return nil
}

Expand Down
87 changes: 87 additions & 0 deletions prometheus/registry_test.go
Expand Up @@ -1070,3 +1070,90 @@ test_summary_count{name="foo"} 2
)
}
}

// collidingCollector is a collection of prometheus.Collectors,
// and is itself a prometheus.Collector.
type collidingCollector struct {
i int
name string

a, b, c, d prometheus.Collector
}

// Describe satisifies part of the prometheus.Collector interface.
func (m *collidingCollector) Describe(desc chan<- *prometheus.Desc) {
m.a.Describe(desc)
m.b.Describe(desc)
m.c.Describe(desc)
m.d.Describe(desc)
}

// Collect satisifies part of the prometheus.Collector interface.
func (m *collidingCollector) Collect(metric chan<- prometheus.Metric) {
m.a.Collect(metric)
m.b.Collect(metric)
m.c.Collect(metric)
m.d.Collect(metric)
}

// TestAlreadyRegistered will fail with the old, weaker hash function. It is
// taken from https://play.golang.org/p/HpV7YE6LI_4 , authored by @awilliams.
func TestAlreadyRegisteredCollision(t *testing.T) {

reg := prometheus.NewRegistry()

for i := 0; i < 10000; i++ {
// A collector should be considered unique if its name and const
// label values are unique.

name := fmt.Sprintf("test-collector-%010d", i)

collector := collidingCollector{
i: i,
name: name,

a: prometheus.NewCounter(prometheus.CounterOpts{
Name: "my_collector_a",
ConstLabels: prometheus.Labels{
"name": name,
"type": "test",
},
}),
b: prometheus.NewCounter(prometheus.CounterOpts{
Name: "my_collector_b",
ConstLabels: prometheus.Labels{
"name": name,
"type": "test",
},
}),
c: prometheus.NewCounter(prometheus.CounterOpts{
Name: "my_collector_c",
ConstLabels: prometheus.Labels{
"name": name,
"type": "test",
},
}),
d: prometheus.NewCounter(prometheus.CounterOpts{
Name: "my_collector_d",
ConstLabels: prometheus.Labels{
"name": name,
"type": "test",
},
}),
}

// Register should not fail, since each collector has a unique
// set of sub-collectors, determined by their names and const label values.
if err := reg.Register(&collector); err != nil {
alreadyRegErr, ok := err.(prometheus.AlreadyRegisteredError)
if !ok {
t.Fatal(err)
}

previous := alreadyRegErr.ExistingCollector.(*collidingCollector)
current := alreadyRegErr.NewCollector.(*collidingCollector)

t.Errorf("Unexpected registration error: %q\nprevious collector: %s (i=%d)\ncurrent collector %s (i=%d)", alreadyRegErr, previous.name, previous.i, current.name, current.i)
}
}
}