Skip to content

Commit

Permalink
hostmetricsreceiver: use gopsutil cached boot time (open-telemetry#32126)
Browse files Browse the repository at this point in the history

**Description:**
This PR enables `gopsutil`'s cached boot time feature. The primary
purpose is to reduce the CPU usage of the `process` and `processes`
scrapers, which read the boot time vastly more times than necessary.

This PR also adds the `hostmetrics.process.bootTimeCache` featuregate, which
is enabled by default. Disabling it makes the `process` scraper read the boot
time at the start of every scrape, which approximates the previous behaviour
(while still reading the boot time far less often than it used to).

**Link to tracking Issue:** open-telemetry#28849

**Testing:** 
No tests were added. Ran the collector to ensure that everything still
functioned as expected with the new functionality.
  • Loading branch information
braydonk authored and rimitchell committed May 8, 2024
1 parent eaedfd9 commit f0600f1
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 0 deletions.
27 changes: 27 additions & 0 deletions .chloggen/hostmetrics-boot-time-cache.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Use this changelog template to create an entry for release notes.

# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: enhancement

# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
component: hostmetricsreceiver

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: The hostmetricsreceiver now caches the system boot time at receiver start and uses it for all subsequent calls. The featuregate `hostmetrics.process.bootTimeCache` can be disabled to restore previous behaviour.

# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
issues: [28849]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext: This change was made because it greatly reduces the CPU usage of the process and processes scrapers.

# If your change doesn't affect end users or the exported elements of any package,
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
# Optional: The change log or logs in which this entry should be included.
# e.g. '[user]' or '[user, api]'
# Include 'user' if the change is relevant to end users.
# Include 'api' if there is a change to a library API.
# Default: '[user]'
change_logs: [user]
5 changes: 5 additions & 0 deletions receiver/hostmetricsreceiver/factory.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ import (
"fmt"
"os"

"github.com/shirou/gopsutil/v3/host"
"github.com/shirou/gopsutil/v3/process"
"go.opentelemetry.io/collector/component"
"go.opentelemetry.io/collector/consumer"
"go.opentelemetry.io/collector/receiver"
Expand Down Expand Up @@ -76,6 +78,9 @@ func createMetricsReceiver(
return nil, err
}

host.EnableBootTimeCache(true)
process.EnableBootTimeCache(true)

return scraperhelper.NewScraperControllerReceiver(
&oCfg.ControllerConfig,
set,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"errors"
"runtime"

"go.opentelemetry.io/collector/featuregate"
"go.opentelemetry.io/collector/receiver"
"go.opentelemetry.io/collector/receiver/scraperhelper"

Expand All @@ -22,6 +23,17 @@ const (
TypeStr = "process"
)

// bootTimeCacheFeaturegateID is the identifier under which the boot time
// cache feature gate is registered.
var bootTimeCacheFeaturegateID = "hostmetrics.process.bootTimeCache"

// bootTimeCacheFeaturegate is the gate registered in the global feature gate
// registry at beta stage. The scraper checks it to decide whether the boot
// time should be re-read on each scrape instead of relying on the cached
// value.
var bootTimeCacheFeaturegate = featuregate.GlobalRegistry().MustRegister(
	bootTimeCacheFeaturegateID,
	featuregate.StageBeta,
	featuregate.WithRegisterDescription("When enabled, all process scrapes will use the boot time value that is cached at the start of the process."),
	featuregate.WithRegisterReferenceURL("https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/28849"),
	featuregate.WithRegisterFromVersion("v0.98.0"),
)

// Factory is the scraper factory for this package. It carries no state, so
// its zero value is ready to use.
type Factory struct{}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (

"github.com/shirou/gopsutil/v3/common"
"github.com/shirou/gopsutil/v3/cpu"
"github.com/shirou/gopsutil/v3/host"
"github.com/shirou/gopsutil/v3/process"
"go.opentelemetry.io/collector/component"
"go.opentelemetry.io/collector/pdata/pcommon"
Expand Down Expand Up @@ -104,6 +105,19 @@ func (s *scraper) start(context.Context, component.Host) error {
func (s *scraper) scrape(ctx context.Context) (pmetric.Metrics, error) {
var errs scrapererror.ScrapeErrors

// If the boot time cache featuregate is disabled, re-read the boot time
// here so that the current scrape uses a refreshed value. This matches
// the previous behaviour in all but the most extreme case of the boot
// time changing in the middle of a scrape.
if !bootTimeCacheFeaturegate.IsEnabled() {
host.EnableBootTimeCache(false)
_, err := host.BootTimeWithContext(ctx)
if err != nil {
errs.AddPartial(1, fmt.Errorf(`retrieving boot time failed with error "%w", using cached boot time`, err))
}
host.EnableBootTimeCache(true)
}

data, err := s.getProcessMetadata()
if err != nil {
var partialErr scrapererror.PartialScrapeError
Expand Down

0 comments on commit f0600f1

Please sign in to comment.