Merge pull request #36519 from stevvooe/resilient-cpu-sampling

daemon/stats: more resilient cpu sampling
This commit is contained in:
Yong Tang 2018-03-09 14:34:45 -08:00 committed by GitHub
commit 623b1a5c3c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 11 additions and 7 deletions

View File

@@ -90,7 +90,7 @@ func (s *Collector) Run() {
// it will grow enough in first iteration
var pairs []publishersPair
for range time.Tick(s.interval) {
for {
// it does not make sense in the first iteration,
// but saves allocations in further iterations
pairs = pairs[:0]
@@ -105,12 +105,6 @@ func (s *Collector) Run() {
continue
}
systemUsage, err := s.getSystemCPUUsage()
if err != nil {
logrus.Errorf("collecting system cpu usage: %v", err)
continue
}
onlineCPUs, err := s.getNumberOnlineCPUs()
if err != nil {
logrus.Errorf("collecting system online cpu count: %v", err)
@@ -122,6 +116,14 @@ func (s *Collector) Run() {
switch err.(type) {
case nil:
// Sample system CPU usage close to container usage to avoid
// noise in metric calculations.
systemUsage, err := s.getSystemCPUUsage()
if err != nil {
logrus.WithError(err).WithField("container_id", pair.container.ID).Errorf("collecting system cpu usage")
continue
}
// FIXME: move to containerd on Linux (not Windows)
stats.CPUStats.SystemUsage = systemUsage
stats.CPUStats.OnlineCPUs = onlineCPUs
@@ -139,6 +141,8 @@ func (s *Collector) Run() {
logrus.Errorf("collecting stats for %s: %v", pair.container.ID, err)
}
}
time.Sleep(s.interval)
}
}