Correct CPU usage calculation in presence of offline CPUs and newer Linux

In https://github.com/torvalds/linux/commit/5ca3726 (released in v4.7-rc1) the
content of the `cpuacct.usage_percpu` file in sysfs was changed to include both
online and offline cpus. This broke the arithmetic in the stats helpers used by
`docker stats`, since it was using the length of the PerCPUUsage array as a
proxy for the number of online CPUs.

Add current number of online CPUs to types.StatsJSON and use it in the
calculation.

Keep a fallback to `len(v.CPUStats.CPUUsage.PercpuUsage)` so this code
continues to work when talking to an older daemon. An old client talking to a
new daemon will ignore the new field and behave as before.

Fixes #28941.

Signed-off-by: Ian Campbell <ian.campbell@docker.com>
This commit is contained in:
Ian Campbell 2017-03-06 17:29:09 +00:00
parent 95cb74818a
commit 115f91d757
7 changed files with 39 additions and 1 deletions

View File

@ -3468,6 +3468,10 @@ paths:
The `precpu_stats` is the CPU statistic of last read, which is used
for calculating the CPU usage percentage. It is not the same as the
`cpu_stats` field.
If either `precpu_stats.online_cpus` or `cpu_stats.online_cpus` is
nil then for compatibility with older daemons the length of the
corresponding `cpu_usage.percpu_usage` array should be used.
operationId: "ContainerStats"
produces: ["application/json"]
responses:
@ -3546,6 +3550,7 @@ paths:
total_usage: 100215355
usage_in_kernelmode: 30000000
system_cpu_usage: 739306590000000
online_cpus: 4
throttling_data:
periods: 0
throttled_periods: 0
@ -3561,6 +3566,7 @@ paths:
total_usage: 100093996
usage_in_kernelmode: 30000000
system_cpu_usage: 9492140000000
online_cpus: 4
throttling_data:
periods: 0
throttled_periods: 0

View File

@ -47,6 +47,9 @@ type CPUStats struct {
// System Usage. Linux only.
SystemUsage uint64 `json:"system_cpu_usage,omitempty"`
// Online CPUs. Linux only.
OnlineCPUs uint32 `json:"online_cpus,omitempty"`
// Throttling Data. Linux only.
ThrottlingData ThrottlingData `json:"throttling_data,omitempty"`
}

View File

@ -178,10 +178,14 @@ func calculateCPUPercentUnix(previousCPU, previousSystem uint64, v *types.StatsJ
cpuDelta = float64(v.CPUStats.CPUUsage.TotalUsage) - float64(previousCPU)
// calculate the change for the entire system between readings
systemDelta = float64(v.CPUStats.SystemUsage) - float64(previousSystem)
onlineCPUs = float64(v.CPUStats.OnlineCPUs)
)
if onlineCPUs == 0.0 {
onlineCPUs = float64(len(v.CPUStats.CPUUsage.PercpuUsage))
}
if systemDelta > 0.0 && cpuDelta > 0.0 {
cpuPercent = (cpuDelta / systemDelta) * float64(len(v.CPUStats.CPUUsage.PercpuUsage)) * 100.0
cpuPercent = (cpuDelta / systemDelta) * onlineCPUs * 100.0
}
return cpuPercent
}

View File

@ -80,6 +80,12 @@ func (s *Collector) Run() {
continue
}
onlineCPUs, err := s.getNumberOnlineCPUs()
if err != nil {
logrus.Errorf("collecting system online cpu count: %v", err)
continue
}
for _, pair := range pairs {
stats, err := s.supervisor.GetContainerStats(pair.container)
if err != nil {
@ -97,6 +103,7 @@ func (s *Collector) Run() {
}
// FIXME: move to containerd on Linux (not Windows)
stats.CPUStats.SystemUsage = systemUsage
stats.CPUStats.OnlineCPUs = onlineCPUs
pair.publisher.Publish(*stats)
}

View File

@ -11,6 +11,11 @@ import (
"github.com/opencontainers/runc/libcontainer/system"
)
/*
#include <unistd.h>
*/
import "C"
// platformNewStatsCollector performs platform specific initialisation of the
// Collector structure.
func platformNewStatsCollector(s *Collector) {
@ -64,3 +69,11 @@ func (s *Collector) getSystemCPUUsage() (uint64, error) {
}
return 0, fmt.Errorf("invalid stat format. Error trying to parse the '/proc/stat' file")
}
func (s *Collector) getNumberOnlineCPUs() (uint32, error) {
i, err := C.sysconf(C._SC_NPROCESSORS_ONLN)
if err != nil {
return 0, err
}
return uint32(i), nil
}

View File

@ -13,3 +13,7 @@ func platformNewStatsCollector(s *Collector) {
func (s *Collector) getSystemCPUUsage() (uint64, error) {
return 0, nil
}
func (s *Collector) getNumberOnlineCPUs() (uint32, error) {
return 0, nil
}

View File

@ -24,6 +24,7 @@ keywords: "API, Docker, rcli, REST, documentation"
* `POST /build` now accepts `extrahosts` parameter to specify a host to ip mapping to use during the build.
* `POST /services/create` and `POST /services/(id or name)/update` now accept a `rollback` value for `FailureAction`.
* `POST /services/create` and `POST /services/(id or name)/update` now accept an optional `RollbackConfig` object which specifies rollback options.
* `GET /containers/(id or name)/stats` now includes an `online_cpus` field in both `precpu_stats` and `cpu_stats`. If this field is `nil` then for compatibility with older daemons the length of the corresponding `cpu_usage.percpu_usage` array should be used.
## v1.26 API changes