mirror of
https://github.com/moby/moby.git
synced 2022-11-09 12:21:53 -05:00
Use counter for tracking container states
Container state counts are used for reporting in the `/info` endpoint. Currently when `/info` is called, each container is iterated over and the container's `StateString()` is called. This is not very efficient with lots of containers, and is also racy since `StateString()` is not using a mutex and the mutex is not otherwise locked. We could just lock the container mutex, but this has proven to be problematic since there are frequent deadlock scenarios and we should always have the `/info` endpoint available since this endpoint is used to get general information about the docker host. Really, these metrics on `/info` should be deprecated. But until then, we can just keep a running tally in memory for each of the reported states. Signed-off-by: Brian Goff <cpuguy83@gmail.com>
This commit is contained in:
parent
52bded9868
commit
e4c03623c2
9 changed files with 176 additions and 31 deletions
|
@ -151,6 +151,7 @@ func (daemon *Daemon) create(params types.ContainerCreateConfig, managed bool) (
|
|||
return nil, err
|
||||
}
|
||||
daemon.Register(container)
|
||||
stateCtr.set(container.ID, "stopped")
|
||||
daemon.LogContainerEvent(container, "create")
|
||||
return container, nil
|
||||
}
|
||||
|
|
|
@ -198,6 +198,7 @@ func (daemon *Daemon) restore() error {
|
|||
if err := backportMountSpec(c); err != nil {
|
||||
logrus.Error("Failed to migrate old mounts to use new spec format")
|
||||
}
|
||||
daemon.setStateCounter(c)
|
||||
|
||||
if c.IsRunning() || c.IsPaused() {
|
||||
c.RestartManager().Cancel() // manually start containers because some need to wait for swarm networking
|
||||
|
|
|
@ -124,6 +124,7 @@ func (daemon *Daemon) cleanupContainer(container *container.Container, forceRemo
|
|||
logrus.Error(e)
|
||||
}
|
||||
daemon.LogContainerEvent(container, "destroy")
|
||||
stateCtr.del(container.ID)
|
||||
}
|
||||
}()
|
||||
|
||||
|
|
|
@ -4,14 +4,12 @@ import (
|
|||
"fmt"
|
||||
"os"
|
||||
"runtime"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/Sirupsen/logrus"
|
||||
"github.com/docker/docker/api"
|
||||
"github.com/docker/docker/api/types"
|
||||
"github.com/docker/docker/cli/debug"
|
||||
"github.com/docker/docker/container"
|
||||
"github.com/docker/docker/daemon/logger"
|
||||
"github.com/docker/docker/dockerversion"
|
||||
"github.com/docker/docker/pkg/fileutils"
|
||||
|
@ -58,18 +56,7 @@ func (daemon *Daemon) SystemInfo() (*types.Info, error) {
|
|||
}
|
||||
|
||||
sysInfo := sysinfo.New(true)
|
||||
|
||||
var cRunning, cPaused, cStopped int32
|
||||
daemon.containers.ApplyAll(func(c *container.Container) {
|
||||
switch c.StateString() {
|
||||
case "paused":
|
||||
atomic.AddInt32(&cPaused, 1)
|
||||
case "running":
|
||||
atomic.AddInt32(&cRunning, 1)
|
||||
default:
|
||||
atomic.AddInt32(&cStopped, 1)
|
||||
}
|
||||
})
|
||||
cRunning, cPaused, cStopped := stateCtr.get()
|
||||
|
||||
securityOptions := []string{}
|
||||
if sysInfo.AppArmor {
|
||||
|
|
|
@ -1,9 +1,15 @@
|
|||
package daemon
|
||||
|
||||
import "github.com/docker/go-metrics"
|
||||
import (
|
||||
"sync"
|
||||
|
||||
"github.com/docker/go-metrics"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
var (
|
||||
containerActions metrics.LabeledTimer
|
||||
containerStates metrics.LabeledGauge
|
||||
imageActions metrics.LabeledTimer
|
||||
networkActions metrics.LabeledTimer
|
||||
engineVersion metrics.LabeledGauge
|
||||
|
@ -11,6 +17,8 @@ var (
|
|||
engineMemory metrics.Gauge
|
||||
healthChecksCounter metrics.Counter
|
||||
healthChecksFailedCounter metrics.Counter
|
||||
|
||||
stateCtr *stateCounter
|
||||
)
|
||||
|
||||
func init() {
|
||||
|
@ -25,6 +33,7 @@ func init() {
|
|||
} {
|
||||
containerActions.WithValues(a).Update(0)
|
||||
}
|
||||
|
||||
networkActions = ns.NewLabeledTimer("network_actions", "The number of seconds it takes to process each network action", "action")
|
||||
engineVersion = ns.NewLabeledGauge("engine", "The version and commit information for the engine process", metrics.Unit("info"),
|
||||
"version",
|
||||
|
@ -38,5 +47,60 @@ func init() {
|
|||
healthChecksCounter = ns.NewCounter("health_checks", "The total number of health checks")
|
||||
healthChecksFailedCounter = ns.NewCounter("health_checks_failed", "The total number of failed health checks")
|
||||
imageActions = ns.NewLabeledTimer("image_actions", "The number of seconds it takes to process each image action", "action")
|
||||
|
||||
stateCtr = newStateCounter(ns.NewDesc("container_states", "The count of containers in various states", metrics.Unit("containers"), "state"))
|
||||
ns.Add(stateCtr)
|
||||
|
||||
metrics.Register(ns)
|
||||
}
|
||||
|
||||
type stateCounter struct {
|
||||
mu sync.Mutex
|
||||
states map[string]string
|
||||
desc *prometheus.Desc
|
||||
}
|
||||
|
||||
func newStateCounter(desc *prometheus.Desc) *stateCounter {
|
||||
return &stateCounter{
|
||||
states: make(map[string]string),
|
||||
desc: desc,
|
||||
}
|
||||
}
|
||||
|
||||
func (ctr *stateCounter) get() (running int, paused int, stopped int) {
|
||||
ctr.mu.Lock()
|
||||
defer ctr.mu.Unlock()
|
||||
|
||||
states := map[string]int{
|
||||
"running": 0,
|
||||
"paused": 0,
|
||||
"stopped": 0,
|
||||
}
|
||||
for _, state := range ctr.states {
|
||||
states[state]++
|
||||
}
|
||||
return states["running"], states["paused"], states["stopped"]
|
||||
}
|
||||
|
||||
func (ctr *stateCounter) set(id, label string) {
|
||||
ctr.mu.Lock()
|
||||
ctr.states[id] = label
|
||||
ctr.mu.Unlock()
|
||||
}
|
||||
|
||||
func (ctr *stateCounter) del(id string) {
|
||||
ctr.mu.Lock()
|
||||
delete(ctr.states, id)
|
||||
ctr.mu.Unlock()
|
||||
}
|
||||
|
||||
func (ctr *stateCounter) Describe(ch chan<- *prometheus.Desc) {
|
||||
ch <- ctr.desc
|
||||
}
|
||||
|
||||
func (ctr *stateCounter) Collect(ch chan<- prometheus.Metric) {
|
||||
running, paused, stopped := ctr.get()
|
||||
ch <- prometheus.MustNewConstMetric(ctr.desc, prometheus.GaugeValue, float64(running), "running")
|
||||
ch <- prometheus.MustNewConstMetric(ctr.desc, prometheus.GaugeValue, float64(paused), "paused")
|
||||
ch <- prometheus.MustNewConstMetric(ctr.desc, prometheus.GaugeValue, float64(stopped), "stopped")
|
||||
}
|
||||
|
|
|
@ -9,10 +9,22 @@ import (
|
|||
|
||||
"github.com/Sirupsen/logrus"
|
||||
"github.com/docker/docker/api/types"
|
||||
"github.com/docker/docker/container"
|
||||
"github.com/docker/docker/libcontainerd"
|
||||
"github.com/docker/docker/restartmanager"
|
||||
)
|
||||
|
||||
func (daemon *Daemon) setStateCounter(c *container.Container) {
|
||||
switch c.StateString() {
|
||||
case "paused":
|
||||
stateCtr.set(c.ID, "paused")
|
||||
case "running":
|
||||
stateCtr.set(c.ID, "running")
|
||||
default:
|
||||
stateCtr.set(c.ID, "stopped")
|
||||
}
|
||||
}
|
||||
|
||||
// StateChanged updates daemon state changes from containerd
|
||||
func (daemon *Daemon) StateChanged(id string, e libcontainerd.StateInfo) error {
|
||||
c := daemon.containers.Get(id)
|
||||
|
@ -81,6 +93,8 @@ func (daemon *Daemon) StateChanged(id string, e libcontainerd.StateInfo) error {
|
|||
}()
|
||||
}
|
||||
|
||||
daemon.setStateCounter(c)
|
||||
|
||||
defer c.Unlock()
|
||||
if err := c.ToDisk(); err != nil {
|
||||
return err
|
||||
|
@ -109,15 +123,19 @@ func (daemon *Daemon) StateChanged(id string, e libcontainerd.StateInfo) error {
|
|||
c.SetRunning(int(e.Pid), e.State == libcontainerd.StateStart)
|
||||
c.HasBeenManuallyStopped = false
|
||||
c.HasBeenStartedBefore = true
|
||||
daemon.setStateCounter(c)
|
||||
|
||||
if err := c.ToDisk(); err != nil {
|
||||
c.Reset(false)
|
||||
return err
|
||||
}
|
||||
daemon.initHealthMonitor(c)
|
||||
|
||||
daemon.LogContainerEvent(c, "start")
|
||||
case libcontainerd.StatePause:
|
||||
// Container is already locked in this case
|
||||
c.Paused = true
|
||||
daemon.setStateCounter(c)
|
||||
if err := c.ToDisk(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -126,12 +144,12 @@ func (daemon *Daemon) StateChanged(id string, e libcontainerd.StateInfo) error {
|
|||
case libcontainerd.StateResume:
|
||||
// Container is already locked in this case
|
||||
c.Paused = false
|
||||
daemon.setStateCounter(c)
|
||||
if err := c.ToDisk(); err != nil {
|
||||
return err
|
||||
}
|
||||
daemon.updateHealthMonitor(c)
|
||||
daemon.LogContainerEvent(c, "unpause")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -133,7 +133,7 @@ github.com/flynn-archive/go-shlex 3f9db97f856818214da2e1057f8ad84803971cff
|
|||
github.com/Nvveen/Gotty a8b993ba6abdb0e0c12b0125c603323a71c7790c https://github.com/ijc25/Gotty
|
||||
|
||||
# metrics
|
||||
github.com/docker/go-metrics 86138d05f285fd9737a99bee2d9be30866b59d72
|
||||
github.com/docker/go-metrics 8fd5772bf1584597834c6f7961a530f06cbfbb87
|
||||
|
||||
# composefile
|
||||
github.com/mitchellh/mapstructure f3009df150dadf309fdee4a54ed65c124afad715
|
||||
|
|
63
vendor/github.com/docker/go-metrics/README.md
generated
vendored
63
vendor/github.com/docker/go-metrics/README.md
generated
vendored
|
@ -2,10 +2,67 @@
|
|||
|
||||
This package is a small wrapper around the Prometheus Go client to help enforce convention and best practices for metrics collection in Docker projects.
|
||||
|
||||
## Status
|
||||
## Best Practices
|
||||
|
||||
This project is a work in progress.
|
||||
It is under heavy development and not intended to be used.
|
||||
This package is meant to be used for collecting metrics in Docker projects.
|
||||
It is not meant to be used as a replacement for the prometheus client but to help enforce consistent naming across metrics collected.
|
||||
If you have not already read the Prometheus best practices around naming and labels, you can read the page [here](https://prometheus.io/docs/practices/naming/).
|
||||
|
||||
The following are a few Docker specific rules that will help you name and work with metrics in your project.
|
||||
|
||||
1. Namespace and Subsystem
|
||||
|
||||
This package provides you with a namespace type that allows you to specify the same namespace and subsystem for your metrics.
|
||||
|
||||
```go
|
||||
ns := metrics.NewNamespace("engine", "daemon", metrics.Labels{
|
||||
"version": dockerversion.Version,
|
||||
"commit": dockerversion.GitCommit,
|
||||
})
|
||||
```
|
||||
|
||||
In the example above we are creating metrics for the Docker engine's daemon package.
|
||||
In this example, `engine` is the namespace and `daemon` is the subsystem or package where we are collecting the metrics.
|
||||
|
||||
A namespace also allows you to attach constant labels to the metrics such as the git commit and version that it is collecting.
|
||||
|
||||
2. Declaring your Metrics
|
||||
|
||||
Try to keep all your metric declarations in one file.
|
||||
This makes it easy for others to see what constant labels are defined on the namespace and what labels are defined on the metrics when they are created.
|
||||
|
||||
3. Use labels instead of multiple metrics
|
||||
|
||||
Labels allow you to define one metric such as the time it takes to perform a certain action on an object.
|
||||
If we wanted to collect timings on various container actions such as create, start, and delete then we can define one metric called `container_actions` and use labels to specify the type of action.
|
||||
|
||||
|
||||
```go
|
||||
containerActions = ns.NewLabeledTimer("container_actions", "The number of milliseconds it takes to process each container action", "action")
|
||||
```
|
||||
|
||||
The last parameter is the label name or key.
|
||||
When adding a data point to the metric you will use the `WithValues` function to specify the `action` that you are collecting for.
|
||||
|
||||
```go
|
||||
containerActions.WithValues("create").UpdateSince(start)
|
||||
```
|
||||
|
||||
4. Always use a unit
|
||||
|
||||
The metric name should describe what you are measuring but you also need to provide the unit that it is being measured with.
|
||||
For a timer, the standard unit is seconds and a counter's standard unit is a total.
|
||||
For gauges you must provide the unit.
|
||||
This package provides a standard set of units for use within the Docker projects.
|
||||
|
||||
```go
|
||||
Nanoseconds Unit = "nanoseconds"
|
||||
Seconds Unit = "seconds"
|
||||
Bytes Unit = "bytes"
|
||||
Total Unit = "total"
|
||||
```
|
||||
|
||||
If you need a unit that is not defined in the package, please open a PR to add it, but first check whether one of the existing units will work for your metric, e.g. seconds or nanoseconds instead of adding milliseconds.
|
||||
|
||||
## Docs
|
||||
|
||||
|
|
38
vendor/github.com/docker/go-metrics/namespace.go
generated
vendored
38
vendor/github.com/docker/go-metrics/namespace.go
generated
vendored
|
@ -40,21 +40,25 @@ type Namespace struct {
|
|||
// Only metrics created with the returned namespace will get the new constant
|
||||
// labels. The returned namespace must be registered separately.
|
||||
func (n *Namespace) WithConstLabels(labels Labels) *Namespace {
|
||||
ns := *n
|
||||
ns.metrics = nil // blank this out
|
||||
ns.labels = mergeLabels(ns.labels, labels)
|
||||
return &ns
|
||||
n.mu.Lock()
|
||||
ns := &Namespace{
|
||||
name: n.name,
|
||||
subsystem: n.subsystem,
|
||||
labels: mergeLabels(n.labels, labels),
|
||||
}
|
||||
n.mu.Unlock()
|
||||
return ns
|
||||
}
|
||||
|
||||
func (n *Namespace) NewCounter(name, help string) Counter {
|
||||
c := &counter{pc: prometheus.NewCounter(n.newCounterOpts(name, help))}
|
||||
n.addMetric(c)
|
||||
n.Add(c)
|
||||
return c
|
||||
}
|
||||
|
||||
func (n *Namespace) NewLabeledCounter(name, help string, labels ...string) LabeledCounter {
|
||||
c := &labeledCounter{pc: prometheus.NewCounterVec(n.newCounterOpts(name, help), labels)}
|
||||
n.addMetric(c)
|
||||
n.Add(c)
|
||||
return c
|
||||
}
|
||||
|
||||
|
@ -72,7 +76,7 @@ func (n *Namespace) NewTimer(name, help string) Timer {
|
|||
t := &timer{
|
||||
m: prometheus.NewHistogram(n.newTimerOpts(name, help)),
|
||||
}
|
||||
n.addMetric(t)
|
||||
n.Add(t)
|
||||
return t
|
||||
}
|
||||
|
||||
|
@ -80,7 +84,7 @@ func (n *Namespace) NewLabeledTimer(name, help string, labels ...string) Labeled
|
|||
t := &labeledTimer{
|
||||
m: prometheus.NewHistogramVec(n.newTimerOpts(name, help), labels),
|
||||
}
|
||||
n.addMetric(t)
|
||||
n.Add(t)
|
||||
return t
|
||||
}
|
||||
|
||||
|
@ -98,7 +102,7 @@ func (n *Namespace) NewGauge(name, help string, unit Unit) Gauge {
|
|||
g := &gauge{
|
||||
pg: prometheus.NewGauge(n.newGaugeOpts(name, help, unit)),
|
||||
}
|
||||
n.addMetric(g)
|
||||
n.Add(g)
|
||||
return g
|
||||
}
|
||||
|
||||
|
@ -106,7 +110,7 @@ func (n *Namespace) NewLabeledGauge(name, help string, unit Unit, labels ...stri
|
|||
g := &labeledGauge{
|
||||
pg: prometheus.NewGaugeVec(n.newGaugeOpts(name, help, unit), labels),
|
||||
}
|
||||
n.addMetric(g)
|
||||
n.Add(g)
|
||||
return g
|
||||
}
|
||||
|
||||
|
@ -138,12 +142,24 @@ func (n *Namespace) Collect(ch chan<- prometheus.Metric) {
|
|||
}
|
||||
}
|
||||
|
||||
func (n *Namespace) addMetric(collector prometheus.Collector) {
|
||||
func (n *Namespace) Add(collector prometheus.Collector) {
|
||||
n.mu.Lock()
|
||||
n.metrics = append(n.metrics, collector)
|
||||
n.mu.Unlock()
|
||||
}
|
||||
|
||||
func (n *Namespace) NewDesc(name, help string, unit Unit, labels ...string) *prometheus.Desc {
|
||||
if string(unit) != "" {
|
||||
name = fmt.Sprintf("%s_%s", name, unit)
|
||||
}
|
||||
namespace := n.name
|
||||
if n.subsystem != "" {
|
||||
namespace = fmt.Sprintf("%s_%s", namespace, n.subsystem)
|
||||
}
|
||||
name = fmt.Sprintf("%s_%s", namespace, name)
|
||||
return prometheus.NewDesc(name, help, labels, prometheus.Labels(n.labels))
|
||||
}
|
||||
|
||||
// mergeLabels merges two or more labels objects into a single map, favoring
|
||||
// the later labels.
|
||||
func mergeLabels(lbs ...Labels) Labels {
|
||||
|
|
Loading…
Reference in a new issue