diff --git a/daemon/create.go b/daemon/create.go index 55a106c646..c6d2b40648 100644 --- a/daemon/create.go +++ b/daemon/create.go @@ -151,6 +151,7 @@ func (daemon *Daemon) create(params types.ContainerCreateConfig, managed bool) ( return nil, err } daemon.Register(container) + stateCtr.set(container.ID, "stopped") daemon.LogContainerEvent(container, "create") return container, nil } diff --git a/daemon/daemon.go b/daemon/daemon.go index 59d84a0260..35ff2a66ef 100644 --- a/daemon/daemon.go +++ b/daemon/daemon.go @@ -198,6 +198,7 @@ func (daemon *Daemon) restore() error { if err := backportMountSpec(c); err != nil { logrus.Error("Failed to migrate old mounts to use new spec format") } + daemon.setStateCounter(c) if c.IsRunning() || c.IsPaused() { c.RestartManager().Cancel() // manually start containers because some need to wait for swarm networking diff --git a/daemon/delete.go b/daemon/delete.go index fd1759ee1c..483241db53 100644 --- a/daemon/delete.go +++ b/daemon/delete.go @@ -124,6 +124,7 @@ func (daemon *Daemon) cleanupContainer(container *container.Container, forceRemo logrus.Error(e) } daemon.LogContainerEvent(container, "destroy") + stateCtr.del(container.ID) } }() diff --git a/daemon/info.go b/daemon/info.go index b6c2565f44..c3f1c3b8b1 100644 --- a/daemon/info.go +++ b/daemon/info.go @@ -4,14 +4,12 @@ import ( "fmt" "os" "runtime" - "sync/atomic" "time" "github.com/Sirupsen/logrus" "github.com/docker/docker/api" "github.com/docker/docker/api/types" "github.com/docker/docker/cli/debug" - "github.com/docker/docker/container" "github.com/docker/docker/daemon/logger" "github.com/docker/docker/dockerversion" "github.com/docker/docker/pkg/fileutils" @@ -58,18 +56,7 @@ func (daemon *Daemon) SystemInfo() (*types.Info, error) { } sysInfo := sysinfo.New(true) - - var cRunning, cPaused, cStopped int32 - daemon.containers.ApplyAll(func(c *container.Container) { - switch c.StateString() { - case "paused": - atomic.AddInt32(&cPaused, 1) - case "running": - 
atomic.AddInt32(&cRunning, 1) - default: - atomic.AddInt32(&cStopped, 1) - } - }) + cRunning, cPaused, cStopped := stateCtr.get() securityOptions := []string{} if sysInfo.AppArmor { diff --git a/daemon/metrics.go b/daemon/metrics.go index 69dbfd9378..65d92901ce 100644 --- a/daemon/metrics.go +++ b/daemon/metrics.go @@ -1,9 +1,15 @@ package daemon -import "github.com/docker/go-metrics" +import ( + "sync" + + "github.com/docker/go-metrics" + "github.com/prometheus/client_golang/prometheus" +) var ( containerActions metrics.LabeledTimer + containerStates metrics.LabeledGauge imageActions metrics.LabeledTimer networkActions metrics.LabeledTimer engineVersion metrics.LabeledGauge @@ -11,6 +17,8 @@ var ( engineMemory metrics.Gauge healthChecksCounter metrics.Counter healthChecksFailedCounter metrics.Counter + + stateCtr *stateCounter ) func init() { @@ -25,6 +33,7 @@ func init() { } { containerActions.WithValues(a).Update(0) } + networkActions = ns.NewLabeledTimer("network_actions", "The number of seconds it takes to process each network action", "action") engineVersion = ns.NewLabeledGauge("engine", "The version and commit information for the engine process", metrics.Unit("info"), "version", @@ -38,5 +47,60 @@ func init() { healthChecksCounter = ns.NewCounter("health_checks", "The total number of health checks") healthChecksFailedCounter = ns.NewCounter("health_checks_failed", "The total number of failed health checks") imageActions = ns.NewLabeledTimer("image_actions", "The number of seconds it takes to process each image action", "action") + + stateCtr = newStateCounter(ns.NewDesc("container_states", "The count of containers in various states", metrics.Unit("containers"), "state")) + ns.Add(stateCtr) + metrics.Register(ns) } + +type stateCounter struct { + mu sync.Mutex + states map[string]string + desc *prometheus.Desc +} + +func newStateCounter(desc *prometheus.Desc) *stateCounter { + return &stateCounter{ + states: make(map[string]string), + desc: desc, + } +} + 
+func (ctr *stateCounter) get() (running int, paused int, stopped int) { + ctr.mu.Lock() + defer ctr.mu.Unlock() + + states := map[string]int{ + "running": 0, + "paused": 0, + "stopped": 0, + } + for _, state := range ctr.states { + states[state]++ + } + return states["running"], states["paused"], states["stopped"] +} + +func (ctr *stateCounter) set(id, label string) { + ctr.mu.Lock() + ctr.states[id] = label + ctr.mu.Unlock() +} + +func (ctr *stateCounter) del(id string) { + ctr.mu.Lock() + delete(ctr.states, id) + ctr.mu.Unlock() +} + +func (ctr *stateCounter) Describe(ch chan<- *prometheus.Desc) { + ch <- ctr.desc +} + +func (ctr *stateCounter) Collect(ch chan<- prometheus.Metric) { + running, paused, stopped := ctr.get() + ch <- prometheus.MustNewConstMetric(ctr.desc, prometheus.GaugeValue, float64(running), "running") + ch <- prometheus.MustNewConstMetric(ctr.desc, prometheus.GaugeValue, float64(paused), "paused") + ch <- prometheus.MustNewConstMetric(ctr.desc, prometheus.GaugeValue, float64(stopped), "stopped") +} diff --git a/daemon/monitor.go b/daemon/monitor.go index 9227525e72..b243b74784 100644 --- a/daemon/monitor.go +++ b/daemon/monitor.go @@ -9,10 +9,22 @@ import ( "github.com/Sirupsen/logrus" "github.com/docker/docker/api/types" + "github.com/docker/docker/container" "github.com/docker/docker/libcontainerd" "github.com/docker/docker/restartmanager" ) +func (daemon *Daemon) setStateCounter(c *container.Container) { + switch c.StateString() { + case "paused": + stateCtr.set(c.ID, "paused") + case "running": + stateCtr.set(c.ID, "running") + default: + stateCtr.set(c.ID, "stopped") + } +} + // StateChanged updates daemon state changes from containerd func (daemon *Daemon) StateChanged(id string, e libcontainerd.StateInfo) error { c := daemon.containers.Get(id) @@ -81,6 +93,8 @@ func (daemon *Daemon) StateChanged(id string, e libcontainerd.StateInfo) error { }() } + daemon.setStateCounter(c) + defer c.Unlock() if err := c.ToDisk(); err != nil { return 
err @@ -109,15 +123,19 @@ func (daemon *Daemon) StateChanged(id string, e libcontainerd.StateInfo) error { c.SetRunning(int(e.Pid), e.State == libcontainerd.StateStart) c.HasBeenManuallyStopped = false c.HasBeenStartedBefore = true + daemon.setStateCounter(c) + if err := c.ToDisk(); err != nil { c.Reset(false) return err } daemon.initHealthMonitor(c) + daemon.LogContainerEvent(c, "start") case libcontainerd.StatePause: // Container is already locked in this case c.Paused = true + daemon.setStateCounter(c) if err := c.ToDisk(); err != nil { return err } @@ -126,12 +144,12 @@ func (daemon *Daemon) StateChanged(id string, e libcontainerd.StateInfo) error { case libcontainerd.StateResume: // Container is already locked in this case c.Paused = false + daemon.setStateCounter(c) if err := c.ToDisk(); err != nil { return err } daemon.updateHealthMonitor(c) daemon.LogContainerEvent(c, "unpause") } - return nil } diff --git a/vendor.conf b/vendor.conf index 9632ed26aa..ed3c2a5fca 100644 --- a/vendor.conf +++ b/vendor.conf @@ -133,7 +133,7 @@ github.com/flynn-archive/go-shlex 3f9db97f856818214da2e1057f8ad84803971cff github.com/Nvveen/Gotty a8b993ba6abdb0e0c12b0125c603323a71c7790c https://github.com/ijc25/Gotty # metrics -github.com/docker/go-metrics 86138d05f285fd9737a99bee2d9be30866b59d72 +github.com/docker/go-metrics 8fd5772bf1584597834c6f7961a530f06cbfbb87 # composefile github.com/mitchellh/mapstructure f3009df150dadf309fdee4a54ed65c124afad715 diff --git a/vendor/github.com/docker/go-metrics/README.md b/vendor/github.com/docker/go-metrics/README.md index 7407f34ce8..fdf7fb746f 100644 --- a/vendor/github.com/docker/go-metrics/README.md +++ b/vendor/github.com/docker/go-metrics/README.md @@ -2,10 +2,67 @@ This package is small wrapper around the prometheus go client to help enforce convention and best practices for metrics collection in Docker projects. -## Status +## Best Practices -This project is a work in progress. 
-It is under heavy development and not intended to be used. +This package is meant to be used for collecting metrics in Docker projects. +It is not meant to be used as a replacement for the prometheus client but to help enforce consistent naming across metrics collected. +If you have not already read the prometheus best practices around naming and labels, you can read the page [here](https://prometheus.io/docs/practices/naming/). + +The following are a few Docker-specific rules that will help you name and work with metrics in your project. + +1. Namespace and Subsystem + +This package provides you with a namespace type that allows you to specify the same namespace and subsystem for your metrics. + +```go +ns := metrics.NewNamespace("engine", "daemon", metrics.Labels{ + "version": dockerversion.Version, + "commit": dockerversion.GitCommit, +}) +``` + +In the example above we are creating metrics for the Docker engine's daemon package. +`engine` is the namespace in this example, and `daemon` is the subsystem, i.e. the package where we are collecting the metrics. + +A namespace also allows you to attach constant labels to the metrics such as the git commit and version that it is collecting. + +2. Declaring your Metrics + +Try to keep all your metric declarations in one file. +This makes it easy for others to see what constant labels are defined on the namespace and what labels are defined on the metrics when they are created. + +3. Use labels instead of multiple metrics + +Labels allow you to define one metric such as the time it takes to perform a certain action on an object. +If we want to collect timings on various container actions such as create, start, and delete, we can define one metric called `container_actions` and use labels to specify the type of action. 
+ + +```go +containerActions = ns.NewLabeledTimer("container_actions", "The number of seconds it takes to process each container action", "action") +``` + +The last parameter is the label name or key. +When adding a data point to the metric you will use the `WithValues` function to specify the `action` that you are collecting for. + +```go +containerActions.WithValues("create").UpdateSince(start) +``` + +4. Always use a unit + +The metric name should describe what you are measuring, but you also need to provide the unit it is measured in. +For a timer, the standard unit is seconds and a counter's standard unit is a total. +For gauges you must provide the unit. +This package provides a standard set of units for use within the Docker projects. + +```go +Nanoseconds Unit = "nanoseconds" +Seconds Unit = "seconds" +Bytes Unit = "bytes" +Total Unit = "total" +``` + +If you need a unit that is not defined in the package, please open a PR to add it, but first check whether one of the existing units will work for your metric, e.g. seconds or nanoseconds rather than adding milliseconds. ## Docs diff --git a/vendor/github.com/docker/go-metrics/namespace.go b/vendor/github.com/docker/go-metrics/namespace.go index f49d88266d..27dab786df 100644 --- a/vendor/github.com/docker/go-metrics/namespace.go +++ b/vendor/github.com/docker/go-metrics/namespace.go @@ -40,21 +40,25 @@ type Namespace struct { // Only metrics created with the returned namespace will get the new constant // labels. The returned namespace must be registered separately. 
func (n *Namespace) WithConstLabels(labels Labels) *Namespace { - ns := *n - ns.metrics = nil // blank this out - ns.labels = mergeLabels(ns.labels, labels) - return &ns + n.mu.Lock() + ns := &Namespace{ + name: n.name, + subsystem: n.subsystem, + labels: mergeLabels(n.labels, labels), + } + n.mu.Unlock() + return ns } func (n *Namespace) NewCounter(name, help string) Counter { c := &counter{pc: prometheus.NewCounter(n.newCounterOpts(name, help))} - n.addMetric(c) + n.Add(c) return c } func (n *Namespace) NewLabeledCounter(name, help string, labels ...string) LabeledCounter { c := &labeledCounter{pc: prometheus.NewCounterVec(n.newCounterOpts(name, help), labels)} - n.addMetric(c) + n.Add(c) return c } @@ -72,7 +76,7 @@ func (n *Namespace) NewTimer(name, help string) Timer { t := &timer{ m: prometheus.NewHistogram(n.newTimerOpts(name, help)), } - n.addMetric(t) + n.Add(t) return t } @@ -80,7 +84,7 @@ func (n *Namespace) NewLabeledTimer(name, help string, labels ...string) Labeled t := &labeledTimer{ m: prometheus.NewHistogramVec(n.newTimerOpts(name, help), labels), } - n.addMetric(t) + n.Add(t) return t } @@ -98,7 +102,7 @@ func (n *Namespace) NewGauge(name, help string, unit Unit) Gauge { g := &gauge{ pg: prometheus.NewGauge(n.newGaugeOpts(name, help, unit)), } - n.addMetric(g) + n.Add(g) return g } @@ -106,7 +110,7 @@ func (n *Namespace) NewLabeledGauge(name, help string, unit Unit, labels ...stri g := &labeledGauge{ pg: prometheus.NewGaugeVec(n.newGaugeOpts(name, help, unit), labels), } - n.addMetric(g) + n.Add(g) return g } @@ -138,12 +142,24 @@ func (n *Namespace) Collect(ch chan<- prometheus.Metric) { } } -func (n *Namespace) addMetric(collector prometheus.Collector) { +func (n *Namespace) Add(collector prometheus.Collector) { n.mu.Lock() n.metrics = append(n.metrics, collector) n.mu.Unlock() } +func (n *Namespace) NewDesc(name, help string, unit Unit, labels ...string) *prometheus.Desc { + if string(unit) != "" { + name = fmt.Sprintf("%s_%s", name, unit) + } 
+ namespace := n.name + if n.subsystem != "" { + namespace = fmt.Sprintf("%s_%s", namespace, n.subsystem) + } + name = fmt.Sprintf("%s_%s", namespace, name) + return prometheus.NewDesc(name, help, labels, prometheus.Labels(n.labels)) +} + // mergeLabels merges two or more labels objects into a single map, favoring // the later labels. func mergeLabels(lbs ...Labels) Labels {