1
0
Fork 0
mirror of https://github.com/moby/moby.git synced 2022-11-09 12:21:53 -05:00
moby--moby/daemon/metrics.go
Brian Goff 0e8e8f0f31 Add support for metrics plugins
Allows for a plugin type that can be used to scrape metrics.
This is useful because metrics are not necessarily at a standard
location... `--metrics-addr` must be set, and must currently be a TCP
socket.
Even if metrics are done via a unix socket, there's no guarantee where
the socket may be located on the system, making bind-mounting such a
socket into a container difficult (and racy, failure-prone on daemon
restart).

Metrics plugins side-step this issue by always listening on a unix
socket and then bind-mounting that into a known path in the plugin
container.

Note there has been similar work in the past (and ultimately punted at
the time) for consistent access to the Docker API from within a
container.

Why not add metrics to the Docker API and just provide a plugin with
access to the Docker API? Certainly this can be useful, but gives a lot
of control/access to a plugin that may only need the metrics. We can
look at supporting API plugins separately for this reason.

Signed-off-by: Brian Goff <cpuguy83@gmail.com>
2017-05-12 00:30:09 -04:00

174 lines
5 KiB
Go

package daemon
import (
"path/filepath"
"sync"
"github.com/Sirupsen/logrus"
"github.com/docker/docker/pkg/mount"
"github.com/docker/docker/pkg/plugingetter"
"github.com/docker/go-metrics"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
)
// metricsPluginType is the plugin capability name used in this file to look
// up managed metrics plugins, and the prefix of the plugin endpoints called
// here (metricsPluginType+".StartMetrics" and metricsPluginType+".StopMetrics").
const metricsPluginType = "MetricsCollector"
// Package-level metric handles. Unless noted otherwise, each one is created
// in init() on the "engine"/"daemon" metrics namespace.
var (
// containerActions times container actions, labeled by "action".
containerActions metrics.LabeledTimer
// containerStates is declared here but is not assigned anywhere in this file.
// NOTE(review): possibly superseded by stateCtr — confirm before relying on it.
containerStates metrics.LabeledGauge
// imageActions times image actions, labeled by "action".
imageActions metrics.LabeledTimer
// networkActions times network actions, labeled by "action".
networkActions metrics.LabeledTimer
// engineInfo is a labeled gauge describing the engine and the OS it runs on.
engineInfo metrics.LabeledGauge
// engineCpus is the number of cpus of the engine's host system.
engineCpus metrics.Gauge
// engineMemory is the number of bytes of memory of the engine's host system.
engineMemory metrics.Gauge
// healthChecksCounter is the total number of health checks.
healthChecksCounter metrics.Counter
// healthChecksFailedCounter is the total number of failed health checks.
healthChecksFailedCounter metrics.Counter
// stateCtr is the collector behind the "container_states" metric; it is added
// to the namespace in init().
stateCtr *stateCounter
)
// init builds the "engine"/"daemon" metrics namespace, creates every metric
// handle declared in this file on it (plus the container state collector),
// and registers the namespace with the metrics package.
func init() {
	ns := metrics.NewNamespace("engine", "daemon", nil)

	containerActions = ns.NewLabeledTimer("container_actions", "The number of seconds it takes to process each container action", "action")
	// Record a zero-valued update for each of these action labels up front;
	// presumably so every label value is exported before the first real
	// action is timed.
	seededActions := []string{"start", "changes", "commit", "create", "delete"}
	for i := range seededActions {
		containerActions.WithValues(seededActions[i]).Update(0)
	}

	networkActions = ns.NewLabeledTimer("network_actions", "The number of seconds it takes to process each network action", "action")

	engineInfo = ns.NewLabeledGauge(
		"engine",
		"The information related to the engine and the OS it is running on",
		metrics.Unit("info"),
		"version",
		"commit",
		"architecture",
		"graphdriver",
		"kernel",
		"os",
		"os_type",
		"daemon_id", // ID is a randomly generated unique identifier (e.g. UUID4)
	)
	engineCpus = ns.NewGauge("engine_cpus", "The number of cpus that the host system of the engine has", metrics.Unit("cpus"))
	engineMemory = ns.NewGauge("engine_memory", "The number of bytes of memory that the host system of the engine has", metrics.Bytes)

	healthChecksCounter = ns.NewCounter("health_checks", "The total number of health checks")
	healthChecksFailedCounter = ns.NewCounter("health_checks_failed", "The total number of failed health checks")

	imageActions = ns.NewLabeledTimer("image_actions", "The number of seconds it takes to process each image action", "action")

	// The container state collector is a custom collector, so it is added to
	// the namespace explicitly instead of being created by an ns.New* helper.
	statesDesc := ns.NewDesc("container_states", "The count of containers in various states", metrics.Unit("containers"), "state")
	stateCtr = newStateCounter(statesDesc)
	ns.Add(stateCtr)

	metrics.Register(ns)
}
// stateCounter records one state label per id and reports how many ids are in
// the "running", "paused" and "stopped" states through its Describe and
// Collect methods.
type stateCounter struct {
// mu guards states.
mu sync.Mutex
// states maps an id to the label last passed to set() for it.
// NOTE(review): ids are presumably container IDs — confirm against callers.
states map[string]string
// desc is the descriptor sent by Describe and used for every metric built in Collect.
desc *prometheus.Desc
}
// newStateCounter returns a stateCounter that reports under desc and starts
// with an empty, ready-to-use id->state map.
func newStateCounter(desc *prometheus.Desc) *stateCounter {
	ctr := new(stateCounter)
	ctr.desc = desc
	ctr.states = make(map[string]string)
	return ctr
}
// get returns how many tracked ids currently carry the "running", "paused"
// and "stopped" labels. Ids with any other label are not counted in any of
// the three results. The map is read while holding ctr.mu.
func (ctr *stateCounter) get() (running int, paused int, stopped int) {
	ctr.mu.Lock()
	defer ctr.mu.Unlock()

	for _, label := range ctr.states {
		switch label {
		case "running":
			running++
		case "paused":
			paused++
		case "stopped":
			stopped++
		}
	}
	return running, paused, stopped
}
// set records label as the current state of id, replacing any label stored
// for that id before. The map is updated while holding ctr.mu.
func (ctr *stateCounter) set(id, label string) {
	ctr.mu.Lock()
	defer ctr.mu.Unlock()

	ctr.states[id] = label
}
// del forgets the state stored for id; deleting an unknown id is a no-op.
// The map is updated while holding ctr.mu.
func (ctr *stateCounter) del(id string) {
	ctr.mu.Lock()
	defer ctr.mu.Unlock()

	delete(ctr.states, id)
}
// Describe sends the counter's single descriptor on ch.
func (ctr *stateCounter) Describe(ch chan<- *prometheus.Desc) {
ch <- ctr.desc
}
// Collect takes one snapshot of the state counts and sends three gauge
// metrics on ch, in the order "running", "paused", "stopped". Each metric
// uses the counter's descriptor with the state name as its label value.
func (ctr *stateCounter) Collect(ch chan<- prometheus.Metric) {
	running, paused, stopped := ctr.get()

	emit := func(state string, count int) {
		ch <- prometheus.MustNewConstMetric(ctr.desc, prometheus.GaugeValue, float64(count), state)
	}

	emit("running", running)
	emit("paused", paused)
	emit("stopped", stopped)
}
// cleanupMetricsPlugins asks every managed plugin with the metrics collector
// capability to stop collecting metrics, waits for all of those requests to
// finish, and then closes the daemon's metrics plugin listener if one is set.
//
// Plugins are stopped concurrently, one goroutine per plugin.
func (d *Daemon) cleanupMetricsPlugins() {
	ls := d.PluginStore.GetAllManagedPluginsByCap(metricsPluginType)

	var wg sync.WaitGroup
	wg.Add(len(ls))

	for _, p := range ls {
		// Take a per-iteration copy of the loop variable. Before Go 1.22 the
		// range variable is shared by all iterations, so goroutines capturing
		// it directly could all observe the last plugin and leave the others
		// running.
		p := p
		go func() {
			defer wg.Done()
			pluginStopMetricsCollection(p)
		}()
	}
	wg.Wait()

	if d.metricsPluginListener != nil {
		d.metricsPluginListener.Close()
	}
}
// metricsPlugin wraps a plugingetter.CompatPlugin so the sock and sockBase
// helpers can be called on it.
type metricsPlugin struct {
plugingetter.CompatPlugin
}
// sock returns the file name of the plugin's metrics socket ("metrics.sock").
func (p metricsPlugin) sock() string {
return "metrics.sock"
}
// sockBase returns the directory that holds the plugin's metrics socket:
// "run/docker" beneath the plugin's base path.
func (p metricsPlugin) sockBase() string {
	base := p.BasePath()
	return filepath.Join(base, "run", "docker")
}
// pluginStartMetricsCollection calls the plugin's StartMetrics endpoint.
//
// It returns a wrapped error when the call itself fails, an error built from
// the response's Err field when the plugin reports one, and nil otherwise.
func pluginStartMetricsCollection(p plugingetter.CompatPlugin) error {
	// Only the Err field of the plugin's reply is inspected.
	var reply struct {
		Err string
	}

	callErr := p.Client().Call(metricsPluginType+".StartMetrics", nil, &reply)
	switch {
	case callErr != nil:
		return errors.Wrap(callErr, "could not start metrics plugin")
	case reply.Err != "":
		return errors.New(reply.Err)
	default:
		return nil
	}
}
// pluginStopMetricsCollection calls the plugin's StopMetrics endpoint and then
// unmounts the plugin's metrics socket. Neither failure is returned to the
// caller: a failed stop call is logged, and a failed unmount is logged only
// when the socket path still reports as mounted afterwards.
func pluginStopMetricsCollection(p plugingetter.CompatPlugin) {
	stopErr := p.Client().Call(metricsPluginType+".StopMetrics", nil, nil)
	if stopErr != nil {
		logrus.WithError(stopErr).WithField("name", p.Name()).Error("error stopping metrics collector")
	}

	plug := metricsPlugin{p}
	sockPath := filepath.Join(plug.sockBase(), plug.sock())

	unmountErr := mount.Unmount(sockPath)
	if unmountErr == nil {
		return
	}

	// An unmount error only matters if the path is still mounted; the error
	// from the mount check itself is deliberately discarded.
	stillMounted, _ := mount.Mounted(sockPath)
	if !stillMounted {
		return
	}
	logrus.WithError(unmountErr).WithField("name", p.Name()).WithField("socket", sockPath).Error("error unmounting metrics socket for plugin")
}