2018-02-05 16:05:59 -05:00
package daemon // import "github.com/docker/docker/daemon"
2016-07-20 19:11:28 -04:00
2017-02-09 21:57:35 -05:00
import (
"sync"
2018-05-30 15:00:42 -04:00
"github.com/docker/docker/errdefs"
2017-04-13 21:56:50 -04:00
"github.com/docker/docker/pkg/plugingetter"
2018-04-24 21:45:00 -04:00
"github.com/docker/docker/pkg/plugins"
2019-08-05 10:37:47 -04:00
metrics "github.com/docker/go-metrics"
2017-04-13 21:56:50 -04:00
"github.com/pkg/errors"
2017-02-09 21:57:35 -05:00
"github.com/prometheus/client_golang/prometheus"
2017-07-26 17:42:13 -04:00
"github.com/sirupsen/logrus"
2017-02-09 21:57:35 -05:00
)
2016-07-20 19:11:28 -04:00
2017-04-13 21:56:50 -04:00
const metricsPluginType = "MetricsCollector"
2016-07-20 19:11:28 -04:00
var (
2022-04-12 16:22:37 -04:00
metricsNS = metrics . NewNamespace ( "engine" , "daemon" , nil )
2017-02-09 21:57:35 -05:00
2022-04-12 16:22:37 -04:00
containerActions = metricsNS . NewLabeledTimer ( "container_actions" , "The number of seconds it takes to process each container action" , "action" )
networkActions = metricsNS . NewLabeledTimer ( "network_actions" , "The number of seconds it takes to process each network action" , "action" )
hostInfoFunctions = metricsNS . NewLabeledTimer ( "host_info_functions" , "The number of seconds it takes to call functions gathering info about the host" , "function" )
2016-07-20 19:11:28 -04:00
2022-04-12 16:22:37 -04:00
engineInfo = metricsNS . NewLabeledGauge ( "engine" , "The information related to the engine and the OS it is running on" , metrics . Unit ( "info" ) ,
2016-07-20 19:11:28 -04:00
"version" ,
"commit" ,
"architecture" ,
2017-04-24 07:32:01 -04:00
"graphdriver" ,
2019-05-30 12:51:41 -04:00
"kernel" ,
"os" ,
2017-04-24 07:32:01 -04:00
"os_type" ,
2019-05-30 12:51:41 -04:00
"os_version" ,
2017-04-24 07:32:01 -04:00
"daemon_id" , // ID is a randomly generated unique identifier (e.g. UUID4)
2016-07-20 19:11:28 -04:00
)
2022-04-12 16:22:37 -04:00
engineCpus = metricsNS . NewGauge ( "engine_cpus" , "The number of cpus that the host system of the engine has" , metrics . Unit ( "cpus" ) )
engineMemory = metricsNS . NewGauge ( "engine_memory" , "The number of bytes of memory that the host system of the engine has" , metrics . Bytes )
healthChecksCounter = metricsNS . NewCounter ( "health_checks" , "The total number of health checks" )
healthChecksFailedCounter = metricsNS . NewCounter ( "health_checks_failed" , "The total number of failed health checks" )
2022-04-12 17:57:23 -04:00
healthCheckStartDuration = metricsNS . NewTimer ( "health_check_start_duration" , "The number of seconds it takes to prepare to run health checks" )
2017-02-09 21:57:35 -05:00
2022-04-12 16:22:37 -04:00
stateCtr = newStateCounter ( metricsNS , metricsNS . NewDesc ( "container_states" , "The count of containers in various states" , metrics . Unit ( "containers" ) , "state" ) )
)
func init ( ) {
for _ , a := range [ ] string {
"start" ,
"changes" ,
"commit" ,
"create" ,
"delete" ,
} {
containerActions . WithValues ( a ) . Update ( 0 )
}
2017-02-09 21:57:35 -05:00
2022-04-12 16:22:37 -04:00
metrics . Register ( metricsNS )
2016-07-20 19:11:28 -04:00
}
2017-02-09 21:57:35 -05:00
type stateCounter struct {
2022-02-15 12:04:18 -05:00
mu sync . RWMutex
2017-02-09 21:57:35 -05:00
states map [ string ] string
desc * prometheus . Desc
}
2022-04-12 16:22:37 -04:00
func newStateCounter ( ns * metrics . Namespace , desc * prometheus . Desc ) * stateCounter {
c := & stateCounter {
2017-02-09 21:57:35 -05:00
states : make ( map [ string ] string ) ,
desc : desc ,
}
2022-04-12 16:22:37 -04:00
ns . Add ( c )
return c
2017-02-09 21:57:35 -05:00
}
func ( ctr * stateCounter ) get ( ) ( running int , paused int , stopped int ) {
2022-02-15 12:04:18 -05:00
ctr . mu . RLock ( )
defer ctr . mu . RUnlock ( )
2017-02-09 21:57:35 -05:00
states := map [ string ] int {
"running" : 0 ,
"paused" : 0 ,
"stopped" : 0 ,
}
for _ , state := range ctr . states {
states [ state ] ++
}
return states [ "running" ] , states [ "paused" ] , states [ "stopped" ]
}
// set records label as the current state of id, replacing any previous label,
// while holding the write lock.
func (ctr *stateCounter) set(id, label string) {
	ctr.mu.Lock()
	defer ctr.mu.Unlock()

	ctr.states[id] = label
}
// del removes id from the state map while holding the write lock. Removing an
// id that is not present is a no-op.
func (ctr *stateCounter) del(id string) {
	ctr.mu.Lock()
	defer ctr.mu.Unlock()

	delete(ctr.states, id)
}
// Describe sends the counter's single descriptor on ch.
func (ctr *stateCounter) Describe(ch chan<- *prometheus.Desc) {
	ch <- ctr.desc
}
// Collect takes the current per-state counts and sends one gauge-valued
// constant metric per state on ch, in the order running, paused, stopped.
func (ctr *stateCounter) Collect(ch chan<- prometheus.Metric) {
	running, paused, stopped := ctr.get()

	samples := []struct {
		label string
		count int
	}{
		{"running", running},
		{"paused", paused},
		{"stopped", stopped},
	}
	for _, s := range samples {
		ch <- prometheus.MustNewConstMetric(ctr.desc, prometheus.GaugeValue, float64(s.count), s.label)
	}
}
2017-04-13 21:56:50 -04:00
2019-08-09 07:19:49 -04:00
func ( daemon * Daemon ) cleanupMetricsPlugins ( ) {
ls := daemon . PluginStore . GetAllManagedPluginsByCap ( metricsPluginType )
2017-04-13 21:56:50 -04:00
var wg sync . WaitGroup
wg . Add ( len ( ls ) )
2018-01-16 17:51:36 -05:00
for _ , plugin := range ls {
p := plugin
2017-04-13 21:56:50 -04:00
go func ( ) {
defer wg . Done ( )
2018-04-24 21:45:00 -04:00
adapter , err := makePluginAdapter ( p )
if err != nil {
2018-10-08 07:15:38 -04:00
logrus . WithError ( err ) . WithField ( "plugin" , p . Name ( ) ) . Error ( "Error creating metrics plugin adapter" )
2018-04-24 21:45:00 -04:00
return
}
if err := adapter . StopMetrics ( ) ; err != nil {
logrus . WithError ( err ) . WithField ( "plugin" , p . Name ( ) ) . Error ( "Error stopping plugin metrics collection" )
}
2017-04-13 21:56:50 -04:00
} ( )
}
wg . Wait ( )
2019-08-09 07:19:49 -04:00
if daemon . metricsPluginListener != nil {
daemon . metricsPluginListener . Close ( )
2017-04-13 21:56:50 -04:00
}
}
2018-04-24 21:45:00 -04:00
// metricsPlugin is the interface the daemon uses to start and stop a metrics
// plugin; makePluginAdapter returns values of this type.
type metricsPlugin interface {
	// StartMetrics asks the plugin to start collecting metrics.
	StartMetrics() error
	// StopMetrics asks the plugin to stop collecting metrics.
	StopMetrics() error
}
2019-03-12 19:37:35 -04:00
func makePluginAdapter ( p plugingetter . CompatPlugin ) ( metricsPlugin , error ) {
2018-05-30 15:00:42 -04:00
if pc , ok := p . ( plugingetter . PluginWithV1Client ) ; ok {
return & metricsPluginAdapter { pc . Client ( ) , p . Name ( ) } , nil
}
2018-04-24 21:45:00 -04:00
pa , ok := p . ( plugingetter . PluginAddr )
if ! ok {
2018-05-30 15:00:42 -04:00
return nil , errdefs . System ( errors . Errorf ( "got unknown plugin type %T" , p ) )
2018-04-24 21:45:00 -04:00
}
2018-05-30 15:00:42 -04:00
2018-04-24 21:45:00 -04:00
if pa . Protocol ( ) != plugins . ProtocolSchemeHTTPV1 {
return nil , errors . Errorf ( "plugin protocol not supported: %s" , pa . Protocol ( ) )
}
addr := pa . Addr ( )
client , err := plugins . NewClientWithTimeout ( addr . Network ( ) + "://" + addr . String ( ) , nil , pa . Timeout ( ) )
if err != nil {
return nil , errors . Wrap ( err , "error creating metrics plugin client" )
}
return & metricsPluginAdapter { client , p . Name ( ) } , nil
}
// metricsPluginAdapter implements metricsPlugin on top of a plugin client.
type metricsPluginAdapter struct {
	// c is the client used to call the plugin.
	c *plugins.Client
	// name is the plugin's name as reported by the plugin itself.
	name string
}
func ( a * metricsPluginAdapter ) StartMetrics ( ) error {
2017-04-13 21:56:50 -04:00
type metricsPluginResponse struct {
Err string
}
var res metricsPluginResponse
2018-04-24 21:45:00 -04:00
if err := a . c . Call ( metricsPluginType + ".StartMetrics" , nil , & res ) ; err != nil {
2017-04-13 21:56:50 -04:00
return errors . Wrap ( err , "could not start metrics plugin" )
}
if res . Err != "" {
return errors . New ( res . Err )
}
return nil
}
2018-04-24 21:45:00 -04:00
func ( a * metricsPluginAdapter ) StopMetrics ( ) error {
if err := a . c . Call ( metricsPluginType + ".StopMetrics" , nil , nil ) ; err != nil {
return errors . Wrap ( err , "error stopping metrics collector" )
2017-04-13 21:56:50 -04:00
}
2018-04-24 21:45:00 -04:00
return nil
2017-04-13 21:56:50 -04:00
}