2015-01-07 17:43:04 -05:00
|
|
|
package daemon
|
|
|
|
|
|
|
|
import (
|
2015-01-07 19:22:42 -05:00
|
|
|
"bufio"
|
|
|
|
"fmt"
|
|
|
|
"os"
|
|
|
|
"strconv"
|
|
|
|
"strings"
|
2015-01-07 17:43:04 -05:00
|
|
|
"sync"
|
|
|
|
"time"
|
|
|
|
|
|
|
|
log "github.com/Sirupsen/logrus"
|
|
|
|
"github.com/docker/docker/daemon/execdriver"
|
2015-01-19 17:07:21 -05:00
|
|
|
"github.com/docker/libcontainer/system"
|
2015-01-07 17:43:04 -05:00
|
|
|
)
|
|
|
|
|
2015-01-07 21:02:08 -05:00
|
|
|
// newStatsCollector returns a new statsCollector that collections
|
|
|
|
// network and cgroup stats for a registered container at the specified
|
|
|
|
// interval. The collector allows non-running containers to be added
|
|
|
|
// and will start processing stats when they are started.
|
2015-01-07 17:43:04 -05:00
|
|
|
func newStatsCollector(interval time.Duration) *statsCollector {
|
|
|
|
s := &statsCollector{
|
|
|
|
interval: interval,
|
2015-01-07 21:02:08 -05:00
|
|
|
containers: make(map[string]*statsData),
|
2015-01-19 17:07:21 -05:00
|
|
|
clockTicks: uint64(system.GetClockTicks()),
|
2015-01-07 17:43:04 -05:00
|
|
|
}
|
|
|
|
s.start()
|
|
|
|
return s
|
|
|
|
}
|
|
|
|
|
2015-01-07 21:02:08 -05:00
|
|
|
type statsData struct {
|
2015-01-07 17:43:04 -05:00
|
|
|
c *Container
|
|
|
|
lastStats *execdriver.ResourceStats
|
|
|
|
subs []chan *execdriver.ResourceStats
|
|
|
|
}
|
|
|
|
|
|
|
|
// statsCollector manages and provides container resource stats
|
|
|
|
type statsCollector struct {
|
|
|
|
m sync.Mutex
|
|
|
|
interval time.Duration
|
2015-01-19 17:07:21 -05:00
|
|
|
clockTicks uint64
|
2015-01-07 21:02:08 -05:00
|
|
|
containers map[string]*statsData
|
2015-01-07 17:43:04 -05:00
|
|
|
}
|
|
|
|
|
2015-01-07 21:02:08 -05:00
|
|
|
// collect registers the container with the collector and adds it to
|
|
|
|
// the event loop for collection on the specified interval returning
|
|
|
|
// a channel for the subscriber to receive on.
|
|
|
|
func (s *statsCollector) collect(c *Container) chan *execdriver.ResourceStats {
|
2015-01-07 17:43:04 -05:00
|
|
|
s.m.Lock()
|
2015-01-07 21:02:08 -05:00
|
|
|
defer s.m.Unlock()
|
2015-01-07 17:43:04 -05:00
|
|
|
ch := make(chan *execdriver.ResourceStats, 1024)
|
2015-01-07 21:02:08 -05:00
|
|
|
if _, exists := s.containers[c.ID]; exists {
|
|
|
|
s.containers[c.ID].subs = append(s.containers[c.ID].subs, ch)
|
|
|
|
return ch
|
|
|
|
}
|
|
|
|
s.containers[c.ID] = &statsData{
|
2015-01-07 17:43:04 -05:00
|
|
|
c: c,
|
|
|
|
subs: []chan *execdriver.ResourceStats{
|
|
|
|
ch,
|
|
|
|
},
|
|
|
|
}
|
|
|
|
return ch
|
|
|
|
}
|
|
|
|
|
2015-01-07 21:02:08 -05:00
|
|
|
// stopCollection closes the channels for all subscribers and removes
|
|
|
|
// the container from metrics collection.
|
2015-01-07 17:43:04 -05:00
|
|
|
func (s *statsCollector) stopCollection(c *Container) {
|
|
|
|
s.m.Lock()
|
2015-01-07 21:02:08 -05:00
|
|
|
defer s.m.Unlock()
|
|
|
|
d := s.containers[c.ID]
|
|
|
|
if d == nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
for _, sub := range d.subs {
|
|
|
|
close(sub)
|
|
|
|
}
|
2015-01-07 17:43:04 -05:00
|
|
|
delete(s.containers, c.ID)
|
2015-01-07 21:02:08 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
// unsubscribe removes a specific subscriber from receiving updates for a
|
|
|
|
// container's stats.
|
|
|
|
func (s *statsCollector) unsubscribe(c *Container, ch chan *execdriver.ResourceStats) {
|
|
|
|
s.m.Lock()
|
|
|
|
cd := s.containers[c.ID]
|
|
|
|
for i, sub := range cd.subs {
|
|
|
|
if ch == sub {
|
|
|
|
cd.subs = append(cd.subs[:i], cd.subs[i+1:]...)
|
|
|
|
close(ch)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// if there are no more subscribers then remove the entire container
|
|
|
|
// from collection.
|
|
|
|
if len(cd.subs) == 0 {
|
|
|
|
delete(s.containers, c.ID)
|
|
|
|
}
|
2015-01-07 17:43:04 -05:00
|
|
|
s.m.Unlock()
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *statsCollector) start() {
|
|
|
|
go func() {
|
|
|
|
for _ = range time.Tick(s.interval) {
|
|
|
|
s.m.Lock()
|
|
|
|
for id, d := range s.containers {
|
2015-01-07 21:02:08 -05:00
|
|
|
systemUsage, err := s.getSystemCpuUsage()
|
2015-01-07 19:22:42 -05:00
|
|
|
if err != nil {
|
|
|
|
log.Errorf("collecting system cpu usage for %s: %v", id, err)
|
|
|
|
continue
|
|
|
|
}
|
2015-01-07 17:43:04 -05:00
|
|
|
stats, err := d.c.Stats()
|
|
|
|
if err != nil {
|
2015-01-07 21:02:08 -05:00
|
|
|
if err == execdriver.ErrNotRunning {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
// if the error is not because the container is currently running then
|
|
|
|
// evict the container from the collector and close the channel for
|
|
|
|
// any subscribers currently waiting on changes.
|
2015-01-07 17:43:04 -05:00
|
|
|
log.Errorf("collecting stats for %s: %v", id, err)
|
2015-01-07 21:02:08 -05:00
|
|
|
for _, sub := range s.containers[id].subs {
|
|
|
|
close(sub)
|
|
|
|
}
|
|
|
|
delete(s.containers, id)
|
2015-01-07 17:43:04 -05:00
|
|
|
continue
|
|
|
|
}
|
2015-01-07 19:22:42 -05:00
|
|
|
stats.SystemUsage = systemUsage
|
2015-01-07 17:43:04 -05:00
|
|
|
for _, sub := range s.containers[id].subs {
|
|
|
|
sub <- stats
|
|
|
|
}
|
|
|
|
}
|
|
|
|
s.m.Unlock()
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
}
|
2015-01-07 19:22:42 -05:00
|
|
|
|
2015-01-19 17:07:21 -05:00
|
|
|
const nanoSeconds = 1e9
|
|
|
|
|
|
|
|
// getSystemdCpuUSage returns the host system's cpu usage in nanoseconds
|
|
|
|
// for the system to match the cgroup readings are returned in the same format.
|
2015-01-07 21:02:08 -05:00
|
|
|
func (s *statsCollector) getSystemCpuUsage() (uint64, error) {
|
2015-01-07 19:22:42 -05:00
|
|
|
f, err := os.Open("/proc/stat")
|
|
|
|
if err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
defer f.Close()
|
|
|
|
sc := bufio.NewScanner(f)
|
|
|
|
for sc.Scan() {
|
|
|
|
parts := strings.Fields(sc.Text())
|
|
|
|
switch parts[0] {
|
|
|
|
case "cpu":
|
|
|
|
if len(parts) < 8 {
|
|
|
|
return 0, fmt.Errorf("invalid number of cpu fields")
|
|
|
|
}
|
2015-01-19 17:07:21 -05:00
|
|
|
var sum uint64
|
2015-01-07 19:22:42 -05:00
|
|
|
for _, i := range parts[1:8] {
|
|
|
|
v, err := strconv.ParseUint(i, 10, 64)
|
|
|
|
if err != nil {
|
2015-01-19 17:07:21 -05:00
|
|
|
return 0, fmt.Errorf("Unable to convert value %s to int: %s", i, err)
|
2015-01-07 19:22:42 -05:00
|
|
|
}
|
2015-01-19 17:07:21 -05:00
|
|
|
sum += v
|
2015-01-07 19:22:42 -05:00
|
|
|
}
|
2015-01-19 17:07:21 -05:00
|
|
|
return (sum * nanoSeconds) / s.clockTicks, nil
|
2015-01-07 19:22:42 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return 0, fmt.Errorf("invalid stat format")
|
|
|
|
}
|