mirror of
https://github.com/moby/moby.git
synced 2022-11-09 12:21:53 -05:00

Container state counts are used for reporting in the `/info` endpoint. Currently when `/info` is called, each container is iterated over and the container's 'StateString()' is called. This is not very efficient with lots of containers, and is also racy since `StateString()` is not using a mutex and the mutex is not otherwise locked. We could just lock the container mutex, but this has proven to be problematic since there are frequent deadlock scenarios and we should always have the `/info` endpoint available since this endpoint is used to get general information about the docker host. Really, these metrics on `/info` should be deprecated. But until then, we can just keep a running tally in memory for each of the reported states. Signed-off-by: Brian Goff <cpuguy83@gmail.com>
155 lines
4.2 KiB
Go
155 lines
4.2 KiB
Go
package daemon
|
|
|
|
import (
|
|
"errors"
|
|
"fmt"
|
|
"runtime"
|
|
"strconv"
|
|
"time"
|
|
|
|
"github.com/Sirupsen/logrus"
|
|
"github.com/docker/docker/api/types"
|
|
"github.com/docker/docker/container"
|
|
"github.com/docker/docker/libcontainerd"
|
|
"github.com/docker/docker/restartmanager"
|
|
)
|
|
|
|
func (daemon *Daemon) setStateCounter(c *container.Container) {
|
|
switch c.StateString() {
|
|
case "paused":
|
|
stateCtr.set(c.ID, "paused")
|
|
case "running":
|
|
stateCtr.set(c.ID, "running")
|
|
default:
|
|
stateCtr.set(c.ID, "stopped")
|
|
}
|
|
}
|
|
|
|
// StateChanged updates daemon state changes from containerd
|
|
func (daemon *Daemon) StateChanged(id string, e libcontainerd.StateInfo) error {
|
|
c := daemon.containers.Get(id)
|
|
if c == nil {
|
|
return fmt.Errorf("no such container: %s", id)
|
|
}
|
|
|
|
switch e.State {
|
|
case libcontainerd.StateOOM:
|
|
// StateOOM is Linux specific and should never be hit on Windows
|
|
if runtime.GOOS == "windows" {
|
|
return errors.New("Received StateOOM from libcontainerd on Windows. This should never happen.")
|
|
}
|
|
daemon.updateHealthMonitor(c)
|
|
daemon.LogContainerEvent(c, "oom")
|
|
case libcontainerd.StateExit:
|
|
// if container's AutoRemove flag is set, remove it after clean up
|
|
autoRemove := func() {
|
|
c.Lock()
|
|
ar := c.HostConfig.AutoRemove
|
|
c.Unlock()
|
|
if ar {
|
|
if err := daemon.ContainerRm(c.ID, &types.ContainerRmConfig{ForceRemove: true, RemoveVolume: true}); err != nil {
|
|
logrus.Errorf("can't remove container %s: %v", c.ID, err)
|
|
}
|
|
}
|
|
}
|
|
|
|
c.Lock()
|
|
c.StreamConfig.Wait()
|
|
c.Reset(false)
|
|
|
|
restart, wait, err := c.RestartManager().ShouldRestart(e.ExitCode, c.HasBeenManuallyStopped, time.Since(c.StartedAt))
|
|
if err == nil && restart {
|
|
c.RestartCount++
|
|
c.SetRestarting(platformConstructExitStatus(e))
|
|
} else {
|
|
c.SetStopped(platformConstructExitStatus(e))
|
|
defer autoRemove()
|
|
}
|
|
|
|
// cancel healthcheck here, they will be automatically
|
|
// restarted if/when the container is started again
|
|
daemon.stopHealthchecks(c)
|
|
attributes := map[string]string{
|
|
"exitCode": strconv.Itoa(int(e.ExitCode)),
|
|
}
|
|
daemon.LogContainerEventWithAttributes(c, "die", attributes)
|
|
daemon.Cleanup(c)
|
|
|
|
if err == nil && restart {
|
|
go func() {
|
|
err := <-wait
|
|
if err == nil {
|
|
if err = daemon.containerStart(c, "", "", false); err != nil {
|
|
logrus.Debugf("failed to restart container: %+v", err)
|
|
}
|
|
}
|
|
if err != nil {
|
|
c.SetStopped(platformConstructExitStatus(e))
|
|
defer autoRemove()
|
|
if err != restartmanager.ErrRestartCanceled {
|
|
logrus.Errorf("restartmanger wait error: %+v", err)
|
|
}
|
|
}
|
|
}()
|
|
}
|
|
|
|
daemon.setStateCounter(c)
|
|
|
|
defer c.Unlock()
|
|
if err := c.ToDisk(); err != nil {
|
|
return err
|
|
}
|
|
return daemon.postRunProcessing(c, e)
|
|
case libcontainerd.StateExitProcess:
|
|
if execConfig := c.ExecCommands.Get(e.ProcessID); execConfig != nil {
|
|
ec := int(e.ExitCode)
|
|
execConfig.Lock()
|
|
defer execConfig.Unlock()
|
|
execConfig.ExitCode = &ec
|
|
execConfig.Running = false
|
|
execConfig.StreamConfig.Wait()
|
|
if err := execConfig.CloseStreams(); err != nil {
|
|
logrus.Errorf("failed to cleanup exec %s streams: %s", c.ID, err)
|
|
}
|
|
|
|
// remove the exec command from the container's store only and not the
|
|
// daemon's store so that the exec command can be inspected.
|
|
c.ExecCommands.Delete(execConfig.ID)
|
|
} else {
|
|
logrus.Warnf("Ignoring StateExitProcess for %v but no exec command found", e)
|
|
}
|
|
case libcontainerd.StateStart, libcontainerd.StateRestore:
|
|
// Container is already locked in this case
|
|
c.SetRunning(int(e.Pid), e.State == libcontainerd.StateStart)
|
|
c.HasBeenManuallyStopped = false
|
|
c.HasBeenStartedBefore = true
|
|
daemon.setStateCounter(c)
|
|
|
|
if err := c.ToDisk(); err != nil {
|
|
c.Reset(false)
|
|
return err
|
|
}
|
|
daemon.initHealthMonitor(c)
|
|
|
|
daemon.LogContainerEvent(c, "start")
|
|
case libcontainerd.StatePause:
|
|
// Container is already locked in this case
|
|
c.Paused = true
|
|
daemon.setStateCounter(c)
|
|
if err := c.ToDisk(); err != nil {
|
|
return err
|
|
}
|
|
daemon.updateHealthMonitor(c)
|
|
daemon.LogContainerEvent(c, "pause")
|
|
case libcontainerd.StateResume:
|
|
// Container is already locked in this case
|
|
c.Paused = false
|
|
daemon.setStateCounter(c)
|
|
if err := c.ToDisk(); err != nil {
|
|
return err
|
|
}
|
|
daemon.updateHealthMonitor(c)
|
|
daemon.LogContainerEvent(c, "unpause")
|
|
}
|
|
return nil
|
|
}
|