moby--moby/daemon/monitor.go

232 lines
6.5 KiB
Go
Raw Normal View History

package daemon
import (
"io"
"os/exec"
"sync"
"time"
"github.com/docker/docker/daemon/execdriver"
"github.com/docker/docker/runconfig"
"github.com/docker/docker/utils"
)
// defaultTimeIncrement is the initial value of containerMonitor.timeIncrement,
// i.e. the starting delay in milliseconds that the monitor waits between restarts.
const defaultTimeIncrement = 100
// containerMonitor monitors the execution of a container's main process.
// If a restart policy is specified for the container the monitor will ensure that the
// process is restarted based on the rules of the policy. When the container is finally stopped
// the monitor will reset and cleanup any of the container resources such as networking allocations
// and the rootfs
type containerMonitor struct {
// mux is held by ExitOnNext and shouldRestart while they access shouldStop
mux sync.Mutex
// container is the container being monitored
container *Container
// restartPolicy is the policy being applied to the container by the monitor
restartPolicy runconfig.RestartPolicy
// failureCount is the number of times the container has failed to
// start in a row; reset sets it back to 0 after a successful run
failureCount int
// shouldStop signals the monitor that the next time the container exits it is
// either because docker or the user asked for the container to be stopped
shouldStop bool
// timeIncrement is the amount of time to wait between restarts
// this is in milliseconds; reset doubles it after a failed run and
// restores defaultTimeIncrement after a successful one
timeIncrement int
}
// newContainerMonitor builds a monitor for the given container that applies the
// given restart policy. The wait between restarts starts at defaultTimeIncrement;
// every other field is left at its zero value.
func newContainerMonitor(container *Container, policy runconfig.RestartPolicy) *containerMonitor {
	monitor := new(containerMonitor)
	monitor.container = container
	monitor.restartPolicy = policy
	monitor.timeIncrement = defaultTimeIncrement
	return monitor
}
// ExitOnNext marks the monitor so that the next exit of the container's process
// is treated as a requested stop: once the flag is set, shouldRestart refuses to
// restart the process. The flag is written while holding the monitor's mutex.
func (m *containerMonitor) ExitOnNext() {
	m.mux.Lock()
	defer m.mux.Unlock()

	m.shouldStop = true
}
// Close releases the container's resources such as networking allocations and
// the mount of the container's root filesystem, then writes the container's
// state to disk. It returns the error from writing the state, if any; that
// error is also logged.
func (m *containerMonitor) Close() error {
	// Cleanup networking and mounts
	m.container.cleanup()

	// FIXME: here is race condition between two RUN instructions in Dockerfile
	// because they share same runconfig and change image. Must be fixed
	// in builder/builder.go
	err := m.container.toDisk()
	if err == nil {
		return nil
	}

	utils.Errorf("Error dumping container %s state to disk: %s\n", m.container.ID, err)
	return err
}
// reset resets the container's IO and ensures that the command is able to be executed again
// by copying the data into a new struct.
//
// successful tells the monitor how the run that just finished ended: when true the
// failure counter and the restart delay go back to their defaults, otherwise the
// failure counter is incremented and the restart delay is doubled.
//
// Errors hit while closing the container's streams or terminal are logged and
// otherwise ignored so that the remaining cleanup still runs.
func (m *containerMonitor) reset(successful bool) {
	container := m.container

	if container.Config.OpenStdin {
		if err := container.stdin.Close(); err != nil {
			utils.Errorf("%s: Error close stdin: %s", container.ID, err)
		}
	}

	if err := container.stdout.Clean(); err != nil {
		utils.Errorf("%s: Error close stdout: %s", container.ID, err)
	}

	if err := container.stderr.Clean(); err != nil {
		utils.Errorf("%s: Error close stderr: %s", container.ID, err)
	}

	if container.command != nil && container.command.Terminal != nil {
		if err := container.command.Terminal.Close(); err != nil {
			utils.Errorf("%s: Error closing terminal: %s", container.ID, err)
		}
	}

	// Re-create a brand new stdin pipe once the container exited
	if container.Config.OpenStdin {
		container.stdin, container.stdinPipe = io.Pipe()
	}

	container.LogEvent("die")

	// The terminal cleanup above already allows for a nil command, so apply the
	// same guard here instead of dereferencing it unconditionally.
	if container.command != nil {
		// Build a fresh exec.Cmd that carries over only the launch configuration of
		// the previous one, so the command can be started again.
		c := container.command.Cmd

		container.command.Cmd = exec.Cmd{
			Stdin:       c.Stdin,
			Stdout:      c.Stdout,
			Stderr:      c.Stderr,
			Path:        c.Path,
			Env:         c.Env,
			ExtraFiles:  c.ExtraFiles,
			Args:        c.Args,
			Dir:         c.Dir,
			SysProcAttr: c.SysProcAttr,
		}
	}

	// the container exited successfully so we need to reset the failure counter
	// and the timeIncrement back to the default values
	if successful {
		m.failureCount = 0
		m.timeIncrement = defaultTimeIncrement
	} else {
		// otherwise we need to increment the amount of time we wait before restarting
		// the process. We will build up by multiplying the increment by 2
		m.failureCount++
		m.timeIncrement *= 2
	}
}
// Start starts the container's process and monitors it according to the restart policy.
//
// Each iteration starts logging to disk, runs the process through the daemon,
// records the stopped state and resets the container so it can be run again.
// The loop repeats for as long as shouldRestart allows it, sleeping for
// timeIncrement milliseconds between runs. Close is always called on the way out.
//
// It returns the error from startLoggingToDisk if logging could not be started,
// otherwise the error (possibly nil) returned by the most recent run.
func (m *containerMonitor) Start() error {
	var (
		err        error
		exitStatus int
	)

	// ensure that when the monitor finally exits we release the networking and unmount the rootfs
	defer m.Close()

	// stopRequested reads shouldStop while holding the mutex. ExitOnNext writes the
	// flag under the same mutex, so an unguarded read here would be a data race.
	stopRequested := func() bool {
		m.mux.Lock()
		defer m.mux.Unlock()
		return m.shouldStop
	}

	// reset the restart count
	m.container.RestartCount = -1

	for !stopRequested() {
		m.container.RestartCount++

		if logErr := m.container.startLoggingToDisk(); logErr != nil {
			m.reset(false)
			return logErr
		}

		pipes := execdriver.NewPipes(m.container.stdin, m.container.stdout, m.container.stderr, m.container.Config.OpenStdin)

		m.container.LogEvent("start")

		if exitStatus, err = m.container.daemon.Run(m.container, pipes, m.callback); err != nil {
			utils.Errorf("Error running container: %s", err)
		}

		// we still wait to set the state as stopped and ensure that the locks were released
		m.container.State.SetStopped(exitStatus)

		// pass if we exited successfully
		m.reset(err == nil && exitStatus == 0)

		if m.shouldRestart(exitStatus) {
			// sleep with a small time increment between each restart to help avoid issues caused by quickly
			// restarting the container because of some types of errors ( networking cut out, etc... )
			time.Sleep(time.Duration(m.timeIncrement) * time.Millisecond)
			continue
		}

		break
	}

	return err
}
// shouldRestart checks the restart policy and applies the rules to determine if
// the container's process should be restarted.
//
// exitStatus is the exit status of the run that just finished. The result is
// always false once a stop has been requested through ExitOnNext. With the
// "always" policy the result is true; with "on-failure" it is true only for a
// non-zero exitStatus and only while failureCount is below a non-zero
// MaximumRetryCount. Any other policy name yields false.
func (m *containerMonitor) shouldRestart(exitStatus int) bool {
	m.mux.Lock()
	defer m.mux.Unlock()

	// do not restart if the user or docker has requested that this container be stopped
	if m.shouldStop {
		return false
	}

	switch m.restartPolicy.Name {
	case "always":
		return true
	case "on-failure":
		// the default value of 0 for MaximumRetryCount means that we will not enforce a maximum count
		if maxRetries := m.restartPolicy.MaximumRetryCount; maxRetries != 0 && m.failureCount >= maxRetries {
			// the format has two verbs, so both the container ID and the limit must be passed
			utils.Debugf("stopping restart of container %s because maximum failure count of %d has been reached", m.container.ID, maxRetries)
			return false
		}

		return exitStatus != 0
	}

	return false
}
// callback ensures that the container's state is properly updated after we
// received ack from the execution drivers. It marks the container as running
// with the command's pid and writes the container to disk; a failure to write
// is only logged at debug level.
func (m *containerMonitor) callback(command *execdriver.Command) {
	if command.Tty {
		// The callback is called after the process Start()
		// so we are in the parent process. In TTY mode, stdin/out/err is the PtySlave
		// which we close here.
		closer, isCloser := command.Stdout.(io.Closer)
		if isCloser {
			closer.Close()
		}
	}

	m.container.State.SetRunning(command.Pid())

	err := m.container.ToDisk()
	if err != nil {
		utils.Debugf("%s", err)
	}
}