From 4f2a5ba360d0b00213d31f50a5be074c89124c52 Mon Sep 17 00:00:00 2001 From: David Calavera Date: Mon, 2 Nov 2015 18:25:26 -0500 Subject: [PATCH] Decouple daemon and container to stop and kill containers. Signed-off-by: David Calavera --- builder/builder.go | 2 + builder/dockerfile/internals.go | 2 +- daemon/container.go | 139 +------------------------------- daemon/container_unix.go | 2 +- daemon/daemon.go | 6 +- daemon/daemonbuilder/builder.go | 5 ++ daemon/delete.go | 4 +- daemon/events.go | 10 +++ daemon/kill.go | 89 +++++++++++++++++++- daemon/restart.go | 26 +++++- daemon/state.go | 4 +- daemon/stop.go | 37 ++++++++- 12 files changed, 177 insertions(+), 149 deletions(-) create mode 100644 daemon/events.go diff --git a/builder/builder.go b/builder/builder.go index f693aef9ba..601e230031 100644 --- a/builder/builder.go +++ b/builder/builder.go @@ -128,6 +128,8 @@ type Docker interface { // Release releases a list of images that were retained for the time of a build. // TODO: remove Release(sessionID string, activeImages []string) + // Kill stops the container execution abruptly. + Kill(c *daemon.Container) error } // ImageCache abstracts an image cache store. diff --git a/builder/dockerfile/internals.go b/builder/dockerfile/internals.go index 2162cfdc7c..b96772f228 100644 --- a/builder/dockerfile/internals.go +++ b/builder/dockerfile/internals.go @@ -559,7 +559,7 @@ func (b *Builder) run(c *daemon.Container) error { select { case <-b.cancelled: logrus.Debugln("Build cancelled, killing and removing container:", c.ID) - c.Kill() + b.docker.Kill(c) b.removeContainer(c.ID) case <-finished: } diff --git a/daemon/container.go b/daemon/container.go index 1731e05b8d..f90dde799e 100644 --- a/daemon/container.go +++ b/daemon/container.go @@ -337,51 +337,10 @@ func (container *Container) cleanup() { } } -// killSig sends the container the given signal. This wrapper for the -// host specific kill command prepares the container before attempting -// to send the signal. An error is returned if the container is paused -// or not running, or if there is a problem returned from the -// underlying kill command. -func (container *Container) killSig(sig int) error { - logrus.Debugf("Sending %d to %s", sig, container.ID) - container.Lock() - defer container.Unlock() - - // We could unpause the container for them rather than returning this error - if container.Paused { - return derr.ErrorCodeUnpauseContainer.WithArgs(container.ID) - } - - if !container.Running { - return derr.ErrorCodeNotRunning.WithArgs(container.ID) - } - - // signal to the monitor that it should not restart the container - // after we send the kill signal +// ExitOnNext signals to the monitor that it should not restart the container +// after we send the kill signal. +func (container *Container) ExitOnNext() { container.monitor.ExitOnNext() - - // if the container is currently restarting we do not need to send the signal - // to the process. Telling the monitor that it should exit on it's next event - // loop is enough - if container.Restarting { - return nil - } - - if err := container.daemon.kill(container, sig); err != nil { - return err - } - container.logEvent("kill") - return nil -} - -// Wrapper aroung killSig() suppressing "no such process" error. -func (container *Container) killPossiblyDeadProcess(sig int) error { - err := container.killSig(sig) - if err == syscall.ESRCH { - logrus.Debugf("Cannot kill process (pid=%d) with signal %d: no such process.", container.getPID(), sig) - return nil - } - return err } func (container *Container) pause() error { @@ -428,98 +387,6 @@ func (container *Container) unpause() error { return nil } -// Kill forcefully terminates a container. -func (container *Container) Kill() error { - if !container.IsRunning() { - return derr.ErrorCodeNotRunning.WithArgs(container.ID) - } - - // 1. Send SIGKILL - if err := container.killPossiblyDeadProcess(int(syscall.SIGKILL)); err != nil { - // While normally we might "return err" here we're not going to - // because if we can't stop the container by this point then - // its probably because its already stopped. Meaning, between - // the time of the IsRunning() call above and now it stopped. - // Also, since the err return will be exec driver specific we can't - // look for any particular (common) error that would indicate - // that the process is already dead vs something else going wrong. - // So, instead we'll give it up to 2 more seconds to complete and if - // by that time the container is still running, then the error - // we got is probably valid and so we return it to the caller. - - if container.IsRunning() { - container.WaitStop(2 * time.Second) - if container.IsRunning() { - return err - } - } - } - - // 2. Wait for the process to die, in last resort, try to kill the process directly - if err := killProcessDirectly(container); err != nil { - return err - } - - container.WaitStop(-1 * time.Second) - return nil -} - -// Stop halts a container by sending a stop signal, waiting for the given -// duration in seconds, and then calling SIGKILL and waiting for the -// process to exit. If a negative duration is given, Stop will wait -// for the initial signal forever. If the container is not running Stop returns -// immediately. -func (container *Container) Stop(seconds int) error { - if !container.IsRunning() { - return nil - } - - // 1. Send a SIGTERM - if err := container.killPossiblyDeadProcess(container.stopSignal()); err != nil { - logrus.Infof("Failed to send SIGTERM to the process, force killing") - if err := container.killPossiblyDeadProcess(9); err != nil { - return err - } - } - - // 2. Wait for the process to exit on its own - if _, err := container.WaitStop(time.Duration(seconds) * time.Second); err != nil { - logrus.Infof("Container %v failed to exit within %d seconds of SIGTERM - using the force", container.ID, seconds) - // 3. If it doesn't, then send SIGKILL - if err := container.Kill(); err != nil { - container.WaitStop(-1 * time.Second) - logrus.Warn(err) // Don't return error because we only care that container is stopped, not what function stopped it - } - } - - container.logEvent("stop") - return nil -} - -// Restart attempts to gracefully stop and then start the -// container. When stopping, wait for the given duration in seconds to -// gracefully stop, before forcefully terminating the container. If -// given a negative duration, wait forever for a graceful stop. -func (container *Container) Restart(seconds int) error { - // Avoid unnecessarily unmounting and then directly mounting - // the container when the container stops and then starts - // again - if err := container.Mount(); err == nil { - defer container.Unmount() - } - - if err := container.Stop(seconds); err != nil { - return err - } - - if err := container.Start(); err != nil { - return err - } - - container.logEvent("restart") - return nil -} - // Resize changes the TTY of the process running inside the container // to the given height and width. The container must be running. func (container *Container) Resize(h, w int) error { diff --git a/daemon/container_unix.go b/daemon/container_unix.go index 6cde5705b2..9aef3f33b1 100644 --- a/daemon/container_unix.go +++ b/daemon/container_unix.go @@ -64,7 +64,7 @@ type Container struct { func killProcessDirectly(container *Container) error { if _, err := container.WaitStop(10 * time.Second); err != nil { // Ensure that we don't kill ourselves - if pid := container.getPID(); pid != 0 { + if pid := container.GetPID(); pid != 0 { logrus.Infof("Container %s failed to exit within 10 seconds of kill - trying direct SIGKILL", stringid.TruncateID(container.ID)) if err := syscall.Kill(pid, 9); err != nil { if err != syscall.ESRCH { diff --git a/daemon/daemon.go b/daemon/daemon.go index dec9236884..138a56b7a3 100644 --- a/daemon/daemon.go +++ b/daemon/daemon.go @@ -838,7 +838,7 @@ func NewDaemon(config *Config, registryService *registry.Service) (daemon *Daemo return d, nil } -func stopContainer(c *Container) error { +func (daemon *Daemon) shutdownContainer(c *Container) error { // TODO(windows): Handle docker restart with paused containers if c.isPaused() { // To terminate a process in freezer cgroup, we should send @@ -869,7 +869,7 @@ func stopContainer(c *Container) error { } } // If container failed to exit in 10 seconds of SIGTERM, then using the force - if err := c.Stop(10); err != nil { + if err := daemon.containerStop(c, 10); err != nil { return fmt.Errorf("Stop container %s with error: %v", c.ID, err) } @@ -891,7 +891,7 @@ func (daemon *Daemon) Shutdown() error { group.Add(1) go func(c *Container) { defer group.Done() - if err := stopContainer(c); err != nil { + if err := daemon.shutdownContainer(c); err != nil { logrus.Errorf("Stop container error: %v", err) return } diff --git a/daemon/daemonbuilder/builder.go b/daemon/daemonbuilder/builder.go index fab97face2..fcfcf5e564 100644 --- a/daemon/daemonbuilder/builder.go +++ b/daemon/daemonbuilder/builder.go @@ -205,6 +205,11 @@ func (d Docker) GetCachedImage(imgID string, cfg *runconfig.Config) (string, err return cache.ID, nil } +// Kill stops the container execution abruptly. +func (d Docker) Kill(container *daemon.Container) error { + return d.Daemon.Kill(container) +} + // Following is specific to builder contexts // DetectContextFromRemoteURL returns a context and in certain cases the name of the dockerfile to be used diff --git a/daemon/delete.go b/daemon/delete.go index 49ccc72779..a2e7e5fa82 100644 --- a/daemon/delete.go +++ b/daemon/delete.go @@ -71,7 +71,7 @@ func (daemon *Daemon) rm(container *Container, forceRemove bool) (err error) { if !forceRemove { return derr.ErrorCodeRmRunning } - if err := container.Kill(); err != nil { + if err := daemon.Kill(container); err != nil { return derr.ErrorCodeRmFailed.WithArgs(err) } } @@ -90,7 +90,7 @@ func (daemon *Daemon) rm(container *Container, forceRemove bool) (err error) { // if stats are currently getting collected. daemon.statsCollector.stopCollection(container) - if err = container.Stop(3); err != nil { + if err = daemon.containerStop(container, 3); err != nil { return err } diff --git a/daemon/events.go b/daemon/events.go new file mode 100644 index 0000000000..be6d0c5b67 --- /dev/null +++ b/daemon/events.go @@ -0,0 +1,10 @@ +package daemon + +// logContainerEvent generates an event related to a container. +func (daemon *Daemon) logContainerEvent(container *Container, action string) { + daemon.EventsService.Log( + action, + container.ID, + container.Config.Image, + ) +} diff --git a/daemon/kill.go b/daemon/kill.go index dc8f23be2d..91b16ad389 100644 --- a/daemon/kill.go +++ b/daemon/kill.go @@ -4,7 +4,10 @@ import ( "fmt" "runtime" "syscall" + "time" + "github.com/Sirupsen/logrus" + derr "github.com/docker/docker/errors" "github.com/docker/docker/pkg/signal" ) @@ -24,14 +27,96 @@ func (daemon *Daemon) ContainerKill(name string, sig uint64) error { // If no signal is passed, or SIGKILL, perform regular Kill (SIGKILL + wait()) if sig == 0 || syscall.Signal(sig) == syscall.SIGKILL { - if err := container.Kill(); err != nil { + if err := daemon.Kill(container); err != nil { return err } } else { // Otherwise, just send the requested signal - if err := container.killSig(int(sig)); err != nil { + if err := daemon.killWithSignal(container, int(sig)); err != nil { return err } } return nil } + +// killWithSignal sends the container the given signal. This wrapper for the +// host specific kill command prepares the container before attempting +// to send the signal. An error is returned if the container is paused +// or not running, or if there is a problem returned from the +// underlying kill command. +func (daemon *Daemon) killWithSignal(container *Container, sig int) error { + logrus.Debugf("Sending %d to %s", sig, container.ID) + container.Lock() + defer container.Unlock() + + // We could unpause the container for them rather than returning this error + if container.Paused { + return derr.ErrorCodeUnpauseContainer.WithArgs(container.ID) + } + + if !container.Running { + return derr.ErrorCodeNotRunning.WithArgs(container.ID) + } + + container.ExitOnNext() + + // if the container is currently restarting we do not need to send the signal + // to the process. Telling the monitor that it should exit on it's next event + // loop is enough + if container.Restarting { + return nil + } + + if err := daemon.kill(container, sig); err != nil { + return err + } + + daemon.logContainerEvent(container, "kill") + return nil +} + +// Kill forcefully terminates a container. +func (daemon *Daemon) Kill(container *Container) error { + if !container.IsRunning() { + return derr.ErrorCodeNotRunning.WithArgs(container.ID) + } + + // 1. Send SIGKILL + if err := daemon.killPossiblyDeadProcess(container, int(syscall.SIGKILL)); err != nil { + // While normally we might "return err" here we're not going to + // because if we can't stop the container by this point then + // its probably because its already stopped. Meaning, between + // the time of the IsRunning() call above and now it stopped. + // Also, since the err return will be exec driver specific we can't + // look for any particular (common) error that would indicate + // that the process is already dead vs something else going wrong. + // So, instead we'll give it up to 2 more seconds to complete and if + // by that time the container is still running, then the error + // we got is probably valid and so we return it to the caller. + + if container.IsRunning() { + container.WaitStop(2 * time.Second) + if container.IsRunning() { + return err + } + } + } + + // 2. Wait for the process to die, in last resort, try to kill the process directly + if err := killProcessDirectly(container); err != nil { + return err + } + + container.WaitStop(-1 * time.Second) + return nil +} + +// killPossibleDeadProcess is a wrapper aroung killSig() suppressing "no such process" error. +func (daemon *Daemon) killPossiblyDeadProcess(container *Container, sig int) error { + err := daemon.killWithSignal(container, sig) + if err == syscall.ESRCH { + logrus.Debugf("Cannot kill process (pid=%d) with signal %d: no such process.", container.GetPID(), sig) + return nil + } + return err +} diff --git a/daemon/restart.go b/daemon/restart.go index 86ec88dbb1..844218681b 100644 --- a/daemon/restart.go +++ b/daemon/restart.go @@ -15,8 +15,32 @@ func (daemon *Daemon) ContainerRestart(name string, seconds int) error { if err != nil { return err } - if err := container.Restart(seconds); err != nil { + if err := daemon.containerRestart(container, seconds); err != nil { return derr.ErrorCodeCantRestart.WithArgs(name, err) } return nil } + +// containerRestart attempts to gracefully stop and then start the +// container. When stopping, wait for the given duration in seconds to +// gracefully stop, before forcefully terminating the container. If +// given a negative duration, wait forever for a graceful stop. +func (daemon *Daemon) containerRestart(container *Container, seconds int) error { + // Avoid unnecessarily unmounting and then directly mounting + // the container when the container stops and then starts + // again + if err := container.Mount(); err == nil { + defer container.Unmount() + } + + if err := daemon.containerStop(container, seconds); err != nil { + return err + } + + if err := container.Start(); err != nil { + return err + } + + daemon.logContainerEvent(container, "restart") + return nil +} diff --git a/daemon/state.go b/daemon/state.go index 5adeb77e23..b9231f2a6c 100644 --- a/daemon/state.go +++ b/daemon/state.go @@ -134,7 +134,7 @@ func (s *State) waitRunning(timeout time.Duration) (int, error) { if err := wait(waitChan, timeout); err != nil { return -1, err } - return s.getPID(), nil + return s.GetPID(), nil } // WaitStop waits until state is stopped. If state already stopped it returns @@ -164,7 +164,7 @@ func (s *State) IsRunning() bool { } // GetPID holds the process id of a container. -func (s *State) getPID() int { +func (s *State) GetPID() int { s.Lock() res := s.Pid s.Unlock() diff --git a/daemon/stop.go b/daemon/stop.go index 9e33315216..17629acdd6 100644 --- a/daemon/stop.go +++ b/daemon/stop.go @@ -1,6 +1,9 @@ package daemon import ( + "time" + + "github.com/Sirupsen/logrus" derr "github.com/docker/docker/errors" ) @@ -18,8 +21,40 @@ func (daemon *Daemon) ContainerStop(name string, seconds int) error { if !container.IsRunning() { return derr.ErrorCodeStopped } - if err := container.Stop(seconds); err != nil { + if err := daemon.containerStop(container, seconds); err != nil { return derr.ErrorCodeCantStop.WithArgs(name, err) } return nil } + +// containerStop halts a container by sending a stop signal, waiting for the given +// duration in seconds, and then calling SIGKILL and waiting for the +// process to exit. If a negative duration is given, Stop will wait +// for the initial signal forever. If the container is not running Stop returns +// immediately. +func (daemon *Daemon) containerStop(container *Container, seconds int) error { + if !container.IsRunning() { + return nil + } + + // 1. Send a SIGTERM + if err := daemon.killPossiblyDeadProcess(container, container.stopSignal()); err != nil { + logrus.Infof("Failed to send SIGTERM to the process, force killing") + if err := daemon.killPossiblyDeadProcess(container, 9); err != nil { + return err + } + } + + // 2. Wait for the process to exit on its own + if _, err := container.WaitStop(time.Duration(seconds) * time.Second); err != nil { + logrus.Infof("Container %v failed to exit within %d seconds of SIGTERM - using the force", container.ID, seconds) + // 3. If it doesn't, then send SIGKILL + if err := daemon.Kill(container); err != nil { + container.WaitStop(-1 * time.Second) + logrus.Warn(err) // Don't return error because we only care that container is stopped, not what function stopped it + } + } + + daemon.logContainerEvent(container, "stop") + return nil +}