From a705e166cf3bcca62543150c2b3f9bfeae45ecfa Mon Sep 17 00:00:00 2001 From: Zhang Wei Date: Thu, 7 Apr 2016 22:05:29 +0800 Subject: [PATCH] Fix critical bug: can't restart a restarting container When user try to restart a restarting container, docker client report error: "container is already active", and container will be stopped instead be restarted which is seriously wrong. What's more critical is that when user try to start this container again, it will always fail. This error can also be reproduced with a `docker stop`+`docker start`. And this commit will fix the bug. Signed-off-by: Zhang Wei --- daemon/kill.go | 10 +++++++- integration-cli/docker_cli_restart_test.go | 29 ++++++++++++++++++++++ libcontainerd/client_linux.go | 2 +- libcontainerd/container_linux.go | 1 + restartmanager/restartmanager.go | 2 +- 5 files changed, 41 insertions(+), 3 deletions(-) diff --git a/daemon/kill.go b/daemon/kill.go index e6ccfc7955..3967f0f299 100644 --- a/daemon/kill.go +++ b/daemon/kill.go @@ -3,6 +3,7 @@ package daemon import ( "fmt" "runtime" + "strings" "syscall" "time" @@ -81,7 +82,14 @@ func (daemon *Daemon) killWithSignal(container *container.Container, sig int) er } if err := daemon.kill(container, sig); err != nil { - return fmt.Errorf("Cannot kill container %s: %s", container.ID, err) + err = fmt.Errorf("Cannot kill container %s: %s", container.ID, err) + // if container or process not exists, ignore the error + if strings.Contains(err.Error(), "container not found") || + strings.Contains(err.Error(), "no such process") { + logrus.Warnf("%s", err.Error()) + } else { + return err + } } attributes := map[string]string{ diff --git a/integration-cli/docker_cli_restart_test.go b/integration-cli/docker_cli_restart_test.go index 12fe1c4e54..31c8920385 100644 --- a/integration-cli/docker_cli_restart_test.go +++ b/integration-cli/docker_cli_restart_test.go @@ -218,3 +218,32 @@ func (s *DockerSuite) TestRestartPolicyAfterRestart(c *check.C) { err = waitInspect(id, "{{.State.Status}}", "running", 30*time.Second) c.Assert(err, check.IsNil) } + +func (s *DockerSuite) TestRestartContainerwithRestartPolicy(c *check.C) { + out1, _ := dockerCmd(c, "run", "-d", "--restart=on-failure:3", "busybox", "false") + out2, _ := dockerCmd(c, "run", "-d", "--restart=always", "busybox", "false") + + id1 := strings.TrimSpace(string(out1)) + id2 := strings.TrimSpace(string(out2)) + err := waitInspect(id1, "{{ .State.Restarting }} {{ .State.Running }}", "false false", 30*time.Second) + c.Assert(err, checker.IsNil) + + // TODO: fix racey problem during restart: + // https://jenkins.dockerproject.org/job/Docker-PRs-Win2Lin/24665/console + // Error response from daemon: Cannot restart container 6655f620d90b390527db23c0a15b3e46d86a58ecec20a5697ab228d860174251: remove /var/run/docker/libcontainerd/6655f620d90b390527db23c0a15b3e46d86a58ecec20a5697ab228d860174251/rootfs: device or resource busy + if _, _, err := dockerCmdWithError("restart", id1); err != nil { + // if restart met racey problem, try again + time.Sleep(500 * time.Millisecond) + dockerCmd(c, "restart", id1) + } + if _, _, err := dockerCmdWithError("restart", id2); err != nil { + // if restart met racey problem, try again + time.Sleep(500 * time.Millisecond) + dockerCmd(c, "restart", id2) + } + + dockerCmd(c, "stop", id1) + dockerCmd(c, "stop", id2) + dockerCmd(c, "start", id1) + dockerCmd(c, "start", id2) +} diff --git a/libcontainerd/client_linux.go b/libcontainerd/client_linux.go index 6bed5dc74d..3eae2e0d73 100644 --- a/libcontainerd/client_linux.go +++ b/libcontainerd/client_linux.go @@ -134,7 +134,7 @@ func (clnt *client) Create(containerID string, spec Spec, options ...CreateOptio defer clnt.unlock(containerID) if ctr, err := clnt.getContainer(containerID); err == nil { - if ctr.restarting { // docker doesn't actually call start if restart is on atm, but probably should in the future + if ctr.restarting { ctr.restartManager.Cancel() ctr.clean() } else { diff --git a/libcontainerd/container_linux.go b/libcontainerd/container_linux.go index a2a67af2ff..019e5c4dd5 100644 --- a/libcontainerd/container_linux.go +++ b/libcontainerd/container_linux.go @@ -124,6 +124,7 @@ func (ctr *container) handleEvent(e *containerd.Event) error { } else if restart { st.State = StateRestart ctr.restarting = true + ctr.client.deleteContainer(e.Id) go func() { err := <-wait ctr.restarting = false diff --git a/restartmanager/restartmanager.go b/restartmanager/restartmanager.go index e534b2cf73..39b8b1502c 100644 --- a/restartmanager/restartmanager.go +++ b/restartmanager/restartmanager.go @@ -51,7 +51,7 @@ func (rm *restartManager) ShouldRestart(exitCode uint32) (bool, chan error, erro }() if rm.canceled { - return false, nil, nil + return false, nil, fmt.Errorf("restartmanager canceled") } if rm.active {