diff --git a/container/container.go b/container/container.go
index d33b14878c..cae0eceee2 100644
--- a/container/container.go
+++ b/container/container.go
@@ -519,9 +519,8 @@ func copyEscapable(dst io.Writer, src io.ReadCloser, keys []byte) (written int64
 // ShouldRestart decides whether the daemon should restart the container or not.
 // This is based on the container's restart policy.
 func (container *Container) ShouldRestart() bool {
-	return container.HostConfig.RestartPolicy.Name == "always" ||
-		(container.HostConfig.RestartPolicy.Name == "unless-stopped" && !container.HasBeenManuallyStopped) ||
-		(container.HostConfig.RestartPolicy.Name == "on-failure" && container.ExitCode != 0)
+	shouldRestart, _, _ := container.restartManager.ShouldRestart(uint32(container.ExitCode), container.HasBeenManuallyStopped)
+	return shouldRestart
 }
 
 // AddBindMountPoint adds a new bind mount point configuration to the container.
@@ -912,8 +911,9 @@ func (container *Container) RestartManager(reset bool) restartmanager.RestartMan
 		container.restartManager = nil
 	}
 	if container.restartManager == nil {
-		container.restartManager = restartmanager.New(container.HostConfig.RestartPolicy)
+		container.restartManager = restartmanager.New(container.HostConfig.RestartPolicy, container.RestartCount)
 	}
+
 	return container.restartManager
 }
diff --git a/daemon/daemon.go b/daemon/daemon.go
index a8c5fb9ace..af25910a5e 100644
--- a/daemon/daemon.go
+++ b/daemon/daemon.go
@@ -291,13 +291,14 @@ func (daemon *Daemon) restore() error {
 		wg.Add(1)
 		go func(c *container.Container) {
 			defer wg.Done()
+			rm := c.RestartManager(false)
 			if c.IsRunning() || c.IsPaused() {
 				// Fix activityCount such that graph mounts can be unmounted later
 				if err := daemon.layerStore.ReinitRWLayer(c.RWLayer); err != nil {
 					logrus.Errorf("Failed to ReinitRWLayer for %s due to %s", c.ID, err)
 					return
 				}
-				if err := daemon.containerd.Restore(c.ID, libcontainerd.WithRestartManager(c.RestartManager(true))); err != nil {
+				if err := daemon.containerd.Restore(c.ID, libcontainerd.WithRestartManager(rm)); err != nil {
 					logrus.Errorf("Failed to restore with containerd: %q", err)
 					return
 				}
diff --git a/integration-cli/docker_cli_daemon_test.go b/integration-cli/docker_cli_daemon_test.go
index 3249cf1570..98fb5bdd0b 100644
--- a/integration-cli/docker_cli_daemon_test.go
+++ b/integration-cli/docker_cli_daemon_test.go
@@ -150,6 +150,37 @@ func (s *DockerDaemonSuite) TestDaemonRestartUnlessStopped(c *check.C) {
 
 }
 
+func (s *DockerDaemonSuite) TestDaemonRestartOnFailure(c *check.C) {
+	err := s.d.StartWithBusybox()
+	c.Assert(err, check.IsNil)
+
+	out, err := s.d.Cmd("run", "-d", "--name", "test1", "--restart", "on-failure:3", "busybox:latest", "false")
+	c.Assert(err, check.IsNil, check.Commentf("run test1: %v", out))
+
+	// wait for test1 to stop
+	hostArgs := []string{"--host", s.d.sock()}
+	err = waitInspectWithArgs("test1", "{{.State.Running}} {{.State.Restarting}}", "false false", 10*time.Second, hostArgs...)
+	c.Assert(err, checker.IsNil, check.Commentf("test1 should have exited"))
+
+	// record the last start time
+	out, err = s.d.Cmd("inspect", "-f={{.State.StartedAt}}", "test1")
+	c.Assert(err, checker.IsNil, check.Commentf("out: %v", out))
+	lastStartTime := out
+
+	err = s.d.Restart()
+	c.Assert(err, check.IsNil)
+
+	// test1 shouldn't restart at all
+	err = waitInspectWithArgs("test1", "{{.State.Running}} {{.State.Restarting}}", "false false", 0, hostArgs...)
+	c.Assert(err, checker.IsNil, check.Commentf("test1 should have exited"))
+
+	// make sure test1 wasn't restarted when the daemon restarted;
+	// if the "StartedAt" time was updated, test1 was restarted at some point.
+	out, err = s.d.Cmd("inspect", "-f={{.State.StartedAt}}", "test1")
+	c.Assert(err, checker.IsNil, check.Commentf("out: %v", out))
+	c.Assert(out, checker.Equals, lastStartTime, check.Commentf("test1 shouldn't have restarted after the daemon restarted"))
+}
+
 func (s *DockerDaemonSuite) TestDaemonStartIptablesFalse(c *check.C) {
 	if err := s.d.Start("--iptables=false"); err != nil {
 		c.Fatalf("we should have been able to start the daemon with passing iptables=false: %v", err)
diff --git a/libcontainerd/container_linux.go b/libcontainerd/container_linux.go
index 019e5c4dd5..ed0ff05ffa 100644
--- a/libcontainerd/container_linux.go
+++ b/libcontainerd/container_linux.go
@@ -118,7 +118,7 @@ func (ctr *container) handleEvent(e *containerd.Event) error {
 			st.State = StateExitProcess
 		}
 		if st.State == StateExit && ctr.restartManager != nil {
-			restart, wait, err := ctr.restartManager.ShouldRestart(e.Status)
+			restart, wait, err := ctr.restartManager.ShouldRestart(e.Status, false)
 			if err != nil {
 				logrus.Error(err)
 			} else if restart {
diff --git a/libcontainerd/container_windows.go b/libcontainerd/container_windows.go
index 7cab47e6b3..40517f59c8 100644
--- a/libcontainerd/container_windows.go
+++ b/libcontainerd/container_windows.go
@@ -173,7 +173,7 @@ func (ctr *container) waitExit(pid uint32, processFriendlyName string, isFirstPr
 	defer ctr.client.unlock(ctr.containerID)
 
 	if si.State == StateExit && ctr.restartManager != nil {
-		restart, wait, err := ctr.restartManager.ShouldRestart(uint32(exitCode))
+		restart, wait, err := ctr.restartManager.ShouldRestart(uint32(exitCode), false)
 		if err != nil {
 			logrus.Error(err)
 		} else if restart {
diff --git a/restartmanager/restartmanager.go b/restartmanager/restartmanager.go
index 39b8b1502c..2cf1de684f 100644
--- a/restartmanager/restartmanager.go
+++ b/restartmanager/restartmanager.go
@@ -16,14 +16,14 @@ const (
 // RestartManager defines object that controls container restarting rules.
 type RestartManager interface {
 	Cancel() error
-	ShouldRestart(exitCode uint32) (bool, chan error, error)
+	ShouldRestart(exitCode uint32, hasBeenManuallyStopped bool) (bool, chan error, error)
 }
 
 type restartManager struct {
 	sync.Mutex
 	sync.Once
 	policy       container.RestartPolicy
-	failureCount int
+	restartCount int
 	timeout      time.Duration
 	active       bool
 	cancel       chan struct{}
@@ -31,8 +31,8 @@ type restartManager struct {
 }
 
 // New returns a new restartmanager based on a policy.
-func New(policy container.RestartPolicy) RestartManager {
-	return &restartManager{policy: policy, cancel: make(chan struct{})}
+func New(policy container.RestartPolicy, restartCount int) RestartManager {
+	return &restartManager{policy: policy, restartCount: restartCount, cancel: make(chan struct{})}
 }
 
 func (rm *restartManager) SetPolicy(policy container.RestartPolicy) {
@@ -41,7 +41,7 @@ func (rm *restartManager) SetPolicy(policy container.RestartPolicy) {
 	rm.Unlock()
 }
 
-func (rm *restartManager) ShouldRestart(exitCode uint32) (bool, chan error, error) {
+func (rm *restartManager) ShouldRestart(exitCode uint32, hasBeenManuallyStopped bool) (bool, chan error, error) {
 	rm.Lock()
 	unlockOnExit := true
 	defer func() {
@@ -58,12 +58,6 @@ func (rm *restartManager) ShouldRestart(exitCode uint32) (bool, chan error, erro
 		return false, nil, fmt.Errorf("invalid call on active restartmanager")
 	}
 
-	if exitCode != 0 {
-		rm.failureCount++
-	} else {
-		rm.failureCount = 0
-	}
-
 	if rm.timeout == 0 {
 		rm.timeout = defaultTimeout
 	} else {
@@ -72,11 +66,13 @@ func (rm *restartManager) ShouldRestart(exitCode uint32) (bool, chan error, erro
 
 	var restart bool
 	switch {
-	case rm.policy.IsAlways(), rm.policy.IsUnlessStopped():
+	case rm.policy.IsAlways():
+		restart = true
+	case rm.policy.IsUnlessStopped() && !hasBeenManuallyStopped:
 		restart = true
 	case rm.policy.IsOnFailure():
 		// the default value of 0 for MaximumRetryCount means that we will not enforce a maximum count
-		if max := rm.policy.MaximumRetryCount; max == 0 || rm.failureCount <= max {
+		if max := rm.policy.MaximumRetryCount; max == 0 || rm.restartCount < max {
 			restart = exitCode != 0
 		}
 	}
@@ -86,6 +82,8 @@ func (rm *restartManager) ShouldRestart(exitCode uint32) (bool, chan error, erro
 		return false, nil, nil
 	}
 
+	rm.restartCount++
+
 	unlockOnExit = false
 	rm.active = true
 	rm.Unlock()
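
Usage note for reviewers: below is a minimal, self-contained sketch of how the changed signatures, restartmanager.New(policy, restartCount) and ShouldRestart(exitCode, hasBeenManuallyStopped), fit together. It is not part of the patch; the import paths, the container.RestartPolicy literal, and the comment about the wait channel are assumptions based on the surrounding tree at this point in time.

```go
package main

import (
	"fmt"

	"github.com/docker/docker/restartmanager"
	// RestartPolicy is assumed to come from the engine-api types vendored by this tree.
	"github.com/docker/engine-api/types/container"
)

func main() {
	// An on-failure:3 policy, seeded with a previously persisted restart count of 2,
	// mirroring how Container.RestartManager(false) now passes container.RestartCount
	// instead of letting every daemon restart reset the counter to zero.
	policy := container.RestartPolicy{Name: "on-failure", MaximumRetryCount: 3}
	rm := restartmanager.New(policy, 2)

	// Non-zero exit code and not manually stopped: 2 < 3, so one more restart is allowed.
	restart, wait, err := rm.ShouldRestart(1, false)
	if err != nil {
		fmt.Println("error:", err)
		return
	}
	fmt.Println("restart:", restart) // expected: true

	if restart {
		// The wait channel is assumed to yield nil once the backoff timeout elapses
		// (or an error if the manager is canceled first), as in the existing callers.
		if werr := <-wait; werr == nil {
			fmt.Println("backoff elapsed, container may be restarted")
		}
	}
}
```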