mirror of
https://github.com/moby/moby.git
synced 2022-11-09 12:21:53 -05:00
Fix ShouldRestart for on-failure handle
Currently, if you restart the Docker daemon, all containers with the restart policy `on-failure` are started regardless of their `RestartCount`, which makes the daemon take extra time to restart. This commit stops these containers from being started unnecessarily when the daemon restarts. Signed-off-by: Zhang Wei <zhangwei555@huawei.com>
This commit is contained in:
parent
b4664e3f86
commit
51e42e6ee0
6 changed files with 50 additions and 20 deletions
|
@ -519,9 +519,8 @@ func copyEscapable(dst io.Writer, src io.ReadCloser, keys []byte) (written int64
|
|||
// ShouldRestart decides whether the daemon should restart the container or not.
|
||||
// This is based on the container's restart policy.
|
||||
func (container *Container) ShouldRestart() bool {
|
||||
return container.HostConfig.RestartPolicy.Name == "always" ||
|
||||
(container.HostConfig.RestartPolicy.Name == "unless-stopped" && !container.HasBeenManuallyStopped) ||
|
||||
(container.HostConfig.RestartPolicy.Name == "on-failure" && container.ExitCode != 0)
|
||||
shouldRestart, _, _ := container.restartManager.ShouldRestart(uint32(container.ExitCode), container.HasBeenManuallyStopped)
|
||||
return shouldRestart
|
||||
}
|
||||
|
||||
// AddBindMountPoint adds a new bind mount point configuration to the container.
|
||||
|
@ -912,8 +911,9 @@ func (container *Container) RestartManager(reset bool) restartmanager.RestartMan
|
|||
container.restartManager = nil
|
||||
}
|
||||
if container.restartManager == nil {
|
||||
container.restartManager = restartmanager.New(container.HostConfig.RestartPolicy)
|
||||
container.restartManager = restartmanager.New(container.HostConfig.RestartPolicy, container.RestartCount)
|
||||
}
|
||||
|
||||
return container.restartManager
|
||||
}
|
||||
|
||||
|
|
|
@ -291,13 +291,14 @@ func (daemon *Daemon) restore() error {
|
|||
wg.Add(1)
|
||||
go func(c *container.Container) {
|
||||
defer wg.Done()
|
||||
rm := c.RestartManager(false)
|
||||
if c.IsRunning() || c.IsPaused() {
|
||||
// Fix activityCount such that graph mounts can be unmounted later
|
||||
if err := daemon.layerStore.ReinitRWLayer(c.RWLayer); err != nil {
|
||||
logrus.Errorf("Failed to ReinitRWLayer for %s due to %s", c.ID, err)
|
||||
return
|
||||
}
|
||||
if err := daemon.containerd.Restore(c.ID, libcontainerd.WithRestartManager(c.RestartManager(true))); err != nil {
|
||||
if err := daemon.containerd.Restore(c.ID, libcontainerd.WithRestartManager(rm)); err != nil {
|
||||
logrus.Errorf("Failed to restore with containerd: %q", err)
|
||||
return
|
||||
}
|
||||
|
|
|
@ -150,6 +150,37 @@ func (s *DockerDaemonSuite) TestDaemonRestartUnlessStopped(c *check.C) {
|
|||
|
||||
}
|
||||
|
||||
func (s *DockerDaemonSuite) TestDaemonRestartOnFailure(c *check.C) {
|
||||
err := s.d.StartWithBusybox()
|
||||
c.Assert(err, check.IsNil)
|
||||
|
||||
out, err := s.d.Cmd("run", "-d", "--name", "test1", "--restart", "on-failure:3", "busybox:latest", "false")
|
||||
c.Assert(err, check.IsNil, check.Commentf("run top1: %v", out))
|
||||
|
||||
// wait for test1 to stop
|
||||
hostArgs := []string{"--host", s.d.sock()}
|
||||
err = waitInspectWithArgs("test1", "{{.State.Running}} {{.State.Restarting}}", "false false", 10*time.Second, hostArgs...)
|
||||
c.Assert(err, checker.IsNil, check.Commentf("test1 should exit but not"))
|
||||
|
||||
// record last start time
|
||||
out, err = s.d.Cmd("inspect", "-f={{.State.StartedAt}}", "test1")
|
||||
c.Assert(err, checker.IsNil, check.Commentf("out: %v", out))
|
||||
lastStartTime := out
|
||||
|
||||
err = s.d.Restart()
|
||||
c.Assert(err, check.IsNil)
|
||||
|
||||
// test1 shouldn't restart at all
|
||||
err = waitInspectWithArgs("test1", "{{.State.Running}} {{.State.Restarting}}", "false false", 0, hostArgs...)
|
||||
c.Assert(err, checker.IsNil, check.Commentf("test1 should exit but not"))
|
||||
|
||||
// make sure test1 isn't restarted when the daemon restarts
|
||||
// if the "StartedAt" time changes, it means test1 was restarted.
|
||||
out, err = s.d.Cmd("inspect", "-f={{.State.StartedAt}}", "test1")
|
||||
c.Assert(err, checker.IsNil, check.Commentf("out: %v", out))
|
||||
c.Assert(out, checker.Equals, lastStartTime, check.Commentf("test1 shouldn't start after daemon restarts"))
|
||||
}
|
||||
|
||||
func (s *DockerDaemonSuite) TestDaemonStartIptablesFalse(c *check.C) {
|
||||
if err := s.d.Start("--iptables=false"); err != nil {
|
||||
c.Fatalf("we should have been able to start the daemon with passing iptables=false: %v", err)
|
||||
|
|
|
@ -118,7 +118,7 @@ func (ctr *container) handleEvent(e *containerd.Event) error {
|
|||
st.State = StateExitProcess
|
||||
}
|
||||
if st.State == StateExit && ctr.restartManager != nil {
|
||||
restart, wait, err := ctr.restartManager.ShouldRestart(e.Status)
|
||||
restart, wait, err := ctr.restartManager.ShouldRestart(e.Status, false)
|
||||
if err != nil {
|
||||
logrus.Error(err)
|
||||
} else if restart {
|
||||
|
|
|
@ -173,7 +173,7 @@ func (ctr *container) waitExit(pid uint32, processFriendlyName string, isFirstPr
|
|||
defer ctr.client.unlock(ctr.containerID)
|
||||
|
||||
if si.State == StateExit && ctr.restartManager != nil {
|
||||
restart, wait, err := ctr.restartManager.ShouldRestart(uint32(exitCode))
|
||||
restart, wait, err := ctr.restartManager.ShouldRestart(uint32(exitCode), false)
|
||||
if err != nil {
|
||||
logrus.Error(err)
|
||||
} else if restart {
|
||||
|
|
|
@ -16,14 +16,14 @@ const (
|
|||
// RestartManager defines object that controls container restarting rules.
|
||||
type RestartManager interface {
|
||||
Cancel() error
|
||||
ShouldRestart(exitCode uint32) (bool, chan error, error)
|
||||
ShouldRestart(exitCode uint32, hasBeenManuallyStopped bool) (bool, chan error, error)
|
||||
}
|
||||
|
||||
type restartManager struct {
|
||||
sync.Mutex
|
||||
sync.Once
|
||||
policy container.RestartPolicy
|
||||
failureCount int
|
||||
restartCount int
|
||||
timeout time.Duration
|
||||
active bool
|
||||
cancel chan struct{}
|
||||
|
@ -31,8 +31,8 @@ type restartManager struct {
|
|||
}
|
||||
|
||||
// New returns a new restartmanager based on a policy.
|
||||
func New(policy container.RestartPolicy) RestartManager {
|
||||
return &restartManager{policy: policy, cancel: make(chan struct{})}
|
||||
func New(policy container.RestartPolicy, restartCount int) RestartManager {
|
||||
return &restartManager{policy: policy, restartCount: restartCount, cancel: make(chan struct{})}
|
||||
}
|
||||
|
||||
func (rm *restartManager) SetPolicy(policy container.RestartPolicy) {
|
||||
|
@ -41,7 +41,7 @@ func (rm *restartManager) SetPolicy(policy container.RestartPolicy) {
|
|||
rm.Unlock()
|
||||
}
|
||||
|
||||
func (rm *restartManager) ShouldRestart(exitCode uint32) (bool, chan error, error) {
|
||||
func (rm *restartManager) ShouldRestart(exitCode uint32, hasBeenManuallyStopped bool) (bool, chan error, error) {
|
||||
rm.Lock()
|
||||
unlockOnExit := true
|
||||
defer func() {
|
||||
|
@ -58,12 +58,6 @@ func (rm *restartManager) ShouldRestart(exitCode uint32) (bool, chan error, erro
|
|||
return false, nil, fmt.Errorf("invalid call on active restartmanager")
|
||||
}
|
||||
|
||||
if exitCode != 0 {
|
||||
rm.failureCount++
|
||||
} else {
|
||||
rm.failureCount = 0
|
||||
}
|
||||
|
||||
if rm.timeout == 0 {
|
||||
rm.timeout = defaultTimeout
|
||||
} else {
|
||||
|
@ -72,11 +66,13 @@ func (rm *restartManager) ShouldRestart(exitCode uint32) (bool, chan error, erro
|
|||
|
||||
var restart bool
|
||||
switch {
|
||||
case rm.policy.IsAlways(), rm.policy.IsUnlessStopped():
|
||||
case rm.policy.IsAlways():
|
||||
restart = true
|
||||
case rm.policy.IsUnlessStopped() && !hasBeenManuallyStopped:
|
||||
restart = true
|
||||
case rm.policy.IsOnFailure():
|
||||
// the default value of 0 for MaximumRetryCount means that we will not enforce a maximum count
|
||||
if max := rm.policy.MaximumRetryCount; max == 0 || rm.failureCount <= max {
|
||||
if max := rm.policy.MaximumRetryCount; max == 0 || rm.restartCount < max {
|
||||
restart = exitCode != 0
|
||||
}
|
||||
}
|
||||
|
@ -86,6 +82,8 @@ func (rm *restartManager) ShouldRestart(exitCode uint32) (bool, chan error, erro
|
|||
return false, nil, nil
|
||||
}
|
||||
|
||||
rm.restartCount++
|
||||
|
||||
unlockOnExit = false
|
||||
rm.active = true
|
||||
rm.Unlock()
|
||||
|
|
Loading…
Reference in a new issue