Fix ShouldRestart for on-failure handling

Currently, when the Docker daemon restarts, every container with the restart
policy `on-failure` is started regardless of its `RestartCount`, which adds
unnecessary extra time to the daemon's restart.

This commit stops these containers from performing an unnecessary start
when the daemon restarts.

Signed-off-by: Zhang Wei <zhangwei555@huawei.com>
Zhang Wei, 2016-03-22 23:46:40 +08:00
commit 51e42e6ee0 (parent b4664e3f86)
6 changed files with 50 additions and 20 deletions
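For context, a minimal sketch of the behavior this change produces, assuming the restartmanager API introduced in the diffs below; the import paths and the main wrapper are illustrative assumptions, not part of this commit:

package main

import (
	"fmt"

	"github.com/docker/docker/restartmanager"         // assumed path of the package changed below
	"github.com/docker/engine-api/types/container"    // assumed home of RestartPolicy at the time
)

func main() {
	policy := container.RestartPolicy{Name: "on-failure", MaximumRetryCount: 3}

	// A container that has never been restarted: a non-zero exit code
	// still triggers a restart.
	fresh := restartmanager.New(policy, 0)
	restart, _, _ := fresh.ShouldRestart(1, false)
	fmt.Println(restart) // true

	// After a daemon restart, the manager is seeded with the container's
	// persisted RestartCount; once that reaches MaximumRetryCount, the
	// daemon no longer wastes time starting the container.
	exhausted := restartmanager.New(policy, 3)
	restart, _, _ = exhausted.ShouldRestart(1, false)
	fmt.Println(restart) // false
}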

@@ -519,9 +519,8 @@ func copyEscapable(dst io.Writer, src io.ReadCloser, keys []byte) (written int64
 // ShouldRestart decides whether the daemon should restart the container or not.
 // This is based on the container's restart policy.
 func (container *Container) ShouldRestart() bool {
-	return container.HostConfig.RestartPolicy.Name == "always" ||
-		(container.HostConfig.RestartPolicy.Name == "unless-stopped" && !container.HasBeenManuallyStopped) ||
-		(container.HostConfig.RestartPolicy.Name == "on-failure" && container.ExitCode != 0)
+	shouldRestart, _, _ := container.restartManager.ShouldRestart(uint32(container.ExitCode), container.HasBeenManuallyStopped)
+	return shouldRestart
 }
 
 // AddBindMountPoint adds a new bind mount point configuration to the container.
@@ -912,8 +911,9 @@ func (container *Container) RestartManager(reset bool) restartmanager.RestartMan
 		container.restartManager = nil
 	}
 	if container.restartManager == nil {
-		container.restartManager = restartmanager.New(container.HostConfig.RestartPolicy)
+		container.restartManager = restartmanager.New(container.HostConfig.RestartPolicy, container.RestartCount)
 	}
 	return container.restartManager
 }
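A note on the reset flag, with a hedged usage sketch (assuming c is a *container.Container; this snippet is illustrative, not from the commit): RestartManager(true) discards any existing manager before the nil check recreates one, while RestartManager(false) reuses the current manager if present. Either way, a newly built manager is now seeded with the persisted c.RestartCount instead of starting from zero.

rm := c.RestartManager(false)   // reuse, or lazily create seeded with c.RestartCount
fresh := c.RestartManager(true) // drop the old manager; the replacement is still seeded with c.RestartCount
_, _ = rm, fresh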

@@ -291,13 +291,14 @@ func (daemon *Daemon) restore() error {
 		wg.Add(1)
 		go func(c *container.Container) {
 			defer wg.Done()
+			rm := c.RestartManager(false)
 			if c.IsRunning() || c.IsPaused() {
 				// Fix activityCount such that graph mounts can be unmounted later
 				if err := daemon.layerStore.ReinitRWLayer(c.RWLayer); err != nil {
 					logrus.Errorf("Failed to ReinitRWLayer for %s due to %s", c.ID, err)
 					return
 				}
-				if err := daemon.containerd.Restore(c.ID, libcontainerd.WithRestartManager(c.RestartManager(true))); err != nil {
+				if err := daemon.containerd.Restore(c.ID, libcontainerd.WithRestartManager(rm)); err != nil {
					logrus.Errorf("Failed to restore with containerd: %q", err)
					return
				}
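For non-running containers, the restore path later consults the rewritten Container.ShouldRestart (first hunk above), which now sees the persisted count through this manager. A hedged sketch of that downstream decision, loosely modeled on the restore loop (the AutoRestart field and the queueing comment are assumptions, not shown in this diff):

if daemon.configStore.AutoRestart && c.ShouldRestart() {
	// queue c to be started again once restore finishes
}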

@@ -150,6 +150,37 @@ func (s *DockerDaemonSuite) TestDaemonRestartUnlessStopped(c *check.C) {
 }
 
+func (s *DockerDaemonSuite) TestDaemonRestartOnFailure(c *check.C) {
+	err := s.d.StartWithBusybox()
+	c.Assert(err, check.IsNil)
+
+	out, err := s.d.Cmd("run", "-d", "--name", "test1", "--restart", "on-failure:3", "busybox:latest", "false")
+	c.Assert(err, check.IsNil, check.Commentf("run test1: %v", out))
+
+	// wait for test1 to stop
+	hostArgs := []string{"--host", s.d.sock()}
+	err = waitInspectWithArgs("test1", "{{.State.Running}} {{.State.Restarting}}", "false false", 10*time.Second, hostArgs...)
+	c.Assert(err, checker.IsNil, check.Commentf("test1 should have exited"))
+
+	// record the last start time
+	out, err = s.d.Cmd("inspect", "-f={{.State.StartedAt}}", "test1")
+	c.Assert(err, checker.IsNil, check.Commentf("out: %v", out))
+	lastStartTime := out
+
+	err = s.d.Restart()
+	c.Assert(err, check.IsNil)
+
+	// test1 shouldn't restart at all
+	err = waitInspectWithArgs("test1", "{{.State.Running}} {{.State.Restarting}}", "false false", 0, hostArgs...)
+	c.Assert(err, checker.IsNil, check.Commentf("test1 should have exited"))
+
+	// make sure test1 wasn't restarted during the daemon restart:
+	// if the StartedAt time changed, test1 was started again.
+	out, err = s.d.Cmd("inspect", "-f={{.State.StartedAt}}", "test1")
+	c.Assert(err, checker.IsNil, check.Commentf("out: %v", out))
+	c.Assert(out, checker.Equals, lastStartTime, check.Commentf("test1 shouldn't have restarted after the daemon restart"))
+}
+
 func (s *DockerDaemonSuite) TestDaemonStartIptablesFalse(c *check.C) {
 	if err := s.d.Start("--iptables=false"); err != nil {
 		c.Fatalf("we should have been able to start the daemon with passing iptables=false: %v", err)

@@ -118,7 +118,7 @@ func (ctr *container) handleEvent(e *containerd.Event) error {
 			st.State = StateExitProcess
 		}
 		if st.State == StateExit && ctr.restartManager != nil {
-			restart, wait, err := ctr.restartManager.ShouldRestart(e.Status)
+			restart, wait, err := ctr.restartManager.ShouldRestart(e.Status, false)
 			if err != nil {
 				logrus.Error(err)
 			} else if restart {

@@ -173,7 +173,7 @@ func (ctr *container) waitExit(pid uint32, processFriendlyName string, isFirstPr
 	defer ctr.client.unlock(ctr.containerID)
 	if si.State == StateExit && ctr.restartManager != nil {
-		restart, wait, err := ctr.restartManager.ShouldRestart(uint32(exitCode))
+		restart, wait, err := ctr.restartManager.ShouldRestart(uint32(exitCode), false)
 		if err != nil {
 			logrus.Error(err)
 		} else if restart {

@@ -16,14 +16,14 @@ const (
 // RestartManager defines object that controls container restarting rules.
 type RestartManager interface {
 	Cancel() error
-	ShouldRestart(exitCode uint32) (bool, chan error, error)
+	ShouldRestart(exitCode uint32, hasBeenManuallyStopped bool) (bool, chan error, error)
 }
 
 type restartManager struct {
 	sync.Mutex
 	sync.Once
 	policy       container.RestartPolicy
-	failureCount int
+	restartCount int
 	timeout      time.Duration
 	active       bool
 	cancel       chan struct{}
@@ -31,8 +31,8 @@ type restartManager struct {
 }
 
 // New returns a new restartmanager based on a policy.
-func New(policy container.RestartPolicy) RestartManager {
-	return &restartManager{policy: policy, cancel: make(chan struct{})}
+func New(policy container.RestartPolicy, restartCount int) RestartManager {
+	return &restartManager{policy: policy, restartCount: restartCount, cancel: make(chan struct{})}
 }
 
 func (rm *restartManager) SetPolicy(policy container.RestartPolicy) {
@@ -41,7 +41,7 @@ func (rm *restartManager) SetPolicy(policy container.RestartPolicy) {
 	rm.Unlock()
 }
 
-func (rm *restartManager) ShouldRestart(exitCode uint32) (bool, chan error, error) {
+func (rm *restartManager) ShouldRestart(exitCode uint32, hasBeenManuallyStopped bool) (bool, chan error, error) {
 	rm.Lock()
 	unlockOnExit := true
 	defer func() {
@@ -58,12 +58,6 @@ func (rm *restartManager) ShouldRestart(exitCode uint32) (bool, chan error, erro
 		return false, nil, fmt.Errorf("invalid call on active restartmanager")
 	}
 
-	if exitCode != 0 {
-		rm.failureCount++
-	} else {
-		rm.failureCount = 0
-	}
-
 	if rm.timeout == 0 {
 		rm.timeout = defaultTimeout
 	} else {
@@ -72,11 +66,13 @@ func (rm *restartManager) ShouldRestart(exitCode uint32) (bool, chan error, erro
 	var restart bool
 	switch {
-	case rm.policy.IsAlways(), rm.policy.IsUnlessStopped():
+	case rm.policy.IsAlways():
+		restart = true
+	case rm.policy.IsUnlessStopped() && !hasBeenManuallyStopped:
 		restart = true
 	case rm.policy.IsOnFailure():
 		// the default value of 0 for MaximumRetryCount means that we will not enforce a maximum count
-		if max := rm.policy.MaximumRetryCount; max == 0 || rm.failureCount <= max {
+		if max := rm.policy.MaximumRetryCount; max == 0 || rm.restartCount < max {
 			restart = exitCode != 0
 		}
 	}
@@ -86,6 +82,8 @@ func (rm *restartManager) ShouldRestart(exitCode uint32) (bool, chan error, erro
 		return false, nil, nil
 	}
 
+	rm.restartCount++
+
 	unlockOnExit = false
 	rm.active = true
 	rm.Unlock()
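Isolating the on-failure branch makes the semantic shift easier to see: the manager now carries a monotonically increasing restartCount seeded from the container's persisted value, instead of a failureCount that was reset to zero on every clean exit, and the bound check tightens from failureCount <= max to restartCount < max. A simplified, hypothetical helper (omitting the locking, timeout, and cancellation machinery above):

package restartsketch

// shouldRestartOnFailure mirrors only the on-failure decision in
// ShouldRestart; a MaximumRetryCount of 0 means no cap is enforced.
func shouldRestartOnFailure(exitCode uint32, restartCount, max int) bool {
	if max != 0 && restartCount >= max {
		return false // retry budget already spent
	}
	return exitCode != 0 // only non-zero exits trigger a restart
}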