Fix ShouldRestart for on-failure handling
Currently, if you restart the Docker daemon, every container with the restart policy `on-failure` is started regardless of its `RestartCount`, which costs the daemon extra time during restart. This commit stops these containers from performing an unnecessary start when the daemon restarts.

Signed-off-by: Zhang Wei <zhangwei555@huawei.com>
parent b4664e3f86
commit 51e42e6ee0
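In short, the restart manager is now seeded with the container's persisted RestartCount and compares it to MaximumRetryCount with `<`, instead of tracking a per-process failure counter that was lost whenever the daemon restarted. Below is a minimal, self-contained sketch of the new on-failure decision; the type and field names are illustrative, not the daemon's actual ones.

```go
package main

import "fmt"

// sketchRestartManager mirrors the commit's on-failure logic in standalone
// form; it is not the real restartmanager type.
type sketchRestartManager struct {
	maximumRetryCount int // 0 means no limit is enforced
	restartCount      int // persisted RestartCount, survives daemon restarts
}

// shouldRestart reports whether an on-failure container may be started again.
func (rm *sketchRestartManager) shouldRestart(exitCode int) bool {
	if exitCode == 0 {
		return false // on-failure only restarts containers that failed
	}
	if rm.maximumRetryCount == 0 || rm.restartCount < rm.maximumRetryCount {
		rm.restartCount++ // counted only when a restart is actually granted
		return true
	}
	return false
}

func main() {
	// --restart on-failure:3, already restarted 3 times before the daemon went down.
	rm := &sketchRestartManager{maximumRetryCount: 3, restartCount: 3}
	fmt.Println(rm.shouldRestart(1)) // false: the retry budget is spent, no extra start
}
```

With `--restart on-failure:3` and a persisted RestartCount of 3, the sketch returns false, which matches what the new TestDaemonRestartOnFailure test below verifies after a daemon restart.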
@@ -519,9 +519,8 @@ func copyEscapable(dst io.Writer, src io.ReadCloser, keys []byte) (written int64
 // ShouldRestart decides whether the daemon should restart the container or not.
 // This is based on the container's restart policy.
 func (container *Container) ShouldRestart() bool {
-	return container.HostConfig.RestartPolicy.Name == "always" ||
-		(container.HostConfig.RestartPolicy.Name == "unless-stopped" && !container.HasBeenManuallyStopped) ||
-		(container.HostConfig.RestartPolicy.Name == "on-failure" && container.ExitCode != 0)
+	shouldRestart, _, _ := container.restartManager.ShouldRestart(uint32(container.ExitCode), container.HasBeenManuallyStopped)
+	return shouldRestart
 }
 
 // AddBindMountPoint adds a new bind mount point configuration to the container.
@@ -912,8 +911,9 @@ func (container *Container) RestartManager(reset bool) restartmanager.RestartMan
 		container.restartManager = nil
 	}
 	if container.restartManager == nil {
-		container.restartManager = restartmanager.New(container.HostConfig.RestartPolicy)
+		container.restartManager = restartmanager.New(container.HostConfig.RestartPolicy, container.RestartCount)
 	}
 
 	return container.restartManager
 }
@@ -291,13 +291,14 @@ func (daemon *Daemon) restore() error {
 		wg.Add(1)
 		go func(c *container.Container) {
 			defer wg.Done()
+			rm := c.RestartManager(false)
 			if c.IsRunning() || c.IsPaused() {
 				// Fix activityCount such that graph mounts can be unmounted later
 				if err := daemon.layerStore.ReinitRWLayer(c.RWLayer); err != nil {
 					logrus.Errorf("Failed to ReinitRWLayer for %s due to %s", c.ID, err)
 					return
 				}
-				if err := daemon.containerd.Restore(c.ID, libcontainerd.WithRestartManager(c.RestartManager(true))); err != nil {
+				if err := daemon.containerd.Restore(c.ID, libcontainerd.WithRestartManager(rm)); err != nil {
 					logrus.Errorf("Failed to restore with containerd: %q", err)
 					return
 				}
@@ -150,6 +150,37 @@ func (s *DockerDaemonSuite) TestDaemonRestartUnlessStopped(c *check.C) {
 
 }
 
+func (s *DockerDaemonSuite) TestDaemonRestartOnFailure(c *check.C) {
+	err := s.d.StartWithBusybox()
+	c.Assert(err, check.IsNil)
+
+	out, err := s.d.Cmd("run", "-d", "--name", "test1", "--restart", "on-failure:3", "busybox:latest", "false")
+	c.Assert(err, check.IsNil, check.Commentf("run top1: %v", out))
+
+	// wait test1 to stop
+	hostArgs := []string{"--host", s.d.sock()}
+	err = waitInspectWithArgs("test1", "{{.State.Running}} {{.State.Restarting}}", "false false", 10*time.Second, hostArgs...)
+	c.Assert(err, checker.IsNil, check.Commentf("test1 should exit but not"))
+
+	// record last start time
+	out, err = s.d.Cmd("inspect", "-f={{.State.StartedAt}}", "test1")
+	c.Assert(err, checker.IsNil, check.Commentf("out: %v", out))
+	lastStartTime := out
+
+	err = s.d.Restart()
+	c.Assert(err, check.IsNil)
+
+	// test1 shouldn't restart at all
+	err = waitInspectWithArgs("test1", "{{.State.Running}} {{.State.Restarting}}", "false false", 0, hostArgs...)
+	c.Assert(err, checker.IsNil, check.Commentf("test1 should exit but not"))
+
+	// make sure test1 isn't restarted when daemon restart
+	// if "StartAt" time updates, means test1 was once restarted.
+	out, err = s.d.Cmd("inspect", "-f={{.State.StartedAt}}", "test1")
+	c.Assert(err, checker.IsNil, check.Commentf("out: %v", out))
+	c.Assert(out, checker.Equals, lastStartTime, check.Commentf("test1 shouldn't start after daemon restarts"))
+}
+
 func (s *DockerDaemonSuite) TestDaemonStartIptablesFalse(c *check.C) {
 	if err := s.d.Start("--iptables=false"); err != nil {
 		c.Fatalf("we should have been able to start the daemon with passing iptables=false: %v", err)
@@ -118,7 +118,7 @@ func (ctr *container) handleEvent(e *containerd.Event) error {
 		st.State = StateExitProcess
 	}
 	if st.State == StateExit && ctr.restartManager != nil {
-		restart, wait, err := ctr.restartManager.ShouldRestart(e.Status)
+		restart, wait, err := ctr.restartManager.ShouldRestart(e.Status, false)
 		if err != nil {
 			logrus.Error(err)
 		} else if restart {
@@ -173,7 +173,7 @@ func (ctr *container) waitExit(pid uint32, processFriendlyName string, isFirstPr
 	defer ctr.client.unlock(ctr.containerID)
 
 	if si.State == StateExit && ctr.restartManager != nil {
-		restart, wait, err := ctr.restartManager.ShouldRestart(uint32(exitCode))
+		restart, wait, err := ctr.restartManager.ShouldRestart(uint32(exitCode), false)
 		if err != nil {
 			logrus.Error(err)
 		} else if restart {
@@ -16,14 +16,14 @@ const (
 // RestartManager defines object that controls container restarting rules.
 type RestartManager interface {
 	Cancel() error
-	ShouldRestart(exitCode uint32) (bool, chan error, error)
+	ShouldRestart(exitCode uint32, hasBeenManuallyStopped bool) (bool, chan error, error)
 }
 
 type restartManager struct {
 	sync.Mutex
 	sync.Once
 	policy       container.RestartPolicy
-	failureCount int
+	restartCount int
 	timeout      time.Duration
 	active       bool
 	cancel       chan struct{}
@@ -31,8 +31,8 @@ type restartManager struct {
 }
 
 // New returns a new restartmanager based on a policy.
-func New(policy container.RestartPolicy) RestartManager {
-	return &restartManager{policy: policy, cancel: make(chan struct{})}
+func New(policy container.RestartPolicy, restartCount int) RestartManager {
+	return &restartManager{policy: policy, restartCount: restartCount, cancel: make(chan struct{})}
 }
 
 func (rm *restartManager) SetPolicy(policy container.RestartPolicy) {
@@ -41,7 +41,7 @@ func (rm *restartManager) SetPolicy(policy container.RestartPolicy) {
 	rm.Unlock()
 }
 
-func (rm *restartManager) ShouldRestart(exitCode uint32) (bool, chan error, error) {
+func (rm *restartManager) ShouldRestart(exitCode uint32, hasBeenManuallyStopped bool) (bool, chan error, error) {
 	rm.Lock()
 	unlockOnExit := true
 	defer func() {
@@ -58,12 +58,6 @@ func (rm *restartManager) ShouldRestart(exitCode uint32) (bool, chan error, erro
 		return false, nil, fmt.Errorf("invalid call on active restartmanager")
 	}
 
-	if exitCode != 0 {
-		rm.failureCount++
-	} else {
-		rm.failureCount = 0
-	}
-
 	if rm.timeout == 0 {
 		rm.timeout = defaultTimeout
 	} else {
@@ -72,11 +66,13 @@ func (rm *restartManager) ShouldRestart(exitCode uint32) (bool, chan error, erro
 
 	var restart bool
 	switch {
-	case rm.policy.IsAlways(), rm.policy.IsUnlessStopped():
+	case rm.policy.IsAlways():
+		restart = true
+	case rm.policy.IsUnlessStopped() && !hasBeenManuallyStopped:
 		restart = true
 	case rm.policy.IsOnFailure():
 		// the default value of 0 for MaximumRetryCount means that we will not enforce a maximum count
-		if max := rm.policy.MaximumRetryCount; max == 0 || rm.failureCount <= max {
+		if max := rm.policy.MaximumRetryCount; max == 0 || rm.restartCount < max {
 			restart = exitCode != 0
 		}
 	}
@@ -86,6 +82,8 @@ func (rm *restartManager) ShouldRestart(exitCode uint32) (bool, chan error, erro
 		return false, nil, nil
 	}
 
+	rm.restartCount++
+
 	unlockOnExit = false
 	rm.active = true
 	rm.Unlock()
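The widened interface also threads a hasBeenManuallyStopped flag through ShouldRestart, so the unless-stopped decision can live in the restart manager instead of being duplicated in Container.ShouldRestart. As a rough, stateless rendering of the rewritten switch (the helper `decide` and its parameters are hypothetical; only the policy semantics come from the diff above):

```go
package main

import "fmt"

// decide is an illustrative, side-effect-free rendering of the rewritten
// switch in restartManager.ShouldRestart; it updates no counters.
func decide(policy string, maxRetry, restartCount int, exitCode uint32, hasBeenManuallyStopped bool) bool {
	switch {
	case policy == "always":
		return true
	case policy == "unless-stopped" && !hasBeenManuallyStopped:
		return true
	case policy == "on-failure":
		// MaximumRetryCount == 0 means no limit is enforced.
		if maxRetry == 0 || restartCount < maxRetry {
			return exitCode != 0
		}
	}
	return false
}

func main() {
	fmt.Println(decide("unless-stopped", 0, 0, 0, true))  // false: manually stopped
	fmt.Println(decide("unless-stopped", 0, 0, 0, false)) // true
	fmt.Println(decide("on-failure", 3, 3, 1, false))     // false: retry budget used up
}
```

Note that the containerd event paths still pass false for hasBeenManuallyStopped, as the ShouldRestart(e.Status, false) and ShouldRestart(uint32(exitCode), false) call sites above show.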