Merge pull request #41935 from alexisries/Issue-41871-Restore-healthcheck-at-dockerd-restart

Resume healthcheck when daemon restarts
This commit is contained in:
Sebastiaan van Stijn 2021-10-15 12:46:43 +02:00 committed by GitHub
commit a80c450fb3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 48 additions and 1 deletion

View File

@ -364,7 +364,8 @@ func (daemon *Daemon) restore() error {
c.RestartManager().Cancel() // manually start containers because some need to wait for swarm networking
if c.IsPaused() && alive {
switch {
case c.IsPaused() && alive:
s, err := daemon.containerd.Status(context.Background(), c.ID)
if err != nil {
logger(c).WithError(err).Error("failed to get container status")
@ -382,12 +383,18 @@ func (daemon *Daemon) restore() error {
c.Lock()
c.Paused = false
daemon.setStateCounter(c)
daemon.updateHealthMonitor(c)
if err := c.CheckpointTo(daemon.containersReplica); err != nil {
log.WithError(err).Error("failed to update paused container state")
}
c.Unlock()
}
}
case !c.IsPaused() && alive:
logger(c).Debug("restoring healthcheck")
c.Lock()
daemon.updateHealthMonitor(c)
c.Unlock()
}
if !alive {

View File

@ -8,8 +8,10 @@ import (
"github.com/docker/docker/api/types"
"github.com/docker/docker/api/types/container"
"github.com/docker/docker/client"
"github.com/docker/docker/testutil/daemon"
"gotest.tools/v3/assert"
"gotest.tools/v3/poll"
"gotest.tools/v3/skip"
)
@ -25,6 +27,7 @@ func TestDaemonRestartKillContainers(t *testing.T) {
xRunning bool
xRunningLiveRestore bool
xStart bool
xHealthCheck bool
}
for _, tc := range []testCase{
@ -42,6 +45,20 @@ func TestDaemonRestartKillContainers(t *testing.T) {
xRunningLiveRestore: true,
xStart: true,
},
{
desc: "container with restart=always and with healthcheck",
config: &container.Config{Image: "busybox", Cmd: []string{"top"},
Healthcheck: &container.HealthConfig{
Test: []string{"CMD-SHELL", "sleep 1"},
Interval: time.Second,
},
},
hostConfig: &container.HostConfig{RestartPolicy: container.RestartPolicy{Name: "always"}},
xRunning: true,
xRunningLiveRestore: true,
xStart: true,
xHealthCheck: true,
},
{
desc: "container created should not be restarted",
config: &container.Config{Image: "busybox", Cmd: []string{"top"}},
@ -107,9 +124,32 @@ func TestDaemonRestartKillContainers(t *testing.T) {
}
assert.Equal(t, expected, running, "got unexpected running state, expected %v, got: %v", expected, running)
if c.xHealthCheck {
startTime := time.Now()
ctxPoll, cancel := context.WithTimeout(ctx, 30*time.Second)
defer cancel()
poll.WaitOn(t, pollForNewHealthCheck(ctxPoll, client, startTime, resp.ID), poll.WithDelay(100*time.Millisecond))
}
// TODO(cpuguy83): test pause states... this seems to be rather undefined currently
})
}
}
}
}
// pollForNewHealthCheck returns a poll check that succeeds once the most
// recent entry in the health log of containerID started after startTime.
//
// Every invocation of the returned check inspects the container through
// client using ctx. An inspect failure is reported with poll.Error; when no
// sufficiently recent health log entry exists yet, the check asks the poller
// to try again.
func pollForNewHealthCheck(ctx context.Context, client *client.Client, startTime time.Time, containerID string) func(log poll.LogT) poll.Result {
	return func(log poll.LogT) poll.Result {
		inspect, err := client.ContainerInspect(ctx, containerID)
		if err != nil {
			return poll.Error(err)
		}
		// State and Health are dereferenced below; keep polling instead of
		// panicking when the inspect result does not carry them.
		if inspect.State == nil || inspect.State.Health == nil {
			return poll.Continue("waiting for container health state to be reported")
		}
		// Only the newest entry matters: a probe that started after
		// startTime shows that health checks are running again.
		entries := inspect.State.Health.Log
		if n := len(entries); n > 0 && entries[n-1].Start.After(startTime) {
			return poll.Success()
		}
		return poll.Continue("waiting for a new container healthcheck")
	}
}