From 9f39889dee7d96430359d7e1f8970a88acad59e5 Mon Sep 17 00:00:00 2001
From: Alexis Ries
Date: Tue, 26 Jan 2021 14:38:10 +0100
Subject: [PATCH] Fixes #41871: Update daemon/daemon.go: resume healthcheck on restore

Call updateHealthMonitor for alive non-paused containers

Signed-off-by: Alexis Ries
---
 daemon/daemon.go                      |  9 +++++-
 integration/container/restart_test.go | 40 +++++++++++++++++++++++++++
 2 files changed, 48 insertions(+), 1 deletion(-)

diff --git a/daemon/daemon.go b/daemon/daemon.go
index 15bfee1b7d..f94168eceb 100644
--- a/daemon/daemon.go
+++ b/daemon/daemon.go
@@ -364,7 +364,8 @@ func (daemon *Daemon) restore() error {
 			c.RestartManager().Cancel()
 
 			// manually start containers because some need to wait for swarm networking
-			if c.IsPaused() && alive {
+			switch {
+			case c.IsPaused() && alive:
 				s, err := daemon.containerd.Status(context.Background(), c.ID)
 				if err != nil {
 					logger(c).WithError(err).Error("failed to get container status")
@@ -382,12 +383,18 @@ func (daemon *Daemon) restore() error {
 						c.Lock()
 						c.Paused = false
 						daemon.setStateCounter(c)
+						daemon.updateHealthMonitor(c)
 						if err := c.CheckpointTo(daemon.containersReplica); err != nil {
 							log.WithError(err).Error("failed to update paused container state")
 						}
 						c.Unlock()
 					}
 				}
+			case !c.IsPaused() && alive:
+				logger(c).Debug("restoring healthcheck")
+				c.Lock()
+				daemon.updateHealthMonitor(c)
+				c.Unlock()
 			}
 
 			if !alive {
diff --git a/integration/container/restart_test.go b/integration/container/restart_test.go
index e09a6e6081..0c29812e8d 100644
--- a/integration/container/restart_test.go
+++ b/integration/container/restart_test.go
@@ -8,8 +8,10 @@ import (
 
 	"github.com/docker/docker/api/types"
 	"github.com/docker/docker/api/types/container"
+	"github.com/docker/docker/client"
 	"github.com/docker/docker/testutil/daemon"
 	"gotest.tools/v3/assert"
+	"gotest.tools/v3/poll"
 	"gotest.tools/v3/skip"
 )
 
@@ -25,6 +27,7 @@ func TestDaemonRestartKillContainers(t *testing.T) {
 		xRunning            bool
 		xRunningLiveRestore bool
 		xStart              bool
+		xHealthCheck        bool
 	}
 
 	for _, tc := range []testCase{
@@ -42,6 +45,20 @@ func TestDaemonRestartKillContainers(t *testing.T) {
 			xRunningLiveRestore: true,
 			xStart:              true,
 		},
+		{
+			desc: "container with restart=always and with healthcheck",
+			config: &container.Config{Image: "busybox", Cmd: []string{"top"},
+				Healthcheck: &container.HealthConfig{
+					Test:     []string{"CMD-SHELL", "sleep 1"},
+					Interval: time.Second,
+				},
+			},
+			hostConfig:          &container.HostConfig{RestartPolicy: container.RestartPolicy{Name: "always"}},
+			xRunning:            true,
+			xRunningLiveRestore: true,
+			xStart:              true,
+			xHealthCheck:        true,
+		},
 		{
 			desc:       "container created should not be restarted",
 			config:     &container.Config{Image: "busybox", Cmd: []string{"top"}},
@@ -107,9 +124,32 @@ func TestDaemonRestartKillContainers(t *testing.T) {
 			}
 
 			assert.Equal(t, expected, running, "got unexpected running state, expected %v, got: %v", expected, running)
+
+			if c.xHealthCheck {
+				startTime := time.Now()
+				ctxPoll, cancel := context.WithTimeout(ctx, 30*time.Second)
+				defer cancel()
+				poll.WaitOn(t, pollForNewHealthCheck(ctxPoll, client, startTime, resp.ID), poll.WithDelay(100*time.Millisecond))
+			}
 			// TODO(cpuguy83): test pause states... this seems to be rather undefined currently
 		})
 	}
 }
+
+func pollForNewHealthCheck(ctx context.Context, client *client.Client, startTime time.Time, containerID string) func(log poll.LogT) poll.Result {
+	return func(log poll.LogT) poll.Result {
+		inspect, err := client.ContainerInspect(ctx, containerID)
+		if err != nil {
+			return poll.Error(err)
+		}
+		healthChecksTotal := len(inspect.State.Health.Log)
+		if healthChecksTotal > 0 {
+			if inspect.State.Health.Log[healthChecksTotal-1].Start.After(startTime) {
+				return poll.Success()
+			}
+		}
+		return poll.Continue("waiting for a new container healthcheck")
+	}
+}
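Verification sketch (reviewer note, not part of the patch to apply): the program below is a minimal standalone version of the check the new test performs, useful for verifying the fix by hand. It uses only the public Docker client API (client.NewClientWithOpts, ContainerInspect); the file name, the 60-second window, and the flow around the daemon restart are illustrative assumptions. Run it against a running container that has a healthcheck, then restart dockerd (started with live-restore enabled) while it polls. Without the fix the wait times out, because the daemon never resumes the health monitor; with the fix a health log entry newer than the recorded start time appears. The 100 ms delay mirrors poll.WithDelay in the test.

// verify_healthcheck_restore.go — illustrative sketch, not part of the patch.
package main

import (
	"context"
	"fmt"
	"os"
	"time"

	"github.com/docker/docker/client"
)

func main() {
	containerID := os.Args[1] // ID or name of a running container with a healthcheck

	cli, err := client.NewClientWithOpts(client.FromEnv)
	if err != nil {
		panic(err)
	}
	defer cli.Close()

	// Only healthchecks started after this point count as "resumed".
	startTime := time.Now()
	fmt.Println("restart dockerd now; waiting up to 60s for a new healthcheck...")

	ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
	defer cancel()

	for {
		// Inspect errors are expected while the daemon itself is down; keep polling.
		if inspect, err := cli.ContainerInspect(ctx, containerID); err == nil && inspect.State.Health != nil {
			if hlog := inspect.State.Health.Log; len(hlog) > 0 && hlog[len(hlog)-1].Start.After(startTime) {
				fmt.Println("healthcheck resumed; status:", inspect.State.Health.Status)
				return
			}
		}
		select {
		case <-ctx.Done():
			fmt.Println("timed out: no healthcheck ran after the daemon restart")
			os.Exit(1)
		case <-time.After(100 * time.Millisecond):
		}
	}
}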