From a99db84b4a966f0f09e81c446e857323a2a3302c Mon Sep 17 00:00:00 2001 From: runshenzhu Date: Tue, 12 Jul 2016 10:56:01 -0700 Subject: [PATCH] extend health check to start service Signed-off-by: runshenzhu Signed-off-by: Runshen Zhu --- .../cluster/executor/container/controller.go | 53 ++++- .../docker_cli_service_health_test.go | 191 ++++++++++++++++++ 2 files changed, 243 insertions(+), 1 deletion(-) create mode 100644 integration-cli/docker_cli_service_health_test.go diff --git a/daemon/cluster/executor/container/controller.go b/daemon/cluster/executor/container/controller.go index a51e5c52e5..b86aa4ecb3 100644 --- a/daemon/cluster/executor/container/controller.go +++ b/daemon/cluster/executor/container/controller.go @@ -142,7 +142,58 @@ func (r *controller) Start(ctx context.Context) error { return errors.Wrap(err, "starting container failed") } - return nil + // no health check + if ctnr.Config == nil || ctnr.Config.Healthcheck == nil { + return nil + } + + healthCmd := ctnr.Config.Healthcheck.Test + + if len(healthCmd) == 0 || healthCmd[0] == "NONE" { + return nil + } + + // wait for container to be healthy + eventq := r.adapter.events(ctx) + + var healthErr error + for { + select { + case event := <-eventq: + if !r.matchevent(event) { + continue + } + + switch event.Action { + case "die": // exit on terminal events + ctnr, err := r.adapter.inspect(ctx) + if err != nil { + return errors.Wrap(err, "die event received") + } else if ctnr.State.ExitCode != 0 { + return &exitError{code: ctnr.State.ExitCode, cause: healthErr} + } + + return nil + case "destroy": + // If we get here, something has gone wrong but we want to exit + // and report anyways. + return ErrContainerDestroyed + case "health_status: unhealthy": + // in this case, we stop the container and report unhealthy status + if err := r.Shutdown(ctx); err != nil { + return errors.Wrap(err, "unhealthy container shutdown failed") + } + // set health check error, and wait for container to fully exit ("die" event) + healthErr = ErrContainerUnhealthy + case "health_status: healthy": + return nil + } + case <-ctx.Done(): + return ctx.Err() + case <-r.closed: + return r.err + } + } } // Wait on the container to exit. diff --git a/integration-cli/docker_cli_service_health_test.go b/integration-cli/docker_cli_service_health_test.go new file mode 100644 index 0000000000..fb32070827 --- /dev/null +++ b/integration-cli/docker_cli_service_health_test.go @@ -0,0 +1,191 @@ +// +build !windows + +package main + +import ( + "strconv" + "strings" + + "github.com/docker/docker/daemon/cluster/executor/container" + "github.com/docker/docker/pkg/integration/checker" + "github.com/docker/engine-api/types/swarm" + "github.com/go-check/check" +) + +// start a service, and then make its task unhealthy during running +// finally, unhealthy task should be detected and killed +func (s *DockerSwarmSuite) TestServiceHealthRun(c *check.C) { + testRequires(c, DaemonIsLinux) // busybox doesn't work on Windows + + d := s.AddDaemon(c, true, true) + + // build image with health-check + // note: use `daemon.buildImageWithOut` to build, do not use `buildImage` to build + imageName := "testhealth" + _, _, err := d.buildImageWithOut(imageName, + `FROM busybox + RUN touch /status + HEALTHCHECK --interval=1s --timeout=1s --retries=1\ + CMD cat /status`, + true) + c.Check(err, check.IsNil) + + serviceName := "healthServiceRun" + out, err := d.Cmd("service", "create", "--name", serviceName, imageName, "top") + c.Assert(err, checker.IsNil, check.Commentf(out)) + id := strings.TrimSpace(out) + + var tasks []swarm.Task + waitAndAssert(c, defaultReconciliationTimeout, func(c *check.C) (interface{}, check.CommentInterface) { + tasks = d.getServiceTasks(c, id) + return tasks, nil + }, checker.HasLen, 1) + + task := tasks[0] + + // wait for task to start + waitAndAssert(c, defaultReconciliationTimeout, func(c *check.C) (interface{}, check.CommentInterface) { + task = d.getTask(c, task.ID) + return task.Status.State, nil + }, checker.Equals, swarm.TaskStateStarting) + containerID := task.Status.ContainerStatus.ContainerID + + // wait for container to be healthy + waitAndAssert(c, defaultReconciliationTimeout, func(c *check.C) (interface{}, check.CommentInterface) { + out, _ := d.Cmd("inspect", "--format={{.State.Health.Status}}", containerID) + return strings.TrimSpace(out), nil + }, checker.Equals, "healthy") + + // make it fail + d.Cmd("exec", containerID, "rm", "/status") + // wait for container to be unhealthy + waitAndAssert(c, defaultReconciliationTimeout, func(c *check.C) (interface{}, check.CommentInterface) { + out, _ := d.Cmd("inspect", "--format={{.State.Health.Status}}", containerID) + return strings.TrimSpace(out), nil + }, checker.Equals, "unhealthy") + + // Task should be terminated + waitAndAssert(c, defaultReconciliationTimeout, func(c *check.C) (interface{}, check.CommentInterface) { + task = d.getTask(c, task.ID) + return task.Status.State, nil + }, checker.Equals, swarm.TaskStateFailed) + + if !strings.Contains(task.Status.Err, container.ErrContainerUnhealthy.Error()) { + c.Fatal("unhealthy task exits because of other error") + } +} + +// start a service whose task is unhealthy at beginning +// its tasks should be blocked in starting stage, until health check is passed +func (s *DockerSwarmSuite) TestServiceHealthStart(c *check.C) { + testRequires(c, DaemonIsLinux) // busybox doesn't work on Windows + + d := s.AddDaemon(c, true, true) + + // service started from this image won't pass health check + imageName := "testhealth" + _, _, err := d.buildImageWithOut(imageName, + `FROM busybox + HEALTHCHECK --interval=1s --timeout=1s --retries=1024\ + CMD cat /status`, + true) + c.Check(err, check.IsNil) + + serviceName := "healthServiceStart" + out, err := d.Cmd("service", "create", "--name", serviceName, imageName, "top") + c.Assert(err, checker.IsNil, check.Commentf(out)) + id := strings.TrimSpace(out) + + var tasks []swarm.Task + waitAndAssert(c, defaultReconciliationTimeout, func(c *check.C) (interface{}, check.CommentInterface) { + tasks = d.getServiceTasks(c, id) + return tasks, nil + }, checker.HasLen, 1) + + task := tasks[0] + + // wait for task to start + waitAndAssert(c, defaultReconciliationTimeout, func(c *check.C) (interface{}, check.CommentInterface) { + task = d.getTask(c, task.ID) + return task.Status.State, nil + }, checker.Equals, swarm.TaskStateStarting) + + containerID := task.Status.ContainerStatus.ContainerID + + // wait for health check to work + waitAndAssert(c, defaultReconciliationTimeout, func(c *check.C) (interface{}, check.CommentInterface) { + out, _ := d.Cmd("inspect", "--format={{.State.Health.FailingStreak}}", containerID) + failingStreak, _ := strconv.Atoi(strings.TrimSpace(out)) + return failingStreak, nil + }, checker.GreaterThan, 0) + + // task should be blocked at starting status + task = d.getTask(c, task.ID) + c.Assert(task.Status.State, check.Equals, swarm.TaskStateStarting) + + // make it healthy + d.Cmd("exec", containerID, "touch", "/status") + + // Task should be at running status + waitAndAssert(c, defaultReconciliationTimeout, func(c *check.C) (interface{}, check.CommentInterface) { + task = d.getTask(c, task.ID) + return task.Status.State, nil + }, checker.Equals, swarm.TaskStateRunning) +} + +// start a service whose task is unhealthy at beginning +// its tasks should be blocked in starting stage, until health check is passed +func (s *DockerSwarmSuite) TestServiceHealthUpdate(c *check.C) { + testRequires(c, DaemonIsLinux) // busybox doesn't work on Windows + + d := s.AddDaemon(c, true, true) + + // service started from this image won't pass health check + imageName := "testhealth" + _, _, err := d.buildImageWithOut(imageName, + `FROM busybox + HEALTHCHECK --interval=1s --timeout=1s --retries=1024\ + CMD cat /status`, + true) + c.Check(err, check.IsNil) + + serviceName := "healthServiceStart" + out, err := d.Cmd("service", "create", "--name", serviceName, imageName, "top") + c.Assert(err, checker.IsNil, check.Commentf(out)) + id := strings.TrimSpace(out) + + var tasks []swarm.Task + waitAndAssert(c, defaultReconciliationTimeout, func(c *check.C) (interface{}, check.CommentInterface) { + tasks = d.getServiceTasks(c, id) + return tasks, nil + }, checker.HasLen, 1) + + task := tasks[0] + + // wait for task to start + waitAndAssert(c, defaultReconciliationTimeout, func(c *check.C) (interface{}, check.CommentInterface) { + task = d.getTask(c, task.ID) + return task.Status.State, nil + }, checker.Equals, swarm.TaskStateStarting) + + containerID := task.Status.ContainerStatus.ContainerID + + // wait for health check to work + waitAndAssert(c, defaultReconciliationTimeout, func(c *check.C) (interface{}, check.CommentInterface) { + out, _ := d.Cmd("inspect", "--format={{.State.Health.FailingStreak}}", containerID) + failingStreak, _ := strconv.Atoi(strings.TrimSpace(out)) + return failingStreak, nil + }, checker.GreaterThan, 0) + + // task should be blocked at starting status + task = d.getTask(c, task.ID) + c.Assert(task.Status.State, check.Equals, swarm.TaskStateStarting) + + // make it healthy + d.Cmd("exec", containerID, "touch", "/status") + // Task should be at running status + waitAndAssert(c, defaultReconciliationTimeout, func(c *check.C) (interface{}, check.CommentInterface) { + task = d.getTask(c, task.ID) + return task.Status.State, nil + }, checker.Equals, swarm.TaskStateRunning) +}