mirror of
https://github.com/moby/moby.git
synced 2022-11-09 12:21:53 -05:00
extend health check to start service
Signed-off-by: runshenzhu <runshen.zhu@gmail.com> Signed-off-by: Runshen Zhu <runshen.zhu@gmail.com>
This commit is contained in:
parent
c9e7390115
commit
a99db84b4a
2 changed files with 243 additions and 1 deletions
|
@ -142,7 +142,58 @@ func (r *controller) Start(ctx context.Context) error {
|
||||||
return errors.Wrap(err, "starting container failed")
|
return errors.Wrap(err, "starting container failed")
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
// no health check
|
||||||
|
if ctnr.Config == nil || ctnr.Config.Healthcheck == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
healthCmd := ctnr.Config.Healthcheck.Test
|
||||||
|
|
||||||
|
if len(healthCmd) == 0 || healthCmd[0] == "NONE" {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// wait for container to be healthy
|
||||||
|
eventq := r.adapter.events(ctx)
|
||||||
|
|
||||||
|
var healthErr error
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case event := <-eventq:
|
||||||
|
if !r.matchevent(event) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
switch event.Action {
|
||||||
|
case "die": // exit on terminal events
|
||||||
|
ctnr, err := r.adapter.inspect(ctx)
|
||||||
|
if err != nil {
|
||||||
|
return errors.Wrap(err, "die event received")
|
||||||
|
} else if ctnr.State.ExitCode != 0 {
|
||||||
|
return &exitError{code: ctnr.State.ExitCode, cause: healthErr}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
case "destroy":
|
||||||
|
// If we get here, something has gone wrong but we want to exit
|
||||||
|
// and report anyways.
|
||||||
|
return ErrContainerDestroyed
|
||||||
|
case "health_status: unhealthy":
|
||||||
|
// in this case, we stop the container and report unhealthy status
|
||||||
|
if err := r.Shutdown(ctx); err != nil {
|
||||||
|
return errors.Wrap(err, "unhealthy container shutdown failed")
|
||||||
|
}
|
||||||
|
// set health check error, and wait for container to fully exit ("die" event)
|
||||||
|
healthErr = ErrContainerUnhealthy
|
||||||
|
case "health_status: healthy":
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
case <-ctx.Done():
|
||||||
|
return ctx.Err()
|
||||||
|
case <-r.closed:
|
||||||
|
return r.err
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Wait on the container to exit.
|
// Wait on the container to exit.
|
||||||
|
|
191
integration-cli/docker_cli_service_health_test.go
Normal file
191
integration-cli/docker_cli_service_health_test.go
Normal file
|
@ -0,0 +1,191 @@
|
||||||
|
// +build !windows
|
||||||
|
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/docker/docker/daemon/cluster/executor/container"
|
||||||
|
"github.com/docker/docker/pkg/integration/checker"
|
||||||
|
"github.com/docker/engine-api/types/swarm"
|
||||||
|
"github.com/go-check/check"
|
||||||
|
)
|
||||||
|
|
||||||
|
// start a service, and then make its task unhealthy during running
|
||||||
|
// finally, unhealthy task should be detected and killed
|
||||||
|
func (s *DockerSwarmSuite) TestServiceHealthRun(c *check.C) {
|
||||||
|
testRequires(c, DaemonIsLinux) // busybox doesn't work on Windows
|
||||||
|
|
||||||
|
d := s.AddDaemon(c, true, true)
|
||||||
|
|
||||||
|
// build image with health-check
|
||||||
|
// note: use `daemon.buildImageWithOut` to build, do not use `buildImage` to build
|
||||||
|
imageName := "testhealth"
|
||||||
|
_, _, err := d.buildImageWithOut(imageName,
|
||||||
|
`FROM busybox
|
||||||
|
RUN touch /status
|
||||||
|
HEALTHCHECK --interval=1s --timeout=1s --retries=1\
|
||||||
|
CMD cat /status`,
|
||||||
|
true)
|
||||||
|
c.Check(err, check.IsNil)
|
||||||
|
|
||||||
|
serviceName := "healthServiceRun"
|
||||||
|
out, err := d.Cmd("service", "create", "--name", serviceName, imageName, "top")
|
||||||
|
c.Assert(err, checker.IsNil, check.Commentf(out))
|
||||||
|
id := strings.TrimSpace(out)
|
||||||
|
|
||||||
|
var tasks []swarm.Task
|
||||||
|
waitAndAssert(c, defaultReconciliationTimeout, func(c *check.C) (interface{}, check.CommentInterface) {
|
||||||
|
tasks = d.getServiceTasks(c, id)
|
||||||
|
return tasks, nil
|
||||||
|
}, checker.HasLen, 1)
|
||||||
|
|
||||||
|
task := tasks[0]
|
||||||
|
|
||||||
|
// wait for task to start
|
||||||
|
waitAndAssert(c, defaultReconciliationTimeout, func(c *check.C) (interface{}, check.CommentInterface) {
|
||||||
|
task = d.getTask(c, task.ID)
|
||||||
|
return task.Status.State, nil
|
||||||
|
}, checker.Equals, swarm.TaskStateStarting)
|
||||||
|
containerID := task.Status.ContainerStatus.ContainerID
|
||||||
|
|
||||||
|
// wait for container to be healthy
|
||||||
|
waitAndAssert(c, defaultReconciliationTimeout, func(c *check.C) (interface{}, check.CommentInterface) {
|
||||||
|
out, _ := d.Cmd("inspect", "--format={{.State.Health.Status}}", containerID)
|
||||||
|
return strings.TrimSpace(out), nil
|
||||||
|
}, checker.Equals, "healthy")
|
||||||
|
|
||||||
|
// make it fail
|
||||||
|
d.Cmd("exec", containerID, "rm", "/status")
|
||||||
|
// wait for container to be unhealthy
|
||||||
|
waitAndAssert(c, defaultReconciliationTimeout, func(c *check.C) (interface{}, check.CommentInterface) {
|
||||||
|
out, _ := d.Cmd("inspect", "--format={{.State.Health.Status}}", containerID)
|
||||||
|
return strings.TrimSpace(out), nil
|
||||||
|
}, checker.Equals, "unhealthy")
|
||||||
|
|
||||||
|
// Task should be terminated
|
||||||
|
waitAndAssert(c, defaultReconciliationTimeout, func(c *check.C) (interface{}, check.CommentInterface) {
|
||||||
|
task = d.getTask(c, task.ID)
|
||||||
|
return task.Status.State, nil
|
||||||
|
}, checker.Equals, swarm.TaskStateFailed)
|
||||||
|
|
||||||
|
if !strings.Contains(task.Status.Err, container.ErrContainerUnhealthy.Error()) {
|
||||||
|
c.Fatal("unhealthy task exits because of other error")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// start a service whose task is unhealthy at beginning
|
||||||
|
// its tasks should be blocked in starting stage, until health check is passed
|
||||||
|
func (s *DockerSwarmSuite) TestServiceHealthStart(c *check.C) {
|
||||||
|
testRequires(c, DaemonIsLinux) // busybox doesn't work on Windows
|
||||||
|
|
||||||
|
d := s.AddDaemon(c, true, true)
|
||||||
|
|
||||||
|
// service started from this image won't pass health check
|
||||||
|
imageName := "testhealth"
|
||||||
|
_, _, err := d.buildImageWithOut(imageName,
|
||||||
|
`FROM busybox
|
||||||
|
HEALTHCHECK --interval=1s --timeout=1s --retries=1024\
|
||||||
|
CMD cat /status`,
|
||||||
|
true)
|
||||||
|
c.Check(err, check.IsNil)
|
||||||
|
|
||||||
|
serviceName := "healthServiceStart"
|
||||||
|
out, err := d.Cmd("service", "create", "--name", serviceName, imageName, "top")
|
||||||
|
c.Assert(err, checker.IsNil, check.Commentf(out))
|
||||||
|
id := strings.TrimSpace(out)
|
||||||
|
|
||||||
|
var tasks []swarm.Task
|
||||||
|
waitAndAssert(c, defaultReconciliationTimeout, func(c *check.C) (interface{}, check.CommentInterface) {
|
||||||
|
tasks = d.getServiceTasks(c, id)
|
||||||
|
return tasks, nil
|
||||||
|
}, checker.HasLen, 1)
|
||||||
|
|
||||||
|
task := tasks[0]
|
||||||
|
|
||||||
|
// wait for task to start
|
||||||
|
waitAndAssert(c, defaultReconciliationTimeout, func(c *check.C) (interface{}, check.CommentInterface) {
|
||||||
|
task = d.getTask(c, task.ID)
|
||||||
|
return task.Status.State, nil
|
||||||
|
}, checker.Equals, swarm.TaskStateStarting)
|
||||||
|
|
||||||
|
containerID := task.Status.ContainerStatus.ContainerID
|
||||||
|
|
||||||
|
// wait for health check to work
|
||||||
|
waitAndAssert(c, defaultReconciliationTimeout, func(c *check.C) (interface{}, check.CommentInterface) {
|
||||||
|
out, _ := d.Cmd("inspect", "--format={{.State.Health.FailingStreak}}", containerID)
|
||||||
|
failingStreak, _ := strconv.Atoi(strings.TrimSpace(out))
|
||||||
|
return failingStreak, nil
|
||||||
|
}, checker.GreaterThan, 0)
|
||||||
|
|
||||||
|
// task should be blocked at starting status
|
||||||
|
task = d.getTask(c, task.ID)
|
||||||
|
c.Assert(task.Status.State, check.Equals, swarm.TaskStateStarting)
|
||||||
|
|
||||||
|
// make it healthy
|
||||||
|
d.Cmd("exec", containerID, "touch", "/status")
|
||||||
|
|
||||||
|
// Task should be at running status
|
||||||
|
waitAndAssert(c, defaultReconciliationTimeout, func(c *check.C) (interface{}, check.CommentInterface) {
|
||||||
|
task = d.getTask(c, task.ID)
|
||||||
|
return task.Status.State, nil
|
||||||
|
}, checker.Equals, swarm.TaskStateRunning)
|
||||||
|
}
|
||||||
|
|
||||||
|
// start a service whose task is unhealthy at beginning
|
||||||
|
// its tasks should be blocked in starting stage, until health check is passed
|
||||||
|
func (s *DockerSwarmSuite) TestServiceHealthUpdate(c *check.C) {
|
||||||
|
testRequires(c, DaemonIsLinux) // busybox doesn't work on Windows
|
||||||
|
|
||||||
|
d := s.AddDaemon(c, true, true)
|
||||||
|
|
||||||
|
// service started from this image won't pass health check
|
||||||
|
imageName := "testhealth"
|
||||||
|
_, _, err := d.buildImageWithOut(imageName,
|
||||||
|
`FROM busybox
|
||||||
|
HEALTHCHECK --interval=1s --timeout=1s --retries=1024\
|
||||||
|
CMD cat /status`,
|
||||||
|
true)
|
||||||
|
c.Check(err, check.IsNil)
|
||||||
|
|
||||||
|
serviceName := "healthServiceStart"
|
||||||
|
out, err := d.Cmd("service", "create", "--name", serviceName, imageName, "top")
|
||||||
|
c.Assert(err, checker.IsNil, check.Commentf(out))
|
||||||
|
id := strings.TrimSpace(out)
|
||||||
|
|
||||||
|
var tasks []swarm.Task
|
||||||
|
waitAndAssert(c, defaultReconciliationTimeout, func(c *check.C) (interface{}, check.CommentInterface) {
|
||||||
|
tasks = d.getServiceTasks(c, id)
|
||||||
|
return tasks, nil
|
||||||
|
}, checker.HasLen, 1)
|
||||||
|
|
||||||
|
task := tasks[0]
|
||||||
|
|
||||||
|
// wait for task to start
|
||||||
|
waitAndAssert(c, defaultReconciliationTimeout, func(c *check.C) (interface{}, check.CommentInterface) {
|
||||||
|
task = d.getTask(c, task.ID)
|
||||||
|
return task.Status.State, nil
|
||||||
|
}, checker.Equals, swarm.TaskStateStarting)
|
||||||
|
|
||||||
|
containerID := task.Status.ContainerStatus.ContainerID
|
||||||
|
|
||||||
|
// wait for health check to work
|
||||||
|
waitAndAssert(c, defaultReconciliationTimeout, func(c *check.C) (interface{}, check.CommentInterface) {
|
||||||
|
out, _ := d.Cmd("inspect", "--format={{.State.Health.FailingStreak}}", containerID)
|
||||||
|
failingStreak, _ := strconv.Atoi(strings.TrimSpace(out))
|
||||||
|
return failingStreak, nil
|
||||||
|
}, checker.GreaterThan, 0)
|
||||||
|
|
||||||
|
// task should be blocked at starting status
|
||||||
|
task = d.getTask(c, task.ID)
|
||||||
|
c.Assert(task.Status.State, check.Equals, swarm.TaskStateStarting)
|
||||||
|
|
||||||
|
// make it healthy
|
||||||
|
d.Cmd("exec", containerID, "touch", "/status")
|
||||||
|
// Task should be at running status
|
||||||
|
waitAndAssert(c, defaultReconciliationTimeout, func(c *check.C) (interface{}, check.CommentInterface) {
|
||||||
|
task = d.getTask(c, task.ID)
|
||||||
|
return task.Status.State, nil
|
||||||
|
}, checker.Equals, swarm.TaskStateRunning)
|
||||||
|
}
|
Loading…
Add table
Reference in a new issue