Add support for user-defined healthchecks

This PR adds support for user-defined health-check probes for Docker
containers. It adds a `HEALTHCHECK` instruction to the Dockerfile syntax and
corresponding `docker run` options. It can be combined with a restart policy
to automatically restart a container if the check fails.

The `HEALTHCHECK` instruction has two forms:

* `HEALTHCHECK [OPTIONS] CMD command` (check container health by running a command inside the container)
* `HEALTHCHECK NONE` (disable any healthcheck inherited from the base image)

The `HEALTHCHECK` instruction tells Docker how to test a container to check that
it is still working. This can detect cases such as a web server that is stuck in
an infinite loop and unable to handle new connections, even though the server
process is still running.

When a container has a healthcheck specified, it has a _health status_ in
addition to its normal status. This status is initially `starting`. Whenever a
health check passes, it becomes `healthy` (whatever state it was previously in).
After a certain number of consecutive failures, it becomes `unhealthy`.

The options that can appear before `CMD` are:

* `--interval=DURATION` (default: `30s`)
* `--timeout=DURATION` (default: `30s`)
* `--retries=N` (default: `1`)

The health check will first run **interval** seconds after the container is
started, and then again **interval** seconds after each previous check completes.

If a single run of the check takes longer than **timeout** seconds then the check
is considered to have failed.

It takes **retries** consecutive failures of the health check for the container
to be considered `unhealthy`.
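
As a minimal sketch of these timing rules (illustrative only; the authoritative loop is the new `monitor` function in `daemon/health.go` below, and `schedule`/`runProbe` are made-up names):

```go
package healthsketch // illustrative, not part of this PR

import (
	"time"

	"golang.org/x/net/context"
)

// schedule mirrors the timing described above: the first probe fires
// `interval` after the container starts, each later probe fires `interval`
// after the previous run completes, and a run that outlives `timeout`
// counts as a failure.
func schedule(interval, timeout time.Duration, runProbe func(context.Context) error) {
	for {
		time.Sleep(interval)
		ctx, cancel := context.WithTimeout(context.Background(), timeout)
		_ = runProbe(ctx) // the probe should treat ctx expiry as a failed check
		cancel()
	}
}
```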

There can only be one `HEALTHCHECK` instruction in a Dockerfile. If you list
more than one then only the last `HEALTHCHECK` will take effect.

The command after the `CMD` keyword can be either a shell command (e.g. `HEALTHCHECK
CMD /bin/check-running`) or an _exec_ array (as with other Dockerfile commands;
see e.g. `ENTRYPOINT` for details).

The command's exit status indicates the health status of the container.
The possible values are:

- 0: success - the container is healthy and ready for use
- 1: unhealthy - the container is not working correctly
- 2: starting - the container is not ready for use yet, but is working correctly

If the probe returns 2 ("starting") when the container has already moved out of the
"starting" state then it is treated as "unhealthy" instead.

For example, to check every five minutes or so that a web-server is able to
serve the site's main page within three seconds:

    HEALTHCHECK --interval=5m --timeout=3s \
      CMD curl -f http://localhost/ || exit 1
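
Putting the exit codes and the retry rule together, a minimal sketch of the resulting transitions (illustrative; the authoritative logic is `handleProbeResult` in the new `daemon/health.go`):

```go
package healthsketch // illustrative, not part of this PR

// nextStatus applies one probe result to the current health status: 0 resets
// the failing streak, 2 keeps "starting" alive only while the container is
// still starting, and anything else (including an out-of-place 2) extends
// the failing streak until it reaches the retry limit.
func nextStatus(current string, exitCode, streak, retries int) (string, int) {
	if retries <= 0 {
		retries = 1 // the daemon substitutes 1 for unset/invalid values
	}
	switch {
	case exitCode == 0:
		return "healthy", 0
	case exitCode == 2 && current == "starting":
		return "starting", streak
	default:
		streak++
		if streak >= retries {
			return "unhealthy", streak
		}
		return current, streak // stay starting/healthy until the streak hits retries
	}
}
```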

To help debug failing probes, any output text (UTF-8 encoded) that the command writes
on stdout or stderr will be stored in the health status and can be queried with
`docker inspect`. Such output should be kept short (only the first 4096 bytes
are stored currently).

When the health status of a container changes, a `health_status` event is
generated with the new status. The health status is also displayed in the
`docker ps` output.

Signed-off-by: Thomas Leonard <thomas.leonard@docker.com>
Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
Commit b6c7becbfe (parent d1b1b6a98e), authored by Thomas Leonard on
2016-04-18 10:48:13 +01:00, committed by Sebastiaan van Stijn.
27 changed files with 1170 additions and 80 deletions

@@ -17,7 +17,7 @@ type execBackend interface {
ContainerExecCreate(name string, config *types.ExecConfig) (string, error)
ContainerExecInspect(id string) (*backend.ExecInspect, error)
ContainerExecResize(name string, height, width int) error
ContainerExecStart(name string, stdin io.ReadCloser, stdout io.Writer, stderr io.Writer) error
ContainerExecStart(ctx context.Context, name string, stdin io.ReadCloser, stdout io.Writer, stderr io.Writer) error
ExecExists(name string) (bool, error)
}

@@ -106,7 +106,8 @@ func (s *containerRouter) postContainerExecStart(ctx context.Context, w http.Res
}
// Now run the user process in container.
if err := s.backend.ContainerExecStart(execName, stdin, stdout, stderr); err != nil {
// Maybe we should pass ctx here if we're not detaching?
if err := s.backend.ContainerExecStart(context.Background(), execName, stdin, stdout, stderr); err != nil {
if execStartCheck.Detach {
return err
}

@@ -22,15 +22,16 @@ import (
)
var validCommitCommands = map[string]bool{
"cmd": true,
"entrypoint": true,
"env": true,
"expose": true,
"label": true,
"onbuild": true,
"user": true,
"volume": true,
"workdir": true,
"cmd": true,
"entrypoint": true,
"healthcheck": true,
"env": true,
"expose": true,
"label": true,
"onbuild": true,
"user": true,
"volume": true,
"workdir": true,
}
// BuiltinAllowedBuildArgs is list of built-in allowed build args

@@ -3,40 +3,42 @@ package command
// Define constants for the command strings
const (
Env = "env"
Label = "label"
Maintainer = "maintainer"
Add = "add"
Copy = "copy"
From = "from"
Onbuild = "onbuild"
Workdir = "workdir"
Run = "run"
Cmd = "cmd"
Entrypoint = "entrypoint"
Expose = "expose"
Volume = "volume"
User = "user"
StopSignal = "stopsignal"
Arg = "arg"
Env = "env"
Label = "label"
Maintainer = "maintainer"
Add = "add"
Copy = "copy"
From = "from"
Onbuild = "onbuild"
Workdir = "workdir"
Run = "run"
Cmd = "cmd"
Entrypoint = "entrypoint"
Expose = "expose"
Volume = "volume"
User = "user"
StopSignal = "stopsignal"
Arg = "arg"
Healthcheck = "healthcheck"
)
// Commands is list of all Dockerfile commands
var Commands = map[string]struct{}{
Env: {},
Label: {},
Maintainer: {},
Add: {},
Copy: {},
From: {},
Onbuild: {},
Workdir: {},
Run: {},
Cmd: {},
Entrypoint: {},
Expose: {},
Volume: {},
User: {},
StopSignal: {},
Arg: {},
Env: {},
Label: {},
Maintainer: {},
Add: {},
Copy: {},
From: {},
Onbuild: {},
Workdir: {},
Run: {},
Cmd: {},
Entrypoint: {},
Expose: {},
Volume: {},
User: {},
StopSignal: {},
Arg: {},
Healthcheck: {},
}

@@ -12,7 +12,9 @@ import (
"regexp"
"runtime"
"sort"
"strconv"
"strings"
"time"
"github.com/Sirupsen/logrus"
"github.com/docker/docker/api"
@@ -426,6 +428,111 @@ func cmd(b *Builder, args []string, attributes map[string]bool, original string)
return nil
}
// parseOptInterval(flag) is the duration of flag.Value, or 0 if
// empty. An error is reported if the value is given and is not positive.
func parseOptInterval(f *Flag) (time.Duration, error) {
s := f.Value
if s == "" {
return 0, nil
}
d, err := time.ParseDuration(s)
if err != nil {
return 0, err
}
if d <= 0 {
return 0, fmt.Errorf("Interval %#v must be positive", f.name)
}
return d, nil
}
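// Illustrative behaviour of parseOptInterval (examples, not part of the diff):
//   ""    -> 0, nil   (flag unset; 0 later means "use the default")
//   "5m"  -> 5 * time.Minute, nil
//   "-3s" -> error (the interval must be positive)
//   "abc" -> error from time.ParseDuration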
// HEALTHCHECK foo
//
// Set the default healthcheck command to run in the container (which may be empty).
// Argument handling is the same as RUN.
//
func healthcheck(b *Builder, args []string, attributes map[string]bool, original string) error {
if len(args) == 0 {
return fmt.Errorf("HEALTHCHECK requires an argument")
}
typ := strings.ToUpper(args[0])
args = args[1:]
if typ == "NONE" {
if len(args) != 0 {
return fmt.Errorf("HEALTHCHECK NONE takes no arguments")
}
test := strslice.StrSlice{typ}
b.runConfig.Healthcheck = &container.HealthConfig{
Test: test,
}
} else {
if b.runConfig.Healthcheck != nil {
oldCmd := b.runConfig.Healthcheck.Test
if len(oldCmd) > 0 && oldCmd[0] != "NONE" {
fmt.Fprintf(b.Stdout, "Note: overriding previous HEALTHCHECK: %v\n", oldCmd)
}
}
healthcheck := container.HealthConfig{}
flInterval := b.flags.AddString("interval", "")
flTimeout := b.flags.AddString("timeout", "")
flRetries := b.flags.AddString("retries", "")
if err := b.flags.Parse(); err != nil {
return err
}
switch typ {
case "CMD":
cmdSlice := handleJSONArgs(args, attributes)
if len(cmdSlice) == 0 {
return fmt.Errorf("Missing command after HEALTHCHECK CMD")
}
if !attributes["json"] {
typ = "CMD-SHELL"
}
healthcheck.Test = strslice.StrSlice(append([]string{typ}, cmdSlice...))
default:
return fmt.Errorf("Unknown type %#v in HEALTHCHECK (try CMD)", typ)
}
interval, err := parseOptInterval(flInterval)
if err != nil {
return err
}
healthcheck.Interval = interval
timeout, err := parseOptInterval(flTimeout)
if err != nil {
return err
}
healthcheck.Timeout = timeout
if flRetries.Value != "" {
retries, err := strconv.ParseInt(flRetries.Value, 10, 32)
if err != nil {
return err
}
if retries < 1 {
return fmt.Errorf("--retries must be at least 1 (not %d)", retries)
}
healthcheck.Retries = int(retries)
} else {
healthcheck.Retries = 0
}
b.runConfig.Healthcheck = &healthcheck
}
if err := b.commit("", b.runConfig.Cmd, fmt.Sprintf("HEALTHCHECK %q", b.runConfig.Healthcheck)); err != nil {
return err
}
return nil
}
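// Illustrative examples of the config this dispatcher produces (not part of the diff):
//   HEALTHCHECK NONE                  -> Test = ["NONE"]
//   HEALTHCHECK CMD /app/check.sh -q  -> Test = ["CMD-SHELL", "/app/check.sh -q"]
//   HEALTHCHECK CMD ["/app/check.sh"] -> Test = ["CMD", "/app/check.sh"]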
// ENTRYPOINT /usr/sbin/nginx
//
// Set the entrypoint (which defaults to sh -c on linux, or cmd /S /C on Windows) to

@@ -58,22 +58,23 @@ var evaluateTable map[string]func(*Builder, []string, map[string]bool, string) e
func init() {
evaluateTable = map[string]func(*Builder, []string, map[string]bool, string) error{
command.Env: env,
command.Label: label,
command.Maintainer: maintainer,
command.Add: add,
command.Copy: dispatchCopy, // copy() is a go builtin
command.From: from,
command.Onbuild: onbuild,
command.Workdir: workdir,
command.Run: run,
command.Cmd: cmd,
command.Entrypoint: entrypoint,
command.Expose: expose,
command.Volume: volume,
command.User: user,
command.StopSignal: stopSignal,
command.Arg: arg,
command.Env: env,
command.Label: label,
command.Maintainer: maintainer,
command.Add: add,
command.Copy: dispatchCopy, // copy() is a go builtin
command.From: from,
command.Onbuild: onbuild,
command.Workdir: workdir,
command.Run: run,
command.Cmd: cmd,
command.Entrypoint: entrypoint,
command.Expose: expose,
command.Volume: volume,
command.User: user,
command.StopSignal: stopSignal,
command.Arg: arg,
command.Healthcheck: healthcheck,
}
}

@@ -329,3 +329,32 @@ func parseMaybeJSONToList(rest string) (*Node, map[string]bool, error) {
return parseStringsWhitespaceDelimited(rest)
}
// The HEALTHCHECK command is like parseMaybeJSON, but has an extra type argument.
func parseHealthConfig(rest string) (*Node, map[string]bool, error) {
// Find end of first argument
var sep int
for ; sep < len(rest); sep++ {
if unicode.IsSpace(rune(rest[sep])) {
break
}
}
next := sep
for ; next < len(rest); next++ {
if !unicode.IsSpace(rune(rest[next])) {
break
}
}
if sep == 0 {
return nil, nil, nil
}
typ := rest[:sep]
cmd, attrs, err := parseMaybeJSON(rest[next:])
if err != nil {
return nil, nil, err
}
return &Node{Value: typ, Next: cmd, Attributes: attrs}, nil, err
}
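// Illustrative (not part of the diff): `HEALTHCHECK --interval=5s CMD /app/check.sh --quiet`
// parses to a "CMD" node whose child is "/app/check.sh --quiet"; the --interval
// flag is collected separately (see the parser fixtures below).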

@@ -66,22 +66,23 @@ func init() {
// functions. Errors are propagated up by Parse() and the resulting AST can
// be incorporated directly into the existing AST as a next.
dispatch = map[string]func(string) (*Node, map[string]bool, error){
command.User: parseString,
command.Onbuild: parseSubCommand,
command.Workdir: parseString,
command.Env: parseEnv,
command.Label: parseLabel,
command.Maintainer: parseString,
command.From: parseString,
command.Add: parseMaybeJSONToList,
command.Copy: parseMaybeJSONToList,
command.Run: parseMaybeJSON,
command.Cmd: parseMaybeJSON,
command.Entrypoint: parseMaybeJSON,
command.Expose: parseStringsWhitespaceDelimited,
command.Volume: parseMaybeJSONToList,
command.StopSignal: parseString,
command.Arg: parseNameOrNameVal,
command.User: parseString,
command.Onbuild: parseSubCommand,
command.Workdir: parseString,
command.Env: parseEnv,
command.Label: parseLabel,
command.Maintainer: parseString,
command.From: parseString,
command.Add: parseMaybeJSONToList,
command.Copy: parseMaybeJSONToList,
command.Run: parseMaybeJSON,
command.Cmd: parseMaybeJSON,
command.Entrypoint: parseMaybeJSON,
command.Expose: parseStringsWhitespaceDelimited,
command.Volume: parseMaybeJSONToList,
command.StopSignal: parseString,
command.Arg: parseNameOrNameVal,
command.Healthcheck: parseHealthConfig,
}
}

@@ -0,0 +1,10 @@
FROM debian
ADD check.sh main.sh /app/
CMD /app/main.sh
HEALTHCHECK
HEALTHCHECK --interval=5s --timeout=3s --retries=1 \
CMD /app/check.sh --quiet
HEALTHCHECK CMD
HEALTHCHECK CMD a b
HEALTHCHECK --timeout=3s CMD ["foo"]
HEALTHCHECK CONNECT TCP 7000

@@ -0,0 +1,9 @@
(from "debian")
(add "check.sh" "main.sh" "/app/")
(cmd "/app/main.sh")
(healthcheck)
(healthcheck ["--interval=5s" "--timeout=3s" "--retries=1"] "CMD" "/app/check.sh --quiet")
(healthcheck "CMD")
(healthcheck "CMD" "a b")
(healthcheck ["--timeout=3s"] "CMD" "foo")
(healthcheck "CONNECT" "TCP 7000")

container/health.go (new file)

@@ -0,0 +1,49 @@
package container
import (
"github.com/Sirupsen/logrus"
"github.com/docker/engine-api/types"
)
// Health holds the current container health-check state
type Health struct {
types.Health
stop chan struct{} // Write struct{} to stop the monitor
}
// String returns a human-readable description of the health-check state
func (s *Health) String() string {
if s.stop == nil {
return "no healthcheck"
}
switch s.Status {
case types.Starting:
return "health: starting"
default: // Healthy and Unhealthy are clear on their own
return s.Status
}
}
// OpenMonitorChannel creates and returns a new monitor channel. If there already is one,
// it returns nil.
func (s *Health) OpenMonitorChannel() chan struct{} {
if s.stop == nil {
logrus.Debugf("OpenMonitorChannel")
s.stop = make(chan struct{})
return s.stop
}
return nil
}
// CloseMonitorChannel closes any existing monitor channel.
func (s *Health) CloseMonitorChannel() {
if s.stop != nil {
logrus.Debugf("CloseMonitorChannel: waiting for probe to stop")
// This channel does not buffer. Once the write succeeds, the monitor
// has read the stop request and will not make any further updates
// to c.State.Health.
s.stop <- struct{}{}
s.stop = nil
logrus.Debugf("CloseMonitorChannel done")
}
}
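// Illustrative sketch of the handshake (not part of this diff): the channel
// is unbuffered, so the send in CloseMonitorChannel blocks until the monitor
// goroutine's `case <-stop` runs, after which the monitor makes no further
// writes to c.State.Health.
//
//	stop := health.OpenMonitorChannel()
//	go monitor(d, c, stop, probe)   // receiver (daemon/health.go below)
//	...
//	health.CloseMonitorChannel()    // sender; returns only once the monitor saw it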

@@ -27,6 +27,7 @@ type State struct {
StartedAt time.Time
FinishedAt time.Time
waitChan chan struct{}
Health *Health
}
// NewState creates a default state object with a fresh channel for state changes.
@@ -46,6 +47,9 @@ func (s *State) String() string {
return fmt.Sprintf("Restarting (%d) %s ago", s.ExitCode, units.HumanDuration(time.Now().UTC().Sub(s.FinishedAt)))
}
if h := s.Health; h != nil {
return fmt.Sprintf("Up %s (%s)", units.HumanDuration(time.Now().UTC().Sub(s.StartedAt)), h.String())
}
return fmt.Sprintf("Up %s", units.HumanDuration(time.Now().UTC().Sub(s.StartedAt)))
}
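// Illustrative `docker ps` STATUS strings produced by the branch above:
//   "Up 3 hours (healthy)", "Up 10 seconds (health: starting)", "Up 2 minutes (unhealthy)"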

@@ -80,6 +80,25 @@ func merge(userConf, imageConf *containertypes.Config) error {
userConf.Entrypoint = imageConf.Entrypoint
}
}
if imageConf.Healthcheck != nil {
if userConf.Healthcheck == nil {
userConf.Healthcheck = imageConf.Healthcheck
} else {
if len(userConf.Healthcheck.Test) == 0 {
userConf.Healthcheck.Test = imageConf.Healthcheck.Test
}
if userConf.Healthcheck.Interval == 0 {
userConf.Healthcheck.Interval = imageConf.Healthcheck.Interval
}
if userConf.Healthcheck.Timeout == 0 {
userConf.Healthcheck.Timeout = imageConf.Healthcheck.Timeout
}
if userConf.Healthcheck.Retries == 0 {
userConf.Healthcheck.Retries = imageConf.Healthcheck.Retries
}
}
}
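// Illustrative (not part of the diff): if the image was built with
// `HEALTHCHECK --interval=5s CMD /check.sh` and the user passes only
// --health-interval=2s, the merged config keeps the image's Test
// (["CMD-SHELL", "/check.sh"]) but uses the user's 2s interval.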
if userConf.WorkingDir == "" {
userConf.WorkingDir = imageConf.WorkingDir
}

@@ -14,11 +14,15 @@ import (
"github.com/docker/docker/errors"
"github.com/docker/docker/libcontainerd"
"github.com/docker/docker/pkg/pools"
"github.com/docker/docker/pkg/signal"
"github.com/docker/docker/pkg/term"
"github.com/docker/engine-api/types"
"github.com/docker/engine-api/types/strslice"
)
// Seconds to wait after sending TERM before trying KILL
const termProcessTimeout = 10
func (d *Daemon) registerExecCommand(container *container.Container, config *exec.Config) {
// Storing execs in container in order to kill them gracefully whenever the container is stopped or removed.
container.ExecCommands.Add(config.ID, config)
@@ -130,7 +134,8 @@ func (d *Daemon) ContainerExecCreate(name string, config *types.ExecConfig) (str
// ContainerExecStart starts a previously set up exec instance. The
// std streams are set up.
func (d *Daemon) ContainerExecStart(name string, stdin io.ReadCloser, stdout io.Writer, stderr io.Writer) (err error) {
// If ctx is cancelled, the process is terminated.
func (d *Daemon) ContainerExecStart(ctx context.Context, name string, stdin io.ReadCloser, stdout io.Writer, stderr io.Writer) (err error) {
var (
cStdin io.ReadCloser
cStdout, cStderr io.Writer
@@ -197,15 +202,28 @@ func (d *Daemon) ContainerExecStart(name string, stdin io.ReadCloser, stdout io.
return nil
}
attachErr := container.AttachStreams(context.Background(), ec.StreamConfig, ec.OpenStdin, true, ec.Tty, cStdin, cStdout, cStderr, ec.DetachKeys)
attachErr := container.AttachStreams(ctx, ec.StreamConfig, ec.OpenStdin, true, ec.Tty, cStdin, cStdout, cStderr, ec.DetachKeys)
if err := d.containerd.AddProcess(c.ID, name, p); err != nil {
return err
}
err = <-attachErr
if err != nil {
return fmt.Errorf("attach failed with error: %v", err)
select {
case <-ctx.Done():
logrus.Debugf("Sending TERM signal to process %v in container %v", name, c.ID)
d.containerd.SignalProcess(c.ID, name, int(signal.SignalMap["TERM"]))
select {
case <-time.After(termProcessTimeout * time.Second):
logrus.Infof("Container %v, process %v failed to exit within %d seconds of signal TERM - using the force", c.ID, name, termProcessTimeout)
d.containerd.SignalProcess(c.ID, name, int(signal.SignalMap["KILL"]))
case <-attachErr:
// TERM signal worked
}
return fmt.Errorf("context cancelled")
case err := <-attachErr:
if err != nil {
return fmt.Errorf("attach failed with error: %v", err)
}
}
return nil
}

daemon/health.go (new file)

@@ -0,0 +1,314 @@
package daemon
import (
"bytes"
"fmt"
"runtime"
"strings"
"time"
"golang.org/x/net/context"
"github.com/Sirupsen/logrus"
"github.com/docker/docker/container"
"github.com/docker/docker/daemon/exec"
"github.com/docker/engine-api/types"
"github.com/docker/engine-api/types/strslice"
)
const (
// Longest healthcheck probe output message to store. Longer messages will be truncated.
maxOutputLen = 4096
// Default interval between probe runs (from the end of one run to the start of the next).
// Also the time before the first probe.
defaultProbeInterval = 30 * time.Second
// The maximum length of time a single probe run should take. If the probe takes longer
// than this, the check is considered to have failed.
defaultProbeTimeout = 30 * time.Second
// Shut down a container if it becomes Unhealthy.
defaultExitOnUnhealthy = true
// Maximum number of entries to record
maxLogEntries = 5
)
const (
// Exit status codes that can be returned by the probe command.
exitStatusHealthy = 0 // Container is healthy
exitStatusUnhealthy = 1 // Container is unhealthy
exitStatusStarting = 2 // Container needs more time to start
)
// probe implementations know how to run a particular type of probe.
type probe interface {
// Perform one run of the check. Returns the exit code and an optional
// short diagnostic string.
run(context.Context, *Daemon, *container.Container) (*types.HealthcheckResult, error)
}
// cmdProbe implements the "CMD" probe type.
type cmdProbe struct {
// Run the command with the system's default shell instead of execing it directly.
shell bool
}
// exec the healthcheck command in the container.
// Returns the exit code and probe output (if any)
func (p *cmdProbe) run(ctx context.Context, d *Daemon, container *container.Container) (*types.HealthcheckResult, error) {
cmdSlice := strslice.StrSlice(container.Config.Healthcheck.Test)[1:]
if p.shell {
if runtime.GOOS != "windows" {
cmdSlice = append([]string{"/bin/sh", "-c"}, cmdSlice...)
} else {
cmdSlice = append([]string{"cmd", "/S", "/C"}, cmdSlice...)
}
}
entrypoint, args := d.getEntrypointAndArgs(strslice.StrSlice{}, cmdSlice)
execConfig := exec.NewConfig()
execConfig.OpenStdin = false
execConfig.OpenStdout = true
execConfig.OpenStderr = true
execConfig.ContainerID = container.ID
execConfig.DetachKeys = []byte{}
execConfig.Entrypoint = entrypoint
execConfig.Args = args
execConfig.Tty = false
execConfig.Privileged = false
execConfig.User = container.Config.User
d.registerExecCommand(container, execConfig)
d.LogContainerEvent(container, "exec_create: "+execConfig.Entrypoint+" "+strings.Join(execConfig.Args, " "))
output := &limitedBuffer{}
err := d.ContainerExecStart(ctx, execConfig.ID, nil, output, output)
if err != nil {
return nil, err
}
info, err := d.getExecConfig(execConfig.ID)
if err != nil {
return nil, err
}
if info.ExitCode == nil {
return nil, fmt.Errorf("Healthcheck has no exit code!")
}
// Note: Go's json package will handle invalid UTF-8 for us
out := output.String()
return &types.HealthcheckResult{
End: time.Now(),
ExitCode: *info.ExitCode,
Output: out,
}, nil
}
// Update the container's Status.Health struct based on the latest probe's result.
func handleProbeResult(d *Daemon, c *container.Container, result *types.HealthcheckResult) {
c.Lock()
defer c.Unlock()
retries := c.Config.Healthcheck.Retries
if retries <= 0 {
retries = 1 // Default if unset or set to an invalid value
}
h := c.State.Health
oldStatus := h.Status
if len(h.Log) >= maxLogEntries {
h.Log = append(h.Log[len(h.Log)+1-maxLogEntries:], result)
} else {
h.Log = append(h.Log, result)
}
if result.ExitCode == exitStatusHealthy {
h.FailingStreak = 0
h.Status = types.Healthy
} else if result.ExitCode == exitStatusStarting && c.State.Health.Status == types.Starting {
// The container is not ready yet. Remain in the starting state.
} else {
// Failure (including invalid exit code)
h.FailingStreak++
if c.State.Health.FailingStreak >= retries {
h.Status = types.Unhealthy
}
// Else we're starting or healthy. Stay in that state.
}
if oldStatus != h.Status {
d.LogContainerEvent(c, "health_status: "+h.Status)
}
}
// Run the container's monitoring thread until notified via "stop".
// There is never more than one monitor thread running per container at a time.
func monitor(d *Daemon, c *container.Container, stop chan struct{}, probe probe) {
probeTimeout := timeoutWithDefault(c.Config.Healthcheck.Timeout, defaultProbeTimeout)
probeInterval := timeoutWithDefault(c.Config.Healthcheck.Interval, defaultProbeInterval)
for {
select {
case <-stop:
logrus.Debugf("Stop healthcheck monitoring (received while idle)")
return
case <-time.After(probeInterval):
logrus.Debugf("Running health check...")
startTime := time.Now()
ctx, cancelProbe := context.WithTimeout(context.Background(), probeTimeout)
results := make(chan *types.HealthcheckResult)
go func() {
result, err := probe.run(ctx, d, c)
if err != nil {
logrus.Warnf("Health check error: %v", err)
results <- &types.HealthcheckResult{
ExitCode: -1,
Output: err.Error(),
Start: startTime,
End: time.Now(),
}
} else {
result.Start = startTime
logrus.Debugf("Health check done (exitCode=%d)", result.ExitCode)
results <- result
}
close(results)
}()
select {
case <-stop:
logrus.Debugf("Stop healthcheck monitoring (received while probing)")
// Stop timeout and kill probe, but don't wait for probe to exit.
cancelProbe()
return
case result := <-results:
handleProbeResult(d, c, result)
// Stop timeout
cancelProbe()
case <-ctx.Done():
logrus.Debugf("Health check taking too long")
handleProbeResult(d, c, &types.HealthcheckResult{
ExitCode: -1,
Output: fmt.Sprintf("Health check exceeded timeout (%v)", probeTimeout),
Start: startTime,
End: time.Now(),
})
cancelProbe()
// Wait for probe to exit (it might take a while to respond to the TERM
// signal and we don't want dying probes to pile up).
<-results
}
}
}
}
// Get a suitable probe implementation for the container's healthcheck configuration.
func getProbe(c *container.Container) probe {
config := c.Config.Healthcheck
if config == nil || len(config.Test) == 0 {
return nil
}
switch config.Test[0] {
case "CMD":
return &cmdProbe{shell: false}
case "CMD-SHELL":
return &cmdProbe{shell: true}
default:
logrus.Warnf("Unknown healthcheck type '%s' (expected 'CMD')", config.Test[0])
return nil
}
}
// Ensure the health-check monitor is running or not, depending on the current
// state of the container.
// Called from monitor.go, with c locked.
func (d *Daemon) updateHealthMonitor(c *container.Container) {
h := c.State.Health
if h == nil {
return // No healthcheck configured
}
probe := getProbe(c)
wantRunning := c.Running && !c.Paused && probe != nil
if wantRunning {
if stop := h.OpenMonitorChannel(); stop != nil {
go monitor(d, c, stop, probe)
}
} else {
h.CloseMonitorChannel()
}
}
// Reset the health state for a newly-started, restarted or restored container.
// initHealthMonitor is called from monitor.go and we should never be running
// two instances at once.
// Called with c locked.
func (d *Daemon) initHealthMonitor(c *container.Container) {
if c.Config.Healthcheck == nil {
return
}
// This is needed in case we're auto-restarting
d.stopHealthchecks(c)
if c.State.Health == nil {
h := &container.Health{}
h.Status = types.Starting
h.FailingStreak = 0
c.State.Health = h
}
d.updateHealthMonitor(c)
}
// Called when the container is being stopped (whether because the health check is
// failing or for any other reason).
func (d *Daemon) stopHealthchecks(c *container.Container) {
h := c.State.Health
if h != nil {
h.CloseMonitorChannel()
}
}
// Buffer up to maxOutputLen bytes. Further data is discarded.
type limitedBuffer struct {
buf bytes.Buffer
truncated bool // indicates that data has been lost
}
// Append to limitedBuffer while there is room.
func (b *limitedBuffer) Write(data []byte) (int, error) {
bufLen := b.buf.Len()
dataLen := len(data)
keep := min(maxOutputLen-bufLen, dataLen)
if keep > 0 {
b.buf.Write(data[:keep])
}
if keep < dataLen {
b.truncated = true
}
return dataLen, nil
}
// The contents of the buffer, with "..." appended if it overflowed.
func (b *limitedBuffer) String() string {
out := b.buf.String()
if b.truncated {
out = out + "..."
}
return out
}
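// Illustrative (not part of the diff): writing 5000 bytes into a limitedBuffer
// keeps the first 4096, sets truncated, and String() returns them with "..."
// appended; Write still reports the full 5000, so callers never see a
// short-write error.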
// If configuredValue is zero, use defaultValue instead.
func timeoutWithDefault(configuredValue time.Duration, defaultValue time.Duration) time.Duration {
if configuredValue == 0 {
return defaultValue
}
return configuredValue
}
func min(x, y int) int {
if x < y {
return x
}
return y
}

daemon/health_test.go (new file)

@@ -0,0 +1,112 @@
package daemon
import (
"testing"
"time"
"github.com/docker/docker/container"
"github.com/docker/docker/daemon/events"
"github.com/docker/engine-api/types"
containertypes "github.com/docker/engine-api/types/container"
eventtypes "github.com/docker/engine-api/types/events"
)
func reset(c *container.Container) {
c.State = &container.State{}
c.State.Health = &container.Health{}
c.State.Health.Status = types.Starting
}
func TestHealthStates(t *testing.T) {
e := events.New()
_, l, _ := e.Subscribe()
defer e.Evict(l)
expect := func(expected string) {
select {
case event := <-l:
ev := event.(eventtypes.Message)
if ev.Status != expected {
t.Errorf("Expecting event %#v, but got %#v\n", expected, ev.Status)
}
case <-time.After(1 * time.Second):
t.Errorf("Expecting event %#v, but got nothing\n", expected)
}
}
c := &container.Container{
CommonContainer: container.CommonContainer{
ID: "container_id",
Name: "container_name",
Config: &containertypes.Config{
Image: "image_name",
},
},
}
daemon := &Daemon{
EventsService: e,
}
c.Config.Healthcheck = &containertypes.HealthConfig{
Retries: 1,
}
reset(c)
handleResult := func(startTime time.Time, exitCode int) {
handleProbeResult(daemon, c, &types.HealthcheckResult{
Start: startTime,
End: startTime,
ExitCode: exitCode,
})
}
// starting -> failed -> success -> failed
handleResult(c.State.StartedAt.Add(1*time.Second), 1)
expect("health_status: unhealthy")
handleResult(c.State.StartedAt.Add(2*time.Second), 0)
expect("health_status: healthy")
handleResult(c.State.StartedAt.Add(3*time.Second), 1)
expect("health_status: unhealthy")
// starting -> starting -> starting ->
// healthy -> starting (invalid transition)
reset(c)
handleResult(c.State.StartedAt.Add(20*time.Second), 2)
handleResult(c.State.StartedAt.Add(40*time.Second), 2)
if c.State.Health.Status != types.Starting {
t.Errorf("Expecting starting, but got %#v\n", c.State.Health.Status)
}
handleResult(c.State.StartedAt.Add(50*time.Second), 0)
expect("health_status: healthy")
handleResult(c.State.StartedAt.Add(60*time.Second), 2)
expect("health_status: unhealthy")
// Test retries
reset(c)
c.Config.Healthcheck.Retries = 3
handleResult(c.State.StartedAt.Add(20*time.Second), 1)
handleResult(c.State.StartedAt.Add(40*time.Second), 1)
if c.State.Health.Status != types.Starting {
t.Errorf("Expecting starting, but got %#v\n", c.State.Health.Status)
}
if c.State.Health.FailingStreak != 2 {
t.Errorf("Expecting FailingStreak=2, but got %d\n", c.State.Health.FailingStreak)
}
handleResult(c.State.StartedAt.Add(60*time.Second), 1)
expect("health_status: unhealthy")
handleResult(c.State.StartedAt.Add(80*time.Second), 0)
expect("health_status: healthy")
if c.State.Health.FailingStreak != 0 {
t.Errorf("Expecting FailingStreak=0, but got %d\n", c.State.Health.FailingStreak)
}
}

@@ -108,6 +108,15 @@ func (daemon *Daemon) getInspectData(container *container.Container, size bool)
hostConfig.Links = append(hostConfig.Links, fmt.Sprintf("%s:%s", child.Name, linkAlias))
}
var containerHealth *types.Health
if container.State.Health != nil {
containerHealth = &types.Health{
Status: container.State.Health.Status,
FailingStreak: container.State.Health.FailingStreak,
Log: append([]*types.HealthcheckResult{}, container.State.Health.Log...),
}
}
containerState := &types.ContainerState{
Status: container.State.StateString(),
Running: container.State.Running,
@@ -120,6 +129,7 @@ func (daemon *Daemon) getInspectData(container *container.Container, size bool)
Error: container.State.Error,
StartedAt: container.State.StartedAt.Format(time.RFC3339Nano),
FinishedAt: container.State.FinishedAt.Format(time.RFC3339Nano),
Health: containerHealth,
}
contJSONBase := &types.ContainerJSONBase{

@@ -25,6 +25,7 @@ func (daemon *Daemon) StateChanged(id string, e libcontainerd.StateInfo) error {
if runtime.GOOS == "windows" {
return errors.New("Received StateOOM from libcontainerd on Windows. This should never happen.")
}
daemon.updateHealthMonitor(c)
daemon.LogContainerEvent(c, "oom")
case libcontainerd.StateExit:
c.Lock()
@@ -35,6 +36,7 @@ func (daemon *Daemon) StateChanged(id string, e libcontainerd.StateInfo) error {
attributes := map[string]string{
"exitCode": strconv.Itoa(int(e.ExitCode)),
}
daemon.updateHealthMonitor(c)
daemon.LogContainerEventWithAttributes(c, "die", attributes)
daemon.Cleanup(c)
// FIXME: here is race condition between two RUN instructions in Dockerfile
@@ -54,6 +56,7 @@ func (daemon *Daemon) StateChanged(id string, e libcontainerd.StateInfo) error {
"exitCode": strconv.Itoa(int(e.ExitCode)),
}
daemon.LogContainerEventWithAttributes(c, "die", attributes)
daemon.updateHealthMonitor(c)
return c.ToDisk()
case libcontainerd.StateExitProcess:
c.Lock()
@@ -74,18 +77,24 @@ func (daemon *Daemon) StateChanged(id string, e libcontainerd.StateInfo) error {
logrus.Warnf("Ignoring StateExitProcess for %v but no exec command found", e)
}
case libcontainerd.StateStart, libcontainerd.StateRestore:
// Container is already locked in this case
c.SetRunning(int(e.Pid), e.State == libcontainerd.StateStart)
c.HasBeenManuallyStopped = false
if err := c.ToDisk(); err != nil {
c.Reset(false)
return err
}
daemon.initHealthMonitor(c)
daemon.LogContainerEvent(c, "start")
case libcontainerd.StatePause:
// Container is already locked in this case
c.Paused = true
daemon.updateHealthMonitor(c)
daemon.LogContainerEvent(c, "pause")
case libcontainerd.StateResume:
// Container is already locked in this case
c.Paused = false
daemon.updateHealthMonitor(c)
daemon.LogContainerEvent(c, "unpause")
}

@@ -41,6 +41,8 @@ func (daemon *Daemon) containerStop(container *container.Container, seconds int)
return nil
}
daemon.stopHealthchecks(container)
stopSignal := container.StopSignal()
// 1. Send a stop signal
if err := daemon.killPossiblyDeadProcess(container, stopSignal); err != nil {

@@ -1470,6 +1470,73 @@ The `STOPSIGNAL` instruction sets the system call signal that will be sent to th
This signal can be a valid unsigned number that matches a position in the kernel's syscall table, for instance 9,
or a signal name in the format SIGNAME, for instance SIGKILL.
## HEALTHCHECK
The `HEALTHCHECK` instruction has two forms:
* `HEALTHCHECK [OPTIONS] CMD command` (check container health by running a command inside the container)
* `HEALTHCHECK NONE` (disable any healthcheck inherited from the base image)
The `HEALTHCHECK` instruction tells Docker how to test a container to check that
it is still working. This can detect cases such as a web server that is stuck in
an infinite loop and unable to handle new connections, even though the server
process is still running.
When a container has a healthcheck specified, it has a _health status_ in
addition to its normal status. This status is initially `starting`. Whenever a
health check passes, it becomes `healthy` (whatever state it was previously in).
After a certain number of consecutive failures, it becomes `unhealthy`.
The options that can appear before `CMD` are:
* `--interval=DURATION` (default: `30s`)
* `--timeout=DURATION` (default: `30s`)
* `--retries=N` (default: `1`)
The health check will first run **interval** seconds after the container is
started, and then again **interval** seconds after each previous check completes.
If a single run of the check takes longer than **timeout** seconds then the check
is considered to have failed.
It takes **retries** consecutive failures of the health check for the container
to be considered `unhealthy`.
There can only be one `HEALTHCHECK` instruction in a Dockerfile. If you list
more than one then only the last `HEALTHCHECK` will take effect.
The command after the `CMD` keyword can be either a shell command (e.g. `HEALTHCHECK
CMD /bin/check-running`) or an _exec_ array (as with other Dockerfile commands;
see e.g. `ENTRYPOINT` for details).
The command's exit status indicates the health status of the container.
The possible values are:
- 0: success - the container is healthy and ready for use
- 1: unhealthy - the container is not working correctly
- 2: starting - the container is not ready for use yet, but is working correctly
If the probe returns 2 ("starting") when the container has already moved out of the
"starting" state then it is treated as "unhealthy" instead.
For example, to check every five minutes or so that a web-server is able to
serve the site's main page within three seconds:
HEALTHCHECK --interval=5m --timeout=3s \
CMD curl -f http://localhost/ || exit 1
To help debug failing probes, any output text (UTF-8 encoded) that the command writes
on stdout or stderr will be stored in the health status and can be queried with
`docker inspect`. Such output should be kept short (only the first 4096 bytes
are stored currently).
When the health status of a container changes, a `health_status` event is
generated with the new status.
The `HEALTHCHECK` feature was added in Docker 1.12.
## Dockerfile examples
Below you can see some examples of Dockerfile syntax. If you're interested in

@@ -1250,6 +1250,7 @@ Dockerfile instruction and how the operator can override that setting.
#entrypoint-default-command-to-execute-at-runtime)
- [EXPOSE (Incoming Ports)](#expose-incoming-ports)
- [ENV (Environment Variables)](#env-environment-variables)
- [HEALTHCHECK](#healthcheck)
- [VOLUME (Shared Filesystems)](#volume-shared-filesystems)
- [USER](#user)
- [WORKDIR](#workdir)
@@ -1398,6 +1399,65 @@ above, or already defined by the developer with a Dockerfile `ENV`:
Similarly the operator can set the **hostname** with `-h`.
### HEALTHCHECK
```
--health-cmd Command to run to check health
--health-interval Time between running the check
--health-retries Consecutive failures needed to report unhealthy
--health-timeout Maximum time to allow one check to run
--no-healthcheck Disable any container-specified HEALTHCHECK
```
Example:
$ docker run --name=test -d \
--health-cmd='stat /etc/passwd || exit 1' \
--health-interval=2s \
busybox sleep 1d
$ sleep 2; docker inspect --format='{{.State.Health.Status}}' test
healthy
$ docker exec test rm /etc/passwd
$ sleep 2; docker inspect --format='{{json .State.Health}}' test
{
"Status": "unhealthy",
"FailingStreak": 3,
"Log": [
{
"Start": "2016-05-25T17:22:04.635478668Z",
"End": "2016-05-25T17:22:04.7272552Z",
"ExitCode": 0,
"Output": " File: /etc/passwd\n Size: 334 \tBlocks: 8 IO Block: 4096 regular file\nDevice: 32h/50d\tInode: 12 Links: 1\nAccess: (0664/-rw-rw-r--) Uid: ( 0/ root) Gid: ( 0/ root)\nAccess: 2015-12-05 22:05:32.000000000\nModify: 2015..."
},
{
"Start": "2016-05-25T17:22:06.732900633Z",
"End": "2016-05-25T17:22:06.822168935Z",
"ExitCode": 0,
"Output": " File: /etc/passwd\n Size: 334 \tBlocks: 8 IO Block: 4096 regular file\nDevice: 32h/50d\tInode: 12 Links: 1\nAccess: (0664/-rw-rw-r--) Uid: ( 0/ root) Gid: ( 0/ root)\nAccess: 2015-12-05 22:05:32.000000000\nModify: 2015..."
},
{
"Start": "2016-05-25T17:22:08.823956535Z",
"End": "2016-05-25T17:22:08.897359124Z",
"ExitCode": 1,
"Output": "stat: can't stat '/etc/passwd': No such file or directory\n"
},
{
"Start": "2016-05-25T17:22:10.898802931Z",
"End": "2016-05-25T17:22:10.969631866Z",
"ExitCode": 1,
"Output": "stat: can't stat '/etc/passwd': No such file or directory\n"
},
{
"Start": "2016-05-25T17:22:12.971033523Z",
"End": "2016-05-25T17:22:13.082015516Z",
"ExitCode": 1,
"Output": "stat: can't stat '/etc/passwd': No such file or directory\n"
}
]
}
The health status is also displayed in the `docker ps` output.
### TMPFS (mount tmpfs filesystems)
```bash

@@ -0,0 +1,154 @@
package main
import (
"encoding/json"
"github.com/docker/docker/pkg/integration/checker"
"github.com/docker/engine-api/types"
"github.com/go-check/check"
"strconv"
"strings"
"time"
)
func waitForStatus(c *check.C, name string, prev string, expected string) {
prev = prev + "\n"
expected = expected + "\n"
for {
out, _ := dockerCmd(c, "inspect", "--format={{.State.Status}}", name)
if out == expected {
return
}
c.Check(out, checker.Equals, prev)
if out != prev {
return
}
time.Sleep(100 * time.Millisecond)
}
}
func waitForHealthStatus(c *check.C, name string, prev string, expected string) {
prev = prev + "\n"
expected = expected + "\n"
for {
out, _ := dockerCmd(c, "inspect", "--format={{.State.Health.Status}}", name)
if out == expected {
return
}
c.Check(out, checker.Equals, prev)
if out != prev {
return
}
time.Sleep(100 * time.Millisecond)
}
}
func getHealth(c *check.C, name string) *types.Health {
out, _ := dockerCmd(c, "inspect", "--format={{json .State.Health}}", name)
var health types.Health
err := json.Unmarshal([]byte(out), &health)
c.Check(err, checker.Equals, nil)
return &health
}
func (s *DockerSuite) TestHealth(c *check.C) {
testRequires(c, DaemonIsLinux) // busybox doesn't work on Windows
imageName := "testhealth"
_, err := buildImage(imageName,
`FROM busybox
RUN echo OK > /status
CMD ["/bin/sleep", "120"]
STOPSIGNAL SIGKILL
HEALTHCHECK --interval=1s --timeout=30s \
CMD cat /status`,
true)
c.Check(err, check.IsNil)
// No health status before starting
name := "test_health"
dockerCmd(c, "create", "--name", name, imageName)
out, _ := dockerCmd(c, "ps", "-a", "--format={{.Status}}")
c.Check(out, checker.Equals, "Created\n")
// Inspect the options
out, _ = dockerCmd(c, "inspect",
"--format='timeout={{.Config.Healthcheck.Timeout}} "+
"interval={{.Config.Healthcheck.Interval}} "+
"retries={{.Config.Healthcheck.Retries}} "+
"test={{.Config.Healthcheck.Test}}'", name)
c.Check(out, checker.Equals, "timeout=30s interval=1s retries=0 test=[CMD-SHELL cat /status]\n")
// Start
dockerCmd(c, "start", name)
waitForHealthStatus(c, name, "starting", "healthy")
// Make it fail
dockerCmd(c, "exec", name, "rm", "/status")
waitForHealthStatus(c, name, "healthy", "unhealthy")
// Inspect the status
out, _ = dockerCmd(c, "inspect", "--format={{.State.Health.Status}}", name)
c.Check(out, checker.Equals, "unhealthy\n")
// Make it healthy again
dockerCmd(c, "exec", name, "touch", "/status")
waitForHealthStatus(c, name, "unhealthy", "healthy")
// Remove container
dockerCmd(c, "rm", "-f", name)
// Disable the check from the CLI
out, _ = dockerCmd(c, "create", "--name=noh", "--no-healthcheck", imageName)
out, _ = dockerCmd(c, "inspect", "--format={{.Config.Healthcheck.Test}}", "noh")
c.Check(out, checker.Equals, "[NONE]\n")
dockerCmd(c, "rm", "noh")
// Disable the check with a new build
_, err = buildImage("no_healthcheck",
`FROM testhealth
HEALTHCHECK NONE`, true)
c.Check(err, check.IsNil)
out, _ = dockerCmd(c, "inspect", "--format={{.ContainerConfig.Healthcheck.Test}}", "no_healthcheck")
c.Check(out, checker.Equals, "[NONE]\n")
// Enable the checks from the CLI
_, _ = dockerCmd(c, "run", "-d", "--name=fatal_healthcheck",
"--health-interval=0.5s",
"--health-retries=3",
"--health-cmd=cat /status",
"no_healthcheck")
waitForHealthStatus(c, "fatal_healthcheck", "starting", "healthy")
health := getHealth(c, "fatal_healthcheck")
c.Check(health.Status, checker.Equals, "healthy")
c.Check(health.FailingStreak, checker.Equals, 0)
last := health.Log[len(health.Log)-1]
c.Check(last.ExitCode, checker.Equals, 0)
c.Check(last.Output, checker.Equals, "OK\n")
// Fail the check, which should now make it exit
dockerCmd(c, "exec", "fatal_healthcheck", "rm", "/status")
waitForStatus(c, "fatal_healthcheck", "running", "exited")
out, _ = dockerCmd(c, "inspect", "--format={{.State.Health.Status}}", "fatal_healthcheck")
c.Check(out, checker.Equals, "unhealthy\n")
failsStr, _ := dockerCmd(c, "inspect", "--format={{.State.Health.FailingStreak}}", "fatal_healthcheck")
fails, err := strconv.Atoi(strings.TrimSpace(failsStr))
c.Check(err, check.IsNil)
c.Check(fails >= 3, checker.Equals, true)
dockerCmd(c, "rm", "-f", "fatal_healthcheck")
// Check timeout
// Note: if the interval is too small, it seems that Docker spends all its time running health
// checks and never gets around to killing it.
_, _ = dockerCmd(c, "run", "-d", "--name=test",
"--health-interval=1s", "--health-cmd=sleep 5m", "--health-timeout=1ms", imageName)
waitForHealthStatus(c, "test", "starting", "unhealthy")
health = getHealth(c, "test")
last = health.Log[len(health.Log)-1]
c.Check(health.Status, checker.Equals, "unhealthy")
c.Check(last.ExitCode, checker.Equals, -1)
c.Check(last.Output, checker.Equals, "Health check exceeded timeout (1ms)")
dockerCmd(c, "rm", "-f", "test")
}

@@ -190,6 +190,17 @@ func (clnt *client) Signal(containerID string, sig int) error {
return err
}
func (clnt *client) SignalProcess(containerID string, pid string, sig int) error {
clnt.lock(containerID)
defer clnt.unlock(containerID)
_, err := clnt.remote.apiClient.Signal(context.Background(), &containerd.SignalRequest{
Id: containerID,
Pid: pid,
Signal: uint32(sig),
})
return err
}
func (clnt *client) Resize(containerID, processFriendlyName string, width, height int) error {
clnt.lock(containerID)
defer clnt.unlock(containerID)

@@ -304,6 +304,25 @@ func (clnt *client) Signal(containerID string, sig int) error {
return nil
}
// While Linux has support for the full range of signals, signals aren't really implemented on Windows.
// We try to terminate the specified process whatever signal is requested.
func (clnt *client) SignalProcess(containerID string, processFriendlyName string, sig int) error {
clnt.lock(containerID)
defer clnt.unlock(containerID)
cont, err := clnt.getContainer(containerID)
if err != nil {
return err
}
for _, p := range cont.processes {
if p.friendlyName == processFriendlyName {
return hcsshim.TerminateProcessInComputeSystem(containerID, p.systemPid)
}
}
return fmt.Errorf("SignalProcess could not find process %s in %s", processFriendlyName, containerID)
}
// Resize handles a CLI event to resize an interactive docker run or docker exec
// window.
func (clnt *client) Resize(containerID, processFriendlyName string, width, height int) error {

@@ -34,6 +34,7 @@ type Backend interface {
type Client interface {
Create(containerID string, spec Spec, options ...CreateOption) error
Signal(containerID string, sig int) error
SignalProcess(containerID string, processFriendlyName string, sig int) error
AddProcess(containerID, processFriendlyName string, process Process) error
Resize(containerID, processFriendlyName string, width, height int) error
Pause(containerID string) error

@@ -100,6 +100,12 @@ func Parse(cmd *flag.FlagSet, args []string) (*container.Config, *container.Host
flStopSignal = cmd.String([]string{"-stop-signal"}, signal.DefaultStopSignal, fmt.Sprintf("Signal to stop a container, %v by default", signal.DefaultStopSignal))
flIsolation = cmd.String([]string{"-isolation"}, "", "Container isolation technology")
flShmSize = cmd.String([]string{"-shm-size"}, "", "Size of /dev/shm, default value is 64MB")
// Healthcheck
flNoHealthcheck = cmd.Bool([]string{"-no-healthcheck"}, false, "Disable any container-specified HEALTHCHECK")
flHealthCmd = cmd.String([]string{"-health-cmd"}, "", "Command to run to check health")
flHealthInterval = cmd.Duration([]string{"-health-interval"}, 0, "Time between running the check")
flHealthTimeout = cmd.Duration([]string{"-health-timeout"}, 0, "Maximum time to allow one check to run")
flHealthRetries = cmd.Int([]string{"-health-retries"}, 0, "Consecutive failures needed to report unhealthy")
)
cmd.Var(&flAttach, []string{"a", "-attach"}, "Attach to STDIN, STDOUT or STDERR")
@@ -351,6 +357,39 @@ func Parse(cmd *flag.FlagSet, args []string) (*container.Config, *container.Host
return nil, nil, nil, cmd, err
}
// Healthcheck
var healthConfig *container.HealthConfig
haveHealthSettings := *flHealthCmd != "" ||
*flHealthInterval != 0 ||
*flHealthTimeout != 0 ||
*flHealthRetries != 0
if *flNoHealthcheck {
if haveHealthSettings {
return nil, nil, nil, cmd, fmt.Errorf("--no-healthcheck conflicts with --health-* options")
}
test := strslice.StrSlice{"NONE"}
healthConfig = &container.HealthConfig{Test: test}
} else if haveHealthSettings {
var probe strslice.StrSlice
if *flHealthCmd != "" {
args := []string{"CMD-SHELL", *flHealthCmd}
probe = strslice.StrSlice(args)
}
if *flHealthInterval < 0 {
return nil, nil, nil, cmd, fmt.Errorf("--health-interval cannot be negative")
}
if *flHealthTimeout < 0 {
return nil, nil, nil, cmd, fmt.Errorf("--health-timeout cannot be negative")
}
healthConfig = &container.HealthConfig{
Test: probe,
Interval: *flHealthInterval,
Timeout: *flHealthTimeout,
Retries: *flHealthRetries,
}
}
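// Illustrative results of the flag handling above (not part of the diff):
//   --no-healthcheck                -> Test = ["NONE"]
//   --health-cmd='stat /etc/passwd' -> Test = ["CMD-SHELL", "stat /etc/passwd"]
//   no --health-* flags at all      -> healthConfig stays nil, so the image's
//                                      HEALTHCHECK (if any) applies unchanged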
resources := container.Resources{
CgroupParent: *flCgroupParent,
Memory: flMemory,
@@ -399,6 +438,7 @@ func Parse(cmd *flag.FlagSet, args []string) (*container.Config, *container.Host
Entrypoint: entrypoint,
WorkingDir: *flWorkingDir,
Labels: ConvertKVStringsToMap(labels),
Healthcheck: healthConfig,
}
if cmd.IsSet("-stop-signal") {
config.StopSignal = *flStopSignal

@@ -9,6 +9,7 @@ import (
"runtime"
"strings"
"testing"
"time"
flag "github.com/docker/docker/pkg/mflag"
"github.com/docker/docker/runconfig"
@@ -584,6 +585,45 @@ func TestParseRestartPolicy(t *testing.T) {
}
}
func TestParseHealth(t *testing.T) {
checkOk := func(args ...string) *container.HealthConfig {
config, _, _, _, err := parseRun(args)
if err != nil {
t.Fatalf("%#v: %v", args, err)
}
return config.Healthcheck
}
checkError := func(expected string, args ...string) {
config, _, _, _, err := parseRun(args)
if err == nil {
t.Fatalf("Expected error, but got %#v", config)
}
if err.Error() != expected {
t.Fatalf("Expected %#v, got %#v", expected, err)
}
}
health := checkOk("--no-healthcheck", "img", "cmd")
if health == nil || len(health.Test) != 1 || health.Test[0] != "NONE" {
t.Fatalf("--no-healthcheck failed: %#v", health)
}
health = checkOk("--health-cmd=/check.sh -q", "img", "cmd")
if len(health.Test) != 2 || health.Test[0] != "CMD-SHELL" || health.Test[1] != "/check.sh -q" {
t.Fatalf("--health-cmd: got %#v", health.Test)
}
if health.Timeout != 0 {
t.Fatalf("--health-cmd: timeout = %f", health.Timeout)
}
checkError("--no-healthcheck conflicts with --health-* options",
"--no-healthcheck", "--health-cmd=/check.sh -q", "img", "cmd")
health = checkOk("--health-timeout=2s", "--health-retries=3", "--health-interval=4.5s", "img", "cmd")
if health.Timeout != 2*time.Second || health.Retries != 3 || health.Interval != 4500*time.Millisecond {
t.Fatalf("--health-*: got %#v", health)
}
}
func TestParseLoggingOpts(t *testing.T) {
// logging opts ko
if _, _, _, _, err := parseRun([]string{"--log-driver=none", "--log-opt=anything", "img", "cmd"}); err == nil || err.Error() != "invalid logging opts for driver none" {