add health check in docker cluster
Signed-off-by: runshenzhu <runshen.zhu@gmail.com>
This commit is contained in:
parent
6f052edcc0
commit
1ded1f26e1
|
@ -2,10 +2,13 @@ package executor
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"io"
|
"io"
|
||||||
|
"time"
|
||||||
|
|
||||||
clustertypes "github.com/docker/docker/daemon/cluster/provider"
|
clustertypes "github.com/docker/docker/daemon/cluster/provider"
|
||||||
"github.com/docker/engine-api/types"
|
"github.com/docker/engine-api/types"
|
||||||
"github.com/docker/engine-api/types/container"
|
"github.com/docker/engine-api/types/container"
|
||||||
|
"github.com/docker/engine-api/types/events"
|
||||||
|
"github.com/docker/engine-api/types/filters"
|
||||||
"github.com/docker/engine-api/types/network"
|
"github.com/docker/engine-api/types/network"
|
||||||
"github.com/docker/libnetwork/cluster"
|
"github.com/docker/libnetwork/cluster"
|
||||||
networktypes "github.com/docker/libnetwork/types"
|
networktypes "github.com/docker/libnetwork/types"
|
||||||
|
@ -33,4 +36,6 @@ type Backend interface {
|
||||||
SetNetworkBootstrapKeys([]*networktypes.EncryptionKey) error
|
SetNetworkBootstrapKeys([]*networktypes.EncryptionKey) error
|
||||||
SetClusterProvider(provider cluster.Provider)
|
SetClusterProvider(provider cluster.Provider)
|
||||||
IsSwarmCompatible() error
|
IsSwarmCompatible() error
|
||||||
|
SubscribeToEvents(since, until time.Time, filter filters.Args) ([]events.Message, chan interface{})
|
||||||
|
UnsubscribeFromEvents(listener chan interface{})
|
||||||
}
|
}
|
||||||
|
|
|
@ -7,11 +7,13 @@ import (
|
||||||
"io"
|
"io"
|
||||||
"strings"
|
"strings"
|
||||||
"syscall"
|
"syscall"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/Sirupsen/logrus"
|
"github.com/Sirupsen/logrus"
|
||||||
"github.com/docker/docker/api/server/httputils"
|
"github.com/docker/docker/api/server/httputils"
|
||||||
executorpkg "github.com/docker/docker/daemon/cluster/executor"
|
executorpkg "github.com/docker/docker/daemon/cluster/executor"
|
||||||
"github.com/docker/engine-api/types"
|
"github.com/docker/engine-api/types"
|
||||||
|
"github.com/docker/engine-api/types/events"
|
||||||
"github.com/docker/engine-api/types/versions"
|
"github.com/docker/engine-api/types/versions"
|
||||||
"github.com/docker/libnetwork"
|
"github.com/docker/libnetwork"
|
||||||
"github.com/docker/swarmkit/api"
|
"github.com/docker/swarmkit/api"
|
||||||
|
@ -168,9 +170,40 @@ func (c *containerAdapter) inspect(ctx context.Context) (types.ContainerJSON, er
|
||||||
|
|
||||||
// events issues a call to the events API and returns a channel with all
|
// events issues a call to the events API and returns a channel with all
|
||||||
// events. The stream of events can be shutdown by cancelling the context.
|
// events. The stream of events can be shutdown by cancelling the context.
|
||||||
//
|
func (c *containerAdapter) events(ctx context.Context) <-chan events.Message {
|
||||||
// A chan struct{} is returned that will be closed if the event processing
|
log.G(ctx).Debugf("waiting on events")
|
||||||
// fails and needs to be restarted.
|
buffer, l := c.backend.SubscribeToEvents(time.Time{}, time.Time{}, c.container.eventFilter())
|
||||||
|
eventsq := make(chan events.Message, len(buffer))
|
||||||
|
|
||||||
|
for _, event := range buffer {
|
||||||
|
eventsq <- event
|
||||||
|
}
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
defer c.backend.UnsubscribeFromEvents(l)
|
||||||
|
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case ev := <-l:
|
||||||
|
jev, ok := ev.(events.Message)
|
||||||
|
if !ok {
|
||||||
|
log.G(ctx).Warnf("unexpected event message: %q", ev)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
select {
|
||||||
|
case eventsq <- jev:
|
||||||
|
case <-ctx.Done():
|
||||||
|
return
|
||||||
|
}
|
||||||
|
case <-ctx.Done():
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
return eventsq
|
||||||
|
}
|
||||||
|
|
||||||
func (c *containerAdapter) wait(ctx context.Context) error {
|
func (c *containerAdapter) wait(ctx context.Context) error {
|
||||||
return c.backend.ContainerWaitWithContext(ctx, c.container.name())
|
return c.backend.ContainerWaitWithContext(ctx, c.container.name())
|
||||||
}
|
}
|
||||||
|
|
|
@ -13,6 +13,8 @@ import (
|
||||||
"github.com/docker/docker/reference"
|
"github.com/docker/docker/reference"
|
||||||
"github.com/docker/engine-api/types"
|
"github.com/docker/engine-api/types"
|
||||||
enginecontainer "github.com/docker/engine-api/types/container"
|
enginecontainer "github.com/docker/engine-api/types/container"
|
||||||
|
"github.com/docker/engine-api/types/events"
|
||||||
|
"github.com/docker/engine-api/types/filters"
|
||||||
"github.com/docker/engine-api/types/network"
|
"github.com/docker/engine-api/types/network"
|
||||||
"github.com/docker/swarmkit/agent/exec"
|
"github.com/docker/swarmkit/agent/exec"
|
||||||
"github.com/docker/swarmkit/api"
|
"github.com/docker/swarmkit/api"
|
||||||
|
@ -420,3 +422,11 @@ func (c *containerConfig) networkCreateRequest(name string) (clustertypes.Networ
|
||||||
|
|
||||||
return clustertypes.NetworkCreateRequest{na.Network.ID, types.NetworkCreateRequest{Name: name, NetworkCreate: options}}, nil
|
return clustertypes.NetworkCreateRequest{na.Network.ID, types.NetworkCreateRequest{Name: name, NetworkCreate: options}}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (c containerConfig) eventFilter() filters.Args {
|
||||||
|
filter := filters.NewArgs()
|
||||||
|
filter.Add("type", events.ContainerEventType)
|
||||||
|
filter.Add("name", c.name())
|
||||||
|
filter.Add("label", fmt.Sprintf("%v.task.id=%v", systemLabelPrefix, c.task.ID))
|
||||||
|
return filter
|
||||||
|
}
|
||||||
|
|
|
@ -6,6 +6,7 @@ import (
|
||||||
|
|
||||||
executorpkg "github.com/docker/docker/daemon/cluster/executor"
|
executorpkg "github.com/docker/docker/daemon/cluster/executor"
|
||||||
"github.com/docker/engine-api/types"
|
"github.com/docker/engine-api/types"
|
||||||
|
"github.com/docker/engine-api/types/events"
|
||||||
"github.com/docker/swarmkit/agent/exec"
|
"github.com/docker/swarmkit/agent/exec"
|
||||||
"github.com/docker/swarmkit/api"
|
"github.com/docker/swarmkit/api"
|
||||||
"github.com/docker/swarmkit/log"
|
"github.com/docker/swarmkit/log"
|
||||||
|
@ -153,20 +154,39 @@ func (r *controller) Wait(pctx context.Context) error {
|
||||||
ctx, cancel := context.WithCancel(pctx)
|
ctx, cancel := context.WithCancel(pctx)
|
||||||
defer cancel()
|
defer cancel()
|
||||||
|
|
||||||
|
healthErr := make(chan error, 1)
|
||||||
|
go func() {
|
||||||
|
ectx, cancel := context.WithCancel(ctx) // cancel event context on first event
|
||||||
|
defer cancel()
|
||||||
|
if err := r.checkHealth(ectx); err == ErrContainerUnhealthy {
|
||||||
|
healthErr <- ErrContainerUnhealthy
|
||||||
|
if err := r.Shutdown(ectx); err != nil {
|
||||||
|
log.G(ectx).WithError(err).Debug("shutdown failed on unhealthy")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
err := r.adapter.wait(ctx)
|
err := r.adapter.wait(ctx)
|
||||||
if ctx.Err() != nil {
|
if ctx.Err() != nil {
|
||||||
return ctx.Err()
|
return ctx.Err()
|
||||||
}
|
}
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
ee := &exitError{}
|
ee := &exitError{}
|
||||||
if err.Error() != "" {
|
|
||||||
ee.cause = err
|
|
||||||
}
|
|
||||||
if ec, ok := err.(exec.ExitCoder); ok {
|
if ec, ok := err.(exec.ExitCoder); ok {
|
||||||
ee.code = ec.ExitCode()
|
ee.code = ec.ExitCode()
|
||||||
}
|
}
|
||||||
|
select {
|
||||||
|
case e := <-healthErr:
|
||||||
|
ee.cause = e
|
||||||
|
default:
|
||||||
|
if err.Error() != "" {
|
||||||
|
ee.cause = err
|
||||||
|
}
|
||||||
|
}
|
||||||
return ee
|
return ee
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -250,6 +270,21 @@ func (r *controller) Close() error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (r *controller) matchevent(event events.Message) bool {
|
||||||
|
if event.Type != events.ContainerEventType {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO(stevvooe): Filter based on ID matching, in addition to name.
|
||||||
|
|
||||||
|
// Make sure the events are for this container.
|
||||||
|
if event.Actor.Attributes["name"] != r.adapter.container.name() {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
func (r *controller) checkClosed() error {
|
func (r *controller) checkClosed() error {
|
||||||
select {
|
select {
|
||||||
case <-r.closed:
|
case <-r.closed:
|
||||||
|
@ -289,3 +324,26 @@ func (e *exitError) ExitCode() int {
|
||||||
func (e *exitError) Cause() error {
|
func (e *exitError) Cause() error {
|
||||||
return e.cause
|
return e.cause
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// checkHealth blocks until unhealthy container is detected or ctx exits
|
||||||
|
func (r *controller) checkHealth(ctx context.Context) error {
|
||||||
|
eventq := r.adapter.events(ctx)
|
||||||
|
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
return nil
|
||||||
|
case <-r.closed:
|
||||||
|
return nil
|
||||||
|
case event := <-eventq:
|
||||||
|
if !r.matchevent(event) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
switch event.Action {
|
||||||
|
case "health_status: unhealthy":
|
||||||
|
return ErrContainerUnhealthy
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -9,4 +9,7 @@ var (
|
||||||
// ErrContainerDestroyed returned when a container is prematurely destroyed
|
// ErrContainerDestroyed returned when a container is prematurely destroyed
|
||||||
// during a wait call.
|
// during a wait call.
|
||||||
ErrContainerDestroyed = fmt.Errorf("dockerexec: container destroyed")
|
ErrContainerDestroyed = fmt.Errorf("dockerexec: container destroyed")
|
||||||
|
|
||||||
|
// ErrContainerUnhealthy returned if controller detects the health check failure
|
||||||
|
ErrContainerUnhealthy = fmt.Errorf("dockerexec: unhealthy container")
|
||||||
)
|
)
|
||||||
|
|
|
@ -0,0 +1,102 @@
|
||||||
|
// +build !windows
|
||||||
|
|
||||||
|
package container
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/docker/docker/container"
|
||||||
|
"github.com/docker/docker/daemon"
|
||||||
|
"github.com/docker/docker/daemon/events"
|
||||||
|
containertypes "github.com/docker/engine-api/types/container"
|
||||||
|
"github.com/docker/swarmkit/api"
|
||||||
|
"golang.org/x/net/context"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestHealthStates(t *testing.T) {
|
||||||
|
|
||||||
|
// set up environment: events, task, container ....
|
||||||
|
e := events.New()
|
||||||
|
_, l, _ := e.Subscribe()
|
||||||
|
defer e.Evict(l)
|
||||||
|
|
||||||
|
task := &api.Task{
|
||||||
|
ID: "id",
|
||||||
|
ServiceID: "sid",
|
||||||
|
Spec: api.TaskSpec{
|
||||||
|
Runtime: &api.TaskSpec_Container{
|
||||||
|
Container: &api.ContainerSpec{
|
||||||
|
Image: "image_name",
|
||||||
|
Labels: map[string]string{
|
||||||
|
"com.docker.swarm.task.id": "id",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Annotations: api.Annotations{Name: "name"},
|
||||||
|
}
|
||||||
|
|
||||||
|
c := &container.Container{
|
||||||
|
CommonContainer: container.CommonContainer{
|
||||||
|
ID: "id",
|
||||||
|
Name: "name",
|
||||||
|
Config: &containertypes.Config{
|
||||||
|
Image: "image_name",
|
||||||
|
Labels: map[string]string{
|
||||||
|
"com.docker.swarm.task.id": "id",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
daemon := &daemon.Daemon{
|
||||||
|
EventsService: e,
|
||||||
|
}
|
||||||
|
|
||||||
|
controller, err := newController(daemon, task)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("create controller fail %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
errChan := make(chan error, 1)
|
||||||
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
// fire checkHealth
|
||||||
|
go func() {
|
||||||
|
err := controller.checkHealth(ctx)
|
||||||
|
select {
|
||||||
|
case errChan <- err:
|
||||||
|
case <-ctx.Done():
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
// send an event and expect to get expectedErr
|
||||||
|
// if expectedErr is nil, shouldn't get any error
|
||||||
|
logAndExpect := func(msg string, expectedErr error) {
|
||||||
|
daemon.LogContainerEvent(c, msg)
|
||||||
|
|
||||||
|
timer := time.NewTimer(1 * time.Second)
|
||||||
|
defer timer.Stop()
|
||||||
|
|
||||||
|
select {
|
||||||
|
case err := <-errChan:
|
||||||
|
if err != expectedErr {
|
||||||
|
t.Fatalf("expect error %v, but get %v", expectedErr, err)
|
||||||
|
}
|
||||||
|
case <-timer.C:
|
||||||
|
if expectedErr != nil {
|
||||||
|
t.Fatalf("time limit exceeded, didn't get expected error")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// events that are ignored by checkHealth
|
||||||
|
logAndExpect("health_status: running", nil)
|
||||||
|
logAndExpect("health_status: healthy", nil)
|
||||||
|
logAndExpect("die", nil)
|
||||||
|
|
||||||
|
// unhealthy event will be caught by checkHealth
|
||||||
|
logAndExpect("health_status: unhealthy", ErrContainerUnhealthy)
|
||||||
|
}
|
Loading…
Reference in New Issue