The error type libnetwork.ErrNoSuchNetwork is used in the controller to retry creation of a managed network through the manager. A change of that type broke the retry logic, so the network was no longer created and no new container on that network could be launched. Added a unit test. Signed-off-by: Flavio Crisciani <flavio.crisciani@docker.com>

package container

import (
	"fmt"
	"os"
	"strconv"
	"strings"
	"time"

	"github.com/docker/docker/api/types"
	"github.com/docker/docker/api/types/events"
	executorpkg "github.com/docker/docker/daemon/cluster/executor"
	"github.com/docker/go-connections/nat"
	"github.com/docker/libnetwork"
	"github.com/docker/swarmkit/agent/exec"
	"github.com/docker/swarmkit/api"
	"github.com/docker/swarmkit/log"
	gogotypes "github.com/gogo/protobuf/types"
	"github.com/pkg/errors"
	"golang.org/x/net/context"
	"golang.org/x/time/rate"
)

const defaultGossipConvergeDelay = 2 * time.Second

// controller implements agent.Controller against docker's API.
//
// Most operations against docker's API are done through the container name,
// which is unique to the task.
type controller struct {
	task       *api.Task
	adapter    *containerAdapter
	closed     chan struct{}
	err        error
	pulled     chan struct{} // closed after pull
	cancelPull func()        // cancels pull context if not nil
	pullErr    error         // pull error, only read after pulled closed
}

var _ exec.Controller = &controller{}

// newController returns a docker exec runner for the provided task.
func newController(b executorpkg.Backend, task *api.Task, node *api.NodeDescription, dependencies exec.DependencyGetter) (*controller, error) {
	adapter, err := newContainerAdapter(b, task, node, dependencies)
	if err != nil {
		return nil, err
	}

	return &controller{
		task:    task,
		adapter: adapter,
		closed:  make(chan struct{}),
	}, nil
}

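// Task returns the task assigned to this controller.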
func (r *controller) Task() (*api.Task, error) {
	return r.task, nil
}

// ContainerStatus returns the container-specific status for the task.
func (r *controller) ContainerStatus(ctx context.Context) (*api.ContainerStatus, error) {
	ctnr, err := r.adapter.inspect(ctx)
	if err != nil {
		if isUnknownContainer(err) {
			return nil, nil
		}
		return nil, err
	}
	return parseContainerStatus(ctnr)
}

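// PortStatus returns the port status (host-published ports) for the task's
// container, or nil if the container can no longer be found.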
func (r *controller) PortStatus(ctx context.Context) (*api.PortStatus, error) {
	ctnr, err := r.adapter.inspect(ctx)
	if err != nil {
		if isUnknownContainer(err) {
			return nil, nil
		}

		return nil, err
	}

	return parsePortStatus(ctnr)
}

// Update takes a recent task update and applies it to the container.
func (r *controller) Update(ctx context.Context, t *api.Task) error {
	// TODO(stevvooe): While assignment of tasks is idempotent, we do allow
	// updates of metadata, such as labelling, as well as any other properties
	// that make sense.
	return nil
}

// Prepare creates a container and ensures the image is pulled.
//
// If the container has already been created, exec.ErrTaskPrepared is returned.
func (r *controller) Prepare(ctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	// Make sure all the networks that the task needs are created.
	if err := r.adapter.createNetworks(ctx); err != nil {
		return err
	}

	// Make sure all the volumes that the task needs are created.
	if err := r.adapter.createVolumes(ctx); err != nil {
		return err
	}

	if os.Getenv("DOCKER_SERVICE_PREFER_OFFLINE_IMAGE") != "1" {
		if r.pulled == nil {
			// Fork the pull to a different context to allow pull to continue
			// on re-entrant calls to Prepare. This ensures that Prepare can be
			// idempotent and not incur the extra cost of pulling when
			// cancelled on updates.
			var pctx context.Context

			r.pulled = make(chan struct{})
			pctx, r.cancelPull = context.WithCancel(context.Background()) // TODO(stevvooe): Bind a context to the entire controller.

			go func() {
				defer close(r.pulled)
				r.pullErr = r.adapter.pullImage(pctx) // protected by closing r.pulled
			}()
		}

		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-r.pulled:
			if r.pullErr != nil {
				// NOTE(stevvooe): We always try to pull the image to make sure we have
				// the most up to date version. This will return an error, but we only
				// log it. If the image truly doesn't exist, the create below will
				// error out.
				//
				// This gives us some nice behavior where we use up to date versions of
				// mutable tags, but will still run if the old image is available but a
				// registry is down.
				//
				// If you don't want this behavior, lock down your image to an
				// immutable tag or digest.
				log.G(ctx).WithError(r.pullErr).Error("pulling image failed")
			}
		}
	}
	if err := r.adapter.create(ctx); err != nil {
		if isContainerCreateNameConflict(err) {
			if _, err := r.adapter.inspect(ctx); err != nil {
				return err
			}

			// container is already created. success!
			return exec.ErrTaskPrepared
		}

		return err
	}

	return nil
}

// Start the container. An error will be returned if the container is already started.
func (r *controller) Start(ctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	ctnr, err := r.adapter.inspect(ctx)
	if err != nil {
		return err
	}

	// Detect whether the container has *ever* been started. If so, we don't
	// issue the start.
	//
	// TODO(stevvooe): This is very racy. While reading inspect, another could
	// start the process and we could end up starting it twice.
	if ctnr.State.Status != "created" {
		return exec.ErrTaskStarted
	}

	for {
		if err := r.adapter.start(ctx); err != nil {
			if _, ok := errors.Cause(err).(libnetwork.ErrNoSuchNetwork); ok {
				// Retry network creation again if we
				// failed because some of the networks
				// were not found.
				if err := r.adapter.createNetworks(ctx); err != nil {
					return err
				}

				continue
			}

			return errors.Wrap(err, "starting container failed")
		}

		break
	}

	// no health check
	if ctnr.Config == nil || ctnr.Config.Healthcheck == nil || len(ctnr.Config.Healthcheck.Test) == 0 || ctnr.Config.Healthcheck.Test[0] == "NONE" {
		if err := r.adapter.activateServiceBinding(); err != nil {
			log.G(ctx).WithError(err).Errorf("failed to activate service binding for container %s which has no healthcheck config", r.adapter.container.name())
			return err
		}
		return nil
	}

	// wait for container to be healthy
	eventq := r.adapter.events(ctx)

	var healthErr error
	for {
		select {
		case event := <-eventq:
			if !r.matchevent(event) {
				continue
			}

			switch event.Action {
			case "die": // exit on terminal events
				ctnr, err := r.adapter.inspect(ctx)
				if err != nil {
					return errors.Wrap(err, "die event received")
				} else if ctnr.State.ExitCode != 0 {
					return &exitError{code: ctnr.State.ExitCode, cause: healthErr}
				}

				return nil
			case "destroy":
				// If we get here, something has gone wrong but we want to exit
				// and report anyways.
				return ErrContainerDestroyed
			case "health_status: unhealthy":
				// in this case, we stop the container and report unhealthy status
				if err := r.Shutdown(ctx); err != nil {
					return errors.Wrap(err, "unhealthy container shutdown failed")
				}
				// set health check error, and wait for container to fully exit ("die" event)
				healthErr = ErrContainerUnhealthy
			case "health_status: healthy":
				if err := r.adapter.activateServiceBinding(); err != nil {
					log.G(ctx).WithError(err).Errorf("failed to activate service binding for container %s after healthy event", r.adapter.container.name())
					return err
				}
				return nil
			}
		case <-ctx.Done():
			return ctx.Err()
		case <-r.closed:
			return r.err
		}
	}
}

// Wait on the container to exit.
func (r *controller) Wait(pctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	ctx, cancel := context.WithCancel(pctx)
	defer cancel()

	healthErr := make(chan error, 1)
	go func() {
		ectx, cancel := context.WithCancel(ctx) // cancel event context on first event
		defer cancel()
		if err := r.checkHealth(ectx); err == ErrContainerUnhealthy {
			healthErr <- ErrContainerUnhealthy
			if err := r.Shutdown(ectx); err != nil {
				log.G(ectx).WithError(err).Debug("shutdown failed on unhealthy")
			}
		}
	}()

	waitC, err := r.adapter.wait(ctx)
	if err != nil {
		return err
	}

	if status := <-waitC; status.ExitCode() != 0 {
		exitErr := &exitError{
			code: status.ExitCode(),
		}

		// Set the cause if it is knowable.
		select {
		case e := <-healthErr:
			exitErr.cause = e
		default:
			if status.Err() != nil {
				exitErr.cause = status.Err()
			}
		}

		return exitErr
	}

	return nil
}

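// hasServiceBinding reports whether the task is attached to at least one
// network other than the default bridge, and therefore participates in
// service binding.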
func (r *controller) hasServiceBinding() bool {
	if r.task == nil {
		return false
	}

	// service is attached to a network besides the default bridge
	for _, na := range r.task.Networks {
		if na.Network == nil ||
			na.Network.DriverState == nil ||
			na.Network.DriverState.Name == "bridge" && na.Network.Spec.Annotations.Name == "bridge" {
			continue
		}
		return true
	}

	return false
}

// Shutdown the container cleanly.
func (r *controller) Shutdown(ctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	if r.cancelPull != nil {
		r.cancelPull()
	}

	if r.hasServiceBinding() {
		// remove container from service binding
		if err := r.adapter.deactivateServiceBinding(); err != nil {
			log.G(ctx).WithError(err).Warningf("failed to deactivate service binding for container %s", r.adapter.container.name())
			// Don't return an error here, because failure to deactivate
			// the service binding is expected if the container was never
			// started.
		}

		// add a delay for gossip converge
		// TODO(dongluochen): this delay should be configurable to fit different cluster size and network delay.
		time.Sleep(defaultGossipConvergeDelay)
	}

	if err := r.adapter.shutdown(ctx); err != nil {
		if isUnknownContainer(err) || isStoppedContainer(err) {
			return nil
		}

		return err
	}

	return nil
}

// Terminate the container, with force.
func (r *controller) Terminate(ctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	if r.cancelPull != nil {
		r.cancelPull()
	}

	if err := r.adapter.terminate(ctx); err != nil {
		if isUnknownContainer(err) {
			return nil
		}

		return err
	}

	return nil
}

// Remove the container and its resources.
func (r *controller) Remove(ctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	if r.cancelPull != nil {
		r.cancelPull()
	}

	// It may be necessary to shut down the task before removing it.
	if err := r.Shutdown(ctx); err != nil {
		if isUnknownContainer(err) {
			return nil
		}
		// This may fail if the task was already shut down.
		log.G(ctx).WithError(err).Debug("shutdown failed on removal")
	}

	// Try removing networks referenced in this task in case this
	// task is the last one referencing it
	if err := r.adapter.removeNetworks(ctx); err != nil {
		if isUnknownContainer(err) {
			return nil
		}
		return err
	}

	if err := r.adapter.remove(ctx); err != nil {
		if isUnknownContainer(err) {
			return nil
		}

		return err
	}
	return nil
}

// waitReady waits for a container to be "ready".
// Ready means it's past the started state.
func (r *controller) waitReady(pctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	ctx, cancel := context.WithCancel(pctx)
	defer cancel()

	eventq := r.adapter.events(ctx)

	ctnr, err := r.adapter.inspect(ctx)
	if err != nil {
		if !isUnknownContainer(err) {
			return errors.Wrap(err, "inspect container failed")
		}
	} else {
		switch ctnr.State.Status {
		case "running", "exited", "dead":
			return nil
		}
	}

	for {
		select {
		case event := <-eventq:
			if !r.matchevent(event) {
				continue
			}

			switch event.Action {
			case "start":
				return nil
			}
		case <-ctx.Done():
			return ctx.Err()
		case <-r.closed:
			return r.err
		}
	}
}

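// Logs streams the task's container logs to the given publisher, honoring
// the provided subscription options.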
func (r *controller) Logs(ctx context.Context, publisher exec.LogPublisher, options api.LogSubscriptionOptions) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	// if we're following, wait for this container to be ready. there is a
	// problem here: if the container will never be ready (for example, it has
	// been totally deleted) then this will wait forever. however, this doesn't
	// actually cause any UI issues, and shouldn't be a problem. the stuck wait
	// will go away when the follow (context) is canceled.
	if options.Follow {
		if err := r.waitReady(ctx); err != nil {
			return errors.Wrap(err, "container not ready for logs")
		}
	}
	// if we're not following, we're not going to wait for the container to be
	// ready. just call logs. if the container isn't ready, the call will fail
	// and return an error. no big deal, we don't care, we only want the logs
	// we can get RIGHT NOW with no follow

	logsContext, cancel := context.WithCancel(ctx)
	msgs, err := r.adapter.logs(logsContext, options)
	defer cancel()
	if err != nil {
		return errors.Wrap(err, "failed getting container logs")
	}

	var (
		// use a rate limiter to keep things under control but also provide
		// some ability to coalesce messages.
		limiter = rate.NewLimiter(rate.Every(time.Second), 10<<20) // 10 MB/s
		msgctx  = api.LogContext{
			NodeID:    r.task.NodeID,
			ServiceID: r.task.ServiceID,
			TaskID:    r.task.ID,
		}
	)

	for {
		msg, ok := <-msgs
		if !ok {
			// we're done here, no more messages
			return nil
		}

		if msg.Err != nil {
			// the deferred cancel closes the adapter's log stream
			return msg.Err
		}

		// wait here for the limiter to catch up
		if err := limiter.WaitN(ctx, len(msg.Line)); err != nil {
			return errors.Wrap(err, "failed rate limiter")
		}
		tsp, err := gogotypes.TimestampProto(msg.Timestamp)
		if err != nil {
			return errors.Wrap(err, "failed to convert timestamp")
		}
		var stream api.LogStream
		if msg.Source == "stdout" {
			stream = api.LogStreamStdout
		} else if msg.Source == "stderr" {
			stream = api.LogStreamStderr
		}

		// parse the details out of the Attrs map
		var attrs []api.LogAttr
		if len(msg.Attrs) != 0 {
			attrs = make([]api.LogAttr, 0, len(msg.Attrs))
			for _, attr := range msg.Attrs {
				attrs = append(attrs, api.LogAttr{Key: attr.Key, Value: attr.Value})
			}
		}

		if err := publisher.Publish(ctx, api.LogMessage{
			Context:   msgctx,
			Timestamp: tsp,
			Stream:    stream,
			Attrs:     attrs,
			Data:      msg.Line,
		}); err != nil {
			return errors.Wrap(err, "failed to publish log message")
		}
	}
}

// Close the runner and clean up any ephemeral resources.
func (r *controller) Close() error {
	select {
	case <-r.closed:
		return r.err
	default:
		if r.cancelPull != nil {
			r.cancelPull()
		}

		r.err = exec.ErrControllerClosed
		close(r.closed)
	}
	return nil
}

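// matchevent reports whether the given event was emitted by this
// controller's container, matching by container name.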
func (r *controller) matchevent(event events.Message) bool {
	if event.Type != events.ContainerEventType {
		return false
	}
	// we can't filter using the container id since that is very likely to introduce a deadlock. see #33377.
	return event.Actor.Attributes["name"] == r.adapter.container.name()
}

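// checkClosed returns the controller's stored error if it has already been
// closed, and nil otherwise.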
func (r *controller) checkClosed() error {
	select {
	case <-r.closed:
		return r.err
	default:
		return nil
	}
}

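// parseContainerStatus builds an api.ContainerStatus from the inspected
// container.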
func parseContainerStatus(ctnr types.ContainerJSON) (*api.ContainerStatus, error) {
	status := &api.ContainerStatus{
		ContainerID: ctnr.ID,
		PID:         int32(ctnr.State.Pid),
		ExitCode:    int32(ctnr.State.ExitCode),
	}

	return status, nil
}

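// parsePortStatus extracts the published ports from the inspected
// container's network settings.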
func parsePortStatus(ctnr types.ContainerJSON) (*api.PortStatus, error) {
	status := &api.PortStatus{}

	if ctnr.NetworkSettings != nil && len(ctnr.NetworkSettings.Ports) > 0 {
		exposedPorts, err := parsePortMap(ctnr.NetworkSettings.Ports)
		if err != nil {
			return nil, err
		}
		status.Ports = exposedPorts
	}

	return status, nil
}

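// parsePortMap converts a nat.PortMap into api.PortConfig entries, one per
// host binding.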
func parsePortMap(portMap nat.PortMap) ([]*api.PortConfig, error) {
	exposedPorts := make([]*api.PortConfig, 0, len(portMap))

	for portProtocol, mapping := range portMap {
		parts := strings.SplitN(string(portProtocol), "/", 2)
		if len(parts) != 2 {
			return nil, fmt.Errorf("invalid port mapping: %s", portProtocol)
		}

		port, err := strconv.ParseUint(parts[0], 10, 16)
		if err != nil {
			return nil, err
		}

		protocol := api.ProtocolTCP
		switch strings.ToLower(parts[1]) {
		case "tcp":
			protocol = api.ProtocolTCP
		case "udp":
			protocol = api.ProtocolUDP
		default:
			return nil, fmt.Errorf("invalid protocol: %s", parts[1])
		}

		for _, binding := range mapping {
			hostPort, err := strconv.ParseUint(binding.HostPort, 10, 16)
			if err != nil {
				return nil, err
			}

			// TODO(aluzzardi): We're losing the port `name` here since
			// there's no way to retrieve it back from the Engine.
			exposedPorts = append(exposedPorts, &api.PortConfig{
				PublishMode:   api.PublishModeHost,
				Protocol:      protocol,
				TargetPort:    uint32(port),
				PublishedPort: uint32(hostPort),
			})
		}
	}

	return exposedPorts, nil
}

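// exitError wraps a non-zero container exit code together with the error
// (if any) that caused it, such as a failed health check.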
type exitError struct {
	code  int
	cause error
}

func (e *exitError) Error() string {
	if e.cause != nil {
		return fmt.Sprintf("task: non-zero exit (%v): %v", e.code, e.cause)
	}

	return fmt.Sprintf("task: non-zero exit (%v)", e.code)
}

func (e *exitError) ExitCode() int {
	return e.code
}

func (e *exitError) Cause() error {
	return e.cause
}

// checkHealth blocks until an unhealthy container is detected or ctx exits.
func (r *controller) checkHealth(ctx context.Context) error {
	eventq := r.adapter.events(ctx)

	for {
		select {
		case <-ctx.Done():
			return nil
		case <-r.closed:
			return nil
		case event := <-eventq:
			if !r.matchevent(event) {
				continue
			}

			switch event.Action {
			case "health_status: unhealthy":
				return ErrContainerUnhealthy
			}
		}
	}
}