1
0
Fork 0
mirror of https://github.com/moby/moby.git synced 2022-11-09 12:21:53 -05:00

Retry creating dynamic networks if not found

In cases there are failures in task start, swarmkit might be trying to
restart the task again in the same node which might keep failing. This
creates a race where when a failed task is getting removed it might
remove the associated network while another task for the same service
or a different service but connected to the same network is proceeding
with starting the container knowing that the network is still
present. Fix this by reacting to `ErrNoSuchNetwork` error during
container start by trying to recreate the managed networks. If they
have been removed it will be recreated. If they are already present
nothing bad will happen.

Signed-off-by: Jana Radhakrishnan <mrjana@docker.com>
This commit is contained in:
Jana Radhakrishnan 2016-08-09 13:37:11 -07:00
parent eb28dde01f
commit 117cef5e97

View file

@ -7,6 +7,7 @@ import (
executorpkg "github.com/docker/docker/daemon/cluster/executor"
"github.com/docker/engine-api/types"
"github.com/docker/engine-api/types/events"
"github.com/docker/libnetwork"
"github.com/docker/swarmkit/agent/exec"
"github.com/docker/swarmkit/api"
"github.com/docker/swarmkit/log"
@ -163,8 +164,23 @@ func (r *controller) Start(ctx context.Context) error {
return exec.ErrTaskStarted
}
if err := r.adapter.start(ctx); err != nil {
return errors.Wrap(err, "starting container failed")
for {
if err := r.adapter.start(ctx); err != nil {
if _, ok := err.(libnetwork.ErrNoSuchNetwork); ok {
// Retry network creation again if we
// failed because some of the networks
// were not found.
if err := r.adapter.createNetworks(ctx); err != nil {
return err
}
continue
}
return errors.Wrap(err, "starting container failed")
}
break
}
// no health check