diff --git a/vendor.conf b/vendor.conf
index ded40b8994..e1ff0b4bb7 100644
--- a/vendor.conf
+++ b/vendor.conf
@@ -125,7 +125,7 @@ github.com/containerd/ttrpc 94dde388801693c54f88a6596f713b51a8b30b2d
 github.com/gogo/googleapis 08a7655d27152912db7aaf4f983275eaf8d128ef

 # cluster
-github.com/docker/swarmkit 199cf49cd99690135d99e52a1907ec82e8113c4f
+github.com/docker/swarmkit 68266392a176434d282760d2d6d0ab4c68edcae6
 github.com/gogo/protobuf v1.0.0
 github.com/cloudflare/cfssl 1.3.2
 github.com/fernet/fernet-go 1b2437bc582b3cfbb341ee5a29f8ef5b42912ff2
diff --git a/vendor/github.com/docker/swarmkit/manager/dispatcher/dispatcher.go b/vendor/github.com/docker/swarmkit/manager/dispatcher/dispatcher.go
index 407ced0b69..d783fd4c35 100644
--- a/vendor/github.com/docker/swarmkit/manager/dispatcher/dispatcher.go
+++ b/vendor/github.com/docker/swarmkit/manager/dispatcher/dispatcher.go
@@ -293,9 +293,16 @@ func (d *Dispatcher) Run(ctx context.Context) error {
 			publishManagers(ev.([]*api.Peer))
 		case <-d.processUpdatesTrigger:
 			d.processUpdates(ctx)
+			batchTimer.Stop()
+			// drain the timer, if it has already expired
+			select {
+			case <-batchTimer.C:
+			default:
+			}
 			batchTimer.Reset(maxBatchInterval)
 		case <-batchTimer.C:
 			d.processUpdates(ctx)
+			// batch timer has already expired, so no need to drain
 			batchTimer.Reset(maxBatchInterval)
 		case v := <-configWatcher:
 			cluster := v.(api.EventUpdateCluster)
@@ -416,7 +423,7 @@ func (d *Dispatcher) markNodesUnknown(ctx context.Context) error {
 		expireFunc := func() {
 			log := log.WithField("node", nodeID)
-			log.Info(`heartbeat expiration for node %s in state "unknown"`, nodeID)
+			log.Infof(`heartbeat expiration for node %s in state "unknown"`, nodeID)
 			if err := d.markNodeNotReady(nodeID, api.NodeStatus_DOWN, `heartbeat failure for node in "unknown" state`); err != nil {
 				log.WithError(err).Error(`failed deregistering node after heartbeat expiration for node in "unknown" state`)
 			}
@@ -537,7 +544,7 @@ func (d *Dispatcher) register(ctx context.Context, nodeID string, description *a
 	}

 	expireFunc := func() {
-		log.G(ctx).Debug("heartbeat expiration for worker %s, setting worker status to NodeStatus_DOWN ", nodeID)
+		log.G(ctx).Debugf("heartbeat expiration for worker %s, setting worker status to NodeStatus_DOWN ", nodeID)
 		if err := d.markNodeNotReady(nodeID, api.NodeStatus_DOWN, "heartbeat failure"); err != nil {
 			log.G(ctx).WithError(err).Errorf("failed deregistering node after heartbeat expiration")
 		}
diff --git a/vendor/github.com/docker/swarmkit/manager/scheduler/scheduler.go b/vendor/github.com/docker/swarmkit/manager/scheduler/scheduler.go
index 9ee0b9e5c9..9e708ed1b6 100644
--- a/vendor/github.com/docker/swarmkit/manager/scheduler/scheduler.go
+++ b/vendor/github.com/docker/swarmkit/manager/scheduler/scheduler.go
@@ -702,9 +702,20 @@ func (s *Scheduler) scheduleNTasksOnNodes(ctx context.Context, n int, taskGroup
 	return tasksScheduled
 }

+// noSuitableNode checks unassigned tasks and make sure they have an existing service in the store before
+// updating the task status and adding it back to: schedulingDecisions, unassignedTasks and allTasks
 func (s *Scheduler) noSuitableNode(ctx context.Context, taskGroup map[string]*api.Task, schedulingDecisions map[string]schedulingDecision) {
 	explanation := s.pipeline.Explain()
 	for _, t := range taskGroup {
+		var service *api.Service
+		s.store.View(func(tx store.ReadTx) {
+			service = store.GetService(tx, t.ServiceID)
+		})
+		if service == nil {
+			log.G(ctx).WithField("task.id", t.ID).Debug("removing task from the scheduler")
+			continue
+		}
+
 		log.G(ctx).WithField("task.id", t.ID).Debug("no suitable node available for task")

 		newT := *t
diff --git a/vendor/github.com/docker/swarmkit/node/node.go b/vendor/github.com/docker/swarmkit/node/node.go
index 98283f936f..83187cc75b 100644
--- a/vendor/github.com/docker/swarmkit/node/node.go
+++ b/vendor/github.com/docker/swarmkit/node/node.go
@@ -1136,6 +1136,11 @@ func (n *Node) superviseManager(ctx context.Context, securityConfig *ca.Security
 				// re-promoted. In this case, we must assume we were
 				// re-promoted, and restart the manager.
 				log.G(ctx).Warn("failed to get worker role after manager stop, forcing certificate renewal")
+
+				// We can safely reset this timer without stopping/draining the timer
+				// first because the only way the code has reached this point is if the timer
+				// has already expired - if the role changed or the context were canceled,
+				// then we would have returned already.
 				timer.Reset(roleChangeTimeout)
 				renewer.Renew()
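For reviewers unfamiliar with the time.Timer pitfall these hunks address: resetting a timer whose expiry may already be sitting in timer.C lets the next receive return immediately with a stale tick. Below is a minimal, self-contained Go sketch of the stop/drain/reset pattern used in the dispatcher hunk and of the reasoning behind the node.go comment. The interval values and the stand-in trigger channel are illustrative assumptions, not code from this patch.

// timer_drain_sketch.go -- standalone illustration only; names and timings are hypothetical.
package main

import (
	"fmt"
	"time"
)

func main() {
	const interval = 200 * time.Millisecond
	batchTimer := time.NewTimer(interval)
	trigger := time.After(50 * time.Millisecond) // stands in for an external "process now" signal

	for i := 0; i < 2; i++ {
		select {
		case <-trigger:
			fmt.Println("external trigger fired: processing early")
			// The timer may already have expired while we were busy. Stop it,
			// then drain the channel non-blockingly so a stale tick cannot be
			// consumed immediately after the Reset below.
			batchTimer.Stop()
			select {
			case <-batchTimer.C:
			default:
			}
			batchTimer.Reset(interval)
		case <-batchTimer.C:
			fmt.Println("timer fired: processing on schedule")
			// Receiving from batchTimer.C guarantees the channel is empty,
			// so Reset is safe here without stopping or draining first.
			batchTimer.Reset(interval)
		}
	}
}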