From 2a220f1f3d176adee684df83921c47d003f83e8e Mon Sep 17 00:00:00 2001 From: Adam Williams Date: Mon, 22 Feb 2021 10:32:08 -0800 Subject: [PATCH] Update Swarmkit to pick up fixes to heartbeat period and stalled tasks Signed-off-by: Adam Williams (cherry picked from commit cbd2f726bffc45a01d8737ae9a48b099691a09a4) Signed-off-by: Sebastiaan van Stijn --- vendor.conf | 2 +- .../docker/swarmkit/manager/manager.go | 11 +++++++- .../swarmkit/manager/scheduler/scheduler.go | 27 +++++++++++++++---- 3 files changed, 33 insertions(+), 7 deletions(-) diff --git a/vendor.conf b/vendor.conf index b56df8947f..e507095c55 100644 --- a/vendor.conf +++ b/vendor.conf @@ -142,7 +142,7 @@ github.com/gogo/googleapis 01e0f9cca9b92166042241267ee2 github.com/cilium/ebpf 1c8d4c9ef7759622653a1d319284a44652333b28 # cluster -github.com/docker/swarmkit d6592ddefd8a5319aadff74c558b816b1a0b2590 +github.com/docker/swarmkit 17d8d4e4d8bdec33d386e6362d3537fa9493ba00 github.com/gogo/protobuf 5628607bb4c51c3157aacc3a50f0ab707582b805 # v1.3.1 github.com/golang/protobuf 84668698ea25b64748563aa20726db66a6b8d299 # v1.3.5 github.com/cloudflare/cfssl 5d63dbd981b5c408effbb58c442d54761ff94fbd # 1.3.2 diff --git a/vendor/github.com/docker/swarmkit/manager/manager.go b/vendor/github.com/docker/swarmkit/manager/manager.go index 27820eca84..6c31ef3a79 100644 --- a/vendor/github.com/docker/swarmkit/manager/manager.go +++ b/vendor/github.com/docker/swarmkit/manager/manager.go @@ -1049,7 +1049,16 @@ func (m *Manager) becomeLeader(ctx context.Context) { go func(d *dispatcher.Dispatcher) { // Initialize the dispatcher. - d.Init(m.raftNode, dispatcher.DefaultConfig(), drivers.New(m.config.PluginGetter), m.config.SecurityConfig) + var cluster *api.Cluster + s.View(func(tx store.ReadTx) { + cluster = store.GetCluster(tx, clusterID) + }) + var defaultConfig = dispatcher.DefaultConfig() + heartbeatPeriod, err := gogotypes.DurationFromProto(cluster.Spec.Dispatcher.HeartbeatPeriod) + if err == nil { + defaultConfig.HeartbeatPeriod = heartbeatPeriod + } + d.Init(m.raftNode, defaultConfig, drivers.New(m.config.PluginGetter), m.config.SecurityConfig) if err := d.Run(ctx); err != nil { log.G(ctx).WithError(err).Error("Dispatcher exited with an error") } diff --git a/vendor/github.com/docker/swarmkit/manager/scheduler/scheduler.go b/vendor/github.com/docker/swarmkit/manager/scheduler/scheduler.go index fda0089511..78dd7dc3fe 100644 --- a/vendor/github.com/docker/swarmkit/manager/scheduler/scheduler.go +++ b/vendor/github.com/docker/swarmkit/manager/scheduler/scheduler.go @@ -721,15 +721,32 @@ func (s *Scheduler) noSuitableNode(ctx context.Context, taskGroup map[string]*ap newT := *t newT.Status.Timestamp = ptypes.MustTimestampProto(time.Now()) - if explanation != "" { - newT.Status.Err = "no suitable node (" + explanation + ")" + sv := service.SpecVersion + tv := newT.SpecVersion + if sv != nil && tv != nil && sv.Index > tv.Index { + log.G(ctx).WithField("task.id", t.ID).Debug( + "task belongs to old revision of service", + ) + if t.Status.State == api.TaskStatePending && t.DesiredState >= api.TaskStateShutdown { + log.G(ctx).WithField("task.id", t.ID).Debug( + "task is desired shutdown, scheduler will go ahead and do so", + ) + newT.Status.State = api.TaskStateShutdown + newT.Status.Err = "" + } } else { - newT.Status.Err = "no suitable node" + if explanation != "" { + newT.Status.Err = "no suitable node (" + explanation + ")" + } else { + newT.Status.Err = "no suitable node" + } + + // re-enqueue a task that should still be attempted + s.enqueue(&newT) } + s.allTasks[t.ID] = &newT schedulingDecisions[t.ID] = schedulingDecision{old: t, new: &newT} - - s.enqueue(&newT) } }