1
0
Fork 0
mirror of https://github.com/moby/moby.git synced 2022-11-09 12:21:53 -05:00

Merge pull request #30261 from aaronlehmann/rollout-mode

swarm: Add update/rollback order
This commit is contained in:
Brian Goff 2017-04-06 22:41:11 -04:00 committed by GitHub
commit 3de58eb2bc
10 changed files with 192 additions and 3 deletions

View file

@ -2296,6 +2296,12 @@ definitions:
description: "The fraction of tasks that may fail during an update before the failure action is invoked, specified as a floating point number between 0 and 1."
type: "number"
default: 0
Order:
description: "The order of operations when rolling out an updated task. Either the old task is shut down before the new task is started, or the new task is started before the old task is shut down."
type: "string"
enum:
- "stop-first"
- "start-first"
RollbackConfig:
description: "Specification for the rollback strategy of the service."
type: "object"
@ -2322,6 +2328,12 @@ definitions:
description: "The fraction of tasks that may fail during a rollback before the failure action is invoked, specified as a floating point number between 0 and 1."
type: "number"
default: 0
Order:
description: "The order of operations when rolling back a task. Either the old task is shut down before the new task is started, or the new task is started before the old task is shut down."
type: "string"
enum:
- "stop-first"
- "start-first"
Networks:
description: "Array of network names or IDs to attach the service to."
type: "array"

View file

@ -77,6 +77,11 @@ const (
UpdateFailureActionContinue = "continue"
// UpdateFailureActionRollback ROLLBACK
UpdateFailureActionRollback = "rollback"
// UpdateOrderStopFirst STOP_FIRST
UpdateOrderStopFirst = "stop-first"
// UpdateOrderStartFirst START_FIRST
UpdateOrderStartFirst = "start-first"
)
// UpdateConfig represents the update configuration.
@ -111,4 +116,9 @@ type UpdateConfig struct {
// If the failure action is PAUSE, no more tasks will be updated until
// another update is started.
MaxFailureRatio float32
// Order indicates the order of operations when rolling out an updated
// task. Either the old task is shut down before the new task is
// started, or the new task is started before the old task is shut down.
Order string
}

View file

@ -57,6 +57,7 @@ UpdateConfig:
Monitoring Period: {{ .UpdateMonitor }}
{{- end }}
Max failure ratio: {{ .UpdateMaxFailureRatio }}
Update order: {{ .UpdateOrder }}
{{- end }}
{{- if .HasRollbackConfig }}
RollbackConfig:
@ -69,6 +70,7 @@ RollbackConfig:
Monitoring Period: {{ .RollbackMonitor }}
{{- end }}
Max failure ratio: {{ .RollbackMaxFailureRatio }}
Rollback order: {{ .RollbackOrder }}
{{- end }}
ContainerSpec:
Image: {{ .ContainerImage }}
@ -260,6 +262,10 @@ func (ctx *serviceInspectContext) UpdateOnFailure() string {
return ctx.Service.Spec.UpdateConfig.FailureAction
}
func (ctx *serviceInspectContext) UpdateOrder() string {
return ctx.Service.Spec.UpdateConfig.Order
}
func (ctx *serviceInspectContext) HasUpdateMonitor() bool {
return ctx.Service.Spec.UpdateConfig.Monitor.Nanoseconds() > 0
}
@ -304,6 +310,10 @@ func (ctx *serviceInspectContext) RollbackMaxFailureRatio() float32 {
return ctx.Service.Spec.RollbackConfig.MaxFailureRatio
}
func (ctx *serviceInspectContext) RollbackOrder() string {
return ctx.Service.Spec.RollbackConfig.Order
}
func (ctx *serviceInspectContext) ContainerImage() string {
return ctx.Service.Spec.TaskTemplate.ContainerSpec.Image
}

View file

@ -188,6 +188,7 @@ type updateOptions struct {
monitor time.Duration
onFailure string
maxFailureRatio floatValue
order string
}
func (opts updateOptions) config() *swarm.UpdateConfig {
@ -197,6 +198,7 @@ func (opts updateOptions) config() *swarm.UpdateConfig {
Monitor: opts.monitor,
FailureAction: opts.onFailure,
MaxFailureRatio: opts.maxFailureRatio.Value(),
Order: opts.order,
}
}
@ -533,6 +535,8 @@ func addServiceFlags(flags *pflag.FlagSet, opts *serviceOptions) {
flags.StringVar(&opts.update.onFailure, flagUpdateFailureAction, "pause", `Action on update failure ("pause"|"continue"|"rollback")`)
flags.Var(&opts.update.maxFailureRatio, flagUpdateMaxFailureRatio, "Failure rate to tolerate during an update")
flags.SetAnnotation(flagUpdateMaxFailureRatio, "version", []string{"1.25"})
flags.StringVar(&opts.update.order, flagUpdateOrder, "stop-first", `Update order ("start-first"|"stop-first")`)
flags.SetAnnotation(flagUpdateOrder, "version", []string{"1.29"})
flags.Uint64Var(&opts.rollback.parallelism, flagRollbackParallelism, 1, "Maximum number of tasks rolled back simultaneously (0 to roll back all at once)")
flags.SetAnnotation(flagRollbackParallelism, "version", []string{"1.28"})
@ -544,6 +548,8 @@ func addServiceFlags(flags *pflag.FlagSet, opts *serviceOptions) {
flags.SetAnnotation(flagRollbackFailureAction, "version", []string{"1.28"})
flags.Var(&opts.rollback.maxFailureRatio, flagRollbackMaxFailureRatio, "Failure rate to tolerate during a rollback")
flags.SetAnnotation(flagRollbackMaxFailureRatio, "version", []string{"1.28"})
flags.StringVar(&opts.rollback.order, flagRollbackOrder, "stop-first", `Rollback order ("start-first"|"stop-first")`)
flags.SetAnnotation(flagRollbackOrder, "version", []string{"1.29"})
flags.StringVar(&opts.endpoint.mode, flagEndpointMode, "vip", "Endpoint mode (vip or dnsrr)")
@ -633,6 +639,7 @@ const (
flagRollbackFailureAction = "rollback-failure-action"
flagRollbackMaxFailureRatio = "rollback-max-failure-ratio"
flagRollbackMonitor = "rollback-monitor"
flagRollbackOrder = "rollback-order"
flagRollbackParallelism = "rollback-parallelism"
flagStopGracePeriod = "stop-grace-period"
flagStopSignal = "stop-signal"
@ -641,6 +648,7 @@ const (
flagUpdateFailureAction = "update-failure-action"
flagUpdateMaxFailureRatio = "update-max-failure-ratio"
flagUpdateMonitor = "update-monitor"
flagUpdateOrder = "update-order"
flagUpdateParallelism = "update-parallelism"
flagUser = "user"
flagWorkdir = "workdir"

View file

@ -320,7 +320,7 @@ func updateService(flags *pflag.FlagSet, spec *swarm.ServiceSpec) error {
return err
}
if anyChanged(flags, flagUpdateParallelism, flagUpdateDelay, flagUpdateMonitor, flagUpdateFailureAction, flagUpdateMaxFailureRatio) {
if anyChanged(flags, flagUpdateParallelism, flagUpdateDelay, flagUpdateMonitor, flagUpdateFailureAction, flagUpdateMaxFailureRatio, flagUpdateOrder) {
if spec.UpdateConfig == nil {
spec.UpdateConfig = &swarm.UpdateConfig{}
}
@ -329,9 +329,10 @@ func updateService(flags *pflag.FlagSet, spec *swarm.ServiceSpec) error {
updateDuration(flagUpdateMonitor, &spec.UpdateConfig.Monitor)
updateString(flagUpdateFailureAction, &spec.UpdateConfig.FailureAction)
updateFloatValue(flagUpdateMaxFailureRatio, &spec.UpdateConfig.MaxFailureRatio)
updateString(flagUpdateOrder, &spec.UpdateConfig.Order)
}
if anyChanged(flags, flagRollbackParallelism, flagRollbackDelay, flagRollbackMonitor, flagRollbackFailureAction, flagRollbackMaxFailureRatio) {
if anyChanged(flags, flagRollbackParallelism, flagRollbackDelay, flagRollbackMonitor, flagRollbackFailureAction, flagRollbackMaxFailureRatio, flagRollbackOrder) {
if spec.RollbackConfig == nil {
spec.RollbackConfig = &swarm.UpdateConfig{}
}
@ -340,6 +341,7 @@ func updateService(flags *pflag.FlagSet, spec *swarm.ServiceSpec) error {
updateDuration(flagRollbackMonitor, &spec.RollbackConfig.Monitor)
updateString(flagRollbackFailureAction, &spec.RollbackConfig.FailureAction)
updateFloatValue(flagRollbackMaxFailureRatio, &spec.RollbackConfig.MaxFailureRatio)
updateString(flagRollbackOrder, &spec.RollbackConfig.Order)
}
if flags.Changed(flagEndpointMode) {

View file

@ -393,6 +393,13 @@ func updateConfigFromGRPC(updateConfig *swarmapi.UpdateConfig) *types.UpdateConf
converted.FailureAction = types.UpdateFailureActionRollback
}
switch updateConfig.Order {
case swarmapi.UpdateConfig_STOP_FIRST:
converted.Order = types.UpdateOrderStopFirst
case swarmapi.UpdateConfig_START_FIRST:
converted.Order = types.UpdateOrderStartFirst
}
return converted
}
@ -415,12 +422,21 @@ func updateConfigToGRPC(updateConfig *types.UpdateConfig) (*swarmapi.UpdateConfi
case types.UpdateFailureActionRollback:
converted.FailureAction = swarmapi.UpdateConfig_ROLLBACK
default:
return nil, fmt.Errorf("unrecongized update failure action %s", updateConfig.FailureAction)
return nil, fmt.Errorf("unrecognized update failure action %s", updateConfig.FailureAction)
}
if updateConfig.Monitor != 0 {
converted.Monitor = gogotypes.DurationProto(updateConfig.Monitor)
}
switch updateConfig.Order {
case types.UpdateOrderStopFirst, "":
converted.Order = swarmapi.UpdateConfig_STOP_FIRST
case types.UpdateOrderStartFirst:
converted.Order = swarmapi.UpdateConfig_START_FIRST
default:
return nil, fmt.Errorf("unrecognized update order %s", updateConfig.Order)
}
return converted, nil
}

View file

@ -65,6 +65,7 @@ Options:
--rollback-max-failure-ratio float Failure rate to tolerate during a rollback
--rollback-monitor duration Duration after each task rollback to monitor for failure
(ns|us|ms|s|m|h) (default 0s)
--rollback-order string Rollback order ("start-first"|"stop-first") (default "stop-first")
--rollback-parallelism uint Maximum number of tasks rolled back simultaneously (0 to roll
back all at once) (default 1)
--secret secret Specify secrets to expose to the service
@ -75,6 +76,7 @@ Options:
--update-failure-action string Action on update failure ("pause"|"continue"|"rollback") (default "pause")
--update-max-failure-ratio float Failure rate to tolerate during an update
--update-monitor duration Duration after each task update to monitor for failure (ns|us|ms|s|m|h)
--update-order string Update order ("start-first"|"stop-first") (default "stop-first")
--update-parallelism uint Maximum number of tasks updated simultaneously (0 to update all at once) (default 1)
-u, --user string Username or UID (format: <name|uid>[:<group|gid>])
--with-registry-auth Send registry authentication details to swarm agents

View file

@ -77,6 +77,7 @@ Options:
--rollback-max-failure-ratio float Failure rate to tolerate during a rollback
--rollback-monitor duration Duration after each task rollback to monitor for failure
(ns|us|ms|s|m|h) (default 0s)
--rollback-order string Rollback order ("start-first"|"stop-first") (default "stop-first")
--rollback-parallelism uint Maximum number of tasks rolled back simultaneously (0 to roll
back all at once) (default 1)
--secret-add secret Add or update a secret on a service
@ -88,6 +89,7 @@ Options:
--update-failure-action string Action on update failure ("pause"|"continue"|"rollback") (default "pause")
--update-max-failure-ratio float Failure rate to tolerate during an update
--update-monitor duration Duration after each task update to monitor for failure (ns|us|ms|s|m|h)
--update-order string Update order ("start-first"|"stop-first") (default "stop-first")
--update-parallelism uint Maximum number of tasks updated simultaneously (0 to update all at once) (default 1)
-u, --user string Username or UID (format: <name|uid>[:<group|gid>])
--with-registry-auth Send registry authentication details to swarm agents

View file

@ -175,6 +175,115 @@ func (s *DockerSwarmSuite) TestAPISwarmServicesUpdate(c *check.C) {
map[string]int{image1: instances})
}
func (s *DockerSwarmSuite) TestAPISwarmServicesUpdateStartFirst(c *check.C) {
d := s.AddDaemon(c, true, true)
// service image at start
image1 := "busybox:latest"
// target image in update
image2 := "testhealth"
// service started from this image won't pass health check
_, _, err := d.BuildImageWithOut(image2,
`FROM busybox
HEALTHCHECK --interval=1s --timeout=1s --retries=1024\
CMD cat /status`,
true)
c.Check(err, check.IsNil)
// create service
instances := 5
parallelism := 2
rollbackParallelism := 3
id := d.CreateService(c, serviceForUpdate, setInstances(instances), setUpdateOrder(swarm.UpdateOrderStartFirst), setRollbackOrder(swarm.UpdateOrderStartFirst))
checkStartingTasks := func(expected int) []swarm.Task {
var startingTasks []swarm.Task
waitAndAssert(c, defaultReconciliationTimeout, func(c *check.C) (interface{}, check.CommentInterface) {
tasks := d.GetServiceTasks(c, id)
startingTasks = nil
for _, t := range tasks {
if t.Status.State == swarm.TaskStateStarting {
startingTasks = append(startingTasks, t)
}
}
return startingTasks, nil
}, checker.HasLen, expected)
return startingTasks
}
makeTasksHealthy := func(tasks []swarm.Task) {
for _, t := range tasks {
containerID := t.Status.ContainerStatus.ContainerID
d.Cmd("exec", containerID, "touch", "/status")
}
}
// wait for tasks ready
waitAndAssert(c, defaultReconciliationTimeout, d.CheckRunningTaskImages, checker.DeepEquals,
map[string]int{image1: instances})
// issue service update
service := d.GetService(c, id)
d.UpdateService(c, service, setImage(image2))
// first batch
// The old tasks should be running, and the new ones should be starting.
startingTasks := checkStartingTasks(parallelism)
waitAndAssert(c, defaultReconciliationTimeout, d.CheckRunningTaskImages, checker.DeepEquals,
map[string]int{image1: instances})
// make it healthy
makeTasksHealthy(startingTasks)
waitAndAssert(c, defaultReconciliationTimeout, d.CheckRunningTaskImages, checker.DeepEquals,
map[string]int{image1: instances - parallelism, image2: parallelism})
// 2nd batch
// The old tasks should be running, and the new ones should be starting.
startingTasks = checkStartingTasks(parallelism)
waitAndAssert(c, defaultReconciliationTimeout, d.CheckRunningTaskImages, checker.DeepEquals,
map[string]int{image1: instances - parallelism, image2: parallelism})
// make it healthy
makeTasksHealthy(startingTasks)
waitAndAssert(c, defaultReconciliationTimeout, d.CheckRunningTaskImages, checker.DeepEquals,
map[string]int{image1: instances - 2*parallelism, image2: 2 * parallelism})
// 3nd batch
// The old tasks should be running, and the new ones should be starting.
startingTasks = checkStartingTasks(1)
waitAndAssert(c, defaultReconciliationTimeout, d.CheckRunningTaskImages, checker.DeepEquals,
map[string]int{image1: instances - 2*parallelism, image2: 2 * parallelism})
// make it healthy
makeTasksHealthy(startingTasks)
waitAndAssert(c, defaultReconciliationTimeout, d.CheckRunningTaskImages, checker.DeepEquals,
map[string]int{image2: instances})
// Roll back to the previous version. This uses the CLI because
// rollback is a client-side operation.
out, err := d.Cmd("service", "update", "--rollback", id)
c.Assert(err, checker.IsNil, check.Commentf(out))
// first batch
waitAndAssert(c, defaultReconciliationTimeout, d.CheckRunningTaskImages, checker.DeepEquals,
map[string]int{image2: instances - rollbackParallelism, image1: rollbackParallelism})
// 2nd batch
waitAndAssert(c, defaultReconciliationTimeout, d.CheckRunningTaskImages, checker.DeepEquals,
map[string]int{image1: instances})
}
func (s *DockerSwarmSuite) TestAPISwarmServicesFailedUpdate(c *check.C) {
const nodeCount = 3
var daemons [nodeCount]*daemon.Swarm

View file

@ -596,6 +596,24 @@ func setInstances(replicas int) daemon.ServiceConstructor {
}
}
func setUpdateOrder(order string) daemon.ServiceConstructor {
return func(s *swarm.Service) {
if s.Spec.UpdateConfig == nil {
s.Spec.UpdateConfig = &swarm.UpdateConfig{}
}
s.Spec.UpdateConfig.Order = order
}
}
func setRollbackOrder(order string) daemon.ServiceConstructor {
return func(s *swarm.Service) {
if s.Spec.RollbackConfig == nil {
s.Spec.RollbackConfig = &swarm.UpdateConfig{}
}
s.Spec.RollbackConfig.Order = order
}
}
func setImage(image string) daemon.ServiceConstructor {
return func(s *swarm.Service) {
s.Spec.TaskTemplate.ContainerSpec.Image = image