Merge pull request #26421 from aaronlehmann/update-thresholds-rollbacks

Service update failure thresholds and rollback
2022-11-09 12:21:53 -05:00 · 2016-10-18 15:26:24 -07:00 · 2016-10-18 15:26:24 -07:00 · 3b0660da30
commit 3b0660da30
parent 701cd7996f 6d4b527699
19 changed files with 441 additions and 207 deletions
--- a/api/server/router/swarm/backend.go
+++ b/api/server/router/swarm/backend.go
@ -15,7 +15,7 @@ type Backend interface {
 	GetServices(basictypes.ServiceListOptions) ([]types.Service, error)
 	GetService(string) (types.Service, error)
 	CreateService(types.ServiceSpec, string) (string, error)
-	UpdateService(string, uint64, types.ServiceSpec, string) error
+	UpdateService(string, uint64, types.ServiceSpec, string, string) error
 	RemoveService(string) error
 	GetNodes(basictypes.NodeListOptions) ([]types.Node, error)
 	GetNode(string) (types.Node, error)
--- a/api/server/router/swarm/cluster_routes.go
+++ b/api/server/router/swarm/cluster_routes.go
@ -156,7 +156,9 @@ func (sr *swarmRouter) updateService(ctx context.Context, w http.ResponseWriter,
 	// Get returns "" if the header does not exist
 	encodedAuth := r.Header.Get("X-Registry-Auth")

-	if err := sr.backend.UpdateService(vars["id"], version, service, encodedAuth); err != nil {
+	registryAuthFrom := r.URL.Query().Get("registryAuthFrom")
+
+	if err := sr.backend.UpdateService(vars["id"], version, service, encodedAuth, registryAuthFrom); err != nil {
 		logrus.Errorf("Error updating service %s: %v", vars["id"], err)
 		return err
 	}
--- a/api/types/client.go
+++ b/api/types/client.go
@ -275,6 +275,12 @@ type ServiceCreateResponse struct {
 	ID string
 }

+// Values for RegistryAuthFrom in ServiceUpdateOptions
+const (
+	RegistryAuthFromSpec         = "spec"
+	RegistryAuthFromPreviousSpec = "previous-spec"
+)
+
 // ServiceUpdateOptions contains the options to be used for updating services.
 type ServiceUpdateOptions struct {
 	// EncodedRegistryAuth is the encoded registry authorization credentials to
@ -286,6 +292,11 @@ type ServiceUpdateOptions struct {
 	// TODO(stevvooe): Consider moving the version parameter of ServiceUpdate
 	// into this field. While it does open API users up to racy writes, most
 	// users may not need that level of consistency in practice.
+
+	// RegistryAuthFrom specifies where to find the registry authorization
+	// credentials if they are not given in EncodedRegistryAuth. Valid
+	// values are "spec" and "previous-spec".
+	RegistryAuthFrom string
 }

 // ServiceListOptions holds parameters to list  services with.
--- a/api/types/swarm/service.go
+++ b/api/types/swarm/service.go
@ -7,6 +7,7 @@ type Service struct {
 	ID string
 	Meta
 	Spec         ServiceSpec  `json:",omitempty"`
+	PreviousSpec *ServiceSpec `json:",omitempty"`
 	Endpoint     Endpoint     `json:",omitempty"`
 	UpdateStatus UpdateStatus `json:",omitempty"`
 }
@ -71,7 +72,34 @@ const (

 // UpdateConfig represents the update configuration.
 type UpdateConfig struct {
-	Parallelism   uint64        `json:",omitempty"`
-	Delay         time.Duration `json:",omitempty"`
-	FailureAction string        `json:",omitempty"`
+	// Maximum number of tasks to be updated in one iteration.
+	// 0 means unlimited parallelism.
+	Parallelism uint64 `json:",omitempty"`
+
+	// Amount of time between updates.
+	Delay time.Duration `json:",omitempty"`
+
+	// FailureAction is the action to take when an update fails.
+	FailureAction string `json:",omitempty"`
+
+	// Monitor indicates how long to monitor a task for failure after it is
+	// created. If the task fails by ending up in one of the states
+	// REJECTED, COMPLETED, or FAILED, within Monitor from its creation,
+	// this counts as a failure. If it fails after Monitor, it does not
+	// count as a failure. If Monitor is unspecified, a default value will
+	// be used.
+	Monitor time.Duration `json:",omitempty"`
+
+	// MaxFailureRatio is the fraction of tasks that may fail during
+	// an update before the failure action is invoked. Any task created by
+	// the current update which ends up in one of the states REJECTED,
+	// COMPLETED or FAILED within Monitor from its creation counts as a
+	// failure. The number of failures is divided by the number of tasks
+	// being updated, and if this fraction is greater than
+	// MaxFailureRatio, the failure action is invoked.
+	//
+	// If the failure action is CONTINUE, there is no effect.
+	// If the failure action is PAUSE, no more tasks will be updated until
+	// another update is started.
+	MaxFailureRatio float32
 }
--- a/cli/command/formatter/service.go
+++ b/cli/command/formatter/service.go
@ -41,10 +41,14 @@ Placement:
 {{- if .HasUpdateConfig }}
 UpdateConfig:
 Parallelism:	{{ .UpdateParallelism }}
-{{- if .HasUpdateDelay -}}
+{{- if .HasUpdateDelay}}
 Delay:		{{ .UpdateDelay }}
 {{- end }}
 On failure:	{{ .UpdateOnFailure }}
+{{- if .HasUpdateMonitor}}
+ Monitoring Period: {{ .UpdateMonitor }}
+{{- end }}
+ Max failure ratio: {{ .UpdateMaxFailureRatio }}
 {{- end }}
 ContainerSpec:
 Image:		{{ .ContainerImage }}
@ -218,6 +222,18 @@ func (ctx *serviceInspectContext) UpdateOnFailure() string {
 	return ctx.Service.Spec.UpdateConfig.FailureAction
 }

+func (ctx *serviceInspectContext) HasUpdateMonitor() bool {
+	return ctx.Service.Spec.UpdateConfig.Monitor.Nanoseconds() > 0
+}
+
+func (ctx *serviceInspectContext) UpdateMonitor() time.Duration {
+	return ctx.Service.Spec.UpdateConfig.Monitor
+}
+
+func (ctx *serviceInspectContext) UpdateMaxFailureRatio() float32 {
+	return ctx.Service.Spec.UpdateConfig.MaxFailureRatio
+}
+
 func (ctx *serviceInspectContext) ContainerImage() string {
 	return ctx.Service.Spec.TaskTemplate.ContainerSpec.Image
 }
--- a/cli/command/service/opts.go
+++ b/cli/command/service/opts.go
@ -267,9 +267,11 @@ func (m *MountOpt) Value() []mounttypes.Mount {
 }

 type updateOptions struct {
-	parallelism uint64
-	delay       time.Duration
-	onFailure   string
+	parallelism     uint64
+	delay           time.Duration
+	monitor         time.Duration
+	onFailure       string
+	maxFailureRatio float32
 }

 type resourceOptions struct {
@ -458,9 +460,11 @@ func (opts *serviceOptions) ToService() (swarm.ServiceSpec, error) {
 		Networks: convertNetworks(opts.networks),
 		Mode:     swarm.ServiceMode{},
 		UpdateConfig: &swarm.UpdateConfig{
-			Parallelism:   opts.update.parallelism,
-			Delay:         opts.update.delay,
-			FailureAction: opts.update.onFailure,
+			Parallelism:     opts.update.parallelism,
+			Delay:           opts.update.delay,
+			Monitor:         opts.update.monitor,
+			FailureAction:   opts.update.onFailure,
+			MaxFailureRatio: opts.update.maxFailureRatio,
 		},
 		EndpointSpec: opts.endpoint.ToEndpointSpec(),
 	}
@ -507,7 +511,9 @@ func addServiceFlags(cmd *cobra.Command, opts *serviceOptions) {

 	flags.Uint64Var(&opts.update.parallelism, flagUpdateParallelism, 1, "Maximum number of tasks updated simultaneously (0 to update all at once)")
 	flags.DurationVar(&opts.update.delay, flagUpdateDelay, time.Duration(0), "Delay between updates")
+	flags.DurationVar(&opts.update.monitor, flagUpdateMonitor, time.Duration(0), "Duration after each task update to monitor for failure")
 	flags.StringVar(&opts.update.onFailure, flagUpdateFailureAction, "pause", "Action on update failure (pause|continue)")
+	flags.Float32Var(&opts.update.maxFailureRatio, flagUpdateMaxFailureRatio, 0, "Failure rate to tolerate during an update")

 	flags.StringVar(&opts.endpoint.mode, flagEndpointMode, "", "Endpoint mode (vip or dnsrr)")

@ -518,46 +524,48 @@ func addServiceFlags(cmd *cobra.Command, opts *serviceOptions) {
 }

 const (
-	flagConstraint           = "constraint"
-	flagConstraintRemove     = "constraint-rm"
-	flagConstraintAdd        = "constraint-add"
-	flagContainerLabel       = "container-label"
-	flagContainerLabelRemove = "container-label-rm"
-	flagContainerLabelAdd    = "container-label-add"
-	flagEndpointMode         = "endpoint-mode"
-	flagEnv                  = "env"
-	flagEnvRemove            = "env-rm"
-	flagEnvAdd               = "env-add"
-	flagGroupAdd             = "group-add"
-	flagGroupRemove          = "group-rm"
-	flagLabel                = "label"
-	flagLabelRemove          = "label-rm"
-	flagLabelAdd             = "label-add"
-	flagLimitCPU             = "limit-cpu"
-	flagLimitMemory          = "limit-memory"
-	flagMode                 = "mode"
-	flagMount                = "mount"
-	flagMountRemove          = "mount-rm"
-	flagMountAdd             = "mount-add"
-	flagName                 = "name"
-	flagNetwork              = "network"
-	flagPublish              = "publish"
-	flagPublishRemove        = "publish-rm"
-	flagPublishAdd           = "publish-add"
-	flagReplicas             = "replicas"
-	flagReserveCPU           = "reserve-cpu"
-	flagReserveMemory        = "reserve-memory"
-	flagRestartCondition     = "restart-condition"
-	flagRestartDelay         = "restart-delay"
-	flagRestartMaxAttempts   = "restart-max-attempts"
-	flagRestartWindow        = "restart-window"
-	flagStopGracePeriod      = "stop-grace-period"
-	flagUpdateDelay          = "update-delay"
-	flagUpdateFailureAction  = "update-failure-action"
-	flagUpdateParallelism    = "update-parallelism"
-	flagUser                 = "user"
-	flagWorkdir              = "workdir"
-	flagRegistryAuth         = "with-registry-auth"
-	flagLogDriver            = "log-driver"
-	flagLogOpt               = "log-opt"
+	flagConstraint            = "constraint"
+	flagConstraintRemove      = "constraint-rm"
+	flagConstraintAdd         = "constraint-add"
+	flagContainerLabel        = "container-label"
+	flagContainerLabelRemove  = "container-label-rm"
+	flagContainerLabelAdd     = "container-label-add"
+	flagEndpointMode          = "endpoint-mode"
+	flagEnv                   = "env"
+	flagEnvRemove             = "env-rm"
+	flagEnvAdd                = "env-add"
+	flagGroupAdd              = "group-add"
+	flagGroupRemove           = "group-rm"
+	flagLabel                 = "label"
+	flagLabelRemove           = "label-rm"
+	flagLabelAdd              = "label-add"
+	flagLimitCPU              = "limit-cpu"
+	flagLimitMemory           = "limit-memory"
+	flagMode                  = "mode"
+	flagMount                 = "mount"
+	flagMountRemove           = "mount-rm"
+	flagMountAdd              = "mount-add"
+	flagName                  = "name"
+	flagNetwork               = "network"
+	flagPublish               = "publish"
+	flagPublishRemove         = "publish-rm"
+	flagPublishAdd            = "publish-add"
+	flagReplicas              = "replicas"
+	flagReserveCPU            = "reserve-cpu"
+	flagReserveMemory         = "reserve-memory"
+	flagRestartCondition      = "restart-condition"
+	flagRestartDelay          = "restart-delay"
+	flagRestartMaxAttempts    = "restart-max-attempts"
+	flagRestartWindow         = "restart-window"
+	flagStopGracePeriod       = "stop-grace-period"
+	flagUpdateDelay           = "update-delay"
+	flagUpdateFailureAction   = "update-failure-action"
+	flagUpdateMaxFailureRatio = "update-max-failure-ratio"
+	flagUpdateMonitor         = "update-monitor"
+	flagUpdateParallelism     = "update-parallelism"
+	flagUser                  = "user"
+	flagWorkdir               = "workdir"
+	flagRegistryAuth          = "with-registry-auth"
+	flagLogDriver             = "log-driver"
+	flagLogOpt                = "log-opt"
 )
--- a/cli/command/service/update.go
+++ b/cli/command/service/update.go
@ -36,6 +36,7 @@ func newUpdateCommand(dockerCli *command.DockerCli) *cobra.Command {
 	flags := cmd.Flags()
 	flags.String("image", "", "Service image tag")
 	flags.String("args", "", "Service command args")
+	flags.Bool("rollback", false, "Rollback to previous specification")
 	addServiceFlags(cmd, opts)

 	flags.Var(newListOptsVar(), flagEnvRemove, "Remove an environment variable")
@ -68,7 +69,20 @@ func runUpdate(dockerCli *command.DockerCli, flags *pflag.FlagSet, serviceID str
 		return err
 	}

-	err = updateService(flags, &service.Spec)
+	rollback, err := flags.GetBool("rollback")
+	if err != nil {
+		return err
+	}
+
+	spec := &service.Spec
+	if rollback {
+		spec = service.PreviousSpec
+		if spec == nil {
+			return fmt.Errorf("service does not have a previous specification to roll back to")
+		}
+	}
+
+	err = updateService(flags, spec)
 	if err != nil {
 		return err
 	}
@ -81,15 +95,19 @@ func runUpdate(dockerCli *command.DockerCli, flags *pflag.FlagSet, serviceID str
 	if sendAuth {
 		// Retrieve encoded auth token from the image reference
 		// This would be the old image if it didn't change in this update
-		image := service.Spec.TaskTemplate.ContainerSpec.Image
+		image := spec.TaskTemplate.ContainerSpec.Image
 		encodedAuth, err := command.RetrieveAuthTokenFromImage(ctx, dockerCli, image)
 		if err != nil {
 			return err
 		}
 		updateOpts.EncodedRegistryAuth = encodedAuth
+	} else if rollback {
+		updateOpts.RegistryAuthFrom = types.RegistryAuthFromPreviousSpec
+	} else {
+		updateOpts.RegistryAuthFrom = types.RegistryAuthFromSpec
 	}

-	err = apiClient.ServiceUpdate(ctx, service.ID, service.Version, service.Spec, updateOpts)
+	err = apiClient.ServiceUpdate(ctx, service.ID, service.Version, *spec, updateOpts)
 	if err != nil {
 		return err
 	}
@ -111,6 +129,12 @@ func updateService(flags *pflag.FlagSet, spec *swarm.ServiceSpec) error {
 		}
 	}

+	updateFloat32 := func(flag string, field *float32) {
+		if flags.Changed(flag) {
+			*field, _ = flags.GetFloat32(flag)
+		}
+	}
+
 	updateDuration := func(flag string, field *time.Duration) {
 		if flags.Changed(flag) {
 			*field, _ = flags.GetDuration(flag)
@ -195,13 +219,15 @@ func updateService(flags *pflag.FlagSet, spec *swarm.ServiceSpec) error {
 		return err
 	}

-	if anyChanged(flags, flagUpdateParallelism, flagUpdateDelay, flagUpdateFailureAction) {
+	if anyChanged(flags, flagUpdateParallelism, flagUpdateDelay, flagUpdateMonitor, flagUpdateFailureAction, flagUpdateMaxFailureRatio) {
 		if spec.UpdateConfig == nil {
 			spec.UpdateConfig = &swarm.UpdateConfig{}
 		}
 		updateUint64(flagUpdateParallelism, &spec.UpdateConfig.Parallelism)
 		updateDuration(flagUpdateDelay, &spec.UpdateConfig.Delay)
+		updateDuration(flagUpdateMonitor, &spec.UpdateConfig.Monitor)
 		updateString(flagUpdateFailureAction, &spec.UpdateConfig.FailureAction)
+		updateFloat32(flagUpdateMaxFailureRatio, &spec.UpdateConfig.MaxFailureRatio)
 	}

 	if flags.Changed(flagEndpointMode) {
--- a/client/service_update.go
+++ b/client/service_update.go
@ -22,6 +22,10 @@ func (cli *Client) ServiceUpdate(ctx context.Context, serviceID string, version
 		}
 	}

+	if options.RegistryAuthFrom != "" {
+		query.Set("registryAuthFrom", options.RegistryAuthFrom)
+	}
+
 	query.Set("version", strconv.FormatUint(version.Index, 10))

 	resp, err := cli.post(ctx, "/services/"+serviceID+"/update", query, service, headers)
--- a/contrib/completion/bash/docker
+++ b/contrib/completion/bash/docker
@ -1809,9 +1809,12 @@ _docker_service_update() {
 		--restart-delay
 		--restart-max-attempts
 		--restart-window
+		--rollback
 		--stop-grace-period
 		--update-delay
 		--update-failure-action
+		--update-max-failure-ratio
+		--update-monitor
 		--update-parallelism
 		--user -u
 		--workdir -w
--- a/contrib/completion/zsh/_docker
+++ b/contrib/completion/zsh/_docker
@ -1108,6 +1108,8 @@ __docker_service_subcommand() {
        "($help)--stop-grace-period=[Time to wait before force killing a container]:grace period: "
        "($help)--update-delay=[Delay between updates]:delay: "
        "($help)--update-failure-action=[Action on update failure]:mode:(pause continue)"
+        "($help)--update-max-failure-ratio=[Failure rate to tolerate during an update]:fraction: "
+        "($help)--update-monitor=[Duration after each task update to monitor for failure]:window: "
        "($help)--update-parallelism=[Maximum number of tasks updated simultaneously]:number: "
        "($help -u --user)"{-u=,--user=}"[Username or UID]:user:_users"
        "($help)--with-registry-auth[Send registry authentication details to swarm agents]"
@ -1185,6 +1187,7 @@ __docker_service_subcommand() {
                "($help)*--container-label-rm=[Remove a container label by its key]:label: " \
                "($help)*--group-rm=[Remove previously added user groups from the container]:group:_groups" \
                "($help)--image=[Service image tag]:image:__docker_repositories" \
+                "($help)--rollback[Rollback to previous specification]" \
                "($help -)1:service:__docker_complete_services" && ret=0
            ;;
        (help)
--- a/daemon/cluster/cluster.go
+++ b/daemon/cluster/cluster.go
@ -913,7 +913,7 @@ func (c *Cluster) GetService(input string) (types.Service, error) {
 }

 // UpdateService updates existing service to match new properties.
-func (c *Cluster) UpdateService(serviceIDOrName string, version uint64, spec types.ServiceSpec, encodedAuth string) error {
+func (c *Cluster) UpdateService(serviceIDOrName string, version uint64, spec types.ServiceSpec, encodedAuth string, registryAuthFrom string) error {
 	c.RLock()
 	defer c.RUnlock()

@ -948,7 +948,18 @@ func (c *Cluster) UpdateService(serviceIDOrName string, version uint64, spec typ
 	} else {
 		// this is needed because if the encodedAuth isn't being updated then we
 		// shouldn't lose it, and continue to use the one that was already present
-		ctnr := currentService.Spec.Task.GetContainer()
+		var ctnr *swarmapi.ContainerSpec
+		switch registryAuthFrom {
+		case apitypes.RegistryAuthFromSpec, "":
+			ctnr = currentService.Spec.Task.GetContainer()
+		case apitypes.RegistryAuthFromPreviousSpec:
+			if currentService.PreviousSpec == nil {
+				return fmt.Errorf("service does not have a previous spec")
+			}
+			ctnr = currentService.PreviousSpec.Task.GetContainer()
+		default:
+			return fmt.Errorf("unsupported registryAuthFromValue")
+		}
 		if ctnr == nil {
 			return fmt.Errorf("service does not use container tasks")
 		}
--- a/daemon/cluster/convert/service.go
+++ b/daemon/cluster/convert/service.go
@ -12,35 +12,11 @@ import (

 // ServiceFromGRPC converts a grpc Service to a Service.
 func ServiceFromGRPC(s swarmapi.Service) types.Service {
-	spec := s.Spec
-	containerConfig := spec.Task.Runtime.(*swarmapi.TaskSpec_Container).Container
-
-	serviceNetworks := make([]types.NetworkAttachmentConfig, 0, len(spec.Networks))
-	for _, n := range spec.Networks {
-		serviceNetworks = append(serviceNetworks, types.NetworkAttachmentConfig{Target: n.Target, Aliases: n.Aliases})
-	}
-
-	taskNetworks := make([]types.NetworkAttachmentConfig, 0, len(spec.Task.Networks))
-	for _, n := range spec.Task.Networks {
-		taskNetworks = append(taskNetworks, types.NetworkAttachmentConfig{Target: n.Target, Aliases: n.Aliases})
-	}
-
 	service := types.Service{
-		ID: s.ID,
+		ID:           s.ID,
+		Spec:         *serviceSpecFromGRPC(&s.Spec),
+		PreviousSpec: serviceSpecFromGRPC(s.PreviousSpec),

-		Spec: types.ServiceSpec{
-			TaskTemplate: types.TaskSpec{
-				ContainerSpec: containerSpecFromGRPC(containerConfig),
-				Resources:     resourcesFromGRPC(s.Spec.Task.Resources),
-				RestartPolicy: restartPolicyFromGRPC(s.Spec.Task.Restart),
-				Placement:     placementFromGRPC(s.Spec.Task.Placement),
-				LogDriver:     driverFromGRPC(s.Spec.Task.LogDriver),
-				Networks:      taskNetworks,
-			},
-
-			Networks:     serviceNetworks,
-			EndpointSpec: endpointSpecFromGRPC(s.Spec.Endpoint),
-		},
 		Endpoint: endpointFromGRPC(s.Endpoint),
 	}

@ -49,36 +25,6 @@ func ServiceFromGRPC(s swarmapi.Service) types.Service {
 	service.CreatedAt, _ = ptypes.Timestamp(s.Meta.CreatedAt)
 	service.UpdatedAt, _ = ptypes.Timestamp(s.Meta.UpdatedAt)

-	// Annotations
-	service.Spec.Name = s.Spec.Annotations.Name
-	service.Spec.Labels = s.Spec.Annotations.Labels
-
-	// UpdateConfig
-	if s.Spec.Update != nil {
-		service.Spec.UpdateConfig = &types.UpdateConfig{
-			Parallelism: s.Spec.Update.Parallelism,
-		}
-
-		service.Spec.UpdateConfig.Delay, _ = ptypes.Duration(&s.Spec.Update.Delay)
-
-		switch s.Spec.Update.FailureAction {
-		case swarmapi.UpdateConfig_PAUSE:
-			service.Spec.UpdateConfig.FailureAction = types.UpdateFailureActionPause
-		case swarmapi.UpdateConfig_CONTINUE:
-			service.Spec.UpdateConfig.FailureAction = types.UpdateFailureActionContinue
-		}
-	}
-
-	// Mode
-	switch t := s.Spec.GetMode().(type) {
-	case *swarmapi.ServiceSpec_Global:
-		service.Spec.Mode.Global = &types.GlobalService{}
-	case *swarmapi.ServiceSpec_Replicated:
-		service.Spec.Mode.Replicated = &types.ReplicatedService{
-			Replicas: &t.Replicated.Replicas,
-		}
-	}
-
 	// UpdateStatus
 	service.UpdateStatus = types.UpdateStatus{}
 	if s.UpdateStatus != nil {
@ -99,6 +45,74 @@ func ServiceFromGRPC(s swarmapi.Service) types.Service {
 	return service
 }

+func serviceSpecFromGRPC(spec *swarmapi.ServiceSpec) *types.ServiceSpec {
+	if spec == nil {
+		return nil
+	}
+
+	serviceNetworks := make([]types.NetworkAttachmentConfig, 0, len(spec.Networks))
+	for _, n := range spec.Networks {
+		serviceNetworks = append(serviceNetworks, types.NetworkAttachmentConfig{Target: n.Target, Aliases: n.Aliases})
+	}
+
+	taskNetworks := make([]types.NetworkAttachmentConfig, 0, len(spec.Task.Networks))
+	for _, n := range spec.Task.Networks {
+		taskNetworks = append(taskNetworks, types.NetworkAttachmentConfig{Target: n.Target, Aliases: n.Aliases})
+	}
+
+	containerConfig := spec.Task.Runtime.(*swarmapi.TaskSpec_Container).Container
+	convertedSpec := &types.ServiceSpec{
+		Annotations: types.Annotations{
+			Name:   spec.Annotations.Name,
+			Labels: spec.Annotations.Labels,
+		},
+
+		TaskTemplate: types.TaskSpec{
+			ContainerSpec: containerSpecFromGRPC(containerConfig),
+			Resources:     resourcesFromGRPC(spec.Task.Resources),
+			RestartPolicy: restartPolicyFromGRPC(spec.Task.Restart),
+			Placement:     placementFromGRPC(spec.Task.Placement),
+			LogDriver:     driverFromGRPC(spec.Task.LogDriver),
+			Networks:      taskNetworks,
+		},
+
+		Networks:     serviceNetworks,
+		EndpointSpec: endpointSpecFromGRPC(spec.Endpoint),
+	}
+
+	// UpdateConfig
+	if spec.Update != nil {
+		convertedSpec.UpdateConfig = &types.UpdateConfig{
+			Parallelism:     spec.Update.Parallelism,
+			MaxFailureRatio: spec.Update.MaxFailureRatio,
+		}
+
+		convertedSpec.UpdateConfig.Delay, _ = ptypes.Duration(&spec.Update.Delay)
+		if spec.Update.Monitor != nil {
+			convertedSpec.UpdateConfig.Monitor, _ = ptypes.Duration(spec.Update.Monitor)
+		}
+
+		switch spec.Update.FailureAction {
+		case swarmapi.UpdateConfig_PAUSE:
+			convertedSpec.UpdateConfig.FailureAction = types.UpdateFailureActionPause
+		case swarmapi.UpdateConfig_CONTINUE:
+			convertedSpec.UpdateConfig.FailureAction = types.UpdateFailureActionContinue
+		}
+	}
+
+	// Mode
+	switch t := spec.GetMode().(type) {
+	case *swarmapi.ServiceSpec_Global:
+		convertedSpec.Mode.Global = &types.GlobalService{}
+	case *swarmapi.ServiceSpec_Replicated:
+		convertedSpec.Mode.Replicated = &types.ReplicatedService{
+			Replicas: &t.Replicated.Replicas,
+		}
+	}
+
+	return convertedSpec
+}
+
 // ServiceSpecToGRPC converts a ServiceSpec to a grpc ServiceSpec.
 func ServiceSpecToGRPC(s types.ServiceSpec) (swarmapi.ServiceSpec, error) {
 	name := s.Name
@ -158,9 +172,13 @@ func ServiceSpecToGRPC(s types.ServiceSpec) (swarmapi.ServiceSpec, error) {
 			return swarmapi.ServiceSpec{}, fmt.Errorf("unrecongized update failure action %s", s.UpdateConfig.FailureAction)
 		}
 		spec.Update = &swarmapi.UpdateConfig{
-			Parallelism:   s.UpdateConfig.Parallelism,
-			Delay:         *ptypes.DurationProto(s.UpdateConfig.Delay),
-			FailureAction: failureAction,
+			Parallelism:     s.UpdateConfig.Parallelism,
+			Delay:           *ptypes.DurationProto(s.UpdateConfig.Delay),
+			FailureAction:   failureAction,
+			MaxFailureRatio: s.UpdateConfig.MaxFailureRatio,
+		}
+		if s.UpdateConfig.Monitor != 0 {
+			spec.Update.Monitor = ptypes.DurationProto(s.UpdateConfig.Monitor)
 		}
 	}

--- a/docs/reference/api/docker_remote_api.md
+++ b/docs/reference/api/docker_remote_api.md
@ -129,6 +129,7 @@ This section lists each version from latest to oldest.  Each listing includes a
 * `GET /containers/json` now supports a `is-task` filter to filter
  containers that are tasks (part of a service in swarm mode).
 * `POST /containers/create` now takes `StopTimeout` field.
+* `POST /services/create` and `POST /services/(id or name)/update` now accept `Monitor` and `MaxFailureRatio` parameters, which control the response to failures during service updates.

 ### v1.24 API changes

--- a/docs/reference/api/docker_remote_api_v1.25.md
+++ b/docs/reference/api/docker_remote_api_v1.25.md
@ -4877,7 +4877,9 @@ List services
          },
          "UpdateConfig": {
            "Parallelism": 1,
-            "FailureAction": "pause"
+            "FailureAction": "pause",
+            "Monitor": 15000000000,
+            "MaxFailureRatio": 0.15
          },
          "EndpointSpec": {
            "Mode": "vip",
@ -5077,8 +5079,8 @@ image](#create-an-image) section for more details.
    - **RestartPolicy** – Specification for the restart policy which applies to containers created
      as part of this service.
        - **Condition** – Condition for restart (`none`, `on-failure`, or `any`).
-        - **Delay** – Delay between restart attempts.
-        - **Attempts** – Maximum attempts to restart a given container before giving up (default value
+        - **Delay** – Delay between restart attempts, in nanoseconds.
+        - **MaxAttempts** – Maximum attempts to restart a given container before giving up (default value
          is 0, which is ignored).
        - **Window** – Windows is the time window used to evaluate the restart policy (default value is
          0, which is unbounded).
@ -5087,9 +5089,12 @@ image](#create-an-image) section for more details.
 - **UpdateConfig** – Specification for the update strategy of the service.
    - **Parallelism** – Maximum number of tasks to be updated in one iteration (0 means unlimited
      parallelism).
-    - **Delay** – Amount of time between updates.
+    - **Delay** – Amount of time between updates, in nanoseconds.
    - **FailureAction** - Action to take if an updated task fails to run, or stops running during the
      update. Values are `continue` and `pause`.
+    - **Monitor** - Amount of time to monitor each updated task for failures, in nanoseconds.
+    - **MaxFailureRatio** - The fraction of tasks that may fail during an update before the
+      failure action is invoked, specified as a floating point number between 0 and 1. The default is 0.
 - **Networks** – Array of network names or IDs to attach the service to.
 - **EndpointSpec** – Properties that can be configured to access and load balance a service.
    - **Mode** – The mode of resolution to use for internal load balancing
@ -5259,7 +5264,9 @@ image](#create-an-image) section for more details.
        }
      },
      "UpdateConfig": {
-        "Parallelism": 1
+        "Parallelism": 1,
+        "Monitor": 15000000000,
+        "MaxFailureRatio": 0.15
      },
      "EndpointSpec": {
        "Mode": "vip"
@ -5314,7 +5321,7 @@ image](#create-an-image) section for more details.
    - **RestartPolicy** – Specification for the restart policy which applies to containers created
      as part of this service.
        - **Condition** – Condition for restart (`none`, `on-failure`, or `any`).
-        - **Delay** – Delay between restart attempts.
+        - **Delay** – Delay between restart attempts, in nanoseconds.
        - **MaxAttempts** – Maximum attempts to restart a given container before giving up (default value
          is 0, which is ignored).
        - **Window** – Windows is the time window used to evaluate the restart policy (default value is
@ -5324,7 +5331,12 @@ image](#create-an-image) section for more details.
 - **UpdateConfig** – Specification for the update strategy of the service.
    - **Parallelism** – Maximum number of tasks to be updated in one iteration (0 means unlimited
      parallelism).
-    - **Delay** – Amount of time between updates.
+    - **Delay** – Amount of time between updates, in nanoseconds.
+    - **FailureAction** - Action to take if an updated task fails to run, or stops running during the
+      update. Values are `continue` and `pause`.
+    - **Monitor** - Amount of time to monitor each updated task for failures, in nanoseconds.
+    - **MaxFailureRatio** - The fraction of tasks that may fail during an update before the
+      failure action is invoked, specified as a floating point number between 0 and 1. The default is 0.
 - **Networks** – Array of network names or IDs to attach the service to.
 - **EndpointSpec** – Properties that can be configured to access and load balance a service.
    - **Mode** – The mode of resolution to use for internal load balancing
@ -5338,6 +5350,10 @@ image](#create-an-image) section for more details.

 - **version** – The version number of the service object being updated. This is
  required to avoid conflicting writes.
+- **registryAuthFrom** – If the `X-Registry-Auth` header is not specified, this
+  parameter indicates where to find registry authorization credentials. The
+  valid values are `spec` and `previous-spec`. If unspecified, the default is
+  `spec`.

 **Request Headers**:

--- a/docs/reference/commandline/service_create.md
+++ b/docs/reference/commandline/service_create.md
@ -12,36 +12,38 @@ Usage:  docker service create [OPTIONS] IMAGE [COMMAND] [ARG...]
 Create a new service

 Options:
-      --constraint value               Placement constraints (default [])
-      --container-label value          Service container labels (default [])
-      --endpoint-mode string           Endpoint mode (vip or dnsrr)
-  -e, --env value                      Set environment variables (default [])
-      --group-add value                Add additional user groups to the container (default [])
-      --help                           Print usage
-  -l, --label value                    Service labels (default [])
-      --limit-cpu value                Limit CPUs (default 0.000)
-      --limit-memory value             Limit Memory (default 0 B)
-      --log-driver string              Logging driver for service
-      --log-opt value                  Logging driver options (default [])
-      --mode string                    Service mode (replicated or global) (default "replicated")
-      --mount value                    Attach a mount to the service
-      --name string                    Service name
-      --network value                  Network attachments (default [])
-  -p, --publish value                  Publish a port as a node port (default [])
-      --replicas value                 Number of tasks (default none)
-      --reserve-cpu value              Reserve CPUs (default 0.000)
-      --reserve-memory value           Reserve Memory (default 0 B)
-      --restart-condition string       Restart when condition is met (none, on-failure, or any)
-      --restart-delay value            Delay between restart attempts (default none)
-      --restart-max-attempts value     Maximum number of restarts before giving up (default none)
-      --restart-window value           Window used to evaluate the restart policy (default none)
-      --stop-grace-period value        Time to wait before force killing a container (default none)
-      --update-delay duration          Delay between updates
-      --update-failure-action string   Action on update failure (pause|continue) (default "pause")
-      --update-parallelism uint        Maximum number of tasks updated simultaneously (0 to update all at once) (default 1)
-  -u, --user string                    Username or UID (format: <name|uid>[:<group|gid>])
-      --with-registry-auth             Send registry authentication details to Swarm agents
-  -w, --workdir string                 Working directory inside the container
+      --constraint value                 Placement constraints (default [])
+      --container-label value            Service container labels (default [])
+      --endpoint-mode string             Endpoint mode (vip or dnsrr)
+  -e, --env value                        Set environment variables (default [])
+      --group-add value                  Add additional user groups to the container (default [])
+      --help                             Print usage
+  -l, --label value                      Service labels (default [])
+      --limit-cpu value                  Limit CPUs (default 0.000)
+      --limit-memory value               Limit Memory (default 0 B)
+      --log-driver string                Logging driver for service
+      --log-opt value                    Logging driver options (default [])
+      --mode string                      Service mode (replicated or global) (default "replicated")
+      --mount value                      Attach a mount to the service
+      --name string                      Service name
+      --network value                    Network attachments (default [])
+  -p, --publish value                    Publish a port as a node port (default [])
+      --replicas value                   Number of tasks (default none)
+      --reserve-cpu value                Reserve CPUs (default 0.000)
+      --reserve-memory value             Reserve Memory (default 0 B)
+      --restart-condition string         Restart when condition is met (none, on-failure, or any)
+      --restart-delay value              Delay between restart attempts (default none)
+      --restart-max-attempts value       Maximum number of restarts before giving up (default none)
+      --restart-window value             Window used to evaluate the restart policy (default none)
+      --stop-grace-period value          Time to wait before force killing a container (default none)
+      --update-delay duration            Delay between updates
+      --update-failure-action string     Action on update failure (pause|continue) (default "pause")
+      --update-max-failure-ratio value   Failure rate to tolerate during an update
+      --update-monitor duration          Duration after each task update to monitor for failure (default 0s)
+      --update-parallelism uint          Maximum number of tasks updated simultaneously (0 to update all at once) (default 1)
+  -u, --user string                      Username or UID (format: <name|uid>[:<group|gid>])
+      --with-registry-auth               Send registry authentication details to Swarm agents
+  -w, --workdir string                   Working directory inside the container
 ```

 Creates a service as described by the specified parameters. You must run this
--- a/docs/reference/commandline/service_update.md
+++ b/docs/reference/commandline/service_update.md
@ -12,43 +12,46 @@ Usage:  docker service update [OPTIONS] SERVICE
 Update a service

 Options:
-      --args string                    Service command args
-      --constraint-add value           Add or update placement constraints (default [])
-      --constraint-rm value            Remove a constraint (default [])
-      --container-label-add value      Add or update container labels (default [])
-      --container-label-rm value       Remove a container label by its key (default [])
-      --endpoint-mode string           Endpoint mode (vip or dnsrr)
-      --env-add value                  Add or update environment variables (default [])
-      --env-rm value                   Remove an environment variable (default [])
-      --group-add value                Add additional user groups to the container (default [])
-      --group-rm value                 Remove previously added user groups from the container (default [])
-      --help                           Print usage
-      --image string                   Service image tag
-      --label-add value                Add or update service labels (default [])
-      --label-rm value                 Remove a label by its key (default [])
-      --limit-cpu value                Limit CPUs (default 0.000)
-      --limit-memory value             Limit Memory (default 0 B)
-      --log-driver string              Logging driver for service
-      --log-opt value                  Logging driver options (default [])
-      --mount-add value                Add or update a mount on a service
-      --mount-rm value                 Remove a mount by its target path (default [])
-      --name string                    Service name
-      --publish-add value              Add or update a published port (default [])
-      --publish-rm value               Remove a published port by its target port (default [])
-      --replicas value                 Number of tasks (default none)
-      --reserve-cpu value              Reserve CPUs (default 0.000)
-      --reserve-memory value           Reserve Memory (default 0 B)
-      --restart-condition string       Restart when condition is met (none, on-failure, or any)
-      --restart-delay value            Delay between restart attempts (default none)
-      --restart-max-attempts value     Maximum number of restarts before giving up (default none)
-      --restart-window value           Window used to evaluate the restart policy (default none)
-      --stop-grace-period value        Time to wait before force killing a container (default none)
-      --update-delay duration          Delay between updates
-      --update-failure-action string   Action on update failure (pause|continue) (default "pause")
-      --update-parallelism uint        Maximum number of tasks updated simultaneously (0 to update all at once) (default 1)
-  -u, --user string                    Username or UID (format: <name|uid>[:<group|gid>])
-      --with-registry-auth             Send registry authentication details to Swarm agents
-  -w, --workdir string                 Working directory inside the container
+      --args string                      Service command args
+      --constraint-add value             Add or update placement constraints (default [])
+      --constraint-rm value              Remove a constraint (default [])
+      --container-label-add value        Add or update container labels (default [])
+      --container-label-rm value         Remove a container label by its key (default [])
+      --endpoint-mode string             Endpoint mode (vip or dnsrr)
+      --env-add value                    Add or update environment variables (default [])
+      --env-rm value                     Remove an environment variable (default [])
+      --group-add value                  Add additional user groups to the container (default [])
+      --group-rm value                   Remove previously added user groups from the container (default [])
+      --help                             Print usage
+      --image string                     Service image tag
+      --label-add value                  Add or update service labels (default [])
+      --label-rm value                   Remove a label by its key (default [])
+      --limit-cpu value                  Limit CPUs (default 0.000)
+      --limit-memory value               Limit Memory (default 0 B)
+      --log-driver string                Logging driver for service
+      --log-opt value                    Logging driver options (default [])
+      --mount-add value                  Add or update a mount on a service
+      --mount-rm value                   Remove a mount by its target path (default [])
+      --name string                      Service name
+      --publish-add value                Add or update a published port (default [])
+      --publish-rm value                 Remove a published port by its target port (default [])
+      --replicas value                   Number of tasks (default none)
+      --reserve-cpu value                Reserve CPUs (default 0.000)
+      --reserve-memory value             Reserve Memory (default 0 B)
+      --restart-condition string         Restart when condition is met (none, on-failure, or any)
+      --restart-delay value              Delay between restart attempts (default none)
+      --restart-max-attempts value       Maximum number of restarts before giving up (default none)
+      --restart-window value             Window used to evaluate the restart policy (default none)
+      --rollback                         Rollback to previous specification
+      --stop-grace-period value          Time to wait before force killing a container (default none)
+      --update-delay duration            Delay between updates
+      --update-failure-action string     Action on update failure (pause|continue) (default "pause")
+      --update-max-failure-ratio value   Failure rate to tolerate during an update
+      --update-monitor duration          Duration after each task update to monitor for failure (default 0s)
+      --update-parallelism uint          Maximum number of tasks updated simultaneously (0 to update all at once) (default 1)
+  -u, --user string                      Username or UID (format: <name|uid>[:<group|gid>])
+      --with-registry-auth               Send registry authentication details to Swarm agents
+  -w, --workdir string                   Working directory inside the container
 ```

 Updates a service as described by the specified parameters. This command has to be run targeting a manager node.
--- a/integration-cli/daemon_swarm.go
+++ b/integration-cli/daemon_swarm.go
@ -139,8 +139,8 @@ func (d *SwarmDaemon) getServiceTasks(c *check.C, service string) []swarm.Task {
 	return tasks
 }

-func (d *SwarmDaemon) checkServiceRunningTasks(c *check.C, service string) func(*check.C) (interface{}, check.CommentInterface) {
-	return func(*check.C) (interface{}, check.CommentInterface) {
+func (d *SwarmDaemon) checkServiceRunningTasks(service string) func(*check.C) (interface{}, check.CommentInterface) {
+	return func(c *check.C) (interface{}, check.CommentInterface) {
 		tasks := d.getServiceTasks(c, service)
 		var runningCount int
 		for _, task := range tasks {
@ -152,8 +152,15 @@ func (d *SwarmDaemon) checkServiceRunningTasks(c *check.C, service string) func(
 	}
 }

-func (d *SwarmDaemon) checkServiceTasks(c *check.C, service string) func(*check.C) (interface{}, check.CommentInterface) {
-	return func(*check.C) (interface{}, check.CommentInterface) {
+func (d *SwarmDaemon) checkServiceUpdateState(service string) func(*check.C) (interface{}, check.CommentInterface) {
+	return func(c *check.C) (interface{}, check.CommentInterface) {
+		service := d.getService(c, service)
+		return service.UpdateStatus.State, nil
+	}
+}
+
+func (d *SwarmDaemon) checkServiceTasks(service string) func(*check.C) (interface{}, check.CommentInterface) {
+	return func(c *check.C) (interface{}, check.CommentInterface) {
 		tasks := d.getServiceTasks(c, service)
 		return len(tasks), nil
 	}
--- a/integration-cli/docker_api_swarm_test.go
+++ b/integration-cli/docker_api_swarm_test.go
@ -310,6 +310,63 @@ func (s *DockerSwarmSuite) TestAPISwarmServicesUpdate(c *check.C) {
 	// 3rd batch
 	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkRunningTaskImages, checker.DeepEquals,
 		map[string]int{image2: instances})
+
+	// Roll back to the previous version. This uses the CLI because
+	// rollback is a client-side operation.
+	out, err := daemons[0].Cmd("service", "update", "--rollback", id)
+	c.Assert(err, checker.IsNil, check.Commentf(out))
+
+	// first batch
+	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkRunningTaskImages, checker.DeepEquals,
+		map[string]int{image2: instances - parallelism, image1: parallelism})
+
+	// 2nd batch
+	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkRunningTaskImages, checker.DeepEquals,
+		map[string]int{image2: instances - 2*parallelism, image1: 2 * parallelism})
+
+	// 3rd batch
+	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkRunningTaskImages, checker.DeepEquals,
+		map[string]int{image1: instances})
+}
+
+func (s *DockerSwarmSuite) TestApiSwarmServicesFailedUpdate(c *check.C) {
+	const nodeCount = 3
+	var daemons [nodeCount]*SwarmDaemon
+	for i := 0; i < nodeCount; i++ {
+		daemons[i] = s.AddDaemon(c, true, i == 0)
+	}
+	// wait for nodes ready
+	waitAndAssert(c, 5*time.Second, daemons[0].checkNodeReadyCount, checker.Equals, nodeCount)
+
+	// service image at start
+	image1 := "busybox:latest"
+	// target image in update
+	image2 := "busybox:badtag"
+
+	// create service
+	instances := 5
+	id := daemons[0].createService(c, serviceForUpdate, setInstances(instances))
+
+	// wait for tasks ready
+	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkRunningTaskImages, checker.DeepEquals,
+		map[string]int{image1: instances})
+
+	// issue service update
+	service := daemons[0].getService(c, id)
+	daemons[0].updateService(c, service, setImage(image2), setFailureAction(swarm.UpdateFailureActionPause), setMaxFailureRatio(0.25), setParallelism(1))
+
+	// should update 2 tasks and then pause
+	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceUpdateState(id), checker.Equals, swarm.UpdateStatePaused)
+	v, _ := daemons[0].checkServiceRunningTasks(id)(c)
+	c.Assert(v, checker.Equals, instances-2)
+
+	// Roll back to the previous version. This uses the CLI because
+	// rollback is a client-side operation.
+	out, err := daemons[0].Cmd("service", "update", "--rollback", id)
+	c.Assert(err, checker.IsNil, check.Commentf(out))
+
+	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkRunningTaskImages, checker.DeepEquals,
+		map[string]int{image1: instances})
 }

 func (s *DockerSwarmSuite) TestAPISwarmServiceConstraintRole(c *check.C) {
@ -326,7 +383,7 @@ func (s *DockerSwarmSuite) TestAPISwarmServiceConstraintRole(c *check.C) {
 	instances := 3
 	id := daemons[0].createService(c, simpleTestService, setConstraints(constraints), setInstances(instances))
 	// wait for tasks ready
-	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceRunningTasks(c, id), checker.Equals, instances)
+	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceRunningTasks(id), checker.Equals, instances)
 	// validate tasks are running on worker nodes
 	tasks := daemons[0].getServiceTasks(c, id)
 	for _, task := range tasks {
@ -340,7 +397,7 @@ func (s *DockerSwarmSuite) TestAPISwarmServiceConstraintRole(c *check.C) {
 	constraints = []string{"node.role!=worker"}
 	id = daemons[0].createService(c, simpleTestService, setConstraints(constraints), setInstances(instances))
 	// wait for tasks ready
-	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceRunningTasks(c, id), checker.Equals, instances)
+	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceRunningTasks(id), checker.Equals, instances)
 	tasks = daemons[0].getServiceTasks(c, id)
 	// validate tasks are running on manager nodes
 	for _, task := range tasks {
@ -354,7 +411,7 @@ func (s *DockerSwarmSuite) TestAPISwarmServiceConstraintRole(c *check.C) {
 	constraints = []string{"node.role==nosuchrole"}
 	id = daemons[0].createService(c, simpleTestService, setConstraints(constraints), setInstances(instances))
 	// wait for tasks created
-	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceTasks(c, id), checker.Equals, instances)
+	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceTasks(id), checker.Equals, instances)
 	// let scheduler try
 	time.Sleep(250 * time.Millisecond)
 	// validate tasks are not assigned to any node
@ -394,7 +451,7 @@ func (s *DockerSwarmSuite) TestAPISwarmServiceConstraintLabel(c *check.C) {
 	constraints := []string{"node.labels.security==high"}
 	id := daemons[0].createService(c, simpleTestService, setConstraints(constraints), setInstances(instances))
 	// wait for tasks ready
-	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceRunningTasks(c, id), checker.Equals, instances)
+	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceRunningTasks(id), checker.Equals, instances)
 	tasks := daemons[0].getServiceTasks(c, id)
 	// validate all tasks are running on nodes[0]
 	for _, task := range tasks {
@ -407,7 +464,7 @@ func (s *DockerSwarmSuite) TestAPISwarmServiceConstraintLabel(c *check.C) {
 	constraints = []string{"node.labels.security!=high"}
 	id = daemons[0].createService(c, simpleTestService, setConstraints(constraints), setInstances(instances))
 	// wait for tasks ready
-	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceRunningTasks(c, id), checker.Equals, instances)
+	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceRunningTasks(id), checker.Equals, instances)
 	tasks = daemons[0].getServiceTasks(c, id)
 	// validate all tasks are NOT running on nodes[0]
 	for _, task := range tasks {
@ -419,7 +476,7 @@ func (s *DockerSwarmSuite) TestAPISwarmServiceConstraintLabel(c *check.C) {
 	constraints = []string{"node.labels.security==medium"}
 	id = daemons[0].createService(c, simpleTestService, setConstraints(constraints), setInstances(instances))
 	// wait for tasks created
-	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceTasks(c, id), checker.Equals, instances)
+	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceTasks(id), checker.Equals, instances)
 	// let scheduler try
 	time.Sleep(250 * time.Millisecond)
 	tasks = daemons[0].getServiceTasks(c, id)
@ -437,7 +494,7 @@ func (s *DockerSwarmSuite) TestAPISwarmServiceConstraintLabel(c *check.C) {
 	}
 	id = daemons[0].createService(c, simpleTestService, setConstraints(constraints), setInstances(instances))
 	// wait for tasks created
-	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceTasks(c, id), checker.Equals, instances)
+	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceTasks(id), checker.Equals, instances)
 	// let scheduler try
 	time.Sleep(250 * time.Millisecond)
 	tasks = daemons[0].getServiceTasks(c, id)
@ -452,7 +509,7 @@ func (s *DockerSwarmSuite) TestAPISwarmServiceConstraintLabel(c *check.C) {
 		}
 	})
 	// wait for tasks ready
-	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceRunningTasks(c, id), checker.Equals, instances)
+	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceRunningTasks(id), checker.Equals, instances)
 	tasks = daemons[0].getServiceTasks(c, id)
 	for _, task := range tasks {
 		c.Assert(task.NodeID, checker.Equals, nodes[1].ID)
@ -1022,6 +1079,24 @@ func setImage(image string) serviceConstructor {
 	}
 }

+func setFailureAction(failureAction string) serviceConstructor {
+	return func(s *swarm.Service) {
+		s.Spec.UpdateConfig.FailureAction = failureAction
+	}
+}
+
+func setMaxFailureRatio(maxFailureRatio float32) serviceConstructor {
+	return func(s *swarm.Service) {
+		s.Spec.UpdateConfig.MaxFailureRatio = maxFailureRatio
+	}
+}
+
+func setParallelism(parallelism uint64) serviceConstructor {
+	return func(s *swarm.Service) {
+		s.Spec.UpdateConfig.Parallelism = parallelism
+	}
+}
+
 func setConstraints(constraints []string) serviceConstructor {
 	return func(s *swarm.Service) {
 		if s.Spec.TaskTemplate.Placement == nil {
--- a/integration-cli/docker_cli_swarm_test.go
+++ b/integration-cli/docker_cli_swarm_test.go
@ -349,7 +349,7 @@ func (s *DockerSwarmSuite) TestPsListContainersFilterIsTask(c *check.C) {
 	c.Assert(strings.TrimSpace(out), checker.Not(checker.Equals), "")

 	// make sure task has been deployed.
-	waitAndAssert(c, defaultReconciliationTimeout, d.checkServiceRunningTasks(c, name), checker.Equals, 1)
+	waitAndAssert(c, defaultReconciliationTimeout, d.checkServiceRunningTasks(name), checker.Equals, 1)

 	// Filter non-tasks
 	out, err = d.Cmd("ps", "-a", "-q", "--filter=is-task=false")