1
0
Fork 0
mirror of https://github.com/moby/moby.git synced 2022-11-09 12:21:53 -05:00

Merge pull request from aaronlehmann/update-thresholds-rollbacks

Service update failure thresholds and rollback
This commit is contained in:
Sebastiaan van Stijn 2016-10-18 15:26:24 -07:00 committed by GitHub
commit 3b0660da30
19 changed files with 441 additions and 207 deletions

View file

@ -15,7 +15,7 @@ type Backend interface {
GetServices(basictypes.ServiceListOptions) ([]types.Service, error)
GetService(string) (types.Service, error)
CreateService(types.ServiceSpec, string) (string, error)
UpdateService(string, uint64, types.ServiceSpec, string) error
UpdateService(string, uint64, types.ServiceSpec, string, string) error
RemoveService(string) error
GetNodes(basictypes.NodeListOptions) ([]types.Node, error)
GetNode(string) (types.Node, error)

View file

@ -156,7 +156,9 @@ func (sr *swarmRouter) updateService(ctx context.Context, w http.ResponseWriter,
// Get returns "" if the header does not exist
encodedAuth := r.Header.Get("X-Registry-Auth")
if err := sr.backend.UpdateService(vars["id"], version, service, encodedAuth); err != nil {
registryAuthFrom := r.URL.Query().Get("registryAuthFrom")
if err := sr.backend.UpdateService(vars["id"], version, service, encodedAuth, registryAuthFrom); err != nil {
logrus.Errorf("Error updating service %s: %v", vars["id"], err)
return err
}

View file

@ -275,6 +275,12 @@ type ServiceCreateResponse struct {
ID string
}
// Values for RegistryAuthFrom in ServiceUpdateOptions
const (
RegistryAuthFromSpec = "spec"
RegistryAuthFromPreviousSpec = "previous-spec"
)
// ServiceUpdateOptions contains the options to be used for updating services.
type ServiceUpdateOptions struct {
// EncodedRegistryAuth is the encoded registry authorization credentials to
@ -286,6 +292,11 @@ type ServiceUpdateOptions struct {
// TODO(stevvooe): Consider moving the version parameter of ServiceUpdate
// into this field. While it does open API users up to racy writes, most
// users may not need that level of consistency in practice.
// RegistryAuthFrom specifies where to find the registry authorization
// credentials if they are not given in EncodedRegistryAuth. Valid
// values are "spec" and "previous-spec".
RegistryAuthFrom string
}
// ServiceListOptions holds parameters to list services with.

View file

@ -7,6 +7,7 @@ type Service struct {
ID string
Meta
Spec ServiceSpec `json:",omitempty"`
PreviousSpec *ServiceSpec `json:",omitempty"`
Endpoint Endpoint `json:",omitempty"`
UpdateStatus UpdateStatus `json:",omitempty"`
}
@ -71,7 +72,34 @@ const (
// UpdateConfig represents the update configuration.
type UpdateConfig struct {
Parallelism uint64 `json:",omitempty"`
Delay time.Duration `json:",omitempty"`
FailureAction string `json:",omitempty"`
// Maximum number of tasks to be updated in one iteration.
// 0 means unlimited parallelism.
Parallelism uint64 `json:",omitempty"`
// Amount of time between updates.
Delay time.Duration `json:",omitempty"`
// FailureAction is the action to take when an update failures.
FailureAction string `json:",omitempty"`
// Monitor indicates how long to monitor a task for failure after it is
// created. If the task fails by ending up in one of the states
// REJECTED, COMPLETED, or FAILED, within Monitor from its creation,
// this counts as a failure. If it fails after Monitor, it does not
// count as a failure. If Monitor is unspecified, a default value will
// be used.
Monitor time.Duration `json:",omitempty"`
// MaxFailureRatio is the fraction of tasks that may fail during
// an update before the failure action is invoked. Any task created by
// the current update which ends up in one of the states REJECTED,
// COMPLETED or FAILED within Monitor from its creation counts as a
// failure. The number of failures is divided by the number of tasks
// being updated, and if this fraction is greater than
// MaxFailureRatio, the failure action is invoked.
//
// If the failure action is CONTINUE, there is no effect.
// If the failure action is PAUSE, no more tasks will be updated until
// another update is started.
MaxFailureRatio float32
}

View file

@ -41,10 +41,14 @@ Placement:
{{- if .HasUpdateConfig }}
UpdateConfig:
Parallelism: {{ .UpdateParallelism }}
{{- if .HasUpdateDelay -}}
{{- if .HasUpdateDelay}}
Delay: {{ .UpdateDelay }}
{{- end }}
On failure: {{ .UpdateOnFailure }}
{{- if .HasUpdateMonitor}}
Monitoring Period: {{ .UpdateMonitor }}
{{- end }}
Max failure ratio: {{ .UpdateMaxFailureRatio }}
{{- end }}
ContainerSpec:
Image: {{ .ContainerImage }}
@ -218,6 +222,18 @@ func (ctx *serviceInspectContext) UpdateOnFailure() string {
return ctx.Service.Spec.UpdateConfig.FailureAction
}
func (ctx *serviceInspectContext) HasUpdateMonitor() bool {
return ctx.Service.Spec.UpdateConfig.Monitor.Nanoseconds() > 0
}
func (ctx *serviceInspectContext) UpdateMonitor() time.Duration {
return ctx.Service.Spec.UpdateConfig.Monitor
}
func (ctx *serviceInspectContext) UpdateMaxFailureRatio() float32 {
return ctx.Service.Spec.UpdateConfig.MaxFailureRatio
}
func (ctx *serviceInspectContext) ContainerImage() string {
return ctx.Service.Spec.TaskTemplate.ContainerSpec.Image
}

View file

@ -267,9 +267,11 @@ func (m *MountOpt) Value() []mounttypes.Mount {
}
type updateOptions struct {
parallelism uint64
delay time.Duration
onFailure string
parallelism uint64
delay time.Duration
monitor time.Duration
onFailure string
maxFailureRatio float32
}
type resourceOptions struct {
@ -458,9 +460,11 @@ func (opts *serviceOptions) ToService() (swarm.ServiceSpec, error) {
Networks: convertNetworks(opts.networks),
Mode: swarm.ServiceMode{},
UpdateConfig: &swarm.UpdateConfig{
Parallelism: opts.update.parallelism,
Delay: opts.update.delay,
FailureAction: opts.update.onFailure,
Parallelism: opts.update.parallelism,
Delay: opts.update.delay,
Monitor: opts.update.monitor,
FailureAction: opts.update.onFailure,
MaxFailureRatio: opts.update.maxFailureRatio,
},
EndpointSpec: opts.endpoint.ToEndpointSpec(),
}
@ -507,7 +511,9 @@ func addServiceFlags(cmd *cobra.Command, opts *serviceOptions) {
flags.Uint64Var(&opts.update.parallelism, flagUpdateParallelism, 1, "Maximum number of tasks updated simultaneously (0 to update all at once)")
flags.DurationVar(&opts.update.delay, flagUpdateDelay, time.Duration(0), "Delay between updates")
flags.DurationVar(&opts.update.monitor, flagUpdateMonitor, time.Duration(0), "Duration after each task update to monitor for failure")
flags.StringVar(&opts.update.onFailure, flagUpdateFailureAction, "pause", "Action on update failure (pause|continue)")
flags.Float32Var(&opts.update.maxFailureRatio, flagUpdateMaxFailureRatio, 0, "Failure rate to tolerate during an update")
flags.StringVar(&opts.endpoint.mode, flagEndpointMode, "", "Endpoint mode (vip or dnsrr)")
@ -518,46 +524,48 @@ func addServiceFlags(cmd *cobra.Command, opts *serviceOptions) {
}
const (
flagConstraint = "constraint"
flagConstraintRemove = "constraint-rm"
flagConstraintAdd = "constraint-add"
flagContainerLabel = "container-label"
flagContainerLabelRemove = "container-label-rm"
flagContainerLabelAdd = "container-label-add"
flagEndpointMode = "endpoint-mode"
flagEnv = "env"
flagEnvRemove = "env-rm"
flagEnvAdd = "env-add"
flagGroupAdd = "group-add"
flagGroupRemove = "group-rm"
flagLabel = "label"
flagLabelRemove = "label-rm"
flagLabelAdd = "label-add"
flagLimitCPU = "limit-cpu"
flagLimitMemory = "limit-memory"
flagMode = "mode"
flagMount = "mount"
flagMountRemove = "mount-rm"
flagMountAdd = "mount-add"
flagName = "name"
flagNetwork = "network"
flagPublish = "publish"
flagPublishRemove = "publish-rm"
flagPublishAdd = "publish-add"
flagReplicas = "replicas"
flagReserveCPU = "reserve-cpu"
flagReserveMemory = "reserve-memory"
flagRestartCondition = "restart-condition"
flagRestartDelay = "restart-delay"
flagRestartMaxAttempts = "restart-max-attempts"
flagRestartWindow = "restart-window"
flagStopGracePeriod = "stop-grace-period"
flagUpdateDelay = "update-delay"
flagUpdateFailureAction = "update-failure-action"
flagUpdateParallelism = "update-parallelism"
flagUser = "user"
flagWorkdir = "workdir"
flagRegistryAuth = "with-registry-auth"
flagLogDriver = "log-driver"
flagLogOpt = "log-opt"
flagConstraint = "constraint"
flagConstraintRemove = "constraint-rm"
flagConstraintAdd = "constraint-add"
flagContainerLabel = "container-label"
flagContainerLabelRemove = "container-label-rm"
flagContainerLabelAdd = "container-label-add"
flagEndpointMode = "endpoint-mode"
flagEnv = "env"
flagEnvRemove = "env-rm"
flagEnvAdd = "env-add"
flagGroupAdd = "group-add"
flagGroupRemove = "group-rm"
flagLabel = "label"
flagLabelRemove = "label-rm"
flagLabelAdd = "label-add"
flagLimitCPU = "limit-cpu"
flagLimitMemory = "limit-memory"
flagMode = "mode"
flagMount = "mount"
flagMountRemove = "mount-rm"
flagMountAdd = "mount-add"
flagName = "name"
flagNetwork = "network"
flagPublish = "publish"
flagPublishRemove = "publish-rm"
flagPublishAdd = "publish-add"
flagReplicas = "replicas"
flagReserveCPU = "reserve-cpu"
flagReserveMemory = "reserve-memory"
flagRestartCondition = "restart-condition"
flagRestartDelay = "restart-delay"
flagRestartMaxAttempts = "restart-max-attempts"
flagRestartWindow = "restart-window"
flagStopGracePeriod = "stop-grace-period"
flagUpdateDelay = "update-delay"
flagUpdateFailureAction = "update-failure-action"
flagUpdateMaxFailureRatio = "update-max-failure-ratio"
flagUpdateMonitor = "update-monitor"
flagUpdateParallelism = "update-parallelism"
flagUser = "user"
flagWorkdir = "workdir"
flagRegistryAuth = "with-registry-auth"
flagLogDriver = "log-driver"
flagLogOpt = "log-opt"
)

View file

@ -36,6 +36,7 @@ func newUpdateCommand(dockerCli *command.DockerCli) *cobra.Command {
flags := cmd.Flags()
flags.String("image", "", "Service image tag")
flags.String("args", "", "Service command args")
flags.Bool("rollback", false, "Rollback to previous specification")
addServiceFlags(cmd, opts)
flags.Var(newListOptsVar(), flagEnvRemove, "Remove an environment variable")
@ -68,7 +69,20 @@ func runUpdate(dockerCli *command.DockerCli, flags *pflag.FlagSet, serviceID str
return err
}
err = updateService(flags, &service.Spec)
rollback, err := flags.GetBool("rollback")
if err != nil {
return err
}
spec := &service.Spec
if rollback {
spec = service.PreviousSpec
if spec == nil {
return fmt.Errorf("service does not have a previous specification to roll back to")
}
}
err = updateService(flags, spec)
if err != nil {
return err
}
@ -81,15 +95,19 @@ func runUpdate(dockerCli *command.DockerCli, flags *pflag.FlagSet, serviceID str
if sendAuth {
// Retrieve encoded auth token from the image reference
// This would be the old image if it didn't change in this update
image := service.Spec.TaskTemplate.ContainerSpec.Image
image := spec.TaskTemplate.ContainerSpec.Image
encodedAuth, err := command.RetrieveAuthTokenFromImage(ctx, dockerCli, image)
if err != nil {
return err
}
updateOpts.EncodedRegistryAuth = encodedAuth
} else if rollback {
updateOpts.RegistryAuthFrom = types.RegistryAuthFromPreviousSpec
} else {
updateOpts.RegistryAuthFrom = types.RegistryAuthFromSpec
}
err = apiClient.ServiceUpdate(ctx, service.ID, service.Version, service.Spec, updateOpts)
err = apiClient.ServiceUpdate(ctx, service.ID, service.Version, *spec, updateOpts)
if err != nil {
return err
}
@ -111,6 +129,12 @@ func updateService(flags *pflag.FlagSet, spec *swarm.ServiceSpec) error {
}
}
updateFloat32 := func(flag string, field *float32) {
if flags.Changed(flag) {
*field, _ = flags.GetFloat32(flag)
}
}
updateDuration := func(flag string, field *time.Duration) {
if flags.Changed(flag) {
*field, _ = flags.GetDuration(flag)
@ -195,13 +219,15 @@ func updateService(flags *pflag.FlagSet, spec *swarm.ServiceSpec) error {
return err
}
if anyChanged(flags, flagUpdateParallelism, flagUpdateDelay, flagUpdateFailureAction) {
if anyChanged(flags, flagUpdateParallelism, flagUpdateDelay, flagUpdateMonitor, flagUpdateFailureAction, flagUpdateMaxFailureRatio) {
if spec.UpdateConfig == nil {
spec.UpdateConfig = &swarm.UpdateConfig{}
}
updateUint64(flagUpdateParallelism, &spec.UpdateConfig.Parallelism)
updateDuration(flagUpdateDelay, &spec.UpdateConfig.Delay)
updateDuration(flagUpdateMonitor, &spec.UpdateConfig.Monitor)
updateString(flagUpdateFailureAction, &spec.UpdateConfig.FailureAction)
updateFloat32(flagUpdateMaxFailureRatio, &spec.UpdateConfig.MaxFailureRatio)
}
if flags.Changed(flagEndpointMode) {

View file

@ -22,6 +22,10 @@ func (cli *Client) ServiceUpdate(ctx context.Context, serviceID string, version
}
}
if options.RegistryAuthFrom != "" {
query.Set("registryAuthFrom", options.RegistryAuthFrom)
}
query.Set("version", strconv.FormatUint(version.Index, 10))
resp, err := cli.post(ctx, "/services/"+serviceID+"/update", query, service, headers)

View file

@ -1809,9 +1809,12 @@ _docker_service_update() {
--restart-delay
--restart-max-attempts
--restart-window
--rollback
--stop-grace-period
--update-delay
--update-failure-action
--update-max-failure-ratio
--update-monitor
--update-parallelism
--user -u
--workdir -w

View file

@ -1108,6 +1108,8 @@ __docker_service_subcommand() {
"($help)--stop-grace-period=[Time to wait before force killing a container]:grace period: "
"($help)--update-delay=[Delay between updates]:delay: "
"($help)--update-failure-action=[Action on update failure]:mode:(pause continue)"
"($help)--update-max-failure-ratio=[Failure rate to tolerate during an update]:fraction: "
"($help)--update-monitor=[Duration after each task update to monitor for failure]:window: "
"($help)--update-parallelism=[Maximum number of tasks updated simultaneously]:number: "
"($help -u --user)"{-u=,--user=}"[Username or UID]:user:_users"
"($help)--with-registry-auth[Send registry authentication details to swarm agents]"
@ -1185,6 +1187,7 @@ __docker_service_subcommand() {
"($help)*--container-label-rm=[Remove a container label by its key]:label: " \
"($help)*--group-rm=[Remove previously added user groups from the container]:group:_groups" \
"($help)--image=[Service image tag]:image:__docker_repositories" \
"($help)--rollback[Rollback to previous specification]" \
"($help -)1:service:__docker_complete_services" && ret=0
;;
(help)

View file

@ -913,7 +913,7 @@ func (c *Cluster) GetService(input string) (types.Service, error) {
}
// UpdateService updates existing service to match new properties.
func (c *Cluster) UpdateService(serviceIDOrName string, version uint64, spec types.ServiceSpec, encodedAuth string) error {
func (c *Cluster) UpdateService(serviceIDOrName string, version uint64, spec types.ServiceSpec, encodedAuth string, registryAuthFrom string) error {
c.RLock()
defer c.RUnlock()
@ -948,7 +948,18 @@ func (c *Cluster) UpdateService(serviceIDOrName string, version uint64, spec typ
} else {
// this is needed because if the encodedAuth isn't being updated then we
// shouldn't lose it, and continue to use the one that was already present
ctnr := currentService.Spec.Task.GetContainer()
var ctnr *swarmapi.ContainerSpec
switch registryAuthFrom {
case apitypes.RegistryAuthFromSpec, "":
ctnr = currentService.Spec.Task.GetContainer()
case apitypes.RegistryAuthFromPreviousSpec:
if currentService.PreviousSpec == nil {
return fmt.Errorf("service does not have a previous spec")
}
ctnr = currentService.PreviousSpec.Task.GetContainer()
default:
return fmt.Errorf("unsupported registryAuthFromValue")
}
if ctnr == nil {
return fmt.Errorf("service does not use container tasks")
}

View file

@ -12,35 +12,11 @@ import (
// ServiceFromGRPC converts a grpc Service to a Service.
func ServiceFromGRPC(s swarmapi.Service) types.Service {
spec := s.Spec
containerConfig := spec.Task.Runtime.(*swarmapi.TaskSpec_Container).Container
serviceNetworks := make([]types.NetworkAttachmentConfig, 0, len(spec.Networks))
for _, n := range spec.Networks {
serviceNetworks = append(serviceNetworks, types.NetworkAttachmentConfig{Target: n.Target, Aliases: n.Aliases})
}
taskNetworks := make([]types.NetworkAttachmentConfig, 0, len(spec.Task.Networks))
for _, n := range spec.Task.Networks {
taskNetworks = append(taskNetworks, types.NetworkAttachmentConfig{Target: n.Target, Aliases: n.Aliases})
}
service := types.Service{
ID: s.ID,
ID: s.ID,
Spec: *serviceSpecFromGRPC(&s.Spec),
PreviousSpec: serviceSpecFromGRPC(s.PreviousSpec),
Spec: types.ServiceSpec{
TaskTemplate: types.TaskSpec{
ContainerSpec: containerSpecFromGRPC(containerConfig),
Resources: resourcesFromGRPC(s.Spec.Task.Resources),
RestartPolicy: restartPolicyFromGRPC(s.Spec.Task.Restart),
Placement: placementFromGRPC(s.Spec.Task.Placement),
LogDriver: driverFromGRPC(s.Spec.Task.LogDriver),
Networks: taskNetworks,
},
Networks: serviceNetworks,
EndpointSpec: endpointSpecFromGRPC(s.Spec.Endpoint),
},
Endpoint: endpointFromGRPC(s.Endpoint),
}
@ -49,36 +25,6 @@ func ServiceFromGRPC(s swarmapi.Service) types.Service {
service.CreatedAt, _ = ptypes.Timestamp(s.Meta.CreatedAt)
service.UpdatedAt, _ = ptypes.Timestamp(s.Meta.UpdatedAt)
// Annotations
service.Spec.Name = s.Spec.Annotations.Name
service.Spec.Labels = s.Spec.Annotations.Labels
// UpdateConfig
if s.Spec.Update != nil {
service.Spec.UpdateConfig = &types.UpdateConfig{
Parallelism: s.Spec.Update.Parallelism,
}
service.Spec.UpdateConfig.Delay, _ = ptypes.Duration(&s.Spec.Update.Delay)
switch s.Spec.Update.FailureAction {
case swarmapi.UpdateConfig_PAUSE:
service.Spec.UpdateConfig.FailureAction = types.UpdateFailureActionPause
case swarmapi.UpdateConfig_CONTINUE:
service.Spec.UpdateConfig.FailureAction = types.UpdateFailureActionContinue
}
}
// Mode
switch t := s.Spec.GetMode().(type) {
case *swarmapi.ServiceSpec_Global:
service.Spec.Mode.Global = &types.GlobalService{}
case *swarmapi.ServiceSpec_Replicated:
service.Spec.Mode.Replicated = &types.ReplicatedService{
Replicas: &t.Replicated.Replicas,
}
}
// UpdateStatus
service.UpdateStatus = types.UpdateStatus{}
if s.UpdateStatus != nil {
@ -99,6 +45,74 @@ func ServiceFromGRPC(s swarmapi.Service) types.Service {
return service
}
func serviceSpecFromGRPC(spec *swarmapi.ServiceSpec) *types.ServiceSpec {
if spec == nil {
return nil
}
serviceNetworks := make([]types.NetworkAttachmentConfig, 0, len(spec.Networks))
for _, n := range spec.Networks {
serviceNetworks = append(serviceNetworks, types.NetworkAttachmentConfig{Target: n.Target, Aliases: n.Aliases})
}
taskNetworks := make([]types.NetworkAttachmentConfig, 0, len(spec.Task.Networks))
for _, n := range spec.Task.Networks {
taskNetworks = append(taskNetworks, types.NetworkAttachmentConfig{Target: n.Target, Aliases: n.Aliases})
}
containerConfig := spec.Task.Runtime.(*swarmapi.TaskSpec_Container).Container
convertedSpec := &types.ServiceSpec{
Annotations: types.Annotations{
Name: spec.Annotations.Name,
Labels: spec.Annotations.Labels,
},
TaskTemplate: types.TaskSpec{
ContainerSpec: containerSpecFromGRPC(containerConfig),
Resources: resourcesFromGRPC(spec.Task.Resources),
RestartPolicy: restartPolicyFromGRPC(spec.Task.Restart),
Placement: placementFromGRPC(spec.Task.Placement),
LogDriver: driverFromGRPC(spec.Task.LogDriver),
Networks: taskNetworks,
},
Networks: serviceNetworks,
EndpointSpec: endpointSpecFromGRPC(spec.Endpoint),
}
// UpdateConfig
if spec.Update != nil {
convertedSpec.UpdateConfig = &types.UpdateConfig{
Parallelism: spec.Update.Parallelism,
MaxFailureRatio: spec.Update.MaxFailureRatio,
}
convertedSpec.UpdateConfig.Delay, _ = ptypes.Duration(&spec.Update.Delay)
if spec.Update.Monitor != nil {
convertedSpec.UpdateConfig.Monitor, _ = ptypes.Duration(spec.Update.Monitor)
}
switch spec.Update.FailureAction {
case swarmapi.UpdateConfig_PAUSE:
convertedSpec.UpdateConfig.FailureAction = types.UpdateFailureActionPause
case swarmapi.UpdateConfig_CONTINUE:
convertedSpec.UpdateConfig.FailureAction = types.UpdateFailureActionContinue
}
}
// Mode
switch t := spec.GetMode().(type) {
case *swarmapi.ServiceSpec_Global:
convertedSpec.Mode.Global = &types.GlobalService{}
case *swarmapi.ServiceSpec_Replicated:
convertedSpec.Mode.Replicated = &types.ReplicatedService{
Replicas: &t.Replicated.Replicas,
}
}
return convertedSpec
}
// ServiceSpecToGRPC converts a ServiceSpec to a grpc ServiceSpec.
func ServiceSpecToGRPC(s types.ServiceSpec) (swarmapi.ServiceSpec, error) {
name := s.Name
@ -158,9 +172,13 @@ func ServiceSpecToGRPC(s types.ServiceSpec) (swarmapi.ServiceSpec, error) {
return swarmapi.ServiceSpec{}, fmt.Errorf("unrecongized update failure action %s", s.UpdateConfig.FailureAction)
}
spec.Update = &swarmapi.UpdateConfig{
Parallelism: s.UpdateConfig.Parallelism,
Delay: *ptypes.DurationProto(s.UpdateConfig.Delay),
FailureAction: failureAction,
Parallelism: s.UpdateConfig.Parallelism,
Delay: *ptypes.DurationProto(s.UpdateConfig.Delay),
FailureAction: failureAction,
MaxFailureRatio: s.UpdateConfig.MaxFailureRatio,
}
if s.UpdateConfig.Monitor != 0 {
spec.Update.Monitor = ptypes.DurationProto(s.UpdateConfig.Monitor)
}
}

View file

@ -129,6 +129,7 @@ This section lists each version from latest to oldest. Each listing includes a
* `GET /containers/json` now supports a `is-task` filter to filter
containers that are tasks (part of a service in swarm mode).
* `POST /containers/create` now takes `StopTimeout` field.
* `POST /services/create` and `POST /services/(id or name)/update` now accept `Monitor` and `MaxFailureRatio` parameters, which control the response to failures during service updates.
### v1.24 API changes

View file

@ -4877,7 +4877,9 @@ List services
},
"UpdateConfig": {
"Parallelism": 1,
"FailureAction": "pause"
"FailureAction": "pause",
"Monitor": 15000000000,
"MaxFailureRatio": 0.15
},
"EndpointSpec": {
"Mode": "vip",
@ -5077,8 +5079,8 @@ image](#create-an-image) section for more details.
- **RestartPolicy** Specification for the restart policy which applies to containers created
as part of this service.
- **Condition** Condition for restart (`none`, `on-failure`, or `any`).
- **Delay** Delay between restart attempts.
- **Attempts** Maximum attempts to restart a given container before giving up (default value
- **Delay** Delay between restart attempts, in nanoseconds.
- **MaxAttempts** Maximum attempts to restart a given container before giving up (default value
is 0, which is ignored).
- **Window** Windows is the time window used to evaluate the restart policy (default value is
0, which is unbounded).
@ -5087,9 +5089,12 @@ image](#create-an-image) section for more details.
- **UpdateConfig** Specification for the update strategy of the service.
- **Parallelism** Maximum number of tasks to be updated in one iteration (0 means unlimited
parallelism).
- **Delay** Amount of time between updates.
- **Delay** Amount of time between updates, in nanoseconds.
- **FailureAction** - Action to take if an updated task fails to run, or stops running during the
update. Values are `continue` and `pause`.
- **Monitor** - Amount of time to monitor each updated task for failures, in nanoseconds.
- **MaxFailureRatio** - The fraction of tasks that may fail during an update before the
failure action is invoked, specified as a floating point number between 0 and 1. The default is 0.
- **Networks** Array of network names or IDs to attach the service to.
- **EndpointSpec** Properties that can be configured to access and load balance a service.
- **Mode** The mode of resolution to use for internal load balancing
@ -5259,7 +5264,9 @@ image](#create-an-image) section for more details.
}
},
"UpdateConfig": {
"Parallelism": 1
"Parallelism": 1,
"Monitor": 15000000000,
"MaxFailureRatio": 0.15
},
"EndpointSpec": {
"Mode": "vip"
@ -5314,7 +5321,7 @@ image](#create-an-image) section for more details.
- **RestartPolicy** Specification for the restart policy which applies to containers created
as part of this service.
- **Condition** Condition for restart (`none`, `on-failure`, or `any`).
- **Delay** Delay between restart attempts.
- **Delay** Delay between restart attempts, in nanoseconds.
- **MaxAttempts** Maximum attempts to restart a given container before giving up (default value
is 0, which is ignored).
- **Window** Windows is the time window used to evaluate the restart policy (default value is
@ -5324,7 +5331,12 @@ image](#create-an-image) section for more details.
- **UpdateConfig** Specification for the update strategy of the service.
- **Parallelism** Maximum number of tasks to be updated in one iteration (0 means unlimited
parallelism).
- **Delay** Amount of time between updates.
- **Delay** Amount of time between updates, in nanoseconds.
- **FailureAction** - Action to take if an updated task fails to run, or stops running during the
update. Values are `continue` and `pause`.
- **Monitor** - Amount of time to monitor each updated task for failures, in nanoseconds.
- **MaxFailureRatio** - The fraction of tasks that may fail during an update before the
failure action is invoked, specified as a floating point number between 0 and 1. The default is 0.
- **Networks** Array of network names or IDs to attach the service to.
- **EndpointSpec** Properties that can be configured to access and load balance a service.
- **Mode** The mode of resolution to use for internal load balancing
@ -5338,6 +5350,10 @@ image](#create-an-image) section for more details.
- **version** The version number of the service object being updated. This is
required to avoid conflicting writes.
- **registryAuthFrom** - If the X-Registry-Auth header is not specified, this
parameter indicates where to find registry authorization credentials. The
valid values are `spec` and `previous-spec`. If unspecified, the default is
`spec`.
**Request Headers**:

View file

@ -12,36 +12,38 @@ Usage: docker service create [OPTIONS] IMAGE [COMMAND] [ARG...]
Create a new service
Options:
--constraint value Placement constraints (default [])
--container-label value Service container labels (default [])
--endpoint-mode string Endpoint mode (vip or dnsrr)
-e, --env value Set environment variables (default [])
--group-add value Add additional user groups to the container (default [])
--help Print usage
-l, --label value Service labels (default [])
--limit-cpu value Limit CPUs (default 0.000)
--limit-memory value Limit Memory (default 0 B)
--log-driver string Logging driver for service
--log-opt value Logging driver options (default [])
--mode string Service mode (replicated or global) (default "replicated")
--mount value Attach a mount to the service
--name string Service name
--network value Network attachments (default [])
-p, --publish value Publish a port as a node port (default [])
--replicas value Number of tasks (default none)
--reserve-cpu value Reserve CPUs (default 0.000)
--reserve-memory value Reserve Memory (default 0 B)
--restart-condition string Restart when condition is met (none, on-failure, or any)
--restart-delay value Delay between restart attempts (default none)
--restart-max-attempts value Maximum number of restarts before giving up (default none)
--restart-window value Window used to evaluate the restart policy (default none)
--stop-grace-period value Time to wait before force killing a container (default none)
--update-delay duration Delay between updates
--update-failure-action string Action on update failure (pause|continue) (default "pause")
--update-parallelism uint Maximum number of tasks updated simultaneously (0 to update all at once) (default 1)
-u, --user string Username or UID (format: <name|uid>[:<group|gid>])
--with-registry-auth Send registry authentication details to Swarm agents
-w, --workdir string Working directory inside the container
--constraint value Placement constraints (default [])
--container-label value Service container labels (default [])
--endpoint-mode string Endpoint mode (vip or dnsrr)
-e, --env value Set environment variables (default [])
--group-add value Add additional user groups to the container (default [])
--help Print usage
-l, --label value Service labels (default [])
--limit-cpu value Limit CPUs (default 0.000)
--limit-memory value Limit Memory (default 0 B)
--log-driver string Logging driver for service
--log-opt value Logging driver options (default [])
--mode string Service mode (replicated or global) (default "replicated")
--mount value Attach a mount to the service
--name string Service name
--network value Network attachments (default [])
-p, --publish value Publish a port as a node port (default [])
--replicas value Number of tasks (default none)
--reserve-cpu value Reserve CPUs (default 0.000)
--reserve-memory value Reserve Memory (default 0 B)
--restart-condition string Restart when condition is met (none, on-failure, or any)
--restart-delay value Delay between restart attempts (default none)
--restart-max-attempts value Maximum number of restarts before giving up (default none)
--restart-window value Window used to evaluate the restart policy (default none)
--stop-grace-period value Time to wait before force killing a container (default none)
--update-delay duration Delay between updates
--update-failure-action string Action on update failure (pause|continue) (default "pause")
--update-max-failure-ratio value Failure rate to tolerate during an update
--update-monitor duration Duration after each task update to monitor for failure (default 0s)
--update-parallelism uint Maximum number of tasks updated simultaneously (0 to update all at once) (default 1)
-u, --user string Username or UID (format: <name|uid>[:<group|gid>])
--with-registry-auth Send registry authentication details to Swarm agents
-w, --workdir string Working directory inside the container
```
Creates a service as described by the specified parameters. You must run this

View file

@ -12,43 +12,46 @@ Usage: docker service update [OPTIONS] SERVICE
Update a service
Options:
--args string Service command args
--constraint-add value Add or update placement constraints (default [])
--constraint-rm value Remove a constraint (default [])
--container-label-add value Add or update container labels (default [])
--container-label-rm value Remove a container label by its key (default [])
--endpoint-mode string Endpoint mode (vip or dnsrr)
--env-add value Add or update environment variables (default [])
--env-rm value Remove an environment variable (default [])
--group-add value Add additional user groups to the container (default [])
--group-rm value Remove previously added user groups from the container (default [])
--help Print usage
--image string Service image tag
--label-add value Add or update service labels (default [])
--label-rm value Remove a label by its key (default [])
--limit-cpu value Limit CPUs (default 0.000)
--limit-memory value Limit Memory (default 0 B)
--log-driver string Logging driver for service
--log-opt value Logging driver options (default [])
--mount-add value Add or update a mount on a service
--mount-rm value Remove a mount by its target path (default [])
--name string Service name
--publish-add value Add or update a published port (default [])
--publish-rm value Remove a published port by its target port (default [])
--replicas value Number of tasks (default none)
--reserve-cpu value Reserve CPUs (default 0.000)
--reserve-memory value Reserve Memory (default 0 B)
--restart-condition string Restart when condition is met (none, on-failure, or any)
--restart-delay value Delay between restart attempts (default none)
--restart-max-attempts value Maximum number of restarts before giving up (default none)
--restart-window value Window used to evaluate the restart policy (default none)
--stop-grace-period value Time to wait before force killing a container (default none)
--update-delay duration Delay between updates
--update-failure-action string Action on update failure (pause|continue) (default "pause")
--update-parallelism uint Maximum number of tasks updated simultaneously (0 to update all at once) (default 1)
-u, --user string Username or UID (format: <name|uid>[:<group|gid>])
--with-registry-auth Send registry authentication details to Swarm agents
-w, --workdir string Working directory inside the container
--args string Service command args
--constraint-add value Add or update placement constraints (default [])
--constraint-rm value Remove a constraint (default [])
--container-label-add value Add or update container labels (default [])
--container-label-rm value Remove a container label by its key (default [])
--endpoint-mode string Endpoint mode (vip or dnsrr)
--env-add value Add or update environment variables (default [])
--env-rm value Remove an environment variable (default [])
--group-add value Add additional user groups to the container (default [])
--group-rm value Remove previously added user groups from the container (default [])
--help Print usage
--image string Service image tag
--label-add value Add or update service labels (default [])
--label-rm value Remove a label by its key (default [])
--limit-cpu value Limit CPUs (default 0.000)
--limit-memory value Limit Memory (default 0 B)
--log-driver string Logging driver for service
--log-opt value Logging driver options (default [])
--mount-add value Add or update a mount on a service
--mount-rm value Remove a mount by its target path (default [])
--name string Service name
--publish-add value Add or update a published port (default [])
--publish-rm value Remove a published port by its target port (default [])
--replicas value Number of tasks (default none)
--reserve-cpu value Reserve CPUs (default 0.000)
--reserve-memory value Reserve Memory (default 0 B)
--restart-condition string Restart when condition is met (none, on-failure, or any)
--restart-delay value Delay between restart attempts (default none)
--restart-max-attempts value Maximum number of restarts before giving up (default none)
--restart-window value Window used to evaluate the restart policy (default none)
--rollback Rollback to previous specification
--stop-grace-period value Time to wait before force killing a container (default none)
--update-delay duration Delay between updates
--update-failure-action string Action on update failure (pause|continue) (default "pause")
--update-max-failure-ratio value Failure rate to tolerate during an update
--update-monitor duration Duration after each task update to monitor for failure (default 0s)
--update-parallelism uint Maximum number of tasks updated simultaneously (0 to update all at once) (default 1)
-u, --user string Username or UID (format: <name|uid>[:<group|gid>])
--with-registry-auth Send registry authentication details to Swarm agents
-w, --workdir string Working directory inside the container
```
Updates a service as described by the specified parameters. This command has to be run targeting a manager node.

View file

@ -139,8 +139,8 @@ func (d *SwarmDaemon) getServiceTasks(c *check.C, service string) []swarm.Task {
return tasks
}
func (d *SwarmDaemon) checkServiceRunningTasks(c *check.C, service string) func(*check.C) (interface{}, check.CommentInterface) {
return func(*check.C) (interface{}, check.CommentInterface) {
func (d *SwarmDaemon) checkServiceRunningTasks(service string) func(*check.C) (interface{}, check.CommentInterface) {
return func(c *check.C) (interface{}, check.CommentInterface) {
tasks := d.getServiceTasks(c, service)
var runningCount int
for _, task := range tasks {
@ -152,8 +152,15 @@ func (d *SwarmDaemon) checkServiceRunningTasks(c *check.C, service string) func(
}
}
func (d *SwarmDaemon) checkServiceTasks(c *check.C, service string) func(*check.C) (interface{}, check.CommentInterface) {
return func(*check.C) (interface{}, check.CommentInterface) {
func (d *SwarmDaemon) checkServiceUpdateState(service string) func(*check.C) (interface{}, check.CommentInterface) {
return func(c *check.C) (interface{}, check.CommentInterface) {
service := d.getService(c, service)
return service.UpdateStatus.State, nil
}
}
func (d *SwarmDaemon) checkServiceTasks(service string) func(*check.C) (interface{}, check.CommentInterface) {
return func(c *check.C) (interface{}, check.CommentInterface) {
tasks := d.getServiceTasks(c, service)
return len(tasks), nil
}

View file

@ -310,6 +310,63 @@ func (s *DockerSwarmSuite) TestAPISwarmServicesUpdate(c *check.C) {
// 3nd batch
waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkRunningTaskImages, checker.DeepEquals,
map[string]int{image2: instances})
// Roll back to the previous version. This uses the CLI because
// rollback is a client-side operation.
out, err := daemons[0].Cmd("service", "update", "--rollback", id)
c.Assert(err, checker.IsNil, check.Commentf(out))
// first batch
waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkRunningTaskImages, checker.DeepEquals,
map[string]int{image2: instances - parallelism, image1: parallelism})
// 2nd batch
waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkRunningTaskImages, checker.DeepEquals,
map[string]int{image2: instances - 2*parallelism, image1: 2 * parallelism})
// 3nd batch
waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkRunningTaskImages, checker.DeepEquals,
map[string]int{image1: instances})
}
func (s *DockerSwarmSuite) TestApiSwarmServicesFailedUpdate(c *check.C) {
const nodeCount = 3
var daemons [nodeCount]*SwarmDaemon
for i := 0; i < nodeCount; i++ {
daemons[i] = s.AddDaemon(c, true, i == 0)
}
// wait for nodes ready
waitAndAssert(c, 5*time.Second, daemons[0].checkNodeReadyCount, checker.Equals, nodeCount)
// service image at start
image1 := "busybox:latest"
// target image in update
image2 := "busybox:badtag"
// create service
instances := 5
id := daemons[0].createService(c, serviceForUpdate, setInstances(instances))
// wait for tasks ready
waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkRunningTaskImages, checker.DeepEquals,
map[string]int{image1: instances})
// issue service update
service := daemons[0].getService(c, id)
daemons[0].updateService(c, service, setImage(image2), setFailureAction(swarm.UpdateFailureActionPause), setMaxFailureRatio(0.25), setParallelism(1))
// should update 2 tasks and then pause
waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceUpdateState(id), checker.Equals, swarm.UpdateStatePaused)
v, _ := daemons[0].checkServiceRunningTasks(id)(c)
c.Assert(v, checker.Equals, instances-2)
// Roll back to the previous version. This uses the CLI because
// rollback is a client-side operation.
out, err := daemons[0].Cmd("service", "update", "--rollback", id)
c.Assert(err, checker.IsNil, check.Commentf(out))
waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkRunningTaskImages, checker.DeepEquals,
map[string]int{image1: instances})
}
func (s *DockerSwarmSuite) TestAPISwarmServiceConstraintRole(c *check.C) {
@ -326,7 +383,7 @@ func (s *DockerSwarmSuite) TestAPISwarmServiceConstraintRole(c *check.C) {
instances := 3
id := daemons[0].createService(c, simpleTestService, setConstraints(constraints), setInstances(instances))
// wait for tasks ready
waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceRunningTasks(c, id), checker.Equals, instances)
waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceRunningTasks(id), checker.Equals, instances)
// validate tasks are running on worker nodes
tasks := daemons[0].getServiceTasks(c, id)
for _, task := range tasks {
@ -340,7 +397,7 @@ func (s *DockerSwarmSuite) TestAPISwarmServiceConstraintRole(c *check.C) {
constraints = []string{"node.role!=worker"}
id = daemons[0].createService(c, simpleTestService, setConstraints(constraints), setInstances(instances))
// wait for tasks ready
waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceRunningTasks(c, id), checker.Equals, instances)
waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceRunningTasks(id), checker.Equals, instances)
tasks = daemons[0].getServiceTasks(c, id)
// validate tasks are running on manager nodes
for _, task := range tasks {
@ -354,7 +411,7 @@ func (s *DockerSwarmSuite) TestAPISwarmServiceConstraintRole(c *check.C) {
constraints = []string{"node.role==nosuchrole"}
id = daemons[0].createService(c, simpleTestService, setConstraints(constraints), setInstances(instances))
// wait for tasks created
waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceTasks(c, id), checker.Equals, instances)
waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceTasks(id), checker.Equals, instances)
// let scheduler try
time.Sleep(250 * time.Millisecond)
// validate tasks are not assigned to any node
@ -394,7 +451,7 @@ func (s *DockerSwarmSuite) TestAPISwarmServiceConstraintLabel(c *check.C) {
constraints := []string{"node.labels.security==high"}
id := daemons[0].createService(c, simpleTestService, setConstraints(constraints), setInstances(instances))
// wait for tasks ready
waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceRunningTasks(c, id), checker.Equals, instances)
waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceRunningTasks(id), checker.Equals, instances)
tasks := daemons[0].getServiceTasks(c, id)
// validate all tasks are running on nodes[0]
for _, task := range tasks {
@ -407,7 +464,7 @@ func (s *DockerSwarmSuite) TestAPISwarmServiceConstraintLabel(c *check.C) {
constraints = []string{"node.labels.security!=high"}
id = daemons[0].createService(c, simpleTestService, setConstraints(constraints), setInstances(instances))
// wait for tasks ready
waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceRunningTasks(c, id), checker.Equals, instances)
waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceRunningTasks(id), checker.Equals, instances)
tasks = daemons[0].getServiceTasks(c, id)
// validate all tasks are NOT running on nodes[0]
for _, task := range tasks {
@ -419,7 +476,7 @@ func (s *DockerSwarmSuite) TestAPISwarmServiceConstraintLabel(c *check.C) {
constraints = []string{"node.labels.security==medium"}
id = daemons[0].createService(c, simpleTestService, setConstraints(constraints), setInstances(instances))
// wait for tasks created
waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceTasks(c, id), checker.Equals, instances)
waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceTasks(id), checker.Equals, instances)
// let scheduler try
time.Sleep(250 * time.Millisecond)
tasks = daemons[0].getServiceTasks(c, id)
@ -437,7 +494,7 @@ func (s *DockerSwarmSuite) TestAPISwarmServiceConstraintLabel(c *check.C) {
}
id = daemons[0].createService(c, simpleTestService, setConstraints(constraints), setInstances(instances))
// wait for tasks created
waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceTasks(c, id), checker.Equals, instances)
waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceTasks(id), checker.Equals, instances)
// let scheduler try
time.Sleep(250 * time.Millisecond)
tasks = daemons[0].getServiceTasks(c, id)
@ -452,7 +509,7 @@ func (s *DockerSwarmSuite) TestAPISwarmServiceConstraintLabel(c *check.C) {
}
})
// wait for tasks ready
waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceRunningTasks(c, id), checker.Equals, instances)
waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceRunningTasks(id), checker.Equals, instances)
tasks = daemons[0].getServiceTasks(c, id)
for _, task := range tasks {
c.Assert(task.NodeID, checker.Equals, nodes[1].ID)
@ -1022,6 +1079,24 @@ func setImage(image string) serviceConstructor {
}
}
func setFailureAction(failureAction string) serviceConstructor {
return func(s *swarm.Service) {
s.Spec.UpdateConfig.FailureAction = failureAction
}
}
func setMaxFailureRatio(maxFailureRatio float32) serviceConstructor {
return func(s *swarm.Service) {
s.Spec.UpdateConfig.MaxFailureRatio = maxFailureRatio
}
}
func setParallelism(parallelism uint64) serviceConstructor {
return func(s *swarm.Service) {
s.Spec.UpdateConfig.Parallelism = parallelism
}
}
func setConstraints(constraints []string) serviceConstructor {
return func(s *swarm.Service) {
if s.Spec.TaskTemplate.Placement == nil {

View file

@ -349,7 +349,7 @@ func (s *DockerSwarmSuite) TestPsListContainersFilterIsTask(c *check.C) {
c.Assert(strings.TrimSpace(out), checker.Not(checker.Equals), "")
// make sure task has been deployed.
waitAndAssert(c, defaultReconciliationTimeout, d.checkServiceRunningTasks(c, name), checker.Equals, 1)
waitAndAssert(c, defaultReconciliationTimeout, d.checkServiceRunningTasks(name), checker.Equals, 1)
// Filter non-tasks
out, err = d.Cmd("ps", "-a", "-q", "--filter=is-task=false")