From 17288c611a4f3f75ecb3bbb4533820b1836c55a6 Mon Sep 17 00:00:00 2001
From: Aaron Lehmann
Date: Thu, 19 Jan 2017 15:27:37 -0800
Subject: [PATCH] Topology-aware scheduling

This adds support for placement preferences in Swarm services.

- Convert PlacementPreferences between GRPC API and HTTP API
- Add --placement-pref, --placement-pref-add and --placement-pref-rm to CLI
- Add support for placement preferences in service inspect --pretty
- Add integration test

Signed-off-by: Aaron Lehmann
---
 api/swagger.yaml                             | 12 ++++
 api/types/swarm/task.go                      | 16 ++++-
 cli/command/formatter/service.go             | 18 ++++-
 cli/command/service/create.go                |  2 +
 cli/command/service/opts.go                  | 55 ++++++++++++--
 cli/command/service/update.go                | 44 +++++++++++-
 cli/command/service/update_test.go           | 30 +++++++-
 daemon/cluster/convert/service.go            | 32 +++++++--
 docs/reference/commandline/service_create.md | 72 +++++++++++++++++++
 docs/reference/commandline/service_update.md |  2 +
 .../docker_api_swarm_service_test.go         | 42 +++++++++++
 integration-cli/docker_api_swarm_test.go     |  9 +++
 12 files changed, 319 insertions(+), 15 deletions(-)

diff --git a/api/swagger.yaml b/api/swagger.yaml
index 2efe55cc84..8dee7fd2c8 100644
--- a/api/swagger.yaml
+++ b/api/swagger.yaml
@@ -2047,6 +2047,18 @@ definitions:
             type: "array"
             items:
               type: "string"
+          Preferences:
+            description: "Preferences provide a way to make the scheduler aware of factors such as topology. They are provided in order from highest to lowest precedence."
+            type: "array"
+            items:
+              type: "object"
+              properties:
+                Spread:
+                  type: "object"
+                  properties:
+                    SpreadDescriptor:
+                      description: "label descriptor, such as engine.labels.az"
+                      type: "string"
       ForceUpdate:
         description: "A counter that triggers an update even if no relevant parameters have been changed."
         type: "integer"
diff --git a/api/types/swarm/task.go b/api/types/swarm/task.go
index ace12cc89f..1769b6082b 100644
--- a/api/types/swarm/task.go
+++ b/api/types/swarm/task.go
@@ -81,7 +81,21 @@ type ResourceRequirements struct {
 
 // Placement represents orchestration parameters.
 type Placement struct {
-    Constraints []string `json:",omitempty"`
+    Constraints []string              `json:",omitempty"`
+    Preferences []PlacementPreference `json:",omitempty"`
+}
+
+// PlacementPreference provides a way to make the scheduler aware of factors
+// such as topology.
+type PlacementPreference struct {
+    Spread *SpreadOver
+}
+
+// SpreadOver is a scheduling preference that instructs the scheduler to spread
+// tasks evenly over groups of nodes identified by labels.
+type SpreadOver struct {
+    // label descriptor, such as engine.labels.az
+    SpreadDescriptor string
 }
 
 // RestartPolicy represents the restart policy.
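For readers following the new `api/types/swarm` fields above, here is a minimal, hypothetical sketch of how a caller might populate `Placement.Preferences`. Only the types added in this patch plus the standard library are used; the constraint string and label names are illustrative assumptions, not part of the change.

```go
package main

import (
	"fmt"

	"github.com/docker/docker/api/types/swarm"
)

func main() {
	// Spread tasks over datacenters first, then over racks within each
	// datacenter; the order of the slice is the order of precedence.
	placement := &swarm.Placement{
		Constraints: []string{"node.role == worker"}, // illustrative constraint
		Preferences: []swarm.PlacementPreference{
			{Spread: &swarm.SpreadOver{SpreadDescriptor: "node.labels.datacenter"}},
			{Spread: &swarm.SpreadOver{SpreadDescriptor: "node.labels.rack"}},
		},
	}

	for _, pref := range placement.Preferences {
		if pref.Spread != nil {
			fmt.Println("spread over:", pref.Spread.SpreadDescriptor)
		}
	}
}
```

A preference whose `Spread` field is nil is simply skipped by the conversion code later in this patch, which is why the nil check above mirrors the pattern used in `ServiceSpecToGRPC`.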
diff --git a/cli/command/formatter/service.go b/cli/command/formatter/service.go
index b13c5ee608..b8b476dd66 100644
--- a/cli/command/formatter/service.go
+++ b/cli/command/formatter/service.go
@@ -39,9 +39,12 @@ UpdateStatus:
  Message:	{{ .UpdateStatusMessage }}
 {{- end }}
 Placement:
-{{- if .TaskPlacementConstraints -}}
+{{- if .TaskPlacementConstraints }}
  Constraints:	{{ .TaskPlacementConstraints }}
 {{- end }}
+{{- if .TaskPlacementPreferences }}
+ Preferences:	{{ .TaskPlacementPreferences }}
+{{- end }}
 {{- if .HasUpdateConfig }}
 UpdateConfig:
  Parallelism:	{{ .UpdateParallelism }}
@@ -211,6 +214,19 @@ func (ctx *serviceInspectContext) TaskPlacementConstraints() []string {
     return nil
 }
 
+func (ctx *serviceInspectContext) TaskPlacementPreferences() []string {
+    if ctx.Service.Spec.TaskTemplate.Placement == nil {
+        return nil
+    }
+    var strings []string
+    for _, pref := range ctx.Service.Spec.TaskTemplate.Placement.Preferences {
+        if pref.Spread != nil {
+            strings = append(strings, "spread="+pref.Spread.SpreadDescriptor)
+        }
+    }
+    return strings
+}
+
 func (ctx *serviceInspectContext) HasUpdateConfig() bool {
     return ctx.Service.Spec.UpdateConfig != nil
 }
diff --git a/cli/command/service/create.go b/cli/command/service/create.go
index ab90868424..c2eb81727a 100644
--- a/cli/command/service/create.go
+++ b/cli/command/service/create.go
@@ -37,6 +37,8 @@ func newCreateCommand(dockerCli *command.DockerCli) *cobra.Command {
     flags.Var(&opts.envFile, flagEnvFile, "Read in a file of environment variables")
     flags.Var(&opts.mounts, flagMount, "Attach a filesystem mount to the service")
     flags.Var(&opts.constraints, flagConstraint, "Placement constraints")
+    flags.Var(&opts.placementPrefs, flagPlacementPref, "Add a placement preference")
+    flags.SetAnnotation(flagPlacementPref, "version", []string{"1.27"})
     flags.Var(&opts.networks, flagNetwork, "Network attachments")
     flags.Var(&opts.secrets, flagSecret, "Specify secrets to expose to the service")
     flags.SetAnnotation(flagSecret, "version", []string{"1.25"})
diff --git a/cli/command/service/opts.go b/cli/command/service/opts.go
index d8618e73ca..060f7017fa 100644
--- a/cli/command/service/opts.go
+++ b/cli/command/service/opts.go
@@ -1,6 +1,7 @@
 package service
 
 import (
+    "errors"
     "fmt"
     "strconv"
     "strings"
@@ -117,6 +118,45 @@ func (f *floatValue) Value() float32 {
     return float32(*f)
 }
 
+// placementPrefOpts holds a list of placement preferences.
+type placementPrefOpts struct {
+    prefs   []swarm.PlacementPreference
+    strings []string
+}
+
+func (opts *placementPrefOpts) String() string {
+    if len(opts.strings) == 0 {
+        return ""
+    }
+    return fmt.Sprintf("%v", opts.strings)
+}
+
+// Set validates the input value and adds it to the internal slices.
+// Note: in the future, strategies other than "spread" may be supported,
+// as well as additional comma-separated options.
+func (opts *placementPrefOpts) Set(value string) error {
+    fields := strings.Split(value, "=")
+    if len(fields) != 2 {
+        return errors.New(`placement preference must be of the format "<strategy>=<arg>"`)
+    }
+    if fields[0] != "spread" {
+        return fmt.Errorf("unsupported placement preference %s (only spread is supported)", fields[0])
+    }
+
+    opts.prefs = append(opts.prefs, swarm.PlacementPreference{
+        Spread: &swarm.SpreadOver{
+            SpreadDescriptor: fields[1],
+        },
+    })
+    opts.strings = append(opts.strings, value)
+    return nil
+}
+
+// Type returns a string name for this Option type
+func (opts *placementPrefOpts) Type() string {
+    return "pref"
+}
+
 type updateOptions struct {
     parallelism uint64
     delay       time.Duration
@@ -283,11 +323,12 @@ type serviceOptions struct {
     replicas Uint64Opt
     mode     string
 
-    restartPolicy restartPolicyOptions
-    constraints   opts.ListOpts
-    update        updateOptions
-    networks      opts.ListOpts
-    endpoint      endpointOptions
+    restartPolicy  restartPolicyOptions
+    constraints    opts.ListOpts
+    placementPrefs placementPrefOpts
+    update         updateOptions
+    networks       opts.ListOpts
+    endpoint       endpointOptions
 
     registryAuth bool
 
@@ -398,6 +439,7 @@ func (opts *serviceOptions) ToService() (swarm.ServiceSpec, error) {
             RestartPolicy: opts.restartPolicy.ToRestartPolicy(),
             Placement: &swarm.Placement{
                 Constraints: opts.constraints.GetAll(),
+                Preferences: opts.placementPrefs.prefs,
             },
             LogDriver: opts.logDriver.toLogDriver(),
         },
@@ -473,6 +515,9 @@ func addServiceFlags(cmd *cobra.Command, opts *serviceOptions) {
 }
 
 const (
+    flagPlacementPref       = "placement-pref"
+    flagPlacementPrefAdd    = "placement-pref-add"
+    flagPlacementPrefRemove = "placement-pref-rm"
     flagConstraint          = "constraint"
     flagConstraintRemove    = "constraint-rm"
     flagConstraintAdd       = "constraint-add"
diff --git a/cli/command/service/update.go b/cli/command/service/update.go
index 7f461c90a9..1300e5e381 100644
--- a/cli/command/service/update.go
+++ b/cli/command/service/update.go
@@ -69,6 +69,10 @@ func newUpdateCommand(dockerCli *command.DockerCli) *cobra.Command {
     flags.SetAnnotation(flagSecretAdd, "version", []string{"1.25"})
     flags.Var(&serviceOpts.mounts, flagMountAdd, "Add or update a mount on a service")
     flags.Var(&serviceOpts.constraints, flagConstraintAdd, "Add or update a placement constraint")
+    flags.Var(&serviceOpts.placementPrefs, flagPlacementPrefAdd, "Add a placement preference")
+    flags.SetAnnotation(flagPlacementPrefAdd, "version", []string{"1.27"})
+    flags.Var(&placementPrefOpts{}, flagPlacementPrefRemove, "Remove a placement preference")
+    flags.SetAnnotation(flagPlacementPrefRemove, "version", []string{"1.27"})
     flags.Var(&serviceOpts.endpoint.publishPorts, flagPublishAdd, "Add or update a published port")
     flags.Var(&serviceOpts.groups, flagGroupAdd, "Add an additional supplementary user group to the container")
     flags.SetAnnotation(flagGroupAdd, "version", []string{"1.25"})
@@ -260,7 +264,14 @@ func updateService(flags *pflag.FlagSet, spec *swarm.ServiceSpec) error {
         if task.Placement == nil {
             task.Placement = &swarm.Placement{}
         }
-        updatePlacement(flags, task.Placement)
+        updatePlacementConstraints(flags, task.Placement)
+    }
+
+    if anyChanged(flags, flagPlacementPrefAdd, flagPlacementPrefRemove) {
+        if task.Placement == nil {
+            task.Placement = &swarm.Placement{}
+        }
+        updatePlacementPreferences(flags, task.Placement)
     }
 
     if err := updateReplicas(flags, &spec.Mode); err != nil {
@@ -372,7 +383,7 @@ func anyChanged(flags *pflag.FlagSet, fields ...string) bool {
     return false
 }
 
-func updatePlacement(flags *pflag.FlagSet, placement *swarm.Placement) {
+func updatePlacementConstraints(flags *pflag.FlagSet, placement *swarm.Placement) {
     if flags.Changed(flagConstraintAdd) {
         values := flags.Lookup(flagConstraintAdd).Value.(*opts.ListOpts).GetAll()
         placement.Constraints = append(placement.Constraints, values...)
@@ -391,6 +402,35 @@ func updatePlacement(flags *pflag.FlagSet, placement *swarm.Placement) {
     placement.Constraints = newConstraints
 }
 
+func updatePlacementPreferences(flags *pflag.FlagSet, placement *swarm.Placement) {
+    var newPrefs []swarm.PlacementPreference
+
+    if flags.Changed(flagPlacementPrefRemove) {
+        for _, existing := range placement.Preferences {
+            removed := false
+            for _, removal := range flags.Lookup(flagPlacementPrefRemove).Value.(*placementPrefOpts).prefs {
+                if removal.Spread != nil && existing.Spread != nil && removal.Spread.SpreadDescriptor == existing.Spread.SpreadDescriptor {
+                    removed = true
+                    break
+                }
+            }
+            if !removed {
+                newPrefs = append(newPrefs, existing)
+            }
+        }
+    } else {
+        newPrefs = placement.Preferences
+    }
+
+    if flags.Changed(flagPlacementPrefAdd) {
+        for _, addition := range flags.Lookup(flagPlacementPrefAdd).Value.(*placementPrefOpts).prefs {
+            newPrefs = append(newPrefs, addition)
+        }
+    }
+
+    placement.Preferences = newPrefs
+}
+
 func updateContainerLabels(flags *pflag.FlagSet, field *map[string]string) {
     if flags.Changed(flagContainerLabelAdd) {
         if *field == nil {
diff --git a/cli/command/service/update_test.go b/cli/command/service/update_test.go
index f2887e229d..422ab33dac 100644
--- a/cli/command/service/update_test.go
+++ b/cli/command/service/update_test.go
@@ -51,7 +51,7 @@ func TestUpdateLabelsRemoveALabelThatDoesNotExist(t *testing.T) {
     assert.Equal(t, len(labels), 1)
 }
 
-func TestUpdatePlacement(t *testing.T) {
+func TestUpdatePlacementConstraints(t *testing.T) {
     flags := newUpdateCommand(nil).Flags()
     flags.Set("constraint-add", "node=toadd")
     flags.Set("constraint-rm", "node!=toremove")
@@ -60,12 +60,38 @@ func TestUpdatePlacement(t *testing.T) {
         Constraints: []string{"node!=toremove", "container=tokeep"},
     }
 
-    updatePlacement(flags, placement)
+    updatePlacementConstraints(flags, placement)
     assert.Equal(t, len(placement.Constraints), 2)
     assert.Equal(t, placement.Constraints[0], "container=tokeep")
     assert.Equal(t, placement.Constraints[1], "node=toadd")
 }
 
+func TestUpdatePlacementPrefs(t *testing.T) {
+    flags := newUpdateCommand(nil).Flags()
+    flags.Set("placement-pref-add", "spread=node.labels.dc")
+    flags.Set("placement-pref-rm", "spread=node.labels.rack")
+
+    placement := &swarm.Placement{
+        Preferences: []swarm.PlacementPreference{
+            {
+                Spread: &swarm.SpreadOver{
+                    SpreadDescriptor: "node.labels.rack",
+                },
+            },
+            {
+                Spread: &swarm.SpreadOver{
+                    SpreadDescriptor: "node.labels.row",
+                },
+            },
+        },
+    }
+
+    updatePlacementPreferences(flags, placement)
+    assert.Equal(t, len(placement.Preferences), 2)
+    assert.Equal(t, placement.Preferences[0].Spread.SpreadDescriptor, "node.labels.row")
+    assert.Equal(t, placement.Preferences[1].Spread.SpreadDescriptor, "node.labels.dc")
+}
+
 func TestUpdateEnvironment(t *testing.T) {
     flags := newUpdateCommand(nil).Flags()
     flags.Set("env-add", "toadd=newenv")
diff --git a/daemon/cluster/convert/service.go b/daemon/cluster/convert/service.go
index a22e0b012a..9655be88c5 100644
--- a/daemon/cluster/convert/service.go
+++ b/daemon/cluster/convert/service.go
@@ -162,8 +162,21 @@ func ServiceSpecToGRPC(s types.ServiceSpec) (swarmapi.ServiceSpec, error) {
     spec.Task.Restart = restartPolicy
 
     if s.TaskTemplate.Placement != nil {
+        var preferences []*swarmapi.PlacementPreference
+        for _, pref := range s.TaskTemplate.Placement.Preferences {
+            if pref.Spread != nil {
+                preferences = append(preferences, &swarmapi.PlacementPreference{
+                    Preference: &swarmapi.PlacementPreference_Spread{
+                        Spread: &swarmapi.SpreadOver{
+                            SpreadDescriptor: pref.Spread.SpreadDescriptor,
+                        },
+                    },
+                })
+            }
+        }
         spec.Task.Placement = &swarmapi.Placement{
             Constraints: s.TaskTemplate.Placement.Constraints,
+            Preferences: preferences,
         }
     }
 
@@ -351,10 +364,21 @@ func restartPolicyToGRPC(p *types.RestartPolicy) (*swarmapi.RestartPolicy, error) {
 }
 
 func placementFromGRPC(p *swarmapi.Placement) *types.Placement {
-    var r *types.Placement
-    if p != nil {
-        r = &types.Placement{}
-        r.Constraints = p.Constraints
+    if p == nil {
+        return nil
+    }
+    r := &types.Placement{
+        Constraints: p.Constraints,
+    }
+
+    for _, pref := range p.Preferences {
+        if spread := pref.GetSpread(); spread != nil {
+            r.Preferences = append(r.Preferences, types.PlacementPreference{
+                Spread: &types.SpreadOver{
+                    SpreadDescriptor: spread.SpreadDescriptor,
+                },
+            })
+        }
     }
 
     return r
diff --git a/docs/reference/commandline/service_create.md b/docs/reference/commandline/service_create.md
index 6ee3bb535a..480e99bbe5 100644
--- a/docs/reference/commandline/service_create.md
+++ b/docs/reference/commandline/service_create.md
@@ -47,6 +47,7 @@ Options:
       --name string                        Service name
       --network list                       Network attachments (default [])
       --no-healthcheck                     Disable any container-specified HEALTHCHECK
+      --placement-pref pref                Add a placement preference
   -p, --publish port                       Publish a port as a node port
       --read-only                          Mount the container's root filesystem as read only
       --replicas uint                      Number of tasks
@@ -435,6 +436,77 @@ $ docker service create \
   redis:3.0.6
 ```
 
+### Specify service placement preferences (--placement-pref)
+
+You can set up the service to divide tasks evenly over different categories of
+nodes. One example of where this can be useful is to balance tasks over a set
+of datacenters or availability zones. The example below illustrates this:
+
+```bash
+$ docker service create \
+  --replicas 9 \
+  --name redis_2 \
+  --placement-pref 'spread=node.labels.datacenter' \
+  redis:3.0.6
+```
+
+This uses `--placement-pref` with a `spread` strategy (currently the only
+supported strategy) to spread tasks evenly over the values of the `datacenter`
+node label. In this example, we assume that every node has a `datacenter` node
+label attached to it. If there are three different values of this label among
+nodes in the swarm, one third of the tasks will be placed on the nodes
+associated with each value. This is true even if there are more nodes with one
+value than another. For example, consider the following set of nodes:
+
+- Three nodes with `node.labels.datacenter=east`
+- Two nodes with `node.labels.datacenter=south`
+- One node with `node.labels.datacenter=west`
+
+Since we are spreading over the values of the `datacenter` label and the
+service has 9 replicas, 3 replicas will end up in each datacenter. There are
+three nodes associated with the value `east`, so each one will get one of the
+three replicas reserved for this value. There are two nodes with the value
+`south`, and the three replicas for this value will be divided between them,
+with one receiving two replicas and the other receiving just one. Finally,
+`west` has a single node that will get all three replicas reserved for `west`.
+
+If the nodes in one category (for example, those with
+`node.labels.datacenter=south`) can't handle their fair share of tasks due to
+constraints or resource limitations, the extra tasks will be assigned to other
+nodes instead, if possible.
+
+Both engine labels and node labels are supported by placement preferences. The
+example above uses a node label, because the label is referenced with
+`node.labels.datacenter`. To spread over the values of an engine label, use
+`--placement-pref spread=engine.labels.<label name>`.
+
+It is possible to add multiple placement preferences to a service. This
+establishes a hierarchy of preferences, so that tasks are first divided over
+one category, and then further divided over additional categories. One example
+of where this may be useful is dividing tasks fairly between datacenters, and
+then splitting the tasks within each datacenter over a choice of racks. To add
+multiple placement preferences, specify the `--placement-pref` flag multiple
+times. The order is significant, and the placement preferences will be applied
+in the order given when making scheduling decisions.
+
+The following example sets up a service with multiple placement preferences.
+Tasks are spread first over the various datacenters, and then over racks
+(as indicated by the respective labels):
+
+```bash
+$ docker service create \
+  --replicas 9 \
+  --name redis_2 \
+  --placement-pref 'spread=node.labels.datacenter' \
+  --placement-pref 'spread=node.labels.rack' \
+  redis:3.0.6
+```
+
+When updating a service with `docker service update`, `--placement-pref-add`
+appends a new placement preference after all existing placement preferences.
+`--placement-pref-rm` removes an existing placement preference that matches the
+argument.
+
 ### Attach a service to an existing network (--network)
 
 You can use overlay networks to connect one or more services within the swarm.
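As a rough cross-check between the CLI flags documented above and the REST payload described in the swagger change at the top of this patch, the following sketch marshals the new Go types to JSON. It is illustrative only; the surrounding `TaskTemplate` fields are omitted and the label names are assumptions.

```go
package main

import (
	"encoding/json"
	"fmt"

	"github.com/docker/docker/api/types/swarm"
)

func main() {
	// Roughly what the CLI builds for:
	//   docker service create \
	//     --placement-pref 'spread=node.labels.datacenter' \
	//     --placement-pref 'spread=node.labels.rack' ...
	placement := swarm.Placement{
		Preferences: []swarm.PlacementPreference{
			{Spread: &swarm.SpreadOver{SpreadDescriptor: "node.labels.datacenter"}},
			{Spread: &swarm.SpreadOver{SpreadDescriptor: "node.labels.rack"}},
		},
	}

	out, err := json.MarshalIndent(placement, "", "  ")
	if err != nil {
		panic(err)
	}
	// Prints a "Preferences" array whose entries carry a "Spread" object with
	// a "SpreadDescriptor" field, matching the swagger definition added above.
	fmt.Println(string(out))
}
```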
diff --git a/docs/reference/commandline/service_update.md b/docs/reference/commandline/service_update.md
index 223ba2a51f..131ccef365 100644
--- a/docs/reference/commandline/service_update.md
+++ b/docs/reference/commandline/service_update.md
@@ -56,6 +56,8 @@ Options:
       --mount-add mount                    Add or update a mount on a service
       --mount-rm list                      Remove a mount by its target path (default [])
       --no-healthcheck                     Disable any container-specified HEALTHCHECK
+      --placement-pref-add pref            Add a placement preference
+      --placement-pref-rm pref             Remove a placement preference
       --publish-add port                   Add or update a published port
       --publish-rm port                    Remove a published port by its target port
       --read-only                          Mount the container's root filesystem as read only
diff --git a/integration-cli/docker_api_swarm_service_test.go b/integration-cli/docker_api_swarm_service_test.go
index 67cda889b5..1ef1a84cde 100644
--- a/integration-cli/docker_api_swarm_service_test.go
+++ b/integration-cli/docker_api_swarm_service_test.go
@@ -365,6 +365,48 @@ func (s *DockerSwarmSuite) TestAPISwarmServiceConstraintLabel(c *check.C) {
     }
 }
 
+func (s *DockerSwarmSuite) TestAPISwarmServicePlacementPrefs(c *check.C) {
+    const nodeCount = 3
+    var daemons [nodeCount]*daemon.Swarm
+    for i := 0; i < nodeCount; i++ {
+        daemons[i] = s.AddDaemon(c, true, i == 0)
+    }
+    // wait for nodes ready
+    waitAndAssert(c, 5*time.Second, daemons[0].CheckNodeReadyCount, checker.Equals, nodeCount)
+    nodes := daemons[0].ListNodes(c)
+    c.Assert(len(nodes), checker.Equals, nodeCount)
+
+    // add labels to nodes
+    daemons[0].UpdateNode(c, nodes[0].ID, func(n *swarm.Node) {
+        n.Spec.Annotations.Labels = map[string]string{
+            "rack": "a",
+        }
+    })
+    for i := 1; i < nodeCount; i++ {
+        daemons[0].UpdateNode(c, nodes[i].ID, func(n *swarm.Node) {
+            n.Spec.Annotations.Labels = map[string]string{
+                "rack": "b",
+            }
+        })
+    }
+
+    // create service
+    instances := 4
+    prefs := []swarm.PlacementPreference{{Spread: &swarm.SpreadOver{SpreadDescriptor: "node.labels.rack"}}}
+    id := daemons[0].CreateService(c, simpleTestService, setPlacementPrefs(prefs), setInstances(instances))
+    // wait for tasks ready
+    waitAndAssert(c, defaultReconciliationTimeout, daemons[0].CheckServiceRunningTasks(id), checker.Equals, instances)
+    tasks := daemons[0].GetServiceTasks(c, id)
+    // validate that tasks are spread evenly between the two rack values
+    tasksOnNode := make(map[string]int)
+    for _, task := range tasks {
+        tasksOnNode[task.NodeID]++
+    }
+    c.Assert(tasksOnNode[nodes[0].ID], checker.Equals, 2)
+    c.Assert(tasksOnNode[nodes[1].ID], checker.Equals, 1)
+    c.Assert(tasksOnNode[nodes[2].ID], checker.Equals, 1)
+}
+
 func (s *DockerSwarmSuite) TestAPISwarmServicesStateReporting(c *check.C) {
     testRequires(c, SameHostDaemon)
     testRequires(c, DaemonIsLinux)
diff --git a/integration-cli/docker_api_swarm_test.go b/integration-cli/docker_api_swarm_test.go
index eafde63efc..f833ea46fa 100644
--- a/integration-cli/docker_api_swarm_test.go
+++ b/integration-cli/docker_api_swarm_test.go
@@ -596,6 +596,15 @@ func setConstraints(constraints []string) daemon.ServiceConstructor {
     }
 }
 
+func setPlacementPrefs(prefs []swarm.PlacementPreference) daemon.ServiceConstructor {
+    return func(s *swarm.Service) {
+        if s.Spec.TaskTemplate.Placement == nil {
+            s.Spec.TaskTemplate.Placement = &swarm.Placement{}
+        }
+        s.Spec.TaskTemplate.Placement.Preferences = prefs
+    }
+}
+
 func setGlobalMode(s *swarm.Service) {
     s.Spec.Mode = swarm.ServiceMode{
         Global: &swarm.GlobalService{},
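To close, here is a small, self-contained sketch of the add/remove semantics that `updatePlacementPreferences` implements for `docker service update`: removals are matched by spread descriptor, and additions are appended after the surviving preferences. The helper names (`mergePrefs`, `spread`) are hypothetical; they simply mirror the logic in the update.go hunk and the expectations of `TestUpdatePlacementPrefs` above.

```go
package main

import (
	"fmt"

	"github.com/docker/docker/api/types/swarm"
)

// spread is a hypothetical shorthand for building a spread preference.
func spread(descriptor string) swarm.PlacementPreference {
	return swarm.PlacementPreference{Spread: &swarm.SpreadOver{SpreadDescriptor: descriptor}}
}

// mergePrefs mirrors the CLI merge logic: drop existing preferences whose
// spread descriptor matches a removal, then append the additions at the end.
func mergePrefs(existing, add, remove []swarm.PlacementPreference) []swarm.PlacementPreference {
	var merged []swarm.PlacementPreference
	for _, pref := range existing {
		removed := false
		for _, rm := range remove {
			if rm.Spread != nil && pref.Spread != nil &&
				rm.Spread.SpreadDescriptor == pref.Spread.SpreadDescriptor {
				removed = true
				break
			}
		}
		if !removed {
			merged = append(merged, pref)
		}
	}
	return append(merged, add...)
}

func main() {
	existing := []swarm.PlacementPreference{spread("node.labels.rack"), spread("node.labels.row")}
	add := []swarm.PlacementPreference{spread("node.labels.dc")}
	remove := []swarm.PlacementPreference{spread("node.labels.rack")}

	for _, pref := range mergePrefs(existing, add, remove) {
		fmt.Println(pref.Spread.SpreadDescriptor)
	}
	// Output, matching TestUpdatePlacementPrefs:
	//   node.labels.row
	//   node.labels.dc
}
```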