diff --git a/cmd/dockerd/daemon.go b/cmd/dockerd/daemon.go index eecbd00480..e860c34d8e 100644 --- a/cmd/dockerd/daemon.go +++ b/cmd/dockerd/daemon.go @@ -262,6 +262,11 @@ func (cli *DaemonCli) start(opts daemonOptions) (err error) { logrus.Fatalf("Error creating cluster component: %v", err) } + // Restart all autostart containers which has a swarm endpoint + // and is not yet running now that we have successfully + // initialized the cluster. + d.RestartSwarmContainers() + logrus.Info("Daemon has completed initialization") logrus.WithFields(logrus.Fields{ diff --git a/daemon/cluster/cluster.go b/daemon/cluster/cluster.go index 8106115837..97f36995ff 100644 --- a/daemon/cluster/cluster.go +++ b/daemon/cluster/cluster.go @@ -135,10 +135,11 @@ type Cluster struct { // helps in identifying the attachment ID via the taskID and the // corresponding attachment configuration obtained from the manager. type attacher struct { - taskID string - config *network.NetworkingConfig - attachWaitCh chan *network.NetworkingConfig - detachWaitCh chan struct{} + taskID string + config *network.NetworkingConfig + attachWaitCh chan *network.NetworkingConfig + attachCompleteCh chan struct{} + detachWaitCh chan struct{} } type node struct { @@ -1262,12 +1263,24 @@ func (c *Cluster) WaitForDetachment(ctx context.Context, networkName, networkID, agent := c.node.Agent() c.RUnlock() - if ok && attacher != nil && attacher.detachWaitCh != nil { + if ok && attacher != nil && + attacher.detachWaitCh != nil && + attacher.attachCompleteCh != nil { + // Attachment may be in progress still so wait for + // attachment to complete. select { - case <-attacher.detachWaitCh: + case <-attacher.attachCompleteCh: case <-ctx.Done(): return ctx.Err() } + + if attacher.taskID == taskID { + select { + case <-attacher.detachWaitCh: + case <-ctx.Done(): + return ctx.Err() + } + } } return agent.ResourceAllocator().DetachNetwork(ctx, taskID) @@ -1289,9 +1302,11 @@ func (c *Cluster) AttachNetwork(target string, containerID string, addresses []s agent := c.node.Agent() attachWaitCh := make(chan *network.NetworkingConfig) detachWaitCh := make(chan struct{}) + attachCompleteCh := make(chan struct{}) c.attachers[aKey] = &attacher{ - attachWaitCh: attachWaitCh, - detachWaitCh: detachWaitCh, + attachWaitCh: attachWaitCh, + attachCompleteCh: attachCompleteCh, + detachWaitCh: detachWaitCh, } c.Unlock() @@ -1306,6 +1321,11 @@ func (c *Cluster) AttachNetwork(target string, containerID string, addresses []s return nil, fmt.Errorf("Could not attach to network %s: %v", target, err) } + c.Lock() + c.attachers[aKey].taskID = taskID + close(attachCompleteCh) + c.Unlock() + logrus.Debugf("Successfully attached to network %s with tid %s", target, taskID) var config *network.NetworkingConfig @@ -1316,7 +1336,6 @@ func (c *Cluster) AttachNetwork(target string, containerID string, addresses []s } c.Lock() - c.attachers[aKey].taskID = taskID c.attachers[aKey].config = config c.Unlock() return config, nil diff --git a/daemon/container_operations.go b/daemon/container_operations.go index 818c82ccbf..cb30d77f2e 100644 --- a/daemon/container_operations.go +++ b/daemon/container_operations.go @@ -384,6 +384,9 @@ func (daemon *Daemon) findAndAttachNetwork(container *container.Container, idOrN return nil, nil, err } + // This container has attachment to a swarm scope + // network. Update the container network settings accordingly. + container.NetworkSettings.HasSwarmEndpoint = true return n, config, nil } @@ -492,6 +495,7 @@ func (daemon *Daemon) allocateNetwork(container *container.Container) error { // on first network connecting. defaultNetName := runconfig.DefaultDaemonNetworkMode().NetworkName() if nConf, ok := container.NetworkSettings.Networks[defaultNetName]; ok { + cleanOperationalData(nConf) if err := daemon.connectToNetwork(container, defaultNetName, nConf.EndpointSettings, updateSettings); err != nil { return err } @@ -512,6 +516,7 @@ func (daemon *Daemon) allocateNetwork(container *container.Container) error { } for i, epConf := range epConfigs { + cleanOperationalData(epConf) if err := daemon.connectToNetwork(container, networks[i], epConf.EndpointSettings, updateSettings); err != nil { return err } diff --git a/daemon/daemon.go b/daemon/daemon.go index 4faefcf310..cc537ae973 100755 --- a/daemon/daemon.go +++ b/daemon/daemon.go @@ -206,7 +206,13 @@ func (daemon *Daemon) restore() error { // fixme: only if not running // get list of containers we need to restart if !c.IsRunning() && !c.IsPaused() { - if daemon.configStore.AutoRestart && c.ShouldRestart() { + // Do not autostart containers which + // has endpoints in a swarm scope + // network yet since the cluster is + // not initialized yet. We will start + // it after the cluster is + // initialized. + if daemon.configStore.AutoRestart && c.ShouldRestart() && !c.NetworkSettings.HasSwarmEndpoint { mapLock.Lock() restartContainers[c] = make(chan struct{}) mapLock.Unlock() @@ -350,6 +356,30 @@ func (daemon *Daemon) restore() error { return nil } +// RestartSwarmContainers restarts any autostart container which has a +// swarm endpoint. +func (daemon *Daemon) RestartSwarmContainers() { + group := sync.WaitGroup{} + for _, c := range daemon.List() { + if !c.IsRunning() && !c.IsPaused() { + // Autostart all the containers which has a + // swarm endpoint now that the cluster is + // initialized. + if daemon.configStore.AutoRestart && c.ShouldRestart() && c.NetworkSettings.HasSwarmEndpoint { + group.Add(1) + go func(c *container.Container) { + defer group.Done() + if err := daemon.containerStart(c, ""); err != nil { + logrus.Error(err) + } + }(c) + } + } + + } + group.Wait() +} + // waitForNetworks is used during daemon initialization when starting up containers // It ensures that all of a container's networks are available before the daemon tries to start the container. // In practice it just makes sure the discovery service is available for containers which use a network that require discovery. diff --git a/daemon/network/settings.go b/daemon/network/settings.go index 1958f2330a..8f6b7dd59e 100644 --- a/daemon/network/settings.go +++ b/daemon/network/settings.go @@ -21,6 +21,7 @@ type Settings struct { SecondaryIPAddresses []networktypes.Address SecondaryIPv6Addresses []networktypes.Address IsAnonymousEndpoint bool + HasSwarmEndpoint bool } // EndpointSettings is a package local wrapper for diff --git a/integration-cli/docker_cli_swarm_test.go b/integration-cli/docker_cli_swarm_test.go index a1ffcd9027..e9c7ace3f3 100644 --- a/integration-cli/docker_cli_swarm_test.go +++ b/integration-cli/docker_cli_swarm_test.go @@ -242,3 +242,25 @@ func (s *DockerSwarmSuite) TestSwarmServiceWithGroup(c *check.C) { c.Assert(err, checker.IsNil) c.Assert(strings.TrimSpace(out), checker.Equals, "uid=0(root) gid=0(root) groups=10(wheel),29(audio),50(staff),777") } + +func (s *DockerSwarmSuite) TestSwarmContainerAutoStart(c *check.C) { + d := s.AddDaemon(c, true, true) + + out, err := d.Cmd("network", "create", "--attachable", "-d", "overlay", "foo") + c.Assert(err, checker.IsNil) + c.Assert(strings.TrimSpace(out), checker.Not(checker.Equals), "") + + out, err = d.Cmd("run", "-id", "--restart=always", "--net=foo", "--name=test", "busybox", "top") + c.Assert(err, checker.IsNil) + c.Assert(strings.TrimSpace(out), checker.Not(checker.Equals), "") + + out, err = d.Cmd("ps", "-q") + c.Assert(err, checker.IsNil) + c.Assert(strings.TrimSpace(out), checker.Not(checker.Equals), "") + + d.Restart() + + out, err = d.Cmd("ps", "-q") + c.Assert(err, checker.IsNil) + c.Assert(strings.TrimSpace(out), checker.Not(checker.Equals), "") +}