mirror of
https://github.com/moby/moby.git
synced 2022-11-09 12:21:53 -05:00
Fix autostart for swarm scope connected containers
The swarm scope network connected containers with autostart enabled there was a dependency problem with the cluster to be initialized before we can autostart them. With the current container restart code happening before cluster init, these containers were not getting autostarted properly. Added a fix to delay the container start of those containers which has atleast one swarm scope endpoint to until after the cluster is initialized. Signed-off-by: Jana Radhakrishnan <mrjana@docker.com>
This commit is contained in:
parent
cf58eb437c
commit
c9fb551d60
6 changed files with 92 additions and 10 deletions
|
@ -262,6 +262,11 @@ func (cli *DaemonCli) start(opts daemonOptions) (err error) {
|
|||
logrus.Fatalf("Error creating cluster component: %v", err)
|
||||
}
|
||||
|
||||
// Restart all autostart containers which has a swarm endpoint
|
||||
// and is not yet running now that we have successfully
|
||||
// initialized the cluster.
|
||||
d.RestartSwarmContainers()
|
||||
|
||||
logrus.Info("Daemon has completed initialization")
|
||||
|
||||
logrus.WithFields(logrus.Fields{
|
||||
|
|
|
@ -135,10 +135,11 @@ type Cluster struct {
|
|||
// helps in identifying the attachment ID via the taskID and the
|
||||
// corresponding attachment configuration obtained from the manager.
|
||||
type attacher struct {
|
||||
taskID string
|
||||
config *network.NetworkingConfig
|
||||
attachWaitCh chan *network.NetworkingConfig
|
||||
detachWaitCh chan struct{}
|
||||
taskID string
|
||||
config *network.NetworkingConfig
|
||||
attachWaitCh chan *network.NetworkingConfig
|
||||
attachCompleteCh chan struct{}
|
||||
detachWaitCh chan struct{}
|
||||
}
|
||||
|
||||
type node struct {
|
||||
|
@ -1262,12 +1263,24 @@ func (c *Cluster) WaitForDetachment(ctx context.Context, networkName, networkID,
|
|||
agent := c.node.Agent()
|
||||
c.RUnlock()
|
||||
|
||||
if ok && attacher != nil && attacher.detachWaitCh != nil {
|
||||
if ok && attacher != nil &&
|
||||
attacher.detachWaitCh != nil &&
|
||||
attacher.attachCompleteCh != nil {
|
||||
// Attachment may be in progress still so wait for
|
||||
// attachment to complete.
|
||||
select {
|
||||
case <-attacher.detachWaitCh:
|
||||
case <-attacher.attachCompleteCh:
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
}
|
||||
|
||||
if attacher.taskID == taskID {
|
||||
select {
|
||||
case <-attacher.detachWaitCh:
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return agent.ResourceAllocator().DetachNetwork(ctx, taskID)
|
||||
|
@ -1289,9 +1302,11 @@ func (c *Cluster) AttachNetwork(target string, containerID string, addresses []s
|
|||
agent := c.node.Agent()
|
||||
attachWaitCh := make(chan *network.NetworkingConfig)
|
||||
detachWaitCh := make(chan struct{})
|
||||
attachCompleteCh := make(chan struct{})
|
||||
c.attachers[aKey] = &attacher{
|
||||
attachWaitCh: attachWaitCh,
|
||||
detachWaitCh: detachWaitCh,
|
||||
attachWaitCh: attachWaitCh,
|
||||
attachCompleteCh: attachCompleteCh,
|
||||
detachWaitCh: detachWaitCh,
|
||||
}
|
||||
c.Unlock()
|
||||
|
||||
|
@ -1306,6 +1321,11 @@ func (c *Cluster) AttachNetwork(target string, containerID string, addresses []s
|
|||
return nil, fmt.Errorf("Could not attach to network %s: %v", target, err)
|
||||
}
|
||||
|
||||
c.Lock()
|
||||
c.attachers[aKey].taskID = taskID
|
||||
close(attachCompleteCh)
|
||||
c.Unlock()
|
||||
|
||||
logrus.Debugf("Successfully attached to network %s with tid %s", target, taskID)
|
||||
|
||||
var config *network.NetworkingConfig
|
||||
|
@ -1316,7 +1336,6 @@ func (c *Cluster) AttachNetwork(target string, containerID string, addresses []s
|
|||
}
|
||||
|
||||
c.Lock()
|
||||
c.attachers[aKey].taskID = taskID
|
||||
c.attachers[aKey].config = config
|
||||
c.Unlock()
|
||||
return config, nil
|
||||
|
|
|
@ -384,6 +384,9 @@ func (daemon *Daemon) findAndAttachNetwork(container *container.Container, idOrN
|
|||
return nil, nil, err
|
||||
}
|
||||
|
||||
// This container has attachment to a swarm scope
|
||||
// network. Update the container network settings accordingly.
|
||||
container.NetworkSettings.HasSwarmEndpoint = true
|
||||
return n, config, nil
|
||||
}
|
||||
|
||||
|
@ -492,6 +495,7 @@ func (daemon *Daemon) allocateNetwork(container *container.Container) error {
|
|||
// on first network connecting.
|
||||
defaultNetName := runconfig.DefaultDaemonNetworkMode().NetworkName()
|
||||
if nConf, ok := container.NetworkSettings.Networks[defaultNetName]; ok {
|
||||
cleanOperationalData(nConf)
|
||||
if err := daemon.connectToNetwork(container, defaultNetName, nConf.EndpointSettings, updateSettings); err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -512,6 +516,7 @@ func (daemon *Daemon) allocateNetwork(container *container.Container) error {
|
|||
}
|
||||
|
||||
for i, epConf := range epConfigs {
|
||||
cleanOperationalData(epConf)
|
||||
if err := daemon.connectToNetwork(container, networks[i], epConf.EndpointSettings, updateSettings); err != nil {
|
||||
return err
|
||||
}
|
||||
|
|
|
@ -202,7 +202,13 @@ func (daemon *Daemon) restore() error {
|
|||
// fixme: only if not running
|
||||
// get list of containers we need to restart
|
||||
if !c.IsRunning() && !c.IsPaused() {
|
||||
if daemon.configStore.AutoRestart && c.ShouldRestart() {
|
||||
// Do not autostart containers which
|
||||
// has endpoints in a swarm scope
|
||||
// network yet since the cluster is
|
||||
// not initialized yet. We will start
|
||||
// it after the cluster is
|
||||
// initialized.
|
||||
if daemon.configStore.AutoRestart && c.ShouldRestart() && !c.NetworkSettings.HasSwarmEndpoint {
|
||||
mapLock.Lock()
|
||||
restartContainers[c] = make(chan struct{})
|
||||
mapLock.Unlock()
|
||||
|
@ -346,6 +352,30 @@ func (daemon *Daemon) restore() error {
|
|||
return nil
|
||||
}
|
||||
|
||||
// RestartSwarmContainers restarts any autostart container which has a
|
||||
// swarm endpoint.
|
||||
func (daemon *Daemon) RestartSwarmContainers() {
|
||||
group := sync.WaitGroup{}
|
||||
for _, c := range daemon.List() {
|
||||
if !c.IsRunning() && !c.IsPaused() {
|
||||
// Autostart all the containers which has a
|
||||
// swarm endpoint now that the cluster is
|
||||
// initialized.
|
||||
if daemon.configStore.AutoRestart && c.ShouldRestart() && c.NetworkSettings.HasSwarmEndpoint {
|
||||
group.Add(1)
|
||||
go func(c *container.Container) {
|
||||
defer group.Done()
|
||||
if err := daemon.containerStart(c, ""); err != nil {
|
||||
logrus.Error(err)
|
||||
}
|
||||
}(c)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
group.Wait()
|
||||
}
|
||||
|
||||
// waitForNetworks is used during daemon initialization when starting up containers
|
||||
// It ensures that all of a container's networks are available before the daemon tries to start the container.
|
||||
// In practice it just makes sure the discovery service is available for containers which use a network that require discovery.
|
||||
|
|
|
@ -21,6 +21,7 @@ type Settings struct {
|
|||
SecondaryIPAddresses []networktypes.Address
|
||||
SecondaryIPv6Addresses []networktypes.Address
|
||||
IsAnonymousEndpoint bool
|
||||
HasSwarmEndpoint bool
|
||||
}
|
||||
|
||||
// EndpointSettings is a package local wrapper for
|
||||
|
|
|
@ -242,3 +242,25 @@ func (s *DockerSwarmSuite) TestSwarmServiceWithGroup(c *check.C) {
|
|||
c.Assert(err, checker.IsNil)
|
||||
c.Assert(strings.TrimSpace(out), checker.Equals, "uid=0(root) gid=0(root) groups=10(wheel),29(audio),50(staff),777")
|
||||
}
|
||||
|
||||
func (s *DockerSwarmSuite) TestSwarmContainerAutoStart(c *check.C) {
|
||||
d := s.AddDaemon(c, true, true)
|
||||
|
||||
out, err := d.Cmd("network", "create", "--attachable", "-d", "overlay", "foo")
|
||||
c.Assert(err, checker.IsNil)
|
||||
c.Assert(strings.TrimSpace(out), checker.Not(checker.Equals), "")
|
||||
|
||||
out, err = d.Cmd("run", "-id", "--restart=always", "--net=foo", "--name=test", "busybox", "top")
|
||||
c.Assert(err, checker.IsNil)
|
||||
c.Assert(strings.TrimSpace(out), checker.Not(checker.Equals), "")
|
||||
|
||||
out, err = d.Cmd("ps", "-q")
|
||||
c.Assert(err, checker.IsNil)
|
||||
c.Assert(strings.TrimSpace(out), checker.Not(checker.Equals), "")
|
||||
|
||||
d.Restart()
|
||||
|
||||
out, err = d.Cmd("ps", "-q")
|
||||
c.Assert(err, checker.IsNil)
|
||||
c.Assert(strings.TrimSpace(out), checker.Not(checker.Equals), "")
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue