mirror of
https://github.com/moby/moby.git
synced 2022-11-09 12:21:53 -05:00
Daemon Restart: attempt to wait for container deps
This makes a best-effort attempt, on daemon restart, to start containers whose linked containers are not up yet, instead of failing. Signed-off-by: Brian Goff <cpuguy83@gmail.com>
This commit is contained in:
parent
603d488a00
commit
19762da67e
2 changed files with 108 additions and 28 deletions
|
@ -351,41 +351,57 @@ func (daemon *Daemon) restore() error {
|
|||
}
|
||||
}
|
||||
|
||||
group := sync.WaitGroup{}
|
||||
restartContainers := make(map[*container.Container]chan struct{})
|
||||
for _, c := range containers {
|
||||
group.Add(1)
|
||||
|
||||
go func(container *container.Container, registered bool) {
|
||||
defer group.Done()
|
||||
|
||||
if !registered {
|
||||
if !c.registered {
|
||||
// Try to set the default name for a container if it exists prior to links
|
||||
container.Name, err = daemon.generateNewName(container.ID)
|
||||
c.container.Name, err = daemon.generateNewName(c.container.ID)
|
||||
if err != nil {
|
||||
logrus.Debugf("Setting default id - %s", err)
|
||||
}
|
||||
if err := daemon.registerName(c.container); err != nil {
|
||||
logrus.Errorf("Failed to register container %s: %s", c.container.ID, err)
|
||||
continue
|
||||
}
|
||||
if err := daemon.registerName(container); err != nil {
|
||||
logrus.Errorf("Failed to register container %s: %s", container.ID, err)
|
||||
return
|
||||
}
|
||||
|
||||
if err := daemon.Register(container); err != nil {
|
||||
logrus.Errorf("Failed to register container %s: %s", container.ID, err)
|
||||
// The container register failed should not be started.
|
||||
return
|
||||
if err := daemon.Register(c.container); err != nil {
|
||||
logrus.Errorf("Failed to register container %s: %s", c.container.ID, err)
|
||||
continue
|
||||
}
|
||||
// get list of containers we need to restart
|
||||
if daemon.configStore.AutoRestart && c.container.ShouldRestart() {
|
||||
restartContainers[c.container] = make(chan struct{})
|
||||
}
|
||||
}
|
||||
|
||||
// check the restart policy on the containers and restart any container with
|
||||
// the restart policy of "always"
|
||||
if daemon.configStore.AutoRestart && container.ShouldRestart() {
|
||||
group := sync.WaitGroup{}
|
||||
for c, notifier := range restartContainers {
|
||||
group.Add(1)
|
||||
go func(container *container.Container, chNotify chan struct{}) {
|
||||
defer group.Done()
|
||||
logrus.Debugf("Starting container %s", container.ID)
|
||||
|
||||
// ignore errors here as this is a best effort to wait for children to be
|
||||
// running before we try to start the container
|
||||
children, err := daemon.children(container.Name)
|
||||
if err != nil {
|
||||
logrus.Warnf("error getting children for %s: %v", container.Name, err)
|
||||
}
|
||||
timeout := time.After(5 * time.Second)
|
||||
for _, child := range children {
|
||||
if notifier, exists := restartContainers[child]; exists {
|
||||
select {
|
||||
case <-notifier:
|
||||
case <-timeout:
|
||||
}
|
||||
}
|
||||
}
|
||||
if err := daemon.containerStart(container); err != nil {
|
||||
logrus.Errorf("Failed to start container %s: %s", container.ID, err)
|
||||
}
|
||||
}
|
||||
}(c.container, c.registered)
|
||||
close(chNotify)
|
||||
}(c, notifier)
|
||||
}
|
||||
group.Wait()
|
||||
|
||||
|
|
|
@ -14,6 +14,7 @@ import (
|
|||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/docker/docker/pkg/integration/checker"
|
||||
|
@ -1878,3 +1879,66 @@ func (s *DockerDaemonSuite) TestDaemonNoSpaceleftOnDeviceError(c *check.C) {
|
|||
out, err := s.d.Cmd("pull", "registry:2")
|
||||
c.Assert(out, check.Not(check.Equals), 1, check.Commentf("no space left on device"))
|
||||
}
|
||||
|
||||
// Test daemon restart with container links + auto restart
|
||||
func (s *DockerDaemonSuite) TestDaemonRestartContainerLinksRestart(c *check.C) {
|
||||
d := NewDaemon(c)
|
||||
err := d.StartWithBusybox()
|
||||
c.Assert(err, checker.IsNil)
|
||||
|
||||
parent1Args := []string{}
|
||||
parent2Args := []string{}
|
||||
wg := sync.WaitGroup{}
|
||||
maxChildren := 10
|
||||
chErr := make(chan error, maxChildren)
|
||||
|
||||
for i := 0; i < maxChildren; i++ {
|
||||
wg.Add(1)
|
||||
name := fmt.Sprintf("test%d", i)
|
||||
|
||||
if i < maxChildren/2 {
|
||||
parent1Args = append(parent1Args, []string{"--link", name}...)
|
||||
} else {
|
||||
parent2Args = append(parent2Args, []string{"--link", name}...)
|
||||
}
|
||||
|
||||
go func() {
|
||||
_, err = d.Cmd("run", "-d", "--name", name, "--restart=always", "busybox", "top")
|
||||
chErr <- err
|
||||
wg.Done()
|
||||
}()
|
||||
}
|
||||
|
||||
wg.Wait()
|
||||
close(chErr)
|
||||
for err := range chErr {
|
||||
c.Assert(err, check.IsNil)
|
||||
}
|
||||
|
||||
parent1Args = append([]string{"run", "-d"}, parent1Args...)
|
||||
parent1Args = append(parent1Args, []string{"--name=parent1", "--restart=always", "busybox", "top"}...)
|
||||
parent2Args = append([]string{"run", "-d"}, parent2Args...)
|
||||
parent2Args = append(parent2Args, []string{"--name=parent2", "--restart=always", "busybox", "top"}...)
|
||||
|
||||
_, err = d.Cmd(parent1Args[0], parent1Args[1:]...)
|
||||
c.Assert(err, check.IsNil)
|
||||
_, err = d.Cmd(parent2Args[0], parent2Args[1:]...)
|
||||
c.Assert(err, check.IsNil)
|
||||
|
||||
err = d.Stop()
|
||||
c.Assert(err, check.IsNil)
|
||||
// clear the log file -- we don't need any of it but may for the next part
|
||||
// can ignore the error here, this is just a cleanup
|
||||
os.Truncate(d.LogfileName(), 0)
|
||||
err = d.Start()
|
||||
c.Assert(err, check.IsNil)
|
||||
|
||||
for _, num := range []string{"1", "2"} {
|
||||
out, err := d.Cmd("inspect", "-f", "{{ .State.Running }}", "parent"+num)
|
||||
c.Assert(err, check.IsNil)
|
||||
if strings.TrimSpace(out) != "true" {
|
||||
log, _ := ioutil.ReadFile(d.LogfileName())
|
||||
c.Fatalf("parent container is not running\n%s", string(log))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue