1
0
Fork 0
mirror of https://github.com/moby/moby.git synced 2022-11-09 12:21:53 -05:00

Daemon Restart: attempt to wait for container deps

On daemon restart, make a best-effort attempt to wait for a container's
linked containers to come up before restarting it, instead of failing.

Signed-off-by: Brian Goff <cpuguy83@gmail.com>
This commit is contained in:
Brian Goff 2015-11-24 15:25:12 -05:00
parent 603d488a00
commit 19762da67e
2 changed files with 108 additions and 28 deletions

View file

@ -351,41 +351,57 @@ func (daemon *Daemon) restore() error {
} }
} }
group := sync.WaitGroup{} restartContainers := make(map[*container.Container]chan struct{})
for _, c := range containers { for _, c := range containers {
if !c.registered {
// Try to set the default name for a container if it exists prior to links
c.container.Name, err = daemon.generateNewName(c.container.ID)
if err != nil {
logrus.Debugf("Setting default id - %s", err)
}
if err := daemon.registerName(c.container); err != nil {
logrus.Errorf("Failed to register container %s: %s", c.container.ID, err)
continue
}
}
if err := daemon.Register(c.container); err != nil {
logrus.Errorf("Failed to register container %s: %s", c.container.ID, err)
continue
}
// get list of containers we need to restart
if daemon.configStore.AutoRestart && c.container.ShouldRestart() {
restartContainers[c.container] = make(chan struct{})
}
}
group := sync.WaitGroup{}
for c, notifier := range restartContainers {
group.Add(1) group.Add(1)
go func(container *container.Container, chNotify chan struct{}) {
go func(container *container.Container, registered bool) {
defer group.Done() defer group.Done()
logrus.Debugf("Starting container %s", container.ID)
if !registered { // ignore errors here as this is a best effort to wait for children to be
// Try to set the default name for a container if it exists prior to links // running before we try to start the container
container.Name, err = daemon.generateNewName(container.ID) children, err := daemon.children(container.Name)
if err != nil { if err != nil {
logrus.Debugf("Setting default id - %s", err) logrus.Warnf("error getting children for %s: %v", container.Name, err)
}
timeout := time.After(5 * time.Second)
for _, child := range children {
if notifier, exists := restartContainers[child]; exists {
select {
case <-notifier:
case <-timeout:
}
} }
} }
if err := daemon.registerName(container); err != nil { if err := daemon.containerStart(container); err != nil {
logrus.Errorf("Failed to register container %s: %s", container.ID, err) logrus.Errorf("Failed to start container %s: %s", container.ID, err)
return
} }
close(chNotify)
if err := daemon.Register(container); err != nil { }(c, notifier)
logrus.Errorf("Failed to register container %s: %s", container.ID, err)
// The container register failed should not be started.
return
}
// check the restart policy on the containers and restart any container with
// the restart policy of "always"
if daemon.configStore.AutoRestart && container.ShouldRestart() {
logrus.Debugf("Starting container %s", container.ID)
if err := daemon.containerStart(container); err != nil {
logrus.Errorf("Failed to start container %s: %s", container.ID, err)
}
}
}(c.container, c.registered)
} }
group.Wait() group.Wait()

View file

@ -14,6 +14,7 @@ import (
"regexp" "regexp"
"strconv" "strconv"
"strings" "strings"
"sync"
"time" "time"
"github.com/docker/docker/pkg/integration/checker" "github.com/docker/docker/pkg/integration/checker"
@ -1878,3 +1879,66 @@ func (s *DockerDaemonSuite) TestDaemonNoSpaceleftOnDeviceError(c *check.C) {
out, err := s.d.Cmd("pull", "registry:2") out, err := s.d.Cmd("pull", "registry:2")
c.Assert(out, check.Not(check.Equals), 1, check.Commentf("no space left on device")) c.Assert(out, check.Not(check.Equals), 1, check.Commentf("no space left on device"))
} }
// TestDaemonRestartContainerLinksRestart tests a daemon restart with container
// links combined with auto restart.
//
// It starts maxChildren child containers in parallel (all with
// --restart=always), creates two parent containers that each link to half of
// the children, restarts the daemon, and then asserts that both parents are
// reported as running. On failure the daemon log is included in the message.
func (s *DockerDaemonSuite) TestDaemonRestartContainerLinksRestart(c *check.C) {
	d := NewDaemon(c)
	err := d.StartWithBusybox()
	c.Assert(err, checker.IsNil)
	// This daemon is created by the test itself, so make sure it does not
	// outlive the test. The error is deliberately ignored: cleanup only.
	// NOTE(review): assumes Stop is harmless on an already-stopped daemon
	// (reachable if the restart below fails) — confirm against the helper.
	defer d.Stop()

	var parent1Args []string
	var parent2Args []string
	wg := sync.WaitGroup{}
	maxChildren := 10
	// Buffered to maxChildren so no child goroutine ever blocks on send.
	chErr := make(chan error, maxChildren)

	for i := 0; i < maxChildren; i++ {
		wg.Add(1)
		name := fmt.Sprintf("test%d", i)

		// First half of the children is linked from parent1, the rest from parent2.
		if i < maxChildren/2 {
			parent1Args = append(parent1Args, "--link", name)
		} else {
			parent2Args = append(parent2Args, "--link", name)
		}

		go func(name string) {
			defer wg.Done()
			// Use a goroutine-local err: writing the enclosing function's err
			// from several goroutines at once is a data race and could make a
			// goroutine report another goroutine's result.
			_, err := d.Cmd("run", "-d", "--name", name, "--restart=always", "busybox", "top")
			chErr <- err
		}(name)
	}

	wg.Wait()
	close(chErr)
	for err := range chErr {
		c.Assert(err, check.IsNil)
	}

	parent1Args = append([]string{"run", "-d"}, parent1Args...)
	parent1Args = append(parent1Args, "--name=parent1", "--restart=always", "busybox", "top")
	parent2Args = append([]string{"run", "-d"}, parent2Args...)
	parent2Args = append(parent2Args, "--name=parent2", "--restart=always", "busybox", "top")

	_, err = d.Cmd(parent1Args[0], parent1Args[1:]...)
	c.Assert(err, check.IsNil)
	_, err = d.Cmd(parent2Args[0], parent2Args[1:]...)
	c.Assert(err, check.IsNil)

	err = d.Stop()
	c.Assert(err, check.IsNil)
	// clear the log file -- we don't need any of it but may for the next part
	// can ignore the error here, this is just a cleanup
	_ = os.Truncate(d.LogfileName(), 0)
	err = d.Start()
	c.Assert(err, check.IsNil)

	for _, num := range []string{"1", "2"} {
		out, err := d.Cmd("inspect", "-f", "{{ .State.Running }}", "parent"+num)
		c.Assert(err, check.IsNil)
		if strings.TrimSpace(out) != "true" {
			// Best effort: a read error just yields an empty log in the message.
			daemonLog, _ := ioutil.ReadFile(d.LogfileName())
			c.Fatalf("parent container is not running\n%s", string(daemonLog))
		}
	}
}