From 19762da67e40879be77b1b55c21a9754235fbb78 Mon Sep 17 00:00:00 2001 From: Brian Goff Date: Tue, 24 Nov 2015 15:25:12 -0500 Subject: [PATCH] Daemon Restart: attempt to wait for container deps On daemon restart, make a best-effort attempt to wait for a container's linked containers to come up before restarting it, instead of failing outright when they are not running yet. Signed-off-by: Brian Goff --- daemon/daemon.go | 72 ++++++++++++++--------- integration-cli/docker_cli_daemon_test.go | 64 ++++++++++++++++++++ 2 files changed, 108 insertions(+), 28 deletions(-) diff --git a/daemon/daemon.go b/daemon/daemon.go index f858e73dd2..b803379e19 100644 --- a/daemon/daemon.go +++ b/daemon/daemon.go @@ -351,41 +351,57 @@ func (daemon *Daemon) restore() error { } } - group := sync.WaitGroup{} + restartContainers := make(map[*container.Container]chan struct{}) for _, c := range containers { + if !c.registered { + // Try to set the default name for a container if it exists prior to links + c.container.Name, err = daemon.generateNewName(c.container.ID) + if err != nil { + logrus.Debugf("Setting default id - %s", err) + } + if err := daemon.registerName(c.container); err != nil { + logrus.Errorf("Failed to register container %s: %s", c.container.ID, err) + continue + } + } + + if err := daemon.Register(c.container); err != nil { + logrus.Errorf("Failed to register container %s: %s", c.container.ID, err) + continue + } + // get list of containers we need to restart + if daemon.configStore.AutoRestart && c.container.ShouldRestart() { + restartContainers[c.container] = make(chan struct{}) + } + } + + group := sync.WaitGroup{} + for c, notifier := range restartContainers { group.Add(1) - - go func(container *container.Container, registered bool) { + go func(container *container.Container, chNotify chan struct{}) { defer group.Done() + logrus.Debugf("Starting container %s", container.ID) - if !registered { - // Try to set the default name for a container if it exists prior to links - container.Name, err = 
daemon.generateNewName(container.ID) - if err != nil { - logrus.Debugf("Setting default id - %s", err) + // ignore errors here as this is a best effort to wait for children to be + // running before we try to start the container + children, err := daemon.children(container.Name) + if err != nil { + logrus.Warnf("error getting children for %s: %v", container.Name, err) + } + timeout := time.After(5 * time.Second) + for _, child := range children { + if notifier, exists := restartContainers[child]; exists { + select { + case <-notifier: + case <-timeout: + } } } - if err := daemon.registerName(container); err != nil { - logrus.Errorf("Failed to register container %s: %s", container.ID, err) - return + if err := daemon.containerStart(container); err != nil { + logrus.Errorf("Failed to start container %s: %s", container.ID, err) } - - if err := daemon.Register(container); err != nil { - logrus.Errorf("Failed to register container %s: %s", container.ID, err) - // The container register failed should not be started. 
- return - } - - // check the restart policy on the containers and restart any container with - // the restart policy of "always" - if daemon.configStore.AutoRestart && container.ShouldRestart() { - logrus.Debugf("Starting container %s", container.ID) - - if err := daemon.containerStart(container); err != nil { - logrus.Errorf("Failed to start container %s: %s", container.ID, err) - } - } - }(c.container, c.registered) + close(chNotify) + }(c, notifier) } group.Wait() diff --git a/integration-cli/docker_cli_daemon_test.go b/integration-cli/docker_cli_daemon_test.go index 977518feea..210edfbd64 100644 --- a/integration-cli/docker_cli_daemon_test.go +++ b/integration-cli/docker_cli_daemon_test.go @@ -14,6 +14,7 @@ import ( "regexp" "strconv" "strings" + "sync" "time" "github.com/docker/docker/pkg/integration/checker" @@ -1878,3 +1879,66 @@ func (s *DockerDaemonSuite) TestDaemonNoSpaceleftOnDeviceError(c *check.C) { out, err := s.d.Cmd("pull", "registry:2") c.Assert(out, check.Not(check.Equals), 1, check.Commentf("no space left on device")) } + +// Test daemon restart with container links + auto restart +func (s *DockerDaemonSuite) TestDaemonRestartContainerLinksRestart(c *check.C) { + d := NewDaemon(c) + err := d.StartWithBusybox() + c.Assert(err, checker.IsNil) + + parent1Args := []string{} + parent2Args := []string{} + wg := sync.WaitGroup{} + maxChildren := 10 + chErr := make(chan error, maxChildren) + + for i := 0; i < maxChildren; i++ { + wg.Add(1) + name := fmt.Sprintf("test%d", i) + + if i < maxChildren/2 { + parent1Args = append(parent1Args, []string{"--link", name}...) + } else { + parent2Args = append(parent2Args, []string{"--link", name}...) + } + + go func() { + _, err = d.Cmd("run", "-d", "--name", name, "--restart=always", "busybox", "top") + chErr <- err + wg.Done() + }() + } + + wg.Wait() + close(chErr) + for err := range chErr { + c.Assert(err, check.IsNil) + } + + parent1Args = append([]string{"run", "-d"}, parent1Args...) 
+ parent1Args = append(parent1Args, []string{"--name=parent1", "--restart=always", "busybox", "top"}...) + parent2Args = append([]string{"run", "-d"}, parent2Args...) + parent2Args = append(parent2Args, []string{"--name=parent2", "--restart=always", "busybox", "top"}...) + + _, err = d.Cmd(parent1Args[0], parent1Args[1:]...) + c.Assert(err, check.IsNil) + _, err = d.Cmd(parent2Args[0], parent2Args[1:]...) + c.Assert(err, check.IsNil) + + err = d.Stop() + c.Assert(err, check.IsNil) + // clear the log file -- we don't need any of it but may for the next part + // can ignore the error here, this is just a cleanup + os.Truncate(d.LogfileName(), 0) + err = d.Start() + c.Assert(err, check.IsNil) + + for _, num := range []string{"1", "2"} { + out, err := d.Cmd("inspect", "-f", "{{ .State.Running }}", "parent"+num) + c.Assert(err, check.IsNil) + if strings.TrimSpace(out) != "true" { + log, _ := ioutil.ReadFile(d.LogfileName()) + c.Fatalf("parent container is not running\n%s", string(log)) + } + } +}