mirror of
https://github.com/moby/moby.git
synced 2022-11-09 12:21:53 -05:00
Daemon Restart: attempt to wait for container deps
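On daemon restart, containers that link to other containers could fail to start because their linked containers were not up yet. This change makes a best-effort attempt to wait for those linked containers to start before starting the containers that depend on them, instead of failing. Signed-off-by: Brian Goff <cpuguy83@gmail.com>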
This commit is contained in:
parent
603d488a00
commit
19762da67e
2 changed files with 108 additions and 28 deletions
|
@ -351,41 +351,57 @@ func (daemon *Daemon) restore() error {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
group := sync.WaitGroup{}
|
restartContainers := make(map[*container.Container]chan struct{})
|
||||||
for _, c := range containers {
|
for _, c := range containers {
|
||||||
|
if !c.registered {
|
||||||
|
// Try to set the default name for a container if it exists prior to links
|
||||||
|
c.container.Name, err = daemon.generateNewName(c.container.ID)
|
||||||
|
if err != nil {
|
||||||
|
logrus.Debugf("Setting default id - %s", err)
|
||||||
|
}
|
||||||
|
if err := daemon.registerName(c.container); err != nil {
|
||||||
|
logrus.Errorf("Failed to register container %s: %s", c.container.ID, err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := daemon.Register(c.container); err != nil {
|
||||||
|
logrus.Errorf("Failed to register container %s: %s", c.container.ID, err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// get list of containers we need to restart
|
||||||
|
if daemon.configStore.AutoRestart && c.container.ShouldRestart() {
|
||||||
|
restartContainers[c.container] = make(chan struct{})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
group := sync.WaitGroup{}
|
||||||
|
for c, notifier := range restartContainers {
|
||||||
group.Add(1)
|
group.Add(1)
|
||||||
|
go func(container *container.Container, chNotify chan struct{}) {
|
||||||
go func(container *container.Container, registered bool) {
|
|
||||||
defer group.Done()
|
defer group.Done()
|
||||||
|
logrus.Debugf("Starting container %s", container.ID)
|
||||||
|
|
||||||
if !registered {
|
// ignore errors here as this is a best effort to wait for children to be
|
||||||
// Try to set the default name for a container if it exists prior to links
|
// running before we try to start the container
|
||||||
container.Name, err = daemon.generateNewName(container.ID)
|
children, err := daemon.children(container.Name)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logrus.Debugf("Setting default id - %s", err)
|
logrus.Warnf("error getting children for %s: %v", container.Name, err)
|
||||||
|
}
|
||||||
|
timeout := time.After(5 * time.Second)
|
||||||
|
for _, child := range children {
|
||||||
|
if notifier, exists := restartContainers[child]; exists {
|
||||||
|
select {
|
||||||
|
case <-notifier:
|
||||||
|
case <-timeout:
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if err := daemon.registerName(container); err != nil {
|
if err := daemon.containerStart(container); err != nil {
|
||||||
logrus.Errorf("Failed to register container %s: %s", container.ID, err)
|
logrus.Errorf("Failed to start container %s: %s", container.ID, err)
|
||||||
return
|
|
||||||
}
|
}
|
||||||
|
close(chNotify)
|
||||||
if err := daemon.Register(container); err != nil {
|
}(c, notifier)
|
||||||
logrus.Errorf("Failed to register container %s: %s", container.ID, err)
|
|
||||||
// The container register failed should not be started.
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// check the restart policy on the containers and restart any container with
|
|
||||||
// the restart policy of "always"
|
|
||||||
if daemon.configStore.AutoRestart && container.ShouldRestart() {
|
|
||||||
logrus.Debugf("Starting container %s", container.ID)
|
|
||||||
|
|
||||||
if err := daemon.containerStart(container); err != nil {
|
|
||||||
logrus.Errorf("Failed to start container %s: %s", container.ID, err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}(c.container, c.registered)
|
|
||||||
}
|
}
|
||||||
group.Wait()
|
group.Wait()
|
||||||
|
|
||||||
|
|
|
@ -14,6 +14,7 @@ import (
|
||||||
"regexp"
|
"regexp"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/docker/docker/pkg/integration/checker"
|
"github.com/docker/docker/pkg/integration/checker"
|
||||||
|
@ -1878,3 +1879,66 @@ func (s *DockerDaemonSuite) TestDaemonNoSpaceleftOnDeviceError(c *check.C) {
|
||||||
out, err := s.d.Cmd("pull", "registry:2")
|
out, err := s.d.Cmd("pull", "registry:2")
|
||||||
c.Assert(out, check.Not(check.Equals), 1, check.Commentf("no space left on device"))
|
c.Assert(out, check.Not(check.Equals), 1, check.Commentf("no space left on device"))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Test daemon restart with container links + auto restart
|
||||||
|
func (s *DockerDaemonSuite) TestDaemonRestartContainerLinksRestart(c *check.C) {
|
||||||
|
d := NewDaemon(c)
|
||||||
|
err := d.StartWithBusybox()
|
||||||
|
c.Assert(err, checker.IsNil)
|
||||||
|
|
||||||
|
parent1Args := []string{}
|
||||||
|
parent2Args := []string{}
|
||||||
|
wg := sync.WaitGroup{}
|
||||||
|
maxChildren := 10
|
||||||
|
chErr := make(chan error, maxChildren)
|
||||||
|
|
||||||
|
for i := 0; i < maxChildren; i++ {
|
||||||
|
wg.Add(1)
|
||||||
|
name := fmt.Sprintf("test%d", i)
|
||||||
|
|
||||||
|
if i < maxChildren/2 {
|
||||||
|
parent1Args = append(parent1Args, []string{"--link", name}...)
|
||||||
|
} else {
|
||||||
|
parent2Args = append(parent2Args, []string{"--link", name}...)
|
||||||
|
}
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
_, err = d.Cmd("run", "-d", "--name", name, "--restart=always", "busybox", "top")
|
||||||
|
chErr <- err
|
||||||
|
wg.Done()
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
|
||||||
|
wg.Wait()
|
||||||
|
close(chErr)
|
||||||
|
for err := range chErr {
|
||||||
|
c.Assert(err, check.IsNil)
|
||||||
|
}
|
||||||
|
|
||||||
|
parent1Args = append([]string{"run", "-d"}, parent1Args...)
|
||||||
|
parent1Args = append(parent1Args, []string{"--name=parent1", "--restart=always", "busybox", "top"}...)
|
||||||
|
parent2Args = append([]string{"run", "-d"}, parent2Args...)
|
||||||
|
parent2Args = append(parent2Args, []string{"--name=parent2", "--restart=always", "busybox", "top"}...)
|
||||||
|
|
||||||
|
_, err = d.Cmd(parent1Args[0], parent1Args[1:]...)
|
||||||
|
c.Assert(err, check.IsNil)
|
||||||
|
_, err = d.Cmd(parent2Args[0], parent2Args[1:]...)
|
||||||
|
c.Assert(err, check.IsNil)
|
||||||
|
|
||||||
|
err = d.Stop()
|
||||||
|
c.Assert(err, check.IsNil)
|
||||||
|
// clear the log file -- we don't need any of it but may for the next part
|
||||||
|
// can ignore the error here, this is just a cleanup
|
||||||
|
os.Truncate(d.LogfileName(), 0)
|
||||||
|
err = d.Start()
|
||||||
|
c.Assert(err, check.IsNil)
|
||||||
|
|
||||||
|
for _, num := range []string{"1", "2"} {
|
||||||
|
out, err := d.Cmd("inspect", "-f", "{{ .State.Running }}", "parent"+num)
|
||||||
|
c.Assert(err, check.IsNil)
|
||||||
|
if strings.TrimSpace(out) != "true" {
|
||||||
|
log, _ := ioutil.ReadFile(d.LogfileName())
|
||||||
|
c.Fatalf("parent container is not running\n%s", string(log))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in a new issue