Avoid returning early on agent join failures

When a gossip join fails, do not return early in the call chain: a join
failure is most likely transient, and the retry logic built into
networkdb is going to retry and succeed. Returning early prevents the
ingress network/sandbox from being initialized, which causes a problem
even after the gossip join succeeds on retry.

Signed-off-by: Jana Radhakrishnan <mrjana@docker.com>
This commit is contained in:
Jana Radhakrishnan 2016-09-27 00:12:07 -07:00
parent f9e11527ec
commit 22c322dded
2 changed files with 5 additions and 2 deletions

View File

@ -191,8 +191,7 @@ func (c *controller) agentSetup() error {
if remoteAddr != "" {
if err := c.agentJoin(remoteAddr); err != nil {
logrus.Errorf("Error in agentJoin : %v", err)
return nil
logrus.Errorf("Error in joining gossip cluster : %v(join will be retried in background)", err)
}
}

View File

@ -161,6 +161,10 @@ func (nDB *NetworkDB) retryJoin(members []string, stop <-chan struct{}) {
logrus.Errorf("Failed to join memberlist %s on retry: %v", members, err)
continue
}
if err := nDB.sendNodeEvent(NodeEventTypeJoin); err != nil {
logrus.Errorf("failed to send node join on retry: %v", err)
continue
}
return
case <-stop:
return