Allow concurrent calls to agentClose

- This fixes a panic in memberlist.Leave(), which occurs because it is called
  after memberlist.shutdown = false
  It happens because of two interlocking calls to NetworkDB.clusterLeave()
  It is easily reproducible with two back-to-back calls
  to docker swarm init && docker swarm leave --force
  While the first clusterLeave() is waiting for sendNodeEvent(NodeEventTypeLeave)
  to timeout (5 sec) a second clusterLeave() is called. The second clusterLeave()
  will end up invoking memberlist.Leave() after the previous call already did
  the same, therefore after memberlist.shutdown was set false.
- The fix is to have agentClose() acquire the agent instance and reset the
  agent pointer right away under lock. Then execute the closing/leave functions
  on the agent instance.

Signed-off-by: Alessandro Boch <aboch@docker.com>
This commit is contained in:
Alessandro Boch 2016-11-01 14:32:19 -07:00
parent fd70adfac5
commit efc25da851
1 changed file with 11 additions and 7 deletions

View File

@ -328,22 +328,26 @@ func (c *controller) agentDriverNotify(d driverapi.Driver) {
}
func (c *controller) agentClose() {
if c.agent == nil {
// Acquire current agent instance and reset its pointer
// then run closing functions
c.Lock()
agent := c.agent
c.agent = nil
c.Unlock()
if agent == nil {
return
}
for _, cancelFuncs := range c.agent.driverCancelFuncs {
for _, cancelFuncs := range agent.driverCancelFuncs {
for _, cancel := range cancelFuncs {
cancel()
}
}
c.agent.epTblCancel()
c.agent.networkDB.Close()
agent.epTblCancel()
c.Lock()
c.agent = nil
c.Unlock()
agent.networkDB.Close()
}
func (n *network) isClusterEligible() bool {