Allow concurrent calls to agentClose

- This fixes a panic in memberlist.Leave(), which occurs because it is called
  after memberlist.shutdown = false
  It happens because of two interlocking calls to NetworkDB.clusterLeave()
  It is easily reproducible with two back-to-back calls
  to docker swarm init && docker swarm leave --force
  While the first clusterLeave() is waiting for sendNodeEvent(NodeEventTypeLeave)
  to timeout (5 sec) a second clusterLeave() is called. The second clusterLeave()
  will end up invoking memberlist.Leave() after the previous call already did
  the same, therefore after memberlist.shutdown was set false.
- The fix is to have agentClose() acquire the agent instance and reset the
  agent pointer right away under lock. Then execute the closing/leave functions
  on the agent instance.

Signed-off-by: Alessandro Boch <aboch@docker.com>
This commit is contained in:
Alessandro Boch 2016-11-01 14:32:19 -07:00
parent fd70adfac5
commit efc25da851
1 changed file with 11 additions and 7 deletions

View File

@ -328,22 +328,26 @@ func (c *controller) agentDriverNotify(d driverapi.Driver) {
}
func (c *controller) agentClose() {
if c.agent == nil {
// Acquire current agent instance and reset its pointer
// then run closing functions
c.Lock()
agent := c.agent
c.agent = nil
c.Unlock()
if agent == nil {
return
}
for _, cancelFuncs := range c.agent.driverCancelFuncs {
for _, cancelFuncs := range agent.driverCancelFuncs {
for _, cancel := range cancelFuncs {
cancel()
}
}
c.agent.epTblCancel()
c.agent.networkDB.Close()
agent.epTblCancel()
c.Lock()
c.agent = nil
c.Unlock()
agent.networkDB.Close()
}
func (n *network) isClusterEligible() bool {