From efc25da851bd68e52e6505e7c65f209bcb1fdfe4 Mon Sep 17 00:00:00 2001 From: Alessandro Boch Date: Tue, 1 Nov 2016 14:32:19 -0700 Subject: [PATCH] Allow concurrent calls to agentClose - This fixes a panic in memberlist.Leave() that occurs because it is called after memberlist.shutdown = false. It happens because of two interlocking calls to NetworkDB.clusterLeave(). It is easily reproducible with two back-to-back calls to docker swarm init && docker swarm leave --force. While the first clusterLeave() is waiting for sendNodeEvent(NodeEventTypeLeave) to time out (5 sec), a second clusterLeave() is called. The second clusterLeave() will end up invoking memberlist.Leave() after the previous call already did the same, and therefore after memberlist.shutdown was set to false. - The fix is to have agentClose() acquire the agent instance and reset the agent pointer right away under lock, then execute the closing/leave functions on the agent instance. Signed-off-by: Alessandro Boch --- libnetwork/agent.go | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/libnetwork/agent.go b/libnetwork/agent.go index 4c8980b2e1..f589d7bee2 100644 --- a/libnetwork/agent.go +++ b/libnetwork/agent.go @@ -328,22 +328,26 @@ func (c *controller) agentDriverNotify(d driverapi.Driver) { } func (c *controller) agentClose() { - if c.agent == nil { + // Acquire current agent instance and reset its pointer + // then run closing functions + c.Lock() + agent := c.agent + c.agent = nil + c.Unlock() + + if agent == nil { return } - for _, cancelFuncs := range c.agent.driverCancelFuncs { + for _, cancelFuncs := range agent.driverCancelFuncs { for _, cancel := range cancelFuncs { cancel() } } - c.agent.epTblCancel() - c.agent.networkDB.Close() + agent.epTblCancel() - c.Lock() - c.agent = nil - c.Unlock() + agent.networkDB.Close() } func (n *network) isClusterEligible() bool {