From f0c86fb56e84289d08d50799bb5492af649ddb46 Mon Sep 17 00:00:00 2001 From: Chris Telfer Date: Fri, 8 Jun 2018 13:53:36 -0400 Subject: [PATCH] Fix deadlock introduced in b64997ea Commit b64997ea prevented data corruption due to simultaneous driver.CreateNetwork()/driver.DeleteNetwork() by holding the network lock through the read/modify part of the operation. However, part of the DeleteNetwork operation entails sending a message to the peerDB to tell that goroutine to flush entries on deletion. This can lead to a deadlock where: * driver.DeleteNetwork() starts and acquires driver.Lock() * peerDB receives some other request (e.g. EventNotify) and blocks on driver.Lock() * driver.DeleteNetwork() attempts a peerDB flush and blocks waiting on the synchronous peerDB operation channel This patch fixes the issue by deferring the peerDB flush operation until after DeleteNetwork() unlocks driver.Lock(). Commit b64997ea only modified CreateNetwork() and DeleteNetwork() and the critical section that driver.Lock() protects in CreateNetwork() does not perform any peerDB notifications or other locks of driver data structures. So this solution should be a complete fix for any regressions introduced in b64997ea. Signed-off-by: Chris Telfer --- libnetwork/drivers/overlay/ov_network.go | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/libnetwork/drivers/overlay/ov_network.go b/libnetwork/drivers/overlay/ov_network.go index 9c83127d4f..8b6839656b 100644 --- a/libnetwork/drivers/overlay/ov_network.go +++ b/libnetwork/drivers/overlay/ov_network.go @@ -244,7 +244,15 @@ func (d *driver) DeleteNetwork(nid string) error { } d.Lock() - defer d.Unlock() + // Only perform a peer flush operation (if required) AFTER unlocking + // the driver lock to avoid deadlocking w/ the peerDB. + var doPeerFlush bool + defer func() { + d.Unlock() + if doPeerFlush { + d.peerFlush(nid) + } + }() // This is similar to d.network(), but we need to keep holding the lock // until we are done removing this network. @@ -270,7 +278,7 @@ func (d *driver) DeleteNetwork(nid string) error { } } // flush the peerDB entries - d.peerFlush(nid) + doPeerFlush = true delete(d.networks, nid) vnis, err := n.releaseVxlanID()