libnetwork: processEndpointDelete: Fix deadlock between getSvcRecords and processEndpointDelete
We had some hosts with quite a few cycling containers that occasionally caused the docker daemons to lock up.
Most prominently, `docker run` commands no longer responded and nothing happened anymore.
Looking at the stack traces, the following is likely at least one cause:
Two goroutines g0 and g1 can race against each other:

* (g0) 1. getSvcRecords is called and acquires (*network).Lock() --> the network is locked.
* (g1) 2. processEndpointDelete is called and acquires (*controller).Lock() --> the controller is locked.
* (g1) 3. processEndpointDelete calls (*network).ID(), which tries to acquire (*network).Lock().
* (g0) 4. getSvcRecords tries to acquire (*controller).Lock().

Steps 3 and 4 are deadlocked against each other: each goroutine holds the lock the other one needs.
References b5dc370370/network.go
Signed-off-by: Steffen Butzer <steffen.butzer@outlook.com>
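
To illustrate the lock inversion described above, here is a minimal, self-contained sketch. The types and method bodies are simplified stand-ins, not libnetwork's actual structs or call paths:

```go
package main

import "sync"

// Hypothetical, simplified stand-ins for libnetwork's controller and
// network types; names and fields are illustrative only.
type controller struct {
	mu sync.Mutex
}

type network struct {
	mu   sync.Mutex
	ctrl *controller
	id   string
}

// ID takes the network lock, analogous to (*network).ID() in libnetwork.
func (n *network) ID() string {
	n.mu.Lock()
	defer n.mu.Unlock()
	return n.id
}

// getSvcRecords locks the network first, then the controller (g0's order).
func (n *network) getSvcRecords() {
	n.mu.Lock()
	defer n.mu.Unlock()

	n.ctrl.mu.Lock() // blocks if g1 already holds the controller lock
	defer n.ctrl.mu.Unlock()
	// ... read service records ...
}

// processEndpointDelete locks the controller first, then (via n.ID())
// the network -- the inverse order, which is what deadlocks against g0.
func (c *controller) processEndpointDelete(n *network) {
	c.mu.Lock()
	defer c.mu.Unlock()

	_ = n.ID() // blocks if g0 already holds the network lock
}

func main() {
	c := &controller{}
	n := &network{ctrl: c, id: "n1"}

	var wg sync.WaitGroup
	wg.Add(2)
	go func() { defer wg.Done(); n.getSvcRecords() }()          // g0
	go func() { defer wg.Done(); c.processEndpointDelete(n) }() // g1
	wg.Wait() // with an unlucky interleaving, this never returns
}
```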
parent 44269c6653
commit 7c97896747
1 changed file with 7 additions and 4 deletions
@@ -399,11 +399,14 @@ func (c *controller) processEndpointDelete(nmap map[string]*netWatch, ep *endpoint) {
 		return
 	}
 
+	networkID := n.ID()
+	endpointID := ep.ID()
+
 	c.Lock()
-	nw, ok := nmap[n.ID()]
+	nw, ok := nmap[networkID]
 
 	if ok {
-		delete(nw.localEps, ep.ID())
+		delete(nw.localEps, endpointID)
 		c.Unlock()
 
 		// Update the svc db about local endpoint leave right away
@@ -417,9 +420,9 @@ func (c *controller) processEndpointDelete(nmap map[string]*netWatch, ep *endpoint) {
 
 			// This is the last container going away for the network. Destroy
 			// this network's svc db entry
-			delete(c.svcRecords, n.ID())
+			delete(c.svcRecords, networkID)
 
-			delete(nmap, n.ID())
+			delete(nmap, networkID)
 		}
 	}
 	c.Unlock()
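
The fix follows a common pattern for breaking this kind of lock-order inversion: everything that needs the network lock (the network and endpoint IDs) is read once, before the controller lock is taken, so neither mutex is ever waited on while the other is held. Under the same simplified, illustrative types as the sketch above (again, not libnetwork's real code), the corrected shape is roughly:

```go
// Sketch of the fixed ordering, reusing the controller/network stand-ins
// from the earlier example: cache anything that needs the network lock
// before acquiring the controller lock, so the locks never nest.
func (c *controller) processEndpointDeleteFixed(n *network) {
	networkID := n.ID() // takes and releases the network lock up front

	c.mu.Lock()
	defer c.mu.Unlock()
	_ = networkID // only the cached value is used under the controller lock
}
```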