mirror of
https://github.com/moby/moby.git
synced 2022-11-09 12:21:53 -05:00
Fix race conditions in overlay network driver
The overlay network driver is not properly using it's mutexes or sync.Onces. It made the classic mistake of not holding a lock through various read-modify-write operations. This can result in inconsistent state storage leading to more catastrophic issues. This patch attempts to maintain the previous semantics while holding the driver lock through operations that are read-modify-write of the driver's network state. One example of this race would be if two goroutines tried to invoke d.network() after the network ID was removed from the table. Both would try to reinstall it causing the "once" to get reinitialized twice without any lock protection. This could then lead to the "once" getting invoked twice on the same network. Furthermore, the changes to one of these network structures gets effectively discarded. It's also the case, that because there would be two simultaneous instances of the network, the various network Lock() invocations would be meaningless for race prevention. Signed-off-by: Chris Telfer <ctelfer@docker.com>
This commit is contained in:
parent
5c679b051d
commit
b64997ea82
1 changed files with 32 additions and 23 deletions
|
@ -203,6 +203,12 @@ func (d *driver) CreateNetwork(id string, option map[string]interface{}, nInfo d
|
|||
n.subnets = append(n.subnets, s)
|
||||
}
|
||||
|
||||
d.Lock()
|
||||
defer d.Unlock()
|
||||
if d.networks[n.id] != nil {
|
||||
return fmt.Errorf("attempt to create overlay network %v that already exists", n.id)
|
||||
}
|
||||
|
||||
if err := n.writeToStore(); err != nil {
|
||||
return fmt.Errorf("failed to update data store for network %v: %v", n.id, err)
|
||||
}
|
||||
|
@ -217,11 +223,13 @@ func (d *driver) CreateNetwork(id string, option map[string]interface{}, nInfo d
|
|||
|
||||
if nInfo != nil {
|
||||
if err := nInfo.TableEventRegister(ovPeerTable, driverapi.EndpointObject); err != nil {
|
||||
// XXX Undo writeToStore? No method to so. Why?
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
d.addNetwork(n)
|
||||
d.networks[id] = n
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -235,7 +243,15 @@ func (d *driver) DeleteNetwork(nid string) error {
|
|||
return err
|
||||
}
|
||||
|
||||
n := d.network(nid)
|
||||
d.Lock()
|
||||
defer d.Unlock()
|
||||
|
||||
// This is similar to d.network(), but we need to keep holding the lock
|
||||
// until we are done removing this network.
|
||||
n, ok := d.networks[nid]
|
||||
if !ok {
|
||||
n = d.restoreNetworkFromStore(nid)
|
||||
}
|
||||
if n == nil {
|
||||
return fmt.Errorf("could not find network with id %s", nid)
|
||||
}
|
||||
|
@ -255,7 +271,7 @@ func (d *driver) DeleteNetwork(nid string) error {
|
|||
}
|
||||
// flush the peerDB entries
|
||||
d.peerFlush(nid)
|
||||
d.deleteNetwork(nid)
|
||||
delete(d.networks, nid)
|
||||
|
||||
vnis, err := n.releaseVxlanID()
|
||||
if err != nil {
|
||||
|
@ -805,32 +821,25 @@ func (n *network) watchMiss(nlSock *nl.NetlinkSocket, nsPath string) {
|
|||
}
|
||||
}
|
||||
|
||||
func (d *driver) addNetwork(n *network) {
|
||||
d.Lock()
|
||||
d.networks[n.id] = n
|
||||
d.Unlock()
|
||||
}
|
||||
|
||||
func (d *driver) deleteNetwork(nid string) {
|
||||
d.Lock()
|
||||
delete(d.networks, nid)
|
||||
d.Unlock()
|
||||
// Restore a network from the store to the driver if it is present.
|
||||
// Must be called with the driver locked!
|
||||
func (d *driver) restoreNetworkFromStore(nid string) *network {
|
||||
n := d.getNetworkFromStore(nid)
|
||||
if n != nil {
|
||||
n.driver = d
|
||||
n.endpoints = endpointTable{}
|
||||
n.once = &sync.Once{}
|
||||
d.networks[nid] = n
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
func (d *driver) network(nid string) *network {
|
||||
d.Lock()
|
||||
defer d.Unlock()
|
||||
n, ok := d.networks[nid]
|
||||
d.Unlock()
|
||||
if !ok {
|
||||
n = d.getNetworkFromStore(nid)
|
||||
if n != nil {
|
||||
n.driver = d
|
||||
n.endpoints = endpointTable{}
|
||||
n.once = &sync.Once{}
|
||||
d.Lock()
|
||||
d.networks[nid] = n
|
||||
d.Unlock()
|
||||
}
|
||||
n = d.restoreNetworkFromStore(nid)
|
||||
}
|
||||
|
||||
return n
|
||||
|
|
Loading…
Reference in a new issue