From c92a4e9fd6ab07c59ce280d175438fa49319f3e5 Mon Sep 17 00:00:00 2001 From: Alessandro Boch Date: Sat, 5 Mar 2016 02:00:31 -0800 Subject: [PATCH] Avoid network/endpoint count inconsistences - ... on ungraceful shutdown during network create - Allow forceful deletion of network - On network delete, first mark the network for deletion - On controller creation, first forcely remove any network that is marked for deletion. Signed-off-by: Alessandro Boch --- libnetwork/controller.go | 18 ++++++++--- libnetwork/libnetwork_test.go | 9 ------ libnetwork/network.go | 60 +++++++++++++++++++++++------------ libnetwork/store.go | 23 ++++++++++++-- 4 files changed, 72 insertions(+), 38 deletions(-) diff --git a/libnetwork/controller.go b/libnetwork/controller.go index 6abc4d1ddf..0703a64cb4 100644 --- a/libnetwork/controller.go +++ b/libnetwork/controller.go @@ -187,6 +187,7 @@ func New(cfgOptions ...config.Option) (NetworkController, error) { c.sandboxCleanup() c.cleanupLocalEndpoints() + c.networkCleanup() if err := c.startExternalKeyListener(); err != nil { return nil, err @@ -479,19 +480,23 @@ func (c *controller) NewNetwork(networkType, name string, options ...NetworkOpti } }() - if err = c.updateToStore(network); err != nil { + // First store the endpoint count, then the network. To avoid to + // end up with a datastore containing a network and not an epCnt, + // in case of an ungraceful shutdown during this function call. + epCnt := &endpointCnt{n: network} + if err = c.updateToStore(epCnt); err != nil { return nil, err } defer func() { if err != nil { - if e := c.deleteFromStore(network); e != nil { - log.Warnf("couldnt rollback from store, network %s on failure (%v): %v", network.name, err, e) + if e := c.deleteFromStore(epCnt); e != nil { + log.Warnf("couldnt rollback from store, epCnt %v on failure (%v): %v", epCnt, err, e) } } }() - network.epCnt = &endpointCnt{n: network} - if err = c.updateToStore(network.epCnt); err != nil { + network.epCnt = epCnt + if err = c.updateToStore(network); err != nil { return nil, err } @@ -521,6 +526,9 @@ func (c *controller) Networks() []Network { } for _, n := range networks { + if n.inDelete { + continue + } list = append(list, n) } diff --git a/libnetwork/libnetwork_test.go b/libnetwork/libnetwork_test.go index d1ee64f946..f70288bab4 100644 --- a/libnetwork/libnetwork_test.go +++ b/libnetwork/libnetwork_test.go @@ -252,15 +252,6 @@ func TestHost(t *testing.T) { if err := ep3.Delete(false); err != nil { t.Fatal(err) } - - // host type is special network. Cannot be removed. - err = network.Delete() - if err == nil { - t.Fatal(err) - } - if _, ok := err.(types.ForbiddenError); !ok { - t.Fatalf("Unexpected error type") - } } func TestBridge(t *testing.T) { diff --git a/libnetwork/network.go b/libnetwork/network.go index 25dc39c3f5..422d1f2062 100644 --- a/libnetwork/network.go +++ b/libnetwork/network.go @@ -167,6 +167,7 @@ type network struct { stopWatchCh chan struct{} drvOnce *sync.Once internal bool + inDelete bool sync.Mutex } @@ -306,6 +307,7 @@ func (n *network) CopyTo(o datastore.KVObject) error { dstN.dbExists = n.dbExists dstN.drvOnce = n.drvOnce dstN.internal = n.internal + dstN.inDelete = n.inDelete for _, v4conf := range n.ipamV4Config { dstV4Conf := &IpamConf{} @@ -394,6 +396,7 @@ func (n *network) MarshalJSON() ([]byte, error) { netMap["ipamV6Info"] = string(iis) } netMap["internal"] = n.internal + netMap["inDelete"] = n.inDelete return json.Marshal(netMap) } @@ -463,6 +466,9 @@ func (n *network) UnmarshalJSON(b []byte) (err error) { if s, ok := netMap["scope"]; ok { n.scope = s.(string) } + if v, ok := netMap["inDelete"]; ok { + n.inDelete = v.(bool) + } return nil } @@ -611,6 +617,10 @@ func (n *network) driver(load bool) (driverapi.Driver, error) { } func (n *network) Delete() error { + return n.delete(false) +} + +func (n *network) delete(force bool) error { n.Lock() c := n.ctrlr name := n.name @@ -622,33 +632,39 @@ func (n *network) Delete() error { return &UnknownNetworkError{name: name, id: id} } - numEps := n.getEpCnt().EndpointCnt() - if numEps != 0 { + if !force && n.getEpCnt().EndpointCnt() != 0 { return &ActiveEndpointsError{name: n.name, id: n.id} } - if err = n.deleteNetwork(); err != nil { - return err + // Mark the network for deletion + n.inDelete = true + if err = c.updateToStore(n); err != nil { + return fmt.Errorf("error marking network %s (%s) for deletion: %v", n.Name(), n.ID(), err) } - defer func() { - if err != nil { - if e := c.addNetwork(n); e != nil { - log.Warnf("failed to rollback deleteNetwork for network %s: %v", - n.Name(), err) - } + + if err = n.deleteNetwork(); err != nil { + if !force { + return err } - }() + log.Debugf("driver failed to delete stale network %s (%s): %v", n.Name(), n.ID(), err) + } + + n.ipamRelease() + if err = c.updateToStore(n); err != nil { + log.Warnf("Failed to update store after ipam release for network %s (%s): %v", n.Name(), n.ID(), err) + } // deleteFromStore performs an atomic delete operation and the // network.epCnt will help prevent any possible // race between endpoint join and network delete - if err = n.getController().deleteFromStore(n.getEpCnt()); err != nil { - return fmt.Errorf("error deleting network endpoint count from store: %v", err) + if err = c.deleteFromStore(n.getEpCnt()); err != nil { + if !force { + return fmt.Errorf("error deleting network endpoint count from store: %v", err) + } + log.Debugf("Error deleting endpoint count from store for stale network %s (%s) for deletion: %v", n.Name(), n.ID(), err) } - n.ipamRelease() - - if err = n.getController().deleteFromStore(n); err != nil { + if err = c.deleteFromStore(n); err != nil { return fmt.Errorf("error deleting network from store: %v", err) } @@ -1098,25 +1114,25 @@ func (n *network) ipamRelease() { } func (n *network) ipamReleaseVersion(ipVer int, ipam ipamapi.Ipam) { - var infoList []*IpamInfo + var infoList *[]*IpamInfo switch ipVer { case 4: - infoList = n.ipamV4Info + infoList = &n.ipamV4Info case 6: - infoList = n.ipamV6Info + infoList = &n.ipamV6Info default: log.Warnf("incorrect ip version passed to ipam release: %d", ipVer) return } - if infoList == nil { + if *infoList == nil { return } log.Debugf("releasing IPv%d pools from network %s (%s)", ipVer, n.Name(), n.ID()) - for _, d := range infoList { + for _, d := range *infoList { if d.Gateway != nil { if err := ipam.ReleaseAddress(d.PoolID, d.Gateway.IP); err != nil { log.Warnf("Failed to release gateway ip address %s on delete of network %s (%s): %v", d.Gateway.IP, n.Name(), n.ID(), err) @@ -1135,6 +1151,8 @@ func (n *network) ipamReleaseVersion(ipVer int, ipam ipamapi.Ipam) { log.Warnf("Failed to release address pool %s on delete of network %s (%s): %v", d.PoolID, n.Name(), n.ID(), err) } } + + *infoList = nil } func (n *network) getIPInfo(ipVer int) []*IpamInfo { diff --git a/libnetwork/store.go b/libnetwork/store.go index 182923ef6c..41dd21b746 100644 --- a/libnetwork/store.go +++ b/libnetwork/store.go @@ -71,7 +71,7 @@ func (c *controller) getNetworkFromStore(nid string) (*network, error) { ec := &endpointCnt{n: n} err = store.GetObject(datastore.Key(ec.Key()...), ec) - if err != nil { + if err != nil && !n.inDelete { return nil, fmt.Errorf("could not find endpoint count for network %s: %v", n.Name(), err) } @@ -104,7 +104,7 @@ func (c *controller) getNetworksForScope(scope string) ([]*network, error) { ec := &endpointCnt{n: n} err = store.GetObject(datastore.Key(ec.Key()...), ec) - if err != nil { + if err != nil && !n.inDelete { log.Warnf("Could not find endpoint count key %s for network %s while listing: %v", datastore.Key(ec.Key()...), n.Name(), err) continue } @@ -139,7 +139,7 @@ func (c *controller) getNetworksFromStore() ([]*network, error) { ec := &endpointCnt{n: n} err = store.GetObject(datastore.Key(ec.Key()...), ec) - if err != nil { + if err != nil && !n.inDelete { log.Warnf("could not find endpoint count key %s for network %s while listing: %v", datastore.Key(ec.Key()...), n.Name(), err) continue } @@ -428,3 +428,20 @@ func (c *controller) startWatch() { go c.watchLoop() } + +func (c *controller) networkCleanup() { + networks, err := c.getNetworksFromStore() + if err != nil { + log.Warnf("Could not retrieve networks from store(s) during network cleanup: %v", err) + return + } + + for _, n := range networks { + if n.inDelete { + log.Infof("Removing stale network %s (%s)", n.Name(), n.ID()) + if err := n.delete(true); err != nil { + log.Debugf("Error while removing stale network: %v", err) + } + } + } +}