From a7c52918fd25735a7303556e7a606159fc53228a Mon Sep 17 00:00:00 2001 From: Madhu Venugopal Date: Sat, 16 Jan 2016 14:24:44 -0800 Subject: [PATCH 1/2] Force delete sandbox during sandboxCleanup Stale sandbox and endpoints are cleaned up during controller init. Since we reuse the exact same code-path for sandbox and endpoint delete, they try to load the plugin and it causes daemon startup timeouts since the external plugin containers can't be loaded at that time. Since the cleanup is actually performed for the libnetwork core states, we can force delete sandbox and endpoint even if the driver is not loaded. Signed-off-by: Madhu Venugopal --- libnetwork/controller.go | 4 ++-- libnetwork/default_gateway.go | 2 +- libnetwork/endpoint.go | 28 +++++++++++++++++----------- libnetwork/endpoint_info.go | 2 +- libnetwork/network.go | 13 ++++++++----- libnetwork/sandbox.go | 12 +++++++++--- libnetwork/sandbox_store.go | 2 +- 7 files changed, 39 insertions(+), 24 deletions(-) diff --git a/libnetwork/controller.go b/libnetwork/controller.go index 7efc409356..ef214fd2ce 100644 --- a/libnetwork/controller.go +++ b/libnetwork/controller.go @@ -387,7 +387,7 @@ func (c *controller) NewNetwork(networkType, name string, options ...NetworkOpti // Make sure we have a driver available for this network type // before we allocate anything. 
- if _, err := network.driver(); err != nil { + if _, err := network.driver(true); err != nil { return nil, err } @@ -432,7 +432,7 @@ func (c *controller) NewNetwork(networkType, name string, options ...NetworkOpti } func (c *controller) addNetwork(n *network) error { - d, err := n.driver() + d, err := n.driver(true) if err != nil { return err } diff --git a/libnetwork/default_gateway.go b/libnetwork/default_gateway.go index bfd7b725d3..9a3ca0d6bd 100644 --- a/libnetwork/default_gateway.go +++ b/libnetwork/default_gateway.go @@ -84,7 +84,7 @@ func (sb *sandbox) clearDefaultGW() error { return nil } - if err := ep.sbLeave(sb); err != nil { + if err := ep.sbLeave(sb, false); err != nil { return fmt.Errorf("container %s: endpoint leaving GW Network failed: %v", sb.containerID, err) } if err := ep.Delete(false); err != nil { diff --git a/libnetwork/endpoint.go b/libnetwork/endpoint.go index 88312e9c15..9c12df5e62 100644 --- a/libnetwork/endpoint.go +++ b/libnetwork/endpoint.go @@ -406,7 +406,7 @@ func (ep *endpoint) sbJoin(sbox Sandbox, options ...EndpointOption) error { ep.processOptions(options...) - driver, err := network.driver() + driver, err := network.driver(true) if err != nil { return fmt.Errorf("failed to join endpoint: %v", err) } @@ -533,10 +533,10 @@ func (ep *endpoint) Leave(sbox Sandbox, options ...EndpointOption) error { sb.joinLeaveStart() defer sb.joinLeaveEnd() - return ep.sbLeave(sbox, options...) + return ep.sbLeave(sbox, false, options...) } -func (ep *endpoint) sbLeave(sbox Sandbox, options ...EndpointOption) error { +func (ep *endpoint) sbLeave(sbox Sandbox, force bool, options ...EndpointOption) error { sb, ok := sbox.(*sandbox) if !ok { return types.BadRequestErrorf("not a valid Sandbox interface") @@ -565,7 +565,7 @@ func (ep *endpoint) sbLeave(sbox Sandbox, options ...EndpointOption) error { ep.processOptions(options...) 
- d, err := n.driver() + d, err := n.driver(!force) if err != nil { return fmt.Errorf("failed to leave endpoint: %v", err) } @@ -575,9 +575,11 @@ func (ep *endpoint) sbLeave(sbox Sandbox, options ...EndpointOption) error { ep.network = n ep.Unlock() - if err := d.Leave(n.id, ep.id); err != nil { - if _, ok := err.(types.MaskableError); !ok { - log.Warnf("driver error disconnecting container %s : %v", ep.name, err) + if d != nil { + if err := d.Leave(n.id, ep.id); err != nil { + if _, ok := err.(types.MaskableError); !ok { + log.Warnf("driver error disconnecting container %s : %v", ep.name, err) + } } } @@ -649,7 +651,7 @@ func (ep *endpoint) Delete(force bool) error { } if sb != nil { - if e := ep.sbLeave(sb); e != nil { + if e := ep.sbLeave(sb, force); e != nil { log.Warnf("failed to leave sandbox for endpoint %s : %v", name, e) } } @@ -681,7 +683,7 @@ func (ep *endpoint) Delete(force bool) error { // unwatch for service records n.getController().unWatchSvcRecord(ep) - if err = ep.deleteEndpoint(); err != nil && !force { + if err = ep.deleteEndpoint(force); err != nil && !force { return err } @@ -690,18 +692,22 @@ func (ep *endpoint) Delete(force bool) error { return nil } -func (ep *endpoint) deleteEndpoint() error { +func (ep *endpoint) deleteEndpoint(force bool) error { ep.Lock() n := ep.network name := ep.name epid := ep.id ep.Unlock() - driver, err := n.driver() + driver, err := n.driver(!force) if err != nil { return fmt.Errorf("failed to delete endpoint: %v", err) } + if driver == nil { + return nil + } + if err := driver.DeleteEndpoint(n.id, epid); err != nil { if _, ok := err.(types.ForbiddenError); ok { return err diff --git a/libnetwork/endpoint_info.go b/libnetwork/endpoint_info.go index 624bc533c5..4ba8e3d548 100644 --- a/libnetwork/endpoint_info.go +++ b/libnetwork/endpoint_info.go @@ -188,7 +188,7 @@ func (ep *endpoint) DriverInfo() (map[string]interface{}, error) { return nil, fmt.Errorf("could not find network in store for driver info: %v", err) } 
- driver, err := n.driver() + driver, err := n.driver(true) if err != nil { return nil, fmt.Errorf("failed to get driver info: %v", err) } diff --git a/libnetwork/network.go b/libnetwork/network.go index 7449c90ac4..c82304899a 100644 --- a/libnetwork/network.go +++ b/libnetwork/network.go @@ -566,7 +566,7 @@ func (n *network) driverScope() string { return dd.capability.DataScope } -func (n *network) driver() (driverapi.Driver, error) { +func (n *network) driver(load bool) (driverapi.Driver, error) { c := n.getController() c.Lock() @@ -574,12 +574,15 @@ func (n *network) driver() (driverapi.Driver, error) { dd, ok := c.drivers[n.networkType] c.Unlock() - if !ok { + if !ok && load { var err error dd, err = c.loadDriver(n.networkType) if err != nil { return nil, err } + } else if !ok { + // dont fail if driver loading is not required + return nil, nil } return dd.driver, nil @@ -631,7 +634,7 @@ func (n *network) Delete() error { } func (n *network) deleteNetwork() error { - d, err := n.driver() + d, err := n.driver(true) if err != nil { return fmt.Errorf("failed deleting network: %v", err) } @@ -651,7 +654,7 @@ func (n *network) deleteNetwork() error { } func (n *network) addEndpoint(ep *endpoint) error { - d, err := n.driver() + d, err := n.driver(true) if err != nil { return fmt.Errorf("failed to add endpoint: %v", err) } @@ -725,7 +728,7 @@ func (n *network) CreateEndpoint(name string, options ...EndpointOption) (Endpoi } defer func() { if err != nil { - if e := ep.deleteEndpoint(); e != nil { + if e := ep.deleteEndpoint(false); e != nil { log.Warnf("cleaning up endpoint failed %s : %v", name, e) } } diff --git a/libnetwork/sandbox.go b/libnetwork/sandbox.go index 9dbb100ef9..fe1927f7e5 100644 --- a/libnetwork/sandbox.go +++ b/libnetwork/sandbox.go @@ -160,6 +160,10 @@ func (sb *sandbox) Statistics() (map[string]*types.InterfaceStatistics, error) { } func (sb *sandbox) Delete() error { + return sb.delete(false) +} + +func (sb *sandbox) delete(force bool) error { 
 sb.Lock() if sb.inDelete { sb.Unlock() @@ -194,11 +198,13 @@ func (sb *sandbox) Delete() error { continue } - if err := ep.Leave(sb); err != nil { - log.Warnf("Failed detaching sandbox %s from endpoint %s: %v\n", sb.ID(), ep.ID(), err) + if !force { + if err := ep.Leave(sb); err != nil { + log.Warnf("Failed detaching sandbox %s from endpoint %s: %v\n", sb.ID(), ep.ID(), err) + } } - if err := ep.Delete(false); err != nil { + if err := ep.Delete(force); err != nil { log.Warnf("Failed deleting endpoint %s: %v\n", ep.ID(), err) } } diff --git a/libnetwork/sandbox_store.go b/libnetwork/sandbox_store.go index 61eda408e4..d3f327193e 100644 --- a/libnetwork/sandbox_store.go +++ b/libnetwork/sandbox_store.go @@ -226,7 +226,7 @@ func (c *controller) sandboxCleanup() { heap.Push(&sb.endpoints, ep) } - if err := sb.Delete(); err != nil { + if err := sb.delete(true); err != nil { logrus.Errorf("failed to delete sandbox %s while trying to cleanup: %v", sb.id, err) } } From 315004b575c8e34fdb35784194dfb7ee5bd9d862 Mon Sep 17 00:00:00 2001 From: Madhu Venugopal Date: Sun, 17 Jan 2016 12:43:41 -0800 Subject: [PATCH 2/2] Cache scope in network object It's safe to cache the scope value in the network object so that it can be reused for cleanup operations. The current implementation assumes the presence of the driver during cleanup operations. Since a remote driver may not be present, we should not fail such cleanup operations. Hence make use of the scope variable from the network object. 
Signed-off-by: Madhu Venugopal --- libnetwork/endpoint.go | 2 +- libnetwork/network.go | 16 ++++++++++++++-- libnetwork/store.go | 27 +++++++++++++++++---------- 3 files changed, 32 insertions(+), 13 deletions(-) diff --git a/libnetwork/endpoint.go b/libnetwork/endpoint.go index 9c12df5e62..22ca3a5253 100644 --- a/libnetwork/endpoint.go +++ b/libnetwork/endpoint.go @@ -962,7 +962,7 @@ func (c *controller) cleanupLocalEndpoints() { } for _, ep := range epl { - if err := ep.Delete(false); err != nil { + if err := ep.Delete(true); err != nil { log.Warnf("Could not delete local endpoint %s during endpoint cleanup: %v", ep.name, err) } } diff --git a/libnetwork/network.go b/libnetwork/network.go index c82304899a..aa32cb8d68 100644 --- a/libnetwork/network.go +++ b/libnetwork/network.go @@ -149,6 +149,7 @@ type network struct { name string networkType string id string + scope string ipamType string ipamOptions map[string]string addrSpace string @@ -246,6 +247,7 @@ func (n *network) New() datastore.KVObject { return &network{ ctrlr: n.ctrlr, drvOnce: &sync.Once{}, + scope: n.scope, } } @@ -295,6 +297,7 @@ func (n *network) CopyTo(o datastore.KVObject) error { dstN.name = n.name dstN.id = n.id dstN.networkType = n.networkType + dstN.scope = n.scope dstN.ipamType = n.ipamType dstN.enableIPv6 = n.enableIPv6 dstN.persist = n.persist @@ -337,7 +340,7 @@ func (n *network) CopyTo(o datastore.KVObject) error { } func (n *network) DataScope() string { - return n.driverScope() + return n.Scope() } func (n *network) getEpCnt() *endpointCnt { @@ -353,6 +356,7 @@ func (n *network) MarshalJSON() ([]byte, error) { netMap["name"] = n.name netMap["id"] = n.id netMap["networkType"] = n.networkType + netMap["scope"] = n.scope netMap["ipamType"] = n.ipamType netMap["addrSpace"] = n.addrSpace netMap["enableIPv6"] = n.enableIPv6 @@ -456,6 +460,9 @@ func (n *network) UnmarshalJSON(b []byte) (err error) { if v, ok := netMap["internal"]; ok { n.internal = v.(bool) } + if s, ok := netMap["scope"]; 
ok { + n.scope = s.(string) + } return nil } @@ -585,6 +592,9 @@ func (n *network) driver(load bool) (driverapi.Driver, error) { return nil, nil } + n.Lock() + n.scope = dd.capability.DataScope + n.Unlock() return dd.driver, nil } @@ -1172,7 +1182,9 @@ func (n *network) DriverOptions() map[string]string { } func (n *network) Scope() string { - return n.driverScope() + n.Lock() + defer n.Unlock() + return n.scope } func (n *network) IpamConfig() (string, map[string]string, []*IpamConf, []*IpamConf) { diff --git a/libnetwork/store.go b/libnetwork/store.go index be3e8ae638..89248800c9 100644 --- a/libnetwork/store.go +++ b/libnetwork/store.go @@ -75,6 +75,7 @@ func (c *controller) getNetworkFromStore(nid string) (*network, error) { } n.epCnt = ec + n.scope = store.Scope() return n, nil } @@ -107,6 +108,7 @@ func (c *controller) getNetworksForScope(scope string) ([]*network, error) { } n.epCnt = ec + n.scope = scope nl = append(nl, n) } @@ -140,6 +142,7 @@ func (c *controller) getNetworksFromStore() ([]*network, error) { } n.epCnt = ec + n.scope = store.Scope() nl = append(nl, n) } } @@ -148,17 +151,21 @@ func (c *controller) getNetworksFromStore() ([]*network, error) { } func (n *network) getEndpointFromStore(eid string) (*endpoint, error) { - store := n.ctrlr.getStore(n.Scope()) - if store == nil { - return nil, fmt.Errorf("could not find endpoint %s: datastore not found for scope %s", eid, n.Scope()) + var errors []string + for _, store := range n.ctrlr.getStores() { + ep := &endpoint{id: eid, network: n} + err := store.GetObject(datastore.Key(ep.Key()...), ep) + // Continue searching in the next store if the key is not found in this store + if err != nil { + if err != datastore.ErrKeyNotFound { + errors = append(errors, fmt.Sprintf("{%s:%v}, ", store.Scope(), err)) + log.Debugf("could not find endpoint %s in %s: %v", eid, store.Scope(), err) + } + continue + } + return ep, nil } - - ep := &endpoint{id: eid, network: n} - err := 
store.GetObject(datastore.Key(ep.Key()...), ep) - if err != nil { - return nil, fmt.Errorf("could not find endpoint %s: %v", eid, err) - } - return ep, nil + return nil, fmt.Errorf("could not find endpoint %s: %v", eid, errors) } func (n *network) getEndpointsFromStore() ([]*endpoint, error) {