diff --git a/libnetwork/drivers/overlay/joinleave.go b/libnetwork/drivers/overlay/joinleave.go index 985d997784..a51bcd8985 100644 --- a/libnetwork/drivers/overlay/joinleave.go +++ b/libnetwork/drivers/overlay/joinleave.go @@ -47,18 +47,10 @@ func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo, return fmt.Errorf("couldn't get vxlan id for %q: %v", s.subnetIP.String(), err) } - if err := n.joinSandbox(false); err != nil { + if err := n.joinSandbox(s, false, true); err != nil { return fmt.Errorf("network sandbox join failed: %v", err) } - if err := n.joinSubnetSandbox(s, false); err != nil { - return fmt.Errorf("subnet sandbox join failed for %q: %v", s.subnetIP.String(), err) - } - - // joinSubnetSandbox gets called when an endpoint comes up on a new subnet in the - // overlay network. Hence the Endpoint count should be updated outside joinSubnetSandbox - n.incEndpointCount() - sbox := n.sandbox() overlayIfName, containerIfName, err := createVethPair() diff --git a/libnetwork/drivers/overlay/ov_network.go b/libnetwork/drivers/overlay/ov_network.go index 628a706933..cf32e45951 100644 --- a/libnetwork/drivers/overlay/ov_network.go +++ b/libnetwork/drivers/overlay/ov_network.go @@ -39,7 +39,7 @@ var ( type networkTable map[string]*network type subnet struct { - once *sync.Once + sboxInit bool vxlanName string brName string vni uint32 @@ -63,7 +63,7 @@ type network struct { endpoints endpointTable driver *driver joinCnt int - once *sync.Once + sboxInit bool initEpoch int initErr error subnets []*subnet @@ -150,7 +150,6 @@ func (d *driver) CreateNetwork(id string, option map[string]interface{}, nInfo d id: id, driver: d, endpoints: endpointTable{}, - once: &sync.Once{}, subnets: []*subnet{}, } @@ -193,7 +192,6 @@ func (d *driver) CreateNetwork(id string, option map[string]interface{}, nInfo d s := &subnet{ subnetIP: ipd.Pool, gwIP: ipd.Gateway, - once: &sync.Once{}, } if len(vnis) != 0 { @@ -277,7 +275,7 @@ func (d *driver) DeleteNetwork(nid string) error { logrus.Warnf("Failed to delete overlay endpoint %.7s from local store: %v", ep.id, err) } } - // flush the peerDB entries + doPeerFlush = true delete(d.networks, nid) @@ -304,29 +302,54 @@ func (d *driver) RevokeExternalConnectivity(nid, eid string) error { return nil } -func (n *network) incEndpointCount() { - n.Lock() - defer n.Unlock() - n.joinCnt++ -} - -func (n *network) joinSandbox(restore bool) error { +func (n *network) joinSandbox(s *subnet, restore bool, incJoinCount bool) error { // If there is a race between two go routines here only one will win // the other will wait. - n.once.Do(func() { - // save the error status of initSandbox in n.initErr so that - // all the racing go routines are able to know the status. + networkOnce.Do(networkOnceInit) + + n.Lock() + // If non-restore initialization occurred and was successful then + // tell the peerDB to initialize the sandbox with all the peers + // previously received from networkdb. But only do this after + // unlocking the network. Otherwise we could deadlock with + // on the peerDB channel while peerDB is waiting for the network lock. + var doInitPeerDB bool + defer func() { + n.Unlock() + if doInitPeerDB { + n.driver.initSandboxPeerDB(n.id) + } + }() + + if !n.sboxInit { n.initErr = n.initSandbox(restore) - }) + doInitPeerDB = n.initErr == nil && !restore + // If there was an error, we cannot recover it + n.sboxInit = true + } - return n.initErr -} + if n.initErr != nil { + return fmt.Errorf("network sandbox join failed: %v", n.initErr) + } -func (n *network) joinSubnetSandbox(s *subnet, restore bool) error { - s.once.Do(func() { - s.initErr = n.initSubnetSandbox(s, restore) - }) - return s.initErr + subnetErr := s.initErr + if !s.sboxInit { + subnetErr = n.initSubnetSandbox(s, restore) + // We can recover from these errors, but not on restore + if restore || subnetErr == nil { + s.initErr = subnetErr + s.sboxInit = true + } + } + if subnetErr != nil { + return fmt.Errorf("subnet sandbox join failed for %q: %v", s.subnetIP.String(), subnetErr) + } + + if incJoinCount { + n.joinCnt++ + } + + return nil } func (n *network) leaveSandbox() { @@ -337,15 +360,14 @@ func (n *network) leaveSandbox() { return } - // We are about to destroy sandbox since the container is leaving the network - // Reinitialize the once variable so that we will be able to trigger one time - // sandbox initialization(again) when another container joins subsequently. - n.once = &sync.Once{} - for _, s := range n.subnets { - s.once = &sync.Once{} - } - n.destroySandbox() + + n.sboxInit = false + n.initErr = nil + for _, s := range n.subnets { + s.sboxInit = false + s.initErr = nil + } } // to be called while holding network lock @@ -478,7 +500,7 @@ func (n *network) generateVxlanName(s *subnet) string { id = n.id[:5] } - return "vx-" + fmt.Sprintf("%06x", n.vxlanID(s)) + "-" + id + return fmt.Sprintf("vx-%06x-%v", s.vni, id) } func (n *network) generateBridgeName(s *subnet) string { @@ -491,7 +513,7 @@ func (n *network) generateBridgeName(s *subnet) string { } func (n *network) getBridgeNamePrefix(s *subnet) string { - return "ov-" + fmt.Sprintf("%06x", n.vxlanID(s)) + return fmt.Sprintf("ov-%06x", s.vni) } func checkOverlap(nw *net.IPNet) error { @@ -513,7 +535,7 @@ func checkOverlap(nw *net.IPNet) error { } func (n *network) restoreSubnetSandbox(s *subnet, brName, vxlanName string) error { - sbox := n.sandbox() + sbox := n.sbox // restore overlay osl sandbox Ifaces := make(map[string][]osl.IfaceOption) @@ -542,7 +564,7 @@ func (n *network) setupSubnetSandbox(s *subnet, brName, vxlanName string) error deleteInterfaceBySubnet(n.getBridgeNamePrefix(s), s) } // Try to delete the vxlan interface by vni if already present - deleteVxlanByVNI("", n.vxlanID(s)) + deleteVxlanByVNI("", s.vni) if err := checkOverlap(s.subnetIP); err != nil { return err @@ -556,24 +578,24 @@ func (n *network) setupSubnetSandbox(s *subnet, brName, vxlanName string) error // it must a stale namespace from previous // life. Destroy it completely and reclaim resourced. networkMu.Lock() - path, ok := vniTbl[n.vxlanID(s)] + path, ok := vniTbl[s.vni] networkMu.Unlock() if ok { - deleteVxlanByVNI(path, n.vxlanID(s)) + deleteVxlanByVNI(path, s.vni) if err := syscall.Unmount(path, syscall.MNT_FORCE); err != nil { logrus.Errorf("unmount of %s failed: %v", path, err) } os.Remove(path) networkMu.Lock() - delete(vniTbl, n.vxlanID(s)) + delete(vniTbl, s.vni) networkMu.Unlock() } } // create a bridge and vxlan device for this subnet and move it to the sandbox - sbox := n.sandbox() + sbox := n.sbox if err := sbox.AddInterface(brName, "br", sbox.InterfaceOptions().Address(s.gwIP), @@ -581,7 +603,7 @@ func (n *network) setupSubnetSandbox(s *subnet, brName, vxlanName string) error return fmt.Errorf("bridge creation in sandbox failed for subnet %q: %v", s.subnetIP.String(), err) } - err := createVxlan(vxlanName, n.vxlanID(s), n.maxMTU()) + err := createVxlan(vxlanName, s.vni, n.maxMTU()) if err != nil { return err } @@ -636,6 +658,7 @@ func (n *network) setupSubnetSandbox(s *subnet, brName, vxlanName string) error return nil } +// Must be called with the network lock func (n *network) initSubnetSandbox(s *subnet, restore bool) error { brName := n.generateBridgeName(s) vxlanName := n.generateVxlanName(s) @@ -646,17 +669,12 @@ func (n *network) initSubnetSandbox(s *subnet, restore bool) error { } } else { if err := n.setupSubnetSandbox(s, brName, vxlanName); err != nil { - // The error in setupSubnetSandbox could be a temporary glitch. reset the - // subnet once object to allow the setup to be retried on another endpoint join. - s.once = &sync.Once{} return err } } - n.Lock() s.vxlanName = vxlanName s.brName = brName - n.Unlock() return nil } @@ -697,11 +715,7 @@ func (n *network) cleanupStaleSandboxes() { } func (n *network) initSandbox(restore bool) error { - n.Lock() n.initEpoch++ - n.Unlock() - - networkOnce.Do(networkOnceInit) if !restore { if hostMode { @@ -731,12 +745,7 @@ func (n *network) initSandbox(restore bool) error { } // this is needed to let the peerAdd configure the sandbox - n.setSandbox(sbox) - - if !restore { - // Initialize the sandbox with all the peers previously received from networkdb - n.driver.initSandboxPeerDB(n.id) - } + n.sbox = sbox // If we are in swarm mode, we don't need anymore the watchMiss routine. // This will save 1 thread and 1 netlink socket per network @@ -754,7 +763,7 @@ func (n *network) initSandbox(restore bool) error { tv := syscall.NsecToTimeval(soTimeout.Nanoseconds()) err = nlSock.SetReceiveTimeout(&tv) }) - n.setNetlinkSocket(nlSock) + n.nlSocket = nlSock if err == nil { go n.watchMiss(nlSock, key) @@ -856,7 +865,6 @@ func (d *driver) restoreNetworkFromStore(nid string) *network { if n != nil { n.driver = d n.endpoints = endpointTable{} - n.once = &sync.Once{} d.networks[nid] = n } return n @@ -864,11 +872,11 @@ func (d *driver) restoreNetworkFromStore(nid string) *network { func (d *driver) network(nid string) *network { d.Lock() - defer d.Unlock() n, ok := d.networks[nid] if !ok { n = d.restoreNetworkFromStore(nid) } + d.Unlock() return n } @@ -889,26 +897,12 @@ func (d *driver) getNetworkFromStore(nid string) *network { func (n *network) sandbox() osl.Sandbox { n.Lock() defer n.Unlock() - return n.sbox } -func (n *network) setSandbox(sbox osl.Sandbox) { - n.Lock() - n.sbox = sbox - n.Unlock() -} - -func (n *network) setNetlinkSocket(nlSk *nl.NetlinkSocket) { - n.Lock() - n.nlSocket = nlSk - n.Unlock() -} - func (n *network) vxlanID(s *subnet) uint32 { n.Lock() defer n.Unlock() - return s.vni } @@ -1017,7 +1011,6 @@ func (n *network) SetValue(value []byte) error { subnetIP: subnetIP, gwIP: gwIP, vni: vni, - once: &sync.Once{}, } n.subnets = append(n.subnets, s) } else { @@ -1043,7 +1036,10 @@ func (n *network) writeToStore() error { } func (n *network) releaseVxlanID() ([]uint32, error) { - if len(n.subnets) == 0 { + n.Lock() + nSubnets := len(n.subnets) + n.Unlock() + if nSubnets == 0 { return nil, nil } @@ -1059,14 +1055,17 @@ func (n *network) releaseVxlanID() ([]uint32, error) { } } var vnis []uint32 + n.Lock() for _, s := range n.subnets { if n.driver.vxlanIdm != nil { - vni := n.vxlanID(s) - vnis = append(vnis, vni) - n.driver.vxlanIdm.Release(uint64(vni)) + vnis = append(vnis, s.vni) } + s.vni = 0 + } + n.Unlock() - n.setVxlanID(s, 0) + for _, vni := range vnis { + n.driver.vxlanIdm.Release(uint64(vni)) } return vnis, nil @@ -1074,7 +1073,7 @@ func (n *network) releaseVxlanID() ([]uint32, error) { func (n *network) obtainVxlanID(s *subnet) error { //return if the subnet already has a vxlan id assigned - if s.vni != 0 { + if n.vxlanID(s) != 0 { return nil } @@ -1087,7 +1086,7 @@ func (n *network) obtainVxlanID(s *subnet) error { return fmt.Errorf("getting network %q from datastore failed %v", n.id, err) } - if s.vni == 0 { + if n.vxlanID(s) == 0 { vxlanID, err := n.driver.vxlanIdm.GetID(true) if err != nil { return fmt.Errorf("failed to allocate vxlan id: %v", err) diff --git a/libnetwork/drivers/overlay/overlay.go b/libnetwork/drivers/overlay/overlay.go index f31a8ca597..1bbd761c2f 100644 --- a/libnetwork/drivers/overlay/overlay.go +++ b/libnetwork/drivers/overlay/overlay.go @@ -105,17 +105,6 @@ func Init(dc driverapi.DriverCallback, config map[string]interface{}) error { logrus.Warnf("Failure during overlay endpoints restore: %v", err) } - // If an error happened when the network join the sandbox during the endpoints restore - // we should reset it now along with the once variable, so that subsequent endpoint joins - // outside of the restore path can potentially fix the network join and succeed. - for nid, n := range d.networks { - if n.initErr != nil { - logrus.Infof("resetting init error and once variable for network %s after unsuccessful endpoint restore: %v", nid, n.initErr) - n.initErr = nil - n.once = &sync.Once{} - } - } - return dc.RegisterDriver(networkType, d, c) } @@ -151,14 +140,10 @@ func (d *driver) restoreEndpoints() error { return fmt.Errorf("could not find subnet for endpoint %s", ep.id) } - if err := n.joinSandbox(true); err != nil { + if err := n.joinSandbox(s, true, true); err != nil { return fmt.Errorf("restore network sandbox failed: %v", err) } - if err := n.joinSubnetSandbox(s, true); err != nil { - return fmt.Errorf("restore subnet sandbox failed for %q: %v", s.subnetIP.String(), err) - } - Ifaces := make(map[string][]osl.IfaceOption) vethIfaceOption := make([]osl.IfaceOption, 1) vethIfaceOption = append(vethIfaceOption, n.sbox.InterfaceOptions().Master(s.brName)) @@ -166,10 +151,10 @@ func (d *driver) restoreEndpoints() error { err := n.sbox.Restore(Ifaces, nil, nil, nil) if err != nil { + n.leaveSandbox() return fmt.Errorf("failed to restore overlay sandbox: %v", err) } - n.incEndpointCount() d.peerAdd(ep.nid, ep.id, ep.addr.IP, ep.addr.Mask, ep.mac, net.ParseIP(d.advertiseAddress), false, false, true) } return nil diff --git a/libnetwork/drivers/overlay/peerdb.go b/libnetwork/drivers/overlay/peerdb.go index bdd3cb12af..819b68ff90 100644 --- a/libnetwork/drivers/overlay/peerdb.go +++ b/libnetwork/drivers/overlay/peerdb.go @@ -384,7 +384,7 @@ func (d *driver) peerAddOp(nid, eid string, peerIP net.IP, peerIPMask net.IPMask return fmt.Errorf("couldn't get vxlan id for %q: %v", s.subnetIP.String(), err) } - if err := n.joinSubnetSandbox(s, false); err != nil { + if err := n.joinSandbox(s, false, false); err != nil { return fmt.Errorf("subnet sandbox join failed for %q: %v", s.subnetIP.String(), err) }