diff --git a/libnetwork/drivers/overlay/encryption.go b/libnetwork/drivers/overlay/encryption.go index f12d7a8c67..802d7bc36d 100644 --- a/libnetwork/drivers/overlay/encryption.go +++ b/libnetwork/drivers/overlay/encryption.go @@ -21,7 +21,6 @@ import ( const ( r = 0xD0C4E3 - timeout = 30 pktExpansion = 26 // SPI(4) + SeqN(4) + IV(8) + PadLength(1) + NextHeader(1) + ICV(8) ) diff --git a/libnetwork/drivers/overlay/joinleave.go b/libnetwork/drivers/overlay/joinleave.go index a07838b760..0770513e7d 100644 --- a/libnetwork/drivers/overlay/joinleave.go +++ b/libnetwork/drivers/overlay/joinleave.go @@ -68,7 +68,7 @@ func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo, ep.ifName = containerIfName - if err := d.writeEndpointToStore(ep); err != nil { + if err = d.writeEndpointToStore(ep); err != nil { return fmt.Errorf("failed to update overlay endpoint %s to local data store: %v", ep.id[0:7], err) } @@ -86,7 +86,7 @@ func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo, return err } - if err := sbox.AddInterface(overlayIfName, "veth", + if err = sbox.AddInterface(overlayIfName, "veth", sbox.InterfaceOptions().Master(s.brName)); err != nil { return fmt.Errorf("could not add veth pair inside the network sandbox: %v", err) } @@ -100,7 +100,7 @@ func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo, return err } - if err := nlh.LinkSetHardwareAddr(veth, ep.mac); err != nil { + if err = nlh.LinkSetHardwareAddr(veth, ep.mac); err != nil { return fmt.Errorf("could not set mac address (%v) to the container interface: %v", ep.mac, err) } @@ -108,7 +108,7 @@ func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo, if sub == s { continue } - if err := jinfo.AddStaticRoute(sub.subnetIP, types.NEXTHOP, s.gwIP.IP); err != nil { + if err = jinfo.AddStaticRoute(sub.subnetIP, types.NEXTHOP, s.gwIP.IP); err != nil { logrus.Errorf("Adding subnet %s static route in network %q failed\n", s.subnetIP, n.id) } } @@ -122,7 +122,7 @@ func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo, d.peerAdd(nid, eid, ep.addr.IP, ep.addr.Mask, ep.mac, net.ParseIP(d.advertiseAddress), false, false, true) - if err := d.checkEncryption(nid, nil, n.vxlanID(s), true, true); err != nil { + if err = d.checkEncryption(nid, nil, n.vxlanID(s), true, true); err != nil { logrus.Warn(err) } @@ -200,7 +200,7 @@ func (d *driver) EventNotify(etype driverapi.EventType, nid, tableName, key stri } if etype == driverapi.Delete { - d.peerDelete(nid, eid, addr.IP, addr.Mask, mac, vtep) + d.peerDelete(nid, eid, addr.IP, addr.Mask, mac, vtep, false) return } @@ -232,11 +232,9 @@ func (d *driver) Leave(nid, eid string) error { } } - n.leaveSandbox() + d.peerDelete(nid, eid, ep.addr.IP, ep.addr.Mask, ep.mac, net.ParseIP(d.advertiseAddress), true) - if err := d.checkEncryption(nid, nil, 0, true, false); err != nil { - logrus.Warn(err) - } + n.leaveSandbox() return nil } diff --git a/libnetwork/drivers/overlay/ov_network.go b/libnetwork/drivers/overlay/ov_network.go index 0e9ca77866..758b329187 100644 --- a/libnetwork/drivers/overlay/ov_network.go +++ b/libnetwork/drivers/overlay/ov_network.go @@ -251,8 +251,9 @@ func (d *driver) DeleteNetwork(nid string) error { if err := d.deleteEndpointFromStore(ep); err != nil { logrus.Warnf("Failed to delete overlay endpoint %s from local store: %v", ep.id[0:7], err) } - } + // flush the peerDB entries + d.peerFlush(nid) d.deleteNetwork(nid) vnis, err := n.releaseVxlanID() @@ -505,11 +506,7 @@ func (n *network) restoreSubnetSandbox(s *subnet, brName, vxlanName string) erro vxlanIfaceOption := make([]osl.IfaceOption, 1) vxlanIfaceOption = append(vxlanIfaceOption, sbox.InterfaceOptions().Master(brName)) Ifaces[vxlanName+"+vxlan"] = vxlanIfaceOption - err = sbox.Restore(Ifaces, nil, nil, nil) - if err != nil { - return err - } - return nil + return sbox.Restore(Ifaces, nil, nil, nil) } func (n *network) setupSubnetSandbox(s *subnet, brName, vxlanName string) error { @@ -760,58 +757,38 @@ func (n *network) watchMiss(nlSock *nl.NetlinkSocket) { continue } - logrus.Debugf("miss notification: dest IP %v, dest MAC %v", ip, mac) - if neigh.State&(netlink.NUD_STALE|netlink.NUD_INCOMPLETE) == 0 { continue } if n.driver.isSerfAlive() { + logrus.Debugf("miss notification: dest IP %v, dest MAC %v", ip, mac) mac, IPmask, vtep, err := n.driver.resolvePeer(n.id, ip) if err != nil { logrus.Errorf("could not resolve peer %q: %v", ip, err) continue } n.driver.peerAdd(n.id, "dummy", ip, IPmask, mac, vtep, l2Miss, l3Miss, false) - } else { - // If the gc_thresh values are lower kernel might knock off the neighor entries. - // When we get a L3 miss check if its a valid peer and reprogram the neighbor - // entry again. Rate limit it to once attempt every 500ms, just in case a faulty - // container sends a flood of packets to invalid peers - if !l3Miss { - continue - } - if time.Since(t) > 500*time.Millisecond { + } else if l3Miss && time.Since(t) > time.Second { + // All the local peers will trigger a miss notification but this one is expected and the local container will reply + // autonomously to the ARP request + // In case the gc_thresh3 values is low kernel might reject new entries during peerAdd. This will trigger the following + // extra logs that will inform of the possible issue. + // Entries created would not be deleted see documentation http://man7.org/linux/man-pages/man7/arp.7.html: + // Entries which are marked as permanent are never deleted by the garbage-collector. + // The time limit here is to guarantee that the dbSearch is not + // done too frequently causing a stall of the peerDB operations. + pKey, pEntry, err := n.driver.peerDbSearch(n.id, ip) + if err == nil && !pEntry.isLocal { t = time.Now() - n.programNeighbor(ip) + logrus.Warnf("miss notification for peer:%+v l3Miss:%t l2Miss:%t, if the problem persist check the gc_thresh on the host pKey:%+v pEntry:%+v err:%v", + neigh, l3Miss, l2Miss, *pKey, *pEntry, err) } } } } } -func (n *network) programNeighbor(ip net.IP) { - peerMac, _, _, err := n.driver.peerDbSearch(n.id, ip) - if err != nil { - logrus.Errorf("Reprogramming on L3 miss failed for %s, no peer entry", ip) - return - } - s := n.getSubnetforIPAddr(ip) - if s == nil { - logrus.Errorf("Reprogramming on L3 miss failed for %s, not a valid subnet", ip) - return - } - sbox := n.sandbox() - if sbox == nil { - logrus.Errorf("Reprogramming on L3 miss failed for %s, overlay sandbox missing", ip) - return - } - if err := sbox.AddNeighbor(ip, peerMac, true, sbox.NeighborOptions().LinkName(s.vxlanName)); err != nil { - logrus.Errorf("Reprogramming on L3 miss failed for %s: %v", ip, err) - return - } -} - func (d *driver) addNetwork(n *network) { d.Lock() d.networks[n.id] = n @@ -1090,15 +1067,6 @@ func (n *network) contains(ip net.IP) bool { return false } -func (n *network) getSubnetforIPAddr(ip net.IP) *subnet { - for _, s := range n.subnets { - if s.subnetIP.Contains(ip) { - return s - } - } - return nil -} - // getSubnetforIP returns the subnet to which the given IP belongs func (n *network) getSubnetforIP(ip *net.IPNet) *subnet { for _, s := range n.subnets { diff --git a/libnetwork/drivers/overlay/ov_serf.go b/libnetwork/drivers/overlay/ov_serf.go index 6e034ada46..f644799afd 100644 --- a/libnetwork/drivers/overlay/ov_serf.go +++ b/libnetwork/drivers/overlay/ov_serf.go @@ -122,7 +122,7 @@ func (d *driver) processEvent(u serf.UserEvent) { case "join": d.peerAdd(nid, eid, net.ParseIP(ipStr), net.IPMask(net.ParseIP(maskStr).To4()), mac, net.ParseIP(vtepStr), false, false, false) case "leave": - d.peerDelete(nid, eid, net.ParseIP(ipStr), net.IPMask(net.ParseIP(maskStr).To4()), mac, net.ParseIP(vtepStr)) + d.peerDelete(nid, eid, net.ParseIP(ipStr), net.IPMask(net.ParseIP(maskStr).To4()), mac, net.ParseIP(vtepStr), false) } } @@ -135,13 +135,13 @@ func (d *driver) processQuery(q *serf.Query) { fmt.Printf("Failed to scan query payload string: %v\n", err) } - peerMac, peerIPMask, vtep, err := d.peerDbSearch(nid, net.ParseIP(ipStr)) + pKey, pEntry, err := d.peerDbSearch(nid, net.ParseIP(ipStr)) if err != nil { return } - logrus.Debugf("Sending peer query resp mac %s, mask %s, vtep %s", peerMac, net.IP(peerIPMask), vtep) - q.Respond([]byte(fmt.Sprintf("%s %s %s", peerMac.String(), net.IP(peerIPMask).String(), vtep.String()))) + logrus.Debugf("Sending peer query resp mac %v, mask %s, vtep %s", pKey.peerMac, net.IP(pEntry.peerIPMask).String(), pEntry.vtep) + q.Respond([]byte(fmt.Sprintf("%s %s %s", pKey.peerMac.String(), net.IP(pEntry.peerIPMask).String(), pEntry.vtep.String()))) } func (d *driver) resolvePeer(nid string, peerIP net.IP) (net.HardwareAddr, net.IPMask, net.IP, error) { diff --git a/libnetwork/drivers/overlay/overlay.go b/libnetwork/drivers/overlay/overlay.go index 2bae0823e1..f029c5cce4 100644 --- a/libnetwork/drivers/overlay/overlay.go +++ b/libnetwork/drivers/overlay/overlay.go @@ -262,7 +262,7 @@ func (d *driver) nodeJoin(advertiseAddress, bindAddress string, self bool) { d.Unlock() // If containers are already running on this network update the - // advertiseaddress in the peerDB + // advertise address in the peerDB d.localJoinOnce.Do(func() { d.peerDBUpdateSelf() }) diff --git a/libnetwork/drivers/overlay/overlay.proto b/libnetwork/drivers/overlay/overlay.proto index 45b8c9de7e..3133386e03 100644 --- a/libnetwork/drivers/overlay/overlay.proto +++ b/libnetwork/drivers/overlay/overlay.proto @@ -24,4 +24,4 @@ message PeerRecord { // which this container is running and can be reached by // building a tunnel to that host IP. string tunnel_endpoint_ip = 3 [(gogoproto.customname) = "TunnelEndpointIP"]; -} \ No newline at end of file +} diff --git a/libnetwork/drivers/overlay/peerdb.go b/libnetwork/drivers/overlay/peerdb.go index f953e3c872..a27687bcbe 100644 --- a/libnetwork/drivers/overlay/peerdb.go +++ b/libnetwork/drivers/overlay/peerdb.go @@ -8,6 +8,7 @@ import ( "syscall" "github.com/docker/libnetwork/common" + "github.com/docker/libnetwork/osl" "github.com/sirupsen/logrus" ) @@ -22,16 +23,45 @@ type peerEntry struct { eid string vtep net.IP peerIPMask net.IPMask - inSandbox bool isLocal bool } +func (p *peerEntry) MarshalDB() peerEntryDB { + return peerEntryDB{ + eid: p.eid, + vtep: p.vtep.String(), + peerIPMask: p.peerIPMask.String(), + isLocal: p.isLocal, + } +} + +// This the structure saved into the set (SetMatrix), due to the implementation of it +// the value inserted in the set has to be Hashable so the []byte had to be converted into +// strings +type peerEntryDB struct { + eid string + vtep string + peerIPMask string + isLocal bool +} + +func (p *peerEntryDB) UnMarshalDB() peerEntry { + return peerEntry{ + eid: p.eid, + vtep: net.ParseIP(p.vtep), + peerIPMask: net.IPMask(net.ParseIP(p.peerIPMask)), + isLocal: p.isLocal, + } +} + type peerMap struct { - mp map[string]peerEntry + // set of peerEntry, note they have to be objects and not pointers to maintain the proper equality checks + mp common.SetMatrix sync.Mutex } type peerNetworkMap struct { + // map with key peerKey mp map[string]*peerMap sync.Mutex } @@ -54,11 +84,7 @@ func (pKey *peerKey) Scan(state fmt.ScanState, verb rune) error { } pKey.peerMac, err = net.ParseMAC(string(macB)) - if err != nil { - return err - } - - return nil + return err } func (d *driver) peerDbWalk(f func(string, *peerKey, *peerEntry) bool) error { @@ -87,10 +113,13 @@ func (d *driver) peerDbNetworkWalk(nid string, f func(*peerKey, *peerEntry) bool } mp := map[string]peerEntry{} - pMap.Lock() - for pKeyStr, pEntry := range pMap.mp { - mp[pKeyStr] = pEntry + for _, pKeyStr := range pMap.mp.Keys() { + entryDBList, ok := pMap.mp.Get(pKeyStr) + if ok { + peerEntryDB := entryDBList[0].(peerEntryDB) + mp[pKeyStr] = peerEntryDB.UnMarshalDB() + } } pMap.Unlock() @@ -107,45 +136,38 @@ func (d *driver) peerDbNetworkWalk(nid string, f func(*peerKey, *peerEntry) bool return nil } -func (d *driver) peerDbSearch(nid string, peerIP net.IP) (net.HardwareAddr, net.IPMask, net.IP, error) { - var ( - peerMac net.HardwareAddr - vtep net.IP - peerIPMask net.IPMask - found bool - ) - +func (d *driver) peerDbSearch(nid string, peerIP net.IP) (*peerKey, *peerEntry, error) { + var pKeyMatched *peerKey + var pEntryMatched *peerEntry err := d.peerDbNetworkWalk(nid, func(pKey *peerKey, pEntry *peerEntry) bool { if pKey.peerIP.Equal(peerIP) { - peerMac = pKey.peerMac - peerIPMask = pEntry.peerIPMask - vtep = pEntry.vtep - found = true - return found + pKeyMatched = pKey + pEntryMatched = pEntry + return true } - return found + return false }) if err != nil { - return nil, nil, nil, fmt.Errorf("peerdb search for peer ip %q failed: %v", peerIP, err) + return nil, nil, fmt.Errorf("peerdb search for peer ip %q failed: %v", peerIP, err) } - if !found { - return nil, nil, nil, fmt.Errorf("peer ip %q not found in peerdb", peerIP) + if pKeyMatched == nil || pEntryMatched == nil { + return nil, nil, fmt.Errorf("peer ip %q not found in peerdb", peerIP) } - return peerMac, peerIPMask, vtep, nil + return pKeyMatched, pEntryMatched, nil } func (d *driver) peerDbAdd(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, - peerMac net.HardwareAddr, vtep net.IP, isLocal bool) { + peerMac net.HardwareAddr, vtep net.IP, isLocal bool) (bool, int) { d.peerDb.Lock() pMap, ok := d.peerDb.mp[nid] if !ok { d.peerDb.mp[nid] = &peerMap{ - mp: make(map[string]peerEntry), + mp: common.NewSetMatrix(), } pMap = d.peerDb.mp[nid] @@ -165,18 +187,24 @@ func (d *driver) peerDbAdd(nid, eid string, peerIP net.IP, peerIPMask net.IPMask } pMap.Lock() - pMap.mp[pKey.String()] = pEntry - pMap.Unlock() + defer pMap.Unlock() + b, i := pMap.mp.Insert(pKey.String(), pEntry.MarshalDB()) + if i != 1 { + // Transient case, there is more than one endpoint that is using the same IP,MAC pair + s, _ := pMap.mp.String(pKey.String()) + logrus.Warnf("peerDbAdd transient condition - Key:%s cardinality:%d db state:%s", pKey.String(), i, s) + } + return b, i } func (d *driver) peerDbDelete(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, - peerMac net.HardwareAddr, vtep net.IP) peerEntry { + peerMac net.HardwareAddr, vtep net.IP, isLocal bool) (bool, int) { d.peerDb.Lock() pMap, ok := d.peerDb.mp[nid] if !ok { d.peerDb.Unlock() - return peerEntry{} + return false, 0 } d.peerDb.Unlock() @@ -185,22 +213,22 @@ func (d *driver) peerDbDelete(nid, eid string, peerIP net.IP, peerIPMask net.IPM peerMac: peerMac, } - pMap.Lock() - - pEntry, ok := pMap.mp[pKey.String()] - if ok { - // Mismatched endpoint ID(possibly outdated). Do not - // delete peerdb - if pEntry.eid != eid { - pMap.Unlock() - return pEntry - } + pEntry := peerEntry{ + eid: eid, + vtep: vtep, + peerIPMask: peerIPMask, + isLocal: isLocal, } - delete(pMap.mp, pKey.String()) - pMap.Unlock() - - return pEntry + pMap.Lock() + defer pMap.Unlock() + b, i := pMap.mp.Remove(pKey.String(), pEntry.MarshalDB()) + if i != 0 { + // Transient case, there is more than one endpoint that is using the same IP,MAC pair + s, _ := pMap.mp.String(pKey.String()) + logrus.Warnf("peerDbDelete transient condition - Key:%s cardinality:%d db state:%s", pKey.String(), i, s) + } + return b, i } // The overlay uses a lazy initialization approach, this means that when a network is created @@ -224,6 +252,7 @@ const ( peerOperationINIT peerOperationType = iota peerOperationADD peerOperationDELETE + peerOperationFLUSH ) type peerOperation struct { @@ -253,7 +282,9 @@ func (d *driver) peerOpRoutine(ctx context.Context, ch chan *peerOperation) { case peerOperationADD: err = d.peerAddOp(op.networkID, op.endpointID, op.peerIP, op.peerIPMask, op.peerMac, op.vtepIP, op.l2Miss, op.l3Miss, true, op.localPeer) case peerOperationDELETE: - err = d.peerDeleteOp(op.networkID, op.endpointID, op.peerIP, op.peerIPMask, op.peerMac, op.vtepIP) + err = d.peerDeleteOp(op.networkID, op.endpointID, op.peerIP, op.peerIPMask, op.peerMac, op.vtepIP, op.localPeer) + case peerOperationFLUSH: + err = d.peerFlushOp(op.networkID) } if err != nil { logrus.Warnf("Peer operation failed:%s op:%v", err, op) @@ -286,7 +317,6 @@ func (d *driver) peerInitOp(nid string) error { func (d *driver) peerAdd(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, peerMac net.HardwareAddr, vtep net.IP, l2Miss, l3Miss, localPeer bool) { - callerName := common.CallerName(1) d.peerOpCh <- &peerOperation{ opType: peerOperationADD, networkID: nid, @@ -298,24 +328,32 @@ func (d *driver) peerAdd(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, l2Miss: l2Miss, l3Miss: l3Miss, localPeer: localPeer, - callerName: callerName, + callerName: common.CallerName(1), } } func (d *driver) peerAddOp(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, - peerMac net.HardwareAddr, vtep net.IP, l2Miss, l3Miss, updateDB, updateOnlyDB bool) error { + peerMac net.HardwareAddr, vtep net.IP, l2Miss, l3Miss, updateDB, localPeer bool) error { if err := validateID(nid, eid); err != nil { return err } + var dbEntries int + var inserted bool if updateDB { - d.peerDbAdd(nid, eid, peerIP, peerIPMask, peerMac, vtep, false) - if updateOnlyDB { - return nil + inserted, dbEntries = d.peerDbAdd(nid, eid, peerIP, peerIPMask, peerMac, vtep, localPeer) + if !inserted { + logrus.Warnf("Entry already present in db: nid:%s eid:%s peerIP:%v peerMac:%v isLocal:%t vtep:%v", + nid, eid, peerIP, peerMac, localPeer, vtep) } } + // Local peers do not need any further configuration + if localPeer { + return nil + } + n := d.network(nid) if n == nil { return nil @@ -353,21 +391,26 @@ func (d *driver) peerAddOp(nid, eid string, peerIP net.IP, peerIPMask net.IPMask // Add neighbor entry for the peer IP if err := sbox.AddNeighbor(peerIP, peerMac, l3Miss, sbox.NeighborOptions().LinkName(s.vxlanName)); err != nil { - return fmt.Errorf("could not add neighbor entry into the sandbox: %v", err) + if _, ok := err.(osl.NeighborSearchError); ok && dbEntries > 1 { + // We are in the transient case so only the first configuration is programmed into the kernel + // Upon deletion if the active configuration is deleted the next one from the database will be restored + // Note we are skipping also the next configuration + return nil + } + return fmt.Errorf("could not add neighbor entry for nid:%s eid:%s into the sandbox:%v", nid, eid, err) } // Add fdb entry to the bridge for the peer mac if err := sbox.AddNeighbor(vtep, peerMac, l2Miss, sbox.NeighborOptions().LinkName(s.vxlanName), sbox.NeighborOptions().Family(syscall.AF_BRIDGE)); err != nil { - return fmt.Errorf("could not add fdb entry into the sandbox: %v", err) + return fmt.Errorf("could not add fdb entry for nid:%s eid:%s into the sandbox:%v", nid, eid, err) } return nil } func (d *driver) peerDelete(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, - peerMac net.HardwareAddr, vtep net.IP) { - callerName := common.CallerName(1) + peerMac net.HardwareAddr, vtep net.IP, localPeer bool) { d.peerOpCh <- &peerOperation{ opType: peerOperationDELETE, networkID: nid, @@ -376,18 +419,23 @@ func (d *driver) peerDelete(nid, eid string, peerIP net.IP, peerIPMask net.IPMas peerIPMask: peerIPMask, peerMac: peerMac, vtepIP: vtep, - callerName: callerName, + callerName: common.CallerName(1), + localPeer: localPeer, } } func (d *driver) peerDeleteOp(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, - peerMac net.HardwareAddr, vtep net.IP) error { + peerMac net.HardwareAddr, vtep net.IP, localPeer bool) error { if err := validateID(nid, eid); err != nil { return err } - pEntry := d.peerDbDelete(nid, eid, peerIP, peerIPMask, peerMac, vtep) + deleted, dbEntries := d.peerDbDelete(nid, eid, peerIP, peerIPMask, peerMac, vtep, localPeer) + if !deleted { + logrus.Warnf("Entry was not in db: nid:%s eid:%s peerIP:%v peerMac:%v isLocal:%t vtep:%v", + nid, eid, peerIP, peerMac, localPeer, vtep) + } n := d.network(nid) if n == nil { @@ -399,30 +447,59 @@ func (d *driver) peerDeleteOp(nid, eid string, peerIP net.IP, peerIPMask net.IPM return nil } - // Delete fdb entry to the bridge for the peer mac only if the - // entry existed in local peerdb. If it is a stale delete - // request, still call DeleteNeighbor but only to cleanup any - // leftover sandbox neighbor cache and not actually delete the - // kernel state. - if (eid == pEntry.eid && vtep.Equal(pEntry.vtep)) || - (eid != pEntry.eid && !vtep.Equal(pEntry.vtep)) { - if err := sbox.DeleteNeighbor(vtep, peerMac, - eid == pEntry.eid && vtep.Equal(pEntry.vtep)); err != nil { - return fmt.Errorf("could not delete fdb entry into the sandbox: %v", err) - } - } - - // Delete neighbor entry for the peer IP - if eid == pEntry.eid { - if err := sbox.DeleteNeighbor(peerIP, peerMac, true); err != nil { - return fmt.Errorf("could not delete neighbor entry into the sandbox: %v", err) - } - } - - if err := d.checkEncryption(nid, vtep, 0, false, false); err != nil { + if err := d.checkEncryption(nid, vtep, 0, localPeer, false); err != nil { logrus.Warn(err) } + // Local peers do not have any local configuration to delete + if !localPeer { + // Remove fdb entry to the bridge for the peer mac + if err := sbox.DeleteNeighbor(vtep, peerMac, true); err != nil { + if _, ok := err.(osl.NeighborSearchError); ok && dbEntries > 0 { + // We fall in here if there is a transient state and if the neighbor that is being deleted + // was never been configured into the kernel (we allow only 1 configuration at the time per mapping) + return nil + } + return fmt.Errorf("could not delete fdb entry for nid:%s eid:%s into the sandbox:%v", nid, eid, err) + } + + // Delete neighbor entry for the peer IP + if err := sbox.DeleteNeighbor(peerIP, peerMac, true); err != nil { + return fmt.Errorf("could not delete neighbor entry for nid:%s eid:%s into the sandbox:%v", nid, eid, err) + } + } + + if dbEntries == 0 { + return nil + } + + // If there is still an entry into the database and the deletion went through without errors means that there is now no + // configuration active in the kernel. + // Restore one configuration for the directly from the database, note that is guaranteed that there is one + peerKey, peerEntry, err := d.peerDbSearch(nid, peerIP) + if err != nil { + logrus.Errorf("peerDeleteOp unable to restore a configuration for nid:%s ip:%v mac:%v err:%s", nid, peerIP, peerMac, err) + return err + } + return d.peerAddOp(nid, peerEntry.eid, peerIP, peerEntry.peerIPMask, peerKey.peerMac, peerEntry.vtep, false, false, false, peerEntry.isLocal) +} + +func (d *driver) peerFlush(nid string) { + d.peerOpCh <- &peerOperation{ + opType: peerOperationFLUSH, + networkID: nid, + callerName: common.CallerName(1), + } +} + +func (d *driver) peerFlushOp(nid string) error { + d.peerDb.Lock() + defer d.peerDb.Unlock() + _, ok := d.peerDb.mp[nid] + if !ok { + return fmt.Errorf("Unable to find the peerDB for nid:%s", nid) + } + delete(d.peerDb.mp, nid) return nil } diff --git a/libnetwork/networkdb/networkdb.go b/libnetwork/networkdb/networkdb.go index 9a2195ea54..02502cb483 100644 --- a/libnetwork/networkdb/networkdb.go +++ b/libnetwork/networkdb/networkdb.go @@ -505,7 +505,10 @@ func (nDB *NetworkDB) deleteNodeNetworkEntries(nid, node string) { nDB.deleteEntry(nid, tname, key) } - nDB.broadcaster.Write(makeEvent(opDelete, tname, nid, key, entry.value)) + // Notify to the upper layer only entries not already marked for deletion + if !oldEntry.deleting { + nDB.broadcaster.Write(makeEvent(opDelete, tname, nid, key, entry.value)) + } return false }) } diff --git a/libnetwork/osl/neigh_linux.go b/libnetwork/osl/neigh_linux.go index 4e479489fa..6bf1c16dc5 100644 --- a/libnetwork/osl/neigh_linux.go +++ b/libnetwork/osl/neigh_linux.go @@ -9,6 +9,17 @@ import ( "github.com/vishvananda/netlink" ) +// NeighborSearchError indicates that the neighbor is already present +type NeighborSearchError struct { + ip net.IP + mac net.HardwareAddr + present bool +} + +func (n NeighborSearchError) Error() string { + return fmt.Sprintf("Search neighbor failed for IP %v, mac %v, present in db:%t", n.ip, n.mac, n.present) +} + // NeighOption is a function option type to set interface options type NeighOption func(nh *neigh) @@ -41,7 +52,7 @@ func (n *networkNamespace) DeleteNeighbor(dstIP net.IP, dstMac net.HardwareAddr, nh := n.findNeighbor(dstIP, dstMac) if nh == nil { - return fmt.Errorf("could not find the neighbor entry to delete") + return NeighborSearchError{dstIP, dstMac, false} } if osDelete { @@ -103,26 +114,27 @@ func (n *networkNamespace) DeleteNeighbor(dstIP net.IP, dstMac net.HardwareAddr, } } n.Unlock() - logrus.Debugf("Neighbor entry deleted for IP %v, mac %v", dstIP, dstMac) + logrus.Debugf("Neighbor entry deleted for IP %v, mac %v osDelete:%t", dstIP, dstMac, osDelete) return nil } func (n *networkNamespace) AddNeighbor(dstIP net.IP, dstMac net.HardwareAddr, force bool, options ...NeighOption) error { var ( - iface netlink.Link - err error + iface netlink.Link + err error + neighborAlreadyPresent bool ) // If the namespace already has the neighbor entry but the AddNeighbor is called // because of a miss notification (force flag) program the kernel anyway. nh := n.findNeighbor(dstIP, dstMac) if nh != nil { + neighborAlreadyPresent = true + logrus.Warnf("Neighbor entry already present for IP %v, mac %v neighbor:%+v forceUpdate:%t", dstIP, dstMac, nh, force) if !force { - logrus.Warnf("Neighbor entry already present for IP %v, mac %v", dstIP, dstMac) - return nil + return NeighborSearchError{dstIP, dstMac, true} } - logrus.Warnf("Force kernel update, Neighbor entry already present for IP %v, mac %v", dstIP, dstMac) } nh = &neigh{ @@ -146,8 +158,7 @@ func (n *networkNamespace) AddNeighbor(dstIP net.IP, dstMac net.HardwareAddr, fo if nh.linkDst != "" { iface, err = nlh.LinkByName(nh.linkDst) if err != nil { - return fmt.Errorf("could not find interface with destination name %s: %v", - nh.linkDst, err) + return fmt.Errorf("could not find interface with destination name %s: %v", nh.linkDst, err) } } @@ -167,13 +178,17 @@ func (n *networkNamespace) AddNeighbor(dstIP net.IP, dstMac net.HardwareAddr, fo } if err := nlh.NeighSet(nlnh); err != nil { - return fmt.Errorf("could not add neighbor entry: %v", err) + return fmt.Errorf("could not add neighbor entry:%+v error:%v", nlnh, err) + } + + if neighborAlreadyPresent { + return nil } n.Lock() n.neighbors = append(n.neighbors, nh) n.Unlock() - logrus.Debugf("Neighbor entry added for IP %v, mac %v", dstIP, dstMac) + logrus.Debugf("Neighbor entry added for IP:%v, mac:%v on ifc:%s", dstIP, dstMac, nh.linkName) return nil }