diff --git a/libnetwork/networkdb/cluster.go b/libnetwork/networkdb/cluster.go index a8231481df..af6f5d9f7b 100644 --- a/libnetwork/networkdb/cluster.go +++ b/libnetwork/networkdb/cluster.go @@ -17,11 +17,15 @@ import ( ) const ( - reapInterval = 30 * time.Minute - reapPeriod = 5 * time.Second - retryInterval = 1 * time.Second - nodeReapInterval = 24 * time.Hour - nodeReapPeriod = 2 * time.Hour + // The garbage collection logic for entries leverages the presence of the network. + // For this reason, the expiration time of the network is set slightly higher than the entry expiration so that + // there are at least 5 extra cycles to make sure that all the entries are properly deleted before deleting the network. + reapEntryInterval = 30 * time.Minute + reapNetworkInterval = reapEntryInterval + 5*reapPeriod + reapPeriod = 5 * time.Second + retryInterval = 1 * time.Second + nodeReapInterval = 24 * time.Hour + nodeReapPeriod = 2 * time.Hour ) type logWriter struct{} @@ -300,8 +304,9 @@ func (nDB *NetworkDB) reconnectNode() { // the reaper runs. NOTE nDB.reapTableEntries updates the reapTime with a readlock. This // is safe as long as no other concurrent path touches the reapTime field. 
func (nDB *NetworkDB) reapState() { - nDB.reapNetworks() + // reapTableEntries leverages the presence of the network, so garbage collect entries first nDB.reapTableEntries() + nDB.reapNetworks() } func (nDB *NetworkDB) reapNetworks() { @@ -414,8 +419,8 @@ func (nDB *NetworkDB) gossip() { // Collect stats and print the queue info, note this code is here also to have a view of the queues empty network.qMessagesSent += len(msgs) if printStats { - logrus.Infof("NetworkDB stats - net:%s Entries:%d Queue qLen:%d netPeers:%d netMsg/s:%d", - nid, network.entriesNumber, broadcastQ.NumQueued(), broadcastQ.NumNodes(), + logrus.Infof("NetworkDB stats - netID:%s leaving:%t netPeers:%d entries:%d Queue qLen:%d netMsg/s:%d", + nid, network.leaving, broadcastQ.NumNodes(), network.entriesNumber, broadcastQ.NumQueued(), network.qMessagesSent/int((nDB.config.StatsPrintPeriod/time.Second))) network.qMessagesSent = 0 } diff --git a/libnetwork/networkdb/delegate.go b/libnetwork/networkdb/delegate.go index bcddc9014b..28919cf3d2 100644 --- a/libnetwork/networkdb/delegate.go +++ b/libnetwork/networkdb/delegate.go @@ -165,7 +165,7 @@ func (nDB *NetworkDB) handleNetworkEvent(nEvent *NetworkEvent) bool { n.ltime = nEvent.LTime n.leaving = nEvent.Type == NetworkEventTypeLeave if n.leaving { - n.reapTime = reapInterval + n.reapTime = reapNetworkInterval // The remote node is leaving the network, but not the gossip cluster. // Mark all its entries in deleted state, this will guarantee that @@ -242,7 +242,7 @@ func (nDB *NetworkDB) handleTableEvent(tEvent *TableEvent) bool { // field. 
If that is not the case, this can be a BUG if e.deleting && e.reapTime == 0 { logrus.Warnf("handleTableEvent object %+v has a 0 reapTime, is the cluster running the same docker engine version?", tEvent) - e.reapTime = reapInterval + e.reapTime = reapEntryInterval } nDB.Lock() diff --git a/libnetwork/networkdb/networkdb.go b/libnetwork/networkdb/networkdb.go index caa3cfc5a6..afdf32e2c2 100644 --- a/libnetwork/networkdb/networkdb.go +++ b/libnetwork/networkdb/networkdb.go @@ -405,7 +405,7 @@ func (nDB *NetworkDB) DeleteEntry(tname, nid, key string) error { node: nDB.config.NodeName, value: value, deleting: true, - reapTime: reapInterval, + reapTime: reapEntryInterval, } if err := nDB.sendTableEvent(TableEventTypeDelete, nid, tname, key, entry); err != nil { @@ -478,7 +478,7 @@ func (nDB *NetworkDB) deleteNodeNetworkEntries(nid, node string) { node: oldEntry.node, value: oldEntry.value, deleting: true, - reapTime: reapInterval, + reapTime: reapEntryInterval, } // we arrived at this point in 2 cases: @@ -619,8 +619,9 @@ func (nDB *NetworkDB) LeaveNetwork(nid string) error { return fmt.Errorf("could not find network %s while trying to leave", nid) } + logrus.Debugf("%s: leaving network %s", nDB.config.NodeName, nid) n.ltime = ltime - n.reapTime = reapInterval + n.reapTime = reapNetworkInterval n.leaving = true return nil }