mirror of
https://github.com/moby/moby.git
synced 2022-11-09 12:21:53 -05:00

The time to keep a failed node in the failed node list was originally supposed to be 24h. If a node leaves explicitly, it is removed from the list of nodes and put into the leftNodes list. This way the NotifyLeave event won't insert it into the retry list. NOTE: if the leave event is lost, the behavior will instead be the same as for a failed node. If a node fails, NotifyLeave will insert it into the failedNodes list with a reapTime of 24h. This means that the node will be checked for 24h before being completely forgotten. The check currently runs every 1 second and is done by the reconnectNode function. The failed node list, instead, is updated every 2h. Signed-off-by: Flavio Crisciani <flavio.crisciani@docker.com>
59 lines
1.6 KiB
Go
59 lines
1.6 KiB
Go
package networkdb
|
|
|
|
import (
|
|
"encoding/json"
|
|
"net"
|
|
|
|
"github.com/Sirupsen/logrus"
|
|
"github.com/hashicorp/memberlist"
|
|
)
|
|
|
|
type eventDelegate struct {
|
|
nDB *NetworkDB
|
|
}
|
|
|
|
func (e *eventDelegate) broadcastNodeEvent(addr net.IP, op opType) {
|
|
value, err := json.Marshal(&NodeAddr{addr})
|
|
if err == nil {
|
|
e.nDB.broadcaster.Write(makeEvent(op, NodeTable, "", "", value))
|
|
} else {
|
|
logrus.Errorf("Error marshalling node broadcast event %s", addr.String())
|
|
}
|
|
}
|
|
|
|
func (e *eventDelegate) NotifyJoin(mn *memberlist.Node) {
|
|
e.broadcastNodeEvent(mn.Addr, opCreate)
|
|
e.nDB.Lock()
|
|
// In case the node is rejoining after a failure or leave,
|
|
// wait until an explicit join message arrives before adding
|
|
// it to the nodes just to make sure this is not a stale
|
|
// join. If you don't know about this node add it immediately.
|
|
_, fOk := e.nDB.failedNodes[mn.Name]
|
|
_, lOk := e.nDB.leftNodes[mn.Name]
|
|
if fOk || lOk {
|
|
e.nDB.Unlock()
|
|
return
|
|
}
|
|
|
|
e.nDB.nodes[mn.Name] = &node{Node: *mn}
|
|
e.nDB.Unlock()
|
|
}
|
|
|
|
func (e *eventDelegate) NotifyLeave(mn *memberlist.Node) {
|
|
e.broadcastNodeEvent(mn.Addr, opDelete)
|
|
e.nDB.deleteNodeTableEntries(mn.Name)
|
|
e.nDB.deleteNetworkEntriesForNode(mn.Name)
|
|
e.nDB.Lock()
|
|
if n, ok := e.nDB.nodes[mn.Name]; ok {
|
|
delete(e.nDB.nodes, mn.Name)
|
|
|
|
// In case of node failure, keep retrying to reconnect every retryInterval (1sec) for nodeReapInterval (24h)
|
|
// Explicit leave will have already removed the node from the list of nodes (nDB.nodes) and put it into the leftNodes map
|
|
n.reapTime = nodeReapInterval
|
|
e.nDB.failedNodes[mn.Name] = n
|
|
}
|
|
e.nDB.Unlock()
|
|
}
|
|
|
|
func (e *eventDelegate) NotifyUpdate(n *memberlist.Node) {
|
|
}
|