mirror of
https://github.com/moby/moby.git
synced 2022-11-09 12:21:53 -05:00
5f5dad3c02
Currently, if there is any transient gossip failure in any node, the recovery process depends on other nodes propagating the information indirectly. If these transient failures affect all the nodes that this node has in its memberlist, then this node will be permanently cut off from the gossip channel. Added node state management code in networkdb to address these problems by trying to rejoin the cluster via the failed nodes when there is a failure. This also necessitates adding new messages, called node event messages, to differentiate between node leave and node failure. Signed-off-by: Jana Radhakrishnan <mrjana@docker.com>
38 lines
948 B
Go
38 lines
948 B
Go
package networkdb
|
|
|
|
import "github.com/hashicorp/memberlist"
|
|
|
|
type eventDelegate struct {
|
|
nDB *NetworkDB
|
|
}
|
|
|
|
func (e *eventDelegate) NotifyJoin(mn *memberlist.Node) {
|
|
e.nDB.Lock()
|
|
// In case the node is rejoining after a failure or leave,
|
|
// wait until an explicit join message arrives before adding
|
|
// it to the nodes just to make sure this is not a stale
|
|
// join. If you don't know about this node add it immediately.
|
|
_, fOk := e.nDB.failedNodes[mn.Name]
|
|
_, lOk := e.nDB.leftNodes[mn.Name]
|
|
if fOk || lOk {
|
|
e.nDB.Unlock()
|
|
return
|
|
}
|
|
|
|
e.nDB.nodes[mn.Name] = &node{Node: *mn}
|
|
e.nDB.Unlock()
|
|
}
|
|
|
|
func (e *eventDelegate) NotifyLeave(mn *memberlist.Node) {
|
|
e.nDB.deleteNodeTableEntries(mn.Name)
|
|
e.nDB.deleteNetworkEntriesForNode(mn.Name)
|
|
e.nDB.Lock()
|
|
if n, ok := e.nDB.nodes[mn.Name]; ok {
|
|
delete(e.nDB.nodes, mn.Name)
|
|
e.nDB.failedNodes[mn.Name] = n
|
|
}
|
|
e.nDB.Unlock()
|
|
}
|
|
|
|
func (e *eventDelegate) NotifyUpdate(n *memberlist.Node) {
|
|
}
|