Mirror of https://github.com/moby/moby.git (synced 2022-11-09 12:21:53 -05:00)
Merge pull request #1860 from fcrisciani/network-db-stabilization

Network db stabilization

Commit b486c76cce. 5 changed files with 80 additions and 107 deletions.
@@ -114,7 +114,8 @@ type tableEventMessage struct {
 }
 
 func (m *tableEventMessage) Invalidates(other memberlist.Broadcast) bool {
-	return false
+	otherm := other.(*tableEventMessage)
+	return m.tname == otherm.tname && m.id == otherm.id && m.key == otherm.key
 }
 
 func (m *tableEventMessage) Message() []byte {

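For context on the Invalidates change: hashicorp/memberlist keeps pending gossip messages in a TransmitLimitedQueue and, when a new broadcast is queued, asks whether it Invalidates an already-queued one. Returning a real comparison instead of always false lets a newer table event for the same table/network/key replace the stale copy still waiting in the queue. A minimal sketch of that coalescing behavior, assuming the hashicorp/memberlist API; keyedBroadcast and the queue settings are illustrative and not part of the patch:

package main

import (
	"fmt"

	"github.com/hashicorp/memberlist"
)

// keyedBroadcast mimics how tableEventMessage is keyed: two broadcasts for the
// same (tname, id, key) supersede each other in the gossip queue.
type keyedBroadcast struct {
	tname, id, key string
	payload        []byte
}

func (b *keyedBroadcast) Invalidates(other memberlist.Broadcast) bool {
	o, ok := other.(*keyedBroadcast)
	return ok && o.tname == b.tname && o.id == b.id && o.key == b.key
}

func (b *keyedBroadcast) Message() []byte { return b.payload }
func (b *keyedBroadcast) Finished()       {}

func main() {
	q := &memberlist.TransmitLimitedQueue{
		NumNodes:       func() int { return 3 },
		RetransmitMult: 3,
	}
	// The second broadcast invalidates the first, so only the newest payload
	// for this key stays queued for gossip.
	q.QueueBroadcast(&keyedBroadcast{tname: "t", id: "net1", key: "k", payload: []byte("v1")})
	q.QueueBroadcast(&keyedBroadcast{tname: "t", id: "net1", key: "k", payload: []byte("v2")})
	fmt.Println(len(q.GetBroadcasts(0, 1<<20))) // 1
}

In NetworkDB the payload would be the encoded TableEvent; the point here is only that the queue keeps a single, newest message per key.
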
@@ -290,13 +290,6 @@ func (nDB *NetworkDB) reconnectNode() {
 		return
 	}
 
-	// Update all the local table state to a new time to
-	// force update on the node we are trying to rejoin, just in
-	// case that node has these in deleting state still. This is
-	// facilitate fast convergence after recovering from a gossip
-	// failure.
-	nDB.updateLocalTableTime()
-
 	logrus.Debugf("Initiating bulk sync with node %s after reconnect", node.Name)
 	nDB.bulkSync([]string{node.Name}, true)
 }

@@ -133,25 +133,12 @@ func (nDB *NetworkDB) handleNodeEvent(nEvent *NodeEvent) bool {
 }
 
 func (nDB *NetworkDB) handleNetworkEvent(nEvent *NetworkEvent) bool {
-	var flushEntries bool
 	// Update our local clock if the received messages has newer
 	// time.
 	nDB.networkClock.Witness(nEvent.LTime)
 
 	nDB.Lock()
-	defer func() {
-		nDB.Unlock()
-		// When a node leaves a network on the last task removal cleanup the
-		// local entries for this network & node combination. When the tasks
-		// on a network are removed we could have missed the gossip updates.
-		// Not doing this cleanup can leave stale entries because bulksyncs
-		// from the node will no longer include this network state.
-		//
-		// deleteNodeNetworkEntries takes nDB lock.
-		if flushEntries {
-			nDB.deleteNodeNetworkEntries(nEvent.NetworkID, nEvent.NodeName)
-		}
-	}()
+	defer nDB.Unlock()
 
 	if nEvent.NodeName == nDB.config.NodeName {
 		return false

@@ -179,7 +166,12 @@ func (nDB *NetworkDB) handleNetworkEvent(nEvent *NetworkEvent) bool {
 	n.leaving = nEvent.Type == NetworkEventTypeLeave
 	if n.leaving {
 		n.reapTime = reapInterval
-		flushEntries = true
+
+		// The remote node is leaving the network, but not the gossip cluster.
+		// Mark all its entries in deleted state, this will guarantee that
+		// if some node bulk sync with us, the deleted state of
+		// these entries will be propagated.
+		nDB.deleteNodeNetworkEntries(nEvent.NetworkID, nEvent.NodeName)
 	}
 
 	if nEvent.Type == NetworkEventTypeLeave {

@@ -214,17 +206,22 @@ func (nDB *NetworkDB) handleTableEvent(tEvent *TableEvent) bool {
 	nDB.RLock()
 	networks := nDB.networks[nDB.config.NodeName]
 	network, ok := networks[tEvent.NetworkID]
+	// Check if the owner of the event is still part of the network
+	nodes := nDB.networkNodes[tEvent.NetworkID]
+	var nodePresent bool
+	for _, node := range nodes {
+		if node == tEvent.NodeName {
+			nodePresent = true
+			break
+		}
+	}
 	nDB.RUnlock()
-	if !ok || network.leaving {
-		return true
-	}
-
-	e, err := nDB.getEntry(tEvent.TableName, tEvent.NetworkID, tEvent.Key)
-	if err != nil && tEvent.Type == TableEventTypeDelete {
-		// If it is a delete event and we don't have the entry here nothing to do.
+	if !ok || network.leaving || !nodePresent {
+		// I'm out of the network OR the event owner is not anymore part of the network so do not propagate
 		return false
 	}
 
+	e, err := nDB.getEntry(tEvent.TableName, tEvent.NetworkID, tEvent.Key)
 	if err == nil {
 		// We have the latest state. Ignore the event
 		// since it is stale.

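The owner-presence check added above is a plain linear scan of nDB.networkNodes[tEvent.NetworkID] under the read lock. A standalone sketch of the same check, with nodeInNetwork as a hypothetical helper and networkNodes assumed to map a network ID to the slice of participating node names, as in the patch:

package main

import "fmt"

// nodeInNetwork reports whether nodeName is currently listed as a participant
// of network nid. In NetworkDB the same loop runs inline under RLock.
func nodeInNetwork(networkNodes map[string][]string, nid, nodeName string) bool {
	for _, n := range networkNodes[nid] {
		if n == nodeName {
			return true
		}
	}
	return false
}

func main() {
	networkNodes := map[string][]string{"net1": {"nodeA", "nodeB"}}
	fmt.Println(nodeInNetwork(networkNodes, "net1", "nodeB")) // true: event accepted
	fmt.Println(nodeInNetwork(networkNodes, "net1", "nodeC")) // false: event dropped
}

Dropping events whose owner is no longer listed for the network avoids re-propagating state that the rest of the cluster is in the middle of forgetting.
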
@@ -249,6 +246,11 @@ func (nDB *NetworkDB) handleTableEvent(tEvent *TableEvent) bool {
 	nDB.indexes[byNetwork].Insert(fmt.Sprintf("/%s/%s/%s", tEvent.NetworkID, tEvent.TableName, tEvent.Key), e)
 	nDB.Unlock()
 
+	if err != nil && tEvent.Type == TableEventTypeDelete {
+		// If it is a delete event and we didn't have the entry here don't repropagate
+		return true
+	}
+
 	var op opType
 	switch tEvent.Type {
 	case TableEventTypeCreate:

@@ -289,8 +291,7 @@ func (nDB *NetworkDB) handleTableMessage(buf []byte, isBulkSync bool) {
 		return
 	}
 
-	// Do not rebroadcast a bulk sync
-	if rebroadcast := nDB.handleTableEvent(&tEvent); rebroadcast && !isBulkSync {
+	if rebroadcast := nDB.handleTableEvent(&tEvent); rebroadcast {
 		var err error
 		buf, err = encodeRawMessage(MessageTypeTableEvent, buf)
 		if err != nil {

@@ -45,9 +45,12 @@ func (e *eventDelegate) NotifyLeave(mn *memberlist.Node) {
 	var failed bool
 	logrus.Infof("Node %s/%s, left gossip cluster", mn.Name, mn.Addr)
 	e.broadcastNodeEvent(mn.Addr, opDelete)
-	e.nDB.deleteNodeTableEntries(mn.Name)
-	e.nDB.deleteNetworkEntriesForNode(mn.Name)
+	// The node left or failed, delete all the entries created by it.
+	// If the node was temporary down, deleting the entries will guarantee that the CREATE events will be accepted
+	// If the node instead left because was going down, then it makes sense to just delete all its state
 	e.nDB.Lock()
+	e.nDB.deleteNetworkEntriesForNode(mn.Name)
+	e.nDB.deleteNodeTableEntries(mn.Name)
 	if n, ok := e.nDB.nodes[mn.Name]; ok {
 		delete(e.nDB.nodes, mn.Name)
 

@@ -61,7 +64,6 @@ func (e *eventDelegate) NotifyLeave(mn *memberlist.Node) {
 	if failed {
 		logrus.Infof("Node %s/%s, added to failed nodes list", mn.Name, mn.Addr)
 	}
-
 }
 
 func (e *eventDelegate) NotifyUpdate(n *memberlist.Node) {

@@ -418,7 +418,6 @@ func (nDB *NetworkDB) DeleteEntry(tname, nid, key string) error {
 }
 
 func (nDB *NetworkDB) deleteNetworkEntriesForNode(deletedNode string) {
-	nDB.Lock()
 	for nid, nodes := range nDB.networkNodes {
 		updatedNodes := make([]string, 0, len(nodes))
 		for _, node := range nodes {

@@ -433,11 +432,25 @@ func (nDB *NetworkDB) deleteNetworkEntriesForNode(deletedNode string) {
 	}
 
 	delete(nDB.networks, deletedNode)
-	nDB.Unlock()
 }
 
+// deleteNodeNetworkEntries is called in 2 conditions with 2 different outcomes:
+// 1) when a notification is coming of a node leaving the network
+//    - Walk all the network entries and mark the leaving node's entries for deletion
+//      These will be garbage collected when the reap timer will expire
+// 2) when the local node is leaving the network
+//    - Walk all the network entries:
+//      A) if the entry is owned by the local node
+//         then we will mark it for deletion. This will ensure that if a node did not
+//         yet received the notification that the local node is leaving, will be aware
+//         of the entries to be deleted.
+//      B) if the entry is owned by a remote node, then we can safely delete it. This
+//         ensures that if we join back this network as we receive the CREATE event for
+//         entries owned by remote nodes, we will accept them and we notify the application
 func (nDB *NetworkDB) deleteNodeNetworkEntries(nid, node string) {
-	nDB.Lock()
+	// Indicates if the delete is triggered for the local node
+	isNodeLocal := node == nDB.config.NodeName
+
 	nDB.indexes[byNetwork].WalkPrefix(fmt.Sprintf("/%s", nid),
 		func(path string, v interface{}) bool {
 			oldEntry := v.(*entry)

@@ -446,7 +459,15 @@ func (nDB *NetworkDB) deleteNodeNetworkEntries(nid, node string) {
 			tname := params[1]
 			key := params[2]
 
-			if oldEntry.node != node {
+			// If the entry is owned by a remote node and this node is not leaving the network
+			if oldEntry.node != node && !isNodeLocal {
+				// Don't do anything because the event is triggered for a node that does not own this entry
+				return false
+			}
+
+			// If this entry is already marked for deletion and this node is not leaving the network
+			if oldEntry.deleting && !isNodeLocal {
+				// Don't do anything this entry will be already garbage collected using the old reapTime
 				return false
 			}
 

@@ -458,17 +479,29 @@ func (nDB *NetworkDB) deleteNodeNetworkEntries(nid, node string) {
 				reapTime: reapInterval,
 			}
 
-			nDB.indexes[byTable].Insert(fmt.Sprintf("/%s/%s/%s", tname, nid, key), entry)
-			nDB.indexes[byNetwork].Insert(fmt.Sprintf("/%s/%s/%s", nid, tname, key), entry)
+			// we arrived at this point in 2 cases:
+			// 1) this entry is owned by the node that is leaving the network
+			// 2) the local node is leaving the network
+			if oldEntry.node == node {
+				if isNodeLocal {
+					// TODO fcrisciani: this can be removed if there is no way to leave the network
+					// without doing a delete of all the objects
+					entry.ltime++
+				}
+				nDB.indexes[byTable].Insert(fmt.Sprintf("/%s/%s/%s", tname, nid, key), entry)
+				nDB.indexes[byNetwork].Insert(fmt.Sprintf("/%s/%s/%s", nid, tname, key), entry)
+			} else {
+				// the local node is leaving the network, all the entries of remote nodes can be safely removed
+				nDB.indexes[byTable].Delete(fmt.Sprintf("/%s/%s/%s", tname, nid, key))
+				nDB.indexes[byNetwork].Delete(fmt.Sprintf("/%s/%s/%s", nid, tname, key))
+			}
 
 			nDB.broadcaster.Write(makeEvent(opDelete, tname, nid, key, entry.value))
 			return false
 		})
-	nDB.Unlock()
 }
 
 func (nDB *NetworkDB) deleteNodeTableEntries(node string) {
-	nDB.Lock()
 	nDB.indexes[byTable].Walk(func(path string, v interface{}) bool {
 		oldEntry := v.(*entry)
 		if oldEntry.node != node {

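Taken together, the deleteNodeNetworkEntries changes above implement a small decision table over (entry owner, leaving node, local node, already-deleting). The sketch below models it with plain values so the two cases from the doc comment are easy to trace; decide, its argument names, and the action strings are hypothetical, not the networkdb API:

package main

import "fmt"

type action string

const (
	skip       action = "skip"
	tombstone  action = "mark deleting, reap later"
	hardDelete action = "delete immediately"
)

// decide mirrors the per-entry branching in deleteNodeNetworkEntries:
// node is the node leaving the network, localNode is this node.
func decide(entryOwner, node, localNode string, alreadyDeleting bool) action {
	isNodeLocal := node == localNode
	if entryOwner != node && !isNodeLocal {
		return skip // event is about a node that does not own this entry
	}
	if alreadyDeleting && !isNodeLocal {
		return skip // already tombstoned, keep its existing reap timer
	}
	if entryOwner == node {
		return tombstone // case 1, plus the local node's own entries in case 2
	}
	return hardDelete // case 2: remote-owned entries when the local node leaves
}

func main() {
	fmt.Println(decide("nodeB", "nodeB", "nodeA", false)) // remote node left the network
	fmt.Println(decide("nodeA", "nodeA", "nodeA", false)) // local node leaves: own entries
	fmt.Println(decide("nodeB", "nodeA", "nodeA", false)) // local node leaves: remote entries
}

In the real code the tombstone case also bumps entry.ltime when the local node is the one leaving, so the delete can win over the older state if it is propagated later.
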
@@ -480,21 +513,12 @@ func (nDB *NetworkDB) deleteNodeTableEntries(node string) {
 		nid := params[1]
 		key := params[2]
 
-		entry := &entry{
-			ltime:    oldEntry.ltime,
-			node:     node,
-			value:    oldEntry.value,
-			deleting: true,
-			reapTime: reapInterval,
-		}
-
-		nDB.indexes[byTable].Insert(fmt.Sprintf("/%s/%s/%s", tname, nid, key), entry)
-		nDB.indexes[byNetwork].Insert(fmt.Sprintf("/%s/%s/%s", nid, tname, key), entry)
+		nDB.indexes[byTable].Delete(fmt.Sprintf("/%s/%s/%s", tname, nid, key))
+		nDB.indexes[byNetwork].Delete(fmt.Sprintf("/%s/%s/%s", nid, tname, key))
 
-		nDB.broadcaster.Write(makeEvent(opDelete, tname, nid, key, entry.value))
+		nDB.broadcaster.Write(makeEvent(opDelete, tname, nid, key, oldEntry.value))
 		return false
 	})
-	nDB.Unlock()
 }
 
 // WalkTable walks a single table in NetworkDB and invokes the passed

@@ -573,37 +597,12 @@ func (nDB *NetworkDB) LeaveNetwork(nid string) error {
 
 	nDB.Lock()
 	defer nDB.Unlock()
-	var (
-		paths   []string
-		entries []*entry
-	)
 
+	// Remove myself from the list of the nodes participating to the network
 	nDB.deleteNetworkNode(nid, nDB.config.NodeName)
 
-	nwWalker := func(path string, v interface{}) bool {
-		entry, ok := v.(*entry)
-		if !ok {
-			return false
-		}
-		paths = append(paths, path)
-		entries = append(entries, entry)
-		return false
-	}
-
-	nDB.indexes[byNetwork].WalkPrefix(fmt.Sprintf("/%s", nid), nwWalker)
-	for _, path := range paths {
-		params := strings.Split(path[1:], "/")
-		tname := params[1]
-		key := params[2]
-
-		if _, ok := nDB.indexes[byTable].Delete(fmt.Sprintf("/%s/%s/%s", tname, nid, key)); !ok {
-			logrus.Errorf("Could not delete entry in table %s with network id %s and key %s as it does not exist", tname, nid, key)
-		}
-
-		if _, ok := nDB.indexes[byNetwork].Delete(fmt.Sprintf("/%s/%s/%s", nid, tname, key)); !ok {
-			logrus.Errorf("Could not delete entry in network %s with table name %s and key %s as it does not exist", nid, tname, key)
-		}
-	}
+	// Update all the local entries marking them for deletion and delete all the remote entries
+	nDB.deleteNodeNetworkEntries(nid, nDB.config.NodeName)
 
 	nodeNetworks, ok := nDB.networks[nDB.config.NodeName]
 	if !ok {

@@ -616,6 +615,7 @@ func (nDB *NetworkDB) LeaveNetwork(nid string) error {
 	}
 
 	n.ltime = ltime
+	n.reapTime = reapInterval
 	n.leaving = true
 	return nil
 }

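Setting n.reapTime = reapInterval when leaving means the network record, like the tombstoned table entries above, is forgotten by a countdown rather than immediately. A minimal sketch of that reap pattern, assuming a periodic task that fires every reapPeriod; the constants, the tombstone type, and reap itself are illustrative and not the actual networkdb reaper:

package main

import (
	"fmt"
	"time"
)

// Illustrative values only; networkdb defines its own reap interval and period.
const (
	reapInterval = 30 * time.Minute // countdown assigned to a tombstoned object
	reapPeriod   = 10 * time.Minute // how often the hypothetical reaper runs
)

type tombstone struct {
	reapTime time.Duration
}

// reap decrements every tombstone's countdown and drops it once it expires.
func reap(entries map[string]*tombstone) {
	for k, e := range entries {
		e.reapTime -= reapPeriod
		if e.reapTime <= 0 {
			delete(entries, k)
		}
	}
}

func main() {
	entries := map[string]*tombstone{"net1": {reapTime: reapInterval}}
	for i := 0; i < 4; i++ {
		reap(entries)
		fmt.Println("tick", i, "remaining", len(entries))
	}
}
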
@@ -679,27 +679,3 @@ func (nDB *NetworkDB) updateLocalNetworkTime() {
 		n.ltime = ltime
 	}
 }
-
-func (nDB *NetworkDB) updateLocalTableTime() {
-	nDB.Lock()
-	defer nDB.Unlock()
-
-	ltime := nDB.tableClock.Increment()
-	nDB.indexes[byTable].Walk(func(path string, v interface{}) bool {
-		entry := v.(*entry)
-		if entry.node != nDB.config.NodeName {
-			return false
-		}
-
-		params := strings.Split(path[1:], "/")
-		tname := params[0]
-		nid := params[1]
-		key := params[2]
-		entry.ltime = ltime
-
-		nDB.indexes[byTable].Insert(fmt.Sprintf("/%s/%s/%s", tname, nid, key), entry)
-		nDB.indexes[byNetwork].Insert(fmt.Sprintf("/%s/%s/%s", nid, tname, key), entry)
-
-		return false
-	})
-}