From cd20621201d48f76b856d7d2404ee182e41e3492 Mon Sep 17 00:00:00 2001 From: Jana Radhakrishnan Date: Fri, 23 Sep 2016 16:04:48 -0700 Subject: [PATCH] Vendoring libnetwork @bf3d9ccfb8e Fixes certain node management issues when a daemon is restarted or fails or leaves and joins a swarm. Signed-off-by: Jana Radhakrishnan --- hack/vendor.sh | 2 +- .../docker/libnetwork/networkdb/cluster.go | 16 +++---- .../docker/libnetwork/networkdb/delegate.go | 44 +++++++++++++++---- .../docker/libnetwork/networkdb/networkdb.go | 9 +++- 4 files changed, 51 insertions(+), 20 deletions(-) diff --git a/hack/vendor.sh b/hack/vendor.sh index c045757268..f8210dddc9 100755 --- a/hack/vendor.sh +++ b/hack/vendor.sh @@ -70,7 +70,7 @@ clone git github.com/RackSec/srslog 365bf33cd9acc21ae1c355209865f17228ca534e clone git github.com/imdario/mergo 0.2.1 #get libnetwork packages -clone git github.com/docker/libnetwork 6caf9022fa093e0247f9f4b572edca868c27ece3 +clone git github.com/docker/libnetwork bf3d9ccfb8ebf768843691143c66d137743cc5e9 clone git github.com/docker/go-events 18b43f1bc85d9cdd42c05a6cd2d444c7a200a894 clone git github.com/armon/go-radix e39d623f12e8e41c7b5529e9a9dd67a1e2261f80 clone git github.com/armon/go-metrics eb0af217e5e9747e41dd5303755356b62d28e3ec diff --git a/vendor/src/github.com/docker/libnetwork/networkdb/cluster.go b/vendor/src/github.com/docker/libnetwork/networkdb/cluster.go index ed24e4c7a6..3b624c9a27 100644 --- a/vendor/src/github.com/docker/libnetwork/networkdb/cluster.go +++ b/vendor/src/github.com/docker/libnetwork/networkdb/cluster.go @@ -190,7 +190,7 @@ func (nDB *NetworkDB) clusterLeave() error { mlist := nDB.memberlist if err := nDB.sendNodeEvent(NodeEventTypeLeave); err != nil { - return fmt.Errorf("failed to send node leave: %v", err) + logrus.Errorf("failed to send node leave: %v", err) } if err := mlist.Leave(time.Second); err != nil { @@ -237,13 +237,6 @@ func (nDB *NetworkDB) reconnectNode() { } nDB.RUnlock() - // Update all the local state to a new time to force update on - // the node we are trying to rejoin, just in case that node - // has these in leaving/deleting state still. This is - // facilitate fast convergence after recovering from a gossip - // failure. - nDB.updateLocalStateTime() - node := nodes[randomOffset(len(nodes))] addr := net.UDPAddr{IP: node.Addr, Port: int(node.Port)} @@ -256,6 +249,13 @@ func (nDB *NetworkDB) reconnectNode() { return } + // Update all the local table state to a new time to + // force update on the node we are trying to rejoin, just in + // case that node has these in deleting state still. This is + // facilitate fast convergence after recovering from a gossip + // failure. + nDB.updateLocalTableTime() + logrus.Debugf("Initiating bulk sync with node %s after reconnect", node.Name) nDB.bulkSync([]string{node.Name}, true) } diff --git a/vendor/src/github.com/docker/libnetwork/networkdb/delegate.go b/vendor/src/github.com/docker/libnetwork/networkdb/delegate.go index 3e96384465..eb8d18557d 100644 --- a/vendor/src/github.com/docker/libnetwork/networkdb/delegate.go +++ b/vendor/src/github.com/docker/libnetwork/networkdb/delegate.go @@ -3,6 +3,7 @@ package networkdb import ( "fmt" "net" + "strings" "time" "github.com/Sirupsen/logrus" @@ -31,7 +32,7 @@ func (nDB *NetworkDB) checkAndGetNode(nEvent *NodeEvent) *node { return nil } - delete(nDB.failedNodes, n.Name) + delete(nodes, n.Name) return n } } @@ -39,16 +40,36 @@ func (nDB *NetworkDB) checkAndGetNode(nEvent *NodeEvent) *node { return nil } -func (nDB *NetworkDB) handleNodeEvent(nEvent *NodeEvent) bool { - // Update our local clock if the received messages has newer - // time. - nDB.networkClock.Witness(nEvent.LTime) +func (nDB *NetworkDB) purgeSameNode(n *node) { + nDB.Lock() + defer nDB.Unlock() + prefix := strings.Split(n.Name, "-")[0] + for _, nodes := range []map[string]*node{ + nDB.failedNodes, + nDB.leftNodes, + nDB.nodes, + } { + var nodeNames []string + for name, node := range nodes { + if strings.HasPrefix(name, prefix) && n.Addr.Equal(node.Addr) { + nodeNames = append(nodeNames, name) + } + } + + for _, name := range nodeNames { + delete(nodes, name) + } + } +} + +func (nDB *NetworkDB) handleNodeEvent(nEvent *NodeEvent) bool { n := nDB.checkAndGetNode(nEvent) if n == nil { return false } + nDB.purgeSameNode(n) n.ltime = nEvent.LTime switch nEvent.Type { @@ -357,6 +378,15 @@ func (d *delegate) GetBroadcasts(overhead, limit int) [][]byte { } func (d *delegate) LocalState(join bool) []byte { + if join { + // Update all the local node/network state to a new time to + // force update on the node we are trying to rejoin, just in + // case that node has these in leaving state still. This is + // facilitate fast convergence after recovering from a gossip + // failure. + d.nDB.updateLocalNetworkTime() + } + d.nDB.RLock() defer d.nDB.RUnlock() @@ -408,10 +438,6 @@ func (d *delegate) MergeRemoteState(buf []byte, isJoin bool) { return } - if pp.LTime > 0 { - d.nDB.networkClock.Witness(pp.LTime) - } - nodeEvent := &NodeEvent{ LTime: pp.LTime, NodeName: pp.NodeName, diff --git a/vendor/src/github.com/docker/libnetwork/networkdb/networkdb.go b/vendor/src/github.com/docker/libnetwork/networkdb/networkdb.go index 1502d7300e..a8c942c9cc 100644 --- a/vendor/src/github.com/docker/libnetwork/networkdb/networkdb.go +++ b/vendor/src/github.com/docker/libnetwork/networkdb/networkdb.go @@ -524,7 +524,7 @@ func (nDB *NetworkDB) findCommonNetworks(nodeName string) []string { return networks } -func (nDB *NetworkDB) updateLocalStateTime() { +func (nDB *NetworkDB) updateLocalNetworkTime() { nDB.Lock() defer nDB.Unlock() @@ -532,8 +532,13 @@ func (nDB *NetworkDB) updateLocalStateTime() { for _, n := range nDB.networks[nDB.config.NodeName] { n.ltime = ltime } +} - ltime = nDB.tableClock.Increment() +func (nDB *NetworkDB) updateLocalTableTime() { + nDB.Lock() + defer nDB.Unlock() + + ltime := nDB.tableClock.Increment() nDB.indexes[byTable].Walk(func(path string, v interface{}) bool { entry := v.(*entry) if entry.node != nDB.config.NodeName {