From 060aa49a70725bedb2a0131e955f93af8492ade8 Mon Sep 17 00:00:00 2001 From: Jana Radhakrishnan Date: Sat, 23 Apr 2016 13:26:34 -0700 Subject: [PATCH] Fix gossip network event overwriting self When a node joins a network it sends out a gossip event before it updates its own in-memory state. This can create a race where the node gets the event back from a remote node before we update in-memory state and we treat that as latest state. To avoid this race, always generate the gossip after updating local state. Signed-off-by: Jana Radhakrishnan --- libnetwork/networkdb/cluster.go | 7 ++++++- libnetwork/networkdb/networkdb.go | 8 ++++---- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/libnetwork/networkdb/cluster.go b/libnetwork/networkdb/cluster.go index ea3cfa8b1c..bfba59f698 100644 --- a/libnetwork/networkdb/cluster.go +++ b/libnetwork/networkdb/cluster.go @@ -197,9 +197,14 @@ func (nDB *NetworkDB) gossip() { broadcastQ := nDB.networks[nDB.config.NodeName][nid].tableBroadcasts nDB.RUnlock() + if broadcastQ == nil { + logrus.Errorf("Invalid broadcastQ encountered while gossiping for network %s", nid) + continue + } + msgs := broadcastQ.GetBroadcasts(compoundOverhead, bytesAvail) if len(msgs) == 0 { - break + continue } // Create a compound message diff --git a/libnetwork/networkdb/networkdb.go b/libnetwork/networkdb/networkdb.go index a0ddf2a4f1..1c49371896 100644 --- a/libnetwork/networkdb/networkdb.go +++ b/libnetwork/networkdb/networkdb.go @@ -336,10 +336,6 @@ func (nDB *NetworkDB) WalkTable(tname string, fn func(string, string, []byte) bo func (nDB *NetworkDB) JoinNetwork(nid string) error { ltime := nDB.networkClock.Increment() - if err := nDB.sendNetworkEvent(nid, networkJoin, ltime); err != nil { - return fmt.Errorf("failed to send leave network event for %s: %v", nid, err) - } - nDB.Lock() nodeNetworks, ok := nDB.networks[nDB.config.NodeName] if !ok { @@ -356,6 +352,10 @@ func (nDB *NetworkDB) JoinNetwork(nid string) error { 
nDB.networkNodes[nid] = append(nDB.networkNodes[nid], nDB.config.NodeName) nDB.Unlock() + if err := nDB.sendNetworkEvent(nid, networkJoin, ltime); err != nil { + return fmt.Errorf("failed to send leave network event for %s: %v", nid, err) + } + logrus.Debugf("%s: joined network %s", nDB.config.NodeName, nid) if _, err := nDB.bulkSync(nid, true); err != nil { logrus.Errorf("Error bulk syncing while joining network %s: %v", nid, err)