mirror of
https://github.com/moby/moby.git
synced 2022-11-09 12:21:53 -05:00
NetworkDB incorrect number of entries in networkNodes
A rapid (within networkReapTime 30min) leave/join network can corrupt the list of nodes per network with multiple copies of the same nodes. The fix makes sure that each node is present only once Signed-off-by: Flavio Crisciani <flavio.crisciani@docker.com>
This commit is contained in:
parent
7db7a01ea1
commit
051a0d5ce9
4 changed files with 92 additions and 5 deletions
|
@ -310,12 +310,11 @@ func (nDB *NetworkDB) reapState() {
|
|||
|
||||
func (nDB *NetworkDB) reapNetworks() {
|
||||
nDB.Lock()
|
||||
for name, nn := range nDB.networks {
|
||||
for _, nn := range nDB.networks {
|
||||
for id, n := range nn {
|
||||
if n.leaving {
|
||||
if n.reapTime <= 0 {
|
||||
delete(nn, id)
|
||||
nDB.deleteNetworkNode(id, name)
|
||||
continue
|
||||
}
|
||||
n.reapTime -= reapPeriod
|
||||
|
|
|
@ -179,7 +179,12 @@ func (nDB *NetworkDB) handleNetworkEvent(nEvent *NetworkEvent) bool {
|
|||
flushEntries = true
|
||||
}
|
||||
|
||||
nDB.addNetworkNode(nEvent.NetworkID, nEvent.NodeName)
|
||||
if nEvent.Type == NetworkEventTypeLeave {
|
||||
nDB.deleteNetworkNode(nEvent.NetworkID, nEvent.NodeName)
|
||||
} else {
|
||||
nDB.addNetworkNode(nEvent.NetworkID, nEvent.NodeName)
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
|
|
|
@ -487,7 +487,7 @@ func (nDB *NetworkDB) JoinNetwork(nid string) error {
|
|||
},
|
||||
RetransmitMult: 4,
|
||||
}
|
||||
nDB.networkNodes[nid] = append(nDB.networkNodes[nid], nDB.config.NodeName)
|
||||
nDB.addNetworkNode(nid, nDB.config.NodeName)
|
||||
networkNodes := nDB.networkNodes[nid]
|
||||
nDB.Unlock()
|
||||
|
||||
|
@ -522,6 +522,8 @@ func (nDB *NetworkDB) LeaveNetwork(nid string) error {
|
|||
entries []*entry
|
||||
)
|
||||
|
||||
nDB.deleteNetworkNode(nid, nDB.config.NodeName)
|
||||
|
||||
nwWalker := func(path string, v interface{}) bool {
|
||||
entry, ok := v.(*entry)
|
||||
if !ok {
|
||||
|
@ -580,7 +582,10 @@ func (nDB *NetworkDB) addNetworkNode(nid string, nodeName string) {
|
|||
// passed network. Caller should hold the NetworkDB lock while calling
|
||||
// this
|
||||
func (nDB *NetworkDB) deleteNetworkNode(nid string, nodeName string) {
|
||||
nodes := nDB.networkNodes[nid]
|
||||
nodes, ok := nDB.networkNodes[nid]
|
||||
if !ok || len(nodes) == 0 {
|
||||
return
|
||||
}
|
||||
newNodes := make([]string, 0, len(nodes)-1)
|
||||
for _, name := range nodes {
|
||||
if name == nodeName {
|
||||
|
|
|
@ -436,3 +436,81 @@ func TestNetworkDBCRUDMediumCluster(t *testing.T) {
|
|||
log.Print("Closing DB instances...")
|
||||
closeNetworkDBInstances(dbs)
|
||||
}
|
||||
|
||||
func TestNetworkDBNodeJoinLeaveIteration(t *testing.T) {
|
||||
maxRetry := 5
|
||||
dbs := createNetworkDBInstances(t, 2, "node")
|
||||
|
||||
// Single node Join/Leave
|
||||
err := dbs[0].JoinNetwork("network1")
|
||||
assert.NoError(t, err)
|
||||
|
||||
if len(dbs[0].networkNodes["network1"]) != 1 {
|
||||
t.Fatalf("The networkNodes list has to have be 1 instead of %d", len(dbs[0].networkNodes["network1"]))
|
||||
}
|
||||
|
||||
err = dbs[0].LeaveNetwork("network1")
|
||||
assert.NoError(t, err)
|
||||
|
||||
if len(dbs[0].networkNodes["network1"]) != 0 {
|
||||
t.Fatalf("The networkNodes list has to have be 0 instead of %d", len(dbs[0].networkNodes["network1"]))
|
||||
}
|
||||
|
||||
// Multiple nodes Join/Leave
|
||||
err = dbs[0].JoinNetwork("network1")
|
||||
assert.NoError(t, err)
|
||||
|
||||
err = dbs[1].JoinNetwork("network1")
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Wait for the propagation on db[0]
|
||||
for i := 0; i < maxRetry; i++ {
|
||||
if len(dbs[0].networkNodes["network1"]) == 2 {
|
||||
break
|
||||
}
|
||||
time.Sleep(1 * time.Second)
|
||||
}
|
||||
if len(dbs[0].networkNodes["network1"]) != 2 {
|
||||
t.Fatalf("The networkNodes list has to have be 2 instead of %d - %v", len(dbs[0].networkNodes["network1"]), dbs[0].networkNodes["network1"])
|
||||
}
|
||||
|
||||
// Wait for the propagation on db[1]
|
||||
for i := 0; i < maxRetry; i++ {
|
||||
if len(dbs[1].networkNodes["network1"]) == 2 {
|
||||
break
|
||||
}
|
||||
time.Sleep(1 * time.Second)
|
||||
}
|
||||
if len(dbs[1].networkNodes["network1"]) != 2 {
|
||||
t.Fatalf("The networkNodes list has to have be 2 instead of %d - %v", len(dbs[1].networkNodes["network1"]), dbs[1].networkNodes["network1"])
|
||||
}
|
||||
|
||||
// Try a quick leave/join
|
||||
err = dbs[0].LeaveNetwork("network1")
|
||||
assert.NoError(t, err)
|
||||
err = dbs[0].JoinNetwork("network1")
|
||||
assert.NoError(t, err)
|
||||
|
||||
for i := 0; i < maxRetry; i++ {
|
||||
if len(dbs[0].networkNodes["network1"]) == 2 {
|
||||
break
|
||||
}
|
||||
time.Sleep(1 * time.Second)
|
||||
}
|
||||
if len(dbs[0].networkNodes["network1"]) != 2 {
|
||||
t.Fatalf("The networkNodes list has to have be 2 instead of %d - %v", len(dbs[0].networkNodes["network1"]), dbs[0].networkNodes["network1"])
|
||||
}
|
||||
|
||||
for i := 0; i < maxRetry; i++ {
|
||||
if len(dbs[1].networkNodes["network1"]) == 2 {
|
||||
break
|
||||
}
|
||||
time.Sleep(1 * time.Second)
|
||||
}
|
||||
if len(dbs[1].networkNodes["network1"]) != 2 {
|
||||
t.Fatalf("The networkNodes list has to have be 2 instead of %d - %v", len(dbs[1].networkNodes["network1"]), dbs[1].networkNodes["network1"])
|
||||
}
|
||||
|
||||
dbs[0].Close()
|
||||
dbs[1].Close()
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue