1
0
Fork 0
mirror of https://github.com/moby/moby.git synced 2022-11-09 12:21:53 -05:00

libnetwork: make rejoin intervals configurable

This allows the rejoin intervals to be chosen according to the context
in which the component is used. In particular, it allows lower
intervals to be used in the TestNetworkDBIslands test.

Signed-off-by: Roman Volosatovs <roman.volosatovs@docker.com>
This commit is contained in:
Roman Volosatovs 2021-07-12 11:12:56 +02:00
parent c81abefdb1
commit d7a2635537
No known key found for this signature in database
GPG key ID: 216DD5F8CA6618A1
3 changed files with 45 additions and 20 deletions

View file

@ -18,12 +18,10 @@ import (
)
const (
reapPeriod = 5 * time.Second
rejoinClusterDuration = 10 * time.Second
rejoinInterval = 60 * time.Second
retryInterval = 1 * time.Second
nodeReapInterval = 24 * time.Hour
nodeReapPeriod = 2 * time.Hour
reapPeriod = 5 * time.Second
retryInterval = 1 * time.Second
nodeReapInterval = 24 * time.Hour
nodeReapPeriod = 2 * time.Hour
// considering a cluster with > 20 nodes and a drain speed of 100 msg/s
// the following is roughly 1 minute
maxQueueLenBroadcastOnSync = 500
@ -172,7 +170,7 @@ func (nDB *NetworkDB) clusterInit() error {
{config.PushPullInterval, nDB.bulkSyncTables},
{retryInterval, nDB.reconnectNode},
{nodeReapPeriod, nDB.reapDeadNode},
{rejoinInterval, nDB.rejoinClusterBootStrap},
{nDB.config.rejoinClusterInterval, nDB.rejoinClusterBootStrap},
} {
t := time.NewTicker(trigger.interval)
go nDB.triggerFunc(trigger.interval, t.C, trigger.fn)
@ -210,7 +208,8 @@ func (nDB *NetworkDB) clusterJoin(members []string) error {
if _, err := mlist.Join(members); err != nil {
// In case of failure, we no longer need to explicitly call retryJoin.
// rejoinClusterBootStrap, which runs every minute, will retryJoin for 10sec
// rejoinClusterBootStrap, which runs every nDB.config.rejoinClusterInterval,
// will retryJoin for nDB.config.rejoinClusterDuration.
return fmt.Errorf("could not join node to memberlist: %v", err)
}
@ -324,7 +323,7 @@ func (nDB *NetworkDB) rejoinClusterBootStrap() {
}
// None of the bootStrap nodes are in the cluster, call memberlist join
logrus.Debugf("rejoinClusterBootStrap, calling cluster join with bootStrap %v", bootStrapIPs)
ctx, cancel := context.WithTimeout(nDB.ctx, rejoinClusterDuration)
ctx, cancel := context.WithTimeout(nDB.ctx, nDB.config.rejoinClusterDuration)
defer cancel()
nDB.retryJoin(ctx, bootStrapIPs)
}

View file

@ -192,6 +192,14 @@ type Config struct {
// NOTE this MUST always be higher than reapEntryInterval
reapNetworkInterval time.Duration
// rejoinClusterDuration is the retryJoin timeout used by rejoinClusterBootStrap.
// Default is 10sec.
rejoinClusterDuration time.Duration
// rejoinClusterInterval is the interval at which rejoinClusterBootStrap runs.
// Default is 60sec.
rejoinClusterInterval time.Duration
// StatsPrintPeriod is the period used to print queue stats.
// Default is 5min
StatsPrintPeriod time.Duration
@ -225,13 +233,15 @@ type entry struct {
func DefaultConfig() *Config {
hostname, _ := os.Hostname()
return &Config{
NodeID: stringid.TruncateID(stringid.GenerateRandomID()),
Hostname: hostname,
BindAddr: "0.0.0.0",
PacketBufferSize: 1400,
StatsPrintPeriod: 5 * time.Minute,
HealthPrintPeriod: 1 * time.Minute,
reapEntryInterval: 30 * time.Minute,
NodeID: stringid.TruncateID(stringid.GenerateRandomID()),
Hostname: hostname,
BindAddr: "0.0.0.0",
PacketBufferSize: 1400,
StatsPrintPeriod: 5 * time.Minute,
HealthPrintPeriod: 1 * time.Minute,
reapEntryInterval: 30 * time.Minute,
rejoinClusterDuration: 10 * time.Second,
rejoinClusterInterval: 60 * time.Second,
}
}

View file

@ -819,8 +819,24 @@ func TestParallelDelete(t *testing.T) {
}
func TestNetworkDBIslands(t *testing.T) {
// pollTimeout returns the timeout to hand to poll.WaitOn: 120 seconds by
// default, or the time remaining until the test deadline when the test
// has a deadline and that remaining time does not exceed the default.
pollTimeout := func() time.Duration {
	const defaultTimeout = 120 * time.Second
	if deadline, hasDeadline := t.Deadline(); hasDeadline {
		// Never poll past the test's own deadline.
		if remaining := time.Until(deadline); remaining <= defaultTimeout {
			return remaining
		}
	}
	return defaultTimeout
}
logrus.SetLevel(logrus.DebugLevel)
dbs := createNetworkDBInstances(t, 5, "node", DefaultConfig())
conf := DefaultConfig()
// Shorten durations to speed up test execution.
conf.rejoinClusterDuration = conf.rejoinClusterDuration / 10
conf.rejoinClusterInterval = conf.rejoinClusterInterval / 10
dbs := createNetworkDBInstances(t, 5, "node", conf)
// Get the node IP currently in use
node := dbs[0].nodes[dbs[0].config.NodeID]
@ -868,7 +884,7 @@ func TestNetworkDBIslands(t *testing.T) {
}
return poll.Success()
}
poll.WaitOn(t, check, poll.WithDelay(time.Second), poll.WithTimeout(120*time.Second))
poll.WaitOn(t, check, poll.WithDelay(time.Second), poll.WithTimeout(pollTimeout()))
// Respawn the first 3 nodes with different names but the same IP:port
for i := 0; i < 3; i++ {
@ -877,7 +893,7 @@ func TestNetworkDBIslands(t *testing.T) {
dbs[i] = launchNode(t, *dbs[i].config)
}
// Give some time for the reconnect routine to run, it runs every 60s
// Give some time for the reconnect routine to run, it runs every 6s.
check = func(t poll.LogT) poll.Result {
// Verify that the cluster is again all connected. Note that the 3 previous nodes did not do any join
for i := 0; i < 5; i++ {
@ -908,6 +924,6 @@ func TestNetworkDBIslands(t *testing.T) {
}
return poll.Success()
}
poll.WaitOn(t, check, poll.WithDelay(10*time.Second), poll.WithTimeout(120*time.Second))
poll.WaitOn(t, check, poll.WithDelay(time.Second), poll.WithTimeout(pollTimeout()))
closeNetworkDBInstances(t, dbs)
}