mirror of
https://github.com/moby/moby.git
synced 2022-11-09 12:21:53 -05:00
libnetwork: make rejoin intervals configurable
This allows the rejoin intervals to be chosen according to the context in which the component is used; in particular, it allows lower intervals to be used in the TestNetworkDBIslands test. Signed-off-by: Roman Volosatovs <roman.volosatovs@docker.com>
This commit is contained in:
parent
c81abefdb1
commit
d7a2635537
3 changed files with 45 additions and 20 deletions
|
@ -18,12 +18,10 @@ import (
|
|||
)
|
||||
|
||||
const (
|
||||
reapPeriod = 5 * time.Second
|
||||
rejoinClusterDuration = 10 * time.Second
|
||||
rejoinInterval = 60 * time.Second
|
||||
retryInterval = 1 * time.Second
|
||||
nodeReapInterval = 24 * time.Hour
|
||||
nodeReapPeriod = 2 * time.Hour
|
||||
reapPeriod = 5 * time.Second
|
||||
retryInterval = 1 * time.Second
|
||||
nodeReapInterval = 24 * time.Hour
|
||||
nodeReapPeriod = 2 * time.Hour
|
||||
// considering a cluster with > 20 nodes and a drain speed of 100 msg/s
|
||||
// the following is roughly 1 minute
|
||||
maxQueueLenBroadcastOnSync = 500
|
||||
|
@ -172,7 +170,7 @@ func (nDB *NetworkDB) clusterInit() error {
|
|||
{config.PushPullInterval, nDB.bulkSyncTables},
|
||||
{retryInterval, nDB.reconnectNode},
|
||||
{nodeReapPeriod, nDB.reapDeadNode},
|
||||
{rejoinInterval, nDB.rejoinClusterBootStrap},
|
||||
{nDB.config.rejoinClusterInterval, nDB.rejoinClusterBootStrap},
|
||||
} {
|
||||
t := time.NewTicker(trigger.interval)
|
||||
go nDB.triggerFunc(trigger.interval, t.C, trigger.fn)
|
||||
|
@ -210,7 +208,8 @@ func (nDB *NetworkDB) clusterJoin(members []string) error {
|
|||
|
||||
if _, err := mlist.Join(members); err != nil {
|
||||
// In case of failure, we no longer need to explicitly call retryJoin.
|
||||
// rejoinClusterBootStrap, which runs every minute, will retryJoin for 10sec
|
||||
// rejoinClusterBootStrap, which runs every nDB.config.rejoinClusterInterval,
|
||||
// will retryJoin for nDB.config.rejoinClusterDuration.
|
||||
return fmt.Errorf("could not join node to memberlist: %v", err)
|
||||
}
|
||||
|
||||
|
@ -324,7 +323,7 @@ func (nDB *NetworkDB) rejoinClusterBootStrap() {
|
|||
}
|
||||
// None of the bootStrap nodes are in the cluster, call memberlist join
|
||||
logrus.Debugf("rejoinClusterBootStrap, calling cluster join with bootStrap %v", bootStrapIPs)
|
||||
ctx, cancel := context.WithTimeout(nDB.ctx, rejoinClusterDuration)
|
||||
ctx, cancel := context.WithTimeout(nDB.ctx, nDB.config.rejoinClusterDuration)
|
||||
defer cancel()
|
||||
nDB.retryJoin(ctx, bootStrapIPs)
|
||||
}
|
||||
|
|
|
@ -192,6 +192,14 @@ type Config struct {
|
|||
// NOTE this MUST always be higher than reapEntryInterval
|
||||
reapNetworkInterval time.Duration
|
||||
|
||||
// rejoinClusterDuration represents the retryJoin timeout used by rejoinClusterBootStrap.
|
||||
// Default is 10sec.
|
||||
rejoinClusterDuration time.Duration
|
||||
|
||||
// rejoinClusterInterval represents the interval at which rejoinClusterBootStrap runs.
|
||||
// Default is 60sec.
|
||||
rejoinClusterInterval time.Duration
|
||||
|
||||
// StatsPrintPeriod the period to use to print queue stats
|
||||
// Default is 5min
|
||||
StatsPrintPeriod time.Duration
|
||||
|
@ -225,13 +233,15 @@ type entry struct {
|
|||
func DefaultConfig() *Config {
|
||||
hostname, _ := os.Hostname()
|
||||
return &Config{
|
||||
NodeID: stringid.TruncateID(stringid.GenerateRandomID()),
|
||||
Hostname: hostname,
|
||||
BindAddr: "0.0.0.0",
|
||||
PacketBufferSize: 1400,
|
||||
StatsPrintPeriod: 5 * time.Minute,
|
||||
HealthPrintPeriod: 1 * time.Minute,
|
||||
reapEntryInterval: 30 * time.Minute,
|
||||
NodeID: stringid.TruncateID(stringid.GenerateRandomID()),
|
||||
Hostname: hostname,
|
||||
BindAddr: "0.0.0.0",
|
||||
PacketBufferSize: 1400,
|
||||
StatsPrintPeriod: 5 * time.Minute,
|
||||
HealthPrintPeriod: 1 * time.Minute,
|
||||
reapEntryInterval: 30 * time.Minute,
|
||||
rejoinClusterDuration: 10 * time.Second,
|
||||
rejoinClusterInterval: 60 * time.Second,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -819,8 +819,24 @@ func TestParallelDelete(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestNetworkDBIslands(t *testing.T) {
|
||||
pollTimeout := func() time.Duration {
|
||||
const defaultTimeout = 120 * time.Second
|
||||
dl, ok := t.Deadline()
|
||||
if !ok {
|
||||
return defaultTimeout
|
||||
}
|
||||
if d := time.Until(dl); d <= defaultTimeout {
|
||||
return d
|
||||
}
|
||||
return defaultTimeout
|
||||
}
|
||||
|
||||
logrus.SetLevel(logrus.DebugLevel)
|
||||
dbs := createNetworkDBInstances(t, 5, "node", DefaultConfig())
|
||||
conf := DefaultConfig()
|
||||
// Shorten durations to speed up test execution.
|
||||
conf.rejoinClusterDuration = conf.rejoinClusterDuration / 10
|
||||
conf.rejoinClusterInterval = conf.rejoinClusterInterval / 10
|
||||
dbs := createNetworkDBInstances(t, 5, "node", conf)
|
||||
|
||||
// Get the node IP used currently
|
||||
node := dbs[0].nodes[dbs[0].config.NodeID]
|
||||
|
@ -868,7 +884,7 @@ func TestNetworkDBIslands(t *testing.T) {
|
|||
}
|
||||
return poll.Success()
|
||||
}
|
||||
poll.WaitOn(t, check, poll.WithDelay(time.Second), poll.WithTimeout(120*time.Second))
|
||||
poll.WaitOn(t, check, poll.WithDelay(time.Second), poll.WithTimeout(pollTimeout()))
|
||||
|
||||
// Spawn again the first 3 nodes with different names but same IP:port
|
||||
for i := 0; i < 3; i++ {
|
||||
|
@ -877,7 +893,7 @@ func TestNetworkDBIslands(t *testing.T) {
|
|||
dbs[i] = launchNode(t, *dbs[i].config)
|
||||
}
|
||||
|
||||
// Give some time for the reconnect routine to run, it runs every 60s
|
||||
// Give some time for the rejoin routine (rejoinClusterBootStrap) to run; with the shortened config it runs every 6s.
|
||||
check = func(t poll.LogT) poll.Result {
|
||||
// Verify that the cluster is again all connected. Note that the 3 previous nodes did not do any join
|
||||
for i := 0; i < 5; i++ {
|
||||
|
@ -908,6 +924,6 @@ func TestNetworkDBIslands(t *testing.T) {
|
|||
}
|
||||
return poll.Success()
|
||||
}
|
||||
poll.WaitOn(t, check, poll.WithDelay(10*time.Second), poll.WithTimeout(120*time.Second))
|
||||
poll.WaitOn(t, check, poll.WithDelay(time.Second), poll.WithTimeout(pollTimeout()))
|
||||
closeNetworkDBInstances(t, dbs)
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue