Prevent thundering herd of Action Cable clients

This commit makes a few changes to the Action Cable client to prevent a
"thundering herd" of client reconnects after server connectivity loss:

* The client will wait a random amount between 1x and 3x of the stale
  threshold after the server's last ping before making the first
  reconnection attempt.
* Subsequent reconnection attempts now use exponential backoff instead
  of logarithmic backoff.  To allow the delay between reconnection
  attempts to increase slowly at first, the default exponentiation base
  is < 2.
* Random jitter is applied to each delay between reconnection attempts.

Co-authored-by: John Williams <john@veloshots.com>
This commit is contained in:
Jonathan Hefner 2020-09-15 12:55:43 -05:00
parent d5386cfba2
commit cc9a9e8503
5 changed files with 110 additions and 30 deletions

View File

@ -1,3 +1,14 @@
* The Action Cable client now includes safeguards to prevent a "thundering
herd" of client reconnects after server connectivity loss:
* The client will wait a random amount between 1x and 3x of the stale
threshold after the server's last ping before making the first
reconnection attempt.
* Subsequent reconnection attempts now use exponential backoff instead of
logarithmic backoff. To allow the delay between reconnection attempts to
increase slowly at first, the default exponentiation base is < 2.
* Random jitter is applied to each delay between reconnection attempts.
*Jonathan Hefner*
Please check [6-1-stable](https://github.com/rails/rails/blob/6-1-stable/actioncable/CHANGELOG.md) for previous changes.

View File

@ -50,9 +50,6 @@
var secondsSince = function secondsSince(time) {
return (now() - time) / 1e3;
};
var clamp = function clamp(number, min, max) {
return Math.max(min, Math.min(max, number));
};
var ConnectionMonitor = function() {
function ConnectionMonitor(connection) {
classCallCheck(this, ConnectionMonitor);
@ -66,7 +63,7 @@
delete this.stoppedAt;
this.startPolling();
addEventListener("visibilitychange", this.visibilityDidChange);
logger.log("ConnectionMonitor started. pollInterval = " + this.getPollInterval() + " ms");
logger.log("ConnectionMonitor started. stale threshold = " + this.constructor.staleThreshold + " s");
}
};
ConnectionMonitor.prototype.stop = function stop() {
@ -108,16 +105,18 @@
}, this.getPollInterval());
};
ConnectionMonitor.prototype.getPollInterval = function getPollInterval() {
var _constructor$pollInte = this.constructor.pollInterval, min = _constructor$pollInte.min, max = _constructor$pollInte.max, multiplier = _constructor$pollInte.multiplier;
var interval = multiplier * Math.log(this.reconnectAttempts + 1);
return Math.round(clamp(interval, min, max) * 1e3);
var _constructor = this.constructor, staleThreshold = _constructor.staleThreshold, reconnectionBackoffRate = _constructor.reconnectionBackoffRate;
var backoff = Math.pow(1 + reconnectionBackoffRate, Math.min(this.reconnectAttempts, 10));
var jitterMax = this.reconnectAttempts === 0 ? 1 : reconnectionBackoffRate;
var jitter = jitterMax * Math.random();
return staleThreshold * 1e3 * backoff * (1 + jitter);
};
ConnectionMonitor.prototype.reconnectIfStale = function reconnectIfStale() {
if (this.connectionIsStale()) {
logger.log("ConnectionMonitor detected stale connection. reconnectAttempts = " + this.reconnectAttempts + ", pollInterval = " + this.getPollInterval() + " ms, time disconnected = " + secondsSince(this.disconnectedAt) + " s, stale threshold = " + this.constructor.staleThreshold + " s");
logger.log("ConnectionMonitor detected stale connection. reconnectAttempts = " + this.reconnectAttempts + ", time stale = " + secondsSince(this.refreshedAt) + " s, stale threshold = " + this.constructor.staleThreshold + " s");
this.reconnectAttempts++;
if (this.disconnectedRecently()) {
logger.log("ConnectionMonitor skipping reopening recent disconnect");
logger.log("ConnectionMonitor skipping reopening recent disconnect. time disconnected = " + secondsSince(this.disconnectedAt) + " s");
} else {
logger.log("ConnectionMonitor reopening");
this.connection.reopen();
@ -125,7 +124,7 @@
}
};
ConnectionMonitor.prototype.connectionIsStale = function connectionIsStale() {
return secondsSince(this.pingedAt ? this.pingedAt : this.startedAt) > this.constructor.staleThreshold;
return secondsSince(this.refreshedAt) > this.constructor.staleThreshold;
};
ConnectionMonitor.prototype.disconnectedRecently = function disconnectedRecently() {
return this.disconnectedAt && secondsSince(this.disconnectedAt) < this.constructor.staleThreshold;
@ -141,14 +140,16 @@
}, 200);
}
};
createClass(ConnectionMonitor, [ {
key: "refreshedAt",
get: function get$$1() {
return this.pingedAt ? this.pingedAt : this.startedAt;
}
} ]);
return ConnectionMonitor;
}();
ConnectionMonitor.pollInterval = {
min: 3,
max: 30,
multiplier: 5
};
ConnectionMonitor.staleThreshold = 6;
ConnectionMonitor.reconnectionBackoffRate = .15;
var INTERNAL = {
message_types: {
welcome: "welcome",

View File

@ -7,8 +7,6 @@ const now = () => new Date().getTime()
const secondsSince = time => (now() - time) / 1000
const clamp = (number, min, max) => Math.max(min, Math.min(max, number))
class ConnectionMonitor {
constructor(connection) {
this.visibilityDidChange = this.visibilityDidChange.bind(this)
@ -22,7 +20,7 @@ class ConnectionMonitor {
delete this.stoppedAt
this.startPolling()
addEventListener("visibilitychange", this.visibilityDidChange)
logger.log(`ConnectionMonitor started. pollInterval = ${this.getPollInterval()} ms`)
logger.log(`ConnectionMonitor started. stale threshold = ${this.constructor.staleThreshold} s`)
}
}
@ -75,17 +73,19 @@ class ConnectionMonitor {
}
getPollInterval() {
const {min, max, multiplier} = this.constructor.pollInterval
const interval = multiplier * Math.log(this.reconnectAttempts + 1)
return Math.round(clamp(interval, min, max) * 1000)
const { staleThreshold, reconnectionBackoffRate } = this.constructor
const backoff = Math.pow(1 + reconnectionBackoffRate, Math.min(this.reconnectAttempts, 10))
const jitterMax = this.reconnectAttempts === 0 ? 1.0 : reconnectionBackoffRate
const jitter = jitterMax * Math.random()
return staleThreshold * 1000 * backoff * (1 + jitter)
}
reconnectIfStale() {
if (this.connectionIsStale()) {
logger.log(`ConnectionMonitor detected stale connection. reconnectAttempts = ${this.reconnectAttempts}, pollInterval = ${this.getPollInterval()} ms, time disconnected = ${secondsSince(this.disconnectedAt)} s, stale threshold = ${this.constructor.staleThreshold} s`)
logger.log(`ConnectionMonitor detected stale connection. reconnectAttempts = ${this.reconnectAttempts}, time stale = ${secondsSince(this.refreshedAt)} s, stale threshold = ${this.constructor.staleThreshold} s`)
this.reconnectAttempts++
if (this.disconnectedRecently()) {
logger.log("ConnectionMonitor skipping reopening recent disconnect")
logger.log(`ConnectionMonitor skipping reopening recent disconnect. time disconnected = ${secondsSince(this.disconnectedAt)} s`)
} else {
logger.log("ConnectionMonitor reopening")
this.connection.reopen()
@ -93,8 +93,12 @@ class ConnectionMonitor {
}
}
get refreshedAt() {
return this.pingedAt ? this.pingedAt : this.startedAt
}
connectionIsStale() {
return secondsSince(this.pingedAt ? this.pingedAt : this.startedAt) > this.constructor.staleThreshold
return secondsSince(this.refreshedAt) > this.constructor.staleThreshold
}
disconnectedRecently() {
@ -115,12 +119,7 @@ class ConnectionMonitor {
}
ConnectionMonitor.pollInterval = {
min: 3,
max: 30,
multiplier: 5
}
ConnectionMonitor.staleThreshold = 6 // Server::Connections::BEAT_INTERVAL * 2 (missed two pings)
ConnectionMonitor.reconnectionBackoffRate = 0.15
export default ConnectionMonitor

View File

@ -1,6 +1,7 @@
import "./test_helpers/index"
import "./unit/action_cable_test"
import "./unit/connection_test"
import "./unit/connection_monitor_test"
import "./unit/consumer_test"
import "./unit/subscription_test"
import "./unit/subscriptions_test"

View File

@ -0,0 +1,68 @@
import * as ActionCable from "../../../../app/javascript/action_cable/index"
const {module, test} = QUnit
module("ActionCable.ConnectionMonitor", hooks => {
let monitor
hooks.beforeEach(() => monitor = new ActionCable.ConnectionMonitor({}))
module("#getPollInterval", hooks => {
hooks.beforeEach(() => Math._random = Math.random)
hooks.afterEach(() => Math.random = Math._random)
const { staleThreshold, reconnectionBackoffRate } = ActionCable.ConnectionMonitor
const backoffFactor = 1 + reconnectionBackoffRate
const ms = 1000
test("uses exponential backoff", assert => {
Math.random = () => 0
monitor.reconnectAttempts = 0
assert.equal(monitor.getPollInterval(), staleThreshold * ms)
monitor.reconnectAttempts = 1
assert.equal(monitor.getPollInterval(), staleThreshold * backoffFactor * ms)
monitor.reconnectAttempts = 2
assert.equal(monitor.getPollInterval(), staleThreshold * backoffFactor * backoffFactor * ms)
})
test("caps exponential backoff after some number of reconnection attempts", assert => {
Math.random = () => 0
monitor.reconnectAttempts = 42
const cappedPollInterval = monitor.getPollInterval()
monitor.reconnectAttempts = 9001
assert.equal(monitor.getPollInterval(), cappedPollInterval)
})
test("uses 100% jitter when 0 reconnection attempts", assert => {
Math.random = () => 0
assert.equal(monitor.getPollInterval(), staleThreshold * ms)
Math.random = () => 0.5
assert.equal(monitor.getPollInterval(), staleThreshold * 1.5 * ms)
})
test("uses reconnectionBackoffRate for jitter when >0 reconnection attempts", assert => {
monitor.reconnectAttempts = 1
Math.random = () => 0.25
assert.equal(monitor.getPollInterval(), staleThreshold * backoffFactor * (1 + reconnectionBackoffRate * 0.25) * ms)
Math.random = () => 0.5
assert.equal(monitor.getPollInterval(), staleThreshold * backoffFactor * (1 + reconnectionBackoffRate * 0.5) * ms)
})
test("applies jitter after capped exponential backoff", assert => {
monitor.reconnectAttempts = 9001
Math.random = () => 0
const withoutJitter = monitor.getPollInterval()
Math.random = () => 0.5
const withJitter = monitor.getPollInterval()
assert.ok(withJitter > withoutJitter)
})
})
})