From cc9a9e8503f2df41aee556f4e5e6b397048338fb Mon Sep 17 00:00:00 2001 From: Jonathan Hefner Date: Tue, 15 Sep 2020 12:55:43 -0500 Subject: [PATCH] Prevent thundering herd of Action Cable clients This commit makes a few changes to the Action Cable client to prevent a "thundering herd" of client reconnects after server connectivity loss: * The client will wait a random amount between 1x and 3x of the stale threshold after the server's last ping before making the first reconnection attempt. * Subsequent reconnection attempts now use exponential backoff instead of logarithmic backoff. To allow the delay between reconnection attempts to increase slowly at first, the default exponentiation base is < 2. * Random jitter is applied to each delay between reconnection attempts. Co-authored-by: John Williams --- actioncable/CHANGELOG.md | 11 +++ .../app/assets/javascripts/action_cable.js | 31 +++++---- .../action_cable/connection_monitor.js | 29 ++++---- actioncable/test/javascript/src/test.js | 1 + .../src/unit/connection_monitor_test.js | 68 +++++++++++++++++++ 5 files changed, 110 insertions(+), 30 deletions(-) create mode 100644 actioncable/test/javascript/src/unit/connection_monitor_test.js diff --git a/actioncable/CHANGELOG.md b/actioncable/CHANGELOG.md index fe11e0ace7..e33ec06d0a 100644 --- a/actioncable/CHANGELOG.md +++ b/actioncable/CHANGELOG.md @@ -1,3 +1,14 @@ +* The Action Cable client now includes safeguards to prevent a "thundering + herd" of client reconnects after server connectivity loss: + * The client will wait a random amount between 1x and 3x of the stale + threshold after the server's last ping before making the first + reconnection attempt. + * Subsequent reconnection attempts now use exponential backoff instead of + logarithmic backoff. To allow the delay between reconnection attempts to + increase slowly at first, the default exponentiation base is < 2. + * Random jitter is applied to each delay between reconnection attempts. + + *Jonathan Hefner* Please check [6-1-stable](https://github.com/rails/rails/blob/6-1-stable/actioncable/CHANGELOG.md) for previous changes. diff --git a/actioncable/app/assets/javascripts/action_cable.js b/actioncable/app/assets/javascripts/action_cable.js index 8d14edbde1..49a84c65b5 100644 --- a/actioncable/app/assets/javascripts/action_cable.js +++ b/actioncable/app/assets/javascripts/action_cable.js @@ -50,9 +50,6 @@ var secondsSince = function secondsSince(time) { return (now() - time) / 1e3; }; - var clamp = function clamp(number, min, max) { - return Math.max(min, Math.min(max, number)); - }; var ConnectionMonitor = function() { function ConnectionMonitor(connection) { classCallCheck(this, ConnectionMonitor); @@ -66,7 +63,7 @@ delete this.stoppedAt; this.startPolling(); addEventListener("visibilitychange", this.visibilityDidChange); - logger.log("ConnectionMonitor started. pollInterval = " + this.getPollInterval() + " ms"); + logger.log("ConnectionMonitor started. stale threshold = " + this.constructor.staleThreshold + " s"); } }; ConnectionMonitor.prototype.stop = function stop() { @@ -108,16 +105,18 @@ }, this.getPollInterval()); }; ConnectionMonitor.prototype.getPollInterval = function getPollInterval() { - var _constructor$pollInte = this.constructor.pollInterval, min = _constructor$pollInte.min, max = _constructor$pollInte.max, multiplier = _constructor$pollInte.multiplier; - var interval = multiplier * Math.log(this.reconnectAttempts + 1); - return Math.round(clamp(interval, min, max) * 1e3); + var _constructor = this.constructor, staleThreshold = _constructor.staleThreshold, reconnectionBackoffRate = _constructor.reconnectionBackoffRate; + var backoff = Math.pow(1 + reconnectionBackoffRate, Math.min(this.reconnectAttempts, 10)); + var jitterMax = this.reconnectAttempts === 0 ? 1 : reconnectionBackoffRate; + var jitter = jitterMax * Math.random(); + return staleThreshold * 1e3 * backoff * (1 + jitter); }; ConnectionMonitor.prototype.reconnectIfStale = function reconnectIfStale() { if (this.connectionIsStale()) { - logger.log("ConnectionMonitor detected stale connection. reconnectAttempts = " + this.reconnectAttempts + ", pollInterval = " + this.getPollInterval() + " ms, time disconnected = " + secondsSince(this.disconnectedAt) + " s, stale threshold = " + this.constructor.staleThreshold + " s"); + logger.log("ConnectionMonitor detected stale connection. reconnectAttempts = " + this.reconnectAttempts + ", time stale = " + secondsSince(this.refreshedAt) + " s, stale threshold = " + this.constructor.staleThreshold + " s"); this.reconnectAttempts++; if (this.disconnectedRecently()) { - logger.log("ConnectionMonitor skipping reopening recent disconnect"); + logger.log("ConnectionMonitor skipping reopening recent disconnect. time disconnected = " + secondsSince(this.disconnectedAt) + " s"); } else { logger.log("ConnectionMonitor reopening"); this.connection.reopen(); @@ -125,7 +124,7 @@ } }; ConnectionMonitor.prototype.connectionIsStale = function connectionIsStale() { - return secondsSince(this.pingedAt ? this.pingedAt : this.startedAt) > this.constructor.staleThreshold; + return secondsSince(this.refreshedAt) > this.constructor.staleThreshold; }; ConnectionMonitor.prototype.disconnectedRecently = function disconnectedRecently() { return this.disconnectedAt && secondsSince(this.disconnectedAt) < this.constructor.staleThreshold; @@ -141,14 +140,16 @@ }, 200); } }; + createClass(ConnectionMonitor, [ { + key: "refreshedAt", + get: function get$$1() { + return this.pingedAt ? this.pingedAt : this.startedAt; + } + } ]); return ConnectionMonitor; }(); - ConnectionMonitor.pollInterval = { - min: 3, - max: 30, - multiplier: 5 - }; ConnectionMonitor.staleThreshold = 6; + ConnectionMonitor.reconnectionBackoffRate = .15; var INTERNAL = { message_types: { welcome: "welcome", diff --git a/actioncable/app/javascript/action_cable/connection_monitor.js b/actioncable/app/javascript/action_cable/connection_monitor.js index 3bcee72e8f..0d89f3c237 100644 --- a/actioncable/app/javascript/action_cable/connection_monitor.js +++ b/actioncable/app/javascript/action_cable/connection_monitor.js @@ -7,8 +7,6 @@ const now = () => new Date().getTime() const secondsSince = time => (now() - time) / 1000 -const clamp = (number, min, max) => Math.max(min, Math.min(max, number)) - class ConnectionMonitor { constructor(connection) { this.visibilityDidChange = this.visibilityDidChange.bind(this) @@ -22,7 +20,7 @@ class ConnectionMonitor { delete this.stoppedAt this.startPolling() addEventListener("visibilitychange", this.visibilityDidChange) - logger.log(`ConnectionMonitor started. pollInterval = ${this.getPollInterval()} ms`) + logger.log(`ConnectionMonitor started. stale threshold = ${this.constructor.staleThreshold} s`) } } @@ -75,17 +73,19 @@ class ConnectionMonitor { } getPollInterval() { - const {min, max, multiplier} = this.constructor.pollInterval - const interval = multiplier * Math.log(this.reconnectAttempts + 1) - return Math.round(clamp(interval, min, max) * 1000) + const { staleThreshold, reconnectionBackoffRate } = this.constructor + const backoff = Math.pow(1 + reconnectionBackoffRate, Math.min(this.reconnectAttempts, 10)) + const jitterMax = this.reconnectAttempts === 0 ? 1.0 : reconnectionBackoffRate + const jitter = jitterMax * Math.random() + return staleThreshold * 1000 * backoff * (1 + jitter) } reconnectIfStale() { if (this.connectionIsStale()) { - logger.log(`ConnectionMonitor detected stale connection. reconnectAttempts = ${this.reconnectAttempts}, pollInterval = ${this.getPollInterval()} ms, time disconnected = ${secondsSince(this.disconnectedAt)} s, stale threshold = ${this.constructor.staleThreshold} s`) + logger.log(`ConnectionMonitor detected stale connection. reconnectAttempts = ${this.reconnectAttempts}, time stale = ${secondsSince(this.refreshedAt)} s, stale threshold = ${this.constructor.staleThreshold} s`) this.reconnectAttempts++ if (this.disconnectedRecently()) { - logger.log("ConnectionMonitor skipping reopening recent disconnect") + logger.log(`ConnectionMonitor skipping reopening recent disconnect. time disconnected = ${secondsSince(this.disconnectedAt)} s`) } else { logger.log("ConnectionMonitor reopening") this.connection.reopen() @@ -93,8 +93,12 @@ class ConnectionMonitor { } } + get refreshedAt() { + return this.pingedAt ? this.pingedAt : this.startedAt + } + connectionIsStale() { - return secondsSince(this.pingedAt ? this.pingedAt : this.startedAt) > this.constructor.staleThreshold + return secondsSince(this.refreshedAt) > this.constructor.staleThreshold } disconnectedRecently() { @@ -115,12 +119,7 @@ class ConnectionMonitor { } -ConnectionMonitor.pollInterval = { - min: 3, - max: 30, - multiplier: 5 -} - ConnectionMonitor.staleThreshold = 6 // Server::Connections::BEAT_INTERVAL * 2 (missed two pings) +ConnectionMonitor.reconnectionBackoffRate = 0.15 export default ConnectionMonitor diff --git a/actioncable/test/javascript/src/test.js b/actioncable/test/javascript/src/test.js index eea1c0a408..54529ac10c 100644 --- a/actioncable/test/javascript/src/test.js +++ b/actioncable/test/javascript/src/test.js @@ -1,6 +1,7 @@ import "./test_helpers/index" import "./unit/action_cable_test" import "./unit/connection_test" +import "./unit/connection_monitor_test" import "./unit/consumer_test" import "./unit/subscription_test" import "./unit/subscriptions_test" diff --git a/actioncable/test/javascript/src/unit/connection_monitor_test.js b/actioncable/test/javascript/src/unit/connection_monitor_test.js new file mode 100644 index 0000000000..ac5d92494e --- /dev/null +++ b/actioncable/test/javascript/src/unit/connection_monitor_test.js @@ -0,0 +1,68 @@ +import * as ActionCable from "../../../../app/javascript/action_cable/index" + +const {module, test} = QUnit + +module("ActionCable.ConnectionMonitor", hooks => { + let monitor + hooks.beforeEach(() => monitor = new ActionCable.ConnectionMonitor({})) + + module("#getPollInterval", hooks => { + hooks.beforeEach(() => Math._random = Math.random) + hooks.afterEach(() => Math.random = Math._random) + + const { staleThreshold, reconnectionBackoffRate } = ActionCable.ConnectionMonitor + const backoffFactor = 1 + reconnectionBackoffRate + const ms = 1000 + + test("uses exponential backoff", assert => { + Math.random = () => 0 + + monitor.reconnectAttempts = 0 + assert.equal(monitor.getPollInterval(), staleThreshold * ms) + + monitor.reconnectAttempts = 1 + assert.equal(monitor.getPollInterval(), staleThreshold * backoffFactor * ms) + + monitor.reconnectAttempts = 2 + assert.equal(monitor.getPollInterval(), staleThreshold * backoffFactor * backoffFactor * ms) + }) + + test("caps exponential backoff after some number of reconnection attempts", assert => { + Math.random = () => 0 + monitor.reconnectAttempts = 42 + const cappedPollInterval = monitor.getPollInterval() + + monitor.reconnectAttempts = 9001 + assert.equal(monitor.getPollInterval(), cappedPollInterval) + }) + + test("uses 100% jitter when 0 reconnection attempts", assert => { + Math.random = () => 0 + assert.equal(monitor.getPollInterval(), staleThreshold * ms) + + Math.random = () => 0.5 + assert.equal(monitor.getPollInterval(), staleThreshold * 1.5 * ms) + }) + + test("uses reconnectionBackoffRate for jitter when >0 reconnection attempts", assert => { + monitor.reconnectAttempts = 1 + + Math.random = () => 0.25 + assert.equal(monitor.getPollInterval(), staleThreshold * backoffFactor * (1 + reconnectionBackoffRate * 0.25) * ms) + + Math.random = () => 0.5 + assert.equal(monitor.getPollInterval(), staleThreshold * backoffFactor * (1 + reconnectionBackoffRate * 0.5) * ms) + }) + + test("applies jitter after capped exponential backoff", assert => { + monitor.reconnectAttempts = 9001 + + Math.random = () => 0 + const withoutJitter = monitor.getPollInterval() + Math.random = () => 0.5 + const withJitter = monitor.getPollInterval() + + assert.ok(withJitter > withoutJitter) + }) + }) +})