From 237c70fe374142509e031dbbf136d57f43dd4913 Mon Sep 17 00:00:00 2001 From: Franz Liedke Date: Wed, 8 Sep 2021 00:04:01 +0200 Subject: [PATCH] Reconnect on "UNBLOCKED force unblock" errors (#4985) These errors can occur during Sidekiq's long-running job-fetching command, which uses Redis' blocking BRPOP primitive. On failover in a cluster setup, these commands are interrupted by the server. These errors cause the worker threads to be restarted, but because they bubble up to the top, they also cause a lot of spam in our error logging systems. Related errors from other commands are already handled by disconnecting and retrying (see #2550 and #4495), so handle this one the same way. --- lib/sidekiq.rb | 3 ++- test/test_sidekiq.rb | 10 ++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/lib/sidekiq.rb b/lib/sidekiq.rb index 3ee4f9d2..188013dd 100644 --- a/lib/sidekiq.rb +++ b/lib/sidekiq.rb @@ -100,7 +100,8 @@ module Sidekiq # 2550 Failover can cause the server to become a replica, need # to disconnect and reopen the socket to get back to the primary. # 4495 Use the same logic if we have a "Not enough replicas" error from the primary - if retryable && ex.message =~ /READONLY|NOREPLICAS/ + # 4985 Use the same logic when a blocking command is force-unblocked + if retryable && ex.message =~ /READONLY|NOREPLICAS|UNBLOCKED/ conn.disconnect! retryable = false retry diff --git a/test/test_sidekiq.rb b/test/test_sidekiq.rb index 7a1c6256..1852a626 100644 --- a/test/test_sidekiq.rb +++ b/test/test_sidekiq.rb @@ -96,6 +96,16 @@ describe Sidekiq do assert_equal 2, counts.size assert_equal counts[0] + 1, counts[1] end + + it 'reconnects if instance state changed' do + counts = [] + Sidekiq.redis do |c| + counts << c.info['total_connections_received'].to_i + raise Redis::CommandError, "UNBLOCKED force unblock from blocking operation, instance state changed (master -> replica?)" if counts.size == 1 + end + assert_equal 2, counts.size + assert_equal counts[0] + 1, counts[1] + end end describe 'redis info' do