1
0
Fork 0
mirror of https://github.com/rails/rails.git synced 2022-11-09 12:12:34 -05:00
rails--rails/activejob/lib/active_job/exceptions.rb
John Hawthorn 5d9359bbc3 Use ActiveJob 5.2 retry logic for old jobs
Rails 6 introduces retries per-exception, instead of a global count of
retries. Because ActiveJob 5.2 doesn't serialize the execution count
per-exception, when ActiveJob 6.0 picks up an "old" job it can't know
the exception count in the new format.

This can also be an issue if AJ 6.0 serializes a new job with
exception_executions which is later picked up by AJ 5.2, which would
clear exception_executions (since it has no knowledge of it).

Previously we handled this by resetting exception_executions, if it
wasn't defined on a job, which could result in the worst case retrying
the job 2x the times we should.

This commit changes how we handle loading a legacy job: instead of
resetting exception_executions, we instead will always use the global
executions count.

This way, jobs which only have one retry_on (and didn't have a behaviour
change in AJ 6) are backwards-and-forwards-compatible with counts
respected exactly.

Jobs with multiple retry_on will revert to the AJ5.2 behaviour if they
were ever run under AJ5.2.
2019-04-22 14:53:38 -07:00

157 lines
6.6 KiB
Ruby

# frozen_string_literal: true
require "active_support/core_ext/numeric/time"
module ActiveJob
# Provides behavior for retrying and discarding jobs on exceptions.
module Exceptions
extend ActiveSupport::Concern
module ClassMethods
# Catch the exception and reschedule job for re-execution after so many seconds, for a specific number of attempts.
# If the exception keeps getting raised beyond the specified number of attempts, the exception is allowed to
# bubble up to the underlying queuing system, which may have its own retry mechanism or place it in a
# holding queue for inspection.
#
# You can also pass a block that'll be invoked if the retry attempts fail for custom logic rather than letting
# the exception bubble up. This block is yielded with the job instance as the first and the error instance as the second parameter.
#
# ==== Options
# * <tt>:wait</tt> - Re-enqueues the job with a delay specified either in seconds (default: 3 seconds),
# as a computing proc that the number of executions so far as an argument, or as a symbol reference of
# <tt>:exponentially_longer</tt>, which applies the wait algorithm of <tt>(executions ** 4) + 2</tt>
# (first wait 3s, then 18s, then 83s, etc)
# * <tt>:attempts</tt> - Re-enqueues the job the specified number of times (default: 5 attempts)
# * <tt>:queue</tt> - Re-enqueues the job on a different queue
# * <tt>:priority</tt> - Re-enqueues the job with a different priority
#
# ==== Examples
#
# class RemoteServiceJob < ActiveJob::Base
# retry_on CustomAppException # defaults to 3s wait, 5 attempts
# retry_on AnotherCustomAppException, wait: ->(executions) { executions * 2 }
#
# retry_on ActiveRecord::Deadlocked, wait: 5.seconds, attempts: 3
# retry_on Net::OpenTimeout, Timeout::Error, wait: :exponentially_longer, attempts: 10 # retries at most 10 times for Net::OpenTimeout and Timeout::Error combined
# # To retry at most 10 times for each individual exception:
# # retry_on Net::OpenTimeout, wait: :exponentially_longer, attempts: 10
# # retry_on Timeout::Error, wait: :exponentially_longer, attempts: 10
#
# retry_on(YetAnotherCustomAppException) do |job, error|
# ExceptionNotifier.caught(error)
# end
#
# def perform(*args)
# # Might raise CustomAppException, AnotherCustomAppException, or YetAnotherCustomAppException for something domain specific
# # Might raise ActiveRecord::Deadlocked when a local db deadlock is detected
# # Might raise Net::OpenTimeout or Timeout::Error when the remote service is down
# end
# end
def retry_on(*exceptions, wait: 3.seconds, attempts: 5, queue: nil, priority: nil)
rescue_from(*exceptions) do |error|
executions = executions_for(exceptions)
if executions < attempts
retry_job wait: determine_delay(seconds_or_duration_or_algorithm: wait, executions: executions), queue: queue, priority: priority, error: error
else
if block_given?
instrument :retry_stopped, error: error do
yield self, error
end
else
instrument :retry_stopped, error: error
raise error
end
end
end
end
# Discard the job with no attempts to retry, if the exception is raised. This is useful when the subject of the job,
# like an Active Record, is no longer available, and the job is thus no longer relevant.
#
# You can also pass a block that'll be invoked. This block is yielded with the job instance as the first and the error instance as the second parameter.
#
# ==== Example
#
# class SearchIndexingJob < ActiveJob::Base
# discard_on ActiveJob::DeserializationError
# discard_on(CustomAppException) do |job, error|
# ExceptionNotifier.caught(error)
# end
#
# def perform(record)
# # Will raise ActiveJob::DeserializationError if the record can't be deserialized
# # Might raise CustomAppException for something domain specific
# end
# end
def discard_on(*exceptions)
rescue_from(*exceptions) do |error|
instrument :discard, error: error do
yield self, error if block_given?
end
end
end
end
# Reschedules the job to be re-executed. This is useful in combination
# with the +rescue_from+ option. When you rescue an exception from your job
# you can ask Active Job to retry performing your job.
#
# ==== Options
# * <tt>:wait</tt> - Enqueues the job with the specified delay in seconds
# * <tt>:wait_until</tt> - Enqueues the job at the time specified
# * <tt>:queue</tt> - Enqueues the job on the specified queue
# * <tt>:priority</tt> - Enqueues the job with the specified priority
#
# ==== Examples
#
# class SiteScraperJob < ActiveJob::Base
# rescue_from(ErrorLoadingSite) do
# retry_job queue: :low_priority
# end
#
# def perform(*args)
# # raise ErrorLoadingSite if cannot scrape
# end
# end
def retry_job(options = {})
instrument :enqueue_retry, options.slice(:error, :wait) do
enqueue options
end
end
private
def determine_delay(seconds_or_duration_or_algorithm:, executions:)
case seconds_or_duration_or_algorithm
when :exponentially_longer
(executions**4) + 2
when ActiveSupport::Duration
duration = seconds_or_duration_or_algorithm
duration.to_i
when Integer
seconds = seconds_or_duration_or_algorithm
seconds
when Proc
algorithm = seconds_or_duration_or_algorithm
algorithm.call(executions)
else
raise "Couldn't determine a delay based on #{seconds_or_duration_or_algorithm.inspect}"
end
end
def instrument(name, error: nil, wait: nil, &block)
payload = { job: self, adapter: self.class.queue_adapter, error: error, wait: wait }
ActiveSupport::Notifications.instrument("#{name}.active_job", payload, &block)
end
def executions_for(exceptions)
if exception_executions
exception_executions[exceptions.to_s] = (exception_executions[exceptions.to_s] || 0) + 1
else
# Guard against jobs that were persisted before we started having individual executions counters per retry_on
executions
end
end
end
end