require 'sidekiq/util'
require 'sidekiq/fetch'
require 'thread'
require 'concurrent'

module Sidekiq
  ##
  # The Processor receives a message from the Manager and actually
  # processes it. It instantiates the worker, runs the middleware
  # chain and then calls Sidekiq::Worker#perform.
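  #
  # A rough lifecycle sketch (not the Manager's actual calling code; `mgr`
  # stands in for any object responding to `processor_died`):
  #
  #   processor = Sidekiq::Processor.new(mgr, options)
  #   processor.start       # spawn the fetch/process loop in a background thread
  #   processor.terminate   # ask the loop to stop after the in-flight job
  #   processor.kill        # raise Sidekiq::Shutdown into a job that won't stop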
  class Processor

    # To prevent a memory leak, ensure that stats expire. However, they
    # should take up a minimal amount of storage so keep them around
    # for a long time.
    STATS_TIMEOUT = 24 * 60 * 60 * 365 * 5

    include Util

    attr_reader :thread
    attr_accessor :job

    def initialize(mgr, options)
      @mgr = mgr
      @down = false
      @done = false
      @job = nil
      @strategy = (options[:fetch] || Sidekiq::BasicFetch).new(options)
    end

    def terminate(wait=false)
      @done = true
      # nothing to wait on if the processor was never started
      return if !@thread
      @thread.value if wait
    end

    def kill(wait=false)
      return if !@thread
      # unlike the other actors, terminate does not wait
      # for the thread to finish because we don't know how
      # long the job will take to finish. Instead we
      # provide a `kill` method to call after the shutdown
      # timeout passes.
      @thread.raise ::Sidekiq::Shutdown
      @thread.value if wait
    end
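
    # A sketch of the shutdown sequence from the caller's side; the timeout
    # variable is illustrative, not something defined in this file:
    #
    #   processor.terminate        # stop fetching new jobs
    #   sleep(shutdown_timeout)    # give the in-flight job a grace period
    #   processor.kill(true)       # then force a straggler to stop and wait for it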

    def start
      @thread ||= safe_thread("processor", &method(:run))
    end

    private unless $TESTING

    def run
      begin
        while !@done
          self.job = fetch
          process(job) if job
          self.job = nil
        end
      rescue Exception => ex
        @mgr.processor_died(self, ex)
      end
    end

    def get_one
      begin
        work = @strategy.retrieve_work
        if @down
          logger.info("Redis is online, #{Time.now - @down} sec downtime")
          @down = nil
        end
        work
      rescue => ex
        handle_fetch_exception(ex)
      end
    end

    def fetch
      j = get_one
      if j && @done
        # We fetched a job but have been told to stop; push it back
        # onto its queue so another process can pick it up.
        j.requeue
        nil
      else
        j
      end
    end

    def handle_fetch_exception(ex)
      if !@down
        @down = Time.now
        logger.error("Error fetching message: #{ex}")
        ex.backtrace.each do |bt|
          logger.error(bt)
        end
      end
      sleep(1)
    end

    def process(work)
      msgstr = work.message
      queue = work.queue_name

      ack = false
      begin
        msg = Sidekiq.load_json(msgstr)
        klass = msg['class'.freeze].constantize
        worker = klass.new
        worker.jid = msg['jid'.freeze]

        stats(worker, msg, queue) do
          Sidekiq.server_middleware.invoke(worker, msg, queue) do
            # Only ack if we either attempted to start this job or
            # successfully completed it. This prevents us from
            # losing jobs if a middleware raises an exception before yielding
            ack = true
            execute_job(worker, cloned(msg['args'.freeze]))
          end
        end
        ack = true
      rescue Sidekiq::Shutdown
        # Had to force kill this job because it didn't finish
        # within the timeout. Don't acknowledge the work since
        # we didn't properly finish it.
        ack = false
      rescue Exception => ex
        handle_exception(ex, msg || { :message => msgstr })
        raise
      ensure
        work.acknowledge if ack
      end
    end
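
    # `Sidekiq.server_middleware.invoke` above runs every configured server
    # middleware around the job. A minimal sketch of such a middleware
    # (LogElapsed is hypothetical, not part of Sidekiq):
    #
    #   class LogElapsed
    #     def call(worker, msg, queue)
    #       start = Time.now
    #       yield   # runs the rest of the chain, then the job itself
    #     ensure
    #       Sidekiq.logger.info("#{msg['class']} took #{Time.now - start}s")
    #     end
    #   end
    #
    #   Sidekiq.configure_server do |config|
    #     config.server_middleware { |chain| chain.add LogElapsed }
    #   end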

    def execute_job(worker, cloned_args)
      worker.perform(*cloned_args)
    end

    def thread_identity
      @str ||= Thread.current.object_id.to_s(36)
    end

    WORKER_STATE = Concurrent::Map.new
    PROCESSED = Concurrent::AtomicFixnum.new
    FAILURE = Concurrent::AtomicFixnum.new
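
    # These are process-local counters; something outside this class (for
    # example a periodic heartbeat) is expected to read and publish them.
    # A sketch of reading them, assuming nothing about when that happens:
    #
    #   Sidekiq::Processor::PROCESSED.value   # jobs completed so far
    #   Sidekiq::Processor::FAILURE.value     # jobs that raised an error
    #   Sidekiq::Processor::WORKER_STATE.each_pair do |tid, hash|
    #     # hash is { :queue => ..., :payload => ..., :run_at => ... }
    #   end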

    def stats(worker, msg, queue)
      # Do not conflate errors from the job with errors caused by updating
      # stats so calling code can react appropriately
      tid = thread_identity
      WORKER_STATE[tid] = {:queue => queue, :payload => msg, :run_at => Time.now.to_i }

      begin
        yield
      rescue Exception
        FAILURE.increment
        raise
      ensure
        WORKER_STATE.delete(tid)
        PROCESSED.increment
      end
    end

    # Deep clone the arguments passed to the worker so that if
    # the message fails, what is pushed back onto Redis hasn't
    # been mutated by the worker.
    def cloned(ary)
      Marshal.load(Marshal.dump(ary))
    end
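
    # Illustrative only: because `cloned` hands the worker an independent copy,
    # in-place mutation of an argument does not leak back into the retry payload.
    #
    #   args = [{ 'x' => 1 }]
    #   copy = cloned(args)
    #   copy.first['x'] = 2
    #   args.first['x']   # => 1, the original is untouched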

    # If an exception occurs in the block passed to this method, that block
    # will be retried up to max_retries times. All exceptions will be
    # swallowed and logged.
    def retry_and_suppress_exceptions(max_retries = 5)
      retry_count = 0
      begin
        yield
      rescue => e
        retry_count += 1
        if retry_count <= max_retries
          Sidekiq.logger.info {"Suppressing and retrying error: #{e.inspect}"}
          pause_for_recovery(retry_count)
          retry
        else
          handle_exception(e, { :message => "Exhausted #{max_retries} retries"})
        end
      end
    end
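
    # A sketch of how this helper might be used; the Redis call is
    # illustrative, not taken from this file:
    #
    #   retry_and_suppress_exceptions do
    #     Sidekiq.redis { |conn| conn.incrby("stat:processed", 1) }
    #   end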

    def pause_for_recovery(retry_count)
      sleep(retry_count)
    end

  end
end