# frozen_string_literal: true

require "sidekiq/util"
require "sidekiq/fetch"
require "sidekiq/job_logger"
require "sidekiq/job_retry"

module Sidekiq
  ##
  # The Processor is a standalone thread which:
  #
  # 1. fetches a job from Redis
  # 2. executes the job
  #   a. instantiate the Worker
  #   b. run the middleware chain
  #   c. call #perform
  #
  # A Processor can exit due to shutdown (processor_stopped)
  # or due to an error during job execution (processor_died).
  #
  # If an error occurs during job execution, the Processor
  # calls the Manager to create a new Processor to replace
  # itself and exits.
  #
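  # A minimal lifecycle sketch (the `mgr` argument is hypothetical here;
  # a real Manager supplies #options plus the processor_stopped and
  # processor_died callbacks mentioned above):
  #
  #   processor = Sidekiq::Processor.new(mgr)
  #   processor.start            # spawn the processor thread
  #   processor.terminate(true)  # finish the current job, then exit
  #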
  class Processor
    include Util

    attr_reader :thread
    attr_reader :job

    def initialize(mgr)
      @mgr = mgr
      @down = false
      @done = false
      @job = nil
      @thread = nil
      @strategy = (mgr.options[:fetch] || Sidekiq::BasicFetch).new(mgr.options)
      @reloader = Sidekiq.options[:reloader]
      @job_logger = (mgr.options[:job_logger] || Sidekiq::JobLogger).new
      @retrier = Sidekiq::JobRetry.new
    end

    def terminate(wait = false)
      @done = true
      return unless @thread
      @thread.value if wait
    end

    def kill(wait = false)
      @done = true
      return unless @thread
      # unlike the other actors, terminate does not wait
      # for the thread to finish because we don't know how
      # long the job will take to finish. Instead we
      # provide a `kill` method to call after the shutdown
      # timeout passes.
      @thread.raise ::Sidekiq::Shutdown
      @thread.value if wait
    end
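
    # safe_thread (from Sidekiq::Util) labels the thread and reports any
    # exception that escapes the block before re-raising it.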
    def start
      @thread ||= safe_thread("processor", &method(:run))
    end

    private unless $TESTING

    def run
      process_one until @done
      @mgr.processor_stopped(self)
    rescue Sidekiq::Shutdown
      @mgr.processor_stopped(self)
    rescue Exception => ex
      @mgr.processor_died(self, ex)
    end

    def process_one
      @job = fetch
      process(@job) if @job
      @job = nil
    end

    def get_one
      work = @strategy.retrieve_work
      if @down
        logger.info { "Redis is online, #{::Process.clock_gettime(::Process::CLOCK_MONOTONIC) - @down} sec downtime" }
        @down = nil
      end
      work
    rescue Sidekiq::Shutdown
    rescue => ex
      handle_fetch_exception(ex)
    end
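
    # If we're shutting down, push a just-fetched job back onto its queue
    # instead of running it, so no work is dropped mid-shutdown.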
    def fetch
      j = get_one
      if j && @done
        j.requeue
        nil
      else
        j
      end
    end

    def handle_fetch_exception(ex)
      unless @down
        @down = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
        logger.error("Error fetching job: #{ex}")
        handle_exception(ex)
      end
      sleep(1)
      nil
    end

    def dispatch(job_hash, queue, jobstr)
      # since middleware can mutate the job hash
      # we need to clone it to report the original
      # job structure to the Web UI
      # or to push back to redis when retrying.
      # To avoid costly and, most of the time, useless cloning here,
      # we pass the original JSON string to the respective methods
      # so they can re-parse it if they need the original, untouched job.

      @job_logger.prepare(job_hash) do
        @retrier.global(jobstr, queue) do
          @job_logger.call(job_hash, queue) do
            stats(jobstr, queue) do
              # Rails 5 requires a Reloader to wrap code execution. In order to
              # constantize the worker and instantiate an instance, we have to call
              # the Reloader. It handles code loading, db connection management, etc.
              # Effectively this block denotes a "unit of work" to Rails.
              @reloader.call do
                klass = constantize(job_hash["class"])
                worker = klass.new
                worker.jid = job_hash["jid"]
                @retrier.local(worker, jobstr, queue) do
                  yield worker
                end
              end
            end
          end
        end
      end
    end
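
    # Effective nesting above, outermost to innermost: job-logger
    # preparation → global retry handler → per-job logging → stats
    # bookkeeping → Rails reloader → per-worker retry handler → the
    # worker's #perform (via the block passed in by #process below).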

    def process(work)
      jobstr = work.job
      queue = work.queue_name

      # Treat malformed JSON as a special case: job goes straight to the morgue.
      job_hash = nil
      begin
        job_hash = Sidekiq.load_json(jobstr)
      rescue => ex
        handle_exception(ex, {context: "Invalid JSON for job", jobstr: jobstr})
        # we can't notify because the job isn't a valid hash payload.
        DeadSet.new.kill(jobstr, notify_failure: false)
        return work.acknowledge
      end

      ack = false
      begin
        dispatch(job_hash, queue, jobstr) do |worker|
          Sidekiq.server_middleware.invoke(worker, job_hash, queue) do
            execute_job(worker, job_hash["args"])
          end
        end
        ack = true
      rescue Sidekiq::Shutdown
        # Had to force kill this job because it didn't finish
        # within the timeout. Don't acknowledge the work since
        # we didn't properly finish it.
      rescue Sidekiq::JobRetry::Handled => h
        # this is the common case: the job raised an error and
        # Sidekiq::JobRetry::Handled signals that we created a retry
        # successfully. We can acknowledge the job.
        ack = true
        e = h.cause || h
        handle_exception(e, {context: "Job raised exception", job: job_hash, jobstr: jobstr})
        raise e
      rescue Exception => ex
        # Unexpected error! This is very bad and indicates an exception that got past
        # the retry subsystem (e.g. network partition). We won't acknowledge the job
        # so it can be rescued when using Sidekiq Pro.
        handle_exception(ex, {context: "Internal exception!", job: job_hash, jobstr: jobstr})
        raise ex
      ensure
        if ack
          # We don't want a shutdown signal to interrupt job acknowledgment.
          Thread.handle_interrupt(Sidekiq::Shutdown => :never) do
            work.acknowledge
          end
        end
      end
    end
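
    # Kept as its own tiny method so the actual perform call is a clean,
    # stable seam for instrumentation and monitoring tools to hook.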
    def execute_job(worker, cloned_args)
      worker.perform(*cloned_args)
    end

    # Ruby doesn't provide atomic counters out of the box so we'll
    # implement something simple ourselves.
    # https://bugs.ruby-lang.org/issues/14706
    class Counter
      def initialize
        @value = 0
        @lock = Mutex.new
      end

      def incr(amount = 1)
        @lock.synchronize { @value += amount }
      end

      def reset
        @lock.synchronize {
          val = @value
          @value = 0
          val
        }
      end
    end
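
    # Illustrative usage (not part of any public API):
    #
    #   c = Counter.new
    #   c.incr       # => 1
    #   c.incr(5)    # => 6
    #   c.reset      # => 6, and the counter is back at 0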

    # jruby's Hash implementation is not threadsafe, so we wrap it in a mutex here
    class SharedWorkerState
      def initialize
        @worker_state = {}
        @lock = Mutex.new
      end

      def set(tid, hash)
        @lock.synchronize { @worker_state[tid] = hash }
      end

      def delete(tid)
        @lock.synchronize { @worker_state.delete(tid) }
      end

      def dup
        @lock.synchronize { @worker_state.dup }
      end

      def size
        @lock.synchronize { @worker_state.size }
      end

      def clear
        @lock.synchronize { @worker_state.clear }
      end
    end

    PROCESSED = Counter.new
    FAILURE = Counter.new
    WORKER_STATE = SharedWorkerState.new
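
    # These are process-global: every Processor thread in this process
    # shares them, and (in this version) the launcher's heartbeat
    # periodically flushes the counters to Redis for the Web UI stats.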

    def stats(jobstr, queue)
      WORKER_STATE.set(tid, {queue: queue, payload: jobstr, run_at: Time.now.to_i})

      begin
        yield
      rescue Exception
        FAILURE.incr
        raise
      ensure
        WORKER_STATE.delete(tid)
        PROCESSED.incr
      end
    end
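
    # A quick sketch of the lookup semantics: constantize("Foo::Bar")
    # resolves Object → Foo → Foo::Bar; the `false` flag below means a
    # top-level ::Bar cannot satisfy the nested lookup.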
    def constantize(str)
      return Object.const_get(str) unless str.include?("::")

      names = str.split("::")
      names.shift if names.empty? || names.first.empty?

      names.inject(Object) do |constant, name|
        # the false flag limits the search for name to the constant's own
        # namespace, which mimics Rails' behaviour
        constant.const_get(name, false)
      end
    end
  end
end
|