require 'sidekiq/util'
require 'sidekiq/fetch'
require 'thread'
require 'concurrent'

module Sidekiq
  ##
  # The Processor receives a message from the Manager and actually
  # processes it. It instantiates the worker, runs the middleware
  # chain and then calls Sidekiq::Worker#perform.
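  #
  # A rough lifecycle sketch (not the Manager's actual calling code; `mgr`
  # stands in for any object responding to `processor_died`):
  #
  #   processor = Sidekiq::Processor.new(mgr, options)
  #   processor.start       # spawn the fetch/process loop in a background thread
  #   processor.terminate   # ask the loop to stop after the in-flight job
  #   processor.kill        # raise Sidekiq::Shutdown into a job that won't stop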
  class Processor

    # To prevent a memory leak, ensure that stats expire. However, they
    # should take up a minimal amount of storage so keep them around
    # for a long time.
    STATS_TIMEOUT = 24 * 60 * 60 * 365 * 5

    include Util

    attr_reader :thread
    attr_accessor :job

    def initialize(mgr, options)
      @mgr = mgr
      @down = false
      @done = false
      @job = nil
      @strategy = (options[:fetch] || Sidekiq::BasicFetch).new(options)
    end

    def terminate(wait=false)
      @done = true
      # nothing to wait on if the processor was never started
      return if !@thread
      @thread.value if wait
    end

    def kill(wait=false)
      return if !@thread
      # unlike the other actors, terminate does not wait
      # for the thread to finish because we don't know how
      # long the job will take to finish. Instead we
      # provide a `kill` method to call after the shutdown
      # timeout passes.
      @thread.raise ::Sidekiq::Shutdown
      @thread.value if wait
    end
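
    # A sketch of the shutdown sequence from the caller's side; the timeout
    # variable is illustrative, not something defined in this file:
    #
    #   processor.terminate        # stop fetching new jobs
    #   sleep(shutdown_timeout)    # give the in-flight job a grace period
    #   processor.kill(true)       # then force a straggler to stop and wait for it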

    def start
      @thread ||= safe_thread("processor", &method(:run))
    end

    private unless $TESTING

    def run
      begin
        while !@done
          self.job = fetch
          process(job) if job
          self.job = nil
        end
      rescue Exception => ex
        @mgr.processor_died(self, ex)
      end
    end

    def get_one
      begin
        work = @strategy.retrieve_work
        if @down
          logger.info("Redis is online, #{Time.now - @down} sec downtime")
          @down = nil
        end
        work
      rescue => ex
        handle_fetch_exception(ex)
      end
    end

    def fetch
      j = get_one
      if j && @done
        # We fetched a job but have been told to stop; push it back
        # onto its queue so another process can pick it up.
        j.requeue
        nil
      else
        j
      end
    end

    def handle_fetch_exception(ex)
      if !@down
        @down = Time.now
        logger.error("Error fetching message: #{ex}")
        ex.backtrace.each do |bt|
          logger.error(bt)
        end
      end
      sleep(1)
    end

    def process(work)
      msgstr = work.message
      queue = work.queue_name

      ack = false
      begin
        msg = Sidekiq.load_json(msgstr)
        klass = msg['class'.freeze].constantize
        worker = klass.new
        worker.jid = msg['jid'.freeze]

        stats(worker, msg, queue) do
          Sidekiq.server_middleware.invoke(worker, msg, queue) do
            # Only ack if we either attempted to start this job or
            # successfully completed it. This prevents us from
            # losing jobs if a middleware raises an exception before yielding
            ack = true
            execute_job(worker, cloned(msg['args'.freeze]))
          end
        end
        ack = true
      rescue Sidekiq::Shutdown
        # Had to force kill this job because it didn't finish
        # within the timeout. Don't acknowledge the work since
        # we didn't properly finish it.
        ack = false
      rescue Exception => ex
        handle_exception(ex, msg || { :message => msgstr })
        raise
      ensure
        work.acknowledge if ack
      end
    end
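
    # `Sidekiq.server_middleware.invoke` above runs every configured server
    # middleware around the job. A minimal sketch of such a middleware
    # (LogElapsed is hypothetical, not part of Sidekiq):
    #
    #   class LogElapsed
    #     def call(worker, msg, queue)
    #       start = Time.now
    #       yield   # runs the rest of the chain, then the job itself
    #     ensure
    #       Sidekiq.logger.info("#{msg['class']} took #{Time.now - start}s")
    #     end
    #   end
    #
    #   Sidekiq.configure_server do |config|
    #     config.server_middleware { |chain| chain.add LogElapsed }
    #   end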

    def execute_job(worker, cloned_args)
      worker.perform(*cloned_args)
    end

    def thread_identity
      @str ||= Thread.current.object_id.to_s(36)
    end

    WORKER_STATE = Concurrent::Map.new
    PROCESSED = Concurrent::AtomicFixnum.new
    FAILURE = Concurrent::AtomicFixnum.new
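
    # These are process-local counters; something outside this class (for
    # example a periodic heartbeat) is expected to read and publish them.
    # A sketch of reading them, assuming nothing about when that happens:
    #
    #   Sidekiq::Processor::PROCESSED.value   # jobs completed so far
    #   Sidekiq::Processor::FAILURE.value     # jobs that raised an error
    #   Sidekiq::Processor::WORKER_STATE.each_pair do |tid, hash|
    #     # hash is { :queue => ..., :payload => ..., :run_at => ... }
    #   end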

    def stats(worker, msg, queue)
      # Do not conflate errors from the job with errors caused by updating
      # stats so calling code can react appropriately
      tid = thread_identity
      WORKER_STATE[tid] = {:queue => queue, :payload => msg, :run_at => Time.now.to_i }

      begin
        yield
      rescue Exception
        FAILURE.increment
        raise
      ensure
        WORKER_STATE.delete(tid)
        PROCESSED.increment
      end
    end

    # Deep clone the arguments passed to the worker so that if
    # the message fails, what is pushed back onto Redis hasn't
    # been mutated by the worker.
    def cloned(ary)
      Marshal.load(Marshal.dump(ary))
    end
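
    # Illustrative only: because `cloned` hands the worker an independent copy,
    # in-place mutation of an argument does not leak back into the retry payload.
    #
    #   args = [{ 'x' => 1 }]
    #   copy = cloned(args)
    #   copy.first['x'] = 2
    #   args.first['x']   # => 1, the original is untouched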

    # If an exception occurs in the block passed to this method, that block
    # will be retried up to max_retries times. All exceptions will be
    # swallowed and logged.
    def retry_and_suppress_exceptions(max_retries = 5)
      retry_count = 0
      begin
        yield
      rescue => e
        retry_count += 1
        if retry_count <= max_retries
          Sidekiq.logger.info {"Suppressing and retrying error: #{e.inspect}"}
          pause_for_recovery(retry_count)
          retry
        else
          handle_exception(e, { :message => "Exhausted #{max_retries} retries"})
        end
      end
    end
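
    # A sketch of how this helper might be used; the Redis call is
    # illustrative, not taken from this file:
    #
    #   retry_and_suppress_exceptions do
    #     Sidekiq.redis { |conn| conn.incrby("stat:processed", 1) }
    #   end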

    def pause_for_recovery(retry_count)
      sleep(retry_count)
    end

  end
end