2012-02-08 17:04:02 -08:00
|
|
|
require 'sidekiq/util'
|
2013-05-10 20:43:53 -07:00
|
|
|
require 'sidekiq/actor'
|
2012-02-08 17:04:02 -08:00
|
|
|
|
2012-03-17 13:41:53 -07:00
|
|
|
require 'sidekiq/middleware/server/retry_jobs'
|
2012-02-25 13:43:53 -08:00
|
|
|
require 'sidekiq/middleware/server/logging'
|
2012-02-19 13:02:32 -08:00
|
|
|
|
2012-01-25 13:32:51 -08:00
|
|
|
module Sidekiq
|
2012-06-12 21:55:06 -07:00
|
|
|
##
|
|
|
|
# The Processor receives a message from the Manager and actually
|
|
|
|
# processes it. It instantiates the worker, runs the middleware
|
|
|
|
# chain and then calls Sidekiq::Worker#perform.
|
2012-01-25 13:32:51 -08:00
|
|
|
class Processor
|
2014-03-23 23:02:36 -04:00
|
|
|
# To prevent a memory leak, ensure that stats expire. However, they should take up a minimal amount of storage
|
|
|
|
# so keep them around for a long time
|
|
|
|
STATS_TIMEOUT = 24 * 60 * 60 * 365 * 5
|
2013-05-31 09:02:27 -07:00
|
|
|
|
2012-01-26 12:45:04 -08:00
|
|
|
include Util
|
2013-05-10 20:43:53 -07:00
|
|
|
include Actor
|
2012-01-25 13:32:51 -08:00
|
|
|
|
2012-02-19 13:02:32 -08:00
|
|
|
# Builds the middleware chain that wraps every job on the server side:
# per-job logging, automatic retry handling, and — only when
# ActiveRecord is loaded in the host app — connection cleanup
# after each job.
def self.default_middleware
  Middleware::Chain.new do |chain|
    chain.add Middleware::Server::Logging
    chain.add Middleware::Server::RetryJobs
    if defined?(::ActiveRecord::Base)
      require 'sidekiq/middleware/server/active_record'
      chain.add Sidekiq::Middleware::Server::ActiveRecord
    end
  end
end
|
|
|
|
|
2013-06-10 22:20:15 -07:00
|
|
|
attr_accessor :proxy_id
|
2013-03-26 22:56:49 -07:00
|
|
|
|
2012-02-08 17:04:02 -08:00
|
|
|
# Wires this processor to the manager actor that feeds it work.
# The manager (@boss) is notified asynchronously when each unit of
# work completes — see #process.
def initialize(boss)
  @boss = boss
end
|
|
|
|
|
2013-01-05 21:17:08 -08:00
|
|
|
# Executes one unit of work handed over by the Manager.
#
# work - an object responding to #message (the raw JSON job payload),
#        #queue_name and #acknowledge.
#
# Acknowledgement semantics are the heart of this method: the job is
# acked (removed from the queue) only if it was actually attempted,
# so a middleware failure before the job starts, or a forced kill via
# Sidekiq::Shutdown, leaves the job unacked and re-fetchable.
# Any other exception is reported and re-raised so the boss can
# restart this processor actor.
def process(work)
  msgstr = work.message
  queue = work.queue_name

  # Register this processor's real OS thread with the manager so it
  # can be force-killed on hard shutdown.
  @boss.async.real_thread(proxy_id, Thread.current)

  ack = false
  begin
    msg = Sidekiq.load_json(msgstr)
    klass = msg['class'.freeze].constantize
    worker = klass.new
    worker.jid = msg['jid'.freeze]

    stats(worker, msg, queue) do
      Sidekiq.server_middleware.invoke(worker, msg, queue) do
        # Only ack if we either attempted to start this job or
        # successfully completed it. This prevents us from
        # losing jobs if a middleware raises an exception before yielding
        ack = true
        execute_job(worker, cloned(msg['args'.freeze]))
      end
    end
    # Also ack when a middleware legitimately short-circuits the chain
    # (never yields) without raising — the job was handled.
    ack = true
  rescue Sidekiq::Shutdown
    # Had to force kill this job because it didn't finish
    # within the timeout. Don't acknowledge the work since
    # we didn't properly finish it.
    ack = false
  rescue Exception => ex
    # NOTE: rescuing Exception is deliberate here — the error is
    # reported and then re-raised, not swallowed. `msg` may be nil if
    # JSON parsing failed, hence the fallback to the raw string.
    handle_exception(ex, msg || { :message => msgstr })
    raise
  ensure
    work.acknowledge if ack
  end

  # Tell the manager this processor is free for the next job.
  @boss.async.processor_done(current_actor)
end
|
2012-01-29 14:35:16 -08:00
|
|
|
|
2013-06-07 22:15:13 -07:00
|
|
|
def inspect
|
|
|
|
"<Processor##{object_id.to_s(16)}>"
|
|
|
|
end
|
|
|
|
|
2014-09-09 19:57:39 -06:00
|
|
|
# Invokes the worker's perform method, splatting the (already
# deep-cloned) argument array. Kept as its own method so job
# execution shows up as a distinct frame in backtraces.
def execute_job(worker, cloned_args)
  worker.public_send(:perform, *cloned_args)
end
|
|
|
|
|
2013-03-26 22:55:07 -07:00
|
|
|
private
|
|
|
|
|
2014-03-02 16:36:00 -08:00
|
|
|
# A short process-unique token for the current thread (its object_id
# in base 36), memoized per processor instance. Used as the field
# name in the "<identity>:workers" Redis hash — see #stats.
def thread_identity
  @str = Thread.current.object_id.to_s(36) if @str.nil?
  @str
end
|
|
|
|
|
|
|
|
# Wraps job execution (the given block) with stats bookkeeping in
# Redis: records the in-progress job in the "<identity>:workers"
# hash before yielding, bumps the failed counters if the block
# raises (then re-raises), and always bumps the processed counters
# and clears the in-progress entry on the way out.
def stats(worker, msg, queue)
  # Do not conflate errors from the job with errors caused by updating
  # stats so calling code can react appropriately
  retry_and_suppress_exceptions do
    hash = Sidekiq.dump_json({:queue => queue, :payload => msg, :run_at => Time.now.to_i })
    Sidekiq.redis do |conn|
      conn.multi do
        conn.hmset("#{identity}:workers", thread_identity, hash)
        # Expire the in-progress hash so a crashed process doesn't
        # leave its entries behind forever (4 hours).
        conn.expire("#{identity}:workers", 60*60*4)
      end
    end
  end

  # Snapshot the date once so the failed/processed day buckets agree
  # even if the job runs across midnight UTC.
  nowdate = Time.now.utc.strftime("%Y-%m-%d".freeze)
  begin
    yield
  rescue Exception
    # Bump the all-time and per-day failure counters, then re-raise
    # the original job error untouched.
    retry_and_suppress_exceptions do
      failed = "stat:failed:#{nowdate}"
      Sidekiq.redis do |conn|
        conn.multi do
          conn.incrby("stat:failed".freeze, 1)
          conn.incrby(failed, 1)
          conn.expire(failed, STATS_TIMEOUT)
        end
      end
    end
    raise
  ensure
    # Success or failure, the job was processed: clear the
    # in-progress entry and bump the processed counters.
    retry_and_suppress_exceptions do
      processed = "stat:processed:#{nowdate}"
      Sidekiq.redis do |conn|
        conn.multi do
          conn.hdel("#{identity}:workers", thread_identity)
          conn.incrby("stat:processed".freeze, 1)
          conn.incrby(processed, 1)
          conn.expire(processed, STATS_TIMEOUT)
        end
      end
    end
  end
end
|
2012-02-10 23:16:12 -08:00
|
|
|
|
2014-01-27 20:29:19 -08:00
|
|
|
# Deep-copies the worker's argument array via a Marshal round trip,
# so that if the job fails, what is pushed back onto Redis has not
# been mutated in place by the worker.
def cloned(ary)
  serialized = Marshal.dump(ary)
  Marshal.load(serialized)
end
|
2014-02-24 14:58:35 -05:00
|
|
|
|
2014-02-24 16:10:07 -05:00
|
|
|
# Runs the given block, retrying it up to max_retries times if it
# raises a StandardError, pausing between attempts. Once the retries
# are exhausted the final error is reported and swallowed — this is
# for best-effort stats updates that must never kill the job itself.
def retry_and_suppress_exceptions(max_retries = 5)
  attempts = 0
  begin
    yield
  rescue => err
    attempts += 1
    if attempts > max_retries
      handle_exception(err, { :message => "Exhausted #{max_retries} retries"})
    else
      Sidekiq.logger.debug {"Suppressing and retrying error: #{err.inspect}"}
      pause_for_recovery(attempts)
      retry
    end
  end
end
|
2015-05-19 15:05:09 -07:00
|
|
|
|
2015-05-20 10:31:16 -07:00
|
|
|
# Sleeps for a duration that grows linearly with the attempt number,
# giving Redis/the network time to recover between retries of a
# failed stats update.
def pause_for_recovery(retry_count)
  Kernel.sleep(retry_count)
end
|
2012-01-25 13:32:51 -08:00
|
|
|
end
|
|
|
|
end
|