require 'celluloid'
require 'sidekiq/util'
require 'sidekiq/processor'
require 'sidekiq/fetch'

module Sidekiq

  ##
  # The main router in the system. This
  # manages the processor state and accepts messages
  # from Redis to be dispatched to an idle processor.
  #
  class Manager
    include Util
    include Celluloid
    trap_exit :processor_died
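
    # Celluloid wiring: each Processor is created with new_link (see
    # initialize), so this Manager is linked to all of its processors, and
    # trap_exit above registers processor_died as the handler Celluloid
    # invokes when a linked Processor crashes. That lets the Manager replace
    # dead workers rather than crash along with them.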

    def initialize(options={})
      logger.debug { options.inspect }
      @count = options[:concurrency] || 25
      @done_callback = nil

      @in_progress = {}
      @done = false
      @busy = []
      @fetcher = Fetcher.new(current_actor, options)
      @ready = @count.times.map { Processor.new_link(current_actor) }
    end
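
    # A minimal construction sketch (an assumed caller; in Sidekiq proper the
    # CLI owns this lifecycle and the option keys may differ):
    #
    #   manager = Sidekiq::Manager.new(:concurrency => 10)
    #   manager.async.start   # begin pulling work from Redis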

    def stop(options={})
      watchdog('Manager#stop died') do
        shutdown = options[:shutdown]
        timeout = options[:timeout]

        @done = true
        Sidekiq::Fetcher.done!
        @fetcher.async.terminate if @fetcher.alive?

        logger.info { "Shutting down #{@ready.size} quiet workers" }
        @ready.each { |x| x.terminate if x.alive? }
        @ready.clear

        clear_worker_set

        return after(0) { signal(:shutdown) } if @busy.empty?
        hard_shutdown_in timeout if shutdown
      end
    end
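
    # The shutdown handshake, as an assumed-caller sketch: stop flips @done,
    # drains the quiet processors, and eventually fires signal(:shutdown);
    # a caller can block on that condition:
    #
    #   manager.async.stop(:shutdown => true, :timeout => 8)
    #   manager.wait(:shutdown)   # woken by the signal(:shutdown) calls above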

    def start
      @ready.each { dispatch }
    end

    def when_done(&blk)
      @done_callback = blk
    end
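
    # Observer hook (an assumed usage, e.g. a test harness). The block runs
    # inside processor_done before the bookkeeping, so it should be fast and
    # should not raise, or the bookkeeping after it is skipped:
    #
    #   manager.when_done { |processor| record_completion(processor) }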

    def processor_done(processor)
      watchdog('Manager#processor_done died') do
        @done_callback.call(processor) if @done_callback
        @in_progress.delete(processor.object_id)
        @busy.delete(processor)
        if stopped?
          processor.terminate if processor.alive?
          signal(:shutdown) if @busy.empty?
        else
          @ready << processor if processor.alive?
        end
        dispatch
      end
    end

    def processor_died(processor, reason)
      watchdog("Manager#processor_died died") do
        @in_progress.delete(processor.object_id)
        @busy.delete(processor)

        unless stopped?
          @ready << Processor.new_link(current_actor)
          dispatch
        else
          signal(:shutdown) if @busy.empty?
        end
      end
    end

    def assign(work)
      watchdog("Manager#assign died") do
        if stopped?
          # Race condition with Manager#stop: if the Fetcher is blocked on
          # redis and gets a message after all the ready Processors have
          # been stopped, push the message back to redis.
          work.requeue
        else
          processor = @ready.pop
          @in_progress[processor.object_id] = work
          @busy << processor
          processor.async.process(work)
        end
      end
    end
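
    # From this class's perspective, `work` only needs to respond to #requeue
    # (push itself back to Redis) and to whatever Processor#process consumes;
    # in stock Sidekiq it is the fetch strategy's unit-of-work object.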

    def procline(tag)
      "sidekiq #{Sidekiq::VERSION} #{tag}[#{@busy.size} of #{@count} busy]#{stopped? ? ' stopping' : ''}"
    end
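
    # Intended for the process title (assumed usage: $0 = manager.procline('tag ')).
    # With a hypothetical version and tag it renders roughly:
    #   sidekiq 2.17.0 tag [3 of 25 busy]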

    private

    def clear_worker_set
      # Clearing workers in Redis
      # NOTE: we do this before terminating worker threads because the
      # process will likely receive a hard shutdown soon anyway, which
      # means the threads will be killed.
      logger.debug { "Clearing workers in redis" }
      Sidekiq.redis do |conn|
        workers = conn.smembers('workers')
        workers_to_remove = workers.select do |worker_name|
          worker_name =~ /:#{process_id}-/
        end
        conn.srem('workers', workers_to_remove) if !workers_to_remove.empty?
      end
    rescue => ex
      Sidekiq.logger.warn("Unable to clear worker set while shutting down: #{ex.message}")
    end
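
    # Assumed entry format: each Processor registers itself in the 'workers'
    # set under a name that embeds "hostname:pid-thread_id", which is why the
    # /:#{process_id}-/ match above selects only this process's entries.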

    def hard_shutdown_in(delay)
      logger.info { "Pausing up to #{delay} seconds to allow workers to finish..." }

      after(delay) do
        watchdog("Manager#hard_shutdown_in died") do
          # We've reached the timeout and we still have busy workers.
          # They must die but their messages shall live on.
          logger.info("Still waiting for #{@busy.size} busy workers")

          # Re-enqueue terminated jobs
          # NOTE: a job may be pushed back to redis before its worker thread
          # is terminated. This is ok because Sidekiq's contract says that
          # jobs are run AT LEAST once. Process termination is delayed until
          # we're certain the jobs are back in Redis because it is worse to
          # lose a job than to run it twice.
          Sidekiq::Fetcher.strategy.bulk_requeue(@in_progress.values)

          logger.debug { "Terminating worker threads" }
          @busy.each do |processor|
            t = processor.bare_object.actual_work_thread
            t.raise Shutdown if processor.alive?
          end

          after(0) { signal(:shutdown) }
        end
      end
    end
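
    # Shutdown here is Sidekiq::Shutdown (defined elsewhere in the gem);
    # raising it asynchronously inside the worker thread aborts the job
    # mid-flight. That is only safe because bulk_requeue has already pushed
    # the in-progress jobs back to Redis.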

    def dispatch
      return if stopped?
      # This is a safety check to ensure we haven't leaked
      # processors somehow.
      raise "BUG: No processors, cannot continue!" if @ready.empty? && @busy.empty?
      raise "No ready processor!?" if @ready.empty?

      @fetcher.async.fetch
    end
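
    # dispatch only asks the Fetcher for more work; the Fetcher calls back
    # into #assign with the next unit of work, so the fetch/assign/dispatch
    # cycle keeps roughly one outstanding fetch per ready processor.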

    def stopped?
      @done
    end
  end
end