mperham--sidekiq/lib/sidekiq/manager.rb

require 'celluloid'

require 'sidekiq/util'
require 'sidekiq/processor'
require 'sidekiq/fetch'

module Sidekiq

  ##
  # The main router in the system.  This
  # manages the processor state and accepts messages
  # from Redis to be dispatched to an idle processor.
  #
  # Bookkeeping kept by the manager:
  #
  # * @ready       - idle processors waiting for work
  # * @busy        - processors that have been handed a unit of work
  # * @in_progress - the unit of work given to each busy processor,
  #                  keyed by the processor's object_id
  # * @done        - true once #stop has been called
  #
  class Manager
    include Util
    include Celluloid

    # Celluloid calls #processor_died when a linked actor exits.
    trap_exit :processor_died

    # Creates the fetcher and a pool of linked processors, then starts
    # the periodic process-title refresh.
    #
    # options - Hash of settings.  Keys read here:
    #           :concurrency - number of processors to create (default 25)
    #           :tag         - optional label included in the process title
    #           The complete hash is also passed on to Fetcher.new.
    def initialize(options={})
      logger.debug { options.inspect }
      @count = options[:concurrency] || 25
      @done_callback = nil

      @in_progress = {}
      @done = false
      @busy = []
      @fetcher = Fetcher.new(current_actor, options)
      @ready = @count.times.map { Processor.new_link(current_actor) }
      procline(options[:tag] ? "#{options[:tag]} " : '')
    end

    # Stops accepting new work and terminates all idle processors.
    #
    # options - Hash of settings.  Keys read here:
    #           :shutdown - when truthy, busy processors are forcibly
    #                       terminated once :timeout seconds have passed
    #           :timeout  - number of seconds to give busy processors
    #
    # When nothing is busy, :shutdown is signalled right away.  Otherwise
    # it is signalled by #processor_done / #processor_died when the last
    # busy processor goes away, or by the hard shutdown timer.
    def stop(options={})
      watchdog('Manager#stop died') do
        shutdown = options[:shutdown]
        timeout = options[:timeout]

        @done = true
        # NOTE(review): Fetcher.done! is defined elsewhere; presumably it
        # makes the fetcher stop polling before the actor is terminated.
        Sidekiq::Fetcher.done!
        @fetcher.async.terminate if @fetcher.alive?

        logger.info { "Shutting down #{@ready.size} quiet workers" }
        @ready.each { |x| x.terminate if x.alive? }
        @ready.clear

        return after(0) { signal(:shutdown) } if @busy.empty?
        logger.info { "Pausing up to #{timeout} seconds to allow workers to finish..." }
        hard_shutdown_in timeout if shutdown
      end
    end

    # Requests one fetch per ready processor.
    def start
      @ready.each { dispatch }
    end

    # Registers a block that #processor_done calls with the processor
    # before it updates any bookkeeping.  A later call replaces the
    # previously registered block.
    def when_done(&blk)
      @done_callback = blk
    end

    # Moves a processor out of the busy set.  While running, a live
    # processor goes back to the ready list and another fetch is
    # requested.  While stopping, the processor is terminated and
    # :shutdown is signalled once nothing is busy any more.
    #
    # processor - the processor that finished (presumably it reports
    #             itself here after completing its unit of work).
    def processor_done(processor)
      watchdog('Manager#processor_done died') do
        @done_callback.call(processor) if @done_callback
        @in_progress.delete(processor.object_id)
        @busy.delete(processor)
        if stopped?
          processor.terminate if processor.alive?
          signal(:shutdown) if @busy.empty?
        else
          @ready << processor if processor.alive?
        end
        # dispatch is a no-op once the manager has been stopped.
        dispatch
      end
    end

    # Exit handler for linked actors (see trap_exit above).  Forgets the
    # dead processor; while running it is replaced by a fresh linked
    # processor and another fetch is requested, while stopping :shutdown
    # is signalled once nothing is busy any more.
    #
    # processor - the actor that exited
    # reason    - exit reason supplied by Celluloid (unused here)
    def processor_died(processor, reason)
      watchdog("Manager#processor_died died") do
        @in_progress.delete(processor.object_id)
        @busy.delete(processor)

        if stopped?
          signal(:shutdown) if @busy.empty?
        else
          @ready << Processor.new_link(current_actor)
          dispatch
        end
      end
    end

    # Hands a fetched unit of work to a ready processor.
    #
    # work - unit of work; must respond to #requeue, and to #queue and
    #        #message for the hard shutdown path.
    #
    # The work is pushed back to Redis instead of being processed when
    # the manager is stopped or when no processor is ready.
    def assign(work)
      watchdog("Manager#assign died") do
        if stopped?
          # Race condition between Manager#stop if Fetcher
          # is blocked on redis and gets a message after
          # all the ready Processors have been stopped.
          # Push the message back to redis.
          work.requeue
        elsif (processor = @ready.pop)
          @in_progress[processor.object_id] = work
          @busy << processor
          processor.async.process(work)
        else
          # Nothing is ready to take the work.  Recording nil as a busy
          # processor would corrupt the bookkeeping and drop the job, so
          # hand it back to Redis instead.
          logger.warn { "No ready processor available, pushing work back to Redis" }
          work.requeue
        end
      end
    end

    private

    # Schedules the forced shutdown: after +delay+ seconds this
    # process's entries are removed from the 'workers' set, the jobs of
    # all still-busy processors are pushed back to Redis, the processors
    # are terminated and :shutdown is signalled.
    def hard_shutdown_in(delay)
      after(delay) do
        watchdog("Manager#watch_for_shutdown died") do
          # We've reached the timeout and we still have busy workers.
          # They must die but their messages shall live on.
          logger.info("Still waiting for #{@busy.size} busy workers")

          Sidekiq.redis do |conn|
            clear_worker_registrations(conn)
            requeue_in_progress_jobs(conn)

            # Lastly, terminate worker threads
            @busy.each do |processor|
              processor.terminate if processor.alive?
            end
          end
          logger.info("Pushed #{@busy.size} messages back to Redis")

          after(0) { signal(:shutdown) }
        end
      end
    end

    # Removes the members of the 'workers' set that belong to this process.
    def clear_worker_registrations(conn)
      logger.debug { "Clearing workers in redis" }
      # Build the pattern once instead of once per set member.
      own_worker = /:#{process_id}-/
      workers_to_remove = conn.smembers('workers').select do |worker_name|
        worker_name =~ own_worker
      end
      # SREM needs at least one member.  Calling it with an empty list
      # fails, which would skip the re-enqueueing that follows.
      conn.srem('workers', workers_to_remove) unless workers_to_remove.empty?
    end

    # Pushes the message of every busy processor back onto its queue.
    #
    # NOTE: You may notice that we may push a job back to redis before
    # the worker thread is terminated. This is ok because Sidekiq's
    # contract says that jobs are run AT LEAST once. Process termination
    # is delayed until we're certain the jobs are back in Redis because
    # it is worse to lose a job than to run it twice.
    def requeue_in_progress_jobs(conn)
      jobs_to_requeue = Hash.new { |by_queue, queue| by_queue[queue] = [] }
      @busy.each do |processor|
        # processor is an actor proxy and we can't call any methods
        # that would go to the actor (since it's busy).  Instead
        # we'll use the object_id to track the worker's data here.
        unit_of_work = @in_progress[processor.object_id]
        # A missing entry must not prevent the remaining jobs from
        # being pushed back.
        next unless unit_of_work
        jobs_to_requeue[unit_of_work.queue] << unit_of_work.message
      end
      jobs_to_requeue.each do |queue, jobs|
        conn.rpush(queue, jobs)
      end
    end

    # Asks the fetcher for one more unit of work unless the manager has
    # been stopped.  Raises when there is no ready processor that could
    # take the result.
    def dispatch
      return if stopped?
      # This is a safety check to ensure we haven't leaked
      # processors somehow.
      raise "BUG: No processors, cannot continue!" if @ready.empty? && @busy.empty?
      raise "No ready processor!?" if @ready.empty?

      @fetcher.async.fetch
    end

    # True once #stop has been called.
    def stopped?
      @done
    end

    # Writes version, tag and busy count into the process title ($0) and
    # re-schedules itself to run again in 5 seconds.
    def procline(tag)
      $0 = "sidekiq #{Sidekiq::VERSION} #{tag}[#{@busy.size} of #{@count} busy]#{stopped? ? ' stopping' : ''}"
      after(5) { procline(tag) }
    end
  end
end
Server starts up now! 2012-01-22 19:01:46 -05:00			`require 'celluloid'`

Holy crap, it boots Rails3 and actually sends messages to the workers! 2012-01-23 17:05:03 -05:00			`require 'sidekiq/util'`
Rename workers to processors. New Railtie support. Workers are the user's classes, the threads are now called processors. Add secret sauce to make Rails config much easier. Use a railtie to auto-add app/workers to the autoload path. 2012-01-25 16:32:51 -05:00			`require 'sidekiq/processor'`
Redesign message poll [WIP] Instead of using the manager to constantly poll the Redis server, use a dedicated Fetcher actor + BLPOP with a timeout. This should dramatically reduce Sidekiq's network chattiness. 2012-03-24 16:28:18 -04:00			`require 'sidekiq/fetch'`
Server starts up now! 2012-01-22 19:01:46 -05:00
Misc 2012-01-16 19:14:47 -05:00			`module Sidekiq`

			`##`
Add full multithreaded integration test for manager 2012-02-03 13:02:57 -05:00			`# The main router in the system. This`
Redesign message poll [WIP] Instead of using the manager to constantly poll the Redis server, use a dedicated Fetcher actor + BLPOP with a timeout. This should dramatically reduce Sidekiq's network chattiness. 2012-03-24 16:28:18 -04:00			`# manages the processor state and accepts messages`
Add full multithreaded integration test for manager 2012-02-03 13:02:57 -05:00			`# from Redis to be dispatched to an idle processor.`
Misc 2012-01-16 19:14:47 -05:00			`#`
Add full multithreaded integration test for manager 2012-02-03 13:02:57 -05:00			`class Manager`
worker mgmt and msg dispatch 2012-01-22 14:32:38 -05:00			`include Util`
Add celluloid 2012-01-16 23:02:58 -05:00			`include Celluloid`
get it working 2012-01-16 19:18:36 -05:00
Rename workers to processors. New Railtie support. Workers are the user's classes, the threads are now called processors. Add secret sauce to make Rails config much easier. Use a railtie to auto-add app/workers to the autoload path. 2012-01-25 16:32:51 -05:00			`trap_exit :processor_died`
worker mgmt and msg dispatch 2012-01-22 14:32:38 -05:00
Rework redis connections so that the manager and the client use separate pools. This is so the Rails app Sidekiq::Client and Sidekiq::Manager can use different configurations. Also, fix issue where workers were not unregistered in Redis upon shutdown. 2012-02-11 16:14:03 -05:00			`def initialize(options={})`
Update Sidekiq logging to use standard Ruby logger 2012-02-14 12:00:26 -05:00			`logger.debug { options.inspect }`
Standardize on concurrency, not processor_count 2012-02-16 12:45:55 -05:00			`@count = options[:concurrency] \|\| 25`
Last minute fixes 2012-02-05 16:22:57 -05:00			`@done_callback = nil`
worker mgmt and msg dispatch 2012-01-22 14:32:38 -05:00
Add hard shutdown with pushback to Redis, fixes #110 2012-04-06 23:53:03 -04:00			`@in_progress = {}`
Server starts up now! 2012-01-22 19:01:46 -05:00			`@done = false`
			`@busy = []`
Refactor fetch strategy, fix queueing to lpush and rpop. 2013-01-06 00:17:08 -05:00			`@fetcher = Fetcher.new(current_actor, options)`
performance improvements for dispatcher; addresses issue #13 2012-02-11 02:19:05 -05:00			`@ready = @count.times.map { Processor.new_link(current_actor) }`
Tag sidekiq process with Rails app name [#463] 2012-10-31 13:29:32 -04:00			`procline(options[:tag] ? "#{options[:tag]} " : '')`
worker mgmt and msg dispatch 2012-01-22 14:32:38 -05:00			`end`

Implement USR1 - stop accepting new work, GH-69 2012-03-08 23:58:51 -05:00			`def stop(options={})`
Simpler fetch logic/design 2012-03-25 22:52:15 -04:00			`watchdog('Manager#stop died') do`
			`shutdown = options[:shutdown]`
			`timeout = options[:timeout]`
Implement USR1 - stop accepting new work, GH-69 2012-03-08 23:58:51 -05:00
Simpler fetch logic/design 2012-03-25 22:52:15 -04:00			`@done = true`
Bloody hack to shutdown the Fetcher quickly, since Celluloid's terminate is not working as I expect 2012-05-12 00:25:38 -04:00			`Sidekiq::Fetcher.done!`
Update Celluloid API usage, use .async, fixes #500 2012-11-03 22:56:06 -04:00			`@fetcher.async.terminate if @fetcher.alive?`
Add hard shutdown with pushback to Redis, fixes #110 2012-04-06 23:53:03 -04:00
			`logger.info { "Shutting down #{@ready.size} quiet workers" }`
Now works against Celluloid HEAD 2012-03-31 00:22:19 -04:00			`@ready.each { \|x\| x.terminate if x.alive? }`
Simpler fetch logic/design 2012-03-25 22:52:15 -04:00			`@ready.clear`
worker mgmt and msg dispatch 2012-01-22 14:32:38 -05:00
Add hard shutdown with pushback to Redis, fixes #110 2012-04-06 23:53:03 -04:00			`return after(0) { signal(:shutdown) } if @busy.empty?`
			`logger.info { "Pausing up to #{timeout} seconds to allow workers to finish..." }`
polish 2012-04-07 22:33:32 -04:00			`hard_shutdown_in timeout if shutdown`
Speed up shutdown, only pause 5 seconds if necessary 2012-02-18 00:33:17 -05:00			`end`
worker mgmt and msg dispatch 2012-01-22 14:32:38 -05:00			`end`

			`def start`
Simpler fetch logic/design 2012-03-25 22:52:15 -04:00			`@ready.each { dispatch }`
worker mgmt and msg dispatch 2012-01-22 14:32:38 -05:00			`end`
get it working 2012-01-16 19:18:36 -05:00
make when_done callback setting a little more idiomatic 2012-02-09 11:15:31 -05:00			`def when_done(&blk)`
			`@done_callback = blk`
Add full multithreaded integration test for manager 2012-02-03 13:02:57 -05:00			`end`

Rename workers to processors. New Railtie support. Workers are the user's classes, the threads are now called processors. Add secret sauce to make Rails config much easier. Use a railtie to auto-add app/workers to the autoload path. 2012-01-25 16:32:51 -05:00			`def processor_done(processor)`
Redesign message poll [WIP] Instead of using the manager to constantly poll the Redis server, use a dedicated Fetcher actor + BLPOP with a timeout. This should dramatically reduce Sidekiq's network chattiness. 2012-03-24 16:28:18 -04:00			`watchdog('Manager#processor_done died') do`
Add new sidekiq/testing feature. This makes job processing synchronous when required. 2012-02-10 00:46:44 -05:00			`@done_callback.call(processor) if @done_callback`
Add hard shutdown with pushback to Redis, fixes #110 2012-04-06 23:53:03 -04:00			`@in_progress.delete(processor.object_id)`
Add new sidekiq/testing feature. This makes job processing synchronous when required. 2012-02-10 00:46:44 -05:00			`@busy.delete(processor)`
			`if stopped?`
Test busy actor termination 2012-02-18 23:01:29 -05:00			`processor.terminate if processor.alive?`
If possible, shutdown asap rather than waiting the full timeout #69 2012-03-11 23:06:20 -04:00			`signal(:shutdown) if @busy.empty?`
Add new sidekiq/testing feature. This makes job processing synchronous when required. 2012-02-10 00:46:44 -05:00			`else`
More actor safety checks, #53 2012-03-12 22:57:04 -04:00			`@ready << processor if processor.alive?`
Add new sidekiq/testing feature. This makes job processing synchronous when required. 2012-02-10 00:46:44 -05:00			`end`
			`dispatch`
Add celluloid 2012-01-16 23:02:58 -05:00			`end`
get it working 2012-01-16 19:18:36 -05:00			`end`
Add celluloid 2012-01-16 23:02:58 -05:00
Rename workers to processors. New Railtie support. Workers are the user's classes, the threads are now called processors. Add secret sauce to make Rails config much easier. Use a railtie to auto-add app/workers to the autoload path. 2012-01-25 16:32:51 -05:00			`def processor_died(processor, reason)`
Redesign message poll [WIP] Instead of using the manager to constantly poll the Redis server, use a dedicated Fetcher actor + BLPOP with a timeout. This should dramatically reduce Sidekiq's network chattiness. 2012-03-24 16:28:18 -04:00			`watchdog("Manager#processor_died died") do`
Add hard shutdown with pushback to Redis, fixes #110 2012-04-06 23:53:03 -04:00			`@in_progress.delete(processor.object_id)`
Redesign message poll [WIP] Instead of using the manager to constantly poll the Redis server, use a dedicated Fetcher actor + BLPOP with a timeout. This should dramatically reduce Sidekiq's network chattiness. 2012-03-24 16:28:18 -04:00			`@busy.delete(processor)`
Holy crap, it boots Rails3 and actually sends messages to the workers! 2012-01-23 17:05:03 -05:00
Redesign message poll [WIP] Instead of using the manager to constantly poll the Redis server, use a dedicated Fetcher actor + BLPOP with a timeout. This should dramatically reduce Sidekiq's network chattiness. 2012-03-24 16:28:18 -04:00			`unless stopped?`
			`@ready << Processor.new_link(current_actor)`
			`dispatch`
			`else`
			`signal(:shutdown) if @busy.empty?`
			`end`
Server starts up now! 2012-01-22 19:01:46 -05:00			`end`
worker mgmt and msg dispatch 2012-01-22 14:32:38 -05:00			`end`

Refactor fetch strategy, fix queueing to lpush and rpop. 2013-01-06 00:17:08 -05:00			`def assign(work)`
Redesign message poll [WIP] Instead of using the manager to constantly poll the Redis server, use a dedicated Fetcher actor + BLPOP with a timeout. This should dramatically reduce Sidekiq's network chattiness. 2012-03-24 16:28:18 -04:00			`watchdog("Manager#assign died") do`
Handle shutdown race condition, fixes #134 2012-04-16 23:18:48 -04:00			`if stopped?`
			`# Race condition between Manager#stop if Fetcher`
			`# is blocked on redis and gets a message after`
			`# all the ready Processors have been stopped.`
			`# Push the message back to redis.`
Refactor fetch strategy, fix queueing to lpush and rpop. 2013-01-06 00:17:08 -05:00			`work.requeue`
Handle shutdown race condition, fixes #134 2012-04-16 23:18:48 -04:00			`else`
			`processor = @ready.pop`
Refactor fetch strategy, fix queueing to lpush and rpop. 2013-01-06 00:17:08 -05:00			`@in_progress[processor.object_id] = work`
Handle shutdown race condition, fixes #134 2012-04-16 23:18:48 -04:00			`@busy << processor`
Refactor fetch strategy, fix queueing to lpush and rpop. 2013-01-06 00:17:08 -05:00			`processor.async.process(work)`
Handle shutdown race condition, fixes #134 2012-04-16 23:18:48 -04:00			`end`
Message dispatch loop now working 2012-01-24 01:08:38 -05:00			`end`
			`end`

Redesign message poll [WIP] Instead of using the manager to constantly poll the Redis server, use a dedicated Fetcher actor + BLPOP with a timeout. This should dramatically reduce Sidekiq's network chattiness. 2012-03-24 16:28:18 -04:00			`private`
worker mgmt and msg dispatch 2012-01-22 14:32:38 -05:00
Add hard shutdown with pushback to Redis, fixes #110 2012-04-06 23:53:03 -04:00			`def hard_shutdown_in(delay)`
polish 2012-04-07 22:33:32 -04:00			`after(delay) do`
			`watchdog("Manager#watch_for_shutdown died") do`
Add hard shutdown with pushback to Redis, fixes #110 2012-04-06 23:53:03 -04:00			`# We've reached the timeout and we still have busy workers.`
			`# They must die but their messages shall live on.`
			`logger.info("Still waiting for #{@busy.size} busy workers")`

			`Sidekiq.redis do \|conn\|`
Don't clear workers set until actually shutting down, #492 2012-11-09 23:40:23 -05:00			`logger.debug { "Clearing workers in redis" }`
			`workers = conn.smembers('workers')`
Batch process worker removal for performance. #623 2013-01-16 16:26:26 -05:00			`workers_to_remove = workers.select do \|worker_name\|`
			`worker_name =~ /:#{process_id}-/`
Don't clear workers set until actually shutting down, #492 2012-11-09 23:40:23 -05:00			`end`
Batch process worker removal for performance. #623 2013-01-16 16:26:26 -05:00			`conn.srem('workers', workers_to_remove)`
Don't clear workers set until actually shutting down, #492 2012-11-09 23:40:23 -05:00
Improve efficiency and reliability of re-queueing when workers are terminated. #623 2013-01-16 16:48:51 -05:00			`# Re-enqueue terminated jobs`
			`# NOTE: You may notice that we may push a job back to redis before`
			`# the worker thread is terminated. This is ok because Sidekiq's`
			`# contract says that jobs are run AT LEAST once. Process termination`
			`# is delayed until we're certain the jobs are back in Redis because`
			`# it is worse to lose a job than to run it twice.`
			`jobs_to_requeue = {}`
Add hard shutdown with pushback to Redis, fixes #110 2012-04-06 23:53:03 -04:00			`@busy.each do \|processor\|`
			`# processor is an actor proxy and we can't call any methods`
			`# that would go to the actor (since it's busy). Instead`
			`# we'll use the object_id to track the worker's data here.`
Fix #621 fix job re-queueing on hard shutdown (e.g. SIGTERM). 2013-01-16 14:08:46 -05:00			`unit_of_work = @in_progress[processor.object_id]`
Improve efficiency and reliability of re-queueing when workers are terminated. #623 2013-01-16 16:48:51 -05:00			`jobs_to_requeue[unit_of_work.queue] \|\|= []`
			`jobs_to_requeue[unit_of_work.queue] << unit_of_work.message`
			`end`
			`jobs_to_requeue.each do \|queue, jobs\|`
			`conn.rpush(queue, jobs)`
			`end`

			`# Lastly, terminate worker threads`
			`@busy.each do \|processor\|`
			`processor.terminate if processor.alive?`
Add hard shutdown with pushback to Redis, fixes #110 2012-04-06 23:53:03 -04:00			`end`
			`end`
			`logger.info("Pushed #{@busy.size} messages back to Redis")`

			`after(0) { signal(:shutdown) }`
			`end`
			`end`
			`end`

Redesign message poll [WIP] Instead of using the manager to constantly poll the Redis server, use a dedicated Fetcher actor + BLPOP with a timeout. This should dramatically reduce Sidekiq's network chattiness. 2012-03-24 16:28:18 -04:00			`def dispatch`
			`return if stopped?`
			`# This is a safety check to ensure we haven't leaked`
			`# processors somehow.`
			`raise "BUG: No processors, cannot continue!" if @ready.empty? && @busy.empty?`
Simpler fetch logic/design 2012-03-25 22:52:15 -04:00			`raise "No ready processor!?" if @ready.empty?`
Redesign message poll [WIP] Instead of using the manager to constantly poll the Redis server, use a dedicated Fetcher actor + BLPOP with a timeout. This should dramatically reduce Sidekiq's network chattiness. 2012-03-24 16:28:18 -04:00
Update Celluloid API usage, use .async, fixes #500 2012-11-03 22:56:06 -04:00			`@fetcher.async.fetch`
worker mgmt and msg dispatch 2012-01-22 14:32:38 -05:00			`end`

			`def stopped?`
			`@done`
			`end`
Set procline for easy monitoring with ps aux 2012-04-17 22:26:56 -04:00
Tag sidekiq process with Rails app name [#463] 2012-10-31 13:29:32 -04:00			`def procline(tag)`
			`$0 = "sidekiq #{Sidekiq::VERSION} #{tag}[#{@busy.size} of #{@count} busy]#{stopped? ? ' stopping' : ''}"`
			`after(5) { procline(tag) }`
Set procline for easy monitoring with ps aux 2012-04-17 22:26:56 -04:00			`end`
Misc 2012-01-16 19:14:47 -05:00			`end`
			`end`