# frozen_string_literal: true

require "sidekiq"
require "sidekiq/util"
require "sidekiq/api"

module Sidekiq
  module Scheduled
    SETS = %w[retry schedule]

    class Enq
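      # Lua script that atomically pops a single due job: fetch the oldest entry
      # with score <= now, remove it from the sorted set and return its payload.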
      LUA_ZPOPBYSCORE = <<~LUA
        local key, now = KEYS[1], ARGV[1]
        local jobs = redis.call("zrangebyscore", key, "-inf", now, "limit", 0, 1)
        if jobs[1] then
          redis.call("zrem", key, jobs[1])
          return jobs[1]
        end
      LUA

      def initialize
        @lua_zpopbyscore_sha = nil
      end

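      # "now" is passed as a stringified epoch float so Redis can compare it
      # directly against sorted set scores inside the Lua script.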
      def enqueue_jobs(now = Time.now.to_f.to_s, sorted_sets = SETS)
        # A job's "score" in Redis is the time at which it should be processed.
        # Just check Redis for the set of jobs with a timestamp before now.
        Sidekiq.redis do |conn|
          sorted_sets.each do |sorted_set|
            # Get the next item in the queue with a score (time to execute) <= now.
            # We go through the list one at a time so that, if something goes wrong
            # between popping a job from the scheduled set and pushing it onto a
            # work queue, we lose at most one job rather than a whole batch.
            while (job = zpopbyscore(conn, keys: [sorted_set], argv: [now]))
              Sidekiq::Client.push(Sidekiq.load_json(job))
              Sidekiq.logger.debug { "enqueued #{sorted_set}: #{job}" }
            end
          end
        end
      end
|
2021-11-05 12:19:25 -04:00
|
|
|
|
|
|
|
private
|
|
|
|
|
|
|
|
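      # EVALSHA the cached script; if Redis has restarted and lost its script
      # cache (NOSCRIPT), re-load the script and retry.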
      def zpopbyscore(conn, keys: nil, argv: nil)
        @lua_zpopbyscore_sha = conn.script(:load, LUA_ZPOPBYSCORE) if @lua_zpopbyscore_sha.nil?

        conn.evalsha(@lua_zpopbyscore_sha, keys: keys, argv: argv)
      rescue Redis::CommandError => e
        raise unless e.message.start_with?("NOSCRIPT")

        @lua_zpopbyscore_sha = nil
        retry
      end
    end
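
    # The Poller instantiates Sidekiq.options[:scheduled_enq] (Enq by default),
    # so a custom implementation only needs to respond to #enqueue_jobs.
    # A minimal, hypothetical sketch:
    #
    #   class TracingEnq < Sidekiq::Scheduled::Enq
    #     def enqueue_jobs(*)
    #       Sidekiq.logger.info("polling scheduled sets")
    #       super
    #     end
    #   end
    #
    #   Sidekiq.options[:scheduled_enq] = TracingEnq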

    ##
    # The Poller checks Redis every N seconds for jobs in the retry or scheduled
    # set that have passed their timestamp and should be enqueued. If so, it
    # just pops the job back onto its original queue so the
    # workers can pick it up like any other job.
    class Poller
      include Util

      INITIAL_WAIT = 10

      def initialize
        @enq = (Sidekiq.options[:scheduled_enq] || Sidekiq::Scheduled::Enq).new
        @sleeper = ConnectionPool::TimedStack.new
        @done = false
        @thread = nil
        @count_calls = 0
      end

      # Shut down this instance; blocks until the thread is dead.
      def terminate
        @done = true
        if @thread
          t = @thread
          @thread = nil
          @sleeper << 0
          t.value
        end
      end

      def start
        @thread ||= safe_thread("scheduler") {
          initial_wait

          until @done
            enqueue
            wait
          end
          Sidekiq.logger.info("Scheduler exiting...")
        }
      end

      def enqueue
        @enq.enqueue_jobs
      rescue => ex
        # Most likely a problem with Redis networking.
        # Punt and try again at the next interval.
        logger.error ex.message
        handle_exception(ex)
      end

      private

      def wait
        @sleeper.pop(random_poll_interval)
      rescue Timeout::Error
        # expected
      rescue => ex
        # if poll_interval_average hasn't been calculated yet, we can
        # raise an error trying to reach Redis.
        logger.error ex.message
        handle_exception(ex)
        sleep 5
      end

      def random_poll_interval
        # We want one Sidekiq process to schedule jobs every N seconds. We have M processes
        # and **don't** want to coordinate.
        #
        # So over an N*M second timespan, we want each process to schedule once. The basic loop is:
        #
        # * sleep a random amount within that N*M timespan
        # * wake up and schedule
        #
        # We want to avoid one edge case: imagine a set of 2 processes, scheduling every 5 seconds,
        # so N*M = 10. Each process decides to randomly sleep 8 seconds, and now we've failed to meet
        # that 5 second average. Thankfully each schedule cycle will sleep randomly, so the next
        # iteration could see each process sleep for 1 second, undercutting our average.
        #
        # So below 10 processes, we special case and ensure the processes sleep closer to the average.
        # In the example above, each process should schedule every 10 seconds on average. We special
        # case smaller clusters to add 50% so they would sleep somewhere between 5 and 15 seconds.
        # As we run more processes, the scheduling interval average will approach an even spread
        # between 0 and poll interval so we don't need this artificial boost.
        #
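        # Concretely: with poll_interval_average = 10, the small-cluster branch below
        # returns 10 * rand + 10 / 2, i.e. a value in [5, 15) that averages 10.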
        if process_count < 10
          # For small clusters, calculate a random interval that is ±50% the desired average.
          poll_interval_average * rand + poll_interval_average.to_f / 2
        else
          # With 10+ processes, we should have enough randomness to get decent polling
          # across the entire timespan.
          poll_interval_average * rand
        end
      end

      # We do our best to tune the poll interval to the size of the active Sidekiq
      # cluster. If you have 30 processes and poll every 15 seconds, that means one
      # Sidekiq is checking Redis every 0.5 seconds - way too often for most people
      # and really bad if the retry or scheduled sets are large.
      #
      # Instead try to avoid polling more than once every 15 seconds. If you have
      # 30 Sidekiq processes, we'll poll every 30 * 15 or 450 seconds.
      # To keep things statistically random, we'll sleep a random amount between
      # 225 and 675 seconds for each poll or 450 seconds on average. Otherwise restarting
      # all your Sidekiq processes at the same time will lead to them all polling at
      # the same time: the thundering herd problem.
      #
      # We only do this if poll_interval_average is unset (the default).
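      #
      # To pin the poller to a fixed average regardless of cluster size, the
      # option can be set explicitly, e.g. in an initializer:
      #
      #   Sidekiq.options[:poll_interval_average] = 30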
      def poll_interval_average
        Sidekiq.options[:poll_interval_average] ||= scaled_poll_interval
      end

      # Calculates an average poll interval based on the number of known Sidekiq processes.
      # This minimizes a single point of failure by dispersing check-ins while not taxing
      # Redis when you run many Sidekiq processes.
      def scaled_poll_interval
        process_count * Sidekiq.options[:average_scheduled_poll_interval]
      end

      def process_count
        # The work buried within Sidekiq::ProcessSet#cleanup can be
        # expensive at scale. Cut it down by 90% with this counter.
        # NB: This method is only called by the scheduler thread so we
        # don't need to worry about the thread safety of +=.
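        # ProcessSet.new(true) runs the cleanup pass; passing false skips it,
        # so only every 10th call pays that cost.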
        pcount = Sidekiq::ProcessSet.new(@count_calls % 10 == 0).size
        pcount = 1 if pcount == 0
        @count_calls += 1
        pcount
      end

      def initial_wait
        # Have all processes sleep up to 15 seconds: 10 seconds to give time for
        # the heartbeat to register (if the poll interval is going to be calculated
        # by the number of workers), plus up to 5 random seconds to ensure they
        # don't all hit Redis at the same time.
        total = 0
        total += INITIAL_WAIT unless Sidekiq.options[:poll_interval_average]
        total += (5 * rand)

        @sleeper.pop(total)
      rescue Timeout::Error
      end
    end
  end
end