2019-02-28 09:45:20 -05:00
|
|
|
# frozen_string_literal: true
|
|
|
|
|
|
|
|
module Gitlab
  module SidekiqMiddleware
    # Sidekiq server middleware that shuts the current Sidekiq process down
    # once its resident set size (RSS) exceeds a configured limit.
    #
    # After each job finishes, the RSS of the current process is sampled via
    # `ps`. When it is above MAX_RSS, a single background thread runs the
    # shutdown sequence:
    #
    #   1. wait GRACE_TIME, then send SIGTSTP (stop fetching new jobs)
    #   2. wait SHUTDOWN_WAIT, then send SIGTERM (graceful shutdown)
    #   3. wait Sidekiq's own timeout plus 2 seconds, then send SIGKILL
    #
    # All limits are read from environment variables once, when this class is
    # loaded.
    class MemoryKiller
      # Default the RSS limit to 0, meaning the MemoryKiller is disabled.
      # The value is compared against the output of `ps -o rss=`
      # (NOTE(review): typically kilobytes - confirm for the target platform).
      MAX_RSS = ENV.fetch('SIDEKIQ_MEMORY_KILLER_MAX_RSS', 0).to_i

      # Give Sidekiq 15 minutes of grace time after exceeding the RSS limit
      GRACE_TIME = ENV.fetch('SIDEKIQ_MEMORY_KILLER_GRACE_TIME', 15 * 60).to_i

      # Wait 30 seconds for running jobs to finish during graceful shutdown
      SHUTDOWN_WAIT = ENV.fetch('SIDEKIQ_MEMORY_KILLER_SHUTDOWN_WAIT', 30).to_i

      # Create a mutex used to ensure there will be only one thread waiting to
      # shut Sidekiq down. It is never explicitly unlocked by this class: the
      # thread that takes it is expected to end with the process being killed.
      MUTEX = Mutex.new

      # The worker instance of the most recently completed job; used to tag
      # log entries with the worker class name.
      attr_reader :worker

      # Runs the job, then checks memory usage and starts the shutdown
      # sequence when the RSS limit is exceeded.
      #
      # @param worker [Object] the worker instance that processed the job
      # @param job [Hash] the job payload; only `job['jid']` is read here
      # @param queue [String] the queue name (unused)
      # @yield executes the job (and the rest of the middleware chain)
      # @return [Thread, nil] the shutdown thread when the limit is exceeded,
      #   otherwise nil
      def call(worker, job, queue)
        yield

        @worker = worker

        # Check the constant first: sampling the RSS spawns a `ps` subprocess,
        # which must not happen after every job while the killer is disabled.
        return unless MAX_RSS > 0

        current_rss = get_rss
        return unless current_rss > MAX_RSS

        Thread.new do
          # Return if another thread is already waiting to shut Sidekiq down
          next unless MUTEX.try_lock

          warn("Sidekiq worker PID-#{pid} current RSS #{current_rss}"\
            " exceeds maximum RSS #{MAX_RSS} after finishing job #{worker.class} JID-#{job['jid']}")

          warn("Sidekiq worker PID-#{pid} will stop fetching new jobs"\
            " in #{GRACE_TIME} seconds, and will be shut down #{SHUTDOWN_WAIT} seconds later")

          # Wait `GRACE_TIME` to give the memory intensive job time to finish.
          # Then, tell Sidekiq to stop fetching new jobs.
          wait_and_signal(GRACE_TIME, 'SIGTSTP', 'stop fetching new jobs')

          # Wait `SHUTDOWN_WAIT` to give already fetched jobs time to finish.
          # Then, tell Sidekiq to gracefully shut down by giving jobs a few more
          # moments to finish, killing and requeuing them if they didn't, and
          # then terminating itself. Sidekiq will replicate the TERM to all its
          # children if it can.
          wait_and_signal(SHUTDOWN_WAIT, 'SIGTERM', 'gracefully shut down')

          # Wait for Sidekiq to shutdown gracefully, and kill it if it didn't.
          # Kill the whole pgroup, so we can be sure no children are left behind
          wait_and_signal_pgroup(Sidekiq.options[:timeout] + 2, 'SIGKILL', 'die')
        end
      end

      private

      # Samples the RSS of the current process by running `ps -o rss= -p PID`.
      #
      # @return [Integer] the value printed by `ps`, or 0 when `ps` did not
      #   exit with status 0 (so a failed sample never triggers a shutdown)
      def get_rss
        output, status = Gitlab::Popen.popen(%W(ps -o rss= -p #{pid}), Rails.root.to_s)
        return 0 unless status == 0

        output.to_i
      end

      # If this sidekiq process is pgroup leader, signal to the whole pgroup;
      # otherwise fall back to signalling only this process.
      #
      # @param time [Numeric] seconds to sleep before sending the signal
      # @param signal [String] signal name, e.g. 'SIGKILL'
      # @param explanation [String] human readable reason, used for logging
      def wait_and_signal_pgroup(time, signal, explanation)
        return wait_and_signal(time, signal, explanation) unless Process.getpgrp == pid

        warn("waiting #{time} seconds before sending Sidekiq worker PGRP-#{pid} #{signal} (#{explanation})", signal: signal)
        sleep(time)

        warn("sending Sidekiq worker PGRP-#{pid} #{signal} (#{explanation})", signal: signal)
        # PID 0 addresses the process group of the calling process.
        Process.kill(signal, 0)
      end

      # Sleeps for `time` seconds and then sends `signal` to this process,
      # logging before the wait and before the signal.
      #
      # @param time [Numeric] seconds to sleep before sending the signal
      # @param signal [String] signal name, e.g. 'SIGTERM'
      # @param explanation [String] human readable reason, used for logging
      def wait_and_signal(time, signal, explanation)
        warn("waiting #{time} seconds before sending Sidekiq worker PID-#{pid} #{signal} (#{explanation})", signal: signal)
        sleep(time)

        warn("sending Sidekiq worker PID-#{pid} #{signal} (#{explanation})", signal: signal)
        Process.kill(signal, pid)
      end

      # @return [Integer] the PID of the current process
      def pid
        Process.pid
      end

      # Writes a structured warning to the Sidekiq logger. Note that this
      # shadows Kernel#warn inside this class.
      #
      # @param message [String] the log message
      # @param signal [String, nil] the signal the message refers to, if any
      def warn(message, signal: nil)
        Sidekiq.logger.warn(class: worker.class.name, pid: pid, signal: signal, message: message)
      end
    end
  end
end
|