2019-02-28 09:45:20 -05:00
# frozen_string_literal: true
module Gitlab
  module SidekiqMiddleware
    # Sidekiq middleware that shuts the current Sidekiq process down once its
    # resident set size (RSS) exceeds a configured limit.
    #
    # After each job finishes, the process RSS (as printed by `ps -o rss=`) is
    # compared with MAX_RSS. When the limit is exceeded, a background thread
    # escalates through three signals: SIGTSTP after GRACE_TIME, SIGTERM after
    # SHUTDOWN_WAIT, and finally SIGKILL once Sidekiq's own shutdown timeout
    # (plus two seconds) has elapsed.
    #
    # All three settings are read from the environment once, when this class
    # is loaded.
    class MemoryKiller
      # Default the RSS limit to 0, meaning the MemoryKiller is disabled.
      # The value is compared directly against the output of `ps -o rss=`
      # (presumably kilobytes; confirm for the target platform).
      MAX_RSS = ENV.fetch('SIDEKIQ_MEMORY_KILLER_MAX_RSS', 0).to_i

      # Give Sidekiq 15 minutes of grace time after exceeding the RSS limit
      GRACE_TIME = ENV.fetch('SIDEKIQ_MEMORY_KILLER_GRACE_TIME', 15 * 60).to_i

      # Wait 30 seconds for running jobs to finish during graceful shutdown
      SHUTDOWN_WAIT = ENV.fetch('SIDEKIQ_MEMORY_KILLER_SHUTDOWN_WAIT', 30).to_i

      # Create a mutex used to ensure there will be only one thread waiting to
      # shut Sidekiq down
      MUTEX = Mutex.new

      # Runs the job, then starts the shutdown sequence if the process RSS is
      # above MAX_RSS.
      #
      # @param worker [Object] the worker that ran the job; only its class is
      #   used, for logging
      # @param job [Hash] the job payload; only `job['jid']` is read, for logging
      # @param queue [Object] unused
      # @yield runs the wrapped job
      # @return [Thread, nil] the thread driving the shutdown attempt, or nil
      #   when the killer is disabled or the RSS is within the limit
      def call(worker, job, queue)
        yield

        # Check the switch before measuring: get_rss spawns a `ps` subprocess,
        # which would be wasted work after every job while the killer is
        # disabled.
        return unless MAX_RSS > 0

        current_rss = get_rss
        return unless current_rss > MAX_RSS

        Thread.new do
          # Return if another thread is already waiting to shut Sidekiq down
          next unless MUTEX.try_lock

          announce_and_shut_down(worker, job, current_rss)
        end
      end

      private

      # Logs why the process is being shut down, then sends the escalating
      # signals. Intended to run on the background thread that holds MUTEX.
      #
      # @param worker [Object] the worker whose job just finished
      # @param job [Hash] the job payload
      # @param current_rss [Integer] the RSS reading that exceeded MAX_RSS
      def announce_and_shut_down(worker, job, current_rss)
        Sidekiq.logger.warn " Sidekiq worker PID- #{pid} current RSS #{current_rss} " \
          " exceeds maximum RSS #{MAX_RSS} after finishing job #{worker.class} JID- #{job['jid']} "
        Sidekiq.logger.warn " Sidekiq worker PID- #{pid} will stop fetching new jobs in #{GRACE_TIME} seconds, and will be shut down #{SHUTDOWN_WAIT} seconds later "

        # Wait `GRACE_TIME` to give the memory intensive job time to finish.
        # Then, tell Sidekiq to stop fetching new jobs.
        wait_and_signal(GRACE_TIME, 'SIGTSTP', 'stop fetching new jobs')

        # Wait `SHUTDOWN_WAIT` to give already fetched jobs time to finish.
        # Then, tell Sidekiq to gracefully shut down by giving jobs a few more
        # moments to finish, killing and requeuing them if they didn't, and
        # then terminating itself. Sidekiq will replicate the TERM to all its
        # children if it can.
        wait_and_signal(SHUTDOWN_WAIT, 'SIGTERM', 'gracefully shut down')

        # Wait for Sidekiq to shutdown gracefully, and kill it if it didn't.
        # Kill the whole pgroup, so we can be sure no children are left behind
        wait_and_signal_pgroup(Sidekiq.options[:timeout] + 2, 'SIGKILL', 'die')
      end

      # Reads the RSS of the current process from `ps`.
      #
      # @return [Integer] the value printed by `ps -o rss=`, or 0 when the
      #   command exits with a non-zero status
      def get_rss
        output, status = Gitlab::Popen.popen(%W(ps -o rss= -p #{pid}), Rails.root.to_s)
        return 0 unless status.zero?

        output.to_i
      end

      # If this sidekiq process is pgroup leader, signal to the whole pgroup;
      # otherwise behave exactly like #wait_and_signal.
      #
      # @param time [Numeric] seconds to sleep before signalling
      # @param signal [String, Integer] signal passed to Process.kill
      # @param explanation [String] human-readable reason, used in the log
      def wait_and_signal_pgroup(time, signal, explanation)
        return wait_and_signal(time, signal, explanation) unless Process.getpgrp == pid

        Sidekiq.logger.warn " waiting #{time} seconds before sending Sidekiq worker PGRP- #{pid} #{signal} ( #{explanation} ) "
        sleep(time)

        Sidekiq.logger.warn " sending Sidekiq worker PGRP- #{pid} #{signal} ( #{explanation} ) "
        # A target of 0 addresses the calling process's own process group.
        Process.kill(signal, 0)
      end

      # Sleeps, then sends a signal to this process only.
      #
      # @param time [Numeric] seconds to sleep before signalling
      # @param signal [String, Integer] signal passed to Process.kill
      # @param explanation [String] human-readable reason, used in the log
      def wait_and_signal(time, signal, explanation)
        Sidekiq.logger.warn " waiting #{time} seconds before sending Sidekiq worker PID- #{pid} #{signal} ( #{explanation} ) "
        sleep(time)

        Sidekiq.logger.warn " sending Sidekiq worker PID- #{pid} #{signal} ( #{explanation} ) "
        Process.kill(signal, pid)
      end

      # @return [Integer] the PID of the current process
      def pid
        Process.pid
      end
    end
  end
end