mirror of
https://github.com/puma/puma.git
synced 2022-11-09 13:48:40 -05:00
Add ability to detect and terminate hung workers. Fixes #333
This commit is contained in:
parent
b90fc4fa05
commit
ef4e55a5b7
2 changed files with 57 additions and 7 deletions
|
@ -8,6 +8,7 @@ module Puma
|
|||
@phase = 0
|
||||
@workers = []
|
||||
@worker_index = 0
|
||||
@next_check = nil
|
||||
|
||||
@phased_state = :idle
|
||||
@phased_restart = false
|
||||
|
@ -36,18 +37,28 @@ module Puma
|
|||
@phase = phase
|
||||
@stage = :started
|
||||
@signal = "TERM"
|
||||
@last_checkin = Time.now
|
||||
end
|
||||
|
||||
attr_reader :index, :pid, :phase, :signal
|
||||
attr_reader :index, :pid, :phase, :signal, :last_checkin
|
||||
|
||||
def booted?
|
||||
@stage == :booted
|
||||
end
|
||||
|
||||
def boot!
|
||||
@last_checkin = Time.now
|
||||
@stage = :booted
|
||||
end
|
||||
|
||||
def ping!
|
||||
@last_checkin = Time.now
|
||||
end
|
||||
|
||||
def ping_timeout?(which)
|
||||
Time.now - @last_checkin > which
|
||||
end
|
||||
|
||||
def term
|
||||
begin
|
||||
if @first_term_sent && (Time.new - @first_term_sent) > 30
|
||||
|
@ -60,6 +71,11 @@ module Puma
|
|||
rescue Errno::ESRCH
|
||||
end
|
||||
end
|
||||
|
||||
def kill
|
||||
Process.kill "KILL", @pid
|
||||
rescue Errno::ESRCH
|
||||
end
|
||||
end
|
||||
|
||||
def spawn_workers
|
||||
|
@ -93,6 +109,24 @@ module Puma
|
|||
end
|
||||
|
||||
def check_workers
|
||||
return if @next_check && @next_check >= Time.now
|
||||
|
||||
@next_check = Time.now + 5
|
||||
|
||||
any = false
|
||||
|
||||
@workers.each do |w|
|
||||
if w.ping_timeout?(@options[:worker_timeout])
|
||||
log "! Terminating timed out worker: #{w.pid}"
|
||||
w.kill
|
||||
any = true
|
||||
end
|
||||
end
|
||||
|
||||
# If we killed any timed out workers, try to catch them
|
||||
# during this loop by giving the kernel time to kill them.
|
||||
sleep 1 if any
|
||||
|
||||
while @workers.any?
|
||||
pid = Process.waitpid(-1, Process::WNOHANG)
|
||||
break unless pid
|
||||
|
@ -168,6 +202,15 @@ module Puma
|
|||
return
|
||||
end
|
||||
|
||||
Thread.new(@worker_write) do |io|
|
||||
payload = "p#{Process.pid}\n"
|
||||
|
||||
while true
|
||||
sleep 5
|
||||
io << payload
|
||||
end
|
||||
end
|
||||
|
||||
server.run.join
|
||||
|
||||
ensure
|
||||
|
@ -302,15 +345,20 @@ module Puma
|
|||
if res
|
||||
req = read.read_nonblock(1)
|
||||
|
||||
if req == "b"
|
||||
pid = read.gets.to_i
|
||||
w = @workers.find { |x| x.pid == pid }
|
||||
if w
|
||||
next if !req || req == "!"
|
||||
|
||||
pid = read.gets.to_i
|
||||
|
||||
if w = @workers.find { |x| x.pid == pid }
|
||||
case req
|
||||
when "b"
|
||||
w.boot!
|
||||
log "- Worker #{w.index} (pid: #{pid}) booted, phase: #{w.phase}"
|
||||
else
|
||||
log "! Out-of-sync worker list, no #{pid} worker"
|
||||
when "p"
|
||||
w.ping!
|
||||
end
|
||||
else
|
||||
log "! Out-of-sync worker list, no #{pid} worker"
|
||||
end
|
||||
end
|
||||
|
||||
|
|
|
@ -14,6 +14,7 @@ module Puma
|
|||
|
||||
DefaultTCPHost = "0.0.0.0"
|
||||
DefaultTCPPort = 9292
|
||||
DefaultWorkerTimeout = 60
|
||||
|
||||
def initialize(options)
|
||||
@options = options
|
||||
|
@ -21,6 +22,7 @@ module Puma
|
|||
@options[:binds] ||= []
|
||||
@options[:on_restart] ||= []
|
||||
@options[:worker_boot] ||= []
|
||||
@options[:worker_timeout] ||= DefaultWorkerTimeout
|
||||
end
|
||||
|
||||
attr_reader :options
|
||||
|
|
Loading…
Reference in a new issue