2013-07-05 19:08:13 -04:00
|
|
|
require 'puma/runner'
|
|
|
|
|
|
|
|
module Puma
|
|
|
|
class Cluster < Runner
|
|
|
|
# Build the cluster runner on top of the superclass initializer and
# reset all master-side bookkeeping.
#
# cli - handed straight to the superclass initializer.
def initialize(cli)
  super(cli)

  # Worker tracking: the list of Worker records and the time before
  # which check_workers will not run again (nil = run immediately).
  @workers = []
  @next_check = nil

  # Phased-restart tracking: current phase number, the state checked by
  # spawn_workers/check_workers, and the flag set by phased_restart.
  @phase = 0
  @phased_state = :idle
  @phased_restart = false
end
|
|
|
|
|
|
|
|
# Ask every tracked worker to terminate, then block until all child
# processes have exited. An Interrupt raised while waiting is logged
# and swallowed so the caller can carry on shutting down.
def stop_workers
  log "- Gracefully shutting down workers..."
  @workers.each(&:term)

  begin
    Process.waitall
  rescue Interrupt
    log "! Cancelled waiting for workers"
  end
end
|
|
|
|
|
|
|
|
# Advance to the next phase and announce it. Workers whose recorded
# phase differs from @phase are picked up later by check_workers.
def start_phased_restart
  @phase = @phase + 1
  log "- Starting phased worker restart, phase: #{@phase}"
end
|
|
|
|
|
|
|
|
# Record kept by the master for one forked worker process: its slot
# index, pid, the phase it was started in, whether it has reported in
# as booted, and when it last checked in.
class Worker
  attr_reader :index, :pid, :phase, :signal, :last_checkin

  # idx   - slot index assigned to this worker.
  # pid   - process id returned by fork.
  # phase - phase number in effect when the worker was spawned.
  def initialize(idx, pid, phase)
    @index, @pid, @phase = idx, pid, phase

    @stage = :started
    @signal = "TERM"
    @last_checkin = Time.now
  end

  # True once boot! has been called for this worker.
  def booted?
    @stage == :booted
  end

  # Mark the worker as booted; this also counts as a check-in.
  def boot!
    @last_checkin = Time.now
    @stage = :booted
  end

  # Record a check-in from the worker.
  def ping!
    @last_checkin = Time.now
  end

  # True when more than +which+ seconds have passed since the last
  # check-in.
  def ping_timeout?(which)
    elapsed = Time.now - @last_checkin
    elapsed > which
  end

  # Send the current signal to the worker. The first call remembers when
  # it happened; a call made more than 30 seconds after that switches
  # the signal from TERM to KILL. A pid that no longer exists is ignored.
  def term
    if @first_term_sent.nil?
      @first_term_sent = Time.new
    elsif (Time.new - @first_term_sent) > 30
      @signal = "KILL"
    end

    Process.kill @signal, @pid
  rescue Errno::ESRCH
  end

  # Send KILL to the worker right away. A pid that no longer exists is
  # ignored.
  def kill
    Process.kill "KILL", @pid
  rescue Errno::ESRCH
  end
end
|
|
|
|
|
|
|
|
# Fork as many workers as are missing to reach @options[:workers].
# Each child runs #worker; the master records a Worker entry for it and
# then runs the :after_worker_boot hooks. If at least one worker was
# missing, the phased state is reset to :idle afterwards.
def spawn_workers
  missing = @options[:workers] - @workers.size

  # Captured before forking: children spawned while a phased restart is
  # waiting are told they are an upgrade.
  upgrade = (@phased_state == :waiting)
  master = Process.pid

  missing.times do
    idx = next_worker_index

    pid = fork { worker(idx, upgrade, master) }
    @cli.debug "Spawned worker: #{pid}"
    @workers << Worker.new(idx, pid, @phase)
    @options[:after_worker_boot].each(&:call)
  end

  @phased_state = :idle if missing > 0
end
|
|
|
|
|
2014-01-25 16:53:02 -05:00
|
|
|
# Lowest slot index in 0...@options[:workers] that no tracked worker
# currently occupies, or nil when every slot is taken.
def next_worker_index
  taken = @workers.map(&:index)
  (0...@options[:workers]).find { |slot| !taken.include?(slot) }
end
|
|
|
|
|
2013-07-05 19:08:13 -04:00
|
|
|
# True when no tracked worker is still waiting to boot (also true when
# there are no workers at all).
def all_workers_booted?
  @workers.all? { |w| w.booted? }
end
|
|
|
|
|
|
|
|
# Periodic worker maintenance, throttled to run at most once every five
# seconds: kill workers that stopped checking in, reap exited children,
# respawn missing workers and, once everything is booted, retire one
# worker that belongs to an older phase.
def check_workers
  return if @next_check && @next_check >= Time.now

  @next_check = Time.now + 5

  killed_any = false

  @workers.each do |w|
    next unless w.ping_timeout?(@options[:worker_timeout])

    log "! Terminating timed out worker: #{w.pid}"
    w.kill
    killed_any = true
  end

  # If we killed any timed out workers, try to catch them
  # during this loop by giving the kernel time to kill them.
  sleep 1 if killed_any

  # Reap every child that has already exited, without blocking, and
  # drop its record.
  until @workers.empty?
    reaped = Process.waitpid(-1, Process::WNOHANG)
    break unless reaped

    @workers.delete_if { |w| w.pid == reaped }
  end

  spawn_workers

  # Only when we're running at proper capacity do we check whether a
  # worker needs to be phased out (it will be restarted in the right
  # phase).
  return unless all_workers_booted?

  stale = @workers.find { |x| x.phase != @phase }
  return unless stale

  if @phased_state == :idle
    @phased_state = :waiting
    log "- Stopping #{stale.pid} for phased upgrade..."
  end

  stale.term
  log "- #{stale.signal} sent to #{stale.pid}..."
end
|
|
|
|
|
|
|
|
# Nudge the master loop by writing a single "!" to the wakeup pipe.
# Does nothing if the pipe is already closed, and quietly ignores
# system-call and IO errors raised by the write.
def wakeup!
  return if @wakeup.closed?

  @wakeup.write "!"
rescue SystemCallError, IOError
end
|
|
|
|
|
2014-01-25 16:53:02 -05:00
|
|
|
# Body of a forked worker process: prepares the child, boots the server,
# reports to the master over @worker_write and then serves until the
# server stops.
#
# index   - slot index of this worker; shown in the process title and
#           passed to each :before_worker_boot hook.
# upgrade - when true, chdir to @options[:worker_directory] (if set)
#           before loading the app.
# master  - pid of the master process; shown in the process title.
#
# @worker_write is always closed on the way out.
def worker(index, upgrade, master)
  $0 = "puma: cluster worker #{index}: #{master}"
  Signal.trap "SIGINT", "IGNORE"

  # Close the ends of the pipes this process does not use.
  @master_read.close
  @suicide_pipe.close

  # @check_pipe only becomes ready once every copy of the write end
  # (@suicide_pipe) is closed; this process just closed its own copy.
  Thread.new do
    IO.select [@check_pipe]
    log "! Detected parent died, dying"
    exit! 1
  end

  # Be sure to change the directory again before loading
  # the app. This way we can pick up new code.
  if upgrade
    if dir = @options[:worker_directory]
      log "+ Changing to #{dir}"
      Dir.chdir dir
    end
  end

  # If we're not running under a Bundler context, then
  # report the info about the context we will be using.
  # Bundler exports BUNDLE_GEMFILE, so that is the variable to test.
  if !ENV['BUNDLE_GEMFILE'] && File.exist?("Gemfile")
    log "+ Gemfile in context: #{File.expand_path("Gemfile")}"
  end

  # Invoke any worker boot hooks so they can get
  # things in shape before booting the app.
  hooks = @options[:before_worker_boot]
  hooks.each { |h| h.call(index) }

  server = start_server

  Signal.trap "SIGTERM" do
    server.stop
  end

  # Tell the master this worker has booted ("b<pid>\n").
  begin
    @worker_write << "b#{Process.pid}\n"
  rescue SystemCallError, IOError
    STDERR.puts "Master seems to have exitted, exitting."
    return
  end

  # Check in with the master every five seconds ("p<pid>\n").
  Thread.new(@worker_write) do |io|
    payload = "p#{Process.pid}\n"

    loop do
      sleep 5
      io << payload
    end
  end

  server.run.join

ensure
  @worker_write.close
end
|
|
|
|
|
|
|
|
# Flag that a restart was requested, then stop the cluster via #stop.
def restart
  @restart = true

  stop
end
|
|
|
|
|
|
|
|
# Request a phased restart.
#
# Returns false, doing nothing, when the :preload_app option is set;
# otherwise raises the @phased_restart flag, wakes the master loop and
# returns true.
def phased_restart
  if @options[:preload_app]
    false
  else
    @phased_restart = true
    wakeup!

    true
  end
end
|
|
|
|
|
|
|
|
# Switch the status to :stop and wake the master loop so it notices.
def stop
  @status = :stop

  wakeup!
end
|
|
|
|
|
|
|
|
# Stop the cluster and block until every child process has exited.
# The status only moves to :stop if it is currently :run; the control
# object, when present, is stopped with +true+ as its argument.
def stop_blocked
  @status = :stop if @status == :run
  wakeup!

  @control.stop(true) if @control

  Process.waitall
end
|
|
|
|
|
|
|
|
# Switch the status to :halt and wake the master loop. Unlike :stop,
# the master loop in #run skips stop_workers for this status.
def halt
  @status = :halt

  wakeup!
end
|
|
|
|
|
|
|
|
# JSON-formatted string with the number of tracked workers, the current
# phase and how many workers have booted.
def stats
  total = @workers.size
  booted = @workers.count(&:booted?)

  %Q!{ "workers": #{total}, "phase": #{@phase}, "booted_workers": #{booted} }!
end
|
|
|
|
|
2013-07-06 00:13:29 -04:00
|
|
|
# Value of the :preload_app option (truthy when the application should
# be loaded in the master before workers are forked).
def preload?
  @options[:preload_app]
end
|
|
|
|
|
2013-07-05 19:08:13 -04:00
|
|
|
# Run the cluster master: load or bind the application, install the
# signal handlers, fork the workers and then supervise them until the
# status leaves :run. Unless the cluster was halted, workers are shut
# down gracefully before returning. The pipes created here are always
# closed on the way out.
def run
  @status = :run

  output_header "cluster"

  log "* Process workers: #{@options[:workers]}"

  if preload?
    log "* Preloading application"
    load_and_bind
  else
    log "* Phased restart available"

    unless @cli.config.app_configured?
      error "No application configured, nothing to run"
      exit 1
    end

    @cli.binder.parse @options[:binds], self
  end

  # One pipe carries both wakeup bytes ("!") and worker messages.
  read, @wakeup = Puma::Util.pipe

  Signal.trap "SIGCHLD" do
    wakeup!
  end

  # TTIN: run one more worker.
  Signal.trap "TTIN" do
    @options[:workers] += 1
    wakeup!
  end

  # TTOU: run one worker fewer, never dropping below one. A worker is
  # only retired when the target count actually went down and there is
  # a tracked worker to retire.
  Signal.trap "TTOU" do
    if @options[:workers] >= 2
      @options[:workers] -= 1

      surplus = @workers.last
      surplus.term if surplus
    end

    wakeup!
  end

  master_pid = Process.pid

  Signal.trap "SIGTERM" do
    # The worker installs their own SIGTERM when booted.
    # Until then, this is run by the worker and the worker
    # should just exit if they get it.
    if Process.pid != master_pid
      log "Early termination of worker"
      exit! 0
    else
      stop
    end
  end

  # Used by the workers to detect if the master process dies.
  # If select says that @check_pipe is ready, it's because the
  # master has exited and @suicide_pipe has been automatically
  # closed.
  #
  @check_pipe, @suicide_pipe = Puma::Util.pipe

  if daemon?
    log "* Daemonizing..."
    Process.daemon(true)
  else
    log "Use Ctrl-C to stop"
  end

  redirect_io

  start_control

  @cli.write_state

  @master_read, @worker_write = read, @wakeup
  spawn_workers

  Signal.trap "SIGINT" do
    stop
  end

  @cli.events.fire_on_booted!

  begin
    while @status == :run
      begin
        # Wait up to five seconds for a wakeup byte or a worker message.
        res = IO.select([read], nil, nil, 5)

        if res
          req = read.read_nonblock(1)

          # "!" is a plain wakeup with nothing more to read.
          next if !req || req == "!"

          # Worker messages are one type byte followed by "<pid>\n".
          pid = read.gets.to_i

          if w = @workers.find { |x| x.pid == pid }
            case req
            when "b"
              w.boot!
              log "- Worker #{w.index} (pid: #{pid}) booted, phase: #{w.phase}"
            when "p"
              w.ping!
            end
          else
            log "! Out-of-sync worker list, no #{pid} worker"
          end
        end

        if @phased_restart
          start_phased_restart
          @phased_restart = false
        end

        check_workers

      rescue Interrupt
        @status = :stop
      end
    end

    stop_workers unless @status == :halt
  ensure
    @check_pipe.close
    @suicide_pipe.close
    read.close
    @wakeup.close
  end
end
|
|
|
|
end
|
|
|
|
end
|