2018-09-17 12:41:14 -04:00
|
|
|
# frozen_string_literal: true
|
|
|
|
|
2013-07-05 19:08:13 -04:00
|
|
|
require 'puma/runner'
|
2016-09-05 14:29:16 -04:00
|
|
|
require 'puma/util'
|
|
|
|
require 'puma/plugin'
|
|
|
|
|
2016-03-05 19:12:16 -05:00
|
|
|
require 'time'
|
2013-07-05 19:08:13 -04:00
|
|
|
|
|
|
|
module Puma
|
2018-05-01 16:42:12 -04:00
|
|
|
# This class is instantiated by the `Puma::Launcher` and used
|
|
|
|
# to boot and serve a Ruby application when puma "workers" are needed
|
|
|
|
# i.e. when using multi-processes. For example `$ puma -w 5`
|
|
|
|
#
|
|
|
|
# At the core of this class is running an instance of `Puma::Server` which
|
|
|
|
# gets created via the `start_server` method from the `Puma::Runner` class
|
|
|
|
# that this inherits from.
|
|
|
|
#
|
|
|
|
# An instance of this class will spawn the number of processes passed in
|
|
|
|
# via the `spawn_workers` method call. Each worker will have its own
|
|
|
|
# instance of a `Puma::Server`.
|
2013-07-05 19:08:13 -04:00
|
|
|
class Cluster < Runner
|
2016-02-06 22:00:29 -05:00
|
|
|
# Builds the cluster runner. Both arguments are handed unchanged to the
# superclass constructor; the rest is bookkeeping local to the cluster.
#
# @param cli forwarded to the superclass constructor
# @param events forwarded to the superclass constructor
def initialize(cli, events)
  super(cli, events)

  # Flag raised by #phased_restart and consumed (then cleared) by #run.
  @phased_restart = false

  # Earliest time at which #check_workers will do any work again.
  @next_check = Time.now

  # Worker records tracked by this process.
  @workers = []

  # Current phase; incremented by #start_phased_restart.
  @phase = 0
end
|
|
|
|
|
|
|
|
# Asks every tracked worker to terminate and then waits for them.
#
# After calling `term` on each worker, this polls `wait_workers` every
# 0.2 seconds until no remaining worker record has a pid. An Interrupt
# raised while waiting cancels the wait and is logged rather than
# propagated.
def stop_workers
  log "- Gracefully shutting down workers..."
  @workers.each(&:term)

  begin
    loop do
      wait_workers
      # Records without a pid are never reaped by wait_workers, so they
      # must not keep the loop alive.
      break if @workers.all? { |worker| worker.pid.nil? }
      sleep 0.2
    end
  rescue Interrupt
    log "! Cancelled waiting for workers"
  end
end
|
|
|
|
|
|
|
|
# Begins a phased restart: bumps the phase counter and switches the
# process into the launcher's restart directory.
def start_phased_restart
  @phase += 1
  log "- Starting phased worker restart, phase: #{@phase}"

  # Be sure to change the directory again before loading
  # the app. This way we can pick up new code.
  restart_dir = @launcher.restart_dir
  log "+ Changing to #{restart_dir}"
  Dir.chdir(restart_dir)
end
|
|
|
|
|
2014-07-27 10:01:05 -04:00
|
|
|
# Runs the superclass implementation, then calls `hup` on every tracked
# worker.
def redirect_io
  super

  @workers.each(&:hup)
end
|
|
|
|
|
2013-07-05 19:08:13 -04:00
|
|
|
# Record describing one worker process: its slot index, pid, phase,
# boot stage, last check-in and the signal that will be used to stop it.
class Worker
  # @param idx [Integer] slot index of the worker
  # @param pid [Integer, nil] process id; may be nil until it is reported
  # @param phase [Integer] phase the worker was started in
  # @param options [Hash] reads :worker_timeout, :worker_boot_timeout and
  #   :worker_shutdown_timeout
  def initialize(idx, pid, phase, options)
    @index = idx
    @pid = pid
    @phase = phase
    @stage = :started
    @signal = "TERM"
    @options = options
    @first_term_sent = nil
    @started_at = Time.now
    @last_checkin = Time.now
    @last_status = {}
    @term = false
  end

  attr_reader :index, :pid, :phase, :signal, :last_checkin, :last_status, :started_at

  # @version 5.0.0
  attr_writer :pid, :phase

  # @return [Boolean] true once #boot! has been called
  def booted?
    @stage == :booted
  end

  # Marks the worker as booted and refreshes its check-in time.
  def boot!
    @last_checkin = Time.now
    @stage = :booted
  end

  # @return [Boolean] whether termination has been requested
  def term?
    @term
  end

  # Records a check-in and stores the parsed status payload.
  #
  # @param status [String] JSON document; keys are symbolized
  def ping!(status)
    @last_checkin = Time.now
    # Loaded lazily so JSON is only required when a ping is processed.
    require 'json'
    @last_status = JSON.parse(status, symbolize_names: true)
  end

  # Time after which the worker counts as timed out: the last check-in
  # plus :worker_timeout when booted, :worker_boot_timeout otherwise.
  #
  # @see Puma::Cluster#check_workers
  # @version 5.0.0
  def ping_timeout
    allowance = booted? ? @options[:worker_timeout] : @options[:worker_boot_timeout]
    @last_checkin + allowance
  end

  # Signals the worker to stop. The first call records when termination
  # was requested; a call made more than :worker_shutdown_timeout seconds
  # after that switches the signal to KILL. Nothing is sent while the pid
  # is unknown, and a missing process (ESRCH) is ignored.
  def term
    if @first_term_sent && (Time.now - @first_term_sent) > @options[:worker_shutdown_timeout]
      @signal = "KILL"
    else
      @term ||= true
      @first_term_sent ||= Time.now
    end
    Process.kill @signal, @pid if @pid
  rescue Errno::ESRCH
  end

  # Switches the signal to KILL and then goes through #term.
  def kill
    @signal = 'KILL'
    term
  end

  # Sends HUP to the worker. Like #term, this is a no-op while the pid is
  # unknown (Process.kill would raise TypeError for a nil pid), and a
  # missing process (ESRCH) is ignored.
  def hup
    Process.kill "HUP", @pid if @pid
  rescue Errno::ESRCH
  end
end
|
|
|
|
|
|
|
|
# Starts as many workers as are missing to reach @options[:workers].
#
# Without :fork_worker every worker is forked directly via #spawn_worker.
# With :fork_worker only index 0 is forked here; other indexes are
# written to @fork_writer and recorded with a nil pid. In that mode
# "-1\n" is written before spawning and "0\n" afterwards once every
# tracked worker is in the current phase.
def spawn_workers
  missing = @options[:workers] - @workers.size
  return if missing < 1

  master = Process.pid
  fork_mode = @options[:fork_worker]

  @fork_writer << "-1\n" if fork_mode

  missing.times do
    idx = next_worker_index

    pid =
      if fork_mode && idx != 0
        @fork_writer << "#{idx}\n"
        nil
      else
        spawn_worker(idx, master)
      end

    debug "Spawned worker: #{pid}"
    @workers << Worker.new(idx, pid, @phase, @options)
  end

  if fork_mode && @workers.all? { |worker| worker.phase == @phase }
    @fork_writer << "0\n"
  end
end
|
|
|
|
|
2020-09-17 11:15:19 -04:00
|
|
|
# Forks one worker process, running the :before_worker_fork hooks before
# and the :after_worker_fork hooks after the fork. The child runs
# #worker. If fork yields no pid the process logs and exits immediately.
#
# @param idx [Integer] index of the worker to start
# @param master [Integer] pid passed on to #worker
# @return [Integer] pid of the forked child
# @version 5.0.0
def spawn_worker(idx, master)
  @launcher.config.run_hooks(:before_worker_fork, idx, @launcher.events)

  child_pid = fork { worker(idx, master) }
  unless child_pid
    log "! Complete inability to spawn new workers detected"
    log "! Seppuku is the only choice."
    exit! 1
  end

  @launcher.config.run_hooks(:after_worker_fork, idx, @launcher.events)
  child_pid
end
|
|
|
|
|
2016-12-21 14:48:39 -05:00
|
|
|
# Terminates surplus workers when more are tracked than
# @options[:workers] asks for. The workers at the end of the list are
# the ones that receive `term`.
def cull_workers
  excess = @workers.size - @options[:workers]
  return if excess < 1

  debug "Culling #{excess.inspect} workers"

  doomed = @workers[-excess..-1]
  debug "Workers to cull: #{doomed.inspect}"

  doomed.each do |w|
    log "- Worker #{w.index} (pid: #{w.pid}) terminating"
    w.term
  end
end
|
|
|
|
|
2014-01-25 16:53:02 -05:00
|
|
|
# Picks the lowest index in 0...@options[:workers] that no tracked
# worker currently occupies.
#
# @return [Integer, nil] the free index, or nil when every index is taken
def next_worker_index
  taken = @workers.map(&:index)
  (0...@options[:workers]).find { |slot| !taken.include?(slot) }
end
|
|
|
|
|
2013-07-05 19:08:13 -04:00
|
|
|
# @return [Boolean] true when no tracked worker is still unbooted
#   (and therefore also true when no workers are tracked)
def all_workers_booted?
  @workers.all?(&:booted?)
end
|
|
|
|
|
2020-02-17 23:49:56 -05:00
|
|
|
# Periodic maintenance of the worker list. Does nothing until
# @next_check has passed. Otherwise it times out, reaps, culls and
# spawns workers, then schedules the next check.
def check_workers
  now = Time.now
  return if @next_check >= now

  @next_check = Time.now + Const::WORKER_CHECK_INTERVAL

  timeout_workers
  wait_workers
  cull_workers
  spawn_workers

  if all_workers_booted?
    # If we're running at proper capacity, check to see if
    # we need to phase any workers out (which will restart
    # in the right phase).
    outdated = @workers.find { |worker| worker.phase != @phase }

    if outdated
      log "- Stopping #{outdated.pid} for phased upgrade..."
      unless outdated.term?
        outdated.term
        log "- #{outdated.signal} sent to #{outdated.pid}..."
      end
    end
  end

  # Wake up no later than the earliest ping timeout among workers that
  # have not been asked to terminate.
  earliest_timeout = @workers.reject(&:term?).map(&:ping_timeout).min
  @next_check = [earliest_timeout, @next_check].compact.min
end
|
|
|
|
|
|
|
|
# Writes a single "!" to @wakeup, if it exists and is still open.
# SystemCallError and IOError raised by the write are swallowed.
def wakeup!
  return unless @wakeup

  @wakeup.write "!" unless @wakeup.closed?
rescue SystemCallError, IOError
  # Only some runtimes provide purge_interrupt_queue, hence the check.
  current = Thread.current
  current.purge_interrupt_queue if current.respond_to? :purge_interrupt_queue
end
|
|
|
|
|
2014-02-17 08:34:40 -05:00
|
|
|
# Body of a worker process (run inside the fork made by #spawn_worker).
#
# Sets the process title and signal handlers, starts a thread that exits
# the process when @check_pipe becomes readable, runs the
# :before_worker_boot hooks, starts the server and reports to the master
# over @worker_write using single-letter prefixed lines ("b" booted,
# "e" SIGTERM received, "f" forked a worker, "p" stats, "t" terminating).
# With :fork_worker, index 0 also listens on @fork_pipe for commands.
#
# @param index [Integer] index of this worker
# @param master [Integer] pid shown in the process title and handed to
#   #spawn_worker when this worker forks siblings
def worker(index, master)
  title = "puma: cluster worker #{index}: #{master}"
  tag = @options[:tag]
  title += " [#{tag}]" if tag && !tag.empty?
  $0 = title

  Signal.trap "SIGINT", "IGNORE"
  Signal.trap "SIGCHLD", "DEFAULT"

  fork_worker = @options[:fork_worker] && index == 0

  @workers = []
  if !@options[:fork_worker] || fork_worker
    @master_read.close
    @suicide_pipe.close
    @fork_writer.close
  end

  Thread.new do
    Puma.set_thread_name "worker check pipe"
    IO.select [@check_pipe]
    log "! Detected parent died, dying"
    exit! 1
  end

  # If we're not running under a Bundler context, then
  # report the info about the context we will be using
  unless ENV['BUNDLE_GEMFILE']
    gemfile = ["Gemfile", "gems.rb"].find { |name| File.exist?(name) }
    log "+ Gemfile in context: #{File.expand_path(gemfile)}" if gemfile
  end

  # Invoke any worker boot hooks so they can get
  # things in shape before booting the app.
  @launcher.config.run_hooks :before_worker_boot, index, @launcher.events

  server = @server ||= start_server

  # Each popped value decides whether the server is run (again): true
  # runs it, false ends the loop at the bottom of this method.
  restart_server = Queue.new << true << false

  if fork_worker
    restart_server.clear
    worker_pids = []

    Signal.trap "SIGCHLD" do
      reaped = worker_pids.reject! do |child|
        Process.wait(child, Process::WNOHANG) rescue true
      end
      wakeup! if reaped
    end

    Thread.new do
      Puma.set_thread_name "worker fork pipe"
      while (line = @fork_pipe.gets)
        idx = line.to_i
        case idx
        when -1 # stop server
          unless restart_server.empty?
            restart_server.clear
            server.begin_restart(true)
            @launcher.config.run_hooks :before_refork, nil, @launcher.events
            nakayoshi_gc
          end
        when 0 # restart server
          restart_server << true << false
        else # fork worker
          pid = spawn_worker(idx, master)
          worker_pids << pid
          @worker_write << "f#{pid}:#{idx}\n" rescue nil
        end
      end
    end
  end

  Signal.trap "SIGTERM" do
    @worker_write << "e#{Process.pid}\n" rescue nil
    server.stop
    restart_server << false
  end

  begin
    @worker_write << "b#{Process.pid}:#{index}\n"
  rescue SystemCallError, IOError
    Thread.current.purge_interrupt_queue if Thread.current.respond_to? :purge_interrupt_queue
    STDERR.puts "Master seems to have exited, exiting."
    return
  end

  Thread.new(@worker_write) do |io|
    Puma.set_thread_name "stat payload"

    loop do
      sleep Const::WORKER_CHECK_INTERVAL
      begin
        # Loaded lazily, only once stats are actually reported.
        require 'json'
        io << "p#{Process.pid}#{server.stats.to_json}\n"
      rescue IOError
        Thread.current.purge_interrupt_queue if Thread.current.respond_to? :purge_interrupt_queue
        break
      end
    end
  end

  server.run.join while restart_server.pop

  # Invoke any worker shutdown hooks so they can prevent the worker
  # exiting until any background operations are completed
  @launcher.config.run_hooks :before_worker_shutdown, index, @launcher.events
ensure
  @worker_write << "t#{Process.pid}\n" rescue nil
  @worker_write.close
end
|
|
|
|
|
|
|
|
# Flags that a restart was requested (@restart) and then calls #stop.
def restart
  @restart = true
  stop
end
|
|
|
|
|
|
|
|
# Requests a phased restart by raising @phased_restart and waking the
# main loop. Refused when the :preload_app option is set.
#
# @return [Boolean] false when refused, true when the request was made
def phased_restart
  return false if @options[:preload_app]

  @phased_restart = true
  wakeup!
  true
end
|
|
|
|
|
|
|
|
# Sets the status to :stop and wakes the main loop.
def stop
  @status = :stop
  wakeup!
end
|
|
|
|
|
|
|
|
# Stops the cluster and blocks until all child processes have been
# waited for. The status only moves to :stop when it is currently :run;
# @control, when set, is stopped with `true` as its argument.
def stop_blocked
  @status = :stop if @status == :run
  wakeup!
  @control.stop(true) if @control
  Process.waitall
end
|
|
|
|
|
|
|
|
# Sets the status to :halt and wakes the main loop. #run skips
# #stop_workers when it ends in this status.
def halt
  @status = :halt
  wakeup!
end
|
|
|
|
|
2014-02-25 08:52:20 -05:00
|
|
|
# Changes the working directory to the launcher's restart directory,
# logging the target first.
def reload_worker_directory
  target = @launcher.restart_dir
  log "+ Changing to #{target}"
  Dir.chdir(target)
end
|
2019-09-12 05:59:54 -04:00
|
|
|
|
|
|
|
# Summarises the cluster and each tracked worker as a Hash.
#
# Inside of a child process, this will return all zeroes, as @workers is
# only populated in the master process.
#
# @return [Hash] keys :started_at, :workers, :phase, :booted_workers,
#   :old_workers and :worker_status (one Hash per worker); timestamps
#   are UTC ISO 8601 strings
def stats
  per_worker = @workers.map do |worker|
    {
      started_at: worker.started_at.utc.iso8601,
      pid: worker.pid,
      index: worker.index,
      phase: worker.phase,
      booted: worker.booted?,
      last_checkin: worker.last_checkin.utc.iso8601,
      last_status: worker.last_status,
    }
  end

  # Workers still running in a phase other than the current one.
  outdated = @workers.count { |worker| worker.phase != @phase }

  {
    started_at: @started_at.utc.iso8601,
    workers: @workers.size,
    phase: @phase,
    booted_workers: per_worker.count { |entry| entry[:booted] },
    old_workers: outdated,
    worker_status: per_worker,
  }
end
|
|
|
|
|
2013-07-06 00:13:29 -04:00
|
|
|
# @return the value of the :preload_app option (truthy when the
#   application should be loaded before workers are forked, see #run)
def preload?
  @options[:preload_app]
end
|
|
|
|
|
2020-09-17 11:15:19 -04:00
|
|
|
# Bumps the phase of the worker at index 0 (when one is tracked) and
# then requests a phased restart.
#
# @return the result of #phased_restart
# @version 5.0.0
def fork_worker!
  first_worker = @workers.find { |w| w.index == 0 }
  first_worker.phase += 1 if first_worker
  phased_restart
end
|
|
|
|
|
2016-07-14 15:12:50 -04:00
|
|
|
# We do this in a separate method to keep the lambda scope
# of the signals handlers as small as possible.
#
# Installs handlers for SIGCHLD, TTIN, TTOU and SIGTERM, plus SIGURG
# and an optional :ping! listener when :fork_worker is set.
def setup_signals
  if @options[:fork_worker]
    Signal.trap("SIGURG") { fork_worker! }

    # Auto-fork after the specified number of requests.
    fork_requests = @options[:fork_worker].to_i
    if fork_requests > 0
      @launcher.events.register(:ping!) do |w|
        due = w.index == 0 &&
              w.phase == 0 &&
              w.last_status[:requests_count] >= fork_requests
        fork_worker! if due
      end
    end
  end

  Signal.trap("SIGCHLD") { wakeup! }

  # TTIN adds a worker.
  Signal.trap "TTIN" do
    @options[:workers] += 1
    wakeup!
  end

  # TTOU removes a worker, but never goes below one.
  Signal.trap "TTOU" do
    @options[:workers] -= 1 if @options[:workers] >= 2
    wakeup!
  end

  master_pid = Process.pid

  Signal.trap "SIGTERM" do
    # The worker installs their own SIGTERM when booted.
    # Until then, this is run by the worker and the worker
    # should just exit if they get it.
    if Process.pid == master_pid
      # Listeners are closed before the workers are stopped.
      @launcher.close_binder_listeners

      stop_workers
      stop

      raise(SignalException, "SIGTERM") if @options[:raise_exception_on_sigterm]
      exit 0 # Clean exit, workers were stopped
    else
      log "Early termination of worker"
      exit! 0
    end
  end
end
|
|
|
|
|
2013-07-05 19:08:13 -04:00
|
|
|
# Main entry point of the master process.
#
# Loads (or only binds) the application, creates the pipes used to talk
# to workers, installs signal handlers, spawns the workers and then
# loops while the status is :run. Each pass runs #check_workers and
# reads one message from the workers' pipe. Messages start with a
# one-character request code:
#
#   "!"  wake-up only, no payload
#   "b"  worker booted          ("<pid>:<index>")
#   "f"  worker forked by index 0 ("<pid>:<index>")
#   "e"  worker received SIGTERM
#   "t"  worker is terminating
#   "p"  ping with a JSON status payload after the pid
#
# On exit the workers are stopped (unless the status is :halt) and the
# pipes opened here are closed.
def run
  @status = :run

  output_header "cluster"

  log "* Process workers: #{@options[:workers]}"

  before = Thread.list

  if preload?
    log "* Preloading application"
    load_and_bind

    after = Thread.list

    if after.size > before.size
      threads = (after - before)
      if threads.first.respond_to? :backtrace
        log "! WARNING: Detected #{after.size-before.size} Thread(s) started in app boot:"
        threads.each do |t|
          log "! #{t.inspect} - #{t.backtrace ? t.backtrace.first : ''}"
        end
      else
        log "! WARNING: Detected #{after.size-before.size} Thread(s) started in app boot"
      end
    end
  else
    log "* Phased restart available"

    unless @launcher.config.app_configured?
      error "No application configured, nothing to run"
      exit 1
    end

    @launcher.binder.parse @options[:binds], self
  end

  read, @wakeup = Puma::Util.pipe

  setup_signals

  # Used by the workers to detect if the master process dies.
  # If select says that @check_pipe is ready, it's because the
  # master has exited and @suicide_pipe has been automatically
  # closed.
  #
  @check_pipe, @suicide_pipe = Puma::Util.pipe

  # Separate pipe used by worker 0 to receive commands to
  # fork new worker processes.
  @fork_pipe, @fork_writer = Puma::Util.pipe

  log "Use Ctrl-C to stop"

  redirect_io

  Plugins.fire_background

  @launcher.write_state

  start_control

  @master_read, @worker_write = read, @wakeup

  @launcher.config.run_hooks :before_fork, nil, @launcher.events
  nakayoshi_gc

  spawn_workers

  Signal.trap "SIGINT" do
    stop
  end

  @launcher.events.fire_on_booted!

  begin
    while @status == :run
      begin
        if @phased_restart
          start_phased_restart
          @phased_restart = false
        end

        check_workers

        # Sleep until a message arrives or the next check is due.
        res = IO.select([read], nil, nil, [0, @next_check - Time.now].max)

        if res
          req = read.read_nonblock(1)

          # A bare wake-up forces the next pass to run check_workers.
          @next_check = Time.now if req == "!"
          next if !req || req == "!"

          result = read.gets
          pid = result.to_i

          if req == "b" || req == "f"
            pid, idx = result.split(':').map(&:to_i)
            pending = @workers.find { |x| x.index == idx }
            # A message for an index that is not tracked must not crash
            # the master; it is reported as out-of-sync below instead.
            pending.pid = pid if pending && pending.pid.nil?
          end

          w = @workers.find { |x| x.pid == pid }
          if w
            case req
            when "b"
              w.boot!
              log "- Worker #{w.index} (pid: #{pid}) booted, phase: #{w.phase}"
              @next_check = Time.now
            when "e"
              # external term, see worker method, Signal.trap "SIGTERM"
              w.instance_variable_set :@term, true
            when "t"
              w.term unless w.term?
            when "p"
              # Strip the leading pid; the rest is the status payload.
              w.ping!(result.sub(/^\d+/,'').chomp)
              @launcher.events.fire(:ping!, w)
            end
          else
            log "! Out-of-sync worker list, no #{pid} worker"
          end
        end

      rescue Interrupt
        @status = :stop
      end
    end

    stop_workers unless @status == :halt
  ensure
    @check_pipe.close
    @suicide_pipe.close
    read.close
    @wakeup.close
  end
end
|
2019-08-23 03:07:28 -04:00
|
|
|
|
|
|
|
private
|
|
|
|
|
|
|
|
# Walks @workers, dropping the ones whose process has exited and
# re-sending `term` to those that were asked to stop but are still
# running.
#
# Records without a pid are always kept. When the pid is not a child of
# this process (ECHILD), a signal-0 probe decides: the record is kept if
# the process still exists and dropped otherwise.
def wait_workers
  @workers.reject! do |worker|
    next false if worker.pid.nil?

    begin
      next true if Process.wait(worker.pid, Process::WNOHANG)

      # Still running: repeat the termination request if one is pending.
      worker.term if worker.term?
      nil
    rescue Errno::ECHILD
      begin
        Process.kill(0, worker.pid)
        false # child still alive, but has another parent
      rescue Errno::ESRCH, Errno::EPERM
        true # child is already terminated
      end
    end
  end
end
|
2020-02-17 23:49:56 -05:00
|
|
|
|
2020-09-17 11:15:19 -04:00
|
|
|
# Kills every worker whose ping timeout has passed, skipping workers
# that have already been asked to terminate.
#
# @version 5.0.0
def timeout_workers
  @workers.each do |worker|
    next if worker.term? || worker.ping_timeout > Time.now

    log "! Terminating timed out worker: #{worker.pid}"
    worker.kill
  end
end
|
2020-05-10 23:50:36 -04:00
|
|
|
|
2020-09-17 11:15:19 -04:00
|
|
|
# When the :nakayoshi_fork option is set, runs four non-full-mark GC
# passes and, where GC.compact is available, compacts the heap. Does
# nothing otherwise.
#
# @version 5.0.0
def nakayoshi_gc
  return unless @options[:nakayoshi_fork]

  log "! Promoting existing objects to old generation..."
  1.upto(4) { GC.start(full_mark: false) }

  if GC.respond_to?(:compact)
    log "! Compacting..."
    GC.compact
  end

  log "! Friendly fork preparation complete."
end
|
2013-07-05 19:08:13 -04:00
|
|
|
end
|
|
|
|
end
|