2020-03-11 20:09:34 -04:00
|
|
|
# frozen_string_literal: true
|
|
|
|
|
2021-12-08 10:13:43 -05:00
|
|
|
require_relative '../config/bundler_setup'
|
|
|
|
|
2020-03-11 20:09:34 -04:00
|
|
|
require 'optparse'
|
|
|
|
require 'logger'
|
|
|
|
require 'time'
|
|
|
|
|
2021-11-08 13:09:52 -05:00
|
|
|
# In environments where code is preloaded and cached, such as `spring`,
# we may run into "already initialized" warnings, hence the check.
|
|
|
|
require_relative '../lib/gitlab' unless Object.const_defined?('Gitlab')
|
|
|
|
require_relative '../lib/gitlab/utils'
|
|
|
|
require_relative '../lib/gitlab/sidekiq_config/cli_methods'
|
|
|
|
require_relative '../lib/gitlab/sidekiq_config/worker_matcher'
|
|
|
|
require_relative '../lib/gitlab/sidekiq_logging/json_formatter'
|
2022-03-17 08:07:26 -04:00
|
|
|
require_relative '../metrics_server/dependencies'
|
2021-12-08 10:13:43 -05:00
|
|
|
require_relative '../metrics_server/metrics_server'
|
2021-11-08 13:09:52 -05:00
|
|
|
require_relative 'sidekiq_cluster'
|
|
|
|
|
2020-03-11 20:09:34 -04:00
|
|
|
module Gitlab
  module SidekiqCluster
    # Command-line entry point for the Sidekiq cluster.
    #
    # Parses the queue arguments and options, resolves every argument into a
    # group of queues, starts the worker processes through
    # `SidekiqCluster.start`, optionally starts a metrics server, and then
    # hands all started processes to a `SidekiqProcessSupervisor`.
    class CLI
      # Name assigned to the thread that executes #run.
      THREAD_NAME = 'supervisor'

      # The signals that should terminate both the master and workers.
      TERMINATE_SIGNALS = %i(INT TERM).freeze

      # The signals that should simply be forwarded to the workers.
      FORWARD_SIGNALS = %i(TTIN USR1 USR2 HUP).freeze

      # Raised by #run when the command line is invalid or selects no queues.
      CommandError = Class.new(StandardError)

      # Sets the defaults that the command-line options may later override.
      #
      # @param log_output [IO] destination handed to `Logger.new`
      def initialize(log_output = $stderr)
        # As recommended by https://github.com/mperham/sidekiq/wiki/Advanced-Options#concurrency
        @max_concurrency = 50
        @min_concurrency = 0
        @environment = ENV['RAILS_ENV'] || 'development'
        @metrics_dir = ENV["prometheus_multiproc_dir"] || File.absolute_path("tmp/prometheus_multiproc_dir/sidekiq")
        @pid = nil
        @interval = 5
        # NOTE(review): DEFAULT_SOFT_TIMEOUT_SECONDS is not defined in this
        # file; presumably it comes from the required `sidekiq_cluster` file.
        @soft_timeout_seconds = DEFAULT_SOFT_TIMEOUT_SECONDS
        @logger = Logger.new(log_output)
        @logger.formatter = ::Gitlab::SidekiqLogging::JSONFormatter.new
        @rails_path = Dir.pwd
        @dryrun = false
        @list_queues = false
        # Explicit defaults for the remaining boolean flags, so every flag
        # read by #run is initialised here like @dryrun and @list_queues.
        @queue_selector = false
        @negate_queues = false
      end

      # Parses `argv`, resolves the queue groups and starts the cluster.
      #
      # Options are removed from `argv` in place; each remaining argument
      # describes one queue group (and therefore one worker process).
      #
      # @param argv [Array<String>] command-line arguments
      # @raise [CommandError] when `argv` is empty, when --dryrun and
      #   --list-queues are combined, when a queue argument contains a
      #   newline, or when every resolved queue group is empty
      def run(argv = ARGV)
        Thread.current.name = THREAD_NAME

        if argv.empty?
          raise CommandError,
            'You must specify at least one queue to start a worker for'
        end

        option_parser.parse!(argv)

        if @dryrun && @list_queues
          raise CommandError,
            'The --dryrun and --list-queues options are mutually exclusive'
        end

        worker_metadatas = SidekiqConfig::CliMethods.worker_metadatas(@rails_path)
        worker_queues = SidekiqConfig::CliMethods.worker_queues(@rails_path)

        queue_groups = argv.map do |queues_or_query_string|
          # Only a yes/no answer is needed, so use match? instead of =~.
          if queues_or_query_string.match?(/[\r\n]/)
            raise CommandError,
              'The queue arguments cannot contain newlines'
          end

          next worker_queues if queues_or_query_string == SidekiqConfig::WorkerMatcher::WILDCARD_MATCH

          # When using the queue query syntax, we treat each queue group
          # as a worker attribute query, and resolve the queues for the
          # queue group using this query.
          if @queue_selector
            SidekiqConfig::CliMethods.query_queues(queues_or_query_string, worker_metadatas)
          else
            SidekiqConfig::CliMethods.expand_queues(queues_or_query_string.split(','), worker_queues)
          end
        end

        if @negate_queues
          # --negate: run every known queue except the ones that were given.
          queue_groups.map! { |queues| worker_queues - queues }
        end

        if queue_groups.all?(&:empty?)
          raise CommandError,
            'No queues found, you must select at least one queue'
        end

        if @list_queues
          puts queue_groups.map(&:sort) # rubocop:disable Rails/Output

          return
        end

        unless @dryrun
          @logger.info("Starting cluster with #{queue_groups.length} processes")

          # Make sure we reset the metrics directory prior to:
          # - starting a metrics server process
          # - starting new workers
          ::Prometheus::CleanupMultiprocDirService.new(@metrics_dir).execute
        end

        start_and_supervise_workers(queue_groups)
      end

      # Starts the workers for `queue_groups` and, unless this is a dry run,
      # writes the PID file (when one was requested), starts the metrics
      # server (when enabled) and supervises all started processes.
      #
      # @param queue_groups [Array<Array<String>>] queues per worker process
      def start_and_supervise_workers(queue_groups)
        worker_pids = SidekiqCluster.start(
          queue_groups,
          env: @environment,
          directory: @rails_path,
          max_concurrency: @max_concurrency,
          min_concurrency: @min_concurrency,
          dryrun: @dryrun,
          timeout: @soft_timeout_seconds
        )

        return if @dryrun

        ProcessManagement.write_pid(@pid) if @pid

        supervisor = SidekiqProcessSupervisor.instance(
          health_check_interval_seconds: @interval,
          terminate_timeout_seconds: @soft_timeout_seconds + TIMEOUT_GRACE_PERIOD_SECONDS,
          term_signals: TERMINATE_SIGNALS,
          forwarded_signals: FORWARD_SIGNALS,
          synchronous: true
        )

        metrics_server_pid = start_metrics_server

        # Array(nil) is [], so a disabled metrics server adds nothing here.
        all_pids = worker_pids + Array(metrics_server_pid)

        supervisor.supervise(all_pids) do |dead_pids|
          # If we're not in the process of shutting down the cluster,
          # and the metrics server died, restart it.
          if supervisor.alive && dead_pids.include?(metrics_server_pid)
            @logger.info('Sidekiq metrics server terminated, restarting...')
            metrics_server_pid = restart_metrics_server
            # The refreshed pid list is also this branch's return value.
            all_pids = worker_pids + Array(metrics_server_pid)
          else
            # If a worker process died we'll just terminate the whole cluster.
            # We let an external system (runit, kubernetes) handle the restart.
            @logger.info('A worker terminated, shutting down the cluster')

            ProcessManagement.signal_processes(all_pids - dead_pids, :TERM)
            # Signal supervisor not to respawn workers and shut down.
            []
          end
        end
      end

      # Starts the metrics server when it is enabled.
      #
      # @return the result of #restart_metrics_server, or nil when the
      #   metrics server is disabled
      def start_metrics_server
        return unless metrics_server_enabled?

        restart_metrics_server
      end

      # Logs the exporter port and starts a metrics server for Sidekiq.
      #
      # @return the value returned by `MetricsServer.start_for_sidekiq`
      #   (used by the callers as the metrics server pid)
      def restart_metrics_server
        @logger.info("Starting metrics server on port #{sidekiq_exporter_port}")
        MetricsServer.start_for_sidekiq(
          metrics_dir: @metrics_dir,
          reset_signals: TERMINATE_SIGNALS + FORWARD_SIGNALS
        )
      end

      # @return the `monitoring.sidekiq_exporter.enabled` setting
      def sidekiq_exporter_enabled?
        ::Settings.dig('monitoring', 'sidekiq_exporter', 'enabled')
      end

      # @return [Boolean] true when both ports are configured and differ
      def exporter_has_a_unique_port?
        # In https://gitlab.com/gitlab-org/gitlab/-/issues/345802 we added settings for sidekiq_health_checks.
        # These settings default to the same values as sidekiq_exporter for backwards compatibility.
        # If a different port for sidekiq_health_checks has been set up, we know that the
        # user wants to serve health checks and metrics from different servers.
        return false if sidekiq_health_check_port.nil? || sidekiq_exporter_port.nil?

        sidekiq_exporter_port != sidekiq_health_check_port
      end

      # @return the `monitoring.sidekiq_exporter.port` setting
      def sidekiq_exporter_port
        ::Settings.dig('monitoring', 'sidekiq_exporter', 'port')
      end

      # @return the `monitoring.sidekiq_health_checks.port` setting
      def sidekiq_health_check_port
        ::Settings.dig('monitoring', 'sidekiq_health_checks', 'port')
      end

      # The metrics server is only started outside of dry runs, when the
      # exporter is enabled and listens on a port of its own.
      def metrics_server_enabled?
        !@dryrun && sidekiq_exporter_enabled? && exporter_has_a_unique_port?
      end

      # Builds the option parser. Each handler stores its value in the
      # instance variable that #run and #start_and_supervise_workers read.
      #
      # NOTE(review): numeric options are converted with String#to_i, so a
      # non-numeric value silently becomes 0.
      #
      # @return [OptionParser]
      def option_parser
        OptionParser.new do |opt|
          opt.banner = "#{File.basename(__FILE__)} [QUEUE,QUEUE] [QUEUE] ... [OPTIONS]"

          opt.separator "\nOptions:\n"

          opt.on('-h', '--help', 'Shows this help message') do
            # Kernel#abort prints the usage text and terminates the process.
            abort opt.to_s
          end

          opt.on('-m', '--max-concurrency INT', 'Maximum threads to use with Sidekiq (default: 50, 0 to disable)') do |int|
            @max_concurrency = int.to_i
          end

          opt.on('--min-concurrency INT', 'Minimum threads to use with Sidekiq (default: 0)') do |int|
            @min_concurrency = int.to_i
          end

          opt.on('-e', '--environment ENV', 'The application environment') do |env|
            @environment = env
          end

          opt.on('-P', '--pidfile PATH', 'Path to the PID file') do |pid|
            @pid = pid
          end

          opt.on('-r', '--require PATH', 'Location of the Rails application') do |path|
            @rails_path = path
          end

          opt.on('--queue-selector', 'Run workers based on the provided selector') do |queue_selector|
            @queue_selector = queue_selector
          end

          opt.on('-n', '--negate', 'Run workers for all queues in sidekiq_queues.yml except the given ones') do
            @negate_queues = true
          end

          opt.on('-i', '--interval INT', 'The number of seconds to wait between worker checks') do |int|
            @interval = int.to_i
          end

          opt.on('-t', '--timeout INT', 'Graceful timeout for all running processes') do |timeout|
            @soft_timeout_seconds = timeout.to_i
          end

          opt.on('-d', '--dryrun', 'Print commands that would be run without this flag, and quit') do
            @dryrun = true
          end

          opt.on('--list-queues', 'List matching queues, and quit') do
            @list_queues = true
          end
        end
      end
    end
  end
end
|