From b1d24c0d14afdf3312e8f0745cc5ba87e41004b4 Mon Sep 17 00:00:00 2001 From: Shinya Maeda Date: Tue, 25 Sep 2018 18:44:08 +0900 Subject: [PATCH] Fix stuck job worker. Fix sidekiq queue namespace --- app/workers/all_queues.yml | 2 +- app/workers/stuck_ci_jobs_worker.rb | 46 ++++++++++++++++--------- lib/gitlab/ci/status/build/scheduled.rb | 3 +- scheduled_job_fixture.rb | 38 +++++++++++--------- 4 files changed, 55 insertions(+), 34 deletions(-) diff --git a/app/workers/all_queues.yml b/app/workers/all_queues.yml index b5a492122a3..f21789de37d 100644 --- a/app/workers/all_queues.yml +++ b/app/workers/all_queues.yml @@ -60,7 +60,6 @@ - pipeline_default:build_trace_sections - pipeline_default:pipeline_metrics - pipeline_default:pipeline_notification -- pipeline_default:ci_build_schedule - pipeline_hooks:build_hooks - pipeline_hooks:pipeline_hooks - pipeline_processing:build_finished @@ -71,6 +70,7 @@ - pipeline_processing:pipeline_update - pipeline_processing:stage_update - pipeline_processing:update_head_pipeline_for_merge_request +- pipeline_processing:ci_build_schedule - repository_check:repository_check_clear - repository_check:repository_check_batch diff --git a/app/workers/stuck_ci_jobs_worker.rb b/app/workers/stuck_ci_jobs_worker.rb index 884843e4465..67d88c75f91 100644 --- a/app/workers/stuck_ci_jobs_worker.rb +++ b/app/workers/stuck_ci_jobs_worker.rb @@ -16,10 +16,10 @@ class StuckCiJobsWorker Rails.logger.info "#{self.class}: Cleaning stuck builds" - drop :running, :updated_at, BUILD_RUNNING_OUTDATED_TIMEOUT, :stuck_or_timeout_failure - drop :pending, :updated_at, BUILD_PENDING_OUTDATED_TIMEOUT, :stuck_or_timeout_failure - drop :scheduled, :scheduled_at, BUILD_SCHEDULED_OUTDATED_TIMEOUT, :schedule_expired - drop_stuck :pending, :updated_at, BUILD_PENDING_STUCK_TIMEOUT, :stuck_or_timeout_failure + drop :running, condition_for_outdated_running, :stuck_or_timeout_failure + drop :pending, condition_for_outdated_pending, :stuck_or_timeout_failure + drop 
:scheduled, condition_for_outdated_scheduled, :schedule_expired + drop_stuck :pending, condition_for_outdated_pending_stuck, :stuck_or_timeout_failure remove_lease end @@ -34,27 +34,41 @@ class StuckCiJobsWorker Gitlab::ExclusiveLease.cancel(EXCLUSIVE_LEASE_KEY, @uuid) end - def drop(status, column, timeout, reason) - search(status, column, timeout) do |build| - drop_build :outdated, build, status, timeout, reason + def drop(status, condition, reason) + search(status, condition) do |build| + drop_build :outdated, build, status, reason end end - def drop_stuck(status, column, timeout, reason) - search(status, column, timeout) do |build| + def drop_stuck(status, condition, reason) + search(status, condition) do |build| break unless build.stuck? - drop_build :stuck, build, status, timeout, reason + drop_build :stuck, build, status, reason end end + def condition_for_outdated_running + ["updated_at < ?", BUILD_RUNNING_OUTDATED_TIMEOUT.ago] + end + + def condition_for_outdated_pending + ["updated_at < ?", BUILD_PENDING_OUTDATED_TIMEOUT.ago] + end + + def condition_for_outdated_scheduled + ["scheduled_at IS NOT NULL AND scheduled_at < ?", BUILD_SCHEDULED_OUTDATED_TIMEOUT.ago] + end + + def condition_for_outdated_pending_stuck + ["updated_at < ?", BUILD_PENDING_STUCK_TIMEOUT.ago] + end + # rubocop: disable CodeReuse/ActiveRecord - def search(status, column, timeout) - quoted_column = ActiveRecord::Base.connection.quote_column_name(column) - + def search(status, condition) loop do jobs = Ci::Build.where(status: status) - .where("#{quoted_column} < ?", timeout.ago) + .where(*condition) .includes(:tags, :runner, project: :namespace) .limit(100) .to_a @@ -67,8 +81,8 @@ class StuckCiJobsWorker end # rubocop: enable CodeReuse/ActiveRecord - def drop_build(type, build, status, timeout, reason) - Rails.logger.info "#{self.class}: Dropping #{type} build #{build.id} for runner #{build.runner_id} (status: #{status}, timeout: #{timeout})" + def drop_build(type, build, status, reason) + 
Rails.logger.info "#{self.class}: Dropping #{type} build #{build.id} for runner #{build.runner_id} (status: #{status})" Gitlab::OptimisticLocking.retry_lock(build, 3) do |b| b.drop(reason) end diff --git a/lib/gitlab/ci/status/build/scheduled.rb b/lib/gitlab/ci/status/build/scheduled.rb index 270a2706c87..c6713f0d633 100644 --- a/lib/gitlab/ci/status/build/scheduled.rb +++ b/lib/gitlab/ci/status/build/scheduled.rb @@ -23,7 +23,8 @@ module Gitlab private def execute_in - Time.at(subject.scheduled_at).utc.strftime("%H:%M:%S") + diff = [0, subject.scheduled_at - Time.now].max + Time.at(diff).utc.strftime("%H:%M:%S") end end end diff --git a/scheduled_job_fixture.rb b/scheduled_job_fixture.rb index 9ed59d337f7..ae33c6be6ad 100644 --- a/scheduled_job_fixture.rb +++ b/scheduled_job_fixture.rb @@ -1,4 +1,10 @@ ## +# ### +# IMPORTANT +# - Enable the feature flag `ci_enable_scheduled_build` on rails console! You can do `Feature.enable('ci_enable_scheduled_build')` +# This feature is off by default! +# +# # This is a debug script to reproduce specific scenarios for scheduled jobs (https://gitlab.com/gitlab-org/gitlab-ce/issues/51352) # By using this script, you don't need to setup GitLab runner. # This script is specifically made for FE/UX engineers. They can quickly check how scheduled jobs behave. @@ -78,42 +84,42 @@ cleanup: # # ### Reproduce the scenario ~ when rollout 10% jobs failed ~ # -# 1. ScheduledJobFixture.new(29, 1).create_pipeline('master') -# 1. ScheduledJobFixture.new(29, 1).finish_stage_until('test') +# 1. ScheduledJobFixture.new(16, 1).create_pipeline('master') +# 1. ScheduledJobFixture.new(16, 1).finish_stage_until('test') # 1. Wait until rollout 10% job is triggered -# 1. ScheduledJobFixture.new(29, 1).drop_jobs('rollout 10%') +# 1. ScheduledJobFixture.new(16, 1).drop_jobs('rollout 10%') # # Expectation: Following stages should be skipped. # # ### Reproduce the scenario ~ when user clicked cancel button before build job finished ~ # -# 1. 
ScheduledJobFixture.new(29, 1).create_pipeline('master') -# 1. ScheduledJobFixture.new(29, 1).cancel_pipeline +# 1. ScheduledJobFixture.new(16, 1).create_pipeline('master') +# 1. ScheduledJobFixture.new(16, 1).cancel_pipeline # # Expectation: All stages should be canceled. # # ### Reproduce the scenario ~ when user canceled the pipeline after rollout 10% job is scheduled ~ # -# 1. ScheduledJobFixture.new(29, 1).create_pipeline('master') -# 1. ScheduledJobFixture.new(29, 1).finish_stage_until('test') +# 1. ScheduledJobFixture.new(16, 1).create_pipeline('master') +# 1. ScheduledJobFixture.new(16, 1).finish_stage_until('test') # 1. Run next command before rollout 10% job is triggered -# 1. ScheduledJobFixture.new(29, 1).cancel_pipeline +# 1. ScheduledJobFixture.new(16, 1).cancel_pipeline # # Expectation: rollout 10% job will be canceled. Following stages will be skipped. # # ### Reproduce the scenario ~ when user canceled rollout 10% job after rollout 10% job is scheduled ~ # -# 1. ScheduledJobFixture.new(29, 1).create_pipeline('master') -# 1. ScheduledJobFixture.new(29, 1).finish_stage_until('test') +# 1. ScheduledJobFixture.new(16, 1).create_pipeline('master') +# 1. ScheduledJobFixture.new(16, 1).finish_stage_until('test') # 1. Run next command before rollout 10% job is triggered -# 1. ScheduledJobFixture.new(29, 1).cancel_jobs('rollout 10%') +# 1. ScheduledJobFixture.new(16, 1).cancel_jobs('rollout 10%') # # Expectation: rollout 10% job will be canceled. Following stages will be skipped. # # ### Reproduce the scenario ~ when user played rollout 10% job immidiately ~ # -# 1. ScheduledJobFixture.new(29, 1).create_pipeline('master') -# 1. ScheduledJobFixture.new(29, 1).finish_stage_until('test') +# 1. ScheduledJobFixture.new(16, 1).create_pipeline('master') +# 1. ScheduledJobFixture.new(16, 1).finish_stage_until('test') # 1. 
Play rollout 10% job before rollout 10% job is triggered # # Expectation: rollout 10% becomes pending immidiately @@ -121,10 +127,10 @@ cleanup: # ### Reproduce the scenario ~ when rollout 10% job is allowed to fail ~ # # 1. Set `allow_failure: true` to rollout 10% job -# 1. ScheduledJobFixture.new(29, 1).create_pipeline('master') -# 1. ScheduledJobFixture.new(29, 1).finish_stage_until('test') +# 1. ScheduledJobFixture.new(16, 1).create_pipeline('master') +# 1. ScheduledJobFixture.new(16, 1).finish_stage_until('test') # 1. Wait until rollout 10% job is triggered -# 1. ScheduledJobFixture.new(29, 1).drop_jobs('rollout 10%') +# 1. ScheduledJobFixture.new(16, 1).drop_jobs('rollout 10%') # # Expectation: rollout 50% job should be triggered #