Periodically mark projects that are stuck in importing as failed

Adds import jid to projects
Refactor middleware to set custom expiration time via sidekiq options
Add completed_jids option to sidekiq status and a few other changes
This commit is contained in:
James Lopez 2017-03-24 12:08:34 +01:00
parent 7196adaaa4
commit 58371efbb0
12 changed files with 138 additions and 3 deletions

View file

@ -2,6 +2,8 @@ class RepositoryImportWorker
include Sidekiq::Worker
include DedicatedSidekiqQueue
sidekiq_options status_expiration: StuckImportJobsWorker::IMPORT_EXPIRATION
attr_accessor :project, :current_user
def perform(project_id)
@ -12,7 +14,7 @@ class RepositoryImportWorker
import_url: @project.import_url,
path: @project.path_with_namespace)
project.update_column(:import_error, nil)
project.update_columns(import_jid: self.jid, import_error: nil)
result = Projects::ImportService.new(project, current_user).execute

View file

@ -0,0 +1,50 @@
class StuckImportJobsWorker
include Sidekiq::Worker
include CronjobQueue
EXCLUSIVE_LEASE_KEY = 'fail_stuck_imports_worker_lease'.freeze
IMPORT_EXPIRATION = 15.hours.to_i
def perform
return unless try_obtain_lease
stuck_projects.find_in_batches(batch_size: 500) do |group|
jids = group.map(&:import_jid)
# Find the jobs that aren't currently running or that exceeded the threshold.
completed_jids = Gitlab::SidekiqStatus.completed_jids(jids)
if completed_jids.any?
completed_ids = group.select { |project| completed_jids.include?(project.import_jid) }.map(&:id)
fail_batch!(completed_jids, completed_ids)
end
end
remove_lease
end
private
def stuck_projects
Project.select('id, import_jid').with_import_status(:started).where.not(import_jid: nil)
end
def fail_batch!(completed_jids, completed_ids)
Project.where(id: completed_ids).update_all(import_status: 'failed', import_error: error_message)
Rails.logger.info("Marked stuck import jobs as failed. JIDs: #{completed_jids.join(', ')}")
end
def error_message
"Import timed out. Import took longer than #{IMPORT_EXPIRATION} seconds"
end
def try_obtain_lease
@uuid = Gitlab::ExclusiveLease.new(EXCLUSIVE_LEASE_KEY, timeout: 30.minutes).try_obtain
end
def remove_lease
Gitlab::ExclusiveLease.cancel(EXCLUSIVE_LEASE_KEY, @uuid)
end
end

View file

@ -0,0 +1,4 @@
---
title: Periodically mark projects that are stuck in importing as failed
merge_request:
author:

View file

@ -349,6 +349,9 @@ Settings.cron_jobs['trending_projects_worker']['job_class'] = 'TrendingProjectsW
Settings.cron_jobs['remove_unreferenced_lfs_objects_worker'] ||= Settingslogic.new({})
Settings.cron_jobs['remove_unreferenced_lfs_objects_worker']['cron'] ||= '20 0 * * *'
Settings.cron_jobs['remove_unreferenced_lfs_objects_worker']['job_class'] = 'RemoveUnreferencedLfsObjectsWorker'
Settings.cron_jobs['stuck_import_jobs_worker'] ||= Settingslogic.new({})
Settings.cron_jobs['stuck_import_jobs_worker']['cron'] ||= '15 * * * *'
Settings.cron_jobs['stuck_import_jobs_worker']['job_class'] = 'StuckImportJobsWorker'
#
# GitLab Shell

View file

@ -0,0 +1,9 @@
class AddImportJidToProjects < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
def change
add_column :projects, :import_jid, :string
end
end

View file

@ -920,6 +920,7 @@ ActiveRecord::Schema.define(version: 20170402231018) do
t.text "description_html"
t.boolean "only_allow_merge_if_all_discussions_are_resolved"
t.boolean "printing_merge_request_link_enabled", default: true, null: false
t.string "import_jid"
end
add_index "projects", ["ci_id"], name: "index_projects_on_ci_id", using: :btree

View file

@ -72,6 +72,8 @@ module Gitlab
# job_ids - The Sidekiq job IDs to check.
#
# Returns an array of true or false indicating job completion.
# true = job is still running
# false = job completed
def self.job_status(job_ids)
keys = job_ids.map { |jid| key_for(jid) }
@ -82,6 +84,17 @@ module Gitlab
end
end
# Returns the JIDs that are completed
#
# job_ids - The Sidekiq job IDs to check.
#
# Returns an array of completed JIDs
def self.completed_jids(job_ids)
Sidekiq.redis do |redis|
job_ids.reject { |jid| redis.exists(key_for(jid)) }
end
end
def self.key_for(jid)
STATUS_KEY % jid
end

View file

@ -2,7 +2,9 @@ module Gitlab
module SidekiqStatus
class ClientMiddleware
def call(_, job, _, _)
Gitlab::SidekiqStatus.set(job['jid'])
status_expiration = job['status_expiration'] || Gitlab::SidekiqStatus::DEFAULT_EXPIRATION
Gitlab::SidekiqStatus.set(job['jid'], status_expiration)
yield
end
end

View file

@ -3,7 +3,7 @@ require 'spec_helper'
describe Gitlab::SidekiqStatus::ClientMiddleware do
describe '#call' do
it 'tracks the job in Redis' do
expect(Gitlab::SidekiqStatus).to receive(:set).with('123')
expect(Gitlab::SidekiqStatus).to receive(:set).with('123', Gitlab::SidekiqStatus::DEFAULT_EXPIRATION)
described_class.new.
call('Foo', { 'jid' => '123' }, double(:queue), double(:pool)) { nil }

View file

@ -73,4 +73,17 @@ describe Gitlab::SidekiqStatus do
expect(key).to include('123')
end
end
describe 'completed', :redis do
it 'returns the completed job' do
expect(described_class.completed_jids(%w(123))).to eq(['123'])
end
it 'returns only the jobs completed' do
described_class.set('123')
described_class.set('456')
expect(described_class.completed_jids(%w(123 456 789))).to eq(['789'])
end
end
end

View file

@ -23,10 +23,12 @@ describe RepositoryImportWorker do
error = %q{remote: Not Found fatal: repository 'https://user:pass@test.com/root/repoC.git/' not found }
expect_any_instance_of(Projects::ImportService).to receive(:execute).
and_return({ status: :error, message: error })
allow(subject).to receive(:jid).and_return('123')
subject.perform(project.id)
expect(project.reload.import_error).to include("https://*****:*****@test.com/root/repoC.git/")
expect(project.reload.import_jid).not_to be_nil
end
end
end

View file

@ -0,0 +1,36 @@
require 'spec_helper'
describe StuckImportJobsWorker do
let(:worker) { described_class.new }
let(:exclusive_lease_uuid) { SecureRandom.uuid }
before do
allow_any_instance_of(Gitlab::ExclusiveLease).to receive(:try_obtain).and_return(exclusive_lease_uuid)
end
describe 'long running import' do
let(:project) { create(:empty_project, import_jid: '123', import_status: 'started') }
before do
allow(Gitlab::SidekiqStatus).to receive(:completed_jids).and_return(['123'])
end
it 'marks the project as failed' do
expect { worker.perform }.to change { project.reload.import_status }.to('failed')
end
end
describe 'running import' do
let(:project) { create(:empty_project, import_jid: '123', import_status: 'started') }
before do
allow(Gitlab::SidekiqStatus).to receive(:completed_jids).and_return([])
end
it 'does not mark the project as failed' do
worker.perform
expect(project.reload.import_status).to eq('started')
end
end
end