Clear repository check columns asynchronously
This commit is contained in:
parent
9a30d3b5ae
commit
0f602be99f
|
@ -20,18 +20,14 @@ class Admin::ApplicationSettingsController < Admin::ApplicationController
|
|||
end
|
||||
|
||||
def clear_repository_check_states
|
||||
Project.update_all(
|
||||
last_repository_check_failed: false,
|
||||
last_repository_check_at: nil
|
||||
)
|
||||
RepositoryCheck::ClearWorker.perform_async
|
||||
|
||||
redirect_to(
|
||||
admin_application_settings_path,
|
||||
notice: 'All repository check states were cleared.'
|
||||
notice: 'Started asynchronous removal of all repository check states.'
|
||||
)
|
||||
end
|
||||
|
||||
|
||||
private
|
||||
|
||||
def set_application_setting
|
||||
|
|
|
@ -32,7 +32,7 @@ class Admin::ProjectsController < Admin::ApplicationController
|
|||
end
|
||||
|
||||
def repository_check
|
||||
SingleRepositoryCheckWorker.perform_async(@project.id)
|
||||
RepositoryCheck::SingleRepositoryWorker.perform_async(@project.id)
|
||||
|
||||
redirect_to(
|
||||
admin_namespace_project_path(@project.namespace, @project),
|
||||
|
|
|
@ -0,0 +1,63 @@
|
|||
module RepositoryCheck
|
||||
class BatchWorker
|
||||
include Sidekiq::Worker
|
||||
|
||||
RUN_TIME = 3600
|
||||
|
||||
sidekiq_options retry: false
|
||||
|
||||
def perform
|
||||
start = Time.now
|
||||
|
||||
# This loop will break after a little more than one hour ('a little
|
||||
# more' because `git fsck` may take a few minutes), or if it runs out of
|
||||
# projects to check. By default sidekiq-cron will start a new
|
||||
# RepositoryCheckWorker each hour so that as long as there are repositories to
|
||||
# check, only one (or two) will be checked at a time.
|
||||
project_ids.each do |project_id|
|
||||
break if Time.now - start >= RUN_TIME
|
||||
break unless current_settings.repository_checks_enabled
|
||||
|
||||
next unless try_obtain_lease(project_id)
|
||||
|
||||
SingleRepositoryWorker.new.perform(project_id)
|
||||
end
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
# Project.find_each does not support WHERE clauses and
|
||||
# Project.find_in_batches does not support ordering. So we just build an
|
||||
# array of ID's. This is OK because we do it only once an hour, because
|
||||
# getting ID's from Postgres is not terribly slow, and because no user
|
||||
# has to sit and wait for this query to finish.
|
||||
def project_ids
|
||||
limit = 10_000
|
||||
never_checked_projects = Project.where('last_repository_check_at IS NULL').limit(limit).
|
||||
pluck(:id)
|
||||
old_check_projects = Project.where('last_repository_check_at < ?', 1.week.ago).
|
||||
reorder('last_repository_check_at ASC').limit(limit).pluck(:id)
|
||||
never_checked_projects + old_check_projects
|
||||
end
|
||||
|
||||
def try_obtain_lease(id)
|
||||
# Use a 24-hour timeout because on servers/projects where 'git fsck' is
|
||||
# super slow we definitely do not want to run it twice in parallel.
|
||||
Gitlab::ExclusiveLease.new(
|
||||
"project_repository_check:#{id}",
|
||||
timeout: 24.hours
|
||||
).try_obtain
|
||||
end
|
||||
|
||||
def current_settings
|
||||
# No caching of the settings! If we cache them and an admin disables
|
||||
# this feature, an active RepositoryCheckWorker would keep going for up
|
||||
# to 1 hour after the feature was disabled.
|
||||
if Rails.env.test?
|
||||
Gitlab::CurrentSettings.fake_application_settings
|
||||
else
|
||||
ApplicationSetting.current
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
|
@ -0,0 +1,17 @@
|
|||
module RepositoryCheck
|
||||
class ClearWorker
|
||||
include Sidekiq::Worker
|
||||
|
||||
sidekiq_options retry: false
|
||||
|
||||
def perform
|
||||
# Do batched updates because these updates will be slow and locking
|
||||
Project.select(:id).find_in_batches(batch_size: 1000) do |batch|
|
||||
Project.where(id: batch.map(&:id)).update_all(
|
||||
last_repository_check_failed: nil,
|
||||
last_repository_check_at: nil,
|
||||
)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
|
@ -0,0 +1,36 @@
|
|||
module RepositoryCheck
|
||||
class SingleRepositoryWorker
|
||||
include Sidekiq::Worker
|
||||
|
||||
sidekiq_options retry: false
|
||||
|
||||
def perform(project_id)
|
||||
project = Project.find(project_id)
|
||||
project.update_columns(
|
||||
last_repository_check_failed: !check(project),
|
||||
last_repository_check_at: Time.now,
|
||||
)
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def check(project)
|
||||
# Use 'map do', not 'all? do', to prevent short-circuiting
|
||||
[project.repository, project.wiki.repository].map do |repository|
|
||||
git_fsck(repository.path_to_repo)
|
||||
end.all?
|
||||
end
|
||||
|
||||
def git_fsck(path)
|
||||
cmd = %W(nice git --git-dir=#{path} fsck)
|
||||
output, status = Gitlab::Popen.popen(cmd)
|
||||
|
||||
if status.zero?
|
||||
true
|
||||
else
|
||||
Gitlab::RepositoryCheckLogger.error("command failed: #{cmd.join(' ')}\n#{output}")
|
||||
false
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
|
@ -1,61 +0,0 @@
|
|||
class RepositoryCheckWorker
|
||||
include Sidekiq::Worker
|
||||
|
||||
RUN_TIME = 3600
|
||||
|
||||
sidekiq_options retry: false
|
||||
|
||||
def perform
|
||||
start = Time.now
|
||||
|
||||
# This loop will break after a little more than one hour ('a little
|
||||
# more' because `git fsck` may take a few minutes), or if it runs out of
|
||||
# projects to check. By default sidekiq-cron will start a new
|
||||
# RepositoryCheckWorker each hour so that as long as there are repositories to
|
||||
# check, only one (or two) will be checked at a time.
|
||||
project_ids.each do |project_id|
|
||||
break if Time.now - start >= RUN_TIME
|
||||
break unless current_settings.repository_checks_enabled
|
||||
|
||||
next unless try_obtain_lease(project_id)
|
||||
|
||||
SingleRepositoryCheckWorker.new.perform(project_id)
|
||||
end
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
# Project.find_each does not support WHERE clauses and
|
||||
# Project.find_in_batches does not support ordering. So we just build an
|
||||
# array of ID's. This is OK because we do it only once an hour, because
|
||||
# getting ID's from Postgres is not terribly slow, and because no user
|
||||
# has to sit and wait for this query to finish.
|
||||
def project_ids
|
||||
limit = 10_000
|
||||
never_checked_projects = Project.where('last_repository_check_at IS NULL').limit(limit).
|
||||
pluck(:id)
|
||||
old_check_projects = Project.where('last_repository_check_at < ?', 1.week.ago).
|
||||
reorder('last_repository_check_at ASC').limit(limit).pluck(:id)
|
||||
never_checked_projects + old_check_projects
|
||||
end
|
||||
|
||||
def try_obtain_lease(id)
|
||||
# Use a 24-hour timeout because on servers/projects where 'git fsck' is
|
||||
# super slow we definitely do not want to run it twice in parallel.
|
||||
Gitlab::ExclusiveLease.new(
|
||||
"project_repository_check:#{id}",
|
||||
timeout: 24.hours
|
||||
).try_obtain
|
||||
end
|
||||
|
||||
def current_settings
|
||||
# No caching of the settings! If we cache them and an admin disables
|
||||
# this feature, an active RepositoryCheckWorker would keep going for up
|
||||
# to 1 hour after the feature was disabled.
|
||||
if Rails.env.test?
|
||||
Gitlab::CurrentSettings.fake_application_settings
|
||||
else
|
||||
ApplicationSetting.current
|
||||
end
|
||||
end
|
||||
end
|
|
@ -1,34 +0,0 @@
|
|||
class SingleRepositoryCheckWorker
|
||||
include Sidekiq::Worker
|
||||
|
||||
sidekiq_options retry: false
|
||||
|
||||
def perform(project_id)
|
||||
project = Project.find(project_id)
|
||||
project.update_columns(
|
||||
last_repository_check_failed: !check(project),
|
||||
last_repository_check_at: Time.now,
|
||||
)
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def check(project)
|
||||
# Use 'map do', not 'all? do', to prevent short-circuiting
|
||||
[project.repository, project.wiki.repository].map do |repository|
|
||||
git_fsck(repository.path_to_repo)
|
||||
end.all?
|
||||
end
|
||||
|
||||
def git_fsck(path)
|
||||
cmd = %W(nice git --git-dir=#{path} fsck)
|
||||
output, status = Gitlab::Popen.popen(cmd)
|
||||
|
||||
if status.zero?
|
||||
true
|
||||
else
|
||||
Gitlab::RepositoryCheckLogger.error("command failed: #{cmd.join(' ')}\n#{output}")
|
||||
false
|
||||
end
|
||||
end
|
||||
end
|
|
@ -241,7 +241,7 @@ Settings.cron_jobs['stuck_ci_builds_worker']['cron'] ||= '0 0 * * *'
|
|||
Settings.cron_jobs['stuck_ci_builds_worker']['job_class'] = 'StuckCiBuildsWorker'
|
||||
Settings.cron_jobs['repository_check_worker'] ||= Settingslogic.new({})
|
||||
Settings.cron_jobs['repository_check_worker']['cron'] ||= '20 * * * *'
|
||||
Settings.cron_jobs['repository_check_worker']['job_class'] = 'RepositoryCheckWorker'
|
||||
Settings.cron_jobs['repository_check_worker']['job_class'] = 'RepositoryCheck::BatchWorker'
|
||||
Settings.cron_jobs['admin_email_worker'] ||= Settingslogic.new({})
|
||||
Settings.cron_jobs['admin_email_worker']['cron'] ||= '0 0 * * *'
|
||||
Settings.cron_jobs['admin_email_worker']['job_class'] = 'AdminEmailWorker'
|
||||
|
|
|
@ -1,9 +1,7 @@
|
|||
require 'rails_helper'
|
||||
|
||||
feature 'Admin uses repository checks', feature: true do
|
||||
before do
|
||||
login_as :admin
|
||||
end
|
||||
before { login_as :admin }
|
||||
|
||||
scenario 'to trigger a single check' do
|
||||
project = create(:empty_project)
|
||||
|
@ -17,7 +15,12 @@ feature 'Admin uses repository checks', feature: true do
|
|||
end
|
||||
|
||||
scenario 'to see a single failed repository check' do
|
||||
visit_admin_project_page(broken_project)
|
||||
project = create(:empty_project)
|
||||
project.update_columns(
|
||||
last_repository_check_failed: true,
|
||||
last_repository_check_at: Time.now,
|
||||
)
|
||||
visit_admin_project_page(project)
|
||||
|
||||
page.within('.alert') do
|
||||
expect(page.text).to match(/Last repository check \(.* ago\) failed/)
|
||||
|
@ -25,24 +28,16 @@ feature 'Admin uses repository checks', feature: true do
|
|||
end
|
||||
|
||||
scenario 'to clear all repository checks', js: true do
|
||||
project = broken_project
|
||||
visit admin_application_settings_path
|
||||
|
||||
expect(RepositoryCheck::ClearWorker).to receive(:perform_async)
|
||||
|
||||
click_link 'Clear all repository checks' # pop-up should be auto confirmed
|
||||
click_link 'Clear all repository checks'
|
||||
|
||||
expect(project.reload.last_repository_check_failed).to eq(false)
|
||||
expect(page).to have_content('Started asynchronous removal of all repository check states.')
|
||||
end
|
||||
|
||||
def visit_admin_project_page(project)
|
||||
visit admin_namespace_project_path(project.namespace, project)
|
||||
end
|
||||
|
||||
def broken_project
|
||||
project = create(:empty_project)
|
||||
project.update_columns(
|
||||
last_repository_check_failed: true,
|
||||
last_repository_check_at: Time.now,
|
||||
)
|
||||
project
|
||||
end
|
||||
end
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
require 'spec_helper'
|
||||
|
||||
describe RepositoryCheckWorker do
|
||||
subject { RepositoryCheckWorker.new }
|
||||
describe RepositoryCheck::BatchWorker do
|
||||
subject { described_class.new }
|
||||
|
||||
it 'prefers projects that have never been checked' do
|
||||
projects = create_list(:project, 3)
|
|
@ -0,0 +1,17 @@
|
|||
require 'spec_helper'
|
||||
|
||||
describe RepositoryCheck::ClearWorker do
|
||||
it 'clears repository check columns' do
|
||||
project = create(:empty_project)
|
||||
project.update_columns(
|
||||
last_repository_check_failed: true,
|
||||
last_repository_check_at: Time.now,
|
||||
)
|
||||
|
||||
described_class.new.perform
|
||||
project.reload
|
||||
|
||||
expect(project.last_repository_check_failed).to be_nil
|
||||
expect(project.last_repository_check_at).to be_nil
|
||||
end
|
||||
end
|
Loading…
Reference in New Issue