diff --git a/app/views/admin/application_settings/_repository_check.html.haml b/app/views/admin/application_settings/_repository_check.html.haml index f33769b23c2..fe335f30a62 100644 --- a/app/views/admin/application_settings/_repository_check.html.haml +++ b/app/views/admin/application_settings/_repository_check.html.haml @@ -12,7 +12,7 @@ Enable Repository Checks .help-block GitLab will periodically run - %a{ href: 'https://www.kernel.org/pub/software/scm/git/docs/git-fsck.html', target: 'blank' } 'git fsck' + %a{ href: 'https://git-scm.com/docs/git-fsck', target: 'blank' } 'git fsck' in all project and wiki repositories to look for silent disk corruption issues. .form-group .col-sm-offset-2.col-sm-10 diff --git a/app/workers/admin_email_worker.rb b/app/workers/admin_email_worker.rb index bec0a003a1c..044e470141e 100644 --- a/app/workers/admin_email_worker.rb +++ b/app/workers/admin_email_worker.rb @@ -3,6 +3,12 @@ class AdminEmailWorker include CronjobQueue def perform + send_repository_check_mail if Gitlab::CurrentSettings.repository_checks_enabled + end + + private + + def send_repository_check_mail repository_check_failed_count = Project.where(last_repository_check_failed: true).count return if repository_check_failed_count.zero? diff --git a/app/workers/repository_check/batch_worker.rb b/app/workers/repository_check/batch_worker.rb index 76688cf51c1..72f0a9b0619 100644 --- a/app/workers/repository_check/batch_worker.rb +++ b/app/workers/repository_check/batch_worker.rb @@ -4,8 +4,11 @@ module RepositoryCheck include CronjobQueue RUN_TIME = 3600 + BATCH_SIZE = 10_000 def perform + return unless Gitlab::CurrentSettings.repository_checks_enabled + start = Time.now # This loop will break after a little more than one hour ('a little @@ -15,7 +18,6 @@ module RepositoryCheck # check, only one (or two) will be checked at a time. project_ids.each do |project_id| break if Time.now - start >= RUN_TIME - break unless current_settings.repository_checks_enabled next unless try_obtain_lease(project_id) @@ -31,12 +33,20 @@ module RepositoryCheck # getting ID's from Postgres is not terribly slow, and because no user # has to sit and wait for this query to finish. def project_ids - limit = 10_000 - never_checked_projects = Project.where('last_repository_check_at IS NULL AND created_at < ?', 24.hours.ago) - .limit(limit).pluck(:id) - old_check_projects = Project.where('last_repository_check_at < ?', 1.month.ago) - .reorder('last_repository_check_at ASC').limit(limit).pluck(:id) - never_checked_projects + old_check_projects + never_checked_project_ids(BATCH_SIZE) + old_checked_project_ids(BATCH_SIZE) + end + + def never_checked_project_ids(batch_size) + Project.where(last_repository_check_at: nil) + .where('created_at < ?', 24.hours.ago) + .limit(batch_size).pluck(:id) + end + + def old_checked_project_ids(batch_size) + Project.where.not(last_repository_check_at: nil) + .where('last_repository_check_at < ?', 1.month.ago) + .reorder(last_repository_check_at: :asc) + .limit(batch_size).pluck(:id) end def try_obtain_lease(id) @@ -47,16 +57,5 @@ module RepositoryCheck timeout: 24.hours ).try_obtain end - - def current_settings - # No caching of the settings! If we cache them and an admin disables - # this feature, an active RepositoryCheckWorker would keep going for up - # to 1 hour after the feature was disabled. - if Rails.env.test? - Gitlab::CurrentSettings.fake_application_settings - else - ApplicationSetting.current - end - end end end diff --git a/app/workers/repository_check/single_repository_worker.rb b/app/workers/repository_check/single_repository_worker.rb index 116bc185b38..3cffb8b14e4 100644 --- a/app/workers/repository_check/single_repository_worker.rb +++ b/app/workers/repository_check/single_repository_worker.rb @@ -5,27 +5,34 @@ module RepositoryCheck def perform(project_id) project = Project.find(project_id) - project.update_columns( - last_repository_check_failed: !check(project), - last_repository_check_at: Time.now - ) + healthy = project_healthy?(project) + + update_repository_check_status(project, healthy) end private - def check(project) - if has_pushes?(project) && !git_fsck(project.repository) - false - elsif project.wiki_enabled? - # Historically some projects never had their wiki repos initialized; - # this happens on project creation now. Let's initialize an empty repo - # if it is not already there. - project.create_wiki + def update_repository_check_status(project, healthy) + project.update_columns( + last_repository_check_failed: !healthy, + last_repository_check_at: Time.now + ) + end - git_fsck(project.wiki.repository) - else - true - end + def project_healthy?(project) + repo_healthy?(project) && wiki_repo_healthy?(project) + end + + def repo_healthy?(project) + return true unless has_changes?(project) + + git_fsck(project.repository) + end + + def wiki_repo_healthy?(project) + return true unless has_wiki_changes?(project) + + git_fsck(project.wiki.repository) end def git_fsck(repository) @@ -39,8 +46,19 @@ module RepositoryCheck false end - def has_pushes?(project) + def has_changes?(project) Project.with_push.exists?(project.id) end + + def has_wiki_changes?(project) + return false unless project.wiki_enabled? + + # Historically some projects never had their wiki repos initialized; + # this happens on project creation now. Let's initialize an empty repo + # if it is not already there. + return false unless project.create_wiki + + has_changes?(project) + end end end diff --git a/changelogs/unreleased/tc-repo-verify-mails.yml b/changelogs/unreleased/tc-repo-verify-mails.yml new file mode 100644 index 00000000000..b4d3c4b1596 --- /dev/null +++ b/changelogs/unreleased/tc-repo-verify-mails.yml @@ -0,0 +1,5 @@ +--- +title: Small improvements to repository checks +merge_request: 18484 +author: +type: changed diff --git a/doc/administration/repository_checks.md b/doc/administration/repository_checks.md index ee37ea49874..efeec9db517 100644 --- a/doc/administration/repository_checks.md +++ b/doc/administration/repository_checks.md @@ -13,12 +13,12 @@ checks failed you can see their output on the admin log page under ## Periodic checks -When enabled, GitLab periodically runs a repository check on all project -repositories and wiki repositories in order to detect data corruption problems. +When enabled, GitLab periodically runs a repository check on all project +repositories and wiki repositories in order to detect data corruption. A project will be checked no more than once per month. If any projects fail their repository checks all GitLab administrators will receive an email -notification of the situation. This notification is sent out once a week on -Sunday, by default. +notification of the situation. This notification is sent out once a week, +by default, midnight at the start of Sunday. ## Disabling periodic checks @@ -28,16 +28,18 @@ panel. ## What to do if a check failed If the repository check fails for some repository you should look up the error -in repocheck.log (in the admin panel or on disk; see -`/var/log/gitlab/gitlab-rails` for Omnibus installations or -`/home/git/gitlab/log` for installations from source). Once you have -resolved the issue use the admin panel to trigger a new repository check on -the project. This will clear the 'check failed' state. +in `repocheck.log`: + +- in the [admin panel](logs.md#repocheck.log) +- or on disk, see: + - `/var/log/gitlab/gitlab-rails` for Omnibus installations + - `/home/git/gitlab/log` for installations from source If for some reason the periodic repository check caused a lot of false -alarms you can choose to clear ALL repository check states from the -'Settings' page of the admin panel. +alarms you can choose to clear *all* repository check states by +clicking "Clear all repository checks" on the **Settings** page of the +admin panel (`/admin/application_settings`). --- [ce-3232]: https://gitlab.com/gitlab-org/gitlab-ce/merge_requests/3232 "Auto git fsck" -[git-fsck]: https://www.kernel.org/pub/software/scm/git/docs/git-fsck.html "git fsck documentation" +[git-fsck]: https://git-scm.com/docs/git-fsck "git fsck documentation" diff --git a/spec/workers/admin_email_worker_spec.rb b/spec/workers/admin_email_worker_spec.rb new file mode 100644 index 00000000000..27687f069ea --- /dev/null +++ b/spec/workers/admin_email_worker_spec.rb @@ -0,0 +1,41 @@ +require 'spec_helper' + +describe AdminEmailWorker do + subject(:worker) { described_class.new } + + describe '.perform' do + it 'does not attempt to send repository check mail when they are disabled' do + stub_application_setting(repository_checks_enabled: false) + + expect(worker).not_to receive(:send_repository_check_mail) + + worker.perform + end + + context 'repository_checks enabled' do + before do + stub_application_setting(repository_checks_enabled: true) + end + + it 'checks if repository check mail should be sent' do + expect(worker).to receive(:send_repository_check_mail) + + worker.perform + end + + it 'does not send mail when there are no failed repos' do + expect(RepositoryCheckMailer).not_to receive(:notify) + + worker.perform + end + + it 'send mail when there is a failed repo' do + create(:project, last_repository_check_failed: true, last_repository_check_at: Date.yesterday) + + expect(RepositoryCheckMailer).to receive(:notify).and_return(spy) + + worker.perform + end + end + end +end diff --git a/spec/workers/repository_check/batch_worker_spec.rb b/spec/workers/repository_check/batch_worker_spec.rb index 850b8cd8f5c..6cd27d2fafb 100644 --- a/spec/workers/repository_check/batch_worker_spec.rb +++ b/spec/workers/repository_check/batch_worker_spec.rb @@ -31,8 +31,8 @@ describe RepositoryCheck::BatchWorker do it 'does nothing when repository checks are disabled' do create(:project, created_at: 1.week.ago) - current_settings = double('settings', repository_checks_enabled: false) - expect(subject).to receive(:current_settings) { current_settings } + + stub_application_setting(repository_checks_enabled: false) expect(subject.perform).to eq(nil) end diff --git a/spec/workers/repository_check/single_repository_worker_spec.rb b/spec/workers/repository_check/single_repository_worker_spec.rb index 1d9bbf2ca62..a021235aed6 100644 --- a/spec/workers/repository_check/single_repository_worker_spec.rb +++ b/spec/workers/repository_check/single_repository_worker_spec.rb @@ -2,44 +2,60 @@ require 'spec_helper' require 'fileutils' describe RepositoryCheck::SingleRepositoryWorker do - subject { described_class.new } + subject(:worker) { described_class.new } - it 'passes when the project has no push events' do - project = create(:project_empty_repo, :wiki_disabled) + it 'skips when the project has no push events' do + project = create(:project, :repository, :wiki_disabled) project.events.destroy_all - break_repo(project) + break_project(project) - subject.perform(project.id) + expect(worker).not_to receive(:git_fsck) + + worker.perform(project.id) expect(project.reload.last_repository_check_failed).to eq(false) end it 'fails when the project has push events and a broken repository' do - project = create(:project_empty_repo) + project = create(:project, :repository) create_push_event(project) - break_repo(project) + break_project(project) - subject.perform(project.id) + worker.perform(project.id) expect(project.reload.last_repository_check_failed).to eq(true) end + it 'succeeds when the project repo is valid' do + project = create(:project, :repository, :wiki_disabled) + create_push_event(project) + + expect(worker).to receive(:git_fsck).and_call_original + + expect do + worker.perform(project.id) + end.to change { project.reload.last_repository_check_at } + + expect(project.reload.last_repository_check_failed).to eq(false) + end + it 'fails if the wiki repository is broken' do - project = create(:project_empty_repo, :wiki_enabled) + project = create(:project, :repository, :wiki_enabled) project.create_wiki + create_push_event(project) # Test sanity: everything should be fine before the wiki repo is broken - subject.perform(project.id) + worker.perform(project.id) expect(project.reload.last_repository_check_failed).to eq(false) break_wiki(project) - subject.perform(project.id) + worker.perform(project.id) expect(project.reload.last_repository_check_failed).to eq(true) end it 'skips wikis when disabled' do - project = create(:project_empty_repo, :wiki_disabled) + project = create(:project, :wiki_disabled) # Make sure the test would fail if the wiki repo was checked break_wiki(project) @@ -49,8 +65,8 @@ describe RepositoryCheck::SingleRepositoryWorker do end it 'creates missing wikis' do - project = create(:project_empty_repo, :wiki_enabled) - FileUtils.rm_rf(wiki_path(project)) + project = create(:project, :wiki_enabled) + Gitlab::Shell.new.rm_directory(project.repository_storage, project.wiki.path) subject.perform(project.id) @@ -58,34 +74,39 @@ describe RepositoryCheck::SingleRepositoryWorker do end it 'does not create a wiki if the main repo does not exist at all' do - project = create(:project_empty_repo) - create_push_event(project) - FileUtils.rm_rf(project.repository.path_to_repo) - FileUtils.rm_rf(wiki_path(project)) + project = create(:project, :repository) + Gitlab::Shell.new.rm_directory(project.repository_storage, project.path) + Gitlab::Shell.new.rm_directory(project.repository_storage, project.wiki.path) subject.perform(project.id) - expect(File.exist?(wiki_path(project))).to eq(false) - end - - def break_wiki(project) - objects_dir = wiki_path(project) + '/objects' - - # Replace the /objects directory with a file so that the repo is - # invalid, _and_ 'git init' cannot fix it. - FileUtils.rm_rf(objects_dir) - FileUtils.touch(objects_dir) if File.directory?(wiki_path(project)) - end - - def wiki_path(project) - project.wiki.repository.path_to_repo + expect(Gitlab::Shell.new.exists?(project.repository_storage, project.wiki.path)).to eq(false) end def create_push_event(project) project.events.create(action: Event::PUSHED, author_id: create(:user).id) end - def break_repo(project) - FileUtils.rm_rf(File.join(project.repository.path_to_repo, 'objects')) + def break_wiki(project) + break_repo(wiki_path(project)) + end + + def wiki_path(project) + project.wiki.repository.path_to_repo + end + + def break_project(project) + break_repo(project.repository.path_to_repo) + end + + def break_repo(repo) + # Create or replace blob ffffffffffffffffffffffffffffffffffffffff with an empty file + # This will make the repo invalid, _and_ 'git init' cannot fix it. + path = File.join(repo, 'objects', 'ff') + file = File.join(path, 'ffffffffffffffffffffffffffffffffffffff') + + FileUtils.mkdir_p(path) + FileUtils.rm_f(file) + FileUtils.touch(file) end end