Merge branch 'fix-data-integrity-issue-with-repository-downloads-path' into 'master'
Avoid data-integrity issue when cleaning up repository archive cache ## What does this MR do? Sets the default value for `repository_downloads_path` if someone has it configured incorrectly, and it points to the path where repositories are stored. It's also replace invocation of `find` with Ruby code that matches old cached files in a better, and safe way to avoid data-integrity issues. ## Why was this MR needed? The `repository_downloads_path` is used by the `RepositoryArchiveCacheWorker` to remove outdated repository archives, if it points to the wrong directory can cause some data-integrity issue. ## What are the relevant issue numbers? Closes #14222 See merge request !5285
This commit is contained in:
commit
d2598f6273
|
@ -22,6 +22,7 @@ v 8.10.0 (unreleased)
|
|||
- Delete award emoji when deleting a user
|
||||
- Remove pinTo from Flash and make inline flash messages look nicer. !4854 (winniehell)
|
||||
- Add an API for downloading latest successful build from a particular branch or tag. !5347
|
||||
- Avoid data-integrity issue when cleaning up repository archive cache.
|
||||
- Add link to profile to commit avatar. !5163 (winniehell)
|
||||
- Wrap code blocks on Activies and Todos page. !4783 (winniehell)
|
||||
- Align flash messages with left side of page content. !4959 (winniehell)
|
||||
|
|
|
@ -11,16 +11,6 @@ class Repository
|
|||
|
||||
attr_accessor :path_with_namespace, :project
|
||||
|
||||
def self.clean_old_archives
|
||||
Gitlab::Metrics.measure(:clean_old_archives) do
|
||||
repository_downloads_path = Gitlab.config.gitlab.repository_downloads_path
|
||||
|
||||
return unless File.directory?(repository_downloads_path)
|
||||
|
||||
Gitlab::Popen.popen(%W(find #{repository_downloads_path} -not -path #{repository_downloads_path} -mmin +120 -delete))
|
||||
end
|
||||
end
|
||||
|
||||
def initialize(path_with_namespace, project)
|
||||
@path_with_namespace = path_with_namespace
|
||||
@project = project
|
||||
|
|
|
@ -0,0 +1,33 @@
|
|||
class RepositoryArchiveCleanUpService
|
||||
LAST_MODIFIED_TIME_IN_MINUTES = 120
|
||||
|
||||
def initialize(mmin = LAST_MODIFIED_TIME_IN_MINUTES)
|
||||
@mmin = mmin
|
||||
@path = Gitlab.config.gitlab.repository_downloads_path
|
||||
end
|
||||
|
||||
def execute
|
||||
Gitlab::Metrics.measure(:repository_archive_clean_up) do
|
||||
return unless File.directory?(path)
|
||||
|
||||
clean_up_old_archives
|
||||
clean_up_empty_directories
|
||||
end
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
attr_reader :mmin, :path
|
||||
|
||||
def clean_up_old_archives
|
||||
run(%W(find #{path} -not -path #{path} -type f \( -name \*.tar -o -name \*.bz2 -o -name \*.tar.gz -o -name \*.zip \) -maxdepth 2 -mmin +#{mmin} -delete))
|
||||
end
|
||||
|
||||
def clean_up_empty_directories
|
||||
run(%W(find #{path} -not -path #{path} -type d -empty -name \*.git -maxdepth 1 -delete))
|
||||
end
|
||||
|
||||
def run(cmd)
|
||||
Gitlab::Popen.popen(cmd)
|
||||
end
|
||||
end
|
|
@ -4,6 +4,6 @@ class RepositoryArchiveCacheWorker
|
|||
sidekiq_options queue: :default
|
||||
|
||||
def perform
|
||||
Repository.clean_old_archives
|
||||
RepositoryArchiveCleanUpService.new.execute
|
||||
end
|
||||
end
|
||||
|
|
|
@ -106,8 +106,8 @@ production: &base
|
|||
|
||||
## Repository downloads directory
|
||||
# When a user clicks e.g. 'Download zip' on a project, a temporary zip file is created in the following directory.
|
||||
# The default is 'tmp/repositories' relative to the root of the Rails app.
|
||||
# repository_downloads_path: tmp/repositories
|
||||
# The default is 'shared/cache/archive/' relative to the root of the Rails app.
|
||||
# repository_downloads_path: shared/cache/archive/
|
||||
|
||||
## Reply by email
|
||||
# Allow users to comment on issues and merge requests by replying to notification emails.
|
||||
|
|
|
@ -211,7 +211,6 @@ Settings.gitlab.default_projects_features['snippets'] = false if Setti
|
|||
Settings.gitlab.default_projects_features['builds'] = true if Settings.gitlab.default_projects_features['builds'].nil?
|
||||
Settings.gitlab.default_projects_features['container_registry'] = true if Settings.gitlab.default_projects_features['container_registry'].nil?
|
||||
Settings.gitlab.default_projects_features['visibility_level'] = Settings.send(:verify_constant, Gitlab::VisibilityLevel, Settings.gitlab.default_projects_features['visibility_level'], Gitlab::VisibilityLevel::PRIVATE)
|
||||
Settings.gitlab['repository_downloads_path'] = File.join(Settings.shared['path'], 'cache/archive') if Settings.gitlab['repository_downloads_path'].nil?
|
||||
Settings.gitlab['domain_whitelist'] ||= []
|
||||
Settings.gitlab['import_sources'] ||= %w[github bitbucket gitlab gitorious google_code fogbugz git gitlab_project]
|
||||
Settings.gitlab['trusted_proxies'] ||= []
|
||||
|
@ -315,6 +314,21 @@ Settings.repositories['storages'] ||= {}
|
|||
# Setting gitlab_shell.repos_path is DEPRECATED and WILL BE REMOVED in version 9.0
|
||||
Settings.repositories.storages['default'] ||= Settings.gitlab_shell['repos_path'] || Settings.gitlab['user_home'] + '/repositories/'
|
||||
|
||||
#
|
||||
# The repository_downloads_path is used to remove outdated repository
|
||||
# archives, if someone has it configured incorrectly, and it points
|
||||
# to the path where repositories are stored this can cause some
|
||||
# data-integrity issue. In this case, we sets it to the default
|
||||
# repository_downloads_path value.
|
||||
#
|
||||
repositories_storages_path = Settings.repositories.storages.values
|
||||
repository_downloads_path = Settings.gitlab['repository_downloads_path'].to_s.gsub(/\/$/, '')
|
||||
repository_downloads_full_path = File.expand_path(repository_downloads_path, Settings.gitlab['user_home'])
|
||||
|
||||
if repository_downloads_path.blank? || repositories_storages_path.any? { |path| [repository_downloads_path, repository_downloads_full_path].include?(path.gsub(/\/$/, '')) }
|
||||
Settings.gitlab['repository_downloads_path'] = File.join(Settings.shared['path'], 'cache/archive')
|
||||
end
|
||||
|
||||
#
|
||||
# Backup
|
||||
#
|
||||
|
|
|
@ -1172,30 +1172,6 @@ describe Repository, models: true do
|
|||
end
|
||||
end
|
||||
|
||||
describe '.clean_old_archives' do
|
||||
let(:path) { Gitlab.config.gitlab.repository_downloads_path }
|
||||
|
||||
context 'when the downloads directory does not exist' do
|
||||
it 'does not remove any archives' do
|
||||
expect(File).to receive(:directory?).with(path).and_return(false)
|
||||
|
||||
expect(Gitlab::Popen).not_to receive(:popen)
|
||||
|
||||
described_class.clean_old_archives
|
||||
end
|
||||
end
|
||||
|
||||
context 'when the downloads directory exists' do
|
||||
it 'removes old archives' do
|
||||
expect(File).to receive(:directory?).with(path).and_return(true)
|
||||
|
||||
expect(Gitlab::Popen).to receive(:popen)
|
||||
|
||||
described_class.clean_old_archives
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
describe "#keep_around" do
|
||||
it "stores a reference to the specified commit sha so it isn't garbage collected" do
|
||||
repository.keep_around(sample_commit.id)
|
||||
|
|
|
@ -0,0 +1,81 @@
|
|||
require 'spec_helper'
|
||||
|
||||
describe RepositoryArchiveCleanUpService, services: true do
|
||||
describe '#execute' do
|
||||
subject(:service) { described_class.new }
|
||||
|
||||
context 'when the downloads directory does not exist' do
|
||||
it 'does not remove any archives' do
|
||||
path = '/invalid/path/'
|
||||
stub_repository_downloads_path(path)
|
||||
|
||||
expect(File).to receive(:directory?).with(path).and_return(false)
|
||||
expect(service).not_to receive(:clean_up_old_archives)
|
||||
expect(service).not_to receive(:clean_up_empty_directories)
|
||||
|
||||
service.execute
|
||||
end
|
||||
end
|
||||
|
||||
context 'when the downloads directory exists' do
|
||||
shared_examples 'invalid archive files' do |dirname, extensions, mtime|
|
||||
it 'does not remove files and directoy' do
|
||||
in_directory_with_files(dirname, extensions, mtime) do |dir, files|
|
||||
service.execute
|
||||
|
||||
files.each { |file| expect(File.exist?(file)).to eq true }
|
||||
expect(File.directory?(dir)).to eq true
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
it 'removes files older than 2 hours that matches valid archive extensions' do
|
||||
in_directory_with_files('sample.git', %w[tar tar.bz2 tar.gz zip], 2.hours) do |dir, files|
|
||||
service.execute
|
||||
|
||||
files.each { |file| expect(File.exist?(file)).to eq false }
|
||||
expect(File.directory?(dir)).to eq false
|
||||
end
|
||||
end
|
||||
|
||||
context 'with files older than 2 hours that does not matches valid archive extensions' do
|
||||
it_behaves_like 'invalid archive files', 'sample.git', %w[conf rb], 2.hours
|
||||
end
|
||||
|
||||
context 'with files older than 2 hours inside invalid directories' do
|
||||
it_behaves_like 'invalid archive files', 'john_doe/sample.git', %w[conf rb tar tar.gz], 2.hours
|
||||
end
|
||||
|
||||
context 'with files newer than 2 hours that matches valid archive extensions' do
|
||||
it_behaves_like 'invalid archive files', 'sample.git', %w[tar tar.bz2 tar.gz zip], 1.hour
|
||||
end
|
||||
|
||||
context 'with files newer than 2 hours that does not matches valid archive extensions' do
|
||||
it_behaves_like 'invalid archive files', 'sample.git', %w[conf rb], 1.hour
|
||||
end
|
||||
|
||||
context 'with files newer than 2 hours inside invalid directories' do
|
||||
it_behaves_like 'invalid archive files', 'sample.git', %w[conf rb tar tar.gz], 1.hour
|
||||
end
|
||||
end
|
||||
|
||||
def in_directory_with_files(dirname, extensions, mtime)
|
||||
Dir.mktmpdir do |tmpdir|
|
||||
stub_repository_downloads_path(tmpdir)
|
||||
dir = File.join(tmpdir, dirname)
|
||||
files = create_temporary_files(dir, extensions, mtime)
|
||||
|
||||
yield(dir, files)
|
||||
end
|
||||
end
|
||||
|
||||
def stub_repository_downloads_path(path)
|
||||
allow(Gitlab.config.gitlab).to receive(:repository_downloads_path).and_return(path)
|
||||
end
|
||||
|
||||
def create_temporary_files(dir, extensions, mtime)
|
||||
FileUtils.mkdir_p(dir)
|
||||
FileUtils.touch(extensions.map { |ext| File.join(dir, "sample.#{ext}") }, mtime: Time.now - mtime)
|
||||
end
|
||||
end
|
||||
end
|
Loading…
Reference in New Issue