gitlab-org--gitlab-foss/lib/gitlab/cleanup/project_uploads.rb

132 lines
4.0 KiB
Ruby

# frozen_string_literal: true
module Gitlab
module Cleanup
class ProjectUploads
# Directory inside the project uploads root that receives orphaned files
# whose correct location could not be determined.
LOST_AND_FOUND = File.join(ProjectUploadFileFinder::ABSOLUTE_UPLOAD_DIR, '-', 'project-lost-found')
# Logger that receives every progress, result and error message.
attr_reader :logger
# logger - object that receives log messages; when nil (or otherwise falsy)
#          Gitlab::AppLogger is used instead.
def initialize(logger: nil)
  @logger = logger
  @logger ||= Gitlab::AppLogger
end
# Walks every orphaned project upload file and logs the outcome of cleaning
# it up.
#
# dry_run - forwarded to #cleanup; defaults to true. When true the start
#           message is suffixed with ". Dry run".
def run!(dry_run: true)
  dry_run_suffix = dry_run ? '. Dry run' : nil
  logger.info "Looking for orphaned project uploads to clean up#{dry_run_suffix}..."

  each_orphan_file do |orphan_path, upload_path|
    logger.info cleanup(orphan_path, upload_path, dry_run)
  end
end
private
# Decides what to do with a single orphaned file and returns a message
# describing the outcome.
#
# path        - absolute path of the orphaned file
# upload_path - "secret/filename" part of the path, or nil when unknown
# dry_run     - forwarded to the move helpers
def cleanup(path, upload_path, dry_run)
  # Seen in staging: `find` returned a path on which `File.delete` then
  # raised `Errno::ENOENT`, so re-check that the file is still there.
  return "Cannot find file: #{path}" unless File.exist?(path)

  destination = find_correct_path(upload_path) if upload_path
  return move_to_lost_and_found(path, dry_run) unless destination

  move(path, destination, 'fix', dry_run)
end
# Looks up where an orphaned file is supposed to live.
#
# upload_path - String in the form "#{hex_secret}/#{filename}"
#
# Returns the absolute path of the matching FileUploader upload, or nil when
# no such upload exists, it is not local, it has no model, or an error is
# raised while resolving it.
# rubocop: disable CodeReuse/ActiveRecord
def find_correct_path(upload_path)
  record = Upload.find_by(uploader: 'FileUploader', path: upload_path)
  usable = record && record.local? && record.model

  usable ? record.absolute_path : nil
rescue StandardError => error
  # absolute_path depends on a lot of code (e.g. the project record might
  # be missing, which raises). If it does not work, it does not matter
  # whether the file is in the right place: treat it as uncorrectable.
  logger.error error.message
  nil
end
# rubocop: enable CodeReuse/ActiveRecord
# Moves an orphaned file into LOST_AND_FOUND, keeping its path relative to
# the uploads root.
#
# path    - absolute path of the orphaned file
# dry_run - forwarded to #move
#
# Returns the message produced by #move. A path that does not start with
# the uploads root is passed to #move unchanged.
def move_to_lost_and_found(path, dry_run)
  # The directory is escaped so that regex metacharacters in the configured
  # path are matched literally, and the block form of `sub` keeps
  # backslashes in the replacement from being read as back-references.
  new_path = path.sub(/\A#{Regexp.escape(ProjectUploadFileFinder::ABSOLUTE_UPLOAD_DIR.to_s)}/o) { LOST_AND_FOUND }

  move(path, new_path, 'move to lost and found', dry_run)
end
# Moves a file, or only reports that it could be moved.
#
# path     - current location of the file
# new_path - destination; missing parent directories are created
# prefix   - short label for the action, used in the returned message
# dry_run  - when truthy nothing on disk is touched
#
# Returns a String starting with "Can", "Did" or "Error during" followed by
# a description of the move. Errors raised while moving are reported in the
# message instead of being raised.
def move(path, new_path, prefix, dry_run)
  description = "#{prefix} #{path} -> #{new_path}"
  return "Can #{description}" if dry_run

  begin
    target_dir = File.dirname(new_path)
    FileUtils.mkdir_p(target_dir)
    FileUtils.mv(path, new_path)
    "Did #{description}"
  rescue StandardError => error
    "Error during #{description}: #{error.inspect}"
  end
end
# Iterates over project upload files in batches and yields the ones that
# are not in the uploads table.
#
# Yields the absolute file path and its upload path (nil when the file path
# could not be parsed).
def each_orphan_file
  ProjectUploadFileFinder.new.each_file_batch do |batch|
    logger.debug "Processing batch of #{batch.size} project upload file paths, starting with #{batch.first}"

    batch.each do |file_path|
      candidate = ProjectUploadPath.from_path(file_path)
      next unless candidate.orphan?

      yield(file_path, candidate.upload_path)
    end
  end
end
# Splits the absolute path of a file under the uploads root into the
# project's full path and the upload's "secret/filename" path, and answers
# whether a matching Upload record exists.
class ProjectUploadPath
  # Capture 1 is the project full path, capture 2 is "<hex>/<filename>".
  # The uploads root is escaped so that regex metacharacters in the
  # configured directory (e.g. "." or "+") are matched literally.
  PROJECT_FULL_PATH_REGEX = %r{\A#{Regexp.escape(FileUploader.root.to_s)}/(.+)/(\h+/[^/]+)\z}.freeze

  attr_reader :full_path, :upload_path

  # full_path   - project full path, or nil when unknown
  # upload_path - "<hex>/<filename>" part of the file path, or nil when unknown
  def initialize(full_path, upload_path)
    @full_path = full_path
    @upload_path = upload_path
  end

  # Builds an instance from an absolute file path. When the path does not
  # match PROJECT_FULL_PATH_REGEX both attributes are nil.
  def self.from_path(path)
    path_matched = path.match(PROJECT_FULL_PATH_REGEX)
    return new(nil, nil) unless path_matched

    new(path_matched[1], path_matched[2])
  end

  # Returns true when the path could not be parsed, or when no FileUploader
  # upload with this path exists for the project.
  # rubocop: disable CodeReuse/ActiveRecord
  def orphan?
    return true if full_path.nil? || upload_path.nil?

    # It's possible to reduce to one query, but `where_full_path_in` is complex
    !Upload.exists?(path: upload_path, model_id: project_id, model_type: 'Project', uploader: 'FileUploader')
  end
  # rubocop: enable CodeReuse/ActiveRecord

  private

  # Memoized ids of the projects found for full_path. Despite the singular
  # name this is the result of `pluck`, i.e. a list of ids (presumably at
  # most one, since a single full path is queried - TODO confirm).
  # rubocop: disable CodeReuse/ActiveRecord
  def project_id
    @project_id ||= Project.where_full_path_in([full_path]).pluck(:id)
  end
  # rubocop: enable CodeReuse/ActiveRecord
end
end
end
end