diff --git a/app/models/repository.rb b/app/models/repository.rb index 0784891d1bf..e4202505634 100644 --- a/app/models/repository.rb +++ b/app/models/repository.rb @@ -956,6 +956,10 @@ class Repository blob_data_at(sha, path) end + def lfsconfig_for(sha) + blob_data_at(sha, '.lfsconfig') + end + def fetch_ref(source_repository, source_ref:, target_ref:) raw_repository.fetch_ref(source_repository.raw_repository, source_ref: source_ref, target_ref: target_ref) end diff --git a/app/services/base_service.rb b/app/services/base_service.rb index 6883ba36c71..3519b7c5e7d 100644 --- a/app/services/base_service.rb +++ b/app/services/base_service.rb @@ -3,7 +3,7 @@ class BaseService attr_accessor :project, :current_user, :params - def initialize(project, user, params = {}) + def initialize(project, user = nil, params = {}) @project, @current_user, @params = project, user, params.dup end diff --git a/app/services/projects/import_service.rb b/app/services/projects/import_service.rb index 00080717600..1781a01cbd4 100644 --- a/app/services/projects/import_service.rb +++ b/app/services/projects/import_service.rb @@ -17,6 +17,8 @@ module Projects def execute add_repository_to_project + download_lfs_objects + import_data success @@ -37,7 +39,7 @@ module Projects # We should skip the repository for a GitHub import or GitLab project import, # because these importers fetch the project repositories for us. - return if has_importer? && importer_class.try(:imports_repository?) + return if importer_imports_repository? if unknown_url? # In this case, we only want to import issues, not a repository. @@ -73,6 +75,27 @@ module Projects end end + def download_lfs_objects + # In this case, we only want to import issues + return if unknown_url? + + # If it has its own repository importer, it has to implement its own lfs import download + return if importer_imports_repository? + + return unless project.lfs_enabled? 
+ + oids_to_download = Projects::LfsPointers::LfsImportService.new(project).execute + download_service = Projects::LfsPointers::LfsDownloadService.new(project) + + oids_to_download.each do |oid, link| + download_service.execute(oid, link) + end + rescue => e + # Right now, to avoid aborting the importing process, we silently fail + # if any exception is raised. + Rails.logger.error("The Lfs import process failed. #{e.message}") + end + def import_data return unless has_importer? @@ -98,5 +121,9 @@ module Projects def unknown_url? project.import_url == Project::UNKNOWN_IMPORT_URL end + + def importer_imports_repository? + has_importer? && importer_class.try(:imports_repository?) + end end end diff --git a/app/services/projects/lfs_pointers/lfs_download_link_list_service.rb b/app/services/projects/lfs_pointers/lfs_download_link_list_service.rb new file mode 100644 index 00000000000..d9fb74b090e --- /dev/null +++ b/app/services/projects/lfs_pointers/lfs_download_link_list_service.rb @@ -0,0 +1,93 @@ +# This service lists the download links from a remote source based on the +# oids provided +module Projects + module LfsPointers + class LfsDownloadLinkListService < BaseService + DOWNLOAD_ACTION = 'download'.freeze + + DownloadLinksError = Class.new(StandardError) + DownloadLinkNotFound = Class.new(StandardError) + + attr_reader :remote_uri + + def initialize(project, remote_uri: nil) + super(project) + + @remote_uri = remote_uri + end + + # This method accepts one parameter: + # - oids: hash of oids to query. The structure is { lfs_file_oid => lfs_file_size } + # + # Returns a hash with the structure { lfs_file_oid => download_link } + def execute(oids) + return {} unless project&.lfs_enabled? && remote_uri && oids.present? + + get_download_links(oids) + end + + private + + def get_download_links(oids) + response = Gitlab::HTTP.post(remote_uri, + body: request_body(oids), + headers: headers) + + raise DownloadLinksError, response.message unless response.success? 
+ + parse_response_links(response['objects']) + end + + def parse_response_links(objects_response) + objects_response.each_with_object({}) do |entry, link_list| + begin + oid = entry['oid'] + link = entry.dig('actions', DOWNLOAD_ACTION, 'href') + + raise DownloadLinkNotFound unless link + + link_list[oid] = add_credentials(link) + rescue DownloadLinkNotFound, URI::InvalidURIError + Rails.logger.error("Link for Lfs Object with oid #{oid} not found or invalid.") + end + end + end + + def request_body(oids) + { + operation: DOWNLOAD_ACTION, + objects: oids.map { |oid, size| { oid: oid, size: size } } + }.to_json + end + + def headers + { + 'Accept' => LfsRequest::CONTENT_TYPE, + 'Content-Type' => LfsRequest::CONTENT_TYPE + }.freeze + end + + def add_credentials(link) + uri = URI.parse(link) + + if should_add_credentials?(uri) + uri.user = remote_uri.user + uri.password = remote_uri.password + end + + uri.to_s + end + + # The download link can be a local url or an object storage url. + # If the download link has the same host as the remote uri then + # we add the same credentials because we may need them + def should_add_credentials?(link_uri) + url_credentials? && link_uri.host == remote_uri.host + end + + def url_credentials? + remote_uri.user.present? || remote_uri.password.present? + end + end + end +end diff --git a/app/services/projects/lfs_pointers/lfs_download_service.rb b/app/services/projects/lfs_pointers/lfs_download_service.rb new file mode 100644 index 00000000000..6ea43561d61 --- /dev/null +++ b/app/services/projects/lfs_pointers/lfs_download_service.rb @@ -0,0 +1,58 @@ +# This service downloads and links lfs objects from a remote URL +module Projects + module LfsPointers + class LfsDownloadService < BaseService + def execute(oid, url) + return unless project&.lfs_enabled? && oid.present? && url.present? 
+ + return if LfsObject.exists?(oid: oid) + + sanitized_uri = Gitlab::UrlSanitizer.new(url) + + with_tmp_file(oid) do |file| + size = download_and_save_file(file, sanitized_uri) + lfs_object = LfsObject.new(oid: oid, size: size, file: file) + + project.all_lfs_objects << lfs_object + end + rescue StandardError => e + Rails.logger.error("LFS file with oid #{oid} could't be downloaded from #{sanitized_uri.sanitized_url}: #{e.message}") + end + + private + + def download_and_save_file(file, sanitized_uri) + IO.copy_stream(open(sanitized_uri.sanitized_url, headers(sanitized_uri)), file) + end + + def headers(sanitized_uri) + {}.tap do |headers| + credentials = sanitized_uri.credentials + + if credentials[:user].present? || credentials[:password].present? + # Using authentication headers in the request + headers[:http_basic_authentication] = [credentials[:user], credentials[:password]] + end + end + end + + def with_tmp_file(oid) + create_tmp_storage_dir + + File.open(File.join(tmp_storage_dir, oid), 'w') { |file| yield file } + end + + def create_tmp_storage_dir + FileUtils.makedirs(tmp_storage_dir) unless Dir.exist?(tmp_storage_dir) + end + + def tmp_storage_dir + @tmp_storage_dir ||= File.join(storage_dir, 'tmp', 'download') + end + + def storage_dir + @storage_dir ||= Gitlab.config.lfs.storage_path + end + end + end +end diff --git a/app/services/projects/lfs_pointers/lfs_import_service.rb b/app/services/projects/lfs_pointers/lfs_import_service.rb new file mode 100644 index 00000000000..b6b0dec142f --- /dev/null +++ b/app/services/projects/lfs_pointers/lfs_import_service.rb @@ -0,0 +1,92 @@ +# This service manages the whole workflow of discovering the Lfs files in a +# repository, linking them to the project and downloading (and linking) the non +# existent ones. 
+module Projects + module LfsPointers + class LfsImportService < BaseService + include Gitlab::Utils::StrongMemoize + + HEAD_REV = 'HEAD'.freeze + LFS_ENDPOINT_PATTERN = /^\t?url\s*=\s*(.+)$/.freeze + LFS_BATCH_API_ENDPOINT = '/info/lfs/objects/batch'.freeze + + LfsImportError = Class.new(StandardError) + + def execute + return {} unless project&.lfs_enabled? + + if external_lfs_endpoint? + # If the endpoint host is different from the import_url it means + # that the repo is using a third party service for storing the LFS files. + # In this case, we have to disable lfs in the project + disable_lfs! + + return {} + end + + get_download_links + rescue LfsDownloadLinkListService::DownloadLinksError => e + raise LfsImportError, "The LFS objects download list couldn't be imported. Error: #{e.message}" + end + + private + + def external_lfs_endpoint? + lfsconfig_endpoint_uri && lfsconfig_endpoint_uri.host != import_uri.host + end + + def disable_lfs! + project.update(lfs_enabled: false) + end + + def get_download_links + existent_lfs = LfsListService.new(project).execute + linked_oids = LfsLinkService.new(project).execute(existent_lfs.keys) + + # Retrieving those oids not linked and which we need to download + not_linked_lfs = existent_lfs.except(*linked_oids) + + LfsDownloadLinkListService.new(project, remote_uri: current_endpoint_uri).execute(not_linked_lfs) + end + + def lfsconfig_endpoint_uri + strong_memoize(:lfsconfig_endpoint_uri) do + # Retrieving the blob data from the .lfsconfig file + data = project.repository.lfsconfig_for(HEAD_REV) + # Parsing the data to retrieve the url + parsed_data = data&.match(LFS_ENDPOINT_PATTERN) + + if parsed_data + URI.parse(parsed_data[1]).tap do |endpoint| + endpoint.user ||= import_uri.user + endpoint.password ||= import_uri.password + end + end + end + rescue URI::InvalidURIError + raise LfsImportError, 'Invalid URL in .lfsconfig file' + end + + def import_uri + @import_uri ||= URI.parse(project.import_url) + rescue 
URI::InvalidURIError + raise LfsImportError, 'Invalid project import URL' + end + + def current_endpoint_uri + (lfsconfig_endpoint_uri || default_endpoint_uri) + end + + # The import url must end with '.git'; here we ensure it does + def default_endpoint_uri + @default_endpoint_uri ||= begin + import_uri.dup.tap do |uri| + path = uri.path.gsub(%r(/$), '') + path += '.git' unless path.ends_with?('.git') + uri.path = path + LFS_BATCH_API_ENDPOINT + end + end + end + end + end +end diff --git a/app/services/projects/lfs_pointers/lfs_link_service.rb b/app/services/projects/lfs_pointers/lfs_link_service.rb new file mode 100644 index 00000000000..d20bdf86c58 --- /dev/null +++ b/app/services/projects/lfs_pointers/lfs_link_service.rb @@ -0,0 +1,29 @@ +# Given a list of oids, this service links the existent Lfs Objects to the project +module Projects + module LfsPointers + class LfsLinkService < BaseService + # Accepts an array of oids to link + # + # Returns the oids of the existent Lfs Objects, now linked to the project + def execute(oids) + return {} unless project&.lfs_enabled? + + # Search and link existing LFS Objects + link_existing_lfs_objects(oids) + end + + private + + def link_existing_lfs_objects(oids) + existent_lfs_objects = LfsObject.where(oid: oids) + + return [] unless existent_lfs_objects.any? 
+ + not_linked_lfs_objects = existent_lfs_objects.where.not(id: project.all_lfs_objects) + project.all_lfs_objects << not_linked_lfs_objects + + existent_lfs_objects.pluck(:oid) + end + end + end +end diff --git a/app/services/projects/lfs_pointers/lfs_list_service.rb b/app/services/projects/lfs_pointers/lfs_list_service.rb new file mode 100644 index 00000000000..b770982cbc0 --- /dev/null +++ b/app/services/projects/lfs_pointers/lfs_list_service.rb @@ -0,0 +1,19 @@ +# This service lists all existent Lfs objects in a repository +module Projects + module LfsPointers + class LfsListService < BaseService + REV = 'HEAD'.freeze + + # Retrieves all lfs blob pointers and returns a hash + # with the structure { lfs_file_oid => lfs_file_size } + def execute + return {} unless project&.lfs_enabled? + + Gitlab::Git::LfsChanges.new(project.repository, REV) + .all_pointers + .map! { |blob| [blob.lfs_oid, blob.lfs_size] } + .to_h + end + end + end +end diff --git a/app/workers/all_queues.yml b/app/workers/all_queues.yml index e42995d9a28..30b6796a7d6 100644 --- a/app/workers/all_queues.yml +++ b/app/workers/all_queues.yml @@ -31,12 +31,14 @@ - github_importer:github_import_import_diff_note - github_importer:github_import_import_issue - github_importer:github_import_import_note +- github_importer:github_import_import_lfs_object - github_importer:github_import_import_pull_request - github_importer:github_import_refresh_import_jid - github_importer:github_import_stage_finish_import - github_importer:github_import_stage_import_base_data - github_importer:github_import_stage_import_issues_and_diff_notes - github_importer:github_import_stage_import_notes +- github_importer:github_import_stage_import_lfs_objects - github_importer:github_import_stage_import_pull_requests - github_importer:github_import_stage_import_repository diff --git a/app/workers/gitlab/github_import/advance_stage_worker.rb b/app/workers/gitlab/github_import/advance_stage_worker.rb index 8d708e15a66..be0b6c180b0 100644 
--- a/app/workers/gitlab/github_import/advance_stage_worker.rb +++ b/app/workers/gitlab/github_import/advance_stage_worker.rb @@ -21,6 +21,7 @@ module Gitlab STAGES = { issues_and_diff_notes: Stage::ImportIssuesAndDiffNotesWorker, notes: Stage::ImportNotesWorker, + lfs_objects: Stage::ImportLfsObjectsWorker, finish: Stage::FinishImportWorker }.freeze diff --git a/app/workers/gitlab/github_import/import_lfs_object_worker.rb b/app/workers/gitlab/github_import/import_lfs_object_worker.rb new file mode 100644 index 00000000000..520c5cb091a --- /dev/null +++ b/app/workers/gitlab/github_import/import_lfs_object_worker.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + class ImportLfsObjectWorker + include ObjectImporter + + def representation_class + Representation::LfsObject + end + + def importer_class + Importer::LfsObjectImporter + end + + def counter_name + :github_importer_imported_lfs_objects + end + + def counter_description + 'The number of imported GitHub Lfs Objects' + end + end + end +end diff --git a/app/workers/gitlab/github_import/stage/import_lfs_objects_worker.rb b/app/workers/gitlab/github_import/stage/import_lfs_objects_worker.rb new file mode 100644 index 00000000000..29257603a9d --- /dev/null +++ b/app/workers/gitlab/github_import/stage/import_lfs_objects_worker.rb @@ -0,0 +1,32 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Stage + class ImportLfsObjectsWorker + include ApplicationWorker + include GithubImport::Queue + include StageMethods + + def perform(project_id) + return unless (project = find_project(project_id)) + + import(project) + end + + # project - An instance of Project. 
+ def import(project) + waiter = Importer::LfsObjectsImporter + .new(project, nil) + .execute + + AdvanceStageWorker.perform_async( + project.id, + { waiter.key => waiter.jobs_remaining }, + :finish + ) + end + end + end + end +end diff --git a/app/workers/gitlab/github_import/stage/import_notes_worker.rb b/app/workers/gitlab/github_import/stage/import_notes_worker.rb index 5f4678a595f..ccf0013180d 100644 --- a/app/workers/gitlab/github_import/stage/import_notes_worker.rb +++ b/app/workers/gitlab/github_import/stage/import_notes_worker.rb @@ -18,7 +18,7 @@ module Gitlab AdvanceStageWorker.perform_async( project.id, { waiter.key => waiter.jobs_remaining }, - :finish + :lfs_objects ) end end diff --git a/changelogs/unreleased/fj-40401-support-import-lfs-objects.yml b/changelogs/unreleased/fj-40401-support-import-lfs-objects.yml new file mode 100644 index 00000000000..a8abdd943ba --- /dev/null +++ b/changelogs/unreleased/fj-40401-support-import-lfs-objects.yml @@ -0,0 +1,5 @@ +--- +title: Added support for LFS Download in the importing process +merge_request: 18871 +author: +type: fixed diff --git a/lib/gitlab/git/repository.rb b/lib/gitlab/git/repository.rb index d1b13ca2342..93f9adaf1f1 100644 --- a/lib/gitlab/git/repository.rb +++ b/lib/gitlab/git/repository.rb @@ -1543,7 +1543,7 @@ module Gitlab end end - def rev_list(including: [], excluding: [], objects: false, &block) + def rev_list(including: [], excluding: [], options: [], objects: false, &block) args = ['rev-list'] args.push(*rev_list_param(including)) @@ -1556,6 +1556,10 @@ module Gitlab args.push('--objects') if objects + if options.any? 
+ args.push(*options) + end + run_git!(args, lazy_block: block) end diff --git a/lib/gitlab/git/rev_list.rb b/lib/gitlab/git/rev_list.rb index 38c3a55f96f..79544ccf13d 100644 --- a/lib/gitlab/git/rev_list.rb +++ b/lib/gitlab/git/rev_list.rb @@ -38,7 +38,10 @@ module Gitlab end def all_objects(require_path: nil, &lazy_block) - get_objects(including: :all, require_path: require_path, &lazy_block) + get_objects(including: :all, + options: ["--filter=blob:limit=#{Gitlab::Git::Blob::LFS_POINTER_MAX_SIZE}"], + require_path: require_path, + &lazy_block) end # This methods returns an array of missed references @@ -54,8 +57,8 @@ module Gitlab repository.rev_list(args).split("\n") end - def get_objects(including: [], excluding: [], require_path: nil) - opts = { including: including, excluding: excluding, objects: true } + def get_objects(including: [], excluding: [], options: [], require_path: nil) + opts = { including: including, excluding: excluding, options: options, objects: true } repository.rev_list(opts) do |lazy_output| objects = objects_from_output(lazy_output, require_path: require_path) diff --git a/lib/gitlab/github_import/importer/lfs_object_importer.rb b/lib/gitlab/github_import/importer/lfs_object_importer.rb new file mode 100644 index 00000000000..a88c17aaf82 --- /dev/null +++ b/lib/gitlab/github_import/importer/lfs_object_importer.rb @@ -0,0 +1,24 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Importer + class LfsObjectImporter + attr_reader :lfs_object, :project + + # lfs_object - An instance of `Gitlab::GithubImport::Representation::LfsObject`. + # project - An instance of `Project`. 
+ def initialize(lfs_object, project, _) + @lfs_object = lfs_object + @project = project + end + + def execute + Projects::LfsPointers::LfsDownloadService + .new(project) + .execute(lfs_object.oid, lfs_object.download_link) + end + end + end + end +end diff --git a/lib/gitlab/github_import/importer/lfs_objects_importer.rb b/lib/gitlab/github_import/importer/lfs_objects_importer.rb new file mode 100644 index 00000000000..6046e30d4ef --- /dev/null +++ b/lib/gitlab/github_import/importer/lfs_objects_importer.rb @@ -0,0 +1,37 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Importer + class LfsObjectsImporter + include ParallelScheduling + + def importer_class + LfsObjectImporter + end + + def representation_class + Representation::LfsObject + end + + def sidekiq_worker_class + ImportLfsObjectWorker + end + + def collection_method + :lfs_objects + end + + def each_object_to_import + lfs_objects = Projects::LfsPointers::LfsImportService.new(project).execute + + lfs_objects.each do |object| + yield object + end + rescue StandardError => e + Rails.logger.error("The Lfs import process failed. #{e.message}") + end + end + end + end +end diff --git a/lib/gitlab/github_import/representation/lfs_object.rb b/lib/gitlab/github_import/representation/lfs_object.rb new file mode 100644 index 00000000000..debe0fa0baf --- /dev/null +++ b/lib/gitlab/github_import/representation/lfs_object.rb @@ -0,0 +1,32 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Representation + class LfsObject + include ToHash + include ExposeAttribute + + attr_reader :attributes + + expose_attribute :oid, :download_link + + # Builds a lfs_object + def self.from_api_response(lfs_object) + new({ oid: lfs_object[0], download_link: lfs_object[1] }) + end + + # Builds a new lfs_object using a Hash that was built from a JSON payload. 
+ def self.from_json_hash(raw_hash) + new(Representation.symbolize_hash(raw_hash)) + end + + # attributes - A Hash containing the raw lfs_object details. The keys of this + # Hash must be Symbols. + def initialize(attributes) + @attributes = attributes + end + end + end + end +end diff --git a/lib/gitlab/github_import/sequential_importer.rb b/lib/gitlab/github_import/sequential_importer.rb index 4f7324536a0..3cad919b4eb 100644 --- a/lib/gitlab/github_import/sequential_importer.rb +++ b/lib/gitlab/github_import/sequential_importer.rb @@ -19,7 +19,8 @@ module Gitlab Importer::PullRequestsImporter, Importer::IssuesImporter, Importer::DiffNotesImporter, - Importer::NotesImporter + Importer::NotesImporter, + Importer::LfsObjectsImporter ].freeze # project - The project to import the data into. diff --git a/spec/lib/gitlab/git/rev_list_spec.rb b/spec/lib/gitlab/git/rev_list_spec.rb index 95dc47e2a00..70e90659b0f 100644 --- a/spec/lib/gitlab/git/rev_list_spec.rb +++ b/spec/lib/gitlab/git/rev_list_spec.rb @@ -88,7 +88,7 @@ describe Gitlab::Git::RevList do context '#all_objects' do it 'fetches list of all pushed objects using rev-list' do - stub_popen_rev_list('--all', '--objects', output: "sha1\nsha2") + stub_popen_rev_list('--all', '--objects', '--filter=blob:limit=200', output: "sha1\nsha2") expect { |b| rev_list.all_objects(&b) }.to yield_with_args(%w[sha1 sha2]) end diff --git a/spec/lib/gitlab/github_import/importer/lfs_object_importer_spec.rb b/spec/lib/gitlab/github_import/importer/lfs_object_importer_spec.rb new file mode 100644 index 00000000000..4857f2afbe2 --- /dev/null +++ b/spec/lib/gitlab/github_import/importer/lfs_object_importer_spec.rb @@ -0,0 +1,23 @@ +require 'spec_helper' + +describe Gitlab::GithubImport::Importer::LfsObjectImporter do + let(:project) { create(:project) } + let(:download_link) { "http://www.gitlab.com/lfs_objects/oid" } + + let(:github_lfs_object) do + Gitlab::GithubImport::Representation::LfsObject.new( + oid: 'oid', download_link: 
download_link + ) + end + + let(:importer) { described_class.new(github_lfs_object, project, nil) } + + describe '#execute' do + it 'calls the LfsDownloadService with the lfs object attributes' do + expect_any_instance_of(Projects::LfsPointers::LfsDownloadService) + .to receive(:execute).with('oid', download_link) + + importer.execute + end + end +end diff --git a/spec/lib/gitlab/github_import/importer/lfs_objects_importer_spec.rb b/spec/lib/gitlab/github_import/importer/lfs_objects_importer_spec.rb new file mode 100644 index 00000000000..5f5c6b803c0 --- /dev/null +++ b/spec/lib/gitlab/github_import/importer/lfs_objects_importer_spec.rb @@ -0,0 +1,94 @@ +require 'spec_helper' + +describe Gitlab::GithubImport::Importer::LfsObjectsImporter do + let(:project) { double(:project, id: 4, import_source: 'foo/bar') } + let(:client) { double(:client) } + let(:download_link) { "http://www.gitlab.com/lfs_objects/oid" } + + let(:github_lfs_object) { ['oid', download_link] } + + describe '#parallel?' 
do + it 'returns true when running in parallel mode' do + importer = described_class.new(project, client) + expect(importer).to be_parallel + end + + it 'returns false when running in sequential mode' do + importer = described_class.new(project, client, parallel: false) + expect(importer).not_to be_parallel + end + end + + describe '#execute' do + context 'when running in parallel mode' do + it 'imports lfs objects in parallel' do + importer = described_class.new(project, client) + + expect(importer).to receive(:parallel_import) + + importer.execute + end + end + + context 'when running in sequential mode' do + it 'imports lfs objects in sequence' do + importer = described_class.new(project, client, parallel: false) + + expect(importer).to receive(:sequential_import) + + importer.execute + end + end + end + + describe '#sequential_import' do + it 'imports each lfs object in sequence' do + importer = described_class.new(project, client, parallel: false) + lfs_object_importer = double(:lfs_object_importer) + + allow(importer) + .to receive(:each_object_to_import) + .and_yield(['oid', download_link]) + + expect(Gitlab::GithubImport::Importer::LfsObjectImporter) + .to receive(:new) + .with( + an_instance_of(Gitlab::GithubImport::Representation::LfsObject), + project, + client + ) + .and_return(lfs_object_importer) + + expect(lfs_object_importer).to receive(:execute) + + importer.sequential_import + end + end + + describe '#parallel_import' do + it 'imports each lfs object in parallel' do + importer = described_class.new(project, client) + + allow(importer) + .to receive(:each_object_to_import) + .and_yield(github_lfs_object) + + expect(Gitlab::GithubImport::ImportLfsObjectWorker) + .to receive(:perform_async) + .with(project.id, an_instance_of(Hash), an_instance_of(String)) + + waiter = importer.parallel_import + + expect(waiter).to be_an_instance_of(Gitlab::JobWaiter) + expect(waiter.jobs_remaining).to eq(1) + end + end + + describe '#collection_options' do + it 
'returns an empty Hash' do + importer = described_class.new(project, client) + + expect(importer.collection_options).to eq({}) + end + end +end diff --git a/spec/lib/gitlab/github_import/importer/repository_importer_spec.rb b/spec/lib/gitlab/github_import/importer/repository_importer_spec.rb index cc9e4b67e72..d8f01dcb76b 100644 --- a/spec/lib/gitlab/github_import/importer/repository_importer_spec.rb +++ b/spec/lib/gitlab/github_import/importer/repository_importer_spec.rb @@ -14,7 +14,8 @@ describe Gitlab::GithubImport::Importer::RepositoryImporter do disk_path: 'foo', repository: repository, create_wiki: true, - import_state: import_state + import_state: import_state, + lfs_enabled?: true ) end diff --git a/spec/lib/gitlab/import_sources_spec.rb b/spec/lib/gitlab/import_sources_spec.rb index f2fa315e3ec..10341486512 100644 --- a/spec/lib/gitlab/import_sources_spec.rb +++ b/spec/lib/gitlab/import_sources_spec.rb @@ -91,4 +91,23 @@ describe Gitlab::ImportSources do end end end + + describe 'imports_repository? checker' do + let(:allowed_importers) { %w[github gitlab_project] } + + it 'fails if any importer other than the allowed ones implements this method' do + current_importers = described_class.values.select { |kind| described_class.importer(kind).try(:imports_repository?) } + not_allowed_importers = current_importers - allowed_importers + + expect(not_allowed_importers).to be_empty, failure_message(not_allowed_importers) + end + + def failure_message(importers_class_names) + <<-MSG + It looks like the #{importers_class_names.join(', ')} importers implements its own way to import the repository. + That means that the lfs object download must be handled for each of them. You can use 'LfsImportService' and + 'LfsDownloadService' to implement it. After that, add the importer name to the list of allowed importers in this spec. 
+ MSG + end + end end diff --git a/spec/services/projects/import_service_spec.rb b/spec/services/projects/import_service_spec.rb index 30c89ebd821..b3815045792 100644 --- a/spec/services/projects/import_service_spec.rb +++ b/spec/services/projects/import_service_spec.rb @@ -3,9 +3,17 @@ require 'spec_helper' describe Projects::ImportService do let!(:project) { create(:project) } let(:user) { project.creator } + let(:import_url) { 'http://www.gitlab.com/demo/repo.git' } + let(:oid_download_links) { { 'oid1' => "#{import_url}/gitlab-lfs/objects/oid1", 'oid2' => "#{import_url}/gitlab-lfs/objects/oid2" } } subject { described_class.new(project, user) } + before do + allow(project).to receive(:lfs_enabled?).and_return(true) + allow_any_instance_of(Projects::LfsPointers::LfsDownloadService).to receive(:execute) + allow_any_instance_of(Projects::LfsPointers::LfsImportService).to receive(:execute).and_return(oid_download_links) + end + describe '#async?' do it 'returns true for an asynchronous importer' do importer_class = double(:importer, async?: true) @@ -63,6 +71,15 @@ describe Projects::ImportService do expect(result[:status]).to eq :error expect(result[:message]).to eq "Error importing repository #{project.import_url} into #{project.full_path} - The repository could not be created." 
end + + context 'when repository creation succeeds' do + it 'does not download lfs files' do + expect_any_instance_of(Projects::LfsPointers::LfsImportService).not_to receive(:execute) + expect_any_instance_of(Projects::LfsPointers::LfsDownloadService).not_to receive(:execute) + + subject.execute + end + end end context 'with known url' do @@ -91,6 +108,15 @@ describe Projects::ImportService do expect(result[:status]).to eq :error end + + context 'when repository import scheduled' do + it 'does not download lfs objects' do + expect_any_instance_of(Projects::LfsPointers::LfsImportService).not_to receive(:execute) + expect_any_instance_of(Projects::LfsPointers::LfsDownloadService).not_to receive(:execute) + + subject.execute + end + end end context 'with a non Github repository' do @@ -99,9 +125,10 @@ describe Projects::ImportService do project.import_type = 'bitbucket' end - it 'succeeds if repository import is successfully' do + it 'succeeds if repository import is successfull' do expect_any_instance_of(Gitlab::Shell).to receive(:import_repository).and_return(true) expect_any_instance_of(Gitlab::BitbucketImport::Importer).to receive(:execute).and_return(true) + expect_any_instance_of(Projects::LfsPointers::LfsImportService).to receive(:execute).and_return({}) result = subject.execute @@ -116,6 +143,29 @@ describe Projects::ImportService do expect(result[:status]).to eq :error expect(result[:message]).to eq "Error importing repository #{project.import_url} into #{project.full_path} - Failed to import the repository" end + + context 'when repository import scheduled' do + before do + allow_any_instance_of(Gitlab::Shell).to receive(:import_repository).and_return(true) + allow(subject).to receive(:import_data) + end + + it 'downloads lfs objects if lfs_enabled is enabled for project' do + allow(project).to receive(:lfs_enabled?).and_return(true) + expect_any_instance_of(Projects::LfsPointers::LfsImportService).to receive(:execute).and_return(oid_download_links) + 
expect_any_instance_of(Projects::LfsPointers::LfsDownloadService).to receive(:execute).twice + + subject.execute + end + + it 'does not download lfs objects if lfs_enabled is not enabled for project' do + allow(project).to receive(:lfs_enabled?).and_return(false) + expect_any_instance_of(Projects::LfsPointers::LfsImportService).not_to receive(:execute) + expect_any_instance_of(Projects::LfsPointers::LfsDownloadService).not_to receive(:execute) + + subject.execute + end + end end end @@ -147,6 +197,26 @@ describe Projects::ImportService do expect(result[:status]).to eq :error end + + context 'when importer' do + it 'has a custom repository importer it does not download lfs objects' do + allow(Gitlab::GithubImport::ParallelImporter).to receive(:imports_repository?).and_return(true) + + expect_any_instance_of(Projects::LfsPointers::LfsImportService).not_to receive(:execute) + expect_any_instance_of(Projects::LfsPointers::LfsDownloadService).not_to receive(:execute) + + subject.execute + end + + it 'does not have a custom repository importer downloads lfs objects' do + allow(Gitlab::GithubImport::ParallelImporter).to receive(:imports_repository?).and_return(false) + + expect_any_instance_of(Projects::LfsPointers::LfsImportService).to receive(:execute).and_return(oid_download_links) + expect_any_instance_of(Projects::LfsPointers::LfsDownloadService).to receive(:execute) + + subject.execute + end + end end context 'with blocked import_URL' do diff --git a/spec/services/projects/lfs_pointers/lfs_download_link_list_service_spec.rb b/spec/services/projects/lfs_pointers/lfs_download_link_list_service_spec.rb new file mode 100644 index 00000000000..d7a2829d5f8 --- /dev/null +++ b/spec/services/projects/lfs_pointers/lfs_download_link_list_service_spec.rb @@ -0,0 +1,102 @@ +require 'spec_helper' + +describe Projects::LfsPointers::LfsDownloadLinkListService do + let(:import_url) { 'http://www.gitlab.com/demo/repo.git' } + let(:lfs_endpoint) { 
"#{import_url}/info/lfs/objects/batch" } + let!(:project) { create(:project, import_url: import_url) } + let(:new_oids) { { 'oid1' => 123, 'oid2' => 125 } } + let(:remote_uri) { URI.parse(lfs_endpoint) } + + let(:objects_response) do + body = new_oids.map do |oid, size| + { + 'oid' => oid, + 'size' => size, + 'actions' => { + 'download' => { 'href' => "#{import_url}/gitlab-lfs/objects/#{oid}" } + } + } + end + + Struct.new(:success?, :objects).new(true, body) + end + + let(:invalid_object_response) do + [ + 'oid' => 'whatever', + 'size' => 123 + ] + end + + subject { described_class.new(project, remote_uri: remote_uri) } + + before do + allow(project).to receive(:lfs_enabled?).and_return(true) + allow(Gitlab::HTTP).to receive(:post).and_return(objects_response) + end + + describe '#execute' do + it 'retrieves each download link of every non existent lfs object' do + subject.execute(new_oids).each do |oid, link| + expect(link).to eq "#{import_url}/gitlab-lfs/objects/#{oid}" + end + end + + context 'credentials' do + context 'when the download link and the lfs_endpoint have the same host' do + context 'when lfs_endpoint has credentials' do + let(:import_url) { 'http://user:password@www.gitlab.com/demo/repo.git' } + + it 'adds credentials to the download_link' do + result = subject.execute(new_oids) + + result.each do |oid, link| + expect(link.starts_with?('http://user:password@')).to be_truthy + end + end + end + + context 'when lfs_endpoint does not have any credentials' do + it 'does not add any credentials' do + result = subject.execute(new_oids) + + result.each do |oid, link| + expect(link.starts_with?('http://user:password@')).to be_falsey + end + end + end + end + + context 'when the download link and the lfs_endpoint have different hosts' do + let(:import_url_with_credentials) { 'http://user:password@www.otherdomain.com/demo/repo.git' } + let(:lfs_endpoint) { "#{import_url_with_credentials}/info/lfs/objects/batch" } + + it 'downloads without any credentials' 
do + result = subject.execute(new_oids) + + result.each do |oid, link| + expect(link.starts_with?('http://user:password@')).to be_falsey + end + end + end + end + end + + describe '#get_download_links' do + it 'raises an error if the request fails' do + allow(Gitlab::HTTP).to receive(:post).and_return(Struct.new(:success?, :message).new(false, 'Failed request')) + + expect { subject.send(:get_download_links, new_oids) }.to raise_error(described_class::DownloadLinksError) + end + end + + describe '#parse_response_links' do + it 'does not add oid entry if href not found' do + expect(Rails.logger).to receive(:error).with("Link for Lfs Object with oid whatever not found or invalid.") + + result = subject.send(:parse_response_links, invalid_object_response) + + expect(result).to be_empty + end + end +end diff --git a/spec/services/projects/lfs_pointers/lfs_download_service_spec.rb b/spec/services/projects/lfs_pointers/lfs_download_service_spec.rb new file mode 100644 index 00000000000..6af5bfc7689 --- /dev/null +++ b/spec/services/projects/lfs_pointers/lfs_download_service_spec.rb @@ -0,0 +1,69 @@ +require 'spec_helper' + +describe Projects::LfsPointers::LfsDownloadService do + let(:project) { create(:project) } + let(:oid) { '9e548e25631dd9ce6b43afd6359ab76da2819d6a5b474e66118c7819e1d8b3e8' } + let(:download_link) { "http://gitlab.com/#{oid}" } + let(:lfs_content) do + <<~HEREDOC + whatever + HEREDOC + end + + subject { described_class.new(project) } + + before do + allow(project).to receive(:lfs_enabled?).and_return(true) + WebMock.stub_request(:get, download_link).to_return(body: lfs_content) + end + + describe '#execute' do + context 'when file download succeeds' do + it 'a new lfs object is created' do + expect { subject.execute(oid, download_link) }.to change { LfsObject.count }.from(0).to(1) + end + + it 'has the same oid' do + subject.execute(oid, download_link) + + expect(LfsObject.first.oid).to eq oid + end + + it 'stores the content' do + subject.execute(oid, 
download_link) + + expect(File.read(LfsObject.first.file.file.file)).to eq lfs_content + end + end + + context 'when file download fails' do + it 'no lfs object is created' do + expect { subject.execute(oid, download_link) }.to change { LfsObject.count } + end + end + + context 'when credentials present' do + let(:download_link_with_credentials) { "http://user:password@gitlab.com/#{oid}" } + + before do + WebMock.stub_request(:get, download_link).with(headers: { 'Authorization' => 'Basic dXNlcjpwYXNzd29yZA==' }).to_return(body: lfs_content) + end + + it 'the request adds authorization headers' do + subject.execute(oid, download_link_with_credentials) + end + end + + context 'when an lfs object with the same oid already exists' do + before do + create(:lfs_object, oid: 'oid') + end + + it 'does not download the file' do + expect(subject).not_to receive(:download_and_save_file) + + subject.execute('oid', download_link) + end + end + end +end diff --git a/spec/services/projects/lfs_pointers/lfs_import_service_spec.rb b/spec/services/projects/lfs_pointers/lfs_import_service_spec.rb new file mode 100644 index 00000000000..5a75fb38dec --- /dev/null +++ b/spec/services/projects/lfs_pointers/lfs_import_service_spec.rb @@ -0,0 +1,146 @@ +require 'spec_helper' + +describe Projects::LfsPointers::LfsImportService do + let(:import_url) { 'http://www.gitlab.com/demo/repo.git' } + let(:default_endpoint) { "#{import_url}/info/lfs/objects/batch"} + let(:group) { create(:group, lfs_enabled: true)} + let!(:project) { create(:project, namespace: group, import_url: import_url, lfs_enabled: true) } + let!(:lfs_objects_project) { create_list(:lfs_objects_project, 2, project: project) } + let!(:existing_lfs_objects) { LfsObject.pluck(:oid, :size).to_h } + let(:oids) { { 'oid1' => 123, 'oid2' => 125 } } + let(:oid_download_links) { { 'oid1' => "#{import_url}/gitlab-lfs/objects/oid1", 'oid2' => "#{import_url}/gitlab-lfs/objects/oid2" } } + let(:all_oids) { existing_lfs_objects.merge(oids) } 
+ let(:remote_uri) { URI.parse(lfs_endpoint) } + + subject { described_class.new(project) } + + before do + allow(project.repository).to receive(:lfsconfig_for).and_return(nil) + allow(Gitlab.config.lfs).to receive(:enabled).and_return(true) + allow_any_instance_of(Projects::LfsPointers::LfsListService).to receive(:execute).and_return(all_oids) + end + + describe '#execute' do + context 'when no lfs pointer is linked' do + before do + allow_any_instance_of(Projects::LfsPointers::LfsLinkService).to receive(:execute).and_return([]) + allow_any_instance_of(Projects::LfsPointers::LfsDownloadLinkListService).to receive(:execute).and_return(oid_download_links) + expect(Projects::LfsPointers::LfsDownloadLinkListService).to receive(:new).with(project, remote_uri: URI.parse(default_endpoint)).and_call_original + end + + it 'retrieves all lfs pointers in the project repository' do + expect_any_instance_of(Projects::LfsPointers::LfsListService).to receive(:execute) + + subject.execute + end + + it 'links existent lfs objects to the project' do + expect_any_instance_of(Projects::LfsPointers::LfsLinkService).to receive(:execute) + + subject.execute + end + + it 'retrieves the download links of non existent objects' do + expect_any_instance_of(Projects::LfsPointers::LfsDownloadLinkListService).to receive(:execute).with(all_oids) + + subject.execute + end + end + + context 'when some lfs objects are linked' do + before do + allow_any_instance_of(Projects::LfsPointers::LfsLinkService).to receive(:execute).and_return(existing_lfs_objects.keys) + allow_any_instance_of(Projects::LfsPointers::LfsDownloadLinkListService).to receive(:execute).and_return(oid_download_links) + end + + it 'retrieves the download links of non existent objects' do + expect_any_instance_of(Projects::LfsPointers::LfsDownloadLinkListService).to receive(:execute).with(oids) + + subject.execute + end + end + + context 'when all lfs objects are linked' do + before do + 
allow_any_instance_of(Projects::LfsPointers::LfsLinkService).to receive(:execute).and_return(all_oids.keys) + allow_any_instance_of(Projects::LfsPointers::LfsDownloadLinkListService).to receive(:execute) + end + + it 'retrieves no download links' do + expect_any_instance_of(Projects::LfsPointers::LfsDownloadLinkListService).to receive(:execute).with({}).and_call_original + + expect(subject.execute).to be_empty + end + end + + context 'when lfsconfig file exists' do + before do + allow(project.repository).to receive(:lfsconfig_for).and_return("[lfs]\n\turl = #{lfs_endpoint}\n") + end + + context 'when url points to the same import url host' do + let(:lfs_endpoint) { "#{import_url}/different_endpoint" } + let(:service) { double } + + before do + allow(service).to receive(:execute) + end + it 'downloads lfs object using the new endpoint' do + expect(Projects::LfsPointers::LfsDownloadLinkListService).to receive(:new).with(project, remote_uri: remote_uri).and_return(service) + + subject.execute + end + + context 'when import url has credentials' do + let(:import_url) { 'http://user:password@www.gitlab.com/demo/repo.git'} + + it 'adds the credentials to the new endpoint' do + expect(Projects::LfsPointers::LfsDownloadLinkListService) + .to receive(:new).with(project, remote_uri: URI.parse("http://user:password@www.gitlab.com/demo/repo.git/different_endpoint")) + .and_return(service) + + subject.execute + end + + context 'when url has its own credentials' do + let(:lfs_endpoint) { "http://user1:password1@www.gitlab.com/demo/repo.git/different_endpoint" } + + it 'does not add the import url credentials' do + expect(Projects::LfsPointers::LfsDownloadLinkListService) + .to receive(:new).with(project, remote_uri: remote_uri) + .and_return(service) + + subject.execute + end + end + end + end + + context 'when url points to a third party service' do + let(:lfs_endpoint) { 'http://third_party_service.com/info/lfs/objects/' } + + it 'disables lfs from the project' do + 
expect(project.lfs_enabled?).to be_truthy + + subject.execute + + expect(project.lfs_enabled?).to be_falsey + end + + it 'does not download anything' do + expect_any_instance_of(Projects::LfsPointers::LfsListService).not_to receive(:execute) + + subject.execute + end + end + end + end + + describe '#default_endpoint_uri' do + let(:import_url) { 'http://www.gitlab.com/demo/repo' } + + it 'adds suffix .git if the url does not have it' do + expect(subject.send(:default_endpoint_uri).path).to match(/repo.git/) + end + end +end diff --git a/spec/services/projects/lfs_pointers/lfs_link_service_spec.rb b/spec/services/projects/lfs_pointers/lfs_link_service_spec.rb new file mode 100644 index 00000000000..b7b153655db --- /dev/null +++ b/spec/services/projects/lfs_pointers/lfs_link_service_spec.rb @@ -0,0 +1,33 @@ +require 'spec_helper' + +describe Projects::LfsPointers::LfsLinkService do + let!(:project) { create(:project, lfs_enabled: true) } + let!(:lfs_objects_project) { create_list(:lfs_objects_project, 2, project: project) } + let(:new_oids) { { 'oid1' => 123, 'oid2' => 125 } } + let(:all_oids) { LfsObject.pluck(:oid, :size).to_h.merge(new_oids) } + let(:new_lfs_object) { create(:lfs_object) } + let(:new_oid_list) { all_oids.merge(new_lfs_object.oid => new_lfs_object.size) } + + subject { described_class.new(project) } + + before do + allow(project).to receive(:lfs_enabled?).and_return(true) + end + + describe '#execute' do + it 'links existing lfs objects to the project' do + expect(project.all_lfs_objects.count).to eq 2 + + linked = subject.execute(new_oid_list.keys) + + expect(project.all_lfs_objects.count).to eq 3 + expect(linked.size).to eq 3 + end + + it 'returns linked oids' do + linked = lfs_objects_project.map(&:lfs_object).map(&:oid) << new_lfs_object.oid + + expect(subject.execute(new_oid_list.keys)).to eq linked + end + end +end diff --git a/spec/workers/gitlab/github_import/stage/import_lfs_objects_worker_spec.rb 
b/spec/workers/gitlab/github_import/stage/import_lfs_objects_worker_spec.rb new file mode 100644 index 00000000000..b19884d7991 --- /dev/null +++ b/spec/workers/gitlab/github_import/stage/import_lfs_objects_worker_spec.rb @@ -0,0 +1,28 @@ +require 'spec_helper' + +describe Gitlab::GithubImport::Stage::ImportLfsObjectsWorker do + let(:project) { create(:project) } + let(:worker) { described_class.new } + + describe '#import' do + it 'imports all the lfs objects' do + importer = double(:importer) + waiter = Gitlab::JobWaiter.new(2, '123') + + expect(Gitlab::GithubImport::Importer::LfsObjectsImporter) + .to receive(:new) + .with(project, nil) + .and_return(importer) + + expect(importer) + .to receive(:execute) + .and_return(waiter) + + expect(Gitlab::GithubImport::AdvanceStageWorker) + .to receive(:perform_async) + .with(project.id, { '123' => 2 }, :finish) + + worker.import(project) + end + end +end diff --git a/spec/workers/gitlab/github_import/stage/import_notes_worker_spec.rb b/spec/workers/gitlab/github_import/stage/import_notes_worker_spec.rb index 098d2d55386..94cff9e4e80 100644 --- a/spec/workers/gitlab/github_import/stage/import_notes_worker_spec.rb +++ b/spec/workers/gitlab/github_import/stage/import_notes_worker_spec.rb @@ -21,7 +21,7 @@ describe Gitlab::GithubImport::Stage::ImportNotesWorker do expect(Gitlab::GithubImport::AdvanceStageWorker) .to receive(:perform_async) - .with(project.id, { '123' => 2 }, :finish) + .with(project.id, { '123' => 2 }, :lfs_objects) worker.import(client, project) end