2021-05-27 18:10:52 +00:00
|
|
|
# frozen_string_literal: true
|
|
|
|
|
|
|
|
module BulkImports
|
|
|
|
module Common
|
|
|
|
module Extractors
|
|
|
|
class NdjsonExtractor
|
|
|
|
include Gitlab::ImportExport::CommandLineUtil
|
|
|
|
include Gitlab::Utils::StrongMemoize
|
|
|
|
|
2021-06-22 09:07:12 +00:00
|
|
|
# Handed to the download service as its `file_size_limit` option.
FILE_SIZE_LIMIT = 5.gigabytes

# Handed to the download service as its `allowed_content_types` option.
ALLOWED_CONTENT_TYPES = [
  'application/gzip',
  'application/octet-stream'
].freeze

# Template for the relative export download URL; the %{resource}, %{full_path}
# and %{relation} placeholders are filled in by #relative_resource_url.
EXPORT_DOWNLOAD_URL_PATH = "/%{resource}/%{full_path}/export_relations/download?relation=%{relation}"
|
|
|
|
|
|
|
|
# Sets up an extractor for a single relation and allocates a fresh temporary
# directory for it. The directory is only deleted by #remove_tmp_dir.
#
# @param relation [#to_s] relation identifier; it is interpolated into the
#   export file name and into the download URL
def initialize(relation:)
  @tmp_dir = Dir.mktmpdir
  @relation = relation
end
|
|
|
|
|
|
|
|
# Runs the three extraction steps in order against the temporary directory:
# execute the download service, execute the decompression service, then ask
# the NDJSON reader for this relation's entries.
#
# @param context the pipeline context; forwarded to #download_service
# @return [BulkImports::Pipeline::ExtractedData] wrapper around whatever
#   `consume_relation` returned
def extract(context)
  downloader = download_service(tmp_dir, context)
  downloader.execute

  decompressor = decompression_service(tmp_dir)
  decompressor.execute

  reader = ndjson_reader(tmp_dir)
  entries = reader.consume_relation('', relation)

  BulkImports::Pipeline::ExtractedData.new(data: entries)
end
|
|
|
|
|
|
|
|
# Deletes the temporary directory created in #initialize together with
# everything inside it.
#
# Safe to call more than once: FileUtils.remove_entry raises Errno::ENOENT
# when the path no longer exists, so removal is skipped if the directory is
# already gone (or was never set).
def remove_tmp_dir
  return unless tmp_dir && Dir.exist?(tmp_dir)

  FileUtils.remove_entry(tmp_dir)
end
|
|
|
|
|
|
|
|
# Everything declared below this marker is internal to the extractor.
private
|
|
|
|
|
|
|
|
# Readers for the two values assigned in #initialize.
attr_reader :relation, :tmp_dir
|
|
|
|
|
|
|
|
# Name of the export file for this relation: the relation followed by
# ".ndjson.gz". Computed once and cached in @filename.
#
# @return [String]
def filename
  return @filename if @filename

  @filename = "#{relation}.ndjson.gz"
end
|
|
|
|
|
|
|
|
# Returns the BulkImports::FileDownloadService for this extractor, building it
# on the first call and reusing that instance afterwards (later arguments are
# ignored once it has been built).
#
# @param tmp_dir directory passed to the service as `dir`
# @param context pipeline context; supplies `configuration` and is used to
#   build the relative download URL
def download_service(tmp_dir, context)
  return @download_service if @download_service

  options = {
    configuration: context.configuration,
    relative_url: relative_resource_url(context),
    dir: tmp_dir,
    filename: filename,
    file_size_limit: FILE_SIZE_LIMIT,
    allowed_content_types: ALLOWED_CONTENT_TYPES
  }

  @download_service = BulkImports::FileDownloadService.new(**options)
end
|
|
|
|
|
|
|
|
# Returns the BulkImports::FileDecompressionService for this extractor,
# building it on the first call and reusing that instance afterwards.
#
# @param tmp_dir directory passed to the service as `dir`
def decompression_service(tmp_dir)
  return @decompression_service if @decompression_service

  @decompression_service =
    BulkImports::FileDecompressionService.new(dir: tmp_dir, filename: filename)
end
|
|
|
|
|
|
|
|
# Returns the Gitlab::ImportExport::Json::NdjsonReader for this extractor,
# building it on the first call and reusing that instance afterwards.
#
# @param tmp_dir directory handed to the reader's constructor
def ndjson_reader(tmp_dir)
  @ndjson_reader = Gitlab::ImportExport::Json::NdjsonReader.new(tmp_dir) unless @ndjson_reader
  @ndjson_reader
end
|
|
|
|
|
|
|
|
# Builds the relative export download URL by filling EXPORT_DOWNLOAD_URL_PATH
# with the entity's pluralized name, its encoded source full path and this
# extractor's relation. The result is cached through `strong_memoize`.
#
# @param context pipeline context; must respond to `entity`
# @return [String]
def relative_resource_url(context)
  strong_memoize(:relative_resource_url) do
    format(
      EXPORT_DOWNLOAD_URL_PATH,
      resource: context.entity.pluralized_name,
      full_path: context.entity.encoded_source_full_path,
      relation: relation
    )
  end
end
|
|
|
|
end
|
|
|
|
end
|
|
|
|
end
|
|
|
|
end
|