gitlab-org--gitlab-foss/lib/bulk_imports/common/extractors/ndjson_extractor.rb

73 lines
2.2 KiB
Ruby
Raw Normal View History

# frozen_string_literal: true
module BulkImports
  module Common
    module Extractors
      # Extractor that fetches the gzipped NDJSON export of one relation into a
      # per-instance temporary directory, runs the decompression service on it
      # and exposes the relation's records as pipeline data.
      #
      # The temporary directory is created in the constructor and is NOT
      # removed by #extract; callers are expected to invoke #remove_tmp_dir
      # once they are done with the extracted data.
      class NdjsonExtractor
        include Gitlab::ImportExport::CommandLineUtil
        include Gitlab::Utils::StrongMemoize

        # Passed to the download service as `file_size_limit`.
        FILE_SIZE_LIMIT = 5.gigabytes

        # Passed to the download service as `allowed_content_types`.
        ALLOWED_CONTENT_TYPES = %w(application/gzip application/octet-stream).freeze

        # Format template for the relative export download URL; filled in by
        # #relative_resource_url with :resource, :full_path and :relation.
        EXPORT_DOWNLOAD_URL_PATH = "/%{resource}/%{full_path}/export_relations/download?relation=%{relation}"

        # @param relation [#to_s] name of the relation to extract; used for the
        #   download file name and the `relation` query parameter.
        def initialize(relation:)
          @relation = relation
          @tmp_dir = Dir.mktmpdir
        end

        # Runs the download service, then the decompression service, and
        # finally reads the relation from the temporary directory.
        #
        # @param context [Object] pipeline context; must respond to
        #   `configuration` and `entity`.
        # @return [BulkImports::Pipeline::ExtractedData] wraps whatever the
        #   NDJSON reader's `consume_relation` returns.
        def extract(context)
          download_service(tmp_dir, context).execute
          decompression_service(tmp_dir).execute

          relations = ndjson_reader(tmp_dir).consume_relation('', relation)

          BulkImports::Pipeline::ExtractedData.new(data: relations)
        end

        # Deletes the temporary directory created in the constructor.
        #
        # Safe to call more than once: FileUtils.remove_entry raises
        # Errno::ENOENT for a missing path, so removal is skipped when the
        # directory no longer exists.
        #
        # @return [void]
        def remove_tmp_dir
          FileUtils.remove_entry(tmp_dir) if Dir.exist?(tmp_dir)
        end

        private

        attr_reader :relation, :tmp_dir

        # @return [String] name of the gzipped NDJSON file for the relation.
        def filename
          @filename ||= "#{relation}.ndjson.gz"
        end

        # Builds the download service on first use.
        #
        # NOTE: the result is memoized with `||=`, so the arguments of the
        # first call win; later calls return the same instance regardless of
        # what is passed.
        def download_service(tmp_dir, context)
          @download_service ||= BulkImports::FileDownloadService.new(
            configuration: context.configuration,
            relative_url: relative_resource_url(context),
            dir: tmp_dir,
            filename: filename,
            file_size_limit: FILE_SIZE_LIMIT,
            allowed_content_types: ALLOWED_CONTENT_TYPES
          )
        end

        # Builds the decompression service on first use (memoized; the
        # argument is only honoured on the first call).
        def decompression_service(tmp_dir)
          @decompression_service ||= BulkImports::FileDecompressionService.new(
            dir: tmp_dir,
            filename: filename
          )
        end

        # Builds the NDJSON reader on first use (memoized; the argument is
        # only honoured on the first call).
        def ndjson_reader(tmp_dir)
          @ndjson_reader ||= Gitlab::ImportExport::Json::NdjsonReader.new(tmp_dir)
        end

        # Fills EXPORT_DOWNLOAD_URL_PATH with the entity's pluralized name,
        # its encoded source full path and the relation name. The result is
        # cached through `strong_memoize`, so the context of the first call
        # is the one that is used.
        #
        # @return [String] relative URL of the relation export download.
        def relative_resource_url(context)
          strong_memoize(:relative_resource_url) do
            resource = context.entity.pluralized_name
            encoded_full_path = context.entity.encoded_source_full_path

            EXPORT_DOWNLOAD_URL_PATH % { resource: resource, full_path: encoded_full_path, relation: relation }
          end
        end
      end
    end
  end
end