gitlab-org--gitlab-foss/lib/gitlab/import_export/project_tree_restorer.rb

205 lines
7.7 KiB
Ruby
Raw Normal View History

2016-04-14 15:10:57 +00:00
module Gitlab
module ImportExport
class ProjectTreeRestorer
# Relation types that must never carry a group_id and a project_id together.
RESTRICT_PROJECT_AND_GROUP = %i[milestone milestones].freeze
# Prepares a restorer that reads <export_path>/project.json.
#
# user    - handed to the members mapper and to every relation built from the tree
# shared  - must respond to +export_path+; also receives errors raised during restore
# project - record the exported tree is restored into
def initialize(user:, shared:, project:)
  @path = File.join(shared.export_path, 'project.json')
  @user = user
  @shared = shared
  @project = project
  # Kept on its own so the project can be re-fetched by id after each saved batch.
  @project_id = project.id
  # Becomes false as soon as one relation cannot be stored.
  @saved = true
end
# Reads and decodes the exported project.json, then recreates the relations
# it contains.
#
# Returns whatever +create_relations+ returns. Any StandardError (including
# an unreadable or malformed JSON file) is passed to +@shared.error+ and
# +false+ is returned instead of raising.
def restore
  begin
    # File.read, unlike IO.read, never interprets a leading "|" as a command.
    json = File.read(@path)
    @tree_hash = ActiveSupport::JSON.decode(json)
  rescue => e
    Rails.logger.error("Import/Export error: #{e.message}")
    raise Gitlab::ImportExport::Error, 'Incorrect JSON format'
  end

  # Members are pulled out of the tree; they are consumed by the members mapper.
  @project_members = @tree_hash.delete('project_members')

  ActiveRecord::Base.uncached do
    ActiveRecord::Base.no_touching do
      create_relations
    end
  end
rescue => e
  @shared.error(e)
  false
end
# Project the tree is restored into. Before a tree has been loaded this is
# simply the project given to the constructor; afterwards it is the
# memoized result of +restore_project+.
def restored_project
  if @tree_hash
    @restored_project ||= restore_project
  else
    @project
  end
end
2016-03-10 17:43:57 +00:00
private
# Memoized MembersMapper built from the members taken out of the exported
# tree, +@user+ and the restored project.
def members_mapper
  @members_mapper ||= Gitlab::ImportExport::MembersMapper.new(exported_members: @project_members,
                                                              user: @user,
                                                              project: restored_project)
end
# Loops through the tree of models defined in import_export.yml and
# finds them in the imported JSON so they can be instantiated and saved
# in the DB. The structure and relationships between models are guessed from
# the configuration yaml file too.
# Finally, it updates each attribute in the newly imported project.
#
# Returns +@saved+: true unless storing one of the relations failed.
def create_relations
  default_relation_list.each do |relation|
    if relation.is_a?(Hash)
      # Relation with nested sub-relations, e.g. { issues: [...] }
      create_sub_relations(relation, @tree_hash)
    elsif @tree_hash[relation.to_s].present?
      save_relation_hash(@tree_hash[relation.to_s], relation)
    end
  end

  # latest_merge_request_diff is a bidirectional foreign key between merge
  # requests and their diffs, so importing is a special case: the ids are
  # set here, after the relations above have been processed.
  @project.merge_requests.set_latest_merge_request_diff_ids!

  @saved
end
2017-09-01 15:20:09 +00:00
# Builds the objects for one batch of a relation and attaches them to the
# restored project under +relation_key+.
#
# relation_hash_batch - a relation hash or an array of them
# relation_key        - name of the relation on the project
#
# Sets +@saved+ to false when the project reports that the update failed.
# Returns the freshly loaded project.
def save_relation_hash(relation_hash_batch, relation_key)
  relation_hash = create_relation(relation_key, relation_hash_batch)

  @saved = false unless restored_project.append_or_update_attribute(relation_key, relation_hash)

  # Restore the project again, extra query that skips holding the AR objects in memory
  @restored_project = Project.find(@project_id)
end
# Entries of the reader's tree, leaving out the hash entry that describes
# project_members.
def default_relation_list
  reader.tree.select do |node|
    !node.is_a?(Hash) || !node[:project_members]
  end
end
# Writes the cleaned project attributes onto +@project+ with
# +update_columns+ and returns that same project.
def restore_project
  @project.tap { |target| target.update_columns(project_params) }
end
2017-09-01 15:20:09 +00:00
# Attributes to write onto the project: the plain attributes from the JSON
# tree with the override params merged over them, passed through
# AttributeCleaner. Memoized.
def project_params
  return @project_params if @project_params

  merged_attributes = json_params.merge(override_params)

  # Cleaning all imported and overridden params
  @project_params = Gitlab::ImportExport::AttributeCleaner.clean(
    relation_hash: merged_attributes,
    relation_class: Project,
    excluded_keys: excluded_keys_for_relation(:project)
  )
end
# The 'override_params' entry stored in the project's import data, or an
# empty hash when the import data, its data, or the entry is missing. Memoized.
def override_params
  return @override_params if @override_params

  import_data = @project.import_data
  stored_overrides = import_data&.data&.fetch('override_params', nil)
  @override_params = stored_overrides || {}
end
# Entries of the tree that are plain project attributes: a value that
# responds to +each+ (a nested relation) is only kept when its key is an
# actual Project column. Memoized.
def json_params
  @json_params ||= @tree_hash.select do |attribute, content|
    # keep params that are not 1 to many or 1 to 1 relations
    !content.respond_to?(:each) || Project.column_names.include?(attribute)
  end
end
2016-06-13 14:55:51 +00:00
# Given a relation hash containing one or more models and its relationships,
# loops through each model and each object from a model type
# and assigns its correspondent attributes hash from +tree_hash+
# Example:
# +relation_key+ issues, loops through the list of *issues* and for each individual
# issue, finds any subrelations such as notes, creates them and assign them back to the hash
#
# Recursively calls this method if the sub-relation is a hash containing more sub-relations
#
# With +save: false+ (the nested calls) nothing is stored and +tree_hash+ is
# left in place; only the top-level call saves and removes processed items.
def create_sub_relations(relation, tree_hash, save: true)
  relation_key = relation.keys.first.to_s
  return if tree_hash[relation_key].blank?

  tree_array = [tree_hash[relation_key]].flatten

  # Avoid keeping a possible heavy object in memory once we are done with it
  while (relation_item = tree_array.shift)
    # The transaction at this level is less speedy than one single transaction
    # But we can't have it in the upper level or GC won't get rid of the AR objects
    # after we save the batch.
    Project.transaction do
      process_sub_relation(relation, relation_item)

      # For every subrelation that hangs from Project, save the associated records all together
      # This effectively batches all records per subrelation item, only keeping those in memory
      # We have to keep in mind that more batch granularity << Memory, but >> Slowness
      if save
        save_relation_hash([relation_item], relation_key)
        tree_hash[relation_key].delete(relation_item)
      end
    end
  end

  tree_hash.delete(relation_key) if save
end
2017-09-03 18:01:14 +00:00
# Replaces, inside +relation_item+, the raw hash of every sub-relation listed
# in +relation+ with the object(s) built by +create_relation+.
#
# relation      - single-key hash mapping a relation to its sub-relations
# relation_item - attributes hash of one exported object; modified in place
def process_sub_relation(relation, relation_item)
  relation.values.flatten.each do |sub_relation|
    # We just use author to get the user ID, do not attempt to create an instance.
    next if sub_relation == :author

    # Nested sub-relations are resolved first, without saving them on their own.
    create_sub_relations(sub_relation, relation_item, save: false) if sub_relation.is_a?(Hash)

    relation_hash, sub_relation_key = assign_relation_hash(relation_item, sub_relation)
    next if relation_hash.blank?

    relation_item[sub_relation_key.to_s] = create_relation(sub_relation_key, relation_hash)
  end
end
2016-06-13 14:55:51 +00:00
# Looks up the raw data of a sub-relation inside +relation_item+.
#
# sub_relation is either a relation name or a single-key hash whose key is
# the relation name.
#
# Returns [relation_hash, relation_name], where relation_hash is whatever
# +relation_item+ holds under the stringified name (nil when absent).
def assign_relation_hash(relation_item, sub_relation)
  sub_relation = sub_relation.keys.first if sub_relation.is_a?(Hash)

  [relation_item[sub_relation.to_s], sub_relation]
end
2016-03-10 17:43:57 +00:00
# Builds the object(s) for +relation+ through RelationFactory.
#
# relation           - relation name (anything responding to +to_sym+)
# relation_hash_list - one attributes hash or an array of them
#
# Entries for which the factory returns nil are dropped. Returns an array
# when an array was given, otherwise the single built object (or nil).
def create_relation(relation, relation_hash_list)
  relation_array = [relation_hash_list].flatten.map do |relation_hash|
    Gitlab::ImportExport::RelationFactory.create(relation_sym: relation.to_sym,
                                                 relation_hash: parsed_relation_hash(relation_hash, relation.to_sym),
                                                 members_mapper: members_mapper,
                                                 user: @user,
                                                 # Use the accessor rather than the ivar so this does not rely on
                                                 # an earlier argument having populated it.
                                                 project: restored_project,
                                                 excluded_keys: excluded_keys_for_relation(relation))
  end.compact

  relation_hash_list.is_a?(Array) ? relation_array : relation_array.first
end
# Returns a copy of +relation_hash+ whose group_id / project_id point at the
# restored project. For the relation types in RESTRICT_PROJECT_AND_GROUP only
# the ids already present in the hash are rewritten; every other type gets both.
def parsed_relation_hash(relation_hash, relation_type)
  id_overrides =
    if RESTRICT_PROJECT_AND_GROUP.include?(relation_type)
      {}.tap do |ids|
        ids['group_id'] = restored_project.group.try(:id) if relation_hash['group_id']
        ids['project_id'] = restored_project.id if relation_hash['project_id']
      end
    else
      { 'group_id' => restored_project.group.try(:id), 'project_id' => restored_project.id }
    end

  relation_hash.merge(id_overrides)
end
# Memoized Gitlab::ImportExport::Reader created with +@shared+.
def reader
  return @reader if @reader

  @reader = Gitlab::ImportExport::Reader.new(shared: @shared)
end
# Keys excluded for +relation+, as reported by the reader's attributes finder.
#
# Goes through the memoizing +reader+ accessor instead of @reader so the
# lookup also works when nothing has instantiated the reader yet.
def excluded_keys_for_relation(relation)
  reader.attributes_finder.find_excluded_keys(relation)
end
end
end
end