gitlab-org--gitlab-foss/lib/github/import.rb

410 lines
14 KiB
Ruby
Raw Normal View History

require_relative 'error'
module Github
class Import
include Gitlab::ShellAdapter
2017-04-19 21:01:14 -04:00
# Import-local subclass of ::MergeRequest bound to the same
# 'merge_requests' table, with its save, commit, update and validate
# callback chains reset.
# NOTE(review): presumably this lets imported records be written without
# firing the regular model callbacks - confirm.
class MergeRequest < ::MergeRequest
  self.table_name = 'merge_requests'

  %i[save commit update validate].each { |kind| reset_callbacks(kind) }
end
# Import-local subclass of ::Issue bound to the same 'issues' table, with
# its save, commit, update and validate callback chains reset.
# NOTE(review): presumably this lets imported records be written without
# firing the regular model callbacks - confirm.
class Issue < ::Issue
  self.table_name = 'issues'

  %i[save commit update validate].each { |kind| reset_callbacks(kind) }
end
# Import-local subclass of ::Note bound to the same 'notes' table, with its
# save, commit, update and validate callback chains reset.
# NOTE(review): presumably this lets imported records be written without
# firing the regular model callbacks - confirm.
class Note < ::Note
  self.table_name = 'notes'

  %i[save commit update validate].each { |kind| reset_callbacks(kind) }
end
# Import-local subclass of ::LegacyDiffNote stored in the 'notes' table,
# with its commit, update and validate callback chains reset.
# NOTE(review): unlike the sibling import models, the :save chain is NOT
# reset here - confirm whether that is intentional.
class LegacyDiffNote < ::LegacyDiffNote
  self.table_name = 'notes'

  %i[commit update validate].each { |kind| reset_callbacks(kind) }
end
attr_reader :project, :repository, :repo, :options, :errors, :cached, :verbose
2017-04-19 19:04:58 -04:00
# Builds an importer for +project+.
#
# @param project the project to import into; its +repository+ and
#   +import_source+ are read here (the latter is used as the "owner/name"
#   segment of the GitHub URLs built elsewhere in this class)
# @param options [Hash] importer options; +:verbose+ (default false) is
#   read here, the whole hash is kept for later use
def initialize(project, options)
  @project    = project
  @repository = project.repository
  @repo       = project.import_source
  @options    = options
  @verbose    = options.fetch(:verbose, false)
  # Two-level cache: every first-level key lazily gets its own empty hash.
  @cached     = Hash.new { |hash, key| hash[key] = {} }
  @errors     = []
end
# rubocop: disable Rails/Output
2017-04-19 20:03:46 -04:00
# Runs every import step in order, announcing each one when +verbose+ is
# set.
#
# @return [Boolean] true when all steps ran, false when the repository
#   fetch raised Github::RepositoryFetchError. Collected errors are
#   persisted via keep_track_of_errors in both cases.
#
# NOTE(review): fetch_releases is defined in this class but is not part of
# this sequence - confirm whether that is intentional.
def execute
  steps = [
    ['Fetching repository...',      :fetch_repository],
    ['Fetching labels...',          :fetch_labels],
    ['Fetching milestones...',      :fetch_milestones],
    ['Fetching pull requests...',   :fetch_pull_requests],
    ['Fetching issues...',          :fetch_issues],
    ['Cloning wiki repository...',  :fetch_wiki_repository],
    ['Expiring repository cache...', :expire_repository_cache]
  ]

  steps.each do |message, step|
    puts message.color(:aqua) if verbose
    send(step)
  end

  true
rescue Github::RepositoryFetchError
  false
ensure
  keep_track_of_errors
end
private
# Creates the project repository when it does not exist yet, registers the
# GitHub repository as the 'github' mirror remote and force-fetches it.
#
# @raise [Github::RepositoryFetchError] when the shell layer raises
#   Gitlab::Shell::Error; the failure is recorded through +error+ first,
#   using the token-free URL.
def fetch_repository
  project.create_repository unless project.repository.exists?

  # The token must be interpolated ("#{...}"); without the '#' the literal
  # text "{options.fetch(:token)}" ends up in the remote URL.
  remote_url = "https://#{options.fetch(:token)}@github.com/#{repo}.git"

  project.repository.add_remote('github', remote_url)
  project.repository.set_remote_as_mirror('github')
  project.repository.fetch_remote('github', forced: true)
rescue Gitlab::Shell::Error => e
  error(:project, "https://github.com/#{repo}.git", e.message)
  raise Github::RepositoryFetchError
end
2017-04-19 21:01:14 -04:00
# Imports the GitHub wiki repository into the project's wiki path unless
# the project wiki repository already exists.
#
# Shell failures are recorded through +error+ and never re-raised, except
# for the "repository not exported" case, which is silently skipped.
def fetch_wiki_repository
  # The token must be interpolated ("#{...}"); without the '#' the literal
  # text "{options.fetch(:token)}" ends up in the clone URL.
  wiki_url  = "https://#{options.fetch(:token)}@github.com/#{repo}.wiki.git"
  wiki_path = "#{project.path_with_namespace}.wiki"

  unless project.wiki.repository_exists?
    gitlab_shell.import_repository(project.repository_storage_path, wiki_path, wiki_url)
  end
rescue Gitlab::Shell::Error => e
  # GitHub reports "repository not exported" when the wiki repository has
  # not been created yet: the wiki is enabled on the repo but has no pages,
  # so the import can be skipped.
  #
  # Anything else is recorded with +error+ (the reporting helper); the
  # +errors+ reader takes no arguments and would raise ArgumentError here.
  error(:wiki, wiki_url, e.message) if e.message !~ /repository not exported/
end
2017-04-19 20:03:46 -04:00
# Walks every page of the repository's labels endpoint and makes sure each
# label exists in the project (matched by title). The id of every label is
# remembered in cached[:label_ids], keyed by title, for label_ids to use.
#
# A failure on one label is recorded through +error+ and does not stop the
# remaining labels from being processed.
def fetch_labels
  url = "/repos/#{repo}/labels"

  while url
    response = Github::Client.new(options).get(url)

    response.body.each do |raw|
      begin
        representation = Github::Representation::Label.new(raw)

        # The block only runs for a newly created label.
        label = project.labels.find_or_create_by!(title: representation.title) do |new_label|
          new_label.color = representation.color
        end

        cached[:label_ids][label.title] = label.id
      rescue => e
        error(:label, representation.url, e.message)
      end
    end

    # Follow the pagination link; nil ends the loop.
    url = response.rels[:next]
  end
end
2017-04-13 17:21:03 -04:00
2017-04-19 20:03:46 -04:00
# Walks every page of the repository's milestones endpoint (all states)
# and creates each milestone in the project, skipping those whose iid
# already exists there.
#
# A failure on one milestone is recorded through +error+ and does not stop
# the remaining milestones from being processed.
def fetch_milestones
  url = "/repos/#{repo}/milestones"

  while url
    response = Github::Client.new(options).get(url, state: :all)

    response.body.each do |raw|
      begin
        milestone = Github::Representation::Milestone.new(raw)
        next if project.milestones.where(iid: milestone.iid).exists?

        project.milestones.create!(
          iid: milestone.iid,
          title: milestone.title,
          description: milestone.description,
          due_date: milestone.due_date,
          state: milestone.state,
          created_at: milestone.created_at,
          updated_at: milestone.updated_at
        )
      rescue => e
        error(:milestone, milestone.url, e.message)
      end
    end

    # Follow the pagination link; nil ends the loop.
    url = response.rels[:next]
  end
end
2017-04-19 20:03:46 -04:00
# Walks every page of the repository's pull requests (all states, oldest
# first) and stores each one as a merge request together with its review
# comments and regular comments.
#
# Pull requests that already have a merge request (same iid and source
# project) or that are not valid are skipped. A failure on one pull request
# is recorded through +error+; branches restored for it are cleaned up
# either way.
def fetch_pull_requests
  url = "/repos/#{repo}/pulls"

  while url
    response = Github::Client.new(options).get(url, state: :all, sort: :created, direction: :asc)

    response.body.each do |raw|
      pull_request  = Github::Representation::PullRequest.new(raw, options.merge(project: project))
      merge_request = MergeRequest.find_or_initialize_by(iid: pull_request.iid, source_project_id: project.id)
      next unless merge_request.new_record? && pull_request.valid?

      begin
        restore_branches(pull_request)

        # user_id has to run before format_description: it fills
        # cached[:gitlab_user_ids], which format_description reads.
        author_id   = user_id(pull_request.author, project.creator_id)
        description = format_description(pull_request.description, pull_request.author)

        merge_request.attributes = {
          iid: pull_request.iid,
          title: pull_request.title,
          description: description,
          source_project: pull_request.source_project,
          source_branch: pull_request.source_branch_name,
          source_branch_sha: pull_request.source_branch_sha,
          target_project: pull_request.target_project,
          target_branch: pull_request.target_branch_name,
          target_branch_sha: pull_request.target_branch_sha,
          state: pull_request.state,
          milestone_id: milestone_id(pull_request.milestone),
          author_id: author_id,
          assignee_id: user_id(pull_request.assignee),
          created_at: pull_request.created_at,
          updated_at: pull_request.updated_at
        }

        merge_request.save!(validate: false)
        merge_request.merge_request_diffs.create

        # Fetch review comments
        review_comments_url = "/repos/#{repo}/pulls/#{pull_request.iid}/comments"
        fetch_comments(merge_request, :review_comment, review_comments_url, LegacyDiffNote)

        # Fetch comments (served by the issues endpoint for pull requests too)
        comments_url = "/repos/#{repo}/issues/#{pull_request.iid}/comments"
        fetch_comments(merge_request, :comment, comments_url)
      rescue => e
        error(:pull_request, pull_request.url, e.message)
      ensure
        clean_up_restored_branches(pull_request)
      end
    end

    # Follow the pagination link; nil ends the loop.
    url = response.rels[:next]
  end
end
2017-04-19 20:03:46 -04:00
# Walks every page of the repository's issues (all states, oldest first).
#
# Entries that are pull requests only contribute their labels to the
# already imported merge request; real issues are created (unless one with
# the same iid already exists in the project) along with their comments.
# A failure on one entry is recorded through +error+ and does not stop the
# remaining entries from being processed.
def fetch_issues
  url = "/repos/#{repo}/issues"

  while url
    response = Github::Client.new(options).get(url, state: :all, sort: :created, direction: :asc)

    response.body.each do |raw|
      representation = Github::Representation::Issue.new(raw, options)

      begin
        # Every pull request is an issue, but not every issue
        # is a pull request. For this reason, "shared" actions
        # for both features, like manipulating assignees, labels
        # and milestones, are provided within the Issues API.
        if representation.pull_request?
          next unless representation.has_labels?

          merge_request = MergeRequest.find_by!(target_project_id: project.id, iid: representation.iid)
          merge_request.update_attribute(:label_ids, label_ids(representation.labels))
        else
          next if Issue.where(iid: representation.iid, project_id: project.id).exists?

          # user_id has to run before format_description: it fills
          # cached[:gitlab_user_ids], which format_description reads.
          author_id = user_id(representation.author, project.creator_id)

          issue              = Issue.new
          issue.iid          = representation.iid
          issue.project_id   = project.id
          issue.title        = representation.title
          issue.description  = format_description(representation.description, representation.author)
          issue.state        = representation.state
          issue.label_ids    = label_ids(representation.labels)
          issue.milestone_id = milestone_id(representation.milestone)
          issue.author_id    = author_id
          issue.assignee_id  = user_id(representation.assignee)
          issue.created_at   = representation.created_at
          issue.updated_at   = representation.updated_at
          issue.save!(validate: false)

          # Fetch comments
          if representation.has_comments?
            comments_url = "/repos/#{repo}/issues/#{issue.iid}/comments"
            fetch_comments(issue, :comment, comments_url)
          end
        end
      rescue => e
        error(:issue, representation.url, e.message)
      end
    end

    # Follow the pagination link; nil ends the loop.
    url = response.rels[:next]
  end
end
# Walks every page of +url+ and stores each comment as a note attached to
# +noteable+.
#
# @param noteable the record the notes belong to
# @param type [Symbol] label used when recording a failure through +error+
# @param url [String] first page of the comments endpoint
# @param klass note class to instantiate (Note by default)
#
# Notes are saved inside ActiveRecord::Base.no_touching. A failure on one
# comment is recorded and does not stop the remaining comments.
def fetch_comments(noteable, type, url, klass = Note)
  while url
    comments = Github::Client.new(options).get(url)

    ActiveRecord::Base.no_touching do
      comments.body.each do |raw|
        begin
          representation = Github::Representation::Comment.new(raw, options)

          # user_id has to run before format_description: it fills
          # cached[:gitlab_user_ids], which format_description reads.
          author_id = user_id(representation.author, project.creator_id)

          note            = klass.new
          note.project_id = project.id
          note.noteable   = noteable
          note.note       = format_description(representation.note, representation.author)
          note.commit_id  = representation.commit_id
          note.line_code  = representation.line_code
          note.author_id  = author_id
          note.created_at = representation.created_at
          note.updated_at = representation.updated_at
          note.save!(validate: false)
        rescue => e
          error(type, representation.url, e.message)
        end
      end
    end

    # Follow the pagination link; nil ends the loop.
    url = comments.rels[:next]
  end
end
2017-04-19 20:18:11 -04:00
# Walks every page of the repository's releases endpoint and stores the
# description and timestamps of each valid release that is not yet
# persisted for the project (matched by tag).
#
# A failure while saving one release is recorded through +error+ and does
# not stop the remaining releases from being processed.
def fetch_releases
  url = "/repos/#{repo}/releases"

  while url
    response = Github::Client.new(options).get(url)

    response.body.each do |raw|
      representation = Github::Representation::Release.new(raw)
      next unless representation.valid?

      release = ::Release.find_or_initialize_by(project_id: project.id, tag: representation.tag)
      # Only brand new records are filled in; existing releases are left alone.
      next unless release.new_record?

      begin
        release.description = representation.description
        release.created_at  = representation.created_at
        release.updated_at  = representation.updated_at
        release.save!(validate: false)
      rescue => e
        error(:release, representation.url, e.message)
      end
    end

    # Follow the pagination link; nil ends the loop.
    url = response.rels[:next]
  end
end
2017-04-25 16:08:32 -04:00
# Recreates whichever of the pull request's source and target branches the
# pull request reports as missing.
def restore_branches(pull_request)
  unless pull_request.source_branch_exists?
    restore_source_branch(pull_request)
  end

  unless pull_request.target_branch_exists?
    restore_target_branch(pull_request)
  end
end
# Creates the pull request's source branch in the repository, pointing at
# the source branch SHA reported by the pull request.
def restore_source_branch(pull_request)
  branch_name = pull_request.source_branch_name
  branch_sha  = pull_request.source_branch_sha

  repository.create_branch(branch_name, branch_sha)
end
# Creates the pull request's target branch in the repository, pointing at
# the target branch SHA reported by the pull request.
def restore_target_branch(pull_request)
  branch_name = pull_request.target_branch_name
  branch_sha  = pull_request.target_branch_sha

  repository.create_branch(branch_name, branch_sha)
end
# Deletes branch +name+ from the repository. A Rugged::ReferenceError is
# not propagated; it is recorded in +errors+ as a :branch entry without a
# URL instead.
def remove_branch(name)
  repository.delete_branch(name)
rescue Rugged::ReferenceError
  failure = "Could not clean up restored branch: #{name}"
  errors.push(type: :branch, url: nil, error: failure)
end
# For a pull request that is not open, removes the source and/or target
# branch that the pull request reports as not existing. Open pull requests
# are left untouched.
def clean_up_restored_branches(pull_request)
  unless pull_request.opened?
    unless pull_request.source_branch_exists?
      remove_branch(pull_request.source_branch_name)
    end

    unless pull_request.target_branch_exists?
      remove_branch(pull_request.target_branch_name)
    end
  end
end
2017-04-24 22:41:46 -04:00
# Maps raw label hashes to the ids remembered in cached[:label_ids].
#
# @param labels [Array<Hash>] hashes carrying a 'name' key
# @return [Array] ids of the labels found in the cache, in input order;
#   names missing from the cache are dropped
# @raise [KeyError] when a label hash has no 'name' key
def label_ids(labels)
  known_ids = cached[:label_ids]

  labels.map { |attrs| known_ids[attrs.fetch('name')] }.compact
end
# Looks up the id of the project milestone whose iid matches +milestone+.
#
# @return the milestone id, or nil when +milestone+ is blank or no project
#   milestone has that iid
def milestone_id(milestone)
  if milestone.present?
    record = project.milestones.select(:id).find_by(iid: milestone.iid)
    record&.id
  end
end
# Resolves a GitHub user to a GitLab user id, first by external uid and
# then by email, caching the lookup per GitHub user id.
#
# @param user GitHub user representation (responds to +id+ and +email+)
# @param fallback_id id returned when no GitLab user matches
# @return the matching GitLab user id, +fallback_id+ when none matches, or
#   nil when +user+ is blank
#
# Side effect: cached[:gitlab_user_ids][user.id] records whether a real
# GitLab user was found; format_description reads that flag.
def user_id(user, fallback_id = nil)
  return unless user.present?

  github_id = user.id

  unless cached[:user_ids].key?(github_id)
    matched_id = user_id_by_external_uid(github_id) || user_id_by_email(user.email)

    cached[:gitlab_user_ids][github_id] = matched_id.present?
    # Cache only the looked-up id (nil for a miss). Caching the fallback
    # would make one caller's fallback leak into later calls for the same
    # user, e.g. an assignee lookup (no fallback) forcing a nil author.
    cached[:user_ids][github_id] = matched_id
  end

  cached[:user_ids][github_id] || fallback_id
end
# Finds the id of the user matched by ::User.find_by_any_email.
#
# @return the user id, or nil when +email+ is nil/false or nobody matches
def user_id_by_email(email)
  return unless email

  account = ::User.find_by_any_email(email)
  account&.id
end
# Finds the id of the first user that has a :github identity whose
# extern_uid equals +id+.
#
# @return the user id, or nil when +id+ is nil/false or nobody matches
def user_id_by_external_uid(id)
  return unless id

  users_with_identities = ::User.select(:id).joins(:identities)
  github_identities = ::Identity.where(provider: :github, extern_uid: id)

  users_with_identities.merge(github_identities).first&.id
end
# Returns +body+ untouched when cached[:gitlab_user_ids] holds a truthy
# flag for the author's id; otherwise prefixes it with a
# "*Created by: <username>*" line.
def format_description(body, author)
  if cached[:gitlab_user_ids][author.id]
    body
  else
    "*Created by: #{author.username}*\n\n#{body}"
  end
end
2017-04-19 20:03:46 -04:00
# Expires the content cache of the project repository by delegating to
# repository.expire_content_cache.
def expire_repository_cache
repository.expire_content_cache
end
# Persists the collected import errors on the project.
#
# Does nothing when no error was recorded; otherwise writes a JSON document
# with a fixed message and the error list to the project's import_error
# column via update_column.
def keep_track_of_errors
  return unless errors.any?

  payload = {
    message: 'The remote data could not be fully imported.',
    errors: errors
  }

  project.update_column(:import_error, payload.to_json)
end
# Records an import failure in +errors+.
#
# @param type [Symbol] kind of entity that failed
# @param url [String] URL of the failing resource; stored after passing
#   through Gitlab::UrlSanitizer.sanitize (presumably to strip embedded
#   credentials - confirm)
# @param message [String] failure description
def error(type, url, message)
  safe_url = Gitlab::UrlSanitizer.sanitize(url)

  errors.push(type: type, url: safe_url, error: message)
end
end
end