gitlab-org--gitlab-foss/lib/github/import.rb
Rémy Coutable 7833d3fb29
Improve & simplify GitHub's representation classes
Signed-off-by: Rémy Coutable <remy@rymai.me>
2017-10-05 11:05:45 +02:00

376 lines
12 KiB
Ruby

require_relative 'error'
require_relative 'import/issue'
require_relative 'import/legacy_diff_note'
require_relative 'import/merge_request'
require_relative 'import/note'
module Github
class Import
include Gitlab::ShellAdapter
attr_reader :project, :repository, :repo, :repo_url, :wiki_url,
:options, :errors, :cached, :verbose, :last_fetched_at
# Builds an importer for the given project.
#
# project - the project to import into. Its repository, import source,
#           import URL and import data are read here.
# options - Hash of settings. :token defaults to the :user entry of the
#           project's import credentials when import data is present.
#           :verbose (default false) turns on progress output in #execute.
def initialize(project, options = {})
  default_token = project.import_data&.credentials&.fetch(:user)

  @project         = project
  @repository      = project.repository
  @repo            = project.import_source
  @repo_url        = project.import_url
  # The wiki lives next to the repository: "<name>.git" => "<name>.wiki.git".
  @wiki_url        = project.import_url.sub(/\.git\z/, '.wiki.git')
  @options         = options.reverse_merge(token: default_token)
  @verbose         = options.fetch(:verbose, false)
  # Two-level cache; each top-level key lazily gets its own empty Hash.
  @cached          = Hash.new { |store, name| store[name] = Hash.new }
  @errors          = []
  @last_fetched_at = nil
end
# Runs every import step in order, printing a progress line before each one
# when verbose is enabled.
#
# Returns true when no error was recorded, false otherwise. A
# Github::RepositoryFetchError aborts the remaining steps, expires the
# repository cache and yields false. Recorded errors are persisted in every
# case through keep_track_of_errors.
# rubocop: disable Rails/Output
def execute
  steps = [
    ['Fetching repository...',       :setup_and_fetch_repository],
    ['Fetching labels...',           :fetch_labels],
    ['Fetching milestones...',       :fetch_milestones],
    ['Fetching pull requests...',    :fetch_pull_requests],
    ['Fetching issues...',           :fetch_issues],
    ['Fetching releases...',         :fetch_releases],
    ['Cloning wiki repository...',   :fetch_wiki_repository],
    ['Expiring repository cache...', :expire_repository_cache]
  ]

  steps.each do |message, step|
    puts message.color(:aqua) if verbose
    send(step)
  end

  errors.empty?
rescue Github::RepositoryFetchError
  expire_repository_cache
  false
ensure
  keep_track_of_errors
end
private
# Makes sure the project repository exists, registers the 'github' remote
# as a mirror, adds a fetch refspec mapping pull request heads onto
# merge request refs, and performs a forced fetch.
#
# When the repository is missing or a shell error occurs, the failure is
# recorded and Github::RepositoryFetchError is raised instead.
def setup_and_fetch_repository
  remote_name = 'github'
  pull_request_refspec = '+refs/pull/*/head:refs/merge-requests/*/head'

  project.ensure_repository
  project.repository.add_remote(remote_name, repo_url)
  project.repository.set_import_remote_as_mirror(remote_name)
  project.repository.add_remote_fetch_config(remote_name, pull_request_refspec)

  fetch_remote(forced: true)
rescue Gitlab::Git::Repository::NoRepository, Gitlab::Shell::Error => failure
  error(:project, repo_url, failure.message)
  raise Github::RepositoryFetchError
end
# Fetches the 'github' remote of the project repository.
#
# forced - forwarded as the :forced option of the repository fetch
#          (default false).
def fetch_remote(forced: false)
# Stamp the time before fetching: fetch_pull_requests compares each pull
# request's updated_at against it to decide whether to fetch again.
@last_fetched_at = Time.now
project.repository.fetch_remote('github', forced: forced)
end
# Imports the GitHub wiki repository through gitlab_shell, unless the
# project wiki already has a repository.
def fetch_wiki_repository
  return if project.wiki.repository_exists?

  wiki_disk_path = "#{project.disk_path}.wiki"
  gitlab_shell.import_repository(project.repository_storage_path, wiki_disk_path, wiki_url)
rescue Gitlab::Shell::Error => failure
  # "repository not exported" is what GitHub answers when the wiki feature
  # is enabled but no page was ever created: nothing to import, so this is
  # not recorded as an error.
  return if failure.message =~ /repository not exported/

  error(:wiki, wiki_url, failure.message)
end
# Imports the labels of the GitHub repository page by page, creating the
# ones missing from the project (with their GitHub color) and caching each
# label id by title under cached[:label_ids].
#
# A failure while saving one label is recorded through #error and does not
# stop the import.
def fetch_labels
  url = "/repos/#{repo}/labels"

  while url
    response = Github::Client.new(options).get(url)

    response.body.each do |raw|
      # Built before the begin/rescue, like fetch_pull_requests and
      # fetch_releases do: the handler below reads representation.url, so it
      # must never run with representation still unassigned (nil), which
      # would replace the real failure with a NoMethodError.
      representation = Github::Representation::Label.new(raw)

      begin
        label = project.labels.find_or_create_by!(title: representation.title) do |new_label|
          new_label.color = representation.color
        end

        cached[:label_ids][representation.title] = label.id
      rescue => e
        error(:label, representation.url, e.message)
      end
    end

    url = response.rels[:next]
  end
end
# Imports the milestones of the GitHub repository (any state) page by page.
# Milestones whose iid already exists in the project are skipped.
#
# A failure while creating one milestone is recorded through #error and does
# not stop the import.
def fetch_milestones
  url = "/repos/#{repo}/milestones"

  while url
    response = Github::Client.new(options).get(url, state: :all)

    response.body.each do |raw|
      # Built before the begin/rescue, like fetch_pull_requests and
      # fetch_releases do: the handler below reads milestone.url, so it must
      # never run with milestone still unassigned (nil), which would replace
      # the real failure with a NoMethodError.
      milestone = Github::Representation::Milestone.new(raw)

      begin
        next if project.milestones.where(iid: milestone.iid).exists?

        project.milestones.create!(
          iid: milestone.iid,
          title: milestone.title,
          description: milestone.description,
          due_date: milestone.due_date,
          state: milestone.state,
          created_at: milestone.created_at,
          updated_at: milestone.updated_at
        )
      rescue => e
        error(:milestone, milestone.url, e.message)
      end
    end

    url = response.rels[:next]
  end
end
# Imports every pull request (any state, sorted by creation, ascending) as a
# merge request, following the "next" page link until there is none.
def fetch_pull_requests
  url = "/repos/#{repo}/pulls"

  while url
    response = Github::Client.new(options).get(url, state: :all, sort: :created, direction: :asc)
    response.body.each { |raw| import_pull_request(raw) }
    url = response.rels[:next]
  end
end

# Creates the merge request for one raw pull request payload, then its diff
# and its review comments. Nothing happens when a merge request with the same
# iid already exists for this source project, or when the pull request
# representation reports itself invalid. Failures are recorded through #error.
def import_pull_request(raw)
  pull_request = Github::Representation::PullRequest.new(raw, options.merge(project: project))
  merge_request = MergeRequest.find_or_initialize_by(iid: pull_request.iid, source_project_id: project.id)
  return unless merge_request.new_record? && pull_request.valid?

  begin
    # A pull request touched after our last fetch may point at refs we do
    # not have yet, so refresh the remote first.
    fetch_remote if pull_request.updated_at > last_fetched_at

    # user_id must run before format_description: it fills
    # cached[:gitlab_user_ids], which format_description reads.
    author_id = user_id(pull_request.author, project.creator_id)
    description = format_description(pull_request.description, pull_request.author)

    merge_request.attributes = {
      iid: pull_request.iid,
      title: pull_request.title,
      description: description,
      ref_fetched: true,
      source_project: pull_request.source_project,
      source_branch: pull_request.source_branch_name,
      source_branch_sha: pull_request.source_branch_sha,
      target_project: pull_request.target_project,
      target_branch: pull_request.target_branch_name,
      target_branch_sha: pull_request.target_branch_sha,
      state: pull_request.state,
      milestone_id: milestone_id(pull_request.milestone),
      author_id: author_id,
      assignee_id: user_id(pull_request.assignee),
      created_at: pull_request.created_at,
      updated_at: pull_request.updated_at
    }

    merge_request.save!(validate: false)
    merge_request.merge_request_diffs.create

    review_comments_url = "/repos/#{repo}/pulls/#{pull_request.iid}/comments"
    fetch_comments(merge_request, :review_comment, review_comments_url, LegacyDiffNote)
  rescue => e
    error(:pull_request, pull_request.url, e.message)
  end
end
# Walks the GitHub Issues API (any state, sorted by creation, ascending) and
# hands every raw entry to populate_issue, following the "next" page link
# until there is none.
def fetch_issues
  next_page = "/repos/#{repo}/issues"

  while next_page
    page = Github::Client.new(options).get(next_page, state: :all, sort: :created, direction: :asc)

    page.body.each do |raw_issue|
      populate_issue(raw_issue)
    end

    next_page = page.rels[:next]
  end
end
# Imports one raw entry of the GitHub Issues API.
#
# The Issues API lists pull requests too, and it is the place where labels
# and plain comments of a pull request are exposed. Entries flagged as pull
# requests therefore enrich the already imported merge request, while the
# others become new issues. Failures are recorded through #error.
def populate_issue(raw)
  representation = Github::Representation::Issue.new(raw, options)

  begin
    if representation.pull_request?
      apply_issue_data_to_merge_request(representation)
    else
      create_issue_from(representation)
    end
  rescue => e
    error(:issue, representation.url, e.message)
  end
end

# Copies labels and comments from the issue-side view of a pull request onto
# its merge request. Does nothing when the entry has neither.
def apply_issue_data_to_merge_request(representation)
  return unless representation.labels? || representation.comments?

  merge_request = MergeRequest.find_by!(target_project_id: project.id, iid: representation.iid)
  merge_request.update_attribute(:label_ids, label_ids(representation.labels)) if representation.labels?

  fetch_comments_conditionally(merge_request, representation)
end

# Creates the issue described by +representation+, then its labels,
# assignees and comments. Does nothing when the project already has an issue
# with the same iid.
def create_issue_from(representation)
  return if Issue.exists?(iid: representation.iid, project_id: project.id)

  # user_id must run before format_description: it fills
  # cached[:gitlab_user_ids], which format_description reads.
  author_id = user_id(representation.author, project.creator_id)

  issue = Issue.new
  issue.iid          = representation.iid
  issue.project_id   = project.id
  issue.title        = representation.title
  issue.description  = format_description(representation.description, representation.author)
  issue.state        = representation.state
  issue.milestone_id = milestone_id(representation.milestone)
  issue.author_id    = author_id
  issue.created_at   = representation.created_at
  issue.updated_at   = representation.updated_at
  issue.save!(validate: false)

  issue.update(
    label_ids: label_ids(representation.labels),
    assignee_ids: assignee_ids(representation.assignees))

  fetch_comments_conditionally(issue, representation)
end
# Imports the plain comments of +issuable+, but only when its GitHub
# representation reports that it has comments.
def fetch_comments_conditionally(issuable, representation)
  return unless representation.comments?

  fetch_comments(issuable, :comment, "/repos/#{repo}/issues/#{issuable.iid}/comments")
end
# Imports the comments found at +url+ (and the following pages) as notes
# attached to +noteable+.
#
# noteable - the record the notes belong to.
# type     - Symbol recorded as the error type when a comment fails.
# url      - API path of the first page of comments.
# klass    - note class to instantiate (default: Note).
#
# Notes are saved inside ActiveRecord::Base.no_touching. A failure on one
# comment is recorded through #error and does not stop the import.
def fetch_comments(noteable, type, url, klass = Note)
  while url
    comments = Github::Client.new(options).get(url)

    ActiveRecord::Base.no_touching do
      comments.body.each do |raw|
        # Built before the begin/rescue, like fetch_pull_requests and
        # fetch_releases do: the handler below reads representation.url, so
        # it must never run with representation still unassigned (nil),
        # which would replace the real failure with a NoMethodError.
        representation = Github::Representation::Comment.new(raw, options)

        begin
          # user_id must run before format_description: it fills
          # cached[:gitlab_user_ids], which format_description reads.
          author_id = user_id(representation.author, project.creator_id)

          note = klass.new
          note.project_id = project.id
          note.noteable   = noteable
          note.note       = format_description(representation.note, representation.author)
          note.commit_id  = representation.commit_id
          note.line_code  = representation.line_code
          note.author_id  = author_id
          note.created_at = representation.created_at
          note.updated_at = representation.updated_at
          note.save!(validate: false)
        rescue => e
          error(type, representation.url, e.message)
        end
      end
    end

    url = comments.rels[:next]
  end
end
# Imports the releases of the GitHub repository, following the "next" page
# link until there is none.
def fetch_releases
  url = "/repos/#{repo}/releases"

  while url
    response = Github::Client.new(options).get(url)
    response.body.each { |raw| import_release(raw) }
    url = response.rels[:next]
  end
end

# Stores one raw release payload as a ::Release of the project. Releases
# whose representation is invalid, or whose tag already has a release in the
# project, are skipped. Failures are recorded through #error.
def import_release(raw)
  representation = Github::Representation::Release.new(raw)
  return unless representation.valid?

  release = ::Release.find_or_initialize_by(project_id: project.id, tag: representation.tag)
  return unless release.new_record?

  begin
    release.description = representation.description
    release.created_at  = representation.created_at
    release.updated_at  = representation.updated_at
    release.save!(validate: false)
  rescue => e
    error(:release, representation.url, e.message)
  end
end
# Maps GitHub labels to the ids cached under cached[:label_ids] by title.
# Labels without a cached id are left out.
def label_ids(labels)
  labels.each_with_object([]) do |label, ids|
    cached_id = cached[:label_ids][label.title]
    ids << cached_id unless cached_id.nil?
  end
end
# Resolves each GitHub assignee through user_id (without fallback) and
# returns the ids that could be resolved.
def assignee_ids(assignees)
  assignees.each_with_object([]) do |assignee, ids|
    resolved_id = user_id(assignee)
    ids << resolved_id unless resolved_id.nil?
  end
end
# Returns the id of the project milestone carrying the same iid as the
# GitHub milestone, or nil when no milestone is given or none matches.
def milestone_id(milestone)
  if milestone.present?
    matching = project.milestones.select(:id).find_by(iid: milestone.iid)
    matching&.id
  end
end
# Resolves a GitHub user to a GitLab user id, first through the GitHub
# identity (external uid), then by email.
#
# user        - GitHub user representation (responds to #id and #email).
# fallback_id - id returned when no GitLab user matches (default nil).
#
# Returns nil when no user is given; otherwise the matching GitLab user id,
# or fallback_id when there is none.
#
# Only the lookup result is cached (cached[:user_ids]); the fallback is
# applied per call. Caching the fallback would leak one caller's fallback to
# later callers passing none - e.g. an unmatched author resolved with the
# project creator as fallback would then also be returned as an assignee.
# Misses are cached too (as nil), so unmatched users are looked up only once.
def user_id(user, fallback_id = nil)
  return unless user.present?

  unless cached[:user_ids].key?(user.id)
    gitlab_user_id = user_id_by_external_uid(user.id) || user_id_by_email(user.email)

    # Read by format_description to decide whether the author gets credited
    # in the body.
    cached[:gitlab_user_ids][user.id] = gitlab_user_id.present?
    cached[:user_ids][user.id] = gitlab_user_id
  end

  cached[:user_ids][user.id] || fallback_id
end
# Returns the id of the GitLab user owning +email+ (any of their emails),
# or nil when no email is given or nobody matches.
def user_id_by_email(email)
  return nil unless email

  matching_user = ::User.find_by_any_email(email)
  matching_user&.id
end
# Returns the id of the GitLab user who has a GitHub identity with the given
# external uid, or nil when no id is given or nobody matches.
def user_id_by_external_uid(id)
  return nil unless id

  github_identities = ::Identity.where(provider: :github, extern_uid: id)
  matching_user = ::User.select(:id).joins(:identities).merge(github_identities).first
  matching_user&.id
end
# Returns +body+ untouched when the author was flagged as a matched GitLab
# user in cached[:gitlab_user_ids]; otherwise prefixes it with a line
# crediting the GitHub username.
def format_description(body, author)
  if cached[:gitlab_user_ids][author.id]
    body
  else
    "*Created by: #{author.username}*\n\n#{body}"
  end
end
# Expires the repository content cache, provided the project repository
# exists.
def expire_repository_cache
  return unless project.repository_exists?

  repository.expire_content_cache
end
# Persists the recorded errors, serialized as JSON together with a summary
# message, in the project's import_error column. Does nothing when no error
# was recorded.
def keep_track_of_errors
  return if errors.none?

  report = {
    message: 'The remote data could not be fully imported.',
    errors: errors
  }

  project.update_column(:import_error, report.to_json)
end
# Records an import failure.
#
# type    - Symbol naming what failed (e.g. :label, :issue).
# url     - URL of the failing resource; passed through
#           Gitlab::UrlSanitizer.sanitize before being stored.
# message - the failure message.
def error(type, url, message)
  entry = { type: type, url: Gitlab::UrlSanitizer.sanitize(url), error: message }
  errors.push(entry)
end
end
end