Modify GitHub importer to be retryable

This commit is contained in:
Ahmad Sherif 2016-10-19 20:42:31 +02:00
parent 6e590af148
commit 14fbd25d06
9 changed files with 137 additions and 28 deletions

View File

@ -29,6 +29,7 @@ Please view this file on the master branch, on stable branches it's out of date.
- Fix documents and comments on Build API `scope` - Fix documents and comments on Build API `scope`
- Fix applying labels for GitHub-imported MRs - Fix applying labels for GitHub-imported MRs
- Fix importing MR comments from GitHub - Fix importing MR comments from GitHub
- Modify GitHub importer to be retryable
- Refactor email, use setter method instead AR callbacks for email attribute (Semyon Pupkov) - Refactor email, use setter method instead AR callbacks for email attribute (Semyon Pupkov)
- Shortened merge request modal to let clipboard button not overlap - Shortened merge request modal to let clipboard button not overlap

View File

@ -10,7 +10,9 @@ module Gitlab
end end
def create! def create!
self.klass.create!(self.attributes) project.send(project_association).find_or_create_by!(find_condition) do |record|
record.attributes = attributes
end
end end
private private

View File

@ -20,13 +20,13 @@ module Gitlab
end end
def execute def execute
import_labels import_labels unless imported?(:labels)
import_milestones import_milestones unless imported?(:milestones)
import_issues import_issues unless imported?(:issues)
import_pull_requests import_pull_requests unless imported?(:pull_requests)
import_comments import_comments
import_wiki import_wiki
import_releases import_releases unless imported?(:releases)
handle_errors handle_errors
true true
@ -48,7 +48,7 @@ module Gitlab
end end
def import_labels def import_labels
client.labels(repo, per_page: 100) do |labels| client.labels(repo, page: current_page(:labels), per_page: 100) do |labels|
labels.each do |raw| labels.each do |raw|
begin begin
label = LabelFormatter.new(project, raw).create! label = LabelFormatter.new(project, raw).create!
@ -57,11 +57,15 @@ module Gitlab
errors << { type: :label, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message } errors << { type: :label, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message }
end end
end end
increment_page(:labels)
end end
imported!(:labels)
end end
def import_milestones def import_milestones
client.milestones(repo, state: :all, per_page: 100) do |milestones| client.milestones(repo, state: :all, page: current_page(:milestones), per_page: 100) do |milestones|
milestones.each do |raw| milestones.each do |raw|
begin begin
MilestoneFormatter.new(project, raw).create! MilestoneFormatter.new(project, raw).create!
@ -69,11 +73,15 @@ module Gitlab
errors << { type: :milestone, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message } errors << { type: :milestone, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message }
end end
end end
increment_page(:milestones)
end end
imported!(:milestones)
end end
def import_issues def import_issues
client.issues(repo, state: :all, sort: :created, direction: :asc, per_page: 100) do |issues| client.issues(repo, state: :all, sort: :created, direction: :asc, page: current_page(:issues), per_page: 100) do |issues|
issues.each do |raw| issues.each do |raw|
gh_issue = IssueFormatter.new(project, raw) gh_issue = IssueFormatter.new(project, raw)
@ -86,11 +94,15 @@ module Gitlab
end end
end end
end end
increment_page(:issues)
end end
imported!(:issues)
end end
def import_pull_requests def import_pull_requests
client.pull_requests(repo, state: :all, sort: :created, direction: :asc, per_page: 100) do |pull_requests| client.pull_requests(repo, state: :all, sort: :created, direction: :asc, page: current_page(:pull_requests), per_page: 100) do |pull_requests|
pull_requests.each do |raw| pull_requests.each do |raw|
pull_request = PullRequestFormatter.new(project, raw) pull_request = PullRequestFormatter.new(project, raw)
next unless pull_request.valid? next unless pull_request.valid?
@ -107,9 +119,13 @@ module Gitlab
clean_up_restored_branches(pull_request) clean_up_restored_branches(pull_request)
end end
end end
increment_page(:pull_requests)
end end
project.repository.after_remove_branch project.repository.after_remove_branch
imported!(:pull_requests)
end end
def restore_source_branch(pull_request) def restore_source_branch(pull_request)
@ -149,13 +165,34 @@ module Gitlab
end end
def import_comments def import_comments
client.issues_comments(repo, per_page: 100) do |comments| # We don't have a distinctive attribute for comments (unlike issues iid), so we fetch the last inserted note,
create_comments(comments) # compare it against every comment in the current imported page until we find match, and that's where start importing
end last_note = Note.where(noteable_type: 'Issue').last
client.issues_comments(repo, page: current_page(:issue_comments), per_page: 100) do |comments|
if last_note
discard_inserted_comments(comments, last_note)
last_note = nil
end
client.pull_requests_comments(repo, per_page: 100) do |comments|
create_comments(comments) create_comments(comments)
end increment_page(:issue_comments)
end unless imported?(:issue_comments)
imported!(:issue_comments)
last_note = Note.where(noteable_type: 'MergeRequest').last
client.pull_requests_comments(repo, page: current_page(:pull_request_comments), per_page: 100) do |comments|
if last_note
discard_inserted_comments(comments, last_note)
last_note = nil
end
create_comments(comments)
increment_page(:pull_request_comments)
end unless imported?(:pull_request_comments)
imported!(:pull_request_comments)
end end
def create_comments(comments) def create_comments(comments)
@ -177,6 +214,24 @@ module Gitlab
end end
end end
def discard_inserted_comments(comments, last_note)
last_note_attrs = nil
cut_off_index = comments.find_index do |raw|
comment = CommentFormatter.new(project, raw)
comment_attrs = comment.attributes
last_note_attrs ||= last_note.slice(*comment_attrs.keys)
comment_attrs.with_indifferent_access == last_note_attrs
end
# No matching resource in the collection, which means we got halted right on the end of the last page, so all good
return unless cut_off_index
# Otherwise, remove the resouces we've already inserted
comments.shift(cut_off_index + 1)
end
def import_wiki def import_wiki
unless project.wiki.repository_exists? unless project.wiki.repository_exists?
wiki = WikiFormatter.new(project) wiki = WikiFormatter.new(project)
@ -192,7 +247,7 @@ module Gitlab
end end
def import_releases def import_releases
client.releases(repo, per_page: 100) do |releases| client.releases(repo, page: current_page(:releases), per_page: 100) do |releases|
releases.each do |raw| releases.each do |raw|
begin begin
gh_release = ReleaseFormatter.new(project, raw) gh_release = ReleaseFormatter.new(project, raw)
@ -201,7 +256,39 @@ module Gitlab
errors << { type: :release, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message } errors << { type: :release, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message }
end end
end end
increment_page(:releases)
end end
imported!(:releases)
end
def imported?(resource_type)
Rails.cache.read("#{cache_key_prefix}:#{resource_type}:imported")
end
def imported!(resource_type)
Rails.cache.write("#{cache_key_prefix}:#{resource_type}:imported", true, ex: 1.day)
end
def increment_page(resource_type)
key = "#{cache_key_prefix}:#{resource_type}:current-page"
# Rails.cache.increment calls INCRBY directly on the value stored under the key, which is
# a serialized ActiveSupport::Cache::Entry, so it will return an error by Redis, hence this ugly work-around
page = Rails.cache.read(key)
page += 1
Rails.cache.write(key, page)
page
end
def current_page(resource_type)
Rails.cache.fetch("#{cache_key_prefix}:#{resource_type}:current-page", ex: 1.day) { 1 }
end
def cache_key_prefix
@cache_key_prefix ||= "github-import:#{project.id}"
end end
end end
end end

View File

@ -20,8 +20,12 @@ module Gitlab
raw_data.comments > 0 raw_data.comments > 0
end end
def klass def project_association
Issue :issues
end
def find_condition
{ iid: number }
end end
def number def number

View File

@ -9,8 +9,8 @@ module Gitlab
} }
end end
def klass def project_association
Label :labels
end end
def create! def create!

View File

@ -14,8 +14,12 @@ module Gitlab
} }
end end
def klass def project_association
Milestone :milestones
end
def find_condition
{ iid: raw_data.number }
end end
private private

View File

@ -24,8 +24,12 @@ module Gitlab
} }
end end
def klass def project_association
MergeRequest :merge_requests
end
def find_condition
{ iid: number }
end end
def number def number

View File

@ -11,8 +11,12 @@ module Gitlab
} }
end end
def klass def project_association
Release :releases
end
def find_condition
{ tag: raw_data.tag_name }
end end
def valid? def valid?

View File

@ -2,6 +2,10 @@ require 'spec_helper'
describe Gitlab::GithubImport::Importer, lib: true do describe Gitlab::GithubImport::Importer, lib: true do
describe '#execute' do describe '#execute' do
before do
allow(Rails).to receive(:cache).and_return(ActiveSupport::Cache::MemoryStore.new)
end
context 'when an error occurs' do context 'when an error occurs' do
let(:project) { create(:project, import_url: 'https://github.com/octocat/Hello-World.git', wiki_access_level: ProjectFeature::DISABLED) } let(:project) { create(:project, import_url: 'https://github.com/octocat/Hello-World.git', wiki_access_level: ProjectFeature::DISABLED) }
let(:octocat) { double(id: 123456, login: 'octocat') } let(:octocat) { double(id: 123456, login: 'octocat') }
@ -152,10 +156,9 @@ describe Gitlab::GithubImport::Importer, lib: true do
message: 'The remote data could not be fully imported.', message: 'The remote data could not be fully imported.',
errors: [ errors: [
{ type: :label, url: "https://api.github.com/repos/octocat/Hello-World/labels/bug", errors: "Validation failed: Title can't be blank, Title is invalid" }, { type: :label, url: "https://api.github.com/repos/octocat/Hello-World/labels/bug", errors: "Validation failed: Title can't be blank, Title is invalid" },
{ type: :milestone, url: "https://api.github.com/repos/octocat/Hello-World/milestones/1", errors: "Validation failed: Title has already been taken" },
{ type: :issue, url: "https://api.github.com/repos/octocat/Hello-World/issues/1348", errors: "Validation failed: Title can't be blank, Title is too short (minimum is 0 characters)" }, { type: :issue, url: "https://api.github.com/repos/octocat/Hello-World/issues/1348", errors: "Validation failed: Title can't be blank, Title is too short (minimum is 0 characters)" },
{ type: :pull_request, url: "https://api.github.com/repos/octocat/Hello-World/pulls/1347", errors: "Invalid Repository. Use user/repo format." }, { type: :pull_request, url: "https://api.github.com/repos/octocat/Hello-World/pulls/1347", errors: "Invalid Repository. Use user/repo format." },
{ type: :pull_request, url: "https://api.github.com/repos/octocat/Hello-World/pulls/1347", errors: "Validation failed: Validate branches Cannot Create: This merge request already exists: [\"New feature\"]" }, { type: :pull_request, url: "https://api.github.com/repos/octocat/Hello-World/pulls/1347", errors: "Invalid Repository. Use user/repo format." },
{ type: :wiki, errors: "Gitlab::Shell::Error" }, { type: :wiki, errors: "Gitlab::Shell::Error" },
{ type: :release, url: 'https://api.github.com/repos/octocat/Hello-World/releases/2', errors: "Validation failed: Description can't be blank" } { type: :release, url: 'https://api.github.com/repos/octocat/Hello-World/releases/2', errors: "Validation failed: Description can't be blank" }
] ]