Merge branch 'github-importer-refactor' into 'master'
Rewrite the GitHub importer to perform work in parallel and greatly improve performance. Closes #33135, #38621, and #39361. See merge request gitlab-org/gitlab-ce!14731
This commit is contained in:
commit
92249f1ac8
|
@ -43,7 +43,7 @@ class Import::GithubController < Import::BaseController
|
|||
@target_namespace = find_or_create_namespace(namespace_path, current_user.namespace_path)
|
||||
|
||||
if can?(current_user, :create_projects, @target_namespace)
|
||||
@project = Gitlab::GithubImport::ProjectCreator.new(repo, @project_name, @target_namespace, current_user, access_params, type: provider).execute
|
||||
@project = Gitlab::LegacyGithubImport::ProjectCreator.new(repo, @project_name, @target_namespace, current_user, access_params, type: provider).execute
|
||||
else
|
||||
render 'unauthorized'
|
||||
end
|
||||
|
@ -52,7 +52,7 @@ class Import::GithubController < Import::BaseController
|
|||
private
|
||||
|
||||
def client
|
||||
@client ||= Gitlab::GithubImport::Client.new(session[access_token_key], client_options)
|
||||
@client ||= Gitlab::LegacyGithubImport::Client.new(session[access_token_key], client_options)
|
||||
end
|
||||
|
||||
def verify_import_enabled
|
||||
|
|
|
@ -365,6 +365,7 @@ class Project < ActiveRecord::Base
|
|||
scope :abandoned, -> { where('projects.last_activity_at < ?', 6.months.ago) }
|
||||
|
||||
scope :excluding_project, ->(project) { where.not(id: project) }
|
||||
scope :import_started, -> { where(import_status: 'started') }
|
||||
|
||||
state_machine :import_status, initial: :none do
|
||||
event :import_schedule do
|
||||
|
@ -1190,6 +1191,10 @@ class Project < ActiveRecord::Base
|
|||
!!repository.exists?
|
||||
end
|
||||
|
||||
def wiki_repository_exists?
|
||||
wiki.repository_exists?
|
||||
end
|
||||
|
||||
# update visibility_level of forks
|
||||
def update_forks_visibility_level
|
||||
return unless visibility_level < visibility_level_was
|
||||
|
@ -1433,6 +1438,31 @@ class Project < ActiveRecord::Base
|
|||
reload_repository!
|
||||
end
|
||||
|
||||
def after_import
|
||||
repository.after_import
|
||||
import_finish
|
||||
remove_import_jid
|
||||
update_project_counter_caches
|
||||
end
|
||||
|
||||
def update_project_counter_caches
|
||||
classes = [
|
||||
Projects::OpenIssuesCountService,
|
||||
Projects::OpenMergeRequestsCountService
|
||||
]
|
||||
|
||||
classes.each do |klass|
|
||||
klass.new(self).refresh_cache
|
||||
end
|
||||
end
|
||||
|
||||
def remove_import_jid
|
||||
return unless import_jid
|
||||
|
||||
Gitlab::SidekiqStatus.unset(import_jid)
|
||||
update_column(:import_jid, nil)
|
||||
end
|
||||
|
||||
def running_or_pending_build_count(force: false)
|
||||
Rails.cache.fetch(['projects', id, 'running_or_pending_build_count'], force: force) do
|
||||
builds.running_or_pending.count(:all)
|
||||
|
@ -1690,6 +1720,17 @@ class Project < ActiveRecord::Base
|
|||
Gitlab::ReferenceCounter.new(gl_repository(is_wiki: wiki))
|
||||
end
|
||||
|
||||
# Refreshes the expiration time of the associated import job ID.
|
||||
#
|
||||
# This method can be used by asynchronous importers to refresh the status,
|
||||
# preventing the StuckImportJobsWorker from marking the import as failed.
|
||||
def refresh_import_jid_expiration
|
||||
return unless import_jid
|
||||
|
||||
Gitlab::SidekiqStatus
|
||||
.set(import_jid, StuckImportJobsWorker::IMPORT_JOBS_EXPIRATION)
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def storage
|
||||
|
|
|
@ -973,6 +973,10 @@ class Repository
|
|||
raw_repository.fetch_source_branch!(source_repository.raw_repository, source_branch, local_ref)
|
||||
end
|
||||
|
||||
def remote_exists?(name)
|
||||
raw_repository.remote_exists?(name)
|
||||
end
|
||||
|
||||
def compare_source_branch(target_branch_name, source_repository, source_branch_name, straight:)
|
||||
raw_repository.compare_source_branch(target_branch_name, source_repository.raw_repository, source_branch_name, straight: straight)
|
||||
end
|
||||
|
|
|
@ -267,18 +267,23 @@ class User < ActiveRecord::Base
|
|||
end
|
||||
end
|
||||
|
||||
def for_github_id(id)
|
||||
joins(:identities)
|
||||
.where(identities: { provider: :github, extern_uid: id.to_s })
|
||||
end
|
||||
|
||||
# Find a User by their primary email or any associated secondary email
|
||||
def find_by_any_email(email)
|
||||
sql = 'SELECT *
|
||||
FROM users
|
||||
WHERE id IN (
|
||||
SELECT id FROM users WHERE email = :email
|
||||
UNION
|
||||
SELECT emails.user_id FROM emails WHERE email = :email
|
||||
)
|
||||
LIMIT 1;'
|
||||
by_any_email(email).take
|
||||
end
|
||||
|
||||
User.find_by_sql([sql, { email: email }]).first
|
||||
# Returns a relation containing all the users for the given email address
|
||||
def by_any_email(email)
|
||||
users = where(email: email)
|
||||
emails = joins(:emails).where(emails: { email: email })
|
||||
union = Gitlab::SQL::Union.new([users, emails])
|
||||
|
||||
from("(#{union.to_sql}) #{table_name}")
|
||||
end
|
||||
|
||||
def filter(filter_name)
|
||||
|
|
|
@ -4,6 +4,18 @@ module Projects
|
|||
|
||||
Error = Class.new(StandardError)
|
||||
|
||||
# Returns true if this importer is supposed to perform its work in the
|
||||
# background.
|
||||
#
|
||||
# This method will only return `true` if async importing is explicitly
|
||||
# supported by an importer class (`Gitlab::GithubImport::ParallelImporter`
|
||||
# for example).
|
||||
def async?
|
||||
return false unless has_importer?
|
||||
|
||||
!!importer_class.try(:async?)
|
||||
end
|
||||
|
||||
def execute
|
||||
add_repository_to_project unless project.gitlab_project_import?
|
||||
|
||||
|
@ -75,12 +87,16 @@ module Projects
|
|||
end
|
||||
end
|
||||
|
||||
def importer_class
|
||||
Gitlab::ImportSources.importer(project.import_type)
|
||||
end
|
||||
|
||||
def has_importer?
|
||||
Gitlab::ImportSources.importer_names.include?(project.import_type)
|
||||
end
|
||||
|
||||
def importer
|
||||
Gitlab::ImportSources.importer(project.import_type).new(project)
|
||||
importer_class.new(project)
|
||||
end
|
||||
|
||||
def unknown_url?
|
||||
|
|
|
@ -0,0 +1,31 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Gitlab
|
||||
module GithubImport
|
||||
# NotifyUponDeath can be included into a GitHub worker class if it should
|
||||
# notify any JobWaiter instances upon being moved to the Sidekiq dead queue.
|
||||
#
|
||||
# Note that this will only notify the waiter upon graceful termination; a
|
||||
# SIGKILL will still result in the waiter _not_ being notified.
|
||||
#
|
||||
# Workers including this module must have jobs passed where the last
|
||||
# argument is the key to notify, as a String.
|
||||
module NotifyUponDeath
|
||||
extend ActiveSupport::Concern
|
||||
|
||||
included do
|
||||
# If a job is being exhausted we still want to notify the
|
||||
# AdvanceStageWorker. This prevents the entire import from getting stuck
|
||||
# just because 1 job threw too many errors.
|
||||
sidekiq_retries_exhausted do |job|
|
||||
args = job['args']
|
||||
jid = job['jid']
|
||||
|
||||
if args.length == 3 && (key = args.last) && key.is_a?(String)
|
||||
JobWaiter.notify(key, jid)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
|
@ -0,0 +1,54 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Gitlab
|
||||
module GithubImport
|
||||
# ObjectImporter defines the base behaviour for every Sidekiq worker that
|
||||
# imports a single resource such as a note or pull request.
|
||||
module ObjectImporter
|
||||
extend ActiveSupport::Concern
|
||||
|
||||
included do
|
||||
include Sidekiq::Worker
|
||||
include GithubImport::Queue
|
||||
include ReschedulingMethods
|
||||
include NotifyUponDeath
|
||||
end
|
||||
|
||||
# project - An instance of `Project` to import the data into.
|
||||
# client - An instance of `Gitlab::GithubImport::Client`
|
||||
# hash - A Hash containing the details of the object to import.
|
||||
def import(project, client, hash)
|
||||
object = representation_class.from_json_hash(hash)
|
||||
|
||||
importer_class.new(object, project, client).execute
|
||||
|
||||
counter.increment(project: project.path_with_namespace)
|
||||
end
|
||||
|
||||
def counter
|
||||
@counter ||= Gitlab::Metrics.counter(counter_name, counter_description)
|
||||
end
|
||||
|
||||
# Returns the representation class to use for the object. This class must
|
||||
# define the class method `from_json_hash`.
|
||||
def representation_class
|
||||
raise NotImplementedError
|
||||
end
|
||||
|
||||
# Returns the class to use for importing the object.
|
||||
def importer_class
|
||||
raise NotImplementedError
|
||||
end
|
||||
|
||||
# Returns the name (as a Symbol) of the Prometheus counter.
|
||||
def counter_name
|
||||
raise NotImplementedError
|
||||
end
|
||||
|
||||
# Returns the description (as a String) of the Prometheus counter.
|
||||
def counter_description
|
||||
raise NotImplementedError
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
|
@ -0,0 +1,16 @@
|
|||
module Gitlab
|
||||
module GithubImport
|
||||
module Queue
|
||||
extend ActiveSupport::Concern
|
||||
|
||||
included do
|
||||
# If a job produces an error it may block a stage from advancing
|
||||
# forever. To prevent this from happening we prevent jobs from going to
|
||||
# the dead queue. This does mean some resources may not be imported, but
|
||||
# this is better than a project being stuck in the "import" state
|
||||
# forever.
|
||||
sidekiq_options queue: 'github_importer', dead: false, retry: 5
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
|
@ -0,0 +1,40 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Gitlab
|
||||
module GithubImport
|
||||
# Module that provides methods shared by the various workers used for
|
||||
# importing GitHub projects.
|
||||
module ReschedulingMethods
|
||||
# project_id - The ID of the GitLab project to import the data into.
|
||||
# hash - A Hash containing the details of the GitHub object to import.
|
||||
# notify_key - The Redis key to notify upon completion, if any.
|
||||
def perform(project_id, hash, notify_key = nil)
|
||||
project = Project.find_by(id: project_id)
|
||||
|
||||
return notify_waiter(notify_key) unless project
|
||||
|
||||
client = GithubImport.new_client_for(project, parallel: true)
|
||||
|
||||
if try_import(project, client, hash)
|
||||
notify_waiter(notify_key)
|
||||
else
|
||||
# In the event of hitting the rate limit we want to reschedule the job
|
||||
# so it's retried after our rate limit has been reset.
|
||||
self.class
|
||||
.perform_in(client.rate_limit_resets_in, project.id, hash, notify_key)
|
||||
end
|
||||
end
|
||||
|
||||
def try_import(*args)
|
||||
import(*args)
|
||||
true
|
||||
rescue RateLimitError
|
||||
false
|
||||
end
|
||||
|
||||
def notify_waiter(key = nil)
|
||||
JobWaiter.notify(key, jid) if key
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
|
@ -0,0 +1,30 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Gitlab
|
||||
module GithubImport
|
||||
module StageMethods
|
||||
# project_id - The ID of the GitLab project to import the data into.
|
||||
def perform(project_id)
|
||||
return unless (project = find_project(project_id))
|
||||
|
||||
client = GithubImport.new_client_for(project)
|
||||
|
||||
try_import(client, project)
|
||||
end
|
||||
|
||||
# client - An instance of Gitlab::GithubImport::Client.
|
||||
# project - An instance of Project.
|
||||
def try_import(client, project)
|
||||
import(client, project)
|
||||
rescue RateLimitError
|
||||
self.class.perform_in(client.rate_limit_resets_in, project.id)
|
||||
end
|
||||
|
||||
def find_project(id)
|
||||
# If the project has been marked as failed we want to bail out
|
||||
# automatically.
|
||||
Project.import_started.find_by(id: id)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
|
@ -0,0 +1,74 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Gitlab
|
||||
module GithubImport
|
||||
# AdvanceStageWorker is a worker used by the GitHub importer to wait for a
|
||||
# number of jobs to complete, without blocking a thread. Once all jobs have
|
||||
# been completed this worker will advance the import process to the next
|
||||
# stage.
|
||||
class AdvanceStageWorker
|
||||
include Sidekiq::Worker
|
||||
|
||||
sidekiq_options queue: 'github_importer_advance_stage', dead: false
|
||||
|
||||
INTERVAL = 30.seconds.to_i
|
||||
|
||||
# The number of seconds to wait (while blocking the thread) before
|
||||
# continuing to the next waiter.
|
||||
BLOCKING_WAIT_TIME = 5
|
||||
|
||||
# The known importer stages and their corresponding Sidekiq workers.
|
||||
STAGES = {
|
||||
issues_and_diff_notes: Stage::ImportIssuesAndDiffNotesWorker,
|
||||
notes: Stage::ImportNotesWorker,
|
||||
finish: Stage::FinishImportWorker
|
||||
}.freeze
|
||||
|
||||
# project_id - The ID of the project being imported.
|
||||
# waiters - A Hash mapping Gitlab::JobWaiter keys to the number of
|
||||
# remaining jobs.
|
||||
# next_stage - The name of the next stage to start when all jobs have been
|
||||
# completed.
|
||||
def perform(project_id, waiters, next_stage)
|
||||
return unless (project = find_project(project_id))
|
||||
|
||||
new_waiters = wait_for_jobs(waiters)
|
||||
|
||||
if new_waiters.empty?
|
||||
# We refresh the import JID here so workers importing individual
|
||||
# resources (e.g. notes) don't have to do this all the time, reducing
|
||||
# the pressure on Redis. We _only_ do this once all jobs are done so
|
||||
# we don't get stuck forever if one or more jobs failed to notify the
|
||||
# JobWaiter.
|
||||
project.refresh_import_jid_expiration
|
||||
|
||||
STAGES.fetch(next_stage.to_sym).perform_async(project_id)
|
||||
else
|
||||
self.class.perform_in(INTERVAL, project_id, new_waiters, next_stage)
|
||||
end
|
||||
end
|
||||
|
||||
def wait_for_jobs(waiters)
|
||||
waiters.each_with_object({}) do |(key, remaining), new_waiters|
|
||||
waiter = JobWaiter.new(remaining, key)
|
||||
|
||||
# We wait for a brief moment of time so we don't reschedule if we can
|
||||
# complete the work fast enough.
|
||||
waiter.wait(BLOCKING_WAIT_TIME)
|
||||
|
||||
next unless waiter.jobs_remaining.positive?
|
||||
|
||||
new_waiters[waiter.key] = waiter.jobs_remaining
|
||||
end
|
||||
end
|
||||
|
||||
def find_project(id)
|
||||
# We only care about the import JID so we can refresh it. We also only
|
||||
# want the project if it hasn't been marked as failed yet. It's possible
|
||||
# the import gets marked as stuck when jobs of the current stage failed
|
||||
# somehow.
|
||||
Project.select(:import_jid).import_started.find_by(id: id)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
|
@ -0,0 +1,25 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Gitlab
|
||||
module GithubImport
|
||||
class ImportDiffNoteWorker
|
||||
include ObjectImporter
|
||||
|
||||
def representation_class
|
||||
Representation::DiffNote
|
||||
end
|
||||
|
||||
def importer_class
|
||||
Importer::DiffNoteImporter
|
||||
end
|
||||
|
||||
def counter_name
|
||||
:github_importer_imported_diff_notes
|
||||
end
|
||||
|
||||
def counter_description
|
||||
'The number of imported GitHub pull request review comments'
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
|
@ -0,0 +1,25 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Gitlab
|
||||
module GithubImport
|
||||
class ImportIssueWorker
|
||||
include ObjectImporter
|
||||
|
||||
def representation_class
|
||||
Representation::Issue
|
||||
end
|
||||
|
||||
def importer_class
|
||||
Importer::IssueAndLabelLinksImporter
|
||||
end
|
||||
|
||||
def counter_name
|
||||
:github_importer_imported_issues
|
||||
end
|
||||
|
||||
def counter_description
|
||||
'The number of imported GitHub issues'
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
|
@ -0,0 +1,25 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Gitlab
|
||||
module GithubImport
|
||||
class ImportNoteWorker
|
||||
include ObjectImporter
|
||||
|
||||
def representation_class
|
||||
Representation::Note
|
||||
end
|
||||
|
||||
def importer_class
|
||||
Importer::NoteImporter
|
||||
end
|
||||
|
||||
def counter_name
|
||||
:github_importer_imported_notes
|
||||
end
|
||||
|
||||
def counter_description
|
||||
'The number of imported GitHub comments'
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
|
@ -0,0 +1,25 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Gitlab
|
||||
module GithubImport
|
||||
class ImportPullRequestWorker
|
||||
include ObjectImporter
|
||||
|
||||
def representation_class
|
||||
Representation::PullRequest
|
||||
end
|
||||
|
||||
def importer_class
|
||||
Importer::PullRequestImporter
|
||||
end
|
||||
|
||||
def counter_name
|
||||
:github_importer_imported_pull_requests
|
||||
end
|
||||
|
||||
def counter_description
|
||||
'The number of imported GitHub pull requests'
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
|
@ -0,0 +1,38 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Gitlab
|
||||
module GithubImport
|
||||
class RefreshImportJidWorker
|
||||
include Sidekiq::Worker
|
||||
include GithubImport::Queue
|
||||
|
||||
# The interval to schedule new instances of this job at.
|
||||
INTERVAL = 1.minute.to_i
|
||||
|
||||
def self.perform_in_the_future(*args)
|
||||
perform_in(INTERVAL, *args)
|
||||
end
|
||||
|
||||
# project_id - The ID of the project that is being imported.
|
||||
# check_job_id - The ID of the job for which to check the status.
|
||||
def perform(project_id, check_job_id)
|
||||
return unless (project = find_project(project_id))
|
||||
|
||||
if SidekiqStatus.running?(check_job_id)
|
||||
# As long as the repository is being cloned we want to keep refreshing
|
||||
# the import JID status.
|
||||
project.refresh_import_jid_expiration
|
||||
self.class.perform_in_the_future(project_id, check_job_id)
|
||||
end
|
||||
|
||||
# If the job is no longer running there's nothing else we need to do. If
|
||||
# the clone job completed successfully it will have scheduled the next
|
||||
# stage; if it died there's nothing we can do anyway.
|
||||
end
|
||||
|
||||
def find_project(id)
|
||||
Project.select(:import_jid).import_started.find_by(id: id)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
|
@ -0,0 +1,43 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Gitlab
|
||||
module GithubImport
|
||||
module Stage
|
||||
class FinishImportWorker
|
||||
include Sidekiq::Worker
|
||||
include GithubImport::Queue
|
||||
include StageMethods
|
||||
|
||||
# project - An instance of Project.
|
||||
def import(_, project)
|
||||
project.after_import
|
||||
report_import_time(project)
|
||||
end
|
||||
|
||||
def report_import_time(project)
|
||||
duration = Time.zone.now - project.created_at
|
||||
path = project.path_with_namespace
|
||||
|
||||
histogram.observe({ project: path }, duration)
|
||||
counter.increment
|
||||
|
||||
logger.info("GitHub importer finished for #{path} in #{duration.round(2)} seconds")
|
||||
end
|
||||
|
||||
def histogram
|
||||
@histogram ||= Gitlab::Metrics.histogram(
|
||||
:github_importer_total_duration_seconds,
|
||||
'Total time spent importing GitHub projects, in seconds'
|
||||
)
|
||||
end
|
||||
|
||||
def counter
|
||||
@counter ||= Gitlab::Metrics.counter(
|
||||
:github_importer_imported_projects,
|
||||
'The number of imported GitHub projects'
|
||||
)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
|
@ -0,0 +1,33 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Gitlab
|
||||
module GithubImport
|
||||
module Stage
|
||||
class ImportBaseDataWorker
|
||||
include Sidekiq::Worker
|
||||
include GithubImport::Queue
|
||||
include StageMethods
|
||||
|
||||
# These importers are fast enough that we can just run them in the same
|
||||
# thread.
|
||||
IMPORTERS = [
|
||||
Importer::LabelsImporter,
|
||||
Importer::MilestonesImporter,
|
||||
Importer::ReleasesImporter
|
||||
].freeze
|
||||
|
||||
# client - An instance of Gitlab::GithubImport::Client.
|
||||
# project - An instance of Project.
|
||||
def import(client, project)
|
||||
IMPORTERS.each do |klass|
|
||||
klass.new(project, client).execute
|
||||
end
|
||||
|
||||
project.refresh_import_jid_expiration
|
||||
|
||||
ImportPullRequestsWorker.perform_async(project.id)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
|
@ -0,0 +1,31 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Gitlab
|
||||
module GithubImport
|
||||
module Stage
|
||||
class ImportIssuesAndDiffNotesWorker
|
||||
include Sidekiq::Worker
|
||||
include GithubImport::Queue
|
||||
include StageMethods
|
||||
|
||||
# The importers to run in this stage. Issues can't be imported earlier
|
||||
# on as we also use these to enrich pull requests with assigned labels.
|
||||
IMPORTERS = [
|
||||
Importer::IssuesImporter,
|
||||
Importer::DiffNotesImporter
|
||||
].freeze
|
||||
|
||||
# client - An instance of Gitlab::GithubImport::Client.
|
||||
# project - An instance of Project.
|
||||
def import(client, project)
|
||||
waiters = IMPORTERS.each_with_object({}) do |klass, hash|
|
||||
waiter = klass.new(project, client).execute
|
||||
hash[waiter.key] = waiter.jobs_remaining
|
||||
end
|
||||
|
||||
AdvanceStageWorker.perform_async(project.id, waiters, :notes)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
|
@ -0,0 +1,27 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Gitlab
|
||||
module GithubImport
|
||||
module Stage
|
||||
class ImportNotesWorker
|
||||
include Sidekiq::Worker
|
||||
include GithubImport::Queue
|
||||
include StageMethods
|
||||
|
||||
# client - An instance of Gitlab::GithubImport::Client.
|
||||
# project - An instance of Project.
|
||||
def import(client, project)
|
||||
waiter = Importer::NotesImporter
|
||||
.new(project, client)
|
||||
.execute
|
||||
|
||||
AdvanceStageWorker.perform_async(
|
||||
project.id,
|
||||
{ waiter.key => waiter.jobs_remaining },
|
||||
:finish
|
||||
)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
|
@ -0,0 +1,29 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Gitlab
|
||||
module GithubImport
|
||||
module Stage
|
||||
class ImportPullRequestsWorker
|
||||
include Sidekiq::Worker
|
||||
include GithubImport::Queue
|
||||
include StageMethods
|
||||
|
||||
# client - An instance of Gitlab::GithubImport::Client.
|
||||
# project - An instance of Project.
|
||||
def import(client, project)
|
||||
waiter = Importer::PullRequestsImporter
|
||||
.new(project, client)
|
||||
.execute
|
||||
|
||||
project.refresh_import_jid_expiration
|
||||
|
||||
AdvanceStageWorker.perform_async(
|
||||
project.id,
|
||||
{ waiter.key => waiter.jobs_remaining },
|
||||
:issues_and_diff_notes
|
||||
)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
|
@ -0,0 +1,38 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Gitlab
|
||||
module GithubImport
|
||||
module Stage
|
||||
class ImportRepositoryWorker
|
||||
include Sidekiq::Worker
|
||||
include GithubImport::Queue
|
||||
include StageMethods
|
||||
|
||||
# client - An instance of Gitlab::GithubImport::Client.
|
||||
# project - An instance of Project.
|
||||
def import(client, project)
|
||||
# In extreme cases it's possible for a clone to take more than the
|
||||
# import job expiration time. To work around this we schedule a
|
||||
# separate job that will periodically run and refresh the import
|
||||
# expiration time.
|
||||
RefreshImportJidWorker.perform_in_the_future(project.id, jid)
|
||||
|
||||
importer = Importer::RepositoryImporter.new(project, client)
|
||||
|
||||
return unless importer.execute
|
||||
|
||||
counter.increment
|
||||
|
||||
ImportBaseDataWorker.perform_async(project.id)
|
||||
end
|
||||
|
||||
def counter
|
||||
Gitlab::Metrics.counter(
|
||||
:github_importer_imported_repositories,
|
||||
'The number of imported GitHub repositories'
|
||||
)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
|
@ -17,11 +17,16 @@ class RepositoryImportWorker
|
|||
import_url: project.import_url,
|
||||
path: project.full_path)
|
||||
|
||||
result = Projects::ImportService.new(project, project.creator).execute
|
||||
service = Projects::ImportService.new(project, project.creator)
|
||||
result = service.execute
|
||||
|
||||
# Some importers may perform their work asynchronously. In this case it's up
|
||||
# to those importers to mark the import process as complete.
|
||||
return if service.async?
|
||||
|
||||
raise ImportError, result[:message] if result[:status] == :error
|
||||
|
||||
project.repository.after_import
|
||||
project.import_finish
|
||||
project.after_import
|
||||
rescue ImportError => ex
|
||||
fail_import(project, ex.message)
|
||||
raise
|
||||
|
|
|
@ -40,6 +40,8 @@
|
|||
- [upload_checksum, 1]
|
||||
- [repository_fork, 1]
|
||||
- [repository_import, 1]
|
||||
- [github_importer, 1]
|
||||
- [github_importer_advance_stage, 1]
|
||||
- [project_service, 1]
|
||||
- [delete_user, 1]
|
||||
- [delete_merged_branches, 1]
|
||||
|
|
|
@ -171,6 +171,7 @@ have access to GitLab administration tools and settings.
|
|||
- [GitLab performance monitoring with InfluxDB](administration/monitoring/performance/introduction.md): Configure GitLab and InfluxDB for measuring performance metrics.
|
||||
- [GitLab performance monitoring with Prometheus](administration/monitoring/prometheus/index.md): Configure GitLab and Prometheus for measuring performance metrics.
|
||||
- [Monitoring uptime](user/admin_area/monitoring/health_check.md): Check the server status using the health check endpoint.
|
||||
- [Monitoring GitHub imports](administration/monitoring/github_imports.md)
|
||||
|
||||
### Performance
|
||||
|
||||
|
|
|
@ -0,0 +1,101 @@
|
|||
# Monitoring GitHub imports
|
||||
|
||||
>**Note:**
|
||||
Available since [GitLab 10.2][14731].
|
||||
|
||||
The GitHub importer exposes various Prometheus metrics that you can use to
|
||||
monitor the health and progress of the importer.
|
||||
|
||||
## Import Duration Times
|
||||
|
||||
| Name | Type |
|
||||
|------------------------------------------|-----------|
|
||||
| `github_importer_total_duration_seconds` | histogram |
|
||||
|
||||
This metric tracks the total time spent (in seconds) importing a project (from
|
||||
project creation until the import process finishes), for every imported project.
|
||||
|
||||
The name of the project is stored in the `project` label in the format
|
||||
`namespace/name` (e.g. `gitlab-org/gitlab-ce`).
|
||||
|
||||
## Number of imported projects
|
||||
|
||||
| Name | Type |
|
||||
|-------------------------------------|---------|
|
||||
| `github_importer_imported_projects` | counter |
|
||||
|
||||
This metric tracks the total number of projects imported over time. This metric
|
||||
does not expose any labels.
|
||||
|
||||
## Number of GitHub API calls
|
||||
|
||||
| Name | Type |
|
||||
|---------------------------------|---------|
|
||||
| `github_importer_request_count` | counter |
|
||||
|
||||
This metric tracks the total number of GitHub API calls performed over time, for
|
||||
all projects. This metric does not expose any labels.
|
||||
|
||||
## Rate limit errors
|
||||
|
||||
| Name | Type |
|
||||
|-----------------------------------|---------|
|
||||
| `github_importer_rate_limit_hits` | counter |
|
||||
|
||||
This metric tracks the number of times we hit the GitHub rate limit, for all
|
||||
projects. This metric does not expose any labels.
|
||||
|
||||
## Number of imported issues
|
||||
|
||||
| Name | Type |
|
||||
|-----------------------------------|---------|
|
||||
| `github_importer_imported_issues` | counter |
|
||||
|
||||
This metric tracks the number of imported issues across all projects.
|
||||
|
||||
The name of the project is stored in the `project` label in the format
|
||||
`namespace/name` (e.g. `gitlab-org/gitlab-ce`).
|
||||
|
||||
## Number of imported pull requests
|
||||
|
||||
| Name | Type |
|
||||
|------------------------------------------|---------|
|
||||
| `github_importer_imported_pull_requests` | counter |
|
||||
|
||||
This metric tracks the number of imported pull requests across all projects.
|
||||
|
||||
The name of the project is stored in the `project` label in the format
|
||||
`namespace/name` (e.g. `gitlab-org/gitlab-ce`).
|
||||
|
||||
## Number of imported comments
|
||||
|
||||
| Name | Type |
|
||||
|----------------------------------|---------|
|
||||
| `github_importer_imported_notes` | counter |
|
||||
|
||||
This metric tracks the number of imported comments across all projects.
|
||||
|
||||
The name of the project is stored in the `project` label in the format
|
||||
`namespace/name` (e.g. `gitlab-org/gitlab-ce`).
|
||||
|
||||
## Number of imported pull request review comments
|
||||
|
||||
| Name | Type |
|
||||
|---------------------------------------|---------|
|
||||
| `github_importer_imported_diff_notes` | counter |
|
||||
|
||||
This metric tracks the number of imported pull request review comments across all projects.
|
||||
|
||||
The name of the project is stored in the `project` label in the format
|
||||
`namespace/name` (e.g. `gitlab-org/gitlab-ce`).
|
||||
|
||||
## Number of imported repositories
|
||||
|
||||
| Name | Type |
|
||||
|-----------------------------------------|---------|
|
||||
| `github_importer_imported_repositories` | counter |
|
||||
|
||||
This metric tracks the number of imported repositories across all projects. This
|
||||
metric does not expose any labels.
|
||||
|
||||
[14731]: https://gitlab.com/gitlab-org/gitlab-ce/merge_requests/14731
|
|
@ -37,6 +37,7 @@ comments: false
|
|||
- [Gotchas](gotchas.md) to avoid
|
||||
- [Issue and merge requests state models](object_state_models.md)
|
||||
- [How to dump production data to staging](db_dump.md)
|
||||
- [Working with the GitHub importer](github_importer.md)
|
||||
|
||||
## Performance guides
|
||||
|
||||
|
|
|
@ -0,0 +1,209 @@
|
|||
# Working with the GitHub importer
|
||||
|
||||
In GitLab 10.2 a new version of the GitHub importer was introduced. This new
|
||||
importer performs its work in parallel using Sidekiq, greatly reducing the time
|
||||
necessary to import GitHub projects into a GitLab instance.
|
||||
|
||||
The GitHub importer offers two different types of importers: a sequential
|
||||
importer and a parallel importer. The Rake task `import:github` uses the
|
||||
sequential importer, while everything else uses the parallel importer. The
|
||||
difference between these two importers is quite simple: the sequential importer
|
||||
does all work in a single thread, making it more useful for debugging purposes
|
||||
or Rake tasks. The parallel importer on the other hand uses Sidekiq.
|
||||
|
||||
## Requirements
|
||||
|
||||
* GitLab CE 10.2.0 or newer.
|
||||
* Sidekiq workers that process the `github_importer` and
|
||||
`github_importer_advance_stage` queues (this is enabled by default).
|
||||
* Octokit (used for interacting with the GitHub API)
|
||||
|
||||
## Code structure
|
||||
|
||||
The importer's codebase is broken up into the following directories:
|
||||
|
||||
* `lib/gitlab/github_import`: this directory contains most of the code such as
|
||||
the classes used for importing resources.
|
||||
* `app/workers/gitlab/github_import`: this directory contains the Sidekiq
|
||||
workers.
|
||||
* `app/workers/concerns/gitlab/github_import`: this directory contains a few
|
||||
modules reused by the various Sidekiq workers.
|
||||
|
||||
## Architecture overview
|
||||
|
||||
When a GitHub project is imported we schedule and execute a job for the
|
||||
`RepositoryImportWorker` worker, as with all other importers. However, unlike other
|
||||
importers we don't immediately perform the work necessary. Instead work is
|
||||
divided into separate stages, with each stage consisting of a set of Sidekiq
|
||||
jobs that are executed. Between every stage a job is scheduled that periodically
|
||||
checks if all work of the current stage is completed, advancing the import
|
||||
process to the next stage when this is the case. The worker handling this is
|
||||
called `Gitlab::GithubImport::AdvanceStageWorker`.
|
||||
|
||||
## Stages
|
||||
|
||||
### 1. RepositoryImportWorker
|
||||
|
||||
This worker will kick off the import process by simply scheduling a job for the
|
||||
next worker.
|
||||
|
||||
### 2. Stage::ImportRepositoryWorker
|
||||
|
||||
This worker will import the repository and wiki, scheduling the next stage when
|
||||
done.
|
||||
|
||||
### 3. Stage::ImportBaseDataWorker
|
||||
|
||||
This worker will import base data such as labels, milestones, and releases. This
|
||||
work is done in a single thread since it can be performed fast enough that we
|
||||
don't need to perform this work in parallel.
|
||||
|
||||
### 4. Stage::ImportPullRequestsWorker
|
||||
|
||||
This worker will import all pull requests. For every pull request a job for the
|
||||
`Gitlab::GithubImport::ImportPullRequestWorker` worker is scheduled.
|
||||
|
||||
### 5. Stage::ImportIssuesAndDiffNotesWorker
|
||||
|
||||
This worker will import all issues and pull request comments. For every issue we
|
||||
schedule a job for the `Gitlab::GithubImport::ImportIssueWorker` worker. For
|
||||
pull request comments we instead schedule jobs for the
|
||||
`Gitlab::GithubImport::ImportDiffNoteWorker` worker.
|
||||
|
||||
This worker processes both issues and diff notes in parallel so we don't need to
|
||||
schedule a separate stage and wait for the previous one to complete.
|
||||
|
||||
Issues are imported separately from pull requests because only the "issues" API
|
||||
includes labels for both issues and pull requests. Importing issues and setting
|
||||
label links in the same worker removes the need for performing a separate crawl
|
||||
through the API data, reducing the number of API calls necessary to import a
|
||||
project.
|
||||
|
||||
### 6. Stage::ImportNotesWorker
|
||||
|
||||
This worker imports regular comments for both issues and pull requests. For
|
||||
every comment we schedule a job for the
|
||||
`Gitlab::GithubImport::ImportNoteWorker` worker.
|
||||
|
||||
Regular comments have to be imported at the end since the GitHub API used
|
||||
returns comments for both issues and pull requests. This means we have to wait
|
||||
for all issues and pull requests to be imported before we can import regular
|
||||
comments.
|
||||
|
||||
### 7. Stage::FinishImportWorker
|
||||
|
||||
This worker will wrap up the import process by performing some housekeeping
|
||||
(such as flushing any caches) and by marking the import as completed.
|
||||
|
||||
## Advancing stages
|
||||
|
||||
Advancing stages is done in one of two ways:
|
||||
|
||||
1. Scheduling the worker for the next stage directly.
|
||||
2. Scheduling a job for `Gitlab::GithubImport::AdvanceStageWorker` which will
|
||||
advance the stage when all work of the current stage has been completed.
|
||||
|
||||
The first approach should only be used by workers that perform all their work in
|
||||
a single thread, while `AdvanceStageWorker` should be used for everything else.
|
||||
|
||||
The way `AdvanceStageWorker` works is fairly simple. When scheduling a job it
|
||||
will be given a project ID, a list of Redis keys, and the name of the next
|
||||
stage. The Redis keys (produced by `Gitlab::JobWaiter`) are used to check if the
|
||||
currently running stage has been completed or not. If the stage has not yet been
|
||||
completed `AdvanceStageWorker` will reschedule itself. Once a stage finishes
|
||||
`AdvanceStageWorker` will refresh the import JID (more on this below) and
|
||||
schedule the worker of the next stage.
|
||||
|
||||
To reduce the number of `AdvanceStageWorker` jobs scheduled this worker will
|
||||
briefly wait for jobs to complete before deciding what the next action should
|
||||
be. For small projects this may slow down the import process a bit, but it will
|
||||
also reduce pressure on the system as a whole.
|
||||
|
||||
## Refreshing import JIDs
|
||||
|
||||
GitLab includes a worker called `StuckImportJobsWorker` that will periodically
|
||||
run and mark project imports as failed if they have been running for more than
|
||||
15 hours. For GitHub projects this poses a bit of a problem: importing large
|
||||
projects could take several hours depending on how often we hit the GitHub rate
|
||||
limit (more on this below), but we don't want `StuckImportJobsWorker` to mark
|
||||
our import as failed because of this.
|
||||
|
||||
To prevent this from happening we periodically refresh the expiration time of
|
||||
the import process. This works by storing the JID of the import job in the
|
||||
database, then refreshing this JID's TTL at various stages throughout the import
|
||||
process. This is done by calling `Project#refresh_import_jid_expiration`. By
|
||||
refreshing this TTL we can ensure our import does not get marked as failed so
|
||||
long as we're still performing work.
|
||||
|
||||
## GitHub rate limit
|
||||
|
||||
GitHub has a rate limit of 5 000 API calls per hour. The number of requests
|
||||
necessary to import a project is largely dominated by the number of unique users
|
||||
involved in a project (e.g. issue authors). Other data such as issue pages
|
||||
and comments typically only requires a few dozen requests to import. This is
|
||||
because we need the Email address of users in order to map them to GitLab users.
|
||||
|
||||
We handle this by doing the following:
|
||||
|
||||
1. Once we hit the rate limit all jobs will automatically reschedule themselves
|
||||
in such a way that they are not executed until the rate limit has been reset.
|
||||
2. We cache the mapping of GitHub users to GitLab users in Redis.
|
||||
|
||||
More information on user caching can be found below.
|
||||
|
||||
## Caching user lookups
|
||||
|
||||
When mapping GitHub users to GitLab users we need to (in the worst case)
|
||||
perform:
|
||||
|
||||
1. One API call to get the user's Email address.
|
||||
2. Two database queries to see if a corresponding GitLab user exists. One query
|
||||
will try to find the user based on the GitHub user ID, while the second query
|
||||
is used to find the user using their GitHub Email address.
|
||||
|
||||
Because this process is quite expensive we cache the result of these lookups in
|
||||
Redis. For every user looked up we store three keys:
|
||||
|
||||
1. A Redis key mapping GitHub usernames to their Email addresses.
|
||||
2. A Redis key mapping a GitHub Email address to a GitLab user ID.
|
||||
3. A Redis key mapping a GitHub user ID to a GitLab user ID.
|
||||
|
||||
There are two types of lookups we cache:
|
||||
|
||||
1. A positive lookup, meaning we found a GitLab user ID.
|
||||
2. A negative lookup, meaning we didn't find a GitLab user ID. Caching this
|
||||
prevents us from performing the same work for users that we know don't exist
|
||||
in our GitLab database.
|
||||
|
||||
The expiration time of these keys is 24 hours. When retrieving the cache of a
|
||||
positive lookup we refresh the TTL automatically. The TTL of negative lookups is
|
||||
never refreshed.
|
||||
|
||||
Because of this caching layer it's possible newly registered GitLab accounts
|
||||
won't be linked to their corresponding GitHub accounts. This however will sort
|
||||
itself out once the cached keys expire.
|
||||
|
||||
The user cache lookup is shared across projects. This means that the more
|
||||
projects get imported the fewer GitHub API calls will be needed.
|
||||
|
||||
The code for this resides in:
|
||||
|
||||
* `lib/gitlab/github_import/user_finder.rb`
|
||||
* `lib/gitlab/github_import/caching.rb`
|
||||
|
||||
## Mapping labels and milestones
|
||||
|
||||
To reduce pressure on the database we do not query it when setting labels and
|
||||
milestones on issues and merge requests. Instead we cache this data when we
|
||||
import labels and milestones, then we reuse this cache when assigning them to
|
||||
issues/merge requests. Similar to the user lookups these cache keys are expired
|
||||
automatically after 24 hours of not being used.
|
||||
|
||||
Unlike the user lookup caches these label and milestone caches are scoped to the
|
||||
project that is being imported.
|
||||
|
||||
The code for this resides in:
|
||||
|
||||
* `lib/gitlab/github_import/label_finder.rb`
|
||||
* `lib/gitlab/github_import/milestone_finder.rb`
|
||||
* `lib/gitlab/github_import/caching.rb`
|
|
@ -24,6 +24,8 @@ constrains of a Sidekiq worker.
|
|||
- the milestones (GitLab 8.7+)
|
||||
- the labels (GitLab 8.7+)
|
||||
- the release note descriptions (GitLab 8.12+)
|
||||
- the pull request review comments (GitLab 10.2+)
|
||||
- the regular issue and pull request comments
|
||||
- References to pull requests and issues are preserved (GitLab 8.7+)
|
||||
- Repository public access is retained. If a repository is private in GitHub
|
||||
it will be created as private in GitLab as well.
|
||||
|
@ -43,10 +45,13 @@ the case the namespace is taken, the repository will be imported under the user'
|
|||
namespace that started the import process.
|
||||
|
||||
The importer will also import branches on forks of projects related to open pull
|
||||
requests. These branches will be imported with a naming scheume similar to
|
||||
requests. These branches will be imported with a naming scheme similar to
|
||||
GH-SHA-Username/Pull-Request-number/fork-name/branch. This may lead to a discrepancy
|
||||
in branches compared to the GitHub Repository.
|
||||
|
||||
For a more technical description and an overview of the architecture you can
|
||||
refer to [Working with the GitHub importer][gh-import-dev-docs].
|
||||
|
||||
## Importing your GitHub repositories
|
||||
|
||||
The importer page is visible when you create a new project.
|
||||
|
@ -121,7 +126,29 @@ If you want, you can import all your GitHub projects in one go by hitting
|
|||
You can also choose a different name for the project and a different namespace,
|
||||
if you have the privileges to do so.
|
||||
|
||||
## Making the import process go faster
|
||||
|
||||
For large projects it may take a while to import all data. To reduce the time
|
||||
necessary you can increase the number of Sidekiq workers that process the
|
||||
following queues:
|
||||
|
||||
* `github_importer`
|
||||
* `github_importer_advance_stage`
|
||||
|
||||
For an optimal experience we recommend having at least 4 Sidekiq processes (each
|
||||
running a number of threads equal to the number of CPU cores) that _only_
|
||||
process these queues. We also recommend that these processes run on separate
|
||||
servers. For 4 servers with 8 cores this means you can import up to 32 objects
|
||||
(e.g. issues) in parallel.
|
||||
|
||||
Reducing the time spent in cloning a repository can be done by increasing
|
||||
network throughput, CPU capacity, and disk performance (e.g. by using high
|
||||
performance SSDs) of the disks that store the Git repositories (for your GitLab
|
||||
instance). Increasing the number of Sidekiq workers will _not_ reduce the time
|
||||
spent cloning repositories.
|
||||
|
||||
[gh-import]: ../../../integration/github.md "GitHub integration"
|
||||
[gh-rake]: ../../../administration/raketasks/github_import.md "GitHub rake task"
|
||||
[gh-integration]: #authorize-access-to-your-repositories-using-the-github-integration
|
||||
[gh-token]: #authorize-access-to-your-repositories-using-a-personal-access-token
|
||||
[gh-import-dev-docs]: ../../../development/github_importer.md "Working with the GitHub importer"
|
||||
|
|
|
@ -5,6 +5,10 @@ class Feature
|
|||
# ActiveRecord model for Flipper's feature rows, pointed at the `features`
# table.
class FlipperFeature < Flipper::Adapters::ActiveRecord::Feature
  # Using `self.table_name` won't work. ActiveRecord bug?
  superclass.table_name = 'features'

  # Returns the `key` column of every stored feature row.
  def self.feature_names
    pluck(:key)
  end
end
|
||||
|
||||
class FlipperGate < Flipper::Adapters::ActiveRecord::Gate
|
||||
|
@ -22,11 +26,19 @@ class Feature
|
|||
flipper.feature(key)
|
||||
end
|
||||
|
||||
# Returns the names of the features that have been stored, as reported by
# FlipperFeature.feature_names.
#
# While RequestStore is active the result is memoized under the
# `:flipper_persisted_names` key; otherwise the names are looked up on
# every call.
def persisted_names
  return FlipperFeature.feature_names unless RequestStore.active?

  RequestStore[:flipper_persisted_names] ||= FlipperFeature.feature_names
end
|
||||
|
||||
def persisted?(feature)
|
||||
# Flipper creates in-memory features when asked for a not-yet-created one.
|
||||
# If we want to check if a feature has been actually set, we look for it
|
||||
# on the persisted features list.
|
||||
all.map(&:name).include?(feature.name)
|
||||
persisted_names.include?(feature.name)
|
||||
end
|
||||
|
||||
def enabled?(key, thing = nil)
|
||||
|
|
|
@ -1,54 +0,0 @@
|
|||
module Github
  # Small Faraday-based HTTP client for the GitHub API.
  #
  # Before every request the client asks its RateLimit whether the quota is
  # considered exceeded and, if so, sleeps for the value RateLimit reports.
  class Client
    TIMEOUT = 60
    DEFAULT_PER_PAGE = 100

    attr_reader :connection, :rate_limit

    # options - Hash of connection settings:
    #           :token   - required; passed to Faraday's `authorization`
    #                      helper with the 'token' scheme.
    #           :url     - base URL; defaults to the OmniAuth `github`
    #                      provider's configured site, or OmniAuth's default.
    #           :timeout - used for both the open and the read timeout
    #                      (default: TIMEOUT).
    #
    # Raises KeyError when :token is missing.
    def initialize(options)
      timeout = options.fetch(:timeout, TIMEOUT)

      @connection = Faraday.new(url: options.fetch(:url, root_endpoint)) do |faraday|
        faraday.options.open_timeout = timeout
        faraday.options.timeout = timeout
        faraday.authorization 'token', options.fetch(:token)
        faraday.adapter :net_http
        faraday.ssl.verify = verify_ssl
      end

      @rate_limit = RateLimit.new(connection)
    end

    # Performs a GET request and wraps the result in a Github::Response.
    #
    # url   - path or URL to request.
    # query - Hash of query parameters; `per_page` defaults to
    #         DEFAULT_PER_PAGE unless overridden here.
    def get(url, query = {})
      exceed, reset_in = rate_limit.get
      sleep reset_in if exceed

      params = { per_page: DEFAULT_PER_PAGE }.merge(query)
      Github::Response.new(connection.get(url, params))
    end

    private

    def root_endpoint
      custom_endpoint || github_endpoint
    end

    # Site configured for the `github` OmniAuth provider, if any.
    def custom_endpoint
      github_omniauth_provider.dig('args', 'client_options', 'site')
    end

    def verify_ssl
      # If there is no config, we're connecting to github.com
      # and we should verify ssl.
      github_omniauth_provider.fetch('verify_ssl', true)
    end

    def github_endpoint
      OmniAuth::Strategies::GitHub.default_options[:client_options][:site]
    end

    # Settings of the `github` OmniAuth provider as a Hash; empty when the
    # provider is not configured (`nil.to_h`).
    def github_omniauth_provider
      @github_omniauth_provider ||=
        Gitlab.config.omniauth.providers
          .find { |provider| provider.name == 'github' }
          .to_h
    end
  end
end
|
|
@ -1,29 +0,0 @@
|
|||
module Github
  # Walks a paginated GitHub API listing and exposes its items as a single
  # lazy enumeration.
  class Collection
    attr_reader :options

    # options - Hash handed unchanged to Github::Client.new.
    def initialize(options)
      @options = options
    end

    # Returns the items found at `url` and on every following page.
    #
    # url   - first page to request; a blank value yields an empty Array.
    # query - Hash of query parameters sent with each request.
    #
    # Pages are requested on demand: the result is an Enumerator::Lazy that
    # follows the `:next` relation of each response until there is none.
    def fetch(url, query = {})
      return [] if url.blank?

      Enumerator.new do |yielder|
        loop do
          response = client.get(url, query)
          response.body.each { |item| yielder << item }

          break unless response.rels.key?(:next)

          url = response.rels[:next]
        end
      end.lazy
    end

    private

    def client
      @client ||= Github::Client.new(options)
    end
  end
end
|
|
@ -1,3 +0,0 @@
|
|||
module Github
  # Error class for failures while fetching a repository from GitHub.
  RepositoryFetchError = Class.new(StandardError)
end
|
|
@ -1,377 +0,0 @@
|
|||
require_relative 'error'
|
||||
require_relative 'import/issue'
|
||||
require_relative 'import/legacy_diff_note'
|
||||
require_relative 'import/merge_request'
|
||||
require_relative 'import/note'
|
||||
|
||||
module Github
|
||||
class Import
|
||||
include Gitlab::ShellAdapter
|
||||
|
||||
attr_reader :project, :repository, :repo, :repo_url, :wiki_url,
|
||||
:options, :errors, :cached, :verbose, :last_fetched_at
|
||||
|
||||
# Prepares an import of the GitHub repository behind `project`.
#
# project - the project to import into; its `import_source` and
#           `import_url` identify the GitHub repository.
# options - Hash of settings:
#           :token   - API token; defaults to the `:user` entry of the
#                      project's import credentials.
#           :verbose - print progress messages (default: false).
def initialize(project, options = {})
  @project = project
  @repository = project.repository
  @repo = project.import_source
  @repo_url = project.import_url
  # The wiki URL is the repository URL with `.git` swapped for `.wiki.git`.
  @wiki_url = project.import_url.sub(/\.git\z/, '.wiki.git')
  @options = options.reverse_merge(token: project.import_data&.credentials&.fetch(:user))
  @verbose = options.fetch(:verbose, false)
  # Two-level cache: every top-level key lazily gets its own Hash.
  @cached = Hash.new { |hash, key| hash[key] = Hash.new }
  @errors = []
  @last_fetched_at = nil
end
|
||||
|
||||
# rubocop: disable Rails/Output
# Runs all import steps in order, printing a progress line before each one
# when `verbose` is set.
#
# Returns true when no errors were recorded, false otherwise. A
# Github::RepositoryFetchError stops the remaining steps, expires the
# repository cache and makes the method return false. Recorded errors are
# persisted in every case (see the `ensure`).
def execute
  steps = [
    ['Fetching repository...', :setup_and_fetch_repository],
    ['Fetching labels...', :fetch_labels],
    ['Fetching milestones...', :fetch_milestones],
    ['Fetching pull requests...', :fetch_pull_requests],
    ['Fetching issues...', :fetch_issues],
    ['Fetching releases...', :fetch_releases],
    ['Cloning wiki repository...', :fetch_wiki_repository],
    ['Expiring repository cache...', :expire_repository_cache]
  ]

  steps.each do |message, step|
    puts message.color(:aqua) if verbose
    send(step)
  end

  errors.empty?
rescue Github::RepositoryFetchError
  expire_repository_cache
  false
ensure
  keep_track_of_errors
end
# rubocop: enable Rails/Output
|
||||
|
||||
private
|
||||
|
||||
# Makes sure the project repository exists, registers GitHub as the
# `github` remote (as a mirror, with pull request heads mapped to
# `refs/merge-requests/*/head`) and performs a forced fetch.
#
# Raises Github::RepositoryFetchError after recording the failure when the
# repository is missing or the remote/shell operation fails.
def setup_and_fetch_repository
  project.ensure_repository
  project.repository.add_remote('github', repo_url)
  project.repository.set_import_remote_as_mirror('github')
  project.repository.add_remote_fetch_config('github', '+refs/pull/*/head:refs/merge-requests/*/head')
  fetch_remote(forced: true)
rescue Gitlab::Git::Repository::NoRepository,
       Gitlab::Git::RepositoryMirroring::RemoteError,
       Gitlab::Shell::Error => e
  error(:project, repo_url, e.message)
  raise Github::RepositoryFetchError
end
|
||||
|
||||
# Fetches the `github` remote and remembers when the fetch was started.
#
# forced - passed through to the repository's `fetch_remote`.
#
# Returns whatever the repository's `fetch_remote` returns.
def fetch_remote(forced: false)
  # Recorded before fetching so pull requests updated while the fetch runs
  # still compare as newer than `last_fetched_at`.
  @last_fetched_at = Time.now
  project.repository.fetch_remote('github', forced: forced)
end
|
||||
|
||||
# Imports the GitHub wiki repository unless the project wiki already has
# a repository. Shell failures are recorded as `:wiki` errors, except the
# one GitHub reports for a wiki without pages.
def fetch_wiki_repository
  return if project.wiki.repository_exists?

  wiki_path = project.wiki.disk_path
  gitlab_shell.import_repository(project.repository_storage_path, wiki_path, wiki_url)
rescue Gitlab::Shell::Error => e
  # GitHub error message when the wiki repo has not been created,
  # this means that repo has wiki enabled, but have no pages. So,
  # we can skip the import.
  error(:wiki, wiki_url, e.message) unless e.message =~ /repository not exported/
end
|
||||
|
||||
# Imports all labels of the repository, page by page, and remembers each
# label's ID by title in `cached[:label_ids]`.
#
# A label that cannot be processed is recorded as a `:label` error and
# skipped; the remaining labels are still imported.
def fetch_labels
  client = Github::Client.new(options)
  url = "/repos/#{repo}/labels"

  while url
    response = client.get(url)

    response.body.each do |raw|
      representation = nil

      begin
        representation = Github::Representation::Label.new(raw)

        label = project.labels.find_or_create_by!(title: representation.title) do |new_label|
          new_label.color = representation.color
        end

        cached[:label_ids][representation.title] = label.id
      rescue => e
        # The representation may not exist when building it failed; report
        # the page URL instead of raising again from inside the handler.
        error(:label, representation&.url || url, e.message)
      end
    end

    url = response.rels[:next]
  end
end
|
||||
|
||||
# Imports all milestones (open and closed) of the repository, page by
# page. Milestones whose iid already exists in the project are skipped.
#
# A milestone that cannot be processed is recorded as a `:milestone`
# error and skipped; the remaining milestones are still imported.
def fetch_milestones
  client = Github::Client.new(options)
  url = "/repos/#{repo}/milestones"

  while url
    response = client.get(url, state: :all)

    response.body.each do |raw|
      milestone = nil

      begin
        milestone = Github::Representation::Milestone.new(raw)
        next if project.milestones.where(iid: milestone.iid).exists?

        project.milestones.create!(
          iid: milestone.iid,
          title: milestone.title,
          description: milestone.description,
          due_date: milestone.due_date,
          state: milestone.state,
          created_at: milestone.created_at,
          updated_at: milestone.updated_at
        )
      rescue => e
        # The representation may not exist when building it failed; report
        # the page URL instead of raising again from inside the handler.
        error(:milestone, milestone&.url || url, e.message)
      end
    end

    url = response.rels[:next]
  end
end
|
||||
|
||||
# Imports all pull requests (any state, oldest first) as merge requests,
# together with their review comments.
#
# Pull requests that already have a merge request with the same iid, or
# whose representation is not valid, are skipped. A failure while
# importing one pull request is recorded as a `:pull_request` error and
# does not stop the others.
def fetch_pull_requests
  client = Github::Client.new(options)
  url = "/repos/#{repo}/pulls"

  while url
    response = client.get(url, state: :all, sort: :created, direction: :asc)

    response.body.each do |raw|
      pull_request = Github::Representation::PullRequest.new(raw, options.merge(project: project))
      merge_request = MergeRequest.find_or_initialize_by(iid: pull_request.iid, source_project_id: project.id)
      next unless merge_request.new_record? && pull_request.valid?

      begin
        # If the PR has been created/updated after we last fetched the
        # remote, we fetch again to get the up-to-date refs.
        fetch_remote if pull_request.updated_at > last_fetched_at

        # Resolve the author first: user_id fills the cache that
        # format_description reads to decide on the "Created by" prefix.
        author_id = user_id(pull_request.author, project.creator_id)
        description = format_description(pull_request.description, pull_request.author)

        merge_request.attributes = {
          iid: pull_request.iid,
          title: pull_request.title,
          description: description,
          source_project: pull_request.source_project,
          source_branch: pull_request.source_branch_name,
          source_branch_sha: pull_request.source_branch_sha,
          target_project: pull_request.target_project,
          target_branch: pull_request.target_branch_name,
          target_branch_sha: pull_request.target_branch_sha,
          state: pull_request.state,
          milestone_id: milestone_id(pull_request.milestone),
          author_id: author_id,
          assignee_id: user_id(pull_request.assignee),
          created_at: pull_request.created_at,
          updated_at: pull_request.updated_at
        }

        merge_request.save!(validate: false)
        merge_request.merge_request_diffs.create

        review_comments_url = "/repos/#{repo}/pulls/#{pull_request.iid}/comments"
        fetch_comments(merge_request, :review_comment, review_comments_url, LegacyDiffNote)
      rescue => e
        error(:pull_request, pull_request.url, e.message)
      end
    end

    url = response.rels[:next]
  end
end
|
||||
|
||||
# Walks all issues of the repository (any state, oldest first), page by
# page, and hands every raw entry to populate_issue.
def fetch_issues
  client = Github::Client.new(options)
  url = "/repos/#{repo}/issues"

  while url
    response = client.get(url, state: :all, sort: :created, direction: :asc)
    response.body.each { |raw| populate_issue(raw) }
    url = response.rels[:next]
  end
end
|
||||
|
||||
# Imports one entry of the GitHub issues listing.
#
# raw - the raw API payload of the entry.
#
# Entries that are pull requests only contribute labels and comments to
# the merge request that was imported earlier; real issues are created
# unless an issue with the same iid already exists. Any failure is
# recorded as an `:issue` error.
def populate_issue(raw)
  representation = Github::Representation::Issue.new(raw, options)

  begin
    # Every pull request is an issue, but not every issue
    # is a pull request. For this reason, "shared" actions
    # for both features, like manipulating assignees, labels
    # and milestones, are provided within the Issues API.
    if representation.pull_request?
      return unless representation.labels? || representation.comments?

      merge_request = MergeRequest.find_by!(target_project_id: project.id, iid: representation.iid)
      merge_request.update_attribute(:label_ids, label_ids(representation.labels)) if representation.labels?

      fetch_comments_conditionally(merge_request, representation)
    else
      return if Issue.exists?(iid: representation.iid, project_id: project.id)

      # Resolve the author first: user_id fills the cache that
      # format_description reads to decide on the "Created by" prefix.
      author_id = user_id(representation.author, project.creator_id)

      issue = Issue.new
      issue.iid = representation.iid
      issue.project_id = project.id
      issue.title = representation.title
      issue.description = format_description(representation.description, representation.author)
      issue.state = representation.state
      issue.milestone_id = milestone_id(representation.milestone)
      issue.author_id = author_id
      issue.created_at = representation.created_at
      issue.updated_at = representation.updated_at
      issue.save!(validate: false)

      issue.update(
        label_ids: label_ids(representation.labels),
        assignee_ids: assignee_ids(representation.assignees))

      fetch_comments_conditionally(issue, representation)
    end
  rescue => e
    error(:issue, representation.url, e.message)
  end
end
|
||||
|
||||
# Imports the regular comments of `issuable`, but only when the GitHub
# representation reports that it has comments.
#
# issuable       - the imported issue or merge request.
# representation - the GitHub representation the issuable was built from.
def fetch_comments_conditionally(issuable, representation)
  return unless representation.comments?

  fetch_comments(issuable, :comment, "/repos/#{repo}/issues/#{issuable.iid}/comments")
end
|
||||
|
||||
# Imports all comments found at `url` (following pagination) as notes on
# `noteable`.
#
# noteable - the issue or merge request the notes belong to.
# type     - Symbol used as the error type when a comment fails.
# url      - first page of the comments listing.
# klass    - the note class to instantiate (default: Note).
#
# A comment that cannot be processed is recorded as an error of `type`
# and skipped; the remaining comments are still imported.
def fetch_comments(noteable, type, url, klass = Note)
  client = Github::Client.new(options)

  while url
    comments = client.get(url)

    ActiveRecord::Base.no_touching do
      comments.body.each do |raw|
        representation = nil

        begin
          representation = Github::Representation::Comment.new(raw, options)
          # Resolve the author first: user_id fills the cache that
          # format_description reads to decide on the "Created by" prefix.
          author_id = user_id(representation.author, project.creator_id)

          note = klass.new
          note.project_id = project.id
          note.noteable = noteable
          note.note = format_description(representation.note, representation.author)
          note.commit_id = representation.commit_id
          note.line_code = representation.line_code
          note.author_id = author_id
          note.created_at = representation.created_at
          note.updated_at = representation.updated_at
          note.save!(validate: false)
        rescue => e
          # The representation may not exist when building it failed;
          # report the page URL instead of raising again from the handler.
          error(type, representation&.url || url, e.message)
        end
      end
    end

    url = comments.rels[:next]
  end
end
|
||||
|
||||
# Imports the releases of the repository, page by page.
#
# Releases whose representation is not valid, or whose tag already has a
# release in the project, are skipped. A failure while saving a release
# is recorded as a `:release` error.
def fetch_releases
  client = Github::Client.new(options)
  url = "/repos/#{repo}/releases"

  while url
    response = client.get(url)

    response.body.each do |raw|
      representation = Github::Representation::Release.new(raw)
      next unless representation.valid?

      release = ::Release.find_or_initialize_by(project_id: project.id, tag: representation.tag)
      next unless release.new_record?

      begin
        release.description = representation.description
        release.created_at = representation.created_at
        release.updated_at = representation.updated_at
        release.save!(validate: false)
      rescue => e
        error(:release, representation.url, e.message)
      end
    end

    url = response.rels[:next]
  end
end
|
||||
|
||||
# Maps label representations to the IDs cached for their titles, leaving
# out labels without a cached ID.
def label_ids(labels)
  labels.map(&:title).map { |title| cached[:label_ids][title] }.compact
end
|
||||
|
||||
# Maps GitHub assignees to user IDs via user_id (called without a
# fallback), leaving out assignees for which it returns nil.
def assignee_ids(assignees)
  assignees.map { |assignee| user_id(assignee) }.compact
end
|
||||
|
||||
# Returns the ID of the project milestone whose iid matches the given
# GitHub milestone, or nil when no milestone is given or none matches.
def milestone_id(milestone)
  return unless milestone.present?

  project.milestones.select(:id).find_by(iid: milestone.iid)&.id
end
|
||||
|
||||
# Resolves a GitHub user to a GitLab user ID.
#
# user        - the GitHub user representation (may be nil).
# fallback_id - ID to return when no matching GitLab user exists.
#
# The lookup (by GitHub identity first, then by email) is performed once
# per GitHub user and its outcome is cached, including the "not found"
# case. Only the lookup result is cached: the fallback is applied per
# call, so a caller that passes no fallback gets nil for an unknown user
# even if an earlier caller supplied one.
#
# Also records in `cached[:gitlab_user_ids]` whether a GitLab user was
# found, which format_description relies on.
def user_id(user, fallback_id = nil)
  return unless user.present?

  lookups = cached[:user_ids]

  unless lookups.key?(user.id)
    gitlab_user_id = user_id_by_external_uid(user.id) || user_id_by_email(user.email)

    cached[:gitlab_user_ids][user.id] = gitlab_user_id.present?
    lookups[user.id] = gitlab_user_id
  end

  lookups[user.id] || fallback_id
end
|
||||
|
||||
# Returns the ID of the GitLab user owning `email` (looked up with
# User.find_by_any_email), or nil when no email is given or nobody
# matches.
def user_id_by_email(email)
  return unless email

  ::User.find_by_any_email(email)&.id
end
|
||||
|
||||
# Returns the ID of the GitLab user that has a `github` identity with the
# given external UID, or nil when no ID is given or nobody matches.
def user_id_by_external_uid(id)
  return unless id

  ::User.select(:id)
    .joins(:identities)
    .merge(::Identity.where(provider: :github, extern_uid: id))
    .first&.id
end
|
||||
|
||||
# Returns `body` unchanged when the author is flagged in
# `cached[:gitlab_user_ids]` (set by user_id when a GitLab user was
# found); otherwise prefixes it with a "Created by" line naming the
# GitHub author.
def format_description(body, author)
  return body if cached[:gitlab_user_ids][author.id]

  "*Created by: #{author.username}*\n\n#{body}"
end
|
||||
|
||||
# Expires the repository's content cache, but only when the project's
# repository exists.
def expire_repository_cache
  return unless project.repository_exists?

  repository.expire_content_cache
end
|
||||
|
||||
# Stores the recorded errors as JSON in the project's `import_error`
# column. Does nothing when no errors were recorded.
def keep_track_of_errors
  return unless errors.any?

  payload = {
    message: 'The remote data could not be fully imported.',
    errors: errors
  }

  project.update_column(:import_error, payload.to_json)
end
|
||||
|
||||
# Records an import error.
#
# type    - Symbol naming what failed (e.g. :label, :wiki).
# url     - URL of the affected resource; stored after being passed
#           through Gitlab::UrlSanitizer.sanitize.
# message - the error message.
def error(type, url, message)
  errors << { type: type, url: Gitlab::UrlSanitizer.sanitize(url), error: message }
end
|
||||
end
|
||||
end
|
|
@ -1,13 +0,0 @@
|
|||
module Github
  class Import
    # Variant of ::Issue on the same `issues` table with its save, create,
    # commit, update and validate callbacks reset.
    class Issue < ::Issue
      self.table_name = 'issues'

      %i[save create commit update validate].each { |kind| reset_callbacks(kind) }
    end
  end
end
|
|
@ -1,12 +0,0 @@
|
|||
module Github
  class Import
    # Variant of ::LegacyDiffNote on the same `notes` table with its commit,
    # update and validate callbacks reset.
    class LegacyDiffNote < ::LegacyDiffNote
      self.table_name = 'notes'
      self.store_full_sti_class = false

      %i[commit update validate].each { |kind| reset_callbacks(kind) }
    end
  end
end
|
|
@ -1,13 +0,0 @@
|
|||
module Github
  class Import
    # Variant of ::MergeRequest on the same `merge_requests` table with its
    # create, save, commit, update and validate callbacks reset.
    class MergeRequest < ::MergeRequest
      self.table_name = 'merge_requests'

      %i[create save commit update validate].each { |kind| reset_callbacks(kind) }
    end
  end
end
|
|
@ -1,13 +0,0 @@
|
|||
module Github
  class Import
    # Variant of ::Note on the same `notes` table with its save, commit,
    # update and validate callbacks reset.
    class Note < ::Note
      self.table_name = 'notes'
      self.store_full_sti_class = false

      %i[save commit update validate].each { |kind| reset_callbacks(kind) }
    end
  end
end
|
|
@ -1,27 +0,0 @@
|
|||
module Github
  # Checks the remaining GitHub API quota for a connection.
  class RateLimit
    SAFE_REMAINING_REQUESTS = 100
    SAFE_RESET_TIME = 500
    RATE_LIMIT_URL = '/rate_limit'.freeze

    attr_reader :connection

    # connection - object responding to `get(url)` whose response exposes
    #              `status` and `body`.
    def initialize(connection)
      @connection = connection
    end

    # Queries the rate limit endpoint.
    #
    # Returns false when rate limiting is disabled (the endpoint answers
    # 404). Otherwise returns `[exceed, reset_in]`:
    #   exceed   - true when at most SAFE_REMAINING_REQUESTS requests are
    #              left.
    #   reset_in - number of seconds until the quota resets, never
    #              negative. GitHub reports the reset moment as an epoch
    #              timestamp, so it is converted to a duration here; callers
    #              pass this value to `sleep`.
    def get
      response = connection.get(RATE_LIMIT_URL)

      # GitHub Rate Limit API returns 404 when the rate limit is disabled
      return false if response.status == 404

      body = Oj.load(response.body, class_cache: false, mode: :compat)
      remaining = body.dig('rate', 'remaining').to_i
      reset_at = body.dig('rate', 'reset').to_i
      reset_in = [reset_at - Time.now.to_i, 0].max
      exceed = remaining <= SAFE_REMAINING_REQUESTS

      [exceed, reset_in]
    end
  end
end
|
|
@ -1,19 +0,0 @@
|
|||
module Github
  # Fetches the `/user/repos` listing through a Collection built from the
  # given options.
  class Repositories
    attr_reader :options

    # options - Handed unchanged to Collection.new.
    def initialize(options)
      @options = options
    end

    # Returns whatever Collection#fetch returns for the repositories URL.
    def fetch
      collection = Collection.new(options)

      collection.fetch(repos_url)
    end

    private

    def repos_url
      '/user/repos'
    end
  end
end
|
|
@ -1,30 +0,0 @@
|
|||
module Github
  module Representation
    # Common wrapper around a raw Hash: exposes the `id`, `url`,
    # `created_at` and `updated_at` entries as methods.
    class Base
      # raw     - The Hash being wrapped (read with String keys).
      # options - Extra options kept for subclasses; defaults to an empty Hash.
      def initialize(raw, options = {})
        @raw = raw
        @options = options
      end

      # Each reader simply returns the entry of the same name from `raw`.
      %w[id url created_at updated_at].each do |field|
        define_method(field) { raw[field] }
      end

      private

      attr_reader :raw, :options
    end
  end
end
|
|
@ -1,55 +0,0 @@
|
|||
module Github
  module Representation
    # Wraps a raw branch Hash (keys `user`, `repo`, `ref`, `sha`) and can
    # create or delete the branch in the repository given through
    # `options[:repository]`.
    class Branch < Representation::Base
      # Login found under `user.login`, or 'unknown' when it is absent.
      def user
        raw.dig('user', 'login') || 'unknown'
      end

      # True when the raw data carries a `repo` entry.
      def repo?
        raw['repo'].present?
      end

      # Returns a Representation::Repo for the `repo` entry, or nil when
      # there is none.
      def repo
        return unless repo?

        @repo ||= Github::Representation::Repo.new(raw['repo'])
      end

      def ref
        raw['ref']
      end

      def sha
        raw['sha']
      end

      def short_sha
        Commit.truncate_sha(sha)
      end

      # A branch is usable only when both its SHA and its ref are present.
      def valid?
        sha.present? && ref.present?
      end

      # Creates branch `name` pointing at this branch's SHA. An invalid ref
      # is logged instead of raised.
      def restore!(name)
        repository.create_branch(name, sha)
      rescue Gitlab::Git::Repository::InvalidRef => e
        Rails.logger.error("#{self.class.name}: Could not restore branch #{name}: #{e}")
      end

      # Deletes branch `name`. A deletion failure is logged instead of raised.
      def remove!(name)
        repository.delete_branch(name)
      rescue Gitlab::Git::Repository::DeleteBranchError => e
        Rails.logger.error("#{self.class.name}: Could not remove branch #{name}: #{e}")
      end

      private

      # The repository must be supplied via options; `fetch` raises KeyError
      # when it is missing. This is the only `repository` reader: a public
      # `attr_reader` of the same name would just be overridden by it.
      def repository
        @repository ||= options.fetch(:repository)
      end
    end
  end
end
|
|
@ -1,42 +0,0 @@
|
|||
module Github
  module Representation
    # Wraps a raw comment Hash (keys `body`, `user`, `commit_id`,
    # `diff_hunk`, `path`).
    class Comment < Representation::Base
      # The comment body, or an empty String when it is missing.
      def note
        raw['body'] || ''
      end

      def author
        @author ||= Github::Representation::User.new(raw['user'], options)
      end

      def commit_id
        raw['commit_id']
      end

      # Line code built from the last parsed line of the diff hunk. Returns
      # nil when the comment has no diff hunk.
      def line_code
        return unless on_diff?

        last_line = Gitlab::Diff::Parser.new.parse(diff_hunk.lines).to_a.last

        generate_line_code(last_line)
      end

      private

      def generate_line_code(line)
        Gitlab::Git.diff_line_code(file_path, line.new_pos, line.old_pos)
      end

      def on_diff?
        diff_hunk.present?
      end

      def diff_hunk
        raw['diff_hunk']
      end

      def file_path
        raw['path']
      end
    end
  end
end
|
|
@ -1,37 +0,0 @@
|
|||
module Github
  module Representation
    # Shared readers for issue-like raw Hashes (keys `number`, `title`,
    # `body`, `milestone`, `user`, `labels`).
    class Issuable < Representation::Base
      def iid
        raw['number']
      end

      def title
        raw['title']
      end

      # The body text, or an empty String when it is missing.
      def description
        raw['body'] || ''
      end

      # Returns a Representation::Milestone, or nil when no milestone is set.
      def milestone
        return unless raw['milestone'].present?

        @milestone ||= Github::Representation::Milestone.new(raw['milestone'])
      end

      def author
        @author ||= Github::Representation::User.new(raw['user'], options)
      end

      # True when at least one label is attached. `Array()` makes a missing
      # `labels` key behave like an empty list, matching `#labels`, instead
      # of raising NoMethodError on nil.
      def labels?
        Array(raw['labels']).any?
      end

      # Returns the labels as Representation::Label objects (an empty Array
      # when there are none).
      def labels
        @labels ||= Array(raw['labels']).map do |label|
          Github::Representation::Label.new(label, options)
        end
      end
    end
  end
end
|
|
@ -1,27 +0,0 @@
|
|||
module Github
  module Representation
    # Issue-specific readers on top of Issuable (keys `state`, `comments`,
    # `pull_request`, `assignees`).
    class Issue < Representation::Issuable
      # 'closed' when the raw state is 'closed', otherwise 'opened'.
      def state
        raw['state'] == 'closed' ? 'closed' : 'opened'
      end

      # True when the comment counter is greater than zero. `to_i` makes a
      # missing counter count as zero instead of raising NoMethodError.
      def comments?
        raw['comments'].to_i > 0
      end

      # True when the raw data carries a `pull_request` entry.
      def pull_request?
        raw['pull_request'].present?
      end

      def assigned?
        raw['assignees'].present?
      end

      # Returns the assignees as Representation::User objects (an empty
      # Array when there are none).
      def assignees
        @assignees ||= Array(raw['assignees']).map do |user|
          Github::Representation::User.new(user, options)
        end
      end
    end
  end
end
|
|
@ -1,13 +0,0 @@
|
|||
module Github
  module Representation
    # Wraps a raw label Hash (keys `color`, `name`).
    class Label < Representation::Base
      # The raw colour value prefixed with '#'.
      def color
        '#' + raw['color'].to_s
      end

      # The label title is stored under `name` in the raw data.
      def title
        raw['name']
      end
    end
  end
end
|
|
@ -1,25 +0,0 @@
|
|||
module Github
  module Representation
    # Wraps a raw milestone Hash (keys `number`, `title`, `description`,
    # `due_on`, `state`).
    class Milestone < Representation::Base
      def iid
        raw['number']
      end

      def title
        raw['title']
      end

      def description
        raw['description']
      end

      def due_date
        raw['due_on']
      end

      # 'closed' when the raw state is 'closed', otherwise 'active'.
      def state
        if raw['state'] == 'closed'
          'closed'
        else
          'active'
        end
      end
    end
  end
end
|
|
@ -1,71 +0,0 @@
|
|||
module Github
  module Representation
    # Pull-request-specific readers on top of Issuable. Both source and
    # target project are the project given through `options[:project]`.
    class PullRequest < Representation::Issuable
      delegate :sha, to: :source_branch, prefix: true
      delegate :sha, to: :target_branch, prefix: true

      def source_project
        project
      end

      # Mimic the "user:branch" displayed in the MR widget,
      # i.e. "Request to merge rymai:add-external-mounts into master"
      def source_branch_name
        if cross_project?
          "#{source_branch.user}:#{source_branch.ref}"
        else
          source_branch.ref
        end
      end

      def target_project
        project
      end

      def target_branch_name
        target_branch.ref
      end

      # 'opened' unless the raw state is 'closed'; a closed pull request
      # with a `merged_at` value is 'merged', any other closed one 'closed'.
      def state
        return 'opened' unless raw['state'] == 'closed'

        raw['merged_at'].present? ? 'merged' : 'closed'
      end

      def opened?
        state == 'opened'
      end

      # Valid only when both branches are valid.
      def valid?
        source_branch.valid? && target_branch.valid?
      end

      def assigned?
        raw['assignee'].present?
      end

      # Returns the assignee as a Representation::User, or nil when the
      # pull request is unassigned.
      def assignee
        return unless assigned?

        @assignee ||= Github::Representation::User.new(raw['assignee'], options)
      end

      private

      # `fetch` raises KeyError when no project was given.
      def project
        @project ||= options.fetch(:project)
      end

      def source_branch
        @source_branch ||= Representation::Branch.new(raw['head'], repository: project.repository)
      end

      def target_branch
        @target_branch ||= Representation::Branch.new(raw['base'], repository: project.repository)
      end

      # Treated as cross-project when the source branch has no repo, or
      # when the source and target repos have different IDs.
      def cross_project?
        return true unless source_branch.repo?

        source_branch.repo.id != target_branch.repo.id
      end
    end
  end
end
|
|
@ -1,17 +0,0 @@
|
|||
module Github
  module Representation
    # Wraps a raw release Hash (keys `body`, `tag_name`, `draft`).
    class Release < Representation::Base
      def description
        raw['body']
      end

      def tag
        raw['tag_name']
      end

      # A release is valid unless it is flagged as a draft.
      def valid?
        draft = raw['draft']

        !draft
      end
    end
  end
end
|
|
@ -1,6 +0,0 @@
|
|||
module Github
  module Representation
    # Repository representation; adds nothing on top of Representation::Base.
    class Repo < Representation::Base; end
  end
end
|
|
@ -1,15 +0,0 @@
|
|||
module Github
  module Representation
    # Wraps a raw user Hash (key `login`).
    class User < Representation::Base
      # Looks the email up through Github::User on first use and memoizes
      # the answer. `defined?` is used so that a nil result is cached too.
      def email
        unless defined?(@email)
          @email = Github::User.new(username, options).get.fetch('email', nil)
        end

        @email
      end

      def username
        raw['login']
      end
    end
  end
end
|
|
@ -1,25 +0,0 @@
|
|||
module Github
  # Thin wrapper around an HTTP response object exposing its headers,
  # status, parsed body and pagination links.
  class Response
    attr_reader :raw, :headers, :status

    # response - Object responding to `headers`, `status` and `body`.
    def initialize(response)
      @raw = response
      @headers = response.headers
      @status = response.status
    end

    # Parses the raw body with Oj on every call.
    def body
      Oj.load(raw.body, class_cache: false, mode: :compat)
    end

    # Parses the `Link` header into a Hash mapping relation names (Symbols)
    # to URLs, e.g. `{ next: '...', last: '...' }`. Returns an empty Hash
    # when the header is missing. Entries that do not look like
    # `<url>; rel="name"` are skipped rather than raising.
    def rels
      headers['Link'].to_s.split(', ').each_with_object({}) do |link, result|
        match = link.match(/<(.*?)>; rel="(\w+)"/)
        next unless match

        href, name = match.captures
        result[name.to_sym] = href
      end
    end
  end
end
|
|
@ -1,24 +0,0 @@
|
|||
module Github
  # Fetches a single user from `/users/<username>` through Github::Client.
  class User
    attr_reader :username, :options

    # username - Interpolated into the request path.
    # options  - Handed unchanged to Github::Client.new.
    def initialize(username, options)
      @username = username
      @options = options
    end

    # Returns the body of the response for the user URL.
    def get
      response = client.get(user_url)

      response.body
    end

    private

    def client
      @client ||= Github::Client.new(options)
    end

    def user_url
      "/users/#{username}"
    end
  end
end
|
|
@ -108,20 +108,41 @@ module Gitlab
|
|||
end
|
||||
end
|
||||
|
||||
# Bulk inserts a number of rows into a table, optionally returning their
# IDs.
#
# table - The name of the table to insert the rows into.
# rows - An Array of Hash instances, each mapping the columns to their
#        values. The keys of the first row determine the inserted columns.
# return_ids - When set to true the return value will be an Array of IDs of
#              the inserted rows, this only works on PostgreSQL.
#
# Returns nil when `rows` is empty, otherwise an Array (empty unless IDs
# were requested and the database is not MySQL).
def self.bulk_insert(table, rows, return_ids: false)
  return if rows.empty?

  keys = rows.first.keys
  columns = keys.map { |key| connection.quote_column_name(key) }

  # MySQL has no RETURNING clause, so the request is silently dropped.
  return_ids = false if mysql?

  tuples = rows.map do |row|
    row.values_at(*keys).map { |value| connection.quote(value) }
  end

  sql = <<-EOF
    INSERT INTO #{table} (#{columns.join(', ')})
    VALUES #{tuples.map { |tuple| "(#{tuple.join(', ')})" }.join(', ')}
  EOF

  # The heredoc ends with a newline, so the clause can be appended as-is.
  sql << 'RETURNING id' if return_ids

  result = connection.execute(sql)

  return_ids ? result.values.map { |tuple| tuple[0].to_i } : []
end
|
||||
|
||||
def self.sanitize_timestamp(timestamp)
|
||||
|
|
|
@ -920,6 +920,11 @@ module Gitlab
|
|||
false
|
||||
end
|
||||
|
||||
# Returns true if a remote exists.
|
||||
def remote_exists?(name)
  remote = rugged.remotes[name]

  remote.present?
end
|
||||
|
||||
# Update the specified remote using the values in the +options+ hash
|
||||
#
|
||||
# Example
|
||||
|
|
|
@ -0,0 +1,34 @@
|
|||
module Gitlab
  module GithubImport
    # Builds a Client for the given project.
    #
    # project  - Its import data credentials (`:user` entry) supply the token
    #            when none is passed explicitly.
    # token    - Optional token taking precedence over the project's.
    # parallel - Forwarded unchanged to Client.new.
    def self.new_client_for(project, token: nil, parallel: true)
      chosen_token = token || project.import_data&.credentials&.fetch(:user)

      Client.new(chosen_token, parallel: parallel)
    end

    # Inserts a raw row and returns the ID of the inserted row.
    #
    # attributes - The attributes/columns to set.
    # relation - An ActiveRecord::Relation to use for finding the ID of the row
    #            when using MySQL.
    def self.insert_and_return_id(attributes, relation)
      # We use bulk_insert here so we can bypass any queries executed by
      # callbacks or validation rules, as doing this wouldn't scale when
      # importing very large projects.
      inserted_ids = Gitlab::Database.bulk_insert(
        relation.table_name,
        [attributes],
        return_ids: true
      )

      inserted_id = inserted_ids.first
      return inserted_id if inserted_id

      # MySQL doesn't support returning the IDs of a bulk insert in a way that
      # is not a pain, so in this case we'll issue an extra query instead.
      relation.where(iid: attributes[:iid]).limit(1).pluck(:id).first
    end

    # Returns the ID of the ghost user, reading it from the import cache
    # when present and caching it after a lookup otherwise.
    def self.ghost_user_id
      key = 'github-import/ghost-user-id'

      cached_id = Caching.read_integer(key)
      return cached_id if cached_id

      Caching.write(key, User.select(:id).ghost.id)
    end
  end
end
|
|
@ -0,0 +1,25 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Gitlab
  module GithubImport
    # Mixin for importers that insert rows in bulk. The including class is
    # expected to provide `build(object)` and `already_imported?(object)`.
    module BulkImporting
      # Builds and returns an Array of objects to bulk insert into the
      # database.
      #
      # enum - An Enumerable that returns the objects to turn into database
      #        rows. When an element is an Array only its first item is used.
      def build_database_rows(enum)
        rows = []

        enum.each do |(object, _)|
          next if already_imported?(object)

          rows << build(object)
        end

        rows
      end

      # Bulk inserts the given rows into the database, `batch_size` rows per
      # INSERT, using the table of the given model.
      def bulk_insert(model, rows, batch_size: 100)
        rows.each_slice(batch_size) do |batch|
          Gitlab::Database.bulk_insert(model.table_name, batch)
        end
      end
    end
  end
end
|
|
@ -0,0 +1,151 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Gitlab
  module GithubImport
    # Small Redis-backed cache helpers. Every key is namespaced through
    # `cache_key_for` and every call goes through `Redis::Cache.with`.
    module Caching
      # The default timeout of the cache keys.
      TIMEOUT = 24.hours.to_i

      WRITE_IF_GREATER_SCRIPT = <<-EOF.strip_heredoc.freeze
        local key, value, ttl = KEYS[1], tonumber(ARGV[1]), ARGV[2]
        local existing = tonumber(redis.call("get", key))

        if existing == nil or value > existing then
          redis.call("set", key, value)
          redis.call("expire", key, ttl)
          return true
        else
          return false
        end
      EOF

      # Reads a cache key.
      #
      # If the key exists and has a non-empty value its TTL is refreshed
      # automatically.
      #
      # raw_key - The cache key to read.
      # timeout - The new timeout of the key if the key is to be refreshed.
      def self.read(raw_key, timeout: TIMEOUT)
        namespaced = cache_key_for(raw_key)
        cached = Redis::Cache.with { |redis| redis.get(namespaced) }

        # We refresh the expiration time so frequently used keys stick
        # around, removing the need for querying the database as much as
        # possible.
        #
        # A key may be empty when we looked up a GitHub user (for example) but
        # did not find a matching GitLab user. In that case we _don't_ want to
        # refresh the TTL so we automatically pick up the right data when said
        # user were to register themselves on the GitLab instance.
        expire(raw_key, timeout) if cached.present?

        cached
      end

      # Reads an integer from the cache, or returns nil if no value was found.
      #
      # See Caching.read for more information.
      def self.read_integer(raw_key, timeout: TIMEOUT)
        cached = read(raw_key, timeout: timeout)

        cached.present? ? cached.to_i : nil
      end

      # Sets a cache key to the given value and returns that value.
      #
      # key - The cache key to write.
      # value - The value to set.
      # timeout - The time after which the cache key should expire.
      def self.write(raw_key, value, timeout: TIMEOUT)
        namespaced = cache_key_for(raw_key)

        Redis::Cache.with { |redis| redis.set(namespaced, value, ex: timeout) }

        value
      end

      # Adds a value to a set and resets the set's expiration time, both
      # inside one MULTI block.
      #
      # raw_key - The key of the set to add the value to.
      # value - The value to add to the set.
      # timeout - The new timeout of the key.
      def self.set_add(raw_key, value, timeout: TIMEOUT)
        namespaced = cache_key_for(raw_key)

        Redis::Cache.with do |redis|
          redis.multi do |transaction|
            transaction.sadd(namespaced, value)
            transaction.expire(namespaced, timeout)
          end
        end
      end

      # Returns true if the given value is present in the set.
      #
      # raw_key - The key of the set to check.
      # value - The value to check for.
      def self.set_includes?(raw_key, value)
        namespaced = cache_key_for(raw_key)

        Redis::Cache.with { |redis| redis.sismember(namespaced, value) }
      end

      # Sets multiple keys to a given value inside one MULTI block.
      #
      # mapping - A Hash mapping the cache keys to their values.
      # timeout - The time after which the cache key should expire.
      def self.write_multiple(mapping, timeout: TIMEOUT)
        Redis::Cache.with do |redis|
          redis.multi do |transaction|
            mapping.each do |raw_key, value|
              transaction.set(cache_key_for(raw_key), value, ex: timeout)
            end
          end
        end
      end

      # Sets the expiration time of a key.
      #
      # raw_key - The key for which to change the timeout.
      # timeout - The new timeout.
      def self.expire(raw_key, timeout)
        namespaced = cache_key_for(raw_key)

        Redis::Cache.with { |redis| redis.expire(namespaced, timeout) }
      end

      # Sets a key to the given integer but only if the existing value is
      # smaller than the given value.
      #
      # This method uses a Lua script to ensure the read and write are atomic.
      #
      # raw_key - The key to set.
      # value - The new value for the key.
      # timeout - The key timeout in seconds.
      #
      # Returns true when the key was overwritten, false otherwise.
      def self.write_if_greater(raw_key, value, timeout: TIMEOUT)
        namespaced = cache_key_for(raw_key)

        overwritten = Redis::Cache.with do |redis|
          redis.eval(
            WRITE_IF_GREATER_SCRIPT,
            keys: [namespaced],
            argv: [value, timeout]
          )
        end

        overwritten ? true : false
      end

      # Prefixes the raw key with the cache namespace.
      def self.cache_key_for(raw_key)
        "#{Redis::Cache::CACHE_NAMESPACE}:#{raw_key}"
      end
    end
  end
end
|
|
@ -1,148 +1,185 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Gitlab
|
||||
module GithubImport
|
||||
# HTTP client for interacting with the GitHub API.
|
||||
#
|
||||
# This class is basically a fancy wrapper around Octokit while adding some
|
||||
# functionality to deal with rate limiting and parallel imports. Usage is
|
||||
# mostly the same as Octokit, for example:
|
||||
#
|
||||
# client = GithubImport::Client.new('hunter2')
|
||||
#
|
||||
# client.labels.each do |label|
|
||||
# puts label.name
|
||||
# end
|
||||
class Client
|
||||
GITHUB_SAFE_REMAINING_REQUESTS = 100
|
||||
GITHUB_SAFE_SLEEP_TIME = 500
|
||||
attr_reader :octokit
|
||||
|
||||
attr_reader :access_token, :host, :api_version
|
||||
# A single page of data and the corresponding page number.
|
||||
Page = Struct.new(:objects, :number)
|
||||
|
||||
def initialize(access_token, host: nil, api_version: 'v3')
|
||||
@access_token = access_token
|
||||
@host = host.to_s.sub(%r{/+\z}, '')
|
||||
@api_version = api_version
|
||||
@users = {}
|
||||
# The minimum number of requests we want to keep available.
|
||||
#
|
||||
# We don't use a value of 0 as multiple threads may be using the same
|
||||
# token in parallel. This could result in all of them hitting the GitHub
|
||||
# rate limit at once. The threshold is put in place to not hit the limit
|
||||
# in most cases.
|
||||
RATE_LIMIT_THRESHOLD = 50
|
||||
|
||||
if access_token
|
||||
::Octokit.auto_paginate = false
|
||||
# token - The GitHub API token to use.
|
||||
#
|
||||
# per_page - The number of objects that should be displayed per page.
|
||||
#
|
||||
# parallel - When set to true hitting the rate limit will result in a
|
||||
# dedicated error being raised. When set to `false` we will
|
||||
# instead just `sleep()` until the rate limit is reset. Setting
|
||||
# this value to `true` for parallel importing is crucial as
|
||||
# otherwise hitting the rate limit will result in a thread
|
||||
# being blocked in a `sleep()` call for up to an hour.
|
||||
def initialize(token, per_page: 100, parallel: true)
|
||||
@octokit = Octokit::Client.new(access_token: token, per_page: per_page)
|
||||
@parallel = parallel
|
||||
end
|
||||
|
||||
def parallel?
|
||||
@parallel
|
||||
end
|
||||
|
||||
# Returns the details of a GitHub user.
|
||||
#
|
||||
# username - The username of the user.
|
||||
def user(username)
|
||||
with_rate_limit { octokit.user(username) }
|
||||
end
|
||||
|
||||
# Returns the details of a GitHub repository.
|
||||
#
|
||||
# name - The path (in the form `owner/repository`) of the repository.
|
||||
def repository(name)
|
||||
with_rate_limit { octokit.repo(name) }
|
||||
end
|
||||
|
||||
def labels(*args)
|
||||
each_object(:labels, *args)
|
||||
end
|
||||
|
||||
def milestones(*args)
|
||||
each_object(:milestones, *args)
|
||||
end
|
||||
|
||||
def releases(*args)
|
||||
each_object(:releases, *args)
|
||||
end
|
||||
|
||||
# Fetches data from the GitHub API and yields a Page object for every page
|
||||
# of data, without loading all of them into memory.
|
||||
#
|
||||
# method - The Octokit method to use for getting the data.
|
||||
# args - Arguments to pass to the Octokit method.
|
||||
#
|
||||
# rubocop: disable GitlabSecurity/PublicSend
|
||||
def each_page(method, *args, &block)
  return to_enum(__method__, method, *args) unless block_given?

  # Start counting at the page requested in the trailing options Hash, if
  # any, and at page 1 otherwise.
  trailing = args.last
  page_number = (trailing.is_a?(Hash) && trailing[:page]) || 1

  first_batch = with_rate_limit { octokit.public_send(method, *args) }
  next_link = octokit.last_response.rels[:next]

  yield Page.new(first_batch, page_number)

  # Follow the `next` relation until the API stops returning one.
  while next_link
    response = with_rate_limit { next_link.get }
    next_link = response.rels[:next]
    page_number += 1

    yield Page.new(response.data, page_number)
  end
end
|
||||
|
||||
def api
|
||||
@api ||= ::Octokit::Client.new(
|
||||
access_token: access_token,
|
||||
api_endpoint: api_endpoint,
|
||||
# If there is no config, we're connecting to github.com and we
|
||||
# should verify ssl.
|
||||
connection_options: {
|
||||
ssl: { verify: config ? config['verify_ssl'] : true }
|
||||
}
|
||||
# Iterates over all of the objects for the given method (e.g. `:labels`).
|
||||
#
|
||||
# method - The method to send to Octokit for querying data.
|
||||
# args - Any arguments to pass to the Octokit method.
|
||||
def each_object(method, *args, &block)
  return to_enum(__method__, method, *args) unless block_given?

  # Flatten the pages produced by each_page into a stream of objects.
  each_page(method, *args) do |page|
    page.objects.each(&block)
  end
end
|
||||
|
||||
# Yields the supplied block, responding to any rate limit errors.
|
||||
#
|
||||
# The exact strategy used for handling rate limiting errors depends on
|
||||
# whether we are running in parallel mode or not. For more information see
|
||||
# `#raise_or_wait_for_rate_limit`.
|
||||
def with_rate_limit
|
||||
request_count_counter.increment
|
||||
|
||||
raise_or_wait_for_rate_limit unless requests_remaining?
|
||||
|
||||
begin
|
||||
yield
|
||||
rescue Octokit::TooManyRequests
|
||||
raise_or_wait_for_rate_limit
|
||||
|
||||
# This retry will only happen when running in sequential mode as we'll
|
||||
# raise an error in parallel mode.
|
||||
retry
|
||||
end
|
||||
end
|
||||
|
||||
# Returns `true` if we're still allowed to perform API calls.
|
||||
def requests_remaining?
|
||||
remaining_requests > RATE_LIMIT_THRESHOLD
|
||||
end
|
||||
|
||||
def remaining_requests
|
||||
octokit.rate_limit.remaining
|
||||
end
|
||||
|
||||
# Counts the rate limit hit, then raises RateLimitError in parallel mode
# or sleeps until the rate limit resets otherwise.
def raise_or_wait_for_rate_limit
  rate_limit_counter.increment

  raise RateLimitError if parallel?

  sleep(rate_limit_resets_in)
end
|
||||
|
||||
def rate_limit_resets_in
|
||||
# We add a few seconds to the rate limit so we don't _immediately_
|
||||
# resume when the rate limit resets as this may result in us performing
|
||||
# a request before GitHub has a chance to reset the limit.
|
||||
octokit.rate_limit.resets_in + 5
|
||||
end
|
||||
|
||||
def respond_to_missing?(method, include_private = false)
|
||||
octokit.respond_to?(method, include_private)
|
||||
end
|
||||
|
||||
def rate_limit_counter
|
||||
@rate_limit_counter ||= Gitlab::Metrics.counter(
|
||||
:github_importer_rate_limit_hits,
|
||||
'The number of times we hit the GitHub rate limit when importing projects'
|
||||
)
|
||||
end
|
||||
|
||||
def client
|
||||
unless config
|
||||
raise Projects::ImportService::Error,
|
||||
'OAuth configuration for GitHub missing.'
|
||||
end
|
||||
|
||||
@client ||= ::OAuth2::Client.new(
|
||||
config.app_id,
|
||||
config.app_secret,
|
||||
github_options.merge(ssl: { verify: config['verify_ssl'] })
|
||||
def request_count_counter
|
||||
@request_counter ||= Gitlab::Metrics.counter(
|
||||
:github_importer_request_count,
|
||||
'The number of GitHub API calls performed when importing projects'
|
||||
)
|
||||
end
|
||||
|
||||
def authorize_url(redirect_uri)
|
||||
client.auth_code.authorize_url({
|
||||
redirect_uri: redirect_uri,
|
||||
scope: "repo, user, user:email"
|
||||
})
|
||||
end
|
||||
|
||||
def get_token(code)
|
||||
client.auth_code.get_token(code).token
|
||||
end
|
||||
|
||||
def method_missing(method, *args, &block)
|
||||
if api.respond_to?(method)
|
||||
request(method, *args, &block)
|
||||
else
|
||||
super(method, *args, &block)
|
||||
end
|
||||
end
|
||||
|
||||
def respond_to?(method)
|
||||
api.respond_to?(method) || super
|
||||
end
|
||||
|
||||
def user(login)
|
||||
return nil unless login.present?
|
||||
return @users[login] if @users.key?(login)
|
||||
|
||||
@users[login] = api.user(login)
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def api_endpoint
|
||||
if host.present? && api_version.present?
|
||||
"#{host}/api/#{api_version}"
|
||||
else
|
||||
github_options[:site]
|
||||
end
|
||||
end
|
||||
|
||||
def config
|
||||
Gitlab.config.omniauth.providers.find { |provider| provider.name == "github" }
|
||||
end
|
||||
|
||||
def github_options
|
||||
if config
|
||||
config["args"]["client_options"].deep_symbolize_keys
|
||||
else
|
||||
OmniAuth::Strategies::GitHub.default_options[:client_options].symbolize_keys
|
||||
end
|
||||
end
|
||||
|
||||
def rate_limit
|
||||
api.rate_limit!
|
||||
# GitHub Rate Limit API returns 404 when the rate limit is
|
||||
# disabled. In this case we just want to return gracefully
|
||||
# instead of spitting out an error.
|
||||
rescue Octokit::NotFound
|
||||
nil
|
||||
end
|
||||
|
||||
def has_rate_limit?
|
||||
return @has_rate_limit if defined?(@has_rate_limit)
|
||||
|
||||
@has_rate_limit = rate_limit.present?
|
||||
end
|
||||
|
||||
def rate_limit_exceed?
|
||||
has_rate_limit? && rate_limit.remaining <= GITHUB_SAFE_REMAINING_REQUESTS
|
||||
end
|
||||
|
||||
def rate_limit_sleep_time
|
||||
rate_limit.resets_in + GITHUB_SAFE_SLEEP_TIME
|
||||
end
|
||||
|
||||
def request(method, *args, &block)
|
||||
sleep rate_limit_sleep_time if rate_limit_exceed?
|
||||
|
||||
data = api.__send__(method, *args) # rubocop:disable GitlabSecurity/PublicSend
|
||||
return data unless data.is_a?(Array)
|
||||
|
||||
last_response = api.last_response
|
||||
|
||||
if block_given?
|
||||
yield data
|
||||
# api.last_response could change while we're yielding (e.g. fetching labels for each PR)
|
||||
# so we cache our own last response
|
||||
each_response_page(last_response, &block)
|
||||
else
|
||||
each_response_page(last_response) { |page| data.concat(page) }
|
||||
data
|
||||
end
|
||||
end
|
||||
|
||||
def each_response_page(last_response)
|
||||
while last_response.rels[:next]
|
||||
sleep rate_limit_sleep_time if rate_limit_exceed?
|
||||
last_response = last_response.rels[:next].get
|
||||
yield last_response.data if last_response.data.is_a?(Array)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -0,0 +1,63 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Gitlab
  module GithubImport
    module Importer
      # Imports a single diff note by inserting one row into the table of
      # LegacyDiffNote.
      class DiffNoteImporter
        attr_reader :note, :project, :client, :user_finder

        # note - An instance of `Gitlab::GithubImport::Representation::DiffNote`.
        # project - An instance of `Project`.
        # client - An instance of `Gitlab::GithubImport::Client`.
        def initialize(note, project, client)
          @note = note
          @project = project
          @client = client
          @user_finder = UserFinder.new(project, client)
        end

        # Inserts the note. Nothing is inserted when no merge request ID can
        # be found for it.
        def execute
          mr_id = find_merge_request_id
          return unless mr_id

          author_id, author_found = user_finder.author_id_for(note)
          formatted_body = MarkdownText.format(note.note, note.author, author_found)

          row = {
            noteable_type: 'MergeRequest',
            noteable_id: mr_id,
            project_id: project.id,
            author_id: author_id,
            note: formatted_body,
            system: false,
            commit_id: note.commit_id,
            line_code: note.line_code,
            type: 'LegacyDiffNote',
            created_at: note.created_at,
            updated_at: note.updated_at,
            st_diff: note.diff_hash.to_yaml
          }

          # It's possible that during an import we'll insert tens of thousands
          # of diff notes. If we were to use the Note/LegacyDiffNote model here
          # we'd also have to run additional queries for both validations and
          # callbacks, putting a lot of pressure on the database.
          #
          # To work around this we're using bulk_insert with a single row. This
          # allows us to efficiently insert data (even if it's just 1 row)
          # without having to use all sorts of hacks to disable callbacks.
          Gitlab::Database.bulk_insert(LegacyDiffNote.table_name, [row])
        rescue ActiveRecord::InvalidForeignKey
          # It's possible the project and the merge request have been deleted
          # since scheduling this job. In this case we'll just skip creating
          # the note.
        end

        # Returns the ID of the merge request this note belongs to.
        def find_merge_request_id
          finder = GithubImport::IssuableFinder.new(project, note)

          finder.database_id
        end
      end
    end
  end
end
|
|
@ -0,0 +1,31 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Gitlab
  module GithubImport
    module Importer
      # Configuration hooks consumed by ParallelScheduling for importing
      # diff notes.
      class DiffNotesImporter
        include ParallelScheduling

        # Class used to represent each fetched object.
        def representation_class
          Representation::DiffNote
        end

        # Class that imports a single diff note.
        def importer_class
          DiffNoteImporter
        end

        # Sidekiq worker class associated with this importer.
        def sidekiq_worker_class
          ImportDiffNoteWorker
        end

        # Name of the collection method associated with this importer.
        def collection_method
          :pull_requests_comments
        end

        # The note's own ID is the value used for the already-imported cache.
        def id_for_already_imported_cache(note)
          note.id
        end
      end
    end
  end
end
|
|
@ -0,0 +1,25 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Gitlab
  module GithubImport
    module Importer
      # Runs the issue import followed by the label links import for one
      # issue.
      class IssueAndLabelLinksImporter
        attr_reader :issue, :project, :client

        # issue - An instance of `Gitlab::GithubImport::Representation::Issue`.
        # project - An instance of `Project`
        # client - An instance of `Gitlab::GithubImport::Client`
        def initialize(issue, project, client)
          @issue, @project, @client = issue, project, client
        end

        def execute
          IssueImporter.import_if_issue(issue, project, client)

          label_links = LabelLinksImporter.new(issue, project, client)
          label_links.execute
        end
      end
    end
  end
end
|
|
@ -0,0 +1,81 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Gitlab
  module GithubImport
    module Importer
      # Imports a single GitHub issue, including its assignees, into a
      # project and caches the resulting database ID.
      class IssueImporter
        attr_reader :project, :issue, :client, :user_finder, :milestone_finder,
                    :issuable_finder

        # Runs the importer for the given object, but only when it is a
        # regular issue. Pull requests are skipped and `nil` is returned.
        def self.import_if_issue(issue, project, client)
          return if issue.pull_request?

          new(issue, project, client).execute
        end

        # issue - An instance of `Gitlab::GithubImport::Representation::Issue`.
        # project - An instance of `Project`
        # client - An instance of `Gitlab::GithubImport::Client`
        def initialize(issue, project, client)
          @issue = issue
          @project = project
          @client = client
          @user_finder = UserFinder.new(project, client)
          @milestone_finder = MilestoneFinder.new(project)
          @issuable_finder = GithubImport::IssuableFinder.new(project, issue)
        end

        # Creates the issue and its assignees in a single transaction, then
        # caches the new database ID. Nothing else happens when the issue
        # could not be created.
        def execute
          Issue.transaction do
            issue_id = create_issue

            if issue_id
              create_assignees(issue_id)
              issuable_finder.cache_database_id(issue_id)
            end
          end
        end

        # Inserts a GitLab issue row for the current GitHub issue.
        #
        # Returns the ID produced by `GithubImport.insert_and_return_id`, or
        # `nil` when the insert violates a foreign key.
        def create_issue
          author_id, author_found = user_finder.author_id_for(issue)

          body =
            MarkdownText.format(issue.description, issue.author, author_found)

          row = {
            iid: issue.iid,
            title: issue.truncated_title,
            author_id: author_id,
            project_id: project.id,
            description: body,
            milestone_id: milestone_finder.id_for(issue),
            state: issue.state,
            created_at: issue.created_at,
            updated_at: issue.updated_at
          }

          GithubImport.insert_and_return_id(row, project.issues)
        rescue ActiveRecord::InvalidForeignKey
          # The project may have been removed after this job was scheduled;
          # in that case there is nothing to import and `nil` is returned.
        end

        # Bulk inserts the issue's assignees. Assignees for which no user ID
        # can be found are left out.
        #
        # issue_id - The ID of the created issue.
        def create_assignees(issue_id)
          rows = issue.assignees.each_with_object([]) do |assignee, acc|
            user_id = user_finder.user_id_for(assignee)

            acc << { issue_id: issue_id, user_id: user_id } if user_id
          end

          Gitlab::Database.bulk_insert(IssueAssignee.table_name, rows)
        end
      end
    end
  end
end
|
|
@ -0,0 +1,35 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Gitlab
  module GithubImport
    module Importer
      # Importer for the `issues` collection of a project.
      #
      # The scheduling logic itself is mixed in from ParallelScheduling; this
      # class only supplies the hook methods that module calls.
      class IssuesImporter
        include ParallelScheduling

        # The name of the client method used to retrieve the data to import.
        def collection_method
          :issues
        end

        # Options passed along when retrieving the collection: issues in any
        # state, sorted by creation in ascending order.
        def collection_options
          { state: 'all', sort: 'created', direction: 'asc' }
        end

        # The value used to track an issue in the "already imported" cache.
        def id_for_already_imported_cache(issue)
          issue.number
        end

        # The class used to convert API responses before importing them.
        def representation_class
          Representation::Issue
        end

        # The class that imports a single object when running sequentially.
        def importer_class
          IssueAndLabelLinksImporter
        end

        # The Sidekiq worker that imports a single object when running in
        # parallel.
        def sidekiq_worker_class
          ImportIssueWorker
        end
      end
    end
  end
end
|
|
@ -0,0 +1,52 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Gitlab
  module GithubImport
    module Importer
      # Creates the label links (rows in the `LabelLink` table) for a single
      # issue or pull request.
      class LabelLinksImporter
        attr_reader :issue, :project, :client, :label_finder

        # issue - An instance of `Gitlab::GithubImport::Representation::Issue`
        # project - An instance of `Project`
        # client - An instance of `Gitlab::GithubImport::Client`
        def initialize(issue, project, client)
          @issue = issue
          @project = project
          @client = client
          @label_finder = LabelFinder.new(project)
        end

        def execute
          create_labels
        end

        # Builds one row per label name that has a known label ID and bulk
        # inserts all of them in one go.
        def create_labels
          now = Time.zone.now
          target_id = find_target_id

          links = issue.label_names.each_with_object([]) do |name, acc|
            label_id = label_finder.id_for(name)

            # A label may have been created and assigned after all of the
            # project's labels were imported; such labels have no ID and are
            # skipped.
            next unless label_id

            acc << {
              label_id: label_id,
              target_id: target_id,
              target_type: issue.issuable_type,
              created_at: now,
              updated_at: now
            }
          end

          Gitlab::Database.bulk_insert(LabelLink.table_name, links)
        end

        # Returns the cached database ID of the object the labels belong to.
        def find_target_id
          GithubImport::IssuableFinder.new(project, issue).database_id
        end
      end
    end
  end
end
|
|
@ -0,0 +1,55 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Gitlab
  module GithubImport
    module Importer
      # Bulk imports the labels of a GitHub repository and then rebuilds the
      # label ID cache.
      class LabelsImporter
        include BulkImporting

        attr_reader :project, :client, :existing_labels

        # project - An instance of `Project`.
        # client - An instance of `Gitlab::GithubImport::Client`.
        def initialize(project, client)
          @project = project
          @client = client

          # Titles of the labels the project already has, used by
          # `already_imported?`.
          @existing_labels = project.labels.pluck(:title).to_set
        end

        def execute
          bulk_insert(Label, build_labels)
          build_labels_cache
        end

        # Returns the rows to insert, as produced by `build_database_rows`.
        def build_labels
          build_database_rows(each_label)
        end

        # Returns true if the project already has a label with this name.
        def already_imported?(label)
          existing_labels.include?(label.name)
        end

        def build_labels_cache
          LabelFinder.new(project).build_cache
        end

        # Returns the Hash of column values for a single label.
        def build(label)
          now = Time.zone.now

          {
            title: label.name,
            color: '#' + label.color,
            project_id: project.id,
            type: 'ProjectLabel',
            created_at: now,
            updated_at: now
          }
        end

        # Returns the labels of the repository as given by the client.
        def each_label
          client.labels(project.import_source)
        end
      end
    end
  end
end
|
|
@ -0,0 +1,58 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Gitlab
  module GithubImport
    module Importer
      # Bulk imports the milestones of a GitHub repository and then rebuilds
      # the milestone ID cache.
      class MilestonesImporter
        include BulkImporting

        attr_reader :project, :client, :existing_milestones

        # project - An instance of `Project`
        # client - An instance of `Gitlab::GithubImport::Client`
        def initialize(project, client)
          @project = project
          @client = client

          # IIDs of the milestones the project already has, used by
          # `already_imported?`.
          @existing_milestones = project.milestones.pluck(:iid).to_set
        end

        def execute
          bulk_insert(Milestone, build_milestones)
          build_milestones_cache
        end

        # Returns the rows to insert, as produced by `build_database_rows`.
        def build_milestones
          build_database_rows(each_milestone)
        end

        # Returns true if the project already has a milestone whose IID
        # equals the number of the given milestone.
        def already_imported?(milestone)
          existing_milestones.include?(milestone.number)
        end

        def build_milestones_cache
          MilestoneFinder.new(project).build_cache
        end

        # Returns the Hash of column values for a single milestone.
        def build(milestone)
          {
            iid: milestone.number,
            title: milestone.title,
            description: milestone.description,
            project_id: project.id,
            state: state_for(milestone),
            created_at: milestone.created_at,
            updated_at: milestone.updated_at
          }
        end

        # Maps the milestone's state to `:active` for "open" and `:closed`
        # for anything else.
        def state_for(milestone)
          if milestone.state == 'open'
            :active
          else
            :closed
          end
        end

        # Returns the milestones (in any state) of the repository as given by
        # the client.
        def each_milestone
          client.milestones(project.import_source, state: 'all')
        end
      end
    end
  end
end
|
|
@ -0,0 +1,54 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Gitlab
  module GithubImport
    module Importer
      # Imports a single regular (non-diff) note.
      class NoteImporter
        attr_reader :note, :project, :client, :user_finder

        # note - An instance of `Gitlab::GithubImport::Representation::Note`.
        # project - An instance of `Project`.
        # client - An instance of `Gitlab::GithubImport::Client`.
        def initialize(note, project, client)
          @note = note
          @project = project
          @client = client
          @user_finder = UserFinder.new(project, client)
        end

        # Inserts the note. Nothing is inserted (and `nil` is returned) when
        # no database ID is cached for the object the note belongs to.
        def execute
          noteable_id = find_noteable_id
          return unless noteable_id

          author_id, author_found = user_finder.author_id_for(note)

          body = MarkdownText.format(note.note, note.author, author_found)

          row = {
            noteable_type: note.noteable_type,
            noteable_id: noteable_id,
            project_id: project.id,
            author_id: author_id,
            note: body,
            system: false,
            created_at: note.created_at,
            updated_at: note.updated_at
          }

          # bulk_insert is used on purpose: it bypasses validations and
          # callbacks, which would otherwise execute a lot of unnecessary SQL
          # queries when importing large projects.
          Gitlab::Database.bulk_insert(Note.table_name, [row])
        rescue ActiveRecord::InvalidForeignKey
          # The project and the issue may have been removed after this job
          # was scheduled; in that case the note is simply not created.
        end

        # Returns the ID of the issue or merge request to create the note for.
        def find_noteable_id
          GithubImport::IssuableFinder.new(project, note).database_id
        end
      end
    end
  end
end
|
|
@ -0,0 +1,31 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Gitlab
  module GithubImport
    module Importer
      # Importer for the `issues_comments` collection of a project.
      #
      # The scheduling logic itself is mixed in from ParallelScheduling; this
      # class only supplies the hook methods that module calls.
      class NotesImporter
        include ParallelScheduling

        # The name of the client method used to retrieve the data to import.
        def collection_method
          :issues_comments
        end

        # The value used to track a note in the "already imported" cache.
        def id_for_already_imported_cache(note)
          note.id
        end

        # The class used to convert API responses before importing them.
        def representation_class
          Representation::Note
        end

        # The class that imports a single object when running sequentially.
        def importer_class
          NoteImporter
        end

        # The Sidekiq worker that imports a single object when running in
        # parallel.
        def sidekiq_worker_class
          ImportNoteWorker
        end
      end
    end
  end
end
|
|
@ -0,0 +1,91 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Gitlab
  module GithubImport
    module Importer
      # Imports a single GitHub pull request as a merge request and caches
      # the resulting database ID.
      class PullRequestImporter
        attr_reader :pull_request, :project, :client, :user_finder,
                    :milestone_finder, :issuable_finder

        # pull_request - An instance of
        #                `Gitlab::GithubImport::Representation::PullRequest`.
        # project - An instance of `Project`
        # client - An instance of `Gitlab::GithubImport::Client`
        def initialize(pull_request, project, client)
          @pull_request = pull_request
          @project = project
          @client = client
          @user_finder = UserFinder.new(project, client)
          @milestone_finder = MilestoneFinder.new(project)
          @issuable_finder =
            GithubImport::IssuableFinder.new(project, pull_request)
        end

        # Creates the merge request and, when that succeeded, caches its ID.
        def execute
          mr_id = create_merge_request

          issuable_finder.cache_database_id(mr_id) if mr_id
        end

        # Creates the merge request and returns its ID.
        #
        # Returns `nil` when the insert violates a foreign key.
        def create_merge_request
          author_id, author_found = user_finder.author_id_for(pull_request)

          description = MarkdownText.format(
            pull_request.description,
            pull_request.author,
            author_found
          )

          # Everything below runs in one transaction: without it an error
          # could leave incomplete data behind, which in turn can cause
          # duplicate key errors when the job is retried.
          MergeRequest.transaction do
            row = build_attributes(author_id, description)

            # Creating a merge request the regular way runs many hooks (for
            # example the ones that render Markdown). These are unnecessary
            # here and may even lead to transaction timeouts, so the row is
            # inserted directly and then loaded again; only the strictly
            # necessary extra work is performed afterwards.
            new_id =
              GithubImport.insert_and_return_id(row, project.merge_requests)

            merge_request = project.merge_requests.find(new_id)

            # Both SHAs are needed to create the correct merge request diffs.
            merge_request.source_branch_sha = pull_request.source_branch_sha
            merge_request.target_branch_sha = pull_request.target_branch_sha

            merge_request.keep_around_commit
            merge_request.merge_request_diffs.create

            merge_request.id
          end
        rescue ActiveRecord::InvalidForeignKey
          # The project may have been removed after this job was scheduled;
          # in that case the merge request is simply not created.
        end

        private

        # Returns the Hash of column values for the merge request row.
        def build_attributes(author_id, description)
          {
            iid: pull_request.iid,
            title: pull_request.truncated_title,
            description: description,
            source_project_id: project.id,
            target_project_id: project.id,
            source_branch: pull_request.formatted_source_branch,
            target_branch: pull_request.target_branch,
            state: pull_request.state,
            milestone_id: milestone_finder.id_for(pull_request),
            author_id: author_id,
            assignee_id: user_finder.assignee_id_for(pull_request),
            created_at: pull_request.created_at,
            updated_at: pull_request.updated_at
          }
        end
      end
    end
  end
end
|
|
@ -0,0 +1,83 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Gitlab
  module GithubImport
    module Importer
      # Importer for the `pull_requests` collection of a project.
      #
      # On top of the hooks required by ParallelScheduling this class
      # re-fetches the repository whenever a pull request refers to commits
      # that are not available locally yet.
      class PullRequestsImporter
        include ParallelScheduling

        # The name of the client method used to retrieve the data to import.
        def collection_method
          :pull_requests
        end

        # Options passed along when retrieving the collection: pull requests
        # in any state, sorted by creation in ascending order.
        def collection_options
          { state: 'all', sort: 'created', direction: 'asc' }
        end

        # The value used to track a pull request in the "already imported"
        # cache.
        def id_for_already_imported_cache(pr)
          pr.number
        end

        # The class used to convert API responses before importing them.
        def representation_class
          Representation::PullRequest
        end

        # The class that imports a single object when running sequentially.
        def importer_class
          PullRequestImporter
        end

        # The Sidekiq worker that imports a single object when running in
        # parallel.
        def sidekiq_worker_class
          ImportPullRequestWorker
        end

        # Wraps the default iteration so the repository is updated (when
        # needed) right before a pull request is yielded.
        def each_object_to_import
          super do |pr|
            update_repository if update_repository?(pr)

            yield pr
          end
        end

        # Fetches the "github" remote again and records the update.
        def update_repository
          # The timestamp is deliberately written _before_ fetching. Writing
          # it after the fetch could make us miss changes pushed during the
          # fetch, or between the fetch and the timestamp update.
          project.update_column(:last_repository_updated_at, Time.zone.now)

          project.repository.fetch_remote('github', forced: false)

          path = project.path_with_namespace

          Rails.logger
            .info("GitHub importer finished updating repository for #{path}")

          repository_updates_counter.increment(project: path)
        end

        # Returns true if the repository has to be fetched again before the
        # given pull request can be imported.
        def update_repository?(pr)
          last_update = project.last_repository_updated_at || project.created_at

          return false if pr.updated_at < last_update

          # A pull request can be updated without any new commits, so only
          # re-fetch when the head or base commit is actually missing.
          !commit_exists?(pr.head.sha) || !commit_exists?(pr.base.sha)
        end

        # Returns true if the repository lookup of the SHA succeeds, false if
        # it raises a `Rugged::Error`.
        def commit_exists?(sha)
          begin
            project.repository.lookup(sha)
          rescue Rugged::Error
            return false
          end

          true
        end

        # The (memoized) counter tracking how often repositories had to be
        # updated again.
        def repository_updates_counter
          @repository_updates_counter ||= Gitlab::Metrics.counter(
            :github_importer_repository_updates,
            'The number of times repositories have to be updated again'
          )
        end
      end
    end
  end
end
|
|
@ -0,0 +1,55 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Gitlab
  module GithubImport
    module Importer
      # Bulk imports the releases of a GitHub repository.
      class ReleasesImporter
        include BulkImporting

        attr_reader :project, :client, :existing_tags

        # project - An instance of `Project`
        # client - An instance of `Gitlab::GithubImport::Client`
        def initialize(project, client)
          @project = project
          @client = client

          # Tags of the releases the project already has, used by
          # `already_imported?`.
          @existing_tags = project.releases.pluck(:tag).to_set
        end

        def execute
          bulk_insert(Release, build_releases)
        end

        # Returns the rows to insert, as produced by `build_database_rows`.
        def build_releases
          build_database_rows(each_release)
        end

        # Returns true if the project already has a release for this tag.
        def already_imported?(release)
          existing_tags.include?(release.tag_name)
        end

        # Returns the Hash of column values for a single release.
        def build(release)
          {
            tag: release.tag_name,
            description: description_for(release),
            created_at: release.created_at,
            updated_at: release.updated_at,
            project_id: project.id
          }
        end

        # Returns the releases of the repository as given by the client.
        def each_release
          client.releases(project.import_source)
        end

        # Returns the release body, or a generated placeholder when the body
        # is blank.
        def description_for(release)
          release.body.present? ? release.body : "Release for tag #{release.tag_name}"
        end
      end
    end
  end
end
|
|
@ -0,0 +1,96 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Gitlab
  module GithubImport
    module Importer
      # Imports the Git repository of a GitHub project and, when available,
      # its wiki repository.
      class RepositoryImporter
        include Gitlab::ShellAdapter

        attr_reader :project, :client

        # project - An instance of `Project`
        # client - The client used to query the GitHub repository.
        def initialize(project, client)
          @project = project
          @client = client
        end

        # Returns a truthy value if the wiki should be imported: the GitHub
        # repository reports a wiki and the project has no wiki repository
        # yet.
        def import_wiki?
          has_wiki = client.repository(project.import_source)&.has_wiki

          has_wiki && !project.wiki_repository_exists?
        end

        # Imports the repository data.
        #
        # Returns true if the data was imported successfully or the
        # repository had already been imported before.
        def execute
          # The repository may already be there, e.g. because this job is
          # being retried after `import_wiki?` raised a rate limit error. In
          # that case the main repository is not imported again.
          imported = project.repository.empty_repo? ? import_repository : true

          update_clone_time if imported

          imported = import_wiki_repository if import_wiki? && imported

          imported
        end

        # Fetches the "github" remote into the project's repository.
        #
        # Returns true on success. On failure the import is marked as failed
        # and false is returned.
        def import_repository
          project.ensure_repository

          configure_repository_remote

          project.repository.fetch_remote('github', forced: true)

          true
        rescue Gitlab::Git::Repository::NoRepository, Gitlab::Shell::Error => error
          fail_import("Failed to import the repository: #{error.message}")
        end

        # Adds the "github" remote, including a fetch config that maps pull
        # request heads to merge request refs, unless the remote already
        # exists.
        def configure_repository_remote
          unless project.repository.remote_exists?('github')
            project.repository.add_remote('github', project.import_url)
            project.repository.set_import_remote_as_mirror('github')

            project.repository.add_remote_fetch_config(
              'github',
              '+refs/pull/*/head:refs/merge-requests/*/head'
            )
          end
        end

        # Imports the wiki repository through gitlab-shell.
        #
        # Returns true on success, and also when the error message says the
        # repository is not exported. Any other shell error marks the import
        # as failed and returns false.
        def import_wiki_repository
          wiki_path = "#{project.disk_path}.wiki"
          wiki_url = project.import_url.sub(/\.git\z/, '.wiki.git')
          storage_path = project.repository_storage_path

          gitlab_shell.import_repository(storage_path, wiki_path, wiki_url)

          true
        rescue Gitlab::Shell::Error => error
          return true if error.message =~ /repository not exported/

          fail_import("Failed to import the wiki: #{error.message}")
        end

        # Records the current time as the last repository update.
        def update_clone_time
          project.update_column(:last_repository_updated_at, Time.zone.now)
        end

        # Marks the import as failed with the given message; always returns
        # false so callers can use it as their return value.
        def fail_import(message)
          project.mark_import_as_failed(message)
          false
        end
      end
    end
  end
end
|
|
@ -0,0 +1,81 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Gitlab
  module GithubImport
    # IssuableFinder caches and retrieves database IDs for issuable objects
    # such as issues and pull requests. Having these IDs in the cache removes
    # the need for running a lot of database queries when importing GitHub
    # projects.
    class IssuableFinder
      attr_reader :project, :object

      # The base cache key to use for storing/retrieving issuable IDs.
      CACHE_KEY = 'github-import/issuable-finder/%{project}/%{type}/%{iid}'.freeze

      # project - An instance of `Project`.
      # object - The object to look up or set a database ID for.
      def initialize(project, object)
        @project = project
        @object = object
      end

      # Returns the cached database ID for the object as an Integer, or `nil`
      # when the cache holds no (non-blank) value for it.
      def database_id
        cached = Caching.read(cache_key)
        return unless cached.present?

        cached.to_i
      end

      # Associates the given database ID with the current object.
      #
      # database_id - The ID of the corresponding database row.
      def cache_database_id(database_id)
        Caching.write(cache_key, database_id)
      end

      private

      # Builds the cache key from the project ID plus the object's type and
      # IID.
      def cache_key
        format(
          CACHE_KEY,
          project: project.id,
          type: cache_key_type,
          iid: cache_key_iid
        )
      end

      # Returns the type to use in cache keys: the object's `issuable_type`
      # when it responds to that, otherwise its `noteable_type`.
      #
      # Raises TypeError for objects that respond to neither.
      def cache_key_type
        return object.issuable_type if object.respond_to?(:issuable_type)
        return object.noteable_type if object.respond_to?(:noteable_type)

        raise(
          TypeError,
          "Instances of #{object.class} are not supported"
        )
      end

      # Returns the IID to use in cache keys: the object's `noteable_id` when
      # it responds to that, otherwise its `iid`.
      #
      # Raises TypeError for objects that respond to neither.
      def cache_key_iid
        return object.noteable_id if object.respond_to?(:noteable_id)
        return object.iid if object.respond_to?(:iid)

        raise(
          TypeError,
          "Instances of #{object.class} are not supported"
        )
      end
    end
  end
end
|
|
@ -0,0 +1,37 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Gitlab
  module GithubImport
    # LabelFinder stores and retrieves the database IDs of a project's labels
    # using the import cache.
    class LabelFinder
      attr_reader :project

      # The base cache key to use for storing/retrieving label IDs.
      CACHE_KEY = 'github-import/label-finder/%{project}/%{name}'.freeze

      # project - An instance of `Project`.
      def initialize(project)
        @project = project
      end

      # Returns the cached label ID for the given name.
      def id_for(name)
        key = cache_key_for(name)

        Caching.read_integer(key)
      end

      # Writes the IDs of all of the project's labels to the cache in one
      # go, keyed by label name.
      def build_cache
        mapping = {}

        project.labels.pluck(:id, :name).each do |id, name|
          mapping[cache_key_for(name)] = id
        end

        Caching.write_multiple(mapping)
      end

      # Returns the cache key for the label with the given name.
      def cache_key_for(name)
        format(CACHE_KEY, project: project.id, name: name)
      end
    end
  end
end
|
|
@ -0,0 +1,30 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Gitlab
  module GithubImport
    # MarkdownText formats imported Markdown so that it credits the original
    # GitHub author whenever that author has no GitLab account.
    class MarkdownText
      attr_reader :text, :author, :exists

      # Shortcut for `new(*args).to_s`.
      def self.format(*args)
        new(*args).to_s
      end

      # text - The Markdown text as a String.
      # author - An instance of `Gitlab::GithubImport::Representation::User`,
      #          or `nil` when the author is not known.
      # exists - Boolean that indicates the user exists in the GitLab database.
      def initialize(text, author, exists = false)
        @text = text
        @author = author
        @exists = exists
      end

      # Returns the text, prefixed with a "Created by" line when the author
      # does not exist in GitLab.
      #
      # The text is returned unchanged when the user exists, and also when
      # there is no author or the author has a blank login: in those cases
      # there is nobody to credit, and calling `login` on a missing author
      # would raise a NoMethodError.
      def to_s
        login = author&.login

        return text if exists || login.to_s.strip.empty?

        "*Created by: #{login}*\n\n#{text}"
      end
    end
  end
end
|
|
@ -0,0 +1,40 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Gitlab
  module GithubImport
    # MilestoneFinder stores and retrieves the database IDs of a project's
    # milestones using the import cache.
    class MilestoneFinder
      attr_reader :project

      # The base cache key to use for storing/retrieving milestone IDs.
      CACHE_KEY = 'github-import/milestone-finder/%{project}/%{iid}'.freeze

      # project - An instance of `Project`
      def initialize(project)
        @project = project
      end

      # Returns the cached milestone ID for the issuable, or `nil` when the
      # issuable has no milestone number.
      #
      # issuable - An instance of `Gitlab::GithubImport::Representation::Issue`
      #            or `Gitlab::GithubImport::Representation::PullRequest`.
      def id_for(issuable)
        if issuable.milestone_number
          Caching.read_integer(cache_key_for(issuable.milestone_number))
        end
      end

      # Writes the IDs of all of the project's milestones to the cache in one
      # go, keyed by milestone IID.
      def build_cache
        mapping = {}

        project.milestones.pluck(:id, :iid).each do |id, iid|
          mapping[cache_key_for(iid)] = id
        end

        Caching.write_multiple(mapping)
      end

      # Returns the cache key for the milestone with the given IID.
      def cache_key_for(iid)
        format(CACHE_KEY, project: project.id, iid: iid)
      end
    end
  end
end
|
|
@ -0,0 +1,31 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Gitlab
  module GithubImport
    # PageCounter keeps track of the last imported page of a collection, so
    # workers can resume where they left off in the event of an error.
    class PageCounter
      attr_reader :cache_key

      # The base cache key to use for storing the last page number.
      CACHE_KEY = 'github-importer/page-counter/%{project}/%{collection}'.freeze

      # project - The project being imported; its ID is part of the cache
      #           key.
      # collection - The name of the collection the pages belong to.
      def initialize(project, collection)
        @cache_key = format(CACHE_KEY, project: project.id, collection: collection)
      end

      # Sets the page number to the given value.
      #
      # Returns true if the page number was overwritten, false otherwise.
      def set(page)
        Caching.write_if_greater(cache_key, page)
      end

      # Returns the current value from the cache, or 1 when nothing has been
      # cached yet.
      def current
        cached = Caching.read_integer(cache_key)

        cached || 1
      end
    end
  end
end
|
|
@ -0,0 +1,44 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Gitlab
  module GithubImport
    # The ParallelImporter schedules the importing of a GitHub project using
    # Sidekiq.
    class ParallelImporter
      attr_reader :project

      # This importer always runs asynchronously.
      def self.async?
        true
      end

      # project - The project to import.
      def initialize(project)
        @project = project
      end

      # Registers a custom import JID for the project and schedules the
      # first import stage. Always returns true.
      def execute
        import_jid = generate_jid

        # The original import JID belongs to the RepositoryImportWorker job
        # and disappears once that job completes. Reusing it could make
        # StuckImportJobsWorker mark the import as stuck before
        # Stage::ImportRepositoryWorker gets to run.
        #
        # To work around this a custom JID is registered here; the various
        # stages refresh it whenever necessary.
        Gitlab::SidekiqStatus.set(
          import_jid,
          StuckImportJobsWorker::IMPORT_JOBS_EXPIRATION
        )

        project.update_column(:import_jid, import_jid)

        Stage::ImportRepositoryWorker.perform_async(project.id)

        true
      end

      # Returns the custom JID used for tracking the import of the project.
      def generate_jid
        "github-importer/#{project.id}"
      end
    end
  end
end
|
|
@ -0,0 +1,162 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Gitlab
|
||||
module GithubImport
|
||||
module ParallelScheduling
|
||||
attr_reader :project, :client, :page_counter, :already_imported_cache_key
|
||||
|
||||
# The base cache key to use for tracking already imported objects.
|
||||
ALREADY_IMPORTED_CACHE_KEY =
|
||||
'github-importer/already-imported/%{project}/%{collection}'.freeze
|
||||
|
||||
# project - An instance of `Project`.
|
||||
# client - An instance of `Gitlab::GithubImport::Client`.
|
||||
# parallel - When set to true the objects will be imported in parallel.
|
||||
def initialize(project, client, parallel: true)
|
||||
@project = project
|
||||
@client = client
|
||||
@parallel = parallel
|
||||
@page_counter = PageCounter.new(project, collection_method)
|
||||
@already_imported_cache_key = ALREADY_IMPORTED_CACHE_KEY %
|
||||
{ project: project.id, collection: collection_method }
|
||||
end
|
||||
|
||||
def parallel?
|
||||
@parallel
|
||||
end
|
||||
|
||||
def execute
|
||||
retval =
|
||||
if parallel?
|
||||
parallel_import
|
||||
else
|
||||
sequential_import
|
||||
end
|
||||
|
||||
# Once we have completed all work we can remove our "already exists"
|
||||
# cache so we don't put too much pressure on Redis.
|
||||
#
|
||||
# We don't immediately remove it since it's technically possible for
|
||||
# other instances of this job to still run, instead we set the
|
||||
# expiration time to a lower value. This prevents the other jobs from
|
||||
# still scheduling duplicates while. Since all work has already been
|
||||
# completed those jobs will just cycle through any remaining pages while
|
||||
# not scheduling anything.
|
||||
Caching.expire(already_imported_cache_key, 15.minutes.to_i)
|
||||
|
||||
retval
|
||||
end
|
||||
|
||||
# Imports all the objects in sequence in the current thread.
|
||||
def sequential_import
|
||||
each_object_to_import do |object|
|
||||
repr = representation_class.from_api_response(object)
|
||||
|
||||
importer_class.new(repr, project, client).execute
|
||||
end
|
||||
end
|
||||
|
||||
# Imports all objects in parallel by scheduling a Sidekiq job for every
|
||||
# individual object.
|
||||
def parallel_import
|
||||
waiter = JobWaiter.new
|
||||
|
||||
each_object_to_import do |object|
|
||||
repr = representation_class.from_api_response(object)
|
||||
|
||||
sidekiq_worker_class
|
||||
.perform_async(project.id, repr.to_hash, waiter.key)
|
||||
|
||||
waiter.jobs_remaining += 1
|
||||
end
|
||||
|
||||
waiter
|
||||
end
|
||||
|
||||
# The method that will be called for traversing through all the objects to
|
||||
# import, yielding them to the supplied block.
|
||||
def each_object_to_import
|
||||
repo = project.import_source
|
||||
|
||||
# We inject the page number here to make sure that all importers always
|
||||
# start where they left off. Simply starting over wouldn't work for
|
||||
# repositories with a lot of data (e.g. tens of thousands of comments).
|
||||
options = collection_options.merge(page: page_counter.current)
|
||||
|
||||
client.each_page(collection_method, repo, options) do |page|
|
||||
# Technically it's possible that the same work is performed multiple
|
||||
# times, as Sidekiq doesn't guarantee there will ever only be one
|
||||
# instance of a job. In such a scenario it's possible for one job to
|
||||
# have a lower page number (e.g. 5) compared to another (e.g. 10). In
|
||||
# this case we skip over all the objects until we have caught up,
|
||||
# reducing the number of duplicate jobs scheduled by the provided
|
||||
# block.
|
||||
next unless page_counter.set(page.number)
|
||||
|
||||
page.objects.each do |object|
|
||||
next if already_imported?(object)
|
||||
|
||||
yield object
|
||||
|
||||
# We mark the object as imported immediately so we don't end up
|
||||
# scheduling it multiple times.
|
||||
mark_as_imported(object)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
# Returns true if the given object has already been imported, false
|
||||
# otherwise.
|
||||
#
|
||||
# object - The object to check.
|
||||
def already_imported?(object)
|
||||
id = id_for_already_imported_cache(object)
|
||||
|
||||
Caching.set_includes?(already_imported_cache_key, id)
|
||||
end
|
||||
|
||||
# Marks the given object as "already imported".
|
||||
def mark_as_imported(object)
  # Adds the importer-specific identifier of the object to the "already
  # imported" cache set.
  cache_id = id_for_already_imported_cache(object)

  Caching.set_add(already_imported_cache_key, cache_id)
end
|
||||
|
||||
# Returns the ID to use for the cache used for checking if an object has
|
||||
# already been imported or not.
|
||||
#
|
||||
# object - The object we may want to import.
|
||||
def id_for_already_imported_cache(object)
  # Abstract hook: including classes must return the identifier under which
  # `object` is tracked in the "already imported" cache.
  #
  # Raises NotImplementedError (with the name of the missing hook) when the
  # including class does not override this method.
  raise NotImplementedError, "#{self.class} must implement ##{__method__}"
end
|
||||
|
||||
# The class used for converting API responses to Hashes when performing
|
||||
# the import.
|
||||
def representation_class
  # Abstract hook: including classes must return the class used for
  # converting API responses to Hashes when performing the import.
  #
  # Raises NotImplementedError (with the name of the missing hook) when the
  # including class does not override this method.
  raise NotImplementedError, "#{self.class} must implement ##{__method__}"
end
|
||||
|
||||
# The class to use for importing objects when importing them sequentially.
|
||||
def importer_class
  # Abstract hook: including classes must return the class used for
  # importing objects when importing them sequentially.
  #
  # Raises NotImplementedError (with the name of the missing hook) when the
  # including class does not override this method.
  raise NotImplementedError, "#{self.class} must implement ##{__method__}"
end
|
||||
|
||||
# The Sidekiq worker class used for scheduling the importing of objects in
|
||||
# parallel.
|
||||
def sidekiq_worker_class
  # Abstract hook: including classes must return the Sidekiq worker class
  # used for scheduling the importing of objects in parallel.
  #
  # Raises NotImplementedError (with the name of the missing hook) when the
  # including class does not override this method.
  raise NotImplementedError, "#{self.class} must implement ##{__method__}"
end
|
||||
|
||||
# The name of the method to call to retrieve the data to import.
|
||||
def collection_method
  # Abstract hook: including classes must return the name of the client
  # method to call to retrieve the data to import.
  #
  # Raises NotImplementedError (with the name of the missing hook) when the
  # including class does not override this method.
  raise NotImplementedError, "#{self.class} must implement ##{__method__}"
end
|
||||
|
||||
# Any options to be passed to the method used for retrieving the data to
|
||||
# import.
|
||||
def collection_options
  # Extra options handed to the collection method; none by default.
  # Including classes may override this to pass additional options.
  Hash.new
end
|
||||
end
|
||||
end
|
||||
end
|
|
@ -0,0 +1,9 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Gitlab
  module GithubImport
    # Raised when the GitHub API rate limit has been reached, or is about to
    # be reached.
    class RateLimitError < StandardError
    end
  end
end
|
|
@ -0,0 +1,25 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Gitlab
  module GithubImport
    # Namespace for the representation classes, plus helpers shared by them.
    module Representation
      # Keys whose String values are parsed into Time instances by
      # `symbolize_hash`.
      TIMESTAMP_KEYS = %i[created_at updated_at merged_at].freeze

      # Converts a Hash with String based keys to one that can be used by the
      # various Representation classes.
      #
      # Keys (including nested ones) are symbolized, and any String found
      # under one of TIMESTAMP_KEYS is parsed into a Time.
      #
      # raw_hash - The Hash to convert. `nil` (the default) is treated as an
      #            empty Hash instead of raising NoMethodError.
      #
      # Returns a new Hash with Symbol keys.
      #
      # Example:
      #
      #     Representation.symbolize_hash('number' => 10) # => { number: 10 }
      def self.symbolize_hash(raw_hash = nil)
        hash = (raw_hash || {}).deep_symbolize_keys

        TIMESTAMP_KEYS.each do |key|
          hash[key] = Time.parse(hash[key]) if hash[key].is_a?(String)
        end

        hash
      end
    end
  end
end
|
|
@ -0,0 +1,87 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Gitlab
  module GithubImport
    module Representation
      # Representation of a GitHub note that is attached to a pull request
      # diff.
      class DiffNote
        include ToHash
        include ExposeAttribute

        attr_reader :attributes

        expose_attribute :noteable_type, :noteable_id, :commit_id, :file_path,
                         :diff_hunk, :author, :note, :created_at, :updated_at,
                         :github_id

        # Extracts the pull request number from the HTML URL of a note.
        NOTEABLE_ID_REGEX = /\/pull\/(?<iid>\d+)/i

        # Creates a diff note from a GitHub API response.
        #
        # note - The `Sawyer::Resource` holding the note details.
        #
        # Raises ArgumentError when the note's URL does not point to a pull
        # request.
        def self.from_api_response(note)
          url_match = note.html_url.match(NOTEABLE_ID_REGEX)

          if url_match.nil?
            raise(
              ArgumentError,
              "The note URL #{note.html_url.inspect} is not supported"
            )
          end

          # The author is optional; without one it stays nil.
          author = note.user ? Representation::User.from_api_response(note.user) : nil

          new(
            noteable_type: 'MergeRequest',
            noteable_id: url_match[:iid].to_i,
            file_path: note.path,
            commit_id: note.commit_id,
            diff_hunk: note.diff_hunk,
            author: author,
            note: note.body,
            created_at: note.created_at,
            updated_at: note.updated_at,
            github_id: note.id
          )
        end

        # Creates a diff note from a Hash that was built from a JSON payload.
        def self.from_json_hash(raw_hash)
          details = Representation.symbolize_hash(raw_hash)
          raw_author = details[:author]

          # Only wrap the author when one is present.
          details[:author] = Representation::User.from_json_hash(raw_author) if raw_author

          new(details)
        end

        # attributes - The raw note details as a Hash with Symbol keys.
        def initialize(attributes)
          @attributes = attributes
        end

        # Returns the line code of the last line of the diff hunk.
        def line_code
          parsed_lines = Gitlab::Diff::Parser.new.parse(diff_hunk.lines).to_a
          last_line = parsed_lines.last

          Gitlab::Git.diff_line_code(file_path, last_line.new_pos, last_line.old_pos)
        end

        # Builds the Hash used for populating `notes.st_diff`, so that no Git
        # data has to be requested for every diff note.
        def diff_hash
          path = file_path

          {
            diff: diff_hunk,
            new_path: path,
            old_path: path,

            # LegacyDiffNote notes never display the fields below, so their
            # exact values are not important.
            a_mode: '100644',
            b_mode: '100644',
            new_file: false
          }
        end
      end
    end
  end
end
|
|
@ -0,0 +1,26 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Gitlab
  module GithubImport
    module Representation
      # Concern adding the `expose_attribute` class macro to the including
      # class.
      module ExposeAttribute
        extend ActiveSupport::Concern

        module ClassMethods
          # Generates one reader per given name; every reader returns the
          # value stored under that (symbolized) name in `attributes`.
          #
          # Example:
          #
          #     expose_attribute :iid, :title
          def expose_attribute(*names)
            names.each do |raw_name|
              attribute = raw_name.to_sym

              define_method(attribute) do
                attributes[attribute]
              end
            end
          end
        end
      end
    end
  end
end
|
|
@ -0,0 +1,80 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Gitlab
  module GithubImport
    module Representation
      # Representation of a GitHub issue. The issues API may also return
      # pull requests, which is tracked through `pull_request?`.
      class Issue
        include ToHash
        include ExposeAttribute

        attr_reader :attributes

        expose_attribute :iid, :title, :description, :milestone_number,
                         :created_at, :updated_at, :state, :assignees,
                         :label_names, :author

        # Creates an issue from a GitHub API response.
        #
        # issue - The `Sawyer::Resource` holding the issue details.
        def self.from_api_response(issue)
          # The author is optional; without one it stays nil.
          author = issue.user ? Representation::User.from_api_response(issue.user) : nil

          new(
            iid: issue.number,
            title: issue.title,
            description: issue.body,
            milestone_number: issue.milestone&.number,
            state: issue.state == 'open' ? :opened : :closed,
            assignees: issue.assignees.map { |assignee| Representation::User.from_api_response(assignee) },
            label_names: issue.labels.map(&:name),
            author: author,
            created_at: issue.created_at,
            updated_at: issue.updated_at,
            pull_request: issue.pull_request ? true : false
          )
        end

        # Creates an issue from a Hash that was built from a JSON payload.
        def self.from_json_hash(raw_hash)
          details = Representation.symbolize_hash(raw_hash)

          details[:state] = details[:state].to_sym
          details[:assignees].map! do |assignee|
            Representation::User.from_json_hash(assignee)
          end

          raw_author = details[:author]
          details[:author] = Representation::User.from_json_hash(raw_author) if raw_author

          new(details)
        end

        # attributes - The raw issue details as a Hash. The keys of this Hash
        #              (and of any nested hashes) must be symbols.
        def initialize(attributes)
          @attributes = attributes
        end

        # The title, truncated to at most 255 characters.
        def truncated_title
          title.truncate(255)
        end

        # Truthy when at least one label name is present.
        def labels?
          names = label_names

          names && names.any?
        end

        # Whether the API response described a pull request.
        def pull_request?
          attributes[:pull_request]
        end

        # The GitLab issuable type matching this object.
        def issuable_type
          return 'MergeRequest' if pull_request?

          'Issue'
        end
      end
    end
  end
end
|
|
@ -0,0 +1,70 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Gitlab
  module GithubImport
    module Representation
      # Representation of a regular (non-diff) GitHub comment on an issue or
      # a pull request.
      class Note
        include ToHash
        include ExposeAttribute

        attr_reader :attributes

        expose_attribute :noteable_id, :noteable_type, :author, :note,
                         :created_at, :updated_at, :github_id

        # Extracts the kind of parent ("pull" or "issues") and its number
        # from the HTML URL of a note.
        NOTEABLE_TYPE_REGEX = /\/(?<type>(pull|issues))\/(?<iid>\d+)/i

        # Creates a note from a GitHub API response.
        #
        # note - The `Sawyer::Resource` holding the note details.
        #
        # Raises ArgumentError when the note's URL points to neither an issue
        # nor a pull request.
        def self.from_api_response(note)
          url_match = note.html_url.match(NOTEABLE_TYPE_REGEX)

          unless url_match && url_match[:type]
            raise(
              ArgumentError,
              "The note URL #{note.html_url.inspect} is not supported"
            )
          end

          parent_type = url_match[:type] == 'pull' ? 'MergeRequest' : 'Issue'

          # The author is optional; without one it stays nil.
          author = note.user ? Representation::User.from_api_response(note.user) : nil

          new(
            noteable_type: parent_type,
            noteable_id: url_match[:iid].to_i,
            author: author,
            note: note.body,
            created_at: note.created_at,
            updated_at: note.updated_at,
            github_id: note.id
          )
        end

        # Creates a note from a Hash that was built from a JSON payload.
        def self.from_json_hash(raw_hash)
          details = Representation.symbolize_hash(raw_hash)
          raw_author = details[:author]

          # Only wrap the author when one is present.
          details[:author] = Representation::User.from_json_hash(raw_author) if raw_author

          new(details)
        end

        # attributes - The raw note details as a Hash with Symbol keys.
        def initialize(attributes)
          @attributes = attributes
        end

        # Same value as `noteable_type`, under the name shared with the other
        # representations.
        alias issuable_type noteable_type
      end
    end
  end
end
|
|
@ -0,0 +1,114 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Gitlab
  module GithubImport
    module Representation
      # Representation of a GitHub pull request.
      class PullRequest
        include ToHash
        include ExposeAttribute

        attr_reader :attributes

        expose_attribute :iid, :title, :description, :source_branch,
                         :source_branch_sha, :target_branch, :target_branch_sha,
                         :milestone_number, :author, :assignee, :created_at,
                         :updated_at, :merged_at, :source_repository_id,
                         :target_repository_id, :source_repository_owner

        # Creates a pull request from a GitHub API response.
        #
        # pr - The `Sawyer::Resource` holding the pull request details.
        def self.from_api_response(pr)
          # Both the assignee and the author are optional.
          assignee = pr.assignee ? Representation::User.from_api_response(pr.assignee) : nil
          author = pr.user ? Representation::User.from_api_response(pr.user) : nil

          new(
            iid: pr.number,
            title: pr.title,
            description: pr.body,
            source_branch: pr.head.ref,
            target_branch: pr.base.ref,
            source_branch_sha: pr.head.sha,
            target_branch_sha: pr.base.sha,
            source_repository_id: pr.head&.repo&.id,
            target_repository_id: pr.base&.repo&.id,
            source_repository_owner: pr.head&.user&.login,
            state: pr.state == 'open' ? :opened : :closed,
            milestone_number: pr.milestone&.number,
            author: author,
            assignee: assignee,
            created_at: pr.created_at,
            updated_at: pr.updated_at,
            merged_at: pr.merged_at
          )
        end

        # Creates a pull request from a Hash that was built from a JSON
        # payload.
        def self.from_json_hash(raw_hash)
          details = Representation.symbolize_hash(raw_hash)

          details[:state] = details[:state].to_sym

          # The author and the assignee are only converted from a Hash when
          # they were set.
          %i[author assignee].each do |key|
            raw_user = details[key]
            details[key] = Representation::User.from_json_hash(raw_user) if raw_user
          end

          new(details)
        end

        # attributes - The raw pull request details as a Hash. The keys of
        #              this Hash (and of any nested hashes) must be symbols.
        def initialize(attributes)
          @attributes = attributes
        end

        # The title, truncated to at most 255 characters.
        def truncated_title
          title.truncate(255)
        end

        # The source branch name to use on the GitLab side.
        #
        # Cross-project pull requests with a known owner use the format
        # `owner-name:branch-name`.
        def formatted_source_branch
          return "#{source_repository_owner}:#{source_branch}" if cross_project? && source_repository_owner

          # The source and target branch are sometimes the same, which GitLab
          # doesn't support. This can happen when both the user and the
          # source repository were deleted and the PR was submitted from the
          # fork's master branch. The PR number is appended to tell the
          # branches apart.
          return "#{source_branch}-#{iid}" if source_branch == target_branch

          source_branch
        end

        # :merged whenever a merge timestamp is present, the stored state
        # otherwise.
        def state
          merged_at ? :merged : attributes[:state]
        end

        # True when the source repository is unknown or differs from the
        # target repository.
        def cross_project?
          !source_repository_id || source_repository_id != target_repository_id
        end

        # The GitLab issuable type matching this object.
        def issuable_type
          'MergeRequest'
        end
      end
    end
  end
end
|
|
@ -0,0 +1,31 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Gitlab
  module GithubImport
    module Representation
      # Mixin for objects exposing an `attributes` Hash; adds conversion of
      # the object (and anything nested in it) to a plain Hash.
      module ToHash
        # Builds a new Hash holding every attribute of this representation,
        # with each value passed through `convert_value_for_to_hash`. The keys
        # are taken over from `attributes` unchanged.
        def to_hash
          attributes.each_with_object({}) do |(key, value), result|
            result[key] = convert_value_for_to_hash(value)
          end
        end

        # Converts a single attribute value: Arrays are converted element by
        # element, anything responding to `to_hash` is replaced by the result
        # of that call, and every other value is returned as-is.
        def convert_value_for_to_hash(value)
          return value.map { |item| convert_value_for_to_hash(item) } if value.is_a?(Array)

          value.respond_to?(:to_hash) ? value.to_hash : value
        end
      end
    end
  end
end
|
|
@ -0,0 +1,34 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Gitlab
  module GithubImport
    module Representation
      # Representation of a GitHub user, reduced to its ID and login.
      class User
        include ToHash
        include ExposeAttribute

        attr_reader :attributes

        expose_attribute :id, :login

        # Creates a user from a GitHub API response.
        #
        # user - The `Sawyer::Resource` holding the user details.
        def self.from_api_response(user)
          details = { id: user.id, login: user.login }

          new(details)
        end

        # Creates a user from a Hash that was built from a JSON payload.
        def self.from_json_hash(raw_hash)
          details = Representation.symbolize_hash(raw_hash)

          new(details)
        end

        # attributes - The user details as a Hash. The keys of this Hash (and
        #              of any nested hashes) must be symbols.
        def initialize(attributes)
          @attributes = attributes
        end
      end
    end
  end
end
|
|
@ -0,0 +1,50 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Gitlab
  module GithubImport
    # Imports a GitHub project in a single thread, without going through
    # Sidekiq. Useful for tests and Rake tasks; anything else should use the
    # parallel importer instead.
    class SequentialImporter
      attr_reader :project, :client

      # Importers that are built with just the project and the client.
      SEQUENTIAL_IMPORTERS = [
        Importer::LabelsImporter,
        Importer::MilestonesImporter,
        Importer::ReleasesImporter
      ].freeze

      # Importers that additionally take a `parallel:` flag, which is set to
      # false when they are run from this class.
      PARALLEL_IMPORTERS = [
        Importer::PullRequestsImporter,
        Importer::IssuesImporter,
        Importer::DiffNotesImporter,
        Importer::NotesImporter
      ].freeze

      # project - The project to import the data into.
      # token - The token to use for the GitHub API.
      def initialize(project, token: nil)
        @project = project
        @client = GithubImport.new_client_for(project, token: token, parallel: false)
      end

      # Runs the repository importer followed by every other importer, one
      # after the other, then calls `after_import` on the repository.
      #
      # Returns true.
      def execute
        Importer::RepositoryImporter.new(project, client).execute

        SEQUENTIAL_IMPORTERS.each { |importer| importer.new(project, client).execute }

        PARALLEL_IMPORTERS.each do |importer|
          importer.new(project, client, parallel: false).execute
        end

        project.repository.after_import

        true
      end
    end
  end
end
|
|
@ -0,0 +1,164 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Gitlab
  module GithubImport
    # Finds the GitLab user ID that belongs to a GitHub user ID or username.
    #
    # User IDs that were found are cached in Redis to reduce the number of
    # SQL queries executed over time. Valid keys are refreshed upon access so
    # frequently used keys stick around.
    #
    # A lookup is cached even when it found nothing, since most lookups are
    # not going to return results anyway and would otherwise keep querying
    # the database.
    class UserFinder
      attr_reader :project, :client

      # Cache key template for the GitLab user ID of a GitHub user ID.
      ID_CACHE_KEY = 'github-import/user-finder/user-id/%s'.freeze

      # Cache key template for the GitLab user ID of a GitHub Email address.
      ID_FOR_EMAIL_CACHE_KEY =
        'github-import/user-finder/id-for-email/%s'.freeze

      # Cache key template for the Email address of a GitHub username.
      EMAIL_FOR_USERNAME_CACHE_KEY =
        'github-import/user-finder/email-for-username/%s'.freeze

      # project - An instance of `Project`
      # client - An instance of `Gitlab::GithubImport::Client`
      def initialize(project, client)
        @project = project
        @client = client
      end

      # Looks up the GitLab user ID of an object's author.
      #
      # Objects without an author are attributed to the GitLab ghost user.
      #
      # Returns `[user_id, true]` when an ID was found, and
      # `[project.creator_id, false]` when it was not.
      def author_id_for(object)
        author = object&.author
        gitlab_id = author ? user_id_for(author) : GithubImport.ghost_user_id

        return [gitlab_id, true] if gitlab_id

        [project.creator_id, false]
      end

      # Looks up the GitLab user ID of an issuable's assignee; nil when the
      # issuable has no assignee.
      def assignee_id_for(issuable)
        assignee = issuable.assignee

        user_id_for(assignee) if assignee
      end

      # Looks up the GitLab user ID for a GitHub user.
      #
      # user - An instance of `Gitlab::GithubImport::Representation::User`.
      def user_id_for(user)
        find(user.id, user.login)
      end

      # Looks up the GitLab ID for the given GitHub ID or username.
      #
      # id - The ID of the GitHub user.
      # username - The username of the GitHub user.
      def find(id, username)
        email = email_for_github_username(username)
        key_cached, gitlab_id = find_from_cache(id, email)

        if gitlab_id
          gitlab_id
        elsif !key_cached
          # The database is only queried when the cache had no answer at
          # all. A cached "not found" is not looked up again until the keys
          # expire.
          find_id_from_database(id, email)
        end
      end

      # Looks up a user ID in the cache for a given GitHub ID or Email.
      #
      # Returns an Array whose first element tells if a cache key existed
      # and whose second element (possibly absent) is the ID.
      def find_from_cache(id, email = nil)
        lookup_by_id = cached_id_for_github_id(id)

        return lookup_by_id if lookup_by_id.last

        # No Email address may be available (for whatever reason), in which
        # case there is nothing left to look up.
        # NOTE(review): this branch reports "not cached" even if the ID key
        # exists with an empty value — confirm that this is intended.
        email ? cached_id_for_github_email(email) : [false]
      end

      # Looks up a GitLab user ID in the database for a given GitHub user ID
      # or Email, trying the ID first.
      def find_id_from_database(id, email)
        id_for_github_id(id) || id_for_github_email(email)
      end

      # Returns the Email address for a GitHub username, read from the cache
      # when present and otherwise requested through the client (and then
      # written to the cache).
      def email_for_github_username(username)
        cache_key = EMAIL_FOR_USERNAME_CACHE_KEY % username
        cached_email = Caching.read(cache_key)

        return cached_email if cached_email

        github_user = client.user(username)

        github_user ? Caching.write(cache_key, github_user.email) : cached_email
      end

      def cached_id_for_github_id(id)
        read_id_from_cache(ID_CACHE_KEY % id)
      end

      def cached_id_for_github_email(email)
        read_id_from_cache(ID_FOR_EMAIL_CACHE_KEY % email)
      end

      # Queries the GitLab user ID for a GitHub user ID and caches the
      # outcome, also when no user was found.
      def id_for_github_id(id)
        queried_id = query_id_for_github_id(id) || nil

        Caching.write(ID_CACHE_KEY % id, queried_id)
      end

      # Queries the GitLab user ID for a GitHub Email and caches the outcome,
      # also when no user was found.
      def id_for_github_email(email)
        queried_id = query_id_for_github_email(email) || nil

        Caching.write(ID_FOR_EMAIL_CACHE_KEY % email, queried_id)
      end

      def query_id_for_github_id(id)
        User.for_github_id(id).pluck(:id).first
      end

      def query_id_for_github_email(email)
        User.by_any_email(email).pluck(:id).first
      end

      # Reads an ID from the cache.
      #
      # Returns an Array with two values:
      #
      # 1. A boolean indicating if the key was present or not.
      # 2. The ID as an Integer, or nil in case no ID could be found.
      def read_id_from_cache(key)
        raw_value = Caching.read(key)
        cached_id = raw_value.to_i

        # A key holding an empty value marks a user that was looked up
        # before without an ID being found, hence only positive numbers
        # count as an ID.
        [!raw_value.nil?, cached_id.positive? ? cached_id : nil]
      end
    end
  end
end
|
|
@ -8,14 +8,14 @@ module Gitlab
|
|||
ImportSource = Struct.new(:name, :title, :importer)
|
||||
|
||||
ImportTable = [
|
||||
ImportSource.new('github', 'GitHub', Github::Import),
|
||||
ImportSource.new('github', 'GitHub', Gitlab::GithubImport::ParallelImporter),
|
||||
ImportSource.new('bitbucket', 'Bitbucket', Gitlab::BitbucketImport::Importer),
|
||||
ImportSource.new('gitlab', 'GitLab.com', Gitlab::GitlabImport::Importer),
|
||||
ImportSource.new('google_code', 'Google Code', Gitlab::GoogleCodeImport::Importer),
|
||||
ImportSource.new('fogbugz', 'FogBugz', Gitlab::FogbugzImport::Importer),
|
||||
ImportSource.new('git', 'Repo by URL', nil),
|
||||
ImportSource.new('gitlab_project', 'GitLab export', Gitlab::ImportExport::Importer),
|
||||
ImportSource.new('gitea', 'Gitea', Gitlab::GithubImport::Importer)
|
||||
ImportSource.new('gitea', 'Gitea', Gitlab::LegacyGithubImport::Importer)
|
||||
].freeze
|
||||
|
||||
class << self
|
||||
|
|
|
@ -19,11 +19,13 @@ module Gitlab
|
|||
Gitlab::Redis::SharedState.with { |redis| redis.lpush(key, jid) }
|
||||
end
|
||||
|
||||
attr_reader :key, :jobs_remaining, :finished
|
||||
attr_reader :key, :finished
|
||||
attr_accessor :jobs_remaining
|
||||
|
||||
# jobs_remaining - the number of jobs left to wait for
|
||||
def initialize(jobs_remaining)
|
||||
@key = "gitlab:job_waiter:#{SecureRandom.uuid}"
|
||||
# key - The key of this waiter.
|
||||
def initialize(jobs_remaining = 0, key = "gitlab:job_waiter:#{SecureRandom.uuid}")
|
||||
@key = key
|
||||
@jobs_remaining = jobs_remaining
|
||||
@finished = []
|
||||
end
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
module Gitlab
|
||||
module GithubImport
|
||||
module LegacyGithubImport
|
||||
class BaseFormatter
|
||||
attr_reader :client, :formatter, :project, :raw_data
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
module Gitlab
|
||||
module GithubImport
|
||||
module LegacyGithubImport
|
||||
class BranchFormatter < BaseFormatter
|
||||
delegate :repo, :sha, :ref, to: :raw_data
|
||||
|
|
@ -0,0 +1,148 @@
|
|||
module Gitlab
  module LegacyGithubImport
    # Client for the GitHub API, wrapping an Octokit client for API calls
    # and an OAuth2 client for the authorization flow.
    #
    # Any method the Octokit client responds to can be called directly on
    # instances of this class. Such calls are rate-limit aware and, for
    # Array results, follow the pagination links of the response.
    class Client
      # At or below this number of remaining requests the client sleeps
      # until the rate limit resets.
      GITHUB_SAFE_REMAINING_REQUESTS = 100

      # Extra time added on top of the rate limit's `resets_in` when
      # sleeping (presumably seconds, as it is passed to `sleep`).
      GITHUB_SAFE_SLEEP_TIME = 500

      attr_reader :access_token, :host, :api_version

      # access_token - The token used for API requests; may be nil.
      # host - Optional base URL of the instance; trailing slashes are
      #        removed.
      # api_version - The API version used for building the endpoint when a
      #               host is given.
      def initialize(access_token, host: nil, api_version: 'v3')
        @access_token = access_token
        @host = host.to_s.sub(%r{/+\z}, '')
        @api_version = api_version
        @users = {}

        if access_token
          # Pagination is handled by `request`, so Octokit's own is turned
          # off. Note that this is a global Octokit setting.
          ::Octokit.auto_paginate = false
        end
      end

      # The memoized Octokit client.
      def api
        @api ||= ::Octokit::Client.new(
          access_token: access_token,
          api_endpoint: api_endpoint,
          # If there is no config, we're connecting to github.com and we
          # should verify ssl.
          connection_options: {
            ssl: { verify: config ? config['verify_ssl'] : true }
          }
        )
      end

      # The memoized OAuth2 client.
      #
      # Raises Projects::ImportService::Error when no GitHub OAuth
      # configuration is present.
      def client
        unless config
          raise Projects::ImportService::Error,
            'OAuth configuration for GitHub missing.'
        end

        @client ||= ::OAuth2::Client.new(
          config.app_id,
          config.app_secret,
          github_options.merge(ssl: { verify: config['verify_ssl'] })
        )
      end

      # Returns the URL to send the user to for authorizing the import.
      def authorize_url(redirect_uri)
        client.auth_code.authorize_url({
          redirect_uri: redirect_uri,
          scope: "repo, user, user:email"
        })
      end

      # Exchanges an authorization code for an access token.
      def get_token(code)
        client.auth_code.get_token(code).token
      end

      # Forwards every method the Octokit client responds to through
      # `request`.
      def method_missing(method, *args, &block)
        if api.respond_to?(method)
          request(method, *args, &block)
        else
          super(method, *args, &block)
        end
      end

      # Companion of `method_missing`. Implementing this hook (instead of
      # overriding `respond_to?` with a single parameter) keeps
      # `respond_to?(name, true)` and `method(name)` working for forwarded
      # methods.
      def respond_to_missing?(method, include_private = false)
        api.respond_to?(method) || super
      end

      # Returns the GitHub user for the given login, or nil when the login
      # is blank. Results are memoized per login.
      def user(login)
        return nil unless login.present?
        return @users[login] if @users.key?(login)

        @users[login] = api.user(login)
      end

      private

      def api_endpoint
        if host.present? && api_version.present?
          "#{host}/api/#{api_version}"
        else
          github_options[:site]
        end
      end

      # The OmniAuth provider configuration named "github", if any.
      def config
        Gitlab.config.omniauth.providers.find { |provider| provider.name == "github" }
      end

      def github_options
        if config
          config["args"]["client_options"].deep_symbolize_keys
        else
          OmniAuth::Strategies::GitHub.default_options[:client_options].symbolize_keys
        end
      end

      def rate_limit
        api.rate_limit!
        # GitHub Rate Limit API returns 404 when the rate limit is
        # disabled. In this case we just want to return gracefully
        # instead of spitting out an error.
      rescue Octokit::NotFound
        nil
      end

      # Memoized check whether rate limit details are available.
      def has_rate_limit?
        return @has_rate_limit if defined?(@has_rate_limit)

        @has_rate_limit = rate_limit.present?
      end

      def rate_limit_exceed?
        has_rate_limit? && rate_limit.remaining <= GITHUB_SAFE_REMAINING_REQUESTS
      end

      def rate_limit_sleep_time
        rate_limit.resets_in + GITHUB_SAFE_SLEEP_TIME
      end

      # Calls `method` on the Octokit client, sleeping first when the rate
      # limit is (nearly) exhausted.
      #
      # Non-Array results are returned as-is. For Array results every
      # following page is fetched too: with a block each page is yielded,
      # without one all pages are concatenated and returned.
      def request(method, *args, &block)
        sleep rate_limit_sleep_time if rate_limit_exceed?

        data = api.__send__(method, *args) # rubocop:disable GitlabSecurity/PublicSend
        return data unless data.is_a?(Array)

        last_response = api.last_response

        if block_given?
          yield data
          # api.last_response could change while we're yielding (e.g. fetching labels for each PR)
          # so we cache our own last response
          each_response_page(last_response, &block)
        else
          each_response_page(last_response) { |page| data.concat(page) }
          data
        end
      end

      # Follows the "next" links starting at `last_response`, yielding the
      # data of every page that holds an Array.
      def each_response_page(last_response)
        while last_response.rels[:next]
          sleep rate_limit_sleep_time if rate_limit_exceed?
          last_response = last_response.rels[:next].get
          yield last_response.data if last_response.data.is_a?(Array)
        end
      end
    end
  end
end
|
|
@ -1,5 +1,5 @@
|
|||
module Gitlab
|
||||
module GithubImport
|
||||
module LegacyGithubImport
|
||||
class CommentFormatter < BaseFormatter
|
||||
attr_writer :author_id
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
module Gitlab
|
||||
module GithubImport
|
||||
module LegacyGithubImport
|
||||
class Importer
|
||||
include Gitlab::ShellAdapter
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
module Gitlab
|
||||
module GithubImport
|
||||
module LegacyGithubImport
|
||||
class IssuableFormatter < BaseFormatter
|
||||
attr_writer :assignee_id, :author_id
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
module Gitlab
|
||||
module GithubImport
|
||||
module LegacyGithubImport
|
||||
class IssueFormatter < IssuableFormatter
|
||||
def attributes
|
||||
{
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue