gitlab-org--gitlab-foss/lib/gitlab/github_import/importer.rb

module Gitlab
  module GithubImport
    class Importer
      include Gitlab::ShellAdapter

      attr_reader :client, :errors, :project, :repo, :repo_url
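
      # A minimal usage sketch (assuming the surrounding import flow has already set
      # the project's import_url and import data credentials):
      #
      #   Gitlab::GithubImport::Importer.new(project).execute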

      def initialize(project)
        @project = project
        @repo = project.import_source
        @repo_url = project.import_url
        @errors = []
        @labels = {}

        if credentials
          @client = Client.new(credentials[:user])
        else
          raise Projects::ImportService::Error, "Unable to find project import data credentials for project ID: #{@project.id}"
        end
      end

      def execute
        import_labels
        import_milestones
        import_issues
        import_pull_requests
        import_comments(:issues)
        import_comments(:pull_requests)
        import_wiki
        import_releases
        handle_errors

        true
      end

      private

      def credentials
        @credentials ||= project.import_data.credentials if project.import_data
      end
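
      # Persist the errors collected for individual resources on the project record
      # (import_error), so the failure details are not silently dropped.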
      def handle_errors
        return unless errors.any?

        project.update_column(:import_error, {
          message: 'The remote data could not be fully imported.',
          errors: errors
        }.to_json)
      end

      def import_labels
        fetch_resources(:labels, repo, per_page: 100) do |labels|
          labels.each do |raw|
            begin
              label = LabelFormatter.new(project, raw).create!
              @labels[label.title] = label.id
            rescue => e
              errors << { type: :label, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message }
            end
          end
        end
      end

      def import_milestones
        fetch_resources(:milestones, repo, state: :all, per_page: 100) do |milestones|
          milestones.each do |raw|
            begin
              MilestoneFormatter.new(project, raw).create!
            rescue => e
              errors << { type: :milestone, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message }
            end
          end
        end
      end

      def import_issues
        fetch_resources(:issues, repo, state: :all, sort: :created, direction: :asc, per_page: 100) do |issues|
          issues.each do |raw|
            gh_issue = IssueFormatter.new(project, raw)

            if gh_issue.valid?
              begin
                issue = gh_issue.create!
                apply_labels(issue, raw)
              rescue => e
                errors << { type: :issue, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message }
              end
            end
          end
        end
      end
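
      # GitHub pull requests may reference branches that no longer exist; the source
      # and target branches are temporarily restored so the merge request can be
      # created, then cleaned up again in the ensure block below.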
      def import_pull_requests
        fetch_resources(:pull_requests, repo, state: :all, sort: :created, direction: :asc, per_page: 100) do |pull_requests|
          pull_requests.each do |raw|
            pull_request = PullRequestFormatter.new(project, raw)
            next unless pull_request.valid?

            begin
              restore_source_branch(pull_request) unless pull_request.source_branch_exists?
              restore_target_branch(pull_request) unless pull_request.target_branch_exists?

              merge_request = pull_request.create!
              apply_labels(merge_request, raw)
            rescue => e
              errors << { type: :pull_request, url: Gitlab::UrlSanitizer.sanitize(pull_request.url), errors: e.message }
            ensure
              clean_up_restored_branches(pull_request)
            end
          end
        end

        project.repository.after_remove_branch
      end
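
      # GitHub keeps the head commit of every pull request reachable under the
      # `pull/<number>/head` ref even when the original source branch is gone,
      # so we can fetch it into a local branch to build the merge request from.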
      def restore_source_branch(pull_request)
        project.repository.fetch_ref(repo_url, "pull/#{pull_request.number}/head", pull_request.source_branch_name)
      end

      def restore_target_branch(pull_request)
        project.repository.create_branch(pull_request.target_branch_name, pull_request.target_branch_sha)
      end

      def remove_branch(name)
        project.repository.delete_branch(name)
      rescue Rugged::ReferenceError
        errors << { type: :remove_branch, name: name }
      end

      def clean_up_restored_branches(pull_request)
        remove_branch(pull_request.source_branch_name) unless pull_request.source_branch_exists?
        remove_branch(pull_request.target_branch_name) unless pull_request.target_branch_exists?
      end
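
      # @labels maps GitHub label titles (collected in import_labels) to the
      # corresponding GitLab label ids, so labels can be attached by id here.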
      def apply_labels(issuable, raw_issuable)
        # GH returns labels for issues but not for pull requests!
        labels = if issuable.is_a?(MergeRequest)
                   client.labels_for_issue(repo, raw_issuable.number)
                 else
                   raw_issuable.labels
                 end

        if labels.count > 0
          label_ids = labels
            .map { |attrs| @labels[attrs.name] }
            .compact

          issuable.update_attribute(:label_ids, label_ids)
        end
      end

      def import_comments(issuable_type)
        resource_type = "#{issuable_type}_comments".to_sym

        # Two notes here:
        # 1. We don't have a distinctive attribute for comments (unlike issues' iid), so we fetch the last
        #    inserted note, compare it against every comment on the current page until we find a match,
        #    and that's where we start importing.
        # 2. GH returns comments for _both_ issues and PRs through the issues_comments API, while
        #    pull_requests_comments returns only comments on diffs, so we select the last note based on
        #    line_code rather than noteable_type.
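        #
        # For example, for :pull_requests the query below resolves to
        #   project.notes.where("line_code IS NOT NULL").last
        # i.e. the most recently inserted diff note.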
        line_code_is = issuable_type == :pull_requests ? 'NOT NULL' : 'NULL'
        last_note = project.notes.where("line_code IS #{line_code_is}").last

        fetch_resources(resource_type, repo, per_page: 100) do |comments|
          if last_note
            discard_inserted_comments(comments, last_note)
            last_note = nil
          end

          create_comments(comments)
        end
      end

      def create_comments(comments)
        ActiveRecord::Base.no_touching do
          comments.each do |raw|
            begin
              comment = CommentFormatter.new(project, raw)

              # GH does not return info about a comment's parent, so we guess it by checking its URL!
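              # For an illustrative html_url like https://github.com/owner/repo/issues/42#issuecomment-1,
              # the path splits so that parent == 'issues' and iid == '42'.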
              *_, parent, iid = URI(raw.html_url).path.split('/')
              issuable_class = parent == 'issues' ? Issue : MergeRequest

              issuable = issuable_class.find_by_iid(iid)
              next unless issuable

              issuable.notes.create!(comment.attributes)
            rescue => e
              errors << { type: :comment, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message }
            end
          end
        end
      end

      def discard_inserted_comments(comments, last_note)
        last_note_attrs = nil

        cut_off_index = comments.find_index do |raw|
          comment = CommentFormatter.new(project, raw)
          comment_attrs = comment.attributes
          last_note_attrs ||= last_note.slice(*comment_attrs.keys)

          comment_attrs.with_indifferent_access == last_note_attrs
        end

        # No matching resource in the collection means we stopped right at the end of the last page,
        # so there is nothing to discard.
        return unless cut_off_index

        # Otherwise, drop the comments we have already inserted.
        comments.shift(cut_off_index + 1)
      end
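
      # Clone the GitHub wiki into the project's wiki repository via gitlab-shell,
      # unless a wiki repository already exists locally.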
      def import_wiki
        unless project.wiki.repository_exists?
          wiki = WikiFormatter.new(project)
          gitlab_shell.import_repository(project.repository_storage_path, wiki.path_with_namespace, wiki.import_url)
        end
      rescue Gitlab::Shell::Error => e
        # GitHub returns this error message when the wiki repo has not been created,
        # which means the repo has the wiki enabled but contains no pages, so
        # we can skip the import.
        if e.message !~ /repository not exported/
          errors << { type: :wiki, errors: e.message }
        end
      end

      def import_releases
        fetch_resources(:releases, repo, per_page: 100) do |releases|
          releases.each do |raw|
            begin
              gh_release = ReleaseFormatter.new(project, raw)
              gh_release.create! if gh_release.valid?
            rescue => e
              errors << { type: :release, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message }
            end
          end
        end
      end
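
      # Pages through a collection from the GitHub API and yields each page to the caller.
      # The current page and a "fully imported" flag are cached per resource type, so a
      # retried import can skip finished resources and resume paging where it left off.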
      def fetch_resources(resource_type, *opts)
        return if imported?(resource_type)

        opts.last.merge!(page: current_page(resource_type))

        client.public_send(resource_type, *opts) do |resources|
          yield resources
          increment_page(resource_type)
        end

        imported!(resource_type)
      end

      def imported?(resource_type)
        Rails.cache.read("#{cache_key_prefix}:#{resource_type}:imported")
      end

      def imported!(resource_type)
        Rails.cache.write("#{cache_key_prefix}:#{resource_type}:imported", true, ex: 1.day)
      end

      def increment_page(resource_type)
        key = "#{cache_key_prefix}:#{resource_type}:current-page"

        # Rails.cache.increment calls INCRBY directly on the value stored under the key, which is
        # a serialized ActiveSupport::Cache::Entry, so Redis returns an error; hence this ugly work-around.
        page = Rails.cache.read(key)
        page += 1
        Rails.cache.write(key, page)

        page
      end

      def current_page(resource_type)
        Rails.cache.fetch("#{cache_key_prefix}:#{resource_type}:current-page", ex: 1.day) { 1 }
      end

      def cache_key_prefix
        @cache_key_prefix ||= "github-import:#{project.id}"
      end
    end
  end
end