2018-08-24 15:58:50 -04:00
# frozen_string_literal: true
2018-06-25 16:13:59 -04:00
module Gitlab
module BitbucketServerImport
class Importer
2018-07-20 12:45:59 -04:00
attr_reader :recover_missing_commits
attr_reader :project, :project_key, :repository_slug, :client, :errors, :users, :already_imported_cache_key
attr_accessor :logger

# Remote name used when mirroring the Bitbucket repository.
REMOTE_NAME = 'bitbucket_server'
# Number of pull requests fetched per API page.
BATCH_SIZE = 100

# The base cache key to use for tracking already imported objects.
ALREADY_IMPORTED_CACHE_KEY =
  'bitbucket_server-importer/already-imported/%{project}/%{collection}'

# Pairs a temporary import branch name with the commit SHA it points at.
TempBranch = Struct.new(:name, :sha)
2018-07-03 19:37:17 -04:00
# This importer mirrors the Git repository itself, not just metadata.
def self.imports_repository?
  true
end

# Refspecs used when mirroring: all heads and tags, plus Bitbucket
# pull-request "to" refs mapped onto GitLab merge-request head refs.
def self.refmap
  [:heads, :tags, '+refs/pull-requests/*/to:refs/merge-requests/*/head']
end
2018-07-20 12:45:59 -04:00
# Unlike GitHub, you can't grab the commit SHAs for pull requests that
# have been closed but not merged even though Bitbucket has these
# commits internally. We can recover these pull requests by creating a
# branch with the Bitbucket REST API, but by default we turn this
# behavior off.
def initialize(project, recover_missing_commits: false)
  @project = project
  @recover_missing_commits = recover_missing_commits

  import_data = project.import_data.data
  @project_key = import_data['project_key']
  @repository_slug = import_data['repo_slug']

  @client = BitbucketServer::Client.new(project.import_data.credentials)
  @formatter = Gitlab::ImportFormatter.new
  @errors = []
  @users = {}
  @temp_branches = []
  @logger = Gitlab::Import::Logger.build
  @already_imported_cache_key = ALREADY_IMPORTED_CACHE_KEY %
    { project: project.id, collection: collection_method }
end
# All importable objects tracked by this importer live in a single
# collection: pull requests. Used to build the dedup cache key.
def collection_method
  :pull_requests
end
# Runs the full import pipeline: repository mirror, pull requests, LFS
# objects, temp-branch cleanup, error reporting and metrics. Individual
# failures are accumulated in +errors+ rather than raised; always
# returns true when the pipeline completes.
def execute
  import_repository
  import_pull_requests
  download_lfs_objects
  delete_temp_branches
  handle_errors
  metrics.track_finished_import

  log_info(stage: "complete")

  # Keep the dedup set around briefly so a quick retry skips done work.
  Gitlab::Cache::Import::Caching.expire(already_imported_cache_key, 15.minutes.to_i)

  true
end
private
# Persists the accumulated import errors on the project's import state
# so users can see the remote data was only partially imported.
def handle_errors
  return if errors.empty?

  project.import_state.update_column(
    :last_error,
    { message: 'The remote data could not be fully imported.', errors: errors }.to_json
  )
end
2020-09-14 14:09:48 -04:00
# Resolves a GitLab user ID from an email address or username,
# memoizing the result (including misses) in +users+.
#
# by    - :email (confirmed emails only) or :username.
# value - the email/username to look up; nil short-circuits to nil.
def find_user_id(by:, value:)
  return if value.nil?
  return users[value] if users.key?(value)

  found =
    case by
    when :email then User.find_by_any_email(value, confirmed: true)
    else User.find_by_username(value)
    end

  # Hash assignment evaluates to the stored value, so this both caches
  # and returns the ID (or nil when no user matched).
  users[value] = found&.id
end
# Bitbucket Server repository metadata, fetched lazily once.
def repo
  @repo ||= client.repo(project_key, repository_slug)
end
2018-07-03 19:37:17 -04:00
# Whether the given commit SHA is present in the project repository.
#
# Fix: the original returned the commit object itself (or nil), not a
# boolean, despite the `?` predicate name. Callers only use the result
# truthily, so coercing to true/false is backward-compatible.
def sha_exists?(sha)
  !project.repository.commit(sha).nil?
end
2018-07-06 00:51:12 -04:00
# Name for a temporary import branch, e.g.
# "gitlab/import/pull-request/7/to".
def temp_branch_name(pull_request, suffix)
  ['gitlab/import/pull-request', pull_request.iid, suffix].join('/')
end
2018-07-19 02:20:10 -04:00
# This method restores required SHAs that GitLab needs to create diffs
# into branch names as the following:
#
# gitlab/import/pull-request/N/{to,from}
def restore_branches(pull_requests)
  candidates = pull_requests.flat_map do |pull_request|
    [
      TempBranch.new(temp_branch_name(pull_request, :from), pull_request.source_branch_sha),
      TempBranch.new(temp_branch_name(pull_request, :to), pull_request.target_branch_sha)
    ]
  end

  # Create the branches on the Bitbucket Server first
  created_branches = restore_branch_shas(candidates)

  # Remember what we created so delete_temp_branches can clean up.
  @temp_branches += created_branches

  # Now sync the repository so we get the new branches
  import_repository unless created_branches.empty?
end
2018-07-06 00:51:12 -04:00
# Creates a branch on the Bitbucket server for each SHA that is not
# already present locally. Returns the TempBranch entries that were
# actually created; connection failures are logged and skipped.
def restore_branch_shas(shas_to_restore)
  shas_to_restore.each_with_object([]) do |candidate, created|
    next if sha_exists?(candidate.sha)

    begin
      client.create_branch(project_key, repository_slug, candidate.name, candidate.sha)
      created << candidate
    rescue BitbucketServer::Connection::ConnectionError => e
      log_warn(message: "Unable to recreate branch", sha: candidate.sha, error: e.message)
    end
  end
end
# Mirrors the Bitbucket repository into the project, including the
# pull-request refs from .refmap. On Gitlab::Shell errors the failure
# is tracked, the content cache expired, and the error re-raised.
def import_repository
  log_info(stage: 'import_repository', message: 'starting import')

  project.ensure_repository
  project.repository.fetch_as_mirror(project.import_url, refmap: self.class.refmap, remote_name: REMOTE_NAME)

  log_info(stage: 'import_repository', message: 'finished import')
rescue Gitlab::Shell::Error => e
  Gitlab::ErrorTracking.log_exception(
    e,
    stage: 'import_repository', message: 'failed import', error: e.message
  )

  # Expire cache to prevent scenarios such as:
  # 1. First import failed, but the repo was imported successfully, so +exists?+ returns true
  # 2. Retried import, repo is broken or not imported but +exists?+ still returns true
  project.repository.expire_content_cache if project.repository_exists?

  raise
end
2020-10-28 14:08:52 -04:00
# Imports LFS objects via the LFS import service, recording a single
# aggregate error entry when the service reports failure.
def download_lfs_objects
  result = Projects::LfsPointers::LfsImportService.new(project).execute

  return unless result[:status] == :error

  errors << { type: :lfs_objects, errors: "The Lfs import process failed. #{result[:message]}" }
end
2018-07-06 00:51:12 -04:00
# Bitbucket Server keeps tracks of references for open pull requests in
# refs/heads/pull-requests, but closed and merged requests get moved
# into hidden internal refs under stash-refs/pull-requests. Unless the
# SHAs involved are at the tip of a branch or tag, there is no way to
# retrieve the server for those commits.
#
# To avoid losing history, we use the Bitbucket API to re-create the branch
# on the remote server. Then we have to issue a `git fetch` to download these
# branches.
def import_pull_requests
  page = 0

  log_info(stage: 'import_pull_requests', message: "starting")

  loop do
    log_debug(stage: 'import_pull_requests', message: "importing page #{page} and batch-size #{BATCH_SIZE} from #{page * BATCH_SIZE} to #{(page + 1) * BATCH_SIZE}")

    pull_requests = client.pull_requests(project_key, repository_slug, page_offset: page, limit: BATCH_SIZE).to_a

    break if pull_requests.empty?

    # Creating branches on the server and fetching the newly-created branches
    # may take a number of network round-trips. This used to be done in batches to
    # avoid doing a git fetch for every new branch, as the whole process is now
    # batched, we do not need to separately do this in batches.
    restore_branches(pull_requests) if recover_missing_commits

    pull_requests.each do |pull_request|
      if already_imported?(pull_request)
        log_info(stage: 'import_pull_requests', message: 'already imported', iid: pull_request.iid)
        next
      end

      import_bitbucket_pull_request(pull_request)
    rescue StandardError => e
      # One broken pull request must not abort the rest of the page.
      Gitlab::ErrorTracking.log_exception(
        e,
        stage: 'import_pull_requests', iid: pull_request.iid, error: e.message
      )

      backtrace = Gitlab::BacktraceCleaner.clean_backtrace(e.backtrace)
      errors << { type: :pull_request, iid: pull_request.iid, errors: e.message, backtrace: backtrace.join("\n"), raw_response: pull_request.raw }
    end

    log_debug(stage: 'import_pull_requests', message: "finished page #{page} and batch-size #{BATCH_SIZE}")
    page += 1
  end
end
2020-10-29 08:08:50 -04:00
# Returns true if the given object has already been imported, false
# otherwise.
#
# pull_request - The object to check, identified by its iid.
def already_imported?(pull_request)
  Gitlab::Cache::Import::Caching.set_includes?(already_imported_cache_key, pull_request.iid)
end

# Marks the given object as "already imported" in the dedup cache.
def mark_as_imported(pull_request)
  Gitlab::Cache::Import::Caching.set_add(already_imported_cache_key, pull_request.iid)
end
2018-07-13 18:43:15 -04:00
# Removes the temporary branches created by restore_branches, both on
# the Bitbucket server and in the local repository. A failure on one
# branch is logged/recorded without aborting the cleanup of the rest.
def delete_temp_branches
  @temp_branches.each do |branch|
    client.delete_branch(project_key, repository_slug, branch.name, branch.sha)
    project.repository.delete_branch(branch.name)
  rescue BitbucketServer::Connection::ConnectionError => e
    Gitlab::ErrorTracking.log_exception(
      e,
      stage: 'delete_temp_branches', branch: branch.name, error: e.message
    )

    errors << { type: :delete_temp_branches, branch_name: branch.name, errors: e.message }
  end
end
2018-07-06 00:11:29 -04:00
# Creates a GitLab merge request from a Bitbucket pull request, then
# imports its comments (if persisted) and marks it as imported.
def import_bitbucket_pull_request(pull_request)
  log_info(stage: 'import_bitbucket_pull_requests', message: 'starting', iid: pull_request.iid)

  # Prefix the description with an attribution line when the author
  # could not be mapped to a GitLab user.
  description = author_line(pull_request)
  description += pull_request.description if pull_request.description

  attributes = {
    iid: pull_request.iid,
    title: pull_request.title,
    description: description,
    source_project_id: project.id,
    source_branch: Gitlab::Git.ref_name(pull_request.source_branch_name),
    source_branch_sha: pull_request.source_branch_sha,
    target_project_id: project.id,
    target_branch: Gitlab::Git.ref_name(pull_request.target_branch_name),
    target_branch_sha: pull_request.target_branch_sha,
    state_id: MergeRequest.available_states[pull_request.state],
    author_id: author_id(pull_request),
    created_at: pull_request.created_at,
    updated_at: pull_request.updated_at
  }

  merge_request = Gitlab::Import::MergeRequestCreator.new(project).execute(attributes)

  if merge_request.persisted?
    import_pull_request_comments(pull_request, merge_request)

    metrics.merge_requests_counter.increment
  end

  log_info(stage: 'import_bitbucket_pull_requests', message: 'finished', iid: pull_request.iid)
  mark_as_imported(pull_request)
end
2018-06-25 16:13:59 -04:00
# Imports all activity on a pull request: the merge event (if any),
# inline (diff) comments and standalone comments.
def import_pull_request_comments(pull_request, merge_request)
  log_info(stage: 'import_pull_request_comments', message: 'starting', iid: merge_request.iid)

  comments, other_activities = client.activities(project_key, repository_slug, pull_request.iid).partition(&:comment?)

  merge_event = other_activities.find(&:merge_event?)
  import_merge_event(merge_request, merge_event) if merge_event

  inline_comments, pr_comments = comments.partition(&:inline_comment?)

  import_inline_comments(inline_comments.map(&:comment), merge_request)
  import_standalone_pr_comments(pr_comments.map(&:comment), merge_request)

  log_info(stage: 'import_pull_request_comments', message: 'finished', iid: merge_request.iid,
           merge_event_found: merge_event.present?,
           inline_comments_count: inline_comments.count,
           standalone_pr_comments: pr_comments.count)
end
2018-08-27 11:31:01 -04:00
# rubocop: disable CodeReuse/ActiveRecord
# Records the merge commit SHA on the merge request and who merged it
# (and when) in MergeRequest::Metrics, from the Bitbucket merge event.
def import_merge_event(merge_request, merge_event)
  log_info(stage: 'import_merge_event', message: 'starting', iid: merge_request.iid)

  # Fall back to the project creator when the committer email does not
  # map to a GitLab user.
  user_id = find_user_id(by: :email, value: merge_event.committer_email) || project.creator_id

  merge_request.update({ merge_commit_sha: merge_event.merge_commit })

  metric = MergeRequest::Metrics.find_or_initialize_by(merge_request: merge_request)
  metric.update(merged_by_id: user_id, merged_at: merge_event.merge_timestamp)

  log_info(stage: 'import_merge_event', message: 'finished', iid: merge_request.iid)
end
2018-08-27 11:31:01 -04:00
# rubocop: enable CodeReuse/ActiveRecord
2018-06-27 17:25:09 -04:00
2018-07-17 18:01:33 -04:00
# Imports Bitbucket inline (diff) comments and their threaded replies
# as DiffNotes on the merge request.
def import_inline_comments(inline_comments, merge_request)
  log_info(stage: 'import_inline_comments', message: 'starting', iid: merge_request.iid)

  inline_comments.each do |comment|
    position = build_position(merge_request, comment)
    parent = create_diff_note(merge_request, comment, position)

    # Replies can only be threaded under a successfully persisted parent.
    next unless parent&.persisted?

    comment.comments.each do |reply|
      create_diff_note(merge_request, reply, position, parent.discussion_id)
    end
  end

  log_info(stage: 'import_inline_comments', message: 'finished', iid: merge_request.iid)
end
2018-07-17 18:48:01 -04:00
# Builds a DiffNote for a Bitbucket inline comment. Falls back to a
# plain note when the DiffNote is invalid, and returns nil (recording
# an error entry) when note creation raises.
def create_diff_note(merge_request, comment, position, discussion_id = nil)
  attributes = pull_request_comment_attributes(comment)
  attributes[:position] = position
  attributes[:type] = 'DiffNote'
  attributes[:discussion_id] = discussion_id if discussion_id

  note = merge_request.notes.build(attributes)

  if note.valid?
    note.save
    return note
  end

  log_info(stage: 'create_diff_note', message: 'creating fallback DiffNote', iid: merge_request.iid)

  # Bitbucket Server supports the ability to comment on any line, not just the
  # line in the diff. If we can't add the note as a DiffNote, fallback to creating
  # a regular note.
  create_fallback_diff_note(merge_request, comment, position)
rescue StandardError => e
  Gitlab::ErrorTracking.log_exception(
    e,
    stage: 'create_diff_note', comment_id: comment.id, error: e.message
  )

  errors << { type: :pull_request, id: comment.id, errors: e.message }
  nil
end
2018-07-31 17:30:01 -04:00
# Creates a regular note carrying the file/line context in its text,
# used when a proper DiffNote cannot be placed on the diff.
def create_fallback_diff_note(merge_request, comment, position)
  # +"" yields a mutable buffer despite frozen_string_literal.
  text = +"*Comment on "
  text << "#{position.old_path}:#{position.old_line} --> " if position.old_line
  text << "#{position.new_path}:#{position.new_line}" if position.new_line
  text << "*\n\n#{comment.note}"

  merge_request.notes.create!(pull_request_comment_attributes(comment).merge(note: text))
end
2018-06-25 16:13:59 -04:00
# Maps a Bitbucket inline comment onto a GitLab diff position, using
# the comment's file path for both sides of the diff.
def build_position(merge_request, pr_comment)
  Gitlab::Diff::Position.new(
    diff_refs: merge_request.diff_refs,
    old_path: pr_comment.file_path,
    new_path: pr_comment.file_path,
    old_line: pr_comment.old_pos,
    new_line: pr_comment.new_pos
  )
end
# Imports top-level (non-inline) pull request comments and one level
# of replies as regular notes. A failing comment records an error and
# does not abort the remaining comments.
def import_standalone_pr_comments(pr_comments, merge_request)
  pr_comments.each do |comment|
    merge_request.notes.create!(pull_request_comment_attributes(comment))

    comment.comments.each do |reply|
      merge_request.notes.create!(pull_request_comment_attributes(reply))
    end
  rescue StandardError => e
    Gitlab::ErrorTracking.log_exception(
      e,
      stage: 'import_standalone_pr_comments', merge_request_id: merge_request.id, comment_id: comment.id, error: e.message
    )

    errors << { type: :pull_request, comment_id: comment.id, errors: e.message }
  end
end
# Builds the attribute hash for a note imported from a Bitbucket
# comment. When the author cannot be mapped to a GitLab user, the note
# is attributed to the project creator and prefixed with the original
# author's username/email; replies are prefixed with a quote of the
# parent comment for context.
def pull_request_comment_attributes(comment)
  author = uid(comment)
  note = ''

  if author.nil?
    author = project.creator_id
    note = "*By #{comment.author_username} (#{comment.author_email})*\n\n"
  end

  note +=
    # Provide some context for replying
    if comment.parent_comment
      "> #{comment.parent_comment.note.truncate(80)}\n\n#{comment.note}"
    else
      comment.note
    end

  {
    project: project,
    note: note,
    author_id: author,
    created_at: comment.created_at,
    updated_at: comment.updated_at
  }
end
2018-08-24 15:58:50 -04:00
2020-11-05 07:09:05 -05:00
# Structured logging helpers: each merges the common project context
# from log_base_data into the supplied details hash.
def log_debug(details)
  logger.debug(log_base_data.merge(details))
end

def log_info(details)
  logger.info(log_base_data.merge(details))
end

def log_warn(details)
  logger.warn(log_base_data.merge(details))
end
# Fields attached to every log line emitted by this importer.
def log_base_data
  {
    class: self.class.name,
    project_id: project.id,
    project_path: project.full_path
  }
end
2020-06-19 11:08:39 -04:00
# Import metrics recorder for this importer/project pair, built lazily.
def metrics
  @metrics ||= Gitlab::Import::Metrics.new(:bitbucket_server_importer, @project)
end
2020-09-14 14:09:48 -04:00
# Attribution line for the object's author, or '' when the author maps
# to a real GitLab user (no attribution needed).
def author_line(rep_object)
  uid(rep_object) ? '' : @formatter.author_line(rep_object.author)
end

# GitLab user ID for the object's author, defaulting to the project
# creator when no mapping exists.
def author_id(rep_object)
  uid(rep_object) || project.creator_id
end
# Maps the object's author to a GitLab user ID, or nil when no match.
#
# Fix: the original computed the email-only lookup on its first line
# but discarded the result (no +return+), so the
# :bitbucket_server_user_mapping_by_username feature-flag guard was
# dead code and every call fell through to the username-first lookup.
# Adding +return+ makes the flag effective: flag disabled => strict
# email mapping; flag enabled => username first, then email fallback.
def uid(rep_object)
  return find_user_id(by: :email, value: rep_object.author_email) unless Feature.enabled?(:bitbucket_server_user_mapping_by_username)

  find_user_id(by: :username, value: rep_object.author_username) ||
    find_user_id(by: :email, value: rep_object.author_email)
end
2018-06-25 16:13:59 -04:00
end
end
end