Refactor GitHub Importer database helpers into helper methods
This in preparation for addressing idle-in-transaction timeouts for other importers. Part of #50021
This commit is contained in:
parent
842377ab3c
commit
0377c015cf
|
@ -10,24 +10,6 @@ module Gitlab
|
|||
Client.new(token_to_use, parallel: parallel)
|
||||
end
|
||||
|
||||
# Inserts a raw row and returns the ID of the inserted row.
|
||||
#
|
||||
# attributes - The attributes/columns to set.
|
||||
# relation - An ActiveRecord::Relation to use for finding the ID of the row
|
||||
# when using MySQL.
|
||||
def self.insert_and_return_id(attributes, relation)
|
||||
# We use bulk_insert here so we can bypass any queries executed by
|
||||
# callbacks or validation rules, as doing this wouldn't scale when
|
||||
# importing very large projects.
|
||||
result = Gitlab::Database
|
||||
.bulk_insert(relation.table_name, [attributes], return_ids: true)
|
||||
|
||||
# MySQL doesn't support returning the IDs of a bulk insert in a way that
|
||||
# is not a pain, so in this case we'll issue an extra query instead.
|
||||
result.first ||
|
||||
relation.where(iid: attributes[:iid]).limit(1).pluck(:id).first
|
||||
end
|
||||
|
||||
# Returns the ID of the ghost user.
|
||||
def self.ghost_user_id
|
||||
key = 'github-import/ghost-user-id'
|
||||
|
|
|
@ -4,6 +4,8 @@ module Gitlab
|
|||
module GithubImport
|
||||
module Importer
|
||||
class IssueImporter
|
||||
include Gitlab::Import::DatabaseHelpers
|
||||
|
||||
attr_reader :project, :issue, :client, :user_finder, :milestone_finder,
|
||||
:issuable_finder
|
||||
|
||||
|
@ -55,7 +57,7 @@ module Gitlab
|
|||
updated_at: issue.updated_at
|
||||
}
|
||||
|
||||
GithubImport.insert_and_return_id(attributes, project.issues).tap do |id|
|
||||
insert_and_return_id(attributes, project.issues).tap do |id|
|
||||
# We use .insert_and_return_id which effectively disables all callbacks.
|
||||
# Trigger iid logic here to make sure we track internal id values consistently.
|
||||
project.issues.find(id).ensure_project_iid!
|
||||
|
|
|
@ -4,6 +4,8 @@ module Gitlab
|
|||
module GithubImport
|
||||
module Importer
|
||||
class PullRequestImporter
|
||||
include Gitlab::Import::MergeRequestHelpers
|
||||
|
||||
attr_reader :pull_request, :project, :client, :user_finder,
|
||||
:milestone_finder, :issuable_finder
|
||||
|
||||
|
@ -44,81 +46,27 @@ module Gitlab
|
|||
description = MarkdownText
|
||||
.format(pull_request.description, pull_request.author, author_found)
|
||||
|
||||
# This work must be wrapped in a transaction as otherwise we can leave
|
||||
# behind incomplete data in the event of an error. This can then lead
|
||||
# to duplicate key errors when jobs are retried.
|
||||
MergeRequest.transaction do
|
||||
attributes = {
|
||||
iid: pull_request.iid,
|
||||
title: pull_request.truncated_title,
|
||||
description: description,
|
||||
source_project_id: project.id,
|
||||
target_project_id: project.id,
|
||||
source_branch: pull_request.formatted_source_branch,
|
||||
target_branch: pull_request.target_branch,
|
||||
state: pull_request.state,
|
||||
milestone_id: milestone_finder.id_for(pull_request),
|
||||
author_id: author_id,
|
||||
assignee_id: user_finder.assignee_id_for(pull_request),
|
||||
created_at: pull_request.created_at,
|
||||
updated_at: pull_request.updated_at
|
||||
}
|
||||
attributes = {
|
||||
iid: pull_request.iid,
|
||||
title: pull_request.truncated_title,
|
||||
description: description,
|
||||
source_project_id: project.id,
|
||||
target_project_id: project.id,
|
||||
source_branch: pull_request.formatted_source_branch,
|
||||
target_branch: pull_request.target_branch,
|
||||
state: pull_request.state,
|
||||
milestone_id: milestone_finder.id_for(pull_request),
|
||||
author_id: author_id,
|
||||
assignee_id: user_finder.assignee_id_for(pull_request),
|
||||
created_at: pull_request.created_at,
|
||||
updated_at: pull_request.updated_at
|
||||
}
|
||||
|
||||
# When creating merge requests there are a lot of hooks that may
|
||||
# run, for many different reasons. Many of these hooks (e.g. the
|
||||
# ones used for rendering Markdown) are completely unnecessary and
|
||||
# may even lead to transaction timeouts.
|
||||
#
|
||||
# To ensure importing pull requests has a minimal impact and can
|
||||
# complete in a reasonable time we bypass all the hooks by inserting
|
||||
# the row and then retrieving it. We then only perform the
|
||||
# additional work that is strictly necessary.
|
||||
merge_request_id = GithubImport
|
||||
.insert_and_return_id(attributes, project.merge_requests)
|
||||
|
||||
merge_request = project.merge_requests.find(merge_request_id)
|
||||
|
||||
# We use .insert_and_return_id which effectively disables all callbacks.
|
||||
# Trigger iid logic here to make sure we track internal id values consistently.
|
||||
merge_request.ensure_target_project_iid!
|
||||
|
||||
[merge_request, false]
|
||||
end
|
||||
rescue ActiveRecord::InvalidForeignKey
|
||||
# It's possible the project has been deleted since scheduling this
|
||||
# job. In this case we'll just skip creating the merge request.
|
||||
[]
|
||||
rescue ActiveRecord::RecordNotUnique
|
||||
# It's possible we previously created the MR, but failed when updating
|
||||
# the Git data. In this case we'll just continue working on the
|
||||
# existing row.
|
||||
[project.merge_requests.find_by(iid: pull_request.iid), true]
|
||||
create_merge_request_without_hooks(project, attributes, pull_request.iid)
|
||||
end
|
||||
|
||||
def insert_git_data(merge_request, already_exists = false)
|
||||
# These fields are set so we can create the correct merge request
|
||||
# diffs.
|
||||
merge_request.source_branch_sha = pull_request.source_branch_sha
|
||||
merge_request.target_branch_sha = pull_request.target_branch_sha
|
||||
|
||||
merge_request.keep_around_commit
|
||||
|
||||
# MR diffs normally use an "after_save" hook to pull data from Git.
|
||||
# All of this happens in the transaction started by calling
|
||||
# create/save/etc. This in turn can lead to these transactions being
|
||||
# held open for much longer than necessary. To work around this we
|
||||
# first save the diff, then populate it.
|
||||
diff =
|
||||
if already_exists
|
||||
merge_request.merge_request_diffs.take ||
|
||||
merge_request.merge_request_diffs.build
|
||||
else
|
||||
merge_request.merge_request_diffs.build
|
||||
end
|
||||
|
||||
diff.importing = true
|
||||
diff.save
|
||||
diff.save_git_content
|
||||
def insert_git_data(merge_request, already_exists)
|
||||
insert_or_replace_git_data(merge_request, pull_request.source_branch_sha, pull_request.target_branch_sha, already_exists)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -0,0 +1,25 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Gitlab
|
||||
module Import
|
||||
module DatabaseHelpers
|
||||
# Inserts a raw row and returns the ID of the inserted row.
|
||||
#
|
||||
# attributes - The attributes/columns to set.
|
||||
# relation - An ActiveRecord::Relation to use for finding the ID of the row
|
||||
# when using MySQL.
|
||||
def insert_and_return_id(attributes, relation)
|
||||
# We use bulk_insert here so we can bypass any queries executed by
|
||||
# callbacks or validation rules, as doing this wouldn't scale when
|
||||
# importing very large projects.
|
||||
result = Gitlab::Database
|
||||
.bulk_insert(relation.table_name, [attributes], return_ids: true)
|
||||
|
||||
# MySQL doesn't support returning the IDs of a bulk insert in a way that
|
||||
# is not a pain, so in this case we'll issue an extra query instead.
|
||||
result.first ||
|
||||
relation.where(iid: attributes[:iid]).limit(1).pluck(:id).first
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
|
@ -0,0 +1,70 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Gitlab
|
||||
module Import
|
||||
module MergeRequestHelpers
|
||||
include DatabaseHelpers
|
||||
|
||||
def create_merge_request_without_hooks(project, attributes, iid)
|
||||
# This work must be wrapped in a transaction as otherwise we can leave
|
||||
# behind incomplete data in the event of an error. This can then lead
|
||||
# to duplicate key errors when jobs are retried.
|
||||
MergeRequest.transaction do
|
||||
# When creating merge requests there are a lot of hooks that may
|
||||
# run, for many different reasons. Many of these hooks (e.g. the
|
||||
# ones used for rendering Markdown) are completely unnecessary and
|
||||
# may even lead to transaction timeouts.
|
||||
#
|
||||
# To ensure importing pull requests has a minimal impact and can
|
||||
# complete in a reasonable time we bypass all the hooks by inserting
|
||||
# the row and then retrieving it. We then only perform the
|
||||
# additional work that is strictly necessary.
|
||||
merge_request_id = insert_and_return_id(attributes, project.merge_requests)
|
||||
|
||||
merge_request = project.merge_requests.find(merge_request_id)
|
||||
|
||||
# We use .insert_and_return_id which effectively disables all callbacks.
|
||||
# Trigger iid logic here to make sure we track internal id values consistently.
|
||||
merge_request.ensure_target_project_iid!
|
||||
|
||||
[merge_request, false]
|
||||
end
|
||||
rescue ActiveRecord::InvalidForeignKey
|
||||
# It's possible the project has been deleted since scheduling this
|
||||
# job. In this case we'll just skip creating the merge request.
|
||||
[]
|
||||
rescue ActiveRecord::RecordNotUnique
|
||||
# It's possible we previously created the MR, but failed when updating
|
||||
# the Git data. In this case we'll just continue working on the
|
||||
# existing row.
|
||||
[project.merge_requests.find_by(iid: iid), true]
|
||||
end
|
||||
|
||||
def insert_or_replace_git_data(merge_request, source_branch_sha, target_branch_sha, already_exists = false)
|
||||
# These fields are set so we can create the correct merge request
|
||||
# diffs.
|
||||
merge_request.source_branch_sha = source_branch_sha
|
||||
merge_request.target_branch_sha = target_branch_sha
|
||||
|
||||
merge_request.keep_around_commit
|
||||
|
||||
# MR diffs normally use an "after_save" hook to pull data from Git.
|
||||
# All of this happens in the transaction started by calling
|
||||
# create/save/etc. This in turn can lead to these transactions being
|
||||
# held open for much longer than necessary. To work around this we
|
||||
# first save the diff, then populate it.
|
||||
diff =
|
||||
if already_exists
|
||||
merge_request.merge_request_diffs.take ||
|
||||
merge_request.merge_request_diffs.build
|
||||
else
|
||||
merge_request.merge_request_diffs.build
|
||||
end
|
||||
|
||||
diff.importing = true
|
||||
diff.save
|
||||
diff.save_git_content
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
|
@ -92,7 +92,7 @@ describe Gitlab::GithubImport::Importer::IssueImporter, :clean_gitlab_redis_cach
|
|||
.with(issue)
|
||||
.and_return([user.id, true])
|
||||
|
||||
expect(Gitlab::GithubImport)
|
||||
expect(importer)
|
||||
.to receive(:insert_and_return_id)
|
||||
.with(
|
||||
{
|
||||
|
@ -121,7 +121,7 @@ describe Gitlab::GithubImport::Importer::IssueImporter, :clean_gitlab_redis_cach
|
|||
.with(issue)
|
||||
.and_return([project.creator_id, false])
|
||||
|
||||
expect(Gitlab::GithubImport)
|
||||
expect(importer)
|
||||
.to receive(:insert_and_return_id)
|
||||
.with(
|
||||
{
|
||||
|
@ -150,7 +150,7 @@ describe Gitlab::GithubImport::Importer::IssueImporter, :clean_gitlab_redis_cach
|
|||
.with(issue)
|
||||
.and_return([user.id, true])
|
||||
|
||||
expect(Gitlab::GithubImport)
|
||||
expect(importer)
|
||||
.to receive(:insert_and_return_id)
|
||||
.and_raise(ActiveRecord::InvalidForeignKey, 'invalid foreign key')
|
||||
|
||||
|
@ -185,7 +185,7 @@ describe Gitlab::GithubImport::Importer::IssueImporter, :clean_gitlab_redis_cach
|
|||
.and_return([user.id, true])
|
||||
|
||||
issue = build_stubbed(:issue, project: project)
|
||||
allow(Gitlab::GithubImport)
|
||||
allow(importer)
|
||||
.to receive(:insert_and_return_id)
|
||||
.and_return(issue.id)
|
||||
allow(project.issues).to receive(:find).with(issue.id).and_return(issue)
|
||||
|
|
|
@ -80,7 +80,7 @@ describe Gitlab::GithubImport::Importer::PullRequestImporter, :clean_gitlab_redi
|
|||
end
|
||||
|
||||
it 'imports the pull request with the pull request author as the merge request author' do
|
||||
expect(Gitlab::GithubImport)
|
||||
expect(importer)
|
||||
.to receive(:insert_and_return_id)
|
||||
.with(
|
||||
{
|
||||
|
@ -114,7 +114,7 @@ describe Gitlab::GithubImport::Importer::PullRequestImporter, :clean_gitlab_redi
|
|||
|
||||
it 'triggers internal_id functionality to track greatest iids' do
|
||||
mr = build_stubbed(:merge_request, source_project: project, target_project: project)
|
||||
allow(Gitlab::GithubImport).to receive(:insert_and_return_id).and_return(mr.id)
|
||||
allow(importer).to receive(:insert_and_return_id).and_return(mr.id)
|
||||
allow(project.merge_requests).to receive(:find).with(mr.id).and_return(mr)
|
||||
|
||||
expect(mr).to receive(:ensure_target_project_iid!)
|
||||
|
@ -135,7 +135,7 @@ describe Gitlab::GithubImport::Importer::PullRequestImporter, :clean_gitlab_redi
|
|||
.with(pull_request)
|
||||
.and_return(user.id)
|
||||
|
||||
expect(Gitlab::GithubImport)
|
||||
expect(importer)
|
||||
.to receive(:insert_and_return_id)
|
||||
.with(
|
||||
{
|
||||
|
@ -181,7 +181,7 @@ describe Gitlab::GithubImport::Importer::PullRequestImporter, :clean_gitlab_redi
|
|||
.to receive(:source_branch)
|
||||
.and_return('master')
|
||||
|
||||
expect(Gitlab::GithubImport)
|
||||
expect(importer)
|
||||
.to receive(:insert_and_return_id)
|
||||
.with(
|
||||
{
|
||||
|
@ -219,7 +219,7 @@ describe Gitlab::GithubImport::Importer::PullRequestImporter, :clean_gitlab_redi
|
|||
.with(pull_request)
|
||||
.and_return(user.id)
|
||||
|
||||
expect(Gitlab::GithubImport)
|
||||
expect(importer)
|
||||
.to receive(:insert_and_return_id)
|
||||
.and_raise(ActiveRecord::InvalidForeignKey, 'invalid foreign key')
|
||||
|
||||
|
|
|
@ -27,39 +27,6 @@ describe Gitlab::GithubImport do
|
|||
end
|
||||
end
|
||||
|
||||
describe '.insert_and_return_id' do
|
||||
let(:attributes) { { iid: 1, title: 'foo' } }
|
||||
let(:project) { create(:project) }
|
||||
|
||||
context 'on PostgreSQL' do
|
||||
it 'returns the ID returned by the query' do
|
||||
expect(Gitlab::Database)
|
||||
.to receive(:bulk_insert)
|
||||
.with(Issue.table_name, [attributes], return_ids: true)
|
||||
.and_return([10])
|
||||
|
||||
id = described_class.insert_and_return_id(attributes, project.issues)
|
||||
|
||||
expect(id).to eq(10)
|
||||
end
|
||||
end
|
||||
|
||||
context 'on MySQL' do
|
||||
it 'uses a separate query to retrieve the ID' do
|
||||
issue = create(:issue, project: project, iid: attributes[:iid])
|
||||
|
||||
expect(Gitlab::Database)
|
||||
.to receive(:bulk_insert)
|
||||
.with(Issue.table_name, [attributes], return_ids: true)
|
||||
.and_return([])
|
||||
|
||||
id = described_class.insert_and_return_id(attributes, project.issues)
|
||||
|
||||
expect(id).to eq(issue.id)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
describe '.ghost_user_id', :clean_gitlab_redis_cache do
|
||||
it 'returns the ID of the ghost user' do
|
||||
expect(described_class.ghost_user_id).to eq(User.ghost.id)
|
||||
|
|
|
@ -0,0 +1,46 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
require 'spec_helper'
|
||||
|
||||
describe Gitlab::Import::DatabaseHelpers do
|
||||
let(:database_helper) do
|
||||
Class.new do
|
||||
include Gitlab::Import::DatabaseHelpers
|
||||
end
|
||||
end
|
||||
|
||||
subject { database_helper.new }
|
||||
|
||||
describe '.insert_and_return_id' do
|
||||
let(:attributes) { { iid: 1, title: 'foo' } }
|
||||
let(:project) { create(:project) }
|
||||
|
||||
context 'on PostgreSQL' do
|
||||
it 'returns the ID returned by the query' do
|
||||
expect(Gitlab::Database)
|
||||
.to receive(:bulk_insert)
|
||||
.with(Issue.table_name, [attributes], return_ids: true)
|
||||
.and_return([10])
|
||||
|
||||
id = subject.insert_and_return_id(attributes, project.issues)
|
||||
|
||||
expect(id).to eq(10)
|
||||
end
|
||||
end
|
||||
|
||||
context 'on MySQL' do
|
||||
it 'uses a separate query to retrieve the ID' do
|
||||
issue = create(:issue, project: project, iid: attributes[:iid])
|
||||
|
||||
expect(Gitlab::Database)
|
||||
.to receive(:bulk_insert)
|
||||
.with(Issue.table_name, [attributes], return_ids: true)
|
||||
.and_return([])
|
||||
|
||||
id = subject.insert_and_return_id(attributes, project.issues)
|
||||
|
||||
expect(id).to eq(issue.id)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
Loading…
Reference in New Issue