Merge branch 'ab-54270-github-iid' into 'master'
Reduce amount of locks needed for GitHub importer. Closes #54270 and #51817. See merge request gitlab-org/gitlab-ce!24102
This commit is contained in:
commit
2d24dca485
14 changed files with 97 additions and 83 deletions
|
@ -66,6 +66,17 @@ class InternalId < ActiveRecord::Base
|
|||
InternalIdGenerator.new(subject, scope, usage, init).generate
|
||||
end
|
||||
|
||||
# Flushing records is generally safe in a sense that those
|
||||
# records are going to be re-created when needed.
|
||||
#
|
||||
# A filter condition has to be provided to not accidentally flush
|
||||
# records for all projects.
|
||||
# Deletes all InternalId rows matching +conditions+.
#
# Flushing these rows is safe: they are lazily re-created the next time
# an internal id is generated for the scope.
#
# Raises ArgumentError when +conditions+ is blank, so a caller can never
# accidentally wipe the records of every project at once.
def flush_records!(conditions)
  raise ArgumentError, "filter cannot be empty" if conditions.blank?

  where(conditions).delete_all
end
|
||||
|
||||
# Whether internal ids can be used, i.e. the database schema has been
# migrated at least to REQUIRED_SCHEMA_VERSION.
#
# Memoized with ||=; note this re-evaluates whenever the check yields a
# falsy value, which is why the PredicateMemoization cop is disabled
# rather than worked around.
def available?
  @available_flag ||= ActiveRecord::Migrator.current_version >= REQUIRED_SCHEMA_VERSION # rubocop:disable Gitlab/PredicateMemoization
end
|
||||
|
@ -111,7 +122,7 @@ class InternalId < ActiveRecord::Base
|
|||
|
||||
# Generates next internal id and returns it
|
||||
def generate
|
||||
InternalId.transaction do
|
||||
subject.transaction do
|
||||
# Create a record in internal_ids if one does not yet exist
|
||||
# and increment its last value
|
||||
#
|
||||
|
@ -125,7 +136,7 @@ class InternalId < ActiveRecord::Base
|
|||
#
|
||||
# Note this will acquire a ROW SHARE lock on the InternalId record
|
||||
def track_greatest(new_value)
|
||||
InternalId.transaction do
|
||||
subject.transaction do
|
||||
(lookup || create_record).track_greatest_and_save!(new_value)
|
||||
end
|
||||
end
|
||||
|
@ -148,7 +159,7 @@ class InternalId < ActiveRecord::Base
|
|||
# violation. We can safely roll-back the nested transaction and perform
|
||||
# a lookup instead to retrieve the record.
|
||||
def create_record
|
||||
InternalId.transaction(requires_new: true) do
|
||||
subject.transaction(requires_new: true) do
|
||||
InternalId.create!(
|
||||
**scope,
|
||||
usage: usage_value,
|
||||
|
|
|
@ -1585,6 +1585,13 @@ class Project < ActiveRecord::Base
|
|||
def after_import
|
||||
repository.after_import
|
||||
wiki.repository.after_import
|
||||
|
||||
# The import assigns iid values on its own, e.g. by re-using GitHub ids.
|
||||
# Flush existing InternalId records for this project for consistency reasons.
|
||||
# Those records are going to be recreated with the next normal creation
|
||||
# of a model instance (e.g. an Issue).
|
||||
InternalId.flush_records!(project: self)
|
||||
|
||||
import_state.finish
|
||||
import_state.remove_jid
|
||||
update_project_counter_caches
|
||||
|
|
5
changelogs/unreleased/ab-54270-github-iid.yml
Normal file
5
changelogs/unreleased/ab-54270-github-iid.yml
Normal file
|
@ -0,0 +1,5 @@
|
|||
---
|
||||
title: Improve efficiency of GitHub importer by reducing amount of locks needed.
|
||||
merge_request: 24102
|
||||
author:
|
||||
type: performance
|
|
@ -0,0 +1,43 @@
|
|||
# frozen_string_literal: true
|
||||
class DeleteInconsistentInternalIdRecords2 < ActiveRecord::Migration[5.0]
  include Gitlab::Database::MigrationHelpers

  DOWNTIME = false

  disable_ddl_transaction!

  # This migration cleans up any inconsistent records in internal_ids.
  #
  # That is, it deletes records that track a `last_value` that is
  # smaller than the maximum internal id (usually `iid`) found in
  # the corresponding model records.

  def up
    disable_statement_timeout do
      # Milestones are scoped either by project or by group, hence the
      # two passes with different scope columns.
      delete_internal_id_records('milestones', 'project_id')
      delete_internal_id_records('milestones', 'namespace_id', 'group_id')
    end
  end

  # Migration-local model stub so this migration does not depend on the
  # application's InternalId class, which may change over time.
  class InternalId < ActiveRecord::Base
    self.table_name = 'internal_ids'
    enum usage: { issues: 0, merge_requests: 1, deployments: 2, milestones: 3, epics: 4, ci_pipelines: 5 }
  end

  private

  # Deletes internal_ids rows whose last_value lags behind the greatest
  # iid present in +base_table+ for the same scope.
  #
  # base_table             - table whose iids are authoritative (e.g. 'milestones')
  # scope_column_name      - scope column on internal_ids (e.g. 'project_id')
  # base_scope_column_name - matching column on base_table when it is
  #                          named differently (e.g. 'group_id')
  def delete_internal_id_records(base_table, scope_column_name, base_scope_column_name = scope_column_name)
    sql = <<~SQL
      SELECT id FROM ( -- workaround for MySQL
        SELECT internal_ids.id FROM (
          SELECT #{base_scope_column_name} AS #{scope_column_name}, max(iid) as maximum_iid from #{base_table} GROUP BY #{scope_column_name}
        ) maxima JOIN internal_ids USING (#{scope_column_name})
        WHERE internal_ids.usage=#{InternalId.usages.fetch(base_table)} AND maxima.maximum_iid > internal_ids.last_value
      ) internal_ids
    SQL

    # String interpolation is acceptable here: every fragment comes from
    # the hard-coded calls in #up, never from user input (hence the
    # SqlInjection cop is disabled below).
    InternalId.where("id IN (#{sql})").tap do |ids| # rubocop:disable GitlabSecurity/SqlInjection
      say "Deleting internal_id records for #{base_table}: #{ids.map { |i| [i.project_id, i.last_value] }}" unless ids.empty?
    end.delete_all
  end
end
|
|
@ -15,12 +15,10 @@ module Gitlab
|
|||
end
|
||||
|
||||
# Bulk inserts the given rows into the database.
#
# model      - the ActiveRecord model whose table receives the rows
# rows       - an Array of attribute Hashes
# batch_size - number of rows inserted per INSERT statement
#
# Returns the given rows.
#
# NOTE(review): the scrape kept both the pre-change signature (with a
# `pre_hook:` keyword) and the post-change one; per the hunk header
# (12 -> 10 lines) the pre_hook parameter and its call were removed, so
# only the post-change implementation is retained here.
def bulk_insert(model, rows, batch_size: 100)
  rows.each_slice(batch_size) do |slice|
    Gitlab::Database.bulk_insert(model.table_name, slice)
  end

  rows
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -57,11 +57,7 @@ module Gitlab
|
|||
updated_at: issue.updated_at
|
||||
}
|
||||
|
||||
insert_and_return_id(attributes, project.issues).tap do |id|
|
||||
# We use .insert_and_return_id which effectively disables all callbacks.
|
||||
# Trigger iid logic here to make sure we track internal id values consistently.
|
||||
project.issues.find(id).ensure_project_iid!
|
||||
end
|
||||
insert_and_return_id(attributes, project.issues)
|
||||
rescue ActiveRecord::InvalidForeignKey
|
||||
# It's possible the project has been deleted since scheduling this
|
||||
# job. In this case we'll just skip creating the issue.
|
||||
|
|
|
@ -19,20 +19,10 @@ module Gitlab
|
|||
# rubocop: enable CodeReuse/ActiveRecord
|
||||
|
||||
# Imports all milestones for the project in bulk, then primes the
# milestones cache used by later importer stages.
#
# NOTE(review): the scrape kept both the pre-change call (with a
# `pre_hook:` argument) and the post-change call; per the hunk header
# (20 -> 10 lines) the pre_hook plumbing was removed, so only the
# post-change body is retained here.
def execute
  bulk_insert(Milestone, build_milestones)
  build_milestones_cache
end
|
||||
|
||||
# Records the greatest :iid contained in +slice+ (an array of milestone
# attribute hashes) via InternalId, so internal id tracking stays
# consistent with rows inserted in bulk.
#
# Fix: the original used `slice.max { |e| e[:iid] }` — Enumerable#max
# with a block expects a two-argument comparator returning -1/0/1, so a
# one-argument block yielding a positive integer does not select the
# maximum. `max_by` is the correct form.
def track_greatest_iid(slice)
  greatest_iid = slice.max_by { |e| e[:iid] }[:iid]

  InternalId.track_greatest(nil, { project: project }, :milestones, greatest_iid, ->(_) { project.milestones.maximum(:iid) })
end
|
||||
|
||||
# Builds the database rows — attribute hashes, one per milestone
# yielded by each_milestone — to be passed to bulk_insert.
def build_milestones
  build_database_rows(each_milestone)
end
|
||||
|
|
|
@ -24,10 +24,6 @@ module Gitlab
|
|||
|
||||
merge_request = project.merge_requests.reload.find(merge_request_id)
|
||||
|
||||
# We use .insert_and_return_id which effectively disables all callbacks.
|
||||
# Trigger iid logic here to make sure we track internal id values consistently.
|
||||
merge_request.ensure_target_project_iid!
|
||||
|
||||
[merge_request, false]
|
||||
end
|
||||
rescue ActiveRecord::InvalidForeignKey
|
||||
|
|
|
@ -58,17 +58,5 @@ describe Gitlab::GithubImport::BulkImporting do
|
|||
|
||||
importer.bulk_insert(model, rows, batch_size: 5)
|
||||
end
|
||||
|
||||
it 'calls pre_hook for each slice if given' do
|
||||
rows = [{ title: 'Foo' }] * 10
|
||||
model = double(:model, table_name: 'kittens')
|
||||
pre_hook = double('pre_hook', call: nil)
|
||||
allow(Gitlab::Database).to receive(:bulk_insert)
|
||||
|
||||
expect(pre_hook).to receive(:call).with(rows[0..4])
|
||||
expect(pre_hook).to receive(:call).with(rows[5..9])
|
||||
|
||||
importer.bulk_insert(model, rows, batch_size: 5, pre_hook: pre_hook)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -78,11 +78,6 @@ describe Gitlab::GithubImport::Importer::IssueImporter, :clean_gitlab_redis_cach
|
|||
.to receive(:id_for)
|
||||
.with(issue)
|
||||
.and_return(milestone.id)
|
||||
|
||||
allow(importer.user_finder)
|
||||
.to receive(:author_id_for)
|
||||
.with(issue)
|
||||
.and_return([user.id, true])
|
||||
end
|
||||
|
||||
context 'when the issue author could be found' do
|
||||
|
@ -177,23 +172,6 @@ describe Gitlab::GithubImport::Importer::IssueImporter, :clean_gitlab_redis_cach
|
|||
|
||||
expect(importer.create_issue).to be_a_kind_of(Numeric)
|
||||
end
|
||||
|
||||
it 'triggers internal_id functionality to track greatest iids' do
|
||||
allow(importer.user_finder)
|
||||
.to receive(:author_id_for)
|
||||
.with(issue)
|
||||
.and_return([user.id, true])
|
||||
|
||||
issue = build_stubbed(:issue, project: project)
|
||||
allow(importer)
|
||||
.to receive(:insert_and_return_id)
|
||||
.and_return(issue.id)
|
||||
allow(project.issues).to receive(:find).with(issue.id).and_return(issue)
|
||||
|
||||
expect(issue).to receive(:ensure_project_iid!)
|
||||
|
||||
importer.create_issue
|
||||
end
|
||||
end
|
||||
|
||||
describe '#create_assignees' do
|
||||
|
|
|
@ -29,25 +29,13 @@ describe Gitlab::GithubImport::Importer::MilestonesImporter, :clean_gitlab_redis
|
|||
|
||||
expect(importer)
|
||||
.to receive(:bulk_insert)
|
||||
.with(Milestone, [milestone_hash], any_args)
|
||||
.with(Milestone, [milestone_hash])
|
||||
|
||||
expect(importer)
|
||||
.to receive(:build_milestones_cache)
|
||||
|
||||
importer.execute
|
||||
end
|
||||
|
||||
it 'tracks internal ids' do
|
||||
milestone_hash = { iid: 1, title: '1.0', project_id: project.id }
|
||||
allow(importer)
|
||||
.to receive(:build_milestones)
|
||||
.and_return([milestone_hash])
|
||||
|
||||
expect(InternalId).to receive(:track_greatest)
|
||||
.with(nil, { project: project }, :milestones, 1, any_args)
|
||||
|
||||
importer.execute
|
||||
end
|
||||
end
|
||||
|
||||
describe '#build_milestones' do
|
||||
|
|
|
@ -111,16 +111,6 @@ describe Gitlab::GithubImport::Importer::PullRequestImporter, :clean_gitlab_redi
|
|||
expect(mr).to be_instance_of(MergeRequest)
|
||||
expect(exists).to eq(false)
|
||||
end
|
||||
|
||||
it 'triggers internal_id functionality to track greatest iids' do
|
||||
mr = build_stubbed(:merge_request, source_project: project, target_project: project)
|
||||
allow(importer).to receive(:insert_and_return_id).and_return(mr.id)
|
||||
allow(project.merge_requests).to receive(:find).with(mr.id).and_return(mr)
|
||||
|
||||
expect(mr).to receive(:ensure_target_project_iid!)
|
||||
|
||||
importer.create_merge_request
|
||||
end
|
||||
end
|
||||
|
||||
context 'when the author could not be found' do
|
||||
|
|
|
@ -13,6 +13,29 @@ describe InternalId do
|
|||
it { is_expected.to validate_presence_of(:usage) }
|
||||
end
|
||||
|
||||
describe '.flush_records!' do
  subject { described_class.flush_records!(project: project) }

  # NOTE(review): `project` is assumed to come from an outer let block — confirm.
  let(:another_project) { create(:project) }

  before do
    # Creating issues implicitly creates one InternalId record per
    # project (see the from(1) expectation below).
    create_list(:issue, 2, project: project)
    create_list(:issue, 2, project: another_project)
  end

  it 'deletes all records for the given project' do
    expect { subject }.to change { described_class.where(project: project).count }.from(1).to(0)
  end

  it 'retains records for other projects' do
    expect { subject }.not_to change { described_class.where(project: another_project).count }
  end

  it 'does not allow an empty filter' do
    expect { described_class.flush_records!({}) }.to raise_error(/filter cannot be empty/)
  end
end
|
||||
|
||||
describe '.generate_next' do
|
||||
subject { described_class.generate_next(issue, scope, usage, init) }
|
||||
|
||||
|
|
|
@ -3767,6 +3767,7 @@ describe Project do
|
|||
expect(import_state).to receive(:remove_jid)
|
||||
expect(project).to receive(:after_create_default_branch)
|
||||
expect(project).to receive(:refresh_markdown_cache!)
|
||||
expect(InternalId).to receive(:flush_records!).with(project: project)
|
||||
|
||||
project.after_import
|
||||
end
|
||||
|
|
Loading…
Reference in a new issue