Merge branch 'ab-54270-github-iid' into 'master'

Reduce amount of locks needed for GitHub importer

Closes #54270 and #51817

See merge request gitlab-org/gitlab-ce!24102
Yorick Peterse 2019-01-29 16:50:18 +00:00
commit 2d24dca485
14 changed files with 97 additions and 83 deletions
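The gist of the change: the importer used to touch the internal_ids table for every imported record (each ensure_*_iid! call takes a row-level lock on the tracking row), while it now skips iid tracking during the import entirely and flushes the project's InternalId state once at the end. A rough sketch of the new flow, using names from the diff below (the loop itself is illustrative, not code from this commit):

    # During the import, iids come straight from the GitHub payload and no
    # per-record lock is taken on internal_ids.
    issues.each do |attributes|
      insert_and_return_id(attributes, project.issues) # no ensure_project_iid!
    end

    # One flush after the import; records are recreated lazily on the next
    # regular creation of an Issue or MergeRequest.
    InternalId.flush_records!(project: project)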

View file

@@ -66,6 +66,17 @@ class InternalId < ActiveRecord::Base
      InternalIdGenerator.new(subject, scope, usage, init).generate
    end

+   # Flushing records is generally safe in a sense that those
+   # records are going to be re-created when needed.
+   #
+   # A filter condition has to be provided to not accidentally flush
+   # records for all projects.
+   def flush_records!(filter)
+     raise ArgumentError, "filter cannot be empty" if filter.blank?
+
+     where(filter).delete_all
+   end
+
    def available?
      @available_flag ||= ActiveRecord::Migrator.current_version >= REQUIRED_SCHEMA_VERSION # rubocop:disable Gitlab/PredicateMemoization
    end
@@ -111,7 +122,7 @@ class InternalId < ActiveRecord::Base
    # Generates next internal id and returns it
    def generate
-     InternalId.transaction do
+     subject.transaction do
        # Create a record in internal_ids if one does not yet exist
        # and increment its last value
        #
@@ -125,7 +136,7 @@ class InternalId < ActiveRecord::Base
    #
    # Note this will acquire a ROW SHARE lock on the InternalId record
    def track_greatest(new_value)
-     InternalId.transaction do
+     subject.transaction do
        (lookup || create_record).track_greatest_and_save!(new_value)
      end
    end
@@ -148,7 +159,7 @@ class InternalId < ActiveRecord::Base
    # violation. We can safely roll-back the nested transaction and perform
    # a lookup instead to retrieve the record.
    def create_record
-     InternalId.transaction(requires_new: true) do
+     subject.transaction(requires_new: true) do
        InternalId.create!(
          **scope,
          usage: usage_value,
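For reference, a small usage sketch of the new flush_records! API, assuming project is a Project instance (the happy-path call appears in Project#after_import in the next file):

    # Deletes only this project's internal_ids rows, in a single DELETE
    # statement and without running any model callbacks.
    InternalId.flush_records!(project: project)

    # A blank filter is refused rather than silently flushing all projects:
    begin
      InternalId.flush_records!({})
    rescue ArgumentError => e
      e.message # => "filter cannot be empty"
    end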

View file

@@ -1585,6 +1585,13 @@ class Project < ActiveRecord::Base
  def after_import
    repository.after_import
    wiki.repository.after_import
+
+   # The import assigns iid values on its own, e.g. by re-using GitHub ids.
+   # Flush existing InternalId records for this project for consistency reasons.
+   # Those records are going to be recreated with the next normal creation
+   # of a model instance (e.g. an Issue).
+   InternalId.flush_records!(project: self)
+
    import_state.finish
    import_state.remove_jid
    update_project_counter_caches

View file

@@ -0,0 +1,5 @@
+ ---
+ title: Improve efficiency of GitHub importer by reducing amount of locks needed.
+ merge_request: 24102
+ author:
+ type: performance

View file

@@ -0,0 +1,43 @@
+ # frozen_string_literal: true
+
+ class DeleteInconsistentInternalIdRecords2 < ActiveRecord::Migration[5.0]
+   include Gitlab::Database::MigrationHelpers
+
+   DOWNTIME = false
+
+   disable_ddl_transaction!
+
+   # This migration cleans up any inconsistent records in internal_ids.
+   #
+   # That is, it deletes records that track a `last_value` that is
+   # smaller than the maximum internal id (usually `iid`) found in
+   # the corresponding model records.
+   def up
+     disable_statement_timeout do
+       delete_internal_id_records('milestones', 'project_id')
+       delete_internal_id_records('milestones', 'namespace_id', 'group_id')
+     end
+   end
+
+   class InternalId < ActiveRecord::Base
+     self.table_name = 'internal_ids'
+     enum usage: { issues: 0, merge_requests: 1, deployments: 2, milestones: 3, epics: 4, ci_pipelines: 5 }
+   end
+
+   private
+
+   def delete_internal_id_records(base_table, scope_column_name, base_scope_column_name = scope_column_name)
+     sql = <<~SQL
+       SELECT id FROM ( -- workaround for MySQL
+         SELECT internal_ids.id FROM (
+           SELECT #{base_scope_column_name} AS #{scope_column_name}, max(iid) as maximum_iid from #{base_table} GROUP BY #{scope_column_name}
+         ) maxima JOIN internal_ids USING (#{scope_column_name})
+         WHERE internal_ids.usage=#{InternalId.usages.fetch(base_table)} AND maxima.maximum_iid > internal_ids.last_value
+       ) internal_ids
+     SQL
+
+     InternalId.where("id IN (#{sql})").tap do |ids| # rubocop:disable GitlabSecurity/SqlInjection
+       say "Deleting internal_id records for #{base_table}: #{ids.map { |i| [i.project_id, i.last_value] }}" unless ids.empty?
+     end.delete_all
+   end
+ end
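To make the inconsistency concrete, a worked example with invented numbers; the query in delete_internal_id_records selects exactly this kind of lagging row:

    # internal_ids row for project 1, usage: milestones
    last_value  = 5
    # SELECT max(iid) FROM milestones WHERE project_id = 1
    maximum_iid = 7

    # maximum_iid > last_value holds, so the row is selected and deleted;
    # it is later rebuilt from Milestone.maximum(:iid) when the next iid
    # is generated.
    maximum_iid > last_value # => true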

View file

@@ -15,12 +15,10 @@ module Gitlab
      end

      # Bulk inserts the given rows into the database.
-     def bulk_insert(model, rows, batch_size: 100, pre_hook: nil)
+     def bulk_insert(model, rows, batch_size: 100)
        rows.each_slice(batch_size) do |slice|
-         pre_hook.call(slice) if pre_hook
          Gitlab::Database.bulk_insert(model.table_name, slice)
        end
-       rows
      end
    end
  end
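A minimal usage sketch of the slimmed-down helper; the attribute keys are borrowed from the milestone fixtures in the specs below, and project is assumed to be in scope:

    # Rows are plain attribute hashes; each slice of up to 100 rows becomes
    # one multi-row INSERT, so model callbacks and validations never run.
    rows = [
      { iid: 1, title: '1.0', project_id: project.id },
      { iid: 2, title: '1.1', project_id: project.id }
    ]

    bulk_insert(Milestone, rows)

Skipping callbacks is what made the old per-record pre_hook necessary in the first place; with iid tracking dropped from the import path, the hook and its spec go away as well.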

View file

@@ -57,11 +57,7 @@ module Gitlab
          updated_at: issue.updated_at
        }

-       insert_and_return_id(attributes, project.issues).tap do |id|
-         # We use .insert_and_return_id which effectively disables all callbacks.
-         # Trigger iid logic here to make sure we track internal id values consistently.
-         project.issues.find(id).ensure_project_iid!
-       end
+       insert_and_return_id(attributes, project.issues)
      rescue ActiveRecord::InvalidForeignKey
        # It's possible the project has been deleted since scheduling this
        # job. In this case we'll just skip creating the issue.

View file

@@ -19,20 +19,10 @@ module Gitlab
      # rubocop: enable CodeReuse/ActiveRecord

      def execute
        # We insert records in bulk, by-passing any standard model callbacks.
-       # The pre_hook here makes sure we track internal ids consistently.
-       # Note this has to be called before performing an insert of a batch
-       # because we're outside a transaction scope here.
-       bulk_insert(Milestone, build_milestones, pre_hook: method(:track_greatest_iid))
+       bulk_insert(Milestone, build_milestones)
        build_milestones_cache
      end

-     def track_greatest_iid(slice)
-       greatest_iid = slice.max { |e| e[:iid] }[:iid]
-       InternalId.track_greatest(nil, { project: project }, :milestones, greatest_iid, ->(_) { project.milestones.maximum(:iid) })
-     end
-
      def build_milestones
        build_database_rows(each_milestone)
      end

View file

@@ -24,10 +24,6 @@ module Gitlab
        merge_request = project.merge_requests.reload.find(merge_request_id)

-       # We use .insert_and_return_id which effectively disables all callbacks.
-       # Trigger iid logic here to make sure we track internal id values consistently.
-       merge_request.ensure_target_project_iid!
-
        [merge_request, false]
      end
    rescue ActiveRecord::InvalidForeignKey

View file

@@ -58,17 +58,5 @@ describe Gitlab::GithubImport::BulkImporting do
      importer.bulk_insert(model, rows, batch_size: 5)
    end
-
-   it 'calls pre_hook for each slice if given' do
-     rows = [{ title: 'Foo' }] * 10
-     model = double(:model, table_name: 'kittens')
-     pre_hook = double('pre_hook', call: nil)
-
-     allow(Gitlab::Database).to receive(:bulk_insert)
-
-     expect(pre_hook).to receive(:call).with(rows[0..4])
-     expect(pre_hook).to receive(:call).with(rows[5..9])
-
-     importer.bulk_insert(model, rows, batch_size: 5, pre_hook: pre_hook)
-   end
  end
end

View file

@@ -78,11 +78,6 @@ describe Gitlab::GithubImport::Importer::IssueImporter, :clean_gitlab_redis_cache do
        .to receive(:id_for)
        .with(issue)
        .and_return(milestone.id)
-
-     allow(importer.user_finder)
-       .to receive(:author_id_for)
-       .with(issue)
-       .and_return([user.id, true])
    end

    context 'when the issue author could be found' do
@@ -177,23 +172,6 @@ describe Gitlab::GithubImport::Importer::IssueImporter, :clean_gitlab_redis_cache do
      expect(importer.create_issue).to be_a_kind_of(Numeric)
    end
-
-   it 'triggers internal_id functionality to track greatest iids' do
-     allow(importer.user_finder)
-       .to receive(:author_id_for)
-       .with(issue)
-       .and_return([user.id, true])
-
-     issue = build_stubbed(:issue, project: project)
-     allow(importer)
-       .to receive(:insert_and_return_id)
-       .and_return(issue.id)
-     allow(project.issues).to receive(:find).with(issue.id).and_return(issue)
-
-     expect(issue).to receive(:ensure_project_iid!)
-
-     importer.create_issue
-   end
  end

  describe '#create_assignees' do

View file

@@ -29,25 +29,13 @@ describe Gitlab::GithubImport::Importer::MilestonesImporter, :clean_gitlab_redis_cache do
      expect(importer)
        .to receive(:bulk_insert)
-       .with(Milestone, [milestone_hash], any_args)
+       .with(Milestone, [milestone_hash])

      expect(importer)
        .to receive(:build_milestones_cache)

      importer.execute
    end
-
-   it 'tracks internal ids' do
-     milestone_hash = { iid: 1, title: '1.0', project_id: project.id }
-     allow(importer)
-       .to receive(:build_milestones)
-       .and_return([milestone_hash])
-
-     expect(InternalId).to receive(:track_greatest)
-       .with(nil, { project: project }, :milestones, 1, any_args)
-
-     importer.execute
-   end
  end

  describe '#build_milestones' do

View file

@@ -111,16 +111,6 @@ describe Gitlab::GithubImport::Importer::PullRequestImporter, :clean_gitlab_redis_cache do
        expect(mr).to be_instance_of(MergeRequest)
        expect(exists).to eq(false)
      end
-
-     it 'triggers internal_id functionality to track greatest iids' do
-       mr = build_stubbed(:merge_request, source_project: project, target_project: project)
-       allow(importer).to receive(:insert_and_return_id).and_return(mr.id)
-       allow(project.merge_requests).to receive(:find).with(mr.id).and_return(mr)
-
-       expect(mr).to receive(:ensure_target_project_iid!)
-
-       importer.create_merge_request
-     end
    end

    context 'when the author could not be found' do

View file

@@ -13,6 +13,29 @@ describe InternalId do
    it { is_expected.to validate_presence_of(:usage) }
  end

+ describe '.flush_records!' do
+   subject { described_class.flush_records!(project: project) }
+
+   let(:another_project) { create(:project) }
+
+   before do
+     create_list(:issue, 2, project: project)
+     create_list(:issue, 2, project: another_project)
+   end
+
+   it 'deletes all records for the given project' do
+     expect { subject }.to change { described_class.where(project: project).count }.from(1).to(0)
+   end
+
+   it 'retains records for other projects' do
+     expect { subject }.not_to change { described_class.where(project: another_project).count }
+   end
+
+   it 'does not allow an empty filter' do
+     expect { described_class.flush_records!({}) }.to raise_error(/filter cannot be empty/)
+   end
+ end
+
  describe '.generate_next' do
    subject { described_class.generate_next(issue, scope, usage, init) }

View file

@@ -3767,6 +3767,7 @@ describe Project do
      expect(import_state).to receive(:remove_jid)
      expect(project).to receive(:after_create_default_branch)
      expect(project).to receive(:refresh_markdown_cache!)
+     expect(InternalId).to receive(:flush_records!).with(project: project)

      project.after_import
    end