diff --git a/app/models/internal_id.rb b/app/models/internal_id.rb index e7168d49db9..e75c6eb2331 100644 --- a/app/models/internal_id.rb +++ b/app/models/internal_id.rb @@ -66,6 +66,17 @@ class InternalId < ActiveRecord::Base InternalIdGenerator.new(subject, scope, usage, init).generate end + # Flushing records is generally safe in a sense that those + # records are going to be re-created when needed. + # + # A filter condition has to be provided to not accidentally flush + # records for all projects. + def flush_records!(filter) + raise ArgumentError, "filter cannot be empty" if filter.blank? + + where(filter).delete_all + end + def available? @available_flag ||= ActiveRecord::Migrator.current_version >= REQUIRED_SCHEMA_VERSION # rubocop:disable Gitlab/PredicateMemoization end @@ -111,7 +122,7 @@ class InternalId < ActiveRecord::Base # Generates next internal id and returns it def generate - InternalId.transaction do + subject.transaction do # Create a record in internal_ids if one does not yet exist # and increment its last value # @@ -125,7 +136,7 @@ class InternalId < ActiveRecord::Base # # Note this will acquire a ROW SHARE lock on the InternalId record def track_greatest(new_value) - InternalId.transaction do + subject.transaction do (lookup || create_record).track_greatest_and_save!(new_value) end end @@ -148,7 +159,7 @@ class InternalId < ActiveRecord::Base # violation. We can safely roll-back the nested transaction and perform # a lookup instead to retrieve the record. def create_record - InternalId.transaction(requires_new: true) do + subject.transaction(requires_new: true) do InternalId.create!( **scope, usage: usage_value, diff --git a/app/models/project.rb b/app/models/project.rb index b509120ff83..da77479fe1f 100644 --- a/app/models/project.rb +++ b/app/models/project.rb @@ -1585,6 +1585,13 @@ class Project < ActiveRecord::Base def after_import repository.after_import wiki.repository.after_import + + # The import assigns iid values on its own, e.g. by re-using GitHub ids. + # Flush existing InternalId records for this project for consistency reasons. + # Those records are going to be recreated with the next normal creation + # of a model instance (e.g. an Issue). + InternalId.flush_records!(project: self) + import_state.finish import_state.remove_jid update_project_counter_caches diff --git a/changelogs/unreleased/ab-54270-github-iid.yml b/changelogs/unreleased/ab-54270-github-iid.yml new file mode 100644 index 00000000000..1776b0aeb86 --- /dev/null +++ b/changelogs/unreleased/ab-54270-github-iid.yml @@ -0,0 +1,5 @@ +--- +title: Improve efficiency of GitHub importer by reducing amount of locks needed. +merge_request: 24102 +author: +type: performance diff --git a/db/post_migrate/20190102152410_delete_inconsistent_internal_id_records2.rb b/db/post_migrate/20190102152410_delete_inconsistent_internal_id_records2.rb new file mode 100644 index 00000000000..ddcddcf72a3 --- /dev/null +++ b/db/post_migrate/20190102152410_delete_inconsistent_internal_id_records2.rb @@ -0,0 +1,43 @@ +# frozen_string_literal: true +class DeleteInconsistentInternalIdRecords2 < ActiveRecord::Migration[5.0] + include Gitlab::Database::MigrationHelpers + + DOWNTIME = false + + disable_ddl_transaction! + + # This migration cleans up any inconsistent records in internal_ids. + # + # That is, it deletes records that track a `last_value` that is + # smaller than the maximum internal id (usually `iid`) found in + # the corresponding model records. + + def up + disable_statement_timeout do + delete_internal_id_records('milestones', 'project_id') + delete_internal_id_records('milestones', 'namespace_id', 'group_id') + end + end + + class InternalId < ActiveRecord::Base + self.table_name = 'internal_ids' + enum usage: { issues: 0, merge_requests: 1, deployments: 2, milestones: 3, epics: 4, ci_pipelines: 5 } + end + + private + + def delete_internal_id_records(base_table, scope_column_name, base_scope_column_name = scope_column_name) + sql = <<~SQL + SELECT id FROM ( -- workaround for MySQL + SELECT internal_ids.id FROM ( + SELECT #{base_scope_column_name} AS #{scope_column_name}, max(iid) as maximum_iid from #{base_table} GROUP BY #{scope_column_name} + ) maxima JOIN internal_ids USING (#{scope_column_name}) + WHERE internal_ids.usage=#{InternalId.usages.fetch(base_table)} AND maxima.maximum_iid > internal_ids.last_value + ) internal_ids + SQL + + InternalId.where("id IN (#{sql})").tap do |ids| # rubocop:disable GitlabSecurity/SqlInjection + say "Deleting internal_id records for #{base_table}: #{ids.map { |i| [i.project_id, i.last_value] }}" unless ids.empty? + end.delete_all + end +end diff --git a/lib/gitlab/github_import/bulk_importing.rb b/lib/gitlab/github_import/bulk_importing.rb index da2f96b5c4b..147597289cf 100644 --- a/lib/gitlab/github_import/bulk_importing.rb +++ b/lib/gitlab/github_import/bulk_importing.rb @@ -15,12 +15,10 @@ module Gitlab end # Bulk inserts the given rows into the database. - def bulk_insert(model, rows, batch_size: 100, pre_hook: nil) + def bulk_insert(model, rows, batch_size: 100) rows.each_slice(batch_size) do |slice| - pre_hook.call(slice) if pre_hook Gitlab::Database.bulk_insert(model.table_name, slice) end - rows end end end diff --git a/lib/gitlab/github_import/importer/issue_importer.rb b/lib/gitlab/github_import/importer/issue_importer.rb index 4226eee85cc..656d46b6a7d 100644 --- a/lib/gitlab/github_import/importer/issue_importer.rb +++ b/lib/gitlab/github_import/importer/issue_importer.rb @@ -57,11 +57,7 @@ module Gitlab updated_at: issue.updated_at } - insert_and_return_id(attributes, project.issues).tap do |id| - # We use .insert_and_return_id which effectively disables all callbacks. - # Trigger iid logic here to make sure we track internal id values consistently. - project.issues.find(id).ensure_project_iid! - end + insert_and_return_id(attributes, project.issues) rescue ActiveRecord::InvalidForeignKey # It's possible the project has been deleted since scheduling this # job. In this case we'll just skip creating the issue. diff --git a/lib/gitlab/github_import/importer/milestones_importer.rb b/lib/gitlab/github_import/importer/milestones_importer.rb index 8d54b27374c..87cf2c8b598 100644 --- a/lib/gitlab/github_import/importer/milestones_importer.rb +++ b/lib/gitlab/github_import/importer/milestones_importer.rb @@ -19,20 +19,10 @@ module Gitlab # rubocop: enable CodeReuse/ActiveRecord def execute - # We insert records in bulk, by-passing any standard model callbacks. - # The pre_hook here makes sure we track internal ids consistently. - # Note this has to be called before performing an insert of a batch - # because we're outside a transaction scope here. - bulk_insert(Milestone, build_milestones, pre_hook: method(:track_greatest_iid)) + bulk_insert(Milestone, build_milestones) build_milestones_cache end - def track_greatest_iid(slice) - greatest_iid = slice.max { |e| e[:iid] }[:iid] - - InternalId.track_greatest(nil, { project: project }, :milestones, greatest_iid, ->(_) { project.milestones.maximum(:iid) }) - end - def build_milestones build_database_rows(each_milestone) end diff --git a/lib/gitlab/import/merge_request_helpers.rb b/lib/gitlab/import/merge_request_helpers.rb index 9215067d973..fa3ff6c3f12 100644 --- a/lib/gitlab/import/merge_request_helpers.rb +++ b/lib/gitlab/import/merge_request_helpers.rb @@ -24,10 +24,6 @@ module Gitlab merge_request = project.merge_requests.reload.find(merge_request_id) - # We use .insert_and_return_id which effectively disables all callbacks. - # Trigger iid logic here to make sure we track internal id values consistently. - merge_request.ensure_target_project_iid! - [merge_request, false] end rescue ActiveRecord::InvalidForeignKey diff --git a/spec/lib/gitlab/github_import/bulk_importing_spec.rb b/spec/lib/gitlab/github_import/bulk_importing_spec.rb index 861710f7e9b..91229d9c7d4 100644 --- a/spec/lib/gitlab/github_import/bulk_importing_spec.rb +++ b/spec/lib/gitlab/github_import/bulk_importing_spec.rb @@ -58,17 +58,5 @@ describe Gitlab::GithubImport::BulkImporting do importer.bulk_insert(model, rows, batch_size: 5) end - - it 'calls pre_hook for each slice if given' do - rows = [{ title: 'Foo' }] * 10 - model = double(:model, table_name: 'kittens') - pre_hook = double('pre_hook', call: nil) - allow(Gitlab::Database).to receive(:bulk_insert) - - expect(pre_hook).to receive(:call).with(rows[0..4]) - expect(pre_hook).to receive(:call).with(rows[5..9]) - - importer.bulk_insert(model, rows, batch_size: 5, pre_hook: pre_hook) - end end end diff --git a/spec/lib/gitlab/github_import/importer/issue_importer_spec.rb b/spec/lib/gitlab/github_import/importer/issue_importer_spec.rb index 65a2e1cb5cb..7901ae005d9 100644 --- a/spec/lib/gitlab/github_import/importer/issue_importer_spec.rb +++ b/spec/lib/gitlab/github_import/importer/issue_importer_spec.rb @@ -78,11 +78,6 @@ describe Gitlab::GithubImport::Importer::IssueImporter, :clean_gitlab_redis_cach .to receive(:id_for) .with(issue) .and_return(milestone.id) - - allow(importer.user_finder) - .to receive(:author_id_for) - .with(issue) - .and_return([user.id, true]) end context 'when the issue author could be found' do @@ -177,23 +172,6 @@ describe Gitlab::GithubImport::Importer::IssueImporter, :clean_gitlab_redis_cach expect(importer.create_issue).to be_a_kind_of(Numeric) end - - it 'triggers internal_id functionality to track greatest iids' do - allow(importer.user_finder) - .to receive(:author_id_for) - .with(issue) - .and_return([user.id, true]) - - issue = build_stubbed(:issue, project: project) - allow(importer) - .to receive(:insert_and_return_id) - .and_return(issue.id) - allow(project.issues).to receive(:find).with(issue.id).and_return(issue) - - expect(issue).to receive(:ensure_project_iid!) - - importer.create_issue - end end describe '#create_assignees' do diff --git a/spec/lib/gitlab/github_import/importer/milestones_importer_spec.rb b/spec/lib/gitlab/github_import/importer/milestones_importer_spec.rb index db0be760c7b..b1cac3b6e46 100644 --- a/spec/lib/gitlab/github_import/importer/milestones_importer_spec.rb +++ b/spec/lib/gitlab/github_import/importer/milestones_importer_spec.rb @@ -29,25 +29,13 @@ describe Gitlab::GithubImport::Importer::MilestonesImporter, :clean_gitlab_redis expect(importer) .to receive(:bulk_insert) - .with(Milestone, [milestone_hash], any_args) + .with(Milestone, [milestone_hash]) expect(importer) .to receive(:build_milestones_cache) importer.execute end - - it 'tracks internal ids' do - milestone_hash = { iid: 1, title: '1.0', project_id: project.id } - allow(importer) - .to receive(:build_milestones) - .and_return([milestone_hash]) - - expect(InternalId).to receive(:track_greatest) - .with(nil, { project: project }, :milestones, 1, any_args) - - importer.execute - end end describe '#build_milestones' do diff --git a/spec/lib/gitlab/github_import/importer/pull_request_importer_spec.rb b/spec/lib/gitlab/github_import/importer/pull_request_importer_spec.rb index 25684ea9e2c..0f21b8843b6 100644 --- a/spec/lib/gitlab/github_import/importer/pull_request_importer_spec.rb +++ b/spec/lib/gitlab/github_import/importer/pull_request_importer_spec.rb @@ -111,16 +111,6 @@ describe Gitlab::GithubImport::Importer::PullRequestImporter, :clean_gitlab_redi expect(mr).to be_instance_of(MergeRequest) expect(exists).to eq(false) end - - it 'triggers internal_id functionality to track greatest iids' do - mr = build_stubbed(:merge_request, source_project: project, target_project: project) - allow(importer).to receive(:insert_and_return_id).and_return(mr.id) - allow(project.merge_requests).to receive(:find).with(mr.id).and_return(mr) - - expect(mr).to receive(:ensure_target_project_iid!) - - importer.create_merge_request - end end context 'when the author could not be found' do diff --git a/spec/models/internal_id_spec.rb b/spec/models/internal_id_spec.rb index 4696341c05f..d32f163f05b 100644 --- a/spec/models/internal_id_spec.rb +++ b/spec/models/internal_id_spec.rb @@ -13,6 +13,29 @@ describe InternalId do it { is_expected.to validate_presence_of(:usage) } end + describe '.flush_records!' do + subject { described_class.flush_records!(project: project) } + + let(:another_project) { create(:project) } + + before do + create_list(:issue, 2, project: project) + create_list(:issue, 2, project: another_project) + end + + it 'deletes all records for the given project' do + expect { subject }.to change { described_class.where(project: project).count }.from(1).to(0) + end + + it 'retains records for other projects' do + expect { subject }.not_to change { described_class.where(project: another_project).count } + end + + it 'does not allow an empty filter' do + expect { described_class.flush_records!({}) }.to raise_error(/filter cannot be empty/) + end + end + describe '.generate_next' do subject { described_class.generate_next(issue, scope, usage, init) } diff --git a/spec/models/project_spec.rb b/spec/models/project_spec.rb index 4b061b5e24f..7d3f2dfe374 100644 --- a/spec/models/project_spec.rb +++ b/spec/models/project_spec.rb @@ -3767,6 +3767,7 @@ describe Project do expect(import_state).to receive(:remove_jid) expect(project).to receive(:after_create_default_branch) expect(project).to receive(:refresh_markdown_cache!) + expect(InternalId).to receive(:flush_records!).with(project: project) project.after_import end