From 852e68fd952b18ba32b87c165bb17c370965d756 Mon Sep 17 00:00:00 2001 From: Andreas Brandl Date: Wed, 2 Jan 2019 16:05:40 +0100 Subject: [PATCH 1/6] Revert "Trigger iid logic from GitHub importer for milestones." This reverts commit 358675d09f6ba0fdcc4a089c6d1da6df9ff6d092. --- app/models/internal_id.rb | 6 +++--- lib/gitlab/github_import/bulk_importing.rb | 4 +--- .../github_import/importer/milestones_importer.rb | 12 +----------- .../gitlab/github_import/bulk_importing_spec.rb | 12 ------------ .../importer/milestones_importer_spec.rb | 14 +------------- 5 files changed, 6 insertions(+), 42 deletions(-) diff --git a/app/models/internal_id.rb b/app/models/internal_id.rb index e7168d49db9..4eb211eff61 100644 --- a/app/models/internal_id.rb +++ b/app/models/internal_id.rb @@ -111,7 +111,7 @@ class InternalId < ActiveRecord::Base # Generates next internal id and returns it def generate - InternalId.transaction do + subject.transaction do # Create a record in internal_ids if one does not yet exist # and increment its last value # @@ -125,7 +125,7 @@ class InternalId < ActiveRecord::Base # # Note this will acquire a ROW SHARE lock on the InternalId record def track_greatest(new_value) - InternalId.transaction do + subject.transaction do (lookup || create_record).track_greatest_and_save!(new_value) end end @@ -148,7 +148,7 @@ class InternalId < ActiveRecord::Base # violation. We can safely roll-back the nested transaction and perform # a lookup instead to retrieve the record. def create_record - InternalId.transaction(requires_new: true) do + subject.transaction(requires_new: true) do InternalId.create!( **scope, usage: usage_value, diff --git a/lib/gitlab/github_import/bulk_importing.rb b/lib/gitlab/github_import/bulk_importing.rb index da2f96b5c4b..147597289cf 100644 --- a/lib/gitlab/github_import/bulk_importing.rb +++ b/lib/gitlab/github_import/bulk_importing.rb @@ -15,12 +15,10 @@ module Gitlab end # Bulk inserts the given rows into the database. - def bulk_insert(model, rows, batch_size: 100, pre_hook: nil) + def bulk_insert(model, rows, batch_size: 100) rows.each_slice(batch_size) do |slice| - pre_hook.call(slice) if pre_hook Gitlab::Database.bulk_insert(model.table_name, slice) end - rows end end end diff --git a/lib/gitlab/github_import/importer/milestones_importer.rb b/lib/gitlab/github_import/importer/milestones_importer.rb index 8d54b27374c..87cf2c8b598 100644 --- a/lib/gitlab/github_import/importer/milestones_importer.rb +++ b/lib/gitlab/github_import/importer/milestones_importer.rb @@ -19,20 +19,10 @@ module Gitlab # rubocop: enable CodeReuse/ActiveRecord def execute - # We insert records in bulk, by-passing any standard model callbacks. - # The pre_hook here makes sure we track internal ids consistently. - # Note this has to be called before performing an insert of a batch - # because we're outside a transaction scope here. - bulk_insert(Milestone, build_milestones, pre_hook: method(:track_greatest_iid)) + bulk_insert(Milestone, build_milestones) build_milestones_cache end - def track_greatest_iid(slice) - greatest_iid = slice.max { |e| e[:iid] }[:iid] - - InternalId.track_greatest(nil, { project: project }, :milestones, greatest_iid, ->(_) { project.milestones.maximum(:iid) }) - end - def build_milestones build_database_rows(each_milestone) end diff --git a/spec/lib/gitlab/github_import/bulk_importing_spec.rb b/spec/lib/gitlab/github_import/bulk_importing_spec.rb index 861710f7e9b..91229d9c7d4 100644 --- a/spec/lib/gitlab/github_import/bulk_importing_spec.rb +++ b/spec/lib/gitlab/github_import/bulk_importing_spec.rb @@ -58,17 +58,5 @@ describe Gitlab::GithubImport::BulkImporting do importer.bulk_insert(model, rows, batch_size: 5) end - - it 'calls pre_hook for each slice if given' do - rows = [{ title: 'Foo' }] * 10 - model = double(:model, table_name: 'kittens') - pre_hook = double('pre_hook', call: nil) - allow(Gitlab::Database).to receive(:bulk_insert) - - expect(pre_hook).to receive(:call).with(rows[0..4]) - expect(pre_hook).to receive(:call).with(rows[5..9]) - - importer.bulk_insert(model, rows, batch_size: 5, pre_hook: pre_hook) - end end end diff --git a/spec/lib/gitlab/github_import/importer/milestones_importer_spec.rb b/spec/lib/gitlab/github_import/importer/milestones_importer_spec.rb index db0be760c7b..b1cac3b6e46 100644 --- a/spec/lib/gitlab/github_import/importer/milestones_importer_spec.rb +++ b/spec/lib/gitlab/github_import/importer/milestones_importer_spec.rb @@ -29,25 +29,13 @@ describe Gitlab::GithubImport::Importer::MilestonesImporter, :clean_gitlab_redis expect(importer) .to receive(:bulk_insert) - .with(Milestone, [milestone_hash], any_args) + .with(Milestone, [milestone_hash]) expect(importer) .to receive(:build_milestones_cache) importer.execute end - - it 'tracks internal ids' do - milestone_hash = { iid: 1, title: '1.0', project_id: project.id } - allow(importer) - .to receive(:build_milestones) - .and_return([milestone_hash]) - - expect(InternalId).to receive(:track_greatest) - .with(nil, { project: project }, :milestones, 1, any_args) - - importer.execute - end end describe '#build_milestones' do From 820739eb09a0182054a7713271a70b912c4a7e27 Mon Sep 17 00:00:00 2001 From: Andreas Brandl Date: Wed, 2 Jan 2019 16:16:34 +0100 Subject: [PATCH 2/6] Revert "Trigger iid logic from GitHub importer for issues." This reverts commit b78a69b06c165f7a463d5e0de69030346d9d5c72. --- .../github_import/importer/issue_importer.rb | 6 +---- .../importer/issue_importer_spec.rb | 22 ------------------- 2 files changed, 1 insertion(+), 27 deletions(-) diff --git a/lib/gitlab/github_import/importer/issue_importer.rb b/lib/gitlab/github_import/importer/issue_importer.rb index 4226eee85cc..656d46b6a7d 100644 --- a/lib/gitlab/github_import/importer/issue_importer.rb +++ b/lib/gitlab/github_import/importer/issue_importer.rb @@ -57,11 +57,7 @@ module Gitlab updated_at: issue.updated_at } - insert_and_return_id(attributes, project.issues).tap do |id| - # We use .insert_and_return_id which effectively disables all callbacks. - # Trigger iid logic here to make sure we track internal id values consistently. - project.issues.find(id).ensure_project_iid! - end + insert_and_return_id(attributes, project.issues) rescue ActiveRecord::InvalidForeignKey # It's possible the project has been deleted since scheduling this # job. In this case we'll just skip creating the issue. diff --git a/spec/lib/gitlab/github_import/importer/issue_importer_spec.rb b/spec/lib/gitlab/github_import/importer/issue_importer_spec.rb index 65a2e1cb5cb..7901ae005d9 100644 --- a/spec/lib/gitlab/github_import/importer/issue_importer_spec.rb +++ b/spec/lib/gitlab/github_import/importer/issue_importer_spec.rb @@ -78,11 +78,6 @@ describe Gitlab::GithubImport::Importer::IssueImporter, :clean_gitlab_redis_cach .to receive(:id_for) .with(issue) .and_return(milestone.id) - - allow(importer.user_finder) - .to receive(:author_id_for) - .with(issue) - .and_return([user.id, true]) end context 'when the issue author could be found' do @@ -177,23 +172,6 @@ describe Gitlab::GithubImport::Importer::IssueImporter, :clean_gitlab_redis_cach expect(importer.create_issue).to be_a_kind_of(Numeric) end - - it 'triggers internal_id functionality to track greatest iids' do - allow(importer.user_finder) - .to receive(:author_id_for) - .with(issue) - .and_return([user.id, true]) - - issue = build_stubbed(:issue, project: project) - allow(importer) - .to receive(:insert_and_return_id) - .and_return(issue.id) - allow(project.issues).to receive(:find).with(issue.id).and_return(issue) - - expect(issue).to receive(:ensure_project_iid!) - - importer.create_issue - end end describe '#create_assignees' do From 4ac06d344b8587ec3abdf2920f838babf7c6c324 Mon Sep 17 00:00:00 2001 From: Andreas Brandl Date: Wed, 2 Jan 2019 16:21:17 +0100 Subject: [PATCH 3/6] Revert " Trigger iid logic from GitHub importer for merge requests." This reverts commit fb98496f49bbb324b808523ea97f0844682fe1ac. --- lib/gitlab/import/merge_request_helpers.rb | 4 ---- .../importer/pull_request_importer_spec.rb | 10 ---------- 2 files changed, 14 deletions(-) diff --git a/lib/gitlab/import/merge_request_helpers.rb b/lib/gitlab/import/merge_request_helpers.rb index 9215067d973..fa3ff6c3f12 100644 --- a/lib/gitlab/import/merge_request_helpers.rb +++ b/lib/gitlab/import/merge_request_helpers.rb @@ -24,10 +24,6 @@ module Gitlab merge_request = project.merge_requests.reload.find(merge_request_id) - # We use .insert_and_return_id which effectively disables all callbacks. - # Trigger iid logic here to make sure we track internal id values consistently. - merge_request.ensure_target_project_iid! - [merge_request, false] end rescue ActiveRecord::InvalidForeignKey diff --git a/spec/lib/gitlab/github_import/importer/pull_request_importer_spec.rb b/spec/lib/gitlab/github_import/importer/pull_request_importer_spec.rb index 25684ea9e2c..0f21b8843b6 100644 --- a/spec/lib/gitlab/github_import/importer/pull_request_importer_spec.rb +++ b/spec/lib/gitlab/github_import/importer/pull_request_importer_spec.rb @@ -111,16 +111,6 @@ describe Gitlab::GithubImport::Importer::PullRequestImporter, :clean_gitlab_redi expect(mr).to be_instance_of(MergeRequest) expect(exists).to eq(false) end - - it 'triggers internal_id functionality to track greatest iids' do - mr = build_stubbed(:merge_request, source_project: project, target_project: project) - allow(importer).to receive(:insert_and_return_id).and_return(mr.id) - allow(project.merge_requests).to receive(:find).with(mr.id).and_return(mr) - - expect(mr).to receive(:ensure_target_project_iid!) - - importer.create_merge_request - end end context 'when the author could not be found' do From 13f5f7e64de8016bafc6c897a4dc405250f2f63d Mon Sep 17 00:00:00 2001 From: Andreas Brandl Date: Wed, 2 Jan 2019 16:25:01 +0100 Subject: [PATCH 4/6] Add migration to cleanup iid records --- ...elete_inconsistent_internal_id_records2.rb | 43 +++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 db/post_migrate/20190102152410_delete_inconsistent_internal_id_records2.rb diff --git a/db/post_migrate/20190102152410_delete_inconsistent_internal_id_records2.rb b/db/post_migrate/20190102152410_delete_inconsistent_internal_id_records2.rb new file mode 100644 index 00000000000..ddcddcf72a3 --- /dev/null +++ b/db/post_migrate/20190102152410_delete_inconsistent_internal_id_records2.rb @@ -0,0 +1,43 @@ +# frozen_string_literal: true +class DeleteInconsistentInternalIdRecords2 < ActiveRecord::Migration[5.0] + include Gitlab::Database::MigrationHelpers + + DOWNTIME = false + + disable_ddl_transaction! + + # This migration cleans up any inconsistent records in internal_ids. + # + # That is, it deletes records that track a `last_value` that is + # smaller than the maximum internal id (usually `iid`) found in + # the corresponding model records. + + def up + disable_statement_timeout do + delete_internal_id_records('milestones', 'project_id') + delete_internal_id_records('milestones', 'namespace_id', 'group_id') + end + end + + class InternalId < ActiveRecord::Base + self.table_name = 'internal_ids' + enum usage: { issues: 0, merge_requests: 1, deployments: 2, milestones: 3, epics: 4, ci_pipelines: 5 } + end + + private + + def delete_internal_id_records(base_table, scope_column_name, base_scope_column_name = scope_column_name) + sql = <<~SQL + SELECT id FROM ( -- workaround for MySQL + SELECT internal_ids.id FROM ( + SELECT #{base_scope_column_name} AS #{scope_column_name}, max(iid) as maximum_iid from #{base_table} GROUP BY #{scope_column_name} + ) maxima JOIN internal_ids USING (#{scope_column_name}) + WHERE internal_ids.usage=#{InternalId.usages.fetch(base_table)} AND maxima.maximum_iid > internal_ids.last_value + ) internal_ids + SQL + + InternalId.where("id IN (#{sql})").tap do |ids| # rubocop:disable GitlabSecurity/SqlInjection + say "Deleting internal_id records for #{base_table}: #{ids.map { |i| [i.project_id, i.last_value] }}" unless ids.empty? + end.delete_all + end +end From fede3a0b75e5db720bd4684faca5626d4ef109d2 Mon Sep 17 00:00:00 2001 From: Andreas Brandl Date: Wed, 2 Jan 2019 16:50:37 +0100 Subject: [PATCH 5/6] Flush InternalId records after import After the import has finished, we flush (delete) the InternalId records related to the project at hand. This means we're starting over with tracking correct internal id values, a new record will be created automatically when the next internal id is generated. The GitHub importer assigns iid values by using supplied values from GitHub. We skip tracking internal id values during the import in favor of higher concurrency. Deleting any InternalId records after the import has finished is an extra measure to guarantee consistency. Closes https://gitlab.com/gitlab-org/gitlab-ce/issues/54270. --- app/models/internal_id.rb | 11 +++++++++++ app/models/project.rb | 7 +++++++ spec/models/internal_id_spec.rb | 23 +++++++++++++++++++++++ spec/models/project_spec.rb | 1 + 4 files changed, 42 insertions(+) diff --git a/app/models/internal_id.rb b/app/models/internal_id.rb index 4eb211eff61..e75c6eb2331 100644 --- a/app/models/internal_id.rb +++ b/app/models/internal_id.rb @@ -66,6 +66,17 @@ class InternalId < ActiveRecord::Base InternalIdGenerator.new(subject, scope, usage, init).generate end + # Flushing records is generally safe in a sense that those + # records are going to be re-created when needed. + # + # A filter condition has to be provided to not accidentally flush + # records for all projects. + def flush_records!(filter) + raise ArgumentError, "filter cannot be empty" if filter.blank? + + where(filter).delete_all + end + def available? @available_flag ||= ActiveRecord::Migrator.current_version >= REQUIRED_SCHEMA_VERSION # rubocop:disable Gitlab/PredicateMemoization end diff --git a/app/models/project.rb b/app/models/project.rb index 15465d9b356..7f6da130658 100644 --- a/app/models/project.rb +++ b/app/models/project.rb @@ -1585,6 +1585,13 @@ class Project < ActiveRecord::Base def after_import repository.after_import wiki.repository.after_import + + # The import assigns iid values on its own, e.g. by re-using GitHub ids. + # Flush existing InternalId records for this project for consistency reasons. + # Those records are going to be recreated with the next normal creation + # of a model instance (e.g. an Issue). + InternalId.flush_records!(project: self) + import_state.finish import_state.remove_jid update_project_counter_caches diff --git a/spec/models/internal_id_spec.rb b/spec/models/internal_id_spec.rb index 4696341c05f..d32f163f05b 100644 --- a/spec/models/internal_id_spec.rb +++ b/spec/models/internal_id_spec.rb @@ -13,6 +13,29 @@ describe InternalId do it { is_expected.to validate_presence_of(:usage) } end + describe '.flush_records!' do + subject { described_class.flush_records!(project: project) } + + let(:another_project) { create(:project) } + + before do + create_list(:issue, 2, project: project) + create_list(:issue, 2, project: another_project) + end + + it 'deletes all records for the given project' do + expect { subject }.to change { described_class.where(project: project).count }.from(1).to(0) + end + + it 'retains records for other projects' do + expect { subject }.not_to change { described_class.where(project: another_project).count } + end + + it 'does not allow an empty filter' do + expect { described_class.flush_records!({}) }.to raise_error(/filter cannot be empty/) + end + end + describe '.generate_next' do subject { described_class.generate_next(issue, scope, usage, init) } diff --git a/spec/models/project_spec.rb b/spec/models/project_spec.rb index 4b061b5e24f..7d3f2dfe374 100644 --- a/spec/models/project_spec.rb +++ b/spec/models/project_spec.rb @@ -3767,6 +3767,7 @@ describe Project do expect(import_state).to receive(:remove_jid) expect(project).to receive(:after_create_default_branch) expect(project).to receive(:refresh_markdown_cache!) + expect(InternalId).to receive(:flush_records!).with(project: project) project.after_import end From f82eea8628770ce8451bdaf4819cd3879c163853 Mon Sep 17 00:00:00 2001 From: Andreas Brandl Date: Fri, 11 Jan 2019 15:02:38 +0100 Subject: [PATCH 6/6] Add changelog for performance --- changelogs/unreleased/ab-54270-github-iid.yml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 changelogs/unreleased/ab-54270-github-iid.yml diff --git a/changelogs/unreleased/ab-54270-github-iid.yml b/changelogs/unreleased/ab-54270-github-iid.yml new file mode 100644 index 00000000000..1776b0aeb86 --- /dev/null +++ b/changelogs/unreleased/ab-54270-github-iid.yml @@ -0,0 +1,5 @@ +--- +title: Improve efficiency of GitHub importer by reducing amount of locks needed. +merge_request: 24102 +author: +type: performance