# Adds an atomic internal id generation scheme to a model.
#
# Internal ids are generated atomically and are unique within a given scope.
#
# Example: internal ids for Issue, scoped per Project:
# ```
# class Issue < ActiveRecord::Base
#   has_internal_id :iid, scope: :project, init: ->(s) { s.project.issues.maximum(:iid) }
# end
# ```
#
# Newly created issues then receive a per-project unique internal id,
# stored in the `iid` attribute of `Issue`.
#
# Atomicity is delegated to InternalId records. When no record exists yet for
# the given scope, one is created automatically and the `init` block is called
# to compute the initial value (in the example: the maximum `iid` of all
# issues within the project).
#
# A model may declare more than one internal id, possibly with different scopes.
module AtomicInternalId
  extend ActiveSupport::Concern

  module ClassMethods
    # Declares `column` as an internal id of the including model.
    #
    # column: Attribute that stores the generated internal id.
    # scope:  Name of the association that scopes the id sequence.
    # init:   Callable handed on to InternalId.generate_next; see the module
    #         comment for when it is invoked.
    #
    # Registers a before_validation callback (on create only) and validates
    # that `column` is present and numeric.
    def has_internal_id(column, scope:, init:) # rubocop:disable Naming/PredicateName
      before_validation(on: :create) do
        # An internal id that was assigned explicitly is left untouched.
        next unless read_attribute(column).blank?

        scope_attrs = { scope => association(scope).reader }
        # The usage handed to InternalId is derived from the model's table name.
        generated = InternalId.generate_next(self, scope_attrs, self.class.table_name.to_sym, init)

        write_attribute(column, generated)
      end

      validates column, presence: true, numericality: true
    end
  end

  # URL parameter representation of the record.
  #
  # NOTE: this always reads `iid`, independent of the column name(s) passed to
  # `has_internal_id`.
  def to_param
    iid.to_s
  end
end
class InternalId < ActiveRecord::Base
  belongs_to :project

  enum usage: { issues: 0 }

  validates :usage, presence: true

  # Schema version from which on the `internal_ids` table is expected to exist
  # (compared against ActiveRecord::Migrator.current_version in .available?).
  REQUIRED_SCHEMA_VERSION = 20180305095250

  # Increments #last_value, saves the record and returns the new value.
  #
  # The record is locked via `lock!` before the increment. With ActiveRecord's
  # pessimistic locking this is a row lock (`SELECT ... FOR UPDATE` by default),
  # which on PostgreSQL also takes a `ROW SHARE` lock on the table.
  # As such, the increment is atomic and safe to be called concurrently.
  #
  # A nil #last_value is treated as 0, so the first returned value is 1.
  def increment_and_save!
    lock!
    self.last_value = (last_value || 0) + 1
    save!
    last_value
  end

  class << self
    # Returns the next internal id for the given scope and usage.
    #
    # subject: The instance an internal id is generated for (passed to init).
    # scope:   Hash of attributes that define the scope for id generation.
    # usage:   Symbol naming the usage, see InternalId.usages.
    # init:    Callable returning the initial last value (or nil).
    def generate_next(subject, scope, usage, init)
      # Shortcut if `internal_ids` table is not available (yet)
      # This can be the case in other (unrelated) migration specs
      return (init.call(subject) || 0) + 1 unless available?

      InternalIdGenerator.new(subject, scope, usage, init).generate
    end

    # Whether the current schema version is recent enough for InternalId.
    #
    # Only a truthy result is memoized: as long as the schema is behind,
    # the migrator version is checked again on every call.
    def available?
      @available_flag ||= ActiveRecord::Migrator.current_version >= REQUIRED_SCHEMA_VERSION # rubocop:disable Gitlab/PredicateMemoization
    end

    # Flushes cached information about schema
    def reset_column_information
      @available_flag = nil
      super
    end
  end

  # Generates the next internal id for a given scope and usage.
  #
  # For currently supported usages, see the InternalId#usage enum.
  #
  # The generator implements a locking scheme that has the following properties:
  # 1) Generated sequence of internal ids is unique per (scope and usage)
  # 2) The method is thread-safe and may be used in concurrent threads/processes.
  # 3) The generated sequence is gapless.
  # 4) In the absence of a record in the internal_ids table, one will be created
  #    and last_value will be calculated on the fly.
  class InternalIdGenerator
    # NOTE: `scope_attrs` is never assigned in this class, so it always reads
    # as nil; it is kept only so existing callers of the reader keep working.
    attr_reader :subject, :scope, :init, :scope_attrs, :usage

    # subject: The instance we're generating an internal id for. Gets passed to init if called.
    # scope: Attributes that define the scope for id generation.
    # usage: Symbol to define the usage of the internal id, see InternalId.usages
    # init: Block that gets called to initialize InternalId record if not present
    #       Make sure to not throw exceptions in the absence of records (if this is expected).
    #
    # Raises ArgumentError if the scope is empty or the usage is unknown.
    def initialize(subject, scope, usage, init)
      @subject = subject
      @scope = scope
      @init = init
      @usage = usage

      raise ArgumentError, 'Scope is not well-defined, need at least one column for scope (given: 0)' if scope.empty?

      unless InternalId.usages.key?(usage.to_s)
        raise ArgumentError, "Usage '#{usage}' is unknown. Supported values are #{InternalId.usages.keys} from InternalId.usages"
      end
    end

    # Generates next internal id and returns it
    def generate
      subject.transaction do
        # Create a record in internal_ids if one does not yet exist
        # and increment its last value
        #
        # The record gets locked inside InternalId#increment_and_save!
        (lookup || create_record).increment_and_save!
      end
    end

    private

    # Retrieve InternalId record for (scope, usage) combination, if it exists
    def lookup
      InternalId.find_by(**scope, usage: usage_value)
    end

    # Integer enum value stored in the `usage` column for the given usage.
    def usage_value
      @usage_value ||= InternalId.usages[usage.to_s]
    end

    # Create InternalId record for (scope, usage) combination, if it doesn't exist
    #
    # We blindly insert without synchronization. If another process
    # was faster in doing this, we'll realize once we hit the unique key constraint
    # violation. We can safely roll-back the nested transaction and perform
    # a lookup instead to retrieve the record.
    def create_record
      subject.transaction(requires_new: true) do
        InternalId.create!(
          **scope,
          usage: usage_value,
          last_value: init.call(subject) || 0
        )
      end
    rescue ActiveRecord::RecordNotUnique
      lookup
    end
  end
end
# Creates the `internal_ids` table that backs the InternalId model.
#
# Each row stores the last generated value (`last_value`) for one
# (usage, project) pair; the pair is kept unique by an index. Rows are
# removed together with their project (foreign key with ON DELETE CASCADE).
class CreateInternalIdsTable < ActiveRecord::Migration
  include Gitlab::Database::MigrationHelpers

  DOWNTIME = false

  def change
    create_table(:internal_ids, id: :bigserial) do |table|
      table.references :project, null: false, foreign_key: { on_delete: :cascade }
      table.integer :usage, null: false
      table.integer :last_value, null: false

      # At most one sequence per usage and project.
      table.index %i[usage project_id], unique: true
    end
  end
end
["user_id"], name: "index_identities_on_user_id", using: :btree + create_table "internal_ids", id: :bigserial, force: :cascade do |t| + t.integer "project_id", null: false + t.integer "usage", null: false + t.integer "last_value", null: false + end + + add_index "internal_ids", ["usage", "project_id"], name: "index_internal_ids_on_usage_and_project_id", unique: true, using: :btree + create_table "issue_assignees", id: false, force: :cascade do |t| t.integer "user_id", null: false t.integer "issue_id", null: false @@ -2058,6 +2066,7 @@ ActiveRecord::Schema.define(version: 20180309160427) do add_foreign_key "gpg_signatures", "gpg_keys", on_delete: :nullify add_foreign_key "gpg_signatures", "projects", on_delete: :cascade add_foreign_key "group_custom_attributes", "namespaces", column: "group_id", on_delete: :cascade + add_foreign_key "internal_ids", "projects", on_delete: :cascade add_foreign_key "issue_assignees", "issues", name: "fk_b7d881734a", on_delete: :cascade add_foreign_key "issue_assignees", "users", name: "fk_5e0c8d9154", on_delete: :cascade add_foreign_key "issue_metrics", "issues", on_delete: :cascade diff --git a/spec/factories/internal_ids.rb b/spec/factories/internal_ids.rb new file mode 100644 index 00000000000..fbde07a391a --- /dev/null +++ b/spec/factories/internal_ids.rb @@ -0,0 +1,7 @@ +FactoryBot.define do + factory :internal_id do + project + usage :issues + last_value { project.issues.maximum(:iid) || 0 } + end +end diff --git a/spec/lib/gitlab/import_export/all_models.yml b/spec/lib/gitlab/import_export/all_models.yml index bece82e531a..a204a8f1ffe 100644 --- a/spec/lib/gitlab/import_export/all_models.yml +++ b/spec/lib/gitlab/import_export/all_models.yml @@ -279,6 +279,7 @@ project: - lfs_file_locks - project_badges - source_of_merge_requests +- internal_ids award_emoji: - awardable - user diff --git a/spec/models/concerns/issuable_spec.rb b/spec/models/concerns/issuable_spec.rb index 4b217df2e8f..f8874d14e3f 100644 --- 
require 'spec_helper'

# Covers InternalId.generate_next (record creation, concurrent insert fallback,
# sequence properties, schema-version shortcut) and #increment_and_save!.
describe InternalId do
  let(:project) { create(:project) }
  let(:usage) { :issues }
  let(:issue) { build(:issue, project: project) }
  let(:scope) { { project: project } }
  let(:init) { ->(s) { s.project.issues.size } }

  context 'validations' do
    it { is_expected.to validate_presence_of(:usage) }
  end

  describe '.generate_next' do
    subject { described_class.generate_next(issue, scope, usage, init) }

    context 'in the absence of a record' do
      it 'creates a record if not yet present' do
        expect { subject }.to change(described_class, :count).from(0).to(1)
      end

      it 'stores record attributes' do
        subject

        stored = described_class.first

        expect(stored.project).to eq(project)
        expect(stored.usage).to eq(usage.to_s)
      end

      context 'with existing issues' do
        before do
          create_list(:issue, rand(1..10), project: project)
          # Drop the records created as a side effect so that init has to run
          described_class.delete_all
        end

        it 'calculates last_value values automatically' do
          expect(subject).to eq(project.issues.size + 1)
        end
      end

      context 'with concurrent inserts on table' do
        it 'looks up the record if it was created concurrently' do
          lookup_args = scope.merge(usage: described_class.usages[usage.to_s])
          concurrent_record = double

          # first call, record not present
          expect(described_class).to receive(:find_by).with(lookup_args).and_return(nil)
          # second call, record was created by another process
          expect(described_class).to receive(:find_by).with(lookup_args).and_return(concurrent_record)
          expect(described_class).to receive(:create!).and_raise(ActiveRecord::RecordNotUnique, 'record not unique')
          expect(concurrent_record).to receive(:increment_and_save!)

          subject
        end
      end
    end

    it 'generates a strictly monotone, gapless sequence' do
      seq = Array.new(rand(100) + 1) do
        described_class.generate_next(issue, scope, usage, init)
      end
      lowest = seq.min
      normalized = seq.map { |value| value - lowest }

      expect(normalized).to eq((0...seq.size).to_a)
    end

    context 'with an insufficient schema version' do
      let(:init) { double('block') }

      before do
        # Forget the memoized availability so the stubbed version is consulted
        described_class.reset_column_information
        expect(ActiveRecord::Migrator).to receive(:current_version).and_return(InternalId::REQUIRED_SCHEMA_VERSION - 1)
      end

      it 'calculates next internal ids on the fly' do
        initial = rand(1..100)

        expect(init).to receive(:call).with(issue).and_return(initial)
        expect(subject).to eq(initial + 1)
      end
    end
  end

  describe '#increment_and_save!' do
    let(:id) { create(:internal_id) }

    subject { id.increment_and_save! }

    it 'returns incremented iid' do
      previous = id.last_value

      expect(subject).to eq(previous.succ)
    end

    it 'saves the record' do
      subject

      expect(id).not_to be_changed
    end

    context 'with last_value=nil' do
      let(:id) { build(:internal_id, last_value: nil) }

      it 'returns 1' do
        expect(subject).to eq(1)
      end
    end
  end
end
'.has_internal_id' do + describe 'Module inclusion' do + subject { described_class } + + it { is_expected.to include_module(AtomicInternalId) } + end + + describe 'Validation' do + subject { instance } + + before do + allow(InternalId).to receive(:generate_next).and_return(nil) + end + + it { is_expected.to validate_presence_of(internal_id_attribute) } + it { is_expected.to validate_numericality_of(internal_id_attribute) } + end + + describe 'internal id generation' do + subject { instance.save! } + + it 'calls InternalId.generate_next and sets internal id attribute' do + iid = rand(1..1000) + + expect(InternalId).to receive(:generate_next).with(instance, scope_attrs, usage, any_args).and_return(iid) + subject + expect(instance.public_send(internal_id_attribute)).to eq(iid) + end + + it 'does not overwrite an existing internal id' do + instance.public_send("#{internal_id_attribute}=", 4711) + + expect { subject }.not_to change { instance.public_send(internal_id_attribute) } + end + end + end +end