diff --git a/app/models/concerns/spammable.rb b/app/models/concerns/spammable.rb index 2daea388939..4901cd832ff 100644 --- a/app/models/concerns/spammable.rb +++ b/app/models/concerns/spammable.rb @@ -111,7 +111,7 @@ module Spammable end # Override in Spammable if further checks are necessary - def check_for_spam? + def check_for_spam?(user:) true end diff --git a/app/models/issue.rb b/app/models/issue.rb index 00fcba5298a..d1ef84a8a97 100644 --- a/app/models/issue.rb +++ b/app/models/issue.rb @@ -437,10 +437,10 @@ class Issue < ApplicationRecord user, project.external_authorization_classification_label) end - def check_for_spam? + def check_for_spam?(user:) # content created via support bots is always checked for spam, EVEN if # the issue is not publicly visible and/or confidential - return true if author.support_bot? && spammable_attribute_changed? + return true if user.support_bot? && spammable_attribute_changed? # Only check for spam on issues which are publicly visible (and thus indexed in search engines) return false unless publicly_visible? diff --git a/app/models/snippet.rb b/app/models/snippet.rb index 68957dd6b22..dd76f2c3c84 100644 --- a/app/models/snippet.rb +++ b/app/models/snippet.rb @@ -246,7 +246,7 @@ class Snippet < ApplicationRecord notes.includes(:author) end - def check_for_spam? + def check_for_spam?(user:) visibility_level_changed?(to: Snippet::PUBLIC) || (public? && (title_changed? || content_changed?)) end diff --git a/app/services/spam/spam_action_service.rb b/app/services/spam/spam_action_service.rb index ec16ce19cf6..2a28b66f09b 100644 --- a/app/services/spam/spam_action_service.rb +++ b/app/services/spam/spam_action_service.rb @@ -28,7 +28,7 @@ module Spam ServiceResponse.success(message: "CAPTCHA successfully verified") else return ServiceResponse.success(message: 'Skipped spam check because user was allowlisted') if allowlisted?(user) - return ServiceResponse.success(message: 'Skipped spam check because it was not required') unless check_for_spam? + return ServiceResponse.success(message: 'Skipped spam check because it was not required') unless check_for_spam?(user: user) perform_spam_service_check ServiceResponse.success(message: "Spam check performed. Check #{target.class.name} spammable model for any errors or CAPTCHA requirement") @@ -94,7 +94,7 @@ module Spam def create_spam_log @spam_log = SpamLog.create!( { - user_id: target.author_id, + user_id: user.id, title: target.spam_title, description: target.spam_description, source_ip: spam_params.ip_address, diff --git a/doc/development/background_migrations.md b/doc/development/background_migrations.md index 534621caf8f..4e91c4a0e25 100644 --- a/doc/development/background_migrations.md +++ b/doc/development/background_migrations.md @@ -174,13 +174,18 @@ roughly be as follows: 1. Release B: 1. Deploy code so that the application starts using the new column and stops scheduling jobs for newly created data. - 1. In a post-deployment migration you'll need to ensure no jobs remain. - 1. Use `Gitlab::BackgroundMigration.steal` to process any remaining - jobs in Sidekiq. - 1. Reschedule the migration to be run directly (i.e. not through Sidekiq) - on any rows that weren't migrated by Sidekiq. This can happen if, for - instance, Sidekiq received a SIGKILL, or if a particular batch failed - enough times to be marked as dead. + 1. In a post-deployment migration use `finalize_background_migration` from + `BackgroundMigrationHelpers` to ensure no jobs remain. This helper will: + 1. Use `Gitlab::BackgroundMigration.steal` to process any remaining + jobs in Sidekiq. + 1. Reschedule the migration to be run directly (i.e. not through Sidekiq) + on any rows that weren't migrated by Sidekiq. This can happen if, for + instance, Sidekiq received a SIGKILL, or if a particular batch failed + enough times to be marked as dead. + 1. Remove `Gitlab::Database::BackgroundMigrationJob` rows where + `status = succeeded`. To retain diagnostic information that may + help with future bug tracking you can skip this step by specifying + the `delete_tracking_jobs: false` parameter. 1. Remove the old column. This may also require a bump to the [import/export version](../user/project/settings/import_export.md), if diff --git a/lib/gitlab/database/migrations/background_migration_helpers.rb b/lib/gitlab/database/migrations/background_migration_helpers.rb index 28491a934a0..19d80ba1d64 100644 --- a/lib/gitlab/database/migrations/background_migration_helpers.rb +++ b/lib/gitlab/database/migrations/background_migration_helpers.rb @@ -264,6 +264,34 @@ module Gitlab migration end + # Force a background migration to complete. + # + # WARNING: This method will block the caller and move the background migration from an + # asynchronous migration to a synchronous migration. + # + # 1. Steal work from sidekiq and perform immediately (avoid duplicates generated by step 2). + # 2. Process any pending tracked jobs. + # 3. Steal work from sidekiq and perform immediately (clear anything left from step 2). + # 4. Optionally remove job tracking information. + # + # This method does not garauntee that all jobs completed successfully. + def finalize_background_migration(class_name, delete_tracking_jobs: ['succeeded']) + # Empty the sidekiq queue. + Gitlab::BackgroundMigration.steal(class_name) + + # Process pending tracked jobs. + jobs = Gitlab::Database::BackgroundMigrationJob.pending.for_migration_class(class_name) + jobs.find_each do |job| + BackgroundMigrationWorker.new.perform(job.class_name, job.arguments) + end + + # Empty the sidekiq queue. + Gitlab::BackgroundMigration.steal(class_name) + + # Delete job tracking rows. + delete_job_tracking(class_name, status: delete_tracking_jobs) if delete_tracking_jobs + end + def perform_background_migration_inline? Rails.env.test? || Rails.env.development? end @@ -304,6 +332,12 @@ module Gitlab end end + def delete_job_tracking(class_name, status: 'succeeded') + status = Array(status).map { |s| Gitlab::Database::BackgroundMigrationJob.statuses[s] } + jobs = Gitlab::Database::BackgroundMigrationJob.where(status: status).for_migration_class(class_name) + jobs.each_batch { |batch| batch.delete_all } + end + private def track_in_database(class_name, arguments) diff --git a/spec/lib/gitlab/database/migrations/background_migration_helpers_spec.rb b/spec/lib/gitlab/database/migrations/background_migration_helpers_spec.rb index e096e7f6e91..1a7116e75e5 100644 --- a/spec/lib/gitlab/database/migrations/background_migration_helpers_spec.rb +++ b/spec/lib/gitlab/database/migrations/background_migration_helpers_spec.rb @@ -581,4 +581,101 @@ RSpec.describe Gitlab::Database::Migrations::BackgroundMigrationHelpers do model.delete_queued_jobs('BackgroundMigrationClassName') end end + + describe '#finalized_background_migration' do + include_context 'background migration job class' + + let!(:tracked_pending_job) { create(:background_migration_job, class_name: job_class_name, status: :pending, arguments: [1]) } + let!(:tracked_successful_job) { create(:background_migration_job, class_name: job_class_name, status: :succeeded, arguments: [2]) } + + before do + Sidekiq::Testing.disable! do + BackgroundMigrationWorker.perform_async(job_class_name, [1, 2]) + BackgroundMigrationWorker.perform_async(job_class_name, [3, 4]) + BackgroundMigrationWorker.perform_in(10, job_class_name, [5, 6]) + BackgroundMigrationWorker.perform_in(20, job_class_name, [7, 8]) + end + end + + it_behaves_like 'finalized tracked background migration' do + before do + model.finalize_background_migration(job_class_name) + end + end + + context 'when removing all tracked job records' do + # Force pending jobs to remain pending. + let!(:job_perform_method) { ->(*arguments) { } } + + before do + model.finalize_background_migration(job_class_name, delete_tracking_jobs: %w[pending succeeded]) + end + + it_behaves_like 'finalized tracked background migration' + it_behaves_like 'removed tracked jobs', 'pending' + it_behaves_like 'removed tracked jobs', 'succeeded' + end + + context 'when retaining all tracked job records' do + before do + model.finalize_background_migration(job_class_name, delete_tracking_jobs: false) + end + + it_behaves_like 'finalized background migration' + include_examples 'retained tracked jobs', 'succeeded' + end + + context 'during retry race condition' do + let(:queue_items_added) { [] } + let!(:job_perform_method) do + ->(*arguments) do + Gitlab::Database::BackgroundMigrationJob.mark_all_as_succeeded( + RSpec.current_example.example_group_instance.job_class_name, + arguments + ) + + # Mock another process pushing queue jobs. + queue_items_added = RSpec.current_example.example_group_instance.queue_items_added + if queue_items_added.count < 10 + Sidekiq::Testing.disable! do + job_class_name = RSpec.current_example.example_group_instance.job_class_name + queue_items_added << BackgroundMigrationWorker.perform_async(job_class_name, [Time.current]) + queue_items_added << BackgroundMigrationWorker.perform_in(10, job_class_name, [Time.current]) + end + end + end + end + + it_behaves_like 'finalized tracked background migration' do + before do + model.finalize_background_migration(job_class_name, delete_tracking_jobs: ['succeeded']) + end + end + end + end + + describe '#delete_job_tracking' do + let!(:job_class_name) { 'TestJob' } + + let!(:tracked_pending_job) { create(:background_migration_job, class_name: job_class_name, status: :pending, arguments: [1]) } + let!(:tracked_successful_job) { create(:background_migration_job, class_name: job_class_name, status: :succeeded, arguments: [2]) } + + context 'with default status' do + before do + model.delete_job_tracking(job_class_name) + end + + include_examples 'retained tracked jobs', 'pending' + include_examples 'removed tracked jobs', 'succeeded' + end + + context 'with explicit status' do + before do + model.delete_job_tracking(job_class_name, status: %w[pending succeeded]) + end + + include_examples 'removed tracked jobs', 'pending' + include_examples 'removed tracked jobs', 'succeeded' + end + end end diff --git a/spec/models/concerns/spammable_spec.rb b/spec/models/concerns/spammable_spec.rb index 3c5f3b2d2ad..5edaab56e2d 100644 --- a/spec/models/concerns/spammable_spec.rb +++ b/spec/models/concerns/spammable_spec.rb @@ -28,11 +28,11 @@ RSpec.describe Spammable do it 'returns true for public project' do issue.project.update_attribute(:visibility_level, Gitlab::VisibilityLevel::PUBLIC) - expect(issue.check_for_spam?).to eq(true) + expect(issue.check_for_spam?(user: issue.author)).to eq(true) end it 'returns false for other visibility levels' do - expect(issue.check_for_spam?).to eq(false) + expect(issue.check_for_spam?(user: issue.author)).to eq(false) end end diff --git a/spec/models/issue_spec.rb b/spec/models/issue_spec.rb index 441446bae60..22d34e7a88c 100644 --- a/spec/models/issue_spec.rb +++ b/spec/models/issue_spec.rb @@ -1112,14 +1112,14 @@ RSpec.describe Issue do with_them do it 'checks for spam when necessary' do - author = support_bot? ? support_bot : user + active_user = support_bot? ? support_bot : user project = reusable_project project.update!(visibility_level: visibility_level) - issue = create(:issue, project: project, confidential: confidential, description: 'original description', author: author) + issue = create(:issue, project: project, confidential: confidential, description: 'original description', author: support_bot) issue.assign_attributes(new_attributes) - expect(issue.check_for_spam?).to eq(check_for_spam?) + expect(issue.check_for_spam?(user: active_user)).to eq(check_for_spam?) end end end diff --git a/spec/models/snippet_spec.rb b/spec/models/snippet_spec.rb index 19d3895177f..4e20a83f18e 100644 --- a/spec/models/snippet_spec.rb +++ b/spec/models/snippet_spec.rb @@ -431,7 +431,7 @@ RSpec.describe Snippet do subject do snippet.assign_attributes(title: title) - snippet.check_for_spam? + snippet.check_for_spam?(user: snippet.author) end context 'when public and spammable attributes changed' do @@ -455,7 +455,7 @@ RSpec.describe Snippet do snippet.save! snippet.visibility_level = Snippet::PUBLIC - expect(snippet.check_for_spam?).to be_truthy + expect(snippet.check_for_spam?(user: snippet.author)).to be_truthy end end diff --git a/spec/services/spam/spam_action_service_spec.rb b/spec/services/spam/spam_action_service_spec.rb index 3a92e5acb5a..8ddfa7ed3a0 100644 --- a/spec/services/spam/spam_action_service_spec.rb +++ b/spec/services/spam/spam_action_service_spec.rb @@ -5,7 +5,7 @@ require 'spec_helper' RSpec.describe Spam::SpamActionService do include_context 'includes Spam constants' - let(:issue) { create(:issue, project: project, author: user) } + let(:issue) { create(:issue, project: project, author: author) } let(:fake_ip) { '1.2.3.4' } let(:fake_user_agent) { 'fake-user-agent' } let(:fake_referer) { 'fake-http-referer' } @@ -23,6 +23,7 @@ RSpec.describe Spam::SpamActionService do let_it_be(:project) { create(:project, :public) } let_it_be(:user) { create(:user) } + let_it_be(:author) { create(:user) } before do issue.spam = false diff --git a/spec/support/shared_contexts/lib/gitlab/database/background_migration_job_shared_context.rb b/spec/support/shared_contexts/lib/gitlab/database/background_migration_job_shared_context.rb new file mode 100644 index 00000000000..382eb796f8e --- /dev/null +++ b/spec/support/shared_contexts/lib/gitlab/database/background_migration_job_shared_context.rb @@ -0,0 +1,21 @@ +# frozen_string_literal: true + +RSpec.shared_context 'background migration job class' do + let!(:job_class_name) { 'TestJob' } + let!(:job_class) { Class.new } + let!(:job_perform_method) do + ->(*arguments) do + Gitlab::Database::BackgroundMigrationJob.mark_all_as_succeeded( + # Value is 'TestJob' defined by :job_class_name in the let! above. + # Scoping prohibits us from directly referencing job_class_name. + RSpec.current_example.example_group_instance.job_class_name, + arguments + ) + end + end + + before do + job_class.define_method(:perform, job_perform_method) + expect(Gitlab::BackgroundMigration).to receive(:migration_class_for).with(job_class_name).at_least(:once) { job_class } + end +end diff --git a/spec/support/shared_examples/lib/gitlab/database/background_migration_job_shared_examples.rb b/spec/support/shared_examples/lib/gitlab/database/background_migration_job_shared_examples.rb index 20f3270526e..7888ade56eb 100644 --- a/spec/support/shared_examples/lib/gitlab/database/background_migration_job_shared_examples.rb +++ b/spec/support/shared_examples/lib/gitlab/database/background_migration_job_shared_examples.rb @@ -21,3 +21,46 @@ RSpec.shared_examples 'marks background migration job records' do expect(jobs_updated).to eq(1) end end + +RSpec.shared_examples 'finalized background migration' do + it 'processed the scheduled sidekiq queue' do + queued = Sidekiq::ScheduledSet + .new + .select do |scheduled| + scheduled.klass == 'BackgroundMigrationWorker' && + scheduled.args.first == job_class_name + end + expect(queued.size).to eq(0) + end + + it 'processed the async sidekiq queue' do + queued = Sidekiq::Queue.new('BackgroundMigrationWorker') + .select { |scheduled| scheduled.klass == job_class_name } + expect(queued.size).to eq(0) + end + + include_examples 'removed tracked jobs', 'pending' +end + +RSpec.shared_examples 'finalized tracked background migration' do + include_examples 'finalized background migration' + include_examples 'removed tracked jobs', 'succeeded' +end + +RSpec.shared_examples 'removed tracked jobs' do |status| + it "removes '#{status}' tracked jobs" do + jobs = Gitlab::Database::BackgroundMigrationJob + .where(status: Gitlab::Database::BackgroundMigrationJob.statuses[status]) + .for_migration_class(job_class_name) + expect(jobs).to be_empty + end +end + +RSpec.shared_examples 'retained tracked jobs' do |status| + it "retains '#{status}' tracked jobs" do + jobs = Gitlab::Database::BackgroundMigrationJob + .where(status: Gitlab::Database::BackgroundMigrationJob.statuses[status]) + .for_migration_class(job_class_name) + expect(jobs).to be_present + end +end