Merge branch 'mk-delete-conflicting-redirects-mysql' into 'master'
Clean up redirect routes that conflict with regular routes Closes #36229 See merge request gitlab-org/gitlab-ce!13783
This commit is contained in:
commit
36ad91d75a
6 changed files with 386 additions and 0 deletions
|
@ -0,0 +1,37 @@
|
|||
# See http://doc.gitlab.com/ce/development/migration_style_guide.html
|
||||
# for more information on how to write migrations for GitLab.
|
||||
|
||||
class DeleteConflictingRedirectRoutes < ActiveRecord::Migration
|
||||
include Gitlab::Database::MigrationHelpers
|
||||
|
||||
DOWNTIME = false
|
||||
MIGRATION = 'DeleteConflictingRedirectRoutesRange'.freeze
|
||||
BATCH_SIZE = 200 # At 200, I expect under 20s per batch, which is under our query timeout of 60s.
|
||||
DELAY_INTERVAL = 12.seconds
|
||||
|
||||
disable_ddl_transaction!
|
||||
|
||||
class Route < ActiveRecord::Base
|
||||
include EachBatch
|
||||
|
||||
self.table_name = 'routes'
|
||||
end
|
||||
|
||||
def up
|
||||
say opening_message
|
||||
|
||||
queue_background_migration_jobs_by_range_at_intervals(Route, MIGRATION, DELAY_INTERVAL, batch_size: BATCH_SIZE)
|
||||
end
|
||||
|
||||
def down
|
||||
# nothing
|
||||
end
|
||||
|
||||
def opening_message
|
||||
<<~MSG
|
||||
Clean up redirect routes that conflict with regular routes.
|
||||
See initial bug fix:
|
||||
https://gitlab.com/gitlab-org/gitlab-ce/merge_requests/13357
|
||||
MSG
|
||||
end
|
||||
end
|
|
@ -0,0 +1,41 @@
|
|||
module Gitlab
|
||||
module BackgroundMigration
|
||||
class DeleteConflictingRedirectRoutesRange
|
||||
class Route < ActiveRecord::Base
|
||||
self.table_name = 'routes'
|
||||
end
|
||||
|
||||
class RedirectRoute < ActiveRecord::Base
|
||||
self.table_name = 'redirect_routes'
|
||||
end
|
||||
|
||||
# start_id - The start ID of the range of events to process
|
||||
# end_id - The end ID of the range to process.
|
||||
def perform(start_id, end_id)
|
||||
return unless migrate?
|
||||
|
||||
conflicts = RedirectRoute.where(routes_match_redirects_clause(start_id, end_id))
|
||||
num_rows = conflicts.delete_all
|
||||
|
||||
Rails.logger.info("Gitlab::BackgroundMigration::DeleteConflictingRedirectRoutesRange [#{start_id}, #{end_id}] - Deleted #{num_rows} redirect routes that were conflicting with routes.")
|
||||
end
|
||||
|
||||
def migrate?
|
||||
Route.table_exists? && RedirectRoute.table_exists?
|
||||
end
|
||||
|
||||
def routes_match_redirects_clause(start_id, end_id)
|
||||
<<~ROUTES_MATCH_REDIRECTS
|
||||
EXISTS (
|
||||
SELECT 1 FROM routes
|
||||
WHERE (
|
||||
LOWER(redirect_routes.path) = LOWER(routes.path)
|
||||
OR LOWER(redirect_routes.path) LIKE LOWER(CONCAT(routes.path, '/%'))
|
||||
)
|
||||
AND routes.id BETWEEN #{start_id} AND #{end_id}
|
||||
)
|
||||
ROUTES_MATCH_REDIRECTS
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
|
@ -1,6 +1,9 @@
|
|||
module Gitlab
|
||||
module Database
|
||||
module MigrationHelpers
|
||||
BACKGROUND_MIGRATION_BATCH_SIZE = 1000 # Number of rows to process per job
|
||||
BACKGROUND_MIGRATION_JOB_BUFFER_SIZE = 1000 # Number of jobs to bulk queue at a time
|
||||
|
||||
# Adds `created_at` and `updated_at` columns with timezone information.
|
||||
#
|
||||
# This method is an improved version of Rails' built-in method `add_timestamps`.
|
||||
|
@ -653,6 +656,91 @@ into similar problems in the future (e.g. when new tables are created).
|
|||
EOF
|
||||
end
|
||||
end
|
||||
|
||||
# Bulk queues background migration jobs for an entire table, batched by ID range.
|
||||
# "Bulk" meaning many jobs will be pushed at a time for efficiency.
|
||||
# If you need a delay interval per job, then use `queue_background_migration_jobs_by_range_at_intervals`.
|
||||
#
|
||||
# model_class - The table being iterated over
|
||||
# job_class_name - The background migration job class as a string
|
||||
# batch_size - The maximum number of rows per job
|
||||
#
|
||||
# Example:
|
||||
#
|
||||
# class Route < ActiveRecord::Base
|
||||
# include EachBatch
|
||||
# self.table_name = 'routes'
|
||||
# end
|
||||
#
|
||||
# bulk_queue_background_migration_jobs_by_range(Route, 'ProcessRoutes')
|
||||
#
|
||||
# Where the model_class includes EachBatch, and the background migration exists:
|
||||
#
|
||||
# class Gitlab::BackgroundMigration::ProcessRoutes
|
||||
# def perform(start_id, end_id)
|
||||
# # do something
|
||||
# end
|
||||
# end
|
||||
def bulk_queue_background_migration_jobs_by_range(model_class, job_class_name, batch_size: BACKGROUND_MIGRATION_BATCH_SIZE)
|
||||
raise "#{model_class} does not have an ID to use for batch ranges" unless model_class.column_names.include?('id')
|
||||
|
||||
jobs = []
|
||||
|
||||
model_class.each_batch(of: batch_size) do |relation|
|
||||
start_id, end_id = relation.pluck('MIN(id), MAX(id)').first
|
||||
|
||||
if jobs.length >= BACKGROUND_MIGRATION_JOB_BUFFER_SIZE
|
||||
# Note: This code path generally only helps with many millions of rows
|
||||
# We push multiple jobs at a time to reduce the time spent in
|
||||
# Sidekiq/Redis operations. We're using this buffer based approach so we
|
||||
# don't need to run additional queries for every range.
|
||||
BackgroundMigrationWorker.perform_bulk(jobs)
|
||||
jobs.clear
|
||||
end
|
||||
|
||||
jobs << [job_class_name, [start_id, end_id]]
|
||||
end
|
||||
|
||||
BackgroundMigrationWorker.perform_bulk(jobs) unless jobs.empty?
|
||||
end
|
||||
|
||||
# Queues background migration jobs for an entire table, batched by ID range.
|
||||
# Each job is scheduled with a `delay_interval` in between.
|
||||
# If you use a small interval, then some jobs may run at the same time.
|
||||
#
|
||||
# model_class - The table being iterated over
|
||||
# job_class_name - The background migration job class as a string
|
||||
# delay_interval - The duration between each job's scheduled time (must respond to `to_f`)
|
||||
# batch_size - The maximum number of rows per job
|
||||
#
|
||||
# Example:
|
||||
#
|
||||
# class Route < ActiveRecord::Base
|
||||
# include EachBatch
|
||||
# self.table_name = 'routes'
|
||||
# end
|
||||
#
|
||||
# queue_background_migration_jobs_by_range_at_intervals(Route, 'ProcessRoutes', 1.minute)
|
||||
#
|
||||
# Where the model_class includes EachBatch, and the background migration exists:
|
||||
#
|
||||
# class Gitlab::BackgroundMigration::ProcessRoutes
|
||||
# def perform(start_id, end_id)
|
||||
# # do something
|
||||
# end
|
||||
# end
|
||||
def queue_background_migration_jobs_by_range_at_intervals(model_class, job_class_name, delay_interval, batch_size: BACKGROUND_MIGRATION_BATCH_SIZE)
|
||||
raise "#{model_class} does not have an ID to use for batch ranges" unless model_class.column_names.include?('id')
|
||||
|
||||
model_class.each_batch(of: batch_size) do |relation, index|
|
||||
start_id, end_id = relation.pluck('MIN(id), MAX(id)').first
|
||||
|
||||
# `BackgroundMigrationWorker.bulk_perform_in` schedules all jobs for
|
||||
# the same time, which is not helpful in most cases where we wish to
|
||||
# spread the work over time.
|
||||
BackgroundMigrationWorker.perform_in(delay_interval * index, job_class_name, [start_id, end_id])
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -0,0 +1,40 @@
|
|||
require 'spec_helper'
|
||||
|
||||
describe Gitlab::BackgroundMigration::DeleteConflictingRedirectRoutesRange, :migration, schema: 20170907170235 do
|
||||
let!(:redirect_routes) { table(:redirect_routes) }
|
||||
let!(:routes) { table(:routes) }
|
||||
|
||||
before do
|
||||
routes.create!(id: 1, source_id: 1, source_type: 'Namespace', path: 'foo1')
|
||||
routes.create!(id: 2, source_id: 2, source_type: 'Namespace', path: 'foo2')
|
||||
routes.create!(id: 3, source_id: 3, source_type: 'Namespace', path: 'foo3')
|
||||
routes.create!(id: 4, source_id: 4, source_type: 'Namespace', path: 'foo4')
|
||||
routes.create!(id: 5, source_id: 5, source_type: 'Namespace', path: 'foo5')
|
||||
|
||||
# Valid redirects
|
||||
redirect_routes.create!(source_id: 1, source_type: 'Namespace', path: 'bar')
|
||||
redirect_routes.create!(source_id: 1, source_type: 'Namespace', path: 'bar2')
|
||||
redirect_routes.create!(source_id: 2, source_type: 'Namespace', path: 'bar3')
|
||||
|
||||
# Conflicting redirects
|
||||
redirect_routes.create!(source_id: 2, source_type: 'Namespace', path: 'foo1')
|
||||
redirect_routes.create!(source_id: 1, source_type: 'Namespace', path: 'foo2')
|
||||
redirect_routes.create!(source_id: 1, source_type: 'Namespace', path: 'foo3')
|
||||
redirect_routes.create!(source_id: 1, source_type: 'Namespace', path: 'foo4')
|
||||
redirect_routes.create!(source_id: 1, source_type: 'Namespace', path: 'foo5')
|
||||
end
|
||||
|
||||
it 'deletes the conflicting redirect_routes in the range' do
|
||||
expect(redirect_routes.count).to eq(8)
|
||||
|
||||
expect do
|
||||
described_class.new.perform(1, 3)
|
||||
end.to change { redirect_routes.where("path like 'foo%'").count }.from(5).to(2)
|
||||
|
||||
expect do
|
||||
described_class.new.perform(4, 5)
|
||||
end.to change { redirect_routes.where("path like 'foo%'").count }.from(2).to(0)
|
||||
|
||||
expect(redirect_routes.count).to eq(3)
|
||||
end
|
||||
end
|
|
@ -914,4 +914,126 @@ describe Gitlab::Database::MigrationHelpers do
|
|||
.to raise_error(RuntimeError, /Your database user is not allowed/)
|
||||
end
|
||||
end
|
||||
|
||||
describe '#bulk_queue_background_migration_jobs_by_range', :sidekiq do
|
||||
context 'when the model has an ID column' do
|
||||
let!(:id1) { create(:user).id }
|
||||
let!(:id2) { create(:user).id }
|
||||
let!(:id3) { create(:user).id }
|
||||
|
||||
before do
|
||||
User.class_eval do
|
||||
include EachBatch
|
||||
end
|
||||
end
|
||||
|
||||
context 'with enough rows to bulk queue jobs more than once' do
|
||||
before do
|
||||
stub_const('Gitlab::Database::MigrationHelpers::BACKGROUND_MIGRATION_JOB_BUFFER_SIZE', 1)
|
||||
end
|
||||
|
||||
it 'queues jobs correctly' do
|
||||
Sidekiq::Testing.fake! do
|
||||
model.bulk_queue_background_migration_jobs_by_range(User, 'FooJob', batch_size: 2)
|
||||
|
||||
expect(BackgroundMigrationWorker.jobs[0]['args']).to eq(['FooJob', [id1, id2]])
|
||||
expect(BackgroundMigrationWorker.jobs[1]['args']).to eq(['FooJob', [id3, id3]])
|
||||
end
|
||||
end
|
||||
|
||||
it 'queues jobs in groups of buffer size 1' do
|
||||
expect(BackgroundMigrationWorker).to receive(:perform_bulk).with([['FooJob', [id1, id2]]])
|
||||
expect(BackgroundMigrationWorker).to receive(:perform_bulk).with([['FooJob', [id3, id3]]])
|
||||
|
||||
model.bulk_queue_background_migration_jobs_by_range(User, 'FooJob', batch_size: 2)
|
||||
end
|
||||
end
|
||||
|
||||
context 'with not enough rows to bulk queue jobs more than once' do
|
||||
it 'queues jobs correctly' do
|
||||
Sidekiq::Testing.fake! do
|
||||
model.bulk_queue_background_migration_jobs_by_range(User, 'FooJob', batch_size: 2)
|
||||
|
||||
expect(BackgroundMigrationWorker.jobs[0]['args']).to eq(['FooJob', [id1, id2]])
|
||||
expect(BackgroundMigrationWorker.jobs[1]['args']).to eq(['FooJob', [id3, id3]])
|
||||
end
|
||||
end
|
||||
|
||||
it 'queues jobs in bulk all at once (big buffer size)' do
|
||||
expect(BackgroundMigrationWorker).to receive(:perform_bulk).with([['FooJob', [id1, id2]],
|
||||
['FooJob', [id3, id3]]])
|
||||
|
||||
model.bulk_queue_background_migration_jobs_by_range(User, 'FooJob', batch_size: 2)
|
||||
end
|
||||
end
|
||||
|
||||
context 'without specifying batch_size' do
|
||||
it 'queues jobs correctly' do
|
||||
Sidekiq::Testing.fake! do
|
||||
model.bulk_queue_background_migration_jobs_by_range(User, 'FooJob')
|
||||
|
||||
expect(BackgroundMigrationWorker.jobs[0]['args']).to eq(['FooJob', [id1, id3]])
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
context "when the model doesn't have an ID column" do
|
||||
it 'raises error (for now)' do
|
||||
expect do
|
||||
model.bulk_queue_background_migration_jobs_by_range(ProjectAuthorization, 'FooJob')
|
||||
end.to raise_error(StandardError, /does not have an ID/)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
describe '#queue_background_migration_jobs_by_range_at_intervals', :sidekiq do
|
||||
context 'when the model has an ID column' do
|
||||
let!(:id1) { create(:user).id }
|
||||
let!(:id2) { create(:user).id }
|
||||
let!(:id3) { create(:user).id }
|
||||
|
||||
around do |example|
|
||||
Timecop.freeze { example.run }
|
||||
end
|
||||
|
||||
before do
|
||||
User.class_eval do
|
||||
include EachBatch
|
||||
end
|
||||
end
|
||||
|
||||
context 'with batch_size option' do
|
||||
it 'queues jobs correctly' do
|
||||
Sidekiq::Testing.fake! do
|
||||
model.queue_background_migration_jobs_by_range_at_intervals(User, 'FooJob', 10.seconds, batch_size: 2)
|
||||
|
||||
expect(BackgroundMigrationWorker.jobs[0]['args']).to eq(['FooJob', [id1, id2]])
|
||||
expect(BackgroundMigrationWorker.jobs[0]['at']).to eq(10.seconds.from_now.to_f)
|
||||
expect(BackgroundMigrationWorker.jobs[1]['args']).to eq(['FooJob', [id3, id3]])
|
||||
expect(BackgroundMigrationWorker.jobs[1]['at']).to eq(20.seconds.from_now.to_f)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
context 'without batch_size option' do
|
||||
it 'queues jobs correctly' do
|
||||
Sidekiq::Testing.fake! do
|
||||
model.queue_background_migration_jobs_by_range_at_intervals(User, 'FooJob', 10.seconds)
|
||||
|
||||
expect(BackgroundMigrationWorker.jobs[0]['args']).to eq(['FooJob', [id1, id3]])
|
||||
expect(BackgroundMigrationWorker.jobs[0]['at']).to eq(10.seconds.from_now.to_f)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
context "when the model doesn't have an ID column" do
|
||||
it 'raises error (for now)' do
|
||||
expect do
|
||||
model.queue_background_migration_jobs_by_range_at_intervals(ProjectAuthorization, 'FooJob', 10.seconds)
|
||||
end.to raise_error(StandardError, /does not have an ID/)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
58
spec/migrations/delete_conflicting_redirect_routes_spec.rb
Normal file
58
spec/migrations/delete_conflicting_redirect_routes_spec.rb
Normal file
|
@ -0,0 +1,58 @@
|
|||
require 'spec_helper'
|
||||
require Rails.root.join('db', 'post_migrate', '20170907170235_delete_conflicting_redirect_routes')
|
||||
|
||||
describe DeleteConflictingRedirectRoutes, :migration, :sidekiq do
|
||||
let!(:redirect_routes) { table(:redirect_routes) }
|
||||
let!(:routes) { table(:routes) }
|
||||
|
||||
around do |example|
|
||||
Timecop.freeze { example.run }
|
||||
end
|
||||
|
||||
before do
|
||||
stub_const("DeleteConflictingRedirectRoutes::BATCH_SIZE", 2)
|
||||
stub_const("Gitlab::Database::MigrationHelpers::BACKGROUND_MIGRATION_JOB_BUFFER_SIZE", 2)
|
||||
|
||||
routes.create!(id: 1, source_id: 1, source_type: 'Namespace', path: 'foo1')
|
||||
routes.create!(id: 2, source_id: 2, source_type: 'Namespace', path: 'foo2')
|
||||
routes.create!(id: 3, source_id: 3, source_type: 'Namespace', path: 'foo3')
|
||||
routes.create!(id: 4, source_id: 4, source_type: 'Namespace', path: 'foo4')
|
||||
routes.create!(id: 5, source_id: 5, source_type: 'Namespace', path: 'foo5')
|
||||
|
||||
# Valid redirects
|
||||
redirect_routes.create!(source_id: 1, source_type: 'Namespace', path: 'bar')
|
||||
redirect_routes.create!(source_id: 1, source_type: 'Namespace', path: 'bar2')
|
||||
redirect_routes.create!(source_id: 2, source_type: 'Namespace', path: 'bar3')
|
||||
|
||||
# Conflicting redirects
|
||||
redirect_routes.create!(source_id: 2, source_type: 'Namespace', path: 'foo1')
|
||||
redirect_routes.create!(source_id: 1, source_type: 'Namespace', path: 'foo2')
|
||||
redirect_routes.create!(source_id: 1, source_type: 'Namespace', path: 'foo3')
|
||||
redirect_routes.create!(source_id: 1, source_type: 'Namespace', path: 'foo4')
|
||||
redirect_routes.create!(source_id: 1, source_type: 'Namespace', path: 'foo5')
|
||||
end
|
||||
|
||||
it 'correctly schedules background migrations' do
|
||||
Sidekiq::Testing.fake! do
|
||||
Timecop.freeze do
|
||||
migrate!
|
||||
|
||||
expect(BackgroundMigrationWorker.jobs[0]['args']).to eq([described_class::MIGRATION, [1, 2]])
|
||||
expect(BackgroundMigrationWorker.jobs[0]['at']).to eq(12.seconds.from_now.to_f)
|
||||
expect(BackgroundMigrationWorker.jobs[1]['args']).to eq([described_class::MIGRATION, [3, 4]])
|
||||
expect(BackgroundMigrationWorker.jobs[1]['at']).to eq(24.seconds.from_now.to_f)
|
||||
expect(BackgroundMigrationWorker.jobs[2]['args']).to eq([described_class::MIGRATION, [5, 5]])
|
||||
expect(BackgroundMigrationWorker.jobs[2]['at']).to eq(36.seconds.from_now.to_f)
|
||||
expect(BackgroundMigrationWorker.jobs.size).to eq 3
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
it 'schedules background migrations' do
|
||||
Sidekiq::Testing.inline! do
|
||||
expect do
|
||||
migrate!
|
||||
end.to change { redirect_routes.count }.from(8).to(3)
|
||||
end
|
||||
end
|
||||
end
|
Loading…
Reference in a new issue