Resolve "Hashed Storage: Make possible to migrate single project"

This commit is contained in:
Gabriel Mazetto 2018-06-07 15:40:44 +00:00 committed by Nick Thomas
parent 1b06b8342c
commit 36c3376475
9 changed files with 243 additions and 69 deletions

View file

@ -1,29 +1,8 @@
# Worker whose #perform takes a start and finish project ID and triggers the
# hashed storage migration for the projects in that range.
#
# NOTE(review): the hunk header for this file reads "-1,29 +1,8", so this
# listing appears to show removed and added lines interleaved without +/-
# markers. Presumably the post-commit class keeps only
# `include ApplicationWorker` and a #perform that delegates to
# Gitlab::HashedStorage::Migrator#bulk_migrate, while BATCH_SIZE, the
# find_each loop and build_relation are the removed lines. Confirm against
# the repository before relying on this listing.
class StorageMigratorWorker
include ApplicationWorker
# Batch size handed to find_each below.
BATCH_SIZE = 100
# Walks the projects selected by build_relation(start, finish) and calls
# migrate_to_hashed_storage! on each one.
def perform(start, finish)
projects = build_relation(start, finish)
projects.with_route.find_each(batch_size: BATCH_SIZE) do |project|
Rails.logger.info "Starting storage migration of #{project.full_path} (ID=#{project.id})..."
begin
project.migrate_to_hashed_storage!
rescue => err
# The error is logged and not re-raised, so the loop continues with the
# next project.
Rails.logger.error("#{err.message} migrating storage of #{project.full_path} (ID=#{project.id}), trace - #{err.backtrace}")
end
end
end
# Builds a Project relation restricted to id >= start and id <= finish;
# a bound that is nil (or false) is not applied.
def build_relation(start, finish)
relation = Project
table = Project.arel_table
relation = relation.where(table[:id].gteq(start)) if start
relation = relation.where(table[:id].lteq(finish)) if finish
relation
# NOTE(review): the next two lines look like the added body of #perform,
# rendered after the removed build_relation lines. Verify against the
# repository.
migrator = Gitlab::HashedStorage::Migrator.new
migrator.bulk_migrate(start, finish)
end
end

View file

@ -0,0 +1,5 @@
---
title: 'Hashed Storage: migration rake task can now be executed for a specific project'
merge_request: 19268
author:
type: changed

View file

@ -17,13 +17,21 @@ This task will schedule all your existing projects and attachments associated wi
**Omnibus Installation**
```bash
gitlab-rake gitlab:storage:migrate_to_hashed
sudo gitlab-rake gitlab:storage:migrate_to_hashed
```
**Source Installation**
```bash
rake gitlab:storage:migrate_to_hashed
sudo -u git -H bundle exec rake gitlab:storage:migrate_to_hashed RAILS_ENV=production
```
Both commands also accept a range of project IDs, passed through the `ID_FROM` and `ID_TO` environment variables:
```bash
# to migrate any non-migrated project with an ID from 20 to 50.
export ID_FROM=20
export ID_TO=50
```
You can monitor the progress in the _Admin > Monitoring > Background jobs_ screen.
@ -44,13 +52,13 @@ To have a simple summary of projects using **Legacy** storage:
**Omnibus Installation**
```bash
gitlab-rake gitlab:storage:legacy_projects
sudo gitlab-rake gitlab:storage:legacy_projects
```
**Source Installation**
```bash
rake gitlab:storage:legacy_projects
sudo -u git -H bundle exec rake gitlab:storage:legacy_projects RAILS_ENV=production
```
------
@ -60,13 +68,13 @@ To list projects using **Legacy** storage:
**Omnibus Installation**
```bash
gitlab-rake gitlab:storage:list_legacy_projects
sudo gitlab-rake gitlab:storage:list_legacy_projects
```
**Source Installation**
```bash
rake gitlab:storage:list_legacy_projects
sudo -u git -H bundle exec rake gitlab:storage:list_legacy_projects RAILS_ENV=production
```
@ -77,13 +85,13 @@ To have a simple summary of projects using **Hashed** storage:
**Omnibus Installation**
```bash
gitlab-rake gitlab:storage:hashed_projects
sudo gitlab-rake gitlab:storage:hashed_projects
```
**Source Installation**
```bash
rake gitlab:storage:hashed_projects
sudo -u git -H bundle exec rake gitlab:storage:hashed_projects RAILS_ENV=production
```
------
@ -93,14 +101,13 @@ To list projects using **Hashed** storage:
**Omnibus Installation**
```bash
gitlab-rake gitlab:storage:list_hashed_projects
sudo gitlab-rake gitlab:storage:list_hashed_projects
```
**Source Installation**
```bash
rake gitlab:storage:list_hashed_projects
sudo -u git -H bundle exec rake gitlab:storage:list_hashed_projects RAILS_ENV=production
```
## List attachments on Legacy storage
@ -110,13 +117,13 @@ To have a simple summary of project attachments using **Legacy** storage:
**Omnibus Installation**
```bash
gitlab-rake gitlab:storage:legacy_attachments
sudo gitlab-rake gitlab:storage:legacy_attachments
```
**Source Installation**
```bash
rake gitlab:storage:legacy_attachments
sudo -u git -H bundle exec rake gitlab:storage:legacy_attachments RAILS_ENV=production
```
------
@ -126,13 +133,13 @@ To list project attachments using **Legacy** storage:
**Omnibus Installation**
```bash
gitlab-rake gitlab:storage:list_legacy_attachments
sudo gitlab-rake gitlab:storage:list_legacy_attachments
```
**Source Installation**
```bash
rake gitlab:storage:list_legacy_attachments
sudo -u git -H bundle exec rake gitlab:storage:list_legacy_attachments RAILS_ENV=production
```
## List attachments on Hashed storage
@ -142,13 +149,13 @@ To have a simple summary of project attachments using **Hashed** storage:
**Omnibus Installation**
```bash
gitlab-rake gitlab:storage:hashed_attachments
sudo gitlab-rake gitlab:storage:hashed_attachments
```
**Source Installation**
```bash
rake gitlab:storage:hashed_attachments
sudo -u git -H bundle exec rake gitlab:storage:hashed_attachments RAILS_ENV=production
```
------
@ -158,13 +165,13 @@ To list project attachments using **Hashed** storage:
**Omnibus Installation**
```bash
gitlab-rake gitlab:storage:list_hashed_attachments
sudo gitlab-rake gitlab:storage:list_hashed_attachments
```
**Source Installation**
```bash
rake gitlab:storage:list_hashed_attachments
sudo -u git -H bundle exec rake gitlab:storage:list_hashed_attachments RAILS_ENV=production
```
[storage-types]: ../repository_storage_types.md

View file

@ -0,0 +1,57 @@
module Gitlab
  module HashedStorage
    # Entry point for moving projects from Legacy to Hashed storage.
    #
    # A caller can enqueue a whole range of project IDs for later processing,
    # walk a range and flag every project found in it, or flag one single
    # project directly.
    class Migrator
      # Number of records fetched per batch while walking a range.
      BATCH_SIZE = 100

      # Enqueues a StorageMigratorWorker job for the given ID range.
      #
      # @param [Object] start first project id of the range
      # @param [Object] finish last project id of the range
      def bulk_schedule(start, finish)
        StorageMigratorWorker.perform_async(start, finish)
      end

      # Walks every project inside the range and hands it to #migrate.
      #
      # Flagging each project happens synchronously here; the migration
      # itself is carried out by asynchronous jobs.
      #
      # @param [Object] start first project id of the range (no lower bound when nil)
      # @param [Object] finish last project id of the range (no upper bound when nil)
      def bulk_migrate(start, finish)
        build_relation(start, finish)
          .with_route
          .find_each(batch_size: BATCH_SIZE) { |project| migrate(project) }
      end

      # Flags a single project to be migrated.
      #
      # Any StandardError raised along the way is logged instead of being
      # propagated, so a failing project does not interrupt a bulk run.
      #
      # @param [Object] project the project that will be migrated
      def migrate(project)
        Rails.logger.info("Starting storage migration of #{project.full_path} (ID=#{project.id})...")

        project.migrate_to_hashed_storage!
      rescue => err
        Rails.logger.error("#{err.message} migrating storage of #{project.full_path} (ID=#{project.id}), trace - #{err.backtrace}")
      end

      private

      # Narrows Project down to the requested ID window. A bound that is not
      # given leaves that side of the window open.
      def build_relation(start, finish)
        projects_table = Project.arel_table
        bounds = { gteq: start, lteq: finish }

        bounds.reduce(Project) do |scope, (comparison, bound)|
          next scope unless bound

          scope.where(projects_table[:id].public_send(comparison, bound))
        end
      end
    end
  end
end

View file

@ -9,8 +9,20 @@ module Gitlab
ENV.fetch('LIMIT', 500).to_i
end
def self.range_from
ENV['ID_FROM']
end
def self.range_to
ENV['ID_TO']
end
def self.range_single_item?
!range_from.nil? && range_from == range_to
end
def self.project_id_batches(&block)
Project.with_unmigrated_storage.in_batches(of: batch_size, start: ENV['ID_FROM'], finish: ENV['ID_TO']) do |relation| # rubocop: disable Cop/InBatches
Project.with_unmigrated_storage.in_batches(of: batch_size, start: range_from, finish: range_to) do |relation| # rubocop: disable Cop/InBatches
ids = relation.pluck(:id)
yield ids.min, ids.max

View file

@ -2,9 +2,26 @@ namespace :gitlab do
namespace :storage do
desc 'GitLab | Storage | Migrate existing projects to Hashed Storage'
task migrate_to_hashed: :environment do
legacy_projects_count = Project.with_unmigrated_storage.count
storage_migrator = Gitlab::HashedStorage::Migrator.new
helper = Gitlab::HashedStorage::RakeHelper
if helper.range_single_item?
project = Project.with_unmigrated_storage.find_by(id: helper.range_from)
unless project
puts "There are no projects requiring storage migration with ID=#{helper.range_from}"
next
end
puts "Enqueueing storage migration of #{project.full_path} (ID=#{project.id})..."
storage_migrator.migrate(project)
next
end
legacy_projects_count = Project.with_unmigrated_storage.count
if legacy_projects_count == 0
puts 'There are no projects requiring storage migration. Nothing to do!'
@ -14,7 +31,7 @@ namespace :gitlab do
print "Enqueuing migration of #{legacy_projects_count} projects in batches of #{helper.batch_size}"
helper.project_id_batches do |start, finish|
StorageMigratorWorker.perform_async(start, finish)
storage_migrator.bulk_schedule(start, finish)
print '.'
end

View file

@ -0,0 +1,75 @@
require 'spec_helper'
# Specs for Gitlab::HashedStorage::Migrator, covering its three public
# methods: #bulk_schedule, #bulk_migrate and #migrate.
describe Gitlab::HashedStorage::Migrator do
describe '#bulk_schedule' do
# Scheduling a range must add exactly one StorageMigratorWorker job.
it 'schedules job to StorageMigratorWorker' do
Sidekiq::Testing.fake! do
expect { subject.bulk_schedule(1, 5) }.to change(StorageMigratorWorker.jobs, :size).by(1)
end
end
end
describe '#bulk_migrate' do
# Two projects created with the :legacy_storage trait; their min/max IDs
# are used as the range under test.
let(:projects) { create_list(:project, 2, :legacy_storage) }
let(:ids) { projects.map(&:id) }
# One ProjectMigrateHashedStorageWorker job is expected per project in range.
it 'enqueue jobs to ProjectMigrateHashedStorageWorker' do
Sidekiq::Testing.fake! do
expect { subject.bulk_migrate(ids.min, ids.max) }.to change(ProjectMigrateHashedStorageWorker.jobs, :size).by(2)
end
end
# The worker is stubbed out, so only the flagging step is observed here.
it 'sets projects as read only' do
allow(ProjectMigrateHashedStorageWorker).to receive(:perform_async).twice
subject.bulk_migrate(ids.min, ids.max)
projects.each do |project|
expect(project.reload.repository_read_only?).to be_truthy
end
end
# A failure raised by Project#migrate_to_hashed_storage! must not escape
# #bulk_migrate.
it 'rescues and log exceptions' do
allow_any_instance_of(Project).to receive(:migrate_to_hashed_storage!).and_raise(StandardError)
expect { subject.bulk_migrate(ids.min, ids.max) }.not_to raise_error
end
# Each project in the range must be passed to #migrate.
it 'delegates each project in specified range to #migrate' do
projects.each do |project|
expect(subject).to receive(:migrate).with(project)
end
subject.bulk_migrate(ids.min, ids.max)
end
end
describe '#migrate' do
# A single project created with the :legacy_storage and :empty_repo traits.
let(:project) { create(:project, :legacy_storage, :empty_repo) }
# Migrating one project must add exactly one ProjectMigrateHashedStorageWorker job.
it 'enqueues job to ProjectMigrateHashedStorageWorker' do
Sidekiq::Testing.fake! do
expect { subject.migrate(project) }.to change(ProjectMigrateHashedStorageWorker.jobs, :size).by(1)
end
end
# A failure raised by migrate_to_hashed_storage! must not escape #migrate.
it 'rescues and log exceptions' do
allow(project).to receive(:migrate_to_hashed_storage!).and_raise(StandardError)
expect { subject.migrate(project) }.not_to raise_error
end
# The worker is stubbed out, so only the flagging step is observed here.
it 'sets project as read only' do
allow(ProjectMigrateHashedStorageWorker).to receive(:perform_async)
subject.migrate(project)
expect(project.reload.repository_read_only?).to be_truthy
end
# With Sidekiq::Testing.inline! the project is expected to report hashed
# storage for attachments once #migrate returns.
it 'migrate project' do
Sidekiq::Testing.inline! do
subject.migrate(project)
end
expect(project.reload.hashed_storage?(:attachments)).to be_truthy
end
end
end

View file

@ -1,6 +1,6 @@
require 'rake_helper'
describe 'gitlab:storage:*' do
describe 'rake gitlab:storage:*' do
before do
Rake.application.rake_require 'tasks/gitlab/storage'
@ -44,16 +44,18 @@ describe 'gitlab:storage:*' do
end
describe 'gitlab:storage:migrate_to_hashed' do
let(:task) { 'gitlab:storage:migrate_to_hashed' }
context '0 legacy projects' do
it 'does nothing' do
expect(StorageMigratorWorker).not_to receive(:perform_async)
run_rake_task('gitlab:storage:migrate_to_hashed')
run_rake_task(task)
end
end
context '3 legacy projects' do
let(:projects) { create_list(:project, 3, storage_version: 0) }
let(:projects) { create_list(:project, 3, :legacy_storage) }
context 'in batches of 1' do
before do
@ -65,7 +67,7 @@ describe 'gitlab:storage:*' do
expect(StorageMigratorWorker).to receive(:perform_async).with(project.id, project.id)
end
run_rake_task('gitlab:storage:migrate_to_hashed')
run_rake_task(task)
end
end
@ -80,23 +82,48 @@ describe 'gitlab:storage:*' do
expect(StorageMigratorWorker).to receive(:perform_async).with(first, last)
end
run_rake_task('gitlab:storage:migrate_to_hashed')
run_rake_task(task)
end
end
end
context 'with same id in range' do
it 'displays message when project cant be found' do
stub_env('ID_FROM', 99999)
stub_env('ID_TO', 99999)
expect { run_rake_task(task) }.to output(/There are no projects requiring storage migration with ID=99999/).to_stdout
end
it 'displays a message when project exists but its already migrated' do
project = create(:project)
stub_env('ID_FROM', project.id)
stub_env('ID_TO', project.id)
expect { run_rake_task(task) }.to output(/There are no projects requiring storage migration with ID=#{project.id}/).to_stdout
end
it 'enqueues migration when project can be found' do
project = create(:project, :legacy_storage)
stub_env('ID_FROM', project.id)
stub_env('ID_TO', project.id)
expect { run_rake_task(task) }.to output(/Enqueueing storage migration .* \(ID=#{project.id}\)/).to_stdout
end
end
end
describe 'gitlab:storage:legacy_projects' do
it_behaves_like 'rake entities summary', 'projects', 'Legacy' do
let(:task) { 'gitlab:storage:legacy_projects' }
let(:create_collection) { create_list(:project, 3, storage_version: 0) }
let(:create_collection) { create_list(:project, 3, :legacy_storage) }
end
end
describe 'gitlab:storage:list_legacy_projects' do
it_behaves_like 'rake listing entities', 'projects', 'Legacy' do
let(:task) { 'gitlab:storage:list_legacy_projects' }
let(:create_collection) { create_list(:project, 3, storage_version: 0) }
let(:create_collection) { create_list(:project, 3, :legacy_storage) }
end
end
@ -133,7 +160,7 @@ describe 'gitlab:storage:*' do
describe 'gitlab:storage:hashed_attachments' do
it_behaves_like 'rake entities summary', 'attachments', 'Hashed' do
let(:task) { 'gitlab:storage:hashed_attachments' }
let(:project) { create(:project, storage_version: 2) }
let(:project) { create(:project) }
let(:create_collection) { create_list(:upload, 3, model: project) }
end
end
@ -141,7 +168,7 @@ describe 'gitlab:storage:*' do
describe 'gitlab:storage:list_hashed_attachments' do
it_behaves_like 'rake listing entities', 'attachments', 'Hashed' do
let(:task) { 'gitlab:storage:list_hashed_attachments' }
let(:project) { create(:project, storage_version: 2) }
let(:project) { create(:project) }
let(:create_collection) { create_list(:upload, 3, model: project) }
end
end

View file

@ -2,29 +2,24 @@ require 'spec_helper'
describe StorageMigratorWorker do
subject(:worker) { described_class.new }
let(:projects) { create_list(:project, 2, :legacy_storage) }
let(:projects) { create_list(:project, 2, :legacy_storage, :empty_repo) }
let(:ids) { projects.map(&:id) }
describe '#perform' do
let(:ids) { projects.map(&:id) }
it 'delegates to MigratorService' do
expect_any_instance_of(Gitlab::HashedStorage::Migrator).to receive(:bulk_migrate).with(5, 10)
it 'enqueue jobs to ProjectMigrateHashedStorageWorker' do
expect(ProjectMigrateHashedStorageWorker).to receive(:perform_async).twice
worker.perform(ids.min, ids.max)
worker.perform(5, 10)
end
it 'sets projects as read only' do
allow(ProjectMigrateHashedStorageWorker).to receive(:perform_async).twice
worker.perform(ids.min, ids.max)
it 'migrates projects in the specified range' do
Sidekiq::Testing.inline! do
worker.perform(ids.min, ids.max)
end
projects.each do |project|
expect(project.reload.repository_read_only?).to be_truthy
expect(project.reload.hashed_storage?(:attachments)).to be_truthy
end
end
it 'rescues and log exceptions' do
allow_any_instance_of(Project).to receive(:migrate_to_hashed_storage!).and_raise(StandardError)
expect { worker.perform(ids.min, ids.max) }.not_to raise_error
end
end
end