Add a migration to populate fork networks

This uses the existing ForkedProjectLinks
This commit is contained in:
Bob Van Landuyt 2017-09-29 19:02:02 +02:00
parent e8ca579d88
commit df7f530d84
5 changed files with 346 additions and 0 deletions

View file

@ -0,0 +1,30 @@
# See http://doc.gitlab.com/ce/development/migration_style_guide.html
# for more information on how to write migrations for GitLab.
# Schedules the `PopulateForkNetworksRange` background migration over the
# existing `forked_project_links` rows, in batches.
class PopulateForkNetworks < ActiveRecord::Migration
  include Gitlab::Database::MigrationHelpers

  DOWNTIME = false

  # Name of the background migration class that processes each batch.
  MIGRATION = 'PopulateForkNetworksRange'.freeze
  # Batch size handed to the scheduling helper.
  BATCH_SIZE = 100
  # Interval handed to the scheduling helper.
  DELAY_INTERVAL = 15.seconds

  disable_ddl_transaction!

  # Migration-local model for the `forked_project_links` table.
  class ForkedProjectLink < ActiveRecord::Base
    include EachBatch

    self.table_name = 'forked_project_links'
  end

  # Announces the work and queues the background jobs for every link.
  def up
    say('Populating the `fork_networks` based on existing `forked_project_links`')

    queue_background_migration_jobs_by_range_at_intervals(
      ForkedProjectLink,
      MIGRATION,
      DELAY_INTERVAL,
      batch_size: BATCH_SIZE
    )
  end

  # No-op: this migration defines no rollback.
  def down
    # nothing
  end
end

View file

@ -0,0 +1,60 @@
module Gitlab
  module BackgroundMigration
    # Creates `fork_network_members` rows for the forks described by a range
    # of `forked_project_links`.
    #
    # A link is only turned into a membership once the project it was forked
    # from is itself a member of a fork network, so forks-of-forks are picked
    # up by later runs: the job reschedules itself for the same range while
    # links without a membership remain in that range.
    class CreateForkNetworkMembershipsRange
      # Delay handed to `BackgroundMigrationWorker.perform_in` when rescheduling.
      RESCHEDULE_DELAY = 15

      # Migration-local model for `forked_project_links`, so this job does not
      # rely on the application's models.
      class ForkedProjectLink < ActiveRecord::Base
        self.table_name = 'forked_project_links'
      end

      # Inserts the memberships that can currently be created for the links
      # with an id between `start_id` and `end_id` (inclusive), then
      # reschedules itself if some links in the range still lack a membership.
      #
      # @param start_id [Integer] first `forked_project_links.id` of the range
      # @param end_id [Integer] last `forked_project_links.id` of the range
      def perform(start_id, end_id)
        log("Creating memberships for forks: #{start_id} - #{end_id}")

        # The NOT EXISTS guard skips forks that already have a membership,
        # which makes re-running the same range safe.
        ActiveRecord::Base.connection.execute <<~INSERT_MEMBERS
          INSERT INTO fork_network_members (fork_network_id, project_id, forked_from_project_id)
          SELECT fork_network_members.fork_network_id,
          forked_project_links.forked_to_project_id,
          forked_project_links.forked_from_project_id
          FROM forked_project_links
          INNER JOIN fork_network_members
          ON forked_project_links.forked_from_project_id = fork_network_members.project_id
          WHERE forked_project_links.id BETWEEN #{start_id} AND #{end_id}
          AND NOT EXISTS (
          SELECT true
          FROM fork_network_members existing_members
          WHERE existing_members.project_id = forked_project_links.forked_to_project_id
          )
        INSERT_MEMBERS

        # NOTE(review): a link whose source project never becomes a member of
        # a network keeps `missing_members?` true, so this job would keep
        # rescheduling itself - confirm such links cannot exist.
        if missing_members?(start_id, end_id)
          BackgroundMigrationWorker.perform_in(RESCHEDULE_DELAY, "CreateForkNetworkMembershipsRange", [start_id, end_id])
        end
      end

      # Tells whether any link in the range still has no membership for the
      # project it points to.
      #
      # @param start_id [Integer] first `forked_project_links.id` of the range
      # @param end_id [Integer] last `forked_project_links.id` of the range
      # @return [Boolean]
      def missing_members?(start_id, end_id)
        count_sql = <<~MISSING_MEMBERS
          SELECT COUNT(*)
          FROM forked_project_links
          WHERE NOT EXISTS (
          SELECT true
          FROM fork_network_members
          WHERE fork_network_members.project_id = forked_project_links.forked_to_project_id
          )
          AND forked_project_links.id BETWEEN #{start_id} AND #{end_id}
        MISSING_MEMBERS

        # Run the count through the migration-local model rather than the
        # application's `ForkNetworkMember` model.
        ForkedProjectLink.count_by_sql(count_sql) > 0
      end

      # Writes an info-level log line prefixed with this class' name.
      def log(message)
        Rails.logger.info("#{self.class.name} - #{message}")
      end
    end
  end
end

View file

@ -0,0 +1,54 @@
module Gitlab
  module BackgroundMigration
    # Creates the `fork_networks` rows, and the membership of each network's
    # root project, for a range of `forked_project_links`. Memberships for the
    # forks themselves are left to `CreateForkNetworkMembershipsRange`, which
    # is scheduled at the end of `perform`.
    class PopulateForkNetworksRange
      # Processes the links with an id between `start_id` and `end_id`
      # (inclusive).
      #
      # @param start_id [Integer] first `forked_project_links.id` of the range
      # @param end_id [Integer] last `forked_project_links.id` of the range
      def perform(start_id, end_id)
        log("Creating fork networks for forked project links: #{start_id} - #{end_id}")
        insert_fork_networks(start_id, end_id)

        log("Creating memberships for root projects: #{start_id} - #{end_id}")
        insert_root_memberships(start_id, end_id)

        schedule_fork_memberships(start_id, end_id)
      end

      # Writes an info-level log line prefixed with this class' name.
      def log(message)
        Rails.logger.info("#{self.class.name} - #{message}")
      end

      private

      # Inserts a network for every source project in the range that is not
      # itself the target of a link and that has no network yet.
      def insert_fork_networks(start_id, end_id)
        networks_sql = <<~INSERT_NETWORKS
          INSERT INTO fork_networks (root_project_id)
          SELECT DISTINCT forked_project_links.forked_from_project_id
          FROM forked_project_links
          WHERE NOT EXISTS (
          SELECT true
          FROM forked_project_links inner_links
          WHERE inner_links.forked_to_project_id = forked_project_links.forked_from_project_id
          )
          AND NOT EXISTS (
          SELECT true
          FROM fork_networks
          WHERE forked_project_links.forked_from_project_id = fork_networks.root_project_id
          )
          AND forked_project_links.id BETWEEN #{start_id} AND #{end_id}
        INSERT_NETWORKS

        ActiveRecord::Base.connection.execute(networks_sql)
      end

      # Inserts a membership for the root project of every network whose root
      # is the source of a link in the range and has no membership yet.
      def insert_root_memberships(start_id, end_id)
        roots_sql = <<~INSERT_ROOT
          INSERT INTO fork_network_members (fork_network_id, project_id)
          SELECT DISTINCT fork_networks.id, fork_networks.root_project_id
          FROM fork_networks
          INNER JOIN forked_project_links
          ON forked_project_links.forked_from_project_id = fork_networks.root_project_id
          WHERE NOT EXISTS (
          SELECT true
          FROM fork_network_members
          WHERE fork_network_members.project_id = fork_networks.root_project_id
          )
          AND forked_project_links.id BETWEEN #{start_id} AND #{end_id}
        INSERT_ROOT

        ActiveRecord::Base.connection.execute(roots_sql)
      end

      # Queues the job that creates the memberships of the forks in the range.
      def schedule_fork_memberships(start_id, end_id)
        BackgroundMigrationWorker.perform_in(
          BackgroundMigration::CreateForkNetworkMembershipsRange::RESCHEDULE_DELAY,
          "CreateForkNetworkMembershipsRange",
          [start_id, end_id]
        )
      end
    end
  end
end

View file

@ -0,0 +1,117 @@
require 'spec_helper'
# Exercises CreateForkNetworkMembershipsRange against two pre-created fork
# networks (rooted at base1 and base2) and a chain of forks up to three levels
# below base1.
describe Gitlab::BackgroundMigration::CreateForkNetworkMembershipsRange, :migration, schema: 20170929131201 do
  let(:migration) { described_class.new }

  let(:base1) { create(:project) }
  let(:base1_fork1) { create(:project) }
  let(:base1_fork2) { create(:project) }

  let(:base2) { create(:project) }
  let(:base2_fork1) { create(:project) }
  let(:base2_fork2) { create(:project) }

  let(:fork_of_fork) { create(:project) }
  let(:fork_of_fork2) { create(:project) }
  let(:second_level_fork) { create(:project) }
  let(:third_level_fork) { create(:project) }

  let(:fork_network1) { fork_networks.find_by(root_project_id: base1.id) }
  let(:fork_network2) { fork_networks.find_by(root_project_id: base2.id) }

  let!(:forked_project_links) { table(:forked_project_links) }
  let!(:fork_networks) { table(:fork_networks) }
  let!(:fork_network_members) { table(:fork_network_members) }

  before do
    # The fork-network relation created for the forked project
    fork_networks.create(id: 1, root_project_id: base1.id)
    fork_network_members.create(project_id: base1.id, fork_network_id: 1)
    fork_networks.create(id: 2, root_project_id: base2.id)
    fork_network_members.create(project_id: base2.id, fork_network_id: 2)

    # Normal fork links
    forked_project_links.create(id: 1, forked_from_project_id: base1.id, forked_to_project_id: base1_fork1.id)
    forked_project_links.create(id: 2, forked_from_project_id: base1.id, forked_to_project_id: base1_fork2.id)
    forked_project_links.create(id: 3, forked_from_project_id: base2.id, forked_to_project_id: base2_fork1.id)
    forked_project_links.create(id: 4, forked_from_project_id: base2.id, forked_to_project_id: base2_fork2.id)

    # Fork links
    forked_project_links.create(id: 5, forked_from_project_id: base1_fork1.id, forked_to_project_id: fork_of_fork.id)
    forked_project_links.create(id: 6, forked_from_project_id: base1_fork1.id, forked_to_project_id: fork_of_fork2.id)

    # Forks 3 levels down
    forked_project_links.create(id: 7, forked_from_project_id: fork_of_fork.id, forked_to_project_id: second_level_fork.id)
    forked_project_links.create(id: 8, forked_from_project_id: second_level_fork.id, forked_to_project_id: third_level_fork.id)

    # Process every link created above.
    migration.perform(1, 8)
  end

  it 'creates a memberships for the direct forks' do
    base1_fork1_membership = fork_network_members.find_by(fork_network_id: fork_network1.id,
                                                          project_id: base1_fork1.id)
    base1_fork2_membership = fork_network_members.find_by(fork_network_id: fork_network1.id,
                                                          project_id: base1_fork2.id)
    base2_fork1_membership = fork_network_members.find_by(fork_network_id: fork_network2.id,
                                                          project_id: base2_fork1.id)
    base2_fork2_membership = fork_network_members.find_by(fork_network_id: fork_network2.id,
                                                          project_id: base2_fork2.id)

    expect(base1_fork1_membership.forked_from_project_id).to eq(base1.id)
    expect(base1_fork2_membership.forked_from_project_id).to eq(base1.id)
    expect(base2_fork1_membership.forked_from_project_id).to eq(base2.id)
    expect(base2_fork2_membership.forked_from_project_id).to eq(base2.id)
  end

  it 'adds the fork network members for forks of forks' do
    fork_of_fork_membership = fork_network_members.find_by(project_id: fork_of_fork.id,
                                                           fork_network_id: fork_network1.id)
    fork_of_fork2_membership = fork_network_members.find_by(project_id: fork_of_fork2.id,
                                                            fork_network_id: fork_network1.id)
    second_level_fork_membership = fork_network_members.find_by(project_id: second_level_fork.id,
                                                                fork_network_id: fork_network1.id)
    third_level_fork_membership = fork_network_members.find_by(project_id: third_level_fork.id,
                                                               fork_network_id: fork_network1.id)

    expect(fork_of_fork_membership.forked_from_project_id).to eq(base1_fork1.id)
    expect(fork_of_fork2_membership.forked_from_project_id).to eq(base1_fork1.id)
    expect(second_level_fork_membership.forked_from_project_id).to eq(fork_of_fork.id)
    expect(third_level_fork_membership.forked_from_project_id).to eq(second_level_fork.id)
  end

  it 'reschedules itself when there are missing members' do
    allow(migration).to receive(:missing_members?).and_return(true)

    expect(BackgroundMigrationWorker)
      .to receive(:perform_in).with(described_class::RESCHEDULE_DELAY, "CreateForkNetworkMembershipsRange", [1, 3])

    migration.perform(1, 3)
  end

  it 'can be repeated without effect' do
    # The `expect` block is the action and the `change` block the observed
    # value: running the migration again must not alter the member count.
    expect { migration.perform(1, 7) }.not_to change { fork_network_members.count }
  end

  it 'knows it is finished for this range' do
    expect(migration.missing_members?(1, 7)).to be_falsy
  end

  context 'with more forks' do
    before do
      # Links added after the outer `before` already ran the migration.
      forked_project_links.create(id: 9, forked_from_project_id: fork_of_fork.id, forked_to_project_id: create(:project).id)
      forked_project_links.create(id: 10, forked_from_project_id: fork_of_fork.id, forked_to_project_id: create(:project).id)
    end

    it 'only processes a single batch of links at a time' do
      expect(fork_network_members.count).to eq(10)

      migration.perform(8, 10)

      expect(fork_network_members.count).to eq(12)
    end

    it 'knows when not all memberships withing a batch have been created' do
      expect(migration.missing_members?(8, 10)).to be_truthy
    end
  end
end

View file

@ -0,0 +1,85 @@
require 'spec_helper'
# Exercises PopulateForkNetworksRange with two root projects (base1, base2),
# their direct forks and one fork-of-fork.
describe Gitlab::BackgroundMigration::PopulateForkNetworksRange, :migration, schema: 20170929131201 do
  let(:migration) { described_class.new }

  let(:base1) { create(:project) }
  let(:base1_fork1) { create(:project) }
  let(:base1_fork2) { create(:project) }

  let(:base2) { create(:project) }
  let(:base2_fork1) { create(:project) }
  let(:base2_fork2) { create(:project) }

  let!(:forked_project_links) { table(:forked_project_links) }
  let!(:fork_networks) { table(:fork_networks) }
  let!(:fork_network_members) { table(:fork_network_members) }

  let(:fork_network1) { fork_networks.find_by(root_project_id: base1.id) }
  let(:fork_network2) { fork_networks.find_by(root_project_id: base2.id) }

  before do
    # A normal fork link
    forked_project_links.create(id: 1, forked_from_project_id: base1.id, forked_to_project_id: base1_fork1.id)
    forked_project_links.create(id: 2, forked_from_project_id: base1.id, forked_to_project_id: base1_fork2.id)
    forked_project_links.create(id: 3, forked_from_project_id: base2.id, forked_to_project_id: base2_fork1.id)
    forked_project_links.create(id: 4, forked_from_project_id: base2_fork1.id, forked_to_project_id: create(:project).id)
    forked_project_links.create(id: 5, forked_from_project_id: base2.id, forked_to_project_id: base2_fork2.id)

    # Only the first three links are processed up front.
    migration.perform(1, 3)
  end

  it 'it creates the fork network' do
    expect(fork_network1).not_to be_nil
    expect(fork_network2).not_to be_nil
  end

  it 'does not create a fork network for a fork-of-fork' do
    # perform the entire batch
    migration.perform(1, 5)

    network_of_fork = fork_networks.find_by(root_project_id: base2_fork1.id)

    expect(network_of_fork).to be_nil
  end

  it 'creates memberships for the root of fork networks' do
    expect(fork_network_members.find_by(fork_network_id: fork_network1.id, project_id: base1.id))
      .not_to be_nil
    expect(fork_network_members.find_by(fork_network_id: fork_network2.id, project_id: base2.id))
      .not_to be_nil
  end

  it 'schedules a job for inserting memberships for forks-of-forks' do
    delay = Gitlab::BackgroundMigration::CreateForkNetworkMembershipsRange::RESCHEDULE_DELAY

    expect(BackgroundMigrationWorker)
      .to receive(:perform_in).with(delay, "CreateForkNetworkMembershipsRange", [1, 3])

    migration.perform(1, 3)
  end

  it 'only processes a single batch of links at a time' do
    expect(fork_network_members.count).to eq(5)

    migration.perform(3, 5)

    expect(fork_network_members.count).to eq(7)
  end

  it 'can be repeated without effect' do
    expect { migration.perform(1, 3) }.not_to change { fork_network_members.count }
  end
end