gitlab-org--gitlab-foss/db/post_migrate/20210812013042_remove_dupli...

113 lines
3.4 KiB
Ruby

# frozen_string_literal: true
class RemoveDuplicateProjectAuthorizations < ActiveRecord::Migration[6.1]
include Gitlab::Database::MigrationHelpers
BATCH_SIZE = 10_000
OLD_INDEX_NAME = 'index_project_authorizations_on_project_id_user_id'
INDEX_NAME = 'index_unique_project_authorizations_on_project_id_user_id'
class ProjectAuthorization < ActiveRecord::Base
self.table_name = 'project_authorizations'
end
disable_ddl_transaction!
def up
batch do |first_record, last_record|
break if first_record.blank?
# construct a range query where we filter records between the first and last records
rows = ActiveRecord::Base.connection.execute <<~SQL
SELECT user_id, project_id
FROM project_authorizations
WHERE
#{start_condition(first_record)}
#{end_condition(last_record)}
GROUP BY user_id, project_id
HAVING COUNT(*) > 1
SQL
rows.each do |row|
deduplicate_item(row['project_id'], row['user_id'])
end
end
add_concurrent_index :project_authorizations, [:project_id, :user_id], unique: true, name: INDEX_NAME
remove_concurrent_index_by_name :project_authorizations, OLD_INDEX_NAME
end
def down
add_concurrent_index(:project_authorizations, [:project_id, :user_id], name: OLD_INDEX_NAME)
remove_concurrent_index_by_name(:project_authorizations, INDEX_NAME)
end
private
def start_condition(record)
"(user_id, project_id) >= (#{Integer(record.user_id)}, #{Integer(record.project_id)})"
end
def end_condition(record)
return "" unless record
"AND (user_id, project_id) <= (#{Integer(record.user_id)}, #{Integer(record.project_id)})"
end
def batch(&block)
order = Gitlab::Pagination::Keyset::Order.build(
[
Gitlab::Pagination::Keyset::ColumnOrderDefinition.new(
attribute_name: 'user_id',
order_expression: ProjectAuthorization.arel_table[:user_id].asc,
nullable: :not_nullable,
distinct: false
),
Gitlab::Pagination::Keyset::ColumnOrderDefinition.new(
attribute_name: 'project_id',
order_expression: ProjectAuthorization.arel_table[:project_id].asc,
nullable: :not_nullable,
distinct: false
),
Gitlab::Pagination::Keyset::ColumnOrderDefinition.new(
attribute_name: 'access_level',
order_expression: ProjectAuthorization.arel_table[:access_level].asc,
nullable: :not_nullable,
distinct: true
)
])
scope = ProjectAuthorization.order(order)
cursor = {}
loop do
current_scope = scope.dup
relation = order.apply_cursor_conditions(current_scope, cursor)
first_record = relation.take
last_record = relation.offset(BATCH_SIZE).take
yield first_record, last_record
break if last_record.blank?
cursor = order.cursor_attributes_for_node(last_record)
end
end
def deduplicate_item(project_id, user_id)
auth_records = ProjectAuthorization.where(project_id: project_id, user_id: user_id).order(access_level: :desc).to_a
ActiveRecord::Base.transaction do
# Keep the highest access level and destroy the rest.
auth_records[1..].each do |record|
ProjectAuthorization
.where(
project_id: record.project_id,
user_id: record.user_id,
access_level: record.access_level
).delete_all
end
end
end
end