Port cleanup tasks to use Gitaly
Rake tasks cleaning up the Git storage were still using direct disk
access, which won't work if the disks aren't attached. To mitigate this, a
migration issue was created.
To port gitlab:cleanup:dirs, and gitlab:cleanup:repos, a new RPC was
required, ListDirectories. This was implemented in Gitaly, through
https://gitlab.com/gitlab-org/gitaly/merge_requests/868.
To be able to use the new RPC the Gitaly server was bumped to v0.120.
This is an RPC that will not use feature gates, as this doesn't scale on
.com so there is no way to test it at scale. Furthermore, we _know_ it
doesn't scale, but this might be a useful task for smaller instances.
Lastly, the tests are slightly updated to also work when the disk isn't
attached. Even though this is not planned, it was very little effort and
thus I applied the boy scout rule.
Closes https://gitlab.com/gitlab-org/gitaly/issues/954
Closes https://gitlab.com/gitlab-org/gitlab-ce/issues/40529
2018-09-07 05:16:34 -04:00
|
|
|
# frozen_string_literal: true
|
|
|
|
require 'set'
|
|
|
|
|
2012-12-24 23:14:05 -05:00
|
|
|
namespace :gitlab do
|
|
|
|
namespace :cleanup do
|
2015-06-23 10:52:40 -04:00
|
|
|
# Reports, for every LDAP-backed user, whether Gitlab::Auth::Ldap::Access still
# allows them, and optionally blocks the ones that are no longer allowed.
#
# Environment:
#   BLOCK - when set, disallowed users are blocked instead of only reported.
#           NOTE: any value counts as "set" (every String is truthy in Ruby),
#           so BLOCK=false also enables blocking.
desc "GitLab | Cleanup | Block users that have been removed in LDAP"
task block_removed_ldap_users: :gitlab_environment do
  warn_user_is_not_gitlab

  should_block = ENV['BLOCK']

  User.find_each do |candidate|
    # Only accounts backed by LDAP are relevant here.
    next unless candidate.ldap_user?

    print "#{candidate.name} (#{candidate.ldap_identity.extern_uid}) ..."

    if Gitlab::Auth::Ldap::Access.allowed?(candidate)
      puts " [OK]".color(:green)
    elsif should_block
      # Avoid re-blocking accounts that are already blocked.
      candidate.block! unless candidate.blocked?
      puts " [BLOCKED]".color(:red)
    else
      puts " [NOT IN LDAP]".color(:yellow)
    end
  end

  # In report-only mode, tell the operator how to actually block the users.
  puts "To block these users run this command with BLOCK=true".color(:yellow) unless should_block
end
|
2018-07-26 17:23:33 -04:00
|
|
|
|
|
|
|
# Runs Gitlab::Cleanup::ProjectUploads, honouring the DRY_RUN setting
# (see dry_run?), and prints a hint when nothing was actually changed.
desc "GitLab | Cleanup | Clean orphaned project uploads"
task project_uploads: :gitlab_environment do
  warn_user_is_not_gitlab

  Gitlab::Cleanup::ProjectUploads.new(logger: logger).run!(dry_run: dry_run?)

  # A dry run only reports; remind the operator how to perform the real cleanup.
  logger.info "To clean up these files run this command with DRY_RUN=false".color(:yellow) if dry_run?
end
|
|
|
|
|
2018-07-30 14:14:38 -04:00
|
|
|
# Runs Gitlab::Cleanup::RemoteUploads, honouring the DRY_RUN setting
# (see dry_run?).
#
# Unlike the sibling tasks, this one depends on :environment rather than
# :gitlab_environment and does not call warn_user_is_not_gitlab.
desc 'GitLab | Cleanup | Clean orphan remote upload files that do not exist in the db'
task remote_upload_files: :environment do
  Gitlab::Cleanup::RemoteUploads.new(logger: logger).run!(dry_run: dry_run?)

  # A dry run only reports; remind the operator how to perform the real cleanup.
  logger.info "To cleanup these files run this command with DRY_RUN=false".color(:yellow) if dry_run?
end
|
|
|
|
|
2019-06-13 17:07:59 -04:00
|
|
|
# Runs Gitlab::Cleanup::OrphanJobArtifactFiles.
#
# Environment (read through the helpers in this file):
#   DRY_RUN  - see dry_run?
#   NICENESS - see niceness; passed through to the cleaner unchanged
#   DEBUG    - see debug?; affects the verbosity of the logger
desc 'GitLab | Cleanup | Clean orphan job artifact files'
task orphan_job_artifact_files: :gitlab_environment do
  warn_user_is_not_gitlab

  Gitlab::Cleanup::OrphanJobArtifactFiles
    .new(dry_run: dry_run?, niceness: niceness, logger: logger)
    .run!

  # A dry run only reports; remind the operator how to perform the real cleanup.
  logger.info "To clean up these files run this command with DRY_RUN=false".color(:yellow) if dry_run?
end
|
|
|
|
|
2020-03-23 08:09:47 -04:00
|
|
|
# Runs Gitlab::Cleanup::OrphanLfsFileReferences for a single project.
#
# The project is selected with PROJECT_ID or PROJECT_PATH (see find_project).
# When no project can be resolved, a usage hint is logged and the process
# exits via Kernel#exit.
desc 'GitLab | Cleanup | Clean orphan LFS file references'
task orphan_lfs_file_references: :gitlab_environment do
  warn_user_is_not_gitlab

  target_project = find_project

  if target_project
    Gitlab::Cleanup::OrphanLfsFileReferences
      .new(target_project, dry_run: dry_run?, logger: logger)
      .run!

    # A dry run only reports; remind the operator how to perform the real cleanup.
    logger.info "To clean up these files run this command with DRY_RUN=false".color(:yellow) if dry_run?
  else
    logger.info "Specify the project with PROJECT_ID={number} or PROJECT_PATH={namespace/project-name}".color(:red)
    exit
  end
end
|
|
|
|
|
|
|
|
# Runs RemoveUnreferencedLfsObjectsWorker inline (not through a queue) and
# logs the value it returns as the number of removed files.
desc 'GitLab | Cleanup | Clean orphan LFS files'
task orphan_lfs_files: :gitlab_environment do
  warn_user_is_not_gitlab

  removed_count = RemoveUnreferencedLfsObjectsWorker.new.perform

  logger.info "Removed unreferenced LFS files: #{removed_count}".color(:green)
end
|
|
|
|
|
2019-07-12 08:25:12 -04:00
|
|
|
namespace :sessions do
  # Scans every per-user ActiveSession lookup set in the shared-state Redis and
  # removes the session ids whose session entry no longer exists.
  #
  # The keyspace is walked with SCAN (cursor based), so the task handles one
  # batch of keys at a time. At most once per second it checks whether Redis
  # reports a background save in progress and, if so, sleeps until it is over
  # (presumably to avoid adding write load during a snapshot).
  desc "GitLab | Cleanup | Sessions | Clean ActiveSession lookup keys"
  task active_sessions_lookup_keys: :gitlab_environment do
    session_key_pattern = "#{Gitlab::Redis::SharedState::USER_SESSIONS_LOOKUP_NAMESPACE}:*"
    last_save_check = Time.at(0)
    wait_time = 10.seconds
    cursor = 0
    total_users_scanned = 0

    Gitlab::Redis::SharedState.with do |redis|
      # Always run at least one SCAN; a returned cursor of 0 ends the iteration.
      loop do
        cursor, keys = redis.scan(cursor, match: session_key_pattern)
        total_users_scanned += keys.count

        if last_save_check < Time.now - 1.second
          while redis.info('persistence')['rdb_bgsave_in_progress'] == '1'
            puts "BGSAVE in progress, waiting #{wait_time} seconds"
            sleep(wait_time)
          end

          last_save_check = Time.now
        end

        keys.each do |key|
          # The user id is the last segment of the lookup key.
          user_id = key.split(':').last

          # Size of the lookup set before cleanup, used for reporting only.
          lookup_key_count = redis.scard(key)

          session_ids = ActiveSession.session_ids_for_user(user_id)
          entries = ActiveSession.raw_active_session_entries(redis, session_ids, user_id)

          # A session id whose entry is nil is treated as inactive.
          inactive_session_ids = session_ids.zip(entries).map do |session_id, session|
            session_id if session.nil?
          end.compact

          # An empty Array is truthy in Ruby, so test emptiness explicitly:
          # users with nothing to clean up are skipped without any output.
          next if inactive_session_ids.empty?

          redis.pipelined do |conn|
            inactive_session_ids.each do |session_id|
              conn.srem(key, session_id)
            end
          end

          puts "deleted #{inactive_session_ids.count} out of #{lookup_key_count} lookup keys for User ##{user_id}"
        end

        break if cursor.to_i == 0
      end

      puts "--- All done! Total number of scanned users: #{total_users_scanned}"
    end
  end
end
|
|
|
|
|
2018-07-26 17:23:33 -04:00
|
|
|
# Whether destructive removal was explicitly requested.
#
# @return [Boolean] true only when the REMOVE environment variable is
#   exactly the string "true" (case-sensitive)
def remove?
  ENV.fetch('REMOVE', nil) == 'true'
end
|
|
|
|
|
|
|
|
# Whether the cleanup tasks should only report instead of changing anything.
#
# Dry-run is the default: it is switched off only when the DRY_RUN
# environment variable is exactly the string "false" (case-sensitive).
#
# @return [Boolean]
def dry_run?
  opted_out = ENV['DRY_RUN'] == 'false'
  !opted_out
end
|
|
|
|
|
2019-06-13 17:07:59 -04:00
|
|
|
# Whether debug-level logging was requested.
#
# @return [Boolean] true when the DEBUG environment variable is set to a
#   value that ActiveSupport considers present (not nil and not blank)
def debug?
  debug_setting = ENV['DEBUG']
  debug_setting.present?
end
|
|
|
|
|
|
|
|
# The raw NICENESS setting passed on to cleaners that accept it.
#
# @return [String, nil] the value of the NICENESS environment variable, or
#   nil when it is unset or blank (ActiveSupport's Object#presence)
def niceness
  requested = ENV['NICENESS']
  requested.presence
end
|
|
|
|
|
2020-03-23 08:09:47 -04:00
|
|
|
# Resolves the project selected through the environment.
#
# PROJECT_ID takes precedence: when it is set, PROJECT_PATH is never
# consulted, even if the id lookup finds nothing.
#
# @return [Project, nil] the matching project, or nil when neither variable
#   is set or the lookup finds nothing
def find_project
  project_id = ENV['PROJECT_ID']
  return Project.find_by_id(project_id.to_i) if project_id

  project_path = ENV['PROJECT_PATH']
  Project.find_by_full_path(project_path) if project_path
end
|
|
|
|
|
2020-09-09 23:08:56 -04:00
|
|
|
# rubocop:disable Gitlab/RailsLogger
# Memoized logger shared by the cleanup tasks.
#
# In the development and production environments this is a logger writing to
# $stdout that is extended with ActiveSupport::Logger.broadcast(Rails.logger),
# at DEBUG level when debug? is true and INFO otherwise. In every other
# environment Rails.logger is returned as-is.
#
# @return [Logger]
def logger
  # defined? (rather than ||=) so the environment check runs only once.
  return @logger if defined?(@logger)

  console_environment = Rails.env.development? || Rails.env.production?
  return @logger = Rails.logger unless console_environment

  console_logger = Logger.new($stdout)
  console_logger.extend(ActiveSupport::Logger.broadcast(Rails.logger))
  console_logger.level = debug? ? Logger::DEBUG : Logger::INFO

  @logger = console_logger
end
# rubocop:enable Gitlab/RailsLogger
|
2012-12-24 23:14:05 -05:00
|
|
|
end
|
|
|
|
end
|