diff --git a/changelogs/unreleased/tc-rake-orphan-artifacts.yml b/changelogs/unreleased/tc-rake-orphan-artifacts.yml new file mode 100644 index 00000000000..7081bee640a --- /dev/null +++ b/changelogs/unreleased/tc-rake-orphan-artifacts.yml @@ -0,0 +1,5 @@ +--- +title: Add rake task to clean orphan artifact files +merge_request: 29681 +author: +type: added diff --git a/doc/administration/geo/replication/troubleshooting.md b/doc/administration/geo/replication/troubleshooting.md index 5394e6dd763..5bd6cc81362 100644 --- a/doc/administration/geo/replication/troubleshooting.md +++ b/doc/administration/geo/replication/troubleshooting.md @@ -504,6 +504,15 @@ To resolve this, run the following command: sudo gitlab-rake geo:db:refresh_foreign_tables ``` +## Expired artifacts + +If you notice for some reason there are more artifacts on the Geo +secondary node than on the Geo primary node, you can use the rake task +to [clean up orphan artifact files](../../../raketasks/cleanup.md#remove-orphan-artifact-files). + +On a Geo **secondary** node, this command will also clean up all Geo +registry records related to the orphan files on disk. + ## Fixing common errors This section documents common errors reported in the Admin UI and how to fix them. 
diff --git a/doc/raketasks/cleanup.md b/doc/raketasks/cleanup.md index f5c788af578..f880f31c39e 100644 --- a/doc/raketasks/cleanup.md +++ b/doc/raketasks/cleanup.md @@ -92,3 +92,48 @@ I, [2018-08-02T10:26:47.598424 #45087] INFO -- : Looking for orphaned remote up I, [2018-08-02T10:26:47.753131 #45087] INFO -- : Moved to lost and found: @hashed/6b/DSC_6152.JPG -> lost_and_found/@hashed/6b/DSC_6152.JPG I, [2018-08-02T10:26:47.764356 #45087] INFO -- : Moved to lost and found: @hashed/79/02/7902699be42c8a8e46fbbb4501726517e86b22c56a189f7625a6da49081b2451/711491b29d3eb08837798c4909e2aa4d/DSC00314.jpg -> lost_and_found/@hashed/79/02/7902699be42c8a8e46fbbb4501726517e86b22c56a189f7625a6da49081b2451/711491b29d3eb08837798c4909e2aa4d/DSC00314.jpg ``` + +## Remove orphan artifact files + +When you notice there are more job artifact files on disk than there +should be, you can run: + +```shell +gitlab-rake gitlab:cleanup:orphan_job_artifact_files +``` + +This command: + +- Scans through the entire artifacts folder. +- Checks which files still have a record in the database. +- If no database record is found, the file is deleted from disk. + +By default, this task does not delete anything but shows what it can +delete. Run the command with `DRY_RUN=false` if you actually want to +delete the files: + +```shell +gitlab-rake gitlab:cleanup:orphan_job_artifact_files DRY_RUN=false +``` + +You can also limit the number of files to delete with `LIMIT`: + +```shell +gitlab-rake gitlab:cleanup:orphan_job_artifact_files LIMIT=100 +``` + +This will only delete up to 100 files from disk. You can use this to +delete a small set for testing purposes. + +If you provide `DEBUG=1`, you'll see the full path of every file that +is detected as being an orphan. + +If `ionice` is installed, the task uses it to ensure the command is +not causing too much load on the disk. You can configure the niceness +level with `NICENESS`. Below are the valid levels, but consult +`man 1 ionice` to be sure. 
+ +- `0` or `None` +- `1` or `Realtime` +- `2` or `Best-effort` (default) +- `3` or `Idle` diff --git a/lib/gitlab/cleanup/orphan_job_artifact_files.rb b/lib/gitlab/cleanup/orphan_job_artifact_files.rb new file mode 100644 index 00000000000..ee7164b3e55 --- /dev/null +++ b/lib/gitlab/cleanup/orphan_job_artifact_files.rb @@ -0,0 +1,132 @@ +# frozen_string_literal: true + +module Gitlab + module Cleanup + # Collects the paths yielded by find_artifacts into + # OrphanJobArtifactFilesBatch objects and cleans them batch by batch. + class OrphanJobArtifactFiles + include Gitlab::Utils::StrongMemoize + + ABSOLUTE_ARTIFACT_DIR = ::JobArtifactUploader.root.freeze + LOST_AND_FOUND = File.join(ABSOLUTE_ARTIFACT_DIR, '-', 'lost+found').freeze + BATCH_SIZE = 500 + DEFAULT_NICENESS = 'Best-effort' + + attr_accessor :batch, :total_found, :total_cleaned + attr_reader :limit, :dry_run, :niceness, :logger + + # limit: stop once this many orphans have been counted (nil = no limit). + # dry_run: passed on to every batch; defaults to true. + # niceness: falls back to DEFAULT_NICENESS when nil. + # logger: falls back to Rails.logger when nil. + def initialize(limit: nil, dry_run: true, niceness: nil, logger: nil) + @limit = limit + @dry_run = dry_run + @niceness = niceness || DEFAULT_NICENESS + @logger = logger || Rails.logger + @total_found = @total_cleaned = 0 + + new_batch! + end + + # Adds every path yielded by find_artifacts to the current batch, cleans + # the batch whenever it is full, stops early once limit_reached? is true, + # then cleans the remaining partial batch and logs the totals. + def run! + log_info('Looking for orphan job artifacts to clean up') + + find_artifacts do |artifact_file| + batch << artifact_file + + clean_batch! if batch.full? + break if limit_reached? + end + + clean_batch! + + log_info("Processed #{total_found} job artifacts to find and clean #{total_cleaned} orphans.") + end + + private + + def new_batch! + self.batch = ::Gitlab::Cleanup::OrphanJobArtifactFilesBatch + .new(batch_size: batch_size, logger: logger, dry_run: dry_run) + end + + # Cleans the current batch, adds its counts to the totals and starts a + # fresh batch. + def clean_batch! + batch.clean! + + update_stats!(batch) + + new_batch! + end + + # total_cleaned is taken from batch.lost_and_found, so it is counted the + # same way whether or not dry_run is set. + def update_stats!(batch) + self.total_found += batch.artifact_files.count + self.total_cleaned += batch.lost_and_found.count + end + + def limit_reached? + return false unless limit + + total_cleaned >= limit + end + + # Size of the next batch: BATCH_SIZE when there is no limit, nil once the + # limit has been reached, otherwise capped at what is left of the limit. + def batch_size + return BATCH_SIZE unless limit + return if limit_reached? 
+ + todo = limit - total_cleaned + [BATCH_SIZE, todo].min + end + + def find_artifacts + Open3.popen3(*find_command) do |stdin, stdout, stderr, status_thread| + stdout.each_line do |line| + yield line + end + + log_error(stderr.read.color(:red)) unless status_thread.value.success? + end + end + + def find_command + strong_memoize(:find_command) do + cmd = %W[find -L #{absolute_artifact_dir}] + + # Search for Job Artifact IDs, they are found 6 directory + # levels deep. For example: + # shared/artifacts/2c/62/2c...a3/2019_02_27/836/628/job.log + # 1 2 3 4 5 6 + # | | | ^- date | ^- Job Artifact ID + # | | | ^- Job ID + # ^--+--+- components of hashed storage project path + cmd += %w[-mindepth 6 -maxdepth 6] + + # Artifact directories are named on their ID + cmd += %w[-type d] + + if ionice + raise ArgumentError, 'Invalid niceness' unless niceness.match?(/^\w[\w\-]*$/) + + cmd.unshift(*%W[#{ionice} --class #{niceness}]) + end + + log_info("find command: '#{cmd.join(' ')}'") + + cmd + end + end + + def absolute_artifact_dir + File.absolute_path(ABSOLUTE_ARTIFACT_DIR) + end + + def ionice + strong_memoize(:ionice) do + Gitlab::Utils.which('ionice') + end + end + + def log_info(msg, params = {}) + logger.info("#{'[DRY RUN]' if dry_run} #{msg}") + end + + def log_error(msg, params = {}) + logger.error(msg) + end + end + end +end diff --git a/lib/gitlab/cleanup/orphan_job_artifact_files_batch.rb b/lib/gitlab/cleanup/orphan_job_artifact_files_batch.rb new file mode 100644 index 00000000000..5c30258c0fc --- /dev/null +++ b/lib/gitlab/cleanup/orphan_job_artifact_files_batch.rb @@ -0,0 +1,80 @@ +# frozen_string_literal: true + +module Gitlab + module Cleanup + class OrphanJobArtifactFilesBatch + BatchFull = Class.new(StandardError) + + class ArtifactFile + attr_accessor :path + + def initialize(path) + @path = path + end + + def artifact_id + path.split('/').last.to_i + end + end + + include Gitlab::Utils::StrongMemoize + + attr_reader :batch_size, :dry_run + attr_accessor 
:artifact_files + + def initialize(batch_size:, dry_run: true, logger: Rails.logger) + @batch_size = batch_size + @dry_run = dry_run + @logger = logger + @artifact_files = [] + end + + # Runs clean_one! for every entry of lost_and_found; does nothing when no + # files were added. + def clean! + return if artifact_files.empty? + + lost_and_found.each do |artifact| + clean_one!(artifact) + end + end + + def full? + artifact_files.count >= batch_size + end + + # Wraps the path in an ArtifactFile and adds it to the batch. + # Raises BatchFull when the batch is already full. + def <<(artifact_path) + raise BatchFull, "Batch full! Already contains #{artifact_files.count} artifacts" if full? + + artifact_files << ArtifactFile.new(artifact_path) + end + + # Artifact files whose ID is not among the IDs returned by the + # Ci::JobArtifact lookup. Wrapped in strong_memoize, so presumably files + # added after the first call are not taken into account - confirm. + def lost_and_found + strong_memoize(:lost_and_found) do + artifact_file_ids = artifact_files.map(&:artifact_id) + existing_artifact_ids = ::Ci::JobArtifact.id_in(artifact_file_ids).pluck_primary_key + + artifact_files.reject { |artifact| existing_artifact_ids.include?(artifact.artifact_id) } + end + end + + private + + # Logs the orphan path at debug level and removes it unless dry_run is set. + def clean_one!(artifact_file) + log_debug("Found orphan job artifact file @ #{artifact_file.path}") + + remove_file!(artifact_file) unless dry_run + end + + def remove_file!(artifact_file) + FileUtils.rm_rf(artifact_file.path) + end + + def log_info(msg, params = {}) + @logger.info("#{'[DRY RUN]' if dry_run} #{msg}") + end + + def log_debug(msg, params = {}) + @logger.debug(msg) + end + end + end +end diff --git a/lib/tasks/gitlab/cleanup.rake b/lib/tasks/gitlab/cleanup.rake index 760331620ef..105ef417df3 100644 --- a/lib/tasks/gitlab/cleanup.rake +++ b/lib/tasks/gitlab/cleanup.rake @@ -115,6 +115,18 @@ namespace :gitlab do end end + # Builds the cleaner from the LIMIT, DRY_RUN and NICENESS helpers below and + # runs it; in dry-run mode a hint on how to really delete is logged afterwards. + desc 'GitLab | Cleanup | Clean orphan job artifact files' + task orphan_job_artifact_files: :gitlab_environment do + warn_user_is_not_gitlab + + cleaner = Gitlab::Cleanup::OrphanJobArtifactFiles.new(limit: limit, dry_run: dry_run?, niceness: niceness, logger: logger) + cleaner.run! + + if dry_run? + logger.info "To clean up these files run this command with DRY_RUN=false".color(:yellow) + end + end + def remove? 
ENV['REMOVE'] == 'true' end @@ -123,12 +135,25 @@ namespace :gitlab do ENV['DRY_RUN'] != 'false' end + # True when the DEBUG environment variable holds a non-blank value; used + # below to pick the level of the STDOUT logger. + def debug? + ENV['DEBUG'].present? + end + + # LIMIT converted with to_i, or nil when the variable is not set. + # NOTE(review): to_i maps non-numeric text to 0 - confirm that is acceptable. + def limit + ENV['LIMIT']&.to_i + end + + # NICENESS value, or nil when it is unset or blank. + def niceness + ENV['NICENESS'].presence + end + def logger return @logger if defined?(@logger) @logger = if Rails.env.development? || Rails.env.production? Logger.new(STDOUT).tap do |stdout_logger| stdout_logger.extend(ActiveSupport::Logger.broadcast(Rails.logger)) + stdout_logger.level = debug? ? Logger::DEBUG : Logger::INFO end else Rails.logger diff --git a/spec/lib/gitlab/cleanup/orphan_job_artifact_files_batch_spec.rb b/spec/lib/gitlab/cleanup/orphan_job_artifact_files_batch_spec.rb new file mode 100644 index 00000000000..4d8edfeac80 --- /dev/null +++ b/spec/lib/gitlab/cleanup/orphan_job_artifact_files_batch_spec.rb @@ -0,0 +1,66 @@ +# frozen_string_literal: true + +require 'spec_helper' + +describe Gitlab::Cleanup::OrphanJobArtifactFilesBatch do + let(:batch_size) { 10 } + let(:dry_run) { true } + + subject(:batch) { described_class.new(batch_size: batch_size, dry_run: dry_run) } + + context 'no dry run' do + let(:dry_run) { false } + + it 'deletes only orphan job artifacts from disk' do + job_artifact = create(:ci_job_artifact, :archive) + orphan_artifact = create(:ci_job_artifact, :archive) + batch << artifact_path(job_artifact) + batch << artifact_path(orphan_artifact) + # delete only the database record so the file is left behind on disk + orphan_artifact.delete + + batch.clean! 
+ + expect(batch.artifact_files.count).to eq(2) + expect(batch.lost_and_found.count).to eq(1) + expect(batch.lost_and_found.first.artifact_id).to eq(orphan_artifact.id) + end + + it 'does not mix up job ID and artifact ID' do + # take maximum ID of both tables to avoid any collision + max_id = [Ci::Build.maximum(:id), Ci::JobArtifact.maximum(:id)].compact.max.to_i + job_a = create(:ci_build, id: max_id + 1) + job_b = create(:ci_build, id: max_id + 2) + # reuse the build IDs for the job artifact IDs, but swap them + job_artifact_b = create(:ci_job_artifact, :archive, job: job_b, id: max_id + 1) + job_artifact_a = create(:ci_job_artifact, :archive, job: job_a, id: max_id + 2) + + batch << artifact_path(job_artifact_a) + batch << artifact_path(job_artifact_b) + + job_artifact_b.delete + + batch.clean! + + expect(File.exist?(job_artifact_a.file.path)).to be_truthy + expect(File.exist?(job_artifact_b.file.path)).to be_falsey + end + end + + context 'with dry run' do + it 'does not remove files' do + job_artifact = create(:ci_job_artifact, :archive) + batch << job_artifact.file.path + job_artifact.delete + + expect(batch).not_to receive(:remove_file!) + + batch.clean! 
+ + expect(File.exist?(job_artifact.file.path)).to be_truthy + end + end + + # Directory that contains the artifact file; its last path segment is what + # the batch reads as the artifact ID. + def artifact_path(job_artifact) + Pathname.new(job_artifact.file.path).parent.to_s + end +end diff --git a/spec/lib/gitlab/cleanup/orphan_job_artifact_files_spec.rb b/spec/lib/gitlab/cleanup/orphan_job_artifact_files_spec.rb new file mode 100644 index 00000000000..974cc2c4660 --- /dev/null +++ b/spec/lib/gitlab/cleanup/orphan_job_artifact_files_spec.rb @@ -0,0 +1,68 @@ +# frozen_string_literal: true + +require 'spec_helper' + +describe Gitlab::Cleanup::OrphanJobArtifactFiles do + let(:null_logger) { Logger.new('/dev/null') } + subject(:cleanup) { described_class.new(logger: null_logger) } + + before do + allow(null_logger).to receive(:info) + end + + it 'passes on dry_run' do + expect(Gitlab::Cleanup::OrphanJobArtifactFilesBatch) + .to receive(:new) + .with(dry_run: false, batch_size: anything, logger: anything) + .at_least(:once) + .and_call_original + + described_class.new(dry_run: false).run! + end + + # NOTE(review): niceness only ends up in the command when ionice is found, + # so this example presumably needs ionice on the machine - confirm. + it 'errors when invalid niceness is given' do + cleanup = described_class.new(logger: null_logger, niceness: 'FooBar') + + expect(null_logger).to receive(:error).with(/FooBar/) + + cleanup.run! + end + + it 'finds artifacts on disk' do + artifact = create(:ci_job_artifact, :archive) + + expect(cleanup).to receive(:find_artifacts).and_yield(artifact.file.path) + cleanup.run! + end + + it 'stops when limit is reached' do + cleanup = described_class.new(limit: 1) + + mock_artifacts_found(cleanup, 'tmp/foo/bar/1', 'tmp/foo/bar/2') + + cleanup.run! + + expect(cleanup.total_found).to eq(1) + end + + it 'cleans even if batch is not full' do + mock_artifacts_found(cleanup, 'tmp/foo/bar/1') + + expect(cleanup).to receive(:clean_batch!).and_call_original + cleanup.run! 
+ end + + it 'cleans in batches' do + stub_const("#{described_class.name}::BATCH_SIZE", 2) + mock_artifacts_found(cleanup, 'tmp/foo/bar/1', 'tmp/foo/bar/2', 'tmp/foo/bar/3') + + expect(cleanup).to receive(:clean_batch!).twice.and_call_original + cleanup.run! + end + + # Stubs find_artifacts on the given cleanup object so that it yields the + # given paths instead of running the real find command. + def mock_artifacts_found(cleanup, *files) + mock = allow(cleanup).to receive(:find_artifacts) + + files.each { |file| mock.and_yield(file) } + end +end diff --git a/spec/tasks/gitlab/cleanup_rake_spec.rb b/spec/tasks/gitlab/cleanup_rake_spec.rb index 19794227d9f..92c094f08a4 100644 --- a/spec/tasks/gitlab/cleanup_rake_spec.rb +++ b/spec/tasks/gitlab/cleanup_rake_spec.rb @@ -156,4 +156,33 @@ describe 'gitlab:cleanup rake tasks' do end end end + + describe 'gitlab:cleanup:orphan_job_artifact_files' do + subject(:rake_task) { run_rake_task('gitlab:cleanup:orphan_job_artifact_files') } + + it 'runs the task without errors' do + expect(Gitlab::Cleanup::OrphanJobArtifactFiles) + .to receive(:new).and_call_original + + expect { rake_task }.not_to raise_error + end + + context 'with DRY_RUN set to false' do + before do + stub_env('DRY_RUN', 'false') + end + + # only dry_run is pinned; the other keyword arguments may be anything + it 'passes dry_run correctly' do + expect(Gitlab::Cleanup::OrphanJobArtifactFiles) + .to receive(:new) + .with(limit: anything, + dry_run: false, + niceness: anything, + logger: anything) + .and_call_original + + rake_task + end + end + end end