From 63091cfe6432b2290f6ccd0c1e8105c8900b9df5 Mon Sep 17 00:00:00 2001 From: Shinya Maeda Date: Thu, 16 Aug 2018 14:28:47 +0000 Subject: [PATCH] Add rake command to migrate archived traces from local storage to object storage --- app/models/ci/build.rb | 5 + ...rate-locally-persisted-archived-traces.yml | 5 + doc/administration/job_traces.md | 54 +++++++- lib/tasks/gitlab/traces.rake | 17 +++ spec/tasks/gitlab/traces_rake_spec.rb | 122 +++++++++++++----- 5 files changed, 164 insertions(+), 39 deletions(-) create mode 100644 changelogs/unreleased/add-rake-command-to-migrate-locally-persisted-archived-traces.yml diff --git a/app/models/ci/build.rb b/app/models/ci/build.rb index 3c69677baf0..faa160ad6ba 100644 --- a/app/models/ci/build.rb +++ b/app/models/ci/build.rb @@ -67,6 +67,10 @@ module Ci '', Ci::JobArtifact.select(1).where('ci_builds.id = ci_job_artifacts.job_id').archive) end + scope :with_archived_trace, ->() do + where('EXISTS (?)', Ci::JobArtifact.select(1).where('ci_builds.id = ci_job_artifacts.job_id').trace) + end + scope :without_archived_trace, ->() do where('NOT EXISTS (?)', Ci::JobArtifact.select(1).where('ci_builds.id = ci_job_artifacts.job_id').trace) end @@ -77,6 +81,7 @@ module Ci end scope :with_artifacts_stored_locally, -> { with_artifacts_archive.where(artifacts_file_store: [nil, LegacyArtifactUploader::Store::LOCAL]) } + scope :with_archived_trace_stored_locally, -> { with_archived_trace.where(artifacts_file_store: [nil, LegacyArtifactUploader::Store::LOCAL]) } scope :with_artifacts_not_expired, ->() { with_artifacts_archive.where('artifacts_expire_at IS NULL OR artifacts_expire_at > ?', Time.now) } scope :with_expired_artifacts, ->() { with_artifacts_archive.where('artifacts_expire_at < ?', Time.now) } scope :last_month, ->() { where('created_at > ?', Date.today - 1.month) } diff --git a/changelogs/unreleased/add-rake-command-to-migrate-locally-persisted-archived-traces.yml 
b/changelogs/unreleased/add-rake-command-to-migrate-locally-persisted-archived-traces.yml new file mode 100644 index 00000000000..b82344e3c9c --- /dev/null +++ b/changelogs/unreleased/add-rake-command-to-migrate-locally-persisted-archived-traces.yml @@ -0,0 +1,5 @@ +--- +title: Add rake command to migrate archived traces from local storage to object storage +merge_request: 21193 +author: +type: added diff --git a/doc/administration/job_traces.md b/doc/administration/job_traces.md index 24d1a3fd151..6e2f67f61bc 100644 --- a/doc/administration/job_traces.md +++ b/doc/administration/job_traces.md @@ -3,10 +3,6 @@ Job traces are sent by GitLab Runner while it's processing a job. You can see traces in job pages, pipelines, email notifications, etc. -There isn't a way to automatically expire old job logs, but it's safe to remove -them if they're taking up too much space. If you remove the logs manually, the -job output in the UI will be empty. - ## Data flow In general, there are two states in job traces: "live trace" and "archived trace". @@ -57,11 +53,55 @@ To change the location where the job logs will be stored, follow the steps below ## Uploading traces to object storage -An archived trace is considered as a [job artifact](job_artifacts.md). -Therefore, when you [set up an object storage](job_artifacts.md#object-storage-settings), +Archived traces are considered as [job artifacts](job_artifacts.md). +Therefore, when you [set up the object storage integration](job_artifacts.md#object-storage-settings), job traces are automatically migrated to it along with the other job artifacts. -See [Data flow](#data-flow) to learn about the process. +See "Phase 4: uploading" in [Data flow](#data-flow) to learn about the process. + +## How to archive legacy job trace files + +Legacy job traces, which were created before GitLab 10.5, were not archived regularly. +They are in the same state as "2: overwriting" in the [Data flow](#data-flow) section above. 
+To archive those legacy job traces, please follow the instructions below. + +1. Execute the following command: + + ```bash + gitlab-rake gitlab:traces:archive + ``` + + After you execute this task, the GitLab instance queues up Sidekiq jobs (asynchronous processes) + for migrating job trace files from local storage to object storage. + It could take time to complete all the migration jobs. You can check the progress with the following commands: + + ```bash + sudo gitlab-rails console + ``` + + ```bash + [1] pry(main)> Sidekiq::Stats.new.queues['pipeline_background:archive_trace'] + => 100 + ``` + + If the count becomes zero, the archiving processes are done. + +## How to migrate archived job traces to object storage + +If job traces have already been archived into local storage, and you want to migrate those traces to object storage, please follow the instructions below. + +1. Ensure [Object storage integration for Job Artifacts](job_artifacts.md#object-storage-settings) is enabled. +1. Execute the following command: + + ```bash + gitlab-rake gitlab:traces:migrate + ``` + +## How to remove job traces + +There isn't a way to automatically expire old job logs, but it's safe to remove +them if they're taking up too much space. If you remove the logs manually, the +job output in the UI will be empty. ## New live trace architecture diff --git a/lib/tasks/gitlab/traces.rake b/lib/tasks/gitlab/traces.rake index ddcca69711f..5a232091a7e 100644 --- a/lib/tasks/gitlab/traces.rake +++ b/lib/tasks/gitlab/traces.rake @@ -18,5 +18,22 @@ namespace :gitlab do logger.info("Scheduled #{job_ids.count} jobs. 
From #{job_ids.min} to #{job_ids.max}") end end + + task migrate: :environment do + logger = Logger.new(STDOUT) + logger.info('Starting transfer of job traces') + + Ci::Build.joins(:project) + .with_archived_trace_stored_locally + .find_each(batch_size: 10) do |build| + begin + build.job_artifacts_trace.file.migrate!(ObjectStorage::Store::REMOTE) + + logger.info("Transferred job trace of #{build.id} to object storage") + rescue => e + logger.error("Failed to transfer artifacts of #{build.id} with error: #{e.message}") + end + end + end end end diff --git a/spec/tasks/gitlab/traces_rake_spec.rb b/spec/tasks/gitlab/traces_rake_spec.rb index bd18e8ffc1e..aaf0d7242dd 100644 --- a/spec/tasks/gitlab/traces_rake_spec.rb +++ b/spec/tasks/gitlab/traces_rake_spec.rb @@ -5,51 +5,109 @@ describe 'gitlab:traces rake tasks' do Rake.application.rake_require 'tasks/gitlab/traces' end - shared_examples 'passes the job id to worker' do - it do - expect(ArchiveTraceWorker).to receive(:bulk_perform_async).with([[job.id]]) + describe 'gitlab:traces:archive' do + shared_examples 'passes the job id to worker' do + it do + expect(ArchiveTraceWorker).to receive(:bulk_perform_async).with([[job.id]]) - run_rake_task('gitlab:traces:archive') + run_rake_task('gitlab:traces:archive') + end + end + + shared_examples 'does not pass the job id to worker' do + it do + expect(ArchiveTraceWorker).not_to receive(:bulk_perform_async) + + run_rake_task('gitlab:traces:archive') + end + end + + context 'when trace file stored in default path' do + let!(:job) { create(:ci_build, :success, :trace_live) } + + it_behaves_like 'passes the job id to worker' + end + + context 'when trace is stored in database' do + let!(:job) { create(:ci_build, :success) } + + before do + job.update_column(:trace, 'trace in db') + end + + it_behaves_like 'passes the job id to worker' + end + + context 'when job has trace artifact' do + let!(:job) { create(:ci_build, :success) } + + before do + create(:ci_job_artifact, :trace, 
job: job) + end + + it_behaves_like 'does not pass the job id to worker' + end + + context 'when job is not finished yet' do + let!(:build) { create(:ci_build, :running, :trace_live) } + + it_behaves_like 'does not pass the job id to worker' end end - shared_examples 'does not pass the job id to worker' do - it do - expect(ArchiveTraceWorker).not_to receive(:bulk_perform_async) - - run_rake_task('gitlab:traces:archive') - end - end - - context 'when trace file stored in default path' do - let!(:job) { create(:ci_build, :success, :trace_live) } - - it_behaves_like 'passes the job id to worker' - end - - context 'when trace is stored in database' do - let!(:job) { create(:ci_build, :success) } + describe 'gitlab:traces:migrate' do + let(:object_storage_enabled) { false } before do - job.update_column(:trace, 'trace in db') + stub_artifacts_object_storage(enabled: object_storage_enabled) end - it_behaves_like 'passes the job id to worker' - end + subject { run_rake_task('gitlab:traces:migrate') } - context 'when job has trace artifact' do - let!(:job) { create(:ci_build, :success) } + let!(:job_trace) { create(:ci_job_artifact, :trace, file_store: store) } - before do - create(:ci_job_artifact, :trace, job: job) + context 'when local storage is used' do + let(:store) { ObjectStorage::Store::LOCAL } + + context 'and job does not have file store defined' do + let(:object_storage_enabled) { true } + let(:store) { nil } + + it "migrates file to remote storage" do + subject + + expect(job_trace.reload.file_store).to eq(ObjectStorage::Store::REMOTE) + end + end + + context 'and remote storage is defined' do + let(:object_storage_enabled) { true } + + it "migrates file to remote storage" do + subject + + expect(job_trace.reload.file_store).to eq(ObjectStorage::Store::REMOTE) + end + end + + context 'and remote storage is not defined' do + it "fails to migrate to remote storage" do + subject + + expect(job_trace.reload.file_store).to eq(ObjectStorage::Store::LOCAL) + end + 
end end - it_behaves_like 'does not pass the job id to worker' - end + context 'when remote storage is used' do + let(:object_storage_enabled) { true } + let(:store) { ObjectStorage::Store::REMOTE } - context 'when job is not finished yet' do - let!(:build) { create(:ci_build, :running, :trace_live) } + it "file stays on remote storage" do + subject - it_behaves_like 'does not pass the job id to worker' + expect(job_trace.reload.file_store).to eq(ObjectStorage::Store::REMOTE) + end + end end end