Add rake command to migrate archived traces from local storage to object storage

This commit is contained in:
Shinya Maeda 2018-08-16 14:28:47 +00:00 committed by Kamil Trzciński
parent e6da699e15
commit 63091cfe64
5 changed files with 164 additions and 39 deletions

View file

@ -67,6 +67,10 @@ module Ci
'', Ci::JobArtifact.select(1).where('ci_builds.id = ci_job_artifacts.job_id').archive)
end
# Builds for which a Ci::JobArtifact row matched by the `trace` scope exists.
scope :with_archived_trace, -> do
  # Correlated subquery: ties each artifact row back to the outer ci_builds row.
  trace_artifacts = Ci::JobArtifact.select(1).where('ci_builds.id = ci_job_artifacts.job_id').trace

  where('EXISTS (?)', trace_artifacts)
end
# Builds for which no Ci::JobArtifact row matched by the `trace` scope exists.
scope :without_archived_trace, -> do
  # Correlated subquery: ties each artifact row back to the outer ci_builds row.
  trace_artifacts = Ci::JobArtifact.select(1).where('ci_builds.id = ci_job_artifacts.job_id').trace

  where('NOT EXISTS (?)', trace_artifacts)
end
@ -77,6 +81,7 @@ module Ci
end
# Builds with an artifacts archive whose artifacts_file_store is NULL or LOCAL.
scope :with_artifacts_stored_locally, -> do
  with_artifacts_archive.where(artifacts_file_store: [nil, LegacyArtifactUploader::Store::LOCAL])
end

# Builds with an archived trace whose artifacts_file_store is NULL or LOCAL.
# NOTE(review): this filters on artifacts_file_store, not on a column of the
# trace artifact row matched by with_archived_trace — confirm this is the
# store column that actually describes where the trace file lives.
scope :with_archived_trace_stored_locally, -> do
  with_archived_trace.where(artifacts_file_store: [nil, LegacyArtifactUploader::Store::LOCAL])
end

# Builds with an artifacts archive that has no expiry or expires after the
# moment the scope is evaluated.
scope :with_artifacts_not_expired, -> do
  with_artifacts_archive.where('artifacts_expire_at IS NULL OR artifacts_expire_at > ?', Time.now)
end

# Builds with an artifacts archive whose expiry is before the moment the scope
# is evaluated.
scope :with_expired_artifacts, -> do
  with_artifacts_archive.where('artifacts_expire_at < ?', Time.now)
end

# Builds created after (today - 1 month).
scope :last_month, -> do
  where('created_at > ?', Date.today - 1.month)
end

View file

@ -0,0 +1,5 @@
---
title: Add rake command to migrate archived traces from local storage to object storage
merge_request: 21193
author:
type: added

View file

@ -3,10 +3,6 @@
Job traces are sent by GitLab Runner while it's processing a job. You can see
traces in job pages, pipelines, email notifications, etc.
There isn't a way to automatically expire old job logs, but it's safe to remove
them if they're taking up too much space. If you remove the logs manually, the
job output in the UI will be empty.
## Data flow
In general, there are two states in job traces: "live trace" and "archived trace".
@ -57,11 +53,55 @@ To change the location where the job logs will be stored, follow the steps below
## Uploading traces to object storage
An archived trace is considered as a [job artifact](job_artifacts.md).
Therefore, when you [set up an object storage](job_artifacts.md#object-storage-settings),
Archived traces are considered as [job artifacts](job_artifacts.md).
Therefore, when you [set up the object storage integration](job_artifacts.md#object-storage-settings),
job traces are automatically migrated to it along with the other job artifacts.
See [Data flow](#data-flow) to learn about the process.
See "Phase 4: uploading" in [Data flow](#data-flow) to learn about the process.
## How to archive legacy job trace files
Legacy job traces, which were created before GitLab 10.5, were not archived regularly.
They are in the same state as "2: overwriting" in the [Data flow](#data-flow) above.
To archive those legacy job traces, follow the instructions below.
1. Execute the following command
```bash
gitlab-rake gitlab:traces:archive
```
After you execute this task, the GitLab instance queues up Sidekiq jobs (asynchronous processes)
for migrating job trace files from local storage to object storage.
It could take some time to complete all the migration jobs. You can check the progress with the following commands:
```bash
sudo gitlab-rails console
```
```ruby
[1] pry(main)> Sidekiq::Stats.new.queues['pipeline_background:archive_trace']
=> 100
```
When the count reaches zero, the archiving process is done.
## How to migrate archived job traces to object storage
If job traces have already been archived into local storage, and you want to migrate those traces to object storage, follow the instructions below.
1. Ensure [Object storage integration for Job Artifacts](job_artifacts.md#object-storage-settings) is enabled
1. Execute the following command
```bash
gitlab-rake gitlab:traces:migrate
```
## How to remove job traces
There isn't a way to automatically expire old job logs, but it's safe to remove
them if they're taking up too much space. If you remove the logs manually, the
job output in the UI will be empty.
## New live trace architecture

View file

@ -18,5 +18,22 @@ namespace :gitlab do
logger.info("Scheduled #{job_ids.count} jobs. From #{job_ids.min} to #{job_ids.max}")
end
end
# Moves archived job traces that are still on local storage to object storage.
#
# Iterates the builds matched by `with_archived_trace_stored_locally` in
# batches of 10 and migrates each build's trace file to
# ObjectStorage::Store::REMOTE. Progress and failures are logged to STDOUT.
task migrate: :environment do
  logger = Logger.new(STDOUT)
  logger.info('Starting transfer of job traces')

  Ci::Build.joins(:project)
    .with_archived_trace_stored_locally
    .find_each(batch_size: 10) do |build|
    begin
      build.job_artifacts_trace.file.migrate!(ObjectStorage::Store::REMOTE)

      logger.info("Transferred job trace of #{build.id} to object storage")
    rescue => e
      # Best-effort: a failure on one build is reported and the loop continues
      # with the next build instead of aborting the whole task.
      logger.error("Failed to transfer job trace of #{build.id} with error: #{e.message}")
    end
  end
end
end
end

View file

@ -5,51 +5,109 @@ describe 'gitlab:traces rake tasks' do
Rake.application.rake_require 'tasks/gitlab/traces'
end
shared_examples 'passes the job id to worker' do
it do
expect(ArchiveTraceWorker).to receive(:bulk_perform_async).with([[job.id]])
describe 'gitlab:traces:archive' do
shared_examples 'passes the job id to worker' do
it do
expect(ArchiveTraceWorker).to receive(:bulk_perform_async).with([[job.id]])
run_rake_task('gitlab:traces:archive')
run_rake_task('gitlab:traces:archive')
end
end
# Shared example: running gitlab:traces:archive must not call
# ArchiveTraceWorker.bulk_perform_async at all.
shared_examples 'does not pass the job id to worker' do
it do
# The negative expectation is registered before the task runs so that any
# call made by the task is caught.
expect(ArchiveTraceWorker).not_to receive(:bulk_perform_async)
run_rake_task('gitlab:traces:archive')
end
end
# A build created with the :success and :trace_live traits is expected to be
# handed to the worker.
context 'when trace file stored in default path' do
let!(:job) { create(:ci_build, :success, :trace_live) }
it_behaves_like 'passes the job id to worker'
end
# A :success build whose `trace` column holds the trace text is expected to be
# handed to the worker.
context 'when trace is stored in database' do
let!(:job) { create(:ci_build, :success) }
before do
# Writes the trace text straight into the build's `trace` column.
job.update_column(:trace, 'trace in db')
end
it_behaves_like 'passes the job id to worker'
end
# A :success build that already has a :trace job artifact must not be passed
# to the worker.
context 'when job has trace artifact' do
let!(:job) { create(:ci_build, :success) }
before do
# Attach a trace artifact to the build before the task runs.
create(:ci_job_artifact, :trace, job: job)
end
it_behaves_like 'does not pass the job id to worker'
end
# A build created with the :running trait must not be passed to the worker,
# even though it has a live trace.
context 'when job is not finished yet' do
# Named `build` here rather than `job` as in the sibling contexts.
let!(:build) { create(:ci_build, :running, :trace_live) }
it_behaves_like 'does not pass the job id to worker'
end
end
# Shared example: running gitlab:traces:archive must not call
# ArchiveTraceWorker.bulk_perform_async at all.
shared_examples 'does not pass the job id to worker' do
it do
# The negative expectation is registered before the task runs so that any
# call made by the task is caught.
expect(ArchiveTraceWorker).not_to receive(:bulk_perform_async)
run_rake_task('gitlab:traces:archive')
end
end
# A build created with the :success and :trace_live traits is expected to be
# handed to the worker.
context 'when trace file stored in default path' do
let!(:job) { create(:ci_build, :success, :trace_live) }
it_behaves_like 'passes the job id to worker'
end
context 'when trace is stored in database' do
let!(:job) { create(:ci_build, :success) }
describe 'gitlab:traces:migrate' do
let(:object_storage_enabled) { false }
before do
job.update_column(:trace, 'trace in db')
stub_artifacts_object_storage(enabled: object_storage_enabled)
end
it_behaves_like 'passes the job id to worker'
end
subject { run_rake_task('gitlab:traces:migrate') }
context 'when job has trace artifact' do
let!(:job) { create(:ci_build, :success) }
let!(:job_trace) { create(:ci_job_artifact, :trace, file_store: store) }
before do
create(:ci_job_artifact, :trace, job: job)
context 'when local storage is used' do
let(:store) { ObjectStorage::Store::LOCAL }
# The trace artifact's file_store is nil and object storage is flagged as
# enabled; after the task runs the artifact is expected to be on REMOTE.
context 'and job does not have file store defined' do
let(:object_storage_enabled) { true }
let(:store) { nil }
it "migrates file to remote storage" do
# `subject` runs the gitlab:traces:migrate task.
subject
# Reload to read the file_store value persisted by the task.
expect(job_trace.reload.file_store).to eq(ObjectStorage::Store::REMOTE)
end
end
# Object storage is flagged as enabled; after the task runs the trace artifact
# is expected to be on REMOTE.
context 'and remote storage is defined' do
let(:object_storage_enabled) { true }
it "migrates file to remote storage" do
# `subject` runs the gitlab:traces:migrate task.
subject
# Reload to read the file_store value persisted by the task.
expect(job_trace.reload.file_store).to eq(ObjectStorage::Store::REMOTE)
end
end
# object_storage_enabled is not overridden here; the trace artifact is expected
# to still be on LOCAL after the task runs.
context 'and remote storage is not defined' do
it "fails to migrate to remote storage" do
# `subject` runs the gitlab:traces:migrate task.
subject
# Reload to confirm the persisted file_store was left unchanged.
expect(job_trace.reload.file_store).to eq(ObjectStorage::Store::LOCAL)
end
end
end
it_behaves_like 'does not pass the job id to worker'
end
context 'when remote storage is used' do
let(:object_storage_enabled) { true }
let(:store) { ObjectStorage::Store::REMOTE }
context 'when job is not finished yet' do
let!(:build) { create(:ci_build, :running, :trace_live) }
it "file stays on remote storage" do
subject
it_behaves_like 'does not pass the job id to worker'
expect(job_trace.reload.file_store).to eq(ObjectStorage::Store::REMOTE)
end
end
end
end