Add rake command to migrate archived traces from local storage to object storage
This commit is contained in:
parent
e6da699e15
commit
63091cfe64
5 changed files with 164 additions and 39 deletions
|
@ -67,6 +67,10 @@ module Ci
|
|||
'', Ci::JobArtifact.select(1).where('ci_builds.id = ci_job_artifacts.job_id').archive)
|
||||
end
|
||||
|
||||
# Builds for which a trace-type row exists in ci_job_artifacts.
scope :with_archived_trace, -> do
  trace_artifacts = Ci::JobArtifact.select(1).where('ci_builds.id = ci_job_artifacts.job_id').trace
  where('EXISTS (?)', trace_artifacts)
end
|
||||
|
||||
# Builds for which no trace-type row exists in ci_job_artifacts.
scope :without_archived_trace, -> do
  trace_artifacts = Ci::JobArtifact.select(1).where('ci_builds.id = ci_job_artifacts.job_id').trace
  where('NOT EXISTS (?)', trace_artifacts)
end
|
||||
|
@ -77,6 +81,7 @@ module Ci
|
|||
end
|
||||
|
||||
# Builds with an artifacts archive whose artifacts_file_store is NULL or LOCAL.
scope :with_artifacts_stored_locally, -> do
  local_stores = [nil, LegacyArtifactUploader::Store::LOCAL]
  with_artifacts_archive.where(artifacts_file_store: local_stores)
end
|
||||
# Builds whose archived trace file is still on local storage.
#
# The store is read from the trace artifact's own `file_store` column
# (NULL is treated as local). The build's `artifacts_file_store` column is
# what `with_artifacts_stored_locally` checks for the artifacts archive, so
# filtering on it here would select builds by where their archive lives
# rather than by where the trace lives.
scope :with_archived_trace_stored_locally, -> do
  where('EXISTS (?)',
        Ci::JobArtifact.select(1)
          .where('ci_builds.id = ci_job_artifacts.job_id')
          .trace
          .where(file_store: [nil, ObjectStorage::Store::LOCAL]))
end
|
||||
# Builds with an artifacts archive whose expiry is unset or later than now.
scope :with_artifacts_not_expired, -> do
  with_artifacts_archive
    .where('artifacts_expire_at IS NULL OR artifacts_expire_at > ?', Time.now)
end
|
||||
# Builds with an artifacts archive whose expiry is earlier than now.
scope :with_expired_artifacts, -> do
  with_artifacts_archive
    .where('artifacts_expire_at < ?', Time.now)
end
|
||||
# Builds created after the date one month before today.
scope :last_month, -> do
  cutoff = Date.today - 1.month
  where('created_at > ?', cutoff)
end
|
||||
|
|
|
@ -0,0 +1,5 @@
|
|||
---
|
||||
title: Add rake command to migrate archived traces from local storage to object storage
|
||||
merge_request: 21193
|
||||
author:
|
||||
type: added
|
|
@ -3,10 +3,6 @@
|
|||
Job traces are sent by GitLab Runner while it's processing a job. You can see
|
||||
traces in job pages, pipelines, email notifications, etc.
|
||||
|
||||
There isn't a way to automatically expire old job logs, but it's safe to remove
|
||||
them if they're taking up too much space. If you remove the logs manually, the
|
||||
job output in the UI will be empty.
|
||||
|
||||
## Data flow
|
||||
|
||||
In general, there are two states in job traces: "live trace" and "archived trace".
|
||||
|
@ -57,11 +53,55 @@ To change the location where the job logs will be stored, follow the steps below
|
|||
|
||||
## Uploading traces to object storage
|
||||
|
||||
An archived trace is considered as a [job artifact](job_artifacts.md).
|
||||
Therefore, when you [set up an object storage](job_artifacts.md#object-storage-settings),
|
||||
Archived traces are considered [job artifacts](job_artifacts.md).
|
||||
Therefore, when you [set up the object storage integration](job_artifacts.md#object-storage-settings),
|
||||
job traces are automatically migrated to it along with the other job artifacts.
|
||||
|
||||
See [Data flow](#data-flow) to learn about the process.
|
||||
See "Phase 4: uploading" in [Data flow](#data-flow) to learn about the process.
|
||||
|
||||
## How to archive legacy job trace files
|
||||
|
||||
Legacy job traces, which were created before GitLab 10.5, were not archived regularly.
|
||||
They are in the same state as the "2: overwriting" phase in the [Data flow](#data-flow) above.
|
||||
To archive those legacy job traces, please follow the instructions below.
|
||||
|
||||
1. Execute the following command
|
||||
|
||||
```bash
|
||||
gitlab-rake gitlab:traces:archive
|
||||
```
|
||||
|
||||
After you execute this task, the GitLab instance queues up Sidekiq jobs (asynchronous processes)
|
||||
for migrating job trace files from local storage to object storage.
|
||||
It could take time to complete all the migration jobs. You can check the progress with the following command:
|
||||
|
||||
```bash
|
||||
sudo gitlab-rails console
|
||||
```
|
||||
|
||||
```ruby
|
||||
[1] pry(main)> Sidekiq::Stats.new.queues['pipeline_background:archive_trace']
|
||||
=> 100
|
||||
```
|
||||
|
||||
When the count reaches zero, the archiving process is done.
|
||||
|
||||
## How to migrate archived job traces to object storage
|
||||
|
||||
If job traces have already been archived into local storage, and you want to migrate those traces to object storage, please follow the instructions below.
|
||||
|
||||
1. Ensure [Object storage integration for Job Artifacts](job_artifacts.md#object-storage-settings) is enabled
|
||||
1. Execute the following command
|
||||
|
||||
```bash
|
||||
gitlab-rake gitlab:traces:migrate
|
||||
```
|
||||
|
||||
## How to remove job traces
|
||||
|
||||
There isn't a way to automatically expire old job logs, but it's safe to remove
|
||||
them if they're taking up too much space. If you remove the logs manually, the
|
||||
job output in the UI will be empty.
|
||||
|
||||
## New live trace architecture
|
||||
|
||||
|
|
|
@ -18,5 +18,22 @@ namespace :gitlab do
|
|||
logger.info("Scheduled #{job_ids.count} jobs. From #{job_ids.min} to #{job_ids.max}")
|
||||
end
|
||||
end
|
||||
|
||||
# Transfers archived job traces to object storage.
#
# Walks the builds returned by `with_archived_trace_stored_locally` in batches
# of 10 and asks each trace file to migrate itself to the REMOTE store.
# A failure on one build is logged and the loop carries on with the next one.
task migrate: :environment do
  logger = Logger.new(STDOUT)
  logger.info('Starting transfer of job traces')

  Ci::Build.joins(:project)
    .with_archived_trace_stored_locally
    .find_each(batch_size: 10) do |build|
    begin
      build.job_artifacts_trace.file.migrate!(ObjectStorage::Store::REMOTE)

      logger.info("Transferred job trace of #{build.id} to object storage")
    rescue => e
      # Deliberately best-effort: report the build and keep going.
      # The message names "job trace" (not "artifacts") to match what this
      # task actually moves and the wording of the success line.
      logger.error("Failed to transfer job trace of #{build.id} with error: #{e.message}")
    end
  end
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -5,51 +5,109 @@ describe 'gitlab:traces rake tasks' do
|
|||
Rake.application.rake_require 'tasks/gitlab/traces'
|
||||
end
|
||||
|
||||
shared_examples 'passes the job id to worker' do
|
||||
it do
|
||||
expect(ArchiveTraceWorker).to receive(:bulk_perform_async).with([[job.id]])
|
||||
describe 'gitlab:traces:archive' do
|
||||
shared_examples 'passes the job id to worker' do
|
||||
it do
|
||||
expect(ArchiveTraceWorker).to receive(:bulk_perform_async).with([[job.id]])
|
||||
|
||||
run_rake_task('gitlab:traces:archive')
|
||||
run_rake_task('gitlab:traces:archive')
|
||||
end
|
||||
end
|
||||
|
||||
shared_examples 'does not pass the job id to worker' do
|
||||
it do
|
||||
expect(ArchiveTraceWorker).not_to receive(:bulk_perform_async)
|
||||
|
||||
run_rake_task('gitlab:traces:archive')
|
||||
end
|
||||
end
|
||||
|
||||
context 'when trace file stored in default path' do
|
||||
let!(:job) { create(:ci_build, :success, :trace_live) }
|
||||
|
||||
it_behaves_like 'passes the job id to worker'
|
||||
end
|
||||
|
||||
context 'when trace is stored in database' do
|
||||
let!(:job) { create(:ci_build, :success) }
|
||||
|
||||
before do
|
||||
job.update_column(:trace, 'trace in db')
|
||||
end
|
||||
|
||||
it_behaves_like 'passes the job id to worker'
|
||||
end
|
||||
|
||||
context 'when job has trace artifact' do
|
||||
let!(:job) { create(:ci_build, :success) }
|
||||
|
||||
before do
|
||||
create(:ci_job_artifact, :trace, job: job)
|
||||
end
|
||||
|
||||
it_behaves_like 'does not pass the job id to worker'
|
||||
end
|
||||
|
||||
context 'when job is not finished yet' do
|
||||
let!(:build) { create(:ci_build, :running, :trace_live) }
|
||||
|
||||
it_behaves_like 'does not pass the job id to worker'
|
||||
end
|
||||
end
|
||||
|
||||
shared_examples 'does not pass the job id to worker' do
|
||||
it do
|
||||
expect(ArchiveTraceWorker).not_to receive(:bulk_perform_async)
|
||||
|
||||
run_rake_task('gitlab:traces:archive')
|
||||
end
|
||||
end
|
||||
|
||||
context 'when trace file stored in default path' do
|
||||
let!(:job) { create(:ci_build, :success, :trace_live) }
|
||||
|
||||
it_behaves_like 'passes the job id to worker'
|
||||
end
|
||||
|
||||
context 'when trace is stored in database' do
|
||||
let!(:job) { create(:ci_build, :success) }
|
||||
describe 'gitlab:traces:migrate' do
|
||||
let(:object_storage_enabled) { false }
|
||||
|
||||
before do
|
||||
job.update_column(:trace, 'trace in db')
|
||||
stub_artifacts_object_storage(enabled: object_storage_enabled)
|
||||
end
|
||||
|
||||
it_behaves_like 'passes the job id to worker'
|
||||
end
|
||||
subject { run_rake_task('gitlab:traces:migrate') }
|
||||
|
||||
context 'when job has trace artifact' do
|
||||
let!(:job) { create(:ci_build, :success) }
|
||||
let!(:job_trace) { create(:ci_job_artifact, :trace, file_store: store) }
|
||||
|
||||
before do
|
||||
create(:ci_job_artifact, :trace, job: job)
|
||||
context 'when local storage is used' do
|
||||
let(:store) { ObjectStorage::Store::LOCAL }
|
||||
|
||||
context 'and job does not have file store defined' do
|
||||
let(:object_storage_enabled) { true }
|
||||
let(:store) { nil }
|
||||
|
||||
it "migrates file to remote storage" do
|
||||
subject
|
||||
|
||||
expect(job_trace.reload.file_store).to eq(ObjectStorage::Store::REMOTE)
|
||||
end
|
||||
end
|
||||
|
||||
context 'and remote storage is defined' do
|
||||
let(:object_storage_enabled) { true }
|
||||
|
||||
it "migrates file to remote storage" do
|
||||
subject
|
||||
|
||||
expect(job_trace.reload.file_store).to eq(ObjectStorage::Store::REMOTE)
|
||||
end
|
||||
end
|
||||
|
||||
context 'and remote storage is not defined' do
|
||||
it "fails to migrate to remote storage" do
|
||||
subject
|
||||
|
||||
expect(job_trace.reload.file_store).to eq(ObjectStorage::Store::LOCAL)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
it_behaves_like 'does not pass the job id to worker'
|
||||
end
|
||||
context 'when remote storage is used' do
|
||||
let(:object_storage_enabled) { true }
|
||||
let(:store) { ObjectStorage::Store::REMOTE }
|
||||
|
||||
context 'when job is not finished yet' do
|
||||
let!(:build) { create(:ci_build, :running, :trace_live) }
|
||||
it "file stays on remote storage" do
|
||||
subject
|
||||
|
||||
it_behaves_like 'does not pass the job id to worker'
|
||||
expect(job_trace.reload.file_store).to eq(ObjectStorage::Store::REMOTE)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
Loading…
Reference in a new issue