Squashed commit of the following:

commit 10456b1e9240886432f565dd17689080bbb133b9
Merge: 312c1a9bdf8 a5f4627857
Author: Shinya Maeda <shinya@gitlab.com>
Date:   Thu Nov 29 14:33:21 2018 +0900

    Merge branch 'master-ce' into add-counter-for-trace-chunks

commit 312c1a9bdf8efc45c3fed5ff50f05cc589bbb4ed
Author: Shinya Maeda <shinya@gitlab.com>
Date:   Wed Nov 28 20:06:18 2018 +0900

    Fix coding offence

commit e397cc2ccc1b2cf7f8b3558b8fa81fe2aa0ab366
Author: Shinya Maeda <shinya@gitlab.com>
Date:   Wed Nov 28 14:40:24 2018 +0900

    Fix tracking archive failure
This commit is contained in:
Shinya Maeda 2018-11-29 14:34:14 +09:00
parent a5f4627857
commit 3fbd48e127
6 changed files with 112 additions and 18 deletions

View file

@ -0,0 +1,25 @@
# frozen_string_literal: true
module Ci
  # Archives the trace of a CI job and reports any failure (metric counter,
  # Sentry, Rails log) instead of letting the exception reach the caller.
  class ArchiveTraceService
    # Archives the trace that belongs to +job+.
    #
    # @param job [#trace, #id] job whose trace should be archived
    # @return the value of +job.trace.archive!+ on success, +nil+ when the
    #   trace was already archived, otherwise the value returned by the
    #   error-logging call
    def execute(job)
      begin
        job.trace.archive!
      rescue ::Gitlab::Ci::Trace::AlreadyArchivedError
        # The trace has been archived before, so this error is safe to ignore.
        nil
      rescue StandardError => exception
        report_archive_failure(exception, job)
      end
    end

    private

    # Counter of failed archive attempts, created on first use and memoized
    # on this service instance.
    def failed_archive_counter
      @failed_archive_counter ||= Gitlab::Metrics.counter(
        :job_trace_archive_failed_total,
        "Counter of failed attempts of trace archiving"
      )
    end

    # Records a failed archive attempt: bumps the counter, forwards the
    # exception to Sentry and writes an error line to the Rails log.
    def report_archive_failure(exception, job)
      failed_archive_counter.increment

      Gitlab::Sentry.track_exception(
        exception,
        issue_url: 'https://gitlab.com/gitlab-org/gitlab-ce/issues/51502',
        extra: { job_id: job.id }
      )

      Rails.logger.error("Failed to archive trace. id: #{job.id} message: #{exception.message}")
    end
  end
end

View file

@ -7,7 +7,7 @@ class ArchiveTraceWorker
# rubocop: disable CodeReuse/ActiveRecord
# Archives the trace of the build identified by +job_id+.
# The build is looked up through the `without_archived_trace` scope with
# `find_by`; the block is attached via `try`, so it is presumably skipped when
# no matching build is found (ActiveSupport `try` on nil) -- verify.
def perform(job_id)
Ci::Build.without_archived_trace.find_by(id: job_id).try do |job|
# NOTE(review): this hunk is rendered without +/- markers. The direct
# `archive!` call below appears to be the removed line and the service call
# after it its replacement -- confirm against the real diff.
job.trace.archive!
# Hands the build to Ci::ArchiveTraceService#execute.
Ci::ArchiveTraceService.new.execute(job)
end
end
# rubocop: enable CodeReuse/ActiveRecord

View file

@ -11,21 +11,9 @@ module Ci
# This could happen when ArchiveTraceWorker sidekiq jobs were lost by receiving SIGKILL
# More details in https://gitlab.com/gitlab-org/gitlab-ce/issues/36791
Ci::Build.finished.with_live_trace.find_each(batch_size: 100) do |build|
begin
build.trace.archive!
rescue ::Gitlab::Ci::Trace::AlreadyArchivedError
rescue => e
failed_archive_counter.increment
Rails.logger.error "Failed to archive stale live trace. id: #{build.id} message: #{e.message}"
end
Ci::ArchiveTraceService.new.execute(build)
end
end
# rubocop: enable CodeReuse/ActiveRecord
private
# Memoized Gitlab::Metrics counter that the rescue branch above increments
# when archiving a stale live trace fails.
# NOTE(review): the help text here says "traces archiving", while
# Ci::ArchiveTraceService registers the same metric name with
# "trace archiving". This helper looks like part of the code removed by this
# commit -- confirm against the real diff.
def failed_archive_counter
@failed_archive_counter ||= Gitlab::Metrics.counter(:job_trace_archive_failed_total, "Counter of failed attempts of traces archiving")
end
end
end

View file

@ -0,0 +1,39 @@
require 'spec_helper'
# Specs for Ci::ArchiveTraceService#execute: one example for a finished job
# whose trace gets archived, one for a running job where the failure has to be
# reported without raising.
describe Ci::ArchiveTraceService, '#execute' do
  subject { described_class.new.execute(job) }

  context 'when job is finished' do
    let(:job) { create(:ci_build, :success, :trace_live) }

    it 'creates an archived trace' do
      expect { subject }.not_to raise_error

      expect(job.reload.job_artifacts_trace).to be_exist
    end
  end

  context 'when job is running' do
    let(:job) { create(:ci_build, :running, :trace_live) }

    it 'increments Prometheus counter, sends crash report to Sentry and ignore an error for continuing to archive' do
      # The exception is expected to be forwarded to Sentry exactly once.
      expect(Gitlab::Sentry).to receive(:track_exception).with(
        ::Gitlab::Ci::Trace::ArchiveError,
        issue_url: 'https://gitlab.com/gitlab-org/gitlab-ce/issues/51502',
        extra: { job_id: job.id }
      ).once

      # The failure is expected to be written to the Rails log.
      expected_log = "Failed to archive trace. id: #{job.id} message: Job is not finished yet"
      expect(Rails.logger).to receive(:error).with(expected_log).and_call_original

      # The failure counter is expected to be requested from Gitlab::Metrics.
      expect(Gitlab::Metrics).to receive(:counter).with(
        :job_trace_archive_failed_total,
        "Counter of failed attempts of trace archiving"
      ).and_call_original

      expect { subject }.not_to raise_error
    end
  end
end

View file

@ -23,5 +23,33 @@ describe ArchiveTraceWorker do
subject
end
end
# Any trace instance is stubbed so that `archive_stream!` raises; the worker is
# then expected to report the failure and not let the error escape.
context 'when an unexpected exception happened during archiving' do
  let!(:job) { create(:ci_build, :success, :trace_live) }

  before do
    allow_any_instance_of(Gitlab::Ci::Trace).to receive(:archive_stream!).and_raise('Unexpected error')
  end

  it 'increments Prometheus counter, sends crash report to Sentry and ignore an error for continuing to archive' do
    # The exception is expected to be forwarded to Sentry exactly once.
    expect(Gitlab::Sentry).to receive(:track_exception).with(
      RuntimeError,
      issue_url: 'https://gitlab.com/gitlab-org/gitlab-ce/issues/51502',
      extra: { job_id: job.id }
    ).once

    # The failure is expected to be written to the Rails log.
    expected_log = "Failed to archive trace. id: #{job.id} message: Unexpected error"
    expect(Rails.logger).to receive(:error).with(expected_log).and_call_original

    # The failure counter is expected to be requested from Gitlab::Metrics.
    expect(Gitlab::Metrics).to receive(:counter).with(
      :job_trace_archive_failed_total,
      "Counter of failed attempts of trace archiving"
    ).and_call_original

    expect { subject }.not_to raise_error
  end
end
end
end

View file

@ -46,13 +46,27 @@ describe Ci::ArchiveTracesCronWorker do
let!(:build) { create(:ci_build, :success, :trace_live) }
before do
allow_any_instance_of(Gitlab::Ci::Trace).to receive(:archive!).and_raise('Unexpected error')
allow_any_instance_of(Gitlab::Ci::Trace).to receive(:archive_stream!).and_raise('Unexpected error')
end
it 'puts a log' do
expect(Rails.logger).to receive(:error).with("Failed to archive stale live trace. id: #{build.id} message: Unexpected error")
it 'increments Prometheus counter, sends crash report to Sentry and ignore an error for continuing to archive' do
expect(Gitlab::Sentry)
.to receive(:track_exception)
.with(RuntimeError,
issue_url: 'https://gitlab.com/gitlab-org/gitlab-ce/issues/51502',
extra: { job_id: build.id } ).once
subject
expect(Rails.logger)
.to receive(:error)
.with("Failed to archive trace. id: #{build.id} message: Unexpected error")
.and_call_original
expect(Gitlab::Metrics)
.to receive(:counter)
.with(:job_trace_archive_failed_total, "Counter of failed attempts of trace archiving")
.and_call_original
expect { subject }.not_to raise_error
end
end
end