From 867126130946a9d3c3da7ff8e64b59b14da13599 Mon Sep 17 00:00:00 2001 From: Stan Hu Date: Sun, 21 Jan 2018 15:34:09 -0800 Subject: [PATCH] Add a gRPC health check to ensure Gitaly is up This will enable Geo to skip shards that not operational. Relates to gitlab-org/gitlab-ee#4329 --- app/controllers/health_controller.rb | 3 +- .../unreleased/sh-add-gitaly-health-check.yml | 5 ++ lib/gitlab/gitaly_client.rb | 21 ++++++- .../gitaly_client/health_check_service.rb | 19 +++++++ lib/gitlab/health_checks/gitaly_check.rb | 53 +++++++++++++++++ .../health_check_service_spec.rb | 41 +++++++++++++ spec/lib/gitlab/gitaly_client_spec.rb | 25 ++++++++ .../gitlab/health_checks/gitaly_check_spec.rb | 57 +++++++++++++++++++ 8 files changed, 220 insertions(+), 4 deletions(-) create mode 100644 changelogs/unreleased/sh-add-gitaly-health-check.yml create mode 100644 lib/gitlab/gitaly_client/health_check_service.rb create mode 100644 lib/gitlab/health_checks/gitaly_check.rb create mode 100644 spec/lib/gitlab/gitaly_client/health_check_service_spec.rb create mode 100644 spec/lib/gitlab/health_checks/gitaly_check_spec.rb diff --git a/app/controllers/health_controller.rb b/app/controllers/health_controller.rb index a931b456a93..16abf7bab7e 100644 --- a/app/controllers/health_controller.rb +++ b/app/controllers/health_controller.rb @@ -8,7 +8,8 @@ class HealthController < ActionController::Base Gitlab::HealthChecks::Redis::CacheCheck, Gitlab::HealthChecks::Redis::QueuesCheck, Gitlab::HealthChecks::Redis::SharedStateCheck, - Gitlab::HealthChecks::FsShardsCheck + Gitlab::HealthChecks::FsShardsCheck, + Gitlab::HealthChecks::GitalyCheck ].freeze def readiness diff --git a/changelogs/unreleased/sh-add-gitaly-health-check.yml b/changelogs/unreleased/sh-add-gitaly-health-check.yml new file mode 100644 index 00000000000..32c4c5362b4 --- /dev/null +++ b/changelogs/unreleased/sh-add-gitaly-health-check.yml @@ -0,0 +1,5 @@ +--- +title: Add a gRPC health check to ensure Gitaly is up +merge_request: +author: +type: added diff --git a/lib/gitlab/gitaly_client.rb b/lib/gitlab/gitaly_client.rb index 4507ea923b4..6bd256f57c7 100644 --- a/lib/gitlab/gitaly_client.rb +++ b/lib/gitlab/gitaly_client.rb @@ -1,6 +1,8 @@ require 'base64' require 'gitaly' +require 'grpc/health/v1/health_pb' +require 'grpc/health/v1/health_services_pb' module Gitlab module GitalyClient @@ -69,14 +71,27 @@ module Gitlab @stubs ||= {} @stubs[storage] ||= {} @stubs[storage][name] ||= begin - klass = Gitaly.const_get(name.to_s.camelcase.to_sym).const_get(:Stub) - addr = address(storage) - addr = addr.sub(%r{^tcp://}, '') if URI(addr).scheme == 'tcp' + klass = stub_class(name) + addr = stub_address(storage) klass.new(addr, :this_channel_is_insecure) end end end + def self.stub_class(name) + if name == :health_check + Grpc::Health::V1::Health::Stub + else + Gitaly.const_get(name.to_s.camelcase.to_sym).const_get(:Stub) + end + end + + def self.stub_address(storage) + addr = address(storage) + addr = addr.sub(%r{^tcp://}, '') if URI(addr).scheme == 'tcp' + addr + end + def self.clear_stubs! MUTEX.synchronize do @stubs = nil diff --git a/lib/gitlab/gitaly_client/health_check_service.rb b/lib/gitlab/gitaly_client/health_check_service.rb new file mode 100644 index 00000000000..6c1213f5e20 --- /dev/null +++ b/lib/gitlab/gitaly_client/health_check_service.rb @@ -0,0 +1,19 @@ +module Gitlab + module GitalyClient + class HealthCheckService + def initialize(storage) + @storage = storage + end + + # Sends a gRPC health ping to the Gitaly server for the storage shard. + def check + request = Grpc::Health::V1::HealthCheckRequest.new + response = GitalyClient.call(@storage, :health_check, :check, request, timeout: GitalyClient.fast_timeout) + + { success: response&.status == :SERVING } + rescue GRPC::BadStatus => e + { success: false, message: e.to_s } + end + end + end +end diff --git a/lib/gitlab/health_checks/gitaly_check.rb b/lib/gitlab/health_checks/gitaly_check.rb new file mode 100644 index 00000000000..11416c002e3 --- /dev/null +++ b/lib/gitlab/health_checks/gitaly_check.rb @@ -0,0 +1,53 @@ +module Gitlab + module HealthChecks + class GitalyCheck + extend BaseAbstractCheck + + METRIC_PREFIX = 'gitaly_health_check'.freeze + + class << self + def readiness + repository_storages.map do |storage_name| + check(storage_name) + end + end + + def metrics + repository_storages.flat_map do |storage_name| + result, elapsed = with_timing { check(storage_name) } + labels = { shard: storage_name } + + [ + metric("#{metric_prefix}_success", successful?(result) ? 1 : 0, **labels), + metric("#{metric_prefix}_latency_seconds", elapsed, **labels) + ].flatten + end + end + + def check(storage_name) + serv = Gitlab::GitalyClient::HealthCheckService.new(storage_name) + result = serv.check + HealthChecks::Result.new(result[:success], result[:message], shard: storage_name) + end + + private + + def metric_prefix + METRIC_PREFIX + end + + def successful?(result) + result[:success] + end + + def repository_storages + storages.keys + end + + def storages + Gitlab.config.repositories.storages + end + end + end + end +end diff --git a/spec/lib/gitlab/gitaly_client/health_check_service_spec.rb b/spec/lib/gitlab/gitaly_client/health_check_service_spec.rb new file mode 100644 index 00000000000..2c7e5eb5787 --- /dev/null +++ b/spec/lib/gitlab/gitaly_client/health_check_service_spec.rb @@ -0,0 +1,41 @@ +require 'spec_helper' + +describe Gitlab::GitalyClient::HealthCheckService do + let(:project) { create(:project) } + let(:storage_name) { project.repository_storage } + + subject { described_class.new(storage_name) } + + describe '#check' do + it 'successfully sends a health check request' do + expect(Gitlab::GitalyClient).to receive(:call).with( + storage_name, + :health_check, + :check, + instance_of(Grpc::Health::V1::HealthCheckRequest), + timeout: Gitlab::GitalyClient.fast_timeout).and_call_original + + expect(subject.check).to eq({ success: true }) + end + + it 'receives an unsuccessful health check request' do + expect_any_instance_of(Grpc::Health::V1::Health::Stub) + .to receive(:check) + .and_return(double(status: false)) + + expect(subject.check).to eq({ success: false }) + end + + it 'gracefully handles gRPC error' do + expect(Gitlab::GitalyClient).to receive(:call).with( + storage_name, + :health_check, + :check, + instance_of(Grpc::Health::V1::HealthCheckRequest), + timeout: Gitlab::GitalyClient.fast_timeout) + .and_raise(GRPC::Unavailable.new('Connection refused')) + + expect(subject.check).to eq({ success: false, message: '14:Connection refused' }) + end + end +end diff --git a/spec/lib/gitlab/gitaly_client_spec.rb b/spec/lib/gitlab/gitaly_client_spec.rb index 309b7338ef0..81bcd8c28ed 100644 --- a/spec/lib/gitlab/gitaly_client_spec.rb +++ b/spec/lib/gitlab/gitaly_client_spec.rb @@ -3,6 +3,31 @@ require 'spec_helper' # We stub Gitaly in `spec/support/gitaly.rb` for other tests. We don't want # those stubs while testing the GitalyClient itself. describe Gitlab::GitalyClient, skip_gitaly_mock: true do + describe '.stub_class' do + it 'returns the gRPC health check stub' do + expect(described_class.stub_class(:health_check)).to eq(::Grpc::Health::V1::Health::Stub) + end + + it 'returns a Gitaly stub' do + expect(described_class.stub_class(:ref_service)).to eq(::Gitaly::RefService::Stub) + end + end + + describe '.stub_address' do + it 'returns the same result after being called multiple times' do + address = 'localhost:9876' + prefixed_address = "tcp://#{address}" + + allow(Gitlab.config.repositories).to receive(:storages).and_return({ + 'default' => { 'gitaly_address' => prefixed_address } + }) + + 2.times do + expect(described_class.stub_address('default')).to eq('localhost:9876') + end + end + end + describe '.stub' do # Notice that this is referring to gRPC "stubs", not rspec stubs before do diff --git a/spec/lib/gitlab/health_checks/gitaly_check_spec.rb b/spec/lib/gitlab/health_checks/gitaly_check_spec.rb new file mode 100644 index 00000000000..724beefff69 --- /dev/null +++ b/spec/lib/gitlab/health_checks/gitaly_check_spec.rb @@ -0,0 +1,57 @@ +require 'spec_helper' + +describe Gitlab::HealthChecks::GitalyCheck do + let(:result_class) { Gitlab::HealthChecks::Result } + let(:repository_storages) { ['default'] } + + before do + allow(described_class).to receive(:repository_storages) { repository_storages } + end + + describe '#readiness' do + subject { described_class.readiness } + + before do + expect(Gitlab::GitalyClient::HealthCheckService).to receive(:new).and_return(gitaly_check) + end + + context 'Gitaly server is up' do + let(:gitaly_check) { double(check: { success: true }) } + + it { is_expected.to eq([result_class.new(true, nil, shard: 'default')]) } + end + + context 'Gitaly server is down' do + let(:gitaly_check) { double(check: { success: false, message: 'Connection refused' }) } + + it { is_expected.to eq([result_class.new(false, 'Connection refused', shard: 'default')]) } + end + end + + describe '#metrics' do + subject { described_class.metrics } + + before do + expect(Gitlab::GitalyClient::HealthCheckService).to receive(:new).and_return(gitaly_check) + end + + context 'Gitaly server is up' do + let(:gitaly_check) { double(check: { success: true }) } + + it 'provides metrics' do + expect(subject).to all(have_attributes(labels: { shard: 'default' })) + expect(subject).to include(an_object_having_attributes(name: 'gitaly_health_check_success', value: 1)) + expect(subject).to include(an_object_having_attributes(name: 'gitaly_health_check_latency_seconds', value: be >= 0)) + end + end + + context 'Gitaly server is down' do + let(:gitaly_check) { double(check: { success: false, message: 'Connection refused' }) } + + it 'provides metrics' do + expect(subject).to include(an_object_having_attributes(name: 'gitaly_health_check_success', value: 0)) + expect(subject).to include(an_object_having_attributes(name: 'gitaly_health_check_latency_seconds', value: be >= 0)) + end + end + end +end