diff --git a/.rubocop_manual_todo.yml b/.rubocop_manual_todo.yml index ae15995215a..fa81885e060 100644 --- a/.rubocop_manual_todo.yml +++ b/.rubocop_manual_todo.yml @@ -761,7 +761,6 @@ RSpec/TimecopFreeze: - 'ee/spec/lib/gitlab/analytics/cycle_analytics/summary/group/stage_time_summary_spec.rb' - 'ee/spec/lib/gitlab/analytics/type_of_work/tasks_by_type_spec.rb' - 'ee/spec/lib/gitlab/auth/group_saml/sso_enforcer_spec.rb' - - 'ee/spec/lib/gitlab/database/load_balancing/host_spec.rb' - 'ee/spec/lib/gitlab/geo/base_request_spec.rb' - 'ee/spec/lib/gitlab/geo/event_gap_tracking_spec.rb' - 'ee/spec/lib/gitlab/geo/git_push_http_spec.rb' @@ -2900,7 +2899,6 @@ Style/RegexpLiteralMixedPreserve: - 'ee/spec/controllers/groups/groups_controller_spec.rb' - 'ee/spec/features/groups/saml_enforcement_spec.rb' - 'ee/spec/features/markdown/metrics_spec.rb' - - 'ee/spec/lib/gitlab/database/load_balancing/load_balancer_spec.rb' - 'ee/spec/services/jira/requests/issues/list_service_spec.rb' - 'lib/api/invitations.rb' - 'lib/gitlab/ci/pipeline/expression/lexeme/pattern.rb' diff --git a/app/assets/javascripts/emoji/awards_app/store/actions.js b/app/assets/javascripts/emoji/awards_app/store/actions.js index 482acc5a3a9..e7c49b2a4f9 100644 --- a/app/assets/javascripts/emoji/awards_app/store/actions.js +++ b/app/assets/javascripts/emoji/awards_app/store/actions.js @@ -13,6 +13,8 @@ import { export const setInitialData = ({ commit }, data) => commit(SET_INITIAL_DATA, data); export const fetchAwards = async ({ commit, dispatch, state }, page = '1') => { + if (!window.gon?.current_user_id) return; + try { const { data, headers } = await axios.get(state.path, { params: { per_page: 100, page } }); const normalizedHeaders = normalizeHeaders(headers); diff --git a/app/models/application_record.rb b/app/models/application_record.rb index 5e5bc00458e..a93348a3b27 100644 --- a/app/models/application_record.rb +++ b/app/models/application_record.rb @@ -53,10 +53,12 @@ class ApplicationRecord < ActiveRecord::Base # Start a new transaction with a shorter-than-usual statement timeout. This is # currently one third of the default 15-second timeout def self.with_fast_read_statement_timeout(timeout_ms = 5000) - transaction(requires_new: true) do - connection.exec_query("SET LOCAL statement_timeout = #{timeout_ms}") + ::Gitlab::Database::LoadBalancing::Session.current.fallback_to_replicas_for_ambiguous_queries do + transaction(requires_new: true) do + connection.exec_query("SET LOCAL statement_timeout = #{timeout_ms}") - yield + yield + end end end @@ -85,5 +87,3 @@ class ApplicationRecord < ActiveRecord::Base enum(enum_mod.key => values) end end - -ApplicationRecord.prepend_mod_with('ApplicationRecordHelpers') diff --git a/app/models/project_services/chat_notification_service.rb b/app/models/project_services/chat_notification_service.rb index 2f841bf903e..7624dd0d62c 100644 --- a/app/models/project_services/chat_notification_service.rb +++ b/app/models/project_services/chat_notification_service.rb @@ -92,14 +92,14 @@ class ChatNotificationService < Integration def execute(data) return unless supported_events.include?(data[:object_kind]) - return unless notify_label?(data) - return unless webhook.present? object_kind = data[:object_kind] data = custom_data(data) + return unless notify_label?(data) + # WebHook events often have an 'update' event that follows a 'open' or # 'close' action. Ignore update events for now to prevent duplicate # messages from arriving. 
@@ -156,9 +156,9 @@ class ChatNotificationService < Integration def notify_label?(data) return true unless SUPPORTED_EVENTS_FOR_LABEL_FILTER.include?(data[:object_kind]) && labels_to_be_notified.present? - labels = data.dig(:issue, :labels) || data.dig(:merge_request, :labels) + labels = data[:labels] || data.dig(:issue, :labels) || data.dig(:merge_request, :labels) || data.dig(:object_attributes, :labels) - return false if labels.nil? + return false if labels.blank? matching_labels = labels_to_be_notified_list & labels.pluck(:title) @@ -179,7 +179,7 @@ class ChatNotificationService < Integration end def custom_data(data) - data.merge(project_url: project_url, project_name: project_name) + data.merge(project_url: project_url, project_name: project_name).with_indifferent_access end def get_message(object_kind, data) diff --git a/app/models/repository.rb b/app/models/repository.rb index 7dca8e52403..1bd61fe48cb 100644 --- a/app/models/repository.rb +++ b/app/models/repository.rb @@ -938,6 +938,8 @@ class Repository end def fetch_as_mirror(url, forced: false, refmap: :all_refs, remote_name: nil, prune: true) + return fetch_remote(remote_name, url: url, refmap: refmap, forced: forced, prune: prune) if Feature.enabled?(:fetch_remote_params, project, default_enabled: :yaml) + unless remote_name remote_name = "tmp-#{SecureRandom.hex}" tmp_remote_name = true diff --git a/app/workers/all_queues.yml b/app/workers/all_queues.yml index 8f4a77aced4..a914d7d0373 100644 --- a/app/workers/all_queues.yml +++ b/app/workers/all_queues.yml @@ -153,16 +153,6 @@ :weight: 1 :idempotent: :tags: [] -- :name: cronjob:analytics_instance_statistics_count_job_trigger - :worker_name: Analytics::InstanceStatistics::CountJobTriggerWorker - :feature_category: :devops_reports - :has_external_dependencies: - :urgency: :low - :resource_boundary: :unknown - :weight: 1 - :idempotent: true - :tags: - - :exclude_from_kubernetes - :name: cronjob:analytics_usage_trends_count_job_trigger :worker_name: Analytics::UsageTrends::CountJobTriggerWorker :feature_category: :devops_reports @@ -1786,16 +1776,6 @@ :weight: 1 :idempotent: true :tags: [] -- :name: analytics_instance_statistics_counter_job - :worker_name: Analytics::InstanceStatistics::CounterJobWorker - :feature_category: :devops_reports - :has_external_dependencies: - :urgency: :low - :resource_boundary: :unknown - :weight: 1 - :idempotent: true - :tags: - - :exclude_from_kubernetes - :name: analytics_usage_trends_counter_job :worker_name: Analytics::UsageTrends::CounterJobWorker :feature_category: :devops_reports diff --git a/app/workers/analytics/instance_statistics/count_job_trigger_worker.rb b/app/workers/analytics/instance_statistics/count_job_trigger_worker.rb deleted file mode 100644 index 083c01b166d..00000000000 --- a/app/workers/analytics/instance_statistics/count_job_trigger_worker.rb +++ /dev/null @@ -1,24 +0,0 @@ -# frozen_string_literal: true - -module Analytics - module InstanceStatistics - # This worker will be removed in 14.0 - class CountJobTriggerWorker - include ApplicationWorker - - sidekiq_options retry: 3 - include CronjobQueue # rubocop:disable Scalability/CronWorkerContext - - feature_category :devops_reports - tags :exclude_from_kubernetes - urgency :low - - idempotent! 
- - def perform - # Delegate to the new worker - Analytics::UsageTrends::CountJobTriggerWorker.new.perform - end - end - end -end diff --git a/app/workers/analytics/instance_statistics/counter_job_worker.rb b/app/workers/analytics/instance_statistics/counter_job_worker.rb deleted file mode 100644 index a4dda45ff72..00000000000 --- a/app/workers/analytics/instance_statistics/counter_job_worker.rb +++ /dev/null @@ -1,23 +0,0 @@ -# frozen_string_literal: true - -module Analytics - module InstanceStatistics - # This worker will be removed in 14.0 - class CounterJobWorker - include ApplicationWorker - - sidekiq_options retry: 3 - - feature_category :devops_reports - urgency :low - tags :exclude_from_kubernetes - - idempotent! - - def perform(*args) - # Delegate to the new worker - Analytics::UsageTrends::CounterJobWorker.new.perform(*args) - end - end - end -end diff --git a/config/feature_flags/development/fetch_remote_params.yml b/config/feature_flags/development/fetch_remote_params.yml new file mode 100644 index 00000000000..3d0a6d3008e --- /dev/null +++ b/config/feature_flags/development/fetch_remote_params.yml @@ -0,0 +1,8 @@ +--- +name: fetch_remote_params +introduced_by_url: +rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/325528 +milestone: '13.12' +type: development +group: group::gitaly +default_enabled: false diff --git a/config/initializers/load_balancing.rb b/config/initializers/load_balancing.rb index 7502a6299ae..a8a6e87179a 100644 --- a/config/initializers/load_balancing.rb +++ b/config/initializers/load_balancing.rb @@ -1,25 +1,18 @@ # frozen_string_literal: true -# We need to run this initializer after migrations are done so it doesn't fail on CI +if Gitlab::Database::LoadBalancing.enable? + Gitlab::Database.disable_prepared_statements -Gitlab.ee do - if Gitlab::Database.cached_table_exists?('licenses') - if Gitlab::Database::LoadBalancing.enable? - Gitlab::Database.disable_prepared_statements + Gitlab::Application.configure do |config| + config.middleware.use(Gitlab::Database::LoadBalancing::RackMiddleware) + end - Gitlab::Application.configure do |config| - config.middleware.use(Gitlab::Database::LoadBalancing::RackMiddleware) - end + Gitlab::Database::LoadBalancing.configure_proxy - Gitlab::Database::LoadBalancing.configure_proxy - - # This needs to be executed after fork of clustered processes - Gitlab::Cluster::LifecycleEvents.on_worker_start do - # Service discovery must be started after configuring the proxy, as service - # discovery depends on this. - Gitlab::Database::LoadBalancing.start_service_discovery - end - - end + # This needs to be executed after fork of clustered processes + Gitlab::Cluster::LifecycleEvents.on_worker_start do + # Service discovery must be started after configuring the proxy, as service + # discovery depends on this. 
+ Gitlab::Database::LoadBalancing.start_service_discovery end end diff --git a/config/sidekiq_queues.yml b/config/sidekiq_queues.yml index 06f9cc2cd85..65c57753486 100644 --- a/config/sidekiq_queues.yml +++ b/config/sidekiq_queues.yml @@ -32,8 +32,6 @@ - 1 - - analytics_devops_adoption_create_snapshot - 1 -- - analytics_instance_statistics_counter_job - - 1 - - analytics_usage_trends_counter_job - 1 - - approval_rules_external_approval_rule_payload diff --git a/doc/api/users.md b/doc/api/users.md index a1db12b13ca..e19a2225788 100644 --- a/doc/api/users.md +++ b/doc/api/users.md @@ -453,6 +453,7 @@ Parameters: | `twitter` | No | Twitter account | | `username` | Yes | Username | | `view_diffs_file_by_file` | No | Flag indicating the user sees only one file diff per page | +| `show_whitespace_in_diffs` | No | Flag indicating the user sees whitespace changes in diffs | `website_url` | No | Website URL | ## User modification diff --git a/doc/install/installation.md b/doc/install/installation.md index b9bde01c2fa..e795c763fca 100644 --- a/doc/install/installation.md +++ b/doc/install/installation.md @@ -697,7 +697,7 @@ gitlab_path=/home/git/gitlab gitaly_path=/home/git/gitaly sudo -u git -H sh -c "$gitlab_path/bin/daemon_with_pidfile $gitlab_path/tmp/pids/gitaly.pid \ - $gitaly_path/gitaly $gitaly_path/config.toml >> $gitlab_path/log/gitaly.log 2>&1 &" + $gitaly_path/_build/bin/gitaly $gitaly_path/config.toml >> $gitlab_path/log/gitaly.log 2>&1 &" ``` ### Initialize Database and Activate Advanced Features diff --git a/doc/user/application_security/container_scanning/index.md b/doc/user/application_security/container_scanning/index.md index 74b09de331b..308a027f8ce 100644 --- a/doc/user/application_security/container_scanning/index.md +++ b/doc/user/application_security/container_scanning/index.md @@ -10,21 +10,18 @@ info: To determine the technical writer assigned to the Stage/Group associated w > [Introduced](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/3672) in [GitLab Ultimate](https://about.gitlab.com/pricing/) 10.4. WARNING: -GitLab 14.0 will replace its container scanning engine with Trivy. Currently, GitLab uses the open -source Clair engine for container scanning. GitLab 13.9 deprecates Clair. Until GitLab 14.0, this is -not a hard breaking change. Beginning in GitLab 14.0, GitLab will no longer update or maintain -Clair. To ensure that you get regular updates and the latest features, you must use the Trivy -container scanning engine beginning in GitLab 14.0. See the following sections for instructions on -moving from Clair to Trivy. +Versions of GitLab prior to 14.0 used Clair as the default container scanning engine. GitLab 14.0 +replaces Clair with Trivy and removes Clair from the product. If you run container scanning with the +default settings, GitLab switches you seamlessly and automatically to Trivy in GitLab 14.0. However, +if you customized the variables in your container scanning job, you should review the +[migration guide](#migrating-from-clair-to-trivy) and make any necessary updates. Your application's Docker image may itself be based on Docker images that contain known vulnerabilities. By including an extra job in your pipeline that scans for those vulnerabilities and displays them in a merge request, you can use GitLab to audit your Docker-based apps. 
-GitLab provides integration with two different open-source tools for vulnerability static analysis -in containers: +GitLab provides integration with open-source tools for vulnerability static analysis in containers: -- [Clair](https://github.com/quay/claircore) - [Trivy](https://github.com/aquasecurity/trivy) To integrate GitLab with security scanners other than those listed here, see @@ -41,10 +38,6 @@ information directly in the merge request. ![Container Scanning Widget](img/container_scanning_v13_2.png) - - ## Requirements To enable container scanning in your pipeline, you need the following: @@ -53,11 +46,10 @@ To enable container scanning in your pipeline, you need the following: or [`kubernetes`](https://docs.gitlab.com/runner/install/kubernetes.html) executor. - Docker `18.09.03` or higher installed on the same computer as the runner. If you're using the shared runners on GitLab.com, then this is already the case. -- An image matching the following supported distributions (depending on the analyzer being used): +- An image matching the following supported distributions (depending on the scanner being used): | Scanning Engine | Supported distributions | | --- | --- | - | [Clair](https://github.com/quay/claircore) | [Supported operating systems and languages](https://quay.github.io/claircore/) | | [Trivy](https://github.com/aquasecurity/trivy) | Supported [operating systems](https://aquasecurity.github.io/trivy/latest/vuln-detection/os/) and [languages](https://aquasecurity.github.io/trivy/latest/vuln-detection/library/) | - [Build and push](../../packages/container_registry/index.md#build-and-push-by-using-gitlab-cicd) @@ -106,6 +98,7 @@ How you enable container scanning depends on your GitLab version: variable. - GitLab 13.9 [introduced](https://gitlab.com/gitlab-org/gitlab/-/issues/322656) integration with [Trivy](https://github.com/aquasecurity/trivy) by upgrading `CS_MAJOR_VERSION` from `3` to `4`. +- GitLab 14.0 makes Trivy the default scanner. To include the `Container-Scanning.gitlab-ci.yml` template (GitLab 11.9 and later), add the following to your `.gitlab-ci.yml` file: @@ -164,71 +157,33 @@ The variables you set in your `.gitlab-ci.yml` overwrite those in `Container-Scanning.gitlab-ci.yml`. This example [includes](../../../ci/yaml/README.md#include) the container scanning template and -enables verbose output for both analyzers: - -Clair: +enables verbose output for the analyzer: ```yaml include: - template: Container-Scanning.gitlab-ci.yml variables: - CLAIR_TRACE: true + SECURE_LOG_LEVEL: 'debug' ``` -Trivy: - -```yaml -include: - - template: Container-Scanning.gitlab-ci.yml - -variables: - TRIVY_DEBUG: true -``` - -This example [includes](../../../ci/yaml/README.md#include) the container scanning template and -enables version `2` of the analyzer: - -```yaml -include: - - template: Container-Scanning.gitlab-ci.yml - -variables: - CS_MAJOR_VERSION: '2' -``` - - - #### Available CI/CD variables You can [configure](#customizing-the-container-scanning-settings) both analyzers by using the following CI/CD variables: -| CI/CD Variable | Default | Description | Supported by| +| CI/CD Variable | Default | Description | Scanner | | ------------------------------ | ------------- | ----------- | ------------ | -| `ADDITIONAL_CA_CERT_BUNDLE` | `""` | Bundle of CA certs that you want to trust. See [Using a custom SSL CA certificate authority](#using-a-custom-ssl-ca-certificate-authority) for more details. 
| Both | -| `CLAIR_DB_CONNECTION_STRING` | `postgresql://postgres:password@clair-vulnerabilities-db:5432/postgres?sslmode=disable&statement_timeout=60000` | This variable represents the [connection string](https://www.postgresql.org/docs/9.3/libpq-connect.html#AEN39692) to the [PostgreSQL server hosting the vulnerability definitions](https://hub.docker.com/r/arminc/clair-db) database. **Do not change this** unless you're running the image locally as described in [Running the standalone container scanning tool](#running-the-standalone-container-scanning-tool). The host value for the connection string must match the [alias](https://gitlab.com/gitlab-org/gitlab/-/blob/898c5da43504eba87b749625da50098d345b60d6/lib/gitlab/ci/templates/Security/Container-Scanning.gitlab-ci.yml#L23) value of the `Container-Scanning.gitlab-ci.yml` template file, which defaults to `clair-vulnerabilities-db`. | Clair | -| `CLAIR_DB_IMAGE` | `arminc/clair-db:latest` | The Docker image name and tag for the [PostgreSQL server hosting the vulnerability definitions](https://hub.docker.com/r/arminc/clair-db). It can be useful to override this value with a specific version (for example, to provide a consistent set of vulnerabilities for integration testing purposes, or to refer to a locally hosted vulnerability database for an on-premise offline installation). | Clair | -| `CLAIR_DB_IMAGE_TAG` | `latest` | (**DEPRECATED - use `CLAIR_DB_IMAGE` instead**) The Docker image tag for the [PostgreSQL server hosting the vulnerability definitions](https://hub.docker.com/r/arminc/clair-db). It can be useful to override this value with a specific version (for example, to provide a consistent set of vulnerabilities for integration testing purposes). | Clair | -| `CLAIR_OUTPUT` | `Unknown` | Severity level threshold. Vulnerabilities with severity level higher than or equal to this threshold are output. Supported levels are `Unknown`, `Negligible`, `Low`, `Medium`, `High`, `Critical`, and `Defcon1`. | Clair | -| `CLAIR_TRACE` | `"false"` | Set to true to enable more verbose output from the Clair server process. | Clair | -| `CLAIR_VULNERABILITIES_DB_URL` | `clair-vulnerabilities-db` | (**DEPRECATED - use `CLAIR_DB_CONNECTION_STRING` instead**) This variable is explicitly set in the [services section](https://gitlab.com/gitlab-org/gitlab/-/blob/898c5da43504eba87b749625da50098d345b60d6/lib/gitlab/ci/templates/Security/Container-Scanning.gitlab-ci.yml#L23) of the `Container-Scanning.gitlab-ci.yml` file and defaults to `clair-vulnerabilities-db`. This value represents the address that the [PostgreSQL server hosting the vulnerability definitions](https://hub.docker.com/r/arminc/clair-db) is running on. **Do not change this** unless you're running the image locally as described in [Running the standalone container scanning tool](#running-the-standalone-container-scanning-tool). | Clair | -| `CI_APPLICATION_REPOSITORY` | `$CI_REGISTRY_IMAGE/$CI_COMMIT_REF_SLUG` | Docker repository URL for the image to be scanned. | Both | -| `CI_APPLICATION_TAG` | `$CI_COMMIT_SHA` | Docker repository tag for the image to be scanned. | Both | -| `CS_ANALYZER_IMAGE` | `$SECURE_ANALYZERS_PREFIX/$CS_PROJECT:$CS_MAJOR_VERSION` | Docker image of the analyzer. | Both | -| `CS_MAJOR_VERSION` | `3` | The major version of the Docker image tag. | Both | -| `CS_PROJECT` | Depends on `$CS_MAJOR_VERSION`. `klar` if `$CS_MAJOR_VERSION` is set to `1`, `2` or `3`, and `container-scanning` otherwise. | Analyzer project to be used. 
| Both | -| `DOCKER_IMAGE` | `$CI_APPLICATION_REPOSITORY:$CI_APPLICATION_TAG` | The Docker image to be scanned. If set, this variable overrides the `$CI_APPLICATION_REPOSITORY` and `$CI_APPLICATION_TAG` variables. | Both | -| `DOCKER_INSECURE` | `"false"` | Allow [Klar](https://github.com/optiopay/klar) to access secure Docker registries using HTTPS with bad (or self-signed) SSL certificates. | Clair | -| `DOCKER_PASSWORD` | `$CI_REGISTRY_PASSWORD` | Password for accessing a Docker registry requiring authentication. | Clair | -| `DOCKER_USER` | `$CI_REGISTRY_USER` | Username for accessing a Docker registry requiring authentication. | Clair | -| `DOCKERFILE_PATH` | `Dockerfile` | The path to the `Dockerfile` to use for generating remediations. By default, the scanner looks for a file named `Dockerfile` in the root directory of the project. You should configure this variable only if your `Dockerfile` is in a non-standard location, such as a subdirectory. See [Solutions for vulnerabilities](#solutions-for-vulnerabilities-auto-remediation) for more details. | Both | -| `KLAR_TRACE` | `"false"` | Set to true to enable more verbose output from Klar. | Clair | -| `REGISTRY_INSECURE` | `"false"` | Allow [Klar](https://github.com/optiopay/klar) to access insecure registries (HTTP only). Should only be set to `true` when testing the image locally. | Clair | -| `SECURE_ANALYZERS_PREFIX` | `"registry.gitlab.com/gitlab-org/security-products/analyzers"` | Set the Docker registry base address from which to download the analyzer. | Both | -| `SECURE_LOG_LEVEL` | `info` | Set the minimum logging level. Messages of this logging level or higher are output. From highest to lowest severity, the logging levels are: `fatal`, `error`, `warn`, `info`, `debug`. [Introduced](https://gitlab.com/gitlab-org/gitlab/-/issues/10880) in GitLab 13.1. | Both | -| `TRIVY_DEBUG` | `"false"` | Set to true to enable more verbose output from the Trivy process. | Trivy | +| `ADDITIONAL_CA_CERT_BUNDLE` | `""` | Bundle of CA certs that you want to trust. See [Using a custom SSL CA certificate authority](#using-a-custom-ssl-ca-certificate-authority) for more details. | All | +| `CI_APPLICATION_REPOSITORY` | `$CI_REGISTRY_IMAGE/$CI_COMMIT_REF_SLUG` | Docker repository URL for the image to be scanned. | All | +| `CI_APPLICATION_TAG` | `$CI_COMMIT_SHA` | Docker repository tag for the image to be scanned. | All | +| `CS_ANALYZER_IMAGE` | `$SECURE_ANALYZERS_PREFIX/$CS_PROJECT:$CS_MAJOR_VERSION` | Docker image of the analyzer. | All | +| `DOCKER_IMAGE` | `$CI_APPLICATION_REPOSITORY:$CI_APPLICATION_TAG` | The Docker image to be scanned. If set, this variable overrides the `$CI_APPLICATION_REPOSITORY` and `$CI_APPLICATION_TAG` variables. | All | +| `DOCKER_INSECURE` | `"false"` | Allow access to secure Docker registries using HTTPS without validating the certificates. | All | +| `DOCKER_PASSWORD` | `$CI_REGISTRY_PASSWORD` | Password for accessing a Docker registry requiring authentication. | All | +| `DOCKER_USER` | `$CI_REGISTRY_USER` | Username for accessing a Docker registry requiring authentication. | All | +| `DOCKERFILE_PATH` | `Dockerfile` | The path to the `Dockerfile` to use for generating remediations. By default, the scanner looks for a file named `Dockerfile` in the root directory of the project. You should configure this variable only if your `Dockerfile` is in a non-standard location, such as a subdirectory. See [Solutions for vulnerabilities](#solutions-for-vulnerabilities-auto-remediation) for more details. 
| All | +| `REGISTRY_INSECURE` | `"false"` | Allow access to insecure registries (HTTP only). Should only be set to `true` when testing the image locally. | All | +| `SECURE_LOG_LEVEL` | `info` | Set the minimum logging level. Messages of this logging level or higher are output. From highest to lowest severity, the logging levels are: `fatal`, `error`, `warn`, `info`, `debug`. [Introduced](https://gitlab.com/gitlab-org/gitlab/-/issues/10880) in GitLab 13.1. | All | ### Overriding the container scanning template @@ -236,18 +191,7 @@ If you want to override the job definition (for example, to change properties li must declare and override a job after the template inclusion, and then specify any additional keys. -This example sets `GIT_STRATEGY` to `fetch` to be considered by both Clair and Trivy: - -```yaml -include: - - template: Container-Scanning.gitlab-ci.yml - -.cs_common: - variables: - GIT_STRATEGY: fetch -``` - -This example sets `KLAR_TRACE` to `true`, which is specific to Clair: +This example sets `GIT_STRATEGY` to `fetch`: ```yaml include: @@ -255,18 +199,7 @@ include: container_scanning: variables: - CLAIR_TRACE: true -``` - -This example sets `TRIVY_DEBUG` to `true`, which is specific to Trivy: - -```yaml -include: - - template: Container-Scanning.gitlab-ci.yml - -container_scanning_new: - variables: - TRIVY_DEBUG: true + GIT_STRATEGY: fetch ``` WARNING: @@ -276,36 +209,47 @@ instead. ### Migrating from Clair to Trivy -If you are currently using Clair and want to migrate to Trivy before GitLab 14.0, you can do so by -taking the following steps: +If you're migrating from a GitLab 13.x release to a GitLab 14.x release and have customized the +`container_scanning` job or its CI variables, you might need to perform these migration steps in +your CI file: -1. Take the following actions in your CI file: +1. Remove these variables: - - Set the variable `CS_MAJOR_VERSION` to `4`. The job scope is global variables, or under `.cs_common`. - - Remove the variable `CS_PROJECT` from your CI file. The job scope is `container_scanning_new`. - Setting this variable to `container-scanning` under the correct scope has the same effect as - removing it from your CI file. - - Remove the `CS_ANALYZER_IMAGE` variable from your CI file. The job scope is `.cs_common`. Note - that instead of overriding this variable, you can use `CS_MAJOR_VERSION`. + - `CS_MAJOR_VERSION` + - `CS_PROJECT` + - `SECURE_ANALYZERS_PREFIX` -1. Remove any variables that are only applicable to Clair. For a complete list of these variables, - see the [available variables](#available-cicd-variables). -1. Make any [necessary customizations](#customizing-the-container-scanning-settings) to the - `Trivy` scanner. We strongly recommended that you minimize customizations, as they - might require changes in future GitLab major releases. +1. Review the `CS_ANALYZER_IMAGE` variable. It no longer depends on the variables above and its new + default value is `registry.gitlab.com/security-products/container-scanning:4`. If you have an + offline environment, see + [Running container scanning in an offline environment](#running-container-scanning-in-an-offline-environment). + +1. If present, remove the `.cs_common` configuration section. + +1. If the `container_scanning` section is present, it's safer to create one from scratch based on + the new version of the [`Container-Scanning.gitlab-ci.yml` template](https://gitlab.com/gitlab-org/gitlab/blob/master/lib/gitlab/ci/templates/Security/Container-Scanning.gitlab-ci.yml). 
+ Once finished, it should not have any variables that are only applicable to Klar or Clair. For a + complete list of supported variables, see [available variables](#available-cicd-variables). + +1. Make any [necessary customizations](#customizing-the-container-scanning-settings) + to the `Trivy` scanner. We recommend that you minimize such customizations, as they might require + changes in future GitLab major releases. + +1. Trigger a new run of a pipeline that includes the `container_scanning` job. Inspect the job + output and ensure that the log messages do not mention Clair. **Troubleshooting** Prior to the GitLab 14.0 release, any variable defined under the scope `container_scanning` is not considered for the Trivy scanner. Verify that all variables for Trivy are -either defined as a global variable, or under `.cs_common` and `container_scanning_new`. +either defined as a global variable, or under `container_scanning`. ### Using a custom SSL CA certificate authority You can use the `ADDITIONAL_CA_CERT_BUNDLE` CI/CD variable to configure a custom SSL CA certificate authority, which is used to verify the peer when fetching Docker images from a registry which uses HTTPS. The `ADDITIONAL_CA_CERT_BUNDLE` value should contain the [text representation of the X.509 PEM public-key certificate](https://tools.ietf.org/html/rfc7468#section-5.1). For example, to configure this value in the `.gitlab-ci.yml` file, use the following: ```yaml -.cs_common: +container_scanning: variables: ADDITIONAL_CA_CERT_BUNDLE: | -----BEGIN CERTIFICATE----- @@ -420,8 +364,7 @@ To use container scanning in an offline environment, you need: | GitLab Analyzer | Container Registry | | --- | --- | -| [Klar](https://gitlab.com/gitlab-org/security-products/analyzers/klar/) (used to run Clair) | [Klar container registry](https://gitlab.com/gitlab-org/security-products/analyzers/klar/container_registry) | -| [Container-Scanning](https://gitlab.com/gitlab-org/security-products/analyzers/container-scanning) (used to run Trivy) | [Container-Scanning container registry](https://gitlab.com/gitlab-org/security-products/analyzers/container-scanning/container_registry/1741162) | +| [Container-Scanning](https://gitlab.com/gitlab-org/security-products/analyzers/container-scanning) | [Container-Scanning container registry](https://gitlab.com/security-products/container-scanning/container_registry/) | Note that GitLab Runner has a [default `pull policy` of `always`](https://docs.gitlab.com/runner/executors/docker.html#using-the-always-pull-policy), meaning the runner tries to pull Docker images from the GitLab container registry even if a local @@ -436,7 +379,6 @@ Support for custom certificate authorities was introduced in the following versi | Scanner | Version | | -------- | ------- | -| `Clair` | [v2.3.0](https://gitlab.com/gitlab-org/security-products/analyzers/klar/-/releases/v2.3.0) | | `Trivy` | [4.0.0](https://gitlab.com/gitlab-org/security-products/analyzers/container-scanning/-/releases/4.0.0) | #### Make GitLab container scanning analyzer images available inside your Docker registry @@ -444,17 +386,8 @@ Support for custom certificate authorities was introduced in the following versi For container scanning, import the following default images from `registry.gitlab.com` into your [local Docker container registry](../../packages/container_registry/index.md): -Clair: - ```plaintext -registry.gitlab.com/gitlab-org/security-products/analyzers/klar -https://hub.docker.com/r/arminc/clair-db -``` - -Trivy: - -```plaintext 
-registry.gitlab.com/gitlab-org/security-products/analyzers/container-scanning +registry.gitlab.com/security-products/container-scanning ``` The process for importing Docker images into a local offline Docker registry depends on @@ -473,49 +406,28 @@ For details on saving and transporting Docker images as a file, see Docker's doc 1. [Override the container scanning template](#overriding-the-container-scanning-template) in your `.gitlab-ci.yml` file to refer to the Docker images hosted on your local Docker container registry: - Clair: - ```yaml include: - template: Container-Scanning.gitlab-ci.yml - .cs_common: - image: $CI_REGISTRY/namespace/gitlab-klar-analyzer - variables: - CLAIR_DB_IMAGE: $CI_REGISTRY/namespace/clair-vulnerabilities-db - ``` - - Trivy: - - ```yaml - include: - - template: Container-Scanning.gitlab-ci.yml - - .cs_common: + container_scanning: image: $CI_REGISTRY/namespace/gitlab-container-scanning ``` 1. If your local Docker container registry is running securely over `HTTPS`, but you're using a self-signed certificate, then you must set `DOCKER_INSECURE: "true"` in the above - `container_scanning` section of your `.gitlab-ci.yml`. This only applies to Clair. + `container_scanning` section of your `.gitlab-ci.yml`. #### Automating container scanning vulnerability database updates with a pipeline We recommend that you set up a [scheduled pipeline](../../../ci/pipelines/schedules.md) -to fetch the latest vulnerabilities database on a preset schedule. Because the Clair scanner is -deprecated, the latest vulnerabilities are currently only available for the Trivy scanner. +to fetch the latest vulnerabilities database on a preset schedule. Automating this with a pipeline means you do not have to do it manually each time. You can use the following `.gitlab-yml.ci` example as a template. ```yaml variables: - # If using Clair, uncomment the following 2 lines and comment the Trivy lines below - # SOURCE_IMAGE: arminc/clair-db:latest - # TARGET_IMAGE: $CI_REGISTRY/$CI_PROJECT_PATH/clair-vulnerabilities-db - - # If using Trivy, uncomment the following 3 lines and comment the Clair lines above - CS_MAJOR_VERSION: 4 # ensure that this value matches the one you use in your scanning jobs - SOURCE_IMAGE: registry.gitlab.com/gitlab-org/security-products/analyzers/container-scanning:$CS_MAJOR_VERSION + SOURCE_IMAGE: registry.gitlab.com/security-products/container-scanning:4 TARGET_IMAGE: $CI_REGISTRY/$CI_PROJECT_PATH/gitlab-container-scanning image: docker:stable @@ -536,42 +448,6 @@ you're using a non-GitLab Docker registry, you must change the `$CI_REGISTRY` va ## Running the standalone container scanning tool -### Clair - -It's possible to run [Klar](https://gitlab.com/gitlab-org/security-products/analyzers/klar) -against a Docker container without needing to run it within the context of a CI job. To scan an -image directly, follow these steps: - -1. Run [Docker Desktop](https://www.docker.com/products/docker-desktop) or [Docker Machine](https://github.com/docker/machine). -1. Run the latest [prefilled vulnerabilities database](https://hub.docker.com/repository/docker/arminc/clair-db) Docker image: - - ```shell - docker run -p 5432:5432 -d --name clair-db arminc/clair-db:latest - ``` - -1. Configure a CI/CD variable to point to your local machine's IP address (or insert your IP address instead of the `LOCAL_MACHINE_IP_ADDRESS` variable in the `CLAIR_DB_CONNECTION_STRING` in the next step): - - ```shell - export LOCAL_MACHINE_IP_ADDRESS=your.local.ip.address - ``` - -1. 
Run the analyzer's Docker image, passing the image and tag you want to analyze in the `CI_APPLICATION_REPOSITORY` and `CI_APPLICATION_TAG` variables: - - ```shell - docker run \ - --interactive --rm \ - --volume "$PWD":/tmp/app \ - -e CI_PROJECT_DIR=/tmp/app \ - -e CLAIR_DB_CONNECTION_STRING="postgresql://postgres:password@${LOCAL_MACHINE_IP_ADDRESS}:5432/postgres?sslmode=disable&statement_timeout=60000" \ - -e CI_APPLICATION_REPOSITORY=registry.gitlab.com/gitlab-org/security-products/dast/webgoat-8.0@sha256 \ - -e CI_APPLICATION_TAG=bc09fe2e0721dfaeee79364115aeedf2174cce0947b9ae5fe7c33312ee019a4e \ - registry.gitlab.com/gitlab-org/security-products/analyzers/klar - ``` - -The results are stored in `gl-container-scanning-report.json`. - -### Trivy - It's possible to run the [GitLab container scanning tool](https://gitlab.com/gitlab-org/security-products/analyzers/container-scanning) against a Docker container without needing to run it within the context of a CI job. To scan an image directly, follow these steps: @@ -589,7 +465,7 @@ image directly, follow these steps: -e CI_PROJECT_DIR=/tmp/app \ -e CI_APPLICATION_REPOSITORY=registry.gitlab.com/gitlab-org/security-products/dast/webgoat-8.0@sha256 \ -e CI_APPLICATION_TAG=bc09fe2e0721dfaeee79364115aeedf2174cce0947b9ae5fe7c33312ee019a4e \ - registry.gitlab.com/gitlab-org/security-products/analyzers/container-scanning + registry.gitlab.com/security-products/container-scanning ``` The results are stored in `gl-container-scanning-report.json`. diff --git a/doc/user/application_security/offline_deployments/index.md b/doc/user/application_security/offline_deployments/index.md index 6c22b81f0f0..449ec13ef35 100644 --- a/doc/user/application_security/offline_deployments/index.md +++ b/doc/user/application_security/offline_deployments/index.md @@ -129,6 +129,10 @@ This method requires a runner with access to both `gitlab.com` (including to be able to use the `docker` command inside the jobs. This runner can be installed in a DMZ or on a bastion, and used only for this specific project. +WARNING: +This template does not include updates for the container scanning analyzer. Please see +[Container scanning offline directions](../container_scanning/index.md#running-container-scanning-in-an-offline-environment). + #### Scheduling the updates By default, this project's pipeline runs only once, when the `.gitlab-ci.yml` is added to the @@ -136,12 +140,6 @@ repository. To update the GitLab security scanners and signatures, it's necessar regularly. GitLab provides a way to [schedule pipelines](../../../ci/pipelines/schedules.md). For example, you can set this up to download and store the Docker images every week. -Some images can be updated more frequently than others. For example, the [vulnerability database](https://hub.docker.com/r/arminc/clair-db/tags) -for Container Scanning is updated daily. To update this single image, create a new Scheduled -Pipeline that runs daily and set `SECURE_BINARIES_ANALYZERS` to `clair-vulnerabilities-db`. Only -this job is triggered, and the image is updated daily and made available in the project -registry. 
- #### Using the secure bundle created The project using the `Secure-Binaries.gitlab-ci.yml` template should now host all the required diff --git a/doc/user/application_security/vulnerabilities/index.md b/doc/user/application_security/vulnerabilities/index.md index 34b0abfdf1a..42a06d966e4 100644 --- a/doc/user/application_security/vulnerabilities/index.md +++ b/doc/user/application_security/vulnerabilities/index.md @@ -186,7 +186,7 @@ The following vulnerability scanners and their databases are regularly updated: | Secure scanning tool | Vulnerabilities database updates | |:----------------------------------------------------------------|----------------------------------| -| [Container Scanning](../container_scanning/index.md) | Uses either `trivy` or `clair`. For the `trivy` scanner, a job runs on a daily basis to build a new image with the latest vulnerability database updates from the [upstream `trivy-db`](https://github.com/aquasecurity/trivy-db). For the `clair` scanner, the latest `clair-db` version is used; `clair-db` database [is updated daily according to the author](https://github.com/arminc/clair-local-scan#clair-server-or-local). | +| [Container Scanning](../container_scanning/index.md) | A job runs on a daily basis to build new images with the latest vulnerability database updates from the upstream scanner. | | [Dependency Scanning](../dependency_scanning/index.md) | Relies on `bundler-audit` (for Ruby gems), `retire.js` (for npm packages), and `gemnasium` (the GitLab tool for all libraries). Both `bundler-audit` and `retire.js` fetch their vulnerabilities data from GitHub repositories, so vulnerabilities added to `ruby-advisory-db` and `retire.js` are immediately available. The tools themselves are updated once per month if there's a new version. The [Gemnasium DB](https://gitlab.com/gitlab-org/security-products/gemnasium-db) is updated at least once a week. See our [current measurement of time from CVE being issued to our product being updated](https://about.gitlab.com/handbook/engineering/development/performance-indicators/#cve-issue-to-update). | | [Dynamic Application Security Testing (DAST)](../dast/index.md) | The scanning engine is updated on a periodic basis. See the [version of the underlying tool `zaproxy`](https://gitlab.com/gitlab-org/security-products/dast/blob/master/Dockerfile#L1). The scanning rules are downloaded at scan runtime. | | [Static Application Security Testing (SAST)](../sast/index.md) | Relies exclusively on [the tools GitLab wraps](../sast/index.md#supported-languages-and-frameworks). The underlying analyzers are updated at least once per month if a relevant update is available. The vulnerabilities database is updated by the upstream tools. | diff --git a/doc/user/application_security/vulnerabilities/severities.md b/doc/user/application_security/vulnerabilities/severities.md index 75366a49a55..cc62ec18229 100644 --- a/doc/user/application_security/vulnerabilities/severities.md +++ b/doc/user/application_security/vulnerabilities/severities.md @@ -62,10 +62,9 @@ the following tables: ## Container Scanning -| GitLab scanner | Outputs severity levels? | Native severity level type | Native severity level example | +| GitLab analyzer | Outputs severity levels? 
| Native severity level type | Native severity level example | |------------------------------------------------------------------------|--------------------------|----------------------------|--------------------------------------------------------------| -| [`clair`](https://gitlab.com/gitlab-org/security-products/analyzers/klar) | **{check-circle}** Yes | String | `Negligible`, `Low`, `Medium`, `High`, `Critical`, `Defcon1` | -| [`trivy`](https://gitlab.com/gitlab-org/security-products/analyzers/container-scanning)| **{check-circle}** Yes | String | `Unknown`, `Low`, `Medium`, `High`, `Critical` | +| [`container-scanning`](https://gitlab.com/gitlab-org/security-products/analyzers/container-scanning)| **{check-circle}** Yes | String | `Unknown`, `Low`, `Medium`, `High`, `Critical` | ## Fuzz Testing diff --git a/lib/api/entities/user_preferences.rb b/lib/api/entities/user_preferences.rb index 7a6df9b6c59..ceee6c610d3 100644 --- a/lib/api/entities/user_preferences.rb +++ b/lib/api/entities/user_preferences.rb @@ -3,7 +3,7 @@ module API module Entities class UserPreferences < Grape::Entity - expose :id, :user_id, :view_diffs_file_by_file + expose :id, :user_id, :view_diffs_file_by_file, :show_whitespace_in_diffs end end end diff --git a/lib/api/users.rb b/lib/api/users.rb index 565a3544da2..25e2f509292 100644 --- a/lib/api/users.rb +++ b/lib/api/users.rb @@ -1025,7 +1025,9 @@ module API detail 'This feature was introduced in GitLab 13.10.' end params do - requires :view_diffs_file_by_file, type: Boolean, desc: 'Flag indicating the user sees only one file diff per page' + optional :view_diffs_file_by_file, type: Boolean, desc: 'Flag indicating the user sees only one file diff per page' + optional :show_whitespace_in_diffs, type: Boolean, desc: 'Flag indicating the user sees whitespace changes in diffs' + at_least_one_of :view_diffs_file_by_file, :show_whitespace_in_diffs end put "preferences", feature_category: :users do authenticate! diff --git a/lib/gitlab/ci/templates/Security/Container-Scanning.gitlab-ci.yml b/lib/gitlab/ci/templates/Security/Container-Scanning.gitlab-ci.yml index 84d9a92663a..424a4b3e397 100644 --- a/lib/gitlab/ci/templates/Security/Container-Scanning.gitlab-ci.yml +++ b/lib/gitlab/ci/templates/Security/Container-Scanning.gitlab-ci.yml @@ -1,12 +1,15 @@ # Use this template to enable container scanning in your project. -# You should add this template to an existing `.gitlab-ci.yml` file by using the `include:` keyword. +# You should add this template to an existing `.gitlab-ci.yml` file by using the `include:` +# keyword. # The template should work without modifications but you can customize the template settings if # needed: https://docs.gitlab.com/ee/user/application_security/container_scanning/#customizing-the-container-scanning-settings # # Requirements: +# - A `test` stage to be present in the pipeline. # - You must define the image to be scanned in the DOCKER_IMAGE variable. If DOCKER_IMAGE is the # same as $CI_APPLICATION_REPOSITORY:$CI_APPLICATION_TAG, you can skip this. -# - Container registry credentials defined by `DOCKER_USER` and `DOCKER_PASSWORD` variables if the image to be scanned is in a private registry. +# - Container registry credentials defined by `DOCKER_USER` and `DOCKER_PASSWORD` variables if the +# image to be scanned is in a private registry. # - For auto-remediation, a readable Dockerfile in the root of the project or as defined by the # DOCKERFILE_PATH variable. 
# @@ -14,59 +17,27 @@ # List of available variables: https://docs.gitlab.com/ee/user/application_security/container_scanning/#available-variables variables: - # Setting this variable will affect all Security templates (e.g.: SAST, Dependency Scanning) - SECURE_ANALYZERS_PREFIX: "registry.gitlab.com/gitlab-org/security-products/analyzers" - CS_MAJOR_VERSION: 3 # The major version of the analyzer image to be used for scanning + CS_ANALYZER_IMAGE: registry.gitlab.com/security-products/container-scanning:4 -.cs_common: - stage: test +container_scanning: image: "$CS_ANALYZER_IMAGE" + stage: test variables: - # Override the GIT_STRATEGY variable in your `.gitlab-ci.yml` file and set it to `fetch` if you want to provide a `clair-whitelist.yml` - # file. See https://docs.gitlab.com/ee/user/application_security/container_scanning/index.html#overriding-the-container-scanning-template - # for details + # To provide a `vulnerability-allowlist.yml` file, override the GIT_STRATEGY variable in your + # `.gitlab-ci.yml` file and set it to `fetch`. + # For details, see the following links: + # https://docs.gitlab.com/ee/user/application_security/container_scanning/index.html#overriding-the-container-scanning-template + # https://docs.gitlab.com/ee/user/application_security/container_scanning/#vulnerability-allowlisting GIT_STRATEGY: none - # CS_ANALYZER_IMAGE is an undocumented variable used internally to allow QA to - # override the analyzer image with a custom value. This may be subject to change or - # breakage across GitLab releases. - CS_ANALYZER_IMAGE: $SECURE_ANALYZERS_PREFIX/$CS_PROJECT:$CS_MAJOR_VERSION allow_failure: true artifacts: reports: container_scanning: gl-container-scanning-report.json + paths: [gl-container-scanning-report.json] dependencies: [] - -container_scanning: - extends: .cs_common - variables: - # By default, use the latest clair vulnerabilities database, however, allow it to be overridden here with a specific image - # to enable container scanning to run offline, or to provide a consistent list of vulnerabilities for integration testing purposes - CLAIR_DB_IMAGE_TAG: "latest" - CLAIR_DB_IMAGE: "$SECURE_ANALYZERS_PREFIX/clair-vulnerabilities-db:$CLAIR_DB_IMAGE_TAG" - CS_PROJECT: 'klar' - services: - - name: $CLAIR_DB_IMAGE - alias: clair-vulnerabilities-db - script: - - /analyzer run - rules: - - if: $CONTAINER_SCANNING_DISABLED - when: never - - if: $CI_COMMIT_BRANCH && - $GITLAB_FEATURES =~ /\bcontainer_scanning\b/ && - $CS_MAJOR_VERSION =~ /^[0-3]$/ - -container_scanning_new: - extends: .cs_common - variables: - CS_ANALYZER_IMAGE: registry.gitlab.com/security-products/container-scanning:4 script: - gtcs scan - artifacts: - paths: [gl-container-scanning-report.json] rules: - if: $CONTAINER_SCANNING_DISABLED when: never - - if: $CI_COMMIT_BRANCH && - $GITLAB_FEATURES =~ /\bcontainer_scanning\b/ && - $CS_MAJOR_VERSION !~ /^[0-3]$/ + - if: $GITLAB_FEATURES =~ /\bcontainer_scanning\b/ diff --git a/lib/gitlab/ci/templates/Security/Secure-Binaries.gitlab-ci.yml b/lib/gitlab/ci/templates/Security/Secure-Binaries.gitlab-ci.yml index ac975fbbeab..c71b669c2a4 100644 --- a/lib/gitlab/ci/templates/Security/Secure-Binaries.gitlab-ci.yml +++ b/lib/gitlab/ci/templates/Security/Secure-Binaries.gitlab-ci.yml @@ -15,7 +15,6 @@ variables: SECURE_BINARIES_ANALYZERS: >- bandit, brakeman, gosec, spotbugs, flawfinder, phpcs-security-audit, security-code-scan, nodejs-scan, eslint, secrets, sobelow, pmd-apex, kubesec, semgrep, bundler-audit, retire.js, gemnasium, gemnasium-maven, gemnasium-python, - 
klar, clair-vulnerabilities-db, license-finder, dast, api-fuzzing @@ -161,28 +160,6 @@ kubesec: variables: - $SECURE_BINARIES_DOWNLOAD_IMAGES == "true" && $SECURE_BINARIES_ANALYZERS =~ /\bkubesec\b/ -# -# Container Scanning jobs -# - -klar: - extends: .download_images - only: - variables: - - $SECURE_BINARIES_DOWNLOAD_IMAGES == "true" && - $SECURE_BINARIES_ANALYZERS =~ /\bklar\b/ - variables: - SECURE_BINARIES_ANALYZER_VERSION: "3" - -clair-vulnerabilities-db: - extends: .download_images - only: - variables: - - $SECURE_BINARIES_DOWNLOAD_IMAGES == "true" && - $SECURE_BINARIES_ANALYZERS =~ /\bclair-vulnerabilities-db\b/ - variables: - SECURE_BINARIES_IMAGE: arminc/clair-db - SECURE_BINARIES_ANALYZER_VERSION: latest # # Dependency Scanning jobs diff --git a/lib/gitlab/database.rb b/lib/gitlab/database.rb index 0305433aef4..3583338c58a 100644 --- a/lib/gitlab/database.rb +++ b/lib/gitlab/database.rb @@ -89,6 +89,11 @@ module Gitlab end end + # Disables prepared statements for the current database connection. + def self.disable_prepared_statements + ActiveRecord::Base.establish_connection(config.merge(prepared_statements: false)) + end + # @deprecated def self.postgresql? adapter_name.casecmp('postgresql') == 0 diff --git a/lib/gitlab/database/consistency.rb b/lib/gitlab/database/consistency.rb index e99ea7a3232..17c16640e4c 100644 --- a/lib/gitlab/database/consistency.rb +++ b/lib/gitlab/database/consistency.rb @@ -4,28 +4,18 @@ module Gitlab module Database ## # This class is used to make it possible to ensure read consistency in - # GitLab EE without the need of overriding a lot of methods / classes / + # GitLab without the need of overriding a lot of methods / classes / # classs. # - # This is a CE class that does nothing in CE, because database load - # balancing is EE-only feature, but you can still use it in CE. It will - # start ensuring read consistency once it is overridden in EE. - # - # Using this class in CE helps to avoid creeping discrepancy between CE / - # EE only to force usage of the primary database in EE. - # class Consistency ## - # In CE there is no database load balancing, so all reads are expected to - # be consistent by the ACID guarantees of a single PostgreSQL instance. - # - # This method is overridden in EE. + # Within the block, disable the database load balancing for calls that + # require read consistency after recent writes. # def self.with_read_consistency(&block) - yield + ::Gitlab::Database::LoadBalancing::Session + .current.use_primary(&block) end end end end - -::Gitlab::Database::Consistency.singleton_class.prepend_mod_with('Gitlab::Database::Consistency') diff --git a/lib/gitlab/database/load_balancing.rb b/lib/gitlab/database/load_balancing.rb new file mode 100644 index 00000000000..ea4628ab757 --- /dev/null +++ b/lib/gitlab/database/load_balancing.rb @@ -0,0 +1,149 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module LoadBalancing + # The exceptions raised for connection errors. + CONNECTION_ERRORS = if defined?(PG) + [ + PG::ConnectionBad, + PG::ConnectionDoesNotExist, + PG::ConnectionException, + PG::ConnectionFailure, + PG::UnableToSend, + # During a failover this error may be raised when + # writing to a primary. + PG::ReadOnlySqlTransaction + ].freeze + else + [].freeze + end + + ProxyNotConfiguredError = Class.new(StandardError) + + # The connection proxy to use for load balancing (if enabled). 
+ def self.proxy + unless @proxy + Gitlab::ErrorTracking.track_exception( + ProxyNotConfiguredError.new( + "Attempting to access the database load balancing proxy, but it wasn't configured.\n" \ + "Did you forget to call '#{self.name}.configure_proxy'?" + )) + end + + @proxy + end + + # Returns a Hash containing the load balancing configuration. + def self.configuration + Gitlab::Database.config[:load_balancing] || {} + end + + # Returns the maximum replica lag size in bytes. + def self.max_replication_difference + (configuration['max_replication_difference'] || 8.megabytes).to_i + end + + # Returns the maximum lag time for a replica. + def self.max_replication_lag_time + (configuration['max_replication_lag_time'] || 60.0).to_f + end + + # Returns the interval (in seconds) to use for checking the status of a + # replica. + def self.replica_check_interval + (configuration['replica_check_interval'] || 60).to_f + end + + # Returns the additional hosts to use for load balancing. + def self.hosts + configuration['hosts'] || [] + end + + def self.service_discovery_enabled? + configuration.dig('discover', 'record').present? + end + + def self.service_discovery_configuration + conf = configuration['discover'] || {} + + { + nameserver: conf['nameserver'] || 'localhost', + port: conf['port'] || 8600, + record: conf['record'], + record_type: conf['record_type'] || 'A', + interval: conf['interval'] || 60, + disconnect_timeout: conf['disconnect_timeout'] || 120, + use_tcp: conf['use_tcp'] || false + } + end + + def self.pool_size + Gitlab::Database.config[:pool] + end + + # Returns true if load balancing is to be enabled. + def self.enable? + return false if Gitlab::Runtime.rake? + return false if Gitlab::Runtime.sidekiq? && !Gitlab::Utils.to_boolean(ENV['ENABLE_LOAD_BALANCING_FOR_SIDEKIQ'], default: false) + return false unless self.configured? + + true + end + + # Returns true if load balancing has been configured. Since + # Sidekiq does not currently use load balancing, we + # may want Web application servers to detect replication lag by + # posting the write location of the database if load balancing is + # configured. + def self.configured? + return false unless feature_available? + + hosts.any? || service_discovery_enabled? + end + + # Temporarily disabled for FOSS until move from EE to FOSS is complete + def self.feature_available? + Gitlab.ee? || Gitlab::Utils.to_boolean(ENV['ENABLE_LOAD_BALANCING_FOR_FOSS'], default: false) + end + + def self.start_service_discovery + return unless service_discovery_enabled? + + ServiceDiscovery.new(service_discovery_configuration).start + end + + # Configures proxying of requests. + def self.configure_proxy(proxy = ConnectionProxy.new(hosts)) + @proxy = proxy + + # This hijacks the "connection" method to ensure both + # `ActiveRecord::Base.connection` and all models use the same load + # balancing proxy. + ActiveRecord::Base.singleton_class.prepend(ActiveRecordProxy) + end + + def self.active_record_models + ActiveRecord::Base.descendants + end + + DB_ROLES = [ + ROLE_PRIMARY = :primary, + ROLE_REPLICA = :replica, + ROLE_UNKNOWN = :unknown + ].freeze + + # Returns the role (primary/replica) of the database the connection is + # connecting to. At the moment, the connection can only be retrieved by + # Gitlab::Database::LoadBalancer#read or #read_write or from the + # ActiveRecord directly. Therefore, if the load balancer doesn't + # recognize the connection, this method returns the primary role + # directly. In future, we may need to check for other sources. 
+ def self.db_role_for_connection(connection) + return ROLE_PRIMARY if !enable? || @proxy.blank? + + proxy.load_balancer.db_role_for_connection(connection) + end + end + end +end diff --git a/lib/gitlab/database/load_balancing/active_record_proxy.rb b/lib/gitlab/database/load_balancing/active_record_proxy.rb new file mode 100644 index 00000000000..7763497e770 --- /dev/null +++ b/lib/gitlab/database/load_balancing/active_record_proxy.rb @@ -0,0 +1,15 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module LoadBalancing + # Module injected into ActiveRecord::Base to allow hijacking of the + # "connection" method. + module ActiveRecordProxy + def connection + LoadBalancing.proxy + end + end + end + end +end diff --git a/lib/gitlab/database/load_balancing/connection_proxy.rb b/lib/gitlab/database/load_balancing/connection_proxy.rb new file mode 100644 index 00000000000..43edc99b961 --- /dev/null +++ b/lib/gitlab/database/load_balancing/connection_proxy.rb @@ -0,0 +1,139 @@ +# frozen_string_literal: true + +# rubocop:disable GitlabSecurity/PublicSend + +module Gitlab + module Database + module LoadBalancing + # Redirecting of ActiveRecord connections. + # + # The ConnectionProxy class redirects ActiveRecord connection requests to + # the right load balancer pool, depending on the type of query. + class ConnectionProxy + WriteInsideReadOnlyTransactionError = Class.new(StandardError) + READ_ONLY_TRANSACTION_KEY = :load_balacing_read_only_transaction + + attr_reader :load_balancer + + # These methods perform writes after which we need to stick to the + # primary. + STICKY_WRITES = %i( + delete + delete_all + insert + update + update_all + ).freeze + + NON_STICKY_READS = %i( + sanitize_limit + select + select_one + quote_column_name + ).freeze + + # hosts - The hosts to use for load balancing. + def initialize(hosts = []) + @load_balancer = LoadBalancer.new(hosts) + end + + def select_all(arel, name = nil, binds = [], preparable: nil) + if arel.respond_to?(:locked) && arel.locked + # SELECT ... FOR UPDATE queries should be sent to the primary. + write_using_load_balancer(:select_all, [arel, name, binds], + sticky: true) + else + read_using_load_balancer(:select_all, [arel, name, binds]) + end + end + + NON_STICKY_READS.each do |name| + define_method(name) do |*args, &block| + read_using_load_balancer(name, args, &block) + end + end + + STICKY_WRITES.each do |name| + define_method(name) do |*args, &block| + write_using_load_balancer(name, args, sticky: true, &block) + end + end + + def transaction(*args, &block) + if current_session.fallback_to_replicas_for_ambiguous_queries? + track_read_only_transaction! + read_using_load_balancer(:transaction, args, &block) + else + write_using_load_balancer(:transaction, args, sticky: true, &block) + end + + ensure + untrack_read_only_transaction! + end + + # Delegates all unknown messages to a read-write connection. + def method_missing(name, *args, &block) + if current_session.fallback_to_replicas_for_ambiguous_queries? + read_using_load_balancer(name, args, &block) + else + write_using_load_balancer(name, args, &block) + end + end + + # Performs a read using the load balancer. + # + # name - The name of the method to call on a connection object. + def read_using_load_balancer(name, args, &block) + if current_session.use_primary? && + !current_session.use_replicas_for_read_queries? 
+ @load_balancer.read_write do |connection| + connection.send(name, *args, &block) + end + else + @load_balancer.read do |connection| + connection.send(name, *args, &block) + end + end + end + + # Performs a write using the load balancer. + # + # name - The name of the method to call on a connection object. + # sticky - If set to true the session will stick to the master after + # the write. + def write_using_load_balancer(name, args, sticky: false, &block) + if read_only_transaction? + raise WriteInsideReadOnlyTransactionError, 'A write query is performed inside a read-only transaction' + end + + @load_balancer.read_write do |connection| + # Sticking has to be enabled before calling the method. Not doing so + # could lead to methods called in a block still being performed on a + # secondary instead of on a primary (when necessary). + current_session.write! if sticky + + connection.send(name, *args, &block) + end + end + + private + + def current_session + ::Gitlab::Database::LoadBalancing::Session.current + end + + def track_read_only_transaction! + Thread.current[READ_ONLY_TRANSACTION_KEY] = true + end + + def untrack_read_only_transaction! + Thread.current[READ_ONLY_TRANSACTION_KEY] = nil + end + + def read_only_transaction? + Thread.current[READ_ONLY_TRANSACTION_KEY] == true + end + end + end + end +end diff --git a/lib/gitlab/database/load_balancing/host.rb b/lib/gitlab/database/load_balancing/host.rb new file mode 100644 index 00000000000..3e74b5ea727 --- /dev/null +++ b/lib/gitlab/database/load_balancing/host.rb @@ -0,0 +1,209 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module LoadBalancing + # A single database host used for load balancing. + class Host + attr_reader :pool, :last_checked_at, :intervals, :load_balancer, :host, :port + + delegate :connection, :release_connection, :enable_query_cache!, :disable_query_cache!, :query_cache_enabled, to: :pool + + CONNECTION_ERRORS = + if defined?(PG) + [ + ActionView::Template::Error, + ActiveRecord::StatementInvalid, + PG::Error + ].freeze + else + [ + ActionView::Template::Error, + ActiveRecord::StatementInvalid + ].freeze + end + + # host - The address of the database. + # load_balancer - The LoadBalancer that manages this Host. + def initialize(host, load_balancer, port: nil) + @host = host + @port = port + @load_balancer = load_balancer + @pool = Database.create_connection_pool(LoadBalancing.pool_size, host, port) + @online = true + @last_checked_at = Time.zone.now + + interval = LoadBalancing.replica_check_interval + @intervals = (interval..(interval * 2)).step(0.5).to_a + end + + # Disconnects the pool, once all connections are no longer in use. + # + # timeout - The time after which the pool should be forcefully + # disconnected. + def disconnect!(timeout = 120) + start_time = Metrics::System.monotonic_time + + while (Metrics::System.monotonic_time - start_time) <= timeout + break if pool.connections.none?(&:in_use?) + + sleep(2) + end + + pool.disconnect! + end + + def offline! + LoadBalancing::Logger.warn( + event: :host_offline, + message: 'Marking host as offline', + db_host: @host, + db_port: @port + ) + + @online = false + @pool.disconnect! + end + + # Returns true if the host is online. + def online? + return @online unless check_replica_status? 
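+ # The check interval has elapsed: query the replica again and
+ # report the refreshed status below.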
+ + refresh_status + + if @online + LoadBalancing::Logger.info( + event: :host_online, + message: 'Host is online after replica status check', + db_host: @host, + db_port: @port + ) + else + LoadBalancing::Logger.warn( + event: :host_offline, + message: 'Host is offline after replica status check', + db_host: @host, + db_port: @port + ) + end + + @online + rescue *CONNECTION_ERRORS + offline! + false + end + + def refresh_status + @online = replica_is_up_to_date? + @last_checked_at = Time.zone.now + end + + def check_replica_status? + (Time.zone.now - last_checked_at) >= intervals.sample + end + + def replica_is_up_to_date? + replication_lag_below_threshold? || data_is_recent_enough? + end + + def replication_lag_below_threshold? + if (lag_time = replication_lag_time) + lag_time <= LoadBalancing.max_replication_lag_time + else + false + end + end + + # Returns true if the replica has replicated enough data to be useful. + def data_is_recent_enough? + # It's possible for a replica to not replay WAL data for a while, + # despite being up to date. This can happen when a primary does not + # receive any writes for a while. + # + # To prevent this from happening we check if the lag size (in bytes) + # of the replica is small enough for the replica to be useful. We + # only do this if we haven't replicated in a while so we only need + # to connect to the primary when truly necessary. + if (lag_size = replication_lag_size) + lag_size <= LoadBalancing.max_replication_difference + else + false + end + end + + # Returns the replication lag time of this secondary in seconds as a + # float. + # + # This method will return nil if no lag time could be calculated. + def replication_lag_time + row = query_and_release('SELECT EXTRACT(EPOCH FROM (now() - pg_last_xact_replay_timestamp()))::float as lag') + + row['lag'].to_f if row.any? + end + + # Returns the number of bytes this secondary is lagging behind the + # primary. + # + # This method will return nil if no lag size could be calculated. + def replication_lag_size + location = connection.quote(primary_write_location) + row = query_and_release(<<-SQL.squish) + SELECT pg_wal_lsn_diff(#{location}, pg_last_wal_replay_lsn())::float + AS diff + SQL + + row['diff'].to_i if row.any? + rescue *CONNECTION_ERRORS + nil + end + + def primary_write_location + load_balancer.primary_write_location + ensure + load_balancer.release_primary_connection + end + + def database_replica_location + row = query_and_release(<<-SQL.squish) + SELECT pg_last_wal_replay_lsn()::text AS location + SQL + + row['location'] if row.any? + rescue *CONNECTION_ERRORS + nil + end + + # Returns true if this host has caught up to the given transaction + # write location. + # + # location - The transaction write location as reported by a primary. + def caught_up?(location) + string = connection.quote(location) + + # In case the host is a primary pg_last_wal_replay_lsn/pg_last_xlog_replay_location() returns + # NULL. The recovery check ensures we treat the host as up-to-date in + # such a case. 
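+ #
+ # The query below therefore evaluates to true when the host is not
+ # in recovery (i.e. it is a primary), or when its replay location is
+ # at or past the given write location.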
+ query = <<-SQL.squish + SELECT NOT pg_is_in_recovery() + OR pg_wal_lsn_diff(pg_last_wal_replay_lsn(), #{string}) >= 0 + AS result + SQL + + row = query_and_release(query) + + ::Gitlab::Utils.to_boolean(row['result']) + rescue *CONNECTION_ERRORS + false + end + + def query_and_release(sql) + connection.select_all(sql).first || {} + rescue StandardError + {} + ensure + release_connection + end + end + end + end +end diff --git a/lib/gitlab/database/load_balancing/host_list.rb b/lib/gitlab/database/load_balancing/host_list.rb new file mode 100644 index 00000000000..24800012947 --- /dev/null +++ b/lib/gitlab/database/load_balancing/host_list.rb @@ -0,0 +1,99 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module LoadBalancing + # A list of database hosts to use for connections. + class HostList + # hosts - The list of secondary hosts to add. + def initialize(hosts = []) + @hosts = hosts.shuffle + @pools = Set.new + @index = 0 + @mutex = Mutex.new + @hosts_gauge = Gitlab::Metrics.gauge(:db_load_balancing_hosts, 'Current number of load balancing hosts') + + set_metrics! + update_pools + end + + def hosts + @mutex.synchronize { @hosts.dup } + end + + def shuffle + @mutex.synchronize do + unsafe_shuffle + end + end + + def length + @mutex.synchronize { @hosts.length } + end + + def host_names_and_ports + @mutex.synchronize { @hosts.map { |host| [host.host, host.port] } } + end + + def manage_pool?(pool) + @pools.include?(pool) + end + + def hosts=(hosts) + @mutex.synchronize do + @hosts = hosts + unsafe_shuffle + update_pools + end + + set_metrics! + end + + # Sets metrics before returning next host + def next + next_host.tap do |_| + set_metrics! + end + end + + private + + def unsafe_shuffle + @hosts = @hosts.shuffle + @index = 0 + end + + # Returns the next available host. + # + # Returns a Gitlab::Database::LoadBalancing::Host instance, or nil if no + # hosts were available. + def next_host + @mutex.synchronize do + break if @hosts.empty? + + started_at = @index + + loop do + host = @hosts[@index] + @index = (@index + 1) % @hosts.length + + break host if host.online? + + # Return nil once we have cycled through all hosts and none were + # available. + break if @index == started_at + end + end + end + + def set_metrics! + @hosts_gauge.set({}, @hosts.length) + end + + def update_pools + @pools = Set.new(@hosts.map(&:pool)) + end + end + end + end +end diff --git a/lib/gitlab/database/load_balancing/load_balancer.rb b/lib/gitlab/database/load_balancing/load_balancer.rb new file mode 100644 index 00000000000..f655bb46023 --- /dev/null +++ b/lib/gitlab/database/load_balancing/load_balancer.rb @@ -0,0 +1,261 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module LoadBalancing + # Load balancing for ActiveRecord connections. + # + # Each host in the load balancer uses the same credentials as the primary + # database. + # + # This class *requires* that `ActiveRecord::Base.retrieve_connection` + # always returns a connection to the primary. + class LoadBalancer + CACHE_KEY = :gitlab_load_balancer_host + VALID_HOSTS_CACHE_KEY = :gitlab_load_balancer_valid_hosts + + attr_reader :host_list + + # hosts - The hostnames/addresses of the additional databases. + def initialize(hosts = []) + @host_list = HostList.new(hosts.map { |addr| Host.new(addr, self) }) + @connection_db_roles = {}.compare_by_identity + @connection_db_roles_count = {}.compare_by_identity + end + + # Yields a connection that can be used for reads. 
+ # + # If no secondaries were available this method will use the primary + # instead. + def read(&block) + connection = nil + conflict_retried = 0 + + while host + ensure_caching! + + begin + connection = host.connection + track_connection_role(connection, ROLE_REPLICA) + + return yield connection + rescue StandardError => error + untrack_connection_role(connection) + + if serialization_failure?(error) + # This error can occur when a query conflicts. See + # https://www.postgresql.org/docs/current/static/hot-standby.html#HOT-STANDBY-CONFLICT + # for more information. + # + # In this event we'll cycle through the secondaries at most 3 + # times before using the primary instead. + will_retry = conflict_retried < @host_list.length * 3 + + LoadBalancing::Logger.warn( + event: :host_query_conflict, + message: 'Query conflict on host', + conflict_retried: conflict_retried, + will_retry: will_retry, + db_host: host.host, + db_port: host.port, + host_list_length: @host_list.length + ) + + if will_retry + conflict_retried += 1 + release_host + else + break + end + elsif connection_error?(error) + host.offline! + release_host + else + raise error + end + end + end + + LoadBalancing::Logger.warn( + event: :no_secondaries_available, + message: 'No secondaries were available, using primary instead', + conflict_retried: conflict_retried, + host_list_length: @host_list.length + ) + + read_write(&block) + ensure + untrack_connection_role(connection) + end + + # Yields a connection that can be used for both reads and writes. + def read_write + connection = nil + # In the event of a failover the primary may be briefly unavailable. + # Instead of immediately grinding to a halt we'll retry the operation + # a few times. + retry_with_backoff do + connection = ActiveRecord::Base.retrieve_connection + track_connection_role(connection, ROLE_PRIMARY) + + yield connection + end + ensure + untrack_connection_role(connection) + end + + # Recognize the role (primary/replica) of the database this connection + # is connecting to. If the connection is not issued by this load + # balancer, return nil + def db_role_for_connection(connection) + return @connection_db_roles[connection] if @connection_db_roles[connection] + return ROLE_REPLICA if @host_list.manage_pool?(connection.pool) + return ROLE_PRIMARY if connection.pool == ActiveRecord::Base.connection_pool + end + + # Returns a host to use for queries. + # + # Hosts are scoped per thread so that multiple threads don't + # accidentally re-use the same host + connection. + def host + RequestStore[CACHE_KEY] ||= current_host_list.next + end + + # Releases the host and connection for the current thread. + def release_host + if host = RequestStore[CACHE_KEY] + host.disable_query_cache! + host.release_connection + end + + RequestStore.delete(CACHE_KEY) + RequestStore.delete(VALID_HOSTS_CACHE_KEY) + end + + def release_primary_connection + ActiveRecord::Base.connection_pool.release_connection + end + + # Returns the transaction write location of the primary. + def primary_write_location + location = read_write do |connection| + ::Gitlab::Database.get_write_location(connection) + end + + return location if location + + raise 'Failed to determine the write location of the primary database' + end + + # Returns true if all hosts have caught up to the given transaction + # write location. + def all_caught_up?(location) + @host_list.hosts.all? { |host| host.caught_up?(location) } + end + + # Returns true if there was at least one host that has caught up with the given transaction. 
+ # + # In case of a retry, this method also stores the set of hosts that have caught up. + def select_caught_up_hosts(location) + all_hosts = @host_list.hosts + valid_hosts = all_hosts.select { |host| host.caught_up?(location) } + + return false if valid_hosts.empty? + + # Hosts can come online after the time when this scan was done, + # so we need to remember the ones that can be used. If the host went + # offline, we'll just rely on the retry mechanism to use the primary. + set_consistent_hosts_for_request(HostList.new(valid_hosts)) + + # Since we will be using a subset from the original list, let's just + # pick a random host and mix up the original list to ensure we don't + # only end up using one replica. + RequestStore[CACHE_KEY] = valid_hosts.sample + @host_list.shuffle + + true + end + + def set_consistent_hosts_for_request(hosts) + RequestStore[VALID_HOSTS_CACHE_KEY] = hosts + end + + # Yields a block, retrying it upon error using an exponential backoff. + def retry_with_backoff(retries = 3, time = 2) + retried = 0 + last_error = nil + + while retried < retries + begin + return yield + rescue StandardError => error + raise error unless connection_error?(error) + + # We need to release the primary connection as otherwise Rails + # will keep raising errors when using the connection. + release_primary_connection + + last_error = error + sleep(time) + retried += 1 + time **= 2 + end + end + + raise last_error + end + + def connection_error?(error) + case error + when ActiveRecord::StatementInvalid, ActionView::Template::Error + # After connecting to the DB Rails will wrap query errors using this + # class. + connection_error?(error.cause) + when *CONNECTION_ERRORS + true + else + # When PG tries to set the client encoding but fails due to a + # connection error it will raise a PG::Error instance. Catching that + # would catch all errors (even those we don't want), so instead we + # check for the message of the error. + error.message.start_with?('invalid encoding name:') + end + end + + def serialization_failure?(error) + if error.cause + serialization_failure?(error.cause) + else + error.is_a?(PG::TRSerializationFailure) + end + end + + private + + def ensure_caching! + host.enable_query_cache! unless host.query_cache_enabled + end + + def track_connection_role(connection, role) + @connection_db_roles[connection] = role + @connection_db_roles_count[connection] ||= 0 + @connection_db_roles_count[connection] += 1 + end + + def untrack_connection_role(connection) + return if connection.blank? || @connection_db_roles_count[connection].blank? 
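+ # A connection may be tracked more than once (for example by nested
+ # read_write blocks), so its role is only forgotten once the counter
+ # drops to zero.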
+ + @connection_db_roles_count[connection] -= 1 + if @connection_db_roles_count[connection] <= 0 + @connection_db_roles.delete(connection) + @connection_db_roles_count.delete(connection) + end + end + + def current_host_list + RequestStore[VALID_HOSTS_CACHE_KEY] || @host_list + end + end + end + end +end diff --git a/lib/gitlab/database/load_balancing/logger.rb b/lib/gitlab/database/load_balancing/logger.rb new file mode 100644 index 00000000000..ee67ffcc99c --- /dev/null +++ b/lib/gitlab/database/load_balancing/logger.rb @@ -0,0 +1,13 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module LoadBalancing + class Logger < ::Gitlab::JsonLogger + def self.file_name_noext + 'database_load_balancing' + end + end + end + end +end diff --git a/lib/gitlab/database/load_balancing/rack_middleware.rb b/lib/gitlab/database/load_balancing/rack_middleware.rb new file mode 100644 index 00000000000..4734ff99bd3 --- /dev/null +++ b/lib/gitlab/database/load_balancing/rack_middleware.rb @@ -0,0 +1,98 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module LoadBalancing + # Rack middleware to handle sticking when serving Rails requests. Grape + # API calls are handled separately as different API endpoints need to + # stick based on different objects. + class RackMiddleware + STICK_OBJECT = 'load_balancing.stick_object' + + # Unsticks or continues sticking the current request. + # + # This method also updates the Rack environment so #call can later + # determine if we still need to stick or not. + # + # env - The Rack environment. + # namespace - The namespace to use for sticking. + # id - The identifier to use for sticking. + def self.stick_or_unstick(env, namespace, id) + return unless LoadBalancing.enable? + + Sticking.unstick_or_continue_sticking(namespace, id) + + env[STICK_OBJECT] ||= Set.new + env[STICK_OBJECT] << [namespace, id] + end + + def initialize(app) + @app = app + end + + def call(env) + # Ensure that any state that may have run before the first request + # doesn't linger around. + clear + + unstick_or_continue_sticking(env) + + result = @app.call(env) + + stick_if_necessary(env) + + result + ensure + clear + end + + # Determine if we need to stick based on currently available user data. + # + # Typically this code will only be reachable for Rails requests as + # Grape data is not yet available at this point. + def unstick_or_continue_sticking(env) + namespaces_and_ids = sticking_namespaces_and_ids(env) + + namespaces_and_ids.each do |namespace, id| + Sticking.unstick_or_continue_sticking(namespace, id) + end + end + + # Determine if we need to stick after handling a request. + def stick_if_necessary(env) + namespaces_and_ids = sticking_namespaces_and_ids(env) + + namespaces_and_ids.each do |namespace, id| + Sticking.stick_if_necessary(namespace, id) + end + end + + def clear + load_balancer.release_host + Session.clear_session + end + + def load_balancer + LoadBalancing.proxy.load_balancer + end + + # Determines the sticking namespace and identifier based on the Rack + # environment. + # + # For Rails requests this uses warden, but Grape and others have to + # manually set the right environment variable. + def sticking_namespaces_and_ids(env) + warden = env['warden'] + + if warden && warden.user + [[:user, warden.user.id]] + elsif env[STICK_OBJECT].present? 
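+ # These entries were added as [namespace, id] pairs via
+ # .stick_or_unstick, for example by Grape API endpoints.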
+ env[STICK_OBJECT].to_a + else + [] + end + end + end + end + end +end diff --git a/lib/gitlab/database/load_balancing/resolver.rb b/lib/gitlab/database/load_balancing/resolver.rb new file mode 100644 index 00000000000..a291080cc3d --- /dev/null +++ b/lib/gitlab/database/load_balancing/resolver.rb @@ -0,0 +1,52 @@ +# frozen_string_literal: true + +require 'net/dns' +require 'resolv' + +module Gitlab + module Database + module LoadBalancing + class Resolver + UnresolvableNameserverError = Class.new(StandardError) + + def initialize(nameserver) + @nameserver = nameserver + end + + def resolve + address = ip_address || ip_address_from_hosts_file || + ip_address_from_dns + + unless address + raise UnresolvableNameserverError, + "could not resolve #{@nameserver}" + end + + address + end + + private + + def ip_address + IPAddr.new(@nameserver) + rescue IPAddr::InvalidAddressError + end + + def ip_address_from_hosts_file + ip = Resolv::Hosts.new.getaddress(@nameserver) + IPAddr.new(ip) + rescue Resolv::ResolvError + end + + def ip_address_from_dns + answer = Net::DNS::Resolver.start(@nameserver, Net::DNS::A).answer + return if answer.empty? + + answer.first.address + rescue Net::DNS::Resolver::NoResponseError + raise UnresolvableNameserverError, "no response from DNS server(s)" + end + end + end + end +end diff --git a/lib/gitlab/database/load_balancing/service_discovery.rb b/lib/gitlab/database/load_balancing/service_discovery.rb new file mode 100644 index 00000000000..9b42b25be1c --- /dev/null +++ b/lib/gitlab/database/load_balancing/service_discovery.rb @@ -0,0 +1,187 @@ +# frozen_string_literal: true + +require 'net/dns' +require 'resolv' + +module Gitlab + module Database + module LoadBalancing + # Service discovery of secondary database hosts. + # + # Service discovery works by periodically looking up a DNS record. If the + # DNS record returns a new list of hosts, this class will update the load + # balancer with said hosts. Requests may continue to use the old hosts + # until they complete. + class ServiceDiscovery + attr_reader :interval, :record, :record_type, :disconnect_timeout + + MAX_SLEEP_ADJUSTMENT = 10 + + RECORD_TYPES = { + 'A' => Net::DNS::A, + 'SRV' => Net::DNS::SRV + }.freeze + + Address = Struct.new(:hostname, :port) do + def to_s + port ? "#{hostname}:#{port}" : hostname + end + + def <=>(other) + self.to_s <=> other.to_s + end + end + + # nameserver - The nameserver to use for DNS lookups. + # port - The port of the nameserver. + # record - The DNS record to look up for retrieving the secondaries. + # record_type - The type of DNS record to look up + # interval - The time to wait between lookups. + # disconnect_timeout - The time after which an old host should be + # forcefully disconnected. + # use_tcp - Use TCP instaed of UDP to look up resources + def initialize(nameserver:, port:, record:, record_type: 'A', interval: 60, disconnect_timeout: 120, use_tcp: false) + @nameserver = nameserver + @port = port + @record = record + @record_type = record_type_for(record_type) + @interval = interval + @disconnect_timeout = disconnect_timeout + @use_tcp = use_tcp + end + + def start + Thread.new do + loop do + interval = + begin + refresh_if_necessary + rescue StandardError => error + # Any exceptions that might occur should be reported to + # Sentry, instead of silently terminating this thread. 
+ Gitlab::ErrorTracking.track_exception(error) + + Gitlab::AppLogger.error( + "Service discovery encountered an error: #{error.message}" + ) + + self.interval + end + + # We slightly randomize the sleep() interval. This should reduce + # the likelihood of _all_ processes refreshing at the same time, + # possibly putting unnecessary pressure on the DNS server. + sleep(interval + rand(MAX_SLEEP_ADJUSTMENT)) + end + end + end + + # Refreshes the hosts, but only if the DNS record returned a new list of + # addresses. + # + # The return value is the amount of time (in seconds) to wait before + # checking the DNS record for any changes. + def refresh_if_necessary + interval, from_dns = addresses_from_dns + + current = addresses_from_load_balancer + + replace_hosts(from_dns) if from_dns != current + + interval + end + + # Replaces all the hosts in the load balancer with the new ones, + # disconnecting the old connections. + # + # addresses - An Array of Address structs to use for the new hosts. + def replace_hosts(addresses) + old_hosts = load_balancer.host_list.hosts + + load_balancer.host_list.hosts = addresses.map do |addr| + Host.new(addr.hostname, load_balancer, port: addr.port) + end + + # We must explicitly disconnect the old connections, otherwise we may + # leak database connections over time. For example, if a request + # started just before we added the new hosts it will use an old + # host/connection. While this connection will be checked in and out, + # it won't be explicitly disconnected. + old_hosts.each do |host| + host.disconnect!(disconnect_timeout) + end + end + + # Returns an Array containing: + # + # 1. The time to wait for the next check. + # 2. An array containing the hostnames of the DNS record. + def addresses_from_dns + response = resolver.search(record, record_type) + resources = response.answer + + addresses = + case record_type + when Net::DNS::A + addresses_from_a_record(resources) + when Net::DNS::SRV + addresses_from_srv_record(response) + end + + # Addresses are sorted so we can directly compare the old and new + # addresses, without having to use any additional data structures. + [new_wait_time_for(resources), addresses.sort] + end + + def new_wait_time_for(resources) + wait = resources.first&.ttl || interval + + # The preconfigured interval acts as a minimum amount of time to + # wait. + wait < interval ? 
interval : wait + end + + def addresses_from_load_balancer + load_balancer.host_list.host_names_and_ports.map do |hostname, port| + Address.new(hostname, port) + end.sort + end + + def load_balancer + LoadBalancing.proxy.load_balancer + end + + def resolver + @resolver ||= Net::DNS::Resolver.new( + nameservers: Resolver.new(@nameserver).resolve, + port: @port, + use_tcp: @use_tcp + ) + end + + private + + def record_type_for(type) + RECORD_TYPES.fetch(type) do + raise(ArgumentError, "Unsupported record type: #{type}") + end + end + + def addresses_from_srv_record(response) + srv_resolver = SrvResolver.new(resolver, response.additional) + + response.answer.map do |r| + address = srv_resolver.address_for(r.host.to_s) + next unless address + + Address.new(address.to_s, r.port) + end.compact + end + + def addresses_from_a_record(resources) + resources.map { |r| Address.new(r.address.to_s) } + end + end + end + end +end diff --git a/lib/gitlab/database/load_balancing/session.rb b/lib/gitlab/database/load_balancing/session.rb new file mode 100644 index 00000000000..3682c9265c2 --- /dev/null +++ b/lib/gitlab/database/load_balancing/session.rb @@ -0,0 +1,118 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module LoadBalancing + # Tracking of load balancing state per user session. + # + # A session starts at the beginning of a request and ends once the request + # has been completed. Sessions can be used to keep track of what hosts + # should be used for queries. + class Session + CACHE_KEY = :gitlab_load_balancer_session + + def self.current + RequestStore[CACHE_KEY] ||= new + end + + def self.clear_session + RequestStore.delete(CACHE_KEY) + end + + def self.without_sticky_writes(&block) + current.ignore_writes(&block) + end + + def initialize + @use_primary = false + @performed_write = false + @ignore_writes = false + @fallback_to_replicas_for_ambiguous_queries = false + @use_replicas_for_read_queries = false + end + + def use_primary? + @use_primary + end + + alias_method :using_primary?, :use_primary? + + def use_primary! + @use_primary = true + end + + def use_primary(&blk) + used_primary = @use_primary + @use_primary = true + yield + ensure + @use_primary = used_primary || @performed_write + end + + def ignore_writes(&block) + @ignore_writes = true + + yield + ensure + @ignore_writes = false + end + + # Indicates that the read SQL statements from anywhere inside this + # blocks should use a replica, regardless of the current primary + # stickiness or whether a write query is already performed in the + # current session. This interface is reserved mostly for performance + # purpose. This is a good tool to push expensive queries, which can + # tolerate the replica lags, to the replicas. + # + # Write and ambiguous queries inside this block are still handled by + # the primary. + def use_replicas_for_read_queries(&blk) + previous_flag = @use_replicas_for_read_queries + @use_replicas_for_read_queries = true + yield + ensure + @use_replicas_for_read_queries = previous_flag + end + + def use_replicas_for_read_queries? + @use_replicas_for_read_queries == true + end + + # Indicate that the ambiguous SQL statements from anywhere inside this + # block should use a replica. The ambiguous statements include: + # - Transactions. + # - Custom queries (via exec_query, execute, etc.) + # - In-flight connection configuration change (SET LOCAL statement_timeout = 5000) + # + # This is a weak enforcement. 
This helper incorporates well with + # primary stickiness: + # - If the queries are about to write + # - The current session already performed writes + # - It prefers to use primary, aka, use_primary or use_primary! were called + def fallback_to_replicas_for_ambiguous_queries(&blk) + previous_flag = @fallback_to_replicas_for_ambiguous_queries + @fallback_to_replicas_for_ambiguous_queries = true + yield + ensure + @fallback_to_replicas_for_ambiguous_queries = previous_flag + end + + def fallback_to_replicas_for_ambiguous_queries? + @fallback_to_replicas_for_ambiguous_queries == true && !use_primary? && !performed_write? + end + + def write! + @performed_write = true + + return if @ignore_writes + + use_primary! + end + + def performed_write? + @performed_write + end + end + end + end +end diff --git a/lib/gitlab/database/load_balancing/sidekiq_client_middleware.rb b/lib/gitlab/database/load_balancing/sidekiq_client_middleware.rb new file mode 100644 index 00000000000..524d69c00c0 --- /dev/null +++ b/lib/gitlab/database/load_balancing/sidekiq_client_middleware.rb @@ -0,0 +1,46 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module LoadBalancing + class SidekiqClientMiddleware + def call(worker_class, job, _queue, _redis_pool) + worker_class = worker_class.to_s.safe_constantize + + mark_data_consistency_location(worker_class, job) + + yield + end + + private + + def mark_data_consistency_location(worker_class, job) + # Mailers can't be constantized + return unless worker_class + return unless worker_class.include?(::ApplicationWorker) + return unless worker_class.get_data_consistency_feature_flag_enabled? + + return if location_already_provided?(job) + + job['worker_data_consistency'] = worker_class.get_data_consistency + + return unless worker_class.utilizes_load_balancing_capabilities? + + if Session.current.use_primary? + job['database_write_location'] = load_balancer.primary_write_location + else + job['database_replica_location'] = load_balancer.host.database_replica_location + end + end + + def location_already_provided?(job) + job['database_replica_location'] || job['database_write_location'] + end + + def load_balancer + LoadBalancing.proxy.load_balancer + end + end + end + end +end diff --git a/lib/gitlab/database/load_balancing/sidekiq_server_middleware.rb b/lib/gitlab/database/load_balancing/sidekiq_server_middleware.rb new file mode 100644 index 00000000000..1c243f31809 --- /dev/null +++ b/lib/gitlab/database/load_balancing/sidekiq_server_middleware.rb @@ -0,0 +1,64 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module LoadBalancing + class SidekiqServerMiddleware + JobReplicaNotUpToDate = Class.new(StandardError) + + def call(worker, job, _queue) + if requires_primary?(worker.class, job) + Session.current.use_primary! + end + + yield + ensure + clear + end + + private + + def clear + load_balancer.release_host + Session.clear_session + end + + def requires_primary?(worker_class, job) + return true unless worker_class.include?(::ApplicationWorker) + return true unless worker_class.utilizes_load_balancing_capabilities? + return true unless worker_class.get_data_consistency_feature_flag_enabled? 
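+ # The worker opted in to load balancing, so use the WAL location
+ # recorded by SidekiqClientMiddleware (if any) to decide whether a
+ # replica can safely serve this job.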
+ + location = job['database_write_location'] || job['database_replica_location'] + + return true unless location + + if replica_caught_up?(location) + job[:database_chosen] = 'replica' + false + elsif worker_class.get_data_consistency == :delayed && not_yet_retried?(job) + job[:database_chosen] = 'retry' + raise JobReplicaNotUpToDate, "Sidekiq job #{worker_class} JID-#{job['jid']} couldn't use the replica."\ + " Replica was not up to date." + else + job[:database_chosen] = 'primary' + true + end + end + + def not_yet_retried?(job) + # if `retry_count` is `nil` it indicates that this job was never retried + # the `0` indicates that this is a first retry + job['retry_count'].nil? + end + + def load_balancer + LoadBalancing.proxy.load_balancer + end + + def replica_caught_up?(location) + load_balancer.host.caught_up?(location) + end + end + end + end +end diff --git a/lib/gitlab/database/load_balancing/srv_resolver.rb b/lib/gitlab/database/load_balancing/srv_resolver.rb new file mode 100644 index 00000000000..20da525f4d2 --- /dev/null +++ b/lib/gitlab/database/load_balancing/srv_resolver.rb @@ -0,0 +1,46 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module LoadBalancing + # Hostnames returned in SRV records cannot sometimes be resolved by a local + # resolver, however, there's a possibility that their A/AAAA records are + # returned as part of the SRV query in the additional section, so we try + # to extract the IPs from there first, failing back to querying the + # hostnames A/AAAA records one by one, using the same resolver that + # queried the SRV record. + class SrvResolver + include Gitlab::Utils::StrongMemoize + + attr_reader :resolver, :additional + + def initialize(resolver, additional) + @resolver = resolver + @additional = additional + end + + def address_for(host) + addresses_from_additional[host] || resolve_host(host) + end + + private + + def addresses_from_additional + strong_memoize(:addresses_from_additional) do + additional.each_with_object({}) do |rr, h| + h[rr.name] = rr.address if rr.is_a?(Net::DNS::RR::A) || rr.is_a?(Net::DNS::RR::AAAA) + end + end + end + + def resolve_host(host) + record = resolver.search(host, Net::DNS::ANY).answer.find do |rr| + rr.is_a?(Net::DNS::RR::A) || rr.is_a?(Net::DNS::RR::AAAA) + end + + record&.address + end + end + end + end +end diff --git a/lib/gitlab/database/load_balancing/sticking.rb b/lib/gitlab/database/load_balancing/sticking.rb new file mode 100644 index 00000000000..efbd7099300 --- /dev/null +++ b/lib/gitlab/database/load_balancing/sticking.rb @@ -0,0 +1,147 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module LoadBalancing + # Module used for handling sticking connections to a primary, if + # necessary. + # + # ## Examples + # + # Sticking a user to the primary: + # + # Sticking.stick_if_necessary(:user, current_user.id) + # + # To unstick if possible, or continue using the primary otherwise: + # + # Sticking.unstick_or_continue_sticking(:user, current_user.id) + module Sticking + # The number of seconds after which a session should stop reading from + # the primary. + EXPIRATION = 30 + + # Sticks to the primary if a write was performed. + def self.stick_if_necessary(namespace, id) + return unless LoadBalancing.enable? + + stick(namespace, id) if Session.current.performed_write? 
+ end + + # Checks if we are caught-up with all the work + def self.all_caught_up?(namespace, id) + location = last_write_location_for(namespace, id) + + return true unless location + + load_balancer.all_caught_up?(location).tap do |caught_up| + unstick(namespace, id) if caught_up + end + end + + # Selects hosts that have caught up with the primary. This ensures + # atomic selection of the host to prevent the host list changing + # in another thread. + # + # Returns true if one host was selected. + def self.select_caught_up_replicas(namespace, id) + location = last_write_location_for(namespace, id) + + # Unlike all_caught_up?, we return false if no write location exists. + # We want to be sure we talk to a replica that has caught up for a specific + # write location. If no such location exists, err on the side of caution. + return false unless location + + load_balancer.select_caught_up_hosts(location).tap do |selected| + unstick(namespace, id) if selected + end + end + + # Sticks to the primary if necessary, otherwise unsticks an object (if + # it was previously stuck to the primary). + def self.unstick_or_continue_sticking(namespace, id) + Session.current.use_primary! unless all_caught_up?(namespace, id) + end + + # Select a replica that has caught up with the primary. If one has not been + # found, stick to the primary. + def self.select_valid_host(namespace, id) + replica_selected = select_caught_up_replicas(namespace, id) + + Session.current.use_primary! unless replica_selected + end + + # Starts sticking to the primary for the given namespace and id, using + # the latest WAL pointer from the primary. + def self.stick(namespace, id) + return unless LoadBalancing.enable? + + mark_primary_write_location(namespace, id) + Session.current.use_primary! + end + + def self.bulk_stick(namespace, ids) + return unless LoadBalancing.enable? + + with_primary_write_location do |location| + ids.each do |id| + set_write_location_for(namespace, id, location) + end + end + + Session.current.use_primary! + end + + def self.with_primary_write_location + return unless LoadBalancing.configured? + + # Load balancing could be enabled for the Web application server, + # but it's not activated for Sidekiq. We should update Redis with + # the write location just in case load balancing is being used. + location = + if LoadBalancing.enable? + load_balancer.primary_write_location + else + Gitlab::Database.get_write_location(ActiveRecord::Base.connection) + end + + return if location.blank? + + yield(location) + end + + def self.mark_primary_write_location(namespace, id) + with_primary_write_location do |location| + set_write_location_for(namespace, id, location) + end + end + + # Stops sticking to the primary. 
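+ # This simply removes the stored write location for the given
+ # namespace and id from Redis.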
+ def self.unstick(namespace, id) + Gitlab::Redis::SharedState.with do |redis| + redis.del(redis_key_for(namespace, id)) + end + end + + def self.set_write_location_for(namespace, id, location) + Gitlab::Redis::SharedState.with do |redis| + redis.set(redis_key_for(namespace, id), location, ex: EXPIRATION) + end + end + + def self.last_write_location_for(namespace, id) + Gitlab::Redis::SharedState.with do |redis| + redis.get(redis_key_for(namespace, id)) + end + end + + def self.redis_key_for(namespace, id) + "database-load-balancing/write-location/#{namespace}/#{id}" + end + + def self.load_balancer + LoadBalancing.proxy.load_balancer + end + end + end + end +end diff --git a/lib/gitlab/git/repository.rb b/lib/gitlab/git/repository.rb index 102fe60f2cb..94152e20d33 100644 --- a/lib/gitlab/git/repository.rb +++ b/lib/gitlab/git/repository.rb @@ -797,15 +797,19 @@ module Gitlab # Fetch remote for repository # # remote - remote name + # url - URL of the remote to fetch. `remote` is not used in this case. + # refmap - if url is given, determines which references should get fetched where # ssh_auth - SSH known_hosts data and a private key to use for public-key authentication # forced - should we use --force flag? # no_tags - should we use --no-tags flag? # prune - should we use --prune flag? # check_tags_changed - should we ask gitaly to calculate whether any tags changed? - def fetch_remote(remote, ssh_auth: nil, forced: false, no_tags: false, prune: true, check_tags_changed: false) + def fetch_remote(remote, url: nil, refmap: nil, ssh_auth: nil, forced: false, no_tags: false, prune: true, check_tags_changed: false) wrapped_gitaly_errors do gitaly_repository_client.fetch_remote( remote, + url: url, + refmap: refmap, ssh_auth: ssh_auth, forced: forced, no_tags: no_tags, diff --git a/lib/gitlab/gitaly_client/repository_service.rb b/lib/gitlab/gitaly_client/repository_service.rb index d2dbd456180..6a75096ff80 100644 --- a/lib/gitlab/gitaly_client/repository_service.rb +++ b/lib/gitlab/gitaly_client/repository_service.rb @@ -70,13 +70,21 @@ module Gitlab end.join end - def fetch_remote(remote, ssh_auth:, forced:, no_tags:, timeout:, prune: true, check_tags_changed: false) + # rubocop: disable Metrics/ParameterLists + # The `remote` parameter is going away soonish anyway, at which point the + # Rubocop warning can be enabled again. + def fetch_remote(remote, url:, refmap:, ssh_auth:, forced:, no_tags:, timeout:, prune: true, check_tags_changed: false) request = Gitaly::FetchRemoteRequest.new( repository: @gitaly_repo, remote: remote, force: forced, no_tags: no_tags, timeout: timeout, no_prune: !prune, check_tags_changed: check_tags_changed ) + if url + request.remote_params = Gitaly::Remote.new(url: url, + mirror_refmaps: Array.wrap(refmap).map(&:to_s)) + end + if ssh_auth&.ssh_mirror_url? if ssh_auth.ssh_key_auth? && ssh_auth.ssh_private_key.present? 
request.ssh_key = ssh_auth.ssh_private_key @@ -89,6 +97,7 @@ module Gitlab GitalyClient.call(@storage, :repository_service, :fetch_remote, request, timeout: GitalyClient.long_timeout) end + # rubocop: enable Metrics/ParameterLists def create_repository request = Gitaly::CreateRepositoryRequest.new(repository: @gitaly_repo) diff --git a/lib/gitlab/github_import/importer/pull_requests_importer.rb b/lib/gitlab/github_import/importer/pull_requests_importer.rb index 7f1569f592f..28cd3f802a2 100644 --- a/lib/gitlab/github_import/importer/pull_requests_importer.rb +++ b/lib/gitlab/github_import/importer/pull_requests_importer.rb @@ -36,7 +36,11 @@ module Gitlab # updating the timestamp. project.update_column(:last_repository_updated_at, Time.zone.now) - project.repository.fetch_remote('github', forced: false) + if Feature.enabled?(:fetch_remote_params, project, default_enabled: :yaml) + project.repository.fetch_remote('github', url: project.import_url, refmap: Gitlab::GithubImport.refmap, forced: false) + else + project.repository.fetch_remote('github', forced: false) + end pname = project.path_with_namespace diff --git a/lib/support/init.d/gitlab b/lib/support/init.d/gitlab index 582dcf82582..ac47c5be1e8 100755 --- a/lib/support/init.d/gitlab +++ b/lib/support/init.d/gitlab @@ -330,7 +330,7 @@ start_gitlab() { echo "Gitaly is already running with pid $gapid, not restarting" else $app_root/bin/daemon_with_pidfile $gitaly_pid_path \ - $gitaly_dir/gitaly $gitaly_dir/config.toml >> $gitaly_log 2>&1 & + $gitaly_dir/_build/bin/gitaly $gitaly_dir/config.toml >> $gitaly_log 2>&1 & fi fi diff --git a/scripts/rspec_helpers.sh b/scripts/rspec_helpers.sh index b1a618270b0..0484cabca82 100644 --- a/scripts/rspec_helpers.sh +++ b/scripts/rspec_helpers.sh @@ -85,7 +85,7 @@ function rspec_db_library_code() { local db_files="spec/lib/gitlab/database/ spec/support/helpers/database/" if [[ -d "ee/" ]]; then - db_files="${db_files} ee/spec/lib/gitlab/database/ ee/spec/lib/ee/gitlab/database_spec.rb" + db_files="${db_files} ee/spec/lib/ee/gitlab/database_spec.rb" fi rspec_simple_job "-- ${db_files}" diff --git a/spec/fixtures/dns/a_rr.json b/spec/fixtures/dns/a_rr.json new file mode 100644 index 00000000000..44ea739cec4 --- /dev/null +++ b/spec/fixtures/dns/a_rr.json @@ -0,0 +1,4 @@ +{ + "desc": "A response of a `dig patroni-02-db-gstg.node.east-us-2.consul. 
A` query", + "payload": "JJSFAAABAAEAAAABEnBhdHJvbmktMDItZGItZ3N0ZwRub2RlCWVhc3QtdXMt\nMgZjb25zdWwAAAEAAcAMAAEAAQAAAAAABArgHWbADAAQAAEAAAAAABgXY29u\nc3VsLW5ldHdvcmstc2VnbWVudD0=\n" +} diff --git a/spec/fixtures/dns/a_with_aaaa_rr_in_additional_section.json b/spec/fixtures/dns/a_with_aaaa_rr_in_additional_section.json new file mode 100644 index 00000000000..9d83d4caa2d --- /dev/null +++ b/spec/fixtures/dns/a_with_aaaa_rr_in_additional_section.json @@ -0,0 +1,4 @@ +{ + "desc": "A response of `dig google.com` query that contains AAAA records in additional section", + "payload": "YQiBgAABAAEADQAbBmdvb2dsZQNjb20AAAEAAcAMAAEAAQAAAOEABKzZFE7AEwACAAEAAAjmABQB\nagxndGxkLXNlcnZlcnMDbmV0AMATAAIAAQAACOYABAFrwDrAEwACAAEAAAjmAAQBYcA6wBMAAgAB\nAAAI5gAEAWXAOsATAAIAAQAACOYABAFmwDrAEwACAAEAAAjmAAQBbMA6wBMAAgABAAAI5gAEAWPA\nOsATAAIAAQAACOYABAFowDrAEwACAAEAAAjmAAQBbcA6wBMAAgABAAAI5gAEAWfAOsATAAIAAQAA\nCOYABAFkwDrAEwACAAEAAAjmAAQBacA6wBMAAgABAAAI5gAEAWLAOsBoAAEAAQAACOYABMAFBh7A\naAAcAAEAAAjmABAgAQUDqD4AAAAAAAAAAgAwwQgAAQABAAAI5gAEwCEOHsEIABwAAQAACOYAECAB\nBQMjHQAAAAAAAAACADDAqAABAAEAAAjmAATAGlwewKgAHAABAAAI5gAQIAEFA4PrAAAAAAAAAAAA\nMMDoAAEAAQAACOYABMAfUB7A6AAcAAEAAAjmABAgAQUAhW4AAAAAAAAAAAAwwHgAAQABAAAI5gAE\nwAxeHsB4ABwAAQAACOYAECABBQIcoQAAAAAAAAAAADDAiAABAAEAAAjmAATAIzMewIgAHAABAAAI\n5gAQIAEFA9QUAAAAAAAAAAAAMMDYAAEAAQAACOYABMAqXR7A2AAcAAEAAAjmABAgAQUD7qMAAAAA\nAAAAAAAwwLgAAQABAAAI5gAEwDZwHsC4ABwAAQAACOYAECABBQIIzAAAAAAAAAAAADDA+AABAAEA\nAAjmAATAK6wewPgAHAABAAAI5gAQIAEFAznBAAAAAAAAAAAAMMA4AAEAAQAACOYABMAwTx7AOAAc\nAAEAAAjmABAgAQUCcJQAAAAAAAAAAAAwwFgAAQABAAAI5gAEwDSyHsBYABwAAQAACOYAECABBQMN\nLQAAAAAAAAAAADDAmAABAAEAAAjmAATAKaIewJgAHAABAAAI5gAQIAEFANk3AAAAAAAAAAAAMMDI\nAAEAAQAACOYABMA3Ux7AyAAcAAEAAAjmABAgAQUBsfkAAAAAAAAAAAAwAAApEAAAAAAAAAA=\n" +} diff --git a/spec/fixtures/dns/aaaa_rr.json b/spec/fixtures/dns/aaaa_rr.json new file mode 100644 index 00000000000..ae06e5255de --- /dev/null +++ b/spec/fixtures/dns/aaaa_rr.json @@ -0,0 +1,4 @@ +{ + "desc": "A response of `dig google.com AAAA` query", + "payload": "PA+BgAABAAEADQAbBmdvb2dsZQNjb20AABwAAcAMABwAAQAAASwAECoAFFBA\nDggKAAAAAAAAIA7AEwACAAEAAAFMABQBYgxndGxkLXNlcnZlcnMDbmV0AMAT\nAAIAAQAAAUwABAFtwEbAEwACAAEAAAFMAAQBY8BGwBMAAgABAAABTAAEAWbA\nRsATAAIAAQAAAUwABAFnwEbAEwACAAEAAAFMAAQBa8BGwBMAAgABAAABTAAE\nAWXARsATAAIAAQAAAUwABAFqwEbAEwACAAEAAAFMAAQBZMBGwBMAAgABAAAB\nTAAEAWnARsATAAIAAQAAAUwABAFhwEbAEwACAAEAAAFMAAQBbMBGwBMAAgAB\nAAABTAAEAWjARsD0AAEAAQAAAUwABMAFBh7A9AAcAAEAAAFMABAgAQUDqD4A\nAAAAAAAAAgAwwEQAAQABAAABTAAEwCEOHsBEABwAAQAAAUwAECABBQMjHQAA\nAAAAAAACADDAdAABAAEAAAFMAATAGlwewHQAHAABAAABTAAQIAEFA4PrAAAA\nAAAAAAAAMMDUAAEAAQAAAUwABMAfUB7A1AAcAAEAAAFMABAgAQUAhW4AAAAA\nAAAAAAAwwLQAAQABAAABTAAEwAxeHsC0ABwAAQAAAUwAECABBQIcoQAAAAAA\nAAAAADDAhAABAAEAAAFMAATAIzMewIQAHAABAAABTAAQIAEFA9QUAAAAAAAA\nAAAAMMCUAAEAAQAAAUwABMAqXR7AlAAcAAEAAAFMABAgAQUD7qMAAAAAAAAA\nAAAwwRQAAQABAAABTAAEwDZwHsEUABwAAQAAAUwAECABBQIIzAAAAAAAAAAA\nADDA5AABAAEAAAFMAATAK6wewOQAHAABAAABTAAQIAEFAznBAAAAAAAAAAAA\nMMDEAAEAAQAAAUwABMAwTx7AxAAcAAEAAAFMABAgAQUCcJQAAAAAAAAAAAAw\nwKQAAQABAAABTAAEwDSyHsCkABwAAQAAAUwAECABBQMNLQAAAAAAAAAAADDB\nBAABAAEAAAFMAATAKaIewQQAHAABAAABTAAQIAEFANk3AAAAAAAAAAAAMMBk\nAAEAAQAAAUwABMA3Ux7AZAAcAAEAAAFMABAgAQUBsfkAAAAAAAAAAAAwAAAp\nEAAAAAAAAAA=\n" +} diff --git a/spec/fixtures/dns/srv_with_a_rr_in_additional_section.json b/spec/fixtures/dns/srv_with_a_rr_in_additional_section.json new file mode 100644 index 00000000000..97db149ad8e --- /dev/null +++ b/spec/fixtures/dns/srv_with_a_rr_in_additional_section.json @@ -0,0 +1,4 @@ +{ + "desc": "A response of `dig 
replica.patroni.service.consul. SRV` query that contains A records in additional section", + "payload": "y8uFAAABAAQAAAAIB3JlcGxpY2EHcGF0cm9uaQdzZXJ2aWNlBmNvbnN1bAAA\nIQABwAwAIQABAAAAAAAwAAEAAQAAEnBhdHJvbmktMDQtZGItZ3N0ZwRub2Rl\nCWVhc3QtdXMtMgZjb25zdWwAwAwAIQABAAAAAAAwAAEAAQAAEnBhdHJvbmkt\nMDUtZGItZ3N0ZwRub2RlCWVhc3QtdXMtMgZjb25zdWwAwAwAIQABAAAAAAAw\nAAEAAQAAEnBhdHJvbmktMDItZGItZ3N0ZwRub2RlCWVhc3QtdXMtMgZjb25z\ndWwAwAwAIQABAAAAAAAwAAEAAQAAEnBhdHJvbmktMDMtZGItZ3N0ZwRub2Rl\nCWVhc3QtdXMtMgZjb25zdWwAwEIAAQABAAAAAAAECuAdaMBCABAAAQAAAAAA\nGBdjb25zdWwtbmV0d29yay1zZWdtZW50PcB+AAEAAQAAAAAABArgHWnAfgAQ\nAAEAAAAAABgXY29uc3VsLW5ldHdvcmstc2VnbWVudD3AugABAAEAAAAAAAQK\n4B1mwLoAEAABAAAAAAAYF2NvbnN1bC1uZXR3b3JrLXNlZ21lbnQ9wPYAAQAB\nAAAAAAAECuAdZ8D2ABAAAQAAAAAAGBdjb25zdWwtbmV0d29yay1zZWdtZW50\nPQ==\n" +} diff --git a/spec/frontend/emoji/awards_app/store/actions_spec.js b/spec/frontend/emoji/awards_app/store/actions_spec.js index dac4fded260..137fcb742ae 100644 --- a/spec/frontend/emoji/awards_app/store/actions_spec.js +++ b/spec/frontend/emoji/awards_app/store/actions_spec.js @@ -7,6 +7,10 @@ import axios from '~/lib/utils/axios_utils'; jest.mock('@sentry/browser'); describe('Awards app actions', () => { + afterEach(() => { + window.gon = {}; + }); + describe('setInitialData', () => { it('commits SET_INITIAL_DATA', async () => { await testAction( @@ -39,6 +43,8 @@ describe('Awards app actions', () => { }); it('commits FETCH_AWARDS_SUCCESS', async () => { + window.gon = { current_user_id: 1 }; + await testAction( actions.fetchAwards, '1', @@ -47,6 +53,10 @@ describe('Awards app actions', () => { [{ type: 'fetchAwards', payload: '2' }], ); }); + + it('does not commit FETCH_AWARDS_SUCCESS when user signed out', async () => { + await testAction(actions.fetchAwards, '1', { path: '/awards' }, [], []); + }); }); describe('error', () => { @@ -55,6 +65,8 @@ describe('Awards app actions', () => { }); it('calls Sentry.captureException', async () => { + window.gon = { current_user_id: 1 }; + await testAction(actions.fetchAwards, null, { path: '/awards' }, [], [], () => { expect(Sentry.captureException).toHaveBeenCalled(); }); diff --git a/spec/lib/gitlab/database/consistency_spec.rb b/spec/lib/gitlab/database/consistency_spec.rb new file mode 100644 index 00000000000..35fa65512ae --- /dev/null +++ b/spec/lib/gitlab/database/consistency_spec.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::Database::Consistency do + let(:session) do + Gitlab::Database::LoadBalancing::Session.current + end + + describe '.with_read_consistency' do + it 'sticks to primary database' do + expect(session).not_to be_using_primary + + block = -> (&control) do + described_class.with_read_consistency do + expect(session).to be_using_primary + + control.call + end + end + + expect { |probe| block.call(&probe) }.to yield_control + end + end +end diff --git a/spec/lib/gitlab/database/load_balancing/active_record_proxy_spec.rb b/spec/lib/gitlab/database/load_balancing/active_record_proxy_spec.rb new file mode 100644 index 00000000000..8886ce9756d --- /dev/null +++ b/spec/lib/gitlab/database/load_balancing/active_record_proxy_spec.rb @@ -0,0 +1,20 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::Database::LoadBalancing::ActiveRecordProxy do + describe '#connection' do + it 'returns a connection proxy' do + dummy = Class.new do + include Gitlab::Database::LoadBalancing::ActiveRecordProxy + end + + proxy = double(:proxy) + + 
expect(Gitlab::Database::LoadBalancing).to receive(:proxy) + .and_return(proxy) + + expect(dummy.new.connection).to eq(proxy) + end + end +end diff --git a/spec/lib/gitlab/database/load_balancing/connection_proxy_spec.rb b/spec/lib/gitlab/database/load_balancing/connection_proxy_spec.rb new file mode 100644 index 00000000000..015dd2ba8d2 --- /dev/null +++ b/spec/lib/gitlab/database/load_balancing/connection_proxy_spec.rb @@ -0,0 +1,316 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::Database::LoadBalancing::ConnectionProxy do + let(:proxy) { described_class.new } + + describe '#select' do + it 'performs a read' do + expect(proxy).to receive(:read_using_load_balancer).with(:select, ['foo']) + + proxy.select('foo') + end + end + + describe '#select_all' do + let(:override_proxy) { ActiveRecord::Base.connection.class } + + # We can't use :Gitlab::Utils::Override because this method is dynamically prepended + it 'method signatures match' do + expect(proxy.method(:select_all).parameters).to eq(override_proxy.instance_method(:select_all).parameters) + end + + describe 'using a SELECT query' do + it 'runs the query on a secondary' do + arel = double(:arel) + + expect(proxy).to receive(:read_using_load_balancer) + .with(:select_all, [arel, 'foo', []]) + + proxy.select_all(arel, 'foo') + end + end + + describe 'using a SELECT FOR UPDATE query' do + it 'runs the query on the primary and sticks to it' do + arel = double(:arel, locked: true) + + expect(proxy).to receive(:write_using_load_balancer) + .with(:select_all, [arel, 'foo', []], sticky: true) + + proxy.select_all(arel, 'foo') + end + end + end + + Gitlab::Database::LoadBalancing::ConnectionProxy::NON_STICKY_READS.each do |name| + describe "#{name}" do + it 'runs the query on the replica' do + expect(proxy).to receive(:read_using_load_balancer) + .with(name, ['foo']) + + proxy.send(name, 'foo') + end + end + end + + Gitlab::Database::LoadBalancing::ConnectionProxy::STICKY_WRITES.each do |name| + describe "#{name}" do + it 'runs the query on the primary and sticks to it' do + expect(proxy).to receive(:write_using_load_balancer) + .with(name, ['foo'], sticky: true) + + proxy.send(name, 'foo') + end + end + end + + describe '.insert_all!' do + before do + ActiveRecord::Schema.define do + create_table :connection_proxy_bulk_insert, force: true do |t| + t.string :name, null: true + end + end + end + + after do + ActiveRecord::Schema.define do + drop_table :connection_proxy_bulk_insert, force: true + end + end + + let(:model_class) do + Class.new(ApplicationRecord) do + self.table_name = "connection_proxy_bulk_insert" + end + end + + it 'inserts data in bulk' do + expect(model_class).to receive(:connection) + .at_least(:once) + .and_return(proxy) + + expect(proxy).to receive(:write_using_load_balancer) + .at_least(:once) + .and_call_original + + expect do + model_class.insert_all! [ + { name: "item1" }, + { name: "item2" } + ] + end.to change { model_class.count }.by(2) + end + end + + # We have an extra test for #transaction here to make sure that nested queries + # are also sent to a primary. 
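+ # (The contexts below also cover the case where ambiguous queries
+ # are allowed to fall back to a replica.)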
+ describe '#transaction' do + let(:session) { double(:session) } + + before do + allow(Gitlab::Database::LoadBalancing::Session).to receive(:current) + .and_return(session) + end + + context 'session fallbacks ambiguous queries to replicas' do + let(:replica) { double(:connection) } + + before do + allow(session).to receive(:fallback_to_replicas_for_ambiguous_queries?).and_return(true) + allow(session).to receive(:use_primary?).and_return(false) + allow(replica).to receive(:transaction).and_yield + allow(replica).to receive(:select) + end + + context 'with a read query' do + it 'runs the transaction and any nested queries on the replica' do + expect(proxy.load_balancer).to receive(:read) + .twice.and_yield(replica) + expect(proxy.load_balancer).not_to receive(:read_write) + expect(session).not_to receive(:write!) + + proxy.transaction { proxy.select('true') } + end + end + + context 'with a write query' do + it 'raises an exception' do + allow(proxy.load_balancer).to receive(:read).and_yield(replica) + allow(proxy.load_balancer).to receive(:read_write).and_yield(replica) + + expect do + proxy.transaction { proxy.insert('something') } + end.to raise_error(Gitlab::Database::LoadBalancing::ConnectionProxy::WriteInsideReadOnlyTransactionError) + end + end + end + + context 'session does not fallback to replicas for ambiguous queries' do + let(:primary) { double(:connection) } + + before do + allow(session).to receive(:fallback_to_replicas_for_ambiguous_queries?).and_return(false) + allow(session).to receive(:use_replicas_for_read_queries?).and_return(false) + allow(session).to receive(:use_primary?).and_return(true) + allow(primary).to receive(:transaction).and_yield + allow(primary).to receive(:select) + allow(primary).to receive(:insert) + end + + context 'with a read query' do + it 'runs the transaction and any nested queries on the primary and stick to it' do + expect(proxy.load_balancer).to receive(:read_write) + .twice.and_yield(primary) + expect(proxy.load_balancer).not_to receive(:read) + expect(session).to receive(:write!) 
+ + proxy.transaction { proxy.select('true') } + end + end + + context 'with a write query' do + it 'runs the transaction and any nested queries on the primary and stick to it' do + expect(proxy.load_balancer).to receive(:read_write) + .twice.and_yield(primary) + expect(proxy.load_balancer).not_to receive(:read) + expect(session).to receive(:write!).twice + + proxy.transaction { proxy.insert('something') } + end + end + end + end + + describe '#method_missing' do + it 'runs the query on the primary without sticking to it' do + expect(proxy).to receive(:write_using_load_balancer) + .with(:foo, ['foo']) + + proxy.foo('foo') + end + + it 'properly forwards trailing hash arguments' do + allow(proxy.load_balancer).to receive(:read_write) + + expect(proxy).to receive(:write_using_load_balancer).and_call_original + + expect { proxy.case_sensitive_comparison(:table, :attribute, :column, { value: :value, format: :format }) } + .not_to raise_error + end + + context 'current session prefers to fallback ambiguous queries to replicas' do + let(:session) { double(:session) } + + before do + allow(Gitlab::Database::LoadBalancing::Session).to receive(:current) + .and_return(session) + allow(session).to receive(:fallback_to_replicas_for_ambiguous_queries?).and_return(true) + allow(session).to receive(:use_primary?).and_return(false) + end + + it 'runs the query on the replica' do + expect(proxy).to receive(:read_using_load_balancer).with(:foo, ['foo']) + + proxy.foo('foo') + end + + it 'properly forwards trailing hash arguments' do + allow(proxy.load_balancer).to receive(:read) + + expect(proxy).to receive(:read_using_load_balancer).and_call_original + + expect { proxy.case_sensitive_comparison(:table, :attribute, :column, { value: :value, format: :format }) } + .not_to raise_error + end + end + end + + describe '#read_using_load_balancer' do + let(:session) { double(:session) } + let(:connection) { double(:connection) } + + before do + allow(Gitlab::Database::LoadBalancing::Session).to receive(:current) + .and_return(session) + end + + context 'with a regular session' do + it 'uses a secondary' do + allow(session).to receive(:use_primary?).and_return(false) + allow(session).to receive(:use_replicas_for_read_queries?).and_return(false) + + expect(connection).to receive(:foo).with('foo') + expect(proxy.load_balancer).to receive(:read).and_yield(connection) + + proxy.read_using_load_balancer(:foo, ['foo']) + end + end + + context 'with a regular session and forcing all reads to replicas' do + it 'uses a secondary' do + allow(session).to receive(:use_primary?).and_return(false) + allow(session).to receive(:use_replicas_for_read_queries?).and_return(true) + + expect(connection).to receive(:foo).with('foo') + expect(proxy.load_balancer).to receive(:read).and_yield(connection) + + proxy.read_using_load_balancer(:foo, ['foo']) + end + end + + context 'with a session using the primary but forcing all reads to replicas' do + it 'uses a secondary' do + allow(session).to receive(:use_primary?).and_return(true) + allow(session).to receive(:use_replicas_for_read_queries?).and_return(true) + + expect(connection).to receive(:foo).with('foo') + expect(proxy.load_balancer).to receive(:read).and_yield(connection) + + proxy.read_using_load_balancer(:foo, ['foo']) + end + end + + describe 'with a session using the primary' do + it 'uses the primary' do + allow(session).to receive(:use_primary?).and_return(true) + allow(session).to receive(:use_replicas_for_read_queries?).and_return(false) + + expect(connection).to 
receive(:foo).with('foo') + + expect(proxy.load_balancer).to receive(:read_write) + .and_yield(connection) + + proxy.read_using_load_balancer(:foo, ['foo']) + end + end + end + + describe '#write_using_load_balancer' do + let(:session) { double(:session) } + let(:connection) { double(:connection) } + + before do + allow(Gitlab::Database::LoadBalancing::Session).to receive(:current) + .and_return(session) + end + + it 'uses but does not stick to the primary when sticking is disabled' do + expect(proxy.load_balancer).to receive(:read_write).and_yield(connection) + expect(connection).to receive(:foo).with('foo') + expect(session).not_to receive(:write!) + + proxy.write_using_load_balancer(:foo, ['foo']) + end + + it 'sticks to the primary when sticking is enabled' do + expect(proxy.load_balancer).to receive(:read_write).and_yield(connection) + expect(connection).to receive(:foo).with('foo') + expect(session).to receive(:write!) + + proxy.write_using_load_balancer(:foo, ['foo'], sticky: true) + end + end +end diff --git a/spec/lib/gitlab/database/load_balancing/host_list_spec.rb b/spec/lib/gitlab/database/load_balancing/host_list_spec.rb new file mode 100644 index 00000000000..873b599f84d --- /dev/null +++ b/spec/lib/gitlab/database/load_balancing/host_list_spec.rb @@ -0,0 +1,188 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::Database::LoadBalancing::HostList do + def expect_metrics(hosts) + expect(Gitlab::Metrics.registry.get(:db_load_balancing_hosts).get({})).to eq(hosts) + end + + before do + allow(Gitlab::Database) + .to receive(:create_connection_pool) + .and_return(ActiveRecord::Base.connection_pool) + end + + let(:load_balancer) { double(:load_balancer) } + let(:host_count) { 2 } + + let(:host_list) do + hosts = Array.new(host_count) do + Gitlab::Database::LoadBalancing::Host.new('localhost', load_balancer, port: 5432) + end + + described_class.new(hosts) + end + + describe '#initialize' do + it 'sets metrics for current number of hosts and current index' do + host_list + + expect_metrics(2) + end + end + + describe '#length' do + it 'returns the number of hosts in the list' do + expect(host_list.length).to eq(2) + end + end + + describe '#host_names_and_ports' do + context 'with ports' do + it 'returns the host names of all hosts' do + hosts = [ + ['localhost', 5432], + ['localhost', 5432] + ] + + expect(host_list.host_names_and_ports).to eq(hosts) + end + end + + context 'without ports' do + let(:host_list) do + hosts = Array.new(2) do + Gitlab::Database::LoadBalancing::Host.new('localhost', load_balancer) + end + + described_class.new(hosts) + end + + it 'returns the host names of all hosts' do + hosts = [ + ['localhost', nil], + ['localhost', nil] + ] + + expect(host_list.host_names_and_ports).to eq(hosts) + end + end + end + + describe '#manage_pool?' 
do + before do + allow(Gitlab::Database).to receive(:create_connection_pool) { double(:connection) } + end + + context 'when the testing pool belongs to one host of the host list' do + it 'returns true' do + pool = host_list.hosts.first.pool + + expect(host_list.manage_pool?(pool)).to be(true) + end + end + + context 'when the testing pool belongs to a former host of the host list' do + it 'returns false' do + pool = host_list.hosts.first.pool + host_list.hosts = [ + Gitlab::Database::LoadBalancing::Host.new('foo', load_balancer) + ] + + expect(host_list.manage_pool?(pool)).to be(false) + end + end + + context 'when the testing pool belongs to a new host of the host list' do + it 'returns true' do + host = Gitlab::Database::LoadBalancing::Host.new('foo', load_balancer) + host_list.hosts = [host] + + expect(host_list.manage_pool?(host.pool)).to be(true) + end + end + + context 'when the testing pool does not have any relation with the host list' do + it 'returns false' do + host = Gitlab::Database::LoadBalancing::Host.new('foo', load_balancer) + + expect(host_list.manage_pool?(host.pool)).to be(false) + end + end + end + + describe '#hosts' do + it 'returns a copy of the host' do + first = host_list.hosts + + expect(host_list.hosts).to eq(first) + expect(host_list.hosts.object_id).not_to eq(first.object_id) + end + end + + describe '#hosts=' do + it 'updates the list of hosts to use' do + host_list.hosts = [ + Gitlab::Database::LoadBalancing::Host.new('foo', load_balancer) + ] + + expect(host_list.length).to eq(1) + expect(host_list.hosts[0].host).to eq('foo') + expect_metrics(1) + end + end + + describe '#next' do + it 'returns a host' do + expect(host_list.next) + .to be_an_instance_of(Gitlab::Database::LoadBalancing::Host) + end + + it 'cycles through all available hosts' do + expect(host_list.next).to eq(host_list.hosts[0]) + expect_metrics(2) + + expect(host_list.next).to eq(host_list.hosts[1]) + expect_metrics(2) + + expect(host_list.next).to eq(host_list.hosts[0]) + expect_metrics(2) + end + + it 'skips hosts that are offline' do + allow(host_list.hosts[0]).to receive(:online?).and_return(false) + + expect(host_list.next).to eq(host_list.hosts[1]) + expect_metrics(2) + end + + it 'returns nil if no hosts are online' do + host_list.hosts.each do |host| + allow(host).to receive(:online?).and_return(false) + end + + expect(host_list.next).to be_nil + expect_metrics(2) + end + + it 'returns nil if no hosts are available' do + expect(described_class.new.next).to be_nil + end + end + + describe '#shuffle' do + let(:host_count) { 3 } + + it 'randomizes the list' do + 2.times do + all_hosts = host_list.hosts + + host_list.shuffle + + expect(host_list.length).to eq(host_count) + expect(host_list.hosts).to contain_exactly(*all_hosts) + end + end + end +end diff --git a/spec/lib/gitlab/database/load_balancing/host_spec.rb b/spec/lib/gitlab/database/load_balancing/host_spec.rb new file mode 100644 index 00000000000..4dfddef68c8 --- /dev/null +++ b/spec/lib/gitlab/database/load_balancing/host_spec.rb @@ -0,0 +1,445 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::Database::LoadBalancing::Host do + let(:load_balancer) do + Gitlab::Database::LoadBalancing::LoadBalancer.new(%w[localhost]) + end + + let(:host) { load_balancer.host_list.hosts.first } + + before do + allow(Gitlab::Database).to receive(:create_connection_pool) + .and_return(ActiveRecord::Base.connection_pool) + end + + def raise_and_wrap(wrapper, original) + raise original + rescue original.class + 
raise wrapper, 'boom' + end + + def wrapped_exception(wrapper, original) + raise_and_wrap(wrapper, original.new) + rescue wrapper => error + error + end + + describe '#connection' do + it 'returns a connection from the pool' do + expect(host.pool).to receive(:connection) + + host.connection + end + end + + describe '#disconnect!' do + it 'disconnects the pool' do + connection = double(:connection, in_use?: false) + pool = double(:pool, connections: [connection]) + + allow(host) + .to receive(:pool) + .and_return(pool) + + expect(host) + .not_to receive(:sleep) + + expect(host.pool) + .to receive(:disconnect!) + + host.disconnect! + end + + it 'disconnects the pool when waiting for connections takes too long' do + connection = double(:connection, in_use?: true) + pool = double(:pool, connections: [connection]) + + allow(host) + .to receive(:pool) + .and_return(pool) + + expect(host.pool) + .to receive(:disconnect!) + + host.disconnect!(1) + end + end + + describe '#release_connection' do + it 'releases the current connection from the pool' do + expect(host.pool).to receive(:release_connection) + + host.release_connection + end + end + + describe '#offline!' do + it 'marks the host as offline' do + expect(host.pool).to receive(:disconnect!) + + expect(Gitlab::Database::LoadBalancing::Logger).to receive(:warn) + .with(hash_including(event: :host_offline)) + .and_call_original + + host.offline! + end + end + + describe '#online?' do + context 'when the replica status is recent enough' do + before do + expect(host).to receive(:check_replica_status?).and_return(false) + end + + it 'returns the latest status' do + expect(host).not_to receive(:refresh_status) + expect(Gitlab::Database::LoadBalancing::Logger).not_to receive(:info) + expect(Gitlab::Database::LoadBalancing::Logger).not_to receive(:warn) + + expect(host).to be_online + end + + it 'returns an offline status' do + host.offline! + + expect(host).not_to receive(:refresh_status) + expect(Gitlab::Database::LoadBalancing::Logger).not_to receive(:info) + expect(Gitlab::Database::LoadBalancing::Logger).not_to receive(:warn) + + expect(host).not_to be_online + end + end + + context 'when the replica status is outdated' do + before do + expect(host) + .to receive(:check_replica_status?) + .and_return(true) + end + + it 'refreshes the status' do + expect(Gitlab::Database::LoadBalancing::Logger).to receive(:info) + .with(hash_including(event: :host_online)) + .and_call_original + + expect(host).to be_online + end + + context 'and replica is not up to date' do + before do + expect(host).to receive(:replica_is_up_to_date?).and_return(false) + end + + it 'marks the host offline' do + expect(Gitlab::Database::LoadBalancing::Logger).to receive(:warn) + .with(hash_including(event: :host_offline)) + .and_call_original + + expect(host).not_to be_online + end + end + end + + context 'when the replica is not online' do + it 'returns false when ActionView::Template::Error is raised' do + wrapped_error = wrapped_exception(ActionView::Template::Error, StandardError) + + allow(host) + .to receive(:check_replica_status?) + .and_raise(wrapped_error) + + expect(host).not_to be_online + end + + it 'returns false when ActiveRecord::StatementInvalid is raised' do + allow(host) + .to receive(:check_replica_status?) + .and_raise(ActiveRecord::StatementInvalid.new('foo')) + + expect(host).not_to be_online + end + + it 'returns false when PG::Error is raised' do + allow(host) + .to receive(:check_replica_status?) 
+ .and_raise(PG::Error) + + expect(host).not_to be_online + end + end + end + + describe '#refresh_status' do + it 'refreshes the status' do + host.offline! + + expect(host) + .to receive(:replica_is_up_to_date?) + .and_call_original + + host.refresh_status + + expect(host).to be_online + end + end + + describe '#check_replica_status?' do + it 'returns true when we need to check the replica status' do + allow(host) + .to receive(:last_checked_at) + .and_return(1.year.ago) + + expect(host.check_replica_status?).to eq(true) + end + + it 'returns false when we do not need to check the replica status' do + freeze_time do + allow(host) + .to receive(:last_checked_at) + .and_return(Time.zone.now) + + expect(host.check_replica_status?).to eq(false) + end + end + end + + describe '#replica_is_up_to_date?' do + context 'when the lag time is below the threshold' do + it 'returns true' do + expect(host) + .to receive(:replication_lag_below_threshold?) + .and_return(true) + + expect(host.replica_is_up_to_date?).to eq(true) + end + end + + context 'when the lag time exceeds the threshold' do + before do + allow(host) + .to receive(:replication_lag_below_threshold?) + .and_return(false) + end + + it 'returns true if the data is recent enough' do + expect(host) + .to receive(:data_is_recent_enough?) + .and_return(true) + + expect(host.replica_is_up_to_date?).to eq(true) + end + + it 'returns false when the data is not recent enough' do + expect(host) + .to receive(:data_is_recent_enough?) + .and_return(false) + + expect(host.replica_is_up_to_date?).to eq(false) + end + end + end + + describe '#replication_lag_below_threshold' do + it 'returns true when the lag time is below the threshold' do + expect(host) + .to receive(:replication_lag_time) + .and_return(1) + + expect(host.replication_lag_below_threshold?).to eq(true) + end + + it 'returns false when the lag time exceeds the threshold' do + expect(host) + .to receive(:replication_lag_time) + .and_return(9000) + + expect(host.replication_lag_below_threshold?).to eq(false) + end + + it 'returns false when no lag time could be calculated' do + expect(host) + .to receive(:replication_lag_time) + .and_return(nil) + + expect(host.replication_lag_below_threshold?).to eq(false) + end + end + + describe '#data_is_recent_enough?' 
do + it 'returns true when the data is recent enough' do + expect(host.data_is_recent_enough?).to eq(true) + end + + it 'returns false when the data is not recent enough' do + diff = Gitlab::Database::LoadBalancing.max_replication_difference * 2 + + expect(host) + .to receive(:query_and_release) + .and_return({ 'diff' => diff }) + + expect(host.data_is_recent_enough?).to eq(false) + end + + it 'returns false when no lag size could be calculated' do + expect(host) + .to receive(:replication_lag_size) + .and_return(nil) + + expect(host.data_is_recent_enough?).to eq(false) + end + end + + describe '#replication_lag_time' do + it 'returns the lag time as a Float' do + expect(host.replication_lag_time).to be_an_instance_of(Float) + end + + it 'returns nil when the database query returned no rows' do + expect(host) + .to receive(:query_and_release) + .and_return({}) + + expect(host.replication_lag_time).to be_nil + end + end + + describe '#replication_lag_size' do + it 'returns the lag size as an Integer' do + expect(host.replication_lag_size).to be_an_instance_of(Integer) + end + + it 'returns nil when the database query returned no rows' do + expect(host) + .to receive(:query_and_release) + .and_return({}) + + expect(host.replication_lag_size).to be_nil + end + + it 'returns nil when the database connection fails' do + wrapped_error = wrapped_exception(ActionView::Template::Error, StandardError) + + allow(host) + .to receive(:connection) + .and_raise(wrapped_error) + + expect(host.replication_lag_size).to be_nil + end + end + + describe '#primary_write_location' do + it 'returns the write location of the primary' do + expect(host.primary_write_location).to be_an_instance_of(String) + expect(host.primary_write_location).not_to be_empty + end + end + + describe '#caught_up?' 
do + let(:connection) { double(:connection) } + + before do + allow(connection).to receive(:quote).and_return('foo') + end + + it 'returns true when a host has caught up' do + allow(host).to receive(:connection).and_return(connection) + expect(connection).to receive(:select_all).and_return([{ 'result' => 't' }]) + + expect(host.caught_up?('foo')).to eq(true) + end + + it 'returns true when a host has caught up' do + allow(host).to receive(:connection).and_return(connection) + expect(connection).to receive(:select_all).and_return([{ 'result' => true }]) + + expect(host.caught_up?('foo')).to eq(true) + end + + it 'returns false when a host has not caught up' do + allow(host).to receive(:connection).and_return(connection) + expect(connection).to receive(:select_all).and_return([{ 'result' => 'f' }]) + + expect(host.caught_up?('foo')).to eq(false) + end + + it 'returns false when a host has not caught up' do + allow(host).to receive(:connection).and_return(connection) + expect(connection).to receive(:select_all).and_return([{ 'result' => false }]) + + expect(host.caught_up?('foo')).to eq(false) + end + + it 'returns false when the connection fails' do + wrapped_error = wrapped_exception(ActionView::Template::Error, StandardError) + + allow(host) + .to receive(:connection) + .and_raise(wrapped_error) + + expect(host.caught_up?('foo')).to eq(false) + end + end + + describe '#database_replica_location' do + let(:connection) { double(:connection) } + + it 'returns the write ahead location of the replica', :aggregate_failures do + expect(host) + .to receive(:query_and_release) + .and_return({ 'location' => '0/D525E3A8' }) + + expect(host.database_replica_location).to be_an_instance_of(String) + end + + it 'returns nil when the database query returned no rows' do + expect(host) + .to receive(:query_and_release) + .and_return({}) + + expect(host.database_replica_location).to be_nil + end + + it 'returns nil when the database connection fails' do + wrapped_error = wrapped_exception(ActionView::Template::Error, StandardError) + + allow(host) + .to receive(:connection) + .and_raise(wrapped_error) + + expect(host.database_replica_location).to be_nil + end + end + + describe '#query_and_release' do + it 'executes a SQL query' do + results = host.query_and_release('SELECT 10 AS number') + + expect(results).to be_an_instance_of(Hash) + expect(results['number'].to_i).to eq(10) + end + + it 'releases the connection after running the query' do + expect(host) + .to receive(:release_connection) + .once + + host.query_and_release('SELECT 10 AS number') + end + + it 'returns an empty Hash in the event of an error' do + expect(host.connection) + .to receive(:select_all) + .and_raise(RuntimeError, 'kittens') + + expect(host.query_and_release('SELECT 10 AS number')).to eq({}) + end + end + + describe '#host' do + it 'returns the hostname' do + expect(host.host).to eq('localhost') + end + end +end diff --git a/spec/lib/gitlab/database/load_balancing/load_balancer_spec.rb b/spec/lib/gitlab/database/load_balancing/load_balancer_spec.rb new file mode 100644 index 00000000000..59f70165380 --- /dev/null +++ b/spec/lib/gitlab/database/load_balancing/load_balancer_spec.rb @@ -0,0 +1,491 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::Database::LoadBalancing::LoadBalancer, :request_store do + let(:pool_spec) { ActiveRecord::Base.connection_pool.spec } + let(:pool) { ActiveRecord::ConnectionAdapters::ConnectionPool.new(pool_spec) } + let(:conflict_error) { Class.new(RuntimeError) } + + 
let(:lb) { described_class.new(%w(localhost localhost)) } + + before do + allow(Gitlab::Database).to receive(:create_connection_pool) + .and_return(pool) + stub_const( + 'Gitlab::Database::LoadBalancing::LoadBalancer::PG::TRSerializationFailure', + conflict_error + ) + end + + def raise_and_wrap(wrapper, original) + raise original + rescue original.class + raise wrapper, 'boop' + end + + def wrapped_exception(wrapper, original) + raise_and_wrap(wrapper, original.new) + rescue wrapper => error + error + end + + def twice_wrapped_exception(top, middle, original) + begin + raise_and_wrap(middle, original.new) + rescue middle => middle_error + raise_and_wrap(top, middle_error) + end + rescue top => top_error + top_error + end + + describe '#read' do + it 'yields a connection for a read' do + connection = double(:connection) + host = double(:host) + + allow(lb).to receive(:host).and_return(host) + allow(host).to receive(:query_cache_enabled).and_return(true) + + expect(host).to receive(:connection).and_return(connection) + + expect { |b| lb.read(&b) }.to yield_with_args(connection) + end + + it 'ensures that query cache is enabled' do + connection = double(:connection) + host = double(:host) + + allow(lb).to receive(:host).and_return(host) + allow(host).to receive(:query_cache_enabled).and_return(false) + allow(host).to receive(:connection).and_return(connection) + + expect(host).to receive(:enable_query_cache!).once + + lb.read { 10 } + end + + it 'marks hosts that are offline' do + allow(lb).to receive(:connection_error?).and_return(true) + + expect(lb.host_list.hosts[0]).to receive(:offline!) + expect(lb).to receive(:release_host) + + raised = false + + returned = lb.read do + unless raised + raised = true + raise + end + + 10 + end + + expect(returned).to eq(10) + end + + it 'retries a query in the event of a serialization failure' do + raised = false + + expect(lb).to receive(:release_host) + + returned = lb.read do + unless raised + raised = true + raise conflict_error + end + + 10 + end + + expect(returned).to eq(10) + end + + it 'retries every host at most 3 times when a query conflict is raised' do + expect(lb).to receive(:release_host).exactly(6).times + expect(lb).to receive(:read_write) + + lb.read { raise conflict_error } + end + + it 'uses the primary if no secondaries are available' do + allow(lb).to receive(:connection_error?).and_return(true) + + expect(lb.host_list.hosts).to all(receive(:online?).and_return(false)) + + expect(lb).to receive(:read_write).and_call_original + + expect { |b| lb.read(&b) } + .to yield_with_args(ActiveRecord::Base.retrieve_connection) + end + end + + describe '#read_write' do + it 'yields a connection for a write' do + expect { |b| lb.read_write(&b) } + .to yield_with_args(ActiveRecord::Base.retrieve_connection) + end + + it 'uses a retry with exponential backoffs' do + expect(lb).to receive(:retry_with_backoff).and_yield + + lb.read_write { 10 } + end + end + + describe '#db_role_for_connection' do + context 'when the load balancer creates the connection with #read' do + it 'returns :replica' do + role = nil + lb.read do |connection| + role = lb.db_role_for_connection(connection) + end + + expect(role).to be(:replica) + end + end + + context 'when the load balancer uses nested #read' do + it 'returns :replica' do + roles = [] + lb.read do |connection_1| + lb.read do |connection_2| + roles << lb.db_role_for_connection(connection_2) + end + roles << lb.db_role_for_connection(connection_1) + end + + expect(roles).to eq([:replica, :replica]) + end + 
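As an aside for reviewers, the role-reporting behaviour this describe block exercises can be condensed into a minimal sketch. It assumes the two-replica balancer from the spec's let(:lb); the host names below are illustrative placeholders, not part of the change:

lb = Gitlab::Database::LoadBalancing::LoadBalancer.new(%w[replica-1 replica-2])

# #read yields a connection checked out from one of the replica hosts,
# so the helper reports it as a replica connection.
lb.read { |connection| lb.db_role_for_connection(connection) } # => :replica

# #read_write yields the primary's connection instead.
lb.read_write { |connection| lb.db_role_for_connection(connection) } # => :primary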
end + + context 'when the load balancer creates the connection with #read_write' do + it 'returns :primary' do + role = nil + lb.read_write do |connection| + role = lb.db_role_for_connection(connection) + end + + expect(role).to be(:primary) + end + end + + context 'when the load balancer uses nested #read_write' do + it 'returns :primary' do + roles = [] + lb.read_write do |connection_1| + lb.read_write do |connection_2| + roles << lb.db_role_for_connection(connection_2) + end + roles << lb.db_role_for_connection(connection_1) + end + + expect(roles).to eq([:primary, :primary]) + end + end + + context 'when the load balancer falls back the connection creation to primary' do + it 'returns :primary' do + allow(lb).to receive(:serialization_failure?).and_return(true) + + role = nil + raised = 7 # 2 hosts = 6 retries + + lb.read do |connection| + if raised > 0 + raised -= 1 + raise + end + + role = lb.db_role_for_connection(connection) + end + + expect(role).to be(:primary) + end + end + + context 'when the load balancer uses replica after recovery from a failure' do + it 'returns :replica' do + allow(lb).to receive(:connection_error?).and_return(true) + + role = nil + raised = false + + lb.read do |connection| + unless raised + raised = true + raise + end + + role = lb.db_role_for_connection(connection) + end + + expect(role).to be(:replica) + end + end + + context 'when the connection comes from a pool managed by the host list' do + it 'returns :replica' do + connection = double(:connection) + allow(connection).to receive(:pool).and_return(lb.host_list.hosts.first.pool) + + expect(lb.db_role_for_connection(connection)).to be(:replica) + end + end + + context 'when the connection comes from the primary pool' do + it 'returns :primary' do + connection = double(:connection) + allow(connection).to receive(:pool).and_return(ActiveRecord::Base.connection_pool) + + expect(lb.db_role_for_connection(connection)).to be(:primary) + end + end + + context 'when the connection does not come from any known pool' do + it 'returns nil' do + connection = double(:connection) + pool = double(:connection_pool) + allow(connection).to receive(:pool).and_return(pool) + + expect(lb.db_role_for_connection(connection)).to be(nil) + end + end + end + + describe '#host' do + it 'returns the secondary host to use' do + expect(lb.host).to be_an_instance_of(Gitlab::Database::LoadBalancing::Host) + end + + it 'stores the host in a thread-local variable' do + RequestStore.delete(described_class::CACHE_KEY) + RequestStore.delete(described_class::VALID_HOSTS_CACHE_KEY) + + expect(lb.host_list).to receive(:next).once.and_call_original + + lb.host + lb.host + end + end + + describe '#release_host' do + it 'releases the host and its connection' do + host = lb.host + + expect(host).to receive(:disable_query_cache!) 
+ + lb.release_host + + expect(RequestStore[described_class::CACHE_KEY]).to be_nil + expect(RequestStore[described_class::VALID_HOSTS_CACHE_KEY]).to be_nil + end + end + + describe '#release_primary_connection' do + it 'releases the connection to the primary' do + expect(ActiveRecord::Base.connection_pool).to receive(:release_connection) + + lb.release_primary_connection + end + end + + describe '#primary_write_location' do + it 'returns a String in the right format' do + expect(lb.primary_write_location).to match(%r{[A-F0-9]{1,8}/[A-F0-9]{1,8}}) + end + + it 'raises an error if the write location could not be retrieved' do + connection = double(:connection) + + allow(lb).to receive(:read_write).and_yield(connection) + allow(connection).to receive(:select_all).and_return([]) + + expect { lb.primary_write_location }.to raise_error(RuntimeError) + end + end + + describe '#all_caught_up?' do + it 'returns true if all hosts caught up to the write location' do + expect(lb.host_list.hosts).to all(receive(:caught_up?).with('foo').and_return(true)) + + expect(lb.all_caught_up?('foo')).to eq(true) + end + + it 'returns false if a host has not yet caught up' do + expect(lb.host_list.hosts[0]).to receive(:caught_up?) + .with('foo') + .and_return(true) + + expect(lb.host_list.hosts[1]).to receive(:caught_up?) + .with('foo') + .and_return(false) + + expect(lb.all_caught_up?('foo')).to eq(false) + end + end + + describe '#retry_with_backoff' do + it 'returns the value returned by the block' do + value = lb.retry_with_backoff { 10 } + + expect(value).to eq(10) + end + + it 're-raises errors not related to database connections' do + expect(lb).not_to receive(:sleep) # to make sure we're not retrying + + expect { lb.retry_with_backoff { raise 'boop' } } + .to raise_error(RuntimeError) + end + + it 'retries the block when a connection error is raised' do + allow(lb).to receive(:connection_error?).and_return(true) + expect(lb).to receive(:sleep).with(2) + expect(lb).to receive(:release_primary_connection) + + raised = false + returned = lb.retry_with_backoff do + unless raised + raised = true + raise + end + + 10 + end + + expect(returned).to eq(10) + end + + it 're-raises the connection error if the retries did not succeed' do + allow(lb).to receive(:connection_error?).and_return(true) + expect(lb).to receive(:sleep).with(2).ordered + expect(lb).to receive(:sleep).with(4).ordered + expect(lb).to receive(:sleep).with(16).ordered + + expect(lb).to receive(:release_primary_connection).exactly(3).times + + expect { lb.retry_with_backoff { raise } }.to raise_error(RuntimeError) + end + end + + describe '#connection_error?' 
do + before do + stub_const('Gitlab::Database::LoadBalancing::LoadBalancer::CONNECTION_ERRORS', + [NotImplementedError]) + end + + it 'returns true for a connection error' do + error = NotImplementedError.new + + expect(lb.connection_error?(error)).to eq(true) + end + + it 'returns true for a wrapped connection error' do + wrapped = wrapped_exception(ActiveRecord::StatementInvalid, NotImplementedError) + + expect(lb.connection_error?(wrapped)).to eq(true) + end + + it 'returns true for a wrapped connection error from a view' do + wrapped = wrapped_exception(ActionView::Template::Error, NotImplementedError) + + expect(lb.connection_error?(wrapped)).to eq(true) + end + + it 'returns true for deeply wrapped/nested errors' do + top = twice_wrapped_exception(ActionView::Template::Error, ActiveRecord::StatementInvalid, NotImplementedError) + + expect(lb.connection_error?(top)).to eq(true) + end + + it 'returns true for an invalid encoding error' do + error = RuntimeError.new('invalid encoding name: unicode') + + expect(lb.connection_error?(error)).to eq(true) + end + + it 'returns false for errors not related to database connections' do + error = RuntimeError.new + + expect(lb.connection_error?(error)).to eq(false) + end + end + + describe '#serialization_failure?' do + let(:conflict_error) { Class.new(RuntimeError) } + + before do + stub_const( + 'Gitlab::Database::LoadBalancing::LoadBalancer::PG::TRSerializationFailure', + conflict_error + ) + end + + it 'returns for a serialization error' do + expect(lb.serialization_failure?(conflict_error.new)).to eq(true) + end + + it 'returns true for a wrapped error' do + wrapped = wrapped_exception(ActionView::Template::Error, conflict_error) + + expect(lb.serialization_failure?(wrapped)).to eq(true) + end + end + + describe '#select_caught_up_hosts' do + let(:location) { 'AB/12345'} + let(:hosts) { lb.host_list.hosts } + let(:valid_host_list) { RequestStore[described_class::VALID_HOSTS_CACHE_KEY] } + let(:valid_hosts) { valid_host_list.hosts } + + subject { lb.select_caught_up_hosts(location) } + + context 'when all replicas are caught up' do + before do + expect(hosts).to all(receive(:caught_up?).with(location).and_return(true)) + end + + it 'returns true and sets all hosts to valid' do + expect(subject).to be true + expect(valid_host_list).to be_a(Gitlab::Database::LoadBalancing::HostList) + expect(valid_hosts).to contain_exactly(*hosts) + end + end + + context 'when none of the replicas are caught up' do + before do + expect(hosts).to all(receive(:caught_up?).with(location).and_return(false)) + end + + it 'returns true and has does not set the valid hosts' do + expect(subject).to be false + expect(valid_host_list).to be_nil + end + end + + context 'when one of the replicas is caught up' do + before do + expect(hosts[0]).to receive(:caught_up?).with(location).and_return(false) + expect(hosts[1]).to receive(:caught_up?).with(location).and_return(true) + end + + it 'returns true and sets one host to valid' do + expect(subject).to be true + expect(valid_host_list).to be_a(Gitlab::Database::LoadBalancing::HostList) + expect(valid_hosts).to contain_exactly(hosts[1]) + end + + it 'host always returns the caught-up replica' do + subject + + 3.times do + expect(lb.host).to eq(hosts[1]) + RequestStore.delete(described_class::CACHE_KEY) + end + end + end + end +end diff --git a/spec/lib/gitlab/database/load_balancing/rack_middleware_spec.rb b/spec/lib/gitlab/database/load_balancing/rack_middleware_spec.rb new file mode 100644 index 00000000000..01367716518 --- 
/dev/null +++ b/spec/lib/gitlab/database/load_balancing/rack_middleware_spec.rb @@ -0,0 +1,243 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::Database::LoadBalancing::RackMiddleware, :redis do + let(:app) { double(:app) } + let(:middleware) { described_class.new(app) } + let(:warden_user) { double(:warden, user: double(:user, id: 42)) } + let(:single_sticking_object) { Set.new([[:user, 42]]) } + let(:multiple_sticking_objects) do + Set.new([ + [:user, 42], + [:runner, '123456789'], + [:runner, '1234'] + ]) + end + + after do + Gitlab::Database::LoadBalancing::Session.clear_session + end + + describe '.stick_or_unstick' do + before do + allow(Gitlab::Database::LoadBalancing).to receive(:enable?) + .and_return(true) + end + + it 'sticks or unsticks a single object and updates the Rack environment' do + expect(Gitlab::Database::LoadBalancing::Sticking) + .to receive(:unstick_or_continue_sticking) + .with(:user, 42) + + env = {} + + described_class.stick_or_unstick(env, :user, 42) + + expect(env[described_class::STICK_OBJECT].to_a).to eq([[:user, 42]]) + end + + it 'sticks or unsticks multiple objects and updates the Rack environment' do + expect(Gitlab::Database::LoadBalancing::Sticking) + .to receive(:unstick_or_continue_sticking) + .with(:user, 42) + .ordered + + expect(Gitlab::Database::LoadBalancing::Sticking) + .to receive(:unstick_or_continue_sticking) + .with(:runner, '123456789') + .ordered + + env = {} + + described_class.stick_or_unstick(env, :user, 42) + described_class.stick_or_unstick(env, :runner, '123456789') + + expect(env[described_class::STICK_OBJECT].to_a).to eq([ + [:user, 42], + [:runner, '123456789'] + ]) + end + end + + describe '#call' do + it 'handles a request' do + env = {} + + expect(middleware).to receive(:clear).twice + + expect(middleware).to receive(:unstick_or_continue_sticking).with(env) + expect(middleware).to receive(:stick_if_necessary).with(env) + + expect(app).to receive(:call).with(env).and_return(10) + + expect(middleware.call(env)).to eq(10) + end + end + + describe '#unstick_or_continue_sticking' do + it 'does not stick if no namespace and identifier could be found' do + expect(Gitlab::Database::LoadBalancing::Sticking) + .not_to receive(:unstick_or_continue_sticking) + + middleware.unstick_or_continue_sticking({}) + end + + it 'sticks to the primary if a warden user is found' do + env = { 'warden' => warden_user } + + expect(Gitlab::Database::LoadBalancing::Sticking) + .to receive(:unstick_or_continue_sticking) + .with(:user, 42) + + middleware.unstick_or_continue_sticking(env) + end + + it 'sticks to the primary if a sticking namespace and identifier is found' do + env = { described_class::STICK_OBJECT => single_sticking_object } + + expect(Gitlab::Database::LoadBalancing::Sticking) + .to receive(:unstick_or_continue_sticking) + .with(:user, 42) + + middleware.unstick_or_continue_sticking(env) + end + + it 'sticks to the primary if multiple sticking namespaces and identifiers were found' do + env = { described_class::STICK_OBJECT => multiple_sticking_objects } + + expect(Gitlab::Database::LoadBalancing::Sticking) + .to receive(:unstick_or_continue_sticking) + .with(:user, 42) + .ordered + + expect(Gitlab::Database::LoadBalancing::Sticking) + .to receive(:unstick_or_continue_sticking) + .with(:runner, '123456789') + .ordered + + expect(Gitlab::Database::LoadBalancing::Sticking) + .to receive(:unstick_or_continue_sticking) + .with(:runner, '1234') + .ordered + + middleware.unstick_or_continue_sticking(env) + end 
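For context, the sticking flow covered by these examples reduces to the sketch below; env is a bare Hash standing in for a real Rack environment, and the :user id is illustrative only:

env = {}

# Delegates to Sticking.unstick_or_continue_sticking(:user, 42) and records
# the pair in the Rack env so #stick_if_necessary can pick it up once the
# request has been processed.
Gitlab::Database::LoadBalancing::RackMiddleware.stick_or_unstick(env, :user, 42)

env[Gitlab::Database::LoadBalancing::RackMiddleware::STICK_OBJECT].to_a
# => [[:user, 42]]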
+ end + + describe '#stick_if_necessary' do + it 'does not stick to the primary if not necessary' do + expect(Gitlab::Database::LoadBalancing::Sticking) + .not_to receive(:stick_if_necessary) + + middleware.stick_if_necessary({}) + end + + it 'sticks to the primary if a warden user is found' do + env = { 'warden' => warden_user } + + expect(Gitlab::Database::LoadBalancing::Sticking) + .to receive(:stick_if_necessary) + .with(:user, 42) + + middleware.stick_if_necessary(env) + end + + it 'sticks to the primary if a a single sticking object is found' do + env = { described_class::STICK_OBJECT => single_sticking_object } + + expect(Gitlab::Database::LoadBalancing::Sticking) + .to receive(:stick_if_necessary) + .with(:user, 42) + + middleware.stick_if_necessary(env) + end + + it 'sticks to the primary if multiple sticking namespaces and identifiers were found' do + env = { described_class::STICK_OBJECT => multiple_sticking_objects } + + expect(Gitlab::Database::LoadBalancing::Sticking) + .to receive(:stick_if_necessary) + .with(:user, 42) + .ordered + + expect(Gitlab::Database::LoadBalancing::Sticking) + .to receive(:stick_if_necessary) + .with(:runner, '123456789') + .ordered + + expect(Gitlab::Database::LoadBalancing::Sticking) + .to receive(:stick_if_necessary) + .with(:runner, '1234') + .ordered + + middleware.stick_if_necessary(env) + end + end + + describe '#clear' do + it 'clears the currently used host and session' do + lb = double(:lb) + session = double(:session) + + allow(middleware).to receive(:load_balancer).and_return(lb) + + expect(lb).to receive(:release_host) + + stub_const('Gitlab::Database::LoadBalancing::RackMiddleware::Session', + session) + + expect(session).to receive(:clear_session) + + middleware.clear + end + end + + describe '.load_balancer' do + it 'returns a the load balancer' do + proxy = double(:proxy) + + expect(Gitlab::Database::LoadBalancing).to receive(:proxy) + .and_return(proxy) + + expect(proxy).to receive(:load_balancer) + + middleware.load_balancer + end + end + + describe '#sticking_namespaces_and_ids' do + context 'using a Warden request' do + it 'returns the warden user if present' do + env = { 'warden' => warden_user } + + expect(middleware.sticking_namespaces_and_ids(env)).to eq([[:user, 42]]) + end + + it 'returns an empty Array if no user was present' do + warden = double(:warden, user: nil) + env = { 'warden' => warden } + + expect(middleware.sticking_namespaces_and_ids(env)).to eq([]) + end + end + + context 'using a request with a manually set sticking object' do + it 'returns the sticking object' do + env = { described_class::STICK_OBJECT => multiple_sticking_objects } + + expect(middleware.sticking_namespaces_and_ids(env)).to eq([ + [:user, 42], + [:runner, '123456789'], + [:runner, '1234'] + ]) + end + end + + context 'using a regular request' do + it 'returns an empty Array' do + expect(middleware.sticking_namespaces_and_ids({})).to eq([]) + end + end + end +end diff --git a/spec/lib/gitlab/database/load_balancing/resolver_spec.rb b/spec/lib/gitlab/database/load_balancing/resolver_spec.rb new file mode 100644 index 00000000000..0051cf50255 --- /dev/null +++ b/spec/lib/gitlab/database/load_balancing/resolver_spec.rb @@ -0,0 +1,80 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::Database::LoadBalancing::Resolver do + describe '#resolve' do + let(:ip_addr) { IPAddr.new('127.0.0.2') } + + context 'when nameserver is an IP' do + it 'returns an IPAddr object' do + service = described_class.new('127.0.0.2') + + 
expect(service.resolve).to eq(ip_addr) + end + end + + context 'when nameserver is not an IP' do + subject { described_class.new('localhost').resolve } + + it 'looks the nameserver up in the hosts file' do + allow_next_instance_of(Resolv::Hosts) do |instance| + allow(instance).to receive(:getaddress).with('localhost').and_return('127.0.0.2') + end + + expect(subject).to eq(ip_addr) + end + + context 'when nameserver is not in the hosts file' do + it 'looks the nameserver up in DNS' do + resource = double(:resource, address: ip_addr) + packet = double(:packet, answer: [resource]) + + allow_next_instance_of(Resolv::Hosts) do |instance| + allow(instance).to receive(:getaddress).with('localhost').and_raise(Resolv::ResolvError) + end + + allow(Net::DNS::Resolver).to receive(:start) + .with('localhost', Net::DNS::A) + .and_return(packet) + + expect(subject).to eq(ip_addr) + end + + context 'when nameserver is not in DNS' do + it 'raises an exception' do + allow_next_instance_of(Resolv::Hosts) do |instance| + allow(instance).to receive(:getaddress).with('localhost').and_raise(Resolv::ResolvError) + end + + allow(Net::DNS::Resolver).to receive(:start) + .with('localhost', Net::DNS::A) + .and_return(double(:packet, answer: [])) + + expect { subject }.to raise_exception( + described_class::UnresolvableNameserverError, + 'could not resolve localhost' + ) + end + end + + context 'when DNS does not respond' do + it 'raises an exception' do + allow_next_instance_of(Resolv::Hosts) do |instance| + allow(instance).to receive(:getaddress).with('localhost').and_raise(Resolv::ResolvError) + end + + allow(Net::DNS::Resolver).to receive(:start) + .with('localhost', Net::DNS::A) + .and_raise(Net::DNS::Resolver::NoResponseError) + + expect { subject }.to raise_exception( + described_class::UnresolvableNameserverError, + 'no response from DNS server(s)' + ) + end + end + end + end + end +end diff --git a/spec/lib/gitlab/database/load_balancing/service_discovery_spec.rb b/spec/lib/gitlab/database/load_balancing/service_discovery_spec.rb new file mode 100644 index 00000000000..7fc7b5e8d11 --- /dev/null +++ b/spec/lib/gitlab/database/load_balancing/service_discovery_spec.rb @@ -0,0 +1,252 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::Database::LoadBalancing::ServiceDiscovery do + let(:service) do + described_class.new(nameserver: 'localhost', port: 8600, record: 'foo') + end + + before do + resource = double(:resource, address: IPAddr.new('127.0.0.1')) + packet = double(:packet, answer: [resource]) + + allow(Net::DNS::Resolver).to receive(:start) + .with('localhost', Net::DNS::A) + .and_return(packet) + end + + describe '#initialize' do + describe ':record_type' do + subject { described_class.new(nameserver: 'localhost', port: 8600, record: 'foo', record_type: record_type) } + + context 'with a supported type' do + let(:record_type) { 'SRV' } + + it { expect(subject.record_type).to eq Net::DNS::SRV } + end + + context 'with an unsupported type' do + let(:record_type) { 'AAAA' } + + it 'raises an argument error' do + expect { subject }.to raise_error(ArgumentError, 'Unsupported record type: AAAA') + end + end + end + end + + describe '#start' do + before do + allow(service) + .to receive(:loop) + .and_yield + end + + it 'starts service discovery in a new thread' do + expect(service) + .to receive(:refresh_if_necessary) + .and_return(5) + + expect(service) + .to receive(:rand) + .and_return(2) + + expect(service) + .to receive(:sleep) + .with(7) + + service.start.join + end + + it 
'reports exceptions to Sentry' do + error = StandardError.new + + expect(service) + .to receive(:refresh_if_necessary) + .and_raise(error) + + expect(Gitlab::ErrorTracking) + .to receive(:track_exception) + .with(error) + + expect(service) + .to receive(:rand) + .and_return(2) + + expect(service) + .to receive(:sleep) + .with(62) + + service.start.join + end + end + + describe '#refresh_if_necessary' do + let(:address_foo) { described_class::Address.new('foo') } + let(:address_bar) { described_class::Address.new('bar') } + + context 'when a refresh is necessary' do + before do + allow(service) + .to receive(:addresses_from_load_balancer) + .and_return(%w[localhost]) + + allow(service) + .to receive(:addresses_from_dns) + .and_return([10, [address_foo, address_bar]]) + end + + it 'refreshes the load balancer hosts' do + expect(service) + .to receive(:replace_hosts) + .with([address_foo, address_bar]) + + expect(service.refresh_if_necessary).to eq(10) + end + end + + context 'when a refresh is not necessary' do + before do + allow(service) + .to receive(:addresses_from_load_balancer) + .and_return(%w[localhost]) + + allow(service) + .to receive(:addresses_from_dns) + .and_return([10, %w[localhost]]) + end + + it 'does not refresh the load balancer hosts' do + expect(service) + .not_to receive(:replace_hosts) + + expect(service.refresh_if_necessary).to eq(10) + end + end + end + + describe '#replace_hosts' do + let(:address_foo) { described_class::Address.new('foo') } + let(:address_bar) { described_class::Address.new('bar') } + + let(:load_balancer) do + Gitlab::Database::LoadBalancing::LoadBalancer.new([address_foo]) + end + + before do + allow(service) + .to receive(:load_balancer) + .and_return(load_balancer) + end + + it 'replaces the hosts of the load balancer' do + service.replace_hosts([address_bar]) + + expect(load_balancer.host_list.host_names_and_ports).to eq([['bar', nil]]) + end + + it 'disconnects the old connections' do + host = load_balancer.host_list.hosts.first + + allow(service) + .to receive(:disconnect_timeout) + .and_return(2) + + expect(host) + .to receive(:disconnect!) 
+ .with(2) + + service.replace_hosts([address_bar]) + end + end + + describe '#addresses_from_dns' do + let(:service) { described_class.new(nameserver: 'localhost', port: 8600, record: 'foo', record_type: record_type) } + let(:packet) { double(:packet, answer: [res1, res2]) } + + before do + allow(service.resolver) + .to receive(:search) + .with('foo', described_class::RECORD_TYPES[record_type]) + .and_return(packet) + end + + context 'with an A record' do + let(:record_type) { 'A' } + + let(:res1) { double(:resource, address: IPAddr.new('255.255.255.0'), ttl: 90) } + let(:res2) { double(:resource, address: IPAddr.new('127.0.0.1'), ttl: 90) } + + it 'returns a TTL and ordered list of IP addresses' do + addresses = [ + described_class::Address.new('127.0.0.1'), + described_class::Address.new('255.255.255.0') + ] + + expect(service.addresses_from_dns).to eq([90, addresses]) + end + end + + context 'with an SRV record' do + let(:record_type) { 'SRV' } + + let(:res1) { double(:resource, host: 'foo1.service.consul.', port: 5432, weight: 1, priority: 1, ttl: 90) } + let(:res2) { double(:resource, host: 'foo2.service.consul.', port: 5433, weight: 1, priority: 1, ttl: 90) } + let(:res3) { double(:resource, host: 'foo3.service.consul.', port: 5434, weight: 1, priority: 1, ttl: 90) } + let(:packet) { double(:packet, answer: [res1, res2, res3], additional: []) } + + before do + expect_next_instance_of(Gitlab::Database::LoadBalancing::SrvResolver) do |resolver| + allow(resolver).to receive(:address_for).with('foo1.service.consul.').and_return(IPAddr.new('255.255.255.0')) + allow(resolver).to receive(:address_for).with('foo2.service.consul.').and_return(IPAddr.new('127.0.0.1')) + allow(resolver).to receive(:address_for).with('foo3.service.consul.').and_return(nil) + end + end + + it 'returns a TTL and ordered list of hosts' do + addresses = [ + described_class::Address.new('127.0.0.1', 5433), + described_class::Address.new('255.255.255.0', 5432) + ] + + expect(service.addresses_from_dns).to eq([90, addresses]) + end + end + end + + describe '#new_wait_time_for' do + it 'returns the DNS TTL if greater than the default interval' do + res = double(:resource, ttl: 90) + + expect(service.new_wait_time_for([res])).to eq(90) + end + + it 'returns the default interval if greater than the DNS TTL' do + res = double(:resource, ttl: 10) + + expect(service.new_wait_time_for([res])).to eq(60) + end + + it 'returns the default interval if no resources are given' do + expect(service.new_wait_time_for([])).to eq(60) + end + end + + describe '#addresses_from_load_balancer' do + it 'returns the ordered host names of the load balancer' do + load_balancer = Gitlab::Database::LoadBalancing::LoadBalancer.new(%w[b a]) + + allow(service) + .to receive(:load_balancer) + .and_return(load_balancer) + + addresses = [ + described_class::Address.new('a'), + described_class::Address.new('b') + ] + + expect(service.addresses_from_load_balancer).to eq(addresses) + end + end +end diff --git a/spec/lib/gitlab/database/load_balancing/session_spec.rb b/spec/lib/gitlab/database/load_balancing/session_spec.rb new file mode 100644 index 00000000000..74512f76fd4 --- /dev/null +++ b/spec/lib/gitlab/database/load_balancing/session_spec.rb @@ -0,0 +1,353 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::Database::LoadBalancing::Session do + after do + described_class.clear_session + end + + describe '.current' do + it 'returns the current session' do + expect(described_class.current).to 
be_an_instance_of(described_class) + end + end + + describe '.clear_session' do + it 'clears the current session' do + described_class.current + described_class.clear_session + + expect(RequestStore[described_class::CACHE_KEY]).to be_nil + end + end + + describe '.without_sticky_writes' do + it 'ignores sticky write events sent by a connection proxy' do + described_class.without_sticky_writes do + described_class.current.write! + end + + session = described_class.current + + expect(session).not_to be_using_primary + end + + it 'still is aware of write that happened' do + described_class.without_sticky_writes do + described_class.current.write! + end + + session = described_class.current + + expect(session.performed_write?).to be true + end + end + + describe '#use_primary?' do + it 'returns true when the primary should be used' do + instance = described_class.new + + instance.use_primary! + + expect(instance.use_primary?).to eq(true) + end + + it 'returns false when a secondary should be used' do + expect(described_class.new.use_primary?).to eq(false) + end + + it 'returns true when a write was performed' do + instance = described_class.new + + instance.write! + + expect(instance.use_primary?).to eq(true) + end + end + + describe '#use_primary' do + let(:instance) { described_class.new } + + context 'when primary was used before' do + before do + instance.write! + end + + it 'restores state after use' do + expect { |blk| instance.use_primary(&blk) }.to yield_with_no_args + + expect(instance.use_primary?).to eq(true) + end + end + + context 'when primary was not used' do + it 'restores state after use' do + expect { |blk| instance.use_primary(&blk) }.to yield_with_no_args + + expect(instance.use_primary?).to eq(false) + end + end + + it 'uses primary during block' do + expect do |blk| + instance.use_primary do + expect(instance.use_primary?).to eq(true) + + # call yield probe + blk.to_proc.call + end + end.to yield_control + end + + it 'continues using primary when write was performed' do + instance.use_primary do + instance.write! + end + + expect(instance.use_primary?).to eq(true) + end + end + + describe '#performed_write?' do + it 'returns true if a write was performed' do + instance = described_class.new + + instance.write! + + expect(instance.performed_write?).to eq(true) + end + end + + describe '#ignore_writes' do + it 'ignores write events' do + instance = described_class.new + + instance.ignore_writes { instance.write! } + + expect(instance).not_to be_using_primary + expect(instance.performed_write?).to eq true + end + + it 'does not prevent using primary if an exception is raised' do + instance = described_class.new + + instance.ignore_writes { raise ArgumentError } rescue ArgumentError + instance.write! 
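The write-tracking semantics asserted throughout this file boil down to a short sketch; the session lives in RequestStore, so this only holds within a single request scope:

session = Gitlab::Database::LoadBalancing::Session.current

session.use_primary?     # => false for a fresh session, reads may use replicas
session.write!           # a write happened, subsequent queries stick to the primary
session.use_primary?     # => true
session.performed_write? # => true

# Session.without_sticky_writes { ... } records that a write happened
# without forcing later reads onto the primary.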
+ + expect(instance).to be_using_primary + end + end + + describe '#use_replicas_for_read_queries' do + let(:instance) { described_class.new } + + it 'sets the flag inside the block' do + expect do |blk| + instance.use_replicas_for_read_queries do + expect(instance.use_replicas_for_read_queries?).to eq(true) + + # call yield probe + blk.to_proc.call + end + end.to yield_control + + expect(instance.use_replicas_for_read_queries?).to eq(false) + end + + it 'restores state after use' do + expect do |blk| + instance.use_replicas_for_read_queries do + instance.use_replicas_for_read_queries do + expect(instance.use_replicas_for_read_queries?).to eq(true) + + # call yield probe + blk.to_proc.call + end + + expect(instance.use_replicas_for_read_queries?).to eq(true) + end + end.to yield_control + + expect(instance.use_replicas_for_read_queries?).to eq(false) + end + + context 'when primary was used before' do + before do + instance.use_primary! + end + + it 'sets the flag inside the block' do + expect do |blk| + instance.use_replicas_for_read_queries do + expect(instance.use_replicas_for_read_queries?).to eq(true) + + # call yield probe + blk.to_proc.call + end + end.to yield_control + + expect(instance.use_replicas_for_read_queries?).to eq(false) + end + end + + context 'when a write query is performed before' do + before do + instance.write! + end + + it 'sets the flag inside the block' do + expect do |blk| + instance.use_replicas_for_read_queries do + expect(instance.use_replicas_for_read_queries?).to eq(true) + + # call yield probe + blk.to_proc.call + end + end.to yield_control + + expect(instance.use_replicas_for_read_queries?).to eq(false) + end + end + end + + describe '#fallback_to_replicas_for_ambiguous_queries' do + let(:instance) { described_class.new } + + it 'sets the flag inside the block' do + expect do |blk| + instance.fallback_to_replicas_for_ambiguous_queries do + expect(instance.fallback_to_replicas_for_ambiguous_queries?).to eq(true) + + # call yield probe + blk.to_proc.call + end + end.to yield_control + + expect(instance.fallback_to_replicas_for_ambiguous_queries?).to eq(false) + end + + it 'restores state after use' do + expect do |blk| + instance.fallback_to_replicas_for_ambiguous_queries do + instance.fallback_to_replicas_for_ambiguous_queries do + expect(instance.fallback_to_replicas_for_ambiguous_queries?).to eq(true) + + # call yield probe + blk.to_proc.call + end + + expect(instance.fallback_to_replicas_for_ambiguous_queries?).to eq(true) + end + end.to yield_control + + expect(instance.fallback_to_replicas_for_ambiguous_queries?).to eq(false) + end + + context 'when primary was used before' do + before do + instance.use_primary! + end + + it 'uses primary during block' do + expect(instance.fallback_to_replicas_for_ambiguous_queries?).to eq(false) + + expect do |blk| + instance.fallback_to_replicas_for_ambiguous_queries do + expect(instance.fallback_to_replicas_for_ambiguous_queries?).to eq(false) + + # call yield probe + blk.to_proc.call + end + end.to yield_control + + expect(instance.fallback_to_replicas_for_ambiguous_queries?).to eq(false) + end + end + + context 'when a write was performed before' do + before do + instance.write! 
+ end + + it 'uses primary during block' do + expect(instance.fallback_to_replicas_for_ambiguous_queries?).to eq(false) + + expect do |blk| + instance.fallback_to_replicas_for_ambiguous_queries do + expect(instance.fallback_to_replicas_for_ambiguous_queries?).to eq(false) + + # call yield probe + blk.to_proc.call + end + end.to yield_control + + expect(instance.fallback_to_replicas_for_ambiguous_queries?).to eq(false) + end + end + + context 'when primary was used inside the block' do + it 'uses primary aterward' do + expect(instance.fallback_to_replicas_for_ambiguous_queries?).to eq(false) + + instance.fallback_to_replicas_for_ambiguous_queries do + expect(instance.fallback_to_replicas_for_ambiguous_queries?).to eq(true) + + instance.use_primary! + + expect(instance.fallback_to_replicas_for_ambiguous_queries?).to eq(false) + end + + expect(instance.fallback_to_replicas_for_ambiguous_queries?).to eq(false) + end + + it 'restores state after use' do + instance.fallback_to_replicas_for_ambiguous_queries do + instance.fallback_to_replicas_for_ambiguous_queries do + expect(instance.fallback_to_replicas_for_ambiguous_queries?).to eq(true) + + instance.use_primary! + + expect(instance.fallback_to_replicas_for_ambiguous_queries?).to eq(false) + end + + expect(instance.fallback_to_replicas_for_ambiguous_queries?).to eq(false) + end + + expect(instance.fallback_to_replicas_for_ambiguous_queries?).to eq(false) + end + end + + context 'when a write was performed inside the block' do + it 'uses primary aterward' do + expect(instance.fallback_to_replicas_for_ambiguous_queries?).to eq(false) + + instance.fallback_to_replicas_for_ambiguous_queries do + expect(instance.fallback_to_replicas_for_ambiguous_queries?).to eq(true) + + instance.write! + + expect(instance.fallback_to_replicas_for_ambiguous_queries?).to eq(false) + end + + expect(instance.fallback_to_replicas_for_ambiguous_queries?).to eq(false) + end + + it 'restores state after use' do + instance.fallback_to_replicas_for_ambiguous_queries do + instance.fallback_to_replicas_for_ambiguous_queries do + expect(instance.fallback_to_replicas_for_ambiguous_queries?).to eq(true) + + instance.write! 
+ + expect(instance.fallback_to_replicas_for_ambiguous_queries?).to eq(false) + end + + expect(instance.fallback_to_replicas_for_ambiguous_queries?).to eq(false) + end + + expect(instance.fallback_to_replicas_for_ambiguous_queries?).to eq(false) + end + end + end +end diff --git a/spec/lib/gitlab/database/load_balancing/sidekiq_client_middleware_spec.rb b/spec/lib/gitlab/database/load_balancing/sidekiq_client_middleware_spec.rb new file mode 100644 index 00000000000..90051172fca --- /dev/null +++ b/spec/lib/gitlab/database/load_balancing/sidekiq_client_middleware_spec.rb @@ -0,0 +1,161 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::Database::LoadBalancing::SidekiqClientMiddleware do + let(:middleware) { described_class.new } + + after do + Gitlab::Database::LoadBalancing::Session.clear_session + end + + describe '#call' do + shared_context 'data consistency worker class' do |data_consistency, feature_flag| + let(:worker_class) do + Class.new do + def self.name + 'TestDataConsistencyWorker' + end + + include ApplicationWorker + + data_consistency data_consistency, feature_flag: feature_flag + + def perform(*args) + end + end + end + + before do + stub_const('TestDataConsistencyWorker', worker_class) + end + end + + shared_examples_for 'does not pass database locations' do + it 'does not pass database locations', :aggregate_failures do + middleware.call(worker_class, job, double(:queue), redis_pool) { 10 } + + expect(job['database_replica_location']).to be_nil + expect(job['database_write_location']).to be_nil + end + end + + shared_examples_for 'mark data consistency location' do |data_consistency| + include_context 'data consistency worker class', data_consistency, :load_balancing_for_test_data_consistency_worker + + let(:location) { '0/D525E3A8' } + + context 'when feature flag load_balancing_for_sidekiq is disabled' do + before do + stub_feature_flags(load_balancing_for_test_data_consistency_worker: false) + end + + include_examples 'does not pass database locations' + end + + context 'when write was not performed' do + before do + allow(Gitlab::Database::LoadBalancing::Session.current).to receive(:use_primary?).and_return(false) + end + + it 'passes database_replica_location' do + expect(middleware).to receive_message_chain(:load_balancer, :host, "database_replica_location").and_return(location) + + middleware.call(worker_class, job, double(:queue), redis_pool) { 10 } + + expect(job['database_replica_location']).to eq(location) + end + end + + context 'when write was performed' do + before do + allow(Gitlab::Database::LoadBalancing::Session.current).to receive(:use_primary?).and_return(true) + end + + it 'passes primary write location', :aggregate_failures do + expect(middleware).to receive_message_chain(:load_balancer, :primary_write_location).and_return(location) + + middleware.call(worker_class, job, double(:queue), redis_pool) { 10 } + + expect(job['database_write_location']).to eq(location) + end + end + end + + shared_examples_for 'database location was already provided' do |provided_database_location, other_location| + shared_examples_for 'does not set database location again' do |use_primary| + before do + allow(Gitlab::Database::LoadBalancing::Session.current).to receive(:use_primary?).and_return(use_primary) + end + + it 'does not set database locations again' do + middleware.call(worker_class, job, double(:queue), redis_pool) { 10 } + + expect(job[provided_database_location]).to eq(old_location) + expect(job[other_location]).to be_nil + 
end + end + + let(:old_location) { '0/D525E3A8' } + let(:new_location) { 'AB/12345' } + let(:job) { { "job_id" => "a180b47c-3fd6-41b8-81e9-34da61c3400e", provided_database_location => old_location } } + + before do + allow(middleware).to receive_message_chain(:load_balancer, :primary_write_location).and_return(new_location) + allow(middleware).to receive_message_chain(:load_balancer, :database_replica_location).and_return(new_location) + end + + context "when write was performed" do + include_examples 'does not set database location again', true + end + + context "when write was not performed" do + include_examples 'does not set database location again', false + end + end + + let(:queue) { 'default' } + let(:redis_pool) { Sidekiq.redis_pool } + let(:worker_class) { 'TestDataConsistencyWorker' } + let(:job) { { "job_id" => "a180b47c-3fd6-41b8-81e9-34da61c3400e" } } + + before do + skip_feature_flags_yaml_validation + skip_default_enabled_yaml_check + end + + context 'when worker cannot be constantized' do + let(:worker_class) { 'ActionMailer::MailDeliveryJob' } + + include_examples 'does not pass database locations' + end + + context 'when worker class does not include ApplicationWorker' do + let(:worker_class) { ActiveJob::QueueAdapters::SidekiqAdapter::JobWrapper } + + include_examples 'does not pass database locations' + end + + context 'database write location was already provided' do + include_examples 'database location was already provided', 'database_write_location', 'database_replica_location' + end + + context 'database replica location was already provided' do + include_examples 'database location was already provided', 'database_replica_location', 'database_write_location' + end + + context 'when worker data consistency is :always' do + include_context 'data consistency worker class', :always, :load_balancing_for_test_data_consistency_worker + + include_examples 'does not pass database locations' + end + + context 'when worker data consistency is :delayed' do + include_examples 'mark data consistency location', :delayed + end + + context 'when worker data consistency is :sticky' do + include_examples 'mark data consistency location', :sticky + end + end +end diff --git a/spec/lib/gitlab/database/load_balancing/sidekiq_server_middleware_spec.rb b/spec/lib/gitlab/database/load_balancing/sidekiq_server_middleware_spec.rb new file mode 100644 index 00000000000..cf607231ddc --- /dev/null +++ b/spec/lib/gitlab/database/load_balancing/sidekiq_server_middleware_spec.rb @@ -0,0 +1,184 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::Database::LoadBalancing::SidekiqServerMiddleware do + let(:middleware) { described_class.new } + + after do + Gitlab::Database::LoadBalancing::Session.clear_session + end + + describe '#call' do + shared_context 'data consistency worker class' do |data_consistency, feature_flag| + let(:worker_class) do + Class.new do + def self.name + 'TestDataConsistencyWorker' + end + + include ApplicationWorker + + data_consistency data_consistency, feature_flag: feature_flag + + def perform(*args) + end + end + end + + before do + stub_const('TestDataConsistencyWorker', worker_class) + end + end + + shared_examples_for 'job marked with chosen database' do + it 'yields and sets database chosen', :aggregate_failures do + expect { |b| middleware.call(worker, job, double(:queue), &b) }.to yield_control + + expect(job[:database_chosen]).to eq('primary') + end + end + + shared_examples_for 'stick to the primary' do + it 'sticks to the 
primary' do + middleware.call(worker, job, double(:queue)) do + expect(Gitlab::Database::LoadBalancing::Session.current.use_primary?).to be_truthy + end + end + end + + shared_examples_for 'replica is up to date' do |location| + it 'do not stick to the primary', :aggregate_failures do + expect(middleware).to receive(:replica_caught_up?).with(location).and_return(true) + + middleware.call(worker, job, double(:queue)) do + expect(Gitlab::Database::LoadBalancing::Session.current.use_primary?).not_to be_truthy + end + + expect(job[:database_chosen]).to eq('replica') + end + end + + shared_examples_for 'sticks based on data consistency' do |data_consistency| + include_context 'data consistency worker class', data_consistency, :load_balancing_for_test_data_consistency_worker + + context 'when load_balancing_for_test_data_consistency_worker is disabled' do + before do + stub_feature_flags(load_balancing_for_test_data_consistency_worker: false) + end + + include_examples 'stick to the primary' + end + + context 'when database replica location is set' do + let(:job) { { 'job_id' => 'a180b47c-3fd6-41b8-81e9-34da61c3400e', 'database_replica_location' => '0/D525E3A8' } } + + before do + allow(middleware).to receive(:replica_caught_up?).and_return(true) + end + + it_behaves_like 'replica is up to date', '0/D525E3A8' + end + + context 'when database primary location is set' do + let(:job) { { 'job_id' => 'a180b47c-3fd6-41b8-81e9-34da61c3400e', 'database_write_location' => '0/D525E3A8' } } + + before do + allow(middleware).to receive(:replica_caught_up?).and_return(true) + end + + it_behaves_like 'replica is up to date', '0/D525E3A8' + end + + context 'when database location is not set' do + let(:job) { { 'job_id' => 'a180b47c-3fd6-41b8-81e9-34da61c3400e' } } + + it_behaves_like 'stick to the primary', nil + end + end + + let(:queue) { 'default' } + let(:redis_pool) { Sidekiq.redis_pool } + let(:worker) { worker_class.new } + let(:job) { { "retry" => 3, "job_id" => "a180b47c-3fd6-41b8-81e9-34da61c3400e", 'database_replica_location' => '0/D525E3A8' } } + let(:block) { 10 } + + before do + skip_feature_flags_yaml_validation + skip_default_enabled_yaml_check + allow(middleware).to receive(:clear) + allow(Gitlab::Database::LoadBalancing::Session.current).to receive(:performed_write?).and_return(true) + end + + context 'when worker class does not include ApplicationWorker' do + let(:worker) { ActiveJob::QueueAdapters::SidekiqAdapter::JobWrapper.new } + + include_examples 'stick to the primary' + end + + context 'when worker data consistency is :always' do + include_context 'data consistency worker class', :always, :load_balancing_for_test_data_consistency_worker + + include_examples 'stick to the primary' + end + + context 'when worker data consistency is :delayed' do + include_examples 'sticks based on data consistency', :delayed + + context 'when replica is not up to date' do + before do + allow(::Gitlab::Database::LoadBalancing).to receive_message_chain(:proxy, :load_balancer, :release_host) + allow(::Gitlab::Database::LoadBalancing).to receive_message_chain(:proxy, :load_balancer, :host, :caught_up?).and_return(false) + end + + around do |example| + with_sidekiq_server_middleware do |chain| + chain.add described_class + Sidekiq::Testing.disable! 
{ example.run }
+          end
+        end
+
+        context 'when job is executed first' do
+          it 'raises an error and retries', :aggregate_failures do
+            expect do
+              process_job(job)
+            end.to raise_error(Sidekiq::JobRetry::Skip)
+
+            expect(job['error_class']).to eq('Gitlab::Database::LoadBalancing::SidekiqServerMiddleware::JobReplicaNotUpToDate')
+            expect(job[:database_chosen]).to eq('retry')
+          end
+        end
+
+        context 'when job is retried' do
+          it 'sticks to the primary', :aggregate_failures do
+            expect do
+              process_job(job)
+            end.to raise_error(Sidekiq::JobRetry::Skip)
+
+            process_job(job)
+            expect(job[:database_chosen]).to eq('primary')
+          end
+        end
+      end
+    end
+
+    context 'when worker data consistency is :sticky' do
+      include_examples 'sticks based on data consistency', :sticky
+
+      context 'when replica is not up to date' do
+        before do
+          allow(middleware).to receive(:replica_caught_up?).and_return(false)
+        end
+
+        include_examples 'stick to the primary'
+        include_examples 'job marked with chosen database'
+      end
+    end
+  end
+
+  def process_job(job)
+    Sidekiq::JobRetry.new.local(worker_class, job, queue) do
+      worker_class.process_job(job)
+    end
+  end
+end
diff --git a/spec/lib/gitlab/database/load_balancing/srv_resolver_spec.rb b/spec/lib/gitlab/database/load_balancing/srv_resolver_spec.rb
new file mode 100644
index 00000000000..6ac0608d485
--- /dev/null
+++ b/spec/lib/gitlab/database/load_balancing/srv_resolver_spec.rb
@@ -0,0 +1,61 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+RSpec.describe Gitlab::Database::LoadBalancing::SrvResolver do
+  let(:resolver) { Net::DNS::Resolver.new(nameservers: '127.0.0.1', port: 8600, use_tcp: true) }
+  let(:additional) { dns_response_packet_from_fixture('srv_with_a_rr_in_additional_section').additional }
+
+  describe '#address_for' do
+    let(:host) { 'patroni-02-db-gstg.node.east-us-2.consul.' }
+
+    subject { described_class.new(resolver, additional).address_for(host) }
+
+    context 'when additional section contains an A record' do
+      it 'returns an IPv4 address' do
+        expect(subject).to eq(IPAddr.new('10.224.29.102'))
+      end
+    end
+
+    context 'when additional section contains an AAAA record' do
+      let(:host) { 'a.gtld-servers.net.' }
+      let(:additional) { dns_response_packet_from_fixture('a_with_aaaa_rr_in_additional_section').additional }
+
+      it 'returns an IPv6 address' do
+        expect(subject).to eq(IPAddr.new('2001:503:a83e::2:30'))
+      end
+    end
+
+    context 'when additional section does not contain A or AAAA records' do
+      let(:additional) { [] }
+
+      context 'when host resolves to an A record' do
+        before do
+          allow(resolver).to receive(:search).with(host, Net::DNS::ANY).and_return(dns_response_packet_from_fixture('a_rr'))
+        end
+
+        it 'returns an IPv4 address' do
+          expect(subject).to eq(IPAddr.new('10.224.29.102'))
+        end
+      end
+
+      context 'when host resolves to an AAAA record' do
+        before do
+          allow(resolver).to receive(:search).with(host, Net::DNS::ANY).and_return(dns_response_packet_from_fixture('aaaa_rr'))
+        end
+
+        it 'returns an IPv6 address' do
+          expect(subject).to eq(IPAddr.new('2a00:1450:400e:80a::200e'))
+        end
+      end
+    end
+  end
+
+  def dns_response_packet_from_fixture(fixture_name)
+    fixture = File.read(Rails.root + "spec/fixtures/dns/#{fixture_name}.json")
+    encoded_payload = Gitlab::Json.parse(fixture)['payload']
+    payload = Base64.decode64(encoded_payload)
+
+    Net::DNS::Packet.parse(payload)
+  end
+end
diff --git a/spec/lib/gitlab/database/load_balancing/sticking_spec.rb b/spec/lib/gitlab/database/load_balancing/sticking_spec.rb
new file mode 100644
index 00000000000..bf4e3756e0e
--- /dev/null
+++ b/spec/lib/gitlab/database/load_balancing/sticking_spec.rb
@@ -0,0 +1,307 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+RSpec.describe Gitlab::Database::LoadBalancing::Sticking, :redis do
+  after do
+    Gitlab::Database::LoadBalancing::Session.clear_session
+  end
+
+  describe '.stick_if_necessary' do
+    context 'when sticking is disabled' do
+      it 'does not perform any sticking' do
+        expect(described_class).not_to receive(:stick)
+
+        described_class.stick_if_necessary(:user, 42)
+      end
+    end
+
+    context 'when sticking is enabled' do
+      before do
+        allow(Gitlab::Database::LoadBalancing).to receive(:enable?)
+          .and_return(true)
+      end
+
+      it 'does not stick if no write was performed' do
+        allow(Gitlab::Database::LoadBalancing::Session.current)
+          .to receive(:performed_write?)
+          .and_return(false)
+
+        expect(described_class).not_to receive(:stick)
+
+        described_class.stick_if_necessary(:user, 42)
+      end
+
+      it 'sticks to the primary if a write was performed' do
+        allow(Gitlab::Database::LoadBalancing::Session.current)
+          .to receive(:performed_write?)
+          .and_return(true)
+
+        expect(described_class).to receive(:stick).with(:user, 42)
+
+        described_class.stick_if_necessary(:user, 42)
+      end
+    end
+  end
+
+  describe '.all_caught_up?' do
+    let(:lb) { double(:lb) }
+
+    before do
+      allow(described_class).to receive(:load_balancer).and_return(lb)
+    end
+
+    it 'returns true if no write location could be found' do
+      allow(described_class).to receive(:last_write_location_for)
+        .with(:user, 42)
+        .and_return(nil)
+
+      expect(lb).not_to receive(:all_caught_up?)
+ + expect(described_class.all_caught_up?(:user, 42)).to eq(true) + end + + it 'returns true, and unsticks if all secondaries have caught up' do + allow(described_class).to receive(:last_write_location_for) + .with(:user, 42) + .and_return('foo') + + allow(lb).to receive(:all_caught_up?).with('foo').and_return(true) + + expect(described_class).to receive(:unstick).with(:user, 42) + + expect(described_class.all_caught_up?(:user, 42)).to eq(true) + end + + it 'return false if the secondaries have not yet caught up' do + allow(described_class).to receive(:last_write_location_for) + .with(:user, 42) + .and_return('foo') + + allow(lb).to receive(:all_caught_up?).with('foo').and_return(false) + + expect(described_class.all_caught_up?(:user, 42)).to eq(false) + end + end + + describe '.unstick_or_continue_sticking' do + let(:lb) { double(:lb) } + + before do + allow(described_class).to receive(:load_balancer).and_return(lb) + end + + it 'simply returns if no write location could be found' do + allow(described_class).to receive(:last_write_location_for) + .with(:user, 42) + .and_return(nil) + + expect(lb).not_to receive(:all_caught_up?) + + described_class.unstick_or_continue_sticking(:user, 42) + end + + it 'unsticks if all secondaries have caught up' do + allow(described_class).to receive(:last_write_location_for) + .with(:user, 42) + .and_return('foo') + + allow(lb).to receive(:all_caught_up?).with('foo').and_return(true) + + expect(described_class).to receive(:unstick).with(:user, 42) + + described_class.unstick_or_continue_sticking(:user, 42) + end + + it 'continues using the primary if the secondaries have not yet caught up' do + allow(described_class).to receive(:last_write_location_for) + .with(:user, 42) + .and_return('foo') + + allow(lb).to receive(:all_caught_up?).with('foo').and_return(false) + + expect(Gitlab::Database::LoadBalancing::Session.current) + .to receive(:use_primary!) + + described_class.unstick_or_continue_sticking(:user, 42) + end + end + + RSpec.shared_examples 'sticking' do + context 'when sticking is disabled' do + it 'does not perform any sticking', :aggregate_failures do + expect(described_class).not_to receive(:set_write_location_for) + expect(Gitlab::Database::LoadBalancing::Session.current).not_to receive(:use_primary!) + + described_class.bulk_stick(:user, ids) + end + end + + context 'when sticking is enabled' do + before do + allow(Gitlab::Database::LoadBalancing).to receive(:configured?).and_return(true) + + lb = double(:lb, primary_write_location: 'foo') + + allow(described_class).to receive(:load_balancer).and_return(lb) + end + + it 'sticks an entity to the primary', :aggregate_failures do + ids.each do |id| + expect(described_class).to receive(:set_write_location_for) + .with(:user, id, 'foo') + end + + expect(Gitlab::Database::LoadBalancing::Session.current) + .to receive(:use_primary!) 
+
+        subject
+      end
+    end
+  end
+
+  describe '.stick' do
+    it_behaves_like 'sticking' do
+      let(:ids) { [42] }
+      subject { described_class.stick(:user, ids.first) }
+    end
+  end
+
+  describe '.bulk_stick' do
+    it_behaves_like 'sticking' do
+      let(:ids) { [42, 43] }
+      subject { described_class.bulk_stick(:user, ids) }
+    end
+  end
+
+  describe '.mark_primary_write_location' do
+    context 'when enabled' do
+      before do
+        allow(Gitlab::Database::LoadBalancing).to receive(:enable?).and_return(true)
+        allow(Gitlab::Database::LoadBalancing).to receive(:configured?).and_return(true)
+      end
+
+      it 'updates the write location with the load balancer' do
+        lb = double(:lb, primary_write_location: 'foo')
+
+        allow(described_class).to receive(:load_balancer).and_return(lb)
+
+        expect(described_class).to receive(:set_write_location_for)
+          .with(:user, 42, 'foo')
+
+        described_class.mark_primary_write_location(:user, 42)
+      end
+    end
+
+    context 'when load balancing is configured but not enabled' do
+      before do
+        allow(Gitlab::Database::LoadBalancing).to receive(:enable?).and_return(false)
+        allow(Gitlab::Database::LoadBalancing).to receive(:configured?).and_return(true)
+      end
+
+      it 'updates the write location with the main ActiveRecord connection' do
+        allow(described_class).to receive(:load_balancer).and_return(nil)
+        expect(ActiveRecord::Base).to receive(:connection).and_call_original
+        expect(described_class).to receive(:set_write_location_for)
+          .with(:user, 42, anything)
+
+        described_class.mark_primary_write_location(:user, 42)
+      end
+
+      context 'when write location is nil' do
+        before do
+          allow(Gitlab::Database).to receive(:get_write_location).and_return(nil)
+        end
+
+        it 'does not update the write location' do
+          expect(described_class).not_to receive(:set_write_location_for)
+
+          described_class.mark_primary_write_location(:user, 42)
+        end
+      end
+    end
+
+    context 'when load balancing is disabled' do
+      before do
+        allow(Gitlab::Database::LoadBalancing).to receive(:enable?).and_return(false)
+        allow(Gitlab::Database::LoadBalancing).to receive(:configured?).and_return(false)
+      end
+
+      it 'does not update the write location' do
+        expect(described_class).not_to receive(:set_write_location_for)
+
+        described_class.mark_primary_write_location(:user, 42)
+      end
+    end
+  end
+
+  describe '.unstick' do
+    it 'removes the sticking data from Redis' do
+      described_class.set_write_location_for(:user, 4, 'foo')
+      described_class.unstick(:user, 4)
+
+      expect(described_class.last_write_location_for(:user, 4)).to be_nil
+    end
+  end
+
+  describe '.last_write_location_for' do
+    it 'returns the last WAL write location for a user' do
+      described_class.set_write_location_for(:user, 4, 'foo')
+
+      expect(described_class.last_write_location_for(:user, 4)).to eq('foo')
+    end
+  end
+
+  describe '.redis_key_for' do
+    it 'returns a String' do
+      expect(described_class.redis_key_for(:user, 42))
+        .to eq('database-load-balancing/write-location/user/42')
+    end
+  end
+
+  describe '.load_balancer' do
+    it 'returns the load balancer' do
+      proxy = double(:proxy)
+
+      expect(Gitlab::Database::LoadBalancing).to receive(:proxy)
+        .and_return(proxy)
+
+      expect(proxy).to receive(:load_balancer)
+
+      described_class.load_balancer
+    end
+  end
+
+  describe '.select_caught_up_replicas' do
+    let(:lb) { double(:lb) }
+
+    before do
+      allow(described_class).to receive(:load_balancer).and_return(lb)
+    end
+
+    context 'with no write location' do
+      before do
+        allow(described_class).to receive(:last_write_location_for)
+
.with(:project, 42).and_return(nil) + end + + it 'returns false and does not try to find caught up hosts' do + expect(described_class).not_to receive(:select_caught_up_hosts) + expect(described_class.select_caught_up_replicas(:project, 42)).to be false + end + end + + context 'with write location' do + before do + allow(described_class).to receive(:last_write_location_for) + .with(:project, 42).and_return('foo') + end + + it 'returns true, selects hosts, and unsticks if any secondary has caught up' do + expect(lb).to receive(:select_caught_up_hosts).and_return(true) + expect(described_class).to receive(:unstick).with(:project, 42) + expect(described_class.select_caught_up_replicas(:project, 42)).to be true + end + end + end +end diff --git a/spec/lib/gitlab/database/load_balancing_spec.rb b/spec/lib/gitlab/database/load_balancing_spec.rb new file mode 100644 index 00000000000..c3dcfa3eb4a --- /dev/null +++ b/spec/lib/gitlab/database/load_balancing_spec.rb @@ -0,0 +1,859 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::Database::LoadBalancing do + include_context 'clear DB Load Balancing configuration' + + before do + stub_env('ENABLE_LOAD_BALANCING_FOR_FOSS', 'true') + end + + describe '.proxy' do + context 'when configured' do + before do + allow(ActiveRecord::Base.singleton_class).to receive(:prepend) + subject.configure_proxy + end + + it 'returns the connection proxy' do + expect(subject.proxy).to be_an_instance_of(subject::ConnectionProxy) + end + end + + context 'when not configured' do + it 'returns nil' do + expect(subject.proxy).to be_nil + end + + it 'tracks an error to sentry' do + expect(Gitlab::ErrorTracking).to receive(:track_exception).with( + an_instance_of(subject::ProxyNotConfiguredError) + ) + + subject.proxy + end + end + end + + describe '.configuration' do + it 'returns a Hash' do + lb_config = { 'hosts' => %w(foo) } + + original_db_config = Gitlab::Database.config + modified_db_config = original_db_config.merge(load_balancing: lb_config) + expect(Gitlab::Database).to receive(:config).and_return(modified_db_config) + + expect(described_class.configuration).to eq(lb_config) + end + end + + describe '.max_replication_difference' do + context 'without an explicitly configured value' do + it 'returns the default value' do + allow(described_class) + .to receive(:configuration) + .and_return({}) + + expect(described_class.max_replication_difference).to eq(8.megabytes) + end + end + + context 'with an explicitly configured value' do + it 'returns the configured value' do + allow(described_class) + .to receive(:configuration) + .and_return({ 'max_replication_difference' => 4 }) + + expect(described_class.max_replication_difference).to eq(4) + end + end + end + + describe '.max_replication_lag_time' do + context 'without an explicitly configured value' do + it 'returns the default value' do + allow(described_class) + .to receive(:configuration) + .and_return({}) + + expect(described_class.max_replication_lag_time).to eq(60) + end + end + + context 'with an explicitly configured value' do + it 'returns the configured value' do + allow(described_class) + .to receive(:configuration) + .and_return({ 'max_replication_lag_time' => 4 }) + + expect(described_class.max_replication_lag_time).to eq(4) + end + end + end + + describe '.replica_check_interval' do + context 'without an explicitly configured value' do + it 'returns the default value' do + allow(described_class) + .to receive(:configuration) + .and_return({}) + + 
expect(described_class.replica_check_interval).to eq(60) + end + end + + context 'with an explicitly configured value' do + it 'returns the configured value' do + allow(described_class) + .to receive(:configuration) + .and_return({ 'replica_check_interval' => 4 }) + + expect(described_class.replica_check_interval).to eq(4) + end + end + end + + describe '.hosts' do + it 'returns a list of hosts' do + allow(described_class) + .to receive(:configuration) + .and_return({ 'hosts' => %w(foo bar baz) }) + + expect(described_class.hosts).to eq(%w(foo bar baz)) + end + end + + describe '.pool_size' do + it 'returns a Fixnum' do + expect(described_class.pool_size).to be_a_kind_of(Integer) + end + end + + describe '.enable?' do + before do + clear_load_balancing_configuration + allow(described_class).to receive(:hosts).and_return(%w(foo)) + end + + it 'returns false when no hosts are specified' do + allow(described_class).to receive(:hosts).and_return([]) + + expect(described_class.enable?).to eq(false) + end + + it 'returns false when Sidekiq is being used' do + allow(Gitlab::Runtime).to receive(:sidekiq?).and_return(true) + + expect(described_class.enable?).to eq(false) + end + + it 'returns false when running inside a Rake task' do + allow(Gitlab::Runtime).to receive(:rake?).and_return(true) + + expect(described_class.enable?).to eq(false) + end + + it 'returns true when load balancing should be enabled' do + allow(Gitlab::Runtime).to receive(:sidekiq?).and_return(false) + + expect(described_class.enable?).to eq(true) + end + + it 'returns true when service discovery is enabled' do + allow(described_class).to receive(:hosts).and_return([]) + allow(Gitlab::Runtime).to receive(:sidekiq?).and_return(false) + + allow(described_class) + .to receive(:service_discovery_enabled?) + .and_return(true) + + expect(described_class.enable?).to eq(true) + end + + context 'when ENABLE_LOAD_BALANCING_FOR_SIDEKIQ environment variable is set' do + before do + stub_env('ENABLE_LOAD_BALANCING_FOR_SIDEKIQ', 'true') + end + + it 'returns true when Sidekiq is being used' do + allow(Gitlab::Runtime).to receive(:sidekiq?).and_return(true) + + expect(described_class.enable?).to eq(true) + end + end + + context 'FOSS' do + before do + allow(Gitlab).to receive(:ee?).and_return(false) + + stub_env('ENABLE_LOAD_BALANCING_FOR_FOSS', 'false') + end + + it 'is disabled' do + expect(described_class.enable?).to eq(false) + end + end + + context 'EE' do + before do + allow(Gitlab).to receive(:ee?).and_return(true) + end + + it 'is enabled' do + allow(described_class).to receive(:hosts).and_return(%w(foo)) + allow(Gitlab::Runtime).to receive(:sidekiq?).and_return(false) + + expect(described_class.enable?).to eq(true) + end + end + end + + describe '.configured?' do + before do + clear_load_balancing_configuration + end + + it 'returns true when Sidekiq is being used' do + allow(described_class).to receive(:hosts).and_return(%w(foo)) + allow(Gitlab::Runtime).to receive(:sidekiq?).and_return(true) + expect(described_class.configured?).to eq(true) + end + + it 'returns true when service discovery is enabled in Sidekiq' do + allow(described_class).to receive(:hosts).and_return([]) + allow(Gitlab::Runtime).to receive(:sidekiq?).and_return(true) + + allow(described_class) + .to receive(:service_discovery_enabled?) 
+ .and_return(true) + + expect(described_class.configured?).to eq(true) + end + + it 'returns false when neither service discovery nor hosts are configured' do + allow(described_class).to receive(:hosts).and_return([]) + + allow(described_class) + .to receive(:service_discovery_enabled?) + .and_return(false) + + expect(described_class.configured?).to eq(false) + end + end + + describe '.configure_proxy' do + it 'configures the connection proxy' do + allow(ActiveRecord::Base.singleton_class).to receive(:prepend) + + described_class.configure_proxy + + expect(ActiveRecord::Base.singleton_class).to have_received(:prepend) + .with(Gitlab::Database::LoadBalancing::ActiveRecordProxy) + end + end + + describe '.active_record_models' do + it 'returns an Array' do + expect(described_class.active_record_models).to be_an_instance_of(Array) + end + end + + describe '.service_discovery_enabled?' do + it 'returns true if service discovery is enabled' do + allow(described_class) + .to receive(:configuration) + .and_return('discover' => { 'record' => 'foo' }) + + expect(described_class.service_discovery_enabled?).to eq(true) + end + + it 'returns false if service discovery is disabled' do + expect(described_class.service_discovery_enabled?).to eq(false) + end + end + + describe '.service_discovery_configuration' do + context 'when no configuration is provided' do + it 'returns a default configuration Hash' do + expect(described_class.service_discovery_configuration).to eq( + nameserver: 'localhost', + port: 8600, + record: nil, + record_type: 'A', + interval: 60, + disconnect_timeout: 120, + use_tcp: false + ) + end + end + + context 'when configuration is provided' do + it 'returns a Hash including the custom configuration' do + allow(described_class) + .to receive(:configuration) + .and_return('discover' => { 'record' => 'foo', 'record_type' => 'SRV' }) + + expect(described_class.service_discovery_configuration).to eq( + nameserver: 'localhost', + port: 8600, + record: 'foo', + record_type: 'SRV', + interval: 60, + disconnect_timeout: 120, + use_tcp: false + ) + end + end + end + + describe '.start_service_discovery' do + it 'does not start if service discovery is disabled' do + expect(Gitlab::Database::LoadBalancing::ServiceDiscovery) + .not_to receive(:new) + + described_class.start_service_discovery + end + + it 'starts service discovery if enabled' do + allow(described_class) + .to receive(:service_discovery_enabled?) 
+        .and_return(true)
+
+      instance = double(:instance)
+
+      expect(Gitlab::Database::LoadBalancing::ServiceDiscovery)
+        .to receive(:new)
+        .with(an_instance_of(Hash))
+        .and_return(instance)
+
+      expect(instance)
+        .to receive(:start)
+
+      described_class.start_service_discovery
+    end
+  end
+
+  describe '.db_role_for_connection' do
+    let(:connection) { double(:connection) }
+
+    context 'when the load balancing is not configured' do
+      before do
+        allow(described_class).to receive(:enable?).and_return(false)
+      end
+
+      it 'returns primary' do
+        expect(described_class.db_role_for_connection(connection)).to be(:primary)
+      end
+    end
+
+    context 'when the load balancing is configured' do
+      let(:proxy) { described_class::ConnectionProxy.new(%w(foo)) }
+      let(:load_balancer) { described_class::LoadBalancer.new(%w(foo)) }
+
+      before do
+        allow(ActiveRecord::Base.singleton_class).to receive(:prepend)
+
+        allow(described_class).to receive(:enable?).and_return(true)
+        allow(described_class).to receive(:proxy).and_return(proxy)
+        allow(proxy).to receive(:load_balancer).and_return(load_balancer)
+
+        subject.configure_proxy(proxy)
+      end
+
+      context 'when the load balancer returns :replica' do
+        it 'returns :replica' do
+          allow(load_balancer).to receive(:db_role_for_connection).and_return(:replica)
+
+          expect(described_class.db_role_for_connection(connection)).to be(:replica)
+
+          expect(load_balancer).to have_received(:db_role_for_connection).with(connection)
+        end
+      end
+
+      context 'when the load balancer returns :primary' do
+        it 'returns :primary' do
+          allow(load_balancer).to receive(:db_role_for_connection).and_return(:primary)
+
+          expect(described_class.db_role_for_connection(connection)).to be(:primary)
+
+          expect(load_balancer).to have_received(:db_role_for_connection).with(connection)
+        end
+      end
+
+      context 'when the load balancer returns nil' do
+        it 'returns nil' do
+          allow(load_balancer).to receive(:db_role_for_connection).and_return(nil)
+
+          expect(described_class.db_role_for_connection(connection)).to be(nil)
+
+          expect(load_balancer).to have_received(:db_role_for_connection).with(connection)
+        end
+      end
+    end
+  end
+
+  # For a module as important as LoadBalancing, full mocking is not enough.
+  # This section implements some integration tests to test a full flow of the
+  # load balancer.
+  # - A real model backed by a real table is defined
+  # - The load balancing module is set up for this module only, to avoid
+  #   breaking other tests. The replica configuration is cloned from the test
+  #   configuration.
+  # - In each test, we listen to the SQL queries (via sql.active_record
+  #   instrumentation) while triggering real queries from the defined model.
+  # - We assert the destinations (replica/primary) of the queries in order.
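+  #
+  # As an illustrative sketch (not part of the assertions below), the role of a
+  # single query can be resolved from the notification payload roughly like so:
+  #
+  #   ActiveSupport::Notifications.subscribe('sql.active_record') do |event|
+  #     # Returns :primary, :replica, or nil for connections the proxy does not track
+  #     ::Gitlab::Database::LoadBalancing.db_role_for_connection(event.payload[:connection])
+  #   end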
+ describe 'LoadBalancing integration tests', :delete do + before(:all) do + ActiveRecord::Schema.define do + create_table :load_balancing_test, force: true do |t| + t.string :name, null: true + end + end + end + + after(:all) do + ActiveRecord::Schema.define do + drop_table :load_balancing_test, force: true + end + end + + shared_context 'LoadBalancing setup' do + let(:development_db_config) { ActiveRecord::Base.configurations.default_hash("development").with_indifferent_access } + let(:hosts) { [development_db_config[:host]] } + let(:model) do + Class.new(ApplicationRecord) do + self.table_name = "load_balancing_test" + end + end + + before do + # Preloading testing class + model.singleton_class.prepend ::Gitlab::Database::LoadBalancing::ActiveRecordProxy + + # Setup load balancing + clear_load_balancing_configuration + allow(ActiveRecord::Base.singleton_class).to receive(:prepend) + subject.configure_proxy(::Gitlab::Database::LoadBalancing::ConnectionProxy.new(hosts)) + + original_db_config = Gitlab::Database.config + modified_db_config = original_db_config.merge(load_balancing: { hosts: hosts }) + allow(Gitlab::Database).to receive(:config).and_return(modified_db_config) + + ::Gitlab::Database::LoadBalancing::Session.clear_session + end + end + + where(:queries, :include_transaction, :expected_results) do + [ + # Read methods + [-> { model.first }, false, [:replica]], + [-> { model.find_by(id: 123) }, false, [:replica]], + [-> { model.where(name: 'hello').to_a }, false, [:replica]], + + # Write methods + [-> { model.create!(name: 'test1') }, false, [:primary]], + [ + -> { + instance = model.create!(name: 'test1') + instance.update!(name: 'test2') + }, + false, [:primary, :primary] + ], + [-> { model.update_all(name: 'test2') }, false, [:primary]], + [ + -> { + instance = model.create!(name: 'test1') + instance.destroy! + }, + false, [:primary, :primary] + ], + [-> { model.delete_all }, false, [:primary]], + + # Custom query + [-> { model.connection.exec_query('SELECT 1').to_a }, false, [:primary]], + + # Reads after a write + [ + -> { + model.first + model.create!(name: 'test1') + model.first + model.find_by(name: 'test1') + }, + false, [:replica, :primary, :primary, :primary] + ], + + # Inside a transaction + [ + -> { + model.transaction do + model.find_by(name: 'test1') + model.create!(name: 'test1') + instance = model.find_by(name: 'test1') + instance.update!(name: 'test2') + end + model.find_by(name: 'test1') + }, + true, [:primary, :primary, :primary, :primary, :primary, :primary, :primary] + ], + + # Nested transaction + [ + -> { + model.transaction do + model.transaction do + model.create!(name: 'test1') + end + model.update_all(name: 'test2') + end + model.find_by(name: 'test1') + }, + true, [:primary, :primary, :primary, :primary, :primary] + ], + + # Read-only transaction + [ + -> { + model.transaction do + model.first + model.where(name: 'test1').to_a + end + }, + true, [:primary, :primary, :primary, :primary] + ], + + # use_primary + [ + -> { + ::Gitlab::Database::LoadBalancing::Session.current.use_primary do + model.first + model.where(name: 'test1').to_a + end + model.first + }, + false, [:primary, :primary, :replica] + ], + + # use_primary! + [ + -> { + model.first + ::Gitlab::Database::LoadBalancing::Session.current.use_primary! 
+ model.where(name: 'test1').to_a + }, + false, [:replica, :primary] + ], + + # use_replicas_for_read_queries does not affect read queries + [ + -> { + ::Gitlab::Database::LoadBalancing::Session.current.use_replicas_for_read_queries do + model.where(name: 'test1').to_a + end + }, + false, [:replica] + ], + + # use_replicas_for_read_queries does not affect write queries + [ + -> { + ::Gitlab::Database::LoadBalancing::Session.current.use_replicas_for_read_queries do + model.create!(name: 'test1') + end + }, + false, [:primary] + ], + + # use_replicas_for_read_queries does not affect ambiguous queries + [ + -> { + ::Gitlab::Database::LoadBalancing::Session.current.use_replicas_for_read_queries do + model.connection.exec_query("SELECT 1") + end + }, + false, [:primary] + ], + + # use_replicas_for_read_queries ignores use_primary! for read queries + [ + -> { + ::Gitlab::Database::LoadBalancing::Session.current.use_primary! + ::Gitlab::Database::LoadBalancing::Session.current.use_replicas_for_read_queries do + model.where(name: 'test1').to_a + end + }, + false, [:replica] + ], + + # use_replicas_for_read_queries adheres use_primary! for write queries + [ + -> { + ::Gitlab::Database::LoadBalancing::Session.current.use_primary! + ::Gitlab::Database::LoadBalancing::Session.current.use_replicas_for_read_queries do + model.create!(name: 'test1') + end + }, + false, [:primary] + ], + + # use_replicas_for_read_queries adheres use_primary! for ambiguous queries + [ + -> { + ::Gitlab::Database::LoadBalancing::Session.current.use_primary! + ::Gitlab::Database::LoadBalancing::Session.current.use_replicas_for_read_queries do + model.connection.exec_query('SELECT 1') + end + }, + false, [:primary] + ], + + # use_replicas_for_read_queries ignores use_primary blocks + [ + -> { + ::Gitlab::Database::LoadBalancing::Session.current.use_primary do + ::Gitlab::Database::LoadBalancing::Session.current.use_replicas_for_read_queries do + model.where(name: 'test1').to_a + end + end + }, + false, [:replica] + ], + + # use_replicas_for_read_queries ignores a session already performed write + [ + -> { + ::Gitlab::Database::LoadBalancing::Session.current.write! 
+ ::Gitlab::Database::LoadBalancing::Session.current.use_replicas_for_read_queries do + model.where(name: 'test1').to_a + end + }, + false, [:replica] + ], + + # fallback_to_replicas_for_ambiguous_queries + [ + -> { + ::Gitlab::Database::LoadBalancing::Session.current.fallback_to_replicas_for_ambiguous_queries do + model.first + model.where(name: 'test1').to_a + end + }, + false, [:replica, :replica] + ], + + # fallback_to_replicas_for_ambiguous_queries for read-only transaction + [ + -> { + ::Gitlab::Database::LoadBalancing::Session.current.fallback_to_replicas_for_ambiguous_queries do + model.transaction do + model.first + model.where(name: 'test1').to_a + end + end + }, + false, [:replica, :replica] + ], + + # A custom read query inside fallback_to_replicas_for_ambiguous_queries + [ + -> { + ::Gitlab::Database::LoadBalancing::Session.current.fallback_to_replicas_for_ambiguous_queries do + model.connection.exec_query("SELECT 1") + end + }, + false, [:replica] + ], + + # A custom read query inside a transaction fallback_to_replicas_for_ambiguous_queries + [ + -> { + ::Gitlab::Database::LoadBalancing::Session.current.fallback_to_replicas_for_ambiguous_queries do + model.transaction do + model.connection.exec_query("SET LOCAL statement_timeout = 5000") + model.count + end + end + }, + true, [:replica, :replica, :replica, :replica] + ], + + # fallback_to_replicas_for_ambiguous_queries after a write + [ + -> { + model.create!(name: 'Test1') + ::Gitlab::Database::LoadBalancing::Session.current.fallback_to_replicas_for_ambiguous_queries do + model.connection.exec_query("SELECT 1") + end + }, + false, [:primary, :primary] + ], + + # fallback_to_replicas_for_ambiguous_queries after use_primary! + [ + -> { + ::Gitlab::Database::LoadBalancing::Session.current.use_primary! 
+ ::Gitlab::Database::LoadBalancing::Session.current.fallback_to_replicas_for_ambiguous_queries do + model.connection.exec_query("SELECT 1") + end + }, + false, [:primary] + ], + + # fallback_to_replicas_for_ambiguous_queries inside use_primary + [ + -> { + ::Gitlab::Database::LoadBalancing::Session.current.use_primary do + ::Gitlab::Database::LoadBalancing::Session.current.fallback_to_replicas_for_ambiguous_queries do + model.connection.exec_query("SELECT 1") + end + end + }, + false, [:primary] + ], + + # use_primary inside fallback_to_replicas_for_ambiguous_queries + [ + -> { + ::Gitlab::Database::LoadBalancing::Session.current.fallback_to_replicas_for_ambiguous_queries do + ::Gitlab::Database::LoadBalancing::Session.current.use_primary do + model.connection.exec_query("SELECT 1") + end + end + }, + false, [:primary] + ], + + # A write query inside fallback_to_replicas_for_ambiguous_queries + [ + -> { + ::Gitlab::Database::LoadBalancing::Session.current.fallback_to_replicas_for_ambiguous_queries do + model.connection.exec_query("SELECT 1") + model.delete_all + model.connection.exec_query("SELECT 1") + end + }, + false, [:replica, :primary, :primary] + ], + + # use_replicas_for_read_queries incorporates with fallback_to_replicas_for_ambiguous_queries + [ + -> { + ::Gitlab::Database::LoadBalancing::Session.current.use_replicas_for_read_queries do + ::Gitlab::Database::LoadBalancing::Session.current.fallback_to_replicas_for_ambiguous_queries do + model.connection.exec_query('SELECT 1') + model.where(name: 'test1').to_a + end + end + }, + false, [:replica, :replica] + ] + ] + end + + with_them do + include_context 'LoadBalancing setup' + + it 'redirects queries to the right roles' do + roles = [] + + subscriber = ActiveSupport::Notifications.subscribe('sql.active_record') do |event| + payload = event.payload + + assert = + if payload[:name] == 'SCHEMA' + false + elsif payload[:name] == 'SQL' # Custom query + true + else + keywords = %w[load_balancing_test] + keywords += %w[begin commit] if include_transaction + keywords.any? { |keyword| payload[:sql].downcase.include?(keyword) } + end + + if assert + db_role = ::Gitlab::Database::LoadBalancing.db_role_for_connection(payload[:connection]) + roles << db_role + end + end + + self.instance_exec(&queries) + + expect(roles).to eql(expected_results) + ensure + ActiveSupport::Notifications.unsubscribe(subscriber) if subscriber + end + end + + context 'custom connection handling' do + where(:queries, :expected_role) do + [ + # Reload cache. The schema loading queries should be handled by + # primary. + [ + -> { + model.connection.clear_cache! + model.connection.schema_cache.add('users') + model.connection.pool.release_connection + }, + :primary + ], + + # Call model's connection method + [ + -> { + connection = model.connection + connection.select_one('SELECT 1') + connection.pool.release_connection + }, + :replica + ], + + # Retrieve connection via #retrieve_connection + [ + -> { + connection = model.retrieve_connection + connection.select_one('SELECT 1') + connection.pool.release_connection + }, + :primary + ] + ] + end + + with_them do + include_context 'LoadBalancing setup' + + it 'redirects queries to the right roles' do + roles = [] + + subscriber = ActiveSupport::Notifications.subscribe('sql.active_record') do |event| + role = ::Gitlab::Database::LoadBalancing.db_role_for_connection(event.payload[:connection]) + roles << role if role.present? 
+ end + + self.instance_exec(&queries) + + expect(roles).to all(eql(expected_role)) + ensure + ActiveSupport::Notifications.unsubscribe(subscriber) if subscriber + end + end + end + + context 'a write inside a transaction inside fallback_to_replicas_for_ambiguous_queries block' do + include_context 'LoadBalancing setup' + + it 'raises an exception' do + expect do + ::Gitlab::Database::LoadBalancing::Session.current.fallback_to_replicas_for_ambiguous_queries do + model.transaction do + model.first + model.create!(name: 'hello') + end + end + end.to raise_error(Gitlab::Database::LoadBalancing::ConnectionProxy::WriteInsideReadOnlyTransactionError) + end + end + end +end diff --git a/spec/lib/gitlab/database_spec.rb b/spec/lib/gitlab/database_spec.rb index 663c8d69328..2b31f3b4dee 100644 --- a/spec/lib/gitlab/database_spec.rb +++ b/spec/lib/gitlab/database_spec.rb @@ -65,6 +65,28 @@ RSpec.describe Gitlab::Database do end end + describe '.disable_prepared_statements' do + around do |example| + original_config = ::Gitlab::Database.config + + example.run + + ActiveRecord::Base.establish_connection(original_config) + end + + it 'disables prepared statements' do + ActiveRecord::Base.establish_connection(::Gitlab::Database.config.merge(prepared_statements: true)) + expect(ActiveRecord::Base.connection.prepared_statements).to eq(true) + + expect(ActiveRecord::Base).to receive(:establish_connection) + .with(a_hash_including({ 'prepared_statements' => false })).and_call_original + + described_class.disable_prepared_statements + + expect(ActiveRecord::Base.connection.prepared_statements).to eq(false) + end + end + describe '.postgresql?' do subject { described_class.postgresql? } diff --git a/spec/lib/gitlab/git/repository_spec.rb b/spec/lib/gitlab/git/repository_spec.rb index 1ddbdda12b5..d9f7dc780b5 100644 --- a/spec/lib/gitlab/git/repository_spec.rb +++ b/spec/lib/gitlab/git/repository_spec.rb @@ -521,7 +521,9 @@ RSpec.describe Gitlab::Git::Repository, :seed_helper do no_tags: true, timeout: described_class::GITLAB_PROJECTS_TIMEOUT, prune: false, - check_tags_changed: false + check_tags_changed: false, + url: nil, + refmap: nil } expect(repository.gitaly_repository_client).to receive(:fetch_remote).with('remote-name', expected_opts) diff --git a/spec/lib/gitlab/gitaly_client/repository_service_spec.rb b/spec/lib/gitlab/gitaly_client/repository_service_spec.rb index 26ec194a2e7..56c8fe20eca 100644 --- a/spec/lib/gitlab/gitaly_client/repository_service_spec.rb +++ b/spec/lib/gitlab/gitaly_client/repository_service_spec.rb @@ -122,69 +122,91 @@ RSpec.describe Gitlab::GitalyClient::RepositoryService do end describe '#fetch_remote' do - let(:remote) { 'remote-name' } + shared_examples 'a fetch' do + it 'sends a fetch_remote_request message' do + expected_remote_params = Gitaly::Remote.new( + url: url, http_authorization_header: "", mirror_refmaps: []) - it 'sends a fetch_remote_request message' do - expected_request = gitaly_request_with_params( - remote: remote, - ssh_key: '', - known_hosts: '', - force: false, - no_tags: false, - no_prune: false, - check_tags_changed: false - ) + expected_request = gitaly_request_with_params( + remote: remote, + remote_params: url ? 
expected_remote_params : nil, + ssh_key: '', + known_hosts: '', + force: false, + no_tags: false, + no_prune: false, + check_tags_changed: false + ) - expect_any_instance_of(Gitaly::RepositoryService::Stub) - .to receive(:fetch_remote) - .with(expected_request, kind_of(Hash)) - .and_return(double(value: true)) + expect_any_instance_of(Gitaly::RepositoryService::Stub) + .to receive(:fetch_remote) + .with(expected_request, kind_of(Hash)) + .and_return(double(value: true)) - client.fetch_remote(remote, ssh_auth: nil, forced: false, no_tags: false, timeout: 1, check_tags_changed: false) - end - - context 'SSH auth' do - where(:ssh_mirror_url, :ssh_key_auth, :ssh_private_key, :ssh_known_hosts, :expected_params) do - false | false | 'key' | 'known_hosts' | {} - false | true | 'key' | 'known_hosts' | {} - true | false | 'key' | 'known_hosts' | { known_hosts: 'known_hosts' } - true | true | 'key' | 'known_hosts' | { ssh_key: 'key', known_hosts: 'known_hosts' } - true | true | 'key' | nil | { ssh_key: 'key' } - true | true | nil | 'known_hosts' | { known_hosts: 'known_hosts' } - true | true | nil | nil | {} - true | true | '' | '' | {} + client.fetch_remote(remote, url: url, refmap: nil, ssh_auth: nil, forced: false, no_tags: false, timeout: 1, check_tags_changed: false) end - with_them do - let(:ssh_auth) do - double( - :ssh_auth, - ssh_mirror_url?: ssh_mirror_url, - ssh_key_auth?: ssh_key_auth, - ssh_private_key: ssh_private_key, - ssh_known_hosts: ssh_known_hosts - ) + context 'SSH auth' do + where(:ssh_mirror_url, :ssh_key_auth, :ssh_private_key, :ssh_known_hosts, :expected_params) do + false | false | 'key' | 'known_hosts' | {} + false | true | 'key' | 'known_hosts' | {} + true | false | 'key' | 'known_hosts' | { known_hosts: 'known_hosts' } + true | true | 'key' | 'known_hosts' | { ssh_key: 'key', known_hosts: 'known_hosts' } + true | true | 'key' | nil | { ssh_key: 'key' } + true | true | nil | 'known_hosts' | { known_hosts: 'known_hosts' } + true | true | nil | nil | {} + true | true | '' | '' | {} end - it do - expected_request = gitaly_request_with_params({ - remote: remote, - ssh_key: '', - known_hosts: '', - force: false, - no_tags: false, - no_prune: false - }.update(expected_params)) + with_them do + let(:ssh_auth) do + double( + :ssh_auth, + ssh_mirror_url?: ssh_mirror_url, + ssh_key_auth?: ssh_key_auth, + ssh_private_key: ssh_private_key, + ssh_known_hosts: ssh_known_hosts + ) + end - expect_any_instance_of(Gitaly::RepositoryService::Stub) - .to receive(:fetch_remote) - .with(expected_request, kind_of(Hash)) - .and_return(double(value: true)) + it do + expected_remote_params = Gitaly::Remote.new( + url: url, http_authorization_header: "", mirror_refmaps: []) - client.fetch_remote(remote, ssh_auth: ssh_auth, forced: false, no_tags: false, timeout: 1) + expected_request = gitaly_request_with_params({ + remote: remote, + remote_params: url ? 
expected_remote_params : nil, + ssh_key: '', + known_hosts: '', + force: false, + no_tags: false, + no_prune: false + }.update(expected_params)) + + expect_any_instance_of(Gitaly::RepositoryService::Stub) + .to receive(:fetch_remote) + .with(expected_request, kind_of(Hash)) + .and_return(double(value: true)) + + client.fetch_remote(remote, url: url, refmap: nil, ssh_auth: ssh_auth, forced: false, no_tags: false, timeout: 1) + end end end end + + context 'with remote' do + it_behaves_like 'a fetch' do + let(:remote) { 'remote-name' } + let(:url) { nil } + end + end + + context 'with URL' do + it_behaves_like 'a fetch' do + let(:remote) { "" } + let(:url) { 'https://example.com/git/repo.git' } + end + end end describe '#rebase_in_progress?' do diff --git a/spec/lib/gitlab/github_import/importer/pull_requests_importer_spec.rb b/spec/lib/gitlab/github_import/importer/pull_requests_importer_spec.rb index 8a7867f3841..133d515246a 100644 --- a/spec/lib/gitlab/github_import/importer/pull_requests_importer_spec.rb +++ b/spec/lib/gitlab/github_import/importer/pull_requests_importer_spec.rb @@ -3,7 +3,8 @@ require 'spec_helper' RSpec.describe Gitlab::GithubImport::Importer::PullRequestsImporter do - let(:project) { create(:project, import_source: 'foo/bar') } + let(:url) { 'https://github.com/foo/bar.git' } + let(:project) { create(:project, import_source: 'foo/bar', import_url: url) } let(:client) { double(:client) } let(:pull_request) do @@ -147,14 +148,10 @@ RSpec.describe Gitlab::GithubImport::Importer::PullRequestsImporter do end end - describe '#update_repository' do + shared_examples '#update_repository' do it 'updates the repository' do importer = described_class.new(project, client) - expect(project.repository) - .to receive(:fetch_remote) - .with('github', forced: false) - expect_next_instance_of(Gitlab::Import::Logger) do |logger| expect(logger) .to receive(:info) @@ -173,6 +170,28 @@ RSpec.describe Gitlab::GithubImport::Importer::PullRequestsImporter do end end + describe '#update_repository with :fetch_remote_params enabled' do + before do + stub_feature_flags(fetch_remote_params: true) + expect(project.repository) + .to receive(:fetch_remote) + .with('github', forced: false, url: url, refmap: Gitlab::GithubImport.refmap) + end + + it_behaves_like '#update_repository' + end + + describe '#update_repository with :fetch_remote_params disabled' do + before do + stub_feature_flags(fetch_remote_params: false) + expect(project.repository) + .to receive(:fetch_remote) + .with('github', forced: false) + end + + it_behaves_like '#update_repository' + end + describe '#update_repository?' 
do let(:importer) { described_class.new(project, client) } diff --git a/spec/models/application_record_spec.rb b/spec/models/application_record_spec.rb index 24de46cb536..85a6717d259 100644 --- a/spec/models/application_record_spec.rb +++ b/spec/models/application_record_spec.rb @@ -132,5 +132,47 @@ RSpec.describe ApplicationRecord do end.to raise_error(ActiveRecord::QueryCanceled) end end + + context 'with database load balancing' do + let(:session) { double(:session) } + + before do + allow(::Gitlab::Database::LoadBalancing::Session).to receive(:current).and_return(session) + allow(session).to receive(:fallback_to_replicas_for_ambiguous_queries).and_yield + end + + it 'yields control' do + expect do |blk| + described_class.with_fast_read_statement_timeout(&blk) + end.to yield_control.once + end + + context 'when the query runs faster than configured timeout' do + it 'executes the query without error' do + result = nil + + expect do + described_class.with_fast_read_statement_timeout(100) do + result = described_class.connection.exec_query('SELECT 1') + end + end.not_to raise_error + + expect(result).not_to be_nil + end + end + + # This query hangs for 10ms and then gets cancelled. As there is no + # other way to test the timeout for sure, 10ms of waiting seems to be + # reasonable! + context 'when the query runs longer than configured timeout' do + it 'cancels the query and raiss an exception' do + expect do + described_class.with_fast_read_statement_timeout(10) do + described_class.connection.exec_query('SELECT pg_sleep(0.1)') + end + end.to raise_error(ActiveRecord::QueryCanceled) + end + end + end end end diff --git a/spec/models/project_services/chat_notification_service_spec.rb b/spec/models/project_services/chat_notification_service_spec.rb index 62f97873a06..192b1df33b5 100644 --- a/spec/models/project_services/chat_notification_service_spec.rb +++ b/spec/models/project_services/chat_notification_service_spec.rb @@ -89,12 +89,6 @@ RSpec.describe ChatNotificationService do let(:data) { Gitlab::DataBuilder::Note.build(note, user) } - it 'notifies the chat service' do - expect(chat_service).to receive(:notify).with(any_args) - - chat_service.execute(data) - end - shared_examples 'notifies the chat service' do specify do expect(chat_service).to receive(:notify).with(any_args) @@ -111,6 +105,26 @@ RSpec.describe ChatNotificationService do end end + it_behaves_like 'notifies the chat service' + + context 'with label filter' do + subject(:chat_service) { described_class.new(labels_to_be_notified: '~Bug') } + + it_behaves_like 'notifies the chat service' + + context 'MergeRequest events' do + let(:data) { create(:merge_request, labels: [label]).to_hook_data(user) } + + it_behaves_like 'notifies the chat service' + end + + context 'Issue events' do + let(:data) { issue.to_hook_data(user) } + + it_behaves_like 'notifies the chat service' + end + end + context 'when labels_to_be_notified_behavior is not defined' do subject(:chat_service) { described_class.new(labels_to_be_notified: label_filter) } diff --git a/spec/models/repository_spec.rb b/spec/models/repository_spec.rb index 7748846f6a5..b6f09babb4b 100644 --- a/spec/models/repository_spec.rb +++ b/spec/models/repository_spec.rb @@ -1123,6 +1123,70 @@ RSpec.describe Repository do end end + describe '#fetch_as_mirror' do + let(:url) { "http://example.com" } + + context 'when :fetch_remote_params is enabled' do + let(:remote_name) { "remote-name" } + + before do + stub_feature_flags(fetch_remote_params: true) + end + + it 'fetches the URL 
without creating a remote' do + expect(repository).not_to receive(:add_remote) + expect(repository) + .to receive(:fetch_remote) + .with(remote_name, url: url, forced: false, prune: true, refmap: :all_refs) + .and_return(nil) + + repository.fetch_as_mirror(url, remote_name: remote_name) + end + end + + context 'when :fetch_remote_params is disabled' do + before do + stub_feature_flags(fetch_remote_params: false) + end + + shared_examples 'a fetch' do + it 'adds and fetches a remote' do + expect(repository) + .to receive(:add_remote) + .with(expected_remote, url, mirror_refmap: :all_refs) + .and_return(nil) + expect(repository) + .to receive(:fetch_remote) + .with(expected_remote, forced: false, prune: true) + .and_return(nil) + + repository.fetch_as_mirror(url, remote_name: remote_name) + end + end + + context 'with temporary remote' do + let(:remote_name) { nil } + let(:expected_remote_suffix) { "123456" } + let(:expected_remote) { "tmp-#{expected_remote_suffix}" } + + before do + expect(repository) + .to receive(:async_remove_remote).with(expected_remote).and_return(nil) + allow(SecureRandom).to receive(:hex).and_return(expected_remote_suffix) + end + + it_behaves_like 'a fetch' + end + + context 'with remote name' do + let(:remote_name) { "foo" } + let(:expected_remote) { "foo" } + + it_behaves_like 'a fetch' + end + end + end + describe '#fetch_ref' do let(:broken_repository) { create(:project, :broken_storage).repository } diff --git a/spec/requests/api/users_preferences_spec.rb b/spec/requests/api/users_preferences_spec.rb index db03786ed2a..97e37263ee6 100644 --- a/spec/requests/api/users_preferences_spec.rb +++ b/spec/requests/api/users_preferences_spec.rb @@ -8,11 +8,19 @@ RSpec.describe API::Users do describe 'PUT /user/preferences/' do context "with correct attributes and a logged in user" do it 'returns a success status and the value has been changed' do - put api("/user/preferences", user), params: { view_diffs_file_by_file: true } + put api("/user/preferences", user), params: { + view_diffs_file_by_file: true, + show_whitespace_in_diffs: true + } expect(response).to have_gitlab_http_status(:ok) expect(json_response['view_diffs_file_by_file']).to eq(true) - expect(user.reload.view_diffs_file_by_file).to be_truthy + expect(json_response['show_whitespace_in_diffs']).to eq(true) + + user.reload + + expect(user.view_diffs_file_by_file).to be_truthy + expect(user.show_whitespace_in_diffs).to be_truthy end end diff --git a/spec/support/shared_contexts/load_balancing_configuration_shared_context.rb b/spec/support/shared_contexts/load_balancing_configuration_shared_context.rb new file mode 100644 index 00000000000..8e27d7987e8 --- /dev/null +++ b/spec/support/shared_contexts/load_balancing_configuration_shared_context.rb @@ -0,0 +1,21 @@ +# frozen_string_literal: true + +RSpec.shared_context 'clear DB Load Balancing configuration' do + def clear_load_balancing_configuration + proxy = ::Gitlab::Database::LoadBalancing.instance_variable_get(:@proxy) + proxy.load_balancer.release_host if proxy + ::Gitlab::Database::LoadBalancing.instance_variable_set(:@proxy, nil) + + ::Gitlab::Database::LoadBalancing.remove_instance_variable(:@feature_available) if ::Gitlab::Database::LoadBalancing.instance_variable_defined?(:@feature_available) + + ::Gitlab::Database::LoadBalancing::Session.clear_session + end + + around do |example| + clear_load_balancing_configuration + + example.run + + clear_load_balancing_configuration + end +end diff --git 
diff --git a/spec/workers/analytics/instance_statistics/count_job_trigger_worker_spec.rb b/spec/workers/analytics/instance_statistics/count_job_trigger_worker_spec.rb
deleted file mode 100644
index da0cbe37400..00000000000
--- a/spec/workers/analytics/instance_statistics/count_job_trigger_worker_spec.rb
+++ /dev/null
@@ -1,17 +0,0 @@
-# frozen_string_literal: true
-
-require 'spec_helper'
-
-RSpec.describe Analytics::InstanceStatistics::CountJobTriggerWorker do
-  it_behaves_like 'an idempotent worker'
-
-  context 'triggers a job for each measurement identifiers' do
-    let(:expected_count) { Analytics::UsageTrends::Measurement.identifier_query_mapping.keys.size }
-
-    it 'triggers CounterJobWorker jobs' do
-      subject.perform
-
-      expect(Analytics::UsageTrends::CounterJobWorker.jobs.count).to eq(expected_count)
-    end
-  end
-end
diff --git a/spec/workers/analytics/instance_statistics/counter_job_worker_spec.rb b/spec/workers/analytics/instance_statistics/counter_job_worker_spec.rb
deleted file mode 100644
index 4994fec44ab..00000000000
--- a/spec/workers/analytics/instance_statistics/counter_job_worker_spec.rb
+++ /dev/null
@@ -1,70 +0,0 @@
-# frozen_string_literal: true
-
-require 'spec_helper'
-
-RSpec.describe Analytics::InstanceStatistics::CounterJobWorker do
-  let_it_be(:user_1) { create(:user) }
-  let_it_be(:user_2) { create(:user) }
-
-  let(:users_measurement_identifier) { ::Analytics::UsageTrends::Measurement.identifiers.fetch(:users) }
-  let(:recorded_at) { Time.zone.now }
-  let(:job_args) { [users_measurement_identifier, user_1.id, user_2.id, recorded_at] }
-
-  before do
-    allow(ActiveRecord::Base.connection).to receive(:transaction_open?).and_return(false)
-  end
-
-  include_examples 'an idempotent worker' do
-    it 'counts a scope and stores the result' do
-      subject
-
-      measurement = Analytics::UsageTrends::Measurement.users.first
-      expect(measurement.recorded_at).to be_like_time(recorded_at)
-      expect(measurement.identifier).to eq('users')
-      expect(measurement.count).to eq(2)
-    end
-  end
-
-  context 'when no records are in the database' do
-    let(:users_measurement_identifier) { ::Analytics::UsageTrends::Measurement.identifiers.fetch(:groups) }
-
-    subject { described_class.new.perform(users_measurement_identifier, nil, nil, recorded_at) }
-
-    it 'sets 0 as the count' do
-      subject
-
-      measurement = Analytics::UsageTrends::Measurement.groups.first
-      expect(measurement.recorded_at).to be_like_time(recorded_at)
-      expect(measurement.identifier).to eq('groups')
-      expect(measurement.count).to eq(0)
-    end
-  end
-
-  it 'does not raise error when inserting duplicated measurement' do
-    subject
-
-    expect { subject }.not_to raise_error
-  end
-
-  it 'does not insert anything when BatchCount returns error' do
-    allow(Gitlab::Database::BatchCount).to receive(:batch_count).and_return(Gitlab::Database::BatchCounter::FALLBACK)

-    expect { subject }.not_to change { Analytics::UsageTrends::Measurement.count }
-  end
-
-  context 'when pipelines_succeeded identifier is passed' do
-    let_it_be(:pipeline) { create(:ci_pipeline, :success) }
-
-    let(:successful_pipelines_measurement_identifier) { ::Analytics::UsageTrends::Measurement.identifiers.fetch(:pipelines_succeeded) }
-    let(:job_args) { [successful_pipelines_measurement_identifier, pipeline.id, pipeline.id, recorded_at] }
-
-    it 'counts successful pipelines' do
-      subject
-
-      measurement = Analytics::UsageTrends::Measurement.pipelines_succeeded.first
-      expect(measurement.recorded_at).to be_like_time(recorded_at)
-      expect(measurement.identifier).to eq('pipelines_succeeded')
-      expect(measurement.count).to eq(1)
-    end
-  end
-end
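# A minimal sketch (not part of this diff) of equivalent coverage against the
# Analytics::UsageTrends classes that the deleted specs above already delegate
# to. The example itself is hypothetical; the perform arguments, the
# transaction_open? stub, and the Measurement scopes mirror the deleted
# counter job spec.
require 'spec_helper'

RSpec.describe Analytics::UsageTrends::CounterJobWorker do
  let(:users_identifier) { ::Analytics::UsageTrends::Measurement.identifiers.fetch(:users) }
  let(:recorded_at) { Time.zone.now }
  let(:user) { create(:user) }

  before do
    # Batch counting refuses to run inside an open transaction, hence the stub.
    allow(ActiveRecord::Base.connection).to receive(:transaction_open?).and_return(false)
  end

  it 'stores a users measurement for the given ID range' do
    described_class.new.perform(users_identifier, user.id, user.id, recorded_at)

    measurement = Analytics::UsageTrends::Measurement.users.first

    expect(measurement.count).to eq(1)
    expect(measurement.recorded_at).to be_like_time(recorded_at)
  end
end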