Add latest changes from gitlab-org/gitlab@master

GitLab Bot 2022-01-07 15:15:57 +00:00
parent bf57aa7662
commit c68ee79c33
27 changed files with 551 additions and 234 deletions

View File

@ -11,6 +11,8 @@ class Projects::Analytics::CycleAnalytics::StagesController < Projects::Applicat
before_action :authorize_read_cycle_analytics!
before_action :only_default_value_stream_is_allowed!
urgency :low
private
override :parent

View File

@ -9,6 +9,8 @@ class Projects::Analytics::CycleAnalytics::SummaryController < Projects::Applica
before_action :authorize_read_cycle_analytics!
urgency :low
def show
render json: project_level.summary
end

View File

@ -0,0 +1,35 @@
# frozen_string_literal: true
module Mutations
module Clusters
module AgentTokens
class Revoke < BaseMutation
graphql_name 'ClusterAgentTokenRevoke'
authorize :admin_cluster
TokenID = ::Types::GlobalIDType[::Clusters::AgentToken]
argument :id, TokenID,
required: true,
description: 'Global ID of the agent token that will be revoked.'
def resolve(id:)
token = authorized_find!(id: id)
token.update(status: token.class.statuses[:revoked])
{ errors: errors_on_object(token) }
end
private
def find_object(id:)
# TODO: remove this line when the compatibility layer is removed
# See: https://gitlab.com/gitlab-org/gitlab/-/issues/257883
id = TokenID.coerce_isolated_input(id)
GitlabSchema.find_by_gid(id)
end
end
end
end
end

View File

@ -36,6 +36,7 @@ module Types
mount_mutation Mutations::Clusters::Agents::Delete
mount_mutation Mutations::Clusters::AgentTokens::Create
mount_mutation Mutations::Clusters::AgentTokens::Delete
mount_mutation Mutations::Clusters::AgentTokens::Revoke
mount_mutation Mutations::Commits::Create, calls_gitaly: true
mount_mutation Mutations::CustomEmoji::Create, feature_flag: :custom_emoji
mount_mutation Mutations::CustomEmoji::Destroy, feature_flag: :custom_emoji

View File

@ -15,7 +15,7 @@ module ImportState
def refresh_jid_expiration
return unless jid
Gitlab::SidekiqStatus.set(jid, Gitlab::Import::StuckImportJob::IMPORT_JOBS_EXPIRATION, value: 2)
Gitlab::SidekiqStatus.set(jid, Gitlab::Import::StuckImportJob::IMPORT_JOBS_EXPIRATION)
end
def self.jid_by(project_id:, status:)

View File

@ -107,10 +107,7 @@ class Deployment < ApplicationRecord
deployment.run_after_commit do
Deployments::UpdateEnvironmentWorker.perform_async(id)
Deployments::LinkMergeRequestWorker.perform_async(id)
if ::Feature.enabled?(:deployments_archive, deployment.project, default_enabled: :yaml)
Deployments::ArchiveInProjectWorker.perform_async(deployment.project_id)
end
Deployments::ArchiveInProjectWorker.perform_async(deployment.project_id)
end
end

View File

@ -7,10 +7,6 @@ module Deployments
BATCH_SIZE = 100
def execute
unless ::Feature.enabled?(:deployments_archive, project, default_enabled: :yaml)
return error('Feature flag is not enabled')
end
deployments = Deployment.archivables_in(project, limit: BATCH_SIZE)
return success(result: :empty) if deployments.empty?

View File

@ -8,13 +8,11 @@
%li
%a{ href: "#tab-queries", data: { toggle: "tab" } }
= t('sherlock.queries')
%span.badge.badge-pill
#{@transaction.queries.length}
= gl_badge_tag @transaction.queries.length.to_s
%li
%a{ href: "#tab-file-samples", data: { toggle: "tab" } }
= t('sherlock.file_samples')
%span.badge.badge-pill
#{@transaction.file_samples.length}
= gl_badge_tag @transaction.file_samples.length.to_s
.row-content-block
.float-right
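For reference, `gl_badge_tag` is the badge helper adopted above in place of the raw `%span.badge.badge-pill` markup. A minimal sketch of its use from Ruby/HAML; the `variant` option is an assumption based on the helper's usage elsewhere in GitLab, not something this diff shows:

```ruby
# Sketch only: gl_badge_tag renders a Pajamas-style badge element.
gl_badge_tag @transaction.queries.length.to_s # plain badge, as in the view above
gl_badge_tag '42', variant: :info             # assumed optional variant
```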

View File

@ -1,8 +0,0 @@
---
name: deployments_archive
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/73628
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/345027
milestone: '14.5'
type: development
group: group::release
default_enabled: true

View File

@ -1,8 +1,8 @@
---
name: log_implicit_sidekiq_status_calls
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/74815
name: opt_in_sidekiq_status
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/77349
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/343964
milestone: '14.6'
milestone: '14.7'
type: development
group: group::scalability
default_enabled: false

View File

@ -6,18 +6,37 @@ info: To determine the technical writer assigned to the Stage/Group associated w
# How to self-host the docs site **(FREE SELF)**
The following guide describes how to use a local instance of the docs site with
a self-managed GitLab instance.
If you have a self-managed instance of GitLab, you may not be able to access the
product documentation as hosted on `docs.gitlab.com` from your GitLab instance.
## Run the docs site
Be aware of the following items if you self-host the product documentation:
The easiest way to run the docs site locally is to pick up one of the existing
Docker images that contain the HTML files.
- You must host the product documentation site under a subdirectory that matches
your installed GitLab version (for example, `14.5/`). The
[Docker images](https://gitlab.com/gitlab-org/gitlab-docs/container_registry/631635)
hosted by the GitLab Docs team provide this by default. We use a
[script](https://gitlab.com/gitlab-org/gitlab-docs/-/blob/2995d1378175803b22fb8806ba77adf63e79f32c/scripts/normalize-links.sh#L28-82)
to normalize the links and prefix them with the respective version.
- The version dropdown will display additional versions that don't exist. Selecting
those versions will display a 404 Not Found page.
- Results when using the search box come from `docs.gitlab.com`,
not from the local documentation.
- When you use the Docker images to serve the product documentation site, by
default the landing page redirects to the respective version (for example, `/14.5/`),
which causes the landing page <https://docs.gitlab.com> to not be displayed.
Pick the version that matches your GitLab version and run it; the following
examples use 14.5.
## Documentation self-hosting options
### Host the docs site using Docker
You can self-host the GitLab product documentation locally using one of these
methods:
- Docker
- GitLab Pages
- From your own webserver
The examples on this page are based on GitLab 14.5.
### Self-host the product documentation with Docker
The Docker images use a built-in webserver listening on port `4000`, so you need
to expose that port.
@ -42,9 +61,9 @@ services:
- '4000:4000'
```
### Host the docs site using GitLab Pages
### Self-host the product documentation with GitLab Pages
You can also host the docs site with GitLab Pages.
You use GitLab Pages to host the GitLab product documentation locally.
Prerequisite:
@ -53,11 +72,11 @@ Prerequisite:
main domain or subdomain. For example, URLs like `https://example.com/docs/`
are not supported.
To host the docs site with GitLab Pages:
To host the product documentation site with GitLab Pages:
1. [Create a new blank project](../user/project/working_with_projects.md#create-a-blank-project).
1. Create a new or edit your existing `.gitlab-ci.yml` file and add the following
`pages` job. Make sure the version is the same as your GitLab installation:
1. Create a new or edit your existing `.gitlab-ci.yml` file, and add the following
`pages` job, while ensuring the version is the same as your GitLab installation:
```yaml
image: registry.gitlab.com/gitlab-org/gitlab-docs:14.5
@ -70,20 +89,22 @@ To host the docs site with GitLab Pages:
- public
```
1. (Optional) Set the Pages domain name. Depending on the type of the Pages website,
you have two options:
1. Optional. Set the GitLab Pages domain name. Depending on the type of the
GitLab Pages website, you have two options:
| Type of website | [Default domain](../user/project/pages/getting_started_part_one.md#gitlab-pages-default-domain-names) | [Custom domain](../user/project/pages/custom_domains_ssl_tls_certification/index.md) |
| --------------- | -------------- | ------------- |
| Type of website | [Default domain](../user/project/pages/getting_started_part_one.md#gitlab-pages-default-domain-names) | [Custom domain](../user/project/pages/custom_domains_ssl_tls_certification/index.md) |
|-------------------------|----------------|---------------|
| [Project website](../user/project/pages/getting_started_part_one.md#project-website-examples) | Not supported | Supported |
| [User or group website](../user/project/pages/getting_started_part_one.md#user-and-group-website-examples) | Supported | Supported |
### Host the docs site on your own webserver
### Self-host the product documentation on your own webserver
Since the docs site is static, you can grab the directory from the container
(under `/usr/share/nginx/html`) and use your own web server to host
it wherever you want. Replace `<destination>` with the directory where the
docs will be copied to:
Because the product documentation site is static, you can grab the directory from
the container (in `/usr/share/nginx/html`) and use your own web server to host
it wherever you want.
Use the following commands, and replace `<destination>` with the directory where the
documentation files will be copied to:
```shell
docker create -it --name gitlab-docs registry.gitlab.com/gitlab-org/gitlab-docs:14.5
@ -93,32 +114,18 @@ docker rm -f gitlab-docs
## Redirect the `/help` links to the new docs page
When the docs site is up and running:
After your local product documentation site is running, [redirect the help
links](../user/admin_area/settings/help_page.md#redirect-help-pages) in the GitLab
application to your local site.
1. [Enable the help page redirects](../user/admin_area/settings/help_page.md#redirect-help-pages).
Use the Fully Qualified Domain Name as the docs URL. For example, if you
used the [Docker method](#host-the-docs-site-using-docker), enter `http://0.0.0.0:4000`.
You don't need to append the version; it is detected automatically.
1. Test that everything works by selecting the **Learn more** link on the page
you're on. Your GitLab version is automatically detected and appended to the docs URL
you set in the admin area. In this example, if your GitLab version is 14.5,
`https://<instance_url>/` becomes `http://0.0.0.0:4000/14.5/`.
The link inside GitLab shows as
`<instance_url>/help/user/admin_area/settings/help_page#destination-requirements`,
but when you select it, you are redirected to
`http://0.0.0.0:4000/14.5/ee/user/admin_area/settings/help_page/#destination-requirements`.
Be sure to use the fully qualified domain name as the docs URL. For example, if you
used the [Docker method](#self-host-the-product-documentation-with-docker), enter `http://0.0.0.0:4000`.
## Caveats
You don't need to append the version, as GitLab will detect it and append it to
any documentation URL requests, as needed. For example, if your GitLab version is
14.5, the GitLab Docs URL becomes `http://0.0.0.0:4000/14.5/`. The link
inside GitLab displays as `<instance_url>/help/user/admin_area/settings/help_page#destination-requirements`,
but when you select it, you are redirected to
`http://0.0.0.0:4000/14.5/ee/user/admin_area/settings/help_page/#destination-requirements`.
- You need to host the docs site under a subdirectory matching your GitLab version,
in the example of this guide `14.5/`. The
[Docker images](https://gitlab.com/gitlab-org/gitlab-docs/container_registry/631635)
hosted by the Docs team provide this by default. We use a
[script](https://gitlab.com/gitlab-org/gitlab-docs/-/blob/2995d1378175803b22fb8806ba77adf63e79f32c/scripts/normalize-links.sh#L28-82)
to normalize the links and prefix them with the respective version.
- The version dropdown will show versions that do not exist, and will lead
to a 404 if selected.
- The search results point to `docs.gitlab.com` and not the local docs.
- When you use the Docker images to serve the docs site, the landing page redirects
by default to the respective version, for example `/14.5/`, so you don't
see the landing page as seen at <https://docs.gitlab.com>.
To test the setting, select a **Learn more** link within the GitLab application.

View File

@ -970,6 +970,24 @@ Input type: `ClusterAgentTokenDeleteInput`
| <a id="mutationclusteragenttokendeleteclientmutationid"></a>`clientMutationId` | [`String`](#string) | A unique identifier for the client performing the mutation. |
| <a id="mutationclusteragenttokendeleteerrors"></a>`errors` | [`[String!]!`](#string) | Errors encountered during execution of the mutation. |
### `Mutation.clusterAgentTokenRevoke`
Input type: `ClusterAgentTokenRevokeInput`
#### Arguments
| Name | Type | Description |
| ---- | ---- | ----------- |
| <a id="mutationclusteragenttokenrevokeclientmutationid"></a>`clientMutationId` | [`String`](#string) | A unique identifier for the client performing the mutation. |
| <a id="mutationclusteragenttokenrevokeid"></a>`id` | [`ClustersAgentTokenID!`](#clustersagenttokenid) | Global ID of the agent token that will be revoked. |
#### Fields
| Name | Type | Description |
| ---- | ---- | ----------- |
| <a id="mutationclusteragenttokenrevokeclientmutationid"></a>`clientMutationId` | [`String`](#string) | A unique identifier for the client performing the mutation. |
| <a id="mutationclusteragenttokenrevokeerrors"></a>`errors` | [`[String!]!`](#string) | Errors encountered during execution of the mutation. |
### `Mutation.commitCreate`
Input type: `CommitCreateInput`

View File

@ -0,0 +1,255 @@
---
stage: none
group: unassigned
comments: false
description: 'CI/CD data time decay'
---
# CI/CD data time decay
## Summary
GitLab CI/CD is one of the most data and compute intensive components of GitLab.
Since its [initial release in November 2012](https://about.gitlab.com/blog/2012/11/13/continuous-integration-server-from-gitlab/),
the CI/CD subsystem has evolved significantly. It was [integrated into GitLab in September 2015](https://about.gitlab.com/releases/2015/09/22/gitlab-8-0-released/)
and has become [one of the most beloved CI/CD solutions](https://about.gitlab.com/blog/2017/09/27/gitlab-leader-continuous-integration-forrester-wave/).
On February 1st, 2021, GitLab.com surpassed 1 billion CI/CD builds, and the number of
builds [continues to grow exponentially](../ci_scale/index.md).
GitLab CI/CD has come a long way since the initial release, but the design of
the data storage for pipeline builds has remained almost the same since 2012. In
2021 we started working on database decomposition and extracting CI/CD data to
a separate database. Now we want to improve the architecture of the GitLab CI/CD
product to enable further scaling.
*Disclaimer: The following contains information related to upcoming products,
features, and functionality.
It is important to note that the information presented is for informational
purposes only. Please do not rely on this information for purchasing or
planning purposes.
As with all projects, the items mentioned in this document and linked pages are
subject to change or delay. The development, release and timing of any
products, features, or functionality remain at the sole discretion of GitLab
Inc.*
## Goals
**Implement a new architecture of CI/CD data storage to enable scaling.**
## Challenges
There are more than two billion rows describing CI/CD builds in GitLab.com's
database. This data represents a sizable portion of the whole dataset stored in
the PostgreSQL database running on GitLab.com.
This volume contributes to significant performance problems and development
challenges, and is often related to production incidents.
We also expect a [significant growth in the number of builds executed on
GitLab.com](../ci_scale/index.md) in the upcoming years.
## Opportunity
CI/CD data is subject to
[time-decay](https://about.gitlab.com/company/team/structure/working-groups/database-scalability/time-decay.html)
because, usually, pipelines that are a few months old are not frequently
accessed, or are even no longer relevant. Restricting access to processing
pipelines that are older than a few months might help us to move this data out
of the primary database, to different storage that is more performant and
cost-effective.
It is already possible to prevent processing builds [that have been
archived](../../../user/admin_area/settings/continuous_integration.md#archive-jobs).
When a build gets archived, it is no longer possible to retry it, but we still
keep all the processing metadata in the database, and it consumes resources
that are scarce in the primary database.
To improve performance and make it easier to scale CI/CD data storage,
we might want to follow the three tracks described below.
![pipeline data time decay](pipeline_data_time_decay.png)
<!-- markdownlint-disable MD029 -->
1. Partition builds queuing tables
2. Archive CI/CD data into partitioned database schema
3. Migrate archived builds metadata out of primary database
<!-- markdownlint-enable MD029 -->
### Migrate archived builds metadata out of primary database
Once a build (or a pipeline) gets archived, it is no longer possible to resume
pipeline processing in it. This means that all the metadata we store in
PostgreSQL that is needed to efficiently and reliably process builds can be
safely moved to a different data store.
Currently, storing pipeline processing data is expensive as this kind of CI/CD
data represents a significant portion of data stored in CI/CD tables. Once we
restrict access to processing archived pipelines, we can move this metadata to
a different place - preferably object storage - and make it accessible on
demand, when it is really needed again (for example for compliance or auditing purposes).
We need to evaluate whether moving data is the most optimal solution. We might
be able to use de-duplication of metadata entries and other normalization
strategies to consume less storage while retaining the ability to query this
dataset. Technical evaluation will be required to find the best solution here.
Epic: [Migrate archived builds metadata out of primary database](https://gitlab.com/groups/gitlab-org/-/epics/7216).
### Archive CI/CD data into partitioned database schema
After we move CI/CD metadata to a different store, the problem of having
billions of rows describing pipelines, builds and artifacts remains. We still
need to keep a reference to the metadata we store in object storage, and we
still need to be able to retrieve this information reliably in bulk (or search
through it).
It means that by moving data to object storage we might not be able to reduce
the number of rows in CI/CD tables. Moving data to object storage should help
with reducing the data size, but not the quantity of entries describing this
data. Because of this limitation, we still want to partition CI/CD data to
reduce the impact on the database (index sizes, auto-vacuum time and
frequency).
Our intent here is not to move this data out of our primary database. We want
to divide the very large database tables that store CI/CD data into multiple
smaller ones, using PostgreSQL partitioning features.
There are a few approaches we can take to partition CI/CD data. A promising one
is using list-based partitioning, where a partition number is assigned to a
pipeline and gets propagated to all resources that are related to this
pipeline. We assign the partition number based on when the pipeline was created
or when we observed the last processing activity in it. This is very flexible
because we can extend this partitioning strategy at will; for example, with this
strategy we can assign an arbitrary partition number based on multiple
partitioning keys, combining time-decay-based partitioning with tenant-based
partitioning on the application level.
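A purely hypothetical sketch of what such list-based partitioning could look like at the SQL level, wrapped in the migration style used elsewhere in GitLab; all table and column names are illustrative and not part of this blueprint:

```ruby
# Illustrative only: a list-partitioned builds table where every row carries
# the partition number assigned to its pipeline.
class CreatePartitionedBuildsExample < Gitlab::Database::Migration[1.0]
  def up
    execute(<<~SQL)
      CREATE TABLE p_ci_builds_example (
        id bigint NOT NULL,
        pipeline_id bigint NOT NULL,
        partition_id bigint NOT NULL, -- propagated from the pipeline
        status text,
        PRIMARY KEY (id, partition_id)
      ) PARTITION BY LIST (partition_id);

      CREATE TABLE p_ci_builds_example_100 PARTITION OF p_ci_builds_example FOR VALUES IN (100);
      CREATE TABLE p_ci_builds_example_101 PARTITION OF p_ci_builds_example FOR VALUES IN (101);
    SQL
  end

  def down
    execute('DROP TABLE p_ci_builds_example')
  end
end
```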
Partitioning rarely accessed data should also follow the policy defined for
builds archival, to make it consistent and reliable.
Epic: [Archive CI/CD data into partitioned database schema](https://gitlab.com/groups/gitlab-org/-/epics/5417).
### Partition builds queuing tables
While working on the [CI/CD Scale](../ci_scale/index.md) blueprint, we have
introduced a [new architecture for queuing CI/CD builds](https://gitlab.com/groups/gitlab-org/-/epics/5909#note_680407908)
for execution.
This allowed us to significantly improve performance. We still consider the new
solution an intermediate mechanism, needed before we start working on the
next iteration, which should improve the architecture of builds queuing even
more (it might require moving off PostgreSQL fully or partially).
In the meantime we want to ship another iteration, an intermediate step towards
a more flexible and reliable solution. We want to partition the new queuing
tables to reduce the impact on the database and to improve reliability and
database health.
Partitioning of CI/CD queuing tables does not need to follow the policy defined
for builds archival. Instead, we should leverage a long-standing policy saying
that builds created more than 24 hours ago need to be removed from the queue. This
business rule has been present in the product since the inception of GitLab CI.
Epic: [Partition builds queuing tables](https://gitlab.com/gitlab-org/gitlab/-/issues/347027).
## Principles
All three tracks we will use to implement the CI/CD time decay pattern come
with challenges. As we progress with the implementation, we will need to solve
many problems and settle many implementation details to make this
successful.
Below, we document a few foundational principles to make it easier for
everyone to understand the vision described in this architectural blueprint.
### Removing pipeline data
While it might be tempting to simply remove old or archived data from our
databases, this should be avoided. It is usually not desirable to permanently
remove user data unless consent is given to do so. We can, however, move data
to a different data store, like object storage.
Archived data can still be needed sometimes (for example for compliance or
auditing reasons). We want to be able to retrieve this data if needed, as long
as permanent removal has not been requested or approved by a user.
### Accessing pipeline data in the UI
Implementing CI/CD data time-decay through partitioning might be challenging
when we still want to make it possible for users to access data stored in many
partitions.
We want to retain the simplicity of accessing pipeline data in the UI. This will
require some behind-the-scenes changes in how we reference pipeline data from other
resources, but we don't want to make it more difficult for users to find their
pipelines in the UI.
We may need to add an "Archived" tab on the pipelines / builds list pages, but we
should be able to avoid additional steps / clicks when someone wants to view
pipeline status or builds associated with a merge request or a deployment.
We also may need to disable search in the "Archived" tab on pipelines / builds
list pages.
### Accessing pipeline data through the API
We accept that it may be necessary to build a separate API endpoint or
endpoints to access pipeline data through the API.
In the new API, users might need to provide a time range in which the data has
been created to search through their pipelines / builds. To make this efficient,
it might be necessary to restrict access to querying data residing in more than
two partitions at once. We can do that by supporting time ranges spanning a
duration equal to the builds archival policy.
It is still possible to allow users to use the old API to access archived
pipeline data, although a user-provided partition identifier may be required.
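As a sketch of the restriction above, assuming a partition spans the builds archival policy duration (the constant and method below are hypothetical):

```ruby
# Hypothetical guard: reject API queries whose time range would span more
# than two partitions. ARCHIVAL_POLICY is an assumed duration.
ARCHIVAL_POLICY = 3.months

def validate_pipeline_query_range!(created_after:, created_before:)
  if (created_before - created_after) > ARCHIVAL_POLICY
    raise ArgumentError, 'time range exceeds the builds archival policy'
  end
end
```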
## Iterations
All three tracks can be worked on in parallel:
1. [Migrate archived build metadata to object storage](https://gitlab.com/groups/gitlab-org/-/epics/7216).
1. [Partition CI/CD data that have been archived](https://gitlab.com/groups/gitlab-org/-/epics/5417).
1. [Partition CI/CD queuing tables using list partitioning](https://gitlab.com/gitlab-org/gitlab/-/issues/347027).
## Status
In progress.
## Who
Proposal:
<!-- vale gitlab.Spelling = NO -->
| Role | Who
|------------------------------|-------------------------|
| Author | Grzegorz Bizon |
| Engineering Leader | Cheryl Li |
| Product Manager | Jackie Porter |
| Architecture Evolution Coach | Kamil Trzciński |
DRIs:
| Role | Who
|------------------------------|------------------------|
| Leadership | Cheryl Li |
| Product | Jackie Porter |
| Engineering | Grzegorz Bizon |
Domain experts:
| Area | Who
|------------------------------|------------------------|
| Verify / Pipeline execution | Fabio Pitino |
| Verify / Pipeline execution | Marius Bobin |
| PostgreSQL Database | Andreas Brandl |
<!-- vale gitlab.Spelling = YES -->

Binary file not shown (new image, 38 KiB).

View File

@ -774,10 +774,8 @@ fetch = +refs/environments/*:refs/remotes/origin/environments/*
### Archive Old Deployments
> - [Introduced](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/73628) in GitLab 14.5.
> - [Enabled on GitLab.com and self-managed](https://gitlab.com/gitlab-org/gitlab/-/issues/337507) in GitLab 14.6.
FLAG:
On self-managed GitLab, by default this feature is available. To hide the feature per project or for your entire instance, ask an administrator to [disable the feature flag](../../administration/feature_flags.md) named `deployments_archive`. On GitLab.com, this feature is available.
> - [Enabled on GitLab.com and self-managed](https://gitlab.com/gitlab-org/gitlab/-/issues/345027) in GitLab 14.6.
> - [Generally available](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/73628) in GitLab 14.7. [Feature flag `deployments_archive`](https://gitlab.com/gitlab-org/gitlab/-/issues/345027) removed.
When a new deployment happens in your project,
GitLab creates [a special Git-ref to the deployment](#check-out-deployments-locally).

View File

@ -83,23 +83,11 @@ replacing the class name and arguments with whatever values are necessary for
your migration:
```ruby
migrate_async('BackgroundMigrationClassName', [arg1, arg2, ...])
migrate_in('BackgroundMigrationClassName', [arg1, arg2, ...])
```
Usually it's better to enqueue jobs in bulk; for this you can use
`bulk_migrate_async`:
```ruby
bulk_migrate_async(
[['BackgroundMigrationClassName', [1]],
['BackgroundMigrationClassName', [2]]]
)
```
Note that this will queue a Sidekiq job immediately: if you have a large number
of records, this may not be what you want. You can use the function
`queue_background_migration_jobs_by_range_at_intervals` to split the job into
batches:
You can use the function `queue_background_migration_jobs_by_range_at_intervals`
to automatically split the job into batches:
```ruby
queue_background_migration_jobs_by_range_at_intervals(
@ -117,16 +105,6 @@ consuming migrations it's best to schedule a background job using an
updates. Removals in turn can be handled by simply defining foreign keys with
cascading deletes.
If you would like to schedule jobs in bulk with a delay, you can use
`BackgroundMigrationWorker.bulk_perform_in`:
```ruby
jobs = [['BackgroundMigrationClassName', [1]],
['BackgroundMigrationClassName', [2]]]
bulk_migrate_in(5.minutes, jobs)
```
### Rescheduling background migrations
If one of the background migrations contains a bug that is fixed in a patch
@ -197,53 +175,47 @@ the new format.
## Example
To explain all this, let's use the following example: the table `services` has a
To explain all this, let's use the following example: the table `integrations` has a
field called `properties` which is stored in JSON. For all rows you want to
extract the `url` key from this JSON object and store it in the `services.url`
column. There are millions of services and parsing JSON is slow, thus you can't
extract the `url` key from this JSON object and store it in the `integrations.url`
column. There are millions of integrations and parsing JSON is slow, thus you can't
do this in a regular migration.
To do this using a background migration we'll start with defining our migration
class:
```ruby
class Gitlab::BackgroundMigration::ExtractServicesUrl
class Service < ActiveRecord::Base
self.table_name = 'services'
class Gitlab::BackgroundMigration::ExtractIntegrationsUrl
class Integration < ActiveRecord::Base
self.table_name = 'integrations'
end
def perform(service_id)
# A row may be removed between scheduling and starting of a job, thus we
# need to make sure the data is still present before doing any work.
service = Service.select(:properties).find_by(id: service_id)
def perform(start_id, end_id)
Integration.where(id: start_id..end_id).each do |integration|
json = JSON.load(integration.properties)
return unless service
begin
json = JSON.load(service.properties)
integration.update(url: json['url']) if json['url']
rescue JSON::ParserError
# If the JSON is invalid we don't want to keep the job around forever,
# instead we'll just leave the "url" field to whatever the default value
# is.
return
next
end
service.update(url: json['url']) if json['url']
end
end
```
Next we'll need to adjust our code so we schedule the above migration for newly
created and updated services. We can do this using something along the lines of
created and updated integrations. We can do this using something along the lines of
the following:
```ruby
class Service < ActiveRecord::Base
after_commit :schedule_service_migration, on: :update
after_commit :schedule_service_migration, on: :create
class Integration < ActiveRecord::Base
after_commit :schedule_integration_migration, on: :update
after_commit :schedule_integration_migration, on: :create
def schedule_service_migration
BackgroundMigrationWorker.perform_async('ExtractServicesUrl', [id])
def schedule_integration_migration
BackgroundMigrationWorker.perform_async('ExtractIntegrationsUrl', [id, id])
end
end
```
@ -253,21 +225,20 @@ before the transaction completes as doing so can lead to race conditions where
the changes are not yet visible to the worker.
Next we'll need a post-deployment migration that schedules the migration for
existing data. Since we're dealing with a lot of rows we'll schedule jobs in
batches instead of doing this one by one:
existing data.
```ruby
class ScheduleExtractServicesUrl < Gitlab::Database::Migration[1.0]
class ScheduleExtractIntegrationsUrl < Gitlab::Database::Migration[1.0]
disable_ddl_transaction!
def up
define_batchable_model('services').select(:id).in_batches do |relation|
jobs = relation.pluck(:id).map do |id|
['ExtractServicesUrl', [id]]
end
MIGRATION = 'ExtractIntegrationsUrl'
DELAY_INTERVAL = 2.minutes
BackgroundMigrationWorker.bulk_perform_async(jobs)
end
def up
queue_background_migration_jobs_by_range_at_intervals(
define_batchable_model('integrations'),
MIGRATION,
DELAY_INTERVAL)
end
def down
@ -284,18 +255,18 @@ jobs and manually run on any un-migrated rows. Such a migration would look like
this:
```ruby
class ConsumeRemainingExtractServicesUrlJobs < Gitlab::Database::Migration[1.0]
class ConsumeRemainingExtractIntegrationsUrlJobs < Gitlab::Database::Migration[1.0]
disable_ddl_transaction!
def up
# This must be included
Gitlab::BackgroundMigration.steal('ExtractServicesUrl')
Gitlab::BackgroundMigration.steal('ExtractIntegrationsUrl')
# This should be included, but can be skipped - see below
define_batchable_model('services').where(url: nil).each_batch(of: 50) do |batch|
define_batchable_model('integrations').where(url: nil).each_batch(of: 50) do |batch|
range = batch.pluck('MIN(id)', 'MAX(id)').first
Gitlab::BackgroundMigration::ExtractServicesUrl.new.perform(*range)
Gitlab::BackgroundMigration::ExtractIntegrationsUrl.new.perform(*range)
end
end
@ -313,9 +284,9 @@ If the application does not depend on the data being 100% migrated (for
instance, the data is advisory, and not mission-critical), then this final step
can be skipped.
This migration will then process any jobs for the ExtractServicesUrl migration
This migration will then process any jobs for the ExtractIntegrationsUrl migration
and continue once all jobs have been processed. Once done you can safely remove
the `services.properties` column.
the `integrations.properties` column.
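As a hedged follow-up, the final cleanup could be a regular post-deployment migration along these lines (the class name is illustrative; in practice the column should also be ignored by the application before it is dropped):

```ruby
# Illustrative cleanup once all ExtractIntegrationsUrl jobs have run.
class RemoveIntegrationsPropertiesExample < Gitlab::Database::Migration[1.0]
  enable_lock_retries!

  def up
    remove_column :integrations, :properties
  end

  def down
    add_column :integrations, :properties, :text
  end
end
```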
## Testing

View File

@ -12,9 +12,6 @@ info: To determine the technical writer assigned to the Stage/Group associated w
> - [Moved](https://gitlab.com/groups/gitlab-org/-/epics/6290) from GitLab Premium to GitLab Free in 14.5.
> - [Introduced](https://gitlab.com/gitlab-org/gitlab/-/issues/332227) in GitLab 14.0, the `resource_inclusions` and `resource_exclusions` attributes were removed and `reconcile_timeout`, `dry_run_strategy`, `prune`, `prune_timeout`, `prune_propagation_policy`, and `inventory_policy` attributes were added.
WARNING:
This feature might not be available to you. Check the **version history** note above for details.
The [GitLab Agent](index.md) supports hosting your configuration for
multiple agents in a single repository. These agents can be running
in the same cluster or in multiple clusters, and potentially with more than one agent per cluster.

View File

@ -43,27 +43,27 @@ module Gitlab
TRANSLATION_LEVELS = {
'bg' => 0,
'cs_CZ' => 0,
'da_DK' => 51,
'da_DK' => 49,
'de' => 15,
'en' => 100,
'eo' => 0,
'es' => 39,
'es' => 38,
'fil_PH' => 0,
'fr' => 12,
'fr' => 11,
'gl_ES' => 0,
'id_ID' => 0,
'it' => 2,
'ja' => 35,
'ko' => 11,
'nb_NO' => 33,
'ja' => 36,
'ko' => 12,
'nb_NO' => 32,
'nl_NL' => 0,
'pl_PL' => 5,
'pt_BR' => 49,
'ro_RO' => 23,
'ru' => 25,
'tr_TR' => 15,
'pt_BR' => 50,
'ro_RO' => 22,
'ru' => 26,
'tr_TR' => 14,
'uk' => 45,
'zh_CN' => 95,
'zh_CN' => 98,
'zh_HK' => 2,
'zh_TW' => 3
}.freeze

View File

@ -13,7 +13,7 @@ module Gitlab
def self.set_jid(import_state)
jid = generate_jid(import_state)
Gitlab::SidekiqStatus.set(jid, Gitlab::Import::StuckImportJob::IMPORT_JOBS_EXPIRATION, value: 2)
Gitlab::SidekiqStatus.set(jid, Gitlab::Import::StuckImportJob::IMPORT_JOBS_EXPIRATION)
import_state.update_column(:jid, jid)
end

View File

@ -29,16 +29,15 @@ module Gitlab
# for most jobs.
DEFAULT_EXPIRATION = 30.minutes.to_i
DEFAULT_VALUE = 1
DEFAULT_VALUE_MESSAGE = 'Keys using the default value for SidekiqStatus detected'
# Starts tracking of the given job.
#
# jid - The Sidekiq job ID
# expire - The expiration time of the Redis key.
def self.set(jid, expire = DEFAULT_EXPIRATION, value: DEFAULT_VALUE)
def self.set(jid, expire = DEFAULT_EXPIRATION)
return unless expire
Sidekiq.redis do |redis|
redis.set(key_for(jid), value, ex: expire)
redis.set(key_for(jid), 1, ex: expire)
end
end
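For context, a minimal usage sketch of the simplified `set`, assuming a loaded GitLab environment and a reachable Redis (job IDs are illustrative):

```ruby
jid = SecureRandom.hex(12)

Gitlab::SidekiqStatus.set(jid, 10.minutes.to_i) # stores "1" with a 10-minute TTL
Gitlab::SidekiqStatus.set('skipped-jid', nil)   # no-op: a nil expiry skips tracking

Gitlab::SidekiqStatus.job_status([jid, 'skipped-jid'])
# => [true, false]
```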
@ -94,17 +93,10 @@ module Gitlab
return [] if job_ids.empty?
keys = job_ids.map { |jid| key_for(jid) }
results = Sidekiq.redis { |redis| redis.mget(*keys) }
if Feature.enabled?(:log_implicit_sidekiq_status_calls, default_enabled: :yaml)
to_log = keys.zip(results).select do |_key, result|
result == DEFAULT_VALUE.to_s
end.map(&:first)
Sidekiq.logger.info(message: DEFAULT_VALUE_MESSAGE, keys: to_log) if to_log.any?
end
results.map { |result| !result.nil? }
Sidekiq
.redis { |redis| redis.mget(*keys) }
.map { |result| !result.nil? }
end
# Returns the JIDs that are completed

View File

@ -4,10 +4,14 @@ module Gitlab
module SidekiqStatus
class ClientMiddleware
def call(_, job, _, _)
status_expiration = job['status_expiration'] || Gitlab::SidekiqStatus::DEFAULT_EXPIRATION
value = job['status_expiration'] ? 2 : Gitlab::SidekiqStatus::DEFAULT_VALUE
status_expiration = job['status_expiration']
unless ::Feature.enabled?(:opt_in_sidekiq_status, default_enabled: :yaml)
status_expiration ||= Gitlab::SidekiqStatus::DEFAULT_EXPIRATION
end
Gitlab::SidekiqStatus.set(job['jid'], status_expiration)
Gitlab::SidekiqStatus.set(job['jid'], status_expiration, value: value)
yield
end
end
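A hedged sketch of how a worker keeps opting in once `opt_in_sidekiq_status` is fully enabled: Sidekiq merges `sidekiq_options` into the job hash, so `status_expiration` reaches this middleware. The worker below is illustrative, not from this commit:

```ruby
class ExampleTrackedWorker # hypothetical worker name
  include ApplicationWorker

  # Opting in: this value arrives as job['status_expiration'] above.
  sidekiq_options status_expiration: 30.minutes.to_i

  def perform(resource_id)
    # Long-running work; callers can poll Gitlab::SidekiqStatus.job_status([jid]).
  end
end
```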

View File

@ -0,0 +1,55 @@
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Mutations::Clusters::AgentTokens::Revoke do
let_it_be(:token) { create(:cluster_agent_token) }
let_it_be(:user) { create(:user) }
let(:mutation) do
described_class.new(
object: double,
context: { current_user: user },
field: double
)
end
it { expect(described_class.graphql_name).to eq('ClusterAgentTokenRevoke') }
it { expect(described_class).to require_graphql_authorizations(:admin_cluster) }
describe '#resolve' do
let(:global_id) { token.to_global_id }
subject { mutation.resolve(id: global_id) }
context 'user does not have permission' do
it 'does not revoke the token' do
expect { subject }.to raise_error(Gitlab::Graphql::Errors::ResourceNotAvailable)
expect(token.reload).not_to be_revoked
end
end
context 'user has permission' do
before do
token.agent.project.add_maintainer(user)
end
it 'revokes the token' do
subject
expect(token.reload).to be_revoked
end
context 'supplied ID is invalid' do
let(:global_id) { token.id }
it 'raises a coercion error' do
expect { subject }.to raise_error(::GraphQL::CoercionError)
expect(token.reload).not_to be_revoked
end
end
end
end
end

View File

@ -8,7 +8,7 @@ RSpec.describe Gitlab::Import::SetAsyncJid do
it 'sets the JID in Redis' do
expect(Gitlab::SidekiqStatus)
.to receive(:set)
.with("async-import/project-import-state/#{project.id}", Gitlab::Import::StuckImportJob::IMPORT_JOBS_EXPIRATION, value: 2)
.with("async-import/project-import-state/#{project.id}", Gitlab::Import::StuckImportJob::IMPORT_JOBS_EXPIRATION)
.and_call_original
described_class.set_jid(project.import_state)

View File

@ -1,24 +1,61 @@
# frozen_string_literal: true
require 'fast_spec_helper'
# This can use fast_spec_helper when the feature flag stubbing is removed.
require 'spec_helper'
RSpec.describe Gitlab::SidekiqStatus::ClientMiddleware do
RSpec.describe Gitlab::SidekiqStatus::ClientMiddleware, :clean_gitlab_redis_queues do
describe '#call' do
context 'when the job has status_expiration set' do
it 'tracks the job in Redis with a value of 2' do
expect(Gitlab::SidekiqStatus).to receive(:set).with('123', 1.hour.to_i, value: 2)
context 'when opt_in_sidekiq_status is disabled' do
before do
stub_feature_flags(opt_in_sidekiq_status: false)
end
described_class.new
.call('Foo', { 'jid' => '123', 'status_expiration' => 1.hour.to_i }, double(:queue), double(:pool)) { nil }
context 'when the job has status_expiration set' do
it 'tracks the job in Redis' do
expect(Gitlab::SidekiqStatus).to receive(:set).with('123', 1.hour.to_i).and_call_original
described_class.new
.call('Foo', { 'jid' => '123', 'status_expiration' => 1.hour.to_i }, double(:queue), double(:pool)) { nil }
expect(Gitlab::SidekiqStatus.num_running(['123'])).to eq(1)
end
end
context 'when the job does not have status_expiration set' do
it 'tracks the job in Redis' do
expect(Gitlab::SidekiqStatus).to receive(:set).with('123', 30.minutes.to_i).and_call_original
described_class.new
.call('Foo', { 'jid' => '123' }, double(:queue), double(:pool)) { nil }
expect(Gitlab::SidekiqStatus.num_running(['123'])).to eq(1)
end
end
end
context 'when the job does not have status_expiration set' do
it 'tracks the job in Redis with a value of 1' do
expect(Gitlab::SidekiqStatus).to receive(:set).with('123', Gitlab::SidekiqStatus::DEFAULT_EXPIRATION, value: 1)
context 'when opt_in_sidekiq_status is enabled' do
before do
stub_feature_flags(opt_in_sidekiq_status: true)
end
described_class.new
.call('Foo', { 'jid' => '123' }, double(:queue), double(:pool)) { nil }
context 'when the job has status_expiration set' do
it 'tracks the job in Redis' do
expect(Gitlab::SidekiqStatus).to receive(:set).with('123', 1.hour.to_i).and_call_original
described_class.new
.call('Foo', { 'jid' => '123', 'status_expiration' => 1.hour.to_i }, double(:queue), double(:pool)) { nil }
expect(Gitlab::SidekiqStatus.num_running(['123'])).to eq(1)
end
end
context 'when the job does not have status_expiration set' do
it 'does not track the job in Redis' do
described_class.new
.call('Foo', { 'jid' => '123' }, double(:queue), double(:pool)) { nil }
expect(Gitlab::SidekiqStatus.num_running(['123'])).to be_zero
end
end
end
end

View File

@ -12,7 +12,7 @@ RSpec.describe Gitlab::SidekiqStatus, :clean_gitlab_redis_queues, :clean_gitlab_
Sidekiq.redis do |redis|
expect(redis.exists(key)).to eq(true)
expect(redis.ttl(key) > 0).to eq(true)
expect(redis.get(key)).to eq(described_class::DEFAULT_VALUE.to_s)
expect(redis.get(key)).to eq('1')
end
end
@ -24,19 +24,17 @@ RSpec.describe Gitlab::SidekiqStatus, :clean_gitlab_redis_queues, :clean_gitlab_
Sidekiq.redis do |redis|
expect(redis.exists(key)).to eq(true)
expect(redis.ttl(key) > described_class::DEFAULT_EXPIRATION).to eq(true)
expect(redis.get(key)).to eq(described_class::DEFAULT_VALUE.to_s)
expect(redis.get(key)).to eq('1')
end
end
it 'allows overriding the default value' do
described_class.set('123', value: 2)
it 'does not store anything with a nil expiry' do
described_class.set('123', nil)
key = described_class.key_for('123')
Sidekiq.redis do |redis|
expect(redis.exists(key)).to eq(true)
expect(redis.ttl(key) > 0).to eq(true)
expect(redis.get(key)).to eq('2')
expect(redis.exists(key)).to eq(false)
end
end
end
@ -138,33 +136,5 @@ RSpec.describe Gitlab::SidekiqStatus, :clean_gitlab_redis_queues, :clean_gitlab_
it 'handles an empty array' do
expect(described_class.job_status([])).to eq([])
end
context 'when log_implicit_sidekiq_status_calls is enabled' do
it 'logs keys that contained the default value' do
described_class.set('123', value: 2)
described_class.set('456')
described_class.set('012')
expect(Sidekiq.logger).to receive(:info).with(message: described_class::DEFAULT_VALUE_MESSAGE,
keys: [described_class.key_for('456'), described_class.key_for('012')])
expect(described_class.job_status(%w(123 456 789 012))).to eq([true, true, false, true])
end
end
context 'when log_implicit_sidekiq_status_calls is disabled' do
before do
stub_feature_flags(log_implicit_sidekiq_status_calls: false)
end
it 'does not perform any logging' do
described_class.set('123', value: 2)
described_class.set('456')
expect(Sidekiq.logger).not_to receive(:info)
expect(described_class.job_status(%w(123 456 789))).to eq([true, true, false])
end
end
end
end

View File

@ -3278,9 +3278,10 @@ RSpec.describe API::MergeRequests do
context 'when skip_ci parameter is set' do
it 'enqueues a rebase of the merge request with skip_ci flag set' do
allow(RebaseWorker).to receive(:with_status).and_return(RebaseWorker)
with_status = RebaseWorker.with_status
expect(RebaseWorker).to receive(:perform_async).with(merge_request.id, user.id, true).and_call_original
expect(RebaseWorker).to receive(:with_status).and_return(with_status)
expect(with_status).to receive(:perform_async).with(merge_request.id, user.id, true).and_call_original
Sidekiq::Testing.fake! do
expect do

View File

@ -50,17 +50,6 @@ RSpec.describe Deployments::ArchiveInProjectService do
end
end
context 'when deployments_archive feature flag is disabled' do
before do
stub_feature_flags(deployments_archive: false)
end
it 'does not do anything' do
expect(subject[:status]).to eq(:error)
expect(subject[:message]).to eq('Feature flag is not enabled')
end
end
def deployment_refs_exist?
deployment_refs.map { |path| project.repository.ref_exists?(path) }
end