diff --git a/app/controllers/projects/analytics/cycle_analytics/stages_controller.rb b/app/controllers/projects/analytics/cycle_analytics/stages_controller.rb index 2f9d70fede1..7b38c069a60 100644 --- a/app/controllers/projects/analytics/cycle_analytics/stages_controller.rb +++ b/app/controllers/projects/analytics/cycle_analytics/stages_controller.rb @@ -11,6 +11,8 @@ class Projects::Analytics::CycleAnalytics::StagesController < Projects::Applicat before_action :authorize_read_cycle_analytics! before_action :only_default_value_stream_is_allowed! + urgency :low + private override :parent diff --git a/app/controllers/projects/analytics/cycle_analytics/summary_controller.rb b/app/controllers/projects/analytics/cycle_analytics/summary_controller.rb index bf8742bf6e8..69327feeb02 100644 --- a/app/controllers/projects/analytics/cycle_analytics/summary_controller.rb +++ b/app/controllers/projects/analytics/cycle_analytics/summary_controller.rb @@ -9,6 +9,8 @@ class Projects::Analytics::CycleAnalytics::SummaryController < Projects::Applica before_action :authorize_read_cycle_analytics! + urgency :low + def show render json: project_level.summary end diff --git a/app/graphql/mutations/clusters/agent_tokens/revoke.rb b/app/graphql/mutations/clusters/agent_tokens/revoke.rb new file mode 100644 index 00000000000..ca570792296 --- /dev/null +++ b/app/graphql/mutations/clusters/agent_tokens/revoke.rb @@ -0,0 +1,35 @@ +# frozen_string_literal: true + +module Mutations + module Clusters + module AgentTokens + class Revoke < BaseMutation + graphql_name 'ClusterAgentTokenRevoke' + + authorize :admin_cluster + + TokenID = ::Types::GlobalIDType[::Clusters::AgentToken] + + argument :id, TokenID, + required: true, + description: 'Global ID of the agent token that will be revoked.' 
+ + def resolve(id:) + token = authorized_find!(id: id) + token.update(status: token.class.statuses[:revoked]) + + { errors: errors_on_object(token) } + end + + private + + def find_object(id:) + # TODO: remove this line when the compatibility layer is removed + # See: https://gitlab.com/gitlab-org/gitlab/-/issues/257883 + id = TokenID.coerce_isolated_input(id) + GitlabSchema.find_by_gid(id) + end + end + end + end +end diff --git a/app/graphql/types/mutation_type.rb b/app/graphql/types/mutation_type.rb index 9bba0e1bb37..c9f083efd27 100644 --- a/app/graphql/types/mutation_type.rb +++ b/app/graphql/types/mutation_type.rb @@ -36,6 +36,7 @@ module Types mount_mutation Mutations::Clusters::Agents::Delete mount_mutation Mutations::Clusters::AgentTokens::Create mount_mutation Mutations::Clusters::AgentTokens::Delete + mount_mutation Mutations::Clusters::AgentTokens::Revoke mount_mutation Mutations::Commits::Create, calls_gitaly: true mount_mutation Mutations::CustomEmoji::Create, feature_flag: :custom_emoji mount_mutation Mutations::CustomEmoji::Destroy, feature_flag: :custom_emoji diff --git a/app/models/concerns/import_state/sidekiq_job_tracker.rb b/app/models/concerns/import_state/sidekiq_job_tracker.rb index 340bf4279bc..b7d0ed0f51b 100644 --- a/app/models/concerns/import_state/sidekiq_job_tracker.rb +++ b/app/models/concerns/import_state/sidekiq_job_tracker.rb @@ -15,7 +15,7 @@ module ImportState def refresh_jid_expiration return unless jid - Gitlab::SidekiqStatus.set(jid, Gitlab::Import::StuckImportJob::IMPORT_JOBS_EXPIRATION, value: 2) + Gitlab::SidekiqStatus.set(jid, Gitlab::Import::StuckImportJob::IMPORT_JOBS_EXPIRATION) end def self.jid_by(project_id:, status:) diff --git a/app/models/deployment.rb b/app/models/deployment.rb index 4c60ce57f49..943abdc7c1c 100644 --- a/app/models/deployment.rb +++ b/app/models/deployment.rb @@ -107,10 +107,7 @@ class Deployment < ApplicationRecord deployment.run_after_commit do 
Deployments::UpdateEnvironmentWorker.perform_async(id) Deployments::LinkMergeRequestWorker.perform_async(id) - - if ::Feature.enabled?(:deployments_archive, deployment.project, default_enabled: :yaml) - Deployments::ArchiveInProjectWorker.perform_async(deployment.project_id) - end + Deployments::ArchiveInProjectWorker.perform_async(deployment.project_id) end end diff --git a/app/services/deployments/archive_in_project_service.rb b/app/services/deployments/archive_in_project_service.rb index a593721f390..d80ed637cd8 100644 --- a/app/services/deployments/archive_in_project_service.rb +++ b/app/services/deployments/archive_in_project_service.rb @@ -7,10 +7,6 @@ module Deployments BATCH_SIZE = 100 def execute - unless ::Feature.enabled?(:deployments_archive, project, default_enabled: :yaml) - return error('Feature flag is not enabled') - end - deployments = Deployment.archivables_in(project, limit: BATCH_SIZE) return success(result: :empty) if deployments.empty? diff --git a/app/views/sherlock/transactions/show.html.haml b/app/views/sherlock/transactions/show.html.haml index 162b14f01e1..71b0df5f29e 100644 --- a/app/views/sherlock/transactions/show.html.haml +++ b/app/views/sherlock/transactions/show.html.haml @@ -8,13 +8,11 @@ %li %a{ href: "#tab-queries", data: { toggle: "tab" } } = t('sherlock.queries') - %span.badge.badge-pill - #{@transaction.queries.length} + = gl_badge_tag @transaction.queries.length.to_s %li %a{ href: "#tab-file-samples", data: { toggle: "tab" } } = t('sherlock.file_samples') - %span.badge.badge-pill - #{@transaction.file_samples.length} + = gl_badge_tag @transaction.file_samples.length.to_s .row-content-block .float-right diff --git a/config/feature_flags/development/deployments_archive.yml b/config/feature_flags/development/deployments_archive.yml deleted file mode 100644 index 8129d6d4af8..00000000000 --- a/config/feature_flags/development/deployments_archive.yml +++ /dev/null @@ -1,8 +0,0 @@ ---- -name: deployments_archive 
-introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/73628 -rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/345027 -milestone: '14.5' -type: development -group: group::release -default_enabled: true diff --git a/config/feature_flags/development/log_implicit_sidekiq_status_calls.yml b/config/feature_flags/development/opt_in_sidekiq_status.yml similarity index 73% rename from config/feature_flags/development/log_implicit_sidekiq_status_calls.yml rename to config/feature_flags/development/opt_in_sidekiq_status.yml index 1aeb768b3dd..433c04e7822 100644 --- a/config/feature_flags/development/log_implicit_sidekiq_status_calls.yml +++ b/config/feature_flags/development/opt_in_sidekiq_status.yml @@ -1,8 +1,8 @@ --- -name: log_implicit_sidekiq_status_calls -introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/74815 +name: opt_in_sidekiq_status +introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/77349 rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/343964 -milestone: '14.6' +milestone: '14.7' type: development group: group::scalability default_enabled: false diff --git a/doc/administration/docs_self_host.md b/doc/administration/docs_self_host.md index 4e45fe3a1c8..007055b5de7 100644 --- a/doc/administration/docs_self_host.md +++ b/doc/administration/docs_self_host.md @@ -6,18 +6,37 @@ info: To determine the technical writer assigned to the Stage/Group associated w # How to self-host the docs site **(FREE SELF)** -The following guide describes how to use a local instance of the docs site with -a self-managed GitLab instance. +If you have a self-managed instance of GitLab, you may not be able to access the +product documentation as hosted on `docs.gitlab.com` from your GitLab instance. 
-## Run the docs site +Be aware of the following items if you self-host the product documentation: -The easiest way to run the docs site locally it to pick up one of the existing -Docker images that contain the HTML files. +- You must host the product documentation site under a subdirectory that matches + your installed GitLab version (for example, `14.5/`). The + [Docker images](https://gitlab.com/gitlab-org/gitlab-docs/container_registry/631635) + hosted by the GitLab Docs team provide this by default. We use a + [script](https://gitlab.com/gitlab-org/gitlab-docs/-/blob/2995d1378175803b22fb8806ba77adf63e79f32c/scripts/normalize-links.sh#L28-82) + to normalize the links and prefix them with the respective version. +- The version dropdown will display additional versions that don't exist. Selecting + those versions will display a 404 Not Found page. +- The search box displays results from `docs.gitlab.com` + and not the local documentation. +- When you use the Docker images to serve the product documentation site, by + default the landing page redirects to the respective version (for example, `/14.5/`), + which causes the landing page to not be displayed. -Pick the version that matches your GitLab version and run it, in the following -examples 14.5. +## Documentation self-hosting options -### Host the docs site using Docker +You can self-host the GitLab product documentation locally using one of these +methods: + +- Docker +- GitLab Pages +- From your own webserver + +The examples on this page are based on GitLab 14.5. + +### Self-host the product documentation with Docker The Docker images use a built-in webserver listening on port `4000`, so you need to expose that. @@ -42,9 +61,9 @@ services: - '4000:4000' ``` -### Host the docs site using GitLab Pages +### Self-host the product documentation with GitLab Pages -You can also host the docs site with GitLab Pages. +You can use GitLab Pages to host the GitLab product documentation locally.
Prerequisite: @@ -53,11 +72,11 @@ Prerequisite: main domain or subdomain. For example, URLs like `https://example.com/docs/` are not supported. -To host the docs site with GitLab Pages: +To host the product documentation site with GitLab Pages: 1. [Create a new blank project](../user/project/working_with_projects.md#create-a-blank-project). -1. Create a new or edit your existing `.gitlab-ci.yml` file and add the following - `pages` job. Make sure the version is the same as your GitLab installation: +1. Create a new or edit your existing `.gitlab-ci.yml` file, and add the following + `pages` job, while ensuring the version is the same as your GitLab installation: ```yaml image: registry.gitlab.com/gitlab-org/gitlab-docs:14.5 @@ -70,20 +89,22 @@ To host the docs site with GitLab Pages: - public ``` -1. (Optional) Set the Pages domain name. Depending on the type of the Pages website, - you have two options: +1. Optional. Set the GitLab Pages domain name. Depending on the type of the + GitLab Pages website, you have two options: - | Type of website | [Default domain](../user/project/pages/getting_started_part_one.md#gitlab-pages-default-domain-names) | [Custom domain](../user/project/pages/custom_domains_ssl_tls_certification/index.md) | - | --------------- | -------------- | ------------- | + | Type of website | [Default domain](../user/project/pages/getting_started_part_one.md#gitlab-pages-default-domain-names) | [Custom domain](../user/project/pages/custom_domains_ssl_tls_certification/index.md) | + |-------------------------|----------------|---------------| | [Project website](../user/project/pages/getting_started_part_one.md#project-website-examples) | Not supported | Supported | | [User or group website](../user/project/pages/getting_started_part_one.md#user-and-group-website-examples) | Supported | Supported | -### Host the docs site on your own webserver +### Self-host the product documentation on your own webserver -Since the docs site is static, you can grab 
the directory from the container -(under `/usr/share/nginx/html`) and use your own web server to host -it wherever you want. Replace `<destination>` with the directory where the -docs will be copied to: +Because the product documentation site is static, you can grab the directory from +the container (in `/usr/share/nginx/html`) and use your own web server to host +it wherever you want. + +Use the following commands, and replace `<destination>` with the directory where the +documentation files will be copied to: ```shell docker create -it --name gitlab-docs registry.gitlab.com/gitlab-org/gitlab-docs:14.5 @@ -93,32 +114,18 @@ docker rm -f gitlab-docs ## Redirect the `/help` links to the new docs page -When the docs site is up and running: +After your local product documentation site is running, [redirect the help +links](../user/admin_area/settings/help_page.md#redirect-help-pages) in the GitLab +application to your local site. -1. [Enable the help page redirects](../user/admin_area/settings/help_page.md#redirect-help-pages). - Use the Fully Qualified Domain Name as the docs URL. For example, if you - used the [Docker method](#host-the-docs-site-using-docker) , enter `http://0.0.0.0:4000`. - You don't need to append the version, it is detected automatically. -1. Test that everything works by selecting the **Learn more** link on the page - you're on. Your GitLab version is automatically detected and appended to the docs URL - you set in the admin area. In this example, if your GitLab version is 14.5, - `https:///` becomes `http://0.0.0.0:4000/14.5/`. - The link inside GitLab link shows as - `/help/user/admin_area/settings/help_page#destination-requirements`, - but when you select it, you are redirected to - `http://0.0.0.0:4000/14.5/ee/user/admin_area/settings/help_page/#destination-requirements`. +Be sure to use the fully qualified domain name as the docs URL. For example, if you +used the [Docker method](#self-host-the-product-documentation-with-docker), enter `http://0.0.0.0:4000`.
-## Caveats +You don't need to append the version, as GitLab will detect it and append it to +any documentation URL requests, as needed. For example, if your GitLab version is +14.5, the GitLab Docs URL becomes `http://0.0.0.0:4000/14.5/`. The link +inside GitLab displays as `/help/user/admin_area/settings/help_page#destination-requirements`, +but when you select it, you are redirected to +`http://0.0.0.0:4000/14.5/ee/user/admin_area/settings/help_page/#destination-requirements`. -- You need to host the docs site under a subdirectory matching your GitLab version, - in the example of this guide `14.5/`. The - [Docker images](https://gitlab.com/gitlab-org/gitlab-docs/container_registry/631635) - hosted by the Docs team provide this by default. We use a - [script](https://gitlab.com/gitlab-org/gitlab-docs/-/blob/2995d1378175803b22fb8806ba77adf63e79f32c/scripts/normalize-links.sh#L28-82) - to normalize the links and prefix them with the respective version. -- The version dropdown will show more versions which do not exist and will lead - to 404 if selected. -- The search results point to `docs.gitlab.com` and not the local docs. -- When you use the Docker images to serve the docs site, the landing page redirects - by default to the respective version, for example `/14.5/`, so you don't - see the landing page as seen at . +To test the setting, select a **Learn more** link within the GitLab application. diff --git a/doc/api/graphql/reference/index.md b/doc/api/graphql/reference/index.md index 1b1f9b31207..b4b0f28e132 100644 --- a/doc/api/graphql/reference/index.md +++ b/doc/api/graphql/reference/index.md @@ -970,6 +970,24 @@ Input type: `ClusterAgentTokenDeleteInput` | `clientMutationId` | [`String`](#string) | A unique identifier for the client performing the mutation. | | `errors` | [`[String!]!`](#string) | Errors encountered during execution of the mutation. 
| +### `Mutation.clusterAgentTokenRevoke` + +Input type: `ClusterAgentTokenRevokeInput` + +#### Arguments + +| Name | Type | Description | +| ---- | ---- | ----------- | +| `clientMutationId` | [`String`](#string) | A unique identifier for the client performing the mutation. | +| `id` | [`ClustersAgentTokenID!`](#clustersagenttokenid) | Global ID of the agent token that will be revoked. | + +#### Fields + +| Name | Type | Description | +| ---- | ---- | ----------- | +| `clientMutationId` | [`String`](#string) | A unique identifier for the client performing the mutation. | +| `errors` | [`[String!]!`](#string) | Errors encountered during execution of the mutation. | + ### `Mutation.commitCreate` Input type: `CommitCreateInput` diff --git a/doc/architecture/blueprints/ci_data_decay/index.md b/doc/architecture/blueprints/ci_data_decay/index.md new file mode 100644 index 00000000000..f4b180f5ea9 --- /dev/null +++ b/doc/architecture/blueprints/ci_data_decay/index.md @@ -0,0 +1,255 @@ +--- +stage: none +group: unassigned +comments: false +description: 'CI/CD data time decay' +--- + +# CI/CD data time decay + +## Summary + +GitLab CI/CD is one of the most data and compute intensive components of GitLab. +Since its [initial release in November 2012](https://about.gitlab.com/blog/2012/11/13/continuous-integration-server-from-gitlab/), +the CI/CD subsystem has evolved significantly. It was [integrated into GitLab in September 2015](https://about.gitlab.com/releases/2015/09/22/gitlab-8-0-released/) +and has become [one of the most beloved CI/CD solutions](https://about.gitlab.com/blog/2017/09/27/gitlab-leader-continuous-integration-forrester-wave/). + +On February 1st, 2021, GitLab.com surpassed 1 billion CI/CD builds, and the number of +builds [continues to grow exponentially](../ci_scale/index.md). + +GitLab CI/CD has come a long way since the initial release, but the design of +the data storage for pipeline builds remains almost the same since 2012. 
In +2021 we started working on database decomposition and extracting CI/CD data to +a separate database. Now we want to improve the architecture of the GitLab CI/CD +product to enable further scaling. + +*Disclaimer: The following contains information related to upcoming products, +features, and functionality. + +It is important to note that the information presented is for informational +purposes only. Please do not rely on this information for purchasing or +planning purposes. + +As with all projects, the items mentioned in this document and linked pages are +subject to change or delay. The development, release and timing of any +products, features, or functionality remain at the sole discretion of GitLab +Inc.* + +## Goals + +**Implement a new architecture of CI/CD data storage to enable scaling.** + +## Challenges + +There are more than two billion rows describing CI/CD builds in GitLab.com's +database. This data represents a sizable portion of the whole data stored in the +PostgreSQL database running on GitLab.com. + +This volume contributes to significant performance problems, development +challenges and is often related to production incidents. + +We also expect a [significant growth in the number of builds executed on +GitLab.com](../ci_scale/index.md) in the upcoming years. + +## Opportunity + +CI/CD data is subject to +[time-decay](https://about.gitlab.com/company/team/structure/working-groups/database-scalability/time-decay.html) +because, usually, pipelines that are a few months old are not frequently +accessed or are even not relevant anymore. Restricting access to processing +pipelines that are older than a few months might help us to move this data out +of the primary database, to a different storage, that is more performant and +cost effective. + +It is already possible to prevent processing builds [that have been +archived](../../../user/admin_area/settings/continuous_integration.md#archive-jobs).
+When a build gets archived it will not be possible to retry it, but we still do +keep all the processing metadata in the database, and it consumes resources +that are scarce in the primary database. + +In order to improve performance and make it easier to scale CI/CD data storage +we might want to follow these three tracks described below. + +![pipeline data time decay](pipeline_data_time_decay.png) + + + +1. Partition builds queuing tables +2. Archive CI/CD data into partitioned database schema +3. Migrate archived builds metadata out of primary database + + + +### Migrate archived builds metadata out of primary database + +Once a build (or a pipeline) gets archived, it is no longer possible to resume +pipeline processing in such pipeline. It means that all the metadata, we store +in PostgreSQL, that is needed to efficiently and reliably process builds can be +safely moved to a different data store. + +Currently, storing pipeline processing data is expensive as this kind of CI/CD +data represents a significant portion of data stored in CI/CD tables. Once we +restrict access to processing archived pipelines, we can move this metadata to +a different place - preferably object storage - and make it accessible on +demand, when it is really needed again (for example for compliance or auditing purposes). + +We need to evaluate whether moving data is the most optimal solution. We might +be able to use de-duplication of metadata entries and other normalization +strategies to consume less storage while retaining ability to query this +dataset. Technical evaluation will be required to find the best solution here. + +Epic: [Migrate archived builds metadata out of primary database](https://gitlab.com/groups/gitlab-org/-/epics/7216). + +### Archive CI/CD data into partitioned database schema + +After we move CI/CD metadata to a different store, the problem of having +billions of rows describing pipelines, builds and artifacts, remains. 
We still +need to keep a reference to the metadata we store in object storage and we still +do need to be able to retrieve this information reliably in bulk (or search +through it). + +It means that by moving data to object storage we might not be able to reduce +the number of rows in CI/CD tables. Moving data to object storage should help +with reducing the data size, but not the quantity of entries describing this +data. Because of this limitation, we still want to partition CI/CD data to +reduce the impact on the database (indices size, auto-vacuum time and +frequency). + +Our intent here is not to move this data out of our primary database elsewhere. +We want to divide very large database tables that store CI/CD data into +multiple smaller ones, using PostgreSQL partitioning features. + +There are a few approaches we can take to partition CI/CD data. A promising one +is using list-based partitioning where a partition number is assigned to a +pipeline, and gets propagated to all resources that are related to this +pipeline. We assign the partition number based on when the pipeline was created +or when we observed the last processing activity in it. This is very flexible +because we can extend this partitioning strategy at will; for example with this +strategy we can assign an arbitrary partition number based on multiple +partitioning keys, combining time-decay-based partitioning with tenant-based +partitioning on the application level. + +Partitioning rarely accessed data should also follow the policy defined for +builds archival, to make it consistent and reliable. + +Epic: [Archive CI/CD data into partitioned database schema](https://gitlab.com/groups/gitlab-org/-/epics/5417). + +### Partition builds queuing tables + +While working on the [CI/CD Scale](../ci_scale/index.md) blueprint, we have +introduced a [new architecture for queuing CI/CD builds](https://gitlab.com/groups/gitlab-org/-/epics/5909#note_680407908) +for execution.
+ +This allowed us to significantly improve performance. We still consider the new +solution as an intermediate mechanism, needed before we start working on the +next iteration. The following iteration should improve the architecture of +builds queuing even more (it might require moving off PostgreSQL fully or +partially). + +In the meantime we want to ship another iteration, an intermediate step towards a +more flexible and reliable solution. We want to partition the new queuing +tables, to reduce the impact on the database, to improve reliability and +database health. + +Partitioning of CI/CD queuing tables does not need to follow the policy defined +for builds archival. Instead we should leverage a long-standing policy saying +that builds created more than 24 hours ago need to be removed from the queue. This +business rule has been present in the product since the inception of GitLab CI. + +Epic: [Partition builds queuing tables](https://gitlab.com/gitlab-org/gitlab/-/issues/347027). + +## Principles + +All three tracks we will use to implement CI/CD time decay pattern are +associated with some challenges. As we progress with the implementation we will +need to solve many problems and devise many implementation details to make this +successful. + +Below, we documented a few foundational principles to make it easier for +everyone to understand the vision described in this architectural blueprint. + +### Removing pipeline data + +While it might be tempting to simply remove old or archived data from our +databases this should be avoided. It is usually not desired to permanently +remove user data unless consent is given to do so. We can, however, move data +to a different data store, like object storage. + +Archived data can still be needed sometimes (for example for compliance or +auditing reasons). We want to be able to retrieve this data if needed, as long +as permanent removal has not been requested or approved by a user.
+ +### Accessing pipeline data in the UI + +Implementing CI/CD data time-decay through partitioning might be challenging +when we still want to make it possible for users to access data stored in many +partitions. + +We want to retain simplicity of accessing pipeline data in the UI. It will +require some backstage changes in how we reference pipeline data from other +resources, but we don't want to make it more difficult for users to find their +pipelines in the UI. + +We may need to add "Archived" tab on the pipelines / builds list pages, but we +should be able to avoid additional steps / clicks when someone wants to view +pipeline status or builds associated with a merge request or a deployment. + +We also may need to disable search in the "Archived" tab on pipelines / builds +list pages. + +### Accessing pipeline data through the API + +We accept the possible necessity of building a separate API endpoint / +endpoints needed to access pipeline data through the API. + +In the new API users might need to provide a time range in which the data has +been created to search through their pipelines / builds. In order to make it +efficient it might be necessary to restrict access to querying data residing in +more than two partitions at once. We can do that by supporting time ranges +spanning the duration that equals to the builds archival policy. + +It is possible to still allow users to use the old API to access archived +pipelines data, although a user provided partition identifier may be required. + +## Iterations + +All three tracks can be worked on in parallel: + +1. [Migrate archived build metadata to object storage](https://gitlab.com/groups/gitlab-org/-/epics/7216). +1. [Partition CI/CD data that have been archived](https://gitlab.com/groups/gitlab-org/-/epics/5417). +1. [Partition CI/CD queuing tables using list partitioning](https://gitlab.com/gitlab-org/gitlab/-/issues/347027) + +## Status + +In progress. 
+ +## Who + +Proposal: + + + +| Role | Who +|------------------------------|-------------------------| +| Author | Grzegorz Bizon | +| Engineering Leader | Cheryl Li | +| Product Manager | Jackie Porter | +| Architecture Evolution Coach | Kamil Trzciński | + +DRIs: + +| Role | Who +|------------------------------|------------------------| +| Leadership | Cheryl Li | +| Product | Jackie Porter | +| Engineering | Grzegorz Bizon | + +Domain experts: + +| Area | Who +|------------------------------|------------------------| +| Verify / Pipeline execution | Fabio Pitino | +| Verify / Pipeline execution | Marius Bobin | +| PostgreSQL Database | Andreas Brandl | + + diff --git a/doc/architecture/blueprints/ci_data_decay/pipeline_data_time_decay.png b/doc/architecture/blueprints/ci_data_decay/pipeline_data_time_decay.png new file mode 100644 index 00000000000..a95be6f1eb2 Binary files /dev/null and b/doc/architecture/blueprints/ci_data_decay/pipeline_data_time_decay.png differ diff --git a/doc/ci/environments/index.md b/doc/ci/environments/index.md index 561507cab97..abc8f2d2759 100644 --- a/doc/ci/environments/index.md +++ b/doc/ci/environments/index.md @@ -774,10 +774,8 @@ fetch = +refs/environments/*:refs/remotes/origin/environments/* ### Archive Old Deployments > - [Introduced](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/73628) in GitLab 14.5. -> - [Enabled on GitLab.com and self-managed](https://gitlab.com/gitlab-org/gitlab/-/issues/337507) in GitLab 14.6. - -FLAG: -On self-managed GitLab, by default this feature is available. To hide the feature per project or for your entire instance, ask an administrator to [disable the feature flag](../../administration/feature_flags.md) named `deployments_archive`. On GitLab.com, this feature is available. +> - [Enabled on GitLab.com and self-managed](https://gitlab.com/gitlab-org/gitlab/-/issues/345027) in GitLab 14.6.
+> - [Generally available](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/73628) in GitLab 14.7. [Feature flag `deployments_archive`](https://gitlab.com/gitlab-org/gitlab/-/issues/345027) removed. When a new deployment happens in your project, GitLab creates [a special Git-ref to the deployment](#check-out-deployments-locally). diff --git a/doc/development/background_migrations.md b/doc/development/background_migrations.md index 4a18b2123da..49835085f96 100644 --- a/doc/development/background_migrations.md +++ b/doc/development/background_migrations.md @@ -83,23 +83,11 @@ replacing the class name and arguments with whatever values are necessary for your migration: ```ruby -migrate_async('BackgroundMigrationClassName', [arg1, arg2, ...]) +migrate_in('BackgroundMigrationClassName', [arg1, arg2, ...]) ``` -Usually it's better to enqueue jobs in bulk, for this you can use -`bulk_migrate_async`: - -```ruby -bulk_migrate_async( - [['BackgroundMigrationClassName', [1]], - ['BackgroundMigrationClassName', [2]]] -) -``` - -Note that this will queue a Sidekiq job immediately: if you have a large number -of records, this may not be what you want. You can use the function -`queue_background_migration_jobs_by_range_at_intervals` to split the job into -batches: +You can use the function `queue_background_migration_jobs_by_range_at_intervals` +to automatically split the job into batches: ```ruby queue_background_migration_jobs_by_range_at_intervals( @@ -117,16 +105,6 @@ consuming migrations it's best to schedule a background job using an updates. Removals in turn can be handled by simply defining foreign keys with cascading deletes.
-If you would like to schedule jobs in bulk with a delay, you can use -`BackgroundMigrationWorker.bulk_perform_in`: - -```ruby -jobs = [['BackgroundMigrationClassName', [1]], - ['BackgroundMigrationClassName', [2]]] - -bulk_migrate_in(5.minutes, jobs) -``` - ### Rescheduling background migrations If one of the background migrations contains a bug that is fixed in a patch @@ -197,53 +175,47 @@ the new format. ## Example -To explain all this, let's use the following example: the table `services` has a +To explain all this, let's use the following example: the table `integrations` has a field called `properties` which is stored in JSON. For all rows you want to -extract the `url` key from this JSON object and store it in the `services.url` -column. There are millions of services and parsing JSON is slow, thus you can't +extract the `url` key from this JSON object and store it in the `integrations.url` +column. There are millions of integrations and parsing JSON is slow, thus you can't do this in a regular migration. To do this using a background migration we'll start with defining our migration class: ```ruby -class Gitlab::BackgroundMigration::ExtractServicesUrl - class Service < ActiveRecord::Base - self.table_name = 'services' +class Gitlab::BackgroundMigration::ExtractIntegrationsUrl + class Integration < ActiveRecord::Base + self.table_name = 'integrations' end - def perform(service_id) - # A row may be removed between scheduling and starting of a job, thus we - # need to make sure the data is still present before doing any work. 
- service = Service.select(:properties).find_by(id: service_id) + def perform(start_id, end_id) + Integration.where(id: start_id..end_id).each do |integration| + json = JSON.load(integration.properties) - return unless service - - begin - json = JSON.load(service.properties) + integration.update(url: json['url']) if json['url'] rescue JSON::ParserError # If the JSON is invalid we don't want to keep the job around forever, # instead we'll just leave the "url" field to whatever the default value # is. - return + next end - - service.update(url: json['url']) if json['url'] end end ``` Next we'll need to adjust our code so we schedule the above migration for newly -created and updated services. We can do this using something along the lines of +created and updated integrations. We can do this using something along the lines of the following: ```ruby -class Service < ActiveRecord::Base - after_commit :schedule_service_migration, on: :update - after_commit :schedule_service_migration, on: :create +class Integration < ActiveRecord::Base + after_commit :schedule_integration_migration, on: :update + after_commit :schedule_integration_migration, on: :create - def schedule_service_migration - BackgroundMigrationWorker.perform_async('ExtractServicesUrl', [id]) + def schedule_integration_migration + BackgroundMigrationWorker.perform_async('ExtractIntegrationsUrl', [id, id]) end end ``` @@ -253,21 +225,20 @@ before the transaction completes as doing so can lead to race conditions where the changes are not yet visible to the worker. Next we'll need a post-deployment migration that schedules the migration for -existing data. Since we're dealing with a lot of rows we'll schedule jobs in -batches instead of doing this one by one: +existing data. ```ruby -class ScheduleExtractServicesUrl < Gitlab::Database::Migration[1.0] +class ScheduleExtractIntegrationsUrl < Gitlab::Database::Migration[1.0] disable_ddl_transaction! 
- def up - define_batchable_model('services').select(:id).in_batches do |relation| - jobs = relation.pluck(:id).map do |id| - ['ExtractServicesUrl', [id]] - end + MIGRATION = 'ExtractIntegrationsUrl' + DELAY_INTERVAL = 2.minutes - BackgroundMigrationWorker.bulk_perform_async(jobs) - end + def up + queue_background_migration_jobs_by_range_at_intervals( + define_batchable_model('integrations'), + MIGRATION, + DELAY_INTERVAL) end def down @@ -284,18 +255,18 @@ jobs and manually run on any un-migrated rows. Such a migration would look like this: ```ruby -class ConsumeRemainingExtractServicesUrlJobs < Gitlab::Database::Migration[1.0] +class ConsumeRemainingExtractIntegrationsUrlJobs < Gitlab::Database::Migration[1.0] disable_ddl_transaction! def up # This must be included - Gitlab::BackgroundMigration.steal('ExtractServicesUrl') + Gitlab::BackgroundMigration.steal('ExtractIntegrationsUrl') # This should be included, but can be skipped - see below - define_batchable_model('services').where(url: nil).each_batch(of: 50) do |batch| + define_batchable_model('integrations').where(url: nil).each_batch(of: 50) do |batch| range = batch.pluck('MIN(id)', 'MAX(id)').first - Gitlab::BackgroundMigration::ExtractServicesUrl.new.perform(*range) + Gitlab::BackgroundMigration::ExtractIntegrationsUrl.new.perform(*range) end end @@ -313,9 +284,9 @@ If the application does not depend on the data being 100% migrated (for instance, the data is advisory, and not mission-critical), then this final step can be skipped. -This migration will then process any jobs for the ExtractServicesUrl migration +This migration will then process any jobs for the ExtractIntegrationsUrl migration and continue once all jobs have been processed. Once done you can safely remove -the `services.properties` column. +the `integrations.properties` column. 
## Testing diff --git a/doc/user/clusters/agent/repository.md b/doc/user/clusters/agent/repository.md index c8ab037118e..22964fde395 100644 --- a/doc/user/clusters/agent/repository.md +++ b/doc/user/clusters/agent/repository.md @@ -12,9 +12,6 @@ info: To determine the technical writer assigned to the Stage/Group associated w > - [Moved](https://gitlab.com/groups/gitlab-org/-/epics/6290) from GitLab Premium to GitLab Free in 14.5. > - [Introduced](https://gitlab.com/gitlab-org/gitlab/-/issues/332227) in GitLab 14.0, the `resource_inclusions` and `resource_exclusions` attributes were removed and `reconcile_timeout`, `dry_run_strategy`, `prune`, `prune_timeout`, `prune_propagation_policy`, and `inventory_policy` attributes were added. -WARNING: -This feature might not be available to you. Check the **version history** note above for details. - The [GitLab Agent](index.md) supports hosting your configuration for multiple agents in a single repository. These agents can be running in the same cluster or in multiple clusters, and potentially with more than one agent per cluster. 
diff --git a/lib/gitlab/i18n.rb b/lib/gitlab/i18n.rb index 12203cab8c8..f056381b86a 100644 --- a/lib/gitlab/i18n.rb +++ b/lib/gitlab/i18n.rb @@ -43,27 +43,27 @@ module Gitlab TRANSLATION_LEVELS = { 'bg' => 0, 'cs_CZ' => 0, - 'da_DK' => 51, + 'da_DK' => 49, 'de' => 15, 'en' => 100, 'eo' => 0, - 'es' => 39, + 'es' => 38, 'fil_PH' => 0, - 'fr' => 12, + 'fr' => 11, 'gl_ES' => 0, 'id_ID' => 0, 'it' => 2, - 'ja' => 35, - 'ko' => 11, - 'nb_NO' => 33, + 'ja' => 36, + 'ko' => 12, + 'nb_NO' => 32, 'nl_NL' => 0, 'pl_PL' => 5, - 'pt_BR' => 49, - 'ro_RO' => 23, - 'ru' => 25, - 'tr_TR' => 15, + 'pt_BR' => 50, + 'ro_RO' => 22, + 'ru' => 26, + 'tr_TR' => 14, 'uk' => 45, - 'zh_CN' => 95, + 'zh_CN' => 98, 'zh_HK' => 2, 'zh_TW' => 3 }.freeze diff --git a/lib/gitlab/import/set_async_jid.rb b/lib/gitlab/import/set_async_jid.rb index 054fcdb433f..527d84477fe 100644 --- a/lib/gitlab/import/set_async_jid.rb +++ b/lib/gitlab/import/set_async_jid.rb @@ -13,7 +13,7 @@ module Gitlab def self.set_jid(import_state) jid = generate_jid(import_state) - Gitlab::SidekiqStatus.set(jid, Gitlab::Import::StuckImportJob::IMPORT_JOBS_EXPIRATION, value: 2) + Gitlab::SidekiqStatus.set(jid, Gitlab::Import::StuckImportJob::IMPORT_JOBS_EXPIRATION) import_state.update_column(:jid, jid) end diff --git a/lib/gitlab/sidekiq_status.rb b/lib/gitlab/sidekiq_status.rb index 120d18f63f2..66417b3697e 100644 --- a/lib/gitlab/sidekiq_status.rb +++ b/lib/gitlab/sidekiq_status.rb @@ -29,16 +29,15 @@ module Gitlab # for most jobs. DEFAULT_EXPIRATION = 30.minutes.to_i - DEFAULT_VALUE = 1 - DEFAULT_VALUE_MESSAGE = 'Keys using the default value for SidekiqStatus detected' - # Starts tracking of the given job. # # jid - The Sidekiq job ID # expire - The expiration time of the Redis key. 
- def self.set(jid, expire = DEFAULT_EXPIRATION, value: DEFAULT_VALUE) + def self.set(jid, expire = DEFAULT_EXPIRATION) + return unless expire + Sidekiq.redis do |redis| - redis.set(key_for(jid), value, ex: expire) + redis.set(key_for(jid), 1, ex: expire) end end @@ -94,17 +93,10 @@ module Gitlab return [] if job_ids.empty? keys = job_ids.map { |jid| key_for(jid) } - results = Sidekiq.redis { |redis| redis.mget(*keys) } - if Feature.enabled?(:log_implicit_sidekiq_status_calls, default_enabled: :yaml) - to_log = keys.zip(results).select do |_key, result| - result == DEFAULT_VALUE.to_s - end.map(&:first) - - Sidekiq.logger.info(message: DEFAULT_VALUE_MESSAGE, keys: to_log) if to_log.any? - end - - results.map { |result| !result.nil? } + Sidekiq + .redis { |redis| redis.mget(*keys) } + .map { |result| !result.nil? } end # Returns the JIDs that are completed diff --git a/lib/gitlab/sidekiq_status/client_middleware.rb b/lib/gitlab/sidekiq_status/client_middleware.rb index cee7270f2fb..ee12dbbe51a 100644 --- a/lib/gitlab/sidekiq_status/client_middleware.rb +++ b/lib/gitlab/sidekiq_status/client_middleware.rb @@ -4,10 +4,14 @@ module Gitlab module SidekiqStatus class ClientMiddleware def call(_, job, _, _) - status_expiration = job['status_expiration'] || Gitlab::SidekiqStatus::DEFAULT_EXPIRATION - value = job['status_expiration'] ? 
2 : Gitlab::SidekiqStatus::DEFAULT_VALUE + status_expiration = job['status_expiration'] + + unless ::Feature.enabled?(:opt_in_sidekiq_status, default_enabled: :yaml) + status_expiration ||= Gitlab::SidekiqStatus::DEFAULT_EXPIRATION + end + + Gitlab::SidekiqStatus.set(job['jid'], status_expiration) - Gitlab::SidekiqStatus.set(job['jid'], status_expiration, value: value) yield end end diff --git a/spec/graphql/mutations/clusters/agent_tokens/revoke_spec.rb b/spec/graphql/mutations/clusters/agent_tokens/revoke_spec.rb new file mode 100644 index 00000000000..f5f4c0cefad --- /dev/null +++ b/spec/graphql/mutations/clusters/agent_tokens/revoke_spec.rb @@ -0,0 +1,55 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Mutations::Clusters::AgentTokens::Revoke do + let_it_be(:token) { create(:cluster_agent_token) } + let_it_be(:user) { create(:user) } + + let(:mutation) do + described_class.new( + object: double, + context: { current_user: user }, + field: double + ) + end + + it { expect(described_class.graphql_name).to eq('ClusterAgentTokenRevoke') } + it { expect(described_class).to require_graphql_authorizations(:admin_cluster) } + + describe '#resolve' do + let(:global_id) { token.to_global_id } + + subject { mutation.resolve(id: global_id) } + + context 'user does not have permission' do + it 'does not revoke the token' do + expect { subject }.to raise_error(Gitlab::Graphql::Errors::ResourceNotAvailable) + + expect(token.reload).not_to be_revoked + end + end + + context 'user has permission' do + before do + token.agent.project.add_maintainer(user) + end + + it 'revokes the token' do + subject + + expect(token.reload).to be_revoked + end + + context 'supplied ID is invalid' do + let(:global_id) { token.id } + + it 'raises a coercion error' do + expect { subject }.to raise_error(::GraphQL::CoercionError) + + expect(token.reload).not_to be_revoked + end + end + end + end +end diff --git a/spec/lib/gitlab/import/set_async_jid_spec.rb 
b/spec/lib/gitlab/import/set_async_jid_spec.rb index 016f7cac61a..6931a7a953d 100644 --- a/spec/lib/gitlab/import/set_async_jid_spec.rb +++ b/spec/lib/gitlab/import/set_async_jid_spec.rb @@ -8,7 +8,7 @@ RSpec.describe Gitlab::Import::SetAsyncJid do it 'sets the JID in Redis' do expect(Gitlab::SidekiqStatus) .to receive(:set) - .with("async-import/project-import-state/#{project.id}", Gitlab::Import::StuckImportJob::IMPORT_JOBS_EXPIRATION, value: 2) + .with("async-import/project-import-state/#{project.id}", Gitlab::Import::StuckImportJob::IMPORT_JOBS_EXPIRATION) .and_call_original described_class.set_jid(project.import_state) diff --git a/spec/lib/gitlab/sidekiq_status/client_middleware_spec.rb b/spec/lib/gitlab/sidekiq_status/client_middleware_spec.rb index 2f2499753b9..cb1616ec40a 100644 --- a/spec/lib/gitlab/sidekiq_status/client_middleware_spec.rb +++ b/spec/lib/gitlab/sidekiq_status/client_middleware_spec.rb @@ -1,24 +1,61 @@ # frozen_string_literal: true -require 'fast_spec_helper' +# This can use fast_spec_helper when the feature flag stubbing is removed. 
+require 'spec_helper' -RSpec.describe Gitlab::SidekiqStatus::ClientMiddleware do +RSpec.describe Gitlab::SidekiqStatus::ClientMiddleware, :clean_gitlab_redis_queues do describe '#call' do - context 'when the job has status_expiration set' do - it 'tracks the job in Redis with a value of 2' do - expect(Gitlab::SidekiqStatus).to receive(:set).with('123', 1.hour.to_i, value: 2) + context 'when opt_in_sidekiq_status is disabled' do + before do + stub_feature_flags(opt_in_sidekiq_status: false) + end - described_class.new - .call('Foo', { 'jid' => '123', 'status_expiration' => 1.hour.to_i }, double(:queue), double(:pool)) { nil } + context 'when the job has status_expiration set' do + it 'tracks the job in Redis' do + expect(Gitlab::SidekiqStatus).to receive(:set).with('123', 1.hour.to_i).and_call_original + + described_class.new + .call('Foo', { 'jid' => '123', 'status_expiration' => 1.hour.to_i }, double(:queue), double(:pool)) { nil } + + expect(Gitlab::SidekiqStatus.num_running(['123'])).to eq(1) + end + end + + context 'when the job does not have status_expiration set' do + it 'tracks the job in Redis' do + expect(Gitlab::SidekiqStatus).to receive(:set).with('123', 30.minutes.to_i).and_call_original + + described_class.new + .call('Foo', { 'jid' => '123' }, double(:queue), double(:pool)) { nil } + + expect(Gitlab::SidekiqStatus.num_running(['123'])).to eq(1) + end end end - context 'when the job does not have status_expiration set' do - it 'tracks the job in Redis with a value of 1' do - expect(Gitlab::SidekiqStatus).to receive(:set).with('123', Gitlab::SidekiqStatus::DEFAULT_EXPIRATION, value: 1) + context 'when opt_in_sidekiq_status is enabled' do + before do + stub_feature_flags(opt_in_sidekiq_status: true) + end - described_class.new - .call('Foo', { 'jid' => '123' }, double(:queue), double(:pool)) { nil } + context 'when the job has status_expiration set' do + it 'tracks the job in Redis' do + expect(Gitlab::SidekiqStatus).to receive(:set).with('123', 
1.hour.to_i).and_call_original + + described_class.new + .call('Foo', { 'jid' => '123', 'status_expiration' => 1.hour.to_i }, double(:queue), double(:pool)) { nil } + + expect(Gitlab::SidekiqStatus.num_running(['123'])).to eq(1) + end + end + + context 'when the job does not have status_expiration set' do + it 'does not track the job in Redis' do + described_class.new + .call('Foo', { 'jid' => '123' }, double(:queue), double(:pool)) { nil } + + expect(Gitlab::SidekiqStatus.num_running(['123'])).to be_zero + end end end end diff --git a/spec/lib/gitlab/sidekiq_status_spec.rb b/spec/lib/gitlab/sidekiq_status_spec.rb index 1e7b52471b0..c94deb8e008 100644 --- a/spec/lib/gitlab/sidekiq_status_spec.rb +++ b/spec/lib/gitlab/sidekiq_status_spec.rb @@ -12,7 +12,7 @@ RSpec.describe Gitlab::SidekiqStatus, :clean_gitlab_redis_queues, :clean_gitlab_ Sidekiq.redis do |redis| expect(redis.exists(key)).to eq(true) expect(redis.ttl(key) > 0).to eq(true) - expect(redis.get(key)).to eq(described_class::DEFAULT_VALUE.to_s) + expect(redis.get(key)).to eq('1') end end @@ -24,19 +24,17 @@ RSpec.describe Gitlab::SidekiqStatus, :clean_gitlab_redis_queues, :clean_gitlab_ Sidekiq.redis do |redis| expect(redis.exists(key)).to eq(true) expect(redis.ttl(key) > described_class::DEFAULT_EXPIRATION).to eq(true) - expect(redis.get(key)).to eq(described_class::DEFAULT_VALUE.to_s) + expect(redis.get(key)).to eq('1') end end - it 'allows overriding the default value' do - described_class.set('123', value: 2) + it 'does not store anything with a nil expiry' do + described_class.set('123', nil) key = described_class.key_for('123') Sidekiq.redis do |redis| - expect(redis.exists(key)).to eq(true) - expect(redis.ttl(key) > 0).to eq(true) - expect(redis.get(key)).to eq('2') + expect(redis.exists(key)).to eq(false) end end end @@ -138,33 +136,5 @@ RSpec.describe Gitlab::SidekiqStatus, :clean_gitlab_redis_queues, :clean_gitlab_ it 'handles an empty array' do expect(described_class.job_status([])).to eq([]) 
end - - context 'when log_implicit_sidekiq_status_calls is enabled' do - it 'logs keys that contained the default value' do - described_class.set('123', value: 2) - described_class.set('456') - described_class.set('012') - - expect(Sidekiq.logger).to receive(:info).with(message: described_class::DEFAULT_VALUE_MESSAGE, - keys: [described_class.key_for('456'), described_class.key_for('012')]) - - expect(described_class.job_status(%w(123 456 789 012))).to eq([true, true, false, true]) - end - end - - context 'when log_implicit_sidekiq_status_calls is disabled' do - before do - stub_feature_flags(log_implicit_sidekiq_status_calls: false) - end - - it 'does not perform any logging' do - described_class.set('123', value: 2) - described_class.set('456') - - expect(Sidekiq.logger).not_to receive(:info) - - expect(described_class.job_status(%w(123 456 789))).to eq([true, true, false]) - end - end end end diff --git a/spec/requests/api/merge_requests_spec.rb b/spec/requests/api/merge_requests_spec.rb index 7c147419354..d5f0fdeacd7 100644 --- a/spec/requests/api/merge_requests_spec.rb +++ b/spec/requests/api/merge_requests_spec.rb @@ -3278,9 +3278,10 @@ RSpec.describe API::MergeRequests do context 'when skip_ci parameter is set' do it 'enqueues a rebase of the merge request with skip_ci flag set' do - allow(RebaseWorker).to receive(:with_status).and_return(RebaseWorker) + with_status = RebaseWorker.with_status - expect(RebaseWorker).to receive(:perform_async).with(merge_request.id, user.id, true).and_call_original + expect(RebaseWorker).to receive(:with_status).and_return(with_status) + expect(with_status).to receive(:perform_async).with(merge_request.id, user.id, true).and_call_original Sidekiq::Testing.fake! 
do expect do diff --git a/spec/services/deployments/archive_in_project_service_spec.rb b/spec/services/deployments/archive_in_project_service_spec.rb index d4039ee7b4a..a316c210d64 100644 --- a/spec/services/deployments/archive_in_project_service_spec.rb +++ b/spec/services/deployments/archive_in_project_service_spec.rb @@ -50,17 +50,6 @@ RSpec.describe Deployments::ArchiveInProjectService do end end - context 'when deployments_archive feature flag is disabled' do - before do - stub_feature_flags(deployments_archive: false) - end - - it 'does not do anything' do - expect(subject[:status]).to eq(:error) - expect(subject[:message]).to eq('Feature flag is not enabled') - end - end - def deployment_refs_exist? deployment_refs.map { |path| project.repository.ref_exists?(path) } end