diff --git a/.gitlab/CODEOWNERS b/.gitlab/CODEOWNERS
index a545196b597..6b72d133862 100644
--- a/.gitlab/CODEOWNERS
+++ b/.gitlab/CODEOWNERS
@@ -131,6 +131,15 @@
/doc/user/project/settings/import_export.md @aqualls
/doc/user/snippets.md @aqualls
+[Docs Growth]
+/doc/administration/instance_review.md @aqualls
+/doc/api/invitations.md @aqualls
+/doc/api/experiments.md @aqualls
+/doc/development/experiment_guide/ @aqualls
+/doc/development/snowplow.md @aqualls
+/doc/development/usage_ping/ @aqualls
+/doc/user/admin_area/license.md @aqualls
+
[Frontend]
*.scss @annabeldunstone @gitlab-org/maintainers/frontend
*.js @gitlab-org/maintainers/frontend
diff --git a/app/services/merge_requests/merge_service.rb b/app/services/merge_requests/merge_service.rb
index fc4405ef704..27f474b0fe7 100644
--- a/app/services/merge_requests/merge_service.rb
+++ b/app/services/merge_requests/merge_service.rb
@@ -107,8 +107,7 @@ module MergeRequests
log_info("Post merge finished on JID #{merge_jid} with state #{state}")
if delete_source_branch?
- ::Branches::DeleteService.new(@merge_request.source_project, branch_deletion_user)
- .execute(merge_request.source_branch)
+ MergeRequests::DeleteSourceBranchWorker.perform_async(@merge_request.id, @merge_request.source_branch_sha, branch_deletion_user.id)
end
end
diff --git a/app/views/admin/application_settings/_usage.html.haml b/app/views/admin/application_settings/_usage.html.haml
index fe83d4b807c..6abb7abf4a9 100644
--- a/app/views/admin/application_settings/_usage.html.haml
+++ b/app/views/admin/application_settings/_usage.html.haml
@@ -33,7 +33,7 @@
%pre.usage-data.js-syntax-highlight.code.highlight.mt-2.d-none{ class: payload_class, data: { endpoint: usage_data_admin_application_settings_path(format: :html) } }
- else
= _('The usage ping is disabled, and cannot be configured through this form.')
- - deactivating_usage_ping_path = help_page_path('development/usage_ping', anchor: 'disable-usage-ping')
+ - deactivating_usage_ping_path = help_page_path('development/usage_ping/index.md', anchor: 'disable-usage-ping')
- deactivating_usage_ping_link_start = ''.html_safe % { url: deactivating_usage_ping_path }
= s_('For more information, see the documentation on %{deactivating_usage_ping_link_start}deactivating the usage ping%{deactivating_usage_ping_link_end}.').html_safe % { deactivating_usage_ping_link_start: deactivating_usage_ping_link_start, deactivating_usage_ping_link_end: ''.html_safe }
diff --git a/app/views/admin/dev_ops_report/_report.html.haml b/app/views/admin/dev_ops_report/_report.html.haml
index 5faadd15ef8..46cd4824793 100644
--- a/app/views/admin/dev_ops_report/_report.html.haml
+++ b/app/views/admin/dev_ops_report/_report.html.haml
@@ -4,7 +4,7 @@
= render 'callout'
- if !usage_ping_enabled
- #js-devops-empty-state{ data: { is_admin: current_user&.admin.to_s, empty_state_svg_path: image_path('illustrations/convdev/convdev_no_index.svg'), enable_usage_ping_link: metrics_and_profiling_admin_application_settings_path(anchor: 'js-usage-settings'), docs_link: help_page_path('development/usage_ping') } }
+ #js-devops-empty-state{ data: { is_admin: current_user&.admin.to_s, empty_state_svg_path: image_path('illustrations/convdev/convdev_no_index.svg'), enable_usage_ping_link: metrics_and_profiling_admin_application_settings_path(anchor: 'js-usage-settings'), docs_link: help_page_path('development/usage_ping/index.md') } }
- elsif @metric.blank?
= render 'no_data'
- else
diff --git a/app/workers/all_queues.yml b/app/workers/all_queues.yml
index e38a6df321e..51df9a9d891 100644
--- a/app/workers/all_queues.yml
+++ b/app/workers/all_queues.yml
@@ -1808,6 +1808,14 @@
:weight: 1
:idempotent: true
:tags: []
+- :name: merge_requests_delete_source_branch
+ :feature_category: :source_code_management
+ :has_external_dependencies:
+ :urgency: :high
+ :resource_boundary: :unknown
+ :weight: 1
+ :idempotent: true
+ :tags: []
- :name: metrics_dashboard_prune_old_annotations
:feature_category: :metrics
:has_external_dependencies:
diff --git a/app/workers/merge_requests/delete_source_branch_worker.rb b/app/workers/merge_requests/delete_source_branch_worker.rb
new file mode 100644
index 00000000000..99816d5ed0b
--- /dev/null
+++ b/app/workers/merge_requests/delete_source_branch_worker.rb
@@ -0,0 +1,21 @@
+# frozen_string_literal: true
+
+class MergeRequests::DeleteSourceBranchWorker
+ include ApplicationWorker
+
+ feature_category :source_code_management
+ urgency :high
+ idempotent!
+
+ def perform(merge_request_id, source_branch_sha, user_id)
+ merge_request = MergeRequest.find(merge_request_id)
+ user = User.find(user_id)
+
+ # Source branch changed while it's being removed
+ return if merge_request.source_branch_sha != source_branch_sha
+
+ ::Branches::DeleteService.new(merge_request.source_project, user)
+ .execute(merge_request.source_branch)
+ rescue ActiveRecord::RecordNotFound
+ end
+end
diff --git a/changelogs/unreleased/id-move-delete-branch-to-workers.yml b/changelogs/unreleased/id-move-delete-branch-to-workers.yml
new file mode 100644
index 00000000000..b5dd5dbfb9f
--- /dev/null
+++ b/changelogs/unreleased/id-move-delete-branch-to-workers.yml
@@ -0,0 +1,5 @@
+---
+title: Move branch deletion on merge to async worker
+merge_request: 55390
+author:
+type: performance
diff --git a/config/sidekiq_queues.yml b/config/sidekiq_queues.yml
index d1b80fb3934..ef0be2913c9 100644
--- a/config/sidekiq_queues.yml
+++ b/config/sidekiq_queues.yml
@@ -204,6 +204,8 @@
- 1
- - merge_request_reset_approvals
- 1
+- - merge_requests_delete_source_branch
+ - 1
- - metrics_dashboard_prune_old_annotations
- 1
- - metrics_dashboard_sync_dashboards
diff --git a/doc/api/settings.md b/doc/api/settings.md
index 4d1b66f40ab..8a9ee3e72d1 100644
--- a/doc/api/settings.md
+++ b/doc/api/settings.md
@@ -308,6 +308,7 @@ listed in the descriptions of the relevant settings.
| `in_product_marketing_emails_enabled` | boolean | no | Enable in-product marketing emails. Enabled by default. |
| `invisible_captcha_enabled` | boolean | no | Enable Invisible Captcha spam detection during sign-up. Disabled by default. |
| `issues_create_limit` | integer | no | Max number of issue creation requests per minute per user. Disabled by default.|
+| `keep_latest_artifact` | boolean | no | Prevent the deletion of the artifacts from the most recent successful jobs, regardless of the expiry time. Enabled by default. |
| `local_markdown_version` | integer | no | Increase this value when any cached Markdown should be invalidated. |
| `maintenance_mode_message` | string | no | **(PREMIUM)** Message displayed when instance is in maintenance mode |
| `maintenance_mode` | boolean | no | **(PREMIUM)** When instance is in maintenance mode, non-administrative users can sign in with read-only access and make read-only API requests |
diff --git a/doc/development/README.md b/doc/development/README.md
index c25782dbf84..43ceb737dde 100644
--- a/doc/development/README.md
+++ b/doc/development/README.md
@@ -261,7 +261,7 @@ See [database guidelines](database/index.md).
## Product Intelligence guides
- [Product Intelligence guide](https://about.gitlab.com/handbook/product/product-intelligence-guide/)
-- [Usage Ping guide](usage_ping.md)
+- [Usage Ping guide](usage_ping/index.md)
- [Snowplow guide](snowplow.md)
## Experiment guide
diff --git a/doc/development/documentation/site_architecture/release_process.md b/doc/development/documentation/site_architecture/release_process.md
index 8473a78bb79..7bdf3fbdcf8 100644
--- a/doc/development/documentation/site_architecture/release_process.md
+++ b/doc/development/documentation/site_architecture/release_process.md
@@ -4,191 +4,165 @@ group: unassigned
info: To determine the technical writer assigned to the Stage/Group associated with this page, see https://about.gitlab.com/handbook/engineering/ux/technical-writing/#assignments
---
-# GitLab Docs monthly release process
+# Monthly release process
-When a new GitLab version is released on the 22nd, we need to create the respective
-single Docker image, and update some files so that the dropdown works correctly.
+When a new GitLab version is released on the 22nd, we need to release the published documentation
+for the new version.
-## 1. Add the chart version
+This should be done as soon as possible after the GitLab version is announced, so that:
-Since the charts use a different version number than all the other GitLab
-products, we need to add a
-[version mapping](https://docs.gitlab.com/charts/installation/version_mappings.html):
+- The published documentation includes the three most recent minor releases of the current major
+ version, and the most recent minor releases of the last two major versions. For example 13.9,
+ 13.8, 13.7, 12.10, and 11.11.
+- Documentation updates after the 22nd are for the next release. The versions drop down
+ should have the current milestone with `-pre` appended to it, for example `13.10-pre`.
-The charts stable branch is not created automatically like the other products.
-There's an [issue to track this](https://gitlab.com/gitlab-org/charts/gitlab/-/issues/1442).
-It is usually created on the 21st or the 22nd.
+Each documentation release:
-To add a new charts version:
+- Has a dedicated branch, named in the format `XX.yy`.
+- Has a Docker image that contains a build of that branch.
+
+For example:
+
+- For [GitLab 13.9](https://docs.gitlab.com/13.9/index.html), the
+ [stable branch](https://gitlab.com/gitlab-org/gitlab-docs/-/tree/13.9) and Docker image:
+ [`registry.gitlab.com/gitlab-org/gitlab-docs:13.9`](https://gitlab.com/gitlab-org/gitlab-docs/container_registry/631635).
+- For [GitLab 13.8](https://docs.gitlab.com/13.8/index.html), the
+ [stable branch](https://gitlab.com/gitlab-org/gitlab-docs/-/tree/13.8) and Docker image:
+ [`registry.gitlab.com/gitlab-org/gitlab-docs:13.8`](https://gitlab.com/gitlab-org/gitlab-docs/container_registry/631635).
+
+To set up a documentation release, follow these steps:
+
+1. [Add the charts version](#add-chart-version), so that the documentation is built using the
+ [version of the charts project that maps to](https://docs.gitlab.com/charts/installation/version_mappings.html)
+ the GitLab release. This step may have been completed already.
+1. [Create a stable branch and Docker image](#create-stable-branch-and-docker-image-for-release) for
+ the new version.
+1. [Create a release merge request](#create-release-merge-request) for the new version, which
+ updates the version dropdown menu for the current documentation and adds the release to the
+ Docker configuration. For example, the
+ [release merge request for 13.9](https://gitlab.com/gitlab-org/gitlab-docs/-/merge_requests/1555).
+1. [Update the three online versions](#update-dropdown-for-online-versions), so that they display the new release on their
+ version dropdown menus. For example:
+ - The merge request to [update the 13.9 version dropdown menu for the 13.9 release](https://gitlab.com/gitlab-org/gitlab-docs/-/merge_requests/1556).
+ - The merge request to [update the 13.8 version dropdown menu for the 13.9 release](https://gitlab.com/gitlab-org/gitlab-docs/-/merge_requests/1557).
+ - The merge request to [update the 13.7 version dropdown menu for the 13.9 release](https://gitlab.com/gitlab-org/gitlab-docs/-/merge_requests/1558).
+1. [Merge the release merge request and run the necessary Docker image builds](#merge-release-merge-request-and-run-docker-image-builds).
+
+## Add chart version
+
+To add a new charts version for the release:
1. Make sure you're in the root path of the `gitlab-docs` repository.
1. Open `content/_data/chart_versions.yaml` and add the new stable branch version using the
- version mapping. Note that only the `major.minor` version is needed.
+ [version mapping](https://docs.gitlab.com/charts/installation/version_mappings.html). Only the
+ `major.minor` version is needed.
1. Create a new merge request and merge it.
NOTE:
-It can be handy to create the future mappings since they are pretty much known.
-In that case, when a new GitLab version is released, you don't have to repeat
-this first step.
+If you have time, add anticipated future mappings to `content/_data/chart_versions.yaml`. This saves
+a step for the next GitLab release.
-## 2. Create an image for a single version
+## Create stable branch and Docker image for release
-The single docs version must be created before the release merge request, but
-this needs to happen when the stable branches for all products have been created.
+To create a stable branch and Docker image for the release:
1. Make sure you're in the root path of the `gitlab-docs` repository.
-1. Run the Rake task to create the single version:
+1. Run the Rake task to create the single version. For example, to create the 13.9 release branch
+ and perform others tasks:
```shell
- ./bin/rake "release:single[12.0]"
+ ./bin/rake "release:single[13.9]"
```
- A new `Dockerfile.12.0` should have been created and `.gitlab-ci.yml` should
- have the branches variables updated into a new branch. They are automatically
- committed.
+ A branch for the release is created, a new `Dockerfile.13.9` is created, and `.gitlab-ci.yml`
+ has branches variables updated into a new branch. These files are automatically committed.
-1. Push the newly created branch, but **don't create a merge request**.
- After you push, the `image:docs-single` job creates a new Docker image
- tagged with the branch name you created in the first step. In the end, the
- image is uploaded in the [Container Registry](https://gitlab.com/gitlab-org/gitlab-docs/container_registry)
- and it is listed under the `registry` environment folder at
- `https://gitlab.com/gitlab-org/gitlab-docs/-/environments/folders/registry` (must
- have developer access).
+1. Push the newly created branch, but **don't create a merge request**. After you push, the
+ `image:docs-single` job creates a new Docker image tagged with the name of the branch you created
+ earlier. You can see the Docker image in the `registry` environment at
+ .
-Optionally, you can test locally by building the image and running it:
+For example, see [the 13.9 release pipeline](https://gitlab.com/gitlab-org/gitlab-docs/-/pipelines/260288747).
-```shell
-docker build -t docs:12.0 -f Dockerfile.12.0 .
-docker run -it --rm -p 4000:4000 docs:12.0
-```
+Optionally, you can test locally by:
-Visit `http://localhost:4000/12.0/` to see if everything works correctly.
+1. Building the image and running it. For example, for GitLab 13.9 documentation:
-## 3. Create the release merge request
+ ```shell
+ docker build -t docs:13.9 -f Dockerfile.13.9 .
+ docker run -it --rm -p 4000:4000 docs:13.9
+ ```
+
+1. Visiting to see if everything works correctly.
+
+## Create release merge request
NOTE:
-To be [automated](https://gitlab.com/gitlab-org/gitlab-docs/-/issues/750).
+An [epic is open](https://gitlab.com/groups/gitlab-org/-/epics/4361) to automate this step.
-Now it's time to create the monthly release merge request that adds the new
-version and rotates the old one:
+To create the release merge request for the release:
1. Make sure you're in the root path of the `gitlab-docs` repository.
-1. Create a branch `release-X-Y`:
+1. Create a branch `release-X-Y`. For example:
```shell
git checkout master
- git checkout -b release-12-0
+ git checkout -b release-13-9
```
-1. **Rotate the online and offline versions:**
+1. Edit `content/_data/versions.yaml` and update the lists of versions to reflect the new release:
- At any given time, there are 4 browsable online versions: one pulled from
- the upstream master branches (docs for GitLab.com) and the three latest
- stable versions.
+ - Add the latest version to the `online:` section.
+ - Move the oldest version in `online:` to the `offline:` section. There should now be three
+ versions in `online:`.
- Edit `content/_data/versions.yaml` and rotate the versions to reflect the
- new changes:
+1. Update these Dockerfiles:
- - `online`: The 3 latest stable versions.
- - `offline`: All the previous versions offered as an offline archive.
+ - `dockerfiles/Dockerfile.archives`: Add the latest version to the top of the list.
+ - `Dockerfile.master`: Remove the oldest version, and add the newest version to the
+ top of the list.
-1. **Update the `:latest` and `:archives` Docker images:**
-
- The following two Dockerfiles need to be updated:
-
- 1. `dockerfiles/Dockerfile.archives` - Add the latest version at the top of
- the list.
- 1. `Dockerfile.master` - Rotate the versions (oldest gets removed and latest
- is added at the top of the list).
-
-1. In the end, there should be four files in total that have changed.
- Commit and push to create the merge request using the "Release" template:
+1. Commit and push to create the merge request. For example:
```shell
git add content/ Dockerfile.master dockerfiles/Dockerfile.archives
- git commit -m "Release 12.0"
- git push origin release-12-0
+ git commit -m "Release 13.9"
+ git push origin release-13-9
```
-## 4. Update the dropdown for all online versions
+Do not merge the release merge request yet.
-The versions dropdown is in a way "hardcoded". When the site is built, it looks
-at the contents of `content/_data/versions.yaml` and based on that, the dropdown
-is populated. Older branches have different content, which means the
-dropdown list is one or more releases behind. Remember that the new changes of
-the dropdown are included in the unmerged `release-X-Y` branch.
+## Update dropdown for online versions
-The content of `content/_data/versions.yaml` needs to change for all online
-versions (stable branches `X.Y` of the `gitlab-docs` project):
+To update`content/_data/versions.yaml` for all online versions (stable branches `X.Y` of the
+`gitlab-docs` project):
-1. Run the Rake task that creates all the respective merge requests needed to
- update the dropdowns. Set these to automatically be merged when their
- pipelines succeed:
-
- NOTE:
- The `release-X-Y` branch needs to be present locally,
- and you need to have switched to it, otherwise the Rake task fails.
+1. Run the Rake task that creates all of the necessary merge requests to update the dropdowns. For
+ example, for the 13.9 release:
```shell
- git checkout release-X-Y
+ git checkout release-13-9
./bin/rake release:dropdowns
```
+ These merge requests are set to automatically merge.
+
1. [Visit the merge requests page](https://gitlab.com/gitlab-org/gitlab-docs/-/merge_requests?label_name%5B%5D=release)
- to check that their pipelines pass, and once all are merged, proceed to the
- following and final step.
+ to check that their pipelines pass. After all MRs are merged, proceed to the following and final
+ step.
-NOTE:
-In case a pipeline fails, see [troubleshooting](#troubleshooting).
+## Merge release merge request and run Docker image builds
-## 5. Merge the release merge request
-
-The dropdown merge requests should have now been merged into their respective
-version (stable `X.Y` branch), which triggers another pipeline. At this point,
-you need to only babysit the pipelines and make sure they don't fail:
+The merge requests for the dropdowns should now all be merged into their respective stable branches.
+Each merge triggers a new pipeline for each stable branch. Wait for the stable branch pipelines to
+complete, then:
1. Check the [pipelines page](https://gitlab.com/gitlab-org/gitlab-docs/pipelines)
and make sure all stable branches have green pipelines.
-1. After all the pipelines of the online versions succeed, merge the release merge request.
+1. After all the pipelines succeed, merge the [release merge request](#create-release-merge-request).
1. Finally, run the
[`Build docker images weekly` pipeline](https://gitlab.com/gitlab-org/gitlab-docs/pipeline_schedules)
that builds the `:latest` and `:archives` Docker images.
-Once the scheduled pipeline succeeds, the docs site is deployed with all
-new versions online.
-
-## Troubleshooting
-
-Releasing a new version is a long process that involves many moving parts.
-
-### `test_internal_links_and_anchors` failing on dropdown merge requests
-
-WARNING:
-We now pin versions in the `.gitlab-ci.yml` of the respective branch,
-so the steps below are deprecated.
-
-When [updating the dropdown for the stable versions](#4-update-the-dropdown-for-all-online-versions),
-there may be cases where some links might fail. The process of how the
-dropdown MRs are created have a caveat, and that is that the tests run by
-pulling the master branches of all products, instead of the respective stable
-ones.
-
-In a real world scenario, the [Update 12.2 dropdown to match that of 12.4](https://gitlab.com/gitlab-org/gitlab-docs/-/merge_requests/604)
-merge request failed because of the [`test_internal_links_and_anchors` test](https://gitlab.com/gitlab-org/gitlab-docs/-/jobs/328042431).
-
-This happened because there has been a rename of a product (`gitlab-monitor` to `gitlab-exporter`)
-and the old name was still referenced in the 12.2 docs. If the respective stable
-branches for 12.2 were used, this wouldn't have failed, but as we can see from
-the [`compile_dev` job](https://gitlab.com/gitlab-org/gitlab-docs/-/jobs/328042427),
-the `master` branches were pulled.
-
-To fix this, re-run the pipeline (`https://gitlab.com/gitlab-org/gitlab-docs/pipelines/new`)
-for the `update-12-2-for-release-12-4` branch, by including the following CI/CD variables:
-
-- `BRANCH_CE` set to `12-2-stable`
-- `BRANCH_EE` set to `12-2-stable-ee`
-- `BRANCH_OMNIBUS` set to `12-2-stable`
-- `BRANCH_RUNNER` set to `12-2-stable`
-- `BRANCH_CHARTS` set to `2-2-stable`
-
-This should make the MR pass.
+As the last step in the scheduled pipeline, the documentation site deploys with all new versions.
diff --git a/doc/development/product_analytics/usage_ping.md b/doc/development/product_analytics/usage_ping.md
index 79075157ce4..43acf5b7e3f 100644
--- a/doc/development/product_analytics/usage_ping.md
+++ b/doc/development/product_analytics/usage_ping.md
@@ -1,8 +1,8 @@
---
-redirect_to: '../usage_ping.md'
+redirect_to: '../usage_ping/index.md'
---
-This document was moved to [another location](../usage_ping.md).
+This document was moved to [another location](../usage_ping/index.md).
diff --git a/doc/development/query_performance.md b/doc/development/query_performance.md
index 80c4d8c1fd0..87e26cf42df 100644
--- a/doc/development/query_performance.md
+++ b/doc/development/query_performance.md
@@ -21,7 +21,7 @@ When you are optimizing your SQL queries, there are two dimensions to pay attent
| Queries in a migration | `100ms` | This is different than the total [migration time](database_review.md#timing-guidelines-for-migrations). |
| Concurrent operations in a migration | `5min` | Concurrent operations do not block the database, but they block the GitLab update. This includes operations such as `add_concurrent_index` and `add_concurrent_foreign_key`. |
| Background migrations | `1s` | |
-| Usage Ping | `1s` | See the [usage ping docs](usage_ping.md#developing-and-testing-usage-ping) for more details. |
+| Usage Ping | `1s` | See the [usage ping docs](usage_ping/index.md#developing-and-testing-usage-ping) for more details. |
- When analyzing your query's performance, pay attention to if the time you are seeing is on a [cold or warm cache](#cold-and-warm-cache). These guidelines apply for both cache types.
- When working with batched queries, change the range and batch size to see how it effects the query timing and caching.
diff --git a/doc/development/snowplow.md b/doc/development/snowplow.md
index c9b76014c74..f5689068654 100644
--- a/doc/development/snowplow.md
+++ b/doc/development/snowplow.md
@@ -11,7 +11,7 @@ This guide provides an overview of how Snowplow works, and implementation detail
For more information about Product Intelligence, see:
- [Product Intelligence Guide](https://about.gitlab.com/handbook/product/product-intelligence-guide/)
-- [Usage Ping Guide](usage_ping.md)
+- [Usage Ping Guide](usage_ping/index.md)
More useful links:
diff --git a/doc/development/usage_ping.md b/doc/development/usage_ping.md
index 7fa253e75f5..c340f1eb10d 100644
--- a/doc/development/usage_ping.md
+++ b/doc/development/usage_ping.md
@@ -1,1296 +1,8 @@
---
-stage: Growth
-group: Product Intelligence
-info: To determine the technical writer assigned to the Stage/Group associated with this page, see https://about.gitlab.com/handbook/engineering/ux/technical-writing/#assignments
+redirect_to: 'usage_ping/index.md'
---
-# Usage Ping Guide
+This document was moved to [another location](usage_ping/index.md).
-> Introduced in GitLab Ultimate 11.2, more statistics.
-
-This guide describes Usage Ping's purpose and how it's implemented.
-
-For more information about Product Intelligence, see:
-
-- [Product Intelligence Guide](https://about.gitlab.com/handbook/product/product-intelligence-guide/)
-- [Snowplow Guide](snowplow.md)
-
-More links:
-
-- [Product Intelligence Direction](https://about.gitlab.com/direction/product-intelligence/)
-- [Data Analysis Process](https://about.gitlab.com/handbook/business-ops/data-team/#data-analysis-process/)
-- [Data for Product Managers](https://about.gitlab.com/handbook/business-ops/data-team/programs/data-for-product-managers/)
-- [Data Infrastructure](https://about.gitlab.com/handbook/business-ops/data-team/platform/infrastructure/)
-
-## What is Usage Ping?
-
-- GitLab sends a weekly payload containing usage data to GitLab Inc. Usage Ping provides high-level data to help our product, support, and sales teams. It does not send any project names, usernames, or any other specific data. The information from the usage ping is not anonymous, it is linked to the hostname of the instance. Sending usage ping is optional, and any instance can disable analytics.
-- The usage data is primarily composed of row counts for different tables in the instance’s database. By comparing these counts month over month (or week over week), we can get a rough sense for how an instance is using the different features in the product. In addition to counts, other facts
- that help us classify and understand GitLab installations are collected.
-- Usage ping is important to GitLab as we use it to calculate our Stage Monthly Active Users (SMAU) which helps us measure the success of our stages and features.
-- While usage ping is enabled, GitLab gathers data from the other instances and can show usage statistics of your instance to your users.
-
-### Why should we enable Usage Ping?
-
-- The main purpose of Usage Ping is to build a better GitLab. Data about how GitLab is used is collected to better understand feature/stage adoption and usage, which helps us understand how GitLab is adding value and helps our team better understand the reasons why people use GitLab and with this knowledge we're able to make better product decisions.
-- As a benefit of having the usage ping active, GitLab lets you analyze the users’ activities over time of your GitLab installation.
-- As a benefit of having the usage ping active, GitLab provides you with The DevOps Report,which gives you an overview of your entire instance’s adoption of Concurrent DevOps from planning to monitoring.
-- You get better, more proactive support. (assuming that our TAMs and support organization used the data to deliver more value)
-- You get insight and advice into how to get the most value out of your investment in GitLab. Wouldn't you want to know that a number of features or values are not being adopted in your organization?
-- You get a report that illustrates how you compare against other similar organizations (anonymized), with specific advice and recommendations on how to improve your DevOps processes.
-- Usage Ping is enabled by default. To disable it, see [Disable Usage Ping](#disable-usage-ping).
-
-### Limitations
-
-- Usage Ping does not track frontend events things like page views, link clicks, or user sessions, and only focuses on aggregated backend events.
-- Because of these limitations we recommend instrumenting your products with Snowplow for more detailed analytics on GitLab.com and use Usage Ping to track aggregated backend events on self-managed.
-
-## Usage Ping payload
-
-You can view the exact JSON payload sent to GitLab Inc. in the administration panel. To view the payload:
-
-1. Navigate to **Admin Area > Settings > Metrics and profiling**.
-1. Expand the **Usage statistics** section.
-1. Click the **Preview payload** button.
-
-For an example payload, see [Example Usage Ping payload](#example-usage-ping-payload).
-
-## Disable Usage Ping
-
-To disable Usage Ping in the GitLab UI, go to the **Settings** page of your administration panel and uncheck the **Usage Ping** checkbox.
-
-To disable Usage Ping and prevent it from being configured in the future through the administration panel, Omnibus installs can set the following in [`gitlab.rb`](https://docs.gitlab.com/omnibus/settings/configuration.html#configuration-options):
-
-```ruby
-gitlab_rails['usage_ping_enabled'] = false
-```
-
-Source installations can set the following in `gitlab.yml`:
-
-```yaml
-production: &base
- # ...
- gitlab:
- # ...
- usage_ping_enabled: false
-```
-
-## Usage Ping request flow
-
-The following example shows a basic request/response flow between a GitLab instance, the Versions Application, the License Application, Salesforce, the GitLab S3 Bucket, the GitLab Snowflake Data Warehouse, and Sisense:
-
-```mermaid
-sequenceDiagram
- participant GitLab Instance
- participant Versions Application
- participant Licenses Application
- participant Salesforce
- participant S3 Bucket
- participant Snowflake DW
- participant Sisense Dashboards
- GitLab Instance->>Versions Application: Send Usage Ping
- loop Process usage data
- Versions Application->>Versions Application: Parse usage data
- Versions Application->>Versions Application: Write to database
- Versions Application->>Versions Application: Update license ping time
- end
- loop Process data for Salesforce
- Versions Application-xLicenses Application: Request Zuora subscription id
- Licenses Application-xVersions Application: Zuora subscription id
- Versions Application-xSalesforce: Request Zuora account id by Zuora subscription id
- Salesforce-xVersions Application: Zuora account id
- Versions Application-xSalesforce: Usage data for the Zuora account
- end
- Versions Application->>S3 Bucket: Export Versions database
- S3 Bucket->>Snowflake DW: Import data
- Snowflake DW->>Snowflake DW: Transform data using dbt
- Snowflake DW->>Sisense Dashboards: Data available for querying
- Versions Application->>GitLab Instance: DevOps Report (Conversational Development Index)
-```
-
-## How Usage Ping works
-
-1. The Usage Ping [cron job](https://gitlab.com/gitlab-org/gitlab/-/blob/master/app/workers/gitlab_usage_ping_worker.rb#L30) is set in Sidekiq to run weekly.
-1. When the cron job runs, it calls [`Gitlab::UsageData.to_json`](https://gitlab.com/gitlab-org/gitlab/-/blob/master/app/services/submit_usage_ping_service.rb#L22).
-1. `Gitlab::UsageData.to_json` [cascades down](https://gitlab.com/gitlab-org/gitlab/-/blob/master/lib/gitlab/usage_data.rb#L22) to ~400+ other counter method calls.
-1. The response of all methods calls are [merged together](https://gitlab.com/gitlab-org/gitlab/-/blob/master/lib/gitlab/usage_data.rb#L14) into a single JSON payload in `Gitlab::UsageData.to_json`.
-1. The JSON payload is then [posted to the Versions application]( https://gitlab.com/gitlab-org/gitlab/-/blob/master/app/services/submit_usage_ping_service.rb#L20)
- If a firewall exception is needed, the required URL depends on several things. If
- the hostname is `version.gitlab.com`, the protocol is `TCP`, and the port number is `443`,
- the required URL is .
-
-## Usage Ping Metric Life cycle
-
-### 1. New metrics addition
-
-Please follow the [Implementing Usage Ping](#implementing-usage-ping) guide.
-
-### 2. Existing metric change
-
-Because we do not control when customers update their self-managed instances of GitLab,
-we **STRONGLY DISCOURAGE** changes to the logic used to calculate any metric.
-Any such changes lead to inconsistent reports from multiple GitLab instances.
-If there is a problem with an existing metric, it's best to deprecate the existing metric,
-and use it, side by side, with the desired new metric.
-
-Example:
-Consider following change. Before GitLab 12.6, the `example_metric` was implemented as:
-
-```ruby
-{
- ...
- example_metric: distinct_count(Project, :creator_id)
-}
-```
-
-For GitLab 12.6, the metric was changed to filter out archived projects:
-
-```ruby
-{
- ...
- example_metric: distinct_count(Project.non_archived, :creator_id)
-}
-```
-
-In this scenario all instances running up to GitLab 12.5 continue to report `example_metric`,
-including all archived projects, while all instances running GitLab 12.6 and higher filters
-out such projects. As Usage Ping data is collected from all reporting instances, the
-resulting dataset includes mixed data, which distorts any following business analysis.
-
-The correct approach is to add a new metric for GitLab 12.6 release with updated logic:
-
-```ruby
-{
- ...
- example_metric_without_archived: distinct_count(Project.non_archived, :creator_id)
-}
-```
-
-and update existing business analysis artefacts to use `example_metric_without_archived` instead of `example_metric`
-
-### 3. Metrics deprecation and removal
-
-The process for deprecating and removing metrics is under development. For
-more information, see the following [issue](https://gitlab.com/gitlab-org/gitlab/-/issues/284637).
-
-## Implementing Usage Ping
-
-Usage Ping consists of two kinds of data, counters and observations. Counters track how often a certain event
-happened over time, such as how many CI pipelines have run. They are monotonic and always trend up.
-Observations are facts collected from one or more GitLab instances and can carry arbitrary data. There are no
-general guidelines around how to collect those, due to the individual nature of that data.
-
-There are several types of counters which are all found in `usage_data.rb`:
-
-- **Ordinary Batch Counters:** Simple count of a given ActiveRecord_Relation
-- **Distinct Batch Counters:** Distinct count of a given ActiveRecord_Relation in a given column
-- **Sum Batch Counters:** Sum the values of a given ActiveRecord_Relation in a given column
-- **Alternative Counters:** Used for settings and configurations
-- **Redis Counters:** Used for in-memory counts.
-
-NOTE:
-Only use the provided counter methods. Each counter method contains a built in fail safe to isolate each counter to avoid breaking the entire Usage Ping.
-
-### Why batch counting
-
-For large tables, PostgreSQL can take a long time to count rows due to MVCC [(Multi-version Concurrency Control)](https://en.wikipedia.org/wiki/Multiversion_concurrency_control). Batch counting is a counting method where a single large query is broken into multiple smaller queries. For example, instead of a single query querying 1,000,000 records, with batch counting, you can execute 100 queries of 10,000 records each. Batch counting is useful for avoiding database timeouts as each batch query is significantly shorter than one single long running query.
-
-For GitLab.com, there are extremely large tables with 15 second query timeouts, so we use batch counting to avoid encountering timeouts. Here are the sizes of some GitLab.com tables:
-
-| Table | Row counts in millions |
-|------------------------------|------------------------|
-| `merge_request_diff_commits` | 2280 |
-| `ci_build_trace_sections` | 1764 |
-| `merge_request_diff_files` | 1082 |
-| `events` | 514 |
-
-We have several batch counting methods available:
-
-- [Ordinary Batch Counters](#ordinary-batch-counters)
-- [Distinct Batch Counters](#distinct-batch-counters)
-- [Sum Batch Counters](#sum-batch-counters)
-- [Estimated Batch Counters](#estimated-batch-counters)
-
-Batch counting requires indexes on columns to calculate max, min, and range queries. In some cases,
-you may need to add a specialized index on the columns involved in a counter.
-
-### Ordinary Batch Counters
-
-Handles `ActiveRecord::StatementInvalid` error
-
-Simple count of a given `ActiveRecord_Relation`, does a non-distinct batch count, smartly reduces `batch_size`, and handles errors.
-
-Method: `count(relation, column = nil, batch: true, start: nil, finish: nil)`
-
-Arguments:
-
-- `relation` the ActiveRecord_Relation to perform the count
-- `column` the column to perform the count on, by default is the primary key
-- `batch`: default `true` to use batch counting
-- `start`: custom start of the batch counting to avoid complex min calculations
-- `end`: custom end of the batch counting to avoid complex min calculations
-
-Examples:
-
-```ruby
-count(User.active)
-count(::Clusters::Cluster.aws_installed.enabled, :cluster_id)
-count(::Clusters::Cluster.aws_installed.enabled, :cluster_id, start: ::Clusters::Cluster.minimum(:id), finish: ::Clusters::Cluster.maximum(:id))
-```
-
-### Distinct Batch Counters
-
-Handles `ActiveRecord::StatementInvalid` error
-
-Distinct count of a given `ActiveRecord_Relation` on given column, a distinct batch count, smartly reduces `batch_size`, and handles errors.
-
-Method: `distinct_count(relation, column = nil, batch: true, batch_size: nil, start: nil, finish: nil)`
-
-Arguments:
-
-- `relation` the ActiveRecord_Relation to perform the count
-- `column` the column to perform the distinct count, by default is the primary key
-- `batch`: default `true` to use batch counting
-- `batch_size`: if none set it uses default value 10000 from `Gitlab::Database::BatchCounter`
-- `start`: custom start of the batch counting to avoid complex min calculations
-- `end`: custom end of the batch counting to avoid complex min calculations
-
-WARNING:
-Counting over non-unique columns can lead to performance issues. Take a look at the [iterating tables in batches](iterating_tables_in_batches.md) guide for more details.
-
-Examples:
-
-```ruby
-distinct_count(::Project, :creator_id)
-distinct_count(::Note.with_suggestions.where(time_period), :author_id, start: ::User.minimum(:id), finish: ::User.maximum(:id))
-distinct_count(::Clusters::Applications::CertManager.where(time_period).available.joins(:cluster), 'clusters.user_id')
-```
-
-### Sum Batch Counters
-
-Handles `ActiveRecord::StatementInvalid` error
-
-Sum the values of a given ActiveRecord_Relation on given column and handles errors.
-
-Method: `sum(relation, column, batch_size: nil, start: nil, finish: nil)`
-
-Arguments:
-
-- `relation` the ActiveRecord_Relation to perform the operation
-- `column` the column to sum on
-- `batch_size`: if none set it uses default value 1000 from `Gitlab::Database::BatchCounter`
-- `start`: custom start of the batch counting to avoid complex min calculations
-- `end`: custom end of the batch counting to avoid complex min calculations
-
-Examples:
-
-```ruby
-sum(JiraImportState.finished, :imported_issues_count)
-```
-
-### Grouping & Batch Operations
-
-The `count`, `distinct_count`, and `sum` batch counters can accept an `ActiveRecord::Relation`
-object, which groups by a specified column. With a grouped relation, the methods do batch counting,
-handle errors, and returns a hash table of key-value pairs.
-
-Examples:
-
-```ruby
-count(Namespace.group(:type))
-# returns => {nil=>179, "Group"=>54}
-
-distinct_count(Project.group(:visibility_level), :creator_id)
-# returns => {0=>1, 10=>1, 20=>11}
-
-sum(Issue.group(:state_id), :weight))
-# returns => {1=>3542, 2=>6820}
-```
-
-### Estimated Batch Counters
-
-> - [Introduced](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/48233) in GitLab 13.7.
-
-Estimated batch counter functionality handles `ActiveRecord::StatementInvalid` errors
-when used through the provided `estimate_batch_distinct_count` method.
-Errors return a value of `-1`.
-
-WARNING:
-This functionality estimates a distinct count of a specific ActiveRecord_Relation in a given column,
-which uses the [HyperLogLog](http://algo.inria.fr/flajolet/Publications/FlFuGaMe07.pdf) algorithm.
-As the HyperLogLog algorithm is probabilistic, the **results always include error**.
-The highest encountered error rate is 4.9%.
-
-When correctly used, the `estimate_batch_distinct_count` method enables efficient counting over
-columns that contain non-unique values, which can not be assured by other counters.
-
-#### estimate_batch_distinct_count method
-
-Method: `estimate_batch_distinct_count(relation, column = nil, batch_size: nil, start: nil, finish: nil)`
-
-The [method](https://gitlab.com/gitlab-org/gitlab/-/blob/master/lib/gitlab/utils/usage_data.rb#L63)
-includes the following arguments:
-
-- `relation`: The ActiveRecord_Relation to perform the count.
-- `column`: The column to perform the distinct count. The default is the primary key.
-- `batch_size`: From `Gitlab::Database::PostgresHll::BatchDistinctCounter::DEFAULT_BATCH_SIZE`. Default value: 10,000.
-- `start`: The custom start of the batch count, to avoid complex minimum calculations.
-- `finish`: The custom end of the batch count to avoid complex maximum calculations.
-
-The method includes the following prerequisites:
-
-1. The supplied `relation` must include the primary key defined as the numeric column.
- For example: `id bigint NOT NULL`.
-1. The `estimate_batch_distinct_count` can handle a joined relation. To use its ability to
- count non-unique columns, the joined relation **must NOT** have a one-to-many relationship,
- such as `has_many :boards`.
-1. Both `start` and `finish` arguments should always represent primary key relationship values,
- even if the estimated count refers to another column, for example:
-
- ```ruby
- estimate_batch_distinct_count(::Note, :author_id, start: ::Note.minimum(:id), finish: ::Note.maximum(:id))
- ```
-
-Examples:
-
-1. Simple execution of estimated batch counter, with only relation provided,
- returned value represents estimated number of unique values in `id` column
- (which is the primary key) of `Project` relation:
-
- ```ruby
- estimate_batch_distinct_count(::Project)
- ```
-
-1. Execution of estimated batch counter, where provided relation has applied
- additional filter (`.where(time_period)`), number of unique values estimated
- in custom column (`:author_id`), and parameters: `start` and `finish` together
- apply boundaries that defines range of provided relation to analyze:
-
- ```ruby
- estimate_batch_distinct_count(::Note.with_suggestions.where(time_period), :author_id, start: ::Note.minimum(:id), finish: ::Note.maximum(:id))
- ```
-
-1. Execution of estimated batch counter with joined relation (`joins(:cluster)`),
- for a custom column (`'clusters.user_id'`):
-
- ```ruby
- estimate_batch_distinct_count(::Clusters::Applications::CertManager.where(time_period).available.joins(:cluster), 'clusters.user_id')
- ```
-
-When instrumenting metric with usage of estimated batch counter please add
-`_estimated` suffix to its name, for example:
-
-```ruby
- "counts": {
- "ci_builds_estimated": estimate_batch_distinct_count(Ci::Build),
- ...
-```
-
-### Redis Counters
-
-Handles `::Redis::CommandError` and `Gitlab::UsageDataCounters::BaseCounter::UnknownEvent`
-returns -1 when a block is sent or hash with all values -1 when a `counter(Gitlab::UsageDataCounters)` is sent
-different behavior due to 2 different implementations of Redis counter
-
-Method: `redis_usage_data(counter, &block)`
-
-Arguments:
-
-- `counter`: a counter from `Gitlab::UsageDataCounters`, that has `fallback_totals` method implemented
-- or a `block`: which is evaluated
-
-#### Ordinary Redis Counters
-
-Examples of implementation:
-
-- Using Redis methods [`INCR`](https://redis.io/commands/incr), [`GET`](https://redis.io/commands/get), and [`Gitlab::UsageDataCounters::WikiPageCounter`](https://gitlab.com/gitlab-org/gitlab/-/blob/master/lib/gitlab/usage_data_counters/wiki_page_counter.rb)
-- Using Redis methods [`HINCRBY`](https://redis.io/commands/hincrby), [`HGETALL`](https://redis.io/commands/hgetall), and [`Gitlab::UsageCounters::PodLogs`](https://gitlab.com/gitlab-org/gitlab/-/blob/master/lib/gitlab/usage_counters/pod_logs.rb)
-
-##### UsageData API Tracking
-
-
-
-1. Track event using `UsageData` API
-
- Increment event count using ordinary Redis counter, for given event name.
-
- Tracking events using the `UsageData` API requires the `usage_data_api` feature flag to be enabled, which is enabled by default.
-
- API requests are protected by checking for a valid CSRF token.
-
- To be able to increment the values, the related feature `usage_data_` should be enabled.
-
- ```plaintext
- POST /usage_data/increment_counter
- ```
-
- | Attribute | Type | Required | Description |
- | :-------- | :--- | :------- | :---------- |
- | `event` | string | yes | The event name it should be tracked |
-
- Response
-
- - `200` if event was tracked
- - `400 Bad request` if event parameter is missing
- - `401 Unauthorized` if user is not authenticated
- - `403 Forbidden` for invalid CSRF token provided
-
-1. Track events using JavaScript/Vue API helper which calls the API above
-
- Note that `usage_data_api` and `usage_data_#{event_name}` should be enabled to be able to track events
-
- ```javascript
- import api from '~/api';
-
- api.trackRedisCounterEvent('my_already_defined_event_name'),
- ```
-
-#### Redis HLL Counters
-
-WARNING:
-HyperLogLog (HLL) is a probabilistic algorithm and its **results always includes some small error**. According to [Redis documentation](https://redis.io/commands/pfcount), data from
-used HLL implementation is "approximated with a standard error of 0.81%".
-
-With `Gitlab::UsageDataCounters::HLLRedisCounter` we have available data structures used to count unique values.
-
-Implemented using Redis methods [PFADD](https://redis.io/commands/pfadd) and [PFCOUNT](https://redis.io/commands/pfcount).
-
-##### Adding new events
-
-1. Define events in [`known_events`](https://gitlab.com/gitlab-org/gitlab/-/blob/master/lib/gitlab/usage_data_counters/known_events/).
-
- Example event:
-
- ```yaml
- - name: i_compliance_credential_inventory
- category: compliance
- redis_slot: compliance
- expiry: 42 # 6 weeks
- aggregation: weekly
- ```
-
- Keys:
-
- - `name`: unique event name.
-
- Name format `__name`.
-
- Use one of the following prefixes for the event's name:
-
- - `g_` for group, as an event which is tracked for group.
- - `p_` for project, as an event which is tracked for project.
- - `i_` for instance, as an event which is tracked for instance.
- - `a_` for events encompassing all `g_`, `p_`, `i_`.
- - `o_` for other.
-
- Consider including in the event's name the Redis slot to be able to count totals for a specific category.
-
- Example names: `i_compliance_credential_inventory`, `g_analytics_contribution`.
-
- - `category`: event category. Used for getting total counts for events in a category, for easier
- access to a group of events.
- - `redis_slot`: optional Redis slot; default value: event name. Used if needed to calculate totals
- for a group of metrics. Ensure keys are in the same slot. For example:
- `i_compliance_credential_inventory` with `redis_slot: 'compliance'` builds Redis key
- `i_{compliance}_credential_inventory-2020-34`. If `redis_slot` is not defined the Redis key will
- be `{i_compliance_credential_inventory}-2020-34`.
- - `expiry`: expiry time in days. Default: 29 days for daily aggregation and 6 weeks for weekly
- aggregation.
- - `aggregation`: may be set to a `:daily` or `:weekly` key. Defines how counting data is stored in Redis.
- Aggregation on a `daily` basis does not pull more fine grained data.
- - `feature_flag`: optional `default_enabled: :yaml`. If no feature flag is set then the tracking is enabled. For details, see our [GitLab internal Feature flags](feature_flags/) documentation. The feature flags are owned by the group adding the event tracking.
-
-Use one of the following methods to track events:
-
-1. Track event in controller using `RedisTracking` module with `track_redis_hll_event(*controller_actions, name:, if: nil)`.
-
- Arguments:
-
- - `controller_actions`: controller actions we want to track.
- - `name`: event name.
- - `if`: optional custom conditions, using the same format as with Rails callbacks.
-
- Example usage:
-
- ```ruby
- # controller
- class ProjectsController < Projects::ApplicationController
- include RedisTracking
-
- skip_before_action :authenticate_user!, only: :show
- track_redis_hll_event :index, :show, name: 'g_compliance_example_feature_visitors'
-
- def index
- render html: 'index'
- end
-
- def new
- render html: 'new'
- end
-
- def show
- render html: 'show'
- end
- end
- ```
-
-1. Track event in API using `increment_unique_values(event_name, values)` helper method.
-
- To be able to track the event, Usage Ping must be enabled and the event feature `usage_data_` must be enabled.
-
- Arguments:
-
- - `event_name`: event name.
- - `values`: values counted, one value or array of values.
-
- Example usage:
-
- ```ruby
- get ':id/registry/repositories' do
- repositories = ContainerRepositoriesFinder.new(
- user: current_user, subject: user_group
- ).execute
-
- increment_unique_values('i_list_repositories', current_user.id)
-
- present paginate(repositories), with: Entities::ContainerRegistry::Repository, tags: params[:tags], tags_count: params[:tags_count]
- end
- ```
-
-1. Track event using `track_usage_event(event_name, values) in services and GraphQL
-
- Increment unique values count using Redis HLL, for given event name.
-
- Example:
-
- [Track usage event for incident created in service](https://gitlab.com/gitlab-org/gitlab/-/blob/v13.8.3-ee/app/services/issues/update_service.rb#L66)
-
- [Track usage event for incident created in GraphQL](https://gitlab.com/gitlab-org/gitlab/-/blob/v13.8.3-ee/app/graphql/mutations/alert_management/update_alert_status.rb#L16)
-
- ```ruby
- track_usage_event(:incident_management_incident_created, current_user.id)
- ```
-
-
-
-1. Track event using `UsageData` API
-
- Increment unique users count using Redis HLL, for given event name.
-
- Tracking events using the `UsageData` API requires the `usage_data_api` feature flag to be enabled, which is enabled by default.
-
- API requests are protected by checking for a valid CSRF token.
-
- To increment the values, the related feature `usage_data_` should be
- set to `default_enabled: true`. For more information, see
- [Feature flags in development of GitLab](feature_flags/index.md).
-
- ```plaintext
- POST /usage_data/increment_unique_users
- ```
-
- | Attribute | Type | Required | Description |
- | :-------- | :--- | :------- | :---------- |
- | `event` | string | yes | The event name it should be tracked |
-
- Response
-
- Return 200 if tracking failed for any reason.
-
- - `200` if event was tracked or any errors
- - `400 Bad request` if event parameter is missing
- - `401 Unauthorized` if user is not authenticated
- - `403 Forbidden` for invalid CSRF token provided
-
-1. Track events using JavaScript/Vue API helper which calls the API above
-
- Example usage for an existing event already defined in [known events](https://gitlab.com/gitlab-org/gitlab/-/blob/master/lib/gitlab/usage_data_counters/known_events/):
-
- Usage Data API is behind `usage_data_api` feature flag which, as of GitLab 13.7, is
- now set to `default_enabled: true`.
-
- Each event tracked using Usage Data API is behind a feature flag `usage_data_#{event_name}` which should be `default_enabled: true`
-
- ```javascript
- import api from '~/api';
-
- api.trackRedisHllUserEvent('my_already_defined_event_name'),
- ```
-
-1. Get event data using `Gitlab::UsageDataCounters::HLLRedisCounter.unique_events(event_names:, start_date:, end_date:, context: '')`.
-
- Arguments:
-
- - `event_names`: the list of event names.
- - `start_date`: start date of the period for which we want to get event data.
- - `end_date`: end date of the period for which we want to get event data.
- - `context`: context of the event. Allowed values are `default`, `free`, `bronze`, `silver`, `gold`, `starter`, `premium`, `ultimate`.
-
-1. Testing tracking and getting unique events
-
-Trigger events in rails console by using `track_event` method
-
- ```ruby
- Gitlab::UsageDataCounters::HLLRedisCounter.track_event('g_compliance_audit_events', values: 1)
- Gitlab::UsageDataCounters::HLLRedisCounter.track_event('g_compliance_audit_events', values: [2, 3])
- ```
-
-Next, get the unique events for the current week.
-
- ```ruby
- # Get unique events for metric for current_week
- Gitlab::UsageDataCounters::HLLRedisCounter.unique_events(event_names: 'g_compliance_audit_events',
- start_date: Date.current.beginning_of_week, end_date: Date.current.next_week)
- ```
-
-##### Recommendations
-
-We have the following recommendations for [Adding new events](#adding-new-events):
-
-- Event aggregation: weekly.
-- Key expiry time:
- - Daily: 29 days.
- - Weekly: 42 days.
-- When adding new metrics, use a [feature flag](../operations/feature_flags.md) to control the impact.
-- For feature flags triggered by another service, set `default_enabled: false`,
- - Events can be triggered using the `UsageData` API, which helps when there are > 10 events per change
-
-##### Enable/Disable Redis HLL tracking
-
-Events are tracked behind [feature flags](feature_flags/index.md) due to concerns for Redis performance and scalability.
-
-For a full list of events and corresponding feature flags see, [known_events](https://gitlab.com/gitlab-org/gitlab/-/blob/master/lib/gitlab/usage_data_counters/known_events/) files.
-
-To enable or disable tracking for specific event in or , run commands such as the following to
-[enable or disable the corresponding feature](feature_flags/index.md).
-
-```shell
-/chatops run feature set true
-/chatops run feature set false
-```
-
-##### Known events are added automatically in usage data payload
-
-All events added in [`known_events/common.yml`](https://gitlab.com/gitlab-org/gitlab/-/blob/master/lib/gitlab/usage_data_counters/known_events/common.yml) are automatically added to usage data generation under the `redis_hll_counters` key. This column is stored in [version-app as a JSON](https://gitlab.com/gitlab-services/version-gitlab-com/-/blob/master/db/schema.rb#L209).
-For each event we add metrics for the weekly and monthly time frames, and totals for each where applicable:
-
-- `#{event_name}_weekly`: Data for 7 days for daily [aggregation](#adding-new-events) events and data for the last complete week for weekly [aggregation](#adding-new-events) events.
-- `#{event_name}_monthly`: Data for 28 days for daily [aggregation](#adding-new-events) events and data for the last 4 complete weeks for weekly [aggregation](#adding-new-events) events.
-
-Redis HLL implementation calculates automatic total metrics, if there are more than one metric for the same category, aggregation, and Redis slot.
-
-- `#{category}_total_unique_counts_weekly`: Total unique counts for events in the same category for the last 7 days or the last complete week, if events are in the same Redis slot and we have more than one metric.
-- `#{category}_total_unique_counts_monthly`: Total unique counts for events in same category for the last 28 days or the last 4 complete weeks, if events are in the same Redis slot and we have more than one metric.
-
-Example of `redis_hll_counters` data:
-
-```ruby
-{:redis_hll_counters=>
- {"compliance"=>
- {"g_compliance_dashboard_weekly"=>0,
- "g_compliance_dashboard_monthly"=>0,
- "g_compliance_audit_events_weekly"=>0,
- "g_compliance_audit_events_monthly"=>0,
- "compliance_total_unique_counts_weekly"=>0,
- "compliance_total_unique_counts_monthly"=>0},
- "analytics"=>
- {"g_analytics_contribution_weekly"=>0,
- "g_analytics_contribution_monthly"=>0,
- "g_analytics_insights_weekly"=>0,
- "g_analytics_insights_monthly"=>0,
- "analytics_total_unique_counts_weekly"=>0,
- "analytics_total_unique_counts_monthly"=>0},
- "ide_edit"=>
- {"g_edit_by_web_ide_weekly"=>0,
- "g_edit_by_web_ide_monthly"=>0,
- "g_edit_by_sfe_weekly"=>0,
- "g_edit_by_sfe_monthly"=>0,
- "ide_edit_total_unique_counts_weekly"=>0,
- "ide_edit_total_unique_counts_monthly"=>0},
- "search"=>
- {"i_search_total_weekly"=>0, "i_search_total_monthly"=>0, "i_search_advanced_weekly"=>0, "i_search_advanced_monthly"=>0, "i_search_paid_weekly"=>0, "i_search_paid_monthly"=>0, "search_total_unique_counts_weekly"=>0, "search_total_unique_counts_monthly"=>0},
- "source_code"=>{"wiki_action_weekly"=>0, "wiki_action_monthly"=>0}
- }
-```
-
-Example usage:
-
-```ruby
-# Redis Counters
-redis_usage_data(Gitlab::UsageDataCounters::WikiPageCounter)
-redis_usage_data { ::Gitlab::UsageCounters::PodLogs.usage_totals[:total] }
-
-# Define events in common.yml https://gitlab.com/gitlab-org/gitlab/-/blob/master/lib/gitlab/usage_data_counters/known_events/common.yml
-
-# Tracking events
-Gitlab::UsageDataCounters::HLLRedisCounter.track_event('expand_vulnerabilities', values: visitor_id)
-
-# Get unique events for metric
-redis_usage_data { Gitlab::UsageDataCounters::HLLRedisCounter.unique_events(event_names: 'expand_vulnerabilities', start_date: 28.days.ago, end_date: Date.current) }
-```
-
-### Alternative Counters
-
-Handles `StandardError` and fallbacks into -1 this way not all measures fail if we encounter one exception.
-Mainly used for settings and configurations.
-
-Method: `alt_usage_data(value = nil, fallback: -1, &block)`
-
-Arguments:
-
-- `value`: a simple static value in which case the value is simply returned.
-- or a `block`: which is evaluated
-- `fallback: -1`: the common value used for any metrics that are failing.
-
-Usage:
-
-```ruby
-alt_usage_data { Gitlab::VERSION }
-alt_usage_data { Gitlab::CurrentSettings.uuid }
-alt_usage_data(999)
-```
-
-### Adding counters to build new metrics
-
-When adding the results of two counters, use the `add` usage data method that
-handles fallback values and exceptions. It also generates a valid [SQL export](#exporting-usage-ping-sql-queries-and-definitions).
-
-Example usage:
-
-```ruby
-add(User.active, User.bot)
-```
-
-### Prometheus Queries
-
-In those cases where operational metrics should be part of Usage Ping, a database or Redis query is unlikely
-to provide useful data. Instead, Prometheus might be more appropriate, because most GitLab architectural
-components publish metrics to it that can be queried back, aggregated, and included as usage data.
-
-NOTE:
-Prometheus as a data source for Usage Ping is currently only available for single-node Omnibus installations
-that are running the [bundled Prometheus](../administration/monitoring/prometheus/index.md) instance.
-
-To query Prometheus for metrics, a helper method is available to `yield` a fully configured
-`PrometheusClient`, given it is available as per the note above:
-
-```ruby
-with_prometheus_client do |client|
- response = client.query('')
- ...
-end
-```
-
-Please refer to [the `PrometheusClient` definition](https://gitlab.com/gitlab-org/gitlab/-/blob/master/lib/gitlab/prometheus_client.rb)
-for how to use its API to query for data.
-
-## Developing and testing Usage Ping
-
-### 1. Naming and placing the metrics
-
-Add the metric in one of the top level keys
-
-- `license`: for license related metrics.
-- `settings`: for settings related metrics.
-- `counts_weekly`: for counters that have data for the most recent 7 days.
-- `counts_monthly`: for counters that have data for the most recent 28 days.
-- `counts`: for counters that have data for all time.
-
-### 2. Use your Rails console to manually test counters
-
-```ruby
-# count
-Gitlab::UsageData.count(User.active)
-Gitlab::UsageData.count(::Clusters::Cluster.aws_installed.enabled, :cluster_id)
-
-# count distinct
-Gitlab::UsageData.distinct_count(::Project, :creator_id)
-Gitlab::UsageData.distinct_count(::Note.with_suggestions.where(time_period), :author_id, start: ::User.minimum(:id), finish: ::User.maximum(:id))
-```
-
-### 3. Generate the SQL query
-
-Your Rails console returns the generated SQL queries.
-
-Example:
-
-```ruby
-pry(main)> Gitlab::UsageData.count(User.active)
- (2.6ms) SELECT "features"."key" FROM "features"
- (15.3ms) SELECT MIN("users"."id") FROM "users" WHERE ("users"."state" IN ('active')) AND ("users"."user_type" IS NULL OR "users"."user_type" IN (6, 4))
- (2.4ms) SELECT MAX("users"."id") FROM "users" WHERE ("users"."state" IN ('active')) AND ("users"."user_type" IS NULL OR "users"."user_type" IN (6, 4))
- (1.9ms) SELECT COUNT("users"."id") FROM "users" WHERE ("users"."state" IN ('active')) AND ("users"."user_type" IS NULL OR "users"."user_type" IN (6, 4)) AND "users"."id" BETWEEN 1 AND 100000
-```
-
-### 4. Optimize queries with #database-lab
-
-Paste the SQL query into `#database-lab` to see how the query performs at scale.
-
-- `#database-lab` is a Slack channel which uses a production-sized environment to test your queries.
-- GitLab.com’s production database has a 15 second timeout.
-- Any single query must stay below [1 second execution time](query_performance.md#timing-guidelines-for-queries) with cold caches.
-- Add a specialized index on columns involved to reduce the execution time.
-
-To have an understanding of the query's execution we add in the MR description the following information:
-
-- For counters that have a `time_period` test we add information for both cases:
- - `time_period = {}` for all time periods
- - `time_period = { created_at: 28.days.ago..Time.current }` for last 28 days period
-- Execution plan and query time before and after optimization
-- Query generated for the index and time
-- Migration output for up and down execution
-
-We also use `#database-lab` and [explain.depesz.com](https://explain.depesz.com/). For more details, see the [database review guide](database_review.md#preparation-when-adding-or-modifying-queries).
-
-#### Optimization recommendations and examples
-
-- Use specialized indexes [example 1](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/26871), [example 2](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/26445).
-- Use defined `start` and `finish`, and simple queries. These values can be memoized and reused, [example](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/37155).
-- Avoid joins and write the queries as simply as possible, [example](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/36316).
-- Set a custom `batch_size` for `distinct_count`, [example](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/38000).
-
-### 5. Add the metric definition
-
-[Check Metrics Dictionary Guide](usage_ping/metrics_dictionary.md)
-
-When adding, updating, or removing metrics, please update the [Metrics Dictionary](usage_ping/dictionary.md).
-
-### 6. Add new metric to Versions Application
-
-Check if new metrics need to be added to the Versions Application. See `usage_data` [schema](https://gitlab.com/gitlab-services/version-gitlab-com/-/blob/master/db/schema.rb#L147) and usage data [parameters accepted](https://gitlab.com/gitlab-services/version-gitlab-com/-/blob/master/app/services/usage_ping.rb). Any metrics added under the `counts` key are saved in the `stats` column.
-
-### 7. Add the feature label
-
-Add the `feature` label to the Merge Request for new Usage Ping metrics. These are user-facing changes and are part of expanding the Usage Ping feature.
-
-### 8. Add a changelog file
-
-Ensure you comply with the [Changelog entries guide](changelog.md).
-
-### 9. Ask for a Product Intelligence Review
-
-On GitLab.com, we have DangerBot setup to monitor Product Intelligence related files and DangerBot recommends a [Product Intelligence review](usage_ping/product_intelligence_review.md). Mention `@gitlab-org/growth/product_intelligence/engineers` in your MR for a review.
-
-### 10. Verify your metric
-
-On GitLab.com, the Product Intelligence team regularly monitors Usage Ping. They may alert you that your metrics need further optimization to run quicker and with greater success. You may also use the [Usage Ping QA dashboard](https://app.periscopedata.com/app/gitlab/632033/Usage-Ping-QA) to check how well your metric performs. The dashboard allows filtering by GitLab version, by "Self-managed" & "SaaS" and shows you how many failures have occurred for each metric. Whenever you notice a high failure rate, you may re-optimize your metric.
-
-### Optional: Test Prometheus based Usage Ping
-
-If the data submitted includes metrics [queried from Prometheus](#prometheus-queries) that you would like to inspect and verify,
-then you need to ensure that a Prometheus server is running locally, and that furthermore the respective GitLab components
-are exporting metrics to it. If you do not need to test data coming from Prometheus, no further action
-is necessary. Usage Ping should degrade gracefully in the absence of a running Prometheus server.
-
-There are three kinds of components that may export data to Prometheus, and which are included in Usage Ping:
-
-- [`node_exporter`](https://github.com/prometheus/node_exporter) - Exports node metrics from the host machine
-- [`gitlab-exporter`](https://gitlab.com/gitlab-org/gitlab-exporter) - Exports process metrics from various GitLab components
-- various GitLab services such as Sidekiq and the Rails server that export their own metrics
-
-#### Test with an Omnibus container
-
-This is the recommended approach to test Prometheus based Usage Ping.
-
-The easiest way to verify your changes is to build a new Omnibus image from your code branch by using CI, then download the image
-and run a local container instance:
-
-1. From your merge request, click on the `qa` stage, then trigger the `package-and-qa` job. This job triggers an Omnibus
-build in a [downstream pipeline of the `omnibus-gitlab-mirror` project](https://gitlab.com/gitlab-org/build/omnibus-gitlab-mirror/-/pipelines).
-1. In the downstream pipeline, wait for the `gitlab-docker` job to finish.
-1. Open the job logs and locate the full container name including the version. It takes the following form: `registry.gitlab.com/gitlab-org/build/omnibus-gitlab-mirror/gitlab-ee:`.
-1. On your local machine, make sure you are signed in to the GitLab Docker registry. You can find the instructions for this in
-[Authenticate to the GitLab Container Registry](../user/packages/container_registry/index.md#authenticate-with-the-container-registry).
-1. Once signed in, download the new image by using `docker pull registry.gitlab.com/gitlab-org/build/omnibus-gitlab-mirror/gitlab-ee:`
-1. For more information about working with and running Omnibus GitLab containers in Docker, please refer to [GitLab Docker images](https://docs.gitlab.com/omnibus/docker/README.html) in the Omnibus documentation.
-
-#### Test with GitLab development toolkits
-
-This is the less recommended approach, because it comes with a number of difficulties when emulating a real GitLab deployment.
-
-The [GDK](https://gitlab.com/gitlab-org/gitlab-development-kit) is not set up to run a Prometheus server or `node_exporter` alongside other GitLab components. If you would
-like to do so, [Monitoring the GDK with Prometheus](https://gitlab.com/gitlab-org/gitlab-development-kit/-/blob/master/doc/howto/prometheus/index.md#monitoring-the-gdk-with-prometheus) is a good start.
-
-The [GCK](https://gitlab.com/gitlab-org/gitlab-compose-kit) has limited support for testing Prometheus based Usage Ping.
-By default, it already comes with a fully configured Prometheus service that is set up to scrape a number of components,
-but with the following limitations:
-
-- It does not run a `gitlab-exporter` instance, so several `process_*` metrics from services such as Gitaly may be missing.
-- While it runs a `node_exporter`, `docker-compose` services emulate hosts, meaning that it would normally report itself to not be associated
-with any of the other services that are running. That is not how node metrics are reported in a production setup, where `node_exporter`
-always runs as a process alongside other GitLab components on any given node. From Usage Ping's perspective none of the node data would therefore
-appear to be associated to any of the services running, because they all appear to be running on different hosts. To alleviate this problem, the `node_exporter` in GCK was arbitrarily "assigned" to the `web` service, meaning only for this service `node_*` metrics appears in Usage Ping.
-
-## Aggregated metrics
-
-> - [Introduced](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/45979) in GitLab 13.6.
-
-WARNING:
-This feature is intended solely for internal GitLab use.
-
-To add data for aggregated metrics into Usage Ping payload you should add corresponding definition in [`aggregated_metrics`](https://gitlab.com/gitlab-org/gitlab/-/blob/master/lib/gitlab/usage_data_counters/aggregated_metrics/). Each aggregate definition includes following parts:
-
-- `name`: Unique name under which the aggregate metric is added to the Usage Ping payload.
-- `operator`: Operator that defines how the aggregated metric data is counted. Available operators are:
- - `OR`: Removes duplicates and counts all entries that triggered any of listed events.
- - `AND`: Removes duplicates and counts all elements that were observed triggering all of following events.
-- `source`: Data source used to collect all events data included in aggregated metric. Valid data sources are:
- - [`database`](#database-sourced-aggregated-metrics)
- - [`redis`](#redis-sourced-aggregated-metrics)
-- `events`: list of events names to aggregate into metric. All events in this list must
- relay on the same data source. Additional data source requirements are described in the
- [Database sourced aggregated metrics](#database-sourced-aggregated-metrics) and
- [Redis sourced aggregated metrics](#redis-sourced-aggregated-metrics) sections.
-- `feature_flag`: Name of [development feature flag](feature_flags/development.md#development-type)
- that is checked before metrics aggregation is performed. Corresponding feature flag
- should have `default_enabled` attribute set to `false`. The `feature_flag` attribute
- is optional and can be omitted. When `feature_flag` is missing, no feature flag is checked.
-
-Example aggregated metric entries:
-
-```yaml
-- name: product_analytics_test_metrics_union_redis_sourced
- operator: OR
- events: ['i_search_total', 'i_search_advanced', 'i_search_paid']
- source: redis
-- name: product_analytics_test_metrics_intersection_with_feautre_flag_database_sourced
- operator: AND
- source: database
- events: ['dependency_scanning_pipeline_all_time', 'container_scanning_pipeline_all_time']
- feature_flag: example_aggregated_metric
-```
-
-Aggregated metrics are added under `aggregated_metrics` key in both `counts_weekly` and `counts_monthly` top level keys in Usage Ping payload.
-
-```ruby
-{
- :counts_monthly => {
- :deployments => 1003,
- :successful_deployments => 78,
- :failed_deployments => 275,
- :packages => 155,
- :personal_snippets => 2106,
- :project_snippets => 407,
- :promoted_issues => 719,
- :aggregated_metrics => {
- :product_analytics_test_metrics_union => 7,
- :product_analytics_test_metrics_intersection_with_feautre_flag => 2
- },
- :snippets => 2513
- }
-}
-```
-
-### Redis sourced aggregated metrics
-
-> - [Introduced](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/45979) in GitLab 13.6.
-
-To declare the aggregate of events collected with [Redis HLL Counters](#redis-hll-counters),
-you must fulfill the following requirements:
-
-1. All events listed at `events` attribute must come from
- [`known_events/*.yml`](#known-events-are-added-automatically-in-usage-data-payload) files.
-1. All events listed at `events` attribute must have the same `redis_slot` attribute.
-1. All events listed at `events` attribute must have the same `aggregation` attribute.
-
-### Database sourced aggregated metrics
-
-> - [Introduced](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/52784) in GitLab 13.9.
-> - It's [deployed behind a feature flag](../user/feature_flags.md), disabled by default.
-> - It's enabled on GitLab.com.
-
-To declare an aggregate of metrics based on events collected from database, follow
-these steps:
-
-1. [Persist the metrics for aggregation](#persist-metrics-for-aggregation).
-1. [Add new aggregated metric definition](#add-new-aggregated-metric-definition).
-
-#### Persist metrics for aggregation
-
-Only metrics calculated with [Estimated Batch Counters](#estimated-batch-counters)
-can be persisted for database sourced aggregated metrics. To persist a metric,
-inject a Ruby block into the
-[estimate_batch_distinct_count](#estimate_batch_distinct_count-method) method.
-This block should invoke the
-`Gitlab::Usage::Metrics::Aggregates::Sources::PostgresHll.save_aggregated_metrics`
-[method](https://gitlab.com/gitlab-org/gitlab/-/blob/master/lib/gitlab/usage/metrics/aggregates/sources/postgres_hll.rb#L21),
-which stores `estimate_batch_distinct_count` results for future use in aggregated metrics.
-
-The `Gitlab::Usage::Metrics::Aggregates::Sources::PostgresHll.save_aggregated_metrics`
-method accepts the following arguments:
-
-- `metric_name`: The name of metric to use for aggregations. Should be the same
- as the key under which the metric is added into Usage Ping.
-- `recorded_at_timestamp`: The timestamp representing the moment when a given
- Usage Ping payload was collected. You should use the convenience method `recorded_at`
- to fill `recorded_at_timestamp` argument, like this: `recorded_at_timestamp: recorded_at`
-- `time_period`: The time period used to build the `relation` argument passed into
- `estimate_batch_distinct_count`. To collect the metric with all available historical
- data, set a `nil` value as time period: `time_period: nil`.
-- `data`: HyperLogLog buckets structure representing unique entries in `relation`.
- The `estimate_batch_distinct_count` method always passes the correct argument
- into the block, so `data` argument must always have a value equal to block argument,
- like this: `data: result`
-
-Example metrics persistence:
-
-```ruby
-class UsageData
- def count_secure_pipelines(time_period)
- ...
- relation = ::Security::Scan.latest_successful_by_build.by_scan_types(scan_type).where(security_scans: time_period)
-
- pipelines_with_secure_jobs['dependency_scanning_pipeline'] = estimate_batch_distinct_count(relation, :commit_id, batch_size: 1000, start: start_id, finish: finish_id) do |result|
- ::Gitlab::Usage::Metrics::Aggregates::Sources::PostgresHll
- .save_aggregated_metrics(metric_name: 'dependency_scanning_pipeline', recorded_at_timestamp: recorded_at, time_period: time_period, data: result)
- end
- end
-end
-```
-
-#### Add new aggregated metric definition
-
-After all metrics are persisted, you can add an aggregated metric definition at
-[`aggregated_metrics/`](https://gitlab.com/gitlab-org/gitlab/-/blob/master/lib/gitlab/usage_data_counters/aggregated_metrics/). When adding definitions for metrics names listed in the
-`events:` attribute, use the same names you passed in the `metric_name` argument
-while persisting metrics in previous step.
-
-Example definition:
-
-```yaml
-- name: product_analytics_test_metrics_intersection_database_sourced
- operator: AND
- source: database
- events: ['dependency_scanning_pipeline', 'container_scanning_pipeline']
-```
-
-## Example Usage Ping payload
-
-The following is example content of the Usage Ping payload.
-
-```json
-{
- "uuid": "0000000-0000-0000-0000-000000000000",
- "hostname": "example.com",
- "version": "12.10.0-pre",
- "installation_type": "omnibus-gitlab",
- "active_user_count": 999,
- "recorded_at": "2020-04-17T07:43:54.162+00:00",
- "edition": "EEU",
- "license_md5": "00000000000000000000000000000000",
- "license_id": null,
- "historical_max_users": 999,
- "licensee": {
- "Name": "ABC, Inc.",
- "Email": "email@example.com",
- "Company": "ABC, Inc."
- },
- "license_user_count": 999,
- "license_starts_at": "2020-01-01",
- "license_expires_at": "2021-01-01",
- "license_plan": "ultimate",
- "license_add_ons": {
- },
- "license_trial": false,
- "counts": {
- "assignee_lists": 999,
- "boards": 999,
- "ci_builds": 999,
- ...
- },
- "container_registry_enabled": true,
- "dependency_proxy_enabled": false,
- "gitlab_shared_runners_enabled": true,
- "gravatar_enabled": true,
- "influxdb_metrics_enabled": true,
- "ldap_enabled": false,
- "mattermost_enabled": false,
- "omniauth_enabled": true,
- "prometheus_enabled": false,
- "prometheus_metrics_enabled": false,
- "reply_by_email_enabled": "incoming+%{key}@incoming.gitlab.com",
- "signup_enabled": true,
- "web_ide_clientside_preview_enabled": true,
- "ingress_modsecurity_enabled": true,
- "projects_with_expiration_policy_disabled": 999,
- "projects_with_expiration_policy_enabled": 999,
- ...
- "elasticsearch_enabled": true,
- "license_trial_ends_on": null,
- "geo_enabled": false,
- "git": {
- "version": {
- "major": 2,
- "minor": 26,
- "patch": 1
- }
- },
- "gitaly": {
- "version": "12.10.0-rc1-93-g40980d40",
- "servers": 56,
- "clusters": 14,
- "filesystems": [
- "EXT_2_3_4"
- ]
- },
- "gitlab_pages": {
- "enabled": true,
- "version": "1.17.0"
- },
- "container_registry_server": {
- "vendor": "gitlab",
- "version": "2.9.1-gitlab"
- },
- "database": {
- "adapter": "postgresql",
- "version": "9.6.15",
- "pg_system_id": 6842684531675334351
- },
- "analytics_unique_visits": {
- "g_analytics_contribution": 999,
- ...
- },
- "usage_activity_by_stage": {
- "configure": {
- "project_clusters_enabled": 999,
- ...
- },
- "create": {
- "merge_requests": 999,
- ...
- },
- "manage": {
- "events": 999,
- ...
- },
- "monitor": {
- "clusters": 999,
- ...
- },
- "package": {
- "projects_with_packages": 999
- },
- "plan": {
- "issues": 999,
- ...
- },
- "release": {
- "deployments": 999,
- ...
- },
- "secure": {
- "user_container_scanning_jobs": 999,
- ...
- },
- "verify": {
- "ci_builds": 999,
- ...
- }
- },
- "usage_activity_by_stage_monthly": {
- "configure": {
- "project_clusters_enabled": 999,
- ...
- },
- "create": {
- "merge_requests": 999,
- ...
- },
- "manage": {
- "events": 999,
- ...
- },
- "monitor": {
- "clusters": 999,
- ...
- },
- "package": {
- "projects_with_packages": 999
- },
- "plan": {
- "issues": 999,
- ...
- },
- "release": {
- "deployments": 999,
- ...
- },
- "secure": {
- "user_container_scanning_jobs": 999,
- ...
- },
- "verify": {
- "ci_builds": 999,
- ...
- }
- },
- "topology": {
- "duration_s": 0.013836685999194742,
- "application_requests_per_hour": 4224,
- "query_apdex_weekly_average": 0.996,
- "failures": [],
- "nodes": [
- {
- "node_memory_total_bytes": 33269903360,
- "node_memory_utilization": 0.35,
- "node_cpus": 16,
- "node_cpu_utilization": 0.2,
- "node_uname_info": {
- "machine": "x86_64",
- "sysname": "Linux",
- "release": "4.19.76-linuxkit"
- },
- "node_services": [
- {
- "name": "web",
- "process_count": 16,
- "process_memory_pss": 233349888,
- "process_memory_rss": 788220927,
- "process_memory_uss": 195295487,
- "server": "puma"
- },
- {
- "name": "sidekiq",
- "process_count": 1,
- "process_memory_pss": 734080000,
- "process_memory_rss": 750051328,
- "process_memory_uss": 731533312
- },
- ...
- ],
- ...
- },
- ...
- ]
- }
-}
-```
-
-## Notable changes
-
-In GitLab 13.5, `pg_system_id` was added to send the [PostgreSQL system identifier](https://www.2ndquadrant.com/en/blog/support-for-postgresqls-system-identifier-in-barman/).
-
-## Exporting Usage Ping SQL queries and definitions
-
-Two Rake tasks exist to export Usage Ping definitions.
-
-- The Rake tasks export the raw SQL queries for `count`, `distinct_count`, `sum`.
-- The Rake tasks export the Redis counter class or the line of the Redis block for `redis_usage_data`.
-- The Rake tasks calculate the `alt_usage_data` metrics.
-
-In the home directory of your local GitLab installation run the following Rake tasks for the YAML and JSON versions respectively:
-
-```shell
-# for YAML export
-bin/rake gitlab:usage_data:dump_sql_in_yaml
-
-# for JSON export
-bin/rake gitlab:usage_data:dump_sql_in_json
-
-# You may pipe the output into a file
-bin/rake gitlab:usage_data:dump_sql_in_yaml > ~/Desktop/usage-metrics-2020-09-02.yaml
-```
-
-## Generating and troubleshooting usage ping
-
-To get a usage ping, or to troubleshoot caching issues on your GitLab instance, please follow [instructions to generate usage ping](../administration/troubleshooting/gitlab_rails_cheat_sheet.md#generate-usage-ping).
+
+
diff --git a/doc/development/usage_ping/index.md b/doc/development/usage_ping/index.md
new file mode 100644
index 00000000000..6d532be721e
--- /dev/null
+++ b/doc/development/usage_ping/index.md
@@ -0,0 +1,1296 @@
+---
+stage: Growth
+group: Product Intelligence
+info: To determine the technical writer assigned to the Stage/Group associated with this page, see https://about.gitlab.com/handbook/engineering/ux/technical-writing/#assignments
+---
+
+# Usage Ping Guide
+
+> Introduced in GitLab Ultimate 11.2, more statistics.
+
+This guide describes Usage Ping's purpose and how it's implemented.
+
+For more information about Product Intelligence, see:
+
+- [Product Intelligence Guide](https://about.gitlab.com/handbook/product/product-intelligence-guide/)
+- [Snowplow Guide](../snowplow.md)
+
+More links:
+
+- [Product Intelligence Direction](https://about.gitlab.com/direction/product-intelligence/)
+- [Data Analysis Process](https://about.gitlab.com/handbook/business-ops/data-team/#data-analysis-process/)
+- [Data for Product Managers](https://about.gitlab.com/handbook/business-ops/data-team/programs/data-for-product-managers/)
+- [Data Infrastructure](https://about.gitlab.com/handbook/business-ops/data-team/platform/infrastructure/)
+
+## What is Usage Ping?
+
+- GitLab sends a weekly payload containing usage data to GitLab Inc. Usage Ping provides high-level data to help our product, support, and sales teams. It does not send any project names, usernames, or any other specific data. The information from the usage ping is not anonymous, it is linked to the hostname of the instance. Sending usage ping is optional, and any instance can disable analytics.
+- The usage data is primarily composed of row counts for different tables in the instance’s database. By comparing these counts month over month (or week over week), we can get a rough sense for how an instance is using the different features in the product. In addition to counts, other facts
+ that help us classify and understand GitLab installations are collected.
+- Usage ping is important to GitLab as we use it to calculate our Stage Monthly Active Users (SMAU) which helps us measure the success of our stages and features.
+- While usage ping is enabled, GitLab gathers data from the other instances and can show usage statistics of your instance to your users.
+
+### Why should we enable Usage Ping?
+
+- The main purpose of Usage Ping is to build a better GitLab. Data about how GitLab is used is collected to better understand feature/stage adoption and usage, which helps us understand how GitLab is adding value and helps our team better understand the reasons why people use GitLab and with this knowledge we're able to make better product decisions.
+- As a benefit of having the usage ping active, GitLab lets you analyze the users’ activities over time of your GitLab installation.
+- As a benefit of having the usage ping active, GitLab provides you with The DevOps Report,which gives you an overview of your entire instance’s adoption of Concurrent DevOps from planning to monitoring.
+- You get better, more proactive support. (assuming that our TAMs and support organization used the data to deliver more value)
+- You get insight and advice into how to get the most value out of your investment in GitLab. Wouldn't you want to know that a number of features or values are not being adopted in your organization?
+- You get a report that illustrates how you compare against other similar organizations (anonymized), with specific advice and recommendations on how to improve your DevOps processes.
+- Usage Ping is enabled by default. To disable it, see [Disable Usage Ping](#disable-usage-ping).
+
+### Limitations
+
+- Usage Ping does not track frontend events things like page views, link clicks, or user sessions, and only focuses on aggregated backend events.
+- Because of these limitations we recommend instrumenting your products with Snowplow for more detailed analytics on GitLab.com and use Usage Ping to track aggregated backend events on self-managed.
+
+## Usage Ping payload
+
+You can view the exact JSON payload sent to GitLab Inc. in the administration panel. To view the payload:
+
+1. Navigate to **Admin Area > Settings > Metrics and profiling**.
+1. Expand the **Usage statistics** section.
+1. Click the **Preview payload** button.
+
+For an example payload, see [Example Usage Ping payload](#example-usage-ping-payload).
+
+## Disable Usage Ping
+
+To disable Usage Ping in the GitLab UI, go to the **Settings** page of your administration panel and uncheck the **Usage Ping** checkbox.
+
+To disable Usage Ping and prevent it from being configured in the future through the administration panel, Omnibus installs can set the following in [`gitlab.rb`](https://docs.gitlab.com/omnibus/settings/configuration.html#configuration-options):
+
+```ruby
+gitlab_rails['usage_ping_enabled'] = false
+```
+
+Source installations can set the following in `gitlab.yml`:
+
+```yaml
+production: &base
+ # ...
+ gitlab:
+ # ...
+ usage_ping_enabled: false
+```
+
+## Usage Ping request flow
+
+The following example shows a basic request/response flow between a GitLab instance, the Versions Application, the License Application, Salesforce, the GitLab S3 Bucket, the GitLab Snowflake Data Warehouse, and Sisense:
+
+```mermaid
+sequenceDiagram
+ participant GitLab Instance
+ participant Versions Application
+ participant Licenses Application
+ participant Salesforce
+ participant S3 Bucket
+ participant Snowflake DW
+ participant Sisense Dashboards
+ GitLab Instance->>Versions Application: Send Usage Ping
+ loop Process usage data
+ Versions Application->>Versions Application: Parse usage data
+ Versions Application->>Versions Application: Write to database
+ Versions Application->>Versions Application: Update license ping time
+ end
+ loop Process data for Salesforce
+ Versions Application-xLicenses Application: Request Zuora subscription id
+ Licenses Application-xVersions Application: Zuora subscription id
+ Versions Application-xSalesforce: Request Zuora account id by Zuora subscription id
+ Salesforce-xVersions Application: Zuora account id
+ Versions Application-xSalesforce: Usage data for the Zuora account
+ end
+ Versions Application->>S3 Bucket: Export Versions database
+ S3 Bucket->>Snowflake DW: Import data
+ Snowflake DW->>Snowflake DW: Transform data using dbt
+ Snowflake DW->>Sisense Dashboards: Data available for querying
+ Versions Application->>GitLab Instance: DevOps Report (Conversational Development Index)
+```
+
+## How Usage Ping works
+
+1. The Usage Ping [cron job](https://gitlab.com/gitlab-org/gitlab/-/blob/master/app/workers/gitlab_usage_ping_worker.rb#L30) is set in Sidekiq to run weekly.
+1. When the cron job runs, it calls [`Gitlab::UsageData.to_json`](https://gitlab.com/gitlab-org/gitlab/-/blob/master/app/services/submit_usage_ping_service.rb#L22).
+1. `Gitlab::UsageData.to_json` [cascades down](https://gitlab.com/gitlab-org/gitlab/-/blob/master/lib/gitlab/usage_data.rb#L22) to ~400+ other counter method calls.
+1. The response of all methods calls are [merged together](https://gitlab.com/gitlab-org/gitlab/-/blob/master/lib/gitlab/usage_data.rb#L14) into a single JSON payload in `Gitlab::UsageData.to_json`.
+1. The JSON payload is then [posted to the Versions application]( https://gitlab.com/gitlab-org/gitlab/-/blob/master/app/services/submit_usage_ping_service.rb#L20)
+ If a firewall exception is needed, the required URL depends on several things. If
+ the hostname is `version.gitlab.com`, the protocol is `TCP`, and the port number is `443`,
+ the required URL is .
+
+## Usage Ping Metric Life cycle
+
+### 1. New metrics addition
+
+Please follow the [Implementing Usage Ping](#implementing-usage-ping) guide.
+
+### 2. Existing metric change
+
+Because we do not control when customers update their self-managed instances of GitLab,
+we **STRONGLY DISCOURAGE** changes to the logic used to calculate any metric.
+Any such changes lead to inconsistent reports from multiple GitLab instances.
+If there is a problem with an existing metric, it's best to deprecate the existing metric,
+and use it, side by side, with the desired new metric.
+
+Example:
+Consider following change. Before GitLab 12.6, the `example_metric` was implemented as:
+
+```ruby
+{
+ ...
+ example_metric: distinct_count(Project, :creator_id)
+}
+```
+
+For GitLab 12.6, the metric was changed to filter out archived projects:
+
+```ruby
+{
+ ...
+ example_metric: distinct_count(Project.non_archived, :creator_id)
+}
+```
+
+In this scenario all instances running up to GitLab 12.5 continue to report `example_metric`,
+including all archived projects, while all instances running GitLab 12.6 and higher filters
+out such projects. As Usage Ping data is collected from all reporting instances, the
+resulting dataset includes mixed data, which distorts any following business analysis.
+
+The correct approach is to add a new metric for GitLab 12.6 release with updated logic:
+
+```ruby
+{
+ ...
+ example_metric_without_archived: distinct_count(Project.non_archived, :creator_id)
+}
+```
+
+and update existing business analysis artefacts to use `example_metric_without_archived` instead of `example_metric`
+
+### 3. Metrics deprecation and removal
+
+The process for deprecating and removing metrics is under development. For
+more information, see the following [issue](https://gitlab.com/gitlab-org/gitlab/-/issues/284637).
+
+## Implementing Usage Ping
+
+Usage Ping consists of two kinds of data, counters and observations. Counters track how often a certain event
+happened over time, such as how many CI pipelines have run. They are monotonic and always trend up.
+Observations are facts collected from one or more GitLab instances and can carry arbitrary data. There are no
+general guidelines around how to collect those, due to the individual nature of that data.
+
+There are several types of counters which are all found in `usage_data.rb`:
+
+- **Ordinary Batch Counters:** Simple count of a given ActiveRecord_Relation
+- **Distinct Batch Counters:** Distinct count of a given ActiveRecord_Relation in a given column
+- **Sum Batch Counters:** Sum the values of a given ActiveRecord_Relation in a given column
+- **Alternative Counters:** Used for settings and configurations
+- **Redis Counters:** Used for in-memory counts.
+
+NOTE:
+Only use the provided counter methods. Each counter method contains a built in fail safe to isolate each counter to avoid breaking the entire Usage Ping.
+
+### Why batch counting
+
+For large tables, PostgreSQL can take a long time to count rows due to MVCC [(Multi-version Concurrency Control)](https://en.wikipedia.org/wiki/Multiversion_concurrency_control). Batch counting is a counting method where a single large query is broken into multiple smaller queries. For example, instead of a single query querying 1,000,000 records, with batch counting, you can execute 100 queries of 10,000 records each. Batch counting is useful for avoiding database timeouts as each batch query is significantly shorter than one single long running query.
+
+For GitLab.com, there are extremely large tables with 15 second query timeouts, so we use batch counting to avoid encountering timeouts. Here are the sizes of some GitLab.com tables:
+
+| Table | Row counts in millions |
+|------------------------------|------------------------|
+| `merge_request_diff_commits` | 2280 |
+| `ci_build_trace_sections` | 1764 |
+| `merge_request_diff_files` | 1082 |
+| `events` | 514 |
+
+We have several batch counting methods available:
+
+- [Ordinary Batch Counters](#ordinary-batch-counters)
+- [Distinct Batch Counters](#distinct-batch-counters)
+- [Sum Batch Counters](#sum-batch-counters)
+- [Estimated Batch Counters](#estimated-batch-counters)
+
+Batch counting requires indexes on columns to calculate max, min, and range queries. In some cases,
+you may need to add a specialized index on the columns involved in a counter.
+
+### Ordinary Batch Counters
+
+Handles `ActiveRecord::StatementInvalid` error
+
+Simple count of a given `ActiveRecord_Relation`, does a non-distinct batch count, smartly reduces `batch_size`, and handles errors.
+
+Method: `count(relation, column = nil, batch: true, start: nil, finish: nil)`
+
+Arguments:
+
+- `relation` the ActiveRecord_Relation to perform the count
+- `column` the column to perform the count on, by default is the primary key
+- `batch`: default `true` to use batch counting
+- `start`: custom start of the batch counting to avoid complex min calculations
+- `end`: custom end of the batch counting to avoid complex min calculations
+
+Examples:
+
+```ruby
+count(User.active)
+count(::Clusters::Cluster.aws_installed.enabled, :cluster_id)
+count(::Clusters::Cluster.aws_installed.enabled, :cluster_id, start: ::Clusters::Cluster.minimum(:id), finish: ::Clusters::Cluster.maximum(:id))
+```
+
+### Distinct Batch Counters
+
+Handles `ActiveRecord::StatementInvalid` error
+
+Distinct count of a given `ActiveRecord_Relation` on given column, a distinct batch count, smartly reduces `batch_size`, and handles errors.
+
+Method: `distinct_count(relation, column = nil, batch: true, batch_size: nil, start: nil, finish: nil)`
+
+Arguments:
+
+- `relation` the ActiveRecord_Relation to perform the count
+- `column` the column to perform the distinct count, by default is the primary key
+- `batch`: default `true` to use batch counting
+- `batch_size`: if none set it uses default value 10000 from `Gitlab::Database::BatchCounter`
+- `start`: custom start of the batch counting to avoid complex min calculations
+- `end`: custom end of the batch counting to avoid complex min calculations
+
+WARNING:
+Counting over non-unique columns can lead to performance issues. Take a look at the [iterating tables in batches](../iterating_tables_in_batches.md) guide for more details.
+
+Examples:
+
+```ruby
+distinct_count(::Project, :creator_id)
+distinct_count(::Note.with_suggestions.where(time_period), :author_id, start: ::User.minimum(:id), finish: ::User.maximum(:id))
+distinct_count(::Clusters::Applications::CertManager.where(time_period).available.joins(:cluster), 'clusters.user_id')
+```
+
+### Sum Batch Counters
+
+Handles `ActiveRecord::StatementInvalid` error
+
+Sum the values of a given ActiveRecord_Relation on given column and handles errors.
+
+Method: `sum(relation, column, batch_size: nil, start: nil, finish: nil)`
+
+Arguments:
+
+- `relation` the ActiveRecord_Relation to perform the operation
+- `column` the column to sum on
+- `batch_size`: if none set it uses default value 1000 from `Gitlab::Database::BatchCounter`
+- `start`: custom start of the batch counting to avoid complex min calculations
+- `end`: custom end of the batch counting to avoid complex min calculations
+
+Examples:
+
+```ruby
+sum(JiraImportState.finished, :imported_issues_count)
+```
+
+### Grouping & Batch Operations
+
+The `count`, `distinct_count`, and `sum` batch counters can accept an `ActiveRecord::Relation`
+object, which groups by a specified column. With a grouped relation, the methods do batch counting,
+handle errors, and returns a hash table of key-value pairs.
+
+Examples:
+
+```ruby
+count(Namespace.group(:type))
+# returns => {nil=>179, "Group"=>54}
+
+distinct_count(Project.group(:visibility_level), :creator_id)
+# returns => {0=>1, 10=>1, 20=>11}
+
+sum(Issue.group(:state_id), :weight))
+# returns => {1=>3542, 2=>6820}
+```
+
+### Estimated Batch Counters
+
+> - [Introduced](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/48233) in GitLab 13.7.
+
+Estimated batch counter functionality handles `ActiveRecord::StatementInvalid` errors
+when used through the provided `estimate_batch_distinct_count` method.
+Errors return a value of `-1`.
+
+WARNING:
+This functionality estimates a distinct count of a specific ActiveRecord_Relation in a given column,
+which uses the [HyperLogLog](http://algo.inria.fr/flajolet/Publications/FlFuGaMe07.pdf) algorithm.
+As the HyperLogLog algorithm is probabilistic, the **results always include error**.
+The highest encountered error rate is 4.9%.
+
+When correctly used, the `estimate_batch_distinct_count` method enables efficient counting over
+columns that contain non-unique values, which can not be assured by other counters.
+
+#### estimate_batch_distinct_count method
+
+Method: `estimate_batch_distinct_count(relation, column = nil, batch_size: nil, start: nil, finish: nil)`
+
+The [method](https://gitlab.com/gitlab-org/gitlab/-/blob/master/lib/gitlab/utils/usage_data.rb#L63)
+includes the following arguments:
+
+- `relation`: The ActiveRecord_Relation to perform the count.
+- `column`: The column to perform the distinct count. The default is the primary key.
+- `batch_size`: From `Gitlab::Database::PostgresHll::BatchDistinctCounter::DEFAULT_BATCH_SIZE`. Default value: 10,000.
+- `start`: The custom start of the batch count, to avoid complex minimum calculations.
+- `finish`: The custom end of the batch count to avoid complex maximum calculations.
+
+The method includes the following prerequisites:
+
+1. The supplied `relation` must include the primary key defined as the numeric column.
+ For example: `id bigint NOT NULL`.
+1. The `estimate_batch_distinct_count` can handle a joined relation. To use its ability to
+ count non-unique columns, the joined relation **must NOT** have a one-to-many relationship,
+ such as `has_many :boards`.
+1. Both `start` and `finish` arguments should always represent primary key relationship values,
+ even if the estimated count refers to another column, for example:
+
+ ```ruby
+ estimate_batch_distinct_count(::Note, :author_id, start: ::Note.minimum(:id), finish: ::Note.maximum(:id))
+ ```
+
+Examples:
+
+1. Simple execution of estimated batch counter, with only relation provided,
+ returned value represents estimated number of unique values in `id` column
+ (which is the primary key) of `Project` relation:
+
+ ```ruby
+ estimate_batch_distinct_count(::Project)
+ ```
+
+1. Execution of estimated batch counter, where provided relation has applied
+ additional filter (`.where(time_period)`), number of unique values estimated
+ in custom column (`:author_id`), and parameters: `start` and `finish` together
+ apply boundaries that defines range of provided relation to analyze:
+
+ ```ruby
+ estimate_batch_distinct_count(::Note.with_suggestions.where(time_period), :author_id, start: ::Note.minimum(:id), finish: ::Note.maximum(:id))
+ ```
+
+1. Execution of estimated batch counter with joined relation (`joins(:cluster)`),
+ for a custom column (`'clusters.user_id'`):
+
+ ```ruby
+ estimate_batch_distinct_count(::Clusters::Applications::CertManager.where(time_period).available.joins(:cluster), 'clusters.user_id')
+ ```
+
+When instrumenting metric with usage of estimated batch counter please add
+`_estimated` suffix to its name, for example:
+
+```ruby
+ "counts": {
+ "ci_builds_estimated": estimate_batch_distinct_count(Ci::Build),
+ ...
+```
+
+### Redis Counters
+
+Handles `::Redis::CommandError` and `Gitlab::UsageDataCounters::BaseCounter::UnknownEvent`
+returns -1 when a block is sent or hash with all values -1 when a `counter(Gitlab::UsageDataCounters)` is sent
+different behavior due to 2 different implementations of Redis counter
+
+Method: `redis_usage_data(counter, &block)`
+
+Arguments:
+
+- `counter`: a counter from `Gitlab::UsageDataCounters`, that has `fallback_totals` method implemented
+- or a `block`: which is evaluated
+
+#### Ordinary Redis Counters
+
+Examples of implementation:
+
+- Using Redis methods [`INCR`](https://redis.io/commands/incr), [`GET`](https://redis.io/commands/get), and [`Gitlab::UsageDataCounters::WikiPageCounter`](https://gitlab.com/gitlab-org/gitlab/-/blob/master/lib/gitlab/usage_data_counters/wiki_page_counter.rb)
+- Using Redis methods [`HINCRBY`](https://redis.io/commands/hincrby), [`HGETALL`](https://redis.io/commands/hgetall), and [`Gitlab::UsageCounters::PodLogs`](https://gitlab.com/gitlab-org/gitlab/-/blob/master/lib/gitlab/usage_counters/pod_logs.rb)
+
+##### UsageData API Tracking
+
+
+
+1. Track event using `UsageData` API
+
+ Increment event count using ordinary Redis counter, for given event name.
+
+ Tracking events using the `UsageData` API requires the `usage_data_api` feature flag to be enabled, which is enabled by default.
+
+ API requests are protected by checking for a valid CSRF token.
+
+ To be able to increment the values, the related feature `usage_data_` should be enabled.
+
+ ```plaintext
+ POST /usage_data/increment_counter
+ ```
+
+ | Attribute | Type | Required | Description |
+ | :-------- | :--- | :------- | :---------- |
+ | `event` | string | yes | The event name it should be tracked |
+
+ Response
+
+ - `200` if event was tracked
+ - `400 Bad request` if event parameter is missing
+ - `401 Unauthorized` if user is not authenticated
+ - `403 Forbidden` for invalid CSRF token provided
+
+1. Track events using JavaScript/Vue API helper which calls the API above
+
+ Note that `usage_data_api` and `usage_data_#{event_name}` should be enabled to be able to track events
+
+ ```javascript
+ import api from '~/api';
+
+ api.trackRedisCounterEvent('my_already_defined_event_name'),
+ ```
+
+#### Redis HLL Counters
+
+WARNING:
+HyperLogLog (HLL) is a probabilistic algorithm and its **results always includes some small error**. According to [Redis documentation](https://redis.io/commands/pfcount), data from
+used HLL implementation is "approximated with a standard error of 0.81%".
+
+With `Gitlab::UsageDataCounters::HLLRedisCounter` we have available data structures used to count unique values.
+
+Implemented using Redis methods [PFADD](https://redis.io/commands/pfadd) and [PFCOUNT](https://redis.io/commands/pfcount).
+
+##### Adding new events
+
+1. Define events in [`known_events`](https://gitlab.com/gitlab-org/gitlab/-/blob/master/lib/gitlab/usage_data_counters/known_events/).
+
+ Example event:
+
+ ```yaml
+ - name: i_compliance_credential_inventory
+ category: compliance
+ redis_slot: compliance
+ expiry: 42 # 6 weeks
+ aggregation: weekly
+ ```
+
+ Keys:
+
+ - `name`: unique event name.
+
+ Name format `__name`.
+
+ Use one of the following prefixes for the event's name:
+
+ - `g_` for group, as an event which is tracked for group.
+ - `p_` for project, as an event which is tracked for project.
+ - `i_` for instance, as an event which is tracked for instance.
+ - `a_` for events encompassing all `g_`, `p_`, `i_`.
+ - `o_` for other.
+
+ Consider including in the event's name the Redis slot to be able to count totals for a specific category.
+
+ Example names: `i_compliance_credential_inventory`, `g_analytics_contribution`.
+
+ - `category`: event category. Used for getting total counts for events in a category, for easier
+ access to a group of events.
+ - `redis_slot`: optional Redis slot; default value: event name. Used if needed to calculate totals
+ for a group of metrics. Ensure keys are in the same slot. For example:
+ `i_compliance_credential_inventory` with `redis_slot: 'compliance'` builds Redis key
+ `i_{compliance}_credential_inventory-2020-34`. If `redis_slot` is not defined the Redis key will
+ be `{i_compliance_credential_inventory}-2020-34`.
+ - `expiry`: expiry time in days. Default: 29 days for daily aggregation and 6 weeks for weekly
+ aggregation.
+ - `aggregation`: may be set to a `:daily` or `:weekly` key. Defines how counting data is stored in Redis.
+ Aggregation on a `daily` basis does not pull more fine grained data.
+ - `feature_flag`: optional `default_enabled: :yaml`. If no feature flag is set then the tracking is enabled. For details, see our [GitLab internal Feature flags](../feature_flags/index.md) documentation. The feature flags are owned by the group adding the event tracking.
+
+Use one of the following methods to track events:
+
+1. Track event in controller using `RedisTracking` module with `track_redis_hll_event(*controller_actions, name:, if: nil)`.
+
+ Arguments:
+
+ - `controller_actions`: controller actions we want to track.
+ - `name`: event name.
+ - `if`: optional custom conditions, using the same format as with Rails callbacks.
+
+ Example usage:
+
+ ```ruby
+ # controller
+ class ProjectsController < Projects::ApplicationController
+ include RedisTracking
+
+ skip_before_action :authenticate_user!, only: :show
+ track_redis_hll_event :index, :show, name: 'g_compliance_example_feature_visitors'
+
+ def index
+ render html: 'index'
+ end
+
+ def new
+ render html: 'new'
+ end
+
+ def show
+ render html: 'show'
+ end
+ end
+ ```
+
+1. Track event in API using `increment_unique_values(event_name, values)` helper method.
+
+ To be able to track the event, Usage Ping must be enabled and the event feature `usage_data_` must be enabled.
+
+ Arguments:
+
+ - `event_name`: event name.
+ - `values`: values counted, one value or array of values.
+
+ Example usage:
+
+ ```ruby
+ get ':id/registry/repositories' do
+ repositories = ContainerRepositoriesFinder.new(
+ user: current_user, subject: user_group
+ ).execute
+
+ increment_unique_values('i_list_repositories', current_user.id)
+
+ present paginate(repositories), with: Entities::ContainerRegistry::Repository, tags: params[:tags], tags_count: params[:tags_count]
+ end
+ ```
+
+1. Track event using `track_usage_event(event_name, values)` in services and GraphQL
+
+ Increment unique values count using Redis HLL, for given event name.
+
+ Example:
+
+ [Track usage event for incident created in service](https://gitlab.com/gitlab-org/gitlab/-/blob/v13.8.3-ee/app/services/issues/update_service.rb#L66)
+
+ [Track usage event for incident created in GraphQL](https://gitlab.com/gitlab-org/gitlab/-/blob/v13.8.3-ee/app/graphql/mutations/alert_management/update_alert_status.rb#L16)
+
+ ```ruby
+ track_usage_event(:incident_management_incident_created, current_user.id)
+ ```
+
+
+
+1. Track event using `UsageData` API
+
+ Increment unique users count using Redis HLL, for given event name.
+
+ Tracking events using the `UsageData` API requires the `usage_data_api` feature flag to be enabled, which is enabled by default.
+
+ API requests are protected by checking for a valid CSRF token.
+
+ To increment the values, the related feature `usage_data_` should be
+ set to `default_enabled: true`. For more information, see
+ [Feature flags in development of GitLab](../feature_flags/index.md).
+
+ ```plaintext
+ POST /usage_data/increment_unique_users
+ ```
+
+ | Attribute | Type | Required | Description |
+ | :-------- | :--- | :------- | :---------- |
+ | `event` | string | yes | The event name it should be tracked |
+
+ Response
+
+ Return 200 if tracking failed for any reason.
+
+ - `200` if event was tracked or any errors
+ - `400 Bad request` if event parameter is missing
+ - `401 Unauthorized` if user is not authenticated
+ - `403 Forbidden` for invalid CSRF token provided
+
+1. Track events using JavaScript/Vue API helper which calls the API above
+
+ Example usage for an existing event already defined in [known events](https://gitlab.com/gitlab-org/gitlab/-/blob/master/lib/gitlab/usage_data_counters/known_events/):
+
+ Usage Data API is behind `usage_data_api` feature flag which, as of GitLab 13.7, is
+ now set to `default_enabled: true`.
+
+ Each event tracked using Usage Data API is behind a feature flag `usage_data_#{event_name}` which should be `default_enabled: true`
+
+ ```javascript
+ import api from '~/api';
+
+ api.trackRedisHllUserEvent('my_already_defined_event_name'),
+ ```
+
+1. Get event data using `Gitlab::UsageDataCounters::HLLRedisCounter.unique_events(event_names:, start_date:, end_date:, context: '')`.
+
+ Arguments:
+
+ - `event_names`: the list of event names.
+ - `start_date`: start date of the period for which we want to get event data.
+ - `end_date`: end date of the period for which we want to get event data.
+ - `context`: context of the event. Allowed values are `default`, `free`, `bronze`, `silver`, `gold`, `starter`, `premium`, `ultimate`.
+
+1. Testing tracking and getting unique events
+
+Trigger events in rails console by using `track_event` method
+
+ ```ruby
+ Gitlab::UsageDataCounters::HLLRedisCounter.track_event('g_compliance_audit_events', values: 1)
+ Gitlab::UsageDataCounters::HLLRedisCounter.track_event('g_compliance_audit_events', values: [2, 3])
+ ```
+
+Next, get the unique events for the current week.
+
+ ```ruby
+ # Get unique events for metric for current_week
+ Gitlab::UsageDataCounters::HLLRedisCounter.unique_events(event_names: 'g_compliance_audit_events',
+ start_date: Date.current.beginning_of_week, end_date: Date.current.next_week)
+ ```
+
+##### Recommendations
+
+We have the following recommendations for [Adding new events](#adding-new-events):
+
+- Event aggregation: weekly.
+- Key expiry time:
+ - Daily: 29 days.
+ - Weekly: 42 days.
+- When adding new metrics, use a [feature flag](../../operations/feature_flags.md) to control the impact.
+- For feature flags triggered by another service, set `default_enabled: false`,
+ - Events can be triggered using the `UsageData` API, which helps when there are > 10 events per change
+
+##### Enable/Disable Redis HLL tracking
+
+Events are tracked behind [feature flags](../feature_flags/index.md) due to concerns for Redis performance and scalability.
+
+For a full list of events and corresponding feature flags see, [known_events](https://gitlab.com/gitlab-org/gitlab/-/blob/master/lib/gitlab/usage_data_counters/known_events/) files.
+
+To enable or disable tracking for specific event in or , run commands such as the following to
+[enable or disable the corresponding feature](../feature_flags/index.md).
+
+```shell
+/chatops run feature set true
+/chatops run feature set false
+```
+
+##### Known events are added automatically in usage data payload
+
+All events added in [`known_events/common.yml`](https://gitlab.com/gitlab-org/gitlab/-/blob/master/lib/gitlab/usage_data_counters/known_events/common.yml) are automatically added to usage data generation under the `redis_hll_counters` key. This column is stored in [version-app as a JSON](https://gitlab.com/gitlab-services/version-gitlab-com/-/blob/master/db/schema.rb#L209).
+For each event we add metrics for the weekly and monthly time frames, and totals for each where applicable:
+
+- `#{event_name}_weekly`: Data for 7 days for daily [aggregation](#adding-new-events) events and data for the last complete week for weekly [aggregation](#adding-new-events) events.
+- `#{event_name}_monthly`: Data for 28 days for daily [aggregation](#adding-new-events) events and data for the last 4 complete weeks for weekly [aggregation](#adding-new-events) events.
+
+Redis HLL implementation calculates automatic total metrics, if there are more than one metric for the same category, aggregation, and Redis slot.
+
+- `#{category}_total_unique_counts_weekly`: Total unique counts for events in the same category for the last 7 days or the last complete week, if events are in the same Redis slot and we have more than one metric.
+- `#{category}_total_unique_counts_monthly`: Total unique counts for events in same category for the last 28 days or the last 4 complete weeks, if events are in the same Redis slot and we have more than one metric.
+
+Example of `redis_hll_counters` data:
+
+```ruby
+{:redis_hll_counters=>
+ {"compliance"=>
+ {"g_compliance_dashboard_weekly"=>0,
+ "g_compliance_dashboard_monthly"=>0,
+ "g_compliance_audit_events_weekly"=>0,
+ "g_compliance_audit_events_monthly"=>0,
+ "compliance_total_unique_counts_weekly"=>0,
+ "compliance_total_unique_counts_monthly"=>0},
+ "analytics"=>
+ {"g_analytics_contribution_weekly"=>0,
+ "g_analytics_contribution_monthly"=>0,
+ "g_analytics_insights_weekly"=>0,
+ "g_analytics_insights_monthly"=>0,
+ "analytics_total_unique_counts_weekly"=>0,
+ "analytics_total_unique_counts_monthly"=>0},
+ "ide_edit"=>
+ {"g_edit_by_web_ide_weekly"=>0,
+ "g_edit_by_web_ide_monthly"=>0,
+ "g_edit_by_sfe_weekly"=>0,
+ "g_edit_by_sfe_monthly"=>0,
+ "ide_edit_total_unique_counts_weekly"=>0,
+ "ide_edit_total_unique_counts_monthly"=>0},
+ "search"=>
+ {"i_search_total_weekly"=>0, "i_search_total_monthly"=>0, "i_search_advanced_weekly"=>0, "i_search_advanced_monthly"=>0, "i_search_paid_weekly"=>0, "i_search_paid_monthly"=>0, "search_total_unique_counts_weekly"=>0, "search_total_unique_counts_monthly"=>0},
+ "source_code"=>{"wiki_action_weekly"=>0, "wiki_action_monthly"=>0}
+ }
+```
+
+Example usage:
+
+```ruby
+# Redis Counters
+redis_usage_data(Gitlab::UsageDataCounters::WikiPageCounter)
+redis_usage_data { ::Gitlab::UsageCounters::PodLogs.usage_totals[:total] }
+
+# Define events in common.yml https://gitlab.com/gitlab-org/gitlab/-/blob/master/lib/gitlab/usage_data_counters/known_events/common.yml
+
+# Tracking events
+Gitlab::UsageDataCounters::HLLRedisCounter.track_event('expand_vulnerabilities', values: visitor_id)
+
+# Get unique events for metric
+redis_usage_data { Gitlab::UsageDataCounters::HLLRedisCounter.unique_events(event_names: 'expand_vulnerabilities', start_date: 28.days.ago, end_date: Date.current) }
+```
+
+### Alternative Counters
+
+Handles `StandardError` and fallbacks into -1 this way not all measures fail if we encounter one exception.
+Mainly used for settings and configurations.
+
+Method: `alt_usage_data(value = nil, fallback: -1, &block)`
+
+Arguments:
+
+- `value`: a simple static value in which case the value is simply returned.
+- or a `block`: which is evaluated
+- `fallback: -1`: the common value used for any metrics that are failing.
+
+Usage:
+
+```ruby
+alt_usage_data { Gitlab::VERSION }
+alt_usage_data { Gitlab::CurrentSettings.uuid }
+alt_usage_data(999)
+```
+
+### Adding counters to build new metrics
+
+When adding the results of two counters, use the `add` usage data method that
+handles fallback values and exceptions. It also generates a valid [SQL export](#exporting-usage-ping-sql-queries-and-definitions).
+
+Example usage:
+
+```ruby
+add(User.active, User.bot)
+```
+
+### Prometheus Queries
+
+In those cases where operational metrics should be part of Usage Ping, a database or Redis query is unlikely
+to provide useful data. Instead, Prometheus might be more appropriate, because most GitLab architectural
+components publish metrics to it that can be queried back, aggregated, and included as usage data.
+
+NOTE:
+Prometheus as a data source for Usage Ping is currently only available for single-node Omnibus installations
+that are running the [bundled Prometheus](../../administration/monitoring/prometheus/index.md) instance.
+
+To query Prometheus for metrics, a helper method is available to `yield` a fully configured
+`PrometheusClient`, given it is available as per the note above:
+
+```ruby
+with_prometheus_client do |client|
+ response = client.query('')
+ ...
+end
+```
+
+Please refer to [the `PrometheusClient` definition](https://gitlab.com/gitlab-org/gitlab/-/blob/master/lib/gitlab/prometheus_client.rb)
+for how to use its API to query for data.
+
+## Developing and testing Usage Ping
+
+### 1. Naming and placing the metrics
+
+Add the metric in one of the top level keys
+
+- `license`: for license related metrics.
+- `settings`: for settings related metrics.
+- `counts_weekly`: for counters that have data for the most recent 7 days.
+- `counts_monthly`: for counters that have data for the most recent 28 days.
+- `counts`: for counters that have data for all time.
+
+### 2. Use your Rails console to manually test counters
+
+```ruby
+# count
+Gitlab::UsageData.count(User.active)
+Gitlab::UsageData.count(::Clusters::Cluster.aws_installed.enabled, :cluster_id)
+
+# count distinct
+Gitlab::UsageData.distinct_count(::Project, :creator_id)
+Gitlab::UsageData.distinct_count(::Note.with_suggestions.where(time_period), :author_id, start: ::User.minimum(:id), finish: ::User.maximum(:id))
+```
+
+### 3. Generate the SQL query
+
+Your Rails console returns the generated SQL queries.
+
+Example:
+
+```ruby
+pry(main)> Gitlab::UsageData.count(User.active)
+ (2.6ms) SELECT "features"."key" FROM "features"
+ (15.3ms) SELECT MIN("users"."id") FROM "users" WHERE ("users"."state" IN ('active')) AND ("users"."user_type" IS NULL OR "users"."user_type" IN (6, 4))
+ (2.4ms) SELECT MAX("users"."id") FROM "users" WHERE ("users"."state" IN ('active')) AND ("users"."user_type" IS NULL OR "users"."user_type" IN (6, 4))
+ (1.9ms) SELECT COUNT("users"."id") FROM "users" WHERE ("users"."state" IN ('active')) AND ("users"."user_type" IS NULL OR "users"."user_type" IN (6, 4)) AND "users"."id" BETWEEN 1 AND 100000
+```
+
+### 4. Optimize queries with #database-lab
+
+Paste the SQL query into `#database-lab` to see how the query performs at scale.
+
+- `#database-lab` is a Slack channel which uses a production-sized environment to test your queries.
+- GitLab.com’s production database has a 15 second timeout.
+- Any single query must stay below [1 second execution time](../query_performance.md#timing-guidelines-for-queries) with cold caches.
+- Add a specialized index on columns involved to reduce the execution time.
+
+To have an understanding of the query's execution we add in the MR description the following information:
+
+- For counters that have a `time_period` test we add information for both cases:
+ - `time_period = {}` for all time periods
+ - `time_period = { created_at: 28.days.ago..Time.current }` for last 28 days period
+- Execution plan and query time before and after optimization
+- Query generated for the index and time
+- Migration output for up and down execution
+
+We also use `#database-lab` and [explain.depesz.com](https://explain.depesz.com/). For more details, see the [database review guide](../database_review.md#preparation-when-adding-or-modifying-queries).
+
+#### Optimization recommendations and examples
+
+- Use specialized indexes [example 1](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/26871), [example 2](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/26445).
+- Use defined `start` and `finish`, and simple queries. These values can be memoized and reused, [example](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/37155).
+- Avoid joins and write the queries as simply as possible, [example](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/36316).
+- Set a custom `batch_size` for `distinct_count`, [example](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/38000).
+
+### 5. Add the metric definition
+
+[Check Metrics Dictionary Guide](metrics_dictionary.md)
+
+When adding, updating, or removing metrics, please update the [Metrics Dictionary](dictionary.md).
+
+### 6. Add new metric to Versions Application
+
+Check if new metrics need to be added to the Versions Application. See `usage_data` [schema](https://gitlab.com/gitlab-services/version-gitlab-com/-/blob/master/db/schema.rb#L147) and usage data [parameters accepted](https://gitlab.com/gitlab-services/version-gitlab-com/-/blob/master/app/services/usage_ping.rb). Any metrics added under the `counts` key are saved in the `stats` column.
+
+### 7. Add the feature label
+
+Add the `feature` label to the Merge Request for new Usage Ping metrics. These are user-facing changes and are part of expanding the Usage Ping feature.
+
+### 8. Add a changelog file
+
+Ensure you comply with the [Changelog entries guide](../changelog.md).
+
+### 9. Ask for a Product Intelligence Review
+
+On GitLab.com, we have DangerBot setup to monitor Product Intelligence related files and DangerBot recommends a [Product Intelligence review](product_intelligence_review.md). Mention `@gitlab-org/growth/product_intelligence/engineers` in your MR for a review.
+
+### 10. Verify your metric
+
+On GitLab.com, the Product Intelligence team regularly monitors Usage Ping. They may alert you that your metrics need further optimization to run quicker and with greater success. You may also use the [Usage Ping QA dashboard](https://app.periscopedata.com/app/gitlab/632033/Usage-Ping-QA) to check how well your metric performs. The dashboard allows filtering by GitLab version, by "Self-managed" & "SaaS" and shows you how many failures have occurred for each metric. Whenever you notice a high failure rate, you may re-optimize your metric.
+
+### Optional: Test Prometheus based Usage Ping
+
+If the data submitted includes metrics [queried from Prometheus](#prometheus-queries) that you would like to inspect and verify,
+then you need to ensure that a Prometheus server is running locally, and that furthermore the respective GitLab components
+are exporting metrics to it. If you do not need to test data coming from Prometheus, no further action
+is necessary. Usage Ping should degrade gracefully in the absence of a running Prometheus server.
+
+There are three kinds of components that may export data to Prometheus, and which are included in Usage Ping:
+
+- [`node_exporter`](https://github.com/prometheus/node_exporter) - Exports node metrics from the host machine
+- [`gitlab-exporter`](https://gitlab.com/gitlab-org/gitlab-exporter) - Exports process metrics from various GitLab components
+- various GitLab services such as Sidekiq and the Rails server that export their own metrics
+
+#### Test with an Omnibus container
+
+This is the recommended approach to test Prometheus based Usage Ping.
+
+The easiest way to verify your changes is to build a new Omnibus image from your code branch by using CI, then download the image
+and run a local container instance:
+
+1. From your merge request, click on the `qa` stage, then trigger the `package-and-qa` job. This job triggers an Omnibus
+build in a [downstream pipeline of the `omnibus-gitlab-mirror` project](https://gitlab.com/gitlab-org/build/omnibus-gitlab-mirror/-/pipelines).
+1. In the downstream pipeline, wait for the `gitlab-docker` job to finish.
+1. Open the job logs and locate the full container name including the version. It takes the following form: `registry.gitlab.com/gitlab-org/build/omnibus-gitlab-mirror/gitlab-ee:`.
+1. On your local machine, make sure you are signed in to the GitLab Docker registry. You can find the instructions for this in
+[Authenticate to the GitLab Container Registry](../../user/packages/container_registry/index.md#authenticate-with-the-container-registry).
+1. Once signed in, download the new image by using `docker pull registry.gitlab.com/gitlab-org/build/omnibus-gitlab-mirror/gitlab-ee:`
+1. For more information about working with and running Omnibus GitLab containers in Docker, please refer to [GitLab Docker images](https://docs.gitlab.com/omnibus/docker/README.html) in the Omnibus documentation.
+
+#### Test with GitLab development toolkits
+
+This is the less recommended approach, because it comes with a number of difficulties when emulating a real GitLab deployment.
+
+The [GDK](https://gitlab.com/gitlab-org/gitlab-development-kit) is not set up to run a Prometheus server or `node_exporter` alongside other GitLab components. If you would
+like to do so, [Monitoring the GDK with Prometheus](https://gitlab.com/gitlab-org/gitlab-development-kit/-/blob/master/doc/howto/prometheus/index.md#monitoring-the-gdk-with-prometheus) is a good start.
+
+The [GCK](https://gitlab.com/gitlab-org/gitlab-compose-kit) has limited support for testing Prometheus based Usage Ping.
+By default, it already comes with a fully configured Prometheus service that is set up to scrape a number of components,
+but with the following limitations:
+
+- It does not run a `gitlab-exporter` instance, so several `process_*` metrics from services such as Gitaly may be missing.
+- While it runs a `node_exporter`, `docker-compose` services emulate hosts, meaning that it would normally report itself to not be associated
+with any of the other services that are running. That is not how node metrics are reported in a production setup, where `node_exporter`
+always runs as a process alongside other GitLab components on any given node. From Usage Ping's perspective none of the node data would therefore
+appear to be associated to any of the services running, because they all appear to be running on different hosts. To alleviate this problem, the `node_exporter` in GCK was arbitrarily "assigned" to the `web` service, meaning only for this service `node_*` metrics appears in Usage Ping.
+
+## Aggregated metrics
+
+> - [Introduced](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/45979) in GitLab 13.6.
+
+WARNING:
+This feature is intended solely for internal GitLab use.
+
+To add data for aggregated metrics into Usage Ping payload you should add corresponding definition in [`aggregated_metrics`](https://gitlab.com/gitlab-org/gitlab/-/blob/master/lib/gitlab/usage_data_counters/aggregated_metrics/). Each aggregate definition includes following parts:
+
+- `name`: Unique name under which the aggregate metric is added to the Usage Ping payload.
+- `operator`: Operator that defines how the aggregated metric data is counted. Available operators are:
+ - `OR`: Removes duplicates and counts all entries that triggered any of listed events.
+ - `AND`: Removes duplicates and counts all elements that were observed triggering all of following events.
+- `source`: Data source used to collect all events data included in aggregated metric. Valid data sources are:
+ - [`database`](#database-sourced-aggregated-metrics)
+ - [`redis`](#redis-sourced-aggregated-metrics)
+- `events`: list of events names to aggregate into metric. All events in this list must
+ relay on the same data source. Additional data source requirements are described in the
+ [Database sourced aggregated metrics](#database-sourced-aggregated-metrics) and
+ [Redis sourced aggregated metrics](#redis-sourced-aggregated-metrics) sections.
+- `feature_flag`: Name of [development feature flag](../feature_flags/development.md#development-type)
+ that is checked before metrics aggregation is performed. Corresponding feature flag
+ should have `default_enabled` attribute set to `false`. The `feature_flag` attribute
+ is optional and can be omitted. When `feature_flag` is missing, no feature flag is checked.
+
+Example aggregated metric entries:
+
+```yaml
+- name: product_analytics_test_metrics_union_redis_sourced
+ operator: OR
+ events: ['i_search_total', 'i_search_advanced', 'i_search_paid']
+ source: redis
+- name: product_analytics_test_metrics_intersection_with_feautre_flag_database_sourced
+ operator: AND
+ source: database
+ events: ['dependency_scanning_pipeline_all_time', 'container_scanning_pipeline_all_time']
+ feature_flag: example_aggregated_metric
+```
+
+Aggregated metrics are added under `aggregated_metrics` key in both `counts_weekly` and `counts_monthly` top level keys in Usage Ping payload.
+
+```ruby
+{
+ :counts_monthly => {
+ :deployments => 1003,
+ :successful_deployments => 78,
+ :failed_deployments => 275,
+ :packages => 155,
+ :personal_snippets => 2106,
+ :project_snippets => 407,
+ :promoted_issues => 719,
+ :aggregated_metrics => {
+ :product_analytics_test_metrics_union => 7,
+ :product_analytics_test_metrics_intersection_with_feautre_flag => 2
+ },
+ :snippets => 2513
+ }
+}
+```
+
+### Redis sourced aggregated metrics
+
+> - [Introduced](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/45979) in GitLab 13.6.
+
+To declare the aggregate of events collected with [Redis HLL Counters](#redis-hll-counters),
+you must fulfill the following requirements:
+
+1. All events listed at `events` attribute must come from
+ [`known_events/*.yml`](#known-events-are-added-automatically-in-usage-data-payload) files.
+1. All events listed at `events` attribute must have the same `redis_slot` attribute.
+1. All events listed at `events` attribute must have the same `aggregation` attribute.
+
+### Database sourced aggregated metrics
+
+> - [Introduced](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/52784) in GitLab 13.9.
+> - It's [deployed behind a feature flag](../../user/feature_flags.md), disabled by default.
+> - It's enabled on GitLab.com.
+
+To declare an aggregate of metrics based on events collected from database, follow
+these steps:
+
+1. [Persist the metrics for aggregation](#persist-metrics-for-aggregation).
+1. [Add new aggregated metric definition](#add-new-aggregated-metric-definition).
+
+#### Persist metrics for aggregation
+
+Only metrics calculated with [Estimated Batch Counters](#estimated-batch-counters)
+can be persisted for database sourced aggregated metrics. To persist a metric,
+inject a Ruby block into the
+[estimate_batch_distinct_count](#estimate_batch_distinct_count-method) method.
+This block should invoke the
+`Gitlab::Usage::Metrics::Aggregates::Sources::PostgresHll.save_aggregated_metrics`
+[method](https://gitlab.com/gitlab-org/gitlab/-/blob/master/lib/gitlab/usage/metrics/aggregates/sources/postgres_hll.rb#L21),
+which stores `estimate_batch_distinct_count` results for future use in aggregated metrics.
+
+The `Gitlab::Usage::Metrics::Aggregates::Sources::PostgresHll.save_aggregated_metrics`
+method accepts the following arguments:
+
+- `metric_name`: The name of metric to use for aggregations. Should be the same
+ as the key under which the metric is added into Usage Ping.
+- `recorded_at_timestamp`: The timestamp representing the moment when a given
+ Usage Ping payload was collected. You should use the convenience method `recorded_at`
+ to fill `recorded_at_timestamp` argument, like this: `recorded_at_timestamp: recorded_at`
+- `time_period`: The time period used to build the `relation` argument passed into
+ `estimate_batch_distinct_count`. To collect the metric with all available historical
+ data, set a `nil` value as time period: `time_period: nil`.
+- `data`: HyperLogLog buckets structure representing unique entries in `relation`.
+ The `estimate_batch_distinct_count` method always passes the correct argument
+ into the block, so `data` argument must always have a value equal to block argument,
+ like this: `data: result`
+
+Example metrics persistence:
+
+```ruby
+class UsageData
+ def count_secure_pipelines(time_period)
+ ...
+ relation = ::Security::Scan.latest_successful_by_build.by_scan_types(scan_type).where(security_scans: time_period)
+
+ pipelines_with_secure_jobs['dependency_scanning_pipeline'] = estimate_batch_distinct_count(relation, :commit_id, batch_size: 1000, start: start_id, finish: finish_id) do |result|
+ ::Gitlab::Usage::Metrics::Aggregates::Sources::PostgresHll
+ .save_aggregated_metrics(metric_name: 'dependency_scanning_pipeline', recorded_at_timestamp: recorded_at, time_period: time_period, data: result)
+ end
+ end
+end
+```
+
+#### Add new aggregated metric definition
+
+After all metrics are persisted, you can add an aggregated metric definition at
+[`aggregated_metrics/`](https://gitlab.com/gitlab-org/gitlab/-/blob/master/lib/gitlab/usage_data_counters/aggregated_metrics/). When adding definitions for metrics names listed in the
+`events:` attribute, use the same names you passed in the `metric_name` argument
+while persisting metrics in previous step.
+
+Example definition:
+
+```yaml
+- name: product_analytics_test_metrics_intersection_database_sourced
+ operator: AND
+ source: database
+ events: ['dependency_scanning_pipeline', 'container_scanning_pipeline']
+```
+
+## Example Usage Ping payload
+
+The following is example content of the Usage Ping payload.
+
+```json
+{
+ "uuid": "0000000-0000-0000-0000-000000000000",
+ "hostname": "example.com",
+ "version": "12.10.0-pre",
+ "installation_type": "omnibus-gitlab",
+ "active_user_count": 999,
+ "recorded_at": "2020-04-17T07:43:54.162+00:00",
+ "edition": "EEU",
+ "license_md5": "00000000000000000000000000000000",
+ "license_id": null,
+ "historical_max_users": 999,
+ "licensee": {
+ "Name": "ABC, Inc.",
+ "Email": "email@example.com",
+ "Company": "ABC, Inc."
+ },
+ "license_user_count": 999,
+ "license_starts_at": "2020-01-01",
+ "license_expires_at": "2021-01-01",
+ "license_plan": "ultimate",
+ "license_add_ons": {
+ },
+ "license_trial": false,
+ "counts": {
+ "assignee_lists": 999,
+ "boards": 999,
+ "ci_builds": 999,
+ ...
+ },
+ "container_registry_enabled": true,
+ "dependency_proxy_enabled": false,
+ "gitlab_shared_runners_enabled": true,
+ "gravatar_enabled": true,
+ "influxdb_metrics_enabled": true,
+ "ldap_enabled": false,
+ "mattermost_enabled": false,
+ "omniauth_enabled": true,
+ "prometheus_enabled": false,
+ "prometheus_metrics_enabled": false,
+ "reply_by_email_enabled": "incoming+%{key}@incoming.gitlab.com",
+ "signup_enabled": true,
+ "web_ide_clientside_preview_enabled": true,
+ "ingress_modsecurity_enabled": true,
+ "projects_with_expiration_policy_disabled": 999,
+ "projects_with_expiration_policy_enabled": 999,
+ ...
+ "elasticsearch_enabled": true,
+ "license_trial_ends_on": null,
+ "geo_enabled": false,
+ "git": {
+ "version": {
+ "major": 2,
+ "minor": 26,
+ "patch": 1
+ }
+ },
+ "gitaly": {
+ "version": "12.10.0-rc1-93-g40980d40",
+ "servers": 56,
+ "clusters": 14,
+ "filesystems": [
+ "EXT_2_3_4"
+ ]
+ },
+ "gitlab_pages": {
+ "enabled": true,
+ "version": "1.17.0"
+ },
+ "container_registry_server": {
+ "vendor": "gitlab",
+ "version": "2.9.1-gitlab"
+ },
+ "database": {
+ "adapter": "postgresql",
+ "version": "9.6.15",
+ "pg_system_id": 6842684531675334351
+ },
+ "analytics_unique_visits": {
+ "g_analytics_contribution": 999,
+ ...
+ },
+ "usage_activity_by_stage": {
+ "configure": {
+ "project_clusters_enabled": 999,
+ ...
+ },
+ "create": {
+ "merge_requests": 999,
+ ...
+ },
+ "manage": {
+ "events": 999,
+ ...
+ },
+ "monitor": {
+ "clusters": 999,
+ ...
+ },
+ "package": {
+ "projects_with_packages": 999
+ },
+ "plan": {
+ "issues": 999,
+ ...
+ },
+ "release": {
+ "deployments": 999,
+ ...
+ },
+ "secure": {
+ "user_container_scanning_jobs": 999,
+ ...
+ },
+ "verify": {
+ "ci_builds": 999,
+ ...
+ }
+ },
+ "usage_activity_by_stage_monthly": {
+ "configure": {
+ "project_clusters_enabled": 999,
+ ...
+ },
+ "create": {
+ "merge_requests": 999,
+ ...
+ },
+ "manage": {
+ "events": 999,
+ ...
+ },
+ "monitor": {
+ "clusters": 999,
+ ...
+ },
+ "package": {
+ "projects_with_packages": 999
+ },
+ "plan": {
+ "issues": 999,
+ ...
+ },
+ "release": {
+ "deployments": 999,
+ ...
+ },
+ "secure": {
+ "user_container_scanning_jobs": 999,
+ ...
+ },
+ "verify": {
+ "ci_builds": 999,
+ ...
+ }
+ },
+ "topology": {
+ "duration_s": 0.013836685999194742,
+ "application_requests_per_hour": 4224,
+ "query_apdex_weekly_average": 0.996,
+ "failures": [],
+ "nodes": [
+ {
+ "node_memory_total_bytes": 33269903360,
+ "node_memory_utilization": 0.35,
+ "node_cpus": 16,
+ "node_cpu_utilization": 0.2,
+ "node_uname_info": {
+ "machine": "x86_64",
+ "sysname": "Linux",
+ "release": "4.19.76-linuxkit"
+ },
+ "node_services": [
+ {
+ "name": "web",
+ "process_count": 16,
+ "process_memory_pss": 233349888,
+ "process_memory_rss": 788220927,
+ "process_memory_uss": 195295487,
+ "server": "puma"
+ },
+ {
+ "name": "sidekiq",
+ "process_count": 1,
+ "process_memory_pss": 734080000,
+ "process_memory_rss": 750051328,
+ "process_memory_uss": 731533312
+ },
+ ...
+ ],
+ ...
+ },
+ ...
+ ]
+ }
+}
+```
+
+## Notable changes
+
+In GitLab 13.5, `pg_system_id` was added to send the [PostgreSQL system identifier](https://www.2ndquadrant.com/en/blog/support-for-postgresqls-system-identifier-in-barman/).
+
+## Exporting Usage Ping SQL queries and definitions
+
+Two Rake tasks exist to export Usage Ping definitions.
+
+- The Rake tasks export the raw SQL queries for `count`, `distinct_count`, `sum`.
+- The Rake tasks export the Redis counter class or the line of the Redis block for `redis_usage_data`.
+- The Rake tasks calculate the `alt_usage_data` metrics.
+
+In the home directory of your local GitLab installation run the following Rake tasks for the YAML and JSON versions respectively:
+
+```shell
+# for YAML export
+bin/rake gitlab:usage_data:dump_sql_in_yaml
+
+# for JSON export
+bin/rake gitlab:usage_data:dump_sql_in_json
+
+# You may pipe the output into a file
+bin/rake gitlab:usage_data:dump_sql_in_yaml > ~/Desktop/usage-metrics-2020-09-02.yaml
+```
+
+## Generating and troubleshooting usage ping
+
+To get a usage ping, or to troubleshoot caching issues on your GitLab instance, please follow [instructions to generate usage ping](../../administration/troubleshooting/gitlab_rails_cheat_sheet.md#generate-usage-ping).
diff --git a/doc/development/usage_ping/metrics_dictionary.md b/doc/development/usage_ping/metrics_dictionary.md
index a1f24ccd99c..1407a356363 100644
--- a/doc/development/usage_ping/metrics_dictionary.md
+++ b/doc/development/usage_ping/metrics_dictionary.md
@@ -96,6 +96,6 @@ create ee/config/metrics/counts_7d/issues.yml
## Metrics added dynamic to Usage Ping payload
-The [Redis HLL metrics](../usage_ping.md#known-events-are-added-automatically-in-usage-data-payload) are added automatically to Usage Ping payload.
+The [Redis HLL metrics](index.md#known-events-are-added-automatically-in-usage-data-payload) are added automatically to Usage Ping payload.
A YAML metric definition is required for each metric.
diff --git a/doc/development/usage_ping/product_intelligence_review.md b/doc/development/usage_ping/product_intelligence_review.md
index 852166cc7a3..c667bc8354c 100644
--- a/doc/development/usage_ping/product_intelligence_review.md
+++ b/doc/development/usage_ping/product_intelligence_review.md
@@ -13,7 +13,7 @@ general best practices for code reviews, refer to our [code review guide](../cod
## Resources for Product Intelligence reviewers
-- [Usage Ping Guide](../usage_ping.md)
+- [Usage Ping Guide](index.md)
- [Snowplow Guide](../snowplow.md)
- [Metrics Dictionary](metrics_dictionary.md)
@@ -64,13 +64,13 @@ Any of the Product Intelligence engineers can be assigned for the Product Intell
### How to review for Product Intelligence
-- Check the [metrics location](../usage_ping.md#1-naming-and-placing-the-metrics) in
+- Check the [metrics location](index.md#1-naming-and-placing-the-metrics) in
the Usage Ping JSON payload.
- Add `~database` label and ask for [database review](../database_review.md) for
metrics that are based on Database.
- For tracking using Redis HLL (HyperLogLog):
- Check the Redis slot.
- - Check if a [feature flag is needed](../usage_ping.md#recommendations).
+ - Check if a [feature flag is needed](index.md#recommendations).
- Metrics YAML definitions:
- Check the metric `description`.
- Check the metrics `key_path`.
diff --git a/doc/raketasks/index.md b/doc/raketasks/index.md
index be7e55cba9b..ab0505f065e 100644
--- a/doc/raketasks/index.md
+++ b/doc/raketasks/index.md
@@ -42,7 +42,7 @@ The following are available Rake tasks:
| [Repository storage](../administration/raketasks/storage.md) | List and migrate existing projects and attachments from legacy storage to hashed storage. |
| [Uploads migrate](../administration/raketasks/uploads/migrate.md) | Migrate uploads between storage local and object storage. |
| [Uploads sanitize](../administration/raketasks/uploads/sanitize.md) | Remove EXIF data from images uploaded to earlier versions of GitLab. |
-| [Usage data](../administration/troubleshooting/gitlab_rails_cheat_sheet.md#generate-usage-ping) | Generate and troubleshoot [Usage Ping](../development/usage_ping.md).|
+| [Usage data](../administration/troubleshooting/gitlab_rails_cheat_sheet.md#generate-usage-ping) | Generate and troubleshoot [Usage Ping](../development/usage_ping/index.md).|
| [User management](user_management.md) | Perform user management tasks. |
| [Webhooks administration](web_hooks.md) | Maintain project Webhooks. |
| [X.509 signatures](x509_signatures.md) | Update X.509 commit signatures, useful if certificate store has changed. |
diff --git a/doc/user/admin_area/settings/usage_statistics.md b/doc/user/admin_area/settings/usage_statistics.md
index 2bb6adbc32b..ada44115cec 100644
--- a/doc/user/admin_area/settings/usage_statistics.md
+++ b/doc/user/admin_area/settings/usage_statistics.md
@@ -61,7 +61,7 @@ sequenceDiagram
## Usage Ping **(FREE SELF)**
-See [Usage Ping guide](../../../development/usage_ping.md).
+See [Usage Ping guide](../../../development/usage_ping/index.md).
## Instance-level analytics availability
diff --git a/package.json b/package.json
index 1860e87b070..d164480261d 100644
--- a/package.json
+++ b/package.json
@@ -56,9 +56,9 @@
"@babel/preset-env": "^7.10.1",
"@gitlab/at.js": "1.5.7",
"@gitlab/favicon-overlay": "2.0.0",
- "@gitlab/svgs": "1.183.0",
+ "@gitlab/svgs": "1.184.0",
"@gitlab/tributejs": "1.0.0",
- "@gitlab/ui": "27.14.0",
+ "@gitlab/ui": "27.15.0",
"@gitlab/visual-review-tools": "1.6.1",
"@rails/actioncable": "^6.0.3-4",
"@rails/ujs": "^6.0.3-4",
diff --git a/spec/requests/api/merge_requests_spec.rb b/spec/requests/api/merge_requests_spec.rb
index ad8e21bf4c1..09177dd1710 100644
--- a/spec/requests/api/merge_requests_spec.rb
+++ b/spec/requests/api/merge_requests_spec.rb
@@ -2537,7 +2537,7 @@ RSpec.describe API::MergeRequests do
end
end
- describe "the should_remove_source_branch param" do
+ describe "the should_remove_source_branch param", :sidekiq_inline do
let(:source_repository) { merge_request.source_project.repository }
let(:source_branch) { merge_request.source_branch }
@@ -2552,7 +2552,7 @@ RSpec.describe API::MergeRequests do
end
end
- context "with a merge request that has force_remove_source_branch enabled" do
+ context "with a merge request that has force_remove_source_branch enabled", :sidekiq_inline do
let(:source_repository) { merge_request.source_project.repository }
let(:source_branch) { merge_request.source_branch }
diff --git a/spec/services/merge_requests/merge_service_spec.rb b/spec/services/merge_requests/merge_service_spec.rb
index 611f12c8146..87e5750ce6e 100644
--- a/spec/services/merge_requests/merge_service_spec.rb
+++ b/spec/services/merge_requests/merge_service_spec.rb
@@ -258,9 +258,8 @@ RSpec.describe MergeRequests::MergeService do
end
it 'removes the source branch using the author user' do
- expect(::Branches::DeleteService).to receive(:new)
- .with(merge_request.source_project, merge_request.author)
- .and_call_original
+ expect(::MergeRequests::DeleteSourceBranchWorker).to receive(:perform_async).with(merge_request.id, merge_request.source_branch_sha, merge_request.author.id)
+
service.execute(merge_request)
end
@@ -268,7 +267,8 @@ RSpec.describe MergeRequests::MergeService do
let(:service) { described_class.new(project, user, merge_params.merge('should_remove_source_branch' => false)) }
it 'does not delete the source branch' do
- expect(::Branches::DeleteService).not_to receive(:new)
+ expect(::MergeRequests::DeleteSourceBranchWorker).not_to receive(:perform_async)
+
service.execute(merge_request)
end
end
@@ -280,9 +280,8 @@ RSpec.describe MergeRequests::MergeService do
end
it 'removes the source branch using the current user' do
- expect(::Branches::DeleteService).to receive(:new)
- .with(merge_request.source_project, user)
- .and_call_original
+ expect(::MergeRequests::DeleteSourceBranchWorker).to receive(:perform_async).with(merge_request.id, merge_request.source_branch_sha, user.id)
+
service.execute(merge_request)
end
end
diff --git a/spec/workers/merge_requests/delete_source_branch_worker_spec.rb b/spec/workers/merge_requests/delete_source_branch_worker_spec.rb
new file mode 100644
index 00000000000..df5773d55c1
--- /dev/null
+++ b/spec/workers/merge_requests/delete_source_branch_worker_spec.rb
@@ -0,0 +1,52 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+RSpec.describe MergeRequests::DeleteSourceBranchWorker do
+ let_it_be(:merge_request) { create(:merge_request) }
+ let_it_be(:user) { create(:user) }
+
+ let(:sha) { merge_request.source_branch_sha }
+ let(:worker) { described_class.new }
+
+ describe '#perform' do
+ context 'with a non-existing merge request' do
+ it 'does nothing' do
+ expect(::Branches::DeleteService).not_to receive(:new)
+
+ worker.perform(non_existing_record_id, sha, user.id)
+ end
+ end
+
+ context 'with a non-existing user' do
+ it 'does nothing' do
+ expect(::Branches::DeleteService).not_to receive(:new)
+
+ worker.perform(merge_request.id, sha, non_existing_record_id)
+ end
+ end
+
+ context 'with existing user and merge request' do
+ it 'calls service to delete source branch' do
+ expect_next_instance_of(::Branches::DeleteService) do |instance|
+ expect(instance).to receive(:execute).with(merge_request.source_branch)
+ end
+
+ worker.perform(merge_request.id, sha, user.id)
+ end
+
+ context 'source branch sha does not match' do
+ it 'does nothing' do
+ expect(::Branches::DeleteService).not_to receive(:new)
+
+ worker.perform(merge_request.id, 'new-source-branch-sha', user.id)
+ end
+ end
+ end
+
+ it_behaves_like 'an idempotent worker' do
+ let(:merge_request) { create(:merge_request) }
+ let(:job_args) { [merge_request.id, sha, user.id] }
+ end
+ end
+end
diff --git a/spec/workers/merge_worker_spec.rb b/spec/workers/merge_worker_spec.rb
index 97e8aeb616e..417e6edce96 100644
--- a/spec/workers/merge_worker_spec.rb
+++ b/spec/workers/merge_worker_spec.rb
@@ -14,7 +14,7 @@ RSpec.describe MergeWorker do
source_project.repository.expire_branches_cache
end
- it 'clears cache of source repo after removing source branch' do
+ it 'clears cache of source repo after removing source branch', :sidekiq_inline do
expect(source_project.repository.branch_names).to include('markdown')
described_class.new.perform(
diff --git a/yarn.lock b/yarn.lock
index a38dd3822a1..965de2340a2 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -897,20 +897,20 @@
stylelint-declaration-strict-value "1.7.7"
stylelint-scss "3.18.0"
-"@gitlab/svgs@1.183.0":
- version "1.183.0"
- resolved "https://registry.yarnpkg.com/@gitlab/svgs/-/svgs-1.183.0.tgz#58a3f49d2355531bfc073f35cbd71855b4a0f9da"
- integrity sha512-cOcFlgmTkRhfljT+0arKFtkYMLFMXLk0q74WtMqb9YFRtuPmthqfH0zZd47XZ2yFTSvbEJfcyAXXrPIuX8f8Rg==
+"@gitlab/svgs@1.184.0":
+ version "1.184.0"
+ resolved "https://registry.yarnpkg.com/@gitlab/svgs/-/svgs-1.184.0.tgz#7fe90309999b987362eb1649aa91df24748d3df1"
+ integrity sha512-K8r5jhnjWR4347j2ADFek7yLgs+MDGqm9cjvwT04aa1vSfhvheLqyKGUy1yz8v7RzXC0Mev6+RT1AbK7m6Ay+w==
"@gitlab/tributejs@1.0.0":
version "1.0.0"
resolved "https://registry.yarnpkg.com/@gitlab/tributejs/-/tributejs-1.0.0.tgz#672befa222aeffc83e7d799b0500a7a4418e59b8"
integrity sha512-nmKw1+hB6MHvlmPz63yPwVs1qQkycHwsKgxpEbzmky16Y6mL4EJMk3w1b8QlOAF/AIAzjCERPhe/R4MJiohbZw==
-"@gitlab/ui@27.14.0":
- version "27.14.0"
- resolved "https://registry.yarnpkg.com/@gitlab/ui/-/ui-27.14.0.tgz#f80c11df3650380f66aa763867afa32d1901d407"
- integrity sha512-cQaMN4pjcV0lCic3w1eiZUru9kV8XHwI3ZRT3LOWoBRQEbK0r3qv71SNKzEGDOtVPYq+WBl0/PRDlThk9H+CAg==
+"@gitlab/ui@27.15.0":
+ version "27.15.0"
+ resolved "https://registry.yarnpkg.com/@gitlab/ui/-/ui-27.15.0.tgz#79fde9dd314f45aa4c061dc1bee8254f1101f79d"
+ integrity sha512-xkEu/ewbcGkqHcbS7LdQoygKPpt2hUFgFHTOJ2VirgnA810/wvb5N7MrwQvKX2dy8kAdGwIbYyOZQXt3DIKzYA==
dependencies:
"@babel/standalone" "^7.0.0"
"@gitlab/vue-toasted" "^1.3.0"