Add latest changes from gitlab-org/gitlab@master

parent 2857ba3ce5
commit 1793989ccc
@@ -17,7 +17,7 @@ module Types
       @requires_argument = !!kwargs.delete(:requires_argument)
       @authorize = Array.wrap(kwargs.delete(:authorize))
       kwargs[:complexity] = field_complexity(kwargs[:resolver_class], kwargs[:complexity])
-      @feature_flag = kwargs[:feature_flag]
+      @feature_flag = kwargs[:_deprecated_feature_flag]
       kwargs = check_feature_flag(kwargs)
       @deprecation = gitlab_deprecation(kwargs)
       after_connection_extensions = kwargs.delete(:late_extensions) || []
@@ -136,7 +136,7 @@ module Types
     end

     def check_feature_flag(args)
-      ff = args.delete(:feature_flag)
+      ff = args.delete(:_deprecated_feature_flag)
       return args unless ff.present?

       args[:description] = feature_documentation_message(ff, args[:description])
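
For context, a sketch of what the rename means at a call site, mirroring the `custom_emoji` hunks later in this commit (the field shown is taken from those hunks):

```ruby
# Before this commit the field was hidden behind the flag via `feature_flag:`.
# The same behavior now requires the temporary keyword below, which is slated
# for removal in issue #369202.
field :custom_emoji,
      type: Types::CustomEmojiType.connection_type,
      null: true,
      description: 'Custom emoji within this namespace.',
      _deprecated_feature_flag: :custom_emoji
```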
@@ -22,7 +22,7 @@ module Types
           type: Types::CustomEmojiType.connection_type,
           null: true,
           description: 'Custom emoji within this namespace.',
-          feature_flag: :custom_emoji
+          _deprecated_feature_flag: :custom_emoji

     field :share_with_group_lock,
           type: GraphQL::Types::Boolean,
@@ -37,8 +37,8 @@ module Types
     mount_mutation Mutations::Clusters::AgentTokens::Create
     mount_mutation Mutations::Clusters::AgentTokens::Revoke
     mount_mutation Mutations::Commits::Create, calls_gitaly: true
-    mount_mutation Mutations::CustomEmoji::Create, feature_flag: :custom_emoji
-    mount_mutation Mutations::CustomEmoji::Destroy, feature_flag: :custom_emoji
+    mount_mutation Mutations::CustomEmoji::Create, _deprecated_feature_flag: :custom_emoji
+    mount_mutation Mutations::CustomEmoji::Destroy, _deprecated_feature_flag: :custom_emoji
     mount_mutation Mutations::CustomerRelations::Contacts::Create
     mount_mutation Mutations::CustomerRelations::Contacts::Update
     mount_mutation Mutations::CustomerRelations::Organizations::Create
@@ -61,7 +61,7 @@ module Types
           Types::TimeTracking::TimelogCategoryType.connection_type,
           null: true,
           description: "Timelog categories for the namespace.",
-          feature_flag: :timelog_categories
+          _deprecated_feature_flag: :timelog_categories

     markdown_field :description_html, null: true
@@ -442,7 +442,7 @@ module Types
           Types::TimeTracking::TimelogCategoryType.connection_type,
           null: true,
           description: "Timelog categories for the project.",
-          feature_flag: :timelog_categories
+          _deprecated_feature_flag: :timelog_categories

     def timelog_categories
       object.project_namespace.timelog_categories
@@ -0,0 +1,8 @@
---
name: only_positive_pagination_values
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/93571
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/369225
milestone: '15.3'
type: development
group: group::source code
default_enabled: false
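
For reference, a hedged sketch of how a development flag like this is typically consumed. `Feature.enabled?` is the standard GitLab check; the surrounding method and argument names are illustrative:

```ruby
# With default_enabled: false, the stricter validation stays off until the
# flag is enabled for the instance.
def validate_pagination_arguments!(first, last)
  return unless Feature.enabled?(:only_positive_pagination_values)

  raise ArgumentError, 'first must not be negative' if first&.negative?
  raise ArgumentError, 'last must not be negative' if last&.negative?
end
```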
@@ -98,11 +98,8 @@ See the [deprecating schema items](#deprecating-schema-items) section for how to

 ### Breaking change exemptions

-Two scenarios exist where schema items are exempt from the deprecation process,
-and can be removed or changed at any time without notice. These are schema items that either:
-
-- Use the [`feature_flag` property](#feature_flag-property) _and_ the flag is disabled by default.
-- Are [marked as alpha](#mark-schema-items-as-alpha).
+Schema items [marked as alpha](#mark-schema-items-as-alpha) are exempt from the deprecation process,
+and can be removed or changed at any time without notice.

 ## Global IDs
@@ -472,92 +469,37 @@ end

 ## Feature flags

-Developers can add [feature flags](../development/feature_flags/index.md) to GraphQL
-fields in the following ways:
+You can implement [feature flags](../development/feature_flags/index.md) in GraphQL to toggle:

-- Add the [`feature_flag` property](#feature_flag-property) to a field. This allows the field to be _hidden_
-  from the GraphQL schema when the flag is disabled.
-- [Toggle the return value](#toggle-the-value-of-a-field) when resolving the field.
+- The return value of a field.
+- The behavior of an argument or mutation.

-You can refer to these guidelines to decide which approach to use:
-
-- If your field is experimental, and its name or type is subject to
-  change, use the [`feature_flag` property](#feature_flag-property).
-- If your field is stable and its definition doesn't change, even after the flag is
-  removed, [toggle the return value](#toggle-the-value-of-a-field) of the field instead. Note that
-  [all fields should be nullable](#nullable-fields) anyway.
-- If your field will be accessed from frontend using the `@include` or `@skip` directive, [do not use the `feature_flag` property](#frontend-and-backend-feature-flag-strategies).
-
-### `feature_flag` property
-
-The `feature_flag` property allows you to toggle the field's
-[visibility](https://graphql-ruby.org/authorization/visibility.html)
-in the GraphQL schema. This removes the field from the schema
-when the flag is disabled.
-
-A description is [appended](https://gitlab.com/gitlab-org/gitlab/-/blob/497b556/app/graphql/types/base_field.rb#L44-53)
-to the field indicating that it is behind a feature flag.
-
-WARNING:
-If a client queries for the field when the feature flag is disabled, the query
-fails. Consider this when toggling the visibility of the feature on or off on
-production.
-
-The `feature_flag` property does not allow the use of
-[feature gates based on actors](../development/feature_flags/index.md).
-This means that the feature flag cannot be toggled only for particular
-projects, groups, or users, but instead can only be toggled globally for
-everyone.
-
-Example:
-
-```ruby
-field :test_field, type: GraphQL::Types::String,
-      null: true,
-      description: 'Some test field.',
-      feature_flag: :my_feature_flag
-```
-
-### Frontend and Backend feature flag strategies
-
-#### Directives
-
-When feature flags are used in the frontend to control the `@include` and `@skip` directives, do not use the `feature_flag`
-property on the server-side. For the accepted backend workaround, see [Toggle the value of a field](#toggle-the-value-of-a-field). It is recommended that the feature flag used in this approach be the same for frontend and backend.
-
-Even if the frontend directives evaluate to `@include:false` / `@skip:true`, the guarded entity is sent to the backend and matched
-against the GraphQL schema. We would then get an exception due to a schema mismatch. See the [frontend GraphQL guide](../development/fe_guide/graphql.md#the-include-directive) for more guidance.
-
-#### Different versions of a query
-
-See the guide frontend GraphQL guide for [different versions of a query](../development/fe_guide/graphql.md#different-versions-of-a-query), and [why it is not the preferred approach](../development/fe_guide/graphql.md#avoiding-multiple-query-versions)
-
-### Toggle the value of a field
-
-This method of using feature flags for fields is to toggle the
-return value of the field. This can be done in the resolver, in the
+This can be done in a resolver, in the
 type, or even in a model method, depending on your preference and
 situation.

-Consider also [marking the field as Alpha](#mark-schema-items-as-alpha)
-while the value of the field can be toggled. You can
-[change or remove Alpha fields at any time](#breaking-change-exemptions) without needing to deprecate them.
-This also signals to consumers of the public GraphQL API that the field is not
+NOTE:
+It's recommended that you also [mark the item as Alpha](#mark-schema-items-as-alpha) while it is behind a feature flag.
+This signals to consumers of the public GraphQL API that the field is not
 meant to be used yet.
+You can also
+[change or remove Alpha items at any time](#breaking-change-exemptions) without needing to deprecate them. When the flag is removed, "release"
+the schema item by removing its Alpha property to make it public.

-When applying a feature flag to toggle the value of a field, the
-`description` of the field must:
+### Descriptions for feature flagged items
+
+When using a feature flag to toggle the value or behavior of a schema item, the
+`description` of the item must:

-- State that the value of the field can be toggled by a feature flag.
+- State that the value or behavior can be toggled by a feature flag.
 - Name the feature flag.
-- State what the field returns when the feature flag is disabled (or
+- State what the field returns, or behavior is, when the feature flag is disabled (or
   enabled, if more appropriate).

-Example:
+Example of a feature-flagged field:

 ```ruby
-field :foo, GraphQL::Types::String,
-      null: true,
+field :foo, GraphQL::Types::String, null: true,
+      alpha: { milestone: '10.0' },
       description: 'Some test field. Returns `null`' \
                    'if `my_feature_flag` feature flag is disabled.'
@@ -567,6 +509,26 @@ def foo
 end
 ```

+Example of a feature-flagged argument:
+
+```ruby
+argument :foo, type: GraphQL::Types::String, required: false,
+         alpha: { milestone: '10.0' },
+         description: 'Some test argument. Is ignored if ' \
+                      '`my_feature_flag` feature flag is disabled.'
+
+def resolve(args)
+  args.delete(:foo) unless Feature.enabled?(:my_feature_flag, object)
+  # ...
+end
+```
+
+### `feature_flag` property (deprecated)
+
+NOTE:
+This property is deprecated and should no longer be used. The property
+has been temporarily renamed to `_deprecated_feature_flag` and support for it will be removed in [#369202](https://gitlab.com/gitlab-org/gitlab/-/issues/369202).
+
 ## Deprecating schema items

 The GitLab GraphQL API is versionless, which means we maintain backwards
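
To round out the two examples above, a hedged sketch of a resolver method that toggles a field's value with the same flag; the `object.foo` call is an assumption:

```ruby
# Returns nil (the documented disabled-state value) unless the flag is
# enabled for this object; the field is nullable, as its description promises.
def foo
  return unless Feature.enabled?(:my_feature_flag, object)

  object.foo
end
```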
@@ -6,7 +6,7 @@ info: To determine the technical writer assigned to the Stage/Group associated w

 # Add a foreign key constraint to an existing column

-Foreign keys ensure consistency between related database tables. The current database review process **always** encourages you to add [foreign keys](../foreign_keys.md) when creating tables that reference records from other tables.
+Foreign keys ensure consistency between related database tables. The current database review process **always** encourages you to add [foreign keys](foreign_keys.md) when creating tables that reference records from other tables.

 Starting with Rails version 4, Rails includes migration helpers to add foreign key constraints
 to database tables. Before Rails 4, the only way for ensuring some level of consistency was the
@@ -0,0 +1,179 @@
---
stage: Plan
group: Project Management
info: To determine the technical writer assigned to the Stage/Group associated with this page, see https://about.gitlab.com/handbook/engineering/ux/technical-writing/#assignments
---

# Filtering by label

## Introduction

GitLab has [labels](../../user/project/labels.md) that can be assigned to issues,
merge requests, and epics. Labels on those objects are a many-to-many relation
through the polymorphic `label_links` table.

To filter these objects by multiple labels - for instance, 'all open
issues with the label ~Plan and the label ~backend' - we generate a
query containing a `GROUP BY` clause. In a simple form, this looks like:

```sql
SELECT
    issues.*
FROM
    issues
    INNER JOIN label_links ON label_links.target_id = issues.id
        AND label_links.target_type = 'Issue'
    INNER JOIN labels ON labels.id = label_links.label_id
WHERE
    issues.project_id = 13083
    AND (issues.state IN ('opened'))
    AND labels.title IN ('Plan',
        'backend')
GROUP BY
    issues.id
HAVING (COUNT(DISTINCT labels.title) = 2)
ORDER BY
    issues.updated_at DESC,
    issues.id DESC
LIMIT 20 OFFSET 0
```

In particular, note that:

1. We `GROUP BY issues.id` so that we can ...
1. Use the `HAVING (COUNT(DISTINCT labels.title) = 2)` condition to ensure that
   all matched issues have both labels.

This is more complicated than is ideal. It makes the query construction more
prone to errors (such as
[issue #15557](https://gitlab.com/gitlab-org/gitlab-foss/-/issues/15557)).
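
For orientation, a hedged ActiveRecord rendering of the query above; the `labels` association and attribute names are assumptions:

```ruby
# Same GROUP BY/HAVING shape as the SQL: an issue matches only if it carries
# every requested label title.
titles = %w[Plan backend]

Issue.where(project_id: 13083, state: 'opened')
     .joins(:labels)
     .where(labels: { title: titles })
     .group('issues.id')
     .having('COUNT(DISTINCT labels.title) = ?', titles.size)
     .order(updated_at: :desc, id: :desc)
     .limit(20)
```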
## Attempt A: `WHERE EXISTS`

### Attempt A1: use multiple subqueries with `WHERE EXISTS`

In [issue #37137](https://gitlab.com/gitlab-org/gitlab-foss/-/issues/37137)
and its associated [merge request](https://gitlab.com/gitlab-org/gitlab-foss/-/merge_requests/14022),
we tried to replace the `GROUP BY` with multiple uses of `WHERE EXISTS`. For the
example above, this would give:

```sql
WHERE (EXISTS (
        SELECT
            TRUE
        FROM
            label_links
            INNER JOIN labels ON labels.id = label_links.label_id
        WHERE
            labels.title = 'Plan'
            AND target_type = 'Issue'
            AND target_id = issues.id))
AND (EXISTS (
        SELECT
            TRUE
        FROM
            label_links
            INNER JOIN labels ON labels.id = label_links.label_id
        WHERE
            labels.title = 'backend'
            AND target_type = 'Issue'
            AND target_id = issues.id))
```

While this worked without schema changes, and did improve readability somewhat,
it did not improve query performance.

### Attempt A2: use label IDs in the `WHERE EXISTS` clause

In [merge request #34503](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/34503), we followed a similar approach to A1. But this time, we
did a separate query to fetch the IDs of the labels used in the filter so that we avoid the `JOIN` in the `EXISTS` clause and filter directly by
`label_links.label_id`. We also added a new index on `label_links` for the `target_id`, `label_id`, and `target_type` columns to speed up this query.

Finding the label IDs wasn't straightforward because there could be multiple labels with the same title within a single root namespace. We solved
this by grouping the label IDs by title and then using the array of IDs in the `EXISTS` clauses.

This resulted in a significant performance improvement. However, this optimization could not be applied to the dashboard pages
where we do not have a project or group context. We could not easily search for the label IDs here because that would mean searching across all
projects and groups that the user has access to.
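
A hedged Ruby sketch of that preparation step, assuming a project-scoped `Label` model:

```ruby
# Resolve the filter titles to arrays of label IDs up front, so the EXISTS
# subqueries can filter label_links.label_id directly without joining labels.
ids_by_title = Label
  .where(project_id: project.id, title: %w[Plan backend])
  .group(:title)
  .pluck(:title, Arel.sql('ARRAY_AGG(id)'))
  .to_h
# => e.g. { "Plan" => [11], "backend" => [12] }
```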
## Attempt B: Denormalize using an array column

Having [removed MySQL support in GitLab 12.1](https://about.gitlab.com/blog/2019/06/27/removing-mysql-support/),
using [PostgreSQL's arrays](https://www.postgresql.org/docs/11/arrays.html) became more
tractable as we didn't have to support two databases. We discussed denormalizing
the `label_links` table for querying in
[issue #49651](https://gitlab.com/gitlab-org/gitlab-foss/-/issues/49651),
with two options: label IDs and titles.

We can think of both of those as array columns on `issues`, `merge_requests`,
and `epics`: `issues.label_ids` would be an array column of label IDs, and
`issues.label_titles` would be an array of label titles.

These array columns can be complemented with
[GIN indexes](https://www.postgresql.org/docs/11/gin-intro.html) to improve
matching.

### Attempt B1: store label IDs for each object

This has some strong advantages over titles:

1. Unless a label is deleted, or a project is moved, we never need to
   bulk-update the denormalized column.
1. It uses less storage than the titles.

Unfortunately, our application design makes this hard. If we were able to query
just by label ID easily, we wouldn't need the `INNER JOIN labels` in the initial
query at the start of this document. GitLab allows users to filter by label
title across projects and even across groups, so a filter by the label ~Plan may
include labels with multiple distinct IDs.

We do not want users to have to know about the different IDs, which means that
given this data set:

| Project | ~Plan label ID | ~backend label ID |
| ------- | -------------- | ----------------- |
| A       | 11             | 12                |
| B       | 21             | 22                |
| C       | 31             | 32                |

We would need something like:

```sql
WHERE
    label_ids @> ARRAY[11, 12]
    OR label_ids @> ARRAY[21, 22]
    OR label_ids @> ARRAY[31, 32]
```

This can get even more complicated when we consider that in some cases, there
might be two ~backend labels - with different IDs - that could apply to the same
object, so the number of combinations would balloon further.

### Attempt B2: store label titles for each object

From the perspective of updating the object, this is the worst
option. We have to bulk update the objects when:

1. The objects are moved from one project to another.
1. The project is moved from one group to another.
1. The label is renamed.
1. The label is deleted.

It also uses much more storage. Querying is simple, though:

```sql
WHERE
    label_titles @> ARRAY['Plan', 'backend']
```

And our [tests in issue #49651](https://gitlab.com/gitlab-org/gitlab-foss/-/issues/49651#note_188777346)
showed that this could be fast.

However, at present, the disadvantages outweigh the advantages.

## Conclusion

We found a method A2 that does not need denormalization and improves the query performance significantly. This
did not apply to all cases, but we were able to apply method A1 to the rest of the cases so that we remove the
`GROUP BY` and `HAVING` clauses in all scenarios.

This simplified the query and improved the performance in the most common cases.
@@ -0,0 +1,199 @@
---
stage: Data Stores
group: Database
info: To determine the technical writer assigned to the Stage/Group associated with this page, see https://about.gitlab.com/handbook/engineering/ux/technical-writing/#assignments
---

# Foreign Keys & Associations

When adding an association to a model you must also add a foreign key. For
example, say you have the following model:

```ruby
class User < ActiveRecord::Base
  has_many :posts
end
```

Add a foreign key here on column `posts.user_id`. This ensures
that data consistency is enforced at the database level. Foreign keys also mean that
the database can very quickly remove associated data (for example, when removing a
user), instead of Rails having to do this.

## Adding Foreign Keys In Migrations

Foreign keys can be added concurrently using `add_concurrent_foreign_key` as
defined in `Gitlab::Database::MigrationHelpers`. See the
[Migration Style Guide](../migration_style_guide.md) for more information.

Keep in mind that you can only safely add foreign keys to existing tables after
you have removed any orphaned rows. The method `add_concurrent_foreign_key`
does not take care of this so you must do so manually. See
[adding foreign key constraint to an existing column](add_foreign_key_to_existing_column.md).
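
A hedged sketch of such a migration, using the helper named above with illustrative table and column names:

```ruby
# Adds the constraint without blocking writes; assumes orphaned rows were
# already removed, as required above.
class AddForeignKeyToPostsUserId < Gitlab::Database::Migration[2.0]
  disable_ddl_transaction!

  def up
    add_concurrent_foreign_key :posts, :users, column: :user_id, on_delete: :cascade
  end

  def down
    remove_foreign_key_if_exists :posts, column: :user_id
  end
end
```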
## Updating Foreign Keys In Migrations

Sometimes a foreign key constraint must be changed, preserving the column
but updating the constraint condition. For example, moving from
`ON DELETE CASCADE` to `ON DELETE SET NULL` or vice-versa.

PostgreSQL does not prevent you from adding overlapping foreign keys. It
honors the most recently added constraint. This allows us to replace foreign keys without
ever losing foreign key protection on a column.

To replace a foreign key:

1. [Add the new foreign key without validation](add_foreign_key_to_existing_column.md#prevent-invalid-records)

   The name of the foreign key constraint must be changed to add a new
   foreign key before removing the old one.

   ```ruby
   class ReplaceFkOnPackagesPackagesProjectId < Gitlab::Database::Migration[2.0]
     disable_ddl_transaction!

     NEW_CONSTRAINT_NAME = 'fk_new'

     def up
       add_concurrent_foreign_key(:packages_packages, :projects, column: :project_id, on_delete: :nullify, validate: false, name: NEW_CONSTRAINT_NAME)
     end

     def down
       with_lock_retries do
         remove_foreign_key_if_exists(:packages_packages, column: :project_id, on_delete: :nullify, name: NEW_CONSTRAINT_NAME)
       end
     end
   end
   ```

1. [Validate the new foreign key](add_foreign_key_to_existing_column.md#validate-the-foreign-key)

   ```ruby
   class ValidateFkNew < Gitlab::Database::Migration[2.0]
     NEW_CONSTRAINT_NAME = 'fk_new'

     # foreign key added in <link to MR or path to migration adding new FK>
     def up
       validate_foreign_key(:packages_packages, name: NEW_CONSTRAINT_NAME)
     end

     def down
       # no-op
     end
   end
   ```

1. Remove the old foreign key:

   ```ruby
   class RemoveFkOld < Gitlab::Database::Migration[2.0]
     OLD_CONSTRAINT_NAME = 'fk_old'

     # new foreign key added in <link to MR or path to migration adding new FK>
     # and validated in <link to MR or path to migration validating new FK>
     def up
       remove_foreign_key_if_exists(:packages_packages, column: :project_id, on_delete: :cascade, name: OLD_CONSTRAINT_NAME)
     end

     def down
       # Validation is skipped here, so if rolled back, this will need to be revalidated in a separate migration
       add_concurrent_foreign_key(:packages_packages, :projects, column: :project_id, on_delete: :cascade, validate: false, name: OLD_CONSTRAINT_NAME)
     end
   end
   ```

## Cascading Deletes

Every foreign key must define an `ON DELETE` clause, and in 99% of the cases
this should be set to `CASCADE`.

## Indexes

When adding a foreign key in PostgreSQL the column is not indexed automatically,
thus you must also add a concurrent index. Not doing so results in cascading
deletes being very slow.
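
A minimal sketch of pairing the foreign key with a concurrent index; names are illustrative:

```ruby
# Indexes the FK column concurrently so cascading deletes stay fast.
class AddIndexOnPostsUserId < Gitlab::Database::Migration[2.0]
  disable_ddl_transaction!

  INDEX_NAME = 'index_posts_on_user_id'

  def up
    add_concurrent_index :posts, :user_id, name: INDEX_NAME
  end

  def down
    remove_concurrent_index_by_name :posts, INDEX_NAME
  end
end
```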
## Naming foreign keys

By default Ruby on Rails uses the `_id` suffix for foreign keys. So we should
only use this suffix for associations between two tables. If you want to
reference an ID on a third party platform the `_xid` suffix is recommended.

The spec `spec/db/schema_spec.rb` tests if all columns with the `_id` suffix
have a foreign key constraint. So if that spec fails, don't add the column to
`IGNORED_FK_COLUMNS`, but instead add the FK constraint, or consider naming it
differently.

## Dependent Removals

Don't define options such as `dependent: :destroy` or `dependent: :delete` when
defining an association. Defining these options means Rails handles the
removal of data, instead of letting the database handle this in the most
efficient way possible.

In other words, this is bad and should be avoided at all costs:

```ruby
class User < ActiveRecord::Base
  has_many :posts, dependent: :destroy
end
```

Should you truly have a need for this it should be approved by a database
specialist first.

You should also not define any `before_destroy` or `after_destroy` callbacks on
your models _unless_ absolutely required and only when approved by database
specialists. For example, if each row in a table has a corresponding file on a
file system it may be tempting to add an `after_destroy` hook. This, however,
introduces non-database logic to a model, and means we can no longer rely on
foreign keys to remove the data as this would result in the file system data
being left behind. In such a case you should use a service class instead that
takes care of removing non-database data.
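
A hedged sketch of that service-class approach; the class name and file layout are illustrative:

```ruby
require 'fileutils'

module Users
  # Destroys the row (related rows cascade via foreign keys) and then removes
  # the non-database data that foreign keys cannot clean up.
  class DestroyService
    def initialize(user)
      @user = user
    end

    def execute
      @user.destroy!
      FileUtils.rm_rf(File.join('uploads', 'users', @user.id.to_s))
    end
  end
end
```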
In cases where the relation spans multiple databases you have even
further problems using `dependent: :destroy` or the above hooks. You can
read more about alternatives at
[Avoid `dependent: :nullify` and `dependent: :destroy` across databases](multiple_databases.md#avoid-dependent-nullify-and-dependent-destroy-across-databases).

## Alternative primary keys with `has_one` associations

Sometimes a `has_one` association is used to create a one-to-one relationship:

```ruby
class User < ActiveRecord::Base
  has_one :user_config
end

class UserConfig < ActiveRecord::Base
  belongs_to :user
end
```

In these cases, there may be an opportunity to remove the unnecessary `id`
column on the associated table, `user_config.id` in this example. Instead,
the originating table ID can be used as the primary key for the associated
table:

```ruby
create_table :user_configs, id: false do |t|
  t.references :users, primary_key: true, default: nil, index: false, foreign_key: { on_delete: :cascade }
  ...
end
```

Setting `default: nil` ensures a primary key sequence is not created, and since the primary key
automatically gets an index, we set `index: false` to avoid creating a duplicate.
You also need to add the new primary key to the model:

```ruby
class UserConfig < ActiveRecord::Base
  self.primary_key = :user_id

  belongs_to :user
end
```

Using a foreign key as primary key saves space but can make
[batch counting](../service_ping/implement.md#batch-counters) in [Service Ping](../service_ping/index.md) less efficient.
Consider using a regular `id` column if the table is relevant for Service Ping.
@@ -42,7 +42,7 @@ info: To determine the technical writer assigned to the Stage/Group associated w
 ## Best practices

 - [Adding database indexes](adding_database_indexes.md)
-- [Foreign keys & associations](../foreign_keys.md)
+- [Foreign keys & associations](foreign_keys.md)
 - [Adding a foreign key constraint to an existing column](add_foreign_key_to_existing_column.md)
 - [`NOT NULL` constraints](not_null_constraints.md)
 - [Strings and the Text data type](strings_and_the_text_data_type.md)
@@ -52,7 +52,7 @@ info: To determine the technical writer assigned to the Stage/Group associated w
 - [Hash indexes](../hash_indexes.md)
 - [Storing SHA1 hashes as binary](../sha1_as_binary.md)
 - [Iterating tables in batches](../iterating_tables_in_batches.md)
-- [Insert into tables in batches](../insert_into_tables_in_batches.md)
+- [Insert into tables in batches](insert_into_tables_in_batches.md)
 - [Ordering table columns](ordering_table_columns.md)
 - [Verifying database capabilities](../verifying_database_capabilities.md)
 - [Database Debugging and Troubleshooting](../database_debugging.md)
@@ -69,8 +69,8 @@ info: To determine the technical writer assigned to the Stage/Group associated w

 ## Case studies

-- [Database case study: Filtering by label](../filtering_by_label.md)
-- [Database case study: Namespaces storage statistics](../namespaces_storage_statistics.md)
+- [Database case study: Filtering by label](filtering_by_label.md)
+- [Database case study: Namespaces storage statistics](namespaces_storage_statistics.md)

 ## Miscellaneous
@@ -0,0 +1,196 @@
---
stage: Data Stores
group: Database
info: To determine the technical writer assigned to the Stage/Group associated with this page, see https://about.gitlab.com/handbook/engineering/ux/technical-writing/#assignments
description: "Sometimes it is necessary to store large amounts of records at once, which can be inefficient
when iterating collections and performing individual `save`s. With the arrival of `insert_all`
in Rails 6, which operates at the row level (that is, using `Hash`es), GitLab has added a set
of APIs that make it safe and simple to insert ActiveRecord objects in bulk."
---

# Insert into tables in batches

Sometimes it is necessary to store large amounts of records at once, which can be inefficient
when iterating collections and saving each record individually. With the arrival of
[`insert_all`](https://apidock.com/rails/ActiveRecord/Persistence/ClassMethods/insert_all)
in Rails 6, which operates at the row level (that is, using `Hash` objects), GitLab has added a set
of APIs that make it safe and simple to insert `ActiveRecord` objects in bulk.

## Prepare `ApplicationRecord`s for bulk insertion

In order for a model class to take advantage of the bulk insertion API, it has to include the
`BulkInsertSafe` concern first:

```ruby
class MyModel < ApplicationRecord
  # other includes here
  # ...
  include BulkInsertSafe # include this last

  # ...
end
```

The `BulkInsertSafe` concern has two functions:

- It performs checks against your model class to ensure that it does not use ActiveRecord
  APIs that are not safe to use with respect to bulk insertions (more on that below).
- It adds new class methods `bulk_insert!` and `bulk_upsert!`, which you can use to insert many records at once.

## Insert records with `bulk_insert!` and `bulk_upsert!`

If the target class passes the checks performed by `BulkInsertSafe`, you can insert an array of
ActiveRecord model objects as follows:

```ruby
records = [MyModel.new, ...]

MyModel.bulk_insert!(records)
```

Calls to `bulk_insert!` always attempt to insert _new records_. If instead
you would like to replace existing records with new values, while still inserting those
that do not already exist, then you can use `bulk_upsert!`:

```ruby
records = [MyModel.new, existing_model, ...]

MyModel.bulk_upsert!(records, unique_by: [:name])
```

In this example, `unique_by` specifies the columns by which records are considered to be
unique and as such are updated if they existed prior to insertion. For example, if
`existing_model` has a `name` attribute, and if a record with the same `name` value already
exists, its fields are updated with those of `existing_model`.

The `unique_by` parameter can also be passed as a `Symbol`, in which case it specifies
a database index by which a column is considered unique:

```ruby
MyModel.bulk_insert!(records, unique_by: :index_on_name)
```

### Record validation

The `bulk_insert!` method guarantees that `records` are inserted transactionally, and
runs validations on each record prior to insertion. If any record fails to validate,
an error is raised and the transaction is rolled back. You can turn off validations via
the `:validate` option:

```ruby
MyModel.bulk_insert!(records, validate: false)
```

### Batch size configuration

In those cases where the number of `records` is above a given threshold, insertions
occur in multiple batches. The default batch size is defined in
[`BulkInsertSafe::DEFAULT_BATCH_SIZE`](https://gitlab.com/gitlab-org/gitlab/-/blob/master/app/models/concerns/bulk_insert_safe.rb).
Assuming a default threshold of 500, inserting 950 records
would result in two batches being written sequentially (of size 500 and 450, respectively).
You can override the default batch size via the `:batch_size` option:

```ruby
MyModel.bulk_insert!(records, batch_size: 100)
```

Assuming the same number of 950 records, this would result in 10 batches being written instead.
Since this also affects the number of `INSERT` statements that occur, make sure you measure the
performance impact this might have on your code. There is a trade-off between the number of
`INSERT` statements the database has to process and the size and cost of each `INSERT`.

### Handling duplicate records

NOTE:
This parameter applies only to `bulk_insert!`. If you intend to update existing
records, use `bulk_upsert!` instead.

It may happen that some records you are trying to insert already exist, which would result in
primary key conflicts. There are two ways to address this problem: failing fast by raising an
error or skipping duplicate records. The default behavior of `bulk_insert!` is to fail fast
and raise an `ActiveRecord::RecordNotUnique` error.

If this is undesirable, you can instead skip duplicate records with the `skip_duplicates` flag:

```ruby
MyModel.bulk_insert!(records, skip_duplicates: true)
```

### Requirements for safe bulk insertions

Large parts of ActiveRecord's persistence API are built around the notion of callbacks. Many
of these callbacks fire in response to model life cycle events such as `save` or `create`.
These callbacks cannot be used with bulk insertions, since they are meant to be called for
every instance that is saved or created. Since these events do not fire when
records are inserted in bulk, we currently prevent their use.

The specifics around which callbacks are explicitly allowed are defined in
[`BulkInsertSafe`](https://gitlab.com/gitlab-org/gitlab/-/blob/master/app/models/concerns/bulk_insert_safe.rb).
Consult the module source code for details. If your class uses callbacks that are not explicitly designated
safe and you `include BulkInsertSafe`, the application fails with an error.

### `BulkInsertSafe` versus `InsertAll`

Internally, `BulkInsertSafe` is based on `InsertAll`, and you may wonder when to choose
the former over the latter. To help you make the decision,
the key differences between these classes are listed in the table below.

|                | Input type           | Validates input | Specify batch size | Can bypass callbacks              | Transactional |
|----------------|----------------------|-----------------|--------------------|-----------------------------------|---------------|
| `bulk_insert!` | ActiveRecord objects | Yes (optional)  | Yes (optional)     | No (prevents unsafe callback use) | Yes           |
| `insert_all!`  | Attribute hashes     | No              | No                 | Yes                               | Yes           |

To summarize, `BulkInsertSafe` moves bulk inserts closer to how ActiveRecord objects
and inserts would normally behave. However, if all you need is to insert raw data in bulk, then
`insert_all` is more efficient.

## Insert `has_many` associations in bulk

A common use case is to save collections of associated relations through the owner side of the relation,
where the owned relation is associated to the owner through the `has_many` class method:

```ruby
owner = OwnerModel.new(owned_relations: array_of_owned_relations)
# saves all `owned_relations` one-by-one
owner.save!
```

This issues a single `INSERT`, and transaction, for every record in `owned_relations`, which is inefficient if
`array_of_owned_relations` is large. To remedy this, the `BulkInsertableAssociations` concern can be
used to declare that the owner defines associations that are safe for bulk insertion:

```ruby
class OwnerModel < ApplicationRecord
  # other includes here
  # ...
  include BulkInsertableAssociations # include this last

  has_many :my_models
end
```

Here `my_models` must be declared `BulkInsertSafe` (as described previously) for bulk insertions
to happen. You can now insert any yet unsaved records as follows:

```ruby
BulkInsertableAssociations.with_bulk_insert do
  owner = OwnerModel.new(my_models: array_of_my_model_instances)
  # saves `my_models` using a single bulk insert (possibly via multiple batches)
  owner.save!
end
```

You can still save relations that are not `BulkInsertSafe` in this block; they
simply are treated as if you had invoked `save` from outside the block.

## Known limitations

There are a few restrictions to how these APIs can be used:

- `BulkInsertableAssociations`:
  - It is currently only compatible with `has_many` relations.
  - It does not yet support `has_many through: ...` relations.

Moreover, input data should either be limited to around 1000 records at most,
or already batched prior to calling bulk insert. The `INSERT` statement runs in a single
transaction, so for large amounts of records it may negatively affect database stability.
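
A minimal sketch of the pre-batching recommended here; the slice size follows the guidance above:

```ruby
# Each bulk_insert! call runs in its own transaction, so slicing the input
# keeps every transaction small.
records.each_slice(1_000) do |slice|
  MyModel.bulk_insert!(slice)
end
```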
@@ -10,7 +10,7 @@ info: To determine the technical writer assigned to the Stage/Group associated w

 In relational databases (including PostgreSQL), foreign keys provide a way to link
 two database tables together, and ensure data-consistency between them. In GitLab,
-[foreign keys](../foreign_keys.md) are vital part of the database design process.
+[foreign keys](foreign_keys.md) are vital part of the database design process.
 Most of our database tables have foreign keys.

 With the ongoing database [decomposition work](https://gitlab.com/groups/gitlab-org/-/epics/6168),
@@ -0,0 +1,193 @@
---
stage: none
group: unassigned
info: To determine the technical writer assigned to the Stage/Group associated with this page, see https://about.gitlab.com/handbook/engineering/ux/technical-writing/#assignments
---

# Database case study: Namespaces storage statistics

## Introduction

On [Storage and limits management for groups](https://gitlab.com/groups/gitlab-org/-/epics/886),
we want to facilitate a method for easily viewing the amount of
storage consumed by a group, and allow easy management.

## Proposal

1. Create a new ActiveRecord model to hold the namespaces' statistics in an aggregated form (only for root [namespaces](../../user/namespace/index.md)).
1. Refresh the statistics in this model every time a project belonging to this namespace is changed.

## Problem

In GitLab, we update the project storage statistics through a
[callback](https://gitlab.com/gitlab-org/gitlab/-/blob/4ab54c2233e91f60a80e5b6fa2181e6899fdcc3e/app/models/project.rb#L97)
every time the project is saved.

The summary of those statistics per namespace is then retrieved
by the [`Namespaces#with_statistics`](https://gitlab.com/gitlab-org/gitlab/-/blob/4ab54c2233e91f60a80e5b6fa2181e6899fdcc3e/app/models/namespace.rb#L70) scope. Analyzing this query we noticed that:

- It takes up to `1.2` seconds for namespaces with over `15k` projects.
- It can't be analyzed with [ChatOps](../chatops_on_gitlabcom.md), as it times out.

Additionally, the pattern that is currently used to update the project statistics
(the callback) doesn't scale adequately. It is currently one of the largest
[database queries transactions on production](https://gitlab.com/gitlab-org/gitlab/-/issues/29070)
that takes the most time overall. We can't add one more query to it as
it increases the transaction's length.

Because of all of the above, we can't apply the same pattern to store
and update the namespaces statistics, as the `namespaces` table is one
of the largest tables on GitLab.com. Therefore we needed to find a performant
alternative method.

## Attempts

### Attempt A: PostgreSQL materialized view

The model can be updated through a refresh strategy based on a project routes SQL and a [materialized view](https://www.postgresql.org/docs/11/rules-materializedviews.html):

```sql
SELECT split_part("rs".path, '/', 1) as root_path,
       COALESCE(SUM(ps.storage_size), 0) AS storage_size,
       COALESCE(SUM(ps.repository_size), 0) AS repository_size,
       COALESCE(SUM(ps.wiki_size), 0) AS wiki_size,
       COALESCE(SUM(ps.lfs_objects_size), 0) AS lfs_objects_size,
       COALESCE(SUM(ps.build_artifacts_size), 0) AS build_artifacts_size,
       COALESCE(SUM(ps.pipeline_artifacts_size), 0) AS pipeline_artifacts_size,
       COALESCE(SUM(ps.packages_size), 0) AS packages_size,
       COALESCE(SUM(ps.snippets_size), 0) AS snippets_size,
       COALESCE(SUM(ps.uploads_size), 0) AS uploads_size
FROM "projects"
       INNER JOIN routes rs ON rs.source_id = projects.id AND rs.source_type = 'Project'
       INNER JOIN project_statistics ps ON ps.project_id = projects.id
GROUP BY root_path
```

We could then execute the query with:

```sql
REFRESH MATERIALIZED VIEW root_namespace_storage_statistics;
```

While this implied a single query update (and probably a fast one), it has some downsides:

- Materialized view syntax varies between PostgreSQL and MySQL. While this feature was worked on, MySQL was still supported by GitLab.
- Rails does not have native support for materialized views. We'd need to use a specialized gem to take care of the management of the database views, which implies additional work.

### Attempt B: An update through a CTE

Similar to Attempt A: the model update is done through a refresh strategy with a [Common Table Expression](https://www.postgresql.org/docs/9.1/queries-with.html):

```sql
WITH refresh AS (
  SELECT split_part("rs".path, '/', 1) as root_path,
         COALESCE(SUM(ps.storage_size), 0) AS storage_size,
         COALESCE(SUM(ps.repository_size), 0) AS repository_size,
         COALESCE(SUM(ps.wiki_size), 0) AS wiki_size,
         COALESCE(SUM(ps.lfs_objects_size), 0) AS lfs_objects_size,
         COALESCE(SUM(ps.build_artifacts_size), 0) AS build_artifacts_size,
         COALESCE(SUM(ps.pipeline_artifacts_size), 0) AS pipeline_artifacts_size,
         COALESCE(SUM(ps.packages_size), 0) AS packages_size,
         COALESCE(SUM(ps.snippets_size), 0) AS snippets_size,
         COALESCE(SUM(ps.uploads_size), 0) AS uploads_size
  FROM "projects"
         INNER JOIN routes rs ON rs.source_id = projects.id AND rs.source_type = 'Project'
         INNER JOIN project_statistics ps ON ps.project_id = projects.id
  GROUP BY root_path)
UPDATE namespace_storage_statistics
SET storage_size = refresh.storage_size,
    repository_size = refresh.repository_size,
    wiki_size = refresh.wiki_size,
    lfs_objects_size = refresh.lfs_objects_size,
    build_artifacts_size = refresh.build_artifacts_size,
    pipeline_artifacts_size = refresh.pipeline_artifacts_size,
    packages_size = refresh.packages_size,
    snippets_size = refresh.snippets_size,
    uploads_size = refresh.uploads_size
FROM refresh
       INNER JOIN routes rs ON rs.path = refresh.root_path AND rs.source_type = 'Namespace'
WHERE namespace_storage_statistics.namespace_id = rs.source_id
```

Same benefits and downsides as attempt A.

### Attempt C: Get rid of the model and store the statistics on Redis

We could get rid of the model that stores the statistics in aggregated form and instead use a Redis Set.
This would be the [boring solution](https://about.gitlab.com/handbook/values/#boring-solutions) and the fastest one
to implement, as GitLab already includes Redis as part of its [Architecture](../architecture.md#redis).

The downside of this approach is that Redis does not provide the same persistence/consistency guarantees as PostgreSQL,
and this is information we can't afford to lose in a Redis failure.

### Attempt D: Tag the root namespace and its child namespaces

Directly relate the root namespace to its child namespaces, so
whenever a namespace is created without a parent, this one is tagged
with the root namespace ID:

| ID | root ID | parent ID |
|:---|:--------|:----------|
| 1  | 1       | NULL      |
| 2  | 1       | 1         |
| 3  | 1       | 2         |

To aggregate the statistics inside a namespace we'd execute something like:

```sql
SELECT COUNT(...)
FROM projects
WHERE namespace_id IN (
  SELECT id
  FROM namespaces
  WHERE root_id = X
)
```

Even though this approach would make aggregating much easier, it has some major downsides:

- We'd have to migrate **all namespaces** by adding and filling a new column. Because of the size of the table, dealing with time/cost would be significant. The background migration would take approximately `153h`, see <https://gitlab.com/gitlab-org/gitlab-foss/-/merge_requests/29772>.
- The background migration has to be shipped one release before, delaying the functionality by another milestone.

### Attempt E (final): Update the namespace storage statistics asynchronously

This approach consists of continuing to use the incremental statistics updates we already have,
but we refresh them through Sidekiq jobs and in different transactions:

1. Create a second table (`namespace_aggregation_schedules`) with two columns `id` and `namespace_id`.
1. Whenever the statistics of a project change, insert a row into `namespace_aggregation_schedules`.
   - We don't insert a new row if there's already one related to the root namespace.
   - Keeping in mind the length of the transaction that involves updating `project_statistics` (<https://gitlab.com/gitlab-org/gitlab/-/issues/29070>), the insertion should be done in a different transaction and through a Sidekiq Job.
1. After inserting the row, we schedule another worker to be executed asynchronously at two different moments:
   - One enqueued for immediate execution and another one scheduled to run `1.5` hours later.
   - We only schedule the jobs if we can obtain a `1.5h` lease on Redis on a key based on the root namespace ID (see the sketch after this list).
   - If we can't obtain the lease, it indicates there's another aggregation already in progress, or scheduled to run in no more than `1.5h`.
1. This worker will:
   - Update the root namespace storage statistics by querying all the namespaces through a service.
   - Delete the related `namespace_aggregation_schedules` after the update.
1. Another Sidekiq job is also included to traverse any remaining rows on the `namespace_aggregation_schedules` table and schedule jobs for every pending row.
   - This job is scheduled with cron to run every night (UTC).
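
A minimal sketch of the lease-guarded scheduling in step 3, assuming illustrative worker and key names rather than the exact GitLab implementation:

```ruby
# Schedule the aggregation worker only when a 1.5-hour exclusive lease can be
# taken for this root namespace; a held lease means a refresh is already
# queued or in progress.
lease = Gitlab::ExclusiveLease.new(
  "namespace_aggregation:#{root_namespace.id}",
  timeout: 1.5.hours.to_i
)

if lease.try_obtain
  Namespaces::RootStatisticsWorker.perform_async(root_namespace.id)
  Namespaces::RootStatisticsWorker.perform_in(1.5.hours, root_namespace.id)
end
```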
This implementation has the following benefits:

- All the updates are done asynchronously, so we're not increasing the length of the transactions for `project_statistics`.
- We're doing the update in a single SQL query.
- It is compatible with PostgreSQL and MySQL.
- No background migration required.

The only downside of this approach is that namespaces' statistics are updated up to `1.5` hours after the change is done,
which means there's a time window in which the statistics are inaccurate. Because we're still not
[enforcing storage limits](https://gitlab.com/gitlab-org/gitlab/-/issues/17664), this is not a major problem.

## Conclusion

Updating the storage statistics asynchronously was the least problematic and
most performant approach to aggregating the root namespaces.

All the details regarding this use case can be found on:

- <https://gitlab.com/gitlab-org/gitlab-foss/-/issues/62214>
- Merge Request with the implementation: <https://gitlab.com/gitlab-org/gitlab-foss/-/merge_requests/28996>

Performance of the namespace storage statistics was measured in staging and production (GitLab.com). All results were posted
on <https://gitlab.com/gitlab-org/gitlab-foss/-/issues/64092>. No problems have been reported so far.
@@ -597,7 +597,7 @@ export default {
 Note that, even if the directive evaluates to `false`, the guarded entity is sent to the backend and
 matched against the GraphQL schema. So this approach requires that the feature-flagged entity
 exists in the schema, even if the feature flag is disabled. When the feature flag is turned off, it
-is recommended that the resolver returns `null` at the very least using the same feature flag as the frontend. See the [API GraphQL guide](../api_graphql_styleguide.md#frontend-and-backend-feature-flag-strategies).
+is recommended that the resolver returns `null` at the very least using the same feature flag as the frontend. See the [API GraphQL guide](../api_graphql_styleguide.md#feature-flags).

 ##### Different versions of a query
@@ -1,179 +1,11 @@
 ---
-stage: Plan
-group: Project Management
-info: To determine the technical writer assigned to the Stage/Group associated with this page, see https://about.gitlab.com/handbook/engineering/ux/technical-writing/#assignments
+redirect_to: 'database/filtering_by_label.md'
+remove_date: '2022-11-05'
 ---

-# Filtering by label
-
-## Introduction
-
+This document was moved to [another location](database/filtering_by_label.md).
|
||||
|
||||
GitLab has [labels](../user/project/labels.md) that can be assigned to issues,
|
||||
merge requests, and epics. Labels on those objects are a many-to-many relation
|
||||
through the polymorphic `label_links` table.
|
||||
|
||||
To filter these objects by multiple labels - for instance, 'all open
|
||||
issues with the label ~Plan and the label ~backend' - we generate a
|
||||
query containing a `GROUP BY` clause. In a simple form, this looks like:
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
issues.*
|
||||
FROM
|
||||
issues
|
||||
INNER JOIN label_links ON label_links.target_id = issues.id
|
||||
AND label_links.target_type = 'Issue'
|
||||
INNER JOIN labels ON labels.id = label_links.label_id
|
||||
WHERE
|
||||
issues.project_id = 13083
|
||||
AND (issues.state IN ('opened'))
|
||||
AND labels.title IN ('Plan',
|
||||
'backend')
|
||||
GROUP BY
|
||||
issues.id
|
||||
HAVING (COUNT(DISTINCT labels.title) = 2)
|
||||
ORDER BY
|
||||
issues.updated_at DESC,
|
||||
issues.id DESC
|
||||
LIMIT 20 OFFSET 0
|
||||
```
|
||||
|
||||
In particular, note that:
|
||||
|
||||
1. We `GROUP BY issues.id` so that we can ...
|
||||
1. Use the `HAVING (COUNT(DISTINCT labels.title) = 2)` condition to ensure that
|
||||
all matched issues have both labels.
|
||||
|
||||
This is more complicated than is ideal. It makes the query construction more
|
||||
prone to errors (such as
|
||||
[issue #15557](https://gitlab.com/gitlab-org/gitlab-foss/-/issues/15557)).
|
||||
|
||||
## Attempt A: `WHERE EXISTS`
|
||||
|
||||
### Attempt A1: use multiple subqueries with `WHERE EXISTS`
|
||||
|
||||
In [issue #37137](https://gitlab.com/gitlab-org/gitlab-foss/-/issues/37137)
|
||||
and its associated [merge request](https://gitlab.com/gitlab-org/gitlab-foss/-/merge_requests/14022),
|
||||
we tried to replace the `GROUP BY` with multiple uses of `WHERE EXISTS`. For the
|
||||
example above, this would give:
|
||||
|
||||
```sql
|
||||
WHERE (EXISTS (
|
||||
SELECT
|
||||
TRUE
|
||||
FROM
|
||||
label_links
|
||||
INNER JOIN labels ON labels.id = label_links.label_id
|
||||
WHERE
|
||||
labels.title = 'Plan'
|
||||
AND target_type = 'Issue'
|
||||
AND target_id = issues.id))
|
||||
AND (EXISTS (
|
||||
SELECT
|
||||
TRUE
|
||||
FROM
|
||||
label_links
|
||||
INNER JOIN labels ON labels.id = label_links.label_id
|
||||
WHERE
|
||||
labels.title = 'backend'
|
||||
AND target_type = 'Issue'
|
||||
AND target_id = issues.id))
|
||||
```
|
||||
|
||||
While this worked without schema changes, and did improve readability somewhat,
|
||||
it did not improve query performance.
|
||||
|
||||
### Attempt A2: use label IDs in the `WHERE EXISTS` clause
|
||||
|
||||
In [merge request #34503](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/34503), we followed a similar approach to A1. But this time, we
|
||||
did a separate query to fetch the IDs of the labels used in the filter so that we avoid the `JOIN` in the `EXISTS` clause and filter directly by
|
||||
`label_links.label_id`. We also added a new index on `label_links` for the `target_id`, `label_id`, and `target_type` columns to speed up this query.
|
||||
|
||||
Finding the label IDs wasn't straightforward because there could be multiple labels with the same title within a single root namespace. We solved
|
||||
this by grouping the label IDs by title and then using the array of IDs in the `EXISTS` clauses.
|
||||
|
||||
This resulted in a significant performance improvement. However, this optimization could not be applied to the dashboard pages
|
||||
where we do not have a project or group context. We could not easily search for the label IDs here because that would mean searching across all
|
||||
projects and groups that the user has access to.
|
||||
|
||||
## Attempt B: Denormalize using an array column
|
||||
|
||||
Having [removed MySQL support in GitLab 12.1](https://about.gitlab.com/blog/2019/06/27/removing-mysql-support/),
|
||||
using [PostgreSQL's arrays](https://www.postgresql.org/docs/11/arrays.html) became more
|
||||
tractable as we didn't have to support two databases. We discussed denormalizing
|
||||
the `label_links` table for querying in
|
||||
[issue #49651](https://gitlab.com/gitlab-org/gitlab-foss/-/issues/49651),
|
||||
with two options: label IDs and titles.
|
||||
|
||||
We can think of both of those as array columns on `issues`, `merge_requests`,
|
||||
and `epics`: `issues.label_ids` would be an array column of label IDs, and
|
||||
`issues.label_titles` would be an array of label titles.
|
||||
|
||||
These array columns can be complemented with [GIN
|
||||
indexes](https://www.postgresql.org/docs/11/gin-intro.html) to improve
|
||||
matching.
|
||||
|
||||
### Attempt B1: store label IDs for each object

This has some strong advantages over titles:

1. Unless a label is deleted, or a project is moved, we never need to
   bulk-update the denormalized column.
1. It uses less storage than the titles.

Unfortunately, our application design makes this hard. If we were able to query
just by label ID easily, we wouldn't need the `INNER JOIN labels` in the initial
query at the start of this document. GitLab allows users to filter by label
title across projects and even across groups, so a filter by the label ~Plan may
include labels with multiple distinct IDs.

We do not want users to have to know about the different IDs, which means that
given this data set:

| Project | ~Plan label ID | ~backend label ID |
| ------- | -------------- | ----------------- |
| A       | 11             | 12                |
| B       | 21             | 22                |
| C       | 31             | 32                |

We would need something like:

```sql
WHERE
  label_ids @> ARRAY[11, 12]
  OR label_ids @> ARRAY[21, 22]
  OR label_ids @> ARRAY[31, 32]
```

This can get even more complicated when we consider that in some cases, there
might be two ~backend labels - with different IDs - that could apply to the same
object, so the number of combinations would balloon further.

### Attempt B2: store label titles for each object

From the perspective of updating the object, this is the worst
option. We have to bulk-update the objects when:

1. The objects are moved from one project to another.
1. The project is moved from one group to another.
1. The label is renamed.
1. The label is deleted.

It also uses much more storage. Querying is simple, though:

```sql
WHERE
  label_titles @> ARRAY['Plan', 'backend']
```

And our [tests in issue #49651](https://gitlab.com/gitlab-org/gitlab-foss/-/issues/49651#note_188777346)
showed that this could be fast.

However, at present, the disadvantages outweigh the advantages.

## Conclusion

We found that method A2 does not need denormalization and improves query performance significantly.
It did not apply to all cases, but we were able to apply method A1 to the rest of the cases, so that
the `GROUP BY` and `HAVING` clauses are removed in all scenarios.

This simplified the query and improved the performance in the most common cases.

<!-- This redirect file can be deleted after <2022-11-05>. -->
<!-- Redirects that point to other docs in the same project expire in three months. -->
<!-- Redirects that point to docs in a different project or site (for example, link is not relative and starts with `https:`) expire in one year. -->
<!-- Before deletion, see: https://docs.gitlab.com/ee/development/documentation/redirects.html -->

@@ -1,200 +1,11 @@
---
stage: Data Stores
group: Database
info: To determine the technical writer assigned to the Stage/Group associated with this page, see https://about.gitlab.com/handbook/engineering/ux/technical-writing/#assignments
redirect_to: 'database/foreign_keys.md'
remove_date: '2022-11-05'
---

# Foreign Keys & Associations

This document was moved to [another location](database/foreign_keys.md).

When adding an association to a model you must also add a foreign key. For
example, say you have the following model:

```ruby
class User < ActiveRecord::Base
  has_many :posts
end
```

Add a foreign key here on the `posts.user_id` column. This ensures
that data consistency is enforced at the database level. Foreign keys also mean that
the database can very quickly remove associated data (for example, when removing a
user), instead of Rails having to do this.

## Adding Foreign Keys In Migrations

Foreign keys can be added concurrently using `add_concurrent_foreign_key` as
defined in `Gitlab::Database::MigrationHelpers`. See the
[Migration Style Guide](migration_style_guide.md) for more information.

Keep in mind that you can only safely add foreign keys to existing tables after
you have removed any orphaned rows. The method `add_concurrent_foreign_key`
does not take care of this, so you must do so manually. See
[adding foreign key constraint to an existing column](database/add_foreign_key_to_existing_column.md).
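
For the `posts.user_id` example above, a minimal migration sketch (assuming any orphaned rows were already removed) could look like:

```ruby
class AddForeignKeyToPostsUserId < Gitlab::Database::Migration[2.0]
  disable_ddl_transaction!

  def up
    # Adds and validates the foreign key without holding long exclusive locks.
    add_concurrent_foreign_key :posts, :users, column: :user_id, on_delete: :cascade
  end

  def down
    remove_foreign_key_if_exists :posts, column: :user_id
  end
end
```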

## Updating Foreign Keys In Migrations

Sometimes a foreign key constraint must be changed, preserving the column
but updating the constraint condition. For example, moving from
`ON DELETE CASCADE` to `ON DELETE SET NULL` or vice versa.

PostgreSQL does not prevent you from adding overlapping foreign keys. It
honors the most recently added constraint. This allows us to replace foreign keys without
ever losing foreign key protection on a column.

To replace a foreign key:

1. [Add the new foreign key without validation](database/add_foreign_key_to_existing_column.md#prevent-invalid-records)

   The name of the foreign key constraint must be changed to add a new
   foreign key before removing the old one.

   ```ruby
   class ReplaceFkOnPackagesPackagesProjectId < Gitlab::Database::Migration[2.0]
     disable_ddl_transaction!

     NEW_CONSTRAINT_NAME = 'fk_new'

     def up
       add_concurrent_foreign_key(:packages_packages, :projects, column: :project_id, on_delete: :nullify, validate: false, name: NEW_CONSTRAINT_NAME)
     end

     def down
       with_lock_retries do
         remove_foreign_key_if_exists(:packages_packages, column: :project_id, on_delete: :nullify, name: NEW_CONSTRAINT_NAME)
       end
     end
   end
   ```

1. [Validate the new foreign key](database/add_foreign_key_to_existing_column.md#validate-the-foreign-key)

   ```ruby
   class ValidateFkNew < Gitlab::Database::Migration[2.0]
     NEW_CONSTRAINT_NAME = 'fk_new'

     # foreign key added in <link to MR or path to migration adding new FK>
     def up
       validate_foreign_key(:packages_packages, name: NEW_CONSTRAINT_NAME)
     end

     def down
       # no-op
     end
   end
   ```

1. Remove the old foreign key:

   ```ruby
   class RemoveFkOld < Gitlab::Database::Migration[2.0]
     OLD_CONSTRAINT_NAME = 'fk_old'

     # new foreign key added in <link to MR or path to migration adding new FK>
     # and validated in <link to MR or path to migration validating new FK>
     def up
       remove_foreign_key_if_exists(:packages_packages, column: :project_id, on_delete: :cascade, name: OLD_CONSTRAINT_NAME)
     end

     def down
       # Validation is skipped here, so if rolled back, this will need to be revalidated in a separate migration
       add_concurrent_foreign_key(:packages_packages, :projects, column: :project_id, on_delete: :cascade, validate: false, name: OLD_CONSTRAINT_NAME)
     end
   end
   ```

## Cascading Deletes

Every foreign key must define an `ON DELETE` clause, and in 99% of the cases
this should be set to `CASCADE`.

## Indexes

When adding a foreign key in PostgreSQL the column is not indexed automatically,
thus you must also add a concurrent index. Not doing so results in cascading
deletes being very slow.
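
Continuing the `posts.user_id` example, a sketch of the accompanying index migration:

```ruby
class AddIndexToPostsUserId < Gitlab::Database::Migration[2.0]
  disable_ddl_transaction!

  INDEX_NAME = 'index_posts_on_user_id'

  def up
    # Built with CREATE INDEX CONCURRENTLY, so writes are not blocked.
    add_concurrent_index :posts, :user_id, name: INDEX_NAME
  end

  def down
    remove_concurrent_index_by_name :posts, INDEX_NAME
  end
end
```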

## Naming foreign keys

By default Ruby on Rails uses the `_id` suffix for foreign keys. So we should
only use this suffix for associations between two tables. If you want to
reference an ID on a third-party platform, the `_xid` suffix is recommended.

The spec `spec/db/schema_spec.rb` tests whether all columns with the `_id` suffix
have a foreign key constraint. So if that spec fails, don't add the column to
`IGNORED_FK_COLUMNS`; instead, add the FK constraint, or consider naming the
column differently.

## Dependent Removals

Don't define options such as `dependent: :destroy` or `dependent: :delete` when
defining an association. Defining these options means Rails handles the
removal of data, instead of letting the database handle this in the most
efficient way possible.

In other words, this is bad and should be avoided at all costs:

```ruby
class User < ActiveRecord::Base
  has_many :posts, dependent: :destroy
end
```

Should you truly have a need for this, it should be approved by a database
specialist first.

You should also not define any `before_destroy` or `after_destroy` callbacks on
your models _unless_ absolutely required and only when approved by database
specialists. For example, if each row in a table has a corresponding file on a
file system, it may be tempting to add an `after_destroy` hook. This however
introduces non-database logic to a model, and means we can no longer rely on
foreign keys to remove the data, as this would result in the file system data
being left behind. In such a case you should instead use a service class that
takes care of removing the non-database data, as sketched below.
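
A sketch of that pattern, with illustrative names:

```ruby
class Attachments::DestroyService # illustrative, not an actual class
  def execute(attachment)
    # Database removal still relies on foreign keys for associated rows.
    attachment.destroy!

    # Non-database cleanup lives in the service, not in an after_destroy hook.
    File.delete(attachment.file_path) if File.exist?(attachment.file_path)
  end
end
```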

In cases where the relation spans multiple databases, you have even
further problems using `dependent: :destroy` or the above hooks. You can
read more about alternatives at
[Avoid `dependent: :nullify` and `dependent: :destroy` across databases](database/multiple_databases.md#avoid-dependent-nullify-and-dependent-destroy-across-databases).

## Alternative primary keys with `has_one` associations

Sometimes a `has_one` association is used to create a one-to-one relationship:

```ruby
class User < ActiveRecord::Base
  has_one :user_config
end

class UserConfig < ActiveRecord::Base
  belongs_to :user
end
```

In these cases, there may be an opportunity to remove the unnecessary `id`
column on the associated table, `user_config.id` in this example. Instead,
the originating table ID can be used as the primary key for the associated
table:

```ruby
create_table :user_configs, id: false do |t|
  t.references :users, primary_key: true, default: nil, index: false, foreign_key: { on_delete: :cascade }
  ...
end
```

Setting `default: nil` ensures a primary key sequence is not created, and since the primary key
automatically gets an index, we set `index: false` to avoid creating a duplicate.
You also need to add the new primary key to the model:

```ruby
class UserConfig < ActiveRecord::Base
  self.primary_key = :user_id

  belongs_to :user
end
```

Using a foreign key as the primary key saves space, but can make
[batch counting](service_ping/implement.md#batch-counters) in [Service Ping](service_ping/index.md) less efficient.
Consider using a regular `id` column if the table is relevant for Service Ping.

<!-- This redirect file can be deleted after <2022-11-05>. -->
<!-- Redirects that point to other docs in the same project expire in three months. -->
<!-- Redirects that point to docs in a different project or site (for example, link is not relative and starts with `https:`) expire in one year. -->
<!-- Before deletion, see: https://docs.gitlab.com/ee/development/documentation/redirects.html -->

@@ -1,196 +1,11 @@
---
stage: Data Stores
group: Database
info: To determine the technical writer assigned to the Stage/Group associated with this page, see https://about.gitlab.com/handbook/engineering/ux/technical-writing/#assignments
description: "Sometimes it is necessary to store large amounts of records at once, which can be inefficient
when iterating collections and performing individual `save`s. With the arrival of `insert_all`
in Rails 6, which operates at the row level (that is, using `Hash`es), GitLab has added a set
of APIs that make it safe and simple to insert ActiveRecord objects in bulk."
redirect_to: 'database/insert_into_tables_in_batches.md'
remove_date: '2022-11-05'
---

# Insert into tables in batches

This document was moved to [another location](database/insert_into_tables_in_batches.md).

Sometimes it is necessary to store large amounts of records at once, which can be inefficient
when iterating collections and saving each record individually. With the arrival of
[`insert_all`](https://apidock.com/rails/ActiveRecord/Persistence/ClassMethods/insert_all)
in Rails 6, which operates at the row level (that is, using `Hash` objects), GitLab has added a set
of APIs that make it safe and simple to insert `ActiveRecord` objects in bulk.

## Prepare `ApplicationRecord`s for bulk insertion

In order for a model class to take advantage of the bulk insertion API, it has to include the
`BulkInsertSafe` concern first:

```ruby
class MyModel < ApplicationRecord
  # other includes here
  # ...
  include BulkInsertSafe # include this last

  # ...
end
```

The `BulkInsertSafe` concern serves two purposes:

- It performs checks against your model class to ensure that it does not use ActiveRecord
  APIs that are not safe to use with respect to bulk insertions (more on that below).
- It adds the new class methods `bulk_insert!` and `bulk_upsert!`, which you can use to insert many records at once.

## Insert records with `bulk_insert!` and `bulk_upsert!`

If the target class passes the checks performed by `BulkInsertSafe`, you can insert an array of
ActiveRecord model objects as follows:

```ruby
records = [MyModel.new, ...]

MyModel.bulk_insert!(records)
```

Calls to `bulk_insert!` always attempt to insert _new records_. If instead
you would like to replace existing records with new values, while still inserting those
that do not already exist, then you can use `bulk_upsert!`:

```ruby
records = [MyModel.new, existing_model, ...]

MyModel.bulk_upsert!(records, unique_by: [:name])
```

In this example, `unique_by` specifies the columns by which records are considered to be
unique, and as such are updated if they existed prior to insertion. For example, if
`existing_model` has a `name` attribute, and if a record with the same `name` value already
exists, its fields are updated with those of `existing_model`.

The `unique_by` parameter can also be passed as a `Symbol`, in which case it specifies
a database index by which a column is considered unique:

```ruby
MyModel.bulk_insert!(records, unique_by: :index_on_name)
```

### Record validation

The `bulk_insert!` method guarantees that `records` are inserted transactionally, and
runs validations on each record prior to insertion. If any record fails to validate,
an error is raised and the transaction is rolled back. You can turn off validations via
the `:validate` option:

```ruby
MyModel.bulk_insert!(records, validate: false)
```

### Batch size configuration

In those cases where the number of `records` is above a given threshold, insertions
occur in multiple batches. The default batch size is defined in
[`BulkInsertSafe::DEFAULT_BATCH_SIZE`](https://gitlab.com/gitlab-org/gitlab/-/blob/master/app/models/concerns/bulk_insert_safe.rb).
Assuming a default threshold of 500, inserting 950 records
would result in two batches being written sequentially (of size 500 and 450, respectively).
You can override the default batch size via the `:batch_size` option:

```ruby
MyModel.bulk_insert!(records, batch_size: 100)
```

Assuming the same number of 950 records, this would result in 10 batches being written instead.
Since this also affects the number of `INSERT` statements that occur, make sure you measure the
performance impact this might have on your code. There is a trade-off between the number of
`INSERT` statements the database has to process and the size and cost of each `INSERT`.

### Handling duplicate records

NOTE:
This parameter applies only to `bulk_insert!`. If you intend to update existing
records, use `bulk_upsert!` instead.

It may happen that some records you are trying to insert already exist, which would result in
primary key conflicts. There are two ways to address this problem: failing fast by raising an
error, or skipping duplicate records. The default behavior of `bulk_insert!` is to fail fast
and raise an `ActiveRecord::RecordNotUnique` error.

If this is undesirable, you can instead skip duplicate records with the `skip_duplicates` flag:

```ruby
MyModel.bulk_insert!(records, skip_duplicates: true)
```

### Requirements for safe bulk insertions

Large parts of ActiveRecord's persistence API are built around the notion of callbacks. Many
of these callbacks fire in response to model life cycle events such as `save` or `create`.
These callbacks cannot be used with bulk insertions, since they are meant to be called for
every instance that is saved or created. Since these events do not fire when
records are inserted in bulk, we currently prevent their use.

The specifics around which callbacks are explicitly allowed are defined in
[`BulkInsertSafe`](https://gitlab.com/gitlab-org/gitlab/-/blob/master/app/models/concerns/bulk_insert_safe.rb).
Consult the module source code for details. If your class uses callbacks that are not explicitly designated
safe and you `include BulkInsertSafe`, the application fails with an error.

### `BulkInsertSafe` versus `InsertAll`

Internally, `BulkInsertSafe` is based on `InsertAll`, and you may wonder when to choose
the former over the latter. To help you make the decision,
the key differences between these classes are listed in the table below.

|                | Input type           | Validates input | Specify batch size | Can bypass callbacks              | Transactional |
|--------------- | -------------------- | --------------- | ------------------ | --------------------------------- | ------------- |
| `bulk_insert!` | ActiveRecord objects | Yes (optional)  | Yes (optional)     | No (prevents unsafe callback use) | Yes           |
| `insert_all!`  | Attribute hashes     | No              | No                 | Yes                               | Yes           |

To summarize, `BulkInsertSafe` moves bulk inserts closer to how ActiveRecord objects
and inserts would normally behave. However, if all you need is to insert raw data in bulk, then
`insert_all` is more efficient.

## Insert `has_many` associations in bulk

A common use case is to save collections of associated relations through the owner side of the relation,
where the owned relation is associated to the owner through the `has_many` class method:

```ruby
owner = OwnerModel.new(owned_relations: array_of_owned_relations)
# saves all `owned_relations` one-by-one
owner.save!
```

This issues a separate `INSERT` statement and transaction for every record in `owned_relations`, which is inefficient if
`array_of_owned_relations` is large. To remedy this, the `BulkInsertableAssociations` concern can be
used to declare that the owner defines associations that are safe for bulk insertion:

```ruby
class OwnerModel < ApplicationRecord
  # other includes here
  # ...
  include BulkInsertableAssociations # include this last

  has_many :my_models
end
```

Here `my_models` must be declared `BulkInsertSafe` (as described previously) for bulk insertions
to happen. You can now insert any as-yet-unsaved records as follows:

```ruby
BulkInsertableAssociations.with_bulk_insert do
  owner = OwnerModel.new(my_models: array_of_my_model_instances)
  # saves `my_models` using a single bulk insert (possibly via multiple batches)
  owner.save!
end
```

You can still save relations that are not `BulkInsertSafe` in this block; they
are simply treated as if you had invoked `save` from outside the block.

## Known limitations

There are a few restrictions to how these APIs can be used:

- `BulkInsertableAssociations`:
  - It is currently only compatible with `has_many` relations.
  - It does not yet support `has_many through: ...` relations.

Moreover, input data should either be limited to around 1000 records at most,
or already batched prior to calling bulk insert, as sketched below. The `INSERT` statement runs in a single
transaction, so for large amounts of records it may negatively affect database stability.
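
A minimal sketch of the second option, batching the input yourself so that each `INSERT` runs in its own, smaller transaction:

```ruby
# Each slice is inserted in its own transaction, keeping any single
# INSERT from growing too large.
records.each_slice(500) do |batch|
  MyModel.bulk_insert!(batch)
end
```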

<!-- This redirect file can be deleted after <2022-11-05>. -->
<!-- Redirects that point to other docs in the same project expire in three months. -->
<!-- Redirects that point to docs in a different project or site (for example, link is not relative and starts with `https:`) expire in one year. -->
<!-- Before deletion, see: https://docs.gitlab.com/ee/development/documentation/redirects.html -->

@@ -1,193 +1,11 @@
---
stage: none
group: unassigned
info: To determine the technical writer assigned to the Stage/Group associated with this page, see https://about.gitlab.com/handbook/engineering/ux/technical-writing/#assignments
redirect_to: 'database/namespaces_storage_statistics.md'
remove_date: '2022-11-05'
---

# Database case study: Namespaces storage statistics

This document was moved to [another location](database/namespaces_storage_statistics.md).

## Introduction

On [Storage and limits management for groups](https://gitlab.com/groups/gitlab-org/-/epics/886),
we want to facilitate a method for easily viewing the amount of
storage consumed by a group, and to allow easy management.

## Proposal

1. Create a new ActiveRecord model to hold the namespaces' statistics in an aggregated form (only for root [namespaces](../user/namespace/index.md)).
1. Refresh the statistics in this model every time a project belonging to this namespace is changed.

## Problem

In GitLab, we update the project storage statistics through a
[callback](https://gitlab.com/gitlab-org/gitlab/-/blob/4ab54c2233e91f60a80e5b6fa2181e6899fdcc3e/app/models/project.rb#L97)
every time the project is saved.

The summary of those statistics per namespace is then retrieved
by the [`Namespaces#with_statistics`](https://gitlab.com/gitlab-org/gitlab/-/blob/4ab54c2233e91f60a80e5b6fa2181e6899fdcc3e/app/models/namespace.rb#L70) scope. Analyzing this query, we noticed that:

- It takes up to `1.2` seconds for namespaces with over `15k` projects.
- It can't be analyzed with [ChatOps](chatops_on_gitlabcom.md), as it times out.

Additionally, the pattern that is currently used to update the project statistics
(the callback) doesn't scale adequately. It is currently one of the largest
[database transactions on production](https://gitlab.com/gitlab-org/gitlab/-/issues/29070),
taking the most time overall. We can't add one more query to it, as
that would increase the transaction's length.

Because of all of the above, we can't apply the same pattern to store
and update the namespace statistics, as the `namespaces` table is one
of the largest tables on GitLab.com. Therefore we needed to find a performant
alternative.

## Attempts

### Attempt A: PostgreSQL materialized view

The model could be updated through a refresh strategy, based on a SQL query over project routes and a [materialized view](https://www.postgresql.org/docs/11/rules-materializedviews.html):

```sql
SELECT split_part("rs".path, '/', 1) as root_path,
       COALESCE(SUM(ps.storage_size), 0) AS storage_size,
       COALESCE(SUM(ps.repository_size), 0) AS repository_size,
       COALESCE(SUM(ps.wiki_size), 0) AS wiki_size,
       COALESCE(SUM(ps.lfs_objects_size), 0) AS lfs_objects_size,
       COALESCE(SUM(ps.build_artifacts_size), 0) AS build_artifacts_size,
       COALESCE(SUM(ps.pipeline_artifacts_size), 0) AS pipeline_artifacts_size,
       COALESCE(SUM(ps.packages_size), 0) AS packages_size,
       COALESCE(SUM(ps.snippets_size), 0) AS snippets_size,
       COALESCE(SUM(ps.uploads_size), 0) AS uploads_size
FROM "projects"
       INNER JOIN routes rs ON rs.source_id = projects.id AND rs.source_type = 'Project'
       INNER JOIN project_statistics ps ON ps.project_id = projects.id
GROUP BY root_path
```

We could then execute the query with:

```sql
REFRESH MATERIALIZED VIEW root_namespace_storage_statistics;
```

While this implied a single-query update (and probably a fast one), it has some downsides:

- Materialized view syntax differs between PostgreSQL and MySQL. While this feature was worked on, MySQL was still supported by GitLab.
- Rails does not have native support for materialized views. We'd need to use a specialized gem to take care of the management of the database views, which implies additional work.

### Attempt B: An update through a CTE

Similar to attempt A, the model update is done through a refresh strategy, this time with a [Common Table Expression](https://www.postgresql.org/docs/9.1/queries-with.html):

```sql
WITH refresh AS (
  SELECT split_part("rs".path, '/', 1) as root_path,
         COALESCE(SUM(ps.storage_size), 0) AS storage_size,
         COALESCE(SUM(ps.repository_size), 0) AS repository_size,
         COALESCE(SUM(ps.wiki_size), 0) AS wiki_size,
         COALESCE(SUM(ps.lfs_objects_size), 0) AS lfs_objects_size,
         COALESCE(SUM(ps.build_artifacts_size), 0) AS build_artifacts_size,
         COALESCE(SUM(ps.pipeline_artifacts_size), 0) AS pipeline_artifacts_size,
         COALESCE(SUM(ps.packages_size), 0) AS packages_size,
         COALESCE(SUM(ps.snippets_size), 0) AS snippets_size,
         COALESCE(SUM(ps.uploads_size), 0) AS uploads_size
  FROM "projects"
         INNER JOIN routes rs ON rs.source_id = projects.id AND rs.source_type = 'Project'
         INNER JOIN project_statistics ps ON ps.project_id = projects.id
  GROUP BY root_path)
UPDATE namespace_storage_statistics
SET storage_size = refresh.storage_size,
    repository_size = refresh.repository_size,
    wiki_size = refresh.wiki_size,
    lfs_objects_size = refresh.lfs_objects_size,
    build_artifacts_size = refresh.build_artifacts_size,
    pipeline_artifacts_size = refresh.pipeline_artifacts_size,
    packages_size = refresh.packages_size,
    snippets_size = refresh.snippets_size,
    uploads_size = refresh.uploads_size
FROM refresh
       INNER JOIN routes rs ON rs.path = refresh.root_path AND rs.source_type = 'Namespace'
WHERE namespace_storage_statistics.namespace_id = rs.source_id
```

Same benefits and downsides as attempt A.

### Attempt C: Get rid of the model and store the statistics on Redis

We could get rid of the model that stores the statistics in aggregated form and instead use a Redis Set.
This would be the [boring solution](https://about.gitlab.com/handbook/values/#boring-solutions) and the fastest one
to implement, as GitLab already includes Redis as part of its [Architecture](architecture.md#redis).

The downside of this approach is that Redis does not provide the same persistence/consistency guarantees as PostgreSQL,
and this is information we can't afford to lose in the event of a Redis failure.

### Attempt D: Tag the root namespace and its child namespaces

Directly relate the root namespace to its child namespaces, so that
whenever a namespace is created, it is tagged
with its root namespace ID:

| ID | root ID | parent ID |
|:---|:--------|:----------|
| 1  | 1       | NULL      |
| 2  | 1       | 1         |
| 3  | 1       | 2         |

To aggregate the statistics inside a namespace we'd execute something like:

```sql
SELECT COUNT(...)
FROM projects
WHERE namespace_id IN (
  SELECT id
  FROM namespaces
  WHERE root_id = X
)
```

Even though this approach would make aggregating much easier, it has some major downsides:

- We'd have to migrate **all namespaces** by adding and filling a new column. Because of the size of the table, the time and cost involved would be significant. The background migration would take approximately `153h`, see <https://gitlab.com/gitlab-org/gitlab-foss/-/merge_requests/29772>.
- The background migration would have to be shipped one release earlier, delaying the functionality by another milestone.

### Attempt E (final): Update the namespace storage statistics asynchronously

This approach consists of continuing to use the incremental statistics updates we already have,
but refreshing them through Sidekiq jobs and in different transactions:

1. Create a second table (`namespace_aggregation_schedules`) with two columns, `id` and `namespace_id`.
1. Whenever the statistics of a project change, insert a row into `namespace_aggregation_schedules`:
   - We don't insert a new row if there's already one related to the root namespace.
   - Keeping in mind the length of the transaction that involves updating `project_statistics` (<https://gitlab.com/gitlab-org/gitlab/-/issues/29070>), the insertion should be done in a different transaction and through a Sidekiq job.
1. After inserting the row, we schedule another worker to be executed asynchronously at two different moments:
   - One enqueued for immediate execution and another one scheduled in `1.5` hours.
   - We only schedule the jobs if we can obtain a `1.5h` lease on Redis on a key based on the root namespace ID (sketched below).
   - If we can't obtain the lease, it indicates there's another aggregation already in progress, or scheduled in no more than `1.5h`.
1. This worker will:
   - Update the root namespace storage statistics by querying all the namespaces through a service.
   - Delete the related `namespace_aggregation_schedules` row after the update.
1. Another Sidekiq job is also included to traverse any remaining rows in the `namespace_aggregation_schedules` table and schedule jobs for every pending row.
   - This job is scheduled with cron to run every night (UTC).
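
A simplified sketch of the lease-guarded scheduling step. Class names and the exact lease API here are illustrative; the actual implementation is linked in the conclusion below:

```ruby
class ScheduleAggregationWorker # illustrative, not the actual class
  include ApplicationWorker

  LEASE_TIMEOUT = 1.5.hours

  def perform(namespace_id)
    root_id = Namespace.find(namespace_id).root_ancestor.id

    # Obtain a 1.5h exclusive lease per root namespace. If it is already
    # taken, an aggregation is in progress or scheduled, so do nothing.
    lease = Gitlab::ExclusiveLease.new("namespace:aggregation:#{root_id}", timeout: LEASE_TIMEOUT.to_i)
    return unless lease.try_obtain

    RootStatisticsWorker.perform_async(root_id)             # run now
    RootStatisticsWorker.perform_in(LEASE_TIMEOUT, root_id) # run again in 1.5h
  end
end
```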

This implementation has the following benefits:

- All the updates are done asynchronously, so we're not increasing the length of the transactions for `project_statistics`.
- We're doing the update in a single SQL query.
- It is compatible with PostgreSQL and MySQL.
- No background migration required.

The only downside of this approach is that namespace statistics are updated up to `1.5` hours after the change is done,
which means there's a time window in which the statistics are inaccurate. Because we're still not
[enforcing storage limits](https://gitlab.com/gitlab-org/gitlab/-/issues/17664), this is not a major problem.

## Conclusion

Updating the storage statistics asynchronously was the least problematic and
most performant approach to aggregating the root namespaces.

All the details regarding this use case can be found in:

- <https://gitlab.com/gitlab-org/gitlab-foss/-/issues/62214>
- Merge request with the implementation: <https://gitlab.com/gitlab-org/gitlab-foss/-/merge_requests/28996>

The performance of the namespace storage statistics was measured in staging and production (GitLab.com). All results were posted
in <https://gitlab.com/gitlab-org/gitlab-foss/-/issues/64092>: no problem has been reported so far.

<!-- This redirect file can be deleted after <2022-11-05>. -->
<!-- Redirects that point to other docs in the same project expire in three months. -->
<!-- Redirects that point to docs in a different project or site (for example, link is not relative and starts with `https:`) expire in one year. -->
<!-- Before deletion, see: https://docs.gitlab.com/ee/development/documentation/redirects.html -->

@@ -932,7 +932,7 @@ Models](mass_insert.md) functionality.

Assuming you are working with ActiveRecord models, you might also find these links helpful:

- [Insert records in batches](insert_into_tables_in_batches.md)
- [Insert records in batches](database/insert_into_tables_in_batches.md)
- [BulkInsert gem](https://github.com/jamis/bulk_insert)
- [ActiveRecord::PgGenerateSeries gem](https://github.com/ryu39/active_record-pg_generate_series)

@@ -316,6 +316,28 @@ services:
    alias: docker
```

### Issues when authenticating to the Dependency Proxy from CI/CD jobs

GitLab Runner automatically authenticates to the Dependency Proxy. However, the underlying Docker engine is still subject to its [authorization resolving process](https://gitlab.com/gitlab-org/gitlab-runner/-/blob/main/docs/configuration/advanced-configuration.md#precedence-of-docker-authorization-resolving).

Misconfigurations in the authentication mechanism may cause `HTTP Basic: Access denied` and `403: Access forbidden` errors.

You can use the job logs to view the authentication mechanism used to authenticate against the Dependency Proxy:

```plaintext
Authenticating with credentials from $DOCKER_AUTH_CONFIG
```

```plaintext
Authenticating with credentials from /root/.docker/config.json
```

```plaintext
Authenticating with credentials from job payload (GitLab Registry)
```

Make sure you are using the expected authentication mechanism.

### "Not Found" error when pulling image

Docker errors similar to the following may indicate that the user running the build job doesn't have

@@ -10,6 +10,8 @@ module API
before do
  require_repository_enabled!
  authorize! :download_code, user_project

  verify_pagination_params!
end

helpers do

@@ -20,6 +20,26 @@ module API
optional :page, type: Integer, default: 1, desc: 'Current page number'
optional :per_page, type: Integer, default: 20, desc: 'Number of items per page', except_values: [0]
end

def verify_pagination_params!
  return if Feature.disabled?(:only_positive_pagination_values)

  page = begin
    Integer(params[:page])
  rescue ArgumentError, TypeError
    nil
  end

  return render_structured_api_error!({ error: 'page does not have a valid value' }, 400) if page&.< 1

  per_page = begin
    Integer(params[:per_page])
  rescue ArgumentError, TypeError
    nil
  end

  return render_structured_api_error!({ error: 'per_page does not have a valid value' }, 400) if per_page&.< 1
end
end
end
end

@@ -145,7 +145,7 @@ module RuboCop
        return unless node.children[opts_index]

        node.children[opts_index].each_pair.find do |pair|
          pair.key.value == :feature_flag
          pair.key.value == :_deprecated_feature_flag
        end&.value
      else
        arg_index = rugged_method?(node) ? 3 : 2

@@ -24,7 +24,7 @@ RSpec.describe 'Graphql Field feature flags' do

  let(:query_type) do
    query_factory do |query|
      query.field :item, type, null: true, feature_flag: feature_flag, resolver: new_resolver(test_object)
      query.field :item, type, null: true, _deprecated_feature_flag: feature_flag, resolver: new_resolver(test_object)
    end
  end

@@ -209,7 +209,7 @@ RSpec.describe Types::BaseField do
  describe '#visible?' do
    context 'and has a feature_flag' do
      let(:flag) { :test_feature }
      let(:field) { described_class.new(name: 'test', type: GraphQL::Types::String, feature_flag: flag, null: false) }
      let(:field) { described_class.new(name: 'test', type: GraphQL::Types::String, _deprecated_feature_flag: flag, null: false) }
      let(:context) { {} }

      before do

@@ -253,7 +253,7 @@ RSpec.describe Types::BaseField do

  describe '#description' do
    context 'feature flag given' do
      let(:field) { described_class.new(name: 'test', type: GraphQL::Types::String, feature_flag: flag, null: false, description: 'Test description.') }
      let(:field) { described_class.new(name: 'test', type: GraphQL::Types::String, _deprecated_feature_flag: flag, null: false, description: 'Test description.') }
      let(:flag) { :test_flag }

      it 'prepends the description' do

@@ -313,11 +313,11 @@ RSpec.describe Types::BaseField do
    described_class.new(**base_args.merge(args))
  end

  it 'interacts well with the `feature_flag` property' do
  it 'interacts well with the `_deprecated_feature_flag` property' do
    field = subject(
      deprecated: { milestone: '1.10', reason: 'Deprecation reason' },
      description: 'Field description.',
      feature_flag: 'foo_flag'
      _deprecated_feature_flag: 'foo_flag'
    )

    expect(field.description).to start_with('Field description. Available only when feature flag `foo_flag` is enabled.')

@@ -206,11 +206,13 @@ RSpec.describe API::Commits do
  let(:page) { 1 }
  let(:per_page) { 5 }
  let(:ref_name) { 'master' }
  let!(:request) do
  let(:request) do
    get api("/projects/#{project_id}/repository/commits?page=#{page}&per_page=#{per_page}&ref_name=#{ref_name}", user)
  end

  it 'returns correct headers' do
    request

    expect(response).to include_limited_pagination_headers
    expect(response.headers['Link']).to match(/page=1&per_page=5/)
    expect(response.headers['Link']).to match(/page=2&per_page=5/)

@@ -218,6 +220,8 @@ RSpec.describe API::Commits do

  context 'viewing the first page' do
    it 'returns the first 5 commits' do
      request

      commit = project.repository.commit

      expect(json_response.size).to eq(per_page)

@@ -230,6 +234,8 @@ RSpec.describe API::Commits do
    let(:page) { 3 }

    it 'returns the third 5 commits' do
      request

      commit = project.repository.commits('HEAD', limit: per_page, offset: (page - 1) * per_page).first

      expect(json_response.size).to eq(per_page)

@@ -238,10 +244,55 @@ RSpec.describe API::Commits do
    end
  end

  context 'when per_page is 0' do
    let(:per_page) { 0 }
  context 'when pagination params are invalid' do
    let_it_be(:project) { create(:project, :repository) }

    it_behaves_like '400 response'
    using RSpec::Parameterized::TableSyntax

    where(:page, :per_page, :error_message) do
      0   | nil | 'page does not have a valid value'
      -1  | nil | 'page does not have a valid value'
      'a' | nil | 'page is invalid'
      nil | 0   | 'per_page does not have a valid value'
      nil | -1  | 'per_page does not have a valid value'
      nil | 'a' | 'per_page is invalid'
    end

    with_them do
      it 'returns 400 response' do
        request

        expect(response).to have_gitlab_http_status(:bad_request)
        expect(json_response['error']).to eq(error_message)
      end
    end

    context 'when FF is off' do
      before do
        stub_feature_flags(only_positive_pagination_values: false)
      end

      where(:page, :per_page, :error_message, :status) do
        0   | nil | nil                                | :success
        -10 | nil | nil                                | :internal_server_error
        'a' | nil | 'page is invalid'                  | :bad_request
        nil | 0   | 'per_page has a value not allowed' | :bad_request
        nil | -1  | nil                                | :success
        nil | 'a' | 'per_page is invalid'              | :bad_request
      end

      with_them do
        it 'returns a response' do
          request

          expect(response).to have_gitlab_http_status(status)

          if error_message
            expect(json_response['error']).to eq(error_message)
          end
        end
      end
    end
  end
end

@@ -217,8 +217,8 @@ RSpec.describe RuboCop::Cop::Gitlab::MarkUsedFeatureFlags do
    allow(cop).to receive(:in_graphql_types?).and_return(true)
  end

  include_examples 'sets flag as used', 'field :runners, Types::Ci::RunnerType.connection_type, null: true, feature_flag: :foo', 'foo'
  include_examples 'sets flag as used', 'field :runners, null: true, feature_flag: :foo', 'foo'
  include_examples 'sets flag as used', 'field :runners, Types::Ci::RunnerType.connection_type, null: true, _deprecated_feature_flag: :foo', 'foo'
  include_examples 'sets flag as used', 'field :runners, null: true, _deprecated_feature_flag: :foo', 'foo'
  include_examples 'does not set any flags as used', 'field :solution'
  include_examples 'does not set any flags as used', 'field :runners, Types::Ci::RunnerType.connection_type'
  include_examples 'does not set any flags as used', 'field :runners, Types::Ci::RunnerType.connection_type, null: true, description: "hello world"'