Add latest changes from gitlab-org/gitlab@master

This commit is contained in:
GitLab Bot 2022-08-04 21:08:44 +00:00
parent 2857ba3ce5
commit 1793989ccc
28 changed files with 965 additions and 857 deletions

View File

@ -17,7 +17,7 @@ module Types
@requires_argument = !!kwargs.delete(:requires_argument)
@authorize = Array.wrap(kwargs.delete(:authorize))
kwargs[:complexity] = field_complexity(kwargs[:resolver_class], kwargs[:complexity])
@feature_flag = kwargs[:feature_flag]
@feature_flag = kwargs[:_deprecated_feature_flag]
kwargs = check_feature_flag(kwargs)
@deprecation = gitlab_deprecation(kwargs)
after_connection_extensions = kwargs.delete(:late_extensions) || []
@ -136,7 +136,7 @@ module Types
end
def check_feature_flag(args)
ff = args.delete(:feature_flag)
ff = args.delete(:_deprecated_feature_flag)
return args unless ff.present?
args[:description] = feature_documentation_message(ff, args[:description])

View File

@ -22,7 +22,7 @@ module Types
type: Types::CustomEmojiType.connection_type,
null: true,
description: 'Custom emoji within this namespace.',
feature_flag: :custom_emoji
_deprecated_feature_flag: :custom_emoji
field :share_with_group_lock,
type: GraphQL::Types::Boolean,

View File

@ -37,8 +37,8 @@ module Types
mount_mutation Mutations::Clusters::AgentTokens::Create
mount_mutation Mutations::Clusters::AgentTokens::Revoke
mount_mutation Mutations::Commits::Create, calls_gitaly: true
mount_mutation Mutations::CustomEmoji::Create, feature_flag: :custom_emoji
mount_mutation Mutations::CustomEmoji::Destroy, feature_flag: :custom_emoji
mount_mutation Mutations::CustomEmoji::Create, _deprecated_feature_flag: :custom_emoji
mount_mutation Mutations::CustomEmoji::Destroy, _deprecated_feature_flag: :custom_emoji
mount_mutation Mutations::CustomerRelations::Contacts::Create
mount_mutation Mutations::CustomerRelations::Contacts::Update
mount_mutation Mutations::CustomerRelations::Organizations::Create

View File

@ -61,7 +61,7 @@ module Types
Types::TimeTracking::TimelogCategoryType.connection_type,
null: true,
description: "Timelog categories for the namespace.",
feature_flag: :timelog_categories
_deprecated_feature_flag: :timelog_categories
markdown_field :description_html, null: true

View File

@ -442,7 +442,7 @@ module Types
Types::TimeTracking::TimelogCategoryType.connection_type,
null: true,
description: "Timelog categories for the project.",
feature_flag: :timelog_categories
_deprecated_feature_flag: :timelog_categories
def timelog_categories
object.project_namespace.timelog_categories

View File

@ -0,0 +1,8 @@
---
name: only_positive_pagination_values
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/93571
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/369225
milestone: '15.3'
type: development
group: group::source code
default_enabled: false

View File

@ -98,11 +98,8 @@ See the [deprecating schema items](#deprecating-schema-items) section for how to
### Breaking change exemptions
Two scenarios exist where schema items are exempt from the deprecation process,
and can be removed or changed at any time without notice. These are schema items that either:
- Use the [`feature_flag` property](#feature_flag-property) _and_ the flag is disabled by default.
- Are [marked as alpha](#mark-schema-items-as-alpha).
Schema items [marked as alpha](#mark-schema-items-as-alpha) are exempt from the deprecation process,
and can be removed or changed at any time without notice.
## Global IDs
@ -472,92 +469,37 @@ end
## Feature flags
Developers can add [feature flags](../development/feature_flags/index.md) to GraphQL
fields in the following ways:
You can implement [feature flags](../development/feature_flags/index.md) in GraphQL to toggle:
- Add the [`feature_flag` property](#feature_flag-property) to a field. This allows the field to be _hidden_
from the GraphQL schema when the flag is disabled.
- [Toggle the return value](#toggle-the-value-of-a-field) when resolving the field.
- The return value of a field.
- The behavior of an argument or mutation.
You can refer to these guidelines to decide which approach to use:
- If your field is experimental, and its name or type is subject to
change, use the [`feature_flag` property](#feature_flag-property).
- If your field is stable and its definition doesn't change, even after the flag is
removed, [toggle the return value](#toggle-the-value-of-a-field) of the field instead. Note that
[all fields should be nullable](#nullable-fields) anyway.
- If your field will be accessed from frontend using the `@include` or `@skip` directive, [do not use the `feature_flag` property](#frontend-and-backend-feature-flag-strategies).
### `feature_flag` property
The `feature_flag` property allows you to toggle the field's
[visibility](https://graphql-ruby.org/authorization/visibility.html)
in the GraphQL schema. This removes the field from the schema
when the flag is disabled.
A description is [appended](https://gitlab.com/gitlab-org/gitlab/-/blob/497b556/app/graphql/types/base_field.rb#L44-53)
to the field indicating that it is behind a feature flag.
WARNING:
If a client queries for the field when the feature flag is disabled, the query
fails. Consider this when toggling the visibility of the feature on or off on
production.
The `feature_flag` property does not allow the use of
[feature gates based on actors](../development/feature_flags/index.md).
This means that the feature flag cannot be toggled only for particular
projects, groups, or users, but instead can only be toggled globally for
everyone.
Example:
```ruby
field :test_field, type: GraphQL::Types::String,
null: true,
description: 'Some test field.',
feature_flag: :my_feature_flag
```
### Frontend and Backend feature flag strategies
#### Directives
When feature flags are used in the frontend to control the `@include` and `@skip` directives, do not use the `feature_flag`
property on the server-side. For the accepted backend workaround, see [Toggle the value of a field](#toggle-the-value-of-a-field). It is recommended that the feature flag used in this approach be the same for frontend and backend.
Even if the frontend directives evaluate to `@include:false` / `@skip:true`, the guarded entity is sent to the backend and matched
against the GraphQL schema. We would then get an exception due to a schema mismatch. See the [frontend GraphQL guide](../development/fe_guide/graphql.md#the-include-directive) for more guidance.
#### Different versions of a query
See the frontend GraphQL guide for [different versions of a query](../development/fe_guide/graphql.md#different-versions-of-a-query), and [why it is not the preferred approach](../development/fe_guide/graphql.md#avoiding-multiple-query-versions).
### Toggle the value of a field
This method of using feature flags for fields is to toggle the
return value of the field. This can be done in the resolver, in the
This can be done in a resolver, in the
type, or even in a model method, depending on your preference and
situation.
Consider also [marking the field as Alpha](#mark-schema-items-as-alpha)
while the value of the field can be toggled. You can
[change or remove Alpha fields at any time](#breaking-change-exemptions) without needing to deprecate them.
This also signals to consumers of the public GraphQL API that the field is not
NOTE:
It's recommended that you also [mark the item as Alpha](#mark-schema-items-as-alpha) while it is behind a feature flag.
This signals to consumers of the public GraphQL API that the field is not
meant to be used yet.
You can also
[change or remove Alpha items at any time](#breaking-change-exemptions) without needing to deprecate them. When the flag is removed, "release"
the schema item by removing its Alpha property to make it public.
When applying a feature flag to toggle the value of a field, the
`description` of the field must:
### Descriptions for feature flagged items
- State that the value of the field can be toggled by a feature flag.
When using a feature flag to toggle the value or behavior of a schema item, the
`description` of the item must:
- State that the value or behavior can be toggled by a feature flag.
- Name the feature flag.
- State what the field returns when the feature flag is disabled (or
- State what the field returns, or behavior is, when the feature flag is disabled (or
enabled, if more appropriate).
Example:
Example of a feature-flagged field:
```ruby
field :foo, GraphQL::Types::String,
null: true,
field :foo, GraphQL::Types::String, null: true,
alpha: { milestone: '10.0' },
description: 'Some test field. Returns `null` ' \
'if `my_feature_flag` feature flag is disabled.'
@ -567,6 +509,26 @@ def foo
end
```
Example of a feature-flagged argument:
```ruby
argument :foo, type: GraphQL::Types::String, required: false,
alpha: { milestone: '10.0' },
description: 'Some test argument. Is ignored if ' \
'`my_feature_flag` feature flag is disabled.'
def resolve(args)
args.delete(:foo) unless Feature.enabled?(:my_feature_flag, object)
# ...
end
```
### `feature_flag` property (deprecated)
NOTE:
This property is deprecated and should no longer be used. The property
has been temporarily renamed to `_deprecated_feature_flag` and support for it will be removed in [#369202](https://gitlab.com/gitlab-org/gitlab/-/issues/369202).
## Deprecating schema items
The GitLab GraphQL API is versionless, which means we maintain backwards

View File

@ -6,7 +6,7 @@ info: To determine the technical writer assigned to the Stage/Group associated w
# Add a foreign key constraint to an existing column
Foreign keys ensure consistency between related database tables. The current database review process **always** encourages you to add [foreign keys](../foreign_keys.md) when creating tables that reference records from other tables.
Foreign keys ensure consistency between related database tables. The current database review process **always** encourages you to add [foreign keys](foreign_keys.md) when creating tables that reference records from other tables.
Starting with Rails version 4, Rails includes migration helpers to add foreign key constraints
to database tables. Before Rails 4, the only way for ensuring some level of consistency was the

View File

@ -0,0 +1,179 @@
---
stage: Plan
group: Project Management
info: To determine the technical writer assigned to the Stage/Group associated with this page, see https://about.gitlab.com/handbook/engineering/ux/technical-writing/#assignments
---
# Filtering by label
## Introduction
GitLab has [labels](../../user/project/labels.md) that can be assigned to issues,
merge requests, and epics. Labels on those objects are a many-to-many relation
through the polymorphic `label_links` table.
To filter these objects by multiple labels - for instance, 'all open
issues with the label ~Plan and the label ~backend' - we generate a
query containing a `GROUP BY` clause. In a simple form, this looks like:
```sql
SELECT
issues.*
FROM
issues
INNER JOIN label_links ON label_links.target_id = issues.id
AND label_links.target_type = 'Issue'
INNER JOIN labels ON labels.id = label_links.label_id
WHERE
issues.project_id = 13083
AND (issues.state IN ('opened'))
AND labels.title IN ('Plan',
'backend')
GROUP BY
issues.id
HAVING (COUNT(DISTINCT labels.title) = 2)
ORDER BY
issues.updated_at DESC,
issues.id DESC
LIMIT 20 OFFSET 0
```
In particular, note that:
1. We `GROUP BY issues.id` so that we can ...
1. Use the `HAVING (COUNT(DISTINCT labels.title) = 2)` condition to ensure that
all matched issues have both labels.
This is more complicated than is ideal. It makes the query construction more
prone to errors (such as
[issue #15557](https://gitlab.com/gitlab-org/gitlab-foss/-/issues/15557)).
## Attempt A: `WHERE EXISTS`
### Attempt A1: use multiple subqueries with `WHERE EXISTS`
In [issue #37137](https://gitlab.com/gitlab-org/gitlab-foss/-/issues/37137)
and its associated [merge request](https://gitlab.com/gitlab-org/gitlab-foss/-/merge_requests/14022),
we tried to replace the `GROUP BY` with multiple uses of `WHERE EXISTS`. For the
example above, this would give:
```sql
WHERE (EXISTS (
SELECT
TRUE
FROM
label_links
INNER JOIN labels ON labels.id = label_links.label_id
WHERE
labels.title = 'Plan'
AND target_type = 'Issue'
AND target_id = issues.id))
AND (EXISTS (
SELECT
TRUE
FROM
label_links
INNER JOIN labels ON labels.id = label_links.label_id
WHERE
labels.title = 'backend'
AND target_type = 'Issue'
AND target_id = issues.id))
```
While this worked without schema changes, and did improve readability somewhat,
it did not improve query performance.
### Attempt A2: use label IDs in the `WHERE EXISTS` clause
In [merge request #34503](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/34503), we followed a similar approach to A1. But this time, we
did a separate query to fetch the IDs of the labels used in the filter so that we avoid the `JOIN` in the `EXISTS` clause and filter directly by
`label_links.label_id`. We also added a new index on `label_links` for the `target_id`, `label_id`, and `target_type` columns to speed up this query.
Finding the label IDs wasn't straightforward because there could be multiple labels with the same title within a single root namespace. We solved
this by grouping the label IDs by title and then using the array of IDs in the `EXISTS` clauses.
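A sketch of the resulting condition, using hypothetical label IDs (the exact query GitLab generates may differ):
```sql
-- Hypothetical IDs: labels titled 'Plan' have IDs 1 and 2; 'backend' has ID 3.
WHERE (EXISTS (
    SELECT
      TRUE
    FROM
      label_links
    WHERE
      label_links.label_id IN (1, 2)
      AND target_type = 'Issue'
      AND target_id = issues.id))
AND (EXISTS (
    SELECT
      TRUE
    FROM
      label_links
    WHERE
      label_links.label_id IN (3)
      AND target_type = 'Issue'
      AND target_id = issues.id))
```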
This resulted in a significant performance improvement. However, this optimization could not be applied to the dashboard pages
where we do not have a project or group context. We could not easily search for the label IDs here because that would mean searching across all
projects and groups that the user has access to.
## Attempt B: Denormalize using an array column
Having [removed MySQL support in GitLab 12.1](https://about.gitlab.com/blog/2019/06/27/removing-mysql-support/),
using [PostgreSQL's arrays](https://www.postgresql.org/docs/11/arrays.html) became more
tractable as we didn't have to support two databases. We discussed denormalizing
the `label_links` table for querying in
[issue #49651](https://gitlab.com/gitlab-org/gitlab-foss/-/issues/49651),
with two options: label IDs and titles.
We can think of both of those as array columns on `issues`, `merge_requests`,
and `epics`: `issues.label_ids` would be an array column of label IDs, and
`issues.label_titles` would be an array of label titles.
These array columns can be complemented with
[GIN indexes](https://www.postgresql.org/docs/11/gin-intro.html) to improve
matching.
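For example, an index on the hypothetical `label_titles` column (see attempt B2 below) might be created with:
```sql
-- A sketch; `label_titles` is the denormalized column considered in attempt B2.
CREATE INDEX index_issues_on_label_titles ON issues USING GIN (label_titles);
```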
### Attempt B1: store label IDs for each object
This has some strong advantages over titles:
1. Unless a label is deleted, or a project is moved, we never need to
bulk-update the denormalized column.
1. It uses less storage than the titles.
Unfortunately, our application design makes this hard. If we were able to query
just by label ID easily, we wouldn't need the `INNER JOIN labels` in the initial
query at the start of this document. GitLab allows users to filter by label
title across projects and even across groups, so a filter by the label ~Plan may
include labels with multiple distinct IDs.
We do not want users to have to know about the different IDs, which means that
given this data set:
| Project | ~Plan label ID | ~backend label ID |
| ------- | -------------- | ----------------- |
| A | 11 | 12 |
| B | 21 | 22 |
| C | 31 | 32 |
We would need something like:
```sql
WHERE
label_ids @> ARRAY[11, 12]
OR label_ids @> ARRAY[21, 22]
OR label_ids @> ARRAY[31, 32]
```
This can get even more complicated when we consider that in some cases, there
might be two ~backend labels - with different IDs - that could apply to the same
object, so the number of combinations would balloon further.
### Attempt B2: store label titles for each object
From the perspective of updating the object, this is the worst
option. We have to bulk update the objects when:
1. The objects are moved from one project to another.
1. The project is moved from one group to another.
1. The label is renamed.
1. The label is deleted.
It also uses much more storage. Querying is simple, though:
```sql
WHERE
label_titles @> ARRAY['Plan', 'backend']
```
And our [tests in issue #49651](https://gitlab.com/gitlab-org/gitlab-foss/-/issues/49651#note_188777346)
showed that this could be fast.
However, at present, the disadvantages outweigh the advantages.
## Conclusion
We found that method A2 does not need denormalization and improves query performance significantly. It
did not apply to all cases, but we were able to apply method A1 to the remaining ones, removing the
`GROUP BY` and `HAVING` clauses in all scenarios.
This simplified the query and improved the performance in the most common cases.

View File

@ -0,0 +1,199 @@
---
stage: Data Stores
group: Database
info: To determine the technical writer assigned to the Stage/Group associated with this page, see https://about.gitlab.com/handbook/engineering/ux/technical-writing/#assignments
---
# Foreign Keys & Associations
When adding an association to a model you must also add a foreign key. For
example, say you have the following model:
```ruby
class User < ActiveRecord::Base
has_many :posts
end
```
In this case, add a foreign key on the `posts.user_id` column. This ensures
that data consistency is enforced at the database level. Foreign keys also mean that
the database can very quickly remove associated data (for example, when removing a
user), instead of Rails having to do this.
## Adding Foreign Keys In Migrations
Foreign keys can be added concurrently using `add_concurrent_foreign_key` as
defined in `Gitlab::Database::MigrationHelpers`. See the
[Migration Style Guide](../migration_style_guide.md) for more information.
Keep in mind that you can only safely add foreign keys to existing tables after
you have removed any orphaned rows. The method `add_concurrent_foreign_key`
does not take care of this so you must do so manually. See
[adding foreign key constraint to an existing column](add_foreign_key_to_existing_column.md).
## Updating Foreign Keys In Migrations
Sometimes a foreign key constraint must be changed, preserving the column
but updating the constraint condition. For example, moving from
`ON DELETE CASCADE` to `ON DELETE SET NULL` or vice-versa.
PostgreSQL does not prevent you from adding overlapping foreign keys. It
honors the most recently added constraint. This allows us to replace foreign keys without
ever losing foreign key protection on a column.
To replace a foreign key:
1. [Add the new foreign key without validation](add_foreign_key_to_existing_column.md#prevent-invalid-records)
The name of the foreign key constraint must be changed to add a new
foreign key before removing the old one.
```ruby
class ReplaceFkOnPackagesPackagesProjectId < Gitlab::Database::Migration[2.0]
disable_ddl_transaction!
NEW_CONSTRAINT_NAME = 'fk_new'
def up
add_concurrent_foreign_key(:packages_packages, :projects, column: :project_id, on_delete: :nullify, validate: false, name: NEW_CONSTRAINT_NAME)
end
def down
with_lock_retries do
remove_foreign_key_if_exists(:packages_packages, column: :project_id, on_delete: :nullify, name: NEW_CONSTRAINT_NAME)
end
end
end
```
1. [Validate the new foreign key](add_foreign_key_to_existing_column.md#validate-the-foreign-key)
```ruby
class ValidateFkNew < Gitlab::Database::Migration[2.0]
NEW_CONSTRAINT_NAME = 'fk_new'
# foreign key added in <link to MR or path to migration adding new FK>
def up
validate_foreign_key(:packages_packages, name: NEW_CONSTRAINT_NAME)
end
def down
# no-op
end
end
```
1. Remove the old foreign key:
```ruby
class RemoveFkOld < Gitlab::Database::Migration[2.0]
OLD_CONSTRAINT_NAME = 'fk_old'
# new foreign key added in <link to MR or path to migration adding new FK>
# and validated in <link to MR or path to migration validating new FK>
def up
remove_foreign_key_if_exists(:packages_packages, column: :project_id, on_delete: :cascade, name: OLD_CONSTRAINT_NAME)
end
def down
# Validation is skipped here, so if rolled back, this will need to be revalidated in a separate migration
add_concurrent_foreign_key(:packages_packages, :projects, column: :project_id, on_delete: :cascade, validate: false, name: OLD_CONSTRAINT_NAME)
end
end
```
## Cascading Deletes
Every foreign key must define an `ON DELETE` clause, and in 99% of the cases
this should be set to `CASCADE`.
## Indexes
When adding a foreign key in PostgreSQL, the column is not indexed automatically,
so you must also add a concurrent index. Not doing so results in cascading
deletes being very slow.
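A sketch of such a migration, using the hypothetical `posts.user_id` column from the earlier example and the `add_concurrent_index` helper:
```ruby
# A sketch: index the foreign key column concurrently.
class AddIndexOnPostsUserId < Gitlab::Database::Migration[2.0]
  disable_ddl_transaction! # concurrent index creation cannot run in a transaction
  INDEX_NAME = 'index_posts_on_user_id'
  def up
    add_concurrent_index :posts, :user_id, name: INDEX_NAME
  end
  def down
    remove_concurrent_index_by_name :posts, INDEX_NAME
  end
end
```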
## Naming foreign keys
By default, Ruby on Rails uses the `_id` suffix for foreign keys. So we should
only use this suffix for associations between two tables. If you want to
reference an ID on a third-party platform, the `_xid` suffix is recommended.
The spec `spec/db/schema_spec.rb` tests if all columns with the `_id` suffix
have a foreign key constraint. So if that spec fails, don't add the column to
`IGNORED_FK_COLUMNS`, but instead add the FK constraint, or consider naming it
differently.
## Dependent Removals
Don't define options such as `dependent: :destroy` or `dependent: :delete` when
defining an association. Defining these options means Rails handles the
removal of data, instead of letting the database handle this in the most
efficient way possible.
In other words, this is bad and should be avoided at all costs:
```ruby
class User < ActiveRecord::Base
has_many :posts, dependent: :destroy
end
```
Should you truly have a need for this, it should be approved by a database
specialist first.
You should also not define any `before_destroy` or `after_destroy` callbacks on
your models _unless_ absolutely required and only when approved by database
specialists. For example, if each row in a table has a corresponding file on a
file system, it may be tempting to add an `after_destroy` hook. This, however,
introduces non-database logic to a model, and means we can no longer rely on
foreign keys to remove the data, as this would result in the file system data
being left behind. In such a case, you should use a service class instead that
takes care of removing non-database data.
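For example, a sketch of such a service class, using hypothetical model and method names:
```ruby
# A sketch, not an actual GitLab class: the service removes the database row
# and its corresponding file together, keeping cleanup out of model callbacks.
class Attachments::DestroyService
  def initialize(attachment)
    @attachment = attachment
  end
  def execute
    file_path = @attachment.file_path
    @attachment.destroy! # the row itself is removed via normal database rules
    File.delete(file_path) if File.exist?(file_path) # non-database cleanup
  end
end
```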
In cases where the relation spans multiple databases you have even
further problems using `dependent: :destroy` or the above hooks. You can
read more about alternatives at
[Avoid `dependent: :nullify` and `dependent: :destroy` across databases](multiple_databases.md#avoid-dependent-nullify-and-dependent-destroy-across-databases).
## Alternative primary keys with `has_one` associations
Sometimes a `has_one` association is used to create a one-to-one relationship:
```ruby
class User < ActiveRecord::Base
has_one :user_config
end
class UserConfig < ActiveRecord::Base
belongs_to :user
end
```
In these cases, there may be an opportunity to remove the unnecessary `id`
column on the associated table, `user_config.id` in this example. Instead,
the originating table ID can be used as the primary key for the associated
table:
```ruby
create_table :user_configs, id: false do |t|
t.references :users, primary_key: true, default: nil, index: false, foreign_key: { on_delete: :cascade }
...
end
```
Setting `default: nil` ensures a primary key sequence is not created, and since the primary key
automatically gets an index, we set `index: false` to avoid creating a duplicate.
You also need to add the new primary key to the model:
```ruby
class UserConfig < ActiveRecord::Base
self.primary_key = :user_id
belongs_to :user
end
```
Using a foreign key as primary key saves space but can make
[batch counting](../service_ping/implement.md#batch-counters) in [Service Ping](../service_ping/index.md) less efficient.
Consider using a regular `id` column if the table is relevant for Service Ping.

View File

@ -42,7 +42,7 @@ info: To determine the technical writer assigned to the Stage/Group associated w
## Best practices
- [Adding database indexes](adding_database_indexes.md)
- [Foreign keys & associations](../foreign_keys.md)
- [Foreign keys & associations](foreign_keys.md)
- [Adding a foreign key constraint to an existing column](add_foreign_key_to_existing_column.md)
- [`NOT NULL` constraints](not_null_constraints.md)
- [Strings and the Text data type](strings_and_the_text_data_type.md)
@ -52,7 +52,7 @@ info: To determine the technical writer assigned to the Stage/Group associated w
- [Hash indexes](../hash_indexes.md)
- [Storing SHA1 hashes as binary](../sha1_as_binary.md)
- [Iterating tables in batches](../iterating_tables_in_batches.md)
- [Insert into tables in batches](../insert_into_tables_in_batches.md)
- [Insert into tables in batches](insert_into_tables_in_batches.md)
- [Ordering table columns](ordering_table_columns.md)
- [Verifying database capabilities](../verifying_database_capabilities.md)
- [Database Debugging and Troubleshooting](../database_debugging.md)
@ -69,8 +69,8 @@ info: To determine the technical writer assigned to the Stage/Group associated w
## Case studies
- [Database case study: Filtering by label](../filtering_by_label.md)
- [Database case study: Namespaces storage statistics](../namespaces_storage_statistics.md)
- [Database case study: Filtering by label](filtering_by_label.md)
- [Database case study: Namespaces storage statistics](namespaces_storage_statistics.md)
## Miscellaneous

View File

@ -0,0 +1,196 @@
---
stage: Data Stores
group: Database
info: To determine the technical writer assigned to the Stage/Group associated with this page, see https://about.gitlab.com/handbook/engineering/ux/technical-writing/#assignments
description: "Sometimes it is necessary to store large amounts of records at once, which can be inefficient
when iterating collections and performing individual `save`s. With the arrival of `insert_all`
in Rails 6, which operates at the row level (that is, using `Hash`es), GitLab has added a set
of APIs that make it safe and simple to insert ActiveRecord objects in bulk."
---
# Insert into tables in batches
Sometimes it is necessary to store large amounts of records at once, which can be inefficient
when iterating collections and saving each record individually. With the arrival of
[`insert_all`](https://apidock.com/rails/ActiveRecord/Persistence/ClassMethods/insert_all)
in Rails 6, which operates at the row level (that is, using `Hash` objects), GitLab has added a set
of APIs that make it safe and simple to insert `ActiveRecord` objects in bulk.
## Prepare `ApplicationRecord`s for bulk insertion
In order for a model class to take advantage of the bulk insertion API, it has to include the
`BulkInsertSafe` concern first:
```ruby
class MyModel < ApplicationRecord
# other includes here
# ...
include BulkInsertSafe # include this last
# ...
end
```
The `BulkInsertSafe` concern has two functions:
- It performs checks against your model class to ensure that it does not use ActiveRecord
APIs that are not safe to use with respect to bulk insertions (more on that below).
- It adds new class methods `bulk_insert!` and `bulk_upsert!`, which you can use to insert many records at once.
## Insert records with `bulk_insert!` and `bulk_upsert!`
If the target class passes the checks performed by `BulkInsertSafe`, you can insert an array of
ActiveRecord model objects as follows:
```ruby
records = [MyModel.new, ...]
MyModel.bulk_insert!(records)
```
Calls to `bulk_insert!` always attempt to insert _new records_. If instead
you would like to replace existing records with new values, while still inserting those
that do not already exist, then you can use `bulk_upsert!`:
```ruby
records = [MyModel.new, existing_model, ...]
MyModel.bulk_upsert!(records, unique_by: [:name])
```
In this example, `unique_by` specifies the columns by which records are considered to be
unique and as such are updated if they existed prior to insertion. For example, if
`existing_model` has a `name` attribute, and if a record with the same `name` value already
exists, its fields are updated with those of `existing_model`.
The `unique_by` parameter can also be passed as a `Symbol`, in which case it specifies
a database index by which a column is considered unique:
```ruby
MyModel.bulk_insert!(records, unique_by: :index_on_name)
```
### Record validation
The `bulk_insert!` method guarantees that `records` are inserted transactionally, and
runs validations on each record prior to insertion. If any record fails to validate,
an error is raised and the transaction is rolled back. You can turn off validations via
the `:validate` option:
```ruby
MyModel.bulk_insert!(records, validate: false)
```
### Batch size configuration
In those cases where the number of `records` is above a given threshold, insertions
occur in multiple batches. The default batch size is defined in
[`BulkInsertSafe::DEFAULT_BATCH_SIZE`](https://gitlab.com/gitlab-org/gitlab/-/blob/master/app/models/concerns/bulk_insert_safe.rb).
Assuming a default threshold of 500, inserting 950 records
would result in two batches being written sequentially (of size 500 and 450, respectively).
You can override the default batch size via the `:batch_size` option:
```ruby
MyModel.bulk_insert!(records, batch_size: 100)
```
Assuming the same number of 950 records, this would result in 10 batches being written instead.
Since this also affects the number of `INSERT` statements that occur, make sure you measure the
performance impact this might have on your code. There is a trade-off between the number of
`INSERT` statements the database has to process and the size and cost of each `INSERT`.
### Handling duplicate records
NOTE:
This parameter applies only to `bulk_insert!`. If you intend to update existing
records, use `bulk_upsert!` instead.
It may happen that some records you are trying to insert already exist, which would result in
primary key conflicts. There are two ways to address this problem: failing fast by raising an
error or skipping duplicate records. The default behavior of `bulk_insert!` is to fail fast
and raise an `ActiveRecord::RecordNotUnique` error.
If this is undesirable, you can instead skip duplicate records with the `skip_duplicates` flag:
```ruby
MyModel.bulk_insert!(records, skip_duplicates: true)
```
### Requirements for safe bulk insertions
Large parts of ActiveRecord's persistence API are built around the notion of callbacks. Many
of these callbacks fire in response to model life cycle events such as `save` or `create`.
These callbacks cannot be used with bulk insertions, since they are meant to be called for
every instance that is saved or created. Because these events do not fire when
records are inserted in bulk, we currently prevent their use.
The specifics around which callbacks are explicitly allowed are defined in
[`BulkInsertSafe`](https://gitlab.com/gitlab-org/gitlab/-/blob/master/app/models/concerns/bulk_insert_safe.rb).
Consult the module source code for details. If your class uses callbacks that are not explicitly designated
safe and you `include BulkInsertSafe`, the application fails with an error.
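For illustration, a class like the following fails once the concern is included; a sketch (the exact error raised depends on the concern's implementation):
```ruby
class UnsafeModel < ApplicationRecord
  after_save :notify_subscribers # a lifecycle callback bulk inserts would skip
  include BulkInsertSafe # fails here because of the callback above
end
```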
### `BulkInsertSafe` versus `InsertAll`
Internally, `BulkInsertSafe` is based on `InsertAll`, and you may wonder when to choose
the former over the latter. To help you make the decision,
the key differences between these classes are listed in the table below.
| | Input type | Validates input | Specify batch size | Can bypass callbacks | Transactional |
|--------------- | -------------------- | --------------- | ------------------ | --------------------------------- | ------------- |
| `bulk_insert!` | ActiveRecord objects | Yes (optional) | Yes (optional) | No (prevents unsafe callback use) | Yes |
| `insert_all!` | Attribute hashes | No | No | Yes | Yes |
To summarize, `BulkInsertSafe` moves bulk inserts closer to how ActiveRecord objects
and inserts would normally behave. However, if all you need is to insert raw data in bulk, then
`insert_all` is more efficient.
## Insert `has_many` associations in bulk
A common use case is to save collections of associated relations through the owner side of the relation,
where the owned relation is associated with the owner through the `has_many` class method:
```ruby
owner = OwnerModel.new(owned_relations: array_of_owned_relations)
# saves all `owned_relations` one-by-one
owner.save!
```
This issues a separate `INSERT` (and transaction) for every record in `owned_relations`, which is inefficient if
`array_of_owned_relations` is large. To remedy this, the `BulkInsertableAssociations` concern can be
used to declare that the owner defines associations that are safe for bulk insertion:
```ruby
class OwnerModel < ApplicationRecord
# other includes here
# ...
include BulkInsertableAssociations # include this last
has_many :my_models
end
```
Here `my_models` must be declared `BulkInsertSafe` (as described previously) for bulk insertions
to happen. You can now insert any records that are not yet saved as follows:
```ruby
BulkInsertableAssociations.with_bulk_insert do
owner = OwnerModel.new(my_models: array_of_my_model_instances)
# saves `my_models` using a single bulk insert (possibly via multiple batches)
owner.save!
end
```
You can still save relations that are not `BulkInsertSafe` in this block; they
are simply treated as if you had invoked `save` from outside the block.
## Known limitations
There are a few restrictions to how these APIs can be used:
- `BulkInsertableAssociations`:
- It is currently only compatible with `has_many` relations.
- It does not yet support `has_many through: ...` relations.
Moreover, input data should either be limited to around 1000 records at most,
or already batched prior to calling bulk insert. The `INSERT` statements run in a single
transaction, so for large numbers of records this may negatively affect database stability.
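For example, a large collection can be pre-batched before insertion; a sketch:
```ruby
# Each bulk_insert! call runs in its own transaction,
# keeping individual transactions small.
large_collection.each_slice(1_000) do |slice|
  MyModel.bulk_insert!(slice)
end
```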

View File

@ -10,7 +10,7 @@ info: To determine the technical writer assigned to the Stage/Group associated w
In relational databases (including PostgreSQL), foreign keys provide a way to link
two database tables together, and ensure data-consistency between them. In GitLab,
[foreign keys](../foreign_keys.md) are a vital part of the database design process.
[foreign keys](foreign_keys.md) are a vital part of the database design process.
Most of our database tables have foreign keys.
With the ongoing database [decomposition work](https://gitlab.com/groups/gitlab-org/-/epics/6168),

View File

@ -0,0 +1,193 @@
---
stage: none
group: unassigned
info: To determine the technical writer assigned to the Stage/Group associated with this page, see https://about.gitlab.com/handbook/engineering/ux/technical-writing/#assignments
---
# Database case study: Namespaces storage statistics
## Introduction
In [Storage and limits management for groups](https://gitlab.com/groups/gitlab-org/-/epics/886),
we want to provide a way to easily view the amount of
storage consumed by a group, and to allow easy management.
## Proposal
1. Create a new ActiveRecord model to hold the namespaces' statistics in an aggregated form (only for root [namespaces](../../user/namespace/index.md)).
1. Refresh the statistics in this model every time a project belonging to this namespace is changed.
## Problem
In GitLab, we update the project storage statistics through a
[callback](https://gitlab.com/gitlab-org/gitlab/-/blob/4ab54c2233e91f60a80e5b6fa2181e6899fdcc3e/app/models/project.rb#L97)
every time the project is saved.
The summary of those statistics per namespace is then retrieved
by the [`Namespaces#with_statistics`](https://gitlab.com/gitlab-org/gitlab/-/blob/4ab54c2233e91f60a80e5b6fa2181e6899fdcc3e/app/models/namespace.rb#L70) scope. Analyzing this query, we noticed that:
- It takes up to `1.2` seconds for namespaces with over `15k` projects.
- It can't be analyzed with [ChatOps](../chatops_on_gitlabcom.md), as it times out.
Additionally, the pattern that is currently used to update the project statistics
(the callback) doesn't scale adequately. It is currently one of the largest
[database transactions on production](https://gitlab.com/gitlab-org/gitlab/-/issues/29070),
taking the most time overall. We can't add one more query to it, as
it increases the transaction's length.
Because of all of the above, we can't apply the same pattern to store
and update the namespace statistics, as the `namespaces` table is one
of the largest tables on GitLab.com. Therefore we needed to find a
performant alternative method.
## Attempts
### Attempt A: PostgreSQL materialized view
The model could be updated through a refresh strategy based on a project-routes SQL query and a [materialized view](https://www.postgresql.org/docs/11/rules-materializedviews.html):
```sql
SELECT split_part("rs".path, '/', 1) as root_path,
COALESCE(SUM(ps.storage_size), 0) AS storage_size,
COALESCE(SUM(ps.repository_size), 0) AS repository_size,
COALESCE(SUM(ps.wiki_size), 0) AS wiki_size,
COALESCE(SUM(ps.lfs_objects_size), 0) AS lfs_objects_size,
COALESCE(SUM(ps.build_artifacts_size), 0) AS build_artifacts_size,
COALESCE(SUM(ps.pipeline_artifacts_size), 0) AS pipeline_artifacts_size,
COALESCE(SUM(ps.packages_size), 0) AS packages_size,
COALESCE(SUM(ps.snippets_size), 0) AS snippets_size,
COALESCE(SUM(ps.uploads_size), 0) AS uploads_size
FROM "projects"
INNER JOIN routes rs ON rs.source_id = projects.id AND rs.source_type = 'Project'
INNER JOIN project_statistics ps ON ps.project_id = projects.id
GROUP BY root_path
```
We could then execute the query with:
```sql
REFRESH MATERIALIZED VIEW root_namespace_storage_statistics;
```
While this implied a single-query update (and probably a fast one), it had some downsides:
- Materialized view syntax differs between PostgreSQL and MySQL. While this feature was worked on, MySQL was still supported by GitLab.
- Rails does not have native support for materialized views. We'd need to use a specialized gem to take care of the management of the database views, which implies additional work.
### Attempt B: An update through a CTE
Similar to attempt A: the model update is done through a refresh strategy with a [Common Table Expression](https://www.postgresql.org/docs/9.1/queries-with.html):
```sql
WITH refresh AS (
SELECT split_part("rs".path, '/', 1) as root_path,
COALESCE(SUM(ps.storage_size), 0) AS storage_size,
COALESCE(SUM(ps.repository_size), 0) AS repository_size,
COALESCE(SUM(ps.wiki_size), 0) AS wiki_size,
COALESCE(SUM(ps.lfs_objects_size), 0) AS lfs_objects_size,
COALESCE(SUM(ps.build_artifacts_size), 0) AS build_artifacts_size,
COALESCE(SUM(ps.pipeline_artifacts_size), 0) AS pipeline_artifacts_size,
COALESCE(SUM(ps.packages_size), 0) AS packages_size,
COALESCE(SUM(ps.snippets_size), 0) AS snippets_size,
COALESCE(SUM(ps.uploads_size), 0) AS uploads_size
FROM "projects"
INNER JOIN routes rs ON rs.source_id = projects.id AND rs.source_type = 'Project'
INNER JOIN project_statistics ps ON ps.project_id = projects.id
GROUP BY root_path)
UPDATE namespace_storage_statistics
SET storage_size = refresh.storage_size,
repository_size = refresh.repository_size,
wiki_size = refresh.wiki_size,
lfs_objects_size = refresh.lfs_objects_size,
build_artifacts_size = refresh.build_artifacts_size,
pipeline_artifacts_size = refresh.pipeline_artifacts_size,
packages_size = refresh.packages_size,
snippets_size = refresh.snippets_size,
uploads_size = refresh.uploads_size
FROM refresh
INNER JOIN routes rs ON rs.path = refresh.root_path AND rs.source_type = 'Namespace'
WHERE namespace_storage_statistics.namespace_id = rs.source_id
```
Same benefits and downsides as attempt A.
### Attempt C: Get rid of the model and store the statistics on Redis
We could get rid of the model that stores the statistics in aggregated form and instead use a Redis Set.
This would be the [boring solution](https://about.gitlab.com/handbook/values/#boring-solutions) and the fastest one
to implement, as GitLab already includes Redis as part of its [Architecture](../architecture.md#redis).
The downside of this approach is that Redis does not provide the same persistence/consistency guarantees as PostgreSQL,
and this is information we can't afford to lose in the event of a Redis failure.
### Attempt D: Tag the root namespace and its child namespaces
Directly relate the root namespace to its child namespaces, so that every
namespace is tagged with the ID of its root namespace:
| ID | root ID | parent ID |
|:---|:--------|:----------|
| 1 | 1 | NULL |
| 2 | 1 | 1 |
| 3 | 1 | 2 |
To aggregate the statistics inside a namespace we'd execute something like:
```sql
SELECT COUNT(...)
FROM projects
WHERE namespace_id IN (
SELECT id
FROM namespaces
WHERE root_id = X
)
```
Even though this approach would make aggregating much easier, it has some major downsides:
- We'd have to migrate **all namespaces** by adding and filling a new column. Because of the size of the table, the time and cost involved would be significant. The background migration would take approximately `153h`; see <https://gitlab.com/gitlab-org/gitlab-foss/-/merge_requests/29772>.
- The background migration has to ship one release earlier, delaying the feature by another milestone.
### Attempt E (final): Update the namespace storage statistics asynchronously
This approach consists of continuing to use the incremental statistics updates we already have,
but we refresh them through Sidekiq jobs and in different transactions:
1. Create a second table (`namespace_aggregation_schedules`) with two columns `id` and `namespace_id`.
1. Whenever the statistics of a project change, insert a row into `namespace_aggregation_schedules`.
- We don't insert a new row if there's already one related to the root namespace.
- Keeping in mind the length of the transaction that involves updating `project_statistics` (<https://gitlab.com/gitlab-org/gitlab/-/issues/29070>), the insertion should be done in a different transaction and through a Sidekiq job.
1. After inserting the row, we schedule another worker to be executed asynchronously at two different moments (see the sketch after this list):
- One enqueued for immediate execution and another one scheduled to run in `1.5` hours.
- We only schedule the jobs if we can obtain a `1.5`-hour lease on Redis, on a key based on the root namespace ID.
- If we can't obtain the lease, it indicates there's another aggregation already in progress, or one scheduled to run within `1.5` hours.
1. This worker will:
- Update the root namespace storage statistics by querying all the namespaces through a service.
- Delete the related `namespace_aggregation_schedules` row after the update.
1. Another Sidekiq job is also included to traverse any remaining rows in the `namespace_aggregation_schedules` table and schedule jobs for every pending row.
- This job is scheduled with cron to run every night (UTC).
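A sketch of the scheduling step described in this list, using hypothetical worker and class names (`Gitlab::ExclusiveLease` is the Redis-backed lease):
```ruby
# A sketch; the real workers in GitLab may be named and structured differently.
class Namespaces::ScheduleAggregationWorker
  include ApplicationWorker
  LEASE_TIMEOUT = 1.5.hours
  def perform(namespace_id)
    root_id = Namespace.find(namespace_id).root_ancestor.id
    # An existing lease means an aggregation is already in progress or scheduled.
    lease = Gitlab::ExclusiveLease.new("namespace_aggregation:#{root_id}", timeout: LEASE_TIMEOUT.to_i)
    return unless lease.try_obtain
    Namespace::AggregationSchedule.safe_find_or_create_by!(namespace_id: root_id)
    # One run now, and one after the lease window to pick up later changes.
    Namespaces::RootStatisticsWorker.perform_async(root_id)
    Namespaces::RootStatisticsWorker.perform_in(LEASE_TIMEOUT, root_id)
  end
end
```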
This implementation has the following benefits:
- All the updates are done asynchronously, so we're not increasing the length of the transactions for `project_statistics`.
- We're doing the update in a single SQL query.
- It is compatible with PostgreSQL and MySQL.
- No background migration required.
The only downside of this approach is that namespaces' statistics are updated up to `1.5` hours after the change is done,
which means there's a time window in which the statistics are inaccurate. Because we're still not
[enforcing storage limits](https://gitlab.com/gitlab-org/gitlab/-/issues/17664), this is not a major problem.
## Conclusion
Updating the storage statistics asynchronously was the least problematic and
most performant approach to aggregating the root namespaces.
All the details regarding this use case can be found in:
- <https://gitlab.com/gitlab-org/gitlab-foss/-/issues/62214>
- Merge Request with the implementation: <https://gitlab.com/gitlab-org/gitlab-foss/-/merge_requests/28996>
The performance of the namespace storage statistics was measured in staging and production (GitLab.com). All results were posted
in <https://gitlab.com/gitlab-org/gitlab-foss/-/issues/64092>. No problems have been reported so far.

View File

@ -597,7 +597,7 @@ export default {
Note that, even if the directive evaluates to `false`, the guarded entity is sent to the backend and
matched against the GraphQL schema. So this approach requires that the feature-flagged entity
exists in the schema, even if the feature flag is disabled. When the feature flag is turned off, it
is recommended that the resolver returns `null` at the very least using the same feature flag as the frontend. See the [API GraphQL guide](../api_graphql_styleguide.md#frontend-and-backend-feature-flag-strategies).
is recommended that the resolver returns `null` at the very least using the same feature flag as the frontend. See the [API GraphQL guide](../api_graphql_styleguide.md#feature-flags).
##### Different versions of a query

View File

@ -1,179 +1,11 @@
---
stage: Plan
group: Project Management
info: To determine the technical writer assigned to the Stage/Group associated with this page, see https://about.gitlab.com/handbook/engineering/ux/technical-writing/#assignments
redirect_to: 'database/filtering_by_label.md'
remove_date: '2022-11-05'
---
# Filtering by label
This document was moved to [another location](database/filtering_by_label.md).
<!-- This redirect file can be deleted after <2022-11-05>. -->
<!-- Redirects that point to other docs in the same project expire in three months. -->
<!-- Redirects that point to docs in a different project or site (for example, link is not relative and starts with `https:`) expire in one year. -->
<!-- Before deletion, see: https://docs.gitlab.com/ee/development/documentation/redirects.html -->

View File

@ -1,200 +1,11 @@
---
stage: Data Stores
group: Database
info: To determine the technical writer assigned to the Stage/Group associated with this page, see https://about.gitlab.com/handbook/engineering/ux/technical-writing/#assignments
redirect_to: 'database/foreign_keys.md'
remove_date: '2022-11-05'
---
# Foreign Keys & Associations
This document was moved to [another location](database/foreign_keys.md).
When adding an association to a model you must also add a foreign key. For
example, say you have the following model:
```ruby
class User < ActiveRecord::Base
has_many :posts
end
```
Add a foreign key here on column `posts.user_id`. This ensures
that data consistency is enforced on database level. Foreign keys also mean that
the database can very quickly remove associated data (for example, when removing a
user), instead of Rails having to do this.
## Adding Foreign Keys In Migrations
Foreign keys can be added concurrently using `add_concurrent_foreign_key` as
defined in `Gitlab::Database::MigrationHelpers`. See the [Migration Style
Guide](migration_style_guide.md) for more information.
Keep in mind that you can only safely add foreign keys to existing tables after
you have removed any orphaned rows. The method `add_concurrent_foreign_key`
does not take care of this so you must do so manually. See
[adding foreign key constraint to an existing column](database/add_foreign_key_to_existing_column.md).
## Updating Foreign Keys In Migrations
Sometimes a foreign key constraint must be changed, preserving the column
but updating the constraint condition. For example, moving from
`ON DELETE CASCADE` to `ON DELETE SET NULL` or vice-versa.
PostgreSQL does not prevent you from adding overlapping foreign keys. It
honors the most recently added constraint. This allows us to replace foreign keys without
ever losing foreign key protection on a column.
To replace a foreign key:
1. [Add the new foreign key without validation](database/add_foreign_key_to_existing_column.md#prevent-invalid-records)
The name of the foreign key constraint must be changed to add a new
foreign key before removing the old one.
```ruby
class ReplaceFkOnPackagesPackagesProjectId < Gitlab::Database::Migration[2.0]
disable_ddl_transaction!
NEW_CONSTRAINT_NAME = 'fk_new'
def up
add_concurrent_foreign_key(:packages_packages, :projects, column: :project_id, on_delete: :nullify, validate: false, name: NEW_CONSTRAINT_NAME)
end
def down
with_lock_retries do
remove_foreign_key_if_exists(:packages_packages, column: :project_id, on_delete: :nullify, name: NEW_CONSTRAINT_NAME)
end
end
end
```
1. [Validate the new foreign key](database/add_foreign_key_to_existing_column.md#validate-the-foreign-key)
```ruby
class ValidateFkNew < Gitlab::Database::Migration[2.0]
NEW_CONSTRAINT_NAME = 'fk_new'
# foreign key added in <link to MR or path to migration adding new FK>
def up
validate_foreign_key(:packages_packages, name: NEW_CONSTRAINT_NAME)
end
def down
# no-op
end
end
```
1. Remove the old foreign key:
```ruby
class RemoveFkOld < Gitlab::Database::Migration[2.0]
OLD_CONSTRAINT_NAME = 'fk_old'
# new foreign key added in <link to MR or path to migration adding new FK>
# and validated in <link to MR or path to migration validating new FK>
def up
remove_foreign_key_if_exists(:packages_packages, column: :project_id, on_delete: :cascade, name: OLD_CONSTRAINT_NAME)
end
def down
# Validation is skipped here, so if rolled back, this will need to be revalidated in a separate migration
add_concurrent_foreign_key(:packages_packages, :projects, column: :project_id, on_delete: :cascade, validate: false, name: OLD_CONSTRAINT_NAME)
end
end
```
## Cascading Deletes
Every foreign key must define an `ON DELETE` clause, and in 99% of the cases
this should be set to `CASCADE`.
## Indexes
When adding a foreign key in PostgreSQL the column is not indexed automatically,
thus you must also add a concurrent index. Not doing so results in cascading
deletes being very slow.
## Naming foreign keys
By default Ruby on Rails uses the `_id` suffix for foreign keys. So we should
only use this suffix for associations between two tables. If you want to
reference an ID on a third party platform the `_xid` suffix is recommended.
The spec `spec/db/schema_spec.rb` tests if all columns with the `_id` suffix
have a foreign key constraint. So if that spec fails, don't add the column to
`IGNORED_FK_COLUMNS`, but instead add the FK constraint, or consider naming it
differently.
## Dependent Removals
Don't define options such as `dependent: :destroy` or `dependent: :delete` when
defining an association. Defining these options means Rails handles the
removal of data, instead of letting the database handle this in the most
efficient way possible.
In other words, this is bad and should be avoided at all costs:
```ruby
class User < ActiveRecord::Base
has_many :posts, dependent: :destroy
end
```
Should you truly have a need for this it should be approved by a database
specialist first.
You should also not define any `before_destroy` or `after_destroy` callbacks on
your models _unless_ absolutely required and only when approved by database
specialists. For example, if each row in a table has a corresponding file on a
file system, it may be tempting to add an `after_destroy` hook. This, however,
introduces non-database logic to a model, and means we can no longer rely on
foreign keys to remove the data, as doing so would leave the file system data
behind. In such a case, you should instead use a service class that takes care
of removing the non-database data.
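A minimal sketch of such a service class, using hypothetical model and attribute names:
```ruby
require 'fileutils'

class AttachmentDestroyService
  def initialize(attachment)
    @attachment = attachment
  end

  def execute
    # Remove the database row first, then the file. The model itself stays
    # free of file system callbacks; a separate cleanup job could collect
    # orphaned files if the second step fails.
    @attachment.destroy!
    FileUtils.rm_f(@attachment.file_path)
  end
end
```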
In cases where the relation spans multiple databases, you face additional
problems when using `dependent: :destroy` or the above hooks. You can
read more about alternatives at [Avoid `dependent: :nullify` and
`dependent: :destroy` across
databases](database/multiple_databases.md#avoid-dependent-nullify-and-dependent-destroy-across-databases).
## Alternative primary keys with `has_one` associations
Sometimes a `has_one` association is used to create a one-to-one relationship:
```ruby
class User < ActiveRecord::Base
has_one :user_config
end
class UserConfig < ActiveRecord::Base
belongs_to :user
end
```
In these cases, there may be an opportunity to remove the unnecessary `id`
column on the associated table, `user_config.id` in this example. Instead,
the originating table ID can be used as the primary key for the associated
table:
```ruby
create_table :user_configs, id: false do |t|
  t.references :user, primary_key: true, default: nil, index: false, foreign_key: { on_delete: :cascade }
...
end
```
Setting `default: nil` ensures a primary key sequence is not created, and since the primary key
automatically gets an index, we set `index: false` to avoid creating a duplicate.
You also need to add the new primary key to the model:
```ruby
class UserConfig < ActiveRecord::Base
self.primary_key = :user_id
belongs_to :user
end
```
Using a foreign key as primary key saves space but can make
[batch counting](service_ping/implement.md#batch-counters) in [Service Ping](service_ping/index.md) less efficient.
Consider using a regular `id` column if the table is relevant for Service Ping.
<!-- This redirect file can be deleted after <2022-11-05>. -->
<!-- Redirects that point to other docs in the same project expire in three months. -->
<!-- Redirects that point to docs in a different project or site (for example, link is not relative and starts with `https:`) expire in one year. -->
<!-- Before deletion, see: https://docs.gitlab.com/ee/development/documentation/redirects.html -->

View File

@ -1,196 +1,11 @@
---
stage: Data Stores
group: Database
info: To determine the technical writer assigned to the Stage/Group associated with this page, see https://about.gitlab.com/handbook/engineering/ux/technical-writing/#assignments
description: "Sometimes it is necessary to store large amounts of records at once, which can be inefficient
when iterating collections and performing individual `save`s. With the arrival of `insert_all`
in Rails 6, which operates at the row level (that is, using `Hash`es), GitLab has added a set
of APIs that make it safe and simple to insert ActiveRecord objects in bulk."
redirect_to: 'database/insert_into_tables_in_batches.md'
remove_date: '2022-11-05'
---
# Insert into tables in batches
This document was moved to [another location](database/insert_into_tables_in_batches.md).
Sometimes it is necessary to store large amounts of records at once, which can be inefficient
when iterating collections and saving each record individually. With the arrival of
[`insert_all`](https://apidock.com/rails/ActiveRecord/Persistence/ClassMethods/insert_all)
in Rails 6, which operates at the row level (that is, using `Hash` objects), GitLab has added a set
of APIs that make it safe and simple to insert `ActiveRecord` objects in bulk.
## Prepare `ApplicationRecord`s for bulk insertion
In order for a model class to take advantage of the bulk insertion API, it has to include the
`BulkInsertSafe` concern first:
```ruby
class MyModel < ApplicationRecord
# other includes here
# ...
include BulkInsertSafe # include this last
# ...
end
```
The `BulkInsertSafe` concern has two functions:
- It performs checks against your model class to ensure that it does not use ActiveRecord
APIs that are not safe to use with respect to bulk insertions (more on that below).
- It adds new class methods `bulk_insert!` and `bulk_upsert!`, which you can use to insert many records at once.
## Insert records with `bulk_insert!` and `bulk_upsert!`
If the target class passes the checks performed by `BulkInsertSafe`, you can insert an array of
ActiveRecord model objects as follows:
```ruby
records = [MyModel.new, ...]
MyModel.bulk_insert!(records)
```
Calls to `bulk_insert!` always attempt to insert _new records_. If instead
you would like to replace existing records with new values, while still inserting those
that do not already exist, then you can use `bulk_upsert!`:
```ruby
records = [MyModel.new, existing_model, ...]
MyModel.bulk_upsert!(records, unique_by: [:name])
```
In this example, `unique_by` specifies the columns by which records are considered to be
unique and as such are updated if they existed prior to insertion. For example, if
`existing_model` has a `name` attribute, and if a record with the same `name` value already
exists, its fields are updated with those of `existing_model`.
The `unique_by` parameter can also be passed as a `Symbol`, in which case it specifies
a database index by which a column is considered unique:
```ruby
MyModel.bulk_insert!(records, unique_by: :index_on_name)
```
### Record validation
The `bulk_insert!` method guarantees that `records` are inserted transactionally, and
runs validations on each record prior to insertion. If any record fails to validate,
an error is raised and the transaction is rolled back. You can turn off validations via
the `:validate` option:
```ruby
MyModel.bulk_insert!(records, validate: false)
```
### Batch size configuration
In those cases where the number of `records` is above a given threshold, insertions
occur in multiple batches. The default batch size is defined in
[`BulkInsertSafe::DEFAULT_BATCH_SIZE`](https://gitlab.com/gitlab-org/gitlab/-/blob/master/app/models/concerns/bulk_insert_safe.rb).
Assuming a default threshold of 500, inserting 950 records
would result in two batches being written sequentially (of size 500 and 450, respectively).
You can override the default batch size via the `:batch_size` option:
```ruby
MyModel.bulk_insert!(records, batch_size: 100)
```
Assuming the same number of 950 records, this would result in 10 batches being written instead.
Since this also affects the number of `INSERT` statements that occur, make sure you measure the
performance impact this might have on your code. There is a trade-off between the number of
`INSERT` statements the database has to process and the size and cost of each `INSERT`.
### Handling duplicate records
NOTE:
This parameter applies only to `bulk_insert!`. If you intend to update existing
records, use `bulk_upsert!` instead.
It may happen that some records you are trying to insert already exist, which would result in
primary key conflicts. There are two ways to address this problem: failing fast by raising an
error or skipping duplicate records. The default behavior of `bulk_insert!` is to fail fast
and raise an `ActiveRecord::RecordNotUnique` error.
If this is undesirable, you can instead skip duplicate records with the `skip_duplicates` flag:
```ruby
MyModel.bulk_insert!(records, skip_duplicates: true)
```
### Requirements for safe bulk insertions
Large parts of ActiveRecord's persistence API are built around the notion of callbacks. Many
of these callbacks fire in response to model life cycle events such as `save` or `create`.
These callbacks cannot be used with bulk insertions, since they are meant to be called for
every instance that is saved or created. Since these events do not fire when
records are inserted in bulk, we currently prevent their use.
The specifics around which callbacks are explicitly allowed are defined in
[`BulkInsertSafe`](https://gitlab.com/gitlab-org/gitlab/-/blob/master/app/models/concerns/bulk_insert_safe.rb).
Consult the module source code for details. If your class uses callbacks that are not explicitly designated
safe and you `include BulkInsertSafe` the application fails with an error.
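As a hypothetical illustration (the exact error raised, and the point at which it is raised, are implementation details of `BulkInsertSafe`):
```ruby
class UnsafeModel < ApplicationRecord
  # This life cycle callback fires on regular saves, but would be silently
  # skipped during bulk insertion, so `BulkInsertSafe` rejects it.
  after_save :send_notification

  include BulkInsertSafe # fails with an error because of the callback above
end
```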
### `BulkInsertSafe` versus `InsertAll`
Internally, `BulkInsertSafe` is based on `InsertAll`, and you may wonder when to choose
the former over the latter. To help you make the decision,
the key differences between these classes are listed in the table below.
| | Input type | Validates input | Specify batch size | Can bypass callbacks | Transactional |
|--------------- | -------------------- | --------------- | ------------------ | --------------------------------- | ------------- |
| `bulk_insert!` | ActiveRecord objects | Yes (optional) | Yes (optional) | No (prevents unsafe callback use) | Yes |
| `insert_all!` | Attribute hashes | No | No | Yes | Yes |
To summarize, `BulkInsertSafe` moves bulk inserts closer to how ActiveRecord objects
and inserts would normally behave. However, if all you need is to insert raw data in bulk, then
`insert_all` is more efficient.
## Insert `has_many` associations in bulk
A common use case is to save collections of associated relations through the owner side of the relation,
where the owned relation is associated to the owner through the `has_many` class method:
```ruby
owner = OwnerModel.new(owned_relations: array_of_owned_relations)
# saves all `owned_relations` one-by-one
owner.save!
```
This issues a separate `INSERT` statement and transaction for every record in `owned_relations`, which is inefficient if
`array_of_owned_relations` is large. To remedy this, the `BulkInsertableAssociations` concern can be
used to declare that the owner defines associations that are safe for bulk insertion:
```ruby
class OwnerModel < ApplicationRecord
# other includes here
# ...
include BulkInsertableAssociations # include this last
has_many :my_models
end
```
Here `my_models` must be declared `BulkInsertSafe` (as described previously) for bulk insertions
to happen. You can now insert any yet unsaved records as follows:
```ruby
BulkInsertableAssociations.with_bulk_insert do
owner = OwnerModel.new(my_models: array_of_my_model_instances)
# saves `my_models` using a single bulk insert (possibly via multiple batches)
owner.save!
end
```
You can still save relations that are not `BulkInsertSafe` in this block; they
are simply treated as if you had invoked `save` from outside the block.
## Known limitations
There are a few restrictions to how these APIs can be used:
- `BulkInsertableAssociations`:
- It is currently only compatible with `has_many` relations.
- It does not yet support `has_many through: ...` relations.
Moreover, input data should either be limited to around 1000 records at most,
or already batched prior to calling bulk insert. The `INSERT` statement runs in a single
transaction, so for large amounts of records it may negatively affect database stability.
<!-- This redirect file can be deleted after <2022-11-05>. -->
<!-- Redirects that point to other docs in the same project expire in three months. -->
<!-- Redirects that point to docs in a different project or site (for example, link is not relative and starts with `https:`) expire in one year. -->
<!-- Before deletion, see: https://docs.gitlab.com/ee/development/documentation/redirects.html -->

View File

@ -1,193 +1,11 @@
---
stage: none
group: unassigned
info: To determine the technical writer assigned to the Stage/Group associated with this page, see https://about.gitlab.com/handbook/engineering/ux/technical-writing/#assignments
redirect_to: 'database/namespaces_storage_statistics.md'
remove_date: '2022-11-05'
---
# Database case study: Namespaces storage statistics
This document was moved to [another location](database/namespaces_storage_statistics.md).
## Introduction
On [Storage and limits management for groups](https://gitlab.com/groups/gitlab-org/-/epics/886),
we want to provide an easy way to view the amount of
storage consumed by a group, and to allow easy management of it.
## Proposal
1. Create a new ActiveRecord model to hold the namespaces' statistics in an aggregated form (only for root [namespaces](../user/namespace/index.md)).
1. Refresh the statistics in this model every time a project belonging to this namespace is changed.
## Problem
In GitLab, we update the project storage statistics through a
[callback](https://gitlab.com/gitlab-org/gitlab/-/blob/4ab54c2233e91f60a80e5b6fa2181e6899fdcc3e/app/models/project.rb#L97)
every time the project is saved.
The summary of those statistics per namespace is then retrieved
by the [`Namespaces#with_statistics`](https://gitlab.com/gitlab-org/gitlab/-/blob/4ab54c2233e91f60a80e5b6fa2181e6899fdcc3e/app/models/namespace.rb#L70) scope. Analyzing this query, we noticed that:
- It takes up to `1.2` seconds for namespaces with over `15k` projects.
- It can't be analyzed with [ChatOps](chatops_on_gitlabcom.md), as it times out.
Additionally, the pattern that is currently used to update the project statistics
(the callback) doesn't scale adequately. It is currently one of the largest
[database transactions on production](https://gitlab.com/gitlab-org/gitlab/-/issues/29070),
taking the most time overall. We can't add one more query to it, as that would
increase the transaction's length.
Because of all of the above, we can't apply the same pattern to store
and update the namespaces statistics, as the `namespaces` table is one
of the largest tables on GitLab.com. Therefore, we needed to find a performant
alternative.
## Attempts
### Attempt A: PostgreSQL materialized view
The model can be updated through a refresh strategy based on a project routes SQL query and a [materialized view](https://www.postgresql.org/docs/11/rules-materializedviews.html):
```sql
SELECT split_part("rs".path, '/', 1) as root_path,
COALESCE(SUM(ps.storage_size), 0) AS storage_size,
COALESCE(SUM(ps.repository_size), 0) AS repository_size,
COALESCE(SUM(ps.wiki_size), 0) AS wiki_size,
COALESCE(SUM(ps.lfs_objects_size), 0) AS lfs_objects_size,
COALESCE(SUM(ps.build_artifacts_size), 0) AS build_artifacts_size,
COALESCE(SUM(ps.pipeline_artifacts_size), 0) AS pipeline_artifacts_size,
COALESCE(SUM(ps.packages_size), 0) AS packages_size,
COALESCE(SUM(ps.snippets_size), 0) AS snippets_size,
COALESCE(SUM(ps.uploads_size), 0) AS uploads_size
FROM "projects"
INNER JOIN routes rs ON rs.source_id = projects.id AND rs.source_type = 'Project'
INNER JOIN project_statistics ps ON ps.project_id = projects.id
GROUP BY root_path
```
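As a sketch, the view itself would be created once by wrapping the aggregation query above; the view name matches the `REFRESH` statement below:
```sql
CREATE MATERIALIZED VIEW root_namespace_storage_statistics AS
SELECT split_part("rs".path, '/', 1) as root_path,
       COALESCE(SUM(ps.storage_size), 0) AS storage_size
       -- ...the remaining aggregated columns from the query above...
FROM "projects"
INNER JOIN routes rs ON rs.source_id = projects.id AND rs.source_type = 'Project'
INNER JOIN project_statistics ps ON ps.project_id = projects.id
GROUP BY root_path
```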
We could then execute the query with:
```sql
REFRESH MATERIALIZED VIEW root_namespace_storage_statistics;
```
While this implies a single-query update (and probably a fast one), it has some downsides:
- Materialized view syntax differs between PostgreSQL and MySQL. While this feature was being worked on, MySQL was still supported by GitLab.
- Rails does not have native support for materialized views. We'd need to use a specialized gem to manage the database views, which implies additional work.
### Attempt B: An update through a CTE
Similar to Attempt A: the model update is done through a refresh strategy with a [Common Table Expression](https://www.postgresql.org/docs/9.1/queries-with.html):
```sql
WITH refresh AS (
SELECT split_part("rs".path, '/', 1) as root_path,
COALESCE(SUM(ps.storage_size), 0) AS storage_size,
COALESCE(SUM(ps.repository_size), 0) AS repository_size,
COALESCE(SUM(ps.wiki_size), 0) AS wiki_size,
COALESCE(SUM(ps.lfs_objects_size), 0) AS lfs_objects_size,
COALESCE(SUM(ps.build_artifacts_size), 0) AS build_artifacts_size,
COALESCE(SUM(ps.pipeline_artifacts_size), 0) AS pipeline_artifacts_size,
COALESCE(SUM(ps.packages_size), 0) AS packages_size,
COALESCE(SUM(ps.snippets_size), 0) AS snippets_size,
COALESCE(SUM(ps.uploads_size), 0) AS uploads_size
FROM "projects"
INNER JOIN routes rs ON rs.source_id = projects.id AND rs.source_type = 'Project'
INNER JOIN project_statistics ps ON ps.project_id = projects.id
GROUP BY root_path)
UPDATE namespace_storage_statistics
SET storage_size = refresh.storage_size,
repository_size = refresh.repository_size,
wiki_size = refresh.wiki_size,
lfs_objects_size = refresh.lfs_objects_size,
build_artifacts_size = refresh.build_artifacts_size,
pipeline_artifacts_size = refresh.pipeline_artifacts_size,
packages_size = refresh.packages_size,
snippets_size = refresh.snippets_size,
uploads_size = refresh.uploads_size
FROM refresh
INNER JOIN routes rs ON rs.path = refresh.root_path AND rs.source_type = 'Namespace'
WHERE namespace_storage_statistics.namespace_id = rs.source_id
```
This has the same benefits and downsides as Attempt A.
### Attempt C: Get rid of the model and store the statistics on Redis
We could get rid of the model that stores the statistics in aggregated form and instead use a Redis Set.
This would be the [boring solution](https://about.gitlab.com/handbook/values/#boring-solutions) and the fastest one
to implement, as GitLab already includes Redis as part of its [Architecture](architecture.md#redis).
The downside of this approach is that Redis does not provide the same persistence/consistency guarantees as PostgreSQL,
and this is information we can't afford to lose in a Redis failure.
### Attempt D: Tag the root namespace and its child namespaces
Directly relate the root namespace to its child namespaces, so that every
namespace is tagged with its root namespace ID when it is created (a namespace
created without a parent is its own root):
| ID | root ID | parent ID |
|:---|:--------|:----------|
| 1 | 1 | NULL |
| 2 | 1 | 1 |
| 3 | 1 | 2 |
To aggregate the statistics inside a namespace we'd execute something like:
```sql
SELECT COUNT(...)
FROM projects
WHERE namespace_id IN (
SELECT id
FROM namespaces
WHERE root_id = X
)
```
Even though this approach would make aggregating much easier, it has some major downsides:
- We'd have to migrate **all namespaces** by adding and filling a new column. Because of the size of the table, the time and cost involved would be significant. The background migration would take approximately `153h`, see <https://gitlab.com/gitlab-org/gitlab-foss/-/merge_requests/29772>.
- The background migration would have to be shipped one release earlier, delaying the functionality by another milestone.
### Attempt E (final): Update the namespace storage statistics asynchronously
This approach consists of continuing to use the incremental statistics updates we already have,
but we refresh them through Sidekiq jobs and in different transactions:
1. Create a second table (`namespace_aggregation_schedules`) with two columns `id` and `namespace_id`.
1. Whenever the statistics of a project change, insert a row into `namespace_aggregation_schedules`:
- We don't insert a new row if there's already one related to the root namespace.
- Keeping in mind the length of the transaction that involves updating `project_statistics` (<https://gitlab.com/gitlab-org/gitlab/-/issues/29070>), the insertion should be done in a different transaction and through a Sidekiq job.
1. After inserting the row, we schedule another worker to be executed asynchronously at two different moments:
- One enqueued for immediate execution, and another one scheduled to run in `1.5` hours.
- We only schedule the jobs if we can obtain a `1.5h` lease on Redis, on a key based on the root namespace ID (see the sketch after this list).
- If we can't obtain the lease, it indicates there's another aggregation already in progress, or one scheduled to run within `1.5h`.
1. This worker will:
- Update the root namespace storage statistics by querying all the namespaces through a service.
- Delete the related `namespace_aggregation_schedules` row after the update.
1. Another Sidekiq job is also included to traverse any remaining rows on the `namespace_aggregation_schedules` table and schedule jobs for every pending row.
- This job is scheduled with cron to run every night (UTC).
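A minimal sketch of the lease-guarded scheduling, assuming the `Gitlab::ExclusiveLease` API; the worker and key names are illustrative:
```ruby
class ScheduleAggregationWorker
  include ApplicationWorker

  LEASE_TIMEOUT = 1.5.hours

  def perform(namespace_id)
    root_id = Namespace.find(namespace_id).root_ancestor.id

    lease = Gitlab::ExclusiveLease.new("namespace_aggregation:#{root_id}", timeout: LEASE_TIMEOUT.to_i)
    # If the lease is already taken, an aggregation is in progress or is
    # scheduled to run within the next 1.5 hours, so we do nothing.
    return unless lease.try_obtain

    RootStatisticsWorker.perform_async(root_id)              # immediate run
    RootStatisticsWorker.perform_in(LEASE_TIMEOUT, root_id)  # catch-up run
  end
end
```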
This implementation has the following benefits:
- All the updates are done asynchronously, so we're not increasing the length of the transactions for `project_statistics`.
- We're doing the update in a single SQL query.
- It is compatible with PostgreSQL and MySQL.
- No background migration required.
The only downside of this approach is that namespaces' statistics are updated up to `1.5` hours after the change is done,
which means there's a time window in which the statistics are inaccurate. Because we're still not
[enforcing storage limits](https://gitlab.com/gitlab-org/gitlab/-/issues/17664), this is not a major problem.
## Conclusion
Updating the storage statistics asynchronously was the least problematic and
most performant approach to aggregating the root namespaces' statistics.
All the details regarding this use case can be found on:
- <https://gitlab.com/gitlab-org/gitlab-foss/-/issues/62214>
- Merge Request with the implementation: <https://gitlab.com/gitlab-org/gitlab-foss/-/merge_requests/28996>
The performance of the namespace storage statistics was measured in staging and production (GitLab.com). All results were posted
on <https://gitlab.com/gitlab-org/gitlab-foss/-/issues/64092>; no problems have been reported so far.
<!-- This redirect file can be deleted after <2022-11-05>. -->
<!-- Redirects that point to other docs in the same project expire in three months. -->
<!-- Redirects that point to docs in a different project or site (for example, link is not relative and starts with `https:`) expire in one year. -->
<!-- Before deletion, see: https://docs.gitlab.com/ee/development/documentation/redirects.html -->

View File

@ -932,7 +932,7 @@ Models](mass_insert.md) functionality.
Assuming you are working with ActiveRecord models, you might also find these links helpful:
- [Insert records in batches](insert_into_tables_in_batches.md)
- [Insert records in batches](database/insert_into_tables_in_batches.md)
- [BulkInsert gem](https://github.com/jamis/bulk_insert)
- [ActiveRecord::PgGenerateSeries gem](https://github.com/ryu39/active_record-pg_generate_series)

View File

@ -316,6 +316,28 @@ services:
alias: docker
```
### Issues when authenticating to the Dependency Proxy from CI/CD jobs
GitLab Runner automatically authenticates to the Dependency Proxy. However, the underlying Docker engine is still subject to its [authorization resolving process](https://gitlab.com/gitlab-org/gitlab-runner/-/blob/main/docs/configuration/advanced-configuration.md#precedence-of-docker-authorization-resolving).
Misconfigurations in the authentication mechanism may cause `HTTP Basic: Access denied` and `403: Access forbidden` errors.
You can use the job logs to view which authentication mechanism was used to authenticate against the Dependency Proxy:
```plaintext
Authenticating with credentials from $DOCKER_AUTH_CONFIG
```
```plaintext
Authenticating with credentials from /root/.docker/config.json
```
```plaintext
Authenticating with credentials from job payload (GitLab Registry)
```
Make sure you are using the expected authentication mechanism.
### "Not Found" error when pulling image
Docker errors similar to the following may indicate that the user running the build job doesn't have

View File

@ -10,6 +10,8 @@ module API
before do
require_repository_enabled!
authorize! :download_code, user_project
verify_pagination_params!
end
helpers do

View File

@ -20,6 +20,26 @@ module API
optional :page, type: Integer, default: 1, desc: 'Current page number'
optional :per_page, type: Integer, default: 20, desc: 'Number of items per page', except_values: [0]
end
def verify_pagination_params!
return if Feature.disabled?(:only_positive_pagination_values)
page = begin
Integer(params[:page])
rescue ArgumentError, TypeError
nil
end
return render_structured_api_error!({ error: 'page does not have a valid value' }, 400) if page&.< 1
per_page = begin
Integer(params[:per_page])
rescue ArgumentError, TypeError
nil
end
return render_structured_api_error!({ error: 'per_page does not have a valid value' }, 400) if per_page&.< 1
end
end
end
end

View File

@ -145,7 +145,7 @@ module RuboCop
return unless node.children[opts_index]
node.children[opts_index].each_pair.find do |pair|
pair.key.value == :feature_flag
pair.key.value == :_deprecated_feature_flag
end&.value
else
arg_index = rugged_method?(node) ? 3 : 2

View File

@ -24,7 +24,7 @@ RSpec.describe 'Graphql Field feature flags' do
let(:query_type) do
query_factory do |query|
query.field :item, type, null: true, feature_flag: feature_flag, resolver: new_resolver(test_object)
query.field :item, type, null: true, _deprecated_feature_flag: feature_flag, resolver: new_resolver(test_object)
end
end

View File

@ -209,7 +209,7 @@ RSpec.describe Types::BaseField do
describe '#visible?' do
context 'and has a feature_flag' do
let(:flag) { :test_feature }
let(:field) { described_class.new(name: 'test', type: GraphQL::Types::String, feature_flag: flag, null: false) }
let(:field) { described_class.new(name: 'test', type: GraphQL::Types::String, _deprecated_feature_flag: flag, null: false) }
let(:context) { {} }
before do
@ -253,7 +253,7 @@ RSpec.describe Types::BaseField do
describe '#description' do
context 'feature flag given' do
let(:field) { described_class.new(name: 'test', type: GraphQL::Types::String, feature_flag: flag, null: false, description: 'Test description.') }
let(:field) { described_class.new(name: 'test', type: GraphQL::Types::String, _deprecated_feature_flag: flag, null: false, description: 'Test description.') }
let(:flag) { :test_flag }
it 'prepends the description' do
@ -313,11 +313,11 @@ RSpec.describe Types::BaseField do
described_class.new(**base_args.merge(args))
end
it 'interacts well with the `feature_flag` property' do
it 'interacts well with the `_deprecated_feature_flag` property' do
field = subject(
deprecated: { milestone: '1.10', reason: 'Deprecation reason' },
description: 'Field description.',
feature_flag: 'foo_flag'
_deprecated_feature_flag: 'foo_flag'
)
expect(field.description).to start_with('Field description. Available only when feature flag `foo_flag` is enabled.')

View File

@ -206,11 +206,13 @@ RSpec.describe API::Commits do
let(:page) { 1 }
let(:per_page) { 5 }
let(:ref_name) { 'master' }
let!(:request) do
let(:request) do
get api("/projects/#{project_id}/repository/commits?page=#{page}&per_page=#{per_page}&ref_name=#{ref_name}", user)
end
it 'returns correct headers' do
request
expect(response).to include_limited_pagination_headers
expect(response.headers['Link']).to match(/page=1&per_page=5/)
expect(response.headers['Link']).to match(/page=2&per_page=5/)
@ -218,6 +220,8 @@ RSpec.describe API::Commits do
context 'viewing the first page' do
it 'returns the first 5 commits' do
request
commit = project.repository.commit
expect(json_response.size).to eq(per_page)
@ -230,6 +234,8 @@ RSpec.describe API::Commits do
let(:page) { 3 }
it 'returns the third 5 commits' do
request
commit = project.repository.commits('HEAD', limit: per_page, offset: (page - 1) * per_page).first
expect(json_response.size).to eq(per_page)
@ -238,10 +244,55 @@ RSpec.describe API::Commits do
end
end
context 'when per_page is 0' do
let(:per_page) { 0 }
context 'when pagination params are invalid' do
let_it_be(:project) { create(:project, :repository) }
it_behaves_like '400 response'
using RSpec::Parameterized::TableSyntax
where(:page, :per_page, :error_message) do
0 | nil | 'page does not have a valid value'
-1 | nil | 'page does not have a valid value'
'a' | nil | 'page is invalid'
nil | 0 | 'per_page does not have a valid value'
nil | -1 | 'per_page does not have a valid value'
nil | 'a' | 'per_page is invalid'
end
with_them do
it 'returns 400 response' do
request
expect(response).to have_gitlab_http_status(:bad_request)
expect(json_response['error']).to eq(error_message)
end
end
context 'when FF is off' do
before do
stub_feature_flags(only_positive_pagination_values: false)
end
where(:page, :per_page, :error_message, :status) do
0 | nil | nil | :success
-10 | nil | nil | :internal_server_error
'a' | nil | 'page is invalid' | :bad_request
nil | 0 | 'per_page has a value not allowed' | :bad_request
nil | -1 | nil | :success
nil | 'a' | 'per_page is invalid' | :bad_request
end
with_them do
it 'returns a response' do
request
expect(response).to have_gitlab_http_status(status)
if error_message
expect(json_response['error']).to eq(error_message)
end
end
end
end
end
end

View File

@ -217,8 +217,8 @@ RSpec.describe RuboCop::Cop::Gitlab::MarkUsedFeatureFlags do
allow(cop).to receive(:in_graphql_types?).and_return(true)
end
include_examples 'sets flag as used', 'field :runners, Types::Ci::RunnerType.connection_type, null: true, feature_flag: :foo', 'foo'
include_examples 'sets flag as used', 'field :runners, null: true, feature_flag: :foo', 'foo'
include_examples 'sets flag as used', 'field :runners, Types::Ci::RunnerType.connection_type, null: true, _deprecated_feature_flag: :foo', 'foo'
include_examples 'sets flag as used', 'field :runners, null: true, _deprecated_feature_flag: :foo', 'foo'
include_examples 'does not set any flags as used', 'field :solution'
include_examples 'does not set any flags as used', 'field :runners, Types::Ci::RunnerType.connection_type'
include_examples 'does not set any flags as used', 'field :runners, Types::Ci::RunnerType.connection_type, null: true, description: "hello world"'