From 0d8df625a2ee3562cf1b2bdac46a429f69b06daf Mon Sep 17 00:00:00 2001 From: GitLab Bot Date: Fri, 28 May 2021 12:10:48 +0000 Subject: [PATCH] Add latest changes from gitlab-org/gitlab@master --- .../operations/extra_sidekiq_processes.md | 81 +-------- .../operations/extra_sidekiq_routing.md | 164 ++++++++++++++++++ doc/administration/operations/index.md | 1 + doc/administration/sidekiq.md | 1 + .../import_export/base/relation_factory.rb | 5 + lib/gitlab/sidekiq_config/worker_router.rb | 2 +- spec/fixtures/bulk_imports/epics.ndjson.gz | Bin 0 -> 2690 bytes .../base/relation_factory_spec.rb | 9 + spec/models/merge_request_spec.rb | 6 +- 9 files changed, 188 insertions(+), 81 deletions(-) create mode 100644 doc/administration/operations/extra_sidekiq_routing.md create mode 100644 spec/fixtures/bulk_imports/epics.ndjson.gz diff --git a/doc/administration/operations/extra_sidekiq_processes.md b/doc/administration/operations/extra_sidekiq_processes.md index 8b6a558ecbd..cf058a85e72 100644 --- a/doc/administration/operations/extra_sidekiq_processes.md +++ b/doc/administration/operations/extra_sidekiq_processes.md @@ -116,83 +116,10 @@ you list: > - [Sidekiq cluster, including queue selector, moved](https://gitlab.com/groups/gitlab-com/gl-infra/-/epics/181) to GitLab Free in 12.10. > - [Renamed from `experimental_queue_selector` to `queue_selector`](https://gitlab.com/gitlab-com/gl-infra/scalability/-/issues/147) in GitLab 13.6. -In addition to selecting queues by name, as above, the `queue_selector` -option allows queue groups to be selected in a more general way using -the following components: - -- Attributes that can be selected. -- Operators used to construct a query. - -When `queue_selector` is set, all `queue_groups` must be in the queue -selector syntax. - -### Available attributes - -- [Introduced](https://gitlab.com/gitlab-com/gl-infra/scalability/-/issues/261) in GitLab 13.1, `tags`. 
- -From the [list of all available -attributes](https://gitlab.com/gitlab-org/gitlab/-/blob/master/app/workers/all_queues.yml), -`queue_selector` allows selecting of queues by the following attributes: - -- `feature_category` - the [GitLab feature - category](https://about.gitlab.com/direction/maturity/#category-maturity) the - queue belongs to. For example, the `merge` queue belongs to the - `source_code_management` category. -- `has_external_dependencies` - whether or not the queue connects to external - services. For example, all importers have this set to `true`. -- `urgency` - how important it is that this queue's jobs run - quickly. Can be `high`, `low`, or `throttled`. For example, the - `authorized_projects` queue is used to refresh user permissions, and - is high urgency. -- `worker_name` - the worker name. The other attributes are typically more useful as - they are more general, but this is available in case a particular worker needs - to be selected. -- `name` - the queue name. Similarly, this is available in case a particular queue needs - to be selected. -- `resource_boundary` - if the queue is bound by `cpu`, `memory`, or - `unknown`. For example, the `project_export` queue is memory bound as it has - to load data in memory before saving it for export. -- `tags` - short-lived annotations for queues. These are expected to frequently - change from release to release, and may be removed entirely. - -`has_external_dependencies` is a boolean attribute: only the exact -string `true` is considered true, and everything else is considered -false. - -`tags` is a set, which means that `=` checks for intersecting sets, and -`!=` checks for disjoint sets. For example, `tags=a,b` selects queues -that have tags `a`, `b`, or both. `tags!=a,b` selects queues that have -neither of those tags. - -### Available operators - -`queue_selector` supports the following operators, listed from highest -to lowest precedence: - -- `|` - the logical OR operator. 
For example, `query_a|query_b` (where `query_a` - and `query_b` are queries made up of the other operators here) will include - queues that match either query. -- `&` - the logical AND operator. For example, `query_a&query_b` (where - `query_a` and `query_b` are queries made up of the other operators here) will - only include queues that match both queries. -- `!=` - the NOT IN operator. For example, `feature_category!=issue_tracking` - excludes all queues from the `issue_tracking` feature category. -- `=` - the IN operator. For example, `resource_boundary=cpu` includes all - queues that are CPU bound. -- `,` - the concatenate set operator. For example, - `feature_category=continuous_integration,pages` includes all queues from - either the `continuous_integration` category or the `pages` category. This - example is also possible using the OR operator, but allows greater brevity, as - well as being lower precedence. - -The operator precedence for this syntax is fixed: it's not possible to make AND -have higher precedence than OR. - -[In GitLab 12.9](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/26594) and -later, as with the standard queue group syntax above, a single `*` as the -entire queue group selects all queues. - -### Example queries +In addition to selecting queues by name, as above, the `queue_selector` option +allows queue groups to be selected in a more general way using a [worker matching +query](extra_sidekiq_routing.md#worker-matching-query). After `queue_selector` +is set, all `queue_groups` must follow the aforementioned syntax. 
In `/etc/gitlab/gitlab.rb`: diff --git a/doc/administration/operations/extra_sidekiq_routing.md b/doc/administration/operations/extra_sidekiq_routing.md new file mode 100644 index 00000000000..93cf8bd4f43 --- /dev/null +++ b/doc/administration/operations/extra_sidekiq_routing.md @@ -0,0 +1,164 @@ +--- +stage: Enablement +group: Distribution +info: To determine the technical writer assigned to the Stage/Group associated with this page, see https://about.gitlab.com/handbook/engineering/ux/technical-writing/#assignments +--- + +# Queue routing rules **(FREE SELF)** + +When the number of Sidekiq jobs increases to a certain scale, the system faces +some scalability issues. One of them is that the length of the queue tends to get +longer. High-urgency jobs have to wait longer until other less urgent jobs +finish. This head-of-line blocking situation may eventually affect the +responsiveness of the system, especially critical actions. In another scenario, +the performance of some jobs is degraded due to other long running or CPU-intensive jobs +(computing or rendering ones) in the same machine. + +To counter the aforementioned issues, one effective solution is to split +Sidekiq jobs into different queues and assign machines handling each queue +exclusively. For example, all CPU-intensive jobs could be routed to the +`cpu-bound` queue and handled by a fleet of CPU optimized instances. The queue +topology differs between companies depending on the workloads and usage +patterns. Therefore, GitLab supports a flexible mechanism for the +administrator to route the jobs based on their characteristics. + +As an alternative to [Queue selector](extra_sidekiq_processes.md#queue-selector), which +configures Sidekiq cluster to listen to a specific set of workers or queues, +GitLab also supports routing a job from a worker to the desired queue when it +is scheduled. Sidekiq clients try to match a job against a configured list of +routing rules. 
Rules are evaluated from first to last, and as soon as we find a +match for a given worker we stop processing for that worker (first match wins). +If the worker doesn't match any rule, it falls back to the queue name generated +from the worker name. + +By default, if the routing rules are not configured (or denoted with an empty +array), all the jobs are routed to the queue generated from the worker name. + +## Example configuration + +In `/etc/gitlab/gitlab.rb`: + +```ruby +sidekiq['routing_rules'] = [ + # Route all non-CPU-bound workers that are high urgency to `high-urgency` queue + ['resource_boundary!=cpu&urgency=high', 'high-urgency'], + # Route all database, gitaly and global search workers that are throttled to `throttled` queue + ['feature_category=database,gitaly,global_search&urgency=throttled', 'throttled'], + # Route all workers having contact with the outside world to a `network-intensive` queue + ['has_external_dependencies=true|feature_category=hooks|tags=network', 'network-intensive'], + # Route all import workers to the queues generated by the worker name, for + # example, JiraImportWorker to `jira_import`, SVNWorker to `svn_worker` + ['feature_category=import', nil], + # Wildcard matching, route the rest to `default` queue + ['*', 'default'] +] +``` + +The routing rules list is an ordered array of tuples of query and +corresponding queue: + +- The query follows the [worker matching query](#worker-matching-query) syntax. +- The `queue_name` must be a valid Sidekiq queue name. If the queue name + is `nil`, or an empty string, the worker is routed to the queue generated + by the name of the worker instead. + +The query supports wildcard matching `*`, which matches all workers. As a +result, the wildcard query must stay at the end of the list or the rules after it +are ignored. + +NOTE: +Mixing queue routing rules and queue selectors requires care to +ensure that all scheduled jobs are picked up by appropriate Sidekiq +workers. 
+ +## Worker matching query + +GitLab provides a simple query syntax to match a worker based on its +attributes. This query syntax is employed by both [Queue routing +rules](#queue-routing-rules) and [Queue +selector](extra_sidekiq_processes.md#queue-selector). A query includes two +components: + +- Attributes that can be selected. +- Operators used to construct a query. + +### Available attributes + +> [Introduced](https://gitlab.com/gitlab-com/gl-infra/scalability/-/issues/261) in GitLab 13.1 (`tags`). + +Queue matching query works upon the worker attributes, described in [Sidekiq +style guide](../../development/sidekiq_style_guide.md). We support querying +based on a subset of worker attributes: + +- `feature_category` - the [GitLab feature + category](https://about.gitlab.com/direction/maturity/#category-maturity) the + queue belongs to. For example, the `merge` queue belongs to the + `source_code_management` category. +- `has_external_dependencies` - whether or not the queue connects to external + services. For example, all importers have this set to `true`. +- `urgency` - how important it is that this queue's jobs run + quickly. Can be `high`, `low`, or `throttled`. For example, the + `authorized_projects` queue is used to refresh user permissions, and + is high urgency. +- `worker_name` - the worker name. The other attributes are typically more useful as + they are more general, but this is available in case a particular worker needs + to be selected. +- `name` - the queue name. The other attributes are typically more useful as + they are more general, but this is available in case a particular queue needs + to be selected. +- `resource_boundary` - if the queue is bound by `cpu`, `memory`, or + `unknown`. For example, the `ProjectExportWorker` is memory bound as it has + to load data in memory before saving it for export. +- `tags` - short-lived annotations for queues. 
These are expected to frequently + change from release to release, and may be removed entirely. + +`has_external_dependencies` is a boolean attribute: only the exact +string `true` is considered true, and everything else is considered +false. + +`tags` is a set, which means that `=` checks for intersecting sets, and +`!=` checks for disjoint sets. For example, `tags=a,b` selects queues +that have tags `a`, `b`, or both. `tags!=a,b` selects queues that have +neither of those tags. + +The attributes of each worker are hard-coded in the source code. For +convenience, we generate a [list of all available attributes in +GitLab Community Edition](https://gitlab.com/gitlab-org/gitlab/-/blob/master/app/workers/all_queues.yml) +and a [list of all available attributes in +GitLab Enterprise Edition](https://gitlab.com/gitlab-org/gitlab/-/blob/master/ee/app/workers/all_queues.yml). + +### Available operators + +`queue_selector` supports the following operators, listed from highest +to lowest precedence: + +- `|` - the logical OR operator. For example, `query_a|query_b` (where `query_a` + and `query_b` are queries made up of the other operators here) will include + queues that match either query. +- `&` - the logical AND operator. For example, `query_a&query_b` (where + `query_a` and `query_b` are queries made up of the other operators here) will + only include queues that match both queries. +- `!=` - the NOT IN operator. For example, `feature_category!=issue_tracking` + excludes all queues from the `issue_tracking` feature category. +- `=` - the IN operator. For example, `resource_boundary=cpu` includes all + queues that are CPU bound. +- `,` - the concatenate set operator. For example, + `feature_category=continuous_integration,pages` includes all queues from + either the `continuous_integration` category or the `pages` category. This + example is also possible using the OR operator, but allows greater brevity, as + well as being lower precedence. 
+ +The operator precedence for this syntax is fixed: it's not possible to make AND +have higher precedence than OR. + +[In GitLab 12.9](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/26594) and +later, as with the standard queue group syntax above, a single `*` as the +entire queue group selects all queues. + +### Migration + +After the Sidekiq routing rules are changed, administrators need to take care +with the migration to avoid losing jobs entirely, especially in a system with +long queues of jobs. The migration can be done by following the migration steps +mentioned in [Sidekiq job +migration](../../raketasks/sidekiq_job_migration.md). diff --git a/doc/administration/operations/index.md b/doc/administration/operations/index.md index 268c9281d87..c735a654b3e 100644 --- a/doc/administration/operations/index.md +++ b/doc/administration/operations/index.md @@ -21,6 +21,7 @@ Keep your GitLab instance up and running smoothly. - [Sidekiq MemoryKiller](sidekiq_memory_killer.md): Configure Sidekiq MemoryKiller to restart Sidekiq. - [Multiple Sidekiq processes](extra_sidekiq_processes.md): Configure multiple Sidekiq processes to ensure certain queues always have dedicated workers, no matter the number of jobs that need to be processed. **(FREE SELF)** +- [Sidekiq routing rules](extra_sidekiq_routing.md): Configure the routing rules to route a job from a worker to the desired queue. **(FREE SELF)** - [Puma](puma.md): Understand Puma and puma-worker-killer. - Speed up SSH operations by [Authorizing SSH users via a fast, indexed lookup to the GitLab database](fast_ssh_key_lookup.md), and/or diff --git a/doc/administration/sidekiq.md b/doc/administration/sidekiq.md index 352efeae738..12fb44c6b5c 100644 --- a/doc/administration/sidekiq.md +++ b/doc/administration/sidekiq.md @@ -187,4 +187,5 @@ gitlab_rails['monitoring_whitelist'] = ['10.10.1.42', '127.0.0.1'] Related Sidekiq configuration: 1. [Extra Sidekiq processes](operations/extra_sidekiq_processes.md) +1. 
[Extra Sidekiq routing](operations/extra_sidekiq_routing.md) 1. [Using the GitLab-Sidekiq chart](https://docs.gitlab.com/charts/charts/gitlab/sidekiq/) diff --git a/lib/gitlab/import_export/base/relation_factory.rb b/lib/gitlab/import_export/base/relation_factory.rb index 959ece4b903..30cd5ccfbcb 100644 --- a/lib/gitlab/import_export/base/relation_factory.rb +++ b/lib/gitlab/import_export/base/relation_factory.rb @@ -69,6 +69,7 @@ module Gitlab # the relation_hash, updating references with new object IDs, mapping users using # the "members_mapper" object, also updating notes if required. def create + return @relation_hash if author_relation? return if invalid_relation? || predefined_relation? setup_base_models @@ -95,6 +96,10 @@ module Gitlab relation_class.try(:predefined_id?, @relation_hash['id']) end + def author_relation? + @relation_name == :author + end + def setup_models raise NotImplementedError end diff --git a/lib/gitlab/sidekiq_config/worker_router.rb b/lib/gitlab/sidekiq_config/worker_router.rb index 946296a24d3..0670e5521df 100644 --- a/lib/gitlab/sidekiq_config/worker_router.rb +++ b/lib/gitlab/sidekiq_config/worker_router.rb @@ -40,7 +40,7 @@ module Gitlab # queue defined in the input routing rules. The input routing rules, as # described above, is an order-matter array of tuples [query, queue_name]. # - # - The query syntax is the same as the "queue selector" detailedly + # - The query syntax follows "worker matching query" detailedly # denoted in doc/administration/operations/extra_sidekiq_processes.md. # # - The queue_name must be a valid Sidekiq queue name. 
If the queue name diff --git a/spec/fixtures/bulk_imports/epics.ndjson.gz b/spec/fixtures/bulk_imports/epics.ndjson.gz new file mode 100644 index 0000000000000000000000000000000000000000..1f02a027c0c2ab99f6c948ac23b206afd3c09f0f GIT binary patch literal 2690 zcmV-|3Vrn-iwFoa=BHo)17&b&V{XRIX5Mi zQ>k)nIdObQ?3w}ra2GaTBEhv(D*g9#H~5;}k#`Q#cuG6BA=v~P-A(lObz|OdjN0sY z5c~ekrZT1OZBy$#-mHhRyxH7#&Ct?y7)1PLBL;idbaVr+`rh2udi{bK9}TS%R;zvf zZuPJfz1?eN9zSU0ulsx5^`@yeJMU)GThZA)4l#-7S{+=k#}anBD?2TwI~(8Q*$d8J z_}&jZ*?IoX%eF}r{&%ye_ed_?1?evIww{x-#+ESU=5^QH8>MT@YW-%=Og427KZG4R zRu2{Ys+dr$6nWE`riZKhrW{%elT@q{x3#FYFr3o8>`ZHEnVYYNTLS}{-Y_sc8|*yP zo52Xy>282=j1?MhY}qTzOiiMqG;l{vlk`l}>m<8n|EQTVy*0Iji55kA6j8d=wXueQ zfZqO8|5X1=8)?A021bGT`T-URu4q_Z3b<#Yw(Oh5yelynO-MQPqP7MX0B>2r zT8O3=RwgVcScBaSF3eOd#thg<;fh!d9>mc>rZTmu#5^gQUYCaK+Ud5_cVHvk;k*BA z@Ix`qErJ|!C(u{DXu*& zD*{)Q?#9LmiM-T%`>sXlSMK)V4*1diX5L2ve%gK$L@3vG%`Xrvs|5&ag}kf4jGNv2 z&Cr$Z!3Wp@4Y*u3YTgoMQ&q-3SUfFZwafSK;KQ#rJKGH!O}tm8D5e*V?%EGs1)t!p zsYUt7hw!;K<^4eb%(o6kgpU`#SKREK$8+I%kxG?{GO2<@ghk+oUaZ1cCsCGrX`y%& zaSse6??iq3IDFRgv_pTl+`l*ax7F0nnWJgRm1tJjp3%iG6>b8(?SzHR|1O+*xltVo5-J+IJ` z3;qX2O?WhF{Lc|Je!Pv-6ZRUAOZ@QpMhy>w!jpmL@ig`dkt!dMhz_z;#F5IA5H_A5 zNo9EMsQLNHq-yVmD({E(sl+-3&p~1xJ@R{rH90Y{Cac7H!+>w*y4*v6{0eZAjcPMK zKcPz>E&;UOY8NsrSgxJcY*d>asK{V$A0`f8@R0Fv7gADBcoKO5%C6OXN|v+%vG)^q zW#bQcc0fN5FHs{TYIy(8Qu#9atXfXi_&e4Yz9E$v1jmOzy`L)*!KbW9_!nG}I0ZVv zc9uqbU6J^%s*#wlh%fF{jWSD704^Hj<_(c`M>7vA5U&hSQ6l?5MX@5l;2X5WXxH@% z7--Gok-Jtbhjjr3b{rPGpey{ZuJ%br>vPfQo2X5bwuTVO^uiQIXfvYz5VJ zz}mXQ5lM02#<0S<$a9a?1XrHG>IdobJ=P?!L^5eP7LTx;QRvQO zH7;r_Qo;OSK@0&>axnn=+p3DSvKyII^h>Si4$`?9t6MNkQxrxTT<$BJbFCUxQt)NN zY-6!bGdbK3 zN)~B+enxsFcV{Pe;itqR!3ATHG&rHc$+sR)*I49x%vKJ@k11-VFQKWmAo4O_@r{FI$8afPjcZ_7qAEQkSTiz-4wxDteuO5gR9uO-rqTx-Ktui<*{Ner zOOCG_05@-?ZUIy|0`_v^IV0g4F$E-lX&qx42@tFpu*tlHnOBgN2EtR6e!m*8C+7HO zXdEj-bAkRqHI06d??;O2c&!6w0_}#`AL!oKBL-UGJ#Y#Rp}}$%sS&{=x1fP|>7w^c?WXQ-sV*uY~MM$gYI!O31#D4exRYS#m}~h7IFWT7)Q< 
zo#Qw{c$tJO^n(Ih$Ds(M=3y#17pdp#oELt~gF>WIWFq?mT7*|Zc6LG*eM&46UN9EP z(vuD4REju6;#WR#PfkwT)Itgc#Za%ZY|OZp2y4_Y=(`-UCp>p%J8K z)yM|N-qXk~X8D;@jZg{vxTb2Tc-_DkHu5AImoUI9YTeBpBS$V5c7V^Q6{O#`rI7#y zTt@BfL+a9YLigIC0oN4MBcwSXmRt63f&?|>ZJ&lz1yy?oDzLX8`a(GjAPTmBlC;0fWxC6P6pMf_;m&u!= zTn0MuROF?iC`2J+uC$+rnyakvxff)S7fBghdDAoCP4TD1BGKhyk(8g%;sz|jy@OMV zFY{FvCuqIG8m=#asg1f2juFgV6>Pmcf+55nTvnpw-z|W^9X3Sfv~-x*OKf4`u|BP+ zN#b()oMQEOGOrr5B;>kU8U>Fa>yo<26-LT1cBpiNjjd|Vs}A~1{W%~w8du~FM6OzB zGyIvU;D~7OZOX8u37t~Rk=b=1ZXDae#uujd@f4)C?!=&b&2hAb@;G&{VAV?^ACFxw z2m~#mW|(t;Jk~%_P#cR{RmMIzN!Z&7fUL}a*ieI4adqnhhHDf=SYn0Q*6qXTbm4L= z6c$vXZ&0UI_yE2HP^a}!%TB=?tiCW`QADB9?`x310>G<{3k~M07q3RHvj+t literal 0 HcmV?d00001 diff --git a/spec/lib/gitlab/import_export/base/relation_factory_spec.rb b/spec/lib/gitlab/import_export/base/relation_factory_spec.rb index df33b4896a4..6a7ff33465d 100644 --- a/spec/lib/gitlab/import_export/base/relation_factory_spec.rb +++ b/spec/lib/gitlab/import_export/base/relation_factory_spec.rb @@ -43,6 +43,15 @@ RSpec.describe Gitlab::ImportExport::Base::RelationFactory do end end + context 'when author relation' do + let(:relation_sym) { :author } + let(:relation_hash) { { 'name' => 'User', 'project_id' => project.id } } + + it 'returns author hash unchanged' do + expect(subject).to eq(relation_hash) + end + end + context 'when #setup_models is not implemented' do it 'raises NotImplementedError' do expect { subject }.to raise_error(NotImplementedError) diff --git a/spec/models/merge_request_spec.rb b/spec/models/merge_request_spec.rb index 7cbfefd421d..8bb7a62929f 100644 --- a/spec/models/merge_request_spec.rb +++ b/spec/models/merge_request_spec.rb @@ -101,13 +101,13 @@ RSpec.describe MergeRequest, factory_default: :keep do let_it_be(:merge_request3) { create(:merge_request, :unique_branches, reviewers: [])} describe '.review_requested' do - it 'returns MRs that has any review requests' do + it 'returns MRs that have any review requests' do 
expect(described_class.review_requested).to eq([merge_request1, merge_request2]) end end describe '.no_review_requested' do - it 'returns MRs that has no review requests' do + it 'returns MRs that have no review requests' do expect(described_class.no_review_requested).to eq([merge_request3]) end end @@ -119,7 +119,7 @@ RSpec.describe MergeRequest, factory_default: :keep do end describe '.no_review_requested_to' do - it 'returns MRs that the user has been requested to review' do + it 'returns MRs that the user has not been requested to review' do expect(described_class.no_review_requested_to(user1)).to eq([merge_request2, merge_request3]) end end