diff --git a/app/models/namespace.rb b/app/models/namespace.rb index 3f7ccdb977e..6f6c8b387a1 100644 --- a/app/models/namespace.rb +++ b/app/models/namespace.rb @@ -13,6 +13,7 @@ class Namespace < ApplicationRecord include Gitlab::Utils::StrongMemoize include IgnorableColumns include Namespaces::Traversal::Recursive + include Namespaces::Traversal::Linear # Prevent users from creating unreasonably deep level of nesting. # The number 20 was taken based on maximum nesting level of diff --git a/app/models/namespace/traversal_hierarchy.rb b/app/models/namespace/traversal_hierarchy.rb index cfb6cfdde74..28cf55f7486 100644 --- a/app/models/namespace/traversal_hierarchy.rb +++ b/app/models/namespace/traversal_hierarchy.rb @@ -34,17 +34,20 @@ class Namespace sql = """ UPDATE namespaces SET traversal_ids = cte.traversal_ids - FROM (#{recursive_traversal_ids}) as cte + FROM (#{recursive_traversal_ids(lock: true)}) as cte WHERE namespaces.id = cte.id AND namespaces.traversal_ids <> cte.traversal_ids """ Namespace.connection.exec_query(sql) + rescue ActiveRecord::Deadlocked + db_deadlock_counter.increment(source: 'Namespace#sync_traversal_ids!') + raise end # Identify all incorrect traversal_ids in the current namespace hierarchy. - def incorrect_traversal_ids + def incorrect_traversal_ids(lock: false) Namespace - .joins("INNER JOIN (#{recursive_traversal_ids}) as cte ON namespaces.id = cte.id") + .joins("INNER JOIN (#{recursive_traversal_ids(lock: lock)}) as cte ON namespaces.id = cte.id") .where('namespaces.traversal_ids <> cte.traversal_ids') end @@ -55,10 +58,13 @@ class Namespace # # Note that the traversal_ids represent a calculated traversal path for the # namespace and not the value stored within the traversal_ids attribute. - def recursive_traversal_ids + # + # Optionally locked with FOR UPDATE to ensure isolation between concurrent + # updates of the heirarchy. + def recursive_traversal_ids(lock: false) root_id = Integer(@root.id) - """ + sql = <<~SQL WITH RECURSIVE cte(id, traversal_ids, cycle) AS ( VALUES(#{root_id}, ARRAY[#{root_id}], false) UNION ALL @@ -67,7 +73,11 @@ class Namespace WHERE n.parent_id = cte.id AND NOT cycle ) SELECT id, traversal_ids FROM cte - """ + SQL + + sql += ' FOR UPDATE' if lock + + sql end # This is essentially Namespace#root_ancestor which will soon be rewritten @@ -80,5 +90,9 @@ class Namespace .reorder(nil) .find_by(parent_id: nil) end + + def db_deadlock_counter + Gitlab::Metrics.counter(:db_deadlock, 'Counts the times we have deadlocked in the database') + end end end diff --git a/app/models/namespaces/traversal/linear.rb b/app/models/namespaces/traversal/linear.rb new file mode 100644 index 00000000000..8c042e5409d --- /dev/null +++ b/app/models/namespaces/traversal/linear.rb @@ -0,0 +1,63 @@ +# frozen_string_literal: true +# +# Query a recursively defined namespace hierarchy using linear methods through +# the traversal_ids attribute. +# +# Namespace is a nested hierarchy of one parent to many children. A search +# using only the parent-child relationships is a slow operation. This process +# was previously optimized using Postgresql recursive common table expressions +# (CTE) with acceptable performance. However, it lead to slower than possible +# performance, and resulted in complicated queries that were difficult to make +# performant. +# +# Instead of searching the hierarchy recursively, we store a `traversal_ids` +# attribute on each node. The `traversal_ids` is an ordered array of Namespace +# IDs that define the traversal path from the root Namespace to the current +# Namespace. +# +# For example, suppose we have the following Namespaces: +# +# GitLab (id: 1) > Engineering (id: 2) > Manage (id: 3) > Access (id: 4) +# +# Then `traversal_ids` for group "Access" is [1, 2, 3, 4] +# +# And we can match against other Namespace `traversal_ids` such that: +# +# - Ancestors are [1], [1, 2], [1, 2, 3] +# - Descendants are [1, 2, 3, 4, *] +# - Root is [1] +# - Hierarchy is [1, *] +# +# Note that this search method works so long as the IDs are unique and the +# traversal path is ordered from root to leaf nodes. +# +# We implement this in the database using Postgresql arrays, indexed by a +# generalized inverted index (gin). +module Namespaces + module Traversal + module Linear + extend ActiveSupport::Concern + + included do + after_create :sync_traversal_ids, if: -> { sync_traversal_ids? } + after_update :sync_traversal_ids, if: -> { sync_traversal_ids? && saved_change_to_parent_id? } + end + + def sync_traversal_ids? + Feature.enabled?(:sync_traversal_ids, root_ancestor, default_enabled: :yaml) + end + + private + + # Update the traversal_ids for the full hierarchy. + # + # NOTE: self.traversal_ids will be stale. Reload for a fresh record. + def sync_traversal_ids + # Clear any previously memoized root_ancestor as our ancestors have changed. + clear_memoization(:root_ancestor) + + Namespace::TraversalHierarchy.for_namespace(root_ancestor).sync_traversal_ids! + end + end + end +end diff --git a/app/models/namespaces/traversal/recursive.rb b/app/models/namespaces/traversal/recursive.rb index d74b7883830..aac70a1a0a9 100644 --- a/app/models/namespaces/traversal/recursive.rb +++ b/app/models/namespaces/traversal/recursive.rb @@ -6,10 +6,14 @@ module Namespaces extend ActiveSupport::Concern def root_ancestor - return self if persisted? && parent_id.nil? + return self if parent.nil? - strong_memoize(:root_ancestor) do - self_and_ancestors.reorder(nil).find_by(parent_id: nil) + if persisted? + strong_memoize(:root_ancestor) do + self_and_ancestors.reorder(nil).find_by(parent_id: nil) + end + else + parent.root_ancestor end end diff --git a/changelogs/unreleased/300377-traversal-ids-column-sync.yml b/changelogs/unreleased/300377-traversal-ids-column-sync.yml new file mode 100644 index 00000000000..123f81238fd --- /dev/null +++ b/changelogs/unreleased/300377-traversal-ids-column-sync.yml @@ -0,0 +1,5 @@ +--- +title: Cache namespace traversal path +merge_request: 52854 +author: +type: performance diff --git a/config/feature_flags/development/sync_traversal_ids.yml b/config/feature_flags/development/sync_traversal_ids.yml new file mode 100644 index 00000000000..56ed5f5a49d --- /dev/null +++ b/config/feature_flags/development/sync_traversal_ids.yml @@ -0,0 +1,7 @@ +--- +name: sync_traversal_ids +introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/52854 +rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/300377 +group: group::access +type: development +default_enabled: false diff --git a/doc/administration/troubleshooting/index.md b/doc/administration/troubleshooting/index.md index 63b056df8b7..1c205cc987a 100644 --- a/doc/administration/troubleshooting/index.md +++ b/doc/administration/troubleshooting/index.md @@ -6,11 +6,13 @@ info: To determine the technical writer assigned to the Stage/Group associated w # Troubleshooting a GitLab installation -Below are some resources to help you troubleshoot a GitLab installation -in case something goes wrong: +This page documents a collection of resources to help you troubleshoot a GitLab +installation. -- [Debugging tips](debug.md) -- [Diagnostics tools](diagnostics_tools.md) +## Troubleshooting guides + +- [SSL](ssl.md) +- [Geo](../geo/replication/troubleshooting.md) - [Elasticsearch](elasticsearch.md) - [GitLab Rails console cheat sheet](gitlab_rails_cheat_sheet.md) - [Group SAML and SCIM troubleshooting](group_saml_scim.md) **(PREMIUM SAAS)** @@ -18,9 +20,9 @@ in case something goes wrong: - [Linux cheat sheet](linux_cheat_sheet.md) - [Parsing GitLab logs with `jq`](log_parsing.md) - [Navigating GitLab via Rails console](navigating_gitlab_via_rails_console.md) -- [PostgreSQL](postgresql.md) -- [Sidekiq](sidekiq.md) -- [SSL](ssl.md) +- [Diagnostics tools](diagnostics_tools.md) +- [Debugging tips](debug.md) +- [Tracing requests with correlation ID](tracing_correlation_id.md) If you need a testing environment to troubleshoot, see the [apps for a testing environment](test_environments.md). diff --git a/doc/user/project/pages/introduction.md b/doc/user/project/pages/introduction.md index fbc86abbe66..3e71f543d3d 100644 --- a/doc/user/project/pages/introduction.md +++ b/doc/user/project/pages/introduction.md @@ -281,3 +281,19 @@ No, you don't. You can create your project first and access it under ## Known issues For a list of known issues, visit the GitLab [public issue tracker](https://gitlab.com/gitlab-org/gitlab/-/issues?label_name[]=Category%3APages). + +## Troubleshooting + +### 404 error when accessing a GitLab Pages site URL + +This problem most likely results from a missing `index.html` file in the public directory. If after deploying a Pages site +a 404 is encountered, confirm that the public directory contains an `index.html` file. If the file contains a different name +such as `test.html`, the Pages site can still be accessed, but the full path would be needed. For example: `https//group-name.pages.example.com/project-name/test.html`. + +The contents of the public directory can be confirmed by [browsing the artifacts](../../../ci/pipelines/job_artifacts.md#browsing-artifacts) from the latest pipeline. + +Files listed under the public directory can be accessed through the Pages URL for the project. + +A 404 can also be related to incorrect permissions. If [Pages Access Control](pages_access_control.md) is enabled, and a user +navigates to the Pages URL and receives a 404 reponse, it is possible that the user does not have permission to view the site. +To fix this, verify that the user is a member of the project. diff --git a/spec/models/group_spec.rb b/spec/models/group_spec.rb index 24d09d1c035..576d21c32b9 100644 --- a/spec/models/group_spec.rb +++ b/spec/models/group_spec.rb @@ -3,6 +3,8 @@ require 'spec_helper' RSpec.describe Group do + include ReloadHelpers + let!(:group) { create(:group) } describe 'associations' do @@ -281,7 +283,7 @@ RSpec.describe Group do end describe '#two_factor_authentication_allowed' do - let_it_be(:group) { create(:group) } + let_it_be_with_reload(:group) { create(:group) } context 'for a parent group' do it 'is valid' do @@ -311,6 +313,120 @@ RSpec.describe Group do end end + context 'traversal_ids on create' do + context 'default traversal_ids' do + let(:group) { build(:group) } + + before do + group.save! + group.reload + end + + it { expect(group.traversal_ids).to eq [group.id] } + end + + context 'has a parent' do + let(:parent) { create(:group) } + let(:group) { build(:group, parent: parent) } + + before do + group.save! + reload_models(parent, group) + end + + it { expect(parent.traversal_ids).to eq [parent.id] } + it { expect(group.traversal_ids).to eq [parent.id, group.id] } + end + + context 'has a parent update before save' do + let(:parent) { create(:group) } + let(:group) { build(:group, parent: parent) } + let!(:new_grandparent) { create(:group) } + + before do + parent.update!(parent: new_grandparent) + group.save! + reload_models(parent, group) + end + + it 'avoid traversal_ids race condition' do + expect(parent.traversal_ids).to eq [new_grandparent.id, parent.id] + expect(group.traversal_ids).to eq [new_grandparent.id, parent.id, group.id] + end + end + end + + context 'traversal_ids on update' do + context 'parent is updated' do + let(:new_parent) { create(:group) } + + subject {group.update!(parent: new_parent, name: 'new name') } + + it_behaves_like 'update on column', :traversal_ids + end + + context 'parent is not updated' do + subject { group.update!(name: 'new name') } + + it_behaves_like 'no update on column', :traversal_ids + end + end + + context 'traversal_ids on ancestral update' do + context 'update multiple ancestors before save' do + let(:parent) { create(:group) } + let(:group) { create(:group, parent: parent) } + let!(:new_grandparent) { create(:group) } + let!(:new_parent) { create(:group) } + + before do + group.parent = new_parent + new_parent.update!(parent: new_grandparent) + + group.save! + reload_models(parent, group, new_grandparent, new_parent) + end + + it 'avoids traversal_ids race condition' do + expect(parent.traversal_ids).to eq [parent.id] + expect(group.traversal_ids).to eq [new_grandparent.id, new_parent.id, group.id] + expect(new_grandparent.traversal_ids).to eq [new_grandparent.id] + expect(new_parent.traversal_ids).to eq [new_grandparent.id, new_parent.id] + end + end + + context 'assigning a new parent' do + let!(:old_parent) { create(:group) } + let!(:new_parent) { create(:group) } + let!(:group) { create(:group, parent: old_parent) } + + before do + group.update(parent: new_parent) + reload_models(old_parent, new_parent, group) + end + + it 'updates traversal_ids' do + expect(group.traversal_ids).to eq [new_parent.id, group.id] + end + end + + context 'assigning a new grandparent' do + let!(:old_grandparent) { create(:group) } + let!(:new_grandparent) { create(:group) } + let!(:parent_group) { create(:group, parent: old_grandparent) } + let!(:group) { create(:group, parent: parent_group) } + + before do + parent_group.update(parent: new_grandparent) + end + + it 'updates traversal_ids for all descendants' do + expect(parent_group.reload.traversal_ids).to eq [new_grandparent.id, parent_group.id] + expect(group.reload.traversal_ids).to eq [new_grandparent.id, parent_group.id, group.id] + end + end + end + describe '.without_integration' do let(:another_group) { create(:group) } let(:instance_integration) { build(:jira_service, :instance) } @@ -1838,24 +1954,28 @@ RSpec.describe Group do end end - def subject_and_reload(*models) - subject - models.map(&:reload) - end - describe '#update_shared_runners_setting!' do context 'enabled' do subject { group.update_shared_runners_setting!('enabled') } context 'group that its ancestors have shared runners disabled' do - let_it_be(:parent) { create(:group, :shared_runners_disabled) } - let_it_be(:group) { create(:group, :shared_runners_disabled, parent: parent) } - let_it_be(:project) { create(:project, shared_runners_enabled: false, group: group) } + let_it_be(:parent, reload: true) { create(:group, :shared_runners_disabled) } + let_it_be(:group, reload: true) { create(:group, :shared_runners_disabled, parent: parent) } + let_it_be(:project, reload: true) { create(:project, shared_runners_enabled: false, group: group) } - it 'raises error and does not enable shared Runners' do - expect { subject_and_reload(parent, group, project) } + it 'raises exception' do + expect { subject } .to raise_error(ActiveRecord::RecordInvalid, 'Validation failed: Shared runners enabled cannot be enabled because parent group has shared Runners disabled') - .and not_change { parent.shared_runners_enabled } + end + + it 'does not enable shared runners' do + expect do + subject rescue nil + + parent.reload + group.reload + project.reload + end.to not_change { parent.shared_runners_enabled } .and not_change { group.shared_runners_enabled } .and not_change { project.shared_runners_enabled } end @@ -1941,13 +2061,21 @@ RSpec.describe Group do end context 'when parent does not allow' do - let_it_be(:parent) { create(:group, :shared_runners_disabled, allow_descendants_override_disabled_shared_runners: false ) } - let_it_be(:group) { create(:group, :shared_runners_disabled, allow_descendants_override_disabled_shared_runners: false, parent: parent) } + let_it_be(:parent, reload: true) { create(:group, :shared_runners_disabled, allow_descendants_override_disabled_shared_runners: false ) } + let_it_be(:group, reload: true) { create(:group, :shared_runners_disabled, allow_descendants_override_disabled_shared_runners: false, parent: parent) } - it 'raises error and does not allow descendants to override' do - expect { subject_and_reload(parent, group) } + it 'raises exception' do + expect { subject } .to raise_error(ActiveRecord::RecordInvalid, 'Validation failed: Allow descendants override disabled shared runners cannot be enabled because parent group does not allow it') - .and not_change { parent.allow_descendants_override_disabled_shared_runners } + end + + it 'does not allow descendants to override' do + expect do + subject rescue nil + + parent.reload + group.reload + end.to not_change { parent.allow_descendants_override_disabled_shared_runners } .and not_change { parent.shared_runners_enabled } .and not_change { group.allow_descendants_override_disabled_shared_runners } .and not_change { group.shared_runners_enabled } diff --git a/spec/models/namespace/traversal_hierarchy_spec.rb b/spec/models/namespace/traversal_hierarchy_spec.rb index 83e6d704640..b166d541171 100644 --- a/spec/models/namespace/traversal_hierarchy_spec.rb +++ b/spec/models/namespace/traversal_hierarchy_spec.rb @@ -43,21 +43,63 @@ RSpec.describe Namespace::TraversalHierarchy, type: :model do end end + shared_examples 'locked update query' do + it 'locks query with FOR UPDATE' do + qr = ActiveRecord::QueryRecorder.new do + subject + end + expect(qr.count).to eq 1 + expect(qr.log.first).to match /FOR UPDATE/ + end + end + describe '#incorrect_traversal_ids' do - subject { described_class.new(root).incorrect_traversal_ids } + let!(:hierarchy) { described_class.new(root) } + + subject { hierarchy.incorrect_traversal_ids } + + before do + Namespace.update_all(traversal_ids: []) + end it { is_expected.to match_array Namespace.all } + + context 'when lock is true' do + subject { hierarchy.incorrect_traversal_ids(lock: true).load } + + it_behaves_like 'locked update query' + end end describe '#sync_traversal_ids!' do - let(:hierarchy) { described_class.new(root) } + let!(:hierarchy) { described_class.new(root) } - before do - hierarchy.sync_traversal_ids! - root.reload - end + subject { hierarchy.sync_traversal_ids! } + + it { expect(hierarchy.incorrect_traversal_ids).to be_empty } it_behaves_like 'hierarchy with traversal_ids' - it { expect(hierarchy.incorrect_traversal_ids).to be_empty } + it_behaves_like 'locked update query' + + context 'when deadlocked' do + before do + connection_double = double(:connection) + + allow(Namespace).to receive(:connection).and_return(connection_double) + allow(connection_double).to receive(:exec_query) { raise ActiveRecord::Deadlocked.new } + end + + it { expect { subject }.to raise_error(ActiveRecord::Deadlocked) } + + it 'increment db_deadlock counter' do + expect { subject rescue nil }.to change { db_deadlock_total('Namespace#sync_traversal_ids!') }.by(1) + end + end + end + + def db_deadlock_total(source) + Gitlab::Metrics + .counter(:db_deadlock, 'Counts the times we have deadlocked in the database') + .get(source: source) end end diff --git a/spec/models/namespace_spec.rb b/spec/models/namespace_spec.rb index 65d787d334b..f92510bddbf 100644 --- a/spec/models/namespace_spec.rb +++ b/spec/models/namespace_spec.rb @@ -168,6 +168,20 @@ RSpec.describe Namespace do describe 'inclusions' do it { is_expected.to include_module(Gitlab::VisibilityLevel) } it { is_expected.to include_module(Namespaces::Traversal::Recursive) } + it { is_expected.to include_module(Namespaces::Traversal::Linear) } + end + + context 'traversal_ids on create' do + context 'default traversal_ids' do + let(:namespace) { build(:namespace) } + + before do + namespace.save! + namespace.reload + end + + it { expect(namespace.traversal_ids).to eq [namespace.id] } + end end describe 'callbacks' do @@ -1085,21 +1099,42 @@ RSpec.describe Namespace do end describe '#root_ancestor' do - let!(:root_group) { create(:group) } + context 'with persisted root group' do + let!(:root_group) { create(:group) } - it 'returns root_ancestor for root group without a query' do - expect { root_group.root_ancestor }.not_to exceed_query_limit(0) + it 'returns root_ancestor for root group without a query' do + expect { root_group.root_ancestor }.not_to exceed_query_limit(0) + end + + it 'returns the top most ancestor' do + nested_group = create(:group, parent: root_group) + deep_nested_group = create(:group, parent: nested_group) + very_deep_nested_group = create(:group, parent: deep_nested_group) + + expect(root_group.root_ancestor).to eq(root_group) + expect(nested_group.root_ancestor).to eq(root_group) + expect(deep_nested_group.root_ancestor).to eq(root_group) + expect(very_deep_nested_group.root_ancestor).to eq(root_group) + end end - it 'returns the top most ancestor' do - nested_group = create(:group, parent: root_group) - deep_nested_group = create(:group, parent: nested_group) - very_deep_nested_group = create(:group, parent: deep_nested_group) + context 'with not persisted root group' do + let!(:root_group) { build(:group) } - expect(root_group.root_ancestor).to eq(root_group) - expect(nested_group.root_ancestor).to eq(root_group) - expect(deep_nested_group.root_ancestor).to eq(root_group) - expect(very_deep_nested_group.root_ancestor).to eq(root_group) + it 'returns root_ancestor for root group without a query' do + expect { root_group.root_ancestor }.not_to exceed_query_limit(0) + end + + it 'returns the top most ancestor' do + nested_group = build(:group, parent: root_group) + deep_nested_group = build(:group, parent: nested_group) + very_deep_nested_group = build(:group, parent: deep_nested_group) + + expect(root_group.root_ancestor).to eq(root_group) + expect(nested_group.root_ancestor).to eq(root_group) + expect(deep_nested_group.root_ancestor).to eq(root_group) + expect(very_deep_nested_group.root_ancestor).to eq(root_group) + end end end diff --git a/spec/support/helpers/reload_helpers.rb b/spec/support/helpers/reload_helpers.rb new file mode 100644 index 00000000000..60811e4604f --- /dev/null +++ b/spec/support/helpers/reload_helpers.rb @@ -0,0 +1,12 @@ +# frozen_string_literal: true + +module ReloadHelpers + def reload_models(*models) + models.map(&:reload) + end + + def subject_and_reload(*models) + subject + reload_models(*models) + end +end diff --git a/spec/support/shared_examples/querying_shared_examples.rb b/spec/support/shared_examples/querying_shared_examples.rb new file mode 100644 index 00000000000..1f554ddb441 --- /dev/null +++ b/spec/support/shared_examples/querying_shared_examples.rb @@ -0,0 +1,23 @@ +# frozen_string_literal: true + +def update_column_regex(column) + /UPDATE.+SET.+#{column}[^=*]=.+FROM.*/m +end + +RSpec.shared_examples 'update on column' do |column| + it "#{column} column updated" do + qr = ActiveRecord::QueryRecorder.new do + subject + end + expect(qr.log).to include a_string_matching update_column_regex(column) + end +end + +RSpec.shared_examples 'no update on column' do |column| + it "#{column} column is not updated" do + qr = ActiveRecord::QueryRecorder.new do + subject + end + expect(qr.log).not_to include a_string_matching update_column_regex(column) + end +end