From 7cf4947792647fd985c38ebf37c27989fd5a1632 Mon Sep 17 00:00:00 2001 From: Oswaldo Ferreira Date: Sun, 16 Dec 2018 14:00:43 -0200 Subject: [PATCH] Cache diff highlight in discussions This commit handles note diffs caching, which considerably improves the performance on merge requests with lots of comments. Important to note that the caching approach taken here is different from `Gitlab::Diff::HighlightCache`. We do not reset the whole cache when a new push is sent or anything else. That's because discussions diffs are persisted and do not change. --- .../projects/merge_requests_controller.rb | 6 ++ app/models/concerns/discussion_on_diff.rb | 20 +++- app/models/concerns/noteable.rb | 4 + app/models/merge_request.rb | 22 ++++ app/models/note_diff_file.rb | 15 +++ ...sw-cache-discussions-diff-highlighting.yml | 6 ++ lib/gitlab/diff/file.rb | 26 ++++- .../discussions_diff/file_collection.rb | 76 +++++++++++++ .../discussions_diff/highlight_cache.rb | 67 ++++++++++++ .../merge_requests_controller_spec.rb | 64 +++++++++++ .../discussions_diff/file_collection_spec.rb | 61 +++++++++++ .../discussions_diff/highlight_cache_spec.rb | 102 ++++++++++++++++++ spec/models/merge_request_spec.rb | 51 +++++++++ 13 files changed, 516 insertions(+), 4 deletions(-) create mode 100644 changelogs/unreleased/osw-cache-discussions-diff-highlighting.yml create mode 100644 lib/gitlab/discussions_diff/file_collection.rb create mode 100644 lib/gitlab/discussions_diff/highlight_cache.rb create mode 100644 spec/lib/gitlab/discussions_diff/file_collection_spec.rb create mode 100644 spec/lib/gitlab/discussions_diff/highlight_cache_spec.rb diff --git a/app/controllers/projects/merge_requests_controller.rb b/app/controllers/projects/merge_requests_controller.rb index da9316d5f22..db09a9af6e9 100644 --- a/app/controllers/projects/merge_requests_controller.rb +++ b/app/controllers/projects/merge_requests_controller.rb @@ -220,6 +220,12 @@ class Projects::MergeRequestsController < Projects::MergeRequests::ApplicationCo head :ok end + def discussions + merge_request.preload_discussions_diff_highlight + + super + end + protected alias_method :subscribable_resource, :merge_request diff --git a/app/models/concerns/discussion_on_diff.rb b/app/models/concerns/discussion_on_diff.rb index 86b61248534..e4e5928f5cf 100644 --- a/app/models/concerns/discussion_on_diff.rb +++ b/app/models/concerns/discussion_on_diff.rb @@ -9,7 +9,7 @@ module DiscussionOnDiff included do delegate :line_code, :original_line_code, - :diff_file, + :note_diff_file, :diff_line, :active?, :created_at_diff?, @@ -60,6 +60,13 @@ module DiscussionOnDiff prev_lines end + def diff_file + strong_memoize(:diff_file) do + # Falling back here is important as `note_diff_files` are created async. + fetch_preloaded_diff_file || first_note.diff_file + end + end + def line_code_in_diffs(diff_refs) if active?(diff_refs) line_code @@ -67,4 +74,15 @@ module DiscussionOnDiff original_line_code end end + + private + + def fetch_preloaded_diff_file + fetch_preloaded_diff = + context_noteable && + context_noteable.preloads_discussion_diff_highlighting? && + note_diff_file + + context_noteable.discussions_diffs.find_by_id(note_diff_file.id) if fetch_preloaded_diff + end end diff --git a/app/models/concerns/noteable.rb b/app/models/concerns/noteable.rb index f2cad09e779..29476654bf7 100644 --- a/app/models/concerns/noteable.rb +++ b/app/models/concerns/noteable.rb @@ -34,6 +34,10 @@ module Noteable false end + def preloads_discussion_diff_highlighting? + false + end + def discussion_notes notes end diff --git a/app/models/merge_request.rb b/app/models/merge_request.rb index 944b9f72396..b937bef100b 100644 --- a/app/models/merge_request.rb +++ b/app/models/merge_request.rb @@ -408,6 +408,28 @@ class MergeRequest < ActiveRecord::Base merge_request_diffs.where.not(id: merge_request_diff.id) end + def preloads_discussion_diff_highlighting? + true + end + + def preload_discussions_diff_highlight + preloadable_files = note_diff_files.for_commit_or_unresolved + + discussions_diffs.load_highlight(preloadable_files.pluck(:id)) + end + + def discussions_diffs + strong_memoize(:discussions_diffs) do + Gitlab::DiscussionsDiff::FileCollection.new(note_diff_files.to_a) + end + end + + def note_diff_files + NoteDiffFile + .where(diff_note: discussion_notes) + .includes(diff_note: :project) + end + def diff_size # Calling `merge_request_diff.diffs.real_size` will also perform # highlighting, which we don't need here. diff --git a/app/models/note_diff_file.rb b/app/models/note_diff_file.rb index 27aef7adc48..e369122003e 100644 --- a/app/models/note_diff_file.rb +++ b/app/models/note_diff_file.rb @@ -3,7 +3,22 @@ class NoteDiffFile < ActiveRecord::Base include DiffFile + scope :for_commit_or_unresolved, -> do + joins(:diff_note).where("resolved_at IS NULL OR noteable_type = 'Commit'") + end + + delegate :original_position, :project, to: :diff_note + belongs_to :diff_note, inverse_of: :note_diff_file validates :diff_note, presence: true + + def raw_diff_file + raw_diff = Gitlab::Git::Diff.new(to_hash) + + Gitlab::Diff::File.new(raw_diff, + repository: project.repository, + diff_refs: original_position.diff_refs, + unique_identifier: id) + end end diff --git a/changelogs/unreleased/osw-cache-discussions-diff-highlighting.yml b/changelogs/unreleased/osw-cache-discussions-diff-highlighting.yml new file mode 100644 index 00000000000..7abc7d85794 --- /dev/null +++ b/changelogs/unreleased/osw-cache-discussions-diff-highlighting.yml @@ -0,0 +1,6 @@ +--- +title: Improve the loading time on merge request's discussion page by caching diff + highlight +merge_request: 23857 +author: +type: performance diff --git a/lib/gitlab/diff/file.rb b/lib/gitlab/diff/file.rb index b5fc8d364c8..bcbded6be9a 100644 --- a/lib/gitlab/diff/file.rb +++ b/lib/gitlab/diff/file.rb @@ -3,7 +3,7 @@ module Gitlab module Diff class File - attr_reader :diff, :repository, :diff_refs, :fallback_diff_refs + attr_reader :diff, :repository, :diff_refs, :fallback_diff_refs, :unique_identifier delegate :new_file?, :deleted_file?, :renamed_file?, :old_path, :new_path, :a_mode, :b_mode, :mode_changed?, @@ -22,12 +22,20 @@ module Gitlab DiffViewer::Image ].sort_by { |v| v.binary? ? 0 : 1 }.freeze - def initialize(diff, repository:, diff_refs: nil, fallback_diff_refs: nil, stats: nil) + def initialize( + diff, + repository:, + diff_refs: nil, + fallback_diff_refs: nil, + stats: nil, + unique_identifier: nil) + @diff = diff @stats = stats @repository = repository @diff_refs = diff_refs @fallback_diff_refs = fallback_diff_refs + @unique_identifier = unique_identifier @unfolded = false # Ensure items are collected in the the batch @@ -67,7 +75,15 @@ module Gitlab def line_for_position(pos) return nil unless pos.position_type == 'text' - diff_lines.find { |line| line.old_line == pos.old_line && line.new_line == pos.new_line } + # This method is normally used to find which line the diff was + # commented on, and in this context, it's normally the raw diff persisted + # at `note_diff_files`, which is a fraction of the entire diff + # (it goes from the first line, to the commented line, or + # one line below). Therefore it's more performant to fetch + # from bottom to top instead of the other way around. + diff_lines + .reverse_each + .find { |line| line.old_line == pos.old_line && line.new_line == pos.new_line } end def position_for_line_code(code) @@ -166,6 +182,10 @@ module Gitlab @unfolded end + def highlight_loaded? + @highlighted_diff_lines.present? + end + def highlighted_diff_lines @highlighted_diff_lines ||= Gitlab::Diff::Highlight.new(self, repository: self.repository).highlight diff --git a/lib/gitlab/discussions_diff/file_collection.rb b/lib/gitlab/discussions_diff/file_collection.rb new file mode 100644 index 00000000000..4ab7314f509 --- /dev/null +++ b/lib/gitlab/discussions_diff/file_collection.rb @@ -0,0 +1,76 @@ +# frozen_string_literal: true + +module Gitlab + module DiscussionsDiff + class FileCollection + include Gitlab::Utils::StrongMemoize + + def initialize(collection) + @collection = collection + end + + # Returns a Gitlab::Diff::File with the given ID (`unique_identifier` in + # Gitlab::Diff::File). + def find_by_id(id) + diff_files_indexed_by_id[id] + end + + # Writes cache and preloads highlighted diff lines for + # object IDs, in @collection. + # + # highlightable_ids - Diff file `Array` responding to ID. The ID will be used + # to generate the cache key. + # + # - Highlight cache is written just for uncached diff files + # - The cache content is not updated (there's no need to do so) + def load_highlight(highlightable_ids) + preload_highlighted_lines(highlightable_ids) + end + + private + + def preload_highlighted_lines(ids) + cached_content = read_cache(ids) + + uncached_ids = ids.select.each_with_index { |_, i| cached_content[i].nil? } + mapping = highlighted_lines_by_ids(uncached_ids) + + HighlightCache.write_multiple(mapping) + + diffs = diff_files_indexed_by_id.values_at(*ids) + + diffs.zip(cached_content).each do |diff, cached_lines| + next unless diff && cached_lines + + diff.highlighted_diff_lines = cached_lines + end + end + + def read_cache(ids) + HighlightCache.read_multiple(ids) + end + + def diff_files_indexed_by_id + strong_memoize(:diff_files_indexed_by_id) do + diff_files.index_by(&:unique_identifier) + end + end + + def diff_files + strong_memoize(:diff_files) do + @collection.map(&:raw_diff_file) + end + end + + # Processes the diff lines highlighting for diff files matching the given + # IDs. + # + # Returns a Hash with { id => [Array of Gitlab::Diff::line], ...] + def highlighted_lines_by_ids(ids) + diff_files_indexed_by_id.slice(*ids).each_with_object({}) do |(id, file), hash| + hash[id] = file.highlighted_diff_lines.map(&:to_hash) + end + end + end + end +end diff --git a/lib/gitlab/discussions_diff/highlight_cache.rb b/lib/gitlab/discussions_diff/highlight_cache.rb new file mode 100644 index 00000000000..270cfb89488 --- /dev/null +++ b/lib/gitlab/discussions_diff/highlight_cache.rb @@ -0,0 +1,67 @@ +# frozen_string_literal: true +# +module Gitlab + module DiscussionsDiff + class HighlightCache + class << self + VERSION = 1 + EXPIRATION = 1.week + + # Sets multiple keys to a given value. The value + # is serialized as JSON. + # + # mapping - Write multiple cache values at once + def write_multiple(mapping) + Redis::Cache.with do |redis| + redis.multi do |multi| + mapping.each do |raw_key, value| + key = cache_key_for(raw_key) + + multi.set(key, value.to_json, ex: EXPIRATION) + end + end + end + end + + # Reads multiple cache keys at once. + # + # raw_keys - An Array of unique cache keys, without namespaces. + # + # It returns a list of deserialized diff lines. Ex.: + # [[Gitlab::Diff::Line, ...], [Gitlab::Diff::Line]] + def read_multiple(raw_keys) + return [] if raw_keys.empty? + + keys = raw_keys.map { |id| cache_key_for(id) } + + content = + Redis::Cache.with do |redis| + redis.mget(keys) + end + + content.map! do |lines| + next unless lines + + JSON.parse(lines).map! do |line| + line = line.with_indifferent_access + rich_text = line[:rich_text] + line[:rich_text] = rich_text&.html_safe + + Gitlab::Diff::Line.init_from_hash(line) + end + end + end + + def cache_key_for(raw_key) + "#{cache_key_prefix}:#{raw_key}" + end + + private + + def cache_key_prefix + "#{Redis::Cache::CACHE_NAMESPACE}:#{VERSION}:discussion-highlight" + end + end + end + end +end diff --git a/spec/controllers/projects/merge_requests_controller_spec.rb b/spec/controllers/projects/merge_requests_controller_spec.rb index 1ab227bde39..7c167420e1f 100644 --- a/spec/controllers/projects/merge_requests_controller_spec.rb +++ b/spec/controllers/projects/merge_requests_controller_spec.rb @@ -957,6 +957,70 @@ describe Projects::MergeRequestsController do end end + describe 'GET discussions' do + context 'when authenticated' do + before do + project.add_developer(user) + sign_in(user) + end + + it 'returns 200' do + get :discussions, namespace_id: project.namespace, project_id: project, id: merge_request.iid + + expect(response.status).to eq(200) + end + + context 'highlight preloading' do + context 'with commit diff notes' do + let!(:commit_diff_note) do + create(:diff_note_on_commit, project: merge_request.project) + end + + it 'preloads notes diffs highlights' do + expect_next_instance_of(Gitlab::DiscussionsDiff::FileCollection) do |collection| + note_diff_file = commit_diff_note.note_diff_file + + expect(collection).to receive(:load_highlight).with([note_diff_file.id]).and_call_original + expect(collection).to receive(:find_by_id).with(note_diff_file.id).and_call_original + end + + get :discussions, namespace_id: project.namespace, project_id: project, id: merge_request.iid + end + end + + context 'with diff notes' do + let!(:diff_note) do + create(:diff_note_on_merge_request, noteable: merge_request, project: merge_request.project) + end + + it 'preloads notes diffs highlights' do + expect_next_instance_of(Gitlab::DiscussionsDiff::FileCollection) do |collection| + note_diff_file = diff_note.note_diff_file + + expect(collection).to receive(:load_highlight).with([note_diff_file.id]).and_call_original + expect(collection).to receive(:find_by_id).with(note_diff_file.id).and_call_original + end + + get :discussions, namespace_id: project.namespace, project_id: project, id: merge_request.iid + end + + it 'does not preload highlights when diff note is resolved' do + Notes::ResolveService.new(diff_note.project, user).execute(diff_note) + + expect_next_instance_of(Gitlab::DiscussionsDiff::FileCollection) do |collection| + note_diff_file = diff_note.note_diff_file + + expect(collection).to receive(:load_highlight).with([]).and_call_original + expect(collection).to receive(:find_by_id).with(note_diff_file.id).and_call_original + end + + get :discussions, namespace_id: project.namespace, project_id: project, id: merge_request.iid + end + end + end + end + end + describe 'GET edit' do it 'responds successfully' do get :edit, params: { namespace_id: project.namespace, project_id: project, id: merge_request } diff --git a/spec/lib/gitlab/discussions_diff/file_collection_spec.rb b/spec/lib/gitlab/discussions_diff/file_collection_spec.rb new file mode 100644 index 00000000000..0489206458b --- /dev/null +++ b/spec/lib/gitlab/discussions_diff/file_collection_spec.rb @@ -0,0 +1,61 @@ +# frozen_string_literal: true + +require 'spec_helper' + +describe Gitlab::DiscussionsDiff::FileCollection do + let(:merge_request) { create(:merge_request) } + let!(:diff_note_a) { create(:diff_note_on_merge_request, project: merge_request.project, noteable: merge_request) } + let!(:diff_note_b) { create(:diff_note_on_merge_request, project: merge_request.project, noteable: merge_request) } + let(:note_diff_file_a) { diff_note_a.note_diff_file } + let(:note_diff_file_b) { diff_note_b.note_diff_file } + + subject { described_class.new([note_diff_file_a, note_diff_file_b]) } + + describe '#load_highlight', :clean_gitlab_redis_shared_state do + it 'writes uncached diffs highlight' do + file_a_caching_content = diff_note_a.diff_file.highlighted_diff_lines.map(&:to_hash) + file_b_caching_content = diff_note_b.diff_file.highlighted_diff_lines.map(&:to_hash) + + expect(Gitlab::DiscussionsDiff::HighlightCache) + .to receive(:write_multiple) + .with({ note_diff_file_a.id => file_a_caching_content, + note_diff_file_b.id => file_b_caching_content }) + .and_call_original + + subject.load_highlight([note_diff_file_a.id, note_diff_file_b.id]) + end + + it 'does not write cache for already cached file' do + subject.load_highlight([note_diff_file_a.id]) + + file_b_caching_content = diff_note_b.diff_file.highlighted_diff_lines.map(&:to_hash) + + expect(Gitlab::DiscussionsDiff::HighlightCache) + .to receive(:write_multiple) + .with({ note_diff_file_b.id => file_b_caching_content }) + .and_call_original + + subject.load_highlight([note_diff_file_a.id, note_diff_file_b.id]) + end + + it 'does not err when given ID does not exist in @collection' do + expect { subject.load_highlight([999]) }.not_to raise_error + end + + it 'loaded diff files have highlighted lines loaded' do + subject.load_highlight([note_diff_file_a.id]) + + diff_file = subject.find_by_id(note_diff_file_a.id) + + expect(diff_file.highlight_loaded?).to be(true) + end + + it 'not loaded diff files does not have highlighted lines loaded' do + subject.load_highlight([note_diff_file_a.id]) + + diff_file = subject.find_by_id(note_diff_file_b.id) + + expect(diff_file.highlight_loaded?).to be(false) + end + end +end diff --git a/spec/lib/gitlab/discussions_diff/highlight_cache_spec.rb b/spec/lib/gitlab/discussions_diff/highlight_cache_spec.rb new file mode 100644 index 00000000000..fe26ebb8796 --- /dev/null +++ b/spec/lib/gitlab/discussions_diff/highlight_cache_spec.rb @@ -0,0 +1,102 @@ +# frozen_string_literal: true + +require 'spec_helper' + +describe Gitlab::DiscussionsDiff::HighlightCache, :clean_gitlab_redis_cache do + describe '#write_multiple' do + it 'sets multiple keys serializing content as JSON' do + mapping = { + 3 => [ + { + text: 'foo', + type: 'new', + index: 2, + old_pos: 10, + new_pos: 11, + line_code: 'xpto', + rich_text: 'blops' + }, + { + text: 'foo', + type: 'new', + index: 3, + old_pos: 11, + new_pos: 12, + line_code: 'xpto', + rich_text: 'blips' + } + ] + } + + described_class.write_multiple(mapping) + + mapping.each do |key, value| + full_key = described_class.cache_key_for(key) + found = Gitlab::Redis::Cache.with { |r| r.get(full_key) } + + expect(found).to eq(value.to_json) + end + end + end + + describe '#read_multiple' do + it 'reads multiple keys and serializes content into Gitlab::Diff::Line objects' do + mapping = { + 3 => [ + { + text: 'foo', + type: 'new', + index: 2, + old_pos: 11, + new_pos: 12, + line_code: 'xpto', + rich_text: 'blops' + }, + { + text: 'foo', + type: 'new', + index: 3, + old_pos: 10, + new_pos: 11, + line_code: 'xpto', + rich_text: 'blops' + } + ] + } + + described_class.write_multiple(mapping) + + found = described_class.read_multiple(mapping.keys) + + expect(found.size).to eq(1) + expect(found.first.size).to eq(2) + expect(found.first).to all(be_a(Gitlab::Diff::Line)) + end + + it 'returns nil when cached key is not found' do + mapping = { + 3 => [ + { + text: 'foo', + type: 'new', + index: 2, + old_pos: 11, + new_pos: 12, + line_code: 'xpto', + rich_text: 'blops' + } + ] + } + + described_class.write_multiple(mapping) + + found = described_class.read_multiple([2, 3]) + + expect(found.size).to eq(2) + + expect(found.first).to eq(nil) + expect(found.second.size).to eq(1) + expect(found.second).to all(be_a(Gitlab::Diff::Line)) + end + end +end diff --git a/spec/models/merge_request_spec.rb b/spec/models/merge_request_spec.rb index 6793d4e8718..4cc3a6a3644 100644 --- a/spec/models/merge_request_spec.rb +++ b/spec/models/merge_request_spec.rb @@ -559,6 +559,57 @@ describe MergeRequest do end end + describe '#preload_discussions_diff_highlight' do + let(:merge_request) { create(:merge_request) } + + context 'with commit diff note' do + let(:other_merge_request) { create(:merge_request) } + + let!(:diff_note) do + create(:diff_note_on_commit, project: merge_request.project) + end + + let!(:other_mr_diff_note) do + create(:diff_note_on_commit, project: other_merge_request.project) + end + + it 'preloads diff highlighting' do + expect_next_instance_of(Gitlab::DiscussionsDiff::FileCollection) do |collection| + note_diff_file = diff_note.note_diff_file + + expect(collection) + .to receive(:load_highlight) + .with([note_diff_file.id]).and_call_original + end + + merge_request.preload_discussions_diff_highlight + end + end + + context 'with merge request diff note' do + let!(:unresolved_diff_note) do + create(:diff_note_on_merge_request, project: merge_request.project, noteable: merge_request) + end + + let!(:resolved_diff_note) do + create(:diff_note_on_merge_request, :resolved, project: merge_request.project, noteable: merge_request) + end + + it 'preloads diff highlighting' do + expect_next_instance_of(Gitlab::DiscussionsDiff::FileCollection) do |collection| + note_diff_file = unresolved_diff_note.note_diff_file + + expect(collection) + .to receive(:load_highlight) + .with([note_diff_file.id]) + .and_call_original + end + + merge_request.preload_discussions_diff_highlight + end + end + end + describe '#diff_size' do let(:merge_request) do build(:merge_request, source_branch: 'expand-collapse-files', target_branch: 'master')