From 4952a24f584aea1e8e878f5ca276b2682422fbd4 Mon Sep 17 00:00:00 2001 From: Sean McGivern Date: Fri, 5 Aug 2016 13:24:15 +0100 Subject: [PATCH] Find match line headers by backtracking This is more efficient for large files than performing a regex match on every single line. --- lib/gitlab/conflict/file.rb | 35 ++++++--- spec/lib/gitlab/conflict/file_spec.rb | 104 ++++++++++++++++++++++++++ 2 files changed, 128 insertions(+), 11 deletions(-) diff --git a/lib/gitlab/conflict/file.rb b/lib/gitlab/conflict/file.rb index 3560fae5b09..926b65d46fc 100644 --- a/lib/gitlab/conflict/file.rb +++ b/lib/gitlab/conflict/file.rb @@ -18,6 +18,7 @@ module Gitlab @our_mode = conflict[:ours][:mode] @merge_request = merge_request @repository = merge_request.project.repository + @match_line_headers = {} end # Array of Gitlab::Diff::Line objects @@ -71,13 +72,6 @@ module Gitlab def sections return @sections if @sections - # Any line beginning with a letter, an underscore, or a dollar can be used in a - # match line header. Only context sections can contain match lines, as match lines - # have to exist in both versions of the file. - candidate_match_headers = lines.map do |line| - " #{line.text}" if line.text.match(/\A[A-Za-z$_]/) && line.type.nil? - end - chunked_lines = lines.chunk { |line| line.type.nil? } match_line = nil @@ -98,7 +92,7 @@ module Gitlab # Ensure any existing match line has text for all lines up to the last # line of its context. - update_match_line_text(match_line, head_lines.last, candidate_match_headers) + update_match_line_text(match_line, head_lines.last) # Insert a new match line after the created gap. match_line = create_match_line(tail_lines.first) @@ -128,7 +122,7 @@ module Gitlab # We want to update the match line's text every time unless we've already # created a gap and its corresponding match line. - update_match_line_text(match_line, lines.last, candidate_match_headers) unless section + update_match_line_text(match_line, lines.last) unless section section ||= { conflict: !no_conflict, lines: lines } section[:id] = line_code(lines.first) unless no_conflict @@ -144,13 +138,32 @@ module Gitlab Gitlab::Diff::Line.new('', 'match', line.index, line.old_pos, line.new_pos) end + # Any line beginning with a letter, an underscore, or a dollar can be used in a + # match line header. Only context sections can contain match lines, as match lines + # have to exist in both versions of the file. + def find_match_line_header(index) + return @match_line_headers[index] if @match_line_headers.key?(index) + + @match_line_headers[index] = begin + if index >= 0 + line = lines[index] + + if line.type.nil? && line.text.match(/\A[A-Za-z$_]/) + " #{line.text}" + else + find_match_line_header(index - 1) + end + end + end + end + # Set the match line's text for the current line. A match line takes its start # position and context header (where present) from itself, and its end position from # the line passed in. - def update_match_line_text(match_line, line, headers) + def update_match_line_text(match_line, line) return unless match_line - header = headers.first(match_line.index).compact.last + header = find_match_line_header(match_line.index - 1) match_line.text = "@@ -#{match_line.old_pos},#{line.old_pos} +#{match_line.new_pos},#{line.new_pos} @@#{header}" end diff --git a/spec/lib/gitlab/conflict/file_spec.rb b/spec/lib/gitlab/conflict/file_spec.rb index 643c5b31791..60020487061 100644 --- a/spec/lib/gitlab/conflict/file_spec.rb +++ b/spec/lib/gitlab/conflict/file_spec.rb @@ -142,6 +142,110 @@ describe Gitlab::Conflict::File, lib: true do expect(section_ids.uniq).to eq(section_ids) end + + context 'with an example file' do + let(:file) do + <>>>>>> files/ruby/regex.rb +end + +# Some extra lines +# To force a match line +# To be created + +def path_regexp + default_regexp +end + +<<<<<<< files/ruby/regex.rb +def archive_formats_regexp + /(zip|tar|7z|tar\.gz|tgz|gz|tar\.bz2|tbz|tbz2|tb2|bz2)/ +======= +def archive_formats_regex + %r{(zip|tar|7z|tar\.gz|tgz|gz|tar\.bz2|tbz|tbz2|tb2|bz2)} +>>>>>>> files/ruby/regex.rb +end + +def git_reference_regexp + # Valid git ref regexp, see: + # https://www.kernel.org/pub/software/scm/git/docs/git-check-ref-format.html + %r{ + (?! + (?# doesn't begins with) + \/| (?# rule #6) + (?# doesn't contain) + .*(?: + [\/.]\.| (?# rule #1,3) + \/\/| (?# rule #6) + @\{| (?# rule #8) + \\ (?# rule #9) + ) + ) + [^\000-\040\177~^:?*\[]+ (?# rule #4-5) + (?# doesn't end with) + (?>>>>>> files/ruby/regex.rb +end +FILE + end + + let(:conflict_file) { Gitlab::Conflict::File.new({ data: file }, conflict, merge_request: merge_request) } + let(:sections) { conflict_file.sections } + + it 'sets the correct match line headers' do + expect(sections[0][:lines].first).to have_attributes(type: 'match', text: '@@ -3,14 +3,14 @@') + expect(sections[3][:lines].first).to have_attributes(type: 'match', text: '@@ -19,26 +19,26 @@ def path_regexp') + expect(sections[6][:lines].first).to have_attributes(type: 'match', text: '@@ -47,52 +47,52 @@ end') + end + + it 'does not add match lines where they are not needed' do + expect(sections[1][:lines].first.type).not_to eq('match') + expect(sections[2][:lines].first.type).not_to eq('match') + expect(sections[4][:lines].first.type).not_to eq('match') + expect(sections[5][:lines].first.type).not_to eq('match') + expect(sections[7][:lines].first.type).not_to eq('match') + end + + it 'creates context sections of the correct length' do + expect(sections[0][:lines].reject(&:type).length).to eq(3) + expect(sections[2][:lines].reject(&:type).length).to eq(3) + expect(sections[3][:lines].reject(&:type).length).to eq(3) + expect(sections[5][:lines].reject(&:type).length).to eq(3) + expect(sections[6][:lines].reject(&:type).length).to eq(3) + expect(sections[8][:lines].reject(&:type).length).to eq(1) + end + end end describe '#as_json' do