96c77bf775
We wanted to check that the text could be encoded as JSON, because conflict resolutions are passed back and forth in that format, so the file itself must be UTF-8. However, all strings from the repository come back without an encoding from Rugged, making them ASCII_8BIT. We force to UTF-8, and reject if it's invalid. This still leaves the problem of a file that 'looks like' UTF-8 (contains valid UTF-8 byte sequences), but isn't. However: 1. If the conflicts contain the problem bytes, the user will see that the file isn't displayed correctly. 2. If the problem bytes are outside of the conflict area, then we will write back the same bytes when we resolve the conflicts, even though we thought the encoding was UTF-8.
222 lines
7.4 KiB
Ruby
222 lines
7.4 KiB
Ruby
require 'spec_helper'
|
|
|
|
describe Gitlab::Conflict::Parser, lib: true do
|
|
# Parser under test; `described_class` is the class named in the outer `describe`.
let(:parser) { described_class.new }
|
|
|
|
describe '#parse' do
|
|
# Runs the parser on +text+, using README.md as the path on both sides of
# the conflict, and returns whatever the parser returns.
def parse_text(text)
  readme_path = 'README.md'

  parser.parse(text, our_path: readme_path, their_path: readme_path)
end
|
|
|
|
context 'when the file has valid conflicts' do
  # Fixture: a conflicted copy of files/ruby/regex.rb containing three
  # conflict sections. The heredoc is not a squiggly one, so its content and
  # terminator must stay flush with the left margin.
  let(:text) do
    <<CONFLICT
module Gitlab
module Regexp
extend self

def username_regexp
default_regexp
end

<<<<<<< files/ruby/regex.rb
def project_name_regexp
/\A[a-zA-Z0-9][a-zA-Z0-9_\-\. ]*\z/
end

def name_regexp
/\A[a-zA-Z0-9_\-\. ]*\z/
=======
def project_name_regex
%r{\A[a-zA-Z0-9][a-zA-Z0-9_\-\. ]*\z}
end

def name_regex
%r{\A[a-zA-Z0-9_\-\. ]*\z}
>>>>>>> files/ruby/regex.rb
end

def path_regexp
default_regexp
end

<<<<<<< files/ruby/regex.rb
def archive_formats_regexp
/(zip|tar|7z|tar\.gz|tgz|gz|tar\.bz2|tbz|tbz2|tb2|bz2)/
=======
def archive_formats_regex
%r{(zip|tar|7z|tar\.gz|tgz|gz|tar\.bz2|tbz|tbz2|tb2|bz2)}
>>>>>>> files/ruby/regex.rb
end

def git_reference_regexp
# Valid git ref regexp, see:
# https://www.kernel.org/pub/software/scm/git/docs/git-check-ref-format.html
%r{
(?!
(?# doesn't begins with)
\/| (?# rule #6)
(?# doesn't contain)
.*(?:
[\/.]\.| (?# rule #1,3)
\/\/| (?# rule #6)
@\{| (?# rule #8)
\\ (?# rule #9)
)
)
[^\000-\040\177~^:?*\[]+ (?# rule #4-5)
(?# doesn't end with)
(?<!\.lock) (?# rule #1)
(?<![\/.]) (?# rule #6-7)
}x
end

protected

<<<<<<< files/ruby/regex.rb
def default_regexp
/\A[.?]?[a-zA-Z0-9][a-zA-Z0-9_\-\.]*(?<!\.git)\z/
=======
def default_regex
%r{\A[.?]?[a-zA-Z0-9][a-zA-Z0-9_\-\.]*(?<!\.git)\z}
>>>>>>> files/ruby/regex.rb
end
end
end
CONFLICT
  end

  # Parsed result for the fixture; both sides of the conflict use the same path.
  let(:lines) do
    conflicted_path = 'files/ruby/regex.rb'

    parser.parse(text, our_path: conflicted_path, their_path: conflicted_path)
  end

  it 'sets our lines as new lines' do
    [8..13, 26..27, 56..57].each do |ours|
      expect(lines[ours]).to all(have_attributes(type: 'new'))
    end
  end

  it 'sets their lines as old lines' do
    [14..19, 28..29, 58..59].each do |theirs|
      expect(lines[theirs]).to all(have_attributes(type: 'old'))
    end
  end

  it 'sets non-conflicted lines as both' do
    [0..7, 20..25, 30..55, 60..62].each do |shared|
      expect(lines[shared]).to all(have_attributes(type: nil))
    end
  end

  it 'sets consecutive line numbers for index, old_pos, and new_pos' do
    # Lines only on our side carry no meaningful old position, and lines only
    # on their side carry no meaningful new position, so each is left out of
    # the corresponding sequence.
    old_positions = lines.reject { |line| line.type == 'new' }.map(&:old_pos)
    new_positions = lines.reject { |line| line.type == 'old' }.map(&:new_pos)

    expect(lines.map(&:index)).to eq((0..62).to_a)
    expect(old_positions).to eq((1..53).to_a)
    expect(new_positions).to eq((1..53).to_a)
  end
end
|
|
|
|
context 'when the file contents include conflict delimiters' do
  # These examples feed the parser conflict markers in an invalid order.
  # `parse_text` uses README.md for both sides, so markers naming another
  # path are expected to be treated as ordinary content.
  context 'when there is a non-start delimiter first' do
    it 'raises UnexpectedDelimiter when there is a middle delimiter first' do
      expect { parse_text('=======') }
        .to raise_error(Gitlab::Conflict::Parser::UnexpectedDelimiter)
    end

    it 'raises UnexpectedDelimiter when there is an end delimiter first' do
      expect { parse_text('>>>>>>> README.md') }
        .to raise_error(Gitlab::Conflict::Parser::UnexpectedDelimiter)
    end

    it 'does not raise when there is an end delimiter for a different path first' do
      expect { parse_text('>>>>>>> some-other-path.md') }
        .not_to raise_error
    end
  end

  context 'when a start delimiter is followed by a non-middle delimiter' do
    let(:start_text) { "<<<<<<< README.md\n" }
    let(:end_text) { "\n=======\n>>>>>>> README.md" }

    it 'raises UnexpectedDelimiter when it is followed by an end delimiter' do
      expect { parse_text(start_text + '>>>>>>> README.md' + end_text) }
        .to raise_error(Gitlab::Conflict::Parser::UnexpectedDelimiter)
    end

    it 'raises UnexpectedDelimiter when it is followed by another start delimiter' do
      expect { parse_text(start_text + start_text + end_text) }
        .to raise_error(Gitlab::Conflict::Parser::UnexpectedDelimiter)
    end

    # The input here is an END marker for another path; the example title
    # previously said "start delimiter", which did not match the input.
    it 'does not raise when it is followed by an end delimiter for a different path' do
      expect { parse_text(start_text + '>>>>>>> some-other-path.md' + end_text) }
        .not_to raise_error
    end
  end

  context 'when a middle delimiter is followed by a non-end delimiter' do
    let(:start_text) { "<<<<<<< README.md\n=======\n" }
    let(:end_text) { "\n>>>>>>> README.md" }

    it 'raises UnexpectedDelimiter when it is followed by another middle delimiter' do
      expect { parse_text(start_text + '=======' + end_text) }
        .to raise_error(Gitlab::Conflict::Parser::UnexpectedDelimiter)
    end

    it 'raises UnexpectedDelimiter when it is followed by a start delimiter' do
      expect { parse_text(start_text + start_text + end_text) }
        .to raise_error(Gitlab::Conflict::Parser::UnexpectedDelimiter)
    end

    it 'does not raise when it is followed by a start delimiter for another path' do
      expect { parse_text(start_text + '<<<<<<< some-other-path.md' + end_text) }
        .not_to raise_error
    end
  end

  it 'raises MissingEndDelimiter when there is no end delimiter at the end' do
    start_text = "<<<<<<< README.md\n=======\n"

    expect { parse_text(start_text) }
      .to raise_error(Gitlab::Conflict::Parser::MissingEndDelimiter)

    # An end marker for a different path does not close the conflict.
    expect { parse_text(start_text + '>>>>>>> some-other-path.md') }
      .to raise_error(Gitlab::Conflict::Parser::MissingEndDelimiter)
  end
end
|
|
|
|
context 'other file types' do
  # Tags +string+ as ASCII_8BIT in place and returns it.
  def as_binary(string)
    string.force_encoding(Encoding::ASCII_8BIT)
  end

  it 'raises UnmergeableFile when lines is blank, indicating a binary file' do
    ['', nil].each do |blank|
      expect { parse_text(blank) }
        .to raise_error(Gitlab::Conflict::Parser::UnmergeableFile)
    end
  end

  it 'raises UnmergeableFile when the file is over 200 KB' do
    # One byte more than 200 KB.
    oversized = 'a' * (200 * 1024 + 1)

    expect { parse_text(oversized) }
      .to raise_error(Gitlab::Conflict::Parser::UnmergeableFile)
  end

  # All text from Rugged has an encoding of ASCII_8BIT, so the strings in
  # the examples below are forced to that encoding as well.
  context 'when the file contains UTF-8 characters' do
    it 'does not raise' do
      expect { parse_text(as_binary("Espa\xC3\xB1a")) }
        .not_to raise_error
    end
  end

  context 'when the file contains non-UTF-8 characters' do
    it 'raises UnsupportedEncoding' do
      expect { parse_text(as_binary("a\xC4\xFC")) }
        .to raise_error(Gitlab::Conflict::Parser::UnsupportedEncoding)
    end
  end
end
|
|
end
|
|
end
|