Merge branch '41677-branch-name-omitted-due-to-bad-utf-8-conversion-by-gitaly-ref-handler' into 'master'
Resolve "Branch name omitted due to bad UTF-8 conversion by Gitaly ref handler" Closes #41677 See merge request gitlab-org/gitlab-ce!16243
This commit is contained in:
commit
f7afb14c11
|
@ -14,14 +14,7 @@ module Gitlab
|
|||
ENCODING_CONFIDENCE_THRESHOLD = 50
|
||||
|
||||
def encode!(message)
|
||||
return nil unless message.respond_to?(:force_encoding)
|
||||
return message if message.encoding == Encoding::UTF_8 && message.valid_encoding?
|
||||
|
||||
if message.respond_to?(:frozen?) && message.frozen?
|
||||
message = message.dup
|
||||
end
|
||||
|
||||
message.force_encoding("UTF-8")
|
||||
message = force_encode_utf8(message)
|
||||
return message if message.valid_encoding?
|
||||
|
||||
# return message if message type is binary
|
||||
|
@ -35,6 +28,8 @@ module Gitlab
|
|||
|
||||
# encode and clean the bad chars
|
||||
message.replace clean(message)
|
||||
rescue ArgumentError
|
||||
return nil
|
||||
rescue
|
||||
encoding = detect ? detect[:encoding] : "unknown"
|
||||
"--broken encoding: #{encoding}"
|
||||
|
@ -54,8 +49,8 @@ module Gitlab
|
|||
end
|
||||
|
||||
def encode_utf8(message)
|
||||
return nil unless message.is_a?(String)
|
||||
return message if message.encoding == Encoding::UTF_8 && message.valid_encoding?
|
||||
message = force_encode_utf8(message)
|
||||
return message if message.valid_encoding?
|
||||
|
||||
detect = CharlockHolmes::EncodingDetector.detect(message)
|
||||
if detect && detect[:encoding]
|
||||
|
@ -69,6 +64,8 @@ module Gitlab
|
|||
else
|
||||
clean(message)
|
||||
end
|
||||
rescue ArgumentError
|
||||
return nil
|
||||
end
|
||||
|
||||
def encode_binary(s)
|
||||
|
@ -83,6 +80,15 @@ module Gitlab
|
|||
|
||||
private
|
||||
|
||||
def force_encode_utf8(message)
|
||||
raise ArgumentError unless message.respond_to?(:force_encoding)
|
||||
return message if message.encoding == Encoding::UTF_8 && message.valid_encoding?
|
||||
|
||||
message = message.dup if message.respond_to?(:frozen?) && message.frozen?
|
||||
|
||||
message.force_encoding("UTF-8")
|
||||
end
|
||||
|
||||
def clean(message)
|
||||
message.encode("UTF-16BE", undef: :replace, invalid: :replace, replace: "")
|
||||
.encode("UTF-8")
|
||||
|
|
|
@ -11,7 +11,7 @@ module Gitlab
|
|||
include Gitlab::EncodingHelper
|
||||
|
||||
def ref_name(ref)
|
||||
encode_utf8(ref).sub(/\Arefs\/(tags|heads|remotes)\//, '')
|
||||
encode!(ref).sub(/\Arefs\/(tags|heads|remotes)\//, '')
|
||||
end
|
||||
|
||||
def branch_name(ref)
|
||||
|
|
|
@ -120,6 +120,24 @@ describe Gitlab::EncodingHelper do
|
|||
it 'returns empty string on conversion errors' do
|
||||
expect { ext_class.encode_utf8('') }.not_to raise_error(ArgumentError)
|
||||
end
|
||||
|
||||
context 'with strings that can be forcefully encoded into utf8' do
|
||||
let(:test_string) do
|
||||
"refs/heads/FixSymbolsTitleDropdown".encode("ASCII-8BIT")
|
||||
end
|
||||
let(:expected_string) do
|
||||
"refs/heads/FixSymbolsTitleDropdown".encode("UTF-8")
|
||||
end
|
||||
|
||||
subject { ext_class.encode_utf8(test_string) }
|
||||
|
||||
it "doesn't use CharlockHolmes if the encoding can be forced into utf_8" do
|
||||
expect(CharlockHolmes::EncodingDetector).not_to receive(:detect)
|
||||
|
||||
expect(subject).to eq(expected_string)
|
||||
expect(subject.encoding.name).to eq('UTF-8')
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
describe '#clean' do
|
||||
|
|
Loading…
Reference in New Issue