Avoid unnecessary force_encoding operations

They're costly. This will also avoid some edge cases where charlock_holmes assigns a weird encoding to a perfectly valid UTF-8 string.
2017-06-14 12:11:03 -04:00 · 2017-06-14 12:11:03 -04:00 · 520866a0d0
commit 520866a0d0
parent 00c15cc27c
2 changed files with 19 additions and 4 deletions
--- a/lib/gitlab/encoding_helper.rb
+++ b/lib/gitlab/encoding_helper.rb
@ -14,9 +14,9 @@ module Gitlab
    ENCODING_CONFIDENCE_THRESHOLD = 50

    def encode!(message)
-      return nil unless message.respond_to? :force_encoding
+      return nil unless message.respond_to?(:force_encoding)
+      return message if message.encoding == Encoding::UTF_8 && message.valid_encoding?

-      # if message is utf-8 encoding, just return it
      message.force_encoding("UTF-8")
      return message if message.valid_encoding?

@ -50,6 +50,9 @@ module Gitlab
    end

    def encode_utf8(message)
+      return nil unless message.is_a?(String)
+      return message if message.encoding == Encoding::UTF_8 && message.valid_encoding?
+
      detect = CharlockHolmes::EncodingDetector.detect(message)
      if detect && detect[:encoding]
        begin
--- a/spec/lib/gitlab/encoding_helper_spec.rb
+++ b/spec/lib/gitlab/encoding_helper_spec.rb
@ -6,6 +6,9 @@ describe Gitlab::EncodingHelper do

  describe '#encode!' do
    [
+      ["nil", nil, nil],
+      ["empty string", "".encode("ASCII-8BIT"), "".encode("UTF-8")],
+      ["invalid utf-8 encoded string", "my bad string\xE5".force_encoding("UTF-8"), "my bad string"],
      [
        'leaves ascii only string as is',
        'ascii only string',
@ -81,6 +84,9 @@ describe Gitlab::EncodingHelper do

  describe '#encode_utf8' do
    [
+      ["nil", nil, nil],
+      ["empty string", "".encode("ASCII-8BIT"), "".encode("UTF-8")],
+      ["invalid utf-8 encoded string", "my bad string\xE5".force_encoding("UTF-8"), "my bad stringå"],
      [
        "encodes valid utf8 encoded string to utf8",
        "λ, λ, λ".encode("UTF-8"),
@ -95,12 +101,18 @@ describe Gitlab::EncodingHelper do
        "encodes valid ISO-8859-1 encoded string to utf8",
        "Rüby ist eine Programmiersprache. Wir verlängern den text damit ICU die Sprache erkennen kann.".encode("ISO-8859-1", "UTF-8"),
        "Rüby ist eine Programmiersprache. Wir verlängern den text damit ICU die Sprache erkennen kann.".encode("UTF-8")
+      ],
+      [
+        # Test case from https://gitlab.com/gitlab-org/gitlab-ce/issues/39227
+        "Equifax branch name",
+        "refs/heads/Equifax".encode("UTF-8"),
+        "refs/heads/Equifax".encode("UTF-8")
      ]
    ].each do |description, test_string, xpect|
      it description do
-        r = ext_class.encode_utf8(test_string.force_encoding('UTF-8'))
+        r = ext_class.encode_utf8(test_string)
        expect(r).to eq(xpect)
-        expect(r.encoding.name).to eq('UTF-8')
+        expect(r.encoding.name).to eq('UTF-8') if xpect
      end
    end