From 1f2cc29c8f703cedc03645bc365fda068f33a86b Mon Sep 17 00:00:00 2001 From: Ahmad Sherif Date: Thu, 22 Mar 2018 21:22:20 +0100 Subject: [PATCH] Fix EncodingHelper#clean blowing up on UTF-16BE strings Closes gitaly#1101 --- lib/gitlab/encoding_helper.rb | 2 +- spec/lib/gitlab/encoding_helper_spec.rb | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/lib/gitlab/encoding_helper.rb b/lib/gitlab/encoding_helper.rb index 6659efa0961..0b8f6cfe3cb 100644 --- a/lib/gitlab/encoding_helper.rb +++ b/lib/gitlab/encoding_helper.rb @@ -90,7 +90,7 @@ module Gitlab end def clean(message) - message.encode("UTF-16BE", undef: :replace, invalid: :replace, replace: "") + message.encode("UTF-16BE", undef: :replace, invalid: :replace, replace: "".encode("UTF-16BE")) .encode("UTF-8") .gsub("\0".encode("UTF-8"), "") end diff --git a/spec/lib/gitlab/encoding_helper_spec.rb b/spec/lib/gitlab/encoding_helper_spec.rb index 83d431a7458..e68c9850f6b 100644 --- a/spec/lib/gitlab/encoding_helper_spec.rb +++ b/spec/lib/gitlab/encoding_helper_spec.rb @@ -161,6 +161,11 @@ describe Gitlab::EncodingHelper do 'removes invalid bytes from ASCII-8bit encoded multibyte string.', "Lorem ipsum\xC3\n dolor sit amet, xy\xC3\xA0y\xC3\xB9abcd\xC3\xB9efg".force_encoding('ASCII-8BIT'), "Lorem ipsum\n dolor sit amet, xyàyùabcdùefg" + ], + [ + 'handles UTF-16BE encoded strings', + "\xFE\xFF\x00\x41".force_encoding('ASCII-8BIT'), # An "A" prepended with UTF-16 BOM + "\xEF\xBB\xBFA" # An "A" prepended with UTF-8 BOM ] ].each do |description, test_string, xpect| it description do