From 9980f52cb4ee32fbf1b132d605add5678e5ec067 Mon Sep 17 00:00:00 2001 From: Yorick Peterse Date: Fri, 9 Sep 2016 15:19:48 +0200 Subject: [PATCH 1/2] Update gitlab_git to 10.6.6 --- Gemfile | 2 +- Gemfile.lock | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Gemfile b/Gemfile index 81b7002027a..481e94ae569 100644 --- a/Gemfile +++ b/Gemfile @@ -53,7 +53,7 @@ gem 'browser', '~> 2.2' # Extracting information from a git repository # Provide access to Gitlab::Git library -gem 'gitlab_git', '~> 10.6.3' +gem 'gitlab_git', '~> 10.6.6' # LDAP Auth # GitLab fork with several improvements to original library. For full list of changes diff --git a/Gemfile.lock b/Gemfile.lock index c421713f6a1..a1346bcb5ce 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -279,7 +279,7 @@ GEM diff-lcs (~> 1.1) mime-types (>= 1.16, < 3) posix-spawn (~> 0.3) - gitlab_git (10.6.3) + gitlab_git (10.6.6) activesupport (~> 4.0) charlock_holmes (~> 0.7.3) github-linguist (~> 4.7.0) @@ -394,7 +394,7 @@ GEM mime-types (>= 1.16, < 4) mail_room (0.8.0) method_source (0.8.2) - mime-types (2.99.2) + mime-types (2.99.3) mimemagic (0.3.0) mini_portile2 (2.1.0) minitest (5.7.0) @@ -858,7 +858,7 @@ DEPENDENCIES github-linguist (~> 4.7.0) github-markup (~> 1.4) gitlab-flowdock-git-hook (~> 1.0.1) - gitlab_git (~> 10.6.3) + gitlab_git (~> 10.6.6) gitlab_meta (= 7.0) gitlab_omniauth-ldap (~> 1.2.1) gollum-lib (~> 4.2) From 0bc443e3b442b49cb6989282601d477c673c4412 Mon Sep 17 00:00:00 2001 From: Yorick Peterse Date: Mon, 12 Sep 2016 17:25:35 +0200 Subject: [PATCH 2/2] Handle encoding in non-binary Blob instances gitlab_git 10.6.4 relies on Rugged marking blobs as binary or not, instead of relying on Linguist. Linguist in turn would mark text blobs as binary whenever they would contain byte sequences that could not be encoded using UTF-8. However, marking such blobs as binary is not correct. 
If one pushes a Markdown document with invalid character sequences, it's still a text-based Markdown document and not some random binary blob. This commit overrides Blob#data so it automatically converts text-based content to UTF-8 (the encoding we use everywhere else) while taking care of replacing any invalid sequences with the UTF-8 replacement character. The data of binary blobs is left as-is. --- app/models/blob.rb | 12 ++++++++++++ spec/models/blob_spec.rb | 20 ++++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/app/models/blob.rb b/app/models/blob.rb index 12cc5aaafba..ab92e820335 100644 --- a/app/models/blob.rb +++ b/app/models/blob.rb @@ -22,6 +22,18 @@ class Blob < SimpleDelegator new(blob) end + # Returns the data of the blob. + # + # If the blob is a text based blob the content is converted to UTF-8 and any + # invalid byte sequences are replaced. + def data + if binary? + super + else + @data ||= super.encode(Encoding::UTF_8, invalid: :replace, undef: :replace) + end + end + def no_highlighting? size && size > 1.megabyte end diff --git a/spec/models/blob_spec.rb b/spec/models/blob_spec.rb index cee20234e1f..03d02b4d382 100644 --- a/spec/models/blob_spec.rb +++ b/spec/models/blob_spec.rb @@ -1,3 +1,4 @@ +# encoding: utf-8 require 'rails_helper' describe Blob do @@ -7,6 +8,25 @@ describe Blob do end end + describe '#data' do + context 'using a binary blob' do + it 'returns the data as-is' do + data = "\n\xFF\xB9\xC3" + blob = described_class.new(double(binary?: true, data: data)) + + expect(blob.data).to eq(data) + end + end + + context 'using a text blob' do + it 'converts the data to UTF-8' do + blob = described_class.new(double(binary?: false, data: "\n\xFF\xB9\xC3")) + + expect(blob.data).to eq("\n���") + end + end + end + describe '#svg?' do it 'is falsey when not text' do git_blob = double(text?: false)