From bca72f5906ed38dc231ef066231238758c1cb42d Mon Sep 17 00:00:00 2001 From: "micael.bergeron" Date: Sun, 3 Sep 2017 07:45:44 -0400 Subject: [PATCH] wip: fake its a binary diff --- lib/api/commits.rb | 2 +- lib/api/entities.rb | 3 ++- lib/gitlab/encoding_helper.rb | 16 ++++++++++++---- lib/gitlab/git/diff.rb | 16 +++++++++++++++- 4 files changed, 30 insertions(+), 7 deletions(-) diff --git a/lib/api/commits.rb b/lib/api/commits.rb index ea78737288a..4b8d248f5f7 100644 --- a/lib/api/commits.rb +++ b/lib/api/commits.rb @@ -104,7 +104,7 @@ module API not_found! 'Commit' unless commit - commit.raw_diffs.to_a + present commit.raw_diffs.to_a, with: Entities::RepoDiff end desc "Get a commit's comments" do diff --git a/lib/api/entities.rb b/lib/api/entities.rb index 9114b69606b..0c63dc345c2 100644 --- a/lib/api/entities.rb +++ b/lib/api/entities.rb @@ -291,10 +291,11 @@ module API end class RepoDiff < Grape::Entity - expose :old_path, :new_path, :a_mode, :b_mode, :diff + expose :old_path, :new_path, :a_mode, :b_mode expose :new_file?, as: :new_file expose :renamed_file?, as: :renamed_file expose :deleted_file?, as: :deleted_file + expose :diff end class ProtectedRefAccess < Grape::Entity diff --git a/lib/gitlab/encoding_helper.rb b/lib/gitlab/encoding_helper.rb index 8ddc91e341d..c5e173ba55a 100644 --- a/lib/gitlab/encoding_helper.rb +++ b/lib/gitlab/encoding_helper.rb @@ -13,6 +13,8 @@ module Gitlab # https://gitlab.com/gitlab-org/gitlab_git/merge_requests/77#note_4754193 ENCODING_CONFIDENCE_THRESHOLD = 50 + # + # def encode!(message) return nil unless message.respond_to? :force_encoding @@ -22,20 +24,26 @@ module Gitlab # return message if message type is binary detect = CharlockHolmes::EncodingDetector.detect(message) - return message.force_encoding("BINARY") if detect && detect[:type] == :binary + return message.force_encoding("BINARY") if binary?(message, detect) - # force detected encoding if we have sufficient confidence. if detect && detect[:encoding] && detect[:confidence] > ENCODING_CONFIDENCE_THRESHOLD + # force detected encoding if we have sufficient confidence. message.force_encoding(detect[:encoding]) end # encode and clean the bad chars message.replace clean(message) - rescue + rescue => e + byebug encoding = detect ? detect[:encoding] : "unknown" "--broken encoding: #{encoding}" end + def binary?(message, detect=nil) + detect ||= CharlockHolmes::EncodingDetector.detect(message) + detect && detect[:type] == :binary && detect[:confidence] == 100 + end + def encode_utf8(message) detect = CharlockHolmes::EncodingDetector.detect(message) if detect && detect[:encoding] @@ -50,7 +58,7 @@ module Gitlab clean(message) end end - + private def clean(message) diff --git a/lib/gitlab/git/diff.rb b/lib/gitlab/git/diff.rb index ce3d65062e8..e544f255a5d 100644 --- a/lib/gitlab/git/diff.rb +++ b/lib/gitlab/git/diff.rb @@ -116,6 +116,13 @@ module Gitlab filtered_opts end + + # Return a binary diff message like: + # + # "Binary files a/file/path and b/file/path differ\n" + def binary_message(old_path, new_path) + "Binary files #{old_path} and #{new_path} differ\n" + end end def initialize(raw_diff, expanded: true) @@ -214,7 +221,14 @@ module Gitlab # binary we're not going to display anything so we skip the size check. return if !patch.delta.binary? && prune_large_patch(patch) - @diff = encode!(strip_diff_headers(patch.to_s)) + diff = strip_diff_headers(patch.to_s) + @diff = if binary?(diff) + # the diff is binary, let's make a message for it + Diff::binary_message(patch.delta.old_file[:path], + patch.delta.new_file[:path]) + else + encode!(diff) + end end def init_from_hash(hash)