2018-11-05 23:45:35 -05:00
# frozen_string_literal: true
2014-09-08 09:25:50 -04:00
module Gitlab
module Diff
class File
2018-12-13 12:49:05 -05:00
# strong_memoize and clear_memoization are used throughout this class.
include Gitlab::Utils::StrongMemoize

# Read-only access to the values handed to the constructor.
attr_reader :diff, :repository, :diff_refs, :fallback_diff_refs, :unique_identifier

# File-level predicates and attributes are answered by the wrapped diff object.
delegate :new_file?, :deleted_file?, :renamed_file?,
  :old_path, :new_path, :a_mode, :b_mode, :mode_changed?,
  :submodule?, :expanded?, :too_large?, :collapsed?, :line_count, :has_binary_notice?,
  to: :diff, prefix: false
2017-06-06 17:28:06 -04:00
# Finding a viewer for a diff file happens based only on extension and whether the
# diff file blobs are binary or text, which means 1 diff file should only be matched by 1 viewer,
# and the order of these viewers doesn't really matter.
#
# However, when the diff file blobs are LFS pointers, we cannot know for sure whether the
# file being pointed to is binary or text. In this case, we match only on
# extension, preferring binary viewers over text ones if both exist, since the
# large files referred to in "Large File Storage" are much more likely to be
# binary than text.
# Viewers reporting binary? get sort key 0 and are therefore listed before
# the others (sort key 1). The resulting array is frozen.
RICH_VIEWERS = [
  DiffViewer::Image
].sort_by { |viewer_class| viewer_class.binary? ? 0 : 1 }.freeze
2014-09-08 09:25:50 -04:00
2018-12-16 11:00:43 -05:00
# Builds a diff file wrapper.
#
# diff               - object that holds the raw diff text and answers the
#                      delegated file predicates
# repository         - repository the blobs are looked up in
# diff_refs          - optional refs providing base_sha / head_sha
# fallback_diff_refs - refs used for the content SHAs when diff_refs is nil
# stats              - optional object providing additions / deletions
# unique_identifier  - stored as-is and exposed through its reader
def initialize(
  diff,
  repository:,
  diff_refs: nil,
  fallback_diff_refs: nil,
  stats: nil,
  unique_identifier: nil)

  @diff = diff
  @stats = stats
  @repository = repository
  @diff_refs = diff_refs
  @fallback_diff_refs = fallback_diff_refs
  @unique_identifier = unique_identifier
  @unfolded = false

  # Request both blobs up front so that they are collected in the batch.
  new_blob_lazy
  old_blob_lazy

  return unless use_custom_diff?

  # Keep the existing diff text when preprocessing yields nothing.
  preprocessed = Gitlab::Diff::CustomDiff.preprocess_before_diff(diff.new_path, old_blob_lazy, new_blob_lazy)
  diff.diff = preprocessed || diff.diff
end
# Whether the :jupyter_clean_diffs feature flag is enabled for the
# repository's project. The answer is memoized.
def use_custom_diff?
  strong_memoize(:_custom_diff_enabled) do
    Feature.enabled?(:jupyter_clean_diffs, repository.project, default_enabled: true)
  end
end
2017-10-07 00:25:17 -04:00
# Builds a Position for the given marker.
#
# position_marker - for text positions, an object responding to old_line and
#                   new_line; otherwise an object responding to to_h
# position_type   - :text (default) or another type; Symbols and Strings are
#                   both accepted, so a value read back from an existing
#                   position (stored as a String) selects the same branch
#
# Returns nil when there are no diff_refs.
def position(position_marker, position_type: :text)
  return unless diff_refs

  # Normalise once: the type is stored as a String, so compare it as one.
  type = position_type.to_s

  data = {
    diff_refs: diff_refs,
    position_type: type,
    old_path: old_path,
    new_path: new_path
  }

  properties =
    if type == 'text'
      text_position_properties(position_marker)
    else
      image_position_properties(position_marker)
    end

  Position.new(data.merge!(properties))
end
2016-06-20 13:15:44 -04:00
# Line code for the given diff line, built from the file path and the
# line's new/old positions. Meta lines have no line code (nil).
def line_code(line)
  Gitlab::Git.diff_line_code(file_path, line.new_pos, line.old_pos) unless line.meta?
end
# First diff line whose line code equals the given code, or nil.
def line_for_line_code(code)
  diff_lines.find do |candidate|
    line_code(candidate) == code
  end
end
2016-06-20 13:17:25 -04:00
# Finds the diff line matching both the old and new line of a text position.
# Returns nil for positions whose position_type is not 'text'.
#
# This is normally used to find the line a comment was made on. In that
# context the diff is usually the raw diff persisted at `note_diff_files`,
# which only runs from the first line to the commented line (or one line
# below), so scanning from the bottom up is more performant.
def line_for_position(pos)
  return unless pos.position_type == 'text'

  diff_lines.reverse_each.find do |candidate|
    candidate.old_line == pos.old_line && candidate.new_line == pos.new_line
  end
end
# Position of the diff line identified by the given line code, or nil when
# no line has that code.
def position_for_line_code(code)
  matching_line = line_for_line_code(code)
  return unless matching_line

  position(matching_line)
end
# Line code of the diff line found at the given position, or nil when the
# position matches no line.
def line_code_for_position(pos)
  matching_line = line_for_position(pos)
  return unless matching_line

  line_code(matching_line)
end
2018-05-16 11:46:18 -04:00
# Returns the raw diff content up to the given line index.
#
# diff_line - a diff line; its index marks the last line to include
#
# Returns a String; it is empty when there are no diff lines.
def diff_hunk(diff_line)
  last_index = diff_line.index

  # @@ (match) header is not kept if it's found in the top of the file,
  # therefore we should keep an extra line on this scenario.
  # Safe navigation keeps an empty diff from raising on `first`.
  last_index += 1 unless diff_lines.first&.match?

  # NOTE(review): the separator has a space on each side of the newline, so
  # every joined line is padded; confirm this is intended.
  diff_lines.select { |line| line.index <= last_index }.map(&:text).join(" \n ")
end
2017-05-15 14:19:49 -04:00
# Base SHA of diff_refs, or nil when there are no diff_refs.
def old_sha
  refs = diff_refs
  refs&.base_sha
end
# Head SHA of diff_refs, or nil when there are no diff_refs.
def new_sha
  refs = diff_refs
  refs&.head_sha
end
2017-06-06 17:24:32 -04:00
# SHA used to load the new side of the file: the head SHA of diff_refs,
# falling back to fallback_diff_refs. Always nil for deleted files.
# The computed value (including nil) is cached after the first call.
def new_content_sha
  return if deleted_file?

  unless defined?(@new_content_sha)
    @new_content_sha = (diff_refs || fallback_diff_refs)&.head_sha
  end

  @new_content_sha
end
# SHA used to load the old side of the file: the base SHA of diff_refs,
# falling back to fallback_diff_refs. Always nil for new files.
# The computed value (including nil) is cached after the first call.
def old_content_sha
  return if new_file?

  unless defined?(@old_content_sha)
    @old_content_sha = (diff_refs || fallback_diff_refs)&.base_sha
  end

  @old_content_sha
end
2017-06-06 17:24:32 -04:00
# Memoized blob for the new side of the file; nil when there is no lazy
# blob for it.
# NOTE(review): `itself` is presumably called to resolve the lazy blob into
# its real value - confirm against Blob.lazy.
def new_blob
  strong_memoize(:new_blob) { new_blob_lazy&.itself }
end
2017-05-15 14:19:49 -04:00
# Memoized blob for the old side of the file; nil when there is no lazy
# blob for it.
# NOTE(review): `itself` is presumably called to resolve the lazy blob into
# its real value - confirm against Blob.lazy.
def old_blob
  strong_memoize(:old_blob) { old_blob_lazy&.itself }
end
2018-12-13 14:17:19 -05:00
# Lines of the new blob between two 1-based line numbers (inclusive).
#
# from_line - first line number to include
# to_line   - last line number to include
#
# Returns [] when there is no new blob.
def new_blob_lines_between(from_line, to_line)
  return [] unless new_blob

  # Line numbers are 1-based, array indexes are 0-based.
  index_range = (from_line - 1)..(to_line - 1)

  new_blob.load_all_data!
  new_blob.data.lines[index_range]
end
2017-06-06 17:24:32 -04:00
# Content SHA of the new side, or of the old side when the new one is
# missing.
def content_sha
  sha = new_content_sha
  sha || old_content_sha
end
# The new blob, or the old blob when there is no new one.
def blob
  current = new_blob
  current || old_blob
end
2019-03-21 09:38:52 -04:00
# Replaces the highlighted diff lines.
# The memoized serializer lines are derived from them, so that memoization
# is cleared first.
def highlighted_diff_lines=(value)
  clear_memoization(:diff_lines_for_serializer)

  @highlighted_diff_lines = value
end
2016-07-20 12:25:36 -04:00
2016-06-20 12:51:48 -04:00
# Array of Gitlab::Diff::Line objects, parsed from the raw diff on first
# use and cached afterwards.
def diff_lines
  @diff_lines ||= begin
    parser = Gitlab::Diff::Parser.new
    parser.parse(raw_diff.each_line, diff_file: self).to_a
  end
end
2018-10-16 12:21:16 -04:00
# Changes diff_lines according to the given position. That is, it checks
# whether the position needs blob lines added to the diff in order to be
# presented, and if so swaps in the unfolded lines and marks the file as
# unfolded.
#
# Does nothing when position is nil or no unfolding is required.
def unfold_diff_lines(position)
  return unless position

  unfolder = Gitlab::Diff::LinesUnfolder.new(self, position)
  return unless unfolder.unfold_required?

  @diff_lines = unfolder.unfolded_diff_lines
  @unfolded = true
end
# Value of the unfolded flag: false after construction, true once
# unfold_diff_lines has replaced the diff lines.
def unfolded?
  @unfolded
end
2018-12-16 11:00:43 -05:00
# Whether highlighted diff lines are already held by this object. This does
# not trigger highlighting.
def highlight_loaded?
  loaded_lines = @highlighted_diff_lines
  loaded_lines.present?
end
2015-12-30 00:52:50 -05:00
# Highlighted diff lines, computed with Gitlab::Diff::Highlight on first use
# unless they were assigned beforehand.
def highlighted_diff_lines
  @highlighted_diff_lines ||= begin
    highlighter = Gitlab::Diff::Highlight.new(self, repository: repository)
    highlighter.highlight
  end
end
2021-10-14 23:12:17 -04:00
# Array[<Hash>] with right/left keys that contain Gitlab::Diff::Line objects
# whose text is highlighted. Computed once and cached.
def parallel_diff_lines
  @parallel_diff_lines ||= begin
    parallel_diff = Gitlab::Diff::ParallelDiff.new(self)
    parallel_diff.parallelize
  end
end
2014-09-08 14:54:52 -04:00
# Raw diff text of the wrapped diff, always as a String.
def raw_diff
  raw = diff.diff
  raw.to_s
end
2014-09-08 09:25:50 -04:00
# Diff line following the one at the given index.
def next_line(index)
  following_index = index + 1
  diff_lines[following_index]
end
# Diff line preceding the one at the given index; nil for index 0 or below,
# so the lookup never wraps around to the end of the array.
def prev_line(index)
  return unless index > 0

  diff_lines[index - 1]
end
2014-09-08 14:54:52 -04:00
2016-06-20 13:17:25 -04:00
# Old and new path of the file, without nil entries.
def paths
  both_paths = [old_path, new_path]
  both_paths.compact
end
2014-09-08 14:54:52 -04:00
# The new path when it is present, otherwise the old path.
def file_path
  preferred = new_path.presence
  preferred || old_path
end
2015-10-01 07:52:08 -04:00
2020-05-22 08:08:15 -04:00
# SHA1 hex digest of file_path.
def file_hash
  path = file_path
  Digest::SHA1.hexdigest(path)
end
2015-10-01 07:52:08 -04:00
# Number of added lines: taken from the stats object when it provides one,
# otherwise counted from diff_lines. Memoized.
def added_lines
  strong_memoize(:added_lines) { @stats&.additions || diff_lines.count(&:added?) }
end
# Number of removed lines: taken from the stats object when it provides one,
# otherwise counted from diff_lines. Memoized.
def removed_lines
  strong_memoize(:removed_lines) { @stats&.deletions || diff_lines.count(&:removed?) }
end
2016-06-20 12:54:53 -04:00
2016-11-10 12:24:12 -05:00
# String built from the file path and the new/deleted/renamed flags.
# NOTE(review): the literal has spaces around each part, including a leading
# and a trailing one; confirm this is intended.
def file_identifier
  identifier = " #{file_path} - #{new_file?} - #{deleted_file?} - #{renamed_file?} "
  identifier
end
2017-06-06 17:24:32 -04:00
2020-05-22 08:08:15 -04:00
# SHA1 hex digest of file_identifier.
def file_identifier_hash
  identifier = file_identifier
  Digest::SHA1.hexdigest(identifier)
end
2017-06-06 17:24:32 -04:00
# A file is diffable when its `diff` attribute allows it and it is not a
# text file whose raw diff is only a binary notice.
def diffable?
  diffable_by_attribute? &&
    !text_with_binary_notice?
end
2018-12-13 12:49:05 -05:00
# Truthy when the diff carries a binary notice or either blob reports
# binary_in_repo?.
def binary_in_repo?
  has_binary_notice? ||
    try_blobs(:binary_in_repo?)
end
2018-12-13 12:49:05 -05:00
# Opposite of binary_in_repo?.
def text_in_repo?
  binary = binary_in_repo?
  !binary
end
2017-06-06 17:28:06 -04:00
# Truthy when either blob reports external_storage_error?
# (old blob is asked first).
def external_storage_error?
  try_blobs(:external_storage_error?)
end
# Truthy when either blob reports stored_externally?
# (old blob is asked first).
def stored_externally?
  try_blobs(:stored_externally?)
end
# external_storage value of the old blob when truthy, otherwise that of the
# new blob.
def external_storage
  try_blobs(:external_storage)
end
# Whether the file content itself changed.
#
# With diff_refs the answer comes from comparing the blobs. Without them,
# new, deleted and renamed files never count as changed; any other file
# counts when it is text and has diff lines.
def content_changed?
  if diff_refs
    blobs_changed?
  elsif new_file? || deleted_file? || renamed_file?
    false
  else
    text? && diff_lines.any?
  end
end
# Whether the old and new blobs disagree on being binary. Returns nil when
# either blob is missing.
def different_type?
  return unless old_blob && new_blob

  old_blob.binary? != new_blob.binary?
end
2018-08-27 11:31:01 -04:00
# rubocop: disable CodeReuse/ActiveRecord
2017-06-06 17:28:06 -04:00
# Combined size of the blobs that exist (old and/or new).
def size
  valid_blobs.sum(&:size)
end
2018-08-27 11:31:01 -04:00
# rubocop: enable CodeReuse/ActiveRecord
2017-06-06 17:28:06 -04:00
2018-08-27 11:31:01 -04:00
# rubocop: disable CodeReuse/ActiveRecord
2017-06-06 17:28:06 -04:00
# Combined raw_size of the blobs that exist (old and/or new).
def raw_size
  valid_blobs.sum(&:raw_size)
end
2018-08-27 11:31:01 -04:00
# rubocop: enable CodeReuse/ActiveRecord
2017-06-06 17:28:06 -04:00
2018-12-08 00:50:56 -05:00
# True when every existing blob is empty (also true when there are no
# blobs at all).
def empty?
  valid_blobs.all?(&:empty?)
end
2018-12-13 12:49:05 -05:00
# Truthy when either blob reports binary?. Memoized.
def binary?
  strong_memoize(:is_binary) { try_blobs(:binary?) }
end
2018-12-13 12:49:05 -05:00
# True when the file is neither binary nor changing between binary and
# text. Memoized.
def text?
  strong_memoize(:is_text) { !(binary? || different_type?) }
end
2019-02-15 12:56:50 -05:00
# The rich viewer when one applies, otherwise the simple viewer.
def viewer
  rich = rich_viewer
  rich || simple_viewer
end
2017-06-06 17:28:06 -04:00
# Instance of the simple viewer class for this file, built once and cached.
def simple_viewer
  @simple_viewer ||= begin
    viewer_class = simple_viewer_class
    viewer_class.new(self)
  end
end
# Instance of the rich viewer class for this file, or nil when no rich
# viewer applies. The result (including nil) is cached.
def rich_viewer
  unless defined?(@rich_viewer)
    @rich_viewer = rich_viewer_class&.new(self)
  end

  @rich_viewer
end
2020-04-28 08:09:44 -04:00
# Fresh instance of the alternate viewer class, or nil when there is none.
# Not cached.
def alternate_viewer
  viewer_class = alternate_viewer_class
  viewer_class&.new(self)
end
2017-06-06 17:28:06 -04:00
# Whether the file is shown with the text viewer.
#
# ignore_errors - when false, the text viewer must also have no render error
def rendered_as_text?(ignore_errors: true)
  return false unless simple_viewer.is_a?(DiffViewer::Text)

  ignore_errors || simple_viewer.render_error.nil?
end
2018-06-21 08:22:40 -04:00
# Highlighted diff lines for the serializer. This adds the bottom match line
# to the array if needed; it contains the data to load more context lines.
#
# Memoized. Yields nil when there are no highlighted lines or no blob.
# The match line is pushed onto the array returned by
# highlighted_diff_lines, i.e. that array itself is extended.
def diff_lines_for_serializer
  strong_memoize(:diff_lines_for_serializer) do
    lines = highlighted_diff_lines

    next if lines.empty? || blob.nil?

    last_line = lines.last

    # More blob lines follow the last diff line and the file still exists.
    needs_bottom_match = last_line.new_pos < total_blob_lines(blob) && !deleted_file?

    if needs_bottom_match
      lines.push(Gitlab::Diff::Line.new(" ", 'match', nil, last_line.old_pos, last_line.new_pos))
    end

    lines
  end
end
2019-02-28 09:14:15 -05:00
# True for binary files, for files without serializer lines, and for files
# whose serializer lines contain no 'match' line.
def fully_expanded?
  return true if binary?

  serializer_lines = diff_lines_for_serializer

  serializer_lines.nil? || serializer_lines.none? { |line| line.type.to_s == 'match' }
end
2017-06-06 17:28:06 -04:00
private
2021-07-01 17:08:38 -04:00
# Value of the `diff` attribute the repository holds for this file path;
# defaults to true when the attribute is not set.
def diffable_by_attribute?
  file_attributes = repository.attributes(file_path)
  file_attributes.fetch('diff') { true }
end
# NOTE: Files with unsupported encodings (e.g. UTF-16) are treated as binary
# by git, but they are recognized as text files during encoding detection.
# These files have `Binary files a/filename and b/filename differ' as their
# raw diff content, which cannot be used. We need to handle this special
# case and avoid displaying an incorrect diff.
def text_with_binary_notice?
  text? &&
    has_binary_notice?
end
2020-02-02 01:08:56 -05:00
# Lazy blob for the given SHA and path in this repository, or nil when no
# SHA is given.
def fetch_blob(sha, path)
  Blob.lazy(repository, sha, path) if sha
end
2018-06-21 08:22:40 -04:00
# Number of lines in the blob, not counting a blank last line.
# The result is cached in @total_lines regardless of which blob is passed
# on later calls.
def total_blob_lines(blob)
  @total_lines ||= begin
    count = blob.lines.size
    trailing_blank = count > 0 && blob.lines.last.blank?
    trailing_blank ? count - 1 : count
  end
end
2020-01-15 16:08:48 -05:00
# Whether the file was added, deleted, or had its content changed.
def modified_file?
  new_file? ||
    deleted_file? ||
    content_changed?
end
2018-03-16 11:35:58 -04:00
# Sends the given method to the old blob and, when that yields nothing
# truthy, to the new blob. Missing blobs are skipped.
#
# We can't use Object#try because Blob doesn't inherit from Object, but
# from BasicObject (via SimpleDelegator).
def try_blobs(meth)
  from_old = old_blob&.public_send(meth)
  return from_old if from_old

  new_blob&.public_send(meth)
end
# The old and new blobs, leaving out whichever is nil.
def valid_blobs
  both_blobs = [old_blob, new_blob]
  both_blobs.compact
end
2017-10-07 00:25:17 -04:00
# Position attributes of a text line: its old and new line numbers.
def text_position_properties(line)
  {
    old_line: line.old_line,
    new_line: line.new_line
  }
end
# Position attributes of an image point: whatever its to_h returns.
def image_position_properties(image_point)
  properties = image_point.to_h
  properties
end
2017-08-22 11:27:09 -04:00
# Whether the old and new blobs have different ids. Returns nil when either
# blob is missing.
def blobs_changed?
  return unless old_blob && new_blob

  old_blob.id != new_blob.id
end
2018-03-16 11:35:58 -04:00
# Lazy blob for the new side: new content SHA at file_path.
def new_blob_lazy
  sha = new_content_sha
  fetch_blob(sha, file_path)
end
# Lazy blob for the old side: old content SHA at old_path.
def old_blob_lazy
  sha = old_content_sha
  fetch_blob(sha, old_path)
end
2017-06-06 17:28:06 -04:00
# Picks the simple viewer class. The checks run in order and the first one
# that applies wins; NoPreview is the fallback.
def simple_viewer_class
  if collapsed?
    DiffViewer::Collapsed
  elsif !diffable?
    DiffViewer::NotDiffable
  elsif modified_file? && text?
    DiffViewer::Text
  elsif content_changed?
    DiffViewer::NoPreview
  elsif new_file?
    DiffViewer::Added
  elsif deleted_file?
    DiffViewer::Deleted
  elsif renamed_file?
    DiffViewer::Renamed
  elsif mode_changed?
    DiffViewer::ModeChanged
  else
    DiffViewer::NoPreview
  end
end
# Rich viewer class for this file, chosen from RICH_VIEWERS; nil when none
# applies.
def rich_viewer_class
  candidates = RICH_VIEWERS
  viewer_class_from(candidates)
end
# First renderable viewer class among the given classes. Returns nil for
# collapsed, non-diffable or unmodified files.
def viewer_class_from(classes)
  return if collapsed? || !diffable? || !modified_file?

  find_renderable_viewer_class(classes)
end
# Alternate viewer class, only offered when the current viewer is exactly a
# DiffViewer::Renamed: a renderable rich viewer if there is one, otherwise
# the text viewer for text files, otherwise nil.
def alternate_viewer_class
  return unless viewer.instance_of?(DiffViewer::Renamed)

  rich_class = find_renderable_viewer_class(RICH_VIEWERS)
  return rich_class if rich_class

  DiffViewer::Text if text?
end
# First class in the list that can render this file. Returns nil when the
# blobs differ in type or external storage reported an error.
# Binary verification is skipped for externally stored files.
def find_renderable_viewer_class(classes)
  return if different_type? || external_storage_error?

  verify_binary = !stored_externally?

  classes.find do |candidate|
    candidate.can_render?(self, verify_binary: verify_binary)
  end
end
2014-09-08 09:25:50 -04:00
end
end
end