gitlab-org--gitlab-foss/app/models/blob.rb
Yorick Peterse 0bc443e3b4
Handle encoding in non-binary Blob instances
gitlab_git 10.6.4 relies on Rugged marking blobs as binary or not,
instead of relying on Linguist. Linguist in turn would mark text blobs
as binary whenever they would contain byte sequences that could not be
encoded using UTF-8.

However, marking such blobs as binary is not correct. If one pushes a
Markdown document with invalid character sequences it's still a text
based Markdown document and not some random binary blob.

This commit overwrites Blob#data so it automatically converts text-based
content to UTF-8 (the encoding we use everywhere else) while taking care
of replacing any invalid sequences with the UTF-8 replacement character.
The data of binary blobs is left as-is.
2016-09-14 14:15:31 +02:00

68 lines
1.5 KiB
Ruby

# Blob is a Rails-specific wrapper around Gitlab::Git::Blob objects
class Blob < SimpleDelegator
CACHE_TIME = 60 # Cache raw blobs referred to by a (mutable) ref for 1 minute
CACHE_TIME_IMMUTABLE = 3600 # Cache blobs referred to by an immutable reference for 1 hour
# The maximum size of an SVG that can be displayed.
MAXIMUM_SVG_SIZE = 2.megabytes
# Wrap a Gitlab::Git::Blob object, or return nil when given nil
#
# This method prevents the decorated object from evaluating to "truthy" when
# given a nil value. For example:
#
# blob = Blob.new(nil)
# puts "truthy" if blob # => "truthy"
#
# blob = Blob.decorate(nil)
# puts "truthy" if blob # No output
def self.decorate(blob)
return if blob.nil?
new(blob)
end
# Returns the data of the blob.
#
# If the blob is a text based blob the content is converted to UTF-8 and any
# invalid byte sequences are replaced.
def data
if binary?
super
else
@data ||= super.encode(Encoding::UTF_8, invalid: :replace, undef: :replace)
end
end
def no_highlighting?
size && size > 1.megabyte
end
def only_display_raw?
size && truncated?
end
def svg?
text? && language && language.name == 'SVG'
end
def size_within_svg_limits?
size <= MAXIMUM_SVG_SIZE
end
def video?
UploaderHelper::VIDEO_EXT.include?(extname.downcase.delete('.'))
end
def to_partial_path
if lfs_pointer?
'download'
elsif image? || svg?
'image'
elsif text?
'text'
else
'download'
end
end
end