require_relative 'encoding_helper'
require_relative 'path_helper'

module Gitlab
  module Git
    # Wraps a single file (blob) stored in a Git repository and offers
    # class-level helpers to look blobs up and to commit, remove or rename
    # files through Rugged.
    class Blob
      include Linguist::BlobHelper
      include Gitlab::Git::EncodingHelper

      # This number is the maximum amount of data that we want to display to
      # the user. We load as much as we can for encoding detection
      # (Linguist) and LFS pointer parsing. All other cases where we need full
      # blob data should use load_all_data!.
      MAX_DATA_DISPLAY_SIZE = 10_485_760 # 10 * 1024 * 1024 bytes

      attr_accessor :name, :path, :size, :data, :mode, :id, :commit_id, :loaded_size, :binary

      class << self
        # Look up the blob stored at +path+ in the commit identified by +sha+.
        #
        # Leading slashes in +path+ are ignored. The caller's string is left
        # untouched; the blob stores the path without leading slashes.
        #
        # Returns a Blob, a submodule placeholder Blob (see submodule_blob)
        # when the entry is a commit, or nil when nothing matches.
        def find(repository, sha, path)
          commit = repository.lookup(sha)
          root_tree = commit.tree

          # Normalize on a local copy so the caller's (possibly frozen) string
          # is never modified.
          path = path.sub(%r{\A/*}, '')

          blob_entry = find_entry_by_path(repository, root_tree.oid, path)

          return nil unless blob_entry

          if blob_entry[:type] == :commit
            submodule_blob(blob_entry, path, sha)
          else
            blob = repository.lookup(blob_entry[:oid])

            if blob
              Blob.new(
                id: blob.oid,
                name: blob_entry[:name],
                size: blob.size,
                data: blob.content(MAX_DATA_DISPLAY_SIZE),
                mode: blob_entry[:filemode].to_s(8),
                path: path,
                commit_id: sha,
                binary: blob.binary?
              )
            end
          end
        end

        # Build a Blob directly from the object identified by +sha+.
        # No name, path, mode or commit id is set; at most
        # MAX_DATA_DISPLAY_SIZE bytes of content are requested.
        def raw(repository, sha)
          blob = repository.lookup(sha)

          Blob.new(
            id: blob.oid,
            size: blob.size,
            data: blob.content(MAX_DATA_DISPLAY_SIZE),
            binary: blob.binary?
          )
        end

        # Recursive search of a tree entry by path
        #
        # Ex.
        #   blog/            # oid: 1a
        #     app/           # oid: 2a
        #       models/      # oid: 3a
        #       file.rb      # oid: 4a
        #
        #
        # Blob.find_entry_by_path(repo, '1a', 'app/file.rb')
        # # => the entry whose :oid is '4a'
        #
        # Returns the matching entry when its type is :blob or :commit, and
        # nil otherwise (nothing found, or an intermediate path segment is
        # not a tree). Leading slashes in +path+ are ignored and +path+
        # itself is not modified.
        def find_entry_by_path(repository, root_id, path)
          root_tree = repository.lookup(root_id)
          # Strip leading slashes (non-destructively)
          path_arr = path.sub(%r{\A/*}, '').split('/')

          entry = root_tree.find do |tree_entry|
            tree_entry[:name] == path_arr[0]
          end

          return nil unless entry

          if path_arr.size > 1
            # More segments remain, so this entry has to be a directory
            return nil unless entry[:type] == :tree

            path_arr.shift
            find_entry_by_path(repository, entry[:oid], path_arr.join('/'))
          else
            [:blob, :commit].include?(entry[:type]) ? entry : nil
          end
        end

        # Build the placeholder Blob used for submodule entries: it carries
        # the entry's oid and name but has empty data and no size or mode.
        def submodule_blob(blob_entry, path, sha)
          Blob.new(
            id: blob_entry[:oid],
            name: blob_entry[:name],
            data: '',
            path: path,
            commit_id: sha
          )
        end

        # Commit file in repository and return commit sha
        #
        # options should contain next structure:
        #   file: {
        #     content: 'Lorem ipsum...',
        #     path: 'documents/story.txt',
        #     update: true
        #   },
        #   author: {
        #     email: 'user@example.com',
        #     name: 'Test User',
        #     time: Time.now
        #   },
        #   committer: {
        #     email: 'user@example.com',
        #     name: 'Test User',
        #     time: Time.now
        #   },
        #   commit: {
        #     message: 'Wow such commit',
        #     branch: 'master',
        #     update_ref: false
        #   }
        #
        # +action+ is one of :add (default), :remove or :rename; :rename
        # additionally reads file[:previous_path].
        #
        # file[:update] and commit[:update_ref] default to true when absent.
        # The content string passed in by the caller is never modified.
        #
        # Raises Repository::InvalidBlobName when the path contains a '..'
        # segment or when the file exists and updates are not allowed, and
        # Repository::InvalidRef when the branch does not exist in a
        # non-empty repository.
        #
        # rubocop:disable Metrics/AbcSize
        # rubocop:disable Metrics/CyclomaticComplexity
        # rubocop:disable Metrics/PerceivedComplexity
        def commit(repository, options, action = :add)
          file = options[:file]
          update = file[:update].nil? ? true : file[:update]
          author = options[:author]
          committer = options[:committer]
          commit = options[:commit]
          repo = repository.rugged
          ref = commit[:branch]
          update_ref = commit[:update_ref].nil? ? true : commit[:update_ref]
          parents = []
          mode = 0o100644

          # Short branch names are expanded to a full ref
          unless ref.start_with?('refs/')
            ref = 'refs/heads/' + ref
          end

          path_name = PathHelper.normalize_path(file[:path])

          # Abort if any invalid characters remain (e.g. ../foo)
          raise Repository::InvalidBlobName, "Invalid path" if path_name.each_filename.include?('..')

          filename = path_name.to_s
          index = repo.index

          unless repo.empty?
            rugged_ref = repo.references[ref]
            raise Repository::InvalidRef, "Invalid branch name" unless rugged_ref

            # Start from the tree of the branch head so untouched files are kept
            last_commit = rugged_ref.target
            index.read_tree(last_commit.tree)
            parents = [last_commit]
          end

          if action == :remove
            index.remove(filename)
          else
            file_entry = index.get(filename)

            if action == :rename
              # For a rename, the entry of interest is the one at the old path
              old_path_name = PathHelper.normalize_path(file[:previous_path])
              old_filename = old_path_name.to_s
              file_entry = index.get(old_filename)
              index.remove(old_filename) unless file_entry.blank?
            end

            if file_entry
              raise Repository::InvalidBlobName, "Filename already exists; update not allowed" unless update

              # Preserve the current file mode if one is available
              mode = file_entry[:mode] if file_entry[:mode]
            end

            content = file[:content]
            detect = CharlockHolmes::EncodingDetector.new.detect(content) if content

            unless detect && detect[:type] == :binary
              # When writing to the repo directly as we are doing here,
              # the `core.autocrlf` config isn't taken into account.
              # Use the non-destructive gsub so the caller's string is left
              # alone (it may be frozen or reused).
              content = content.gsub("\r\n", "\n") if repository.autocrlf
            end

            oid = repo.write(content, :blob)
            index.add(path: filename, oid: oid, mode: mode)
          end

          opts = {}
          opts[:tree] = index.write_tree(repo)
          opts[:author] = author
          opts[:committer] = committer
          opts[:message] = commit[:message]
          opts[:parents] = parents
          opts[:update_ref] = ref if update_ref

          Rugged::Commit.create(repo, opts)
        end
        # rubocop:enable Metrics/AbcSize
        # rubocop:enable Metrics/CyclomaticComplexity
        # rubocop:enable Metrics/PerceivedComplexity

        # Remove file from repository and return commit sha
        #
        # options should contain next structure:
        #   file: {
        #     path: 'documents/story.txt'
        #   },
        #   author: {
        #     email: 'user@example.com',
        #     name: 'Test User',
        #     time: Time.now
        #   },
        #   committer: {
        #     email: 'user@example.com',
        #     name: 'Test User',
        #     time: Time.now
        #   },
        #   commit: {
        #     message: 'Remove FILENAME',
        #     branch: 'master'
        #   }
        #
        def remove(repository, options)
          commit(repository, options, :remove)
        end

        # Rename file from repository and return commit sha
        #
        # options should contain next structure:
        #   file: {
        #     previous_path: 'documents/old_story.txt',
        #     path: 'documents/story.txt',
        #     content: 'Lorem ipsum...',
        #     update: true
        #   },
        #   author: {
        #     email: 'user@example.com',
        #     name: 'Test User',
        #     time: Time.now
        #   },
        #   committer: {
        #     email: 'user@example.com',
        #     name: 'Test User',
        #     time: Time.now
        #   },
        #   commit: {
        #     message: 'Rename FILENAME',
        #     branch: 'master'
        #   }
        #
        def rename(repository, options)
          commit(repository, options, :rename)
        end
      end

      # Build a blob from an options hash. Recognized keys: :id, :name,
      # :path, :size, :data, :mode, :commit_id and :binary; missing keys
      # leave the matching attribute nil.
      def initialize(options)
        %w(id name path size data mode commit_id binary).each do |key|
          send("#{key}=", options[key.to_sym])
        end

        @loaded_all_data = false
        # Retain the actual size before it is encoded
        @loaded_size = @data.bytesize if @data
      end

      # Uses the explicitly supplied :binary flag when there is one;
      # otherwise defers to the included implementation (presumably
      # Linguist::BlobHelper#binary? - not visible in this file).
      def binary?
        @binary.nil? ? super : @binary == true
      end

      # True when the blob has no data or its data is the empty string.
      def empty?
        !data || data == ''
      end

      # Returns the blob data after running encode! (defined outside this
      # file, presumably in Gitlab::Git::EncodingHelper).
      def data
        encode!
        @data
      end

      # Load all blob data (not just the first MAX_DATA_DISPLAY_SIZE bytes) into
      # memory as a Ruby string.
      def load_all_data!(repository)
        return if @data == '' # don't mess with submodule blobs
        return @data if @loaded_all_data

        @loaded_all_data = true
        @data = repository.lookup(id).content
        @loaded_size = @data.bytesize
      end

      # Returns the blob name after running encode! (see #data).
      def name
        encode!
        @name
      end

      # Valid LFS object pointer is a text file consisting of
      # version
      # oid
      # size
      # see https://github.com/github/git-lfs/blob/v1.1.0/docs/spec.md#the-pointer
      def lfs_pointer?
        has_lfs_version_key? && lfs_oid.present? && lfs_size.present?
      end

      # Returns the 64-character hex digest following "sha256:" in an LFS
      # pointer, or nil when the blob is not a pointer or has no such digest.
      def lfs_oid
        if has_lfs_version_key?
          oid = data.match(/(?<=sha256:)([0-9a-f]{64})/)
          return oid[1] if oid
        end

        nil
      end

      # Returns the digits following "size " in an LFS pointer as a String,
      # or nil when the blob is not a pointer or has no size line.
      def lfs_size
        if has_lfs_version_key?
          size = data.match(/(?<=size )([0-9]+)/)
          return size[1] if size
        end

        nil
      end

      # True when the recorded blob size exceeds the number of bytes
      # currently loaded into memory.
      def truncated?
        size && (size > loaded_size)
      end

      private

      # True when the blob is non-empty text starting with the Git LFS
      # version line.
      def has_lfs_version_key?
        !empty? && text? && data.start_with?("version https://git-lfs.github.com/spec")
      end
    end
  end
end