gitlab-org--gitlab-foss/lib/gitlab/git/blob.rb

require_relative 'encoding_helper'
require_relative 'path_helper'

module Gitlab
  module Git
    class Blob
      include Linguist::BlobHelper
      include Gitlab::Git::EncodingHelper

      # This number is the maximum amount of data that we want to display to
      # the user. We load as much as we can for encoding detection
      # (Linguist) and LFS pointer parsing. All other cases where we need full
      # blob data should use load_all_data!.
      MAX_DATA_DISPLAY_SIZE = 10485760

      attr_accessor :name, :path, :size, :data, :mode, :id, :commit_id, :loaded_size, :binary

      class << self
        def find(repository, sha, path)
          commit = repository.lookup(sha)
          root_tree = commit.tree

          blob_entry = find_entry_by_path(repository, root_tree.oid, path)

          return nil unless blob_entry

          if blob_entry[:type] == :commit
            submodule_blob(blob_entry, path, sha)
          else
            blob = repository.lookup(blob_entry[:oid])

            if blob
              Blob.new(
                id: blob.oid,
                name: blob_entry[:name],
                size: blob.size,
                data: blob.content(MAX_DATA_DISPLAY_SIZE),
                mode: blob_entry[:filemode].to_s(8),
                path: path,
                commit_id: sha,
                binary: blob.binary?
              )
            end
          end
        end

        def raw(repository, sha)
          blob = repository.lookup(sha)

          Blob.new(
            id: blob.oid,
            size: blob.size,
            data: blob.content(MAX_DATA_DISPLAY_SIZE),
            binary: blob.binary?
          )
        end

        # Recursive search of blob id by path
        #
        # Ex.
        #   blog/            # oid: 1a
        #     app/           # oid: 2a
        #       models/      # oid: 3a
        #       file.rb      # oid: 4a
        #
        #
        # Blob.find_entry_by_path(repo, '1a', 'app/file.rb') # => '4a'
        #
        def find_entry_by_path(repository, root_id, path)
          root_tree = repository.lookup(root_id)
          # Strip leading slashes
          path[/^\/*/] = ''
          path_arr = path.split('/')

          entry = root_tree.find do |entry|
            entry[:name] == path_arr[0]
          end

          return nil unless entry

          if path_arr.size > 1
            return nil unless entry[:type] == :tree
            path_arr.shift
            find_entry_by_path(repository, entry[:oid], path_arr.join('/'))
          else
            [:blob, :commit].include?(entry[:type]) ? entry : nil
          end
        end

        def submodule_blob(blob_entry, path, sha)
          Blob.new(
            id: blob_entry[:oid],
            name: blob_entry[:name],
            data: '',
            path: path,
            commit_id: sha,
          )
        end

        # Commit file in repository and return commit sha
        #
        # options should contain next structure:
        #   file: {
        #     content: 'Lorem ipsum...',
        #     path: 'documents/story.txt',
        #     update: true
        #   },
        #   author: {
        #     email: 'user@example.com',
        #     name: 'Test User',
        #     time: Time.now
        #   },
        #   committer: {
        #     email: 'user@example.com',
        #     name: 'Test User',
        #     time: Time.now
        #   },
        #   commit: {
        #     message: 'Wow such commit',
        #     branch: 'master',
        #     update_ref: false
        #   }
        #
        # rubocop:disable Metrics/AbcSize
        # rubocop:disable Metrics/CyclomaticComplexity
        # rubocop:disable Metrics/PerceivedComplexity
        def commit(repository, options, action = :add)
          file = options[:file]
          update = file[:update].nil? ? true : file[:update]
          author = options[:author]
          committer = options[:committer]
          commit = options[:commit]
          repo = repository.rugged
          ref = commit[:branch]
          update_ref = commit[:update_ref].nil? ? true : commit[:update_ref]
          parents = []
          mode = 0o100644

          unless ref.start_with?('refs/')
            ref = 'refs/heads/' + ref
          end

          path_name = PathHelper.normalize_path(file[:path])
          # Abort if any invalid characters remain (e.g. ../foo)
          raise Repository::InvalidBlobName.new("Invalid path") if path_name.each_filename.to_a.include?('..')

          filename = path_name.to_s
          index = repo.index

          unless repo.empty?
            rugged_ref = repo.references[ref]
            raise Repository::InvalidRef.new("Invalid branch name") unless rugged_ref
            last_commit = rugged_ref.target
            index.read_tree(last_commit.tree)
            parents = [last_commit]
          end

          if action == :remove
            index.remove(filename)
          else
            file_entry = index.get(filename)

            if action == :rename
              old_path_name = PathHelper.normalize_path(file[:previous_path])
              old_filename = old_path_name.to_s
              file_entry = index.get(old_filename)
              index.remove(old_filename) unless file_entry.blank?
            end

            if file_entry
              raise Repository::InvalidBlobName.new("Filename already exists; update not allowed") unless update

              # Preserve the current file mode if one is available
              mode = file_entry[:mode] if file_entry[:mode]
            end

            content = file[:content]
            detect = CharlockHolmes::EncodingDetector.new.detect(content) if content

            unless detect && detect[:type] == :binary
              # When writing to the repo directly as we are doing here,
              # the `core.autocrlf` config isn't taken into account.
              content.gsub!("\r\n", "\n") if repository.autocrlf
            end

            oid = repo.write(content, :blob)
            index.add(path: filename, oid: oid, mode: mode)
          end

          opts = {}
          opts[:tree] = index.write_tree(repo)
          opts[:author] = author
          opts[:committer] = committer
          opts[:message] = commit[:message]
          opts[:parents] = parents
          opts[:update_ref] = ref if update_ref

          Rugged::Commit.create(repo, opts)
        end
        # rubocop:enable Metrics/AbcSize
        # rubocop:enable Metrics/CyclomaticComplexity
        # rubocop:enable Metrics/PerceivedComplexity

        # Remove file from repository and return commit sha
        #
        # options should contain next structure:
        #   file: {
        #     path: 'documents/story.txt'
        #   },
        #   author: {
        #     email: 'user@example.com',
        #     name: 'Test User',
        #     time: Time.now
        #   },
        #   committer: {
        #     email: 'user@example.com',
        #     name: 'Test User',
        #     time: Time.now
        #   },
        #   commit: {
        #     message: 'Remove FILENAME',
        #     branch: 'master'
        #   }
        #
        def remove(repository, options)
          commit(repository, options, :remove)
        end

        # Rename file from repository and return commit sha
        #
        # options should contain next structure:
        #   file: {
        #     previous_path: 'documents/old_story.txt'
        #     path: 'documents/story.txt'
        #     content: 'Lorem ipsum...',
        #     update: true
        #   },
        #   author: {
        #     email: 'user@example.com',
        #     name: 'Test User',
        #     time: Time.now
        #   },
        #   committer: {
        #     email: 'user@example.com',
        #     name: 'Test User',
        #     time: Time.now
        #   },
        #   commit: {
        #     message: 'Rename FILENAME',
        #     branch: 'master'
        #   }
        #
        def rename(repository, options)
          commit(repository, options, :rename)
        end
      end

      def initialize(options)
        %w(id name path size data mode commit_id binary).each do |key|
          self.send("#{key}=", options[key.to_sym])
        end

        @loaded_all_data = false
        # Retain the actual size before it is encoded
        @loaded_size = @data.bytesize if @data
      end

      def binary?
        @binary.nil? ? super : @binary == true
      end

      def empty?
        !data || data == ''
      end

      def data
        encode! @data
      end

      # Load all blob data (not just the first MAX_DATA_DISPLAY_SIZE bytes) into
      # memory as a Ruby string.
      def load_all_data!(repository)
        return if @data == '' # don't mess with submodule blobs
        return @data if @loaded_all_data

        @loaded_all_data = true
        @data = repository.lookup(id).content
        @loaded_size = @data.bytesize
      end

      def name
        encode! @name
      end

      # Valid LFS object pointer is a text file consisting of
      # version
      # oid
      # size
      # see https://github.com/github/git-lfs/blob/v1.1.0/docs/spec.md#the-pointer
      def lfs_pointer?
        has_lfs_version_key? && lfs_oid.present? && lfs_size.present?
      end

      def lfs_oid
        if has_lfs_version_key?
          oid = data.match(/(?<=sha256:)([0-9a-f]{64})/)
          return oid[1] if oid
        end

        nil
      end

      def lfs_size
        if has_lfs_version_key?
          size = data.match(/(?<=size )([0-9]+)/)
          return size[1] if size
        end

        nil
      end

      def truncated?
        size && (size > loaded_size)
      end

      private

      def has_lfs_version_key?
        !empty? && text? && data.start_with?("version https://git-lfs.github.com/spec")
      end
    end
  end
end
Absorb gitlab_git 2017-01-04 18:43:06 +00:00			`require_relative 'encoding_helper'`
			`require_relative 'path_helper'`

			`module Gitlab`
			`module Git`
			`class Blob`
			`include Linguist::BlobHelper`
			`include Gitlab::Git::EncodingHelper`

			`# This number is the maximum amount of data that we want to display to`
			`# the user. We load as much as we can for encoding detection`
			`# (Linguist) and LFS pointer parsing. All other cases where we need full`
			`# blob data should use load_all_data!.`
			`MAX_DATA_DISPLAY_SIZE = 10485760`

			`attr_accessor :name, :path, :size, :data, :mode, :id, :commit_id, :loaded_size, :binary`

			`class << self`
			`def find(repository, sha, path)`
			`commit = repository.lookup(sha)`
			`root_tree = commit.tree`

			`blob_entry = find_entry_by_path(repository, root_tree.oid, path)`

			`return nil unless blob_entry`

			`if blob_entry[:type] == :commit`
			`submodule_blob(blob_entry, path, sha)`
			`else`
			`blob = repository.lookup(blob_entry[:oid])`

			`if blob`
			`Blob.new(`
			`id: blob.oid,`
			`name: blob_entry[:name],`
			`size: blob.size,`
			`data: blob.content(MAX_DATA_DISPLAY_SIZE),`
			`mode: blob_entry[:filemode].to_s(8),`
			`path: path,`
			`commit_id: sha,`
			`binary: blob.binary?`
			`)`
			`end`
			`end`
			`end`

			`def raw(repository, sha)`
			`blob = repository.lookup(sha)`

			`Blob.new(`
			`id: blob.oid,`
			`size: blob.size,`
			`data: blob.content(MAX_DATA_DISPLAY_SIZE),`
			`binary: blob.binary?`
			`)`
			`end`

			`# Recursive search of blob id by path`
			`#`
			`# Ex.`
			`# blog/ # oid: 1a`
			`# app/ # oid: 2a`
			`# models/ # oid: 3a`
			`# file.rb # oid: 4a`
			`#`
			`#`
			`# Blob.find_entry_by_path(repo, '1a', 'app/file.rb') # => '4a'`
			`#`
			`def find_entry_by_path(repository, root_id, path)`
			`root_tree = repository.lookup(root_id)`
			`# Strip leading slashes`
			`path[/^\/*/] = ''`
			`path_arr = path.split('/')`

			`entry = root_tree.find do \|entry\|`
			`entry[:name] == path_arr[0]`
			`end`

			`return nil unless entry`

			`if path_arr.size > 1`
			`return nil unless entry[:type] == :tree`
			`path_arr.shift`
			`find_entry_by_path(repository, entry[:oid], path_arr.join('/'))`
			`else`
			`[:blob, :commit].include?(entry[:type]) ? entry : nil`
			`end`
			`end`

			`def submodule_blob(blob_entry, path, sha)`
			`Blob.new(`
			`id: blob_entry[:oid],`
			`name: blob_entry[:name],`
			`data: '',`
			`path: path,`
			`commit_id: sha,`
			`)`
			`end`

			`# Commit file in repository and return commit sha`
			`#`
			`# options should contain next structure:`
			`# file: {`
			`# content: 'Lorem ipsum...',`
			`# path: 'documents/story.txt',`
			`# update: true`
			`# },`
			`# author: {`
			`# email: 'user@example.com',`
			`# name: 'Test User',`
			`# time: Time.now`
			`# },`
			`# committer: {`
			`# email: 'user@example.com',`
			`# name: 'Test User',`
			`# time: Time.now`
			`# },`
			`# commit: {`
			`# message: 'Wow such commit',`
			`# branch: 'master',`
			`# update_ref: false`
			`# }`
			`#`
			`# rubocop:disable Metrics/AbcSize`
			`# rubocop:disable Metrics/CyclomaticComplexity`
			`# rubocop:disable Metrics/PerceivedComplexity`
			`def commit(repository, options, action = :add)`
			`file = options[:file]`
			`update = file[:update].nil? ? true : file[:update]`
			`author = options[:author]`
			`committer = options[:committer]`
			`commit = options[:commit]`
			`repo = repository.rugged`
			`ref = commit[:branch]`
			`update_ref = commit[:update_ref].nil? ? true : commit[:update_ref]`
			`parents = []`
			`mode = 0o100644`

			`unless ref.start_with?('refs/')`
			`ref = 'refs/heads/' + ref`
			`end`

			`path_name = PathHelper.normalize_path(file[:path])`
			`# Abort if any invalid characters remain (e.g. ../foo)`
			`raise Repository::InvalidBlobName.new("Invalid path") if path_name.each_filename.to_a.include?('..')`

			`filename = path_name.to_s`
			`index = repo.index`

			`unless repo.empty?`
			`rugged_ref = repo.references[ref]`
			`raise Repository::InvalidRef.new("Invalid branch name") unless rugged_ref`
			`last_commit = rugged_ref.target`
			`index.read_tree(last_commit.tree)`
			`parents = [last_commit]`
			`end`

			`if action == :remove`
			`index.remove(filename)`
			`else`
			`file_entry = index.get(filename)`

			`if action == :rename`
			`old_path_name = PathHelper.normalize_path(file[:previous_path])`
			`old_filename = old_path_name.to_s`
			`file_entry = index.get(old_filename)`
			`index.remove(old_filename) unless file_entry.blank?`
			`end`

			`if file_entry`
			`raise Repository::InvalidBlobName.new("Filename already exists; update not allowed") unless update`

			`# Preserve the current file mode if one is available`
			`mode = file_entry[:mode] if file_entry[:mode]`
			`end`

			`content = file[:content]`
			`detect = CharlockHolmes::EncodingDetector.new.detect(content) if content`

			`unless detect && detect[:type] == :binary`
			`# When writing to the repo directly as we are doing here,`
			# the `core.autocrlf` config isn't taken into account.
			`content.gsub!("\r\n", "\n") if repository.autocrlf`
			`end`

			`oid = repo.write(content, :blob)`
			`index.add(path: filename, oid: oid, mode: mode)`
			`end`

			`opts = {}`
			`opts[:tree] = index.write_tree(repo)`
			`opts[:author] = author`
			`opts[:committer] = committer`
			`opts[:message] = commit[:message]`
			`opts[:parents] = parents`
			`opts[:update_ref] = ref if update_ref`

			`Rugged::Commit.create(repo, opts)`
			`end`
			`# rubocop:enable Metrics/AbcSize`
			`# rubocop:enable Metrics/CyclomaticComplexity`
			`# rubocop:enable Metrics/PerceivedComplexity`

			`# Remove file from repository and return commit sha`
			`#`
			`# options should contain next structure:`
			`# file: {`
			`# path: 'documents/story.txt'`
			`# },`
			`# author: {`
			`# email: 'user@example.com',`
			`# name: 'Test User',`
			`# time: Time.now`
			`# },`
			`# committer: {`
			`# email: 'user@example.com',`
			`# name: 'Test User',`
			`# time: Time.now`
			`# },`
			`# commit: {`
			`# message: 'Remove FILENAME',`
			`# branch: 'master'`
			`# }`
			`#`
			`def remove(repository, options)`
			`commit(repository, options, :remove)`
			`end`

			`# Rename file from repository and return commit sha`
			`#`
			`# options should contain next structure:`
			`# file: {`
			`# previous_path: 'documents/old_story.txt'`
			`# path: 'documents/story.txt'`
			`# content: 'Lorem ipsum...',`
			`# update: true`
			`# },`
			`# author: {`
			`# email: 'user@example.com',`
			`# name: 'Test User',`
			`# time: Time.now`
			`# },`
			`# committer: {`
			`# email: 'user@example.com',`
			`# name: 'Test User',`
			`# time: Time.now`
			`# },`
			`# commit: {`
			`# message: 'Rename FILENAME',`
			`# branch: 'master'`
			`# }`
			`#`
			`def rename(repository, options)`
			`commit(repository, options, :rename)`
			`end`
			`end`

			`def initialize(options)`
			`%w(id name path size data mode commit_id binary).each do \|key\|`
			`self.send("#{key}=", options[key.to_sym])`
			`end`

			`@loaded_all_data = false`
			`# Retain the actual size before it is encoded`
			`@loaded_size = @data.bytesize if @data`
			`end`

			`def binary?`
			`@binary.nil? ? super : @binary == true`
			`end`

			`def empty?`
			`!data \|\| data == ''`
			`end`

			`def data`
			`encode! @data`
			`end`

			`# Load all blob data (not just the first MAX_DATA_DISPLAY_SIZE bytes) into`
			`# memory as a Ruby string.`
			`def load_all_data!(repository)`
			`return if @data == '' # don't mess with submodule blobs`
			`return @data if @loaded_all_data`

			`@loaded_all_data = true`
			`@data = repository.lookup(id).content`
			`@loaded_size = @data.bytesize`
			`end`

			`def name`
			`encode! @name`
			`end`

			`# Valid LFS object pointer is a text file consisting of`
			`# version`
			`# oid`
			`# size`
			`# see https://github.com/github/git-lfs/blob/v1.1.0/docs/spec.md#the-pointer`
			`def lfs_pointer?`
			`has_lfs_version_key? && lfs_oid.present? && lfs_size.present?`
			`end`

			`def lfs_oid`
			`if has_lfs_version_key?`
			`oid = data.match(/(?<=sha256:)([0-9a-f]{64})/)`
			`return oid[1] if oid`
			`end`

			`nil`
			`end`

			`def lfs_size`
			`if has_lfs_version_key?`
			`size = data.match(/(?<=size )([0-9]+)/)`
			`return size[1] if size`
			`end`

			`nil`
			`end`

			`def truncated?`
			`size && (size > loaded_size)`
			`end`

			`private`

			`def has_lfs_version_key?`
			`!empty? && text? && data.start_with?("version https://git-lfs.github.com/spec")`
			`end`
			`end`
			`end`
			`end`