diff --git a/config/initializers/rugged_use_gitlab_git_attributes.rb b/config/initializers/rugged_use_gitlab_git_attributes.rb index 1cfb3bcb4bd..c0d45caec42 100644 --- a/config/initializers/rugged_use_gitlab_git_attributes.rb +++ b/config/initializers/rugged_use_gitlab_git_attributes.rb @@ -7,7 +7,7 @@ # repository-wide language statistics: # # -# The options passed by Linguist are those assumed by Gitlab::Git::Attributes +# The options passed by Linguist are those assumed by Gitlab::Git::InfoAttributes # anyway, and there is no great efficiency gain from just fetching the listed # attributes with our implementation, so we ignore the additional arguments. # @@ -19,7 +19,7 @@ module Rugged end def attributes - @attributes ||= Gitlab::Git::Attributes.new(path) + @attributes ||= Gitlab::Git::InfoAttributes.new(path) end end diff --git a/lib/gitlab/git/attributes_at_ref_parser.rb b/lib/gitlab/git/attributes_at_ref_parser.rb new file mode 100644 index 00000000000..26b5bd520d5 --- /dev/null +++ b/lib/gitlab/git/attributes_at_ref_parser.rb @@ -0,0 +1,14 @@ +module Gitlab + module Git + # Parses root .gitattributes file at a given ref + class AttributesAtRefParser + delegate :attributes, to: :@parser + + def initialize(repository, ref) + blob = repository.blob_at(ref, '.gitattributes') + + @parser = AttributesParser.new(blob&.data) + end + end + end +end diff --git a/lib/gitlab/git/attributes.rb b/lib/gitlab/git/attributes_parser.rb similarity index 61% rename from lib/gitlab/git/attributes.rb rename to lib/gitlab/git/attributes_parser.rb index 2d20cd473a7..d8aeabb6cba 100644 --- a/lib/gitlab/git/attributes.rb +++ b/lib/gitlab/git/attributes_parser.rb @@ -1,42 +1,26 @@ -# Gitaly note: JV: not sure what to make of this class. Why does it use -# the full disk path of the repository to look up attributes This is -# problematic in Gitaly, because Gitaly hides the full disk path to the -# repository from gitlab-ce. - module Gitlab module Git # Class for parsing Git attribute files and extracting the attributes for # file patterns. - # - # Unlike Rugged this parser only needs a single IO call (a call to `open`), - # vastly reducing the time spent in extracting attributes. - # - # This class _only_ supports parsing the attributes file located at - # `$GIT_DIR/info/attributes` as GitLab doesn't use any other files - # (`.gitattributes` is copied to this particular path). - # - # Basic usage: - # - # attributes = Gitlab::Git::Attributes.new(some_repo.path) - # - # attributes.attributes('README.md') # => { "eol" => "lf } - class Attributes - # path - The path to the Git repository. - def initialize(path) - @path = File.expand_path(path) - @patterns = nil + class AttributesParser + def initialize(attributes_data) + @data = attributes_data || "" + + if @data.is_a?(File) + @patterns = parse_file + end end # Returns all the Git attributes for the given path. # - # path - A path to a file for which to get the attributes. + # file_path - A path to a file for which to get the attributes. # # Returns a Hash. - def attributes(path) - full_path = File.join(@path, path) + def attributes(file_path) + absolute_path = File.join('/', file_path) patterns.each do |pattern, attrs| - return attrs if File.fnmatch?(pattern, full_path) + return attrs if File.fnmatch?(pattern, absolute_path) end {} @@ -98,16 +82,10 @@ module Gitlab # Iterates over every line in the attributes file. def each_line - full_path = File.join(@path, 'info/attributes') + @data.each_line do |line| + break unless line.valid_encoding? - return unless File.exist?(full_path) - - File.open(full_path, 'r') do |handle| - handle.each_line do |line| - break unless line.valid_encoding? - - yield line.strip - end + yield line.strip end end @@ -125,7 +103,8 @@ module Gitlab parsed = attrs ? parse_attributes(attrs) : {} - pairs << [File.join(@path, pattern), parsed] + absolute_pattern = File.join('/', pattern) + pairs << [absolute_pattern, parsed] end # Newer entries take precedence over older entries. diff --git a/lib/gitlab/git/info_attributes.rb b/lib/gitlab/git/info_attributes.rb new file mode 100644 index 00000000000..e79a440950b --- /dev/null +++ b/lib/gitlab/git/info_attributes.rb @@ -0,0 +1,49 @@ +# Gitaly note: JV: not sure what to make of this class. Why does it use +# the full disk path of the repository to look up attributes This is +# problematic in Gitaly, because Gitaly hides the full disk path to the +# repository from gitlab-ce. + +module Gitlab + module Git + # Parses gitattributes at `$GIT_DIR/info/attributes` + # + # Unlike Rugged this parser only needs a single IO call (a call to `open`), + # vastly reducing the time spent in extracting attributes. + # + # This class _only_ supports parsing the attributes file located at + # `$GIT_DIR/info/attributes` as GitLab doesn't use any other files + # (`.gitattributes` is copied to this particular path). + # + # Basic usage: + # + # attributes = Gitlab::Git::InfoAttributes.new(some_repo.path) + # + # attributes.attributes('README.md') # => { "eol" => "lf } + class InfoAttributes + delegate :attributes, :patterns, to: :parser + + # path - The path to the Git repository. + def initialize(path) + @repo_path = File.expand_path(path) + end + + def parser + @parser ||= begin + if File.exist?(attributes_path) + File.open(attributes_path, 'r') do |file_handle| + AttributesParser.new(file_handle) + end + else + AttributesParser.new("") + end + end + end + + private + + def attributes_path + @attributes_path ||= File.join(@repo_path, 'info/attributes') + end + end + end +end diff --git a/lib/gitlab/git/repository.rb b/lib/gitlab/git/repository.rb index 283134e043e..736aaf0a642 100644 --- a/lib/gitlab/git/repository.rb +++ b/lib/gitlab/git/repository.rb @@ -102,7 +102,7 @@ module Gitlab ) @path = File.join(storage_path, @relative_path) @name = @relative_path.split("/").last - @attributes = Gitlab::Git::Attributes.new(path) + @attributes = Gitlab::Git::InfoAttributes.new(path) end def ==(other) @@ -1011,6 +1011,18 @@ module Gitlab attributes(path)[name] end + # Check .gitattributes for a given ref + # + # This only checks the root .gitattributes file, + # it does not traverse subfolders to find additional .gitattributes files + # + # This method is around 30 times slower than `attributes`, + # which uses `$GIT_DIR/info/attributes` + def attributes_at(ref, file_path) + parser = AttributesAtRefParser.new(self, ref) + parser.attributes(file_path) + end + def languages(ref = nil) Gitlab::GitalyClient.migrate(:commit_languages) do |is_enabled| if is_enabled diff --git a/spec/lib/gitlab/git/attributes_at_ref_parser_spec.rb b/spec/lib/gitlab/git/attributes_at_ref_parser_spec.rb new file mode 100644 index 00000000000..5d22dcfb508 --- /dev/null +++ b/spec/lib/gitlab/git/attributes_at_ref_parser_spec.rb @@ -0,0 +1,28 @@ +require 'spec_helper' + +describe Gitlab::Git::AttributesAtRefParser, seed_helper: true do + let(:project) { create(:project, :repository) } + let(:repository) { project.repository } + + subject { described_class.new(repository, 'lfs') } + + it 'loads .gitattributes blob' do + repository.raw # Initialize repository in advance since this also checks attributes + + expected_filter = 'filter=lfs diff=lfs merge=lfs' + receive_blob = receive(:new).with(a_string_including(expected_filter)) + expect(Gitlab::Git::AttributesParser).to receive_blob.and_call_original + + subject + end + + it 'handles missing blobs' do + expect { described_class.new(repository, 'non-existant-branch') }.not_to raise_error + end + + describe '#attributes' do + it 'returns the attributes as a Hash' do + expect(subject.attributes('test.lfs')['filter']).to eq('lfs') + end + end +end diff --git a/spec/lib/gitlab/git/attributes_spec.rb b/spec/lib/gitlab/git/attributes_parser_spec.rb similarity index 81% rename from spec/lib/gitlab/git/attributes_spec.rb rename to spec/lib/gitlab/git/attributes_parser_spec.rb index b715fc3410a..323334e99a5 100644 --- a/spec/lib/gitlab/git/attributes_spec.rb +++ b/spec/lib/gitlab/git/attributes_parser_spec.rb @@ -1,11 +1,10 @@ require 'spec_helper' -describe Gitlab::Git::Attributes, seed_helper: true do - let(:path) do - File.join(SEED_STORAGE_PATH, 'with-git-attributes.git') - end +describe Gitlab::Git::AttributesParser, seed_helper: true do + let(:attributes_path) { File.join(SEED_STORAGE_PATH, 'with-git-attributes.git', 'info', 'attributes') } + let(:data) { File.read(attributes_path) } - subject { described_class.new(path) } + subject { described_class.new(data) } describe '#attributes' do context 'using a path with attributes' do @@ -66,6 +65,26 @@ describe Gitlab::Git::Attributes, seed_helper: true do expect(subject.attributes('test.foo')).to eq({}) end end + + context 'when attributes data is a file handle' do + subject do + File.open(attributes_path, 'r') do |file_handle| + described_class.new(file_handle) + end + end + + it 'returns the attributes as a Hash' do + expect(subject.attributes('test.txt')).to eq({ 'text' => true }) + end + end + + context 'when attributes data is nil' do + let(:data) { nil } + + it 'returns an empty Hash' do + expect(subject.attributes('test.foo')).to eq({}) + end + end end describe '#patterns' do @@ -74,14 +93,14 @@ describe Gitlab::Git::Attributes, seed_helper: true do end it 'parses an entry that uses a tab to separate the pattern and attributes' do - expect(subject.patterns[File.join(path, '*.md')]) + expect(subject.patterns[File.join('/', '*.md')]) .to eq({ 'gitlab-language' => 'markdown' }) end it 'stores patterns in reverse order' do first = subject.patterns.to_a[0] - expect(first[0]).to eq(File.join(path, 'bla/bla.txt')) + expect(first[0]).to eq(File.join('/', 'bla/bla.txt')) end # It's a bit hard to test for something _not_ being processed. As such we'll @@ -89,14 +108,6 @@ describe Gitlab::Git::Attributes, seed_helper: true do it 'ignores any comments and empty lines' do expect(subject.patterns.length).to eq(10) end - - it 'does not parse anything when the attributes file does not exist' do - expect(File).to receive(:exist?) - .with(File.join(path, 'info/attributes')) - .and_return(false) - - expect(subject.patterns).to eq({}) - end end describe '#parse_attributes' do @@ -132,17 +143,9 @@ describe Gitlab::Git::Attributes, seed_helper: true do expect { |b| subject.each_line(&b) }.to yield_successive_args(*args) end - it 'does not yield when the attributes file does not exist' do - expect(File).to receive(:exist?) - .with(File.join(path, 'info/attributes')) - .and_return(false) - - expect { |b| subject.each_line(&b) }.not_to yield_control - end - it 'does not yield when the attributes file has an unsupported encoding' do - path = File.join(SEED_STORAGE_PATH, 'with-invalid-git-attributes.git') - attrs = described_class.new(path) + path = File.join(SEED_STORAGE_PATH, 'with-invalid-git-attributes.git', 'info', 'attributes') + attrs = described_class.new(File.read(path)) expect { |b| attrs.each_line(&b) }.not_to yield_control end diff --git a/spec/lib/gitlab/git/info_attributes_spec.rb b/spec/lib/gitlab/git/info_attributes_spec.rb new file mode 100644 index 00000000000..ea84909c3e0 --- /dev/null +++ b/spec/lib/gitlab/git/info_attributes_spec.rb @@ -0,0 +1,43 @@ +require 'spec_helper' + +describe Gitlab::Git::InfoAttributes, seed_helper: true do + let(:path) do + File.join(SEED_STORAGE_PATH, 'with-git-attributes.git') + end + + subject { described_class.new(path) } + + describe '#attributes' do + context 'using a path with attributes' do + it 'returns the attributes as a Hash' do + expect(subject.attributes('test.txt')).to eq({ 'text' => true }) + end + + it 'returns an empty Hash for a defined path without attributes' do + expect(subject.attributes('bla/bla.txt')).to eq({}) + end + end + end + + describe '#parser' do + it 'parses a file with entries' do + expect(subject.patterns).to be_an_instance_of(Hash) + expect(subject.patterns["/*.txt"]).to eq({ 'text' => true }) + end + + it 'does not parse anything when the attributes file does not exist' do + expect(File).to receive(:exist?) + .with(File.join(path, 'info/attributes')) + .and_return(false) + + expect(subject.patterns).to eq({}) + end + + it 'does not parse attributes files with unsupported encoding' do + path = File.join(SEED_STORAGE_PATH, 'with-invalid-git-attributes.git') + subject = described_class.new(path) + + expect(subject.patterns).to eq({}) + end + end +end diff --git a/spec/support/test_env.rb b/spec/support/test_env.rb index 664698fcbaf..a00ef543128 100644 --- a/spec/support/test_env.rb +++ b/spec/support/test_env.rb @@ -20,7 +20,7 @@ module TestEnv 'improve/awesome' => '5937ac0', 'merged-target' => '21751bf', 'markdown' => '0ed8c6c', - 'lfs' => 'be93687', + 'lfs' => '55bc176', 'master' => 'b83d6e3', 'merge-test' => '5937ac0', "'test'" => 'e56497b',