From df5548e19e8c988b709e66a7e35ddc097344913e Mon Sep 17 00:00:00 2001 From: Yorick Peterse Date: Tue, 8 Nov 2016 16:42:28 +0100 Subject: [PATCH 1/4] Unify detecting of special repository files This moves the logic of detecting special repository files (e.g. a README or a Koding configuration file) to a single class: Gitlab::FileDetector. Moving this logic into a single place allows this to be re-used more easily. This commit also changes Repository#gitlab_ci_yaml so that its cached similar to other data (e.g. the Koding configuration file). --- app/models/repository.rb | 28 ++++---- app/models/tree.rb | 4 +- lib/gitlab/file_detector.rb | 63 ++++++++++++++++++ spec/lib/gitlab/file_detector_spec.rb | 59 +++++++++++++++++ spec/models/repository_spec.rb | 94 +++++++++++++++++++++++++++ 5 files changed, 232 insertions(+), 16 deletions(-) create mode 100644 lib/gitlab/file_detector.rb create mode 100644 spec/lib/gitlab/file_detector_spec.rb diff --git a/app/models/repository.rb b/app/models/repository.rb index 31be06be50c..06c9eb3bd70 100644 --- a/app/models/repository.rb +++ b/app/models/repository.rb @@ -321,7 +321,7 @@ class Repository def cache_keys %i(size commit_count readme version contribution_guide changelog - license_blob license_key gitignore koding_yml) + license_blob license_key gitignore koding_yml gitlab_ci_yml) end # Keys for data on branch/tag operations. @@ -556,23 +556,19 @@ class Repository def version cache.fetch(:version) do - tree(:head).blobs.find do |file| - file.name.casecmp('version').zero? - end + file_on_head(:version) end end def contribution_guide cache.fetch(:contribution_guide) do - tree(:head).blobs.find do |file| - file.contributing? - end + file_on_head(:contributing) end end def changelog cache.fetch(:changelog) do - file_on_head(/\A(changelog|history|changes|news)/i) + file_on_head(:changelog) end end @@ -580,7 +576,7 @@ class Repository return nil unless head_exists? cache.fetch(:license_blob) do - file_on_head(/\A(licen[sc]e|copying)(\..+|\z)/i) + file_on_head(:license) end end @@ -596,7 +592,7 @@ class Repository return nil if !exists? || empty? cache.fetch(:gitignore) do - file_on_head(/\A\.gitignore\z/) + file_on_head(:gitignore) end end @@ -604,15 +600,15 @@ class Repository return nil unless head_exists? cache.fetch(:koding_yml) do - file_on_head(/\A\.koding\.yml\z/) + file_on_head(:koding) end end def gitlab_ci_yml return nil unless head_exists? - @gitlab_ci_yml ||= tree(:head).blobs.find do |file| - file.name == '.gitlab-ci.yml' + @gitlab_ci_yml ||= cache.fetch(:gitlab_ci_yml) do + file_on_head(:gitlab_ci) end rescue Rugged::ReferenceError # For unknow reason spinach scenario "Scenario: I change project path" @@ -1160,8 +1156,10 @@ class Repository exists? && !empty? && !rugged.head_unborn? end - def file_on_head(regex) - tree(:head).blobs.find { |file| file.name =~ regex } + def file_on_head(type) + tree(:head).blobs.find do |file| + Gitlab::FileDetector.type_of(file.name) == type + end end def tags_sorted_by_committed_date diff --git a/app/models/tree.rb b/app/models/tree.rb index 2d1d68dbd81..fe148b0ec65 100644 --- a/app/models/tree.rb +++ b/app/models/tree.rb @@ -18,7 +18,9 @@ class Tree def readme return @readme if defined?(@readme) - available_readmes = blobs.select(&:readme?) + available_readmes = blobs.select do |blob| + Gitlab::FileDetector.type_of(blob.name) == :readme + end previewable_readmes = available_readmes.select do |blob| previewable?(blob.name) diff --git a/lib/gitlab/file_detector.rb b/lib/gitlab/file_detector.rb new file mode 100644 index 00000000000..1d93a67dc56 --- /dev/null +++ b/lib/gitlab/file_detector.rb @@ -0,0 +1,63 @@ +require 'set' + +module Gitlab + # Module that can be used to detect if a path points to a special file such as + # a README or a CONTRIBUTING file. + module FileDetector + PATTERNS = { + readme: /\Areadme/i, + changelog: /\A(changelog|history|changes|news)/i, + license: /\A(licen[sc]e|copying)(\..+|\z)/i, + contributing: /\Acontributing/i, + version: 'version', + gitignore: '.gitignore', + koding: '.koding.yml', + gitlab_ci: '.gitlab-ci.yml', + avatar: /\Alogo\.(png|jpg|gif)\z/ + } + + # Returns an Array of file types based on the given paths. + # + # This method can be used to check if a list of file paths (e.g. of changed + # files) involve any special files such as a README or a LICENSE file. + # + # Example: + # + # types_in_paths(%w{README.md foo/bar.txt}) # => [:readme] + def self.types_in_paths(paths) + types = Set.new + + paths.each do |path| + type = type_of(path) + + types << type if type + end + + types.to_a + end + + # Returns the type of a file path, or nil if none could be detected. + # + # Returned types are Symbols such as `:readme`, `:version`, etc. + # + # Example: + # + # type_of('README.md') # => :readme + # type_of('VERSION') # => :version + def self.type_of(path) + name = File.basename(path) + + PATTERNS.each do |type, search| + did_match = if search.is_a?(Regexp) + name =~ search + else + name.casecmp(search) == 0 + end + + return type if did_match + end + + nil + end + end +end diff --git a/spec/lib/gitlab/file_detector_spec.rb b/spec/lib/gitlab/file_detector_spec.rb new file mode 100644 index 00000000000..e5ba13bbaf8 --- /dev/null +++ b/spec/lib/gitlab/file_detector_spec.rb @@ -0,0 +1,59 @@ +require 'spec_helper' + +describe Gitlab::FileDetector do + describe '.types_in_paths' do + it 'returns the file types for the given paths' do + expect(described_class.types_in_paths(%w(README.md CHANGELOG VERSION VERSION))). + to eq(%i{readme changelog version}) + end + + it 'does not include unrecognized file paths' do + expect(described_class.types_in_paths(%w(README.md foo.txt))). + to eq(%i{readme}) + end + end + + describe '.type_of' do + it 'returns the type of a README file' do + expect(described_class.type_of('README.md')).to eq(:readme) + end + + it 'returns the type of a changelog file' do + %w(CHANGELOG HISTORY CHANGES NEWS).each do |file| + expect(described_class.type_of(file)).to eq(:changelog) + end + end + + it 'returns the type of a license file' do + %w(LICENSE LICENCE COPYING).each do |file| + expect(described_class.type_of(file)).to eq(:license) + end + end + + it 'returns the type of a version file' do + expect(described_class.type_of('VERSION')).to eq(:version) + end + + it 'returns the type of a .gitignore file' do + expect(described_class.type_of('.gitignore')).to eq(:gitignore) + end + + it 'returns the type of a Koding config file' do + expect(described_class.type_of('.koding.yml')).to eq(:koding) + end + + it 'returns the type of a GitLab CI config file' do + expect(described_class.type_of('.gitlab-ci.yml')).to eq(:gitlab_ci) + end + + it 'returns the type of an avatar' do + %w(logo.gif logo.png logo.jpg).each do |file| + expect(described_class.type_of(file)).to eq(:avatar) + end + end + + it 'returns nil for an unknown file' do + expect(described_class.type_of('foo.txt')).to be_nil + end + end +end diff --git a/spec/models/repository_spec.rb b/spec/models/repository_spec.rb index 72ac41f3472..5e8a7bb08a1 100644 --- a/spec/models/repository_spec.rb +++ b/spec/models/repository_spec.rb @@ -1578,4 +1578,98 @@ describe Repository, models: true do end.to raise_error(Repository::CommitError) end end + + describe '#contribution_guide', caching: true do + it 'returns and caches the output' do + expect(repository).to receive(:file_on_head). + with(:contributing). + and_return(Gitlab::Git::Tree.new(path: 'CONTRIBUTING.md')). + once + + 2.times do + expect(repository.contribution_guide). + to be_an_instance_of(Gitlab::Git::Tree) + end + end + end + + describe '#changelog', caching: true do + it 'returns and caches the output' do + expect(repository).to receive(:file_on_head). + with(:changelog). + and_return(Gitlab::Git::Tree.new(path: 'CHANGELOG')). + once + + 2.times do + expect(repository.changelog).to be_an_instance_of(Gitlab::Git::Tree) + end + end + end + + describe '#license_blob', caching: true do + it 'returns and caches the output' do + expect(repository).to receive(:file_on_head). + with(:license). + and_return(Gitlab::Git::Tree.new(path: 'LICENSE')). + once + + 2.times do + expect(repository.license_blob).to be_an_instance_of(Gitlab::Git::Tree) + end + end + end + + describe '#license_key', caching: true do + it 'returns and caches the output' do + license = double(key: 'mit') + + expect(Licensee).to receive(:license). + with(repository.path). + and_return(license). + once + + 2.times do + expect(repository.license_key).to eq('mit') + end + end + end + + describe '#gitignore', caching: true do + it 'returns and caches the output' do + expect(repository).to receive(:file_on_head). + with(:gitignore). + and_return(Gitlab::Git::Tree.new(path: '.gitignore')). + once + + 2.times do + expect(repository.gitignore).to be_an_instance_of(Gitlab::Git::Tree) + end + end + end + + describe '#koding_yml', caching: true do + it 'returns and caches the output' do + expect(repository).to receive(:file_on_head). + with(:koding). + and_return(Gitlab::Git::Tree.new(path: '.koding.yml')). + once + + 2.times do + expect(repository.koding_yml).to be_an_instance_of(Gitlab::Git::Tree) + end + end + end + + describe '#gitlab_ci_yml', caching: true do + it 'returns and caches the output' do + expect(repository).to receive(:file_on_head). + with(:gitlab_ci). + and_return(Gitlab::Git::Tree.new(path: '.gitlab-ci.yml')). + once + + 2.times do + expect(repository.gitlab_ci_yml).to be_an_instance_of(Gitlab::Git::Tree) + end + end + end end From 6f393877e555c9e87017747cd70d3577bb70a03e Mon Sep 17 00:00:00 2001 From: Yorick Peterse Date: Tue, 8 Nov 2016 18:42:13 +0100 Subject: [PATCH 2/4] Use File.exist? to check if a repository exists Initializing Rugged objects is way too expensive just to check if a repository exists. Even though we cache this data once in a while we have to refresh this. On GitLab.com we have seen Repository#exists? taking up to _1 minute_ to complete in the absolute worst case, though usually it sits around a second or so. Using File.exist? to instead check if $GIT_DIR/refs exists is a much faster way of checking if a repository was initialized properly. --- app/models/repository.rb | 12 +++++++----- spec/models/repository_spec.rb | 3 +-- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/app/models/repository.rb b/app/models/repository.rb index 06c9eb3bd70..bd5dd7e8f74 100644 --- a/app/models/repository.rb +++ b/app/models/repository.rb @@ -41,11 +41,7 @@ class Repository return @exists unless @exists.nil? @exists = cache.fetch(:exists?) do - begin - raw_repository && raw_repository.rugged ? true : false - rescue Gitlab::Git::Repository::NoRepository - false - end + refs_directory_exists? end end @@ -1148,6 +1144,12 @@ class Repository private + def refs_directory_exists? + return false unless path_with_namespace + + File.exist?(File.join(path_to_repo, 'refs')) + end + def cache @cache ||= RepositoryCache.new(path_with_namespace, @project.id) end diff --git a/spec/models/repository_spec.rb b/spec/models/repository_spec.rb index 5e8a7bb08a1..635130cf797 100644 --- a/spec/models/repository_spec.rb +++ b/spec/models/repository_spec.rb @@ -811,8 +811,7 @@ describe Repository, models: true do end it 'returns false when a repository does not exist' do - expect(repository.raw_repository).to receive(:rugged). - and_raise(Gitlab::Git::Repository::NoRepository) + allow(repository).to receive(:refs_directory_exists?).and_return(false) expect(repository.exists?).to eq(false) end From ffb9b3ef18dee6f7e561ff53253da42a25854c6c Mon Sep 17 00:00:00 2001 From: Yorick Peterse Date: Fri, 18 Nov 2016 14:04:18 +0100 Subject: [PATCH 3/4] Refactor cache refreshing/expiring This refactors repository caching so it's possible to selectively refresh certain caches, instead of just expiring and refreshing everything. To allow this the various methods that were cached (e.g. "tag_count" and "readme") use a similar pattern that makes expiring and refreshing their data much easier. In this new setup caches are refreshed as follows: 1. After a commit (but before running ProjectCacheWorker) we expire some basic caches such as the commit count and repository size. 2. ProjectCacheWorker will recalculate the commit count, repository size, then refresh a specific set of caches based on the list of files changed in a push payload. This requires a bunch of changes to the various methods that may be cached. For one, data should not be cached if a branch used or the entire repository does not exist. To prevent all these methods from handling this manually this is taken care of in Repository#cache_method_output. Some methods still manually check for the existence of a repository but this result is also cached. With selective flushing implemented ProjectCacheWorker no longer uses an exclusive lease for all of its work. Instead this worker only uses a lease to limit the number of times the repository size is updated as this is a fairly expensive operation. --- app/models/project.rb | 2 +- app/models/repository.rb | 402 +++++++++-------- app/services/git_push_service.rb | 23 +- app/workers/project_cache_worker.rb | 62 +-- spec/models/project_spec.rb | 2 +- spec/models/repository_spec.rb | 500 +++++++++++---------- spec/requests/api/branches_spec.rb | 2 +- spec/services/git_push_service_spec.rb | 104 +++-- spec/services/git_tag_push_service_spec.rb | 8 +- spec/workers/project_cache_worker_spec.rb | 98 ++-- 10 files changed, 626 insertions(+), 577 deletions(-) diff --git a/app/models/project.rb b/app/models/project.rb index 995359daf1e..f8a54324341 100644 --- a/app/models/project.rb +++ b/app/models/project.rb @@ -1086,7 +1086,7 @@ class Project < ActiveRecord::Base "refs/heads/#{branch}", force: true) repository.copy_gitattributes(branch) - repository.expire_avatar_cache(branch) + repository.expire_avatar_cache reload_default_branch end diff --git a/app/models/repository.rb b/app/models/repository.rb index bd5dd7e8f74..bf136ccdb6c 100644 --- a/app/models/repository.rb +++ b/app/models/repository.rb @@ -1,16 +1,54 @@ require 'securerandom' class Repository - class CommitError < StandardError; end - - # Files to use as a project avatar in case no avatar was uploaded via the web - # UI. - AVATAR_FILES = %w{logo.png logo.jpg logo.gif} - include Gitlab::ShellAdapter attr_accessor :path_with_namespace, :project + class CommitError < StandardError; end + + # Methods that cache data from the Git repository. + # + # Each entry in this Array should have a corresponding method with the exact + # same name. The cache key used by those methods must also match method's + # name. + # + # For example, for entry `:readme` there's a method called `readme` which + # stores its data in the `readme` cache key. + CACHED_METHODS = %i(size commit_count readme version contribution_guide + changelog license_blob license_key gitignore koding_yml + gitlab_ci_yml branch_names tag_names branch_count + tag_count avatar exists? empty? root_ref) + + # Certain method caches should be refreshed when certain types of files are + # changed. This Hash maps file types (as returned by Gitlab::FileDetector) to + # the corresponding methods to call for refreshing caches. + METHOD_CACHES_FOR_FILE_TYPES = { + readme: :readme, + changelog: :changelog, + license: %i(license_blob license_key), + contributing: :contribution_guide, + version: :version, + gitignore: :gitignore, + koding: :koding_yml, + gitlab_ci: :gitlab_ci_yml, + avatar: :avatar + } + + # Wraps around the given method and caches its output in Redis and an instance + # variable. + # + # This only works for methods that do not take any arguments. + def self.cache_method(name, fallback: nil) + original = :"_uncached_#{name}" + + alias_method(original, name) + + define_method(name) do + cache_method_output(name, fallback: fallback) { __send__(original) } + end + end + def self.storages Gitlab.config.repositories.storages end @@ -37,20 +75,6 @@ class Repository ) end - def exists? - return @exists unless @exists.nil? - - @exists = cache.fetch(:exists?) do - refs_directory_exists? - end - end - - def empty? - return @empty unless @empty.nil? - - @empty = cache.fetch(:empty?) { raw_repository.empty? } - end - # # Git repository can contains some hidden refs like: # /refs/notes/* @@ -217,10 +241,6 @@ class Repository branch_names + tag_names end - def branch_names - @branch_names ||= cache.fetch(:branch_names) { branches.map(&:name) } - end - def branch_exists?(branch_name) branch_names.include?(branch_name) end @@ -270,34 +290,6 @@ class Repository ref_exists?(keep_around_ref_name(sha)) end - def tag_names - cache.fetch(:tag_names) { raw_repository.tag_names } - end - - def commit_count - cache.fetch(:commit_count) do - begin - raw_repository.commit_count(self.root_ref) - rescue - 0 - end - end - end - - def branch_count - @branch_count ||= cache.fetch(:branch_count) { branches.size } - end - - def tag_count - @tag_count ||= cache.fetch(:tag_count) { raw_repository.rugged.tags.count } - end - - # Return repo size in megabytes - # Cached in redis - def size - cache.fetch(:size) { raw_repository.size } - end - def diverging_commit_counts(branch) root_ref_hash = raw_repository.rev_parse_target(root_ref).oid cache.fetch(:"diverging_commit_counts_#{branch.name}") do @@ -313,48 +305,55 @@ class Repository end end - # Keys for data that can be affected for any commit push. - def cache_keys - %i(size commit_count - readme version contribution_guide changelog - license_blob license_key gitignore koding_yml gitlab_ci_yml) - end - - # Keys for data on branch/tag operations. - def cache_keys_for_branches_and_tags - %i(branch_names tag_names branch_count tag_count) - end - - def build_cache - (cache_keys + cache_keys_for_branches_and_tags).each do |key| - unless cache.exist?(key) - send(key) - end - end - end - def expire_tags_cache - cache.expire(:tag_names) + expire_method_caches(%i(tag_names tag_count)) @tags = nil end def expire_branches_cache - cache.expire(:branch_names) - @branch_names = nil + expire_method_caches(%i(branch_names branch_count)) @local_branches = nil end - def expire_cache(branch_name = nil, revision = nil) - cache_keys.each do |key| + def expire_statistics_caches + expire_method_caches(%i(size commit_count)) + end + + def expire_all_method_caches + expire_method_caches(CACHED_METHODS) + end + + # Expires the caches of a specific set of methods + def expire_method_caches(methods) + methods.each do |key| cache.expire(key) + + ivar = cache_instance_variable_name(key) + + remove_instance_variable(ivar) if instance_variable_defined?(ivar) + end + end + + def expire_avatar_cache + expire_method_caches(%i(avatar)) + end + + # Refreshes the method caches of this repository. + # + # types - An Array of file types (e.g. `:readme`) used to refresh extra + # caches. + def refresh_method_caches(types) + to_refresh = [] + + types.each do |type| + methods = METHOD_CACHES_FOR_FILE_TYPES[type.to_sym] + + to_refresh.concat(Array(methods)) if methods end - expire_branch_cache(branch_name) - expire_avatar_cache(branch_name, revision) + expire_method_caches(to_refresh) - # This ensures this particular cache is flushed after the first commit to a - # new repository. - expire_emptiness_caches if empty? + to_refresh.each { |method| send(method) } end def expire_branch_cache(branch_name = nil) @@ -373,15 +372,14 @@ class Repository end def expire_root_ref_cache - cache.expire(:root_ref) - @root_ref = nil + expire_method_caches(%i(root_ref)) end # Expires the cache(s) used to determine if a repository is empty or not. def expire_emptiness_caches - cache.expire(:empty?) - @empty = nil + return unless empty? + expire_method_caches(%i(empty?)) expire_has_visible_content_cache end @@ -390,51 +388,22 @@ class Repository @has_visible_content = nil end - def expire_branch_count_cache - cache.expire(:branch_count) - @branch_count = nil - end - - def expire_tag_count_cache - cache.expire(:tag_count) - @tag_count = nil - end - def lookup_cache @lookup_cache ||= {} end - def expire_avatar_cache(branch_name = nil, revision = nil) - # Avatars are pulled from the default branch, thus if somebody pushes to a - # different branch there's no need to expire anything. - return if branch_name && branch_name != root_ref - - # We don't want to flush the cache if the commit didn't actually make any - # changes to any of the possible avatar files. - if revision && commit = self.commit(revision) - return unless commit.raw_diffs(deltas_only: true). - any? { |diff| AVATAR_FILES.include?(diff.new_path) } - end - - cache.expire(:avatar) - - @avatar = nil - end - def expire_exists_cache - cache.expire(:exists?) - @exists = nil + expire_method_caches(%i(exists?)) end # expire cache that doesn't depend on repository data (when expiring) def expire_content_cache expire_tags_cache - expire_tag_count_cache expire_branches_cache - expire_branch_count_cache expire_root_ref_cache expire_emptiness_caches expire_exists_cache + expire_statistics_caches end # Runs code after a repository has been created. @@ -449,9 +418,8 @@ class Repository # Runs code just before a repository is deleted. def before_delete expire_exists_cache - - expire_cache if exists? - + expire_all_method_caches + expire_branch_cache if exists? expire_content_cache repository_event(:remove_repository) @@ -468,9 +436,9 @@ class Repository # Runs code before pushing (= creating or removing) a tag. def before_push_tag - expire_cache + expire_statistics_caches + expire_emptiness_caches expire_tags_cache - expire_tag_count_cache repository_event(:push_tag) end @@ -478,7 +446,7 @@ class Repository # Runs code before removing a tag. def before_remove_tag expire_tags_cache - expire_tag_count_cache + expire_statistics_caches repository_event(:remove_tag) end @@ -490,12 +458,14 @@ class Repository # Runs code after a repository has been forked/imported. def after_import expire_content_cache - build_cache + expire_tags_cache + expire_branches_cache end # Runs code after a new commit has been pushed. - def after_push_commit(branch_name, revision) - expire_cache(branch_name, revision) + def after_push_commit(branch_name) + expire_statistics_caches + expire_branch_cache(branch_name) repository_event(:push_commit, branch: branch_name) end @@ -504,7 +474,6 @@ class Repository def after_create_branch expire_branches_cache expire_has_visible_content_cache - expire_branch_count_cache repository_event(:push_branch) end @@ -519,7 +488,6 @@ class Repository # Runs code after an existing branch has been removed. def after_remove_branch expire_has_visible_content_cache - expire_branch_count_cache expire_branches_cache end @@ -546,82 +514,127 @@ class Repository Gitlab::Git::Blob.raw(self, oid) end - def readme - cache.fetch(:readme) { tree(:head).readme } + def root_ref + if raw_repository + raw_repository.root_ref + else + # When the repo does not exist we raise this error so no data is cached. + raise Rugged::ReferenceError + end end + cache_method :root_ref + + def exists? + refs_directory_exists? + end + cache_method :exists? + + def empty? + raw_repository.empty? + end + cache_method :empty? + + # The size of this repository in megabytes. + def size + exists? ? raw_repository.size : 0.0 + end + cache_method :size, fallback: 0.0 + + def commit_count + root_ref ? raw_repository.commit_count(root_ref) : 0 + end + cache_method :commit_count, fallback: 0 + + def branch_names + branches.map(&:name) + end + cache_method :branch_names, fallback: [] + + def tag_names + raw_repository.tag_names + end + cache_method :tag_names, fallback: [] + + def branch_count + branches.size + end + cache_method :branch_count, fallback: 0 + + def tag_count + raw_repository.rugged.tags.count + end + cache_method :tag_count, fallback: 0 + + def avatar + if tree = file_on_head(:avatar) + tree.path + end + end + cache_method :avatar + + def readme + if head = tree(:head) + head.readme + end + end + cache_method :readme def version - cache.fetch(:version) do - file_on_head(:version) - end + file_on_head(:version) end + cache_method :version def contribution_guide - cache.fetch(:contribution_guide) do - file_on_head(:contributing) - end + file_on_head(:contributing) end + cache_method :contribution_guide def changelog - cache.fetch(:changelog) do - file_on_head(:changelog) - end + file_on_head(:changelog) end + cache_method :changelog def license_blob - return nil unless head_exists? - - cache.fetch(:license_blob) do - file_on_head(:license) - end + file_on_head(:license) end + cache_method :license_blob def license_key - return nil unless head_exists? + return unless exists? - cache.fetch(:license_key) do - Licensee.license(path).try(:key) - end + Licensee.license(path).try(:key) end + cache_method :license_key def gitignore - return nil if !exists? || empty? - - cache.fetch(:gitignore) do - file_on_head(:gitignore) - end + file_on_head(:gitignore) end + cache_method :gitignore def koding_yml - return nil unless head_exists? - - cache.fetch(:koding_yml) do - file_on_head(:koding) - end + file_on_head(:koding) end + cache_method :koding_yml def gitlab_ci_yml - return nil unless head_exists? - - @gitlab_ci_yml ||= cache.fetch(:gitlab_ci_yml) do - file_on_head(:gitlab_ci) - end - rescue Rugged::ReferenceError - # For unknow reason spinach scenario "Scenario: I change project path" - # lead to "Reference 'HEAD' not found" exception from Repository#empty? - nil + file_on_head(:gitlab_ci) end + cache_method :gitlab_ci_yml def head_commit @head_commit ||= commit(self.root_ref) end def head_tree - @head_tree ||= Tree.new(self, head_commit.sha, nil) + if head_commit + @head_tree ||= Tree.new(self, head_commit.sha, nil) + end end def tree(sha = :head, path = nil, recursive: false) if sha == :head + return unless head_commit + if path.nil? return head_tree else @@ -771,10 +784,6 @@ class Repository @tags ||= raw_repository.tags end - def root_ref - @root_ref ||= cache.fetch(:root_ref) { raw_repository.root_ref } - end - def commit_dir(user, path, message, branch, author_email: nil, author_name: nil) update_branch_with_hooks(user, branch) do |ref| options = { @@ -1132,12 +1141,41 @@ class Repository end end - def avatar - return nil unless exists? + # Caches the supplied block both in a cache and in an instance variable. + # + # The cache key and instance variable are named the same way as the value of + # the `key` argument. + # + # This method will return `nil` if the corresponding instance variable is also + # set to `nil`. This ensures we don't keep yielding the block when it returns + # `nil`. + # + # key - The name of the key to cache the data in. + # fallback - A value to fall back to in the event of a Git error. + def cache_method_output(key, fallback: nil, &block) + ivar = cache_instance_variable_name(key) - @avatar ||= cache.fetch(:avatar) do - AVATAR_FILES.find do |file| - blob_at_branch(root_ref, file) + if instance_variable_defined?(ivar) + instance_variable_get(ivar) + else + begin + instance_variable_set(ivar, cache.fetch(key, &block)) + rescue Rugged::ReferenceError, Gitlab::Git::Repository::NoRepository + # if e.g. HEAD or the entire repository doesn't exist we want to + # gracefully handle this and not cache anything. + fallback + end + end + end + + def cache_instance_variable_name(key) + :"@#{key.to_s.tr('?!', '')}" + end + + def file_on_head(type) + if head = tree(:head) + head.blobs.find do |file| + Gitlab::FileDetector.type_of(file.name) == type end end end @@ -1154,16 +1192,6 @@ class Repository @cache ||= RepositoryCache.new(path_with_namespace, @project.id) end - def head_exists? - exists? && !empty? && !rugged.head_unborn? - end - - def file_on_head(type) - tree(:head).blobs.find do |file| - Gitlab::FileDetector.type_of(file.name) == type - end - end - def tags_sorted_by_committed_date tags.sort_by { |tag| tag.dereferenced_target.committed_date } end diff --git a/app/services/git_push_service.rb b/app/services/git_push_service.rb index 77c6c81cc1b..647930d555c 100644 --- a/app/services/git_push_service.rb +++ b/app/services/git_push_service.rb @@ -18,7 +18,7 @@ class GitPushService < BaseService # def execute @project.repository.after_create if @project.empty_repo? - @project.repository.after_push_commit(branch_name, params[:newrev]) + @project.repository.after_push_commit(branch_name) if push_remove_branch? @project.repository.after_remove_branch @@ -51,12 +51,32 @@ class GitPushService < BaseService execute_related_hooks perform_housekeeping + + update_caches end def update_gitattributes @project.repository.copy_gitattributes(params[:ref]) end + def update_caches + if is_default_branch? + paths = Set.new + + @push_commits.each do |commit| + commit.raw_diffs(deltas_only: true).each do |diff| + paths << diff.new_path + end + end + + types = Gitlab::FileDetector.types_in_paths(paths.to_a) + else + types = [] + end + + ProjectCacheWorker.perform_async(@project.id, types) + end + protected def execute_related_hooks @@ -70,7 +90,6 @@ class GitPushService < BaseService @project.execute_hooks(build_push_data.dup, :push_hooks) @project.execute_services(build_push_data.dup, :push_hooks) Ci::CreatePipelineService.new(@project, current_user, build_push_data).execute - ProjectCacheWorker.perform_async(@project.id) if push_remove_branch? AfterBranchDeleteService diff --git a/app/workers/project_cache_worker.rb b/app/workers/project_cache_worker.rb index 4dfa745fb50..27d7e652721 100644 --- a/app/workers/project_cache_worker.rb +++ b/app/workers/project_cache_worker.rb @@ -1,54 +1,38 @@ # Worker for updating any project specific caches. -# -# This worker runs at most once every 15 minutes per project. This is to ensure -# that multiple instances of jobs for this worker don't hammer the underlying -# storage engine as much. class ProjectCacheWorker include Sidekiq::Worker include DedicatedSidekiqQueue LEASE_TIMEOUT = 15.minutes.to_i - def self.lease_for(project_id) - Gitlab::ExclusiveLease. - new("project_cache_worker:#{project_id}", timeout: LEASE_TIMEOUT) - end + # project_id - The ID of the project for which to flush the cache. + # refresh - An Array containing extra types of data to refresh such as + # `:readme` to flush the README and `:changelog` to flush the + # CHANGELOG. + def perform(project_id, refresh = []) + project = Project.find_by(id: project_id) - # Overwrite Sidekiq's implementation so we only schedule when actually needed. - def self.perform_async(project_id) - # If a lease for this project is still being held there's no point in - # scheduling a new job. - super unless lease_for(project_id).exists? - end + return unless project && project.repository.exists? - def perform(project_id) - if try_obtain_lease_for(project_id) - Rails.logger. - info("Obtained ProjectCacheWorker lease for project #{project_id}") - else - Rails.logger. - info("Could not obtain ProjectCacheWorker lease for project #{project_id}") - - return - end - - update_caches(project_id) - end - - def update_caches(project_id) - project = Project.find(project_id) - - return unless project.repository.exists? - - project.update_repository_size + update_repository_size(project) project.update_commit_count - if project.repository.root_ref - project.repository.build_cache - end + project.repository.refresh_method_caches(refresh.map(&:to_sym)) end - def try_obtain_lease_for(project_id) - self.class.lease_for(project_id).try_obtain + def update_repository_size(project) + return unless try_obtain_lease_for(project.id, :update_repository_size) + + Rails.logger.info("Updating repository size for project #{project.id}") + + project.update_repository_size + end + + private + + def try_obtain_lease_for(project_id, section) + Gitlab::ExclusiveLease. + new("project_cache_worker:#{project_id}:#{section}", timeout: LEASE_TIMEOUT). + try_obtain end end diff --git a/spec/models/project_spec.rb b/spec/models/project_spec.rb index 25458e20618..e183fa88873 100644 --- a/spec/models/project_spec.rb +++ b/spec/models/project_spec.rb @@ -1572,7 +1572,7 @@ describe Project, models: true do end it 'expires the avatar cache' do - expect(project.repository).to receive(:expire_avatar_cache).with(project.default_branch) + expect(project.repository).to receive(:expire_avatar_cache) project.change_head(project.default_branch) end diff --git a/spec/models/repository_spec.rb b/spec/models/repository_spec.rb index 635130cf797..04afb8ebc98 100644 --- a/spec/models/repository_spec.rb +++ b/spec/models/repository_spec.rb @@ -464,11 +464,7 @@ describe Repository, models: true do end end - describe "#changelog" do - before do - repository.send(:cache).expire(:changelog) - end - + describe "#changelog", caching: true do it 'accepts changelog' do expect(repository.tree).to receive(:blobs).and_return([TestBlob.new('changelog')]) @@ -500,17 +496,16 @@ describe Repository, models: true do end end - describe "#license_blob" do + describe "#license_blob", caching: true do before do - repository.send(:cache).expire(:license_blob) repository.remove_file(user, 'LICENSE', 'Remove LICENSE', 'master') end it 'handles when HEAD points to non-existent ref' do repository.commit_file(user, 'LICENSE', 'Copyright!', 'Add LICENSE', 'master', false) - rugged = double('rugged') - expect(rugged).to receive(:head_unborn?).and_return(true) - expect(repository).to receive(:rugged).and_return(rugged) + + allow(repository).to receive(:file_on_head). + and_raise(Rugged::ReferenceError) expect(repository.license_blob).to be_nil end @@ -537,22 +532,18 @@ describe Repository, models: true do end end - describe '#license_key' do + describe '#license_key', caching: true do before do - repository.send(:cache).expire(:license_key) repository.remove_file(user, 'LICENSE', 'Remove LICENSE', 'master') end - it 'handles when HEAD points to non-existent ref' do - repository.commit_file(user, 'LICENSE', 'Copyright!', 'Add LICENSE', 'master', false) - rugged = double('rugged') - expect(rugged).to receive(:head_unborn?).and_return(true) - expect(repository).to receive(:rugged).and_return(rugged) - + it 'returns nil when no license is detected' do expect(repository.license_key).to be_nil end - it 'returns nil when no license is detected' do + it 'returns nil when the repository does not exist' do + expect(repository).to receive(:exists?).and_return(false) + expect(repository.license_key).to be_nil end @@ -569,7 +560,7 @@ describe Repository, models: true do end end - describe "#gitlab_ci_yml" do + describe "#gitlab_ci_yml", caching: true do it 'returns valid file' do files = [TestBlob.new('file'), TestBlob.new('.gitlab-ci.yml'), TestBlob.new('copying')] expect(repository.tree).to receive(:blobs).and_return(files) @@ -583,7 +574,7 @@ describe Repository, models: true do end it 'returns nil for empty repository' do - expect(repository).to receive(:empty?).and_return(true) + allow(repository).to receive(:file_on_head).and_raise(Rugged::ReferenceError) expect(repository.gitlab_ci_yml).to be_nil end end @@ -778,7 +769,6 @@ describe Repository, models: true do expect(repository).not_to receive(:expire_emptiness_caches) expect(repository).to receive(:expire_branches_cache) expect(repository).to receive(:expire_has_visible_content_cache) - expect(repository).to receive(:expire_branch_count_cache) repository.update_branch_with_hooks(user, 'new-feature') { new_rev } end @@ -797,7 +787,6 @@ describe Repository, models: true do expect(empty_repository).to receive(:expire_emptiness_caches) expect(empty_repository).to receive(:expire_branches_cache) expect(empty_repository).to receive(:expire_has_visible_content_cache) - expect(empty_repository).to receive(:expire_branch_count_cache) empty_repository.commit_file(user, 'CHANGELOG', 'Changelog!', 'Updates file content', 'master', false) @@ -915,34 +904,6 @@ describe Repository, models: true do end end - describe '#expire_cache' do - it 'expires all caches' do - expect(repository).to receive(:expire_branch_cache) - - repository.expire_cache - end - - it 'expires the caches for a specific branch' do - expect(repository).to receive(:expire_branch_cache).with('master') - - repository.expire_cache('master') - end - - it 'expires the emptiness caches for an empty repository' do - expect(repository).to receive(:empty?).and_return(true) - expect(repository).to receive(:expire_emptiness_caches) - - repository.expire_cache - end - - it 'does not expire the emptiness caches for a non-empty repository' do - expect(repository).to receive(:empty?).and_return(false) - expect(repository).not_to receive(:expire_emptiness_caches) - - repository.expire_cache - end - end - describe '#expire_root_ref_cache' do it 'expires the root reference cache' do repository.root_ref @@ -1002,12 +963,23 @@ describe Repository, models: true do describe '#expire_emptiness_caches' do let(:cache) { repository.send(:cache) } - it 'expires the caches' do + it 'expires the caches for an empty repository' do + allow(repository).to receive(:empty?).and_return(true) + expect(cache).to receive(:expire).with(:empty?) expect(repository).to receive(:expire_has_visible_content_cache) repository.expire_emptiness_caches end + + it 'does not expire the cache for a non-empty repository' do + allow(repository).to receive(:empty?).and_return(false) + + expect(cache).not_to receive(:expire).with(:empty?) + expect(repository).not_to receive(:expire_has_visible_content_cache) + + repository.expire_emptiness_caches + end end describe :skip_merged_commit do @@ -1119,24 +1091,12 @@ describe Repository, models: true do repository.before_delete end - it 'flushes the tag count cache' do - expect(repository).to receive(:expire_tag_count_cache) - - repository.before_delete - end - it 'flushes the branches cache' do expect(repository).to receive(:expire_branches_cache) repository.before_delete end - it 'flushes the branch count cache' do - expect(repository).to receive(:expire_branch_count_cache) - - repository.before_delete - end - it 'flushes the root ref cache' do expect(repository).to receive(:expire_root_ref_cache) @@ -1161,36 +1121,18 @@ describe Repository, models: true do allow(repository).to receive(:exists?).and_return(true) end - it 'flushes the caches that depend on repository data' do - expect(repository).to receive(:expire_cache) - - repository.before_delete - end - it 'flushes the tags cache' do expect(repository).to receive(:expire_tags_cache) repository.before_delete end - it 'flushes the tag count cache' do - expect(repository).to receive(:expire_tag_count_cache) - - repository.before_delete - end - it 'flushes the branches cache' do expect(repository).to receive(:expire_branches_cache) repository.before_delete end - it 'flushes the branch count cache' do - expect(repository).to receive(:expire_branch_count_cache) - - repository.before_delete - end - it 'flushes the root ref cache' do expect(repository).to receive(:expire_root_ref_cache) @@ -1221,8 +1163,9 @@ describe Repository, models: true do describe '#before_push_tag' do it 'flushes the cache' do - expect(repository).to receive(:expire_cache) - expect(repository).to receive(:expire_tag_count_cache) + expect(repository).to receive(:expire_statistics_caches) + expect(repository).to receive(:expire_emptiness_caches) + expect(repository).to receive(:expire_tags_cache) repository.before_push_tag end @@ -1239,17 +1182,23 @@ describe Repository, models: true do describe '#after_import' do it 'flushes and builds the cache' do expect(repository).to receive(:expire_content_cache) - expect(repository).to receive(:build_cache) + expect(repository).to receive(:expire_tags_cache) + expect(repository).to receive(:expire_branches_cache) repository.after_import end end describe '#after_push_commit' do - it 'flushes the cache' do - expect(repository).to receive(:expire_cache).with('master', '123') + it 'expires statistics caches' do + expect(repository).to receive(:expire_statistics_caches). + and_call_original - repository.after_push_commit('master', '123') + expect(repository).to receive(:expire_branch_cache). + with('master'). + and_call_original + + repository.after_push_commit('master') end end @@ -1301,7 +1250,8 @@ describe Repository, models: true do describe '#before_remove_tag' do it 'flushes the tag cache' do - expect(repository).to receive(:expire_tag_count_cache) + expect(repository).to receive(:expire_tags_cache).and_call_original + expect(repository).to receive(:expire_statistics_caches).and_call_original repository.before_remove_tag end @@ -1319,23 +1269,23 @@ describe Repository, models: true do end end - describe '#expire_branch_count_cache' do - let(:cache) { repository.send(:cache) } - + describe '#expire_branches_cache' do it 'expires the cache' do - expect(cache).to receive(:expire).with(:branch_count) + expect(repository).to receive(:expire_method_caches). + with(%i(branch_names branch_count)). + and_call_original - repository.expire_branch_count_cache + repository.expire_branches_cache end end - describe '#expire_tag_count_cache' do - let(:cache) { repository.send(:cache) } - + describe '#expire_tags_cache' do it 'expires the cache' do - expect(cache).to receive(:expire).with(:tag_count) + expect(repository).to receive(:expire_method_caches). + with(%i(tag_names tag_count)). + and_call_original - repository.expire_tag_count_cache + repository.expire_tags_cache end end @@ -1411,84 +1361,27 @@ describe Repository, models: true do describe '#avatar' do it 'returns nil if repo does not exist' do - expect(repository).to receive(:exists?).and_return(false) + expect(repository).to receive(:file_on_head). + and_raise(Rugged::ReferenceError) expect(repository.avatar).to eq(nil) end it 'returns the first avatar file found in the repository' do - expect(repository).to receive(:blob_at_branch). - with('master', 'logo.png'). - and_return(true) + expect(repository).to receive(:file_on_head). + with(:avatar). + and_return(double(:tree, path: 'logo.png')) expect(repository.avatar).to eq('logo.png') end it 'caches the output' do - allow(repository).to receive(:blob_at_branch). - with('master', 'logo.png'). - and_return(true) + expect(repository).to receive(:file_on_head). + with(:avatar). + once. + and_return(double(:tree, path: 'logo.png')) - expect(repository.avatar).to eq('logo.png') - - expect(repository).not_to receive(:blob_at_branch) - expect(repository.avatar).to eq('logo.png') - end - end - - describe '#expire_avatar_cache' do - let(:cache) { repository.send(:cache) } - - before do - allow(repository).to receive(:cache).and_return(cache) - end - - context 'without a branch or revision' do - it 'flushes the cache' do - expect(cache).to receive(:expire).with(:avatar) - - repository.expire_avatar_cache - end - end - - context 'with a branch' do - it 'does not flush the cache if the branch is not the default branch' do - expect(cache).not_to receive(:expire) - - repository.expire_avatar_cache('cats') - end - - it 'flushes the cache if the branch equals the default branch' do - expect(cache).to receive(:expire).with(:avatar) - - repository.expire_avatar_cache(repository.root_ref) - end - end - - context 'with a branch and revision' do - let(:commit) { double(:commit) } - - before do - allow(repository).to receive(:commit).and_return(commit) - end - - it 'does not flush the cache if the commit does not change any logos' do - diff = double(:diff, new_path: 'test.txt') - - expect(commit).to receive(:raw_diffs).and_return([diff]) - expect(cache).not_to receive(:expire) - - repository.expire_avatar_cache(repository.root_ref, '123') - end - - it 'flushes the cache if the commit changes any of the logos' do - diff = double(:diff, new_path: Repository::AVATAR_FILES[0]) - - expect(commit).to receive(:raw_diffs).and_return([diff]) - expect(cache).to receive(:expire).with(:avatar) - - repository.expire_avatar_cache(repository.root_ref, '123') - end + 2.times { expect(repository.avatar).to eq('logo.png') } end end @@ -1502,40 +1395,6 @@ describe Repository, models: true do end end - describe '#build_cache' do - let(:cache) { repository.send(:cache) } - - it 'builds the caches if they do not already exist' do - cache_keys = repository.cache_keys + repository.cache_keys_for_branches_and_tags - - expect(cache).to receive(:exist?). - exactly(cache_keys.length). - times. - and_return(false) - - cache_keys.each do |key| - expect(repository).to receive(key) - end - - repository.build_cache - end - - it 'does not build any caches that already exist' do - cache_keys = repository.cache_keys + repository.cache_keys_for_branches_and_tags - - expect(cache).to receive(:exist?). - exactly(cache_keys.length). - times. - and_return(true) - - cache_keys.each do |key| - expect(repository).not_to receive(key) - end - - repository.build_cache - end - end - describe "#keep_around" do it "does not fail if we attempt to reference bad commit" do expect(repository.kept_around?('abc1234')).to be_falsey @@ -1592,47 +1451,6 @@ describe Repository, models: true do end end - describe '#changelog', caching: true do - it 'returns and caches the output' do - expect(repository).to receive(:file_on_head). - with(:changelog). - and_return(Gitlab::Git::Tree.new(path: 'CHANGELOG')). - once - - 2.times do - expect(repository.changelog).to be_an_instance_of(Gitlab::Git::Tree) - end - end - end - - describe '#license_blob', caching: true do - it 'returns and caches the output' do - expect(repository).to receive(:file_on_head). - with(:license). - and_return(Gitlab::Git::Tree.new(path: 'LICENSE')). - once - - 2.times do - expect(repository.license_blob).to be_an_instance_of(Gitlab::Git::Tree) - end - end - end - - describe '#license_key', caching: true do - it 'returns and caches the output' do - license = double(key: 'mit') - - expect(Licensee).to receive(:license). - with(repository.path). - and_return(license). - once - - 2.times do - expect(repository.license_key).to eq('mit') - end - end - end - describe '#gitignore', caching: true do it 'returns and caches the output' do expect(repository).to receive(:file_on_head). @@ -1659,16 +1477,200 @@ describe Repository, models: true do end end - describe '#gitlab_ci_yml', caching: true do - it 'returns and caches the output' do - expect(repository).to receive(:file_on_head). - with(:gitlab_ci). - and_return(Gitlab::Git::Tree.new(path: '.gitlab-ci.yml')). - once + describe '#readme', caching: true do + context 'with a non-existing repository' do + it 'returns nil' do + expect(repository).to receive(:tree).with(:head).and_return(nil) - 2.times do - expect(repository.gitlab_ci_yml).to be_an_instance_of(Gitlab::Git::Tree) + expect(repository.readme).to be_nil + end + end + + context 'with an existing repository' do + it 'returns the README' do + expect(repository.readme).to be_an_instance_of(Gitlab::Git::Blob) end end end + + describe '#expire_statistics_caches' do + it 'expires the caches' do + expect(repository).to receive(:expire_method_caches). + with(%i(size commit_count)) + + repository.expire_statistics_caches + end + end + + describe '#expire_method_caches' do + it 'expires the caches of the given methods' do + expect_any_instance_of(RepositoryCache).to receive(:expire).with(:readme) + expect_any_instance_of(RepositoryCache).to receive(:expire).with(:gitignore) + + repository.expire_method_caches(%i(readme gitignore)) + end + end + + describe '#expire_all_method_caches' do + it 'expires the caches of all methods' do + expect(repository).to receive(:expire_method_caches). + with(Repository::CACHED_METHODS) + + repository.expire_all_method_caches + end + end + + describe '#expire_avatar_cache' do + it 'expires the cache' do + expect(repository).to receive(:expire_method_caches).with(%i(avatar)) + + repository.expire_avatar_cache + end + end + + describe '#file_on_head' do + context 'with a non-existing repository' do + it 'returns nil' do + expect(repository).to receive(:tree).with(:head).and_return(nil) + + expect(repository.file_on_head(:readme)).to be_nil + end + end + + context 'with a repository that has no blobs' do + it 'returns nil' do + expect_any_instance_of(Tree).to receive(:blobs).and_return([]) + + expect(repository.file_on_head(:readme)).to be_nil + end + end + + context 'with an existing repository' do + it 'returns a Gitlab::Git::Tree' do + expect(repository.file_on_head(:readme)). + to be_an_instance_of(Gitlab::Git::Tree) + end + end + end + + describe '#head_tree' do + context 'with an existing repository' do + it 'returns a Tree' do + expect(repository.head_tree).to be_an_instance_of(Tree) + end + end + + context 'with a non-existing repository' do + it 'returns nil' do + expect(repository).to receive(:head_commit).and_return(nil) + + expect(repository.head_tree).to be_nil + end + end + end + + describe '#tree' do + context 'using a non-existing repository' do + before do + allow(repository).to receive(:head_commit).and_return(nil) + end + + it 'returns nil' do + expect(repository.tree(:head)).to be_nil + end + + it 'returns nil when using a path' do + expect(repository.tree(:head, 'README.md')).to be_nil + end + end + + context 'using an existing repository' do + it 'returns a Tree' do + expect(repository.tree(:head)).to be_an_instance_of(Tree) + end + end + end + + describe '#size' do + context 'with a non-existing repository' do + it 'returns 0' do + expect(repository).to receive(:exists?).and_return(false) + + expect(repository.size).to eq(0.0) + end + end + + context 'with an existing repository' do + it 'returns the repository size as a Float' do + expect(repository.size).to be_an_instance_of(Float) + end + end + end + + describe '#commit_count' do + context 'with a non-existing repository' do + it 'returns 0' do + expect(repository).to receive(:root_ref).and_return(nil) + + expect(repository.commit_count).to eq(0) + end + end + + context 'with an existing repository' do + it 'returns the commit count' do + expect(repository.commit_count).to be_an_instance_of(Fixnum) + end + end + end + + describe '#cache_method_output', caching: true do + context 'with a non-existing repository' do + let(:value) do + repository.cache_method_output(:cats, fallback: 10) do + raise Rugged::ReferenceError + end + end + + it 'returns a fallback value' do + expect(value).to eq(10) + end + + it 'does not cache the data' do + value + + expect(repository.instance_variable_defined?(:@cats)).to eq(false) + expect(repository.send(:cache).exist?(:cats)).to eq(false) + end + end + + context 'with an existing repository' do + it 'caches the output' do + object = double + + expect(object).to receive(:number).once.and_return(10) + + 2.times do + val = repository.cache_method_output(:cats) { object.number } + + expect(val).to eq(10) + end + + expect(repository.send(:cache).exist?(:cats)).to eq(true) + expect(repository.instance_variable_get(:@cats)).to eq(10) + end + end + end + + describe '#refresh_method_caches' do + it 'refreshes the caches of the given types' do + expect(repository).to receive(:expire_method_caches). + with(%i(readme license_blob license_key)) + + expect(repository).to receive(:readme) + expect(repository).to receive(:license_blob) + expect(repository).to receive(:license_key) + + repository.refresh_method_caches(%i(readme license)) + end + end end diff --git a/spec/requests/api/branches_spec.rb b/spec/requests/api/branches_spec.rb index 8f605757186..fe6b875b997 100644 --- a/spec/requests/api/branches_spec.rb +++ b/spec/requests/api/branches_spec.rb @@ -14,7 +14,7 @@ describe API::API, api: true do describe "GET /projects/:id/repository/branches" do it "returns an array of project branches" do - project.repository.expire_cache + project.repository.expire_all_method_caches get api("/projects/#{project.id}/repository/branches", user) expect(response).to have_http_status(200) diff --git a/spec/services/git_push_service_spec.rb b/spec/services/git_push_service_spec.rb index 62f9982e840..9d7702f5c96 100644 --- a/spec/services/git_push_service_spec.rb +++ b/spec/services/git_push_service_spec.rb @@ -27,27 +27,14 @@ describe GitPushService, services: true do it { is_expected.to be_truthy } - it 'flushes general cached data' do - expect(project.repository).to receive(:expire_cache). - with('master', newrev) + it 'calls the after_push_commit hook' do + expect(project.repository).to receive(:after_push_commit).with('master') subject end - it 'flushes the visible content cache' do - expect(project.repository).to receive(:expire_has_visible_content_cache) - - subject - end - - it 'flushes the branches cache' do - expect(project.repository).to receive(:expire_branches_cache) - - subject - end - - it 'flushes the branch count cache' do - expect(project.repository).to receive(:expire_branch_count_cache) + it 'calls the after_create_branch hook' do + expect(project.repository).to receive(:after_create_branch) subject end @@ -56,21 +43,8 @@ describe GitPushService, services: true do context 'existing branch' do it { is_expected.to be_truthy } - it 'flushes general cached data' do - expect(project.repository).to receive(:expire_cache). - with('master', newrev) - - subject - end - - it 'does not flush the branches cache' do - expect(project.repository).not_to receive(:expire_branches_cache) - - subject - end - - it 'does not flush the branch count cache' do - expect(project.repository).not_to receive(:expire_branch_count_cache) + it 'calls the after_push_commit hook' do + expect(project.repository).to receive(:after_push_commit).with('master') subject end @@ -81,27 +55,14 @@ describe GitPushService, services: true do it { is_expected.to be_truthy } - it 'flushes the visible content cache' do - expect(project.repository).to receive(:expire_has_visible_content_cache) + it 'calls the after_push_commit hook' do + expect(project.repository).to receive(:after_push_commit).with('master') subject end - it 'flushes the branches cache' do - expect(project.repository).to receive(:expire_branches_cache) - - subject - end - - it 'flushes the branch count cache' do - expect(project.repository).to receive(:expire_branch_count_cache) - - subject - end - - it 'flushes general cached data' do - expect(project.repository).to receive(:expire_cache). - with('master', newrev) + it 'calls the after_remove_branch hook' do + expect(project.repository).to receive(:after_remove_branch) subject end @@ -598,6 +559,51 @@ describe GitPushService, services: true do end end + describe '#update_caches' do + let(:service) do + described_class.new(project, + user, + oldrev: sample_commit.parent_id, + newrev: sample_commit.id, + ref: 'refs/heads/master') + end + + context 'on the default branch' do + before do + allow(service).to receive(:is_default_branch?).and_return(true) + end + + it 'flushes the caches of any special files that have been changed' do + commit = double(:commit) + diff = double(:diff, new_path: 'README.md') + + expect(commit).to receive(:raw_diffs).with(deltas_only: true). + and_return([diff]) + + service.push_commits = [commit] + + expect(ProjectCacheWorker).to receive(:perform_async). + with(project.id, %i(readme)) + + service.update_caches + end + end + + context 'on a non-default branch' do + before do + allow(service).to receive(:is_default_branch?).and_return(false) + end + + it 'does not flush any conditional caches' do + expect(ProjectCacheWorker).to receive(:perform_async). + with(project.id, []). + and_call_original + + service.update_caches + end + end + end + def execute_service(project, user, oldrev, newrev, ref) service = described_class.new(project, user, oldrev: oldrev, newrev: newrev, ref: ref ) service.execute diff --git a/spec/services/git_tag_push_service_spec.rb b/spec/services/git_tag_push_service_spec.rb index 0879e3ab4c8..bd074b9bd71 100644 --- a/spec/services/git_tag_push_service_spec.rb +++ b/spec/services/git_tag_push_service_spec.rb @@ -18,7 +18,7 @@ describe GitTagPushService, services: true do end it 'flushes general cached data' do - expect(project.repository).to receive(:expire_cache) + expect(project.repository).to receive(:before_push_tag) subject end @@ -28,12 +28,6 @@ describe GitTagPushService, services: true do subject end - - it 'flushes the tag count cache' do - expect(project.repository).to receive(:expire_tag_count_cache) - - subject - end end describe "Git Tag Push Data" do diff --git a/spec/workers/project_cache_worker_spec.rb b/spec/workers/project_cache_worker_spec.rb index bfa8c0ff2c6..855c28b584e 100644 --- a/spec/workers/project_cache_worker_spec.rb +++ b/spec/workers/project_cache_worker_spec.rb @@ -2,62 +2,78 @@ require 'spec_helper' describe ProjectCacheWorker do let(:project) { create(:project) } + let(:worker) { described_class.new } - subject { described_class.new } - - describe '.perform_async' do - it 'schedules the job when no lease exists' do - allow_any_instance_of(Gitlab::ExclusiveLease).to receive(:exists?). - and_return(false) - - expect_any_instance_of(described_class).to receive(:perform) - - described_class.perform_async(project.id) + describe '#perform' do + before do + allow_any_instance_of(Gitlab::ExclusiveLease).to receive(:try_obtain). + and_return(true) end - it 'does not schedule the job when a lease exists' do - allow_any_instance_of(Gitlab::ExclusiveLease).to receive(:exists?). - and_return(true) + context 'with a non-existing project' do + it 'does nothing' do + expect(worker).not_to receive(:update_repository_size) - expect_any_instance_of(described_class).not_to receive(:perform) + worker.perform(-1) + end + end - described_class.perform_async(project.id) + context 'with an existing project without a repository' do + it 'does nothing' do + allow_any_instance_of(Repository).to receive(:exists?).and_return(false) + + expect(worker).not_to receive(:update_repository_size) + + worker.perform(project.id) + end + end + + context 'with an existing project' do + it 'updates the repository size' do + expect(worker).to receive(:update_repository_size).and_call_original + + worker.perform(project.id) + end + + it 'updates the commit count' do + expect_any_instance_of(Project).to receive(:update_commit_count). + and_call_original + + worker.perform(project.id) + end + + it 'refreshes the method caches' do + expect_any_instance_of(Repository).to receive(:refresh_method_caches). + with(%i(readme)). + and_call_original + + worker.perform(project.id, %i(readme)) + end end end - describe '#perform' do - context 'when an exclusive lease can be obtained' do - before do - allow(subject).to receive(:try_obtain_lease_for).with(project.id). - and_return(true) - end + describe '#update_repository_size' do + context 'when a lease could not be obtained' do + it 'does not update the repository size' do + allow(worker).to receive(:try_obtain_lease_for). + with(project.id, :update_repository_size). + and_return(false) - it 'updates project cache data' do - expect_any_instance_of(Repository).to receive(:size) - expect_any_instance_of(Repository).to receive(:commit_count) + expect(project).not_to receive(:update_repository_size) - expect_any_instance_of(Project).to receive(:update_repository_size) - expect_any_instance_of(Project).to receive(:update_commit_count) - - subject.perform(project.id) - end - - it 'handles missing repository data' do - expect_any_instance_of(Repository).to receive(:exists?).and_return(false) - expect_any_instance_of(Repository).not_to receive(:size) - - subject.perform(project.id) + worker.update_repository_size(project) end end - context 'when an exclusive lease can not be obtained' do - it 'does nothing' do - allow(subject).to receive(:try_obtain_lease_for).with(project.id). - and_return(false) + context 'when a lease could be obtained' do + it 'updates the repository size' do + allow(worker).to receive(:try_obtain_lease_for). + with(project.id, :update_repository_size). + and_return(true) - expect(subject).not_to receive(:update_caches) + expect(project).to receive(:update_repository_size).and_call_original - subject.perform(project.id) + worker.update_repository_size(project) end end end From 289e433685867dbc569adc4e7d137253bb42c3c8 Mon Sep 17 00:00:00 2001 From: Yorick Peterse Date: Fri, 18 Nov 2016 15:18:12 +0100 Subject: [PATCH 4/4] Added CHANGELOG entry for !7360 --- changelogs/unreleased/smarter-cache-invalidation.yml | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 changelogs/unreleased/smarter-cache-invalidation.yml diff --git a/changelogs/unreleased/smarter-cache-invalidation.yml b/changelogs/unreleased/smarter-cache-invalidation.yml new file mode 100644 index 00000000000..14a93e26ac4 --- /dev/null +++ b/changelogs/unreleased/smarter-cache-invalidation.yml @@ -0,0 +1,4 @@ +--- +title: Rework cache invalidation so only changed data is refreshed +merge_request: 7360 +author: