From 6bcc52a53678ca68001189c801497862d3f6e758 Mon Sep 17 00:00:00 2001 From: Jacob Vosmaer Date: Thu, 27 Oct 2016 14:59:52 +0200 Subject: [PATCH] Refine Git garbage collection --- .../admin/application_settings_controller.rb | 5 + app/models/application_setting.rb | 17 +++ app/services/projects/housekeeping_service.rb | 59 +++++++-- .../application_settings/_form.html.haml | 39 ++++++ app/workers/git_garbage_collect_worker.rb | 47 ++++++- changelogs/unreleased/git-gc-improvements.yml | 4 + ...dd_housekeeping_to_application_settings.rb | 32 +++++ db/schema.rb | 5 + doc/administration/housekeeping.md | 8 ++ lib/gitlab/backend/shell.rb | 13 -- lib/gitlab/exclusive_lease.rb | 68 ++++------ spec/lib/gitlab/backend/shell_spec.rb | 1 - spec/lib/gitlab/exclusive_lease_spec.rb | 27 +++- spec/models/application_setting_spec.rb | 18 +++ .../projects/housekeeping_service_spec.rb | 28 +++- .../git_garbage_collect_worker_spec.rb | 122 ++++++++++++++++-- 16 files changed, 410 insertions(+), 83 deletions(-) create mode 100644 changelogs/unreleased/git-gc-improvements.yml create mode 100644 db/migrate/20161031155516_add_housekeeping_to_application_settings.rb diff --git a/app/controllers/admin/application_settings_controller.rb b/app/controllers/admin/application_settings_controller.rb index 86e808314f4..52e0256943a 100644 --- a/app/controllers/admin/application_settings_controller.rb +++ b/app/controllers/admin/application_settings_controller.rb @@ -117,6 +117,11 @@ class Admin::ApplicationSettingsController < Admin::ApplicationController :send_user_confirmation_email, :container_registry_token_expire_delay, :enabled_git_access_protocol, + :housekeeping_enabled, + :housekeeping_bitmaps_enabled, + :housekeeping_incremental_repack_period, + :housekeeping_full_repack_period, + :housekeeping_gc_period, repository_storages: [], restricted_visibility_levels: [], import_sources: [], diff --git a/app/models/application_setting.rb b/app/models/application_setting.rb index 
6e7a90e7d9c..fa5188ca27b 100644 --- a/app/models/application_setting.rb +++ b/app/models/application_setting.rb @@ -85,6 +85,18 @@ class ApplicationSetting < ActiveRecord::Base presence: { message: 'Domain blacklist cannot be empty if Blacklist is enabled.' }, if: :domain_blacklist_enabled? + validates :housekeeping_incremental_repack_period, + presence: true, + numericality: { only_integer: true, greater_than: 0 } + + validates :housekeeping_full_repack_period, + presence: true, + numericality: { only_integer: true, greater_than: :housekeeping_incremental_repack_period } + + validates :housekeeping_gc_period, + presence: true, + numericality: { only_integer: true, greater_than: :housekeeping_full_repack_period } + validates_each :restricted_visibility_levels do |record, attr, value| unless value.nil? value.each do |level| @@ -168,6 +180,11 @@ class ApplicationSetting < ActiveRecord::Base container_registry_token_expire_delay: 5, repository_storages: ['default'], user_default_external: false, + housekeeping_enabled: true, + housekeeping_bitmaps_enabled: true, + housekeeping_incremental_repack_period: 10, + housekeeping_full_repack_period: 50, + housekeeping_gc_period: 200, ) end diff --git a/app/services/projects/housekeeping_service.rb b/app/services/projects/housekeeping_service.rb index c3dfc8cfbe8..4b8946f8ee2 100644 --- a/app/services/projects/housekeeping_service.rb +++ b/app/services/projects/housekeeping_service.rb @@ -7,6 +7,8 @@ # module Projects class HousekeepingService < BaseService + include Gitlab::CurrentSettings + LEASE_TIMEOUT = 3600 class LeaseTaken < StandardError @@ -20,13 +22,14 @@ module Projects end def execute - raise LeaseTaken unless try_obtain_lease + lease_uuid = try_obtain_lease + raise LeaseTaken unless lease_uuid.present? - execute_gitlab_shell_gc + execute_gitlab_shell_gc(lease_uuid) end def needed? - @project.pushes_since_gc >= 10 + pushes_since_gc > 0 && period_match? && housekeeping_enabled? end def increment! 
@@ -37,19 +40,59 @@ module Projects private - def execute_gitlab_shell_gc - GitGarbageCollectWorker.perform_async(@project.id) + def execute_gitlab_shell_gc(lease_uuid) + GitGarbageCollectWorker.perform_async(@project.id, task, lease_key, lease_uuid) ensure - Gitlab::Metrics.measure(:reset_pushes_since_gc) do - @project.reset_pushes_since_gc + if pushes_since_gc >= gc_period + Gitlab::Metrics.measure(:reset_pushes_since_gc) do + @project.reset_pushes_since_gc + end end end def try_obtain_lease Gitlab::Metrics.measure(:obtain_housekeeping_lease) do - lease = ::Gitlab::ExclusiveLease.new("project_housekeeping:#{@project.id}", timeout: LEASE_TIMEOUT) + lease = ::Gitlab::ExclusiveLease.new(lease_key, timeout: LEASE_TIMEOUT) lease.try_obtain end end + + def lease_key + "project_housekeeping:#{@project.id}" + end + + def pushes_since_gc + @project.pushes_since_gc + end + + def task + if pushes_since_gc % gc_period == 0 + :gc + elsif pushes_since_gc % full_repack_period == 0 + :full_repack + else + :incremental_repack + end + end + + def period_match? + [gc_period, full_repack_period, repack_period].any? { |period| pushes_since_gc % period == 0 } + end + + def housekeeping_enabled? + current_application_settings.housekeeping_enabled + end + + def gc_period + current_application_settings.housekeeping_gc_period + end + + def full_repack_period + current_application_settings.housekeeping_full_repack_period + end + + def repack_period + current_application_settings.housekeeping_incremental_repack_period + end end end diff --git a/app/views/admin/application_settings/_form.html.haml b/app/views/admin/application_settings/_form.html.haml index 28003e5f509..450ec322f2c 100644 --- a/app/views/admin/application_settings/_form.html.haml +++ b/app/views/admin/application_settings/_form.html.haml @@ -422,5 +422,44 @@ Enable this option to include the name of the author of the issue, merge request or comment in the email body instead. 
+ %fieldset + %legend Automatic Git repository housekeeping + .form-group + .col-sm-offset-2.col-sm-10 + .checkbox + = f.label :housekeeping_enabled do + = f.check_box :housekeeping_enabled + Enable automatic repository housekeeping (git repack, git gc) + .help-block + If you keep automatic housekeeping disabled for a long time Git + repository access on your GitLab server will become slower and your + repositories will use more disk space. We recommend to always leave + this enabled. + .checkbox + = f.label :housekeeping_bitmaps_enabled do + = f.check_box :housekeeping_bitmaps_enabled + Enable Git pack file bitmap creation + .help-block + Creating pack file bitmaps makes housekeeping take a little longer but + bitmaps should accelerate 'git clone' performance. + .form-group + = f.label :housekeeping_incremental_repack_period, 'Incremental repack period', class: 'control-label col-sm-2' + .col-sm-10 + = f.number_field :housekeeping_incremental_repack_period, class: 'form-control' + .help-block + Number of Git pushes after which an incremental 'git repack' is run. + .form-group + = f.label :housekeeping_full_repack_period, 'Full repack period', class: 'control-label col-sm-2' + .col-sm-10 + = f.number_field :housekeeping_full_repack_period, class: 'form-control' + .help-block + Number of Git pushes after which a full 'git repack' is run. + .form-group + = f.label :housekeeping_gc_period, 'Git GC period', class: 'control-label col-sm-2' + .col-sm-10 + = f.number_field :housekeeping_gc_period, class: 'form-control' + .help-block + Number of Git pushes after which 'git gc' is run. 
+ .form-actions = f.submit 'Save', class: 'btn btn-save' diff --git a/app/workers/git_garbage_collect_worker.rb b/app/workers/git_garbage_collect_worker.rb index 65f8093b5b0..d369b639ae9 100644 --- a/app/workers/git_garbage_collect_worker.rb +++ b/app/workers/git_garbage_collect_worker.rb @@ -1,17 +1,58 @@ class GitGarbageCollectWorker include Sidekiq::Worker - include Gitlab::ShellAdapter include DedicatedSidekiqQueue + include Gitlab::CurrentSettings sidekiq_options retry: false - def perform(project_id) + def perform(project_id, task = :gc, lease_key = nil, lease_uuid = nil) project = Project.find(project_id) + task = task.to_sym + + cmd = command(task) + repo_path = project.repository.path_to_repo + description = "'#{cmd.join(' ')}' in #{repo_path}" + + Gitlab::GitLogger.info(description) + + output, status = Gitlab::Popen.popen(cmd, repo_path) + Gitlab::GitLogger.error("#{description} failed:\n#{output}") unless status.zero? - gitlab_shell.gc(project.repository_storage_path, project.path_with_namespace) # Refresh the branch cache in case garbage collection caused a ref lookup to fail + flush_ref_caches(project) if task == :gc + ensure + Gitlab::ExclusiveLease.cancel(lease_key, lease_uuid) if lease_key.present? && lease_uuid.present? + end + + private + + def command(task) + case task + when :gc + git(write_bitmaps: bitmaps_enabled?) + %w[gc] + when :full_repack + git(write_bitmaps: bitmaps_enabled?) + %w[repack -A -d --pack-kept-objects] + when :incremental_repack + # Normal git repack fails when bitmaps are enabled. It is impossible to + # create a bitmap here anyway. + git(write_bitmaps: false) + %w[repack -d] + else + raise "Invalid gc task: #{task.inspect}" + end + end + + def flush_ref_caches(project) project.repository.after_create_branch project.repository.branch_names project.repository.has_visible_content? end + + def bitmaps_enabled? 
+ current_application_settings.housekeeping_bitmaps_enabled + end + + def git(write_bitmaps:) + config_value = write_bitmaps ? 'true' : 'false' + %W[git -c repack.writeBitmaps=#{config_value}] + end end diff --git a/changelogs/unreleased/git-gc-improvements.yml b/changelogs/unreleased/git-gc-improvements.yml new file mode 100644 index 00000000000..f15e667ce87 --- /dev/null +++ b/changelogs/unreleased/git-gc-improvements.yml @@ -0,0 +1,4 @@ +--- +title: Finer-grained Git garbage collection +merge_request: 6588 +author: diff --git a/db/migrate/20161031155516_add_housekeeping_to_application_settings.rb b/db/migrate/20161031155516_add_housekeeping_to_application_settings.rb new file mode 100644 index 00000000000..5a451fb575b --- /dev/null +++ b/db/migrate/20161031155516_add_housekeeping_to_application_settings.rb @@ -0,0 +1,32 @@ +# See http://doc.gitlab.com/ce/development/migration_style_guide.html +# for more information on how to write migrations for GitLab. + +class AddHousekeepingToApplicationSettings < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + + # Set this constant to true if this migration requires downtime. + DOWNTIME = false + + # When a migration requires downtime you **must** uncomment the following + # constant and define a short and easy to understand explanation as to why the + # migration requires downtime. + # DOWNTIME_REASON = '' + + disable_ddl_transaction! 
+ + def up + add_column_with_default(:application_settings, :housekeeping_enabled, :boolean, default: true, allow_null: false) + add_column_with_default(:application_settings, :housekeeping_bitmaps_enabled, :boolean, default: true, allow_null: false) + add_column_with_default(:application_settings, :housekeeping_incremental_repack_period, :integer, default: 10, allow_null: false) + add_column_with_default(:application_settings, :housekeeping_full_repack_period, :integer, default: 50, allow_null: false) + add_column_with_default(:application_settings, :housekeeping_gc_period, :integer, default: 200, allow_null: false) + end + + def down + remove_column(:application_settings, :housekeeping_enabled, :boolean, default: true, allow_null: false) + remove_column(:application_settings, :housekeeping_bitmaps_enabled, :boolean, default: true, allow_null: false) + remove_column(:application_settings, :housekeeping_incremental_repack_period, :integer, default: 10, allow_null: false) + remove_column(:application_settings, :housekeeping_full_repack_period, :integer, default: 50, allow_null: false) + remove_column(:application_settings, :housekeeping_gc_period, :integer, default: 200, allow_null: false) + end +end diff --git a/db/schema.rb b/db/schema.rb index dc088925d97..48cb24ed20d 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -98,6 +98,11 @@ ActiveRecord::Schema.define(version: 20161103171205) do t.text "help_page_text_html" t.text "shared_runners_text_html" t.text "after_sign_up_text_html" + t.boolean "housekeeping_enabled", default: true, null: false + t.boolean "housekeeping_bitmaps_enabled", default: true, null: false + t.integer "housekeeping_incremental_repack_period", default: 10, null: false + t.integer "housekeeping_full_repack_period", default: 50, null: false + t.integer "housekeeping_gc_period", default: 200, null: false end create_table "audit_events", force: :cascade do |t| diff --git a/doc/administration/housekeeping.md b/doc/administration/housekeeping.md 
index ad1fa98b63b..f846c06ca42 100644 --- a/doc/administration/housekeeping.md +++ b/doc/administration/housekeeping.md @@ -3,6 +3,14 @@ > [Introduced][ce-2371] in GitLab 8.4. --- +## Automatic housekeeping + +GitLab automatically runs `git gc` and `git repack` on repositories +after Git pushes. If needed, you can change how often this happens, or +turn it off, under **Admin area > Settings** +(`/admin/application_settings`). + +## Manual housekeeping The housekeeping function runs `git gc` ([man page][man]) on the current project Git repository. diff --git a/lib/gitlab/backend/shell.rb b/lib/gitlab/backend/shell.rb index 9cec71a3222..82e194c1af1 100644 --- a/lib/gitlab/backend/shell.rb +++ b/lib/gitlab/backend/shell.rb @@ -127,19 +127,6 @@ module Gitlab 'rm-project', storage, "#{name}.git"]) end - # Gc repository - # - # storage - project storage path - # path - project path with namespace - # - # Ex. - # gc("/path/to/storage", "gitlab/gitlab-ci") - # - def gc(storage, path) - Gitlab::Utils.system_silent([gitlab_shell_projects_path, 'gc', - storage, "#{path}.git"]) - end - # Add new key to gitlab-shell # # Ex. diff --git a/lib/gitlab/exclusive_lease.rb b/lib/gitlab/exclusive_lease.rb index 7e8f35e9298..2dd42704396 100644 --- a/lib/gitlab/exclusive_lease.rb +++ b/lib/gitlab/exclusive_lease.rb @@ -1,66 +1,52 @@ +require 'securerandom' + module Gitlab # This class implements an 'exclusive lease'. We call it a 'lease' # because it has a set expiry time. We call it 'exclusive' because only # one caller may obtain a lease for a given key at a time. The # implementation is intended to work across GitLab processes and across - # servers. It is a 'cheap' alternative to using SQL queries and updates: + # servers. It is a cheap alternative to using SQL queries and updates: # you do not need to change the SQL schema to start using # ExclusiveLease. # - # It is important to choose the timeout wisely. 
If the timeout is very - # high (1 hour) then the throughput of your operation gets very low (at - # most once an hour). If the timeout is lower than how long your - # operation may take then you cannot count on exclusivity. For example, - # if the timeout is 10 seconds and you do an operation which may take 20 - # seconds then two overlapping operations may hold a lease for the same - # key at the same time. - # - # This class has no 'cancel' method. I originally decided against adding - # it because it would add complexity and a false sense of security. The - # complexity: instead of setting '1' we would have to set a UUID, and to - # delete it we would have to execute Lua on the Redis server to only - # delete the key if the value was our own UUID. Otherwise there is a - # chance that when you intend to cancel your lease you actually delete - # someone else's. The false sense of security: you cannot design your - # system to rely too much on the lease being cancelled after use because - # the calling (Ruby) process may crash or be killed. You _cannot_ count - # on begin/ensure blocks to cancel a lease, because the 'ensure' does - # not always run. Think of 'kill -9' from the Unicorn master for - # instance. - # - # If you find that leases are getting in your way, ask yourself: would - # it be enough to lower the lease timeout? Another thing that might be - # appropriate is to only use a lease for bulk/automated operations, and - # to ignore the lease when you get a single 'manual' user request (a - # button click). - # class ExclusiveLease - def initialize(key, timeout:) - @key, @timeout = key, timeout + LUA_CANCEL_SCRIPT = <<-EOS + local key, uuid = KEYS[1], ARGV[1] + if redis.call("get", key) == uuid then + redis.call("del", key) + end + EOS + + def self.cancel(key, uuid) + Gitlab::Redis.with do |redis| + redis.eval(LUA_CANCEL_SCRIPT, keys: [redis_key(key)], argv: [uuid]) + end end - # Try to obtain the lease. 
Return true on success, + def self.redis_key(key) + "gitlab:exclusive_lease:#{key}" + end + + def initialize(key, timeout:) + @redis_key = self.class.redis_key(key) + @timeout = timeout + @uuid = SecureRandom.uuid + end + + # Try to obtain the lease. Return lease UUID on success, # false if the lease is already taken. def try_obtain # Performing a single SET is atomic Gitlab::Redis.with do |redis| - !!redis.set(redis_key, '1', nx: true, ex: @timeout) + redis.set(@redis_key, @uuid, nx: true, ex: @timeout) && @uuid end end # Returns true if the key for this lease is set. def exists? Gitlab::Redis.with do |redis| - redis.exists(redis_key) + redis.exists(@redis_key) end end - - # No #cancel method. See comments above! - - private - - def redis_key - "gitlab:exclusive_lease:#{@key}" - end end end diff --git a/spec/lib/gitlab/backend/shell_spec.rb b/spec/lib/gitlab/backend/shell_spec.rb index f826d0d1b04..4b08a02ec73 100644 --- a/spec/lib/gitlab/backend/shell_spec.rb +++ b/spec/lib/gitlab/backend/shell_spec.rb @@ -14,7 +14,6 @@ describe Gitlab::Shell, lib: true do it { is_expected.to respond_to :add_repository } it { is_expected.to respond_to :remove_repository } it { is_expected.to respond_to :fork_repository } - it { is_expected.to respond_to :gc } it { is_expected.to respond_to :add_namespace } it { is_expected.to respond_to :rm_namespace } it { is_expected.to respond_to :mv_namespace } diff --git a/spec/lib/gitlab/exclusive_lease_spec.rb b/spec/lib/gitlab/exclusive_lease_spec.rb index 6b3bd08b978..a366d68a146 100644 --- a/spec/lib/gitlab/exclusive_lease_spec.rb +++ b/spec/lib/gitlab/exclusive_lease_spec.rb @@ -5,32 +5,47 @@ describe Gitlab::ExclusiveLease, type: :redis do describe '#try_obtain' do it 'cannot obtain twice before the lease has expired' do - lease = Gitlab::ExclusiveLease.new(unique_key, timeout: 3600) - expect(lease.try_obtain).to eq(true) + lease = described_class.new(unique_key, timeout: 3600) + expect(lease.try_obtain).to be_present 
expect(lease.try_obtain).to eq(false) end it 'can obtain after the lease has expired' do timeout = 1 - lease = Gitlab::ExclusiveLease.new(unique_key, timeout: timeout) + lease = described_class.new(unique_key, timeout: timeout) lease.try_obtain # start the lease sleep(2 * timeout) # lease should have expired now - expect(lease.try_obtain).to eq(true) + expect(lease.try_obtain).to be_present end end describe '#exists?' do it 'returns true for an existing lease' do - lease = Gitlab::ExclusiveLease.new(unique_key, timeout: 3600) + lease = described_class.new(unique_key, timeout: 3600) lease.try_obtain expect(lease.exists?).to eq(true) end it 'returns false for a lease that does not exist' do - lease = Gitlab::ExclusiveLease.new(unique_key, timeout: 3600) + lease = described_class.new(unique_key, timeout: 3600) expect(lease.exists?).to eq(false) end end + + describe '.cancel' do + it 'can cancel a lease' do + uuid = new_lease(unique_key) + expect(uuid).to be_present + expect(new_lease(unique_key)).to eq(false) + + described_class.cancel(unique_key, uuid) + expect(new_lease(unique_key)).to be_present + end + + def new_lease(key) + described_class.new(key, timeout: 3600).try_obtain + end + end end diff --git a/spec/models/application_setting_spec.rb b/spec/models/application_setting_spec.rb index 2b76e056f3c..b950fcdd81a 100644 --- a/spec/models/application_setting_spec.rb +++ b/spec/models/application_setting_spec.rb @@ -98,6 +98,24 @@ describe ApplicationSetting, models: true do end end end + + context 'housekeeping settings' do + it { is_expected.not_to allow_value(0).for(:housekeeping_incremental_repack_period) } + + it 'wants the full repack period to be longer than the incremental repack period' do + subject.housekeeping_incremental_repack_period = 2 + subject.housekeeping_full_repack_period = 1 + + expect(subject).not_to be_valid + end + + it 'wants the gc period to be longer than the full repack period' do + subject.housekeeping_full_repack_period = 2 + 
subject.housekeeping_gc_period = 1 + + expect(subject).not_to be_valid + end + end end context 'restricted signup domains' do diff --git a/spec/services/projects/housekeeping_service_spec.rb b/spec/services/projects/housekeeping_service_spec.rb index cf90b33dfb4..57a5aa5cedc 100644 --- a/spec/services/projects/housekeeping_service_spec.rb +++ b/spec/services/projects/housekeeping_service_spec.rb @@ -14,8 +14,10 @@ describe Projects::HousekeepingService do describe '#execute' do it 'enqueues a sidekiq job' do - expect(subject).to receive(:try_obtain_lease).and_return(true) - expect(GitGarbageCollectWorker).to receive(:perform_async).with(project.id) + expect(subject).to receive(:try_obtain_lease).and_return(:the_uuid) + expect(subject).to receive(:lease_key).and_return(:the_lease_key) + expect(subject).to receive(:task).and_return(:the_task) + expect(GitGarbageCollectWorker).to receive(:perform_async).with(project.id, :the_task, :the_lease_key, :the_uuid) subject.execute expect(project.reload.pushes_since_gc).to eq(0) @@ -58,4 +60,26 @@ describe Projects::HousekeepingService do end.to change { project.pushes_since_gc }.from(0).to(1) end end + + it 'uses all three kinds of housekeeping we offer' do + allow(subject).to receive(:try_obtain_lease).and_return(:the_uuid) + allow(subject).to receive(:lease_key).and_return(:the_lease_key) + + # At push 200 + expect(GitGarbageCollectWorker).to receive(:perform_async).with(project.id, :gc, :the_lease_key, :the_uuid). + exactly(1).times + # At push 50, 100, 150 + expect(GitGarbageCollectWorker).to receive(:perform_async).with(project.id, :full_repack, :the_lease_key, :the_uuid). + exactly(3).times + # At push 10, 20, ... (except those above) + expect(GitGarbageCollectWorker).to receive(:perform_async).with(project.id, :incremental_repack, :the_lease_key, :the_uuid). + exactly(16).times + + 201.times do + subject.increment! + subject.execute if subject.needed? 
+ end + + expect(project.pushes_since_gc).to eq(1) + end end diff --git a/spec/workers/git_garbage_collect_worker_spec.rb b/spec/workers/git_garbage_collect_worker_spec.rb index c9f5aae0815..ae258bde26d 100644 --- a/spec/workers/git_garbage_collect_worker_spec.rb +++ b/spec/workers/git_garbage_collect_worker_spec.rb @@ -1,3 +1,6 @@ +require 'digest' +require 'fileutils' + require 'spec_helper' describe GitGarbageCollectWorker do @@ -6,16 +9,12 @@ describe GitGarbageCollectWorker do subject { GitGarbageCollectWorker.new } - before do - allow(subject).to receive(:gitlab_shell).and_return(shell) - end - describe "#perform" do - it "runs `git gc`" do - expect(shell).to receive(:gc).with( - project.repository_storage_path, - project.path_with_namespace). - and_return(true) + it "flushes ref caches when the task is 'gc'" do + expect(subject).to receive(:command).with(:gc).and_return([:the, :command]) + expect(Gitlab::Popen).to receive(:popen). + with([:the, :command], project.repository.path_to_repo).and_return(["", 0]) + expect_any_instance_of(Repository).to receive(:after_create_branch).and_call_original expect_any_instance_of(Repository).to receive(:branch_names).and_call_original expect_any_instance_of(Repository).to receive(:branch_count).and_call_original @@ -23,5 +22,110 @@ describe GitGarbageCollectWorker do subject.perform(project.id) end + + shared_examples 'gc tasks' do + before { allow(subject).to receive(:bitmaps_enabled?).and_return(bitmaps_enabled) } + + it 'incremental repack adds a new packfile' do + create_objects(project) + before_packs = packs(project) + + expect(before_packs.count).to be >= 1 + + subject.perform(project.id, 'incremental_repack') + after_packs = packs(project) + + # Exactly one new pack should have been created + expect(after_packs.count).to eq(before_packs.count + 1) + + # Previously existing packs are still around + expect(before_packs & after_packs).to eq(before_packs) + end + + it 'full repack consolidates into 1 packfile' do + 
create_objects(project) + subject.perform(project.id, 'incremental_repack') + before_packs = packs(project) + + expect(before_packs.count).to be >= 2 + + subject.perform(project.id, 'full_repack') + after_packs = packs(project) + + expect(after_packs.count).to eq(1) + + # Previously existing packs should be gone now + expect(after_packs - before_packs).to eq(after_packs) + + expect(File.exist?(bitmap_path(after_packs.first))).to eq(bitmaps_enabled) + end + + it 'gc consolidates into 1 packfile and updates packed-refs' do + create_objects(project) + before_packs = packs(project) + before_packed_refs = packed_refs(project) + + expect(before_packs.count).to be >= 1 + + subject.perform(project.id, 'gc') + after_packed_refs = packed_refs(project) + after_packs = packs(project) + + expect(after_packs.count).to eq(1) + + # Previously existing packs should be gone now + expect(after_packs - before_packs).to eq(after_packs) + + # The packed-refs file should have been updated during 'git gc' + expect(before_packed_refs).not_to eq(after_packed_refs) + + expect(File.exist?(bitmap_path(after_packs.first))).to eq(bitmaps_enabled) + end + end + + context 'with bitmaps enabled' do + let(:bitmaps_enabled) { true } + + include_examples 'gc tasks' + end + + context 'with bitmaps disabled' do + let(:bitmaps_enabled) { false } + + include_examples 'gc tasks' + end + end + + # Create a new commit on a random new branch + def create_objects(project) + rugged = project.repository.rugged + old_commit = rugged.branches.first.target + new_commit_sha = Rugged::Commit.create( + rugged, + message: "hello world #{SecureRandom.hex(6)}", + author: Gitlab::Git::committer_hash(email: 'foo@bar', name: 'baz'), + committer: Gitlab::Git::committer_hash(email: 'foo@bar', name: 'baz'), + tree: old_commit.tree, + parents: [old_commit], + ) + project.repository.update_ref!( + "refs/heads/#{SecureRandom.hex(6)}", + new_commit_sha, + Gitlab::Git::BLANK_SHA + ) + end + + def packs(project) + 
Dir["#{project.repository.path_to_repo}/objects/pack/*.pack"] + end + + def packed_refs(project) + path = "#{project.repository.path_to_repo}/packed-refs" + FileUtils.touch(path) + File.read(path) + end + + def bitmap_path(pack) + pack.sub(/\.pack\z/, '.bitmap') end end