diff --git a/app/models/ci/build_trace_chunk.rb b/app/models/ci/build_trace_chunk.rb index 5dc43c6e00f..3f4a901b626 100644 --- a/app/models/ci/build_trace_chunk.rb +++ b/app/models/ci/build_trace_chunk.rb @@ -1,10 +1,12 @@ module Ci class BuildTraceChunk < ActiveRecord::Base + include FastDestroyAll extend Gitlab::Ci::Model belongs_to :build, class_name: "Ci::Build", foreign_key: :build_id default_value_for :data_store, :redis + fast_destroy_all_with :delete_all_redis_data, :redis_all_data_keys WriteError = Class.new(StandardError) @@ -19,27 +21,23 @@ module Ci db: 2 } - def self.delayed_cleanup_blk - ids = all.redis.pluck(:build_id, :chunk_index).map do |data| - "gitlab:ci:trace:#{data.first}:chunks:#{data.second}" + class << self + def redis_data_key(build_id, chunk_index) + "gitlab:ci:trace:#{build_id}:chunks:#{chunk_index}" end - puts "before cleanup: #{ids.count}" - - Proc.new do - puts "after cleanup: #{ids.count}" - Gitlab::Redis::SharedState.with do |redis| - redis.del(ids) - end unless ids.empty? - - true + def redis_all_data_keys + redis.pluck(:build_id, :chunk_index).map do |data| + redis_data_key(data.first, data.second) + end end - end - def self.fast_destroy_all - delayed_cleanup_blk.tap do |cleanup| - delete_all - cleanup.call + def delete_all_redis_data(redis_keys) + if redis_keys.any? + Gitlab::Redis::SharedState.with do |redis| + redis.del(redis_keys) + end + end end end @@ -130,26 +128,22 @@ module Ci def redis_data Gitlab::Redis::SharedState.with do |redis| - redis.get(redis_data_key) + redis.get(self.class.redis_data_key(build_id, chunk_index)) end end def redis_set_data(data) Gitlab::Redis::SharedState.with do |redis| - redis.set(redis_data_key, data, ex: CHUNK_REDIS_TTL) + redis.set(self.class.redis_data_key(build_id, chunk_index), data, ex: CHUNK_REDIS_TTL) end end def redis_delete_data Gitlab::Redis::SharedState.with do |redis| - redis.del(redis_data_key) + redis.del(self.class.redis_data_key(build_id, chunk_index)) end end - def redis_data_key - "gitlab:ci:trace:#{build_id}:chunks:#{chunk_index}" - end - def redis_lock_key "trace_write:#{build_id}:chunks:#{chunk_index}" end diff --git a/app/models/concerns/fast_destroy_all.rb b/app/models/concerns/fast_destroy_all.rb new file mode 100644 index 00000000000..28ab541dd13 --- /dev/null +++ b/app/models/concerns/fast_destroy_all.rb @@ -0,0 +1,61 @@ +## +# This module is for replacing `dependent: :destroy` and `before_destroy` hooks. +# +# In general, `destroy_all` is inefficient because it calls each callback with `DELETE` queries i.e. O(n), whereas, +# `delete_all` is efficient as it deletes all rows with a single `DELETE` query. +# +# It's better to use `delete_all` as much as possible, however, in the real cases, it's hard to adopt because +# in some cases, external data (in ObjectStorage/FileStorage/Redis) is assosiated with rows. +# +# This module introduces a protocol to support the adoption with easy way. +# You can use in the following scnenes +# - When calling `destroy_all` explicitly +# e.g. `job.trace_chunks.fast_destroy_all`` +# - When a parent record is deleted and all children are deleted subsequently (cascade delete) +# e.g. `before_destroy -> { run_after_commit(&build_trace_chunks.delete_all_external_data_proc) }`` +module FastDestroyAll + extend ActiveSupport::Concern + + included do + class_attribute :_delete_method, :_delete_params_generator + end + + class_methods do + ## + # This method is for registering :delete_method and :delete_params_generator + # You have to define the method if you want to use `FastDestroyAll`` module. + # + # e.g. fast_destroy_all_with :delete_all_redis_data, :redis_all_data_keys + def fast_destroy_all_with(delete_method, delete_params_generator) + self._delete_method = delete_method + self._delete_params_generator = delete_params_generator + end + + ## + # This method generates a proc to delete external data. + # It's useful especially when you want to hook parent record's deletion event. + # + # e.g. before_destroy -> { run_after_commit(&build_trace_chunks.delete_all_external_data_proc) } + def delete_all_external_data_proc + params = send self._delete_params_generator + callee = self # Preserve the callee class, otherwise `proc` uses a different class + + proc do + callee.send callee._delete_method, params + + true + end + end + + ## + # This method deletes all rows at first and delete all external data at second. + # Before deleting the rows, it generates a proc to delete external data. + # So it won't lose the track of deleting external data, even if it happened after rows had been deleted. + def fast_destroy_all + delete_all_external_data_proc.tap do |delete_all_external_data| + delete_all + delete_all_external_data.call + end + end + end +end diff --git a/app/models/project.rb b/app/models/project.rb index 48e30f0fd36..fd46c7b1c70 100644 --- a/app/models/project.rb +++ b/app/models/project.rb @@ -78,6 +78,12 @@ class Project < ActiveRecord::Base after_update :update_forks_visibility_level before_destroy :remove_private_deploy_keys + + # This has to be defined before `has_many :builds, depenedent: :destroy`, + # otherwise we will not delete any data, due to trace chunks + # going through :builds + before_destroy -> { run_after_commit(&build_trace_chunks.delete_all_external_data_proc) } + after_destroy -> { run_after_commit { remove_pages } } after_destroy :remove_exports @@ -214,14 +220,6 @@ class Project < ActiveRecord::Base has_many :commit_statuses has_many :pipelines, class_name: 'Ci::Pipeline', inverse_of: :project - # This has to be defined before `has_many :builds, depenedent: :destroy`, - # otherwise we will not delete any data, due to trace chunks - # going through :builds - before_destroy do - puts "destroying all chunks" - self.run_after_commit(&build_trace_chunks.delayed_cleanup_blk) - end - # Ci::Build objects store data on the file system such as artifact files and # build traces. Currently there's no efficient way of removing this data in # bulk that doesn't involve loading the rows into memory. As a result we're diff --git a/app/workers/build_finished_worker.rb b/app/workers/build_finished_worker.rb index 446ea9051ad..46f1ac09915 100644 --- a/app/workers/build_finished_worker.rb +++ b/app/workers/build_finished_worker.rb @@ -6,7 +6,7 @@ class BuildFinishedWorker def perform(build_id) Ci::Build.find_by(id: build_id).try do |build| - # We execute that in sync as this access the files in order to access local data, and reduce IO + # We execute that in sync as this access the files in order to access local file, and reduce IO BuildTraceSectionsWorker.new.perform(build.id) BuildCoverageWorker.new.perform(build.id) diff --git a/lib/gitlab/ci/trace/chunked_io.rb b/lib/gitlab/ci/trace/chunked_io.rb index 1bb8420a220..55c8abd6a86 100644 --- a/lib/gitlab/ci/trace/chunked_io.rb +++ b/lib/gitlab/ci/trace/chunked_io.rb @@ -134,7 +134,7 @@ module Gitlab end def truncate(offset) - raise ArgumentError, 'Outside of file' if offset > size + return unless offset < size && offset >= 0 @tell = offset @size = offset diff --git a/spec/models/ci/build_trace_chunk_spec.rb b/spec/models/ci/build_trace_chunk_spec.rb index dff4e1aeff5..b0ede29669e 100644 --- a/spec/models/ci/build_trace_chunk_spec.rb +++ b/spec/models/ci/build_trace_chunk_spec.rb @@ -345,7 +345,7 @@ describe Ci::BuildTraceChunk, :clean_gitlab_redis_shared_state do shared_examples_for 'deletes all build_trace_chunk and data in redis' do it do Gitlab::Redis::SharedState.with do |redis| - expect(redis.scan_each(match: "gitlab:ci:trace:?:chunks:?").to_a.count).to eq(3) + expect(redis.scan_each(match: "gitlab:ci:trace:?:chunks:?").to_a.size).to eq(3) end expect(described_class.count).to eq(3) @@ -355,7 +355,7 @@ describe Ci::BuildTraceChunk, :clean_gitlab_redis_shared_state do expect(described_class.count).to eq(0) Gitlab::Redis::SharedState.with do |redis| - expect(redis.scan_each(match: "gitlab:ci:trace:?:chunks:?").to_a.count).to eq(0) + expect(redis.scan_each(match: "gitlab:ci:trace:?:chunks:?").to_a.size).to eq(0) end end end