2018-08-03 03:15:25 -04:00
|
|
|
# frozen_string_literal: true
|
|
|
|
|
2018-03-26 07:45:18 -04:00
|
|
|
module Ci
|
2021-08-03 02:08:50 -04:00
|
|
|
class BuildTraceChunk < Ci::ApplicationRecord
|
2020-09-28 11:09:44 -04:00
|
|
|
include ::Comparable
|
2020-08-27 20:10:34 -04:00
|
|
|
include ::FastDestroyAll
|
|
|
|
include ::Checksummable
|
2018-07-03 03:20:27 -04:00
|
|
|
include ::Gitlab::ExclusiveLeaseHelpers
|
2020-10-07 14:08:34 -04:00
|
|
|
include ::Gitlab::OptimisticLocking
|
2021-04-29 08:09:58 -04:00
|
|
|
|
2018-04-26 02:06:04 -04:00
|
|
|
# Every chunk belongs to a single Ci::Build, referenced through build_id.
belongs_to :build, class_name: "Ci::Build", foreign_key: :build_id
|
2018-04-04 06:19:17 -04:00
|
|
|
|
2021-07-13 17:08:20 -04:00
|
|
|
# data_store defaults to :redis_trace_chunks, which is one of LIVE_STORES.
default_value_for :data_store, :redis_trace_chunks
|
2018-04-04 06:19:17 -04:00
|
|
|
|
2020-09-16 14:09:47 -04:00
|
|
|
# Report a :chunked trace operation to the metrics after a chunk is created.
after_create { metrics.increment_trace_operation(operation: :chunked) }
|
|
|
|
|
2018-04-05 07:39:35 -04:00
|
|
|
# Maximum amount of data a single chunk may hold; append / unsafe_set_data!
# reject writes that would exceed it, and start_offset is a multiple of it.
CHUNK_SIZE = 128.kilobytes
|
2018-05-07 04:34:47 -04:00
|
|
|
# Passed to in_lock as `retries:` (see lock_params).
WRITE_LOCK_RETRY = 10
|
2018-05-07 05:45:38 -04:00
|
|
|
# Passed to in_lock as `sleep_sec:` (see lock_params).
WRITE_LOCK_SLEEP = 0.01.seconds
|
2018-05-07 04:34:47 -04:00
|
|
|
# Passed to in_lock as `ttl:` (see lock_params).
WRITE_LOCK_TTL = 1.minute
|
2018-04-04 06:19:17 -04:00
|
|
|
|
2018-11-28 07:27:25 -05:00
|
|
|
# Raised by persist_data! / unsafe_persist_data! when a chunk cannot be moved
# to its persistent store.
FailedToPersistDataError = Class.new(StandardError)
|
|
|
|
|
2020-11-12 07:09:39 -05:00
|
|
|
# Integer stored in the data_store column for each supported store.
# NOTE(review): these values are presumably persisted in existing rows, so
# they should not be renumbered - confirm before changing.
DATA_STORES = { redis: 1, database: 2, fog: 3, redis_trace_chunks: 4 }.freeze
|
|
|
|
|
2021-03-28 11:09:30 -04:00
|
|
|
# Store implementation class for every DATA_STORES key, looked up by name
# under Ci::BuildTraceChunks (e.g. :fog => Ci::BuildTraceChunks::Fog).
STORE_TYPES = DATA_STORES.keys.each_with_object({}) do |store, types|
  types[store] = "Ci::BuildTraceChunks::#{store.to_s.camelize}".constantize
end.freeze
|
2021-06-10 11:10:14 -04:00
|
|
|
# Stores considered "live": live? is true for chunks held in one of them, and
# the live / persisted scopes select on this list.
LIVE_STORES = %i[redis redis_trace_chunks].freeze
|
2020-11-12 07:09:39 -05:00
|
|
|
|
|
|
|
# Maps the data_store attribute onto the DATA_STORES names.
# NOTE(review): presumably also provides the per-store class scopes that
# begin_fast_destroy invokes through public_send - confirm.
enum data_store: DATA_STORES
|
2018-04-04 06:19:17 -04:00
|
|
|
|
2021-06-10 11:10:14 -04:00
|
|
|
# Chunks whose data is still held in one of the LIVE_STORES.
scope :live, -> { where(data_store: LIVE_STORES) }
|
|
|
|
# Chunks that are no longer in a live store, ordered by chunk_index.
scope :persisted, -> { where.not(data_store: LIVE_STORES).order(:chunk_index) }
|
2020-09-15 14:09:43 -04:00
|
|
|
|
2018-05-01 04:06:44 -04:00
|
|
|
class << self
|
2018-06-07 04:04:55 -04:00
|
|
|
##
# Names of all known data stores (the keys of STORE_TYPES).
#
def all_stores
  STORE_TYPES.keys
end
|
|
|
|
|
2018-07-03 01:33:11 -04:00
|
|
|
##
# Store that chunks are persisted to: :fog when the fog store class reports
# itself as available, :database otherwise.
#
def persistable_store
  return :fog if STORE_TYPES[:fog].available?

  :database
end
|
|
|
|
|
2018-06-07 04:04:55 -04:00
|
|
|
##
# Builds a new store object for the given store name (String or Symbol).
# Despite the name, this returns an instance of the store class.
#
# Raises RuntimeError when the name is not a key of STORE_TYPES.
#
def get_store_class(store)
  store_name = store.to_sym
  store_type = STORE_TYPES.fetch(store_name) do
    raise "Unknown store type: #{store_name}"
  end

  store_type.new
end
|
2018-05-03 04:08:05 -04:00
|
|
|
|
|
|
|
##
# FastDestroyAll concerns
#
# For every store, asks the store object for the keys belonging to the
# relation returned by the class method named after that store, and returns
# a Hash of store name => keys. Stores without any keys are left out.
#
def begin_fast_destroy
  keys_by_store = all_stores.map do |store|
    relation = public_send(store) # rubocop:disable GitlabSecurity/PublicSend

    [store, get_store_class(store).keys(relation)]
  end

  keys_by_store.select { |_store, keys| keys.present? }.to_h
end
|
|
|
|
|
|
|
|
##
# FastDestroyAll concerns
#
# Takes the Hash produced by begin_fast_destroy (store name => keys) and asks
# each store object to delete its keys.
#
def finalize_fast_destroy(keys)
  keys.each { |store, stored_keys| get_store_class(store).delete_keys(stored_keys) }
end
|
2020-09-28 11:09:44 -04:00
|
|
|
|
2021-02-03 16:09:17 -05:00
|
|
|
##
# Sometimes we need to ensure that the first read goes to the primary
# database, which is especially important in EE. This method does not
# change the behavior in CE.
#
# The block is handed to ::Gitlab::Database::Consistency; the `build`
# argument is accepted but not used by this implementation.
#
def with_read_consistency(build, &block)
  ::Gitlab::Database::Consistency.with_read_consistency(&block)
end
|
|
|
|
|
2020-09-28 11:09:44 -04:00
|
|
|
##
# Sometimes we do not want to read raw data. This method returns the names
# of all attributes except raw_data, i.e. the ones that are just metadata.
#
def metadata_attributes
  attribute_names.reject { |name| name == 'raw_data' }
end
|
2018-04-24 09:13:46 -04:00
|
|
|
end
|
|
|
|
|
2018-04-04 06:19:17 -04:00
|
|
|
##
# Chunk content as a String (empty when the store has nothing), read through
# get_data on first use and memoized in @data.
#
def data
  return @data if @data

  @data = get_data.to_s
end
|
|
|
|
|
2020-09-28 11:09:44 -04:00
|
|
|
##
# The stored checksum attribute converted to an Integer (0 when it is nil).
#
def crc32
  checksum.to_i
end
|
|
|
|
|
2018-04-04 06:19:17 -04:00
|
|
|
##
# Cuts the chunk down to `offset` bytes by appending an empty string at that
# offset. Does nothing when `offset` already equals the current size.
#
# Raises ArgumentError when `offset` is negative or beyond the current size.
#
def truncate(offset = 0)
  raise ArgumentError, 'Offset is out of range' unless offset.between?(0, size)
  return if offset == size # nothing to cut off, so skip the write

  append(+'', offset)
end
|
|
|
|
|
|
|
|
##
# Writes `new_data` into the chunk at byte `offset` while holding the write
# lock, then schedules persistence once the chunk is full.
#
# Raises ArgumentError when `new_data` is nil, when `offset` is negative or
# beyond the current size, or when the write would exceed CHUNK_SIZE.
#
def append(new_data, offset)
  raise ArgumentError, 'New data is missing' unless new_data
  raise ArgumentError, 'Offset is out of range' unless offset.between?(0, size)
  raise ArgumentError, 'Chunk size overflow' if offset + new_data.bytesize > CHUNK_SIZE

  in_lock(lock_key, **lock_params) do
    unsafe_append_data!(new_data, offset)
  end

  schedule_to_persist! if full?
end
|
|
|
|
|
|
|
|
##
# Size of the chunk in bytes, memoized in @size. Uses the already loaded
# @data when there is one, then the size reported by the current store, and
# only as a last resort loads the data to measure it.
#
def size
  return @size if @size

  @size = @data&.bytesize || current_store.size(self) || data&.bytesize
end
|
|
|
|
|
|
|
|
##
# Offset of the first byte of this chunk within the whole trace.
#
def start_offset
  chunk_index * CHUNK_SIZE
end
|
|
|
|
|
|
|
|
##
# Offset just past the last byte of this chunk within the whole trace.
#
def end_offset
  start_offset + size
end
|
|
|
|
|
|
|
|
##
# Trace offsets covered by this chunk, as a Range that excludes end_offset.
#
def range
  (start_offset...end_offset)
end
|
2018-06-15 02:48:03 -04:00
|
|
|
|
2020-08-27 20:10:34 -04:00
|
|
|
##
# Enqueues Ci::BuildTraceChunkFlushWorker for this chunk, unless the chunk
# has already been flushed.
#
def schedule_to_persist!
  Ci::BuildTraceChunkFlushWorker.perform_async(id) unless flushed?
end
|
|
|
|
|
2020-10-06 05:08:32 -04:00
|
|
|
##
# Migrates the chunk's data to the persistent store.
#
# Two concurrent migrations are possible: a chunk may get migrated after it
# has been loaded by another worker, but before that worker acquires the
# lock to perform the migration.
#
# The operation runs inside an exclusive Redis lock, but that alone does not
# protect against race conditions when updating the model's row in the
# database. Optimistic locking is a second mechanism that helps here, so
# both are combined to ensure that a chunk gets migrated properly.
#
# Workers use the until_executed deduplication strategy, which should
# prevent duplicated workers from running in parallel for the same build
# trace and failing because the exclusive lock could not be acquired.
#
# Raises FailedToPersistDataError when the record has unsaved changes, when
# the lock cannot be obtained, or when a stale record is detected.
#
# NOTE(review): 'Modifed' in the first message is a typo; it is kept as is
# because callers or specs may match on the message - confirm before fixing.
#
def persist_data!
  in_lock(lock_key, **lock_params) do # exclusive Redis lock is acquired first
    if has_changes_to_save?
      raise FailedToPersistDataError, 'Modifed build trace chunk detected'
    end

    self.class.with_read_consistency(build) do
      # Reload the record first so that the migration works on fresh state.
      reset.unsafe_persist_data!
    end
  end
rescue FailedToObtainLockError
  metrics.increment_trace_operation(operation: :stalled)

  raise FailedToPersistDataError, 'Data migration failed due to a worker duplication'
rescue ActiveRecord::StaleObjectError
  raise FailedToPersistDataError, <<~MSG
    Data migration race condition detected

    store: #{data_store}
    build: #{build.id}
    index: #{chunk_index}
  MSG
end
|
|
|
|
|
2020-09-15 14:09:43 -04:00
|
|
|
##
# A build trace chunk is final (the last one, which we never expect to
# become full) when a runner has submitted a pending state for the build
# and no chunk with a higher index exists in the database.
#
def final?
  pending_state? && chunk_index == chunks_max_index
end
|
|
|
|
|
2020-10-07 14:08:34 -04:00
|
|
|
##
# True when the chunk is no longer held in a live store.
#
def flushed?
  !live?
end
|
|
|
|
|
2020-10-07 14:08:34 -04:00
|
|
|
##
# Same answer as flushed?.
#
def migrated?
  flushed?
end
|
|
|
|
|
2020-10-06 05:08:32 -04:00
|
|
|
##
# True when the chunk's data_store is one of LIVE_STORES.
#
def live?
  store_name = data_store.to_sym

  LIVE_STORES.include?(store_name)
end
|
|
|
|
|
2020-09-28 11:09:44 -04:00
|
|
|
##
# Orders chunks of the same build by chunk_index.
#
# Returns -1, 0 or 1 when both chunks belong to the same build, and nil when
# they belong to different builds or when `other` is not chunk-like (it does
# not respond to build_id and chunk_index).
#
# Returning nil for foreign operands, rather than raising NoMethodError,
# follows the <=> convention that Comparable relies on: with a nil result
# `==` answers false and the ordering operators raise ArgumentError.
#
def <=>(other)
  return unless other.respond_to?(:build_id) && other.respond_to?(:chunk_index)
  return unless build_id == other.build_id

  chunk_index <=> other.chunk_index
end
|
|
|
|
|
2020-10-07 14:08:34 -04:00
|
|
|
protected
|
2018-04-04 06:19:17 -04:00
|
|
|
|
2020-08-27 20:10:34 -04:00
|
|
|
##
# Reads the chunk content from the current store, tagged as binary.
# Returns nil when the store returns nil.
#
def get_data
  # Redis / database return UTF-8 encoded string by default
  stored = current_store.data(self)

  stored&.force_encoding(Encoding::BINARY)
end
|
|
|
|
|
|
|
|
##
# Moves the chunk's data from its current store into `new_store` (by default
# the class-level persistable store). Does nothing when the chunk already
# lives in `new_store`.
#
# Raises FailedToPersistDataError unless the chunk holds exactly CHUNK_SIZE
# bytes or is the final chunk of the trace.
#
# "unsafe": no lock is taken here; within this file the method is only
# invoked from persist_data!, inside its lock.
#
def unsafe_persist_data!(new_store = self.class.persistable_store)
  return if data_store == new_store.to_s

  payload = data
  previous_store = current_store # captured before data_store is reassigned below
  payload_size = payload&.bytesize.to_i

  if payload_size != CHUNK_SIZE && !final?
    raise FailedToPersistDataError, <<~MSG
      data is not fulfilled in a bucket

      size: #{payload_size}
      state: #{pending_state?}
      max: #{chunks_max_index}
      index: #{chunk_index}
    MSG
  end

  self.raw_data = nil
  self.data_store = new_store
  self.checksum = self.class.crc32(payload)

  ##
  # We need to persist the data and then save the new store identifier
  # before we remove the data from the previous store, to make this
  # operation transaction-safe. `unsafe_set_data!` calls `save!` for this
  # reason.
  #
  # TODO consider using callbacks and state machine to remove old data
  #
  unsafe_set_data!(payload)

  previous_store.delete_data(self)
end
|
|
|
|
|
2018-06-15 02:48:03 -04:00
|
|
|
##
# Replaces the chunk content in the current store with `value`, refreshes
# the memoized data and size, and saves the record when it has changes.
#
# Raises ArgumentError when `value` is larger than CHUNK_SIZE.
# "unsafe": no lock is taken here.
#
def unsafe_set_data!(value)
  if value.bytesize > CHUNK_SIZE
    raise ArgumentError, 'New data size exceeds chunk size'
  end

  current_store.set_data(self, value)

  # Keep the in-memory copies in sync with what was just written.
  @data = value
  @size = value.bytesize

  save! if changed?
end
|
|
|
|
|
|
|
|
##
# Appends `value` at byte `offset` in the current store, reports an
# :appended trace operation, drops the memoized data, records the new size,
# and saves the record when it has changes.
#
# Raises ArgumentError when the resulting size would exceed CHUNK_SIZE, or
# when the store reports a size different from the expected one.
# "unsafe": no lock is taken here; append calls it inside in_lock.
#
def unsafe_append_data!(value, offset)
  new_size = value.bytesize + offset
  raise ArgumentError, 'New data size exceeds chunk size' if new_size > CHUNK_SIZE

  stored = current_store.append_data(self, value, offset)
  metrics.increment_trace_operation(operation: :appended)
  raise ArgumentError, 'Trace appended incorrectly' if stored != new_size

  @data = nil # the memoized content is stale now
  @size = new_size

  save! if changed?
end
|
|
|
|
|
2018-05-07 04:34:47 -04:00
|
|
|
##
# True when the chunk holds exactly CHUNK_SIZE bytes.
#
def full?
  size == CHUNK_SIZE
end
|
|
|
|
|
2020-10-07 14:08:34 -04:00
|
|
|
private
|
|
|
|
|
|
|
|
##
# True when the build has a pending state.
#
def pending_state?
  build.pending_state.present?
end
|
|
|
|
|
2020-07-22 11:09:28 -04:00
|
|
|
##
# Store object for the chunk's current data_store, built by the class-level
# get_store_class on every call.
#
def current_store
  self.class.get_store_class(data_store)
end
|
|
|
|
|
2020-10-02 05:08:33 -04:00
|
|
|
##
# Highest chunk_index among the build's trace chunks, as an Integer
# (0 when maximum returns nil).
#
def chunks_max_index
  build.trace_chunks.maximum(:chunk_index).to_i
end
|
|
|
|
|
2020-11-16 04:09:18 -05:00
|
|
|
##
# Key of the lock that guards writes to this chunk, built from build_id and
# chunk_index.
#
def lock_key
  "trace_write:#{build_id}:chunks:#{chunk_index}"
end
|
|
|
|
|
2018-06-15 02:48:03 -04:00
|
|
|
##
# Options passed to in_lock together with lock_key, taken from the
# WRITE_LOCK_* constants.
#
def lock_params
  { ttl: WRITE_LOCK_TTL, retries: WRITE_LOCK_RETRY, sleep_sec: WRITE_LOCK_SLEEP }
end
|
2020-09-16 14:09:47 -04:00
|
|
|
|
|
|
|
##
# Trace metrics reporter, created on first use and memoized per instance.
#
def metrics
  @metrics ||= ::Gitlab::Ci::Trace::Metrics.new
end
|
2018-03-26 07:45:18 -04:00
|
|
|
end
|
|
|
|
end
|