2018-09-04 16:06:34 -04:00
|
|
|
# frozen_string_literal: true
|
|
|
|
#
|
|
|
|
module Gitlab
|
|
|
|
module Diff
|
|
|
|
class HighlightCache
|
2020-09-01 23:10:30 -04:00
|
|
|
include Gitlab::Utils::Gzip
include Gitlab::Utils::StrongMemoize

# Expiry passed to Redis for the cache hash every time it is written.
EXPIRATION = 1.week

# Embedded in the Redis key (see #key), so changing it changes the key.
VERSION = 1

delegate :diffable, :diff_options, to: :@diff_collection
|
|
|
|
|
2019-12-04 07:07:12 -05:00
|
|
|
# Keeps a reference to the diff collection this cache works for. The
# collection supplies the diffable, the diff options and the raw diff
# files used by the methods of this class.
def initialize(diff_collection)
  @diff_collection = diff_collection
end
|
|
|
|
|
|
|
|
# - Reads from cache
# - Assigns DiffFile#highlighted_diff_lines for cached files
# - Re-populates an entry that was cached empty when
#   #recache_due_to_size? reports the file as eligible
#
# Returns an empty Array when the file has no cache entry (or when a
# refreshed entry cannot be read back); otherwise returns the lines that
# were assigned to the file.
#
def decorate(diff_file)
  content = read_file(diff_file)

  return [] unless content

  # TODO: We could add some kind of flag to #initialize that would allow
  #   us to force re-caching
  #   https://gitlab.com/gitlab-org/gitlab/-/issues/263508
  #
  if content.empty? && recache_due_to_size?(diff_file)
    # If the file is missing from the cache and there's reason to believe
    # it is uncached due to a size issue around changing the values for
    # max patch size, manually populate the hash and then set the value.
    #
    write_to_redis_hash(
      { diff_file.file_path => diff_file.highlighted_diff_lines.map(&:to_hash) }
    )

    # Read the entry back so the lines go through the same cache decoding
    # as any other hit. If the entry is not there after the write, leave
    # the file untouched instead of calling #map on nil.
    #
    content = read_file(diff_file)

    return [] unless content
  end

  set_highlighted_diff_lines(diff_file, content)
end
|
|
|
|
|
2019-12-04 07:07:12 -05:00
|
|
|
# Collects the result of #highlighted_diff_lines for every cacheable file
# that has no entry in the redis hash yet, and passes the whole set to
# #write_to_redis_hash in one call. Submitting everything at once avoids
# the excessive IO of N+1 writes (one per highlighted line or file).
#
# Returns nil without writing when there is nothing to cache.
#
def write_if_empty
  uncached_files = cacheable_files

  return if uncached_files.empty?

  lines_by_path = uncached_files.each_with_object({}) do |diff_file, memo|
    memo[diff_file.file_path] = diff_file.highlighted_diff_lines.map(&:to_hash)
  end

  write_to_redis_hash(lines_by_path)
end
|
|
|
|
|
|
|
|
# Deletes the Redis entry stored under #key.
def clear
  Gitlab::Redis::Cache.with { |redis| redis.del(key) }
end
|
|
|
|
|
|
|
|
# Memoized Redis key: a fixed prefix, the diffable's cache key, VERSION and
# the diff options, joined with ":".
def key
  strong_memoize(:redis_key) do
    segments = ['highlighted-diff-files', diffable.cache_key, VERSION, diff_options]

    segments.join(":")
  end
end
|
|
|
|
|
|
|
|
private
|
|
|
|
|
2020-10-02 11:08:13 -04:00
|
|
|
# Rebuilds every cached line hash in +content+ through
# Gitlab::Diff::Line.safe_init_from_hash and assigns the resulting list to
# diff_file.highlighted_diff_lines.
def set_highlighted_diff_lines(diff_file, content)
  rebuilt_lines = content.map { |line_hash| Gitlab::Diff::Line.safe_init_from_hash(line_hash) }

  diff_file.highlighted_diff_lines = rebuilt_lines
end
|
|
|
|
|
|
|
|
# Whether an empty cache entry for +diff_file+ is worth recalculating
# because of a change in the patch size limit.
#
# Compares the diff's byte size with two values obtained from the diff's
# class: the safe limit derived from DEFAULT_MAX_PATCH_BYTES and the safe
# limit returned when no argument is given (the current limit).
#
# Returns true when default limit <= size < current limit.
def recache_due_to_size?(diff_file)
  diff = diff_file.diff
  diff_class = diff.class

  current_limit = diff_class.patch_safe_limit_bytes
  default_limit = diff_class.patch_safe_limit_bytes(diff_class::DEFAULT_MAX_PATCH_BYTES)

  # If the diff is >= the default limit, but less than the current
  # limit, it is likely uncached due to having hit the default limit,
  # making it eligible for recalculating.
  #
  # The upper bound is exclusive: a diff whose size equals the current
  # limit is not "less than" it, and when both limits are equal nothing
  # qualifies.
  #
  size = diff.diff_bytesize

  size >= default_limit && size < current_limit
end
|
|
|
|
|
2019-12-10 13:08:04 -05:00
|
|
|
# Memoized list of the raw diff files that pass #cacheable? and for which
# #read_file returns nil, i.e. files with no cached entry.
def cacheable_files
  strong_memoize(:cacheable_files) do
    diff_files.select do |candidate|
      cacheable?(candidate) && read_file(candidate).nil?
    end
  end
end
|
|
|
|
|
2019-12-04 07:07:12 -05:00
|
|
|
# Given a hash of:
#   { "file/to/cache" =>
#     [ { line_code: "a5cc2925ca8258af241be7e5b0381edf30266302_19_19",
#         rich_text: " <span id=\"LC19\" class=\"line\" lang=\"plaintext\">config/initializers/secret_token.rb</span>\n",
#         text: " config/initializers/secret_token.rb",
#         type: nil,
#         index: 3,
#         old_pos: 19,
#         new_pos: 19 }
#     ] }
#
#   ...it will write/update a Gitlab::Redis hash (HSET)
#
# Each value is serialized to JSON and gzip-compressed before being stored
# under its file path. All HSETs and the EXPIRE are sent in one pipeline,
# the key's memory usage is then recorded, and finally the memoized cache
# state is dropped.
#
def write_to_redis_hash(hash)
  Gitlab::Redis::Cache.with do |redis|
    # Commands are queued on the object yielded by #pipelined rather than on
    # the outer connection; newer redis-rb releases deprecate the latter.
    redis.pipelined do |pipeline|
      hash.each do |diff_file_id, highlighted_diff_lines_hash|
        pipeline.hset(
          key,
          diff_file_id,
          gzip_compress(highlighted_diff_lines_hash.to_json)
        )
      end

      # HSETs have to have their expiration date manually updated
      #
      pipeline.expire(key, EXPIRATION)
    end

    record_memory_usage(fetch_memory_usage(redis, key))
  end

  # Subsequent read_file calls would need the latest cache.
  #
  clear_memoization(:cached_content)
  clear_memoization(:cacheable_files)
end
|
|
|
|
|
2020-03-16 17:09:21 -04:00
|
|
|
# Observes +memory_usage+ on the current metrics transaction under
# :gitlab_redis_diff_caching_memory_usage_bytes. Does nothing when
# +memory_usage+ is nil/false; the safe navigation also skips the call when
# there is no current transaction.
def record_memory_usage(memory_usage)
  return unless memory_usage

  current_transaction&.observe(:gitlab_redis_diff_caching_memory_usage_bytes, memory_usage) do
    docstring 'Redis diff caching memory usage by key'
    buckets [100, 1_000, 10_000, 100_000, 1_000_000, 10_000_000]
  end
end
|
|
|
|
|
|
|
|
# Asks Redis for the memory used by +key+ (MEMORY USAGE).
#
# Returns nil when the server reports a version below 4, since per-key
# memory reporting is not available there. As of 11-March-2020 Redis 3.x
# is still supported; the version check can go once support for versions
# <4.0.0 officially ends.
def fetch_memory_usage(redis, key)
  server_version = Gem::Version.new(redis.info["redis_version"])
  minimum_version = Gem::Version.new("4")

  return if server_version < minimum_version

  redis.memory("USAGE", key)
end
|
|
|
|
|
2019-12-04 07:07:12 -05:00
|
|
|
# Memoized list with the file_path of every raw diff file.
def file_paths
  strong_memoize(:file_paths) { diff_files.map(&:file_path) }
end
|
|
|
|
|
|
|
|
# Looks up the cached entry for +diff_file+ by its file path. Returns
# whatever #cached_content holds for that path (nil when there is none).
def read_file(diff_file)
  entries = cached_content

  entries[diff_file.file_path]
end
|
|
|
|
|
|
|
|
# Memoized result of #read_cache. #write_to_redis_hash clears this
# memoization so later reads see the updated cache.
def cached_content
  strong_memoize(:cached_content) do
    read_cache
  end
end
|
|
|
|
|
|
|
|
# Fetches the entries of all file paths from the Redis hash with one HMGET
# and returns a Hash of file path => decoded entry. Non-nil values are
# gzip-decompressed and parsed as JSON with symbolized names; nil values
# returned by Redis stay nil.
#
# Returns an empty Hash when #file_paths has no truthy element.
def read_cache
  paths = file_paths

  return {} if paths.none?

  raw_entries = []

  Gitlab::Redis::Cache.with do |redis|
    raw_entries = redis.hmget(key, paths)
  end

  decoded_entries = raw_entries.map do |raw_entry|
    next if raw_entry.nil?

    Gitlab::Json.parse(gzip_decompress(raw_entry), symbolize_names: true)
  end

  paths.zip(decoded_entries).to_h
end
|
|
|
|
|
|
|
|
# A file is cached only when the diffable is present and the file reports
# itself as both text and diffable.
def cacheable?(diff_file)
  diffable_present = diffable.present?

  diffable_present && diff_file.text? && diff_file.diffable?
end
|
2019-12-16 07:07:43 -05:00
|
|
|
|
|
|
|
# Returns the collection's raw_diff_files.
#
# raw_diff_files is used on purpose: the collection's diff_files would
# attempt to apply the highlighting code found in this class, leading to a
# circular reference.
def diff_files
  @diff_collection.raw_diff_files
end
|
2020-07-28 08:09:49 -04:00
|
|
|
|
|
|
|
# The value of ::Gitlab::Metrics::Transaction.current. Callers in this
# class use safe navigation on the result, so it is treated as possibly nil.
def current_transaction
  ::Gitlab::Metrics::Transaction.current
end
|
2018-09-04 16:06:34 -04:00
|
|
|
end
|
|
|
|
end
|
|
|
|
end
|