2018-07-25 05:30:33 -04:00
|
|
|
# frozen_string_literal: true
|
|
|
|
|
2019-03-28 09:17:42 -04:00
|
|
|
class MergeRequestDiff < ApplicationRecord
|
2015-02-05 17:20:55 -05:00
|
|
|
# Concerns mixed into the model; order is kept exactly as in the original.
include Sortable
include Importable
include ManualInverseAssociation
include EachBatch
include Gitlab::Utils::StrongMemoize
include ObjectStorage::BackgroundMove
include BulkInsertableAssociations
|
2015-02-05 17:20:55 -05:00
|
|
|
|
2017-11-21 11:58:08 -05:00
|
|
|
# Don't display more than 100 commits at once
COMMITS_SAFE_SIZE = 100
# Number of SHAs sent to the database per query by includes_any_commits?
BATCH_SIZE = 1000

# Applies to closed or merged MRs when determining whether to migrate their
# diffs to external storage
EXTERNAL_DIFF_CUTOFF = 7.days.freeze

# The files_count column is a 2-byte signed integer. Look up the true value
# from the database if this sentinel is seen
FILES_COUNT_SENTINEL = 2**15 - 1

# External diff cache key used by diffs export
EXTERNAL_DIFFS_CACHE_TMPDIR = 'project-%{project_id}-external-mr-%{mr_id}-diff-%{id}-cache'
EXTERNAL_DIFF_CACHE_CHUNK_SIZE = 8.megabytes
|
|
|
|
|
2014-01-22 08:19:02 -05:00
|
|
|
belongs_to :merge_request

manual_inverse_association :merge_request, :merge_request_diff

# Files and commits are always read in (merge_request_diff_id, relative_order) order.
has_many :merge_request_diff_files,
  -> { order(:merge_request_diff_id, :relative_order) },
  inverse_of: :merge_request_diff

has_many :merge_request_diff_commits, -> { order(:merge_request_diff_id, :relative_order) }

validates :base_commit_sha, :head_commit_sha, :start_commit_sha, sha: true
# The uniqueness check on (merge_request_id, diff_type) only runs for merge_head diffs.
validates :merge_request_id, uniqueness: { scope: :diff_type }, if: :merge_head?
|
2019-03-04 13:36:34 -05:00
|
|
|
|
2014-01-22 08:19:02 -05:00
|
|
|
# Lifecycle of a diff record. New records start in the `empty` state.
state_machine :state, initial: :empty do
  # `clean` moves a diff from any state to `without_files`.
  event :clean do
    transition any => :without_files
  end

  state :collected
  state :overflow
  # Diff files have been deleted by the system
  state :without_files
  # Deprecated states: these are no longer used but these values may still occur
  # in the database.
  state :timeout
  state :overflow_commits_safe_size
  state :overflow_diff_files_limit
  state :overflow_diff_lines_limit
end
|
|
|
|
|
2021-01-27 13:09:08 -05:00
|
|
|
# Kind of diff stored in the record, persisted as an integer.
enum diff_type: {
  regular: 1,
  merge_head: 2
}
|
|
|
|
|
2018-06-11 16:45:16 -04:00
|
|
|
# Diffs that are neither `without_files` nor `empty`.
scope :with_files, -> { without_states(:without_files, :empty) }
# Diffs in any state other than `empty`.
scope :viewable, -> { without_state(:empty) }
# Diffs containing a commit with the given SHA; any existing ordering is dropped.
scope :by_commit_sha, ->(sha) do
  joins(:merge_request_diff_commits).where(merge_request_diff_commits: { sha: sha }).reorder(nil)
end

# Diffs whose merge request targets the given project.
scope :by_project_id, -> (project_id) do
  joins(:merge_request).where(merge_requests: { target_project_id: project_id })
end

# The `limit` highest-id diffs, newest first.
scope :recent, -> (limit = 100) { order(id: :desc).limit(limit) }

# Diffs that have at least one file and are not flagged as stored externally.
scope :files_in_database, -> do
  where(stored_externally: [false, nil]).where(arel_table[:files_count].gt(0))
end
|
2019-02-20 10:35:57 -05:00
|
|
|
|
|
|
|
# Joins each diff to its merge request, keeping only `regular` diffs whose id
# differs from the merge request's latest_merge_request_diff_id.
scope :not_latest_diffs, -> do
  merge_requests = MergeRequest.arel_table
  mr_diffs = arel_table

  join_condition = merge_requests[:id].eq(mr_diffs[:merge_request_id])
    .and(mr_diffs[:id].not_eq(merge_requests[:latest_merge_request_diff_id]))
    .and(mr_diffs[:diff_type].eq(diff_types[:regular]))

  arel_join = mr_diffs.join(merge_requests).on(join_condition)

  joins(arel_join.join_sources)
end
|
|
|
|
|
|
|
|
# Diffs belonging to merge requests in the `merged` state whose metrics record
# a non-null merged_at at or before `before`.
scope :old_merged_diffs, -> (before) do
  merge_requests = MergeRequest.arel_table
  mr_metrics = MergeRequest::Metrics.arel_table
  mr_diffs = arel_table

  mr_join = mr_diffs
    .join(merge_requests)
    .on(mr_diffs[:merge_request_id].eq(merge_requests[:id]))

  metrics_join_condition = mr_diffs[:merge_request_id]
    .eq(mr_metrics[:merge_request_id])
    .and(mr_metrics[:merged_at].not_eq(nil))

  metrics_join = mr_diffs.join(mr_metrics).on(metrics_join_condition)

  # Reuse the table handles built above rather than fetching them again.
  condition = merge_requests[:state_id].eq(MergeRequest.available_states[:merged])
    .and(mr_metrics[:merged_at].lteq(before))
    .and(mr_metrics[:merged_at].not_eq(nil))

  joins(metrics_join.join_sources, mr_join.join_sources).where(condition)
end
|
|
|
|
|
|
|
|
# Diffs belonging to merge requests in the `closed` state whose metrics record
# a latest_closed_at at or before `before`.
scope :old_closed_diffs, -> (before) do
  condition = MergeRequest.arel_table[:state_id].eq(MergeRequest.available_states[:closed])
    .and(MergeRequest::Metrics.arel_table[:latest_closed_at].lteq(before))

  joins(merge_request: :metrics).where(condition)
end
|
|
|
|
|
2021-05-05 23:10:25 -04:00
|
|
|
# This scope uses LATERAL JOIN to find the most recent MR diff association for the given merge requests.
|
|
|
|
# To avoid joining the merge_requests table, we build an in memory table using the merge request ids.
|
|
|
|
# Example:
|
|
|
|
# SELECT ...
|
|
|
|
# FROM (VALUES (MR_ID_1),(MR_ID_2)) merge_requests (id)
|
|
|
|
# INNER JOIN LATERAL (...)
|
2020-10-19 17:09:06 -04:00
|
|
|
# Accepts a single merge request or a collection. Returns an empty relation
# when nothing is given. For each merge request id, the lateral sub-query picks
# the one diff with the greatest created_at. Commits are eager-loaded.
scope :latest_diff_for_merge_requests, -> (merge_requests) do
  mrs = Array(merge_requests)
  return MergeRequestDiff.none if mrs.empty?

  merge_request_table = MergeRequest.arel_table
  merge_request_diff_table = MergeRequestDiff.arel_table

  join_query = MergeRequestDiff
    .where(merge_request_table[:id].eq(merge_request_diff_table[:merge_request_id]))
    .order(created_at: :desc)
    .limit(1)

  # Integer() raises on anything that is not an integer, so only numeric ids
  # are interpolated into the VALUES list.
  mr_id_list = mrs.map { |mr| "(#{Integer(mr.id)})" }.join(",")

  MergeRequestDiff
    .from("(VALUES #{mr_id_list}) merge_requests (id)")
    .joins("INNER JOIN LATERAL (#{join_query.to_sql}) #{MergeRequestDiff.table_name} ON TRUE")
    .includes(:merge_request_diff_commits)
end
|
|
|
|
|
2020-08-05 20:09:53 -04:00
|
|
|
class << self
  # Returns up to `limit` ids of diffs whose files should be moved to external
  # storage. Returns an empty array when external diffs are disabled, or when
  # the configured `when` value is neither 'always' nor 'outdated'.
  def ids_for_external_storage_migration(limit:)
    return [] unless Gitlab.config.external_diffs.enabled

    case Gitlab.config.external_diffs.when
    when 'always'
      ids_for_external_storage_migration_strategy_always(limit: limit)
    when 'outdated'
      ids_for_external_storage_migration_strategy_outdated(limit: limit)
    else
      []
    end
  end

  # Any diff that still has its files in the database qualifies.
  def ids_for_external_storage_migration_strategy_always(limit:)
    files_in_database.limit(limit).pluck(:id)
  end

  # Collects ids in three passes (old merged, old closed, not latest), stopping
  # as soon as `limit` ids have been gathered.
  def ids_for_external_storage_migration_strategy_outdated(limit:)
    # Outdated is too complex to be a single SQL query, so split into three
    before = EXTERNAL_DIFF_CUTOFF.ago

    ids = files_in_database
      .old_merged_diffs(before)
      .limit(limit)
      .pluck(:id)

    return ids if ids.size >= limit

    ids += files_in_database
      .old_closed_diffs(before)
      .limit(limit - ids.size)
      .pluck(:id)

    return ids if ids.size >= limit

    not_latest_ids = files_in_database
      .not_latest_diffs
      .limit(limit - ids.size)
      .pluck(:id)

    # A non-latest diff of an old merged/closed MR matches more than one of the
    # queries above, so drop repeated ids instead of returning them twice.
    (ids + not_latest_ids).uniq
  end
end
|
2017-11-21 11:58:08 -05:00
|
|
|
|
2019-01-09 12:01:28 -05:00
|
|
|
mount_uploader :external_diff, ExternalDiffUploader

# All diff information is collected from repository after object is created.
# It allows you to override variables like head_commit_sha before getting diff.
# Both creation hooks are skipped while importing.
after_create :save_git_content, unless: :importing?
after_create_commit :set_as_latest_diff, unless: :importing?

after_save :update_external_diff_store
after_save :set_count_columns
|
2019-01-09 12:01:28 -05:00
|
|
|
|
2017-04-06 18:13:28 -04:00
|
|
|
# Looks up a diff whose start, head and base commit SHAs all match the given
# diff refs (an object responding to start_sha, head_sha and base_sha).
def self.find_by_diff_refs(diff_refs)
  find_by(
    start_commit_sha: diff_refs.start_sha,
    head_commit_sha: diff_refs.head_sha,
    base_commit_sha: diff_refs.base_sha
  )
end
|
|
|
|
|
2018-06-11 16:45:16 -04:00
|
|
|
# True when the diff is in the collected, without_files or overflow state.
def viewable?
  collected? || without_files? || overflow?
end
|
|
|
|
|
2016-07-28 06:46:27 -04:00
|
|
|
# Collect information about commits and diff from repository
|
|
|
|
# and save it to the database as serialized data
|
|
|
|
# Fills in the commit SHAs, stores commits and diffs, persists the record and
# then, unless importing, keeps the referenced commits around.
def save_git_content
  ensure_commit_shas
  save_commits
  save_diffs

  # Another set of `after_save` hooks will be called here when we update the record
  save
  # We need to reset so that dirty tracking is reset when running the original set
  # of `after_save` hooks that come after this `after_create` hook. Otherwise, the
  # hooks that run when an attribute was changed are run twice.
  reset

  keep_around_commits unless importing?
end
|
|
|
|
|
2019-10-17 08:07:33 -04:00
|
|
|
# Points the merge request's latest_merge_request_diff_id at this diff, but
# only when the stored value is NULL or lower than this diff's id.
def set_as_latest_diff
  # Don't set merge_head diff as latest so it won't get considered as the
  # MergeRequest#merge_request_diff.
  return if merge_head?

  MergeRequest
    .where('id = ? AND COALESCE(latest_merge_request_diff_id, 0) < ?', merge_request_id, id)
    .update_all(latest_merge_request_diff_id: id)
end
|
|
|
|
|
2017-06-16 10:00:58 -04:00
|
|
|
# Assigns start, head and base commit SHAs that are not set yet. Values that
# are already present are left untouched.
def ensure_commit_shas
  self.start_commit_sha ||= merge_request.target_branch_sha

  if merge_head? && merge_request.merge_ref_head.present?
    # For merge_head diffs the head/base come from the merge ref's diff refs.
    # The local is deliberately not called `diff_refs`, to avoid shadowing the
    # instance method of that name.
    merge_ref_diff_refs = merge_request.merge_ref_head.diff_refs

    self.head_commit_sha ||= merge_ref_diff_refs.head_sha
    self.base_commit_sha ||= merge_ref_diff_refs.base_sha
  else
    self.head_commit_sha ||= merge_request.source_branch_sha
    self.base_commit_sha ||= find_base_sha
  end
end
|
|
|
|
|
|
|
|
# Override head_commit_sha to keep compatibility with merge request diff
|
|
|
|
# created before version 8.4 that does not store head_commit_sha in separate db field.
|
|
|
|
# Returns the stored head SHA; for a persisted record with no stored value it
# falls back to last_commit_sha.
def head_commit_sha
  if persisted? && super.nil?
    last_commit_sha
  else
    super
  end
end
|
|
|
|
|
2020-09-10 17:08:28 -04:00
|
|
|
# Number of files in this diff. The stored column value is used unless it is
# nil or equal to FILES_COUNT_SENTINEL, in which case the files are counted
# through the association.
def files_count
  db_value = read_attribute(:files_count)

  case db_value
  when nil, FILES_COUNT_SENTINEL
    merge_request_diff_files.count
  else
    db_value
  end
end
|
|
|
|
|
2016-08-15 08:20:36 -04:00
|
|
|
# This method will rely on repository branch sha
|
|
|
|
# in case start_commit_sha is nil. It's necessary for old merge request diffs
|
|
|
|
# created before version 8.4 to work
|
|
|
|
# Returns start_commit_sha, or the merge request's target branch SHA when no
# start SHA is stored.
def safe_start_commit_sha
  start_commit_sha || merge_request.target_branch_sha
end
|
|
|
|
|
2016-03-03 12:38:44 -05:00
|
|
|
# Returns real_size when it is present, otherwise the size of the raw diffs.
def size
  real_size.presence || raw_diffs.size
end
|
|
|
|
|
2019-07-30 06:13:53 -04:00
|
|
|
# Line count of the raw diffs loaded with `limits: false`; memoized.
def lines_count
  strong_memoize(:lines_count) do
    raw_diffs(limits: false).line_count
  end
end
|
|
|
|
|
2016-08-05 22:03:01 -04:00
|
|
|
# Returns the raw diffs for the given options.
#
# With :ignore_whitespace_change set, diffs come from `compare` and a single
# result is cached. Otherwise they are loaded through load_diffs and cached
# per distinct options hash.
def raw_diffs(options = {})
  if options[:ignore_whitespace_change]
    @diffs_no_whitespace ||= compare.diffs(options)
  else
    @raw_diffs ||= {}
    @raw_diffs[options] ||= load_diffs(options)
  end
end
|
|
|
|
|
2021-08-25 23:09:01 -04:00
|
|
|
# Loads the diff's commits through load_commits, memoized separately for each
# (limit, load_from_gitaly) combination.
def commits(limit: nil, load_from_gitaly: false)
  strong_memoize(:"commits_#{limit || 'all'}_#{load_from_gitaly}") do
    load_commits(limit: limit, load_from_gitaly: load_from_gitaly)
  end
end
|
|
|
|
|
2017-06-16 10:00:58 -04:00
|
|
|
# First SHA returned by commit_shas, or nil when there are none.
def last_commit_sha
  commit_shas(limit: 1).first
end
|
|
|
|
|
2015-10-20 08:23:56 -04:00
|
|
|
# The final element of `commits`.
def first_commit
  commits.last
end
|
|
|
|
|
2019-08-13 04:26:16 -04:00
|
|
|
# The leading element of `commits`.
def last_commit
  commits.first
end
|
|
|
|
|
2016-01-20 12:44:27 -05:00
|
|
|
# The project's commit for base_commit_sha, or nil when no base SHA is set.
def base_commit
  return unless base_commit_sha

  project.commit_by(oid: base_commit_sha)
end
|
|
|
|
|
2016-06-20 12:48:04 -04:00
|
|
|
# The project's commit for start_commit_sha, or nil when no start SHA is set.
def start_commit
  return unless start_commit_sha

  project.commit_by(oid: start_commit_sha)
end
|
|
|
|
|
|
|
|
# The project's commit for head_commit_sha, or nil when no head SHA is available.
def head_commit
  return unless head_commit_sha

  project.commit_by(oid: head_commit_sha)
end
|
|
|
|
|
2019-11-14 07:06:30 -05:00
|
|
|
# SHAs of this diff's commits, optionally capped at `limit`.
#
# If the commits association is already loaded, the records are sorted and
# truncated in memory so no extra query is issued. Otherwise only the `sha`
# column is fetched from the database.
def commit_shas(limit: nil)
  if association(:merge_request_diff_commits).loaded?
    sorted_diff_commits = merge_request_diff_commits.sort_by { |diff_commit| [diff_commit.id, diff_commit.relative_order] }
    sorted_diff_commits = sorted_diff_commits.take(limit) if limit
    sorted_diff_commits.map(&:sha)
  else
    merge_request_diff_commits.limit(limit).pluck(:sha)
  end
end
|
|
|
|
|
2019-11-27 07:06:30 -05:00
|
|
|
# True when at least one of the given SHAs belongs to this diff's commits.
# Returns false for a blank list.
def includes_any_commits?(shas)
  return false if shas.blank?

  # when the number of shas is huge (1000+) we don't want
  # to pass them all as an SQL param, let's pass them in batches
  shas.each_slice(BATCH_SIZE).any? do |batched_shas|
    merge_request_diff_commits.where(sha: batched_shas).exists?
  end
end
|
|
|
|
|
2017-04-10 16:17:47 -04:00
|
|
|
# Copies base, start and head SHAs from the given diff refs. Passing nil
# clears all three attributes.
def diff_refs=(new_diff_refs)
  self.base_commit_sha = new_diff_refs&.base_sha
  self.start_commit_sha = new_diff_refs&.start_sha
  self.head_commit_sha = new_diff_refs&.head_sha
end
|
|
|
|
|
2016-07-26 11:24:25 -04:00
|
|
|
# Builds a DiffRefs from the stored SHAs. Returns nil when neither a start
# nor a base SHA is present.
def diff_refs
  return unless start_commit_sha || base_commit_sha

  Gitlab::Diff::DiffRefs.new(
    base_sha: base_commit_sha,
    start_sha: start_commit_sha,
    head_sha: head_commit_sha
  )
end
|
|
|
|
|
2017-05-15 14:19:49 -04:00
|
|
|
# MRs created before 8.4 don't store their true diff refs (start and base),
|
|
|
|
# but we need to get a commit SHA for the "View file @ ..." link by a file,
|
2017-05-16 19:58:23 -04:00
|
|
|
# so we use an approximation of the diff refs if we can't get the actual one.
|
|
|
|
#
|
2017-05-15 14:19:49 -04:00
|
|
|
# These will not be the actual diff refs if the target branch was merged into
|
|
|
|
# the source branch after the merge request was created, but it is good enough
|
|
|
|
# for the specific purpose of linking to a commit.
|
2017-05-16 19:58:23 -04:00
|
|
|
#
|
2017-05-15 14:19:49 -04:00
|
|
|
# It is not good enough for highlighting diffs, so we can't simply pass
|
|
|
|
# these as `diff_refs.`
|
|
|
|
# Returns the real diff refs when they exist. Otherwise builds approximate
# refs: the base is the first commit's parent (or the first commit itself when
# it has no parent), and the start is safe_start_commit_sha.
def fallback_diff_refs
  real_refs = diff_refs
  return real_refs if real_refs

  likely_base_commit_sha = (first_commit&.parent || first_commit)&.sha

  Gitlab::Diff::DiffRefs.new(
    base_sha: likely_base_commit_sha,
    start_sha: safe_start_commit_sha,
    head_sha: head_commit_sha
  )
end
|
|
|
|
|
2016-08-01 10:55:51 -04:00
|
|
|
# True only when base, head and start commit SHAs are all present.
def diff_refs_by_sha?
  base_commit_sha? && head_commit_sha? && start_commit_sha?
end
|
|
|
|
|
2019-10-09 17:06:24 -04:00
|
|
|
# Returns one page of diffs.
#
# fetching_repository_diffs is defined outside this view; here it yields a
# comparison object or nil. Without a comparison, the database-backed batch is
# returned. With one, the batch is used to narrow `diff_options` to the
# page's paths before diffing through the comparison.
#
# NOTE: `diff_options` is modified in place (paths / pagination_data) on the
# comparison path.
def diffs_in_batch(batch_page, batch_size, diff_options:)
  fetching_repository_diffs(diff_options) do |comparison|
    reorder_diff_files!
    diffs_batch = diffs_in_batch_collection(batch_page, batch_size, diff_options: diff_options)

    if comparison
      if diff_options[:paths].blank? && !without_files?
        # Return the empty MergeRequestDiffBatch for an out of bound batch request
        break diffs_batch if diffs_batch.diff_paths.blank?

        diff_options.merge!(
          paths: diffs_batch.diff_paths,
          pagination_data: diffs_batch.pagination_data
        )
      end

      comparison.diffs(diff_options)
    else
      diffs_batch
    end
  end
end
|
|
|
|
|
2016-08-15 10:57:19 -04:00
|
|
|
# Returns the diffs for this record. fetching_repository_diffs (defined
# outside this view) yields a comparison object or nil; the comparison is used
# when given, otherwise the database-backed collection is returned.
def diffs(diff_options = nil)
  fetching_repository_diffs(diff_options) do |comparison|
    # It should fetch the repository when diffs are cleaned by the system.
    # We don't keep these for storage overload purposes.
    # See https://gitlab.com/gitlab-org/gitlab-foss/issues/37639
    if comparison
      comparison.diffs(diff_options)
    else
      reorder_diff_files!
      diffs_collection(diff_options)
    end
  end
end
|
|
|
|
|
|
|
|
# Should always return the DB persisted diffs collection
|
|
|
|
# (e.g. Gitlab::Diff::FileCollection::MergeRequestDiff).
|
|
|
|
# It's useful when trying to invalidate old caches through
|
|
|
|
# FileCollection::MergeRequestDiff#clear_cache!
|
|
|
|
# Wraps this record in a Gitlab::Diff::FileCollection::MergeRequestDiff built
# with the given options.
def diffs_collection(diff_options = nil)
  Gitlab::Diff::FileCollection::MergeRequestDiff.new(self, diff_options: diff_options)
end
|
|
|
|
|
|
|
|
# The target project of the merge request this diff belongs to.
def project
  merge_request.target_project
end
|
2014-01-22 08:19:02 -05:00
|
|
|
|
2016-07-27 07:41:19 -04:00
|
|
|
# Memoized Gitlab::Git::Compare between safe_start_commit_sha and
# head_commit_sha on the raw repository.
def compare
  @compare ||=
    Gitlab::Git::Compare.new(
      repository.raw_repository,
      safe_start_commit_sha,
      head_commit_sha
    )
end
|
|
|
|
|
2016-08-25 03:59:30 -04:00
|
|
|
# True when this diff's id is the merge request's latest_merge_request_diff_id.
def latest?
  id == merge_request.latest_merge_request_diff_id
end
|
|
|
|
|
2018-08-27 11:31:01 -04:00
|
|
|
# rubocop: disable CodeReuse/ServiceClass
|
2017-05-21 16:38:33 -04:00
|
|
|
# Compares this diff's head commit with `sha` inside the same project, using
# a straight (A..B) comparison.
def compare_with(sha)
  # When compare merge request versions we want diff A..B instead of A...B
  # so we handle cases when user does squash and rebase of the commits between versions.
  # For this reason we set straight to true by default.
  CompareService.new(project, head_commit_sha).execute(project, sha, straight: true)
end
|
2018-08-27 11:31:01 -04:00
|
|
|
# rubocop: enable CodeReuse/ServiceClass
|
2016-09-07 03:16:21 -04:00
|
|
|
|
2020-04-21 11:21:10 -04:00
|
|
|
# Paths touched by this diff.
#
# By default the distinct old and new paths of the stored diff files are
# returned. With `fallback_on_overflow: true` on an overflowed diff, the paths
# come from the repository's diff stats between base and head instead. Each
# variant is memoized separately.
def modified_paths(fallback_on_overflow: false)
  if fallback_on_overflow && overflow?
    # This is an extremely slow means to find the modified paths for a given
    # MergeRequestDiff. This should be avoided, except where the limit of
    # 1_000 (as of %12.10) entries returned by the default behavior is an
    # issue.
    strong_memoize(:overflowed_modified_paths) do
      project.repository.diff_stats(
        base_commit_sha,
        head_commit_sha
      ).paths
    end
  else
    strong_memoize(:modified_paths) do
      merge_request_diff_files.pluck(:new_path, :old_path).flatten.uniq
    end
  end
end
|
|
|
|
|
2019-01-09 12:01:28 -05:00
|
|
|
# Writes the uploader's object store into the external_diff_store column, but
# only when external_diff or stored_externally changed in the last save.
def update_external_diff_store
  return unless saved_change_to_external_diff? || saved_change_to_stored_externally?

  update_column(:external_diff_store, external_diff.object_store)
end
|
|
|
|
|
|
|
|
# If enabled, yields the external file containing the diff. Otherwise, yields
|
|
|
|
# nil. This method is not thread-safe, but it *is* re-entrant, which allows
|
|
|
|
# multiple merge_request_diff_files to load their data efficiently
|
|
|
|
# Yields nil when the diff is not stored externally. Otherwise yields the
# opened external diff file, reusing the handle when called again from within
# the block. The cached handle is cleared once the outermost block finishes.
def opening_external_diff
  return yield(nil) unless stored_externally?
  return yield(@external_diff_file) if @external_diff_file

  external_diff.open do |file|
    @external_diff_file = file

    yield(@external_diff_file)
  ensure
    @external_diff_file = nil
  end
end
|
|
|
|
|
2019-02-20 10:35:57 -05:00
|
|
|
# Transactionally migrate the current merge_request_diff_files entries to
|
|
|
|
# external storage. If external storage isn't an option for this diff, the
|
|
|
|
# method is a no-op.
|
|
|
|
# Does nothing when the diff is already external, external diffs should not
# be used, or there are no files. Otherwise writes the file contents to the
# external diff and replaces the database rows within one transaction.
def migrate_files_to_external_storage!
  return if stored_externally? || !use_external_diff? || files_count == 0

  rows = build_merge_request_diff_files(merge_request_diff_files)
  rows = build_external_merge_request_diff_files(rows)

  # Perform carrierwave activity before entering the database transaction.
  # This is safe as until the `external_diff_store` column is changed, we will
  # continue to consult the in-database content.
  self.external_diff.store!

  transaction do
    MergeRequestDiffFile.where(merge_request_diff_id: id).delete_all
    ApplicationRecord.legacy_bulk_insert('merge_request_diff_files', rows) # rubocop:disable Gitlab/BulkInsert
    save!
  end

  # Drop the loaded association so the rewritten rows are read on next access.
  merge_request_diff_files.reset
end
|
|
|
|
|
2020-03-20 17:09:17 -04:00
|
|
|
# Transactionally migrate the current merge_request_diff_files entries from
|
|
|
|
# external storage, back to the database. This is the rollback operation for
|
|
|
|
# +migrate_files_to_external_storage!+
|
|
|
|
#
|
|
|
|
# If this diff isn't in external storage, the method is a no-op.
|
|
|
|
# Does nothing when the diff is not stored externally or has no files.
# Otherwise reads every file's content from the external diff, rewrites the
# database rows inside one transaction and then removes the external file.
def migrate_files_to_database!
  return unless stored_externally?
  return if files_count == 0

  rows = convert_external_diffs_to_database

  transaction do
    MergeRequestDiffFile.where(merge_request_diff_id: id).delete_all
    ApplicationRecord.legacy_bulk_insert('merge_request_diff_files', rows) # rubocop:disable Gitlab/BulkInsert
    update!(stored_externally: false)
  end

  # Only delete the external diff file after the contents have been saved to
  # the database
  remove_external_diff!
  merge_request_diff_files.reset
end
|
|
|
|
|
2022-07-05 14:08:43 -04:00
|
|
|
# Yields locally cached external diff if it's externally stored.
|
|
|
|
# Used during Project Export to speed up externally
|
|
|
|
# stored merge request diffs export
|
|
|
|
# Yields nil when the diff is not stored externally. Otherwise makes sure the
# local cache file exists (creating it through cache_external_diff when it is
# missing) and yields it opened for reading.
def cached_external_diff
  return yield(nil) unless stored_externally?

  cache_external_diff unless File.exist?(external_diff_cache_filepath)

  File.open(external_diff_cache_filepath) do |file|
    yield(file)
  end
end
|
|
|
|
|
|
|
|
# Deletes the local cache directory for this diff, if it exists. The path is
# validated first: it is checked for path traversal and must be located under
# Dir.tmpdir.
def remove_cached_external_diff
  Gitlab::Utils.check_path_traversal!(external_diff_cache_dir)
  Gitlab::Utils.check_allowed_absolute_path!(external_diff_cache_dir, [Dir.tmpdir])

  return unless Dir.exist?(external_diff_cache_dir)

  FileUtils.rm_rf(external_diff_cache_dir)
end
|
|
|
|
|
2016-08-15 10:57:19 -04:00
|
|
|
private
|
|
|
|
|
2020-03-20 17:09:17 -04:00
|
|
|
# Builds one attribute hash per diff file with its `diff` content read back
# from the external file. The external offset/size keys are removed from each
# hash. Raises when a file has no external offset or size.
def convert_external_diffs_to_database
  opening_external_diff do |external_file|
    merge_request_diff_files.map do |diff_file|
      row = diff_file.attributes.except('diff')

      unless row['external_diff_offset'] && row['external_diff_size']
        raise "Diff file lacks external diff offset or size: #{row.inspect}"
      end

      # The diff in the external file is already base64-encoded if necessary,
      # matching the 'binary' attribute of the row. Reading it directly allows
      # a cycle of decode-encode to be skipped
      external_file.seek(row.delete('external_diff_offset'))
      row['diff'] = external_file.read(row.delete('external_diff_size'))

      row
    end
  end
end
|
|
|
|
|
2019-12-11 10:07:38 -05:00
|
|
|
# Wraps this diff in a Gitlab::Diff::FileCollection::MergeRequestDiffBatch
# for the given page and page size, forwarding diff_options unchanged.
def diffs_in_batch_collection(batch_page, batch_size, diff_options:)
  Gitlab::Diff::FileCollection::MergeRequestDiffBatch.new(
    self, batch_page, batch_size, diff_options: diff_options
  )
end
|
|
|
|
|
2019-03-14 15:40:10 -04:00
|
|
|
# Tells whether a diff text has to be stored base64-encoded.
#
# Returns false for nil. Returns true when the text is binary-encoded with
# at least one non-ASCII byte, or when it contains a NUL byte.
def encode_in_base64?(diff_text)
  if diff_text.nil?
    false
  elsif diff_text.encoding == Encoding::BINARY && !diff_text.ascii_only?
    true
  else
    diff_text.include?("\0")
  end
end
|
|
|
|
|
2019-02-20 10:35:57 -05:00
|
|
|
# Writes the diff content of the rows to a temporary file, assigns that file
# as this record's external diff and flags the record as stored externally.
#
# Returns the same rows object it was given; the rows are modified in place
# by build_external_diff_tempfile.
def build_external_merge_request_diff_files(rows)
  tempfile = nil

  begin
    tempfile = build_external_diff_tempfile(rows)

    self.external_diff = tempfile
    self.stored_externally = true

    rows
  ensure
    # Runs on success and on failure; tempfile stays nil if it was never built.
    tempfile&.unlink
  end
end
|
|
|
|
|
2019-02-20 10:35:57 -05:00
|
|
|
# Bulk-inserts the given rows into the merge_request_diff_files table. When
# external diffs apply, the rows are first passed through
# build_external_merge_request_diff_files.
def create_merge_request_diff_files(rows)
  insertable_rows = use_external_diff? ? build_external_merge_request_diff_files(rows) : rows

  # Faster inserts
  ApplicationRecord.legacy_bulk_insert('merge_request_diff_files', insertable_rows) # rubocop:disable Gitlab/BulkInsert
end
|
|
|
|
|
2019-01-09 12:01:28 -05:00
|
|
|
# Concatenates the :diff value of every row into one temporary file.
#
# Each row is modified in place: :diff is removed, and
# :external_diff_offset / :external_diff_size record where its content sits
# in the file. A nil diff gets size 0.
#
# Returns the Tempfile. It has been closed by the time it is returned,
# because the block form of Tempfile.open closes it.
def build_external_diff_tempfile(rows)
  Tempfile.open(external_diff.filename) do |file|
    rows.each do |row|
      data = row.delete(:diff)

      row.merge!(
        external_diff_offset: file.pos,
        external_diff_size: data.nil? ? 0 : data.bytesize
      )

      file.write(data)
    end

    file
  end
end
|
|
|
|
|
|
|
|
# Turns the diffs into row hashes for the diff files table, in the order
# produced by sort_diffs. Each row is the diff's hash plus :binary,
# :merge_request_diff_id and :relative_order (its position in sorted order).
def build_merge_request_diff_files(diffs)
  sort_diffs(diffs).each_with_index.map do |diff, position|
    row = diff.to_hash.merge(
      binary: false,
      merge_request_diff_id: id,
      relative_order: position
    )

    # Compatibility with old diffs created with Psych.
    raw_diff = row[:diff]

    if encode_in_base64?(raw_diff)
      row[:binary] = true
      # 'm0' is base64 without line feeds.
      row[:diff] = [raw_diff].pack('m0')
    end

    row
  end
end
|
|
|
|
|
2019-12-11 10:07:38 -05:00
|
|
|
# Yields the block with the repository Compare object if it should
|
|
|
|
# fetch diffs from the repository instead of the DB.
|
|
|
|
def fetching_repository_diffs(diff_options)
  return unless block_given?

  options = diff_options || {}

  # The repository is only considered when no files are persisted or when
  # whitespace changes have to be ignored, and only with complete diff refs.
  wants_repository = without_files? || options[:ignore_whitespace_change]
  comparison = diff_refs.compare_in(repository.project) if wants_repository && diff_refs&.complete?

  # Can be read as: fetch the persisted diffs if yielded without the
  # Compare object.
  comparison ? yield(comparison) : yield
end
|
|
|
|
|
2019-02-20 10:35:57 -05:00
|
|
|
# Decides from Gitlab.config.external_diffs whether this diff's files should
# be stored externally. Always false when external diffs are disabled.
def use_external_diff?
  settings = Gitlab.config.external_diffs
  return false unless settings.enabled

  policy = settings.when

  if policy == 'always'
    true
  elsif policy == 'outdated'
    outdated_by_merge? || outdated_by_closure? || old_version?
  else
    false # Disable external diffs if misconfigured
  end
end
|
|
|
|
|
|
|
|
# True when the merge request is merged and its recorded merge time is
# earlier than EXTERNAL_DIFF_CUTOFF.ago. False when there is no merge
# request, no metrics or no merge time.
def outdated_by_merge?
  merged_at = merge_request&.metrics&.merged_at
  return false unless merged_at

  merge_request.merged? && merged_at < EXTERNAL_DIFF_CUTOFF.ago
end
|
|
|
|
|
|
|
|
# True when the merge request is closed and its latest recorded close time
# is earlier than EXTERNAL_DIFF_CUTOFF.ago. False when there is no merge
# request, no metrics or no close time.
def outdated_by_closure?
  closed_at = merge_request&.metrics&.latest_closed_at
  return false unless closed_at

  merge_request.closed? && closed_at < EXTERNAL_DIFF_CUTOFF.ago
end
|
|
|
|
|
|
|
|
# Compares this diff's id with the latest_merge_request_diff_id stored on
# its merge request. Returns nil when no latest id is found, otherwise
# whether this id is lower than the latest one.
def old_version?
  merge_request_scope = MergeRequest.where(id: merge_request_id).limit(1)
  latest_id = merge_request_scope.pluck(:latest_merge_request_diff_id).first

  id < latest_id if latest_id
end
|
|
|
|
|
2017-06-09 07:48:25 -04:00
|
|
|
# Builds a Gitlab::Git::DiffCollection from the stored diff files. When
# options[:paths] is present, only files whose old or new path is in that
# list are loaded. The options are forwarded to the collection as given.
def load_diffs(options)
  # Ensure all diff files operate on the same external diff file instance if
  # present. This reduces file open/close overhead.
  opening_external_diff do
    files = merge_request_diff_files
    requested_paths = options[:paths]

    if requested_paths
      files = files.where('old_path IN (?) OR new_path IN (?)', requested_paths, requested_paths)
    end

    Gitlab::Git::DiffCollection.new(files.map(&:to_hash), options)
  end
end
|
|
|
|
|
2021-08-25 23:09:01 -04:00
|
|
|
# Builds a CommitCollection for the merge request's target project and
# target branch from the stored diff commits.
#
# limit            - passed to `.limit` on the stored diff commits.
# load_from_gitaly - when true, commits are fetched with
#                    Gitlab::Git::Commit.batch_by_oid using the stored SHAs
#                    and then decorated; otherwise they are rebuilt from the
#                    stored rows via Commit.from_hash.
def load_commits(limit: nil, load_from_gitaly: false)
  commits =
    if load_from_gitaly
      raw_commits = Gitlab::Git::Commit.batch_by_oid(repository, merge_request_diff_commits.limit(limit).map(&:sha))
      Commit.decorate(raw_commits, project)
    else
      merge_request_diff_commits.with_users.limit(limit).map do |diff_commit|
        Commit.from_hash(diff_commit.to_hash, project)
      end
    end

  CommitCollection.new(merge_request.target_project, commits, merge_request.target_branch)
end
|
|
|
|
|
2016-07-28 06:46:27 -04:00
|
|
|
# Computes the diffs of `compare`, stores their file rows and assigns the
# resulting state, real_size and sorted attributes to this record.
# Attributes are only assigned here, not saved.
def save_diffs
  return assign_attributes({ state: :empty }) if compare.commits.empty?

  diff_collection = compare.diffs(Commit.max_diff_options)
  new_attributes = { real_size: diff_collection.real_size }

  if diff_collection.any?
    new_attributes[:state] = :collected

    create_merge_request_diff_files(build_merge_request_diff_files(diff_collection))
    new_attributes[:sorted] = true

    # Drop the cached association so it is loaded again on next access.
    self.class.uncached { merge_request_diff_files.reset }
  end

  # Set our state to 'overflow' to make the #empty? and #collected?
  # methods (generated by StateMachine) return false.
  #
  # This assignment has to come after the block above so the 'overflow'
  # state does not get overridden by 'collected'.
  new_attributes[:state] = :overflow if diff_collection.overflow?

  assign_attributes(new_attributes)
end
|
|
|
|
|
|
|
|
# Bulk-creates the diff commit rows from the commits of `compare`, passed in
# reversed order, then resets the cached commits association.
def save_commits
  reversed_commits = compare.commits.reverse

  MergeRequestDiffCommit.create_bulk(id, reversed_commits)
  self.class.uncached { merge_request_diff_commits.reset }
end
|
2017-06-16 10:00:58 -04:00
|
|
|
|
2020-08-13 11:10:03 -04:00
|
|
|
# Writes commits_count and files_count via update_columns. files_count is
# capped at FILES_COUNT_SENTINEL.
def set_count_columns
  commit_total = merge_request_diff_commits.size
  file_total = merge_request_diff_files.size
  capped_file_total = file_total > FILES_COUNT_SENTINEL ? FILES_COUNT_SENTINEL : file_total

  update_columns(commits_count: commit_total, files_count: capped_file_total)
end
|
|
|
|
|
|
|
|
# The repository of this diff's project.
def repository
  owning_project = project
  owning_project.repository
end
|
2014-05-07 09:14:24 -04:00
|
|
|
|
2016-07-28 04:36:30 -04:00
|
|
|
# SHA of the merge base between the head and start commits, as reported by
# the project. Returns nil when either SHA is missing.
def find_base_sha
  if head_commit_sha && start_commit_sha
    merge_base = project.merge_base_commit(head_commit_sha, start_commit_sha)
    merge_base.try(:sha)
  end
end
|
|
|
|
|
2016-06-20 12:43:55 -04:00
|
|
|
# Calls keep_around with the start, head and base commit SHAs on this diff's
# repository and on the source project's repository.
#
# A merge request without a source project is tolerated: only this diff's
# repository is used then, instead of raising NoMethodError on nil.
def keep_around_commits
  # compact drops the nil from a missing source project; uniq avoids calling
  # the same repository twice when source and target are the same.
  repositories = [repository, merge_request.source_project&.repository].compact.uniq

  repositories.each do |repo|
    repo.keep_around(start_commit_sha, head_commit_sha, base_commit_sha)
  end
end
|
2020-12-14 19:10:07 -05:00
|
|
|
|
|
|
|
# Rewrites the stored diff files in sort_diffs order and marks this diff as
# sorted. Does nothing when the diff is already sorted or has no files.
def reorder_diff_files!
  return if sorted?
  return if merge_request_diff_files.empty?

  sorted_files = sort_diffs(merge_request_diff_files)
  sorted_files.each.with_index { |diff_file, position| diff_file.relative_order = position }

  transaction do
    # The `merge_request_diff_files` table doesn't have an `id` column so
    # we cannot use `Gitlab::Database::BulkUpdate`.
    MergeRequestDiffFile.where(merge_request_diff_id: id).delete_all
    MergeRequestDiffFile.bulk_insert!(sorted_files)
    update_column(:sorted, true)
  end
end
|
|
|
|
|
|
|
|
# Returns the diffs in the order produced by Gitlab::Diff::FileCollectionSorter.
def sort_diffs(diffs)
  sorter = Gitlab::Diff::FileCollectionSorter.new(diffs)
  sorter.sort
end
|
2022-07-05 14:08:43 -04:00
|
|
|
|
|
|
|
# Downloads external diff to a temp storage location.
|
|
|
|
def cache_external_diff
  return unless stored_externally?

  cache_path = external_diff_cache_filepath
  return if File.exist?(cache_path)

  # mkdir_p succeeds when the directory already exists, so there is no
  # check-then-create race between concurrent callers.
  FileUtils.mkdir_p(external_diff_cache_dir)

  # Download into a sibling file and rename it once complete. Callers treat
  # the mere existence of the cache file as "fully downloaded", so an
  # interrupted download must never leave a file at cache_path.
  partial_path = "#{cache_path}.#{Process.pid}-#{Thread.current.object_id}.partial"

  begin
    opening_external_diff do |remote_file|
      File.open(partial_path, 'wb') do |file|
        file.write(remote_file.read(EXTERNAL_DIFF_CACHE_CHUNK_SIZE)) until remote_file.eof?
      end
    end

    # Same directory, hence same filesystem: the rename is atomic.
    File.rename(partial_path, cache_path)
  ensure
    # No-op after a successful rename; cleans up after a failed download.
    FileUtils.rm_f(partial_path)
  end
end
|
|
|
|
|
|
|
|
# Path of the cached diff file: "diff-<id>" inside external_diff_cache_dir.
def external_diff_cache_filepath
  cache_file_name = "diff-#{id}"

  File.join(external_diff_cache_dir, cache_file_name)
end
|
|
|
|
|
|
|
|
# Cache directory for this diff: the EXTERNAL_DIFFS_CACHE_TMPDIR template
# filled with the project id, merge request id and diff id, placed under
# the system temp directory.
def external_diff_cache_dir
  dir_name = format(
    EXTERNAL_DIFFS_CACHE_TMPDIR,
    project_id: project.id,
    mr_id: merge_request_id,
    id: id
  )

  File.join(Dir.tmpdir, dir_name)
end
|
2014-01-22 08:19:02 -05:00
|
|
|
end
|
2019-10-10 17:06:01 -04:00
|
|
|
|
2021-05-11 17:10:21 -04:00
|
|
|
# NOTE(review): `prepend_mod_with` is defined outside this file; presumably it
# prepends an extension module named 'MergeRequestDiff' when one exists - confirm.
MergeRequestDiff.prepend_mod_with('MergeRequestDiff')
|