# frozen_string_literal: true

module Repositories
  # Finder for getting the commits to include in a changelog.
  #
  # Commits are read from the project's repository one page at a time, kept
  # only when they carry a given trailer, and dropped when a newer commit in
  # the same range reverts them.
  class ChangelogCommitsFinder
    # The maximum number of commits to retrieve per page.
    #
    # This value is arbitrarily chosen. Lowering it means more Gitaly calls, but
    # less data being loaded into memory at once. Increasing it has the opposite
    # effect.
    #
    # This amount is based around the number of commits that usually go in a
    # GitLab release. Some examples for GitLab's own releases:
    #
    # * 13.6.0: 4636 commits
    # * 13.5.0: 5912 commits
    # * 13.4.0: 5541 commits
    #
    # Using this limit should result in most (very large) projects only needing
    # 5-10 Gitaly calls, while keeping memory usage at a reasonable amount.
    COMMITS_PER_PAGE = 1024

    # The regex to use for extracting the SHA of a reverted commit.
    #
    # The pattern is case-insensitive, so it also accepts a SHA written with
    # upper-case hex digits.
    REVERT_REGEX = /^This reverts commit (?<sha>[0-9a-f]{40})/i.freeze

    # The `project` argument specifies the project for which to obtain the
    # commits.
    #
    # The `from` and `to` arguments specify the range of commits to include. The
    # commit specified in `from` won't be included itself. The commit specified
    # in `to` _is_ included.
    #
    # The `per_page` argument specifies how many commits are retrieved in a single
    # Gitaly API call.
    def initialize(project:, from:, to:, per_page: COMMITS_PER_PAGE)
      @project = project
      @from = from
      @to = to
      @per_page = per_page
    end

    # Fetches all commits that have the given trailer set.
    #
    # The commits are yielded to the supplied block in batches. This allows
    # other code to process these commits in batches too, instead of first
    # having to load all commits into memory. One batch is yielded per fetched
    # page, so a batch is empty when no commit of that page qualifies.
    #
    # When no block is given, an Enumerator over the batches is returned.
    #
    # Example:
    #
    #     ChangelogCommitsFinder.new(...).each_page('Changelog') do |commits|
    #       commits.each do |commit|
    #         ...
    #       end
    #     end
    def each_page(trailer)
      return to_enum(__method__, trailer) unless block_given?

      offset = 0
      # SHAs reverted by commits seen so far. Shared across pages, because a
      # revert and the commit it reverts may end up in different pages.
      reverted = Set.new
      response = fetch_commits

      while response.any?
        yield select_commits(response, trailer, reverted)

        offset += response.length
        response = fetch_commits(offset)
      end
    end

    private

    # Returns the commits of `page` that carry `trailer` and have not been
    # reverted, recording in `reverted` every SHA that a commit of this page
    # reverts.
    def select_commits(page, trailer, reverted)
      selected = []

      page.each do |commit|
        # If the commit is reverted in the same range (by a newer commit), we
        # won't include it. This works here because commits are processed in
        # reverse order (= newer first). Skipping also means a reverted revert
        # does not register its own target, so that target is kept.
        next if reverted.include?(commit.id)

        sha = revert_commit_sha(commit)
        reverted << sha if sha

        selected << commit if commit.trailers.key?(trailer)
      end

      selected
    end

    # Retrieves one page of at most `@per_page` commits in the `from..to`
    # range, starting at `offset`, with trailers requested.
    def fetch_commits(offset = 0)
      range = "#{@from}..#{@to}"

      @project
        .repository
        .commits(range, limit: @per_page, offset: offset, trailers: true)
    end

    # Returns the SHA that `commit` reverts according to its description, or
    # `nil` when the description is missing or names no reverted commit.
    #
    # REVERT_REGEX matches case-insensitively, while the lookup against
    # `commit.id` is case-sensitive. The SHA is therefore lower-cased so a
    # revert message using upper-case hex still suppresses the reverted commit.
    # NOTE(review): assumes commit IDs are lower-case hex, as Git prints them.
    def revert_commit_sha(commit)
      matches = commit.description&.match(REVERT_REGEX)

      matches[:sha].downcase if matches
    end
  end
end
|