From ff78882f08ddf3c9f9ac382627b12b05a655607e Mon Sep 17 00:00:00 2001
From: Yorick Peterse <yorick@yorickpeterse.com>
Date: Wed, 9 Dec 2020 15:31:34 +0100
Subject: [PATCH] Add finder for getting commits with a trailer set

This class fetches all commits in a range in batches, selecting only
commits with a certain trailer set. This class will be used to fetch the
commits that should be used to generate a changelog.

This builds on the changes introduced in
https://gitlab.com/gitlab-org/gitaly/-/merge_requests/2842 and
https://gitlab.com/gitlab-org/gitaly/-/merge_requests/2999. To take
advantage of these changes, we also bump the Gitaly Gem version and add
fallback code for getting trailers using Rugged; as Rugged is still in
use. In addition, we change the merge_request_diff_commits table and the
MergeRequestDiffCommit model to support storing the trailers as JSON.

See https://gitlab.com/gitlab-com/gl-infra/delivery/-/issues/1365 for
more information.
---
 Gemfile                                       |  2 +-
 Gemfile.lock                                  |  4 +-
 .../commits_with_trailer_finder.rb            | 82 +++++++++++++++++++
 app/models/merge_request_context_commit.rb    |  3 +
 app/models/merge_request_diff_commit.rb       |  6 +-
 app/models/repository.rb                      |  3 +-
 .../merge_requests/add_context_service.rb     |  3 +-
 app/validators/json_schemas/git_trailers.json |  9 ++
 .../unreleased/gitaly-trailer-parsing.yml     |  5 ++
 ..._add_merge_request_diff_commit_trailers.rb | 22 +++++
 ...d_merge_request_context_commit_trailers.rb | 12 +++
 db/schema_migrations/20210106155209           |  1 +
 db/schema_migrations/20210107154615           |  1 +
 db/structure.sql                              |  6 +-
 lib/gitlab/git/commit.rb                      |  3 +-
 lib/gitlab/git/rugged_impl/commit.rb          |  1 +
 lib/gitlab/gitaly_client/commit_service.rb    |  3 +-
 .../commits_with_trailer_finder_spec.rb       | 38 +++++++++
 spec/lib/gitlab/git/commit_spec.rb            |  3 +-
 .../import_export/safe_model_attributes.yml   |  2 +
 spec/models/merge_request_diff_commit_spec.rb |  9 +-
 21 files changed, 204 insertions(+), 14 deletions(-)
 create mode 100644 app/finders/repositories/commits_with_trailer_finder.rb
 create mode 100644 app/validators/json_schemas/git_trailers.json
 create mode 100644 changelogs/unreleased/gitaly-trailer-parsing.yml
 create mode 100644 db/migrate/20210106155209_add_merge_request_diff_commit_trailers.rb
 create mode 100644 db/migrate/20210107154615_add_merge_request_context_commit_trailers.rb
 create mode 100644 db/schema_migrations/20210106155209
 create mode 100644 db/schema_migrations/20210107154615
 create mode 100644 spec/finders/repositories/commits_with_trailer_finder_spec.rb

diff --git a/Gemfile b/Gemfile
index c7ed1cd4d71e..a2aff0f4b1e8 100644
--- a/Gemfile
+++ b/Gemfile
@@ -462,7 +462,7 @@ group :ed25519 do
 end
 
 # Gitaly GRPC protocol definitions
-gem 'gitaly', '~> 13.8.0.pre.rc2'
+gem 'gitaly', '~> 13.8.0.pre.rc3'
 
 gem 'grpc', '~> 1.30.2'
 
diff --git a/Gemfile.lock b/Gemfile.lock
index b370024b95b0..74d88417541a 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -417,7 +417,7 @@ GEM
       rails (>= 3.2.0)
     git (1.7.0)
       rchardet (~> 1.8)
-    gitaly (13.8.0.pre.rc2)
+    gitaly (13.8.0.pre.rc3)
       grpc (~> 1.0)
     github-markup (1.7.0)
     gitlab-chronic (0.10.5)
@@ -1357,7 +1357,7 @@ DEPENDENCIES
   gettext (~> 3.3)
   gettext_i18n_rails (~> 1.8.0)
   gettext_i18n_rails_js (~> 1.3)
-  gitaly (~> 13.8.0.pre.rc2)
+  gitaly (~> 13.8.0.pre.rc3)
   github-markup (~> 1.7.0)
   gitlab-chronic (~> 0.10.5)
   gitlab-experiment (~> 0.4.5)
diff --git a/app/finders/repositories/commits_with_trailer_finder.rb b/app/finders/repositories/commits_with_trailer_finder.rb
new file mode 100644
index 000000000000..4bd643c345ba
--- /dev/null
+++ b/app/finders/repositories/commits_with_trailer_finder.rb
@@ -0,0 +1,82 @@
+# frozen_string_literal: true
+
+module Repositories
+  # Finder for obtaining commits between two refs, with a Git trailer set.
+  class CommitsWithTrailerFinder
+    # The maximum number of commits to retrieve per page.
+    #
+    # This value is arbitrarily chosen. Lowering it means more Gitaly calls, but
+    # less data being loaded into memory at once. Increasing it has the opposite
+    # effect.
+    #
+    # This amount is based around the number of commits that usually go in a
+    # GitLab release. Some examples for GitLab's own releases:
+    #
+    # * 13.6.0: 4636 commits
+    # * 13.5.0: 5912 commits
+    # * 13.4.0: 5541 commits
+    #
+    # Using this limit should result in most (very large) projects only needing
+    # 5-10 Gitaly calls, while keeping memory usage at a reasonable amount.
+    COMMITS_PER_PAGE = 1024
+
+    # The `project` argument specifies the project for which to obtain the
+    # commits.
+    #
+    # The `from` and `to` arguments specify the range of commits to include. The
+    # commit specified in `from` won't be included itself. The commit specified
+    # in `to` _is_ included.
+    #
+    # The `per_page` argument specifies how many commits are retrieved in a single
+    # Gitaly API call.
+    def initialize(project:, from:, to:, per_page: COMMITS_PER_PAGE)
+      @project = project
+      @from = from
+      @to = to
+      @per_page = per_page
+    end
+
+    # Fetches all commits that have the given trailer set.
+    #
+    # The commits are yielded to the supplied block in batches. This allows
+    # other code to process these commits in batches too, instead of first
+    # having to load all commits into memory.
+    #
+    # Example:
+    #
+    #     CommitsWithTrailerFinder.new(...).each_page('Signed-off-by') do |commits|
+    #       commits.each do |commit|
+    #         ...
+    #       end
+    #     end
+    def each_page(trailer)
+      return to_enum(__method__, trailer) unless block_given?
+
+      offset = 0
+      response = fetch_commits
+
+      while response.any?
+        commits = []
+
+        response.each do |commit|
+          commits.push(commit) if commit.trailers.key?(trailer)
+        end
+
+        yield commits
+
+        offset += response.length
+        response = fetch_commits(offset)
+      end
+    end
+
+    private
+
+    def fetch_commits(offset = 0)
+      range = "#{@from}..#{@to}"
+
+      @project
+        .repository
+        .commits(range, limit: @per_page, offset: offset, trailers: true)
+    end
+  end
+end
diff --git a/app/models/merge_request_context_commit.rb b/app/models/merge_request_context_commit.rb
index 59cc82cfaf50..e081a96dc10e 100644
--- a/app/models/merge_request_context_commit.rb
+++ b/app/models/merge_request_context_commit.rb
@@ -12,6 +12,9 @@ class MergeRequestContextCommit < ApplicationRecord
   validates :sha, presence: true
   validates :sha, uniqueness: { message: 'has already been added' }
 
+  serialize :trailers, Serializers::JSON # rubocop:disable Cop/ActiveRecordSerialize
+  validates :trailers, json_schema: { filename: 'git_trailers' }
+
   # Sort by committed date in descending order to ensure latest commits comes on the top
   scope :order_by_committed_date_desc, -> { order('committed_date DESC') }
 
diff --git a/app/models/merge_request_diff_commit.rb b/app/models/merge_request_diff_commit.rb
index 9f6933d0879c..cf2d07b85619 100644
--- a/app/models/merge_request_diff_commit.rb
+++ b/app/models/merge_request_diff_commit.rb
@@ -10,6 +10,9 @@ class MergeRequestDiffCommit < ApplicationRecord
   sha_attribute :sha
   alias_attribute :id, :sha
 
+  serialize :trailers, Serializers::JSON # rubocop:disable Cop/ActiveRecordSerialize
+  validates :trailers, json_schema: { filename: 'git_trailers' }
+
   # Deprecated; use `bulk_insert!` from `BulkInsertSafe` mixin instead.
   # cf. https://gitlab.com/gitlab-org/gitlab/issues/207989 for progress
   def self.create_bulk(merge_request_diff_id, commits)
@@ -23,7 +26,8 @@ def self.create_bulk(merge_request_diff_id, commits)
         relative_order: index,
         sha: Gitlab::Database::ShaAttribute.serialize(sha), # rubocop:disable Cop/ActiveRecordSerialize
         authored_date: Gitlab::Database.sanitize_timestamp(commit_hash[:authored_date]),
-        committed_date: Gitlab::Database.sanitize_timestamp(commit_hash[:committed_date])
+        committed_date: Gitlab::Database.sanitize_timestamp(commit_hash[:committed_date]),
+        trailers: commit_hash.fetch(:trailers, {}).to_json
       )
     end
 
diff --git a/app/models/repository.rb b/app/models/repository.rb
index c19448332f8b..6e57e5c66204 100644
--- a/app/models/repository.rb
+++ b/app/models/repository.rb
@@ -151,7 +151,8 @@ def commits(ref = nil, opts = {})
       all: !!opts[:all],
       first_parent: !!opts[:first_parent],
       order: opts[:order],
-      literal_pathspec: opts.fetch(:literal_pathspec, true)
+      literal_pathspec: opts.fetch(:literal_pathspec, true),
+      trailers: opts[:trailers]
     }
 
     commits = Gitlab::Git::Commit.where(options)
diff --git a/app/services/merge_requests/add_context_service.rb b/app/services/merge_requests/add_context_service.rb
index bb82fa23468e..b693f8509a2b 100644
--- a/app/services/merge_requests/add_context_service.rb
+++ b/app/services/merge_requests/add_context_service.rb
@@ -66,7 +66,8 @@ def build_context_commit_rows(merge_request_id, commits)
           relative_order: index,
           sha: sha,
           authored_date: Gitlab::Database.sanitize_timestamp(commit_hash[:authored_date]),
-          committed_date: Gitlab::Database.sanitize_timestamp(commit_hash[:committed_date])
+          committed_date: Gitlab::Database.sanitize_timestamp(commit_hash[:committed_date]),
+          trailers: commit_hash.fetch(:trailers, {}).to_json
         )
       end
     end
diff --git a/app/validators/json_schemas/git_trailers.json b/app/validators/json_schemas/git_trailers.json
new file mode 100644
index 000000000000..18ac97226a72
--- /dev/null
+++ b/app/validators/json_schemas/git_trailers.json
@@ -0,0 +1,9 @@
+{
+  "description": "Git trailer key/value pairs",
+  "type": "object",
+  "patternProperties": {
+    ".*": {
+      "type": "string"
+    }
+  }
+}
diff --git a/changelogs/unreleased/gitaly-trailer-parsing.yml b/changelogs/unreleased/gitaly-trailer-parsing.yml
new file mode 100644
index 000000000000..096ef4a94a73
--- /dev/null
+++ b/changelogs/unreleased/gitaly-trailer-parsing.yml
@@ -0,0 +1,5 @@
+---
+title: Add finder for getting commits with a trailer set
+merge_request: 49243
+author:
+type: added
diff --git a/db/migrate/20210106155209_add_merge_request_diff_commit_trailers.rb b/db/migrate/20210106155209_add_merge_request_diff_commit_trailers.rb
new file mode 100644
index 000000000000..906efa58bcde
--- /dev/null
+++ b/db/migrate/20210106155209_add_merge_request_diff_commit_trailers.rb
@@ -0,0 +1,22 @@
+# frozen_string_literal: true
+
+# See https://docs.gitlab.com/ee/development/migration_style_guide.html
+# for more information on how to write migrations for GitLab.
+
+class AddMergeRequestDiffCommitTrailers < ActiveRecord::Migration[6.0]
+  include Gitlab::Database::MigrationHelpers
+
+  DOWNTIME = false
+
+  def up
+    with_lock_retries do
+      add_column :merge_request_diff_commits, :trailers, :jsonb, default: {}, null: false
+    end
+  end
+
+  def down
+    with_lock_retries do
+      remove_column :merge_request_diff_commits, :trailers
+    end
+  end
+end
diff --git a/db/migrate/20210107154615_add_merge_request_context_commit_trailers.rb b/db/migrate/20210107154615_add_merge_request_context_commit_trailers.rb
new file mode 100644
index 000000000000..e7bd7c2ea56b
--- /dev/null
+++ b/db/migrate/20210107154615_add_merge_request_context_commit_trailers.rb
@@ -0,0 +1,12 @@
+# frozen_string_literal: true
+
+# See https://docs.gitlab.com/ee/development/migration_style_guide.html
+# for more information on how to write migrations for GitLab.
+
+class AddMergeRequestContextCommitTrailers < ActiveRecord::Migration[6.0]
+  DOWNTIME = false
+
+  def change
+    add_column :merge_request_context_commits, :trailers, :jsonb, default: {}, null: false
+  end
+end
diff --git a/db/schema_migrations/20210106155209 b/db/schema_migrations/20210106155209
new file mode 100644
index 000000000000..10dde1cf8748
--- /dev/null
+++ b/db/schema_migrations/20210106155209
@@ -0,0 +1 @@
+4aeff45663a9f5a41a8dd92298afb4b0b57aa6f190f4648455df2fa1e39e174f
\ No newline at end of file
diff --git a/db/schema_migrations/20210107154615 b/db/schema_migrations/20210107154615
new file mode 100644
index 000000000000..f8b6f97d3fe4
--- /dev/null
+++ b/db/schema_migrations/20210107154615
@@ -0,0 +1 @@
+3e867ceefcab4f043b89d3c04e6e0a1182423039e1a9245e611128efe77c0e88
\ No newline at end of file
diff --git a/db/structure.sql b/db/structure.sql
index 38965f00aa60..ed6376b3da0e 100644
--- a/db/structure.sql
+++ b/db/structure.sql
@@ -13863,7 +13863,8 @@ CREATE TABLE merge_request_context_commits (
     committer_name text,
     committer_email text,
     message text,
-    merge_request_id bigint
+    merge_request_id bigint,
+    trailers jsonb DEFAULT '{}'::jsonb NOT NULL
 );
 
 CREATE SEQUENCE merge_request_context_commits_id_seq
@@ -13885,7 +13886,8 @@ CREATE TABLE merge_request_diff_commits (
     author_email text,
     committer_name text,
     committer_email text,
-    message text
+    message text,
+    trailers jsonb DEFAULT '{}'::jsonb NOT NULL
 );
 
 CREATE TABLE merge_request_diff_details (
diff --git a/lib/gitlab/git/commit.rb b/lib/gitlab/git/commit.rb
index 0bc7ecccf5ec..35c3dc5b0b3d 100644
--- a/lib/gitlab/git/commit.rb
+++ b/lib/gitlab/git/commit.rb
@@ -16,7 +16,7 @@ class Commit
       SERIALIZE_KEYS = [
         :id, :message, :parent_ids,
         :authored_date, :author_name, :author_email,
-        :committed_date, :committer_name, :committer_email
+        :committed_date, :committer_name, :committer_email, :trailers
       ].freeze
 
       attr_accessor(*SERIALIZE_KEYS)
@@ -389,6 +389,7 @@ def init_from_gitaly(commit)
         @committer_name = commit.committer.name.dup
         @committer_email = commit.committer.email.dup
         @parent_ids = Array(commit.parent_ids)
+        @trailers = Hash[commit.trailers.map { |t| [t.key, t.value] }]
       end
 
       # Gitaly provides a UNIX timestamp in author.date.seconds, and a timezone
diff --git a/lib/gitlab/git/rugged_impl/commit.rb b/lib/gitlab/git/rugged_impl/commit.rb
index 0eff35ab1c47..0607b151de2f 100644
--- a/lib/gitlab/git/rugged_impl/commit.rb
+++ b/lib/gitlab/git/rugged_impl/commit.rb
@@ -103,6 +103,7 @@ def init_from_rugged(commit)
           @committer_name = committer[:name]
           @committer_email = committer[:email]
           @parent_ids = commit.parents.map(&:oid)
+          @trailers = Hash[commit.trailers]
         end
       end
     end
diff --git a/lib/gitlab/gitaly_client/commit_service.rb b/lib/gitlab/gitaly_client/commit_service.rb
index ea940150941c..ef5221a80423 100644
--- a/lib/gitlab/gitaly_client/commit_service.rb
+++ b/lib/gitlab/gitaly_client/commit_service.rb
@@ -335,7 +335,8 @@ def find_commits(options)
           all:          !!options[:all],
           first_parent: !!options[:first_parent],
           global_options: parse_global_options!(options),
-          disable_walk: true # This option is deprecated. The 'walk' implementation is being removed.
+          disable_walk: true, # This option is deprecated. The 'walk' implementation is being removed.
+          trailers: options[:trailers]
         )
         request.after    = GitalyClient.timestamp(options[:after]) if options[:after]
         request.before   = GitalyClient.timestamp(options[:before]) if options[:before]
diff --git a/spec/finders/repositories/commits_with_trailer_finder_spec.rb b/spec/finders/repositories/commits_with_trailer_finder_spec.rb
new file mode 100644
index 000000000000..0c457aae3400
--- /dev/null
+++ b/spec/finders/repositories/commits_with_trailer_finder_spec.rb
@@ -0,0 +1,38 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+RSpec.describe Repositories::CommitsWithTrailerFinder do
+  let(:project) { create(:project, :repository) }
+
+  describe '#each_page' do
+    it 'only yields commits with the given trailer' do
+      finder = described_class.new(
+        project: project,
+        from: '570e7b2abdd848b95f2f578043fc23bd6f6fd24d',
+        to: 'c7fbe50c7c7419d9701eebe64b1fdacc3df5b9dd'
+      )
+
+      commits = finder.each_page('Signed-off-by').to_a.flatten
+
+      expect(commits.length).to eq(1)
+      expect(commits.first.id).to eq('5937ac0a7beb003549fc5fd26fc247adbce4a52e')
+      expect(commits.first.trailers).to eq(
+        'Signed-off-by' => 'Dmitriy Zaporozhets <dmitriy.zaporozhets@gmail.com>'
+      )
+    end
+
+    it 'supports paginating of commits' do
+      finder = described_class.new(
+        project: project,
+        from: 'c1acaa58bbcbc3eafe538cb8274ba387047b69f8',
+        to: '5937ac0a7beb003549fc5fd26fc247adbce4a52e',
+        per_page: 1
+      )
+
+      commits = finder.each_page('Signed-off-by')
+
+      expect(commits.count).to eq(4)
+    end
+  end
+end
diff --git a/spec/lib/gitlab/git/commit_spec.rb b/spec/lib/gitlab/git/commit_spec.rb
index 8961cdcae7d3..49f1e6e994f4 100644
--- a/spec/lib/gitlab/git/commit_spec.rb
+++ b/spec/lib/gitlab/git/commit_spec.rb
@@ -720,7 +720,8 @@ def sample_commit_hash
       committer_name: "Dmitriy Zaporozhets",
       id: SeedRepo::Commit::ID,
       message: "tree css fixes",
-      parent_ids: ["874797c3a73b60d2187ed6e2fcabd289ff75171e"]
+      parent_ids: ["874797c3a73b60d2187ed6e2fcabd289ff75171e"],
+      trailers: {}
     }
   end
 end
diff --git a/spec/lib/gitlab/import_export/safe_model_attributes.yml b/spec/lib/gitlab/import_export/safe_model_attributes.yml
index a93ee051ccf2..79769f7ba7a4 100644
--- a/spec/lib/gitlab/import_export/safe_model_attributes.yml
+++ b/spec/lib/gitlab/import_export/safe_model_attributes.yml
@@ -231,6 +231,7 @@ MergeRequestDiffCommit:
 - committer_name
 - committer_email
 - message
+- trailers
 MergeRequestDiffFile:
 - merge_request_diff_id
 - relative_order
@@ -255,6 +256,7 @@ MergeRequestContextCommit:
 - committer_email
 - message
 - merge_request_id
+- trailers
 MergeRequestContextCommitDiffFile:
 - sha
 - relative_order
diff --git a/spec/models/merge_request_diff_commit_spec.rb b/spec/models/merge_request_diff_commit_spec.rb
index 2edf44ecdc49..a24628b0f9de 100644
--- a/spec/models/merge_request_diff_commit_spec.rb
+++ b/spec/models/merge_request_diff_commit_spec.rb
@@ -48,7 +48,8 @@
           "committer_email": "dmitriy.zaporozhets@gmail.com",
           "merge_request_diff_id": merge_request_diff_id,
           "relative_order": 0,
-          "sha": Gitlab::Database::ShaAttribute.serialize("5937ac0a7beb003549fc5fd26fc247adbce4a52e")
+          "sha": Gitlab::Database::ShaAttribute.serialize("5937ac0a7beb003549fc5fd26fc247adbce4a52e"),
+          "trailers": {}.to_json
         },
         {
           "message": "Change some files\n\nSigned-off-by: Dmitriy Zaporozhets \u003cdmitriy.zaporozhets@gmail.com\u003e\n",
@@ -60,7 +61,8 @@
           "committer_email": "dmitriy.zaporozhets@gmail.com",
           "merge_request_diff_id": merge_request_diff_id,
           "relative_order": 1,
-          "sha": Gitlab::Database::ShaAttribute.serialize("570e7b2abdd848b95f2f578043fc23bd6f6fd24d")
+          "sha": Gitlab::Database::ShaAttribute.serialize("570e7b2abdd848b95f2f578043fc23bd6f6fd24d"),
+          "trailers": {}.to_json
         }
       ]
     end
@@ -92,7 +94,8 @@
           "committer_email": "alejorro70@gmail.com",
           "merge_request_diff_id": merge_request_diff_id,
           "relative_order": 0,
-          "sha": Gitlab::Database::ShaAttribute.serialize("ba3343bc4fa403a8dfbfcab7fc1a8c29ee34bd69")
+          "sha": Gitlab::Database::ShaAttribute.serialize("ba3343bc4fa403a8dfbfcab7fc1a8c29ee34bd69"),
+          "trailers": {}.to_json
         }]
       end
 
-- 
GitLab