From ff78882f08ddf3c9f9ac382627b12b05a655607e Mon Sep 17 00:00:00 2001 From: Yorick Peterse <yorick@yorickpeterse.com> Date: Wed, 9 Dec 2020 15:31:34 +0100 Subject: [PATCH] Add finder for getting commits with a trailer set This class fetches all commits in a range in batches, selecting only commits with a certain trailer set. This class will be used to fetch the commits that should be used to generate a changelog. This builds on the changes introduced in https://gitlab.com/gitlab-org/gitaly/-/merge_requests/2842 and https://gitlab.com/gitlab-org/gitaly/-/merge_requests/2999. To take advantage of these changes, we also bump the Gitaly Gem version and add fallback code for getting trailers using Rugged; as Rugged is still in use. In addition, we change the merge_request_diff_commits table and the MergeRequestDiffCommit model to support storing the trailers as JSON. See https://gitlab.com/gitlab-com/gl-infra/delivery/-/issues/1365 for more information. --- Gemfile | 2 +- Gemfile.lock | 4 +- .../commits_with_trailer_finder.rb | 82 +++++++++++++++++++ app/models/merge_request_context_commit.rb | 3 + app/models/merge_request_diff_commit.rb | 6 +- app/models/repository.rb | 3 +- .../merge_requests/add_context_service.rb | 3 +- app/validators/json_schemas/git_trailers.json | 9 ++ .../unreleased/gitaly-trailer-parsing.yml | 5 ++ ..._add_merge_request_diff_commit_trailers.rb | 22 +++++ ...d_merge_request_context_commit_trailers.rb | 12 +++ db/schema_migrations/20210106155209 | 1 + db/schema_migrations/20210107154615 | 1 + db/structure.sql | 6 +- lib/gitlab/git/commit.rb | 3 +- lib/gitlab/git/rugged_impl/commit.rb | 1 + lib/gitlab/gitaly_client/commit_service.rb | 3 +- .../commits_with_trailer_finder_spec.rb | 38 +++++++++ spec/lib/gitlab/git/commit_spec.rb | 3 +- .../import_export/safe_model_attributes.yml | 2 + spec/models/merge_request_diff_commit_spec.rb | 9 +- 21 files changed, 204 insertions(+), 14 deletions(-) create mode 100644 app/finders/repositories/commits_with_trailer_finder.rb create mode 100644 app/validators/json_schemas/git_trailers.json create mode 100644 changelogs/unreleased/gitaly-trailer-parsing.yml create mode 100644 db/migrate/20210106155209_add_merge_request_diff_commit_trailers.rb create mode 100644 db/migrate/20210107154615_add_merge_request_context_commit_trailers.rb create mode 100644 db/schema_migrations/20210106155209 create mode 100644 db/schema_migrations/20210107154615 create mode 100644 spec/finders/repositories/commits_with_trailer_finder_spec.rb diff --git a/Gemfile b/Gemfile index c7ed1cd4d71e..a2aff0f4b1e8 100644 --- a/Gemfile +++ b/Gemfile @@ -462,7 +462,7 @@ group :ed25519 do end # Gitaly GRPC protocol definitions -gem 'gitaly', '~> 13.8.0.pre.rc2' +gem 'gitaly', '~> 13.8.0.pre.rc3' gem 'grpc', '~> 1.30.2' diff --git a/Gemfile.lock b/Gemfile.lock index b370024b95b0..74d88417541a 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -417,7 +417,7 @@ GEM rails (>= 3.2.0) git (1.7.0) rchardet (~> 1.8) - gitaly (13.8.0.pre.rc2) + gitaly (13.8.0.pre.rc3) grpc (~> 1.0) github-markup (1.7.0) gitlab-chronic (0.10.5) @@ -1357,7 +1357,7 @@ DEPENDENCIES gettext (~> 3.3) gettext_i18n_rails (~> 1.8.0) gettext_i18n_rails_js (~> 1.3) - gitaly (~> 13.8.0.pre.rc2) + gitaly (~> 13.8.0.pre.rc3) github-markup (~> 1.7.0) gitlab-chronic (~> 0.10.5) gitlab-experiment (~> 0.4.5) diff --git a/app/finders/repositories/commits_with_trailer_finder.rb b/app/finders/repositories/commits_with_trailer_finder.rb new file mode 100644 index 000000000000..4bd643c345ba --- /dev/null +++ b/app/finders/repositories/commits_with_trailer_finder.rb @@ -0,0 +1,82 @@ +# frozen_string_literal: true + +module Repositories + # Finder for obtaining commits between two refs, with a Git trailer set. + class CommitsWithTrailerFinder + # The maximum number of commits to retrieve per page. + # + # This value is arbitrarily chosen. Lowering it means more Gitaly calls, but + # less data being loaded into memory at once. Increasing it has the opposite + # effect. + # + # This amount is based around the number of commits that usually go in a + # GitLab release. Some examples for GitLab's own releases: + # + # * 13.6.0: 4636 commits + # * 13.5.0: 5912 commits + # * 13.4.0: 5541 commits + # + # Using this limit should result in most (very large) projects only needing + # 5-10 Gitaly calls, while keeping memory usage at a reasonable amount. + COMMITS_PER_PAGE = 1024 + + # The `project` argument specifies the project for which to obtain the + # commits. + # + # The `from` and `to` arguments specify the range of commits to include. The + # commit specified in `from` won't be included itself. The commit specified + # in `to` _is_ included. + # + # The `per_page` argument specifies how many commits are retrieved in a single + # Gitaly API call. + def initialize(project:, from:, to:, per_page: COMMITS_PER_PAGE) + @project = project + @from = from + @to = to + @per_page = per_page + end + + # Fetches all commits that have the given trailer set. + # + # The commits are yielded to the supplied block in batches. This allows + # other code to process these commits in batches too, instead of first + # having to load all commits into memory. + # + # Example: + # + # CommitsWithTrailerFinder.new(...).each_page('Signed-off-by') do |commits| + # commits.each do |commit| + # ... + # end + # end + def each_page(trailer) + return to_enum(__method__, trailer) unless block_given? + + offset = 0 + response = fetch_commits + + while response.any? + commits = [] + + response.each do |commit| + commits.push(commit) if commit.trailers.key?(trailer) + end + + yield commits + + offset += response.length + response = fetch_commits(offset) + end + end + + private + + def fetch_commits(offset = 0) + range = "#{@from}..#{@to}" + + @project + .repository + .commits(range, limit: @per_page, offset: offset, trailers: true) + end + end +end diff --git a/app/models/merge_request_context_commit.rb b/app/models/merge_request_context_commit.rb index 59cc82cfaf50..e081a96dc10e 100644 --- a/app/models/merge_request_context_commit.rb +++ b/app/models/merge_request_context_commit.rb @@ -12,6 +12,9 @@ class MergeRequestContextCommit < ApplicationRecord validates :sha, presence: true validates :sha, uniqueness: { message: 'has already been added' } + serialize :trailers, Serializers::JSON # rubocop:disable Cop/ActiveRecordSerialize + validates :trailers, json_schema: { filename: 'git_trailers' } + # Sort by committed date in descending order to ensure latest commits comes on the top scope :order_by_committed_date_desc, -> { order('committed_date DESC') } diff --git a/app/models/merge_request_diff_commit.rb b/app/models/merge_request_diff_commit.rb index 9f6933d0879c..cf2d07b85619 100644 --- a/app/models/merge_request_diff_commit.rb +++ b/app/models/merge_request_diff_commit.rb @@ -10,6 +10,9 @@ class MergeRequestDiffCommit < ApplicationRecord sha_attribute :sha alias_attribute :id, :sha + serialize :trailers, Serializers::JSON # rubocop:disable Cop/ActiveRecordSerialize + validates :trailers, json_schema: { filename: 'git_trailers' } + # Deprecated; use `bulk_insert!` from `BulkInsertSafe` mixin instead. # cf. https://gitlab.com/gitlab-org/gitlab/issues/207989 for progress def self.create_bulk(merge_request_diff_id, commits) @@ -23,7 +26,8 @@ def self.create_bulk(merge_request_diff_id, commits) relative_order: index, sha: Gitlab::Database::ShaAttribute.serialize(sha), # rubocop:disable Cop/ActiveRecordSerialize authored_date: Gitlab::Database.sanitize_timestamp(commit_hash[:authored_date]), - committed_date: Gitlab::Database.sanitize_timestamp(commit_hash[:committed_date]) + committed_date: Gitlab::Database.sanitize_timestamp(commit_hash[:committed_date]), + trailers: commit_hash.fetch(:trailers, {}).to_json ) end diff --git a/app/models/repository.rb b/app/models/repository.rb index c19448332f8b..6e57e5c66204 100644 --- a/app/models/repository.rb +++ b/app/models/repository.rb @@ -151,7 +151,8 @@ def commits(ref = nil, opts = {}) all: !!opts[:all], first_parent: !!opts[:first_parent], order: opts[:order], - literal_pathspec: opts.fetch(:literal_pathspec, true) + literal_pathspec: opts.fetch(:literal_pathspec, true), + trailers: opts[:trailers] } commits = Gitlab::Git::Commit.where(options) diff --git a/app/services/merge_requests/add_context_service.rb b/app/services/merge_requests/add_context_service.rb index bb82fa23468e..b693f8509a2b 100644 --- a/app/services/merge_requests/add_context_service.rb +++ b/app/services/merge_requests/add_context_service.rb @@ -66,7 +66,8 @@ def build_context_commit_rows(merge_request_id, commits) relative_order: index, sha: sha, authored_date: Gitlab::Database.sanitize_timestamp(commit_hash[:authored_date]), - committed_date: Gitlab::Database.sanitize_timestamp(commit_hash[:committed_date]) + committed_date: Gitlab::Database.sanitize_timestamp(commit_hash[:committed_date]), + trailers: commit_hash.fetch(:trailers, {}).to_json ) end end diff --git a/app/validators/json_schemas/git_trailers.json b/app/validators/json_schemas/git_trailers.json new file mode 100644 index 000000000000..18ac97226a72 --- /dev/null +++ b/app/validators/json_schemas/git_trailers.json @@ -0,0 +1,9 @@ +{ + "description": "Git trailer key/value pairs", + "type": "object", + "patternProperties": { + ".*": { + "type": "string" + } + } +} diff --git a/changelogs/unreleased/gitaly-trailer-parsing.yml b/changelogs/unreleased/gitaly-trailer-parsing.yml new file mode 100644 index 000000000000..096ef4a94a73 --- /dev/null +++ b/changelogs/unreleased/gitaly-trailer-parsing.yml @@ -0,0 +1,5 @@ +--- +title: Add finder for getting commits with a trailer set +merge_request: 49243 +author: +type: added diff --git a/db/migrate/20210106155209_add_merge_request_diff_commit_trailers.rb b/db/migrate/20210106155209_add_merge_request_diff_commit_trailers.rb new file mode 100644 index 000000000000..906efa58bcde --- /dev/null +++ b/db/migrate/20210106155209_add_merge_request_diff_commit_trailers.rb @@ -0,0 +1,22 @@ +# frozen_string_literal: true + +# See https://docs.gitlab.com/ee/development/migration_style_guide.html +# for more information on how to write migrations for GitLab. + +class AddMergeRequestDiffCommitTrailers < ActiveRecord::Migration[6.0] + include Gitlab::Database::MigrationHelpers + + DOWNTIME = false + + def up + with_lock_retries do + add_column :merge_request_diff_commits, :trailers, :jsonb, default: {}, null: false + end + end + + def down + with_lock_retries do + remove_column :merge_request_diff_commits, :trailers + end + end +end diff --git a/db/migrate/20210107154615_add_merge_request_context_commit_trailers.rb b/db/migrate/20210107154615_add_merge_request_context_commit_trailers.rb new file mode 100644 index 000000000000..e7bd7c2ea56b --- /dev/null +++ b/db/migrate/20210107154615_add_merge_request_context_commit_trailers.rb @@ -0,0 +1,12 @@ +# frozen_string_literal: true + +# See https://docs.gitlab.com/ee/development/migration_style_guide.html +# for more information on how to write migrations for GitLab. + +class AddMergeRequestContextCommitTrailers < ActiveRecord::Migration[6.0] + DOWNTIME = false + + def change + add_column :merge_request_context_commits, :trailers, :jsonb, default: {}, null: false + end +end diff --git a/db/schema_migrations/20210106155209 b/db/schema_migrations/20210106155209 new file mode 100644 index 000000000000..10dde1cf8748 --- /dev/null +++ b/db/schema_migrations/20210106155209 @@ -0,0 +1 @@ +4aeff45663a9f5a41a8dd92298afb4b0b57aa6f190f4648455df2fa1e39e174f \ No newline at end of file diff --git a/db/schema_migrations/20210107154615 b/db/schema_migrations/20210107154615 new file mode 100644 index 000000000000..f8b6f97d3fe4 --- /dev/null +++ b/db/schema_migrations/20210107154615 @@ -0,0 +1 @@ +3e867ceefcab4f043b89d3c04e6e0a1182423039e1a9245e611128efe77c0e88 \ No newline at end of file diff --git a/db/structure.sql b/db/structure.sql index 38965f00aa60..ed6376b3da0e 100644 --- a/db/structure.sql +++ b/db/structure.sql @@ -13863,7 +13863,8 @@ CREATE TABLE merge_request_context_commits ( committer_name text, committer_email text, message text, - merge_request_id bigint + merge_request_id bigint, + trailers jsonb DEFAULT '{}'::jsonb NOT NULL ); CREATE SEQUENCE merge_request_context_commits_id_seq @@ -13885,7 +13886,8 @@ CREATE TABLE merge_request_diff_commits ( author_email text, committer_name text, committer_email text, - message text + message text, + trailers jsonb DEFAULT '{}'::jsonb NOT NULL ); CREATE TABLE merge_request_diff_details ( diff --git a/lib/gitlab/git/commit.rb b/lib/gitlab/git/commit.rb index 0bc7ecccf5ec..35c3dc5b0b3d 100644 --- a/lib/gitlab/git/commit.rb +++ b/lib/gitlab/git/commit.rb @@ -16,7 +16,7 @@ class Commit SERIALIZE_KEYS = [ :id, :message, :parent_ids, :authored_date, :author_name, :author_email, - :committed_date, :committer_name, :committer_email + :committed_date, :committer_name, :committer_email, :trailers ].freeze attr_accessor(*SERIALIZE_KEYS) @@ -389,6 +389,7 @@ def init_from_gitaly(commit) @committer_name = commit.committer.name.dup @committer_email = commit.committer.email.dup @parent_ids = Array(commit.parent_ids) + @trailers = Hash[commit.trailers.map { |t| [t.key, t.value] }] end # Gitaly provides a UNIX timestamp in author.date.seconds, and a timezone diff --git a/lib/gitlab/git/rugged_impl/commit.rb b/lib/gitlab/git/rugged_impl/commit.rb index 0eff35ab1c47..0607b151de2f 100644 --- a/lib/gitlab/git/rugged_impl/commit.rb +++ b/lib/gitlab/git/rugged_impl/commit.rb @@ -103,6 +103,7 @@ def init_from_rugged(commit) @committer_name = committer[:name] @committer_email = committer[:email] @parent_ids = commit.parents.map(&:oid) + @trailers = Hash[commit.trailers] end end end diff --git a/lib/gitlab/gitaly_client/commit_service.rb b/lib/gitlab/gitaly_client/commit_service.rb index ea940150941c..ef5221a80423 100644 --- a/lib/gitlab/gitaly_client/commit_service.rb +++ b/lib/gitlab/gitaly_client/commit_service.rb @@ -335,7 +335,8 @@ def find_commits(options) all: !!options[:all], first_parent: !!options[:first_parent], global_options: parse_global_options!(options), - disable_walk: true # This option is deprecated. The 'walk' implementation is being removed. + disable_walk: true, # This option is deprecated. The 'walk' implementation is being removed. + trailers: options[:trailers] ) request.after = GitalyClient.timestamp(options[:after]) if options[:after] request.before = GitalyClient.timestamp(options[:before]) if options[:before] diff --git a/spec/finders/repositories/commits_with_trailer_finder_spec.rb b/spec/finders/repositories/commits_with_trailer_finder_spec.rb new file mode 100644 index 000000000000..0c457aae3400 --- /dev/null +++ b/spec/finders/repositories/commits_with_trailer_finder_spec.rb @@ -0,0 +1,38 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Repositories::CommitsWithTrailerFinder do + let(:project) { create(:project, :repository) } + + describe '#each_page' do + it 'only yields commits with the given trailer' do + finder = described_class.new( + project: project, + from: '570e7b2abdd848b95f2f578043fc23bd6f6fd24d', + to: 'c7fbe50c7c7419d9701eebe64b1fdacc3df5b9dd' + ) + + commits = finder.each_page('Signed-off-by').to_a.flatten + + expect(commits.length).to eq(1) + expect(commits.first.id).to eq('5937ac0a7beb003549fc5fd26fc247adbce4a52e') + expect(commits.first.trailers).to eq( + 'Signed-off-by' => 'Dmitriy Zaporozhets <dmitriy.zaporozhets@gmail.com>' + ) + end + + it 'supports paginating of commits' do + finder = described_class.new( + project: project, + from: 'c1acaa58bbcbc3eafe538cb8274ba387047b69f8', + to: '5937ac0a7beb003549fc5fd26fc247adbce4a52e', + per_page: 1 + ) + + commits = finder.each_page('Signed-off-by') + + expect(commits.count).to eq(4) + end + end +end diff --git a/spec/lib/gitlab/git/commit_spec.rb b/spec/lib/gitlab/git/commit_spec.rb index 8961cdcae7d3..49f1e6e994f4 100644 --- a/spec/lib/gitlab/git/commit_spec.rb +++ b/spec/lib/gitlab/git/commit_spec.rb @@ -720,7 +720,8 @@ def sample_commit_hash committer_name: "Dmitriy Zaporozhets", id: SeedRepo::Commit::ID, message: "tree css fixes", - parent_ids: ["874797c3a73b60d2187ed6e2fcabd289ff75171e"] + parent_ids: ["874797c3a73b60d2187ed6e2fcabd289ff75171e"], + trailers: {} } end end diff --git a/spec/lib/gitlab/import_export/safe_model_attributes.yml b/spec/lib/gitlab/import_export/safe_model_attributes.yml index a93ee051ccf2..79769f7ba7a4 100644 --- a/spec/lib/gitlab/import_export/safe_model_attributes.yml +++ b/spec/lib/gitlab/import_export/safe_model_attributes.yml @@ -231,6 +231,7 @@ MergeRequestDiffCommit: - committer_name - committer_email - message +- trailers MergeRequestDiffFile: - merge_request_diff_id - relative_order @@ -255,6 +256,7 @@ MergeRequestContextCommit: - committer_email - message - merge_request_id +- trailers MergeRequestContextCommitDiffFile: - sha - relative_order diff --git a/spec/models/merge_request_diff_commit_spec.rb b/spec/models/merge_request_diff_commit_spec.rb index 2edf44ecdc49..a24628b0f9de 100644 --- a/spec/models/merge_request_diff_commit_spec.rb +++ b/spec/models/merge_request_diff_commit_spec.rb @@ -48,7 +48,8 @@ "committer_email": "dmitriy.zaporozhets@gmail.com", "merge_request_diff_id": merge_request_diff_id, "relative_order": 0, - "sha": Gitlab::Database::ShaAttribute.serialize("5937ac0a7beb003549fc5fd26fc247adbce4a52e") + "sha": Gitlab::Database::ShaAttribute.serialize("5937ac0a7beb003549fc5fd26fc247adbce4a52e"), + "trailers": {}.to_json }, { "message": "Change some files\n\nSigned-off-by: Dmitriy Zaporozhets \u003cdmitriy.zaporozhets@gmail.com\u003e\n", @@ -60,7 +61,8 @@ "committer_email": "dmitriy.zaporozhets@gmail.com", "merge_request_diff_id": merge_request_diff_id, "relative_order": 1, - "sha": Gitlab::Database::ShaAttribute.serialize("570e7b2abdd848b95f2f578043fc23bd6f6fd24d") + "sha": Gitlab::Database::ShaAttribute.serialize("570e7b2abdd848b95f2f578043fc23bd6f6fd24d"), + "trailers": {}.to_json } ] end @@ -92,7 +94,8 @@ "committer_email": "alejorro70@gmail.com", "merge_request_diff_id": merge_request_diff_id, "relative_order": 0, - "sha": Gitlab::Database::ShaAttribute.serialize("ba3343bc4fa403a8dfbfcab7fc1a8c29ee34bd69") + "sha": Gitlab::Database::ShaAttribute.serialize("ba3343bc4fa403a8dfbfcab7fc1a8c29ee34bd69"), + "trailers": {}.to_json }] end -- GitLab