From adb93622cca7edbe15f5c75daaa39c9eb975e196 Mon Sep 17 00:00:00 2001 From: johnwparent <john.parent@kitware.com> Date: Wed, 28 Jun 2023 14:51:57 -0400 Subject: [PATCH] Diffs: Model files newly returned from gitaly Gitaly's https://gitlab.com/gitlab-org/gitaly/-/merge_requests/5680 allows for returning information about all files in a diff, not just those that do not exceed file limitations. Add handling for this in Gitlab to request the metadata for diffs that exceed set limits and ensure diff collections process the added metadata as diffs to be stored. lib/gitlab/git/diff#Diff: Add overflow attr gitlab/git/diff.rb#Diff: make `collect_patch_overage` a predicate method gitlab/git/diff.rb#Diff: prune guard on overflow gitlab/git/diff.rb#Diff: fixup typo gitlab/git/diff_collection.rb#DiffCollection: force consideration of feature flag gitlab/git/diff: Review formatting suggestions gitlab/git/diff_collection: make collect_all_paths a class method lib/gitlab/git/diff.rb: Reduce one off method useage spec/lib/gitlab/git/diff_spec.rb: Correct constructor signature spec/lib/gitlab/git/diff_spec.rb: rubocop lib/gitlab/git/diff.rb: rm dangling ref spec/lib/gitlab/git/diff_spec.rb: simulate patch pruned by gitaly --- .../development/collect_all_diff_paths.yml | 8 +++++ lib/gitlab/git/diff.rb | 34 ++++++++++++++++--- lib/gitlab/git/diff_collection.rb | 13 +++++-- spec/lib/gitlab/git/diff_spec.rb | 25 ++++++++++++++ 4 files changed, 73 insertions(+), 7 deletions(-) create mode 100644 config/feature_flags/development/collect_all_diff_paths.yml diff --git a/config/feature_flags/development/collect_all_diff_paths.yml b/config/feature_flags/development/collect_all_diff_paths.yml new file mode 100644 index 0000000000000..d382889fe1690 --- /dev/null +++ b/config/feature_flags/development/collect_all_diff_paths.yml @@ -0,0 +1,8 @@ +--- +name: collect_all_diff_paths +introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/124907 +rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/421460 +milestone: '16.4' +type: development +group: "group::gitaly" +default_enabled: false diff --git a/lib/gitlab/git/diff.rb b/lib/gitlab/git/diff.rb index 72f7413500f04..2dd85445602e8 100644 --- a/lib/gitlab/git/diff.rb +++ b/lib/gitlab/git/diff.rb @@ -81,7 +81,7 @@ def between(repo, head, base, options = {}, *paths) # exceeded def filter_diff_options(options, default_options = {}) allowed_options = [:ignore_whitespace_change, :max_files, :max_lines, - :limits, :expanded] + :limits, :expanded, :collect_all_paths] if default_options actual_defaults = default_options.dup @@ -126,6 +126,10 @@ def patch_safe_limit_bytes(limit = patch_hard_limit_bytes) limit / 10 end + def collect_patch_overage? + !!Feature.enabled?(:collect_all_diff_paths) + end + # Returns the limit for a single diff file (patch). # # Patches surpassing this limit shouldn't be persisted in the database @@ -198,9 +202,13 @@ def too_large? # This is used by `to_hash` and `init_from_hash`. alias_method :too_large, :too_large? - def too_large! + def prune! @diff = '' @line_count = 0 + end + + def too_large! + prune! @too_large = true end @@ -211,11 +219,19 @@ def collapsed? end def collapse! - @diff = '' - @line_count = 0 + prune! @collapsed = true end + def overflow? + return @overflow if defined?(@overflow) + + # If overflow is not defined, we're + # not recieveing a diff from Gitaly + # and overflow has no meaning + false + end + def json_safe_diff return @diff unless detect_binary?(@diff) @@ -248,7 +264,7 @@ def init_from_hash(hash) end def init_from_gitaly(gitaly_diff) - @diff = gitaly_diff.respond_to?(:patch) ? encode!(gitaly_diff.patch) : '' + @diff = gitaly_diff.try(:patch).present? ? encode!(gitaly_diff.patch) : '' @new_path = encode!(gitaly_diff.to_path.dup) @old_path = encode!(gitaly_diff.from_path.dup) @a_mode = gitaly_diff.old_mode.to_s(8) @@ -257,11 +273,19 @@ def init_from_gitaly(gitaly_diff) @renamed_file = gitaly_diff.from_path != gitaly_diff.to_path @deleted_file = gitaly_diff.to_id == BLANK_SHA @too_large = gitaly_diff.too_large if gitaly_diff.respond_to?(:too_large) + gitaly_overflow = gitaly_diff.try(:overflow_marker) + @overflow = Diff.collect_patch_overage? && gitaly_overflow collapse! if gitaly_diff.respond_to?(:collapsed) && gitaly_diff.collapsed + # Diffs exceeding limits returned from gitaly when "collect_all_paths" are enabled + # are already pruned, but should be "collapsed" as they have no content + @collapsed = true if @overflow end def prune_diff_if_eligible + # If we have overflow, diffs are already pruned, retain line counts + return if overflow? + if too_large? ::Gitlab::Metrics.add_event(:patch_hard_limit_bytes_hit) diff --git a/lib/gitlab/git/diff_collection.rb b/lib/gitlab/git/diff_collection.rb index b4dd880ceb7d7..c021268a62a5f 100644 --- a/lib/gitlab/git/diff_collection.rb +++ b/lib/gitlab/git/diff_collection.rb @@ -13,6 +13,10 @@ def self.default_limits { max_files: ::Commit.diff_safe_max_files, max_lines: ::Commit.diff_safe_max_lines } end + def self.collect_all_paths?(collect_all_paths) + Gitlab::Git::Diff.collect_patch_overage? ? collect_all_paths : false + end + def self.limits(options = {}) limits = {} defaults = default_limits @@ -25,7 +29,7 @@ def self.limits(options = {}) limits[:safe_max_bytes] = limits[:safe_max_files] * 5.kilobytes # Average 5 KB per file limits[:max_patch_bytes] = Gitlab::Git::Diff.patch_hard_limit_bytes limits[:max_patch_bytes_for_file_extension] = options.fetch(:max_patch_bytes_for_file_extension, {}) - + limits[:collect_all_paths] = collect_all_paths?(options.fetch(:collect_all_paths, false)) limits end @@ -164,7 +168,12 @@ def each_gitaly_patch if raw.overflow_marker @overflow = true - break + # If we're requesting patches with `collect_all_paths` enabled, then + # Once we hit the overflow marker, gitlay has still returned diffs, just without + # patches, only metadata + unless @limits[:collect_all_paths] + break + end end yield @array[i] = diff diff --git a/spec/lib/gitlab/git/diff_spec.rb b/spec/lib/gitlab/git/diff_spec.rb index 6745c700b92ef..4d78e194da87a 100644 --- a/spec/lib/gitlab/git/diff_spec.rb +++ b/spec/lib/gitlab/git/diff_spec.rb @@ -131,6 +131,31 @@ expect(diff.diff).to be_utf8 end end + + context 'using a diff that it too large but collecting all paths' do + let(:gitaly_diff) do + Gitlab::GitalyClient::Diff.new( + from_path: '.gitmodules', + to_path: '.gitmodules', + old_mode: 0100644, + new_mode: 0100644, + from_id: '0792c58905eff3432b721f8c4a64363d8e28d9ae', + to_id: 'efd587ccb47caf5f31fc954edb21f0a713d9ecc3', + overflow_marker: true, + collapsed: false, + too_large: false, + patch: '' + ) + end + + let(:diff) { described_class.new(gitaly_diff) } + + it 'is already pruned and collapsed but not too large' do + expect(diff.diff).to be_empty + expect(diff).not_to be_too_large + expect(diff).to be_collapsed + end + end end context 'using a Gitaly::CommitDelta' do -- GitLab