diff --git a/config/feature_flags/development/collect_all_diff_paths.yml b/config/feature_flags/development/collect_all_diff_paths.yml new file mode 100644 index 0000000000000000000000000000000000000000..d382889fe169090d0f53c4b2a478aac059a1aa90 --- /dev/null +++ b/config/feature_flags/development/collect_all_diff_paths.yml @@ -0,0 +1,8 @@ +--- +name: collect_all_diff_paths +introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/124907 +rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/421460 +milestone: '16.4' +type: development +group: "group::gitaly" +default_enabled: false diff --git a/lib/gitlab/git/diff.rb b/lib/gitlab/git/diff.rb index 72f7413500f04c11337c447ab0a43b139eb22893..2dd85445602e869de73338bf6321f616f621d2f5 100644 --- a/lib/gitlab/git/diff.rb +++ b/lib/gitlab/git/diff.rb @@ -81,7 +81,7 @@ def between(repo, head, base, options = {}, *paths) # exceeded def filter_diff_options(options, default_options = {}) allowed_options = [:ignore_whitespace_change, :max_files, :max_lines, - :limits, :expanded] + :limits, :expanded, :collect_all_paths] if default_options actual_defaults = default_options.dup @@ -126,6 +126,10 @@ def patch_safe_limit_bytes(limit = patch_hard_limit_bytes) limit / 10 end + def collect_patch_overage? + !!Feature.enabled?(:collect_all_diff_paths) + end + # Returns the limit for a single diff file (patch). # # Patches surpassing this limit shouldn't be persisted in the database @@ -198,9 +202,13 @@ def too_large? # This is used by `to_hash` and `init_from_hash`. alias_method :too_large, :too_large? - def too_large! + def prune! @diff = '' @line_count = 0 + end + + def too_large! + prune! @too_large = true end @@ -211,11 +219,19 @@ def collapsed? end def collapse! - @diff = '' - @line_count = 0 + prune! @collapsed = true end + def overflow? + return @overflow if defined?(@overflow) + + # If overflow is not defined, we're + # not recieveing a diff from Gitaly + # and overflow has no meaning + false + end + def json_safe_diff return @diff unless detect_binary?(@diff) @@ -248,7 +264,7 @@ def init_from_hash(hash) end def init_from_gitaly(gitaly_diff) - @diff = gitaly_diff.respond_to?(:patch) ? encode!(gitaly_diff.patch) : '' + @diff = gitaly_diff.try(:patch).present? ? encode!(gitaly_diff.patch) : '' @new_path = encode!(gitaly_diff.to_path.dup) @old_path = encode!(gitaly_diff.from_path.dup) @a_mode = gitaly_diff.old_mode.to_s(8) @@ -257,11 +273,19 @@ def init_from_gitaly(gitaly_diff) @renamed_file = gitaly_diff.from_path != gitaly_diff.to_path @deleted_file = gitaly_diff.to_id == BLANK_SHA @too_large = gitaly_diff.too_large if gitaly_diff.respond_to?(:too_large) + gitaly_overflow = gitaly_diff.try(:overflow_marker) + @overflow = Diff.collect_patch_overage? && gitaly_overflow collapse! if gitaly_diff.respond_to?(:collapsed) && gitaly_diff.collapsed + # Diffs exceeding limits returned from gitaly when "collect_all_paths" are enabled + # are already pruned, but should be "collapsed" as they have no content + @collapsed = true if @overflow end def prune_diff_if_eligible + # If we have overflow, diffs are already pruned, retain line counts + return if overflow? + if too_large? ::Gitlab::Metrics.add_event(:patch_hard_limit_bytes_hit) diff --git a/lib/gitlab/git/diff_collection.rb b/lib/gitlab/git/diff_collection.rb index b4dd880ceb7d7b7aad72f4258ead981d9713b0bb..c021268a62a5f807a466929bdd00cc9674fbb356 100644 --- a/lib/gitlab/git/diff_collection.rb +++ b/lib/gitlab/git/diff_collection.rb @@ -13,6 +13,10 @@ def self.default_limits { max_files: ::Commit.diff_safe_max_files, max_lines: ::Commit.diff_safe_max_lines } end + def self.collect_all_paths?(collect_all_paths) + Gitlab::Git::Diff.collect_patch_overage? ? collect_all_paths : false + end + def self.limits(options = {}) limits = {} defaults = default_limits @@ -25,7 +29,7 @@ def self.limits(options = {}) limits[:safe_max_bytes] = limits[:safe_max_files] * 5.kilobytes # Average 5 KB per file limits[:max_patch_bytes] = Gitlab::Git::Diff.patch_hard_limit_bytes limits[:max_patch_bytes_for_file_extension] = options.fetch(:max_patch_bytes_for_file_extension, {}) - + limits[:collect_all_paths] = collect_all_paths?(options.fetch(:collect_all_paths, false)) limits end @@ -164,7 +168,12 @@ def each_gitaly_patch if raw.overflow_marker @overflow = true - break + # If we're requesting patches with `collect_all_paths` enabled, then + # Once we hit the overflow marker, gitlay has still returned diffs, just without + # patches, only metadata + unless @limits[:collect_all_paths] + break + end end yield @array[i] = diff diff --git a/spec/lib/gitlab/git/diff_spec.rb b/spec/lib/gitlab/git/diff_spec.rb index 6745c700b92ef5b9c9d41e30b2301d9da823fe05..4d78e194da87a0dda883d551768f7e3ed4411045 100644 --- a/spec/lib/gitlab/git/diff_spec.rb +++ b/spec/lib/gitlab/git/diff_spec.rb @@ -131,6 +131,31 @@ expect(diff.diff).to be_utf8 end end + + context 'using a diff that it too large but collecting all paths' do + let(:gitaly_diff) do + Gitlab::GitalyClient::Diff.new( + from_path: '.gitmodules', + to_path: '.gitmodules', + old_mode: 0100644, + new_mode: 0100644, + from_id: '0792c58905eff3432b721f8c4a64363d8e28d9ae', + to_id: 'efd587ccb47caf5f31fc954edb21f0a713d9ecc3', + overflow_marker: true, + collapsed: false, + too_large: false, + patch: '' + ) + end + + let(:diff) { described_class.new(gitaly_diff) } + + it 'is already pruned and collapsed but not too large' do + expect(diff.diff).to be_empty + expect(diff).not_to be_too_large + expect(diff).to be_collapsed + end + end end context 'using a Gitaly::CommitDelta' do