From 285673f03c2d0667af8d09a5250a26b076e427bd Mon Sep 17 00:00:00 2001 From: Gary Holtz <gholtz@gitlab.com> Date: Tue, 12 Apr 2022 14:47:08 -0500 Subject: [PATCH] Add an additional edge case and method to deal with binary looking diffs --- lib/gitlab/git/diff.rb | 6 +++++- spec/lib/gitlab/git/diff_spec.rb | 4 ++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/lib/gitlab/git/diff.rb b/lib/gitlab/git/diff.rb index a66517b4ca08..bb561232ac65 100644 --- a/lib/gitlab/git/diff.rb +++ b/lib/gitlab/git/diff.rb @@ -231,11 +231,15 @@ def has_binary_notice? def encode_diff_to_utf8(replace_invalid_utf8_chars) return unless Feature.enabled?(:convert_diff_to_utf8_with_replacement_symbol, default_enabled: :yaml) - return unless replace_invalid_utf8_chars && !detect_binary?(@diff) + return unless replace_invalid_utf8_chars && diff_should_be_converted? @diff = Gitlab::EncodingHelper.encode_utf8_with_replacement_character(@diff) end + def diff_should_be_converted? + !detect_binary?(@diff) || !@diff&.valid_encoding? + end + def init_from_hash(hash) raw_diff = hash.symbolize_keys diff --git a/spec/lib/gitlab/git/diff_spec.rb b/spec/lib/gitlab/git/diff_spec.rb index 46f544797bba..4c47912e2185 100644 --- a/spec/lib/gitlab/git/diff_spec.rb +++ b/spec/lib/gitlab/git/diff_spec.rb @@ -165,17 +165,21 @@ context 'when diff contains invalid characters' do let(:bad_string) { [0xae].pack("C*") } let(:bad_string_two) { [0x89].pack("C*") } + let(:bad_string_three) { "@@ -1,5 +1,6 @@\n \xFF\xFE#\x00l\x00a\x00n\x00g\x00u\x00" } let(:diff) { described_class.new(@raw_diff_hash.merge({ diff: bad_string })) } let(:diff_two) { described_class.new(@raw_diff_hash.merge({ diff: bad_string_two })) } + let(:diff_three) { described_class.new(@raw_diff_hash.merge({ diff: bad_string_three })) } context 'when replace_invalid_utf8_chars is true' do it 'will convert invalid characters and not cause an encoding error' do expect(diff.diff).to include(Gitlab::EncodingHelper::UNICODE_REPLACEMENT_CHARACTER) expect(diff_two.diff).to include(Gitlab::EncodingHelper::UNICODE_REPLACEMENT_CHARACTER) + expect(diff_three.diff).to include(Gitlab::EncodingHelper::UNICODE_REPLACEMENT_CHARACTER) expect { Oj.dump(diff) }.not_to raise_error(EncodingError) expect { Oj.dump(diff_two) }.not_to raise_error(EncodingError) + expect { Oj.dump(diff_three) }.not_to raise_error(EncodingError) end context 'when the diff is binary' do -- GitLab