diff --git a/lib/gitlab/git/diff.rb b/lib/gitlab/git/diff.rb index a66517b4ca08018ce67ff7ca254eff8f7f64a6e2..bb561232ac657b6fe2b1589c5455f5526008b592 100644 --- a/lib/gitlab/git/diff.rb +++ b/lib/gitlab/git/diff.rb @@ -231,11 +231,15 @@ def has_binary_notice? def encode_diff_to_utf8(replace_invalid_utf8_chars) return unless Feature.enabled?(:convert_diff_to_utf8_with_replacement_symbol, default_enabled: :yaml) - return unless replace_invalid_utf8_chars && !detect_binary?(@diff) + return unless replace_invalid_utf8_chars && diff_should_be_converted? @diff = Gitlab::EncodingHelper.encode_utf8_with_replacement_character(@diff) end + def diff_should_be_converted? + !detect_binary?(@diff) || !@diff&.valid_encoding? + end + def init_from_hash(hash) raw_diff = hash.symbolize_keys diff --git a/spec/lib/gitlab/git/diff_spec.rb b/spec/lib/gitlab/git/diff_spec.rb index 46f544797bba73abeddd26a63bab47de2cae457a..4c47912e218525f7555d0ab7746da548e74f25f1 100644 --- a/spec/lib/gitlab/git/diff_spec.rb +++ b/spec/lib/gitlab/git/diff_spec.rb @@ -165,17 +165,21 @@ context 'when diff contains invalid characters' do let(:bad_string) { [0xae].pack("C*") } let(:bad_string_two) { [0x89].pack("C*") } + let(:bad_string_three) { "@@ -1,5 +1,6 @@\n \xFF\xFE#\x00l\x00a\x00n\x00g\x00u\x00" } let(:diff) { described_class.new(@raw_diff_hash.merge({ diff: bad_string })) } let(:diff_two) { described_class.new(@raw_diff_hash.merge({ diff: bad_string_two })) } + let(:diff_three) { described_class.new(@raw_diff_hash.merge({ diff: bad_string_three })) } context 'when replace_invalid_utf8_chars is true' do it 'will convert invalid characters and not cause an encoding error' do expect(diff.diff).to include(Gitlab::EncodingHelper::UNICODE_REPLACEMENT_CHARACTER) expect(diff_two.diff).to include(Gitlab::EncodingHelper::UNICODE_REPLACEMENT_CHARACTER) + expect(diff_three.diff).to include(Gitlab::EncodingHelper::UNICODE_REPLACEMENT_CHARACTER) expect { Oj.dump(diff) }.not_to raise_error(EncodingError) expect { Oj.dump(diff_two) }.not_to raise_error(EncodingError) + expect { Oj.dump(diff_three) }.not_to raise_error(EncodingError) end context 'when the diff is binary' do