diff --git a/ee/lib/gitlab/git_guardian/client.rb b/ee/lib/gitlab/git_guardian/client.rb index c2fe63c49cf465595acfbfe43c8c7fddd55095be..46b1715f718efdd54f92c6d61a4cbefb1dd6b06d 100644 --- a/ee/lib/gitlab/git_guardian/client.rb +++ b/ee/lib/gitlab/git_guardian/client.rb @@ -36,26 +36,36 @@ def execute(blobs = []) def execute_batched_request(blobs_batch) Thread.new do - params = blobs_batch.map do |blob| - blob_params = { document: blob.data } - + params = blobs_batch.each_with_object([]) do |blob, all| # GitGuardian limits filename field to 256 characters. # That is why we only pass file name, which is sufficient for Git Guardian to perform its checks. # See: https://api.gitguardian.com/docs#operation/multiple_scan if blob.path.present? filename = File.basename(blob.path) limited_filename = limit_filename(filename) + end - blob_params[:filename] = limited_filename + unless can_be_jsonified?(blob.data) + Gitlab::AppJsonLogger.warn(class: self.class.name, + message: "Not processing data with filename '#{limited_filename}' as it cannot be JSONified") + next end - blob_params + blob_params = { document: blob.data } + blob_params[:filename] = limited_filename if limited_filename + + all << blob_params end - response = perform_request(params) - policy_breaks = process_response(response, blobs_batch) + if params.empty? + Gitlab::AppJsonLogger.warn(class: self.class.name, message: "Nothing to process") + nil + else + response = perform_request(params) + policy_breaks = process_response(response, blobs_batch) - policy_breaks.presence + policy_breaks.presence + end end end @@ -70,6 +80,13 @@ def limit_filename(filename) filename[over_limit..filename_size] end + def can_be_jsonified?(data) + data.to_json + true + rescue JSON::GeneratorError + false + end + def perform_request(params) options = { headers: headers, diff --git a/ee/spec/lib/gitlab/git_guardian/client_spec.rb b/ee/spec/lib/gitlab/git_guardian/client_spec.rb index a954c4c07059329766b196f22f7444b7491b4ffa..fdc9e3cffdbd71d780d86850d62e66a7aef97bb7 100644 --- a/ee/spec/lib/gitlab/git_guardian/client_spec.rb +++ b/ee/spec/lib/gitlab/git_guardian/client_spec.rb @@ -328,4 +328,26 @@ end end end + + context 'with a blob containing binary data' do + let(:filename) { 'rails_sample.jpg' } + let(:blobs) do + [ + fake_blob( + path: filename, + data: File.read(File.join('spec', 'fixtures', filename)), + binary: true + ) + ] + end + + it 'warns and does not call GitGuardian API' do + expect(::Gitlab::AppJsonLogger).to receive(:warn).with(class: described_class.name, + message: "Not processing data with filename '#{filename}' as it cannot be JSONified") + expect(::Gitlab::AppJsonLogger).to receive(:warn).with(class: described_class.name, + message: "Nothing to process") + + expect(client.execute(blobs)).to eq([]) + end + end end