# frozen_string_literal: true

# ee/lib/gitlab/git_guardian/client.rb

module Gitlab
  module GitGuardian
    # Client for the GitGuardian multiscan endpoint.
    #
    # Blobs are submitted in batches of BATCH_SIZE and every batch response is
    # converted into human-readable policy violation messages.
    class Client
      API_URL = "https://api.gitguardian.com/v1/multiscan"
      TIMEOUT = 5.seconds
      BATCH_SIZE = 20

      Error = Class.new(StandardError)
      ConfigError = Class.new(Error)
      RequestError = Class.new(Error)

      attr_reader :api_token

      # @param api_token [String] token sent in the Authorization header
      # @raise [ConfigError] when the token is blank
      def initialize(api_token)
        raise ConfigError, 'Please check your integration configuration.' unless api_token.present?

        @api_token = api_token
      end

      # Scans the given blobs batch by batch and stops at the first batch that
      # reports policy breaks, so later batches are not sent once a violation
      # has been found.
      #
      # @param blobs [Array] objects responding to `path` and `data`
      # @return [Array<String>] violation messages of the first offending batch,
      #   or an empty array when nothing was reported
      # @raise [RequestError] when the API answers with a non-success status
      # @raise [Error] when the response body has an unexpected format
      def execute(blobs = [])
        blobs.each_slice(BATCH_SIZE) do |blobs_batch|
          policy_breaks = scan_batch(blobs_batch)

          return policy_breaks if policy_breaks.present?
        end

        []
      end

      private

      # Sends one batch to the API and returns its violation messages.
      def scan_batch(blobs_batch)
        # GitGuardian limits filename field to 256 characters.
        # That is why we only pass file name, which is sufficient for Git Guardian to perform its checks.
        # See: https://api.gitguardian.com/docs#operation/multiple_scan
        params = blobs_batch.map do |blob|
          {
            filename: File.basename(blob.path),
            document: blob.data
          }
        end

        response = perform_request(params)

        process_response(response, blobs_batch.map(&:path))
      end

      # Posts the JSON-encoded params and returns the raw response.
      #
      # @raise [RequestError] when the response status is not successful
      def perform_request(params)
        options = {
          headers: headers,
          body: params.to_json,
          timeout: TIMEOUT
        }

        response = Gitlab::HTTP.post(API_URL, options)

        raise RequestError, "HTTP status code #{response.code}" unless response.success?

        response
      end

      def headers
        {
          'Content-Type': 'application/json',
          Authorization: "Token #{api_token}"
        }
      end

      # Turns a multiscan response into violation messages.
      #
      # Scan results are matched to file paths by position: the n-th result is
      # reported against the n-th path.
      #
      # @param response [#body] HTTP response whose body is the JSON payload
      # @param file_paths [Array<String>] full paths of the submitted blobs
      # @return [Array<String>] one message per reported policy break
      # @raise [Error] when the body is not valid JSON or is not a list of objects
      def process_response(response, file_paths)
        scan_results = Gitlab::Json.parse(response.body)

        # Valid JSON of the wrong shape (for example an error object) must be reported
        # the same way as unparsable JSON instead of leaking a TypeError/NoMethodError.
        raise Error, 'invalid response format' unless scan_results.is_a?(Array) && scan_results.all?(Hash)

        scan_results.zip(file_paths).flat_map do |scan_result, file_path|
          next [] if scan_result['policy_break_count'] == 0

          Array(scan_result['policy_breaks']).map do |policy_break|
            violation_message(policy_break, file_path)
          end
        end
      rescue JSON::ParserError
        raise Error, 'invalid response format'
      end

      # Builds the message for a single policy break. Only the first match is
      # reported; when the break carries no matches the match details are omitted.
      def violation_message(policy_break, file_path)
        message = "#{policy_break['policy']} policy violated at '#{file_path}'"
        first_match = Array(policy_break['matches']).first

        return message unless first_match

        "#{message} for #{first_match['type']} '#{first_match['match']}'"
      end
    end
  end
end

# ee/spec/lib/gitlab/git_guardian/client_spec.rb

require 'spec_helper'

RSpec.describe ::Gitlab::GitGuardian::Client, feature_category: :source_code_management do
  include FakeBlobHelpers

  let_it_be(:project) { build(:project) }
  let_it_be(:guardian_url) { 'https://api.gitguardian.com/v1/multiscan' }
  let_it_be(:token) { 'test-token' }

  let(:file_paths) { [] }

  let(:stubbed_response) do
    # see doc https://api.gitguardian.com/docs#operation/multiple_scan to know more about the response structure
    file_paths.map do |_|
      {
        policy_break_count: 0,
        policies: [
          "Filename",
          "File extensions",
          "Secrets detection"
        ],
        policy_breaks: []
      }
    end.to_json
  end

  let(:blobs) { file_paths.map { |path| fake_blob(path: path) } }

  let(:status) { 200 }

  let(:stub_guardian_request) do
    stub_request(:post, guardian_url).to_return(
      status: status,
      headers: { 'Content-Type' => 'application/json', Authorization: "Token #{token}" },
      body: stubbed_response
    )
  end

  subject(:client) { described_class.new(token) }

  context 'without credentials' do
    let(:token) { '' }
    let!(:guardian_api_request) { stub_guardian_request }

    it 'raises a config error' do
      expect { client }.to raise_error(::Gitlab::GitGuardian::Client::ConfigError)
      expect(guardian_api_request).not_to have_been_requested
    end
  end

  context 'with credential' do
    let!(:guardian_api_request) { stub_guardian_request }
    let(:client_response) { client.execute(blobs) }

    context 'with no blobs' do
      let(:blobs) { [] }

      it 'returns an empty array' do
        expect(client_response).to eq []
        expect(guardian_api_request).not_to have_been_requested
      end
    end

    context 'with blobs without policy breaks' do
      let(:file_paths) { %w[README.md test_path/file.md test.yml] }

      it 'returns an empty array' do
        expect(client_response).to eq []
        expect(guardian_api_request).to have_been_requested
      end
    end

    context 'with errors' do
      let(:file_paths) { %w[test_path/file.md lib/.env] }

      context 'when an API respond with an error' do
        # see doc https://api.gitguardian.com/docs#operation/multiple_scan to know more about possible error responses
        let(:status) { 403 }

        let(:stubbed_response) { nil }

        it 'raises a request error' do
          expect { client_response }.to raise_error(::Gitlab::GitGuardian::Client::RequestError)
          expect(guardian_api_request).to have_been_requested
        end
      end

      context 'when API response is malformed' do
        let(:stubbed_response) { '{fsde' }

        it 'raises a JSON error' do
          expect { client_response }.to raise_error(::Gitlab::GitGuardian::Client::Error, 'invalid response format')
          expect(guardian_api_request).to have_been_requested
        end
      end

      context 'when API response has an unexpected structure' do
        let(:stubbed_response) { { detail: 'unexpected payload' }.to_json }

        it 'raises an invalid response format error' do
          expect { client_response }.to raise_error(::Gitlab::GitGuardian::Client::Error, 'invalid response format')
          expect(guardian_api_request).to have_been_requested
        end
      end
    end

    context 'with policy breaking blobs' do
      let(:file_paths) { %w[test_path/file.md lib/.env] }

      let(:stubbed_response) do
        # see doc https://api.gitguardian.com/docs#operation/multiple_scan to know more about the response structure
        [
          {
            policy_break_count: 0,
            policies: [
              "Filename",
              "File extensions",
              "Secrets detection"
            ],
            policy_breaks: []
          },
          {
            policy_break_count: 2,
            policies: [
              "Filename",
              "File extensions",
              "Secrets detection"
            ],
            policy_breaks: [
              {
                type: ".env",
                policy: "Filenames",
                matches: [
                  {
                    type: "filename",
                    match: ".env"
                  }
                ]
              },
              {
                type: "Basic Auth String",
                policy: "Secrets detection",
                validity: "cannot_check",
                matches: [
                  {
                    type: "username",
                    match: "jen_barber",
                    index_start: 52,
                    index_end: 61,
                    line_start: 2,
                    line_end: 2
                  }
                ]
              }
            ]
          }
        ].to_json
      end

      it 'returns appropriate error messages' do
        expect(client_response).to eq [
          "Filenames policy violated at 'lib/.env' for filename '.env'",
          "Secrets detection policy violated at 'lib/.env' for username 'jen_barber'"
        ]
        expect(guardian_api_request).to have_been_requested
      end
    end

    context 'with a policy break that has no matches' do
      let(:file_paths) { %w[lib/.env] }

      let(:stubbed_response) do
        [
          {
            policy_break_count: 1,
            policies: [
              "Filename"
            ],
            policy_breaks: [
              {
                type: ".env",
                policy: "Filenames",
                matches: []
              }
            ]
          }
        ].to_json
      end

      it 'returns the error message without match details' do
        expect(client_response).to eq ["Filenames policy violated at 'lib/.env'"]
        expect(guardian_api_request).to have_been_requested
      end
    end
  end
end