diff --git a/ee/lib/gitlab/checks/integrations/git_guardian_check.rb b/ee/lib/gitlab/checks/integrations/git_guardian_check.rb index ff20ff05023a6c721d14d65c08c2b3fba7e6fe4d..3395f3419d919d3ca7e5fc911da627af81d2689d 100644 --- a/ee/lib/gitlab/checks/integrations/git_guardian_check.rb +++ b/ee/lib/gitlab/checks/integrations/git_guardian_check.rb @@ -9,6 +9,20 @@ class GitGuardianCheck < ::Gitlab::Checks::BaseBulkChecker LOG_MESSAGE = 'Starting GitGuardian scan...' SPECIAL_COMMIT_FLAG = /\[skip secret detection\]/i + REMEDIATION_MESSAGE = <<~MESSAGE + How to remediate: + + The violation was detected before the commit was pushed: + + 1. Fix the violation in the detected files. + 2. Commit and try pushing again. + + [To apply with caution] If you want to bypass the secrets check: + + 1. Add [skip secret detection] flag to the commit message. + 2. Commit and try pushing again. + MESSAGE + def initialize(integration_check) @changes_access = integration_check.changes_access end @@ -56,7 +70,7 @@ def revisions def format_git_guardian_response(response) return unless response.present? - message = response.join(",\n") + message = response.join("\n") << REMEDIATION_MESSAGE raise ::Gitlab::GitAccess::ForbiddenError, message end diff --git a/ee/lib/gitlab/git_guardian/client.rb b/ee/lib/gitlab/git_guardian/client.rb index 29b8a86d861664a017ffd0be4e102a7be97d7c17..c2fe63c49cf465595acfbfe43c8c7fddd55095be 100644 --- a/ee/lib/gitlab/git_guardian/client.rb +++ b/ee/lib/gitlab/git_guardian/client.rb @@ -3,10 +3,13 @@ module Gitlab module GitGuardian class Client + include ActionView::Helpers::TextHelper + API_URL = "https://api.gitguardian.com/v1/multiscan" TIMEOUT = 5.seconds BATCH_SIZE = 20 FILENAME_LIMIT = 256 + NA = 'N/A' Error = Class.new(StandardError) ConfigError = Class.new(Error) @@ -50,8 +53,7 @@ def execute_batched_request(blobs_batch) end response = perform_request(params) - blobs_paths = blobs_batch.map(&:path) - policy_breaks = process_response(response, blobs_paths) + policy_breaks = process_response(response, blobs_batch) policy_breaks.presence end @@ -89,26 +91,86 @@ def headers } end - def process_response(response, file_paths) + def process_response(response, blobs) parsed_response = Gitlab::Json.parse(response.body) - parsed_response.map.with_index do |policy_break_for_file, blob_index| + parsed_response.filter_map.with_index do |policy_break_for_file, blob_index| next if policy_break_for_file['policy_break_count'] == 0 - file_path = file_paths[blob_index] + blob = blobs[blob_index] - policy_break_for_file['policy_breaks'].map do |policy_break| - violation_match = policy_break['matches'].first - match_type = violation_match['type'] - match_value = violation_match['match'] - file_path_substring = file_path.present? ? " at '#{file_path}'" : '' + next unless blob - "#{policy_break['policy']} policy violated#{file_path_substring} for #{match_type} '#{match_value}'" - end - end.compact.flatten + formatted_errors_for_file(policy_break_for_file['policy_breaks'], blob) + end rescue JSON::ParserError raise Error, 'invalid response format' end + + # Format the message with indentation to print it like: + # + # remote: GitLab: .env: 1 incident detected: + # remote: + # remote: >> Filenames: .env + # remote: Validity: N/A + # remote: Known by GitGuardian: N/A + # remote: Incident URL: N/A + # remote: Violation: filename `.env` detected + def formatted_errors_for_file(policy_breaks, blob) + result = "#{blob.path}: #{pluralize(policy_breaks.size, 'incident')} detected:\n\n" + + error_output_for_file(policy_breaks, blob).each do |messages| + result << " >> " + result << messages.join("\n ") + result << "\n\n" + end + + result + end + + def error_output_for_file(policy_breaks, blob) + policy_breaks.map do |policy_break| + result = [ + "#{policy_break['policy']}: #{policy_break['type']}", + "Validity: #{policy_break['validity']&.humanize || NA}", + "Known by GitGuardian: #{Gitlab::Utils.boolean_to_yes_no(policy_break['known_secret'])}", + "Incident URL: #{policy_break['incident_url'].presence || NA}" + ] + + blob_lines = blob.lines + + policy_break['matches'].each do |violation_match| + type, match = violation_match.values_at('type', 'match') + result << "Violation: #{type} `#{match}` detected" + + next unless violation_match['line_start'].present? + + line_start = violation_match['line_start'] + line_end = violation_match['line_end'] || line_start + + (line_start..line_end).each do |line_number| + # line_start field index origin is 1 + line = blob_lines[line_number - 1] + + next unless line && line.index(match) + + # Prefix the printed line with the line number and + # print the match type underneath the matched value + # + # Violation: password `password` detected + # 201 | url = 'http://user:password123456@hi.gitlab.com/hello.json' + # |__password__| + line_number_prefix = "#{line_number} | " + result << "#{line_number_prefix}#{line}" + + index_start = line_number_prefix.size + line.index(match) + result << "#{' ' * index_start}|#{type.center(match.size - 2, '_')}|" + end + end + + result + end + end end end end diff --git a/ee/spec/lib/gitlab/checks/integrations/git_guardian_check_spec.rb b/ee/spec/lib/gitlab/checks/integrations/git_guardian_check_spec.rb index 1ef35aea134d09c50c9e306526507cc80aa91aee..1c493647989d9fd2e249a18e2e4ff8d06afccfce 100644 --- a/ee/spec/lib/gitlab/checks/integrations/git_guardian_check_spec.rb +++ b/ee/spec/lib/gitlab/checks/integrations/git_guardian_check_spec.rb @@ -70,8 +70,15 @@ context 'when policies were broken' do let(:policy_breaks) do [ - "Filenames policy violated at 'lib/.env' for filename '.env'", - "Secrets detection policy violated at 'lib/.env' for username 'jen_barber'" + <<~POLICY_BREAK + .env: 2 incidents detected: + + >> Filenames: .env + Validity: N/A + Known by GitGuardian: No + Incident URL: N/A + Violation: filename `.env` detected + POLICY_BREAK ] end @@ -89,7 +96,8 @@ end expect { git_guardian_check.validate! } - .to raise_error(::Gitlab::GitAccess::ForbiddenError, policy_breaks_message) + .to raise_error(::Gitlab::GitAccess::ForbiddenError, + policy_breaks_message + described_class::REMEDIATION_MESSAGE) end context 'when a commit contains a special flag' do diff --git a/ee/spec/lib/gitlab/git_guardian/client_spec.rb b/ee/spec/lib/gitlab/git_guardian/client_spec.rb index 85144c3bdd737407e522202adda98c01cd270e37..a954c4c07059329766b196f22f7444b7491b4ffa 100644 --- a/ee/spec/lib/gitlab/git_guardian/client_spec.rb +++ b/ee/spec/lib/gitlab/git_guardian/client_spec.rb @@ -126,15 +126,33 @@ end context 'with policy breaking blobs' do - let(:file_paths) { %w[test_path/file.md lib/.env] } + let(:file_paths) { %w[file.md .env] } + + let(:blobs) do + document_with_policy_breaks = <<~DOCUMENT + import urllib.request + url = 'http://simple_username:simple_password@hi@gitlab.com/hello.json' + response = urllib.request.urlopen(url) + consume(response.read()) + DOCUMENT + + blob_with_policy_breaks = fake_blob( + path: ".env", + data: document_with_policy_breaks + ) - let(:request_body) do [ - { document: 'foo', filename: 'file.md' }, - { document: 'foo', filename: '.env' } + fake_blob(path: "file.md"), + blob_with_policy_breaks ] end + let(:request_body) do + blobs.map do |blob| + { document: blob.data, filename: blob.name } + end + end + let(:stubbed_response) do # see doc https://api.gitguardian.com/docs#operation/multiple_scan to know more about the response structure [ @@ -169,14 +187,32 @@ type: "Basic Auth String", policy: "Secrets detection", validity: "cannot_check", + known_secret: true, + incident_url: 'https://incident.example.com', matches: [ { type: "username", - match: "jen_barber", - index_start: 52, + match: "simple_username", + index_start: 37, + index_end: 45, + line_start: 2, + line_end: 2 + }, + { + type: "password", + match: "simple_password", + index_start: 46, index_end: 61, line_start: 2, line_end: 2 + }, + { + type: "host", + match: "hi@gitlab.com", + index_start: 62, + index_end: 70, + line_start: 2, + line_end: 2 } ] } @@ -186,10 +222,33 @@ end it 'returns appropriate error messages' do - expect(client_response).to eq [ - "Filenames policy violated at 'lib/.env' for filename '.env'", - "Secrets detection policy violated at 'lib/.env' for username 'jen_barber'" - ] + expected_message = <<~POLICY_BREAKS + .env: 2 incidents detected: + + >> Filenames: .env + Validity: N/A + Known by GitGuardian: No + Incident URL: N/A + Violation: filename `.env` detected + + >> Secrets detection: Basic Auth String + Validity: Cannot check + Known by GitGuardian: Yes + Incident URL: https://incident.example.com + Violation: username `simple_username` detected + 2 | url = 'http://simple_username:simple_password@hi@gitlab.com/hello.json' + |__username___| + Violation: password `simple_password` detected + 2 | url = 'http://simple_username:simple_password@hi@gitlab.com/hello.json' + |__password___| + Violation: host `hi@gitlab.com` detected + 2 | url = 'http://simple_username:simple_password@hi@gitlab.com/hello.json' + |___host____| + + POLICY_BREAKS + + expect(client_response).to eq [expected_message] + expect(guardian_api_request).to have_been_requested end end @@ -198,8 +257,15 @@ let(:blobs) { Array.new(46) { |i| fake_blob(path: "fake_path#{i}.txt") } } let(:policies_breaks_message) do [ - "Filenames policy violated at 'lib/.env' for filename '.env'", - "Secrets detection policy violated at 'lib/.env' for username 'jen_barber'" + <<~POLICY_BREAK + .env: 2 incidents detected: + + >> Filenames: .env + Validity: N/A + Known by GitGuardian: No + Incident URL: N/A + Violation: filename `.env` detected + POLICY_BREAK ] end @@ -214,10 +280,7 @@ end it 'returns appropriate error messages' do - expect(client_response).to eq [ - "Filenames policy violated at 'lib/.env' for filename '.env'", - "Secrets detection policy violated at 'lib/.env' for username 'jen_barber'" - ] + expect(client_response).to eq policies_breaks_message expect(guardian_api_request).to have_been_requested.times(3) end end