Skip to content
代码片段 群组 项目
未验证 提交 0bb8dbf2 编辑于 作者: Ahmed Hemdan's avatar Ahmed Hemdan 提交者: GitLab
浏览文件

Update gitlab-secret_detection gem

This commit update `gitlab-secret_detection` gem with some minor changes
to work nicely for the changes introduced in other commits in this merge
request.
上级 66f975e2
No related branches found
No related tags found
无相关合并请求
显示
849 个添加156 个删除
......@@ -1059,6 +1059,7 @@ Gitlab/NamespacedClass:
- 'ee/lib/gitlab/proxy.rb'
- 'ee/lib/gitlab/return_to_location.rb'
- 'ee/lib/gitlab/update_mirror_service_json_logger.rb'
- 'ee/lib/gitlab/secret_detection_logger.rb'
- 'ee/spec/support/elastic_query_name_inspector.rb'
- 'ee/spec/support/test_license.rb'
- 'lib/carrier_wave_string_file.rb'
......
......@@ -807,6 +807,7 @@ RSpec/ContextWording:
- 'ee/spec/support/shared_contexts/lib/gitlab/insights/reducers/reducers_shared_contexts.rb'
- 'ee/spec/support/shared_contexts/lib/gitlab/insights/serializers/serializers_shared_context.rb'
- 'ee/spec/support/shared_contexts/push_rules_checks_shared_context.rb'
- 'ee/spec/support/shared_contexts/secrets_check_shared_contexts.rb'
- 'ee/spec/support/shared_contexts/requests/api/members_shared_contexts.rb'
- 'ee/spec/support/shared_contexts/status_page/status_page_enabled_context.rb'
- 'ee/spec/support/shared_contexts/status_page/status_page_list_objects.rb'
......
......@@ -5,9 +5,31 @@ module Gitlab
module Checks
module PushRules
class SecretsCheck < ::Gitlab::Checks::BaseBulkChecker
BLOB_BYTES_LIMIT = 1.megabyte # Limit is 1MiB to start with.
ERROR_MESSAGES = {
failed_to_scan_regex_error: "\n-- Failed to scan blob(id: %{blob_id}) due to regex error.\n",
blob_timed_out_error: "\n-- Scanning blob(id: %{blob_id}) timed out.\n",
scan_timeout_error: 'Secret detection scan timed out.',
scan_initialization_error: 'Secret detection scan failed to initialize.',
invalid_input_error: 'Secret detection scan failed due to invalid input.',
invalid_scan_status_code_error: 'Invalid secret detection scan status, check passed.'
}.freeze
LOG_MESSAGES = {
secrets_check: 'Detecting secrets...',
secrets_not_found: 'Secret detection scan completed with no findings.',
found_secrets: 'Secret detection scan completed with one or more findings.',
found_secrets_post_message: "\n\nPlease remove the identified secrets in your commits and try again.",
found_secrets_with_errors: 'Secret detection scan completed with one or more findings ' \
'but some errors occured during the scan.',
finding_details_message: <<~MESSAGE
\n\nBlob id: %{blob_id}
-- Line: %{line_number}
-- Type: %{type}
-- Description: %{description}\n
MESSAGE
}.freeze
LOG_MESSAGE = "Detecting secrets..."
BLOB_BYTES_LIMIT = 1.megabyte # Limit is 1MiB to start with.
def validate!
# Return early and not perform the check if:
......@@ -22,7 +44,7 @@ def validate!
return unless push_rule && project.licensed_feature_available?(:pre_receive_secret_detection)
logger.log_timed(LOG_MESSAGE) do
logger.log_timed(LOG_MESSAGES[:secrets_check]) do
# List all blobs via `ListAllBlobs()` based on the existence of a
# quarantine directory. If no directory exists, we use `ListBlobs()` instead.
blobs =
......@@ -61,11 +83,28 @@ def validate!
# Filter out larger than BLOB_BYTES_LIMIT blobs and binary blobs.
blobs.reject! { |blob| blob.size > BLOB_BYTES_LIMIT || blob.binary }
# Pass blobs to gem for scanning.
response = ::Gitlab::SecretDetection::Scan
.new(logger: secret_detection_logger)
.secrets_scan(blobs, timeout: logger.time_left)
# Handle the response depending on the status returned.
format_response(response)
# TODO: Perhaps have a separate message for each and better logging?
rescue ::Gitlab::SecretDetection::Scan::RulesetParseError,
::Gitlab::SecretDetection::Scan::RulesetCompilationError => _
secret_detection_logger.error(message: ERROR_MESSAGES[:scan_initialization_error])
end
end
private
def secret_detection_logger
@secret_detection_logger ||= ::Gitlab::SecretDetectionLogger.build
end
def ignore_alternate_directories?
git_env = ::Gitlab::Git::HookEnv.all(project.repository.gl_repository)
git_env['GIT_OBJECT_DIRECTORY_RELATIVE'].present?
......@@ -86,6 +125,63 @@ def filter_existing(blobs)
# Remove blobs that already exist.
blobs.reject { |blob| map_blob_id_to_existence[blob.id] }
end
def format_response(response)
case response.status
when ::Gitlab::SecretDetection::Status::NOT_FOUND
# No secrets found, we log and skip the check.
secret_detection_logger.info(message: LOG_MESSAGES[:secrets_not_found])
when ::Gitlab::SecretDetection::Status::FOUND
# One or more secrets found, generate message with findings and fail check.
message = LOG_MESSAGES[:found_secrets]
response.results.each do |finding|
message += format(LOG_MESSAGES[:finding_details_message], finding.to_h)
end
message += LOG_MESSAGES[:found_secrets_post_message]
secret_detection_logger.info(message: LOG_MESSAGES[:found_secrets])
raise ::Gitlab::GitAccess::ForbiddenError, message
when ::Gitlab::SecretDetection::Status::FOUND_WITH_ERRORS
# One or more secrets found, but with scan errors, so we
# generate a message with findings and errors, and fail the check.
message = LOG_MESSAGES[:found_secrets_with_errors]
response.results.each do |finding|
case finding.status
when ::Gitlab::SecretDetection::Status::FOUND
message += format(LOG_MESSAGES[:finding_details_message], finding.to_h)
when ::Gitlab::SecretDetection::Status::SCAN_ERROR
message += format(ERROR_MESSAGES[:failed_to_scan_regex_error], finding.to_h)
when ::Gitlab::SecretDetection::Status::BLOB_TIMEOUT
message += format(ERROR_MESSAGES[:blob_timed_out_error], finding.to_h)
end
end
message += LOG_MESSAGES[:found_secrets_post_message]
secret_detection_logger.info(message: LOG_MESSAGES[:found_secrets_with_errors])
raise ::Gitlab::GitAccess::ForbiddenError, message
when ::Gitlab::SecretDetection::Status::SCAN_TIMEOUT
# Entire scan timed out, we log and skip the check for now.
secret_detection_logger.error(message: ERROR_MESSAGES[:scan_timeout_error])
# TODO: Decide if we should also fail the check here.
# raise ::Gitlab::GitAccess::ForbiddenError, ERROR_MESSAGES[:scan_timeout_error]
when ::Gitlab::SecretDetection::Status::INPUT_ERROR
# Scan failed to invalid input. We skip the check because an input error
# could be due to not having `blobs` being empty (i.e. no new blobs to scan).
secret_detection_logger.error(message: ERROR_MESSAGES[:invalid_input_error])
else
# Invalid status returned by the scanning service/gem, we don't
# know how to handle that, so nothing happens and we skip the check.
secret_detection_logger.error(message: ERROR_MESSAGES[:invalid_scan_status_code_error])
end
end
end
end
end
......
# frozen_string_literal: true
module Gitlab
class SecretDetectionLogger < Gitlab::JsonLogger
def self.file_name_noext
'secret_detection'
end
end
end
......@@ -3,66 +3,9 @@
require 'spec_helper'
RSpec.describe EE::Gitlab::Checks::PushRules::SecretsCheck, feature_category: :secret_detection do
let_it_be(:user) { create(:user) }
let_it_be(:push_rule) { create(:push_rule) }
# Project is created with a custom repository, so
# we create a README to have a blob committed already.
let_it_be(:project) do
create(
:project,
:custom_repo,
files: { 'README' => 'Documentation goes here.' },
push_rule: push_rule
)
end
include_context 'secrets check context'
let_it_be(:repository) { project.repository }
# Set revisions as follows:
# 1. oldrev to commit created with the project above.
# 2. newrev to commit created in before_all below.
let(:oldrev) { '5e08a9ef8e7e257cbc727d6786bbc33ad7662625' }
let(:newrev) { 'fe29d93da4843da433e62711ace82db601eb4f8f' }
let(:changes) do
[
{
oldrev: oldrev,
newrev: newrev,
ref: 'refs/heads/master'
}
]
end
let(:protocol) { 'ssh' }
let(:timeout) { Gitlab::GitAccess::INTERNAL_TIMEOUT }
let(:logger) { Gitlab::Checks::TimedLogger.new(timeout: timeout) }
let(:user_access) { Gitlab::UserAccess.new(user, container: project) }
let(:changes_access) do
Gitlab::Checks::ChangesAccess.new(
changes,
project: project,
user_access: user_access,
protocol: protocol,
logger: logger
)
end
subject { described_class.new(changes_access) }
before_all do
project.add_developer(user)
# Create a new commit to be used as the new revision in changes passed to secrets check.
repository.commit_files(
user,
branch_name: 'add-dotenv-file',
message: 'Add .env file',
actions: [
{ action: :create, file_path: '.env', content: "SECRET=glpat-JUST20LETTERSANDNUMB" } # gitleaks:allow
]
)
end
let_it_be(:push_rule) { create(:push_rule) }
describe '#validate!' do
it_behaves_like 'check ignored when push rule unlicensed'
......@@ -100,7 +43,13 @@
stub_licensed_features(pre_receive_secret_detection: true)
end
it_behaves_like 'list and filter blobs'
it_behaves_like 'scan passed'
it_behaves_like 'scan detected secrets'
it_behaves_like 'scan detected secrets but some errors occured'
it_behaves_like 'scan timed out'
it_behaves_like 'scan failed to initialize'
it_behaves_like 'scan failed with invalid input'
it_behaves_like 'scan skipped due to invalid status'
end
end
......@@ -120,7 +69,13 @@
stub_licensed_features(pre_receive_secret_detection: true)
end
it_behaves_like 'list and filter blobs'
it_behaves_like 'scan passed'
it_behaves_like 'scan detected secrets'
it_behaves_like 'scan detected secrets but some errors occured'
it_behaves_like 'scan timed out'
it_behaves_like 'scan failed to initialize'
it_behaves_like 'scan failed with invalid input'
it_behaves_like 'scan skipped due to invalid status'
end
context 'when feature flag is disabled' do
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::SecretDetectionLogger, feature_category: :secret_detection do
subject { described_class.new('/dev/null') }
it_behaves_like 'a json logger', {}
describe '#file_name_noext' do
it 'returns log file name without extension' do
expect(described_class.file_name_noext).to eq('secret_detection')
end
end
end
# frozen_string_literal: true
RSpec.shared_context 'secrets check context' do
include_context 'secret detection error and log messages context'
let_it_be(:user) { create(:user) }
# Project is created with an empty repository, so
# we create an initial commit to have a blob committed.
let_it_be(:project) { create(:project, :empty_repo, push_rule: push_rule) }
let_it_be(:repository) { project.repository }
let_it_be(:initial_commit) do
# An initial commit to use as the oldrev in `changes` object below.
repository.commit_files(
user,
branch_name: 'master',
message: 'Initial commit',
actions: [
{ action: :create, file_path: 'README', content: 'Documentation goes here' }
]
)
end
# Create a default `new_commit` for use cases in which we don't care much about blobs.
let_it_be(:new_commit) { create_commit('.env' => 'BASE_URL=https://foo.bar') }
# Define blob references as follows:
# 1. old reference is used as the blob id for the initial commit.
# 2. new reference is used as the blob id for commits created in before_all statements elsewhere.
let(:old_blob_reference) { 'f3ac5ae18d057a11d856951f27b9b5b8043cf1ec' }
let(:new_blob_reference) { 'fe29d93da4843da433e62711ace82db601eb4f8f' }
let(:changes) do
[
{
oldrev: initial_commit,
newrev: new_commit,
ref: 'refs/heads/master'
}
]
end
# Set up the `changes_access` object to use below.
let(:protocol) { 'ssh' }
let(:timeout) { Gitlab::GitAccess::INTERNAL_TIMEOUT }
let(:logger) { Gitlab::Checks::TimedLogger.new(timeout: timeout) }
let(:user_access) { Gitlab::UserAccess.new(user, container: project) }
let(:changes_access) do
Gitlab::Checks::ChangesAccess.new(
changes,
project: project,
user_access: user_access,
protocol: protocol,
logger: logger
)
end
# We cannot really get the same Gitlab::Git::Blob objects even if we call `list_all_blobs` or `list_blobs`
# directly in any of the specs (which is also not a very good idea) as the object ids will always
# be different, so we expect the attributes of the returned object to match.
let(:old_blob) { have_attributes(class: Gitlab::Git::Blob, id: old_blob_reference, size: 23) }
let(:new_blob) { have_attributes(class: Gitlab::Git::Blob, id: new_blob_reference, size: 33) }
# Used for mocking calls to logger.
let(:secret_detection_logger) { instance_double(::Gitlab::SecretDetectionLogger) }
before do
allow(::Gitlab::SecretDetectionLogger).to receive(:build).and_return(secret_detection_logger)
end
before_all do
project.add_developer(user)
end
subject(:secrets_check) { described_class.new(changes_access) }
end
RSpec.shared_context 'secret detection error and log messages context' do
# Error messsages
let(:failed_to_scan_regex_error) do
format(
"\n-- Failed to scan blob(id: %{blob_id}) due to regex error.\n",
{ blob_id: failed_to_scan_blob_reference }
)
end
let(:blob_timed_out_error) do
format(
"\n-- Scanning blob(id: %{blob_id}) timed out.\n",
{ blob_id: timed_out_blob_reference }
)
end
let(:error_messages) do
{
scan_timeout_error: 'Secret detection scan timed out.',
scan_initialization_error: 'Secret detection scan failed to initialize.',
invalid_input_error: 'Secret detection scan failed due to invalid input.',
invalid_scan_status_code_error: 'Invalid secret detection scan status, check passed.'
}
end
# Log messages
let(:secrets_not_found) { 'Secret detection scan completed with no findings.' }
let(:found_secrets) { 'Secret detection scan completed with one or more findings.' }
let(:found_secrets_post_message) { "\n\nPlease remove the identified secrets in your commits and try again." }
let(:found_secrets_with_errors) do
'Secret detection scan completed with one or more findings but some errors occured during the scan.'
end
let(:found_secret_line_number) { '1' }
let(:found_secret_type) { 'gitlab_personal_access_token' }
let(:found_secret_description) { 'GitLab Personal Access Token' }
let(:found_secrets_message) do
message = <<~MESSAGE
\nBlob id: %{found_secret_blob_id}
-- Line: %{found_secret_line_number}
-- Type: %{found_secret_type}
-- Description: %{found_secret_description}\n
MESSAGE
format(
message,
{
found_secret_blob_id: new_blob_reference,
found_secret_line_number: found_secret_line_number,
found_secret_type: found_secret_type,
found_secret_description: found_secret_description
}
)
end
end
RSpec.shared_context 'quarantine directory exists' do
let(:git_env) { { 'GIT_OBJECT_DIRECTORY_RELATIVE' => 'objects' } }
let(:gitaly_commit_client) { instance_double(Gitlab::GitalyClient::CommitService) }
let(:object_existence_map) do
{
old_blob_reference.to_s => true,
new_blob_reference.to_s => false
}
end
before do
allow(Gitlab::Git::HookEnv).to receive(:all).with(repository.gl_repository).and_return(git_env)
# Since all blobs are committed to the repository, we mock the gitaly commit
# client and `object_existence_map` in such way only some of them are considered new.
allow(repository).to receive(:gitaly_commit_client).and_return(gitaly_commit_client)
allow(gitaly_commit_client).to receive(:object_existence_map).and_return(object_existence_map)
end
end
def create_commit(blobs)
commit = repository.commit_files(
user,
branch_name: 'a-new-branch',
message: 'Add a file',
actions: blobs.map do |path, content|
{
action: :create,
file_path: path,
content: content
}
end
)
# `list_blobs` only returns unreferenced blobs because it is used for hooks, so we have
# to delete the branch since Gitaly does not allow us to create loose objects via the RPC.
repository.delete_branch('a-new-branch')
commit
end
# frozen_string_literal: true
RSpec.shared_examples 'list and filter blobs' do
# We cannot really get the same Gitlab::Git::Blob objects even if we call `list_all_blobs`
# or `list_blobs` directly in the spec (which is perhaps also not a good idea) as the object
# ids will always be different, so we expect the blobs to be an array with two objects of that kind.
let(:old_blob) do
have_attributes(
class: Gitlab::Git::Blob,
id: oldrev,
size: 24
)
RSpec.shared_examples 'scan passed' do
include_context 'secrets check context'
let(:passed_scan_response) { ::Gitlab::SecretDetection::Response.new(Gitlab::SecretDetection::Status::NOT_FOUND) }
let(:new_blob_reference) { 'da66bef46dbf0ad7fdcbeec97c9eaa24c2846dda' }
let(:new_blob) { have_attributes(class: Gitlab::Git::Blob, id: new_blob_reference, size: 24) }
context 'with quarantine directory' do
include_context 'quarantine directory exists'
it 'lists all blobs of a repository' do
expect(repository).to receive(:list_all_blobs)
.with(
bytes_limit: EE::Gitlab::Checks::PushRules::SecretsCheck::BLOB_BYTES_LIMIT + 1,
dynamic_timeout: kind_of(Float),
ignore_alternate_object_directories: true
)
.once
.and_return([old_blob, new_blob])
.and_call_original
expect(secret_detection_logger).to receive(:info)
.once
.with(message: secrets_not_found)
expect { subject.validate! }.not_to raise_error
end
it 'filters existing blobs out' do
expect_next_instance_of(described_class) do |instance|
# old blob is expected to be filtered out
expect(instance).to receive(:filter_existing)
.with(
array_including(old_blob, new_blob)
)
.once
.and_return(new_blob)
.and_call_original
end
expect(secret_detection_logger).to receive(:info)
.once
.with(message: secrets_not_found)
expect { subject.validate! }.not_to raise_error
end
end
let(:new_blob) do
have_attributes(
class: Gitlab::Git::Blob,
id: newrev,
size: 33
context 'with no quarantine directory' do
it 'list new blobs' do
expect(repository).to receive(:list_blobs)
.with(
['--not', '--all', '--not'] + changes.pluck(:newrev),
bytes_limit: EE::Gitlab::Checks::PushRules::SecretsCheck::BLOB_BYTES_LIMIT + 1,
dynamic_timeout: kind_of(Float)
)
.once
.and_return(new_blob)
.and_call_original
expect(secret_detection_logger).to receive(:info)
.once
.with(message: secrets_not_found)
expect { subject.validate! }.not_to raise_error
end
end
it 'scans blobs' do
expect_next_instance_of(::Gitlab::SecretDetection::Scan) do |instance|
expect(instance).to receive(:secrets_scan)
.with(
[new_blob],
timeout: kind_of(Float)
)
.once
.and_return(passed_scan_response)
.and_call_original
end
expect_next_instance_of(described_class) do |instance|
expect(instance).to receive(:format_response)
.with(passed_scan_response)
.once
.and_call_original
end
expect(secret_detection_logger).to receive(:info)
.once
.with(message: secrets_not_found)
expect { subject.validate! }.not_to raise_error
end
end
RSpec.shared_examples 'scan detected secrets' do
include_context 'secrets check context'
let(:successful_scan_response) do
::Gitlab::SecretDetection::Response.new(
Gitlab::SecretDetection::Status::FOUND,
[
Gitlab::SecretDetection::Finding.new(
new_blob_reference,
Gitlab::SecretDetection::Status::FOUND,
1,
"gitlab_personal_access_token",
"GitLab Personal Access Token"
)
]
)
end
context 'when quarantine directory exists' do
let(:git_env) { { 'GIT_OBJECT_DIRECTORY_RELATIVE' => 'objects' } }
let(:gitaly_commit_client) { instance_double(Gitlab::GitalyClient::CommitService) }
# The new commit must have a secret, so create a commit with one.
let_it_be(:new_commit) { create_commit('.env' => 'SECRET=glpat-JUST20LETTERSANDNUMB') } # gitleaks:allow
before do
allow(Gitlab::Git::HookEnv).to receive(:all).with(repository.gl_repository).and_return(git_env)
context 'with quarantine directory' do
include_context 'quarantine directory exists'
# Since both blobs are committed to the repository, we mock the gitaly commit
# client in such way that only the first is considered to exist in the repository.
allow(repository).to receive(:gitaly_commit_client).and_return(gitaly_commit_client)
allow(gitaly_commit_client).to receive(:object_existence_map).and_return(
{
oldrev.to_s => true,
newrev.to_s => false
}
)
it 'lists all blobs of a repository' do
expect(repository).to receive(:list_all_blobs)
.with(
bytes_limit: EE::Gitlab::Checks::PushRules::SecretsCheck::BLOB_BYTES_LIMIT + 1,
dynamic_timeout: kind_of(Float),
ignore_alternate_object_directories: true
)
.once
.and_return([old_blob, new_blob])
.and_call_original
expect(secret_detection_logger).to receive(:info)
.once
.with(message: found_secrets)
expect { subject.validate! }.to raise_error(::Gitlab::GitAccess::ForbiddenError)
end
it 'filters existing blobs out' do
expect_next_instance_of(described_class) do |instance|
# old blob is expected to be filtered out
expect(instance).to receive(:filter_existing)
.with(
array_including(old_blob, new_blob)
)
.once
.and_return(new_blob)
.and_call_original
end
expect(secret_detection_logger).to receive(:info)
.once
.with(message: found_secrets)
expect { subject.validate! }.to raise_error(::Gitlab::GitAccess::ForbiddenError)
end
end
context 'with no quarantine directory' do
it 'list new blobs' do
expect(repository).to receive(:list_blobs)
.with(
['--not', '--all', '--not'] + changes.pluck(:newrev),
bytes_limit: EE::Gitlab::Checks::PushRules::SecretsCheck::BLOB_BYTES_LIMIT + 1,
dynamic_timeout: kind_of(Float)
)
.once
.and_return(new_blob)
.and_call_original
expect(secret_detection_logger).to receive(:info)
.once
.with(message: found_secrets)
expect { subject.validate! }.to raise_error(::Gitlab::GitAccess::ForbiddenError)
end
end
it 'scans blobs' do
expect_next_instance_of(::Gitlab::SecretDetection::Scan) do |instance|
expect(instance).to receive(:secrets_scan)
.with(
[new_blob],
timeout: kind_of(Float)
)
.once
.and_return(successful_scan_response)
.and_call_original
end
expect_next_instance_of(described_class) do |instance|
expect(instance).to receive(:format_response)
.with(successful_scan_response)
.once
.and_call_original
end
expect(secret_detection_logger).to receive(:info)
.once
.with(message: found_secrets)
expect { subject.validate! }.to raise_error do |error|
expect(error).to be_a(::Gitlab::GitAccess::ForbiddenError)
expect(error.message).to include(found_secrets)
expect(error.message).to include(found_secrets_message)
expect(error.message).to include(found_secrets_post_message)
end
end
end
RSpec.shared_examples 'scan detected secrets but some errors occured' do
include_context 'secrets check context'
let(:successful_scan_with_errors_response) do
::Gitlab::SecretDetection::Response.new(
Gitlab::SecretDetection::Status::FOUND_WITH_ERRORS,
[
Gitlab::SecretDetection::Finding.new(
new_blob_reference,
Gitlab::SecretDetection::Status::FOUND,
1,
"gitlab_personal_access_token",
"GitLab Personal Access Token"
),
Gitlab::SecretDetection::Finding.new(
timed_out_blob_reference,
Gitlab::SecretDetection::Status::BLOB_TIMEOUT
),
Gitlab::SecretDetection::Finding.new(
failed_to_scan_blob_reference,
Gitlab::SecretDetection::Status::SCAN_ERROR
)
]
)
end
let_it_be(:new_commit) { create_commit('.env' => 'SECRET=glpat-JUST20LETTERSANDNUMB') } # gitleaks:allow
let_it_be(:timed_out_commit) { create_commit('.test.env' => 'TOKEN=glpat-JUST20LETTERSANDNUMB') } # gitleaks:allow
let_it_be(:failed_to_scan_commit) { create_commit('.dev.env' => 'GLPAT=glpat-JUST20LETTERSANDNUMB') } # gitleaks:allow
let(:changes) do
[
{ oldrev: initial_commit, newrev: new_commit, ref: 'refs/heads/master' },
{ oldrev: initial_commit, newrev: timed_out_commit, ref: 'refs/heads/master' },
{ oldrev: initial_commit, newrev: failed_to_scan_commit, ref: 'refs/heads/master' }
]
end
let(:timed_out_blob_reference) { 'eaf3c09526f50b5e35a096ef70cca033f9974653' }
let(:failed_to_scan_blob_reference) { '4fbec77313fd240d00fc37e522d0274b8fb54bd1' }
let(:timed_out_blob) { have_attributes(class: Gitlab::Git::Blob, id: timed_out_blob_reference, size: 32) }
let(:failed_to_scan_blob) { have_attributes(class: Gitlab::Git::Blob, id: failed_to_scan_blob_reference, size: 32) }
# Used for the quarantine directory context below.
let(:object_existence_map) do
{
old_blob_reference.to_s => true,
new_blob_reference.to_s => false,
timed_out_blob_reference.to_s => false,
failed_to_scan_blob_reference.to_s => false
}
end
context 'with quarantine directory' do
include_context 'quarantine directory exists'
it 'lists all blobs of a repository' do
expect(repository).to receive(:list_all_blobs)
......@@ -47,11 +275,24 @@
)
.once
.and_return(
[old_blob, new_blob]
[old_blob, new_blob, timed_out_blob, failed_to_scan_blob]
)
.and_call_original
expect(subject.validate!).to be_truthy
expect_next_instance_of(::Gitlab::SecretDetection::Scan) do |instance|
expect(instance).to receive(:secrets_scan)
.with(
array_including(new_blob, timed_out_blob, failed_to_scan_blob),
timeout: kind_of(Float)
)
.and_return(successful_scan_with_errors_response)
end
expect(secret_detection_logger).to receive(:info)
.once
.with(message: found_secrets_with_errors)
expect { subject.validate! }.to raise_error(::Gitlab::GitAccess::ForbiddenError)
end
it 'filters existing blobs out' do
......@@ -59,18 +300,33 @@
# old blob is expected to be filtered out
expect(instance).to receive(:filter_existing)
.with(
[old_blob, new_blob]
array_including(old_blob, new_blob, timed_out_blob, failed_to_scan_blob)
)
.once
.and_return(new_blob)
.and_return(
array_including(new_blob, timed_out_blob, failed_to_scan_blob)
)
.and_call_original
end
expect(subject.validate!).to be_truthy
expect_next_instance_of(::Gitlab::SecretDetection::Scan) do |instance|
expect(instance).to receive(:secrets_scan)
.with(
array_including(new_blob, timed_out_blob, failed_to_scan_blob),
timeout: kind_of(Float)
)
.and_return(successful_scan_with_errors_response)
end
expect(secret_detection_logger).to receive(:info)
.once
.with(message: found_secrets_with_errors)
expect { subject.validate! }.to raise_error(::Gitlab::GitAccess::ForbiddenError)
end
end
context 'when quarantine directory does not exist' do
context 'with no quarantine directory' do
it 'list new blobs' do
expect(repository).to receive(:list_blobs)
.with(
......@@ -79,10 +335,151 @@
dynamic_timeout: kind_of(Float)
)
.once
.and_return(new_blob)
.and_return(
array_including(new_blob, old_blob, timed_out_blob, failed_to_scan_blob)
)
.and_call_original
expect(subject.validate!).to be_truthy
expect_next_instance_of(::Gitlab::SecretDetection::Scan) do |instance|
expect(instance).to receive(:secrets_scan)
.with(
array_including(new_blob, timed_out_blob, failed_to_scan_blob),
timeout: kind_of(Float)
)
.and_return(successful_scan_with_errors_response)
end
expect(secret_detection_logger).to receive(:info)
.once
.with(message: found_secrets_with_errors)
expect { subject.validate! }.to raise_error(::Gitlab::GitAccess::ForbiddenError)
end
end
it 'scans blobs' do
expect_next_instance_of(::Gitlab::SecretDetection::Scan) do |instance|
expect(instance).to receive(:secrets_scan)
.with(
array_including(new_blob, timed_out_blob, failed_to_scan_blob),
timeout: kind_of(Float)
)
.once
.and_return(successful_scan_with_errors_response)
end
expect_next_instance_of(described_class) do |instance|
expect(instance).to receive(:format_response)
.with(successful_scan_with_errors_response)
.once
.and_call_original
end
expect(secret_detection_logger).to receive(:info)
.once
.with(message: found_secrets_with_errors)
expect { subject.validate! }.to raise_error do |error|
expect(error).to be_a(::Gitlab::GitAccess::ForbiddenError)
expect(error.message).to include(found_secrets_with_errors)
expect(error.message).to include(found_secrets_message)
expect(error.message).to include(blob_timed_out_error)
expect(error.message).to include(failed_to_scan_regex_error)
expect(error.message).to include(found_secrets_post_message)
end
end
end
RSpec.shared_examples 'scan timed out' do
include_context 'secrets check context'
let(:scan_timed_out_scan_response) do
::Gitlab::SecretDetection::Response.new(Gitlab::SecretDetection::Status::SCAN_TIMEOUT)
end
it 'logs the error and passes the check' do
# Mock the response to return a scan timed out status.
expect_next_instance_of(::Gitlab::SecretDetection::Scan) do |instance|
expect(instance).to receive(:secrets_scan)
.and_return(scan_timed_out_scan_response)
end
# Error bubbles up from scan class and is handled in secrets check.
expect(secret_detection_logger).to receive(:error)
.once
.with(message: error_messages[:scan_timeout_error])
expect { subject.validate! }.not_to raise_error
end
end
RSpec.shared_examples 'scan failed to initialize' do
include_context 'secrets check context'
before do
# Intentionally set `RULESET_FILE_PATH` to an incorrect path to cause error.
stub_const('::Gitlab::SecretDetection::Scan::RULESET_FILE_PATH', 'gitleaks.toml')
end
it 'logs the error and passes the check' do
# File parsing error is written to the logger.
expect(secret_detection_logger).to receive(:error)
.once
.with(
"Failed to parse secret detection ruleset from 'gitleaks.toml' path: " \
"No such file or directory @ rb_sysopen - gitleaks.toml"
)
# Error bubbles up from scan class and is handled in secrets check.
expect(secret_detection_logger).to receive(:error)
.once
.with(message: error_messages[:scan_initialization_error])
expect { subject.validate! }.not_to raise_error
end
end
RSpec.shared_examples 'scan failed with invalid input' do
include_context 'secrets check context'
let(:failed_with_invalid_input_response) do
::Gitlab::SecretDetection::Response.new(::Gitlab::SecretDetection::Status::INPUT_ERROR)
end
it 'logs the error and passes the check' do
# Mock the response to return a scan invalid input status.
expect_next_instance_of(::Gitlab::SecretDetection::Scan) do |instance|
expect(instance).to receive(:secrets_scan)
.and_return(failed_with_invalid_input_response)
end
# Error bubbles up from scan class and is handled in secrets check.
expect(secret_detection_logger).to receive(:error)
.once
.with(message: error_messages[:invalid_input_error])
expect { subject.validate! }.not_to raise_error
end
end
RSpec.shared_examples 'scan skipped due to invalid status' do
include_context 'secrets check context'
let(:invalid_scan_status_code) { 7 } # doesn't exist in ::Gitlab::SecretDetection::Status
let(:invalid_scan_status_code_response) { ::Gitlab::SecretDetection::Response.new(invalid_scan_status_code) }
it 'logs the error and passes the check' do
# Mock the response to return a scan invalid status.
expect_next_instance_of(::Gitlab::SecretDetection::Scan) do |instance|
expect(instance).to receive(:secrets_scan)
.and_return(invalid_scan_status_code_response)
end
# Error bubbles up from scan class and is handled in secrets check.
expect(secret_detection_logger).to receive(:error)
.once
.with(message: error_messages[:invalid_scan_status_code_error])
expect { subject.validate! }.not_to raise_error
end
end
# frozen_string_literal: true
require_relative 'secret_detection/version'
require_relative 'secret_detection/status'
require_relative 'secret_detection/finding'
require_relative 'secret_detection/response'
......
......@@ -18,6 +18,16 @@ def ==(other)
self.class == other.class && other.state == state
end
def to_h
{
blob_id: blob_id,
status: status,
line_number: line_number,
type: type,
description: description
}
end
protected
def state
......
......@@ -34,9 +34,9 @@ class Scan
# in case the compilation fails.
def initialize(logger: Logger.new($stdout), ruleset_path: RULESET_FILE_PATH)
@logger = logger
@rules = parse_ruleset ruleset_path
@keywords = create_keywords @rules
@matcher = build_pattern_matcher @rules
@rules = parse_ruleset(ruleset_path)
@keywords = create_keywords(rules)
@pattern_matcher = build_pattern_matcher(rules)
end
# Runs Secret Detection scan on the list of given blobs. Both the total scan duration and
......@@ -52,49 +52,51 @@ def initialize(logger: Logger.new($stdout), ruleset_path: RULESET_FILE_PATH)
# status: One of the SecretDetection::Status values
# results: [SecretDetection::Finding]
# }
#
#
def secrets_scan(blobs, timeout: DEFAULT_SCAN_TIMEOUT_SECS, blob_timeout: DEFAULT_BLOB_TIMEOUT_SECS)
return SecretDetection::Response.new(SecretDetection::Status::INPUT_ERROR) unless validate_scan_input(blobs)
Timeout.timeout timeout do
Timeout.timeout(timeout) do
matched_blobs = filter_by_keywords(blobs)
next SecretDetection::Response.new(SecretDetection::Status::NOT_FOUND) if matched_blobs.empty?
secrets = find_secrets_bulk(matched_blobs, blob_timeout)
scan_status = overall_scan_status secrets
scan_status = overall_scan_status(secrets)
SecretDetection::Response.new(scan_status, secrets)
end
rescue Timeout::Error => e
@logger.error "Secret Detection operation timed out: #{e}"
logger.error "Secret detection operation timed out: #{e}"
SecretDetection::Response.new(SecretDetection::Status::SCAN_TIMEOUT)
end
private
attr_reader :logger, :rules, :keywords, :matcher
attr_reader :logger, :rules, :keywords, :pattern_matcher
# parses given ruleset file and returns the parsed rules
def parse_ruleset(ruleset_file_path)
rules_data = TomlRB.load_file(ruleset_file_path)
rules_data['rules']
rescue StandardError => e
logger.error "Failed to parse Secret Detection ruleset from '#{ruleset_file_path}' path: #{e}"
logger.error "Failed to parse secret detection ruleset from '#{ruleset_file_path}' path: #{e}"
raise RulesetParseError
end
# builds RE2::Set pattern matcher for the given rules
def build_pattern_matcher(rules)
matcher = RE2::Set.new
rules.each do |rule|
matcher.add(rule['regex'])
matcher.add(rule["regex"])
end
unless matcher.compile
logger.error "Failed to compile Secret Detection rulesets in RE::Set"
logger.error "Failed to compile secret detection rulesets in RE::Set"
raise RulesetCompilationError
end
......@@ -104,8 +106,9 @@ def build_pattern_matcher(rules)
# creates and returns the unique set of rule matching keywords
def create_keywords(rules)
secrets_keywords = []
rules.each do |rule|
secrets_keywords << rule['keywords']
secrets_keywords << rule["keywords"]
end
secrets_keywords.flatten.compact.to_set
......@@ -126,14 +129,16 @@ def filter_by_keywords(blobs)
# finds secrets in the given list of blobs
def find_secrets_bulk(blobs, blob_timeout)
found_secrets = []
blobs.each do |blob|
found_secrets << Timeout.timeout(blob_timeout) do
find_secrets(blob)
end
found_secrets << Timeout.timeout(blob_timeout) { find_secrets(blob) }
rescue Timeout::Error => e
logger.error "Secret Detection scan timed out on the blob(id:#{blob.id}): #{e}"
found_secrets << SecretDetection::Finding.new(blob.id,
SecretDetection::Status::BLOB_TIMEOUT)
logger.error "Secret detection scan timed out on the blob(id:#{blob.id}): #{e}"
found_secrets << SecretDetection::Finding.new(
blob.id,
SecretDetection::Status::BLOB_TIMEOUT
)
end
found_secrets.flatten.freeze
......@@ -147,20 +152,28 @@ def find_secrets(blob)
# ignore the line scan if it is suffixed with '#gitleaks:allow'
next if line.end_with?(GITLEAKS_KEYWORD_IGNORE)
patterns = matcher.match(line, :exception => false)
patterns = pattern_matcher.match(line, :exception => false)
next unless patterns.any?
line_no = index + 1
line_number = index + 1
patterns.each do |pattern|
type = rules[pattern]['id']
description = rules[pattern]['description']
secrets << SecretDetection::Finding.new(blob.id, SecretDetection::Status::FOUND, line_no, type,
description)
type = rules[pattern]["id"]
description = rules[pattern]["description"]
secrets << SecretDetection::Finding.new(
blob.id,
SecretDetection::Status::FOUND,
line_number,
type,
description
)
end
end
secrets
rescue StandardError => e
logger.error "Secret Detection scan failed on the blob(id:#{blob.id}): #{e}"
logger.error "Secret detection scan failed on the blob(id:#{blob.id}): #{e}"
SecretDetection::Finding.new(blob.id, SecretDetection::Status::SCAN_ERROR)
end
......
......@@ -2,7 +2,7 @@
module Gitlab
module SecretDetection
# All the possible statuses emitted by the Scan operation
# All the possible statuses emitted by the scan operation
class Status
NOT_FOUND = 0 # When scan operation completes with zero findings
FOUND = 1 # When scan operation completes with one or more findings
......
......@@ -13,26 +13,34 @@ def new_blob(id:, data:)
{
"title" => "gitleaks config",
"rules" => [
{ "id" => "gitlab_personal_access_token",
{
"id" => "gitlab_personal_access_token",
"description" => "GitLab Personal Access Token",
"regex" => "glpat-[0-9a-zA-Z_\\-]{20}",
"tags" => %w[gitlab revocation_type],
"keywords" => ["glpat"] },
{ "id" => "gitlab_pipeline_trigger_token",
"keywords" => ["glpat"]
},
{
"id" => "gitlab_pipeline_trigger_token",
"description" => "GitLab Pipeline Trigger Token",
"regex" => "glptt-[0-9a-zA-Z_\\-]{20}",
"tags" => ["gitlab"],
"keywords" => ["glptt"] },
{ "id" => "gitlab_runner_registration_token",
"keywords" => ["glptt"]
},
{
"id" => "gitlab_runner_registration_token",
"description" => "GitLab Runner Registration Token",
"regex" => "GR1348941[0-9a-zA-Z_-]{20}",
"tags" => ["gitlab"],
"keywords" => ["GR1348941"] },
{ "id" => "gitlab_feed_token",
"keywords" => ["GR1348941"]
},
{
"id" => "gitlab_feed_token",
"description" => "GitLab Feed Token",
"regex" => "glft-[0-9a-zA-Z_-]{20}",
"tags" => ["gitlab"],
"keywords" => ["glft"] }
"keywords" => ["glft"]
}
]
}
end
......@@ -65,16 +73,21 @@ def new_blob(id:, data:)
it "does not match" do
expected_response = Gitlab::SecretDetection::Response.new(Gitlab::SecretDetection::Status::NOT_FOUND)
expect(scan.secrets_scan(blobs)).to eq(expected_response)
end
it "attempts to keyword match returning no blobs for further scan" do
expect(scan).to receive(:filter_by_keywords).with(blobs).and_return([])
expect(scan).to receive(:filter_by_keywords)
.with(blobs)
.and_return([])
scan.secrets_scan(blobs)
end
it "does not attempt to regex match" do
expect(scan).not_to receive(:match_rules_bulk)
scan.secrets_scan(blobs)
end
end
......@@ -89,7 +102,7 @@ def new_blob(id:, data:)
]
end
it "matches glpat" do
it "matches different types of rules" do
expected_response = Gitlab::SecretDetection::Response.new(
Gitlab::SecretDetection::Status::FOUND,
[
......@@ -129,6 +142,8 @@ def new_blob(id:, data:)
end
context "when configured with time out" do
let(:each_blob_timeout_secs) { 0.000_001 } # 1 micro-sec to intentionally timeout large blob
let(:large_data) do
("large data with a secret glpat-12312312312312312312\n" * 10_000_000).freeze # gitleaks:allow
end
......@@ -141,40 +156,44 @@ def new_blob(id:, data:)
]
end
let(:all_large_blobs) do
[
new_blob(id: 111, data: large_data),
new_blob(id: 222, data: large_data),
new_blob(id: 333, data: large_data)
]
end
it "whole secret detection scan operation times out" do
scan_timeout_secs = 0.000_001 # 1 micro-sec to intentionally timeout large blob
response = Gitlab::SecretDetection::Response.new(Gitlab::SecretDetection::Status::SCAN_TIMEOUT)
expect(scan.secrets_scan(blobs, timeout: scan_timeout_secs)).to eq(response)
end
it "one of the blobs times out while others continue to get scanned" do
each_blob_timeout_secs = 0.000_001 # 1 micro-sec to intentionally timeout large blob
expected_response = Gitlab::SecretDetection::Response.new(
Gitlab::SecretDetection::Status::FOUND_WITH_ERRORS,
[
Gitlab::SecretDetection::Finding.new(
blobs[0].id, Gitlab::SecretDetection::Status::FOUND, 1,
blobs[0].id,
Gitlab::SecretDetection::Status::FOUND,
1,
ruleset['rules'][2]['id'],
ruleset['rules'][2]['description']
),
Gitlab::SecretDetection::Finding.new(
blobs[2].id, Gitlab::SecretDetection::Status::BLOB_TIMEOUT
blobs[2].id,
Gitlab::SecretDetection::Status::BLOB_TIMEOUT
)
])
]
)
expect(scan.secrets_scan(blobs, blob_timeout: each_blob_timeout_secs)).to eq(expected_response)
end
it "all the blobs time out" do
each_blob_timeout_secs = 0.000_001 # 1 micro-sec to intentionally timeout large blob
all_large_blobs = [
new_blob(id: 111, data: large_data),
new_blob(id: 222, data: large_data),
new_blob(id: 333, data: large_data)
]
# scan status changes to SCAN_TIMEOUT when *all* the blobs time out
expected_scan_status = Gitlab::SecretDetection::Status::SCAN_TIMEOUT
......@@ -182,15 +201,19 @@ def new_blob(id:, data:)
expected_scan_status,
[
Gitlab::SecretDetection::Finding.new(
all_large_blobs[0].id, Gitlab::SecretDetection::Status::BLOB_TIMEOUT
all_large_blobs[0].id,
Gitlab::SecretDetection::Status::BLOB_TIMEOUT
),
Gitlab::SecretDetection::Finding.new(
all_large_blobs[1].id, Gitlab::SecretDetection::Status::BLOB_TIMEOUT
all_large_blobs[1].id,
Gitlab::SecretDetection::Status::BLOB_TIMEOUT
),
Gitlab::SecretDetection::Finding.new(
all_large_blobs[2].id, Gitlab::SecretDetection::Status::BLOB_TIMEOUT
all_large_blobs[2].id,
Gitlab::SecretDetection::Status::BLOB_TIMEOUT
)
])
]
)
expect(scan.secrets_scan(all_large_blobs, blob_timeout: each_blob_timeout_secs)).to eq(expected_response)
end
......
0% 加载中 .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册