From 5b49a6099d2f7f292fe6de28942113317c155cf2 Mon Sep 17 00:00:00 2001
From: Serena Fang <sfang@gitlab.com>
Date: Wed, 6 Dec 2023 03:00:23 +0000
Subject: [PATCH] Add Secret Detection scanning gem implementation

- Introduce Interfaces for Push Check to invoke Secret Detection
- Add Gitleaks ruleset file containing the rules used for Secret Detection scanning
- Add tests to confirm Secret Detection behaviour
---
 Gemfile.lock                                  |   2 +
 gems/gitlab-secret_detection/Gemfile.lock     |  30 ++-
 .../gitlab-secret_detection.gemspec           |   7 +-
 .../lib/gitlab/secret_detection.rb            |   6 +-
 .../lib/gitlab/secret_detection/finding.rb    |  28 +++
 .../lib/gitlab/secret_detection/response.rb   |  28 +++
 .../lib/gitlab/secret_detection/scan.rb       | 193 +++++++++++++++++
 .../lib/gitlab/secret_detection/status.rb     |  16 ++
 .../gitlab-secret_detection/lib/gitleaks.toml |  49 +++++
 .../spec/gitlab/secret_detection_spec.rb      |   7 -
 .../lib/gitlab/secret_detection/scan_spec.rb  | 199 ++++++++++++++++++
 11 files changed, 545 insertions(+), 20 deletions(-)
 create mode 100644 gems/gitlab-secret_detection/lib/gitlab/secret_detection/finding.rb
 create mode 100644 gems/gitlab-secret_detection/lib/gitlab/secret_detection/response.rb
 create mode 100644 gems/gitlab-secret_detection/lib/gitlab/secret_detection/scan.rb
 create mode 100644 gems/gitlab-secret_detection/lib/gitlab/secret_detection/status.rb
 create mode 100644 gems/gitlab-secret_detection/lib/gitleaks.toml
 delete mode 100644 gems/gitlab-secret_detection/spec/gitlab/secret_detection_spec.rb
 create mode 100644 gems/gitlab-secret_detection/spec/lib/gitlab/secret_detection/scan_spec.rb

diff --git a/Gemfile.lock b/Gemfile.lock
index 6ef11ee6a1313..bce114b23e8d1 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -65,6 +65,8 @@ PATH
   remote: gems/gitlab-secret_detection
   specs:
     gitlab-secret_detection (0.1.0)
+      re2 (~> 2.4)
+      toml-rb (~> 2.2)
 
 PATH
   remote: gems/gitlab-utils
diff --git a/gems/gitlab-secret_detection/Gemfile.lock b/gems/gitlab-secret_detection/Gemfile.lock
index 2f615b24d86a9..dd9f621ee4a2b 100644
--- a/gems/gitlab-secret_detection/Gemfile.lock
+++ b/gems/gitlab-secret_detection/Gemfile.lock
@@ -2,6 +2,8 @@ PATH
   remote: .
   specs:
     gitlab-secret_detection (0.1.0)
+      re2 (~> 2.4)
+      toml-rb (~> 2.2)
 
 GEM
   remote: https://rubygems.org/
@@ -24,6 +26,7 @@ GEM
     bigdecimal (3.1.4)
     binding_of_caller (1.0.0)
       debug_inspector (>= 0.0.1)
+    citrus (3.0.2)
     coderay (1.1.3)
     concurrent-ruby (1.2.2)
     connection_pool (2.4.1)
@@ -31,8 +34,8 @@ GEM
     diff-lcs (1.5.0)
     drb (2.2.0)
       ruby2_keywords
-    gitlab-styles (10.1.0)
-      rubocop (~> 1.50.2)
+    gitlab-styles (11.0.0)
+      rubocop (~> 1.57.1)
       rubocop-graphql (~> 0.18)
       rubocop-performance (~> 1.15)
       rubocop-rails (~> 2.17)
@@ -40,6 +43,8 @@ GEM
     i18n (1.14.1)
       concurrent-ruby (~> 1.0)
     json (2.6.3)
+    language_server-protocol (3.17.0.3)
+    mini_portile2 (2.8.5)
     minitest (5.20.0)
     mutex_m (0.2.0)
     parallel (1.23.0)
@@ -50,9 +55,11 @@ GEM
       coderay
       parser
       unparser
-    racc (1.7.1)
+    racc (1.7.3)
     rack (3.0.8)
     rainbow (3.1.1)
+    re2 (2.4.3)
+      mini_portile2 (~> 2.8.5)
     regexp_parser (2.8.2)
     rexml (3.2.6)
     rspec (3.12.0)
@@ -84,14 +91,15 @@ GEM
       binding_of_caller
       rspec-parameterized-core (< 2)
     rspec-support (3.12.1)
-    rubocop (1.50.2)
+    rubocop (1.57.2)
       json (~> 2.3)
+      language_server-protocol (>= 3.17.0)
       parallel (~> 1.10)
-      parser (>= 3.2.0.0)
+      parser (>= 3.2.2.4)
       rainbow (>= 2.2.2, < 4.0)
       regexp_parser (>= 1.8, < 3.0)
       rexml (>= 3.2.5, < 4.0)
-      rubocop-ast (>= 1.28.0, < 2.0)
+      rubocop-ast (>= 1.28.1, < 2.0)
       ruby-progressbar (~> 1.7)
       unicode-display_width (>= 2.4.0, < 3.0)
     rubocop-ast (1.30.0)
@@ -115,10 +123,12 @@ GEM
       rubocop-factory_bot (~> 2.22)
     ruby-progressbar (1.13.0)
     ruby2_keywords (0.0.5)
+    toml-rb (2.2.0)
+      citrus (~> 3.0, > 3.0)
     tzinfo (2.0.6)
       concurrent-ruby (~> 1.0)
     unicode-display_width (2.5.0)
-    unparser (0.6.9)
+    unparser (0.6.10)
       diff-lcs (~> 1.3)
       parser (>= 3.2.2.4)
 
@@ -127,13 +137,13 @@ PLATFORMS
 
 DEPENDENCIES
   gitlab-secret_detection!
-  gitlab-styles (~> 10.1.0)
+  gitlab-styles (~> 11.0)
   rspec (~> 3.0)
   rspec-benchmark (~> 0.6.0)
   rspec-parameterized (~> 1.0)
-  rubocop (~> 1.50)
+  rubocop (~> 1.57)
   rubocop-rails (<= 2.20)
   rubocop-rspec (~> 2.22)
 
 BUNDLED WITH
-   2.4.14
+   2.4.22
diff --git a/gems/gitlab-secret_detection/gitlab-secret_detection.gemspec b/gems/gitlab-secret_detection/gitlab-secret_detection.gemspec
index ff5121846f468..be9db3aa38933 100644
--- a/gems/gitlab-secret_detection/gitlab-secret_detection.gemspec
+++ b/gems/gitlab-secret_detection/gitlab-secret_detection.gemspec
@@ -24,11 +24,14 @@ Gem::Specification.new do |spec|
   spec.files = Dir['lib/**/*.rb']
   spec.require_paths = ["lib"]
 
-  spec.add_development_dependency "gitlab-styles", "~> 10.1.0"
+  spec.add_runtime_dependency "re2", "~> 2.4"
+  spec.add_runtime_dependency "toml-rb", "~> 2.2"
+
+  spec.add_development_dependency "gitlab-styles", "~> 11.0"
   spec.add_development_dependency "rspec", "~> 3.0"
   spec.add_development_dependency "rspec-benchmark", "~> 0.6.0"
   spec.add_development_dependency "rspec-parameterized", "~> 1.0"
-  spec.add_development_dependency "rubocop", "~> 1.50"
+  spec.add_development_dependency "rubocop", "~> 1.57"
   spec.add_development_dependency "rubocop-rails", "<= 2.20" # https://github.com/rubocop/rubocop-rails/issues/1173
   spec.add_development_dependency "rubocop-rspec", "~> 2.22"
 end
diff --git a/gems/gitlab-secret_detection/lib/gitlab/secret_detection.rb b/gems/gitlab-secret_detection/lib/gitlab/secret_detection.rb
index 54e0eb794a37f..95da376b7c1fa 100644
--- a/gems/gitlab-secret_detection/lib/gitlab/secret_detection.rb
+++ b/gems/gitlab-secret_detection/lib/gitlab/secret_detection.rb
@@ -1,6 +1,10 @@
 # frozen_string_literal: true
 
-require_relative "secret_detection/version"
+require_relative 'secret_detection/version'
+require_relative 'secret_detection/status'
+require_relative 'secret_detection/finding'
+require_relative 'secret_detection/response'
+require_relative 'secret_detection/scan'
 
 module Gitlab
   module SecretDetection
diff --git a/gems/gitlab-secret_detection/lib/gitlab/secret_detection/finding.rb b/gems/gitlab-secret_detection/lib/gitlab/secret_detection/finding.rb
new file mode 100644
index 0000000000000..9bded2dbf9756
--- /dev/null
+++ b/gems/gitlab-secret_detection/lib/gitlab/secret_detection/finding.rb
@@ -0,0 +1,46 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module SecretDetection
+    # Finding is a data object representing a secret finding identified within a blob
+    #
+    # +blob_id+:: Id of the blob the finding belongs to
+    # +status+:: One of values from SecretDetection::Status indicating the finding's status
+    # +line_number+:: Line number of the finding within the blob. Default value is nil.
+    # +type+:: Identifier of the finding's type. Default value is nil.
+    # +description+:: Description of the finding. Default value is nil.
+    class Finding
+      attr_reader :blob_id, :status, :line_number, :type, :description
+
+      def initialize(blob_id, status, line_number = nil, type = nil, description = nil)
+        @blob_id = blob_id
+        @status = status
+        @line_number = line_number
+        @type = type
+        @description = description
+      end
+
+      # Findings are equal when they are of the same class and hold equal attribute values
+      def ==(other)
+        self.class == other.class && other.state == state
+      end
+
+      # +eql?+ and +hash+ follow the same value semantics as +==+, so that equal findings
+      # are treated as duplicates by Hash, Set and Array#uniq
+      def eql?(other)
+        self.class == other.class && state.eql?(other.state)
+      end
+
+      def hash
+        state.hash
+      end
+
+      protected
+
+      # attribute values used to compare and hash findings
+      def state
+        [blob_id, status, line_number, type, description]
+      end
+    end
+  end
+end
diff --git a/gems/gitlab-secret_detection/lib/gitlab/secret_detection/response.rb b/gems/gitlab-secret_detection/lib/gitlab/secret_detection/response.rb
new file mode 100644
index 0000000000000..a34fba7c0b60e
--- /dev/null
+++ b/gems/gitlab-secret_detection/lib/gitlab/secret_detection/response.rb
@@ -0,0 +1,40 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module SecretDetection
+    # Response is the data object returned by the scan operation with the following structure
+    #
+    # +status+:: One of values from SecretDetection::Status indicating the scan operation's status
+    # +results+:: Array of SecretDetection::Finding values. Default value is nil.
+    class Response
+      attr_reader :status, :results
+
+      def initialize(status, results = nil)
+        @status = status
+        @results = results
+      end
+
+      # Responses are equal when they are of the same class and hold equal status and results
+      def ==(other)
+        self.class == other.class && other.state == state
+      end
+
+      # +eql?+ and +hash+ are defined together with +==+ so that a response can be used
+      # consistently as a Hash key or a Set member
+      def eql?(other)
+        self.class == other.class && state.eql?(other.state)
+      end
+
+      def hash
+        state.hash
+      end
+
+      protected
+
+      # attribute values used to compare and hash responses
+      def state
+        [status, results]
+      end
+    end
+  end
+end
diff --git a/gems/gitlab-secret_detection/lib/gitlab/secret_detection/scan.rb b/gems/gitlab-secret_detection/lib/gitlab/secret_detection/scan.rb
new file mode 100644
index 0000000000000..83fc65a9b33db
--- /dev/null
+++ b/gems/gitlab-secret_detection/lib/gitlab/secret_detection/scan.rb
@@ -0,0 +1,193 @@
+# frozen_string_literal: true
+
+require 'toml-rb'
+require 're2'
+require 'logger'
+require 'timeout'
+
+module Gitlab
+  module SecretDetection
+    # Scan is responsible for running Secret Detection scan operation
+    class Scan
+      # RulesetParseError is raised when the code fails to parse the
+      # ruleset file from the given path
+      RulesetParseError = Class.new(StandardError)
+
+      # RulesetCompilationError is raised when the code fails to compile
+      # the regex patterns of the parsed ruleset
+      RulesetCompilationError = Class.new(StandardError)
+
+      # default time limit (in seconds) for running the scan operation per invocation
+      DEFAULT_SCAN_TIMEOUT_SECS = 60
+      # default time limit (in seconds) for running the scan operation on a single blob
+      DEFAULT_BLOB_TIMEOUT_SECS = 5
+      # file path where the secrets ruleset file is located
+      RULESET_FILE_PATH = File.expand_path('../../gitleaks.toml', __dir__)
+      # a line ending with this keyword is excluded from the scan
+      GITLEAKS_KEYWORD_IGNORE = 'gitleaks:allow'
+
+      # Sets up the scanner with the given +logger+ and prepares the ruleset in three steps:
+      # 1. Parse the ruleset from +ruleset_path+ (default: +RULESET_FILE_PATH+). Raises +RulesetParseError+
+      # in case the parsing fails.
+      # 2. Extract the keywords from the parsed rules, used for keyword matching before the regex operation.
+      # 3. Build and compile the regex patterns of the parsed rules. Raises +RulesetCompilationError+
+      # in case the compilation fails.
+      def initialize(logger: Logger.new($stdout), ruleset_path: RULESET_FILE_PATH)
+        @logger = logger
+        @rules = parse_ruleset(ruleset_path)
+        @keywords = create_keywords(@rules)
+        @matcher = build_pattern_matcher(@rules)
+      end
+
+      # Runs the Secret Detection scan on the list of given blobs. Both the total scan duration and
+      # the scan duration of each blob are time bound via +timeout+ and +blob_timeout+ respectively.
+      #
+      # +blobs+:: Array of blobs, each of them responding to `id` and `data`.
+      # +timeout+:: Number of seconds (accepts floating point for smaller time values) to limit
+      #             the total scan duration
+      # +blob_timeout+:: Number of seconds (accepts floating point for smaller time values) to limit
+      #                  the scan duration on each blob
+      #
+      # Returns an instance of SecretDetection::Response with the following structure:
+      # {
+      #     status: One of the SecretDetection::Status values
+      #     results: [SecretDetection::Finding], or nil when the input is invalid, no blob contains
+      #              a ruleset keyword, or the whole scan times out
+      # }
+      def secrets_scan(blobs, timeout: DEFAULT_SCAN_TIMEOUT_SECS, blob_timeout: DEFAULT_BLOB_TIMEOUT_SECS)
+        return SecretDetection::Response.new(SecretDetection::Status::INPUT_ERROR) unless validate_scan_input(blobs)
+
+        Timeout.timeout(timeout) do
+          matched_blobs = filter_by_keywords(blobs)
+
+          # without any keyword match there is nothing to run the regex matching on
+          if matched_blobs.empty?
+            SecretDetection::Response.new(SecretDetection::Status::NOT_FOUND)
+          else
+            secrets = find_secrets_bulk(matched_blobs, blob_timeout)
+            SecretDetection::Response.new(overall_scan_status(secrets), secrets)
+          end
+        end
+      rescue Timeout::Error => e
+        logger.error "Secret Detection operation timed out: #{e}"
+        SecretDetection::Response.new(SecretDetection::Status::SCAN_TIMEOUT)
+      end
+
+      private
+
+      attr_reader :logger, :rules, :keywords, :matcher
+
+      # parses the ruleset file at the given path and returns the value of its top-level 'rules' key;
+      # raises +RulesetParseError+ when the file fails to load or does not define any 'rules'
+      def parse_ruleset(ruleset_file_path)
+        TomlRB.load_file(ruleset_file_path).fetch('rules')
+      rescue StandardError => e
+        logger.error "Failed to parse Secret Detection ruleset from '#{ruleset_file_path}' path: #{e}"
+        raise RulesetParseError
+      end
+
+      # builds and compiles the RE2::Set pattern matcher out of the regex of each given rule.
+      # Patterns are added in the order of the rules, as find_secrets indexes +rules+ with the
+      # values returned by the matcher. Raises +RulesetCompilationError+ if the set fails to compile.
+      def build_pattern_matcher(rules)
+        matcher = RE2::Set.new
+        rules.each { |rule| matcher.add(rule['regex']) }
+
+        unless matcher.compile
+          logger.error "Failed to compile Secret Detection rulesets in RE2::Set"
+          raise RulesetCompilationError
+        end
+
+        matcher
+      end
+
+      # collects the keywords declared by all the given rules into a unique set;
+      # rules that declare no keywords contribute nothing
+      def create_keywords(rules)
+        rules
+          .map { |rule| rule['keywords'] }
+          .flatten
+          .compact
+          .to_set
+      end
+
+      # returns only those blobs whose data contains at least one of the
+      # keywords from the keywords list; the returned Array is frozen
+      def filter_by_keywords(blobs)
+        blobs_with_keyword = blobs.select do |blob|
+          keywords.any? do |keyword|
+            blob.data.include?(keyword)
+          end
+        end
+
+        blobs_with_keyword.freeze
+      end
+
+      # finds secrets in each of the given blobs, limiting the scan of a single blob to
+      # +blob_timeout+ seconds. A blob whose scan exceeds the limit is logged and reported
+      # as a single BLOB_TIMEOUT finding. Returns a flat, frozen Array of findings.
+      def find_secrets_bulk(blobs, blob_timeout)
+        all_findings = blobs.flat_map do |blob|
+          Timeout.timeout(blob_timeout) { find_secrets(blob) }
+        rescue Timeout::Error => e
+          logger.error "Secret Detection scan timed out on the blob(id:#{blob.id}): #{e}"
+          SecretDetection::Finding.new(
+            blob.id, SecretDetection::Status::BLOB_TIMEOUT
+          )
+        end
+
+        all_findings.freeze
+      end
+
+      # finds secrets in the given blob by matching each of its lines against the compiled ruleset;
+      # returns an Array of findings, or a single SCAN_ERROR finding when the scan raises an error
+      def find_secrets(blob)
+        secrets = []
+
+        blob.data.each_line.with_index(1) do |line, line_no|
+          # ignore the line scan if it is suffixed with 'gitleaks:allow'; each_line keeps the line
+          # terminator, so trailing whitespace is stripped before checking the suffix
+          next if line.rstrip.end_with?(GITLEAKS_KEYWORD_IGNORE)
+
+          patterns = matcher.match(line, exception: false)
+          next unless patterns.any?
+
+          patterns.each do |pattern|
+            rule = rules[pattern]
+            secrets << SecretDetection::Finding.new(blob.id, SecretDetection::Status::FOUND, line_no, rule['id'],
+              rule['description'])
+          end
+        end
+        secrets
+      rescue StandardError => e
+        logger.error "Secret Detection scan failed on the blob(id:#{blob.id}): #{e}"
+        SecretDetection::Finding.new(blob.id, SecretDetection::Status::SCAN_ERROR)
+      end
+
+      # returns true only when +blobs+ is an Array whose items each respond to both +id+ and +data+
+      def validate_scan_input(blobs)
+        return false unless blobs.instance_of?(Array)
+
+        blobs.all? do |blob|
+          # the data is frozen to avoid additional object allocations on strings; nil data makes the blob invalid
+          blob.respond_to?(:id) && blob.respond_to?(:data) && blob.data.freeze
+        end
+      end
+
+      # derives the status of the whole scan from the given findings:
+      # NOT_FOUND when there are no findings, FOUND when none of the findings is a blob timeout,
+      # SCAN_TIMEOUT when all of the findings are blob timeouts, and
+      # FOUND_WITH_ERRORS when only some of them are
+      def overall_scan_status(found_secrets)
+        return SecretDetection::Status::NOT_FOUND if found_secrets.empty?
+
+        blob_timeouts = found_secrets.count { |el| el.status == SecretDetection::Status::BLOB_TIMEOUT }
+
+        return SecretDetection::Status::FOUND if blob_timeouts.zero?
+        return SecretDetection::Status::SCAN_TIMEOUT if blob_timeouts == found_secrets.length
+
+        SecretDetection::Status::FOUND_WITH_ERRORS
+      end
+    end
+  end
+end
diff --git a/gems/gitlab-secret_detection/lib/gitlab/secret_detection/status.rb b/gems/gitlab-secret_detection/lib/gitlab/secret_detection/status.rb
new file mode 100644
index 0000000000000..45ac04a81b735
--- /dev/null
+++ b/gems/gitlab-secret_detection/lib/gitlab/secret_detection/status.rb
@@ -0,0 +1,16 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module SecretDetection
+    # All the possible statuses emitted by the Scan operation
+    class Status
+      NOT_FOUND = 0 # When scan operation completes with zero findings
+      FOUND = 1 # When scan operation completes with one or more findings
+      FOUND_WITH_ERRORS = 2 # When scan operation completes with one or more findings along with some errors
+      SCAN_TIMEOUT = 3 # When the scan operation runs beyond given time out
+      BLOB_TIMEOUT = 4 # When the scan operation on a blob runs beyond given time out
+      SCAN_ERROR = 5 # When the scan operation on a blob fails with an error
+      INPUT_ERROR = 6 # When the scan operation fails due to invalid input
+    end
+  end
+end
diff --git a/gems/gitlab-secret_detection/lib/gitleaks.toml b/gems/gitlab-secret_detection/lib/gitleaks.toml
new file mode 100644
index 0000000000000..de679a41ea285
--- /dev/null
+++ b/gems/gitlab-secret_detection/lib/gitleaks.toml
@@ -0,0 +1,49 @@
+# This file contains a subset of rules pulled from the original source file.
+# Original Source: https://gitlab.com/gitlab-org/security-products/analyzers/secrets/-/blob/master/gitleaks.toml
+# Reference: https://gitlab.com/gitlab-org/gitlab/-/issues/427011
+title = "gitleaks config"
+
+[[rules]]
+id = "gitlab_personal_access_token"
+description = "GitLab Personal Access Token"
+regex = '''glpat-[0-9a-zA-Z_\-]{20}'''
+tags = ["gitlab", "revocation_type"]
+keywords = [
+    "glpat",
+]
+
+[[rules]]
+id = "gitlab_pipeline_trigger_token"
+description = "GitLab Pipeline Trigger Token"
+regex = '''glptt-[0-9a-zA-Z_\-]{20}'''
+tags = ["gitlab"]
+keywords = [
+    "glptt",
+]
+
+[[rules]]
+id = "gitlab_runner_registration_token"
+description = "GitLab Runner Registration Token"
+regex = '''GR1348941[0-9a-zA-Z_\-]{20}'''
+tags = ["gitlab"]
+keywords = [
+    "GR1348941",
+]
+
+[[rules]]
+id = "gitlab_runner_auth_token"
+description = "GitLab Runner Authentication Token"
+regex = '''glrt-[0-9a-zA-Z_\-]{20}'''
+tags = ["gitlab"]
+keywords = [
+    "glrt",
+]
+
+[[rules]]
+id = "gitlab_feed_token"
+description = "GitLab Feed Token"
+regex = '''glft-[0-9a-zA-Z_\-]{20}'''
+tags = ["gitlab"]
+keywords = [
+    "glft",
+]
diff --git a/gems/gitlab-secret_detection/spec/gitlab/secret_detection_spec.rb b/gems/gitlab-secret_detection/spec/gitlab/secret_detection_spec.rb
deleted file mode 100644
index 112ab8c7468c1..0000000000000
--- a/gems/gitlab-secret_detection/spec/gitlab/secret_detection_spec.rb
+++ /dev/null
@@ -1,7 +0,0 @@
-# frozen_string_literal: true
-
-RSpec.describe Gitlab::SecretDetection do
-  it "has a version number" do
-    expect(Gitlab::SecretDetection::VERSION).not_to be_nil
-  end
-end
diff --git a/gems/gitlab-secret_detection/spec/lib/gitlab/secret_detection/scan_spec.rb b/gems/gitlab-secret_detection/spec/lib/gitlab/secret_detection/scan_spec.rb
new file mode 100644
index 0000000000000..dfe3fdf4bb927
--- /dev/null
+++ b/gems/gitlab-secret_detection/spec/lib/gitlab/secret_detection/scan_spec.rb
@@ -0,0 +1,199 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+RSpec.describe Gitlab::SecretDetection::Scan, feature_category: :secret_detection do
+  subject(:scan) { described_class.new }
+
+  def new_blob(id:, data:)
+    Struct.new(:id, :data).new(id, data)
+  end
+
+  let(:ruleset) do
+    {
+      "title" => "gitleaks config",
+      "rules" => [
+        { "id" => "gitlab_personal_access_token",
+          "description" => "GitLab Personal Access Token",
+          "regex" => "glpat-[0-9a-zA-Z_\\-]{20}",
+          "tags" => %w[gitlab revocation_type],
+          "keywords" => ["glpat"] },
+        { "id" => "gitlab_pipeline_trigger_token",
+          "description" => "GitLab Pipeline Trigger Token",
+          "regex" => "glptt-[0-9a-zA-Z_\\-]{20}",
+          "tags" => ["gitlab"],
+          "keywords" => ["glptt"] },
+        { "id" => "gitlab_runner_registration_token",
+          "description" => "GitLab Runner Registration Token",
+          "regex" => "GR1348941[0-9a-zA-Z_-]{20}",
+          "tags" => ["gitlab"],
+          "keywords" => ["GR1348941"] },
+        { "id" => "gitlab_feed_token",
+          "description" => "GitLab Feed Token",
+          "regex" => "glft-[0-9a-zA-Z_-]{20}",
+          "tags" => ["gitlab"],
+          "keywords" => ["glft"] }
+      ]
+    }
+  end
+
+  it "does not raise an error parsing the toml file" do
+    expect { scan }.not_to raise_error
+  end
+
+  context "when it creates RE2 patterns from file data" do
+    before do
+      allow(scan).to receive(:parse_ruleset).and_return(ruleset)
+    end
+
+    it "does not raise an error when building patterns" do
+      expect { scan }.not_to raise_error
+    end
+  end
+
+  context "when matching patterns" do
+    before do
+      allow(scan).to receive(:parse_ruleset).and_return(ruleset)
+    end
+
+    context 'when the blob does not contain a secret' do
+      let(:blobs) do
+        [
+          new_blob(id: 1234, data: "no secrets")
+        ]
+      end
+
+      it "does not match" do
+        expected_response = Gitlab::SecretDetection::Response.new(Gitlab::SecretDetection::Status::NOT_FOUND)
+        expect(scan.secrets_scan(blobs)).to eq(expected_response)
+      end
+
+      it "attempts to keyword match returning no blobs for further scan" do
+        expect(scan).to receive(:filter_by_keywords).with(blobs).and_return([])
+        scan.secrets_scan(blobs)
+      end
+
+      it "does not attempt to regex match" do
+        expect(scan).not_to receive(:find_secrets_bulk)
+        scan.secrets_scan(blobs)
+      end
+    end
+
+    context "when multiple blobs contains secrets" do
+      let(:blobs) do
+        [
+          new_blob(id: 111, data: "glpat-12312312312312312312"), # gitleaks:allow
+          new_blob(id: 222, data: "\n\nglptt-12312312312312312312"), # gitleaks:allow
+          new_blob(id: 333, data: "data with no secret"),
+          new_blob(id: 444, data: "GR134894112312312312312312312\nglft-12312312312312312312") # gitleaks:allow
+        ]
+      end
+
+      it "matches glpat" do
+        expected_response = Gitlab::SecretDetection::Response.new(
+          Gitlab::SecretDetection::Status::FOUND,
+          [
+            Gitlab::SecretDetection::Finding.new(
+              blobs[0].id,
+              Gitlab::SecretDetection::Status::FOUND,
+              1,
+              ruleset['rules'][0]['id'],
+              ruleset['rules'][0]['description']
+            ),
+            Gitlab::SecretDetection::Finding.new(
+              blobs[1].id,
+              Gitlab::SecretDetection::Status::FOUND,
+              3,
+              ruleset['rules'][1]['id'],
+              ruleset['rules'][1]['description']
+            ),
+            Gitlab::SecretDetection::Finding.new(
+              blobs[3].id,
+              Gitlab::SecretDetection::Status::FOUND,
+              1,
+              ruleset['rules'][2]['id'],
+              ruleset['rules'][2]['description']
+            ),
+            Gitlab::SecretDetection::Finding.new(
+              blobs[3].id,
+              Gitlab::SecretDetection::Status::FOUND,
+              2,
+              ruleset['rules'][3]['id'],
+              ruleset['rules'][3]['description']
+            )
+          ]
+        )
+
+        expect(scan.secrets_scan(blobs)).to eq(expected_response)
+      end
+    end
+
+    context "when configured with time out" do
+      let(:large_data) do
+        ("large data with a secret glpat-12312312312312312312\n" * 10_000_000).freeze # gitleaks:allow
+      end
+
+      let(:blobs) do
+        [
+          new_blob(id: 111, data: "GR134894112312312312312312312"), # gitleaks:allow
+          new_blob(id: 222, data: "data with no secret"),
+          new_blob(id: 333, data: large_data)
+        ]
+      end
+
+      it "whole secret detection scan operation times out" do
+        scan_timeout_secs = 0.000_001 # 1 micro-sec to intentionally timeout large blob
+        response = Gitlab::SecretDetection::Response.new(Gitlab::SecretDetection::Status::SCAN_TIMEOUT)
+        expect(scan.secrets_scan(blobs, timeout: scan_timeout_secs)).to eq(response)
+      end
+
+      it "one of the blobs times out while others continue to get scanned" do
+        each_blob_timeout_secs = 0.000_001 # 1 micro-sec to intentionally timeout large blob
+
+        expected_response = Gitlab::SecretDetection::Response.new(
+          Gitlab::SecretDetection::Status::FOUND_WITH_ERRORS,
+          [
+            Gitlab::SecretDetection::Finding.new(
+              blobs[0].id, Gitlab::SecretDetection::Status::FOUND, 1,
+              ruleset['rules'][2]['id'],
+              ruleset['rules'][2]['description']
+            ),
+            Gitlab::SecretDetection::Finding.new(
+              blobs[2].id, Gitlab::SecretDetection::Status::BLOB_TIMEOUT
+            )
+          ])
+
+        expect(scan.secrets_scan(blobs, blob_timeout: each_blob_timeout_secs)).to eq(expected_response)
+      end
+
+      it "all the blobs time out" do
+        each_blob_timeout_secs = 0.000_001 # 1 micro-sec to intentionally timeout large blob
+
+        all_large_blobs = [
+          new_blob(id: 111, data: large_data),
+          new_blob(id: 222, data: large_data),
+          new_blob(id: 333, data: large_data)
+        ]
+
+        # scan status changes to SCAN_TIMEOUT when *all* the blobs time out
+        expected_scan_status = Gitlab::SecretDetection::Status::SCAN_TIMEOUT
+
+        expected_response = Gitlab::SecretDetection::Response.new(
+          expected_scan_status,
+          [
+            Gitlab::SecretDetection::Finding.new(
+              all_large_blobs[0].id, Gitlab::SecretDetection::Status::BLOB_TIMEOUT
+            ),
+            Gitlab::SecretDetection::Finding.new(
+              all_large_blobs[1].id, Gitlab::SecretDetection::Status::BLOB_TIMEOUT
+            ),
+            Gitlab::SecretDetection::Finding.new(
+              all_large_blobs[2].id, Gitlab::SecretDetection::Status::BLOB_TIMEOUT
+            )
+          ])
+
+        expect(scan.secrets_scan(all_large_blobs, blob_timeout: each_blob_timeout_secs)).to eq(expected_response)
+      end
+    end
+  end
+end
-- 
GitLab