From 19b483346280cf0879ef541be068f1de6aff9ee0 Mon Sep 17 00:00:00 2001
From: Oscar Tovar <otovar@gitlab.com>
Date: Tue, 6 Feb 2024 16:44:38 -0500
Subject: [PATCH] Fix SBOM ingestion errors caused by input file path
 validation

In https://gitlab.com/gitlab-org/gitlab/-/merge_requests/140282, we
started to ingest the "input_file_path" of SBOM components found by
Trivy. The components do not have an actual file path, so we instead
convert them into a URI of sorts that can be detected by the
"container-image:" magic string prefix. This pseudo URI contains the
entire fully qualified name of the container image, and can often be
longer than 255 characters, which started to cause a spike in SBOM
ingestion errors. To fix this, we raise the maximum length to 1024
characters, which is twice the length the pseudo URI would have if the
container image used the longest image name and tag supported by the
GitLab container registry.

Fix https://gitlab.com/gitlab-org/gitlab/-/issues/440705

Changelog: fixed
---
 ...se_sbom_occurrence_input_file_name_limit.rb | 18 ++++++++++++++++++
 db/schema_migrations/20240206210111            |  1 +
 db/structure.sql                               |  2 +-
 ee/app/models/sbom/occurrence.rb               |  2 +-
 ee/spec/models/sbom/occurrence_spec.rb         |  2 +-
 5 files changed, 22 insertions(+), 3 deletions(-)
 create mode 100644 db/migrate/20240206210111_increase_sbom_occurrence_input_file_name_limit.rb
 create mode 100644 db/schema_migrations/20240206210111

diff --git a/db/migrate/20240206210111_increase_sbom_occurrence_input_file_name_limit.rb b/db/migrate/20240206210111_increase_sbom_occurrence_input_file_name_limit.rb
new file mode 100644
index 000000000000..83eb0bf1ff9f
--- /dev/null
+++ b/db/migrate/20240206210111_increase_sbom_occurrence_input_file_name_limit.rb
@@ -0,0 +1,18 @@
+# frozen_string_literal: true
+
+class IncreaseSbomOccurrenceInputFileNameLimit < Gitlab::Database::Migration[2.2]
+  disable_ddl_transaction! # NOTE(review): presumably required by the text-limit helpers below - confirm
+
+  milestone '16.9'
+
+  def up
+    add_text_limit(:sbom_occurrences, :input_file_path, 1024, # new limit is added before the old one is removed
+      constraint_name: check_constraint_name(:sbom_occurrences, :input_file_path, 'max_length_1KiB'))
+    remove_text_limit :sbom_occurrences, :input_file_path, # removes the previous 'max_length' constraint
+      constraint_name: check_constraint_name(:sbom_occurrences, :input_file_path, 'max_length')
+  end
+
+  def down
+    # no-op: restoring the 255-character limit could fail if records with length(input_file_path) > 255 exist
+  end
+end
diff --git a/db/schema_migrations/20240206210111 b/db/schema_migrations/20240206210111
new file mode 100644
index 000000000000..f7492bdb773f
--- /dev/null
+++ b/db/schema_migrations/20240206210111
@@ -0,0 +1 @@
+86e7dc8cce7fee1b9b4a2492bcb7db181c480d347b8490120e2d5337da90daa6
\ No newline at end of file
diff --git a/db/structure.sql b/db/structure.sql
index c3e70f72ddfc..5f4af450b8fe 100644
--- a/db/structure.sql
+++ b/db/structure.sql
@@ -23655,7 +23655,7 @@ CREATE TABLE sbom_occurrences (
     traversal_ids bigint[] DEFAULT '{}'::bigint[] NOT NULL,
     CONSTRAINT check_3f2d2c7ffc CHECK ((char_length(package_manager) <= 255)),
     CONSTRAINT check_9b29021fa8 CHECK ((char_length(component_name) <= 255)),
-    CONSTRAINT check_bd1367d4c1 CHECK ((char_length(input_file_path) <= 255))
+    CONSTRAINT check_e6b8437cfe CHECK ((char_length(input_file_path) <= 1024))
 );
 
 CREATE SEQUENCE sbom_occurrences_id_seq
diff --git a/ee/app/models/sbom/occurrence.rb b/ee/app/models/sbom/occurrence.rb
index bea7ebc57aa0..42d463e9b51e 100644
--- a/ee/app/models/sbom/occurrence.rb
+++ b/ee/app/models/sbom/occurrence.rb
@@ -29,7 +29,7 @@ class Occurrence < ApplicationRecord
     validates :uuid, presence: true, uniqueness: { case_sensitive: false }
     validates :package_manager, length: { maximum: 255 }
     validates :component_name, length: { maximum: 255 }
-    validates :input_file_path, length: { maximum: 255 }
+    validates :input_file_path, length: { maximum: 1024 }
     validates :licenses, json_schema: { filename: 'sbom_occurrences-licenses' }
 
     delegate :name, to: :component
diff --git a/ee/spec/models/sbom/occurrence_spec.rb b/ee/spec/models/sbom/occurrence_spec.rb
index 27499eb5c0e7..d0fa248ea478 100644
--- a/ee/spec/models/sbom/occurrence_spec.rb
+++ b/ee/spec/models/sbom/occurrence_spec.rb
@@ -31,7 +31,7 @@
     it { is_expected.to validate_uniqueness_of(:uuid).case_insensitive }
     it { is_expected.to validate_length_of(:package_manager).is_at_most(255) }
     it { is_expected.to validate_length_of(:component_name).is_at_most(255) }
-    it { is_expected.to validate_length_of(:input_file_path).is_at_most(255) }
+    it { is_expected.to validate_length_of(:input_file_path).is_at_most(1024) }
 
     describe '#licenses' do
       subject { build(:sbom_occurrence, licenses: licenses) }
-- 
GitLab