diff --git a/db/docs/batched_background_migrations/update_sbom_occurrences_component_name_based_on_pep503.yml b/db/docs/batched_background_migrations/update_sbom_occurrences_component_name_based_on_pep503.yml
new file mode 100644
index 0000000000000000000000000000000000000000..967f51069fdc87abd5cfe3ee2bb6424e5baa345e
--- /dev/null
+++ b/db/docs/batched_background_migrations/update_sbom_occurrences_component_name_based_on_pep503.yml
@@ -0,0 +1,8 @@
+---
+migration_job_name: UpdateSbomOccurrencesComponentNameBasedOnPep503
+description: Updates sbom_occurrences.component_name in accordance with PEP503
+feature_category: software_composition_analysis
+introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/146776
+milestone: '16.11'
+queued_migration_version: 20240306120522
+finalize_after: '2024-03-24'
diff --git a/db/post_migrate/20240306120522_queue_update_sbom_occurrences_component_name_based_on_pep503.rb b/db/post_migrate/20240306120522_queue_update_sbom_occurrences_component_name_based_on_pep503.rb
new file mode 100644
index 0000000000000000000000000000000000000000..3e76bf7d34351b84895f5ad05dc8db168a4cc81a
--- /dev/null
+++ b/db/post_migrate/20240306120522_queue_update_sbom_occurrences_component_name_based_on_pep503.rb
@@ -0,0 +1,27 @@
+# frozen_string_literal: true
+
+class QueueUpdateSbomOccurrencesComponentNameBasedOnPep503 < Gitlab::Database::Migration[2.2]
+  milestone '16.11'
+
+  restrict_gitlab_migration gitlab_schema: :gitlab_main
+
+  MIGRATION = "UpdateSbomOccurrencesComponentNameBasedOnPep503"
+  DELAY_INTERVAL = 2.minutes
+  BATCH_SIZE = 2000
+  SUB_BATCH_SIZE = 200
+
+  def up
+    queue_batched_background_migration(
+      MIGRATION,
+      :sbom_occurrences,
+      :id,
+      job_interval: DELAY_INTERVAL,
+      batch_size: BATCH_SIZE,
+      sub_batch_size: SUB_BATCH_SIZE
+    )
+  end
+
+  def down
+    delete_batched_background_migration(MIGRATION, :sbom_occurrences, :id, [])
+  end
+end
diff --git a/db/schema_migrations/20240306120522 b/db/schema_migrations/20240306120522
new file mode 100644
index 0000000000000000000000000000000000000000..975ba2e88168dd984b81c2c0cf0033c62c82d310
--- /dev/null
+++ b/db/schema_migrations/20240306120522
@@ -0,0 +1 @@
+2645479cd4ce377b4183c1fade37050a2b143d9697f341c690483a11a202bde4
\ No newline at end of file
diff --git a/lib/gitlab/background_migration/update_sbom_occurrences_component_name_based_on_pep503.rb b/lib/gitlab/background_migration/update_sbom_occurrences_component_name_based_on_pep503.rb
new file mode 100644
index 0000000000000000000000000000000000000000..852663faeb5b607ed81d48d484c1d4aa6c3fab31
--- /dev/null
+++ b/lib/gitlab/background_migration/update_sbom_occurrences_component_name_based_on_pep503.rb
@@ -0,0 +1,63 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module BackgroundMigration
+    # Normalizes sbom_occurrences.component_name for occurrences joined to an
+    # sbom_components row with purl_type PYPI_PURL_TYPE whose name contains a
+    # dot, replacing every match of Sbom::PackageUrl::Normalizer::PYPI_REGEX
+    # with '-'.
+    class UpdateSbomOccurrencesComponentNameBasedOnPep503 < BatchedMigrationJob
+      # sbom_components.purl_type value selected by this migration.
+      # NOTE(review): presumably the PyPI purl type - confirm against the enum.
+      PYPI_PURL_TYPE = 8
+
+      operation_name :update_occurrence_component_name_based_on_pep_503
+      feature_category :software_composition_analysis
+
+      def perform
+        each_sub_batch do |sub_batch|
+          update_occurrence_component_name(sub_batch)
+        end
+      end
+
+      private
+
+      # Returns +name+ with PYPI_REGEX matches replaced by '-', quoted as a SQL
+      # literal so it can be interpolated into the VALUES list below.
+      # NOTE(review): this depends on an application constant; if it changes
+      # after this migration ships, so does the migration's behavior.
+      def normalized_name(name)
+        connection.quote(name.gsub(Sbom::PackageUrl::Normalizer::PYPI_REGEX, '-'))
+      end
+
+      # Updates the matching rows of +batch+ with a single UPDATE ... FROM
+      # (VALUES ...) statement; does nothing when no row matches.
+      def update_occurrence_component_name(batch)
+        # Pluck only the two needed columns instead of instantiating a full
+        # record for every matching row.
+        rows = batch
+          .joins("INNER JOIN sbom_components ON sbom_occurrences.component_id = sbom_components.id")
+          .where("sbom_components.purl_type = ?", PYPI_PURL_TYPE)
+          .where("sbom_occurrences.component_name LIKE '%.%'")
+          .pluck('sbom_occurrences.id', 'sbom_occurrences.component_name')
+
+        return if rows.empty?
+
+        values_list = rows.map { |id, name| "(#{id}, #{normalized_name(name)})" }.join(", ")
+
+        sql = <<~SQL
+          WITH new_values (id, component_name) AS (
+            VALUES
+              #{values_list}
+          )
+          UPDATE sbom_occurrences
+          SET component_name = new_values.component_name
+          FROM new_values
+          WHERE sbom_occurrences.id = new_values.id
+        SQL
+
+        connection.execute(sql)
+      end
+    end
+  end
+end
diff --git a/spec/lib/gitlab/background_migration/update_sbom_occurrences_component_name_based_on_pep503_spec.rb b/spec/lib/gitlab/background_migration/update_sbom_occurrences_component_name_based_on_pep503_spec.rb
new file mode 100644
index 0000000000000000000000000000000000000000..299baca54bdbe96c66e553fe64843e4162ff86ff
--- /dev/null
+++ b/spec/lib/gitlab/background_migration/update_sbom_occurrences_component_name_based_on_pep503_spec.rb
@@ -0,0 +1,70 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+RSpec.describe Gitlab::BackgroundMigration::UpdateSbomOccurrencesComponentNameBasedOnPep503, feature_category: :software_composition_analysis do
+  let(:occurrences) { table(:sbom_occurrences) }
+  let(:components) { table(:sbom_components) }
+  let(:projects) { table(:projects) }
+  let(:namespaces) { table(:namespaces) }
+  let(:namespace) { namespaces.create!(name: 'name', path: 'path') }
+  let(:project) { projects.create!(namespace_id: namespace.id, project_namespace_id: namespace.id) }
+
+  describe '#perform' do
+    subject(:perform_migration) do
+      described_class.new(
+        start_id: occurrences.first.id,
+        end_id: occurrences.last.id,
+        batch_table: :sbom_occurrences,
+        batch_column: :id,
+        sub_batch_size: occurrences.count,
+        pause_ms: 0,
+        connection: ActiveRecord::Base.connection
+      ).perform
+    end
+
+    context 'without data' do
+      before do
+        component = components.create!(name: 'azure', purl_type: 8, component_type: 0)
+        occurrences.create!(project_id: project.id, component_id: component.id, commit_sha: 'commit_sha',
+          uuid: SecureRandom.uuid, component_name: 'azure')
+      end
+
+      it 'does not raise exception' do
+        expect { perform_migration }.not_to raise_error
+      end
+    end
+
+    context 'with data' do
+      before do
+        %w[aws-cdk.region-info azure.identity backports.cached-property backports.csv].each do |input_name|
+          component = components.create!(name: input_name, purl_type: 8, component_type: 0)
+          occurrences.create!(project_id: project.id, component_id: component.id, commit_sha: 'commit_sha',
+            uuid: SecureRandom.uuid, component_name: input_name)
+        end
+      end
+
+      let(:expected_names) { %w[aws-cdk-region-info azure-identity backports-cached-property backports-csv] }
+
+      it 'successfully updates name according to PEP 0503' do
+        perform_migration
+
+        # Order explicitly: row order without ORDER BY is not guaranteed, least of all after an UPDATE.
+        expect(occurrences.order(:id).pluck(:component_name)).to eq(expected_names)
+      end
+
+      context 'with unrelated components' do
+        let(:component_name) { 'unrelated.component' }
+        let(:unrelated_component) { components.create!(name: component_name, purl_type: 6, component_type: 0) }
+        let!(:unrelated_occurrence) do
+          occurrences.create!(project_id: project.id, component_id: unrelated_component.id, commit_sha: 'commit_sha',
+            uuid: SecureRandom.uuid, component_name: component_name)
+        end
+
+        it 'does not update the unrelated occurrence' do
+          expect { perform_migration }.not_to change { unrelated_occurrence.reload.component_name }
+        end
+      end
+    end
+  end
+end
diff --git a/spec/migrations/20240306120522_queue_update_sbom_occurrences_component_name_based_on_pep503_spec.rb b/spec/migrations/20240306120522_queue_update_sbom_occurrences_component_name_based_on_pep503_spec.rb
new file mode 100644
index 0000000000000000000000000000000000000000..1e42d66090c422c8c7054299bafdc83ec0c3afbb
--- /dev/null
+++ b/spec/migrations/20240306120522_queue_update_sbom_occurrences_component_name_based_on_pep503_spec.rb
@@ -0,0 +1,26 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+require_migration!
+
+RSpec.describe QueueUpdateSbomOccurrencesComponentNameBasedOnPep503, feature_category: :software_composition_analysis do
+  let!(:batched_migration) { described_class::MIGRATION }
+
+  it 'schedules a new batched migration' do
+    reversible_migration do |migration|
+      migration.before -> {
+        expect(batched_migration).not_to have_scheduled_batched_migration
+      }
+
+      migration.after -> {
+        expect(batched_migration).to have_scheduled_batched_migration(
+          table_name: :sbom_occurrences,
+          column_name: :id,
+          interval: described_class::DELAY_INTERVAL,
+          batch_size: described_class::BATCH_SIZE,
+          sub_batch_size: described_class::SUB_BATCH_SIZE
+        )
+      }
+    end
+  end
+end