From 34849d27534b4d6655a71b8faac263e511e049bb Mon Sep 17 00:00:00 2001
From: Shubham Kumar <shukumar@gitlab.com>
Date: Tue, 1 Oct 2024 15:26:47 +0000
Subject: [PATCH] Add and backfill project_id for packages_nuget_metadata

## What does this MR do and why?

Add and backfill project_id for packages_nuget_metadata.

This table has a
[desired sharding key](https://docs.gitlab.com/ee/development/database/multiple_databases.html#define-a-desired_sharding_key-to-automatically-backfill-a-sharding_key)
configured ([view configuration](https://gitlab.com/gitlab-org/gitlab/-/blob/master/db/docs/packages_nuget_metadata.yml)).

This merge request is the first step towards transforming the desired sharding key into a
[sharding key](https://docs.gitlab.com/ee/development/database/multiple_databases.html#defining-a-sharding-key-for-all-cell-local-tables).

This involves three changes:

- Adding a new column that will serve as the sharding key (along with the relevant index and foreign key).
- Populating the sharding key when new records are created by adding a database function and trigger.
- Scheduling a [batched background migration](https://docs.gitlab.com/ee/development/database/batched_background_migrations.html)
  to set the sharding key for existing records.

Once the background migration has completed, a second merge request will be created to finalize the background
migration and validate the not null constraint.

## How to verify

We have assigned a random backend engineer from ~"group::package registry" to review these changes. Please review this merge
request from a ~backend perspective. The main thing we are looking to verify is that the added column and association
match the values specified by the [desired sharding key](https://gitlab.com/gitlab-org/gitlab/-/blob/master/db/docs/packages_nuget_metadata.yml)
configuration and that backfilling the column from this other table makes sense in the context of this feature.

When you are finished, please:

1. Trigger the [database testing pipeline](https://docs.gitlab.com/ee/development/database/database_migration_pipeline.html)
   as instructed by Danger.
1. Request a review from the ~backend maintainer and ~database reviewer suggested by Danger.

If you have any questions or concerns, reach out to `@tigerwnz` or `@shubhamkrai`.

This merge request was generated by a one-off keep implemented in
https://gitlab.com/gitlab-org/gitlab/-/merge_requests/143774

This change was generated by
[gitlab-housekeeper](https://gitlab.com/gitlab-org/gitlab/-/tree/master/gems/gitlab-housekeeper)
using the Keeps::BackfillDesiredShardingKeySmallTable keep.

To provide feedback on your experience with `gitlab-housekeeper` please create an issue with the
label ~"GitLab Housekeeper" and consider pinging the author of this keep.

Changelog: other
---
 ...ill_packages_nuget_metadata_project_id.yml |  9 +++++
 db/docs/packages_nuget_metadata.yml           |  1 +
 ...d_project_id_to_packages_nuget_metadata.rb |  9 +++++
 ...x_packages_nuget_metadata_on_project_id.rb | 16 ++++++++
 ...d_packages_nuget_metadata_project_id_fk.rb | 16 ++++++++
 ...kages_nuget_metadata_project_id_trigger.rb | 25 ++++++++++++
 ...fill_packages_nuget_metadata_project_id.rb | 40 +++++++++++++++++++
 db/schema_migrations/20240930121132           |  1 +
 db/schema_migrations/20240930121133           |  1 +
 db/schema_migrations/20240930121134           |  1 +
 db/schema_migrations/20240930121135           |  1 +
 db/schema_migrations/20240930121136           |  1 +
 db/structure.sql                              | 24 +++++++++++
 ...fill_packages_nuget_metadata_project_id.rb | 10 +++++
 ...packages_nuget_metadata_project_id_spec.rb | 16 ++++++++
 ...packages_nuget_metadata_project_id_spec.rb | 33 +++++++++++++++
 16 files changed, 204 insertions(+)
 create mode 100644 db/docs/batched_background_migrations/backfill_packages_nuget_metadata_project_id.yml
 create mode 100644 db/migrate/20240930121132_add_project_id_to_packages_nuget_metadata.rb
 create mode 100644 db/post_migrate/20240930121133_index_packages_nuget_metadata_on_project_id.rb
 create mode 100644 db/post_migrate/20240930121134_add_packages_nuget_metadata_project_id_fk.rb
 create mode 100644 db/post_migrate/20240930121135_add_packages_nuget_metadata_project_id_trigger.rb
 create mode 100644 db/post_migrate/20240930121136_queue_backfill_packages_nuget_metadata_project_id.rb
 create mode 100644 db/schema_migrations/20240930121132
 create mode 100644 db/schema_migrations/20240930121133
 create mode 100644 db/schema_migrations/20240930121134
 create mode 100644 db/schema_migrations/20240930121135
 create mode 100644 db/schema_migrations/20240930121136
 create mode 100644 lib/gitlab/background_migration/backfill_packages_nuget_metadata_project_id.rb
 create mode 100644 spec/lib/gitlab/background_migration/backfill_packages_nuget_metadata_project_id_spec.rb
 create mode 100644 spec/migrations/20240930121136_queue_backfill_packages_nuget_metadata_project_id_spec.rb

diff --git a/db/docs/batched_background_migrations/backfill_packages_nuget_metadata_project_id.yml b/db/docs/batched_background_migrations/backfill_packages_nuget_metadata_project_id.yml
new file mode 100644
index 000000000000..5b38828db285
--- /dev/null
+++ b/db/docs/batched_background_migrations/backfill_packages_nuget_metadata_project_id.yml
@@ -0,0 +1,9 @@
+---
+migration_job_name: BackfillPackagesNugetMetadataProjectId
+description: Backfills sharding key `packages_nuget_metadata.project_id` from `packages_packages`.
+feature_category: package_registry
+introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/167607
+milestone: '17.5'
+queued_migration_version: 20240930121136
+finalize_after: '2024-10-22'
+finalized_by: # version of the migration that finalized this BBM
diff --git a/db/docs/packages_nuget_metadata.yml b/db/docs/packages_nuget_metadata.yml
index 639539e9b135..28f64be60419 100644
--- a/db/docs/packages_nuget_metadata.yml
+++ b/db/docs/packages_nuget_metadata.yml
@@ -19,3 +19,4 @@ desired_sharding_key:
         table: packages_packages
         sharding_key: project_id
         belongs_to: package
+desired_sharding_key_migration_job_name: BackfillPackagesNugetMetadataProjectId
diff --git a/db/migrate/20240930121132_add_project_id_to_packages_nuget_metadata.rb b/db/migrate/20240930121132_add_project_id_to_packages_nuget_metadata.rb
new file mode 100644
index 000000000000..4df4ef3d197e
--- /dev/null
+++ b/db/migrate/20240930121132_add_project_id_to_packages_nuget_metadata.rb
@@ -0,0 +1,9 @@
+# frozen_string_literal: true
+
+class AddProjectIdToPackagesNugetMetadata < Gitlab::Database::Migration[2.2]
+  milestone '17.5'
+
+  def change # adds the column that is intended to become the table's sharding key
+    add_column :packages_nuget_metadata, :project_id, :bigint # no NOT NULL here; existing rows are backfilled later
+  end
+end
diff --git a/db/post_migrate/20240930121133_index_packages_nuget_metadata_on_project_id.rb b/db/post_migrate/20240930121133_index_packages_nuget_metadata_on_project_id.rb
new file mode 100644
index 000000000000..8d51bad26973
--- /dev/null
+++ b/db/post_migrate/20240930121133_index_packages_nuget_metadata_on_project_id.rb
@@ -0,0 +1,16 @@
+# frozen_string_literal: true
+
+class IndexPackagesNugetMetadataOnProjectId < Gitlab::Database::Migration[2.2]
+  milestone '17.5'
+  disable_ddl_transaction! # NOTE(review): presumably required by the concurrent index helpers below — confirm
+
+  INDEX_NAME = 'index_packages_nuget_metadata_on_project_id'
+
+  def up # db/structure.sql shows the result: a btree index on packages_nuget_metadata (project_id)
+    add_concurrent_index :packages_nuget_metadata, :project_id, name: INDEX_NAME
+  end
+
+  def down # removes the same index, looked up by INDEX_NAME
+    remove_concurrent_index_by_name :packages_nuget_metadata, INDEX_NAME
+  end
+end
diff --git a/db/post_migrate/20240930121134_add_packages_nuget_metadata_project_id_fk.rb b/db/post_migrate/20240930121134_add_packages_nuget_metadata_project_id_fk.rb
new file mode 100644
index 000000000000..d9c95cea180c
--- /dev/null
+++ b/db/post_migrate/20240930121134_add_packages_nuget_metadata_project_id_fk.rb
@@ -0,0 +1,16 @@
+# frozen_string_literal: true
+
+class AddPackagesNugetMetadataProjectIdFk < Gitlab::Database::Migration[2.2]
+  milestone '17.5'
+  disable_ddl_transaction! # NOTE(review): presumably required by add_concurrent_foreign_key — confirm
+
+  def up # db/structure.sql shows the result: FK project_id -> projects(id) ON DELETE CASCADE
+    add_concurrent_foreign_key :packages_nuget_metadata, :projects, column: :project_id, on_delete: :cascade
+  end
+
+  def down
+    with_lock_retries do # helper defined outside this patch; by its name it retries around lock acquisition
+      remove_foreign_key :packages_nuget_metadata, column: :project_id
+    end
+  end
+end
diff --git a/db/post_migrate/20240930121135_add_packages_nuget_metadata_project_id_trigger.rb b/db/post_migrate/20240930121135_add_packages_nuget_metadata_project_id_trigger.rb
new file mode 100644
index 000000000000..4f16cefec7a8
--- /dev/null
+++ b/db/post_migrate/20240930121135_add_packages_nuget_metadata_project_id_trigger.rb
@@ -0,0 +1,25 @@
+# frozen_string_literal: true
+
+class AddPackagesNugetMetadataProjectIdTrigger < Gitlab::Database::Migration[2.2]
+  milestone '17.5'
+
+  def up # db/structure.sql shows the result: a BEFORE INSERT OR UPDATE row trigger on this table
+    install_sharding_key_assignment_trigger(
+      table: :packages_nuget_metadata,
+      sharding_key: :project_id, # only assigned when the incoming row has project_id NULL
+      parent_table: :packages_packages,
+      parent_sharding_key: :project_id, # value copied from the parent row
+      foreign_key: :package_id # matched against packages_packages.id
+    )
+  end
+
+  def down # passes the same arguments as #up
+    remove_sharding_key_assignment_trigger(
+      table: :packages_nuget_metadata,
+      sharding_key: :project_id,
+      parent_table: :packages_packages,
+      parent_sharding_key: :project_id,
+      foreign_key: :package_id
+    )
+  end
+end
diff --git a/db/post_migrate/20240930121136_queue_backfill_packages_nuget_metadata_project_id.rb b/db/post_migrate/20240930121136_queue_backfill_packages_nuget_metadata_project_id.rb
new file mode 100644
index 000000000000..c055bee7b221
--- /dev/null
+++ b/db/post_migrate/20240930121136_queue_backfill_packages_nuget_metadata_project_id.rb
@@ -0,0 +1,40 @@
+# frozen_string_literal: true
+
+class QueueBackfillPackagesNugetMetadataProjectId < Gitlab::Database::Migration[2.2]
+  milestone '17.5'
+  restrict_gitlab_migration gitlab_schema: :gitlab_main_cell
+
+  MIGRATION = "BackfillPackagesNugetMetadataProjectId" # name of the job class under Gitlab::BackgroundMigration
+  DELAY_INTERVAL = 2.minutes # passed below as job_interval
+  BATCH_SIZE = 1000
+  SUB_BATCH_SIZE = 100
+
+  def up # queues the batched background migration that backfills project_id
+    queue_batched_background_migration(
+      MIGRATION,
+      :packages_nuget_metadata, # table that is batched over
+      :package_id, # column used for batching
+      :project_id, # job argument: column to backfill
+      :packages_packages, # job argument: table the value is read from
+      :project_id, # job argument: source column on that table
+      :package_id, # job argument: foreign key pointing at that table
+      job_interval: DELAY_INTERVAL,
+      batch_size: BATCH_SIZE,
+      sub_batch_size: SUB_BATCH_SIZE
+    )
+  end
+
+  def down # identifies the queued migration by the same table, column and job arguments as #up
+    delete_batched_background_migration(
+      MIGRATION,
+      :packages_nuget_metadata,
+      :package_id,
+      [
+        :project_id,
+        :packages_packages,
+        :project_id,
+        :package_id
+      ]
+    )
+  end
+end
diff --git a/db/schema_migrations/20240930121132 b/db/schema_migrations/20240930121132
new file mode 100644
index 000000000000..5bd4f69344ed
--- /dev/null
+++ b/db/schema_migrations/20240930121132
@@ -0,0 +1 @@
+1f8ef02007a199d46b542de398345c66d5106d18c72dd5cc18f162a4a17fa315
\ No newline at end of file
diff --git a/db/schema_migrations/20240930121133 b/db/schema_migrations/20240930121133
new file mode 100644
index 000000000000..7e054400244f
--- /dev/null
+++ b/db/schema_migrations/20240930121133
@@ -0,0 +1 @@
+0062f4cb57d36d99b9e647419d874b02f4ae135944f73f6f7bc96d38498a7374
\ No newline at end of file
diff --git a/db/schema_migrations/20240930121134 b/db/schema_migrations/20240930121134
new file mode 100644
index 000000000000..51f582836cfd
--- /dev/null
+++ b/db/schema_migrations/20240930121134
@@ -0,0 +1 @@
+704079bebfa5cc60e7f5eb3e6e50d823af9f68acc9b518da0df1b10b33398462
\ No newline at end of file
diff --git a/db/schema_migrations/20240930121135 b/db/schema_migrations/20240930121135
new file mode 100644
index 000000000000..9e7646f635c9
--- /dev/null
+++ b/db/schema_migrations/20240930121135
@@ -0,0 +1 @@
+63f9db5b418f528a9356396c958ca6d56453b72b6e01efd10b85a30cee58487c
\ No newline at end of file
diff --git a/db/schema_migrations/20240930121136 b/db/schema_migrations/20240930121136
new file mode 100644
index 000000000000..409f03218cdb
--- /dev/null
+++ b/db/schema_migrations/20240930121136
@@ -0,0 +1 @@
+39d969b1f1b2edf41be73346a1b89a9c313272eebbd483cffc0b0328e7849f76
\ No newline at end of file
diff --git a/db/structure.sql b/db/structure.sql
index a6add27da228..24c101e6e6e1 100644
--- a/db/structure.sql
+++ b/db/structure.sql
@@ -905,6 +905,22 @@ RETURN NEW;
 END
 $$;
 
+CREATE FUNCTION trigger_14a39509be0a() RETURNS trigger
+    LANGUAGE plpgsql
+    AS $$
+BEGIN
+IF NEW."project_id" IS NULL THEN
+  SELECT "project_id"
+  INTO NEW."project_id"
+  FROM "packages_packages"
+  WHERE "packages_packages"."id" = NEW."package_id";
+END IF;
+
+RETURN NEW;
+
+END
+$$;
+
 CREATE FUNCTION trigger_158ac875f254() RETURNS trigger
     LANGUAGE plpgsql
     AS $$
@@ -15276,6 +15292,7 @@ CREATE TABLE packages_nuget_metadata (
     authors text,
     description text,
     normalized_version text,
+    project_id bigint,
     CONSTRAINT check_9973c0cc33 CHECK ((char_length(normalized_version) <= 255)),
     CONSTRAINT check_d39a5fe9ee CHECK ((char_length(description) <= 4000)),
     CONSTRAINT check_e2fc129ebd CHECK ((char_length(authors) <= 255)),
@@ -29828,6 +29845,8 @@ CREATE INDEX index_packages_npm_metadata_caches_on_project_id_status ON packages
 
 CREATE INDEX index_packages_nuget_dl_metadata_on_dependency_link_id ON packages_nuget_dependency_link_metadata USING btree (dependency_link_id);
 
+CREATE INDEX index_packages_nuget_metadata_on_project_id ON packages_nuget_metadata USING btree (project_id);
+
 CREATE UNIQUE INDEX index_packages_nuget_symbols_on_object_storage_key ON packages_nuget_symbols USING btree (object_storage_key);
 
 CREATE INDEX index_packages_nuget_symbols_on_package_id ON packages_nuget_symbols USING btree (package_id);
@@ -33258,6 +33277,8 @@ CREATE TRIGGER trigger_0f38e5af9adf BEFORE INSERT OR UPDATE ON ml_candidate_para
 
 CREATE TRIGGER trigger_13d4aa8fe3dd BEFORE INSERT OR UPDATE ON draft_notes FOR EACH ROW EXECUTE FUNCTION trigger_13d4aa8fe3dd();
 
+CREATE TRIGGER trigger_14a39509be0a BEFORE INSERT OR UPDATE ON packages_nuget_metadata FOR EACH ROW EXECUTE FUNCTION trigger_14a39509be0a();
+
 CREATE TRIGGER trigger_158ac875f254 BEFORE INSERT OR UPDATE ON approval_group_rules_users FOR EACH ROW EXECUTE FUNCTION trigger_158ac875f254();
 
 CREATE TRIGGER trigger_174b23fa3dfb BEFORE INSERT OR UPDATE ON approval_project_rules_users FOR EACH ROW EXECUTE FUNCTION trigger_174b23fa3dfb();
@@ -33710,6 +33731,9 @@ ALTER TABLE ONLY coverage_fuzzing_corpuses
 ALTER TABLE ONLY namespace_settings
     ADD CONSTRAINT fk_20cf0eb2f9 FOREIGN KEY (default_compliance_framework_id) REFERENCES compliance_management_frameworks(id) ON DELETE SET NULL;
 
+ALTER TABLE ONLY packages_nuget_metadata
+    ADD CONSTRAINT fk_21569c0856 FOREIGN KEY (project_id) REFERENCES projects(id) ON DELETE CASCADE;
+
 ALTER TABLE p_ci_build_trace_metadata
     ADD CONSTRAINT fk_21d25cac1a_p FOREIGN KEY (partition_id, trace_artifact_id) REFERENCES p_ci_job_artifacts(partition_id, id) ON UPDATE CASCADE ON DELETE CASCADE;
 
diff --git a/lib/gitlab/background_migration/backfill_packages_nuget_metadata_project_id.rb b/lib/gitlab/background_migration/backfill_packages_nuget_metadata_project_id.rb
new file mode 100644
index 000000000000..3f42b6498d66
--- /dev/null
+++ b/lib/gitlab/background_migration/backfill_packages_nuget_metadata_project_id.rb
@@ -0,0 +1,10 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module BackgroundMigration
+    class BackfillPackagesNugetMetadataProjectId < BackfillDesiredShardingKeyJob # defines no methods of its own
+      operation_name :backfill_packages_nuget_metadata_project_id
+      feature_category :package_registry
+    end
+  end
+end
diff --git a/spec/lib/gitlab/background_migration/backfill_packages_nuget_metadata_project_id_spec.rb b/spec/lib/gitlab/background_migration/backfill_packages_nuget_metadata_project_id_spec.rb
new file mode 100644
index 000000000000..69374f9cfc76
--- /dev/null
+++ b/spec/lib/gitlab/background_migration/backfill_packages_nuget_metadata_project_id_spec.rb
@@ -0,0 +1,16 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+RSpec.describe Gitlab::BackgroundMigration::BackfillPackagesNugetMetadataProjectId,
+  feature_category: :package_registry,
+  schema: 20240930121132 do # version of the migration that adds packages_nuget_metadata.project_id
+  include_examples 'desired sharding key backfill job' do # shared examples defined outside this patch
+    let(:batch_table) { :packages_nuget_metadata }
+    let(:batch_column) { :package_id }
+    let(:backfill_column) { :project_id }
+    let(:backfill_via_table) { :packages_packages }
+    let(:backfill_via_column) { :project_id }
+    let(:backfill_via_foreign_key) { :package_id }
+  end
+end
diff --git a/spec/migrations/20240930121136_queue_backfill_packages_nuget_metadata_project_id_spec.rb b/spec/migrations/20240930121136_queue_backfill_packages_nuget_metadata_project_id_spec.rb
new file mode 100644
index 000000000000..a2db34efd43b
--- /dev/null
+++ b/spec/migrations/20240930121136_queue_backfill_packages_nuget_metadata_project_id_spec.rb
@@ -0,0 +1,33 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+require_migration!
+
+RSpec.describe QueueBackfillPackagesNugetMetadataProjectId, feature_category: :package_registry do
+  let!(:batched_migration) { described_class::MIGRATION } # the job class name string from the migration
+
+  it 'schedules a new batched migration' do
+    reversible_migration do |migration|
+      migration.before -> { # presumably checked while the migration is not applied — confirm against the helper
+        expect(batched_migration).not_to have_scheduled_batched_migration
+      }
+
+      migration.after -> { # presumably checked once the migration has run — confirm against the helper
+        expect(batched_migration).to have_scheduled_batched_migration(
+          table_name: :packages_nuget_metadata,
+          column_name: :package_id,
+          interval: described_class::DELAY_INTERVAL,
+          batch_size: described_class::BATCH_SIZE,
+          sub_batch_size: described_class::SUB_BATCH_SIZE,
+          gitlab_schema: :gitlab_main_cell,
+          job_arguments: [ # same four values, in the same order, as the migration passes
+            :project_id,
+            :packages_packages,
+            :project_id,
+            :package_id
+          ]
+        )
+      }
+    end
+  end
+end
-- 
GitLab