From 7a1f0960202ff3fbc0ef71946e8e32b92b4127c1 Mon Sep 17 00:00:00 2001
From: Shubham Kumar <shukumar@gitlab.com>
Date: Tue, 1 Oct 2024 12:00:36 +0000
Subject: [PATCH] Add and backfill project_id for pages_deployment_states

## What does this MR do and why?

Add and backfill project_id for pages_deployment_states.

This table has a
[desired sharding key](https://docs.gitlab.com/ee/development/database/multiple_databases.html#define-a-desired_sharding_key-to-automatically-backfill-a-sharding_key)
configured ([view configuration](https://gitlab.com/gitlab-org/gitlab/-/blob/master/db/docs/pages_deployment_states.yml)).

This merge request is the first step towards transforming the desired sharding key into a
[sharding key](https://docs.gitlab.com/ee/development/database/multiple_databases.html#defining-a-sharding-key-for-all-cell-local-tables).

This involves three changes:

- Adding a new column that will serve as the sharding key (along with the relevant index and foreign key).
- Populating the sharding key when new records are created by adding a database function and trigger.
- Scheduling a [batched background migration](https://docs.gitlab.com/ee/development/database/batched_background_migrations.html)
  to set the sharding key for existing records.

Once the background migration has completed, a second merge request will be created to finalize the background
migration and validate the not null constraint.

## How to verify

We have assigned a random backend engineer from ~"group::knowledge" to review these changes. Please review this merge
request from a ~backend perspective. The main thing we are looking to verify is that the added column and association
match the values specified by the [desired sharding key](https://gitlab.com/gitlab-org/gitlab/-/blob/master/db/docs/pages_deployment_states.yml)
configuration and that backfilling the column from this other table makes sense in the context of this feature.

When you are finished, please:

1. Trigger the [database testing pipeline](https://docs.gitlab.com/ee/development/database/database_migration_pipeline.html)
   as instructed by Danger.
1. Request a review from the ~backend maintainer and ~database reviewer suggested by Danger.

If you have any questions or concerns, reach out to `@tigerwnz` or @shubhamkrai.

This merge request was generated by a once off keep implemented in
https://gitlab.com/gitlab-org/gitlab/-/merge_requests/143774

This change was generated by
[gitlab-housekeeper](https://gitlab.com/gitlab-org/gitlab/-/tree/master/gems/gitlab-housekeeper)
using the Keeps::BackfillDesiredShardingKeySmallTable keep.

To provide feedback on your experience with `gitlab-housekeeper` please create an issue with the
label ~"GitLab Housekeeper" and consider pinging the author of this keep.

Changelog: other
---
 ...ill_pages_deployment_states_project_id.yml |  9 +++++
 db/docs/pages_deployment_states.yml           |  1 +
 ...d_project_id_to_pages_deployment_states.rb |  9 +++++
 ...x_pages_deployment_states_on_project_id.rb | 16 ++++++++
 ...d_pages_deployment_states_project_id_fk.rb | 16 ++++++++
 ...es_deployment_states_project_id_trigger.rb | 25 ++++++++++++
 ...fill_pages_deployment_states_project_id.rb | 40 +++++++++++++++++++
 db/schema_migrations/20240930123052           |  1 +
 db/schema_migrations/20240930123053           |  1 +
 db/schema_migrations/20240930123054           |  1 +
 db/schema_migrations/20240930123055           |  1 +
 db/schema_migrations/20240930123056           |  1 +
 db/structure.sql                              | 24 +++++++++++
 ...fill_pages_deployment_states_project_id.rb | 10 +++++
 ...pages_deployment_states_project_id_spec.rb | 16 ++++++++
 ...pages_deployment_states_project_id_spec.rb | 33 +++++++++++++++
 16 files changed, 204 insertions(+)
 create mode 100644 db/docs/batched_background_migrations/backfill_pages_deployment_states_project_id.yml
 create mode 100644 db/migrate/20240930123052_add_project_id_to_pages_deployment_states.rb
 create mode 100644 db/post_migrate/20240930123053_index_pages_deployment_states_on_project_id.rb
 create mode 100644 db/post_migrate/20240930123054_add_pages_deployment_states_project_id_fk.rb
 create mode 100644 db/post_migrate/20240930123055_add_pages_deployment_states_project_id_trigger.rb
 create mode 100644 db/post_migrate/20240930123056_queue_backfill_pages_deployment_states_project_id.rb
 create mode 100644 db/schema_migrations/20240930123052
 create mode 100644 db/schema_migrations/20240930123053
 create mode 100644 db/schema_migrations/20240930123054
 create mode 100644 db/schema_migrations/20240930123055
 create mode 100644 db/schema_migrations/20240930123056
 create mode 100644 lib/gitlab/background_migration/backfill_pages_deployment_states_project_id.rb
 create mode 100644 spec/lib/gitlab/background_migration/backfill_pages_deployment_states_project_id_spec.rb
 create mode 100644 spec/migrations/20240930123056_queue_backfill_pages_deployment_states_project_id_spec.rb

diff --git a/db/docs/batched_background_migrations/backfill_pages_deployment_states_project_id.yml b/db/docs/batched_background_migrations/backfill_pages_deployment_states_project_id.yml
new file mode 100644
index 0000000000000..8202b2c04ff4d
--- /dev/null
+++ b/db/docs/batched_background_migrations/backfill_pages_deployment_states_project_id.yml
@@ -0,0 +1,9 @@
+---
+migration_job_name: BackfillPagesDeploymentStatesProjectId
+description: Backfills sharding key `pages_deployment_states.project_id` from `pages_deployments`.
+feature_category: pages
+introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/167613
+milestone: '17.5'
+queued_migration_version: 20240930123056
+finalize_after: '2024-10-22'
+finalized_by: # version of the migration that finalized this BBM
diff --git a/db/docs/pages_deployment_states.yml b/db/docs/pages_deployment_states.yml
index e37aa7330ce3a..18857bb4a5505 100644
--- a/db/docs/pages_deployment_states.yml
+++ b/db/docs/pages_deployment_states.yml
@@ -19,3 +19,4 @@ desired_sharding_key:
         table: pages_deployments
         sharding_key: project_id
         belongs_to: pages_deployment
+desired_sharding_key_migration_job_name: BackfillPagesDeploymentStatesProjectId
diff --git a/db/migrate/20240930123052_add_project_id_to_pages_deployment_states.rb b/db/migrate/20240930123052_add_project_id_to_pages_deployment_states.rb
new file mode 100644
index 0000000000000..d6cb46bd12386
--- /dev/null
+++ b/db/migrate/20240930123052_add_project_id_to_pages_deployment_states.rb
@@ -0,0 +1,9 @@
+# frozen_string_literal: true
+
+class AddProjectIdToPagesDeploymentStates < Gitlab::Database::Migration[2.2]
+  milestone '17.5'
+
+  def change
+    add_column :pages_deployment_states, :project_id, :bigint
+  end
+end
diff --git a/db/post_migrate/20240930123053_index_pages_deployment_states_on_project_id.rb b/db/post_migrate/20240930123053_index_pages_deployment_states_on_project_id.rb
new file mode 100644
index 0000000000000..3736d5320fb12
--- /dev/null
+++ b/db/post_migrate/20240930123053_index_pages_deployment_states_on_project_id.rb
@@ -0,0 +1,16 @@
+# frozen_string_literal: true
+
+class IndexPagesDeploymentStatesOnProjectId < Gitlab::Database::Migration[2.2]
+  milestone '17.5'
+  disable_ddl_transaction!
+
+  INDEX_NAME = 'index_pages_deployment_states_on_project_id'
+
+  def up
+    add_concurrent_index :pages_deployment_states, :project_id, name: INDEX_NAME
+  end
+
+  def down
+    remove_concurrent_index_by_name :pages_deployment_states, INDEX_NAME
+  end
+end
diff --git a/db/post_migrate/20240930123054_add_pages_deployment_states_project_id_fk.rb b/db/post_migrate/20240930123054_add_pages_deployment_states_project_id_fk.rb
new file mode 100644
index 0000000000000..8a3f897be9a94
--- /dev/null
+++ b/db/post_migrate/20240930123054_add_pages_deployment_states_project_id_fk.rb
@@ -0,0 +1,16 @@
+# frozen_string_literal: true
+
+class AddPagesDeploymentStatesProjectIdFk < Gitlab::Database::Migration[2.2]
+  milestone '17.5'
+  disable_ddl_transaction!
+
+  def up
+    add_concurrent_foreign_key :pages_deployment_states, :projects, column: :project_id, on_delete: :cascade
+  end
+
+  def down
+    with_lock_retries do
+      remove_foreign_key :pages_deployment_states, column: :project_id
+    end
+  end
+end
diff --git a/db/post_migrate/20240930123055_add_pages_deployment_states_project_id_trigger.rb b/db/post_migrate/20240930123055_add_pages_deployment_states_project_id_trigger.rb
new file mode 100644
index 0000000000000..b1e541b08409f
--- /dev/null
+++ b/db/post_migrate/20240930123055_add_pages_deployment_states_project_id_trigger.rb
@@ -0,0 +1,25 @@
+# frozen_string_literal: true
+
+class AddPagesDeploymentStatesProjectIdTrigger < Gitlab::Database::Migration[2.2]
+  milestone '17.5'
+
+  def up
+    install_sharding_key_assignment_trigger(
+      table: :pages_deployment_states,
+      sharding_key: :project_id,
+      parent_table: :pages_deployments,
+      parent_sharding_key: :project_id,
+      foreign_key: :pages_deployment_id
+    )
+  end
+
+  def down
+    remove_sharding_key_assignment_trigger(
+      table: :pages_deployment_states,
+      sharding_key: :project_id,
+      parent_table: :pages_deployments,
+      parent_sharding_key: :project_id,
+      foreign_key: :pages_deployment_id
+    )
+  end
+end
diff --git a/db/post_migrate/20240930123056_queue_backfill_pages_deployment_states_project_id.rb b/db/post_migrate/20240930123056_queue_backfill_pages_deployment_states_project_id.rb
new file mode 100644
index 0000000000000..4d8f9152f4d28
--- /dev/null
+++ b/db/post_migrate/20240930123056_queue_backfill_pages_deployment_states_project_id.rb
@@ -0,0 +1,40 @@
+# frozen_string_literal: true
+
+class QueueBackfillPagesDeploymentStatesProjectId < Gitlab::Database::Migration[2.2]
+  milestone '17.5'
+  restrict_gitlab_migration gitlab_schema: :gitlab_main_cell
+
+  MIGRATION = "BackfillPagesDeploymentStatesProjectId"
+  DELAY_INTERVAL = 2.minutes
+  BATCH_SIZE = 1000
+  SUB_BATCH_SIZE = 100
+
+  def up
+    queue_batched_background_migration(
+      MIGRATION,
+      :pages_deployment_states,
+      :pages_deployment_id,
+      :project_id,
+      :pages_deployments,
+      :project_id,
+      :pages_deployment_id,
+      job_interval: DELAY_INTERVAL,
+      batch_size: BATCH_SIZE,
+      sub_batch_size: SUB_BATCH_SIZE
+    )
+  end
+
+  def down
+    delete_batched_background_migration(
+      MIGRATION,
+      :pages_deployment_states,
+      :pages_deployment_id,
+      [
+        :project_id,
+        :pages_deployments,
+        :project_id,
+        :pages_deployment_id
+      ]
+    )
+  end
+end
diff --git a/db/schema_migrations/20240930123052 b/db/schema_migrations/20240930123052
new file mode 100644
index 0000000000000..606f058dd4bc4
--- /dev/null
+++ b/db/schema_migrations/20240930123052
@@ -0,0 +1 @@
+06fcf8b77415a84dfd30254e2ab3d220cb2bb92cdf1960cb50281f262a5815e3
\ No newline at end of file
diff --git a/db/schema_migrations/20240930123053 b/db/schema_migrations/20240930123053
new file mode 100644
index 0000000000000..cd52872d977d8
--- /dev/null
+++ b/db/schema_migrations/20240930123053
@@ -0,0 +1 @@
+97f22306864d8c4fb739d4a2df80abfbeafb8ee2e14905b692ff97acd526be33
\ No newline at end of file
diff --git a/db/schema_migrations/20240930123054 b/db/schema_migrations/20240930123054
new file mode 100644
index 0000000000000..59a6e3a25e6d4
--- /dev/null
+++ b/db/schema_migrations/20240930123054
@@ -0,0 +1 @@
+8a6d023abf2b37d6099ee7e7373b3b153fcde822a850119c01cafc6590b12b8c
\ No newline at end of file
diff --git a/db/schema_migrations/20240930123055 b/db/schema_migrations/20240930123055
new file mode 100644
index 0000000000000..f3f026513bd68
--- /dev/null
+++ b/db/schema_migrations/20240930123055
@@ -0,0 +1 @@
+03bd5975e16db4f4e6e5ebfc7ac3b693c29fa5726cda318f1117c413fb05e230
\ No newline at end of file
diff --git a/db/schema_migrations/20240930123056 b/db/schema_migrations/20240930123056
new file mode 100644
index 0000000000000..26b3d01a483e7
--- /dev/null
+++ b/db/schema_migrations/20240930123056
@@ -0,0 +1 @@
+082d0e983d22c78baabb5615894db1c48f195eec081fe701b65b8d97816a5a33
\ No newline at end of file
diff --git a/db/structure.sql b/db/structure.sql
index c70a36970661e..af8df7490f7bf 100644
--- a/db/structure.sql
+++ b/db/structure.sql
@@ -1577,6 +1577,22 @@ RETURN NEW;
 END
 $$;
 
+CREATE FUNCTION trigger_81b4c93e7133() RETURNS trigger
+    LANGUAGE plpgsql
+    AS $$
+BEGIN
+IF NEW."project_id" IS NULL THEN
+  SELECT "project_id"
+  INTO NEW."project_id"
+  FROM "pages_deployments"
+  WHERE "pages_deployments"."id" = NEW."pages_deployment_id";
+END IF;
+
+RETURN NEW;
+
+END
+$$;
+
 CREATE FUNCTION trigger_8204480b3a2e() RETURNS trigger
     LANGUAGE plpgsql
     AS $$
@@ -15574,6 +15590,7 @@ CREATE TABLE pages_deployment_states (
     verification_retry_count smallint,
     verification_checksum bytea,
     verification_failure text,
+    project_id bigint,
     CONSTRAINT check_15217e8c3a CHECK ((char_length(verification_failure) <= 255))
 );
 
@@ -29939,6 +29956,8 @@ CREATE INDEX index_pages_deployment_states_needs_verification ON pages_deploymen
 
 CREATE INDEX index_pages_deployment_states_on_pages_deployment_id ON pages_deployment_states USING btree (pages_deployment_id);
 
+CREATE INDEX index_pages_deployment_states_on_project_id ON pages_deployment_states USING btree (project_id);
+
 CREATE INDEX index_pages_deployment_states_on_verification_state ON pages_deployment_states USING btree (verification_state);
 
 CREATE INDEX index_pages_deployment_states_pending_verification ON pages_deployment_states USING btree (verified_at NULLS FIRST) WHERE (verification_state = 0);
@@ -33393,6 +33412,8 @@ CREATE TRIGGER trigger_7a8b08eed782 BEFORE INSERT OR UPDATE ON boards_epic_board
 
 CREATE TRIGGER trigger_7de792ddbc05 BEFORE INSERT OR UPDATE ON dast_site_validations FOR EACH ROW EXECUTE FUNCTION trigger_7de792ddbc05();
 
+CREATE TRIGGER trigger_81b4c93e7133 BEFORE INSERT OR UPDATE ON pages_deployment_states FOR EACH ROW EXECUTE FUNCTION trigger_81b4c93e7133();
+
 CREATE TRIGGER trigger_8204480b3a2e BEFORE INSERT OR UPDATE ON incident_management_escalation_rules FOR EACH ROW EXECUTE FUNCTION trigger_8204480b3a2e();
 
 CREATE TRIGGER trigger_84d67ad63e93 BEFORE INSERT OR UPDATE ON wiki_page_slugs FOR EACH ROW EXECUTE FUNCTION trigger_84d67ad63e93();
@@ -34318,6 +34339,9 @@ ALTER TABLE ONLY requirements
 ALTER TABLE ONLY catalog_resource_components
     ADD CONSTRAINT fk_85bb1d1e79 FOREIGN KEY (project_id) REFERENCES projects(id) ON DELETE CASCADE;
 
+ALTER TABLE ONLY pages_deployment_states
+    ADD CONSTRAINT fk_8610d3d1cc FOREIGN KEY (project_id) REFERENCES projects(id) ON DELETE CASCADE;
+
 ALTER TABLE ONLY ci_build_pending_states
     ADD CONSTRAINT fk_861cd17da3_p FOREIGN KEY (partition_id, build_id) REFERENCES p_ci_builds(partition_id, id) ON UPDATE CASCADE ON DELETE CASCADE;
 
diff --git a/lib/gitlab/background_migration/backfill_pages_deployment_states_project_id.rb b/lib/gitlab/background_migration/backfill_pages_deployment_states_project_id.rb
new file mode 100644
index 0000000000000..5e523f757db23
--- /dev/null
+++ b/lib/gitlab/background_migration/backfill_pages_deployment_states_project_id.rb
@@ -0,0 +1,10 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module BackgroundMigration
+    class BackfillPagesDeploymentStatesProjectId < BackfillDesiredShardingKeyJob
+      operation_name :backfill_pages_deployment_states_project_id
+      feature_category :pages
+    end
+  end
+end
diff --git a/spec/lib/gitlab/background_migration/backfill_pages_deployment_states_project_id_spec.rb b/spec/lib/gitlab/background_migration/backfill_pages_deployment_states_project_id_spec.rb
new file mode 100644
index 0000000000000..57c0e7ffc3275
--- /dev/null
+++ b/spec/lib/gitlab/background_migration/backfill_pages_deployment_states_project_id_spec.rb
@@ -0,0 +1,16 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+RSpec.describe Gitlab::BackgroundMigration::BackfillPagesDeploymentStatesProjectId,
+  feature_category: :pages,
+  schema: 20240930123052 do
+  include_examples 'desired sharding key backfill job' do
+    let(:batch_table) { :pages_deployment_states }
+    let(:batch_column) { :pages_deployment_id }
+    let(:backfill_column) { :project_id }
+    let(:backfill_via_table) { :pages_deployments }
+    let(:backfill_via_column) { :project_id }
+    let(:backfill_via_foreign_key) { :pages_deployment_id }
+  end
+end
diff --git a/spec/migrations/20240930123056_queue_backfill_pages_deployment_states_project_id_spec.rb b/spec/migrations/20240930123056_queue_backfill_pages_deployment_states_project_id_spec.rb
new file mode 100644
index 0000000000000..91fc5bcd5a675
--- /dev/null
+++ b/spec/migrations/20240930123056_queue_backfill_pages_deployment_states_project_id_spec.rb
@@ -0,0 +1,33 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+require_migration!
+
+RSpec.describe QueueBackfillPagesDeploymentStatesProjectId, feature_category: :pages do
+  let!(:batched_migration) { described_class::MIGRATION }
+
+  it 'schedules a new batched migration' do
+    reversible_migration do |migration|
+      migration.before -> {
+        expect(batched_migration).not_to have_scheduled_batched_migration
+      }
+
+      migration.after -> {
+        expect(batched_migration).to have_scheduled_batched_migration(
+          table_name: :pages_deployment_states,
+          column_name: :pages_deployment_id,
+          interval: described_class::DELAY_INTERVAL,
+          batch_size: described_class::BATCH_SIZE,
+          sub_batch_size: described_class::SUB_BATCH_SIZE,
+          gitlab_schema: :gitlab_main_cell,
+          job_arguments: [
+            :project_id,
+            :pages_deployments,
+            :project_id,
+            :pages_deployment_id
+          ]
+        )
+      }
+    end
+  end
+end
-- 
GitLab