From 34849d27534b4d6655a71b8faac263e511e049bb Mon Sep 17 00:00:00 2001 From: Shubham Kumar <shukumar@gitlab.com> Date: Tue, 1 Oct 2024 15:26:47 +0000 Subject: [PATCH] Add and backfill project_id for packages_nuget_metadata ## What does this MR do and why? Add and backfill project_id for packages_nuget_metadata. This table has a [desired sharding key](https://docs.gitlab.com/ee/development/database/multiple_databases.html#define-a-desired_sharding_key-to-automatically-backfill-a-sharding_key) configured ([view configuration](https://gitlab.com/gitlab-org/gitlab/-/blob/master/db/docs/packages_nuget_metadata.yml)). This merge request is the first step towards transforming the desired sharding key into a [sharding key](https://docs.gitlab.com/ee/development/database/multiple_databases.html#defining-a-sharding-key-for-all-cell-local-tables). This involves three changes: - Adding a new column that will serve as the sharding key (along with the relevant index and foreign key). - Populating the sharding key when new records are created by adding a database function and trigger. - Scheduling a [batched background migration](https://docs.gitlab.com/ee/development/database/batched_background_migrations.html) to set the sharding key for existing records. Once the background migration has completed, a second merge request will be created to finalize the background migration and validate the `NOT NULL` constraint. ## How to verify We have assigned a random backend engineer from ~"group::package registry" to review these changes. Please review this merge request from a ~backend perspective. The main thing we are looking to verify is that the added column and association match the values specified by the [desired sharding key](https://gitlab.com/gitlab-org/gitlab/-/blob/master/db/docs/packages_nuget_metadata.yml) configuration and that backfilling the column from the `packages_packages` table makes sense in the context of this feature. When you are finished, please: 1. 
Trigger the [database testing pipeline](https://docs.gitlab.com/ee/development/database/database_migration_pipeline.html) as instructed by Danger. 1. Request a review from the ~backend maintainer and ~database reviewer suggested by Danger. If you have any questions or concerns, reach out to `@tigerwnz` or @shubhamkrai. This merge request was generated by a one-off keep implemented in https://gitlab.com/gitlab-org/gitlab/-/merge_requests/143774 This change was generated by [gitlab-housekeeper](https://gitlab.com/gitlab-org/gitlab/-/tree/master/gems/gitlab-housekeeper) using the Keeps::BackfillDesiredShardingKeySmallTable keep. To provide feedback on your experience with `gitlab-housekeeper`, please create an issue with the label ~"GitLab Housekeeper" and consider pinging the author of this keep. Changelog: other --- ...ill_packages_nuget_metadata_project_id.yml | 9 +++++ db/docs/packages_nuget_metadata.yml | 1 + ...d_project_id_to_packages_nuget_metadata.rb | 9 +++++ ...x_packages_nuget_metadata_on_project_id.rb | 16 ++++++++ ...d_packages_nuget_metadata_project_id_fk.rb | 16 ++++++++ ...kages_nuget_metadata_project_id_trigger.rb | 25 ++++++++++++ ...fill_packages_nuget_metadata_project_id.rb | 40 +++++++++++++++++++ db/schema_migrations/20240930121132 | 1 + db/schema_migrations/20240930121133 | 1 + db/schema_migrations/20240930121134 | 1 + db/schema_migrations/20240930121135 | 1 + db/schema_migrations/20240930121136 | 1 + db/structure.sql | 24 +++++++++++ ...fill_packages_nuget_metadata_project_id.rb | 10 +++++ ...packages_nuget_metadata_project_id_spec.rb | 16 ++++++++ ...packages_nuget_metadata_project_id_spec.rb | 33 +++++++++++++++ 16 files changed, 204 insertions(+) create mode 100644 db/docs/batched_background_migrations/backfill_packages_nuget_metadata_project_id.yml create mode 100644 db/migrate/20240930121132_add_project_id_to_packages_nuget_metadata.rb create mode 100644 db/post_migrate/20240930121133_index_packages_nuget_metadata_on_project_id.rb create 
mode 100644 db/post_migrate/20240930121134_add_packages_nuget_metadata_project_id_fk.rb create mode 100644 db/post_migrate/20240930121135_add_packages_nuget_metadata_project_id_trigger.rb create mode 100644 db/post_migrate/20240930121136_queue_backfill_packages_nuget_metadata_project_id.rb create mode 100644 db/schema_migrations/20240930121132 create mode 100644 db/schema_migrations/20240930121133 create mode 100644 db/schema_migrations/20240930121134 create mode 100644 db/schema_migrations/20240930121135 create mode 100644 db/schema_migrations/20240930121136 create mode 100644 lib/gitlab/background_migration/backfill_packages_nuget_metadata_project_id.rb create mode 100644 spec/lib/gitlab/background_migration/backfill_packages_nuget_metadata_project_id_spec.rb create mode 100644 spec/migrations/20240930121136_queue_backfill_packages_nuget_metadata_project_id_spec.rb diff --git a/db/docs/batched_background_migrations/backfill_packages_nuget_metadata_project_id.yml b/db/docs/batched_background_migrations/backfill_packages_nuget_metadata_project_id.yml new file mode 100644 index 000000000000..5b38828db285 --- /dev/null +++ b/db/docs/batched_background_migrations/backfill_packages_nuget_metadata_project_id.yml @@ -0,0 +1,9 @@ +--- +migration_job_name: BackfillPackagesNugetMetadataProjectId +description: Backfills sharding key `packages_nuget_metadata.project_id` from `packages_packages`. 
+feature_category: package_registry +introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/167607 +milestone: '17.5' +queued_migration_version: 20240930121136 +finalize_after: '2024-10-22' +finalized_by: # version of the migration that finalized this BBM diff --git a/db/docs/packages_nuget_metadata.yml b/db/docs/packages_nuget_metadata.yml index 639539e9b135..28f64be60419 100644 --- a/db/docs/packages_nuget_metadata.yml +++ b/db/docs/packages_nuget_metadata.yml @@ -19,3 +19,4 @@ desired_sharding_key: table: packages_packages sharding_key: project_id belongs_to: package +desired_sharding_key_migration_job_name: BackfillPackagesNugetMetadataProjectId diff --git a/db/migrate/20240930121132_add_project_id_to_packages_nuget_metadata.rb b/db/migrate/20240930121132_add_project_id_to_packages_nuget_metadata.rb new file mode 100644 index 000000000000..4df4ef3d197e --- /dev/null +++ b/db/migrate/20240930121132_add_project_id_to_packages_nuget_metadata.rb @@ -0,0 +1,9 @@ +# frozen_string_literal: true + +class AddProjectIdToPackagesNugetMetadata < Gitlab::Database::Migration[2.2] + milestone '17.5' + + def change + add_column :packages_nuget_metadata, :project_id, :bigint + end +end diff --git a/db/post_migrate/20240930121133_index_packages_nuget_metadata_on_project_id.rb b/db/post_migrate/20240930121133_index_packages_nuget_metadata_on_project_id.rb new file mode 100644 index 000000000000..8d51bad26973 --- /dev/null +++ b/db/post_migrate/20240930121133_index_packages_nuget_metadata_on_project_id.rb @@ -0,0 +1,16 @@ +# frozen_string_literal: true + +class IndexPackagesNugetMetadataOnProjectId < Gitlab::Database::Migration[2.2] + milestone '17.5' + disable_ddl_transaction! 
+ + INDEX_NAME = 'index_packages_nuget_metadata_on_project_id' + + def up + add_concurrent_index :packages_nuget_metadata, :project_id, name: INDEX_NAME + end + + def down + remove_concurrent_index_by_name :packages_nuget_metadata, INDEX_NAME + end +end diff --git a/db/post_migrate/20240930121134_add_packages_nuget_metadata_project_id_fk.rb b/db/post_migrate/20240930121134_add_packages_nuget_metadata_project_id_fk.rb new file mode 100644 index 000000000000..d9c95cea180c --- /dev/null +++ b/db/post_migrate/20240930121134_add_packages_nuget_metadata_project_id_fk.rb @@ -0,0 +1,16 @@ +# frozen_string_literal: true + +class AddPackagesNugetMetadataProjectIdFk < Gitlab::Database::Migration[2.2] + milestone '17.5' + disable_ddl_transaction! + + def up + add_concurrent_foreign_key :packages_nuget_metadata, :projects, column: :project_id, on_delete: :cascade + end + + def down + with_lock_retries do + remove_foreign_key :packages_nuget_metadata, column: :project_id + end + end +end diff --git a/db/post_migrate/20240930121135_add_packages_nuget_metadata_project_id_trigger.rb b/db/post_migrate/20240930121135_add_packages_nuget_metadata_project_id_trigger.rb new file mode 100644 index 000000000000..4f16cefec7a8 --- /dev/null +++ b/db/post_migrate/20240930121135_add_packages_nuget_metadata_project_id_trigger.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +class AddPackagesNugetMetadataProjectIdTrigger < Gitlab::Database::Migration[2.2] + milestone '17.5' + + def up + install_sharding_key_assignment_trigger( + table: :packages_nuget_metadata, + sharding_key: :project_id, + parent_table: :packages_packages, + parent_sharding_key: :project_id, + foreign_key: :package_id + ) + end + + def down + remove_sharding_key_assignment_trigger( + table: :packages_nuget_metadata, + sharding_key: :project_id, + parent_table: :packages_packages, + parent_sharding_key: :project_id, + foreign_key: :package_id + ) + end +end diff --git 
a/db/post_migrate/20240930121136_queue_backfill_packages_nuget_metadata_project_id.rb b/db/post_migrate/20240930121136_queue_backfill_packages_nuget_metadata_project_id.rb new file mode 100644 index 000000000000..c055bee7b221 --- /dev/null +++ b/db/post_migrate/20240930121136_queue_backfill_packages_nuget_metadata_project_id.rb @@ -0,0 +1,40 @@ +# frozen_string_literal: true + +class QueueBackfillPackagesNugetMetadataProjectId < Gitlab::Database::Migration[2.2] + milestone '17.5' + restrict_gitlab_migration gitlab_schema: :gitlab_main_cell + + MIGRATION = "BackfillPackagesNugetMetadataProjectId" + DELAY_INTERVAL = 2.minutes + BATCH_SIZE = 1000 + SUB_BATCH_SIZE = 100 + + def up + queue_batched_background_migration( + MIGRATION, + :packages_nuget_metadata, + :package_id, + :project_id, + :packages_packages, + :project_id, + :package_id, + job_interval: DELAY_INTERVAL, + batch_size: BATCH_SIZE, + sub_batch_size: SUB_BATCH_SIZE + ) + end + + def down + delete_batched_background_migration( + MIGRATION, + :packages_nuget_metadata, + :package_id, + [ + :project_id, + :packages_packages, + :project_id, + :package_id + ] + ) + end +end diff --git a/db/schema_migrations/20240930121132 b/db/schema_migrations/20240930121132 new file mode 100644 index 000000000000..5bd4f69344ed --- /dev/null +++ b/db/schema_migrations/20240930121132 @@ -0,0 +1 @@ +1f8ef02007a199d46b542de398345c66d5106d18c72dd5cc18f162a4a17fa315 \ No newline at end of file diff --git a/db/schema_migrations/20240930121133 b/db/schema_migrations/20240930121133 new file mode 100644 index 000000000000..7e054400244f --- /dev/null +++ b/db/schema_migrations/20240930121133 @@ -0,0 +1 @@ +0062f4cb57d36d99b9e647419d874b02f4ae135944f73f6f7bc96d38498a7374 \ No newline at end of file diff --git a/db/schema_migrations/20240930121134 b/db/schema_migrations/20240930121134 new file mode 100644 index 000000000000..51f582836cfd --- /dev/null +++ b/db/schema_migrations/20240930121134 @@ -0,0 +1 @@ 
+704079bebfa5cc60e7f5eb3e6e50d823af9f68acc9b518da0df1b10b33398462 \ No newline at end of file diff --git a/db/schema_migrations/20240930121135 b/db/schema_migrations/20240930121135 new file mode 100644 index 000000000000..9e7646f635c9 --- /dev/null +++ b/db/schema_migrations/20240930121135 @@ -0,0 +1 @@ +63f9db5b418f528a9356396c958ca6d56453b72b6e01efd10b85a30cee58487c \ No newline at end of file diff --git a/db/schema_migrations/20240930121136 b/db/schema_migrations/20240930121136 new file mode 100644 index 000000000000..409f03218cdb --- /dev/null +++ b/db/schema_migrations/20240930121136 @@ -0,0 +1 @@ +39d969b1f1b2edf41be73346a1b89a9c313272eebbd483cffc0b0328e7849f76 \ No newline at end of file diff --git a/db/structure.sql b/db/structure.sql index a6add27da228..24c101e6e6e1 100644 --- a/db/structure.sql +++ b/db/structure.sql @@ -905,6 +905,22 @@ RETURN NEW; END $$; +CREATE FUNCTION trigger_14a39509be0a() RETURNS trigger + LANGUAGE plpgsql + AS $$ +BEGIN +IF NEW."project_id" IS NULL THEN + SELECT "project_id" + INTO NEW."project_id" + FROM "packages_packages" + WHERE "packages_packages"."id" = NEW."package_id"; +END IF; + +RETURN NEW; + +END +$$; + CREATE FUNCTION trigger_158ac875f254() RETURNS trigger LANGUAGE plpgsql AS $$ @@ -15276,6 +15292,7 @@ CREATE TABLE packages_nuget_metadata ( authors text, description text, normalized_version text, + project_id bigint, CONSTRAINT check_9973c0cc33 CHECK ((char_length(normalized_version) <= 255)), CONSTRAINT check_d39a5fe9ee CHECK ((char_length(description) <= 4000)), CONSTRAINT check_e2fc129ebd CHECK ((char_length(authors) <= 255)), @@ -29828,6 +29845,8 @@ CREATE INDEX index_packages_npm_metadata_caches_on_project_id_status ON packages CREATE INDEX index_packages_nuget_dl_metadata_on_dependency_link_id ON packages_nuget_dependency_link_metadata USING btree (dependency_link_id); +CREATE INDEX index_packages_nuget_metadata_on_project_id ON packages_nuget_metadata USING btree (project_id); + CREATE UNIQUE INDEX 
index_packages_nuget_symbols_on_object_storage_key ON packages_nuget_symbols USING btree (object_storage_key); CREATE INDEX index_packages_nuget_symbols_on_package_id ON packages_nuget_symbols USING btree (package_id); @@ -33258,6 +33277,8 @@ CREATE TRIGGER trigger_0f38e5af9adf BEFORE INSERT OR UPDATE ON ml_candidate_para CREATE TRIGGER trigger_13d4aa8fe3dd BEFORE INSERT OR UPDATE ON draft_notes FOR EACH ROW EXECUTE FUNCTION trigger_13d4aa8fe3dd(); +CREATE TRIGGER trigger_14a39509be0a BEFORE INSERT OR UPDATE ON packages_nuget_metadata FOR EACH ROW EXECUTE FUNCTION trigger_14a39509be0a(); + CREATE TRIGGER trigger_158ac875f254 BEFORE INSERT OR UPDATE ON approval_group_rules_users FOR EACH ROW EXECUTE FUNCTION trigger_158ac875f254(); CREATE TRIGGER trigger_174b23fa3dfb BEFORE INSERT OR UPDATE ON approval_project_rules_users FOR EACH ROW EXECUTE FUNCTION trigger_174b23fa3dfb(); @@ -33710,6 +33731,9 @@ ALTER TABLE ONLY coverage_fuzzing_corpuses ALTER TABLE ONLY namespace_settings ADD CONSTRAINT fk_20cf0eb2f9 FOREIGN KEY (default_compliance_framework_id) REFERENCES compliance_management_frameworks(id) ON DELETE SET NULL; +ALTER TABLE ONLY packages_nuget_metadata + ADD CONSTRAINT fk_21569c0856 FOREIGN KEY (project_id) REFERENCES projects(id) ON DELETE CASCADE; + ALTER TABLE p_ci_build_trace_metadata ADD CONSTRAINT fk_21d25cac1a_p FOREIGN KEY (partition_id, trace_artifact_id) REFERENCES p_ci_job_artifacts(partition_id, id) ON UPDATE CASCADE ON DELETE CASCADE; diff --git a/lib/gitlab/background_migration/backfill_packages_nuget_metadata_project_id.rb b/lib/gitlab/background_migration/backfill_packages_nuget_metadata_project_id.rb new file mode 100644 index 000000000000..3f42b6498d66 --- /dev/null +++ b/lib/gitlab/background_migration/backfill_packages_nuget_metadata_project_id.rb @@ -0,0 +1,10 @@ +# frozen_string_literal: true + +module Gitlab + module BackgroundMigration + class BackfillPackagesNugetMetadataProjectId < BackfillDesiredShardingKeyJob + operation_name 
:backfill_packages_nuget_metadata_project_id + feature_category :package_registry + end + end +end diff --git a/spec/lib/gitlab/background_migration/backfill_packages_nuget_metadata_project_id_spec.rb b/spec/lib/gitlab/background_migration/backfill_packages_nuget_metadata_project_id_spec.rb new file mode 100644 index 000000000000..69374f9cfc76 --- /dev/null +++ b/spec/lib/gitlab/background_migration/backfill_packages_nuget_metadata_project_id_spec.rb @@ -0,0 +1,16 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::BackgroundMigration::BackfillPackagesNugetMetadataProjectId, + feature_category: :package_registry, + schema: 20240930121132 do + include_examples 'desired sharding key backfill job' do + let(:batch_table) { :packages_nuget_metadata } + let(:batch_column) { :package_id } + let(:backfill_column) { :project_id } + let(:backfill_via_table) { :packages_packages } + let(:backfill_via_column) { :project_id } + let(:backfill_via_foreign_key) { :package_id } + end +end diff --git a/spec/migrations/20240930121136_queue_backfill_packages_nuget_metadata_project_id_spec.rb b/spec/migrations/20240930121136_queue_backfill_packages_nuget_metadata_project_id_spec.rb new file mode 100644 index 000000000000..a2db34efd43b --- /dev/null +++ b/spec/migrations/20240930121136_queue_backfill_packages_nuget_metadata_project_id_spec.rb @@ -0,0 +1,33 @@ +# frozen_string_literal: true + +require 'spec_helper' +require_migration! 
+ +RSpec.describe QueueBackfillPackagesNugetMetadataProjectId, feature_category: :package_registry do + let!(:batched_migration) { described_class::MIGRATION } + + it 'schedules a new batched migration' do + reversible_migration do |migration| + migration.before -> { + expect(batched_migration).not_to have_scheduled_batched_migration + } + + migration.after -> { + expect(batched_migration).to have_scheduled_batched_migration( + table_name: :packages_nuget_metadata, + column_name: :package_id, + interval: described_class::DELAY_INTERVAL, + batch_size: described_class::BATCH_SIZE, + sub_batch_size: described_class::SUB_BATCH_SIZE, + gitlab_schema: :gitlab_main_cell, + job_arguments: [ + :project_id, + :packages_packages, + :project_id, + :package_id + ] + ) + } + end + end +end -- GitLab