diff --git a/db/post_migrate/20221123133054_queue_reset_status_on_container_repositories.rb b/db/post_migrate/20221123133054_queue_reset_status_on_container_repositories.rb
new file mode 100644
index 0000000000000000000000000000000000000000..2d482e0b83c06ec01413c22e8c7049bc97a2fc61
--- /dev/null
+++ b/db/post_migrate/20221123133054_queue_reset_status_on_container_repositories.rb
@@ -0,0 +1,28 @@
+# frozen_string_literal: true
+
+# Queues the ResetStatusOnContainerRepositories batched background migration
+# over container_repositories.id; `down` removes the queued migration again.
+class QueueResetStatusOnContainerRepositories < Gitlab::Database::Migration[2.0]
+  MIGRATION = 'ResetStatusOnContainerRepositories'
+  DELAY_INTERVAL = 2.minutes
+  BATCH_SIZE = 50
+
+  restrict_gitlab_migration gitlab_schema: :gitlab_main
+
+  def up
+    # Without an enabled registry the job cannot look up real tags, so nothing is queued.
+    return unless ::Gitlab.config.registry.enabled
+
+    queue_batched_background_migration(
+      MIGRATION,
+      :container_repositories,
+      :id,
+      job_interval: DELAY_INTERVAL,
+      sub_batch_size: BATCH_SIZE
+    )
+  end
+
+  def down
+    delete_batched_background_migration(MIGRATION, :container_repositories, :id, [])
+  end
+end
diff --git a/db/schema_migrations/20221123133054 b/db/schema_migrations/20221123133054
new file mode 100644
index 0000000000000000000000000000000000000000..3a7a382ee748212d55f8017c08e61c49042cbd72
--- /dev/null
+++ b/db/schema_migrations/20221123133054
@@ -0,0 +1 @@
+1a0a090433dd422b1bd9efdb56f82c02af8bab45b1a651b51a6ed224d823964c
\ No newline at end of file
diff --git a/lib/gitlab/background_migration/reset_status_on_container_repositories.rb b/lib/gitlab/background_migration/reset_status_on_container_repositories.rb
new file mode 100644
index 0000000000000000000000000000000000000000..09cd3b1895f44f198d615b3cedad2e8828c777d4
--- /dev/null
+++ b/lib/gitlab/background_migration/reset_status_on_container_repositories.rb
@@ -0,0 +1,155 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module BackgroundMigration
+    # A job that:
+    # * picks up container repositories with the delete_scheduled status.
+    # * checks whether there are tags linked to each of them.
+    # * if there are tags, resets the status to nil.
+    #
+    # When the registry is disabled, has no API URL, or the tags lookup raises,
+    # the repository is treated as having tags, so its status is reset as well.
+    class ResetStatusOnContainerRepositories < BatchedMigrationJob
+      DELETE_SCHEDULED_STATUS = 0
+      # Placeholder tag list used whenever the registry cannot be queried.
+      DUMMY_TAGS = %w[tag].freeze
+      # Value of the migrator field in the log entries written by this job.
+      MIGRATOR = 'ResetStatusOnContainerRepositories'
+
+      scope_to ->(relation) { relation.where(status: DELETE_SCHEDULED_STATUS) }
+      operation_name :reset_status_on_container_repositories
+
+      def perform
+        each_sub_batch do |sub_batch|
+          reset_status_if_tags(sub_batch)
+        end
+      end
+
+      private
+
+      # Sets status to nil for the repositories of the sub-batch that have tags.
+      # Each repository is checked one by one through ContainerRepository#tags?.
+      def reset_status_if_tags(container_repositories)
+        container_repositories_with_tags = container_repositories.select { |cr| cr.becomes(ContainerRepository).tags? } # rubocop:disable Cop/AvoidBecomes
+
+        ContainerRepository.where(id: container_repositories_with_tags.map(&:id))
+                           .update_all(status: nil)
+      end
+
+      # rubocop:disable Style/Documentation
+      module Routable
+        extend ActiveSupport::Concern
+
+        included do
+          has_one :route,
+            as: :source,
+            class_name: '::Gitlab::BackgroundMigration::ResetStatusOnContainerRepositories::Route'
+        end
+
+        # Path of the stored route when there is one, otherwise built from the parent chain.
+        def full_path
+          route&.path || build_full_path
+        end
+
+        def build_full_path
+          if parent && path
+            "#{parent.full_path}/#{path}"
+          else
+            path
+          end
+        end
+      end
+
+      class Route < ::ApplicationRecord
+        self.table_name = 'routes'
+      end
+
+      class Namespace < ::ApplicationRecord
+        include ::Gitlab::BackgroundMigration::ResetStatusOnContainerRepositories::Routable
+        include ::Namespaces::Traversal::Recursive
+        include ::Namespaces::Traversal::Linear
+        include ::Gitlab::Utils::StrongMemoize
+
+        self.table_name = 'namespaces'
+        self.inheritance_column = :_type_disabled
+
+        belongs_to :parent,
+          class_name: '::Gitlab::BackgroundMigration::ResetStatusOnContainerRepositories::Namespace'
+
+        def self.polymorphic_name
+          'Namespace'
+        end
+      end
+
+      # NOTE(review): unlike Namespace, polymorphic_name is not overridden here, so the route
+      # lookup presumably never matches 'Project' rows and build_full_path is used - confirm.
+      class Project < ::ApplicationRecord
+        include ::Gitlab::BackgroundMigration::ResetStatusOnContainerRepositories::Routable
+
+        self.table_name = 'projects'
+
+        belongs_to :namespace,
+          class_name: '::Gitlab::BackgroundMigration::ResetStatusOnContainerRepositories::Namespace'
+
+        alias_method :parent, :namespace
+        alias_attribute :parent_id, :namespace_id
+
+        delegate :root_ancestor, to: :namespace, allow_nil: true
+      end
+
+      class ContainerRepository < ::ApplicationRecord
+        self.table_name = 'container_repositories'
+
+        belongs_to :project,
+          class_name: '::Gitlab::BackgroundMigration::ResetStatusOnContainerRepositories::Project'
+
+        # Asks the registry whether this repository has tags and logs the answer.
+        def tags?
+          result = ContainerRegistry.tags_for(path).any?
+          ::Gitlab::BackgroundMigration::Logger.info(
+            migrator: MIGRATOR,
+            has_tags: result,
+            container_repository_id: id,
+            container_repository_path: path
+          )
+          result
+        end
+
+        # Lowercased project full path joined with the repository name (blank parts skipped).
+        def path
+          @path ||= [project.full_path, name].select(&:present?).join('/').downcase
+        end
+      end
+
+      class ContainerRegistry
+        class << self
+          # Returns the tags of the response for path, or DUMMY_TAGS when there is
+          # no response or anything raises, so failures never look like "no tags".
+          def tags_for(path)
+            response = ContainerRegistryClient.repository_tags(path, page_size: 1)
+            return DUMMY_TAGS unless response
+
+            response['tags'] || []
+          rescue StandardError
+            DUMMY_TAGS
+          end
+        end
+      end
+
+      class ContainerRegistryClient
+        # Fetches the tags of path from the registry API using a pull access token.
+        # Returns dummy tags without any request when the registry is disabled or has no API URL.
+        def self.repository_tags(path, page_size:)
+          registry_config = ::Gitlab.config.registry
+
+          return { 'tags' => DUMMY_TAGS } unless registry_config.enabled && registry_config.api_url.present?
+
+          pull_token = ::Auth::ContainerRegistryAuthenticationService.pull_access_token(path)
+          client = ::ContainerRegistry::Client.new(registry_config.api_url, token: pull_token)
+          client.repository_tags(path, page_size: page_size)
+        end
+      end
+      # rubocop:enable Style/Documentation
+    end
+  end
+end
diff --git a/spec/lib/gitlab/background_migration/reset_status_on_container_repositories_spec.rb b/spec/lib/gitlab/background_migration/reset_status_on_container_repositories_spec.rb
new file mode 100644
index 0000000000000000000000000000000000000000..d50b04857d64d7fcc5cf4b532a17a7026cf7d7f5
--- /dev/null
+++ b/spec/lib/gitlab/background_migration/reset_status_on_container_repositories_spec.rb
@@ -0,0 +1,264 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+RSpec.describe Gitlab::BackgroundMigration::ResetStatusOnContainerRepositories, feature_category: :container_registry do
+  let(:projects_table) { table(:projects) }
+  let(:namespaces_table) { table(:namespaces) }
+  let(:container_repositories_table) { table(:container_repositories) }
+  let(:routes_table) { table(:routes) }
+
+  let!(:root_group) do
+    namespaces_table.create!(name: 'root_group', path: 'root_group', type: 'Group') do |new_group|
+      new_group.update!(traversal_ids: [new_group.id])
+    end
+  end
+
+  let!(:group1) do
+    namespaces_table.create!(name: 'group1', path: 'group1', parent_id: root_group.id, type: 'Group') do |new_group|
+      new_group.update!(traversal_ids: [root_group.id, new_group.id])
+    end
+  end
+
+  let!(:subgroup1) do
+    namespaces_table.create!(name: 'subgroup1', path: 'subgroup1', parent_id: group1.id, type: 'Group') do |new_group|
+      new_group.update!(traversal_ids: [root_group.id, group1.id, new_group.id])
+    end
+  end
+
+  let!(:group2) do
+    namespaces_table.create!(name: 'group2', path: 'group2', parent_id: root_group.id, type: 'Group') do |new_group|
+      new_group.update!(traversal_ids: [root_group.id, new_group.id])
+    end
+  end
+
+  let!(:group1_project_namespace) do
+    namespaces_table.create!(name: 'group1_project', path: 'group1_project', type: 'Project', parent_id: group1.id)
+  end
+
+  let!(:subgroup1_project_namespace) do
+    namespaces_table.create!(
+      name: 'subgroup1_project',
+      path: 'subgroup1_project',
+      type: 'Project',
+      parent_id: subgroup1.id
+    )
+  end
+
+  let!(:group2_project_namespace) do
+    namespaces_table.create!(
+      name: 'group2_project',
+      path: 'group2_project',
+      type: 'Project',
+      parent_id: group2.id
+    )
+  end
+
+  let!(:group1_project) do
+    projects_table.create!(
+      name: 'group1_project',
+      path: 'group1_project',
+      namespace_id: group1.id,
+      project_namespace_id: group1_project_namespace.id
+    )
+  end
+
+  let!(:subgroup1_project) do
+    projects_table.create!(
+      name: 'subgroup1_project',
+      path: 'subgroup1_project',
+      namespace_id: subgroup1.id,
+      project_namespace_id: subgroup1_project_namespace.id
+    )
+  end
+
+  let!(:group2_project) do
+    projects_table.create!(
+      name: 'group2_project',
+      path: 'group2_project',
+      namespace_id: group2.id,
+      project_namespace_id: group2_project_namespace.id
+    )
+  end
+
+  let!(:route2) do
+    routes_table.create!(
+      source_id: group2_project.id,
+      source_type: 'Project',
+      path: 'root_group/group2/group2_project',
+      namespace_id: group2_project_namespace.id
+    )
+  end
+
+  let!(:delete_scheduled_container_repository1) do
+    container_repositories_table.create!(project_id: group1_project.id, status: 0, name: 'container_repository1')
+  end
+
+  let!(:delete_scheduled_container_repository2) do
+    container_repositories_table.create!(project_id: subgroup1_project.id, status: 0, name: 'container_repository2')
+  end
+
+  let!(:delete_scheduled_container_repository3) do
+    container_repositories_table.create!(project_id: group2_project.id, status: 0, name: 'container_repository3')
+  end
+
+  let!(:delete_ongoing_container_repository4) do
+    container_repositories_table.create!(project_id: group2_project.id, status: 2, name: 'container_repository4')
+  end
+
+  let(:migration) do
+    described_class.new(
+      start_id: container_repositories_table.minimum(:id),
+      end_id: container_repositories_table.maximum(:id),
+      batch_table: :container_repositories,
+      batch_column: :id,
+      sub_batch_size: 50,
+      pause_ms: 0,
+      connection: ApplicationRecord.connection
+    )
+  end
+
+  describe '#filter_batch' do
+    it 'scopes the relation to delete scheduled container repositories' do
+      expected = container_repositories_table.where(status: 0).pluck(:id)
+      actual = migration.filter_batch(container_repositories_table).pluck(:id)
+
+      expect(actual).to match_array(expected)
+    end
+  end
+
+  describe '#perform' do
+    let(:registry_api_url) { 'http://example.com' }
+
+    subject(:perform) { migration.perform }
+
+    before do
+      stub_container_registry_config(
+        enabled: true,
+        api_url: registry_api_url,
+        key: 'spec/fixtures/x509_certificate_pk.key'
+      )
+      stub_tags_list(path: 'root_group/group1/group1_project/container_repository1')
+      stub_tags_list(path: 'root_group/group1/subgroup1/subgroup1_project/container_repository2', tags: [])
+      stub_tags_list(path: 'root_group/group2/group2_project/container_repository3')
+    end
+
+    shared_examples 'resetting status of all container repositories scheduled for deletion' do
+      it 'resets all statuses' do
+        expect_logging_on(
+          path: 'root_group/group1/group1_project/container_repository1',
+          id: delete_scheduled_container_repository1.id,
+          has_tags: true
+        )
+        expect_logging_on(
+          path: 'root_group/group1/subgroup1/subgroup1_project/container_repository2',
+          id: delete_scheduled_container_repository2.id,
+          has_tags: true
+        )
+        expect_logging_on(
+          path: 'root_group/group2/group2_project/container_repository3',
+          id: delete_scheduled_container_repository3.id,
+          has_tags: true
+        )
+
+        expect { perform }
+          .to change { delete_scheduled_container_repository1.reload.status }.from(0).to(nil)
+          .and change { delete_scheduled_container_repository3.reload.status }.from(0).to(nil)
+          .and change { delete_scheduled_container_repository2.reload.status }.from(0).to(nil)
+      end
+    end
+
+    it 'resets status of container repositories with tags' do
+      expect_pull_access_token_on(path: 'root_group/group1/group1_project/container_repository1')
+      expect_pull_access_token_on(path: 'root_group/group1/subgroup1/subgroup1_project/container_repository2')
+      expect_pull_access_token_on(path: 'root_group/group2/group2_project/container_repository3')
+
+      expect_logging_on(
+        path: 'root_group/group1/group1_project/container_repository1',
+        id: delete_scheduled_container_repository1.id,
+        has_tags: true
+      )
+      expect_logging_on(
+        path: 'root_group/group1/subgroup1/subgroup1_project/container_repository2',
+        id: delete_scheduled_container_repository2.id,
+        has_tags: false
+      )
+      expect_logging_on(
+        path: 'root_group/group2/group2_project/container_repository3',
+        id: delete_scheduled_container_repository3.id,
+        has_tags: true
+      )
+
+      expect { perform }
+        .to change { delete_scheduled_container_repository1.reload.status }.from(0).to(nil)
+        .and change { delete_scheduled_container_repository3.reload.status }.from(0).to(nil)
+        .and not_change { delete_scheduled_container_repository2.reload.status }
+    end
+
+    context 'with the registry disabled' do
+      before do
+        allow(::Gitlab.config.registry).to receive(:enabled).and_return(false)
+      end
+
+      it_behaves_like 'resetting status of all container repositories scheduled for deletion'
+    end
+
+    context 'with the registry api url not defined' do
+      before do
+        allow(::Gitlab.config.registry).to receive(:api_url).and_return('')
+      end
+
+      it_behaves_like 'resetting status of all container repositories scheduled for deletion'
+    end
+
+    context 'with a faraday error' do
+      before do
+        client_double = instance_double('::ContainerRegistry::Client')
+        allow(::ContainerRegistry::Client).to receive(:new).and_return(client_double)
+        allow(client_double).to receive(:repository_tags).and_raise(Faraday::TimeoutError)
+
+        expect_pull_access_token_on(path: 'root_group/group1/group1_project/container_repository1')
+        expect_pull_access_token_on(path: 'root_group/group1/subgroup1/subgroup1_project/container_repository2')
+        expect_pull_access_token_on(path: 'root_group/group2/group2_project/container_repository3')
+      end
+
+      it_behaves_like 'resetting status of all container repositories scheduled for deletion'
+    end
+
+    # Stubs the registry tags list request (n=1) for path so that it returns tags.
+    def stub_tags_list(path:, tags: %w[tag1])
+      url = "#{registry_api_url}/v2/#{path}/tags/list?n=1"
+
+      stub_request(:get, url)
+        .with(
+          headers: {
+            'Accept' => ContainerRegistry::Client::ACCEPTED_TYPES.join(', '),
+            'Authorization' => /bearer .+/,
+            'User-Agent' => "GitLab/#{Gitlab::VERSION}"
+          }
+        )
+        .to_return(
+          status: 200,
+          body: Gitlab::Json.dump(tags: tags),
+          headers: { 'Content-Type' => 'application/json' }
+        )
+    end
+
+    # Expects a pull access token to be requested for path (the real method still runs).
+    def expect_pull_access_token_on(path:)
+      expect(Auth::ContainerRegistryAuthenticationService)
+        .to receive(:pull_access_token).with(path).and_call_original
+    end
+
+    # Expects the job to log the tags lookup result for the given repository id and path.
+    def expect_logging_on(path:, id:, has_tags:)
+      expect(::Gitlab::BackgroundMigration::Logger)
+        .to receive(:info).with(
+          migrator: described_class::MIGRATOR,
+          has_tags: has_tags,
+          container_repository_id: id,
+          container_repository_path: path
+        )
+    end
+  end
+end
diff --git a/spec/migrations/20221123133054_queue_reset_status_on_container_repositories_spec.rb b/spec/migrations/20221123133054_queue_reset_status_on_container_repositories_spec.rb
new file mode 100644
index 0000000000000000000000000000000000000000..79094a2b8d0a8d51852a46bcc36bcdab427ce425
--- /dev/null
+++ b/spec/migrations/20221123133054_queue_reset_status_on_container_repositories_spec.rb
@@ -0,0 +1,51 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+require_migration!
+
+RSpec.describe QueueResetStatusOnContainerRepositories, feature_category: :container_registry do
+  let_it_be(:batched_migration) { described_class::MIGRATION }
+
+  before do
+    stub_container_registry_config(
+      enabled: true,
+      api_url: 'http://example.com',
+      key: 'spec/fixtures/x509_certificate_pk.key'
+    )
+  end
+
+  it 'schedules a new batched migration' do
+    reversible_migration do |migration|
+      migration.before -> {
+        expect(batched_migration).not_to have_scheduled_batched_migration
+      }
+
+      migration.after -> {
+        expect(batched_migration).to have_scheduled_batched_migration(
+          table_name: :container_repositories,
+          column_name: :id,
+          interval: described_class::DELAY_INTERVAL,
+          sub_batch_size: described_class::BATCH_SIZE
+        )
+      }
+    end
+  end
+
+  context 'with the container registry disabled' do
+    before do
+      allow(::Gitlab.config.registry).to receive(:enabled).and_return(false)
+    end
+
+    it 'does not schedule a new batched migration' do
+      reversible_migration do |migration|
+        migration.before -> {
+          expect(batched_migration).not_to have_scheduled_batched_migration
+        }
+
+        migration.after -> {
+          expect(batched_migration).not_to have_scheduled_batched_migration
+        }
+      end
+    end
+  end
+end