From a4fe627e5da23a4bfae2aa0ad58f43219e587787 Mon Sep 17 00:00:00 2001 From: David Fernandez <dfernandez@gitlab.com> Date: Fri, 11 Feb 2022 18:03:48 +0100 Subject: [PATCH] Guard worker will check ongoing migrations By pinging the Container Registry API. If the status returned is coeherent with an ongoing migration, the job will skip the migration. The migration is aborted otherwise. --- app/models/container_repository.rb | 10 +++- .../migration/guard_worker.rb | 56 +++++++++++++++++- lib/container_registry/gitlab_api_client.rb | 4 ++ lib/container_registry/migration.rb | 5 ++ spec/models/container_repository_spec.rb | 10 ++++ .../migration/guard_worker_spec.rb | 59 +++++++++++++++++-- 6 files changed, 134 insertions(+), 10 deletions(-) diff --git a/app/models/container_repository.rb b/app/models/container_repository.rb index 7e28c9e079f60..38bd8143bf776 100644 --- a/app/models/container_repository.rb +++ b/app/models/container_repository.rb @@ -274,9 +274,7 @@ def finish_pre_import_and_start_import def retry_aborted_migration return unless migration_state == 'import_aborted' - import_status = gitlab_api_client.import_status(self.path) - - case import_status + case external_import_status when 'native' raise NativeImportError when 'import_in_progress' @@ -322,6 +320,12 @@ def last_import_step_done_at [migration_pre_import_done_at, migration_import_done_at, migration_aborted_at].compact.max end + def external_import_status + strong_memoize(:import_status) do + gitlab_api_client.import_status(self.path) + end + end + # rubocop: disable CodeReuse/ServiceClass def registry @registry ||= begin diff --git a/app/workers/container_registry/migration/guard_worker.rb b/app/workers/container_registry/migration/guard_worker.rb index 1237b6058e47f..77ae111c1cb84 100644 --- a/app/workers/container_registry/migration/guard_worker.rb +++ b/app/workers/container_registry/migration/guard_worker.rb @@ -21,18 +21,68 @@ def perform repositories = ::ContainerRepository.with_stale_migration(step_before_timestamp) .limit(max_capacity) + aborts_count = 0 + long_running_migration_ids = [] # the #to_a is safe as the amount of entries is limited. # In addition, we're calling #each in the next line and we don't want two different SQL queries for these two lines log_extra_metadata_on_done(:stale_migrations_count, repositories.to_a.size) repositories.each do |repository| - repository.abort_import + if abortable?(repository) + repository.abort_import + aborts_count += 1 + else + long_running_migration_ids << repository.id if long_running_migration?(repository) + end + end + + log_extra_metadata_on_done(:aborted_stale_migrations_count, aborts_count) + + if long_running_migration_ids.any? + log_extra_metadata_on_done(:long_running_stale_migration_container_repository_ids, long_running_migration_ids) end end private + # This can ping the Container Registry API. + # We loop on a set of repositories to calls this function (see #perform) + # In the worst case scenario, we have a n+1 API calls situation here. + # + # This is reasonable because the maximum amount of repositories looped + # on is `25`. See ::ContainerRegistry::Migration.capacity. + # + # TODO We can remove this n+1 situation by having a Container Registry API + # endpoint that accepts multiple repository paths at once. This is issue + # https://gitlab.com/gitlab-org/container-registry/-/issues/582 + def abortable?(repository) + # early return to save one Container Registry API request + return true unless repository.importing? || repository.pre_importing? + return true unless external_migration_in_progress?(repository) + + false + end + + def long_running_migration?(repository) + migration_start_timestamp(repository).before?(long_running_migration_threshold) + end + + def external_migration_in_progress?(repository) + status = repository.external_import_status + + (status == 'pre_import_in_progress' && repository.pre_importing?) || + (status == 'import_in_progress' && repository.importing?) + end + + def migration_start_timestamp(repository) + if repository.pre_importing? + repository.migration_pre_import_started_at + else + repository.migration_import_started_at + end + end + def step_before_timestamp ::ContainerRegistry::Migration.max_step_duration.seconds.ago end @@ -42,6 +92,10 @@ def max_capacity # is not properly applied ::ContainerRegistry::Migration.capacity * 2 end + + def long_running_migration_threshold + @threshold ||= 30.minutes.ago + end end end end diff --git a/lib/container_registry/gitlab_api_client.rb b/lib/container_registry/gitlab_api_client.rb index aa3d8ff47c431..20b8e1d419b74 100644 --- a/lib/container_registry/gitlab_api_client.rb +++ b/lib/container_registry/gitlab_api_client.rb @@ -25,6 +25,7 @@ def self.supports_gitlab_api? end end + # https://gitlab.com/gitlab-org/container-registry/-/blob/master/docs-gitlab/api.md#compliance-check def supports_gitlab_api? strong_memoize(:supports_gitlab_api) do registry_features = Gitlab::CurrentSettings.container_registry_features || [] @@ -35,16 +36,19 @@ def supports_gitlab_api? end end + # https://gitlab.com/gitlab-org/container-registry/-/blob/master/docs-gitlab/api.md#import-repository def pre_import_repository(path) response = start_import_for(path, pre: true) IMPORT_RESPONSES.fetch(response.status, :error) end + # https://gitlab.com/gitlab-org/container-registry/-/blob/master/docs-gitlab/api.md#import-repository def import_repository(path) response = start_import_for(path, pre: false) IMPORT_RESPONSES.fetch(response.status, :error) end + # https://gitlab.com/gitlab-org/container-registry/-/blob/master/docs-gitlab/api.md#get-repository-import-status def import_status(path) body_hash = response_body(faraday.get(import_url_for(path))) body_hash['status'] || 'error' diff --git a/lib/container_registry/migration.rb b/lib/container_registry/migration.rb index 322804418eb05..b03c94e5ebfcb 100644 --- a/lib/container_registry/migration.rb +++ b/lib/container_registry/migration.rb @@ -34,6 +34,11 @@ def self.enqueue_waiting_time end def self.capacity + # Increasing capacity numbers will increase the n+1 API calls we can have + # in ContainerRegistry::Migration::GuardWorker#external_migration_in_progress? + # + # TODO: See https://gitlab.com/gitlab-org/container-registry/-/issues/582 + # return 25 if Feature.enabled?(:container_registry_migration_phase2_capacity_25) return 10 if Feature.enabled?(:container_registry_migration_phase2_capacity_10) return 1 if Feature.enabled?(:container_registry_migration_phase2_capacity_1) diff --git a/spec/models/container_repository_spec.rb b/spec/models/container_repository_spec.rb index 77de9e83f0a0d..3524c810ff455 100644 --- a/spec/models/container_repository_spec.rb +++ b/spec/models/container_repository_spec.rb @@ -1179,6 +1179,16 @@ def repository_creation_race(path) end end + describe '#external_import_status' do + subject { repository.external_import_status } + + it 'returns the response from the client' do + expect(repository.gitlab_api_client).to receive(:import_status).with(repository.path).and_return('test') + + expect(subject).to eq('test') + end + end + describe '.with_stale_migration' do let_it_be(:repository) { create(:container_repository) } let_it_be(:stale_pre_importing_old_timestamp) { create(:container_repository, :pre_importing, migration_pre_import_started_at: 10.minutes.ago) } diff --git a/spec/workers/container_registry/migration/guard_worker_spec.rb b/spec/workers/container_registry/migration/guard_worker_spec.rb index 480e8adbd5cac..7d1df320d4eb5 100644 --- a/spec/workers/container_registry/migration/guard_worker_spec.rb +++ b/spec/workers/container_registry/migration/guard_worker_spec.rb @@ -26,11 +26,30 @@ allow(::Gitlab).to receive(:com?).and_return(true) end + shared_examples 'not aborting any migration' do + it 'will not abort the migration' do + expect(worker).to receive(:log_extra_metadata_on_done).with(:stale_migrations_count, 1) + expect(worker).to receive(:log_extra_metadata_on_done).with(:aborted_stale_migrations_count, 0) + expect(worker).to receive(:log_extra_metadata_on_done).with(:long_running_stale_migration_container_repository_ids, [stale_migration.id]) + + expect { subject } + .to not_change(pre_importing_migrations, :count) + .and not_change(pre_import_done_migrations, :count) + .and not_change(importing_migrations, :count) + .and not_change(import_done_migrations, :count) + .and not_change(import_aborted_migrations, :count) + .and not_change { stale_migration.reload.migration_state } + .and not_change { ongoing_migration.migration_state } + end + end + context 'with no stale migrations' do it_behaves_like 'an idempotent worker' it 'will not update any migration state' do expect(worker).to receive(:log_extra_metadata_on_done).with(:stale_migrations_count, 0) + expect(worker).to receive(:log_extra_metadata_on_done).with(:aborted_stale_migrations_count, 0) + expect { subject } .to not_change(pre_importing_migrations, :count) .and not_change(pre_import_done_migrations, :count) @@ -41,10 +60,19 @@ context 'with pre_importing stale migrations' do let(:ongoing_migration) { create(:container_repository, :pre_importing) } - let(:stale_migration) { create(:container_repository, :pre_importing, migration_pre_import_started_at: 10.minutes.ago) } + let(:stale_migration) { create(:container_repository, :pre_importing, migration_pre_import_started_at: 35.minutes.ago) } + let(:import_status) { 'test' } + + before do + allow_next_instance_of(ContainerRegistry::GitlabApiClient) do |client| + allow(client).to receive(:import_status).and_return(import_status) + end + end it 'will abort the migration' do expect(worker).to receive(:log_extra_metadata_on_done).with(:stale_migrations_count, 1) + expect(worker).to receive(:log_extra_metadata_on_done).with(:aborted_stale_migrations_count, 1) + expect { subject } .to change(pre_importing_migrations, :count).by(-1) .and not_change(pre_import_done_migrations, :count) @@ -54,18 +82,26 @@ .and change { stale_migration.reload.migration_state }.from('pre_importing').to('import_aborted') .and not_change { ongoing_migration.migration_state } end + + context 'the client returns pre_import_in_progress' do + let(:import_status) { 'pre_import_in_progress' } + + it_behaves_like 'not aborting any migration' + end end context 'with pre_import_done stale migrations' do let(:ongoing_migration) { create(:container_repository, :pre_import_done) } - let(:stale_migration) { create(:container_repository, :pre_import_done, migration_pre_import_done_at: 10.minutes.ago) } + let(:stale_migration) { create(:container_repository, :pre_import_done, migration_pre_import_done_at: 35.minutes.ago) } before do allow(::ContainerRegistry::Migration).to receive(:max_step_duration).and_return(5.minutes) - expect(worker).to receive(:log_extra_metadata_on_done).with(:stale_migrations_count, 1) end it 'will abort the migration' do + expect(worker).to receive(:log_extra_metadata_on_done).with(:stale_migrations_count, 1) + expect(worker).to receive(:log_extra_metadata_on_done).with(:aborted_stale_migrations_count, 1) + expect { subject } .to not_change(pre_importing_migrations, :count) .and change(pre_import_done_migrations, :count).by(-1) @@ -79,14 +115,19 @@ context 'with importing stale migrations' do let(:ongoing_migration) { create(:container_repository, :importing) } - let(:stale_migration) { create(:container_repository, :importing, migration_import_started_at: 10.minutes.ago) } + let(:stale_migration) { create(:container_repository, :importing, migration_import_started_at: 35.minutes.ago) } + let(:import_status) { 'test' } before do - allow(::ContainerRegistry::Migration).to receive(:max_step_duration).and_return(5.minutes) - expect(worker).to receive(:log_extra_metadata_on_done).with(:stale_migrations_count, 1) + allow_next_instance_of(ContainerRegistry::GitlabApiClient) do |client| + allow(client).to receive(:import_status).and_return(import_status) + end end it 'will abort the migration' do + expect(worker).to receive(:log_extra_metadata_on_done).with(:stale_migrations_count, 1) + expect(worker).to receive(:log_extra_metadata_on_done).with(:aborted_stale_migrations_count, 1) + expect { subject } .to not_change(pre_importing_migrations, :count) .and not_change(pre_import_done_migrations, :count) @@ -96,6 +137,12 @@ .and change { stale_migration.reload.migration_state }.from('importing').to('import_aborted') .and not_change { ongoing_migration.migration_state } end + + context 'the client returns import_in_progress' do + let(:import_status) { 'import_in_progress' } + + it_behaves_like 'not aborting any migration' + end end end -- GitLab