From f4bcda433bd758af3600765eefc76d642e65998c Mon Sep 17 00:00:00 2001 From: Rostyslav Safonov <elhowm@gmail.com> Date: Thu, 5 Jan 2023 14:44:49 +0200 Subject: [PATCH] [385860] Fix ProjectImportWorker record not-found Sometimes in concurency between project import job and project delete job - deleting get firstly. In such case project import job is failing because there is no record in DB anymore. Changelog: fixed --- app/workers/repository_import_worker.rb | 7 +- ee/app/workers/ee/repository_import_worker.rb | 1 + ee/spec/models/project_import_state_spec.rb | 52 +++++---- .../workers/repository_import_worker_spec.rb | 12 +- spec/models/project_import_state_spec.rb | 20 ++-- spec/workers/repository_import_worker_spec.rb | 104 +++++++++--------- 6 files changed, 104 insertions(+), 92 deletions(-) diff --git a/app/workers/repository_import_worker.rb b/app/workers/repository_import_worker.rb index 5e89b9f336240..f9e12c5135aaf 100644 --- a/app/workers/repository_import_worker.rb +++ b/app/workers/repository_import_worker.rb @@ -18,9 +18,8 @@ class RepositoryImportWorker # rubocop:disable Scalability/IdempotentWorker sidekiq_options memory_killer_max_memory_growth_kb: ENV.fetch('MEMORY_KILLER_REPOSITORY_IMPORT_WORKER_MAX_MEMORY_GROWTH_KB', 300_000).to_i def perform(project_id) - @project = Project.find(project_id) - - return unless start_import + @project = Project.find_by_id(project_id) + return if project.nil? || !start_import? Gitlab::Metrics.add_event(:import_repository) @@ -42,7 +41,7 @@ def perform(project_id) attr_reader :project - def start_import + def start_import? return true if start(project.import_state) Gitlab::Import::Logger.info( diff --git a/ee/app/workers/ee/repository_import_worker.rb b/ee/app/workers/ee/repository_import_worker.rb index 6df62b74ef1de..c160980dd6a6d 100644 --- a/ee/app/workers/ee/repository_import_worker.rb +++ b/ee/app/workers/ee/repository_import_worker.rb @@ -7,6 +7,7 @@ module RepositoryImportWorker override :perform def perform(project_id) super + return if project.nil? # Explicitly enqueue mirror for update so # that upstream remote is created and fetched diff --git a/ee/spec/models/project_import_state_spec.rb b/ee/spec/models/project_import_state_spec.rb index fa0b3f2851d87..8db02d55813fa 100644 --- a/ee/spec/models/project_import_state_spec.rb +++ b/ee/spec/models/project_import_state_spec.rb @@ -2,38 +2,46 @@ require 'spec_helper' -RSpec.describe ProjectImportState, type: :model do +RSpec.describe ProjectImportState, type: :model, feature_category: :importers do include ::EE::GeoHelpers - describe 'Project import job' do - let(:project) { import_state.project } + let_it_be(:project) { create(:project) } - before do - allow_any_instance_of(Gitlab::GitalyClient::RepositoryService).to receive(:import_repository) - .with(project.import_url).and_return(true) + describe 'transitions' do + let(:import_state) { create(:import_state, :started, import_type: :github, project: project) } - # Works around https://github.com/rspec/rspec-mocks/issues/910 - allow(Project).to receive(:find).with(project.id).and_return(project) - expect(project).to receive(:after_import).and_call_original - end + context 'state transition: [:none, :finished, :failed] => :scheduled' do + let(:import_state) { create(:import_state, :failed, import_type: :github, project: project) } + let(:jid) { '551d3ceac5f67a116719ce41' } - context 'with a mirrored project' do - let(:import_state) { create(:import_state, :mirror) } + before do + allow(import_state.project).to receive(:add_import_job).and_return(jid) + allow(Gitlab::Mirror).to receive(:increment_capacity).with(import_state.project_id) + end - it 'calls RepositoryImportWorker and inserts in front of the mirror scheduler queue', :sidekiq_might_not_need_inline do - allow_any_instance_of(EE::Project).to receive(:repository_exists?).and_return(false, true) + it 'calls project import job and sets last_update_scheduled_at' do + import_state.schedule - expect_any_instance_of(EE::ProjectImportState).to receive(:force_import_job!) - expect(RepositoryImportWorker).to receive(:perform_async).with(import_state.project_id).and_call_original + expect(Gitlab::Mirror).not_to have_received(:increment_capacity) + expect(import_state.project).to have_received(:add_import_job) + expect(import_state.jid).to eq jid + expect(import_state.last_update_scheduled_at).to be_within(1.second).of(Time.current) + end - expect { import_state.schedule }.to change { import_state.jid } + context 'when project mirrored' do + let(:mirror_project) { create(:project) } + let(:import_state) { create(:import_state, :failed, :mirror, import_type: :github, project: mirror_project) } + + it 'increments mirror capacity' do + import_state.schedule + + expect(Gitlab::Mirror).to have_received(:increment_capacity) + expect(import_state.project).to have_received(:add_import_job) + expect(import_state.jid).to eq jid + expect(import_state.last_update_scheduled_at).to be_within(1.second).of(Time.current) + end end end - end - - describe 'transitions' do - let(:import_state) { create(:import_state, :started, import_type: :github) } - let(:project) { import_state.project } context 'state transition: [:started] => [:finished]' do context 'Geo repository update events' do diff --git a/ee/spec/workers/repository_import_worker_spec.rb b/ee/spec/workers/repository_import_worker_spec.rb index a9583e8f1f12f..b1b200d930c04 100644 --- a/ee/spec/workers/repository_import_worker_spec.rb +++ b/ee/spec/workers/repository_import_worker_spec.rb @@ -2,7 +2,7 @@ require 'spec_helper' -RSpec.describe RepositoryImportWorker do +RSpec.describe RepositoryImportWorker, feature_category: :importers do let_it_be(:project) { create(:project) } it 'updates the error on custom project template Import/Export' do @@ -50,6 +50,16 @@ end end + context 'when project not found (deleted)' do + before do + allow(Project).to receive(:find_by_id).with(project.id).and_return(nil) + end + + it 'does not raise any exception' do + expect { subject.perform(project.id) }.not_to raise_error + end + end + describe 'sidekiq options' do it 'disables retry' do expect(described_class.sidekiq_options['retry']).to eq(false) diff --git a/spec/models/project_import_state_spec.rb b/spec/models/project_import_state_spec.rb index ba1a29a8b27ac..e5232026c39fd 100644 --- a/spec/models/project_import_state_spec.rb +++ b/spec/models/project_import_state_spec.rb @@ -2,7 +2,7 @@ require 'spec_helper' -RSpec.describe ProjectImportState, type: :model do +RSpec.describe ProjectImportState, type: :model, feature_category: :importers do let_it_be(:correlation_id) { 'cid' } let_it_be(:import_state, refind: true) { create(:import_state, correlation_id_value: correlation_id) } @@ -17,22 +17,19 @@ end describe 'Project import job' do - let_it_be(:import_state) { create(:import_state, import_url: generate(:url)) } - let_it_be(:project) { import_state.project } + let_it_be(:project) { create(:project) } - before do - allow_any_instance_of(Gitlab::GitalyClient::RepositoryService).to receive(:import_repository) - .with(project.import_url, http_authorization_header: '', mirror: false, resolved_address: '').and_return(true) + let(:import_state) { create(:import_state, import_url: generate(:url), project: project) } + let(:jid) { '551d3ceac5f67a116719ce41' } + before do # Works around https://github.com/rspec/rspec-mocks/issues/910 allow(Project).to receive(:find).with(project.id).and_return(project) - expect(project).to receive(:after_import).and_call_original + allow(project).to receive(:add_import_job).and_return(jid) end it 'imports a project', :sidekiq_might_not_need_inline do - expect(RepositoryImportWorker).to receive(:perform_async).and_call_original - - expect { import_state.schedule }.to change { import_state.status }.from('none').to('finished') + expect { import_state.schedule }.to change { import_state.status }.from('none').to('scheduled') end it 'records job and correlation IDs', :sidekiq_might_not_need_inline do @@ -40,7 +37,8 @@ import_state.schedule - expect(import_state.jid).to be_an_instance_of(String) + expect(project).to have_received(:add_import_job) + expect(import_state.jid).to eq(jid) expect(import_state.correlation_id).to eq(correlation_id) end end diff --git a/spec/workers/repository_import_worker_spec.rb b/spec/workers/repository_import_worker_spec.rb index 1dc77fbf83fe4..ca7c13fe24e75 100644 --- a/spec/workers/repository_import_worker_spec.rb +++ b/spec/workers/repository_import_worker_spec.rb @@ -2,92 +2,88 @@ require 'spec_helper' -RSpec.describe RepositoryImportWorker do +RSpec.describe RepositoryImportWorker, feature_category: :importers do describe '#perform' do - let(:project) { create(:project, :import_scheduled) } - let(:import_state) { project.import_state } - - context 'when worker was reset without cleanup' do - it 'imports the project successfully' do - jid = '12345678' - started_project = create(:project) - started_import_state = create(:import_state, :started, project: started_project, jid: jid) - - allow(subject).to receive(:jid).and_return(jid) + let(:project) { build_stubbed(:project, :import_scheduled, import_state: import_state, import_url: 'url') } + let(:import_state) { create(:import_state, status: :scheduled) } + let(:jid) { '12345678' } + + before do + allow(subject).to receive(:jid).and_return(jid) + allow(Project).to receive(:find_by_id).with(project.id).and_return(project) + allow(project).to receive(:after_import) + allow(import_state).to receive(:start).and_return(true) + end - expect_next_instance_of(Projects::ImportService) do |instance| - expect(instance).to receive(:execute).and_return({ status: :ok }) - end + context 'when project not found (deleted)' do + before do + allow(Project).to receive(:find_by_id).with(project.id).and_return(nil) + end - # Works around https://github.com/rspec/rspec-mocks/issues/910 - expect(Project).to receive(:find).with(started_project.id).and_return(started_project) - expect(started_project.repository).to receive(:expire_emptiness_caches) - expect(started_project.wiki.repository).to receive(:expire_emptiness_caches) - expect(started_import_state).to receive(:finish) + it 'does not raise any exception' do + expect(Projects::ImportService).not_to receive(:new) - subject.perform(started_project.id) + expect { subject.perform(project.id) }.not_to raise_error end end - context 'when the import was successful' do - it 'imports a project' do + context 'when import_state is scheduled' do + it 'imports the project successfully' do expect_next_instance_of(Projects::ImportService) do |instance| expect(instance).to receive(:execute).and_return({ status: :ok }) end - # Works around https://github.com/rspec/rspec-mocks/issues/910 - expect(Project).to receive(:find).with(project.id).and_return(project) - expect(project.repository).to receive(:expire_emptiness_caches) - expect(project.wiki.repository).to receive(:expire_emptiness_caches) - expect(import_state).to receive(:finish) - subject.perform(project.id) + + expect(project).to have_received(:after_import) + expect(import_state).to have_received(:start) end end - context 'when the import has failed' do - it 'updates the error on Import/Export & hides credentials from import URL' do - import_url = 'https://user:pass@test.com/root/repoC.git/' - error = "#{import_url} not found" - - import_state.update!(jid: '123') - project.update!(import_type: 'gitlab_project') + context 'when worker was reset without cleanup (import_state is started)' do + let(:import_state) { create(:import_state, :started, jid: jid) } + it 'imports the project successfully' do expect_next_instance_of(Projects::ImportService) do |instance| - expect(instance).to receive(:track_start_import).and_raise(StandardError, error) + expect(instance).to receive(:execute).and_return({ status: :ok }) end - expect { subject.perform(project.id) }.not_to raise_error + subject.perform(project.id) - import_state.reload - expect(import_state.jid).to eq('123') - expect(import_state.status).to eq('failed') - expect(import_state.last_error).to include("[FILTERED] not found") - expect(import_state.last_error).not_to include(import_url) + expect(project).to have_received(:after_import) + expect(import_state).not_to have_received(:start) end end context 'when using an asynchronous importer' do it 'does not mark the import process as finished' do - service = double(:service) + expect_next_instance_of(Projects::ImportService) do |instance| + expect(instance).to receive(:execute).and_return({ status: :ok }) + expect(instance).to receive(:async?).and_return(true) + end + + subject.perform(project.id) - allow(Projects::ImportService) - .to receive(:new) - .and_return(service) + expect(project).not_to have_received(:after_import) + end + end - allow(service) - .to receive(:execute) - .and_return(true) + context 'when the import has failed' do + let(:error) { "https://user:pass@test.com/root/repoC.git/ not found" } - allow(service) - .to receive(:async?) - .and_return(true) + before do + allow(import_state).to receive(:mark_as_failed) + end - expect_next_instance_of(ProjectImportState) do |instance| - expect(instance).not_to receive(:finish) + it 'marks import_state as failed' do + expect_next_instance_of(Projects::ImportService) do |instance| + expect(instance).to receive(:execute).and_return({ status: :error, message: error }) end subject.perform(project.id) + + expect(import_state).to have_received(:mark_as_failed).with(error) + expect(project).not_to have_received(:after_import) end end end -- GitLab