From f4bcda433bd758af3600765eefc76d642e65998c Mon Sep 17 00:00:00 2001
From: Rostyslav Safonov <elhowm@gmail.com>
Date: Thu, 5 Jan 2023 14:44:49 +0200
Subject: [PATCH] [385860] Fix ProjectImportWorker record not-found

Sometimes in concurency between project import job and
project delete job - deleting get firstly. In such case
project import job is failing because there is no record
in DB anymore.

Changelog: fixed
---
 app/workers/repository_import_worker.rb       |   7 +-
 ee/app/workers/ee/repository_import_worker.rb |   1 +
 ee/spec/models/project_import_state_spec.rb   |  52 +++++----
 .../workers/repository_import_worker_spec.rb  |  12 +-
 spec/models/project_import_state_spec.rb      |  20 ++--
 spec/workers/repository_import_worker_spec.rb | 104 +++++++++---------
 6 files changed, 104 insertions(+), 92 deletions(-)

diff --git a/app/workers/repository_import_worker.rb b/app/workers/repository_import_worker.rb
index 5e89b9f336240..f9e12c5135aaf 100644
--- a/app/workers/repository_import_worker.rb
+++ b/app/workers/repository_import_worker.rb
@@ -18,9 +18,8 @@ class RepositoryImportWorker # rubocop:disable Scalability/IdempotentWorker
   sidekiq_options memory_killer_max_memory_growth_kb: ENV.fetch('MEMORY_KILLER_REPOSITORY_IMPORT_WORKER_MAX_MEMORY_GROWTH_KB', 300_000).to_i
 
   def perform(project_id)
-    @project = Project.find(project_id)
-
-    return unless start_import
+    @project = Project.find_by_id(project_id)
+    return if project.nil? || !start_import?
 
     Gitlab::Metrics.add_event(:import_repository)
 
@@ -42,7 +41,7 @@ def perform(project_id)
 
   attr_reader :project
 
-  def start_import
+  def start_import?
     return true if start(project.import_state)
 
     Gitlab::Import::Logger.info(
diff --git a/ee/app/workers/ee/repository_import_worker.rb b/ee/app/workers/ee/repository_import_worker.rb
index 6df62b74ef1de..c160980dd6a6d 100644
--- a/ee/app/workers/ee/repository_import_worker.rb
+++ b/ee/app/workers/ee/repository_import_worker.rb
@@ -7,6 +7,7 @@ module RepositoryImportWorker
     override :perform
     def perform(project_id)
       super
+      return if project.nil?
 
       # Explicitly enqueue mirror for update so
       # that upstream remote is created and fetched
diff --git a/ee/spec/models/project_import_state_spec.rb b/ee/spec/models/project_import_state_spec.rb
index fa0b3f2851d87..8db02d55813fa 100644
--- a/ee/spec/models/project_import_state_spec.rb
+++ b/ee/spec/models/project_import_state_spec.rb
@@ -2,38 +2,46 @@
 
 require 'spec_helper'
 
-RSpec.describe ProjectImportState, type: :model do
+RSpec.describe ProjectImportState, type: :model, feature_category: :importers do
   include ::EE::GeoHelpers
 
-  describe 'Project import job' do
-    let(:project) { import_state.project }
+  let_it_be(:project) { create(:project) }
 
-    before do
-      allow_any_instance_of(Gitlab::GitalyClient::RepositoryService).to receive(:import_repository)
-        .with(project.import_url).and_return(true)
+  describe 'transitions' do
+    let(:import_state) { create(:import_state, :started, import_type: :github, project: project) }
 
-      # Works around https://github.com/rspec/rspec-mocks/issues/910
-      allow(Project).to receive(:find).with(project.id).and_return(project)
-      expect(project).to receive(:after_import).and_call_original
-    end
+    context 'state transition: [:none, :finished, :failed] => :scheduled' do
+      let(:import_state) { create(:import_state, :failed, import_type: :github, project: project) }
+      let(:jid) { '551d3ceac5f67a116719ce41' }
 
-    context 'with a mirrored project' do
-      let(:import_state) { create(:import_state, :mirror) }
+      before do
+        allow(import_state.project).to receive(:add_import_job).and_return(jid)
+        allow(Gitlab::Mirror).to receive(:increment_capacity).with(import_state.project_id)
+      end
 
-      it 'calls RepositoryImportWorker and inserts in front of the mirror scheduler queue', :sidekiq_might_not_need_inline do
-        allow_any_instance_of(EE::Project).to receive(:repository_exists?).and_return(false, true)
+      it 'calls project import job and sets last_update_scheduled_at' do
+        import_state.schedule
 
-        expect_any_instance_of(EE::ProjectImportState).to receive(:force_import_job!)
-        expect(RepositoryImportWorker).to receive(:perform_async).with(import_state.project_id).and_call_original
+        expect(Gitlab::Mirror).not_to have_received(:increment_capacity)
+        expect(import_state.project).to have_received(:add_import_job)
+        expect(import_state.jid).to eq jid
+        expect(import_state.last_update_scheduled_at).to be_within(1.second).of(Time.current)
+      end
 
-        expect { import_state.schedule }.to change { import_state.jid }
+      context 'when project mirrored' do
+        let(:mirror_project) { create(:project) }
+        let(:import_state) { create(:import_state, :failed, :mirror, import_type: :github, project: mirror_project) }
+
+        it 'increments mirror capacity' do
+          import_state.schedule
+
+          expect(Gitlab::Mirror).to have_received(:increment_capacity)
+          expect(import_state.project).to have_received(:add_import_job)
+          expect(import_state.jid).to eq jid
+          expect(import_state.last_update_scheduled_at).to be_within(1.second).of(Time.current)
+        end
       end
     end
-  end
-
-  describe 'transitions' do
-    let(:import_state) { create(:import_state, :started, import_type: :github) }
-    let(:project) { import_state.project }
 
     context 'state transition: [:started] => [:finished]' do
       context 'Geo repository update events' do
diff --git a/ee/spec/workers/repository_import_worker_spec.rb b/ee/spec/workers/repository_import_worker_spec.rb
index a9583e8f1f12f..b1b200d930c04 100644
--- a/ee/spec/workers/repository_import_worker_spec.rb
+++ b/ee/spec/workers/repository_import_worker_spec.rb
@@ -2,7 +2,7 @@
 
 require 'spec_helper'
 
-RSpec.describe RepositoryImportWorker do
+RSpec.describe RepositoryImportWorker, feature_category: :importers do
   let_it_be(:project) { create(:project) }
 
   it 'updates the error on custom project template Import/Export' do
@@ -50,6 +50,16 @@
     end
   end
 
+  context 'when project not found (deleted)' do
+    before do
+      allow(Project).to receive(:find_by_id).with(project.id).and_return(nil)
+    end
+
+    it 'does not raise any exception' do
+      expect { subject.perform(project.id) }.not_to raise_error
+    end
+  end
+
   describe 'sidekiq options' do
     it 'disables retry' do
       expect(described_class.sidekiq_options['retry']).to eq(false)
diff --git a/spec/models/project_import_state_spec.rb b/spec/models/project_import_state_spec.rb
index ba1a29a8b27ac..e5232026c39fd 100644
--- a/spec/models/project_import_state_spec.rb
+++ b/spec/models/project_import_state_spec.rb
@@ -2,7 +2,7 @@
 
 require 'spec_helper'
 
-RSpec.describe ProjectImportState, type: :model do
+RSpec.describe ProjectImportState, type: :model, feature_category: :importers do
   let_it_be(:correlation_id) { 'cid' }
   let_it_be(:import_state, refind: true) { create(:import_state, correlation_id_value: correlation_id) }
 
@@ -17,22 +17,19 @@
   end
 
   describe 'Project import job' do
-    let_it_be(:import_state) { create(:import_state, import_url: generate(:url)) }
-    let_it_be(:project) { import_state.project }
+    let_it_be(:project) { create(:project) }
 
-    before do
-      allow_any_instance_of(Gitlab::GitalyClient::RepositoryService).to receive(:import_repository)
-        .with(project.import_url, http_authorization_header: '', mirror: false, resolved_address: '').and_return(true)
+    let(:import_state) { create(:import_state, import_url: generate(:url), project: project) }
+    let(:jid) { '551d3ceac5f67a116719ce41' }
 
+    before do
       # Works around https://github.com/rspec/rspec-mocks/issues/910
       allow(Project).to receive(:find).with(project.id).and_return(project)
-      expect(project).to receive(:after_import).and_call_original
+      allow(project).to receive(:add_import_job).and_return(jid)
     end
 
     it 'imports a project', :sidekiq_might_not_need_inline do
-      expect(RepositoryImportWorker).to receive(:perform_async).and_call_original
-
-      expect { import_state.schedule }.to change { import_state.status }.from('none').to('finished')
+      expect { import_state.schedule }.to change { import_state.status }.from('none').to('scheduled')
     end
 
     it 'records job and correlation IDs', :sidekiq_might_not_need_inline do
@@ -40,7 +37,8 @@
 
       import_state.schedule
 
-      expect(import_state.jid).to be_an_instance_of(String)
+      expect(project).to have_received(:add_import_job)
+      expect(import_state.jid).to eq(jid)
       expect(import_state.correlation_id).to eq(correlation_id)
     end
   end
diff --git a/spec/workers/repository_import_worker_spec.rb b/spec/workers/repository_import_worker_spec.rb
index 1dc77fbf83fe4..ca7c13fe24e75 100644
--- a/spec/workers/repository_import_worker_spec.rb
+++ b/spec/workers/repository_import_worker_spec.rb
@@ -2,92 +2,88 @@
 
 require 'spec_helper'
 
-RSpec.describe RepositoryImportWorker do
+RSpec.describe RepositoryImportWorker, feature_category: :importers do
   describe '#perform' do
-    let(:project) { create(:project, :import_scheduled) }
-    let(:import_state) { project.import_state }
-
-    context 'when worker was reset without cleanup' do
-      it 'imports the project successfully' do
-        jid = '12345678'
-        started_project = create(:project)
-        started_import_state = create(:import_state, :started, project: started_project, jid: jid)
-
-        allow(subject).to receive(:jid).and_return(jid)
+    let(:project) { build_stubbed(:project, :import_scheduled, import_state: import_state, import_url: 'url') }
+    let(:import_state) { create(:import_state, status: :scheduled) }
+    let(:jid) { '12345678' }
+
+    before do
+      allow(subject).to receive(:jid).and_return(jid)
+      allow(Project).to receive(:find_by_id).with(project.id).and_return(project)
+      allow(project).to receive(:after_import)
+      allow(import_state).to receive(:start).and_return(true)
+    end
 
-        expect_next_instance_of(Projects::ImportService) do |instance|
-          expect(instance).to receive(:execute).and_return({ status: :ok })
-        end
+    context 'when project not found (deleted)' do
+      before do
+        allow(Project).to receive(:find_by_id).with(project.id).and_return(nil)
+      end
 
-        # Works around https://github.com/rspec/rspec-mocks/issues/910
-        expect(Project).to receive(:find).with(started_project.id).and_return(started_project)
-        expect(started_project.repository).to receive(:expire_emptiness_caches)
-        expect(started_project.wiki.repository).to receive(:expire_emptiness_caches)
-        expect(started_import_state).to receive(:finish)
+      it 'does not raise any exception' do
+        expect(Projects::ImportService).not_to receive(:new)
 
-        subject.perform(started_project.id)
+        expect { subject.perform(project.id) }.not_to raise_error
       end
     end
 
-    context 'when the import was successful' do
-      it 'imports a project' do
+    context 'when import_state is scheduled' do
+      it 'imports the project successfully' do
         expect_next_instance_of(Projects::ImportService) do |instance|
           expect(instance).to receive(:execute).and_return({ status: :ok })
         end
 
-        # Works around https://github.com/rspec/rspec-mocks/issues/910
-        expect(Project).to receive(:find).with(project.id).and_return(project)
-        expect(project.repository).to receive(:expire_emptiness_caches)
-        expect(project.wiki.repository).to receive(:expire_emptiness_caches)
-        expect(import_state).to receive(:finish)
-
         subject.perform(project.id)
+
+        expect(project).to have_received(:after_import)
+        expect(import_state).to have_received(:start)
       end
     end
 
-    context 'when the import has failed' do
-      it 'updates the error on Import/Export & hides credentials from import URL' do
-        import_url = 'https://user:pass@test.com/root/repoC.git/'
-        error = "#{import_url} not found"
-
-        import_state.update!(jid: '123')
-        project.update!(import_type: 'gitlab_project')
+    context 'when worker was reset without cleanup (import_state is started)' do
+      let(:import_state) { create(:import_state, :started, jid: jid) }
 
+      it 'imports the project successfully' do
         expect_next_instance_of(Projects::ImportService) do |instance|
-          expect(instance).to receive(:track_start_import).and_raise(StandardError, error)
+          expect(instance).to receive(:execute).and_return({ status: :ok })
         end
 
-        expect { subject.perform(project.id) }.not_to raise_error
+        subject.perform(project.id)
 
-        import_state.reload
-        expect(import_state.jid).to eq('123')
-        expect(import_state.status).to eq('failed')
-        expect(import_state.last_error).to include("[FILTERED] not found")
-        expect(import_state.last_error).not_to include(import_url)
+        expect(project).to have_received(:after_import)
+        expect(import_state).not_to have_received(:start)
       end
     end
 
     context 'when using an asynchronous importer' do
       it 'does not mark the import process as finished' do
-        service = double(:service)
+        expect_next_instance_of(Projects::ImportService) do |instance|
+          expect(instance).to receive(:execute).and_return({ status: :ok })
+          expect(instance).to receive(:async?).and_return(true)
+        end
+
+        subject.perform(project.id)
 
-        allow(Projects::ImportService)
-          .to receive(:new)
-          .and_return(service)
+        expect(project).not_to have_received(:after_import)
+      end
+    end
 
-        allow(service)
-          .to receive(:execute)
-          .and_return(true)
+    context 'when the import has failed' do
+      let(:error) { "https://user:pass@test.com/root/repoC.git/ not found" }
 
-        allow(service)
-          .to receive(:async?)
-          .and_return(true)
+      before do
+        allow(import_state).to receive(:mark_as_failed)
+      end
 
-        expect_next_instance_of(ProjectImportState) do |instance|
-          expect(instance).not_to receive(:finish)
+      it 'marks import_state as failed' do
+        expect_next_instance_of(Projects::ImportService) do |instance|
+          expect(instance).to receive(:execute).and_return({ status: :error, message: error })
         end
 
         subject.perform(project.id)
+
+        expect(import_state).to have_received(:mark_as_failed).with(error)
+        expect(project).not_to have_received(:after_import)
       end
     end
   end
-- 
GitLab