diff --git a/app/models/concerns/ci/deployable.rb b/app/models/concerns/ci/deployable.rb
index d25151f9a34f14ca40afb0584d03b2c784112ee5..bc2c67a863384b95de48fda3e9a8d40c55476618 100644
--- a/app/models/concerns/ci/deployable.rb
+++ b/app/models/concerns/ci/deployable.rb
@@ -17,6 +17,16 @@ module Deployable
           end
         end
 
+        # When a job that stops an environment fails, enqueue StopJobFailedWorker
+        # via run_after_commit; it fires the environment's `recover_stuck_stopping` event.
+        after_transition any => [:failed] do |job|
+          next unless job.stops_environment?
+
+          job.run_after_commit do
+            Environments::StopJobFailedWorker.perform_async(id)
+          end
+        end
+
         # Synchronize Deployment Status
         # Please note that the data integirty is not assured because we can't use
         # a database transaction due to DB decomposition.
diff --git a/app/models/environment.rb b/app/models/environment.rb
index 29394c37e2c5f9d06c7dc8a60e82a4edcc955a13..efdcf7174aab84b855ac9b924435567a5e330435 100644
--- a/app/models/environment.rb
+++ b/app/models/environment.rb
@@ -195,6 +195,11 @@ class Environment < ApplicationRecord
       transition %i[available stopping] => :stopped
     end
 
+    # Moves a `stopping` environment back to `available` (fired when its stop job fails).
+    event :recover_stuck_stopping do
+      transition stopping: :available
+    end
+
     state :available
     state :stopping
     state :stopped
diff --git a/app/workers/all_queues.yml b/app/workers/all_queues.yml
index da05824be4f048a8756cda7bd6dea1211255656a..f39c5f5c2328a6e11cce0c2ee9be84ef5a1e8c9d 100644
--- a/app/workers/all_queues.yml
+++ b/app/workers/all_queues.yml
@@ -2892,6 +2892,15 @@
   :weight: 1
   :idempotent: true
   :tags: []
+- :name: environments_stop_job_failed
+  :worker_name: Environments::StopJobFailedWorker
+  :feature_category: :continuous_delivery
+  :has_external_dependencies: false
+  :urgency: :low
+  :resource_boundary: :unknown
+  :weight: 1
+  :idempotent: true
+  :tags: []
 - :name: environments_stop_job_success
   :worker_name: Environments::StopJobSuccessWorker
   :feature_category: :continuous_delivery
diff --git a/app/workers/environments/stop_job_failed_worker.rb b/app/workers/environments/stop_job_failed_worker.rb
new file mode 100644
index 0000000000000000000000000000000000000000..44aa3a4e91f0356bd45cebb684c54214d163e76b
--- /dev/null
+++ b/app/workers/environments/stop_job_failed_worker.rb
@@ -0,0 +1,30 @@
+# frozen_string_literal: true
+
+module Environments
+  # Worker enqueued when a job that stops an environment transitions to failed.
+  # It fires `recover_stuck_stopping` on the job's persisted environment.
+  class StopJobFailedWorker
+    include ApplicationWorker
+
+    data_consistency :delayed
+    idempotent!
+    feature_category :continuous_delivery
+
+    # @param job_id [Integer] ID looked up via Ci::Processable.find_by_id
+    # @param _params [Hash] unused
+    def perform(job_id, _params = {})
+      # `try` skips the block when no job with this ID exists.
+      Ci::Processable.find_by_id(job_id).try do |job|
+        revert_environment(job) if job.stops_environment? && job.failed?
+      end
+    end
+
+    private
+
+    # The job may have no persisted environment; `&.` skips the event
+    # instead of raising NoMethodError on nil.
+    def revert_environment(job)
+      job.persisted_environment&.fire_state_event(:recover_stuck_stopping)
+    end
+  end
+end
diff --git a/config/sidekiq_queues.yml b/config/sidekiq_queues.yml
index 70563327c20863f75e6b1e643e3350f231501713..6ebefabbef1c9baeba245e29fb98f8ac175c2b84 100644
--- a/config/sidekiq_queues.yml
+++ b/config/sidekiq_queues.yml
@@ -271,6 +271,8 @@
   - 1
 - - environments_canary_ingress_update
   - 1
+- - environments_stop_job_failed
+  - 1
 - - environments_stop_job_success
   - 1
 - - epics
diff --git a/spec/factories/environments.rb b/spec/factories/environments.rb
index 2df9f482bb9e134e2d0f8ebf965b85015afd9b23..6f2cd4bf596938c60b98cd21d85a47204acb251e 100644
--- a/spec/factories/environments.rb
+++ b/spec/factories/environments.rb
@@ -15,6 +15,10 @@
       state { :stopped }
     end
 
+    trait :stopping do
+      state { :stopping }
+    end
+
     trait :production do
       name { 'production' }
     end
diff --git a/spec/support/shared_examples/ci/deployable_shared_examples.rb b/spec/support/shared_examples/ci/deployable_shared_examples.rb
index 4f43d38e604700ebb1520b81081e36d524570274..0781eec1b4b787c33b996cab826d69bc78832ee9 100644
--- a/spec/support/shared_examples/ci/deployable_shared_examples.rb
+++ b/spec/support/shared_examples/ci/deployable_shared_examples.rb
@@ -166,6 +166,28 @@
 
         expect(deployment).to be_failed
       end
+
+      context 'when the job is a stop job' do
+        before do
+          job.update!(environment: 'review', options: { environment: { action: 'stop' } })
+        end
+
+        it 'enqueues Environments::StopJobFailedWorker' do
+          expect(Environments::StopJobFailedWorker)
+            .to receive(:perform_async)
+
+          subject
+        end
+      end
+
+      context 'when the job is not a stop job' do
+        it 'does not enqueue Environments::StopJobFailedWorker' do
+          expect(Environments::StopJobFailedWorker)
+            .not_to receive(:perform_async)
+
+          subject
+        end
+      end
     end
 
     context 'when transits to skipped' do
diff --git a/spec/workers/environments/stop_job_failed_worker_spec.rb b/spec/workers/environments/stop_job_failed_worker_spec.rb
new file mode 100644
index 0000000000000000000000000000000000000000..952b96ee3ecb4993e6a4cd36adf9ac6bbf6875ba
--- /dev/null
+++ b/spec/workers/environments/stop_job_failed_worker_spec.rb
@@ -0,0 +1,99 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+RSpec.describe Environments::StopJobFailedWorker, feature_category: :continuous_delivery do
+  describe '#perform' do
+    let_it_be_with_refind(:environment) { create(:environment, :stopping) }
+
+    subject { described_class.new.perform(job.id) }
+
+    shared_examples_for 'recovering a stuck stopping environment' do
+      context 'when the job is not a stop job' do
+        let(:job) { non_stop_job }
+
+        it 'does not recover the environment' do
+          expect { subject }.not_to change { environment.reload.state }
+        end
+      end
+
+      context 'when the stop job is not failed' do
+        let(:job) { stop_job }
+
+        before do
+          job.update!(status: :success)
+        end
+
+        it 'does not recover the environment' do
+          expect { subject }.not_to change { environment.reload.state }
+        end
+      end
+
+      context 'when the stop job is failed' do
+        let(:job) { stop_job }
+
+        it 'recovers the environment' do
+          expect { subject }
+            .to change { environment.reload.state }
+            .from('stopping')
+            .to('available')
+        end
+      end
+    end
+
+    context 'with build job' do
+      let!(:stop_job) do
+        create(
+          :ci_build,
+          :stop_review_app,
+          environment: environment.name,
+          project: environment.project,
+          status: :failed
+        )
+      end
+
+      let!(:non_stop_job) do
+        create(
+          :ci_build,
+          :start_review_app,
+          environment: environment.name,
+          project: environment.project,
+          status: :failed
+        )
+      end
+
+      it_behaves_like 'recovering a stuck stopping environment'
+    end
+
+    context 'with bridge job' do
+      let!(:stop_job) do
+        create(
+          :ci_bridge,
+          :stop_review_app,
+          environment: environment.name,
+          project: environment.project,
+          status: :failed
+        )
+      end
+
+      let!(:non_stop_job) do
+        create(
+          :ci_bridge,
+          :start_review_app,
+          environment: environment.name,
+          project: environment.project,
+          status: :failed
+        )
+      end
+
+      it_behaves_like 'recovering a stuck stopping environment'
+    end
+
+    context 'when job does not exist' do
+      it 'does not raise exception' do
+        expect { described_class.new.perform(non_existing_record_id) }
+          .not_to raise_error
+      end
+    end
+  end
+end