diff --git a/app/models/ci/build.rb b/app/models/ci/build.rb index b161a2dafd45b067a730d40fa11286f3f5182987..289455a419e22bdc97f1a7402ef5d1cd89d79915 100644 --- a/app/models/ci/build.rb +++ b/app/models/ci/build.rb @@ -386,13 +386,15 @@ def supported_keyset_orderings after_transition any => [:failed] do |build| next unless build.project - if build.auto_retry_allowed? - begin - # rubocop: disable CodeReuse/ServiceClass - Ci::RetryJobService.new(build.project, build.user).execute(build) - # rubocop: enable CodeReuse/ServiceClass - rescue Gitlab::Access::AccessDeniedError => e - Gitlab::AppLogger.error "Unable to auto-retry job #{build.id}: #{e}" + build.run_after_commit do + if build.auto_retry_allowed? + begin + # rubocop: disable CodeReuse/ServiceClass -- https://gitlab.com/gitlab-org/gitlab/-/issues/494865 + Ci::RetryJobService.new(build.project, build.user).execute(build) + # rubocop: enable CodeReuse/ServiceClass + rescue Gitlab::Access::AccessDeniedError => e + Gitlab::AppLogger.error "Unable to auto-retry job #{build.id}: #{e}" + end end end end diff --git a/app/models/environment.rb b/app/models/environment.rb index 1d45817a8a67d8c38212407c0fa9dd7987212a92..bacf30962ba404852a1927eecb8e2507ad2408a0 100644 --- a/app/models/environment.rb +++ b/app/models/environment.rb @@ -377,16 +377,24 @@ def stop_with_actions! actions = [] stop_actions.each do |stop_action| - Gitlab::OptimisticLocking.retry_lock( - stop_action, - name: 'environment_stop_with_actions' - ) do |job| + play_job = ->(job) do actions << job.play(job.user) rescue StateMachines::InvalidTransition - # Ci::PlayBuildService rescues an error of StateMachines::InvalidTransition and fall back to retry. However, - # Ci::PlayBridgeService doesn't rescue it, so we're ignoring the error if it's not playable. + # Ci::PlayBuildService rescues an error of StateMachines::InvalidTransition and fall back to retry. + # However, Ci::PlayBridgeService doesn't rescue it, so we're ignoring the error if it's not playable. # We should fix this inconsistency in https://gitlab.com/gitlab-org/gitlab/-/issues/420855. end + + if Feature.enabled?(:no_locking_for_stop_actions, stop_action.project) + play_job.call(stop_action) + else + Gitlab::OptimisticLocking.retry_lock( + stop_action, + name: 'environment_stop_with_actions' + ) do |job| + play_job.call(job) + end + end end actions diff --git a/app/services/ci/retry_job_service.rb b/app/services/ci/retry_job_service.rb index bcfa2c52906b5ba20e6db892a3cf6ce58f7d41b4..841153a1a6e287847bc5f3a06ced086dcd46bc35 100644 --- a/app/services/ci/retry_job_service.rb +++ b/app/services/ci/retry_job_service.rb @@ -45,11 +45,7 @@ def clone!(job, variables: [], enqueue_if_actionable: false, start_pipeline: fal .close(new_job) end - # This method is called on the `drop!` state transition for Ci::Build which runs the retry in the - # `after_transition` block within a transaction. - # Ci::Pipelines::AddJobService then obtains the exclusive lease inside the same transaction. - # See issue: https://gitlab.com/gitlab-org/gitlab/-/issues/441525 - Gitlab::ExclusiveLease.skipping_transaction_check do + add_job = -> do ::Ci::Pipelines::AddJobService.new(job.pipeline).execute!(new_job) do |processable| BulkInsertableAssociations.with_bulk_insert do processable.save! @@ -57,6 +53,14 @@ def clone!(job, variables: [], enqueue_if_actionable: false, start_pipeline: fal end end + if Feature.enabled?(:no_locking_for_stop_actions, new_job.project) + add_job.call + else + Gitlab::ExclusiveLease.skipping_transaction_check do + add_job.call + end + end + job.reset # refresh the data to get new values of `retried` and `processed`. new_job diff --git a/config/feature_flags/gitlab_com_derisk/no_locking_for_stop_actions.yml b/config/feature_flags/gitlab_com_derisk/no_locking_for_stop_actions.yml new file mode 100644 index 0000000000000000000000000000000000000000..1054db560a6959fbffcf25533575e769f66711f9 --- /dev/null +++ b/config/feature_flags/gitlab_com_derisk/no_locking_for_stop_actions.yml @@ -0,0 +1,9 @@ +--- +name: no_locking_for_stop_actions +feature_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/492409 +introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/166730 +rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/493689 +milestone: '17.5' +group: group::pipeline execution +type: gitlab_com_derisk +default_enabled: false diff --git a/spec/models/environment_spec.rb b/spec/models/environment_spec.rb index 2c6ac42d26bc4873000df5396047587dd0d2b963..926165e2e7f213e7633a56a8b472f028c953aa9b 100644 --- a/spec/models/environment_spec.rb +++ b/spec/models/environment_spec.rb @@ -931,6 +931,24 @@ expect(close_action.reload.processed).to be_truthy end + context "when 'no_locking_for_stop_actions' is disabled" do + before do + stub_feature_flags(no_locking_for_stop_actions: false) + allow(Gitlab::OptimisticLocking).to receive(:retry_lock).and_call_original + end + + it 'plays the job with locking' do + skip unless factory_type == :ci_build + # Since job is droped. + expect(close_action.processed).to be_falsey + # it encounters the StaleObjectError at first, but reloads the object and runs `job.play` + expect { subject }.not_to raise_error + expect(Gitlab::OptimisticLocking).to have_received(:retry_lock).exactly(3).times + # Now the job should be processed. + expect(close_action.reload.processed).to be_truthy + end + end + it 'does nothing when bridge job' do skip unless factory_type == :ci_bridge