diff --git a/app/services/ci/destroy_expired_job_artifacts_service.rb b/app/services/ci/destroy_expired_job_artifacts_service.rb index 0c41bf9bb6bed1a341cea9ed908dd7a5415b9b0e..6e7caba854541eb8220ea0f977f0b241ec5c22b4 100644 --- a/app/services/ci/destroy_expired_job_artifacts_service.rb +++ b/app/services/ci/destroy_expired_job_artifacts_service.rb @@ -4,24 +4,23 @@ module Ci class DestroyExpiredJobArtifactsService include ::Gitlab::ExclusiveLeaseHelpers include ::Gitlab::LoopHelpers + include ::Gitlab::Utils::StrongMemoize BATCH_SIZE = 100 LOOP_TIMEOUT = 5.minutes - LEGACY_LOOP_TIMEOUT = 45.minutes LOOP_LIMIT = 1000 EXCLUSIVE_LOCK_KEY = 'expired_job_artifacts:destroy:lock' - LOCK_TIMEOUT = 10.minutes - LEGACY_LOCK_TIMEOUT = 50.minutes + LOCK_TIMEOUT = 6.minutes ## # Destroy expired job artifacts on GitLab instance # - # This destroy process cannot run for more than 10 minutes. This is for + # This destroy process cannot run for more than 6 minutes. This is for # preventing multiple `ExpireBuildArtifactsWorker` CRON jobs run concurrently, - # which is scheduled at every hour. + # which is scheduled every 7 minutes. def execute - in_lock(EXCLUSIVE_LOCK_KEY, ttl: lock_timeout, retries: 1) do - loop_until(timeout: loop_timeout, limit: LOOP_LIMIT) do + in_lock(EXCLUSIVE_LOCK_KEY, ttl: LOCK_TIMEOUT, retries: 1) do + loop_until(timeout: LOOP_TIMEOUT, limit: LOOP_LIMIT) do destroy_artifacts_batch end end @@ -42,30 +41,19 @@ def destroy_job_artifacts_batch return false if artifacts.empty? - if parallel_destroy? - parallel_destroy_batch(artifacts) - else - legacy_destroy_batch(artifacts) - destroy_related_records_for(artifacts) - end - + parallel_destroy_batch(artifacts) true end + # TODO: Make sure this can also be parallelized + # https://gitlab.com/gitlab-org/gitlab/-/issues/270973 def destroy_pipeline_artifacts_batch artifacts = Ci::PipelineArtifact.expired(BATCH_SIZE).to_a return false if artifacts.empty? - legacy_destroy_batch(artifacts) - true - end - - def parallel_destroy? - ::Feature.enabled?(:ci_delete_objects) - end - - def legacy_destroy_batch(artifacts) artifacts.each(&:destroy!) + + true end def parallel_destroy_batch(job_artifacts) @@ -77,6 +65,7 @@ def parallel_destroy_batch(job_artifacts) # This is executed outside of the transaction because it depends on Redis update_statistics_for(job_artifacts) + destroyed_artifacts_counter.increment({}, job_artifacts.size) end # This method is implemented in EE and it must do only database work @@ -91,19 +80,12 @@ def update_statistics_for(job_artifacts) end end - def loop_timeout - if parallel_destroy? - LOOP_TIMEOUT - else - LEGACY_LOOP_TIMEOUT - end - end + def destroyed_artifacts_counter + strong_memoize(:destroyed_artifacts_counter) do + name = :destroyed_job_artifacts_count_total + comment = 'Counter of destroyed expired job artifacts' - def lock_timeout - if parallel_destroy? - LOCK_TIMEOUT - else - LEGACY_LOCK_TIMEOUT + ::Gitlab::Metrics.counter(name, comment) end end end diff --git a/app/workers/ci/delete_objects_worker.rb b/app/workers/ci/delete_objects_worker.rb index 1a886d0efeb5da384cb3a3c0cd8f8e766932bc1e..d845ad613589d577ced8eae825bb2a5a4e148021 100644 --- a/app/workers/ci/delete_objects_worker.rb +++ b/app/workers/ci/delete_objects_worker.rb @@ -18,14 +18,12 @@ def remaining_work_count(*args) end def max_running_jobs - if ::Feature.enabled?(:ci_delete_objects_low_concurrency) - 2 - elsif ::Feature.enabled?(:ci_delete_objects_medium_concurrency) + if ::Feature.enabled?(:ci_delete_objects_medium_concurrency) 20 elsif ::Feature.enabled?(:ci_delete_objects_high_concurrency) 50 else - 0 + 2 end end diff --git a/changelogs/unreleased/enable-ci-deleted-objects-ff.yml b/changelogs/unreleased/enable-ci-deleted-objects-ff.yml new file mode 100644 index 0000000000000000000000000000000000000000..f7c76c1ed8eb9c9433523c28057ac50ef6ae19ab --- /dev/null +++ b/changelogs/unreleased/enable-ci-deleted-objects-ff.yml @@ -0,0 +1,5 @@ +--- +title: Parallelize the removal of expired job artifacts +merge_request: 46971 +author: +type: performance diff --git a/config/feature_flags/development/ci_delete_objects.yml b/config/feature_flags/development/ci_delete_objects.yml deleted file mode 100644 index d1bdce6dfab9ddcaf34d5f5131819062ca240128..0000000000000000000000000000000000000000 --- a/config/feature_flags/development/ci_delete_objects.yml +++ /dev/null @@ -1,7 +0,0 @@ ---- -name: ci_delete_objects -introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/42242 -rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/247103 -group: group::continuous integration -type: development -default_enabled: false \ No newline at end of file diff --git a/config/feature_flags/development/ci_delete_objects_low_concurrency.yml b/config/feature_flags/development/ci_delete_objects_low_concurrency.yml deleted file mode 100644 index cc59e0e3f6f8b3f5a710d3ec0f4f9667efc02804..0000000000000000000000000000000000000000 --- a/config/feature_flags/development/ci_delete_objects_low_concurrency.yml +++ /dev/null @@ -1,7 +0,0 @@ ---- -name: ci_delete_objects_low_concurrency -introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/39464 -rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/247103 -group: group::continuous integration -type: development -default_enabled: false \ No newline at end of file diff --git a/config/gitlab.yml.example b/config/gitlab.yml.example index 102d24b1edd7addc4309ba58be0d5f8f147b2cf7..21537c06b0bb68b9cd5ae3a66dc319395561ebb1 100644 --- a/config/gitlab.yml.example +++ b/config/gitlab.yml.example @@ -435,7 +435,7 @@ production: &base cron: "19 * * * *" # Remove expired build artifacts expire_build_artifacts_worker: - cron: "50 * * * *" + cron: "*/7 * * * *" # Remove files from object storage ci_schedule_delete_objects_worker: cron: "*/16 * * * *" diff --git a/config/initializers/1_settings.rb b/config/initializers/1_settings.rb index cb37c4c586e79dcf9ac8ab988aa174f38a47b7f7..4a9c6669bb6c031c6bee4cc3fd9ecbc14a0278ce 100644 --- a/config/initializers/1_settings.rb +++ b/config/initializers/1_settings.rb @@ -415,7 +415,7 @@ Settings.cron_jobs['pipeline_schedule_worker']['cron'] ||= '19 * * * *' Settings.cron_jobs['pipeline_schedule_worker']['job_class'] = 'PipelineScheduleWorker' Settings.cron_jobs['expire_build_artifacts_worker'] ||= Settingslogic.new({}) -Settings.cron_jobs['expire_build_artifacts_worker']['cron'] ||= '50 * * * *' +Settings.cron_jobs['expire_build_artifacts_worker']['cron'] ||= '*/7 * * * *' Settings.cron_jobs['expire_build_artifacts_worker']['job_class'] = 'ExpireBuildArtifactsWorker' Settings.cron_jobs['ci_schedule_delete_objects_worker'] ||= Settingslogic.new({}) Settings.cron_jobs['ci_schedule_delete_objects_worker']['cron'] ||= '*/16 * * * *' diff --git a/doc/administration/monitoring/prometheus/gitlab_metrics.md b/doc/administration/monitoring/prometheus/gitlab_metrics.md index e72fde0c04068b68e13d59f15498b7576c75c3b9..5c9268b54e15434d02f80edb2da0fd732f421036 100644 --- a/doc/administration/monitoring/prometheus/gitlab_metrics.md +++ b/doc/administration/monitoring/prometheus/gitlab_metrics.md @@ -208,6 +208,7 @@ configuration option in `gitlab.yml`. These metrics are served from the | `limited_capacity_worker_running_jobs` | Gauge | 13.5 | Number of running jobs | `worker` | | `limited_capacity_worker_max_running_jobs` | Gauge | 13.5 | Maximum number of running jobs | `worker` | | `limited_capacity_worker_remaining_work_count` | Gauge | 13.5 | Number of jobs waiting to be enqueued | `worker` | +| `destroyed_job_artifacts_count_total` | Counter | 13.6 | Number of destroyed expired job artifacts | | ## Database load balancing metrics **(PREMIUM ONLY)** diff --git a/ee/spec/services/ee/ci/destroy_expired_job_artifacts_service_spec.rb b/ee/spec/services/ee/ci/destroy_expired_job_artifacts_service_spec.rb index c6fd5b5c29d346b8994a12ab52088de966465b60..3c2c8923436505b40a2f7d0bf242b4211fe2c55f 100644 --- a/ee/spec/services/ee/ci/destroy_expired_job_artifacts_service_spec.rb +++ b/ee/spec/services/ee/ci/destroy_expired_job_artifacts_service_spec.rb @@ -61,35 +61,17 @@ end context 'when failed to destroy artifact' do - context 'with ci_delete_objects disabled' do - before do - stub_feature_flags(ci_delete_objects: false) - stub_const('Ci::DestroyExpiredJobArtifactsService::LOOP_LIMIT', 10) - - allow_next_found_instance_of(Ci::JobArtifact) do |artifact| - allow(artifact).to receive(:destroy!).and_raise(ActiveRecord::RecordNotDestroyed) - end - end - - it 'raises an exception and stop destroying' do - expect { subject }.to raise_error(ActiveRecord::RecordNotDestroyed) - .and not_change { Security::Finding.count }.from(1) - end + before do + stub_const('Ci::DestroyExpiredJobArtifactsService::LOOP_LIMIT', 10) + expect(Ci::DeletedObject) + .to receive(:bulk_import) + .once + .and_raise(ActiveRecord::RecordNotDestroyed) end - context 'with ci_delete_objects enabled' do - before do - stub_const('Ci::DestroyExpiredJobArtifactsService::LOOP_LIMIT', 10) - expect(Ci::DeletedObject) - .to receive(:bulk_import) - .once - .and_raise(ActiveRecord::RecordNotDestroyed) - end - - it 'raises an exception and stop destroying' do - expect { subject }.to raise_error(ActiveRecord::RecordNotDestroyed) - .and not_change { Security::Finding.count }.from(1) - end + it 'raises an exception and stop destroying' do + expect { subject }.to raise_error(ActiveRecord::RecordNotDestroyed) + .and not_change { Security::Finding.count }.from(1) end end diff --git a/spec/services/ci/destroy_expired_job_artifacts_service_spec.rb b/spec/services/ci/destroy_expired_job_artifacts_service_spec.rb index 741d2666efc7102db41350206a4eff375ff120ee..c8d426ee657176bb1682999de0aaca4802b927be 100644 --- a/spec/services/ci/destroy_expired_job_artifacts_service_spec.rb +++ b/spec/services/ci/destroy_expired_job_artifacts_service_spec.rb @@ -75,6 +75,14 @@ it 'does not remove the files' do expect { subject }.not_to change { artifact.file.exists? } end + + it 'reports metrics for destroyed artifacts' do + counter = service.send(:destroyed_artifacts_counter) + + expect(counter).to receive(:increment).with({}, 1).and_call_original + + subject + end end end @@ -114,49 +122,31 @@ stub_const('Ci::DestroyExpiredJobArtifactsService::LOOP_LIMIT', 10) end - context 'with ci_delete_objects disabled' do + context 'when the import fails' do before do - stub_feature_flags(ci_delete_objects: false) - - allow_any_instance_of(Ci::JobArtifact) - .to receive(:destroy!) + expect(Ci::DeletedObject) + .to receive(:bulk_import) + .once .and_raise(ActiveRecord::RecordNotDestroyed) end it 'raises an exception and stop destroying' do expect { subject }.to raise_error(ActiveRecord::RecordNotDestroyed) + .and not_change { Ci::JobArtifact.count }.from(1) end end - context 'with ci_delete_objects enabled' do - context 'when the import fails' do - before do - stub_const('Ci::DestroyExpiredJobArtifactsService::LOOP_LIMIT', 10) - expect(Ci::DeletedObject) - .to receive(:bulk_import) - .once - .and_raise(ActiveRecord::RecordNotDestroyed) - end - - it 'raises an exception and stop destroying' do - expect { subject }.to raise_error(ActiveRecord::RecordNotDestroyed) - .and not_change { Ci::JobArtifact.count }.from(1) - end + context 'when the delete fails' do + before do + expect(Ci::JobArtifact) + .to receive(:id_in) + .once + .and_raise(ActiveRecord::RecordNotDestroyed) end - context 'when the delete fails' do - before do - stub_const('Ci::DestroyExpiredJobArtifactsService::LOOP_LIMIT', 10) - expect(Ci::JobArtifact) - .to receive(:id_in) - .once - .and_raise(ActiveRecord::RecordNotDestroyed) - end - - it 'raises an exception rolls back the insert' do - expect { subject }.to raise_error(ActiveRecord::RecordNotDestroyed) - .and not_change { Ci::DeletedObject.count }.from(0) - end + it 'raises an exception rolls back the insert' do + expect { subject }.to raise_error(ActiveRecord::RecordNotDestroyed) + .and not_change { Ci::DeletedObject.count }.from(0) end end end diff --git a/spec/workers/ci/delete_objects_worker_spec.rb b/spec/workers/ci/delete_objects_worker_spec.rb index 5ff3b1d724aeabd12b0c76b0361cc48ed40db1ac..52d90d7667af71e0e229c77605da20c4aa8357f7 100644 --- a/spec/workers/ci/delete_objects_worker_spec.rb +++ b/spec/workers/ci/delete_objects_worker_spec.rb @@ -28,7 +28,6 @@ before do stub_feature_flags( - ci_delete_objects_low_concurrency: low, ci_delete_objects_medium_concurrency: medium, ci_delete_objects_high_concurrency: high ) @@ -36,13 +35,11 @@ subject(:max_running_jobs) { worker.max_running_jobs } - where(:low, :medium, :high, :expected) do - false | false | false | 0 - true | true | true | 2 - true | false | false | 2 - false | true | false | 20 - false | true | true | 20 - false | false | true | 50 + where(:medium, :high, :expected) do + false | false | 2 + true | false | 20 + true | true | 20 + false | true | 50 end with_them do