diff --git a/app/workers/concurrency_limit/resume_worker.rb b/app/workers/concurrency_limit/resume_worker.rb index e88277dc58118262d4c34345655af2a58e1f8d91..90af4b7a8980ebf81667ed094717c6747f9c1a2b 100644 --- a/app/workers/concurrency_limit/resume_worker.rb +++ b/app/workers/concurrency_limit/resume_worker.rb @@ -18,11 +18,13 @@ def perform reschedule_job = false workers.each do |worker| - next unless jobs_in_the_queue?(worker) + limit = ::Gitlab::SidekiqMiddleware::ConcurrencyLimit::WorkersMap.limit_for(worker: worker)&.call + queue_size = queue_size(worker) + report_prometheus_metrics(worker, queue_size, limit) - reschedule_job = true + next unless queue_size > 0 - limit = ::Gitlab::SidekiqMiddleware::ConcurrencyLimit::WorkersMap.limit_for(worker: worker)&.call + reschedule_job = true processing_limit = if limit current = current_concurrency(worker: worker) @@ -49,8 +51,8 @@ def current_concurrency(worker:) @current_concurrency[worker.name].to_i end - def jobs_in_the_queue?(worker) - Gitlab::SidekiqMiddleware::ConcurrencyLimit::ConcurrencyLimitService.has_jobs_in_queue?(worker.name) + def queue_size(worker) + Gitlab::SidekiqMiddleware::ConcurrencyLimit::ConcurrencyLimitService.queue_size(worker.name) end def resume_processing!(worker, limit:) @@ -60,5 +62,18 @@ def resume_processing!(worker, limit:) def workers Gitlab::SidekiqMiddleware::ConcurrencyLimit::WorkersMap.workers end + + def report_prometheus_metrics(worker, queue_size, limit) + queue_size_metric = Gitlab::Metrics.gauge(:sidekiq_concurrency_limit_queue_jobs, + 'Number of jobs queued by the concurrency limit middleware.', + {}, + :max) + queue_size_metric.set({ worker: worker.name }, queue_size) + + limit_metric = Gitlab::Metrics.gauge(:sidekiq_concurrency_limit_max_concurrent_jobs, + 'Max number of concurrent running jobs.', + {}) + limit_metric.set({ worker: worker.name }, limit || DEFAULT_LIMIT) + end end end diff --git a/doc/administration/monitoring/prometheus/gitlab_metrics.md 
b/doc/administration/monitoring/prometheus/gitlab_metrics.md index a284806124fec913aeca76612d83dd1fa228afe5..5ed8ab3f4d16c31a4c95f3b616a9aec4be5274be 100644 --- a/doc/administration/monitoring/prometheus/gitlab_metrics.md +++ b/doc/administration/monitoring/prometheus/gitlab_metrics.md @@ -232,6 +232,8 @@ configuration option in `gitlab.yml`. These metrics are served from the | `sidekiq_running_jobs` | Gauge | 12.2 | Number of Sidekiq jobs running | `queue`, `boundary`, `external_dependencies`, `feature_category`, `urgency` | | `sidekiq_concurrency` | Gauge | 12.5 | Maximum number of Sidekiq jobs | | | `sidekiq_mem_total_bytes` | Gauge | 15.3 | Number of bytes allocated for both objects consuming an object slot and objects that required a malloc'| | +| `sidekiq_concurrency_limit_queue_jobs` | Gauge | 17.3 | Number of Sidekiq jobs waiting in the concurrency limit queue | `worker` | +| `sidekiq_concurrency_limit_max_concurrent_jobs` | Gauge | 17.3 | Max number of concurrent running Sidekiq jobs | `worker` | | `geo_db_replication_lag_seconds` | Gauge | 10.2 | Database replication lag (seconds) | `url` | | `geo_repositories` | Gauge | 10.2 | Total number of repositories available on primary | `url` | | `geo_lfs_objects` | Gauge | 10.2 | Number of LFS objects on primary | `url` | diff --git a/ee/spec/workers/concurrency_limit/resume_worker_spec.rb b/ee/spec/workers/concurrency_limit/resume_worker_spec.rb index 3effcf3512390ea52ad9aa477fc8ef905364f2e5..c7c96cacde3b9b36e89bc3f52975372eb82867e3 100644 --- a/ee/spec/workers/concurrency_limit/resume_worker_spec.rb +++ b/ee/spec/workers/concurrency_limit/resume_worker_spec.rb @@ -15,7 +15,7 @@ context 'when there are no jobs in the queue' do before do - allow(Gitlab::SidekiqMiddleware::ConcurrencyLimit::ConcurrencyLimitService).to receive(:has_jobs_in_queue?) 
- .and_return(false) + allow(Gitlab::SidekiqMiddleware::ConcurrencyLimit::ConcurrencyLimitService).to receive(:queue_size) + .and_return(0) end it 'does nothing' do @@ -24,12 +24,33 @@ worker.perform end + + it 'reports prometheus metrics' do + stub_application_setting(elasticsearch_max_code_indexing_concurrency: 30) + queue_size_gauge_double = instance_double(Prometheus::Client::Gauge) + expect(Gitlab::Metrics).to receive(:gauge).at_least(:once) + .with(:sidekiq_concurrency_limit_queue_jobs, anything, {}, :max) + .and_return(queue_size_gauge_double) + + allow(queue_size_gauge_double).to receive(:set).with({ worker: anything }, 0) + expect(queue_size_gauge_double).to receive(:set).with({ worker: worker_with_concurrency_limit.name }, 0) + + limit_gauge_double = instance_double(Prometheus::Client::Gauge) + expect(Gitlab::Metrics).to receive(:gauge).at_least(:once) + .with(:sidekiq_concurrency_limit_max_concurrent_jobs, anything, {}) + .and_return(limit_gauge_double) + + allow(limit_gauge_double).to receive(:set).with({ worker: anything }, anything) + expect(limit_gauge_double).to receive(:set).with({ worker: worker_with_concurrency_limit.name }, 30) + + worker.perform + end end context 'when there are jobs in the queue' do before do - allow(Gitlab::SidekiqMiddleware::ConcurrencyLimit::ConcurrencyLimitService).to receive(:has_jobs_in_queue?) - .and_return(true) + allow(Gitlab::SidekiqMiddleware::ConcurrencyLimit::ConcurrencyLimitService).to receive(:queue_size) + .and_return(100) end it 'resumes processing' do @@ -52,19 +73,72 @@ worker.perform end - it 'resumes processing if limit is not set' do - nil_proc = -> { nil } - allow(Gitlab::SidekiqMiddleware::ConcurrencyLimit::WorkersMap).to receive(:limit_for) - expect(::Gitlab::SidekiqMiddleware::ConcurrencyLimit::WorkersMap).to receive(:limit_for) - .with(worker: worker_with_concurrency_limit) - .and_return(nil_proc) - expect(Gitlab::SidekiqMiddleware::ConcurrencyLimit::ConcurrencyLimitService) - .to receive(:resume_processing!) 
- .with(worker_with_concurrency_limit.name, limit: described_class::DEFAULT_LIMIT) - expect(described_class).to receive(:perform_in) + it 'reports prometheus metrics' do + stub_application_setting(elasticsearch_max_code_indexing_concurrency: 60) + allow(::Gitlab::SidekiqMiddleware::ConcurrencyLimit::WorkersConcurrency).to receive(:workers) + .and_return(worker_with_concurrency_limit.name => 15) + + queue_size_gauge_double = instance_double(Prometheus::Client::Gauge) + expect(Gitlab::Metrics).to receive(:gauge).at_least(:once) + .with(:sidekiq_concurrency_limit_queue_jobs, anything, {}, :max) + .and_return(queue_size_gauge_double) + + allow(queue_size_gauge_double).to receive(:set).with({ worker: anything }, anything) + expect(queue_size_gauge_double).to receive(:set).with({ worker: worker_with_concurrency_limit.name }, 100) + + limit_gauge_double = instance_double(Prometheus::Client::Gauge) + expect(Gitlab::Metrics).to receive(:gauge).at_least(:once) + .with(:sidekiq_concurrency_limit_max_concurrent_jobs, anything, {}) + .and_return(limit_gauge_double) + + allow(limit_gauge_double).to receive(:set).with({ worker: anything }, anything) + expect(limit_gauge_double).to receive(:set).with({ worker: worker_with_concurrency_limit.name }, 60) worker.perform end + + context 'when limit is not set' do + before do + allow(Gitlab::SidekiqMiddleware::ConcurrencyLimit::WorkersMap).to receive(:limit_for) + nil_proc = -> { nil } + allow(::Gitlab::SidekiqMiddleware::ConcurrencyLimit::WorkersMap).to receive(:limit_for) + .with(worker: worker_with_concurrency_limit) + .and_return(nil_proc) + end + + it 'resumes processing using the DEFAULT_LIMIT' do + expect(Gitlab::SidekiqMiddleware::ConcurrencyLimit::ConcurrencyLimitService) + .to receive(:resume_processing!) 
+ .with(worker_with_concurrency_limit.name, limit: described_class::DEFAULT_LIMIT) + expect(described_class).to receive(:perform_in) + + worker.perform + end + + it 'reports limit as DEFAULT_LIMIT' do + allow(::Gitlab::SidekiqMiddleware::ConcurrencyLimit::WorkersConcurrency).to receive(:workers) + .and_return(worker_with_concurrency_limit.name => 15) + + queue_size_gauge_double = instance_double(Prometheus::Client::Gauge) + expect(Gitlab::Metrics).to receive(:gauge).at_least(:once) + .with(:sidekiq_concurrency_limit_queue_jobs, anything, {}, :max) + .and_return(queue_size_gauge_double) + + allow(queue_size_gauge_double).to receive(:set).with({ worker: anything }, anything) + expect(queue_size_gauge_double).to receive(:set).with({ worker: worker_with_concurrency_limit.name }, 100) + + limit_gauge_double = instance_double(Prometheus::Client::Gauge) + expect(Gitlab::Metrics).to receive(:gauge).at_least(:once) + .with(:sidekiq_concurrency_limit_max_concurrent_jobs, anything, {}) + .and_return(limit_gauge_double) + + allow(limit_gauge_double).to receive(:set).with({ worker: anything }, anything) + expect(limit_gauge_double).to receive(:set) + .with({ worker: worker_with_concurrency_limit.name }, described_class::DEFAULT_LIMIT) + + worker.perform + end + end end end end