diff --git a/Gemfile b/Gemfile index 5f3721e7663be28c6fa2512edf423865459b1ab6..0c4f8d83782b72da3039c1fc8c61a0ec1c9ada90 100644 --- a/Gemfile +++ b/Gemfile @@ -327,7 +327,7 @@ group :metrics do gem 'influxdb', '~> 0.2', require: false # Prometheus - gem 'prometheus-client-mmap', '~> 0.9.10' + gem 'prometheus-client-mmap', '~> 0.10.0' gem 'raindrops', '~> 0.18' end diff --git a/Gemfile.lock b/Gemfile.lock index c5f3dc3e5a4a825359133afff3f88ca639c7286b..6a19d35774d80f83e5f4d6122036e5422a92b25e 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -749,7 +749,7 @@ GEM parser unparser procto (0.0.3) - prometheus-client-mmap (0.9.10) + prometheus-client-mmap (0.10.0) pry (0.11.3) coderay (~> 1.1.0) method_source (~> 0.9.0) @@ -1292,7 +1292,7 @@ DEPENDENCIES pg (~> 1.1) png_quantizator (~> 0.2.1) premailer-rails (~> 1.10.3) - prometheus-client-mmap (~> 0.9.10) + prometheus-client-mmap (~> 0.10.0) pry-byebug (~> 3.5.1) pry-rails (~> 0.3.4) rack (~> 2.0.7) diff --git a/changelogs/unreleased/sh-disable-prom-metrics-on-failure.yml b/changelogs/unreleased/sh-disable-prom-metrics-on-failure.yml new file mode 100644 index 0000000000000000000000000000000000000000..d9db2847d2ee212c9271964740f94c3b7aaa31b9 --- /dev/null +++ b/changelogs/unreleased/sh-disable-prom-metrics-on-failure.yml @@ -0,0 +1,5 @@ +--- +title: Disable Prometheus metrics if initialization fails +merge_request: 22355 +author: +type: fixed diff --git a/config/initializers/7_prometheus_metrics.rb b/config/initializers/7_prometheus_metrics.rb index 22bb5f1764ddf51281db47cc674286b57dec8b91..aa2601ea65037c137b24da5546f6da7dbe445830 100644 --- a/config/initializers/7_prometheus_metrics.rb +++ b/config/initializers/7_prometheus_metrics.rb @@ -43,6 +43,9 @@ def prometheus_default_multiproc_dir defined?(::Prometheus::Client.reinitialize_on_pid_change) && Prometheus::Client.reinitialize_on_pid_change Gitlab::Metrics::Samplers::RubySampler.initialize_instance(Settings.monitoring.ruby_sampler_interval).start + rescue IOError => e + Gitlab::ErrorTracking.track_exception(e) + Gitlab::Metrics.error_detected! end Gitlab::Cluster::LifecycleEvents.on_master_start do @@ -55,6 +58,9 @@ def prometheus_default_multiproc_dir end Gitlab::Metrics::RequestsRackMiddleware.initialize_http_request_duration_seconds + rescue IOError => e + Gitlab::ErrorTracking.track_exception(e) + Gitlab::Metrics.error_detected! end end diff --git a/lib/gitlab/metrics.rb b/lib/gitlab/metrics.rb index 61ed20ad6236b44459c926dc6d4ab3a6c362ccc9..d759ae24051f47f56752fe1246361ef9b84b9e8c 100644 --- a/lib/gitlab/metrics.rb +++ b/lib/gitlab/metrics.rb @@ -5,8 +5,14 @@ module Metrics include Gitlab::Metrics::InfluxDb include Gitlab::Metrics::Prometheus + @error = false + def self.enabled? influx_metrics_enabled? || prometheus_metrics_enabled? end + + def self.error? + @error + end end end diff --git a/lib/gitlab/metrics/prometheus.rb b/lib/gitlab/metrics/prometheus.rb index cab1edab48fb3b15c5bac2c1d40f059ee00abb89..f7480a8789e99a2dd6cce8c83cbee939ea6e7f96 100644 --- a/lib/gitlab/metrics/prometheus.rb +++ b/lib/gitlab/metrics/prometheus.rb @@ -61,6 +61,14 @@ def histogram(name, docstring, base_labels = {}, buckets = ::Prometheus::Client: safe_provide_metric(:histogram, name, docstring, base_labels, buckets) end + def error_detected! + clear_memoization(:prometheus_metrics_enabled) + + PROVIDER_MUTEX.synchronize do + @error = true + end + end + private def safe_provide_metric(method, name, *args) @@ -81,7 +89,7 @@ def provide_metric(name) end def prometheus_metrics_enabled_unmemoized - metrics_folder_present? && Gitlab::CurrentSettings.prometheus_metrics_enabled || false + !error? && metrics_folder_present? && Gitlab::CurrentSettings.prometheus_metrics_enabled || false end end end diff --git a/spec/lib/gitlab/metrics/prometheus_spec.rb b/spec/lib/gitlab/metrics/prometheus_spec.rb index b37624982e2140dc50a2fa08991f57b2e82118b4..d4aa96a5b2028fdb07b0c4d035759893e20fe9be 100644 --- a/spec/lib/gitlab/metrics/prometheus_spec.rb +++ b/spec/lib/gitlab/metrics/prometheus_spec.rb @@ -17,4 +17,21 @@ expect(all_metrics.registry.metrics.count).to eq(0) end end + + describe '#error_detected!' do + before do + allow(all_metrics).to receive(:metrics_folder_present?).and_return(true) + stub_application_setting(prometheus_metrics_enabled: true) + end + + it 'disables Prometheus metrics' do + expect(all_metrics.error?).to be_falsey + expect(all_metrics.prometheus_metrics_enabled?).to be_truthy + + all_metrics.error_detected! + + expect(all_metrics.prometheus_metrics_enabled?).to be_falsey + expect(all_metrics.error?).to be_truthy + end + end end