diff --git a/config/gitlab.yml.example b/config/gitlab.yml.example index a2224a13d199120065da32791908c4e16ab770f3..fee7e599fc70fc10005ebda0e1a93cdca791422e 100644 --- a/config/gitlab.yml.example +++ b/config/gitlab.yml.example @@ -1286,9 +1286,8 @@ production: &base sidekiq_health_checks: # enabled: true - # log_enabled: false # address: localhost - # port: 8082 + # port: 8092 # Web exporter is a dedicated Rack server running alongside Puma to expose Prometheus metrics # It runs alongside the `/metrics` endpoints to ease the publish of metrics diff --git a/config/initializers/1_settings.rb b/config/initializers/1_settings.rb index 9b72fa773507967c8ea7fa4dc7ea20e666fb88df..ea6a00b395fe75ebe305d7e633ae233c3bd4586e 100644 --- a/config/initializers/1_settings.rb +++ b/config/initializers/1_settings.rb @@ -966,15 +966,10 @@ Settings.monitoring.sidekiq_exporter['address'] ||= 'localhost' Settings.monitoring.sidekiq_exporter['port'] ||= 8082 -# TODO: Once we split out health checks from SidekiqExporter, we -# should not let this default to the same settings anymore; we only -# do this for back-compat currently. -# https://gitlab.com/gitlab-org/gitlab/-/issues/345804 Settings.monitoring['sidekiq_health_checks'] ||= Settingslogic.new({}) -Settings.monitoring.sidekiq_health_checks['enabled'] ||= Settings.monitoring.sidekiq_exporter['enabled'] -Settings.monitoring.sidekiq_health_checks['log_enabled'] ||= Settings.monitoring.sidekiq_exporter['log_enabled'] -Settings.monitoring.sidekiq_health_checks['address'] ||= Settings.monitoring.sidekiq_exporter['address'] -Settings.monitoring.sidekiq_health_checks['port'] ||= Settings.monitoring.sidekiq_exporter['port'] +Settings.monitoring.sidekiq_health_checks['enabled'] ||= false +Settings.monitoring.sidekiq_health_checks['address'] ||= 'localhost' +Settings.monitoring.sidekiq_health_checks['port'] ||= 8092 Settings.monitoring['web_exporter'] ||= Settingslogic.new({}) Settings.monitoring.web_exporter['enabled'] ||= false diff --git a/config/initializers/7_prometheus_metrics.rb b/config/initializers/7_prometheus_metrics.rb index 6953de670a75f39331f20e6247b2cb57aefbc272..a8e8bdf2c9fb1f4dbbd1ff6dc590576388213feb 100644 --- a/config/initializers/7_prometheus_metrics.rb +++ b/config/initializers/7_prometheus_metrics.rb @@ -58,24 +58,6 @@ def puma_dedicated_metrics_server? # context (i.e. not in the Rails console or rspec) and when users have enabled metrics. return if Rails.env.test? || !Gitlab::Runtime.application? || !Gitlab::Metrics.prometheus_metrics_enabled? -if Gitlab::Runtime.sidekiq? && (!ENV['SIDEKIQ_WORKER_ID'] || ENV['SIDEKIQ_WORKER_ID'] == '0') - # The single worker outside of a sidekiq-cluster, or the first worker (sidekiq_0) - # in a cluster of processes, is responsible for serving health checks. - # - # Do not clean the metrics directory here - the supervisor script should - # have already taken care of that. - Sidekiq.configure_server do |config| - config.on(:startup) do - # In https://gitlab.com/gitlab-org/gitlab/-/issues/345804 we are looking to - # only serve health-checks from a worker process; for backwards compatibility - # we still go through the metrics exporter server, but start to configure it - # with the new settings keys. - exporter_settings = Settings.monitoring.sidekiq_health_checks - Gitlab::Metrics::Exporter::SidekiqExporter.instance(exporter_settings).start - end - end -end - Gitlab::Cluster::LifecycleEvents.on_master_start do Gitlab::Metrics.gauge(:deployments, 'GitLab Version', {}, :max).set({ version: Gitlab::VERSION, revision: Gitlab.revision }, 1) diff --git a/config/initializers/sidekiq.rb b/config/initializers/sidekiq.rb index 25984c45318846fe239e23b94c49c85152545b9e..ade14f2e1a18844b1e692b7482b8fd33fedd93fd 100644 --- a/config/initializers/sidekiq.rb +++ b/config/initializers/sidekiq.rb @@ -63,6 +63,17 @@ def enable_semi_reliable_fetch_mode? Gitlab::SidekiqDaemon::Monitor.instance.start Gitlab::SidekiqDaemon::MemoryKiller.instance.start if enable_sidekiq_memory_killer && use_sidekiq_daemon_memory_killer + + first_sidekiq_worker = !ENV['SIDEKIQ_WORKER_ID'] || ENV['SIDEKIQ_WORKER_ID'] == '0' + health_checks = Settings.monitoring.sidekiq_health_checks + + # Start health-check in-process server + if first_sidekiq_worker && health_checks.enabled + Gitlab::HealthChecks::Server.instance( + address: health_checks.address, + port: health_checks.port + ).start + end end if enable_reliable_fetch? diff --git a/lib/gitlab/health_checks/middleware.rb b/lib/gitlab/health_checks/middleware.rb new file mode 100644 index 0000000000000000000000000000000000000000..3fe065147c8ad32af2686bd413ea82fd697b1a1f --- /dev/null +++ b/lib/gitlab/health_checks/middleware.rb @@ -0,0 +1,33 @@ +# frozen_string_literal: true + +module Gitlab + module HealthChecks + class Middleware + def initialize(app, readiness_probe, liveness_probe) + @app = app + @readiness_probe = readiness_probe + @liveness_probe = liveness_probe + end + + def call(env) + case env['PATH_INFO'] + when '/readiness' then render_probe(@readiness_probe) + when '/liveness' then render_probe(@liveness_probe) + else @app.call(env) + end + end + + private + + def render_probe(probe) + result = probe.execute + + [ + result.http_status, + { 'Content-Type' => 'application/json; charset=utf-8' }, + [result.json.to_json] + ] + end + end + end +end diff --git a/lib/gitlab/health_checks/server.rb b/lib/gitlab/health_checks/server.rb new file mode 100644 index 0000000000000000000000000000000000000000..d747b64a2215db697900a5e1530f718f38231666 --- /dev/null +++ b/lib/gitlab/health_checks/server.rb @@ -0,0 +1,62 @@ +# frozen_string_literal: true + +require 'webrick' + +module Gitlab + module HealthChecks + class Server < Daemon + def initialize(address:, port:, **options) + super(**options) + + @address = address + @port = port + end + + private + + def start_working + @server = ::WEBrick::HTTPServer.new( + Port: @port, BindAddress: @address, AccessLog: [] + ) + @server.mount '/', Rack::Handler::WEBrick, rack_app + + true + end + + def run_thread + @server&.start + rescue IOError + # ignore forcibily closed servers + end + + def stop_working + if @server + # we close sockets if thread is not longer running + # this happens, when the process forks + if thread.alive? + @server.shutdown + else + @server.listeners.each(&:close) + end + end + + @server = nil + end + + def rack_app + readiness = new_probe + liveness = new_probe + + Rack::Builder.app do + use Rack::Deflater + use HealthChecks::Middleware, readiness, liveness + run -> (env) { [404, {}, ['']] } + end + end + + def new_probe + ::Gitlab::HealthChecks::Probes::Collection.new + end + end + end +end diff --git a/lib/gitlab/metrics/exporter/base_exporter.rb b/lib/gitlab/metrics/exporter/base_exporter.rb index 2aea8d655fa1ff3f4b8960217842c146e2461e15..ba2eb729d7b84d9f4bcf30ea9dce8ea48493a0af 100644 --- a/lib/gitlab/metrics/exporter/base_exporter.rb +++ b/lib/gitlab/metrics/exporter/base_exporter.rb @@ -71,28 +71,17 @@ def stop_working end def rack_app - readiness = readiness_probe - liveness = liveness_probe pid = thread_name gc_requests = @gc_requests Rack::Builder.app do use Rack::Deflater use Gitlab::Metrics::Exporter::MetricsMiddleware, pid - use Gitlab::Metrics::Exporter::HealthChecksMiddleware, readiness, liveness use Gitlab::Metrics::Exporter::GcRequestMiddleware if gc_requests use ::Prometheus::Client::Rack::Exporter if ::Gitlab::Metrics.metrics_folder_present? run -> (env) { [404, {}, ['']] } end end - - def readiness_probe - ::Gitlab::HealthChecks::Probes::Collection.new - end - - def liveness_probe - ::Gitlab::HealthChecks::Probes::Collection.new - end end end end diff --git a/lib/gitlab/metrics/exporter/health_checks_middleware.rb b/lib/gitlab/metrics/exporter/health_checks_middleware.rb deleted file mode 100644 index c43b8004b72a911bcd1460598d85742556b4394f..0000000000000000000000000000000000000000 --- a/lib/gitlab/metrics/exporter/health_checks_middleware.rb +++ /dev/null @@ -1,35 +0,0 @@ -# frozen_string_literal: true - -module Gitlab - module Metrics - module Exporter - class HealthChecksMiddleware - def initialize(app, readiness_probe, liveness_probe) - @app = app - @readiness_probe = readiness_probe - @liveness_probe = liveness_probe - end - - def call(env) - case env['PATH_INFO'] - when '/readiness' then render_probe(@readiness_probe) - when '/liveness' then render_probe(@liveness_probe) - else @app.call(env) - end - end - - private - - def render_probe(probe) - result = probe.execute - - [ - result.http_status, - { 'Content-Type' => 'application/json; charset=utf-8' }, - [result.json.to_json] - ] - end - end - end - end -end diff --git a/metrics_server/dependencies.rb b/metrics_server/dependencies.rb index bfa6aae8ef82b7c0f3f7d29f55a39dcd0689ec84..3f188658ba298b58fc694019d6579eb7d5e9bb22 100644 --- a/metrics_server/dependencies.rb +++ b/metrics_server/dependencies.rb @@ -26,7 +26,6 @@ require_relative '../lib/gitlab/metrics/exporter/web_exporter' require_relative '../lib/gitlab/metrics/exporter/sidekiq_exporter' require_relative '../lib/gitlab/metrics/exporter/metrics_middleware' -require_relative '../lib/gitlab/metrics/exporter/health_checks_middleware' require_relative '../lib/gitlab/metrics/exporter/gc_request_middleware' require_relative '../lib/gitlab/health_checks/probes/collection' require_relative '../lib/gitlab/health_checks/probes/status' diff --git a/sidekiq_cluster/cli.rb b/sidekiq_cluster/cli.rb index 4f02812d2e29f6dc162f7aa8bbcc436346ef62de..408b3864d5ed5bc8f5e415fd46e4fa59ada89e3b 100644 --- a/sidekiq_cluster/cli.rb +++ b/sidekiq_cluster/cli.rb @@ -174,26 +174,12 @@ def sidekiq_exporter_enabled? ::Settings.dig('monitoring', 'sidekiq_exporter', 'enabled') end - def exporter_has_a_unique_port? - # In https://gitlab.com/gitlab-org/gitlab/-/issues/345802 we added settings for sidekiq_health_checks. - # These settings default to the same values as sidekiq_exporter for backwards compatibility. - # If a different port for sidekiq_health_checks has been set up, we know that the - # user wants to serve health checks and metrics from different servers. - return false if sidekiq_health_check_port.nil? || sidekiq_exporter_port.nil? - - sidekiq_exporter_port != sidekiq_health_check_port - end - def sidekiq_exporter_port ::Settings.dig('monitoring', 'sidekiq_exporter', 'port') end - def sidekiq_health_check_port - ::Settings.dig('monitoring', 'sidekiq_health_checks', 'port') - end - def metrics_server_enabled? - !@dryrun && sidekiq_exporter_enabled? && exporter_has_a_unique_port? + !@dryrun && sidekiq_exporter_enabled? end def option_parser diff --git a/spec/commands/sidekiq_cluster/cli_spec.rb b/spec/commands/sidekiq_cluster/cli_spec.rb index d949be8d102b933c35d9d3cf20883c08a2cd3abb..ecad3f6b0bf2f0c6a117274f68f0da2bdd942f44 100644 --- a/spec/commands/sidekiq_cluster/cli_spec.rb +++ b/spec/commands/sidekiq_cluster/cli_spec.rb @@ -18,7 +18,6 @@ let(:sidekiq_exporter_enabled) { false } let(:sidekiq_exporter_port) { '3807' } - let(:sidekiq_health_checks_port) { '3807' } let(:config_file) { Tempfile.new('gitlab.yml') } let(:config) do @@ -29,11 +28,6 @@ 'address' => 'localhost', 'enabled' => sidekiq_exporter_enabled, 'port' => sidekiq_exporter_port - }, - 'sidekiq_health_checks' => { - 'address' => 'localhost', - 'enabled' => sidekiq_exporter_enabled, - 'port' => sidekiq_health_checks_port } } } @@ -310,37 +304,12 @@ cli.run(%w(foo)) end - context 'when there are no sidekiq_health_checks settings set' do - let(:sidekiq_exporter_enabled) { true } - - it 'does not start a sidekiq metrics server' do - expect(MetricsServer).not_to receive(:start_for_sidekiq) - - cli.run(%w(foo)) - end - end - - context 'when the sidekiq_exporter.port setting is not set' do - let(:sidekiq_exporter_enabled) { true } - - it 'does not start a sidekiq metrics server' do - expect(MetricsServer).not_to receive(:start_for_sidekiq) - - cli.run(%w(foo)) - end - end - - context 'when sidekiq_exporter.enabled setting is not set' do + context 'when sidekiq_exporter is not set up' do let(:config) do { 'test' => { 'monitoring' => { - 'sidekiq_exporter' => {}, - 'sidekiq_health_checks' => { - 'address' => 'localhost', - 'enabled' => sidekiq_exporter_enabled, - 'port' => sidekiq_health_checks_port - } + 'sidekiq_exporter' => {} } } } @@ -353,13 +322,12 @@ end end - context 'with a blank sidekiq_exporter setting' do + context 'with missing sidekiq_exporter setting' do let(:config) do { 'test' => { 'monitoring' => { - 'sidekiq_exporter' => nil, - 'sidekiq_health_checks' => nil + 'sidekiq_exporter' => nil } } } @@ -376,26 +344,21 @@ end end - context 'with valid settings' do - using RSpec::Parameterized::TableSyntax + context 'when sidekiq_exporter is disabled' do + it 'does not start a sidekiq metrics server' do + expect(MetricsServer).not_to receive(:start_for_sidekiq) - where(:sidekiq_exporter_enabled, :sidekiq_exporter_port, :sidekiq_health_checks_port, :start_metrics_server) do - true | '3807' | '3907' | true - true | '3807' | '3807' | false - false | '3807' | '3907' | false - false | '3807' | '3907' | false + cli.run(%w(foo)) end + end - with_them do - specify do - if start_metrics_server - expect(MetricsServer).to receive(:start_for_sidekiq).with(metrics_dir: metrics_dir, reset_signals: trapped_signals) - else - expect(MetricsServer).not_to receive(:start_for_sidekiq) - end + context 'when sidekiq_exporter is enabled' do + let(:sidekiq_exporter_enabled) { true } + + it 'starts the metrics server' do + expect(MetricsServer).to receive(:start_for_sidekiq).with(metrics_dir: metrics_dir, reset_signals: trapped_signals) - cli.run(%w(foo)) - end + cli.run(%w(foo)) end end @@ -431,7 +394,6 @@ context 'supervising the cluster' do let(:sidekiq_exporter_enabled) { true } - let(:sidekiq_health_checks_port) { '3907' } let(:metrics_server_pid) { 99 } let(:sidekiq_worker_pids) { [2, 42] } diff --git a/spec/lib/gitlab/metrics/exporter/health_checks_middleware_spec.rb b/spec/lib/gitlab/health_checks/middleware_spec.rb similarity index 83% rename from spec/lib/gitlab/metrics/exporter/health_checks_middleware_spec.rb rename to spec/lib/gitlab/health_checks/middleware_spec.rb index 9ee46a45e7ab3aad2dd6fdbccac55d0ae7dc5fca..3b644539acc8d4ce426a65c2c964ac790651ce48 100644 --- a/spec/lib/gitlab/metrics/exporter/health_checks_middleware_spec.rb +++ b/spec/lib/gitlab/health_checks/middleware_spec.rb @@ -2,12 +2,12 @@ require 'fast_spec_helper' -RSpec.describe Gitlab::Metrics::Exporter::HealthChecksMiddleware do - let(:app) { double(:app) } +RSpec.describe Gitlab::HealthChecks::Middleware do + let(:app) { instance_double(Proc) } let(:env) { { 'PATH_INFO' => path } } - let(:readiness_probe) { double(:readiness_probe) } - let(:liveness_probe) { double(:liveness_probe) } + let(:readiness_probe) { instance_double(Gitlab::HealthChecks::Probes::Collection) } + let(:liveness_probe) { instance_double(Gitlab::HealthChecks::Probes::Collection) } let(:probe_result) { Gitlab::HealthChecks::Probes::Status.new(200, { status: 'ok' }) } subject(:middleware) { described_class.new(app, readiness_probe, liveness_probe) } diff --git a/spec/lib/gitlab/health_checks/server_spec.rb b/spec/lib/gitlab/health_checks/server_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..65d24acbf22338a6f636886a2f0ca56f587824a6 --- /dev/null +++ b/spec/lib/gitlab/health_checks/server_spec.rb @@ -0,0 +1,64 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::HealthChecks::Server do + context 'with running server thread' do + subject(:server) { described_class.new(address: 'localhost', port: 8082) } + + before do + # We need to send a request to localhost + WebMock.allow_net_connect! + + server.start + end + + after do + webmock_enable! + + server.stop + end + + shared_examples 'serves health check at' do |path| + it 'responds with 200 OK' do + response = Gitlab::HTTP.try_get("http://localhost:8082/#{path}", allow_local_requests: true) + + expect(response.code).to eq(200) + end + end + + describe '/readiness' do + it_behaves_like 'serves health check at', 'readiness' + end + + describe '/liveness' do + it_behaves_like 'serves health check at', 'liveness' + end + + describe 'other routes' do + it 'serves 404' do + response = Gitlab::HTTP.try_get("http://localhost:8082/other", allow_local_requests: true) + + expect(response.code).to eq(404) + end + end + end + + context 'when server thread goes away' do + before do + expect_next_instance_of(::WEBrick::HTTPServer) do |webrick| + allow(webrick).to receive(:start) + expect(webrick).to receive(:listeners).and_call_original + end + end + + specify 'stop closes TCP socket' do + server = described_class.new(address: 'localhost', port: 8082) + server.start + + expect(server.thread).to receive(:alive?).and_return(false).at_least(:once) + + server.stop + end + end +end diff --git a/spec/lib/gitlab/metrics/exporter/base_exporter_spec.rb b/spec/lib/gitlab/metrics/exporter/base_exporter_spec.rb index c7afc02f0afbd844d944be45fe28ccff489993cf..66fba7ab6839db3f70d39195d2c7ceb86dfeb3eb 100644 --- a/spec/lib/gitlab/metrics/exporter/base_exporter_spec.rb +++ b/spec/lib/gitlab/metrics/exporter/base_exporter_spec.rb @@ -152,8 +152,6 @@ def call(env) where(:method_class, :path, :http_status) do Net::HTTP::Get | '/metrics' | 200 - Net::HTTP::Get | '/liveness' | 200 - Net::HTTP::Get | '/readiness' | 200 Net::HTTP::Get | '/' | 404 end