diff --git a/lib/gitlab/process_supervisor.rb b/lib/gitlab/process_supervisor.rb
index 18fd24aa582702817bff7714db48f397156a4954..714034f043dc34e79d8e0402463d7ed6be88dde9 100644
--- a/lib/gitlab/process_supervisor.rb
+++ b/lib/gitlab/process_supervisor.rb
@@ -20,7 +20,7 @@ def initialize(
       health_check_interval_seconds: DEFAULT_HEALTH_CHECK_INTERVAL_SECONDS,
       check_terminate_interval_seconds: DEFAULT_TERMINATE_INTERVAL_SECONDS,
       terminate_timeout_seconds: DEFAULT_TERMINATE_TIMEOUT_SECONDS,
-      term_signals: %i(INT TERM),
+      term_signals: [],
       forwarded_signals: [],
       **options)
       super(**options)
@@ -31,7 +31,7 @@ def initialize(
       @check_terminate_interval_seconds = check_terminate_interval_seconds
       @terminate_timeout_seconds = terminate_timeout_seconds
 
-      @pids = []
+      @pids = Set.new
       @alive = false
     end
 
@@ -43,7 +43,7 @@ def initialize(
     # If the block returns a non-empty list of IDs, the supervisor will
     # start observing those processes instead. Otherwise it will shut down.
     def supervise(pid_or_pids, &on_process_death)
-      @pids = Array(pid_or_pids)
+      @pids = Array(pid_or_pids).to_set
       @on_process_death = on_process_death
 
       trap_signals!
@@ -56,7 +56,6 @@ def shutdown(signal = :TERM)
       return unless @alive
 
       stop_processes(signal)
-      stop
     end
 
     def supervised_pids
@@ -75,26 +74,25 @@ def stop_working
 
     def run_thread
       while @alive
-        sleep(@health_check_interval_seconds)
-
         check_process_health
+
+        sleep(@health_check_interval_seconds)
       end
     end
 
     def check_process_health
       unless all_alive?
-        existing_pids = live_pids # Capture this value for the duration of the block.
+        existing_pids = live_pids.to_set # Capture this value for the duration of the block.
         dead_pids = @pids - existing_pids
-        new_pids = Array(@on_process_death.call(dead_pids))
-        @pids = existing_pids + new_pids
-        @alive = @pids.any?
+        new_pids = Array(@on_process_death.call(dead_pids.to_a))
+        @pids = existing_pids + new_pids.to_set
       end
     end
 
     def stop_processes(signal)
       # Set this prior to shutting down so that shutdown hooks which read `alive`
       # know the supervisor is about to shut down.
-      @alive = false
+      stop_working
 
       # Shut down supervised processes.
       signal_all(signal)
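
A standalone sketch (plain Ruby, not the class itself) may help reviewers follow the Set-based bookkeeping `check_process_health` now performs: dead PIDs are diffed out, handed to the callback as a plain Array, and the callback's return value (a single PID or a list) is merged back with duplicates collapsed. The PID values here are illustrative:

```ruby
require 'set'

pids          = Set.new([101, 102])   # supervised PIDs
existing_pids = Set.new([102])        # live_pids after PID 101 died
dead_pids     = pids - existing_pids  # => #<Set: {101}>

# The callback may return a single PID or a list; Array() normalizes both.
on_process_death = ->(dead) { 102 }

new_pids = Array(on_process_death.call(dead_pids.to_a))
pids = existing_pids + new_pids.to_set # => #<Set: {102}>, not double-counted
```

This is the motivation for moving `@pids` from an Array to a Set: a callback that re-reports an already-live PID can no longer inflate the supervised set.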
diff --git a/metrics_server/metrics_server.rb b/metrics_server/metrics_server.rb
index 7bc48ed1ff8c5707b9ffa671b22ef5824b9d1f7b..309334cd899c839eb55968f38b53abb557775e44 100644
--- a/metrics_server/metrics_server.rb
+++ b/metrics_server/metrics_server.rb
@@ -22,8 +22,6 @@ def start_for_puma
 
       supervisor = PumaProcessSupervisor.instance
       supervisor.supervise(start_server.call) do
-        next unless supervisor.alive
-
         Gitlab::AppLogger.info('Puma metrics server terminated, restarting...')
         start_server.call
       end
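
The `next unless supervisor.alive` guard removed above is redundant under the new supervisor semantics: once `shutdown` runs, the health-check loop has already stopped, so the block can no longer fire. A minimal sketch of the resulting restart loop, assuming an illustrative server command:

```ruby
# Illustrative command; the real code builds the metrics server invocation.
start_server = -> { Process.spawn('bin/metrics-server') }

supervisor = PumaProcessSupervisor.instance
supervisor.supervise(start_server.call) do
  # Unconditional respawn: this block is never invoked after shutdown begins.
  start_server.call
end
```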
diff --git a/sidekiq_cluster/cli.rb b/sidekiq_cluster/cli.rb
index 4f02812d2e29f6dc162f7aa8bbcc436346ef62de..5dac3b1d3c7d44ee9076649957e77acd5a7ab614 100644
--- a/sidekiq_cluster/cli.rb
+++ b/sidekiq_cluster/cli.rb
@@ -20,7 +20,7 @@
 module Gitlab
   module SidekiqCluster
     class CLI
-      THREAD_NAME = 'supervisor'
+      THREAD_NAME = 'sidekiq-cluster'
 
       # The signals that should terminate both the master and workers.
       TERMINATE_SIGNALS = %i(INT TERM).freeze
@@ -134,23 +134,17 @@ def start_and_supervise_workers(queue_groups)
         )
 
         metrics_server_pid = start_metrics_server
-
-        all_pids = worker_pids + Array(metrics_server_pid)
-
-        supervisor.supervise(all_pids) do |dead_pids|
+        supervisor.supervise(worker_pids + Array(metrics_server_pid)) do |dead_pids|
           # If we're not in the process of shutting down the cluster,
           # and the metrics server died, restart it.
-          if supervisor.alive && dead_pids.include?(metrics_server_pid)
+          if dead_pids == Array(metrics_server_pid)
             @logger.info('Sidekiq metrics server terminated, restarting...')
             metrics_server_pid = restart_metrics_server
-            all_pids = worker_pids + Array(metrics_server_pid)
           else
             # If a worker process died we'll just terminate the whole cluster.
             # We let an external system (runit, kubernetes) handle the restart.
             @logger.info('A worker terminated, shutting down the cluster')
-
-            ProcessManagement.signal_processes(all_pids - dead_pids, :TERM)
-            # Signal supervisor not to respawn workers and shut down.
+            supervisor.shutdown
             []
           end
         end
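
The new branch condition is worth spelling out: the metrics server is restarted only when it is the sole casualty; any dead worker shuts the whole cluster down, even if the metrics server died in the same health check. A plain-Ruby restatement with a hypothetical `action_for` helper:

```ruby
# Hypothetical helper restating the dispatch rule from the supervise block.
def action_for(dead_pids, metrics_server_pid)
  if dead_pids == Array(metrics_server_pid)
    :restart_metrics_server
  else
    :shutdown_cluster
  end
end

action_for([42], 42)    # => :restart_metrics_server
action_for([2, 99], 42) # => :shutdown_cluster (workers died)
action_for([2, 42], 42) # => :shutdown_cluster (worker death wins over restart)
action_for([99], nil)   # => :shutdown_cluster (metrics server not running)
```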
diff --git a/spec/commands/sidekiq_cluster/cli_spec.rb b/spec/commands/sidekiq_cluster/cli_spec.rb
index d949be8d102b933c35d9d3cf20883c08a2cd3abb..64c0b24abf0f1f518fff3c90aaf20f03df5cc85e 100644
--- a/spec/commands/sidekiq_cluster/cli_spec.rb
+++ b/spec/commands/sidekiq_cluster/cli_spec.rb
@@ -440,32 +440,18 @@
       end
 
       it 'stops the entire process cluster if one of the workers has been terminated' do
-        expect(supervisor).to receive(:alive).and_return(true)
-        expect(supervisor).to receive(:supervise).and_yield([2])
         expect(MetricsServer).to receive(:start_for_sidekiq).once.and_return(metrics_server_pid)
-        expect(Gitlab::ProcessManagement).to receive(:signal_processes).with([42, 99], :TERM)
+        expect(supervisor).to receive(:supervise).and_yield([2, 99])
+        expect(supervisor).to receive(:shutdown)
 
         cli.run(%w(foo))
       end
 
-      context 'when the supervisor is alive' do
-        it 'restarts the metrics server when it is down' do
-          expect(supervisor).to receive(:alive).and_return(true)
-          expect(supervisor).to receive(:supervise).and_yield([metrics_server_pid])
-          expect(MetricsServer).to receive(:start_for_sidekiq).twice.and_return(metrics_server_pid)
+      it 'restarts the metrics server when it is down' do
+        expect(supervisor).to receive(:supervise).and_yield([metrics_server_pid])
+        expect(MetricsServer).to receive(:start_for_sidekiq).twice.and_return(metrics_server_pid)
 
-          cli.run(%w(foo))
-        end
-      end
-
-      context 'when the supervisor is shutting down' do
-        it 'does not restart the metrics server' do
-          expect(supervisor).to receive(:alive).and_return(false)
-          expect(supervisor).to receive(:supervise).and_yield([metrics_server_pid])
-          expect(MetricsServer).to receive(:start_for_sidekiq).once.and_return(metrics_server_pid)
-
-          cli.run(%w(foo))
-        end
+        cli.run(%w(foo))
       end
     end
   end
diff --git a/spec/lib/gitlab/process_supervisor_spec.rb b/spec/lib/gitlab/process_supervisor_spec.rb
index 60b127daddacd5f7a8c18d2ad87c71da1c2f41ec..8356197805ced337602c12dd39fe1f94cbec560f 100644
--- a/spec/lib/gitlab/process_supervisor_spec.rb
+++ b/spec/lib/gitlab/process_supervisor_spec.rb
@@ -6,6 +6,7 @@
   let(:health_check_interval_seconds) { 0.1 }
   let(:check_terminate_interval_seconds) { 1 }
   let(:forwarded_signals) { [] }
+  let(:term_signals) { [] }
   let(:process_ids) { [spawn_process, spawn_process] }
 
   def spawn_process
@@ -19,7 +20,8 @@ def spawn_process
       health_check_interval_seconds: health_check_interval_seconds,
       check_terminate_interval_seconds: check_terminate_interval_seconds,
       terminate_timeout_seconds: 1 + check_terminate_interval_seconds,
-      forwarded_signals: forwarded_signals
+      forwarded_signals: forwarded_signals,
+      term_signals: term_signals
     )
   end
 
@@ -29,6 +31,8 @@ def spawn_process
     rescue Errno::ESRCH
       # Ignore if a process wasn't actually alive.
     end
+
+    supervisor.stop
   end
 
   describe '#supervise' do
@@ -60,7 +64,7 @@ def spawn_process
           [42] # Fake starting a new process in place of the terminated one.
         end
 
-        # Terminate the supervised process.
+        # Terminate a supervised process.
         Process.kill('TERM', process_ids.first)
 
         await_condition(sleep_sec: health_check_interval_seconds) do
@@ -71,6 +75,72 @@ def spawn_process
         expect(Gitlab::ProcessManagement.process_alive?(process_ids.last)).to be(true)
         expect(supervisor.supervised_pids).to match_array([process_ids.last, 42])
       end
+
+      it 'deduplicates PIDs returned from the callback' do
+        expect(Gitlab::ProcessManagement.all_alive?(process_ids)).to be(true)
+        pids_killed = []
+
+        supervisor.supervise(process_ids) do |dead_pids|
+          pids_killed = dead_pids
+        # Fake a new process that reuses the PID of the still-running second process.
+          [process_ids.last]
+        end
+
+        # Terminate a supervised process.
+        Process.kill('TERM', process_ids.first)
+
+        await_condition(sleep_sec: health_check_interval_seconds) do
+          pids_killed == [process_ids.first]
+        end
+
+        expect(supervisor.supervised_pids).to contain_exactly(process_ids.last)
+      end
+
+      it 'accepts a single PID returned from the callback' do
+        expect(Gitlab::ProcessManagement.all_alive?(process_ids)).to be(true)
+        pids_killed = []
+
+        supervisor.supervise(process_ids) do |dead_pids|
+          pids_killed = dead_pids
+          42
+        end
+
+        # Terminate a supervised process.
+        Process.kill('TERM', process_ids.first)
+
+        await_condition(sleep_sec: health_check_interval_seconds) do
+          pids_killed == [process_ids.first]
+        end
+
+        expect(supervisor.supervised_pids).to contain_exactly(42, process_ids.last)
+      end
+
+      context 'but the supervisor has entered shutdown' do
+        it 'does not trigger the callback again' do
+          expect(Gitlab::ProcessManagement.all_alive?(process_ids)).to be(true)
+          callback_count = 0
+
+          supervisor.supervise(process_ids) do |dead_pids|
+            callback_count += 1
+
+            Thread.new { supervisor.shutdown }
+
+            [42]
+          end
+
+          # Terminate both supervised processes, which would otherwise trigger the callback twice.
+          Process.kill('TERM', process_ids.first)
+          Process.kill('TERM', process_ids.last)
+
+          await_condition(sleep_sec: health_check_interval_seconds * 3) do
+            supervisor.alive == false
+          end
+
+          # Since we shut down the supervisor during the first callback, it should not
+          # be called anymore.
+          expect(callback_count).to eq(1)
+        end
+      end
     end
 
     context 'signal handling' do
@@ -82,6 +152,8 @@ def spawn_process
       end
 
       context 'termination signals' do
+        let(:term_signals) { %i(INT TERM) }
+
         context 'when TERM results in timely shutdown of processes' do
           it 'forwards them to observed processes without waiting for grace period to expire' do
             allow(Gitlab::ProcessManagement).to receive(:any_alive?).and_return(false)
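
The specs above poll through an `await_condition` helper defined elsewhere in this file; a minimal sketch of what it is assumed to do (the name comes from the specs, the signature and defaults are assumptions):

```ruby
# Assumed shape of the helper: re-check the block until it is truthy
# or a deadline passes.
def await_condition(timeout_sec: 5, sleep_sec: 0.1)
  deadline = Process.clock_gettime(Process::CLOCK_MONOTONIC) + timeout_sec
  sleep(sleep_sec) until yield || Process.clock_gettime(Process::CLOCK_MONOTONIC) > deadline
end
```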
diff --git a/spec/metrics_server/metrics_server_spec.rb b/spec/metrics_server/metrics_server_spec.rb
index 4a3580ed4b19ea00d01de2c2a9b34bb8e63f8f58..7cc4f6724c4ff0827f4688b9b6f3f147768a1ca8 100644
--- a/spec/metrics_server/metrics_server_spec.rb
+++ b/spec/metrics_server/metrics_server_spec.rb
@@ -267,28 +267,13 @@
     end
 
     context 'when the supervisor callback is invoked' do
-      context 'and the supervisor is alive' do
-        it 'restarts the metrics server' do
-          expect(supervisor).to receive(:alive).and_return(true)
-          expect(supervisor).to receive(:supervise).and_yield
-          expect(Process).to receive(:spawn).with(
-            include('METRICS_SERVER_TARGET' => 'puma'), end_with('bin/metrics-server'), anything
-          ).twice.and_return(42)
-
-          described_class.start_for_puma
-        end
-      end
+      it 'restarts the metrics server' do
+        expect(supervisor).to receive(:supervise).and_yield
+        expect(Process).to receive(:spawn).with(
+          include('METRICS_SERVER_TARGET' => 'puma'), end_with('bin/metrics-server'), anything
+        ).twice.and_return(42)
 
-      context 'and the supervisor is not alive' do
-        it 'does not restart the server' do
-          expect(supervisor).to receive(:alive).and_return(false)
-          expect(supervisor).to receive(:supervise).and_yield
-          expect(Process).to receive(:spawn).with(
-            include('METRICS_SERVER_TARGET' => 'puma'), end_with('bin/metrics-server'), anything
-          ).once.and_return(42)
-
-          described_class.start_for_puma
-        end
+        described_class.start_for_puma
       end
     end
   end