From d299c78fa96ee76d16a95524cf04cf5f8bc0c708 Mon Sep 17 00:00:00 2001
From: Pedro Pombeiro <noreply@pedro.pombei.ro>
Date: Mon, 8 Apr 2024 16:56:45 +0000
Subject: [PATCH] Capture running builds on non-shared runners

Required for runner fleet dashboard.

Changelog: changed
---
 app/models/ci/running_build.rb                |  20 ++--
 app/services/ci/update_build_queue_service.rb |  29 +++--
 .../add_all_ci_running_builds.yml             |   9 ++
 .../remove_all_ci_running_builds.yml          |   9 ++
 .../ci/runner/runner_fleet_pipeline_seeder.rb |   5 +-
 spec/factories/ci/builds.rb                   |   2 +-
 .../runner_fleet_pipeline_seeder_spec.rb      |   4 +-
 spec/models/ci/running_build_spec.rb          |  46 +++++---
 spec/services/ci/register_job_service_spec.rb |   4 +-
 .../ci/update_build_queue_service_spec.rb     | 110 +++++++++++++++---
 10 files changed, 183 insertions(+), 55 deletions(-)
 create mode 100644 config/feature_flags/gitlab_com_derisk/add_all_ci_running_builds.yml
 create mode 100644 config/feature_flags/gitlab_com_derisk/remove_all_ci_running_builds.yml

diff --git a/app/models/ci/running_build.rb b/app/models/ci/running_build.rb
index e70ba3c97c31..238eb7af6c81 100644
--- a/app/models/ci/running_build.rb
+++ b/app/models/ci/running_build.rb
@@ -2,12 +2,6 @@
 
 module Ci
   # This model represents metadata for a running build.
-  # Despite the generic RunningBuild name, in this first iteration it applies only to shared runners
-  #   (see Ci::RunningBuild.upsert_shared_runner_build!).
-  # The decision to insert all of the running builds here was deferred to avoid the pressure on the database as
-  # at this time that was not necessary.
-  # We can reconsider the decision to limit this only to shared runners when there is more evidence that inserting all
-  # of the running builds there is worth the additional pressure.
   class RunningBuild < Ci::ApplicationRecord
     include Ci::Partitionable
 
@@ -22,11 +16,15 @@ class RunningBuild < Ci::ApplicationRecord
 
     enum runner_type: ::Ci::Runner.runner_types
 
-    def self.upsert_shared_runner_build!(build)
-      unless build.shared_runner_build?
+    def self.upsert_build!(build)
+      unless add_ci_running_build?(build)
         raise ArgumentError, 'build has not been picked by a shared runner'
       end
 
+      if build.runner.nil?
+        raise ArgumentError, 'build has not been picked by a runner'
+      end
+
       entry = self.new(
         build: build,
         project: build.project,
@@ -38,5 +36,11 @@ def self.upsert_shared_runner_build!(build)
 
       self.upsert(entry.attributes.compact, returning: %w[build_id], unique_by: :build_id)
     end
+
+    private_class_method def self.add_ci_running_build?(build)
+      return true if Feature.enabled?(:add_all_ci_running_builds, Project.actor_from_id(build.project_id))
+
+      build.shared_runner_build?
+    end
   end
 end
diff --git a/app/services/ci/update_build_queue_service.rb b/app/services/ci/update_build_queue_service.rb
index 40941dd4cd05..e467a49738fb 100644
--- a/app/services/ci/update_build_queue_service.rb
+++ b/app/services/ci/update_build_queue_service.rb
@@ -50,18 +50,19 @@ def remove!(build)
     end
 
     ##
-    # Add shared runner build tracking entry (used for queuing).
+    # Add runner build tracking entry (used for queuing and for runner fleet dashboard).
     #
     def track(build, transition)
-      return unless build.shared_runner_build?
+      return if build.runner.nil?
+      return unless add_ci_running_build?(build)
 
       raise InvalidQueueTransition unless transition.to == 'running'
 
       transition.within_transaction do
-        result = ::Ci::RunningBuild.upsert_shared_runner_build!(build)
+        result = ::Ci::RunningBuild.upsert_build!(build)
 
         unless result.empty?
-          metrics.increment_queue_operation(:shared_runner_build_new)
+          metrics.increment_queue_operation(:shared_runner_build_new) if build.shared_runner_build?
 
           result.rows.dig(0, 0)
         end
@@ -69,11 +70,11 @@ def track(build, transition)
     end
 
     ##
-    # Remove a runtime build tracking entry for a shared runner build (used for
-    # queuing).
+    # Remove a runtime build tracking entry for a runner build (used for queuing and for runner fleet dashboard).
     #
     def untrack(build, transition)
-      return unless build.shared_runner_build?
+      return if build.runner.nil?
+      return unless remove_ci_running_build?(build)
 
       raise InvalidQueueTransition unless transition.from == 'running'
 
@@ -81,7 +82,7 @@ def untrack(build, transition)
         removed = build.all_runtime_metadata.delete_all
 
         if removed > 0
-          metrics.increment_queue_operation(:shared_runner_build_done)
+          metrics.increment_queue_operation(:shared_runner_build_done) if build.shared_runner_build?
 
           build.id
         end
@@ -109,5 +110,17 @@ def tick_for(build, runners)
         runner.pick_build!(build)
       end
     end
+
+    def add_ci_running_build?(build)
+      return true if Feature.enabled?(:add_all_ci_running_builds, Project.actor_from_id(build.project_id))
+
+      build.shared_runner_build?
+    end
+
+    def remove_ci_running_build?(build)
+      return true if Feature.enabled?(:remove_all_ci_running_builds, Project.actor_from_id(build.project_id))
+
+      build.shared_runner_build?
+    end
   end
 end
diff --git a/config/feature_flags/gitlab_com_derisk/add_all_ci_running_builds.yml b/config/feature_flags/gitlab_com_derisk/add_all_ci_running_builds.yml
new file mode 100644
index 000000000000..252105574a11
--- /dev/null
+++ b/config/feature_flags/gitlab_com_derisk/add_all_ci_running_builds.yml
@@ -0,0 +1,9 @@
+---
+name: add_all_ci_running_builds
+feature_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/437846
+introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/147943
+rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/452166
+milestone: '16.11'
+group: group::runner
+type: gitlab_com_derisk
+default_enabled: false
diff --git a/config/feature_flags/gitlab_com_derisk/remove_all_ci_running_builds.yml b/config/feature_flags/gitlab_com_derisk/remove_all_ci_running_builds.yml
new file mode 100644
index 000000000000..971feebe83df
--- /dev/null
+++ b/config/feature_flags/gitlab_com_derisk/remove_all_ci_running_builds.yml
@@ -0,0 +1,9 @@
+---
+name: remove_all_ci_running_builds
+feature_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/437846
+introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/147943
+rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/452166
+milestone: '16.11'
+group: group::runner
+type: gitlab_com_derisk
+default_enabled: false
diff --git a/lib/gitlab/seeders/ci/runner/runner_fleet_pipeline_seeder.rb b/lib/gitlab/seeders/ci/runner/runner_fleet_pipeline_seeder.rb
index 2cd9afc5bdce..daf99eba89b5 100644
--- a/lib/gitlab/seeders/ci/runner/runner_fleet_pipeline_seeder.rb
+++ b/lib/gitlab/seeders/ci/runner/runner_fleet_pipeline_seeder.rb
@@ -171,7 +171,10 @@ def create_build(pipeline, runner, job_status, index)
 
             ::Ci::Build.transaction do
               build = ::Ci::Build.new(importing: true, **build_attrs).tap(&:save!)
-              ::Ci::RunningBuild.upsert_shared_runner_build!(build) if build.running? && build.shared_runner_build?
+              if build.running? &&
+                  (Feature.enabled?(:add_all_ci_running_builds, build.project) || build.shared_runner_build?)
+                ::Ci::RunningBuild.upsert_build!(build)
+              end
             end
           end
 
diff --git a/spec/factories/ci/builds.rb b/spec/factories/ci/builds.rb
index 99a65ed13d2c..ddf57abc11b4 100644
--- a/spec/factories/ci/builds.rb
+++ b/spec/factories/ci/builds.rb
@@ -310,7 +310,7 @@
       runner factory: :ci_runner
 
       after(:create) do |build|
-        ::Ci::RunningBuild.upsert_shared_runner_build!(build)
+        ::Ci::RunningBuild.upsert_build!(build)
       end
     end
 
diff --git a/spec/lib/gitlab/seeders/ci/runner/runner_fleet_pipeline_seeder_spec.rb b/spec/lib/gitlab/seeders/ci/runner/runner_fleet_pipeline_seeder_spec.rb
index 930782dfadfe..5cd0670148f8 100644
--- a/spec/lib/gitlab/seeders/ci/runner/runner_fleet_pipeline_seeder_spec.rb
+++ b/spec/lib/gitlab/seeders/ci/runner/runner_fleet_pipeline_seeder_spec.rb
@@ -33,8 +33,8 @@ def runner_ids_for_project(runner_count, project)
         expect { seeder.seed }.to change { Ci::Build.count }.by(job_count)
           .and change { Ci::Pipeline.count }.by(4)
 
-        expect(Ci::Pipeline.where.not(started_at: nil).map(&:queued_duration)).to all(be < 5.minutes)
-        expect(Ci::Build.where.not(started_at: nil).map(&:queued_duration)).to all(be < 5.minutes)
+        expect(Ci::Pipeline.where.not(started_at: nil).map(&:queued_duration)).to all(be <= 5.minutes)
+        expect(Ci::Build.where.not(started_at: nil).map(&:queued_duration)).to all(be <= 5.minutes)
 
         projects_to_runners.first(3).each do |project|
           expect(Ci::Build.where(runner_id: project[:runner_ids])).not_to be_empty
diff --git a/spec/models/ci/running_build_spec.rb b/spec/models/ci/running_build_spec.rb
index 0a81eaf9ac06..cb19bd522818 100644
--- a/spec/models/ci/running_build_spec.rb
+++ b/spec/models/ci/running_build_spec.rb
@@ -9,12 +9,12 @@
   let(:runner) { create(:ci_runner, :instance_type) }
   let(:build) { create(:ci_build, :running, runner: runner, pipeline: pipeline) }
 
-  describe '.upsert_shared_runner_build!' do
+  describe '.upsert_build!' do
+    subject(:upsert_build) { described_class.upsert_build!(build) }
+
     context 'another pending entry does not exist' do
       it 'creates a new pending entry' do
-        result = described_class.upsert_shared_runner_build!(build)
-
-        expect(result.rows.dig(0, 0)).to eq build.id
+        expect(upsert_build.rows.dig(0, 0)).to eq build.id
         expect(build.reload.runtime_metadata).to be_present
       end
     end
@@ -25,18 +25,25 @@
       end
 
       it 'returns a build id as a result' do
-        result = described_class.upsert_shared_runner_build!(build)
-
-        expect(result.rows.dig(0, 0)).to eq build.id
+        expect(upsert_build.rows.dig(0, 0)).to eq build.id
       end
     end
 
     context 'when build has been picked by a project runner' do
-      let(:runner) { create(:ci_runner, :project) }
+      let_it_be(:runner) { create(:ci_runner, :project, projects: [project]) }
 
-      it 'raises an error' do
-        expect { described_class.upsert_shared_runner_build!(build) }
-          .to raise_error(ArgumentError, 'build has not been picked by a shared runner')
+      it 'returns a build id as a result' do
+        expect(upsert_build.rows.dig(0, 0)).to eq build.id
+      end
+
+      context 'with add_all_ci_running_builds FF disabled' do
+        before do
+          stub_feature_flags(add_all_ci_running_builds: false)
+        end
+
+        it 'raises an error' do
+          expect { upsert_build }.to raise_error(ArgumentError, 'build has not been picked by a shared runner')
+        end
       end
     end
 
@@ -44,8 +51,17 @@
       let(:build) { create(:ci_build, pipeline: pipeline) }
 
       it 'raises an error' do
-        expect { described_class.upsert_shared_runner_build!(build) }
-          .to raise_error(ArgumentError, 'build has not been picked by a shared runner')
+        expect { upsert_build }.to raise_error(ArgumentError, 'build has not been picked by a runner')
+      end
+
+      context 'with add_all_ci_running_builds FF disabled' do
+        before do
+          stub_feature_flags(add_all_ci_running_builds: false)
+        end
+
+        it 'raises an error' do
+          expect { upsert_build }.to raise_error(ArgumentError, 'build has not been picked by a shared runner')
+        end
       end
     end
   end
@@ -63,8 +79,8 @@
     it 'assigns the same partition id as the one that build has', :aggregate_failures do
       expect(new_build.partition_id).to eq ci_testing_partition_id_for_check_constraints
 
-      described_class.upsert_shared_runner_build!(build)
-      described_class.upsert_shared_runner_build!(new_build)
+      described_class.upsert_build!(build)
+      described_class.upsert_build!(new_build)
 
       expect(build.reload.runtime_metadata.partition_id).to eq pipeline.partition_id
       expect(new_build.reload.runtime_metadata.partition_id).to eq ci_testing_partition_id_for_check_constraints
diff --git a/spec/services/ci/register_job_service_spec.rb b/spec/services/ci/register_job_service_spec.rb
index f29e1a369141..07aa21f713ba 100644
--- a/spec/services/ci/register_job_service_spec.rb
+++ b/spec/services/ci/register_job_service_spec.rb
@@ -948,8 +948,8 @@ module Ci
           let(:build3) { create(:ci_build, :running, pipeline: pipeline, runner: shared_runner) }
 
           before do
-            ::Ci::RunningBuild.upsert_shared_runner_build!(build2)
-            ::Ci::RunningBuild.upsert_shared_runner_build!(build3)
+            ::Ci::RunningBuild.upsert_build!(build2)
+            ::Ci::RunningBuild.upsert_build!(build3)
           end
 
           it 'counts job queuing time histogram with expected labels' do
diff --git a/spec/services/ci/update_build_queue_service_spec.rb b/spec/services/ci/update_build_queue_service_spec.rb
index c5959127f343..8c5fed4fe25d 100644
--- a/spec/services/ci/update_build_queue_service_spec.rb
+++ b/spec/services/ci/update_build_queue_service_spec.rb
@@ -3,7 +3,8 @@
 require 'spec_helper'
 
 RSpec.describe Ci::UpdateBuildQueueService, feature_category: :continuous_integration do
-  let(:project) { create(:project, :repository) }
+  let_it_be_with_refind(:project) { create(:project, :repository) }
+
   let(:pipeline) { create(:ci_pipeline, project: project) }
   let(:build) { create(:ci_build, pipeline: pipeline) }
 
@@ -127,13 +128,16 @@
     end
   end
 
-  describe 'shared runner builds tracking' do
-    let(:runner) { create(:ci_runner, :instance_type) }
+  describe 'runner builds tracking' do
+    let_it_be(:runner) { create(:ci_runner, :instance_type) }
+
     let(:build) { create(:ci_build, runner: runner, pipeline: pipeline) }
 
     describe '#track' do
       let(:transition) { double('transition') }
 
+      subject(:build_id) { described_class.new.track(build, transition) }
+
       before do
         allow(transition).to receive(:to).and_return('running')
         allow(transition).to receive(:within_transaction).and_yield
@@ -141,7 +145,7 @@
 
       context 'when a shared runner build can be tracked' do
         it 'creates a new shared runner build tracking entry' do
-          build_id = subject.track(build, transition)
+          expect { build_id }.to change { Ci::RunningBuild.count }.from(0).to(1)
 
           expect(build_id).to eq build.id
         end
@@ -157,6 +161,48 @@
         end
       end
 
+      context 'when a project runner build can be tracked' do
+        let_it_be(:runner) { create(:ci_runner, :project, projects: [project]) }
+
+        it 'creates a new project runner build tracking entry' do
+          expect { build_id }.to change { Ci::RunningBuild.count }.from(0).to(1)
+
+          expect(build_id).to eq build.id
+        end
+
+        it 'does not increment new shared runner build metric' do
+          metrics = spy('metrics')
+
+          described_class.new(metrics).track(build, transition)
+
+          expect(metrics)
+            .not_to have_received(:increment_queue_operation)
+            .with(:shared_runner_build_new)
+        end
+
+        context 'with add_all_ci_running_builds FF disabled' do
+          before do
+            stub_feature_flags(add_all_ci_running_builds: false)
+          end
+
+          it 'does not create a new project runner build tracking entry' do
+            is_expected.to be_nil
+          end
+        end
+      end
+
+      context 'when runner is nil' do
+        let(:build) { create(:ci_build, runner: nil, pipeline: pipeline) }
+
+        it 'does nothing' do
+          expect(transition).not_to receive(:to)
+          expect(transition).not_to receive(:within_transaction)
+          expect(::Ci::RunningBuild).not_to receive(:upsert_build!)
+
+          expect(build_id).to be_nil
+        end
+      end
+
       context 'when invalid transition is detected' do
         it 'raises an error' do
           allow(transition).to receive(:to).and_return('pending')
@@ -172,9 +218,7 @@
         end
 
         it 'does nothing and returns build id' do
-          build_id = subject.track(build, transition)
-
-          expect(build_id).to eq build.id
+          is_expected.to eq build.id
         end
       end
     end
@@ -182,6 +226,8 @@
     describe '#untrack' do
       let(:transition) { double('transition') }
 
+      subject(:build_id) { described_class.new.untrack(build, transition) }
+
       before do
         allow(transition).to receive(:from).and_return('running')
         allow(transition).to receive(:within_transaction).and_yield
@@ -193,9 +239,7 @@
         end
 
         it 'removes shared runner build' do
-          build_id = subject.untrack(build, transition)
-
-          expect(build_id).to eq build.id
+          is_expected.to eq build.id
         end
 
         it 'increments shared runner build done metric' do
@@ -209,11 +253,41 @@
         end
       end
 
+      context 'when project runner build tracking entry exists' do
+        let_it_be(:runner) { create(:ci_runner, :project, projects: [project]) }
+
+        before do
+          create(:ci_running_build, build: build, project: project, runner: runner)
+        end
+
+        it 'removes project runner build' do
+          is_expected.to eq build.id
+        end
+
+        it 'does not increment shared runner build done metric' do
+          metrics = spy('metrics')
+
+          described_class.new(metrics).untrack(build, transition)
+
+          expect(metrics)
+            .not_to have_received(:increment_queue_operation)
+            .with(:shared_runner_build_done)
+        end
+
+        context 'with remove_all_ci_running_builds FF disabled' do
+          before do
+            stub_feature_flags(remove_all_ci_running_builds: false)
+          end
+
+          it 'does not remove project runner build' do
+            is_expected.to be_nil
+          end
+        end
+      end
+
       context 'when tracking entry does not exist' do
         it 'does nothing if there is no tracking entry to remove' do
-          build_id = subject.untrack(build, transition)
-
-          expect(build_id).to be_nil
+          is_expected.to be_nil
         end
       end
 
@@ -278,7 +352,7 @@
     end
 
     context 'when updating project runners' do
-      let(:runner) { create(:ci_runner, :project, projects: [project]) }
+      let_it_be_with_refind(:runner) { create(:ci_runner, :project, projects: [project]) }
 
       it_behaves_like 'matching build'
       it_behaves_like 'mismatching tags'
@@ -293,7 +367,7 @@
     end
 
     context 'when updating shared runners' do
-      let(:runner) { create(:ci_runner, :instance) }
+      let_it_be(:runner) { create(:ci_runner, :instance) }
 
       it_behaves_like 'matching build'
       it_behaves_like 'mismatching tags'
@@ -309,9 +383,9 @@
     end
 
     context 'when updating group runners' do
-      let(:group) { create(:group) }
-      let(:project) { create(:project, group: group) }
-      let(:runner) { create(:ci_runner, :group, groups: [group]) }
+      let_it_be(:group) { create(:group) }
+      let_it_be_with_refind(:project) { create(:project, group: group) }
+      let_it_be_with_refind(:runner) { create(:ci_runner, :group, groups: [group]) }
 
       it_behaves_like 'matching build'
       it_behaves_like 'mismatching tags'
-- 
GitLab