From f6b17d65512ce7482ac9e0a5c10e1ec9365ba1e6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9my=20Coutable?= <remy@rymai.me>
Date: Tue, 16 Jul 2024 10:18:36 +0200
Subject: [PATCH] ci: Allow passing tags to the rspec-predictive jobs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Rémy Coutable <remy@rymai.me>
---
 .gitlab-ci.yml                                |  4 +-
 .gitlab/ci/rails.gitlab-ci.yml                | 17 +++-
 .../rails/rspec-predictive.gitlab-ci.yml.erb  |  6 +-
 scripts/generate_rspec_pipeline.rb            | 40 ++++++----
 spec/scripts/generate_rspec_pipeline_spec.rb  | 79 +++++++++++++++++++
 5 files changed, 126 insertions(+), 20 deletions(-)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 0ef5d8305488..7a8204d33eb8 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -23,7 +23,7 @@ stages:
 default:
   image: $DEFAULT_CI_IMAGE
   tags:
-    - gitlab-org
+    - $DEFAULT_JOB_TAG
   # All jobs are interruptible by default
   interruptible: true
   # Default job timeout doesn't work: https://gitlab.com/gitlab-org/gitlab/-/issues/387528
@@ -161,6 +161,8 @@ workflow:
 variables:
   PG_VERSION: "14"
   DEFAULT_CI_IMAGE: "${REGISTRY_HOST}/${REGISTRY_GROUP}/gitlab-build-images/${BUILD_OS}-${OS_VERSION}-ruby-${RUBY_VERSION}-golang-${GO_VERSION}-rust-${RUST_VERSION}-node-${NODE_VERSION}-postgresql-${PG_VERSION}:rubygems-${RUBYGEMS_VERSION}-git-2.36-lfs-2.9-chrome-${CHROME_VERSION}-yarn-1.22-graphicsmagick-1.3.36"
+  DEFAULT_JOB_TAG: "gitlab-org"
+  DEFAULT_RSPEC_PREDICTIVE_JOB_TAGS: "${DEFAULT_JOB_TAG}" # Separated by commas, overridden in JiHu
   # We set $GITLAB_DEPENDENCY_PROXY to another variable (since it's set at the group level and has higher precedence than .gitlab-ci.yml)
   # so that we can override $GITLAB_DEPENDENCY_PROXY_ADDRESS in workflow rules.
   GITLAB_DEPENDENCY_PROXY_ADDRESS: "${GITLAB_DEPENDENCY_PROXY}"
diff --git a/.gitlab/ci/rails.gitlab-ci.yml b/.gitlab/ci/rails.gitlab-ci.yml
index 0e2993bffcfd..95734f182745 100644
--- a/.gitlab/ci/rails.gitlab-ci.yml
+++ b/.gitlab/ci/rails.gitlab-ci.yml
@@ -658,8 +658,21 @@ rspec-predictive:pipeline-generate:
   stage: prepare
   needs: ["detect-tests", "retrieve-tests-metadata"]
   script:
-    - scripts/generate_rspec_pipeline.rb -t "${RSPEC_PREDICTIVE_PIPELINE_TEMPLATE_YML}" -k "${KNAPSACK_RSPEC_SUITE_REPORT_PATH}" -f "${RSPEC_MATCHING_TESTS_FOSS_PATH}" -o "${RSPEC_PREDICTIVE_PIPELINE_TEMPLATE_YML}.yml"
-    - scripts/generate_rspec_pipeline.rb -t "${RSPEC_PREDICTIVE_PIPELINE_TEMPLATE_YML}" -k "${KNAPSACK_RSPEC_SUITE_REPORT_PATH}" -f "${RSPEC_MATCHING_TESTS_EE_PATH}" -o "${RSPEC_PREDICTIVE_PIPELINE_TEMPLATE_YML}-ee.yml" -p "ee/"
+    - |
+      scripts/generate_rspec_pipeline.rb \
+        -t "${RSPEC_PREDICTIVE_PIPELINE_TEMPLATE_YML}" \
+        -k "${KNAPSACK_RSPEC_SUITE_REPORT_PATH}" \
+        -f "${RSPEC_MATCHING_TESTS_FOSS_PATH}" \
+        -j "${DEFAULT_RSPEC_PREDICTIVE_JOB_TAGS}" \
+        -o "${RSPEC_PREDICTIVE_PIPELINE_TEMPLATE_YML}.yml"
+    - |
+      scripts/generate_rspec_pipeline.rb \
+        -t "${RSPEC_PREDICTIVE_PIPELINE_TEMPLATE_YML}" \
+        -k "${KNAPSACK_RSPEC_SUITE_REPORT_PATH}" \
+        -f "${RSPEC_MATCHING_TESTS_EE_PATH}" \
+        -j "${DEFAULT_RSPEC_PREDICTIVE_JOB_TAGS}" \
+        -o "${RSPEC_PREDICTIVE_PIPELINE_TEMPLATE_YML}-ee.yml" \
+        -p "ee/"
     - echo "Content of ${RSPEC_PREDICTIVE_PIPELINE_TEMPLATE_YML}.yml:"
     - cat "${RSPEC_PREDICTIVE_PIPELINE_TEMPLATE_YML}.yml"
     - echo "\n================================================\n"
diff --git a/.gitlab/ci/rails/rspec-predictive.gitlab-ci.yml.erb b/.gitlab/ci/rails/rspec-predictive.gitlab-ci.yml.erb
index c8ced7ad15ba..f36c855d30f8 100644
--- a/.gitlab/ci/rails/rspec-predictive.gitlab-ci.yml.erb
+++ b/.gitlab/ci/rails/rspec-predictive.gitlab-ci.yml.erb
@@ -5,8 +5,12 @@ include:
 
 default:
   image: $DEFAULT_CI_IMAGE
+  <%- if job_tags.any? -%>
   tags:
-    - gitlab-org
+    <%- job_tags.each do |job_tag| -%>
+    - <%= job_tag %>
+    <%- end -%>
+  <%- end -%>
   # Default job timeout set to 90m https://gitlab.com/gitlab-com/gl-infra/infrastructure/-/issues/10520
   timeout: 90m
   interruptible: true
diff --git a/scripts/generate_rspec_pipeline.rb b/scripts/generate_rspec_pipeline.rb
index 70a0c87be35b..5844ae7f7d7e 100755
--- a/scripts/generate_rspec_pipeline.rb
+++ b/scripts/generate_rspec_pipeline.rb
@@ -18,30 +18,31 @@ class GenerateRspecPipeline
   SETUP_DURATION_IN_SECONDS = 180.0 # 3 MINUTES
   OPTIMAL_TEST_RUNTIME_DURATION_IN_SECONDS = OPTIMAL_TEST_JOB_DURATION_IN_SECONDS - SETUP_DURATION_IN_SECONDS
 
-  # As of 2022-09-01:
+  # As of 2024-07-16:
   # $ find spec -type f | wc -l
-  #  12825
+  #  16007 (`SPEC_FILES_COUNT`)
   # and
   # $ find ee/spec -type f | wc -l
-  #  5610
-  # which gives a total of 18435 test files (`NUMBER_OF_TESTS_IN_TOTAL_IN_THE_TEST_SUITE`).
+  #  8548 (`EE_SPEC_FILES_COUNT`)
+  # which gives a total of 24555 test files (`ALL_SPEC_FILES_COUNT`).
   #
   # Total time to run all tests (based on https://gitlab-org.gitlab.io/rspec_profiling_stats/)
-  # is 170183 seconds (`DURATION_OF_THE_TEST_SUITE_IN_SECONDS`).
+  # is 251509 seconds (`TEST_SUITE_DURATION_IN_SECONDS`).
   #
-  # This gives an approximate 170183 / 18435 = 9.2 seconds per test file
+  # This gives approximately 251509 / 24555 = 10.2 seconds per test file
   # (`DEFAULT_AVERAGE_TEST_FILE_DURATION_IN_SECONDS`).
   #
   # If we want each test job to finish in 10 minutes, given we have 3 minutes of setup (`SETUP_DURATION_IN_SECONDS`),
   # then we need to give 7 minutes of testing to each test node (`OPTIMAL_TEST_RUNTIME_DURATION_IN_SECONDS`).
-  # (7 * 60) / 9.2 = 45.6
+  # (7 * 60) / 10.2 = 41.17
   #
   # So if we'd want to run the full test suites in 10 minutes (`OPTIMAL_TEST_JOB_DURATION_IN_SECONDS`),
-  # we'd need to run at max 45 test file per nodes (`#optimal_test_file_count_per_node_per_test_level`).
-  NUMBER_OF_TESTS_IN_TOTAL_IN_THE_TEST_SUITE = 18_435
-  DURATION_OF_THE_TEST_SUITE_IN_SECONDS = 170_183
-  DEFAULT_AVERAGE_TEST_FILE_DURATION_IN_SECONDS =
-    DURATION_OF_THE_TEST_SUITE_IN_SECONDS / NUMBER_OF_TESTS_IN_TOTAL_IN_THE_TEST_SUITE
+  # we'd need to run at most 41 test files per node (`#optimal_test_file_count_per_node_per_test_level`).
+  SPEC_FILES_COUNT = 16007
+  EE_SPEC_FILES_COUNT = 8548
+  ALL_SPEC_FILES_COUNT = SPEC_FILES_COUNT + EE_SPEC_FILES_COUNT
+  TEST_SUITE_DURATION_IN_SECONDS = 251509
+  DEFAULT_AVERAGE_TEST_FILE_DURATION_IN_SECONDS = TEST_SUITE_DURATION_IN_SECONDS / ALL_SPEC_FILES_COUNT
 
   # pipeline_template_path: A YAML pipeline configuration template to generate the final pipeline config from
   # rspec_files_path: A file containing RSpec files to run, separated by a space
@@ -51,11 +52,12 @@ class GenerateRspecPipeline
   #                          `"#{pipeline_template_path}.yml"`)
   def initialize(
     pipeline_template_path:, rspec_files_path: nil, knapsack_report_path: nil, test_suite_prefix: nil,
-    generated_pipeline_path: nil)
+    job_tags: [], generated_pipeline_path: nil)
     @pipeline_template_path = pipeline_template_path.to_s
     @rspec_files_path = rspec_files_path.to_s
     @knapsack_report_path = knapsack_report_path.to_s
     @test_suite_prefix = test_suite_prefix
+    @job_tags = job_tags
     @generated_pipeline_path = generated_pipeline_path || "#{pipeline_template_path}.yml"
 
     raise ArgumentError unless File.exist?(@pipeline_template_path)
@@ -72,7 +74,7 @@ def generate!
     info "generated_pipeline_path: #{generated_pipeline_path}"
 
     File.open(generated_pipeline_path, 'w') do |handle|
-      pipeline_yaml = ERB.new(File.read(pipeline_template_path)).result_with_hash(**erb_binding)
+      pipeline_yaml = ERB.new(File.read(pipeline_template_path), trim_mode: '-').result_with_hash(**erb_binding)
       handle.write(pipeline_yaml.squeeze("\n").strip)
     end
   end
@@ -80,7 +82,7 @@ def generate!
   private
 
   attr_reader :pipeline_template_path, :rspec_files_path, :knapsack_report_path, :test_suite_prefix,
-    :generated_pipeline_path
+    :job_tags, :generated_pipeline_path
 
   def info(text)
     $stdout.puts "[#{self.class.name}] #{text}"
@@ -94,7 +96,8 @@ def erb_binding
     {
       rspec_files_per_test_level: rspec_files_per_test_level,
       test_suite_prefix: test_suite_prefix,
-      repo_from_artifacts: ENV['CI_FETCH_REPO_GIT_STRATEGY'] == 'none'
+      repo_from_artifacts: ENV['CI_FETCH_REPO_GIT_STRATEGY'] == 'none',
+      job_tags: job_tags
     }
   end
 
@@ -204,6 +207,11 @@ def test_level_service
       options[:test_suite_prefix] = value
     end
 
+    opts.on("-j", "--job-tags job_tags", String, "Job tags (default to `[]`) " \
+                                                 "separated by commas") do |value|
+      options[:job_tags] = value.split(',')
+    end
+
     opts.on("-o", "--generated-pipeline-path generated_pipeline_path", String, "Path where to write the pipeline " \
                                                                                "config") do |value|
       options[:generated_pipeline_path] = value
diff --git a/spec/scripts/generate_rspec_pipeline_spec.rb b/spec/scripts/generate_rspec_pipeline_spec.rb
index 894c33968b8e..779f7c551b79 100644
--- a/spec/scripts/generate_rspec_pipeline_spec.rb
+++ b/spec/scripts/generate_rspec_pipeline_spec.rb
@@ -223,6 +223,85 @@
       end
     end
 
+    describe 'job_tags option' do
+      let(:pipeline_template_content) do
+        <<~YAML
+        default:
+          image: $DEFAULT_CI_IMAGE
+          <%- if job_tags.any? -%>
+          tags:
+            <%- job_tags.each do |job_tag| -%>
+            - <%= job_tag %>
+            <%- end -%>
+          <%- end -%>
+        YAML
+      end
+
+      before do
+        subject.generate!
+      end
+
+      context 'when job_tags is not given' do
+        subject do
+          described_class.new(
+            rspec_files_path: rspec_files.path,
+            pipeline_template_path: pipeline_template.path
+          )
+        end
+
+        it 'generates the pipeline config with no tags' do
+          expect(File.read("#{pipeline_template.path}.yml"))
+            .to eq(
+              <<~YAML.chomp
+                    default:
+                      image: $DEFAULT_CI_IMAGE
+              YAML
+            )
+        end
+      end
+
+      context 'when job_tags is given' do
+        subject do
+          described_class.new(
+            rspec_files_path: rspec_files.path,
+            pipeline_template_path: pipeline_template.path,
+            job_tags: job_tags
+          )
+        end
+
+        context 'with two tags' do
+          let(:job_tags) { %w[foo bar] }
+
+          it 'generates the pipeline config with the expected tags' do
+            expect(File.read("#{pipeline_template.path}.yml"))
+              .to eq(
+                <<~YAML.chomp
+                      default:
+                        image: $DEFAULT_CI_IMAGE
+                        tags:
+                          - foo
+                          - bar
+                YAML
+              )
+          end
+        end
+
+        context 'with empty tags array' do
+          let(:job_tags) { [] }
+
+          it 'generates the pipeline without any tags defined' do
+            expect(File.read("#{pipeline_template.path}.yml"))
+              .to eq(
+                <<~YAML.chomp
+                      default:
+                        image: $DEFAULT_CI_IMAGE
+                YAML
+              )
+          end
+        end
+      end
+    end
+
     context 'when generated_pipeline_path is given' do
       let(:custom_pipeline_filename) { Tempfile.new(['custom_pipeline_filename', '.yml']) }
 
-- 
GitLab