From f9f4a70a25a075b59bf2c882719ece0d36796665 Mon Sep 17 00:00:00 2001
From: David Dieulivol <ddieulivol@gitlab.com>
Date: Fri, 4 Aug 2023 14:46:48 +0000
Subject: [PATCH] No git clone for rspec:coverage job

Caveat: Bundler apparently needs to have all the local gems
available on the filesystem, so we download them separately
via the API as well (only a few MiBs).
---
 .gitlab/ci/global.gitlab-ci.yml        |  2 +-
 .gitlab/ci/rails.gitlab-ci.yml         | 14 +++++++++++
 doc/development/pipelines/internals.md |  9 +++++++
 scripts/utils.sh                       | 35 ++++++++++++++++++++++++++
 4 files changed, 59 insertions(+), 1 deletion(-)

diff --git a/.gitlab/ci/global.gitlab-ci.yml b/.gitlab/ci/global.gitlab-ci.yml
index ca8d81737b848..b4a3232fcf7cc 100644
--- a/.gitlab/ci/global.gitlab-ci.yml
+++ b/.gitlab/ci/global.gitlab-ci.yml
@@ -509,4 +509,4 @@
       url="${CI_API_V4_URL}/projects/${CI_PROJECT_ID}/repository/files/scripts%2Futils.sh/raw?ref=${CI_COMMIT_SHA}"
       curl -f --header "Private-Token: ${PROJECT_TOKEN_FOR_CI_SCRIPTS_API_USAGE}" "${url}" --create-dirs --output scripts/utils.sh
     - source scripts/utils.sh
-    - download_files ${FILES_TO_DOWNLOAD}
+    - run_timed_command "download_files ${FILES_TO_DOWNLOAD}"
diff --git a/.gitlab/ci/rails.gitlab-ci.yml b/.gitlab/ci/rails.gitlab-ci.yml
index 2844669a89d7b..5b654627465b3 100644
--- a/.gitlab/ci/rails.gitlab-ci.yml
+++ b/.gitlab/ci/rails.gitlab-ci.yml
@@ -447,6 +447,7 @@ rspec:artifact-collector ee:
 rspec:coverage:
   extends:
     - .coverage-base
+    - .fast-no-clone-job
     - .rails:rules:rspec-coverage
   stage: post-test
   needs:
@@ -477,7 +478,20 @@ rspec:coverage:
     # Memory jobs
     - job: memory-on-boot
       optional: true
+  variables:
+    FILES_TO_DOWNLOAD: >
+      config/bundler_setup.rb
+      Gemfile
+      Gemfile.lock
+      scripts/merge-simplecov
+      spec/simplecov_env_core.rb
+      spec/simplecov_env.rb
+  before_script:
+    - !reference [".fast-no-clone-job", before_script]
+    - run_timed_command "download_local_gems"
+    - !reference [".coverage-base", before_script]
   script:
+    - chmod u+x scripts/merge-simplecov # Not the right permissions when downloading the script via the API.
     - run_timed_command "bundle exec scripts/merge-simplecov"
   coverage: '/LOC \((\d+\.\d+%)\) covered.$/'
   artifacts:
diff --git a/doc/development/pipelines/internals.md b/doc/development/pipelines/internals.md
index 97424e02437e0..904ac11511d37 100644
--- a/doc/development/pipelines/internals.md
+++ b/doc/development/pipelines/internals.md
@@ -368,6 +368,8 @@ my-job:
 
 - This pattern does not work if a script relies on `git` to access the repository, because we don't have the repository without cloning or fetching.
 - The job using this pattern needs to have `curl` available.
+- If you need to run `bundle install` in the job (even using `BUNDLE_ONLY`), you also need to download the local gems that are stored in the `gems` and `vendor/gems` folders of the `gitlab-org/gitlab` project.
+  - You can use the `download_local_gems` shell function from `scripts/utils.sh` for that purpose.
 
 #### Where is this pattern used?
 
@@ -387,5 +389,12 @@ my-job:
     - `scripts/review_apps/review-apps.sh`
     - `scripts/review_apps/seed-dast-test-data.sh`
     - `VERSION`
+  - `rspec:coverage` for:
+    - `config/bundler_setup.rb`
+    - `Gemfile.lock`
+    - `Gemfile`
+    - `scripts/merge-simplecov`
+    - `spec/simplecov_env_core.rb`
+    - `spec/simplecov_env.rb`
 
 Additionally, `scripts/utils.sh` is always downloaded from the API when this pattern is used (this file contains the code for `.fast-no-clone-job`).
diff --git a/scripts/utils.sh b/scripts/utils.sh
index 4ed56b2de1a82..e19622d07c6b1 100644
--- a/scripts/utils.sh
+++ b/scripts/utils.sh
@@ -416,3 +416,38 @@ function url_encode() {
     -e 's/}/%7d/g' \
     -e 's/~/%7e/g'
 }
+
+# Download the local gems (`vendor/gems` and `gems` folders) as archives from the API, and extract them in place.
+#
+# Useful to run bundle install without a git clone of gitlab-org/gitlab. Returns early if a download/extraction fails.
+function download_local_gems() {
+  local folder_path output private_token_header url
+  for folder_path in vendor/gems gems; do
+    output="${folder_path}.tar.gz"
+
+    # From https://docs.gitlab.com/ee/api/repositories.html#get-file-archive:
+    #
+    #   This endpoint can be accessed without authentication if the repository is publicly accessible.
+    #   For GitLab.com users, this endpoint has a rate limit threshold of 5 requests per minute.
+    #
+    # We don't want to set a token for public repos (e.g. gitlab-org/gitlab), as 5 requests/minute can
+    # potentially be reached with many pipelines running in parallel.
+    private_token_header=""
+    if [[ "${CI_PROJECT_VISIBILITY}" != "public" ]]; then
+      private_token_header="Private-Token: ${PROJECT_TOKEN_FOR_CI_SCRIPTS_API_USAGE}"
+    fi
+
+    echo "Downloading ${folder_path}"
+    url="${CI_API_V4_URL}/projects/${CI_PROJECT_ID}/repository/archive"
+    # --create-dirs: the parent folder of ${output} (e.g. `vendor/`) may not exist yet when the repo was not cloned.
+    curl -f --create-dirs --get \
+      --header "${private_token_header}" \
+      --output "${output}" \
+      --data-urlencode "sha=${CI_COMMIT_SHA}" \
+      --data-urlencode "path=${folder_path}" \
+      "${url}" || return
+    # Strip the first path component of every archive member (presumably the archive's top-level folder).
+    tar -zxf "${output}" --strip-components=1 || return
+    rm "${output}"
+  done
+}
-- 
GitLab