From 87d6128966f38a335f05d0a19fbb7d359125ac5d Mon Sep 17 00:00:00 2001
From: Erick Bajao <fbajao@gitlab.com>
Date: Mon, 21 Mar 2022 08:44:32 +0000
Subject: [PATCH] Make refresh task fetch IDs from CSV URL

---
 lib/tasks/ci/build_artifacts.rake             | 20 ----------
 ...oject_statistics_build_artifacts_size.rake | 31 ++++++++++++----
 ...atistics_build_artifacts_size_rake_spec.rb | 37 +++++++++++--------
 3 files changed, 46 insertions(+), 42 deletions(-)
 delete mode 100644 lib/tasks/ci/build_artifacts.rake

diff --git a/lib/tasks/ci/build_artifacts.rake b/lib/tasks/ci/build_artifacts.rake
deleted file mode 100644
index 4f4faef5a626..000000000000
--- a/lib/tasks/ci/build_artifacts.rake
+++ /dev/null
@@ -1,20 +0,0 @@
-# frozen_string_literal: true
-
-require 'httparty'
-require 'csv'
-
-namespace :ci do
-  namespace :build_artifacts do
-    desc "GitLab | CI | Fetch projects with incorrect artifact size on GitLab.com"
-    task :project_with_incorrect_artifact_size do
-      csv_url = ENV['SISENSE_PROJECT_IDS_WITH_INCORRECT_ARTIFACTS_URL']
-
-      # rubocop: disable Gitlab/HTTParty
-      body = HTTParty.get(csv_url)
-      # rubocop: enable Gitlab/HTTParty
-
-      table = CSV.parse(body.parsed_response, headers: true)
-      puts table['PROJECT_ID'].join(' ')
-    end
-  end
-end
diff --git a/lib/tasks/gitlab/refresh_project_statistics_build_artifacts_size.rake b/lib/tasks/gitlab/refresh_project_statistics_build_artifacts_size.rake
index 1cc18d14d78b..6d423f47fe61 100644
--- a/lib/tasks/gitlab/refresh_project_statistics_build_artifacts_size.rake
+++ b/lib/tasks/gitlab/refresh_project_statistics_build_artifacts_size.rake
@@ -1,23 +1,60 @@
 # frozen_string_literal: true
 
+require 'httparty'
+require 'csv'
+
 namespace :gitlab do
-  desc "GitLab | Refresh build artifacts size project statistics for given project IDs"
+  desc "GitLab | Refresh build artifacts size project statistics for given list of Project IDs from remote CSV"
 
   BUILD_ARTIFACTS_SIZE_REFRESH_ENQUEUE_BATCH_SIZE = 500
 
-  task :refresh_project_statistics_build_artifacts_size, [:project_ids] => :environment do |_t, args|
-    project_ids = []
-    project_ids = $stdin.read.split unless $stdin.tty?
-    project_ids = args.project_ids.to_s.split unless project_ids.any?
+  # Downloads the CSV at csv_url, reads the values of its PROJECT_ID column and
+  # passes the matching projects, in batches, to
+  # Projects::BuildArtifactsSizeRefresh.enqueue_refresh.
+  #
+  # Usage:
+  #   rake "gitlab:refresh_project_statistics_build_artifacts_size[https://example.com/project_ids.csv]"
+  task :refresh_project_statistics_build_artifacts_size, [:csv_url] => :environment do |_t, args|
+    csv_url = args.csv_url
+
+    if csv_url.blank?
+      puts 'Please provide the URL of a CSV file listing the project IDs as the argument'.red
+      next
+    end
+
+    # rubocop: disable Gitlab/HTTParty
+    response = HTTParty.get(csv_url)
+    # rubocop: enable Gitlab/HTTParty
+
+    # Without this check an error page would be parsed as CSV and misreported as a bad header.
+    unless response.success?
+      puts "Failed to fetch the CSV from #{csv_url} (HTTP status #{response.code})".red
+      next
+    end
+
+    table = CSV.parse(response.to_s, headers: true)
+
+    # A missing PROJECT_ID header or an empty row yields nil entries; drop them so
+    # the counts printed below only reflect real IDs.
+    project_ids = table['PROJECT_ID'].compact
+
+    puts "Loaded #{project_ids.size} project ids to import"
+
+    imported = 0
+    missing = 0
 
     if project_ids.any?
-      project_ids.in_groups_of(BUILD_ARTIFACTS_SIZE_REFRESH_ENQUEUE_BATCH_SIZE) do |ids|
+      project_ids.in_groups_of(BUILD_ARTIFACTS_SIZE_REFRESH_ENQUEUE_BATCH_SIZE, false) do |ids|
         projects = Project.where(id: ids)
         Projects::BuildArtifactsSizeRefresh.enqueue_refresh(projects)
+
+        imported += projects.size
+        missing += ids.size - projects.size
+        puts "#{imported}/#{project_ids.size} (missing projects: #{missing})"
       end
       puts 'Done.'.green
     else
-      puts 'Please provide a string of space-separated project IDs as the argument or through the STDIN'.red
+      puts 'Project IDs must be listed in the CSV under the header PROJECT_ID'.red
     end
   end
 end
diff --git a/spec/tasks/gitlab/refresh_project_statistics_build_artifacts_size_rake_spec.rb b/spec/tasks/gitlab/refresh_project_statistics_build_artifacts_size_rake_spec.rb
index e57704d0ebe5..dcdd3f679286 100644
--- a/spec/tasks/gitlab/refresh_project_statistics_build_artifacts_size_rake_spec.rb
+++ b/spec/tasks/gitlab/refresh_project_statistics_build_artifacts_size_rake_spec.rb
@@ -11,37 +11,45 @@
     let_it_be(:project_3) { create(:project) }
 
-    let(:string_of_ids) { "#{project_1.id} #{project_2.id} #{project_3.id} 999999" }
+    let(:csv_url) { 'https://www.example.com/foo.csv' }
+    # The last row is an ID that matches no project, to check that unknown IDs are tolerated.
+    let(:csv_body) do
+      <<~BODY
+        PROJECT_ID
+        #{project_1.id}
+        #{project_2.id}
+        #{project_3.id}
+        999999
+      BODY
+    end
 
     before do
       Rake.application.rake_require('tasks/gitlab/refresh_project_statistics_build_artifacts_size')
 
       stub_const("BUILD_ARTIFACTS_SIZE_REFRESH_ENQUEUE_BATCH_SIZE", 2)
-    end
-
-    context 'when given a list of space-separated IDs through STDIN' do
-      before do
-        allow($stdin).to receive(:tty?).and_return(false)
-        allow($stdin).to receive(:read).and_return(string_of_ids)
-      end
-
-      it 'enqueues the projects for refresh' do
-        expect { run_rake_task(rake_task) }.to output(/Done/).to_stdout
 
-        expect(Projects::BuildArtifactsSizeRefresh.all.map(&:project)).to match_array([project_1, project_2, project_3])
-      end
+      stub_request(:get, csv_url).to_return(status: 200, body: csv_body)
     end
 
-    context 'when given a list of space-separated IDs through rake argument' do
+    context 'when given the URL of a CSV listing project IDs' do
       it 'enqueues the projects for refresh' do
-        expect { run_rake_task(rake_task, string_of_ids) }.to output(/Done/).to_stdout
+        expect { run_rake_task(rake_task, csv_url) }.to output(/Done/).to_stdout
 
         expect(Projects::BuildArtifactsSizeRefresh.all.map(&:project)).to match_array([project_1, project_2, project_3])
       end
     end
 
-    context 'when not given any IDs' do
+    context 'when CSV has invalid header' do
+      let(:csv_body) do
+        <<~BODY
+          projectid
+          #{project_1.id}
+          #{project_2.id}
+          #{project_3.id}
+        BODY
+      end
+
       it 'returns an error message' do
-        expect { run_rake_task(rake_task) }.to output(/Please provide a string of space-separated project IDs/).to_stdout
+        expect { run_rake_task(rake_task, csv_url) }.to output(/Project IDs must be listed in the CSV under the header PROJECT_ID/).to_stdout
       end
     end
   end
-- 
GitLab