From 87d6128966f38a335f05d0a19fbb7d359125ac5d Mon Sep 17 00:00:00 2001 From: Erick Bajao <fbajao@gitlab.com> Date: Mon, 21 Mar 2022 08:44:32 +0000 Subject: [PATCH] Make refresh task fetch IDs from CSV URL --- lib/tasks/ci/build_artifacts.rake | 20 ---------- ...oject_statistics_build_artifacts_size.rake | 31 ++++++++++++---- ...atistics_build_artifacts_size_rake_spec.rb | 37 +++++++++++-------- 3 files changed, 46 insertions(+), 42 deletions(-) delete mode 100644 lib/tasks/ci/build_artifacts.rake diff --git a/lib/tasks/ci/build_artifacts.rake b/lib/tasks/ci/build_artifacts.rake deleted file mode 100644 index 4f4faef5a626..000000000000 --- a/lib/tasks/ci/build_artifacts.rake +++ /dev/null @@ -1,20 +0,0 @@ -# frozen_string_literal: true - -require 'httparty' -require 'csv' - -namespace :ci do - namespace :build_artifacts do - desc "GitLab | CI | Fetch projects with incorrect artifact size on GitLab.com" - task :project_with_incorrect_artifact_size do - csv_url = ENV['SISENSE_PROJECT_IDS_WITH_INCORRECT_ARTIFACTS_URL'] - - # rubocop: disable Gitlab/HTTParty - body = HTTParty.get(csv_url) - # rubocop: enable Gitlab/HTTParty - - table = CSV.parse(body.parsed_response, headers: true) - puts table['PROJECT_ID'].join(' ') - end - end -end diff --git a/lib/tasks/gitlab/refresh_project_statistics_build_artifacts_size.rake b/lib/tasks/gitlab/refresh_project_statistics_build_artifacts_size.rake index 1cc18d14d78b..6d423f47fe61 100644 --- a/lib/tasks/gitlab/refresh_project_statistics_build_artifacts_size.rake +++ b/lib/tasks/gitlab/refresh_project_statistics_build_artifacts_size.rake @@ -1,23 +1,40 @@ # frozen_string_literal: true +require 'httparty' +require 'csv' + namespace :gitlab do - desc "GitLab | Refresh build artifacts size project statistics for given project IDs" + desc "GitLab | Refresh build artifacts size project statistics for given list of Project IDs from remote CSV" BUILD_ARTIFACTS_SIZE_REFRESH_ENQUEUE_BATCH_SIZE = 500 - task :refresh_project_statistics_build_artifacts_size, [:project_ids] => :environment do |_t, args| - project_ids = [] - project_ids = $stdin.read.split unless $stdin.tty? - project_ids = args.project_ids.to_s.split unless project_ids.any? + task :refresh_project_statistics_build_artifacts_size, [:csv_url] => :environment do |_t, args| + csv_url = args.csv_url + + # rubocop: disable Gitlab/HTTParty + body = HTTParty.get(csv_url) + # rubocop: enable Gitlab/HTTParty + + table = CSV.parse(body.to_s, headers: true) + project_ids = table['PROJECT_ID'] + + puts "Loaded #{project_ids.size} project ids to import" + + imported = 0 + missing = 0 if project_ids.any? - project_ids.in_groups_of(BUILD_ARTIFACTS_SIZE_REFRESH_ENQUEUE_BATCH_SIZE) do |ids| + project_ids.in_groups_of(BUILD_ARTIFACTS_SIZE_REFRESH_ENQUEUE_BATCH_SIZE, false) do |ids| projects = Project.where(id: ids) Projects::BuildArtifactsSizeRefresh.enqueue_refresh(projects) + + imported += projects.size + missing += ids.size - projects.size + puts "#{imported}/#{project_ids.size} (missing projects: #{missing})" end puts 'Done.'.green else - puts 'Please provide a string of space-separated project IDs as the argument or through the STDIN'.red + puts 'Project IDs must be listed in the CSV under the header PROJECT_ID'.red end end end diff --git a/spec/tasks/gitlab/refresh_project_statistics_build_artifacts_size_rake_spec.rb b/spec/tasks/gitlab/refresh_project_statistics_build_artifacts_size_rake_spec.rb index e57704d0ebe5..dcdd3f679286 100644 --- a/spec/tasks/gitlab/refresh_project_statistics_build_artifacts_size_rake_spec.rb +++ b/spec/tasks/gitlab/refresh_project_statistics_build_artifacts_size_rake_spec.rb @@ -11,37 +11,44 @@ let_it_be(:project_3) { create(:project) } let(:string_of_ids) { "#{project_1.id} #{project_2.id} #{project_3.id} 999999" } + let(:csv_url) { 'https://www.example.com/foo.csv' } + let(:csv_body) do + <<~BODY + PROJECT_ID + #{project_1.id} + #{project_2.id} + #{project_3.id} + BODY + end before do Rake.application.rake_require('tasks/gitlab/refresh_project_statistics_build_artifacts_size') stub_const("BUILD_ARTIFACTS_SIZE_REFRESH_ENQUEUE_BATCH_SIZE", 2) - end - - context 'when given a list of space-separated IDs through STDIN' do - before do - allow($stdin).to receive(:tty?).and_return(false) - allow($stdin).to receive(:read).and_return(string_of_ids) - end - - it 'enqueues the projects for refresh' do - expect { run_rake_task(rake_task) }.to output(/Done/).to_stdout - expect(Projects::BuildArtifactsSizeRefresh.all.map(&:project)).to match_array([project_1, project_2, project_3]) - end + stub_request(:get, csv_url).to_return(status: 200, body: csv_body) end context 'when given a list of space-separated IDs through rake argument' do it 'enqueues the projects for refresh' do - expect { run_rake_task(rake_task, string_of_ids) }.to output(/Done/).to_stdout + expect { run_rake_task(rake_task, csv_url) }.to output(/Done/).to_stdout expect(Projects::BuildArtifactsSizeRefresh.all.map(&:project)).to match_array([project_1, project_2, project_3]) end end - context 'when not given any IDs' do + context 'when CSV has invalid header' do + let(:csv_body) do + <<~BODY + projectid + #{project_1.id} + #{project_2.id} + #{project_3.id} + BODY + end + it 'returns an error message' do - expect { run_rake_task(rake_task) }.to output(/Please provide a string of space-separated project IDs/).to_stdout + expect { run_rake_task(rake_task, csv_url) }.to output(/Project IDs must be listed in the CSV under the header PROJECT_ID/).to_stdout end end end -- GitLab