diff --git a/doc/api/graphql/reference/index.md b/doc/api/graphql/reference/index.md index eae8f6ef45e9a53577095a490e9d1c2bd3b1f17c..e8dcf831d9f78868c1d56fbcb4a81fe167974e14 100644 --- a/doc/api/graphql/reference/index.md +++ b/doc/api/graphql/reference/index.md @@ -6747,6 +6747,7 @@ Input type: `RunnersExportUsageInput` | Name | Type | Description | | ---- | ---- | ----------- | | <a id="mutationrunnersexportusageclientmutationid"></a>`clientMutationId` | [`String`](#string) | A unique identifier for the client performing the mutation. | +| <a id="mutationrunnersexportusagemaxprojectcount"></a>`maxProjectCount` | [`Int`](#int) | Maximum number of projects to return. All other runner usage will be attributed to a '<Other projects>' entry. Defaults to 1000 projects. | | <a id="mutationrunnersexportusagetype"></a>`type` | [`CiRunnerType`](#cirunnertype) | Scope of the runners to include in the report. | #### Fields diff --git a/ee/app/graphql/mutations/ci/runners/export_usage.rb b/ee/app/graphql/mutations/ci/runners/export_usage.rb index 7ddd6be160b039253858da8ba964a84820cf277d..8fd316962afe4b3e458c03a3f2ce31e859aed02a 100644 --- a/ee/app/graphql/mutations/ci/runners/export_usage.rb +++ b/ee/app/graphql/mutations/ci/runners/export_usage.rb @@ -10,15 +10,32 @@ class ExportUsage < BaseMutation required: false, description: 'Scope of the runners to include in the report.' + argument :max_project_count, ::GraphQL::Types::Int, + required: false, + description: + "Maximum number of projects to return. All other runner usage will be attributed " \ + "to a '<Other projects>' entry. " \ + "Defaults to #{::Ci::Runners::GenerateUsageCsvService::DEFAULT_PROJECT_COUNT} projects." + def ready?(**args) raise_resource_not_available_error! unless Ability.allowed?(current_user, :read_runner_usage) + max_project_count = args.fetch( + :max_project_count, ::Ci::Runners::GenerateUsageCsvService::DEFAULT_PROJECT_COUNT + ) + + unless max_project_count.between?(1, ::Ci::Runners::GenerateUsageCsvService::MAX_PROJECT_COUNT) + raise Gitlab::Graphql::Errors::ArgumentError, + "maxProjectCount must be between 1 and #{::Ci::Runners::GenerateUsageCsvService::MAX_PROJECT_COUNT}" + end + super end - def resolve(type: nil) - type = ::Ci::Runner.runner_types[type] - ::Ci::Runners::ExportUsageCsvWorker.perform_async(current_user.id, type) # rubocop: disable CodeReuse/Worker -- this worker sends out emails + def resolve(type: nil, max_project_count: nil) + ::Ci::Runners::ExportUsageCsvWorker.perform_async( # rubocop: disable CodeReuse/Worker -- this worker sends out emails + current_user.id, { runner_type: ::Ci::Runner.runner_types[type], max_project_count: max_project_count } + ) { errors: [] diff --git a/ee/app/services/ci/runners/generate_usage_csv_service.rb b/ee/app/services/ci/runners/generate_usage_csv_service.rb index d6d6bcb11b8e4619e60eb027fbed61d8a6ab8ee9..3e33b66fbd750fdab2336bc6381074ed43981cbd 100644 --- a/ee/app/services/ci/runners/generate_usage_csv_service.rb +++ b/ee/app/services/ci/runners/generate_usage_csv_service.rb @@ -8,19 +8,24 @@ module Runners class GenerateUsageCsvService attr_reader :project_ids, :runner_type, :from_date, :to_date - REPORT_ENTRY_LIMIT = 500 # Max number of projects listed in report + DEFAULT_PROJECT_COUNT = 1_000 + MAX_PROJECT_COUNT = 1_000 + OTHER_PROJECTS_NAME = '<Other projects>' # @param [User] current_user The user performing the reporting # @param [Symbol] runner_type The type of runners to report on. Defaults to nil, reporting on all runner types # @param [Date] from_date The start date of the period to examine. Defaults to start of last full month # @param [Date] to_date The end date of the period to examine. Defaults to end of month - def initialize(current_user:, runner_type: nil, from_date: nil, to_date: nil) + # @param [Integer] max_project_count The maximum number of projects in the report. All others will be folded + # into an 'Other projects' entry. Defaults to 1000 + def initialize(current_user:, runner_type: nil, from_date: nil, to_date: nil, max_project_count: nil) runner_type = Ci::Runner.runner_types[runner_type] if runner_type.is_a?(Symbol) @current_user = current_user @runner_type = runner_type @from_date = from_date || Date.current.prev_month.beginning_of_month @to_date = to_date || @from_date.end_of_month + @max_project_count = [MAX_PROJECT_COUNT, max_project_count || DEFAULT_PROJECT_COUNT].min end def execute @@ -28,10 +33,19 @@ def execute return insufficient_permissions unless Ability.allowed?(@current_user, :read_runner_usage) result = ClickHouse::Client.select(clickhouse_query, :main) - csv_builder = CsvBuilder::SingleBatch.new(replace_with_project_paths(result), header_to_value_hash) + rows = transform_rows(result) + csv_builder = CsvBuilder::SingleBatch.new(rows, header_to_value_hash) csv_data = csv_builder.render(ExportCsv::BaseService::TARGET_FILESIZE) + export_status = csv_builder.status - ServiceResponse.success(payload: { csv_data: csv_data, status: csv_builder.status }) + others_row_created = rows.last.present? && rows.last['grouped_project_id'].nil? + if others_row_created + # Do not report <Other projects> row + export_status[:rows_written] = export_status[:rows_written] - 1 + export_status[:rows_expected] = export_status[:rows_expected] - 1 + end + + ServiceResponse.success(payload: { csv_data: csv_data, status: export_status }) rescue StandardError => e Gitlab::ErrorTracking.track_and_raise_for_dev_exception(e) ServiceResponse.error(message: 'Failed to generate export', reason: :clickhouse_error) @@ -49,7 +63,7 @@ def insufficient_permissions def header_to_value_hash { - 'Project ID' => 'project_id', + 'Project ID' => 'grouped_project_id', 'Project path' => 'project_path', 'Build count' => 'count_builds', 'Total duration (minutes)' => 'total_duration_in_mins', @@ -58,15 +72,32 @@ def header_to_value_hash end def clickhouse_query + # This query computes the top-used projects, and performs a union to add the aggregates + # for the projects not in that list raw_query = <<~SQL.squish - SELECT project_id, - countMerge(count_builds) AS count_builds, - sumSimpleState(total_duration) / 60000 AS total_duration_in_mins - FROM ci_used_minutes_mv - WHERE #{where_clause} - GROUP BY project_id - ORDER BY total_duration_in_mins DESC, project_id - LIMIT #{REPORT_ENTRY_LIMIT}; + WITH top_projects AS + ( + SELECT + project_id, + countMerge(count_builds) AS count_builds, + sumSimpleState(total_duration) / 60000 AS total_duration_in_mins + FROM ci_used_minutes_mv + WHERE #{where_clause} + GROUP BY project_id + ORDER BY + total_duration_in_mins DESC, + project_id ASC + LIMIT {max_project_count: UInt64} + ) + SELECT project_id AS grouped_project_id, count_builds, total_duration_in_mins + FROM top_projects + UNION ALL + SELECT + NULL AS grouped_project_id, + countMerge(count_builds) AS count_builds, + sumSimpleState(total_duration) / 60000 AS total_duration_in_mins + FROM ci_used_minutes_mv + WHERE #{where_clause} AND project_id NOT IN (SELECT project_id FROM top_projects) SQL ClickHouse::Client::Query.new(raw_query: raw_query, placeholders: placeholders) @@ -84,7 +115,8 @@ def placeholders placeholders = { runner_type: runner_type, from_date: format_date(@from_date), - to_date: format_date(@to_date + 1) # Include jobs until the end of the day + to_date: format_date(@to_date + 1), # Include jobs until the end of the day + max_project_count: @max_project_count } placeholders.compact @@ -94,20 +126,30 @@ def format_date(date) date.strftime('%Y-%m-%d %H:%M:%S') end - def replace_with_project_paths(result) + def transform_rows(result) # rubocop: disable CodeReuse/ActiveRecord -- This is a ClickHouse query - ids = result.pluck('project_id') # rubocop: disable Database/AvoidUsingPluckWithoutLimit -- The limit is already implemented in the ClickHouse query + ids = result.pluck('grouped_project_id') # rubocop: disable Database/AvoidUsingPluckWithoutLimit -- The limit is already implemented in the ClickHouse query # rubocop: enable CodeReuse/ActiveRecord return result if ids.empty? - projects = Project.inc_routes.id_in(ids).limit(REPORT_ENTRY_LIMIT).to_h { |p| [p.id, p.full_path] } + projects = Project.inc_routes.id_in(ids).to_h { |p| [p.id, p.full_path] } + projects[nil] = OTHER_PROJECTS_NAME - # Annotate rows with project paths + # Annotate rows with project paths and human-readable durations result.each do |row| - row['project_path'] = projects[row['project_id']] + row['project_path'] = projects[row['grouped_project_id']&.to_i] row['total_duration_human_readable'] = ActiveSupport::Duration.build(row['total_duration_in_mins'] * 60).inspect end + + # Perform special treatment for <Other projects> entry (if existing), moving it to the end of the list + other_projects_row = result.find { |row| row['grouped_project_id'].nil? } + if other_projects_row + result.reject! { |row| row['grouped_project_id'].nil? } + result << other_projects_row if other_projects_row['count_builds'] > 0 + end + + result end end end diff --git a/ee/app/services/ci/runners/send_usage_csv_service.rb b/ee/app/services/ci/runners/send_usage_csv_service.rb index 14aab53beb57ab3b693992da9cd6ce9b126debb6..6ae6ed50b397c200850c0d02b5283f821919f2a1 100644 --- a/ee/app/services/ci/runners/send_usage_csv_service.rb +++ b/ee/app/services/ci/runners/send_usage_csv_service.rb @@ -10,11 +10,14 @@ class SendUsageCsvService # @param [Symbol] runner_type The type of runners to report on. Defaults to nil, reporting on all runner types # @param [Date] from_date The start date of the period to examine. Defaults to start of last full month # @param [Date] to_date The end date of the period to examine. Defaults to end of month - def initialize(current_user:, runner_type: nil, from_date: nil, to_date: nil) + # @param [Integer] max_project_count The maximum number of projects in the report. All others will be folded + # into an 'Other projects' entry. Defaults to 1000 + def initialize(current_user:, runner_type: nil, from_date: nil, to_date: nil, max_project_count: nil) @current_user = current_user @runner_type = runner_type @from_date = from_date @to_date = to_date + @max_project_count = max_project_count end def execute @@ -22,7 +25,8 @@ def execute current_user: @current_user, runner_type: @runner_type, from_date: @from_date, - to_date: @to_date + to_date: @to_date, + max_project_count: @max_project_count ) result = generate_csv_service.execute diff --git a/ee/app/workers/ci/runners/export_usage_csv_worker.rb b/ee/app/workers/ci/runners/export_usage_csv_worker.rb index cfef67f2abd703cfbe7359994f7d5c674f0d460d..4a86028332545c031aaa7f0aa0b9d25d7a89b4d3 100644 --- a/ee/app/workers/ci/runners/export_usage_csv_worker.rb +++ b/ee/app/workers/ci/runners/export_usage_csv_worker.rb @@ -14,10 +14,13 @@ class ExportUsageCsvWorker worker_resource_boundary :cpu loggable_arguments 0, 1 - def perform(current_user_id, runner_type) - user = User.find(current_user_id) + def perform(current_user_id, params) + params.symbolize_keys! - result = Ci::Runners::SendUsageCsvService.new(current_user: user, runner_type: runner_type).execute + user = User.find(current_user_id) + result = Ci::Runners::SendUsageCsvService.new( + current_user: user, **params.slice(:runner_type, :max_project_count) + ).execute log_extra_metadata_on_done(:status, result.status) log_extra_metadata_on_done(:message, result.message) if result.message log_extra_metadata_on_done(:csv_status, result.payload[:status]) if result.payload[:status] diff --git a/ee/spec/requests/api/graphql/mutations/ci/runners/export_usage_spec.rb b/ee/spec/requests/api/graphql/mutations/ci/runners/export_usage_spec.rb index a32e4d12e6481d843a5b92cf106601a157b45c82..f0c9cfab27fec2ef2e04420cdde8bd821955f003 100644 --- a/ee/spec/requests/api/graphql/mutations/ci/runners/export_usage_spec.rb +++ b/ee/spec/requests/api/graphql/mutations/ci/runners/export_usage_spec.rb @@ -21,9 +21,10 @@ finished_at: start_time + 10.minutes, runner: group_runner) end - let(:runner_type) { 'GROUP_TYPE' } + let(:runner_type) { 'group_type' } + let(:max_project_count) { 7 } let(:mutation) do - graphql_mutation(:runners_export_usage, type: runner_type) do + graphql_mutation(:runners_export_usage, type: runner_type.upcase, max_project_count: max_project_count) do <<~QL errors QL @@ -42,9 +43,13 @@ end it 'sends email with report' do + expect(::Ci::Runners::ExportUsageCsvWorker).to receive(:perform_async) + .with(current_user.id, { + runner_type: ::Ci::Runner.runner_types[runner_type], max_project_count: max_project_count + }).and_call_original expect(Notify).to receive(:runner_usage_by_project_csv_email) .with( - user: current_user, from_time: DateTime.new(2023, 11, 1), to_time: DateTime.new(2023, 11, 1).end_of_month, + user: current_user, from_date: Date.new(2023, 11, 1), to_date: Date.new(2023, 11, 1).end_of_month, csv_data: anything, export_status: anything ) do |args| expect(args.dig(:export_status, :rows_written)).to eq 1 @@ -59,6 +64,26 @@ expect_graphql_errors_to_be_empty end + context 'when max_project_count is out-of-range' do + context 'and is below acceptable range' do + let(:max_project_count) { 0 } + + it 'returns an error' do + post_response + expect_graphql_errors_to_include('maxProjectCount must be between 1 and 1000') + end + end + + context 'and is above acceptable range' do + let(:max_project_count) { ::Ci::Runners::GenerateUsageCsvService::MAX_PROJECT_COUNT + 1 } + + it 'returns an error' do + post_response + expect_graphql_errors_to_include('maxProjectCount must be between 1 and 1000') + end + end + end + context 'when feature is not available' do before do stub_licensed_features(runner_performance_insights: false) diff --git a/ee/spec/services/ci/runners/generate_usage_csv_service_spec.rb b/ee/spec/services/ci/runners/generate_usage_csv_service_spec.rb index a63ce484a28a448d9f315b0fc6588e701d123108..b50f18da2a9ad2040fbd569468089f4f22537a57 100644 --- a/ee/spec/services/ci/runners/generate_usage_csv_service_spec.rb +++ b/ee/spec/services/ci/runners/generate_usage_csv_service_spec.rb @@ -28,8 +28,12 @@ let(:runner_type) { nil } let(:from_date) { nil } let(:to_date) { nil } + let(:max_project_count) { nil } + let(:response_status) { response.payload[:status] } + let(:response_csv_lines) { response.payload[:csv_data].lines } let(:service) do - described_class.new(current_user: current_user, runner_type: runner_type, from_date: from_date, to_date: to_date) + described_class.new(current_user: current_user, runner_type: runner_type, from_date: from_date, to_date: to_date, + max_project_count: max_project_count) end let(:expected_header) { "Project ID,Project path,Build count,Total duration (minutes),Total duration\n" } @@ -111,7 +115,7 @@ .and_call_original end - expect(response.payload[:csv_data].lines).to eq([ + expect(response_csv_lines).to eq([ expected_header, "#{builds[21].project_id},#{builds[21].project.full_path},2,130,2 hours and 10 minutes\n", "#{builds[0].project_id},#{builds[0].project.full_path},1,14,14 minutes\n", @@ -121,19 +125,34 @@ "#{builds.last.project_id},#{builds.last.project.full_path},1,7,7 minutes\n" ]) - expect(response.payload[:status]).to eq({ rows_expected: 6, rows_written: 6, truncated: false }) + expect(response_status).to eq({ rows_expected: 6, rows_written: 6, truncated: false }) + end + + context "when max_project_count doesn't fit all projects" do + let(:max_project_count) { 2 } + + it 'exports usage data for the 2 top-K projects plus aggregate for other projects', :aggregate_failures do + expect(response_csv_lines).to eq([ + expected_header, + "#{builds[21].project_id},#{builds[21].project.full_path},2,130,2 hours and 10 minutes\n", + "#{builds[0].project_id},#{builds[0].project.full_path},1,14,14 minutes\n", + ",<Other projects>,4,49,49 minutes\n" + ]) + + expect(response_status).to eq({ rows_expected: 2, rows_written: 2, truncated: false }) + end end context 'with group_type runner_type argument specified' do let(:runner_type) { :group_type } it 'exports usage data for runners of specified type' do - expect(response.payload[:csv_data].lines).to eq([ + expect(response_csv_lines).to eq([ expected_header, "#{builds[21].project_id},#{builds[21].project.full_path},1,120,2 hours\n" ]) - expect(response.payload[:status]).to eq({ rows_expected: 1, rows_written: 1, truncated: false }) + expect(response_status).to eq({ rows_expected: 1, rows_written: 1, truncated: false }) end end @@ -141,8 +160,8 @@ let(:runner_type) { :project_type } it 'exports usage data for runners of specified type' do - expect(response.payload[:csv_data].lines).to contain_exactly(expected_header) - expect(response.payload[:status]).to eq({ rows_expected: 0, rows_written: 0, truncated: false }) + expect(response_csv_lines).to contain_exactly(expected_header) + expect(response_status).to eq({ rows_expected: 0, rows_written: 0, truncated: false }) end end @@ -152,7 +171,7 @@ let(:expected_to_date) { from_date.end_of_month } it 'exports usage data for runners which finished builds before date' do - expect(response.payload[:status]).to eq({ rows_expected: 16, rows_written: 16, truncated: false }) + expect(response_status).to eq({ rows_expected: 16, rows_written: 16, truncated: false }) end end @@ -162,7 +181,7 @@ let(:expected_to_date) { from_date.end_of_month } it 'exports usage data for runners which finished builds before date' do - expect(response.payload[:status]).to eq({ rows_expected: 0, rows_written: 0, truncated: false }) + expect(response_status).to eq({ rows_expected: 0, rows_written: 0, truncated: false }) end end @@ -176,7 +195,7 @@ end it 'exports usage data for runners which finished builds after date' do - expect(response.payload[:status]).to eq({ rows_expected: 6, rows_written: 6, truncated: false }) + expect(response_status).to eq({ rows_expected: 6, rows_written: 6, truncated: false }) end end @@ -190,7 +209,7 @@ end it 'exports usage data for runners which finished builds after date' do - expect(response.payload[:status]).to eq({ rows_expected: 16, rows_written: 16, truncated: false }) + expect(response_status).to eq({ rows_expected: 16, rows_written: 16, truncated: false }) end end diff --git a/ee/spec/services/ci/runners/send_usage_csv_service_spec.rb b/ee/spec/services/ci/runners/send_usage_csv_service_spec.rb index 0cdde287b947b03c79900e9e5d1003f6382991e6..305ceec08df8adb95b8d2d06a05ed4a7e40ed271 100644 --- a/ee/spec/services/ci/runners/send_usage_csv_service_spec.rb +++ b/ee/spec/services/ci/runners/send_usage_csv_service_spec.rb @@ -11,8 +11,10 @@ let(:from_date) { 1.month.ago } let(:to_date) { Date.current } + let(:max_project_count) { 5 } let(:service) do - described_class.new(current_user: current_user, runner_type: :instance_type, from_date: from_date, to_date: to_date) + described_class.new(current_user: current_user, runner_type: :instance_type, from_date: from_date, to_date: to_date, + max_project_count: max_project_count) end subject(:response) { service.execute } @@ -28,7 +30,10 @@ end it 'sends the csv by email' do - expect_next_instance_of(Ci::Runners::GenerateUsageCsvService) do |service| + expect_next_instance_of(Ci::Runners::GenerateUsageCsvService, + current_user: current_user, runner_type: :instance_type, from_date: from_date, to_date: to_date, + max_project_count: max_project_count + ) do |service| expect(service).to receive(:execute).and_call_original end diff --git a/ee/spec/workers/ci/runners/export_usage_csv_worker_spec.rb b/ee/spec/workers/ci/runners/export_usage_csv_worker_spec.rb index 224ca815cf53ab413d8f0842692ba590fd6f4359..01c27a9fb137b541ad995a3cce154bb27b0f6cb1 100644 --- a/ee/spec/workers/ci/runners/export_usage_csv_worker_spec.rb +++ b/ee/spec/workers/ci/runners/export_usage_csv_worker_spec.rb @@ -9,18 +9,19 @@ let(:worker) { described_class.new } describe '#perform' do - subject(:perform) { worker.perform(current_user.id, runner_type) } + subject(:perform) { worker.perform(current_user.id, params) } let(:current_user) { admin } - let(:runner_type) { 1 } + let(:params) { { runner_type: 1, max_project_count: 25 } } before do stub_licensed_features(runner_performance_insights: true) end it 'delegates to Ci::Runners::SendUsageCsvService' do - expect_next_instance_of(Ci::Runners::SendUsageCsvService, - { current_user: current_user, runner_type: runner_type }) do |service| + expect_next_instance_of(Ci::Runners::SendUsageCsvService, { + current_user: current_user, runner_type: params[:runner_type], max_project_count: params[:max_project_count] + }) do |service| expect(service).to receive(:execute).and_call_original end