From 28df9bdf5ff3c85c81b231bee7fe3d33f2616f50 Mon Sep 17 00:00:00 2001 From: Eduardo Bonet <ebonet@gitlab.com> Date: Thu, 6 Apr 2023 10:03:57 +0000 Subject: [PATCH] Allows for fetching candidate data as csv Adds a new format to ::Projects::Ml::ExperimentsController.show that allows downloading the data as a csv file. Changelog: added --- .../projects/ml/experiments_controller.rb | 21 +- app/models/ml/candidate.rb | 2 +- app/presenters/ml/candidates_csv_presenter.rb | 49 +++++ .../ml/candidates_csv_presenter_spec.rb | 84 ++++++++ .../ml/experiments_controller_spec.rb | 186 +++++++++++------- 5 files changed, 261 insertions(+), 81 deletions(-) create mode 100644 app/presenters/ml/candidates_csv_presenter.rb create mode 100644 spec/presenters/ml/candidates_csv_presenter_spec.rb diff --git a/app/controllers/projects/ml/experiments_controller.rb b/app/controllers/projects/ml/experiments_controller.rb index be333fff6e80f..dece3f98c57b9 100644 --- a/app/controllers/projects/ml/experiments_controller.rb +++ b/app/controllers/projects/ml/experiments_controller.rb @@ -27,13 +27,22 @@ def show .transform_keys(&:underscore) .permit(:name, :order_by, :sort, :order_by_type) - paginator = CandidateFinder - .new(@experiment, find_params) - .execute - .keyset_paginate(cursor: params[:cursor], per_page: MAX_CANDIDATES_PER_PAGE) + finder = CandidateFinder.new(@experiment, find_params) - @candidates = paginator.records - @page_info = page_info(paginator) + respond_to do |format| + format.csv do + csv_data = ::Ml::CandidatesCsvPresenter.new(finder.execute).present + + send_data(csv_data, type: 'text/csv; charset=utf-8', filename: 'candidates.csv') + end + + format.html do + paginator = finder.execute.keyset_paginate(cursor: params[:cursor], per_page: MAX_CANDIDATES_PER_PAGE) + + @candidates = paginator.records + @page_info = page_info(paginator) + end + end end def destroy diff --git a/app/models/ml/candidate.rb b/app/models/ml/candidate.rb index d8c466b9839f7..40758defafc6e 100644 --- a/app/models/ml/candidate.rb +++ b/app/models/ml/candidate.rb @@ -28,7 +28,7 @@ class Candidate < ApplicationRecord scope: :project, init: AtomicInternalId.project_init(self, :internal_id) - scope :including_relationships, -> { includes(:latest_metrics, :params, :user, :package) } + scope :including_relationships, -> { includes(:latest_metrics, :params, :user, :package, :project) } scope :by_name, ->(name) { where("ml_candidates.name LIKE ?", "%#{sanitize_sql_like(name)}%") } # rubocop:disable GitlabSecurity/SqlInjection scope :order_by_metric, ->(metric, direction) do diff --git a/app/presenters/ml/candidates_csv_presenter.rb b/app/presenters/ml/candidates_csv_presenter.rb new file mode 100644 index 0000000000000..8e2baf6bd28ec --- /dev/null +++ b/app/presenters/ml/candidates_csv_presenter.rb @@ -0,0 +1,49 @@ +# frozen_string_literal: true + +module Ml + class CandidatesCsvPresenter + CANDIDATE_ASSOCIATIONS = [:latest_metrics, :params, :experiment].freeze + # This file size limit is mainly to avoid the generation to hog resources from the server. The value is arbitrary + # can be update once we have better insight into usage. + TARGET_FILESIZE = 2.megabytes + + def initialize(candidates) + @candidates = candidates + end + + def present + CsvBuilder.new(@candidates, headers, CANDIDATE_ASSOCIATIONS).render(TARGET_FILESIZE) + end + + private + + def headers + metric_names = columns_names(&:metrics) + param_names = columns_names(&:params) + + candidate_to_metrics = @candidates.to_h do |candidate| + [candidate.id, candidate.latest_metrics.to_h { |m| [m.name, m.value] }] + end + + candidate_to_params = @candidates.to_h do |candidate| + [candidate.id, candidate.params.to_h { |m| [m.name, m.value] }] + end + + { + project_id: 'project_id', + experiment_iid: ->(c) { c.experiment.iid }, + candidate_iid: 'internal_id', + name: 'name', + external_id: 'eid', + start_time: 'start_time', + end_time: 'end_time', + **param_names.index_with { |name| ->(c) { candidate_to_params.dig(c.id, name) } }, + **metric_names.index_with { |name| ->(c) { candidate_to_metrics.dig(c.id, name) } } + } + end + + def columns_names(&selector) + @candidates.flat_map(&selector).map(&:name).uniq + end + end +end diff --git a/spec/presenters/ml/candidates_csv_presenter_spec.rb b/spec/presenters/ml/candidates_csv_presenter_spec.rb new file mode 100644 index 0000000000000..fea00565859a1 --- /dev/null +++ b/spec/presenters/ml/candidates_csv_presenter_spec.rb @@ -0,0 +1,84 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe ::Ml::CandidatesCsvPresenter, feature_category: :mlops do + # rubocop:disable RSpec/FactoryBot/AvoidCreate + let_it_be(:project) { create(:project, :repository) } + let_it_be(:experiment) { create(:ml_experiments, user_id: project.creator, project: project) } + + let_it_be(:candidate0) do + create(:ml_candidates, experiment: experiment, user: project.creator, + project: project, start_time: 1234, end_time: 5678).tap do |c| + c.params.create!([{ name: 'param1', value: 'p1' }, { name: 'param2', value: 'p2' }]) + c.metrics.create!( + [{ name: 'metric1', value: 0.1 }, { name: 'metric2', value: 0.2 }, { name: 'metric3', value: 0.3 }] + ) + end + end + + let_it_be(:candidate1) do + create(:ml_candidates, experiment: experiment, user: project.creator, name: 'candidate1', + project: project, start_time: 1111, end_time: 2222).tap do |c| + c.params.create([{ name: 'param2', value: 'p3' }, { name: 'param3', value: 'p4' }]) + c.metrics.create!(name: 'metric3', value: 0.4) + end + end + # rubocop:enable RSpec/FactoryBot/AvoidCreate + + describe '.present' do + subject { described_class.new(::Ml::Candidate.where(id: [candidate0.id, candidate1.id])).present } + + it 'generates header row correctly' do + expected_header = %w[project_id experiment_iid candidate_iid name external_id start_time end_time param1 param2 + param3 metric1 metric2 metric3].join(',') + header = subject.split("\n")[0] + + expect(header).to eq(expected_header) + end + + it 'generates the first row correctly' do + expected_row = [ + candidate0.project_id, + 1, # experiment.iid + 1, # candidate0.internal_id + '', # candidate0 has no name, column is empty + candidate0.eid, + candidate0.start_time, + candidate0.end_time, + candidate0.params[0].value, + candidate0.params[1].value, + '', # candidate0 has no param3, column is empty + candidate0.metrics[0].value, + candidate0.metrics[1].value, + candidate0.metrics[2].value + ].map(&:to_s) + + row = subject.split("\n")[1].split(",") + + expect(row).to match_array(expected_row) + end + + it 'generates the second row correctly' do + expected_row = [ + candidate1.project_id, + 1, # experiment.iid + 2, # candidate1.internal_id + 'candidate1', + candidate1.eid, + candidate1.start_time, + candidate1.end_time, + '', # candidate1 has no param1, column is empty + candidate1.params[0].value, + candidate1.params[1].value, + '', # candidate1 has no metric1, column is empty + '', # candidate1 has no metric2, column is empty + candidate1.metrics[0].value + ].map(&:to_s) + + row = subject.split("\n")[2].split(",") + + expect(row).to match_array(expected_row) + end + end +end diff --git a/spec/requests/projects/ml/experiments_controller_spec.rb b/spec/requests/projects/ml/experiments_controller_spec.rb index 9586f91fe5e90..5a8496a250a48 100644 --- a/spec/requests/projects/ml/experiments_controller_spec.rb +++ b/spec/requests/projects/ml/experiments_controller_spec.rb @@ -122,109 +122,143 @@ end describe 'GET show' do - it 'renders the template' do - show_experiment - - expect(response).to render_template('projects/ml/experiments/show') - end + describe 'html' do + it 'renders the template' do + show_experiment - describe 'pagination' do - let_it_be(:candidates) do - create_list(:ml_candidates, 5, experiment: experiment).tap do |c| - c.first.metrics.create!(name: 'metric1', value: 0.3) - c[1].metrics.create!(name: 'metric1', value: 0.2) - c.last.metrics.create!(name: 'metric1', value: 0.6) - end + expect(response).to render_template('projects/ml/experiments/show') end - let(:params) { basic_params.merge(id: experiment.iid) } + describe 'pagination' do + let_it_be(:candidates) do + create_list(:ml_candidates, 5, experiment: experiment).tap do |c| + c.first.metrics.create!(name: 'metric1', value: 0.3) + c[1].metrics.create!(name: 'metric1', value: 0.2) + c.last.metrics.create!(name: 'metric1', value: 0.6) + end + end - before do - stub_const("Projects::Ml::ExperimentsController::MAX_CANDIDATES_PER_PAGE", 2) + let(:params) { basic_params.merge(id: experiment.iid) } - show_experiment - end + before do + stub_const("Projects::Ml::ExperimentsController::MAX_CANDIDATES_PER_PAGE", 2) - it 'fetches only MAX_CANDIDATES_PER_PAGE candidates' do - expect(assigns(:candidates).size).to eq(2) - end + show_experiment + end - it 'paginates' do - received = assigns(:page_info) + it 'fetches only MAX_CANDIDATES_PER_PAGE candidates' do + expect(assigns(:candidates).size).to eq(2) + end - expect(received).to include({ - has_next_page: true, - has_previous_page: false, - start_cursor: nil - }) - end + it 'paginates' do + received = assigns(:page_info) - context 'when order by metric' do - let(:params) do - { - order_by: "metric1", - order_by_type: "metric", - sort: "desc" - } + expect(received).to include({ + has_next_page: true, + has_previous_page: false, + start_cursor: nil + }) end - it 'paginates', :aggregate_failures do - page = assigns(:candidates) + context 'when order by metric' do + let(:params) do + { + order_by: "metric1", + order_by_type: "metric", + sort: "desc" + } + end - expect(page.first).to eq(candidates.last) - expect(page.last).to eq(candidates.first) + it 'paginates', :aggregate_failures do + page = assigns(:candidates) - new_params = params.merge(cursor: assigns(:page_info)[:end_cursor]) + expect(page.first).to eq(candidates.last) + expect(page.last).to eq(candidates.first) - show_experiment(new_params) + new_params = params.merge(cursor: assigns(:page_info)[:end_cursor]) - new_page = assigns(:candidates) + show_experiment(new_params: new_params) - expect(new_page.first).to eq(candidates[1]) + new_page = assigns(:candidates) + + expect(new_page.first).to eq(candidates[1]) + end end end - end - describe 'search' do - let(:params) do - basic_params.merge( - name: 'some_name', - orderBy: 'name', - orderByType: 'metric', - sort: 'asc', - invalid: 'invalid' - ) + describe 'search' do + let(:params) do + basic_params.merge( + name: 'some_name', + orderBy: 'name', + orderByType: 'metric', + sort: 'asc', + invalid: 'invalid' + ) + end + + it 'formats and filters the parameters' do + expect(Projects::Ml::CandidateFinder).to receive(:new).and_call_original do |exp, params| + expect(params.to_h).to include({ + name: 'some_name', + order_by: 'name', + order_by_type: 'metric', + sort: 'asc' + }) + end + + show_experiment + end end - it 'formats and filters the parameters' do - expect(Projects::Ml::CandidateFinder).to receive(:new).and_call_original do |exp, params| - expect(params.to_h).to include({ - name: 'some_name', - order_by: 'name', - order_by_type: 'metric', - sort: 'asc' - }) + it 'does not perform N+1 sql queries' do + control_count = ActiveRecord::QueryRecorder.new(skip_cached: false) { show_experiment } + + create_list(:ml_candidates, 2, :with_metrics_and_params, experiment: experiment) + + expect { show_experiment }.not_to exceed_all_query_limit(control_count) + end + + describe '404' do + before do + show_experiment end - show_experiment + it_behaves_like '404 if experiment does not exist' + it_behaves_like '404 if feature flag disabled' end end - it 'does not perform N+1 sql queries' do - control_count = ActiveRecord::QueryRecorder.new(skip_cached: false) { show_experiment } + describe 'csv' do + it 'responds with :ok', :aggregate_failures do + show_experiment_csv - create_list(:ml_candidates, 2, :with_metrics_and_params, experiment: experiment) + expect(response).to have_gitlab_http_status(:ok) + expect(response.headers['Content-Type']).to eq('text/csv; charset=utf-8') + end - expect { show_experiment }.not_to exceed_all_query_limit(control_count) - end + it 'calls the presenter' do + allow(::Ml::CandidatesCsvPresenter).to receive(:new).and_call_original - describe '404' do - before do - show_experiment + show_experiment_csv + end + + it 'does not perform N+1 sql queries' do + control_count = ActiveRecord::QueryRecorder.new(skip_cached: false) { show_experiment_csv } + + create_list(:ml_candidates, 2, :with_metrics_and_params, experiment: experiment) + + expect { show_experiment_csv }.not_to exceed_all_query_limit(control_count) end - it_behaves_like '404 if experiment does not exist' - it_behaves_like '404 if feature flag disabled' + describe '404' do + before do + show_experiment_csv + end + + it_behaves_like '404 if experiment does not exist' + it_behaves_like '404 if feature flag disabled' + end end end @@ -253,8 +287,12 @@ private - def show_experiment(new_params = nil) - get project_ml_experiment_path(project, experiment_iid), params: new_params || params + def show_experiment(new_params: nil, format: :html) + get project_ml_experiment_path(project, experiment_iid, format: format), params: new_params || params + end + + def show_experiment_csv + show_experiment(format: :csv) end def list_experiments(new_params = nil) -- GitLab