From 28df9bdf5ff3c85c81b231bee7fe3d33f2616f50 Mon Sep 17 00:00:00 2001
From: Eduardo Bonet <ebonet@gitlab.com>
Date: Thu, 6 Apr 2023 10:03:57 +0000
Subject: [PATCH] Allows for fetching candidate data as csv

Adds a new format to ::Projects::Ml::ExperimentsController.show that
allows downloading the data as a csv file.

Changelog: added
---
 .../projects/ml/experiments_controller.rb     |  21 +-
 app/models/ml/candidate.rb                    |   2 +-
 app/presenters/ml/candidates_csv_presenter.rb |  49 +++++
 .../ml/candidates_csv_presenter_spec.rb       |  84 ++++++++
 .../ml/experiments_controller_spec.rb         | 186 +++++++++++-------
 5 files changed, 261 insertions(+), 81 deletions(-)
 create mode 100644 app/presenters/ml/candidates_csv_presenter.rb
 create mode 100644 spec/presenters/ml/candidates_csv_presenter_spec.rb

diff --git a/app/controllers/projects/ml/experiments_controller.rb b/app/controllers/projects/ml/experiments_controller.rb
index be333fff6e80f..dece3f98c57b9 100644
--- a/app/controllers/projects/ml/experiments_controller.rb
+++ b/app/controllers/projects/ml/experiments_controller.rb
@@ -27,13 +27,22 @@ def show
                         .transform_keys(&:underscore)
                         .permit(:name, :order_by, :sort, :order_by_type)
 
-        paginator = CandidateFinder
-                        .new(@experiment, find_params)
-                        .execute
-                        .keyset_paginate(cursor: params[:cursor], per_page: MAX_CANDIDATES_PER_PAGE)
+        finder = CandidateFinder.new(@experiment, find_params)
 
-        @candidates = paginator.records
-        @page_info = page_info(paginator)
+        respond_to do |format|
+          format.csv do
+            csv_data = ::Ml::CandidatesCsvPresenter.new(finder.execute).present
+
+            send_data(csv_data, type: 'text/csv; charset=utf-8', filename: 'candidates.csv')
+          end
+
+          format.html do
+            paginator = finder.execute.keyset_paginate(cursor: params[:cursor], per_page: MAX_CANDIDATES_PER_PAGE)
+
+            @candidates = paginator.records
+            @page_info = page_info(paginator)
+          end
+        end
       end
 
       def destroy
diff --git a/app/models/ml/candidate.rb b/app/models/ml/candidate.rb
index d8c466b9839f7..40758defafc6e 100644
--- a/app/models/ml/candidate.rb
+++ b/app/models/ml/candidate.rb
@@ -28,7 +28,7 @@ class Candidate < ApplicationRecord
       scope: :project,
       init: AtomicInternalId.project_init(self, :internal_id)
 
-    scope :including_relationships, -> { includes(:latest_metrics, :params, :user, :package) }
+    scope :including_relationships, -> { includes(:latest_metrics, :params, :user, :package, :project) }
     scope :by_name, ->(name) { where("ml_candidates.name LIKE ?", "%#{sanitize_sql_like(name)}%") } # rubocop:disable GitlabSecurity/SqlInjection
 
     scope :order_by_metric, ->(metric, direction) do
diff --git a/app/presenters/ml/candidates_csv_presenter.rb b/app/presenters/ml/candidates_csv_presenter.rb
new file mode 100644
index 0000000000000..8e2baf6bd28ec
--- /dev/null
+++ b/app/presenters/ml/candidates_csv_presenter.rb
@@ -0,0 +1,49 @@
+# frozen_string_literal: true
+
+module Ml
+  class CandidatesCsvPresenter
+    CANDIDATE_ASSOCIATIONS = [:latest_metrics, :params, :experiment].freeze
+    # This file size limit is mainly to avoid the generation to hog resources from the server. The value is arbitrary
+    # can be update once we have better insight into usage.
+    TARGET_FILESIZE = 2.megabytes
+
+    def initialize(candidates)
+      @candidates = candidates
+    end
+
+    def present
+      CsvBuilder.new(@candidates, headers, CANDIDATE_ASSOCIATIONS).render(TARGET_FILESIZE)
+    end
+
+    private
+
+    def headers
+      metric_names = columns_names(&:metrics)
+      param_names = columns_names(&:params)
+
+      candidate_to_metrics = @candidates.to_h do |candidate|
+        [candidate.id, candidate.latest_metrics.to_h { |m| [m.name, m.value] }]
+      end
+
+      candidate_to_params = @candidates.to_h do |candidate|
+        [candidate.id, candidate.params.to_h { |m| [m.name, m.value] }]
+      end
+
+      {
+        project_id: 'project_id',
+        experiment_iid: ->(c) { c.experiment.iid },
+        candidate_iid: 'internal_id',
+        name: 'name',
+        external_id: 'eid',
+        start_time: 'start_time',
+        end_time: 'end_time',
+        **param_names.index_with { |name| ->(c) { candidate_to_params.dig(c.id, name) } },
+        **metric_names.index_with { |name| ->(c) { candidate_to_metrics.dig(c.id, name) } }
+      }
+    end
+
+    def columns_names(&selector)
+      @candidates.flat_map(&selector).map(&:name).uniq
+    end
+  end
+end
diff --git a/spec/presenters/ml/candidates_csv_presenter_spec.rb b/spec/presenters/ml/candidates_csv_presenter_spec.rb
new file mode 100644
index 0000000000000..fea00565859a1
--- /dev/null
+++ b/spec/presenters/ml/candidates_csv_presenter_spec.rb
@@ -0,0 +1,84 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+RSpec.describe ::Ml::CandidatesCsvPresenter, feature_category: :mlops do
+  # rubocop:disable RSpec/FactoryBot/AvoidCreate
+  let_it_be(:project) { create(:project, :repository) }
+  let_it_be(:experiment) { create(:ml_experiments, user_id: project.creator, project: project) }
+
+  let_it_be(:candidate0) do
+    create(:ml_candidates, experiment: experiment, user: project.creator,
+      project: project, start_time: 1234, end_time: 5678).tap do |c|
+      c.params.create!([{ name: 'param1', value: 'p1' }, { name: 'param2', value: 'p2' }])
+      c.metrics.create!(
+        [{ name: 'metric1', value: 0.1 }, { name: 'metric2', value: 0.2 }, { name: 'metric3', value: 0.3 }]
+      )
+    end
+  end
+
+  let_it_be(:candidate1) do
+    create(:ml_candidates, experiment: experiment, user: project.creator, name: 'candidate1',
+      project: project, start_time: 1111, end_time: 2222).tap do |c|
+      c.params.create([{ name: 'param2', value: 'p3' }, { name: 'param3', value: 'p4' }])
+      c.metrics.create!(name: 'metric3', value: 0.4)
+    end
+  end
+  # rubocop:enable RSpec/FactoryBot/AvoidCreate
+
+  describe '.present' do
+    subject { described_class.new(::Ml::Candidate.where(id: [candidate0.id, candidate1.id])).present }
+
+    it 'generates header row correctly' do
+      expected_header = %w[project_id experiment_iid candidate_iid name external_id start_time end_time param1 param2
+        param3 metric1 metric2 metric3].join(',')
+      header = subject.split("\n")[0]
+
+      expect(header).to eq(expected_header)
+    end
+
+    it 'generates the first row correctly' do
+      expected_row = [
+        candidate0.project_id,
+        1, # experiment.iid
+        1, # candidate0.internal_id
+        '', # candidate0 has no name, column is empty
+        candidate0.eid,
+        candidate0.start_time,
+        candidate0.end_time,
+        candidate0.params[0].value,
+        candidate0.params[1].value,
+        '', # candidate0 has no param3, column is empty
+        candidate0.metrics[0].value,
+        candidate0.metrics[1].value,
+        candidate0.metrics[2].value
+      ].map(&:to_s)
+
+      row = subject.split("\n")[1].split(",")
+
+      expect(row).to match_array(expected_row)
+    end
+
+    it 'generates the second row correctly' do
+      expected_row = [
+        candidate1.project_id,
+        1, # experiment.iid
+        2, # candidate1.internal_id
+        'candidate1',
+        candidate1.eid,
+        candidate1.start_time,
+        candidate1.end_time,
+        '', # candidate1 has no param1, column is empty
+        candidate1.params[0].value,
+        candidate1.params[1].value,
+        '', # candidate1 has no metric1, column is empty
+        '', # candidate1 has no metric2, column is empty
+        candidate1.metrics[0].value
+      ].map(&:to_s)
+
+      row = subject.split("\n")[2].split(",")
+
+      expect(row).to match_array(expected_row)
+    end
+  end
+end
diff --git a/spec/requests/projects/ml/experiments_controller_spec.rb b/spec/requests/projects/ml/experiments_controller_spec.rb
index 9586f91fe5e90..5a8496a250a48 100644
--- a/spec/requests/projects/ml/experiments_controller_spec.rb
+++ b/spec/requests/projects/ml/experiments_controller_spec.rb
@@ -122,109 +122,143 @@
   end
 
   describe 'GET show' do
-    it 'renders the template' do
-      show_experiment
-
-      expect(response).to render_template('projects/ml/experiments/show')
-    end
+    describe 'html' do
+      it 'renders the template' do
+        show_experiment
 
-    describe 'pagination' do
-      let_it_be(:candidates) do
-        create_list(:ml_candidates, 5, experiment: experiment).tap do |c|
-          c.first.metrics.create!(name: 'metric1', value: 0.3)
-          c[1].metrics.create!(name: 'metric1', value: 0.2)
-          c.last.metrics.create!(name: 'metric1', value: 0.6)
-        end
+        expect(response).to render_template('projects/ml/experiments/show')
       end
 
-      let(:params) { basic_params.merge(id: experiment.iid) }
+      describe 'pagination' do
+        let_it_be(:candidates) do
+          create_list(:ml_candidates, 5, experiment: experiment).tap do |c|
+            c.first.metrics.create!(name: 'metric1', value: 0.3)
+            c[1].metrics.create!(name: 'metric1', value: 0.2)
+            c.last.metrics.create!(name: 'metric1', value: 0.6)
+          end
+        end
 
-      before do
-        stub_const("Projects::Ml::ExperimentsController::MAX_CANDIDATES_PER_PAGE", 2)
+        let(:params) { basic_params.merge(id: experiment.iid) }
 
-        show_experiment
-      end
+        before do
+          stub_const("Projects::Ml::ExperimentsController::MAX_CANDIDATES_PER_PAGE", 2)
 
-      it 'fetches only MAX_CANDIDATES_PER_PAGE candidates' do
-        expect(assigns(:candidates).size).to eq(2)
-      end
+          show_experiment
+        end
 
-      it 'paginates' do
-        received = assigns(:page_info)
+        it 'fetches only MAX_CANDIDATES_PER_PAGE candidates' do
+          expect(assigns(:candidates).size).to eq(2)
+        end
 
-        expect(received).to include({
-          has_next_page: true,
-          has_previous_page: false,
-          start_cursor: nil
-        })
-      end
+        it 'paginates' do
+          received = assigns(:page_info)
 
-      context 'when order by metric' do
-        let(:params) do
-          {
-            order_by: "metric1",
-            order_by_type: "metric",
-            sort: "desc"
-          }
+          expect(received).to include({
+            has_next_page: true,
+            has_previous_page: false,
+            start_cursor: nil
+          })
         end
 
-        it 'paginates', :aggregate_failures do
-          page = assigns(:candidates)
+        context 'when order by metric' do
+          let(:params) do
+            {
+              order_by: "metric1",
+              order_by_type: "metric",
+              sort: "desc"
+            }
+          end
 
-          expect(page.first).to eq(candidates.last)
-          expect(page.last).to eq(candidates.first)
+          it 'paginates', :aggregate_failures do
+            page = assigns(:candidates)
 
-          new_params = params.merge(cursor: assigns(:page_info)[:end_cursor])
+            expect(page.first).to eq(candidates.last)
+            expect(page.last).to eq(candidates.first)
 
-          show_experiment(new_params)
+            new_params = params.merge(cursor: assigns(:page_info)[:end_cursor])
 
-          new_page = assigns(:candidates)
+            show_experiment(new_params: new_params)
 
-          expect(new_page.first).to eq(candidates[1])
+            new_page = assigns(:candidates)
+
+            expect(new_page.first).to eq(candidates[1])
+          end
         end
       end
-    end
 
-    describe 'search' do
-      let(:params) do
-        basic_params.merge(
-          name: 'some_name',
-          orderBy: 'name',
-          orderByType: 'metric',
-          sort: 'asc',
-          invalid: 'invalid'
-        )
+      describe 'search' do
+        let(:params) do
+          basic_params.merge(
+            name: 'some_name',
+            orderBy: 'name',
+            orderByType: 'metric',
+            sort: 'asc',
+            invalid: 'invalid'
+          )
+        end
+
+        it 'formats and filters the parameters' do
+          expect(Projects::Ml::CandidateFinder).to receive(:new).and_call_original do |exp, params|
+            expect(params.to_h).to include({
+              name: 'some_name',
+              order_by: 'name',
+              order_by_type: 'metric',
+              sort: 'asc'
+            })
+          end
+
+          show_experiment
+        end
       end
 
-      it 'formats and filters the parameters' do
-        expect(Projects::Ml::CandidateFinder).to receive(:new).and_call_original do |exp, params|
-          expect(params.to_h).to include({
-            name: 'some_name',
-            order_by: 'name',
-            order_by_type: 'metric',
-            sort: 'asc'
-          })
+      it 'does not perform N+1 sql queries' do
+        control_count = ActiveRecord::QueryRecorder.new(skip_cached: false) { show_experiment }
+
+        create_list(:ml_candidates, 2, :with_metrics_and_params, experiment: experiment)
+
+        expect { show_experiment }.not_to exceed_all_query_limit(control_count)
+      end
+
+      describe '404' do
+        before do
+          show_experiment
         end
 
-        show_experiment
+        it_behaves_like '404 if experiment does not exist'
+        it_behaves_like '404 if feature flag disabled'
       end
     end
 
-    it 'does not perform N+1 sql queries' do
-      control_count = ActiveRecord::QueryRecorder.new(skip_cached: false) { show_experiment }
+    describe 'csv' do
+      it 'responds with :ok', :aggregate_failures do
+        show_experiment_csv
 
-      create_list(:ml_candidates, 2, :with_metrics_and_params, experiment: experiment)
+        expect(response).to have_gitlab_http_status(:ok)
+        expect(response.headers['Content-Type']).to eq('text/csv; charset=utf-8')
+      end
 
-      expect { show_experiment }.not_to exceed_all_query_limit(control_count)
-    end
+      it 'calls the presenter' do
+        allow(::Ml::CandidatesCsvPresenter).to receive(:new).and_call_original
 
-    describe '404' do
-      before do
-        show_experiment
+        show_experiment_csv
+      end
+
+      it 'does not perform N+1 sql queries' do
+        control_count = ActiveRecord::QueryRecorder.new(skip_cached: false) { show_experiment_csv }
+
+        create_list(:ml_candidates, 2, :with_metrics_and_params, experiment: experiment)
+
+        expect { show_experiment_csv }.not_to exceed_all_query_limit(control_count)
       end
 
-      it_behaves_like '404 if experiment does not exist'
-      it_behaves_like '404 if feature flag disabled'
+      describe '404' do
+        before do
+          show_experiment_csv
+        end
+
+        it_behaves_like '404 if experiment does not exist'
+        it_behaves_like '404 if feature flag disabled'
+      end
     end
   end
 
@@ -253,8 +287,12 @@
 
   private
 
-  def show_experiment(new_params = nil)
-    get project_ml_experiment_path(project, experiment_iid), params: new_params || params
+  def show_experiment(new_params: nil, format: :html)
+    get project_ml_experiment_path(project, experiment_iid, format: format), params: new_params || params
+  end
+
+  def show_experiment_csv
+    show_experiment(format: :csv)
   end
 
   def list_experiments(new_params = nil)
-- 
GitLab