From 3052f0656789ed2ff37b1683e414b57211b27bcd Mon Sep 17 00:00:00 2001
From: Allison Browne <abrowne@gitlab.com>
Date: Fri, 5 Jul 2024 14:57:59 +0000
Subject: [PATCH] Add troubleshoot command to move RCA to Chat

---
 app/controllers/projects/jobs_controller.rb   |   2 +
 .../ee/projects/jobs_controller.rb            |  15 ++
 ee/app/models/ee/ci/build.rb                  |   2 +-
 .../wip/root_cause_analysis_duo.yml           |   9 +
 .../chain/tools/troubleshoot_job/executor.rb  | 137 ++++++++++++
 .../troubleshoot_job/prompts/anthropic.rb     |  28 +++
 ee/lib/gitlab/llm/completions/chat.rb         |   3 +-
 .../ee/projects/jobs_controller_spec.rb       |   8 +
 .../tools/troubleshoot_job/executor_spec.rb   | 202 ++++++++++++++++++
 .../prompts/anthropic_spec.rb                 |  49 +++++
 .../lib/gitlab/llm/completions/chat_spec.rb   |   8 +
 locale/gitlab.pot                             |   6 +
 12 files changed, 467 insertions(+), 2 deletions(-)
 create mode 100644 ee/app/controllers/ee/projects/jobs_controller.rb
 create mode 100644 ee/config/feature_flags/wip/root_cause_analysis_duo.yml
 create mode 100644 ee/lib/gitlab/llm/chain/tools/troubleshoot_job/executor.rb
 create mode 100644 ee/lib/gitlab/llm/chain/tools/troubleshoot_job/prompts/anthropic.rb
 create mode 100644 ee/spec/lib/gitlab/llm/chain/tools/troubleshoot_job/executor_spec.rb
 create mode 100644 ee/spec/lib/gitlab/llm/chain/tools/troubleshoot_job/prompts/anthropic_spec.rb

diff --git a/app/controllers/projects/jobs_controller.rb b/app/controllers/projects/jobs_controller.rb
index 444da8881f34..cae07fc588e4 100644
--- a/app/controllers/projects/jobs_controller.rb
+++ b/app/controllers/projects/jobs_controller.rb
@@ -282,3 +282,5 @@ def push_ai_build_failure_cause
     push_frontend_feature_flag(:ai_build_failure_cause, @project)
   end
 end
+
+Projects::JobsController.prepend_mod_with('Projects::JobsController')
diff --git a/ee/app/controllers/ee/projects/jobs_controller.rb b/ee/app/controllers/ee/projects/jobs_controller.rb
new file mode 100644
index 000000000000..22fcd51e140f
--- /dev/null
+++ b/ee/app/controllers/ee/projects/jobs_controller.rb
@@ -0,0 +1,15 @@
+# frozen_string_literal: true
+
+module EE
+  module Projects
+    module JobsController
+      extend ActiveSupport::Concern
+
+      prepended do
+        before_action only: [:show] do
+          push_frontend_feature_flag(:root_cause_analysis_duo, @current_user)
+        end
+      end
+    end
+  end
+end
diff --git a/ee/app/models/ee/ci/build.rb b/ee/app/models/ee/ci/build.rb
index 44fc34b4d4e8..6e7655ee87a1 100644
--- a/ee/app/models/ee/ci/build.rb
+++ b/ee/app/models/ee/ci/build.rb
@@ -28,7 +28,7 @@ module Build
       }.freeze
 
       prepended do
-        include Ai::Model
+        include ::Ai::Model
         include UsageStatistics
         include FromUnion
 
diff --git a/ee/config/feature_flags/wip/root_cause_analysis_duo.yml b/ee/config/feature_flags/wip/root_cause_analysis_duo.yml
new file mode 100644
index 000000000000..d15fff687c6b
--- /dev/null
+++ b/ee/config/feature_flags/wip/root_cause_analysis_duo.yml
@@ -0,0 +1,9 @@
+---
+name: root_cause_analysis_duo
+feature_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/441681
+introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/154040
+rollout_issue_url: https://gitlab.com/gitlab-com/gl-infra/production-engineering/-/issues/25552
+milestone: '17.2'
+group: group::pipeline execution
+type: wip
+default_enabled: false
diff --git a/ee/lib/gitlab/llm/chain/tools/troubleshoot_job/executor.rb b/ee/lib/gitlab/llm/chain/tools/troubleshoot_job/executor.rb
new file mode 100644
index 000000000000..066f9870f4b1
--- /dev/null
+++ b/ee/lib/gitlab/llm/chain/tools/troubleshoot_job/executor.rb
@@ -0,0 +1,137 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module Llm
+    module Chain
+      module Tools
+        module TroubleshootJob
+          class Executor < SlashCommandTool
+            extend ::Gitlab::Utils::Override
+            include Concerns::AiDependent
+            include ::Gitlab::Utils::StrongMemoize
+
+            NAME = 'TroubleshootJob'
+            RESOURCE_NAME = 'Ci::Build'
+            HUMAN_NAME = 'Troubleshoot Job'
+            DESCRIPTION = 'Useful tool to troubleshoot job-related issues.'
+            EXAMPLE = "Question: My job is failing with an error. How can I fix it and figure out why it failed? " \
+              'Picked tools: "TroubleshootJob" tool. ' \
+              'Reason: The question is about troubleshooting a job issue. "TroubleshootJob" tool ' \
+              'can process this question.'
+            PROVIDER_PROMPT_CLASSES = {
+              ai_gateway: ::Gitlab::Llm::Chain::Tools::TroubleshootJob::Prompts::Anthropic,
+              anthropic: ::Gitlab::Llm::Chain::Tools::TroubleshootJob::Prompts::Anthropic
+            }.freeze
+
+            PROMPT_TEMPLATE = [
+              Utils::Prompt.as_system(
+                <<~PROMPT
+                  You are a Software engineer's or DevOps engineer's Assistant.
+                  You can explain the root cause of a GitLab CI verification job code failure from the job log.
+                  %<language_info>s
+                PROMPT
+              ),
+              Utils::Prompt.as_user(
+                <<~PROMPT.chomp
+                  Below are the job logs surrounded by the xml tag: <log>
+
+                  <log>
+                    %<selected_text>s
+                  <log>
+
+                  %<input>s
+
+                  Think step by step and try to determine why the job failed and explain it so that
+                  any Software engineer could understand the root cause of the failure.
+                  Please provide an example fix under the heading "Example Fix".
+                  Any code blocks in response should be formatted in markdown.
+                PROMPT
+              )
+            ].freeze
+
+            SLASH_COMMANDS = {
+              '/rca' => {
+                description: 'Troubleshoot a job based on the logs.',
+                instruction: 'Troubleshoot the job log.',
+                instruction_with_input: "Troubleshoot the job log. Input: %<input>s."
+              }
+            }.freeze
+
+            def self.slash_commands
+              SLASH_COMMANDS
+            end
+
+            override :perform
+            def perform
+              error_message = if disabled?
+                                _('This feature is not enabled yet.')
+                              elsif !job.is_a?(::Ci::Build)
+                                _('This command is used for troubleshooting jobs and can only be invoked from ' \
+                                  'a job log page.')
+                              elsif !job.failed?
+                                _('This command is used for troubleshooting jobs and can only be invoked from ' \
+                                  'a failed job log page.')
+                              end
+
+              return error_with_message(error_message) if error_message
+
+              super
+            end
+
+            private
+
+            def disabled?
+              Feature.disabled?(:root_cause_analysis_duo, context.current_user)
+            end
+
+            def selected_text_options
+              {
+                selected_text: job_log,
+                language_info: language_info
+              }
+            end
+
+            def job_log
+              # Line limit should be reworked based on
+              # the results of the prompt library and prompt engineering.
+              # 1000*100/4
+              # 1000 lines, ~100 char per line (can be more), ~4 tokens per character
+              # ~25000 tokens
+              job.trace.raw(last_lines: 1000)
+            end
+
+            def job
+              context.resource
+            end
+            strong_memoize_attr :job
+
+            def authorize
+              context.current_user.can?(:read_build_trace, job) &&
+                Utils::ChatAuthorizer.context(context: context).allowed?
+            end
+
+            def resource_name
+              RESOURCE_NAME
+            end
+
+            # Detects what code is used in the project
+            # example return value:  "The repository code is written in Go, Ruby, Makefile, Shell and Dockerfile."
+            def language_info
+              language_names = job.project.repository_languages.map(&:name)
+              return '' if language_names.empty?
+
+              last_language = language_names.pop
+              languages_comma_seperated = language_names.join(', ')
+
+              if language_names.size >= 1
+                "The repository code is written in #{languages_comma_seperated} and #{last_language}."
+              else
+                "The repository code is written in #{last_language}."
+              end
+            end
+          end
+        end
+      end
+    end
+  end
+end
diff --git a/ee/lib/gitlab/llm/chain/tools/troubleshoot_job/prompts/anthropic.rb b/ee/lib/gitlab/llm/chain/tools/troubleshoot_job/prompts/anthropic.rb
new file mode 100644
index 000000000000..3aafbf683b9e
--- /dev/null
+++ b/ee/lib/gitlab/llm/chain/tools/troubleshoot_job/prompts/anthropic.rb
@@ -0,0 +1,28 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module Llm
+    module Chain
+      module Tools
+        module TroubleshootJob
+          module Prompts
+            class Anthropic
+              include Concerns::AnthropicPrompt
+
+              def self.prompt(variables)
+                {
+                  prompt: Utils::Prompt.role_conversation(
+                    Utils::Prompt.format_conversation(
+                      Gitlab::Llm::Chain::Tools::TroubleshootJob::Executor::PROMPT_TEMPLATE,
+                      variables
+                    )
+                  )
+                }
+              end
+            end
+          end
+        end
+      end
+    end
+  end
+end
diff --git a/ee/lib/gitlab/llm/completions/chat.rb b/ee/lib/gitlab/llm/completions/chat.rb
index a2379b1045b4..597c19f9871f 100644
--- a/ee/lib/gitlab/llm/completions/chat.rb
+++ b/ee/lib/gitlab/llm/completions/chat.rb
@@ -20,7 +20,8 @@ class Chat < Base
           ::Gitlab::Llm::Chain::Tools::ExplainCode,
           ::Gitlab::Llm::Chain::Tools::WriteTests,
           ::Gitlab::Llm::Chain::Tools::RefactorCode,
-          ::Gitlab::Llm::Chain::Tools::ExplainVulnerability
+          ::Gitlab::Llm::Chain::Tools::ExplainVulnerability,
+          ::Gitlab::Llm::Chain::Tools::TroubleshootJob
         ].freeze
 
         # @param [Gitlab::Llm::AiMessage] prompt_message - user question
diff --git a/ee/spec/controllers/ee/projects/jobs_controller_spec.rb b/ee/spec/controllers/ee/projects/jobs_controller_spec.rb
index 6054b828c62c..408e5cc87b52 100644
--- a/ee/spec/controllers/ee/projects/jobs_controller_spec.rb
+++ b/ee/spec/controllers/ee/projects/jobs_controller_spec.rb
@@ -6,6 +6,7 @@
   describe 'GET #show', :clean_gitlab_redis_shared_state do
     context 'when requesting JSON' do
       let_it_be(:user) { create(:user) }
+      let_it_be(:project) { create(:project) }
 
       let(:merge_request) { create(:merge_request, source_project: project) }
       let(:runner) { create(:ci_runner, :instance, description: 'Shared runner') }
@@ -21,6 +22,13 @@
         end
       end
 
+      it 'pushes the root_cause_analysis_duo feature flag' do
+        expect(controller).to receive(:push_frontend_feature_flag).with(:root_cause_analysis_duo, user)
+        expect(controller).to receive(:push_frontend_feature_flag).and_call_original
+
+        get_show(id: job.id, format: :json)
+      end
+
       context 'with shared runner that has quota' do
         let(:project) { create(:project, :repository, :private, shared_runners_enabled: true) }
 
diff --git a/ee/spec/lib/gitlab/llm/chain/tools/troubleshoot_job/executor_spec.rb b/ee/spec/lib/gitlab/llm/chain/tools/troubleshoot_job/executor_spec.rb
new file mode 100644
index 000000000000..373a267fd16c
--- /dev/null
+++ b/ee/spec/lib/gitlab/llm/chain/tools/troubleshoot_job/executor_spec.rb
@@ -0,0 +1,202 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+RSpec.describe Gitlab::Llm::Chain::Tools::TroubleshootJob::Executor, feature_category: :continuous_integration do
+  let(:ai_request_double) { instance_double(Gitlab::Llm::Chain::Requests::Anthropic) }
+  let_it_be(:user) { create(:user) }
+  let_it_be(:project) { create(:project) }
+  let_it_be(:build) { create(:ci_build, :failed, :trace_live, project: project) }
+  let(:stream_response_handler) { nil }
+  let(:input) { 'user input' }
+  let(:options) { { input: input } }
+  let(:command) { nil }
+  let(:prompt_class) { Gitlab::Llm::Chain::Tools::TroubleshootJob::Prompts::Anthropic }
+
+  let(:context) do
+    Gitlab::Llm::Chain::GitlabContext.new(
+      current_user: user,
+      container: nil,
+      resource: build,
+      ai_request: ai_request_double
+    )
+  end
+
+  subject(:tool) do
+    described_class.new(
+      context: context,
+      options: options,
+      stream_response_handler: stream_response_handler,
+      command: command
+    )
+  end
+
+  RSpec.shared_context 'with repo languages' do
+    before do
+      allow(project).to receive(:repository_languages).and_return(
+        repository_languages.map do |lang|
+          instance_double(RepositoryLanguage, name: lang)
+        end
+      )
+    end
+  end
+
+  describe '#name' do
+    it 'returns the correct tool name' do
+      expect(described_class::NAME).to eq('TroubleshootJob')
+    end
+
+    it 'returns the correct human-readable name' do
+      expect(described_class::HUMAN_NAME).to eq('Troubleshoot Job')
+    end
+  end
+
+  describe '#description' do
+    it 'returns the correct description' do
+      expect(described_class::DESCRIPTION).to include('Useful tool to troubleshoot job-related issues.')
+    end
+  end
+
+  describe '#resource_name' do
+    it 'returns the correct description' do
+      expect(described_class::RESOURCE_NAME).to include('Ci::Build')
+    end
+  end
+
+  describe '#execute' do
+    context 'when the user is authorized' do
+      include_context 'with stubbed LLM authorizer', allowed: true
+
+      before do
+        allow(user).to receive(:can?).and_call_original
+        allow(user).to receive(:can?).with(:read_build, build).and_return(true)
+        allow(user).to receive(:can?).with(:read_build_trace, build).and_return(true)
+        allow(Gitlab::Llm::Chain::Utils::ChatAuthorizer).to receive_message_chain(:context, :allowed?).and_return(true)
+        allow(tool).to receive(:provider_prompt_class).and_return(prompt_class)
+      end
+
+      it 'performs the troubleshooting' do
+        expect(tool).to receive(:request).and_return('Troubleshooting response')
+        expect(tool.execute.content).to eq('Troubleshooting response')
+      end
+
+      context 'with repository languages' do
+        include_context 'with repo languages'
+
+        let(:repository_languages) { %w[C C++] }
+
+        it 'calls prompt with correct params' do
+          allow(tool).to receive(:provider_prompt_class).and_return(prompt_class)
+          expect(prompt_class).to receive(:prompt).with(a_hash_including(
+            input: input,
+            language_info: "The repository code is written in C and C++.",
+            selected_text: build.trace.raw
+          ))
+
+          tool.execute
+        end
+      end
+
+      context 'when the feature is disabled' do
+        before do
+          stub_feature_flags(root_cause_analysis_duo: false)
+        end
+
+        it 'returns an error message' do
+          expect(tool.execute.content).to eq('This feature is not enabled yet.')
+        end
+      end
+
+      context 'when the job is not failed' do
+        let(:build) { create(:ci_build, :running, project: project) }
+
+        it 'returns an error message' do
+          content = tool.execute.content
+
+          expect(content).to include('This command is used for troubleshooting jobs')
+          expect(content).to include('failed job')
+        end
+      end
+
+      context 'when the resource is not a Ci::Build' do
+        let(:context) do
+          Gitlab::Llm::Chain::GitlabContext.new(
+            current_user: user,
+            container: nil,
+            resource: project,
+            ai_request: nil
+          )
+        end
+
+        before do
+          allow(user).to receive(:can?).with(:read_build_trace, project).and_return(true)
+        end
+
+        it 'returns an error message' do
+          expect(tool.execute.content).to include('This command is used for troubleshooting jobs')
+        end
+      end
+    end
+
+    context 'when the user is not authorized' do
+      include_context 'with stubbed LLM authorizer', allowed: false
+
+      before do
+        allow(tool).to receive(:provider_prompt_class).and_return(
+          ::Gitlab::Llm::Chain::Tools::TroubleshootJob::Prompts::Anthropic
+        )
+        allow(user).to receive(:can?).with(:read_build_trace, build).and_return(false)
+      end
+
+      it 'returns an error message' do
+        expect(tool.execute.content).to include(
+          "you don't have access to them, or your session has expired."
+        )
+      end
+    end
+
+    describe '#job_log' do
+      context 'when the job is present and failed' do
+        it 'returns the job trace' do
+          expect(tool.send(:job_log)).to eq(build.trace.raw)
+        end
+      end
+    end
+
+    describe '#language_info' do
+      include_context 'with repo languages'
+
+      context 'without languages' do
+        let(:repository_languages) { [] }
+
+        it 'returns an empty string' do
+          expect(tool.send(:language_info)).to eq('')
+        end
+      end
+
+      context 'when more than one language' do
+        let(:repository_languages) { %w[Ruby JavaScript Go] }
+
+        it 'returns the correct language information' do
+          expect(tool.send(:language_info)).to eq('The repository code is written in Ruby, JavaScript and Go.')
+        end
+      end
+
+      context 'with two languages' do
+        let(:repository_languages) { %w[JavaScript Go] }
+
+        it 'returns the correct language information' do
+          expect(tool.send(:language_info)).to eq('The repository code is written in JavaScript and Go.')
+        end
+      end
+
+      context 'when one language' do
+        let(:repository_languages) { %w[Ruby] }
+
+        it 'returns the correct language information' do
+          expect(tool.send(:language_info)).to eq('The repository code is written in Ruby.')
+        end
+      end
+    end
+  end
+end
diff --git a/ee/spec/lib/gitlab/llm/chain/tools/troubleshoot_job/prompts/anthropic_spec.rb b/ee/spec/lib/gitlab/llm/chain/tools/troubleshoot_job/prompts/anthropic_spec.rb
new file mode 100644
index 000000000000..88853adbcd4e
--- /dev/null
+++ b/ee/spec/lib/gitlab/llm/chain/tools/troubleshoot_job/prompts/anthropic_spec.rb
@@ -0,0 +1,49 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+RSpec.describe Gitlab::Llm::Chain::Tools::TroubleshootJob::Prompts::Anthropic, feature_category: :duo_chat do
+  let(:user) { create(:user) }
+
+  describe '.prompt' do
+    it 'returns prompt', :aggregate_failures do
+      result = described_class.prompt(
+        input: 'question',
+        language_info: 'The repo is written in Ruby.',
+        selected_text: 'BUILD LOG'
+      )
+      prompt = result[:prompt]
+
+      expected_system_prompt = <<~PROMPT
+        You are a Software engineer's or DevOps engineer's Assistant.
+        You can explain the root cause of a GitLab CI verification job code failure from the job log.
+        The repo is written in Ruby.
+      PROMPT
+
+      expected_user_prompt = <<~PROMPT.chomp
+          Below are the job logs surrounded by the xml tag: <log>
+
+          <log>
+            BUILD LOG
+          <log>
+
+          question
+
+          Think step by step and try to determine why the job failed and explain it so that
+          any Software engineer could understand the root cause of the failure.
+          Please provide an example fix under the heading "Example Fix".
+          Any code blocks in response should be formatted in markdown.
+      PROMPT
+
+      expected_prompt = [
+        {
+          role: :system, content: expected_system_prompt
+        },
+        {
+          role: :user, content: expected_user_prompt
+        }
+      ]
+      expect(prompt).to eq(expected_prompt)
+    end
+  end
+end
diff --git a/ee/spec/lib/gitlab/llm/completions/chat_spec.rb b/ee/spec/lib/gitlab/llm/completions/chat_spec.rb
index 6aab877abf12..5409084dac6e 100644
--- a/ee/spec/lib/gitlab/llm/completions/chat_spec.rb
+++ b/ee/spec/lib/gitlab/llm/completions/chat_spec.rb
@@ -288,6 +288,14 @@
         end
       end
 
+      context 'when /rca is used' do
+        let(:command) { '/rca' } # slash command declared in TroubleshootJob::Executor::SLASH_COMMANDS
+
+        it_behaves_like 'slash command execution' do
+          let(:expected_tool) { ::Gitlab::Llm::Chain::Tools::TroubleshootJob::Executor }
+        end
+      end
+
       context 'when /tests is used' do
         let(:command) { '/tests' }
 
diff --git a/locale/gitlab.pot b/locale/gitlab.pot
index f8c7f2b5b787..52dadf0ab7c3 100644
--- a/locale/gitlab.pot
+++ b/locale/gitlab.pot
@@ -54123,6 +54123,12 @@ msgstr ""
 msgid "This command is used for explaining vulnerabilities and can only be invoked from a vulnerability detail page."
 msgstr ""
 
+msgid "This command is used for troubleshooting jobs and can only be invoked from a failed job log page."
+msgstr ""
+
+msgid "This command is used for troubleshooting jobs and can only be invoked from a job log page."
+msgstr ""
+
 msgid "This comment changed after you started editing it. Review the %{startTag}updated comment%{endTag} to ensure information is not lost."
 msgstr ""
 
-- 
GitLab