From e1d788af0d0f2c742ec209c75605f23371a9ccff Mon Sep 17 00:00:00 2001
From: Tian Gao <tgao@gitlab.com>
Date: Wed, 2 Oct 2024 22:13:29 +0000
Subject: [PATCH] Migrate prompt from rails to AIGW for anthropic

This MR migrates the params for the context_block, libraries_block and
user_instruction blocks from Rails to AIGW.
---
 .../code_generation/ai_gateway_messages.rb    |  66 ++++++-
 .../ai_gateway_messages_spec.rb               |  36 ++++
 ...mples.rb => ai_gateway_shared_examples.rb} | 177 +++++++++++++++++-
 .../anthropic_messages_spec.rb                |   4 +-
 .../tasks/code_generation_spec.rb             |  90 +++++----
 ee/spec/requests/api/code_suggestions_spec.rb |   8 +-
 6 files changed, 324 insertions(+), 57 deletions(-)
 create mode 100644 ee/spec/lib/code_suggestions/prompts/code_generation/ai_gateway_messages_spec.rb
 rename ee/spec/lib/code_suggestions/prompts/code_generation/{anthropic_shared_examples.rb => ai_gateway_shared_examples.rb} (83%)

diff --git a/ee/lib/code_suggestions/prompts/code_generation/ai_gateway_messages.rb b/ee/lib/code_suggestions/prompts/code_generation/ai_gateway_messages.rb
index 176c9f5237f0..29a42f4d9e0d 100644
--- a/ee/lib/code_suggestions/prompts/code_generation/ai_gateway_messages.rb
+++ b/ee/lib/code_suggestions/prompts/code_generation/ai_gateway_messages.rb
@@ -12,7 +12,6 @@ class AiGatewayMessages < CodeSuggestions::Prompts::Base
         # response time grows with prompt size, so we don't use upper limit size of prompt window
         MAX_INPUT_CHARS = 50000
         GATEWAY_PROMPT_VERSION = 3
-        CONTENT_TYPES = { file: 'file', snippet: 'snippet' }.freeze
 
         def request_params
           {
@@ -37,7 +36,10 @@ def request_params
         def code_generation_enhancer
           {
             **examples_section_params,
-            **existing_code_block_params
+            **existing_code_block_params,
+            **context_block_params,
+            **libraries_block_params,
+            **user_instruction_params
           }
         end
 
@@ -50,8 +52,6 @@ def examples_section_params
         end
 
         def existing_code_block_params
-          return {} unless params[:prefix].present?
-
           trimmed_prefix = prefix.to_s.last(MAX_INPUT_CHARS)
           trimmed_suffix = suffix.to_s.first(MAX_INPUT_CHARS - trimmed_prefix.size)
 
@@ -60,6 +60,64 @@ def existing_code_block_params
             trimmed_suffix: trimmed_suffix
           }
         end
+
+        # Wraps each additional-context entry in a tagged block and buckets it by type.
+        # Entries whose type is neither file nor snippet are dropped; a nil context
+        # yields two empty arrays.
+        def context_block_params
+          context_types = ::Ai::AdditionalContext::CODE_SUGGESTIONS_CONTEXT_TYPES
+          blocks = { related_files: [], related_snippets: [] }
+
+          params[:context]&.each do |item|
+            if item[:type] == context_types[:file]
+              blocks[:related_files] << <<~FILE_CONTENT
+                <file_content file_name="#{item[:name]}">
+                #{item[:content]}
+                </file_content>
+              FILE_CONTENT
+            elsif item[:type] == context_types[:snippet]
+              blocks[:related_snippets] << <<~SNIPPET_CONTENT
+                <snippet_content name="#{item[:name]}">
+                #{item[:content]}
+                </snippet_content>
+              SNIPPET_CONTENT
+            end
+          end
+
+          blocks
+        end
+
+        # Returns the X-Ray library names for the prompt. An internal event is tracked
+        # first whenever `libraries.any?` holds.
+        def libraries_block_params
+          project = params[:project]
+
+          if libraries.any?
+            Gitlab::InternalEvents.track_event('include_repository_xray_data_into_code_generation_prompt',
+              project: project, namespace: project&.namespace, user: params[:current_user])
+          end
+
+          { libraries: libraries }
+        end
+
+        # Library names taken from the X-Ray report; an empty array when there is no report.
+        def libraries
+          xray_report ? xray_report.libs.map { |l| l['name'] } : [] # rubocop:disable Rails/Pluck -- libs is an array
+        end
+        strong_memoize_attr :libraries
+
+        # First X-Ray report scoped to the project and to the language's x_ray_lang (nil if none).
+        def xray_report
+          ::Projects::XrayReport.for_project(params[:project]).for_lang(language.x_ray_lang).first
+        end
+        strong_memoize_attr :xray_report
+
+        # Uses the caller's instruction when it is present, otherwise a generic default.
+        def user_instruction_params
+          requested = params[:instruction]&.instruction.presence
+
+          { user_instruction: requested || 'Generate the best possible code based on instructions.' }
+        end
       end
     end
   end
diff --git a/ee/spec/lib/code_suggestions/prompts/code_generation/ai_gateway_messages_spec.rb b/ee/spec/lib/code_suggestions/prompts/code_generation/ai_gateway_messages_spec.rb
new file mode 100644
index 000000000000..ec60784bcda0
--- /dev/null
+++ b/ee/spec/lib/code_suggestions/prompts/code_generation/ai_gateway_messages_spec.rb
@@ -0,0 +1,36 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+require_relative 'ai_gateway_shared_examples'
+
+RSpec.describe CodeSuggestions::Prompts::CodeGeneration::AiGatewayMessages, feature_category: :code_suggestions do
+  let(:prompt_version) { 3 }
+
+  it_behaves_like 'code generation AI Gateway request params' do
+    # Request body the shared examples compare against. It is assembled inside-out:
+    # enhancer params, then the payload, then the single prompt component. Every
+    # expected_* value is supplied by the shared examples' own `let` definitions.
+    def expected_request_params
+      prompt_enhancer = {
+        examples_array: expected_examples_array,
+        trimmed_prefix: expected_trimmed_prefix,
+        trimmed_suffix: expected_trimmed_suffix,
+        related_files: expected_related_files,
+        related_snippets: expected_related_snippets,
+        libraries: expected_libraries,
+        user_instruction: expected_user_instruction
+      }
+
+      payload = {
+        file_name: expected_file_name,
+        content_above_cursor: expected_content_above_cursor,
+        content_below_cursor: expected_content_below_cursor,
+        language_identifier: expected_language_identifier,
+        prompt_id: 'code_suggestions/generations',
+        prompt_enhancer: prompt_enhancer
+      }
+
+      { prompt_components: [{ type: 'code_editor_generation', payload: payload }] }
+    end
+  end
+end
diff --git a/ee/spec/lib/code_suggestions/prompts/code_generation/anthropic_shared_examples.rb b/ee/spec/lib/code_suggestions/prompts/code_generation/ai_gateway_shared_examples.rb
similarity index 83%
rename from ee/spec/lib/code_suggestions/prompts/code_generation/anthropic_shared_examples.rb
rename to ee/spec/lib/code_suggestions/prompts/code_generation/ai_gateway_shared_examples.rb
index cf7a4a23cc1b..87274e8de721 100644
--- a/ee/spec/lib/code_suggestions/prompts/code_generation/anthropic_shared_examples.rb
+++ b/ee/spec/lib/code_suggestions/prompts/code_generation/ai_gateway_shared_examples.rb
@@ -1,6 +1,7 @@
 # frozen_string_literal: true
 
-RSpec.shared_examples 'anthropic prompt' do
+# AIGW v2 API for code generation receives a prompt
+RSpec.shared_examples 'code generation AI Gateway request prompt' do
   let(:language) { instance_double(CodeSuggestions::ProgrammingLanguage, x_ray_lang: x_ray_lang) }
   let(:language_name) { 'Go' }
   let(:x_ray_lang) { nil }
@@ -316,19 +317,19 @@
       context 'when context is available' do
         let(:main_go_content) do
           <<~CONTENT
-          package main
+            package main
 
-          func main()
-            fullName("John", "Doe")
-          }
+            func main()
+              fullName("John", "Doe")
+            }
           CONTENT
         end
 
         let(:full_name_func_content) do
           <<~CONTENT
-          func fullName(first, last string) {
-            fmt.Println(first, last)
-          }
+            func fullName(first, last string) {
+              fmt.Println(first, last)
+            }
           CONTENT
         end
 
@@ -615,3 +616,163 @@
     end
   end
 end
+
+# AIGW v3 API for code generation receives params; the including spec must define #expected_request_params
+RSpec.shared_examples 'code generation AI Gateway request params' do
+  let_it_be(:current_user) { create(:user) }
+  let_it_be(:project) { create(:project) }
+
+  let(:file_name) { 'main.go' }
+  let(:content_above_cursor) { "package main\n\nimport \"fmt\"\n\nfunc main() {\n" }
+  let(:content_below_cursor) { "func test() {\n" }
+  let(:comment) { 'My comment instructions' }
+  let(:instruction) { instance_double(CodeSuggestions::Instruction, instruction: comment, trigger_type: 'comment') }
+  let(:examples) { [{ example: 'func hello() {', response: 'func hello() {<new_code>fmt.Println("hello")' }] }
+
+  let(:context) do
+    [
+      { type: 'file', name: 'main.go', content:
+        <<~CONTENT
+          package main
+
+          func main()
+            fullName("John", "Doe")
+          }
+        CONTENT
+      },
+      { type: 'snippet', name: 'fullName', content:
+        <<~CONTENT
+          func fullName(first, last string) {
+            fmt.Println(first, last)
+          }
+        CONTENT
+      }
+    ]
+  end
+
+  let(:current_file_params) do
+    {
+      file_name: file_name,
+      content_above_cursor: content_above_cursor,
+      content_below_cursor: content_below_cursor
+    }
+  end
+
+  let(:params) do
+    {
+      current_user: current_user,
+      project: project,
+      instruction: instruction,
+      current_file: current_file_params,
+      context: context
+    }
+  end
+
+  subject { described_class.new(params) }
+
+  describe '#request_params' do
+    context 'when all parameters are present' do
+      before_all do
+        create(:xray_report, lang: 'go', project: project,
+          payload: { libs: [{ name: 'zlib (1.2.3)' }, { name: 'boost (2.0.0)' }] })
+      end
+
+      let(:expected_file_name) { file_name }
+      let(:expected_content_above_cursor) { content_above_cursor }
+      let(:expected_content_below_cursor) { content_below_cursor }
+      let(:expected_language_identifier) { 'Go' }
+      let(:expected_examples_array) { examples }
+      let(:expected_trimmed_prefix) { content_above_cursor }
+      let(:expected_trimmed_suffix) { content_below_cursor }
+      let(:expected_libraries) { ['zlib (1.2.3)', 'boost (2.0.0)'] }
+      let(:expected_user_instruction) { comment }
+
+      let(:expected_related_files) do
+        [
+          "<file_content file_name=\"main.go\">\npackage main\n\nfunc main()\n  " \
+            "fullName(\"John\", \"Doe\")\n}\n\n</file_content>\n"
+        ]
+      end
+
+      let(:expected_related_snippets) do
+        [
+          "<snippet_content name=\"fullName\">\nfunc fullName(first, last string) {\n  " \
+            "fmt.Println(first, last)\n}\n\n</snippet_content>\n"
+        ]
+      end
+
+      before do
+        allow_next_instance_of(CodeSuggestions::ProgrammingLanguage) do |instance|
+          allow(instance).to receive(:generation_examples).with(type: instruction.trigger_type).and_return(examples)
+        end
+      end
+
+      it 'returns expected request params' do
+        expect(subject.request_params).to eq(expected_request_params)
+      end
+
+      it 'tracks an X-Ray event' do
+        expect(Gitlab::InternalEvents).to receive(:track_event).with(
+          'include_repository_xray_data_into_code_generation_prompt',
+          project: project,
+          namespace: project.namespace,
+          user: current_user
+        )
+
+        subject.request_params
+      end
+
+      context 'when the prefix length exceeds the prompt limit' do
+        let(:limit) { 10 }
+        let(:expected_trimmed_prefix) { content_above_cursor.last(limit) }
+        let(:expected_trimmed_suffix) { '' }
+
+        before do
+          stub_const('CodeSuggestions::Prompts::CodeGeneration::AiGatewayMessages::MAX_INPUT_CHARS', limit)
+        end
+
+        it 'returns expected request params' do
+          expect(subject.request_params).to eq(expected_request_params)
+        end
+
+        context 'when the combined prefix and suffix length exceeds the prompt limit' do
+          let(:limit) { content_above_cursor.size + 5 }
+          let(:expected_trimmed_prefix) { content_above_cursor }
+          let(:expected_trimmed_suffix) { content_below_cursor.first(5) }
+
+          it 'returns expected request params' do
+            expect(subject.request_params).to eq(expected_request_params)
+          end
+        end
+      end
+    end
+
+    context 'when all parameters are blank' do
+      let(:instruction) { nil }
+      let(:context) { nil }
+      let(:current_file_params) { nil }
+
+      let(:expected_file_name) { '' }
+      let(:expected_content_above_cursor) { nil }
+      let(:expected_content_below_cursor) { nil }
+      let(:expected_language_identifier) { '' }
+      let(:expected_examples_array) { [] }
+      let(:expected_trimmed_prefix) { '' }
+      let(:expected_trimmed_suffix) { '' }
+      let(:expected_libraries) { [] }
+      let(:expected_user_instruction) { 'Generate the best possible code based on instructions.' }
+      let(:expected_related_files) { [] }
+      let(:expected_related_snippets) { [] }
+
+      it 'returns expected request params' do
+        expect(subject.request_params).to eq(expected_request_params)
+      end
+
+      it 'does not track an X-Ray event' do
+        expect(Gitlab::InternalEvents).not_to receive(:track_event)
+
+        subject.request_params
+      end
+    end
+  end
+end
diff --git a/ee/spec/lib/code_suggestions/prompts/code_generation/anthropic_messages_spec.rb b/ee/spec/lib/code_suggestions/prompts/code_generation/anthropic_messages_spec.rb
index dda9504c2cae..00ac633ac452 100644
--- a/ee/spec/lib/code_suggestions/prompts/code_generation/anthropic_messages_spec.rb
+++ b/ee/spec/lib/code_suggestions/prompts/code_generation/anthropic_messages_spec.rb
@@ -1,12 +1,12 @@
 # frozen_string_literal: true
 
 require 'spec_helper'
-require_relative 'anthropic_shared_examples'
+require_relative 'ai_gateway_shared_examples'
 
 RSpec.describe CodeSuggestions::Prompts::CodeGeneration::AnthropicMessages, feature_category: :code_suggestions do
   let(:prompt_version) { 3 }
 
-  it_behaves_like 'anthropic prompt' do
+  it_behaves_like 'code generation AI Gateway request prompt' do
     def expected_prompt
       [
         { role: :system, content: system_prompt },
diff --git a/ee/spec/lib/code_suggestions/tasks/code_generation_spec.rb b/ee/spec/lib/code_suggestions/tasks/code_generation_spec.rb
index be242d1855a4..849816929a47 100644
--- a/ee/spec/lib/code_suggestions/tasks/code_generation_spec.rb
+++ b/ee/spec/lib/code_suggestions/tasks/code_generation_spec.rb
@@ -44,30 +44,34 @@
 
     let(:anthropic_request_params) do
       {
-        "prompt_components" => [
+        'prompt_components' => [
           {
-            "type" => "code_editor_generation",
-            "payload" => {
-              "file_name" => "test.py",
-              "content_above_cursor" => "some prefix",
-              "content_below_cursor" => "some suffix",
-              "language_identifier" => "Python",
-              "prompt_id" => "code_suggestions/generations",
-              "prompt_enhancer" => {
-                "examples_array" => [
+            'type' => 'code_editor_generation',
+            'payload' => {
+              'file_name' => 'test.py',
+              'content_above_cursor' => 'some prefix',
+              'content_below_cursor' => 'some suffix',
+              'language_identifier' => 'Python',
+              'prompt_id' => 'code_suggestions/generations',
+              'prompt_enhancer' => {
+                'examples_array' => [
                   {
-                    "example" => "class Project:\\n  def __init__(self, name, public):{{cursor}}\\n\\n ",
-                    "response" => "return self.visibility == 'PUBLIC'",
-                    "trigger_type" => "comment"
+                    'example' => 'class Project:\\n  def __init__(self, name, public):{{cursor}}\\n\\n ',
+                    'response' => "return self.visibility == 'PUBLIC'",
+                    'trigger_type' => 'comment'
                   },
                   {
-                    "example" => "# get the current user's name from the session data\\n{{cursor}}",
-                    "response" => "username = session['username']\\nreturn username",
-                    "trigger_type" => "comment"
+                    'example' => "# get the current user's name from the session data\\n{{cursor}}",
+                    'response' => "username = session['username']\\nreturn username",
+                    'trigger_type' => 'comment'
                   }
                 ],
-                "trimmed_prefix" => "some prefix",
-                "trimmed_suffix" => "some suffix"
+                'trimmed_prefix' => 'some prefix',
+                'trimmed_suffix' => 'some suffix',
+                'related_files' => '',
+                'related_snippets' => '',
+                'libraries' => '',
+                'user_instruction' => 'Generate the best possible code based on instructions.'
               }
             }
           }
@@ -94,40 +98,44 @@
       let(:endpoint_path) { 'v3/code/completions' }
       let(:expected_body) do
         {
-          "current_file" => {
-            "content_above_cursor" => "fix",
-            "content_below_cursor" => "som",
-            "file_name" => "test.py"
+          'current_file' => {
+            'content_above_cursor' => 'fix',
+            'content_below_cursor' => 'som',
+            'file_name' => 'test.py'
           },
-          "prompt_components" => [
+          'prompt_components' => [
             {
-              "payload" => {
-                "content_above_cursor" => "some prefix",
-                "content_below_cursor" => "some suffix",
-                "file_name" => "test.py",
-                "language_identifier" => "Python",
-                "prompt_enhancer" => {
-                  "examples_array" => [
+              'payload' => {
+                'content_above_cursor' => 'some prefix',
+                'content_below_cursor' => 'some suffix',
+                'file_name' => 'test.py',
+                'language_identifier' => 'Python',
+                'prompt_enhancer' => {
+                  'examples_array' => [
                     {
-                      "example" => "class Project:\\n  def __init__(self, name, public):{{cursor}}\\n\\n ",
-                      "response" => "return self.visibility == 'PUBLIC'",
-                      "trigger_type" => "comment"
+                      'example' => 'class Project:\\n  def __init__(self, name, public):{{cursor}}\\n\\n ',
+                      'response' => "return self.visibility == 'PUBLIC'",
+                      'trigger_type' => 'comment'
                     },
                     {
-                      "example" => "# get the current user's name from the session data\\n{{cursor}}",
-                      "response" => "username = session['username']\\nreturn username",
-                      "trigger_type" => "comment"
+                      'example' => "# get the current user's name from the session data\\n{{cursor}}",
+                      'response' => "username = session['username']\\nreturn username",
+                      'trigger_type' => 'comment'
                     }
                   ],
-                  "trimmed_prefix" => "some prefix",
-                  "trimmed_suffix" => "some suffix"
+                  'trimmed_prefix' => 'some prefix',
+                  'trimmed_suffix' => 'some suffix',
+                  'related_files' => '',
+                  'related_snippets' => '',
+                  'libraries' => '',
+                  'user_instruction' => 'Generate the best possible code based on instructions.'
                 },
-                "prompt_id" => "code_suggestions/generations"
+                'prompt_id' => 'code_suggestions/generations'
               },
-              "type" => "code_editor_generation"
+              'type' => 'code_editor_generation'
             }
           ],
-          "telemetry" => [{ "model_engine" => "anthropic" }]
+          'telemetry' => [{ 'model_engine' => 'anthropic' }]
         }
       end
 
diff --git a/ee/spec/requests/api/code_suggestions_spec.rb b/ee/spec/requests/api/code_suggestions_spec.rb
index d10bd3c9b9c1..bcf377b6b856 100644
--- a/ee/spec/requests/api/code_suggestions_spec.rb
+++ b/ee/spec/requests/api/code_suggestions_spec.rb
@@ -191,9 +191,13 @@ def is_even(n: int) ->
                     "trigger_type" => "comment"
                   }
                 ],
-                "trimmed_prefix" => "def is_even(n: int) ->\n# A " \
+                'trimmed_prefix' => "def is_even(n: int) ->\n# A " \
                   "function that outputs the first 20 fibonacci numbers\n",
-                "trimmed_suffix" => ""
+                'trimmed_suffix' => '',
+                'related_files' => [],
+                'related_snippets' => [],
+                'libraries' => [],
+                'user_instruction' => 'Generate the best possible code based on instructions.'
               }
             }
           }
-- 
GitLab