From 8622a816cd7cfc298cce6e28d79525dbf80de204 Mon Sep 17 00:00:00 2001
From: Nicolas Dular <ndular@gitlab.com>
Date: Fri, 22 Mar 2024 16:51:47 +0000
Subject: [PATCH] Fix formatting of issue summaries

In some summaries, we found random numbers within tags, or the summary
ended with """. This was due to the format we requested from the LLM.

With the new prompt, we wrap each comment in its own <comment> tag and
remove the random number and the """ that ended the desired format.

Changelog: changed
EE: true
---
 .../llm/chain/tools/summarize_comments/executor.rb | 13 ++++++-------
 .../summarize_comments/prompts/anthropic_spec.rb   | 14 +++++++-------
 .../summarize_comments/prompts/vertex_ai_spec.rb   | 14 +++++++-------
 3 files changed, 20 insertions(+), 21 deletions(-)

diff --git a/ee/lib/gitlab/llm/chain/tools/summarize_comments/executor.rb b/ee/lib/gitlab/llm/chain/tools/summarize_comments/executor.rb
index 3293807e5172..13d4d977b2b0 100644
--- a/ee/lib/gitlab/llm/chain/tools/summarize_comments/executor.rb
+++ b/ee/lib/gitlab/llm/chain/tools/summarize_comments/executor.rb
@@ -30,16 +30,16 @@ class Executor < Tool
               Utils::Prompt.as_system(
                 <<~PROMPT
                   You are an assistant that extracts the most important information from the comments in maximum 10 bullet points.
-                  Comments are between two identical sets of 3-digit numbers surrounded by < > sign.
+                  Each comment is wrapped in a <comment> tag.
 
-                  <%<num>s>
                   %<notes_content>s
-                  <%<num>s>
 
                   Desired markdown format:
                   **<summary_title>**
-                  <bullet_points>
-                  """
+                  - <bullet_point>
+                  - <bullet_point>
+                  - <bullet_point>
+                  - ...
 
                   Focus on extracting information related to one another and that are the majority of the content.
                   Ignore phrases that are not connected to others.
@@ -57,7 +57,6 @@ def perform(&block)
               content = if notes.exists?
                           notes_content = notes_to_summarize(notes) # rubocop: disable CodeReuse/ActiveRecord
                           options[:notes_content] = notes_content
-                          options[:num] = Random.rand(100..999)
 
                           if options[:raw_ai_response]
                             request(&block)
@@ -87,7 +86,7 @@ def notes_to_summarize(notes)
 
                   break notes_content if notes_content.size + note[1].size >= input_content_limit
 
-                  notes_content << note[1]
+                  notes_content << (format("<comment>%<note>s</comment>", note: note[1]))
                 end
               end
 
diff --git a/ee/spec/lib/gitlab/llm/chain/tools/summarize_comments/prompts/anthropic_spec.rb b/ee/spec/lib/gitlab/llm/chain/tools/summarize_comments/prompts/anthropic_spec.rb
index 2613fe8b3598..092d4f8d139b 100644
--- a/ee/spec/lib/gitlab/llm/chain/tools/summarize_comments/prompts/anthropic_spec.rb
+++ b/ee/spec/lib/gitlab/llm/chain/tools/summarize_comments/prompts/anthropic_spec.rb
@@ -5,7 +5,7 @@
 RSpec.describe Gitlab::Llm::Chain::Tools::SummarizeComments::Prompts::Anthropic, feature_category: :duo_chat do
   describe '.prompt' do
     it 'returns prompt' do
-      prompt = described_class.prompt({ notes_content: 'foo', num: 123 })[:prompt]
+      prompt = described_class.prompt({ notes_content: '<comment>foo</comment>' })[:prompt]
 
       expect(prompt).to include('Human:')
       expect(prompt).to include('Assistant:')
@@ -13,16 +13,16 @@
       expect(prompt).to include(
         <<~PROMPT
           You are an assistant that extracts the most important information from the comments in maximum 10 bullet points.
-          Comments are between two identical sets of 3-digit numbers surrounded by < > sign.
+          Each comment is wrapped in a <comment> tag.
 
-          <123>
-          foo
-          <123>
+          <comment>foo</comment>
 
           Desired markdown format:
           **<summary_title>**
-          <bullet_points>
-          """
+          - <bullet_point>
+          - <bullet_point>
+          - <bullet_point>
+          - ...
 
           Focus on extracting information related to one another and that are the majority of the content.
           Ignore phrases that are not connected to others.
diff --git a/ee/spec/lib/gitlab/llm/chain/tools/summarize_comments/prompts/vertex_ai_spec.rb b/ee/spec/lib/gitlab/llm/chain/tools/summarize_comments/prompts/vertex_ai_spec.rb
index 32bef186e4bf..1033e17a1c1c 100644
--- a/ee/spec/lib/gitlab/llm/chain/tools/summarize_comments/prompts/vertex_ai_spec.rb
+++ b/ee/spec/lib/gitlab/llm/chain/tools/summarize_comments/prompts/vertex_ai_spec.rb
@@ -5,21 +5,21 @@
 RSpec.describe Gitlab::Llm::Chain::Tools::SummarizeComments::Prompts::VertexAi, feature_category: :duo_chat do
   describe '.prompt' do
     it 'returns prompt' do
-      prompt = described_class.prompt({ notes_content: 'foo', num: 123 })[:prompt]
+      prompt = described_class.prompt({ notes_content: '<comment>foo</comment>' })[:prompt]
 
       expect(prompt).to include(
         <<~PROMPT
           You are an assistant that extracts the most important information from the comments in maximum 10 bullet points.
-          Comments are between two identical sets of 3-digit numbers surrounded by < > sign.
+          Each comment is wrapped in a <comment> tag.
 
-          <123>
-          foo
-          <123>
+          <comment>foo</comment>
 
           Desired markdown format:
           **<summary_title>**
-          <bullet_points>
-          """
+          - <bullet_point>
+          - <bullet_point>
+          - <bullet_point>
+          - ...
 
           Focus on extracting information related to one another and that are the majority of the content.
           Ignore phrases that are not connected to others.
-- 
GitLab