From 8622a816cd7cfc298cce6e28d79525dbf80de204 Mon Sep 17 00:00:00 2001 From: Nicolas Dular <ndular@gitlab.com> Date: Fri, 22 Mar 2024 16:51:47 +0000 Subject: [PATCH] Fix formatting of issue summaries In some summaries, we found random numbers within tags, or the summary ended with a stray """. This was due to the format we requested from the LLM. With the new prompt, we wrap each comment in its own <comment> tag and remove both the random number and the """ from the prompt. Changelog: changed EE: true --- .../llm/chain/tools/summarize_comments/executor.rb | 13 ++++++------- .../summarize_comments/prompts/anthropic_spec.rb | 14 +++++++------- .../summarize_comments/prompts/vertex_ai_spec.rb | 14 +++++++------- 3 files changed, 20 insertions(+), 21 deletions(-) diff --git a/ee/lib/gitlab/llm/chain/tools/summarize_comments/executor.rb b/ee/lib/gitlab/llm/chain/tools/summarize_comments/executor.rb index 3293807e5172..13d4d977b2b0 100644 --- a/ee/lib/gitlab/llm/chain/tools/summarize_comments/executor.rb +++ b/ee/lib/gitlab/llm/chain/tools/summarize_comments/executor.rb @@ -30,16 +30,16 @@ class Executor < Tool Utils::Prompt.as_system( <<~PROMPT You are an assistant that extracts the most important information from the comments in maximum 10 bullet points. - Comments are between two identical sets of 3-digit numbers surrounded by < > sign. + Each comment is wrapped in a <comment> tag. - <%<num>s> %<notes_content>s - <%<num>s> Desired markdown format: **<summary_title>** - <bullet_points> - """ + - <bullet_point> + - <bullet_point> + - <bullet_point> + - ... Focus on extracting information related to one another and that are the majority of the content. Ignore phrases that are not connected to others. @@ -57,7 +57,6 @@ def perform(&block) content = if notes.exists? 
notes_content = notes_to_summarize(notes) # rubocop: disable CodeReuse/ActiveRecord options[:notes_content] = notes_content - options[:num] = Random.rand(100..999) if options[:raw_ai_response] request(&block) @@ -87,7 +86,7 @@ def notes_to_summarize(notes) break notes_content if notes_content.size + note[1].size >= input_content_limit - notes_content << note[1] + notes_content << (format("<comment>%<note>s</comment>", note: note[1])) end end diff --git a/ee/spec/lib/gitlab/llm/chain/tools/summarize_comments/prompts/anthropic_spec.rb b/ee/spec/lib/gitlab/llm/chain/tools/summarize_comments/prompts/anthropic_spec.rb index 2613fe8b3598..092d4f8d139b 100644 --- a/ee/spec/lib/gitlab/llm/chain/tools/summarize_comments/prompts/anthropic_spec.rb +++ b/ee/spec/lib/gitlab/llm/chain/tools/summarize_comments/prompts/anthropic_spec.rb @@ -5,7 +5,7 @@ RSpec.describe Gitlab::Llm::Chain::Tools::SummarizeComments::Prompts::Anthropic, feature_category: :duo_chat do describe '.prompt' do it 'returns prompt' do - prompt = described_class.prompt({ notes_content: 'foo', num: 123 })[:prompt] + prompt = described_class.prompt({ notes_content: '<comment>foo</comment>' })[:prompt] expect(prompt).to include('Human:') expect(prompt).to include('Assistant:') @@ -13,16 +13,16 @@ expect(prompt).to include( <<~PROMPT You are an assistant that extracts the most important information from the comments in maximum 10 bullet points. - Comments are between two identical sets of 3-digit numbers surrounded by < > sign. + Each comment is wrapped in a <comment> tag. - <123> - foo - <123> + <comment>foo</comment> Desired markdown format: **<summary_title>** - <bullet_points> - """ + - <bullet_point> + - <bullet_point> + - <bullet_point> + - ... Focus on extracting information related to one another and that are the majority of the content. Ignore phrases that are not connected to others. 
diff --git a/ee/spec/lib/gitlab/llm/chain/tools/summarize_comments/prompts/vertex_ai_spec.rb b/ee/spec/lib/gitlab/llm/chain/tools/summarize_comments/prompts/vertex_ai_spec.rb index 32bef186e4bf..1033e17a1c1c 100644 --- a/ee/spec/lib/gitlab/llm/chain/tools/summarize_comments/prompts/vertex_ai_spec.rb +++ b/ee/spec/lib/gitlab/llm/chain/tools/summarize_comments/prompts/vertex_ai_spec.rb @@ -5,21 +5,21 @@ RSpec.describe Gitlab::Llm::Chain::Tools::SummarizeComments::Prompts::VertexAi, feature_category: :duo_chat do describe '.prompt' do it 'returns prompt' do - prompt = described_class.prompt({ notes_content: 'foo', num: 123 })[:prompt] + prompt = described_class.prompt({ notes_content: '<comment>foo</comment>' })[:prompt] expect(prompt).to include( <<~PROMPT You are an assistant that extracts the most important information from the comments in maximum 10 bullet points. - Comments are between two identical sets of 3-digit numbers surrounded by < > sign. + Each comment is wrapped in a <comment> tag. - <123> - foo - <123> + <comment>foo</comment> Desired markdown format: **<summary_title>** - <bullet_points> - """ + - <bullet_point> + - <bullet_point> + - <bullet_point> + - ... Focus on extracting information related to one another and that are the majority of the content. Ignore phrases that are not connected to others. -- GitLab