diff --git a/config/feature_flags/development/summarize_notes_with_anthropic.yml b/config/feature_flags/development/summarize_notes_with_anthropic.yml
deleted file mode 100644
index 145d75e2781560238c58a7a45b5a4bdb962e4cae..0000000000000000000000000000000000000000
--- a/config/feature_flags/development/summarize_notes_with_anthropic.yml
+++ /dev/null
@@ -1,8 +0,0 @@
----
-name: summarize_notes_with_anthropic
-introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/134731
-rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/work_items/430196
-milestone: '16.6'
-type: development
-group: group::duo chat
-default_enabled: true
diff --git a/ee/lib/gitlab/llm/anthropic/client.rb b/ee/lib/gitlab/llm/anthropic/client.rb
index 74b97363d540f0879a6af53dd86302c91dba0682..73805ab1ba1923bc3eeeaf2cf81242989cd20b50 100644
--- a/ee/lib/gitlab/llm/anthropic/client.rb
+++ b/ee/lib/gitlab/llm/anthropic/client.rb
@@ -76,6 +76,24 @@ def messages_complete(messages:, **options)
           response
         end
 
+        def messages_stream(messages:, **options)
+          return unless enabled?
+
+          response_body = ""
+          perform_messages_request(messages: messages, options: options.merge(stream: true)) do |parsed_event|
+          response_body += parsed_event.dig('delta', 'text').to_s
+
+            yield parsed_event if block_given?
+          end
+          logger.info_or_debug(user, message: "Received response from Anthropic", response: response_body)
+
+          track_prompt_size(token_size(messages))
+          track_response_size(token_size(response_body))
+
+          response_body
+        end
+        traceable :messages_stream, name: 'Request to Anthropic', run_type: 'llm'
+
         private
 
         attr_reader :user, :logger, :tracking_context, :unit_primitive
@@ -109,7 +127,11 @@ def perform_messages_request(messages:, options:)
             timeout: timeout,
             allow_local_requests: true,
             stream_body: options.fetch(:stream, false)
-          )
+          ) do |fragment|
+            parse_sse_events(fragment).each do |parsed_event|
+              yield parsed_event if block_given?
+            end
+          end
 
           raise Gitlab::AiGateway::ForbiddenError if response.forbidden?
 
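The new `messages_stream` accumulates the text deltas while also yielding each parsed SSE event to the caller's block. A minimal usage sketch, assuming the constructor arguments shown here (they are illustrative, not part of this diff):

    # Hypothetical setup: user and unit primitive are stand-ins for this example.
    client = Gitlab::Llm::Anthropic::Client.new(user, unit_primitive: 'summarize_issue_discussions')

    full_text = client.messages_stream(
      messages: [{ role: :user, content: "Summarize these comments" }]
    ) do |event|
      # Text deltas arrive as parsed SSE events shaped like { 'delta' => { 'text' => ... } }.
      print event.dig('delta', 'text')
    end

The method returns the concatenated response body, so callers can stream chunks and still get the full text back.
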
diff --git a/ee/lib/gitlab/llm/chain/requests/anthropic.rb b/ee/lib/gitlab/llm/chain/requests/anthropic.rb
index 1d65091ed74dde5046cddcd56b10b47220a3b929..bbf274d17287628114e6eb5089f3cdc8cd9825e3 100644
--- a/ee/lib/gitlab/llm/chain/requests/anthropic.rb
+++ b/ee/lib/gitlab/llm/chain/requests/anthropic.rb
@@ -7,8 +7,6 @@ module Requests
         class Anthropic < Base
           attr_reader :ai_client
 
-          TEMPERATURE = 0.1
-          STOP_WORDS = ["\n\nHuman", "Observation:"].freeze
           PROMPT_SIZE = 30_000
 
           def initialize(user, unit_primitive:, tracking_context: {})
@@ -20,15 +18,14 @@ def initialize(user, unit_primitive:, tracking_context: {})
 
           # TODO: unit primitive param is temporarily added to provide parity with ai_gateway-related method
           def request(prompt, unit_primitive: nil) # rubocop: disable Lint/UnusedMethodArgument -- added to provide parity with ai_gateway-related method
-            return unless prompt[:prompt]
+            return unless prompt[:messages]
 
-            ai_client.stream(
-              prompt: prompt[:prompt],
-              **default_options.merge(prompt.fetch(:options, {}))
+            ai_client.messages_stream(
+              **prompt
             ) do |data|
               logger.info(message: "Streaming error", error: data&.dig("error")) if data&.dig("error")
 
-              content = data&.dig("completion").to_s
+              content = data&.dig('delta', 'text').to_s
               yield content if block_given?
             end
           end
@@ -36,13 +33,6 @@ def request(prompt, unit_primitive: nil) # rubocop: disable Lint/UnusedMethodArg
           private
 
           attr_reader :user, :logger
-
-          def default_options
-            {
-              temperature: TEMPERATURE,
-              stop_sequences: STOP_WORDS
-            }
-          end
         end
       end
     end
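With the old single-string prompt path removed, `request` splats the whole prompt hash into `messages_stream`, so callers now pass a Messages-API-style payload. A sketch of the expected shape, inferred from `AnthropicOld.prompt` later in this diff (the hash must be passed positionally, because `request` also accepts a `unit_primitive:` keyword):

    anthropic = Gitlab::Llm::Chain::Requests::Anthropic.new(user, unit_primitive: 'summarize_issue_discussions')

    anthropic.request({
      messages: [{ role: :user, content: "Summarize these comments" }],
      system: "You are an assistant ...",  # optional top-level system prompt
      model: ::Gitlab::Llm::Anthropic::Client::CLAUDE_3_5_SONNET,
      temperature: 0.1
    }) { |chunk| print chunk }             # yields each delta's text as it streams

Note the guard is now `return unless prompt[:messages]`, so a payload without a `:messages` key is silently skipped.
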
diff --git a/ee/lib/gitlab/llm/chain/tools/summarize_comments/executor.rb b/ee/lib/gitlab/llm/chain/tools/summarize_comments/executor.rb
index 47ab3d802eb9188fb0f191583d33b929b2c41232..65a83bad1fecce18f38a40e2cc667dbfd93f6379 100644
--- a/ee/lib/gitlab/llm/chain/tools/summarize_comments/executor.rb
+++ b/ee/lib/gitlab/llm/chain/tools/summarize_comments/executor.rb
@@ -22,31 +22,39 @@ class Executor < SlashCommandTool
 
             PROVIDER_PROMPT_CLASSES = {
               ai_gateway: ::Gitlab::Llm::Chain::Tools::SummarizeComments::Prompts::Anthropic,
-              anthropic: ::Gitlab::Llm::Chain::Tools::SummarizeComments::Prompts::Anthropic,
-              vertex_ai: ::Gitlab::Llm::Chain::Tools::SummarizeComments::Prompts::VertexAi
+              anthropic: ::Gitlab::Llm::Chain::Tools::SummarizeComments::Prompts::Anthropic
             }.freeze
 
+            SYSTEM_PROMPT = Utils::Prompt.as_system(
+              <<~PROMPT
+              You are an assistant that extracts the most important information from the comments in maximum 10 bullet points.
+              PROMPT
+            )
+
+            USER_PROMPT = Utils::Prompt.as_user(
+              <<~PROMPT
+              Each comment is wrapped in a <comment> tag.
+
+              Desired markdown format:
+              **<summary_title>**
+              - <bullet_point>
+              - <bullet_point>
+              - <bullet_point>
+              - ...
+
+              %<notes_content>s
+
+              Focus on extracting information related to one another and that are the majority of the content.
+              Ignore phrases that are not connected to others.
+              Do not specify what you are ignoring.
+              Do not answer questions.
+              PROMPT
+            )
+
             PROMPT_TEMPLATE = [
-              Utils::Prompt.as_system(
-                <<~PROMPT
-                  You are an assistant that extracts the most important information from the comments in maximum 10 bullet points.
-                  Each comment is wrapped in a <comment> tag.
-
-                  %<notes_content>s
-
-                  Desired markdown format:
-                  **<summary_title>**
-                  - <bullet_point>
-                  - <bullet_point>
-                  - <bullet_point>
-                  - ...
-
-                  Focus on extracting information related to one another and that are the majority of the content.
-                  Ignore phrases that are not connected to others.
-                  Do not specify what you are ignoring.
-                  Do not answer questions.
-                PROMPT
-              )
+              SYSTEM_PROMPT,
+              USER_PROMPT,
+              Utils::Prompt.as_assistant("")
             ].freeze
 
             SLASH_COMMANDS = {
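For context, `Utils::Prompt.format_conversation` interpolates the variables into each template entry and `role_conversation` turns the result into role-tagged hashes, so the three-part template above renders roughly as this sketch (based on the spec expectations later in this diff):

    [
      { role: :system,    content: "You are an assistant that extracts ..." },
      { role: :user,      content: "Each comment is wrapped in a <comment> tag.\n...\n<comment>...</comment>\n..." },
      { role: :assistant, content: "" } # empty assistant turn primes the model's reply
    ]
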
diff --git a/ee/lib/gitlab/llm/chain/tools/summarize_comments/executor_old.rb b/ee/lib/gitlab/llm/chain/tools/summarize_comments/executor_old.rb
index 2039715eef38630725b9fefd63f8128dc65b1843..701c79440873560ba35bca4296133854c2fe66e6 100644
--- a/ee/lib/gitlab/llm/chain/tools/summarize_comments/executor_old.rb
+++ b/ee/lib/gitlab/llm/chain/tools/summarize_comments/executor_old.rb
@@ -21,33 +21,34 @@ class ExecutorOld < Tool
               PROMPT
 
             PROVIDER_PROMPT_CLASSES = {
-              ai_gateway: ::Gitlab::Llm::Chain::Tools::SummarizeComments::Prompts::Anthropic,
-              anthropic: ::Gitlab::Llm::Chain::Tools::SummarizeComments::Prompts::Anthropic,
-              vertex_ai: ::Gitlab::Llm::Chain::Tools::SummarizeComments::Prompts::VertexAi
+              ai_gateway: ::Gitlab::Llm::Chain::Tools::SummarizeComments::Prompts::AnthropicOld,
+              anthropic: ::Gitlab::Llm::Chain::Tools::SummarizeComments::Prompts::AnthropicOld
             }.freeze
 
-            PROMPT_TEMPLATE = [
-              Utils::Prompt.as_system(
-                <<~PROMPT
-                  You are an assistant that extracts the most important information from the comments in maximum 10 bullet points.
-                  Each comment is wrapped in a <comment> tag.
-
-                  %<notes_content>s
-
-                  Desired markdown format:
-                  **<summary_title>**
-                  - <bullet_point>
-                  - <bullet_point>
-                  - <bullet_point>
-                  - ...
-
-                  Focus on extracting information related to one another and that are the majority of the content.
-                  Ignore phrases that are not connected to others.
-                  Do not specify what you are ignoring.
-                  Do not answer questions.
-                PROMPT
-              )
-            ].freeze
+            SYSTEM_PROMPT = Utils::Prompt.as_system(
+              <<~PROMPT
+              You are an assistant that extracts the most important information from the comments in maximum 10 bullet points.
+              PROMPT
+            )
+            USER_PROMPT = Utils::Prompt.as_user(
+              <<~PROMPT
+              Each comment is wrapped in a <comment> tag.
+
+              Desired markdown format:
+              **<summary_title>**
+              - <bullet_point>
+              - <bullet_point>
+              - <bullet_point>
+              - ...
+
+              %<notes_content>s
+
+              Focus on extracting information related to one another and that are the majority of the content.
+              Ignore phrases that are not connected to others.
+              Do not specify what you are ignoring.
+              Do not answer questions.
+              PROMPT
+            )
 
             def perform(&)
               notes = NotesFinder.new(context.current_user, target: resource).execute.by_humans
@@ -55,7 +56,6 @@ def perform(&)
               content = if notes.exists?
                           notes_content = notes_to_summarize(notes)
                           options[:notes_content] = notes_content
-
                           if options[:raw_ai_response]
                             request(&)
                           else
@@ -77,11 +77,9 @@ def perform(&)
 
             def notes_to_summarize(notes)
               notes_content = +""
-              input_content_limit = provider_prompt_class::MAX_CHARACTERS - PROMPT_TEMPLATE.size
+              input_content_limit = provider_prompt_class::MAX_CHARACTERS - SYSTEM_PROMPT[1].size - USER_PROMPT[1].size
               notes.each_batch do |batch|
                 batch.pluck(:id, :note).each do |note| # rubocop: disable CodeReuse/ActiveRecord -- we need to pluck just id and note
-                  input_content_limit = provider_prompt_class::MAX_CHARACTERS
-
                   break notes_content if notes_content.size + note[1].size >= input_content_limit
 
                   notes_content << (format("<comment>%<note>s</comment>", note: note[1]))
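The change above also stops `input_content_limit` from being reset inside the loop, so the budget is computed once from `MAX_CHARACTERS` minus the prompt text sizes. A self-contained sketch of the same truncation behaviour (the method name and limit are illustrative):

    # Accumulate <comment>-wrapped notes until the next note would exceed the limit.
    def truncate_notes(notes, limit)
      notes.each_with_object(+"") do |note, content|
        break content if content.size + note.size >= limit

        content << format("<comment>%<note>s</comment>", note: note)
      end
    end

    truncate_notes(%w[first second], 30)
    # => "<comment>first</comment>" -- appending "second" would cross the 30-character limit
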
diff --git a/ee/lib/gitlab/llm/chain/tools/summarize_comments/prompts/anthropic.rb b/ee/lib/gitlab/llm/chain/tools/summarize_comments/prompts/anthropic.rb
index 6b7d66f10a5251012d11818e1942cab8fcda8397..0c3182c62268f24de5c2039e427a9c8962d7aff3 100644
--- a/ee/lib/gitlab/llm/chain/tools/summarize_comments/prompts/anthropic.rb
+++ b/ee/lib/gitlab/llm/chain/tools/summarize_comments/prompts/anthropic.rb
@@ -9,20 +9,18 @@ module Prompts
             class Anthropic
               include Concerns::AnthropicPrompt
 
-              OUTPUT_TOKEN_LIMIT = 2048
-
-              def self.prompt(options)
-                base_prompt = Utils::Prompt.no_role_text(
-                  ::Gitlab::Llm::Chain::Tools::SummarizeComments::Executor::PROMPT_TEMPLATE, options
+              def self.prompt(variables)
+                conversation = Utils::Prompt.role_conversation(
+                  Utils::Prompt.format_conversation(
+                    ::Gitlab::Llm::Chain::Tools::SummarizeComments::Executor::PROMPT_TEMPLATE,
+                    variables
+                  )
                 )
 
-                Requests::Anthropic.prompt(
-                  "\n\nHuman: #{base_prompt}\n\nAssistant:",
-                  options: {
-                    model: ::Gitlab::Llm::Anthropic::Client::DEFAULT_INSTANT_MODEL,
-                    max_tokens_to_sample: OUTPUT_TOKEN_LIMIT
-                  }
-                )
+                {
+                  prompt: conversation,
+                  options: { model: ::Gitlab::Llm::Anthropic::Client::CLAUDE_3_5_SONNET }
+                }
               end
             end
           end
diff --git a/ee/lib/gitlab/llm/chain/tools/summarize_comments/prompts/anthropic_old.rb b/ee/lib/gitlab/llm/chain/tools/summarize_comments/prompts/anthropic_old.rb
new file mode 100644
index 0000000000000000000000000000000000000000..72dbfecb81779928227dd9327978ecea4413cf15
--- /dev/null
+++ b/ee/lib/gitlab/llm/chain/tools/summarize_comments/prompts/anthropic_old.rb
@@ -0,0 +1,34 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module Llm
+    module Chain
+      module Tools
+        module SummarizeComments
+          module Prompts
+            class AnthropicOld
+              include Concerns::AnthropicPrompt
+
+              TEMPERATURE = 0.1
+
+              def self.prompt(variables)
+                {
+                  messages: Utils::Prompt.role_conversation(
+                    Utils::Prompt.format_conversation([
+                      ::Gitlab::Llm::Chain::Tools::SummarizeComments::ExecutorOld::USER_PROMPT,
+                      Utils::Prompt.as_assistant("")
+                    ], variables)
+                  ),
+                  system: Utils::Prompt.no_role_text(
+                    [::Gitlab::Llm::Chain::Tools::SummarizeComments::ExecutorOld::SYSTEM_PROMPT], variables),
+                  model: ::Gitlab::Llm::Anthropic::Client::CLAUDE_3_5_SONNET,
+                  temperature: TEMPERATURE
+                }
+              end
+            end
+          end
+        end
+      end
+    end
+  end
+end
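Unlike the new `Executor` path, which keeps the system turn inside the conversation array, `AnthropicOld` lifts it out into a top-level `system:` string, matching the Messages API, where the system prompt is a request field rather than a message role. The returned payload looks roughly like this sketch:

    {
      messages: [
        { role: :user,      content: "Each comment is wrapped in a <comment> tag. ..." },
        { role: :assistant, content: "" }
      ],
      system: "You are an assistant that extracts ...",
      model: ::Gitlab::Llm::Anthropic::Client::CLAUDE_3_5_SONNET,
      temperature: 0.1
    }
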
diff --git a/ee/lib/gitlab/llm/chain/tools/summarize_comments/prompts/vertex_ai.rb b/ee/lib/gitlab/llm/chain/tools/summarize_comments/prompts/vertex_ai.rb
deleted file mode 100644
index 175f75c38fc5cda2b6a57acc731ff0dd3e25babf..0000000000000000000000000000000000000000
--- a/ee/lib/gitlab/llm/chain/tools/summarize_comments/prompts/vertex_ai.rb
+++ /dev/null
@@ -1,32 +0,0 @@
-# frozen_string_literal: true
-
-module Gitlab
-  module Llm
-    module Chain
-      module Tools
-        module SummarizeComments
-          module Prompts
-            class VertexAi
-              include Concerns::VertexAiPrompt
-
-              OUTPUT_TOKEN_LIMIT = 1024
-
-              def self.prompt(options)
-                prompt = Utils::Prompt.no_role_text(
-                  ::Gitlab::Llm::Chain::Tools::SummarizeComments::Executor::PROMPT_TEMPLATE, options
-                )
-
-                {
-                  prompt: prompt,
-                  options: {
-                    max_output_tokens: OUTPUT_TOKEN_LIMIT
-                  }
-                }
-              end
-            end
-          end
-        end
-      end
-    end
-  end
-end
diff --git a/ee/lib/gitlab/llm/completions/summarize_all_open_notes.rb b/ee/lib/gitlab/llm/completions/summarize_all_open_notes.rb
index e31ccf3dc01469327c11a9afd9644ee177b6794f..7a49b5613832a772b72a17e87d150eb3554e4194 100644
--- a/ee/lib/gitlab/llm/completions/summarize_all_open_notes.rb
+++ b/ee/lib/gitlab/llm/completions/summarize_all_open_notes.rb
@@ -37,13 +37,8 @@ def execute
         private
 
         def ai_provider_request(user)
-          if Feature.enabled?(:summarize_notes_with_anthropic, user)
-            ::Gitlab::Llm::Chain::Requests::Anthropic.new(user,
-              unit_primitive: 'summarize_issue_discussions', tracking_context: tracking_context)
-          else
-            ::Gitlab::Llm::Chain::Requests::VertexAi.new(user,
-              unit_primitive: 'summarize_issue_discussions', tracking_context: tracking_context)
-          end
+          ::Gitlab::Llm::Chain::Requests::Anthropic.new(user,
+            unit_primitive: 'summarize_issue_discussions', tracking_context: tracking_context)
         end
 
         def issuable
diff --git a/ee/spec/lib/gitlab/llm/chain/concerns/ai_dependent_spec.rb b/ee/spec/lib/gitlab/llm/chain/concerns/ai_dependent_spec.rb
index f07d680f54ffd06c121a90cf9338b184961d06f5..4bfd32c7690ff047dfdaa59282e1d35bf426c64c 100644
--- a/ee/spec/lib/gitlab/llm/chain/concerns/ai_dependent_spec.rb
+++ b/ee/spec/lib/gitlab/llm/chain/concerns/ai_dependent_spec.rb
@@ -27,6 +27,36 @@
       tool.prompt
     end
 
+    context "when calling summarize comments tool" do
+      let_it_be(:project) { create(:project) }
+      let_it_be(:issue) { create(:issue, project: project) }
+      let_it_be(:note) do
+        create(:note_on_issue, noteable: issue, project: project, note: "Please correct this small nit")
+      end
+
+      let(:context) do
+        Gitlab::Llm::Chain::GitlabContext.new(
+          current_user: user, container: double, resource: issue, ai_request: ai_request
+        )
+      end
+
+      let(:tool) do
+        ::Gitlab::Llm::Chain::Tools::SummarizeComments::Executor.new(
+          context: context,
+          options: {
+            input: 'Summarize issue comments.',
+            notes_content: "<comment>#{note.note}</comment>"
+          }
+        )
+      end
+
+      it "returns prompt" do
+        expect(tool.class::PROVIDER_PROMPT_CLASSES[:anthropic]).to receive(:prompt).and_call_original
+
+        tool.prompt
+      end
+    end
+
     context 'when there are no provider prompt classes' do
       let(:dummy_tool_class) do
         Class.new(::Gitlab::Llm::Chain::Tools::Tool) do
diff --git a/ee/spec/lib/gitlab/llm/chain/requests/anthropic_spec.rb b/ee/spec/lib/gitlab/llm/chain/requests/anthropic_spec.rb
index 7de998482506f2e0b54543fd4027f2b9c7b19fcd..731d99f9b52b086ae3e962d5e64bb6f3a048659d 100644
--- a/ee/spec/lib/gitlab/llm/chain/requests/anthropic_spec.rb
+++ b/ee/spec/lib/gitlab/llm/chain/requests/anthropic_spec.rb
@@ -19,12 +19,27 @@
     let(:instance) { described_class.new(user, unit_primitive: 'duo_chat') }
     let(:logger) { instance_double(Gitlab::Llm::Logger) }
     let(:ai_client) { double }
-    let(:response) { { "completion" => "Hello World " } }
+    let(:prompt_message) do
+      [
+        {
+          role: :user,
+          content: "Some user request"
+        }
+      ]
+    end
+
+    let(:response) do
+      {
+        "delta" => {
+          "type" => "text_delta",
+          "text" => "Hello World"
+        }
+      }
+    end
+
     let(:expected_params) do
       {
-        prompt: "some user request",
-        temperature: 0.1,
-        stop_sequences: ["\n\nHuman", "Observation:"]
+        messages: prompt_message
       }
     end
 
@@ -33,41 +48,41 @@
       allow(instance).to receive(:ai_client).and_return(ai_client)
     end
 
-    context 'with prompt and options' do
-      let(:params) { { prompt: "some user request", options: { max_tokens: 4000 } } }
+    context 'with prompt' do
+      let(:params) do
+        { messages: prompt_message, max_tokens: 4000 }
+      end
 
-      it 'calls the anthropic streaming endpoint and yields response without stripping it' do
-        expect(ai_client).to receive(:stream).with(expected_params.merge({ max_tokens: 4000 })).and_yield(response)
+      it 'calls the anthropic messages streaming endpoint and yields response without stripping it' do
+        expect(ai_client).to receive(:messages_stream).with(expected_params.merge(max_tokens: 4000)).and_yield(response)
 
-        expect { |b| instance.request(params, &b) }.to yield_with_args(
-          "Hello World "
-        )
+        expect { |b| instance.request(params, &b) }.to yield_with_args("Hello World")
       end
 
       it 'returns the response from anthropic' do
-        expect(ai_client).to receive(:stream).with(expected_params.merge({ max_tokens: 4000 }))
-          .and_return(response["completion"])
+        expect(ai_client).to receive(:messages_stream).with(expected_params.merge(max_tokens: 4000))
+          .and_return(response)
 
-        expect(request).to eq("Hello World ")
+        expect(request["delta"]["text"]).to eq("Hello World")
       end
     end
 
     context 'when options are not present' do
-      let(:params) { { prompt: "some user request" } }
+      let(:params) { { messages: prompt_message } }
 
       it 'calls the anthropic streaming endpoint' do
-        expect(ai_client).to receive(:stream).with(expected_params)
+        expect(ai_client).to receive(:messages_stream).with(expected_params)
 
         request
       end
     end
 
     context 'when stream errors' do
-      let(:params) { { prompt: "some user request" } }
+      let(:params) { { messages: prompt_message } }
       let(:response) { { "error" => { "type" => "overload_error", message: "Overloaded" } } }
 
       it 'logs the error' do
-        expect(ai_client).to receive(:stream).with(expected_params).and_yield(response)
+        expect(ai_client).to receive(:messages_stream).with(expected_params).and_yield(response)
         expect(logger).to receive(:info).with(hash_including(message: "Streaming error", error: response["error"]))
 
         request
diff --git a/ee/spec/lib/gitlab/llm/chain/tools/summarize_comments/executor_spec.rb b/ee/spec/lib/gitlab/llm/chain/tools/summarize_comments/executor_spec.rb
index 617405e3c9d49bb522f4e24e5886c3cbb8c49c79..12a69f18449b6c732dd70cf1df843cd5d7712177 100644
--- a/ee/spec/lib/gitlab/llm/chain/tools/summarize_comments/executor_spec.rb
+++ b/ee/spec/lib/gitlab/llm/chain/tools/summarize_comments/executor_spec.rb
@@ -55,11 +55,12 @@
 
         expected_prompt = <<~PROMPT.chomp
           You are an assistant that extracts the most important information from the comments in maximum 10 bullet points.
-          Each comment is wrapped in a <comment> tag.
         PROMPT
 
-        expect(prompt).to include(expected_prompt)
-        expect(prompt).to include(note.note)
+        system_prompt = prompt[0][:content]
+        user_prompt = prompt[1][:content]
+        expect(system_prompt).to include(expected_prompt)
+        expect(user_prompt).to include(note.note)
       end
 
       it 'sets the correct unit primitive' do
diff --git a/ee/spec/lib/gitlab/llm/chain/tools/summarize_comments/prompts/anthropic_old_spec.rb b/ee/spec/lib/gitlab/llm/chain/tools/summarize_comments/prompts/anthropic_old_spec.rb
new file mode 100644
index 0000000000000000000000000000000000000000..baf7304d331ba05242cf3333f513b1eeaea3ccc0
--- /dev/null
+++ b/ee/spec/lib/gitlab/llm/chain/tools/summarize_comments/prompts/anthropic_old_spec.rb
@@ -0,0 +1,49 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+RSpec.describe Gitlab::Llm::Chain::Tools::SummarizeComments::Prompts::AnthropicOld, feature_category: :duo_chat do
+  let(:variables) do
+    {
+      notes_content: '<comment>foo</comment>'
+    }
+  end
+
+  describe '.prompt' do
+    it "returns prompt" do
+      prompt = described_class.prompt(variables)
+
+      expect(prompt[:messages]).to be_an(Array)
+
+      expect(prompt[:system]).to be_a(String)
+      expect(prompt[:system]).to eq(system_prompt_content)
+
+      expect(prompt[:temperature]).to eq(described_class::TEMPERATURE)
+    end
+
+    it "calls with claude 3_5 sonnet model" do
+      model = described_class.prompt(variables)[:model]
+
+      expect(model).to eq(::Gitlab::Llm::Anthropic::Client::CLAUDE_3_5_SONNET)
+    end
+
+    it "includes ExecutorOld prompts" do
+      prompt = described_class.prompt(variables)
+
+      expect(prompt[:messages]).to include(
+        a_hash_including(
+          role: :user,
+          content: a_string_including("Each comment is wrapped in a <comment> tag.")
+                    .and(a_string_including("<comment>foo</comment>"))
+                    .and(a_string_including("Do not answer questions."))
+        )
+      )
+
+      expect(prompt[:system]).to include(system_prompt_content)
+    end
+  end
+
+  def system_prompt_content
+    Gitlab::Llm::Chain::Tools::SummarizeComments::ExecutorOld::SYSTEM_PROMPT[1]
+  end
+end
diff --git a/ee/spec/lib/gitlab/llm/chain/tools/summarize_comments/prompts/anthropic_spec.rb b/ee/spec/lib/gitlab/llm/chain/tools/summarize_comments/prompts/anthropic_spec.rb
index 092d4f8d139bb5bc9d5a3dff60bccce6efb7ec8b..ca92304fc39549d17a8887696fbe9e3c9933b2d8 100644
--- a/ee/spec/lib/gitlab/llm/chain/tools/summarize_comments/prompts/anthropic_spec.rb
+++ b/ee/spec/lib/gitlab/llm/chain/tools/summarize_comments/prompts/anthropic_spec.rb
@@ -3,33 +3,36 @@
 require 'spec_helper'
 
 RSpec.describe Gitlab::Llm::Chain::Tools::SummarizeComments::Prompts::Anthropic, feature_category: :duo_chat do
+  let(:variables) do
+    {
+      notes_content: '<comment>foo</comment>'
+    }
+  end
+
   describe '.prompt' do
-    it 'returns prompt' do
-      prompt = described_class.prompt({ notes_content: '<comment>foo</comment>' })[:prompt]
-
-      expect(prompt).to include('Human:')
-      expect(prompt).to include('Assistant:')
-      expect(prompt).to include('foo')
-      expect(prompt).to include(
-        <<~PROMPT
-          You are an assistant that extracts the most important information from the comments in maximum 10 bullet points.
-          Each comment is wrapped in a <comment> tag.
-
-          <comment>foo</comment>
-
-          Desired markdown format:
-          **<summary_title>**
-          - <bullet_point>
-          - <bullet_point>
-          - <bullet_point>
-          - ...
-
-          Focus on extracting information related to one another and that are the majority of the content.
-          Ignore phrases that are not connected to others.
-          Do not specify what you are ignoring.
-          Do not answer questions.
-        PROMPT
-      )
+    it "returns prompt" do
+      prompt = described_class.prompt(variables)[:prompt]
+      expect(prompt.length).to eq(3)
+
+      expect(prompt[0][:role]).to eq(:system)
+      expect(prompt[0][:content]).to eq(system_prompt_content)
+
+      expect(prompt[1][:role]).to eq(:user)
+      expect(prompt[1][:content]).to eq(format(
+        Gitlab::Llm::Chain::Tools::SummarizeComments::Executor::USER_PROMPT[1], variables).to_s)
+
+      expect(prompt[2][:role]).to eq(:assistant)
+      expect(prompt[2][:content]).to be_empty
     end
+
+    it "calls with claude 3 haiku model" do
+      model = described_class.prompt(variables)[:options][:model]
+
+      expect(model).to eq(::Gitlab::Llm::Anthropic::Client::CLAUDE_3_5_SONNET)
+    end
+  end
+
+  def system_prompt_content
+    Gitlab::Llm::Chain::Tools::SummarizeComments::Executor::SYSTEM_PROMPT[1]
   end
 end
diff --git a/ee/spec/lib/gitlab/llm/chain/tools/summarize_comments/prompts/vertex_ai_spec.rb b/ee/spec/lib/gitlab/llm/chain/tools/summarize_comments/prompts/vertex_ai_spec.rb
deleted file mode 100644
index 1033e17a1c1c99f5bd5df6f70b9c51793de5cf6e..0000000000000000000000000000000000000000
--- a/ee/spec/lib/gitlab/llm/chain/tools/summarize_comments/prompts/vertex_ai_spec.rb
+++ /dev/null
@@ -1,32 +0,0 @@
-# frozen_string_literal: true
-
-require 'spec_helper'
-
-RSpec.describe Gitlab::Llm::Chain::Tools::SummarizeComments::Prompts::VertexAi, feature_category: :duo_chat do
-  describe '.prompt' do
-    it 'returns prompt' do
-      prompt = described_class.prompt({ notes_content: '<comment>foo</comment>' })[:prompt]
-
-      expect(prompt).to include(
-        <<~PROMPT
-          You are an assistant that extracts the most important information from the comments in maximum 10 bullet points.
-          Each comment is wrapped in a <comment> tag.
-
-          <comment>foo</comment>
-
-          Desired markdown format:
-          **<summary_title>**
-          - <bullet_point>
-          - <bullet_point>
-          - <bullet_point>
-          - ...
-
-          Focus on extracting information related to one another and that are the majority of the content.
-          Ignore phrases that are not connected to others.
-          Do not specify what you are ignoring.
-          Do not answer questions.
-        PROMPT
-      )
-    end
-  end
-end
diff --git a/ee/spec/lib/gitlab/llm/completions/summarize_all_open_notes_spec.rb b/ee/spec/lib/gitlab/llm/completions/summarize_all_open_notes_spec.rb
index 638172f6a1f1bc4470e4ab9c0b2f1e46476c4a2c..f4bd3961a4eeac32af081191a515ca2fe37db6e2 100644
--- a/ee/spec/lib/gitlab/llm/completions/summarize_all_open_notes_spec.rb
+++ b/ee/spec/lib/gitlab/llm/completions/summarize_all_open_notes_spec.rb
@@ -20,10 +20,10 @@
     )
   end
 
-  RSpec.shared_examples 'performs completion' do
+  RSpec.shared_examples 'performs messages stream' do
     it 'returns summary' do
       expect_next_instance_of(ai_request_class) do |instance|
-        expect(instance).to receive(completion_method).and_return(ai_response)
+        expect(instance).to receive(messages_method).and_return(ai_response)
       end
 
       response_modifier = double
@@ -109,7 +109,7 @@
 
   describe "#execute", :saas do
     let(:ai_request_class) { ::Gitlab::Llm::Anthropic::Client }
-    let(:completion_method) { :stream }
+    let(:messages_method) { :messages_stream }
     let(:options) { {} }
 
     let_it_be(:user) { create(:user) }
@@ -149,20 +149,7 @@
         let_it_be(:notes) { create_pair(:note_on_issue, project: project, noteable: issuable) }
         let_it_be(:system_note) { create(:note_on_issue, :system, project: project, noteable: issuable) }
 
-        # anthropic as provider as summarize_notes_with_anthropic is enabled by default.
-        it_behaves_like 'performs completion'
-
-        context 'with vertex_ai provider' do
-          let(:completion_method) { :text }
-          let(:ai_request_class) { ::Gitlab::Llm::VertexAi::Client }
-          let(:ai_response) { { "predictions" => [{ "content" => "some ai response text" }] } }
-
-          before do
-            stub_feature_flags(summarize_notes_with_anthropic: false)
-          end
-
-          it_behaves_like 'performs completion'
-        end
+        it_behaves_like 'performs messages stream'
       end
 
       context 'for a work item' do
@@ -170,7 +157,7 @@
         let_it_be(:notes) { create_pair(:note_on_work_item, project: project, noteable: issuable) }
         let_it_be(:system_note) { create(:note_on_work_item, :system, project: project, noteable: issuable) }
 
-        it_behaves_like 'performs completion'
+        it_behaves_like 'performs messages stream'
       end
 
       context 'for a merge request' do
@@ -187,7 +174,7 @@
         let_it_be(:notes) { create_pair(:note_on_epic, noteable: issuable) }
         let_it_be(:system_note) { create(:note_on_epic, :system, noteable: issuable) }
 
-        it_behaves_like 'performs completion'
+        it_behaves_like 'performs messages stream'
       end
     end
   end