diff --git a/config/feature_flags/development/summarize_review_vertex.yml b/config/feature_flags/development/summarize_review_vertex.yml
new file mode 100644
index 0000000000000000000000000000000000000000..57ceae2e7e85f143943fa0e203d791d09443ed14
--- /dev/null
+++ b/config/feature_flags/development/summarize_review_vertex.yml
@@ -0,0 +1,8 @@
+---
+name: summarize_review_vertex
+introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/127190
+rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/419374
+milestone: '16.3'
+type: development
+group: group::code review
+default_enabled: false
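The new flag is checked against a project actor (see `vertex_ai?` further down), so the Vertex AI path can be rolled out project by project. A minimal console sketch, not part of this diff, for exercising the flag during development, assuming GitLab's standard Flipper-backed `Feature` API:

```ruby
# Development-console sketch: toggle the flag for one project and verify the
# same check the completion class performs. The project path is illustrative.
project = Project.find_by_full_path('gitlab-org/gitlab')

Feature.enable(:summarize_review_vertex, project)
Feature.enabled?(:summarize_review_vertex, project) # => true

# Disabling falls back to the OpenAI implementation.
Feature.disable(:summarize_review_vertex, project)
```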
diff --git a/ee/lib/gitlab/llm/completions_factory.rb b/ee/lib/gitlab/llm/completions_factory.rb
index 1c5be467763ec0573b04a754826b237541331a71..fb27e347bbda60f755f13f6b41bb74c224ad36a3 100644
--- a/ee/lib/gitlab/llm/completions_factory.rb
+++ b/ee/lib/gitlab/llm/completions_factory.rb
@@ -13,8 +13,8 @@ class CompletionsFactory
           prompt_class: nil
         },
         summarize_review: {
-          service_class: ::Gitlab::Llm::OpenAi::Completions::SummarizeReview,
-          prompt_class: ::Gitlab::Llm::OpenAi::Templates::SummarizeReview
+          service_class: ::Gitlab::Llm::VertexAi::Completions::SummarizeReview,
+          prompt_class: ::Gitlab::Llm::Templates::SummarizeReview
         },
         explain_code: {
           service_class: ::Gitlab::Llm::VertexAi::Completions::ExplainCode,
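This factory swap is the whole routing change: `:summarize_review` now resolves to the Vertex AI completion paired with the provider-neutral template. A hedged sketch of the lookup, assuming the `CompletionsFactory.completion(name, params)` entry point implied by the factory spec changes later in this diff:

```ruby
# Assumed factory API (inferred from the factory spec below): the service
# class is instantiated with the prompt class plus params.
completion = Gitlab::Llm::CompletionsFactory.completion(:summarize_review, {})
# => instance of Gitlab::Llm::VertexAi::Completions::SummarizeReview,
#    holding Gitlab::Llm::Templates::SummarizeReview as its ai_prompt_class

completion.execute(user, merge_request, request_id: 'uuid')
```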
diff --git a/ee/lib/gitlab/llm/open_ai/completions/summarize_review.rb b/ee/lib/gitlab/llm/open_ai/completions/summarize_review.rb
index 3c5a8b615bc69335a1146042a8bceaa7a8ca4179..8a46c7073083ebffeda4b4277dc527d9f5fd9ed5 100644
--- a/ee/lib/gitlab/llm/open_ai/completions/summarize_review.rb
+++ b/ee/lib/gitlab/llm/open_ai/completions/summarize_review.rb
@@ -5,27 +5,6 @@ module Llm
     module OpenAi
       module Completions
         class SummarizeReview < Gitlab::Llm::Completions::Base
-          TOTAL_MODEL_TOKEN_LIMIT = 4000
-
-          # 0.5 + 0.25 = 0.75, leaving a 0.25 buffer for the input token limit
-          #
-          # We want this for 2 reasons:
-          # - 25% for output tokens: OpenAI token limit includes both tokenized input prompt as well as the response
-          # We may come want to adjust these rations as we learn more, but for now leaving a 25% ration of the total
-          # limit seems sensible.
-          # - 25% buffer for input tokens: we approximate the token count by dividing character count by 4. That is no
-          # very accurate at all, so we need some buffer in case we exceed that so that we avoid getting an error
-          # response as much as possible. A better alternative is to use tiktoken_ruby gem which is coming in a
-          # follow-up, see https://gitlab.com/gitlab-org/gitlab/-/merge_requests/117176
-          #
-          INPUT_TOKEN_LIMIT = (TOTAL_MODEL_TOKEN_LIMIT * 0.5).to_i.freeze
-
-          # approximate that one token is ~4 characters. A better way of doing this is using tiktoken_ruby gem,
-          # which is a wrapper on OpenAI's token counting lib in python.
-          # see https://help.openai.com/en/articles/4936856-what-are-tokens-and-how-to-count-them
-          #
-          INPUT_CONTENT_LIMIT = INPUT_TOKEN_LIMIT * 4
-
           def execute(user, merge_request, _ = {})
             return unless user
             return unless merge_request
@@ -33,13 +12,8 @@ def execute(user, merge_request, _ = {})
             draft_notes = merge_request.draft_notes.authored_by(user)
             return if draft_notes.empty?
 
-            options = ai_prompt_class.get_options(prepared_draft_notes_content(draft_notes))
-
-            ai_response = Gitlab::Llm::OpenAi::Client.new(user).chat(
-              content: nil,
-              **options
-            )
-            response_modifier = Gitlab::Llm::OpenAi::ResponseModifiers::Chat.new(ai_response)
+            response = response_for(user, draft_notes)
+            response_modifier = Gitlab::Llm::OpenAi::ResponseModifiers::Chat.new(response)
 
             ::Gitlab::Llm::GraphqlSubscriptionResponseService.new(
               user, merge_request, response_modifier, options: response_options
@@ -48,18 +22,13 @@ def execute(user, merge_request, _ = {})
 
           private
 
-          def prepared_draft_notes_content(draft_notes)
-            draft_notes_content = []
-
-            draft_notes.each do |draft_note|
-              draft_note_line = "Comment: #{draft_note.note}\n"
-
-              if (draft_notes_content.length + draft_note_line.length) < INPUT_CONTENT_LIMIT
-                draft_notes_content << draft_note_line
-              end
-            end
-
-            draft_notes_content.join("\n")
+          def response_for(user, draft_notes)
+            Gitlab::Llm::OpenAi::Client
+              .new(user)
+              .chat(
+                content: ai_prompt_class.new(draft_notes).to_prompt,
+                moderated: true
+              )
           end
         end
       end
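After this refactor the OpenAI completion no longer assembles chat messages itself: the template object owns prompt construction, and the client is asked for a moderated chat completion with the rendered prompt as plain content. The resulting call shape, a sketch assuming the `chat(content:, moderated:)` signature exercised in the spec changes below:

```ruby
# Call shape after the refactor; signatures are taken from the spec changes
# further down in this diff, and draft_notes/user are assumed in scope.
prompt = Gitlab::Llm::Templates::SummarizeReview.new(draft_notes).to_prompt

Gitlab::Llm::OpenAi::Client.new(user).chat(content: prompt, moderated: true)
```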
diff --git a/ee/lib/gitlab/llm/open_ai/templates/summarize_review.rb b/ee/lib/gitlab/llm/open_ai/templates/summarize_review.rb
deleted file mode 100644
index 472be017e094f48359043882742d2d226479bea6..0000000000000000000000000000000000000000
--- a/ee/lib/gitlab/llm/open_ai/templates/summarize_review.rb
+++ /dev/null
@@ -1,26 +0,0 @@
-# frozen_string_literal: true
-
-module Gitlab
-  module Llm
-    module OpenAi
-      module Templates
-        class SummarizeReview
-          SYSTEM_CONTENT = "You are a sophisticated code review assistant."
-          DRAFT_NOTE_CONTEXT = <<-TEMPLATE
-You are acting as the reviewer for this merge request and MUST respond in first person as if you reviewed it and should always use 'I'. You are provided with the corresponding code comment. Use this information to create an overall summary which MUST mention the types of comments left, a comment can be either: question or recommendation. This summary MUST NOT be longer than 3 sentences. This summary MUST give an indication of the topics the review covered. The summary MUST be written in present simple tense and MUST be as concise as possible. The summary MUST also include an estimate of the overall work needed, using any of the following: "small amount of work, decent amount or significant work required" but the comment MUST make sure to note this is only an estimate, for example, "I estimate there is...". Code review comments:
-          TEMPLATE
-
-          def self.get_options(draft_notes_content)
-            {
-              messages: [
-                { role: "system", content: SYSTEM_CONTENT },
-                { role: "user", content: "#{DRAFT_NOTE_CONTEXT}\n\n#{draft_notes_content}" }
-              ],
-              temperature: 0.2
-            }
-          end
-        end
-      end
-    end
-  end
-end
diff --git a/ee/lib/gitlab/llm/templates/summarize_review.rb b/ee/lib/gitlab/llm/templates/summarize_review.rb
new file mode 100644
index 0000000000000000000000000000000000000000..4341ee428b0bb3585986146e89a04640a84e8c2d
--- /dev/null
+++ b/ee/lib/gitlab/llm/templates/summarize_review.rb
@@ -0,0 +1,63 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module Llm
+    module Templates
+      class SummarizeReview
+        TOTAL_MODEL_TOKEN_LIMIT = 4000
+
+        # 0.5 + 0.25 = 0.75, leaving a 0.25 buffer for the input token limit
+        #
+        # We want this for 2 reasons:
+        # - 25% for output tokens: the OpenAI token limit covers both the tokenized input prompt and the
+        # response. We may want to adjust these ratios as we learn more, but for now reserving 25% of the
+        # total limit seems sensible.
+        # - 25% buffer for input tokens: we approximate the token count by dividing the character count by 4.
+        # That is not very accurate, so we keep some buffer in case we exceed the limit, to avoid an error
+        # response as much as possible. A better alternative is the tiktoken_ruby gem, which is coming in a
+        # follow-up, see https://gitlab.com/gitlab-org/gitlab/-/merge_requests/117176
+        #
+        INPUT_TOKEN_LIMIT = (TOTAL_MODEL_TOKEN_LIMIT * 0.5).to_i.freeze
+
+        # approximate that one token is ~4 characters. A better way of doing this is to use the tiktoken_ruby
+        # gem, which is a wrapper around OpenAI's token-counting library in Python.
+        # see https://help.openai.com/en/articles/4936856-what-are-tokens-and-how-to-count-them
+        #
+        INPUT_CONTENT_LIMIT = INPUT_TOKEN_LIMIT * 4
+
+        def initialize(draft_notes)
+          @draft_notes = draft_notes
+        end
+
+        def to_prompt
+          <<-PROMPT
+          You are acting as the reviewer for this merge request and MUST respond in first person as if you reviewed it and should always use 'I'. You are provided with the corresponding code comment. Use this information to create an overall summary which MUST mention the types of comments left, a comment can be either: question or recommendation. This summary MUST NOT be longer than 3 sentences. This summary MUST give an indication of the topics the review covered. The summary MUST be written in present simple tense and MUST be as concise as possible. The summary MUST also include an estimate of the overall work needed, using any of the following: "small amount of work, decent amount or significant work required" but the comment MUST make sure to note this is only an estimate, for example, "I estimate there is...". Code review comments:
+
+          #{draft_notes_content}
+          PROMPT
+        end
+
+        private
+
+        attr_reader :draft_notes
+
+        def draft_notes_content
+          content = []
+          total_length = 0
+
+          draft_notes.each do |draft_note|
+            draft_note_line = "Comment: #{draft_note.note}\n"
+            # Track the cumulative character count (not the number of notes collected
+            # so far) so the prompt as a whole stays within INPUT_CONTENT_LIMIT.
+            next if total_length + draft_note_line.length >= INPUT_CONTENT_LIMIT
+
+            content << draft_note_line
+            total_length += draft_note_line.length
+          end
+
+          content.join("\n")
+        end
+      end
+    end
+  end
+end
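A short usage sketch for the relocated template: it renders one `Comment:` line per draft note and stops adding notes once the cumulative character count would reach `INPUT_CONTENT_LIMIT` (2000 tokens * ~4 characters = 8000 characters):

```ruby
# Usage sketch; user and merge_request are assumed to be in scope.
notes = merge_request.draft_notes.authored_by(user)

prompt = Gitlab::Llm::Templates::SummarizeReview.new(notes).to_prompt
# => the reviewer instruction text followed by "Comment: ..." lines,
#    truncated at INPUT_CONTENT_LIMIT characters of note content
```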
diff --git a/ee/lib/gitlab/llm/vertex_ai/completions/summarize_review.rb b/ee/lib/gitlab/llm/vertex_ai/completions/summarize_review.rb
new file mode 100644
index 0000000000000000000000000000000000000000..dbfe5d9d31e7c71982c418dce07d4d4e870cb86e
--- /dev/null
+++ b/ee/lib/gitlab/llm/vertex_ai/completions/summarize_review.rb
@@ -0,0 +1,53 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module Llm
+    module VertexAi
+      module Completions
+        class SummarizeReview < Gitlab::Llm::Completions::Base
+          DEFAULT_ERROR = 'An unexpected error has occurred.'
+
+          def execute(user, merge_request, options)
+            unless vertex_ai?(merge_request)
+              return ::Gitlab::Llm::OpenAi::Completions::SummarizeReview
+                .new(ai_prompt_class)
+                .execute(user, merge_request, options)
+            end
+
+            draft_notes = merge_request.draft_notes.authored_by(user)
+            return if draft_notes.empty?
+
+            response = response_for(user, draft_notes)
+            response_modifier = ::Gitlab::Llm::VertexAi::ResponseModifiers::Predictions.new(response)
+
+            ::Gitlab::Llm::GraphqlSubscriptionResponseService.new(
+              user, merge_request, response_modifier, options: options
+            ).execute
+          rescue StandardError => error
+            Gitlab::ErrorTracking.track_exception(error)
+
+            response_modifier = ::Gitlab::Llm::VertexAi::ResponseModifiers::Predictions.new(
+              { error: { message: DEFAULT_ERROR } }.to_json
+            )
+
+            ::Gitlab::Llm::GraphqlSubscriptionResponseService.new(
+              user, merge_request, response_modifier, options: options
+            ).execute
+          end
+
+          private
+
+          def response_for(user, draft_notes)
+            Gitlab::Llm::VertexAi::Client
+              .new(user)
+              .text(content: ai_prompt_class.new(draft_notes).to_prompt)
+          end
+
+          def vertex_ai?(merge_request)
+            Feature.enabled?(:summarize_review_vertex, merge_request.project)
+          end
+        end
+      end
+    end
+  end
+end
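The class above is a provider switch: with the flag off it delegates wholesale to the legacy OpenAI completion (constructed with the same prompt class), and with the flag on it calls the Vertex AI text model, funnelling both success and error payloads through the same GraphQL subscription response service. A sketch exercising both paths, assuming a console context where `user` and `merge_request` are in scope:

```ruby
# Sketch only; the two-argument constructor mirrors the factory spec in this
# diff, and the request_id value is illustrative.
completion = Gitlab::Llm::VertexAi::Completions::SummarizeReview
  .new(Gitlab::Llm::Templates::SummarizeReview, {})

Feature.disable(:summarize_review_vertex, merge_request.project)
completion.execute(user, merge_request, request_id: 'uuid') # OpenAI path

Feature.enable(:summarize_review_vertex, merge_request.project)
completion.execute(user, merge_request, request_id: 'uuid') # VertexAi::Client#text
```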
diff --git a/ee/spec/lib/gitlab/llm/completions_factory_spec.rb b/ee/spec/lib/gitlab/llm/completions_factory_spec.rb
index b75cc499444e45ad6c76bd10767f9614d0ce29d9..329bfed21abff2d25983314bd2c277f5adbeb422 100644
--- a/ee/spec/lib/gitlab/llm/completions_factory_spec.rb
+++ b/ee/spec/lib/gitlab/llm/completions_factory_spec.rb
@@ -8,8 +8,8 @@
       let(:completion_name) { :summarize_review }
 
       it 'returns completion service' do
-        completion_class = ::Gitlab::Llm::OpenAi::Completions::SummarizeReview
-        template_class = ::Gitlab::Llm::OpenAi::Templates::SummarizeReview
+        completion_class = ::Gitlab::Llm::VertexAi::Completions::SummarizeReview
+        template_class = ::Gitlab::Llm::Templates::SummarizeReview
 
         expect(completion_class).to receive(:new).with(template_class, {}).and_call_original
 
diff --git a/ee/spec/lib/gitlab/llm/open_ai/completions/summarize_review_spec.rb b/ee/spec/lib/gitlab/llm/open_ai/completions/summarize_review_spec.rb
index 68ed422b1b82672102e304031bccd939f9af1082..60b6ce8e50b884a21dc7ff9e758a1e10a1faedd5 100644
--- a/ee/spec/lib/gitlab/llm/open_ai/completions/summarize_review_spec.rb
+++ b/ee/spec/lib/gitlab/llm/open_ai/completions/summarize_review_spec.rb
@@ -13,17 +13,7 @@
   let!(:draft_note_by_current_user) { create(:draft_note, merge_request: merge_request, author: user) }
   let!(:draft_note_by_random_user) { create(:draft_note, merge_request: merge_request) }
 
-  let(:template_class) { ::Gitlab::Llm::OpenAi::Templates::SummarizeReview }
-
-  let(:ai_options) do
-    {
-      messages: [
-        { role: "system", content: "You are a helpful assistant that summarizes reviews." },
-        { role: "user", content: "Some content" }
-      ],
-      temperature: 0.2
-    }
-  end
+  let(:template_class) { ::Gitlab::Llm::Templates::SummarizeReview }
 
   let(:ai_response) do
     {
@@ -69,15 +59,14 @@
             .and_call_original
         end
 
-        expect(Gitlab::Llm::OpenAi::Templates::SummarizeReview)
-          .to receive(:get_options)
-          .with("Comment: #{draft_note_by_current_user.note}\n")
-          .and_return(ai_options)
+        expect_next_instance_of(template_class) do |template|
+          expect(template).to receive(:to_prompt).and_return('AI prompt')
+        end
 
         expect_next_instance_of(Gitlab::Llm::OpenAi::Client) do |instance|
           expect(instance)
             .to receive(:chat)
-            .with(content: nil, **ai_options)
+            .with(content: 'AI prompt', moderated: true)
             .and_return(ai_response)
         end
 
diff --git a/ee/spec/lib/gitlab/llm/open_ai/templates/summarize_review_spec.rb b/ee/spec/lib/gitlab/llm/open_ai/templates/summarize_review_spec.rb
deleted file mode 100644
index ce12d63c6a3cc10c4a0f7a2b7823e8ce955db4f1..0000000000000000000000000000000000000000
--- a/ee/spec/lib/gitlab/llm/open_ai/templates/summarize_review_spec.rb
+++ /dev/null
@@ -1,28 +0,0 @@
-# frozen_string_literal: true
-
-require "spec_helper"
-
-RSpec.describe Gitlab::Llm::OpenAi::Templates::SummarizeReview, feature_category: :code_review_workflow do
-  let(:additional_text) { "Some message content" }
-
-  describe ".get_options" do
-    it "returns correct parameters" do
-      expect(described_class.get_options(additional_text)).to eq(
-        {
-          messages:
-          [
-            {
-              role: "system",
-              content: described_class::SYSTEM_CONTENT
-            },
-            {
-              role: "user",
-              content: "#{described_class::DRAFT_NOTE_CONTEXT}\n\n#{additional_text}"
-            }
-          ],
-          temperature: 0.2
-        }
-      )
-    end
-  end
-end
diff --git a/ee/spec/lib/gitlab/llm/templates/summarize_review_spec.rb b/ee/spec/lib/gitlab/llm/templates/summarize_review_spec.rb
new file mode 100644
index 0000000000000000000000000000000000000000..11795afa54e5fb609cecd94389a50ac31cd602c6
--- /dev/null
+++ b/ee/spec/lib/gitlab/llm/templates/summarize_review_spec.rb
@@ -0,0 +1,21 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+RSpec.describe Gitlab::Llm::Templates::SummarizeReview, feature_category: :code_review_workflow do
+  let_it_be(:project) { create(:project, :repository) }
+  let_it_be(:merge_request) { create(:merge_request, source_project: project, target_project: project) }
+  let_it_be(:draft_note_1) { create(:draft_note, merge_request: merge_request) }
+  let_it_be(:draft_note_2) { create(:draft_note, merge_request: merge_request) }
+
+  subject { described_class.new([draft_note_1, draft_note_2]) }
+
+  describe '#to_prompt' do
+    it 'includes lines per note' do
+      prompt = subject.to_prompt
+
+      expect(prompt).to include("Comment: #{draft_note_1.note}")
+      expect(prompt).to include("Comment: #{draft_note_2.note}")
+    end
+  end
+end
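The spec above covers inclusion but not truncation. A hedged sketch of an additional example that could cover the `INPUT_CONTENT_LIMIT` cut-off, assuming the `:draft_note` factory accepts a `note:` attribute:

```ruby
# Hypothetical extra example, not part of this MR.
context 'when a note would exceed INPUT_CONTENT_LIMIT' do
  let(:oversized_note) do
    build(:draft_note,
      merge_request: merge_request,
      note: 'a' * (described_class::INPUT_CONTENT_LIMIT + 1))
  end

  it 'omits the oversized note from the prompt' do
    prompt = described_class.new([oversized_note]).to_prompt

    expect(prompt).not_to include(oversized_note.note)
  end
end
```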
diff --git a/ee/spec/lib/gitlab/llm/vertex_ai/completions/summarize_review_spec.rb b/ee/spec/lib/gitlab/llm/vertex_ai/completions/summarize_review_spec.rb
new file mode 100644
index 0000000000000000000000000000000000000000..d6a8353f98d0eabf283d35fb09191c9232a81167
--- /dev/null
+++ b/ee/spec/lib/gitlab/llm/vertex_ai/completions/summarize_review_spec.rb
@@ -0,0 +1,157 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+RSpec.describe Gitlab::Llm::VertexAi::Completions::SummarizeReview, feature_category: :code_review_workflow do
+  let(:prompt_class) { Gitlab::Llm::Templates::SummarizeReview }
+  let(:options) { { request_id: 'uuid' } }
+  let(:response_modifier) { double }
+  let(:response_service) { double }
+  let_it_be(:user) { create(:user) }
+  let_it_be(:merge_request) { create(:merge_request) }
+  let_it_be(:draft_note_by_random_user) { create(:draft_note, merge_request: merge_request) }
+  let(:params) { [user, merge_request, response_modifier, { options: { request_id: 'uuid' } }] }
+
+  subject { described_class.new(prompt_class, options) }
+
+  describe '#execute' do
+    context 'when the feature flag is disabled' do
+      before do
+        stub_feature_flags(summarize_review_vertex: false)
+      end
+
+      it 'falls back to the OpenAI implementation' do
+        allow_next_instance_of(::Gitlab::Llm::OpenAi::Completions::SummarizeReview) do |completion|
+          expect(completion).to receive(:execute).with(user, merge_request, options)
+        end
+
+        expect(::Gitlab::Llm::VertexAi::Client).not_to receive(:new)
+
+        subject.execute(user, merge_request, options)
+      end
+    end
+
+    context 'when there are no draft notes authored by user' do
+      it 'does not make an AI request' do
+        expect(Gitlab::Llm::VertexAi::Client).not_to receive(:new)
+
+        subject.execute(user, merge_request, options)
+      end
+    end
+
+    context 'when there are draft notes authored by user' do
+      let_it_be(:draft_note_by_current_user) { create(:draft_note, merge_request: merge_request, author: user) }
+
+      context 'when the text model returns an unsuccessful response' do
+        before do
+          allow_next_instance_of(Gitlab::Llm::VertexAi::Client) do |client|
+            allow(client).to receive(:text).and_return(
+              { error: 'Error' }.to_json
+            )
+          end
+        end
+
+        it 'publishes the error to the graphql subscription' do
+          errors = { error: 'Error' }
+          expect(::Gitlab::Llm::VertexAi::ResponseModifiers::Predictions)
+            .to receive(:new)
+            .with(errors.to_json)
+            .and_return(response_modifier)
+
+          expect(::Gitlab::Llm::GraphqlSubscriptionResponseService)
+            .to receive(:new)
+            .with(*params)
+            .and_return(response_service)
+
+          expect(response_service).to receive(:execute)
+
+          subject.execute(user, merge_request, options)
+        end
+      end
+
+      context 'when the text model returns a successful response' do
+        let(:example_answer) { "AI generated review summary" }
+
+        let(:example_response) do
+          {
+            "predictions" => [
+              {
+                "candidates" => [
+                  {
+                    "author" => "",
+                    "content" => example_answer
+                  }
+                ],
+                "safetyAttributes" => {
+                  "categories" => ["Violent"],
+                  "scores" => [0.4000000059604645],
+                  "blocked" => false
+                }
+              }
+            ],
+            "deployedModelId" => "1",
+            "model" => "projects/1/locations/us-central1/models/text-bison",
+            "modelDisplayName" => "text-bison",
+            "modelVersionId" => "1"
+          }
+        end
+
+        before do
+          allow_next_instance_of(Gitlab::Llm::VertexAi::Client) do |client|
+            allow(client).to receive(:text).and_return(example_response.to_json)
+          end
+        end
+
+        it 'publishes the content from the AI response' do
+          expect(::Gitlab::Llm::VertexAi::ResponseModifiers::Predictions)
+            .to receive(:new)
+            .with(example_response.to_json)
+            .and_return(response_modifier)
+
+          expect(::Gitlab::Llm::GraphqlSubscriptionResponseService)
+            .to receive(:new)
+            .with(*params)
+            .and_return(response_service)
+
+          expect(response_service).to receive(:execute)
+
+          subject.execute(user, merge_request, options)
+        end
+
+        context 'when an unexpected error is raised' do
+          let(:error) { StandardError.new("Error") }
+
+          before do
+            allow_next_instance_of(Gitlab::Llm::VertexAi::Client) do |client|
+              allow(client).to receive(:text).and_raise(error)
+            end
+            allow(Gitlab::ErrorTracking).to receive(:track_exception)
+          end
+
+          it 'records the error' do
+            subject.execute(user, merge_request, options)
+            expect(Gitlab::ErrorTracking).to have_received(:track_exception).with(error)
+          end
+
+          it 'publishes a generic error to the graphql subscription' do
+            errors = { error: { message: 'An unexpected error has occurred.' } }
+
+            expect(::Gitlab::Llm::VertexAi::ResponseModifiers::Predictions)
+              .to receive(:new)
+              .with(errors.to_json)
+              .and_return(response_modifier)
+
+            expect(::Gitlab::Llm::GraphqlSubscriptionResponseService)
+              .to receive(:new)
+              .with(*params)
+              .and_return(response_service)
+
+            expect(response_service).to receive(:execute)
+
+            subject.execute(user, merge_request, options)
+          end
+        end
+      end
+    end
+  end
+end