Commit 65e4d656 authored by Patrick Bajao, committed by Kerri Miller

Add VertexAI support for summarizing pending comments

We want to move to VertexAI for summarizing review draft notes.

This is behind the `summarize_review_vertex` feature flag.
Parent df4abd18
Showing 315 additions and 114 deletions
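In outline, the new flow reads the reviewer's pending draft notes, builds a single prompt from them, sends it to the Vertex AI text model, and publishes the result over a GraphQL subscription; when the `summarize_review_vertex` flag is disabled, the Vertex AI completion falls back to the existing OpenAI implementation. A condensed sketch of that control flow follows (a standalone method written for illustration only; the real classes and wiring are in the diff below):

# Condensed sketch of the new summarize_review flow. The classes are the ones
# introduced or reused by this commit; the wrapper method itself is illustrative.
def summarize_review(user, merge_request, options)
  unless Feature.enabled?(:summarize_review_vertex, merge_request.project)
    # Flag disabled: keep using the existing OpenAI completion.
    return Gitlab::Llm::OpenAi::Completions::SummarizeReview
      .new(Gitlab::Llm::Templates::SummarizeReview)
      .execute(user, merge_request, options)
  end

  draft_notes = merge_request.draft_notes.authored_by(user)
  return if draft_notes.empty?

  prompt   = Gitlab::Llm::Templates::SummarizeReview.new(draft_notes).to_prompt
  response = Gitlab::Llm::VertexAi::Client.new(user).text(content: prompt)
  modifier = Gitlab::Llm::VertexAi::ResponseModifiers::Predictions.new(response)

  Gitlab::Llm::GraphqlSubscriptionResponseService
    .new(user, merge_request, modifier, options: options)
    .execute
end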
---
name: summarize_review_vertex
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/127190
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/419374
milestone: '16.3'
type: development
group: group::code review
default_enabled: false
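The flag is project-scoped and disabled by default. For local testing it can be toggled per project with GitLab's standard feature-flag API, for example from a Rails console (this usage is not part of the diff; the project path is a placeholder):

# Enable the Vertex AI path for one project; disable it to fall back to OpenAI.
project = Project.find_by_full_path('some-group/some-project') # placeholder path
Feature.enable(:summarize_review_vertex, project)
# Feature.disable(:summarize_review_vertex, project)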
@@ -13,8 +13,8 @@ class CompletionsFactory
prompt_class: nil
},
summarize_review: {
service_class: ::Gitlab::Llm::OpenAi::Completions::SummarizeReview,
prompt_class: ::Gitlab::Llm::OpenAi::Templates::SummarizeReview
service_class: ::Gitlab::Llm::VertexAi::Completions::SummarizeReview,
prompt_class: ::Gitlab::Llm::Templates::SummarizeReview
},
explain_code: {
service_class: ::Gitlab::Llm::VertexAi::Completions::ExplainCode,
......
@@ -5,27 +5,6 @@ module Llm
module OpenAi
module Completions
class SummarizeReview < Gitlab::Llm::Completions::Base
TOTAL_MODEL_TOKEN_LIMIT = 4000
# 0.5 + 0.25 = 0.75, leaving a 0.25 buffer for the input token limit
#
# We want this for 2 reasons:
# - 25% for output tokens: the OpenAI token limit includes both the tokenized input prompt and the response.
# We may want to adjust these ratios as we learn more, but for now leaving a 25% ratio of the total
# limit seems sensible.
# - 25% buffer for input tokens: we approximate the token count by dividing the character count by 4. That is not
# very accurate at all, so we need some buffer in case we exceed the limit, so that we avoid an error
# response as much as possible. A better alternative is to use the tiktoken_ruby gem, which is coming in a
# follow-up, see https://gitlab.com/gitlab-org/gitlab/-/merge_requests/117176
#
INPUT_TOKEN_LIMIT = (TOTAL_MODEL_TOKEN_LIMIT * 0.5).to_i.freeze
# approximate that one token is ~4 characters. A better way of doing this is using the tiktoken_ruby gem,
# which is a wrapper around OpenAI's token counting library in Python.
# see https://help.openai.com/en/articles/4936856-what-are-tokens-and-how-to-count-them
#
INPUT_CONTENT_LIMIT = INPUT_TOKEN_LIMIT * 4
def execute(user, merge_request, _ = {})
return unless user
return unless merge_request
@@ -33,13 +12,8 @@ def execute(user, merge_request, _ = {})
draft_notes = merge_request.draft_notes.authored_by(user)
return if draft_notes.empty?
options = ai_prompt_class.get_options(prepared_draft_notes_content(draft_notes))
ai_response = Gitlab::Llm::OpenAi::Client.new(user).chat(
content: nil,
**options
)
response_modifier = Gitlab::Llm::OpenAi::ResponseModifiers::Chat.new(ai_response)
response = response_for(user, draft_notes)
response_modifier = Gitlab::Llm::OpenAi::ResponseModifiers::Chat.new(response)
::Gitlab::Llm::GraphqlSubscriptionResponseService.new(
user, merge_request, response_modifier, options: response_options
@@ -48,18 +22,13 @@ def execute(user, merge_request, _ = {})
private
def prepared_draft_notes_content(draft_notes)
draft_notes_content = []
draft_notes.each do |draft_note|
draft_note_line = "Comment: #{draft_note.note}\n"
if (draft_notes_content.length + draft_note_line.length) < INPUT_CONTENT_LIMIT
draft_notes_content << draft_note_line
end
end
draft_notes_content.join("\n")
def response_for(user, draft_notes)
Gitlab::Llm::OpenAi::Client
.new(user)
.chat(
content: ai_prompt_class.new(draft_notes).to_prompt,
moderated: true
)
end
end
end
......
# frozen_string_literal: true
module Gitlab
module Llm
module OpenAi
module Templates
class SummarizeReview
SYSTEM_CONTENT = "You are a sophisticated code review assistant."
DRAFT_NOTE_CONTEXT = <<-TEMPLATE
You are acting as the reviewer for this merge request and MUST respond in first person as if you reviewed it and should always use 'I'. You are provided with the corresponding code comment. Use this information to create an overall summary which MUST mention the types of comments left, a comment can be either: question or recommendation. This summary MUST NOT be longer than 3 sentences. This summary MUST give an indication of the topics the review covered. The summary MUST be written in present simple tense and MUST be as concise as possible. The summary MUST also include an estimate of the overall work needed, using any of the following: "small amount of work, decent amount or significant work required" but the comment MUST make sure to note this is only an estimate, for example, "I estimate there is...". Code review comments:
TEMPLATE
def self.get_options(draft_notes_content)
{
messages: [
{ role: "system", content: SYSTEM_CONTENT },
{ role: "user", content: "#{DRAFT_NOTE_CONTEXT}\n\n#{draft_notes_content}" }
],
temperature: 0.2
}
end
end
end
end
end
end
# frozen_string_literal: true
module Gitlab
module Llm
module Templates
class SummarizeReview
TOTAL_MODEL_TOKEN_LIMIT = 4000
# 0.5 + 0.25 = 0.75, leaving a 0.25 buffer for the input token limit
#
# We want this for 2 reasons:
# - 25% for output tokens: the OpenAI token limit includes both the tokenized input prompt and the response.
# We may want to adjust these ratios as we learn more, but for now leaving a 25% ratio of the total
# limit seems sensible.
# - 25% buffer for input tokens: we approximate the token count by dividing the character count by 4. That is not
# very accurate at all, so we need some buffer in case we exceed the limit, so that we avoid an error
# response as much as possible. A better alternative is to use the tiktoken_ruby gem, which is coming in a
# follow-up, see https://gitlab.com/gitlab-org/gitlab/-/merge_requests/117176
#
INPUT_TOKEN_LIMIT = (TOTAL_MODEL_TOKEN_LIMIT * 0.5).to_i.freeze
# approximate that one token is ~4 characters. A better way of doing this is using the tiktoken_ruby gem,
# which is a wrapper around OpenAI's token counting library in Python.
# see https://help.openai.com/en/articles/4936856-what-are-tokens-and-how-to-count-them
#
INPUT_CONTENT_LIMIT = INPUT_TOKEN_LIMIT * 4
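# With TOTAL_MODEL_TOKEN_LIMIT = 4000 the constants above work out to
# INPUT_TOKEN_LIMIT = 2000 tokens and INPUT_CONTENT_LIMIT = 8000 characters
# (2000 tokens * ~4 characters per token).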
def initialize(draft_notes)
@draft_notes = draft_notes
end
def to_prompt
<<-PROMPT
You are acting as the reviewer for this merge request and MUST respond in first person as if you reviewed it and should always use 'I'. You are provided with the corresponding code comment. Use this information to create an overall summary which MUST mention the types of comments left, a comment can be either: question or recommendation. This summary MUST NOT be longer than 3 sentences. This summary MUST give an indication of the topics the review covered. The summary MUST be written in present simple tense and MUST be as concise as possible. The summary MUST also include an estimate of the overall work needed, using any of the following: "small amount of work, decent amount or significant work required" but the comment MUST make sure to note this is only an estimate, for example, "I estimate there is...". Code review comments:
#{draft_notes_content}
PROMPT
end
private
attr_reader :draft_notes
def draft_notes_content
content = []
draft_notes.each do |draft_note|
draft_note_line = "Comment: #{draft_note.note}\n"
# Compare the accumulated character count (not the array size) against the character limit.
content << draft_note_line if (content.sum(&:length) + draft_note_line.length) < INPUT_CONTENT_LIMIT
end
content.join("\n")
end
end
end
end
end
# frozen_string_literal: true
module Gitlab
module Llm
module VertexAi
module Completions
class SummarizeReview < Gitlab::Llm::Completions::Base
DEFAULT_ERROR = 'An unexpected error has occurred.'
def execute(user, merge_request, options)
unless vertex_ai?(merge_request)
return ::Gitlab::Llm::OpenAi::Completions::SummarizeReview
.new(ai_prompt_class)
.execute(user, merge_request, options)
end
draft_notes = merge_request.draft_notes.authored_by(user)
return if draft_notes.empty?
response = response_for(user, draft_notes)
response_modifier = ::Gitlab::Llm::VertexAi::ResponseModifiers::Predictions.new(response)
::Gitlab::Llm::GraphqlSubscriptionResponseService.new(
user, merge_request, response_modifier, options: options
).execute
rescue StandardError => error
Gitlab::ErrorTracking.track_exception(error)
response_modifier = ::Gitlab::Llm::VertexAi::ResponseModifiers::Predictions.new(
{ error: { message: DEFAULT_ERROR } }.to_json
)
::Gitlab::Llm::GraphqlSubscriptionResponseService.new(
user, merge_request, response_modifier, options: options
).execute
end
private
def response_for(user, draft_notes)
Gitlab::Llm::VertexAi::Client
.new(user)
.text(content: ai_prompt_class.new(draft_notes).to_prompt)
end
def vertex_ai?(merge_request)
Feature.enabled?(:summarize_review_vertex, merge_request.project)
end
end
end
end
end
end
@@ -8,8 +8,8 @@
let(:completion_name) { :summarize_review }
it 'returns completion service' do
completion_class = ::Gitlab::Llm::OpenAi::Completions::SummarizeReview
template_class = ::Gitlab::Llm::OpenAi::Templates::SummarizeReview
completion_class = ::Gitlab::Llm::VertexAi::Completions::SummarizeReview
template_class = ::Gitlab::Llm::Templates::SummarizeReview
expect(completion_class).to receive(:new).with(template_class, {}).and_call_original
......
@@ -13,17 +13,7 @@
let!(:draft_note_by_current_user) { create(:draft_note, merge_request: merge_request, author: user) }
let!(:draft_note_by_random_user) { create(:draft_note, merge_request: merge_request) }
let(:template_class) { ::Gitlab::Llm::OpenAi::Templates::SummarizeReview }
let(:ai_options) do
{
messages: [
{ role: "system", content: "You are a helpful assistant that summarizes reviews." },
{ role: "user", content: "Some content" }
],
temperature: 0.2
}
end
let(:template_class) { ::Gitlab::Llm::Templates::SummarizeReview }
let(:ai_response) do
{
@@ -69,15 +59,14 @@
.and_call_original
end
expect(Gitlab::Llm::OpenAi::Templates::SummarizeReview)
.to receive(:get_options)
.with("Comment: #{draft_note_by_current_user.note}\n")
.and_return(ai_options)
expect_next_instance_of(template_class) do |template|
expect(template).to receive(:to_prompt).and_return('AI prompt')
end
expect_next_instance_of(Gitlab::Llm::OpenAi::Client) do |instance|
expect(instance)
.to receive(:chat)
.with(content: nil, **ai_options)
.with(content: 'AI prompt', moderated: true)
.and_return(ai_response)
end
......
# frozen_string_literal: true
require "spec_helper"
RSpec.describe Gitlab::Llm::OpenAi::Templates::SummarizeReview, feature_category: :code_review_workflow do
let(:additional_text) { "Some message content" }
describe ".get_options" do
it "returns correct parameters" do
expect(described_class.get_options(additional_text)).to eq(
{
messages:
[
{
role: "system",
content: described_class::SYSTEM_CONTENT
},
{
role: "user",
content: "#{described_class::DRAFT_NOTE_CONTEXT}\n\n#{additional_text}"
}
],
temperature: 0.2
}
)
end
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::Llm::Templates::SummarizeReview, feature_category: :code_review_workflow do
let_it_be(:project) { create(:project, :repository) }
let_it_be(:merge_request) { create(:merge_request, source_project: project, target_project: project) }
let_it_be(:draft_note_1) { create(:draft_note, merge_request: merge_request) }
let_it_be(:draft_note_2) { create(:draft_note, merge_request: merge_request) }
subject { described_class.new([draft_note_1, draft_note_2]) }
describe '#to_prompt' do
it 'includes lines per note' do
prompt = subject.to_prompt
expect(prompt).to include("Comment: #{draft_note_1.note}")
expect(prompt).to include("Comment: #{draft_note_2.note}")
end
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::Llm::VertexAi::Completions::SummarizeReview, feature_category: :code_review_workflow do
let(:prompt_class) { Gitlab::Llm::Templates::SummarizeReview }
let(:options) { { request_id: 'uuid' } }
let(:response_modifier) { double }
let(:response_service) { double }
let_it_be(:user) { create(:user) }
let_it_be(:merge_request) { create(:merge_request) }
let_it_be(:draft_note_by_random_user) { create(:draft_note, merge_request: merge_request) }
let(:params) { [user, merge_request, response_modifier, { options: { request_id: 'uuid' } }] }
subject { described_class.new(prompt_class, options) }
describe '#execute' do
context 'when the feature flag is disabled' do
before do
stub_feature_flags(summarize_review_vertex: false)
end
it 'falls back to the OpenAI implementation' do
allow_next_instance_of(::Gitlab::Llm::OpenAi::Completions::SummarizeReview) do |completion|
expect(completion).to receive(:execute).with(user, merge_request, options)
end
expect(::Gitlab::Llm::VertexAi::Client).not_to receive(:new)
subject.execute(user, merge_request, options)
end
end
context 'when there are no draft notes authored by user' do
it 'does not make AI request' do
expect(Gitlab::Llm::VertexAi::Client).not_to receive(:new)
subject.execute(user, merge_request, options)
end
end
context 'when there are draft notes authored by user' do
let_it_be(:draft_note_by_current_user) { create(:draft_note, merge_request: merge_request, author: user) }
context 'when the text model returns an unsuccessful response' do
before do
allow_next_instance_of(Gitlab::Llm::VertexAi::Client) do |client|
allow(client).to receive(:text).and_return(
{ error: 'Error' }.to_json
)
end
end
it 'publishes the error to the graphql subscription' do
errors = { error: 'Error' }
expect(::Gitlab::Llm::VertexAi::ResponseModifiers::Predictions)
.to receive(:new)
.with(errors.to_json)
.and_return(response_modifier)
expect(::Gitlab::Llm::GraphqlSubscriptionResponseService)
.to receive(:new)
.with(*params)
.and_return(response_service)
expect(response_service).to receive(:execute)
subject.execute(user, merge_request, options)
end
end
context 'when the text model returns a successful response' do
let(:example_answer) { "AI generated review summary" }
let(:example_response) do
{
"predictions" => [
{
"candidates" => [
{
"author" => "",
"content" => example_answer
}
],
"safetyAttributes" => {
"categories" => ["Violent"],
"scores" => [0.4000000059604645],
"blocked" => false
}
}
],
"deployedModelId" => "1",
"model" => "projects/1/locations/us-central1/models/text-bison",
"modelDisplayName" => "text-bison",
"modelVersionId" => "1"
}
end
before do
allow_next_instance_of(Gitlab::Llm::VertexAi::Client) do |client|
allow(client).to receive(:text).and_return(example_response.to_json)
end
end
it 'publishes the content from the AI response' do
expect(::Gitlab::Llm::VertexAi::ResponseModifiers::Predictions)
.to receive(:new)
.with(example_response.to_json)
.and_return(response_modifier)
expect(::Gitlab::Llm::GraphqlSubscriptionResponseService)
.to receive(:new)
.with(*params)
.and_return(response_service)
expect(response_service).to receive(:execute)
subject.execute(user, merge_request, options)
end
context 'when an unexpected error is raised' do
let(:error) { StandardError.new("Error") }
before do
allow_next_instance_of(Gitlab::Llm::VertexAi::Client) do |client|
allow(client).to receive(:text).and_raise(error)
end
allow(Gitlab::ErrorTracking).to receive(:track_exception)
end
it 'records the error' do
subject.execute(user, merge_request, options)
expect(Gitlab::ErrorTracking).to have_received(:track_exception).with(error)
end
it 'publishes a generic error to the graphql subscription' do
errors = { error: { message: 'An unexpected error has occurred.' } }
expect(::Gitlab::Llm::VertexAi::ResponseModifiers::Predictions)
.to receive(:new)
.with(errors.to_json)
.and_return(response_modifier)
expect(::Gitlab::Llm::GraphqlSubscriptionResponseService)
.to receive(:new)
.with(*params)
.and_return(response_service)
expect(response_service).to receive(:execute)
subject.execute(user, merge_request, options)
end
end
end
end
end
end