From 95c65597e8847333f5e86b61bcf77c6ce28bbf68 Mon Sep 17 00:00:00 2001
From: Jan Provaznik <jprovaznik@gitlab.com>
Date: Wed, 30 Aug 2023 17:23:01 +0000
Subject: [PATCH] Update AI client SLI

* with this change, the success ratio of AI requests is measured
  outside of the exponential backoff retry loop
* the llm_chat_answers SLI is replaced with the more generic
  llm_completion SLI, which tracks the error ratio of all AI actions
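* for illustration, the worker now records the outcome of each AI
  action roughly as sketched below; the label values are one example
  taken from CompletionsFactory::COMPLETIONS, not a stable interface:

      # Treat a response with an empty errors list as a success.
      success = response.try(:errors)&.empty?

      Gitlab::Metrics::Sli::ErrorRate[:llm_completion].increment(
        labels: {
          feature_category: :duo_chat,
          service_class: 'Gitlab::Llm::Completions::Chat'
        },
        error: !success
      )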
---
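Notes (this section is ignored by `git am`): the llm_client_request SLI
is now incremented exactly once per logical request, in an `ensure`
block wrapped around the retry loop, so a request that exhausts its
retries counts as a single error rather than one error per attempt. The
client label is derived from the client class via the mapping added in
Gitlab::Metrics::Llm, for example:

    Gitlab::Metrics::Llm.client_label(Gitlab::Llm::VertexAi::Client) # => :vertex_ai
    Gitlab::Metrics::Llm.client_label(Gitlab::Llm::OpenAi::Client)   # => :open_ai
    Gitlab::Metrics::Llm.client_label(String)                        # => :unknown
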
 ee/app/workers/llm/completion_worker.rb       | 25 +++++++++++-
 ee/lib/gitlab/llm/anthropic/client.rb         |  9 -----
 .../llm/anthropic/completions/tanuki_bot.rb   |  2 +
 ee/lib/gitlab/llm/completions/chat.rb         |  7 +---
 .../llm/completions/explain_vulnerability.rb  | 10 ++++-
 .../completions/summarize_all_open_notes.rb   |  2 +
 ee/lib/gitlab/llm/completions_factory.rb      | 39 ++++++++++++-------
 .../llm/concerns/exponential_backoff.rb       | 11 +++++-
 .../gitlab/llm/concerns/measured_request.rb   | 17 --------
 ee/lib/gitlab/llm/open_ai/client.rb           |  5 ---
 .../llm/open_ai/completions/explain_code.rb   |  2 +
 .../completions/generate_commit_message.rb    |  4 ++
 .../completions/generate_description.rb       |  2 +
 .../open_ai/completions/generate_test_file.rb |  4 ++
 .../open_ai/completions/summarize_review.rb   |  2 +
 ee/lib/gitlab/llm/vertex_ai/client.rb         |  5 ---
 .../completions/analyze_ci_job_failure.rb     |  9 +++--
 .../llm/vertex_ai/completions/explain_code.rb |  2 +
 .../fill_in_merge_request_template.rb         |  2 +
 .../completions/generate_commit_message.rb    |  2 +
 .../completions/generate_test_file.rb         |  4 ++
 .../vertex_ai/completions/summarize_review.rb |  4 ++
 .../completions/summarize_submitted_review.rb |  3 +-
 ee/lib/gitlab/metrics/llm.rb                  | 25 ++++++++----
 .../lib/gitlab/llm/anthropic/client_spec.rb   | 15 ++++++-
 .../lib/gitlab/llm/completions/chat_spec.rb   |  4 --
 .../gitlab/llm/completions_factory_spec.rb    | 12 ++++++
 ee/spec/lib/gitlab/llm/open_ai/client_spec.rb | 15 ++++++-
 .../completions/generate_test_file_spec.rb    | 24 ++++++++++++
 .../lib/gitlab/llm/vertex_ai/client_spec.rb   | 16 +++++++-
 ee/spec/lib/gitlab/metrics/llm_spec.rb        | 18 +++++++--
 .../llm/measured_request_shared_examples.rb   | 12 +++---
 ee/spec/workers/llm/completion_worker_spec.rb | 19 ++++++++-
 33 files changed, 239 insertions(+), 93 deletions(-)
 delete mode 100644 ee/lib/gitlab/llm/concerns/measured_request.rb

diff --git a/ee/app/workers/llm/completion_worker.rb b/ee/app/workers/llm/completion_worker.rb
index 5dc7fc1dfcb93..b94905b0768fe 100644
--- a/ee/app/workers/llm/completion_worker.rb
+++ b/ee/app/workers/llm/completion_worker.rb
@@ -29,17 +29,38 @@ def perform(user_id, resource_id, resource_class, ai_action_name, options = {})
       return if resource && !user.can?("read_#{resource.to_ability_name}", resource)
 
       options[:extra_resource] = ::Llm::ExtraResourceFinder.new(user, options.delete(:referer_url)).execute
-
       params = options.extract!(:request_id, :internal_request, :cache_response, :client_subscription_id)
       logger.debug(message: "Params", params: params)
+
       ai_completion = ::Gitlab::Llm::CompletionsFactory.completion(ai_action_name.to_sym, params)
+      raise NameError, "completion class for action #{ai_action_name} not found" unless ai_completion
+
       logger.debug(message: "Getting Completion Service from factory", class_name: ai_completion.class.name)
+      response = ai_completion.execute(user, resource, options)
+      update_error_rate(ai_action_name, response)
+    rescue StandardError => err
+      update_error_rate(ai_action_name)
 
-      ai_completion.execute(user, resource, options) if ai_completion
+      raise err
     end
 
     private
 
+    def update_error_rate(ai_action_name, response = nil)
+      completion = ::Gitlab::Llm::CompletionsFactory::COMPLETIONS[ai_action_name.to_sym]
+      return unless completion
+
+      success = response.try(:errors)&.empty?
+
+      Gitlab::Metrics::Sli::ErrorRate[:llm_completion].increment(
+        labels: {
+          feature_category: completion[:feature_category],
+          service_class: completion[:service_class].name
+        },
+        error: !success
+      )
+    end
+
     def logger
       @logger ||= Gitlab::Llm::Logger.build
     end
diff --git a/ee/lib/gitlab/llm/anthropic/client.rb b/ee/lib/gitlab/llm/anthropic/client.rb
index 018f1d5a7aaa4..e7cb7934162e2 100644
--- a/ee/lib/gitlab/llm/anthropic/client.rb
+++ b/ee/lib/gitlab/llm/anthropic/client.rb
@@ -5,7 +5,6 @@ module Llm
     module Anthropic
       class Client
         include ::Gitlab::Llm::Concerns::ExponentialBackoff
-        include ::Gitlab::Llm::Concerns::MeasuredRequest
 
         URL = 'https://api.anthropic.com'
         DEFAULT_MODEL = 'claude-2'
@@ -23,9 +22,6 @@ def complete(prompt:, **options)
          # We do not allow setting `stream` because the separate `#stream` method should be used for streaming.
           # The reason is that streaming the response would not work with the exponential backoff mechanism.
           perform_completion_request(prompt: prompt, options: options.except(:stream))
-        rescue StandardError => e
-          increment_metric(client: :anthropic)
-          raise e
         end
 
         def stream(prompt:, **options)
@@ -40,9 +36,6 @@ def stream(prompt:, **options)
           end
 
           response_body
-        rescue StandardError => e
-          increment_metric(client: :anthropic)
-          raise e
         end
 
         private
@@ -67,8 +60,6 @@ def perform_completion_request(prompt:, options:)
 
           logger.debug(message: "Received response from Anthropic", response: response)
 
-          increment_metric(client: :anthropic, response: response)
-
           response
         end
 
diff --git a/ee/lib/gitlab/llm/anthropic/completions/tanuki_bot.rb b/ee/lib/gitlab/llm/anthropic/completions/tanuki_bot.rb
index 0383c82eab43d..f7f3c933739e9 100644
--- a/ee/lib/gitlab/llm/anthropic/completions/tanuki_bot.rb
+++ b/ee/lib/gitlab/llm/anthropic/completions/tanuki_bot.rb
@@ -21,6 +21,8 @@ def execute(user, resource, options)
             ::Gitlab::Llm::GraphqlSubscriptionResponseService.new(
               user, resource, response_modifier, options: response_options
             ).execute
+
+            response_modifier
           end
         end
       end
diff --git a/ee/lib/gitlab/llm/completions/chat.rb b/ee/lib/gitlab/llm/completions/chat.rb
index 80f90b44be7ff..c47324975e82c 100644
--- a/ee/lib/gitlab/llm/completions/chat.rb
+++ b/ee/lib/gitlab/llm/completions/chat.rb
@@ -39,11 +39,6 @@ def execute(user, resource, options)
             stream_response_handler: stream_response_handler
           ).execute
 
-          Gitlab::Metrics::Sli::Apdex[:llm_chat_answers].increment(
-            labels: { tool: response.last_tool_name || :unknown },
-            success: response.status == :ok
-          )
-
           response_modifier = Gitlab::Llm::Chain::ResponseModifier.new(response)
 
           context.tools_used.each do |tool|
@@ -59,6 +54,8 @@ def execute(user, resource, options)
           end
 
           response_handler.execute(response: response_modifier)
+
+          response_modifier
         end
 
         def tools(user)
diff --git a/ee/lib/gitlab/llm/completions/explain_vulnerability.rb b/ee/lib/gitlab/llm/completions/explain_vulnerability.rb
index 6f87b911860a6..66c2d3b1e026e 100644
--- a/ee/lib/gitlab/llm/completions/explain_vulnerability.rb
+++ b/ee/lib/gitlab/llm/completions/explain_vulnerability.rb
@@ -11,16 +11,22 @@ class ExplainVulnerability < Gitlab::Llm::Completions::Base
 
         def execute(user, vulnerability, options)
           response = response_for(user, vulnerability, options)
+          response_modifier = modify_response(response, vulnerability)
+          ::Gitlab::Llm::GraphqlSubscriptionResponseService.new(
+            user, vulnerability, response_modifier, options: response_options
+          ).execute
+
+          response_modifier
         rescue StandardError => error
           Gitlab::ErrorTracking.track_exception(error)
 
           response = formatted_error_response(error_message(error))
-        ensure
           response_modifier = modify_response(response, vulnerability)
-
           ::Gitlab::Llm::GraphqlSubscriptionResponseService.new(
             user, vulnerability, response_modifier, options: response_options
           ).execute
+
+          response_modifier
         end
 
         private
diff --git a/ee/lib/gitlab/llm/completions/summarize_all_open_notes.rb b/ee/lib/gitlab/llm/completions/summarize_all_open_notes.rb
index 1008e85988a0e..c57d709f7c7bd 100644
--- a/ee/lib/gitlab/llm/completions/summarize_all_open_notes.rb
+++ b/ee/lib/gitlab/llm/completions/summarize_all_open_notes.rb
@@ -23,6 +23,8 @@ def execute(user, issuable, options = {})
           ::Gitlab::Llm::GraphqlSubscriptionResponseService.new(
             user, issuable, response_modifier, options: response_options
           ).execute
+
+          response_modifier
         end
 
         private
diff --git a/ee/lib/gitlab/llm/completions_factory.rb b/ee/lib/gitlab/llm/completions_factory.rb
index 5d50cf79011fd..f0783bc34ee68 100644
--- a/ee/lib/gitlab/llm/completions_factory.rb
+++ b/ee/lib/gitlab/llm/completions_factory.rb
@@ -6,55 +6,68 @@ class CompletionsFactory
       COMPLETIONS = {
         explain_vulnerability: {
           service_class: ::Gitlab::Llm::Completions::ExplainVulnerability,
-          prompt_class: ::Gitlab::Llm::Templates::ExplainVulnerability
+          prompt_class: ::Gitlab::Llm::Templates::ExplainVulnerability,
+          feature_category: :vulnerability_management
         },
         summarize_comments: {
           service_class: ::Gitlab::Llm::Completions::SummarizeAllOpenNotes,
-          prompt_class: nil
+          prompt_class: nil,
+          feature_category: :ai_abstraction_layer
         },
         summarize_review: {
           service_class: ::Gitlab::Llm::VertexAi::Completions::SummarizeReview,
-          prompt_class: ::Gitlab::Llm::Templates::SummarizeReview
+          prompt_class: ::Gitlab::Llm::Templates::SummarizeReview,
+          feature_category: :ai_abstraction_layer
         },
         explain_code: {
           service_class: ::Gitlab::Llm::VertexAi::Completions::ExplainCode,
-          prompt_class: ::Gitlab::Llm::VertexAi::Templates::ExplainCode
+          prompt_class: ::Gitlab::Llm::VertexAi::Templates::ExplainCode,
+          feature_category: :ai_abstraction_layer
         },
         explain_code_open_ai: {
           service_class: ::Gitlab::Llm::OpenAi::Completions::ExplainCode,
-          prompt_class: ::Gitlab::Llm::OpenAi::Templates::ExplainCode
+          prompt_class: ::Gitlab::Llm::OpenAi::Templates::ExplainCode,
+          feature_category: :code_review_workflow
         },
         tanuki_bot: {
           service_class: ::Gitlab::Llm::Anthropic::Completions::TanukiBot,
-          prompt_class: ::Gitlab::Llm::Anthropic::Templates::TanukiBot
+          prompt_class: ::Gitlab::Llm::Anthropic::Templates::TanukiBot,
+          feature_category: :ai_abstraction_layer
         },
         generate_test_file: {
           service_class: ::Gitlab::Llm::VertexAi::Completions::GenerateTestFile,
-          prompt_class: ::Gitlab::Llm::Templates::GenerateTestFile
+          prompt_class: ::Gitlab::Llm::Templates::GenerateTestFile,
+          feature_category: :code_review_workflow
         },
         generate_description: {
           service_class: ::Gitlab::Llm::OpenAi::Completions::GenerateDescription,
-          prompt_class: ::Gitlab::Llm::OpenAi::Templates::GenerateDescription
+          prompt_class: ::Gitlab::Llm::OpenAi::Templates::GenerateDescription,
+          feature_category: :ai_abstraction_layer
         },
         generate_commit_message: {
           service_class: ::Gitlab::Llm::VertexAi::Completions::GenerateCommitMessage,
-          prompt_class: ::Gitlab::Llm::Templates::GenerateCommitMessage
+          prompt_class: ::Gitlab::Llm::Templates::GenerateCommitMessage,
+          feature_category: :code_review_workflow
         },
         analyze_ci_job_failure: {
           service_class: Gitlab::Llm::VertexAi::Completions::AnalyzeCiJobFailure,
-          prompt_class: nil
+          prompt_class: nil,
+          feature_category: :continuous_integration
         },
         chat: {
           service_class: ::Gitlab::Llm::Completions::Chat,
-          prompt_class: nil
+          prompt_class: nil,
+          feature_category: :duo_chat
         },
         fill_in_merge_request_template: {
           service_class: ::Gitlab::Llm::VertexAi::Completions::FillInMergeRequestTemplate,
-          prompt_class: ::Gitlab::Llm::Templates::FillInMergeRequestTemplate
+          prompt_class: ::Gitlab::Llm::Templates::FillInMergeRequestTemplate,
+          feature_category: :code_review_workflow
         },
         summarize_submitted_review: {
           service_class: ::Gitlab::Llm::VertexAi::Completions::SummarizeSubmittedReview,
-          prompt_class: ::Gitlab::Llm::Templates::SummarizeSubmittedReview
+          prompt_class: ::Gitlab::Llm::Templates::SummarizeSubmittedReview,
+          feature_category: :code_review_workflow
         }
       }.freeze
 
diff --git a/ee/lib/gitlab/llm/concerns/exponential_backoff.rb b/ee/lib/gitlab/llm/concerns/exponential_backoff.rb
index 66dc650e90221..4536476779894 100644
--- a/ee/lib/gitlab/llm/concerns/exponential_backoff.rb
+++ b/ee/lib/gitlab/llm/concerns/exponential_backoff.rb
@@ -23,7 +23,7 @@ def retry_methods_with_exponential_backoff(*method_names)
 
             define_method(method_name) do |*args, **kwargs|
               run_with_circuit do
-                retry_with_exponential_backoff do
+                retry_with_monitored_exponential_backoff do
                   original_method.bind_call(self, *args, **kwargs)
                 end
               end
@@ -33,6 +33,15 @@ def retry_methods_with_exponential_backoff(*method_names)
 
         private
 
+        def retry_with_monitored_exponential_backoff(&block)
+          response = retry_with_exponential_backoff(&block)
+        ensure
+          success = (200..299).cover?(response&.code)
+          client = Gitlab::Metrics::Llm.client_label(self.class)
+
+          Gitlab::Metrics::Sli::ErrorRate[:llm_client_request].increment(labels: { client: client }, error: !success)
+        end
+
         def retry_with_exponential_backoff
           retries = 0
           delay = INITIAL_DELAY
diff --git a/ee/lib/gitlab/llm/concerns/measured_request.rb b/ee/lib/gitlab/llm/concerns/measured_request.rb
deleted file mode 100644
index 6a92699a0bd42..0000000000000
--- a/ee/lib/gitlab/llm/concerns/measured_request.rb
+++ /dev/null
@@ -1,17 +0,0 @@
-# frozen_string_literal: true
-
-module Gitlab
-  module Llm
-    module Concerns
-      module MeasuredRequest
-        extend ActiveSupport::Concern
-
-        def increment_metric(client:, response: nil)
-          success = (200...299).cover?(response&.code)
-
-          Gitlab::Metrics::Sli::Apdex[:llm_client_request].increment(labels: { client: client }, success: success)
-        end
-      end
-    end
-  end
-end
diff --git a/ee/lib/gitlab/llm/open_ai/client.rb b/ee/lib/gitlab/llm/open_ai/client.rb
index ebd5b619eaec2..23bcbe9440c43 100644
--- a/ee/lib/gitlab/llm/open_ai/client.rb
+++ b/ee/lib/gitlab/llm/open_ai/client.rb
@@ -7,7 +7,6 @@ module Llm
     module OpenAi
       class Client
         include ::Gitlab::Llm::Concerns::ExponentialBackoff
-        include ::Gitlab::Llm::Concerns::MeasuredRequest
 
         InputModerationError = Class.new(StandardError)
         OutputModerationError = Class.new(StandardError)
@@ -119,16 +118,12 @@ def request(endpoint:, moderated:, **options)
           logger.debug(message: "Received response from OpenAI", response: response)
 
           track_cost(endpoint, response.parsed_response&.dig('usage'))
-          increment_metric(client: :open_ai, response: response)
 
           if should_moderate?(:output, moderated)
             moderate!(:output, moderation_output(endpoint, response.parsed_response))
           end
 
           response
-        rescue StandardError => e
-          increment_metric(client: :open_ai)
-          raise e
         end
 
         def track_cost(endpoint, usage_data)
diff --git a/ee/lib/gitlab/llm/open_ai/completions/explain_code.rb b/ee/lib/gitlab/llm/open_ai/completions/explain_code.rb
index b2a6d305445c8..0ce287a390ac6 100644
--- a/ee/lib/gitlab/llm/open_ai/completions/explain_code.rb
+++ b/ee/lib/gitlab/llm/open_ai/completions/explain_code.rb
@@ -14,6 +14,8 @@ def execute(user, project, options)
             ::Gitlab::Llm::GraphqlSubscriptionResponseService
               .new(user, project, response_modifier, options: response_options)
               .execute
+
+            response_modifier
           end
         end
       end
diff --git a/ee/lib/gitlab/llm/open_ai/completions/generate_commit_message.rb b/ee/lib/gitlab/llm/open_ai/completions/generate_commit_message.rb
index a85ca524bc500..f4c89a32ad655 100644
--- a/ee/lib/gitlab/llm/open_ai/completions/generate_commit_message.rb
+++ b/ee/lib/gitlab/llm/open_ai/completions/generate_commit_message.rb
@@ -14,6 +14,8 @@ def execute(user, merge_request, options)
             ::Gitlab::Llm::GraphqlSubscriptionResponseService.new(
               user, merge_request, response_modifier, options: options
             ).execute
+
+            response_modifier
           rescue StandardError => error
             Gitlab::ErrorTracking.track_exception(error)
 
@@ -24,6 +26,8 @@ def execute(user, merge_request, options)
             ::Gitlab::Llm::GraphqlSubscriptionResponseService.new(
               user, merge_request, response_modifier, options: options
             ).execute
+
+            response_modifier
           end
 
           private
diff --git a/ee/lib/gitlab/llm/open_ai/completions/generate_description.rb b/ee/lib/gitlab/llm/open_ai/completions/generate_description.rb
index 2e9f2ecf6f52e..b790e7aa01cd6 100644
--- a/ee/lib/gitlab/llm/open_ai/completions/generate_description.rb
+++ b/ee/lib/gitlab/llm/open_ai/completions/generate_description.rb
@@ -41,6 +41,8 @@ def execute(user, issuable, options)
             ::Gitlab::Llm::GraphqlSubscriptionResponseService.new(
               user, issuable, response_modifier, options: response_options
             ).execute
+
+            response_modifier
           end
         end
       end
diff --git a/ee/lib/gitlab/llm/open_ai/completions/generate_test_file.rb b/ee/lib/gitlab/llm/open_ai/completions/generate_test_file.rb
index 7d26c9220a507..f103a71cbbe31 100644
--- a/ee/lib/gitlab/llm/open_ai/completions/generate_test_file.rb
+++ b/ee/lib/gitlab/llm/open_ai/completions/generate_test_file.rb
@@ -14,6 +14,8 @@ def execute(user, merge_request, options)
             ::Gitlab::Llm::GraphqlSubscriptionResponseService.new(
               user, merge_request, response_modifier, options: options
             ).execute
+
+            response_modifier
           rescue StandardError => error
             Gitlab::ErrorTracking.track_exception(error)
 
@@ -24,6 +26,8 @@ def execute(user, merge_request, options)
             ::Gitlab::Llm::GraphqlSubscriptionResponseService.new(
               user, merge_request, response_modifier, options: options
             ).execute
+
+            response_modifier
           end
 
           private
diff --git a/ee/lib/gitlab/llm/open_ai/completions/summarize_review.rb b/ee/lib/gitlab/llm/open_ai/completions/summarize_review.rb
index 8a46c7073083e..231f64912b0ca 100644
--- a/ee/lib/gitlab/llm/open_ai/completions/summarize_review.rb
+++ b/ee/lib/gitlab/llm/open_ai/completions/summarize_review.rb
@@ -18,6 +18,8 @@ def execute(user, merge_request, _ = {})
             ::Gitlab::Llm::GraphqlSubscriptionResponseService.new(
               user, merge_request, response_modifier, options: response_options
             ).execute
+
+            response_modifier
           end
 
           private
diff --git a/ee/lib/gitlab/llm/vertex_ai/client.rb b/ee/lib/gitlab/llm/vertex_ai/client.rb
index 057c9b937f310..e1b7ffa062ddf 100644
--- a/ee/lib/gitlab/llm/vertex_ai/client.rb
+++ b/ee/lib/gitlab/llm/vertex_ai/client.rb
@@ -5,7 +5,6 @@ module Llm
     module VertexAi
       class Client
         include ::Gitlab::Llm::Concerns::ExponentialBackoff
-        include ::Gitlab::Llm::Concerns::MeasuredRequest
         extend ::Gitlab::Utils::Override
 
         def initialize(_user, client_options = {})
@@ -109,12 +108,8 @@ def request(content:, config:, **options)
           )
 
           logger.debug(message: "Received response from Vertex", response: response)
-          increment_metric(client: :vertex_ai, response: response)
 
           response
-        rescue StandardError => e
-          increment_metric(client: :vertex_ai)
-          raise e
         end
 
         def service_name
diff --git a/ee/lib/gitlab/llm/vertex_ai/completions/analyze_ci_job_failure.rb b/ee/lib/gitlab/llm/vertex_ai/completions/analyze_ci_job_failure.rb
index 43f939bc035c7..27357930cc05a 100644
--- a/ee/lib/gitlab/llm/vertex_ai/completions/analyze_ci_job_failure.rb
+++ b/ee/lib/gitlab/llm/vertex_ai/completions/analyze_ci_job_failure.rb
@@ -28,12 +28,13 @@ def execute(user, job, _options)
             @job = job
 
             response = request
-            content = response&.deep_symbolize_keys&.dig(:predictions, 0, :content)
-
-            return unless content
+            response_modifier = ::Gitlab::Llm::VertexAi::ResponseModifiers::Predictions.new(response)
+            return unless response_modifier.response_body
 
             analysis = Ai::JobFailureAnalysis.new(@job)
-            analysis.save_content(content)
+            analysis.save_content(response_modifier.response_body)
+
+            response_modifier
           end
 
           private
diff --git a/ee/lib/gitlab/llm/vertex_ai/completions/explain_code.rb b/ee/lib/gitlab/llm/vertex_ai/completions/explain_code.rb
index 0937ab4ab6955..cdbe257d48237 100644
--- a/ee/lib/gitlab/llm/vertex_ai/completions/explain_code.rb
+++ b/ee/lib/gitlab/llm/vertex_ai/completions/explain_code.rb
@@ -14,6 +14,8 @@ def execute(user, project, options)
             ::Gitlab::Llm::GraphqlSubscriptionResponseService
               .new(user, project, response_modifier, options: response_options)
               .execute
+
+            response_modifier
           end
         end
       end
diff --git a/ee/lib/gitlab/llm/vertex_ai/completions/fill_in_merge_request_template.rb b/ee/lib/gitlab/llm/vertex_ai/completions/fill_in_merge_request_template.rb
index 45a9400736436..4a41be74a92be 100644
--- a/ee/lib/gitlab/llm/vertex_ai/completions/fill_in_merge_request_template.rb
+++ b/ee/lib/gitlab/llm/vertex_ai/completions/fill_in_merge_request_template.rb
@@ -12,6 +12,8 @@ def execute(user, project, options)
             ::Gitlab::Llm::GraphqlSubscriptionResponseService.new(
               user, project, response_modifier, options: response_options
             ).execute
+
+            response_modifier
           end
 
           private
diff --git a/ee/lib/gitlab/llm/vertex_ai/completions/generate_commit_message.rb b/ee/lib/gitlab/llm/vertex_ai/completions/generate_commit_message.rb
index 16142767b09e5..2030220aaaf75 100644
--- a/ee/lib/gitlab/llm/vertex_ai/completions/generate_commit_message.rb
+++ b/ee/lib/gitlab/llm/vertex_ai/completions/generate_commit_message.rb
@@ -30,6 +30,8 @@ def execute(user, merge_request, options)
             ::Gitlab::Llm::GraphqlSubscriptionResponseService.new(
               user, merge_request, response_modifier, options: options
             ).execute
+
+            response_modifier
           end
 
           private
diff --git a/ee/lib/gitlab/llm/vertex_ai/completions/generate_test_file.rb b/ee/lib/gitlab/llm/vertex_ai/completions/generate_test_file.rb
index f3c50ee93e85c..ba9464e4d46b5 100644
--- a/ee/lib/gitlab/llm/vertex_ai/completions/generate_test_file.rb
+++ b/ee/lib/gitlab/llm/vertex_ai/completions/generate_test_file.rb
@@ -20,6 +20,8 @@ def execute(user, merge_request, options)
             ::Gitlab::Llm::GraphqlSubscriptionResponseService.new(
               user, merge_request, response_modifier, options: options
             ).execute
+
+            response_modifier
           rescue StandardError => error
             Gitlab::ErrorTracking.track_exception(error)
 
@@ -30,6 +32,8 @@ def execute(user, merge_request, options)
             ::Gitlab::Llm::GraphqlSubscriptionResponseService.new(
               user, merge_request, response_modifier, options: options
             ).execute
+
+            response_modifier
           end
 
           private
diff --git a/ee/lib/gitlab/llm/vertex_ai/completions/summarize_review.rb b/ee/lib/gitlab/llm/vertex_ai/completions/summarize_review.rb
index 5706bb0d9b9a3..f6f29fe634b55 100644
--- a/ee/lib/gitlab/llm/vertex_ai/completions/summarize_review.rb
+++ b/ee/lib/gitlab/llm/vertex_ai/completions/summarize_review.rb
@@ -17,6 +17,8 @@ def execute(user, merge_request, options)
             ::Gitlab::Llm::GraphqlSubscriptionResponseService.new(
               user, merge_request, response_modifier, options: options
             ).execute
+
+            response_modifier
           rescue StandardError => error
             Gitlab::ErrorTracking.track_exception(error)
 
@@ -27,6 +29,8 @@ def execute(user, merge_request, options)
             ::Gitlab::Llm::GraphqlSubscriptionResponseService.new(
               user, merge_request, response_modifier, options: options
             ).execute
+
+            response_modifier
           end
 
           private
diff --git a/ee/lib/gitlab/llm/vertex_ai/completions/summarize_submitted_review.rb b/ee/lib/gitlab/llm/vertex_ai/completions/summarize_submitted_review.rb
index b8f5d438bfd5f..ac18ae3b9858b 100644
--- a/ee/lib/gitlab/llm/vertex_ai/completions/summarize_submitted_review.rb
+++ b/ee/lib/gitlab/llm/vertex_ai/completions/summarize_submitted_review.rb
@@ -14,8 +14,9 @@ def execute(user, merge_request, options)
 
             response = response_for(user, review)
             response_modifier = ::Gitlab::Llm::VertexAi::ResponseModifiers::Predictions.new(response)
-
             store_response(response_modifier, review, mr_diff)
+
+            response_modifier
           end
           # rubocop:enable CodeReuse/ActiveRecord
 
diff --git a/ee/lib/gitlab/metrics/llm.rb b/ee/lib/gitlab/metrics/llm.rb
index 119a4f87fda83..bef43c204d41c 100644
--- a/ee/lib/gitlab/metrics/llm.rb
+++ b/ee/lib/gitlab/metrics/llm.rb
@@ -4,16 +4,25 @@ module Gitlab
   module Metrics
     module Llm
       class << self
+        CLIENT_NAMES = {
+          'Gitlab::Llm::VertexAi::Client' => :vertex_ai,
+          'Gitlab::Llm::Anthropic::Client' => :anthropic,
+          'Gitlab::Llm::OpenAi::Client' => :open_ai
+        }.freeze
+
         def initialize_slis!
-          tool_labels = Gitlab::Llm::Completions::Chat::TOOLS.map { |tool_class| { tool: tool_class::Executor::NAME } }
-          tool_labels << { tool: :unknown }
+          completion_labels = Gitlab::Llm::CompletionsFactory::COMPLETIONS.values.map do |completion|
+            { feature_category: completion[:feature_category], service_class: completion[:service_class].name }
+          end
+
+          Gitlab::Metrics::Sli::ErrorRate.initialize_sli(:llm_completion, completion_labels)
+
+          client_labels = (CLIENT_NAMES.values + [:unknown]).map { |client| { client: client } }
+          Gitlab::Metrics::Sli::ErrorRate.initialize_sli(:llm_client_request, client_labels)
+        end
 
-          Gitlab::Metrics::Sli::Apdex.initialize_sli(:llm_chat_answers, tool_labels)
-          Gitlab::Metrics::Sli::Apdex.initialize_sli(:llm_client_request, [
-            { client: :anthropic },
-            { client: :vertex_ai },
-            { client: :open_ai }
-          ])
+        def client_label(cls)
+          CLIENT_NAMES.fetch(cls.name, :unknown)
         end
       end
     end
diff --git a/ee/spec/lib/gitlab/llm/anthropic/client_spec.rb b/ee/spec/lib/gitlab/llm/anthropic/client_spec.rb
index fa10ea0f4c2f4..b2ca617e5883f 100644
--- a/ee/spec/lib/gitlab/llm/anthropic/client_spec.rb
+++ b/ee/spec/lib/gitlab/llm/anthropic/client_spec.rb
@@ -42,6 +42,7 @@
   end
 
   let(:response_body) { expected_response.to_json }
+  let(:http_status) { 200 }
 
   before do
     stub_application_setting(anthropic_api_key: api_key)
@@ -51,7 +52,7 @@
         headers: expected_request_headers
       )
       .to_return(
-        status: 200,
+        status: http_status,
         body: response_body,
         headers: { 'Content-Type' => 'application/json' }
       )
@@ -70,7 +71,17 @@
           allow(Gitlab::HTTP).to receive(:post).and_raise(StandardError)
         end
 
-        it_behaves_like 'measured Llm request with error'
+        it_behaves_like 'measured Llm request with error', StandardError
+      end
+
+      context 'when request is retried' do
+        let(:http_status) { 429 }
+
+        before do
+          stub_const("Gitlab::Llm::Concerns::ExponentialBackoff::INITIAL_DELAY", 0.0)
+        end
+
+        it_behaves_like 'measured Llm request with error', Gitlab::Llm::Concerns::ExponentialBackoff::RateLimitError
       end
     end
 
diff --git a/ee/spec/lib/gitlab/llm/completions/chat_spec.rb b/ee/spec/lib/gitlab/llm/completions/chat_spec.rb
index 87512f96a3183..5f2852691a0d1 100644
--- a/ee/spec/lib/gitlab/llm/completions/chat_spec.rb
+++ b/ee/spec/lib/gitlab/llm/completions/chat_spec.rb
@@ -61,9 +61,6 @@
         expect(instance).to receive(:execute).and_return(answer)
       end
 
-      expect(Gitlab::Metrics::Sli::Apdex[:llm_chat_answers])
-        .to receive(:increment)
-        .with(labels: { tool: "IssueIdentifier" }, success: true)
       expect(response_handler).to receive(:execute)
       expect(::Gitlab::Llm::ResponseService).to receive(:new).with(context, { request_id: 'uuid' })
         .and_return(response_handler)
@@ -127,7 +124,6 @@
           expect(instance).to receive(:execute).and_return(answer)
         end
 
-        allow(Gitlab::Metrics::Sli::Apdex[:llm_chat_answers]).to receive(:increment)
         allow(::Gitlab::Llm::Chain::GitlabContext).to receive(:new).and_return(context)
 
         subject
diff --git a/ee/spec/lib/gitlab/llm/completions_factory_spec.rb b/ee/spec/lib/gitlab/llm/completions_factory_spec.rb
index 843cb69cbf91b..18cdcf67244a7 100644
--- a/ee/spec/lib/gitlab/llm/completions_factory_spec.rb
+++ b/ee/spec/lib/gitlab/llm/completions_factory_spec.rb
@@ -3,6 +3,18 @@
 require 'spec_helper'
 
 RSpec.describe Gitlab::Llm::CompletionsFactory, feature_category: :ai_abstraction_layer do
+  describe 'completion definitions' do
+    it 'has a valid :feature_category set', :aggregate_failures do
+      feature_categories = Gitlab::FeatureCategories.default.categories.map(&:to_sym).to_set
+
+      ::Gitlab::Llm::CompletionsFactory::COMPLETIONS.each do |action, completion|
+        expect(completion[:feature_category]).to be_a(Symbol)
+        expect(feature_categories)
+          .to(include(completion[:feature_category]), "expected #{action} to declare a valid feature_category")
+      end
+    end
+  end
+
   describe ".completion" do
     context 'with existing completion' do
       let(:completion_name) { :summarize_review }
diff --git a/ee/spec/lib/gitlab/llm/open_ai/client_spec.rb b/ee/spec/lib/gitlab/llm/open_ai/client_spec.rb
index 2d118a38aa9f3..fcfc37010922c 100644
--- a/ee/spec/lib/gitlab/llm/open_ai/client_spec.rb
+++ b/ee/spec/lib/gitlab/llm/open_ai/client_spec.rb
@@ -107,7 +107,7 @@
 
       counter = instance_double(Prometheus::Client::Counter, increment: true)
 
-      allow(Gitlab::Metrics::Sli::Apdex[:llm_client_request]).to receive(:increment)
+      allow(Gitlab::Metrics::Sli::ErrorRate[:llm_client_request]).to receive(:increment)
 
       allow(Gitlab::Metrics)
         .to receive(:counter)
@@ -317,7 +317,18 @@
           end
         end
 
-        it_behaves_like 'measured Llm request with error'
+        it_behaves_like 'measured Llm request with error', StandardError
+      end
+
+      context 'when request is retried' do
+        let(:http_status) { 429 }
+
+        before do
+          stub_const("Gitlab::Llm::Concerns::ExponentialBackoff::INITIAL_DELAY", 0.0)
+          allow(response_double).to receive(:too_many_requests?).and_return(true)
+        end
+
+        it_behaves_like 'measured Llm request with error', Gitlab::Llm::Concerns::ExponentialBackoff::RateLimitError
       end
     end
   end
diff --git a/ee/spec/lib/gitlab/llm/open_ai/completions/generate_test_file_spec.rb b/ee/spec/lib/gitlab/llm/open_ai/completions/generate_test_file_spec.rb
index 814a92e31780d..5fdba89c74d1b 100644
--- a/ee/spec/lib/gitlab/llm/open_ai/completions/generate_test_file_spec.rb
+++ b/ee/spec/lib/gitlab/llm/open_ai/completions/generate_test_file_spec.rb
@@ -70,6 +70,30 @@
 
         generate_test_file
       end
+
+      context 'when an unexpected error is raised' do
+        let(:error) { StandardError.new("Error") }
+        let(:response_modifier) { double }
+        let(:response_service) { double }
+
+        before do
+          allow_next_instance_of(Gitlab::Llm::OpenAi::Client) do |client|
+            allow(client).to receive(:chat).and_raise(error)
+          end
+        end
+
+        it 'publishes a generic error to the graphql subscription' do
+          errors = { error: { message: 'An unexpected error has occurred.' } }
+
+          expect(Gitlab::ErrorTracking).to receive(:track_exception).with(error)
+          expect(::Gitlab::Llm::OpenAi::ResponseModifiers::Chat).to receive(:new)
+            .with(errors.to_json).and_return(response_modifier)
+          expect(::Gitlab::Llm::GraphqlSubscriptionResponseService).to receive(:new).and_return(response_service)
+          expect(response_service).to receive(:execute)
+
+          generate_test_file
+        end
+      end
     end
   end
 end
diff --git a/ee/spec/lib/gitlab/llm/vertex_ai/client_spec.rb b/ee/spec/lib/gitlab/llm/vertex_ai/client_spec.rb
index 357e86583171b..b7453165ae4b3 100644
--- a/ee/spec/lib/gitlab/llm/vertex_ai/client_spec.rb
+++ b/ee/spec/lib/gitlab/llm/vertex_ai/client_spec.rb
@@ -210,11 +210,13 @@
       )
     end
 
+    let(:http_status) { 200 }
+
     subject { described_class.new(user).text(content: 'anything', **options) }
 
     before do
       allow(Gitlab::Llm::VertexAi::Configuration).to receive(:new).and_return(config)
-      stub_request(:post, url).to_return(status: 200, body: 'some response')
+      stub_request(:post, url).to_return(status: http_status, body: 'some response')
     end
 
     context 'when measuring request success' do
@@ -227,7 +229,17 @@
           allow(Gitlab::HTTP).to receive(:post).and_raise(StandardError)
         end
 
-        it_behaves_like 'measured Llm request with error'
+        it_behaves_like 'measured Llm request with error', StandardError
+      end
+
+      context 'when request is retried' do
+        let(:http_status) { 429 }
+
+        before do
+          stub_const("Gitlab::Llm::Concerns::ExponentialBackoff::INITIAL_DELAY", 0.0)
+        end
+
+        it_behaves_like 'measured Llm request with error', Gitlab::Llm::Concerns::ExponentialBackoff::RateLimitError
       end
     end
   end
diff --git a/ee/spec/lib/gitlab/metrics/llm_spec.rb b/ee/spec/lib/gitlab/metrics/llm_spec.rb
index 2cca385fe4190..4dea49cb4aa91 100644
--- a/ee/spec/lib/gitlab/metrics/llm_spec.rb
+++ b/ee/spec/lib/gitlab/metrics/llm_spec.rb
@@ -5,11 +5,11 @@
 RSpec.describe Gitlab::Metrics::Llm, feature_category: :ai_abstraction_layer do
   describe '#initialize_slis!' do
     it 'initializes error rate SLIs for Llm' do
-      expect(Gitlab::Metrics::Sli::Apdex).to receive(:initialize_sli).with(
-        :llm_chat_answers,
+      expect(Gitlab::Metrics::Sli::ErrorRate).to receive(:initialize_sli).with(
+        :llm_completion,
         a_kind_of(Array)
       )
-      expect(Gitlab::Metrics::Sli::Apdex).to receive(:initialize_sli).with(
+      expect(Gitlab::Metrics::Sli::ErrorRate).to receive(:initialize_sli).with(
         :llm_client_request,
         a_kind_of(Array)
       )
@@ -17,4 +17,16 @@
       described_class.initialize_slis!
     end
   end
+
+  describe '#client_label' do
+    it 'returns client label for known client classes' do
+      expect(described_class.client_label(Gitlab::Llm::VertexAi::Client)).to eq(:vertex_ai)
+      expect(described_class.client_label(Gitlab::Llm::Anthropic::Client)).to eq(:anthropic)
+      expect(described_class.client_label(Gitlab::Llm::OpenAi::Client)).to eq(:open_ai)
+    end
+
+    it 'returns :unknown for other classes' do
+      expect(described_class.client_label(Gitlab::Llm::Cache)).to eq(:unknown)
+    end
+  end
 end
diff --git a/ee/spec/support/shared_examples/lib/gitlab/llm/measured_request_shared_examples.rb b/ee/spec/support/shared_examples/lib/gitlab/llm/measured_request_shared_examples.rb
index f8ac223d67e30..77cacd25560e2 100644
--- a/ee/spec/support/shared_examples/lib/gitlab/llm/measured_request_shared_examples.rb
+++ b/ee/spec/support/shared_examples/lib/gitlab/llm/measured_request_shared_examples.rb
@@ -2,18 +2,18 @@
 
 RSpec.shared_examples 'measured Llm request' do
   it 'increments llm_client_request counter' do
-    expect(Gitlab::Metrics::Sli::Apdex[:llm_client_request])
-      .to receive(:increment).with(labels: { client: client }, success: true)
+    expect(Gitlab::Metrics::Sli::ErrorRate[:llm_client_request])
+      .to receive(:increment).with(labels: { client: client }, error: false)
 
     subject
   end
 end
 
-RSpec.shared_examples 'measured Llm request with error' do
+RSpec.shared_examples 'measured Llm request with error' do |error_cls|
   it 'increments llm_client_request counter with error true' do
-    expect(Gitlab::Metrics::Sli::Apdex[:llm_client_request])
-      .to receive(:increment).with(labels: { client: client }, success: false)
+    expect(Gitlab::Metrics::Sli::ErrorRate[:llm_client_request])
+      .to receive(:increment).with(labels: { client: client }, error: true)
 
-    expect { subject }.to raise_error(StandardError)
+    expect { subject }.to raise_error(error_cls)
   end
 end
diff --git a/ee/spec/workers/llm/completion_worker_spec.rb b/ee/spec/workers/llm/completion_worker_spec.rb
index c198eb3c06721..734e4ca05e47d 100644
--- a/ee/spec/workers/llm/completion_worker_spec.rb
+++ b/ee/spec/workers/llm/completion_worker_spec.rb
@@ -108,8 +108,23 @@
       context 'when issue type is not supported' do
         let(:resource_type) { 'invalid' }
 
-        it 'raises a NameError' do
-          expect { subject }.to raise_error(NameError, "uninitialized constant Invalid")
+        it 'raises a NameError and updates error rate' do
+          expect(Gitlab::Metrics::Sli::ErrorRate[:llm_completion])
+            .to receive(:increment)
+            .with(labels: {
+              feature_category: :ai_abstraction_layer,
+              service_class: 'Gitlab::Llm::Completions::SummarizeAllOpenNotes'
+            }, error: true)
+
+          expect { subject }.to raise_error(NameError, 'uninitialized constant Invalid')
+        end
+      end
+
+      context 'when an invalid action_name is used' do
+        let(:ai_action_name) { :some_action }
+
+        it 'raises an exception' do
+          expect { subject }.to raise_error(NameError, 'completion class for action some_action not found')
         end
       end
     end
-- 
GitLab