From 95c65597e8847333f5e86b61bcf77c6ce28bbf68 Mon Sep 17 00:00:00 2001
From: Jan Provaznik <jprovaznik@gitlab.com>
Date: Wed, 30 Aug 2023 17:23:01 +0000
Subject: [PATCH] Update AI client SLI

* With this change, the success ratio of AI requests is measured outside of
  the exponential backoff retry loop
* The llm_chat_answers SLI is replaced with the more generic llm_completion
  SLI, which tracks the error ratio of all AI actions
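The per-action measurement can be sketched as follows (illustration only; it
mirrors the code added to completion_worker.rb, with `response` standing in
for whatever object the completion class returns):

    # Every AI action now feeds one generic error-rate SLI, labelled with the
    # action's feature category and the completion service class.
    completion = ::Gitlab::Llm::CompletionsFactory::COMPLETIONS[ai_action_name.to_sym]

    if completion
      success = response.try(:errors)&.empty?

      Gitlab::Metrics::Sli::ErrorRate[:llm_completion].increment(
        labels: {
          feature_category: completion[:feature_category],
          service_class: completion[:service_class].name
        },
        error: !success
      )
    end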
---
 ee/app/workers/llm/completion_worker.rb       | 25 +++++++++++-
 ee/lib/gitlab/llm/anthropic/client.rb         |  9 -----
 .../llm/anthropic/completions/tanuki_bot.rb   |  2 +
 ee/lib/gitlab/llm/completions/chat.rb         |  7 +---
 .../llm/completions/explain_vulnerability.rb  | 10 ++++-
 .../completions/summarize_all_open_notes.rb   |  2 +
 ee/lib/gitlab/llm/completions_factory.rb      | 39 ++++++++++++-------
 .../llm/concerns/exponential_backoff.rb       | 11 +++++-
 .../gitlab/llm/concerns/measured_request.rb   | 17 --------
 ee/lib/gitlab/llm/open_ai/client.rb           |  5 ---
 .../llm/open_ai/completions/explain_code.rb   |  2 +
 .../completions/generate_commit_message.rb    |  4 ++
 .../completions/generate_description.rb       |  2 +
 .../open_ai/completions/generate_test_file.rb |  4 ++
 .../open_ai/completions/summarize_review.rb   |  2 +
 ee/lib/gitlab/llm/vertex_ai/client.rb         |  5 ---
 .../completions/analyze_ci_job_failure.rb     |  9 +++--
 .../llm/vertex_ai/completions/explain_code.rb |  2 +
 .../fill_in_merge_request_template.rb         |  2 +
 .../completions/generate_commit_message.rb    |  2 +
 .../completions/generate_test_file.rb         |  4 ++
 .../vertex_ai/completions/summarize_review.rb |  4 ++
 .../completions/summarize_submitted_review.rb |  3 +-
 ee/lib/gitlab/metrics/llm.rb                  | 25 ++++++++----
 .../lib/gitlab/llm/anthropic/client_spec.rb   | 15 ++++++-
 .../lib/gitlab/llm/completions/chat_spec.rb   |  4 --
 .../gitlab/llm/completions_factory_spec.rb    | 12 ++++++
 ee/spec/lib/gitlab/llm/open_ai/client_spec.rb | 15 ++++++-
 .../completions/generate_test_file_spec.rb    | 24 ++++++++++++
 .../lib/gitlab/llm/vertex_ai/client_spec.rb   | 16 +++++++-
 ee/spec/lib/gitlab/metrics/llm_spec.rb        | 18 +++++++--
 .../llm/measured_request_shared_examples.rb   | 12 +++---
 ee/spec/workers/llm/completion_worker_spec.rb | 19 ++++++++-
 33 files changed, 239 insertions(+), 93 deletions(-)
 delete mode 100644 ee/lib/gitlab/llm/concerns/measured_request.rb

diff --git a/ee/app/workers/llm/completion_worker.rb b/ee/app/workers/llm/completion_worker.rb
index 5dc7fc1dfcb93..b94905b0768fe 100644
--- a/ee/app/workers/llm/completion_worker.rb
+++ b/ee/app/workers/llm/completion_worker.rb
@@ -29,17 +29,38 @@ def perform(user_id, resource_id, resource_class, ai_action_name, options = {})
       return if resource && !user.can?("read_#{resource.to_ability_name}", resource)
 
       options[:extra_resource] = ::Llm::ExtraResourceFinder.new(user, options.delete(:referer_url)).execute
-
       params = options.extract!(:request_id, :internal_request, :cache_response, :client_subscription_id)
       logger.debug(message: "Params", params: params)
 
+      ai_completion = ::Gitlab::Llm::CompletionsFactory.completion(ai_action_name.to_sym, params)
+      raise NameError, "completion class for action #{ai_action_name} not found" unless ai_completion
+
       logger.debug(message: "Getting Completion Service from factory", class_name: ai_completion.class.name)
+      response = ai_completion.execute(user, resource, options)
+      update_error_rate(ai_action_name, response)
+    rescue StandardError => err
+      update_error_rate(ai_action_name)
 
-      ai_completion.execute(user, resource, options) if ai_completion
+      raise err
     end
 
     private
 
+    def update_error_rate(ai_action_name, response = nil)
+      completion = ::Gitlab::Llm::CompletionsFactory::COMPLETIONS[ai_action_name.to_sym]
+      return unless completion
+
+      success = response.try(:errors)&.empty?
+
+      Gitlab::Metrics::Sli::ErrorRate[:llm_completion].increment(
+        labels: {
+          feature_category: completion[:feature_category],
+          service_class: completion[:service_class].name
+        },
+        error: !success
+      )
+    end
+
     def logger
       @logger ||= Gitlab::Llm::Logger.build
     end
diff --git a/ee/lib/gitlab/llm/anthropic/client.rb b/ee/lib/gitlab/llm/anthropic/client.rb
index 018f1d5a7aaa4..e7cb7934162e2 100644
--- a/ee/lib/gitlab/llm/anthropic/client.rb
+++ b/ee/lib/gitlab/llm/anthropic/client.rb
@@ -5,7 +5,6 @@ module Llm
   module Anthropic
     class Client
       include ::Gitlab::Llm::Concerns::ExponentialBackoff
-      include ::Gitlab::Llm::Concerns::MeasuredRequest
 
       URL = 'https://api.anthropic.com'
       DEFAULT_MODEL = 'claude-2'
@@ -23,9 +22,6 @@ def complete(prompt:, **options)
         # We do not allow to set `stream` because the separate `#stream` method should be used for streaming.
         # The reason is that streaming the response would not work with the exponential backoff mechanism.
         perform_completion_request(prompt: prompt, options: options.except(:stream))
-      rescue StandardError => e
-        increment_metric(client: :anthropic)
-        raise e
       end
 
       def stream(prompt:, **options)
@@ -40,9 +36,6 @@ def stream(prompt:, **options)
         end
 
         response_body
-      rescue StandardError => e
-        increment_metric(client: :anthropic)
-        raise e
       end
 
       private
@@ -67,8 +60,6 @@ def perform_completion_request(prompt:, options:)
 
         logger.debug(message: "Received response from Anthropic", response: response)
 
-        increment_metric(client: :anthropic, response: response)
-
         response
       end
diff --git a/ee/lib/gitlab/llm/anthropic/completions/tanuki_bot.rb b/ee/lib/gitlab/llm/anthropic/completions/tanuki_bot.rb
index 0383c82eab43d..f7f3c933739e9 100644
--- a/ee/lib/gitlab/llm/anthropic/completions/tanuki_bot.rb
+++ b/ee/lib/gitlab/llm/anthropic/completions/tanuki_bot.rb
@@ -21,6 +21,8 @@ def execute(user, resource, options)
           ::Gitlab::Llm::GraphqlSubscriptionResponseService.new(
             user, resource, response_modifier, options: response_options
           ).execute
+
+          response_modifier
         end
       end
     end
diff --git a/ee/lib/gitlab/llm/completions/chat.rb b/ee/lib/gitlab/llm/completions/chat.rb
index 80f90b44be7ff..c47324975e82c 100644
--- a/ee/lib/gitlab/llm/completions/chat.rb
+++ b/ee/lib/gitlab/llm/completions/chat.rb
@@ -39,11 +39,6 @@ def execute(user, resource, options)
           stream_response_handler: stream_response_handler
         ).execute
 
-        Gitlab::Metrics::Sli::Apdex[:llm_chat_answers].increment(
-          labels: { tool: response.last_tool_name || :unknown },
-          success: response.status == :ok
-        )
-
         response_modifier = Gitlab::Llm::Chain::ResponseModifier.new(response)
 
         context.tools_used.each do |tool|
@@ -59,6 +54,8 @@ def execute(user, resource, options)
         end
 
         response_handler.execute(response: response_modifier)
+
+        response_modifier
       end
 
       def tools(user)
diff --git a/ee/lib/gitlab/llm/completions/explain_vulnerability.rb b/ee/lib/gitlab/llm/completions/explain_vulnerability.rb
index 6f87b911860a6..66c2d3b1e026e 100644
--- a/ee/lib/gitlab/llm/completions/explain_vulnerability.rb
+++ b/ee/lib/gitlab/llm/completions/explain_vulnerability.rb
@@ -11,16 +11,22 @@ class ExplainVulnerability < Gitlab::Llm::Completions::Base
         def execute(user, vulnerability, options)
           response = response_for(user, vulnerability, options)
+          response_modifier = modify_response(response, vulnerability)
+
+          ::Gitlab::Llm::GraphqlSubscriptionResponseService.new(
+            user, vulnerability, response_modifier, options: response_options
+          ).execute
+
+          response_modifier
         rescue StandardError => error
           Gitlab::ErrorTracking.track_exception(error)
 
           response = formatted_error_response(error_message(error))
-        ensure
           response_modifier = modify_response(response, vulnerability)
-
           ::Gitlab::Llm::GraphqlSubscriptionResponseService.new(
             user, vulnerability, response_modifier, options: response_options
           ).execute
+
+          response_modifier
         end
 
         private
diff --git a/ee/lib/gitlab/llm/completions/summarize_all_open_notes.rb b/ee/lib/gitlab/llm/completions/summarize_all_open_notes.rb
index 1008e85988a0e..c57d709f7c7bd 100644
--- a/ee/lib/gitlab/llm/completions/summarize_all_open_notes.rb
+++ b/ee/lib/gitlab/llm/completions/summarize_all_open_notes.rb
@@ -23,6 +23,8 @@ def execute(user, issuable, options = {})
           ::Gitlab::Llm::GraphqlSubscriptionResponseService.new(
             user, issuable, response_modifier, options: response_options
           ).execute
+
+          response_modifier
         end
 
         private
diff --git a/ee/lib/gitlab/llm/completions_factory.rb b/ee/lib/gitlab/llm/completions_factory.rb
index 5d50cf79011fd..f0783bc34ee68 100644
--- a/ee/lib/gitlab/llm/completions_factory.rb
+++ b/ee/lib/gitlab/llm/completions_factory.rb
@@ -6,55 +6,68 @@ class CompletionsFactory
       COMPLETIONS = {
         explain_vulnerability: {
           service_class: ::Gitlab::Llm::Completions::ExplainVulnerability,
-          prompt_class: ::Gitlab::Llm::Templates::ExplainVulnerability
+          prompt_class: ::Gitlab::Llm::Templates::ExplainVulnerability,
+          feature_category: :vulnerability_management
         },
         summarize_comments: {
           service_class: ::Gitlab::Llm::Completions::SummarizeAllOpenNotes,
-          prompt_class: nil
+          prompt_class: nil,
+          feature_category: :ai_abstraction_layer
         },
         summarize_review: {
           service_class: ::Gitlab::Llm::VertexAi::Completions::SummarizeReview,
-          prompt_class: ::Gitlab::Llm::Templates::SummarizeReview
+          prompt_class: ::Gitlab::Llm::Templates::SummarizeReview,
+          feature_category: :ai_abstraction_layer
         },
         explain_code: {
           service_class: ::Gitlab::Llm::VertexAi::Completions::ExplainCode,
-          prompt_class: ::Gitlab::Llm::VertexAi::Templates::ExplainCode
+          prompt_class: ::Gitlab::Llm::VertexAi::Templates::ExplainCode,
+          feature_category: :ai_abstraction_layer
         },
         explain_code_open_ai: {
           service_class: ::Gitlab::Llm::OpenAi::Completions::ExplainCode,
-          prompt_class: ::Gitlab::Llm::OpenAi::Templates::ExplainCode
+          prompt_class: ::Gitlab::Llm::OpenAi::Templates::ExplainCode,
+          feature_category: :code_review_workflow
         },
         tanuki_bot: {
           service_class: ::Gitlab::Llm::Anthropic::Completions::TanukiBot,
-          prompt_class: ::Gitlab::Llm::Anthropic::Templates::TanukiBot
+          prompt_class: ::Gitlab::Llm::Anthropic::Templates::TanukiBot,
+          feature_category: :ai_abstraction_layer
         },
         generate_test_file: {
           service_class: ::Gitlab::Llm::VertexAi::Completions::GenerateTestFile,
-          prompt_class: ::Gitlab::Llm::Templates::GenerateTestFile
+          prompt_class: ::Gitlab::Llm::Templates::GenerateTestFile,
+          feature_category: :code_review_workflow
         },
         generate_description: {
           service_class: ::Gitlab::Llm::OpenAi::Completions::GenerateDescription,
-          prompt_class: ::Gitlab::Llm::OpenAi::Templates::GenerateDescription
+          prompt_class: ::Gitlab::Llm::OpenAi::Templates::GenerateDescription,
+          feature_category: :ai_abstraction_layer
         },
         generate_commit_message: {
           service_class: ::Gitlab::Llm::VertexAi::Completions::GenerateCommitMessage,
-          prompt_class: ::Gitlab::Llm::Templates::GenerateCommitMessage
+          prompt_class: ::Gitlab::Llm::Templates::GenerateCommitMessage,
+          feature_category: :code_review_workflow
        },
         analyze_ci_job_failure: {
           service_class: Gitlab::Llm::VertexAi::Completions::AnalyzeCiJobFailure,
-          prompt_class: nil
+          prompt_class: nil,
+          feature_category: :continuous_integration
         },
         chat: {
           service_class: ::Gitlab::Llm::Completions::Chat,
-          prompt_class: nil
+          prompt_class: nil,
+          feature_category: :duo_chat
         },
         fill_in_merge_request_template: {
           service_class: ::Gitlab::Llm::VertexAi::Completions::FillInMergeRequestTemplate,
-          prompt_class: ::Gitlab::Llm::Templates::FillInMergeRequestTemplate
+          prompt_class: ::Gitlab::Llm::Templates::FillInMergeRequestTemplate,
+          feature_category: :code_review_workflow
         },
         summarize_submitted_review: {
           service_class: ::Gitlab::Llm::VertexAi::Completions::SummarizeSubmittedReview,
-          prompt_class: ::Gitlab::Llm::Templates::SummarizeSubmittedReview
+          prompt_class: ::Gitlab::Llm::Templates::SummarizeSubmittedReview,
+          feature_category: :code_review_workflow
         }
       }.freeze
diff --git a/ee/lib/gitlab/llm/concerns/exponential_backoff.rb b/ee/lib/gitlab/llm/concerns/exponential_backoff.rb
index 66dc650e90221..4536476779894 100644
--- a/ee/lib/gitlab/llm/concerns/exponential_backoff.rb
+++ b/ee/lib/gitlab/llm/concerns/exponential_backoff.rb
@@ -23,7 +23,7 @@ def retry_methods_with_exponential_backoff(*method_names)
           define_method(method_name) do |*args, **kwargs|
             run_with_circuit do
-              retry_with_exponential_backoff do
+              retry_with_monitored_exponential_backoff do
                 original_method.bind_call(self, *args, **kwargs)
               end
             end
@@ -33,6 +33,15 @@ def retry_methods_with_exponential_backoff(*method_names)
 
       private
 
+      def retry_with_monitored_exponential_backoff(&block)
+        response = retry_with_exponential_backoff(&block)
+      ensure
+        success = (200...299).cover?(response&.code)
+        client = Gitlab::Metrics::Llm.client_label(self.class)
+
+        Gitlab::Metrics::Sli::ErrorRate[:llm_client_request].increment(labels: { client: client }, error: !success)
+      end
+
       def retry_with_exponential_backoff
         retries = 0
         delay = INITIAL_DELAY
diff --git a/ee/lib/gitlab/llm/concerns/measured_request.rb b/ee/lib/gitlab/llm/concerns/measured_request.rb
deleted file mode 100644
index 6a92699a0bd42..0000000000000
--- a/ee/lib/gitlab/llm/concerns/measured_request.rb
+++ /dev/null
@@ -1,17 +0,0 @@
-# frozen_string_literal: true
-
-module Gitlab
-  module Llm
-    module Concerns
-      module MeasuredRequest
-        extend ActiveSupport::Concern
-
-        def increment_metric(client:, response: nil)
-          success = (200...299).cover?(response&.code)
-
-          Gitlab::Metrics::Sli::Apdex[:llm_client_request].increment(labels: { client: client }, success: success)
-        end
-      end
-    end
-  end
-end
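The client-side half of the change is the retry_with_monitored_exponential_backoff
wrapper above. As a hedged sketch (it restates the new concern code rather than
adding behaviour), the SLI is now recorded once per request, after retries:

    # The `ensure` block runs for both outcomes: a returned HTTP response and a
    # raised error (for example RateLimitError after the final retry), so each
    # request is counted exactly once.
    def retry_with_monitored_exponential_backoff(&block)
      response = retry_with_exponential_backoff(&block)
    ensure
      client = Gitlab::Metrics::Llm.client_label(self.class)
      success = (200...299).cover?(response&.code)

      Gitlab::Metrics::Sli::ErrorRate[:llm_client_request]
        .increment(labels: { client: client }, error: !success)
    end

Previously each client rescued errors itself and called increment_metric from
the now-deleted MeasuredRequest concern inside the retried code path, so a
request could be counted per attempt rather than per request.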
diff --git a/ee/lib/gitlab/llm/open_ai/client.rb b/ee/lib/gitlab/llm/open_ai/client.rb
index ebd5b619eaec2..23bcbe9440c43 100644
--- a/ee/lib/gitlab/llm/open_ai/client.rb
+++ b/ee/lib/gitlab/llm/open_ai/client.rb
@@ -7,7 +7,6 @@ module Llm
   module OpenAi
     class Client
       include ::Gitlab::Llm::Concerns::ExponentialBackoff
-      include ::Gitlab::Llm::Concerns::MeasuredRequest
 
       InputModerationError = Class.new(StandardError)
       OutputModerationError = Class.new(StandardError)
@@ -119,16 +118,12 @@ def request(endpoint:, moderated:, **options)
         logger.debug(message: "Received response from OpenAI", response: response)
 
         track_cost(endpoint, response.parsed_response&.dig('usage'))
-        increment_metric(client: :open_ai, response: response)
 
         if should_moderate?(:output, moderated)
           moderate!(:output, moderation_output(endpoint, response.parsed_response))
         end
 
         response
-      rescue StandardError => e
-        increment_metric(client: :open_ai)
-        raise e
       end
 
       def track_cost(endpoint, usage_data)
diff --git a/ee/lib/gitlab/llm/open_ai/completions/explain_code.rb b/ee/lib/gitlab/llm/open_ai/completions/explain_code.rb
index b2a6d305445c8..0ce287a390ac6 100644
--- a/ee/lib/gitlab/llm/open_ai/completions/explain_code.rb
+++ b/ee/lib/gitlab/llm/open_ai/completions/explain_code.rb
@@ -14,6 +14,8 @@ def execute(user, project, options)
           ::Gitlab::Llm::GraphqlSubscriptionResponseService
             .new(user, project, response_modifier, options: response_options)
             .execute
+
+          response_modifier
         end
       end
     end
diff --git a/ee/lib/gitlab/llm/open_ai/completions/generate_commit_message.rb b/ee/lib/gitlab/llm/open_ai/completions/generate_commit_message.rb
index a85ca524bc500..f4c89a32ad655 100644
--- a/ee/lib/gitlab/llm/open_ai/completions/generate_commit_message.rb
+++ b/ee/lib/gitlab/llm/open_ai/completions/generate_commit_message.rb
@@ -14,6 +14,8 @@ def execute(user, merge_request, options)
           ::Gitlab::Llm::GraphqlSubscriptionResponseService.new(
             user, merge_request, response_modifier, options: options
           ).execute
+
+          response_modifier
         rescue StandardError => error
           Gitlab::ErrorTracking.track_exception(error)
 
@@ -24,6 +26,8 @@ def execute(user, merge_request, options)
           ::Gitlab::Llm::GraphqlSubscriptionResponseService.new(
             user, merge_request, response_modifier, options: options
           ).execute
+
+          response_modifier
         end
 
         private
diff --git a/ee/lib/gitlab/llm/open_ai/completions/generate_description.rb b/ee/lib/gitlab/llm/open_ai/completions/generate_description.rb
index 2e9f2ecf6f52e..b790e7aa01cd6 100644
--- a/ee/lib/gitlab/llm/open_ai/completions/generate_description.rb
+++ b/ee/lib/gitlab/llm/open_ai/completions/generate_description.rb
@@ -41,6 +41,8 @@ def execute(user, issuable, options)
           ::Gitlab::Llm::GraphqlSubscriptionResponseService.new(
             user, issuable, response_modifier, options: response_options
           ).execute
+
+          response_modifier
         end
       end
     end
diff --git a/ee/lib/gitlab/llm/open_ai/completions/generate_test_file.rb b/ee/lib/gitlab/llm/open_ai/completions/generate_test_file.rb
index 7d26c9220a507..f103a71cbbe31 100644
--- a/ee/lib/gitlab/llm/open_ai/completions/generate_test_file.rb
+++ b/ee/lib/gitlab/llm/open_ai/completions/generate_test_file.rb
@@ -14,6 +14,8 @@ def execute(user, merge_request, options)
           ::Gitlab::Llm::GraphqlSubscriptionResponseService.new(
             user, merge_request, response_modifier, options: options
           ).execute
+
+          response_modifier
         rescue StandardError => error
           Gitlab::ErrorTracking.track_exception(error)
 
@@ -24,6 +26,8 @@ def execute(user, merge_request, options)
           ::Gitlab::Llm::GraphqlSubscriptionResponseService.new(
             user, merge_request, response_modifier, options: options
           ).execute
+
+          response_modifier
         end
 
         private
diff --git a/ee/lib/gitlab/llm/open_ai/completions/summarize_review.rb b/ee/lib/gitlab/llm/open_ai/completions/summarize_review.rb
index 8a46c7073083e..231f64912b0ca 100644
--- a/ee/lib/gitlab/llm/open_ai/completions/summarize_review.rb
+++ b/ee/lib/gitlab/llm/open_ai/completions/summarize_review.rb
@@ -18,6 +18,8 @@ def execute(user, merge_request, _ = {})
           ::Gitlab::Llm::GraphqlSubscriptionResponseService.new(
             user, merge_request, response_modifier, options: response_options
           ).execute
+
+          response_modifier
         end
 
         private
diff --git a/ee/lib/gitlab/llm/vertex_ai/client.rb b/ee/lib/gitlab/llm/vertex_ai/client.rb
index 057c9b937f310..e1b7ffa062ddf 100644
--- a/ee/lib/gitlab/llm/vertex_ai/client.rb
+++ b/ee/lib/gitlab/llm/vertex_ai/client.rb
@@ -5,7 +5,6 @@ module Llm
   module VertexAi
     class Client
       include ::Gitlab::Llm::Concerns::ExponentialBackoff
-      include ::Gitlab::Llm::Concerns::MeasuredRequest
       extend ::Gitlab::Utils::Override
 
       def initialize(_user, client_options = {})
@@ -109,12 +108,8 @@ def request(content:, config:, **options)
         )
 
         logger.debug(message: "Received response from Vertex", response: response)
-        increment_metric(client: :vertex_ai, response: response)
 
         response
-      rescue StandardError => e
-        increment_metric(client: :vertex_ai)
-        raise e
       end
 
       def service_name
diff --git a/ee/lib/gitlab/llm/vertex_ai/completions/analyze_ci_job_failure.rb b/ee/lib/gitlab/llm/vertex_ai/completions/analyze_ci_job_failure.rb
index 43f939bc035c7..27357930cc05a 100644
--- a/ee/lib/gitlab/llm/vertex_ai/completions/analyze_ci_job_failure.rb
+++ b/ee/lib/gitlab/llm/vertex_ai/completions/analyze_ci_job_failure.rb
@@ -28,12 +28,13 @@ def execute(user, job, _options)
           @job = job
           response = request
 
-          content = response&.deep_symbolize_keys&.dig(:predictions, 0, :content)
-
-          return unless content
+          response_modifier = ::Gitlab::Llm::VertexAi::ResponseModifiers::Predictions.new(response)
+          return unless response_modifier.response_body
 
           analysis = Ai::JobFailureAnalysis.new(@job)
-          analysis.save_content(content)
+          analysis.save_content(response_modifier.response_body)
+
+          response_modifier
         end
 
         private
diff --git a/ee/lib/gitlab/llm/vertex_ai/completions/explain_code.rb b/ee/lib/gitlab/llm/vertex_ai/completions/explain_code.rb
index 0937ab4ab6955..cdbe257d48237 100644
--- a/ee/lib/gitlab/llm/vertex_ai/completions/explain_code.rb
+++ b/ee/lib/gitlab/llm/vertex_ai/completions/explain_code.rb
@@ -14,6 +14,8 @@ def execute(user, project, options)
           ::Gitlab::Llm::GraphqlSubscriptionResponseService
             .new(user, project, response_modifier, options: response_options)
             .execute
+
+          response_modifier
         end
       end
     end
diff --git a/ee/lib/gitlab/llm/vertex_ai/completions/fill_in_merge_request_template.rb b/ee/lib/gitlab/llm/vertex_ai/completions/fill_in_merge_request_template.rb
index 45a9400736436..4a41be74a92be 100644
--- a/ee/lib/gitlab/llm/vertex_ai/completions/fill_in_merge_request_template.rb
+++ b/ee/lib/gitlab/llm/vertex_ai/completions/fill_in_merge_request_template.rb
@@ -12,6 +12,8 @@ def execute(user, project, options)
           ::Gitlab::Llm::GraphqlSubscriptionResponseService.new(
             user, project, response_modifier, options: response_options
           ).execute
+
+          response_modifier
         end
 
         private
diff --git a/ee/lib/gitlab/llm/vertex_ai/completions/generate_commit_message.rb b/ee/lib/gitlab/llm/vertex_ai/completions/generate_commit_message.rb
index 16142767b09e5..2030220aaaf75 100644
--- a/ee/lib/gitlab/llm/vertex_ai/completions/generate_commit_message.rb
+++ b/ee/lib/gitlab/llm/vertex_ai/completions/generate_commit_message.rb
@@ -30,6 +30,8 @@ def execute(user, merge_request, options)
           ::Gitlab::Llm::GraphqlSubscriptionResponseService.new(
             user, merge_request, response_modifier, options: options
           ).execute
+
+          response_modifier
         end
 
         private
diff --git a/ee/lib/gitlab/llm/vertex_ai/completions/generate_test_file.rb b/ee/lib/gitlab/llm/vertex_ai/completions/generate_test_file.rb
index f3c50ee93e85c..ba9464e4d46b5 100644
--- a/ee/lib/gitlab/llm/vertex_ai/completions/generate_test_file.rb
+++ b/ee/lib/gitlab/llm/vertex_ai/completions/generate_test_file.rb
@@ -20,6 +20,8 @@ def execute(user, merge_request, options)
           ::Gitlab::Llm::GraphqlSubscriptionResponseService.new(
             user, merge_request, response_modifier, options: options
           ).execute
+
+          response_modifier
         rescue StandardError => error
           Gitlab::ErrorTracking.track_exception(error)
 
@@ -30,6 +32,8 @@ def execute(user, merge_request, options)
           ::Gitlab::Llm::GraphqlSubscriptionResponseService.new(
             user, merge_request, response_modifier, options: options
           ).execute
+
+          response_modifier
         end
 
         private
diff --git a/ee/lib/gitlab/llm/vertex_ai/completions/summarize_review.rb b/ee/lib/gitlab/llm/vertex_ai/completions/summarize_review.rb
index 5706bb0d9b9a3..f6f29fe634b55 100644
--- a/ee/lib/gitlab/llm/vertex_ai/completions/summarize_review.rb
+++ b/ee/lib/gitlab/llm/vertex_ai/completions/summarize_review.rb
@@ -17,6 +17,8 @@ def execute(user, merge_request, options)
           ::Gitlab::Llm::GraphqlSubscriptionResponseService.new(
             user, merge_request, response_modifier, options: options
           ).execute
+
+          response_modifier
         rescue StandardError => error
           Gitlab::ErrorTracking.track_exception(error)
 
@@ -27,6 +29,8 @@ def execute(user, merge_request, options)
           ::Gitlab::Llm::GraphqlSubscriptionResponseService.new(
             user, merge_request, response_modifier, options: options
           ).execute
+
+          response_modifier
         end
 
         private
diff --git a/ee/lib/gitlab/llm/vertex_ai/completions/summarize_submitted_review.rb b/ee/lib/gitlab/llm/vertex_ai/completions/summarize_submitted_review.rb
index b8f5d438bfd5f..ac18ae3b9858b 100644
--- a/ee/lib/gitlab/llm/vertex_ai/completions/summarize_submitted_review.rb
+++ b/ee/lib/gitlab/llm/vertex_ai/completions/summarize_submitted_review.rb
@@ -14,8 +14,9 @@ def execute(user, merge_request, options)
           response = response_for(user, review)
           response_modifier = ::Gitlab::Llm::VertexAi::ResponseModifiers::Predictions.new(response)
-
           store_response(response_modifier, review, mr_diff)
+
+          response_modifier
         end
         # rubocop:enable CodeReuse/ActiveRecord
diff --git a/ee/lib/gitlab/metrics/llm.rb b/ee/lib/gitlab/metrics/llm.rb
index 119a4f87fda83..bef43c204d41c 100644
--- a/ee/lib/gitlab/metrics/llm.rb
+++ b/ee/lib/gitlab/metrics/llm.rb
@@ -4,16 +4,25 @@ module Gitlab
   module Metrics
     module Llm
       class << self
+        CLIENT_NAMES = {
+          'Gitlab::Llm::VertexAi::Client' => :vertex_ai,
+          'Gitlab::Llm::Anthropic::Client' => :anthropic,
+          'Gitlab::Llm::OpenAi::Client' => :open_ai
+        }.freeze
+
         def initialize_slis!
-          tool_labels = Gitlab::Llm::Completions::Chat::TOOLS.map { |tool_class| { tool: tool_class::Executor::NAME } }
-          tool_labels << { tool: :unknown }
+          completion_labels = Gitlab::Llm::CompletionsFactory::COMPLETIONS.values.map do |completion|
+            { feature_category: completion[:feature_category], service_class: completion[:service_class].name }
+          end
+
+          Gitlab::Metrics::Sli::ErrorRate.initialize_sli(:llm_completion, completion_labels)
+
+          client_labels = (CLIENT_NAMES.values + [:unknown]).map { |client| { client: client } }
+          Gitlab::Metrics::Sli::ErrorRate.initialize_sli(:llm_client_request, client_labels)
+        end
 
-          Gitlab::Metrics::Sli::Apdex.initialize_sli(:llm_chat_answers, tool_labels)
-          Gitlab::Metrics::Sli::Apdex.initialize_sli(:llm_client_request, [
-            { client: :anthropic },
-            { client: :vertex_ai },
-            { client: :open_ai }
-          ])
+        def client_label(cls)
+          CLIENT_NAMES.fetch(cls.name, :unknown)
         end
       end
     end
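A small usage sketch of the new helper (expected values taken from the
CLIENT_NAMES map above and the specs below; the String lookup only illustrates
the fallback):

    Gitlab::Metrics::Llm.client_label(Gitlab::Llm::VertexAi::Client)  # => :vertex_ai
    Gitlab::Metrics::Llm.client_label(Gitlab::Llm::Anthropic::Client) # => :anthropic
    Gitlab::Metrics::Llm.client_label(Gitlab::Llm::OpenAi::Client)    # => :open_ai
    Gitlab::Metrics::Llm.client_label(String)                         # => :unknown

Keying the map on class-name strings (and falling back to :unknown) means the
lookup never has to reference the client constants directly.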
diff --git a/ee/spec/lib/gitlab/llm/anthropic/client_spec.rb b/ee/spec/lib/gitlab/llm/anthropic/client_spec.rb
index fa10ea0f4c2f4..b2ca617e5883f 100644
--- a/ee/spec/lib/gitlab/llm/anthropic/client_spec.rb
+++ b/ee/spec/lib/gitlab/llm/anthropic/client_spec.rb
@@ -42,6 +42,7 @@
     end
 
     let(:response_body) { expected_response.to_json }
+    let(:http_status) { 200 }
 
     before do
       stub_application_setting(anthropic_api_key: api_key)
@@ -51,7 +52,7 @@
           headers: expected_request_headers
         )
         .to_return(
-          status: 200,
+          status: http_status,
           body: response_body,
           headers: { 'Content-Type' => 'application/json' }
         )
@@ -70,7 +71,17 @@
         allow(Gitlab::HTTP).to receive(:post).and_raise(StandardError)
       end
 
-      it_behaves_like 'measured Llm request with error'
+      it_behaves_like 'measured Llm request with error', StandardError
+    end
+
+    context 'when request is retried' do
+      let(:http_status) { 429 }
+
+      before do
+        stub_const("Gitlab::Llm::Concerns::ExponentialBackoff::INITIAL_DELAY", 0.0)
+      end
+
+      it_behaves_like 'measured Llm request with error', Gitlab::Llm::Concerns::ExponentialBackoff::RateLimitError
     end
   end
diff --git a/ee/spec/lib/gitlab/llm/completions/chat_spec.rb b/ee/spec/lib/gitlab/llm/completions/chat_spec.rb
index 87512f96a3183..5f2852691a0d1 100644
--- a/ee/spec/lib/gitlab/llm/completions/chat_spec.rb
+++ b/ee/spec/lib/gitlab/llm/completions/chat_spec.rb
@@ -61,9 +61,6 @@
         expect(instance).to receive(:execute).and_return(answer)
       end
 
-      expect(Gitlab::Metrics::Sli::Apdex[:llm_chat_answers])
-        .to receive(:increment)
-        .with(labels: { tool: "IssueIdentifier" }, success: true)
       expect(response_handler).to receive(:execute)
       expect(::Gitlab::Llm::ResponseService).to receive(:new).with(context, { request_id: 'uuid' })
         .and_return(response_handler)
@@ -127,7 +124,6 @@
         expect(instance).to receive(:execute).and_return(answer)
       end
 
-      allow(Gitlab::Metrics::Sli::Apdex[:llm_chat_answers]).to receive(:increment)
       allow(::Gitlab::Llm::Chain::GitlabContext).to receive(:new).and_return(context)
 
       subject
diff --git a/ee/spec/lib/gitlab/llm/completions_factory_spec.rb b/ee/spec/lib/gitlab/llm/completions_factory_spec.rb
index 843cb69cbf91b..18cdcf67244a7 100644
--- a/ee/spec/lib/gitlab/llm/completions_factory_spec.rb
+++ b/ee/spec/lib/gitlab/llm/completions_factory_spec.rb
@@ -3,6 +3,18 @@
 require 'spec_helper'
 
 RSpec.describe Gitlab::Llm::CompletionsFactory, feature_category: :ai_abstraction_layer do
+  describe 'completion definitions' do
+    it 'has a valid :feature_category set', :aggregate_failures do
+      feature_categories = Gitlab::FeatureCategories.default.categories.map(&:to_sym).to_set
+
+      ::Gitlab::Llm::CompletionsFactory::COMPLETIONS.each do |action, completion|
+        expect(completion[:feature_category]).to be_a(Symbol)
+        expect(feature_categories)
+          .to(include(completion[:feature_category]), "expected #{action} to declare a valid feature_category")
+      end
+    end
+  end
+
   describe ".completion" do
     context 'with existing completion' do
       let(:completion_name) { :summarize_review }
diff --git a/ee/spec/lib/gitlab/llm/open_ai/client_spec.rb b/ee/spec/lib/gitlab/llm/open_ai/client_spec.rb
index 2d118a38aa9f3..fcfc37010922c 100644
--- a/ee/spec/lib/gitlab/llm/open_ai/client_spec.rb
+++ b/ee/spec/lib/gitlab/llm/open_ai/client_spec.rb
@@ -107,7 +107,7 @@
 
       counter = instance_double(Prometheus::Client::Counter, increment: true)
 
-      allow(Gitlab::Metrics::Sli::Apdex[:llm_client_request]).to receive(:increment)
+      allow(Gitlab::Metrics::Sli::ErrorRate[:llm_client_request]).to receive(:increment)
 
       allow(Gitlab::Metrics)
        .to receive(:counter)
@@ -317,7 +317,18 @@
         end
       end
 
-      it_behaves_like 'measured Llm request with error'
+      it_behaves_like 'measured Llm request with error', StandardError
+    end
+
+    context 'when request is retried' do
+      let(:http_status) { 429 }
+
+      before do
+        stub_const("Gitlab::Llm::Concerns::ExponentialBackoff::INITIAL_DELAY", 0.0)
+        allow(response_double).to receive(:too_many_requests?).and_return(true)
+      end
+
+      it_behaves_like 'measured Llm request with error', Gitlab::Llm::Concerns::ExponentialBackoff::RateLimitError
     end
   end
 end
diff --git a/ee/spec/lib/gitlab/llm/open_ai/completions/generate_test_file_spec.rb b/ee/spec/lib/gitlab/llm/open_ai/completions/generate_test_file_spec.rb
index 814a92e31780d..5fdba89c74d1b 100644
--- a/ee/spec/lib/gitlab/llm/open_ai/completions/generate_test_file_spec.rb
+++ b/ee/spec/lib/gitlab/llm/open_ai/completions/generate_test_file_spec.rb
@@ -70,6 +70,30 @@
 
         generate_test_file
       end
+
+      context 'when an unexpected error is raised' do
+        let(:error) { StandardError.new("Error") }
+        let(:response_modifier) { double }
+        let(:response_service) { double }
+
+        before do
+          allow_next_instance_of(Gitlab::Llm::OpenAi::Client) do |client|
+            allow(client).to receive(:chat).and_raise(error)
+          end
+        end
+
+        it 'publishes a generic error to the graphql subscription' do
+          errors = { error: { message: 'An unexpected error has occurred.' } }
+
+          expect(Gitlab::ErrorTracking).to receive(:track_exception).with(error)
+          expect(::Gitlab::Llm::OpenAi::ResponseModifiers::Chat).to receive(:new)
+            .with(errors.to_json).and_return(response_modifier)
+          expect(::Gitlab::Llm::GraphqlSubscriptionResponseService).to receive(:new).and_return(response_service)
+          expect(response_service).to receive(:execute)
+
+          generate_test_file
+        end
+      end
     end
   end
 end
diff --git a/ee/spec/lib/gitlab/llm/vertex_ai/client_spec.rb b/ee/spec/lib/gitlab/llm/vertex_ai/client_spec.rb
index 357e86583171b..b7453165ae4b3 100644
--- a/ee/spec/lib/gitlab/llm/vertex_ai/client_spec.rb
+++ b/ee/spec/lib/gitlab/llm/vertex_ai/client_spec.rb
@@ -210,11 +210,13 @@
       )
     end
 
+    let(:http_status) { 200 }
+
     subject { described_class.new(user).text(content: 'anything', **options) }
 
     before do
       allow(Gitlab::Llm::VertexAi::Configuration).to receive(:new).and_return(config)
-      stub_request(:post, url).to_return(status: 200, body: 'some response')
+      stub_request(:post, url).to_return(status: http_status, body: 'some response')
     end
 
     context 'when measuring request success' do
@@ -227,7 +229,17 @@
         allow(Gitlab::HTTP).to receive(:post).and_raise(StandardError)
       end
 
-      it_behaves_like 'measured Llm request with error'
+      it_behaves_like 'measured Llm request with error', StandardError
+    end
+
+    context 'when request is retried' do
+      let(:http_status) { 429 }
+
+      before do
+        stub_const("Gitlab::Llm::Concerns::ExponentialBackoff::INITIAL_DELAY", 0.0)
+      end
+
+      it_behaves_like 'measured Llm request with error', Gitlab::Llm::Concerns::ExponentialBackoff::RateLimitError
     end
   end
 end
diff --git a/ee/spec/lib/gitlab/metrics/llm_spec.rb b/ee/spec/lib/gitlab/metrics/llm_spec.rb
index 2cca385fe4190..4dea49cb4aa91 100644
--- a/ee/spec/lib/gitlab/metrics/llm_spec.rb
+++ b/ee/spec/lib/gitlab/metrics/llm_spec.rb
@@ -5,11 +5,11 @@
 RSpec.describe Gitlab::Metrics::Llm, feature_category: :ai_abstraction_layer do
   describe '#initialize_slis!' do
     it 'initializes Apdex SLIs for Llm' do
-      expect(Gitlab::Metrics::Sli::Apdex).to receive(:initialize_sli).with(
-        :llm_chat_answers,
+      expect(Gitlab::Metrics::Sli::ErrorRate).to receive(:initialize_sli).with(
+        :llm_completion,
         a_kind_of(Array)
       )
-      expect(Gitlab::Metrics::Sli::Apdex).to receive(:initialize_sli).with(
+      expect(Gitlab::Metrics::Sli::ErrorRate).to receive(:initialize_sli).with(
         :llm_client_request,
         a_kind_of(Array)
       )
@@ -17,4 +17,16 @@
       described_class.initialize_slis!
     end
   end
+
+  describe '#client_label' do
+    it 'returns client label for known clients class' do
+      expect(described_class.client_label(Gitlab::Llm::VertexAi::Client)).to eq(:vertex_ai)
+      expect(described_class.client_label(Gitlab::Llm::Anthropic::Client)).to eq(:anthropic)
+      expect(described_class.client_label(Gitlab::Llm::OpenAi::Client)).to eq(:open_ai)
+    end
+
+    it 'returns :unknwon for other classes' do
+      expect(described_class.client_label(Gitlab::Llm::Cache)).to eq(:unknown)
+    end
+  end
 end
diff --git a/ee/spec/support/shared_examples/lib/gitlab/llm/measured_request_shared_examples.rb b/ee/spec/support/shared_examples/lib/gitlab/llm/measured_request_shared_examples.rb
index f8ac223d67e30..77cacd25560e2 100644
--- a/ee/spec/support/shared_examples/lib/gitlab/llm/measured_request_shared_examples.rb
+++ b/ee/spec/support/shared_examples/lib/gitlab/llm/measured_request_shared_examples.rb
@@ -2,18 +2,18 @@
 
 RSpec.shared_examples 'measured Llm request' do
   it 'inrements llm_client_request counter' do
-    expect(Gitlab::Metrics::Sli::Apdex[:llm_client_request])
-      .to receive(:increment).with(labels: { client: client }, success: true)
+    expect(Gitlab::Metrics::Sli::ErrorRate[:llm_client_request])
+      .to receive(:increment).with(labels: { client: client }, error: false)
 
     subject
   end
 end
 
-RSpec.shared_examples 'measured Llm request with error' do
+RSpec.shared_examples 'measured Llm request with error' do |error_cls|
   it 'inrements llm_client_request counter with success false' do
-    expect(Gitlab::Metrics::Sli::Apdex[:llm_client_request])
-      .to receive(:increment).with(labels: { client: client }, success: false)
+    expect(Gitlab::Metrics::Sli::ErrorRate[:llm_client_request])
+      .to receive(:increment).with(labels: { client: client }, error: true)
 
-    expect { subject }.to raise_error(StandardError)
+    expect { subject }.to raise_error(error_cls)
   end
 end
diff --git a/ee/spec/workers/llm/completion_worker_spec.rb b/ee/spec/workers/llm/completion_worker_spec.rb
index c198eb3c06721..734e4ca05e47d 100644
--- a/ee/spec/workers/llm/completion_worker_spec.rb
+++ b/ee/spec/workers/llm/completion_worker_spec.rb
@@ -108,8 +108,23 @@
     context 'when issue type is not supported' do
       let(:resource_type) { 'invalid' }
 
-      it 'raises a NameError' do
-        expect { subject }.to raise_error(NameError, "uninitialized constant Invalid")
+      it 'raises a NameError and updates error rate' do
+        expect(Gitlab::Metrics::Sli::ErrorRate[:llm_completion])
+          .to receive(:increment)
+          .with(labels: {
+            feature_category: :ai_abstraction_layer,
+            service_class: 'Gitlab::Llm::Completions::SummarizeAllOpenNotes'
+          }, error: true)
+
+        expect { subject }.to raise_error(NameError, 'uninitialized constant Invalid')
+      end
+    end
+
+    context 'when invalid action_name is used' do
+      let(:ai_action_name) { :some_action }
+
+      it 'raises an exception' do
+        expect { subject }.to raise_error(NameError, 'completion class for action some_action not found')
       end
     end
   end
-- 
GitLab
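Finally, a hedged sketch of what registering a new AI action looks like after
this change (the action name and service class here are hypothetical; only the
required shape comes from this patch):

    # A new COMPLETIONS entry must now declare a feature_category; the spec
    # added to completions_factory_spec.rb fails if it is missing or is not a
    # known category symbol.
    summarize_pipeline_failure: {
      service_class: ::Gitlab::Llm::VertexAi::Completions::SummarizePipelineFailure, # hypothetical
      prompt_class: nil,
      feature_category: :continuous_integration
    }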