diff --git a/ee/lib/gitlab/llm/ai_gateway/client.rb b/ee/lib/gitlab/llm/ai_gateway/client.rb
index 4057cefe03a193e624bd742fd3bce26eb5868c27..2b463ee790f3a17bcb400968ab9e01f692d5241b 100644
--- a/ee/lib/gitlab/llm/ai_gateway/client.rb
+++ b/ee/lib/gitlab/llm/ai_gateway/client.rb
@@ -27,7 +27,7 @@ def complete(url:, body:, timeout: DEFAULT_TIMEOUT)
           perform_completion_request(url: url, body: body, timeout: timeout, stream: false)
         end
 
-        log_response_received(response.parsed_response)
+        log_response_received(response.parsed_response) if response&.success?
 
         response
       end
diff --git a/ee/lib/gitlab/llm/ai_gateway/completions/base.rb b/ee/lib/gitlab/llm/ai_gateway/completions/base.rb
index ba33a26784e628ad4dcfc4251fa90000d4654f1d..50dc8f519a785fd2abc118b65ee23b1d350ccba2 100644
--- a/ee/lib/gitlab/llm/ai_gateway/completions/base.rb
+++ b/ee/lib/gitlab/llm/ai_gateway/completions/base.rb
@@ -45,7 +45,10 @@ def request!
           body: { 'inputs' => inputs }
         )
 
-        Gitlab::Json.parse(response.body)
+        return if response&.body.blank?
+        return Gitlab::Json.parse(response.body) if response&.success?
+
+        { 'detail' => DEFAULT_ERROR }
       rescue ArgumentError => e
         { 'detail' => e.message }
       rescue StandardError => e
diff --git a/ee/lib/gitlab/llm/anthropic/client.rb b/ee/lib/gitlab/llm/anthropic/client.rb
index 46c50d15354a9fdbdaeeef31662e936b6ebf4518..9e81529a276771cb3c8b094466c175bfe82b80e0 100644
--- a/ee/lib/gitlab/llm/anthropic/client.rb
+++ b/ee/lib/gitlab/llm/anthropic/client.rb
@@ -29,6 +29,12 @@ def complete(prompt:, **options)
           perform_completion_request(prompt: prompt, options: options.except(:stream))
         end
 
+        # retry_with_exponential_backoff will return nil if a 5xx server error is returned.
+        # If the client returns a 4xx response, we still want to return the response
+        # in case the caller wants to parse the errors: https://docs.anthropic.com/en/api/errors
+        return unless response&.parsed_response
+        return response unless response.success?
+
         response_completion = response["completion"]
 
         log_response_received(response_completion, 'completion')
@@ -67,7 +73,13 @@ def messages_complete(messages:, **options)
           perform_messages_request(messages: messages, options: options.except(:stream))
         end
 
-        response_completion = response&.dig('content', 0, 'text')
+        # retry_with_exponential_backoff will return nil if a 5xx server error is returned.
+        # If the client returns a 4xx response, we still want to return the response
+        # in case the caller wants to parse the errors: https://docs.anthropic.com/en/api/errors
+        return unless response&.parsed_response
+        return response unless response.success?
+
+        response_completion = response.dig('content', 0, 'text')
 
         log_response_received(response_completion, 'messages')
         track_prompt_size(token_size(messages))
diff --git a/ee/lib/gitlab/llm/chain/tools/embeddings_completion.rb b/ee/lib/gitlab/llm/chain/tools/embeddings_completion.rb
index 51626369f2b8e527e6ce8d78a105af7da3287c2a..bc3ee4efbb137580b3a4e887a7b04658b0200dde 100644
--- a/ee/lib/gitlab/llm/chain/tools/embeddings_completion.rb
+++ b/ee/lib/gitlab/llm/chain/tools/embeddings_completion.rb
@@ -29,9 +29,11 @@ def execute(&)
 
       def get_search_results(question)
        response = Gitlab::Llm::AiGateway::DocsClient.new(current_user)
-          .search(query: question) || {}
+          .search(query: question)
 
-        response.dig('response', 'results')&.map(&:with_indifferent_access)
+        return {} unless response&.success? && response.parsed_response
+
+        response.parsed_response.dig('response', 'results')&.map(&:with_indifferent_access)
       end
 
       private
@@ -53,6 +55,11 @@ def get_completions_ai_gateway(search_documents)
          yield data if block_given?
        end
 
+        unless final_prompt_result&.success? && final_prompt_result.parsed_response
+          final_prompt_error = "Error retrieving completions with final prompt: #{final_prompt_result}"
+          raise Gitlab::Llm::AiGateway::Client::ConnectionError, final_prompt_error
+        end
+
        log_conditional_info(current_user,
          message: "Got Final Result for documentation question content",
          event_name: 'response_received',
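
Reviewer note on the Anthropic client changes above: both entry points now share the same return contract. A sketch of how a caller is expected to branch on it (handle_unavailable and handle_api_error are illustrative names, not part of this patch):

    # Assumes HTTParty::Response semantics for #success?, #dig and #parsed_response.
    response = client.messages_complete(messages: messages)

    if response.nil?
      # 5xx responses are retried by retry_with_exponential_backoff and surface
      # as nil, as do responses with nothing to parse.
      handle_unavailable
    elsif response.success?
      completion = response.dig('content', 0, 'text')
    else
      # 4xx responses now pass through so the Anthropic error payload can be read:
      # { "type" => "error", "error" => { "type" => ..., "message" => ... } }
      handle_api_error(response.parsed_response.dig('error', 'type'))
    end
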
diff --git a/ee/lib/gitlab/llm/concerns/exponential_backoff.rb b/ee/lib/gitlab/llm/concerns/exponential_backoff.rb
index 447b4ce48dbaba55c4e5d4bbf8577f3ba007c05a..1ecf4d53a5b8891c07fd2569794c9862a25b3148 100644
--- a/ee/lib/gitlab/llm/concerns/exponential_backoff.rb
+++ b/ee/lib/gitlab/llm/concerns/exponential_backoff.rb
@@ -42,8 +42,7 @@ def run_retry_with_exponential_backoff
 
        return unless response.present?
 
-        http_response = response.response
-        return if http_response.nil? || http_response.body.blank?
+        return if response.response.nil?
 
        raise Gitlab::CircuitBreaker::InternalServerError if response.server_error?
 
diff --git a/ee/lib/gitlab/llm/vertex_ai/client.rb b/ee/lib/gitlab/llm/vertex_ai/client.rb
index eeed9ce18b31539075649eee4f00185eb252e499..21c3ee549cf773a02ff7920e99121fb4696cd57a 100644
--- a/ee/lib/gitlab/llm/vertex_ai/client.rb
+++ b/ee/lib/gitlab/llm/vertex_ai/client.rb
@@ -141,14 +141,26 @@ def request(content:, config:, **options)
                                      ai_component: 'abstraction_layer',
                                      unit_primitive: unit_primitive)
 
-        if response.present?
-          track_token_usage(response)
-        else
+        # 500 errors will return as nil. This preserves the previous behavior
+        # of returning nil if a blank body is detected.
+        if response.nil? || response&.body.blank?
          log_error(message: "Empty response from Vertex",
            event_name: 'empty_response_received',
            ai_component: 'abstraction_layer',
            unit_primitive: unit_primitive
          )
+          return
+        end
+
+        if response.success? && response.parsed_response.present?
+          track_token_usage(response)
+        else
+          log_error(message: "Failed response from Vertex",
+            event_name: 'failed_response_received',
+            ai_component: 'abstraction_layer',
+            unit_primitive: unit_primitive,
+            status: response
+          )
        end
 
        response
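
Reviewer note: with the blank-body guard removed from ExponentialBackoff, empty 2xx bodies (for example 204 No Content) now reach the clients instead of being swallowed. For VertexAi::Client#request the branches above then resolve as follows (a summary, assuming HTTParty's #success? means 2xx):

    # nil response or blank body -> logs "Empty response from Vertex",  returns nil
    # 2xx with a parsed body     -> tracks token usage,                 returns response
    # anything else, e.g. 403    -> logs "Failed response from Vertex", returns response
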
diff --git a/ee/spec/lib/gitlab/llm/ai_gateway/completions/base_spec.rb b/ee/spec/lib/gitlab/llm/ai_gateway/completions/base_spec.rb
index c05a386c5bed84061660cd7b1d54606b6806a734..4b96dd37a72e8c6e2ed4438249a8ca54081203a8 100644
--- a/ee/spec/lib/gitlab/llm/ai_gateway/completions/base_spec.rb
+++ b/ee/spec/lib/gitlab/llm/ai_gateway/completions/base_spec.rb
@@ -9,8 +9,8 @@
   let(:prompt_message) { build(:ai_message, ai_action: ai_action, user: user, resource: resource) }
   let(:inputs) { { prompt: "What's your name?" } }
   let(:response) { "I'm Duo" }
-  let(:http_response) { instance_double(HTTParty::Response, body: %("#{response}")) }
-  let(:processed_repsonse) { response }
+  let(:http_response) { instance_double(HTTParty::Response, body: %("#{response}"), success?: true) }
+  let(:processed_response) { response }
   let(:response_modifier_class) { Gitlab::Llm::AiGateway::ResponseModifiers::Base }
   let(:response_modifier) { instance_double(Gitlab::Llm::AiGateway::ResponseModifiers::Base) }
   let(:response_service) { instance_double(Gitlab::Llm::GraphqlSubscriptionResponseService) }
@@ -56,7 +56,7 @@
        .and_return(http_response)
    end
 
-    expect(response_modifier_class).to receive(:new).with(processed_repsonse)
+    expect(response_modifier_class).to receive(:new).with(processed_response)
      .and_return(response_modifier)
    expect(Gitlab::Llm::GraphqlSubscriptionResponseService).to receive(:new)
      .with(user, resource, response_modifier, options: response_options).and_return(response_service)
@@ -82,19 +82,19 @@
 
  context 'when the subclass raises an ArgumentError when gathering inputs' do
    let(:http_response) { nil }
-    let(:processed_repsonse) { { 'detail' => 'Something went wrong.' } }
+    let(:processed_response) { { 'detail' => 'Something went wrong.' } }
 
    before do
      subclass.define_method(:inputs) { raise ArgumentError, 'Something went wrong.' }
    end
 
    # Note: The completion "executes successfully" in that it relays the error to the user via GraphQL, which we check
-    # by changing the `let(:processed_repsonse)` in this context
+    # by changing the `let(:processed_response)` in this context
    it_behaves_like 'executing successfully'
  end
 
  context 'when an unexpected error is raised' do
-    let(:processed_repsonse) { { 'detail' => 'An unexpected error has occurred.' } }
+    let(:processed_response) { { 'detail' => 'An unexpected error has occurred.' } }
 
    before do
      allow(Gitlab::Json).to receive(:parse).and_raise(StandardError)
@@ -104,7 +104,7 @@
    end
 
  context 'when the subclass overrides the post_process method' do
-    let(:processed_repsonse) { response.upcase }
+    let(:processed_response) { response.upcase }
 
    before do
      subclass.define_method(:post_process) { |response| response.upcase }
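
Reviewer note: the specs below repeat similar instance_double(HTTParty::Response, ...) setups now that success? (and sometimes code and parsed_response) must be stubbed alongside body. A shared helper could cut the repetition; one possible shape (illustrative only, not part of this patch):

    def stub_llm_http_response(body, success: true, code: success ? 200 : 400)
      instance_double(HTTParty::Response,
        body: body.to_json,
        parsed_response: body,
        success?: success,
        code: code)
    end
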
diff --git a/ee/spec/lib/gitlab/llm/ai_gateway/completions/generate_description_spec.rb b/ee/spec/lib/gitlab/llm/ai_gateway/completions/generate_description_spec.rb
index 6b3c73b972dcf63b76ceba61f23fa5119434f261..b8ea746e688dae7ac66b8d92160bcbc6de0ddaa8 100644
--- a/ee/spec/lib/gitlab/llm/ai_gateway/completions/generate_description_spec.rb
+++ b/ee/spec/lib/gitlab/llm/ai_gateway/completions/generate_description_spec.rb
@@ -11,7 +11,7 @@
  let(:ai_options) { { content: content, description_template_name: description_template_name } }
  let(:template_class) { ::Gitlab::Llm::Templates::GenerateDescription }
  let(:ai_client) { instance_double(Gitlab::Llm::AiGateway::Client) }
-  let(:ai_response) { instance_double(HTTParty::Response, body: %("Success")) }
+  let(:ai_response) { instance_double(HTTParty::Response, body: %("Success"), success?: true) }
  let(:uuid) { SecureRandom.uuid }
  let(:prompt_message) do
    build(:ai_message, :generate_description, user: user, resource: issuable, content: content, request_id: uuid)
@@ -43,6 +43,26 @@
 
      expect(generate_description[:ai_message].content).to eq("Success")
    end
+
+    context 'with an unsuccessful request' do
+      let(:ai_response) { instance_double(HTTParty::Response, body: %("Failed"), success?: false) }
+
+      it 'returns an error' do
+        expect(Gitlab::Llm::AiGateway::Client).to receive(:new).with(
+          user,
+          service_name: :generate_description,
+          tracking_context: tracking_context
+        )
+        expect(ai_client).to receive(:complete).with(
+          url: "#{Gitlab::AiGateway.url}/v1/prompts/generate_description",
+          body: { 'inputs' => { content: content, template: expected_template } }
+        ).and_return(ai_response)
+
+        expect(::Gitlab::Llm::GraphqlSubscriptionResponseService).to receive(:new).and_call_original
+
+        expect(generate_description[:ai_message].content).to eq({ "detail" => "An unexpected error has occurred." })
+      end
+    end
  end
 
  describe "#execute" do
diff --git a/ee/spec/lib/gitlab/llm/ai_gateway/completions/summarize_review_spec.rb b/ee/spec/lib/gitlab/llm/ai_gateway/completions/summarize_review_spec.rb
index e9b6ed502c18524fe4814b57148a8fdb970ca5d6..2de5da214ea323a2fa51411aef27d796efc61bee 100644
--- a/ee/spec/lib/gitlab/llm/ai_gateway/completions/summarize_review_spec.rb
+++ b/ee/spec/lib/gitlab/llm/ai_gateway/completions/summarize_review_spec.rb
@@ -36,7 +36,7 @@
  end
 
  let(:example_answer) { { "response" => "AI generated review summary" } }
-  let(:example_response) { instance_double(HTTParty::Response, body: example_answer.to_json) }
+  let(:example_response) { instance_double(HTTParty::Response, body: example_answer.to_json, success?: true) }
 
  shared_examples_for 'summarize review' do
    it 'publishes the content from the AI response' do
diff --git a/ee/spec/lib/gitlab/llm/chain/tools/embeddings_completion_spec.rb b/ee/spec/lib/gitlab/llm/chain/tools/embeddings_completion_spec.rb
index b78f4ce089b50db0f0186616bb5c3128945e82ac..7e1564675ab08a53e95a3c017ead1ed16d36e730 100644
--- a/ee/spec/lib/gitlab/llm/chain/tools/embeddings_completion_spec.rb
+++ b/ee/spec/lib/gitlab/llm/chain/tools/embeddings_completion_spec.rb
@@ -16,12 +16,17 @@
  let(:instance) { described_class.new(current_user: user, question: question) }
  let(:ai_gateway_request) { ::Gitlab::Llm::Chain::Requests::AiGateway.new(user) }
  let(:attrs) { embeddings.pluck(:id).map { |x| "CNT-IDX-#{x}" }.join(", ") }
-  let(:completion_response) { { 'response' => "#{answer} ATTRS: #{attrs}" } }
+  let(:completion_body) { { 'response' => "#{answer} ATTRS: #{attrs}" } }
+  let(:completion_response) do
+    instance_double(HTTParty::Response, code: 200, success?: true, body: completion_body.to_json,
+      parsed_response: completion_body)
+  end
+
  let(:model) { ::Gitlab::Llm::Anthropic::Client::CLAUDE_3_5_SONNET }
  let(:docs_search_client) { ::Gitlab::Llm::AiGateway::DocsClient.new(user) }
  let(:docs_search_args) { { query: question } }
-  let(:docs_search_response) do
+  let(:docs_search_body) do
    {
      'response' => {
        'results' => [
@@ -35,6 +40,12 @@
    }
  end
 
+  let(:docs_search_body_json) { docs_search_body.to_json }
+  let(:docs_search_response) do
+    instance_double(HTTParty::Response, success?: true, code: 200, body: docs_search_body_json,
+      parsed_response: docs_search_body)
+  end
+
  describe '#execute' do
    subject(:execute) { instance.execute }
 
@@ -87,6 +98,18 @@
      execute
    end
 
+    it 'raises an error when final prompt request fails' do
+      expect(ai_gateway_request).to receive(:request)
+        .with({ prompt: instance_of(Array),
+          options: { model: model, max_tokens: 256 } })
+        .once.and_return(nil)
+      expect(docs_search_client).to receive(:search).with(**docs_search_args).and_return(docs_search_response)
+
+      expect(logger).to receive(:error).with(a_hash_including(message: "Streaming error", error: anything))
+
+      execute
+    end
+
    context 'when user has AI features disabled' do
      before do
        allow(::Gitlab::Llm::TanukiBot).to receive(:enabled_for?).with(user: user).and_return(false)
@@ -106,7 +129,7 @@
    end
 
    context 'when no documents are found' do
-      let(:docs_search_response) { {} }
+      let(:docs_search_body) { {} }
 
      it 'returns an empty response message' do
        expect(execute.response_body).to eq(empty_response_message)
@@ -114,7 +137,8 @@
    end
 
    context 'when DocsClient returns nil' do
-      let(:docs_search_response) { nil }
+      let(:docs_search_body) { nil }
+      let(:docs_search_body_json) { nil }
 
      it 'returns an empty response message' do
        expect(execute.response_body).to eq(empty_response_message)
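
Reviewer note: in the new 'raises an error when final prompt request fails' example, the ConnectionError raised by the guard in EmbeddingsCompletion is never asserted directly; the logger expectation only holds if the streaming wrapper rescues it and logs it as "Streaming error". If that indirection is intentional, a comment in the spec would make it clearer, e.g.:

    # The ConnectionError raised by get_completions_ai_gateway is rescued by the
    # streaming wrapper and logged as "Streaming error"; assert on the log, not the raise.
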
diff --git a/ee/spec/lib/gitlab/llm/concerns/exponential_backoff_spec.rb b/ee/spec/lib/gitlab/llm/concerns/exponential_backoff_spec.rb
index e4c660a9920654610d6f1e2fdf892eea004c99f5..3bb30d662ae20d33937fc775a6eb1e15d6684062 100644
--- a/ee/spec/lib/gitlab/llm/concerns/exponential_backoff_spec.rb
+++ b/ee/spec/lib/gitlab/llm/concerns/exponential_backoff_spec.rb
@@ -30,6 +30,14 @@
 
    instance_double(HTTParty::Response, response: nil)
  end
 
+  let(:no_content_response) do
+    instance_double(HTTParty::Response,
+      code: 204,
+      response: Net::HTTPNoContent.new("1.1", "204", "No Content"),
+      server_error?: false, too_many_requests?: false
+    )
+  end
+
  let(:server_error) do
    instance_double(HTTParty::Response, code: 503, success?: false, parsed_response: body,
@@ -186,6 +194,15 @@ def dummy_method(response_caller)
      end
    end
 
+    context 'when the function response is no content' do
+      it 'does not retry the function' do
+        allow(response_caller).to receive(:call).and_return(no_content_response)
+
+        expect(subject).to eq(no_content_response)
+        expect(response_caller).to have_received(:call).once
+      end
+    end
+
    context 'when the function response is a server error' do
      it 'returns a nil response' do
        allow(response_caller).to receive(:call).and_return(server_error)
diff --git a/ee/spec/lib/gitlab/llm/vertex_ai/client_spec.rb b/ee/spec/lib/gitlab/llm/vertex_ai/client_spec.rb
index 8c755d776e0cfbbcaa7a9ab35e084620ca2cfc61..8aa2317ba40fd76825097611feff1547365c5deb 100644
--- a/ee/spec/lib/gitlab/llm/vertex_ai/client_spec.rb
+++ b/ee/spec/lib/gitlab/llm/vertex_ai/client_spec.rb
@@ -136,6 +136,16 @@
    end
  end
 
+  context 'when a 403 error is returned from the API' do
+    before do
+      stub_request(:post, url).to_return(status: 403, body: "403 Forbidden")
+    end
+
+    it 'returns a 403 response' do
+      expect(response.code).to eq(403)
+    end
+  end
+
  context 'when a failed response is returned from the API' do
    include_context 'when a failed response is returned from the API'
 
diff --git a/ee/spec/support/shared_examples/lib/gitlab/llm/anthropic/client_shared_examples.rb b/ee/spec/support/shared_examples/lib/gitlab/llm/anthropic/client_shared_examples.rb
index 42036382d2dc684715eec55578b02038bd4d5da1..1ef9002852a07787bccf84281a1627b3fd9d4f40 100644
--- a/ee/spec/support/shared_examples/lib/gitlab/llm/anthropic/client_shared_examples.rb
+++ b/ee/spec/support/shared_examples/lib/gitlab/llm/anthropic/client_shared_examples.rb
@@ -93,6 +93,53 @@
    it_behaves_like 'measured Llm request with error', StandardError
  end
 
+  context 'when response is a 500 error' do
+    let(:http_status) { 500 }
+    let(:response_body) { nil }
+    let(:response_headers) { nil }
+
+    it 'returns nil' do
+      expect(complete).to be_nil
+    end
+  end
+
+  context 'when response is 204 No Content' do
+    let(:http_status) { 204 }
+    let(:response_body) { nil }
+    let(:response_headers) { nil }
+
+    it_behaves_like 'measured Llm request'
+  end
+
+  context 'when response is a bad request with no body' do
+    let(:http_status) { 400 }
+    let(:response_body) { nil }
+    let(:response_headers) { nil }
+
+    it 'returns nil' do
+      expect(complete).to be_nil
+    end
+  end
+
+  context 'when response is an authentication error' do
+    let(:http_status) { 401 }
+    let(:response_body) do
+      {
+        "type" => "error",
+        "error" => {
+          "type" => "authentication_error",
+          "message" => "There’s an issue with your API key."
+        }
+      }.to_json
+    end
+
+    it 'returns a response' do
+      expect(complete).to be_present
+      expect(complete.parsed_response['type']).to eq('error')
+      expect(complete.parsed_response['error']['type']).to eq('authentication_error')
+    end
+  end
+
  context 'when request is retried' do
    let(:http_status) { 429 }
 
@@ -425,6 +472,35 @@
      end
    end
 
+    context 'when response is a bad request with no body' do
+      let(:http_status) { 400 }
+      let(:response_body) { nil }
+      let(:response_headers) { nil }
+
+      it 'returns nil' do
+        expect(subject).to be_nil
+      end
+    end
+
+    context 'when response is an authentication error' do
+      let(:http_status) { 401 }
+      let(:response_body) do
+        {
+          "type" => "error",
+          "error" => {
+            "type" => "authentication_error",
+            "message" => "There’s an issue with your API key."
+          }
+        }.to_json
+      end
+
+      it 'returns a response' do
+        expect(subject).to be_present
+        expect(subject.parsed_response['type']).to eq('error')
+        expect(subject.parsed_response['error']['type']).to eq('authentication_error')
+      end
+    end
+
    context 'when request is retried once' do
      before do
        stub_request(:post, "#{ai_gateway_url}/v1/proxy/anthropic/v1/messages")